From a072fca8229b26042fe24bff42989533e1d2050a Mon Sep 17 00:00:00 2001
From: Sing_chan <51314274+betterpig@users.noreply.github.com>
Date: Sun, 5 Jun 2022 10:58:58 +0800
Subject: [PATCH] 【code format check upgrade】 step2: yapf (#42944)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* use yapf to format all Python files
* exclude two unittest files from yapf because they rely on writing and reading
  source files, and reformatting them would break those tests (an illustrative
  hook sketch follows the file list below)
* disable diff_py_file because too many changed files caused the command that
  follows it to fail
---
 .pre-commit-config.yaml | 9 +-
 paddle/scripts/paddle_build.sh | 8 +-
 python/paddle/_C_ops.py | 1 +
 python/paddle/__init__.py | 538 ++--
 python/paddle/amp/grad_scaler.py | 8 +-
 python/paddle/autograd/backward_mode.py | 21 +-
 python/paddle/autograd/functional.py | 107 +-
 python/paddle/autograd/py_layer.py | 17 +-
 python/paddle/callbacks.py | 9 +-
 python/paddle/cost_model/__init__.py | 7 +-
 python/paddle/cost_model/cost_model.py | 20 +-
 python/paddle/dataset/cifar.py | 21 +-
 python/paddle/dataset/common.py | 24 +-
 python/paddle/dataset/conll05.py | 1 +
 python/paddle/dataset/flowers.py | 50 +-
 python/paddle/dataset/image.py | 19 +-
 python/paddle/dataset/imdb.py | 16 +-
 python/paddle/dataset/imikolov.py | 1 +
 python/paddle/dataset/mnist.py | 10 +-
 python/paddle/dataset/movielens.py | 10 +-
 python/paddle/dataset/tests/cifar_test.py | 1 +
 python/paddle/dataset/tests/flowers_test.py | 1 +
 python/paddle/dataset/tests/imikolov_test.py | 1 +
 python/paddle/dataset/tests/mnist_test.py | 1 +
 python/paddle/dataset/tests/test_image.py | 1 +
 python/paddle/dataset/tests/voc2012_test.py | 1 +
 python/paddle/dataset/tests/wmt16_test.py | 13 +-
 python/paddle/dataset/uci_housing.py | 10 +-
 python/paddle/dataset/wmt14.py | 2 +
 python/paddle/dataset/wmt16.py | 64 +-
 python/paddle/device/__init__.py | 8 +-
 python/paddle/device/cuda/__init__.py | 10 +-
 python/paddle/device/cuda/graphs.py | 9 +-
 python/paddle/device/cuda/streams.py | 6 +-
 python/paddle/distributed/__init__.py | 58 +-
 .../distributed/auto_parallel/cluster.py | 10 +-
 .../distributed/auto_parallel/completion.py | 93 +-
 .../distributed/auto_parallel/converter.py | 22 +-
 .../auto_parallel/cost/base_cost.py | 29 +-
 .../auto_parallel/cost/comm_op_cost.py | 50 +-
 .../auto_parallel/cost/comp_op_cost.py | 314 +-
 .../auto_parallel/cost/estimate_cost.py | 1 +
 .../auto_parallel/cost/tensor_cost.py | 13 +-
 .../distributed/auto_parallel/cost_model.py | 116 +-
 .../auto_parallel/dist_attribute.py | 10 +-
 .../distributed/auto_parallel/dist_context.py | 28 +-
 .../distributed/auto_parallel/dist_loader.py | 10 +-
 .../distributed/auto_parallel/dist_op.py | 2 +
 .../distributed/auto_parallel/dist_saver.py | 24 +-
 .../distributed/auto_parallel/dist_tensor.py | 52 +-
 .../distributed/auto_parallel/engine.py | 67 +-
 .../paddle/distributed/auto_parallel/graph.py | 3 +
 .../distributed/auto_parallel/mapper.py | 18 +-
 .../auto_parallel/operators/common.py | 2 +
 .../dist_check_finite_and_unscale.py | 60 +-
 .../auto_parallel/operators/dist_default.py | 73 +-
 .../auto_parallel/operators/dist_eltwise.py | 13 +-
 .../auto_parallel/operators/dist_embedding.py | 61 +-
 .../dist_fill_constant_batch_size_like.py | 2 +
 .../operators/dist_fused_attention.py | 18 +-
 .../operators/dist_fused_feedforward.py | 10 +-
 .../auto_parallel/operators/dist_matmul.py | 207 +-
 .../auto_parallel/operators/dist_pnorm.py | 65 +-
 .../auto_parallel/operators/dist_reduce_p.py | 19 +-
.../auto_parallel/operators/dist_reshape.py | 16 +- .../auto_parallel/operators/dist_slice.py | 2 + .../auto_parallel/operators/dist_softmax.py | 2 + .../auto_parallel/operators/dist_split.py | 8 +- .../auto_parallel/operators/dist_transpose.py | 2 + .../operators/dist_update_loss_scaling.py | 2 + .../distributed/auto_parallel/parallelizer.py | 69 +- .../auto_parallel/parallelizer_v2.py | 29 +- .../distributed/auto_parallel/partitioner.py | 74 +- .../distributed/auto_parallel/planner.py | 145 +- .../distributed/auto_parallel/planner_v2.py | 1 + .../auto_parallel/process_group.py | 13 +- .../distributed/auto_parallel/process_mesh.py | 2 +- .../distributed/auto_parallel/reshard.py | 238 +- .../auto_parallel/tuner/recorder.py | 2 +- .../auto_parallel/tuner/storable.py | 3 +- .../distributed/auto_parallel/tuner/trial.py | 7 +- .../auto_parallel/tuner/tunable_space.py | 23 +- .../auto_parallel/tuner/tunable_variable.py | 15 +- .../paddle/distributed/auto_parallel/utils.py | 94 +- python/paddle/distributed/cloud_utils.py | 13 +- python/paddle/distributed/collective.py | 499 +-- python/paddle/distributed/elastic.py | 6 +- python/paddle/distributed/fleet/__init__.py | 16 +- .../fleet/base/distributed_strategy.py | 37 +- .../distributed/fleet/base/fleet_base.py | 89 +- .../fleet/base/meta_optimizer_factory.py | 1 + .../distributed/fleet/base/role_maker.py | 56 +- .../distributed/fleet/base/runtime_factory.py | 1 + .../fleet/base/strategy_compiler.py | 6 +- .../paddle/distributed/fleet/base/topology.py | 9 +- .../distributed/fleet/base/util_factory.py | 69 +- .../paddle/distributed/fleet/cloud_utils.py | 13 +- .../fleet/data_generator/data_generator.py | 13 +- .../distributed/fleet/dataset/dataset.py | 30 +- .../fleet/dataset/index_dataset.py | 8 +- .../distributed/fleet/elastic/__init__.py | 6 +- .../distributed/fleet/elastic/collective.py | 1 + .../distributed/fleet/elastic/manager.py | 46 +- .../distributed/fleet/fleet_executor_utils.py | 81 +- python/paddle/distributed/fleet/launch.py | 88 +- .../paddle/distributed/fleet/launch_utils.py | 306 +- .../fleet/meta_optimizers/amp_optimizer.py | 11 +- .../fleet/meta_optimizers/ascend/__init__.py | 6 +- .../ascend/ascend_optimizer.py | 62 +- .../meta_optimizers/ascend/ascend_parser.py | 824 ++--- .../fleet/meta_optimizers/asp_optimizer.py | 6 +- .../fleet/meta_optimizers/common.py | 224 +- .../fleet/meta_optimizers/dgc_optimizer.py | 11 +- .../dygraph_sharding_optimizer.py | 14 +- .../hybrid_parallel_gradscaler.py | 10 +- .../hybrid_parallel_optimizer.py | 46 +- .../sharding_optimizer_stage2.py | 45 +- .../fp16_allreduce_optimizer.py | 71 +- .../gradient_merge_optimizer.py | 6 +- .../graph_execution_optimizer.py | 19 +- .../fleet/meta_optimizers/lamb_optimizer.py | 15 +- .../fleet/meta_optimizers/lars_optimizer.py | 15 +- .../meta_optimizers/localsgd_optimizer.py | 398 ++- .../meta_optimizers/meta_optimizer_base.py | 35 +- .../parameter_server_graph_optimizer.py | 10 +- .../parameter_server_optimizer.py | 48 +- .../meta_optimizers/pipeline_optimizer.py | 96 +- .../fleet/meta_optimizers/ps_optimizer.py | 22 +- .../meta_optimizers/raw_program_optimizer.py | 243 +- .../meta_optimizers/recompute_optimizer.py | 15 +- .../meta_optimizers/sharding/__init__.py | 6 +- .../meta_optimizers/sharding/fp16_helper.py | 145 +- .../sharding/gradient_clip_helper.py | 26 +- .../sharding/offload_helper.py | 75 +- .../fleet/meta_optimizers/sharding/prune.py | 5 +- .../fleet/meta_optimizers/sharding/shard.py | 4 +- .../fleet/meta_optimizers/sharding/utils.py | 449 ++- 
.../sharding/weight_decay_helper.py | 1 + .../meta_optimizers/sharding_optimizer.py | 471 +-- .../tensor_parallel_optimizer.py | 122 +- .../fleet/meta_parallel/meta_parallel_base.py | 1 + .../parallel_layers/mp_layers.py | 49 +- .../parallel_layers/pp_layers.py | 53 +- .../meta_parallel/parallel_layers/random.py | 38 +- .../fleet/meta_parallel/pipeline_parallel.py | 39 +- .../pp_utils/p2p_communication.py | 238 +- .../fleet/meta_parallel/pp_utils/utils.py | 36 +- .../group_sharded_optimizer_stage2.py | 29 +- .../sharding/group_sharded_stage2.py | 53 +- .../sharding/group_sharded_stage3.py | 169 +- .../sharding/group_sharded_storage.py | 4 +- .../sharding/group_sharded_utils.py | 40 +- .../meta_parallel/sharding/sharding_stage2.py | 96 +- .../meta_parallel/sharding/sharding_stage3.py | 217 +- .../meta_parallel/sharding/sharding_utils.py | 45 +- .../fleet/meta_parallel/sharding_parallel.py | 1 + .../fleet/meta_parallel/tensor_parallel.py | 1 + .../fleet/runtime/collective_runtime.py | 1 + .../fleet/runtime/parameter_server_runtime.py | 240 +- .../distributed/fleet/runtime/runtime_base.py | 1 + .../distributed/fleet/runtime/the_one_ps.py | 161 +- python/paddle/distributed/fleet/utils/fs.py | 29 +- .../distributed/fleet/utils/http_server.py | 5 +- .../fleet/utils/hybrid_parallel_inference.py | 70 +- .../fleet/utils/hybrid_parallel_util.py | 58 +- .../fleet/utils/internal_storage.py | 20 +- .../distributed/fleet/utils/log_util.py | 7 +- .../paddle/distributed/fleet/utils/ps_util.py | 49 +- .../distributed/fleet/utils/recompute.py | 60 +- python/paddle/distributed/launch/__main__.py | 6 +- .../distributed/launch/context/__init__.py | 7 +- .../distributed/launch/context/args_envs.py | 139 +- .../distributed/launch/context/device.py | 7 +- .../distributed/launch/context/event.py | 7 +- .../paddle/distributed/launch/context/node.py | 7 +- .../distributed/launch/context/resource.py | 7 +- .../distributed/launch/context/status.py | 6 +- .../launch/controllers/collective.py | 20 +- .../launch/controllers/controller.py | 20 +- .../distributed/launch/controllers/master.py | 17 +- .../distributed/launch/controllers/ps.py | 59 +- .../distributed/launch/controllers/watcher.py | 7 +- .../distributed/launch/job/container.py | 15 +- python/paddle/distributed/launch/job/job.py | 7 +- python/paddle/distributed/launch/job/pod.py | 13 +- .../paddle/distributed/launch/job/status.py | 6 +- python/paddle/distributed/launch/main.py | 6 +- .../distributed/launch/plugins/__init__.py | 4 +- .../distributed/launch/utils/kv_client.py | 7 +- .../distributed/launch/utils/kv_server.py | 9 +- .../paddle/distributed/launch/utils/nvsmi.py | 7 +- .../launch/utils/process_context.py | 18 +- python/paddle/distributed/metric/__init__.py | 2 +- python/paddle/distributed/metric/metrics.py | 44 +- python/paddle/distributed/models/__init__.py | 6 +- .../paddle/distributed/models/moe/__init__.py | 6 +- python/paddle/distributed/models/moe/utils.py | 59 +- python/paddle/distributed/parallel.py | 67 +- .../paddle/distributed/parallel_with_gloo.py | 6 +- python/paddle/distributed/passes/__init__.py | 6 +- .../distributed/passes/auto_parallel_amp.py | 92 +- .../distributed/passes/auto_parallel_fp16.py | 54 +- .../passes/auto_parallel_gradient_merge.py | 170 +- .../passes/auto_parallel_recompute.py | 58 +- .../passes/auto_parallel_sharding.py | 117 +- python/paddle/distributed/passes/cpp_pass.py | 13 +- .../distributed/passes/fuse_all_reduce.py | 62 +- python/paddle/distributed/passes/pass_base.py | 21 +- 
.../paddle/distributed/passes/pass_utils.py | 8 +- .../distributed/passes/ps_server_pass.py | 23 +- .../distributed/passes/ps_trainer_pass.py | 306 +- python/paddle/distributed/ps/the_one_ps.py | 150 +- .../paddle/distributed/ps/utils/ps_factory.py | 7 +- .../distributed/ps/utils/ps_infer_utils.py | 6 +- .../ps/utils/ps_program_builder.py | 40 +- python/paddle/distributed/ps/utils/public.py | 196 +- .../paddle/distributed/sharding/__init__.py | 6 +- .../distributed/sharding/group_sharded.py | 74 +- python/paddle/distributed/spawn.py | 63 +- python/paddle/distributed/utils.py | 106 +- python/paddle/distribution/__init__.py | 15 +- python/paddle/distribution/categorical.py | 28 +- python/paddle/distribution/constraint.py | 14 +- python/paddle/distribution/dirichlet.py | 19 +- python/paddle/distribution/distribution.py | 4 +- .../paddle/distribution/exponential_family.py | 11 +- python/paddle/distribution/independent.py | 23 +- python/paddle/distribution/kl.py | 31 +- python/paddle/distribution/multinomial.py | 27 +- python/paddle/distribution/normal.py | 57 +- python/paddle/distribution/transform.py | 30 +- .../distribution/transformed_distribution.py | 4 +- python/paddle/distribution/uniform.py | 29 +- python/paddle/distribution/variable.py | 19 +- python/paddle/fft.py | 157 +- python/paddle/fluid/__init__.py | 21 +- python/paddle/fluid/average.py | 4 +- python/paddle/fluid/backward.py | 215 +- python/paddle/fluid/clip.py | 83 +- python/paddle/fluid/communicator.py | 7 +- python/paddle/fluid/compiler.py | 15 +- .../contrib/decoder/beam_search_decoder.py | 137 +- .../extend_optimizer_with_weight_decay.py | 21 +- .../paddle/fluid/contrib/layers/metric_op.py | 238 +- python/paddle/fluid/contrib/layers/nn.py | 684 ++-- .../paddle/fluid/contrib/layers/rnn_impl.py | 122 +- .../fluid/contrib/mixed_precision/amp_nn.py | 11 +- .../contrib/mixed_precision/bf16/amp_utils.py | 50 +- .../contrib/mixed_precision/bf16/decorator.py | 21 +- .../contrib/mixed_precision/decorator.py | 126 +- .../contrib/mixed_precision/fp16_utils.py | 79 +- python/paddle/fluid/contrib/model_stat.py | 4 +- python/paddle/fluid/contrib/op_frequence.py | 10 +- python/paddle/fluid/contrib/optimizer.py | 57 +- .../contrib/quantize/quantize_transpiler.py | 165 +- .../contrib/slim/quantization/adaround.py | 116 +- .../slim/quantization/cal_kl_threshold.py | 13 +- .../quantization/imperative/fuse_utils.py | 19 +- .../slim/quantization/imperative/ptq.py | 51 +- .../quantization/imperative/ptq_quantizer.py | 25 +- .../slim/quantization/imperative/qat.py | 86 +- .../slim/quantization/imperative/utils.py | 2 +- .../post_training_quantization.py | 201 +- .../quantization/quant2_int8_mkldnn_pass.py | 85 +- .../quantization/quant_int8_mkldnn_pass.py | 45 +- .../slim/quantization/quantization_pass.py | 378 +-- .../quantization/quantize_transpiler_v2.py | 209 +- .../contrib/slim/tests/convert_model2dot.py | 29 +- .../slim/tests/imperative_test_utils.py | 150 +- ...t2_int8_image_classification_comparison.py | 92 +- .../slim/tests/quant2_int8_lstm_model.py | 63 +- .../slim/tests/quant2_int8_nlp_comparison.py | 122 +- ...nt_int8_image_classification_comparison.py | 69 +- .../contrib/slim/tests/save_quant_model.py | 74 +- .../fluid/contrib/slim/tests/test_graph.py | 43 +- .../slim/tests/test_imperative_out_scale.py | 114 +- .../contrib/slim/tests/test_imperative_ptq.py | 94 +- .../contrib/slim/tests/test_imperative_qat.py | 80 +- .../slim/tests/test_imperative_qat_amp.py | 39 +- .../tests/test_imperative_qat_channelwise.py | 7 +- 
.../slim/tests/test_imperative_qat_fuse.py | 6 +- .../tests/test_imperative_qat_user_defined.py | 106 +- .../slim/tests/test_imperative_skip_op.py | 41 +- .../test_moving_average_abs_max_scale_op.py | 10 +- ...t_post_training_quantization_lstm_model.py | 100 +- .../test_post_training_quantization_mnist.py | 173 +- ..._post_training_quantization_mobilenetv1.py | 104 +- ...est_post_training_quantization_resnet50.py | 25 +- .../test_post_training_quantization_while.py | 94 +- .../tests/test_quant2_int8_mkldnn_pass.py | 140 +- .../tests/test_quantization_mkldnn_pass.py | 59 +- .../slim/tests/test_quantization_pass.py | 419 +-- .../tests/test_quantization_scale_pass.py | 59 +- .../slim/tests/test_quantize_transpiler_v2.py | 97 +- .../tests/test_user_defined_quantization.py | 49 +- .../test_weight_quantization_mobilenetv1.py | 20 +- .../paddle/fluid/contrib/sparsity/__init__.py | 6 +- python/paddle/fluid/contrib/sparsity/asp.py | 84 +- .../contrib/sparsity/supported_layer_list.py | 24 +- python/paddle/fluid/contrib/sparsity/utils.py | 20 +- .../fluid/contrib/tests/test_amp_list.py | 7 +- .../fluid/contrib/tests/test_bf16_utils.py | 54 +- .../fluid/contrib/tests/test_correlation.py | 102 +- .../fluid/contrib/tests/test_fp16_utils.py | 11 +- .../tests/test_image_classification_fp16.py | 117 +- .../contrib/tests/test_model_cast_to_bf16.py | 40 +- .../tests/test_multi_precision_fp16_train.py | 122 +- .../contrib/tests/test_quantize_transpiler.py | 87 +- .../contrib/tests/test_weight_decay_extend.py | 50 +- python/paddle/fluid/core.py | 19 +- python/paddle/fluid/data.py | 17 +- python/paddle/fluid/data_feed_desc.py | 8 +- python/paddle/fluid/data_feeder.py | 44 +- .../paddle/fluid/dataloader/batch_sampler.py | 8 +- .../fluid/dataloader/dataloader_iter.py | 49 +- python/paddle/fluid/dataloader/fetcher.py | 16 +- python/paddle/fluid/dataloader/sampler.py | 12 +- python/paddle/fluid/dataloader/worker.py | 13 +- python/paddle/fluid/dataset.py | 80 +- python/paddle/fluid/debugger.py | 51 +- python/paddle/fluid/device_worker.py | 70 +- python/paddle/fluid/distributed/downpour.py | 11 +- python/paddle/fluid/distributed/fleet.py | 8 +- python/paddle/fluid/distributed/ps_pb2.py | 895 +++--- python/paddle/fluid/dygraph/amp/auto_cast.py | 31 +- .../paddle/fluid/dygraph/amp/loss_scaler.py | 27 +- python/paddle/fluid/dygraph/base.py | 52 +- python/paddle/fluid/dygraph/checkpoint.py | 14 +- python/paddle/fluid/dygraph/container.py | 4 +- .../break_continue_transformer.py | 51 +- .../dygraph_to_static/cast_transformer.py | 4 +- .../dygraph_to_static/convert_call_func.py | 14 +- .../dygraph_to_static/convert_operators.py | 13 +- .../fluid/dygraph/dygraph_to_static/error.py | 27 +- .../dygraph_to_static/function_spec.py | 34 +- .../dygraph_to_static/grad_transformer.py | 4 +- .../dygraph_to_static/ifelse_transformer.py | 63 +- .../dygraph_to_static/logging_utils.py | 1 + .../dygraph_to_static/loop_transformer.py | 118 +- .../dygraph/dygraph_to_static/origin_info.py | 6 +- .../dygraph_to_static/partial_program.py | 89 +- .../dygraph_to_static/program_translator.py | 95 +- .../dygraph_to_static/return_transformer.py | 176 +- .../dygraph_to_static/static_analysis.py | 9 +- .../tensor_shape_transformer.py | 51 +- .../fluid/dygraph/dygraph_to_static/utils.py | 272 +- .../dygraph_to_static/variable_trans_func.py | 17 +- python/paddle/fluid/dygraph/inplace_utils.py | 5 +- python/paddle/fluid/dygraph/io.py | 261 +- python/paddle/fluid/dygraph/jit.py | 78 +- python/paddle/fluid/dygraph/layer_hooks.py | 14 +- 
.../fluid/dygraph/layer_object_helper.py | 10 +- python/paddle/fluid/dygraph/layers.py | 83 +- .../fluid/dygraph/learning_rate_scheduler.py | 35 +- python/paddle/fluid/dygraph/math_op_patch.py | 163 +- python/paddle/fluid/dygraph/nn.py | 785 ++--- python/paddle/fluid/dygraph/parallel.py | 76 +- .../paddle/fluid/dygraph/parallel_helper.py | 1 + python/paddle/fluid/dygraph/rnn.py | 65 +- python/paddle/fluid/dygraph/tracer.py | 21 +- .../fluid/dygraph/varbase_patch_methods.py | 51 +- python/paddle/fluid/entry_attr.py | 2 + python/paddle/fluid/evaluator.py | 162 +- python/paddle/fluid/executor.py | 281 +- python/paddle/fluid/framework.py | 502 +-- python/paddle/fluid/graphviz.py | 92 +- .../incubate/checkpoint/auto_checkpoint.py | 42 +- .../incubate/checkpoint/checkpoint_saver.py | 31 +- .../fluid/incubate/data_generator/__init__.py | 13 +- .../fluid/incubate/fleet/base/role_maker.py | 10 +- .../incubate/fleet/collective/__init__.py | 48 +- .../distribute_transpiler/__init__.py | 208 +- .../distributed_strategy.py | 26 +- .../parameter_server/ir/heter_trainer_pass.py | 4 +- .../fleet/parameter_server/ir/pserver_pass.py | 268 +- .../fleet/parameter_server/ir/public.py | 52 +- .../fleet/parameter_server/ir/trainer_pass.py | 425 +-- .../parameter_server/ir/vars_metatools.py | 1 + .../fleet/parameter_server/pslib/__init__.py | 38 +- .../fleet/parameter_server/pslib/node.py | 12 +- .../pslib/optimizer_factory.py | 156 +- .../fleet/parameter_server/pslib/ps_pb2.py | 1053 +++---- .../fleet/tests/ctr_dataset_reader.py | 10 +- .../incubate/fleet/tests/fleet_deep_ctr.py | 77 +- .../fluid/incubate/fleet/utils/fleet_util.py | 52 +- .../paddle/fluid/incubate/fleet/utils/hdfs.py | 13 +- .../fluid/incubate/fleet/utils/http_server.py | 5 +- .../fluid/incubate/fleet/utils/utils.py | 79 +- python/paddle/fluid/inference/wrapper.py | 5 +- python/paddle/fluid/initializer.py | 451 ++- python/paddle/fluid/input.py | 42 +- python/paddle/fluid/install_check.py | 6 +- python/paddle/fluid/io.py | 400 ++- python/paddle/fluid/ir.py | 34 +- python/paddle/fluid/layer_helper.py | 36 +- python/paddle/fluid/layer_helper_base.py | 126 +- python/paddle/fluid/layers/collective.py | 130 +- python/paddle/fluid/layers/control_flow.py | 691 +++-- python/paddle/fluid/layers/detection.py | 779 ++--- python/paddle/fluid/layers/device.py | 5 +- python/paddle/fluid/layers/distributions.py | 38 +- python/paddle/fluid/layers/io.py | 184 +- .../fluid/layers/layer_function_generator.py | 23 +- .../fluid/layers/learning_rate_scheduler.py | 78 +- python/paddle/fluid/layers/loss.py | 349 ++- python/paddle/fluid/layers/math_op_patch.py | 132 +- python/paddle/fluid/layers/metric_op.py | 125 +- python/paddle/fluid/layers/nn.py | 2601 ++++++++-------- python/paddle/fluid/layers/ops.py | 98 +- python/paddle/fluid/layers/rnn.py | 451 +-- python/paddle/fluid/layers/sequence_lod.py | 176 +- python/paddle/fluid/layers/tensor.py | 350 ++- python/paddle/fluid/layers/utils.py | 44 +- python/paddle/fluid/lod_tensor.py | 11 +- python/paddle/fluid/memory_analysis.py | 12 +- python/paddle/fluid/metrics.py | 72 +- python/paddle/fluid/multiprocess_utils.py | 5 +- python/paddle/fluid/net_drawer.py | 18 +- python/paddle/fluid/nets.py | 115 +- python/paddle/fluid/op.py | 19 +- python/paddle/fluid/optimizer.py | 1640 +++++----- python/paddle/fluid/param_attr.py | 16 +- python/paddle/fluid/profiler.py | 20 +- python/paddle/fluid/reader.py | 173 +- python/paddle/fluid/regularizer.py | 38 +- .../tests/book/notest_understand_sentiment.py | 190 +- 
.../fluid/tests/book/test_fit_a_line.py | 39 +- .../tests/book/test_image_classification.py | 67 +- .../tests/book/test_label_semantic_roles.py | 249 +- .../tests/book/test_machine_translation.py | 108 +- .../fluid/tests/book/test_recognize_digits.py | 80 +- .../tests/book/test_recommender_system.py | 112 +- .../tests/book/test_rnn_encoder_decoder.py | 74 +- .../fluid/tests/book/test_word2vec_book.py | 113 +- .../custom_kernel/custom_kernel_dot_setup.py | 12 +- .../custom_kernel/test_custom_kernel_dot.py | 11 +- .../custom_kernel/test_custom_kernel_load.py | 10 +- .../custom_raw_op_kernel_op_setup.py | 20 +- .../fluid/tests/custom_op/test_check_abi.py | 11 +- .../tests/custom_op/test_context_pool.py | 5 +- .../tests/custom_op/test_custom_attrs_jit.py | 26 +- .../tests/custom_op/test_custom_concat.py | 24 +- .../fluid/tests/custom_op/test_custom_conj.py | 11 +- .../tests/custom_op/test_custom_linear.py | 10 +- .../custom_op/test_custom_raw_op_kernel_op.py | 15 +- .../tests/custom_op/test_custom_relu_model.py | 61 +- .../custom_op/test_custom_relu_op_jit.py | 9 +- .../custom_op/test_custom_relu_op_setup.py | 81 +- .../custom_op/test_custom_simple_slice.py | 7 +- .../custom_op/test_custom_tanh_double_grad.py | 17 +- .../tests/custom_op/test_dispatch_jit.py | 3 +- .../tests/custom_op/test_multi_out_jit.py | 9 +- .../fluid/tests/custom_op/test_sysconfig.py | 1 + python/paddle/fluid/tests/custom_op/utils.py | 6 +- .../fluid/tests/test_beam_search_decoder.py | 93 +- python/paddle/fluid/tests/test_data_feeder.py | 21 +- python/paddle/fluid/tests/test_detection.py | 789 ++--- python/paddle/fluid/tests/test_error_clip.py | 17 +- python/paddle/fluid/tests/test_if_else_op.py | 49 +- python/paddle/fluid/tests/test_lod_tensor.py | 19 +- .../tests/test_python_operator_overriding.py | 21 +- python/paddle/fluid/tests/test_sequential.py | 1 + .../paddle/fluid/tests/unittests/__init__.py | 2 +- .../fluid/tests/unittests/ascend_group.py | 95 +- .../tests/unittests/asp/asp_pruning_base.py | 32 +- .../asp/test_asp_customized_pruning.py | 75 +- .../asp/test_asp_optimize_dynamic.py | 62 +- .../unittests/asp/test_asp_optimize_static.py | 57 +- .../unittests/asp/test_asp_pruning_dynamic.py | 34 +- .../unittests/asp/test_asp_pruning_static.py | 37 +- .../tests/unittests/asp/test_asp_save_load.py | 74 +- .../tests/unittests/asp/test_asp_utils.py | 46 +- .../asp/test_fleet_with_asp_dynamic.py | 52 +- .../asp/test_fleet_with_asp_sharding.py | 20 +- .../asp/test_fleet_with_asp_static.py | 64 +- .../tests/unittests/auto_checkpoint_utils.py | 7 +- .../auto_parallel_relaunch_model.py | 70 +- ...auto_parallel_relaunch_with_gpt_planner.py | 73 +- .../auto_parallel_relaunch_with_planner.py | 11 +- .../unittests/auto_parallel/engine_api.py | 55 +- .../auto_parallel/high_order_grad.py | 34 +- .../tests/unittests/auto_parallel/launch.py | 6 +- .../test_auto_parallel_relaunch.py | 5 +- .../unittests/auto_parallel/test_cluster.py | 1 + .../unittests/auto_parallel/test_comm_cost.py | 44 +- .../unittests/auto_parallel/test_comp_cost.py | 1 + .../unittests/auto_parallel/test_converter.py | 1 + .../auto_parallel/test_dist_context.py | 131 +- .../auto_parallel/test_dist_pnorm.py | 23 +- .../auto_parallel/test_dist_reshape.py | 12 +- .../auto_parallel/test_dist_slice.py | 23 +- .../auto_parallel/test_engine_api.py | 1 + .../auto_parallel/test_high_order_grad.py | 1 + .../auto_parallel/test_new_cost_model.py | 1 + .../auto_parallel/test_prim_dist_op.py | 60 +- .../unittests/auto_parallel/test_recorder.py | 49 +- 
.../test_relaunch_with_gpt_planner.py | 5 +- .../test_relaunch_with_planner.py | 5 +- .../unittests/auto_parallel/test_trial.py | 1 + .../auto_parallel/test_tunable_space.py | 7 +- .../auto_parallel/test_tunable_variable.py | 28 +- .../auto_parallel/test_while_op_completion.py | 113 +- .../auto_parallel/test_while_op_partition.py | 291 +- .../unittests/auto_parallel_autoconvert.py | 167 +- .../unittests/auto_parallel_data_unshard.py | 151 +- .../unittests/auto_parallel_gpt_model.py | 672 ++-- .../unittests/auto_parallel_parallelizer.py | 71 +- .../unittests/auto_parallel_save_load.py | 143 +- .../test_autograd_functional_dynamic.py | 381 ++- .../test_autograd_functional_static.py | 119 +- .../autograd/test_gradients_and_minimize.py | 24 +- .../autograd/test_jvp_and_transpose.py | 173 +- .../unittests/autograd/test_orig2prim.py | 97 +- .../unittests/autograd/test_prim2orig.py | 86 +- .../tests/unittests/autograd/test_primops.py | 47 +- .../unittests/autograd/test_transform.py | 50 +- .../fluid/tests/unittests/autograd/utils.py | 20 +- .../paddle/fluid/tests/unittests/benchmark.py | 23 +- .../fluid/tests/unittests/benchmark_sum_op.py | 1 + .../fluid/tests/unittests/c_comm_init_op.py | 37 +- .../tests/unittests/c_embedding_op_base.py | 11 +- .../tests/unittests/check_nan_inf_base.py | 14 +- .../unittests/check_nan_inf_base_dygraph.py | 9 +- .../unittests/collective_allgather_api.py | 6 +- .../unittests/collective_allgather_op.py | 28 +- .../unittests/collective_allreduce_api.py | 6 +- .../collective_allreduce_new_group_api.py | 11 +- .../unittests/collective_allreduce_op.py | 24 +- .../unittests/collective_allreduce_op_wait.py | 47 +- .../unittests/collective_alltoall_api.py | 6 +- .../collective_alltoall_api_dygraph.py | 1 + .../tests/unittests/collective_barrier_api.py | 1 + .../unittests/collective_broadcast_api.py | 6 +- .../unittests/collective_broadcast_op.py | 28 +- .../tests/unittests/collective_concat_op.py | 23 +- .../unittests/collective_global_gather.py | 25 +- .../collective_global_gather_dygraph.py | 4 +- .../unittests/collective_global_scatter.py | 18 +- .../collective_global_scatter_dygraph.py | 4 +- .../tests/unittests/collective_identity_op.py | 19 +- .../tests/unittests/collective_reduce_api.py | 6 +- .../tests/unittests/collective_reduce_op.py | 28 +- .../collective_reduce_op_calc_stream.py | 32 +- .../unittests/collective_reducescatter.py | 6 +- .../unittests/collective_reducescatter_op.py | 28 +- .../tests/unittests/collective_scatter_api.py | 15 +- .../tests/unittests/collective_scatter_op.py | 30 +- .../unittests/collective_sendrecv_api.py | 10 +- .../collective_sendrecv_api_dygraph.py | 1 + .../tests/unittests/collective_sendrecv_op.py | 45 +- .../unittests/collective_sendrecv_op_array.py | 37 +- .../collective_sendrecv_op_dynamic_shape.py | 49 +- .../tests/unittests/collective_split_op.py | 23 +- .../unittests/column_parallel_linear_api.py | 9 +- .../tests/unittests/ctr_dataset_reader.py | 11 +- .../fluid/tests/unittests/decorator_helper.py | 4 + .../fluid/tests/unittests/detected_gpu.py | 10 +- .../fluid/tests/unittests/detected_xpu.py | 6 +- .../tests/unittests/dist_allreduce_op.py | 14 +- .../paddle/fluid/tests/unittests/dist_ctr.py | 47 +- .../fluid/tests/unittests/dist_ctr_reader.py | 4 +- .../fluid/tests/unittests/dist_fleet_ctr.py | 105 +- .../tests/unittests/dist_fleet_ctr_ps_gpu.py | 26 +- .../tests/unittests/dist_fleet_debug_gloo.py | 1 + .../dist_fleet_heter_pipeline_ctr.py | 73 +- .../dist_fleet_raw_program_optimizer.py | 18 +- 
...et_raw_program_optimizer_fuse_allreduce.py | 18 +- .../tests/unittests/dist_fleet_simnet_bow.py | 60 +- .../dist_fleet_sparse_embedding_ctr.py | 54 +- .../fluid/tests/unittests/dist_mnist.py | 23 +- .../tests/unittests/dist_mnist_batch_merge.py | 10 +- .../unittests/dist_mnist_fp16_allreduce.py | 18 +- .../unittests/dist_mnist_gradient_merge.py | 18 +- ...dist_mnist_gradient_merge_raw_optimizer.py | 20 +- .../fluid/tests/unittests/dist_mnist_lars.py | 18 +- .../fluid/tests/unittests/dist_save_load.py | 33 +- .../fluid/tests/unittests/dist_se_resnext.py | 156 +- .../tests/unittests/dist_sharding_save.py | 32 +- .../unittests/dist_text_classification.py | 41 +- .../fluid/tests/unittests/dist_transformer.py | 477 +-- .../fluid/tests/unittests/dist_word2vec.py | 18 +- .../distributed_fused_lamb_test_base.py | 59 +- .../auto_parallel_pass_test_base.py | 133 +- .../check_pass_conflict_example.py | 8 +- .../distributed_passes/dist_pass_test_base.py | 45 +- .../unittests/distributed_passes/launch.py | 6 +- .../unittests/distributed_passes/model_zoo.py | 44 +- .../distributed_passes/pass_run_main.py | 18 +- .../distributed_passes/ps_pass_test_base.py | 7 +- .../test_auto_parallel_amp_pass.py | 13 +- .../test_auto_parallel_fp16_pass.py | 7 +- .../test_auto_parallel_recompute_pass.py | 14 +- .../test_auto_parallel_sharding_pass.py | 14 +- .../test_build_cinn_pass_resnet.py | 7 +- .../test_build_cinn_pass_simple_net.py | 7 +- .../test_dist_fuse_adam_pass.py | 13 +- .../test_dist_fuse_all_reduce_pass.py | 7 +- .../test_dist_fuse_bn_act_pass.py | 13 +- .../test_dist_fuse_bn_add_act_pass.py | 13 +- .../test_dist_fuse_momentum_pass.py | 13 +- ...test_dist_fuse_relu_depthwise_conv_pass.py | 13 +- .../test_dist_fuse_sgd_pass.py | 13 +- .../test_dist_gradient_merge_pass.py | 100 +- .../test_dist_inplace_addto_pass.py | 13 +- .../distributed_passes/test_ps_server_pass.py | 1 + .../test_ps_trainer_pass.py | 1 + .../distributed_passes/test_white_lists.py | 13 +- .../unittests/distribution/parameterize.py | 14 +- .../distribution/test_dirichlet_op.py | 4 +- .../distribution/test_distribution.py | 8 +- .../distribution/test_distribution_beta.py | 5 +- .../test_distribution_beta_static.py | 78 +- .../test_distribution_categorical.py | 91 +- .../test_distribution_constraint.py | 11 +- .../test_distribution_dirichlet.py | 10 +- .../test_distribution_dirichlet_static.py | 8 +- .../test_distribution_expfamily.py | 18 +- .../test_distribution_expfamily_static.py | 7 +- .../test_distribution_independent.py | 9 +- .../test_distribution_independent_static.py | 58 +- .../test_distribution_multinomial.py | 42 +- .../test_distribution_multinomial_static.py | 46 +- .../distribution/test_distribution_normal.py | 167 +- .../test_distribution_transform.py | 501 +-- .../test_distribution_transform_static.py | 471 +-- ...t_distribution_transformed_distribution.py | 9 +- ...ibution_transformed_distribution_static.py | 18 +- .../distribution/test_distribution_uniform.py | 116 +- .../test_distribution_variable.py | 17 +- .../tests/unittests/distribution/test_kl.py | 53 +- .../unittests/distribution/test_kl_static.py | 43 +- .../tests/unittests/dygraph_fleet_api.py | 5 +- .../unittests/dygraph_group_sharded_api.py | 55 +- .../dygraph_group_sharded_api_eager.py | 55 +- .../unittests/dygraph_group_sharded_stage2.py | 104 +- .../dygraph_group_sharded_stage2_offload.py | 40 +- .../unittests/dygraph_group_sharded_stage3.py | 176 +- .../dygraph_group_sharded_stage3_offload.py | 109 +- .../dygraph_sharding_optimizer_stage2.py | 40 +- 
.../unittests/dygraph_sharding_stage2.py | 111 +- .../dygraph_sharding_stage2_offload.py | 40 +- .../unittests/dygraph_sharding_stage3.py | 176 +- .../dygraph_sharding_stage3_offload.py | 100 +- .../dygraph_to_static/bert_dygraph_model.py | 198 +- .../unittests/dygraph_to_static/bert_utils.py | 88 +- .../unittests/dygraph_to_static/darknet.py | 101 +- .../dygraph_to_static/ifelse_simple_func.py | 65 +- .../seq2seq_dygraph_model.py | 288 +- .../dygraph_to_static/seq2seq_utils.py | 1 + .../dygraph_to_static/simnet_dygraph_model.py | 84 +- .../simnet_dygraph_model_v2.py | 76 +- .../dygraph_to_static/test_assert.py | 31 +- .../dygraph_to_static/test_ast_util.py | 6 +- .../test_basic_api_transformation.py | 145 +- .../unittests/dygraph_to_static/test_bert.py | 63 +- .../unittests/dygraph_to_static/test_bmn.py | 241 +- .../dygraph_to_static/test_break_continue.py | 24 +- .../dygraph_to_static/test_build_strategy.py | 35 +- .../dygraph_to_static/test_cache_program.py | 15 +- .../unittests/dygraph_to_static/test_cast.py | 9 +- .../dygraph_to_static/test_container.py | 29 +- .../dygraph_to_static/test_convert_call.py | 53 +- .../test_convert_call_generator.py | 1 + .../test_convert_operators.py | 98 +- .../dygraph_to_static/test_cycle_gan.py | 286 +- .../dygraph_to_static/test_declarative.py | 70 +- .../unittests/dygraph_to_static/test_dict.py | 69 +- .../dygraph_to_static/test_drop_path.py | 2 + .../dygraph_to_static/test_fetch_feed.py | 17 +- .../dygraph_to_static/test_for_enumerate.py | 43 +- .../dygraph_to_static/test_full_name_usage.py | 3 + .../dygraph_to_static/test_function_spec.py | 10 +- .../unittests/dygraph_to_static/test_grad.py | 15 +- .../dygraph_to_static/test_grid_generator.py | 38 +- .../dygraph_to_static/test_ifelse.py | 56 +- .../dygraph_to_static/test_ifelse_basic.py | 29 +- .../dygraph_to_static/test_isinstance.py | 10 +- .../unittests/dygraph_to_static/test_lac.py | 276 +- .../dygraph_to_static/test_lambda.py | 9 +- .../dygraph_to_static/test_layer_hook.py | 20 +- .../unittests/dygraph_to_static/test_len.py | 20 +- .../unittests/dygraph_to_static/test_list.py | 39 +- .../dygraph_to_static/test_logging_utils.py | 1 + .../dygraph_to_static/test_logical.py | 29 +- .../unittests/dygraph_to_static/test_loop.py | 47 +- .../unittests/dygraph_to_static/test_lstm.py | 40 +- .../unittests/dygraph_to_static/test_mnist.py | 147 +- .../dygraph_to_static/test_mnist_amp.py | 27 +- .../dygraph_to_static/test_mnist_pure_fp16.py | 45 +- .../dygraph_to_static/test_mobile_net.py | 414 ++- .../dygraph_to_static/test_op_attr.py | 22 +- .../dygraph_to_static/test_param_guard.py | 31 +- .../dygraph_to_static/test_partial_program.py | 9 +- .../unittests/dygraph_to_static/test_print.py | 12 +- .../test_program_translator.py | 39 +- .../dygraph_to_static/test_ptb_lm.py | 81 +- .../dygraph_to_static/test_ptb_lm_v2.py | 105 +- .../test_reinforcement_learning.py | 14 +- .../dygraph_to_static/test_resnet.py | 157 +- .../dygraph_to_static/test_resnet_amp.py | 15 +- .../test_resnet_pure_fp16.py | 27 +- .../dygraph_to_static/test_resnet_v2.py | 140 +- .../dygraph_to_static/test_return.py | 28 +- .../test_save_inference_model.py | 46 +- .../dygraph_to_static/test_save_load.py | 9 +- .../dygraph_to_static/test_se_resnet.py | 250 +- .../dygraph_to_static/test_sentiment.py | 153 +- .../dygraph_to_static/test_seq2seq.py | 88 +- .../dygraph_to_static/test_simnet.py | 40 +- .../dygraph_to_static/test_simnet_v2.py | 53 +- .../unittests/dygraph_to_static/test_slice.py | 63 +- .../dygraph_to_static/test_spec_names.py | 
8 +- .../dygraph_to_static/test_static_analysis.py | 4 +- .../dygraph_to_static/test_tensor_methods.py | 9 +- .../dygraph_to_static/test_tensor_shape.py | 57 +- .../dygraph_to_static/test_transformer.py | 79 +- .../unittests/dygraph_to_static/test_tsm.py | 160 +- .../dygraph_to_static/test_typing.py | 15 +- .../unittests/dygraph_to_static/test_utils.py | 4 + .../test_variable_trans_func.py | 6 +- .../dygraph_to_static/test_word2vec.py | 34 +- .../dygraph_to_static/test_yolov3.py | 31 +- .../transformer_dygraph_model.py | 181 +- .../dygraph_to_static/transformer_util.py | 58 +- .../dygraph_to_static/tsm_config_utils.py | 6 +- .../unittests/dygraph_to_static/yolov3.py | 156 +- .../fluid/tests/unittests/elastic_demo.py | 15 +- .../fluid/tests/unittests/fake_reader.py | 16 +- .../fluid/tests/unittests/feed_data_reader.py | 2 + .../fluid/tests/unittests/fft/__init__.py | 6 +- .../tests/unittests/fft/spectral_op_np.py | 10 +- .../fluid/tests/unittests/fft/test_fft.py | 903 +++--- .../fft/test_fft_with_static_graph.py | 807 +++-- .../tests/unittests/fft/test_spectral_op.py | 75 +- .../unittests/fleet_heter_ps_training.py | 37 +- .../unittests/fleet_meta_optimizer_base.py | 45 +- .../fluid/tests/unittests/gradient_checker.py | 113 +- .../fluid/tests/unittests/hccl_tools.py | 29 +- .../fluid/tests/unittests/hdfs_test_utils.py | 10 +- .../hybrid_parallel_communicate_group.py | 32 +- .../hybrid_parallel_inference_helper.py | 75 +- .../tests/unittests/hybrid_parallel_mp_amp.py | 15 +- .../unittests/hybrid_parallel_mp_clip_grad.py | 6 +- .../unittests/hybrid_parallel_mp_fp16.py | 15 +- .../unittests/hybrid_parallel_mp_layers.py | 35 +- .../unittests/hybrid_parallel_mp_model.py | 11 +- .../unittests/hybrid_parallel_mp_random.py | 1 + .../unittests/hybrid_parallel_pp_alexnet.py | 20 +- .../tests/unittests/hybrid_parallel_pp_amp.py | 31 +- .../unittests/hybrid_parallel_pp_clip_grad.py | 24 +- .../unittests/hybrid_parallel_pp_embedding.py | 28 +- .../unittests/hybrid_parallel_pp_fp16.py | 49 +- .../unittests/hybrid_parallel_pp_layer.py | 69 +- .../unittests/hybrid_parallel_pp_recompute.py | 27 +- .../unittests/hybrid_parallel_pp_save_load.py | 11 +- .../hybrid_parallel_pp_transformer.py | 26 +- .../hybrid_parallel_sharding_model.py | 43 +- .../hybrid_parallel_shared_weight.py | 40 +- .../tests/unittests/init_process_group.py | 1 + .../test_standalone_controlflow.py | 38 +- .../interpreter/test_standalone_executor.py | 86 +- .../test_standalone_multiply_write.py | 7 +- .../fluid/tests/unittests/ipu/op_test_ipu.py | 20 +- .../unittests/ipu/test_activation_x_op_ipu.py | 10 +- .../unittests/ipu/test_arg_max_op_ipu.py | 7 +- .../tests/unittests/ipu/test_assign_op_ipu.py | 18 +- .../tests/unittests/ipu/test_avg_shard_ipu.py | 30 +- .../unittests/ipu/test_batch_norm_op_ipu.py | 14 +- .../tests/unittests/ipu/test_cast_op_ipu.py | 22 +- .../tests/unittests/ipu/test_concat_op_ipu.py | 12 +- .../tests/unittests/ipu/test_conv_op_ipu.py | 15 +- .../ipu/test_cross_entropy2_op_ipu.py | 32 +- .../tests/unittests/ipu/test_cumsum_op_ipu.py | 9 +- .../unittests/ipu/test_dropout_op_ipu.py | 8 +- .../unittests/ipu/test_elemetwise_x_op_ipu.py | 18 +- .../tests/unittests/ipu/test_equal_op_ipu.py | 13 +- .../unittests/ipu/test_eval_model_ipu.py | 29 +- .../tests/unittests/ipu/test_expand_op_ipu.py | 17 +- .../ipu/test_fill_any_like_op_ipu.py | 7 +- .../ipu/test_fill_constant_op_ipu.py | 2 + .../unittests/ipu/test_flatten_op_ipu.py | 8 +- .../unittests/ipu/test_fp16_support_ipu.py | 35 +- 
.../tests/unittests/ipu/test_gather_op_ipu.py | 12 +- .../tests/unittests/ipu/test_gelu_op_ipu.py | 7 +- .../unittests/ipu/test_gradient_clip_ipu.py | 26 +- .../unittests/ipu/test_greater_op_ipu.py | 13 +- .../unittests/ipu/test_groupnorm_op_ipu.py | 27 +- .../ipu/test_inference_model_io_ipu.py | 37 +- .../unittests/ipu/test_instancenorm_op_ipu.py | 25 +- .../unittests/ipu/test_ipu_shard_api_ipu.py | 8 +- .../unittests/ipu/test_ipu_strategy_ipu.py | 8 +- .../unittests/ipu/test_layernorm_op_ipu.py | 33 +- .../unittests/ipu/test_log_softmax_op_ipu.py | 7 +- .../unittests/ipu/test_logical_not_op_ipu.py | 6 +- .../unittests/ipu/test_logical_x_op_ipu.py | 16 +- .../unittests/ipu/test_lookuptable_op_ipu.py | 7 +- .../ipu/test_lookuptable_v2_op_ipu.py | 7 +- .../unittests/ipu/test_lr_sheduler_ipu.py | 18 +- .../tests/unittests/ipu/test_matmul_op_ipu.py | 22 +- .../unittests/ipu/test_matmul_serilize_ipu.py | 18 +- .../unittests/ipu/test_matmul_v2_op_ipu.py | 19 +- .../tests/unittests/ipu/test_mean_op_ipu.py | 6 +- .../ipu/test_mixed_precision_inference_ipu.py | 22 +- .../ipu/test_mixed_precision_training_ipu.py | 22 +- .../unittests/ipu/test_model_parallel_ipu.py | 36 +- .../unittests/ipu/test_model_pipeline_ipu.py | 23 +- .../tests/unittests/ipu/test_mul_op_ipu.py | 13 +- .../unittests/ipu/test_not_equal_op_ipu.py | 27 +- .../unittests/ipu/test_one_hot_op_ipu.py | 7 +- .../unittests/ipu/test_one_hot_v2_op_ipu.py | 7 +- .../tests/unittests/ipu/test_optimizer_ipu.py | 43 +- .../unittests/ipu/test_pool_avg_op_ipu.py | 23 +- .../unittests/ipu/test_pool_max_op_ipu.py | 23 +- .../tests/unittests/ipu/test_pow_op_ipu.py | 17 +- .../tests/unittests/ipu/test_print_op_ipu.py | 11 +- .../unittests/ipu/test_reduce_x_op_ipu.py | 10 +- .../ipu/test_reshape_inplace_op_ipu.py | 7 +- .../unittests/ipu/test_reshape_op_ipu.py | 8 +- .../tests/unittests/ipu/test_save_load_ipu.py | 51 +- .../tests/unittests/ipu/test_scale_op_ipu.py | 21 +- .../ipu/test_scaled_optimizer_state_ipu.py | 26 +- .../unittests/ipu/test_set_batch_size_ipu.py | 34 +- .../tests/unittests/ipu/test_slice_op_ipu.py | 29 +- .../unittests/ipu/test_softmax_op_ipu.py | 7 +- .../test_softmax_with_cross_entropy_op_ipu.py | 22 +- .../tests/unittests/ipu/test_split_op_ipu.py | 7 +- .../unittests/ipu/test_squeeze_op_ipu.py | 8 +- .../tests/unittests/ipu/test_stack_op_ipu.py | 17 +- .../tests/unittests/ipu/test_sum_op_ipu.py | 27 +- .../tests/unittests/ipu/test_topk_op_ipu.py | 15 +- .../unittests/ipu/test_transpose_op_ipu.py | 8 +- .../unittests/ipu/test_unsqueeze_op_ipu.py | 8 +- .../unittests/ipu/test_varname_inplace_ipu.py | 15 +- .../unittests/ipu/test_weight_decay_ipu.py | 22 +- .../unittests/ipu/test_weight_sharing_ipu.py | 25 +- .../unittests/ir/inference/auto_scan_test.py | 180 +- .../ir/inference/inference_pass_test.py | 44 +- .../unittests/ir/inference/program_config.py | 67 +- .../ir/inference/quant_dequant_test.py | 81 +- ...ive_pool2d_convert_global_pass_autoscan.py | 67 +- .../test_conv_act_mkldnn_fuse_pass.py | 96 +- .../test_conv_bias_mkldnn_fuse_pass.py | 73 +- .../ir/inference/test_conv_bn_fuse_pass.py | 114 +- ...est_conv_elementwise_add2_act_fuse_pass.py | 110 +- ...test_conv_elementwise_add_act_fuse_pass.py | 95 +- .../test_conv_elementwise_add_fuse_pass.py | 67 +- .../test_conv_eltwiseadd_bn_fuse_pass.py | 137 +- .../test_conv_transpose_bn_fuse_pass.py | 123 +- ..._conv_transpose_eltwiseadd_bn_fuse_pass.py | 187 +- .../test_emb_eltwise_layernorm_fuse_pass.py | 219 +- ...test_fc_elementwise_layernorm_fuse_pass.py | 82 +- 
.../ir/inference/test_fc_fuse_pass.py | 55 +- .../ir/inference/test_fc_gru_fuse_pass.py | 48 +- .../ir/inference/test_fc_lstm_fuse_pass.py | 17 +- .../test_flatten2_matmul_fuse_pass.py | 64 +- .../test_identity_scale_clean_pass.py | 23 +- .../ir/inference/test_layer_norm_fuse_pass.py | 117 +- .../inference/test_map_matmul_to_mul_pass.py | 55 +- .../test_map_matmul_v2_to_matmul_pass.py | 43 +- .../test_map_matmul_v2_to_mul_pass.py | 53 +- .../inference/test_matmul_scale_fuse_pass.py | 57 +- .../test_matmul_v2_scale_fuse_pass.py | 57 +- .../test_mkldnn_batch_norm_act_fuse_pass.py | 68 +- .../test_mkldnn_conv3d_bias_fuse_pass.py | 8 +- .../ir/inference/test_mkldnn_conv3d_op.py | 49 +- .../test_mkldnn_conv_activation_fuse_pass.py | 24 +- ...st_mkldnn_conv_affine_channel_fuse_pass.py | 77 +- .../test_mkldnn_conv_bias_fuse_pass.py | 119 +- ...kldnn_conv_concat_relu_mkldnn_fuse_pass.py | 17 +- ...t_mkldnn_conv_elementwise_add_fuse_pass.py | 104 +- .../test_mkldnn_conv_gelu_fuse_pass.py | 13 +- ...test_mkldnn_conv_hard_sigmoid_fuse_pass.py | 17 +- .../test_mkldnn_conv_hard_swish_fuse_pass.py | 17 +- .../test_mkldnn_conv_mish_fuse_pass.py | 13 +- ...st_mkldnn_conv_transpose_bias_fuse_pass.py | 67 +- .../test_mkldnn_cpu_bfloat16_pass.py | 6 +- .../test_mkldnn_depthwise_conv_pass.py | 67 +- .../test_mkldnn_elt_act_fuse_pass.py | 45 +- .../test_mkldnn_elt_act_fuse_pass_new.py | 10 +- .../inference/test_mkldnn_fc_act_fuse_pass.py | 36 +- ...est_mkldnn_fc_elementwise_add_fuse_pass.py | 52 +- .../test_mkldnn_fc_mish_fuse_pass.py | 32 +- .../test_mkldnn_inplace_fuse_pass.py | 17 +- ...test_mkldnn_int8_scale_calculation_pass.py | 67 +- .../inference/test_mkldnn_log_softmax_op.py | 24 +- .../test_mkldnn_matmul_op_output_fuse_pass.py | 38 +- ...ldnn_matmul_transpose_reshape_fuse_pass.py | 77 +- ...n_matmul_v2_transpose_reshape_fuse_pass.py | 83 +- .../ir/inference/test_mkldnn_matmulv2_op.py | 65 +- .../ir/inference/test_mkldnn_mish_op.py | 36 +- .../ir/inference/test_mkldnn_prelu_op.py | 36 +- ...ldnn_reshape_transpose_matmul_fuse_pass.py | 7 +- ...n_reshape_transpose_matmul_v2_fuse_pass.py | 20 +- .../test_mkldnn_scale_matmul_fuse_pass.py | 1 + .../ir/inference/test_mkldnn_shape_op.py | 26 +- ...test_mkldnn_shuffle_channel_detect_pass.py | 5 +- .../test_mkldnn_shuffle_channel_op.py | 20 +- ...st_mkldnn_softplus_activation_fuse_pass.py | 16 +- .../ir/inference/test_mul_gru_fuse_pass.py | 120 +- .../ir/inference/test_mul_lstm_fuse_pass.py | 87 +- .../test_repeated_fc_relu_fuse_pass.py | 93 +- .../test_reshape2_matmul_fuse_pass.py | 72 +- .../inference/test_seq_concat_fc_fuse_pass.py | 98 +- .../test_seqconv_eltadd_relu_fuse_pass.py | 85 +- .../test_seqpool_cvm_concat_fuse_pass_py.py | 149 +- .../test_shuffle_channel_detect_pass.py | 60 +- ...t_simplify_with_basic_ops_pass_autoscan.py | 112 +- .../test_squared_mat_sub_fuse_pass.py | 172 +- .../test_squeeze2_matmul_fuse_pass.py | 72 +- ...test_transpose_flatten_concat_fuse_pass.py | 46 +- .../ir/inference/test_trt_activation_pass.py | 83 +- .../inference/test_trt_affine_channel_op.py | 20 +- .../inference/test_trt_anchor_generator_op.py | 19 +- .../ir/inference/test_trt_conv3d_op.py | 58 +- .../inference/test_trt_conv3d_transpose_op.py | 14 +- .../ir/inference/test_trt_conv_pass.py | 64 +- .../test_trt_conv_quant_dequant_pass.py | 76 +- .../inference/test_trt_convert_activation.py | 23 +- .../test_trt_convert_affine_channel.py | 33 +- .../test_trt_convert_anchor_generator.py | 26 +- .../ir/inference/test_trt_convert_arg_max.py | 11 +- 
.../inference/test_trt_convert_batch_norm.py | 60 +- .../ir/inference/test_trt_convert_clip.py | 33 +- .../ir/inference/test_trt_convert_concat.py | 65 +- .../ir/inference/test_trt_convert_conv2d.py | 26 +- .../test_trt_convert_conv2d_fusion.py | 32 +- .../test_trt_convert_conv2d_transpose.py | 31 +- .../test_trt_convert_deformable_conv.py | 66 +- .../test_trt_convert_depthwise_conv2d.py | 23 +- ..._trt_convert_depthwise_conv2d_transpose.py | 31 +- .../ir/inference/test_trt_convert_dropout.py | 26 +- .../inference/test_trt_convert_elementwise.py | 65 +- .../test_trt_convert_emb_eltwise_layernorm.py | 78 +- .../ir/inference/test_trt_convert_flatten.py | 74 +- ...st_trt_convert_flatten_contiguous_range.py | 23 +- .../ir/inference/test_trt_convert_gather.py | 34 +- .../inference/test_trt_convert_gather_nd.py | 46 +- .../ir/inference/test_trt_convert_gelu.py | 25 +- .../inference/test_trt_convert_group_norm.py | 22 +- .../test_trt_convert_hard_sigmoid.py | 15 +- .../inference/test_trt_convert_hard_swish.py | 24 +- .../test_trt_convert_instance_norm.py | 32 +- .../inference/test_trt_convert_layer_norm.py | 38 +- .../inference/test_trt_convert_leaky_relu.py | 21 +- .../ir/inference/test_trt_convert_matmul.py | 27 +- .../ir/inference/test_trt_convert_mish.py | 40 +- .../test_trt_convert_multiclass_nms3.py | 42 +- .../test_trt_convert_multihead_matmul.py | 109 +- .../test_trt_convert_nearest_interp.py | 34 +- .../test_trt_convert_nearest_interp_v2.py | 20 +- .../ir/inference/test_trt_convert_pad.py | 27 +- .../ir/inference/test_trt_convert_pool2d.py | 26 +- .../ir/inference/test_trt_convert_prelu.py | 48 +- .../inference/test_trt_convert_reduce_mean.py | 22 +- .../inference/test_trt_convert_reduce_sum.py | 18 +- .../ir/inference/test_trt_convert_reshape.py | 58 +- .../inference/test_trt_convert_roi_align.py | 59 +- .../ir/inference/test_trt_convert_roll.py | 17 +- .../ir/inference/test_trt_convert_scale.py | 42 +- .../test_trt_convert_shuffle_channel.py | 25 +- .../test_trt_convert_skip_layernorm.py | 45 +- .../ir/inference/test_trt_convert_slice.py | 32 +- .../ir/inference/test_trt_convert_softmax.py | 28 +- .../ir/inference/test_trt_convert_split.py | 46 +- .../ir/inference/test_trt_convert_stack.py | 32 +- .../test_trt_convert_strided_slice.py | 19 +- .../ir/inference/test_trt_convert_swish.py | 25 +- .../ir/inference/test_trt_convert_tile.py | 27 +- .../inference/test_trt_convert_transpose.py | 32 +- .../ir/inference/test_trt_convert_unary.py | 23 +- .../ir/inference/test_trt_convert_yolo_box.py | 49 +- .../test_trt_convert_yolo_box_head.py | 11 +- .../ir/inference/test_trt_deformable_conv.py | 16 +- .../ir/inference/test_trt_dynamic_shape.py | 45 +- .../ir/inference/test_trt_elementwise_op.py | 14 +- .../ir/inference/test_trt_fc_fuse_pass.py | 74 +- .../test_trt_fc_fuse_quant_dequant_pass.py | 61 +- .../test_trt_flatten2_matmul_fuse_pass.py | 62 +- .../ir/inference/test_trt_flatten_op.py | 25 +- .../ir/inference/test_trt_gather_nd_op.py | 46 +- .../ir/inference/test_trt_gather_op.py | 36 +- .../ir/inference/test_trt_group_norm_op.py | 23 +- .../ir/inference/test_trt_inspector.py | 16 +- .../ir/inference/test_trt_instance_norm_op.py | 5 +- .../unittests/ir/inference/test_trt_matmul.py | 57 +- .../test_trt_matmul_quant_dequant.py | 83 +- .../inference/test_trt_multiclass_nms3_op.py | 61 +- .../inference/test_trt_multiclass_nms_op.py | 32 +- .../inference/test_trt_nearest_interp_op.py | 32 +- .../test_trt_nearest_interp_v2_op.py | 32 +- .../unittests/ir/inference/test_trt_pad_op.py | 6 +- 
.../ir/inference/test_trt_pool3d_op.py | 125 +- .../ir/inference/test_trt_pool_op.py | 37 +- .../ir/inference/test_trt_reduce_mean_op.py | 86 +- .../ir/inference/test_trt_reduce_sum_op.py | 29 +- .../test_trt_reshape2_matmul_fuse_pass.py | 62 +- .../ir/inference/test_trt_reshape_op.py | 24 +- .../ir/inference/test_trt_roi_align_op.py | 17 +- .../ir/inference/test_trt_scale_op.py | 23 +- .../test_trt_shuffle_channel_detect_pass.py | 6 +- .../test_trt_slice_dynamic_plugin.py | 24 +- .../ir/inference/test_trt_slice_plugin.py | 12 +- .../test_trt_squeeze2_matmul_fuse_pass.py | 62 +- .../ir/inference/test_trt_subgraph_pass.py | 126 +- .../ir/inference/test_trt_tile_op.py | 9 +- ..._trt_transpose_flatten_concat_fuse_pass.py | 15 +- .../inference/test_trt_tuned_dynamic_shape.py | 22 +- .../ir/inference/test_trt_yolo_box_op.py | 75 +- .../test_unsqueeze2_eltwise_fuse_pass.py | 36 +- .../ir/inference/test_yolo_box_post.py | 35 +- .../fluid/tests/unittests/ir/pass_test.py | 6 +- .../unittests/ir/test_fuse_resnet_unit.py | 22 +- ...r_embedding_eltwise_layernorm_fuse_pass.py | 134 +- .../unittests/ir/test_ir_fc_fuse_pass.py | 7 +- .../unittests/ir/test_ir_fusion_group_pass.py | 26 +- .../unittests/ir/test_ir_generate_pass.py | 35 +- .../ir/test_ir_graph_to_program_pass.py | 14 +- .../ir/test_ir_skip_layernorm_pass.py | 13 +- .../ir/test_ir_subgraph_python_interface.py | 15 +- .../unittests/ir/test_ir_yolo_box_pass.py | 27 +- .../unittests/ir_memory_optimize_net_base.py | 36 +- .../tests/unittests/launch_function_helper.py | 1 + .../mkldnn/check_flags_mkldnn_ops_on_off.py | 1 + .../tests/unittests/mkldnn/mkldnn_op_test.py | 40 +- .../mkldnn/test_activation_bf16_mkldnn_op.py | 21 +- .../mkldnn/test_activation_mkldnn_op.py | 34 +- .../mkldnn/test_batch_norm_mkldnn_op.py | 16 +- .../mkldnn/test_bilinear_interp_mkldnn_op.py | 16 +- .../test_bilinear_interp_v2_mkldnn_op.py | 15 +- .../unittests/mkldnn/test_cast_mkldnn_op.py | 7 +- .../unittests/mkldnn/test_clip_mkldnn_op.py | 6 + .../mkldnn/test_concat_bf16_mkldnn_op.py | 8 +- .../mkldnn/test_concat_int8_mkldnn_op.py | 10 +- .../unittests/mkldnn/test_concat_mkldnn_op.py | 8 +- .../mkldnn/test_conv2d_bf16_mkldnn_op.py | 34 +- .../mkldnn/test_conv2d_int8_mkldnn_op.py | 48 +- .../unittests/mkldnn/test_conv2d_mkldnn_op.py | 16 + .../test_conv2d_transpose_bf16_mkldnn_op.py | 9 + .../mkldnn/test_conv2d_transpose_mkldnn_op.py | 11 + .../unittests/mkldnn/test_conv3d_mkldnn_op.py | 9 + .../mkldnn/test_dequantize_mkldnn_op.py | 18 +- .../test_elementwise_add_bf16_mkldnn_op.py | 44 +- .../mkldnn/test_elementwise_add_mkldnn_op.py | 14 +- .../mkldnn/test_elementwise_div_mkldnn_op.py | 38 +- .../test_elementwise_mul_bf16_mkldnn_op.py | 24 +- .../mkldnn/test_elementwise_mul_mkldnn_op.py | 12 +- .../mkldnn/test_elementwise_sub_mkldnn_op.py | 42 +- .../mkldnn/test_expand_v2_mkldnn_op.py | 9 + .../mkldnn/test_fc_bf16_mkldnn_op.py | 3 + .../unittests/mkldnn/test_fc_mkldnn_op.py | 8 +- .../mkldnn/test_fill_constant_mkldnn_op.py | 15 +- .../mkldnn/test_flags_mkldnn_ops_on_off.py | 10 +- .../unittests/mkldnn/test_flags_use_mkldnn.py | 10 +- .../mkldnn/test_flatten_mkldnn_op.py | 22 +- .../mkldnn/test_fusion_gru_bf16_mkldnn_op.py | 13 +- .../mkldnn/test_fusion_gru_int8_mkldnn_op.py | 28 +- .../mkldnn/test_fusion_gru_mkldnn_op.py | 9 + .../mkldnn/test_fusion_lstm_bf16_mkldnn_op.py | 10 +- .../mkldnn/test_fusion_lstm_int8_mkldnn_op.py | 23 +- .../mkldnn/test_fusion_lstm_mkldnn_op.py | 8 + .../mkldnn/test_gaussian_random_mkldnn_op.py | 2 + 
.../mkldnn/test_layer_norm_bf16_mkldnn_op.py | 23 +- .../mkldnn/test_layer_norm_mkldnn_op.py | 16 +- .../mkldnn/test_log_softmax_mkldnn_op.py | 12 +- .../unittests/mkldnn/test_lrn_mkldnn_op.py | 21 +- .../mkldnn/test_matmul_bf16_mkldnn_op.py | 14 +- .../unittests/mkldnn/test_matmul_mkldnn_op.py | 59 +- .../mkldnn/test_matmul_v2_mkldnn_op.py | 55 +- .../mkldnn/test_mul_int8_mkldnn_op.py | 12 +- .../unittests/mkldnn/test_mul_mkldnn_op.py | 10 +- .../mkldnn/test_multi_gru_mkldnn_op.py | 44 +- .../mkldnn/test_nearest_interp_mkldnn_op.py | 17 +- .../test_nearest_interp_v2_mkldnn_op.py | 18 +- .../mkldnn/test_pool2d_bf16_mkldnn_op.py | 59 +- .../mkldnn/test_pool2d_int8_mkldnn_op.py | 27 +- .../unittests/mkldnn/test_pool2d_mkldnn_op.py | 21 + .../unittests/mkldnn/test_prelu_mkldnn_op.py | 8 + .../mkldnn/test_quantize_mkldnn_op.py | 27 +- .../mkldnn/test_reduce_bf16_mkldnn_op.py | 16 +- .../unittests/mkldnn/test_reduce_mkldnn_op.py | 19 +- .../mkldnn/test_requantize_mkldnn_op.py | 62 +- .../unittests/mkldnn/test_reshape_bf16_op.py | 17 +- .../mkldnn/test_reshape_mkldnn_op.py | 27 +- .../mkldnn/test_scale_bf16_mkldnn_op.py | 4 + .../unittests/mkldnn/test_scale_mkldnn_op.py | 4 + .../unittests/mkldnn/test_shape_mkldnn_op.py | 4 + .../mkldnn/test_shuffle_channel_mkldnn_op.py | 3 + .../unittests/mkldnn/test_slice_mkldnn_op.py | 18 +- .../mkldnn/test_softmax_bf16_mkldnn_op.py | 6 + .../mkldnn/test_softmax_mkldnn_op.py | 27 +- .../mkldnn/test_softplus_mkldnn_op.py | 9 + .../mkldnn/test_split_bf16_mkldnn_op.py | 5 + .../unittests/mkldnn/test_split_mkldnn_op.py | 5 + .../mkldnn/test_squeeze2_mkldnn_op.py | 20 +- .../unittests/mkldnn/test_stack_mkldnn_op.py | 8 + .../mkldnn/test_sum_bf16_mkldnn_op.py | 1 + .../unittests/mkldnn/test_sum_mkldnn_op.py | 11 +- .../mkldnn/test_transpose_bf16_mkldnn_op.py | 2 + .../mkldnn/test_transpose_int8_mkldnn_op.py | 17 +- .../mkldnn/test_transpose_mkldnn_op.py | 7 + .../tests/unittests/mlu/c_comm_init_op_mlu.py | 37 +- .../unittests/mlu/collective_allgather_api.py | 6 +- .../unittests/mlu/collective_allgather_op.py | 28 +- .../unittests/mlu/collective_allreduce_api.py | 6 +- .../unittests/mlu/collective_allreduce_op.py | 24 +- .../unittests/mlu/collective_broadcast_api.py | 6 +- .../unittests/mlu/collective_broadcast_op.py | 28 +- .../unittests/mlu/collective_reduce_api.py | 6 +- .../unittests/mlu/collective_reduce_op.py | 28 +- .../tests/unittests/mlu/multi_process_mlu.py | 2 +- .../tests/unittests/mlu/test_abs_op_mlu.py | 11 +- .../unittests/mlu/test_accuracy_op_mlu.py | 31 +- .../tests/unittests/mlu/test_adam_op_mlu.py | 24 +- .../tests/unittests/mlu/test_adamw_op_mlu.py | 23 +- .../test_amp_check_finite_and_scale_op_mlu.py | 15 +- .../tests/unittests/mlu/test_assign_op_mlu.py | 2 + .../unittests/mlu/test_assign_value_op_mlu.py | 9 +- .../unittests/mlu/test_batch_norm_op_mlu.py | 74 +- .../mlu/test_batch_norm_op_mlu_v2.py | 18 +- .../tests/unittests/mlu/test_cast_op_mlu.py | 11 +- .../mlu/test_coalesce_tensor_op_mlu.py | 17 +- .../mlu/test_collective_allgather.py | 1 + .../mlu/test_collective_allgather_api_mlu.py | 1 + .../mlu/test_collective_allreduce_api_mlu.py | 1 + .../mlu/test_collective_allreduce_max.py | 1 + .../mlu/test_collective_allreduce_min.py | 1 + .../mlu/test_collective_allreduce_prod.py | 1 + .../mlu/test_collective_allreduce_sum.py | 1 + .../mlu/test_collective_api_base_mlu.py | 35 +- .../unittests/mlu/test_collective_base_mlu.py | 104 +- .../mlu/test_collective_broadcast.py | 1 + .../mlu/test_collective_broadcast_api_mlu.py | 1 + 
.../mlu/test_collective_reduce_api_mlu.py | 1 + .../mlu/test_collective_reduce_max.py | 1 + .../mlu/test_collective_reduce_min.py | 1 + .../mlu/test_collective_reduce_prod.py | 1 + .../mlu/test_collective_reduce_sum.py | 1 + .../unittests/mlu/test_compare_op_mlu.py | 41 +- .../tests/unittests/mlu/test_concat_op_mlu.py | 26 +- .../tests/unittests/mlu/test_conv2d_op_mlu.py | 109 +- .../tests/unittests/mlu/test_cumsum_op_mlu.py | 67 +- .../unittests/mlu/test_dropout_op_mlu.py | 66 +- .../mlu/test_elementwise_add_op_mlu.py | 76 +- .../mlu/test_elementwise_div_op_mlu.py | 49 +- .../mlu/test_elementwise_mul_op_mlu.py | 33 +- .../mlu/test_elementwise_sub_op_mlu.py | 30 +- .../mlu/test_fill_any_like_op_mlu.py | 9 + .../mlu/test_fill_constant_op_mlu.py | 214 +- .../unittests/mlu/test_flatten2_op_mlu.py | 18 +- .../test_flatten_contigous_range_op_mlu.py | 42 +- .../unittests/mlu/test_flatten_op_mlu.py | 6 + .../tests/unittests/mlu/test_gather_op_mlu.py | 16 +- .../mlu/test_gaussian_random_op_mlu.py | 9 +- .../tests/unittests/mlu/test_gelu_op_mlu.py | 27 +- .../unittests/mlu/test_layer_norm_op_mlu.py | 148 +- .../unittests/mlu/test_leaky_relu_op_mlu.py | 22 +- .../unittests/mlu/test_log_softmax_op_mlu.py | 9 +- .../mlu/test_lookup_table_v2_op_mlu.py | 15 +- .../tests/unittests/mlu/test_matmul_op_mlu.py | 10 +- .../unittests/mlu/test_matmul_v2_op_mlu.py | 17 +- .../tests/unittests/mlu/test_mean_op_mlu.py | 3 + .../mlu/test_merged_momentum_op_mlu.py | 157 +- .../unittests/mlu/test_momentum_op_mlu.py | 113 +- .../unittests/mlu/test_one_hot_v2_op_mlu.py | 43 +- .../tests/unittests/mlu/test_pool2d_op_mlu.py | 553 ++-- .../unittests/mlu/test_reduce_max_op_mlu.py | 10 +- .../unittests/mlu/test_reduce_mean_op_mlu.py | 25 +- .../unittests/mlu/test_reduce_min_op_mlu.py | 10 +- .../unittests/mlu/test_reduce_sum_op_mlu.py | 20 +- .../tests/unittests/mlu/test_relu6_op_mlu.py | 23 +- .../tests/unittests/mlu/test_relu_op_mlu.py | 23 +- .../unittests/mlu/test_reshape2_op_mlu.py | 4 + .../tests/unittests/mlu/test_scale_op_mlu.py | 12 + .../unittests/mlu/test_sigmoid_op_mlu.py | 7 +- .../tests/unittests/mlu/test_slice_op_mlu.py | 118 +- .../unittests/mlu/test_softmax_op_mlu.py | 28 +- .../test_softmax_with_cross_entropy_op_mlu.py | 30 +- .../tests/unittests/mlu/test_spawn_mlu.py | 8 +- .../tests/unittests/mlu/test_split_op_mlu.py | 15 +- .../unittests/mlu/test_squeeze2_op_mlu.py | 5 + .../unittests/mlu/test_squeeze_op_mlu.py | 13 +- .../tests/unittests/mlu/test_sum_op_mlu.py | 6 +- .../tests/unittests/mlu/test_tanh_op_mlu.py | 22 +- .../tests/unittests/mlu/test_top_k_op_mlu.py | 3 + .../unittests/mlu/test_top_k_v2_op_mlu.py | 93 +- .../unittests/mlu/test_transpose_op_mlu.py | 29 +- .../mlu/test_uniform_random_op_mlu.py | 26 +- .../unittests/mlu/test_unsqueeze2_op_mlu.py | 16 + .../unittests/mlu/test_unsqueeze_op_mlu.py | 6 + .../unittests/mlu/test_unstack_op_mlu.py | 6 + .../fluid/tests/unittests/multi_process.py | 2 +- .../tests/unittests/my_data_generator.py | 2 + .../paddle/fluid/tests/unittests/new_group.py | 26 +- .../npu/collective_identity_op_npu.py | 19 +- .../tests/unittests/npu/process_group_hccl.py | 2 + .../unittests/npu/sync_batch_norm_op_npu.py | 11 +- .../tests/unittests/npu/test_abs_op_npu.py | 8 +- .../unittests/npu/test_accuracy_op_npu.py | 5 + .../tests/unittests/npu/test_adam_op_npu.py | 72 +- .../tests/unittests/npu/test_adamw_op_npu.py | 23 +- .../test_amp_check_finite_and_scale_op_npu.py | 44 +- .../unittests/npu/test_arg_max_op_npu.py | 57 +- .../unittests/npu/test_arg_min_op_npu.py | 43 +- 
.../unittests/npu/test_argsort_op_npu.py | 57 +- .../tests/unittests/npu/test_assign_op_npu.py | 2 + .../unittests/npu/test_assign_value_op_npu.py | 34 +- .../tests/unittests/npu/test_atan_op_npu.py | 4 + .../unittests/npu/test_batch_norm_op_npu.py | 49 +- .../tests/unittests/npu/test_bce_loss_npu.py | 141 +- .../npu/test_beam_search_decode_op_npu.py | 36 +- .../unittests/npu/test_beam_search_op_npu.py | 85 +- .../npu/test_bilinear_interp_v2_op_npu.py | 19 + .../unittests/npu/test_box_coder_op_npu.py | 20 +- .../unittests/npu/test_c_embedding_op_npu.py | 1 + .../unittests/npu/test_c_identity_npu.py | 6 +- .../tests/unittests/npu/test_cast_op_npu.py | 4 + .../unittests/npu/test_clip_by_norm_op_npu.py | 13 +- .../tests/unittests/npu/test_clip_op_npu.py | 22 +- .../npu/test_coalesce_tensor_op_npu.py | 9 +- .../unittests/npu/test_collective_base_npu.py | 78 +- .../npu/test_collective_process_group_hccl.py | 2 + .../unittests/npu/test_compare_op_npu.py | 41 +- .../tests/unittests/npu/test_concat_op_npu.py | 27 +- .../npu/test_conv2d_op_depthwise_conv_npu.py | 135 +- .../tests/unittests/npu/test_conv2d_op_npu.py | 134 +- .../npu/test_conv2d_transpose_op_npu.py | 181 +- .../tests/unittests/npu/test_conv3d_op_npu.py | 388 ++- .../tests/unittests/npu/test_cos_op_npu.py | 22 +- .../tests/unittests/npu/test_crop_op_npu.py | 12 + .../tests/unittests/npu/test_cumsum_op_npu.py | 79 +- .../npu/test_density_prior_box_op_npu.py | 11 +- .../unittests/npu/test_dropout_op_npu.py | 66 +- .../npu/test_elementwise_add_op_npu.py | 97 +- .../npu/test_elementwise_div_op_npu.py | 21 +- .../npu/test_elementwise_floordiv_op_npu.py | 3 + .../npu/test_elementwise_max_op_npu.py | 85 +- .../npu/test_elementwise_min_op_npu.py | 68 +- .../npu/test_elementwise_mod_op_npu.py | 11 + .../npu/test_elementwise_mul_op_npu.py | 24 +- .../npu/test_elementwise_pow_op_npu.py | 118 +- .../npu/test_elementwise_sub_op_npu.py | 49 +- .../tests/unittests/npu/test_exp_op_npu.py | 8 +- .../unittests/npu/test_expand_as_v2_op_npu.py | 27 +- .../tests/unittests/npu/test_expand_op_npu.py | 16 +- .../unittests/npu/test_expand_v2_op_npu.py | 49 +- .../tests/unittests/npu/test_eye_op_npu.py | 9 + .../npu/test_fill_any_like_op_npu.py | 9 + ...st_fill_constant_batch_size_like_op_npu.py | 10 + .../npu/test_fill_constant_op_npu.py | 7 + .../npu/test_fill_zeros_like_op_npu.py | 7 + .../npu/test_flags_check_nan_inf_npu.py | 2 + .../unittests/npu/test_flatten2_op_npu.py | 6 + .../test_flatten_contiguous_range_op_npu.py | 42 +- .../unittests/npu/test_float_status_op_npu.py | 3 + .../unittests/npu/test_gather_nd_op_npu.py | 22 +- .../tests/unittests/npu/test_gather_op_npu.py | 31 +- .../npu/test_gaussian_random_op_npu.py | 8 +- .../tests/unittests/npu/test_gelu_op_npu.py | 27 +- .../unittests/npu/test_group_norm_op_npu.py | 32 +- .../unittests/npu/test_hard_sigmoid_op_npu.py | 14 +- .../unittests/npu/test_hard_swish_op_npu.py | 19 +- .../unittests/npu/test_huber_loss_op_npu.py | 25 +- .../unittests/npu/test_increment_op_npu.py | 9 +- .../unittests/npu/test_index_sample_op_npu.py | 37 +- .../unittests/npu/test_index_select_op_npu.py | 27 +- .../npu/test_iou_similarity_op_npu.py | 5 + .../unittests/npu/test_is_empty_op_npu.py | 15 +- .../unittests/npu/test_kldiv_loss_op_npu.py | 38 +- .../unittests/npu/test_label_smooth_op_npu.py | 14 +- .../unittests/npu/test_layer_norm_op_npu.py | 48 +- .../unittests/npu/test_leaky_relu_op_npu.py | 22 +- .../unittests/npu/test_log_loss_op_npu.py | 3 + .../tests/unittests/npu/test_log_op_npu.py | 22 +- 
.../unittests/npu/test_log_softmax_op_npu.py | 25 +- .../unittests/npu/test_logical_op_npu.py | 26 +- .../npu/test_lookup_table_v2_op_npu.py | 15 +- .../npu/test_masked_select_op_npu.py | 25 +- .../tests/unittests/npu/test_matmul_op_npu.py | 12 +- .../unittests/npu/test_matmulv2_op_npu.py | 17 +- .../tests/unittests/npu/test_mean_op_npu.py | 3 + .../tests/unittests/npu/test_memcpy_op_npu.py | 76 +- .../npu/test_merged_momentum_op_npu.py | 157 +- .../unittests/npu/test_meshgrid_op_npu.py | 78 +- .../unittests/npu/test_mixed_precision_npu.py | 2 + .../unittests/npu/test_momentum_op_npu.py | 65 +- .../tests/unittests/npu/test_mul_op_npu.py | 46 +- .../unittests/npu/test_multinomial_op_npu.py | 30 +- .../npu/test_nearest_interp_op_npu.py | 85 +- .../npu/test_nearest_interp_v2_op_npu.py | 60 +- .../tests/unittests/npu/test_norm_op_npu.py | 13 +- .../tests/unittests/npu/test_npu_place.py | 2 + .../unittests/npu/test_one_hot_op_npu.py | 7 + .../unittests/npu/test_one_hot_v2_op_npu.py | 13 +- .../tests/unittests/npu/test_p_norm_op_npu.py | 17 +- .../tests/unittests/npu/test_pad3d_op_npu.py | 160 +- .../tests/unittests/npu/test_pad_op_npu.py | 26 +- .../npu/test_parallel_dygraph_mnist_npu.py | 15 +- .../tests/unittests/npu/test_pool2d_op_npu.py | 117 +- .../tests/unittests/npu/test_pow_op_npu.py | 22 +- .../unittests/npu/test_prior_box_op_npu.py | 51 +- .../unittests/npu/test_randperm_op_npu.py | 16 +- .../tests/unittests/npu/test_range_npu.py | 14 +- .../unittests/npu/test_reciprocal_op_npu.py | 16 +- .../unittests/npu/test_reduce_any_op_npu.py | 14 +- .../unittests/npu/test_reduce_max_op_npu.py | 25 +- .../unittests/npu/test_reduce_mean_op_npu.py | 25 +- .../unittests/npu/test_reduce_min_op_npu.py | 25 +- .../unittests/npu/test_reduce_prod_op_npu.py | 49 +- .../unittests/npu/test_reduce_sum_op_npu.py | 35 +- .../tests/unittests/npu/test_relu6_op_npu.py | 23 +- .../tests/unittests/npu/test_relu_op_npu.py | 28 +- .../unittests/npu/test_reshape2_op_npu.py | 4 + .../unittests/npu/test_rmsprop_op_npu.py | 39 +- .../unittests/npu/test_roi_align_op_npu.py | 7 +- .../unittests/npu/test_sampling_id_op_npu.py | 5 +- .../tests/unittests/npu/test_save_load_npu.py | 41 +- .../tests/unittests/npu/test_scale_op_npu.py | 16 +- .../unittests/npu/test_scatter_op_npu.py | 6 + .../tests/unittests/npu/test_seed_op_npu.py | 3 + .../unittests/npu/test_sequence_mask_npu.py | 43 +- .../unittests/npu/test_set_value_op_npu.py | 76 +- .../tests/unittests/npu/test_sgd_op_npu.py | 21 +- .../tests/unittests/npu/test_shape_op_npu.py | 6 + .../unittests/npu/test_shard_index_op.py | 6 + ...igmoid_cross_entropy_with_logits_op_npu.py | 62 +- .../unittests/npu/test_sigmoid_op_npu.py | 7 +- .../tests/unittests/npu/test_sin_op_npu.py | 8 +- .../tests/unittests/npu/test_size_op_npu.py | 19 +- .../tests/unittests/npu/test_slice_op_npu.py | 76 +- .../npu/test_smooth_l1_loss_op_npu.py | 61 +- .../unittests/npu/test_softmax_op_npu.py | 21 +- .../test_softmax_with_cross_entropy_op_npu.py | 30 +- .../tests/unittests/npu/test_split_op_npu.py | 13 +- .../tests/unittests/npu/test_sqrt_op_npu.py | 22 +- .../tests/unittests/npu/test_square_op_npu.py | 22 +- .../npu/test_squared_l2_norm_op_npu.py | 8 +- .../unittests/npu/test_squeeze_op_npu.py | 36 +- .../tests/unittests/npu/test_stack_op_npu.py | 33 +- .../npu/test_strided_slice_op_npu.py | 152 +- .../tests/unittests/npu/test_sum_op_npu.py | 6 +- .../tests/unittests/npu/test_swish_op_npu.py | 12 +- .../npu/test_sync_batch_norm_base_npu.py | 87 +- .../test_sync_batch_norm_op_npu_baseline.py | 7 +- 
.../npu/test_sync_batch_norm_op_npu_extra.py | 22 +- .../npu/test_take_along_axis_op_npu.py | 19 +- .../tests/unittests/npu/test_tanh_op_npu.py | 22 +- .../tests/unittests/npu/test_tile_op_npu.py | 23 +- .../tests/unittests/npu/test_top_k_op_npu.py | 18 +- .../unittests/npu/test_top_k_v2_op_npu.py | 84 +- .../unittests/npu/test_transpose_op_npu.py | 14 + .../unittests/npu/test_tril_triu_op_npu.py | 20 +- .../test_truncated_gaussian_random_op_npu.py | 12 +- .../npu/test_uniform_random_op_npu.py | 26 +- .../unittests/npu/test_unsqueeze_op_npu.py | 19 +- .../unittests/npu/test_unstack_op_npu.py | 6 + .../test_update_loss_scaling_min_op_npu.py | 2 + .../npu/test_update_loss_scaling_op_npu.py | 76 +- .../unittests/npu/test_where_index_npu.py | 38 +- .../tests/unittests/npu/test_where_op_npu.py | 40 +- .../tests/unittests/npu/test_while_op_npu.py | 27 +- .../paddle/fluid/tests/unittests/op_test.py | 465 +-- .../fluid/tests/unittests/op_test_xpu.py | 25 +- .../unittests/parallel_class_center_sample.py | 16 +- ...parallel_dygraph_control_flow_different.py | 34 +- .../parallel_dygraph_control_flow_same.py | 20 +- ...allel_dygraph_dataparallel_with_pylayer.py | 4 + .../parallel_dygraph_gradient_check.py | 18 +- ...el_dygraph_gradient_check_in_eager_mode.py | 28 +- .../tests/unittests/parallel_dygraph_mnist.py | 81 +- .../unittests/parallel_dygraph_no_sync.py | 8 +- .../parallel_dygraph_no_sync_control_flow.py | 8 +- ...parallel_dygraph_no_sync_gradient_check.py | 18 +- .../parallel_dygraph_no_sync_unused_params.py | 8 +- .../unittests/parallel_dygraph_none_var.py | 20 +- .../unittests/parallel_dygraph_se_resnext.py | 180 +- .../parallel_dygraph_shared_unused_var.py | 8 +- .../parallel_dygraph_sparse_embedding.py | 26 +- .../parallel_dygraph_sparse_embedding_fp64.py | 26 +- ...el_dygraph_sparse_embedding_over_height.py | 19 +- .../parallel_dygraph_sync_batch_norm.py | 48 +- .../unittests/parallel_dygraph_transformer.py | 327 +- .../parallel_dygraph_unused_variables.py | 25 +- .../tests/unittests/parallel_embedding_api.py | 17 +- .../unittests/parallel_executor_test_base.py | 18 +- .../parallel_margin_cross_entropy.py | 84 +- .../fluid/tests/unittests/pipeline_mnist.py | 31 +- .../unittests/pipeline_mnist_multi_device.py | 31 +- .../unittests/pipeline_mnist_one_device.py | 23 +- .../tests/unittests/process_group_gloo.py | 1 + .../tests/unittests/process_group_nccl.py | 86 +- .../fluid/tests/unittests/ps/__init__.py | 2 +- .../tests/unittests/ps/dataset_generator_A.py | 1 + .../tests/unittests/ps/dataset_generator_B.py | 1 + .../fluid/tests/unittests/ps/fl_ps_trainer.py | 27 +- .../tests/unittests/ps/ps_dnn_trainer.py | 95 +- .../fluid/tests/unittests/ps/test_fl_ps.py | 7 +- .../tests/unittests/ps/test_the_one_ps.py | 1 + .../fluid/tests/unittests/ps_dnn_model.py | 51 +- .../tests/unittests/py_precise_roi_pool.py | 36 +- .../fluid/tests/unittests/rnn/convert.py | 7 +- .../fluid/tests/unittests/rnn/rnn_numpy.py | 74 +- .../tests/unittests/rnn/test_rnn_cells.py | 31 +- .../unittests/rnn/test_rnn_cells_static.py | 22 +- .../rnn/test_rnn_cudnn_params_packing.py | 19 +- .../tests/unittests/rnn/test_rnn_nets.py | 52 +- .../unittests/rnn/test_rnn_nets_static.py | 41 +- .../tests/unittests/rnn/test_wrappers.py | 3 + .../unittests/row_parallel_linear_api.py | 9 +- .../sequence/test_sequence_concat.py | 8 + .../unittests/sequence/test_sequence_conv.py | 79 +- .../sequence/test_sequence_enumerate_op.py | 7 + .../sequence/test_sequence_erase_op.py | 6 + .../sequence/test_sequence_expand.py | 8 + 
.../sequence/test_sequence_expand_as.py | 6 + .../sequence/test_sequence_first_step.py | 13 +- .../sequence/test_sequence_last_step.py | 13 +- .../unittests/sequence/test_sequence_mask.py | 43 +- .../sequence/test_sequence_pad_op.py | 35 +- .../unittests/sequence/test_sequence_pool.py | 56 +- .../sequence/test_sequence_reshape.py | 19 +- .../sequence/test_sequence_reverse.py | 20 +- .../sequence/test_sequence_scatter_op.py | 8 + .../sequence/test_sequence_slice_op.py | 5 + .../sequence/test_sequence_softmax_op.py | 10 +- .../test_sequence_topk_avg_pooling.py | 36 +- .../sequence/test_sequence_unpad_op.py | 8 + .../fluid/tests/unittests/seresnext_net.py | 118 +- .../tests/unittests/seresnext_test_base.py | 11 +- .../tests/unittests/simnet_dataset_reader.py | 1 + .../fluid/tests/unittests/simple_nets.py | 19 +- .../tests/unittests/spawn_runner_base.py | 21 +- .../unittests/static_model_parallel_by_col.py | 25 +- .../unittests/static_model_parallel_by_row.py | 25 +- .../static_model_parallel_embedding.py | 21 +- .../static_model_parallel_fused_attention.py | 96 +- ...static_model_parallel_fused_feedforward.py | 246 +- ..._model_parallel_fused_multi_transformer.py | 18 +- .../fluid/tests/unittests/test_Tensor_type.py | 1 + .../fluid/tests/unittests/test_accuracy_op.py | 30 +- .../unittests/test_activation_nn_grad.py | 75 +- .../tests/unittests/test_activation_op.py | 452 ++- .../unittests/test_activation_sparse_op.py | 12 +- .../fluid/tests/unittests/test_adadelta_op.py | 57 +- .../fluid/tests/unittests/test_adagrad_op.py | 67 +- .../tests/unittests/test_adagrad_op_v2.py | 26 +- .../fluid/tests/unittests/test_adam_op.py | 298 +- .../test_adam_optimizer_fp32_fp64.py | 6 +- .../fluid/tests/unittests/test_adamax_api.py | 46 +- .../fluid/tests/unittests/test_adamax_op.py | 26 +- .../fluid/tests/unittests/test_adamw_op.py | 226 +- .../unittests/test_adaptive_avg_pool1d.py | 32 +- .../unittests/test_adaptive_avg_pool2d.py | 127 +- .../unittests/test_adaptive_avg_pool3d.py | 103 +- .../unittests/test_adaptive_max_pool1d.py | 29 +- .../unittests/test_adaptive_max_pool2d.py | 97 +- .../unittests/test_adaptive_max_pool3d.py | 99 +- .../test_add_position_encoding_op.py | 25 +- .../unittests/test_add_reader_dependency.py | 20 +- .../fluid/tests/unittests/test_addmm_op.py | 222 +- .../tests/unittests/test_affine_channel_op.py | 19 +- .../unittests/test_affine_grid_function.py | 43 +- .../tests/unittests/test_affine_grid_op.py | 22 +- .../tests/unittests/test_allclose_layer.py | 134 +- .../fluid/tests/unittests/test_allclose_op.py | 26 +- .../fluid/tests/unittests/test_allgather.py | 1 + .../fluid/tests/unittests/test_allreduce.py | 1 + .../test_amp_check_finite_and_scale_op.py | 7 +- .../unittests/test_anchor_generator_op.py | 10 +- .../fluid/tests/unittests/test_angle_op.py | 34 +- .../unittests/test_apply_pass_to_program.py | 26 +- .../fluid/tests/unittests/test_arange.py | 21 +- .../tests/unittests/test_arg_min_max_op.py | 46 +- .../tests/unittests/test_arg_min_max_v2_op.py | 91 +- .../fluid/tests/unittests/test_argsort_op.py | 88 +- .../unittests/test_array_read_write_op.py | 69 +- .../tests/unittests/test_ascend_trigger.py | 9 +- .../fluid/tests/unittests/test_assert_op.py | 22 +- .../fluid/tests/unittests/test_assign_op.py | 32 +- .../tests/unittests/test_assign_pos_op.py | 14 +- .../tests/unittests/test_assign_value_op.py | 33 +- .../test_async_ssa_graph_executor_mnist.py | 64 +- .../fluid/tests/unittests/test_atan2_op.py | 20 +- .../tests/unittests/test_attention_lstm_op.py | 13 +- 
.../fluid/tests/unittests/test_auc_op.py | 16 +- .../unittests/test_auc_single_pred_op.py | 10 +- .../tests/unittests/test_auto_checkpoint.py | 2 + .../tests/unittests/test_auto_checkpoint1.py | 1 + .../tests/unittests/test_auto_checkpoint2.py | 1 + .../tests/unittests/test_auto_checkpoint3.py | 1 + .../test_auto_checkpoint_dist_basic.py | 1 + .../test_auto_checkpoint_multiple.py | 1 + .../test_auto_growth_gpu_memory_limit.py | 4 +- .../tests/unittests/test_auto_parallel_api.py | 42 +- .../test_auto_parallel_autoconvert.py | 1 + .../unittests/test_auto_parallel_cluster.py | 1 + .../test_auto_parallel_completion.py | 610 ++-- .../test_auto_parallel_completion_gpt.py | 424 ++- .../test_auto_parallel_cost_model.py | 125 +- .../test_auto_parallel_data_unshard.py | 1 + .../test_auto_parallel_dist_tensor.py | 53 +- .../unittests/test_auto_parallel_graph.py | 1 + .../unittests/test_auto_parallel_mapper.py | 144 +- .../test_auto_parallel_partitioner.py | 737 +++-- .../test_auto_parallel_partitioner_gpt.py | 447 ++- .../unittests/test_auto_parallel_reshard.py | 145 +- .../test_auto_parallel_reshard_dpmppp.py | 91 +- .../test_auto_parallel_reshard_mppp.py | 161 +- .../test_auto_parallel_reshard_serial.py | 139 +- .../unittests/test_auto_parallel_save_load.py | 1 + .../unittests/test_auto_parallel_searcher.py | 49 +- .../test_auto_search_dist_matmul_op.py | 98 +- .../unittests/test_auto_search_dist_op.py | 38 +- .../test_avoid_twice_initialization.py | 25 +- .../fluid/tests/unittests/test_backward.py | 57 +- ...test_backward_infer_var_data_type_shape.py | 13 +- .../fluid/tests/unittests/test_base_layer.py | 41 +- .../tests/unittests/test_basic_gru_api.py | 132 +- .../tests/unittests/test_basic_gru_unit_op.py | 56 +- .../tests/unittests/test_basic_lstm_api.py | 97 +- .../unittests/test_basic_lstm_unit_op.py | 57 +- .../tests/unittests/test_basic_rnn_name.py | 41 +- .../fluid/tests/unittests/test_batch_fc_op.py | 22 +- .../tests/unittests/test_batch_norm_op.py | 75 +- .../tests/unittests/test_batch_norm_op_v2.py | 17 +- .../tests/unittests/test_batch_sampler.py | 42 +- .../fluid/tests/unittests/test_bce_loss.py | 140 +- .../unittests/test_bce_with_logits_loss.py | 202 +- .../unittests/test_beam_search_decode_op.py | 62 +- .../tests/unittests/test_beam_search_op.py | 231 +- .../tests/unittests/test_bernoulli_op.py | 18 +- .../paddle/fluid/tests/unittests/test_bfgs.py | 44 +- .../tests/unittests/test_bicubic_interp_op.py | 212 +- .../unittests/test_bicubic_interp_v2_op.py | 297 +- .../unittests/test_bilateral_slice_op.py | 28 +- .../tests/unittests/test_bilinear_api.py | 18 +- .../unittests/test_bilinear_interp_op.py | 63 +- .../unittests/test_bilinear_interp_v2_op.py | 112 +- .../test_bilinear_tensor_product_op.py | 11 +- .../fluid/tests/unittests/test_bincount_op.py | 30 +- .../unittests/test_bipartite_match_op.py | 9 +- .../fluid/tests/unittests/test_bitwise_op.py | 66 +- .../fluid/tests/unittests/test_bmm_op.py | 20 +- .../fluid/tests/unittests/test_box_clip_op.py | 17 +- .../tests/unittests/test_box_coder_op.py | 10 +- .../test_box_decoder_and_assign_op.py | 1 + .../fluid/tests/unittests/test_boxps.py | 56 +- .../fluid/tests/unittests/test_broadcast.py | 1 + .../tests/unittests/test_broadcast_error.py | 1 + .../tests/unittests/test_broadcast_shape.py | 1 + .../unittests/test_broadcast_tensors_op.py | 87 +- .../tests/unittests/test_broadcast_to_op.py | 24 +- .../test_buffer_shared_memory_reuse_pass.py | 30 +- ...euse_pass_and_fuse_optimization_op_pass.py | 2 + 
.../test_build_strategy_fusion_group_pass.py | 1 + .../unittests/test_c_comm_init_all_op.py | 9 +- .../fluid/tests/unittests/test_c_concat.py | 1 + .../fluid/tests/unittests/test_c_identity.py | 1 + .../fluid/tests/unittests/test_c_split.py | 1 + .../tests/unittests/test_calc_gradient.py | 5 + .../paddle/fluid/tests/unittests/test_case.py | 114 +- .../fluid/tests/unittests/test_cast_op.py | 14 +- .../fluid/tests/unittests/test_center_loss.py | 40 +- .../tests/unittests/test_channel_shuffle.py | 38 +- .../unittests/test_check_import_scipy.py | 1 + .../tests/unittests/test_checkpoint_saver.py | 1 + .../fluid/tests/unittests/test_cholesky_op.py | 15 +- .../tests/unittests/test_cholesky_solve_op.py | 35 +- .../tests/unittests/test_chunk_eval_op.py | 101 +- .../fluid/tests/unittests/test_chunk_op.py | 10 +- .../unittests/test_class_center_sample_op.py | 57 +- .../tests/unittests/test_clip_by_norm_op.py | 25 +- .../fluid/tests/unittests/test_clip_op.py | 46 +- .../unittests/test_coalesce_tensor_op.py | 12 +- .../test_collect_fpn_proposals_op.py | 25 +- .../test_collective_allgather_api.py | 1 + .../test_collective_allreduce_api.py | 1 + .../unittests/test_collective_alltoall_api.py | 10 +- .../unittests/test_collective_api_base.py | 113 +- .../unittests/test_collective_barrier_api.py | 1 + .../tests/unittests/test_collective_base.py | 113 +- .../test_collective_broadcast_api.py | 1 + .../test_collective_cpu_barrier_with_gloo.py | 14 +- .../test_collective_global_gather.py | 23 +- .../test_collective_global_scatter.py | 23 +- .../unittests/test_collective_optimizer.py | 1 + .../test_collective_process_group.py | 1 + .../tests/unittests/test_collective_reduce.py | 1 + .../unittests/test_collective_reduce_api.py | 1 + .../unittests/test_collective_scatter.py | 1 + .../unittests/test_collective_scatter_api.py | 1 + .../unittests/test_collective_sendrecv.py | 1 + .../unittests/test_collective_sendrecv_api.py | 10 +- .../test_collective_split_col_linear.py | 1 + .../test_collective_split_embedding.py | 1 + ...llective_split_embedding_none_divisible.py | 11 +- .../test_collective_split_row_linear.py | 1 + .../tests/unittests/test_collective_wait.py | 8 +- .../unittests/test_communicator_async.py | 2 + .../tests/unittests/test_communicator_geo.py | 9 +- .../unittests/test_communicator_half_async.py | 9 +- .../unittests/test_communicator_ps_gpu.py | 8 +- .../tests/unittests/test_communicator_sync.py | 2 + .../fluid/tests/unittests/test_compare_op.py | 83 +- .../tests/unittests/test_compare_reduce_op.py | 9 + .../fluid/tests/unittests/test_compat.py | 1 + .../tests/unittests/test_compiled_program.py | 49 +- .../fluid/tests/unittests/test_complex_abs.py | 47 +- .../tests/unittests/test_complex_cast.py | 7 +- .../test_complex_elementwise_layers.py | 29 +- .../tests/unittests/test_complex_getitem.py | 1 + .../test_complex_grad_accumulated.py | 32 +- .../tests/unittests/test_complex_kron.py | 31 +- .../tests/unittests/test_complex_matmul.py | 13 +- .../fluid/tests/unittests/test_complex_op.py | 54 +- .../tests/unittests/test_complex_reshape.py | 9 +- .../tests/unittests/test_complex_simplenet.py | 15 +- .../tests/unittests/test_complex_sum_layer.py | 6 +- .../unittests/test_complex_trace_layer.py | 10 +- .../tests/unittests/test_complex_transpose.py | 1 + .../tests/unittests/test_complex_variable.py | 19 +- .../tests/unittests/test_complex_view_op.py | 36 +- .../fluid/tests/unittests/test_concat_op.py | 84 +- .../paddle/fluid/tests/unittests/test_cond.py | 186 +- .../tests/unittests/test_conditional_block.py 
| 30 +- .../fluid/tests/unittests/test_conj_op.py | 41 +- .../fluid/tests/unittests/test_const_value.py | 1 + .../tests/unittests/test_context_manager.py | 5 +- .../tests/unittests/test_conv1d_layer.py | 121 +- .../unittests/test_conv1d_transpose_layer.py | 143 +- .../fluid/tests/unittests/test_conv2d_api.py | 425 ++- .../tests/unittests/test_conv2d_fusion_op.py | 30 +- .../tests/unittests/test_conv2d_layer.py | 161 +- .../fluid/tests/unittests/test_conv2d_op.py | 226 +- .../test_conv2d_op_depthwise_conv.py | 21 + .../unittests/test_conv2d_transpose_layer.py | 134 +- .../unittests/test_conv2d_transpose_op.py | 284 +- ...test_conv2d_transpose_op_depthwise_conv.py | 6 + .../tests/unittests/test_conv3d_layer.py | 126 +- .../fluid/tests/unittests/test_conv3d_op.py | 418 +-- .../unittests/test_conv3d_transpose_layer.py | 141 +- .../unittests/test_conv3d_transpose_op.py | 101 +- .../test_conv3d_transpose_part2_op.py | 174 +- .../tests/unittests/test_conv_nn_grad.py | 311 +- .../tests/unittests/test_conv_shift_op.py | 1 + .../unittests/test_conv_transpose_nn_grad.py | 193 +- .../paddle/fluid/tests/unittests/test_corr.py | 13 +- .../fluid/tests/unittests/test_cos_sim_op.py | 25 +- .../unittests/test_cosine_similarity_api.py | 7 +- .../fluid/tests/unittests/test_cost_model.py | 9 +- .../paddle/fluid/tests/unittests/test_cov.py | 72 +- .../tests/unittests/test_cpuonly_spawn.py | 8 +- .../tests/unittests/test_create_global_var.py | 5 +- .../unittests/test_create_op_doc_string.py | 1 + .../tests/unittests/test_create_parameter.py | 15 +- .../tests/unittests/test_crf_decoding_op.py | 64 +- .../fluid/tests/unittests/test_crop_op.py | 8 + .../tests/unittests/test_crop_tensor_op.py | 24 +- .../tests/unittests/test_cross_entropy2_op.py | 25 +- .../unittests/test_cross_entropy_loss.py | 622 ++-- .../tests/unittests/test_cross_entropy_op.py | 100 +- .../fluid/tests/unittests/test_cross_op.py | 23 +- .../fluid/tests/unittests/test_crypto.py | 2 + .../fluid/tests/unittests/test_ctc_align.py | 52 +- .../unittests/test_cuda_cudnn_version.py | 1 + .../tests/unittests/test_cuda_device_count.py | 7 +- .../test_cuda_device_name_capability.py | 8 +- .../tests/unittests/test_cuda_empty_cache.py | 1 + .../fluid/tests/unittests/test_cuda_graph.py | 46 +- .../test_cuda_graph_partial_graph.py | 8 +- .../test_cuda_max_memory_allocated.py | 10 +- .../test_cuda_max_memory_reserved.py | 10 +- .../unittests/test_cuda_memory_allocated.py | 10 +- .../unittests/test_cuda_memory_reserved.py | 10 +- .../tests/unittests/test_cuda_random_seed.py | 44 +- .../tests/unittests/test_cuda_stream_event.py | 11 +- .../tests/unittests/test_cudnn_grucell.py | 58 +- .../tests/unittests/test_cudnn_lstmcell.py | 129 +- .../fluid/tests/unittests/test_cumprod_op.py | 23 +- .../fluid/tests/unittests/test_cumsum_op.py | 67 +- .../tests/unittests/test_custom_grad_input.py | 20 +- .../fluid/tests/unittests/test_cvm_op.py | 25 +- .../unittests/test_cyclic_cifar_dataset.py | 1 + .../paddle/fluid/tests/unittests/test_data.py | 15 +- .../tests/unittests/test_data_generator.py | 27 + .../tests/unittests/test_data_norm_op.py | 35 +- .../unittests/test_dataloader_autotune.py | 19 +- .../unittests/test_dataloader_dataset.py | 14 +- .../unittests/test_dataloader_early_reset.py | 7 +- .../unittests/test_dataloader_keep_order.py | 29 +- .../unittests/test_dataloader_unkeep_order.py | 40 +- .../fluid/tests/unittests/test_dataset.py | 354 ++- .../test_dataset_consistency_inspection.py | 80 +- .../unittests/test_dataset_dataloader.py | 43 +- 
.../tests/unittests/test_dataset_download.py | 1 + .../fluid/tests/unittests/test_debugger.py | 48 +- .../unittests/test_decoupled_py_reader.py | 22 +- .../test_decoupled_py_reader_data_check.py | 25 +- .../tests/unittests/test_default_dtype.py | 2 + .../unittests/test_default_scope_funcs.py | 2 + .../tests/unittests/test_deform_conv2d.py | 71 +- .../unittests/test_deformable_conv_op.py | 123 +- .../unittests/test_deformable_conv_v1_op.py | 73 +- .../test_deformable_psroi_pooling.py | 172 +- .../fluid/tests/unittests/test_deg2rad.py | 9 +- .../unittests/test_density_prior_box_op.py | 6 +- .../unittests/test_deprecated_decorator.py | 13 +- ...t_deprecated_memory_optimize_interfaces.py | 2 + .../unittests/test_dequantize_abs_max_op.py | 3 + .../tests/unittests/test_dequantize_log_op.py | 1 + .../fluid/tests/unittests/test_desc_clone.py | 64 +- .../fluid/tests/unittests/test_detach.py | 78 +- .../tests/unittests/test_detection_map_op.py | 27 +- .../tests/unittests/test_determinant_op.py | 7 + .../fluid/tests/unittests/test_device.py | 2 + .../tests/unittests/test_device_guard.py | 66 +- .../tests/unittests/test_dgc_momentum_op.py | 31 +- .../fluid/tests/unittests/test_dgc_op.py | 10 +- .../tests/unittests/test_dgc_optimizer.py | 53 +- .../paddle/fluid/tests/unittests/test_diag.py | 3 + .../fluid/tests/unittests/test_diag_embed.py | 3 + .../fluid/tests/unittests/test_diag_v2.py | 17 +- .../fluid/tests/unittests/test_diagflat.py | 11 +- .../fluid/tests/unittests/test_diagonal_op.py | 52 +- .../fluid/tests/unittests/test_diff_op.py | 87 +- .../fluid/tests/unittests/test_digamma_op.py | 13 +- .../unittests/test_directory_migration.py | 15 +- .../unittests/test_disable_signal_handler.py | 12 +- .../tests/unittests/test_dist_allreduce_op.py | 1 + .../fluid/tests/unittests/test_dist_base.py | 291 +- .../tests/unittests/test_dist_dygraph_apis.py | 1 + .../test_dist_fleet_a_sync_optimizer_async.py | 1 + .../test_dist_fleet_a_sync_optimizer_auto.py | 10 +- ..._dist_fleet_a_sync_optimizer_auto_async.py | 17 +- ...st_dist_fleet_a_sync_optimizer_auto_geo.py | 11 +- .../test_dist_fleet_a_sync_optimizer_geo.py | 20 +- .../test_dist_fleet_a_sync_optimizer_sync.py | 1 + .../tests/unittests/test_dist_fleet_base.py | 87 +- .../tests/unittests/test_dist_fleet_ctr.py | 18 +- .../tests/unittests/test_dist_fleet_ctr2.py | 12 +- .../tests/unittests/test_dist_fleet_decay.py | 31 +- .../tests/unittests/test_dist_fleet_geo.py | 9 +- .../tests/unittests/test_dist_fleet_gloo.py | 42 +- .../unittests/test_dist_fleet_grad_clip.py | 24 +- .../unittests/test_dist_fleet_heter_base.py | 106 +- .../unittests/test_dist_fleet_heter_ctr.py | 8 +- .../test_dist_fleet_heter_program.py | 48 +- .../tests/unittests/test_dist_fleet_infer.py | 23 +- .../tests/unittests/test_dist_fleet_ps.py | 56 +- .../tests/unittests/test_dist_fleet_ps10.py | 31 +- .../tests/unittests/test_dist_fleet_ps11.py | 46 +- .../tests/unittests/test_dist_fleet_ps12.py | 47 +- .../tests/unittests/test_dist_fleet_ps2.py | 47 +- .../tests/unittests/test_dist_fleet_ps3.py | 56 +- .../tests/unittests/test_dist_fleet_ps4.py | 56 +- .../tests/unittests/test_dist_fleet_ps5.py | 65 +- .../tests/unittests/test_dist_fleet_ps6.py | 56 +- .../tests/unittests/test_dist_fleet_ps7.py | 32 +- .../tests/unittests/test_dist_fleet_ps8.py | 32 +- .../tests/unittests/test_dist_fleet_ps9.py | 32 +- .../unittests/test_dist_fleet_ps_gpu_ctr.py | 6 +- .../test_dist_fleet_raw_program_optimizer.py | 10 +- ...et_raw_program_optimizer_fuse_allreduce.py | 1 + 
.../tests/unittests/test_dist_fleet_simnet.py | 6 +- .../test_dist_fleet_sparse_embedding_ctr.py | 86 +- .../test_dist_fleet_trainer_desc_config.py | 2 + .../test_dist_lookup_sparse_table_fuse_ops.py | 52 +- .../test_dist_mnist_backward_deps.py | 1 + .../unittests/test_dist_mnist_batch_merge.py | 25 +- .../unittests/test_dist_mnist_dgc_nccl.py | 20 +- .../unittests/test_dist_mnist_fleet_save.py | 1 + .../unittests/test_dist_mnist_fleetapi.py | 6 +- .../test_dist_mnist_fp16_allreduce.py | 1 + .../test_dist_mnist_gradient_merge.py | 39 +- .../unittests/test_dist_mnist_hallreduce.py | 10 +- .../tests/unittests/test_dist_mnist_lars.py | 1 + .../unittests/test_dist_mnist_multi_comm.py | 10 +- .../tests/unittests/test_dist_mnist_pg.py | 14 +- .../test_dist_mnist_ring_allreduce.py | 1 + .../tests/unittests/test_dist_mnist_train.py | 41 +- .../unittests/test_dist_mnist_with_program.py | 2 + .../fluid/tests/unittests/test_dist_oneps.py | 2 + .../fluid/tests/unittests/test_dist_op.py | 26 +- .../tests/unittests/test_dist_save_load.py | 41 +- .../unittests/test_dist_se_resnext_dgc.py | 11 +- .../unittests/test_dist_se_resnext_nccl.py | 22 +- .../unittests/test_dist_se_resnext_sync.py | 11 +- .../unittests/test_dist_sharding_save.py | 1 + .../unittests/test_dist_sparse_load_ps0.py | 8 +- .../unittests/test_dist_sparse_load_ps1.py | 5 +- .../test_dist_sparse_tensor_load_sgd.py | 6 +- .../test_dist_text_classification.py | 21 +- .../fluid/tests/unittests/test_dist_train.py | 66 +- .../tests/unittests/test_dist_transformer.py | 12 +- .../tests/unittests/test_dist_transpiler.py | 219 +- .../tests/unittests/test_dist_tree_index.py | 55 +- .../tests/unittests/test_dist_word2vec.py | 31 +- .../test_distribute_fpn_proposals_op.py | 14 +- ...est_distributed_fused_lamb_op_with_clip.py | 7 +- ...buted_fused_lamb_op_with_gradient_merge.py | 14 +- ..._distributed_fused_lamb_op_without_clip.py | 7 +- .../unittests/test_distributed_strategy.py | 9 +- .../tests/unittests/test_distributions.py | 315 +- .../fluid/tests/unittests/test_dot_op.py | 94 +- .../fluid/tests/unittests/test_downpoursgd.py | 15 +- .../fluid/tests/unittests/test_dpsgd_op.py | 1 + .../fluid/tests/unittests/test_dropout_op.py | 460 +-- .../unittests/test_dygraph_mnist_fp16.py | 83 +- .../test_dygraph_mode_of_unittest.py | 8 +- .../unittests/test_dygraph_multi_forward.py | 118 +- .../tests/unittests/test_dygraph_recompute.py | 74 +- .../test_dygraph_sharding_optimizer_stage2.py | 4 +- .../unittests/test_dygraph_sharding_stage2.py | 4 +- .../unittests/test_dygraph_sharding_stage3.py | 4 +- .../unittests/test_dygraph_spectral_norm.py | 26 +- .../unittests/test_dygraph_weight_norm.py | 35 +- .../fluid/tests/unittests/test_dyn_rnn.py | 137 +- .../test_dynamic_rnn_stop_gradient.py | 24 +- .../unittests/test_dynrnn_gradient_check.py | 76 +- .../unittests/test_dynrnn_static_input.py | 37 +- .../test_eager_deletion_delete_vars.py | 37 +- .../test_eager_deletion_dynamic_rnn_base.py | 17 +- .../unittests/test_eager_deletion_gru_net.py | 1 + .../unittests/test_eager_deletion_lstm_net.py | 6 +- .../unittests/test_eager_deletion_mnist.py | 2 +- .../test_eager_deletion_padding_rnn.py | 231 +- .../test_eager_deletion_recurrent_op.py | 133 +- .../unittests/test_eager_deletion_while_op.py | 46 +- .../tests/unittests/test_eager_dist_api.py | 1 + .../tests/unittests/test_eager_run_program.py | 13 +- .../tests/unittests/test_eager_trace_op.py | 13 +- .../tests/unittests/test_edit_distance_op.py | 10 +- .../unittests/test_egr_code_generate_api.py | 15 +- 
.../tests/unittests/test_egr_python_api.py | 197 +- .../unittests/test_egr_string_tensor_api.py | 27 +- .../fluid/tests/unittests/test_eig_op.py | 39 +- .../fluid/tests/unittests/test_eigh_op.py | 62 +- .../fluid/tests/unittests/test_eigvals_op.py | 70 +- .../fluid/tests/unittests/test_eigvalsh_op.py | 46 +- .../fluid/tests/unittests/test_einsum.py | 130 +- .../fluid/tests/unittests/test_einsum_op.py | 17 +- .../fluid/tests/unittests/test_einsum_v2.py | 131 +- .../unittests/test_elementwise_add_op.py | 145 +- .../unittests/test_elementwise_div_op.py | 101 +- .../unittests/test_elementwise_floordiv_op.py | 8 +- .../unittests/test_elementwise_gradient_op.py | 29 +- .../test_elementwise_heaviside_op.py | 25 +- .../unittests/test_elementwise_max_op.py | 33 +- .../unittests/test_elementwise_min_op.py | 41 +- .../unittests/test_elementwise_mod_op.py | 5 + .../unittests/test_elementwise_mul_op.py | 82 +- .../unittests/test_elementwise_nn_grad.py | 113 +- .../unittests/test_elementwise_pow_op.py | 20 +- .../unittests/test_elementwise_sub_op.py | 73 +- .../paddle/fluid/tests/unittests/test_ema.py | 9 +- .../fluid/tests/unittests/test_ema_fleet.py | 1 + .../test_embedding_id_stop_gradient.py | 11 +- .../tests/unittests/test_empty_like_op.py | 36 +- .../fluid/tests/unittests/test_empty_op.py | 25 +- .../fluid/tests/unittests/test_entry_attr.py | 14 +- .../fluid/tests/unittests/test_entry_attr2.py | 14 +- .../fluid/tests/unittests/test_erf_op.py | 2 + .../fluid/tests/unittests/test_erfinv_op.py | 14 +- .../fluid/tests/unittests/test_exception.py | 8 +- .../tests/unittests/test_executor_and_mul.py | 16 +- .../test_executor_and_use_program_cache.py | 46 +- .../unittests/test_executor_check_feed.py | 13 +- .../test_executor_check_fetch_list.py | 13 +- .../test_executor_feed_non_tensor.py | 58 +- ..._executor_return_tensor_not_overwriting.py | 2 + .../tests/unittests/test_expand_as_op.py | 27 +- .../tests/unittests/test_expand_as_v2_op.py | 27 +- .../fluid/tests/unittests/test_expand_op.py | 47 +- .../tests/unittests/test_expand_v2_op.py | 48 +- .../tests/unittests/test_exponential_op.py | 12 +- .../fluid/tests/unittests/test_eye_op.py | 4 + .../unittests/test_fake_dequantize_op.py | 24 +- .../tests/unittests/test_fake_init_op.py | 5 +- .../tests/unittests/test_fake_quantize_op.py | 49 +- .../unittests/test_faster_tokenizer_op.py | 91 +- .../fluid/tests/unittests/test_fc_op.py | 25 +- .../test_feed_data_check_shape_type.py | 66 +- .../tests/unittests/test_feed_fetch_method.py | 1 + .../tests/unittests/test_fetch_handler.py | 17 +- .../unittests/test_fetch_lod_tensor_array.py | 1 + .../tests/unittests/test_fetch_unmerged.py | 48 +- .../fluid/tests/unittests/test_fetch_var.py | 7 +- .../tests/unittests/test_fill_any_like_op.py | 8 + .../fluid/tests/unittests/test_fill_any_op.py | 5 + .../tests/unittests/test_fill_constant_op.py | 218 +- .../unittests/test_fill_diagonal_tensor_op.py | 3 + .../fluid/tests/unittests/test_fill_op.py | 16 +- .../unittests/test_fill_zeros_like2_op.py | 5 + .../unittests/test_fill_zeros_like_op.py | 2 + .../unittests/test_filter_by_instag_op.py | 30 +- .../fluid/tests/unittests/test_flatten2_op.py | 16 +- .../test_flatten_contiguous_range_op.py | 43 +- .../fluid/tests/unittests/test_flatten_op.py | 4 + .../tests/unittests/test_fleet_amp_init.py | 24 +- .../test_fleet_amp_meta_optimizer.py | 13 +- .../tests/unittests/test_fleet_api_input.py | 145 +- .../unittests/test_fleet_ascend_utils.py | 19 +- .../fluid/tests/unittests/test_fleet_auto.py | 10 +- 
.../fluid/tests/unittests/test_fleet_base.py | 29 +- .../tests/unittests/test_fleet_base_2.py | 26 +- .../tests/unittests/test_fleet_base_3.py | 21 +- .../tests/unittests/test_fleet_base_4.py | 1 + .../tests/unittests/test_fleet_base_single.py | 19 +- .../tests/unittests/test_fleet_checkpoint.py | 73 +- .../test_fleet_dgc_meta_optimizer.py | 13 +- .../test_fleet_distributed_strategy.py | 42 +- .../test_fleet_elastic_collective.py | 3 + .../unittests/test_fleet_elastic_init.py | 2 + .../unittests/test_fleet_elastic_manager.py | 33 +- .../test_fleet_exe_dist_model_run.py | 17 +- .../test_fleet_exe_dist_model_tensor.py | 19 +- .../tests/unittests/test_fleet_executor.py | 21 +- .../test_fleet_executor_multi_devices.py | 1 + .../test_fleet_executor_origin_scheduler.py | 21 +- .../test_fleet_executor_task_node.py | 7 +- .../test_fleet_executor_with_task_nodes.py | 21 +- ...est_fleet_fp16_allreduce_meta_optimizer.py | 15 +- ...est_fleet_gradient_merge_meta_optimizer.py | 1 + .../unittests/test_fleet_gradient_scale.py | 18 +- ...st_fleet_graph_execution_meta_optimizer.py | 199 +- .../unittests/test_fleet_graph_executor.py | 22 +- .../test_fleet_hybrid_meta_optimizer.py | 34 +- .../test_fleet_lamb_meta_optimizer.py | 28 +- .../test_fleet_lars_meta_optimizer.py | 36 +- .../test_fleet_localsgd_meta_optimizer.py | 2 + .../test_fleet_meta_optimizer_base.py | 15 +- .../tests/unittests/test_fleet_metric.py | 23 +- .../tests/unittests/test_fleet_nocvm_1.py | 14 +- .../test_fleet_pipeline_meta_optimizer.py | 28 +- ..._pipeline_meta_optimizer_with_recompute.py | 15 +- .../unittests/test_fleet_private_function.py | 2 + .../fluid/tests/unittests/test_fleet_ps.py | 2 + .../unittests/test_fleet_pyramid_hash.py | 7 +- .../test_fleet_raw_program_meta_optimizer.py | 10 +- .../test_fleet_recompute_meta_optimizer.py | 13 +- .../tests/unittests/test_fleet_rolemaker_3.py | 7 +- .../unittests/test_fleet_rolemaker_new.py | 9 +- .../tests/unittests/test_fleet_runtime.py | 20 +- .../test_fleet_sharding_meta_optimizer.py | 164 +- .../unittests/test_fleet_static_mp_layers.py | 36 +- .../fluid/tests/unittests/test_fleet_util.py | 15 +- .../fluid/tests/unittests/test_fleet_utils.py | 18 +- .../paddle/fluid/tests/unittests/test_flip.py | 12 +- .../fluid/tests/unittests/test_fmax_op.py | 68 +- .../fluid/tests/unittests/test_fmin_op.py | 68 +- .../fluid/tests/unittests/test_fold_op.py | 69 +- .../fluid/tests/unittests/test_frame_op.py | 15 +- .../unittests/test_framework_debug_str.py | 1 + .../tests/unittests/test_fs_interface.py | 1 + .../fluid/tests/unittests/test_fsp_op.py | 22 +- .../fluid/tests/unittests/test_ftrl_op.py | 47 +- .../tests/unittests/test_full_like_op.py | 35 +- .../fluid/tests/unittests/test_full_op.py | 171 +- .../tests/unittests/test_function_hook.py | 2 + .../tests/unittests/test_functional_conv1d.py | 19 +- .../test_functional_conv1d_transpose.py | 19 +- .../tests/unittests/test_functional_conv2d.py | 167 +- .../test_functional_conv2d_transpose.py | 176 +- .../tests/unittests/test_functional_conv3d.py | 168 +- .../test_functional_conv3d_transpose.py | 179 +- .../unittests/test_fuse_all_reduce_pass.py | 26 +- .../tests/unittests/test_fuse_bn_act_pass.py | 42 +- .../unittests/test_fuse_bn_add_act_pass.py | 131 +- .../test_fuse_elewise_add_act_pass.py | 13 +- .../unittests/test_fuse_gemm_epilogue_pass.py | 79 +- .../unittests/test_fuse_optimizer_pass.py | 77 +- .../test_fuse_relu_depthwise_conv_pass.py | 41 +- .../unittests/test_fused_attention_op.py | 138 +- .../unittests/test_fused_attention_op_api.py | 90 
+- ...sed_bias_dropout_residual_layer_norm_op.py | 46 +- ...bias_dropout_residual_layer_norm_op_api.py | 40 +- .../test_fused_elemwise_activation_op.py | 111 +- .../unittests/test_fused_emb_seq_pool_op.py | 29 +- .../test_fused_embedding_fc_lstm_op.py | 33 +- .../test_fused_fc_elementwise_layernorm_op.py | 2 + .../unittests/test_fused_feedforward_op.py | 219 +- .../unittests/test_fused_gate_attention_op.py | 40 +- .../test_fused_gemm_epilogue_grad_op.py | 26 +- .../unittests/test_fused_gemm_epilogue_op.py | 72 +- .../tests/unittests/test_fused_matmul_bias.py | 9 +- .../test_fused_multi_transformer_op.py | 220 +- .../test_fused_multihead_matmul_op.py | 2 + .../test_fused_transformer_encoder_layer.py | 43 +- .../tests/unittests/test_fusion_gru_op.py | 18 +- .../tests/unittests/test_fusion_lstm_op.py | 18 +- .../test_fusion_repeated_fc_relu_op.py | 17 +- .../test_fusion_seqconv_eltadd_relu_op.py | 10 +- .../test_fusion_seqexpand_concat_fc_op.py | 12 +- .../test_fusion_seqpool_concat_op.py | 8 + .../test_fusion_seqpool_cvm_concat_op.py | 8 + .../test_fusion_squared_mat_sub_op.py | 6 +- ...test_fusion_transpose_flatten_concat_op.py | 6 + .../unittests/test_gast_with_compatibility.py | 10 +- .../tests/unittests/test_gather_nd_op.py | 32 +- .../fluid/tests/unittests/test_gather_op.py | 52 +- .../tests/unittests/test_gather_tree_op.py | 72 +- .../unittests/test_gaussian_random_op.py | 125 +- .../paddle/fluid/tests/unittests/test_gcd.py | 19 +- .../fluid/tests/unittests/test_gelu_op.py | 8 +- .../tests/unittests/test_gen_nccl_id_op.py | 1 + .../unittests/test_generate_mask_labels_op.py | 6 +- .../test_generate_proposal_labels_op.py | 86 +- .../unittests/test_generate_proposals_op.py | 44 +- .../test_generate_proposals_v2_op.py | 11 +- .../fluid/tests/unittests/test_generator.py | 5 +- .../unittests/test_generator_dataloader.py | 14 +- .../unittests/test_get_device_properties.py | 8 +- .../test_get_inputs_outputs_in_block.py | 3 +- .../tests/unittests/test_get_places_op.py | 1 + .../tests/unittests/test_get_set_flags.py | 2 + .../test_get_tensor_from_selected_rows_op.py | 1 + .../test_global_var_getter_setter.py | 2 + .../paddle/fluid/tests/unittests/test_glu.py | 2 + .../test_gpu_package_without_gpu_device.py | 10 +- .../unittests/test_grad_clip_minimize.py | 7 +- .../tests/unittests/test_gradient_clip.py | 114 +- .../unittests/test_graph_khop_sampler.py | 58 +- .../tests/unittests/test_graph_reindex.py | 61 +- .../unittests/test_graph_sample_neighbors.py | 82 +- .../unittests/test_graph_send_recv_op.py | 132 +- .../unittests/test_grid_sample_function.py | 49 +- .../tests/unittests/test_grid_sampler_op.py | 43 +- .../tests/unittests/test_group_norm_op.py | 80 +- .../tests/unittests/test_group_norm_op_v2.py | 22 +- .../fluid/tests/unittests/test_gru_op.py | 43 +- .../fluid/tests/unittests/test_gru_rnn_op.py | 18 +- .../fluid/tests/unittests/test_gru_unit_op.py | 60 +- .../tests/unittests/test_gumbel_softmax_op.py | 27 +- .../fluid/tests/unittests/test_hash_op.py | 25 +- .../fluid/tests/unittests/test_hdfs1.py | 41 +- .../fluid/tests/unittests/test_hdfs2.py | 10 +- .../fluid/tests/unittests/test_hdfs3.py | 10 +- .../unittests/test_hinge_embedding_loss.py | 82 +- .../tests/unittests/test_hinge_loss_op.py | 1 + .../tests/unittests/test_histogram_op.py | 44 +- .../fluid/tests/unittests/test_hsigmoid_op.py | 171 +- .../tests/unittests/test_huber_loss_op.py | 17 +- .../test_hybrid_parallel_inference_helper.py | 1 + .../test_hybrid_parallel_topology.py | 29 +- .../fluid/tests/unittests/test_identity_op.py 
| 7 +- .../tests/unittests/test_im2sequence_op.py | 17 +- .../test_image_classification_layer.py | 45 +- .../test_imperative_auto_mixed_precision.py | 503 +-- .../unittests/test_imperative_auto_prune.py | 24 +- .../tests/unittests/test_imperative_basic.py | 159 +- .../test_imperative_container_layerdict.py | 7 +- .../test_imperative_container_layerlist.py | 2 + ...test_imperative_container_parameterlist.py | 13 +- .../test_imperative_container_sequential.py | 9 +- .../test_imperative_data_loader_base.py | 46 +- .../test_imperative_data_loader_exception.py | 34 +- .../test_imperative_data_loader_exit_func.py | 9 +- .../test_imperative_data_loader_fds_clear.py | 25 +- .../test_imperative_data_loader_process.py | 18 +- .../test_imperative_data_parallel.py | 6 +- .../unittests/test_imperative_decorator.py | 12 +- .../tests/unittests/test_imperative_deepcf.py | 54 +- .../unittests/test_imperative_double_grad.py | 133 +- .../unittests/test_imperative_framework.py | 24 +- .../tests/unittests/test_imperative_gan.py | 62 +- .../tests/unittests/test_imperative_gnn.py | 59 +- .../tests/unittests/test_imperative_group.py | 4 +- .../test_imperative_hook_for_layer.py | 26 +- .../unittests/test_imperative_layer_apply.py | 44 +- .../test_imperative_layer_children.py | 17 +- .../test_imperative_layer_trainable.py | 1 + .../tests/unittests/test_imperative_layers.py | 80 +- .../test_imperative_load_static_param.py | 160 +- ..._imperative_lod_tensor_to_selected_rows.py | 68 +- .../tests/unittests/test_imperative_mnist.py | 120 +- .../test_imperative_mnist_sorted_gradient.py | 55 +- .../test_imperative_named_members.py | 23 +- .../unittests/test_imperative_numpy_bridge.py | 1 + .../test_imperative_ocr_attention_model.py | 321 +- .../unittests/test_imperative_optimizer.py | 317 +- .../unittests/test_imperative_optimizer_v2.py | 233 +- ...test_imperative_parallel_coalesce_split.py | 6 +- .../test_imperative_partitial_backward.py | 10 +- .../unittests/test_imperative_ptb_rnn.py | 121 +- ...test_imperative_ptb_rnn_sorted_gradient.py | 60 +- .../test_imperative_recurrent_usage.py | 23 +- .../test_imperative_reinforcement.py | 38 +- .../tests/unittests/test_imperative_resnet.py | 142 +- .../test_imperative_resnet_sorted_gradient.py | 36 +- .../unittests/test_imperative_save_load.py | 310 +- .../unittests/test_imperative_save_load_v2.py | 325 +- .../unittests/test_imperative_se_resnext.py | 215 +- .../test_imperative_selected_rows.py | 35 +- ..._imperative_selected_rows_to_lod_tensor.py | 69 +- .../test_imperative_signal_handler.py | 5 + ...perative_star_gan_with_gradient_penalty.py | 230 +- .../test_imperative_static_runner_mnist.py | 135 +- .../test_imperative_static_runner_while.py | 58 +- .../test_imperative_tensor_clear_gradient.py | 6 +- .../test_imperative_thread_local_has_grad.py | 8 +- ...imperative_trace_non_persistable_inputs.py | 6 +- ..._imperative_transformer_sorted_gradient.py | 365 +-- .../unittests/test_imperative_triple_grad.py | 100 +- .../test_imperative_using_non_zero_gpu.py | 1 + .../fluid/tests/unittests/test_increment.py | 7 +- .../tests/unittests/test_index_sample_op.py | 33 +- .../tests/unittests/test_index_select_op.py | 41 +- .../test_infer_no_need_buffer_slots.py | 1 + .../fluid/tests/unittests/test_infer_shape.py | 1 + .../tests/unittests/test_inference_api.py | 36 +- .../unittests/test_inference_model_io.py | 88 +- .../fluid/tests/unittests/test_initializer.py | 239 +- .../tests/unittests/test_initializer_nn.py | 155 +- .../fluid/tests/unittests/test_inner.py | 44 +- 
.../fluid/tests/unittests/test_inplace.py | 32 +- .../tests/unittests/test_inplace_abn_op.py | 38 +- .../unittests/test_inplace_addto_strategy.py | 37 +- .../test_inplace_and_clear_gradient.py | 8 +- .../test_inplace_auto_generated_apis.py | 28 + ...test_inplace_softmax_with_cross_entropy.py | 67 +- .../fluid/tests/unittests/test_input_spec.py | 11 +- .../tests/unittests/test_install_check.py | 1 + .../tests/unittests/test_instance_norm_op.py | 84 +- .../unittests/test_instance_norm_op_v2.py | 6 +- .../fluid/tests/unittests/test_inverse_op.py | 9 + .../tests/unittests/test_io_save_load.py | 28 +- .../tests/unittests/test_iou_similarity_op.py | 9 +- .../tests/unittests/test_ir_inplace_pass.py | 11 +- .../test_ir_memory_optimize_ifelse_op.py | 11 +- .../unittests/test_ir_memory_optimize_nlp.py | 6 +- .../unittests/test_ir_memory_optimize_pass.py | 13 +- .../test_ir_memory_optimize_transformer.py | 1 + .../tests/unittests/test_ir_pass_pipeline.py | 1 + .../fluid/tests/unittests/test_is_complex.py | 7 +- .../fluid/tests/unittests/test_is_empty_op.py | 14 +- .../fluid/tests/unittests/test_is_integer.py | 7 +- .../fluid/tests/unittests/test_is_tensor.py | 1 + .../fluid/tests/unittests/test_isclose_op.py | 32 +- .../fluid/tests/unittests/test_isfinite_op.py | 9 + .../tests/unittests/test_isfinite_v2_op.py | 3 + .../unittests/test_jit_pre_save_hooks.py | 8 +- .../tests/unittests/test_jit_save_load.py | 467 +-- .../tests/unittests/test_kldiv_loss_op.py | 16 +- .../fluid/tests/unittests/test_kron_op.py | 43 +- .../fluid/tests/unittests/test_kthvalue_op.py | 23 +- .../fluid/tests/unittests/test_l1_loss.py | 57 +- .../unittests/test_label_smooth_functional.py | 34 +- .../tests/unittests/test_label_smooth_op.py | 16 +- .../fluid/tests/unittests/test_lamb_op.py | 27 +- .../fluid/tests/unittests/test_lambv2_op.py | 83 +- .../tests/unittests/test_launch_coverage.py | 65 +- .../tests/unittests/test_layer_norm_op.py | 172 +- .../tests/unittests/test_layer_norm_op_v2.py | 11 +- .../fluid/tests/unittests/test_layers.py | 2753 +++++++++-------- .../tests/unittests/test_layout_autotune.py | 11 +- .../fluid/tests/unittests/test_lbfgs.py | 48 +- .../paddle/fluid/tests/unittests/test_lcm.py | 19 +- .../unittests/test_learning_rate_scheduler.py | 183 +- .../fluid/tests/unittests/test_lerp_op.py | 14 +- .../fluid/tests/unittests/test_lgamma_op.py | 14 +- .../unittests/test_limit_by_capacity_op.py | 19 +- .../fluid/tests/unittests/test_linalg_cond.py | 14 +- .../tests/unittests/test_linalg_lstsq_op.py | 116 +- .../tests/unittests/test_linalg_pinv_op.py | 43 +- .../fluid/tests/unittests/test_linear.py | 17 +- .../unittests/test_linear_chain_crf_op.py | 35 +- .../tests/unittests/test_linear_interp_op.py | 209 +- .../unittests/test_linear_interp_v2_op.py | 209 +- .../fluid/tests/unittests/test_linspace.py | 12 +- .../unittests/test_listen_and_serv_op.py | 26 +- .../fluid/tests/unittests/test_load_op.py | 5 +- .../fluid/tests/unittests/test_load_op_xpu.py | 5 +- .../test_load_state_dict_from_old_format.py | 63 +- .../unittests/test_load_vars_shape_check.py | 1 + .../unittests/test_locality_aware_nms_op.py | 103 +- .../tests/unittests/test_lod_append_op.py | 20 +- .../unittests/test_lod_array_length_op.py | 3 + .../tests/unittests/test_lod_rank_table.py | 6 +- .../tests/unittests/test_lod_reset_op.py | 17 +- .../tests/unittests/test_lod_tensor_array.py | 7 +- .../unittests/test_lod_tensor_array_ops.py | 100 +- .../fluid/tests/unittests/test_log_loss_op.py | 2 + .../fluid/tests/unittests/test_log_softmax.py | 18 +- 
.../fluid/tests/unittests/test_logical_op.py | 25 +- .../fluid/tests/unittests/test_logit_op.py | 10 +- .../fluid/tests/unittests/test_logspace.py | 30 +- .../fluid/tests/unittests/test_logsumexp.py | 8 + .../fluid/tests/unittests/test_lookahead.py | 41 +- .../unittests/test_lookup_table_bf16_op.py | 50 +- .../unittests/test_lookup_table_dequant_op.py | 1 + .../tests/unittests/test_lookup_table_op.py | 82 +- .../unittests/test_lookup_table_v2_bf16_op.py | 24 +- .../unittests/test_lookup_table_v2_op.py | 37 +- .../tests/unittests/test_lr_scheduler.py | 334 +- .../fluid/tests/unittests/test_lrn_op.py | 137 +- .../tests/unittests/test_lstm_cudnn_op.py | 104 +- .../fluid/tests/unittests/test_lstm_op.py | 76 +- .../tests/unittests/test_lstm_unit_op.py | 42 +- .../fluid/tests/unittests/test_lstmp_op.py | 145 +- .../fluid/tests/unittests/test_lu_op.py | 33 +- .../tests/unittests/test_lu_unpack_op.py | 16 +- .../fluid/tests/unittests/test_manual_seed.py | 1 + .../unittests/test_margin_cross_entropy_op.py | 106 +- .../unittests/test_margin_rank_loss_op.py | 15 +- .../fluid/tests/unittests/test_marker_op.py | 1 + .../tests/unittests/test_masked_select_op.py | 16 +- .../unittests/test_match_matrix_tensor_op.py | 16 +- .../tests/unittests/test_math_op_patch.py | 65 +- .../unittests/test_math_op_patch_var_base.py | 193 +- .../fluid/tests/unittests/test_matmul_op.py | 107 +- .../unittests/test_matmul_op_with_head.py | 66 +- .../tests/unittests/test_matmul_v2_op.py | 141 +- .../tests/unittests/test_matrix_nms_op.py | 73 +- .../tests/unittests/test_matrix_power_op.py | 54 +- .../tests/unittests/test_matrix_rank_op.py | 46 +- .../unittests/test_max_min_amax_amin_op.py | 26 +- .../fluid/tests/unittests/test_max_op.py | 9 +- .../fluid/tests/unittests/test_maximum_op.py | 25 +- .../fluid/tests/unittests/test_maxout_op.py | 10 +- .../fluid/tests/unittests/test_mean_iou.py | 26 +- .../fluid/tests/unittests/test_mean_op.py | 50 +- .../fluid/tests/unittests/test_median.py | 7 +- .../fluid/tests/unittests/test_memcpy_op.py | 86 +- .../tests/unittests/test_memory_analysis.py | 16 +- .../test_memory_reuse_exclude_feed_var.py | 16 +- .../tests/unittests/test_memory_usage.py | 1 + .../unittests/test_merge_selectedrows_op.py | 1 + .../tests/unittests/test_merged_adam_op.py | 41 +- .../unittests/test_merged_momentum_op.py | 156 +- .../fluid/tests/unittests/test_meshgrid_op.py | 83 +- .../fluid/tests/unittests/test_min_op.py | 9 +- .../unittests/test_mine_hard_examples_op.py | 30 +- .../fluid/tests/unittests/test_minimum_op.py | 25 +- .../fluid/tests/unittests/test_minus_op.py | 1 + .../test_mix_precision_all_reduce_fuse.py | 32 +- .../tests/unittests/test_mixed_precision.py | 15 +- .../fluid/tests/unittests/test_mode_op.py | 25 +- .../tests/unittests/test_modelaverage.py | 79 +- .../unittests/test_modified_huber_loss_op.py | 1 + .../fluid/tests/unittests/test_momentum_op.py | 178 +- .../fluid/tests/unittests/test_monitor.py | 23 +- .../fluid/tests/unittests/test_mse_loss.py | 191 +- .../fluid/tests/unittests/test_mul_nn_grad.py | 39 +- .../fluid/tests/unittests/test_mul_op.py | 84 +- .../tests/unittests/test_multi_dot_op.py | 46 +- .../tests/unittests/test_multiclass_nms_op.py | 121 +- .../unittests/test_multihead_attention.py | 30 +- .../tests/unittests/test_multinomial_op.py | 22 +- .../tests/unittests/test_multiplex_op.py | 18 +- .../fluid/tests/unittests/test_multiply.py | 26 +- .../test_multiprocess_dataloader_dataset.py | 89 +- .../test_multiprocess_dataloader_dynamic.py | 54 +- 
.../test_multiprocess_dataloader_exception.py | 49 +- ...ess_dataloader_iterable_dataset_dynamic.py | 54 +- ...ocess_dataloader_iterable_dataset_split.py | 32 +- ...cess_dataloader_iterable_dataset_static.py | 61 +- .../test_multiprocess_dataloader_static.py | 100 +- .../test_multiprocess_reader_exception.py | 29 +- .../fluid/tests/unittests/test_mv_op.py | 28 +- .../test_naive_best_fit_gpu_memory_limit.py | 4 +- .../fluid/tests/unittests/test_name_scope.py | 1 + .../fluid/tests/unittests/test_nan_inf.py | 15 +- .../fluid/tests/unittests/test_nanmean_api.py | 9 +- .../fluid/tests/unittests/test_nanmedian.py | 19 +- .../fluid/tests/unittests/test_nansum_api.py | 30 +- .../paddle/fluid/tests/unittests/test_nce.py | 101 +- .../tests/unittests/test_nearest_interp_op.py | 77 +- .../unittests/test_nearest_interp_v2_op.py | 104 +- .../fluid/tests/unittests/test_neg_op.py | 6 + .../unittests/test_network_with_dtype.py | 5 +- .../tests/unittests/test_new_group_api.py | 1 + .../fluid/tests/unittests/test_newprofiler.py | 98 +- .../unittests/test_newprofiler_helper.py | 1 + .../fluid/tests/unittests/test_nll_loss.py | 345 ++- .../fluid/tests/unittests/test_nms_op.py | 1 + .../tests/unittests/test_nn_dice_loss.py | 13 +- .../test_nn_functional_embedding_dygraph.py | 5 +- .../test_nn_functional_embedding_static.py | 70 +- .../unittests/test_nn_functional_hot_op.py | 48 +- .../fluid/tests/unittests/test_nn_grad.py | 265 +- .../unittests/test_nn_margin_rank_loss.py | 101 +- .../tests/unittests/test_nn_matmul_v2_grad.py | 370 ++- .../tests/unittests/test_nn_sigmoid_op.py | 2 + .../fluid/tests/unittests/test_nonzero_api.py | 1 + .../fluid/tests/unittests/test_norm_all.py | 311 +- .../tests/unittests/test_norm_nn_grad.py | 86 +- .../fluid/tests/unittests/test_norm_op.py | 14 +- .../fluid/tests/unittests/test_normal.py | 7 + .../unittests/test_normalization_wrapper.py | 9 +- .../fluid/tests/unittests/test_normalize.py | 10 +- .../tests/unittests/test_npair_loss_op.py | 137 +- .../tests/unittests/test_number_count_op.py | 6 +- .../fluid/tests/unittests/test_numel_op.py | 18 +- .../fluid/tests/unittests/test_one_hot_op.py | 47 +- .../tests/unittests/test_one_hot_v2_op.py | 42 +- .../fluid/tests/unittests/test_ones_like.py | 11 +- .../fluid/tests/unittests/test_ones_op.py | 3 + .../fluid/tests/unittests/test_onnx_export.py | 21 +- .../unittests/test_op_function_generator.py | 2 + .../tests/unittests/test_op_name_conflict.py | 52 +- .../tests/unittests/test_op_support_gpu.py | 1 + .../fluid/tests/unittests/test_op_version.py | 2 + .../fluid/tests/unittests/test_operator.py | 24 +- .../tests/unittests/test_operator_desc.py | 63 +- .../fluid/tests/unittests/test_ops_nms.py | 42 +- .../fluid/tests/unittests/test_optimizer.py | 864 +++--- .../unittests/test_optimizer_for_varbase.py | 6 +- .../tests/unittests/test_optimizer_grad.py | 45 +- .../test_optimizer_in_control_flow.py | 49 +- .../fluid/tests/unittests/test_outer.py | 42 +- .../tests/unittests/test_overlap_add_op.py | 12 +- .../fluid/tests/unittests/test_pad2d_op.py | 22 +- .../fluid/tests/unittests/test_pad3d_op.py | 283 +- .../tests/unittests/test_pad_constant_like.py | 35 +- .../fluid/tests/unittests/test_pad_op.py | 20 +- .../test_paddle_imperative_double_grad.py | 80 +- .../unittests/test_paddle_multiprocessing.py | 5 + .../tests/unittests/test_paddle_save_load.py | 172 +- .../unittests/test_paddle_save_load_binary.py | 51 +- .../tests/unittests/test_pairwise_distance.py | 26 +- .../test_parallel_class_center_sample.py | 1 + 
.../test_parallel_dygraph_control_flow.py | 24 +- .../test_parallel_dygraph_dataparallel.py | 27 +- ...t_parallel_dygraph_dataparallel_cpuonly.py | 12 +- .../unittests/test_parallel_dygraph_mnist.py | 45 +- .../test_parallel_dygraph_mp_layers.py | 1 + .../test_parallel_dygraph_no_sync.py | 41 +- ...parallel_dygraph_no_sync_gradient_check.py | 1 + ...test_parallel_dygraph_pipeline_parallel.py | 9 +- .../test_parallel_dygraph_se_resnext.py | 15 +- ...test_parallel_dygraph_sharding_parallel.py | 4 +- .../test_parallel_dygraph_sparse_embedding.py | 25 +- ...graph_sparse_embedding_diff_length_gloo.py | 10 +- ..._parallel_dygraph_sparse_embedding_gloo.py | 20 +- ...el_dygraph_sparse_embedding_over_height.py | 2 + ...graph_sparse_embedding_over_height_gloo.py | 1 + .../test_parallel_dygraph_sync_batch_norm.py | 11 +- .../test_parallel_dygraph_tensor_parallel.py | 1 + .../test_parallel_dygraph_transformer.py | 20 +- .../test_parallel_dygraph_transformer_gloo.py | 20 +- .../test_parallel_dygraph_unused_variables.py | 32 +- ..._parallel_dygraph_unused_variables_gloo.py | 30 +- .../unittests/test_parallel_executor_crf.py | 141 +- .../test_parallel_executor_drop_scope.py | 19 +- .../test_parallel_executor_dry_run.py | 14 +- ..._parallel_executor_feed_persistable_var.py | 25 +- .../test_parallel_executor_fetch_feed.py | 58 +- ...st_parallel_executor_fetch_isolated_var.py | 26 +- ...test_parallel_executor_fix_op_run_order.py | 21 +- ...el_executor_inference_feed_partial_data.py | 34 +- .../unittests/test_parallel_executor_mnist.py | 88 +- .../unittests/test_parallel_executor_pg.py | 35 +- .../test_parallel_executor_profiler.py | 3 +- .../test_parallel_executor_run_cinn.py | 9 +- ...arallel_executor_run_load_infer_program.py | 24 +- ...st_parallel_executor_seresnext_base_cpu.py | 17 +- ...st_parallel_executor_seresnext_base_gpu.py | 13 +- ...utor_seresnext_with_fuse_all_reduce_cpu.py | 13 +- ...utor_seresnext_with_fuse_all_reduce_gpu.py | 14 +- ...llel_executor_seresnext_with_reduce_cpu.py | 6 +- ...llel_executor_seresnext_with_reduce_gpu.py | 5 +- ...test_parallel_executor_test_while_train.py | 24 +- .../test_parallel_executor_transformer.py | 28 +- .../test_parallel_margin_cross_entropy.py | 11 +- .../fluid/tests/unittests/test_parameter.py | 34 +- .../tests/unittests/test_partial_concat_op.py | 4 + .../tests/unittests/test_partial_sum_op.py | 4 + .../tests/unittests/test_pass_builder.py | 12 +- .../fluid/tests/unittests/test_pipeline.py | 23 +- .../tests/unittests/test_pipeline_parallel.py | 1 + .../tests/unittests/test_pixel_shuffle.py | 34 +- .../tests/unittests/test_pixel_unshuffle.py | 32 +- .../fluid/tests/unittests/test_poisson_op.py | 12 +- .../unittests/test_polygon_box_transform.py | 14 +- .../fluid/tests/unittests/test_pool1d_api.py | 202 +- .../fluid/tests/unittests/test_pool2d_api.py | 485 +-- .../fluid/tests/unittests/test_pool2d_op.py | 625 ++-- .../fluid/tests/unittests/test_pool3d_api.py | 484 ++- .../fluid/tests/unittests/test_pool3d_op.py | 516 +-- .../fluid/tests/unittests/test_pool_max_op.py | 10 + .../test_positive_negative_pair_op.py | 21 +- .../test_pow2_decay_with_linear_warmup_op.py | 26 +- .../unittests/test_precision_recall_op.py | 3 + .../fluid/tests/unittests/test_prelu_op.py | 114 +- .../fluid/tests/unittests/test_print_op.py | 11 +- .../tests/unittests/test_prior_box_op.py | 51 +- .../fluid/tests/unittests/test_prod_op.py | 24 +- .../fluid/tests/unittests/test_profiler.py | 75 +- .../unittests/test_profiler_statistic.py | 106 +- .../fluid/tests/unittests/test_program.py 
| 69 +- .../tests/unittests/test_program_code.py | 18 +- .../unittests/test_program_prune_backward.py | 88 +- .../tests/unittests/test_program_to_string.py | 11 +- .../fluid/tests/unittests/test_protobuf.py | 1 + .../tests/unittests/test_protobuf_descs.py | 4 + .../unittests/test_proximal_adagrad_op.py | 1 + .../tests/unittests/test_proximal_gd_op.py | 1 + .../tests/unittests/test_prroi_pool_op.py | 95 +- .../fluid/tests/unittests/test_prune.py | 199 +- .../test_prune_gate_by_capacity_op.py | 22 +- .../tests/unittests/test_ps_dispatcher.py | 5 + .../tests/unittests/test_psroi_pool_op.py | 99 +- .../unittests/test_pull_gpups_sparse_op.py | 12 +- .../tests/unittests/test_put_along_axis_op.py | 20 +- .../fluid/tests/unittests/test_py_func_op.py | 67 +- .../unittests/test_py_reader_combination.py | 30 +- .../unittests/test_py_reader_error_msg.py | 36 +- .../test_py_reader_lod_level_share.py | 2 + .../unittests/test_py_reader_pin_memory.py | 32 +- .../unittests/test_py_reader_push_pop.py | 25 +- .../unittests/test_py_reader_return_list.py | 26 +- .../test_py_reader_sample_generator.py | 24 +- .../test_py_reader_using_executor.py | 31 +- .../fluid/tests/unittests/test_pylayer_op.py | 78 +- .../tests/unittests/test_pyramid_hash_op.py | 10 +- .../test_python_bf16_numpy_datatype.py | 7 +- .../fluid/tests/unittests/test_qr_op.py | 41 +- .../test_quantile_and_nanquantile.py | 34 +- .../fluid/tests/unittests/test_query_op.py | 1 + .../fluid/tests/unittests/test_queue.py | 50 +- .../fluid/tests/unittests/test_rad2deg.py | 10 +- .../fluid/tests/unittests/test_rand_op.py | 16 +- .../tests/unittests/test_randint_like.py | 173 +- .../fluid/tests/unittests/test_randint_op.py | 71 +- .../fluid/tests/unittests/test_randn_op.py | 14 +- .../tests/unittests/test_random_crop_op.py | 12 +- .../tests/unittests/test_random_routing_op.py | 6 +- .../fluid/tests/unittests/test_random_seed.py | 62 +- .../fluid/tests/unittests/test_randperm_op.py | 25 +- .../fluid/tests/unittests/test_range.py | 10 +- .../tests/unittests/test_rank_attention_op.py | 10 +- .../tests/unittests/test_rank_loss_op.py | 7 + .../unittests/test_raw_program_optimizer.py | 18 +- .../tests/unittests/test_reader_reset.py | 16 +- .../tests/unittests/test_real_imag_op.py | 27 +- .../tests/unittests/test_recurrent_op.py | 168 +- .../tests/unittests/test_recv_save_op.py | 92 +- .../fluid/tests/unittests/test_reduce_op.py | 143 +- .../tests/unittests/test_reducescatter.py | 1 + .../tests/unittests/test_reducescatter_api.py | 1 + .../fluid/tests/unittests/test_registry.py | 1 + .../fluid/tests/unittests/test_regularizer.py | 119 +- .../tests/unittests/test_regularizer_api.py | 45 +- .../fluid/tests/unittests/test_renorm_op.py | 12 +- .../unittests/test_reorder_lod_tensor.py | 59 +- .../unittests/test_repeat_interleave_op.py | 27 +- .../tests/unittests/test_require_version.py | 2 + .../test_reset_grad_inplace_version.py | 13 +- .../fluid/tests/unittests/test_reshape_op.py | 62 +- .../unittests/test_resnet50_with_cinn.py | 18 +- .../tests/unittests/test_retain_graph.py | 41 +- .../test_retinanet_detection_output.py | 120 +- .../fluid/tests/unittests/test_reverse_op.py | 20 +- .../fluid/tests/unittests/test_rmsprop_op.py | 168 +- .../tests/unittests/test_rnn_cell_api.py | 378 +-- .../tests/unittests/test_rnn_decode_api.py | 289 +- .../fluid/tests/unittests/test_rnn_dp.py | 58 +- .../unittests/test_rnn_memory_helper_op.py | 65 +- .../fluid/tests/unittests/test_rnn_op.py | 44 +- .../tests/unittests/test_roi_align_op.py | 15 +- 
.../test_roi_perspective_transform_op.py | 45 +- .../fluid/tests/unittests/test_roi_pool_op.py | 30 +- .../fluid/tests/unittests/test_roll_op.py | 11 +- .../fluid/tests/unittests/test_rot90_op.py | 40 +- .../fluid/tests/unittests/test_row_conv_op.py | 59 +- .../unittests/test_rpn_target_assign_op.py | 124 +- .../fluid/tests/unittests/test_rrelu_op.py | 120 +- .../paddle/fluid/tests/unittests/test_run.py | 13 +- ...est_run_fluid_by_module_or_command_line.py | 2 + .../tests/unittests/test_run_program_op.py | 59 +- .../test_runtime_and_compiletime_exception.py | 8 +- .../tests/unittests/test_sample_logits_op.py | 3 + .../tests/unittests/test_sampling_id_op.py | 4 +- ...est_save_inference_model_conditional_op.py | 25 +- .../unittests/test_save_model_without_var.py | 23 +- .../fluid/tests/unittests/test_scale_op.py | 18 +- .../test_scaled_dot_product_attention.py | 51 +- .../tests/unittests/test_scatter_nd_op.py | 189 +- .../fluid/tests/unittests/test_scatter_op.py | 72 +- .../fluid/tests/unittests/test_scope.py | 1 + .../tests/unittests/test_searchsorted_op.py | 83 +- .../fluid/tests/unittests/test_seed_op.py | 3 + .../fluid/tests/unittests/test_segment_ops.py | 33 +- .../unittests/test_select_input_output_op.py | 23 +- .../tests/unittests/test_selected_rows.py | 1 + .../fluid/tests/unittests/test_selu_op.py | 16 +- .../tests/unittests/test_set_bool_attr.py | 6 +- .../tests/unittests/test_set_value_op.py | 288 +- .../fluid/tests/unittests/test_sgd_op.py | 157 +- .../fluid/tests/unittests/test_sgd_op_bf16.py | 123 +- .../fluid/tests/unittests/test_shape_op.py | 4 + .../tests/unittests/test_shard_index_op.py | 4 + .../tests/unittests/test_share_data_op.py | 2 + .../tests/unittests/test_shrink_rnn_memory.py | 30 +- .../tests/unittests/test_shuffle_batch_op.py | 6 +- .../unittests/test_shuffle_channel_op.py | 1 + ...st_sigmoid_cross_entropy_with_logits_op.py | 111 +- .../unittests/test_sigmoid_focal_loss.py | 52 +- .../unittests/test_sigmoid_focal_loss_op.py | 89 +- .../fluid/tests/unittests/test_sign_op.py | 33 +- .../fluid/tests/unittests/test_signal.py | 59 +- .../unittests/test_similarity_focus_op.py | 54 +- .../tests/unittests/test_simple_rnn_op.py | 34 +- .../fluid/tests/unittests/test_size_op.py | 16 +- .../fluid/tests/unittests/test_slice_op.py | 171 +- .../fluid/tests/unittests/test_slice_var.py | 21 +- .../tests/unittests/test_smooth_l1_loss.py | 37 +- .../tests/unittests/test_smooth_l1_loss_op.py | 67 +- .../fluid/tests/unittests/test_softmax2d.py | 6 + .../unittests/test_softmax_mask_fuse_op.py | 14 +- ...est_softmax_mask_fuse_upper_triangle_op.py | 7 +- .../fluid/tests/unittests/test_softmax_op.py | 62 +- .../test_softmax_with_cross_entropy_op.py | 102 +- .../fluid/tests/unittests/test_solve_op.py | 148 +- .../fluid/tests/unittests/test_sort_op.py | 22 +- .../tests/unittests/test_space_to_depth_op.py | 13 +- .../unittests/test_sparse_attention_op.py | 75 +- .../tests/unittests/test_sparse_conv_op.py | 19 +- .../tests/unittests/test_sparse_copy_op.py | 7 +- .../unittests/test_sparse_momentum_op.py | 64 +- .../tests/unittests/test_sparse_norm_op.py | 16 +- .../tests/unittests/test_sparse_pooling_op.py | 28 +- .../tests/unittests/test_sparse_unary_op.py | 49 +- .../tests/unittests/test_sparse_utils_op.py | 91 +- .../test_spawn_and_init_parallel_env.py | 6 +- .../tests/unittests/test_spectral_norm_op.py | 14 +- .../test_split_and_merge_lod_tensor_op.py | 181 +- .../fluid/tests/unittests/test_split_op.py | 57 +- .../tests/unittests/test_split_program.py | 42 +- 
.../fluid/tests/unittests/test_spp_op.py | 14 +- .../tests/unittests/test_square_error_cost.py | 9 +- .../unittests/test_squared_l2_distance_op.py | 3 + .../unittests/test_squared_l2_norm_op.py | 6 +- .../fluid/tests/unittests/test_squeeze2_op.py | 6 +- .../fluid/tests/unittests/test_squeeze_op.py | 26 +- .../fluid/tests/unittests/test_stack_op.py | 31 +- .../unittests/test_static_model_parallel.py | 28 +- ...t_static_model_parallel_fused_attention.py | 10 +- ...static_model_parallel_fused_feedforward.py | 10 +- ..._model_parallel_fused_multi_transformer.py | 1 + .../tests/unittests/test_static_save_load.py | 597 ++-- .../unittests/test_static_save_load_bf16.py | 45 +- .../unittests/test_static_save_load_large.py | 28 +- ...tatic_shape_inferrence_for_shape_tensor.py | 6 +- .../fluid/tests/unittests/test_std_layer.py | 9 + .../fluid/tests/unittests/test_stft_op.py | 17 +- .../tests/unittests/test_strided_slice_op.py | 220 +- .../fluid/tests/unittests/test_subtract_op.py | 25 +- .../fluid/tests/unittests/test_sum_op.py | 47 +- .../fluid/tests/unittests/test_svd_op.py | 61 +- .../fluid/tests/unittests/test_switch.py | 21 +- .../tests/unittests/test_switch_autotune.py | 16 +- .../fluid/tests/unittests/test_switch_case.py | 289 +- .../unittests/test_sync_batch_norm_op.py | 43 +- .../unittests/test_take_along_axis_op.py | 18 +- .../tests/unittests/test_target_assign_op.py | 23 +- .../fluid/tests/unittests/test_tcp_store.py | 1 + .../tests/unittests/test_tdm_child_op.py | 20 +- .../tests/unittests/test_tdm_sampler_op.py | 42 +- .../test_teacher_student_sigmoid_loss_op.py | 13 +- .../tests/unittests/test_temporal_shift_op.py | 32 +- .../fluid/tests/unittests/test_tensor.py | 34 +- .../unittests/test_tensor_array_to_tensor.py | 62 +- .../tests/unittests/test_tensor_copy_from.py | 7 +- .../tests/unittests/test_tensor_fill_.py | 10 +- .../unittests/test_tensor_fill_diagonal_.py | 52 +- .../test_tensor_fill_diagonal_tensor.py | 9 +- .../test_tensor_fill_diagonal_tensor_.py | 61 +- .../unittests/test_tensor_register_hook.py | 50 +- ...st_tensor_scalar_type_promotion_dynamic.py | 3 +- ...est_tensor_scalar_type_promotion_static.py | 3 +- .../tests/unittests/test_tensor_to_list.py | 5 +- .../tests/unittests/test_tensor_to_numpy.py | 5 +- .../unittests/test_tensor_type_promotion.py | 23 +- .../fluid/tests/unittests/test_tensor_uva.py | 8 +- .../tests/unittests/test_tensor_zero_.py | 5 +- .../fluid/tests/unittests/test_tensordot.py | 130 +- .../fluid/tests/unittests/test_tf32_cublas.py | 2 + .../fluid/tests/unittests/test_tf32_cudnn.py | 7 +- .../fluid/tests/unittests/test_tile_op.py | 28 +- .../fluid/tests/unittests/test_top_k_op.py | 1 + .../fluid/tests/unittests/test_top_k_v2_op.py | 116 +- .../fluid/tests/unittests/test_trace_op.py | 22 +- .../unittests/test_traced_layer_err_msg.py | 64 +- .../fluid/tests/unittests/test_trainable.py | 22 +- .../tests/unittests/test_transfer_dtype_op.py | 11 +- .../unittests/test_transfer_layout_op.py | 7 +- .../tests/unittests/test_transformer_api.py | 274 +- .../tests/unittests/test_translated_layer.py | 40 +- .../tests/unittests/test_transpose_op.py | 40 +- .../tests/unittests/test_tree_conv_op.py | 62 +- .../unittests/test_triangular_solve_op.py | 33 +- .../tests/unittests/test_tril_indices_op.py | 8 + .../tests/unittests/test_tril_triu_op.py | 19 +- .../unittests/test_trilinear_interp_op.py | 100 +- .../unittests/test_trilinear_interp_v2_op.py | 93 +- .../fluid/tests/unittests/test_trunc_op.py | 4 + .../test_truncated_gaussian_random_op.py | 6 +- 
.../fluid/tests/unittests/test_unbind_op.py | 21 +- .../fluid/tests/unittests/test_unfold_op.py | 9 +- .../unittests/test_uniform_random_bf16_op.py | 118 +- .../test_uniform_random_inplace_op.py | 16 +- .../tests/unittests/test_uniform_random_op.py | 202 +- .../fluid/tests/unittests/test_unique.py | 121 +- .../unittests/test_unique_consecutive_op.py | 30 +- .../fluid/tests/unittests/test_unique_name.py | 2 + .../unittests/test_unique_with_counts.py | 46 +- .../fluid/tests/unittests/test_unpool1d_op.py | 62 +- .../fluid/tests/unittests/test_unpool3d_op.py | 137 +- .../fluid/tests/unittests/test_unpool_op.py | 94 +- .../tests/unittests/test_unsqueeze2_op.py | 28 +- .../tests/unittests/test_unsqueeze_op.py | 36 +- .../fluid/tests/unittests/test_unstack_op.py | 5 + .../unittests/test_update_loss_scaling_op.py | 75 +- .../fluid/tests/unittests/test_var_base.py | 140 +- .../fluid/tests/unittests/test_var_conv_2d.py | 46 +- .../fluid/tests/unittests/test_variable.py | 305 +- .../tests/unittests/test_variance_layer.py | 9 + .../fluid/tests/unittests/test_version.py | 1 + .../test_view_op_reuse_allocation.py | 4 + .../tests/unittests/test_viterbi_decode_op.py | 18 +- .../fluid/tests/unittests/test_warpctc_op.py | 191 +- .../tests/unittests/test_weight_decay.py | 48 +- .../unittests/test_weight_normalization.py | 20 +- .../fluid/tests/unittests/test_where_index.py | 28 +- .../fluid/tests/unittests/test_where_op.py | 77 +- .../tests/unittests/test_while_loop_op.py | 162 +- .../fluid/tests/unittests/test_while_op.py | 36 +- .../fluid/tests/unittests/test_yolo_box_op.py | 99 +- .../tests/unittests/test_yolov3_loss_op.py | 103 +- .../tests/unittests/test_zeros_like_op.py | 9 +- .../fluid/tests/unittests/test_zeros_op.py | 10 +- .../paddle/fluid/tests/unittests/testsuite.py | 33 +- .../tests/unittests/tokenizer/__init__.py | 6 +- .../unittests/tokenizer/bert_tokenizer.py | 12 +- .../unittests/tokenizer/tokenizer_utils.py | 112 +- .../tests/unittests/transformer_model.py | 132 +- python/paddle/fluid/tests/unittests/utils.py | 2 + .../unittests/xpu/get_test_cover_info.py | 20 +- .../unittests/xpu/test_accuracy_op_xpu.py | 3 + .../unittests/xpu/test_activation_op_xpu.py | 89 +- .../tests/unittests/xpu/test_adam_op_xpu.py | 24 +- .../tests/unittests/xpu/test_adamw_op_xpu.py | 39 +- .../xpu/test_affine_channel_op_xpu.py | 30 +- .../test_amp_check_finite_and_scale_op_xpu.py | 13 +- .../unittests/xpu/test_arg_max_op_xpu.py | 34 +- .../unittests/xpu/test_argsort_op_xpu.py | 21 +- .../unittests/xpu/test_batch_norm_op_xpu.py | 54 +- .../unittests/xpu/test_bce_loss_op_xpu.py | 5 + .../xpu/test_bilinear_interp_op_xpu.py | 1 + .../xpu/test_bilinear_interp_v2_op_xpu.py | 44 +- .../unittests/xpu/test_bitwise_op_xpu.py | 61 +- .../tests/unittests/xpu/test_bmm_op_xpu.py | 7 + .../tests/unittests/xpu/test_cast_op_xpu.py | 7 +- .../unittests/xpu/test_clip_by_norm_op_xpu.py | 9 +- .../tests/unittests/xpu/test_clip_op_xpu.py | 23 +- .../unittests/xpu/test_compare_op_xpu.py | 73 +- .../tests/unittests/xpu/test_concat_op_xpu.py | 19 +- .../tests/unittests/xpu/test_conv2d_op_xpu.py | 96 +- .../xpu/test_conv2d_transpose_op_xpu.py | 43 +- .../xpu/test_deformable_conv_op_xpu.py | 61 +- .../unittests/xpu/test_dropout_op_xpu.py | 38 +- .../xpu/test_elementwise_add_op_xpu.py | 40 +- .../xpu/test_elementwise_add_op_xpu_kp.py | 30 +- .../xpu/test_elementwise_div_op_xpu.py | 68 +- .../xpu/test_elementwise_floordiv_op_xpu.py | 6 + .../xpu/test_elementwise_max_op_xpu.py | 49 +- .../xpu/test_elementwise_min_op_xpu.py | 48 +- 
.../xpu/test_elementwise_mod_op_xpu.py | 6 + .../xpu/test_elementwise_mul_op_xpu.py | 36 +- .../xpu/test_elementwise_pow_op_xpu.py | 30 +- .../xpu/test_elementwise_sub_op_xpu.py | 34 +- .../unittests/xpu/test_expand_as_v2_op_xpu.py | 30 +- .../unittests/xpu/test_expand_v2_op_xpu.py | 38 +- .../xpu/test_fill_any_like_op_xpu.py | 7 + .../xpu/test_fill_constant_op_xpu.py | 22 +- .../unittests/xpu/test_flatten2_op_xpu.py | 6 + .../test_flatten_contiguous_range_op_xpu.py | 42 +- .../unittests/xpu/test_flatten_op_xpu.py | 6 + .../unittests/xpu/test_gather_nd_op_xpu.py | 45 +- .../tests/unittests/xpu/test_gather_op_xpu.py | 10 + .../xpu/test_gaussian_random_op_xpu.py | 2 + .../unittests/xpu/test_gen_bkcl_id_op.py | 2 + .../unittests/xpu/test_huber_loss_op_xpu.py | 16 +- .../xpu/test_iou_similarity_op_xpu.py | 10 +- .../unittests/xpu/test_label_smooth_op_xpu.py | 15 +- .../tests/unittests/xpu/test_lamb_op_xpu.py | 1 + .../unittests/xpu/test_layer_norm_op_xpu.py | 11 +- .../unittests/xpu/test_log_loss_op_xpu.py | 2 + .../unittests/xpu/test_logical_op_xpu.py | 54 +- .../unittests/xpu/test_logsumexp_op_xpu.py | 7 + .../xpu/test_lookup_table_v2_op_xpu.py | 43 +- .../xpu/test_masked_select_op_xpu.py | 18 +- .../tests/unittests/xpu/test_matmul_op_xpu.py | 103 +- .../unittests/xpu/test_matmul_v2_op_xpu.py | 2 + .../tests/unittests/xpu/test_mean_op_xpu.py | 20 +- .../unittests/xpu/test_momentum_op_xpu.py | 11 +- .../tests/unittests/xpu/test_mul_op_xpu.py | 59 +- .../xpu/test_nearest_interp_op_xpu.py | 1 + .../xpu/test_nearest_interp_v2_op_xpu.py | 25 +- .../unittests/xpu/test_one_hot_op_xpu.py | 1 + .../unittests/xpu/test_one_hot_v2_op_xpu.py | 21 +- .../tests/unittests/xpu/test_pool2d_op_xpu.py | 50 +- .../unittests/xpu/test_prior_box_op_xpu.py | 63 +- .../tests/unittests/xpu/test_range_xpu.py | 13 +- .../unittests/xpu/test_reduce_all_op_xpu.py | 17 +- .../unittests/xpu/test_reduce_max_op_xpu.py | 9 +- .../unittests/xpu/test_reduce_mean_op_xpu.py | 24 +- .../unittests/xpu/test_reduce_min_op_xpu.py | 9 +- .../unittests/xpu/test_reduce_prod_op_xpu.py | 21 +- .../unittests/xpu/test_reduce_sum_op_xpu.py | 9 +- .../unittests/xpu/test_refactor_op_xpu.py | 76 +- .../unittests/xpu/test_reshape2_op_xpu.py | 26 +- .../unittests/xpu/test_rmsprop_op_xpu.py | 1 + .../tests/unittests/xpu/test_rnn_op_xpu.py | 59 +- .../unittests/xpu/test_roi_align_op_xpu.py | 7 +- .../tests/unittests/xpu/test_scale_op_xpu.py | 12 + .../unittests/xpu/test_scatter_op_xpu.py | 3 + .../xpu/test_sequence_conv_op_xpu.py | 53 +- .../tests/unittests/xpu/test_sgd_op_xpu.py | 10 +- .../tests/unittests/xpu/test_shape_op_xpu.py | 9 + ...igmoid_cross_entropy_with_logits_op_xpu.py | 86 +- .../tests/unittests/xpu/test_sign_op_xpu.py | 7 + .../tests/unittests/xpu/test_slice_op_xpu.py | 12 + .../unittests/xpu/test_softmax_op_xpu.py | 3 + .../test_softmax_with_cross_entropy_op_xpu.py | 13 +- .../tests/unittests/xpu/test_split_op_xpu.py | 5 + .../unittests/xpu/test_squeeze2_op_xpu.py | 8 +- .../unittests/xpu/test_squeeze_op_xpu.py | 17 +- .../tests/unittests/xpu/test_stack_op_xpu.py | 15 +- .../tests/unittests/xpu/test_sum_op_xpu.py | 32 +- .../tests/unittests/xpu/test_tile_op_xpu.py | 17 + .../tests/unittests/xpu/test_top_k_op_xpu.py | 2 + .../unittests/xpu/test_top_k_v2_op_xpu.py | 21 +- .../unittests/xpu/test_transpose_op_xpu.py | 11 + .../unittests/xpu/test_tril_triu_op_xpu.py | 28 +- .../test_truncated_gaussian_random_op_xpu.py | 2 + .../xpu/test_uniform_random_op_xpu.py | 3 + .../unittests/xpu/test_unsqueeze2_op_xpu.py | 17 + 
.../unittests/xpu/test_unsqueeze_op_xpu.py | 6 + .../xpu/test_update_loss_scaling_op_xpu.py | 76 +- .../unittests/xpu/test_where_index_xpu.py | 12 +- .../tests/unittests/xpu/test_where_op_xpu.py | 46 +- .../tests/unittests/xpu/test_xpu_place.py | 1 + python/paddle/fluid/trainer_desc.py | 1 + python/paddle/fluid/trainer_factory.py | 38 +- .../fluid/transpiler/ascend_transpiler.py | 36 +- python/paddle/fluid/transpiler/collective.py | 496 ++- .../fluid/transpiler/distribute_transpiler.py | 477 +-- .../fluid/transpiler/geo_sgd_transpiler.py | 62 +- python/paddle/fluid/unique_name.py | 10 +- python/paddle/fluid/variable_index.py | 118 +- python/paddle/fluid/wrapped_decorator.py | 1 + python/paddle/framework/__init__.py | 2 +- python/paddle/framework/dtype.py | 6 +- python/paddle/framework/framework.py | 2 +- python/paddle/framework/io.py | 59 +- python/paddle/framework/random.py | 2 +- python/paddle/hapi/callbacks.py | 22 +- python/paddle/hapi/dynamic_flops.py | 37 +- python/paddle/hapi/hub.py | 40 +- python/paddle/hapi/model.py | 244 +- python/paddle/hapi/model_summary.py | 20 +- python/paddle/hapi/progressbar.py | 17 +- python/paddle/hapi/static_flops.py | 7 +- python/paddle/incubate/asp/__init__.py | 13 +- python/paddle/incubate/autograd/__init__.py | 10 +- python/paddle/incubate/autograd/primops.py | 109 +- python/paddle/incubate/autograd/primreg.py | 4 + python/paddle/incubate/autograd/primrules.py | 136 +- python/paddle/incubate/autograd/primx.py | 25 +- python/paddle/incubate/autograd/utils.py | 1 + python/paddle/incubate/autotune.py | 6 +- .../distributed/models/moe/__init__.py | 7 +- .../distributed/models/moe/gate/__init__.py | 6 +- .../distributed/models/moe/gate/base_gate.py | 9 +- .../models/moe/gate/gshard_gate.py | 36 +- .../distributed/models/moe/gate/naive_gate.py | 12 +- .../models/moe/gate/switch_gate.py | 22 +- .../distributed/models/moe/grad_clip.py | 20 +- .../distributed/models/moe/moe_layer.py | 161 +- .../incubate/distributed/models/moe/utils.py | 15 +- .../incubate/multiprocessing/reductions.py | 1 + .../nn/functional/fused_matmul_bias.py | 25 +- .../nn/functional/fused_transformer.py | 235 +- .../paddle/incubate/nn/layer/fused_linear.py | 18 +- .../incubate/nn/layer/fused_transformer.py | 126 +- .../incubate/operators/graph_khop_sampler.py | 35 +- .../incubate/operators/graph_reindex.py | 34 +- .../operators/graph_sample_neighbors.py | 38 +- .../incubate/operators/graph_send_recv.py | 38 +- .../paddle/incubate/operators/resnet_unit.py | 103 +- .../incubate/operators/softmax_mask_fuse.py | 11 +- .../softmax_mask_fuse_upper_triangle.py | 7 +- .../optimizer/distributed_fused_lamb.py | 169 +- .../incubate/optimizer/functional/bfgs.py | 17 +- .../incubate/optimizer/functional/lbfgs.py | 46 +- .../optimizer/functional/line_search.py | 25 +- .../incubate/optimizer/functional/utils.py | 25 +- python/paddle/incubate/optimizer/lookahead.py | 35 +- .../paddle/incubate/optimizer/modelaverage.py | 44 +- .../incubate/passes/fuse_resnet_unit_pass.py | 72 +- python/paddle/incubate/sparse/creation.py | 38 +- .../incubate/sparse/nn/functional/conv.py | 9 +- .../incubate/sparse/nn/functional/pooling.py | 6 +- .../paddle/incubate/sparse/nn/layer/conv.py | 70 +- .../paddle/incubate/sparse/nn/layer/norm.py | 17 +- .../incubate/sparse/nn/layer/pooling.py | 21 +- python/paddle/incubate/tensor/__init__.py | 6 +- python/paddle/incubate/tensor/math.py | 88 +- python/paddle/inference/__init__.py | 15 +- python/paddle/io/__init__.py | 23 +- python/paddle/jit/__init__.py | 11 +- 
python/paddle/metric/__init__.py | 9 +- python/paddle/metric/metrics.py | 44 +- python/paddle/nn/__init__.py | 271 +- python/paddle/nn/clip.py | 2 +- python/paddle/nn/functional/__init__.py | 212 +- python/paddle/nn/functional/activation.py | 261 +- python/paddle/nn/functional/common.py | 231 +- python/paddle/nn/functional/conv.py | 160 +- python/paddle/nn/functional/extension.py | 49 +- python/paddle/nn/functional/input.py | 42 +- python/paddle/nn/functional/loss.py | 350 ++- python/paddle/nn/functional/norm.py | 93 +- python/paddle/nn/functional/pooling.py | 586 ++-- python/paddle/nn/functional/vision.py | 81 +- python/paddle/nn/initializer/__init__.py | 19 +- python/paddle/nn/initializer/dirac.py | 173 +- python/paddle/nn/initializer/kaiming.py | 10 +- python/paddle/nn/initializer/orthogonal.py | 192 +- python/paddle/nn/initializer/uniform.py | 8 +- python/paddle/nn/initializer/xavier.py | 12 +- python/paddle/nn/layer/activation.py | 18 +- python/paddle/nn/layer/common.py | 187 +- python/paddle/nn/layer/container.py | 10 +- python/paddle/nn/layer/conv.py | 317 +- python/paddle/nn/layer/distance.py | 6 +- python/paddle/nn/layer/loss.py | 131 +- python/paddle/nn/layer/norm.py | 152 +- python/paddle/nn/layer/pooling.py | 159 +- python/paddle/nn/layer/rnn.py | 104 +- python/paddle/nn/layer/transformer.py | 129 +- python/paddle/nn/layer/vision.py | 2 +- python/paddle/nn/quant/functional_layers.py | 9 + python/paddle/nn/quant/quant_layers.py | 218 +- python/paddle/nn/utils/__init__.py | 3 +- python/paddle/nn/utils/spectral_norm_hook.py | 40 +- .../paddle/nn/utils/transform_parameters.py | 41 +- python/paddle/nn/utils/weight_norm_hook.py | 25 +- python/paddle/onnx/export.py | 19 +- python/paddle/optimizer/__init__.py | 14 +- python/paddle/optimizer/adadelta.py | 49 +- python/paddle/optimizer/adagrad.py | 47 +- python/paddle/optimizer/adam.py | 84 +- python/paddle/optimizer/adamax.py | 42 +- python/paddle/optimizer/adamw.py | 85 +- python/paddle/optimizer/lamb.py | 57 +- python/paddle/optimizer/lr.py | 65 +- python/paddle/optimizer/momentum.py | 79 +- python/paddle/optimizer/optimizer.py | 114 +- python/paddle/optimizer/rmsprop.py | 62 +- python/paddle/optimizer/sgd.py | 48 +- python/paddle/profiler/profiler.py | 71 +- python/paddle/profiler/profiler_statistic.py | 393 ++- python/paddle/profiler/statistic_helper.py | 6 +- python/paddle/profiler/timer.py | 46 +- python/paddle/profiler/utils.py | 14 +- python/paddle/reader/decorator.py | 24 +- python/paddle/reader/tests/decorator_test.py | 21 +- python/paddle/signal.py | 80 +- python/paddle/static/__init__.py | 61 +- python/paddle/static/input.py | 16 +- python/paddle/static/io.py | 68 +- python/paddle/static/nn/__init__.py | 2 +- python/paddle/static/sparsity/__init__.py | 17 +- python/paddle/tensor/__init__.py | 444 +-- python/paddle/tensor/array.py | 39 +- python/paddle/tensor/attribute.py | 31 +- python/paddle/tensor/creation.py | 291 +- python/paddle/tensor/einsum.py | 58 +- .../paddle/tensor/layer_function_generator.py | 18 +- python/paddle/tensor/linalg.py | 945 +++--- python/paddle/tensor/logic.py | 216 +- python/paddle/tensor/manipulation.py | 593 ++-- python/paddle/tensor/ops.py | 87 +- python/paddle/tensor/random.py | 125 +- python/paddle/tensor/search.py | 198 +- python/paddle/tensor/stat.py | 52 +- python/paddle/tensor/tensor.py | 2 +- python/paddle/tensor/to_string.py | 106 +- .../paddle/tests/dist_hapi_mnist_dynamic.py | 12 +- python/paddle/tests/dist_hapi_mnist_static.py | 12 +- .../tests/dist_hapi_pure_fp16_static.py | 15 +- 
python/paddle/tests/hapi_mnist_bf16_static.py | 26 +- python/paddle/tests/hubconf.py | 6 +- python/paddle/tests/test_async_read_write.py | 46 +- .../paddle/tests/test_callback_early_stop.py | 73 +- .../test_callback_reduce_lr_on_plateau.py | 27 +- python/paddle/tests/test_callback_visualdl.py | 9 +- python/paddle/tests/test_callbacks.py | 19 +- python/paddle/tests/test_dataset_cifar.py | 4 + python/paddle/tests/test_dataset_conll05.py | 1 + python/paddle/tests/test_dataset_imdb.py | 2 + python/paddle/tests/test_dataset_imikolov.py | 2 + python/paddle/tests/test_dataset_movielens.py | 2 + .../paddle/tests/test_dataset_uci_housing.py | 5 + python/paddle/tests/test_dataset_voc.py | 3 + python/paddle/tests/test_dataset_wmt.py | 18 +- python/paddle/tests/test_datasets.py | 9 + python/paddle/tests/test_dist_hapi_model.py | 12 +- python/paddle/tests/test_dlpack.py | 23 +- python/paddle/tests/test_download.py | 7 +- python/paddle/tests/test_hapi_amp.py | 51 +- python/paddle/tests/test_hapi_hub.py | 112 +- python/paddle/tests/test_hapi_hub_model.py | 7 +- python/paddle/tests/test_logger.py | 1 + python/paddle/tests/test_metrics.py | 21 +- python/paddle/tests/test_model.py | 357 +-- python/paddle/tests/test_ops_roi_align.py | 36 +- python/paddle/tests/test_ops_roi_pool.py | 36 +- python/paddle/tests/test_pretrained_model.py | 5 +- python/paddle/tests/test_progressbar.py | 1 + python/paddle/tests/test_read_file.py | 1 + python/paddle/tests/test_transforms.py | 346 ++- python/paddle/tests/test_vision_models.py | 1 + python/paddle/text/__init__.py | 13 +- python/paddle/text/datasets/conll05.py | 10 +- python/paddle/text/datasets/imdb.py | 7 +- python/paddle/text/datasets/movielens.py | 10 +- python/paddle/text/datasets/uci_housing.py | 4 +- python/paddle/text/datasets/wmt14.py | 6 +- python/paddle/text/datasets/wmt16.py | 20 +- python/paddle/text/viterbi_decode.py | 21 +- python/paddle/utils/code_gen/api_base.py | 36 +- python/paddle/utils/code_gen/api_gen.py | 34 +- .../paddle/utils/code_gen/backward_api_gen.py | 26 +- .../paddle/utils/code_gen/cross_validate.py | 28 +- python/paddle/utils/code_gen/filters.py | 6 +- python/paddle/utils/code_gen/generate_op.py | 41 +- .../utils/code_gen/intermediate_api_gen.py | 36 +- python/paddle/utils/code_gen/parse_api.py | 5 +- python/paddle/utils/code_gen/parse_utils.py | 10 +- .../paddle/utils/code_gen/sparse_api_gen.py | 40 +- .../utils/code_gen/sparse_bw_api_gen.py | 15 +- .../paddle/utils/code_gen/strings_api_gen.py | 34 +- python/paddle/utils/code_gen/tests.py | 6 +- python/paddle/utils/code_gen/type_mapping.py | 14 +- .../utils/code_gen/wrapped_infermeta_gen.py | 18 +- python/paddle/utils/cpp_extension/__init__.py | 14 +- .../utils/cpp_extension/cpp_extension.py | 27 +- .../utils/cpp_extension/extension_utils.py | 107 +- python/paddle/utils/deprecated.py | 5 +- python/paddle/utils/download.py | 16 +- python/paddle/utils/gast/ast3.py | 68 +- python/paddle/utils/gast/astn.py | 2 + python/paddle/utils/gast/gast.py | 453 +-- python/paddle/utils/image_util.py | 22 +- python/paddle/utils/install_check.py | 27 +- python/paddle/utils/op_version.py | 12 +- python/paddle/utils/profiler.py | 14 +- python/paddle/vision/datasets/__init__.py | 12 +- python/paddle/vision/datasets/cifar.py | 14 +- python/paddle/vision/datasets/flowers.py | 19 +- python/paddle/vision/datasets/folder.py | 14 +- python/paddle/vision/datasets/mnist.py | 13 +- python/paddle/vision/datasets/voc2012.py | 9 +- python/paddle/vision/image.py | 8 +- python/paddle/vision/models/__init__.py | 64 +- 
python/paddle/vision/models/alexnet.py | 4 +- python/paddle/vision/models/densenet.py | 112 +- python/paddle/vision/models/googlenet.py | 22 +- python/paddle/vision/models/inceptionv3.py | 531 ++-- python/paddle/vision/models/lenet.py | 18 +- python/paddle/vision/models/mobilenetv1.py | 196 +- python/paddle/vision/models/mobilenetv2.py | 67 +- python/paddle/vision/models/mobilenetv3.py | 107 +- python/paddle/vision/models/resnet.py | 105 +- python/paddle/vision/models/shufflenetv2.py | 186 +- python/paddle/vision/models/squeezenet.py | 56 +- python/paddle/vision/models/vgg.py | 3 +- python/paddle/vision/ops.py | 251 +- python/paddle/vision/transforms/__init__.py | 2 +- python/paddle/vision/transforms/functional.py | 122 +- .../vision/transforms/functional_cv2.py | 111 +- .../vision/transforms/functional_pil.py | 24 +- .../vision/transforms/functional_tensor.py | 83 +- python/paddle/vision/transforms/transforms.py | 64 +- .../CspChromeTraceFormatter.py | 1 + tools/CrossStackProfiler/CspFileReader.py | 30 +- tools/CrossStackProfiler/CspReporter.py | 90 +- tools/CrossStackProfiler/DCGMFileReader.py | 39 +- tools/CrossStackProfiler/NetFileReader.py | 14 +- tools/CrossStackProfiler/ProfileFileReader.py | 76 +- tools/analysisPyXml.py | 10 +- tools/analysis_build_time.py | 6 +- tools/check_api_compatible.py | 30 +- tools/check_op_benchmark_result.py | 12 +- tools/check_op_desc.py | 30 +- tools/check_op_register_type.py | 8 +- tools/codestyle/docstring_checker.py | 29 +- tools/continuous_integration/bisect.py | 78 +- tools/count_api_without_core_ops.py | 26 +- tools/coverage/coverage_diff.py | 6 +- tools/coverage/coverage_diff_list.py | 6 +- tools/coverage/coverage_lines.py | 6 +- tools/coverage/cuda_clean.py | 6 +- tools/coverage/gcda_clean.py | 6 +- tools/coverage/pull_request.py | 6 +- tools/coverage/python_coverage.py | 10 +- tools/diff_api.py | 6 +- tools/diff_unittest.py | 6 +- tools/externalError/spider.py | 4 +- tools/final_ut_parallel_rule.py | 8 +- tools/get_pr_ut.py | 23 +- tools/get_single_test_cov.py | 8 +- tools/get_ut_file_map.py | 10 +- tools/get_ut_mem_map.py | 14 +- tools/group_case_for_parallel.py | 6 +- tools/handle_h_cu_file.py | 10 +- ...rate_pd_op_dialect_from_paddle_op_maker.py | 48 +- tools/infrt/generate_phi_kernel_dialect.py | 30 +- tools/infrt/get_compat_kernel_signature.py | 18 +- tools/infrt/get_phi_kernel_info.py | 37 +- tools/infrt/print_kernel_pass_info.py | 10 +- tools/jetson_infer_op.py | 24 +- tools/parallel_UT_rule.py | 47 +- tools/print_signatures.py | 99 +- tools/pyCov_multithreading.py | 10 +- tools/remove_grad_op_and_kernel.py | 17 +- tools/sampcd_processor.py | 62 +- tools/summary_env.py | 16 +- tools/test_check_api_compatible.py | 9 +- tools/test_check_pr_approval.py | 43 +- tools/test_print_signatures.py | 12 +- tools/test_runner.py | 9 +- tools/test_sampcd_processor.py | 44 +- tools/timeline.py | 35 +- 3037 files changed, 93309 insertions(+), 74486 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 71f2699d5a3..42181c8f959 100755 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,11 +4,16 @@ repos: hooks: - id: remove-crlf files: (?!.*third_party)^.*$ | (?!.*book)^.*$ -- repo: https://github.com/PaddlePaddle/mirrors-yapf.git - sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37 +- repo: https://github.com/google/yapf + sha: v0.32.0 hooks: - id: yapf files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ + exclude: | + (?x)^( + python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py| + 
python/paddle/fluid/tests/unittests/dygraph_to_static/test_origin_info.py + )$ - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.1.0 hooks: diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index b96283636f8..f4ad3a229c1 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -481,10 +481,10 @@ EOF } function cmake_gen_and_build() { - startTime_s=`date +%s` + startTime_s=100 cmake_gen $1 build $2 - endTime_s=`date +%s` + endTime_s=200 [ -n "$startTime_firstBuild" ] && startTime_s=$startTime_firstBuild echo "Build Time: $[ $endTime_s - $startTime_s ]s" echo "ipipe_log_param_Build_Time: $[ $endTime_s - $startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt @@ -1130,8 +1130,8 @@ EOF function check_diff_file_for_coverage() { diff_h_file=$(git diff --name-status test develop | awk '$1 != "D" {print $2}' | grep '\.h$' | awk -F "/" '{printf "%s,",$NF}') diff_cc_file=$(git diff --name-status test develop | awk '$1 != "D" {print $2}' | grep -E '\.(cc|c)$' | awk -F "/" '{printf "%s,",$NF}') - diff_py_file=$(git diff --name-status test develop | grep '\.py$' | awk '$1 != "D" {printf "%s,",$2}') - + #diff_py_file=$(git diff --name-status test develop | grep '\.py$' | awk '$1 != "D" {printf "%s,",$2}') + diff_py_file='tools/test_sampcd_processor.py,tools/timeline.py' export PADDLE_GIT_DIFF_H_FILE=${diff_h_file%*,} export PADDLE_GIT_DIFF_CC_FILE=${diff_cc_file%*,} export PADDLE_GIT_DIFF_PY_FILE=${diff_py_file%*,} diff --git a/python/paddle/_C_ops.py b/python/paddle/_C_ops.py index 2bcaa5478e5..e8f89c739c9 100644 --- a/python/paddle/_C_ops.py +++ b/python/paddle/_C_ops.py @@ -14,6 +14,7 @@ from paddle.fluid import core from .fluid import framework + __all__ = [] _already_switch_to_eager_ = False diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 194c2e8cce4..75ec75cc431 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -24,6 +24,7 @@ except ImportError: from .batch import batch # noqa: F401 from .framework import monkey_patch_variable from .framework import monkey_patch_math_varbase + monkey_patch_variable() monkey_patch_math_varbase() @@ -52,6 +53,7 @@ if fluid.framework._in_eager_mode_: Tensor = framework.core.eager.Tensor else: from .framework import VarBase as Tensor # noqa: F401 + Tensor.__qualname__ = 'Tensor' # noqa: F401 import paddle.compat # noqa: F401 import paddle.distributed # noqa: F401 @@ -372,272 +374,272 @@ if is_compiled_with_cinn(): disable_static() __all__ = [ # noqa - 'dtype', - 'uint8', - 'int8', - 'int16', - 'int32', - 'int64', - 'float16', - 'float32', - 'float64', - 'bfloat16', - 'bool', - 'complex64', - 'complex128', - 'addmm', - 'allclose', - 'isclose', - 't', - 'add', - 'subtract', - 'diag', - 'diagflat', - 'isnan', - 'scatter_nd_add', - 'unstack', - 'get_default_dtype', - 'save', - 'multinomial', - 'get_cuda_rng_state', - 'rank', - 'empty_like', - 'eye', - 'cumsum', - 'cumprod', - 'logit', - 'sign', - 'is_empty', - 'equal', - 'equal_all', - 'is_tensor', - 'is_complex', - 'is_integer', - 'cross', - 'where', - 'log1p', - 'cos', - 'tan', - 'mean', - 'mode', - 'mv', - 'in_dynamic_mode', - 'min', - 'amin', - 'any', - 'slice', - 'normal', - 'logsumexp', - 'full', - 'unsqueeze', - 'unsqueeze_', - 'argmax', - 'Model', - 'summary', - 'flops', - 'sort', - 'searchsorted', - 'split', - 'logical_and', - 'full_like', - 'less_than', - 'kron', - 'clip', - 'Tensor', - 'crop', - 'ParamAttr', - 'stanh', - 'randint', - 'randint_like', - 'assign', - 'gather', - 'scale', - 'zeros', 
- 'rsqrt', - 'squeeze', - 'squeeze_', - 'to_tensor', - 'gather_nd', - 'isinf', - 'uniform', - 'floor_divide', - 'remainder', - 'floor_mod', - 'roll', - 'batch', - 'max', - 'amax', - 'logical_or', - 'bitwise_and', - 'bitwise_or', - 'bitwise_xor', - 'bitwise_not', - 'mm', - 'flip', - 'rot90', - 'bincount', - 'histogram', - 'multiplex', - 'CUDAPlace', - 'NPUPlace', - 'empty', - 'shape', - 'real', - 'imag', - 'is_floating_point', - 'complex', - 'reciprocal', - 'rand', - 'less_equal', - 'triu', - 'sin', - 'dist', - 'unbind', - 'meshgrid', - 'arange', - 'load', - 'numel', - 'median', - 'nanmedian', - 'quantile', - 'nanquantile', - 'no_grad', - 'set_grad_enabled', - 'is_grad_enabled', - 'mod', - 'abs', - 'tril', - 'pow', - 'zeros_like', - 'maximum', - 'topk', - 'index_select', - 'CPUPlace', - 'matmul', - 'seed', - 'acos', - 'logical_xor', - 'exp', - 'expm1', - 'bernoulli', - 'poisson', - 'sinh', - 'round', - 'DataParallel', - 'argmin', - 'prod', - 'broadcast_shape', - 'conj', - 'neg', - 'lgamma', - 'lerp', - 'erfinv', - 'inner', - 'outer', - 'square', - 'divide', - 'ceil', - 'atan', - 'atan2', - 'rad2deg', - 'deg2rad', - 'gcd', - 'lcm', - 'expand', - 'broadcast_to', - 'ones_like', - 'index_sample', - 'cast', - 'grad', - 'all', - 'ones', - 'not_equal', - 'sum', - 'nansum', - 'nanmean', - 'tile', - 'greater_equal', - 'isfinite', - 'create_parameter', - 'dot', - 'increment', - 'erf', - 'bmm', - 'chunk', - 'tolist', - 'tensordot', - 'greater_than', - 'shard_index', - 'argsort', - 'tanh', - 'tanh_', - 'transpose', - 'randn', - 'strided_slice', - 'unique', - 'unique_consecutive', - 'set_cuda_rng_state', - 'set_printoptions', - 'std', - 'flatten', - 'asin', - 'multiply', - 'disable_static', - 'masked_select', - 'var', - 'trace', - 'enable_static', - 'scatter_nd', - 'set_default_dtype', - 'disable_signal_handler', - 'expand_as', - 'stack', - 'sqrt', - 'randperm', - 'linspace', - 'logspace', - 'reshape', - 'reshape_', - 'reverse', - 'nonzero', - 'CUDAPinnedPlace', - 'logical_not', - 'add_n', - 'minimum', - 'scatter', - 'scatter_', - 'floor', - 'cosh', - 'log', - 'log2', - 'log10', - 'concat', - 'check_shape', - 'trunc', - 'frac', - 'digamma', - 'standard_normal', - 'diagonal', - 'broadcast_tensors', - 'einsum', - 'set_flags', - 'get_flags', - 'asinh', - 'acosh', - 'atanh', - 'as_complex', - 'as_real', - 'diff', - 'angle', - 'fmax', - 'fmin', - 'moveaxis', - 'repeat_interleave', - 'clone', - 'kthvalue', - 'renorm', - 'take_along_axis', - 'put_along_axis', - 'heaviside', - 'tril_indices', + 'dtype', + 'uint8', + 'int8', + 'int16', + 'int32', + 'int64', + 'float16', + 'float32', + 'float64', + 'bfloat16', + 'bool', + 'complex64', + 'complex128', + 'addmm', + 'allclose', + 'isclose', + 't', + 'add', + 'subtract', + 'diag', + 'diagflat', + 'isnan', + 'scatter_nd_add', + 'unstack', + 'get_default_dtype', + 'save', + 'multinomial', + 'get_cuda_rng_state', + 'rank', + 'empty_like', + 'eye', + 'cumsum', + 'cumprod', + 'logit', + 'sign', + 'is_empty', + 'equal', + 'equal_all', + 'is_tensor', + 'is_complex', + 'is_integer', + 'cross', + 'where', + 'log1p', + 'cos', + 'tan', + 'mean', + 'mode', + 'mv', + 'in_dynamic_mode', + 'min', + 'amin', + 'any', + 'slice', + 'normal', + 'logsumexp', + 'full', + 'unsqueeze', + 'unsqueeze_', + 'argmax', + 'Model', + 'summary', + 'flops', + 'sort', + 'searchsorted', + 'split', + 'logical_and', + 'full_like', + 'less_than', + 'kron', + 'clip', + 'Tensor', + 'crop', + 'ParamAttr', + 'stanh', + 'randint', + 'randint_like', + 'assign', + 'gather', + 'scale', + 'zeros', + 'rsqrt', + 
'squeeze', + 'squeeze_', + 'to_tensor', + 'gather_nd', + 'isinf', + 'uniform', + 'floor_divide', + 'remainder', + 'floor_mod', + 'roll', + 'batch', + 'max', + 'amax', + 'logical_or', + 'bitwise_and', + 'bitwise_or', + 'bitwise_xor', + 'bitwise_not', + 'mm', + 'flip', + 'rot90', + 'bincount', + 'histogram', + 'multiplex', + 'CUDAPlace', + 'NPUPlace', + 'empty', + 'shape', + 'real', + 'imag', + 'is_floating_point', + 'complex', + 'reciprocal', + 'rand', + 'less_equal', + 'triu', + 'sin', + 'dist', + 'unbind', + 'meshgrid', + 'arange', + 'load', + 'numel', + 'median', + 'nanmedian', + 'quantile', + 'nanquantile', + 'no_grad', + 'set_grad_enabled', + 'is_grad_enabled', + 'mod', + 'abs', + 'tril', + 'pow', + 'zeros_like', + 'maximum', + 'topk', + 'index_select', + 'CPUPlace', + 'matmul', + 'seed', + 'acos', + 'logical_xor', + 'exp', + 'expm1', + 'bernoulli', + 'poisson', + 'sinh', + 'round', + 'DataParallel', + 'argmin', + 'prod', + 'broadcast_shape', + 'conj', + 'neg', + 'lgamma', + 'lerp', + 'erfinv', + 'inner', + 'outer', + 'square', + 'divide', + 'ceil', + 'atan', + 'atan2', + 'rad2deg', + 'deg2rad', + 'gcd', + 'lcm', + 'expand', + 'broadcast_to', + 'ones_like', + 'index_sample', + 'cast', + 'grad', + 'all', + 'ones', + 'not_equal', + 'sum', + 'nansum', + 'nanmean', + 'tile', + 'greater_equal', + 'isfinite', + 'create_parameter', + 'dot', + 'increment', + 'erf', + 'bmm', + 'chunk', + 'tolist', + 'tensordot', + 'greater_than', + 'shard_index', + 'argsort', + 'tanh', + 'tanh_', + 'transpose', + 'randn', + 'strided_slice', + 'unique', + 'unique_consecutive', + 'set_cuda_rng_state', + 'set_printoptions', + 'std', + 'flatten', + 'asin', + 'multiply', + 'disable_static', + 'masked_select', + 'var', + 'trace', + 'enable_static', + 'scatter_nd', + 'set_default_dtype', + 'disable_signal_handler', + 'expand_as', + 'stack', + 'sqrt', + 'randperm', + 'linspace', + 'logspace', + 'reshape', + 'reshape_', + 'reverse', + 'nonzero', + 'CUDAPinnedPlace', + 'logical_not', + 'add_n', + 'minimum', + 'scatter', + 'scatter_', + 'floor', + 'cosh', + 'log', + 'log2', + 'log10', + 'concat', + 'check_shape', + 'trunc', + 'frac', + 'digamma', + 'standard_normal', + 'diagonal', + 'broadcast_tensors', + 'einsum', + 'set_flags', + 'get_flags', + 'asinh', + 'acosh', + 'atanh', + 'as_complex', + 'as_real', + 'diff', + 'angle', + 'fmax', + 'fmin', + 'moveaxis', + 'repeat_interleave', + 'clone', + 'kthvalue', + 'renorm', + 'take_along_axis', + 'put_along_axis', + 'heaviside', + 'tril_indices', ] diff --git a/python/paddle/amp/grad_scaler.py b/python/paddle/amp/grad_scaler.py index ca08ce196a9..46582b1770b 100644 --- a/python/paddle/amp/grad_scaler.py +++ b/python/paddle/amp/grad_scaler.py @@ -83,10 +83,10 @@ class GradScaler(AmpScaler): incr_every_n_steps=1000, decr_every_n_nan_or_inf=2, use_dynamic_loss_scaling=True): - super(GradScaler, self).__init__(enable, init_loss_scaling, incr_ratio, - decr_ratio, incr_every_n_steps, - decr_every_n_nan_or_inf, - use_dynamic_loss_scaling) + super(GradScaler, + self).__init__(enable, init_loss_scaling, incr_ratio, decr_ratio, + incr_every_n_steps, decr_every_n_nan_or_inf, + use_dynamic_loss_scaling) def scale(self, var): """ diff --git a/python/paddle/autograd/backward_mode.py b/python/paddle/autograd/backward_mode.py index f36cdafa464..d2c2beadf38 100644 --- a/python/paddle/autograd/backward_mode.py +++ b/python/paddle/autograd/backward_mode.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,6 +16,7 @@ from paddle.fluid import core from paddle.fluid import framework from paddle.fluid.backward import gradients_with_optimizer import paddle + __all__ = [] @@ -81,14 +82,16 @@ def backward(tensors, grad_tensors=None, retain_graph=False): if isinstance(in_out_list, (list, tuple)): assert len(in_out_list) > 0, "{} connot be empyt".format(name) for each_var in in_out_list: - assert isinstance(each_var, ( - paddle.Tensor, core.eager.Tensor - )), "Elements of {} must be paddle.Tensor".format(name) + assert isinstance( + each_var, + (paddle.Tensor, core.eager.Tensor + )), "Elements of {} must be paddle.Tensor".format(name) return in_out_list else: - assert isinstance(in_out_list, ( - paddle.Tensor, core.eager.Tensor - )), "{} must be Tensor or list of Tensor".format(name) + assert isinstance( + in_out_list, + (paddle.Tensor, core.eager.Tensor + )), "{} must be Tensor or list of Tensor".format(name) return [in_out_list] tensors = check_tensors(tensors, "tensors") diff --git a/python/paddle/autograd/functional.py b/python/paddle/autograd/functional.py index 93142c9112f..8dda4811d1b 100644 --- a/python/paddle/autograd/functional.py +++ b/python/paddle/autograd/functional.py @@ -139,7 +139,7 @@ def _double_backward_trick(ys, xs, v): """Double backward trick for computing ``jvp`` by ``vjp`` see details: https://j-towns.github.io/2017/06/12/A-new-trick.html """ - # The value of ys_grad is not important, it can be any random value in + # The value of ys_grad is not important, it can be any random value in # theory, but it's required to set stop_gradient=False. ys_grad = _zeros_like_with_grad(ys) xs_grad = _grad(ys, xs, ys_grad) @@ -302,10 +302,11 @@ class Hessian(object): """ def __init__(self, func, xs, is_batched=False): + def _jac_func(*xs): jac = Jacobian(func, xs, is_batched=is_batched) - if (is_batched and jac.shape[1] != 1) or (not is_batched and - jac.shape[0] != 1): + if (is_batched and jac.shape[1] != 1) or (not is_batched + and jac.shape[0] != 1): raise RuntimeError( "The function given to Hessian shoud return as single element Tensor or batched single element Tensor." 
) @@ -362,18 +363,18 @@ class _Jacobian(object): def _lazy_indexes(self, indexes): idx = indexes[self._lazy_axis] - return (idx, ) if isinstance( - idx, int) else tuple(range(idx.start, idx.stop, idx.step)) + return (idx, ) if isinstance(idx, int) else tuple( + range(idx.start, idx.stop, idx.step)) def _flatten(self, xs): raise NotImplementedError def _shifted_indexes(self, indexes, lazy_axis_size=0): idx = indexes[self._lazy_axis] - shifted_lazy_axis_idx = 0 if isinstance( - idx, int) else slice(0, lazy_axis_size, 1) - return indexes[:self._lazy_axis] + (shifted_lazy_axis_idx, - ) + indexes[self._lazy_axis + 1:] + shifted_lazy_axis_idx = 0 if isinstance(idx, int) else slice( + 0, lazy_axis_size, 1) + return indexes[:self._lazy_axis] + ( + shifted_lazy_axis_idx, ) + indexes[self._lazy_axis + 1:] def __getitem__(self, indexes): indexes = _multi_index(indexes, self.shape) @@ -381,8 +382,8 @@ class _Jacobian(object): if isinstance(indexes[self._lazy_axis], int): other_indexes = indexes[:self._lazy_axis] + \ indexes[self._lazy_axis+1:] - return self._cached_evaluate(indexes[self._lazy_axis])[ - other_indexes] + return self._cached_evaluate( + indexes[self._lazy_axis])[other_indexes] lazy_indexes = self._lazy_indexes(indexes) part_jac = paddle.stack( [self._cached_evaluate(i) for i in lazy_indexes], @@ -424,7 +425,8 @@ class _JacobianNoBatch(_Jacobian): def _evaluate(self, row_index): return self._flatten(_grad( self._flatten_ys[row_index], - self._xs, )) + self._xs, + )) class _JacobianBatchLast(_Jacobian): @@ -508,8 +510,8 @@ def _multi_index(indexes, shape): positive_indexes = [] for i, index in enumerate(indexes): if isinstance(index, slice): - index = slice(index.start or 0, index.stop or shape[i], - index.step or 1) + index = slice(index.start or 0, index.stop or shape[i], index.step + or 1) positive_indexes.append( slice( index.start + shape[i] if index.start < 0 else index.start, @@ -530,9 +532,8 @@ def _as_tensors(xs): def _stack_tensor_or_return_none(origin_list): assert len(origin_list) > 0, "Can't not stack an empty list" - return paddle.stack( - origin_list, axis=0) if isinstance( - origin_list[0], paddle.fluid.framework.Variable) else None + return paddle.stack(origin_list, axis=0) if isinstance( + origin_list[0], paddle.fluid.framework.Variable) else None def _replace_none_with_zero_tensor(xs, refs): @@ -809,23 +810,20 @@ def jacobian(func, inputs, create_graph=False, allow_unused=False): fin_size = len(inputs) fout_size = len(outputs) flat_outputs = tuple( - paddle.reshape( - output, shape=[-1]) for output in outputs) + paddle.reshape(output, shape=[-1]) for output in outputs) jacobian = tuple() for i, flat_output in enumerate(flat_outputs): jac_i = list([] for _ in range(fin_size)) for k in range(len(flat_output)): - row_k = paddle.grad( - flat_output[k], - inputs, - create_graph=create_graph, - retain_graph=True, - allow_unused=allow_unused) + row_k = paddle.grad(flat_output[k], + inputs, + create_graph=create_graph, + retain_graph=True, + allow_unused=allow_unused) for j in range(fin_size): jac_i[j].append( - paddle.reshape( - row_k[j], shape=[-1]) - if isinstance(row_k[j], paddle.Tensor) else None) + paddle.reshape(row_k[j], shape=[-1]) if isinstance( + row_k[j], paddle.Tensor) else None) jacobian += (tuple( _stack_tensor_or_return_none(jac_i_j) for jac_i_j in jac_i), ) if fin_size == 1 and fout_size == 1: @@ -957,25 +955,22 @@ def batch_jacobian(func, inputs, create_graph=False, allow_unused=False): fin_size = len(inputs) fout_size = len(outputs) flat_outputs = tuple( - 
paddle.reshape( - output, shape=[batch_size, -1]) for output in outputs) + paddle.reshape(output, shape=[batch_size, -1]) for output in outputs) jacobian = tuple() for i, flat_output in enumerate(flat_outputs): jac_i = list([] for _ in range(fin_size)) for k in range(flat_output.shape[1]): - row_k = paddle.grad( - flat_output[:, k], - inputs, - create_graph=create_graph, - retain_graph=True, - allow_unused=allow_unused) + row_k = paddle.grad(flat_output[:, k], + inputs, + create_graph=create_graph, + retain_graph=True, + allow_unused=allow_unused) for j in range(fin_size): jac_i[j].append( - paddle.reshape( - row_k[j], shape=[-1]) - if isinstance(row_k[j], paddle.Tensor) else None) + paddle.reshape(row_k[j], shape=[-1]) if isinstance( + row_k[j], paddle.Tensor) else None) jacobian += (tuple( _stack_tensor_or_return_none(jac_i_j) for jac_i_j in jac_i), ) if fin_size == 1 and fout_size == 1: @@ -1119,18 +1114,19 @@ def batch_hessian(func, inputs, create_graph=False, allow_unused=False): ], "The function to compute batched Hessian matrix should return a Tensor of shape [batch_size, 1]" def jac_func(*ins): - grad_inputs = paddle.grad( - outputs, - ins, - create_graph=True, - retain_graph=True, - allow_unused=allow_unused) + grad_inputs = paddle.grad(outputs, + ins, + create_graph=True, + retain_graph=True, + allow_unused=allow_unused) return tuple( _replace_none_with_zero_tensor(grad_inputs[i], inputs[i]) for i in range(len(inputs))) - return batch_jacobian( - jac_func, inputs, create_graph=create_graph, allow_unused=allow_unused) + return batch_jacobian(jac_func, + inputs, + create_graph=create_graph, + allow_unused=allow_unused) @framework.dygraph_only @@ -1245,18 +1241,19 @@ def hessian(func, inputs, create_graph=False, allow_unused=False): ], "The function to compute Hessian matrix should return a Tensor with a single element" def jac_func(*ins): - grad_inputs = paddle.grad( - outputs, - ins, - create_graph=True, - retain_graph=True, - allow_unused=allow_unused) + grad_inputs = paddle.grad(outputs, + ins, + create_graph=True, + retain_graph=True, + allow_unused=allow_unused) return tuple( _replace_none_with_zero_tensor(grad_inputs[i], inputs[i]) for i in range(len(inputs))) - return jacobian( - jac_func, inputs, create_graph=create_graph, allow_unused=allow_unused) + return jacobian(jac_func, + inputs, + create_graph=create_graph, + allow_unused=allow_unused) def vhp(func, inputs, v=None, create_graph=False, allow_unused=False): diff --git a/python/paddle/autograd/py_layer.py b/python/paddle/autograd/py_layer.py index 0fb90b334f8..64946268bd7 100644 --- a/python/paddle/autograd/py_layer.py +++ b/python/paddle/autograd/py_layer.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,6 +17,7 @@ from paddle.fluid.framework import dygraph_only from paddle.fluid.dygraph.amp.auto_cast import amp_state from paddle.amp.auto_cast import auto_cast from paddle.fluid import core + __all__ = [] @@ -123,7 +124,9 @@ class PyLayerContext(object): def with_mateclass(meta, *bases): + class impl(meta): + def __new__(cls, name, temp_bases, attrs): return meta(name, bases, attrs) @@ -131,6 +134,7 @@ def with_mateclass(meta, *bases): class CPyLayer(object): + @classmethod @dygraph_only def apply(cls, *args, **kwargs): @@ -178,6 +182,7 @@ class CPyLayer(object): class PyLayerBackward(PyLayerContext): + def backward(self, *args, **kwargs): with paddle.fluid.dygraph.guard(): with paddle.fluid.dygraph.no_grad(): @@ -192,6 +197,7 @@ class PyLayerBackward(PyLayerContext): class LayerMeta(type): + def __init__(cls, name, bases, attrs): cls._backward_function = type(name + '_backward', (PyLayerBackward, ), {"_forward_cls": cls}) @@ -330,6 +336,7 @@ class PyLayer(with_mateclass(LayerMeta, CPyLayer)): class EagerPyLayerContext(object): + def save_for_backward(self, *tensors): """ Saves given tensors that backward need. Use ``saved_tensor`` in the `backward` to get the saved tensors. @@ -494,11 +501,13 @@ class EagerPyLayerContext(object): class EagerPyLayerBackward(core.eager.PyLayer, EagerPyLayerContext): + def backward(self, *args): return self._forward_cls.backward(self, *args) class EagerPyLayerMeta(type): + def __init__(cls, name, bases, attrs): cls._backward_function = type(name + '_backward', (EagerPyLayerBackward, ), @@ -510,6 +519,7 @@ class EagerPyLayerMeta(type): class EagerPyLayer( with_mateclass(EagerPyLayerMeta, core.eager.PyLayer, EagerPyLayerContext)): + @staticmethod def forward(ctx, *args, **kwargs): """ @@ -590,6 +600,7 @@ class EagerPyLayer( def once_differentiable(backward): + def wrapper(ctx, *args): with paddle.fluid.dygraph.no_grad(): outputs = backward(ctx, *args) diff --git a/python/paddle/callbacks.py b/python/paddle/callbacks.py index 08fab3e0adb..46f69aae1bb 100644 --- a/python/paddle/callbacks.py +++ b/python/paddle/callbacks.py @@ -21,11 +21,6 @@ from .hapi.callbacks import EarlyStopping # noqa: F401 from .hapi.callbacks import ReduceLROnPlateau # noqa: F401 __all__ = [ #noqa - 'Callback', - 'ProgBarLogger', - 'ModelCheckpoint', - 'VisualDL', - 'LRScheduler', - 'EarlyStopping', - 'ReduceLROnPlateau' + 'Callback', 'ProgBarLogger', 'ModelCheckpoint', 'VisualDL', 'LRScheduler', + 'EarlyStopping', 'ReduceLROnPlateau' ] diff --git a/python/paddle/cost_model/__init__.py b/python/paddle/cost_model/__init__.py index 65f2533032a..e6907128642 100644 --- a/python/paddle/cost_model/__init__.py +++ b/python/paddle/cost_model/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,4 +13,5 @@ # limitations under the License. 
from .cost_model import CostModel # noqa: F401 + __all__ = ['CostModel'] diff --git a/python/paddle/cost_model/cost_model.py b/python/paddle/cost_model/cost_model.py index e6a87468a11..a59ff31a683 100644 --- a/python/paddle/cost_model/cost_model.py +++ b/python/paddle/cost_model/cost_model.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,6 +21,7 @@ from paddle.fluid import core class CostModel(): + def __init__(self): pass @@ -29,10 +30,11 @@ class CostModel(): main_program = static.Program() startup_program = static.Program() - with static.program_guard( - main_program=main_program, startup_program=startup_program): - data = paddle.static.data( - name='X', shape=[None, 1], dtype='float32') + with static.program_guard(main_program=main_program, + startup_program=startup_program): + data = paddle.static.data(name='X', + shape=[None, 1], + dtype='float32') hidden = paddle.static.nn.fc(data, 10) loss = paddle.mean(hidden) paddle.optimizer.SGD(learning_rate=0.01).minimize(loss) @@ -59,8 +61,8 @@ class CostModel(): cost_data = cost_model.ProfileMeasure(device) def static_cost_data(self): - static_cost_data_path = os.path.join( - os.path.dirname(__file__), "static_op_benchmark.json") + static_cost_data_path = os.path.join(os.path.dirname(__file__), + "static_op_benchmark.json") with open(static_cost_data_path, 'r') as load_f: load_dict = json.load(load_f) self._static_cost_data = load_dict diff --git a/python/paddle/dataset/cifar.py b/python/paddle/dataset/cifar.py index b33f1314f62..9c4f4adccd2 100644 --- a/python/paddle/dataset/cifar.py +++ b/python/paddle/dataset/cifar.py @@ -47,10 +47,11 @@ CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85' def reader_creator(filename, sub_name, cycle=False): + def read_batch(batch): data = batch[six.b('data')] - labels = batch.get( - six.b('labels'), batch.get(six.b('fine_labels'), None)) + labels = batch.get(six.b('labels'), batch.get(six.b('fine_labels'), + None)) assert labels is not None for sample, label in six.moves.zip(data, labels): yield (sample / 255.0).astype(numpy.float32), int(label) @@ -129,10 +130,10 @@ def train10(cycle=False): :return: Training reader creator :rtype: callable """ - return reader_creator( - paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), - 'data_batch', - cycle=cycle) + return reader_creator(paddle.dataset.common.download( + CIFAR10_URL, 'cifar', CIFAR10_MD5), + 'data_batch', + cycle=cycle) @deprecated( @@ -152,10 +153,10 @@ def test10(cycle=False): :return: Test reader creator. 
:rtype: callable """ - return reader_creator( - paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), - 'test_batch', - cycle=cycle) + return reader_creator(paddle.dataset.common.download( + CIFAR10_URL, 'cifar', CIFAR10_MD5), + 'test_batch', + cycle=cycle) @deprecated( diff --git a/python/paddle/dataset/common.py b/python/paddle/dataset/common.py index 71f469b92e4..5a10fe120ea 100644 --- a/python/paddle/dataset/common.py +++ b/python/paddle/dataset/common.py @@ -64,9 +64,9 @@ def download(url, module_name, md5sum, save_name=None): if not os.path.exists(dirname): os.makedirs(dirname) - filename = os.path.join(dirname, - url.split('/')[-1] - if save_name is None else save_name) + filename = os.path.join( + dirname, + url.split('/')[-1] if save_name is None else save_name) if os.path.exists(filename) and md5file(filename) == md5sum: return filename @@ -79,8 +79,9 @@ def download(url, module_name, md5sum, save_name=None): if retry < retry_limit: retry += 1 else: - raise RuntimeError("Cannot download {0} within retry limit {1}". - format(url, retry_limit)) + raise RuntimeError( + "Cannot download {0} within retry limit {1}".format( + url, retry_limit)) sys.stderr.write("Cache file %s not found, downloading %s \n" % (filename, url)) sys.stderr.write("Begin to download\n") @@ -98,8 +99,8 @@ def download(url, module_name, md5sum, save_name=None): total_iter = total_length / chunk_size + 1 log_interval = total_iter // 20 if total_iter > 20 else 1 log_index = 0 - bar = paddle.hapi.progressbar.ProgressBar( - total_iter, name='item') + bar = paddle.hapi.progressbar.ProgressBar(total_iter, + name='item') for data in r.iter_content(chunk_size=chunk_size): f.write(data) log_index += 1 @@ -121,9 +122,8 @@ def fetch_all(): ]: if "fetch" in dir( importlib.import_module("paddle.dataset.%s" % module_name)): - getattr( - importlib.import_module("paddle.dataset.%s" % module_name), - "fetch")() + getattr(importlib.import_module("paddle.dataset.%s" % module_name), + "fetch")() def split(reader, line_count, suffix="%05d.pickle", dumper=pickle.dump): @@ -206,5 +206,5 @@ def _check_exists_and_download(path, url, md5, module_name, download=True): if download: return paddle.dataset.common.download(url, module_name, md5) else: - raise ValueError('{} not exists and auto download disabled'.format( - path)) + raise ValueError( + '{} not exists and auto download disabled'.format(path)) diff --git a/python/paddle/dataset/conll05.py b/python/paddle/dataset/conll05.py index f09163ea424..eb43eaf742e 100644 --- a/python/paddle/dataset/conll05.py +++ b/python/paddle/dataset/conll05.py @@ -152,6 +152,7 @@ def reader_creator(corpus_reader, word_dict=None, predicate_dict=None, label_dict=None): + def reader(): for sentence, predicate, labels in corpus_reader(): diff --git a/python/paddle/dataset/flowers.py b/python/paddle/dataset/flowers.py index 8ca948b49bc..04b3a4cfc17 100644 --- a/python/paddle/dataset/flowers.py +++ b/python/paddle/dataset/flowers.py @@ -73,8 +73,11 @@ def default_mapper(is_train, sample): ''' img, label = sample img = load_image_bytes(img) - img = simple_transform( - img, 256, 224, is_train, mean=[103.94, 116.78, 123.68]) + img = simple_transform(img, + 256, + 224, + is_train, + mean=[103.94, 116.78, 123.68]) return img.flatten().astype('float32'), label @@ -164,15 +167,14 @@ def train(mapper=train_mapper, buffered_size=1024, use_xmap=True, cycle=False): :return: train data reader :rtype: callable ''' - return reader_creator( - download(DATA_URL, 'flowers', DATA_MD5), - download(LABEL_URL, 
'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), - TRAIN_FLAG, - mapper, - buffered_size, - use_xmap, - cycle=cycle) + return reader_creator(download(DATA_URL, 'flowers', DATA_MD5), + download(LABEL_URL, 'flowers', LABEL_MD5), + download(SETID_URL, 'flowers', SETID_MD5), + TRAIN_FLAG, + mapper, + buffered_size, + use_xmap, + cycle=cycle) @deprecated( @@ -198,15 +200,14 @@ def test(mapper=test_mapper, buffered_size=1024, use_xmap=True, cycle=False): :return: test data reader :rtype: callable ''' - return reader_creator( - download(DATA_URL, 'flowers', DATA_MD5), - download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), - TEST_FLAG, - mapper, - buffered_size, - use_xmap, - cycle=cycle) + return reader_creator(download(DATA_URL, 'flowers', DATA_MD5), + download(LABEL_URL, 'flowers', LABEL_MD5), + download(SETID_URL, 'flowers', SETID_MD5), + TEST_FLAG, + mapper, + buffered_size, + use_xmap, + cycle=cycle) @deprecated( @@ -230,11 +231,10 @@ def valid(mapper=test_mapper, buffered_size=1024, use_xmap=True): :return: test data reader :rtype: callable ''' - return reader_creator( - download(DATA_URL, 'flowers', DATA_MD5), - download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), VALID_FLAG, mapper, - buffered_size, use_xmap) + return reader_creator(download(DATA_URL, 'flowers', DATA_MD5), + download(LABEL_URL, 'flowers', LABEL_MD5), + download(SETID_URL, 'flowers', SETID_MD5), VALID_FLAG, + mapper, buffered_size, use_xmap) def fetch(): diff --git a/python/paddle/dataset/image.py b/python/paddle/dataset/image.py index a094529edf5..ae0d7d95b11 100644 --- a/python/paddle/dataset/image.py +++ b/python/paddle/dataset/image.py @@ -45,10 +45,9 @@ if six.PY3: # will be the C++ execubable on Windows if sys.platform == 'win32' and 'python.exe' not in interpreter: interpreter = sys.exec_prefix + os.sep + 'python.exe' - import_cv2_proc = subprocess.Popen( - [interpreter, "-c", "import cv2"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + import_cv2_proc = subprocess.Popen([interpreter, "-c", "import cv2"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) out, err = import_cv2_proc.communicate() retcode = import_cv2_proc.poll() if retcode != 0: @@ -123,10 +122,9 @@ def batch_images_from_tar(data_file, output = {} output['label'] = labels output['data'] = data - pickle.dump( - output, - open('%s/batch_%d' % (out_path, file_id), 'wb'), - protocol=2) + pickle.dump(output, + open('%s/batch_%d' % (out_path, file_id), 'wb'), + protocol=2) file_id += 1 data = [] labels = [] @@ -134,8 +132,9 @@ def batch_images_from_tar(data_file, output = {} output['label'] = labels output['data'] = data - pickle.dump( - output, open('%s/batch_%d' % (out_path, file_id), 'wb'), protocol=2) + pickle.dump(output, + open('%s/batch_%d' % (out_path, file_id), 'wb'), + protocol=2) with open(meta_file, 'a') as meta: for file in os.listdir(out_path): diff --git a/python/paddle/dataset/imdb.py b/python/paddle/dataset/imdb.py index 961d238b0ad..b45cf4f6474 100644 --- a/python/paddle/dataset/imdb.py +++ b/python/paddle/dataset/imdb.py @@ -51,9 +51,9 @@ def tokenize(pattern): while tf != None: if bool(pattern.match(tf.name)): # newline and punctuations removal and ad-hoc tokenization. 
- yield tarf.extractfile(tf).read().rstrip(six.b( - "\n\r")).translate( - None, six.b(string.punctuation)).lower().split() + yield tarf.extractfile(tf).read().rstrip( + six.b("\n\r")).translate(None, six.b( + string.punctuation)).lower().split() tf = tarf.next() @@ -117,9 +117,8 @@ def train(word_idx): :return: Training reader creator :rtype: callable """ - return reader_creator( - re.compile(r"aclImdb/train/pos/.*\.txt$"), - re.compile(r"aclImdb/train/neg/.*\.txt$"), word_idx) + return reader_creator(re.compile(r"aclImdb/train/pos/.*\.txt$"), + re.compile(r"aclImdb/train/neg/.*\.txt$"), word_idx) @deprecated( @@ -139,9 +138,8 @@ def test(word_idx): :return: Test reader creator :rtype: callable """ - return reader_creator( - re.compile(r"aclImdb/test/pos/.*\.txt$"), - re.compile(r"aclImdb/test/neg/.*\.txt$"), word_idx) + return reader_creator(re.compile(r"aclImdb/test/pos/.*\.txt$"), + re.compile(r"aclImdb/test/neg/.*\.txt$"), word_idx) @deprecated( diff --git a/python/paddle/dataset/imikolov.py b/python/paddle/dataset/imikolov.py index 85fe011fa14..fa6b1d7493b 100644 --- a/python/paddle/dataset/imikolov.py +++ b/python/paddle/dataset/imikolov.py @@ -83,6 +83,7 @@ def build_dict(min_word_freq=50): def reader_creator(filename, word_idx, n, data_type): + def reader(): with tarfile.open( paddle.dataset.common.download( diff --git a/python/paddle/dataset/mnist.py b/python/paddle/dataset/mnist.py index 02cdd307083..5c81d5d25cf 100644 --- a/python/paddle/dataset/mnist.py +++ b/python/paddle/dataset/mnist.py @@ -41,6 +41,7 @@ TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432' def reader_creator(image_filename, label_filename, buffer_size): + def reader(): with gzip.GzipFile(image_filename, 'rb') as image_file: img_buf = image_file.read() @@ -61,8 +62,8 @@ def reader_creator(image_filename, label_filename, buffer_size): offset_lab = 0 # label file : 8B magic_byte_lab = '>II' - magic_lab, label_num = struct.unpack_from(magic_byte_lab, - lab_buf, offset_lab) + magic_lab, label_num = struct.unpack_from( + magic_byte_lab, lab_buf, offset_lab) offset_lab += struct.calcsize(magic_byte_lab) while True: @@ -76,8 +77,9 @@ def reader_creator(image_filename, label_filename, buffer_size): fmt_images = '>' + str(buffer_size * rows * cols) + 'B' images_temp = struct.unpack_from(fmt_images, img_buf, offset_img) - images = numpy.reshape(images_temp, ( - buffer_size, rows * cols)).astype('float32') + images = numpy.reshape( + images_temp, + (buffer_size, rows * cols)).astype('float32') offset_img += struct.calcsize(fmt_images) images = images / 255.0 diff --git a/python/paddle/dataset/movielens.py b/python/paddle/dataset/movielens.py index 9af06e088ca..ccf9a95436b 100644 --- a/python/paddle/dataset/movielens.py +++ b/python/paddle/dataset/movielens.py @@ -89,8 +89,8 @@ class UserInfo(object): def __str__(self): return "" % ( - self.index, "M" - if self.is_male else "F", age_table[self.age], self.job_id) + self.index, "M" if self.is_male else "F", age_table[self.age], + self.job_id) def __repr__(self): return str(self) @@ -142,8 +142,10 @@ def __initialize_meta_info__(): for line in user_file: line = cpt.to_text(line, encoding='latin') uid, gender, age, job, _ = line.strip().split("::") - USER_INFO[int(uid)] = UserInfo( - index=uid, gender=gender, age=age, job_id=job) + USER_INFO[int(uid)] = UserInfo(index=uid, + gender=gender, + age=age, + job_id=job) return fn diff --git a/python/paddle/dataset/tests/cifar_test.py b/python/paddle/dataset/tests/cifar_test.py index 54dff6b40cf..7de9f06db60 100644 --- 
a/python/paddle/dataset/tests/cifar_test.py +++ b/python/paddle/dataset/tests/cifar_test.py @@ -21,6 +21,7 @@ __all__ = [] class TestCIFAR(unittest.TestCase): + def check_reader(self, reader): sum = 0 label = 0 diff --git a/python/paddle/dataset/tests/flowers_test.py b/python/paddle/dataset/tests/flowers_test.py index 256c116b7cf..14a8917ec71 100644 --- a/python/paddle/dataset/tests/flowers_test.py +++ b/python/paddle/dataset/tests/flowers_test.py @@ -21,6 +21,7 @@ __all__ = [] class TestFlowers(unittest.TestCase): + def check_reader(self, reader): sum = 0 label = 0 diff --git a/python/paddle/dataset/tests/imikolov_test.py b/python/paddle/dataset/tests/imikolov_test.py index 5556274211f..7c0b186a2d9 100644 --- a/python/paddle/dataset/tests/imikolov_test.py +++ b/python/paddle/dataset/tests/imikolov_test.py @@ -23,6 +23,7 @@ __all__ = [] class TestMikolov(unittest.TestCase): + def check_reader(self, reader, n): for l in reader(): self.assertEqual(len(l), n) diff --git a/python/paddle/dataset/tests/mnist_test.py b/python/paddle/dataset/tests/mnist_test.py index 238b58244e1..f878329b0ff 100644 --- a/python/paddle/dataset/tests/mnist_test.py +++ b/python/paddle/dataset/tests/mnist_test.py @@ -21,6 +21,7 @@ __all__ = [] class TestMNIST(unittest.TestCase): + def check_reader(self, reader): sum = 0 label = 0 diff --git a/python/paddle/dataset/tests/test_image.py b/python/paddle/dataset/tests/test_image.py index 259939d62f6..af4d697edf2 100644 --- a/python/paddle/dataset/tests/test_image.py +++ b/python/paddle/dataset/tests/test_image.py @@ -23,6 +23,7 @@ __all__ = [] class Image(unittest.TestCase): + def test_resize_flip_chw(self): # resize im = image.load_image('cat.jpg') diff --git a/python/paddle/dataset/tests/voc2012_test.py b/python/paddle/dataset/tests/voc2012_test.py index 21c24e6df82..7a6fd7150ff 100644 --- a/python/paddle/dataset/tests/voc2012_test.py +++ b/python/paddle/dataset/tests/voc2012_test.py @@ -21,6 +21,7 @@ __all__ = [] class TestVOC(unittest.TestCase): + def check_reader(self, reader): sum = 0 label = 0 diff --git a/python/paddle/dataset/tests/wmt16_test.py b/python/paddle/dataset/tests/wmt16_test.py index 68a9819c8f3..b75924fe65f 100644 --- a/python/paddle/dataset/tests/wmt16_test.py +++ b/python/paddle/dataset/tests/wmt16_test.py @@ -21,6 +21,7 @@ __all__ = [] class TestWMT16(unittest.TestCase): + def checkout_one_sample(self, sample): # train data has 3 field: source language word indices, # target language word indices, and target next word indices. 
@@ -38,22 +39,22 @@ class TestWMT16(unittest.TestCase): def test_train(self): for idx, sample in enumerate( - paddle.dataset.wmt16.train( - src_dict_size=100000, trg_dict_size=100000)()): + paddle.dataset.wmt16.train(src_dict_size=100000, + trg_dict_size=100000)()): if idx >= 10: break self.checkout_one_sample(sample) def test_test(self): for idx, sample in enumerate( - paddle.dataset.wmt16.test( - src_dict_size=1000, trg_dict_size=1000)()): + paddle.dataset.wmt16.test(src_dict_size=1000, + trg_dict_size=1000)()): if idx >= 10: break self.checkout_one_sample(sample) def test_val(self): for idx, sample in enumerate( - paddle.dataset.wmt16.validation( - src_dict_size=1000, trg_dict_size=1000)()): + paddle.dataset.wmt16.validation(src_dict_size=1000, + trg_dict_size=1000)()): if idx >= 10: break self.checkout_one_sample(sample) diff --git a/python/paddle/dataset/uci_housing.py b/python/paddle/dataset/uci_housing.py index dea2dfc8c98..ae72c8e88ea 100644 --- a/python/paddle/dataset/uci_housing.py +++ b/python/paddle/dataset/uci_housing.py @@ -73,8 +73,8 @@ def load_data(filename, feature_num=14, ratio=0.8): data = np.fromfile(filename, sep=' ') data = data.reshape(data.shape[0] // feature_num, feature_num) - maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum( - axis=0) / data.shape[0] + maximums, minimums, avgs = data.max(axis=0), data.min( + axis=0), data.sum(axis=0) / data.shape[0] # if you want to print the distribution of input data, you could use function of feature_range #feature_range(maximums[:-1], minimums[:-1]) for i in six.moves.range(feature_num - 1): @@ -135,8 +135,10 @@ def test(): def fluid_model(): - parameter_tar = paddle.dataset.common.download( - FLUID_URL_MODEL, 'uci_housing', FLUID_MD5_MODEL, 'fit_a_line.fluid.tar') + parameter_tar = paddle.dataset.common.download(FLUID_URL_MODEL, + 'uci_housing', + FLUID_MD5_MODEL, + 'fit_a_line.fluid.tar') tar = tarfile.TarFile(parameter_tar, mode='r') dirpath = tempfile.mkdtemp() diff --git a/python/paddle/dataset/wmt14.py b/python/paddle/dataset/wmt14.py index 9f8abb2c4bf..bb0a77b4f20 100644 --- a/python/paddle/dataset/wmt14.py +++ b/python/paddle/dataset/wmt14.py @@ -50,6 +50,7 @@ UNK_IDX = 2 def __read_to_dict(tar_file, dict_size): + def __to_dict(fd, size): out_dict = dict() for line_count, line in enumerate(fd): @@ -76,6 +77,7 @@ def __read_to_dict(tar_file, dict_size): def reader_creator(tar_file, file_name, dict_size): + def reader(): src_dict, trg_dict = __read_to_dict(tar_file, dict_size) with tarfile.open(tar_file, mode='r') as f: diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py index f313da98f0a..80e35d9fde9 100644 --- a/python/paddle/dataset/wmt16.py +++ b/python/paddle/dataset/wmt16.py @@ -68,9 +68,9 @@ def __build_dict(tar_file, dict_size, save_path, lang): fout.write( cpt.to_bytes("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK))) for idx, word in enumerate( - sorted( - six.iteritems(word_dict), key=lambda x: x[1], - reverse=True)): + sorted(six.iteritems(word_dict), + key=lambda x: x[1], + reverse=True)): if idx + 3 == dict_size: break fout.write(cpt.to_bytes(word[0])) fout.write(cpt.to_bytes('\n')) @@ -79,8 +79,8 @@ def __build_dict(tar_file, dict_size, save_path, lang): def __load_dict(tar_file, dict_size, lang, reverse=False): dict_path = os.path.join(paddle.dataset.common.DATA_HOME, "wmt16/%s_%d.dict" % (lang, dict_size)) - if not os.path.exists(dict_path) or ( - len(open(dict_path, "rb").readlines()) != dict_size): + if not os.path.exists(dict_path) or 
(len(open(dict_path, "rb").readlines()) + != dict_size): __build_dict(tar_file, dict_size, dict_path, lang) word_dict = {} @@ -94,14 +94,15 @@ def __load_dict(tar_file, dict_size, lang, reverse=False): def __get_dict_size(src_dict_size, trg_dict_size, src_lang): - src_dict_size = min(src_dict_size, (TOTAL_EN_WORDS if src_lang == "en" else - TOTAL_DE_WORDS)) - trg_dict_size = min(trg_dict_size, (TOTAL_DE_WORDS if src_lang == "en" else - TOTAL_EN_WORDS)) + src_dict_size = min( + src_dict_size, (TOTAL_EN_WORDS if src_lang == "en" else TOTAL_DE_WORDS)) + trg_dict_size = min( + trg_dict_size, (TOTAL_DE_WORDS if src_lang == "en" else TOTAL_EN_WORDS)) return src_dict_size, trg_dict_size def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang): + def reader(): src_dict = __load_dict(tar_file, src_dict_size, src_lang) trg_dict = __load_dict(tar_file, trg_dict_size, @@ -124,9 +125,9 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang): if len(line_split) != 2: continue src_words = line_split[src_col].split() - src_ids = [start_id] + [ - src_dict.get(w, unk_id) for w in src_words - ] + [end_id] + src_ids = [start_id + ] + [src_dict.get(w, unk_id) + for w in src_words] + [end_id] trg_words = line_split[trg_col].split() trg_ids = [trg_dict.get(w, unk_id) for w in trg_words] @@ -184,13 +185,12 @@ def train(src_dict_size, trg_dict_size, src_lang="en"): src_dict_size, trg_dict_size = __get_dict_size(src_dict_size, trg_dict_size, src_lang) - return reader_creator( - tar_file=paddle.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, - "wmt16.tar.gz"), - file_name="wmt16/train", - src_dict_size=src_dict_size, - trg_dict_size=trg_dict_size, - src_lang=src_lang) + return reader_creator(tar_file=paddle.dataset.common.download( + DATA_URL, "wmt16", DATA_MD5, "wmt16.tar.gz"), + file_name="wmt16/train", + src_dict_size=src_dict_size, + trg_dict_size=trg_dict_size, + src_lang=src_lang) @deprecated( @@ -238,13 +238,12 @@ def test(src_dict_size, trg_dict_size, src_lang="en"): src_dict_size, trg_dict_size = __get_dict_size(src_dict_size, trg_dict_size, src_lang) - return reader_creator( - tar_file=paddle.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, - "wmt16.tar.gz"), - file_name="wmt16/test", - src_dict_size=src_dict_size, - trg_dict_size=trg_dict_size, - src_lang=src_lang) + return reader_creator(tar_file=paddle.dataset.common.download( + DATA_URL, "wmt16", DATA_MD5, "wmt16.tar.gz"), + file_name="wmt16/test", + src_dict_size=src_dict_size, + trg_dict_size=trg_dict_size, + src_lang=src_lang) @deprecated( @@ -290,13 +289,12 @@ def validation(src_dict_size, trg_dict_size, src_lang="en"): src_dict_size, trg_dict_size = __get_dict_size(src_dict_size, trg_dict_size, src_lang) - return reader_creator( - tar_file=paddle.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, - "wmt16.tar.gz"), - file_name="wmt16/val", - src_dict_size=src_dict_size, - trg_dict_size=trg_dict_size, - src_lang=src_lang) + return reader_creator(tar_file=paddle.dataset.common.download( + DATA_URL, "wmt16", DATA_MD5, "wmt16.tar.gz"), + file_name="wmt16/val", + src_dict_size=src_dict_size, + trg_dict_size=trg_dict_size, + src_lang=src_lang) @deprecated( diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py index 89e0ae49fc4..929a1c2d77f 100644 --- a/python/paddle/device/__init__.py +++ b/python/paddle/device/__init__.py @@ -1,18 +1,18 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# TODO: define the functions to manipulate devices +# TODO: define the functions to manipulate devices import re import os from paddle.fluid import core diff --git a/python/paddle/device/cuda/__init__.py b/python/paddle/device/cuda/__init__.py index 8cb4f5f7656..d867f071229 100644 --- a/python/paddle/device/cuda/__init__.py +++ b/python/paddle/device/cuda/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -178,8 +178,8 @@ def extract_cuda_device_id(device, op_name): else: raise ValueError( "The current string {} is not expected. Because {} only support string which is like 'gpu:x'. " - "Please input appropriate string again!".format(device, - op_name)) + "Please input appropriate string again!".format( + device, op_name)) else: raise ValueError( "The device type {} is not expected. Because {} only support int, str or paddle.CUDAPlace. " diff --git a/python/paddle/device/cuda/graphs.py b/python/paddle/device/cuda/graphs.py index e7987cf447f..c6554d78fb8 100644 --- a/python/paddle/device/cuda/graphs.py +++ b/python/paddle/device/cuda/graphs.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -31,6 +31,7 @@ ALL_MODES = ["global", "thread_local", "relaxed"] class CUDAGraph: + def __init__(self, place=None, mode="thread_local"): assert CoreCUDAGraph is not None, "CUDA Graph is only supported on PaddlePaddle compiled with NVIDIA GPU." @@ -61,7 +62,7 @@ class CUDAGraph: assert os.path.isdir( dirname), "The dirname {} should be a directory".format(dirname) if flags is None: - flags = 2047 # only all information. It can be any integer inside [1, 2048) + flags = 2047 # only all information. It can be any integer inside [1, 2048) self._graph.print_to_dot_files(dirname, flags) diff --git a/python/paddle/device/cuda/streams.py b/python/paddle/device/cuda/streams.py index 4efe5005034..d25355056e8 100644 --- a/python/paddle/device/cuda/streams.py +++ b/python/paddle/device/cuda/streams.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/distributed/__init__.py b/python/paddle/distributed/__init__.py index 50e4f7285b1..003a14799c5 100644 --- a/python/paddle/distributed/__init__.py +++ b/python/paddle/distributed/__init__.py @@ -59,33 +59,33 @@ from . import utils # noqa: F401 from .sharding import * # noqa: F401 __all__ = [ # noqa - "spawn", - "launch", - "scatter", - "broadcast", - "ParallelEnv", - "new_group", - "init_parallel_env", - "gloo_init_parallel_env", - "gloo_barrier", - "gloo_release", - "QueueDataset", - "split", - "CountFilterEntry", - "ShowClickEntry", - "get_world_size", - "get_group", - "all_gather", - "InMemoryDataset", - "barrier", - "all_reduce", - "alltoall", - "send", - "reduce", - "recv", - "ReduceOp", - "wait", - "get_rank", - "ProbabilityEntry", - "ParallelMode", + "spawn", + "launch", + "scatter", + "broadcast", + "ParallelEnv", + "new_group", + "init_parallel_env", + "gloo_init_parallel_env", + "gloo_barrier", + "gloo_release", + "QueueDataset", + "split", + "CountFilterEntry", + "ShowClickEntry", + "get_world_size", + "get_group", + "all_gather", + "InMemoryDataset", + "barrier", + "all_reduce", + "alltoall", + "send", + "reduce", + "recv", + "ReduceOp", + "wait", + "get_rank", + "ProbabilityEntry", + "ParallelMode", ] diff --git a/python/paddle/distributed/auto_parallel/cluster.py b/python/paddle/distributed/auto_parallel/cluster.py index 3685729cb6c..e70b29dbe39 100644 --- a/python/paddle/distributed/auto_parallel/cluster.py +++ b/python/paddle/distributed/auto_parallel/cluster.py @@ -50,14 +50,14 @@ class Device: self._local_id = local_id self._machine = machine self._type = None - # Different device have different models, such as + # Different device have different models, such as # "Tesla V100-SXM2-32GB" and "A100-SXM4-40GB" etc. 
self._model = None # Double precision GFLOPS self._dp_gflops = None # Single precision GFLOPS self._sp_gflops = None - # Memory is stored by GB + # Memory is stored by GB self._memory = None @property @@ -144,9 +144,9 @@ class Link: self._src = source self._tgt = target self._type = None - # bandwidth is stored by GB/s + # bandwidth is stored by GB/s self._bandwidth = None - # latency is stored by millisecond + # latency is stored by millisecond self._latency = None self._hop = None @@ -210,6 +210,7 @@ class Link: class Machine: + def __init__(self, id): self._id = id self._hostname = None @@ -290,6 +291,7 @@ class Machine: class AlphaLatency: + def __init__(self, alpha_latency): assert isinstance(alpha_latency, dict) self._base = alpha_latency.get("base", None) diff --git a/python/paddle/distributed/auto_parallel/completion.py b/python/paddle/distributed/auto_parallel/completion.py index 86632d81098..19a5b001abb 100644 --- a/python/paddle/distributed/auto_parallel/completion.py +++ b/python/paddle/distributed/auto_parallel/completion.py @@ -137,6 +137,7 @@ def _validate_dims_mapping(dims_mapping, process_mesh): class Completer: + def __init__(self, dist_context): assert dist_context is not None self._dist_context = dist_context @@ -248,8 +249,8 @@ class Completer: tensor_desc.name(), compatible_dims_mapping) changed = True # Find the most compatible implemenetations from the distributed operator - op_dist_impls = find_compatible_distributed_operator_impls( - dist_op, fwd=True) + op_dist_impls = find_compatible_distributed_operator_impls(dist_op, + fwd=True) if op_dist_impls is not None: not_compatible = True backup_op_dist_attr = copy.deepcopy(op_dist_attr) @@ -451,6 +452,7 @@ class Completer: tensor_dist_attr.process_mesh = compatible_process_mesh def _update_process_mesh_for_specials(self): + def _find_nearest_tensor_node_before(nodes, idx, var_name): for node in reversed(nodes[:idx]): if node.is_var() and node.var() is not None \ @@ -694,8 +696,8 @@ class Completer: # Step 2.2: set the process meshes of ops by the nearest op node after the first op node if idx_of_first_op_node_has_process_mesh + 1 > len(ordered_op_nodes): return None - for idx, op_node in enumerate(ordered_op_nodes[ - idx_of_first_op_node_has_process_mesh + 1:]): + for idx, op_node in enumerate( + ordered_op_nodes[idx_of_first_op_node_has_process_mesh + 1:]): original_idx = idx_of_first_op_node_has_process_mesh + idx + 1 nearest_op_node = ordered_op_nodes[original_idx - 1] nearest_op_dist_attr = self._dist_context.get_dist_attr_for_graph( @@ -831,9 +833,9 @@ class Completer: if grad_op.desc.original_id( ) in dist_op_context.grad_op_id_to_op_id: # TODO support the case where one forward op corresponding to multiple xxx_grad op - forward_op = _get_op_by_id(ops, - dist_op_context.grad_op_id_to_op_id[ - grad_op.desc.original_id()]) + forward_op = _get_op_by_id( + ops, dist_op_context.grad_op_id_to_op_id[ + grad_op.desc.original_id()]) assert forward_op is not None fwd_op_dist_attr = self._dist_context.get_op_dist_attr_for_program( @@ -862,8 +864,8 @@ class Completer: input_name) assert ref_dims_mapping is not None, "[{}] 's dims mapping is NONE".format( input_name) - grad_op_dist_attr.set_input_dims_mapping(input_name, - ref_dims_mapping) + grad_op_dist_attr.set_input_dims_mapping( + input_name, ref_dims_mapping) for output_name in grad_op.output_arg_names: assert output_name in grad_var_to_var[appended_grad_times] @@ -878,8 +880,8 @@ class Completer: self._dist_context.set_tensor_dist_attr_for_program( output_var, 
tensor_dist_attr) # op - grad_op_dist_attr.set_output_dims_mapping(output_name, - ref_dims_mapping) + grad_op_dist_attr.set_output_dims_mapping( + output_name, ref_dims_mapping) self._dist_context.set_op_dist_attr_for_program( grad_op, grad_op_dist_attr) @@ -934,10 +936,10 @@ class Completer: # op grad_op_dist_attr = OperatorDistributedAttribute() grad_op_dist_attr.process_mesh = ref_process_mesh - grad_op_dist_attr.set_input_dims_mapping(ref_var_name, - ref_dims_mapping) - grad_op_dist_attr.set_output_dims_mapping(output_var_name, - ref_dims_mapping) + grad_op_dist_attr.set_input_dims_mapping( + ref_var_name, ref_dims_mapping) + grad_op_dist_attr.set_output_dims_mapping( + output_var_name, ref_dims_mapping) elif grad_op.type in ['shape', 'fill_constant']: continue @@ -977,8 +979,8 @@ class Completer: first_backward_op_idx = -1 for idx, op in enumerate(serial_main_program.global_block().ops): if int(op.attr('op_role')) == int( - int(core.op_proto_and_checker_maker.OpRole.Backward) | int( - core.op_proto_and_checker_maker.OpRole.Loss)): + int(core.op_proto_and_checker_maker.OpRole.Backward) + | int(core.op_proto_and_checker_maker.OpRole.Loss)): assert op.type == "fill_constant" first_backward_op_idx = idx break @@ -1025,8 +1027,8 @@ class Completer: op_dist_attr.process_mesh = process_mesh op_dist_attr.set_output_dims_mapping(grad_var.name, dims_mapping) - self._dist_context.set_op_dist_attr_for_program(ops[idx], - op_dist_attr) + self._dist_context.set_op_dist_attr_for_program( + ops[idx], op_dist_attr) continue # complete the annotation of grad op (xxx_grad op or sum op) @@ -1035,9 +1037,10 @@ class Completer: if grad_op.desc.original_id( ) in dist_op_context.grad_op_id_to_op_id: # TODO support the case where one forward op corresponding to multiple xxx_grad op - forward_op = _get_op_by_id(ops[:first_backward_op_idx], - dist_op_context.grad_op_id_to_op_id[ - grad_op.desc.original_id()]) + forward_op = _get_op_by_id( + ops[:first_backward_op_idx], + dist_op_context.grad_op_id_to_op_id[ + grad_op.desc.original_id()]) assert forward_op is not None if grad_op.type == "concat" and forward_op.type == "split": @@ -1060,8 +1063,8 @@ class Completer: self._dist_context.set_tensor_dist_attr_for_program( output_var, output_var_dist_attr) - grad_op_dist_attr.set_output_dims_mapping(output_var.name, - ref_dims_mapping) + grad_op_dist_attr.set_output_dims_mapping( + output_var.name, ref_dims_mapping) grad_op_dist_attr.process_mesh = ref_mesh self._dist_context.set_op_dist_attr_for_program( grad_op, grad_op_dist_attr) @@ -1095,8 +1098,8 @@ class Completer: input_name) assert ref_dims_mapping is not None, "[{}] 's dims mapping is NONE".format( input_name) - grad_op_dist_attr.set_input_dims_mapping(input_name, - ref_dims_mapping) + grad_op_dist_attr.set_input_dims_mapping( + input_name, ref_dims_mapping) for output_name in grad_op.output_arg_names: assert output_name in grad_var_to_var @@ -1111,8 +1114,8 @@ class Completer: self._dist_context.set_tensor_dist_attr_for_program( output_var, tensor_dist_attr) # op - grad_op_dist_attr.set_output_dims_mapping(output_name, - ref_dims_mapping) + grad_op_dist_attr.set_output_dims_mapping( + output_name, ref_dims_mapping) grad_op_dist_attr.impl_type = fwd_op_dist_attr.impl_type grad_op_dist_attr.impl_idx = fwd_op_dist_attr.impl_idx @@ -1170,10 +1173,10 @@ class Completer: # op grad_op_dist_attr = OperatorDistributedAttribute() grad_op_dist_attr.process_mesh = ref_process_mesh - grad_op_dist_attr.set_input_dims_mapping(ref_var_name, - ref_dims_mapping) - 
grad_op_dist_attr.set_output_dims_mapping(output_var_name, - ref_dims_mapping) + grad_op_dist_attr.set_input_dims_mapping( + ref_var_name, ref_dims_mapping) + grad_op_dist_attr.set_output_dims_mapping( + output_var_name, ref_dims_mapping) else: raise ValueError("got unexpect op [{}]".format( @@ -1186,7 +1189,7 @@ class Completer: """Complete the annotation of vars and ops in the update phase for parallel program.""" # Notice: serial_main_program is actually a dist_main_program of current rank, - # and must be passed into this function. + # and must be passed into this function. # TODO: We should fix this behavior. ops = list(serial_main_program.global_block().ops) @@ -1223,10 +1226,10 @@ class Completer: op, op_dist_attr) if "Grad" in op.input_names and "Param" in ops[idx].input_names: - assert len(op.input( - "Param")) == 1, "Only support one-to-one now." - assert len(op.input( - "Grad")) == 1, "Only support one-to-one now." + assert len( + op.input("Param")) == 1, "Only support one-to-one now." + assert len( + op.input("Grad")) == 1, "Only support one-to-one now." param = vars[op.input("Param")[0]] grad_var = vars[op.input("Grad")[0]] @@ -1245,12 +1248,12 @@ class Completer: ref_dims_mapping) op_dist_attr.set_input_dims_mapping(param.name, ref_dims_mapping) - op_dist_attr.set_output_dims_mapping(param.name, - ref_dims_mapping) + op_dist_attr.set_output_dims_mapping( + param.name, ref_dims_mapping) learning_var = vars[op.input("LearningRate")[0]] op_dist_attr.set_input_dims_mapping(learning_var.name, [-1]) - op_dist_attr.set_output_dims_mapping(learning_var.name, - [-1]) + op_dist_attr.set_output_dims_mapping( + learning_var.name, [-1]) if not learning_rate_completed: learning_rate_completed = True @@ -1275,10 +1278,10 @@ class Completer: if "Beta1Pow" in input_name or "Beta2Pow" in input_name: input_var_attr.dims_mapping = [-1] - op_dist_attr.set_input_dims_mapping(input_var.name, - [-1]) - op_dist_attr.set_output_dims_mapping(input_var.name, - [-1]) + op_dist_attr.set_input_dims_mapping( + input_var.name, [-1]) + op_dist_attr.set_output_dims_mapping( + input_var.name, [-1]) else: assert "Moment" in input_name input_var_attr.dims_mapping = ref_dims_mapping diff --git a/python/paddle/distributed/auto_parallel/converter.py b/python/paddle/distributed/auto_parallel/converter.py index 2ea200c7d6f..69292ab1827 100644 --- a/python/paddle/distributed/auto_parallel/converter.py +++ b/python/paddle/distributed/auto_parallel/converter.py @@ -133,8 +133,9 @@ class Converter(object): tensors_dict[tensor_name] = Converter.merge_and_slice( tensor_list, pre_dist_attr, cur_dist_attr) except ValueError as err: - raise ValueError("Fail to convert tensor '{}'. " - .format(str(tensor_name)) + str(err)) + raise ValueError( + "Fail to convert tensor '{}'. ".format(str(tensor_name)) + + str(err)) for tensor_name in self._pre_strategy: if tensor_name not in self._cur_strategy: @@ -150,17 +151,17 @@ class Converter(object): tensor_not_in_cur = set(tensor_not_in_cur) - set(tensor_match_with_cur) if tensor_not_in_pre: warnings.warn( - "tensors [{}] are not found in last training strategy." - .format(str(tensor_not_in_pre))) + "tensors [{}] are not found in last training strategy.".format( + str(tensor_not_in_pre))) if tensor_not_in_cur: warnings.warn( - "tensors [{}] are not found in current training strategy." - .format(str(tensor_not_in_cur))) + "tensors [{}] are not found in current training strategy.". 
+ format(str(tensor_not_in_cur))) if tensor_not_in_ckpt: warnings.warn( "tensors [{}] are found in pre_strategy, but are not found" - "in checkpoint files, please check your checkpoint files." - .format(str(tensor_not_in_ckpt))) + "in checkpoint files, please check your checkpoint files.". + format(str(tensor_not_in_ckpt))) return tensors_dict @@ -360,8 +361,9 @@ class Converter(object): """ sliced_tensor_list = [] axis = len(complete_tensor.shape) - length - sliced_tensor = np.split( - complete_tensor, partition_index_list[axis], axis=axis) + sliced_tensor = np.split(complete_tensor, + partition_index_list[axis], + axis=axis) if length == 1: return sliced_tensor for tensor in sliced_tensor: diff --git a/python/paddle/distributed/auto_parallel/cost/base_cost.py b/python/paddle/distributed/auto_parallel/cost/base_cost.py index 763f78c5106..4455d6f6648 100644 --- a/python/paddle/distributed/auto_parallel/cost/base_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/base_cost.py @@ -85,8 +85,8 @@ def _parse_op_to_desc(op, dist_context=None): def parse_to_desc(op=None, dist_op=None, dist_context=None): desc = None if op is None and dist_op is not None and dist_context is not None: - desc = _parse_op_to_desc( - op=dist_op.serial_op, dist_context=dist_context) + desc = _parse_op_to_desc(op=dist_op.serial_op, + dist_context=dist_context) elif op is not None and dist_op is None and dist_context is None: desc = _parse_op_to_desc(op) @@ -94,6 +94,7 @@ def parse_to_desc(op=None, dist_op=None, dist_context=None): def parse_desc_to_str(desc): + def _parse_dtype(dtype): dtype_str = "" if dtype == paddle.float32: @@ -248,10 +249,10 @@ class CommContext: else: for i in range(len(ranks)): for j in range(i + 1, len(ranks)): - forward_order_beta = self.cluster.get_beta(ranks[i], - ranks[j]) - backward_order_beta = self.cluster.get_beta(ranks[j], - ranks[i]) + forward_order_beta = self.cluster.get_beta( + ranks[i], ranks[j]) + backward_order_beta = self.cluster.get_beta( + ranks[j], ranks[i]) beta = forward_order_beta if forward_order_beta > backward_order_beta else backward_order_beta if max_beta == None: max_beta = beta @@ -275,6 +276,7 @@ class CommContext: class Cost: + def __init__(self, time=0, memory=0, flops=0): self.time = time self.memory = memory @@ -338,6 +340,7 @@ class Cost: class OpCost: + def __init__(self, op=None, op_desc=None): self._op = op self._op_desc = op_desc @@ -462,8 +465,8 @@ class CommOpCost(OpCost): elif dtype == paddle.float16: factor = 2 else: - raise TypeError("This dtype {} is not supported now".format( - dtype)) + raise TypeError( + "This dtype {} is not supported now".format(dtype)) comm_count = reduce(lambda x, y: x * y, shape) * factor self._comm_count = comm_count @@ -506,8 +509,9 @@ class CommOpCost(OpCost): def _check_comm_op_type(cls): if cls.OP_TYPE != "COMM": if cls.OP_TYPE not in COMM_OP_TYPE: - raise TypeError("Please Check op type in {}, but got {}.". - format(COMM_OP_TYPE, cls.OP_TYPE)) + raise TypeError( + "Please Check op type in {}, but got {}.".format( + COMM_OP_TYPE, cls.OP_TYPE)) class CompOpCost(OpCost): @@ -523,8 +527,9 @@ class CompOpCost(OpCost): def _check_comp_op_type(cls): if cls.OP_TYPE != "COMP": if cls.OP_TYPE in NON_COMP_TYPE: - raise TypeError("Please Check op type not in {}, but got {}.". 
- format(NON_COMP_TYPE, cls.OP_TYPE)) + raise TypeError( + "Please Check op type not in {}, but got {}.".format( + NON_COMP_TYPE, cls.OP_TYPE)) def register_op_cost(cls): diff --git a/python/paddle/distributed/auto_parallel/cost/comm_op_cost.py b/python/paddle/distributed/auto_parallel/cost/comm_op_cost.py index a32fdf1824e..0f92bcc8fac 100644 --- a/python/paddle/distributed/auto_parallel/cost/comm_op_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/comm_op_cost.py @@ -22,8 +22,9 @@ class AllreduceSumOpCost(CommOpCost): OP_TYPE = "c_allreduce_sum" def __init__(self, op=None, op_desc=None, comm_context=None): - super(AllreduceSumOpCost, self).__init__( - op=op, op_desc=op_desc, comm_context=comm_context) + super(AllreduceSumOpCost, self).__init__(op=op, + op_desc=op_desc, + comm_context=comm_context) def calc_time(self): # use tree if cross machine and use ring if in a single machine @@ -38,20 +39,20 @@ class AllreduceSumOpCost(CommOpCost): def calc_time_ring(self): alpha = self.comm_context.base_ring - alpha += 2 * ( - self.rank_count - self.machine_count) * self.comm_context.intra_ring + alpha += 2 * (self.rank_count - + self.machine_count) * self.comm_context.intra_ring alpha += 2 * (self.machine_count - 1) * ( self.comm_context.inter_ring + self.hops * self.comm_context.switch) beta = self.comm_context.get_max_beta(self.group_ranks) - time = alpha + 2 * (self.rank_count - 1 - ) / self.rank_count * self.comm_count * beta + time = alpha + 2 * (self.rank_count - + 1) / self.rank_count * self.comm_count * beta return time def calc_time_tree(self): alpha = self.comm_context.base_tree - alpha += 2 * (self.rank_count / self.machine_count - 1 - ) * self.comm_context.intra_tree + alpha += 2 * (self.rank_count / self.machine_count - + 1) * self.comm_context.intra_tree alpha += math.log2(self.machine_count) * ( self.comm_context.inter_tree + self.hops * self.comm_context.switch) beta = self.comm_context.get_max_beta(self.group_ranks) @@ -66,8 +67,9 @@ class AllgatherOpCost(CommOpCost): OP_TYPE = "c_allgather" def __init__(self, op=None, op_desc=None, comm_context=None): - super(AllgatherOpCost, self).__init__( - op=op, op_desc=op_desc, comm_context=comm_context) + super(AllgatherOpCost, self).__init__(op=op, + op_desc=op_desc, + comm_context=comm_context) def calc_time(self): time = self.calc_time_ring() @@ -75,13 +77,13 @@ class AllgatherOpCost(CommOpCost): def calc_time_ring(self): alpha = self.comm_context.base_ring - alpha += ( - self.rank_count - self.machine_count) * self.comm_context.intra_ring + alpha += (self.rank_count - + self.machine_count) * self.comm_context.intra_ring alpha += (self.machine_count - 1) * ( self.comm_context.inter_ring + self.hops * self.comm_context.switch) beta = self.comm_context.get_max_beta(self.group_ranks) - time = alpha + (self.rank_count - 1 - ) / self.rank_count * self.comm_count * beta + time = alpha + (self.rank_count - + 1) / self.rank_count * self.comm_count * beta return time @@ -90,8 +92,9 @@ class BroadcastOpCost(CommOpCost): OP_TYPE = "c_broadcast" def __init__(self, op=None, op_desc=None, comm_context=None): - super(BroadcastOpCost, self).__init__( - op=op, op_desc=op_desc, comm_context=comm_context) + super(BroadcastOpCost, self).__init__(op=op, + op_desc=op_desc, + comm_context=comm_context) def calc_time(self): time = self.calc_time_ring() @@ -114,8 +117,9 @@ class IdentityOpCost(CommOpCost): OP_TYPE = "c_identity" def __init__(self, op=None, op_desc=None, comm_context=None): - super(IdentityOpCost, self).__init__( - op=op, 
op_desc=op_desc, comm_context=comm_context) + super(IdentityOpCost, self).__init__(op=op, + op_desc=op_desc, + comm_context=comm_context) def calc_time(self): return 0 @@ -126,8 +130,9 @@ class RecvOpCost(CommOpCost): OP_TYPE = "recv_v2" def __init__(self, op=None, op_desc=None, comm_context=None): - super(RecvOpCost, self).__init__( - op=op, op_desc=op_desc, comm_context=comm_context) + super(RecvOpCost, self).__init__(op=op, + op_desc=op_desc, + comm_context=comm_context) def calc_time(self): alpha = self.comm_context.base_ring @@ -145,8 +150,9 @@ class SendOpCost(CommOpCost): OP_TYPE = "send_v2" def __init__(self, op=None, op_desc=None, comm_context=None): - super(SendOpCost, self).__init__( - op=op, op_desc=op_desc, comm_context=comm_context) + super(SendOpCost, self).__init__(op=op, + op_desc=op_desc, + comm_context=comm_context) def calc_time(self): alpha = self.comm_context.base_ring diff --git a/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py b/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py index 8958c4bf905..6556a1110d2 100644 --- a/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py @@ -20,8 +20,9 @@ class AssignOpCost(CompOpCost): OP_TYPE = "assign" def __init__(self, op=None, op_desc=None, cluster=None): - super(AssignOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(AssignOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -38,8 +39,9 @@ class AssignValueOpCost(CompOpCost): OP_TYPE = "assign_value" def __init__(self, op=None, op_desc=None, cluster=None): - super(AssignValueOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(AssignValueOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -56,8 +58,9 @@ class BeamSearchOpCost(CompOpCost): OP_TYPE = "beam_search" def __init__(self, op=None, op_desc=None, cluster=None): - super(BeamSearchOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(BeamSearchOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -74,8 +77,9 @@ class BeamSearchDecodeOpCost(CompOpCost): OP_TYPE = "beam_search_decode" def __init__(self, op=None, op_desc=None, cluster=None): - super(BeamSearchDecodeOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(BeamSearchDecodeOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -92,8 +96,9 @@ class CastOpCost(CompOpCost): OP_TYPE = "cast" def __init__(self, op=None, op_desc=None, cluster=None): - super(CastOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(CastOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -110,8 +115,9 @@ class ConcatOpCost(CompOpCost): OP_TYPE = "concat" def __init__(self, op=None, op_desc=None, cluster=None): - super(ConcatOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ConcatOpCost, self).__init__(op=op, + op_desc=op_desc, + 
cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -128,8 +134,9 @@ class ElementwiseAddOpCost(CompOpCost): OP_TYPE = "elementwise_add" def __init__(self, op=None, op_desc=None, cluster=None): - super(ElementwiseAddOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ElementwiseAddOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -146,8 +153,9 @@ class ElementwiseAddGradOpCost(CompOpCost): OP_TYPE = "elementwise_add_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(ElementwiseAddGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ElementwiseAddGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -164,8 +172,9 @@ class ElementwiseDivOpCost(CompOpCost): OP_TYPE = "elementwise_div" def __init__(self, op=None, op_desc=None, cluster=None): - super(ElementwiseDivOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ElementwiseDivOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -182,8 +191,9 @@ class ElementwiseDivGradOpCost(CompOpCost): OP_TYPE = "elementwise_div_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(ElementwiseDivGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ElementwiseDivGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -200,8 +210,9 @@ class ElementwiseMulOpCost(CompOpCost): OP_TYPE = "elementwise_mul" def __init__(self, op=None, op_desc=None, cluster=None): - super(ElementwiseMulOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ElementwiseMulOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -218,8 +229,9 @@ class ElementwiseMulGradOpCost(CompOpCost): OP_TYPE = "elementwise_mul_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(ElementwiseMulGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ElementwiseMulGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -236,8 +248,9 @@ class ElementwiseSubOpCost(CompOpCost): OP_TYPE = "elementwise_sub" def __init__(self, op=None, op_desc=None, cluster=None): - super(ElementwiseSubOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ElementwiseSubOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -254,8 +267,9 @@ class ElementwiseSubGradOpCost(CompOpCost): OP_TYPE = "elementwise_sub_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(ElementwiseSubGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ElementwiseSubGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP 
OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -272,8 +286,9 @@ class EmbeddingOpCost(CompOpCost): OP_TYPE = "c_embedding" def __init__(self, op=None, op_desc=None, cluster=None): - super(EmbeddingOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(EmbeddingOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -290,8 +305,9 @@ class EmbeddingGradOpCost(CompOpCost): OP_TYPE = "c_embedding_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(EmbeddingGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(EmbeddingGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -308,8 +324,9 @@ class FillConstantOpCost(CompOpCost): OP_TYPE = "fill_constant" def __init__(self, op=None, op_desc=None, cluster=None): - super(FillConstantOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(FillConstantOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -326,8 +343,9 @@ class FillConstantBatchSizeLikeOpCost(CompOpCost): OP_TYPE = "fill_constant_batch_size_like" def __init__(self, op=None, op_desc=None, cluster=None): - super(FillConstantBatchSizeLikeOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(FillConstantBatchSizeLikeOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -344,8 +362,8 @@ class FillConstantBatchSizeLikeGradOpCost(CompOpCost): OP_TYPE = "fill_constant_batch_size_like_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(FillConstantBatchSizeLikeGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(FillConstantBatchSizeLikeGradOpCost, + self).__init__(op=op, op_desc=op_desc, cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -362,8 +380,9 @@ class GatherOpCost(CompOpCost): OP_TYPE = "gather" def __init__(self, op=None, op_desc=None, cluster=None): - super(GatherOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(GatherOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -380,8 +399,9 @@ class GeluOpCost(CompOpCost): OP_TYPE = "gelu" def __init__(self, op=None, op_desc=None, cluster=None): - super(GeluOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(GeluOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -398,8 +418,9 @@ class GeluGradOpCost(CompOpCost): OP_TYPE = "gelu_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(GeluGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(GeluGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -416,8 +437,9 @@ class 
GreaterEqualOpCost(CompOpCost): OP_TYPE = "greater_equal" def __init__(self, op=None, op_desc=None, cluster=None): - super(GreaterEqualOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(GreaterEqualOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -434,8 +456,9 @@ class IncrementOpCost(CompOpCost): OP_TYPE = "increment" def __init__(self, op=None, op_desc=None, cluster=None): - super(IncrementOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(IncrementOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -448,8 +471,9 @@ class IsEmptyOpCost(CompOpCost): OP_TYPE = "is_empty" def __init__(self, op=None, op_desc=None, cluster=None): - super(IsEmptyOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(IsEmptyOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -462,8 +486,9 @@ class LayerNormOpCost(CompOpCost): OP_TYPE = "layer_norm" def __init__(self, op=None, op_desc=None, cluster=None): - super(LayerNormOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(LayerNormOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -480,8 +505,9 @@ class LayerNormGradOpCost(CompOpCost): OP_TYPE = "layer_norm_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(LayerNormGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(LayerNormGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -498,8 +524,9 @@ class LessThanOpCost(CompOpCost): OP_TYPE = "less_than" def __init__(self, op=None, op_desc=None, cluster=None): - super(LessThanOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(LessThanOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -516,8 +543,9 @@ class LogicalNotOpCost(CompOpCost): OP_TYPE = "logical_not" def __init__(self, op=None, op_desc=None, cluster=None): - super(LogicalNotOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(LogicalNotOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -534,8 +562,9 @@ class LogicalAndOpCost(CompOpCost): OP_TYPE = "logical_and" def __init__(self, op=None, op_desc=None, cluster=None): - super(LogicalAndOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(LogicalAndOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -552,8 +581,9 @@ class LodResetOpCost(CompOpCost): OP_TYPE = "lod_reset" def __init__(self, op=None, op_desc=None, cluster=None): - super(LodResetOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(LodResetOpCost, self).__init__(op=op, + 
op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -587,8 +617,9 @@ class LookupTableV2OpCost(CompOpCost): OP_TYPE = "lookup_table_v2" def __init__(self, op=None, op_desc=None, cluster=None): - super(LookupTableV2OpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(LookupTableV2OpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -605,8 +636,9 @@ class LookupTableV2GradOpCost(CompOpCost): OP_TYPE = "lookup_table_v2_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(LookupTableV2GradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(LookupTableV2GradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -623,8 +655,9 @@ class MatmulOpCost(CompOpCost): OP_TYPE = "matmul" def __init__(self, op=None, op_desc=None, cluster=None): - super(MatmulOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(MatmulOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -641,8 +674,9 @@ class MatmulGradOpCost(CompOpCost): OP_TYPE = "matmul_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(MatmulGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(MatmulGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -659,8 +693,9 @@ class MatmulV2OpCost(CompOpCost): OP_TYPE = "matmul_v2" def __init__(self, op=None, op_desc=None, cluster=None): - super(MatmulV2OpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(MatmulV2OpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -677,8 +712,9 @@ class MatmulV2GradOpCost(CompOpCost): OP_TYPE = "matmul_v2_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(MatmulV2GradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(MatmulV2GradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -695,8 +731,9 @@ class MemcpyOpCost(CompOpCost): OP_TYPE = "memcpy" def __init__(self, op=None, op_desc=None, cluster=None): - super(MemcpyOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(MemcpyOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -730,8 +767,9 @@ class MulGradOpCost(CompOpCost): OP_TYPE = "mul_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(MulGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(MulGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -748,8 +786,9 @@ class OneHotOpCost(CompOpCost): OP_TYPE = "one_hot" def __init__(self, 
op=None, op_desc=None, cluster=None): - super(OneHotOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(OneHotOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -766,8 +805,9 @@ class ReadFromArrayOpCost(CompOpCost): OP_TYPE = "read_from_array" def __init__(self, op=None, op_desc=None, cluster=None): - super(ReadFromArrayOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ReadFromArrayOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -784,8 +824,9 @@ class ReduceSumOpCost(CompOpCost): OP_TYPE = "reduce_sum" def __init__(self, op=None, op_desc=None, cluster=None): - super(ReduceSumOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ReduceSumOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -802,8 +843,9 @@ class ReduceSumGradOpCost(CompOpCost): OP_TYPE = "reduce_sum_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(ReduceSumGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ReduceSumGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -820,8 +862,9 @@ class Reshape2OpCost(CompOpCost): OP_TYPE = "reshape2" def __init__(self, op=None, op_desc=None, cluster=None): - super(Reshape2OpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(Reshape2OpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -838,8 +881,9 @@ class Reshape2GradOpCost(CompOpCost): OP_TYPE = "reshape2_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(Reshape2GradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(Reshape2GradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -856,8 +900,9 @@ class ReduceMeanOpCost(CompOpCost): OP_TYPE = "reduce_mean" def __init__(self, op=None, op_desc=None, cluster=None): - super(ReduceMeanOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ReduceMeanOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -874,8 +919,9 @@ class ReduceMeanGradOpCost(CompOpCost): OP_TYPE = "reduce_mean_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(ReduceMeanGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ReduceMeanGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -892,8 +938,9 @@ class SamplingIdOpCost(CompOpCost): OP_TYPE = "sampling_id" def __init__(self, op=None, op_desc=None, cluster=None): - super(SamplingIdOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(SamplingIdOpCost, self).__init__(op=op, + op_desc=op_desc, + 
cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -910,8 +957,9 @@ class ScaleOpCost(CompOpCost): OP_TYPE = "scale" def __init__(self, op=None, op_desc=None, cluster=None): - super(ScaleOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(ScaleOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -928,8 +976,9 @@ class SliceOpCost(CompOpCost): OP_TYPE = "slice" def __init__(self, op=None, op_desc=None, cluster=None): - super(SliceOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(SliceOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -946,8 +995,9 @@ class SoftmaxOpCost(CompOpCost): OP_TYPE = "softmax" def __init__(self, op=None, op_desc=None, cluster=None): - super(SoftmaxOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(SoftmaxOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -964,8 +1014,9 @@ class SoftmaxGradOpCost(CompOpCost): OP_TYPE = "softmax_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(SoftmaxGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(SoftmaxGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -982,8 +1033,9 @@ class SoftmaxWithCrossEntropyOpCost(CompOpCost): OP_TYPE = "softmax_with_cross_entropy" def __init__(self, op=None, op_desc=None, cluster=None): - super(SoftmaxWithCrossEntropyOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(SoftmaxWithCrossEntropyOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -1000,8 +1052,9 @@ class SoftmaxWithCrossEntropyGradOpCost(CompOpCost): OP_TYPE = "softmax_with_cross_entropy_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(SoftmaxWithCrossEntropyGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(SoftmaxWithCrossEntropyGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -1018,8 +1071,9 @@ class SplitOpCost(CompOpCost): OP_TYPE = "split" def __init__(self, op=None, op_desc=None, cluster=None): - super(SplitOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(SplitOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -1036,8 +1090,9 @@ class Squeeze2OpCost(CompOpCost): OP_TYPE = "squeeze2" def __init__(self, op=None, op_desc=None, cluster=None): - super(Squeeze2OpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(Squeeze2OpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -1054,8 +1109,9 @@ class SquareOpCost(CompOpCost): 
OP_TYPE = "square" def __init__(self, op=None, op_desc=None, cluster=None): - super(SquareOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(SquareOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -1072,8 +1128,9 @@ class SquareGradOpCost(CompOpCost): OP_TYPE = "square_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(SquareGradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(SquareGradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -1107,8 +1164,9 @@ class TopKOpCost(CompOpCost): OP_TYPE = "top_k" def __init__(self, op=None, op_desc=None, cluster=None): - super(TopKOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(TopKOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -1125,8 +1183,9 @@ class Transpose2OpCost(CompOpCost): OP_TYPE = "transpose2" def __init__(self, op=None, op_desc=None, cluster=None): - super(Transpose2OpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(Transpose2OpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -1143,8 +1202,9 @@ class Transpose2GradOpCost(CompOpCost): OP_TYPE = "transpose2_grad" def __init__(self, op=None, op_desc=None, cluster=None): - super(Transpose2GradOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(Transpose2GradOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -1161,8 +1221,9 @@ class Unsqueeze2OpCost(CompOpCost): OP_TYPE = "unsqueeze2" def __init__(self, op=None, op_desc=None, cluster=None): - super(Unsqueeze2OpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(Unsqueeze2OpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): @@ -1179,8 +1240,9 @@ class WriteToArrayOpCost(CompOpCost): OP_TYPE = "write_to_array" def __init__(self, op=None, op_desc=None, cluster=None): - super(WriteToArrayOpCost, self).__init__( - op=op, op_desc=op_desc, cluster=cluster) + super(WriteToArrayOpCost, self).__init__(op=op, + op_desc=op_desc, + cluster=cluster) # For a concrete COMP OP, the calc_time and calc_flops function need to be overrided def calc_flops(self): diff --git a/python/paddle/distributed/auto_parallel/cost/estimate_cost.py b/python/paddle/distributed/auto_parallel/cost/estimate_cost.py index 7bd535af8be..5a1aeec2d9f 100644 --- a/python/paddle/distributed/auto_parallel/cost/estimate_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/estimate_cost.py @@ -14,6 +14,7 @@ class CostEstimator: + def __init__(self, program, cluster=None, diff --git a/python/paddle/distributed/auto_parallel/cost/tensor_cost.py b/python/paddle/distributed/auto_parallel/cost/tensor_cost.py index 2db1c06d596..9741020da65 100644 --- a/python/paddle/distributed/auto_parallel/cost/tensor_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/tensor_cost.py @@ 
-22,6 +22,7 @@ from .base_cost import Cost class TensorCost: + def __init__(self, tensor=None, dist_tensor=None, shape=None, dtype=None): self._check_args(tensor, dist_tensor, shape, dtype) self._tensor = tensor @@ -59,20 +60,20 @@ class TensorCost: assert (tensor is None and shape is None) if not isinstance(dist_tensor, DistributedTensor): raise TypeError( - "Please check dist_tensor type is DistributedTensor, but got {}". - format(type(dist_tensor))) + "Please check dist_tensor type is DistributedTensor, but got {}" + .format(type(dist_tensor))) elif shape is not None: - assert (tensor is None and dist_tensor is None and - dtype is not None) + assert (tensor is None and dist_tensor is None + and dtype is not None) if not isinstance(shape, (list, set)): raise TypeError( "Please check shape type is list or set, but got {}".format( type(shape))) elif dtype is not None: - assert (tensor is None and dist_tensor is None and - shape is not None) + assert (tensor is None and dist_tensor is None + and shape is not None) @property def cost(self): diff --git a/python/paddle/distributed/auto_parallel/cost_model.py b/python/paddle/distributed/auto_parallel/cost_model.py index b72c044428f..e35fae57cae 100644 --- a/python/paddle/distributed/auto_parallel/cost_model.py +++ b/python/paddle/distributed/auto_parallel/cost_model.py @@ -37,6 +37,7 @@ class CostNodeType(Enum): class Cost(object): + def __init__(self): self.runtime = None self.static_mem = None @@ -51,6 +52,7 @@ class CostModelMode(Enum): class CostNode(object): + def __init__(self, node, node_type, id=None): self.id = id self.node = node @@ -71,6 +73,7 @@ class CostNode(object): class MergedOpsCostNode(CostNode): + def __init__(self, node_type, id=None, base_node_list=None, is_bwd=False): super(MergedOpsCostNode, self).__init__(None, node_type, id) self.node_list = base_node_list @@ -78,6 +81,7 @@ class MergedOpsCostNode(CostNode): class CommOpCostNode(CostNode): + def __init__(self, node, node_type, @@ -118,6 +122,7 @@ class CommOpCostNode(CostNode): class TensorCostNode(CostNode): + def __init__(self, node, node_type, @@ -159,6 +164,7 @@ class TensorCostNode(CostNode): class CompOpCostNode(CostNode): + def __init__(self, node, node_type, id=None, is_bwd=False, is_optim=False): super(CompOpCostNode, self).__init__(node, node_type, id) self.is_bwd = is_bwd @@ -174,6 +180,7 @@ class CompOpCostNode(CostNode): class PipeEvent(object): + def __init__(self, stage_id, event_name, duration, start_time=-1): self.stage_id = stage_id self.name = event_name @@ -183,6 +190,7 @@ class PipeEvent(object): class CostModel(object): + def __init__(self, mode=CostModelMode.BENCHMARKING, cluster=None, @@ -261,8 +269,8 @@ class CostModel(object): op_node = CommOpCostNode(op, CostNodeType.COMMUNICATION, op_id, is_bwd) else: - is_bwd = (int(op.attr('op_role')) == int(OpRole.Backward) - ) or "@GRAD" in op.input_arg_names + is_bwd = (int(op.attr('op_role')) == int( + OpRole.Backward)) or "@GRAD" in op.input_arg_names is_optim = 'LearningRate' in op.input_names op_node = CompOpCostNode(op, CostNodeType.COMPUTATION, op_id, is_bwd, is_optim) @@ -310,11 +318,10 @@ class CostModel(object): write_op_cnt += 1 new_var_id = node_id + '_write_{}'.format(write_op_cnt) - new_var = TensorCostNode( - node.node, - CostNodeType.VARIABLE, - new_var_id, - shared_node_id=node_id) + new_var = TensorCostNode(node.node, + CostNodeType.VARIABLE, + new_var_id, + shared_node_id=node_id) graph[new_var_id] = [[], []] graph[pred_id][SUCC].append(new_var_id) @@ -341,8 +348,8 @@ class 
CostModel(object): self.runtime_graph.append({}) self._parse_sub_program( sub_prog, self.nodes[sub_idx], self.origin_graph[sub_idx], - self.cost_data[0 if self.rank2pp is None else self.rank2pp[ - sub_idx]], sub_idx) + self.cost_data[0 if self.rank2pp is None else self. + rank2pp[sub_idx]], sub_idx) return self.nodes def _find_succ_op(self, node_id, sub_idx=0): @@ -417,11 +424,10 @@ class CostModel(object): merge_type)) merged_node_id = 'merged_' + str(len(nodes)) is_bwd = to_merge_node_list[0].is_bwd - merged_node = MergedOpsCostNode( - CostNodeType.MERGED, - id=merged_node_id, - base_node_list=nodes_list, - is_bwd=is_bwd) + merged_node = MergedOpsCostNode(CostNodeType.MERGED, + id=merged_node_id, + base_node_list=nodes_list, + is_bwd=is_bwd) merged_node.cost = node_cost return merged_node_id, merged_node @@ -435,10 +441,12 @@ class CostModel(object): ''' cnt = 0 for sub_idx in range(self.total_rank): - cnt += self._merge_linear( - self.nodes[sub_idx], self.runtime_graph[sub_idx], is_bwd=False) - cnt += self._merge_linear( - self.nodes[sub_idx], self.runtime_graph[sub_idx], is_bwd=True) + cnt += self._merge_linear(self.nodes[sub_idx], + self.runtime_graph[sub_idx], + is_bwd=False) + cnt += self._merge_linear(self.nodes[sub_idx], + self.runtime_graph[sub_idx], + is_bwd=True) return cnt def merge_branch(self): @@ -454,10 +462,12 @@ class CostModel(object): ''' cnt = 0 for sub_idx in range(self.total_rank): - cnt += self._merge_branch( - self.nodes[sub_idx], self.runtime_graph[sub_idx], is_bwd=False) - cnt += self._merge_branch( - self.nodes[sub_idx], self.runtime_graph[sub_idx], is_bwd=True) + cnt += self._merge_branch(self.nodes[sub_idx], + self.runtime_graph[sub_idx], + is_bwd=False) + cnt += self._merge_branch(self.nodes[sub_idx], + self.runtime_graph[sub_idx], + is_bwd=True) return cnt def _merge_linear(self, nodes, runtime_graph, is_bwd=False): @@ -482,8 +492,8 @@ class CostModel(object): # delete edges and add new edges succ = None try: - runtime_graph[merged_node_id][SUCC] = copy.deepcopy(edges[ - SUCC]) + runtime_graph[merged_node_id][SUCC] = copy.deepcopy( + edges[SUCC]) if len(runtime_graph[pred_id][SUCC]) > 1: # predecessor has more than 1 successor @@ -558,8 +568,8 @@ class CostModel(object): to_merge = True try: - if len(edges[SUCC]) < 1 or len(runtime_graph[edges[SUCC][0]] - [SUCC]) < 1: + if len(edges[SUCC]) < 1 or len( + runtime_graph[edges[SUCC][0]][SUCC]) < 1: continue except: continue @@ -596,6 +606,7 @@ class CostModel(object): return reduct_cnt def get_runtime_cost(self): + def get_node_cost(node): node_cost = node.cost + self.opcall_overhead if isinstance(node, MergedOpsCostNode): @@ -660,8 +671,8 @@ class CostModel(object): static_mem += size cur_mem += size edges = sim_graph[node_id] - if not (node.type == CostNodeType.VARIABLE and - node.node.persistable): + if not (node.type == CostNodeType.VARIABLE + and node.node.persistable): for succ_id in edges[SUCC]: sim_graph[succ_id][PRED].remove(node_id) if len(sim_graph[succ_id][PRED]) == 0: @@ -670,8 +681,8 @@ class CostModel(object): pred = nodes if pred.type == CostNodeType.VARIABLE: sim_graph[pred_id][SUCC].remove(node_id) - if len(sim_graph[pred_id][ - SUCC]) == 0 and not pred.node.persistable: + if len(sim_graph[pred_id] + [SUCC]) == 0 and not pred.node.persistable: cur_mem -= pred.get_size() return static_mem, cur_mem, top_mem @@ -703,18 +714,16 @@ class CostModel(object): event_list.append(e) if stid != stage_num - 1: q.put( - PipeEvent( - stid + 1, - 'fwd', - self.fwd_time[stid + 1], - start_time=e.e_time)) + 
PipeEvent(stid + 1, + 'fwd', + self.fwd_time[stid + 1], + start_time=e.e_time)) else: q.put( - PipeEvent( - stid, - 'bwd', - self.bwd_time[stid], - start_time=e.e_time)) + PipeEvent(stid, + 'bwd', + self.bwd_time[stid], + start_time=e.e_time)) fwd_cnt[stid] -= 1 global_time[stid] = e.e_time else: @@ -725,20 +734,18 @@ class CostModel(object): event_list.append(e) if stid != 0: q.put( - PipeEvent( - stid - 1, - 'bwd', - self.bwd_time[stid - 1], - start_time=e.e_time)) + PipeEvent(stid - 1, + 'bwd', + self.bwd_time[stid - 1], + start_time=e.e_time)) fwd_cnt[stid] += 1 bwd_cnt[stid] -= 1 if bwd_cnt[stid] == 0: q.put( - PipeEvent( - stid, - 'optim', - self.optim_time[stid], - start_time=e.e_time)) + PipeEvent(stid, + 'optim', + self.optim_time[stid], + start_time=e.e_time)) global_time[stid] = e.e_time elif e.name == 'optim': e.s_time = max(global_time[stid], e.s_time) @@ -792,11 +799,10 @@ def estimate_cost(distributed_program, cluster, pipeline_config, """ # the following line is left for now, cluster model will be involved in the future assert cluster is None, "For now, cluster remains None" - cm_ctx = CostModel( - cluster=cluster, - batch_size=batch_size, - standalone_cost_data=standalone_cost_data, - pipeline_config=pipeline_config) + cm_ctx = CostModel(cluster=cluster, + batch_size=batch_size, + standalone_cost_data=standalone_cost_data, + pipeline_config=pipeline_config) cm_ctx.init(distributed_program) cost = cm_ctx.get_cost() return cost diff --git a/python/paddle/distributed/auto_parallel/dist_attribute.py b/python/paddle/distributed/auto_parallel/dist_attribute.py index 3dbdb79f485..9bbc4de6bdd 100644 --- a/python/paddle/distributed/auto_parallel/dist_attribute.py +++ b/python/paddle/distributed/auto_parallel/dist_attribute.py @@ -51,6 +51,7 @@ def append_op_output_suffix(name): class TensorDistributedAttribute: + def __init__(self): # The process mesh of distributed operator attribute must is the same as # the process meshes of all input and output distributed attributed @@ -123,8 +124,8 @@ class TensorDistributedAttribute: key, dist_attr) elif isinstance(dist_attr, TensorDistributedAttribute): for key in get_tensor_dist_attr_field_keys(): - field_property = TensorDistributedAttribute.__dict__.get(key, - None) + field_property = TensorDistributedAttribute.__dict__.get( + key, None) if field_property: field_property.fset(self, field_property.fget(dist_attr)) else: @@ -192,6 +193,7 @@ class TensorDistributedAttribute: class OperatorDistributedAttribute: + def __init__(self): self._process_mesh = None self._op_type = None @@ -356,8 +358,8 @@ class OperatorDistributedAttribute: tensor_name, dist_attr.get_output_dist_attr(tensor_name)) self._is_annotated = copy.deepcopy(dist_attr._is_annotated) for key in get_op_dist_attr_field_keys(): - field_property = OperatorDistributedAttribute.__dict__.get(key, - None) + field_property = OperatorDistributedAttribute.__dict__.get( + key, None) if field_property: field_property.fset(self, field_property.fget(dist_attr)) else: diff --git a/python/paddle/distributed/auto_parallel/dist_context.py b/python/paddle/distributed/auto_parallel/dist_context.py index df4c92641f7..bf4f66e7c1b 100644 --- a/python/paddle/distributed/auto_parallel/dist_context.py +++ b/python/paddle/distributed/auto_parallel/dist_context.py @@ -203,8 +203,8 @@ class DistributedContext: self._serial_main_program.clone()) self._backup_serial_startup_program_stack.append( self._serial_startup_program.clone()) - self._backup_pass_context_stack.append( - 
copy.deepcopy(self._pass_context)) + self._backup_pass_context_stack.append(copy.deepcopy( + self._pass_context)) self._backup_block_state_stack.append(copy.deepcopy(self._block_state)) def _backup_dist_info(self, mode): @@ -398,8 +398,8 @@ class DistributedContext: return dist_tensor else: serial_tensor_id = serial_tensor.desc.original_id() - dist_tensor = self._dist_tensors_for_program.get(serial_tensor_id, - None) + dist_tensor = self._dist_tensors_for_program.get( + serial_tensor_id, None) if dist_tensor: return dist_tensor else: @@ -438,8 +438,8 @@ class DistributedContext: return dist_tensor.dist_attr else: serial_tensor_id = serial_tensor.desc.original_id() - dist_tensor = self._dist_tensors_for_program.get(serial_tensor_id, - None) + dist_tensor = self._dist_tensors_for_program.get( + serial_tensor_id, None) if dist_tensor: return dist_tensor.dist_attr else: @@ -548,6 +548,7 @@ class DistributedContext: self._dist_ops_for_program) def _order_nodes_by_program_order(self): + def _contains(nodes, target_node): for node in nodes: if _node_id(node) == _node_id(target_node): @@ -719,8 +720,8 @@ class DistributedContext: # here we just set there process_mesh to the first one. for orphan_node in self._serial_orphan_tensor_nodes: serial_tensor_id = orphan_node.var().id() - dist_tensor = self._dist_tensors_for_program.get(serial_tensor_id, - None) + dist_tensor = self._dist_tensors_for_program.get( + serial_tensor_id, None) if dist_tensor: dist_tensor.dist_attr.process_mesh = self._process_meshes[0] else: @@ -807,11 +808,10 @@ class DistributedContext: assert dist_tensor is not None, \ "Tensor {} does not have a distributed attribute.".format( dist_tensor.serial_tensor.name) - if (dist_tensor is not None) and ( - not dist_tensor.validate_dist_attr()): + if (dist_tensor + is not None) and (not dist_tensor.validate_dist_attr()): assert False, "Tensor {} (id: {}, original_id: {}) has a wrong distributed attributes {}.".format( - dist_tensor.serial_tensor.name, - dist_tensor.desc.id(), + dist_tensor.serial_tensor.name, dist_tensor.desc.id(), dist_tensor.desc.original_id(), dist_tensor.dist_attr) for op in block.ops: dist_op = self.get_dist_op_for_program(op) @@ -820,8 +820,7 @@ class DistributedContext: dist_op.serial_op.type) if (dist_op is not None) and (not dist_op.validate_dist_attr()): assert False, "Operator {} (id: {}, original_id: {}) has a wrong distributed attributes {} .".format( - dist_op.serial_op.type, - dist_op.serial_op.desc.id(), + dist_op.serial_op.type, dist_op.serial_op.desc.id(), dist_op.serial_op.desc.original_id(), dist_op.dist_attr) return True @@ -947,6 +946,7 @@ class DistributedOperatorContext: class BlockState(object): + def __init__(self): self.nblock = 0 self.forward_indices = [] diff --git a/python/paddle/distributed/auto_parallel/dist_loader.py b/python/paddle/distributed/auto_parallel/dist_loader.py index aa315db5292..03cc340fecd 100644 --- a/python/paddle/distributed/auto_parallel/dist_loader.py +++ b/python/paddle/distributed/auto_parallel/dist_loader.py @@ -21,6 +21,7 @@ from paddle.io import DataLoader, DistributedBatchSampler class DistributedDataLoader(metaclass=abc.ABCMeta): + def __init__(self, dataset, batch_size=1, @@ -47,6 +48,7 @@ class DistributedDataLoader(metaclass=abc.ABCMeta): class NonIterableGeneratorLoader(DistributedDataLoader): + def __init__(self, dataset, feed_list, @@ -63,9 +65,10 @@ class NonIterableGeneratorLoader(DistributedDataLoader): self.dp_world_size = 1 if data_parallel_world_size is None else data_parallel_world_size 
self.dp_rank = 0 if data_parallel_rank is None else data_parallel_rank - super(NonIterableGeneratorLoader, self).__init__( - dataset, batch_size, epochs, data_parallel_world_size, - data_parallel_rank, drop_last) + super(NonIterableGeneratorLoader, + self).__init__(dataset, batch_size, epochs, + data_parallel_world_size, data_parallel_rank, + drop_last) self._inner_dataloader = self._create_inner_dataloader() self._steps = self._infer_steps() @@ -96,6 +99,7 @@ class NonIterableGeneratorLoader(DistributedDataLoader): return steps_per_epoch def _create_inner_dataloader(self): + def sample_data_generator(): batch_data = None for step, data in enumerate(self.dataset): diff --git a/python/paddle/distributed/auto_parallel/dist_op.py b/python/paddle/distributed/auto_parallel/dist_op.py index a2c2748a8ce..d48804b71fc 100644 --- a/python/paddle/distributed/auto_parallel/dist_op.py +++ b/python/paddle/distributed/auto_parallel/dist_op.py @@ -26,6 +26,7 @@ from .dist_attribute import get_op_dist_attr_field_keys class DistributedOperator: + def __init__(self, serial_op, dist_attr=None): self._serial_op = serial_op self._serial_inputs = {} @@ -248,6 +249,7 @@ class DistributedOperator: class DistributedModule: + def __init__(self, serial_module, dist_attr=None): self._serial_module = serial_module self._dist_attr = dist_attr diff --git a/python/paddle/distributed/auto_parallel/dist_saver.py b/python/paddle/distributed/auto_parallel/dist_saver.py index 261b18a56ec..c3dad9e2873 100644 --- a/python/paddle/distributed/auto_parallel/dist_saver.py +++ b/python/paddle/distributed/auto_parallel/dist_saver.py @@ -53,6 +53,7 @@ def _process_path(path): class DistributedSaver: + def __init__(self): self._logger = get_logger(logging.INFO) @@ -114,8 +115,8 @@ class DistributedSaver: param_file): param_file_list.append(os.path.join(dirname, param_file)) param_file_list.sort() - self._logger.info("Load distributed attribute file: {}".format( - param_file_list)) + self._logger.info( + "Load distributed attribute file: {}".format(param_file_list)) param_dict = {} for param_file in param_file_list: with open(param_file, 'rb') as f: @@ -131,11 +132,11 @@ class DistributedSaver: for dist_attr_file in os.listdir(dirname): if check_filename('{}(.*)_dist(.*).pdattr'.format(filename), dist_attr_file): - dist_attr_file_list.append( - os.path.join(dirname, dist_attr_file)) + dist_attr_file_list.append(os.path.join(dirname, + dist_attr_file)) dist_attr_file_list.sort() - self._logger.info("Load distributed attribute file: {}".format( - dist_attr_file_list)) + self._logger.info( + "Load distributed attribute file: {}".format(dist_attr_file_list)) pre_dist_attr = {} for dist_attr_file in dist_attr_file_list: with open(dist_attr_file, 'rb') as f: @@ -206,12 +207,11 @@ class DistributedSaver: # NOTE: `paddle.static.save_inference_model` does not support subblock. 
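# The comp_op_cost.py hunks earlier in this patch repeat one boilerplate
# shape dozens of times. A minimal standalone sketch of that shape is below
# (simplified names and bodies, not Paddle's real base class); note that
# Python 3's bare super() would keep these calls on one line and give yapf
# nothing to wrap.
class CompOpCostSketch:
    OP_TYPE = None

    def __init__(self, op=None, op_desc=None, cluster=None):
        self.op = op
        self.op_desc = op_desc
        self.cluster = cluster

    # Concrete COMP OP cost classes override calc_time and calc_flops.
    def calc_flops(self):
        raise NotImplementedError

    def calc_time(self):
        raise NotImplementedError


class GeluOpCostSketch(CompOpCostSketch):
    OP_TYPE = "gelu"

    def __init__(self, op=None, op_desc=None, cluster=None):
        super().__init__(op=op, op_desc=op_desc, cluster=cluster)

    def calc_flops(self):
        return 0  # placeholder; real subclasses derive this from op_desc

    def calc_time(self):
        return 0.0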
dist_filename = filename + "_dist" + str(rank_id) dist_path = os.path.join(dirname, dist_filename) - paddle.static.save_inference_model( - dist_path, - dist_feed_vars, - dist_fetch_vars, - exe, - program=dist_main_prog) + paddle.static.save_inference_model(dist_path, + dist_feed_vars, + dist_fetch_vars, + exe, + program=dist_main_prog) def _save_rank_mapping(self, dirname): path = os.path.join(dirname, 'rank_mapping.csv') diff --git a/python/paddle/distributed/auto_parallel/dist_tensor.py b/python/paddle/distributed/auto_parallel/dist_tensor.py index e3f06da2751..b6228f5ad0e 100644 --- a/python/paddle/distributed/auto_parallel/dist_tensor.py +++ b/python/paddle/distributed/auto_parallel/dist_tensor.py @@ -40,26 +40,26 @@ class DistributedTensor: processes, rank=None, shard_sizes=None): - if not (isinstance(sizes, (list, tuple)) and - all(map(lambda x: isinstance(x, int) and x >= 0, sizes))): + if not (isinstance(sizes, (list, tuple)) + and all(map(lambda x: isinstance(x, int) and x >= 0, sizes))): raise ValueError( - "The sizes must be list or tuple and item in sizes must be non-negative integer, but got {}". - format(sizes)) + "The sizes must be list or tuple and item in sizes must be non-negative integer, but got {}" + .format(sizes)) if not (isinstance(dims_mapping, (list, tuple)) and all( map(lambda x: isinstance(x, int) and x >= -1, dims_mapping))): raise ValueError( - "The dims_mapping must be list or tuple and item in dims_mapping must >= -1, but got {}". - format(dims_mapping)) - if not (isinstance(processes, (list, tuple)) and - all(map(lambda x: isinstance(x, int) and x >= 0, processes))): + "The dims_mapping must be list or tuple and item in dims_mapping must >= -1, but got {}" + .format(dims_mapping)) + if not (isinstance(processes, (list, tuple)) and all( + map(lambda x: isinstance(x, int) and x >= 0, processes))): raise ValueError( - "The processes must be list or tuple and item in processes must be integer, but got {}". - format(processes)) - if not (isinstance(topology, (list, tuple)) and - all(map(lambda x: isinstance(x, int) and x > 0, topology))): + "The processes must be list or tuple and item in processes must be integer, but got {}" + .format(processes)) + if not (isinstance(topology, (list, tuple)) + and all(map(lambda x: isinstance(x, int) and x > 0, topology))): raise ValueError( - "The topology must be list or tuple and item in topology must be non-negative integer, but got {}". 
- format(topology)) + "The topology must be list or tuple and item in topology must be non-negative integer, but got {}" + .format(topology)) if rank is not None and not (isinstance(rank, int) and rank >= 0): raise ValueError("The rank must >= 0, but got {}".format(rank)) @@ -74,8 +74,10 @@ class DistributedTensor: processes, rank=None, shard_sizes=None): - DistributedTensor._validate_sizes_and_dist_attr( - global_sizes, dims_mapping, topology, processes, rank, shard_sizes) + DistributedTensor._validate_sizes_and_dist_attr(global_sizes, + dims_mapping, topology, + processes, rank, + shard_sizes) local_sizes = [] # for even sharding, the local sizes of every rank are equal @@ -97,8 +99,10 @@ class DistributedTensor: processes, rank, shard_sizes=None): - local_sizes = DistributedTensor.get_local_sizes( - global_sizes, dims_mapping, topology, processes, rank, shard_sizes) + local_sizes = DistributedTensor.get_local_sizes(global_sizes, + dims_mapping, topology, + processes, rank, + shard_sizes) local_offsets = [] rank_relatvie = processes.index(rank) coordinate = _linear_idx2coordinate(topology, rank_relatvie) @@ -118,8 +122,10 @@ class DistributedTensor: processes, rank=None, shard_sizes=None): - DistributedTensor._validate_sizes_and_dist_attr( - local_sizes, dims_mapping, topology, processes, rank, shard_sizes) + DistributedTensor._validate_sizes_and_dist_attr(local_sizes, + dims_mapping, topology, + processes, rank, + shard_sizes) global_sizes = [] for idx, item in enumerate(local_sizes): if dims_mapping[idx] == -1: @@ -137,8 +143,10 @@ class DistributedTensor: shard_sizes=None): local_offsets = DistributedTensor.get_local_offsets( global_sizes, dims_mapping, topology, processes, rank, shard_sizes) - local_sizes = DistributedTensor.get_local_sizes( - global_sizes, dims_mapping, topology, processes, rank, shard_sizes) + local_sizes = DistributedTensor.get_local_sizes(global_sizes, + dims_mapping, topology, + processes, rank, + shard_sizes) assert len(local_sizes) == len( local_offsets ), "The length of local_sizes must be equal to local_offsets, but got {} and {}.".format( diff --git a/python/paddle/distributed/auto_parallel/engine.py b/python/paddle/distributed/auto_parallel/engine.py index ea003dfff47..fb12ae4971a 100644 --- a/python/paddle/distributed/auto_parallel/engine.py +++ b/python/paddle/distributed/auto_parallel/engine.py @@ -48,6 +48,7 @@ from .dist_context import DistributedContext, get_default_distributed_context class Engine: + def __init__(self, model=None, inputs_spec=None, @@ -88,8 +89,9 @@ class Engine: gradient_scale=True, metrics=None, all_ranks=False): - if optimizer and not isinstance(optimizer, ( - paddle.optimizer.Optimizer, paddle.fluid.optimizer.Optimizer)): + if optimizer and not isinstance( + optimizer, + (paddle.optimizer.Optimizer, paddle.fluid.optimizer.Optimizer)): raise TypeError( "'optimizer' must be object of class `paddle.optimizer.Optimizer`" \ " or `paddle.fluid.optimizer.Optimizer`." 
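The dist_tensor.py hunks above wrap the even-sharding arithmetic that turns a global shape, a dims_mapping and a process-mesh topology into per-rank sizes and offsets. The following is a minimal standalone sketch of that arithmetic, assuming sizes divide the mesh evenly and ignoring shard_sizes; it is an illustration, not the Paddle implementation.

# Standalone sketch of even-sharding size/offset arithmetic.
import numpy as np


def local_sizes(global_sizes, dims_mapping, topology):
    # dims_mapping[i] == -1: dimension i is replicated;
    # otherwise it names the process-mesh axis that shards dimension i.
    return [
        size if axis == -1 else size // topology[axis]
        for size, axis in zip(global_sizes, dims_mapping)
    ]


def local_offsets(global_sizes, dims_mapping, topology, processes, rank):
    sizes = local_sizes(global_sizes, dims_mapping, topology)
    # the rank's coordinate in the mesh, from its position in the process list
    coord = np.unravel_index(processes.index(rank), topology)
    return [
        0 if axis == -1 else int(coord[axis]) * size
        for size, axis in zip(sizes, dims_mapping)
    ]


# A [8, 6] tensor on a 2x3 mesh, sharded along mesh axis 0 in dim 0:
# rank 4 sits at mesh coordinate (1, 1), so it owns rows 4..7.
print(local_sizes([8, 6], [0, -1], [2, 3]))                       # [4, 6]
print(local_offsets([8, 6], [0, -1], [2, 3], list(range(6)), 4))  # [4, 0]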
@@ -194,7 +196,7 @@ class Engine: parallelizer.parallel_all() def _init_dist_context(self, mode): - # Init dist_context['mode'] with the first planned dist_context + # Init dist_context['mode'] with the first planned dist_context # to guarantee that train/eval/predict mode have same parallel strategy dist_context = self._dist_contexts[mode] origin_main_prog = dist_context._original_serial_main_program @@ -212,7 +214,7 @@ class Engine: dist_context.set_op_dist_attr_for_program(op, ref_op_dist_attr) def _initialize(self, mode): - # Get the current content from the distributed context + # Get the current content from the distributed context self._serial_main_progs[mode] = self._dist_contexts[ mode].serial_main_program self._serial_startup_progs[mode] = self._dist_contexts[ @@ -380,7 +382,7 @@ class Engine: dist_context = self._dist_contexts[self.mode] dist_main_block = dist_main_prog.global_block() - # NOTE: Get feed_list from dist_program, then insert dataloader op + # NOTE: Get feed_list from dist_program, then insert dataloader op # with sharded var shape. Because predict_program does not contain # labels var, so we will filter dataset's value with length of feed_list. inputs_var = self._feed_vars[self.mode]["inputs"] @@ -389,8 +391,8 @@ class Engine: for var in inputs_var + labels_var: if var.name in dist_main_block.vars: feed_list.append(dist_main_block.vars[var.name]) - dp_world_size, dp_rank = self._get_data_parallel_info(feed_list[0], - dist_context) + dp_world_size, dp_rank = self._get_data_parallel_info( + feed_list[0], dist_context) # remove the first three ops if multi run fit/evaluate/predict op_size = len(dist_main_block.ops) @@ -418,8 +420,9 @@ class Engine: op = dist_main_block.ops[new_op_size - 1] new_op_desc = dist_main_block.desc._prepend_op() new_op_desc.copy_from(op.desc) - new_op = Operator( - dist_main_block, new_op_desc, type=new_op_desc.type()) + new_op = Operator(dist_main_block, + new_op_desc, + type=new_op_desc.type()) dist_main_block.ops.insert(0, new_op) dist_op = DistributedOperator(new_op) dist_context.add_dist_op_for_program(dist_op) @@ -442,21 +445,21 @@ class Engine: def _set_data_parallel(self, var): if self._nranks == 1: self._default_strategy = 'serial' - auto.shard_tensor( - var, - dist_attr={ - "process_mesh": [0], - "dims_mapping": [-1 for _ in range(len(var.shape))] - }) + auto.shard_tensor(var, + dist_attr={ + "process_mesh": [0], + "dims_mapping": + [-1 for _ in range(len(var.shape))] + }) else: self._default_strategy = 'dp' - auto.shard_tensor( - var, - dist_attr={ - "process_mesh": list(range(self._nranks)), - "dims_mapping": - [0] + [-1 for _ in range(len(var.shape) - 1)] - }) + auto.shard_tensor(var, + dist_attr={ + "process_mesh": + list(range(self._nranks)), + "dims_mapping": + [0] + [-1 for _ in range(len(var.shape) - 1)] + }) return var @@ -492,22 +495,20 @@ class Engine: serial_program = self._serial_main_progs["train"] dist_main_prog = self._dist_main_progs["train"][self._cur_rank] dist_context = self._dist_contexts["train"] - self._saver.save( - path, - serial_program=serial_program, - dist_main_program=dist_main_prog, - dist_context=dist_context) + self._saver.save(path, + serial_program=serial_program, + dist_main_program=dist_main_prog, + dist_context=dist_context) else: assert mode, "Please set the 'mode' you want to save." 
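# Engine._set_data_parallel above encodes the data-parallel convention as a
# dims_mapping: shard only the batch dimension (dim 0) onto process-mesh
# axis 0 and replicate everything else. A standalone sketch of just that
# convention (plain dicts, no call into Paddle):
def data_parallel_dist_attr(var_shape, nranks):
    if nranks == 1:
        # serial fallback: a single-process mesh, nothing sharded
        return {"process_mesh": [0], "dims_mapping": [-1] * len(var_shape)}
    return {
        "process_mesh": list(range(nranks)),
        "dims_mapping": [0] + [-1] * (len(var_shape) - 1)
    }


# e.g. a [batch, seq_len, hidden] input on 8 ranks:
print(data_parallel_dist_attr((64, 512, 768), 8))
# {'process_mesh': [0, 1, 2, 3, 4, 5, 6, 7], 'dims_mapping': [0, -1, -1]}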
feed_vars = self._feed_vars[mode]['inputs'] fetch_vars = self._fetch_vars[mode]['outputs'] dist_main_prog = self._dist_main_progs[mode][self._cur_rank] - self._saver.save_inference_model( - path, - feed_vars, - fetch_vars, - self._executor, - program=dist_main_prog) + self._saver.save_inference_model(path, + feed_vars, + fetch_vars, + self._executor, + program=dist_main_prog) def load(self, path, strict=True, load_optimizer=True, mode=None): if not mode: diff --git a/python/paddle/distributed/auto_parallel/graph.py b/python/paddle/distributed/auto_parallel/graph.py index 14856e39070..de6505071ab 100644 --- a/python/paddle/distributed/auto_parallel/graph.py +++ b/python/paddle/distributed/auto_parallel/graph.py @@ -14,6 +14,7 @@ class Node: + def __init__(self, id, **attrs): # Each node must has a unique id self._id = id @@ -47,6 +48,7 @@ class Node: class Edge: + def __init__(self, src_id, tgt_id, **attrs): # The id of source node in an Edge self._src_id = src_id @@ -88,6 +90,7 @@ class Edge: class Graph: + def __init__(self, **attrs): # _nodes is dict for storing the nodes of the graph. # The key of this dict is the node id. diff --git a/python/paddle/distributed/auto_parallel/mapper.py b/python/paddle/distributed/auto_parallel/mapper.py index f5d9c32d33e..da76ae81271 100644 --- a/python/paddle/distributed/auto_parallel/mapper.py +++ b/python/paddle/distributed/auto_parallel/mapper.py @@ -171,8 +171,9 @@ def build_process_graph(distributed_program): src_info, src_rank) graph.add_node(src_rank, resource_requirements=resource_requirements) for tgt_rank, comm_requirements in comm_requirements_to_ranks.items(): - graph.add_edge( - src_rank, tgt_rank, comm_requirements=comm_requirements) + graph.add_edge(src_rank, + tgt_rank, + comm_requirements=comm_requirements) return graph @@ -192,8 +193,9 @@ def build_cluster_graph(cluster): else: graph.nodes[device.global_id]["occupied"] = False for link in machine.links.values(): - graph.add_edge( - link.source.global_id, link.target.global_id, link=link) + graph.add_edge(link.source.global_id, + link.target.global_id, + link=link) return graph @@ -233,8 +235,8 @@ def mapping(distributed_program, cluster): device_type = cur_rank_node["resource_requirements"]["device_type"] cur_device_node = None for device_node in cluster_graph.nodes.values(): - if (device_node["device"].type == device_type) and ( - not device_node["occupied"]): + if (device_node["device"].type + == device_type) and (not device_node["occupied"]): device_node["occupied"] = True cur_rank_node["visited"] = True cur_rank_node["device"] = device_node["device"] @@ -257,8 +259,8 @@ def mapping(distributed_program, cluster): nbr_device_edges.sort(key=sort_by_comm_bandwidth) for nbr_rank_edge in nbr_rank_edges: - src_rank_node = process_graph.nodes[nbr_rank_edge.src_id][ - "visited"] + src_rank_node = process_graph.nodes[ + nbr_rank_edge.src_id]["visited"] if src_rank_node: continue device_type = src_rank_node["resource_requirements"][ diff --git a/python/paddle/distributed/auto_parallel/operators/common.py b/python/paddle/distributed/auto_parallel/operators/common.py index 6b3c655f293..2e86f97d7a2 100644 --- a/python/paddle/distributed/auto_parallel/operators/common.py +++ b/python/paddle/distributed/auto_parallel/operators/common.py @@ -32,6 +32,7 @@ def is_elementwise_op(op_type): class DistributedOperatorImplContainer: + def __init__(self, op_type): self._type = op_type self._impls = [] @@ -81,6 +82,7 @@ class DistributedOperatorImplContainer: class DistributedOperatorImpl(abc.ABC): + def 
__init__(self, name): self._name = name self._type = None diff --git a/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py b/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py index 79a86169d5a..0a4bfb1213d 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py @@ -30,6 +30,7 @@ world_process_group = get_world_process_group() class DistributedCheckFiniteAndUnscale(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedCheckFiniteAndUnscale, self).__init__(op_type) @@ -39,6 +40,7 @@ register_distributed_operator_impl_container( class DistributedCheckFiniteAndUnscaleImpl(DistributedOperatorImpl): + def __init__(self, name): super(DistributedCheckFiniteAndUnscaleImpl, self).__init__(name) self._name = name @@ -122,41 +124,37 @@ class DistributedCheckFiniteAndUnscaleImpl(DistributedOperatorImpl): group = new_process_group(world_process_group.ranks) inf_var = main_block.var(kwargs['FoundInfinite'][0]) - inf_var_int32 = main_block.create_var( - name=inf_var.name + "@cast_int32", - shape=inf_var.shape, - dtype=core.VarDesc.VarType.INT32) + inf_var_int32 = main_block.create_var(name=inf_var.name + "@cast_int32", + shape=inf_var.shape, + dtype=core.VarDesc.VarType.INT32) set_var_dist_attr( ctx, inf_var_int32, ctx.get_tensor_dist_attr_for_program(inf_var).dims_mapping, ctx.get_tensor_dist_attr_for_program(inf_var).process_mesh) - cast_op1 = main_block.append_op( - type='cast', - inputs={'X': inf_var}, - outputs={'Out': inf_var_int32}, - attrs={ - "in_dtype": inf_var.dtype, - "out_dtype": inf_var_int32.dtype, - OP_ROLE_KEY: OpRole.Backward - }) - allreduce_op = main_block.append_op( - type='c_allreduce_max', - inputs={'X': inf_var_int32}, - outputs={'Out': inf_var_int32}, - attrs={ - 'ring_id': group.id, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Backward - }) - cast_op2 = main_block.append_op( - type='cast', - inputs={'X': inf_var_int32}, - outputs={'Out': inf_var}, - attrs={ - "in_dtype": inf_var_int32.dtype, - "out_dtype": inf_var.dtype, - OP_ROLE_KEY: OpRole.Backward - }) + cast_op1 = main_block.append_op(type='cast', + inputs={'X': inf_var}, + outputs={'Out': inf_var_int32}, + attrs={ + "in_dtype": inf_var.dtype, + "out_dtype": inf_var_int32.dtype, + OP_ROLE_KEY: OpRole.Backward + }) + allreduce_op = main_block.append_op(type='c_allreduce_max', + inputs={'X': inf_var_int32}, + outputs={'Out': inf_var_int32}, + attrs={ + 'ring_id': group.id, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Backward + }) + cast_op2 = main_block.append_op(type='cast', + inputs={'X': inf_var_int32}, + outputs={'Out': inf_var}, + attrs={ + "in_dtype": inf_var_int32.dtype, + "out_dtype": inf_var.dtype, + OP_ROLE_KEY: OpRole.Backward + }) main_block._sync_with_cpp() for op in [cast_op1, allreduce_op, cast_op2]: diff --git a/python/paddle/distributed/auto_parallel/operators/dist_default.py b/python/paddle/distributed/auto_parallel/operators/dist_default.py index e18cee6d42d..a2b1b7826d5 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_default.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_default.py @@ -47,28 +47,26 @@ def prim_operator_data_parallel_functor(ctx, src_op): ctx.synced_gradient.add(var_name) sync_group = new_process_group(ctx.data_parallel_group) - allreduce_op = main_block.append_op( - type='c_allreduce_sum', - inputs={'X': [var_name]}, - 
outputs={'Out': [var_name]}, - attrs={ - 'ring_id': sync_group.id, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Backward - }) + allreduce_op = main_block.append_op(type='c_allreduce_sum', + inputs={'X': [var_name]}, + outputs={'Out': [var_name]}, + attrs={ + 'ring_id': sync_group.id, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Backward + }) param = ctx.grads_params[var_name] startup_block = dist_op_context.startup_block - new_op = startup_block.append_op( - type='c_broadcast', - inputs={'X': [param]}, - outputs={'Out': [param]}, - attrs={ - 'ring_id': sync_group.id, - 'root': 0, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Forward - }) + new_op = startup_block.append_op(type='c_broadcast', + inputs={'X': [param]}, + outputs={'Out': [param]}, + attrs={ + 'ring_id': sync_group.id, + 'root': 0, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Forward + }) grad_var = main_block.var(var_name) dims_mapping = ctx.get_tensor_dist_attr_for_program( @@ -85,6 +83,7 @@ def prim_operator_data_parallel_functor(ctx, src_op): class DistributedDefault(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedDefault, self).__init__(op_type) @@ -94,6 +93,7 @@ register_distributed_operator_impl_container(DistributedDefault("default")) # Replicated Default class DistributedDefaultImpl0(DistributedOperatorImpl): + def __init__(self, name): super(DistributedDefaultImpl0, self).__init__(name) self._forward_implemented = True @@ -277,8 +277,8 @@ class DistributedDefaultImpl0(DistributedOperatorImpl): batch_dim_mappings.append(dims_mapping[1]) for arg_name in op_desc.output_arg_names(): if op_desc.type() == "fill_zeros_like": - input_tensor = dist_op.get_serial_input(op_desc.input_arg_names( - )[0]) + input_tensor = dist_op.get_serial_input( + op_desc.input_arg_names()[0]) if input_tensor.is_parameter: continue serial_tensor = dist_op.get_serial_output(arg_name) @@ -316,8 +316,8 @@ class DistributedDefaultImpl0(DistributedOperatorImpl): changed = True for arg_name in op_desc.output_arg_names(): if op_desc.type() == "fill_zeros_like": - input_tensor = dist_op.get_serial_input(op_desc.input_arg_names( - )[0]) + input_tensor = dist_op.get_serial_input( + op_desc.input_arg_names()[0]) if input_tensor.is_parameter: continue if op_desc.type() in ["shape", "slice"]: @@ -409,16 +409,19 @@ class DistributedDefaultImpl0(DistributedOperatorImpl): axis, rank_id) sync_group = new_process_group(group_ranks) - new_op = startup_block.append_op( - type='c_broadcast', - inputs={'X': param}, - outputs={'Out': param}, - attrs={ - 'ring_id': sync_group.id, - 'root': 0, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Forward - }) + new_op = startup_block.append_op(type='c_broadcast', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': + sync_group.id, + 'root': + 0, + 'use_calc_stream': + True, + OP_ROLE_KEY: + OpRole.Forward + }) # set distributed attribute op_attr = OperatorDistributedAttribute() @@ -484,8 +487,8 @@ class DistributedDefaultImpl0(DistributedOperatorImpl): # FIXME (JZ-LIANG) Remove this hack to support any op mesh group for Pipeline Parallelism if rank_id not in process_mesh.processes: - rank_id = _get_corresponding_rank(ctx, process_mesh, - rank_id) + rank_id = _get_corresponding_rank( + ctx, process_mesh, rank_id) mesh_shape = process_mesh.topology batch_size_axis = var_dim_mapping[0] diff --git a/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py b/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py index 78589afc498..02f2741d884 
100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py @@ -35,6 +35,7 @@ from .dist_default import DistributedDefaultImpl0 class DistributedElementwise(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedElementwise, self).__init__(op_type) @@ -45,6 +46,7 @@ register_distributed_operator_impl_container( # Replicated Elementwise class DistributedElementwiseImpl0(DistributedOperatorImpl): + def __init__(self, name): super(DistributedElementwiseImpl0, self).__init__(name) self._forward_implemented = False @@ -208,8 +210,8 @@ class DistributedElementwiseImpl0(DistributedOperatorImpl): changed = True else: if compatible_dims_mapping != input_dims_mapping_dict[arg_name]: - op_dist_attr.set_input_dims_mapping(arg_name, - compatible_dims_mapping) + op_dist_attr.set_input_dims_mapping( + arg_name, compatible_dims_mapping) changed = True for arg_name in output_arg_names: @@ -222,12 +224,11 @@ class DistributedElementwiseImpl0(DistributedOperatorImpl): output_dims_mapping_lens[arg_name]) + i new_dims_mapping[i] = compatible_dims_mapping[new_idx] if new_dims_mapping != output_dims_mapping_dict[arg_name]: - op_dist_attr.set_output_dims_mapping(arg_name, - new_dims_mapping) + op_dist_attr.set_output_dims_mapping( + arg_name, new_dims_mapping) changed = True else: - if compatible_dims_mapping != output_dims_mapping_dict[ - arg_name]: + if compatible_dims_mapping != output_dims_mapping_dict[arg_name]: op_dist_attr.set_output_dims_mapping( arg_name, compatible_dims_mapping) changed = True diff --git a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py b/python/paddle/distributed/auto_parallel/operators/dist_embedding.py index ae6397391ac..2272400e60d 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_embedding.py @@ -34,6 +34,7 @@ from ..utils import _get_comm_group, _get_idx_in_axis, _get_corresponding_rank class DistributedEmbedding(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedEmbedding, self).__init__(op_type) @@ -46,6 +47,7 @@ register_distributed_operator_impl_container( # RowParallel class DistributedEmbeddingImpl(DistributedOperatorImpl): + def __init__(self, name): super(DistributedEmbeddingImpl, self).__init__(name) self._forward_implemented = True @@ -58,8 +60,8 @@ class DistributedEmbeddingImpl(DistributedOperatorImpl): w_name = op_desc.input('W')[0] ids_dims_mapping = op_dist_attr.get_input_dims_mapping(ids_name) w_dims_mapping = op_dist_attr.get_input_dims_mapping(w_name) - if is_dim_replicate(w_dims_mapping[-2]) or is_dim_shard(w_dims_mapping[ - -1]): + if is_dim_replicate(w_dims_mapping[-2]) or is_dim_shard( + w_dims_mapping[-1]): return False # Other dimensions must be replicate except the batch dimension for mapping in ids_dims_mapping[1:]: @@ -215,8 +217,10 @@ class DistributedEmbeddingImpl(DistributedOperatorImpl): c_embedding_op = main_block.append_op( type='c_embedding', - inputs={'Ids': [Ids_var], - 'W': [Weight_var]}, + inputs={ + 'Ids': [Ids_var], + 'W': [Weight_var] + }, outputs={'Out': [intermediate_var_0]}, attrs={"start_index": relative_idx}) if intermediate_var_0.shape != ref_shape: @@ -295,16 +299,15 @@ class DistributedEmbeddingImpl(DistributedOperatorImpl): rank_id) sync_group = new_process_group(group_ranks) - startup_block.append_op( - type='c_broadcast', - inputs={'X': param}, - outputs={'Out': param}, - 
attrs={ - 'ring_id': sync_group.id, - 'root': 0, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Forward - }) + startup_block.append_op(type='c_broadcast', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': sync_group.id, + 'root': 0, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Forward + }) startup_block._sync_with_cpp() @staticmethod @@ -440,21 +443,21 @@ class DistributedEmbeddingImpl(DistributedOperatorImpl): if need_gradient_allreduce: W_Grad_var = main_block.var(kwargs['W@GRAD'][0]) - allreduce_op = main_block.append_op( - type='c_allreduce_sum', - inputs={'X': [W_Grad_var]}, - outputs={'Out': [W_Grad_var]}, - attrs={ - 'ring_id': dp_group.id, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Backward - }) - scale_op = main_block.append_op( - type='scale', - inputs={'X': W_Grad_var}, - outputs={'Out': W_Grad_var}, - attrs={'scale': 1.0 / dp_degree, - OP_ROLE_KEY: OpRole.Backward}) + allreduce_op = main_block.append_op(type='c_allreduce_sum', + inputs={'X': [W_Grad_var]}, + outputs={'Out': [W_Grad_var]}, + attrs={ + 'ring_id': dp_group.id, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Backward + }) + scale_op = main_block.append_op(type='scale', + inputs={'X': W_Grad_var}, + outputs={'Out': W_Grad_var}, + attrs={ + 'scale': 1.0 / dp_degree, + OP_ROLE_KEY: OpRole.Backward + }) main_block._sync_with_cpp() dims_mapping = ctx.get_tensor_dist_attr_for_program( diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py b/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py index 80ac019e830..763e47802b3 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py @@ -31,6 +31,7 @@ from .dist_default import DistributedDefaultImpl0 class DistributedFillConstantBatchSizeLike(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedFillConstantBatchSizeLike, self).__init__(op_type) @@ -40,6 +41,7 @@ register_distributed_operator_impl_container( class DistributedFillConstantBatchSizeLikeImpl0(DistributedOperatorImpl): + def __init__(self, name): super(DistributedFillConstantBatchSizeLikeImpl0, self).__init__(name) self._forward_implemented = True diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py b/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py index bc3992ec03d..23519647d33 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
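The backward hunks in dist_embedding.py above pair a c_allreduce_sum on W@GRAD with a scale of 1.0 / dp_degree, which is gradient averaging across the data-parallel group. In plain Python, ignoring Paddle entirely, the underlying math is just the sketch below.

# Sum the per-rank gradients, then scale by 1/dp_degree to get the average.
def allreduce_average(per_rank_grads):
    dp_degree = len(per_rank_grads)
    summed = [sum(vals) for vals in zip(*per_rank_grads)]
    return [g * (1.0 / dp_degree) for g in summed]


print(allreduce_average([[1.0, 2.0], [3.0, 4.0]]))  # [2.0, 3.0]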
@@ -27,6 +27,7 @@ from ..process_group import new_process_group class DistributedFusedAttention(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedFusedAttention, self).__init__(op_type) @@ -36,6 +37,7 @@ register_distributed_operator_impl_container( class DistributedFusedAttentionImpl(DistributedOperatorImpl): + def __init__(self, name): super(DistributedFusedAttentionImpl, self).__init__(name) self._forward_implemented = True @@ -60,8 +62,8 @@ class DistributedFusedAttentionImpl(DistributedOperatorImpl): for mapping in x_dims_mapping[1:-1]: if is_dim_shard(mapping): return False - if len(qkv_w_dims_mapping) != 4 or is_dim_replicate(qkv_w_dims_mapping[ - head_axis]): + if len(qkv_w_dims_mapping) != 4 or is_dim_replicate( + qkv_w_dims_mapping[head_axis]): return False if len(qkv_bias_dims_mapping) != 3 or is_dim_replicate( qkv_bias_dims_mapping[head_axis]): @@ -91,7 +93,7 @@ class DistributedFusedAttentionImpl(DistributedOperatorImpl): op_desc = dist_op.serial_op.desc op_dist_attr = dist_op.dist_attr - # none of output should be sharded + # none of output should be sharded for out_name in op_desc.output_names(): out = op_desc.output(out_name)[0] out_dims_mapping = op_dist_attr.get_output_dims_mapping(out) @@ -152,8 +154,8 @@ class DistributedFusedAttentionImpl(DistributedOperatorImpl): # infer logic comm presentation head_axis = 1 qkv_w = src_op.input('QKVW')[0] - qkv_w_col_dim_mapping = op_dist_attr.get_input_dims_mapping(qkv_w)[ - head_axis] + qkv_w_col_dim_mapping = op_dist_attr.get_input_dims_mapping( + qkv_w)[head_axis] assert qkv_w_col_dim_mapping >= 0, "col_parallel_matmul's row should be divided by a specific mesh axis, but got [{}]".format( qkv_w_col_dim_mapping) process_mesh_shape = op_dist_attr.process_mesh.topology diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py b/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py index 76f526adbbf..50735cf2857 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -27,6 +27,7 @@ from ..process_group import new_process_group class DistributedFusedFeedForward(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedFusedFeedForward, self).__init__(op_type) @@ -36,6 +37,7 @@ register_distributed_operator_impl_container( class DistributedFusedFeedForwardImpl(DistributedOperatorImpl): + def __init__(self, name): super(DistributedFusedFeedForwardImpl, self).__init__(name) self._forward_implemented = True @@ -82,7 +84,7 @@ class DistributedFusedFeedForwardImpl(DistributedOperatorImpl): op_desc = dist_op.serial_op.desc op_dist_attr = dist_op.dist_attr - # none of output should be sharded + # none of output should be sharded for out_name in op_desc.output_names(): out = op_desc.output(out_name)[0] out_dims_mapping = op_dist_attr.get_output_dims_mapping(out) diff --git a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py index 69e1c866de6..427932a77fb 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py @@ -169,13 +169,13 @@ def _is_auto_compatible_for_matmul(dist_op): # NOTE: Partition is not supported if matmul op has trans. if op_desc.type() == "matmul_v2": if op_desc.attr('trans_x') or op_desc.attr('trans_y'): - if x_dims_mapping[-2:] != [-1, -1] or y_dims_mapping[ - -2:] != [-1, -1]: + if x_dims_mapping[-2:] != [-1, -1 + ] or y_dims_mapping[-2:] != [-1, -1]: return False elif op_desc.type() == "matmul": if op_desc.attr('transpose_X') or op_desc.attr('transpose_Y'): - if x_dims_mapping[-2:] != [-1, -1] or y_dims_mapping[ - -2:] != [-1, -1]: + if x_dims_mapping[-2:] != [-1, -1 + ] or y_dims_mapping[-2:] != [-1, -1]: return False # Deal with dim > 2 and take care of broadcasting @@ -197,8 +197,8 @@ def _is_auto_compatible_for_matmul(dist_op): for i in range(out_dims_mapping_len - 2): broadcast_out_dims_mapping.append(out_dims_mapping[i]) - is_same = ((broadcast_x_dims_mapping == broadcast_y_dims_mapping) and - (broadcast_x_dims_mapping == broadcast_out_dims_mapping)) + is_same = ((broadcast_x_dims_mapping == broadcast_y_dims_mapping) + and (broadcast_x_dims_mapping == broadcast_out_dims_mapping)) if not is_same: return False @@ -307,8 +307,9 @@ def _right_operand_parameter_matmul_backward(ctx, *args, **kwargs): ctx.set_tensor_dist_attr_for_program(intermediate_var_0, out_grad_dist_attr) - group_ranks = _get_comm_group( - process_mesh_group, process_mesh_shape, parallel_axis, rank_id) + group_ranks = _get_comm_group(process_mesh_group, + process_mesh_shape, parallel_axis, + rank_id) group = new_process_group(group_ranks) c_identity_op = main_block.append_op( type='c_identity', @@ -325,8 +326,9 @@ def _right_operand_parameter_matmul_backward(ctx, *args, **kwargs): 'linear') check_dtype(intermediate_var_0.dtype, 'dtype', ['float16', 'float32', 'float64'], 'linear') - set_comm_op_dist_attr_for_program( - c_identity_op, dist_attr.process_mesh, out_grad_dist_attr, ctx) + set_comm_op_dist_attr_for_program(c_identity_op, + dist_attr.process_mesh, + out_grad_dist_attr, ctx) new_kwargs = copy.deepcopy(kwargs) new_kwargs['Out@GRAD'] = [intermediate_var_0.name] @@ -404,21 +406,21 @@ def _right_operand_parameter_matmul_backward(ctx, *args, **kwargs): if need_gradient_allreduce and is_parameter_related(Y_var.name, main_block): Y_Grad_var = main_block.var(kwargs['Y@GRAD'][0]) - allreduce_op = main_block.append_op( - type='c_allreduce_sum', - inputs={'X': [Y_Grad_var]}, 
- outputs={'Out': [Y_Grad_var]}, - attrs={ - 'ring_id': dp_group.id, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Backward - }) - scale_op = main_block.append_op( - type='scale', - inputs={'X': Y_Grad_var}, - outputs={'Out': Y_Grad_var}, - attrs={'scale': 1.0 / dp_degree, - OP_ROLE_KEY: OpRole.Backward}) + allreduce_op = main_block.append_op(type='c_allreduce_sum', + inputs={'X': [Y_Grad_var]}, + outputs={'Out': [Y_Grad_var]}, + attrs={ + 'ring_id': dp_group.id, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Backward + }) + scale_op = main_block.append_op(type='scale', + inputs={'X': Y_Grad_var}, + outputs={'Out': Y_Grad_var}, + attrs={ + 'scale': 1.0 / dp_degree, + OP_ROLE_KEY: OpRole.Backward + }) main_block._sync_with_cpp() dims_mapping = ctx.get_tensor_dist_attr_for_program( @@ -451,20 +453,20 @@ def _init_param_sync(Weight_var, dist_op_context, startup_block, ctx, rank_id): process_mesh.topology, axis, rank_id) sync_group = new_process_group(group_ranks) - startup_block.append_op( - type='c_broadcast', - inputs={'X': param}, - outputs={'Out': param}, - attrs={ - 'ring_id': sync_group.id, - 'root': 0, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Forward - }) + startup_block.append_op(type='c_broadcast', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': sync_group.id, + 'root': 0, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Forward + }) startup_block._sync_with_cpp() class DistributedMatmul(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedMatmul, self).__init__(op_type) @@ -474,6 +476,7 @@ register_distributed_operator_impl_container(DistributedMatmul("matmul")) # ColumnParallel class DistributedMatmulImpl0(DistributedOperatorImpl): + def __init__(self, name): super(DistributedMatmulImpl0, self).__init__(name) self._forward_implemented = True @@ -488,8 +491,8 @@ class DistributedMatmulImpl0(DistributedOperatorImpl): y_dims_mapping = op_dist_attr.get_input_dims_mapping(y_name) if is_dim_shard(x_dims_mapping[-1]): return False - if is_dim_shard(y_dims_mapping[-2]) or is_dim_replicate(y_dims_mapping[ - -1]): + if is_dim_shard(y_dims_mapping[-2]) or is_dim_replicate( + y_dims_mapping[-1]): return False for mapping in x_dims_mapping[1:-1]: if is_dim_shard(mapping): @@ -628,8 +631,10 @@ class DistributedMatmulImpl0(DistributedOperatorImpl): 'alpha': 1, } inputs = {'X': [intermediate_var_0], 'Y': [Weight_var]} - matmul_op = main_block.append_op( - type='matmul', inputs=inputs, outputs={'Out': Out_var}, attrs=attrs) + matmul_op = main_block.append_op(type='matmul', + inputs=inputs, + outputs={'Out': Out_var}, + attrs=attrs) if Out_var.shape != ref_shape_out: Out_var.desc.set_shape(ref_shape_out) @@ -695,6 +700,7 @@ class DistributedMatmulImpl0(DistributedOperatorImpl): # RowParallel class DistributedMatmulImpl1(DistributedOperatorImpl): + def __init__(self, name): super(DistributedMatmulImpl1, self).__init__(name) self._forward_implemented = True @@ -709,8 +715,8 @@ class DistributedMatmulImpl1(DistributedOperatorImpl): y_dims_mapping = op_dist_attr.get_input_dims_mapping(y_name) if is_dim_replicate(x_dims_mapping[-1]): return False - if is_dim_replicate(y_dims_mapping[-2]) or is_dim_shard(y_dims_mapping[ - -1]): + if is_dim_replicate(y_dims_mapping[-2]) or is_dim_shard( + y_dims_mapping[-1]): return False # Other dimensions must be replicate except the batch dimension for mapping in x_dims_mapping[1:-1]: @@ -833,11 +839,10 @@ class DistributedMatmulImpl1(DistributedOperatorImpl): 
ctx.set_tensor_dist_attr_for_program(intermediate_var_0, out_var_dist_attr) - matmul_op = main_block.append_op( - type='matmul', - inputs=inputs, - outputs={'Out': intermediate_var_0}, - attrs=attrs) + matmul_op = main_block.append_op(type='matmul', + inputs=inputs, + outputs={'Out': intermediate_var_0}, + attrs=attrs) if intermediate_var_0.shape != ref_shape: intermediate_var_0.desc.set_shape(ref_shape) @@ -905,6 +910,7 @@ class DistributedMatmulImpl1(DistributedOperatorImpl): # ReplicateParallel class DistributedMatmulImpl2(DistributedOperatorImpl): + def __init__(self, name): super(DistributedMatmulImpl2, self).__init__(name) @@ -918,14 +924,14 @@ class DistributedMatmulImpl2(DistributedOperatorImpl): if is_dim_shard(x_dims_mapping[-1]): return False - if is_valid_list_index(x_dims_mapping, - -2) and is_dim_shard(x_dims_mapping[-2]): + if is_valid_list_index(x_dims_mapping, -2) and is_dim_shard( + x_dims_mapping[-2]): return False if is_dim_shard(y_dims_mapping[-1]): return False - if is_valid_list_index(y_dims_mapping, - -2) and is_dim_shard(y_dims_mapping[-2]): + if is_valid_list_index(y_dims_mapping, -2) and is_dim_shard( + y_dims_mapping[-2]): return False return True @@ -938,8 +944,8 @@ class DistributedMatmulImpl2(DistributedOperatorImpl): if is_dim_shard(out_dims_mapping[-1]): return False - if is_valid_list_index(out_dims_mapping, - -2) and is_dim_shard(out_dims_mapping[-2]): + if is_valid_list_index(out_dims_mapping, -2) and is_dim_shard( + out_dims_mapping[-2]): return False return True @@ -979,6 +985,7 @@ register_distributed_operator_impl("matmul", class DistributedMatmulV2(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedMatmulV2, self).__init__(op_type) @@ -988,6 +995,7 @@ register_distributed_operator_impl_container(DistributedMatmulV2("matmul_v2")) # ColumnParallel class DistributedMatmulV2Impl0(DistributedOperatorImpl): + def __init__(self, name): super(DistributedMatmulV2Impl0, self).__init__(name) self._forward_implemented = True @@ -1002,8 +1010,8 @@ class DistributedMatmulV2Impl0(DistributedOperatorImpl): y_dims_mapping = op_dist_attr.get_input_dims_mapping(y_name) if is_dim_shard(x_dims_mapping[-1]): return False - if is_dim_shard(y_dims_mapping[-2]) or is_dim_replicate(y_dims_mapping[ - -1]): + if is_dim_shard(y_dims_mapping[-2]) or is_dim_replicate( + y_dims_mapping[-1]): return False for mapping in x_dims_mapping[1:-1]: if is_dim_shard(mapping): @@ -1139,11 +1147,10 @@ class DistributedMatmulV2Impl0(DistributedOperatorImpl): ['float16', 'float32', 'float64'], 'linear') attrs = {'trans_x': False, 'trans_y': False} inputs = {'X': [intermediate_var_0], 'Y': [Weight_var]} - matmul_v2_op = main_block.append_op( - type='matmul_v2', - inputs=inputs, - outputs={'Out': Out_var}, - attrs=attrs) + matmul_v2_op = main_block.append_op(type='matmul_v2', + inputs=inputs, + outputs={'Out': Out_var}, + attrs=attrs) if Out_var.shape != ref_shape_out: Out_var.desc.set_shape(ref_shape_out) @@ -1177,14 +1184,14 @@ class DistributedMatmulV2Impl0(DistributedOperatorImpl): input_varname) assert input_dist_attr is not None, "dist_attr is {}".format( op_dist_attr) - matmulv2_op_dist_attr.set_input_dist_attr(input_varname, - input_dist_attr) + matmulv2_op_dist_attr.set_input_dist_attr( + input_varname, input_dist_attr) else: input_var = main_block.var(input_varname) tensor_dist_attr = ctx.get_tensor_dist_attr_for_program( input_var) - matmulv2_op_dist_attr.set_input_dist_attr(input_varname, - tensor_dist_attr) + matmulv2_op_dist_attr.set_input_dist_attr( 
+ input_varname, tensor_dist_attr) for output_varname in matmul_v2_op.desc.output_arg_names(): output_dist_attr = op_dist_attr.get_output_dist_attr(output_varname) assert output_dist_attr is not None, "dist_attr is {}".format( @@ -1205,6 +1212,7 @@ class DistributedMatmulV2Impl0(DistributedOperatorImpl): # RowParallel class DistributedMatmulV2Impl1(DistributedOperatorImpl): + def __init__(self, name): super(DistributedMatmulV2Impl1, self).__init__(name) self._forward_implemented = True @@ -1219,8 +1227,8 @@ class DistributedMatmulV2Impl1(DistributedOperatorImpl): y_dims_mapping = op_dist_attr.get_input_dims_mapping(y_name) if is_dim_replicate(x_dims_mapping[-1]): return False - if is_dim_replicate(y_dims_mapping[-2]) or is_dim_shard(y_dims_mapping[ - -1]): + if is_dim_replicate(y_dims_mapping[-2]) or is_dim_shard( + y_dims_mapping[-1]): return False # Other dimensions must be replicate except the batch dimension for mapping in x_dims_mapping[1:-1]: @@ -1339,11 +1347,10 @@ class DistributedMatmulV2Impl1(DistributedOperatorImpl): ctx.set_tensor_dist_attr_for_program(intermediate_var_0, out_var_dist_attr) - matmul_v2_op = main_block.append_op( - type='matmul_v2', - inputs=inputs, - outputs={'Out': intermediate_var_0}, - attrs=attrs) + matmul_v2_op = main_block.append_op(type='matmul_v2', + inputs=inputs, + outputs={'Out': intermediate_var_0}, + attrs=attrs) if intermediate_var_0.shape != ref_shape: intermediate_var_0.desc.set_shape(ref_shape) @@ -1411,6 +1418,7 @@ class DistributedMatmulV2Impl1(DistributedOperatorImpl): # ReplicateParallel class DistributedMatmulV2Impl2(DistributedOperatorImpl): + def __init__(self, name): super(DistributedMatmulV2Impl2, self).__init__(name) @@ -1424,14 +1432,14 @@ class DistributedMatmulV2Impl2(DistributedOperatorImpl): if is_dim_shard(x_dims_mapping[-1]): return False - if is_valid_list_index(x_dims_mapping, - -2) and is_dim_shard(x_dims_mapping[-2]): + if is_valid_list_index(x_dims_mapping, -2) and is_dim_shard( + x_dims_mapping[-2]): return False if is_dim_shard(y_dims_mapping[-1]): return False - if is_valid_list_index(y_dims_mapping, - -2) and is_dim_shard(y_dims_mapping[-2]): + if is_valid_list_index(y_dims_mapping, -2) and is_dim_shard( + y_dims_mapping[-2]): return False return True @@ -1445,8 +1453,8 @@ class DistributedMatmulV2Impl2(DistributedOperatorImpl): if is_dim_shard(out_dims_mapping[-1]): return False - if is_valid_list_index(out_dims_mapping, - -2) and is_dim_shard(out_dims_mapping[-2]): + if is_valid_list_index(out_dims_mapping, -2) and is_dim_shard( + out_dims_mapping[-2]): return False return True @@ -1486,6 +1494,7 @@ register_distributed_operator_impl( class DistributedMul(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedMul, self).__init__(op_type) @@ -1495,6 +1504,7 @@ register_distributed_operator_impl_container(DistributedMul("mul")) # ColumnParallel class DistributedMulImpl0(DistributedOperatorImpl): + def __init__(self, name): super(DistributedMulImpl0, self).__init__(name) self._forward_implemented = True @@ -1509,8 +1519,8 @@ class DistributedMulImpl0(DistributedOperatorImpl): y_dims_mapping = op_dist_attr.get_input_dims_mapping(y_name) if is_dim_shard(x_dims_mapping[-1]): return False - if is_dim_shard(y_dims_mapping[-2]) or is_dim_replicate(y_dims_mapping[ - -1]): + if is_dim_shard(y_dims_mapping[-2]) or is_dim_replicate( + y_dims_mapping[-1]): return False for mapping in x_dims_mapping[1:-1]: if is_dim_shard(mapping): @@ -1650,8 +1660,10 @@ class 
DistributedMulImpl0(DistributedOperatorImpl): "y_num_col_dims": src_op.desc.attr("y_num_col_dims") } inputs = {'X': [intermediate_var_0], 'Y': [Weight_var]} - mul_op = main_block.append_op( - type='mul', inputs=inputs, outputs={'Out': Out_var}, attrs=attrs) + mul_op = main_block.append_op(type='mul', + inputs=inputs, + outputs={'Out': Out_var}, + attrs=attrs) if Out_var.shape != ref_shape_out: Out_var.desc.set_shape(ref_shape_out) @@ -1685,14 +1697,14 @@ class DistributedMulImpl0(DistributedOperatorImpl): input_varname) assert input_dist_attr is not None, "dist_attr is {}".format( op_dist_attr) - matmulv2_op_dist_attr.set_input_dist_attr(input_varname, - input_dist_attr) + matmulv2_op_dist_attr.set_input_dist_attr( + input_varname, input_dist_attr) else: input_var = main_block.var(input_varname) tensor_dist_attr = ctx.get_tensor_dist_attr_for_program( input_var) - matmulv2_op_dist_attr.set_input_dist_attr(input_varname, - tensor_dist_attr) + matmulv2_op_dist_attr.set_input_dist_attr( + input_varname, tensor_dist_attr) for output_varname in mul_op.desc.output_arg_names(): output_dist_attr = op_dist_attr.get_output_dist_attr(output_varname) assert output_dist_attr is not None, "dist_attr is {}".format( @@ -1713,6 +1725,7 @@ class DistributedMulImpl0(DistributedOperatorImpl): # RowParallel class DistributedMulImpl1(DistributedOperatorImpl): + def __init__(self, name): super(DistributedMulImpl1, self).__init__(name) self._forward_implemented = True @@ -1727,8 +1740,8 @@ class DistributedMulImpl1(DistributedOperatorImpl): y_dims_mapping = op_dist_attr.get_input_dims_mapping(y_name) if is_dim_replicate(x_dims_mapping[-1]): return False - if is_dim_replicate(y_dims_mapping[-2]) or is_dim_shard(y_dims_mapping[ - -1]): + if is_dim_replicate(y_dims_mapping[-2]) or is_dim_shard( + y_dims_mapping[-1]): return False # Other dimensions must be replicate except the batch dimension for mapping in x_dims_mapping[1:-1]: @@ -1851,11 +1864,10 @@ class DistributedMulImpl1(DistributedOperatorImpl): ctx.set_tensor_dist_attr_for_program(intermediate_var_0, out_var_dist_attr) - mul_op = main_block.append_op( - type='mul', - inputs=inputs, - outputs={'Out': intermediate_var_0}, - attrs=attrs) + mul_op = main_block.append_op(type='mul', + inputs=inputs, + outputs={'Out': intermediate_var_0}, + attrs=attrs) if intermediate_var_0.shape != ref_shape: intermediate_var_0.desc.set_shape(ref_shape) @@ -1923,6 +1935,7 @@ class DistributedMulImpl1(DistributedOperatorImpl): # ReplicateParallel class DistributedMulImpl2(DistributedOperatorImpl): + def __init__(self, name): super(DistributedMulImpl2, self).__init__(name) @@ -1936,13 +1949,13 @@ class DistributedMulImpl2(DistributedOperatorImpl): if is_dim_shard(x_dims_mapping[-1]): return False - if is_valid_list_index(x_dims_mapping, - -2) and is_dim_shard(x_dims_mapping[-2]): + if is_valid_list_index(x_dims_mapping, -2) and is_dim_shard( + x_dims_mapping[-2]): return False if is_dim_shard(y_dims_mapping[-1]): return False - if is_valid_list_index(y_dims_mapping, - -2) and is_dim_shard(y_dims_mapping[-2]): + if is_valid_list_index(y_dims_mapping, -2) and is_dim_shard( + y_dims_mapping[-2]): return False return True @@ -1956,8 +1969,8 @@ class DistributedMulImpl2(DistributedOperatorImpl): if is_dim_shard(out_dims_mapping[-1]): return False - if is_valid_list_index(out_dims_mapping, - -2) and is_dim_shard(out_dims_mapping[-2]): + if is_valid_list_index(out_dims_mapping, -2) and is_dim_shard( + out_dims_mapping[-2]): return False return True diff --git 
a/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py b/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py index 4d52e5a94be..4629e4bef93 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -34,6 +34,7 @@ from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype class DistributedPNorm(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedPNorm, self).__init__(op_type) @@ -52,19 +53,21 @@ def _insert_fill_constant_op(block, op_role): attrs['value'] = int("1") attrs['dtype'] = out.dtype attrs['op_role'] = op_role - utils.get_shape_tensor_inputs( - inputs=inputs, attrs=attrs, shape=[0], op_type='fill_constant') - fill_constant_op = block.append_op( - type='fill_constant', - inputs=inputs, - outputs={'Out': [out]}, - attrs=attrs) + utils.get_shape_tensor_inputs(inputs=inputs, + attrs=attrs, + shape=[0], + op_type='fill_constant') + fill_constant_op = block.append_op(type='fill_constant', + inputs=inputs, + outputs={'Out': [out]}, + attrs=attrs) out.stop_gradient = True return out, fill_constant_op # Row Parallel class DistributedPNormImpl(DistributedOperatorImpl): + def __init__(self, name): super(DistributedPNormImpl, self).__init__(name) self._forward_implemented = True @@ -193,15 +196,14 @@ class DistributedPNormImpl(DistributedOperatorImpl): # set fill_constant op dist_attr constant_op_dist_attr = OperatorDistributedAttribute() constant_op_dist_attr.process_mesh = ref_process_mesh - constant_op_dist_attr.set_output_dims_mapping(fill_constant_out.name, - constant_out_dims_mapping) + constant_op_dist_attr.set_output_dims_mapping( + fill_constant_out.name, constant_out_dims_mapping) ctx.set_op_dist_attr_for_program(fill_constant_op, constant_op_dist_attr) - barrier_op = main_block.append_op( - type='barrier', - inputs={'X': [fill_constant_out]}, - outputs={'Out': [fill_constant_out]}, - attrs={'ring_id': group.id}) + barrier_op = main_block.append_op(type='barrier', + inputs={'X': [fill_constant_out]}, + outputs={'Out': [fill_constant_out]}, + attrs={'ring_id': group.id}) # set barrier op dist attr set_comm_op_dist_attr_for_program(barrier_op, ref_process_mesh, constant_out_dist_attr, ctx) @@ -223,16 +225,16 @@ class DistributedPNormImpl(DistributedOperatorImpl): ] ctx.set_tensor_dist_attr_for_program(allgather_out, allgather_out_dist_attr) - c_allgather_op = main_block.append_op( - type='c_allgather', - inputs={'X': [X_var]}, - outputs={'Out': [allgather_out]}, - attrs={ - 'ring_id': group.id, - 'use_calc_stream': True, - 'nranks': group.nranks, - 'op_role': src_op.attr('op_role') - }) + c_allgather_op = main_block.append_op(type='c_allgather', + inputs={'X': [X_var]}, + outputs={'Out': [allgather_out]}, + attrs={ + 'ring_id': group.id, + 'use_calc_stream': True, + 'nranks': group.nranks, + 'op_role': + src_op.attr('op_role') + }) # set c_allgather op dist_attr allgather_op_dist_attr = 
OperatorDistributedAttribute() allgather_op_dist_attr.process_mesh = op_dist_attr.process_mesh @@ -344,11 +346,10 @@ class DistributedPNormImpl(DistributedOperatorImpl): "infer_flags": infer_flags, "op_role": backward_op.attr('op_role') } - slice_op = main_block.append_op( - type='slice', - inputs={'Input': [new_X_grad]}, - outputs={'Out': [X_grad_var]}, - attrs=attrs) + slice_op = main_block.append_op(type='slice', + inputs={'Input': [new_X_grad]}, + outputs={'Out': [X_grad_var]}, + attrs=attrs) X_grad_var_dims_mapping = op_dist_attr.get_output_dims_mapping( X_grad_var.name) slice_op_dist_attr = OperatorDistributedAttribute() diff --git a/python/paddle/distributed/auto_parallel/operators/dist_reduce_p.py b/python/paddle/distributed/auto_parallel/operators/dist_reduce_p.py index 3275bddd9b4..6d750562c96 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_reduce_p.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_reduce_p.py @@ -34,6 +34,7 @@ from ..utils import _get_comm_group, _get_corresponding_rank class DistributedReducePrimtive(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedReducePrimtive, self).__init__(op_type) @@ -44,6 +45,7 @@ register_distributed_operator_impl_container( # Batch Dimension Reduce Primitive class DistributedReducePrimtiveImpl0(DistributedOperatorImpl): + def __init__(self, name): super(DistributedReducePrimtiveImpl0, self).__init__(name) self._forward_implemented = True @@ -119,15 +121,14 @@ class DistributedReducePrimtiveImpl0(DistributedOperatorImpl): # batch dimension synchronization var_name = src_op.output_arg_names[0] sync_group = new_process_group(ctx.data_parallel_group) - allreduce_op = main_block.append_op( - type='c_allreduce_sum', - inputs={'X': [var_name]}, - outputs={'Out': [var_name]}, - attrs={ - 'ring_id': sync_group.id, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Forward - }) + allreduce_op = main_block.append_op(type='c_allreduce_sum', + inputs={'X': [var_name]}, + outputs={'Out': [var_name]}, + attrs={ + 'ring_id': sync_group.id, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Forward + }) # dist attr var = main_block.var(var_name) diff --git a/python/paddle/distributed/auto_parallel/operators/dist_reshape.py b/python/paddle/distributed/auto_parallel/operators/dist_reshape.py index da6ad933fd5..47a783a5f6d 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_reshape.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_reshape.py @@ -31,6 +31,7 @@ from .dist_default import DistributedDefaultImpl0 class DistributedReshape2(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedReshape2, self).__init__(op_type) @@ -39,6 +40,7 @@ register_distributed_operator_impl_container(DistributedReshape2("reshape2")) class DistributedReshapeImpl0(DistributedOperatorImpl): + def __init__(self, name): super(DistributedReshapeImpl0, self).__init__(name) self._forward_implemented = True @@ -171,8 +173,8 @@ class DistributedReshapeImpl0(DistributedOperatorImpl): for idx, axis in enumerate(dim_mapping): if axis >= 0: if len(shape_list) > idx: - shape_list[idx] = shape_list[idx] // process_mesh_shape[ - axis] + shape_list[ + idx] = shape_list[idx] // process_mesh_shape[axis] # create op new_op_desc = main_block.desc.append_op() @@ -193,6 +195,7 @@ class DistributedReshapeImpl0(DistributedOperatorImpl): class DistributedReshapeImpl1(DistributedOperatorImpl): + def __init__(self, name): super(DistributedReshapeImpl1, self).__init__(name) 
self._forward_implemented = True @@ -328,8 +331,8 @@ class DistributedReshapeImpl1(DistributedOperatorImpl): for idx, axis in enumerate(dim_mapping): if axis >= 0: if len(shape_list) > idx: - shape_list[idx] = shape_list[idx] // process_mesh_shape[ - axis] + shape_list[ + idx] = shape_list[idx] // process_mesh_shape[axis] # create op new_op_desc = main_block.desc.append_op() @@ -350,6 +353,7 @@ class DistributedReshapeImpl1(DistributedOperatorImpl): class DistributedReshapeImpl2(DistributedOperatorImpl): + def __init__(self, name): super(DistributedReshapeImpl2, self).__init__(name) self._forward_implemented = True @@ -478,8 +482,8 @@ class DistributedReshapeImpl2(DistributedOperatorImpl): for idx, axis in enumerate(out_dim_mapping): if axis >= 0: if len(shape_list) > idx: - shape_list[idx] = shape_list[idx] // process_mesh_shape[ - axis] + shape_list[ + idx] = shape_list[idx] // process_mesh_shape[axis] # create op new_op_desc = main_block.desc.append_op() diff --git a/python/paddle/distributed/auto_parallel/operators/dist_slice.py b/python/paddle/distributed/auto_parallel/operators/dist_slice.py index e3da47fd172..a37421ce612 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_slice.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_slice.py @@ -23,6 +23,7 @@ from .dist_default import DistributedDefaultImpl0 class DistributedSlice(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedSlice, self).__init__(op_type) @@ -31,6 +32,7 @@ register_distributed_operator_impl_container(DistributedSlice("slice")) class DistributedSliceImpl(DistributedOperatorImpl): + def __init__(self, name): super(DistributedSliceImpl, self).__init__(name) self._forward_implemented = True diff --git a/python/paddle/distributed/auto_parallel/operators/dist_softmax.py b/python/paddle/distributed/auto_parallel/operators/dist_softmax.py index f78f1c58dbf..afcdea4f045 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_softmax.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_softmax.py @@ -26,6 +26,7 @@ from .dist_default import DistributedDefaultImpl0 class DistributedSoftmax(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedSoftmax, self).__init__(op_type) @@ -34,6 +35,7 @@ register_distributed_operator_impl_container(DistributedSoftmax("softmax")) class DistributedSoftmaxImpl(DistributedOperatorImpl): + def __init__(self, name): super(DistributedSoftmaxImpl, self).__init__(name) self._forward_implemented = False diff --git a/python/paddle/distributed/auto_parallel/operators/dist_split.py b/python/paddle/distributed/auto_parallel/operators/dist_split.py index 289da80e1a7..8f89020b53c 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_split.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_split.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -25,6 +25,7 @@ from .dist_default import DistributedDefaultImpl0 class DistributedSplit(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedSplit, self).__init__(op_type) @@ -33,6 +34,7 @@ register_distributed_operator_impl_container(DistributedSplit("split")) class DistributedSplitImpl(DistributedOperatorImpl): + def __init__(self, name): super(DistributedSplitImpl, self).__init__(name) self._forward_implemented = True diff --git a/python/paddle/distributed/auto_parallel/operators/dist_transpose.py b/python/paddle/distributed/auto_parallel/operators/dist_transpose.py index e6a96fb795e..0dc4177399e 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_transpose.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_transpose.py @@ -26,6 +26,7 @@ from .dist_default import DistributedDefaultImpl0 class DistributedTranspose2(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedTranspose2, self).__init__(op_type) @@ -35,6 +36,7 @@ register_distributed_operator_impl_container( class DistributedTranspose2Impl(DistributedOperatorImpl): + def __init__(self, name): super(DistributedTranspose2Impl, self).__init__(name) self._forward_implemented = False diff --git a/python/paddle/distributed/auto_parallel/operators/dist_update_loss_scaling.py b/python/paddle/distributed/auto_parallel/operators/dist_update_loss_scaling.py index 4ea2e0a8847..9666f882200 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_update_loss_scaling.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_update_loss_scaling.py @@ -20,6 +20,7 @@ from ..utils import set_dist_op_desc_original_id class DistributedUpdateLossScaling(DistributedOperatorImplContainer): + def __init__(self, op_type): super(DistributedUpdateLossScaling, self).__init__(op_type) @@ -29,6 +30,7 @@ register_distributed_operator_impl_container( class DistributedUpdateLossScalingImpl(DistributedOperatorImpl): + def __init__(self, name): super(DistributedUpdateLossScalingImpl, self).__init__(name) self._name = name diff --git a/python/paddle/distributed/auto_parallel/parallelizer.py b/python/paddle/distributed/auto_parallel/parallelizer.py index 2ea1223c6f2..1ad85598101 100644 --- a/python/paddle/distributed/auto_parallel/parallelizer.py +++ b/python/paddle/distributed/auto_parallel/parallelizer.py @@ -108,8 +108,8 @@ class AutoParallelizer: if config["use_pure_fp16"]: config["base_opt"] = self._optimizer auto_parallel_fp16_pass = new_pass("auto_parallel_fp16", config) - auto_parallel_fp16_pass.apply( - [main_program], [startup_program], self._pass_context) + auto_parallel_fp16_pass.apply([main_program], [startup_program], + self._pass_context) else: auto_parallel_amp_pass = new_pass("auto_parallel_amp", config) auto_parallel_amp_pass.apply([main_program], [startup_program], @@ -123,8 +123,9 @@ class AutoParallelizer: config["loss"] = loss auto_parallel_recompute_pass = new_pass("auto_parallel_recompute", config) - auto_parallel_recompute_pass.apply( - [main_program], [startup_program], self._pass_context) + auto_parallel_recompute_pass.apply([main_program], + [startup_program], + self._pass_context) def _generate_backward(self, main_program, startup_program, loss, parameter_list, no_grad_set, callbacks): @@ -144,10 +145,10 @@ class AutoParallelizer: def _apply_optimize(self, main_program, startup_program, params_grads): with program_guard(main_program, startup_program): - optimize_ops = copy.deepcopy(self._optimizer).apply_gradients( - params_grads) + 
optimize_ops = copy.deepcopy( + self._optimizer).apply_gradients(params_grads) - # update completion + # update completion self._completer = Completer(self._dist_context) self._completer.complete_update_annotation(main_program) @@ -163,8 +164,8 @@ class AutoParallelizer: config["global_rank"] = rank auto_parallel_sharding_pass = new_pass("auto_parallel_sharding", config) - auto_parallel_sharding_pass.apply( - [main_program], [startup_program], self._pass_context) + auto_parallel_sharding_pass.apply([main_program], [startup_program], + self._pass_context) if self._dist_strategy.gradient_merge: config = copy.deepcopy(self._dist_strategy.gradient_merge_configs) @@ -172,8 +173,9 @@ class AutoParallelizer: config["params_grads"] = params_grads auto_parallel_gradient_merge_pass = new_pass( "auto_parallel_gradient_merge_pass", config) - auto_parallel_gradient_merge_pass.apply( - [main_program], [startup_program], self._pass_context) + auto_parallel_gradient_merge_pass.apply([main_program], + [startup_program], + self._pass_context) def _get_dist_program(self, rank, dist_context=None, relaunch_phase=False): completed_main_program = None @@ -181,7 +183,7 @@ class AutoParallelizer: serial_startup_program = self._startup_program.clone() serial_loss = serial_main_program.global_block().var(self._loss.name) - # generating serial + # generating serial if dist_context is None: # Annotation completion self._dist_context = DistributedContext() @@ -205,15 +207,16 @@ class AutoParallelizer: self._apply_pre_optimization_passes(completed_main_program, serial_startup_program, serial_loss, params_grads, self._no_grad_set) - # Logical partition + # Logical partition partitioner = Partitioner(self._dist_context, rank) dist_main_prog, dist_startup_prog, dist_params_grads = partitioner.partition( completed_main_program, serial_startup_program, params_grads) # TODO refactor the placement of optimizer # generate optimize program - dist_optimize_ops = self._apply_optimize( - dist_main_prog, dist_startup_prog, dist_params_grads) + dist_optimize_ops = self._apply_optimize(dist_main_prog, + dist_startup_prog, + dist_params_grads) set_grad_var_shape(dist_main_prog, self._dist_context) @@ -258,14 +261,17 @@ class AutoParallelizer: # auto search if self._dist_strategy.auto_search: logging.info("Start searching dist attr.") - serial_program_info = SerialProgramInfo( - self._main_program, self._startup_program, self._loss, - self._optimizer, self._cluster) - planner = Planner( - serial_program_info, - self, - algorithm_config={"name": "mcmc", - "max_search_times": 5}) + serial_program_info = SerialProgramInfo(self._main_program, + self._startup_program, + self._loss, + self._optimizer, + self._cluster) + planner = Planner(serial_program_info, + self, + algorithm_config={ + "name": "mcmc", + "max_search_times": 5 + }) dist_context, _ = planner.search() logging.info("End searching dist attr.") @@ -325,8 +331,8 @@ class AutoParallelizer: else: coverage_args = [] new_cmd_args = "-m paddle.distributed.fleet.launch" + " " + rank_mapping_args + " " + original_cmd_args - new_cmd = [sys.executable, "-u"] + coverage_args + shlex.split( - new_cmd_args) + new_cmd = [sys.executable, "-u" + ] + coverage_args + shlex.split(new_cmd_args) new_process = subprocess.Popen(new_cmd) new_process.wait() assert new_process.returncode == 0, \ @@ -368,13 +374,12 @@ class AutoParallelizer: self._loss, self._optimizer, cluster=self._cluster) - planner = Planner( - serial_program_info, - self, - algorithm_config={ - "name": "mcmc", - "max_search_times": 5 
- }) + planner = Planner(serial_program_info, + self, + algorithm_config={ + "name": "mcmc", + "max_search_times": 5 + }) dist_context, _ = planner.search() # rebuild g_process_group diff --git a/python/paddle/distributed/auto_parallel/parallelizer_v2.py b/python/paddle/distributed/auto_parallel/parallelizer_v2.py index 218513323df..ce543988ea4 100644 --- a/python/paddle/distributed/auto_parallel/parallelizer_v2.py +++ b/python/paddle/distributed/auto_parallel/parallelizer_v2.py @@ -31,6 +31,7 @@ from .dist_context import DistributedContext, get_default_distributed_context class Parallelizer: + def __init__(self, mode, completer, dist_context): self._mode = mode self._completer = completer @@ -54,8 +55,9 @@ class Parallelizer: if self._mode == "train" and serial_optimizer: # Generate backward serial_loss = self._dist_context.serial_loss - params_grads = self._generate_backward( - serial_main_program, serial_startup_program, serial_loss) + params_grads = self._generate_backward(serial_main_program, + serial_startup_program, + serial_loss) # Apply pre optimization passes self._apply_pre_optimization(serial_main_program, serial_startup_program, serial_loss, @@ -78,8 +80,9 @@ class Parallelizer: rank, dist_params_grads) else: # Apply pre optimization passes - self._apply_pre_optimization( - serial_main_program, serial_startup_program, None, None, None) + self._apply_pre_optimization(serial_main_program, + serial_startup_program, None, None, + None) # Do logical partition partitioner = Partitioner(self._dist_context, rank) dist_main_prog, dist_startup_prog, dist_params_grads = partitioner.partition( @@ -128,8 +131,8 @@ class Parallelizer: if config["use_pure_fp16"]: config["base_opt"] = optimizer auto_parallel_fp16_pass = new_pass("auto_parallel_fp16", config) - auto_parallel_fp16_pass.apply( - [main_program], [startup_program], self._pass_context) + auto_parallel_fp16_pass.apply([main_program], [startup_program], + self._pass_context) else: auto_parallel_amp_pass = new_pass("auto_parallel_amp", config) auto_parallel_amp_pass.apply([main_program], [startup_program], @@ -143,8 +146,9 @@ class Parallelizer: config["loss"] = loss auto_parallel_recompute_pass = new_pass("auto_parallel_recompute", config) - auto_parallel_recompute_pass.apply( - [main_program], [startup_program], self._dist_context) + auto_parallel_recompute_pass.apply([main_program], + [startup_program], + self._dist_context) def _apply_post_optimization(self, main_program, startup_program, rank, params_grads): @@ -157,8 +161,8 @@ class Parallelizer: config["global_rank"] = rank auto_parallel_sharding_pass = new_pass("auto_parallel_sharding", config) - auto_parallel_sharding_pass.apply( - [main_program], [startup_program], self._dist_context) + auto_parallel_sharding_pass.apply([main_program], [startup_program], + self._dist_context) if self._strategy.gradient_merge: config = copy.deepcopy(self._strategy.gradient_merge_configs) @@ -166,5 +170,6 @@ class Parallelizer: config["params_grads"] = params_grads auto_parallel_gradient_merge_pass = new_pass( "auto_parallel_gradient_merge_pass", config) - auto_parallel_gradient_merge_pass.apply( - [main_program], [startup_program], self._dist_context) + auto_parallel_gradient_merge_pass.apply([main_program], + [startup_program], + self._dist_context) diff --git a/python/paddle/distributed/auto_parallel/partitioner.py b/python/paddle/distributed/auto_parallel/partitioner.py index 6a767e5afcd..9056ab34fa7 100644 --- a/python/paddle/distributed/auto_parallel/partitioner.py +++ 
b/python/paddle/distributed/auto_parallel/partitioner.py @@ -187,8 +187,8 @@ class Partitioner(object): dist_g = None else: assert g.name in self._serial2dist_varname_mapping - dist_g = self._get_dist_var_by_serial_var(g, - partitioned_main_prog) + dist_g = self._get_dist_var_by_serial_var( + g, partitioned_main_prog) partitioned_params_and_grads.append((dist_p, dist_g)) return partitioned_main_prog, partitioned_params_and_grads @@ -211,15 +211,15 @@ class Partitioner(object): forward_op_id2forward_op = {} for idx in range(len(serial_ops)): if idx <= last_fwd_op_idx: - forward_op_id2forward_op[serial_ops[idx].desc.original_id( - )] = serial_ops[idx] + forward_op_id2forward_op[ + serial_ops[idx].desc.original_id()] = serial_ops[idx] appended_grad_times = 0 # partiiton for idx, op in enumerate(serial_ops): - if is_backward_op(op) and (is_forward_op(serial_ops[idx - 1]) or - is_loss_op(serial_ops[idx - 1])): + if is_backward_op(op) and (is_forward_op(serial_ops[idx - 1]) + or is_loss_op(serial_ops[idx - 1])): appended_grad_times += 1 # partititon input variables @@ -270,8 +270,8 @@ class Partitioner(object): dist_op_impl.backward(self._dist_context, **kinputs, **koutputs) else: raise NotImplementedError( - "partitioner only support forward and backward, optimize ops, but got {}". - format(str(op))) + "partitioner only support forward and backward, optimize ops, but got {}" + .format(str(op))) def _is_valid_annotated_program(self, program): @@ -338,35 +338,33 @@ def _partition_parameter(dist_context, src_var, dst_block, dst_varname, copied_kwargs['do_model_average'] = src_var.do_model_average copied_kwargs['need_clip'] = src_var.need_clip - param = Parameter( - block=dst_block, - type=src_var.type, - name=dst_varname, - shape=dst_shape, - dtype=src_var.dtype, - lod_level=src_var.lod_level, - error_clip=src_var.error_clip, - stop_gradient=src_var.stop_gradient, - is_data=src_var.is_data, - belong_to_optimizer=src_var.belong_to_optimizer, - **copied_kwargs) + param = Parameter(block=dst_block, + type=src_var.type, + name=dst_varname, + shape=dst_shape, + dtype=src_var.dtype, + lod_level=src_var.lod_level, + error_clip=src_var.error_clip, + stop_gradient=src_var.stop_gradient, + is_data=src_var.is_data, + belong_to_optimizer=src_var.belong_to_optimizer, + **copied_kwargs) return param def _partition_intermediate_var(dist_context, src_var, dst_block, dst_varname, dst_shape): - var = dst_block.create_var( - type=src_var.type, - name=dst_varname, - shape=dst_shape, - dtype=src_var.dtype, - lod_level=src_var.lod_level, - persistable=src_var.persistable, - error_clip=src_var.error_clip, - stop_gradient=src_var.stop_gradient, - is_data=src_var.is_data, - belong_to_optimizer=src_var.belong_to_optimizer) + var = dst_block.create_var(type=src_var.type, + name=dst_varname, + shape=dst_shape, + dtype=src_var.dtype, + lod_level=src_var.lod_level, + persistable=src_var.persistable, + error_clip=src_var.error_clip, + stop_gradient=src_var.stop_gradient, + is_data=src_var.is_data, + belong_to_optimizer=src_var.belong_to_optimizer) return var @@ -380,11 +378,10 @@ def _partition_var(dist_context, src_block, dst_block, src_varname, if src_var.type in __not_shape_var_type__: persist = getattr(src_var, 'persistable', False) - new_var = dst_block.create_var( - type=src_var.type, - name=dst_varname, - persistable=persist, - stop_gradient=True) + new_var = dst_block.create_var(type=src_var.type, + name=dst_varname, + persistable=persist, + stop_gradient=True) target_shape = None else: dist_attr = 
dist_context.get_tensor_dist_attr_for_program(src_var) @@ -394,8 +391,9 @@ def _partition_var(dist_context, src_block, dst_block, src_varname, new_var = _partition_parameter(dist_context, src_var, dst_block, dst_varname, target_shape) else: - new_var = _partition_intermediate_var( - dist_context, src_var, dst_block, dst_varname, target_shape) + new_var = _partition_intermediate_var(dist_context, src_var, + dst_block, dst_varname, + target_shape) dist_attr = copy.deepcopy( dist_context.get_tensor_dist_attr_for_program(src_var)) diff --git a/python/paddle/distributed/auto_parallel/planner.py b/python/paddle/distributed/auto_parallel/planner.py index b97c09bd59d..701fd78a7e8 100755 --- a/python/paddle/distributed/auto_parallel/planner.py +++ b/python/paddle/distributed/auto_parallel/planner.py @@ -41,6 +41,7 @@ np.random.seed(123) class PlanFilter: + @staticmethod def check_dims_mapping_for_tensor(process_mesh_topology, tensor_shape, dims_mapping): @@ -82,7 +83,7 @@ class PlanFilter: @staticmethod def check_dims_mapping_for_special_op(op, op_dist_attr, vars): - # NOTE: Those ops has some partition limits, and will be solved when corresponding dist op implemented in the future. + # NOTE: Those ops has some partition limits, and will be solved when corresponding dist op implemented in the future. if op.type == "elementwise_add" or op.type == 'layer_norm' or op.type == "softmax_with_cross_entropy": for name in op.input_arg_names: for item in op_dist_attr.get_input_dims_mapping(name): @@ -188,8 +189,7 @@ class PlanSpace: for var_name in chain(op.input_arg_names, op.output_arg_names): visited = [ False - for _ in range( - len(list(range(-1, len(process_mesh.topology))))) + for _ in range(len(list(range(-1, len(process_mesh.topology))))) ] depth = 0 path = [] @@ -213,13 +213,12 @@ class PlanSpace: op_dist_attr.set_input_dims_mapping(var_names[idx], dims_mapping) elif var_names[idx] in op.output_arg_names: - op_dist_attr.set_output_dims_mapping(var_names[idx], - dims_mapping) + op_dist_attr.set_output_dims_mapping( + var_names[idx], dims_mapping) else: raise ValueError( "The {varname} is not input or output of op {op}.". 
- format( - varname='var_names[idx]', op='op')) + format(varname='var_names[idx]', op='op')) dist_op = DistributedOperator(op, op_dist_attr) if dist_op_impl_container is None: @@ -339,16 +338,16 @@ class PlanSpace: op_dist_attr.set_input_dims_mapping(var_name, []) else: dims_mapping = [-1 for i in vars[var_name].shape] - op_dist_attr.set_input_dims_mapping(var_name, - dims_mapping) + op_dist_attr.set_input_dims_mapping( + var_name, dims_mapping) for var_name in op.output_arg_names: if var_name in PlanSpace.special_vars: op_dist_attr.set_output_dims_mapping(var_name, []) else: dims_mapping = [-1 for i in vars[var_name].shape] - op_dist_attr.set_output_dims_mapping(var_name, - dims_mapping) + op_dist_attr.set_output_dims_mapping( + var_name, dims_mapping) op_valid_dist_attrs = [op_dist_attr] pipeline_stage = 0 if pipeline_stage != -1 else pipeline_stage else: @@ -357,13 +356,15 @@ class PlanSpace: assert op_valid_dist_attrs is not None, "Enumerate {} valid distributed attribute failed.".format( op) - valid_dist_attr_dict[op.desc.id( - )] = [op_valid_dist_attrs, pipeline_stage] + valid_dist_attr_dict[op.desc.id()] = [ + op_valid_dist_attrs, pipeline_stage + ] return valid_dist_attr_dict, pipeline_process_meshes, global_process_mesh class SearchAlgorithm: + def __init__(self, name): self._name = name @@ -376,6 +377,7 @@ class SearchAlgorithm: class MCMC(SearchAlgorithm): + def __init__(self, serial_program_info, parallelizer, max_search_times=5): super(MCMC, self).__init__("mcmc") self._serial_program_info = serial_program_info @@ -426,7 +428,8 @@ class MCMC(SearchAlgorithm): break if not has_changed: raise ValueError( - "Change softmax_with_cross_entropy dist attr failed") + "Change softmax_with_cross_entropy dist attr failed" + ) def init_program(self, valid_dist_attr_dict, program, pipeline_process_meshes, global_process_mesh): @@ -443,8 +446,8 @@ class MCMC(SearchAlgorithm): for var_name in op.input_arg_names: if var_name == "lod_tensor_blocking_queue_0": continue - if new_dist_context.get_tensor_dist_attr_for_program(vars[ - var_name]) is None: + if new_dist_context.get_tensor_dist_attr_for_program( + vars[var_name]) is None: tensor_dist_attr = TensorDistributedAttribute() tensor_dist_attr.process_mesh = init_op_dist_attr.process_mesh tensor_dist_attr.dims_mapping = init_op_dist_attr.get_input_dims_mapping( @@ -498,12 +501,11 @@ class MCMC(SearchAlgorithm): standalone_cost_data = get_standalone_cost_data(all_dist_main_program) # cost model does not support cluster argument - cost = estimate_cost( - all_dist_main_program, - cluster=None, - pipeline_config=pipeline_config, - standalone_cost_data=standalone_cost_data, - batch_size=microbatch_size) + cost = estimate_cost(all_dist_main_program, + cluster=None, + pipeline_config=pipeline_config, + standalone_cost_data=standalone_cost_data, + batch_size=microbatch_size) return cost @@ -515,8 +517,8 @@ class MCMC(SearchAlgorithm): tensor_dist_attr.process_mesh = process_mesh tensor_dist_attr.dims_mapping = op_dist_attr.get_output_dims_mapping( var_name) - dist_context.set_tensor_dist_attr_for_program(vars[var_name], - tensor_dist_attr) + dist_context.set_tensor_dist_attr_for_program( + vars[var_name], tensor_dist_attr) # set input tensor distributed attribute if input is data or parameter for var_name in op.input_arg_names: @@ -526,19 +528,19 @@ class MCMC(SearchAlgorithm): tensor_dist_attr.process_mesh = process_mesh tensor_dist_attr.dims_mapping = op_dist_attr.get_input_dims_mapping( var_name) - 
dist_context.set_tensor_dist_attr_for_program(vars[var_name], - tensor_dist_attr) + dist_context.set_tensor_dist_attr_for_program( + vars[var_name], tensor_dist_attr) def change_process_mesh(self, op, changed_process_mesh, vars, dist_context): dist_context.get_op_dist_attr_for_program( op).process_mesh = changed_process_mesh for var_name in op.output_arg_names: - dist_context.get_tensor_dist_attr_for_program(vars[ - var_name]).process_mesh = changed_process_mesh + dist_context.get_tensor_dist_attr_for_program( + vars[var_name]).process_mesh = changed_process_mesh for var_name in op.input_arg_names: if vars[var_name].is_parameter or vars[var_name].is_data: - dist_context.get_tensor_dist_attr_for_program(vars[ - var_name]).process_mesh = changed_process_mesh + dist_context.get_tensor_dist_attr_for_program( + vars[var_name]).process_mesh = changed_process_mesh def search_once(self, program, @@ -561,8 +563,8 @@ class MCMC(SearchAlgorithm): pipeline_stage = valid_dist_attr_dict[selected_op.desc.id()][1] random_selected_dist_attr_idx = np.random.randint( len(op_valid_dist_attr_list)) - selected_op_dist_attr = copy.deepcopy(op_valid_dist_attr_list[ - random_selected_dist_attr_idx]) + selected_op_dist_attr = copy.deepcopy( + op_valid_dist_attr_list[random_selected_dist_attr_idx]) start_idx = ops[0].desc.id() if pipeline_stage > -1: @@ -604,8 +606,8 @@ class MCMC(SearchAlgorithm): selected_op_process_mesh, vars, new_dist_context) # change the selected op stage and output dist attr - new_valid_dist_attr_dict[selected_op.desc.id()][ - 1] = changed_stage + new_valid_dist_attr_dict[ + selected_op.desc.id()][1] = changed_stage new_process_mesh = pipeline_process_meshes[changed_stage] selected_op_dist_attr.process_mesh = new_process_mesh for op_dist_attr in new_valid_dist_attr_dict[ @@ -621,17 +623,17 @@ class MCMC(SearchAlgorithm): # change the pre op stage for idx in range(random_selected_op_idx - 1, -1, -1): stage = new_valid_dist_attr_dict[ops[idx].desc.id()][1] - valid_dist_attr_list = new_valid_dist_attr_dict[ops[ - idx].desc.id()][0] + valid_dist_attr_list = new_valid_dist_attr_dict[ + ops[idx].desc.id()][0] new_process_mesh = pipeline_process_meshes[ changed_stage] if stage == changed_stage + 1: - new_valid_dist_attr_dict[ops[idx].desc.id()][ - 1] = changed_stage + new_valid_dist_attr_dict[ + ops[idx].desc.id()][1] = changed_stage for op_dist_attr in valid_dist_attr_list: op_dist_attr.process_mesh = new_process_mesh - new_dist_context.get_op_dist_attr_for_program(ops[ - idx]).process_mesh = new_process_mesh + new_dist_context.get_op_dist_attr_for_program( + ops[idx]).process_mesh = new_process_mesh # change process mesh of the output and input tensor self.change_process_mesh(ops[idx], new_process_mesh, vars, new_dist_context) @@ -665,8 +667,8 @@ class MCMC(SearchAlgorithm): selected_op_process_mesh, vars, new_dist_context) # change the selected op stage and output tensor dist attr - new_valid_dist_attr_dict[selected_op.desc.id()][ - 1] = changed_stage + new_valid_dist_attr_dict[ + selected_op.desc.id()][1] = changed_stage new_process_mesh = pipeline_process_meshes[changed_stage] selected_op_dist_attr.process_mesh = new_process_mesh for op_dist_attr in new_valid_dist_attr_dict[ @@ -681,26 +683,26 @@ class MCMC(SearchAlgorithm): # change the next op stage for idx in range(random_selected_op_idx + 1, len(ops)): stage = new_valid_dist_attr_dict[ops[idx].desc.id()][1] - valid_dist_attr_list = new_valid_dist_attr_dict[ops[ - idx].desc.id()][0] + valid_dist_attr_list = new_valid_dist_attr_dict[ + 
ops[idx].desc.id()][0] new_process_mesh = pipeline_process_meshes[ changed_stage] if stage == changed_stage - 1: - new_valid_dist_attr_dict[ops[idx].desc.id()][ - 1] = changed_stage + new_valid_dist_attr_dict[ + ops[idx].desc.id()][1] = changed_stage for op_dist_attr in valid_dist_attr_list: op_dist_attr.process_mesh = new_process_mesh - new_dist_context.get_op_dist_attr_for_program(ops[ - idx]).process_mesh = new_process_mesh + new_dist_context.get_op_dist_attr_for_program( + ops[idx]).process_mesh = new_process_mesh # change the output tensor dist attr self.change_process_mesh(ops[idx], new_process_mesh, vars, new_dist_context) else: break else: - new_dist_context.set_op_dist_attr_for_program(selected_op, - selected_op_dist_attr) + new_dist_context.set_op_dist_attr_for_program( + selected_op, selected_op_dist_attr) self.set_tensor_dist_attr(selected_op, selected_op_dist_attr, vars, new_dist_context) @@ -759,15 +761,16 @@ class MCMC(SearchAlgorithm): format(process_mesh_topology)) valid_dist_attr_dict, pipeline_process_meshes, global_process_mesh = PlanSpace.enum_valid_dist_attr_for_program( train_program, process_mesh_topology, True) - init_dist_context = self.init_program( - valid_dist_attr_dict, train_program, pipeline_process_meshes, - global_process_mesh) - best_dist_context, cost = self._search_core(valid_dist_attr_dict, - init_dist_context, - pipeline_process_meshes) + init_dist_context = self.init_program(valid_dist_attr_dict, + train_program, + pipeline_process_meshes, + global_process_mesh) + best_dist_context, cost = self._search_core( + valid_dist_attr_dict, init_dist_context, + pipeline_process_meshes) print( - "MCMC search: the min cost is {} in the process mesh {} with pipeline mode.". - format(cost, process_mesh_topology)) + "MCMC search: the min cost is {} in the process mesh {} with pipeline mode." + .format(cost, process_mesh_topology)) best_dist_context._dist_op_context = DistributedOperatorContext() pipeline_min_cost = cost if pipeline_min_cost is None else pipeline_min_cost searched_pipeline_dist_context = best_dist_context if searched_pipeline_dist_context is None else searched_pipeline_dist_context @@ -785,15 +788,16 @@ class MCMC(SearchAlgorithm): format(process_mesh_topology)) valid_dist_attr_dict, pipeline_process_meshes, global_process_mesh = PlanSpace.enum_valid_dist_attr_for_program( train_program, process_mesh_topology, False) - init_dist_context = self.init_program( - valid_dist_attr_dict, train_program, pipeline_process_meshes, - global_process_mesh) - best_dist_context, cost = self._search_core(valid_dist_attr_dict, - init_dist_context, - pipeline_process_meshes) + init_dist_context = self.init_program(valid_dist_attr_dict, + train_program, + pipeline_process_meshes, + global_process_mesh) + best_dist_context, cost = self._search_core( + valid_dist_attr_dict, init_dist_context, + pipeline_process_meshes) print( - "MCMC search: the min cost is {} in the process mesh {} without pipeline mode.". - format(cost, process_mesh_topology)) + "MCMC search: the min cost is {} in the process mesh {} without pipeline mode." 
+ .format(cost, process_mesh_topology)) best_dist_context._dist_op_context = DistributedOperatorContext() non_pipeline_min_cost = cost if non_pipeline_min_cost is None else non_pipeline_min_cost searched_non_pipeline_dist_context = best_dist_context if searched_non_pipeline_dist_context is None else searched_non_pipeline_dist_context @@ -817,13 +821,16 @@ class MCMC(SearchAlgorithm): pg0.add_ranks(process_mesh.processes) end_time = time.time() print( - "End MCMC searching: the min cost is {} and the search time is {}s.". - format(min_cost, end_time - start_time)) + "End MCMC searching: the min cost is {} and the search time is {}s." + .format(min_cost, end_time - start_time)) return searched_dist_context, min_cost class Planner: - def __init__(self, serial_program_info, parallelizer, + + def __init__(self, + serial_program_info, + parallelizer, algorithm_config=None): self._serial_program_info = serial_program_info self._parallelizer = parallelizer diff --git a/python/paddle/distributed/auto_parallel/planner_v2.py b/python/paddle/distributed/auto_parallel/planner_v2.py index 3625a25d74e..77496ed3e6d 100755 --- a/python/paddle/distributed/auto_parallel/planner_v2.py +++ b/python/paddle/distributed/auto_parallel/planner_v2.py @@ -20,6 +20,7 @@ from .utils import print_program_with_dist_attr class Planner: + def __init__(self, mode, dist_context): self._mode = mode self._dist_context = dist_context diff --git a/python/paddle/distributed/auto_parallel/process_group.py b/python/paddle/distributed/auto_parallel/process_group.py index d1b6e57ddc1..d583dcb32eb 100644 --- a/python/paddle/distributed/auto_parallel/process_group.py +++ b/python/paddle/distributed/auto_parallel/process_group.py @@ -40,7 +40,7 @@ def get_world_process_group(): def new_process_group(ranks): global _g_process_group_map - # A key constructed from ranks is used for avoiding duplication + # A key constructed from ranks is used for avoiding duplication new_key = ''.join(map(str, sorted(ranks))) for pg_id, pg in _g_process_group_map.items(): cur_key = ''.join(map(str, sorted(pg.ranks))) @@ -57,12 +57,13 @@ def new_process_group(ranks): # This implementation refers to lots of Paddle/python/paddle/distributed/collective.py, -# Fleet also has a collective helper which uses ops to initialize communication in +# Fleet also has a collective helper which uses ops to initialize communication in # Paddle/python/paddle/distributed/fleet/meta_optimizers/common.py. We use the first one -# because it seems simple. This should be enhanced to manage the process membership and -# the instantiation process in a more general way. In the future, the process group may +# because it seems simple. This should be enhanced to manage the process membership and +# the instantiation process in a more general way. In the future, the process group may # handle the communication implementation choice. class ProcessGroup: + def __init__(self, group_id, ranks): if group_id == 0 and get_process_group(0) is not None: assert group_id != 0, "Process group id 0 is reserved for all ranks." @@ -129,7 +130,7 @@ class ProcessGroup: else: assert False, ("No CUDA device found") - # TODO(shenliang03): This is a temporary solution to solve the problem of + # TODO(shenliang03): This is a temporary solution to solve the problem of # hang caused by cross-creation of new_group tmp = paddle.to_tensor( [1], dtype="int32") if _non_static_mode() else fill_constant( @@ -156,6 +157,6 @@ class ProcessGroup: # Note that Process group 0 is reserved for representing all ranks. 
-# At the beginning, group 0 is empty and new ranks will be added automatically. +# At the beginning, group 0 is empty and new ranks will be added automatically. _g_process_group_map = {} _g_process_group_map[0] = ProcessGroup(0, []) diff --git a/python/paddle/distributed/auto_parallel/process_mesh.py b/python/paddle/distributed/auto_parallel/process_mesh.py index f95951a3bad..f751087e29e 100644 --- a/python/paddle/distributed/auto_parallel/process_mesh.py +++ b/python/paddle/distributed/auto_parallel/process_mesh.py @@ -97,7 +97,7 @@ class ProcessMesh(object): from .dist_context import get_default_distributed_context default_dist_cxt = get_default_distributed_context() default_dist_cxt.add_process_mesh(self) - # Add new processes to process group 0 + # Add new processes to process group 0 from .process_group import get_process_group pg0 = get_process_group(0) pg0.add_ranks(self.processes) diff --git a/python/paddle/distributed/auto_parallel/reshard.py b/python/paddle/distributed/auto_parallel/reshard.py index 7481ec736f0..8fb38142218 100644 --- a/python/paddle/distributed/auto_parallel/reshard.py +++ b/python/paddle/distributed/auto_parallel/reshard.py @@ -27,7 +27,7 @@ from .dist_context import DistributedContext from .dist_attribute import OperatorDistributedAttribute, TensorDistributedAttribute from .process_group import new_process_group, ProcessGroup, _g_process_group_map -# NOTE: If op in _g_special_ops, it will not be resharded. +# NOTE: If op in _g_special_ops, it will not be resharded. _g_special_ops = ['check_finite_and_unscale', 'update_loss_scaling'] @@ -195,34 +195,32 @@ class Inserter: def insert_send_op(block, idx, tensor, dst, op_role): """Insert send op into block at the given index.""" op_type = 'send_v2' - block._insert_op( - idx, - type=op_type, - inputs={'X': [tensor]}, - attrs={ - 'ring_id': 0, - 'peer': dst, - 'use_calc_stream': True, - 'op_role': op_role - }) + block._insert_op(idx, + type=op_type, + inputs={'X': [tensor]}, + attrs={ + 'ring_id': 0, + 'peer': dst, + 'use_calc_stream': True, + 'op_role': op_role + }) @staticmethod def insert_recv_op(block, idx, tensor, src, op_role): """Insert recv op into block at the given index.""" op_type = 'recv_v2' - block._insert_op( - idx, - type=op_type, - inputs={'X': [tensor]}, - outputs={'Out': [tensor]}, - attrs={ - 'ring_id': 0, - 'peer': src, - 'out_shape': tensor.shape, - 'dtype': tensor.dtype, - 'use_calc_stream': True, - 'op_role': op_role - }) + block._insert_op(idx, + type=op_type, + inputs={'X': [tensor]}, + outputs={'Out': [tensor]}, + attrs={ + 'ring_id': 0, + 'peer': src, + 'out_shape': tensor.shape, + 'dtype': tensor.dtype, + 'use_calc_stream': True, + 'op_role': op_role + }) @staticmethod def insert_concat_op(block, idx, tensors, axis, op_role): @@ -235,12 +233,11 @@ class Inserter: with paddle.static.program_guard(block.program): out = helper.create_variable_for_type_inference( dtype=helper.input_dtype()) - block._insert_op( - idx, - type='concat', - inputs=inputs, - outputs={'Out': [out]}, - attrs=attrs) + block._insert_op(idx, + type='concat', + inputs=inputs, + outputs={'Out': [out]}, + attrs=attrs) return out @staticmethod @@ -257,14 +254,14 @@ class Inserter: 'op_role': op_role } helper = LayerHelper('slice', **locals()) - out = block.create_var( - name=new_var_name, dtype=tensor.dtype, type=tensor.type) - block._insert_op( - idx, - type="slice", - inputs=inputs, - outputs={'Out': [out]}, - attrs=attrs) + out = block.create_var(name=new_var_name, + dtype=tensor.dtype, + type=tensor.type) + 
block._insert_op(idx, + type="slice", + inputs=inputs, + outputs={'Out': [out]}, + attrs=attrs) return out @staticmethod @@ -279,12 +276,11 @@ class Inserter: helper.create_variable_for_type_inference( dtype=helper.input_dtype()) for i in range(num_or_sections) ] - block._insert_op( - idx, - type="split", - inputs=inputs, - outputs={'Out': outs}, - attrs=attrs) + block._insert_op(idx, + type="split", + inputs=inputs, + outputs={'Out': outs}, + attrs=attrs) return outs @staticmethod @@ -299,14 +295,15 @@ class Inserter: attrs['value'] = int("1") attrs['dtype'] = out.dtype attrs['op_role'] = op_role - utils.get_shape_tensor_inputs( - inputs=inputs, attrs=attrs, shape=[0], op_type='fill_constant') - block._insert_op( - idx, - type='fill_constant', - inputs=inputs, - outputs={'Out': [out]}, - attrs=attrs) + utils.get_shape_tensor_inputs(inputs=inputs, + attrs=attrs, + shape=[0], + op_type='fill_constant') + block._insert_op(idx, + type='fill_constant', + inputs=inputs, + outputs={'Out': [out]}, + attrs=attrs) out.stop_gradient = True return out @@ -320,29 +317,27 @@ class Inserter: # instant process group before insert allgather op. if not group.is_instantiate(): # insert fill_constant op - fill_constant_out = Inserter.insert_fill_constant_op(block, idx, - op_role) + fill_constant_out = Inserter.insert_fill_constant_op( + block, idx, op_role) fill_constant_out.stop_gradient = True # insert c_allreduce_sum op - block._insert_op( - idx + 1, - type="c_allreduce_sum", - inputs={'X': [fill_constant_out]}, - outputs={'Out': [fill_constant_out]}, - attrs={ - 'ring_id': 0, - 'use_calc_stream': True, - 'op_role': op_role - }) + block._insert_op(idx + 1, + type="c_allreduce_sum", + inputs={'X': [fill_constant_out]}, + outputs={'Out': [fill_constant_out]}, + attrs={ + 'ring_id': 0, + 'use_calc_stream': True, + 'op_role': op_role + }) # insert c_sync_calc_stream op - block._insert_op( - idx + 2, - type="c_sync_calc_stream", - inputs={'X': [fill_constant_out]}, - outputs={'Out': [fill_constant_out]}, - attrs={'op_role': op_role}) + block._insert_op(idx + 2, + type="c_sync_calc_stream", + inputs={'X': [fill_constant_out]}, + outputs={'Out': [fill_constant_out]}, + attrs={'op_role': op_role}) idx_offset = 3 # insert c_allgather op @@ -351,22 +346,22 @@ class Inserter: with paddle.static.program_guard(block.program): allgather_out = helper.create_variable_for_type_inference( dtype=tensor.dtype) - block._insert_op( - idx + idx_offset, - type=op_type, - inputs={'X': [tensor]}, - outputs={'Out': [allgather_out]}, - attrs={ - 'ring_id': group.id, - 'use_calc_stream': True, - 'nranks': group.nranks, - 'op_role': op_role - }) + block._insert_op(idx + idx_offset, + type=op_type, + inputs={'X': [tensor]}, + outputs={'Out': [allgather_out]}, + attrs={ + 'ring_id': group.id, + 'use_calc_stream': True, + 'nranks': group.nranks, + 'op_role': op_role + }) idx_offset += 1 # insert split op - split_out = Inserter.insert_split_op( - block, idx + idx_offset, allgather_out, group.nranks, op_role) + split_out = Inserter.insert_split_op(block, idx + idx_offset, + allgather_out, group.nranks, + op_role) idx_offset += 1 tensor_list.extend(split_out) return tensor_list, idx_offset @@ -740,12 +735,12 @@ class Resharder: for idx, item in enumerate(partition_index_x): if item != partition_index_y[idx]: differ_count += 1 - if item[1] == partition_index_y[idx][0] and item[ - 0] < partition_index_y[idx][1]: + if item[1] == partition_index_y[idx][ + 0] and item[0] < partition_index_y[idx][1]: concat_axis = idx 
new_partition.append([item[0], partition_index_y[idx][1]]) - elif item[0] == partition_index_y[idx][1] and item[ - 1] > partition_index_y[idx][0]: + elif item[0] == partition_index_y[idx][ + 1] and item[1] > partition_index_y[idx][0]: first_order = 1 concat_axis = idx new_partition.append([partition_index_y[idx][0], item[1]]) @@ -839,8 +834,8 @@ class Resharder: def is_overlapped(self, shape_x, shape_y): """Judge whether two partitions intersect on the specified dimension.""" overlapped = False - if (shape_y[0] <= shape_x[0] < shape_y[1]) or ( - shape_x[0] <= shape_y[0] < shape_x[1]): + if (shape_y[0] <= shape_x[0] < shape_y[1]) or (shape_x[0] <= shape_y[0] + < shape_x[1]): overlapped = True return overlapped @@ -986,9 +981,10 @@ class Resharder: dist_op = self.dist_context.get_dist_op_for_program(op) op_process_mesh = dist_op.dist_attr.process_mesh for process_mesh in self.dist_context.process_meshes: - if set(process_mesh.processes) & ( - set(op_process_mesh.processes) - ) and len(process_mesh.processes) <= len(op_process_mesh.processes): + if set(process_mesh.processes) & (set( + op_process_mesh.processes)) and len( + process_mesh.processes) <= len( + op_process_mesh.processes): process_meshes.append(process_mesh) # it means the process mesh is not a union when process meshes is null @@ -1085,9 +1081,8 @@ class Resharder: process_list[index].append(source_process) has_used[index].append(False) else: - partition_process_mapping_list.append([ - source_partition_index, [source_process], [False] - ]) + partition_process_mapping_list.append( + [source_partition_index, [source_process], [False]]) for target_process in target_process_group: has_sent = [] @@ -1152,8 +1147,8 @@ class Resharder: slices_axes = [] concatenated_partition_index = partition_index_list[0] for idx, item in enumerate(concatenated_partition_index): - slice_starts.append(target_partition_index[idx][0] - item[ - 0]) + slice_starts.append(target_partition_index[idx][0] - + item[0]) slice_ends.append(target_partition_index[idx][1] - item[0]) slices_axes.append(idx) op_desc_seq[target_process].append( @@ -1170,8 +1165,9 @@ class Resharder: source_process_shape, source_process_group) if source_partition_index not in partition_index_list: partition_index_list.append(source_partition_index) - process_index.append( - [[source_process, ], source_partition_index]) + process_index.append([[ + source_process, + ], source_partition_index]) else: process_index[partition_index_list.index( source_partition_index)][0].append(source_process) @@ -1195,8 +1191,9 @@ class Resharder: slice_ends.append(item[1]) slices_axes.append(idx) - slice_op_desc = SliceOpDesc( - starts=slice_starts, ends=slice_ends, axes=slices_axes) + slice_op_desc = SliceOpDesc(starts=slice_starts, + ends=slice_ends, + axes=slices_axes) op_desc_seq[process] = [AllGatherOpDesc(group=group), ConcatOpDesc(partition_index_list=all_partition_index_list), slice_op_desc] \ if len(group) > 1 else [slice_op_desc] @@ -1227,9 +1224,8 @@ class Resharder: if isinstance(op_desc, AllGatherOpDesc): # noqa: F401 if var_name not in self.has_allgather.keys(): self.has_allgather[var_name] = [] - if not self.has_allgather[ - var_name] or op_desc.group not in list( - map(lambda x: x[0], self.has_allgather[var_name])): + if not self.has_allgather[var_name] or op_desc.group not in list( + map(lambda x: x[0], self.has_allgather[var_name])): tensor_list, idx_offset = Inserter.insert_allgather_op( block, idx, source_tensor, op_desc.group, reshard_op.attr('op_role')) @@ -1317,11 +1313,11 @@ class 
Resharder: target_tensor, tensor_attr) if op.type == "while": - # var_reshard_mapping means the while op input need be changed to + # var_reshard_mapping means the while op input need be changed to if "var_reshard_mapping" not in Resharder.while_block_info[ op.attr("sub_block").id].keys(): - Resharder.while_block_info[op.attr("sub_block").id][ - "var_reshard_mapping"] = {} + Resharder.while_block_info[op.attr( + "sub_block").id]["var_reshard_mapping"] = {} Resharder.while_block_info[op.attr("sub_block").id][ "var_reshard_mapping"][var_name] = target_tensor.name @@ -1370,8 +1366,8 @@ class Resharder: op_dist_attr.set_input_dims_mapping( var_reshard_mapping[var_name], dims_mapping) - op_dist_attr.set_input_dist_attr(var_name, - None) + op_dist_attr.set_input_dist_attr( + var_name, None) # the outputs also need to be renamed when the output name is the same with input name for var_name in op.output_arg_names: @@ -1388,8 +1384,8 @@ class Resharder: op_dist_attr.set_output_dims_mapping( var_reshard_mapping[var_name], dims_mapping) - op_dist_attr.set_output_dist_attr(var_name, - None) + op_dist_attr.set_output_dist_attr( + var_name, None) idx = 0 while idx < len(block.ops): @@ -1412,10 +1408,10 @@ class Resharder: assert process_meshes if op.attr("sub_block" ).id not in Resharder.while_block_info: - Resharder.while_block_info[op.attr("sub_block") - .id] = {} - Resharder.while_block_info[op.attr("sub_block").id][ - "op_id"] = op.desc.id() + Resharder.while_block_info[op.attr( + "sub_block").id] = {} + Resharder.while_block_info[op.attr( + "sub_block").id]["op_id"] = op.desc.id() Resharder.while_block_info[op.attr("sub_block").id][ "actual_process_mesh"] = self.get_while_op_actual_process_mesh( op) @@ -1476,13 +1472,13 @@ class Resharder: recv_rank = dist_tensor.dist_attr.process_mesh.processes[ index] if self.rank_id == item: - Inserter.insert_send_op(block, idx + 1, var, - recv_rank, - op.attr('op_role')) + Inserter.insert_send_op( + block, idx + 1, var, recv_rank, + op.attr('op_role')) if self.rank_id == recv_rank: - Inserter.insert_recv_op(block, idx + 1, var, - item, - op.attr('op_role')) + Inserter.insert_recv_op( + block, idx + 1, var, item, + op.attr('op_role')) cur_op_count = len(block.ops) idx_offset = idx_offset + cur_op_count - pre_op_count pre_op_count = cur_op_count diff --git a/python/paddle/distributed/auto_parallel/tuner/recorder.py b/python/paddle/distributed/auto_parallel/tuner/recorder.py index ba61843831a..de3c9cb8429 100644 --- a/python/paddle/distributed/auto_parallel/tuner/recorder.py +++ b/python/paddle/distributed/auto_parallel/tuner/recorder.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Notice that the following codes are modified from KerasTuner for a different purpose. +# Notice that the following codes are modified from KerasTuner for a different purpose. # Please refer to https://github.com/keras-team/keras-tuner/blob/master/keras_tuner/engine/metrics_tracking.py. import numpy as np diff --git a/python/paddle/distributed/auto_parallel/tuner/storable.py b/python/paddle/distributed/auto_parallel/tuner/storable.py index 63e5eba77f1..18a0669d622 100644 --- a/python/paddle/distributed/auto_parallel/tuner/storable.py +++ b/python/paddle/distributed/auto_parallel/tuner/storable.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Notice that the following codes are modified from KerasTuner for a different purpose. 
+# Notice that the following codes are modified from KerasTuner for a different purpose. # Please refer to https://github.com/keras-team/keras-tuner/blob/master/keras_tuner/engine/metrics_tracking.py. import json class Storable(object): + def get_state(self): raise NotImplementedError diff --git a/python/paddle/distributed/auto_parallel/tuner/trial.py b/python/paddle/distributed/auto_parallel/tuner/trial.py index 1cda82f1ede..78139cbd58b 100644 --- a/python/paddle/distributed/auto_parallel/tuner/trial.py +++ b/python/paddle/distributed/auto_parallel/tuner/trial.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Notice that the following codes are modified from KerasTuner to implement our own tuner. +# Notice that the following codes are modified from KerasTuner to implement our own tuner. # Please refer to https://github.com/keras-team/keras-tuner/blob/master/keras_tuner/engine/trial.py. import hashlib @@ -33,7 +33,10 @@ class TrialStatus: class Trial(Storable): - def __init__(self, tunable_space, trial_id=None, + + def __init__(self, + tunable_space, + trial_id=None, status=TrialStatus.RUNNING): self._id = _generate_trial_id() if trial_id is None else trial_id self._space = tunable_space diff --git a/python/paddle/distributed/auto_parallel/tuner/tunable_space.py b/python/paddle/distributed/auto_parallel/tuner/tunable_space.py index 2838a019584..93ae25c9c4d 100644 --- a/python/paddle/distributed/auto_parallel/tuner/tunable_space.py +++ b/python/paddle/distributed/auto_parallel/tuner/tunable_space.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Notice that the following codes are modified from KerasTuner to implement our own tuner. +# Notice that the following codes are modified from KerasTuner to implement our own tuner. # Please refer to https://github.com/keras-team/keras-tuner/blob/master/keras_tuner/engine/hyperparameters.py. 
import collections @@ -103,13 +103,19 @@ class TunableSpace(object): return self._retrieve(tv) def int_range(self, name, start, stop, step=1, default=None): - tv = IntRange( - name=name, start=start, stop=stop, step=step, default=default) + tv = IntRange(name=name, + start=start, + stop=stop, + step=step, + default=default) return self._retrieve(tv) def float_range(self, name, start, stop, step=None, default=None): - tv = FloatRange( - name=name, start=start, stop=stop, step=step, default=default) + tv = FloatRange(name=name, + start=start, + stop=stop, + step=step, + default=default) return self._retrieve(tv) def get_state(self): @@ -118,7 +124,8 @@ class TunableSpace(object): "class_name": v.__class__.__name__, "state": v.get_state() } for v in self._variables.values()], - "values": dict((k, v) for (k, v) in self.values.items()) + "values": + dict((k, v) for (k, v) in self.values.items()) } @classmethod @@ -138,8 +145,8 @@ def _deserialize_tunable_variable(state): if isinstance(state, classes): return state - if (not isinstance(state, dict) or "class_name" not in state or - "state" not in state): + if (not isinstance(state, dict) or "class_name" not in state + or "state" not in state): raise ValueError( "Expect state to be a python dict containing class_name and state as keys, but found {}" .format(state)) diff --git a/python/paddle/distributed/auto_parallel/tuner/tunable_variable.py b/python/paddle/distributed/auto_parallel/tuner/tunable_variable.py index 19f118fdde7..424b6b74bb1 100644 --- a/python/paddle/distributed/auto_parallel/tuner/tunable_variable.py +++ b/python/paddle/distributed/auto_parallel/tuner/tunable_variable.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Notice that the following codes are modified from KerasTuner to implement our own tuner. +# Notice that the following codes are modified from KerasTuner to implement our own tuner. # Please refer to https://github.com/keras-team/keras-tuner/blob/master/keras_tuner/engine/hyperparameters.py. import numpy as np @@ -49,8 +49,8 @@ class Fixed(TunableVariable): self.name = name if not isinstance(default, (str, int, float, bool)): raise ValueError( - "Fixed must be an str, int, float or bool, but found {}" - .format(default)) + "Fixed must be an str, int, float or bool, but found {}".format( + default)) self._default = default def random(self, seed=None): @@ -76,11 +76,12 @@ class Boolean(TunableVariable): return rng.choice((True, False)) def __repr__(self): - return 'Boolean(name: "{}", default: {})'.format(self.name, - self.default) + return 'Boolean(name: "{}", default: {})'.format( + self.name, self.default) class Choice(TunableVariable): + def __init__(self, name, values, default=None): super(Choice, self).__init__(name=name, default=default) @@ -114,8 +115,8 @@ class Choice(TunableVariable): if default is not None and default not in values: raise ValueError( - "The default value should be one of the choices {}, but found {}". 
- format(values, default)) + "The default value should be one of the choices {}, but found {}" + .format(values, default)) self._default = default @property diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/utils.py index 7b198e288c6..c0f6f90c6af 100644 --- a/python/paddle/distributed/auto_parallel/utils.py +++ b/python/paddle/distributed/auto_parallel/utils.py @@ -327,8 +327,8 @@ def _get_corresponding_rank(dist_context, target_mesh, rank): # assert coordinate is not None, "could NOT found rank [{}] in any registered mesh".format( # rank) if coordinate is not None: - return target_mesh.processes[_coordinate2linear_idx(mesh.topology, - coordinate)] + return target_mesh.processes[_coordinate2linear_idx( + mesh.topology, coordinate)] else: return target_mesh.processes[0] @@ -381,8 +381,8 @@ def _update_addition_info(addition_info): if item not in ["epoch", "batch", "batch_size"]: raise ValueError( "The key of 'addition_info' should be one of the " - "['epoch', 'batch', 'batch_size'], but got '{}'." - .format(str(item))) + "['epoch', 'batch', 'batch_size'], but got '{}'.".format( + str(item))) if not isinstance(value, int): raise ValueError( "The value of 'addition_info' should be 'int', " @@ -401,8 +401,8 @@ def _check_valid_path(file_path): raise TypeError("The type of file path should be 'str', " "but got '{}'.".format(str(type(file)))) if not os.path.exists(file): - raise ValueError("The file path '{}' does not exist." - .format(file)) + raise ValueError( + "The file path '{}' does not exist.".format(file)) return file_path else: raise TypeError("The type of file path should be 'list', " @@ -580,8 +580,9 @@ def load_checkpoint_into_program(checkpoint_path, all_cur_dist_attr = get_dist_attr(program, dist_context) all_param_dict = all_state_dict_info["model"] addition_info = all_state_dict_info["addition_info"] - sliced_param_dict = merge_and_slice_parameter( - all_param_dict, all_pre_dist_attr, all_cur_dist_attr) + sliced_param_dict = merge_and_slice_parameter(all_param_dict, + all_pre_dist_attr, + all_cur_dist_attr) load_parameter_into_program(sliced_param_dict, program) return addition_info @@ -613,8 +614,8 @@ def _save_distributed_attribute(program, dist_attr_path, dist_context): "world_size": paddle.distributed.get_world_size() } paddle.save(dist_attr_dict, dist_attr_name) - logging.info("Already saved distributed attribute to '{}'.".format( - dist_attr_path)) + logging.info( + "Already saved distributed attribute to '{}'.".format(dist_attr_path)) def _load_distributed_attribute(dist_attr_path): @@ -715,8 +716,8 @@ def merge_and_slice_parameter(dist_param_dict, pre_dist_attr, cur_dist_attr): for name, value in dist_param_dict.items(): if not isinstance(name, str): raise TypeError("The key of 'dist_param_dict' is parameter's name, " - "and its type should be 'str', but got {}." 
- .format(str(type(name)))) + "and its type should be 'str', but got {}.".format( + str(type(name)))) if not isinstance(value, list) or not all( isinstance(v, np.ndarray) for v in value): raise TypeError( @@ -748,16 +749,16 @@ def merge_and_slice_parameter(dist_param_dict, pre_dist_attr, cur_dist_attr): pre_dims_mapping = pre_attr["dims_mapping"] cur_dims_mapping = cur_attr["dims_mapping"] if len(set(pre_dims_mapping)) > 1 or -1 not in pre_dims_mapping: - complete_param = _merge_parameter_with_dist_attr(pre_param, - pre_attr) + complete_param = _merge_parameter_with_dist_attr( + pre_param, pre_attr) dist_param_dict[var_name] = complete_param else: complete_param = pre_param[0] dist_param_dict[var_name] = complete_param if len(set(cur_dims_mapping)) > 1 or -1 not in cur_dims_mapping: - sliced_param = _slice_parameter_with_dist_attr(complete_param, - cur_attr) + sliced_param = _slice_parameter_with_dist_attr( + complete_param, cur_attr) dist_param_dict[var_name] = sliced_param for var_name in pre_dist_attr: @@ -766,12 +767,13 @@ def merge_and_slice_parameter(dist_param_dict, pre_dist_attr, cur_dist_attr): dist_param_dict.pop(var_name) if param_not_in_pre: - warnings.warn("Parameters '{}' are not found in last training process." - .format(str(param_not_in_pre))) + warnings.warn( + "Parameters '{}' are not found in last training process.".format( + str(param_not_in_pre))) if param_not_in_cur: warnings.warn( - "Parameters '{}' are not found in current training process." - .format(str(param_not_in_cur))) + "Parameters '{}' are not found in current training process.".format( + str(param_not_in_cur))) return dist_param_dict @@ -784,8 +786,9 @@ def _merge_parameter_with_dist_attr(param_list, dist_attr): process_shape = dist_attr["process_shape"] process_group = dist_attr["process_group"] # get the complete shape of the parameter - complete_shape = Resharder.compute_complete_shape( - param_list[0].shape, process_shape, dims_mapping) + complete_shape = Resharder.compute_complete_shape(param_list[0].shape, + process_shape, + dims_mapping) # merge the parameter with dist_attr partition_param_list = [] merged_partiton = [] @@ -818,8 +821,9 @@ def _slice_parameter_with_dist_attr(param, dist_attr): len(partition_index_list)) # get the current parameter's index in sliced_param_list rank_id = paddle.distributed.get_rank() - sliced_param_index = _get_sliced_param_index( - rank_id, param.shape, dims_mapping, process_shape, process_group) + sliced_param_index = _get_sliced_param_index(rank_id, param.shape, + dims_mapping, process_shape, + process_group) sliced_param = sliced_param_list[sliced_param_index] return sliced_param @@ -899,8 +903,9 @@ def _slice_parameter(complete_param, partition_index_list, length): """ sliced_param_list = [] axis = len(complete_param.shape) - length - sliced_param = np.split( - complete_param, partition_index_list[axis], axis=axis) + sliced_param = np.split(complete_param, + partition_index_list[axis], + axis=axis) if length == 1: return sliced_param for param in sliced_param: @@ -938,8 +943,10 @@ def _get_sliced_param_index(rank, complete_shape, dims_mapping, process_shape, """ from .reshard import Resharder - partition_index = Resharder.compute_partition_index( - rank, complete_shape, dims_mapping, process_shape, process_group) + partition_index = Resharder.compute_partition_index(rank, complete_shape, + dims_mapping, + process_shape, + process_group) sliced_param_index = 0 for i, shape in enumerate(complete_shape): if dims_mapping[i] == -1: @@ -1090,8 +1097,8 @@ def 
is_forward_op(op): ref_role1 = int(core.op_proto_and_checker_maker.OpRole.Forward) ref_role2 = int(core.op_proto_and_checker_maker.OpRole.Loss) op_role = int(op.attr('op_role')) - return OP_ROLE_KEY in op.attr_names and (op_role == ref_role1 or - op_role == ref_role2) + return OP_ROLE_KEY in op.attr_names and (op_role == ref_role1 + or op_role == ref_role2) def is_backward_op(op): @@ -1117,8 +1124,8 @@ def get_loss_op(block): loss_ops = [] for op in block.ops: if is_loss_op(op): - assert len(op.desc.output_arg_names( - )) == 1, "loss op should only output loss var" + assert len(op.desc.output_arg_names() + ) == 1, "loss op should only output loss var" loss_ops.append(op) assert len(loss_ops) == 1, "num of loss op is not equal to one" @@ -1137,8 +1144,8 @@ def set_var_dist_attr(dist_context, var, dims_mapping, process_mesh, **kwargs): return tensor_dist_attr -def naive_set_dist_op_attr_for_program_by_mesh_and_mapping(new_op, process_mesh, - ref_mapping, ctx): +def naive_set_dist_op_attr_for_program_by_mesh_and_mapping( + new_op, process_mesh, ref_mapping, ctx): assert process_mesh is not None assert ref_mapping is not None @@ -1309,6 +1316,7 @@ def get_all_distributed_main_program(serial_program_info, dist_context, class SerialProgramInfo: + def __init__(self, train_program, satrtup_program, @@ -1343,6 +1351,7 @@ class SerialProgramInfo: def get_standalone_cost_data(distributed_programs): + def _compute_runtime(op_cost, op, vars): runtime = 0 try: @@ -1361,8 +1370,8 @@ def get_standalone_cost_data(distributed_programs): shape_left_boundary = info.find("[") shape_right_boundary = info.find("]") assert shape_left_boundary > 0 and shape_right_boundary > 0 and shape_right_boundary > shape_left_boundary, "Get shape failed." - shape = info[shape_left_boundary + 1: - shape_right_boundary].split(",") + shape = info[shape_left_boundary + + 1:shape_right_boundary].split(",") shape = list(map(lambda x: int(x.strip()), shape)) dtype_factor = 1 total_static_input_size += reduce(lambda x, y: x * y, shape) @@ -1404,20 +1413,21 @@ def get_standalone_cost_data(distributed_programs): if op.type in not_enum_ops: cost_data[op.desc.id()] = runtime continue - dtype = str(vars[op.input_arg_names[0]] - .dtype) if op.input_arg_names else "float32" + dtype = str(vars[op.input_arg_names[0]].dtype + ) if op.input_arg_names else "float32" if int(op.attr('op_role')) == int(OpRole.Backward): if "_grad" in op.type: forward_op_name = op.type[:-5] if forward_op_name in OP_NAME_MAPPING.keys(): forward_op_name = OP_NAME_MAPPING[forward_op_name] - op_cost = cost_model.get_static_op_time( - forward_op_name, forward=False, dtype=dtype) + op_cost = cost_model.get_static_op_time(forward_op_name, + forward=False, + dtype=dtype) if op_cost: runtime = _compute_runtime(op_cost, op, vars) else: - op_cost = cost_model.get_static_op_time( - forward_op_name, dtype=dtype) + op_cost = cost_model.get_static_op_time(forward_op_name, + dtype=dtype) if op_cost: runtime = 2 * _compute_runtime(op_cost, op, vars) elif int(op.attr('op_role')) == int(OpRole.Forward): diff --git a/python/paddle/distributed/cloud_utils.py b/python/paddle/distributed/cloud_utils.py index 34e55bf1646..a8eedb96a3e 100644 --- a/python/paddle/distributed/cloud_utils.py +++ b/python/paddle/distributed/cloud_utils.py @@ -66,8 +66,8 @@ paddlecloud environment.".format(args_node_ips, node_ips)) if paddle_ports_num >= len( selected_devices) and paddle_port != args_port: - logger.warning("Use Cloud specified port:{}.".format( - paddle_port)) + logger.warning( + "Use Cloud 
specified port:{}.".format(paddle_port)) started_port = paddle_port except Exception as e: @@ -87,12 +87,13 @@ paddlecloud environment.".format(args_node_ips, node_ips)) trainer_endpoints = [] assert num_nodes * paddle_ports_num == len(trainer_endpoints_ori) for i in range(num_nodes): - trainer_endpoints.append(trainer_endpoints_ori[ - i * paddle_ports_num:(i + 1) * paddle_ports_num]) + trainer_endpoints.append( + trainer_endpoints_ori[i * paddle_ports_num:(i + 1) * + paddle_ports_num]) logger.debug("parsed from args: node_ips:{} \ - node_ip:{} node_rank:{} trainer_endpoints:{}" - .format(node_ips, node_ip, node_rank, trainer_endpoints)) + node_ip:{} node_rank:{} trainer_endpoints:{}".format( + node_ips, node_ip, node_rank, trainer_endpoints)) cluster, pod = get_cluster(node_ips, node_ip, trainer_endpoints, selected_devices) diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index fab6674b65f..1fd5bde1a54 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -158,8 +158,9 @@ def _get_group_map(): global _group_map if not _group_map: genv = _get_global_env() - _group_map[0] = Group( - genv.rank, genv.world_size, ranks=list(range(genv.world_size))) + _group_map[0] = Group(genv.rank, + genv.world_size, + ranks=list(range(genv.world_size))) return _group_map @@ -264,20 +265,19 @@ def _new_process_group_impl(backend, cluster_id - 1] global_rank = cluster_offset + rank global_world_size = cluster_size_cumsum[-1] - pg = core.ProcessGroupHeter( - store, - rank=global_rank, - world_size=global_world_size, - place=place, - gid=group_id, - local_rank=rank, - local_size=world_size, - gloo_rank=cluster_id, - gloo_size=len(cluster_size), - with_switch=True, - switch_endpoint=switch_ep, - src_rank=src_rank, - dst_rank=dst_rank) + pg = core.ProcessGroupHeter(store, + rank=global_rank, + world_size=global_world_size, + place=place, + gid=group_id, + local_rank=rank, + local_size=world_size, + gloo_rank=cluster_id, + gloo_size=len(cluster_size), + with_switch=True, + switch_endpoint=switch_ep, + src_rank=src_rank, + dst_rank=dst_rank) return pg @@ -323,11 +323,10 @@ def barrier(group=None): if not isinstance(ring_id, int): raise ValueError("The type of 'group' for barrier must be int.") helper = LayerHelper(op_type, **locals()) - helper.append_op( - type=op_type, - inputs={'X': [temp]}, - outputs={'Out': [temp]}, - attrs={'ring_id': ring_id}) + helper.append_op(type=op_type, + inputs={'X': [temp]}, + outputs={'Out': [temp]}, + attrs={'ring_id': ring_id}) # _custom_gid provides a way for users to @@ -386,16 +385,15 @@ def new_group(ranks=None, backend=None): rank = 0 if backend == 'heter' else ranks.index(global_rank) src_rank = ranks[0] if backend == 'heter' else None dst_rank = ranks[1] if backend == 'heter' else None - pg = _new_process_group_impl( - backend, - _default_store, - rank, - size, - group_name, - pg_options=None, - group_id=gid, - src_rank=src_rank, - dst_rank=dst_rank) + pg = _new_process_group_impl(backend, + _default_store, + rank, + size, + group_name, + pg_options=None, + group_id=gid, + src_rank=src_rank, + dst_rank=dst_rank) else: rank = -1 pg = None @@ -403,7 +401,7 @@ def new_group(ranks=None, backend=None): _group_map_by_name[group_name] = group _group_map[gid] = group - # TODO(shenliang03): This is a temporary solution to solve the problem of + # TODO(shenliang03): This is a temporary solution to solve the problem of # hang caused by tcp paddle.distributed.barrier(group=group) return group @@ 
-454,7 +452,7 @@ def new_group(ranks=None, backend=None): else: return gp - # TODO(shenliang03): This is a temporary solution to solve the problem of + # TODO(shenliang03): This is a temporary solution to solve the problem of # hang caused by cross-creation of new_group tmp = paddle.to_tensor( [1], dtype="int32") if _non_static_mode() else fill_constant( @@ -512,7 +510,8 @@ def _sync_calc_stream(tensor): helper.append_op( type=op_type, inputs={'X': [tensor]}, - outputs={'Out': [tensor]}, ) + outputs={'Out': [tensor]}, + ) def _sync_comm_stream(tensor, ring_id=0): @@ -527,7 +526,8 @@ def _sync_comm_stream(tensor, ring_id=0): type=op_type, inputs={'X': [tensor]}, outputs={'Out': [tensor]}, - attrs={'ring_id': ring_id}, ) + attrs={'ring_id': ring_id}, + ) def broadcast(tensor, src, group=None, use_calc_stream=True): @@ -605,15 +605,14 @@ def broadcast(tensor, src, group=None, use_calc_stream=True): 'broadcast') helper = LayerHelper(op_type, **locals()) - helper.append_op( - type=op_type, - inputs={'X': [tensor]}, - outputs={'Out': [tensor]}, - attrs={ - 'root': gsrc, - 'use_calc_stream': use_calc_stream, - 'ring_id': ring_id, - }) + helper.append_op(type=op_type, + inputs={'X': [tensor]}, + outputs={'Out': [tensor]}, + attrs={ + 'root': gsrc, + 'use_calc_stream': use_calc_stream, + 'ring_id': ring_id, + }) def all_reduce(tensor, op=ReduceOp.SUM, group=None, use_calc_stream=True): @@ -713,12 +712,13 @@ def all_reduce(tensor, op=ReduceOp.SUM, group=None, use_calc_stream=True): if not isinstance(ring_id, int): raise ValueError("The type of 'ring_id' for all_reduce should be int.") helper = LayerHelper(op_type, **locals()) - helper.append_op( - type=op_type, - inputs={'X': [tensor]}, - outputs={'Out': [tensor]}, - attrs={'ring_id': ring_id, - 'use_calc_stream': use_calc_stream}) + helper.append_op(type=op_type, + inputs={'X': [tensor]}, + outputs={'Out': [tensor]}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': use_calc_stream + }) def reduce(tensor, dst, op=ReduceOp.SUM, group=None, use_calc_stream=True): @@ -827,15 +827,14 @@ def reduce(tensor, dst, op=ReduceOp.SUM, group=None, use_calc_stream=True): op_type = 'c_reduce_prod' helper = LayerHelper(op_type, **locals()) - helper.append_op( - type=op_type, - inputs={'X': [tensor]}, - outputs={'Out': [tensor]}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': use_calc_stream, - 'root_id': gdst, - }) + helper.append_op(type=op_type, + inputs={'X': [tensor]}, + outputs={'Out': [tensor]}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': use_calc_stream, + 'root_id': gdst, + }) def all_gather(tensor_list, tensor, group=None, use_calc_stream=True): @@ -925,15 +924,14 @@ def all_gather(tensor_list, tensor, group=None, use_calc_stream=True): check_variable_and_dtype( tensor, 'tensor', ['float16', 'float32', 'float64', 'int32', 'int64'], 'all_gather') - helper.append_op( - type=op_type, - inputs={'X': [tensor]}, - outputs={'Out': [out]}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': use_calc_stream, - 'nranks': nranks - }) + helper.append_op(type=op_type, + inputs={'X': [tensor]}, + outputs={'Out': [out]}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': use_calc_stream, + 'nranks': nranks + }) tensor_list.extend(paddle.split(out, nranks, 0)) @@ -1026,16 +1024,15 @@ def scatter(tensor, tensor_list=None, src=0, group=None, use_calc_stream=True): tensor, 'tensor', ['float16', 'float32', 'float64', 'int32', 'int64'], 'scatter') helper = LayerHelper(op_type, **locals()) - helper.append_op( - type=op_type, - inputs={'X': [temp]}, - 
outputs={'Out': [tensor]}, - attrs={ - 'ring_id': ring_id, - 'root': gsrc, - 'use_calc_stream': use_calc_stream, - 'nranks': nranks, - }) + helper.append_op(type=op_type, + inputs={'X': [temp]}, + outputs={'Out': [tensor]}, + attrs={ + 'ring_id': ring_id, + 'root': gsrc, + 'use_calc_stream': use_calc_stream, + 'nranks': nranks, + }) def _c_identity(tensor, group=None): @@ -1065,15 +1062,14 @@ def _c_identity(tensor, group=None): tensor, 'tensor', ['float16', 'float32', 'float64', 'int32', 'int64'], '_c_identity') - helper.append_op( - type=op_type, - inputs={'X': tensor}, - outputs={'Out': out}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': True, - 'use_model_parallel': True, - }) + helper.append_op(type=op_type, + inputs={'X': tensor}, + outputs={'Out': out}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': True, + 'use_model_parallel': True, + }) return out @@ -1111,17 +1107,16 @@ def _c_concat(tensor, group=None): tensor, 'tensor', ['float16', 'float32', 'float64', 'int32', 'int64'], '_c_concat') - helper.append_op( - type=op_type, - inputs={'X': tensor}, - outputs={'Out': out}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': True, - 'use_model_parallel': True, - 'nranks': nranks, - 'rank': rank - }) + helper.append_op(type=op_type, + inputs={'X': tensor}, + outputs={'Out': out}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': True, + 'use_model_parallel': True, + 'nranks': nranks, + 'rank': rank + }) return out @@ -1159,17 +1154,16 @@ def _c_split(tensor, group=None): tensor, 'tensor', ['float16', 'float32', 'float64', 'int32', 'int64'], '_c_split') - helper.append_op( - type=op_type, - inputs={'X': tensor}, - outputs={'Out': out}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': True, - 'rank': rank, - 'nranks': nranks, - 'use_model_parallel': True, - }) + helper.append_op(type=op_type, + inputs={'X': tensor}, + outputs={'Out': out}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': True, + 'rank': rank, + 'nranks': nranks, + 'use_model_parallel': True, + }) return out @@ -1190,13 +1184,15 @@ def _mp_allreduce(tensor, from paddle.autograd import EagerPyLayer class mp_allreduce_eager(EagerPyLayer): + @staticmethod def forward(ctx, tensor, use_calc_stream, ring_id, use_model_parallel): ctx.ring_id = ring_id - return _C_ops.c_allreduce_sum_( - tensor, 'use_calc_stream', use_calc_stream, 'ring_id', - ring_id, "use_model_parallel", use_model_parallel) + return _C_ops.c_allreduce_sum_(tensor, 'use_calc_stream', + use_calc_stream, 'ring_id', + ring_id, "use_model_parallel", + use_model_parallel) @staticmethod def backward(ctx, dy): @@ -1209,9 +1205,10 @@ def _mp_allreduce(tensor, elif _in_legacy_dygraph(): if op == ReduceOp.SUM: - return _C_ops.c_allreduce_sum_( - tensor, 'use_calc_stream', use_calc_stream, 'ring_id', ring_id, - "use_model_parallel", use_model_parallel) + return _C_ops.c_allreduce_sum_(tensor, 'use_calc_stream', + use_calc_stream, 'ring_id', ring_id, + "use_model_parallel", + use_model_parallel) else: raise ValueError("Unknown parameter: {}.".format(op)) @@ -1223,15 +1220,14 @@ def _mp_allreduce(tensor, tensor, 'tensor', ['float16', 'float32', 'float64', 'int32', 'int64'], op_type) - helper.append_op( - type=op_type, - inputs={'X': tensor}, - outputs={'Out': out}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': use_calc_stream, - 'use_model_parallel': use_model_parallel, - }) + helper.append_op(type=op_type, + inputs={'X': tensor}, + outputs={'Out': out}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': use_calc_stream, + 
'use_model_parallel': use_model_parallel, + }) return out @@ -1257,12 +1253,13 @@ def _c_lookup_table(table, index, start_index=0, name=None): dtype = helper.input_dtype(input_param_name='table') check_variable_and_dtype(index, 'input', ['int32', 'int64'], op_type) tmp = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='c_embedding', - inputs={'Ids': index, - 'W': table}, - outputs={'Out': tmp}, - attrs={"start_index": start_index}) + helper.append_op(type='c_embedding', + inputs={ + 'Ids': index, + 'W': table + }, + outputs={'Out': tmp}, + attrs={"start_index": start_index}) return tmp @@ -1281,21 +1278,21 @@ class _Linear(layers.Layer): self._dtype = self._helper.get_default_dtype() self._weight_attr = weight_attr self._bias_attr = bias_attr - self.weight = self.create_parameter( - shape=[in_features, out_features], - attr=self._weight_attr, - dtype=self._dtype, - is_bias=False) - self.bias = self.create_parameter( - shape=[out_features], - attr=self._bias_attr, - dtype=self._dtype, - is_bias=True) + self.weight = self.create_parameter(shape=[in_features, out_features], + attr=self._weight_attr, + dtype=self._dtype, + is_bias=False) + self.bias = self.create_parameter(shape=[out_features], + attr=self._bias_attr, + dtype=self._dtype, + is_bias=True) self.name = name def forward(self, input): - out = _linear( - x=input, weight=self.weight, bias=self.bias, name=self.name) + out = _linear(x=input, + weight=self.weight, + bias=self.bias, + name=self.name) return out def extra_repr(self): @@ -1340,13 +1337,16 @@ def _c_softmax_with_cross_entropy(logits, helper = LayerHelper('c_softmax_with_cross_entropy', **locals()) softmax = helper.create_variable_for_type_inference(dtype=logits.dtype) loss = helper.create_variable_for_type_inference(dtype=logits.dtype) - helper.append_op( - type='c_softmax_with_cross_entropy', - inputs={'Logits': logits, - 'Label': label}, - outputs={'Softmax': softmax, - 'Loss': loss}, - attrs=attrs) + helper.append_op(type='c_softmax_with_cross_entropy', + inputs={ + 'Logits': logits, + 'Label': label + }, + outputs={ + 'Softmax': softmax, + 'Loss': loss + }, + attrs=attrs) if return_softmax: return loss, softmax @@ -1362,8 +1362,9 @@ def _linear(x, weight, bias=None, name=None): pre_bias = _varbase_creator(dtype=x.dtype) _C_ops.matmul(x, weight, pre_bias, 'transpose_X', False, 'transpose_Y', False, "alpha", 1) - return dygraph_utils._append_bias_in_dygraph( - pre_bias, bias, axis=len(x.shape) - 1) + return dygraph_utils._append_bias_in_dygraph(pre_bias, + bias, + axis=len(x.shape) - 1) else: helper = LayerHelper('linear', **locals()) dtype = x.dtype @@ -1381,16 +1382,19 @@ def _linear(x, weight, bias=None, name=None): 'alpha': 1, } tmp = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='matmul_v2', inputs=inputs, outputs={'Out': tmp}, attrs=attrs) + helper.append_op(type='matmul_v2', + inputs=inputs, + outputs={'Out': tmp}, + attrs=attrs) if bias is not None: res = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='elementwise_add', - inputs={'X': [tmp], - 'Y': [bias]}, - outputs={'Out': [res]}, - attrs={'axis': len(x.shape) - 1}) + helper.append_op(type='elementwise_add', + inputs={ + 'X': [tmp], + 'Y': [bias] + }, + outputs={'Out': [res]}, + attrs={'axis': len(x.shape) - 1}) else: res = tmp return res @@ -1439,12 +1443,11 @@ def _parallel_linear(x, else: x = _c_identity(x, group=group) - linear = paddle.nn.Linear( - num_rows, - num_cols, - weight_attr=param_attr, - bias_attr=bias_attr, - 
name=name) + linear = paddle.nn.Linear(num_rows, + num_cols, + weight_attr=param_attr, + bias_attr=bias_attr, + name=name) # NOTE: npu linear function use matmul_v2 but linear use matmul linear_function = _linear if core.is_compiled_with_npu()\ @@ -1477,29 +1480,27 @@ def _parallel_linear(x, is_data=False, need_check_feed=linear_out.desc.need_check_feed()) if axis == 0: - main_block.append_op( - type='c_allreduce_sum', - inputs={'X': linear_out}, - outputs={'Out': out}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': True, - 'use_model_parallel': True - }) + main_block.append_op(type='c_allreduce_sum', + inputs={'X': linear_out}, + outputs={'Out': out}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': True, + 'use_model_parallel': True + }) if linear.bias is not None: out = out + linear.bias else: - main_block.append_op( - type='c_concat', - inputs={'X': linear_out}, - outputs={'Out': out}, - attrs={ - 'rank': inner_rank, - 'ring_id': ring_id, - 'nranks': nranks, - 'use_calc_stream': True, - 'use_model_parallel': True - }) + main_block.append_op(type='c_concat', + inputs={'X': linear_out}, + outputs={'Out': out}, + attrs={ + 'rank': inner_rank, + 'ring_id': ring_id, + 'nranks': nranks, + 'use_calc_stream': True, + 'use_model_parallel': True + }) return out @@ -1527,12 +1528,17 @@ def _parallel_embedding(x, dtype = helper.get_default_dtype() size = [per_part_size, origin_size[1]] - weight = helper.create_parameter( - attr=param_attr, shape=size, dtype=dtype, is_bias=False) + weight = helper.create_parameter(attr=param_attr, + shape=size, + dtype=dtype, + is_bias=False) if num_partitions == 1: - return paddle.nn.functional.embedding( - x, weight=weight, padding_idx=None, sparse=False, name=name) + return paddle.nn.functional.embedding(x, + weight=weight, + padding_idx=None, + sparse=False, + name=name) startup_block = paddle.static.default_startup_program().global_block() main_block = paddle.static.default_main_program().global_block() @@ -1541,11 +1547,10 @@ def _parallel_embedding(x, output_parallel = paddle.distributed.collective._c_lookup_table( weight, x, start_index=vocab_start_index, name=name) - out = paddle.distributed.collective._mp_allreduce( - output_parallel, - group=group, - use_calc_stream=True, - use_model_parallel=True) + out = paddle.distributed.collective._mp_allreduce(output_parallel, + group=group, + use_calc_stream=True, + use_model_parallel=True) return out @@ -1676,9 +1681,10 @@ def split(x, num_partitions=2) """ - assert isinstance(size, (list, tuple)), ( - "The type of size for " - "paddle.distributed.split must be list or tuple.") + assert isinstance( + size, + (list, tuple)), ("The type of size for " + "paddle.distributed.split must be list or tuple.") assert len(size) == 2, ("Number of elements in size of " "paddle.distributed.split must be two.") assert isinstance(operation, str), ("The type of operation for " @@ -1714,23 +1720,22 @@ def split(x, "but received vocabulary={} num_partitions={}".format(size[0], num_partitions) per_part_size = size[0] // num_partitions - emb_out = _parallel_embedding( - x, - per_part_size, - size, - weight_attr, - inner_rank, - num_partitions, - name, - group=None) + emb_out = _parallel_embedding(x, + per_part_size, + size, + weight_attr, + inner_rank, + num_partitions, + name, + group=None) return emb_out else: should_split = False if axis == 0: assert size[0] % num_partitions == 0, ( "Number of rows of the weight for linear ({}) must be" - " divisible by num_partitions ({})".format(size[0], - num_partitions)) + " 
divisible by num_partitions ({})".format( + size[0], num_partitions)) per_part_size = size[0] // num_partitions linear_size = (per_part_size, size[1]) if x.shape[-1] == size[0]: should_split = True @@ -1738,27 +1743,26 @@ def split(x, elif axis == 1: assert size[1] % num_partitions == 0, ( "Number of column of the weight for linear ({}) must be" - " divisible by num_partitions ({})".format(size[1], - num_partitions)) + " divisible by num_partitions ({})".format( + size[1], num_partitions)) per_part_size = size[1] // num_partitions linear_size = (size[0], per_part_size) else: raise ValueError("The value of axis must be 0 or 1, but the value " "given is {}.".format(axis)) - linear_out = _parallel_linear( - x, - linear_size[0], - linear_size[1], - axis, - weight_attr, - bias_attr, - gather_out, - inner_rank, - num_partitions, - should_split, - name=name, - group=None) + linear_out = _parallel_linear(x, + linear_size[0], + linear_size[1], + axis, + weight_attr, + bias_attr, + gather_out, + inner_rank, + num_partitions, + should_split, + name=name, + group=None) return linear_out @@ -1853,14 +1857,13 @@ def alltoall(in_tensor_list, out_tensor_list, group=None, use_calc_stream=True): if len(out_tensor_list) != 0: raise ValueError("The 'out_tensor_list' for all_to_all " "must be an empty list.") - helper.append_op( - type=op_type, - inputs={'X': [temp]}, - outputs={'Out': [out]}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': use_calc_stream, - }) + helper.append_op(type=op_type, + inputs={'X': [temp]}, + outputs={'Out': [out]}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': use_calc_stream, + }) out_tensor_list.extend(paddle.split(out, nranks, 0)) @@ -1917,14 +1920,13 @@ def send(tensor, dst=0, group=None, use_calc_stream=True): 'send') helper = LayerHelper(op_type, **locals()) - helper.append_op( - type=op_type, - inputs={'X': [tensor]}, - attrs={ - 'ring_id': ring_id, - 'peer': dst, - 'use_calc_stream': use_calc_stream, - }) + helper.append_op(type=op_type, + inputs={'X': [tensor]}, + attrs={ + 'ring_id': ring_id, + 'peer': dst, + 'use_calc_stream': use_calc_stream, + }) def recv(tensor, src=0, group=None, use_calc_stream=True): @@ -1980,13 +1982,12 @@ def recv(tensor, src=0, group=None, use_calc_stream=True): tensor, 'tensor', ['float16', 'float32', 'float64', 'int32', 'int64'], 'recv') helper = LayerHelper(op_type, **locals()) - helper.append_op( - type=op_type, - outputs={'Out': [tensor]}, - attrs={ - 'ring_id': ring_id, - 'peer': src, - 'out_shape': tensor.shape, - 'dtype': tensor.dtype, - 'use_calc_stream': use_calc_stream, - }) + helper.append_op(type=op_type, + outputs={'Out': [tensor]}, + attrs={ + 'ring_id': ring_id, + 'peer': src, + 'out_shape': tensor.shape, + 'dtype': tensor.dtype, + 'use_calc_stream': use_calc_stream, + }) diff --git a/python/paddle/distributed/elastic.py b/python/paddle/distributed/elastic.py index 52f36a227f1..933550b75ad 100644 --- a/python/paddle/distributed/elastic.py +++ b/python/paddle/distributed/elastic.py @@ -18,6 +18,7 @@ import os class Command(object): + def __init__(self, server, name): import etcd3 @@ -47,8 +48,9 @@ class Command(object): if __name__ == '__main__': parser = argparse.ArgumentParser(description='Elastic Command') - parser.add_argument( - "--elastic_server", type=str, help="etcd server host:port") + parser.add_argument("--elastic_server", + type=str, + help="etcd server host:port") parser.add_argument("--job_id", type=str, help="job unique id") parser.add_argument( "--np", diff --git 
a/python/paddle/distributed/fleet/__init__.py b/python/paddle/distributed/fleet/__init__.py index ef0fff82833..8c0394c9944 100644 --- a/python/paddle/distributed/fleet/__init__.py +++ b/python/paddle/distributed/fleet/__init__.py @@ -30,17 +30,11 @@ from . import metrics # noqa: F401 from .base.topology import CommunicateTopology from .base.topology import HybridCommunicateGroup # noqa: F401 -__all__ = [ #noqa - "CommunicateTopology", - "UtilBase", - "HybridCommunicateGroup", - "MultiSlotStringDataGenerator", - "UserDefinedRoleMaker", - "DistributedStrategy", - "Role", - "MultiSlotDataGenerator", - "PaddleCloudRoleMaker", - "Fleet" +__all__ = [ #noqa + "CommunicateTopology", "UtilBase", "HybridCommunicateGroup", + "MultiSlotStringDataGenerator", "UserDefinedRoleMaker", + "DistributedStrategy", "Role", "MultiSlotDataGenerator", + "PaddleCloudRoleMaker", "Fleet" ] fleet = Fleet() diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index fe997c08509..902854a7c72 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -26,6 +26,7 @@ non_auto_func_called = True def __non_auto_func_called__(func): + def __impl__(*args, **kwargs): global non_auto_func_called non_auto_func_called = False @@ -317,8 +318,8 @@ class DistributedStrategy(object): self.a_sync_configs = {"k_steps": 0} else: raise ValueError( - "The type of `flag` is invalid, expected type is bool, but received {}". - format(type(flag))) + "The type of `flag` is invalid, expected type is bool, but received {}" + .format(type(flag))) @property def a_sync_configs(self): @@ -429,8 +430,8 @@ class DistributedStrategy(object): self.strategy.adam_d2sum = flag else: raise ValueError( - "The type of `flag` is invalid, expected type is bool, but received {}". 
- format(type(flag))) + "The type of `flag` is invalid, expected type is bool, but received {}" + .format(type(flag))) @trainer_desc_configs.setter @is_strict_auto @@ -492,8 +493,8 @@ class DistributedStrategy(object): data = getattr(msg, field.name).add() set_table_config(data, name, configs, i) else: - set_table_config( - getattr(msg, field.name), name, configs) + set_table_config(getattr(msg, field.name), name, + configs) else: # print("not message:", name) if name not in configs: @@ -1022,7 +1023,8 @@ class DistributedStrategy(object): self.strategy.find_unused_parameters = flag else: print( - "WARNING: find_unused_parameters should have value of bool type") + "WARNING: find_unused_parameters should have value of bool type" + ) @property def _fuse_grad_size_in_TFLOPS(self): @@ -1297,7 +1299,8 @@ class DistributedStrategy(object): self.strategy.fuse_grad_size_in_num = num else: print( - "WARNING: fuse_grad_size_in_num should have value of int32 type") + "WARNING: fuse_grad_size_in_num should have value of int32 type" + ) @property def pipeline(self): @@ -2125,8 +2128,8 @@ class DistributedStrategy(object): length = max_k + max_v + spacing h1_format = " " + "|{{:^{}s}}|\n".format(length) - h2_format = " " + "|{{:>{}s}}{}{{:^{}s}}|\n".format(max_k, " " * - spacing, max_v) + h2_format = " " + "|{{:>{}s}}{}{{:^{}s}}|\n".format( + max_k, " " * spacing, max_v) border = " +" + "".join(["="] * length) + "+" line = " +" + "".join(["-"] * length) + "+" @@ -2158,17 +2161,17 @@ class DistributedStrategy(object): config_fields = my_configs.DESCRIPTOR.fields for ff in config_fields: if isinstance( - getattr(my_configs, ff.name), - google.protobuf.pyext._message. - RepeatedScalarContainer): + getattr(my_configs, + ff.name), google.protobuf.pyext. + _message.RepeatedScalarContainer): values = getattr(my_configs, ff.name) for i, v in enumerate(values): if i == 0: - draws += h2_format.format(ff.name, - str(v)) + draws += h2_format.format( + ff.name, str(v)) else: - draws += h2_format.format("", - str(v)) + draws += h2_format.format( + "", str(v)) else: draws += h2_format.format( ff.name, diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py index a1c967ab063..762b961da53 100755 --- a/python/paddle/distributed/fleet/base/fleet_base.py +++ b/python/paddle/distributed/fleet/base/fleet_base.py @@ -46,6 +46,7 @@ _grad_scalar = None class _RecomputeModelWrapper(paddle.nn.Layer): + def __init__(self, model, segments=2, preserve_rng_state=True): super(_RecomputeModelWrapper, self).__init__() assert isinstance(model, paddle.nn.Sequential), ( @@ -58,6 +59,7 @@ class _RecomputeModelWrapper(paddle.nn.Layer): self._segment_size = len(self._layers) // segments def _run_func(self, begin, end): + def do_run(input): for i in range(begin, end): input = self._layers[i](input) @@ -91,10 +93,10 @@ def apply_ir_passes(main_program, startup_program, config): fuse_all_reduce = config._user_defined_strategy.fuse_all_reduce_ops if fuse_all_reduce and build_strategy.fuse_all_optimizer_ops: # FIXME(zjl): currently, fuse_all_optimizer_ops - # have conflict with fuse_all_reduce_ops because - # RawProgramOptimizer also inserts coalesce_tensor - # into program. These two procedures may conflict - # in which vars are to be fused. + # have conflict with fuse_all_reduce_ops because + # RawProgramOptimizer also inserts coalesce_tensor + # into program. These two procedures may conflict + # in which vars are to be fused. 
warnings.warn( 'Currently, the fuse_all_optimizer_ops pass has conflict with fuse_all_reduce_ops pass. Disable the fuse_all_optimizer_ops pass temporarily.' ) @@ -105,6 +107,7 @@ def apply_ir_passes(main_program, startup_program, config): def _inited_runtime_handler_(func): + def __impl__(*args, **kwargs): cls = args[0] @@ -117,6 +120,7 @@ def _inited_runtime_handler_(func): def _is_non_distributed_check_(func): + def __impl__(*args, **kwargs): cls = args[0] @@ -275,8 +279,8 @@ class Fleet(object): self._is_collective = role_maker._is_collective else: raise ValueError( - "`role_maker` should be subclass of `RoleMakerBase`, but got {}". - format(type(role_maker))) + "`role_maker` should be subclass of `RoleMakerBase`, but got {}" + .format(type(role_maker))) self._role_maker._generate_role() import paddle.distributed.fleet as fleet @@ -352,8 +356,8 @@ class Fleet(object): if use_tensor_parallel: tensor_parallel_configs = self._user_defined_strategy.tensor_parallel_configs - mp_degree_tensor_parallel = int(tensor_parallel_configs[ - 'tensor_parallel_degree']) + mp_degree_tensor_parallel = int( + tensor_parallel_configs['tensor_parallel_degree']) if use_sharding and use_tensor_parallel: assert mp_degree_sharding == mp_degree_tensor_parallel @@ -773,14 +777,18 @@ class Fleet(object): for name in fetch_var_names ] - self._runtime_handle._save_inference_model( - executor, dirname, feeded_var_names, fetch_vars, None, True, 0) + self._runtime_handle._save_inference_model(executor, dirname, + feeded_var_names, + fetch_vars, None, True, + 0) else: increment_mode = 0 if "mode" in configs: increment_mode = int(configs["mode"]) - self._runtime_handle._save_persistables( - executor, dirname, main_program=None, mode=increment_mode) + self._runtime_handle._save_persistables(executor, + dirname, + main_program=None, + mode=increment_mode) @is_non_distributed_check @inited_runtime_handler @@ -815,9 +823,10 @@ class Fleet(object): # "'save_inference_model' is a deprecated, will be deleted after v2.2.0, Please use fleet.save instead." # ) - self._runtime_handle._save_inference_model( - executor, dirname, feeded_var_names, target_vars, main_program, - export_for_deployment, mode) + self._runtime_handle._save_inference_model(executor, dirname, + feeded_var_names, + target_vars, main_program, + export_for_deployment, mode) @is_non_distributed_check @inited_runtime_handler @@ -1000,12 +1009,11 @@ class Fleet(object): amp_enable = True amp_level = "O2" if strategy.amp_configs['use_pure_fp16'] else "O1" if amp_level.upper() == "O2": - model = paddle.amp.decorate( - models=model, - optimizers=None, - level="O2", - master_weight=None, - save_dtype=None) + model = paddle.amp.decorate(models=model, + optimizers=None, + level="O2", + master_weight=None, + save_dtype=None) init_loss_scaling = strategy.amp_configs['init_loss_scaling'] incr_ratio = strategy.amp_configs['incr_ratio'] decr_ratio = strategy.amp_configs['decr_ratio'] @@ -1040,8 +1048,9 @@ class Fleet(object): return distributed_model if self._hcg.get_parallel_mode() == ParallelMode.SHARDING_PARALLEL: - model = ShardingParallel( - model, self._hcg, strategy=self._user_defined_strategy) + model = ShardingParallel(model, + self._hcg, + strategy=self._user_defined_strategy) elif self._hcg.get_parallel_mode() == ParallelMode.DATA_PARALLEL: # NOTE (JZ-LIANG) init parameters broadcast within sharding group @@ -1060,11 +1069,13 @@ class Fleet(object): find_unused_parameters=self._user_defined_strategy. 
find_unused_parameters) elif self._hcg.get_parallel_mode() == ParallelMode.TENSOR_PARALLEL: - model = TensorParallel( - model, self._hcg, strategy=self._user_defined_strategy) + model = TensorParallel(model, + self._hcg, + strategy=self._user_defined_strategy) elif self._hcg.get_parallel_mode() == ParallelMode.PIPELINE_PARALLEL: - model = PipelineParallel( - model, self._hcg, strategy=self._user_defined_strategy) + model = PipelineParallel(model, + self._hcg, + strategy=self._user_defined_strategy) return model @@ -1630,8 +1641,10 @@ class Fleet(object): self.origin_main_program).with_data_parallel( loss_name=loss.name, share_vars_from=None) loss.block.program._graph = compiled_program - return self.user_defined_optimizer.minimize( - loss, startup_program, parameter_list, no_grad_set=no_grad_set) + return self.user_defined_optimizer.minimize(loss, + startup_program, + parameter_list, + no_grad_set=no_grad_set) if meta_optimizer: # print("before minimize program id:", id(loss.block.program)) @@ -1765,6 +1778,7 @@ class Fleet(object): @dygraph_only def distributed_scaler(self, scaler): + def unscale_method(self, optimizer): if not self._enable: return @@ -1789,13 +1803,13 @@ class Fleet(object): ] param_grads_fp16 = [ param._grad_ivar() for param in optimizer._parameter_list - if (param._grad_ivar() is not None) and (param._grad_ivar( - ).dtype == core.VarDesc.VarType.FP16) + if (param._grad_ivar() is not None) and ( + param._grad_ivar().dtype == core.VarDesc.VarType.FP16) ] param_grads_fp32 = [ param._grad_ivar() for param in optimizer._parameter_list - if (param._grad_ivar() is not None) and (param._grad_ivar( - ).dtype == core.VarDesc.VarType.FP32) + if (param._grad_ivar() is not None) and ( + param._grad_ivar().dtype == core.VarDesc.VarType.FP32) ] temp_found_inf_fp16 = to_variable(np.array([0]).astype(np.bool)) temp_found_inf_fp32 = to_variable(np.array([0]).astype(np.bool)) @@ -1811,11 +1825,12 @@ class Fleet(object): self._found_inf = 1 if temp_found_inf_fp16 or temp_found_inf_fp32 else 0 is_found_inf = paddle.to_tensor([self._found_inf], dtype="int32") - # TODO(shenliang03) Since dp allreduce in the optimizer is - # after the gradscaler, check_finite needs to synchronize global + # TODO(shenliang03) Since dp allreduce in the optimizer is + # after the gradscaler, check_finite needs to synchronize global # information. In the future, we should use check_group to speed. 
- paddle.distributed.all_reduce( - is_found_inf, op=paddle.distributed.ReduceOp.MAX, group=None) + paddle.distributed.all_reduce(is_found_inf, + op=paddle.distributed.ReduceOp.MAX, + group=None) self._found_inf = is_found_inf.numpy()[0] # Only tensor_parallel and pipeline_parallel need to modify scaler diff --git a/python/paddle/distributed/fleet/base/meta_optimizer_factory.py b/python/paddle/distributed/fleet/base/meta_optimizer_factory.py index 322989099c8..c2a3e4047b3 100755 --- a/python/paddle/distributed/fleet/base/meta_optimizer_factory.py +++ b/python/paddle/distributed/fleet/base/meta_optimizer_factory.py @@ -26,6 +26,7 @@ meta_optimizer_names.remove("HeterParallelOptimizer") class MetaOptimizerFactory(object): + def __init__(self): pass diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index 860f7a52f39..36155bbf1a2 100644 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -118,6 +118,7 @@ class Gloo(object): self._http_server = http_server def _init_fs(self, fs_path, prefix): + def init(rank, nodes, role): gloo = fluid.core.Gloo() gloo.set_rank(rank) @@ -145,6 +146,7 @@ class Gloo(object): self._nodes_comm = gloo def _init_dfs(self, dfs_name, dfs_ugi, dfs_path, prefix): + def init(rank, nodes, role): gloo = fluid.core.Gloo() gloo.set_rank(rank) @@ -172,6 +174,7 @@ class Gloo(object): self._nodes_comm = gloo def _init_http(self, ip, port, prefix, start_http_server, http_server_d): + def __start_kv_server(http_server_d, size_d): print("start http_server: {}, {}".format(port, size_d)) from paddle.distributed.fleet.utils.http_server import KVServer @@ -185,13 +188,15 @@ class Gloo(object): def init_kv_server(http_server_d): worker_key = prefix + '_' + 'worker' - size_d = {worker_key: self._worker_num, } + size_d = { + worker_key: self._worker_num, + } print("worker_key:{}, size: {}".format(worker_key, size_d)) http_server_d["running"] = True # child process for http server - _http_server = Process( - target=__start_kv_server, args=(http_server_d, size_d)) + _http_server = Process(target=__start_kv_server, + args=(http_server_d, size_d)) _http_server.daemon = True # set running status to True # start child process @@ -224,7 +229,7 @@ class Gloo(object): self._worker_comm = gloo # TODO (sandyhouse): initialize gloo for server and all - # the closing of kv server may cause gloo init failure + # the closing of kv server may cause gloo init failure # since it depend on the full mesh connection # e.g. 
0 connected with 1,2,3 while 2-3 not connected yet # TODO(kuizhiqing) @@ -517,6 +522,7 @@ class RoleMakerBase(object): class PaddleCloudRoleMaker(RoleMakerBase): + def __init__(self, is_collective=False, **kwargs): super(PaddleCloudRoleMaker, self).__init__() self._is_collective = is_collective @@ -525,7 +531,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._kwargs = kwargs self._role_is_generated = False - # for heterps + # for heterps self._stage_id = 1 self._stage_num = 1 self._next_heter_trainer_endpoints = [] @@ -652,8 +658,8 @@ class PaddleCloudRoleMaker(RoleMakerBase): """ if not self._role_is_generated: self._generate_role() - return len(self._get_pserver_endpoints( - )) if self._get_pserver_endpoints() is not None else 0 + return len(self._get_pserver_endpoints() + ) if self._get_pserver_endpoints() is not None else 0 def _node_num(self): """ @@ -814,8 +820,8 @@ class PaddleCloudRoleMaker(RoleMakerBase): if training_role not in ["TRAINER", "PSERVER", "HETER_TRAINER"]: raise ValueError( - "TRAINING_ROLE must be PSERVER or TRAINER or HETER_TRAINER, but get {}, please check your environment.". - format(training_role)) + "TRAINING_ROLE must be PSERVER or TRAINER or HETER_TRAINER, but get {}, please check your environment." + .format(training_role)) # For Heter Parameter Server env setting next_heter_trainer_eplist = os.getenv( @@ -832,8 +838,8 @@ class PaddleCloudRoleMaker(RoleMakerBase): if previous_heter_trainer_eplist == "": assert training_role in ( - "TRAINER", "PSERVER" - ), "training_role should be trainer or pserver" + "TRAINER", + "PSERVER"), "training_role should be trainer or pserver" else: try: self._previous_heter_trainer_endpoints = previous_heter_trainer_eplist.split( @@ -896,7 +902,8 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._stage_num = os.getenv("STAGE_NUM", None) if self._stage_num == None: raise ValueError( - "Can not find STAGE_NUM, please check your environment.") + "Can not find STAGE_NUM, please check your environment." + ) self._stage_num = int(self._stage_num) self._stage_trainers = os.getenv("PADDLE_STAGE_TRAINERS_NUM", None) @@ -955,8 +962,8 @@ class PaddleCloudRoleMaker(RoleMakerBase): "Can not find HETER_DEVICE_TYPE, please check your environment." 
) assert self._heter_trainer_device_type in ( - "cpu", "gpu", "xpu" - ), "HETER_DEVICE_TYPE should be cpu,gpu or xpu" + "cpu", "gpu", + "xpu"), "HETER_DEVICE_TYPE should be cpu,gpu or xpu" if self._heter_trainer_device_type == "gpu": heter_device_id = os.getenv("FLAGS_selected_gpus", "0") self._heter_trainer_device = ":".join( @@ -1068,14 +1075,13 @@ class PaddleCloudRoleMaker(RoleMakerBase): print("Gloo init with {}: need_init_all: {}, args: {}".format( type, need_init_all, kwargs)) - self._gloo.init( - rendezvous=rendezvous_type, - role=self._role, - role_id=self._role_id(), - worker_num=self._worker_num(), - server_num=self._server_num(), - need_init_all=need_init_all, - kwargs=kwargs) + self._gloo.init(rendezvous=rendezvous_type, + role=self._role, + role_id=self._role_id(), + worker_num=self._worker_num(), + server_num=self._server_num(), + need_init_all=need_init_all, + kwargs=kwargs) if rendezvous_type == Gloo.RENDEZVOUS.HTTP: http_server_d['running'] = False @@ -1095,9 +1101,11 @@ class PaddleCloudRoleMaker(RoleMakerBase): class UserDefinedRoleMaker(PaddleCloudRoleMaker): + def __init__(self, is_collective=False, init_gloo=False, **kwargs): - super(UserDefinedRoleMaker, self).__init__( - is_collective=is_collective, init_gloo=init_gloo, **kwargs) + super(UserDefinedRoleMaker, self).__init__(is_collective=is_collective, + init_gloo=init_gloo, + **kwargs) self._init_gloo = init_gloo def _user_defined_ps_env(self): diff --git a/python/paddle/distributed/fleet/base/runtime_factory.py b/python/paddle/distributed/fleet/base/runtime_factory.py index b162a9fea68..79dac6716cb 100644 --- a/python/paddle/distributed/fleet/base/runtime_factory.py +++ b/python/paddle/distributed/fleet/base/runtime_factory.py @@ -19,6 +19,7 @@ __all__ = [] class RuntimeFactory(object): + def __init__(self): pass diff --git a/python/paddle/distributed/fleet/base/strategy_compiler.py b/python/paddle/distributed/fleet/base/strategy_compiler.py index b90e5b2bff7..823061f9035 100644 --- a/python/paddle/distributed/fleet/base/strategy_compiler.py +++ b/python/paddle/distributed/fleet/base/strategy_compiler.py @@ -107,6 +107,7 @@ def maximum_path_len_algo(optimizer_list): class StrategyCompilerBase(object): + def __init__(self): pass @@ -192,15 +193,14 @@ class StrategyCompiler(StrategyCompilerBase): self._meta_optimizers = [] if meta_optimizers is None else meta_optimizers self._graph_optimizers = [] if graph_optimizers is None else graph_optimizers - return_meta = None if meta_optimizers == None else meta_optimizers[ - 0] + return_meta = None if meta_optimizers == None else meta_optimizers[0] return_graph = None if graph_optimizers == None else graph_optimizers[ 0] if meta_optimizers == None or graph_optimizers == None: return return_meta, return_graph - # do heuristic filter here, if any meta optimizer in graph optimizers is in + # do heuristic filter here, if any meta optimizer in graph optimizers is in # any meta optimizers' black list, set return_graph to None need_graph_opt = True for graph_opt in graph_optimizers: diff --git a/python/paddle/distributed/fleet/base/topology.py b/python/paddle/distributed/fleet/base/topology.py index ef34fd144a7..aef9c85adfb 100644 --- a/python/paddle/distributed/fleet/base/topology.py +++ b/python/paddle/distributed/fleet/base/topology.py @@ -50,6 +50,7 @@ class ParallelMode(object): class CommunicateTopology(object): + def __init__(self, hybrid_group_names=["data", "pipe", "sharding", "model"], dims=[1, 1, 1, 1]): @@ -131,6 +132,7 @@ class CommunicateTopology(object): class 
HybridCommunicateGroup(object): + def __init__(self, topology): self.nranks = paddle.distributed.get_world_size() self.global_rank = paddle.distributed.get_rank() @@ -189,7 +191,7 @@ class HybridCommunicateGroup(object): def get_parallel_mode(self): # there are four modes : DataParallel / TensorParallel / PipelineParallel / ShardingParallel - # NOTE when sharding conjugates with other parallel, sharding should act like a optimizer and + # NOTE when sharding conjugates with other parallel, sharding should act like a optimizer and # adding its parallel logic within that parallelism # when use sharding alone, it should have its own parallelism for its parallel logic # TODO modify 3 others parallel to support sharding @@ -349,8 +351,9 @@ class HybridCommunicateGroup(object): return self._check_comm_group def get_rank_from_stage(self, stage_id, **kwargs): - return self._topo.get_rank_from_stage( - self.global_rank, pipe=stage_id, **kwargs) + return self._topo.get_rank_from_stage(self.global_rank, + pipe=stage_id, + **kwargs) class _CommunicateGroup(object): diff --git a/python/paddle/distributed/fleet/base/util_factory.py b/python/paddle/distributed/fleet/base/util_factory.py index 7f1712289e8..6705eb36bf3 100755 --- a/python/paddle/distributed/fleet/base/util_factory.py +++ b/python/paddle/distributed/fleet/base/util_factory.py @@ -32,6 +32,7 @@ __all__ = [] class UtilFactory(object): + def _create_util(self, context=None): util = UtilBase() if context is not None and "valid_strategy" in context: @@ -42,6 +43,7 @@ class UtilFactory(object): class UtilBase(object): + def __init__(self): self.role_maker = None self.dist_strategy = None @@ -321,6 +323,7 @@ class UtilBase(object): f.write(program.desc.serialize_to_string()) def _load_program(self, path, is_text): + def load_program_binary(path): """load program from binary string file""" with open(path, "rb") as f: @@ -344,8 +347,8 @@ class UtilBase(object): def _program_type_trans(self, prog_dir, prog_fn, is_text): prog = self._load_program(os.path.join(prog_dir, prog_fn), is_text) prog_out_fn = prog_fn + ".bin" if is_text else prog_fn + ".pbtxt" - self._save_program(prog, - os.path.join(prog_dir, prog_out_fn), 1 - is_text) + self._save_program(prog, os.path.join(prog_dir, prog_out_fn), + 1 - is_text) return prog_out_fn def _visualize_graphviz(self, program, output_dir, output_filename): @@ -354,11 +357,10 @@ class UtilBase(object): pdf_path = os.path.join(output_dir, output_filename + '.pdf') debugger.draw_block_graphviz(block, path=dot_path) cmd = ["dot", "-Tpdf", dot_path, "-o", pdf_path] - p = subprocess.Popen( - cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + p = subprocess.Popen(cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) p.wait() def _proto_check(self, config): @@ -395,14 +397,16 @@ class UtilBase(object): continue if var.shape != train_prog_var.shape or var.dtype != train_prog_var.dtype: print( - "variable: {} not match. in pruned program shape: {} dtype:{}, in train program shape: {} dtype: {}". - format(var_name, var.shape, var.dtype, train_prog_var.shape, - train_prog_var.dtype)) + "variable: {} not match. 
in pruned program shape: {} dtype:{}, in train program shape: {} dtype: {}" + .format(var_name, var.shape, var.dtype, + train_prog_var.shape, train_prog_var.dtype)) is_match = False return is_match def _params_check(self, config): + def feed_gen(batch_size, feeded_vars_dims, feeded_vars_filelist): + def reader(batch_size, fn, dim): data = [] if isinstance(dim, list) or isinstance(dim, tuple): @@ -455,8 +459,8 @@ class UtilBase(object): not_expected_op_types = check_not_expected_ops(prog, ["lookup_table"]) if len(not_expected_op_types) > 0: print( - "find op type '{}' in program, please check if your program is pruned correctly !". - format(list(not_expected_op_types))) + "find op type '{}' in program, please check if your program is pruned correctly !" + .format(list(not_expected_op_types))) return False place = fluid.CPUPlace() @@ -481,8 +485,8 @@ class UtilBase(object): if new_shape != orig_shape: raise RuntimeError( "Shape not matching: the Program requires a parameter with a shape of ({}), " - "while the loaded parameter (namely [ {} ]) has a shape of ({}).". - format(orig_shape, each_var.name, new_shape)) + "while the loaded parameter (namely [ {} ]) has a shape of ({})." + .format(orig_shape, each_var.name, new_shape)) # check feed/fetch vars in program and config feed_config = config.feed_config @@ -496,8 +500,8 @@ class UtilBase(object): feed_name_list = feed_target_names if feed_config.feeded_vars_names is not None and feed_target_names != feed_config.feeded_vars_names: print( - "warning! feed vars in program and config are diff: feed in program: {}. feed in config {}.". - format(feed_target_names, feed_config.feeded_vars_names)) + "warning! feed vars in program and config are diff: feed in program: {}. feed in config {}." + .format(feed_target_names, feed_config.feeded_vars_names)) feed_name_list = feed_config.feeded_vars_names # remove feed op in inference_program. new feed op will be added in exe.run global_block = inference_program.global_block() @@ -510,8 +514,8 @@ class UtilBase(object): global_block._remove_op(index) if fetch_config.fetch_vars_names is not None and fetch_targets_names != fetch_config.fetch_vars_names: print( - "warning! fetch vars in program and config are diff: fetch in program: {}. fetch in config {}.". - format(fetch_targets_names, fetch_config.fetch_vars_names)) + "warning! fetch vars in program and config are diff: fetch in program: {}. fetch in config {}." + .format(fetch_targets_names, fetch_config.fetch_vars_names)) fetch_list = [ inference_program.global_block().var(i) for i in fetch_config.fetch_vars_names @@ -547,9 +551,9 @@ class UtilBase(object): var_shape = var.shape[1:] if tensor_shape != var_shape: raise RuntimeError( - "feed variable '{}' shape not match. infer program shape: {}. feed tensor shape: {}". - format(feed_config.feeded_vars_names[i], var_shape, - tensor_shape)) + "feed variable '{}' shape not match. infer program shape: {}. feed tensor shape: {}" + .format(feed_config.feeded_vars_names[i], var_shape, + tensor_shape)) if not feed_config.feeded_vars_filelist: print("generate random feed vars.") @@ -559,20 +563,19 @@ class UtilBase(object): # create fake feed tensor. 
if lod_level > 1, should create_lod_tensor() if var.lod_level == 0: feed_tensors.append( - np.array( - np.random.random( - tuple([config.batch_size] + list( - feed_config.feeded_vars_dims[i]))), - dtype=feed_config.feeded_vars_types[i])) + np.array(np.random.random( + tuple([config.batch_size] + + list(feed_config.feeded_vars_dims[i]))), + dtype=feed_config.feeded_vars_types[i])) elif var.lod_level == 1: - t = np.array( - np.random.random( - tuple([config.batch_size] + list( - feed_config.feeded_vars_dims[i]))), - dtype=feed_config.feeded_vars_types[i]) + t = np.array(np.random.random( + tuple([config.batch_size] + + list(feed_config.feeded_vars_dims[i]))), + dtype=feed_config.feeded_vars_types[i]) feed_tensors.append( - fluid.create_lod_tensor(t, [[1] * config.batch_size - ], place)) + fluid.create_lod_tensor(t, + [[1] * config.batch_size], + place)) else: raise RuntimeError( "vars with lod_level >= 2 is not supported now in this infer program check tool." diff --git a/python/paddle/distributed/fleet/cloud_utils.py b/python/paddle/distributed/fleet/cloud_utils.py index 0b1169e4422..3b3097bfaa4 100644 --- a/python/paddle/distributed/fleet/cloud_utils.py +++ b/python/paddle/distributed/fleet/cloud_utils.py @@ -61,8 +61,8 @@ paddlecloud environment.".format(args_node_ips, node_ips)) if paddle_ports_num >= len( devices_per_proc) and paddle_port != args_port: - logger.warning("Use Cloud specified port:{}.".format( - paddle_port)) + logger.warning( + "Use Cloud specified port:{}.".format(paddle_port)) started_port = paddle_port except Exception as e: @@ -82,12 +82,13 @@ paddlecloud environment.".format(args_node_ips, node_ips)) trainer_endpoints = [] assert num_nodes * paddle_ports_num == len(trainer_endpoints_ori) for i in range(num_nodes): - trainer_endpoints.append(trainer_endpoints_ori[ - i * paddle_ports_num:(i + 1) * paddle_ports_num]) + trainer_endpoints.append( + trainer_endpoints_ori[i * paddle_ports_num:(i + 1) * + paddle_ports_num]) logger.debug("parsed from args: node_ips:{} \ - node_ip:{} node_rank:{} trainer_endpoints:{}" - .format(node_ips, node_ip, node_rank, trainer_endpoints)) + node_ip:{} node_rank:{} trainer_endpoints:{}".format( + node_ips, node_ip, node_rank, trainer_endpoints)) cluster, pod = get_cluster(node_ips, node_ip, trainer_endpoints, device_mode, devices_per_proc) diff --git a/python/paddle/distributed/fleet/data_generator/data_generator.py b/python/paddle/distributed/fleet/data_generator/data_generator.py index cceb81838c1..47d9e4cc8ef 100644 --- a/python/paddle/distributed/fleet/data_generator/data_generator.py +++ b/python/paddle/distributed/fleet/data_generator/data_generator.py @@ -237,6 +237,7 @@ class DataGenerator(object): # add more generalized DataGenerator that can adapt user-defined slot # for example, [(name, float_list), (name, str_list), (name, int_list)] class MultiSlotStringDataGenerator(DataGenerator): + def _gen_str(self, line): ''' Further processing the output of the process() function rewritten by @@ -281,6 +282,7 @@ class MultiSlotStringDataGenerator(DataGenerator): class MultiSlotDataGenerator(DataGenerator): + def _gen_str(self, line): ''' Further processing the output of the process() function rewritten by @@ -338,8 +340,8 @@ class MultiSlotDataGenerator(DataGenerator): for elem in elements: if isinstance(elem, float): self._proto_info[-1] = (name, "float") - elif not isinstance(elem, int) and not isinstance(elem, - long): + elif not isinstance(elem, int) and not isinstance( + elem, long): raise ValueError( "the type of element%s must be 
in int or float" % type(elem)) @@ -347,7 +349,8 @@ class MultiSlotDataGenerator(DataGenerator): else: if len(line) != len(self._proto_info): raise ValueError( - "the complete field set of two given line are inconsistent.") + "the complete field set of two given line are inconsistent." + ) for index, item in enumerate(line): name, elements = item if not isinstance(name, str): @@ -370,8 +373,8 @@ class MultiSlotDataGenerator(DataGenerator): if self._proto_info[index][1] != "float": if isinstance(elem, float): self._proto_info[index] = (name, "float") - elif not isinstance(elem, int) and not isinstance(elem, - long): + elif not isinstance(elem, int) and not isinstance( + elem, long): raise ValueError( "the type of element%s must be in int or float" % type(elem)) diff --git a/python/paddle/distributed/fleet/dataset/dataset.py b/python/paddle/distributed/fleet/dataset/dataset.py index 235f4ece62d..2983457b8a7 100644 --- a/python/paddle/distributed/fleet/dataset/dataset.py +++ b/python/paddle/distributed/fleet/dataset/dataset.py @@ -322,10 +322,10 @@ class DatasetBase(object): "Please check if var's type in data_generator is correct." % (ele[0], "float", ele[1])) - if (var_list[i].dtype == core.VarDesc.VarType.INT64 or - var_list[i].dtype == core.VarDesc.VarType.INT32 - ) and not all( - isinstance(ele, int) for ele in ele[1]): + if (var_list[i].dtype == core.VarDesc.VarType.INT64 + or var_list[i].dtype + == core.VarDesc.VarType.INT32) and not all( + isinstance(ele, int) for ele in ele[1]): raise TypeError( "var dtype mismatch error: var name = %s, var type in var_list = %s, while var in data_generator contains non-int value, which is %s \n" "Please check if order of var_list and data_generator are aligned. \n" @@ -583,15 +583,14 @@ class InMemoryDataset(DatasetBase): pipe_command = kwargs.get("pipe_command", "cat") download_cmd = kwargs.get("download_cmd", "cat") - super(InMemoryDataset, self).init( - batch_size=batch_size, - thread_num=thread_num, - use_var=use_var, - pipe_command=pipe_command, - input_type=input_type, - fs_name=fs_name, - fs_ugi=fs_ugi, - download_cmd=download_cmd) + super(InMemoryDataset, self).init(batch_size=batch_size, + thread_num=thread_num, + use_var=use_var, + pipe_command=pipe_command, + input_type=input_type, + fs_name=fs_name, + fs_ugi=fs_ugi, + download_cmd=download_cmd) data_feed_type = kwargs.get("data_feed_type", "MultiSlotInMemoryDataFeed") @@ -779,8 +778,9 @@ class InMemoryDataset(DatasetBase): def _generate_local_tables_unlock(self, table_id, fea_dim, read_thread_num, consume_thread_num, shard_num): - self.dataset.generate_local_tables_unlock( - table_id, fea_dim, read_thread_num, consume_thread_num, shard_num) + self.dataset.generate_local_tables_unlock(table_id, fea_dim, + read_thread_num, + consume_thread_num, shard_num) def set_date(self, date): """ diff --git a/python/paddle/distributed/fleet/dataset/index_dataset.py b/python/paddle/distributed/fleet/dataset/index_dataset.py index c4c424fe2dc..8b5a9c5a45b 100644 --- a/python/paddle/distributed/fleet/dataset/index_dataset.py +++ b/python/paddle/distributed/fleet/dataset/index_dataset.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,11 +17,13 @@ __all__ = [] class Index(object): + def __init__(self, name): self._name = name class TreeIndex(Index): + def __init__(self, name, path): super(TreeIndex, self).__init__(name) self._wrapper = core.IndexWrapper() diff --git a/python/paddle/distributed/fleet/elastic/__init__.py b/python/paddle/distributed/fleet/elastic/__init__.py index 503d2966a80..b80a66c6f01 100644 --- a/python/paddle/distributed/fleet/elastic/__init__.py +++ b/python/paddle/distributed/fleet/elastic/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/distributed/fleet/elastic/collective.py b/python/paddle/distributed/fleet/elastic/collective.py index de350e15d35..f27987571d8 100644 --- a/python/paddle/distributed/fleet/elastic/collective.py +++ b/python/paddle/distributed/fleet/elastic/collective.py @@ -23,6 +23,7 @@ from paddle.distributed.fleet.elastic.manager import LauncherInterface class CollectiveLauncher(LauncherInterface): + def __init__(self, args): self.args = args self.procs = [] diff --git a/python/paddle/distributed/fleet/elastic/manager.py b/python/paddle/distributed/fleet/elastic/manager.py index 1716e332c82..e0a6bd81c8e 100644 --- a/python/paddle/distributed/fleet/elastic/manager.py +++ b/python/paddle/distributed/fleet/elastic/manager.py @@ -59,6 +59,7 @@ class ElasticStatus: class LauncherInterface(object): + def __init__(self, args): self.args = args self.procs = [] @@ -109,8 +110,8 @@ class LauncherInterface(object): return ret logger.error("ABORT!!! ABORT!!! ABORT!!!") logger.error( - "ERROR rank {} error with exit code {}, check log for detail.". - format(p.rank, ret)) + "ERROR rank {} error with exit code {}, check log for detail." + .format(p.rank, ret)) result = ret if not alive and result is None: return 0 @@ -128,6 +129,7 @@ class LauncherInterface(object): class ElasticManager(object): + def __init__(self, args, etcd_client): self.args = args @@ -238,12 +240,13 @@ class ElasticManager(object): ] self.hosts = list(set(self.hosts)) if self.hosts else self.hosts logger.info( - f"host_call_back curr_host={self.curr_host}, hosts:{self.hosts}") + f"host_call_back curr_host={self.curr_host}, hosts:{self.hosts}" + ) self.need_sync = True self.elastic_startup_time = None - host_watch = self.etcd.add_watch_prefix_callback(self.node_prefix, - host_call_back) + host_watch = self.etcd.add_watch_prefix_callback( + self.node_prefix, host_call_back) host_lease = self.etcd.lease(elastic_ttl) # register etcd lease heartbeat @@ -267,13 +270,15 @@ class ElasticManager(object): six.b(self.curr_host), lease=host_lease) except Exception as e: - logger.error("[lease_heartbeat] internal error:{} {}". 
- format(e, traceback.format_exc())) + logger.error( + "[lease_heartbeat] internal error:{} {}".format( + e, traceback.format_exc())) break time.sleep(elastic_ttl / 3) - keepalived_thread = threading.Thread( - name='lease_heartbeat', target=lease_heartbeat, daemon=True) + keepalived_thread = threading.Thread(name='lease_heartbeat', + target=lease_heartbeat, + daemon=True) keepalived_thread.start() self.etcd.put(self.host_path, six.b(self.curr_host), lease=host_lease) @@ -300,7 +305,7 @@ class ElasticManager(object): def _host_to_endpoints(self, ip_port_list: list, devices_per_proc: list, - start_port: int=6170) -> str: + start_port: int = 6170) -> str: endpoint_list = [] for ip_port in ip_port_list: endpoints = ip_port.split(":") @@ -343,12 +348,11 @@ class ElasticManager(object): return logger.info("execute pre_hook...") current_env = copy.copy(os.environ.copy()) - out, err = subprocess.Popen( - self.args.elastic_pre_hook, - env=current_env, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=True).communicate() + out, err = subprocess.Popen(self.args.elastic_pre_hook, + env=current_env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=True).communicate() if err: logger.warn("pre_hook exec failed") else: @@ -390,7 +394,7 @@ class ElasticManager(object): return int(self.etcd.get(self.prefix)[0]) == 1 - def _match(self, host_list: list=None): + def _match(self, host_list: list = None): if host_list: self.hosts = host_list else: @@ -449,7 +453,7 @@ class ElasticManager(object): logger.info("update env PADDLE_TRAINERS {} ".format(self.trainers)) return - # fault tolerance + # fault tolerance idx = self.hosts.index(self.curr_host) # swap if self.host not in the right position @@ -490,7 +494,7 @@ class ElasticManager(object): ) # If scale in node from the first of the rank list, you need to minimize the movement of the rank - # eg: + # eg: # the source trainers is:10.10.10.0,10.10.10.1,10.10.10.2,10.10.10.3 # 10.10.10.0 is removed # the new trainers is:10.10.10.3,10.10.10.1,10.10.10.2 @@ -557,8 +561,8 @@ class ElasticManager(object): logger.info('ready with hosts {}'.format(self.hosts)) self._update_hosts() return - logger.info('not ready for np {} with hosts {}'.format(self.np, - self.hosts)) + logger.info('not ready for np {} with hosts {}'.format( + self.np, self.hosts)) idx += 1 time.sleep(2) return diff --git a/python/paddle/distributed/fleet/fleet_executor_utils.py b/python/paddle/distributed/fleet/fleet_executor_utils.py index 67b4b5e8fe2..0e3a95397e3 100644 --- a/python/paddle/distributed/fleet/fleet_executor_utils.py +++ b/python/paddle/distributed/fleet/fleet_executor_utils.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -54,11 +54,10 @@ class TaskNode: if ops is not None: assert role is not None and task_id is not None, \ "If init task node with ops, should provide `role` and `task_id`." - self.node = core.TaskNode(role, ops, cur_rank, - int(task_id), max_run_times, - max_slot_times) - print("Creating task node by ops. 
The role is:", - self.role(), "and the id is:", self.task_id()) + self.node = core.TaskNode(role, ops, cur_rank, int(task_id), + max_run_times, max_slot_times) + print("Creating task node by ops. The role is:", self.role(), + "and the id is:", self.task_id()) else: self.program = program self.node = core.TaskNode(program.desc, cur_rank, max_run_times, @@ -218,39 +217,35 @@ def run1f1b(program, cur_rank, max_run_times, dist_opt, nrank): # Create task nodes. # The lr_sched and opt should be 'amplifier interceptor. # The fwd and bwd should be 'compute interceptor'. - lr_task_node = TaskNode( - cur_rank=cur_rank, - max_run_times=max_run_times, - max_slot_times=max_slot_times, - role=int(OpRole.Optimize.LRSched), - ops=lr_ops, - task_id=int(cur_rank * num_of_functionality + 0), - node_type="Amplifier") + lr_task_node = TaskNode(cur_rank=cur_rank, + max_run_times=max_run_times, + max_slot_times=max_slot_times, + role=int(OpRole.Optimize.LRSched), + ops=lr_ops, + task_id=int(cur_rank * num_of_functionality + 0), + node_type="Amplifier") lr_task_node.set_run_pre_steps(max_run_times) - fwd_task_node = TaskNode( - cur_rank=cur_rank, - max_run_times=max_run_times, - max_slot_times=max_slot_times, - role=int(OpRole.Forward), - ops=fwd_ops, - task_id=int(cur_rank * num_of_functionality + 1), - node_type="Compute") - bwd_task_node = TaskNode( - cur_rank=cur_rank, - max_run_times=max_run_times, - max_slot_times=max_slot_times, - role=int(OpRole.Backward), - ops=bwd_ops, - task_id=int(cur_rank * num_of_functionality + 2), - node_type="Compute") - opt_task_node = TaskNode( - cur_rank=cur_rank, - max_run_times=max_run_times, - max_slot_times=max_slot_times, - role=int(OpRole.Optimize), - ops=opt_ops, - task_id=int(cur_rank * num_of_functionality + 3), - node_type="Amplifier") + fwd_task_node = TaskNode(cur_rank=cur_rank, + max_run_times=max_run_times, + max_slot_times=max_slot_times, + role=int(OpRole.Forward), + ops=fwd_ops, + task_id=int(cur_rank * num_of_functionality + 1), + node_type="Compute") + bwd_task_node = TaskNode(cur_rank=cur_rank, + max_run_times=max_run_times, + max_slot_times=max_slot_times, + role=int(OpRole.Backward), + ops=bwd_ops, + task_id=int(cur_rank * num_of_functionality + 2), + node_type="Compute") + opt_task_node = TaskNode(cur_rank=cur_rank, + max_run_times=max_run_times, + max_slot_times=max_slot_times, + role=int(OpRole.Optimize), + ops=opt_ops, + task_id=int(cur_rank * num_of_functionality + 3), + node_type="Amplifier") opt_task_node.set_run_pre_steps(max_run_times) opt_task_node.set_run_at_offset(max_run_times - 1) task_nodes = [lr_task_node, fwd_task_node, bwd_task_node, opt_task_node] @@ -318,8 +313,10 @@ def origin(program, cur_rank): task_id_to_rank (dict): a fake dict, since there is no upstream or downstream, this dict won't be used """ print("fleet executor will use python side origin scheduler.") - task_node = TaskNode( - program=program, cur_rank=cur_rank, max_run_times=1, max_slot_times=1) + task_node = TaskNode(program=program, + cur_rank=cur_rank, + max_run_times=1, + max_slot_times=1) task_node.set_type("Compute") task_id = task_node.task_id() task_id_to_rank = {task_id: cur_rank} diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index 343cca7f4f0..583043c186a 100644 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -166,13 +166,12 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra ) base_group.add_argument("--selected_mlus", 
dest="mlus") - base_group.add_argument( - "training_script", - type=str, - help="The full path to the single GPU training " - "program/script to be launched in parallel, " - "followed by all the arguments for the " - "training script") + base_group.add_argument("training_script", + type=str, + help="The full path to the single GPU training " + "program/script to be launched in parallel, " + "followed by all the arguments for the " + "training script") base_group.add_argument('training_script_args', nargs=REMAINDER) @@ -204,10 +203,14 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra ps_group = parser.add_argument_group("Parameter-Server Parameters") # for parameter server - ps_group.add_argument( - "--servers", type=str, default="", help="User defined servers ip:port") - ps_group.add_argument( - "--workers", type=str, default="", help="User defined workers ip:port") + ps_group.add_argument("--servers", + type=str, + default="", + help="User defined servers ip:port") + ps_group.add_argument("--workers", + type=str, + default="", + help="User defined workers ip:port") ps_group.add_argument( "--heter_workers", type=str, @@ -221,26 +224,30 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra ps_group.add_argument("--worker_num", type=int, help="number of workers") ps_group.add_argument("--server_num", type=int, help="number of servers") - ps_group.add_argument( - "--heter_worker_num", - type=str, - help="number of heter_workers in each stage 1;2;3") + ps_group.add_argument("--heter_worker_num", + type=str, + help="number of heter_workers in each stage 1;2;3") ps_group.add_argument("--http_port", type=int, help="Gloo http Port") # parameter elastic mode elastic_group = parser.add_argument_group("Elastic Parameters") - elastic_group.add_argument( - "--elastic_server", type=str, help="etcd server host:port") - elastic_group.add_argument( - "--elastic_pre_hook", type=str, help="elastic pre_hook shell cmd") + elastic_group.add_argument("--elastic_server", + type=str, + help="etcd server host:port") + elastic_group.add_argument("--elastic_pre_hook", + type=str, + help="elastic pre_hook shell cmd") elastic_group.add_argument("--job_id", type=str, help="job unique id") elastic_group.add_argument("--np", type=int, help="job pod/node number") elastic_group.add_argument("--scale", type=int, default=0, help="scale np") - elastic_group.add_argument( - "--host", type=str, help="bind host, default to POD_IP env") - elastic_group.add_argument( - "--force", type=bool, default=False, help="update np force") + elastic_group.add_argument("--host", + type=str, + help="bind host, default to POD_IP env") + elastic_group.add_argument("--force", + type=bool, + default=False, + help="update np force") known_args, _ = parser.parse_known_args() return known_args @@ -351,15 +358,16 @@ def get_cluster_info(args): cluster, pod = launch_utils.get_mapped_cluster_from_args_with_rank_mapping( args, device_mode) elif cloud_utils.use_paddlecloud() and trainers_num != 1: - cluster, pod = cloud_utils.get_cloud_cluster( - args.ips, device_mode, devices_per_proc, start_port) + cluster, pod = cloud_utils.get_cloud_cluster(args.ips, device_mode, + devices_per_proc, + start_port) logger.debug("get cluster from cloud:{}".format(cluster)) elif device_mode == DeviceMode.ASCEND_NPU: # for ascend - cluster, pod = ascend_utils.get_cloud_cluster( - rank_table_file=os.getenv("RANK_TABLE_FILE", None), - device_mode=device_mode, - start_port=start_port) + cluster, pod = 
ascend_utils.get_cloud_cluster(rank_table_file=os.getenv( + "RANK_TABLE_FILE", None), + device_mode=device_mode, + start_port=start_port) else: # trainers_num = 1 or not use paddlecloud ips="a,b" cluster, pod = get_cluster_from_args(args, device_mode, @@ -383,13 +391,12 @@ def launch_collective(args): cluster, pod = get_cluster_info(args) global_envs = get_global_envs(args, tmp_dir) - procs = start_local_trainers( - cluster, - pod, - training_script=args.training_script, - training_script_args=args.training_script_args, - log_dir=args.log_dir, - envs=global_envs) + procs = start_local_trainers(cluster, + pod, + training_script=args.training_script, + training_script_args=args.training_script_args, + log_dir=args.log_dir, + envs=global_envs) for idx, proc in enumerate(procs): print("launch proc_id:{} idx:{}".format(proc.proc.pid, idx)) @@ -492,16 +499,17 @@ def which_distributed_mode(args): if len(has_ps_args) > 0: logger.info( - "Run parameter-sever mode. pserver arguments:{}, accelerators count:{}". - format(has_ps_args, accelerators)) + "Run parameter-sever mode. pserver arguments:{}, accelerators count:{}" + .format(has_ps_args, accelerators)) has_ps_heter_args = list(set(has_ps_args) & set(ps_heter_args)) if len(has_ps_heter_args) > 0: return DistributeMode.PS_HETER else: return DistributeMode.PS elif len(has_collective_args) > 0: - logger.info("Run collective mode. gpu arguments:{}, cuda count:{}". - format(has_collective_args, accelerators)) + logger.info( + "Run collective mode. gpu arguments:{}, cuda count:{}".format( + has_collective_args, accelerators)) return DistributeMode.COLLECTIVE else: if not fluid.core.is_compiled_with_cuda( diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index 2dec58c7538..e10709416f8 100644 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -33,6 +33,7 @@ import paddle import paddle.fluid as fluid from distutils.util import strtobool import paddle.utils.cpp_extension.extension_utils as utils + logger = logging.getLogger("root") logger.propagate = False @@ -61,6 +62,7 @@ class DeviceMode(): class Cluster(object): + def __init__(self, hdfs): self.job_server = None self.pods = [] @@ -130,6 +132,7 @@ class Cluster(object): class JobServer(object): + def __init__(self): self.endpoint = None @@ -144,6 +147,7 @@ class JobServer(object): class Trainer(object): + def __init__(self): self.accelerators = [] self.endpoint = None @@ -176,6 +180,7 @@ class Trainer(object): class Pod(object): + def __init__(self): self.rank = None self.id = None @@ -191,10 +196,10 @@ class Pod(object): def __str__(self): return "rank:{} id:{} addr:{} port:{} visible_accelerator:{} trainers:{} servers:{} \ workers:{} heter_workers:{}".format( - self.rank, self.id, self.addr, self.port, self.accelerators, [ - str(t) for t in self.trainers - ], [str(s) for s in self.servers], [str(w) for w in self.workers], - [str(h) for h in self.heter_workers]) + self.rank, self.id, self.addr, self.port, self.accelerators, + [str(t) for t in self.trainers], [str(s) for s in self.servers], + [str(w) + for w in self.workers], [str(h) for h in self.heter_workers]) def __eq__(self, pod): if self.rank != pod.rank or \ @@ -367,15 +372,15 @@ def add_arguments(argname, type, default, help, argparser, **kwargs): args = parser.parse_args() """ type = strtobool if type == bool else type - argparser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: 
%(default)s.', - **kwargs) + argparser.add_argument("--" + argname, + default=default, + type=type, + help=help + ' Default: %(default)s.', + **kwargs) def find_free_ports(num): + def __free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: # Note(wangxi): Close the connection with a TCP RST instead @@ -424,8 +429,8 @@ def pretty_print_envs(envs, header=None): for k, v in envs.items(): max_k = max(max_k, len(k)) - h_format = " " + "|{{:>{}s}}{}{{:^{}s}}|\n".format(max_k, " " * spacing, - max_v) + h_format = " " + "|{{:>{}s}}{}{{:^{}s}}|\n".format( + max_k, " " * spacing, max_v) l_format = " " + "|{{:>{}s}}{{}}{{:^{}s}}|\n".format(max_k, max_v) length = max_k + max_v + spacing @@ -457,6 +462,7 @@ def pretty_print_envs(envs, header=None): class TrainerProc(object): + def __init__(self): self.proc = None self.log_fn = None @@ -502,14 +508,20 @@ def start_local_trainers(cluster, procs = [] for idx, t in enumerate(pod.trainers): proc_env = { - "PADDLE_TRAINER_ID": "%d" % t.rank, - "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint, - "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(), - "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()), - "PADDLE_RANK_IN_NODE": str(idx), + "PADDLE_TRAINER_ID": + "%d" % t.rank, + "PADDLE_CURRENT_ENDPOINT": + "%s" % t.endpoint, + "PADDLE_TRAINERS_NUM": + "%d" % cluster.trainers_nranks(), + "PADDLE_TRAINER_ENDPOINTS": + ",".join(cluster.trainers_endpoints()), + "PADDLE_RANK_IN_NODE": + str(idx), "PADDLE_LOCAL_DEVICE_IDS": ",".join([str(acc) for acc in t.accelerators]), - "PADDLE_WORLD_DEVICE_IDS": ",".join(res), + "PADDLE_WORLD_DEVICE_IDS": + ",".join(res), } # The following three environnement variables are used for auto mapping @@ -527,8 +539,8 @@ def start_local_trainers(cluster, proc_env["FLAGS_selected_gpus"] = "%s" % ",".join( [str(g) for g in t.accelerators]) - elif len(t. - accelerators) > 0 and pod.device_mode == DeviceMode.ASCEND_NPU: + elif len(t.accelerators + ) > 0 and pod.device_mode == DeviceMode.ASCEND_NPU: proc_env["FLAGS_selected_npus"] = "%s" % ",".join( [str(g) for g in t.accelerators]) elif len(t.accelerators) > 0 and pod.device_mode == DeviceMode.MLU: @@ -558,8 +570,8 @@ def start_local_trainers(cluster, logger.info("Local start {} processes. First process distributed " "environment info (Only For Debug): {}".format( len(pod.trainers), - pretty_print_envs(proc_env, ("Distributed Envs", - "Value")))) + pretty_print_envs(proc_env, + ("Distributed Envs", "Value")))) logger.info( "details about PADDLE_TRAINER_ENDPOINTS can be found in " "{}/endpoints.log, and detail running logs maybe found in " @@ -578,8 +590,11 @@ def start_local_trainers(cluster, fn = open("%s/prelaunchlog.%d" % (log_dir, idx), "a") else: fn = open("%s/workerlog.%d" % (log_dir, idx), "a") - proc = subprocess.Popen( - cmd, env=current_env, stdout=fn, stderr=fn, preexec_fn=pre_fn) + proc = subprocess.Popen(cmd, + env=current_env, + stdout=fn, + stderr=fn, + preexec_fn=pre_fn) else: proc = subprocess.Popen(cmd, env=current_env, preexec_fn=pre_fn) @@ -638,14 +653,14 @@ def watch_local_trainers(procs, nranks): return except SystemExit: logger.error( - "ABORT!!! Out of all {} trainers, the trainer process with rank={} was aborted. Please check its log.". - format(nranks, error_rank)) + "ABORT!!! Out of all {} trainers, the trainer process with rank={} was aborted. Please check its log." + .format(nranks, error_rank)) terminate_local_procs(procs) return except: logger.error( - "ABORT!!! 
Out of all {} trainers, the trainer process with rank={} was aborted. Please check its log.". - format(nranks, error_rank)) + "ABORT!!! Out of all {} trainers, the trainer process with rank={} was aborted. Please check its log." + .format(nranks, error_rank)) terminate_local_procs(procs) return @@ -941,8 +956,9 @@ def get_custom_endpoints(origin_endpoints, offset=0): # pretty_print_envs(environs))) -def get_mapped_cluster_without_rank_mapping( - node_ips, node_ip, trainer_endpoints, device_mode, node_ranks): +def get_mapped_cluster_without_rank_mapping(node_ips, node_ip, + trainer_endpoints, device_mode, + node_ranks): assert type(trainer_endpoints) is list, "trainer_endpoints must be list" assert device_mode == DeviceMode.GPU, \ "Only support get mapped cluster for gpu now." @@ -1000,8 +1016,9 @@ def get_mapped_cluster_from_args_without_rank_mapping(args, device_mode): "ranks length should be equal to ips length." logger.debug("parsed from args: node_ips:{} node_ip:{} " - "node_rank:{} node_ranks:{}".format( - node_ips, node_ip, node_rank, node_ranks[node_rank])) + "node_rank:{} node_ranks:{}".format(node_ips, node_ip, + node_rank, + node_ranks[node_rank])) # NOTE: there are different number of global mapped ranks on each node. free_ports = [] @@ -1011,23 +1028,22 @@ def get_mapped_cluster_from_args_without_rank_mapping(args, device_mode): if os.environ.get('PADDLE_PORT') is not None: start_port = int(os.getenv("PADDLE_PORT", "")) free_ports = [ - x - for x in range(start_port, start_port + len(node_ranks[ - node_rank])) + x for x in range(start_port, start_port + + len(node_ranks[node_rank])) ] elif os.environ.get('FLAGS_START_PORT') is not None: start_port = int(os.environ.get('FLAGS_START_PORT')) free_ports = [ - x - for x in range(start_port, start_port + len(node_ranks[ - node_rank])) + x for x in range(start_port, start_port + + len(node_ranks[node_rank])) ] else: free_ports = find_free_ports(len(node_ranks[node_rank])) trainer_endpoints.append(["%s:%d" % (ip, port) for port in free_ports]) - return get_mapped_cluster_without_rank_mapping( - node_ips, node_ip, trainer_endpoints, device_mode, node_ranks) + return get_mapped_cluster_without_rank_mapping(node_ips, node_ip, + trainer_endpoints, + device_mode, node_ranks) def get_mapped_cluster_with_rank_mapping(node_ips, node_ip, trainer_endpoints, @@ -1066,8 +1082,8 @@ def get_mapped_cluster_with_rank_mapping(node_ips, node_ip, trainer_endpoints, ranks_per_node[i])] assert len(local_device_ids) == 1, \ "Only support one process to one device mapping" - trainer.accelerators.append( - get_relative_gpu_id(local_device_ids[0])) + trainer.accelerators.append(get_relative_gpu_id( + local_device_ids[0])) trainer.endpoint = "%s" % (cur_node_endpoints[i]) trainer.rank = ranks_per_node[i] pod.trainers.append(trainer) @@ -1121,8 +1137,9 @@ def get_mapped_cluster_from_args_with_rank_mapping(args, device_mode): "ranks length should be equal to ips length." logger.debug("parsed from args: node_ips:{} node_ip:{} " - "node_rank:{} node_ranks:{}".format( - node_ips, node_ip, node_rank, node_ranks[node_rank])) + "node_rank:{} node_ranks:{}".format(node_ips, node_ip, + node_rank, + node_ranks[node_rank])) # NOTE: there are different number of global mapped ranks on each node. 
free_ports = [] @@ -1132,16 +1149,14 @@ def get_mapped_cluster_from_args_with_rank_mapping(args, device_mode): if os.environ.get('PADDLE_PORT') is not None: start_port = int(os.getenv("PADDLE_PORT", "")) free_ports = [ - x - for x in range(start_port, start_port + len(node_ranks[ - node_rank])) + x for x in range(start_port, start_port + + len(node_ranks[node_rank])) ] elif os.environ.get('FLAGS_START_PORT') is not None: start_port = int(os.environ.get('FLAGS_START_PORT')) free_ports = [ - x - for x in range(start_port, start_port + len(node_ranks[ - node_rank])) + x for x in range(start_port, start_port + + len(node_ranks[node_rank])) ] else: free_ports = find_free_ports(len(node_ranks[node_rank])) @@ -1153,6 +1168,7 @@ def get_mapped_cluster_from_args_with_rank_mapping(args, device_mode): class ParameterServerLauncher(object): + def __init__(self, args, distribute_mode): self.args = args self.distribute_mode = distribute_mode @@ -1234,8 +1250,9 @@ class ParameterServerLauncher(object): # create endpoints str worker_endpoints = [] for i in range(self.worker_num): - worker_endpoints.append(":".join((worker_endpoints_ips[ - i], str(worker_endpoints_port[i])))) + worker_endpoints.append(":".join( + (worker_endpoints_ips[i], + str(worker_endpoints_port[i])))) self.worker_endpoints = ",".join(worker_endpoints) else: self.worker_endpoints = args.workers @@ -1287,13 +1304,14 @@ class ParameterServerLauncher(object): if 1 in heter_worker_endpoints_len: # if no port value in heter_worker_endpoint, will set default port values. heter_worker_endpoints_port = get_ports( - len(heter_worker_endpoints_ips), self.worker_num - + self.server_num + self.heter_worker_num) + len(heter_worker_endpoints_ips), + self.worker_num + self.server_num + + self.heter_worker_num) new_heter_worker_endpoints = [] for j in range(len(heter_worker_endpoints_ips)): - new_heter_worker_endpoints.append(":".join(( - heter_worker_endpoints_ips[j], str( - heter_worker_endpoints_port[j])))) + new_heter_worker_endpoints.append(":".join( + (heter_worker_endpoints_ips[j], + str(heter_worker_endpoints_port[j])))) ip_port_list = ",".join(new_heter_worker_endpoints) else: ip_port_list = ",".join(heter_worker_endpoints) @@ -1307,9 +1325,9 @@ class ParameterServerLauncher(object): else: for i in range(len(self.stage_heter_trainer_num)): heter_trainer_num = self.stage_heter_trainer_num[i] - ports = get_ports(heter_trainer_num, - self.server_num + self.worker_num + - self.heter_worker_num) + ports = get_ports( + heter_trainer_num, self.server_num + + self.worker_num + self.heter_worker_num) ip_port_list = ",".join( ["127.0.0.1:" + str(x) for x in ports]) self.stage_heter_map[i + 2] = ip_port_list @@ -1344,9 +1362,9 @@ class ParameterServerLauncher(object): new_heter_worker_endpoints = [] for j in range(len(heter_worker_endpoints_ips)): - new_heter_worker_endpoints.append(":".join(( - heter_worker_endpoints_ips[j], str( - heter_worker_endpoints_port[j])))) + new_heter_worker_endpoints.append(":".join( + (heter_worker_endpoints_ips[j], + str(heter_worker_endpoints_port[j])))) ip_port_list = ",".join(new_heter_worker_endpoints) else: ip_port_list = ",".join(heter_worker_endpoints) @@ -1480,8 +1498,8 @@ class ParameterServerLauncher(object): self.start_pod_heter_worker(self.args, pod) logger.info( - "Please check servers, workers and heter_worker logs in {}/workerlog.*, {}/serverlog.* and {}/heterlog.*". 
- format(self.args.log_dir, self.args.log_dir, self.args.log_dir)) + "Please check servers, workers and heter_worker logs in {}/workerlog.*, {}/serverlog.* and {}/heterlog.*" + .format(self.args.log_dir, self.args.log_dir, self.args.log_dir)) # 4. wait for finish training if len(self.procs["worker"]) > 0: @@ -1536,8 +1554,7 @@ class ParameterServerLauncher(object): "TRAINING_ROLE": "PSERVER", "PADDLE_TRAINERS_NUM": str(self.worker_num), "POD_IP": cur_server.endpoint.split(":")[0], - "PADDLE_WITH_GLOO": - str(os.getenv("PADDLE_WITH_GLOO", "0")), + "PADDLE_WITH_GLOO": str(os.getenv("PADDLE_WITH_GLOO", "0")), "PADDLE_GLOO_RENDEZVOUS": "3", "PADDLE_GLOO_FS_PATH": self.gloo_rendezvous_dir, "PADDLE_GLOO_HTTP_ENDPOINT": self.http_port @@ -1550,8 +1567,7 @@ class ParameterServerLauncher(object): "TRAINING_ROLE": "PSERVER", "PADDLE_TRAINERS_NUM": str(self.worker_num), "POD_IP": cur_server.endpoint.split(":")[0], - "PADDLE_WITH_GLOO": - str(os.getenv("PADDLE_WITH_GLOO", "0")), + "PADDLE_WITH_GLOO": str(os.getenv("PADDLE_WITH_GLOO", "0")), "PADDLE_GLOO_RENDEZVOUS": "3", "PADDLE_GLOO_FS_PATH": self.gloo_rendezvous_dir, "PADDLE_GLOO_HTTP_ENDPOINT": self.http_port @@ -1567,15 +1583,17 @@ class ParameterServerLauncher(object): "Local server start {} processes. First process distributed " "environment info (Only For Debug): {}".format( len(pod.servers), - pretty_print_envs(proc_env, ("Distributed Envs", "Value" - )))) + pretty_print_envs(proc_env, + ("Distributed Envs", "Value")))) if args.log_dir is not None: os.system("mkdir -p {}".format(args.log_dir)) fn = open("%s/serverlog.%d" % (args.log_dir, idx), "w") self.log_fns["server"].append(fn) - proc = subprocess.Popen( - cmd, env=current_env, stdout=fn, stderr=fn) + proc = subprocess.Popen(cmd, + env=current_env, + stdout=fn, + stderr=fn) else: proc = subprocess.Popen(cmd, env=current_env) @@ -1605,35 +1623,54 @@ class ParameterServerLauncher(object): device_list = [str(x) for x in range(0, heter_device_num)] for idx, cur_worker in enumerate(pod.workers): - device_id = "0" if heter_device_num == 0 else str(device_list[( - idx) % heter_device_num]) + device_id = "0" if heter_device_num == 0 else str( + device_list[(idx) % heter_device_num]) if self.distribute_mode == DistributeMode.PS_HETER: proc_env = { - "PADDLE_PSERVERS_IP_PORT_LIST": self.server_endpoints, - "PADDLE_TRAINER_ENDPOINTS": self.worker_endpoints, - "PADDLE_TRAINERS_NUM": str(self.worker_num), - "PADDLE_STAGE_TRAINERS_NUM": str(self.stage_trainer_num), - "STAGE_ID": "1", - "STAGE_NUM": str(self.stage_num), - "PADDLE_PREVIOUS_HETER_TRAINER_IP_PORT_LIST": "", + "PADDLE_PSERVERS_IP_PORT_LIST": + self.server_endpoints, + "PADDLE_TRAINER_ENDPOINTS": + self.worker_endpoints, + "PADDLE_TRAINERS_NUM": + str(self.worker_num), + "PADDLE_STAGE_TRAINERS_NUM": + str(self.stage_trainer_num), + "STAGE_ID": + "1", + "STAGE_NUM": + str(self.stage_num), + "PADDLE_PREVIOUS_HETER_TRAINER_IP_PORT_LIST": + "", "PADDLE_NEXT_HETER_TRAINER_IP_PORT_LIST": self.stage_heter_map[2], "PADDLE_ALL_HETER_TRAINER_IP_PORT_LIST": self.heter_worker_endpoints, - "HETER_DEVICE_TYPE": self.stage_device_map[1], - "TRAINING_ROLE": "TRAINER", - "POD_IP": cur_worker.endpoint.split(":")[0], - "PADDLE_PORT": cur_worker.endpoint.split(":")[1], - "PADDLE_TRAINER_ID": str(cur_worker.rank), + "HETER_DEVICE_TYPE": + self.stage_device_map[1], + "TRAINING_ROLE": + "TRAINER", + "POD_IP": + cur_worker.endpoint.split(":")[0], + "PADDLE_PORT": + cur_worker.endpoint.split(":")[1], + "PADDLE_TRAINER_ID": + str(cur_worker.rank), "PADDLE_WITH_GLOO": 
str(os.getenv("PADDLE_WITH_GLOO", "0")), - "PADDLE_GLOO_RENDEZVOUS": "3", - "PADDLE_GLOO_FS_PATH": self.gloo_rendezvous_dir, - "FLAGS_selected_gpus": "0", - "FLAGS_selected_xpus": "0", - "CUDA_VISIBLE_DEVICES": device_id, - "XPU_VISIBLE_DEVICES": device_id, - "PADDLE_GLOO_HTTP_ENDPOINT": self.http_port + "PADDLE_GLOO_RENDEZVOUS": + "3", + "PADDLE_GLOO_FS_PATH": + self.gloo_rendezvous_dir, + "FLAGS_selected_gpus": + "0", + "FLAGS_selected_xpus": + "0", + "CUDA_VISIBLE_DEVICES": + device_id, + "XPU_VISIBLE_DEVICES": + device_id, + "PADDLE_GLOO_HTTP_ENDPOINT": + self.http_port } else: proc_env = { @@ -1644,8 +1681,7 @@ class ParameterServerLauncher(object): "POD_IP": cur_worker.endpoint.split(":")[0], "PADDLE_PORT": cur_worker.endpoint.split(":")[1], "PADDLE_TRAINER_ID": str(cur_worker.rank), - "PADDLE_WITH_GLOO": - str(os.getenv("PADDLE_WITH_GLOO", "0")), + "PADDLE_WITH_GLOO": str(os.getenv("PADDLE_WITH_GLOO", "0")), "PADDLE_GLOO_RENDEZVOUS": "3", "PADDLE_GLOO_FS_PATH": self.gloo_rendezvous_dir, "FLAGS_selected_gpus": "0", @@ -1665,15 +1701,17 @@ class ParameterServerLauncher(object): "Local worker start {} processes. First process distributed " "environment info (Only For Debug): {}".format( len(pod.workers), - pretty_print_envs(proc_env, ("Distributed Envs", "Value" - )))) + pretty_print_envs(proc_env, + ("Distributed Envs", "Value")))) if args.log_dir is not None: os.system("mkdir -p {}".format(args.log_dir)) fn = open("%s/workerlog.%d" % (args.log_dir, idx), "w") self.log_fns["worker"].append(fn) - proc = subprocess.Popen( - cmd, env=current_env, stdout=fn, stderr=fn) + proc = subprocess.Popen(cmd, + env=current_env, + stdout=fn, + stderr=fn) else: proc = subprocess.Popen(cmd, env=current_env) @@ -1703,12 +1741,14 @@ class ParameterServerLauncher(object): device_list = [str(x) for x in range(0, heter_device_num)] for idx, cur_heter_worker in enumerate(pod.heter_workers): - device_id = "0" if heter_device_num == 0 else str(device_list[( - idx) % heter_device_num]) + device_id = "0" if heter_device_num == 0 else str( + device_list[(idx) % heter_device_num]) stage_id = cur_heter_worker.stage proc_env = { - "PADDLE_PSERVERS_IP_PORT_LIST": self.server_endpoints, - "PADDLE_TRAINER_ENDPOINTS": self.worker_endpoints, + "PADDLE_PSERVERS_IP_PORT_LIST": + self.server_endpoints, + "PADDLE_TRAINER_ENDPOINTS": + self.worker_endpoints, "PADDLE_NEXT_HETER_TRAINER_IP_PORT_LIST": self.stage_heter_map[stage_id + 1] if stage_id <= self.stage_num - 1 else "", @@ -1716,22 +1756,38 @@ class ParameterServerLauncher(object): self.stage_heter_map[stage_id - 1], "PADDLE_ALL_HETER_TRAINER_IP_PORT_LIST": self.heter_worker_endpoints, - "HETER_DEVICE_TYPE": self.stage_device_map[stage_id], - "STAGE_ID": str(stage_id), - "STAGE_NUM": str(self.stage_num), - "PADDLE_PORT": cur_heter_worker.endpoint.split(":")[1], - "TRAINING_ROLE": "HETER_TRAINER", - "PADDLE_TRAINERS_NUM": str(self.worker_num), - "PADDLE_STAGE_TRAINERS_NUM": str(self.stage_trainer_num), - "POD_IP": cur_heter_worker.endpoint.split(":")[0], - "PADDLE_WITH_GLOO": str(os.getenv("PADDLE_WITH_GLOO", "0")), - "PADDLE_GLOO_RENDEZVOUS": "3", - "PADDLE_GLOO_FS_PATH": self.gloo_rendezvous_dir, - "FLAGS_selected_gpus": "0", - "FLAGS_selected_xpus": "0", - "CUDA_VISIBLE_DEVICES": device_id, - "XPU_VISIBLE_DEVICES": device_id, - "PADDLE_GLOO_HTTP_ENDPOINT": self.http_port + "HETER_DEVICE_TYPE": + self.stage_device_map[stage_id], + "STAGE_ID": + str(stage_id), + "STAGE_NUM": + str(self.stage_num), + "PADDLE_PORT": + cur_heter_worker.endpoint.split(":")[1], + 
"TRAINING_ROLE": + "HETER_TRAINER", + "PADDLE_TRAINERS_NUM": + str(self.worker_num), + "PADDLE_STAGE_TRAINERS_NUM": + str(self.stage_trainer_num), + "POD_IP": + cur_heter_worker.endpoint.split(":")[0], + "PADDLE_WITH_GLOO": + str(os.getenv("PADDLE_WITH_GLOO", "0")), + "PADDLE_GLOO_RENDEZVOUS": + "3", + "PADDLE_GLOO_FS_PATH": + self.gloo_rendezvous_dir, + "FLAGS_selected_gpus": + "0", + "FLAGS_selected_xpus": + "0", + "CUDA_VISIBLE_DEVICES": + device_id, + "XPU_VISIBLE_DEVICES": + device_id, + "PADDLE_GLOO_HTTP_ENDPOINT": + self.http_port } current_env.update(proc_env) @@ -1744,15 +1800,17 @@ class ParameterServerLauncher(object): "Local heter_worker start {} processes. First process distributed " "environment info (Only For Debug): {}".format( len(pod.heter_workers), - pretty_print_envs(proc_env, ("Distributed Envs", "Value" - )))) + pretty_print_envs(proc_env, + ("Distributed Envs", "Value")))) if args.log_dir is not None: os.system("mkdir -p {}".format(args.log_dir)) fn = open("%s/heterlog.%d" % (args.log_dir, idx), "w") self.log_fns["heter_worker"].append(fn) - proc = subprocess.Popen( - cmd, env=current_env, stdout=fn, stderr=fn) + proc = subprocess.Popen(cmd, + env=current_env, + stdout=fn, + stderr=fn) else: proc = subprocess.Popen(cmd, env=current_env) diff --git a/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py index e3a781424e6..78a53ccdba5 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py @@ -18,6 +18,7 @@ __all__ = [] class AMPOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(AMPOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -33,8 +34,9 @@ class AMPOptimizer(MetaOptimizerBase): def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): - super(AMPOptimizer, self)._set_basic_info( - loss, role_maker, user_defined_optimizer, user_defined_strategy) + super(AMPOptimizer, + self)._set_basic_info(loss, role_maker, user_defined_optimizer, + user_defined_strategy) def _init_wrapped_opt(self): if self.wrapped_opt is not None: @@ -103,8 +105,9 @@ class AMPOptimizer(MetaOptimizerBase): return self.wrapped_opt.apply_gradients(params_grads=params_grads) def apply_optimize(self, loss, startup_program, params_grads): - return self.wrapped_opt.apply_optimize( - loss, startup_program=startup_program, params_grads=params_grads) + return self.wrapped_opt.apply_optimize(loss, + startup_program=startup_program, + params_grads=params_grads) def minimize_impl(self, loss, diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/__init__.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/__init__.py index b9a7651e449..185a92b8d94 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/__init__.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py index 6282ac7b509..96d83ff4d39 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -28,6 +28,7 @@ __all__ = [] class AscendIRParser(object): + def __init__(self, auto_dp=False, world_rank_size=1): self.graph_idx = 0 self.hcom_endpoints = {} @@ -45,13 +46,13 @@ class AscendIRParser(object): ret_map[var.name] = ge_input ge_in_operator.append(ge_input) else: # param, learning ... - ge_input = core.GEOperatorFactory.create_operator(var.name, - "Variable") - ge_input.update_output_desc("y", - core.GETensorDesc( - core.GEShape(var.shape), - core.GEFormat.FORMAT_ND, - core.GEDataType.DT_FLOAT)) + ge_input = core.GEOperatorFactory.create_operator( + var.name, "Variable") + ge_input.update_output_desc( + "y", + core.GETensorDesc(core.GEShape(var.shape), + core.GEFormat.FORMAT_ND, + core.GEDataType.DT_FLOAT)) ret_map[var.name] = ge_input return ge_in_operator, ret_map @@ -70,7 +71,7 @@ class AscendIRParser(object): nccl_id = op.output_arg_names[0] # c_gen_nccl_id operator splits endpoints into local endpoint and other_endpoints - # we should combine these together to produce world_rank_ids + # we should combine these together to produce world_rank_ids self.hcom_endpoints[nccl_id] = other_endpoints[:] self.hcom_endpoints[nccl_id].insert(rank, endpoint) @@ -79,8 +80,8 @@ class AscendIRParser(object): elif op.type == 'c_comm_init': nccl_id = op.input_arg_names[0] nranks = op.attr("nranks") - assert nranks == len(self.hcom_endpoints[ - nccl_id]), "nranks doesn't match endpoint count" + assert nranks == len(self.hcom_endpoints[nccl_id] + ), "nranks doesn't match endpoint count" rank = op.attr("rank") ring_id = op.attr("ring_id") @@ -90,8 +91,9 @@ class AscendIRParser(object): for endpoint in self.hcom_endpoints[nccl_id] ] self.groups_to_create.append( - HcomGroupConfig( - name=group_name, nranks=nranks, rank_ids=global_rank_ids)) + HcomGroupConfig(name=group_name, + nranks=nranks, + rank_ids=global_rank_ids)) print("append to create group: %s, with rank_ids: %s" % (group_name, global_rank_ids)) elif op.type in ascend_parser.registerd_op: @@ -121,8 +123,8 @@ class AscendIRParser(object): ge_in_operator, self.var2geop = self._construct_input_map(input_varlist) - self.parser_factory = ascend_parser.AscendParserFactory(graph, - self.var2geop) + self.parser_factory = ascend_parser.AscendParserFactory( + graph, self.var2geop) for i, curop in list(enumerate(block.ops)): self.parse_op(curop) @@ -151,11 +153,10 @@ class AscendIRParser(object): input_varlist = [var for var in input_varlist if var.is_data] - block.append_op( - type="ascend_trigger", - inputs={"FeedList": input_varlist}, - outputs={"FetchList": fetch_list}, - attrs={'graph_idx': self.graph_idx}) + block.append_op(type="ascend_trigger", + 
inputs={"FeedList": input_varlist}, + outputs={"FetchList": fetch_list}, + attrs={'graph_idx': self.graph_idx}) self.graph_idx += 1 return graph @@ -170,10 +171,10 @@ class AscendIRParser(object): from paddle.distributed import fleet self.groups_to_create.append( - HcomGroupConfig( - name="hcom_group_0", - nranks=fleet.world_size(), - rank_ids=[x for x in range(fleet.world_size())])) + HcomGroupConfig(name="hcom_group_0", + nranks=fleet.world_size(), + rank_ids=[x + for x in range(fleet.world_size())])) return startup_graph, main_graph @@ -181,6 +182,7 @@ class AscendIRParser(object): # AscendOptimizer is a wrapper for basic optimizer now # We will make it part of fleet meta_optimizer in the future class AscendOptimizer(Optimizer): + def __init__(self, optimizer, fetch_list=[]): self.inner_opt = optimizer self.fetch_list = fetch_list @@ -220,8 +222,8 @@ class AscendOptimizer(Optimizer): precision_mode="must_keep_origin_dtype"): minimized = None if self.inner_opt: - minimized = self.inner_opt.minimize( - loss, startup_program=startup_program) + minimized = self.inner_opt.minimize(loss, + startup_program=startup_program) self.ascend_instance = core.AscendInstance() @@ -252,8 +254,8 @@ class AscendOptimizer(Optimizer): self.ascend_instance.init_global_resources() main_block = loss.block - self.parser = AscendIRParser( - auto_dp=auto_dp, world_rank_size=fleet.world_size()) + self.parser = AscendIRParser(auto_dp=auto_dp, + world_rank_size=fleet.world_size()) input_varlist = self._get_input_varlist(main_block.program) diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py index 3a52041dc7e..99c5100b70e 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py @@ -20,94 +20,94 @@ from functools import reduce __all__ = [] -registerd_op = {## forwards - "elementwise_add": "AddParser", - "matmul": "MatMulParser", - "mul": "MulParser", - "relu": "ReluParser", - "softmax_with_cross_entropy": "SoftmaxWithCrossEntropyParser", - "shape": "ShapeParser", - "fill_constant": "FillConstantParser", - "reduce_sum": "ReduceSumParser", - "elementwise_mul": "DotMulParser", - "elementwise_div": "DotDivParser", - "elementwise_pow": "DotPowParser", - "elementwise_max": "MaxParser", - "elementwise_min": "MinParser", - "elementwise_sub": "DotSubParser", - "pow": "PowParser", - "gelu": "GeluParser", - "sqrt": "SqrtParser", - "log": "LogParser", - "sum": "SumParser", - "logical_not": "LogicalNotParser", - "gather": "GatherParser", - "scatter": "ScatterParser", - "cast": "CastParser", - "tanh": "TanhParser", - "stack": "StackParser", - "square": "SquareParser", - "unsqueeze2": "UnSqueezeParser", - "assign": "AssignParser", - "softmax": "SoftMaxParser", - "reshape2": "ReshapeParser", - "transpose2": "TransposeParser", - "layer_norm": "LayerNormParser", - "less_than": "LessParser", - "mean": "MeanParser", - "scale": "ScaleParser", - "slice": "SliceParser", - "top_k": "TopkParser", - "accuracy": "AccuracyParser", - #"increment": "IncrementParser", - "lookup_table": "LookupTableParser", - "truncated_gaussian_random": "TruncatedNormalParser", - "c_allgather": "AllGatherParser", - "c_allreduce_sum": "AllReduceSumParser", - "c_allreduce_max": "AllReduceMaxParser", - "c_broadcast": "BroadcastParser", - "c_reduce_scatter": "ReduceScatterParser", - "c_send": "SendParser", - "c_receive": "ReceiveParser", - "uniform_random": 
"UniformRandomParser", - "range": "RangeParser", - "equal": "EqualParser", - "expand": "ExpandParser", - "squeeze2": "SqueezeParser", - - - ## backwords - "matmul_grad": "MatMulGradParser", - "mul_grad": "MulGradParser", - "relu_grad": "ReluGradParser", - "reduce_sum_grad": "ReduceSumGradParser", - "softmax_with_cross_entropy_grad": "SoftmaxWithCrossEntropyGradParser", - "tanh_grad":"TanhGradParser", - "log_grad":"LogGradParser", - "pow_grad": "PowGradParser", - "sqrt_grad": "SqrtGradParser", - "gelu_grad": "GeluGradParser", - "mean_grad": "MeanGradParser", - 'lookup_table_grad': "LookUpTableGradParser", - "elementwise_mul_grad": "DotMulGradParser", - "elementwise_add_grad": "DotAddGradParser", - "elementwise_div_grad": "DotDivGradParser", - "softmax_grad": "SoftmaxGradParser", - "slice_grad": "SliceGradParser", - "reshape2_grad": "ReshapeGradParser", - "gather_grad": "GatherGradParser", - "transpose2_grad": "TransposeGradParser", - "layer_norm_grad": "LayerNormGradParser", - - ## opt - "sgd": "SGDParser", - #"adam": "AdamParser", - } +registerd_op = { ## forwards + "elementwise_add": "AddParser", + "matmul": "MatMulParser", + "mul": "MulParser", + "relu": "ReluParser", + "softmax_with_cross_entropy": "SoftmaxWithCrossEntropyParser", + "shape": "ShapeParser", + "fill_constant": "FillConstantParser", + "reduce_sum": "ReduceSumParser", + "elementwise_mul": "DotMulParser", + "elementwise_div": "DotDivParser", + "elementwise_pow": "DotPowParser", + "elementwise_max": "MaxParser", + "elementwise_min": "MinParser", + "elementwise_sub": "DotSubParser", + "pow": "PowParser", + "gelu": "GeluParser", + "sqrt": "SqrtParser", + "log": "LogParser", + "sum": "SumParser", + "logical_not": "LogicalNotParser", + "gather": "GatherParser", + "scatter": "ScatterParser", + "cast": "CastParser", + "tanh": "TanhParser", + "stack": "StackParser", + "square": "SquareParser", + "unsqueeze2": "UnSqueezeParser", + "assign": "AssignParser", + "softmax": "SoftMaxParser", + "reshape2": "ReshapeParser", + "transpose2": "TransposeParser", + "layer_norm": "LayerNormParser", + "less_than": "LessParser", + "mean": "MeanParser", + "scale": "ScaleParser", + "slice": "SliceParser", + "top_k": "TopkParser", + "accuracy": "AccuracyParser", + #"increment": "IncrementParser", + "lookup_table": "LookupTableParser", + "truncated_gaussian_random": "TruncatedNormalParser", + "c_allgather": "AllGatherParser", + "c_allreduce_sum": "AllReduceSumParser", + "c_allreduce_max": "AllReduceMaxParser", + "c_broadcast": "BroadcastParser", + "c_reduce_scatter": "ReduceScatterParser", + "c_send": "SendParser", + "c_receive": "ReceiveParser", + "uniform_random": "UniformRandomParser", + "range": "RangeParser", + "equal": "EqualParser", + "expand": "ExpandParser", + "squeeze2": "SqueezeParser", + + ## backwords + "matmul_grad": "MatMulGradParser", + "mul_grad": "MulGradParser", + "relu_grad": "ReluGradParser", + "reduce_sum_grad": "ReduceSumGradParser", + "softmax_with_cross_entropy_grad": "SoftmaxWithCrossEntropyGradParser", + "tanh_grad": "TanhGradParser", + "log_grad": "LogGradParser", + "pow_grad": "PowGradParser", + "sqrt_grad": "SqrtGradParser", + "gelu_grad": "GeluGradParser", + "mean_grad": "MeanGradParser", + 'lookup_table_grad': "LookUpTableGradParser", + "elementwise_mul_grad": "DotMulGradParser", + "elementwise_add_grad": "DotAddGradParser", + "elementwise_div_grad": "DotDivGradParser", + "softmax_grad": "SoftmaxGradParser", + "slice_grad": "SliceGradParser", + "reshape2_grad": "ReshapeGradParser", + "gather_grad": "GatherGradParser", + 
"transpose2_grad": "TransposeGradParser", + "layer_norm_grad": "LayerNormGradParser", + + ## opt + "sgd": "SGDParser", + #"adam": "AdamParser", +} global_cnt = -1 global_input_cnt = -1 class AscendHelper(object): + def __init__(self): self.dtype2ge_map = { 0: core.GEDataType.DT_BOOL, @@ -141,6 +141,7 @@ class AscendHelper(object): class AscendParserFactory(object): + def __init__(self, graph, var2geop): self.graph = graph self.var2geop = var2geop @@ -154,6 +155,7 @@ class AscendParserFactory(object): class AscendParserBase(object): + def __init__(self, graph, var2geop): self.graph = graph self.var2geop = var2geop @@ -177,11 +179,11 @@ class AscendParserBase(object): assert len(arguments) == len( index_list[output_id] ), "Parser[%s]'s %dth argument number[%d] is not equal to paddle's number[%d]" % ( - self.parser_name, output_id, len(index_list[output_id]), - len(arguments)) + self.parser_name, output_id, len( + index_list[output_id]), len(arguments)) for i in range(len(arguments)): - self.var2geop[arguments[i]] = geop_list[index_list[ - output_id][i]] + self.var2geop[arguments[i]] = geop_list[ + index_list[output_id][i]] for geop in geop_list: self.graph.add_op(geop) @@ -206,22 +208,22 @@ class AscendParserBase(object): return name def _create_ge_tensor(self, shape, dtype, value): - tensor_desc = core.GETensorDesc( - core.GEShape(shape), core.GEFormat.FORMAT_ND, - self.ascend_helper.dtype2ge(dtype)) + tensor_desc = core.GETensorDesc(core.GEShape(shape), + core.GEFormat.FORMAT_ND, + self.ascend_helper.dtype2ge(dtype)) tensor = core.GETensor(tensor_desc) - data = (value * np.ones(( - shape))).reshape(shape).astype(self.ascend_helper.dtype2np(dtype)) + data = (value * np.ones( + (shape))).reshape(shape).astype(self.ascend_helper.dtype2np(dtype)) buf = data.tobytes() data_8 = np.frombuffer(buf, dtype=np.uint8) tensor.set_data(data_8) return tensor def _get_ge_tensor(self, shape, dtype, value_list): - tensor_desc = core.GETensorDesc( - core.GEShape(shape), core.GEFormat.FORMAT_ND, - self.ascend_helper.dtype2ge(dtype)) + tensor_desc = core.GETensorDesc(core.GEShape(shape), + core.GEFormat.FORMAT_ND, + self.ascend_helper.dtype2ge(dtype)) tensor = core.GETensor(tensor_desc) data = np.array(value_list).reshape(shape).astype( @@ -244,20 +246,20 @@ class AscendParserBase(object): var = core.GEOperatorFactory.create_operator( "variable" + self._accumulated_op_id(), "Variable") - var.update_output_desc("y", - core.GETensorDesc( - core.GEShape(shape), core.GEFormat.FORMAT_ND, - type)) + var.update_output_desc( + "y", + core.GETensorDesc(core.GEShape(shape), core.GEFormat.FORMAT_ND, + type)) assign = core.GEOperatorFactory.create_operator( - "assign" + self._accumulated_op_id(), "Assign").set_input( - "value", tensor).set_input("ref", var) + "assign" + self._accumulated_op_id(), + "Assign").set_input("value", tensor).set_input("ref", var) return assign def _create_shape_tensor(self): - tensor_desc = core.GETensorDesc( - core.GEShape([2]), core.GEFormat.FORMAT_ND, - core.GEDataType.DT_INT32) + tensor_desc = core.GETensorDesc(core.GEShape([2]), + core.GEFormat.FORMAT_ND, + core.GEDataType.DT_INT32) tensor = core.GETensor(tensor_desc) data = np.ones((2)).astype("int32").reshape([2]) @@ -269,14 +271,16 @@ class AscendParserBase(object): def _get_GEtensor_shape(self, tensor): tensor_shape = core.GEOperatorFactory.create_operator( - "shape" + self._accumulated_op_id(), "Shape").set_input("x", tensor) + "shape" + self._accumulated_op_id(), + "Shape").set_input("x", tensor) tensor_shape = 
core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", tensor_shape).set_attr_int32("dst_type", 0) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", tensor_shape).set_attr_int32("dst_type", 0) return tensor_shape class AddParser(AscendParserBase): + def __init__(self, graph, var2geop): super(AddParser, self).__init__(graph, var2geop) self.parser_name = "elementwise_add" @@ -291,6 +295,7 @@ class AddParser(AscendParserBase): class DotSubParser(AscendParserBase): + def __init__(self, graph, var2geop): super(DotSubParser, self).__init__(graph, var2geop) self.parser_name = "elementwise_sub" @@ -305,6 +310,7 @@ class DotSubParser(AscendParserBase): class DotMulParser(AscendParserBase): + def __init__(self, graph, var2geop): super(DotMulParser, self).__init__(graph, var2geop) self.parser_name = "elementwise_mul" @@ -319,6 +325,7 @@ class DotMulParser(AscendParserBase): class DotDivParser(AscendParserBase): + def __init__(self, graph, var2geop): super(DotDivParser, self).__init__(graph, var2geop) self.parser_name = "elementwise_div" @@ -333,6 +340,7 @@ class DotDivParser(AscendParserBase): class DotPowParser(AscendParserBase): + def __init__(self, graph, var2geop): super(DotPowParser, self).__init__(graph, var2geop) self.parser_name = "elementwise_pow" @@ -347,6 +355,7 @@ class DotPowParser(AscendParserBase): class LessParser(AscendParserBase): + def __init__(self, graph, var2geop): super(LessParser, self).__init__(graph, var2geop) self.parser_name = "less_than" @@ -361,6 +370,7 @@ class LessParser(AscendParserBase): class MaxParser(AscendParserBase): + def __init__(self, graph, var2geop): super(MaxParser, self).__init__(graph, var2geop) self.parser_name = "elementwise_max" @@ -375,6 +385,7 @@ class MaxParser(AscendParserBase): class MinParser(AscendParserBase): + def __init__(self, graph, var2geop): super(MinParser, self).__init__(graph, var2geop) self.parser_name = "elementwise_min" @@ -390,6 +401,7 @@ class MinParser(AscendParserBase): ## cal class LogParser(AscendParserBase): + def __init__(self, graph, var2geop): super(LogParser, self).__init__(graph, var2geop) self.parser_name = "log" @@ -402,6 +414,7 @@ class LogParser(AscendParserBase): class SqrtParser(AscendParserBase): + def __init__(self, graph, var2geop): super(SqrtParser, self).__init__(graph, var2geop) self.parser_name = "sqrt" @@ -414,6 +427,7 @@ class SqrtParser(AscendParserBase): class PowParser(AscendParserBase): + def __init__(self, graph, var2geop): super(PowParser, self).__init__(graph, var2geop) self.parser_name = "pow" @@ -424,12 +438,14 @@ class PowParser(AscendParserBase): pow_value = core.GEOperatorFactory.create_operator( "pow" + self._accumulated_op_id(), "Power").set_input("x", x).set_attr_float( - "power", factor).set_attr_float("scale", 1.0).set_attr_float( - "shift", 0.0) + "power", + factor).set_attr_float("scale", + 1.0).set_attr_float("shift", 0.0) return [pow_value], [[0]] class SquareParser(AscendParserBase): + def __init__(self, graph, var2geop): super(SquareParser, self).__init__(graph, var2geop) self.parser_name = "square" @@ -442,6 +458,7 @@ class SquareParser(AscendParserBase): class SumParser(AscendParserBase): + def __init__(self, graph, var2geop): super(SumParser, self).__init__(graph, var2geop) self.parser_name = "sum" @@ -464,6 +481,7 @@ class SumParser(AscendParserBase): class LogicalNotParser(AscendParserBase): + def __init__(self, graph, var2geop): super(LogicalNotParser, self).__init__(graph, var2geop) self.parser_name = "logical_not" 
@@ -477,6 +495,7 @@ class LogicalNotParser(AscendParserBase): class MeanParser(AscendParserBase): + def __init__(self, graph, var2geop): super(MeanParser, self).__init__(graph, var2geop) self.parser_name = "mean" @@ -484,13 +503,14 @@ class MeanParser(AscendParserBase): def _apply(self): x = self._get_ge_input(self.op.input_arg_names[0]) mean = core.GEOperatorFactory.create_operator( - "mean" + self._accumulated_op_id(), - "ReduceMeanD").set_input("x", x).set_attr_bool( - "keep_dims", False).set_attr_vec_int32("axes", []) + "mean" + self._accumulated_op_id(), "ReduceMeanD").set_input( + "x", x).set_attr_bool("keep_dims", + False).set_attr_vec_int32("axes", []) return [mean], [[0]] class ReduceSumParser(AscendParserBase): + def __init__(self, graph, var2geop): super(ReduceSumParser, self).__init__(graph, var2geop) self.parser_name = "reduce_sum" @@ -515,18 +535,19 @@ class ReduceSumParser(AscendParserBase): # super(IncrementParser, self).__init__(graph, var2geop) # self.parser_name = "increment" # -# def _apply(self): +# def _apply(self): # x = self._get_ge_input(self.op.input_arg_names[0]) # step = self.op.attr("step") #self._get_ge_input(self.op.input_arg_names[1]) # print("step: ", step) -# +# # increment = core.GEOperatorFactory.create_operator("adds" + self._accumulated_op_id(), "Adds").set_input("x", x).set_attr_float("value", step) #set_input("x2", bias) -# +# # return [increment] ## matrix cal class MatMulParser(AscendParserBase): + def __init__(self, graph, var2geop): super(MatMulParser, self).__init__(graph, var2geop) self.parser_name = "matmul" @@ -550,14 +571,15 @@ class MatMulParser(AscendParserBase): matmul = core.GEOperatorFactory.create_operator( "matmul" + self._accumulated_op_id(), "MatMul").set_input("x1", x).set_input("x2", y).set_attr_bool( - "transpose_x1", transpose_x).set_attr_bool("transpose_x2", - transpose_y) + "transpose_x1", + transpose_x).set_attr_bool("transpose_x2", transpose_y) else: assert False, "not support" return [matmul], [[0]] class MulParser(AscendParserBase): + def __init__(self, graph, var2geop): super(MulParser, self).__init__(graph, var2geop) self.parser_name = "mul" @@ -580,8 +602,9 @@ class MulParser(AscendParserBase): "flatten" + self._accumulated_op_id(), "Flatten").set_input("x", x) matmul = core.GEOperatorFactory.create_operator( - "mul" + self._accumulated_op_id(), "MatMul").set_input( - "x1", flatten_x1, 0).set_input("x2", y, 0) + "mul" + self._accumulated_op_id(), + "MatMul").set_input("x1", flatten_x1, + 0).set_input("x2", y, 0) else: assert False, "not support" else: @@ -592,26 +615,27 @@ class MulParser(AscendParserBase): "FlattenV2").set_input("x", x).set_attr_int32( "axis", 0).set_attr_int32("end_axis", 1) matmul_m = core.GEOperatorFactory.create_operator( - "mul" + self._accumulated_op_id(), "MatMul").set_input( - "x1", flatten_x1, 0).set_input("x2", y, 0) + "mul" + self._accumulated_op_id(), + "MatMul").set_input("x1", flatten_x1, + 0).set_input("x2", y, 0) matmul_transpose = core.GEOperatorFactory.create_operator( "transpose" + self._accumulated_op_id(), - "TransposeD").set_input( - "x", matmul_m).set_attr_vec_int32("perm", [1, 0]) + "TransposeD").set_input("x", matmul_m).set_attr_vec_int32( + "perm", [1, 0]) tensor = self._create_ge_tensor( [3], 2, [shape_x2[1], shape_x1[0], shape_x1[1]]) const_shape = core.GEOperatorFactory.create_operator( "shape" + self._accumulated_op_id(), "Const").set_attr_tensor("value", tensor) reshape_matmul = core.GEOperatorFactory.create_operator( - "reshape" + self._accumulated_op_id(), 
"Reshape").set_input( - "x", matmul_transpose).set_input( - "shape", const_shape).set_attr_int32("axis", 0) + "reshape" + self._accumulated_op_id(), + "Reshape").set_input("x", matmul_transpose).set_input( + "shape", const_shape).set_attr_int32("axis", 0) matmul = core.GEOperatorFactory.create_operator( "transpose" + self._accumulated_op_id(), - "TransposeD").set_input( - "x", - reshape_matmul).set_attr_vec_int32("perm", [1, 2, 0]) + "TransposeD").set_input("x", + reshape_matmul).set_attr_vec_int32( + "perm", [1, 2, 0]) else: assert False, "not support" @@ -619,6 +643,7 @@ class MulParser(AscendParserBase): class LayerNormParser(AscendParserBase): + def __init__(self, graph, var2geop): super(LayerNormParser, self).__init__(graph, var2geop) self.parser_name = "layer_norm" @@ -639,7 +664,8 @@ class LayerNormParser(AscendParserBase): scale).set_input("shape", shape_tensor) bias_expand = core.GEOperatorFactory.create_operator( "broadcast_to_d" + self._accumulated_op_id(), - "BroadcastTo").set_input("x", bias).set_input("shape", shape_tensor) + "BroadcastTo").set_input("x", + bias).set_input("shape", shape_tensor) layer_norm = core.GEOperatorFactory.create_operator( "layer_norm" + self._accumulated_op_id(), "LayerNorm").set_input("x", x).set_input( @@ -652,19 +678,23 @@ class LayerNormParser(AscendParserBase): cast_dtype = 0 if self.ascend_helper.dtype2paddle_inv_map[str( x_dtype)] == 0 else 1 y = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", layer_norm, 0).set_attr_int32("dst_type", cast_dtype) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", layer_norm, + 0).set_attr_int32("dst_type", cast_dtype) mean = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", layer_norm, 1).set_attr_int32("dst_type", cast_dtype) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", layer_norm, + 1).set_attr_int32("dst_type", cast_dtype) variance = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", layer_norm, 2).set_attr_int32("dst_type", cast_dtype) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", layer_norm, + 2).set_attr_int32("dst_type", cast_dtype) return [y, mean, variance], [[1], [2], [0]] ## activate function class ReluParser(AscendParserBase): + def __init__(self, graph, var2geop): super(ReluParser, self).__init__(graph, var2geop) self.parser_name = "relu" @@ -677,6 +707,7 @@ class ReluParser(AscendParserBase): class GeluParser(AscendParserBase): + def __init__(self, graph, var2geop): super(GeluParser, self).__init__(graph, var2geop) self.parser_name = "gelu" @@ -689,6 +720,7 @@ class GeluParser(AscendParserBase): class TanhParser(AscendParserBase): + def __init__(self, graph, var2geop): super(TanhParser, self).__init__(graph, var2geop) self.parser_name = "tanh" @@ -702,6 +734,7 @@ class TanhParser(AscendParserBase): ## loss function class SoftmaxWithCrossEntropyParser(AscendParserBase): + def __init__(self, graph, var2geop): super(SoftmaxWithCrossEntropyParser, self).__init__(graph, var2geop) self.parser_name = "softmax_with_cross_entropy" @@ -715,8 +748,8 @@ class SoftmaxWithCrossEntropyParser(AscendParserBase): "softmax" + self._accumulated_op_id(), "SoftmaxV2").set_input("x", logits) label = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", label).set_attr_int32("dst_type", 3) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", 
label).set_attr_int32("dst_type", 3) tensoron = self._create_ge_tensor([1], 5, 1) on = core.GEOperatorFactory.create_operator( @@ -729,19 +762,23 @@ class SoftmaxWithCrossEntropyParser(AscendParserBase): self._mark_as_input(on) self._mark_as_input(off) onehot = core.GEOperatorFactory.create_operator( - "onehot" + self._accumulated_op_id(), "OneHotD").set_input( - "x", label).set_input("on_value", on).set_input( - "off_value", off).set_attr_int32("depth", cls_num) + "onehot" + self._accumulated_op_id(), + "OneHotD").set_input("x", + label).set_input("on_value", on).set_input( + "off_value", + off).set_attr_int32("depth", cls_num) squeeze = core.GEOperatorFactory.create_operator( - "mul" + self._accumulated_op_id(), "Squeeze").set_input("x", onehot) + "mul" + self._accumulated_op_id(), + "Squeeze").set_input("x", onehot) loss_all = core.GEOperatorFactory.create_operator( "loss" + self._accumulated_op_id(), - "SoftmaxCrossEntropyWithLogits").set_input( - "features", logits).set_input("labels", squeeze) + "SoftmaxCrossEntropyWithLogits").set_input("features", + logits).set_input( + "labels", squeeze) loss = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", loss_all, 0).set_attr_int32("dst_type", 0) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", loss_all, 0).set_attr_int32("dst_type", 0) loss_expand = core.GEOperatorFactory.create_operator( "unsqueeze" + self._accumulated_op_id(), "Unsqueeze").set_input("x", loss).set_attr_vec_int32("axes", [1]) @@ -749,6 +786,7 @@ class SoftmaxWithCrossEntropyParser(AscendParserBase): class SoftMaxParser(AscendParserBase): + def __init__(self, graph, var2geop): super(SoftMaxParser, self).__init__(graph, var2geop) self.parser_name = "softmax" @@ -758,13 +796,15 @@ class SoftMaxParser(AscendParserBase): axes = self.op.attr("axis") softmax = core.GEOperatorFactory.create_operator( - "softmax" + self._accumulated_op_id(), "SoftmaxV2").set_input( - "x", logits).set_attr_vec_int32("axes", [axes]) + "softmax" + self._accumulated_op_id(), + "SoftmaxV2").set_input("x", + logits).set_attr_vec_int32("axes", [axes]) return [softmax], [[0]] -## general +## general class ShapeParser(AscendParserBase): + def __init__(self, graph, var2geop): super(ShapeParser, self).__init__(graph, var2geop) self.parser_name = "shape" @@ -777,6 +817,7 @@ class ShapeParser(AscendParserBase): class FillConstantParser(AscendParserBase): + def __init__(self, graph, var2geop): super(FillConstantParser, self).__init__(graph, var2geop) self.parser_name = "fill_constant" @@ -796,19 +837,19 @@ class FillConstantParser(AscendParserBase): # (self.op.output('Out')[0])) var = core.GEOperatorFactory.create_operator( self.op.output('Out')[0], "Variable") - var.update_output_desc("y", - core.GETensorDesc( - core.GEShape(shape), - core.GEFormat.FORMAT_ND, - core.GEDataType.DT_FLOAT)) + var.update_output_desc( + "y", + core.GETensorDesc(core.GEShape(shape), core.GEFormat.FORMAT_ND, + core.GEDataType.DT_FLOAT)) assign = core.GEOperatorFactory.create_operator( - "assign" + self._accumulated_op_id(), "Assign").set_input( - "value", const).set_input("ref", var) + "assign" + self._accumulated_op_id(), + "Assign").set_input("value", const).set_input("ref", var) return [const], [[0]] return [const], [[0]] class TruncatedNormalParser(AscendParserBase): + def __init__(self, graph, var2geop): super(TruncatedNormalParser, self).__init__(graph, var2geop) self.parser_name = "truncated_gaussian_random" @@ -850,11 +891,11 @@ class 
TruncatedNormalParser(AscendParserBase): truncated_normal = core.GEOperatorFactory.create_operator( "truncated_normal" + self._accumulated_op_id(), "ParameterizedTruncatedNormal").set_input( - "shape", shape_tensor).set_input( - "means", mean_tensor).set_input( - "stdevs", std_tensor).set_input( - "min", min_tensor).set_input( - "max", max_tensor).set_attr_int32("seed", 0) + "shape", + shape_tensor).set_input("means", mean_tensor).set_input( + "stdevs", + std_tensor).set_input("min", min_tensor).set_input( + "max", max_tensor).set_attr_int32("seed", 0) ## wirte the output of truncatedNormal from startup_program to main_program if self.op.block.var(self.op.output('Out')[0]).persistable: @@ -862,14 +903,14 @@ class TruncatedNormalParser(AscendParserBase): # (self.op.output('Out')[0])) var = core.GEOperatorFactory.create_operator( self.op.output('Out')[0], "Variable") - var.update_output_desc("y", - core.GETensorDesc( - core.GEShape(shape), - core.GEFormat.FORMAT_ND, - core.GEDataType.DT_FLOAT)) + var.update_output_desc( + "y", + core.GETensorDesc(core.GEShape(shape), core.GEFormat.FORMAT_ND, + core.GEDataType.DT_FLOAT)) assign = core.GEOperatorFactory.create_operator( - "assign" + self._accumulated_op_id(), "Assign").set_input( - "value", truncated_normal).set_input("ref", var) + "assign" + self._accumulated_op_id(), + "Assign").set_input("value", + truncated_normal).set_input("ref", var) return [ shape_tensor, mean_tensor, std_tensor, min_tensor, max_tensor, truncated_normal @@ -882,6 +923,7 @@ class TruncatedNormalParser(AscendParserBase): class GatherParser(AscendParserBase): + def __init__(self, graph, var2geop): super(GatherParser, self).__init__(graph, var2geop) self.parser_name = "gather" @@ -892,13 +934,15 @@ class GatherParser(AscendParserBase): clo = self.op.block.var(self.op.input_arg_names[1]).shape[-1] gather = core.GEOperatorFactory.create_operator( - "gather" + self._accumulated_op_id(), "Gather").set_input( - "x", x).set_input("indices", index).set_attr_bool( - "validate_indices", True) + "gather" + self._accumulated_op_id(), + "Gather").set_input("x", x).set_input("indices", + index).set_attr_bool( + "validate_indices", True) return [gather], [[0]] class ScatterParser(AscendParserBase): + def __init__(self, graph, var2geop): super(ScatterParser, self).__init__(graph, var2geop) self.parser_name = "scatter" @@ -912,24 +956,24 @@ class ScatterParser(AscendParserBase): if len(index_shape) == 1: index = core.GEOperatorFactory.create_operator( - "unsqueeze" + self.getid(), "Unsqueeze").set_input( - "x", index).set_attr_vec_int32("axes", [1]) + "unsqueeze" + self.getid(), + "Unsqueeze").set_input("x", + index).set_attr_vec_int32("axes", [1]) if not overwrite: scatter_value = core.GEOperatorFactory.create_operator( "scatter" + self._accumulated_op_id(), - "TensorScatterAdd").set_input( - "x", x).set_input("indices", index).set_input("updates", - updates) + "TensorScatterAdd").set_input("x", x).set_input( + "indices", index).set_input("updates", updates) else: scatter_value = core.GEOperatorFactory.create_operator( "scatter" + self._accumulated_op_id(), - "TensorScatterUpdate").set_input( - "x", x).set_input("indices", index).set_input("updates", - updates) + "TensorScatterUpdate").set_input("x", x).set_input( + "indices", index).set_input("updates", updates) return [x, index, updates, scatter_value], [[-1]] class CastParser(AscendParserBase): + def __init__(self, graph, var2geop): super(CastParser, self).__init__(graph, var2geop) self.parser_name = "cast" @@ -938,12 +982,13 @@ class 
CastParser(AscendParserBase): x = self._get_ge_input(self.op.input_arg_names[0]) dtype = self.op.attr("out_dtype") cast = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", x).set_attr_int32("dst_type", dtype) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", x).set_attr_int32("dst_type", dtype) return [cast], [[0]] class AssignParser(AscendParserBase): + def __init__(self, graph, var2geop): super(AssignParser, self).__init__(graph, var2geop) self.parser_name = "assign" @@ -952,12 +997,13 @@ class AssignParser(AscendParserBase): const = self._get_ge_input(self.op.input_arg_names[0]) var = self._get_ge_input(self.op.input_arg_names[1]) assign = core.GEOperatorFactory.create_operator( - "assign" + self._accumulated_op_id(), "Assign").set_input( - "value", const).set_input("ref", var) + "assign" + self._accumulated_op_id(), + "Assign").set_input("value", const).set_input("ref", var) return [assign], [[0]] class ScaleParser(AscendParserBase): + def __init__(self, graph, var2geop): super(ScaleParser, self).__init__(graph, var2geop) self.parser_name = "scale" @@ -970,22 +1016,26 @@ class ScaleParser(AscendParserBase): if bias_after_scale: scale_value = core.GEOperatorFactory.create_operator( - "scale" + self._accumulated_op_id(), "Power").set_input( - "x", x).set_attr_float("power", 1.0).set_attr_float( - "scale", scale).set_attr_float("shift", bias) + "scale" + self._accumulated_op_id(), + "Power").set_input("x", x).set_attr_float( + "power", + 1.0).set_attr_float("scale", + scale).set_attr_float("shift", bias) else: x_add_bias = core.GEOperatorFactory.create_operator( - "adds" + self._accumulated_op_id(), "Adds").set_input( - "x", x).set_attr_float("value", bias) + "adds" + self._accumulated_op_id(), + "Adds").set_input("x", x).set_attr_float("value", bias) scale_value = core.GEOperatorFactory.create_operator( - "scale" + self._accumulated_op_id(), "Power").set_input( - "x", - x_add_bias).set_attr_float("power", 1.0).set_attr_float( - "scale", scale).set_attr_float("shift", 0.0) + "scale" + self._accumulated_op_id(), + "Power").set_input("x", x_add_bias).set_attr_float( + "power", + 1.0).set_attr_float("scale", + scale).set_attr_float("shift", 0.0) return [scale_value], [[0]] class SliceParser(AscendParserBase): + def __init__(self, graph, var2geop): super(SliceParser, self).__init__(graph, var2geop) self.parser_name = "slice" @@ -1014,14 +1064,15 @@ class SliceParser(AscendParserBase): assert len(axes_cor) == len(starts_cor) == len( ends_cor), "the three fields must have same size" slice_value = core.GEOperatorFactory.create_operator( - "slice" + self._accumulated_op_id(), "SliceD").set_input( - "x", x).set_attr_vec_int32( - "offsets", starts_cor).set_attr_vec_int32("size", size) + "slice" + self._accumulated_op_id(), + "SliceD").set_input("x", x).set_attr_vec_int32( + "offsets", starts_cor).set_attr_vec_int32("size", size) return [slice_value], [[0]] class ReshapeParser(AscendParserBase): + def __init__(self, graph, var2geop): super(ReshapeParser, self).__init__(graph, var2geop) self.parser_name = "reshape2" @@ -1047,9 +1098,10 @@ class ReshapeParser(AscendParserBase): "shape" + self._accumulated_op_id(), "Const").set_attr_tensor("value", tensor) reshape = core.GEOperatorFactory.create_operator( - "reshape" + self._accumulated_op_id(), "Reshape").set_input( - "x", - x).set_input("shape", const_shape).set_attr_int32("axis", 0) + "reshape" + self._accumulated_op_id(), + "Reshape").set_input("x", x).set_input("shape", + 
const_shape).set_attr_int32( + "axis", 0) x_shape = core.GEOperatorFactory.create_operator( "shape" + self._accumulated_op_id(), "Shape").set_input("x", x) @@ -1057,6 +1109,7 @@ class ReshapeParser(AscendParserBase): class TransposeParser(AscendParserBase): + def __init__(self, graph, var2geop): super(TransposeParser, self).__init__(graph, var2geop) self.parser_name = "transpose2" @@ -1065,8 +1118,8 @@ class TransposeParser(AscendParserBase): x = self._get_ge_input(self.op.input_arg_names[0]) perm = self.op.attr("axis") transpose = core.GEOperatorFactory.create_operator( - "transpose" + self._accumulated_op_id(), "TransposeD").set_input( - "x", x).set_attr_vec_int32("perm", perm) + "transpose" + self._accumulated_op_id(), + "TransposeD").set_input("x", x).set_attr_vec_int32("perm", perm) x_shape = core.GEOperatorFactory.create_operator( "shape" + self._accumulated_op_id(), "Shape").set_input("x", x) @@ -1074,6 +1127,7 @@ class TransposeParser(AscendParserBase): class AccuracyParser(AscendParserBase): + def __init__(self, graph, var2geop): super(AccuracyParser, self).__init__(graph, var2geop) self.parser_name = "accuracy" @@ -1084,40 +1138,41 @@ class AccuracyParser(AscendParserBase): logits = self._get_ge_input(self.op.input_arg_names[2]) pred = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", pred).set_attr_int32("dst_type", 3) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", pred).set_attr_int32("dst_type", 3) label = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", label).set_attr_int32("dst_type", 3) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", label).set_attr_int32("dst_type", 3) equal = core.GEOperatorFactory.create_operator( - "equal" + self._accumulated_op_id(), "Equal").set_input( - "x1", pred).set_input("x2", label) + "equal" + self._accumulated_op_id(), + "Equal").set_input("x1", pred).set_input("x2", label) cast = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", equal).set_attr_int32("dst_type", 0) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", equal).set_attr_int32("dst_type", 0) acc = core.GEOperatorFactory.create_operator( "mean" + self._accumulated_op_id(), "ReduceMeanD").set_input( - "x", cast).set_attr_bool("keep_dims", False).set_attr_vec_int32( - "axes", []) + "x", cast).set_attr_bool("keep_dims", + False).set_attr_vec_int32("axes", []) correct = core.GEOperatorFactory.create_operator( "sum" + self._accumulated_op_id(), "ReduceSumD").set_input( - "x", cast).set_attr_bool("keep_dims", False).set_attr_vec_int32( - "axes", []) + "x", cast).set_attr_bool("keep_dims", + False).set_attr_vec_int32("axes", []) ones_tensor = core.GEOperatorFactory.create_operator( "oneslike" + self._accumulated_op_id(), "OnesLike").set_input("x", label) ones_tensor = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", ones_tensor).set_attr_int32("dst_type", 0) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", ones_tensor).set_attr_int32("dst_type", 0) total = core.GEOperatorFactory.create_operator( - "sum" + self._accumulated_op_id(), "ReduceSumD").set_input( - "x", ones_tensor).set_attr_bool( - "keep_dims", False).set_attr_vec_int32("axes", []) + "sum" + self._accumulated_op_id(), + "ReduceSumD").set_input("x", ones_tensor).set_attr_bool( + "keep_dims", False).set_attr_vec_int32("axes", []) return [acc, correct, total], 
[[0], [1], [2]] class TopkParser(AscendParserBase): + def __init__(self, graph, var2geop): super(TopkParser, self).__init__(graph, var2geop) self.parser_name = "top_k" @@ -1137,15 +1192,16 @@ class TopkParser(AscendParserBase): "topk" + self._accumulated_op_id(), "TopK").set_input("x", cast_x).set_input("k", const_k) value = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", topk, 0).set_attr_int32("dst_type", 0) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", topk, 0).set_attr_int32("dst_type", 0) index = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", topk, 1).set_attr_int32("dst_type", 0) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", topk, 1).set_attr_int32("dst_type", 0) return [value, index], [[1], [0]] class LookupTableParser(AscendParserBase): + def __init__(self, graph, var2geop): super(LookupTableParser, self).__init__(graph, var2geop) self.parser_name = "lookup_table" @@ -1155,15 +1211,16 @@ class LookupTableParser(AscendParserBase): w = self._get_ge_input(self.op.input_arg_names[1]) ids_squeeze = core.GEOperatorFactory.create_operator( - "squeeze" + self._accumulated_op_id(), "Squeeze").set_input( - "x", ids).set_attr_vec_int32("axes", [-1]) + "squeeze" + self._accumulated_op_id(), + "Squeeze").set_input("x", ids).set_attr_vec_int32("axes", [-1]) out = core.GEOperatorFactory.create_operator( - "lookup" + self._accumulated_op_id(), "Gather").set_input( - "x", w).set_input("indices", ids_squeeze) + "lookup" + self._accumulated_op_id(), + "Gather").set_input("x", w).set_input("indices", ids_squeeze) return [out], [[0]] class StackParser(AscendParserBase): + def __init__(self, graph, var2geop): super(StackParser, self).__init__(graph, var2geop) self.parser_name = "stack" @@ -1172,8 +1229,8 @@ class StackParser(AscendParserBase): tiles = len(self.op.input_arg_names) data_x_lst = [] for index in range(tiles): - data_x_lst.append( - self._get_ge_input(self.op.input_arg_names[index])) + data_x_lst.append(self._get_ge_input( + self.op.input_arg_names[index])) axis = self.op.attr("axis") data_x = data_x_lst[0] @@ -1186,14 +1243,16 @@ class StackParser(AscendParserBase): "ExpandDims").set_input("x", data_x).set_input("axis", tensor_axis) stack = core.GEOperatorFactory.create_operator( - "stack" + self._accumulated_op_id(), - "TileWithAxis").set_input("x", expand).set_attr_int32( - "axis", axis).set_attr_int32("tiles", tiles) + "stack" + self._accumulated_op_id(), "TileWithAxis").set_input( + "x", + expand).set_attr_int32("axis", + axis).set_attr_int32("tiles", tiles) return [stack], [[0]] class UnSqueezeParser(AscendParserBase): + def __init__(self, graph, var2geop): super(UnSqueezeParser, self).__init__(graph, var2geop) self.parser_name = "unsqueeze2" @@ -1206,12 +1265,14 @@ class UnSqueezeParser(AscendParserBase): "unsqueeze" + self._accumulated_op_id(), "Unsqueeze").set_input("x", x).set_attr_vec_int32("axes", axes) shape = core.GEOperatorFactory.create_operator( - "shape" + self._accumulated_op_id(), "Shape").set_input("x", output) + "shape" + self._accumulated_op_id(), + "Shape").set_input("x", output) return [shape, output], [[1], [0]] ## parallel class AllGatherParser(AscendParserBase): + def __init__(self, graph, var2geop): super(AllGatherParser, self).__init__(graph, var2geop) self.parser_name = "c_allgather" @@ -1222,13 +1283,14 @@ class AllGatherParser(AscendParserBase): group = self.op.attr("group") allgather = 
core.GEOperatorFactory.create_operator( - "allgather" + self._accumulated_op_id(), "HcomAllGather").set_input( - "x", x).set_attr_int32( - "rank_size", rank_size).set_attr_string("group", group) + "allgather" + self._accumulated_op_id(), + "HcomAllGather").set_input("x", x).set_attr_int32( + "rank_size", rank_size).set_attr_string("group", group) return [allgather], [[0]] class AllReduceParser(AscendParserBase): + def __init__(self, graph, var2geop, reduction): super(AllReduceParser, self).__init__(graph, var2geop) self.parser_name = "c_allreduce_" + reduction @@ -1243,9 +1305,9 @@ class AllReduceParser(AscendParserBase): fusion_id = None #self.op.attr("fusion_id") allreduce = core.GEOperatorFactory.create_operator( - "allreduce" + self._accumulated_op_id(), "HcomAllReduce").set_input( - "x", x).set_attr_string( - "reduction", reduction).set_attr_string("group", group) + "allreduce" + self._accumulated_op_id(), + "HcomAllReduce").set_input("x", x).set_attr_string( + "reduction", reduction).set_attr_string("group", group) if fusion is not None: allreduce.set_attr_int32("fusion", fusion) @@ -1255,16 +1317,19 @@ class AllReduceParser(AscendParserBase): class AllReduceSumParser(AllReduceParser): + def __init__(self, graph, var2geop): super(AllReduceSumParser, self).__init__(graph, var2geop, 'sum') class AllReduceMaxParser(AllReduceParser): + def __init__(self, graph, var2geop): super(AllReduceMaxParser, self).__init__(graph, var2geop, 'max') class BroadcastParser(AscendParserBase): + def __init__(self, graph, var2geop): super(BroadcastParser, self).__init__(graph, var2geop) self.parser_name = "c_broadcast" @@ -1275,13 +1340,14 @@ class BroadcastParser(AscendParserBase): group = self.op.attr("group") broadcast = core.GEOperatorFactory.create_operator( - "broadcast" + self._accumulated_op_id(), "HcomBroadcast").set_input( - "x", x).set_attr_int32( - "root_rank", root_rank).set_attr_string("group", group) + "broadcast" + self._accumulated_op_id(), + "HcomBroadcast").set_input("x", x).set_attr_int32( + "root_rank", root_rank).set_attr_string("group", group) return [broadcast], [[0]] class ReduceScatterParser(AscendParserBase): + def __init__(self, graph, var2geop): super(ReduceScatterParser, self).__init__(graph, var2geop) self.parser_name = "c_reduce_scatter" @@ -1295,12 +1361,14 @@ class ReduceScatterParser(AscendParserBase): reduce_scatter = core.GEOperatorFactory.create_operator( "reducescatter" + self._accumulated_op_id(), "HcomReduceScatter").set_input("x", x).set_attr_string( - "reduction", reduction).set_attr_string( - "group", group).set_attr_int32("rank_size", rank_size) + "reduction", + reduction).set_attr_string("group", group).set_attr_int32( + "rank_size", rank_size) return [reduce_scatter], [[0]] class SendParser(AscendParserBase): + def __init__(self, graph, var2geop): super(SendParser, self).__init__(graph, var2geop) self.parser_name = "c_send" @@ -1319,6 +1387,7 @@ class SendParser(AscendParserBase): class ReceiveParser(AscendParserBase): + def __init__(self, graph, var2geop): super(ReceiveParser, self).__init__(graph, var2geop) self.parser_name = "c_receive" @@ -1332,15 +1401,18 @@ class ReceiveParser(AscendParserBase): dtype = self.op.attr("dtype") receive = core.GEOperatorFactory.create_operator( - "receive" + self._accumulated_op_id(), "HcomReceive").set_input( - "x", x).set_attr_int32("sr_tag", sr_tag).set_attr_int32( - "src_rank", src_rank).set_attr_string( - "group", group).set_attr_vec_int32( - "shape", shape).set_attr_int32("dtype", dtype) + "receive" + 
self._accumulated_op_id(), + "HcomReceive").set_input("x", x).set_attr_int32( + "sr_tag", + sr_tag).set_attr_int32("src_rank", src_rank).set_attr_string( + "group", group).set_attr_vec_int32("shape", + shape).set_attr_int32( + "dtype", dtype) return [receive], [[0]] class RangeParser(AscendParserBase): + def __init__(self, graph, var2geop): super(RangeParser, self).__init__(graph, var2geop) self.parser_name = "range" @@ -1361,6 +1433,7 @@ class RangeParser(AscendParserBase): class UniformRandomParser(AscendParserBase): + def __init__(self, graph, var2geop): super(UniformRandomParser, self).__init__(graph, var2geop) self.parser_name = "uniform_random" @@ -1390,14 +1463,17 @@ class UniformRandomParser(AscendParserBase): scale = max_v - min_v scale_value = core.GEOperatorFactory.create_operator( - "scale" + self._accumulated_op_id(), "Power").set_input( - "x", ge_ur).set_attr_float("power", 1.0).set_attr_float( - "scale", scale).set_attr_float("shift", min_v) + "scale" + self._accumulated_op_id(), + "Power").set_input("x", ge_ur).set_attr_float( + "power", + 1.0).set_attr_float("scale", + scale).set_attr_float("shift", min_v) return [scale_value], [[0]] class EqualParser(AscendParserBase): + def __init__(self, graph, var2geop): super(EqualParser, self).__init__(graph, var2geop) self.parser_name = "equal" @@ -1413,6 +1489,7 @@ class EqualParser(AscendParserBase): class ExpandParser(AscendParserBase): + def __init__(self, graph, var2geop): super(ExpandParser, self).__init__(graph, var2geop) self.parser_name = "expand" @@ -1434,6 +1511,7 @@ class ExpandParser(AscendParserBase): class SqueezeParser(AscendParserBase): + def __init__(self, graph, var2geop): super(SqueezeParser, self).__init__(graph, var2geop) self.parser_name = "squeeze2" @@ -1461,6 +1539,7 @@ class SqueezeParser(AscendParserBase): #****************************************************************# ## grad class ReduceSumGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(ReduceSumGradParser, self).__init__(graph, var2geop) self.parser_name = "reduce_sum_grad" @@ -1487,6 +1566,7 @@ class ReduceSumGradParser(AscendParserBase): class MatMulGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(MatMulGradParser, self).__init__(graph, var2geop) self.parser_name = "matmul_grad" @@ -1507,56 +1587,60 @@ class MatMulGradParser(AscendParserBase): x_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), "BatchMatMul").set_input("x1", out_grad).set_input( - "x2", y).set_attr_bool( - "adj_x1", False).set_attr_bool("adj_x2", False) + "x2", + y).set_attr_bool("adj_x1", + False).set_attr_bool("adj_x2", False) y_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), "BatchMatMul").set_input("x1", out_grad).set_input( - "x2", x).set_attr_bool( - "adj_x1", True).set_attr_bool("adj_x2", False) + "x2", + x).set_attr_bool("adj_x1", + True).set_attr_bool("adj_x2", False) else: x_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), "BatchMatMul").set_input("x1", out_grad).set_input( - "x2", y).set_attr_bool( - "adj_x1", False).set_attr_bool("adj_x2", True) + "x2", + y).set_attr_bool("adj_x1", + False).set_attr_bool("adj_x2", True) y_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), - "BatchMatMul").set_input("x1", x).set_input( - "x2", out_grad).set_attr_bool( + "BatchMatMul").set_input( + "x1", x).set_input("x2", out_grad).set_attr_bool( "adj_x1", 
True).set_attr_bool("adj_x2", False) else: if transpose_y: x_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), "MatMul").set_input("x1", out_grad).set_input( - "x2", y).set_attr_bool( - "transpose_x1", False).set_attr_bool("transpose_x2", - False) + "x2", y).set_attr_bool("transpose_x1", + False).set_attr_bool( + "transpose_x2", False) y_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), "MatMul").set_input("x1", out_grad).set_input( - "x2", x).set_attr_bool( - "transpose_x1", True).set_attr_bool("transpose_x2", - False) + "x2", x).set_attr_bool("transpose_x1", + True).set_attr_bool( + "transpose_x2", False) else: x_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), "MatMul").set_input("x1", out_grad).set_input( - "x2", y).set_attr_bool( - "transpose_x1", False).set_attr_bool("transpose_x2", - True) + "x2", y).set_attr_bool("transpose_x1", + False).set_attr_bool( + "transpose_x2", True) y_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), "MatMul").set_input("x1", x).set_input( - "x2", out_grad).set_attr_bool( - "transpose_x1", True).set_attr_bool("transpose_x2", - False) + "x2", out_grad).set_attr_bool("transpose_x1", + True).set_attr_bool( + "transpose_x2", False) return [x_grad, y_grad], [[0], [1]] class MulGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(MulGradParser, self).__init__(graph, var2geop) self.parser_name = "mul_grad" @@ -1577,25 +1661,25 @@ class MulGradParser(AscendParserBase): x_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), "MatMul").set_input("x1", out_grad).set_input( - "x2", y).set_attr_bool( - "transpose_x1", False).set_attr_bool("transpose_x2", - True) + "x2", y).set_attr_bool("transpose_x1", + False).set_attr_bool( + "transpose_x2", True) y_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), "MatMul").set_input("x1", x).set_input( - "x2", out_grad).set_attr_bool( - "transpose_x1", True).set_attr_bool("transpose_x2", - False) + "x2", out_grad).set_attr_bool("transpose_x1", + True).set_attr_bool( + "transpose_x2", False) elif len(shape_x) == 3 and len(shape_y) == 2: flatten_x = core.GEOperatorFactory.create_operator( "flatten" + self._accumulated_op_id(), "Flatten").set_input("x", x) x_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), - "MatMul").set_input( - "x1", out_grad).set_input("x2", y).set_attr_bool( - "transpose_x1", - False).set_attr_bool("transpose_x2", True) + "MatMul").set_input("x1", out_grad).set_input( + "x2", y).set_attr_bool("transpose_x1", + False).set_attr_bool( + "transpose_x2", True) if len(shape_out_grad) == 2: x_grad = core.GEOperatorFactory.create_operator( "unsqueeze" + self._accumulated_op_id(), @@ -1604,11 +1688,10 @@ class MulGradParser(AscendParserBase): y_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), - "MatMul").set_input( - "x1", - flatten_x).set_input("x2", out_grad).set_attr_bool( - "transpose_x1", - True).set_attr_bool("transpose_x2", False) + "MatMul").set_input("x1", flatten_x).set_input( + "x2", out_grad).set_attr_bool("transpose_x1", + True).set_attr_bool( + "transpose_x2", False) else: if len(shape_x) == 3 and len(shape_y) == 2: assert x_num_col_dims == 2, "only support 2" @@ -1632,8 +1715,9 @@ class MulGradParser(AscendParserBase): x_grad = 
core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), "BatchMatMul").set_input("x1", out_grad).set_input( - "x2", y_stack).set_attr_bool( - "adj_x1", False).set_attr_bool("adj_x2", True) + "x2", y_stack).set_attr_bool("adj_x1", + False).set_attr_bool( + "adj_x2", True) y_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), "MatMul").set_input("x1", flatten_x).set_input( @@ -1645,6 +1729,7 @@ class MulGradParser(AscendParserBase): class ReluGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(ReluGradParser, self).__init__(graph, var2geop) self.parser_name = "relu_grad" @@ -1653,12 +1738,14 @@ class ReluGradParser(AscendParserBase): out = self._get_ge_input(self.op.input_arg_names[0]) out_grad = self._get_ge_input(self.op.input_arg_names[1]) relu_grad = core.GEOperatorFactory.create_operator( - self.parser_name + self._accumulated_op_id(), "ReluGrad").set_input( - "gradients", out_grad).set_input("features", out) + self.parser_name + self._accumulated_op_id(), + "ReluGrad").set_input("gradients", + out_grad).set_input("features", out) return [relu_grad], [[0]] class SoftmaxWithCrossEntropyGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(SoftmaxWithCrossEntropyGradParser, self).__init__(graph, var2geop) self.parser_name = "softmax_with_cross_entropy_grad" @@ -1685,18 +1772,20 @@ class SoftmaxWithCrossEntropyGradParser(AscendParserBase): self._mark_as_input(off) label = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", label).set_attr_int32("dst_type", 3) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", label).set_attr_int32("dst_type", 3) onehot = core.GEOperatorFactory.create_operator( - "onehot" + self._accumulated_op_id(), "OneHotD").set_input( - "x", label).set_input("on_value", on).set_input( - "off_value", off).set_attr_int32("depth", cls_num) + "onehot" + self._accumulated_op_id(), + "OneHotD").set_input("x", + label).set_input("on_value", on).set_input( + "off_value", + off).set_attr_int32("depth", cls_num) squeeze = core.GEOperatorFactory.create_operator( "suqeeze" + self._accumulated_op_id(), "Squeeze").set_input("x", onehot) sub = core.GEOperatorFactory.create_operator( - "sub" + self._accumulated_op_id(), "Sub").set_input( - "x1", softmax).set_input("x2", squeeze) + "sub" + self._accumulated_op_id(), + "Sub").set_input("x1", softmax).set_input("x2", squeeze) grad = core.GEOperatorFactory.create_operator( "mul" + self._accumulated_op_id(), "Mul").set_input("x1", loss_grad).set_input("x2", sub) @@ -1705,6 +1794,7 @@ class SoftmaxWithCrossEntropyGradParser(AscendParserBase): class DotMulGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(DotMulGradParser, self).__init__(graph, var2geop) self.parser_name = "elementwise_mul_grad" @@ -1725,6 +1815,7 @@ class DotMulGradParser(AscendParserBase): class DotAddGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(DotAddGradParser, self).__init__(graph, var2geop) self.parser_name = "elementwise_add_grad" @@ -1769,6 +1860,7 @@ class DotAddGradParser(AscendParserBase): class DotDivGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(DotDivGradParser, self).__init__(graph, var2geop) self.parser_name = "elementwise_div_grad" @@ -1780,39 +1872,40 @@ class DotDivGradParser(AscendParserBase): y = self._get_ge_input(self.op.input_arg_names[3]) y_power = core.GEOperatorFactory.create_operator( - 
"power" + self._accumulated_op_id(), "Power").set_input( - "x", y).set_attr_float("power", -1) + "power" + self._accumulated_op_id(), + "Power").set_input("x", y).set_attr_float("power", -1) tensor_zeros = core.GEOperatorFactory.create_operator( "zeroslike" + self._accumulated_op_id(), "ZerosLike").set_input("x", x) x_zero = core.GEOperatorFactory.create_operator( - "equal" + self._accumulated_op_id(), "Equal").set_input( - "x1", x).set_input("x2", tensor_zeros) + "equal" + self._accumulated_op_id(), + "Equal").set_input("x1", x).set_input("x2", tensor_zeros) x_nozero = core.GEOperatorFactory.create_operator( "logical_not" + self._accumulated_op_id(), "LogicalNot").set_input("x", x_zero) x_nozero_f = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", x_nozero).set_attr_int32("dst_type", 0) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", x_nozero).set_attr_int32("dst_type", 0) x_grad_w = core.GEOperatorFactory.create_operator( - "mul" + self._accumulated_op_id(), "Mul").set_input( - "x1", x_nozero_f).set_input("x2", y_power) + "mul" + self._accumulated_op_id(), + "Mul").set_input("x1", x_nozero_f).set_input("x2", y_power) x_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), "Mul").set_input("x1", x_grad_w).set_input("x2", out_grad) y_grad_w = core.GEOperatorFactory.create_operator( - "mul" + self._accumulated_op_id(), "Mul").set_input( - "x1", out).set_input("x2", y_power) + "mul" + self._accumulated_op_id(), + "Mul").set_input("x1", out).set_input("x2", y_power) y_grad = core.GEOperatorFactory.create_operator( - "mul" + self._accumulated_op_id(), "Mul").set_input( - "x1", y_grad_w).set_input("x2", out_grad) + "mul" + self._accumulated_op_id(), + "Mul").set_input("x1", y_grad_w).set_input("x2", out_grad) return [x_grad, y_grad], [[0], [1]] class SoftmaxGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(SoftmaxGradParser, self).__init__(graph, var2geop) self.parser_name = "softmax_grad" @@ -1823,12 +1916,13 @@ class SoftmaxGradParser(AscendParserBase): x_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), - "SoftmaxGrad").set_input("softmax", out).set_input("grad_softmax", - out_grad) + "SoftmaxGrad").set_input("softmax", + out).set_input("grad_softmax", out_grad) return [x_grad], [[0]] class ReshapeGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(ReshapeGradParser, self).__init__(graph, var2geop) self.parser_name = "reshape2_grad" @@ -1846,13 +1940,14 @@ class ReshapeGradParser(AscendParserBase): "shape" + self._accumulated_op_id(), "Const").set_attr_tensor("value", tensor) x_grad = core.GEOperatorFactory.create_operator( - "reshape" + self._accumulated_op_id(), "Reshape").set_input( - "x", out_grad).set_input("shape", const_shape) + "reshape" + self._accumulated_op_id(), + "Reshape").set_input("x", out_grad).set_input("shape", const_shape) return [x_grad], [[0]] class GatherGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(GatherGradParser, self).__init__(graph, var2geop) self.parser_name = "gather_grad" @@ -1868,8 +1963,9 @@ class GatherGradParser(AscendParserBase): if len(index_shape) == 1: index = core.GEOperatorFactory.create_operator( - "unsqueeze" + self._accumulated_op_id(), "Unsqueeze").set_input( - "x", index).set_attr_vec_int32("axes", [1]) + "unsqueeze" + self._accumulated_op_id(), + "Unsqueeze").set_input("x", + index).set_attr_vec_int32("axes", [1]) 
tensor_zeros = core.GEOperatorFactory.create_operator( "zeroslike" + self._accumulated_op_id(), @@ -1883,6 +1979,7 @@ class GatherGradParser(AscendParserBase): class TransposeGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(TransposeGradParser, self).__init__(graph, var2geop) self.parser_name = "transpose2_grad" @@ -1897,13 +1994,15 @@ class TransposeGradParser(AscendParserBase): assert list(map(lambda x: out_grad_shape[x], perm)) == list(x_shape) x_grad = core.GEOperatorFactory.create_operator( - "transpose" + self._accumulated_op_id(), "TransposeD").set_input( - "x", out_grad).set_attr_vec_int32("perm", perm) + "transpose" + self._accumulated_op_id(), + "TransposeD").set_input("x", + out_grad).set_attr_vec_int32("perm", perm) return [x_grad], [[0]] class LayerNormGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(LayerNormGradParser, self).__init__(graph, var2geop) self.parser_name = "layer_norm_grad" @@ -1920,25 +2019,30 @@ class LayerNormGradParser(AscendParserBase): x_grad = core.GEOperatorFactory.create_operator( self.parser_name + self._accumulated_op_id(), "LayerNormGrad").set_input("dy", out_grad).set_input( - "x", x).set_input("variance", variance).set_input( - "mean", mean).set_input("gamma", scale) + "x", x).set_input("variance", + variance).set_input("mean", mean).set_input( + "gamma", scale) cast_dtype = 0 if self.ascend_helper.dtype2paddle_inv_map[str( x_dtype)] == 0 else 1 out_x_grad = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", x_grad, 0).set_attr_int32("dst_type", cast_dtype) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", x_grad, + 0).set_attr_int32("dst_type", cast_dtype) out_scale_grad = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", x_grad, 1).set_attr_int32("dst_type", cast_dtype) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", x_grad, + 1).set_attr_int32("dst_type", cast_dtype) out_bias_grad = core.GEOperatorFactory.create_operator( - "cast" + self._accumulated_op_id(), "Cast").set_input( - "x", x_grad, 2).set_attr_int32("dst_type", cast_dtype) + "cast" + self._accumulated_op_id(), + "Cast").set_input("x", x_grad, + 2).set_attr_int32("dst_type", cast_dtype) return [out_x_grad, out_scale_grad, out_bias_grad], [[2], [1], [0]] class TanhGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(TanhGradParser, self).__init__(graph, var2geop) self.parser_name = 'tanh_grad' @@ -1954,6 +2058,7 @@ class TanhGradParser(AscendParserBase): class LogGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(LogGradParser, self).__init__(graph, var2geop) self.parser_name = 'log_grad' @@ -1968,6 +2073,7 @@ class LogGradParser(AscendParserBase): class SqrtGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(SqrtGradParser, self).__init__(graph, var2geop) self.parser_name = "sqrt_grad" @@ -1982,6 +2088,7 @@ class SqrtGradParser(AscendParserBase): class PowGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(PowGradParser, self).__init__(graph, var2geop) self.parser_name = "pow_grad" @@ -2000,15 +2107,15 @@ class PowGradParser(AscendParserBase): "Const").set_attr_tensor("value", factor_scale) factor_tensor = core.GEOperatorFactory.create_operator( "broadcast_to_d" + self._accumulated_op_id(), - "BroadcastTo").set_input( - "x", factor_scale).set_input("shape", shape_tensor) + "BroadcastTo").set_input("x", 
factor_scale).set_input( + "shape", shape_tensor) x_power = core.GEOperatorFactory.create_operator( - "x_power" + self._accumulated_op_id(), "Power").set_input( - "x", x).set_attr_float("power", factor - 1) + "x_power" + self._accumulated_op_id(), + "Power").set_input("x", x).set_attr_float("power", factor - 1) x_power_mul_factor = core.GEOperatorFactory.create_operator( - "x_power_mul_factor" + self._accumulated_op_id(), "Mul").set_input( - "x1", x).set_input("x2", factor_tensor) + "x_power_mul_factor" + self._accumulated_op_id(), + "Mul").set_input("x1", x).set_input("x2", factor_tensor) x_power_mul_factor_grad = core.GEOperatorFactory.create_operator( "x_power_mul_factor_grad" + self._accumulated_op_id(), "Mul").set_input("x1", x_power_mul_factor).set_input("x2", grad) @@ -2017,6 +2124,7 @@ class PowGradParser(AscendParserBase): class GeluGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(GeluGradParser, self).__init__(graph, var2geop) self.parser_name = "gelu_grad" @@ -2028,13 +2136,15 @@ class GeluGradParser(AscendParserBase): y = core.GEOperatorFactory.create_operator( "gelu" + self._accumulated_op_id(), "Gelu").set_input("x", x) gelu_grad = core.GEOperatorFactory.create_operator( - "gelu_grad" + self._accumulated_op_id(), "GeluGrad").set_input( - "x", x).set_input("dy", grad).set_input("y", y) + "gelu_grad" + self._accumulated_op_id(), + "GeluGrad").set_input("x", x).set_input("dy", + grad).set_input("y", y) return [gelu_grad], [[0]] class MeanGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(MeanGradParser, self).__init__(graph, var2geop) self.parser_name = "mean_grad" @@ -2047,12 +2157,12 @@ class MeanGradParser(AscendParserBase): "one_tensor" + self._accumulated_op_id(), "OnesLike").set_input("x", x) sum = core.GEOperatorFactory.create_operator( - "mean" + self._accumulated_op_id(), "ReduceSumD").set_input( - "x", ones_tensor).set_attr_bool( - "keep_dims", False).set_attr_vec_int32("axes", []) + "mean" + self._accumulated_op_id(), + "ReduceSumD").set_input("x", ones_tensor).set_attr_bool( + "keep_dims", False).set_attr_vec_int32("axes", []) mean = core.GEOperatorFactory.create_operator( - "x_power" + self._accumulated_op_id(), "Power").set_input( - "x", sum).set_attr_float("power", -1) + "x_power" + self._accumulated_op_id(), + "Power").set_input("x", sum).set_attr_float("power", -1) mean_grad = core.GEOperatorFactory.create_operator( "mean_grad" + self._accumulated_op_id(), @@ -2062,6 +2172,7 @@ class MeanGradParser(AscendParserBase): class SliceGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(SliceGradParser, self).__init__(graph, var2geop) self.parser_name = "slice_grad" @@ -2100,6 +2211,7 @@ class SliceGradParser(AscendParserBase): class LookUpTableGradParser(AscendParserBase): + def __init__(self, graph, var2geop): super(LookUpTableGradParser, self).__init__(graph, var2geop) self.parser_name = "lookup_table_grad" @@ -2115,26 +2227,26 @@ class LookUpTableGradParser(AscendParserBase): ids_flatten = core.GEOperatorFactory.create_operator( "flatten" + self._accumulated_op_id(), "FlattenV2").set_input( - "x", - ids).set_attr_int32("axis", 0).set_attr_int32("end_axis", 1) + "x", ids).set_attr_int32("axis", + 0).set_attr_int32("end_axis", 1) grad_flatten = core.GEOperatorFactory.create_operator( "flatten" + self._accumulated_op_id(), "FlattenV2").set_input( - "x", - grad).set_attr_int32("axis", 0).set_attr_int32("end_axis", 1) + "x", grad).set_attr_int32("axis", + 0).set_attr_int32("end_axis", 1) 
tensor_zeros = core.GEOperatorFactory.create_operator( "zeroslike" + self._accumulated_op_id(), "ZerosLike").set_input("x", embedding) embedding_grad = core.GEOperatorFactory.create_operator( "scatteradd" + self._accumulated_op_id(), - "TensorScatterAdd").set_input( - "x", tensor_zeros).set_input("indices", ids_flatten).set_input( - "updates", grad_flatten) + "TensorScatterAdd").set_input("x", tensor_zeros).set_input( + "indices", ids_flatten).set_input("updates", grad_flatten) return [embedding_grad], [[0]] class SGDParser(AscendParserBase): + def __init__(self, graph, var2geop): super(SGDParser, self).__init__(graph, var2geop) self.parser_name = "sgd" @@ -2151,6 +2263,7 @@ class SGDParser(AscendParserBase): class AdamParser(AscendParserBase): + def __init__(self, graph, var2geop): super(AdamParser, self).__init__(graph, var2geop) self.parser_name = "adam" @@ -2168,23 +2281,26 @@ class AdamParser(AscendParserBase): epsilon = self.op.attr('epsilon') beta1 = core.GEOperatorFactory.create_operator( - "const" + self._accumulated_op_id(), "Const").set_attr_tensor( - "value", self._create_ge_tensor([1], 5, beta1)) + "const" + self._accumulated_op_id(), + "Const").set_attr_tensor("value", + self._create_ge_tensor([1], 5, beta1)) beta2 = core.GEOperatorFactory.create_operator( - "const" + self._accumulated_op_id(), "Const").set_attr_tensor( - "value", self._create_ge_tensor([1], 5, beta2)) + "const" + self._accumulated_op_id(), + "Const").set_attr_tensor("value", + self._create_ge_tensor([1], 5, beta2)) epsilon = core.GEOperatorFactory.create_operator( - "const" + self._accumulated_op_id(), "Const").set_attr_tensor( - "value", self._create_ge_tensor([1], 5, epsilon)) + "const" + self._accumulated_op_id(), + "Const").set_attr_tensor("value", + self._create_ge_tensor([1], 5, epsilon)) adam = core.GEOperatorFactory.create_operator( "adam" + self._accumulated_op_id(), "ApplyAdam").set_input("var", param).set_input( "m", moment1).set_input("v", moment2).set_input( "beta1_power", beta1_power).set_input( - "beta2_power", beta2_power).set_input( - "lr", lr).set_input("beta1", beta1).set_input( - "beta2", beta2).set_input( - "epsilon", epsilon).set_input("grad", grad) + "beta2_power", + beta2_power).set_input("lr", lr).set_input( + "beta1", beta1).set_input("beta2", beta2).set_input( + "epsilon", epsilon).set_input("grad", grad) return [adam], [[0]] diff --git a/python/paddle/distributed/fleet/meta_optimizers/asp_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/asp_optimizer.py index ea9cb1c62bf..2047c3172c2 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/asp_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/asp_optimizer.py @@ -19,6 +19,7 @@ __all__ = [] class ASPOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(ASPOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -32,8 +33,9 @@ class ASPOptimizer(MetaOptimizerBase): def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): - super(ASPOptimizer, self)._set_basic_info( - loss, role_maker, user_defined_optimizer, user_defined_strategy) + super(ASPOptimizer, + self)._set_basic_info(loss, role_maker, user_defined_optimizer, + user_defined_strategy) def _can_apply(self): if not self.role_maker._is_collective: diff --git a/python/paddle/distributed/fleet/meta_optimizers/common.py b/python/paddle/distributed/fleet/meta_optimizers/common.py index a44607d13aa..4c0cc901025 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/common.py +++ 
b/python/paddle/distributed/fleet/meta_optimizers/common.py @@ -50,6 +50,7 @@ def is_optimizer_op(op): class CollectiveHelper(object): + def __init__(self, role_maker, nrings=1, wait_port=True): self.nrings = nrings self.wait_port = wait_port @@ -63,9 +64,10 @@ class CollectiveHelper(object): endpoints = self.role_maker._get_trainer_endpoints() current_endpoint = endpoints[self.role_maker._worker_index()] for ring_id in range(self.nrings): - self._init_communicator( - self.startup_program, current_endpoint, endpoints, - self.role_maker._worker_index(), ring_id, self.wait_port) + self._init_communicator(self.startup_program, + current_endpoint, endpoints, + self.role_maker._worker_index(), ring_id, + self.wait_port) self._broadcast_params() def _init_communicator(self, @@ -88,36 +90,32 @@ class CollectiveHelper(object): wait_server_ready(other_endpoints) def _add_sync_by_allreduce(block): - sync_var = block.create_var( - name=unique_name.generate('sync_var'), - dtype=core.VarDesc.VarType.INT32, - persistable=False, - stop_gradient=True) - block.append_op( - type='fill_constant', - inputs={}, - outputs={'Out': [sync_var]}, - attrs={ - 'shape': [1], - 'dtype': sync_var.dtype, - 'value': 1, - 'force_cpu': False, - OP_ROLE_KEY: OpRole.Forward - }) - block.append_op( - type='c_allreduce_sum', - inputs={'X': [sync_var]}, - outputs={'Out': [sync_var]}, - attrs={ - 'ring_id': global_ring_id, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Forward - }) - block.append_op( - type='c_sync_calc_stream', - inputs={'X': sync_var}, - outputs={'Out': sync_var}, - attrs={OP_ROLE_KEY: OpRole.Forward}) + sync_var = block.create_var(name=unique_name.generate('sync_var'), + dtype=core.VarDesc.VarType.INT32, + persistable=False, + stop_gradient=True) + block.append_op(type='fill_constant', + inputs={}, + outputs={'Out': [sync_var]}, + attrs={ + 'shape': [1], + 'dtype': sync_var.dtype, + 'value': 1, + 'force_cpu': False, + OP_ROLE_KEY: OpRole.Forward + }) + block.append_op(type='c_allreduce_sum', + inputs={'X': [sync_var]}, + outputs={'Out': [sync_var]}, + attrs={ + 'ring_id': global_ring_id, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Forward + }) + block.append_op(type='c_sync_calc_stream', + inputs={'X': sync_var}, + outputs={'Out': sync_var}, + attrs={OP_ROLE_KEY: OpRole.Forward}) block = program.global_block() if current_endpoint is None: @@ -126,77 +124,71 @@ class CollectiveHelper(object): _add_sync_by_allreduce(block) return - comm_id_var = block.create_var( - name=unique_name.generate('comm_id'), - persistable=True, - type=core.VarDesc.VarType.RAW) + comm_id_var = block.create_var(name=unique_name.generate('comm_id'), + persistable=True, + type=core.VarDesc.VarType.RAW) if core.is_compiled_with_cuda(): - block.append_op( - type='c_gen_nccl_id', - inputs={}, - outputs={'Out': comm_id_var}, - attrs={ - 'rank': rank, - 'endpoint': current_endpoint, - 'other_endpoints': other_endpoints, - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Forward - }) - block.append_op( - type='c_comm_init', - inputs={'X': comm_id_var}, - outputs={}, - attrs={ - 'nranks': nranks, - 'rank': rank, - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Forward - }) + block.append_op(type='c_gen_nccl_id', + inputs={}, + outputs={'Out': comm_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints, + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Forward + }) + block.append_op(type='c_comm_init', + inputs={'X': comm_id_var}, + outputs={}, + attrs={ + 'nranks': nranks, + 'rank': rank, + 'ring_id': 
ring_id, + OP_ROLE_KEY: OpRole.Forward + }) elif core.is_compiled_with_xpu(): - block.append_op( - type='c_gen_bkcl_id', - inputs={}, - outputs={'Out': comm_id_var}, - attrs={ - 'rank': rank, - 'endpoint': current_endpoint, - 'other_endpoints': other_endpoints, - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Forward - }) - block.append_op( - type='c_comm_init', - inputs={'X': comm_id_var}, - outputs={}, - attrs={ - 'nranks': nranks, - 'rank': rank, - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Forward - }) + block.append_op(type='c_gen_bkcl_id', + inputs={}, + outputs={'Out': comm_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints, + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Forward + }) + block.append_op(type='c_comm_init', + inputs={'X': comm_id_var}, + outputs={}, + attrs={ + 'nranks': nranks, + 'rank': rank, + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Forward + }) elif core.is_compiled_with_npu(): - block.append_op( - type='c_gen_hccl_id', - inputs={}, - outputs={'Out': comm_id_var}, - attrs={ - 'rank': rank, - 'endpoint': current_endpoint, - 'other_endpoints': other_endpoints, - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Forward - }) - block.append_op( - type='c_comm_init_hccl', - inputs={'X': comm_id_var}, - outputs={}, - attrs={ - 'rank': rank, - 'ring_id': ring_id, - 'device_id': int(os.getenv("FLAGS_selected_npus")), - 'rank_ids': nranks, - OP_ROLE_KEY: OpRole.Forward - }) + block.append_op(type='c_gen_hccl_id', + inputs={}, + outputs={'Out': comm_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints, + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Forward + }) + block.append_op(type='c_comm_init_hccl', + inputs={'X': comm_id_var}, + outputs={}, + attrs={ + 'rank': rank, + 'ring_id': ring_id, + 'device_id': + int(os.getenv("FLAGS_selected_npus")), + 'rank_ids': nranks, + OP_ROLE_KEY: OpRole.Forward + }) else: raise ValueError( "comm_id must be generated in paddlepaddle-xpu or paddlepaddle-xpu." 
@@ -217,20 +209,20 @@ class CollectiveHelper(object): continue ring_id = (ring_id + 1) % self.nrings - block.append_op( - type='c_broadcast', - inputs={'X': param}, - outputs={'Out': param}, - attrs={ - 'ring_id': ring_id, - 'root': 0, - OP_ROLE_KEY: OpRole.Forward - }) + block.append_op(type='c_broadcast', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + 'root': 0, + OP_ROLE_KEY: OpRole.Forward + }) for ring_id in range(self.nrings): - block.append_op( - type='c_sync_comm_stream', - inputs={'X': param}, - outputs={'Out': param}, - attrs={'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Forward}) + block.append_op(type='c_sync_comm_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Forward + }) diff --git a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py index b035f179317..d25cf968023 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py @@ -19,6 +19,7 @@ __all__ = [] class DGCOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(DGCOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -29,8 +30,9 @@ class DGCOptimizer(MetaOptimizerBase): def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): - super(DGCOptimizer, self)._set_basic_info( - loss, role_maker, user_defined_optimizer, user_defined_strategy) + super(DGCOptimizer, + self)._set_basic_info(loss, role_maker, user_defined_optimizer, + user_defined_strategy) def _init_dgc_opt(self): if self.dgc_opt is not None: @@ -102,8 +104,9 @@ class DGCOptimizer(MetaOptimizerBase): def apply_optimize(self, loss, startup_program, params_grads): self._init_dgc_opt() - return self.dgc_opt.apply_optimize( - loss, startup_program=startup_program, params_grads=params_grads) + return self.dgc_opt.apply_optimize(loss, + startup_program=startup_program, + params_grads=params_grads) def minimize_impl(self, loss, diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py index d487f35324d..8a6ec33b39b 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py @@ -35,7 +35,7 @@ class DygraphShardingOptimizer(object): """ - # TODO (JZ-LIANG) + # TODO (JZ-LIANG) # TO support following featrues in future: # 1. fused update parameter sync # 2. 
parameters_groups @@ -124,7 +124,7 @@ class DygraphShardingOptimizer(object): def _buid_inner_optimizer(self): # we rely on the inner opt to determine whether a parameter is stop_gradient or not: # create moment - # update related ops: clip, regular, opt + # update related ops: clip, regular, opt self._inner_optimizer = self._inner_optimizer_class( parameters=self._rank2params[self._sharding_rank], **self._inner_optimizer_kargs) @@ -142,8 +142,8 @@ class DygraphShardingOptimizer(object): for param in params: paddle.distributed.broadcast( param, - # the collective API need src rank to be the global rank id - # instead of the relative logic rank id within group + # the collective API need src rank to be the global rank id + # instead of the relative logic rank id within group src=self._hcg.get_sharding_parallel_group().ranks[rank], group=self._hcg.get_sharding_parallel_group(), use_calc_stream=True) @@ -160,13 +160,13 @@ class DygraphShardingOptimizer(object): parameters=None, no_grad_set=None): - # NOTE in dygraph mode, the only different between step and minimize is that minimize + # NOTE in dygraph mode, the only different between step and minimize is that minimize # allow user to customize the parameters for updating on each step input_param_names = set([param.name for param in parameters]) parameters = list( - filter(lambda x: x.name in input_param_names, self._rank2params[ - self._sharding_rank])) + filter(lambda x: x.name in input_param_names, + self._rank2params[self._sharding_rank])) result = self._inner_optimizer.minimize(loss, startup_program, parameters, no_grad_set) diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py index 7d930c5a69c..641bc25e5c5 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py @@ -28,6 +28,7 @@ __all__ = [] class HybridParallelGradScaler: + def __init__(self, scaler, hcg): self._scaler = scaler self._hcg = hcg @@ -70,11 +71,12 @@ class HybridParallelGradScaler: # allreduce_max found_inf in check_group if not self._use_dp_mode: self._found_inf = paddle.cast(self._found_inf, dtype="int32") - # TODO(shenliang03) Since the minimize call in the optimizer is - # after the gradscaler, check_finite needs to synchronize global + # TODO(shenliang03) Since the minimize call in the optimizer is + # after the gradscaler, check_finite needs to synchronize global # information. 
In the future, we should use check_group - paddle.distributed.all_reduce( - self._found_inf, op=paddle.distributed.ReduceOp.MAX, group=None) + paddle.distributed.all_reduce(self._found_inf, + op=paddle.distributed.ReduceOp.MAX, + group=None) self._found_inf = paddle.cast(self._found_inf, dtype="bool") def __getattr__(self, item): diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py index 50bf8a2f9c7..14daba5ee33 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py @@ -43,6 +43,7 @@ def _obtain_optimizer_parameters_list(optimizer): class HybridParallelClipGrad: + def __init__(self, clip, hcg): self._clip = clip self._hcg = hcg @@ -67,8 +68,8 @@ class HybridParallelClipGrad: sum_square = layers.reduce_sum(square) not_shared_enable = (not hasattr(p, 'is_firstly_shared')) or ( - hasattr(p, 'is_firstly_shared') and - getattr(p, 'is_firstly_shared', True)) + hasattr(p, 'is_firstly_shared') + and getattr(p, 'is_firstly_shared', True)) if not_shared_enable: if p.is_distributed: @@ -88,19 +89,19 @@ class HybridParallelClipGrad: else: global_norm_dist_fp16 = layers.concat(sum_square_dist_fp16) global_norm_dist_fp16 = layers.reduce_sum(global_norm_dist_fp16) - global_norm_dist_fp16 = paddle.cast( - global_norm_dist_fp16, dtype=paddle.float32) + global_norm_dist_fp16 = paddle.cast(global_norm_dist_fp16, + dtype=paddle.float32) # global norm of non-distributed FP16 params_and_grads if len(sum_square_not_dist_fp16) == 0: - global_norm_not_dist_fp16 = paddle.to_tensor( - [0.], dtype=paddle.float32) + global_norm_not_dist_fp16 = paddle.to_tensor([0.], + dtype=paddle.float32) else: global_norm_not_dist_fp16 = layers.concat(sum_square_not_dist_fp16) global_norm_not_dist_fp16 = layers.reduce_sum( global_norm_not_dist_fp16) - global_norm_not_dist_fp16 = paddle.cast( - global_norm_not_dist_fp16, dtype=paddle.float32) + global_norm_not_dist_fp16 = paddle.cast(global_norm_not_dist_fp16, + dtype=paddle.float32) # global norm of distributed FP32 params_and_grads global_norm_dist_fp32 = layers.concat(sum_square_dist_fp32) if len( @@ -110,9 +111,9 @@ class HybridParallelClipGrad: # global norm of non-distributed FP32 params_and_grads global_norm_not_dist_fp32 = layers.concat( - sum_square_not_dist_fp32) if len( - sum_square_not_dist_fp32) != 0 else paddle.to_tensor( - [0.], dtype=paddle.float32) + sum_square_not_dist_fp32 + ) if len(sum_square_not_dist_fp32) != 0 else paddle.to_tensor( + [0.], dtype=paddle.float32) global_norm_not_dist_fp32 = layers.reduce_sum(global_norm_not_dist_fp32) global_norm_var_dist = global_norm_dist_fp16 + global_norm_dist_fp32 @@ -140,12 +141,13 @@ class HybridParallelClipGrad: global_norm_var_fp32 = layers.sqrt(global_norm_var_dist + global_norm_var_not_dist) - max_global_norm = layers.fill_constant( - shape=[1], dtype=global_norm_var_fp32.dtype, value=self.clip_norm) - clip_var = layers.elementwise_div( - x=max_global_norm, - y=layers.elementwise_max( - x=global_norm_var_fp32, y=max_global_norm)) + max_global_norm = layers.fill_constant(shape=[1], + dtype=global_norm_var_fp32.dtype, + value=self.clip_norm) + clip_var = layers.elementwise_div(x=max_global_norm, + y=layers.elementwise_max( + x=global_norm_var_fp32, + y=max_global_norm)) clip_var_fp16 = paddle.cast(clip_var, 
paddle.float16) for p, g in params_grads: if g is None: @@ -179,12 +181,12 @@ class HybridParallelOptimizer: self._need_dp = (self._hcg.get_data_parallel_world_size() > 1) - # NOTE(shenliang03): Because of the pure DataParallel mode, the gradient synchronization - # is achieved through reducer, so there is no need to call fuse_allreduce in optimizer. + # NOTE(shenliang03): Because of the pure DataParallel mode, the gradient synchronization + # is achieved through reducer, so there is no need to call fuse_allreduce in optimizer. self._dp_enable = not self._use_dp_mode and self._need_dp - self._sharding_enable = ( - self._hcg.get_sharding_parallel_world_size() > 1) + self._sharding_enable = (self._hcg.get_sharding_parallel_world_size() > + 1) if isinstance(self._inner_opt._grad_clip, ClipGradByGlobalNorm) and not self._use_dp_mode: @@ -224,7 +226,7 @@ class HybridParallelOptimizer: parameters=None, no_grad_set=None): - # minimize does not support parameters in the form of param_group, + # minimize does not support parameters in the form of param_group, # so no need use _obtain_optimizer_parameters_list parameter_list = parameters if parameters \ else self._inner_opt._parameter_list diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py index fb43b89e1a6..3359e63b1de 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py @@ -57,7 +57,7 @@ class ShardingOptimizerStage2(Optimizer): """ - # TODO (Baibaifan) + # TODO (Baibaifan) # Feature Notes: # 1. Unified memory for parameters and parameters.grad to InternalStorage. # 2. Support the segmentation of optimizer parameters and partial updating of parameters. 
@@ -97,8 +97,8 @@ class ShardingOptimizerStage2(Optimizer): filter(lambda x: x.trainable and x.dtype == Type.fp16.value, self._local_params))) > 0 - self.group = new_group(_get_global_group() - .ranks) if group is None else group + self.group = new_group( + _get_global_group().ranks) if group is None else group self.world_size = self.group.nranks self.rank = self.group.rank @@ -122,8 +122,8 @@ class ShardingOptimizerStage2(Optimizer): for item in self._optim._param_groups: if "grad_clip" in item.keys(): item["grad_clip"] = ShardingClipGrad( - self._optim._grad_clip, - paddle.get_device(), self.group) + self._optim._grad_clip, paddle.get_device(), + self.group) if offload: assert self._pfp16, "Only support offload strategy while using \'Adam\', \'AdamW\' and \'Momentum\' optimizer with AMP/Pure FP16" @@ -147,11 +147,10 @@ class ShardingOptimizerStage2(Optimizer): """ for p in self._local_params: - broadcast( - p, - src=self._global_root_rank, - group=self.group, - use_calc_stream=True) + broadcast(p, + src=self._global_root_rank, + group=self.group, + use_calc_stream=True) # Multi stream operation will be supported later wait(tensor=p, group=self.group, use_calc_stream=True) @@ -224,8 +223,9 @@ class ShardingOptimizerStage2(Optimizer): # Assign the parameters of each rank according to the type for param in self._local_params: if param.dtype not in self._dtype_rank_params.keys(): - self._dtype_rank_params[ - param.dtype] = [[] for _ in range(self.world_size)] + self._dtype_rank_params[param.dtype] = [ + [] for _ in range(self.world_size) + ] self._dtype_rank_params[param.dtype][self.param2rank[ param.name]].append(param) @@ -379,8 +379,9 @@ class ShardingOptimizerStage2(Optimizer): dev_id = int(paddle.get_device().split(":")[1]) for param in self._local_params: if param.name in self._master_params.keys(): - param.set_value(self._master_params[param.name].cuda(dev_id) - .cast(dtype=param.dtype)) + param.set_value( + self._master_params[param.name].cuda(dev_id).cast( + dtype=param.dtype)) else: self._optim.step() @@ -411,14 +412,12 @@ class ShardingOptimizerStage2(Optimizer): # Exchange all the shards with the other ranks for dtype_per_rank in self.param_storages.values(): for dst_rank, internal_storage in dtype_per_rank.items(): - broadcast( - tensor=internal_storage.buffer, - src=self.group.ranks[dst_rank], - group=self.group, - use_calc_stream=True) + broadcast(tensor=internal_storage.buffer, + src=self.group.ranks[dst_rank], + group=self.group, + use_calc_stream=True) # Multi stream operation will be supported later - wait( - tensor=internal_storage.buffer, - group=self.group, - use_calc_stream=True) + wait(tensor=internal_storage.buffer, + group=self.group, + use_calc_stream=True) diff --git a/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py index f636a313757..93857461b26 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py @@ -18,6 +18,7 @@ __all__ = [] class FP16AllReduceOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(FP16AllReduceOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -35,8 +36,9 @@ class FP16AllReduceOptimizer(MetaOptimizerBase): def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): - super(FP16AllReduceOptimizer, self)._set_basic_info( - loss, role_maker, user_defined_optimizer, 
user_defined_strategy) + super(FP16AllReduceOptimizer, + self)._set_basic_info(loss, role_maker, user_defined_optimizer, + user_defined_strategy) def _can_apply(self): if not self.role_maker._is_collective: @@ -82,22 +84,23 @@ class FP16AllReduceOptimizer(MetaOptimizerBase): else: op._remove_attr(op_maker.kOpRoleVarAttrName()) - new_grad = block.create_var( - name=unique_name.generate(grad.name + ".cast_fp16"), - dtype=core.VarDesc.VarType.FP16, - persistable=False, - stop_gradient=True) + new_grad = block.create_var(name=unique_name.generate(grad.name + + ".cast_fp16"), + dtype=core.VarDesc.VarType.FP16, + persistable=False, + stop_gradient=True) with block.program._backward_role_guard(): - cast_op = block.append_op( - type="cast", - inputs={"X": grad}, - outputs={"Out": new_grad}, - attrs={ - "in_dtype": core.VarDesc.VarType.FP32, - "out_dtype": core.VarDesc.VarType.FP16 - }, - stop_gradient=True) + cast_op = block.append_op(type="cast", + inputs={"X": grad}, + outputs={"Out": new_grad}, + attrs={ + "in_dtype": + core.VarDesc.VarType.FP32, + "out_dtype": + core.VarDesc.VarType.FP16 + }, + stop_gradient=True) backward = op_maker.OpRole.Backward cast_op._set_attr(op_maker.kOpRoleAttrName(), backward) @@ -119,30 +122,30 @@ class FP16AllReduceOptimizer(MetaOptimizerBase): continue block = grad.block - new_grad = block.create_var( - name=unique_name.generate(grad.name + ".cast_fp32"), - dtype=core.VarDesc.VarType.FP32, - persistable=False, - stop_gradient=True) + new_grad = block.create_var(name=unique_name.generate(grad.name + + ".cast_fp32"), + dtype=core.VarDesc.VarType.FP32, + persistable=False, + stop_gradient=True) with block.program._optimized_guard( [param, grad]), framework.name_scope('fp16_allreduce'): - cast_op = block.append_op( - type="cast", - inputs={"X": grad}, - outputs={"Out": new_grad}, - attrs={ - "in_dtype": core.VarDesc.VarType.FP16, - "out_dtype": core.VarDesc.VarType.FP32 - }, - stop_gradient=True) + cast_op = block.append_op(type="cast", + inputs={"X": grad}, + outputs={"Out": new_grad}, + attrs={ + "in_dtype": + core.VarDesc.VarType.FP16, + "out_dtype": + core.VarDesc.VarType.FP32 + }, + stop_gradient=True) ret_param_and_grads.append((param, new_grad)) return ret_param_and_grads def apply_optimize(self, loss, startup_program, params_grads): new_params_grads = self.fp16_compression(params_grads) - return self.inner_opt.apply_optimize( - loss, - startup_program=startup_program, - params_grads=new_params_grads) + return self.inner_opt.apply_optimize(loss, + startup_program=startup_program, + params_grads=new_params_grads) diff --git a/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py index 949ef3e5f3a..10175f8936a 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py @@ -18,6 +18,7 @@ __all__ = [] class GradientMergeOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(GradientMergeOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -33,8 +34,9 @@ class GradientMergeOptimizer(MetaOptimizerBase): def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): - super(GradientMergeOptimizer, self)._set_basic_info( - loss, role_maker, user_defined_optimizer, user_defined_strategy) + super(GradientMergeOptimizer, + self)._set_basic_info(loss, role_maker, user_defined_optimizer, + 
user_defined_strategy) def _init_wrapped_opt(self): config = self.user_defined_strategy.gradient_merge_configs diff --git a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py index 0fd7db56de5..8f42553048f 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py @@ -24,6 +24,7 @@ __all__ = [] class GraphExecutionOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(GraphExecutionOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -94,9 +95,12 @@ class GraphExecutionOptimizer(MetaOptimizerBase): inputs={}, outputs={"NCCLID": comm_id_var}, attrs={ - "trainers": trainer_endpoints, - "trainer_id": trainer_id, - "nccl_comm_num": build_strategy.nccl_comm_num, + "trainers": + trainer_endpoints, + "trainer_id": + trainer_id, + "nccl_comm_num": + build_strategy.nccl_comm_num, "use_hierarchical_allreduce": build_strategy.use_hierarchical_allreduce, "hierarchical_allreduce_inter_ranks": @@ -120,9 +124,12 @@ class GraphExecutionOptimizer(MetaOptimizerBase): inputs={}, outputs={"BKCLID": comm_id_var}, attrs={ - "trainers": trainer_endpoints, - "trainer_id": trainer_id, - "nccl_comm_num": build_strategy.nccl_comm_num, + "trainers": + trainer_endpoints, + "trainer_id": + trainer_id, + "nccl_comm_num": + build_strategy.nccl_comm_num, "use_hierarchical_allreduce": build_strategy.use_hierarchical_allreduce, "hierarchical_allreduce_inter_ranks": diff --git a/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py index 6d2474d9352..3dc5bed03ae 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py @@ -20,6 +20,7 @@ __all__ = [] class LambOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(LambOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -30,8 +31,9 @@ class LambOptimizer(MetaOptimizerBase): def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): - super(LambOptimizer, self)._set_basic_info( - loss, role_maker, user_defined_optimizer, user_defined_strategy) + super(LambOptimizer, + self)._set_basic_info(loss, role_maker, user_defined_optimizer, + user_defined_strategy) opt = self.inner_opt if not isinstance(opt, AdamOptimizer): @@ -70,8 +72,8 @@ class LambOptimizer(MetaOptimizerBase): if self.user_defined_strategy.lamb: if not isinstance(self.inner_opt, AdamOptimizer): logging.warn( - "lamb need the inner optimizer to be AdamOptimizer optimizer but got {}.". - format(self.inner_opt.type)) + "lamb need the inner optimizer to be AdamOptimizer optimizer but got {}." 
+ .format(self.inner_opt.type)) return False return True return False @@ -101,8 +103,9 @@ class LambOptimizer(MetaOptimizerBase): return self.lamb_opt.apply_gradients(params_grads=params_grads) def apply_optimize(self, loss, startup_program, params_grads): - return self.lamb_opt.apply_optimize( - loss, startup_program=startup_program, params_grads=params_grads) + return self.lamb_opt.apply_optimize(loss, + startup_program=startup_program, + params_grads=params_grads) def minimize_impl(self, loss, diff --git a/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py index e1bf3722c19..44f8fe473e2 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py @@ -19,6 +19,7 @@ __all__ = [] class LarsOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(LarsOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -29,8 +30,9 @@ class LarsOptimizer(MetaOptimizerBase): def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): - super(LarsOptimizer, self)._set_basic_info( - loss, role_maker, user_defined_optimizer, user_defined_strategy) + super(LarsOptimizer, + self)._set_basic_info(loss, role_maker, user_defined_optimizer, + user_defined_strategy) opt = self.inner_opt if not isinstance(opt, Momentum): @@ -57,8 +59,8 @@ class LarsOptimizer(MetaOptimizerBase): if self.user_defined_strategy.lars: if not isinstance(self.inner_opt, Momentum): logging.warn( - "lars need the inner optimizer to be Momentum optimizer but got {}.". - format(self.inner_opt.type)) + "lars need the inner optimizer to be Momentum optimizer but got {}." + .format(self.inner_opt.type)) return False return True return False @@ -88,8 +90,9 @@ class LarsOptimizer(MetaOptimizerBase): return self.lars_opt.apply_gradients(params_grads=params_grads) def apply_optimize(self, loss, startup_program, params_grads): - return self.lars_opt.apply_optimize( - loss, startup_program=startup_program, params_grads=params_grads) + return self.lars_opt.apply_optimize(loss, + startup_program=startup_program, + params_grads=params_grads) def minimize_impl(self, loss, diff --git a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py index 9052111d22c..eb170dedb0b 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py @@ -24,6 +24,7 @@ __all__ = [] class LocalSGDOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(LocalSGDOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -70,12 +71,11 @@ class LocalSGDOptimizer(MetaOptimizerBase): p2s = [] for param in non_dist_params: - snapshot = block.create_var( - name=self.snapshot_name(param.name), - shape=param.shape, - persistable=True, - stop_gradient=True, - dtype=param.dtype) + snapshot = block.create_var(name=self.snapshot_name(param.name), + shape=param.shape, + persistable=True, + stop_gradient=True, + dtype=param.dtype) p2s.append([param, snapshot]) return p2s @@ -89,8 +89,8 @@ class LocalSGDOptimizer(MetaOptimizerBase): startup_program=None, parameter_list=None, no_grad_set=None): - minimized = self.inner_opt.minimize( - loss, startup_program=startup_program) + minimized = self.inner_opt.minimize(loss, + startup_program=startup_program) k_steps_value = 
self.user_defined_strategy.localsgd_configs['k_steps'] begin_step_value = self.user_defined_strategy.localsgd_configs[ @@ -109,82 +109,78 @@ class LocalSGDOptimizer(MetaOptimizerBase): p2s = self.create_snapshot_vars(main_block.program) with program_guard(main_block.program, startup_program): step = layers.autoincreased_step_counter(begin=1) - k_steps = layers.create_global_var( - name="k_steps", - shape=[1], - value=k_steps_value, - dtype='int64', - persistable=True) - - begin_step = layers.create_global_var( - name="begin_step", - shape=[1], - value=begin_step_value, - dtype='int64', - persistable=True) - - last_step = layers.create_global_var( - name="last_step", - shape=[1], - value=begin_step_value, - dtype='int64', - persistable=True) + k_steps = layers.create_global_var(name="k_steps", + shape=[1], + value=k_steps_value, + dtype='int64', + persistable=True) + + begin_step = layers.create_global_var(name="begin_step", + shape=[1], + value=begin_step_value, + dtype='int64', + persistable=True) + + last_step = layers.create_global_var(name="last_step", + shape=[1], + value=begin_step_value, + dtype='int64', + persistable=True) def communicate(): sub_block = default_main_program().current_block() ring_id = -1 for param, snapshot in p2s: - sub_block.append_op( - type='elementwise_sub', - inputs={'X': [snapshot], - 'Y': [param]}, - outputs={'Out': [param]}, - attrs={OP_ROLE_KEY: OpRole.Optimize}) - sub_block.append_op( - type='c_sync_calc_stream', - inputs={'X': param}, - outputs={'Out': param}, - attrs={OP_ROLE_KEY: OpRole.Optimize}) + sub_block.append_op(type='elementwise_sub', + inputs={ + 'X': [snapshot], + 'Y': [param] + }, + outputs={'Out': [param]}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) + sub_block.append_op(type='c_sync_calc_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) ring_id = (ring_id + 1) % self.nrings - sub_block.append_op( - type='c_allreduce_sum', - inputs={'X': [param]}, - outputs={'Out': [param]}, - attrs={ - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Optimize - }) + sub_block.append_op(type='c_allreduce_sum', + inputs={'X': [param]}, + outputs={'Out': [param]}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Optimize + }) for ring_id in range(self.nrings): - sub_block.append_op( - type='c_sync_comm_stream', - inputs={'X': param}, - outputs={'Out': param}, - attrs={ - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Optimize - }) + sub_block.append_op(type='c_sync_comm_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Optimize + }) for param, snapshot in p2s: - sub_block.append_op( - type='scale', - inputs={'X': [param]}, - outputs={'Out': [param]}, - attrs={ - 'scale': 1.0 / self.role_maker._worker_num(), - OP_ROLE_KEY: OpRole.Optimize - }) - sub_block.append_op( - type='elementwise_sub', - inputs={'X': [snapshot], - 'Y': [param]}, - outputs={'Out': [param]}, - attrs={OP_ROLE_KEY: OpRole.Optimize}) - sub_block.append_op( - type='assign', - inputs={'X': [param]}, - outputs={'Out': [snapshot]}, - attrs={OP_ROLE_KEY: OpRole.Optimize}) + sub_block.append_op(type='scale', + inputs={'X': [param]}, + outputs={'Out': [param]}, + attrs={ + 'scale': + 1.0 / self.role_maker._worker_num(), + OP_ROLE_KEY: + OpRole.Optimize + }) + sub_block.append_op(type='elementwise_sub', + inputs={ + 'X': [snapshot], + 'Y': [param] + }, + outputs={'Out': [param]}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) + sub_block.append_op(type='assign', + inputs={'X': [param]}, + outputs={'Out': 
[snapshot]}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) layers.assign(step, last_step) def begin_localsgd(): @@ -195,6 +191,7 @@ class LocalSGDOptimizer(MetaOptimizerBase): class AdaptiveLocalSGDOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(AdaptiveLocalSGDOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -243,12 +240,11 @@ class AdaptiveLocalSGDOptimizer(MetaOptimizerBase): p2s = [] for param in non_dist_params: - snapshot = block.create_var( - name=self.snapshot_name(param.name), - shape=param.shape, - persistable=True, - stop_gradient=True, - dtype=param.dtype) + snapshot = block.create_var(name=self.snapshot_name(param.name), + shape=param.shape, + persistable=True, + stop_gradient=True, + dtype=param.dtype) p2s.append([param, snapshot]) return p2s @@ -258,37 +254,35 @@ class AdaptiveLocalSGDOptimizer(MetaOptimizerBase): layers.assign(param, snapshot) def _generate_avg_loss(self, program_block, loss, avg_loss): - program_block.append_op( - type='c_allreduce_sum', - inputs={'X': [loss]}, - outputs={'Out': [avg_loss]}, - attrs={ - 'ring_id': 0, - OP_ROLE_KEY: OpRole.Optimize, - 'use_calc_stream': True - }) - program_block.append_op( - type='c_sync_calc_stream', - inputs={'X': [avg_loss]}, - outputs={'Out': [avg_loss]}, - attrs={OP_ROLE_KEY: OpRole.Optimize}) - - program_block.append_op( - type='scale', - inputs={'X': [avg_loss]}, - outputs={'Out': [avg_loss]}, - attrs={ - 'scale': 1.0 / self.role_maker._worker_num(), - OP_ROLE_KEY: OpRole.Optimize - }) + program_block.append_op(type='c_allreduce_sum', + inputs={'X': [loss]}, + outputs={'Out': [avg_loss]}, + attrs={ + 'ring_id': 0, + OP_ROLE_KEY: OpRole.Optimize, + 'use_calc_stream': True + }) + program_block.append_op(type='c_sync_calc_stream', + inputs={'X': [avg_loss]}, + outputs={'Out': [avg_loss]}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) + + program_block.append_op(type='scale', + inputs={'X': [avg_loss]}, + outputs={'Out': [avg_loss]}, + attrs={ + 'scale': + 1.0 / self.role_maker._worker_num(), + OP_ROLE_KEY: OpRole.Optimize + }) def minimize_impl(self, loss, startup_program=None, parameter_list=None, no_grad_set=None): - minimized = self.inner_opt.minimize( - loss, startup_program=startup_program) + minimized = self.inner_opt.minimize(loss, + startup_program=startup_program) init_k_steps = self.user_defined_strategy.adaptive_localsgd_configs[ 'init_k_steps'] @@ -309,47 +303,41 @@ class AdaptiveLocalSGDOptimizer(MetaOptimizerBase): with program_guard(main_block.program, startup_program): step = layers.autoincreased_step_counter(begin=1) - k_steps = layers.create_global_var( - name="k_steps", - shape=[1], - value=int(init_k_steps), - dtype='int64', - persistable=True) - - begin_step = layers.create_global_var( - name="begin_step", - shape=[1], - value=int(begin_step_value), - dtype='int64', - persistable=True) - - last_step = layers.create_global_var( - name="last_step", - shape=[1], - value=int(0), - dtype='int64', - persistable=True) - - avg_loss = layers.create_global_var( - name="avg_loss", - shape=[1], - value=float(0), - dtype=loss.dtype, - persistable=True) - - lr_0 = layers.create_global_var( - name="lr_0", - shape=[1], - value=float(0), - dtype='float32', - persistable=True) - - loss_0 = layers.create_global_var( - name="loss_0", - shape=[1], - value=float(0), - dtype='float32', - persistable=True) + k_steps = layers.create_global_var(name="k_steps", + shape=[1], + value=int(init_k_steps), + dtype='int64', + persistable=True) + + begin_step = layers.create_global_var(name="begin_step", + 
shape=[1], + value=int(begin_step_value), + dtype='int64', + persistable=True) + + last_step = layers.create_global_var(name="last_step", + shape=[1], + value=int(0), + dtype='int64', + persistable=True) + + avg_loss = layers.create_global_var(name="avg_loss", + shape=[1], + value=float(0), + dtype=loss.dtype, + persistable=True) + + lr_0 = layers.create_global_var(name="lr_0", + shape=[1], + value=float(0), + dtype='float32', + persistable=True) + + loss_0 = layers.create_global_var(name="loss_0", + shape=[1], + value=float(0), + dtype='float32', + persistable=True) global_lr = self.inner_opt._global_learning_rate() @@ -364,75 +352,75 @@ class AdaptiveLocalSGDOptimizer(MetaOptimizerBase): sub_block = default_main_program().current_block() ring_id = -1 for param, snapshot in p2s: - sub_block.append_op( - type='elementwise_sub', - inputs={'X': [snapshot], - 'Y': [param]}, - outputs={'Out': [param]}, - attrs={OP_ROLE_KEY: OpRole.Optimize}) - sub_block.append_op( - type='c_sync_calc_stream', - inputs={'X': param}, - outputs={'Out': param}, - attrs={OP_ROLE_KEY: OpRole.Optimize}) + sub_block.append_op(type='elementwise_sub', + inputs={ + 'X': [snapshot], + 'Y': [param] + }, + outputs={'Out': [param]}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) + sub_block.append_op(type='c_sync_calc_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) ring_id = (ring_id + 1) % self.nrings - sub_block.append_op( - type='c_allreduce_sum', - inputs={'X': [param]}, - outputs={'Out': [param]}, - attrs={ - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Optimize - }) + sub_block.append_op(type='c_allreduce_sum', + inputs={'X': [param]}, + outputs={'Out': [param]}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Optimize + }) for ring_id in range(self.nrings): - sub_block.append_op( - type='c_sync_comm_stream', - inputs={'X': param}, - outputs={'Out': param}, - attrs={ - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Optimize - }) + sub_block.append_op(type='c_sync_comm_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Optimize + }) for param, snapshot in p2s: - sub_block.append_op( - type='scale', - inputs={'X': [param]}, - outputs={'Out': [param]}, - attrs={ - 'scale': 1.0 / self.role_maker._worker_num(), - OP_ROLE_KEY: OpRole.Optimize - }) - sub_block.append_op( - type='elementwise_sub', - inputs={'X': [snapshot], - 'Y': [param]}, - outputs={'Out': [param]}, - attrs={OP_ROLE_KEY: OpRole.Optimize}) - sub_block.append_op( - type='assign', - inputs={'X': [param]}, - outputs={'Out': [snapshot]}, - attrs={OP_ROLE_KEY: OpRole.Optimize}) + sub_block.append_op(type='scale', + inputs={'X': [param]}, + outputs={'Out': [param]}, + attrs={ + 'scale': + 1.0 / self.role_maker._worker_num(), + OP_ROLE_KEY: + OpRole.Optimize + }) + sub_block.append_op(type='elementwise_sub', + inputs={ + 'X': [snapshot], + 'Y': [param] + }, + outputs={'Out': [param]}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) + sub_block.append_op(type='assign', + inputs={'X': [param]}, + outputs={'Out': [snapshot]}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) layers.assign(step, last_step) def communicate_avg_loss(): communicate() self._generate_avg_loss(main_block, loss, avg_loss) - next_local_steps = layers.cast( - layers.ceil( - layers.sqrt(lr_0 * avg_loss / (global_lr * loss_0) * - float(init_k_steps))), - dtype='int64') - max_local_steps = layers.fill_constant( - shape=[1], dtype='int64', value=16) - min_local_steps = layers.fill_constant( - shape=[1], 
dtype='int64', value=1) - next_local_steps = layers.elementwise_min(next_local_steps, - max_local_steps) - next_local_steps = layers.elementwise_max(next_local_steps, - min_local_steps) + next_local_steps = layers.cast(layers.ceil( + layers.sqrt(lr_0 * avg_loss / (global_lr * loss_0) * + float(init_k_steps))), + dtype='int64') + max_local_steps = layers.fill_constant(shape=[1], + dtype='int64', + value=16) + min_local_steps = layers.fill_constant(shape=[1], + dtype='int64', + value=1) + next_local_steps = layers.elementwise_min( + next_local_steps, max_local_steps) + next_local_steps = layers.elementwise_max( + next_local_steps, min_local_steps) layers.assign(next_local_steps, k_steps) def begin_localsgd(): diff --git a/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py b/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py index 3bbaa055c5e..35e11221b6f 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py +++ b/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py @@ -18,6 +18,7 @@ __all__ = [] class MetaOptimizerBase(Optimizer): + def __init__(self, optimizer): self.inner_opt = optimizer self._learning_rate = self.inner_opt._learning_rate @@ -47,12 +48,14 @@ class MetaOptimizerBase(Optimizer): return False def _disable_strategy(self, dist_strategy): - raise NotImplementedError("you should implement disable strategy in {}". - format(type(self).__name__)) + raise NotImplementedError( + "you should implement disable strategy in {}".format( + type(self).__name__)) def _enable_strategy(self, dist_strategy, context=None): - raise NotImplementedError("you should implement enable strategy in {}". - format(type(self).__name__)) + raise NotImplementedError( + "you should implement enable strategy in {}".format( + type(self).__name__)) def apply_gradients(self, params_grads): return self.inner_opt.apply_gradients(params_grads=params_grads) @@ -67,22 +70,23 @@ class MetaOptimizerBase(Optimizer): no_grad_set, callbacks) def apply_optimize(self, loss, startup_program, params_grads): - return self.inner_opt.apply_optimize( - loss, startup_program=startup_program, params_grads=params_grads) + return self.inner_opt.apply_optimize(loss, + startup_program=startup_program, + params_grads=params_grads) def minimize_impl(self, loss, startup_program=None, parameter_list=None, no_grad_set=None): - params_grads = self.backward( - loss, - startup_program=startup_program, - parameter_list=parameter_list, - no_grad_set=no_grad_set) + params_grads = self.backward(loss, + startup_program=startup_program, + parameter_list=parameter_list, + no_grad_set=no_grad_set) - optimize_ops = self.apply_optimize( - loss, startup_program=startup_program, params_grads=params_grads) + optimize_ops = self.apply_optimize(loss, + startup_program=startup_program, + params_grads=params_grads) return optimize_ops, params_grads @@ -91,6 +95,7 @@ class MetaOptimizerBase(Optimizer): startup_program=None, parameter_list=None, no_grad_set=None): - optimize_ops, params_grads = self.minimize_impl( - loss, startup_program, parameter_list, no_grad_set) + optimize_ops, params_grads = self.minimize_impl(loss, startup_program, + parameter_list, + no_grad_set) return optimize_ops, params_grads diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py index ba2a0e84c7a..41a5da0d315 100644 --- 
a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py @@ -19,6 +19,7 @@ __all__ = [] class ParameterServerGraphOptimizer(ParameterServerOptimizer): + def __init__(self, optimizer): super(ParameterServerGraphOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -59,11 +60,10 @@ class ParameterServerGraphOptimizer(ParameterServerOptimizer): self._compiled_program = compiler.CompiledProgram(main_program) - self._compiled_program.with_data_parallel( - loss_name=loss.name, - build_strategy=build_strategy, - exec_strategy=exec_strategy, - share_vars_from=None) + self._compiled_program.with_data_parallel(loss_name=loss.name, + build_strategy=build_strategy, + exec_strategy=exec_strategy, + share_vars_from=None) return self._compiled_program diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py index aec24365223..c04215d4565 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py @@ -24,6 +24,7 @@ __all__ = [] class ParameterServerOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(ParameterServerOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -32,8 +33,9 @@ class ParameterServerOptimizer(MetaOptimizerBase): def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): - super(ParameterServerOptimizer, self)._set_basic_info( - loss, role_maker, user_defined_optimizer, user_defined_strategy) + super(ParameterServerOptimizer, + self)._set_basic_info(loss, role_maker, user_defined_optimizer, + user_defined_strategy) #self.micro_batch_size = user_defined_strategy.pipeline_configs[ # 'micro_batch_size'] @@ -107,8 +109,8 @@ class ParameterServerOptimizer(MetaOptimizerBase): if not use_ps_gpu: _main = worker.delete_optimizer_pass(_main, compiled_config) _main = worker.append_send_ops_pass(_main, compiled_config) - _startup = worker.delete_extra_optimizes_pass(_startup, - compiled_config) + _startup = worker.delete_extra_optimizes_pass( + _startup, compiled_config) # for startup program _startup = worker.fake_init_ops_pass(_startup, compiled_config) @@ -117,13 +119,12 @@ class ParameterServerOptimizer(MetaOptimizerBase): from paddle.fluid.transpiler.collective import SingleProcessMultiThread t = SingleProcessMultiThread() env = self.get_dist_env() - t.transpile( - startup_program=_startup, - main_program=_main, - rank=env["trainer_id"], - endpoints=env["trainer_endpoints"], - current_endpoint=env['current_endpoint'], - wait_port=False) + t.transpile(startup_program=_startup, + main_program=_main, + rank=env["trainer_id"], + endpoints=env["trainer_endpoints"], + current_endpoint=env['current_endpoint'], + wait_port=False) compiled_config.set_origin_ps_main_program(_main) compiled_config.set_origin_ps_startup_program(_startup) @@ -138,8 +139,8 @@ class ParameterServerOptimizer(MetaOptimizerBase): _main, compiled_config, stage_id, device) else: # for default worker - _main = heter_worker.split_trainer_ops_pass(_main, - compiled_config) + _main = heter_worker.split_trainer_ops_pass( + _main, compiled_config) else: _main = worker.append_send_ops_pass(_main, compiled_config) _startup = _startup @@ -202,28 +203,29 @@ class ParameterServerOptimizer(MetaOptimizerBase): compiled_config, 
True) if not compiled_config.is_sync_mode(): - _main = server.delete_unused_in_main_pass(_main, - compiled_config) + _main = server.delete_unused_in_main_pass( + _main, compiled_config) - _startup = server.delete_unused_in_startup_pass(_startup, _main, - compiled_config) + _startup = server.delete_unused_in_startup_pass( + _startup, _main, compiled_config) else: _main = server.add_listen_and_serv_pass(_main, compiled_config) _main = server.add_rpc_global_flags_pass(_main, compiled_config) _main = server.add_geo_optimizer_pass(_main, compiled_config) _startup = server.build_pserver_startup_program_pass( _startup, _main, compiled_config) - _startup = server.delete_unused_in_startup_pass(_startup, _main, - compiled_config) + _startup = server.delete_unused_in_startup_pass( + _startup, _main, compiled_config) return _main, _startup def _can_apply_geo(self, dist_strategy, program): + def get_sys_free_mem(): plat = platform.system() if platform.system() == "Darwin": - vm = subprocess.Popen( - ['vm_stat'], stdout=subprocess.PIPE).communicate()[0] + vm = subprocess.Popen(['vm_stat'], + stdout=subprocess.PIPE).communicate()[0] # Process vm_stat vmLines = vm.split('\n') sep = re.compile(r':[\s]+') @@ -231,8 +233,8 @@ class ParameterServerOptimizer(MetaOptimizerBase): for row in range(1, len(vmLines) - 2): rowText = vmLines[row].strip() rowElements = sep.split(rowText) - vmStats[(rowElements[0] - )] = int(rowElements[1].strip(r'\.')) * 4096 + vmStats[(rowElements[0])] = int( + rowElements[1].strip(r'\.')) * 4096 return vmStats["Pages free"] elif platform.system() == "Linux": mems = {} diff --git a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py index 2988865887a..d3f461850b8 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py @@ -26,6 +26,7 @@ __all__ = [] class PipelineOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(PipelineOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -33,15 +34,18 @@ class PipelineOptimizer(MetaOptimizerBase): "RecomputeOptimizer", "AMPOptimizer", ] - self.meta_optimizers_black_list = ["GraphExecutionOptimizer", ] + self.meta_optimizers_black_list = [ + "GraphExecutionOptimizer", + ] self.global_ring_id = 1 self.dp_ring_id = 2 self.start_pipeline_ring_id = 20 # Just a magic number def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): - super(PipelineOptimizer, self)._set_basic_info( - loss, role_maker, user_defined_optimizer, user_defined_strategy) + super(PipelineOptimizer, + self)._set_basic_info(loss, role_maker, user_defined_optimizer, + user_defined_strategy) self.micro_batch_size = user_defined_strategy.pipeline_configs[ 'micro_batch_size'] self.num_microbatches = user_defined_strategy.pipeline_configs[ @@ -85,23 +89,23 @@ class PipelineOptimizer(MetaOptimizerBase): if param.is_distributed: continue - block.append_op( - type='c_broadcast', - inputs={'X': param}, - outputs={'Out': param}, - attrs={ - 'ring_id': ring_id, - 'root': 0, - OP_ROLE_KEY: OpRole.Forward - }) + block.append_op(type='c_broadcast', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + 'root': 0, + OP_ROLE_KEY: OpRole.Forward + }) if not param: return # no parameter on this device - block.append_op( - type='c_sync_comm_stream', - inputs={'X': param}, - outputs={'Out': param}, - attrs={'ring_id': 
ring_id, - OP_ROLE_KEY: OpRole.Forward}) + block.append_op(type='c_sync_comm_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Forward + }) def _get_process_group_info(self): # global ring info @@ -123,10 +127,12 @@ class PipelineOptimizer(MetaOptimizerBase): self._get_process_group_info() collective_helper = CollectiveHelper(self.role_maker, wait_port=False) # Create global ring for all gpus (ring_id = 0) - collective_helper._init_communicator( - self.startup_program, self.current_endpoint, self.global_endpoints, - self.global_rank, self.global_ring_id, True, self.global_ring_id, - True) + collective_helper._init_communicator(self.startup_program, + self.current_endpoint, + self.global_endpoints, + self.global_rank, + self.global_ring_id, True, + self.global_ring_id, True) # Create pipeline rings if self.inner_parallelism > 1: pipeline_id = self.rank // self.inner_parallelism @@ -147,10 +153,12 @@ class PipelineOptimizer(MetaOptimizerBase): ] pipeline_rank = 0 if self.rank == first_node else 1 pipeline_nranks = 2 - collective_helper._init_communicator( - self.startup_program, self.current_endpoint, - pipeline_endpoints, pipeline_rank, ring_id, False, - self.global_ring_id, True) + collective_helper._init_communicator(self.startup_program, + self.current_endpoint, + pipeline_endpoints, + pipeline_rank, ring_id, + False, self.global_ring_id, + True) # Create dp rings if self.pipeline_num > 1: @@ -215,15 +223,14 @@ class PipelineOptimizer(MetaOptimizerBase): for idx, op in reversed(list(enumerate(block.ops))): if is_loss_grad_op(op): loss_grad_var = block.vars[op.output_arg_names[0]] - block._insert_op( - idx + 1, - type='scale', - inputs={'X': loss_grad_var}, - outputs={'Out': loss_grad_var}, - attrs={ - 'scale': 1.0 / pipeline_num, - OP_ROLE_KEY: OpRole.Backward - }) + block._insert_op(idx + 1, + type='scale', + inputs={'X': loss_grad_var}, + outputs={'Out': loss_grad_var}, + attrs={ + 'scale': 1.0 / pipeline_num, + OP_ROLE_KEY: OpRole.Backward + }) def _insert_allreduce_ops(self, ring_id): block = self.main_program._pipeline_opt['section_program'].global_block( @@ -256,13 +263,12 @@ class PipelineOptimizer(MetaOptimizerBase): if origin_param.is_distributed: continue - block._insert_op( - first_optimize_op_idx + offset, - type='c_allreduce_sum', - inputs={'X': grad}, - outputs={'Out': grad}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Optimize - }) + block._insert_op(first_optimize_op_idx + offset, + type='c_allreduce_sum', + inputs={'X': grad}, + outputs={'Out': grad}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Optimize + }) diff --git a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py index d223ff032d4..cd6bc03a5d5 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py @@ -26,6 +26,7 @@ from paddle.distributed.ps.utils.ps_factory import PsProgramBuilderFactory class ParameterServerOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(ParameterServerOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -34,8 +35,9 @@ class ParameterServerOptimizer(MetaOptimizerBase): def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): - super(ParameterServerOptimizer, self)._set_basic_info( - loss, role_maker, user_defined_optimizer, 
user_defined_strategy) + super(ParameterServerOptimizer, + self)._set_basic_info(loss, role_maker, user_defined_optimizer, + user_defined_strategy) def _set_origin_programs(self, losses): self.origin_main_programs = [] @@ -69,8 +71,8 @@ class ParameterServerOptimizer(MetaOptimizerBase): attrs['is_worker'] = self.role_maker._is_worker() attrs['is_server'] = self.role_maker._is_server() attrs['is_heter_worker'] = self.role_maker._is_heter_worker() - logger.info("this process is heter? {}".format(attrs[ - 'is_heter_worker'])) + logger.info("this process is heter? {}".format( + attrs['is_heter_worker'])) attrs['use_ps_gpu'] = self.user_defined_strategy.a_sync_configs[ "use_ps_gpu"] attrs['lr_decay_steps'] = self.user_defined_strategy.a_sync_configs[ @@ -85,7 +87,7 @@ class ParameterServerOptimizer(MetaOptimizerBase): build_var_distributed(attrs) - # server + # server attrs['_main_server'] = fluid.Program() attrs['_startup_server'] = fluid.Program() attrs['tensor_table'] = {} @@ -112,6 +114,7 @@ class ParameterServerOptimizer(MetaOptimizerBase): if startup_program == None: startup_program = paddle.static.default_startup_program() + # print("program after inner optimizer minimize:", # str(loss.block.program)) self._set_origin_programs([loss]) @@ -144,11 +147,12 @@ class ParameterServerOptimizer(MetaOptimizerBase): return None, None def _can_apply_geo(self, program): + def get_sys_free_mem(): plat = platform.system() if platform.system() == "Darwin": - vm = subprocess.Popen( - ['vm_stat'], stdout=subprocess.PIPE).communicate()[0] + vm = subprocess.Popen(['vm_stat'], + stdout=subprocess.PIPE).communicate()[0] # Process vm_stat vmLines = vm.split('\n') sep = re.compile(r':[\s]+') @@ -156,8 +160,8 @@ class ParameterServerOptimizer(MetaOptimizerBase): for row in range(1, len(vmLines) - 2): rowText = vmLines[row].strip() rowElements = sep.split(rowText) - vmStats[(rowElements[0] - )] = int(rowElements[1].strip(r'\.')) * 4096 + vmStats[(rowElements[0])] = int( + rowElements[1].strip(r'\.')) * 4096 return vmStats["Pages free"] elif platform.system() == "Linux": mems = {} diff --git a/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py index d056d4e1065..2c7b1e45ebd 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py @@ -26,6 +26,7 @@ from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_l class RawProgramOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(RawProgramOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -38,13 +39,16 @@ class RawProgramOptimizer(MetaOptimizerBase): "DGCOptimizer", "LocalSGDOptimizer", ] - self.meta_optimizers_black_list = ["GraphExecutionOptimizer", ] + self.meta_optimizers_black_list = [ + "GraphExecutionOptimizer", + ] self.global_ring_id = 0 def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): - super(RawProgramOptimizer, self)._set_basic_info( - loss, role_maker, user_defined_optimizer, user_defined_strategy) + super(RawProgramOptimizer, + self)._set_basic_info(loss, role_maker, user_defined_optimizer, + user_defined_strategy) self.without_graph_optimization = user_defined_strategy.without_graph_optimization self.fuse_all_reduce_ops = user_defined_strategy.fuse_all_reduce_ops if self.fuse_all_reduce_ops: @@ -72,23 +76,23 @@ class RawProgramOptimizer(MetaOptimizerBase): 
if param.is_distributed: continue - block.append_op( - type='c_broadcast', - inputs={'X': param}, - outputs={'Out': param}, - attrs={ - 'ring_id': ring_id, - 'root': 0, - OP_ROLE_KEY: OpRole.Forward - }) + block.append_op(type='c_broadcast', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + 'root': 0, + OP_ROLE_KEY: OpRole.Forward + }) if not param: return # no parameter on this device - block.append_op( - type='c_sync_comm_stream', - inputs={'X': param}, - outputs={'Out': param}, - attrs={'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Forward}) + block.append_op(type='c_sync_comm_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Forward + }) def _get_process_group_info(self): # global ring info @@ -100,10 +104,12 @@ class RawProgramOptimizer(MetaOptimizerBase): self._get_process_group_info() collective_helper = CollectiveHelper(self.role_maker, wait_port=False) # Create global ring for all gpus (ring_id = 0) - collective_helper._init_communicator( - self.startup_program, self.current_endpoint, self.global_endpoints, - self.global_rank, self.global_ring_id, True, self.global_ring_id, - True) + collective_helper._init_communicator(self.startup_program, + self.current_endpoint, + self.global_endpoints, + self.global_rank, + self.global_ring_id, True, + self.global_ring_id, True) self._broadcast_params(self.global_ring_id) def minimize_impl(self, @@ -190,38 +196,35 @@ class RawProgramOptimizer(MetaOptimizerBase): if not grad_vars: return - gm_block._insert_op( - first_optimize_op_idx, - type="c_sync_calc_stream", - inputs={'X': grad_vars[0]}, - outputs={'Out': grad_vars[0]}, - attrs={OP_ROLE_KEY: OpRole.Backward}) + gm_block._insert_op(first_optimize_op_idx, + type="c_sync_calc_stream", + inputs={'X': grad_vars[0]}, + outputs={'Out': grad_vars[0]}, + attrs={OP_ROLE_KEY: OpRole.Backward}) insert_op_num = 1 ring_id = self.global_ring_id # NOTE: can perform fuse allreduce inside the loop in the future for i, (p, g) in enumerate(zip(param_vars, grad_vars)): - gm_block._insert_op( - first_optimize_op_idx + insert_op_num, - type="c_allreduce_sum", - inputs={'X': g}, - outputs={'Out': g}, - attrs={ - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Backward, - }) + gm_block._insert_op(first_optimize_op_idx + insert_op_num, + type="c_allreduce_sum", + inputs={'X': g}, + outputs={'Out': g}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Backward, + }) insert_op_num += 1 - gm_block._insert_op( - first_optimize_op_idx + insert_op_num, - type="c_sync_comm_stream", - inputs={'X': grad_vars}, - outputs={'Out': grad_vars}, - attrs={ - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Backward, - }) + gm_block._insert_op(first_optimize_op_idx + insert_op_num, + type="c_sync_comm_stream", + inputs={'X': grad_vars}, + outputs={'Out': grad_vars}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Backward, + }) def _transpile_main_program(self, loss): self._insert_loss_grad_ops(loss) @@ -245,15 +248,14 @@ class RawProgramOptimizer(MetaOptimizerBase): for idx, op in reversed(list(enumerate(block.ops))): if is_loss_grad_op(op): loss_grad_var = block.vars[op.output_arg_names[0]] - block._insert_op( - idx + 1, - type='scale', - inputs={'X': loss_grad_var}, - outputs={'Out': loss_grad_var}, - attrs={ - 'scale': 1.0 / self.nranks, - OP_ROLE_KEY: OpRole.Backward - }) + block._insert_op(idx + 1, + type='scale', + inputs={'X': loss_grad_var}, + outputs={'Out': loss_grad_var}, + attrs={ + 'scale': 1.0 / self.nranks, + OP_ROLE_KEY: 
OpRole.Backward + }) def _insert_allreduce_ops(self): block = self.main_program.global_block() @@ -277,35 +279,36 @@ class RawProgramOptimizer(MetaOptimizerBase): continue grad_vars.append(grad) - block._insert_op( - idx + offset, - type='c_sync_calc_stream', - inputs={'X': grad}, - outputs={'Out': grad}, - attrs={OP_ROLE_KEY: OpRole.Backward, }) + block._insert_op(idx + offset, + type='c_sync_calc_stream', + inputs={'X': grad}, + outputs={'Out': grad}, + attrs={ + OP_ROLE_KEY: OpRole.Backward, + }) offset += 1 - block._insert_op( - idx + offset, - type='c_allreduce_sum', - inputs={'X': grad}, - outputs={'Out': grad}, - attrs={ - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Backward - }) + block._insert_op(idx + offset, + type='c_allreduce_sum', + inputs={'X': grad}, + outputs={'Out': grad}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Backward + }) if grad is None: return for idx, op in enumerate(block.ops): if is_optimizer_op(op): - block._insert_op( - idx, - type='c_sync_comm_stream', - inputs={'X': grad_vars}, - outputs={'Out': grad_vars}, - attrs={'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Backward}) + block._insert_op(idx, + type='c_sync_comm_stream', + inputs={'X': grad_vars}, + outputs={'Out': grad_vars}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Backward + }) break # This function helps reduce the number of allreduce by integrating op, which can save communication time. @@ -342,8 +345,8 @@ class RawProgramOptimizer(MetaOptimizerBase): continue param_grads.append((param, grad)) - outputs_name_to_idx = self.__get_ouputs_name_to_idx(first_backward_idx, - block) + outputs_name_to_idx = self.__get_ouputs_name_to_idx( + first_backward_idx, block) # structure of grad_param_segments is # [([grad0, grad1], [param0, param1]), ([grad2, grad3], [param2, param3])] @@ -371,24 +374,23 @@ class RawProgramOptimizer(MetaOptimizerBase): # not to use reversed since needs the absolute index value grad_segment, param_segment = grad_param_segments[i] # insert coalesce tensor - fused_var = block.create_var( - name=unique_name.generate('FusedOutput_{}'.format(grad_segment[ - 0].name)), - dtype=grad_segment[0].dtype, - persistable=False, - stop_gradient=True) + fused_var = block.create_var(name=unique_name.generate( + 'FusedOutput_{}'.format(grad_segment[0].name)), + dtype=grad_segment[0].dtype, + persistable=False, + stop_gradient=True) fused_vars[i] = fused_var after_idx = outputs_name_to_idx[grad_segment[-1]][1] - block._insert_op_without_sync( - after_idx + 1, - type='c_allreduce_sum', - inputs={'X': fused_var}, - outputs={'Out': fused_var}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': self.calc_comm_same_stream, - OP_ROLE_KEY: OpRole.Backward - }) + block._insert_op_without_sync(after_idx + 1, + type='c_allreduce_sum', + inputs={'X': fused_var}, + outputs={'Out': fused_var}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': + self.calc_comm_same_stream, + OP_ROLE_KEY: OpRole.Backward + }) if not self.calc_comm_same_stream: block._insert_op_without_sync( after_idx + 1, @@ -398,8 +400,8 @@ class RawProgramOptimizer(MetaOptimizerBase): attrs={OP_ROLE_KEY: OpRole.Backward}) # update the outputs_name_to_idx after insertion of sync/allreduce ops - outputs_name_to_idx = self.__get_ouputs_name_to_idx(first_backward_idx, - block) + outputs_name_to_idx = self.__get_ouputs_name_to_idx( + first_backward_idx, block) # the before_idx is not guaranteed sorted, therefore we have to find the # topology to insert the coalesce ops pos_for_coalesce = {} @@ -413,25 +415,25 @@ class 
RawProgramOptimizer(MetaOptimizerBase): pos_for_coalesce[i] = before_idx # insert the coalesce op based on the sorted before_idx - pos_for_coalesce = sorted( - pos_for_coalesce.items(), - key=lambda kv: (kv[1], kv[0]), - reverse=True) + pos_for_coalesce = sorted(pos_for_coalesce.items(), + key=lambda kv: (kv[1], kv[0]), + reverse=True) for i, before_idx in pos_for_coalesce: grad_segment, param_segment = grad_param_segments[i] fused_var = fused_vars[i] - block._insert_op_without_sync( - before_idx, - type="coalesce_tensor", - inputs={"Input": param_segment}, - outputs={"Output": grad_segment, - "FusedOutput": fused_var}, - attrs={ - "copy_data": False, - "use_align": True, - "dtype": grad_segment[0].dtype, - OP_ROLE_KEY: OpRole.Backward - }) + block._insert_op_without_sync(before_idx, + type="coalesce_tensor", + inputs={"Input": param_segment}, + outputs={ + "Output": grad_segment, + "FusedOutput": fused_var + }, + attrs={ + "copy_data": False, + "use_align": True, + "dtype": grad_segment[0].dtype, + OP_ROLE_KEY: OpRole.Backward + }) if self.calc_comm_same_stream: block._sync_with_cpp() @@ -440,13 +442,14 @@ class RawProgramOptimizer(MetaOptimizerBase): # insert the sync comm op for idx, op in enumerate(block.ops): if is_optimizer_op(op): - block._insert_op_without_sync( - idx, - type='c_sync_comm_stream', - inputs={'X': fused_vars}, - outputs={'Out': fused_vars}, - attrs={'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Backward}) + block._insert_op_without_sync(idx, + type='c_sync_comm_stream', + inputs={'X': fused_vars}, + outputs={'Out': fused_vars}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Backward + }) break block._sync_with_cpp() diff --git a/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py index d79675448c0..c9054c793f4 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py @@ -18,6 +18,7 @@ __all__ = [] class RecomputeOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(RecomputeOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -33,8 +34,9 @@ class RecomputeOptimizer(MetaOptimizerBase): def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): - super(RecomputeOptimizer, self)._set_basic_info( - loss, role_maker, user_defined_optimizer, user_defined_strategy) + super(RecomputeOptimizer, + self)._set_basic_info(loss, role_maker, user_defined_optimizer, + user_defined_strategy) def _init_wrapped_opt(self): if self.wrapped_opt is not None: @@ -54,8 +56,8 @@ class RecomputeOptimizer(MetaOptimizerBase): return False if self.user_defined_strategy.recompute == True: - if len(self.user_defined_strategy.recompute_configs[ - "checkpoints"]) == 0: + if len(self.user_defined_strategy.recompute_configs["checkpoints"] + ) == 0: return False else: return True @@ -83,8 +85,9 @@ class RecomputeOptimizer(MetaOptimizerBase): return self.wrapped_opt.apply_gradients(params_grads=params_grads) def apply_optimize(self, loss, startup_program, params_grads): - return self.wrapped_opt.apply_optimize( - loss, startup_program=startup_program, params_grads=params_grads) + return self.wrapped_opt.apply_optimize(loss, + startup_program=startup_program, + params_grads=params_grads) def minimize_impl(self, loss, diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/__init__.py 
b/python/paddle/distributed/fleet/meta_optimizers/sharding/__init__.py index 5d358dbd35f..abf198b97e6 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/__init__.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py index c5b2d9227bc..9e3537a3ced 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py @@ -21,6 +21,7 @@ __all__ = [] class FP16Utils(object): + def __init__(self): pass @@ -32,8 +33,8 @@ class FP16Utils(object): return False assert (len(op.desc.input_arg_names()) == 1) assert (len(op.desc.output_arg_names()) == 1) - input_name, output_name = op.desc.input_arg_names()[ - 0], op.desc.output_arg_names()[0] + input_name, output_name = op.desc.input_arg_names( + )[0], op.desc.output_arg_names()[0] if input_name not in params: return False input_var = block.var(input_name) @@ -51,8 +52,8 @@ class FP16Utils(object): return False assert (len(op.desc.input_arg_names()) == 1) assert (len(op.desc.output_arg_names()) == 1) - input_name, output_name = op.desc.input_arg_names()[ - 0], op.desc.output_arg_names()[0] + input_name, output_name = op.desc.input_arg_names( + )[0], op.desc.output_arg_names()[0] input_var = block.var(input_name) output_var = block.var(output_name) if input_var.dtype != core.VarDesc.VarType.FP16 or \ @@ -88,9 +89,9 @@ class FP16Utils(object): "@GRAD@MERGED" ) if "@MERGED" in output_name else output_name.strip("@GRAD") if param_name not in shard.global_params: - raise ValueError("Output 'X' of cast_op must be a grad of" - "model param, but {} is not a grad".format( - output_name)) + raise ValueError( + "Output 'X' of cast_op must be a grad of" + "model param, but {} is not a grad".format(output_name)) if output_name in reduced_grads_to_param: continue if shard.has_param(param_name): @@ -137,49 +138,45 @@ class FP16Utils(object): if update_loss_scaling_op_idx == -1: return inf_var = block.var(inf_var_name) - inf_var_int32 = block.create_var( - name=inf_var_name + "@cast_int32", - shape=inf_var.shape, - dtype=core.VarDesc.VarType.INT32) - - block._insert_op_without_sync( - update_loss_scaling_op_idx, - type='cast', - inputs={'X': inf_var}, - outputs={'Out': inf_var_int32}, - attrs={ - "in_dtype": inf_var.dtype, - "out_dtype": inf_var_int32.dtype, - OP_ROLE_KEY: OpRole.Optimize - }) + inf_var_int32 = block.create_var(name=inf_var_name + "@cast_int32", + shape=inf_var.shape, + dtype=core.VarDesc.VarType.INT32) + + block._insert_op_without_sync(update_loss_scaling_op_idx, + type='cast', + inputs={'X': inf_var}, + outputs={'Out': inf_var_int32}, + attrs={ + "in_dtype": inf_var.dtype, + "out_dtype": inf_var_int32.dtype, + OP_ROLE_KEY: OpRole.Optimize + }) update_loss_scaling_op_idx += 1 # allreduce(mp)->allreduce(sharding)->allreduce(pp) for ring_id 
in ring_ids: if ring_id == -1: continue # this allreduce communication should not overlap with calc - block._insert_op_without_sync( - update_loss_scaling_op_idx, - type='c_allreduce_max', - inputs={'X': inf_var_int32}, - outputs={'Out': inf_var_int32}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Optimize - }) + block._insert_op_without_sync(update_loss_scaling_op_idx, + type='c_allreduce_max', + inputs={'X': inf_var_int32}, + outputs={'Out': inf_var_int32}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Optimize + }) update_loss_scaling_op_idx += 1 - block._insert_op_without_sync( - update_loss_scaling_op_idx, - type='cast', - inputs={'X': inf_var_int32}, - outputs={'Out': inf_var}, - attrs={ - "in_dtype": inf_var_int32.dtype, - "out_dtype": inf_var.dtype, - OP_ROLE_KEY: OpRole.Optimize - }) + block._insert_op_without_sync(update_loss_scaling_op_idx, + type='cast', + inputs={'X': inf_var_int32}, + outputs={'Out': inf_var}, + attrs={ + "in_dtype": inf_var_int32.dtype, + "out_dtype": inf_var.dtype, + OP_ROLE_KEY: OpRole.Optimize + }) update_loss_scaling_op_idx += 1 block._sync_with_cpp() @@ -201,46 +198,42 @@ class FP16Utils(object): # 1. inf_var_int32 = allreduce_max(inf_var_int32) # 3. inf_var = cast(inf_var_int32) inf_var = block.var(inf_var_name) - inf_var_int32 = block.create_var( - name=inf_var_name + "@cast_int32", - shape=inf_var.shape, - dtype=core.VarDesc.VarType.INT32) - block._insert_op_without_sync( - update_loss_scaling_op_idx, - type='cast', - inputs={'X': inf_var}, - outputs={'Out': inf_var_int32}, - attrs={ - "in_dtype": inf_var.dtype, - "out_dtype": inf_var_int32.dtype, - OP_ROLE_KEY: OpRole.Optimize - }) + inf_var_int32 = block.create_var(name=inf_var_name + "@cast_int32", + shape=inf_var.shape, + dtype=core.VarDesc.VarType.INT32) + block._insert_op_without_sync(update_loss_scaling_op_idx, + type='cast', + inputs={'X': inf_var}, + outputs={'Out': inf_var_int32}, + attrs={ + "in_dtype": inf_var.dtype, + "out_dtype": inf_var_int32.dtype, + OP_ROLE_KEY: OpRole.Optimize + }) update_loss_scaling_op_idx += 1 # allreduce(mp)->allreduce(pp) for ring_id in ring_ids: if ring_id == -1: continue - block._insert_op_without_sync( - update_loss_scaling_op_idx, - type='c_allreduce_max', - inputs={'X': inf_var_int32}, - outputs={'Out': inf_var_int32}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Optimize - }) + block._insert_op_without_sync(update_loss_scaling_op_idx, + type='c_allreduce_max', + inputs={'X': inf_var_int32}, + outputs={'Out': inf_var_int32}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Optimize + }) update_loss_scaling_op_idx += 1 - block._insert_op_without_sync( - update_loss_scaling_op_idx, - type='cast', - inputs={'X': inf_var_int32}, - outputs={'Out': inf_var}, - attrs={ - "in_dtype": inf_var_int32.dtype, - "out_dtype": inf_var.dtype, - OP_ROLE_KEY: OpRole.Optimize - }) + block._insert_op_without_sync(update_loss_scaling_op_idx, + type='cast', + inputs={'X': inf_var_int32}, + outputs={'Out': inf_var}, + attrs={ + "in_dtype": inf_var_int32.dtype, + "out_dtype": inf_var.dtype, + OP_ROLE_KEY: OpRole.Optimize + }) update_loss_scaling_op_idx += 1 block._sync_with_cpp() diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py index 5d28c2d5ceb..03d955842f5 100755 --- 
a/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py @@ -18,6 +18,7 @@ __all__ = [] class GradientClipHelper(object): + def __init__(self, mp_ring_id): self.mp_ring_id = mp_ring_id @@ -95,17 +96,20 @@ class GradientClipHelper(object): namescope = op.attr("op_namescope") block._remove_op(idx, sync=False) - op = block._insert_op_without_sync( - idx, - type='fill_constant', - inputs={}, - outputs={'Out': sum_res}, - attrs={ - 'shape': sum_var.shape, - 'dtype': sum_var.dtype, - 'value': 0.0, - OP_ROLE_KEY: OpRole.Optimize - }) + op = block._insert_op_without_sync(idx, + type='fill_constant', + inputs={}, + outputs={'Out': sum_res}, + attrs={ + 'shape': + sum_var.shape, + 'dtype': + sum_var.dtype, + 'value': + 0.0, + OP_ROLE_KEY: + OpRole.Optimize + }) op._set_attr('op_namescope', namescope) # allreduce(mp)->allreduce(sharding)->allreduce(pp) diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py index 7b47cb6d263..9479dc5fcee 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py @@ -58,23 +58,21 @@ class OffloadHelper(object): def _insert_cast_op(self, block, idx, src_name, dst_name): src_var = block.var(src_name) if not block.has_var(dst_name): - block.create_var( - name=dst_name, - shape=src_var.shape, - dtype=core.VarDesc.VarType.FP16, - persistable=True) + block.create_var(name=dst_name, + shape=src_var.shape, + dtype=core.VarDesc.VarType.FP16, + persistable=True) dst_var = block.var(dst_name) assert dst_var.dtype == core.VarDesc.VarType.FP16 - block._insert_op_without_sync( - idx, - type='cast', - inputs={'X': src_var}, - outputs={'Out': dst_var}, - attrs={ - 'in_dtype': src_var.dtype, - 'out_dtype': dst_var.dtype, - OP_ROLE_KEY: OpRole.Optimize - }) + block._insert_op_without_sync(idx, + type='cast', + inputs={'X': src_var}, + outputs={'Out': dst_var}, + attrs={ + 'in_dtype': src_var.dtype, + 'out_dtype': dst_var.dtype, + OP_ROLE_KEY: OpRole.Optimize + }) def _insert_broadcast_op(self, block, idx, param_name): rings = [] @@ -90,30 +88,28 @@ class OffloadHelper(object): # the insert op order is: mp, dp for ring in rings: - block._insert_op_without_sync( - idx, - type="c_broadcast", - inputs={'X': param_name}, - outputs={'Out': param_name}, - attrs={ - 'ring_id': ring, - 'root': 0, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Forward, - }) + block._insert_op_without_sync(idx, + type="c_broadcast", + inputs={'X': param_name}, + outputs={'Out': param_name}, + attrs={ + 'ring_id': ring, + 'root': 0, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Forward, + }) def _insert_memcpy_op(self, block, idx, src_name, dst_name, dst_place_type): src_var = block.var(src_name) dst_var = block.var(dst_name) - block._insert_op_without_sync( - idx, - type='memcpy', - inputs={'X': src_var}, - outputs={'Out': dst_var}, - attrs={ - 'dst_place_type': dst_place_type, - OP_ROLE_KEY: OpRole.Optimize, - }) + block._insert_op_without_sync(idx, + type='memcpy', + inputs={'X': src_var}, + outputs={'Out': dst_var}, + attrs={ + 'dst_place_type': dst_place_type, + OP_ROLE_KEY: OpRole.Optimize, + }) def _insert_fetch_op(self, block, idx, src_name, dst_name): self._insert_memcpy_op(block, idx, src_name, dst_name, @@ -130,11 +126,10 @@ class OffloadHelper(object): for block in 
blocks: var = block.var(var_name) var.persistable = False - offload_var = block.create_var( - name=offload_var_name, - shape=var.shape, - dtype=var.dtype, - persistable=True) + offload_var = block.create_var(name=offload_var_name, + shape=var.shape, + dtype=var.dtype, + persistable=True) def offload_fp32param(self, block, startup_block, offload=True): """ diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py index 9e577ca0c67..adbc00f25de 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py @@ -16,6 +16,7 @@ __all__ = [] class ProgramDeps(object): + def __init__(self, block, start_vars, end_vars): self._block = block # vars where to start to build the deps @@ -92,8 +93,8 @@ class ProgramDeps(object): raise ValueError( "op_idx: {} is not in self._var_to_use_op[{}], " "self._var_to_use_op[{}] is {}".format( - op_idx, var_name, var_name, self._var_to_use_op[ - var_name])) + op_idx, var_name, var_name, + self._var_to_use_op[var_name])) self._var_to_use_op[var_name].remove(op_idx) # update _should_removed_var if var_name in self._start_vars: diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py index 52dfed83d33..7002dfa2be5 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py @@ -20,6 +20,7 @@ __all__ = [] class Shard(object): + def __init__(self, ): self.global_params = set([]) self.worker_idx = -1 @@ -30,7 +31,7 @@ class Shard(object): def setup(self, params_grads, worker_idx, worker_num): # param names of all devices self.global_params = set([x[0].name for x in params_grads]) - # _param(str) -> device_id(int) + # _param(str) -> device_id(int) self.worker_idx = worker_idx self.worker_num = worker_num # global_param2device contains fp32 params and fp16 params @@ -138,6 +139,7 @@ class Shard(object): class ProgramSegment(object): + def __init__(self, block): self._block = block self._allreduce_vars = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py index 1a3a8a4883d..39f71be0cde 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py @@ -38,10 +38,11 @@ def check_broadcast(block): var_name = op.desc.input_arg_names()[0] if "@BroadCast" in var_name: if var_name in broadcast_vars: - raise ValueError("var_name areadly exist: {}" - "the old pos is {}, the new pos is {}". 
- format(var_name, broadcast_vars[ - var_name]["broadcast_pos"], idx)) + raise ValueError( + "var_name areadly exist: {}" + "the old pos is {}, the new pos is {}".format( + var_name, + broadcast_vars[var_name]["broadcast_pos"], idx)) broadcast_vars[var_name] = { "fill_constant_pos": -1, "broadcast_pos": idx, @@ -149,9 +150,9 @@ def check_allreduce_sum(block, shard, sharding_ring_id, dp_ring_id=-1): else: _status = dp_grads_status[var_name] if _status == -1: - raise ValueError("{} is not generated, but you are" - "trying to all-reduce it".format( - var_name)) + raise ValueError( + "{} is not generated, but you are" + "trying to all-reduce it".format(var_name)) if _status == 0: raise ValueError("There should be a sync_calc op " "after generate Var: {} and before the" @@ -190,18 +191,19 @@ def check_allreduce_sum(block, shard, sharding_ring_id, dp_ring_id=-1): for input_name in op.desc.input_arg_names(): if input_name in vars_status: if vars_status[input_name] != 3: - raise ValueError("There should be a sync_comm op " - "after allreduce the Var: {}".format( - input_name)) + raise ValueError( + "There should be a sync_comm op " + "after allreduce the Var: {}".format(input_name)) raise ValueError( - "The reduce output grad [{}] should NOT be be used in Non-root rank.". - format(input_name)) + "The reduce output grad [{}] should NOT be be used in Non-root rank." + .format(input_name)) if input_name in dp_grads_status: if dp_ring_id == -1: if dp_grads_status[input_name] != 3: - raise ValueError("There should be a sync_comm op " - "after allreduce the Var: {}". - format(input_name)) + raise ValueError( + "There should be a sync_comm op " + "after allreduce the Var: {}".format( + input_name)) else: if dp_grads_status[input_name] != 5: raise ValueError( @@ -232,8 +234,9 @@ def get_valid_op_role(block, insert_idx): return OpRole.Forward or OpRole.Backward """ op_role = block.ops[insert_idx].attr('op_role') - if (insert_idx >= len(block.ops)) or ( - op_role in [int(OpRole.Backward), int(OpRole.Optimize)]): + if (insert_idx >= len(block.ops)) or (op_role in [ + int(OpRole.Backward), int(OpRole.Optimize) + ]): return OpRole.Backward if op_role in [int(OpRole.Forward), int(OpRole.Loss)]: return OpRole.Forward @@ -246,12 +249,11 @@ def insert_sync_calc_op(block, insert_idx, calc_dep_vars): _insert_sync_calc_op """ op_role = get_valid_op_role(block, insert_idx) - block._insert_op_without_sync( - insert_idx, - type='c_sync_calc_stream', - inputs={'X': calc_dep_vars}, - outputs={'Out': calc_dep_vars}, - attrs={OP_ROLE_KEY: op_role}) + block._insert_op_without_sync(insert_idx, + type='c_sync_calc_stream', + inputs={'X': calc_dep_vars}, + outputs={'Out': calc_dep_vars}, + attrs={OP_ROLE_KEY: op_role}) return @@ -260,13 +262,14 @@ def insert_sync_comm_op(block, insert_idx, ring_id, comm_dep_vars): insert sync_comm_op for single var """ op_role = get_valid_op_role(block, insert_idx) - block._insert_op_without_sync( - insert_idx, - type='c_sync_comm_stream', - inputs={'X': comm_dep_vars}, - outputs={'Out': comm_dep_vars}, - attrs={'ring_id': ring_id, - OP_ROLE_KEY: op_role}) + block._insert_op_without_sync(insert_idx, + type='c_sync_comm_stream', + inputs={'X': comm_dep_vars}, + outputs={'Out': comm_dep_vars}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: op_role + }) return 1 @@ -274,18 +277,19 @@ def insert_sync_comm_ops(block, insert_idx, ring_id, comm_dep_vars): """ insert sync_comm_op for vars """ - # NOTE (JZ-LIANG) to be check, may result undefined case + # NOTE (JZ-LIANG) to be check, may result 
undefined case if len(comm_dep_vars) == 0: return 0 op_role = get_valid_op_role(block, insert_idx) - block._insert_op_without_sync( - insert_idx, - type='c_sync_comm_stream', - inputs={'X': comm_dep_vars}, - outputs={'Out': comm_dep_vars}, - attrs={'ring_id': int(ring_id), - OP_ROLE_KEY: op_role}) + block._insert_op_without_sync(insert_idx, + type='c_sync_comm_stream', + inputs={'X': comm_dep_vars}, + outputs={'Out': comm_dep_vars}, + attrs={ + 'ring_id': int(ring_id), + OP_ROLE_KEY: op_role + }) return 1 @@ -296,16 +300,15 @@ def insert_fill_constant_ops(block, insert_idx, fill_constant_vars): op_role = get_valid_op_role(block, insert_idx) for broadcast_name in fill_constant_vars: broadcast_var = block.var(broadcast_name) - block._insert_op_without_sync( - insert_idx, - type="fill_constant", - outputs={"Out": broadcast_var.name}, - attrs={ - "shape": broadcast_var.shape, - "dtype": broadcast_var.dtype, - "value": 0.0, - OP_ROLE_KEY: op_role - }) + block._insert_op_without_sync(insert_idx, + type="fill_constant", + outputs={"Out": broadcast_var.name}, + attrs={ + "shape": broadcast_var.shape, + "dtype": broadcast_var.dtype, + "value": 0.0, + OP_ROLE_KEY: op_role + }) return @@ -315,16 +318,16 @@ def insert_cast_ops(block, insert_idx, cast_ops): """ op_role = get_valid_op_role(block, insert_idx) for fp16_name, fp32_name in cast_ops.items(): - block._insert_op_without_sync( - insert_idx, - type="cast", - inputs={"X": fp32_name}, - outputs={"Out": fp16_name}, - attrs={ - "in_dtype": core.VarDesc.VarType.FP32, - "out_dtype": core.VarDesc.VarType.FP16, - OP_ROLE_KEY: op_role - }) + block._insert_op_without_sync(insert_idx, + type="cast", + inputs={"X": fp32_name}, + outputs={"Out": fp16_name}, + attrs={ + "in_dtype": core.VarDesc.VarType.FP32, + "out_dtype": + core.VarDesc.VarType.FP16, + OP_ROLE_KEY: op_role + }) return @@ -351,21 +354,22 @@ def insert_allreduce_ops(block, user_defined_strategy.fuse_grad_size_in_MB) else: for var in allreduce_vars: - block._insert_op_without_sync( - insert_idx, - type='c_allreduce_sum', - inputs={'X': var}, - outputs={'Out': var}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': use_calc_stream, - OP_ROLE_KEY: op_role - }) + block._insert_op_without_sync(insert_idx, + type='c_allreduce_sum', + inputs={'X': var}, + outputs={'Out': var}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': + use_calc_stream, + OP_ROLE_KEY: op_role + }) return class FuseHelper(object): + @staticmethod def sort_vars_by_dtype(block, vars_name): fp32_vars = [] @@ -419,25 +423,25 @@ class FuseHelper(object): fused_vars.append(group[0]) continue - fused_var = block.create_var( - name=unique_name.generate('Fused{}_{}'.format(prefix, group[0] - .name)), - dtype=group[0].dtype, - persistable=False, - stop_gradient=True) + fused_var = block.create_var(name=unique_name.generate( + 'Fused{}_{}'.format(prefix, group[0].name)), + dtype=group[0].dtype, + persistable=False, + stop_gradient=True) fused_vars.append(fused_var) - block._insert_op_without_sync( - index, - type="coalesce_tensor", - inputs={"Input": group}, - outputs={"Output": group, - "FusedOutput": fused_var}, - attrs={ - "copy_data": True, - "use_align": True, - "dtype": group[0].dtype, - OP_ROLE_KEY: op_role - }) + block._insert_op_without_sync(index, + type="coalesce_tensor", + inputs={"Input": group}, + outputs={ + "Output": group, + "FusedOutput": fused_var + }, + attrs={ + "copy_data": True, + "use_align": True, + "dtype": group[0].dtype, + OP_ROLE_KEY: op_role + }) insert_num += 1 return fused_vars, insert_num @@ -452,27 
+456,28 @@ def insert_fused_allreduce_ops(block, groups = FuseHelper.get_fused_groups(block, allreduce_vars, fuse_grad_size_in_MB) - fused_vars, insert_num = FuseHelper.insert_coalesce_tensor( - block, insert_idx, groups, op_role, prefix="Grad") + fused_vars, insert_num = FuseHelper.insert_coalesce_tensor(block, + insert_idx, + groups, + op_role, + prefix="Grad") for fused_var in fused_vars: - block._insert_op_without_sync( - insert_idx + insert_num, - type='c_allreduce_sum', - inputs={'X': fused_var}, - outputs={'Out': fused_var}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': use_calc_stream, - OP_ROLE_KEY: op_role - }) + block._insert_op_without_sync(insert_idx + insert_num, + type='c_allreduce_sum', + inputs={'X': fused_var}, + outputs={'Out': fused_var}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': use_calc_stream, + OP_ROLE_KEY: op_role + }) if not use_calc_stream: - block._insert_op_without_sync( - insert_idx + insert_num, - type='c_sync_calc_stream', - inputs={'X': fused_var}, - outputs={'Out': fused_var}, - attrs={OP_ROLE_KEY: op_role}) + block._insert_op_without_sync(insert_idx + insert_num, + type='c_sync_calc_stream', + inputs={'X': fused_var}, + outputs={'Out': fused_var}, + attrs={OP_ROLE_KEY: op_role}) def insert_fused_reduce_ops(block, @@ -501,24 +506,23 @@ def insert_fused_reduce_ops(block, block, insert_idx, groups, op_role, prefix="Grad") for fused_var in fused_vars: - block._insert_op_without_sync( - insert_idx + insert_num, - type='c_reduce_sum', - inputs={'X': fused_var}, - outputs={'Out': fused_var}, - attrs={ - 'ring_id': ring_id, - 'root_id': root_id, - 'use_calc_stream': use_calc_stream, - OP_ROLE_KEY: op_role - }) + block._insert_op_without_sync(insert_idx + insert_num, + type='c_reduce_sum', + inputs={'X': fused_var}, + outputs={'Out': fused_var}, + attrs={ + 'ring_id': ring_id, + 'root_id': root_id, + 'use_calc_stream': + use_calc_stream, + OP_ROLE_KEY: op_role + }) if not use_calc_stream: - block._insert_op_without_sync( - insert_idx + insert_num, - type='c_sync_calc_stream', - inputs={'X': fused_var}, - outputs={'Out': fused_var}, - attrs={OP_ROLE_KEY: op_role}) + block._insert_op_without_sync(insert_idx + insert_num, + type='c_sync_calc_stream', + inputs={'X': fused_var}, + outputs={'Out': fused_var}, + attrs={OP_ROLE_KEY: op_role}) return [] if rank is None else device_to_vars[rank] @@ -554,17 +558,16 @@ def insert_reduce_ops(block, root_id) if rank is not None and rank == root_id: grad_in_this_device.append(var) - block._insert_op_without_sync( - insert_idx, - type='c_reduce_sum', - inputs={'X': var}, - outputs={'Out': var}, - attrs={ - 'ring_id': ring_id, - 'root_id': root_id, - 'use_calc_stream': use_calc_stream, - OP_ROLE_KEY: op_role - }) + block._insert_op_without_sync(insert_idx, + type='c_reduce_sum', + inputs={'X': var}, + outputs={'Out': var}, + attrs={ + 'ring_id': ring_id, + 'root_id': root_id, + 'use_calc_stream': use_calc_stream, + OP_ROLE_KEY: op_role + }) return grad_in_this_device @@ -595,24 +598,23 @@ def insert_fused_broadcast_param_ops(block, block, insert_idx, groups, op_role, prefix="Param") for fused_var in fused_vars: - block._insert_op_without_sync( - insert_idx + insert_num, - type='c_broadcast', - inputs={'X': fused_var}, - outputs={'Out': fused_var}, - attrs={ - 'ring_id': ring_id, - 'root': root_id, - 'use_calc_stream': use_calc_stream, - OP_ROLE_KEY: op_role - }) + block._insert_op_without_sync(insert_idx + insert_num, + type='c_broadcast', + inputs={'X': fused_var}, + outputs={'Out': fused_var}, + attrs={ + 
'ring_id': ring_id, + 'root': root_id, + 'use_calc_stream': + use_calc_stream, + OP_ROLE_KEY: op_role + }) if not use_calc_stream: - block._insert_op_without_sync( - insert_idx + insert_num, - type='c_sync_calc_stream', - inputs={'X': fused_var}, - outputs={'Out': fused_var}, - attrs={OP_ROLE_KEY: op_role}) + block._insert_op_without_sync(insert_idx + insert_num, + type='c_sync_calc_stream', + inputs={'X': fused_var}, + outputs={'Out': fused_var}, + attrs={OP_ROLE_KEY: op_role}) return [] if rank is None else device_to_vars[rank] @@ -631,9 +633,10 @@ def insert_broadcast_param_ops(block, """ if strategy and strategy.fuse_all_reduce_ops: # TODO(wangxi): put fused var in startup_program, only need exec once - return insert_fused_broadcast_param_ops( - block, insert_idx, ring_id, params, shard, op_role, use_calc_stream, - rank, strategy.fuse_grad_size_in_MB) + return insert_fused_broadcast_param_ops(block, insert_idx, ring_id, + params, shard, op_role, + use_calc_stream, rank, + strategy.fuse_grad_size_in_MB) param_in_this_device = [] for param in params: @@ -642,17 +645,16 @@ def insert_broadcast_param_ops(block, root_id) if rank is not None and rank == root_id: param_in_this_device.append(param) - block._insert_op_without_sync( - insert_idx, - type='c_broadcast', - inputs={'X': param}, - outputs={'Out': param}, - attrs={ - 'ring_id': ring_id, - 'root': root_id, - 'use_calc_stream': use_calc_stream, - OP_ROLE_KEY: op_role - }) + block._insert_op_without_sync(insert_idx, + type='c_broadcast', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + 'root': root_id, + 'use_calc_stream': use_calc_stream, + OP_ROLE_KEY: op_role + }) return param_in_this_device @@ -690,17 +692,16 @@ def fuse_opt_broadcast_param_ops(block, block, insert_idx, groups, op_role, prefix="Param") for fused_var in fused_vars: - block._insert_op_without_sync( - insert_idx + insert_num, - type='c_broadcast', - inputs={'X': fused_var}, - outputs={'Out': fused_var}, - attrs={ - 'ring_id': ring_id, - 'root': root_id, - 'use_calc_stream': True, - OP_ROLE_KEY: op_role - }) + block._insert_op_without_sync(insert_idx + insert_num, + type='c_broadcast', + inputs={'X': fused_var}, + outputs={'Out': fused_var}, + attrs={ + 'ring_id': ring_id, + 'root': root_id, + 'use_calc_stream': True, + OP_ROLE_KEY: op_role + }) block._sync_with_cpp() @@ -759,16 +760,15 @@ def insert_broadcast_ops(block, insert_idx, ring_id, broadcast2root): """ op_role = get_valid_op_role(block, insert_idx) for broadcast_name, root_device in broadcast2root: - block._insert_op_without_sync( - insert_idx, - type='c_broadcast', - inputs={'X': broadcast_name}, - outputs={'Out': broadcast_name}, - attrs={ - 'ring_id': ring_id, - 'root': root_device, - OP_ROLE_KEY: op_role - }) + block._insert_op_without_sync(insert_idx, + type='c_broadcast', + inputs={'X': broadcast_name}, + outputs={'Out': broadcast_name}, + attrs={ + 'ring_id': ring_id, + 'root': root_device, + OP_ROLE_KEY: op_role + }) return @@ -825,8 +825,8 @@ def comm_analyse(main_program): if op.type == "c_broadcast": var_name = op.desc.input_arg_names()[0] # convert MB to KB - broadcast_vars[var_name] = get_var_size(block.var( - var_name)) * 1024.0 + broadcast_vars[var_name] = get_var_size( + block.var(var_name)) * 1024.0 elif op.type == "c_allreduce_sum": var_name = op.desc.input_arg_names()[0] reduce_vars[var_name] = get_var_size(block.var(var_name)) * 1024.0 @@ -877,14 +877,15 @@ def add_sync_comm(program, sharding_ring_id): for input_name in op.desc.input_arg_names(): 
not_sync_vars.remove(input_name) if not_sync_vars: - block.append_op( - type='c_sync_comm_stream', - inputs={'X': list(not_sync_vars)}, - outputs={'Out': list(not_sync_vars)}, - attrs={ - 'ring_id': sharding_ring_id, - 'op_role': core.op_proto_and_checker_maker.OpRole.Forward - }) + block.append_op(type='c_sync_comm_stream', + inputs={'X': list(not_sync_vars)}, + outputs={'Out': list(not_sync_vars)}, + attrs={ + 'ring_id': + sharding_ring_id, + 'op_role': + core.op_proto_and_checker_maker.OpRole.Forward + }) return @@ -926,41 +927,39 @@ def save_persistables(exe, dirname, main_program, filename=None): var) if int(os.environ.get('PADDLE_TRAINER_ID', 0)) == 0: - paddle.fluid.io.save_persistables( - exe, dirname, main_program=main_program, filename=None) + paddle.fluid.io.save_persistables(exe, + dirname, + main_program=main_program, + filename=None) else: - paddle.fluid.io.save_vars( - exe, - dirname, - main_program=main_program, - predicate=sharding_predicate, - filename=None) + paddle.fluid.io.save_vars(exe, + dirname, + main_program=main_program, + predicate=sharding_predicate, + filename=None) return def append_naive_sync(block, sync_var, ring_id): # NOTE (JZ-LIANG) update this to use barrier sync for more elegent logic - # sync within global - block.append_op( - type="fill_constant", - outputs={"Out": sync_var}, - attrs={ - "shape": sync_var.shape, - "dtype": sync_var.dtype, - "value": int(1), - }) - block.append_op( - type='c_allreduce_sum', - inputs={'X': sync_var}, - outputs={'Out': sync_var}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Forward - }) - block.append_op( - type='c_sync_calc_stream', - inputs={'X': [sync_var]}, - outputs={'Out': [sync_var]}, - attrs={OP_ROLE_KEY: OpRole.Forward}) + # sync within global + block.append_op(type="fill_constant", + outputs={"Out": sync_var}, + attrs={ + "shape": sync_var.shape, + "dtype": sync_var.dtype, + "value": int(1), + }) + block.append_op(type='c_allreduce_sum', + inputs={'X': sync_var}, + outputs={'Out': sync_var}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Forward + }) + block.append_op(type='c_sync_calc_stream', + inputs={'X': [sync_var]}, + outputs={'Out': [sync_var]}, + attrs={OP_ROLE_KEY: OpRole.Forward}) diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py index ab0c79bca55..42c52af4431 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py @@ -18,6 +18,7 @@ __all__ = [] class WeightDecayHelper(object): + def __init__(self): pass diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py index 90440ff9d0e..fcecc3a9a67 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py @@ -32,9 +32,10 @@ from .sharding import utils from .sharding.utils import * import logging + logger = logging.getLogger(__name__) -formatter = logging.Formatter( - fmt='%(asctime)s %(levelname)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S') +formatter = logging.Formatter(fmt='%(asctime)s %(levelname)-8s %(message)s', + datefmt='%Y-%m-%d %H:%M:%S') ch = logging.StreamHandler() ch.setFormatter(formatter) logger.addHandler(ch) @@ -57,7 +58,9 @@ class 
ShardingOptimizer(MetaOptimizerBase): # "ModelParallelOptimizer", # "PipelineOptimizer", ] - self.meta_optimizers_black_list = ["GraphExecutionOptimizer", ] + self.meta_optimizers_black_list = [ + "GraphExecutionOptimizer", + ] self._main_program = None self._startup_program = None self._segments = [] @@ -418,8 +421,8 @@ class ShardingOptimizer(MetaOptimizerBase): use_calc_stream=True, rank=self.dp_rank, strategy=strategy) - logger.info("Optimizer grad in this rank {}".format( - accumulated_grad_names)) + logger.info( + "Optimizer grad in this rank {}".format(accumulated_grad_names)) first_optimize_op_index += (len(main_block.ops) - len_of_ops) len_of_ops = len(main_block.ops) @@ -434,8 +437,8 @@ class ShardingOptimizer(MetaOptimizerBase): use_calc_stream=True, rank=self.dp_rank, strategy=None if optimize_cast else strategy) - logger.info("Optimizer param in this rank {}".format( - optimizer_param)) + logger.info( + "Optimizer param in this rank {}".format(optimizer_param)) if not strategy.fuse_grad_merge and not optimize_cast: assert len(accumulated_grad_names) == len(optimizer_param) elif self.hybrid_dp and self.hybrid_dp_mode == "pp_hybrid_dp": @@ -519,8 +522,9 @@ class ShardingOptimizer(MetaOptimizerBase): FP16Utils.sync_amp_check_nan_inf(main_block, rings) gradientclip_helper = GradientClipHelper(None) - gradientclip_helper.sync_global_norm( - main_block, [self.mp_ring_id, self.pp_ring_id], self.mp_rank) + gradientclip_helper.sync_global_norm(main_block, + [self.mp_ring_id, self.pp_ring_id], + self.mp_rank) def _insert_loss_grad_scale_op(self): main_block = self._main_program.global_block() @@ -541,8 +545,8 @@ class ShardingOptimizer(MetaOptimizerBase): mp_ring_id = self.mp_ring_id if self.mp_degree > 1 else None dp_ring_id = self.dp_ring_id if self.dp_degree > 1 else None - offload_helper = OffloadHelper( - mp_ring_id=mp_ring_id, dp_ring_id=dp_ring_id) + offload_helper = OffloadHelper(mp_ring_id=mp_ring_id, + dp_ring_id=dp_ring_id) # optimize offload should be enable while gradient merge is enable and # acc_step is quite large (e.g. >> 100). 
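The logger wiring touched at the top of sharding_optimizer.py in the previous hunk is plain standard-library logging; the re-wrap changes only the call layout. For reference, the same setup written out standalone:

    # Same formatter/handler wiring as in sharding_optimizer.py above;
    # behaviour is unchanged by the re-wrap.
    import logging

    logger = logging.getLogger(__name__)
    formatter = logging.Formatter(fmt='%(asctime)s %(levelname)-8s %(message)s',
                                  datefmt='%Y-%m-%d %H:%M:%S')
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    logger.addHandler(ch)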
Since its memcpy could not be @@ -561,11 +565,13 @@ class ShardingOptimizer(MetaOptimizerBase): main_block, startup_block, [x[0].name for x in params_grads]) # NOTE(wangxi): fused after optimize_cast - utils.fuse_opt_broadcast_param_ops( - main_block, dp_ring_id, self._shard, strategy=strategy) + utils.fuse_opt_broadcast_param_ops(main_block, + dp_ring_id, + self._shard, + strategy=strategy) else: - offload_helper.cast_fp32param_in_optimize(main_block, - startup_block) + offload_helper.cast_fp32param_in_optimize( + main_block, startup_block) def _dump_program_for_debug(self): main_block = self._main_program.global_block() @@ -645,14 +651,13 @@ class ShardingOptimizer(MetaOptimizerBase): ] pp_rank = 0 if self.pp_rank == pair[0] else 1 if os.getenv("PADDLE_MANUAL_PIPELINE_STAGE", None) is None: - self._collective_helper._init_communicator( - self._startup_program, - self.current_endpoint, - pp_group_endpoints, - pp_rank, - ring_id, - False, - sync=False) + self._collective_helper._init_communicator(self._startup_program, + self.current_endpoint, + pp_group_endpoints, + pp_rank, + ring_id, + False, + sync=False) def _init_npu_pipeline_comm(self, startup_block): # NOTE(wangxi): some bug with hccl, must set pp_degree be even number @@ -670,12 +675,13 @@ class ShardingOptimizer(MetaOptimizerBase): my_pair.append(pair) # for example: self.pp_rank=2, self.pp_degree=4 - send_to_next_pair = (self.pp_rank, - (self.pp_rank + 1) % self.pp_degree) # 2->3 - recv_from_next_pair = ((self.pp_rank + 1) % self.pp_degree, - self.pp_rank) # 3->2 - recv_from_prev_pair = ((self.pp_rank - 1 + self.pp_degree) % - self.pp_degree, self.pp_rank) # 1->2 + send_to_next_pair = (self.pp_rank, (self.pp_rank + 1) % self.pp_degree + ) # 2->3 + recv_from_next_pair = ( + (self.pp_rank + 1) % self.pp_degree, self.pp_rank) # 3->2 + recv_from_prev_pair = ( + (self.pp_rank - 1 + self.pp_degree) % self.pp_degree, self.pp_rank + ) # 1->2 send_to_prev_pair = (self.pp_rank, (self.pp_rank - 1 + self.pp_degree) % self.pp_degree) # 2->1 @@ -686,24 +692,24 @@ class ShardingOptimizer(MetaOptimizerBase): ring_id = self.pp_ring_map[pair[0] * 1000 + pair[1]] self._init_pair_comm(pair, ring_id) my_pair.remove(pair) - logger.info("pair0(even->odd): pp pair:{}, ring_id: {}".format(pair, - ring_id)) + logger.info("pair0(even->odd): pp pair:{}, ring_id: {}".format( + pair, ring_id)) # 2. even recv from next, odd send to prev, 1->0, 3->2 pair = recv_from_next_pair if even else send_to_prev_pair ring_id = self.pp_ring_map[pair[0] * 1000 + pair[1]] self._init_pair_comm(pair, ring_id) my_pair.remove(pair) - logger.info("pair1(even<-odd): pp pair:{}, ring_id: {}".format(pair, - ring_id)) + logger.info("pair1(even<-odd): pp pair:{}, ring_id: {}".format( + pair, ring_id)) # if pp_degree is 2, only need pair(0->1, 1->0) if self.pp_degree > 2: # 3. odd send to next, even recv from prev, 1->2, 3->0 pair = send_to_next_pair if not even else recv_from_prev_pair - ring_id = self.pp_ring_map.get( - pair[0] * 1000 + pair[1], - max_ring_id + 1) # 3->0 not in pp_ring_map + ring_id = self.pp_ring_map.get(pair[0] * 1000 + pair[1], + max_ring_id + + 1) # 3->0 not in pp_ring_map self._init_pair_comm(pair, ring_id) if self.pp_rank != 0 and self.pp_rank != self.pp_degree - 1: my_pair.remove(pair) @@ -712,9 +718,9 @@ class ShardingOptimizer(MetaOptimizerBase): # 4. 
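The NPU pipeline-communication hunk above builds four neighbour pairs from the pipeline rank and looks each one up in pp_ring_map, which is keyed by src * 1000 + dst. A pure-Python restatement (function names here are illustrative, not part of the optimizer):

    # Neighbour pairs for a given pipeline rank, mirroring the hunk above.
    def pp_neighbour_pairs(pp_rank, pp_degree):
        send_to_next   = (pp_rank, (pp_rank + 1) % pp_degree)              # e.g. 2->3
        recv_from_next = ((pp_rank + 1) % pp_degree, pp_rank)              # e.g. 3->2
        recv_from_prev = ((pp_rank - 1 + pp_degree) % pp_degree, pp_rank)  # e.g. 1->2
        send_to_prev   = (pp_rank, (pp_rank - 1 + pp_degree) % pp_degree)  # e.g. 2->1
        return send_to_next, recv_from_next, recv_from_prev, send_to_prev

    def ring_key(pair):
        # pp_ring_map is keyed by src * 1000 + dst in the code above
        return pair[0] * 1000 + pair[1]

    # For pp_rank=2, pp_degree=4 this yields (2, 3), (3, 2), (1, 2), (2, 1).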
odd recv from next, even send to prev, 2->1, 0->3 pair = recv_from_next_pair if not even else send_to_prev_pair - ring_id = self.pp_ring_map.get( - pair[0] * 1000 + pair[1], - max_ring_id + 2) # 0->3 not in pp_ring_map + ring_id = self.pp_ring_map.get(pair[0] * 1000 + pair[1], + max_ring_id + + 2) # 0->3 not in pp_ring_map self._init_pair_comm(pair, ring_id) if self.pp_rank != 0 and self.pp_rank != self.pp_degree - 1: my_pair.remove(pair) @@ -727,14 +733,13 @@ class ShardingOptimizer(MetaOptimizerBase): def _init_pipeline_comm(self, startup_block): # TODO (JZ-LIANG) to unify pp_rank_ and pp_rank if os.getenv("PADDLE_MANUAL_PIPELINE_STAGE", None) is None: - self._collective_helper._init_communicator( - self._startup_program, - self.current_endpoint, - self.pp_group_endpoints, - self.pp_rank, - self.pp_ring_id, - False, - sync=False) + self._collective_helper._init_communicator(self._startup_program, + self.current_endpoint, + self.pp_group_endpoints, + self.pp_rank, + self.pp_ring_id, + False, + sync=False) if core.is_compiled_with_npu(): self._init_npu_pipeline_comm(startup_block) @@ -754,14 +759,13 @@ class ShardingOptimizer(MetaOptimizerBase): # mp ring if self.mp_degree > 1: - self._collective_helper._init_communicator( - self._startup_program, - self.current_endpoint, - self.mp_group_endpoints, - self.mp_rank, - self.mp_ring_id, - False, - sync=False) + self._collective_helper._init_communicator(self._startup_program, + self.current_endpoint, + self.mp_group_endpoints, + self.mp_rank, + self.mp_ring_id, + False, + sync=False) # sharding ring if self.sharding_degree > 1: @@ -780,14 +784,13 @@ class ShardingOptimizer(MetaOptimizerBase): # pure dp ring if self.dp_degree > 1: - self._collective_helper._init_communicator( - self._startup_program, - self.current_endpoint, - self.dp_group_endpoints, - self.dp_rank, - self.dp_ring_id, - False, - sync=False) + self._collective_helper._init_communicator(self._startup_program, + self.current_endpoint, + self.dp_group_endpoints, + self.dp_rank, + self.dp_ring_id, + False, + sync=False) startup_block._sync_with_cpp() @@ -839,12 +842,12 @@ class ShardingOptimizer(MetaOptimizerBase): if ".cast_fp16@GRAD" not in input_name: continue else: - input_name = input_name[:input_name.find( - ".cast_fp16@GRAD")] + input_name = input_name[:input_name. + find(".cast_fp16@GRAD")] if input_name in self._backward_remain_anchors: - segment = self.collect_segment(segment, op_idx, - block) + segment = self.collect_segment( + segment, op_idx, block) assert input_name not in self._forward_remain_anchors, "segment anchor [{}] met twice !".format( input_name) self._backward_remain_anchors.remove(input_name) @@ -852,8 +855,8 @@ class ShardingOptimizer(MetaOptimizerBase): elif int(op.attr('op_role')) == int(OpRole.Forward): for output_name in op.desc.output_arg_names(): if output_name in self._forward_remain_anchors: - segment = self.collect_segment(segment, op_idx, - block) + segment = self.collect_segment( + segment, op_idx, block) self._forward_remain_anchors.remove(output_name) # find broadcast vars @@ -878,8 +881,8 @@ class ShardingOptimizer(MetaOptimizerBase): if "subprog" in broadcast_var_base_name: # remove suffix broadcast_var_base_name = broadcast_var_base_name[: - broadcast_var_base_name. 
- find( + broadcast_var_base_name + .find( ".subprog" )] @@ -888,8 +891,8 @@ class ShardingOptimizer(MetaOptimizerBase): broadcast_var_base_name, 0) + 1 segment._param2broadcast[input_name] = broadcast_var_name - segment._broadcast_vars.append((broadcast_var_name, - self._shard.device(input_name))) + segment._broadcast_vars.append( + (broadcast_var_name, self._shard.device(input_name))) segment._param_mem += get_var_size( self._main_program.global_block().var(input_name)) @@ -904,11 +907,12 @@ class ShardingOptimizer(MetaOptimizerBase): if len(op_role_var) != 0: assert len(op_role_var) % 2 == 0 for i in range(0, len(op_role_var), 2): - param, reduced_grad = op_role_var[i], op_role_var[ - i + 1] + param, reduced_grad = op_role_var[i], op_role_var[i + + + 1] segment._allreduce_vars.append(reduced_grad) - assert (reduced_grad not in - self._reduced_grads_to_param) + assert (reduced_grad + not in self._reduced_grads_to_param) self._reduced_grads_to_param[reduced_grad] = param # find cast op @@ -931,19 +935,20 @@ class ShardingOptimizer(MetaOptimizerBase): self._backward_remain_anchors) if self._verbose: - for varname in sorted( - var2broadcast_time, key=var2broadcast_time.get, - reverse=True): + for varname in sorted(var2broadcast_time, + key=var2broadcast_time.get, + reverse=True): logger.info("Sharding broadcast: [{}] times [{}]".format( var2broadcast_time[varname], varname)) for idx_ in range(len(self._segments)): logger.info("segment [{}] :".format(idx_)) - logger.info("start op: [{}] [{}]".format(block.ops[ - self._segments[idx_]._start_idx].desc.type(), block.ops[ - self._segments[idx_]._start_idx].desc.input_arg_names( - ))) - logger.info("end op: [{}] [{}]".format(block.ops[ - self._segments[idx_]._end_idx].desc.type(), block.ops[ + logger.info("start op: [{}] [{}]".format( + block.ops[self._segments[idx_]._start_idx].desc.type(), + block.ops[self._segments[idx_]. 
+ _start_idx].desc.input_arg_names())) + logger.info("end op: [{}] [{}]".format( + block.ops[self._segments[idx_]._end_idx].desc.type(), + block.ops[ self._segments[idx_]._end_idx].desc.input_arg_names())) return @@ -1044,7 +1049,7 @@ class ShardingOptimizer(MetaOptimizerBase): program_deps.remove_op(idx, reserved_vars) # NOTE (JZ-LIANG) revise and unify logic here - # sharding support fp16_allreduce logic + # sharding support fp16_allreduce logic block._sync_with_cpp() for idx, op in reversed(list(enumerate(block.ops))): if op.type == 'concat' and is_optimizer_op(op): @@ -1084,8 +1089,8 @@ class ShardingOptimizer(MetaOptimizerBase): self._segments[-1]._end_idx = new_end_idx if self._segments[-1]._allreduce_vars: - shard_allredue_vars = self._shard.filter_grads(self._segments[-1] - ._allreduce_vars) + shard_allredue_vars = self._shard.filter_grads( + self._segments[-1]._allreduce_vars) if self.gradient_merge_mode != "sharding_gm" or self._gradient_merge_acc_step <= 1: if self.hybrid_dp and self.hybrid_dp_mode == "sharding_hybrid_dp" and len( shard_allredue_vars) >= 1: @@ -1097,38 +1102,36 @@ class ShardingOptimizer(MetaOptimizerBase): self.dp_ring_id, shard_allredue_vars, user_defined_strategy=self.user_defined_strategy) - # gradient merge + # gradient merge elif self.gradient_merge_mode == "sharding_gm" and self._gradient_merge_acc_step > 1: self.create_persistable_gradients_and_insert_merge_ops( - block, - self._startup_program.global_block(), + block, self._startup_program.global_block(), self._segments[-1]._end_idx, shard_allredue_vars, self._shard) insert_sync_comm_ops(block, self._segments[-1]._end_idx, self.sharding_ring_id, self._segments[-1]._allreduce_vars) - # allreduce --> reduce - insert_reduce_ops( - block, - self._segments[-1]._end_idx, - self.sharding_ring_id, - self._segments[-1]._allreduce_vars, - self._shard, - op_role=OpRole.Backward, - use_calc_stream=False) + # allreduce --> reduce + insert_reduce_ops(block, + self._segments[-1]._end_idx, + self.sharding_ring_id, + self._segments[-1]._allreduce_vars, + self._shard, + op_role=OpRole.Backward, + use_calc_stream=False) for idx, segment in reversed(list(enumerate(self._segments))): allreduce_vars = self._segments[ idx - 1]._allreduce_vars if idx > 0 else [] - broadcast_vars = self._segments[idx + - 1]._broadcast_vars if idx < len( - self._segments) - 1 else [] + broadcast_vars = self._segments[ + idx + + 1]._broadcast_vars if idx < len(self._segments) - 1 else [] fill_constant_vars = self._segments[ - idx + 2]._fill_constant_vars if idx < len( - self._segments) - 2 else [] - cast_ops = self._segments[idx + 2]._cast_ops if idx < len( - self._segments) - 2 else {} + idx + + 2]._fill_constant_vars if idx < len(self._segments) - 2 else [] + cast_ops = self._segments[ + idx + 2]._cast_ops if idx < len(self._segments) - 2 else {} for op_idx in reversed(range(segment._start_idx, segment._end_idx)): op = block.ops[op_idx] @@ -1144,14 +1147,14 @@ class ShardingOptimizer(MetaOptimizerBase): name=broadcast_name, shape=self._main_program.global_block().var( param_name).shape, - dtype=self._main_program.global_block().var(param_name) - .dtype, + dtype=self._main_program.global_block().var( + param_name).dtype, persistable=False) # step1: remove cast ops block._sync_with_cpp() - segment._end_idx += FP16Utils.remove_cast_op(block, self._params, - segment, 0) + segment._end_idx += FP16Utils.remove_cast_op( + block, self._params, segment, 0) # step2: add Sync ops shard_allredue_vars = self._shard.filter_grads(allreduce_vars) @@ -1190,20 
+1193,19 @@ class ShardingOptimizer(MetaOptimizerBase): insert_sync_calc_op(block, segment._end_idx, [calc_dep_vars[-1]]) - # step3: insert `fill_constant` ops + # step3: insert `fill_constant` ops insert_fill_constant_ops(block, segment._end_idx, fill_constant_vars) - # step4: add `cast` ops + # step4: add `cast` ops insert_cast_ops(block, segment._end_idx, cast_ops) # step5: add broadcast ops # gradient merge if self.gradient_merge_mode == "sharding_gm" and self._gradient_merge_acc_step > 1: self.create_persistable_gradients_and_insert_merge_ops( - block, - self._startup_program.global_block(), segment._start_idx, - shard_allredue_vars, self._shard) + block, self._startup_program.global_block(), + segment._start_idx, shard_allredue_vars, self._shard) insert_broadcast_ops(block, segment._start_idx, self.sharding_ring_id, broadcast_vars) @@ -1226,17 +1228,16 @@ class ShardingOptimizer(MetaOptimizerBase): insert_sync_comm_ops(block, segment._start_idx, self.sharding_ring_id, allreduce_vars) # sharding - # allreduce --> reduce + # allreduce --> reduce # TODO temp change if len(allreduce_vars) > 0: - insert_reduce_ops( - block, - segment._start_idx, - self.sharding_ring_id, - allreduce_vars, - self._shard, - op_role=OpRole.Backward, - use_calc_stream=False) + insert_reduce_ops(block, + segment._start_idx, + self.sharding_ring_id, + allreduce_vars, + self._shard, + op_role=OpRole.Backward, + use_calc_stream=False) block._sync_with_cpp() @@ -1308,8 +1309,8 @@ class ShardingOptimizer(MetaOptimizerBase): self.global_rank = self.role_maker._worker_index() self.global_endpoints = self.role_maker._get_trainer_endpoints() self.current_endpoint = self.global_endpoints[self.global_rank] - self._collective_helper = CollectiveHelper( - self.role_maker, nrings=self._nrings_sharding) + self._collective_helper = CollectiveHelper(self.role_maker, + nrings=self._nrings_sharding) assert self.global_word_size % self.mp_degree == 0, \ "global_word_size: {} should be divisible to the mp_degree: {}".format(self.global_word_size, self.mp_degree) assert self.global_word_size % self.sharding_degree == 0, \ @@ -1340,7 +1341,7 @@ class ShardingOptimizer(MetaOptimizerBase): self.mp_group_id = -1 self.mp_group_endpoints = [] - # sharding + # sharding if self.sharding_degree > 1: self.sharding_ring_id = 1 self.sharding_rank = (self.global_rank // @@ -1354,7 +1355,7 @@ class ShardingOptimizer(MetaOptimizerBase): if (idx // (self.mp_degree * self.sharding_degree)) == self. sharding_group_id and idx % self.mp_degree == self.mp_rank ] - # sharding + ... + # sharding + ... else: self.sharding_group_endpoints = [ ep for idx, ep in enumerate(self.global_endpoints) @@ -1385,8 +1386,9 @@ class ShardingOptimizer(MetaOptimizerBase): pp_stage_offset = self.sharding_degree * self.mp_degree self.pp_group_endpoints = [] for i in range(self.pp_degree): - self.pp_group_endpoints.append(self.global_endpoints[ - pp_first_stage_idx + pp_stage_offset * i]) + self.pp_group_endpoints.append( + self.global_endpoints[pp_first_stage_idx + + pp_stage_offset * i]) assert self.current_endpoint in self.pp_group_endpoints else: self.pp_ring_id = -1 @@ -1399,7 +1401,7 @@ class ShardingOptimizer(MetaOptimizerBase): # outter-pure-dp group # NOTE (JZ-LIANG) support outter-pure-dp to scale the throughput in 3D parallelism # e.g. 
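The ring-setup hunks above carve sub-groups out of the flat trainer endpoint list with simple index arithmetic: the pipeline group, for example, starts at pp_first_stage_idx and steps by pp_stage_offset = sharding_degree * mp_degree. A small sketch of that pattern (helper name is illustrative):

    # Pick `degree` endpoints out of the flat list, starting at `first_idx`
    # and stepping by `stride`; this is the pattern used for the pp and dp
    # endpoint groups in the hunks above.
    def strided_endpoints(global_endpoints, first_idx, stride, degree):
        return [global_endpoints[first_idx + stride * i] for i in range(degree)]

    # e.g. pp group: stride = sharding_degree * mp_degree (pp_stage_offset above);
    # the pure-dp group's stride additionally folds in the local pipeline degree.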
mp-sharding-pp-dp - # sharding-hybrid-dp as one senario of outter-pure-dp + # sharding-hybrid-dp as one senario of outter-pure-dp local_pp_degree = self.pp_degree if os.getenv("PADDLE_MANUAL_PIPELINE_STAGE", None): assert self.pp_degree == 2, ("For manually set pipeline, only " @@ -1423,8 +1425,8 @@ class ShardingOptimizer(MetaOptimizerBase): local_pp_degree) self.dp_group_endpoints = [] for i in range(self.dp_degree): - self.dp_group_endpoints.append(self.global_endpoints[ - dp_first_rank_idx + dp_offset * i]) + self.dp_group_endpoints.append( + self.global_endpoints[dp_first_rank_idx + dp_offset * i]) assert self.current_endpoint in self.dp_group_endpoints logger.info("Hybrid DP mode turn on !") else: @@ -1475,6 +1477,7 @@ class ShardingOptimizer(MetaOptimizerBase): return def _recreate_not_persist_param_as_var(self): + def recreate_not_persist_param_as_var(program): block = program.global_block() params = block.all_parameters() @@ -1498,15 +1501,14 @@ class ShardingOptimizer(MetaOptimizerBase): is_distributed = param.is_distributed block._remove_var(name, sync=False) - var = block.create_var( - name=name, - shape=shape, - dtype=dtype, - type=type, - lod_level=lod_level, - stop_gradient=stop_gradient, - trainable=trainable, - persistable=False) + var = block.create_var(name=name, + shape=shape, + dtype=dtype, + type=type, + lod_level=lod_level, + stop_gradient=stop_gradient, + trainable=trainable, + persistable=False) if have_dist_attr: var.is_distributed = is_distributed @@ -1552,16 +1554,15 @@ class ShardingOptimizer(MetaOptimizerBase): rings.append(self.dp_ring_id) for ring in rings: - startup_block.append_op( - type='c_broadcast', - inputs={'X': param}, - outputs={'Out': param}, - attrs={ - 'ring_id': ring, - 'root': 0, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Forward - }) + startup_block.append_op(type='c_broadcast', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring, + 'root': 0, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Forward + }) startup_block._sync_with_cpp() @@ -1595,8 +1596,10 @@ class ShardingOptimizer(MetaOptimizerBase): main_block._insert_op_without_sync( insert_idx, type="elementwise_add", - inputs={'X': grad_name, - 'Y': gradient_merge_var}, + inputs={ + 'X': grad_name, + 'Y': gradient_merge_var + }, outputs={'Out': gradient_merge_var}, attrs={ 'axis': -1, @@ -1605,14 +1608,13 @@ class ShardingOptimizer(MetaOptimizerBase): }) # startup initialization - startup_block.append_op( - type="fill_constant", - outputs={"Out": startup_gradient_merge_var}, - attrs={ - "shape": grad_var.shape, - "dtype": grad_var.dtype, - "value": float(0), - }) + startup_block.append_op(type="fill_constant", + outputs={"Out": startup_gradient_merge_var}, + attrs={ + "shape": grad_var.shape, + "dtype": grad_var.dtype, + "value": float(0), + }) main_block._sync_with_cpp() startup_block._sync_with_cpp() @@ -1627,13 +1629,12 @@ class ShardingOptimizer(MetaOptimizerBase): persistable=True, force_cpu=True) - zero_var = layers.create_global_var( - name="gradient_merge_zero", - shape=[1], - value=int(0), - dtype='int32', - persistable=True, - force_cpu=True) + zero_var = layers.create_global_var(name="gradient_merge_zero", + shape=[1], + value=int(0), + dtype='int32', + persistable=True, + force_cpu=True) # Add step var & cond var current_step_var = layers.create_global_var( @@ -1644,36 +1645,40 @@ class ShardingOptimizer(MetaOptimizerBase): persistable=True, force_cpu=True) - cond_var = main_block.create_var( - name="gradient_merge_cond", shape=[1], dtype='bool') 
+ cond_var = main_block.create_var(name="gradient_merge_cond", + shape=[1], + dtype='bool') with device_guard("cpu"): # step_var = (step_var + 1) % k_step - main_block.append_op( - type='increment', - inputs={'X': [current_step_var]}, - outputs={'Out': [current_step_var]}, - attrs={'step': float(1), - OP_ROLE_KEY: OpRole.Optimize}) - - main_block.append_op( - type='elementwise_mod', - inputs={'X': current_step_var, - 'Y': acc_step_var}, - outputs={'Out': current_step_var}, - attrs={ - 'axis': -1, - OP_ROLE_KEY: OpRole.Optimize, - 'use_mkldnn': False - }) + main_block.append_op(type='increment', + inputs={'X': [current_step_var]}, + outputs={'Out': [current_step_var]}, + attrs={ + 'step': float(1), + OP_ROLE_KEY: OpRole.Optimize + }) + + main_block.append_op(type='elementwise_mod', + inputs={ + 'X': current_step_var, + 'Y': acc_step_var + }, + outputs={'Out': current_step_var}, + attrs={ + 'axis': -1, + OP_ROLE_KEY: OpRole.Optimize, + 'use_mkldnn': False + }) # cond_var = (step_var == 0) - main_block.append_op( - type='equal', - inputs={'X': current_step_var, - 'Y': zero_var}, - outputs={'Out': cond_var}, - attrs={OP_ROLE_KEY: OpRole.Optimize}) + main_block.append_op(type='equal', + inputs={ + 'X': current_step_var, + 'Y': zero_var + }, + outputs={'Out': cond_var}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) # paddle.static.Print(current_step_var, message="in FWBW last conditional") return cond_var @@ -1698,35 +1703,37 @@ class ShardingOptimizer(MetaOptimizerBase): # cur_block's forward_block & backward_block is itself cur_block._set_forward_block_idx(cur_block_idx) - # allreduce grad@gradientmerge + # allreduce grad@gradientmerge if self.hybrid_dp: assert self.dp_ring_id >= 0, "dp_ring_id should larger than 0 when in sharding&DP mode" for grad, merged_grad in self._grad2merged_grad.items(): merged_grad_var = main_block.var(merged_grad) - cur_block.append_op( - type='c_allreduce_sum', - inputs={'X': merged_grad_var}, - outputs={'Out': merged_grad_var}, - attrs={ - 'ring_id': self.dp_ring_id, - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Optimize - }) + cur_block.append_op(type='c_allreduce_sum', + inputs={'X': merged_grad_var}, + outputs={'Out': merged_grad_var}, + attrs={ + 'ring_id': self.dp_ring_id, + 'use_calc_stream': True, + OP_ROLE_KEY: OpRole.Optimize + }) # grad@gradientmerge / acc_step for grad, merged_grad in self._grad2merged_grad.items(): # grad /= k_steps merged_grad_var = main_block.var(merged_grad) - cur_block.append_op( - type='scale', - inputs={'X': merged_grad_var}, - outputs={'Out': merged_grad_var}, - attrs={ - 'scale': 1.0 / float(self._gradient_merge_acc_step), - 'bias': 0.0, - 'bias_after_scale': False, - OP_ROLE_KEY: OpRole.Optimize - }) + cur_block.append_op(type='scale', + inputs={'X': merged_grad_var}, + outputs={'Out': merged_grad_var}, + attrs={ + 'scale': + 1.0 / float(self._gradient_merge_acc_step), + 'bias': + 0.0, + 'bias_after_scale': + False, + OP_ROLE_KEY: + OpRole.Optimize + }) # re-create optimize ops already_moved_var_names = [] @@ -1755,11 +1762,10 @@ class ShardingOptimizer(MetaOptimizerBase): type_ = var_.dtype self._main_program.global_block()._remove_var( var_.name, sync=False) - self.cond_block.create_var( - name=name_, - shape=shape_, - dtype=type_, - persistable=False) + self.cond_block.create_var(name=name_, + shape=shape_, + dtype=type_, + persistable=False) already_moved_var_names.append(name_) self._main_program.global_block()._sync_with_cpp() @@ -1768,15 +1774,14 @@ class ShardingOptimizer(MetaOptimizerBase): # fill zero to 
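The gradient-merge machinery above amounts to three pieces of bookkeeping: a step counter advanced modulo the accumulation count, an update condition that fires when the counter wraps to zero, and a 1/acc_step rescale of the accumulated gradients (plus an optional data-parallel all-reduce) before the real optimizer step runs. An eager-style restatement with illustrative names:

    # Restates the increment / elementwise_mod / equal / scale ops above in
    # plain Python; `k_steps` plays the role of _gradient_merge_acc_step.
    class GradMergeCounter:
        def __init__(self, k_steps):
            self.k_steps = k_steps
            self.step = 0

        def tick(self):
            # step_var = (step_var + 1) % k_step ; cond_var = (step_var == 0)
            self.step = (self.step + 1) % self.k_steps
            return self.step == 0

    def average_merged_grads(merged_grads, k_steps):
        # grad@gradientmerge / acc_step, matching the scale op above
        return [g * (1.0 / float(k_steps)) for g in merged_grads]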
grad@gradientmerge for grad, merged_grad in self._grad2merged_grad.items(): merged_grad_var = main_block.var(merged_grad) - cur_block.append_op( - type='fill_constant', - outputs={'Out': merged_grad_var}, - attrs={ - "shape": merged_grad_var.shape, - "dtype": merged_grad_var.dtype, - "value": float(0), - OP_ROLE_KEY: OpRole.Optimize - }) + cur_block.append_op(type='fill_constant', + outputs={'Out': merged_grad_var}, + attrs={ + "shape": merged_grad_var.shape, + "dtype": merged_grad_var.dtype, + "value": float(0), + OP_ROLE_KEY: OpRole.Optimize + }) # lr_var = main_block.var("gradient_merge_current_step") # paddle.static.Print(lr_var, message="in OPTIMIZE last conditional") @@ -1831,8 +1836,10 @@ class ShardingOptimizer(MetaOptimizerBase): 'Cond': cond, 'Input': [], }, - outputs={'Out': [], - 'Scope': [step_scope]}, + outputs={ + 'Out': [], + 'Scope': [step_scope] + }, attrs={ 'sub_block': cond_block, 'is_scalar_condition': True, diff --git a/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py index 9d099a2af24..c628964db35 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py @@ -23,6 +23,7 @@ __all__ = [] class TensorParallelOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): super(TensorParallelOptimizer, self).__init__(optimizer) self.inner_opt = optimizer @@ -32,15 +33,18 @@ class TensorParallelOptimizer(MetaOptimizerBase): "LarsOptimizer", "LambOptimizer", ] - self.meta_optimizers_black_list = ["GraphExecutionOptimizer", ] + self.meta_optimizers_black_list = [ + "GraphExecutionOptimizer", + ] self.mp_ring_id = 0 self.global_ring_id = 1 self.dp_ring_id = 2 def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): - super(TensorParallelOptimizer, self)._set_basic_info( - loss, role_maker, user_defined_optimizer, user_defined_strategy) + super(TensorParallelOptimizer, + self)._set_basic_info(loss, role_maker, user_defined_optimizer, + user_defined_strategy) self.mp_degree = user_defined_strategy.tensor_parallel_configs[ 'tensor_parallel_degree'] @@ -58,7 +62,9 @@ class TensorParallelOptimizer(MetaOptimizerBase): def _enable_strategy(self, dist_strategy, context): dist_strategy.tensor_parallel = True - dist_strategy.tensor_parallel_configs = {"tensor_parallel_degree": 1, } + dist_strategy.tensor_parallel_configs = { + "tensor_parallel_degree": 1, + } def _broadcast_params(self, ring_id, mp_mode): block = self.startup_program.global_block() @@ -67,23 +73,23 @@ class TensorParallelOptimizer(MetaOptimizerBase): if param.is_distributed and mp_mode: continue - block.append_op( - type='c_broadcast', - inputs={'X': param}, - outputs={'Out': param}, - attrs={ - 'ring_id': ring_id, - 'root': 0, - OP_ROLE_KEY: OpRole.Forward - }) + block.append_op(type='c_broadcast', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + 'root': 0, + OP_ROLE_KEY: OpRole.Forward + }) if not param: return # no parameter on this device - block.append_op( - type='c_sync_comm_stream', - inputs={'X': param}, - outputs={'Out': param}, - attrs={'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Forward}) + block.append_op(type='c_sync_comm_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Forward + }) def _get_process_group_info(self): # global ring info @@ -115,15 +121,19 @@ 
class TensorParallelOptimizer(MetaOptimizerBase): collective_helper = CollectiveHelper(self.role_maker, wait_port=False) # Create global ring for all gpus - collective_helper._init_communicator( - self.startup_program, self.current_endpoint, self.global_endpoints, - self.global_rank, self.global_ring_id, True, self.global_ring_id, - True) + collective_helper._init_communicator(self.startup_program, + self.current_endpoint, + self.global_endpoints, + self.global_rank, + self.global_ring_id, True, + self.global_ring_id, True) # Create model parallel ring for all gpus - collective_helper._init_communicator( - self.startup_program, self.current_endpoint, self.mp_endpoints, - self.mp_rank, self.mp_ring_id, True, self.global_ring_id, True) + collective_helper._init_communicator(self.startup_program, + self.current_endpoint, + self.mp_endpoints, self.mp_rank, + self.mp_ring_id, True, + self.global_ring_id, True) self._broadcast_params(self.mp_ring_id, mp_mode=True) # Create dp rings @@ -174,15 +184,14 @@ class TensorParallelOptimizer(MetaOptimizerBase): for idx, op in reversed(list(enumerate(block.ops))): if is_loss_grad_op(op): loss_grad_var = block.vars[op.output_arg_names[0]] - block._insert_op( - idx + 1, - type='scale', - inputs={'X': loss_grad_var}, - outputs={'Out': loss_grad_var}, - attrs={ - 'scale': 1.0 / dp_degree, - OP_ROLE_KEY: OpRole.Backward - }) + block._insert_op(idx + 1, + type='scale', + inputs={'X': loss_grad_var}, + outputs={'Out': loss_grad_var}, + attrs={ + 'scale': 1.0 / dp_degree, + OP_ROLE_KEY: OpRole.Backward + }) break def _insert_allreduce_ops(self, loss, ring_id): @@ -200,34 +209,33 @@ class TensorParallelOptimizer(MetaOptimizerBase): grad = block.vars[op_role_var[i + 1]] if offset == idx: offset += 1 - block._insert_op( - offset, - type='c_sync_calc_stream', - inputs={'X': grad}, - outputs={'Out': grad}, - attrs={OP_ROLE_KEY: OpRole.Backward}) + block._insert_op(offset, + type='c_sync_calc_stream', + inputs={'X': grad}, + outputs={'Out': grad}, + attrs={OP_ROLE_KEY: OpRole.Backward}) offset += 1 - block._insert_op( - offset, - type='c_allreduce_sum', - inputs={'X': grad}, - outputs={'Out': grad}, - attrs={ - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Backward - }) + block._insert_op(offset, + type='c_allreduce_sum', + inputs={'X': grad}, + outputs={'Out': grad}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Backward + }) if grad is None: return for idx, op in list(enumerate(block.ops)): if is_optimizer_op(op): - block._insert_op( - idx, - type='c_sync_comm_stream', - inputs={'X': grad}, - outputs={'Out': grad}, - attrs={'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Backward}) + block._insert_op(idx, + type='c_sync_comm_stream', + inputs={'X': grad}, + outputs={'Out': grad}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Backward + }) break diff --git a/python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py b/python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py index 69e41ab0eda..f5b8660bd88 100644 --- a/python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py +++ b/python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py @@ -18,6 +18,7 @@ __all__ = [] class MetaParallelBase(Layer): + def __init__(self, layers, hcg, strategy): super(MetaParallelBase, self).__init__(layers.full_name() + "_meta_parallel_base") diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py index 2ce8cf7bdeb..14ca1322e78 100644 
--- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py @@ -23,11 +23,12 @@ from paddle.autograd import PyLayer __all__ = [] # Follow this paper to achieve the file: -# Shoeybi M, Patwary M, Puri R, et al. Megatron-lm: Training multi-billion parameter +# Shoeybi M, Patwary M, Puri R, et al. Megatron-lm: Training multi-billion parameter # language models using model parallelism[J]. arXiv preprint arXiv:1909.08053, 2019. (https://arxiv.org/abs/1909.08053) class VocabParallelEmbedding(Layer): + def __init__(self, num_embeddings, embedding_dim, @@ -58,17 +59,15 @@ class VocabParallelEmbedding(Layer): if self.is_mp and paddle.in_dynamic_mode(): with get_rng_state_tracker().rng_state(): - self.weight = self.create_parameter( - attr=self._weight_attr, - shape=self._size, - dtype=self._dtype, - is_bias=False) + self.weight = self.create_parameter(attr=self._weight_attr, + shape=self._size, + dtype=self._dtype, + is_bias=False) else: - self.weight = self.create_parameter( - attr=self._weight_attr, - shape=self._size, - dtype=self._dtype, - is_bias=False) + self.weight = self.create_parameter(attr=self._weight_attr, + shape=self._size, + dtype=self._dtype, + is_bias=False) self.weight.is_distributed = True if self.is_mp else False @@ -85,16 +84,16 @@ class VocabParallelEmbedding(Layer): use_calc_stream=True, use_model_parallel=True) else: - output = F.embedding( - x, - weight=self.weight, - padding_idx=None, - sparse=False, - name=self._name) + output = F.embedding(x, + weight=self.weight, + padding_idx=None, + sparse=False, + name=self._name) return output class ColumnParallelLinear(Layer): + def __init__(self, in_features, out_features, @@ -114,8 +113,8 @@ class ColumnParallelLinear(Layer): self.gather_output = gather_output assert out_features % self.world_size == 0, ( "Number of column of the weight for linear ({}) must be" - " divisible by model parallel size ({})".format(out_features, - self.world_size)) + " divisible by model parallel size ({})".format( + out_features, self.world_size)) self.output_size_per_partition = out_features // self.world_size self._weight_attr = weight_attr @@ -156,8 +155,10 @@ class ColumnParallelLinear(Layer): else: input_parallel = x - output_parallel = F.linear( - input_parallel, self.weight, self.bias, name=self._name) + output_parallel = F.linear(input_parallel, + self.weight, + self.bias, + name=self._name) if self.gather_output and self.is_mp: output = paddle.distributed.collective._c_concat( @@ -168,6 +169,7 @@ class ColumnParallelLinear(Layer): class RowParallelLinear(Layer): + def __init__(self, in_features, out_features, @@ -193,8 +195,8 @@ class RowParallelLinear(Layer): self.is_mp = (self.world_size > 1) assert in_features % self.world_size == 0, ( "Number of row of the weight for linear ({}) must be" - " divisible by model parallel size ({})".format(in_features, - self.world_size)) + " divisible by model parallel size ({})".format( + in_features, self.world_size)) self.input_size_per_partition = in_features // self.world_size @@ -247,6 +249,7 @@ class RowParallelLinear(Layer): class ParallelCrossEntropy(Layer): + def __init__(self, name=None): super(ParallelCrossEntropy, self).__init__() self.name = name diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py index a39b7730375..58b0515e0ba 100755 --- 
a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py @@ -56,6 +56,7 @@ __all__ = [] class LayerDesc(object): + def __init__(self, layer_func, *inputs, **kwargs): self.layer_func = layer_func self.inputs = inputs @@ -74,6 +75,7 @@ class LayerDesc(object): class SharedLayerDesc(LayerDesc): + def __init__(self, key, layer_func, @@ -88,6 +90,7 @@ class SharedLayerDesc(LayerDesc): class SegmentLayers(object): + def __init__(self, layers_desc, num_parts, method="uniform"): self._layers_desc = layers_desc self.method = method @@ -157,6 +160,7 @@ class SegmentLayers(object): class PipelineLayer(Layer): + def __init__(self, layers, num_stages=None, @@ -184,8 +188,8 @@ class PipelineLayer(Layer): if recompute_interval > 0: logger.info( - "Start Recompute for PipeLineParallel. recompute_offload: {}, recompute_partition: {}". - format(recompute_offload, recompute_partition)) + "Start Recompute for PipeLineParallel. recompute_offload: {}, recompute_partition: {}" + .format(recompute_offload, recompute_partition)) _initialize_recompute_setting(recompute_offload, recompute_partition) world_size = dist.get_world_size() @@ -200,9 +204,10 @@ class PipelineLayer(Layer): else: # construct default topology if world_size % num_stages != 0: - raise ValueError("should provide correct num_stages({}) " - "which can be divided by world_size({})". - format(num_stages, world_size)) + raise ValueError( + "should provide correct num_stages({}) " + "which can be divided by world_size({})".format( + num_stages, world_size)) dp_num = world_size // num_stages self._topo = fleet.CommunicateTopology(["data", "pipe", "model"], [dp_num, num_stages, 1]) @@ -238,8 +243,8 @@ class PipelineLayer(Layer): return layers_desc = self._layers_desc - shared_layer_names = set( - s.layer_name for s in layers_desc if isinstance(s, SharedLayerDesc)) + shared_layer_names = set(s.layer_name for s in layers_desc + if isinstance(s, SharedLayerDesc)) for key in shared_layer_names: shared_layers = [] for idx, layer in enumerate(layers_desc): @@ -283,10 +288,10 @@ class PipelineLayer(Layer): def _synchronize_shared_weights(self): for key, comm in self.shared_comm.items(): with paddle.framework.no_grad(): - paddle.distributed.broadcast( - getattr(comm['layer'], comm['weight_attr']), - src=min(comm['ranks']), - group=comm['group']) + paddle.distributed.broadcast(getattr(comm['layer'], + comm['weight_attr']), + src=min(comm['ranks']), + group=comm['group']) for param in comm['layer'].parameters(): if self.global_rank != min(comm['ranks']): @@ -298,8 +303,8 @@ class PipelineLayer(Layer): # need use trace_op to allreduce weight if in_dygraph_mode(): with paddle.framework.no_grad(): - paddle.distributed.all_reduce( - param.grad, group=comm['group']) + paddle.distributed.all_reduce(param.grad, + group=comm['group']) else: with paddle.framework.no_grad(): paddle.fluid.framework._dygraph_tracer().trace_op( @@ -313,12 +318,13 @@ class PipelineLayer(Layer): def _segment_network(self, seg_method): logger.info("start segment network..") - seg = SegmentLayers( - self._layers_desc, num_parts=self._num_stages, method=seg_method) + seg = SegmentLayers(self._layers_desc, + num_parts=self._num_stages, + method=seg_method) self.segment_parts = seg.do_segment() - logger.info("segment result:" + ", ".join( - str(arg) for arg in self.segment_parts)) + logger.info("segment result:" + + ", ".join(str(arg) for arg in self.segment_parts)) self._start_pos = 
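Both the tensor-parallel layers in the previous file and the PipelineLayer hunks above rely on the same divisibility rule: a dimension (out_features, in_features, or world_size) must split evenly across the parallel degree, and each partition gets the integer quotient. A compact restatement (helper name illustrative):

    # Shared sanity check used by ColumnParallelLinear / RowParallelLinear /
    # PipelineLayer above: `total` must divide evenly by `degree`.
    def per_partition(total, degree, what="dimension"):
        if total % degree != 0:
            raise ValueError(
                "{} ({}) must be divisible by parallel degree ({})".format(
                    what, total, degree))
        return total // degree

    # e.g. dp_num = per_partition(world_size, num_stages, "world_size")
    #      output_size_per_partition = per_partition(out_features, world_size)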
self.segment_parts[self._stage_id] self._end_pos = self.segment_parts[self._stage_id + 1] @@ -357,13 +363,13 @@ class PipelineLayer(Layer): setattr(param, "is_firstly_shared", True) if layer.forward_func is None: - self.run_function.append(self.shared_layers[ - layer.layer_name]) + self.run_function.append( + self.shared_layers[layer.layer_name]) else: self.run_function.append( - partial(layer.forward_func, self.shared_layers[ - layer.layer_name])) + partial(layer.forward_func, + self.shared_layers[layer.layer_name])) elif isinstance(layer, LayerDesc): model = layer.build_layer() @@ -373,6 +379,7 @@ class PipelineLayer(Layer): self.run_function.append(layer) def forward_function(self, start, end): + def execute_func(*x): if len(x) == 1: x = x[0] @@ -403,8 +410,8 @@ class PipelineLayer(Layer): return input def _need_recompute(self, funcs, inputs): - if not any(input_.stop_gradient == False for input_ in inputs - if isinstance(input_, paddle.Tensor)): + if not any(input_.stop_gradient == False + for input_ in inputs if isinstance(input_, paddle.Tensor)): return False params = [f.parameters() for f in funcs if isinstance(f, Layer)] diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py index a59d86f1291..fdbf0312db6 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py @@ -105,12 +105,13 @@ def determinate_seed(rng_name): helper = LayerHelper('seed', **locals()) out = helper.create_variable_for_type_inference(dtype=paddle.int32) # set force_cpu to reduce sync copy from CPU->GPU->CPU, and reduce pipeline hang - helper.append_op( - type='seed', - outputs={'Out': out}, - attrs={'deterministic': True, - 'rng_name': rng_name, - 'force_cpu': True}) + helper.append_op(type='seed', + outputs={'Out': out}, + attrs={ + 'deterministic': True, + 'rng_name': rng_name, + 'force_cpu': True + }) return out @@ -218,15 +219,18 @@ def dropout(x, mask = helper.create_variable_for_type_inference( dtype=core.VarDesc.VarType.UINT8, stop_gradient=True) - helper.append_op( - type='dropout', - inputs={'X': [x], - 'Seed': seed}, - outputs={'Out': [out], - 'Mask': [mask]}, - attrs={ - 'dropout_prob': p, - 'is_test': not training, - 'dropout_implementation': mode, - }) + helper.append_op(type='dropout', + inputs={ + 'X': [x], + 'Seed': seed + }, + outputs={ + 'Out': [out], + 'Mask': [mask] + }, + attrs={ + 'dropout_prob': p, + 'is_test': not training, + 'dropout_implementation': mode, + }) return out diff --git a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py index d2171920f2b..3135c5379e8 100755 --- a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py @@ -29,6 +29,7 @@ __all__ = [] class PipelineParallel(MetaParallelBase): + def __init__(self, layers, hcg, strategy): if not isinstance(layers, PipelineLayer): raise TypeError( @@ -239,9 +240,10 @@ class PipelineParallel(MetaParallelBase): assert self._layers._loss_fn is not None, "loss function should exist to compute loss" labels = self._load_micro_batch(self.micro_batch_id) output_tensor = self._layers._loss_fn(output_tensor, labels) - assert isinstance(output_tensor, ( - paddle.Tensor, core.eager.Tensor - )), "Currently, loss_fn should obtain Paddle.Tensor dtype" + assert isinstance( + 
output_tensor, + (paddle.Tensor, core.eager.Tensor + )), "Currently, loss_fn should obtain Paddle.Tensor dtype" with paddle.amp.auto_cast(enable=False): if self.accumulate_steps > 1: @@ -270,9 +272,8 @@ class PipelineParallel(MetaParallelBase): tensors=outputs, grad_tensors=[t for t in output_tensor_grad]) else: - paddle.autograd.backward( - tensors=[output_tensor], - grad_tensors=[output_tensor_grad]) + paddle.autograd.backward(tensors=[output_tensor], + grad_tensors=[output_tensor_grad]) input_tensor_grad = None if input_tensor is not None: @@ -327,16 +328,14 @@ class PipelineParallel(MetaParallelBase): loss = self.total_loss.detach() is_fp32 = paddle.to_tensor( 1) if loss.dtype == paddle.float32 else paddle.to_tensor(0) - paddle.distributed.broadcast( - is_fp32, - src=self.global_rank, - use_calc_stream=True, - group=self.pp_group) - paddle.distributed.broadcast( - loss, - src=self.global_rank, - use_calc_stream=True, - group=self.pp_group) + paddle.distributed.broadcast(is_fp32, + src=self.global_rank, + use_calc_stream=True, + group=self.pp_group) + paddle.distributed.broadcast(loss, + src=self.global_rank, + use_calc_stream=True, + group=self.pp_group) else: is_fp32 = paddle.to_tensor(1) paddle.distributed.broadcast( @@ -344,10 +343,10 @@ class PipelineParallel(MetaParallelBase): src=self._hcg.get_rank_from_stage(self.num_stages - 1), use_calc_stream=True, group=self.pp_group) - loss = paddle.zeros( - shape=[1], - dtype="float32") if is_fp32.numpy()[0] else paddle.zeros( - shape=[1], dtype="float16") + loss = paddle.zeros(shape=[ + 1 + ], dtype="float32") if is_fp32.numpy()[0] else paddle.zeros( + shape=[1], dtype="float16") paddle.distributed.broadcast( loss, src=self._hcg.get_rank_from_stage(self.num_stages - 1), diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py index de36f8503a6..17c7f5a9bbc 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -155,7 +155,7 @@ def _is_valid_send_recv_partial(tensor, mp_degree): assert tensor_numel != 0, "can't send/recv zero element" return mp_degree > 1 and tensor_numel % mp_degree == 0 elif in_dygraph_mode(): - # TODO(shenliang03) support mp+pp optimizer in future. + # TODO(shenliang03) support mp+pp optimizer in future. 
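_is_valid_send_recv_partial above gates the partial point-to-point path: it only applies when model parallelism is on and the tensor splits evenly across the mp ranks, and per the TODO it is disabled in eager mode. The static-graph decision boils down to:

    # Condition checked by _is_valid_send_recv_partial above for the
    # static-graph path; eager mode currently returns False outright.
    def can_send_recv_partial(tensor_numel, mp_degree):
        assert tensor_numel != 0, "can't send/recv zero element"
        return mp_degree > 1 and tensor_numel % mp_degree == 0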
# (partial_send/partial_recv/partial_allgather_) return False @@ -175,11 +175,10 @@ def send_partial(tensor, use_calc_stream, 'ring_id', ring_id, 'peer', dst, 'num', nranks, 'id', rank_id) else: - return paddle.distributed.send( - tensor.detach(), - dst=dst, - group=group, - use_calc_stream=use_calc_stream) + return paddle.distributed.send(tensor.detach(), + dst=dst, + group=group, + use_calc_stream=use_calc_stream) def recv_partial(tensor, @@ -198,11 +197,10 @@ def recv_partial(tensor, 'id', rank_id, 'dtype', tensor.dtype, 'out_shape', tensor.shape) else: - paddle.distributed.recv( - tensor.detach(), - src=src, - group=group, - use_calc_stream=use_calc_stream) + paddle.distributed.recv(tensor.detach(), + src=src, + group=group, + use_calc_stream=use_calc_stream) def allgather_partial(tensor, @@ -244,8 +242,8 @@ def _p2p_helper(tensor_send_next, tensor_send_prev, recv_prev, recv_next): if isinstance(recv_shape_msg, tuple): tensor_recv_prev = [] for idx, shape in enumerate(recv_shape_msg): - tmp = paddle.empty( - shape=shape, dtype=number_2_dtype(recv_dtype_msg[idx])) + tmp = paddle.empty(shape=shape, + dtype=number_2_dtype(recv_dtype_msg[idx])) tmp.stop_gradient = recv_stop_gradient[idx] tensor_recv_prev.append(tmp) tensor_recv_prev = tuple(tensor_recv_prev) @@ -260,8 +258,8 @@ def _p2p_helper(tensor_send_next, tensor_send_prev, recv_prev, recv_next): tensor_recv_next = [] for idx, shape in enumerate(send_shape_msg): tensor_recv_next.append( - paddle.empty( - shape=shape, dtype=number_2_dtype(send_dtype_msg[idx]))) + paddle.empty(shape=shape, + dtype=number_2_dtype(send_dtype_msg[idx]))) tensor_recv_next = tuple(tensor_recv_next) else: tensor_recv_next = paddle.empty( @@ -272,107 +270,95 @@ def _p2p_helper(tensor_send_next, tensor_send_prev, recv_prev, recv_next): if isinstance(tensor_send_prev, tuple): for d in tensor_send_prev: paddle.distributed.wait(d, use_calc_stream=True) - send_partial( - d, - dst=0, - nranks=mp_degree, - rank_id=mp_rank, - group=_hcg.send_prev_group, - use_calc_stream=False) + send_partial(d, + dst=0, + nranks=mp_degree, + rank_id=mp_rank, + group=_hcg.send_prev_group, + use_calc_stream=False) else: paddle.distributed.wait(tensor_send_prev, use_calc_stream=True) - send_partial( - tensor_send_prev, - dst=0, - nranks=mp_degree, - rank_id=mp_rank, - group=_hcg.send_prev_group, - use_calc_stream=False) + send_partial(tensor_send_prev, + dst=0, + nranks=mp_degree, + rank_id=mp_rank, + group=_hcg.send_prev_group, + use_calc_stream=False) if tensor_recv_prev is not None: if isinstance(tensor_recv_prev, tuple): for d in tensor_recv_prev: - recv_partial( - d, - src=0, - nranks=mp_degree, - rank_id=mp_rank, - group=_hcg.recv_prev_group, - use_calc_stream=True) - allgather_partial( - d, - nranks=mp_degree, - rank_id=mp_rank, - group=mp_group, - use_calc_stream=True) + recv_partial(d, + src=0, + nranks=mp_degree, + rank_id=mp_rank, + group=_hcg.recv_prev_group, + use_calc_stream=True) + allgather_partial(d, + nranks=mp_degree, + rank_id=mp_rank, + group=mp_group, + use_calc_stream=True) else: - recv_partial( - tensor_recv_prev, - src=0, - nranks=mp_degree, - rank_id=mp_rank, - group=_hcg.recv_prev_group, - use_calc_stream=True) - allgather_partial( - tensor_recv_prev, - nranks=mp_degree, - rank_id=mp_rank, - group=mp_group, - use_calc_stream=True) + recv_partial(tensor_recv_prev, + src=0, + nranks=mp_degree, + rank_id=mp_rank, + group=_hcg.recv_prev_group, + use_calc_stream=True) + allgather_partial(tensor_recv_prev, + nranks=mp_degree, + rank_id=mp_rank, + group=mp_group, + 
use_calc_stream=True) if tensor_send_next is not None: if isinstance(tensor_send_next, tuple): for d in tensor_send_next: paddle.distributed.wait(d, use_calc_stream=True) - send_partial( - d, - dst=1, - nranks=mp_degree, - rank_id=mp_rank, - group=_hcg.send_next_group, - use_calc_stream=False) + send_partial(d, + dst=1, + nranks=mp_degree, + rank_id=mp_rank, + group=_hcg.send_next_group, + use_calc_stream=False) else: paddle.distributed.wait(tensor_send_next, use_calc_stream=True) - send_partial( - tensor_send_next, - dst=1, - nranks=mp_degree, - rank_id=mp_rank, - group=_hcg.send_next_group, - use_calc_stream=False) + send_partial(tensor_send_next, + dst=1, + nranks=mp_degree, + rank_id=mp_rank, + group=_hcg.send_next_group, + use_calc_stream=False) if tensor_recv_next is not None: if isinstance(tensor_recv_next, tuple): for d in tensor_recv_next: - recv_partial( - d, - src=1, - nranks=mp_degree, - rank_id=mp_rank, - group=_hcg.recv_next_group, - use_calc_stream=True) - allgather_partial( - d, - nranks=mp_degree, - rank_id=mp_rank, - group=mp_group, - use_calc_stream=True) + recv_partial(d, + src=1, + nranks=mp_degree, + rank_id=mp_rank, + group=_hcg.recv_next_group, + use_calc_stream=True) + allgather_partial(d, + nranks=mp_degree, + rank_id=mp_rank, + group=mp_group, + use_calc_stream=True) else: - recv_partial( - tensor_recv_next, - src=1, - nranks=mp_degree, - rank_id=mp_rank, - group=_hcg.recv_next_group, - use_calc_stream=True) - - allgather_partial( - tensor_recv_next, - nranks=mp_degree, - rank_id=mp_rank, - group=mp_group, - use_calc_stream=True) + recv_partial(tensor_recv_next, + src=1, + nranks=mp_degree, + rank_id=mp_rank, + group=_hcg.recv_next_group, + use_calc_stream=True) + + allgather_partial(tensor_recv_next, + nranks=mp_degree, + rank_id=mp_rank, + group=mp_group, + use_calc_stream=True) return tensor_recv_prev, tensor_recv_next @@ -384,11 +370,10 @@ def recv_forward(): _send_recv_meta.recv_meta(_hcg.recv_prev_group) _send_recv_meta.has_recv_meta = _use_cache - input_tensor, _ = _p2p_helper( - tensor_send_next=None, - tensor_send_prev=None, - recv_prev=True, - recv_next=False) + input_tensor, _ = _p2p_helper(tensor_send_next=None, + tensor_send_prev=None, + recv_prev=True, + recv_next=False) return input_tensor @@ -396,11 +381,10 @@ def recv_backward(): if _hcg.is_last_stage: output_tensor_grad = None else: - _, output_tensor_grad = _p2p_helper( - tensor_send_next=None, - tensor_send_prev=None, - recv_prev=False, - recv_next=True) + _, output_tensor_grad = _p2p_helper(tensor_send_next=None, + tensor_send_prev=None, + recv_prev=False, + recv_next=True) return output_tensor_grad @@ -411,31 +395,28 @@ def send_forward(output_tensor): _send_recv_meta.send_meta(output_tensor, _hcg.send_next_group) _send_recv_meta.has_send_meta = _use_cache - _p2p_helper( - tensor_send_next=output_tensor, - tensor_send_prev=None, - recv_prev=False, - recv_next=False) + _p2p_helper(tensor_send_next=output_tensor, + tensor_send_prev=None, + recv_prev=False, + recv_next=False) def send_backward(input_tensor_grad): if not _hcg.is_first_stage: - _p2p_helper( - tensor_send_next=None, - tensor_send_prev=input_tensor_grad, - recv_prev=False, - recv_next=False) + _p2p_helper(tensor_send_next=None, + tensor_send_prev=input_tensor_grad, + recv_prev=False, + recv_next=False) def send_forward_recv_backward(output_tensor): if _hcg.is_last_stage: output_tensor_grad = None else: - _, output_tensor_grad = _p2p_helper( - tensor_send_next=output_tensor, - tensor_send_prev=None, - recv_prev=False, - 
recv_next=True) + _, output_tensor_grad = _p2p_helper(tensor_send_next=output_tensor, + tensor_send_prev=None, + recv_prev=False, + recv_next=True) return output_tensor_grad @@ -443,9 +424,8 @@ def send_backward_recv_forward(input_tensor_grad): if _hcg.is_first_stage: input_tensor = None else: - input_tensor, _ = _p2p_helper( - tensor_send_next=None, - tensor_send_prev=input_tensor_grad, - recv_prev=True, - recv_next=False) + input_tensor, _ = _p2p_helper(tensor_send_next=None, + tensor_send_prev=input_tensor_grad, + recv_prev=True, + recv_next=False) return input_tensor diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py index 6c8badd64e1..4fed58fe133 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py @@ -164,7 +164,7 @@ class _HPEagerRecomputeFunction(EagerPyLayer): def forward(ctx, run_function, all_outputs, *args): check_recompute_necessary(args) - # store for recomputing + # store for recomputing ctx.run_function = run_function # store the rng states @@ -237,8 +237,8 @@ class _HPEagerRecomputeFunction(EagerPyLayer): for i, idx in enumerate(tensor_indices): if _recompute_partition: state = tensors[i].stop_gradient - tensors[i] = _merge_activation(tensors[i]).detach( - ).reshape_(tensor_shapes[i]) + tensors[i] = _merge_activation( + tensors[i]).detach().reshape_(tensor_shapes[i]) tensors[i].stop_gradient = state inputs[idx] = tensors[i].cuda( device_id) if _recompute_offload else tensors[i] @@ -249,11 +249,10 @@ class _HPEagerRecomputeFunction(EagerPyLayer): # need restore auto_cast state as well as w/b list with swith_rng_state_tracker(ctx.fwd_cuda_rng_state, ctx.fwd_cuda_rng_state_tracker): - with paddle.amp.auto_cast( - enable=ctx.is_fw_autocast, - custom_white_list=ctx.amp_white_list, - custom_black_list=ctx.amp_black_list, - level=ctx.amp_level): + with paddle.amp.auto_cast(enable=ctx.is_fw_autocast, + custom_white_list=ctx.amp_white_list, + custom_black_list=ctx.amp_black_list, + level=ctx.amp_level): detached_inputs = detach_variable(tuple(inputs)) outputs = ctx.run_function(*detached_inputs) @@ -276,7 +275,7 @@ class _HPEagerRecomputeFunction(EagerPyLayer): "none of output has stop_gradient=False, this recompute() is not necessary" ) - # actually backward + # actually backward paddle.autograd.backward(forward_outputs_with_grad, backward_inputs) grads = tuple(inp._grad_ivar() for inp in detached_inputs if isinstance(inp, core.eager.Tensor)) @@ -296,7 +295,7 @@ class _HPRecomputeFunction(PyLayer): def forward(ctx, run_function, all_outputs, *args): check_recompute_necessary(args) - # store for recomputing + # store for recomputing ctx.run_function = run_function # store the rng states @@ -369,8 +368,8 @@ class _HPRecomputeFunction(PyLayer): for i, idx in enumerate(tensor_indices): if _recompute_partition: state = tensors[i].stop_gradient - tensors[i] = _merge_activation(tensors[i]).detach( - ).reshape_(tensor_shapes[i]) + tensors[i] = _merge_activation( + tensors[i]).detach().reshape_(tensor_shapes[i]) tensors[i].stop_gradient = state inputs[idx] = tensors[i].cuda( device_id) if _recompute_offload else tensors[i] @@ -381,11 +380,10 @@ class _HPRecomputeFunction(PyLayer): # need restore auto_cast state as well as w/b list with swith_rng_state_tracker(ctx.fwd_cuda_rng_state, ctx.fwd_cuda_rng_state_tracker): - with paddle.amp.auto_cast( - enable=ctx.is_fw_autocast, - 
custom_white_list=ctx.amp_white_list, - custom_black_list=ctx.amp_black_list, - level=ctx.amp_level): + with paddle.amp.auto_cast(enable=ctx.is_fw_autocast, + custom_white_list=ctx.amp_white_list, + custom_black_list=ctx.amp_black_list, + level=ctx.amp_level): detached_inputs = detach_variable(tuple(inputs)) outputs = ctx.run_function(*detached_inputs) @@ -407,7 +405,7 @@ class _HPRecomputeFunction(PyLayer): "none of output has stop_gradient=False, this recompute() is not necessary" ) - # actually backward + # actually backward paddle.autograd.backward(forward_outputs_with_grad, backward_inputs) grads = tuple(inp._grad_ivar() for inp in detached_inputs if isinstance(inp, core.VarBase)) @@ -415,7 +413,7 @@ class _HPRecomputeFunction(PyLayer): def _hp_recompute(function, *args): - # NODTE(shenliang03)The current hybrid parallel recompute has limitations. + # NODTE(shenliang03)The current hybrid parallel recompute has limitations. # It cannot handle the following situations: # 1. The calculation output of recompute, there are tensors that do not require gradients. # 2. The forward output tensor has no gradient. This problem can be solved temporarily by detach(). diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py index 70d2d2a1930..7bdbe2ce32e 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py @@ -55,7 +55,7 @@ class GroupShardedOptimizerStage2(Optimizer): """ - # TODO (Baibaifan) + # TODO (Baibaifan) # Feature Notes: # 1. Unified memory for parameters and parameters.grad to InternalStorage. # 2. Support the segmentation of optimizer parameters and partial updating of parameters. 
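The recompute backward hunks above re-run the forward function under the AMP and RNG state captured during the original forward so that recomputed activations match. A stripped-down sketch of the AMP part, assuming a Paddle 2.x dygraph environment (the RNG-tracker handling and PyLayer plumbing are omitted):

    # Sketch of replaying captured autocast settings when recomputing, as the
    # auto_cast(...) block in the backward above does. Not the real PyLayer.
    import paddle

    def replay_forward(run_function, inputs, is_fw_autocast,
                       amp_white_list, amp_black_list, amp_level):
        with paddle.amp.auto_cast(enable=is_fw_autocast,
                                  custom_white_list=amp_white_list,
                                  custom_black_list=amp_black_list,
                                  level=amp_level):
            return run_function(*inputs)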
@@ -103,8 +103,8 @@ class GroupShardedOptimizerStage2(Optimizer): filter(lambda x: x.trainable and x.dtype == Type.fp16.value, self._local_params))) > 0 - self._group = new_group(_get_global_group() - .ranks) if group is None else group + self._group = new_group( + _get_global_group().ranks) if group is None else group self.world_size = self._group.nranks self._rank = self._group.rank @@ -152,11 +152,10 @@ class GroupShardedOptimizerStage2(Optimizer): """ for p in self._local_params: - broadcast( - p, - src=self._global_root_rank, - group=self._group, - use_calc_stream=True) + broadcast(p, + src=self._global_root_rank, + group=self._group, + use_calc_stream=True) def _generate_master_params(self, trainable_params): if self.offload: @@ -225,8 +224,9 @@ class GroupShardedOptimizerStage2(Optimizer): # Assign the parameters of each rank according to the type for param in self._local_params: if param.dtype not in self._dtype_rank_params.keys(): - self._dtype_rank_params[ - param.dtype] = [[] for _ in range(self.world_size)] + self._dtype_rank_params[param.dtype] = [ + [] for _ in range(self.world_size) + ] self._dtype_rank_params[param.dtype][self.param2rank[ param.name]].append(param) @@ -410,8 +410,7 @@ class GroupShardedOptimizerStage2(Optimizer): # Exchange all the shards with the other ranks for dtype_per_rank in self.param_storages.values(): for dst_rank, internal_storage in dtype_per_rank.items(): - broadcast( - tensor=internal_storage.buffer, - src=self._group.ranks[dst_rank], - group=self._group, - use_calc_stream=True) + broadcast(tensor=internal_storage.buffer, + src=self._group.ranks[dst_rank], + group=self._group, + use_calc_stream=True) diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py index 0c045c45fd5..39e92f88780 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py @@ -53,7 +53,7 @@ class GroupShardedStage2(nn.Layer): .. ZeRO: https://arxiv.org/pdf/1910.02054.pdf. """ - # TODO (Baibaifan) + # TODO (Baibaifan) # Feature Notes:: # 1. Unified memory for param and param.grad to InternalStorage. # 2. Divide param.grad according to rank to centrally apply for and release GPU memory. 
@@ -74,8 +74,9 @@ class GroupShardedStage2(nn.Layer): # training options self._layer = layer - self._sharding_optimizers = [sharding_optimizer] if not isinstance( - sharding_optimizer, list) else sharding_optimizer + self._sharding_optimizers = [ + sharding_optimizer + ] if not isinstance(sharding_optimizer, list) else sharding_optimizer assert all( list( map(lambda opt: isinstance(opt, GroupShardedOptimizerStage2), @@ -85,8 +86,8 @@ class GroupShardedStage2(nn.Layer): self._auto_refresh_trainable = auto_refresh_trainable # Communication related attributes - self._group = collective.new_group(collective._get_global_group() - .ranks) if group is None else group + self._group = collective.new_group( + collective._get_global_group().ranks) if group is None else group self._world_size_scaling = 1.0 / self._group.nranks assert self._group.nranks > 1, "Training must be distributed, ranks must be greater than 1" self._rank = self._group.rank @@ -166,8 +167,8 @@ class GroupShardedStage2(nn.Layer): return fw def set_state_dict(self, state_dict, use_structured_name=True): - self._layer.set_state_dict( - state_dict, use_structured_name=use_structured_name) + self._layer.set_state_dict(state_dict, + use_structured_name=use_structured_name) def state_dict(self, destination=None, @@ -228,7 +229,7 @@ class GroupShardedStage2(nn.Layer): else: self._build_grad_storages() - # Clear all flags state + # Clear all flags state self._clear_counters() def to(self, device=None, dtype=None, blocking=True): @@ -282,11 +283,10 @@ class GroupShardedStage2(nn.Layer): """ for buffer in self._layer.buffers(include_sublayers=True): - collective.broadcast( - buffer, - self._global_root_rank, - self._group, - use_calc_stream=True) + collective.broadcast(buffer, + self._global_root_rank, + self._group, + use_calc_stream=True) def __getattr__(self, name): """Forward missing attributes to wrapped layer.""" @@ -337,10 +337,9 @@ class GroupShardedStage2(nn.Layer): param.clear_gradient(False) # Synchronize the reduce parameter gradient - collective.reduce( - tensor=param.grad, - dst=self._group.ranks[dst_rank], - group=self._group) + collective.reduce(tensor=param.grad, + dst=self._group.ranks[dst_rank], + group=self._group) # TODO (Baibaifan) Asynchronous the reduce parameter gradient # Clear the task flow and trigger callback to clear the redundant gradient @@ -452,10 +451,10 @@ class GroupShardedStage2(nn.Layer): else: self._param_grads.append(param.name) print( - "Can not add param: {}, param's shape: {}, param align: {}, grad_storages fill: {}, ". - format(param.name, param.shape, self._trainable_param2align[ - param.name], self._grad_storages[param.dtype][dst_rank] - ._fill)) + "Can not add param: {}, param's shape: {}, param align: {}, grad_storages fill: {}, " + .format(param.name, param.shape, + self._trainable_param2align[param.name], + self._grad_storages[param.dtype][dst_rank]._fill)) for dtype in self._grad_storages.keys(): self._grad_storage_list.extend( @@ -511,15 +510,15 @@ class GroupShardedStage2(nn.Layer): if Type.fp16.value in rank_buffer_size.keys(): # FP16 GradStorage and model size logger_.info( - "====== FP16 GradStorage size: {:.2f}M parameters, Model size {:.2f}M parameters ======". 
- format(rank_buffer_size[Type.fp16.value] / 2**19, model_size / 2 - **19)) + "====== FP16 GradStorage size: {:.2f}M parameters, Model size {:.2f}M parameters ======" + .format(rank_buffer_size[Type.fp16.value] / 2**19, + model_size / 2**19)) if Type.fp32.value in rank_buffer_size.keys(): # FP32 GradStorage and model size logger_.info( - "====== FP32 GradStorage size: {:.2f}M parameters, Model size {:.2f}M parameters ======". - format(rank_buffer_size[Type.fp32.value] / 2**18, model_size / 2 - **18)) + "====== FP32 GradStorage size: {:.2f}M parameters, Model size {:.2f}M parameters ======" + .format(rank_buffer_size[Type.fp32.value] / 2**18, + model_size / 2**18)) return rank_buffer_size def _redefine_opt_step(self): diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py index e44b5d2515d..0d6bfcf9224 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py @@ -45,7 +45,9 @@ def _all_gather(tensor, buffer_size, group): # CUDA alignment 256 bytes -alignment = {"gpu": 256, } +alignment = { + "gpu": 256, +} align = { Type.fp16.value: 2, Type.fp32.value: 4, @@ -64,7 +66,7 @@ class GroupShardedStage3(nn.Layer): .. ZeRO: https://arxiv.org/pdf/1910.02054.pdf. """ - # TODO (Baibaifan) + # TODO (Baibaifan) # Feature Notes:: # 1. The model supports the segmentation of parameters by global ranks in layers. # 2. Support communication flow and computing flow. @@ -98,14 +100,14 @@ class GroupShardedStage3(nn.Layer): DEV = "cpu" if paddle.get_device() == "cpu" else paddle.get_device( ).split(":")[0] global DEV_ID - DEV_ID = 0 if paddle.get_device() == "cpu" else int(paddle.get_device() - .split(":")[1]) + DEV_ID = 0 if paddle.get_device() == "cpu" else int( + paddle.get_device().split(":")[1]) global param2dtype param2dtype = dict() # Communication group establishment - self._group = collective.new_group(collective._get_global_group() - .ranks) if group is None else group + self._group = collective.new_group( + collective._get_global_group().ranks) if group is None else group self._world_size_scaling = 1.0 / self._group.nranks assert self._group.nranks > 1, "Training must be distributed, ranks must be greater than 1." 
self._rank = self._group.rank @@ -176,11 +178,10 @@ class GroupShardedStage3(nn.Layer): """ for p in self._layer.parameters(): - collective.broadcast( - p, - src=self._global_root_rank, - group=self._group, - use_calc_stream=True) + collective.broadcast(p, + src=self._global_root_rank, + group=self._group, + use_calc_stream=True) def _clear_gradients(self): assert len(self._trainable_params.keys()) > 0 @@ -190,9 +191,9 @@ class GroupShardedStage3(nn.Layer): filter(lambda p: p.trainable and p not in self._unslice_params, current_layer_params)) for param in trainable_params: - assert hasattr( - param, "fw_storage" - ), "Find {} don't have fw_storage attribute.".format(param.name) + assert hasattr(param, "fw_storage" + ), "Find {} don't have fw_storage attribute.".format( + param.name) param.fw_storage.clear_gradient(False) param.bw_storage._clear() @@ -250,8 +251,8 @@ class GroupShardedStage3(nn.Layer): return fw def set_state_dict(self, state_dict, use_structured_name=True): - self._layer.set_state_dict( - state_dict, use_structured_name=use_structured_name) + self._layer.set_state_dict(state_dict, + use_structured_name=use_structured_name) def state_dict(self, destination=None, @@ -376,16 +377,16 @@ class GroupShardedStage3(nn.Layer): if self._offload: with device_guard(): tmp_tensor = buffer._slice(start, end) - param.fw_storage = core.eager.Tensor( - value=tmp_tensor, - place=core.CPUPlace(), - name="slice@" + param.name) + param.fw_storage = core.eager.Tensor(value=tmp_tensor, + place=core.CPUPlace(), + name="slice@" + param.name) with device_guard(): param.master_weight = paddle.cast(param.fw_storage, Type.fp32.value) else: - param.fw_storage = core.eager.Tensor( - value=buffer._slice(start, end), name="slice@" + param.name) + param.fw_storage = core.eager.Tensor(value=buffer._slice( + start, end), + name="slice@" + param.name) param.status = "part" # Updata optimizer master weights @@ -414,6 +415,7 @@ class GroupShardedStage3(nn.Layer): self._register_forward_hooks(sub_layer) def _register_forward_all_hooks(self, sub_layer, task_flow): + def _forward_pre_hook(layer, inputs): return ForwardPreHooks(layer, self._order_tracer, self._trainable_params, @@ -421,10 +423,12 @@ class GroupShardedStage3(nn.Layer): self._sync_comm, self._offload, task_flow) def _forward_post_hook(layer, inputs, outputs): - return ForwardPostHooks.apply( - outputs, layer, self._order_tracer, self._trainable_params, - self._param2buffer, self._param2buffer_size, self._rank, - self._group, self._sync_comm, self._offload, task_flow) + return ForwardPostHooks.apply(outputs, layer, self._order_tracer, + self._trainable_params, + self._param2buffer, + self._param2buffer_size, self._rank, + self._group, self._sync_comm, + self._offload, task_flow) # register previous forward hooks sub_layer.register_forward_pre_hook(_forward_pre_hook) @@ -439,11 +443,10 @@ class GroupShardedStage3(nn.Layer): """ for buffer in self._layer.buffers(include_sublayers=True): - collective.broadcast( - buffer, - self._global_root_rank, - self._group, - use_calc_stream=True) + collective.broadcast(buffer, + self._global_root_rank, + self._group, + use_calc_stream=True) def __getattr__(self, name): """Forward missing attributes to wrapped layer.""" @@ -507,15 +510,14 @@ class GroupShardedStage3(nn.Layer): trainable_params = list( filter(lambda p: p.trainable and p not in self._unslice_params, current_layer_params)) - t_flow = _allgather_buffer( - trainable_params, - self._group, - param2buffer_size=self._param2buffer_size, - 
use_calc_stream=True, - task_flow=TaskFlow(), - sync_wait=True, - offload=self._offload, - convert2cpu=convert2cpu) + t_flow = _allgather_buffer(trainable_params, + self._group, + param2buffer_size=self._param2buffer_size, + use_calc_stream=True, + task_flow=TaskFlow(), + sync_wait=True, + offload=self._offload, + convert2cpu=convert2cpu) if convert2cpu: for param in trainable_params: t_flow.full_param[param.name][0]._share_buffer_to(param) @@ -534,6 +536,7 @@ class GroupShardedStage3(nn.Layer): param._register_backward_hook(allreduce_function) def _get_allreduce_fn(self, param): + @paddle.autograd.no_grad() def allreduce_(*_): if param.name in self._task_flow.full_grad.keys(): @@ -552,8 +555,8 @@ class GroupShardedStage3(nn.Layer): cpu_grad = _device2cpu( full_grad._slice(start, end).detach().clone(), True) with device_guard(): - param.bw_storage = paddle.add(param.bw_storage, - cpu_grad) + param.bw_storage = paddle.add( + param.bw_storage, cpu_grad) else: param.bw_storage = paddle.add( param.bw_storage, @@ -566,8 +569,8 @@ class GroupShardedStage3(nn.Layer): param.use_count = 0 param._clear_data() start, end = self._param2buffer[param.name][self._rank] - param.fw_storage = self._task_flow.full_param[param.name][ - 0]._slice(start, end).detach().clone() + param.fw_storage = self._task_flow.full_param[ + param.name][0]._slice(start, end).detach().clone() param.status = "part" del self._task_flow.full_param[param.name] @@ -639,19 +642,19 @@ def ForwardPreHooks(layer, order_tracer, trainable_params, param2buffer_size, order_ = order_tracer[layer_id] layer_id = order_tracer["layer"][order_ + 1] - _allgather_buffer( - trainable_params[layer_id], - group, - param2buffer_size=param2buffer_size, - use_calc_stream=use_calc, - task_flow=task_flow, - sync_wait=sync_wait, - offload=offload) + _allgather_buffer(trainable_params[layer_id], + group, + param2buffer_size=param2buffer_size, + use_calc_stream=use_calc, + task_flow=task_flow, + sync_wait=sync_wait, + offload=offload) return class ForwardPostHooks(EagerPyLayer): + @staticmethod def forward(ctx, inputs, layer, order_tracer, trainable_params, param2buffer, param2buffer_size, rank, group, sync_comm, @@ -668,7 +671,7 @@ class ForwardPostHooks(EagerPyLayer): order_tracer["order"] += 1 order_tracer["layer"].append(layer_id) - #Record fw info + #Record fw info ctx.order_tracer = order_tracer ctx.task_flow = task_flow ctx.group = group @@ -696,14 +699,13 @@ class ForwardPostHooks(EagerPyLayer): # Allgather params synchronization if sync_comm: use_calc, sync_wait = True, True - _allgather_buffer( - trainable_params[layer_id], - group, - param2buffer_size=param2buffer_size, - use_calc_stream=use_calc, - task_flow=task_flow, - sync_wait=sync_wait, - offload=offload) + _allgather_buffer(trainable_params[layer_id], + group, + param2buffer_size=param2buffer_size, + use_calc_stream=use_calc, + task_flow=task_flow, + sync_wait=sync_wait, + offload=offload) else: _wait_layer(trainable_params[layer_id], task_flow, group, param2buffer_size, use_calc, offload) @@ -716,14 +718,13 @@ class ForwardPostHooks(EagerPyLayer): task_flow.use_calc[layer_id] = use_calc if layer_id != order_tracer["layer"][0] and not sync_comm: layer_next_id = order_tracer["layer"][order_tracer[layer_id] - 1] - _allgather_buffer( - trainable_params[layer_next_id], - group, - param2buffer_size=param2buffer_size, - use_calc_stream=use_calc, - task_flow=task_flow, - sync_wait=sync_wait, - offload=offload) + _allgather_buffer(trainable_params[layer_next_id], + group, + 
param2buffer_size=param2buffer_size, + use_calc_stream=use_calc, + task_flow=task_flow, + sync_wait=sync_wait, + offload=offload) return args @@ -757,8 +758,8 @@ def _release_param(trainable_params, if param.name in task_flow.full_param.keys(): start, end = param2buffer[param.name][rank] with paddle.amp.auto_cast(enable=False): - param.fw_storage = task_flow.full_param[param.name][ - 0]._slice(start, end).detach().clone() + param.fw_storage = task_flow.full_param[ + param.name][0]._slice(start, end).detach().clone() param.status = "part" del task_flow.full_param[param.name] @@ -787,14 +788,13 @@ def _wait_layer(trainable_params, param.status = "all" param.use_count += 1 else: - _allgather_buffer( - trainable_params, - group, - param2buffer_size=param2buffer_size, - use_calc_stream=True, - task_flow=task_flow, - sync_wait=True, - offload=offload) + _allgather_buffer(trainable_params, + group, + param2buffer_size=param2buffer_size, + use_calc_stream=True, + task_flow=task_flow, + sync_wait=True, + offload=offload) break return task_flow @@ -831,7 +831,7 @@ def _allgather_buffer(trainable_params, param.use_count += 1 task_flow.full_param[param.name] = (full_param, task) - # parameter converts to cpu + # parameter converts to cpu if convert2cpu: p_name = param.name param = _device2cpu(param) @@ -847,8 +847,8 @@ def _create_params_grad(trainable_params, param2buffer_size, task_flow): if param.name in task_flow.full_grad.keys(): continue assert isinstance(param2buffer_size[param.name], int) - temp_grad = paddle.zeros( - [param2buffer_size[param.name]], dtype=param.dtype) + temp_grad = paddle.zeros([param2buffer_size[param.name]], + dtype=param.dtype) temp_tensor = temp_grad._slice(0, param._numel()) temp_tensor.get_tensor()._set_dims(param.shape) param._copy_gradient_from(temp_tensor) @@ -876,8 +876,9 @@ def _UnsliceParam(param): def _VarBaseWrapper(param): varbase = param.fw_storage - tmp_param = EagerParamBase( - shape=varbase.shape, dtype=varbase.dtype, name="slice@" + param.name) + tmp_param = EagerParamBase(shape=varbase.shape, + dtype=varbase.dtype, + name="slice@" + param.name) varbase._share_buffer_to(tmp_param) tmp_param.regularizer = param.regularizer tmp_param.optimize_attr['learning_rate'] = param.optimize_attr[ diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py index 4d706870a91..c4487249109 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py @@ -64,8 +64,8 @@ class InternalStorage: Move the underlying buffer """ assert self.buffer is not None, "Cannot move a collapsed bucket, please rebuild it" - assert (dtype == Type.fp32.value or - Type.fp16.value), "Conversion type is not supported now" + assert (dtype == Type.fp32.value + or Type.fp16.value), "Conversion type is not supported now" if self._device != device: tmp_buffer = self.buffer.cuda( diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py index eae8f87b014..b1e0f6cc130 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py @@ -45,6 +45,7 @@ class Type(Enum): class GroupShardedClipGrad: + def __init__(self, clip, device, group): self._clip = clip 
self._device = device @@ -82,8 +83,8 @@ class GroupShardedClipGrad: else: global_norm_fp16 = layers.concat(sum_square_fp16) global_norm_fp16 = layers.reduce_sum(global_norm_fp16) - global_norm_fp16 = paddle.cast( - global_norm_fp16, dtype=paddle.float32) + global_norm_fp16 = paddle.cast(global_norm_fp16, + dtype=paddle.float32) # global norm of non-distributed FP16 params_and_grads for unslice parameters if len(unslice_params_fp16) == 0: @@ -91,12 +92,12 @@ class GroupShardedClipGrad: else: global_unslice_fp16 = layers.concat(unslice_params_fp16) global_unslice_fp16 = layers.reduce_sum(global_unslice_fp16) - global_unslice_fp16 = paddle.cast( - global_unslice_fp16, dtype=paddle.float32) + global_unslice_fp16 = paddle.cast(global_unslice_fp16, + dtype=paddle.float32) # global norm of non-distributed FP32 params_and_grads - global_norm_fp32 = layers.concat(sum_square_fp32) if len( - sum_square_fp32) != 0 else paddle.to_tensor( + global_norm_fp32 = layers.concat( + sum_square_fp32) if len(sum_square_fp32) != 0 else paddle.to_tensor( [0.], dtype=paddle.float32) global_norm_fp32 = layers.reduce_sum(global_norm_fp32) @@ -118,13 +119,14 @@ class GroupShardedClipGrad: paddle.distributed.all_reduce(global_norm_var, group=self._group) global_norm_var = layers.sqrt(global_norm_var) - max_global_norm = layers.fill_constant( - shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm) - - clip_var = layers.elementwise_div( - x=max_global_norm, - y=layers.elementwise_max( - x=global_norm_var, y=max_global_norm)) + max_global_norm = layers.fill_constant(shape=[1], + dtype=global_norm_var.dtype, + value=self.clip_norm) + + clip_var = layers.elementwise_div(x=max_global_norm, + y=layers.elementwise_max( + x=global_norm_var, + y=max_global_norm)) clip_var_fp16 = paddle.cast(clip_var, paddle.float16) for p, g in params_grads: @@ -163,6 +165,7 @@ def device_guard(dev_id=0, device="cpu"): @dygraph_only def GroupShardedScaler(scaler): + def unscale_method(self, optimizer): if not self._enable: return @@ -201,8 +204,8 @@ def GroupShardedScaler(scaler): temp_found_inf_fp32 = to_variable(np.array([0]).astype(np.bool)) device = "cpu" if optimizer.offload else "gpu" - dev_id = 0 if device == "cpu" else int(paddle.get_device().split(":")[ - 1]) + dev_id = 0 if device == "cpu" else int( + paddle.get_device().split(":")[1]) with device_guard(dev_id, device): if len(param_grads_fp16): @@ -217,10 +220,9 @@ def GroupShardedScaler(scaler): self._found_inf = 1 if temp_found_inf_fp16 or temp_found_inf_fp32 else 0 is_found_inf = paddle.to_tensor([self._found_inf], dtype="int32") - paddle.distributed.all_reduce( - is_found_inf, - op=paddle.distributed.ReduceOp.MAX, - group=optimizer._group) + paddle.distributed.all_reduce(is_found_inf, + op=paddle.distributed.ReduceOp.MAX, + group=optimizer._group) self._found_inf = is_found_inf.numpy()[0] scaler._unscale = MethodType(unscale_method, scaler) diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py index b09d256d9bb..7834e6d9398 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py @@ -54,7 +54,7 @@ class ShardingStage2(nn.Layer): .. ZeRO: https://arxiv.org/pdf/1910.02054.pdf. """ - # TODO (Baibaifan) + # TODO (Baibaifan) # Feature Notes:: # 1. Unified memory for param and param.grad to InternalStorage. # 2. 
Divide param.grad according to rank to centrally apply for and release GPU memory. @@ -75,8 +75,9 @@ class ShardingStage2(nn.Layer): # training options self._layer = layer - self._sharding_optimizers = [sharding_optimizer] if not isinstance( - sharding_optimizer, list) else sharding_optimizer + self._sharding_optimizers = [ + sharding_optimizer + ] if not isinstance(sharding_optimizer, list) else sharding_optimizer assert all( list( map(lambda opt: isinstance(opt, ShardingOptimizerStage2), @@ -86,8 +87,8 @@ class ShardingStage2(nn.Layer): self._auto_refresh_trainable = auto_refresh_trainable # Communication related attributes - self._group = dist.new_group(_get_global_group() - .ranks) if group is None else group + self._group = dist.new_group( + _get_global_group().ranks) if group is None else group self._world_size_scaling = 1.0 / self._group.nranks assert self._group.nranks > 1, "Training must be distributed, ranks must be greater than 1" self._rank = self._group.rank @@ -106,8 +107,8 @@ class ShardingStage2(nn.Layer): self._param_grads = [] # Set grad storage size & Display param sizes and model sizes - model_size = sum( - [np.prod(p.shape) for p in self._layer.parameters()]).item() + model_size = sum([np.prod(p.shape) + for p in self._layer.parameters()]).item() assert buffer_max_size >= 0, "buffer_max_size must be GE than 0." self._buffer_max_size = self._rank_buffer_size(buffer_max_size, model_size) @@ -166,15 +167,16 @@ class ShardingStage2(nn.Layer): return fw def set_state_dict(self, state_dict, use_structured_name=True): - self._layer.set_state_dict( - state_dict, use_structured_name=use_structured_name) + self._layer.set_state_dict(state_dict, + use_structured_name=use_structured_name) def state_dict(self, destination=None, include_sublayers=True, structured_name_prefix=""): - return self._layer.state_dict( - destination=None, include_sublayers=True, structured_name_prefix="") + return self._layer.state_dict(destination=None, + include_sublayers=True, + structured_name_prefix="") def _clear_gradients(self): """ @@ -226,7 +228,7 @@ class ShardingStage2(nn.Layer): else: self._build_grad_storages() - # Clear all flags state + # Clear all flags state self._clear_counters() def to(self, device=None, dtype=None, blocking=True): @@ -280,11 +282,10 @@ class ShardingStage2(nn.Layer): """ for buffer in self._layer.buffers(include_sublayers=True): - dist.broadcast( - buffer, - self._global_root_rank, - self._group, - use_calc_stream=True) + dist.broadcast(buffer, + self._global_root_rank, + self._group, + use_calc_stream=True) # Multi stream operation will be supported later dist.wait(tensor=buffer, group=self._group, use_calc_stream=True) @@ -335,19 +336,17 @@ class ShardingStage2(nn.Layer): # Synchronize the reduce parameter gradient self._tasks_flow.append( - Taskflow( - task=dist.reduce( - tensor=param.grad, - dst=self._group.ranks[dst_rank], - group=self._group, - use_calc_stream=True), - callback=cleanup)) + Taskflow(task=dist.reduce( + tensor=param.grad, + dst=self._group.ranks[dst_rank], + group=self._group, + use_calc_stream=True), + callback=cleanup)) # Multi stream operation will be supported later - dist.wait( - tensor=param.grad, - group=self._group, - use_calc_stream=True) + dist.wait(tensor=param.grad, + group=self._group, + use_calc_stream=True) # Clear the task flow and trigger callback to clear the redundant gradient self._clear_task_flow() @@ -393,20 +392,17 @@ class ShardingStage2(nn.Layer): # Reduce the bucket grad_storage.sent = True self._tasks_flow.append( - 
Taskflow( - task=dist.reduce( - tensor=grad_storage.buffer, - dst=self._group.ranks[ - grad_storage.destination], - group=self._group, - use_calc_stream=True), - callback=cleanup)) + Taskflow(task=dist.reduce( + tensor=grad_storage.buffer, + dst=self._group.ranks[grad_storage.destination], + group=self._group, + use_calc_stream=True), + callback=cleanup)) # Multi stream operation will be supported later - dist.wait( - tensor=grad_storage.buffer, - group=self._group, - use_calc_stream=True) + dist.wait(tensor=grad_storage.buffer, + group=self._group, + use_calc_stream=True) # Clear the task flow and trigger callback to clear the redundant gradient self._clear_task_flow() @@ -466,10 +462,10 @@ class ShardingStage2(nn.Layer): else: self._param_grads.append(param.name) print( - "Can not add param: {}, param's shape: {}, param align: {}, grad_storages fill: {}, ". - format(param.name, param.shape, self._trainable_param2align[ - param.name], self._grad_storages[param.dtype][dst_rank] - ._fill)) + "Can not add param: {}, param's shape: {}, param align: {}, grad_storages fill: {}, " + .format(param.name, param.shape, + self._trainable_param2align[param.name], + self._grad_storages[param.dtype][dst_rank]._fill)) self._grad_storage_list = list( chain(*[ @@ -526,15 +522,15 @@ class ShardingStage2(nn.Layer): if Type.fp16.value in rank_buffer_size.keys(): # FP16 GradStorage and model size print( - "====== FP16 GradStorage size: {:.2f}M parameters, Model size {:.2f}M parameters ======". - format(rank_buffer_size[Type.fp16.value] / 2**19, model_size / 2 - **19)) + "====== FP16 GradStorage size: {:.2f}M parameters, Model size {:.2f}M parameters ======" + .format(rank_buffer_size[Type.fp16.value] / 2**19, + model_size / 2**19)) if Type.fp32.value in rank_buffer_size.keys(): # FP32 GradStorage and model size print( - "====== FP32 GradStorage size: {:.2f}M parameters, Model size {:.2f}M parameters ======". - format(rank_buffer_size[Type.fp32.value] / 2**18, model_size / 2 - **18)) + "====== FP32 GradStorage size: {:.2f}M parameters, Model size {:.2f}M parameters ======" + .format(rank_buffer_size[Type.fp32.value] / 2**18, + model_size / 2**18)) return rank_buffer_size def _redefine_opt_step(self): diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py index 7bb1517f121..67d48c8abba 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py @@ -37,7 +37,9 @@ from ..pp_utils.utils import _all_gather from ...utils.internal_storage import GradStorage # CUDA alignment 256 bytes -alignment = {"gpu": 256, } +alignment = { + "gpu": 256, +} align = { Type.fp16.value: 2, Type.fp32.value: 4, @@ -56,7 +58,7 @@ class ShardingStage3(nn.Layer): .. ZeRO: https://arxiv.org/pdf/1910.02054.pdf. """ - # TODO (Baibaifan) + # TODO (Baibaifan) # Feature Notes:: # 1. The model supports the segmentation of parameters by global ranks in layers. # 2. Support communication flow and computing flow. 
@@ -90,14 +92,14 @@ class ShardingStage3(nn.Layer): DEV = "cpu" if paddle.get_device() == "cpu" else paddle.get_device( ).split(":")[0] global DEV_ID - DEV_ID = 0 if paddle.get_device() == "cpu" else int(paddle.get_device() - .split(":")[1]) + DEV_ID = 0 if paddle.get_device() == "cpu" else int( + paddle.get_device().split(":")[1]) global param2dtype param2dtype = dict() # Communication group establishment - self._group = dist.new_group(_get_global_group() - .ranks) if group is None else group + self._group = dist.new_group( + _get_global_group().ranks) if group is None else group self._world_size_scaling = 1.0 / self._group.nranks assert self._group.nranks > 1, "Training must be distributed, ranks must be greater than 1." self._rank = self._group.rank @@ -165,11 +167,10 @@ class ShardingStage3(nn.Layer): """ for p in self._layer.parameters(): - dist.broadcast( - p, - src=self._global_root_rank, - group=self._group, - use_calc_stream=True) + dist.broadcast(p, + src=self._global_root_rank, + group=self._group, + use_calc_stream=True) # Multi stream operation will be supported later dist.wait(tensor=p, group=self._group, use_calc_stream=True) @@ -182,9 +183,9 @@ class ShardingStage3(nn.Layer): filter(lambda p: p.trainable and p not in self._unslice_params, current_layer_params)) for param in trainable_params: - assert hasattr( - param, "fw_storage" - ), "Find {} don't have fw_storage attribute.".format(param.name) + assert hasattr(param, "fw_storage" + ), "Find {} don't have fw_storage attribute.".format( + param.name) param.fw_storage.clear_gradient(False) param.fw_storage._gradient_set_empty(False) @@ -244,15 +245,16 @@ class ShardingStage3(nn.Layer): return fw def set_state_dict(self, state_dict, use_structured_name=True): - self._layer.set_state_dict( - state_dict, use_structured_name=use_structured_name) + self._layer.set_state_dict(state_dict, + use_structured_name=use_structured_name) def state_dict(self, destination=None, include_sublayers=True, structured_name_prefix=""): - return self._layer.state_dict( - destination=None, include_sublayers=True, structured_name_prefix="") + return self._layer.state_dict(destination=None, + include_sublayers=True, + structured_name_prefix="") def _handle_unslice_params(self): buffer_size = dict() @@ -357,8 +359,8 @@ class ShardingStage3(nn.Layer): start, end = self._param2buffer[param.name][self._rank] # Copy the current param value - tmp_var = core.VarBase( - tensor=buffer._slice(0, param._numel()), place=core.CPUPlace()) + tmp_var = core.VarBase(tensor=buffer._slice(0, param._numel()), + place=core.CPUPlace()) param_cpu = param.cpu() tmp_var.value().get_tensor().set(param_cpu.value().get_tensor(), core.CPUPlace()) @@ -366,15 +368,15 @@ class ShardingStage3(nn.Layer): # Current rank param_storage if self._offload: - param.fw_storage = core.VarBase( - buffer._slice(start, end), - core.CPUPlace(), "slice@" + param.name) + param.fw_storage = core.VarBase(buffer._slice(start, end), + core.CPUPlace(), + "slice@" + param.name) with device_guard(device="cpu"): param.master_weight = paddle.cast(param.fw_storage, Type.fp32.value) else: - param.fw_storage = core.VarBase( - buffer._slice(start, end), "slice@" + param.name) + param.fw_storage = core.VarBase(buffer._slice(start, end), + "slice@" + param.name) param.status = "part" # Updata optimizer master weights @@ -402,6 +404,7 @@ class ShardingStage3(nn.Layer): self._register_forward_hooks(sub_layer) def _register_forward_all_hooks(self, sub_layer, task_flow): + def _forward_pre_hook(layer, inputs): return 
ForwardPreHooks(layer, self._order_tracer, self._trainable_params, self._param2buffer, @@ -409,10 +412,12 @@ class ShardingStage3(nn.Layer): self._offload, task_flow) def _forward_post_hook(layer, inputs, outputs): - return ForwardPostHooks.apply( - outputs, layer, self._order_tracer, self._trainable_params, - self._param2buffer, self._param2buffer_size, self._rank, - self._group, self._sync_comm, self._offload, task_flow) + return ForwardPostHooks.apply(outputs, layer, self._order_tracer, + self._trainable_params, + self._param2buffer, + self._param2buffer_size, self._rank, + self._group, self._sync_comm, + self._offload, task_flow) # register previous forward hooks sub_layer.register_forward_pre_hook(_forward_pre_hook) @@ -427,11 +432,10 @@ class ShardingStage3(nn.Layer): """ for buffer in self._layer.buffers(include_sublayers=True): - dist.broadcast( - buffer, - self._global_root_rank, - self._group, - use_calc_stream=True) + dist.broadcast(buffer, + self._global_root_rank, + self._group, + use_calc_stream=True) # Multi stream operation will be supported later dist.wait(tensor=buffer, group=self._group, use_calc_stream=True) @@ -472,14 +476,12 @@ class ShardingStage3(nn.Layer): # 2.Handle unslice param for grad_storage in self._grad_storages.values(): grad_storage.buffer.scale_(scale=self._world_size_scaling) - dist.all_reduce( - tensor=grad_storage.buffer, - group=self._group, - use_calc_stream=True) - dist.wait( - tensor=grad_storage.buffer, - group=self._group, - use_calc_stream=True) + dist.all_reduce(tensor=grad_storage.buffer, + group=self._group, + use_calc_stream=True) + dist.wait(tensor=grad_storage.buffer, + group=self._group, + use_calc_stream=True) if self._offload: for param in list(self._unslice_params): @@ -506,14 +508,13 @@ class ShardingStage3(nn.Layer): trainable_params = list( filter(lambda p: p.trainable and p not in self._unslice_params, current_layer_params)) - t_flow = _allgather_buffer( - trainable_params, - self._group, - use_calc_stream=True, - task_flow=TaskFlow(), - sync_wait=True, - offload=self._offload, - convert2cpu=convert2cpu) + t_flow = _allgather_buffer(trainable_params, + self._group, + use_calc_stream=True, + task_flow=TaskFlow(), + sync_wait=True, + offload=self._offload, + convert2cpu=convert2cpu) if convert2cpu: for param in trainable_params: t_flow.full_param[param.name]._share_buffer_to(param) @@ -532,38 +533,41 @@ class ShardingStage3(nn.Layer): param._register_backward_hook(allreduce_function) def _get_allreduce_fn(self, param): + @paddle.autograd.no_grad() def allreduce_(*_): if param.name in self._task_flow.full_grad.keys(): full_grad = self._task_flow.full_grad[param.name] # Only support sync allreduce current rank's layer now - dist.all_reduce( - tensor=full_grad, group=self._group, use_calc_stream=True) - dist.wait( - tensor=full_grad, group=self._group, use_calc_stream=True) + dist.all_reduce(tensor=full_grad, + group=self._group, + use_calc_stream=True) + dist.wait(tensor=full_grad, + group=self._group, + use_calc_stream=True) start, end = self._param2buffer[param.name][self._rank] if param.bw_storage is None: - param.bw_storage = core.VarBase( - full_grad._slice(start, end)).detach().clone() + param.bw_storage = core.VarBase(full_grad._slice( + start, end)).detach().clone() if self._offload: param.bw_storage = _device2cpu(param.bw_storage, True) else: if self._offload: cpu_grad = _device2cpu( - core.VarBase(full_grad._slice(start, end)) - .detach().clone(), True) + core.VarBase(full_grad._slice( + start, end)).detach().clone(), True) 
with device_guard(device="cpu"): - param.bw_storage = paddle.add(param.bw_storage, - cpu_grad) + param.bw_storage = paddle.add( + param.bw_storage, cpu_grad) else: # param.bw_storage.add_( # core.VarBase(full_grad._slice(start, end)) # .detach().clone()) param.bw_storage = paddle.add( param.bw_storage, - core.VarBase(full_grad._slice(start, end)).detach( - ).clone()) + core.VarBase(full_grad._slice( + start, end)).detach().clone()) param.clear_gradient(False) param._gradient_set_empty(False) tmp_var = self._task_flow.full_grad.pop(param.name) @@ -576,7 +580,8 @@ class ShardingStage3(nn.Layer): start, end = self._param2buffer[param.name][self._rank] param.fw_storage = core.VarBase( self._task_flow.full_param[param.name]._slice( - start, end), param.name + "@slice").detach().clone() + start, end), + param.name + "@slice").detach().clone() param.status = "part" tmp_var = self._task_flow.full_param.pop(param.name) tmp_var._clear() @@ -649,18 +654,18 @@ def ForwardPreHooks(layer, order_tracer, trainable_params, param2buffer, rank, order_ = order_tracer[layer_id] layer_id = order_tracer["layer"][order_ + 1] - _allgather_buffer( - trainable_params[layer_id], - group, - use_calc_stream=use_calc, - task_flow=task_flow, - sync_wait=sync_wait, - offload=offload) + _allgather_buffer(trainable_params[layer_id], + group, + use_calc_stream=use_calc, + task_flow=task_flow, + sync_wait=sync_wait, + offload=offload) return class ForwardPostHooks(PyLayer): + @staticmethod def forward(ctx, inputs, layer, order_tracer, trainable_params, param2buffer, param2buffer_size, rank, group, sync_comm, @@ -677,7 +682,7 @@ class ForwardPostHooks(PyLayer): order_tracer["order"] += 1 order_tracer["layer"].append(layer_id) - #Record bw info + #Record bw info ctx.order_tracer = order_tracer ctx.task_flow = task_flow ctx.group = group @@ -706,13 +711,12 @@ class ForwardPostHooks(PyLayer): # Allgather params synchronization if sync_comm: use_calc, sync_wait = True, True - _allgather_buffer( - trainable_params[layer_id], - group, - use_calc_stream=use_calc, - task_flow=task_flow, - sync_wait=sync_wait, - offload=offload) + _allgather_buffer(trainable_params[layer_id], + group, + use_calc_stream=use_calc, + task_flow=task_flow, + sync_wait=sync_wait, + offload=offload) else: _wait_layer(trainable_params[layer_id], task_flow, group, use_calc, offload) @@ -725,13 +729,12 @@ class ForwardPostHooks(PyLayer): task_flow.use_calc[layer_id] = use_calc if layer_id != order_tracer["layer"][0] and not sync_comm: layer_next_id = order_tracer["layer"][order_tracer[layer_id] - 1] - _allgather_buffer( - trainable_params[layer_next_id], - group, - use_calc_stream=use_calc, - task_flow=task_flow, - sync_wait=sync_wait, - offload=offload) + _allgather_buffer(trainable_params[layer_next_id], + group, + use_calc_stream=use_calc, + task_flow=task_flow, + sync_wait=sync_wait, + offload=offload) return args @@ -789,20 +792,19 @@ def _wait_layer(trainable_params, continue if param.name in task_flow.full_param.keys(): full_param = task_flow.full_param[param.name] - core.VarBase(full_param._slice(0, param._numel()))._share_buffer_to( - param) + core.VarBase(full_param._slice( + 0, param._numel()))._share_buffer_to(param) param.fw_storage._clear() param.fw_storage = None param.status = "all" param.use_count += 1 else: - _allgather_buffer( - trainable_params, - group, - use_calc_stream=True, - task_flow=task_flow, - sync_wait=True, - offload=offload) + _allgather_buffer(trainable_params, + group, + use_calc_stream=True, + task_flow=task_flow, + 
sync_wait=True, + offload=offload) break return task_flow @@ -824,25 +826,25 @@ def _allgather_buffer(trainable_params, param.fw_storage = _cpu2device(param) with paddle.amp.auto_cast(enable=False): - full_param = _all_gather( - param.fw_storage, group, use_calc_stream=use_calc_stream) + full_param = _all_gather(param.fw_storage, + group, + use_calc_stream=use_calc_stream) # Allgather current layer in the 1st step synchronously if sync_wait: with paddle.amp.auto_cast(enable=False): - dist.wait( - tensor=full_param, - group=group, - use_calc_stream=use_calc_stream) - core.VarBase(full_param._slice(0, param._numel()))._share_buffer_to( - param) + dist.wait(tensor=full_param, + group=group, + use_calc_stream=use_calc_stream) + core.VarBase(full_param._slice( + 0, param._numel()))._share_buffer_to(param) param.fw_storage._clear() param.fw_storage = None param.status = "all" param.use_count += 1 task_flow.full_param[param.name] = full_param - # parameter converts to cpu + # parameter converts to cpu if convert2cpu: p_name = param.name param = _device2cpu(param) @@ -859,8 +861,8 @@ def _create_params_grad(trainable_params, param2buffer_size, task_flow): if param.name in task_flow.full_grad.keys(): continue assert isinstance(param2buffer_size[param.name], int) - temp_grad = paddle.zeros( - [param2buffer_size[param.name]], dtype=param.dtype) + temp_grad = paddle.zeros([param2buffer_size[param.name]], + dtype=param.dtype) param._copy_gradient_from( core.VarBase(temp_grad._slice(0, param._numel()))) task_flow.full_grad[param.name] = temp_grad @@ -886,8 +888,9 @@ def _UnsliceParam(param): def _VarBaseWrapper(param): varbase = param.fw_storage - tmp_param = ParamBase( - shape=varbase.shape, dtype=varbase.dtype, name="slice@" + param.name) + tmp_param = ParamBase(shape=varbase.shape, + dtype=varbase.dtype, + name="slice@" + param.name) varbase._share_buffer_to(tmp_param) tmp_param.regularizer = param.regularizer tmp_param.optimize_attr['learning_rate'] = param.optimize_attr[ diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py index 6a30276e02b..ae98d4bdf7b 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py @@ -49,6 +49,7 @@ class Type(Enum): class ShardingClipGrad: + def __init__(self, clip, device, group): self._clip = clip self._device = device @@ -86,8 +87,8 @@ class ShardingClipGrad: else: global_norm_fp16 = layers.concat(sum_square_fp16) global_norm_fp16 = layers.reduce_sum(global_norm_fp16) - global_norm_fp16 = paddle.cast( - global_norm_fp16, dtype=paddle.float32) + global_norm_fp16 = paddle.cast(global_norm_fp16, + dtype=paddle.float32) # global norm of non-distributed FP16 params_and_grads for unslice parameter if len(unslice_params_fp16) == 0: @@ -95,12 +96,12 @@ class ShardingClipGrad: else: global_unslice_fp16 = layers.concat(unslice_params_fp16) global_unslice_fp16 = layers.reduce_sum(global_unslice_fp16) - global_unslice_fp16 = paddle.cast( - global_unslice_fp16, dtype=paddle.float32) + global_unslice_fp16 = paddle.cast(global_unslice_fp16, + dtype=paddle.float32) # global norm of non-distributed FP32 params_and_grads - global_norm_fp32 = layers.concat(sum_square_fp32) if len( - sum_square_fp32) != 0 else paddle.to_tensor( + global_norm_fp32 = layers.concat( + sum_square_fp32) if len(sum_square_fp32) != 0 else paddle.to_tensor( [0.], dtype=paddle.float32) 
global_norm_fp32 = layers.reduce_sum(global_norm_fp32) @@ -119,13 +120,14 @@ class ShardingClipGrad: paddle.distributed.all_reduce(global_norm_var, group=self._group) global_norm_var = layers.sqrt(global_norm_var) - max_global_norm = layers.fill_constant( - shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm) - - clip_var = layers.elementwise_div( - x=max_global_norm, - y=layers.elementwise_max( - x=global_norm_var, y=max_global_norm)) + max_global_norm = layers.fill_constant(shape=[1], + dtype=global_norm_var.dtype, + value=self.clip_norm) + + clip_var = layers.elementwise_div(x=max_global_norm, + y=layers.elementwise_max( + x=global_norm_var, + y=max_global_norm)) clip_var_fp16 = paddle.cast(clip_var, paddle.float16) for p, g in params_grads: @@ -164,6 +166,7 @@ def device_guard(dev_id=0, device="cpu"): @dygraph_only def ShardingScaler(scaler): + def unscale_method(self, optimizer): if not self._enable: return @@ -181,8 +184,9 @@ def ShardingScaler(scaler): for param in group['params']: if param._grad_ivar() is not None: param_grads.append(param._grad_ivar()) - if param._grad_ivar( - ).dtype in [core.VarDesc.VarType.FP16, paddle.float16]: + if param._grad_ivar().dtype in [ + core.VarDesc.VarType.FP16, paddle.float16 + ]: param_grads_fp16.append(param._grad_ivar()) else: param_grads_fp32.append(param._grad_ivar()) @@ -201,8 +205,8 @@ def ShardingScaler(scaler): temp_found_inf_fp32 = to_variable(np.array([0]).astype(np.bool)) device = "cpu" if optimizer.offload else "gpu" - dev_id = 0 if device == "cpu" else int(paddle.get_device().split(":")[ - 1]) + dev_id = 0 if device == "cpu" else int( + paddle.get_device().split(":")[1]) with device_guard(dev_id, device): if len(param_grads_fp16): @@ -217,10 +221,9 @@ def ShardingScaler(scaler): self._found_inf = 1 if temp_found_inf_fp16 or temp_found_inf_fp32 else 0 is_found_inf = paddle.to_tensor([self._found_inf], dtype="int32") - paddle.distributed.all_reduce( - is_found_inf, - op=paddle.distributed.ReduceOp.MAX, - group=optimizer.group) + paddle.distributed.all_reduce(is_found_inf, + op=paddle.distributed.ReduceOp.MAX, + group=optimizer.group) self._found_inf = is_found_inf.numpy()[0] scaler._unscale = MethodType(unscale_method, scaler) diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py b/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py index 953a76d874e..1bc76570f17 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py @@ -21,6 +21,7 @@ __all__ = [] class ShardingParallel(MetaParallelBase): + def __init__(self, layers, hcg, **kwargs): super(ShardingParallel, self).__init__(layers, hcg, **kwargs) diff --git a/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py b/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py index 171df7cf033..5814ed898fa 100755 --- a/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py @@ -23,6 +23,7 @@ __all__ = [] class TensorParallel(MetaParallelBase): + def __init__(self, layers, hcg, **kwargs): super(TensorParallel, self).__init__(layers, hcg, **kwargs) diff --git a/python/paddle/distributed/fleet/runtime/collective_runtime.py b/python/paddle/distributed/fleet/runtime/collective_runtime.py index a23b15f1fca..183fa9e7c15 100644 --- a/python/paddle/distributed/fleet/runtime/collective_runtime.py +++ 
b/python/paddle/distributed/fleet/runtime/collective_runtime.py @@ -19,6 +19,7 @@ __all__ = [] class CollectiveRuntime(RuntimeBase): + def __init__(self): super(CollectiveRuntime, self).__init__() diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index 0767158d23f..6e30ff7969e 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -30,6 +30,7 @@ __all__ = [] class ParameterServerRuntime(RuntimeBase): + def __init__(self): super(ParameterServerRuntime, self).__init__() self._communicator = None @@ -67,9 +68,10 @@ class ParameterServerRuntime(RuntimeBase): def build_compiled_startegy(self): from paddle.fluid.incubate.fleet.parameter_server.ir.public import CompileTimeStrategy - compiled_config = CompileTimeStrategy( - self.origin_main_program, self.origin_main_program, - self.async_strategy, self.role_maker) + compiled_config = CompileTimeStrategy(self.origin_main_program, + self.origin_main_program, + self.async_strategy, + self.role_maker) return compiled_config def _load_sparse_params(self, @@ -86,7 +88,8 @@ class ParameterServerRuntime(RuntimeBase): return var.name in varnames load_vars = list( - filter(_in_varnames, fluid.default_main_program().list_vars())) + filter(_in_varnames, + fluid.default_main_program().list_vars())) if main_program is None: main_program = self.origin_main_program @@ -99,20 +102,24 @@ class ParameterServerRuntime(RuntimeBase): new_var = fluid.io._clone_var_in_block_(load_block, each_var) var_path = os.path.join(dirname, origin_varname) if not os.path.exists(var_path): - raise ValueError("SelectedRows var {} can not find at {}". 
- format(new_var.name, var_path)) + raise ValueError( + "SelectedRows var {} can not find at {}".format( + new_var.name, var_path)) if os.path.isfile(var_path): - load_block.append_op( - type='sparse_tensor_load', - inputs={}, - outputs={'Out': [new_var]}, - attrs={ - 'file_path': os.path.join(dirname, origin_varname), - 'node_index': self.role_maker._server_index(), - 'node_num': self.role_maker._server_num(), - 'shape': each_var.shape - }) + load_block.append_op(type='sparse_tensor_load', + inputs={}, + outputs={'Out': [new_var]}, + attrs={ + 'file_path': + os.path.join(dirname, origin_varname), + 'node_index': + self.role_maker._server_index(), + 'node_num': + self.role_maker._server_num(), + 'shape': + each_var.shape + }) check_vars.append(each_var) executor.run(load_prog) @@ -129,6 +136,7 @@ class ParameterServerRuntime(RuntimeBase): @staticmethod def __exclude_vars(exclude_var_names=[]): + def is_valid(var): if var.name in exclude_var_names: return False @@ -151,6 +159,7 @@ class ParameterServerRuntime(RuntimeBase): return is_valid def _init_worker(self): + def sync_strategy_envs(): kwargs = {} kwargs[ @@ -166,8 +175,9 @@ class ParameterServerRuntime(RuntimeBase): opt_init_map["gaussian_random"] = ["seed", "mean", "std"] opt_init_map["fill_constant"] = ["value"] opt_init_map["uniform_random"] = ["seed", "min", "max"] - opt_init_map[ - "truncated_gaussian_random"] = ["seed", "mean", "std"] + opt_init_map["truncated_gaussian_random"] = [ + "seed", "mean", "std" + ] dist_varnames = get_sparse_tablenames(self.origin_main_program, True) @@ -181,8 +191,8 @@ class ParameterServerRuntime(RuntimeBase): init_attrs = [] for value_name in sparse_varnames: - value_var = self.origin_main_program.global_block().vars[ - value_name] + value_var = self.origin_main_program.global_block( + ).vars[value_name] value_attr = [ value_name, ",".join([str(dim) for dim in value_var.shape]) @@ -287,8 +297,8 @@ class ParameterServerRuntime(RuntimeBase): model_dirname = None executor = self._get_executor() - if self.role_maker._is_heter_worker() and self.context[ - "valid_strategy"].a_sync_configs["launch_barrier"]: + if self.role_maker._is_heter_worker( + ) and self.context["valid_strategy"].a_sync_configs["launch_barrier"]: # for heter trainer wait server ready wait_server_ready(self.role_maker._get_pserver_endpoints()) executor.run(fluid.default_startup_program()) @@ -328,23 +338,21 @@ class ParameterServerRuntime(RuntimeBase): raise ValueError("There is no directory named '%s'", model_dirname) # load dense - fluid.io.load_vars( - executor, - main_program=fluid.default_main_program(), - dirname=model_dirname, - vars=remaining_vars) + fluid.io.load_vars(executor, + main_program=fluid.default_main_program(), + dirname=model_dirname, + vars=remaining_vars) # load sparse - self._load_sparse_params( - executor=executor, - dirname=model_dirname, - varnames=sparse_varnames + sparse_related_optimize_varnames) + self._load_sparse_params(executor=executor, + dirname=model_dirname, + varnames=sparse_varnames + + sparse_related_optimize_varnames) # load large scale - self._load_distributed_params( - dirname=model_dirname, - varnames=distribtued_varnames + - distributed_related_optimize_varnames) + self._load_distributed_params(dirname=model_dirname, + varnames=distribtued_varnames + + distributed_related_optimize_varnames) def _run_server(self): executor = self._get_executor() @@ -368,8 +376,9 @@ class ParameterServerRuntime(RuntimeBase): reshaped_val_map["adamax"] = ["moment_0", "inf_norm_0"] reshaped_val_map["momentum"] 
= ["velocity_0"] reshaped_val_map["lars_momentum"] = ["velocity_0"] - reshaped_val_map[ - "rmsprop"] = ["momentum_0", "mean_square_0", "mean_grad_0"] + reshaped_val_map["rmsprop"] = [ + "momentum_0", "mean_square_0", "mean_grad_0" + ] reshaped_val_map["decayed_adagrad"] = ["moment_0"] reshaped_val_map["ftrl"] = ["squared_0", "linear_0"] @@ -379,8 +388,8 @@ class ParameterServerRuntime(RuntimeBase): if op not in supported_opts: raise ValueError( - "fleet can not support optimizer: {}, only this can be supported: {}". - format(op, supported_opts)) + "fleet can not support optimizer: {}, only this can be supported: {}" + .format(op, supported_opts)) reshaped_names = [ param_name + "_" + val for val in reshaped_val_map[op] @@ -423,19 +432,23 @@ class ParameterServerRuntime(RuntimeBase): for var_name in [varname] + reshaped_varnames + origin_varnames: var = self.origin_main_program.global_block().vars[var_name] - block.append_op( - type='recv_save', - attrs={ - "trainer_id": self.role_maker._worker_index(), - "shape": var.shape, - "slice_shapes": - [",".join([str(i) for i in var.shape])], - "slice_varnames": [var.name], - "remote_varnames": [var.name], - "is_sparse": False, - "endpoints": var_ctx.split_endpoints(), - "file_path": os.path.join(dirname, var.name) - }) + block.append_op(type='recv_save', + attrs={ + "trainer_id": + self.role_maker._worker_index(), + "shape": + var.shape, + "slice_shapes": + [",".join([str(i) for i in var.shape])], + "slice_varnames": [var.name], + "remote_varnames": [var.name], + "is_sparse": + False, + "endpoints": + var_ctx.split_endpoints(), + "file_path": + os.path.join(dirname, var.name) + }) executor.run(prog) return local_vars @@ -463,30 +476,37 @@ class ParameterServerRuntime(RuntimeBase): for section in var_ctx.sections(): slice_shapes.append(str(section) + dims1) - block.append_op( - type='recv_save', - attrs={ - "trainer_id": self.role_maker._worker_index(), - "shape": var.shape, - "slice_shapes": slice_shapes, - "slice_varnames": var_ctx.split_varnames(), - "remote_varnames": var_ctx.split_varnames(), - "is_sparse": True, - "endpoints": var_ctx.split_endpoints(), - "pserver_num": - len(self.role_maker._get_pserver_endpoints()), - "file_path": os.path.join(dirname, var.name) - }) + block.append_op(type='recv_save', + attrs={ + "trainer_id": + self.role_maker._worker_index(), + "shape": + var.shape, + "slice_shapes": + slice_shapes, + "slice_varnames": + var_ctx.split_varnames(), + "remote_varnames": + var_ctx.split_varnames(), + "is_sparse": + True, + "endpoints": + var_ctx.split_endpoints(), + "pserver_num": + len(self.role_maker._get_pserver_endpoints()), + "file_path": + os.path.join(dirname, var.name) + }) for reshaped_varname in reshaped_varnames: - var = self.origin_main_program.global_block().vars[ - reshaped_varname] + var = self.origin_main_program.global_block( + ).vars[reshaped_varname] slice_varnames = [] remote_varnames = [] for i in range(len(var_ctx.split_varnames())): - slice_varnames.append("{}.block{}".format(reshaped_varname, - i)) + slice_varnames.append("{}.block{}".format( + reshaped_varname, i)) remote_varnames.append(reshaped_varname) block.append_op( @@ -505,22 +525,26 @@ class ParameterServerRuntime(RuntimeBase): }) for origin_varname in origin_varnames: - var = self.origin_main_program.global_block().vars[ - origin_varname] - - block.append_op( - type='recv_save', - attrs={ - "trainer_id": self.role_maker._worker_index(), - "shape": var.shape, - "slice_shapes": - [",".join([str(i) for i in var.shape])], - "slice_varnames": 
[origin_varname], - "remote_varnames": [origin_varname], - "is_sparse": False, - "endpoints": var_ctx.split_endpoints()[:1], - "file_path": os.path.join(dirname, var.name) - }) + var = self.origin_main_program.global_block( + ).vars[origin_varname] + + block.append_op(type='recv_save', + attrs={ + "trainer_id": + self.role_maker._worker_index(), + "shape": + var.shape, + "slice_shapes": + [",".join([str(i) for i in var.shape])], + "slice_varnames": [origin_varname], + "remote_varnames": [origin_varname], + "is_sparse": + False, + "endpoints": + var_ctx.split_endpoints()[:1], + "file_path": + os.path.join(dirname, var.name) + }) executor.run(prog) return context.keys() @@ -529,16 +553,15 @@ class ParameterServerRuntime(RuntimeBase): block = prog.global_block() for name, var_ctx in context.items(): - block.append_op( - type='checkpoint_notify', - attrs={ - "varname": name, - "mode": mode, - "slice_varnames": var_ctx.split_varnames(), - "remote_varnames": var_ctx.split_varnames(), - "endpoints": var_ctx.split_endpoints(), - "dirname": dirname - }) + block.append_op(type='checkpoint_notify', + attrs={ + "varname": name, + "mode": mode, + "slice_varnames": var_ctx.split_varnames(), + "remote_varnames": var_ctx.split_varnames(), + "endpoints": var_ctx.split_endpoints(), + "dirname": dirname + }) executor.run(prog) return context.keys() @@ -557,8 +580,9 @@ class ParameterServerRuntime(RuntimeBase): recv_dense_varnames = self._save_dense_params(executor, dirname, dense_ctx, main_program) - recv_sparse_varnames = self._save_sparse_params( - executor, dirname, sparse_ctx, main_program) + recv_sparse_varnames = self._save_sparse_params(executor, dirname, + sparse_ctx, + main_program) recv_distributed_varnames = self._save_distributed_params( executor, dirname, distributed_ctx, mode) @@ -567,15 +591,13 @@ class ParameterServerRuntime(RuntimeBase): recv_sparse_varnames) + list(recv_distributed_varnames) remaining_vars = list( - filter( - ParameterServerRuntime.__exclude_vars(saved_varnames), - main_program.list_vars())) + filter(ParameterServerRuntime.__exclude_vars(saved_varnames), + main_program.list_vars())) - fluid.io.save_vars( - executor, - main_program=main_program, - dirname=dirname, - vars=remaining_vars) + fluid.io.save_vars(executor, + main_program=main_program, + dirname=dirname, + vars=remaining_vars) def _ps_inference_save_persistables(self, executor, @@ -659,8 +681,10 @@ class ParameterServerRuntime(RuntimeBase): program = Program.parse_from_string(program_desc_str) program._copy_dist_param_info_from(fluid.default_main_program()) - self._ps_inference_save_persistables( - executor, dirname, program, mode=0) + self._ps_inference_save_persistables(executor, + dirname, + program, + mode=0) def _save_inference_model(self, *args, **kwargs): self._ps_inference_save_inference_model(*args, **kwargs) diff --git a/python/paddle/distributed/fleet/runtime/runtime_base.py b/python/paddle/distributed/fleet/runtime/runtime_base.py index 2e8bacfbc3b..38bb31ce3fc 100644 --- a/python/paddle/distributed/fleet/runtime/runtime_base.py +++ b/python/paddle/distributed/fleet/runtime/runtime_base.py @@ -16,6 +16,7 @@ __all__ = [] class RuntimeBase(object): + def __init__(self): pass diff --git a/python/paddle/distributed/fleet/runtime/the_one_ps.py b/python/paddle/distributed/fleet/runtime/the_one_ps.py index c90fab6af5c..82cef558b1f 100644 --- a/python/paddle/distributed/fleet/runtime/the_one_ps.py +++ b/python/paddle/distributed/fleet/runtime/the_one_ps.py @@ -131,16 +131,17 @@ def check_embedding_dim(accessor, 
varname, o_main_program): fea_dim = accessor.fea_dim if fea_dim != embedding_dim: raise ValueError( - "The fea_dim is wrong, it will be sparse_embedding_dim: {}, but got {}". - format(embedding_dim, fea_dim)) + "The fea_dim is wrong, it will be sparse_embedding_dim: {}, but got {}" + .format(embedding_dim, fea_dim)) embedx_dim = accessor.embedx_dim if embedx_dim != embedding_dim - 3: raise ValueError( - "The embedx_dim is wrong, it will be sparse_embedding_dim - 3: {}, but got {}". - format(embedding_dim - 3, embedx_dim)) + "The embedx_dim is wrong, it will be sparse_embedding_dim - 3: {}, but got {}" + .format(embedding_dim - 3, embedx_dim)) class Accessor: + def __init__(self): self.accessor_class = "" self.optimizer = None @@ -157,11 +158,12 @@ class Accessor: attrs += "\n" if self.optimizer is not None: attrs += self.optimizer.to_string(indent) - return accessor_str.format( - conv_indent(indent), attrs, conv_indent(indent)) + return accessor_str.format(conv_indent(indent), attrs, + conv_indent(indent)) class CommonAccessor: + def __init__(self): self.accessor_class = "" self.table_name = None @@ -185,11 +187,11 @@ class CommonAccessor: opt_input_map["adam"] = [("Param", None), ("Moment1", None), ("Moment2", None), ("Beta1Pow", 1), ("Beta2Pow", 1), ("LearningRate", 1)] - opt_input_map["adam_d2sum"] = [ - ("Param", None), ("D2Sum", None), ("G2Sum", None), ("Moment", None), - ("MomentDecayRate", 1), ("AdaDecayRate", 1), ("AdaEpsilon", 1), - ("LearningRate", 1) - ] + opt_input_map["adam_d2sum"] = [("Param", None), ("D2Sum", None), + ("G2Sum", None), ("Moment", None), + ("MomentDecayRate", 1), + ("AdaDecayRate", 1), ("AdaEpsilon", 1), + ("LearningRate", 1)] opt_input_map["sum"] = [("Param", None)] opt_input_map["naive_adagrad"] = [("Param", None), ("G2Sum", 1), ("LearningRate", 1)] @@ -269,8 +271,8 @@ class CommonAccessor: oop = None for op in optimizer_ops: - if ("Param" in op.input_names) and ( - op.input("Param")[0] == param_name): + if ("Param" in op.input_names) and (op.input("Param")[0] + == param_name): oop = op break @@ -325,11 +327,11 @@ class CommonAccessor: #TODO: for dense learning_rate, can be different from sparse lr if formal_name == "LearningRate" and param.name != "learning_rate_0": warnings.warn("will support decay soon") - param = main_program.global_block().vars[ - "learning_rate_0"] + param = main_program.global_block( + ).vars["learning_rate_0"] - initializer = self.get_initializer_attr(param.name, - startup_program) + initializer = self.get_initializer_attr( + param.name, startup_program) elif formal_name == "MomentDecayRate": initializer = "fill_constant&0.99" elif formal_name == "AdaDecayRate": @@ -349,8 +351,8 @@ class CommonAccessor: formal_name)[0]] if formal_name == "LearningRate" and param.name != "learning_rate_0": warnings.warn("will support decay soon") - param = main_program.global_block().vars[ - "learning_rate_0"] + param = main_program.global_block( + ).vars["learning_rate_0"] if shape is None: if is_sparse: @@ -360,8 +362,8 @@ class CommonAccessor: pserver_id) dims.append(shape) - initializer = self.get_initializer_attr(param.name, - startup_program) + initializer = self.get_initializer_attr( + param.name, startup_program) initializers.append(initializer) for (attr_varname, type_) in attr_varnames: @@ -400,11 +402,12 @@ class CommonAccessor: attrs += "initializers: \"{}\" ".format(initializer) attrs += "\n" - return accessor_str.format( - conv_indent(indent), attrs, conv_indent(indent)) + return accessor_str.format(conv_indent(indent), attrs, + 
conv_indent(indent)) class Tensor: + def __init__(self): self.main_program_id = None self.startup_program_id = None @@ -422,11 +425,12 @@ class Tensor: attrs += "tensor_table_class: \"{}\" ".format( str(self.tensor_table_class)) attrs += "\n" - return program_str.format( - conv_indent(indent), attrs, conv_indent(indent)) + return program_str.format(conv_indent(indent), attrs, + conv_indent(indent)) class Table: + def __init__(self): self.id = -1 self.table_class = None @@ -455,8 +459,9 @@ class Table: if self.accessor_proto is not None: accessor_str = "{}accessor {{{}\n{}}}" - accessor_str = accessor_str.format( - conv_indent(indent), self.accessor_proto, conv_indent(indent)) + accessor_str = accessor_str.format(conv_indent(indent), + self.accessor_proto, + conv_indent(indent)) attrs += accessor_str + "\n" elif self.accessor is not None: attrs += self.accessor.to_string(indent) @@ -474,6 +479,7 @@ class Table: class Service: + def __init__(self): self.server_class = "BrpcPsServer" self.client_class = "BrpcPsClient" @@ -491,11 +497,12 @@ class Service: attrs += "start_server_port: {} ".format(self.start_server_port) attrs += "server_thread_num: {} ".format(self.server_thread_num) - return service_str.format( - conv_indent(indent), attrs, conv_indent(indent)) + return service_str.format(conv_indent(indent), attrs, + conv_indent(indent)) class DownpourServer: + def __init__(self): self.service = None self.tables = [] @@ -520,11 +527,12 @@ class DownpourServer: for table in self.tables: table_strs += "\n" table_strs += table.to_string(indent) - return server_str.format( - conv_indent(indent), table_strs, conv_indent(indent)) + return server_str.format(conv_indent(indent), table_strs, + conv_indent(indent)) class Server: + def __init__(self): self.servers = [] @@ -545,6 +553,7 @@ class Server: class DownpourWorker: + def __init__(self): self.tables = [] @@ -561,11 +570,12 @@ class DownpourWorker: table_strs += "\n" table_strs += table.to_string(indent) - return worker_str.format( - conv_indent(indent), table_strs, conv_indent(indent)) + return worker_str.format(conv_indent(indent), table_strs, + conv_indent(indent)) class Worker: + def __init__(self): self.workers = [] @@ -586,6 +596,7 @@ class Worker: class fsClient: + def __init__(self, proto): self.proto = proto self.uri = proto.uri @@ -604,6 +615,7 @@ class fsClient: class TheOnePSRuntime(RuntimeBase): + def __init__(self): super(TheOnePSRuntime, self).__init__() self._communicator = None @@ -648,9 +660,10 @@ class TheOnePSRuntime(RuntimeBase): def build_compiled_startegy(self): from paddle.fluid.incubate.fleet.parameter_server.ir.public import CompileTimeStrategy - compiled_config = CompileTimeStrategy( - self.origin_main_program, self.origin_main_program, - self.async_strategy, self.role_maker) + compiled_config = CompileTimeStrategy(self.origin_main_program, + self.origin_main_program, + self.async_strategy, + self.role_maker) if self.async_strategy.use_ps_gpu: compiled_config.use_ps_gpu = True return compiled_config @@ -671,8 +684,9 @@ class TheOnePSRuntime(RuntimeBase): main_program._fleet_opt = {} main_program._fleet_opt["use_ps_gpu"] = True gpus_env = os.getenv("FLAGS_selected_gpus") - main_program._fleet_opt[ - "worker_places"] = [int(s) for s in gpus_env.split(",")] + main_program._fleet_opt["worker_places"] = [ + int(s) for s in gpus_env.split(",") + ] def sync_strategy_envs(): kwargs = {} @@ -748,7 +762,7 @@ class TheOnePSRuntime(RuntimeBase): warnings.warn("gloo may not initialize correctly") all_info = [all_info] 
self._communicator.set_clients(all_info) - # create_c2c_connection default param: + # create_c2c_connection default param: # pserver_timeout_ms=500000 # pserver_connect_timeout_ms=10000 # max_retry=3 @@ -811,8 +825,9 @@ class TheOnePSRuntime(RuntimeBase): if self.role_maker._is_heter_worker(): heter_device_type = self.role_maker._heter_device_type().upper() if heter_device_type not in ["GPU", "XPU", "CPU"]: - raise ValueError("Heter Worker Not Support Device {}". - format(device_type)) + raise ValueError( + "Heter Worker Not Support Device {}".format( + device_type)) if heter_device_type == "GPU": executor = Executor( fluid.CUDAPlace( @@ -824,6 +839,7 @@ class TheOnePSRuntime(RuntimeBase): return executor def _get_fleet_proto(self, is_server, is_sync, **kwargs): + def _build_merge_accessor(ctx): accessor = Accessor() accessor.accessor_class = "CommMergeAccessor" @@ -856,8 +872,8 @@ class TheOnePSRuntime(RuntimeBase): common.table_name = "barrier_table" trainer_num = self.compiled_strategy.get_trainers() if self.role_maker._is_heter_parameter_server_mode: - trainer_num += len(self.role_maker._get_heter_worker_endpoints( - )) + trainer_num += len( + self.role_maker._get_heter_worker_endpoints()) common.trainer_num = trainer_num common.attrs = "" common.dims = [] @@ -904,18 +920,18 @@ class TheOnePSRuntime(RuntimeBase): if tensor_table_dict[table_name]["startup_program"] != None: tensor_table_dict[table_name][ "startup_program_id"] = program_idx - self._server_sub_program.append(tensor_table_dict[ - table_name]["startup_program"].desc) + self._server_sub_program.append( + tensor_table_dict[table_name]["startup_program"].desc) program_idx += 1 if tensor_table_dict[table_name]["main_program"] != None: tensor_table_dict[table_name][ "main_program_id"] = program_idx - self._server_sub_program.append(tensor_table_dict[ - table_name]["main_program"].desc) + self._server_sub_program.append( + tensor_table_dict[table_name]["main_program"].desc) program_idx += 1 # Todo: Hard code for lr_decay table apply table id - new_table = _build_tensor_table( - len(tables), tensor_table_dict[table_name]) + new_table = _build_tensor_table(len(tables), + tensor_table_dict[table_name]) tables.append(new_table) return tables @@ -989,12 +1005,11 @@ class TheOnePSRuntime(RuntimeBase): common.table_name = "MergedDense" adam_d2sum = self.context["user_defined_strategy"].adam_d2sum - common.parse_by_optimizer(ctx.origin_varnames()[0], - ctx.is_sparse(), - ctx.sections()[0], - ctx.sections()[1] - if ctx.is_sparse() else 1, - self.compiled_strategy, adam_d2sum) + common.parse_by_optimizer( + ctx.origin_varnames()[0], ctx.is_sparse(), + ctx.sections()[0], + ctx.sections()[1] if ctx.is_sparse() else 1, + self.compiled_strategy, adam_d2sum) if ctx.is_sparse(): common.parse_entry(common.table_name, @@ -1056,8 +1071,8 @@ class TheOnePSRuntime(RuntimeBase): trainers += len(self.role_maker._get_heter_worker_endpoints()) server = self._get_fleet_proto(is_server=True, is_sync=is_sync) proto_txt = str(server) - fs_client = fsClient(self.context["user_defined_strategy"] - .fs_client_param) + fs_client = fsClient( + self.context["user_defined_strategy"].fs_client_param) proto_txt = proto_txt + "\n" + fs_client.to_string() debug = bool(int(os.getenv("PSERVER_DEBUG", "0"))) @@ -1087,8 +1102,8 @@ class TheOnePSRuntime(RuntimeBase): for var_name in var_names: if var_name not in distributed_varnames: raise ValueError( - "fleet.init server can only load sparse variables in {}". 
- format(distributed_varnames)) + "fleet.init server can only load sparse variables in {}" + .format(distributed_varnames)) load_varnames = var_names if dirname is None or not load_varnames: @@ -1125,6 +1140,7 @@ class TheOnePSRuntime(RuntimeBase): @staticmethod def __exclude_vars(exclude_var_names=[]): + def is_valid(var): if var.name in exclude_var_names: return False @@ -1199,17 +1215,17 @@ class TheOnePSRuntime(RuntimeBase): saved_varnames = sparse_varnames remaining_vars = list( - filter( - TheOnePSRuntime.__exclude_vars(saved_varnames), - main_program.list_vars())) + filter(TheOnePSRuntime.__exclude_vars(saved_varnames), + main_program.list_vars())) import paddle for var in remaining_vars: # if var.name not in recv_dense_varnames: # continue tensor = var.get_value() - paddle.save( - tensor, os.path.join(dirname, var.name), use_binary_format=True) + paddle.save(tensor, + os.path.join(dirname, var.name), + use_binary_format=True) def _ps_inference_save_persistables(self, executor, @@ -1312,16 +1328,14 @@ class TheOnePSRuntime(RuntimeBase): "user_defined_strategy"].trainer_desc_configs["stat_var_names"] generate_vars = [var for var in generate_vars] remaining_vars = list( - filter( - TheOnePSRuntime.__exclude_vars(sparse_names), - infer_program.list_vars())) + filter(TheOnePSRuntime.__exclude_vars(sparse_names), + infer_program.list_vars())) for var in remaining_vars: tensor = var.get_value() - paddle.save( - tensor, - os.path.join(model_path, var.name), - use_binary_format=True) + paddle.save(tensor, + os.path.join(model_path, var.name), + use_binary_format=True) def _save_inference_model(self, *args, **kwargs): self._ps_inference_save_inference_model(*args, **kwargs) @@ -1374,9 +1388,8 @@ class TheOnePSRuntime(RuntimeBase): loaded_varnames = sparse_varnames remaining_vars = list( - filter( - TheOnePSRuntime.__exclude_vars(loaded_varnames), - main_program.list_vars())) + filter(TheOnePSRuntime.__exclude_vars(loaded_varnames), + main_program.list_vars())) if dirname.startswith("afs:") or dirname.startswith("hdfs:"): model_path = "./dnn_plugin" diff --git a/python/paddle/distributed/fleet/utils/fs.py b/python/paddle/distributed/fleet/utils/fs.py index fab7b4ff4ce..7e0456f2793 100644 --- a/python/paddle/distributed/fleet/utils/fs.py +++ b/python/paddle/distributed/fleet/utils/fs.py @@ -55,6 +55,7 @@ class FSShellCmdAborted(ExecuteError): class FS(object): + @abc.abstractmethod def ls_dir(self, fs_path): raise NotImplementedError @@ -386,7 +387,9 @@ class LocalFS(FS): def _handle_errors(max_time_out=None): + def decorator(f): + @functools.wraps(f) def handler(*args, **kwargs): o = args[0] @@ -406,13 +409,15 @@ def _handle_errors(max_time_out=None): except ExecuteError as e: if time.time() - start >= time_out: raise FSTimeOut("args:{} timeout:{}".format( - args, time.time() - start)) + args, + time.time() - start)) time.sleep(inter) if time.time() - last_print_time > 30: print("hadoop operator timeout:args:{} timeout:{}".format( - args, time.time() - start)) + args, + time.time() - start)) last_print_time = time.time() return handler @@ -778,8 +783,8 @@ class HDFSClient(FS): procs = [] for i in range(multi_processes): process_datas = self._split_files(all_files, i, multi_processes) - p = multiprocessing.Process( - target=__subprocess_upload, args=(fs_path, process_datas)) + p = multiprocessing.Process(target=__subprocess_upload, + args=(fs_path, process_datas)) procs.append(p) p.start() @@ -847,8 +852,8 @@ class HDFSClient(FS): procs = [] for i in range(multi_processes): process_datas = 
self._split_files(all_files, i, multi_processes) - p = multiprocessing.Process( - target=__subprocess_download, args=(local_path, process_datas)) + p = multiprocessing.Process(target=__subprocess_download, + args=(local_path, process_datas)) procs.append(p) p.start() @@ -943,8 +948,8 @@ class HDFSClient(FS): if test_exists: if not self.is_exist(fs_src_path): - raise FSFileNotExistsError("{} is not exists".format( - fs_src_path)) + raise FSFileNotExistsError( + "{} is not exists".format(fs_src_path)) if self.is_exist(fs_dst_path): raise FSFileExistsError("{} exists already".format(fs_dst_path)) @@ -1398,8 +1403,8 @@ class AFSClient(FS): procs = [] for i in range(multi_processes): process_datas = self._split_files(all_files, i, multi_processes) - p = multiprocessing.Process( - target=__subprocess_download, args=(local_path, process_datas)) + p = multiprocessing.Process(target=__subprocess_download, + args=(local_path, process_datas)) procs.append(p) p.start() @@ -1453,8 +1458,8 @@ class AFSClient(FS): if test_exists: if not self.is_exist(fs_src_path): - raise FSFileNotExistsError("{} is not exists".format( - fs_src_path)) + raise FSFileNotExistsError( + "{} is not exists".format(fs_src_path)) if self.is_exist(fs_dst_path): raise FSFileExistsError("{} exists already".format(fs_dst_path)) diff --git a/python/paddle/distributed/fleet/utils/http_server.py b/python/paddle/distributed/fleet/utils/http_server.py index 7d30fc5e0df..4653b22f96e 100644 --- a/python/paddle/distributed/fleet/utils/http_server.py +++ b/python/paddle/distributed/fleet/utils/http_server.py @@ -38,8 +38,9 @@ def get_logger(name, level, fmt): return logger -_http_server_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_http_server_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') class KVHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py index 0ac2df76d6a..e6b581464fa 100644 --- a/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py +++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -275,10 +275,12 @@ class HybridParallelInferenceHelper(object): self.role_maker, wait_port=False) # Create global rings - collective_helper._init_communicator( - self._startup_program, self.current_endpoint, self.global_endpoints, - self.global_rank, self.global_ring_id, True, self.global_ring_id, - True) + collective_helper._init_communicator(self._startup_program, + self.current_endpoint, + self.global_endpoints, + self.global_rank, + self.global_ring_id, True, + self.global_ring_id, True) # Create mp rings if self.num_mp > 1: @@ -287,9 +289,11 @@ class HybridParallelInferenceHelper(object): idx for idx, mp_idx in enumerate(self.mp_group) if mp_idx == self.rank ][0] - collective_helper._init_communicator( - self._startup_program, self.current_endpoint, mp_endpoints, - mp_rank, self.mp_ring_id, True, self.global_ring_id, True) + collective_helper._init_communicator(self._startup_program, + self.current_endpoint, + mp_endpoints, mp_rank, + self.mp_ring_id, True, + self.global_ring_id, True) # Create pipeline rings if self.num_pp > 1: @@ -309,10 +313,12 @@ class HybridParallelInferenceHelper(object): self.endpoints[first_node], self.endpoints[second_node] ] pipeline_rank = 0 if self.rank == first_node else 1 - collective_helper._init_communicator( - self._startup_program, self.current_endpoint, - pipeline_endpoints, pipeline_rank, ring_id, False, - self.global_ring_id, True) + collective_helper._init_communicator(self._startup_program, + self.current_endpoint, + pipeline_endpoints, + pipeline_rank, ring_id, + False, self.global_ring_id, + True) def _get_input_output_info(self, block): ''' @@ -367,8 +373,8 @@ class HybridParallelInferenceHelper(object): op_idx += 1 if op.type == "while": sub_block_id = int(op.attr('sub_block').id) - sub_used_var_names = self._split_program(program, stage, - sub_block_id) + sub_used_var_names = self._split_program( + program, stage, sub_block_id) used_var_names.update(sub_used_var_names) @@ -403,6 +409,7 @@ class HybridParallelInferenceHelper(object): return used_var_names + # def _find_post_op(self, index, var_name): # """ # Find the post op that has variable named var_name as input. @@ -474,23 +481,25 @@ class HybridParallelInferenceHelper(object): pre_stage_id = None for op in block.ops: - assert op.has_attr(self._op_role_key), ( - "{} has no {} set .".format(op.type, self._op_role_key)) + assert op.has_attr(self._op_role_key), ("{} has no {} set .".format( + op.type, self._op_role_key)) op_role = op.attr(self._op_role_key) assert op_role == int(self._op_role.Forward), ( "Only forward is supported for inference.") if not op._has_kernel(op.type): - assert op.type in ["while", "conditional_block"], ( - "The only supported op without kernel is while.") + assert op.type in [ + "while", "conditional_block" + ], ("The only supported op without kernel is while.") sub_block_id = op.attr('sub_block').id sub_block = block.program.block(sub_block_id) self._check_validation(sub_block) - assert op.has_attr(self._op_device_key), ( - "{} has no {} set.".format(op.type, self._op_device_key)) + assert op.has_attr( + self._op_device_key), ("{} has no {} set.".format( + op.type, self._op_device_key)) device = op.attr(self._op_device_key) - assert device, ( - "{} has no {} set.".format(op.type, self._op_device_key)) + assert device, ("{} has no {} set.".format(op.type, + self._op_device_key)) if device.split(':')[1] == "all": continue dev_type = device.split(':')[0] @@ -507,7 +516,9 @@ class HybridParallelInferenceHelper(object): # avoiding multiple send and recv ops. 
input_var_to_device = dict() - extra_index_info = {'index': 0, } + extra_index_info = { + 'index': 0, + } for index, op in enumerate(list(block.ops)): cur_device = op.attr(self._op_device_key) @@ -542,8 +553,8 @@ class HybridParallelInferenceHelper(object): if (cur_device, prev_device) in input_var_to_device[var_name]: continue - assert self._device == cur_device.split(':')[ - 0], "More than one device type found." + assert self._device == cur_device.split( + ':')[0], "More than one device type found." device_type = cur_device.split(':')[0] + ':' def _insert_send_recv(cur_id, prev_id): @@ -614,9 +625,8 @@ class HybridParallelInferenceHelper(object): }) extra_index_info['index'] += 1 - _insert_send_recv( - int(cur_device.split(':')[1]), - int(prev_device.split(':')[1])) + _insert_send_recv(int(cur_device.split(':')[1]), + int(prev_device.split(':')[1])) block._sync_with_cpp() def _insert_sendrecv_ops_in_while_block( diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py index 5e2ad43c164..e2f7af769d3 100644 --- a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py +++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py @@ -51,8 +51,10 @@ def _apply_collective_grads(parameters, comm_group): paddle.distributed.all_reduce(coalesced_grad, group=comm_group) paddle.fluid.framework._dygraph_tracer().trace_op( type="elementwise_div", - inputs={'X': coalesced_grad, - 'Y': div_factor}, + inputs={ + 'X': coalesced_grad, + 'Y': div_factor + }, outputs={'Out': coalesced_grad}, attrs={'axis': -1}) @@ -76,7 +78,7 @@ def _apply_collective_grads_eager(parameters, comm_group): div_factor = 1.0 / comm_group.nranks for coalesced_grad, _, _ in coalesced_grads_and_vars: - # need to div nranks + # need to div nranks coalesced_grad.scale_(div_factor) paddle.distributed.all_reduce(coalesced_grad, group=comm_group) @@ -89,22 +91,20 @@ def _broadcast_data_help(data, shape, dtype, hcg): mp_rank = hcg.get_model_parallel_rank() shape_gpu = paddle.to_tensor(shape, dtype="int32") - paddle.distributed.broadcast( - shape_gpu, - src=src_rank, - group=model_parallel_group, - use_calc_stream=True) + paddle.distributed.broadcast(shape_gpu, + src=src_rank, + group=model_parallel_group, + use_calc_stream=True) if mp_rank != 0: input_data = paddle.zeros(shape_gpu, dtype=dtype) else: input_data = data - paddle.distributed.broadcast( - input_data, - src=src_rank, - group=model_parallel_group, - use_calc_stream=True) + paddle.distributed.broadcast(input_data, + src=src_rank, + group=model_parallel_group, + use_calc_stream=True) def broadcast_input_data(hcg, *inputs, **kwargs): @@ -128,15 +128,19 @@ def broadcast_input_data(hcg, *inputs, **kwargs): def broadcast_mp_parameters(model, hcg): model_parallel_group = hcg.get_model_parallel_group() src_rank = hcg.get_model_parallel_group_src_rank() - sync_params_buffers( - model, model_parallel_group, src_rank, is_model_parallel=True) + sync_params_buffers(model, + model_parallel_group, + src_rank, + is_model_parallel=True) def broadcast_dp_parameters(model, hcg): data_parallel_group = hcg.get_data_parallel_group() src_rank = hcg.get_data_parallel_group_src_rank() - sync_params_buffers( - model, data_parallel_group, src_rank, is_model_parallel=False) + sync_params_buffers(model, + data_parallel_group, + src_rank, + is_model_parallel=False) def fused_allreduce_gradients(parameter_list, hcg): @@ -150,7 +154,7 @@ def fused_allreduce_gradients(parameter_list, hcg): def 
sharding_reduce_gradients(parameter_list, hcg): # TODO allreduce --> reduce - # TODO merge grad / nrank with dp + # TODO merge grad / nrank with dp logger.debug("sharding start gradients sync") with framework.no_grad(): @@ -166,7 +170,7 @@ def sharding_reduce_gradients(parameter_list, hcg): elif _in_legacy_dygraph(): g_var = param._grad_ivar() - # need use trace_op to allreduce + # need use trace_op to allreduce # paddle.distributed.all_reduce( # g_var, group=hcg.get_sharding_parallel_group(), use_calc_stream=True) paddle.fluid.framework._dygraph_tracer().trace_op( @@ -179,12 +183,14 @@ def sharding_reduce_gradients(parameter_list, hcg): }) # grad / sharding_rank - div_factor = paddle.to_tensor( - sharding_nrank, dtype=g_var.dtype) + div_factor = paddle.to_tensor(sharding_nrank, + dtype=g_var.dtype) paddle.fluid.framework._dygraph_tracer().trace_op( type="elementwise_div", - inputs={'X': g_var, - 'Y': div_factor}, + inputs={ + 'X': g_var, + 'Y': div_factor + }, outputs={'Out': g_var}, attrs={'axis': -1}) @@ -194,5 +200,7 @@ def broadcast_sharding_parameters(model, hcg): logger.debug("sharding start init parameters sync") sharding_parallel_group = hcg.get_sharding_parallel_group() src_rank = hcg.get_sharding_parallel_group_src_rank() - sync_params_buffers( - model, sharding_parallel_group, src_rank, is_model_parallel=False) + sync_params_buffers(model, + sharding_parallel_group, + src_rank, + is_model_parallel=False) diff --git a/python/paddle/distributed/fleet/utils/internal_storage.py b/python/paddle/distributed/fleet/utils/internal_storage.py index 80d8d8562d4..421111d5b88 100644 --- a/python/paddle/distributed/fleet/utils/internal_storage.py +++ b/python/paddle/distributed/fleet/utils/internal_storage.py @@ -62,11 +62,11 @@ class InternalStorage: Move the underlying buffer """ assert self.buffer is not None, "Cannot move a collapsed bucket, please rebuild it" - assert (dtype == Type.fp32.value or - Type.fp16.value), "Conversion type is not supported now" + assert (dtype == Type.fp32.value + or Type.fp16.value), "Conversion type is not supported now" - dev_id = 0 if paddle.get_device() == "cpu" else int(paddle.get_device() - .split(":")[1]) + dev_id = 0 if paddle.get_device() == "cpu" else int( + paddle.get_device().split(":")[1]) if self._device != device: tmp_buffer = self.buffer.cuda( @@ -154,11 +154,11 @@ class ParamStorage(InternalStorage): param.stop_gradient = origin_state # Copy the current param value - dev_id = 0 if paddle.get_device() == "cpu" else int(paddle.get_device() - .split(":")[1]) + dev_id = 0 if paddle.get_device() == "cpu" else int( + paddle.get_device().split(":")[1]) with device_guard(dev_id, "cpu"): - tmp_var = core.VarBase(tensor=self.buffer._slice(self._fill, - var_end)) + tmp_var = core.VarBase( + tensor=self.buffer._slice(self._fill, var_end)) if convert_gpu: param_cpu = param.cpu() param.value().get_tensor()._clear() @@ -316,8 +316,8 @@ class GradStorage(InternalStorage): assert offset <= np.prod(self.buffer.shape) # Copy the current grad value to InternalStorage - dev_id = 0 if paddle.get_device() == "cpu" else int(paddle.get_device() - .split(":")[1]) + dev_id = 0 if paddle.get_device() == "cpu" else int( + paddle.get_device().split(":")[1]) if self._device == "cpu": with device_guard(dev_id, self._device): tmp_var = core.VarBase(self.buffer._slice(self._fill, grad_end)) diff --git a/python/paddle/distributed/fleet/utils/log_util.py b/python/paddle/distributed/fleet/utils/log_util.py index 77eb641e0c6..cf90527c07f 100644 --- 
a/python/paddle/distributed/fleet/utils/log_util.py +++ b/python/paddle/distributed/fleet/utils/log_util.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,6 +19,7 @@ __all__ = [] class LoggerFactory: + @staticmethod def build_logger(name=None, level=logging.INFO): assert name is not None, "name for logger should not be None" diff --git a/python/paddle/distributed/fleet/utils/ps_util.py b/python/paddle/distributed/fleet/utils/ps_util.py index e4dcd59b3f1..0e141d66c1a 100644 --- a/python/paddle/distributed/fleet/utils/ps_util.py +++ b/python/paddle/distributed/fleet/utils/ps_util.py @@ -53,10 +53,10 @@ class DistributedInfer: fake_optimizer = paddle.optimizer.SGD() strategy = fleet.DistributedStrategy() strategy.a_sync = True - optimizer = fleet.distributed_optimizer( - fake_optimizer, strategy=strategy) - optimizer.minimize( - loss, startup_program=self.origin_startup_program) + optimizer = fleet.distributed_optimizer(fake_optimizer, + strategy=strategy) + optimizer.minimize(loss, + startup_program=self.origin_startup_program) if fleet.is_server(): fleet.init_server(dirname=dirname) @@ -100,11 +100,10 @@ class DistributedInfer: v[1] for v in dense_persist_vars if os.path.isfile(os.path.join(dirname, v[0])) ] - paddle.static.load_vars( - exe, - dirname, - main_program=self.origin_main_program, - vars=need_load_vars) + paddle.static.load_vars(exe, + dirname, + main_program=self.origin_main_program, + vars=need_load_vars) def get_dist_infer_program(self): varname2tables = self._get_sparse_table_map() @@ -113,6 +112,7 @@ class DistributedInfer: return convert_program def _convert_program(self, main_program, varname2tables): + def distributed_ops_pass(program): SPARSE_OP_TYPE_DICT = {"lookup_table": "W", "lookup_table_v2": "W"} @@ -128,6 +128,7 @@ class DistributedInfer: return pull_sparse_ops def _pull_sparse_fuse(_program, pull_sparse_ops): + def dag_check_up_and_reorder(program, inputs, outputs): global_block = program.global_block() min_output_index = len(global_block.ops) @@ -152,8 +153,8 @@ class DistributedInfer: for out_id, out_var in enumerate(outputs): if out_var.name in ins: output_indexes[idx] = 1 - min_output_index = min(min_output_index, - idx) + min_output_index = min( + min_output_index, idx) for i in range(len(global_block.ops)): if input_indexes[i] == 1 and output_indexes[i] == 1: @@ -213,8 +214,8 @@ class DistributedInfer: desc = global_block.desc._insert_op( min_output_index) desc.copy_from(global_block.ops[index].desc) - global_block.desc._remove_op(index + 1, - index + 2) + global_block.desc._remove_op( + index + 1, index + 2) global_block.ops[index].desc = desc insert_op = global_block.ops.pop(index) input_state = input_indexes.pop(index) @@ -230,8 +231,8 @@ class DistributedInfer: assert global_block.desc.op_size() == len( global_block.ops) for i in range(len(global_block.ops)): - assert global_block.desc.op(i) == global_block.ops[ - i].desc + assert global_block.desc.op( + i) == global_block.ops[i].desc for param, ops in pull_sparse_ops.items(): all_ops = program.global_block().ops @@ 
-245,8 +246,8 @@ class DistributedInfer: if w.name not in varname2tables.keys(): raise ValueError( - "can not find variable {}, please check your configuration". - format(w.name)) + "can not find variable {}, please check your configuration" + .format(w.name)) table_id = varname2tables[w.name] @@ -266,16 +267,16 @@ class DistributedInfer: program.global_block()._remove_op(idx) inputs_idxs = [-1] * len(inputs) - outputs_idxs = [len(program.global_block().ops) + 1] * len( - outputs) + outputs_idxs = [len(program.global_block().ops) + 1 + ] * len(outputs) for idx, op in enumerate(program.global_block().ops): for i in range(0, len(op.output_names)): outs = op.output(op.output_names[i]) for in_id, in_var in enumerate(inputs): if in_var.name in outs: - inputs_idxs[in_id] = max(idx, - inputs_idxs[in_id]) + inputs_idxs[in_id] = max( + idx, inputs_idxs[in_id]) for i in range(0, len(op.input_names)): ins = op.input(op.input_names[i]) for out_id, out_var in enumerate(outputs): @@ -289,8 +290,10 @@ class DistributedInfer: program.global_block()._insert_op( index=distributed_idx, type="distributed_lookup_table", - inputs={"Ids": inputs, - 'W': w}, + inputs={ + "Ids": inputs, + 'W': w + }, outputs={"Outputs": outputs}, attrs={ "is_distributed": is_distributed, diff --git a/python/paddle/distributed/fleet/utils/recompute.py b/python/paddle/distributed/fleet/utils/recompute.py index b8d1c881a08..423536b095a 100755 --- a/python/paddle/distributed/fleet/utils/recompute.py +++ b/python/paddle/distributed/fleet/utils/recompute.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,9 +21,10 @@ import contextlib from paddle.fluid.framework import in_dygraph_mode import logging + logger = logging.getLogger(__name__) -formatter = logging.Formatter( - fmt='%(asctime)s %(levelname)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S') +formatter = logging.Formatter(fmt='%(asctime)s %(levelname)-8s %(message)s', + datefmt='%Y-%m-%d %H:%M:%S') ch = logging.StreamHandler() ch.setFormatter(formatter) logger.addHandler(ch) @@ -68,13 +69,14 @@ def swith_rng_state_tracker(rng_state, tracker): class EagerRecomputeFunction(EagerPyLayer): + @staticmethod def forward(ctx, run_function, preserve_rng_state, *args): from paddle.distributed.fleet.meta_parallel.parallel_layers.random import get_rng_state_tracker if framework._dygraph_tracer()._has_grad: check_recompute_necessary(args) - # store for recomputing + # store for recomputing ctx.run_function = run_function ctx.preserve_rng_state = preserve_rng_state @@ -101,8 +103,8 @@ class EagerRecomputeFunction(EagerPyLayer): cur_device = paddle.get_device() if 'gpu:' not in cur_device: raise RuntimeError( - "Recompute with RNG perserve is not support current device: {}.". - format(cur_device)) + "Recompute with RNG perserve is not support current device: {}." 
+ .format(cur_device)) ctx.fw_cuda_rng_state = paddle.get_cuda_rng_state() ctx.fwd_cuda_rng_state_tracker = get_rng_state_tracker( ).get_states_tracker() @@ -163,12 +165,11 @@ class EagerRecomputeFunction(EagerPyLayer): detached_inputs = detach_variable(tuple(inputs)) outputs = ctx.run_function(*detached_inputs) else: - with paddle.amp.auto_cast( - enable=ctx.is_fw_autocast, - custom_white_list=ctx.amp_white_list, - custom_black_list=ctx.amp_black_list, - level=ctx.amp_level, - dtype=ctx.amp_dtype): + with paddle.amp.auto_cast(enable=ctx.is_fw_autocast, + custom_white_list=ctx.amp_white_list, + custom_black_list=ctx.amp_black_list, + level=ctx.amp_level, + dtype=ctx.amp_dtype): detached_inputs = detach_variable(tuple(inputs)) outputs = ctx.run_function(*detached_inputs) @@ -179,7 +180,7 @@ class EagerRecomputeFunction(EagerPyLayer): # run backward() with only tensor that requires grad forward_outputs_with_grad = [] # NOTE In Transformer-like network, if user put the attention mask into the recompute segment output, - # pylayer will force the stop_gradient of attention mask to be False, which will make the number of + # pylayer will force the stop_gradient of attention mask to be False, which will make the number of # tensor that need grad does not match. # the following backward_inputs_with_grad is used to avoid this case. backward_inputs_with_grad = [] @@ -200,20 +201,20 @@ class EagerRecomputeFunction(EagerPyLayer): paddle.autograd.backward(forward_outputs_with_grad, backward_inputs_with_grad) - grads = tuple( - inp.grad for inp in detached_inputs - if isinstance(inp, core.eager.Tensor)) + grads = tuple(inp.grad for inp in detached_inputs + if isinstance(inp, core.eager.Tensor)) return grads class RecomputeFunction(PyLayer): + @staticmethod def forward(ctx, run_function, preserve_rng_state, *args): from paddle.distributed.fleet.meta_parallel.parallel_layers.random import get_rng_state_tracker if framework._dygraph_tracer()._has_grad: check_recompute_necessary(args) - # store for recomputing + # store for recomputing ctx.run_function = run_function ctx.preserve_rng_state = preserve_rng_state @@ -240,8 +241,8 @@ class RecomputeFunction(PyLayer): cur_device = paddle.get_device() if 'gpu:' not in cur_device: raise RuntimeError( - "Recompute with RNG perserve is not support current device: {}.". - format(cur_device)) + "Recompute with RNG perserve is not support current device: {}." 
+ .format(cur_device)) ctx.fw_cuda_rng_state = paddle.get_cuda_rng_state() ctx.fwd_cuda_rng_state_tracker = get_rng_state_tracker( ).get_states_tracker() @@ -302,12 +303,11 @@ class RecomputeFunction(PyLayer): detached_inputs = detach_variable(tuple(inputs)) outputs = ctx.run_function(*detached_inputs) else: - with paddle.amp.auto_cast( - enable=ctx.is_fw_autocast, - custom_white_list=ctx.amp_white_list, - custom_black_list=ctx.amp_black_list, - level=ctx.amp_level, - dtype=ctx.amp_dtype): + with paddle.amp.auto_cast(enable=ctx.is_fw_autocast, + custom_white_list=ctx.amp_white_list, + custom_black_list=ctx.amp_black_list, + level=ctx.amp_level, + dtype=ctx.amp_dtype): detached_inputs = detach_variable(tuple(inputs)) outputs = ctx.run_function(*detached_inputs) @@ -318,7 +318,7 @@ class RecomputeFunction(PyLayer): # run backward() with only tensor that requires grad forward_outputs_with_grad = [] # NOTE In Transformer-like network, if user put the attention mask into the recompute segment output, - # pylayer will force the stop_gradient of attention mask to be False, which will make the number of + # pylayer will force the stop_gradient of attention mask to be False, which will make the number of # tensor that need grad does not match. # the following backward_inputs_with_grad is used to avoid this case. backward_inputs_with_grad = [] @@ -463,8 +463,8 @@ def recompute(function, *args, **kwargs): # Hack to mix *args with **kwargs in a python 2.7-compliant way preserve = kwargs.pop('preserve_rng_state', True) if kwargs: - raise ValueError("Unexpected keyword arguments: " + ",".join( - arg for arg in kwargs)) + raise ValueError("Unexpected keyword arguments: " + + ",".join(arg for arg in kwargs)) if in_dygraph_mode(): return EagerRecomputeFunction.apply(function, preserve, *args) diff --git a/python/paddle/distributed/launch/__main__.py b/python/paddle/distributed/launch/__main__.py index 42f844ca717..52b0ed3a012 100644 --- a/python/paddle/distributed/launch/__main__.py +++ b/python/paddle/distributed/launch/__main__.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/distributed/launch/context/__init__.py b/python/paddle/distributed/launch/context/__init__.py index fbea5d0db86..902c8189b17 100644 --- a/python/paddle/distributed/launch/context/__init__.py +++ b/python/paddle/distributed/launch/context/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,6 +23,7 @@ import logging class Context(object): + def __init__(self, enable_plugin=True): self.args, self.unknown_args = parse_args() self.envs = fetch_envs() diff --git a/python/paddle/distributed/launch/context/args_envs.py b/python/paddle/distributed/launch/context/args_envs.py index ea8bf3d597a..f6624e88e27 100644 --- a/python/paddle/distributed/launch/context/args_envs.py +++ b/python/paddle/distributed/launch/context/args_envs.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -50,96 +50,107 @@ def parse_args(): base_group = parser.add_argument_group("Base Parameters") - base_group.add_argument( - "--master", - type=str, - default=None, - help="the master/rendezvous server, ip:port") + base_group.add_argument("--master", + type=str, + default=None, + help="the master/rendezvous server, ip:port") - base_group.add_argument( - "--legacy", type=bool, default=False, help="use legacy launch") + base_group.add_argument("--legacy", + type=bool, + default=False, + help="use legacy launch") - base_group.add_argument( - "--rank", type=int, default=-1, help="the node rank") + base_group.add_argument("--rank", + type=int, + default=-1, + help="the node rank") - base_group.add_argument( - "--log_level", type=str, default="INFO", help="log level. Default INFO") + base_group.add_argument("--log_level", + type=str, + default="INFO", + help="log level. Default INFO") - base_group.add_argument( - "--nnodes", - type=str, - default="1", - help="the number of nodes, i.e. pod/node number") + base_group.add_argument("--nnodes", + type=str, + default="1", + help="the number of nodes, i.e. pod/node number") - base_group.add_argument( - "--nproc_per_node", - type=int, - default=None, - help="the number of processes in a pod") + base_group.add_argument("--nproc_per_node", + type=int, + default=None, + help="the number of processes in a pod") base_group.add_argument( "--log_dir", type=str, default="log", help="the path for each process's log. Default ./log") - base_group.add_argument( - "--run_mode", - type=str, - default=None, - help="run mode of the job, collective/ps/ps-heter") + base_group.add_argument("--run_mode", + type=str, + default=None, + help="run mode of the job, collective/ps/ps-heter") - base_group.add_argument( - "--job_id", - type=str, - default="default", - help="unique id of the job. Default default") + base_group.add_argument("--job_id", + type=str, + default="default", + help="unique id of the job. Default default") - base_group.add_argument( - "--devices", - type=str, - default=None, - help="accelerate devices. as --gpus,npus,xps") + base_group.add_argument("--devices", + type=str, + default=None, + help="accelerate devices. 
as --gpus,npus,xps") base_group.add_argument("--host", type=str, default=None, help="host ip") - base_group.add_argument( - "training_script", - type=str, - help="the full path of py script," - "followed by arguments for the " - "training script") + base_group.add_argument("training_script", + type=str, + help="the full path of py script," + "followed by arguments for the " + "training script") base_group.add_argument('training_script_args', nargs=REMAINDER) ps_group = parser.add_argument_group("Parameter-Server Parameters") # for parameter server - ps_group.add_argument( - "--servers", type=str, default='', help="servers endpoints full list") - ps_group.add_argument( - "--trainers", type=str, default='', help="trainers endpoints full list") - - ps_group.add_argument( - "--trainer_num", type=int, default=None, help="number of trainers") - ps_group.add_argument( - "--server_num", type=int, default=None, help="number of servers") - ps_group.add_argument( - "--gloo_port", type=int, default=6767, help="gloo http port") - ps_group.add_argument( - "--with_gloo", type=str, default="1", help="use gloo or not") + ps_group.add_argument("--servers", + type=str, + default='', + help="servers endpoints full list") + ps_group.add_argument("--trainers", + type=str, + default='', + help="trainers endpoints full list") + + ps_group.add_argument("--trainer_num", + type=int, + default=None, + help="number of trainers") + ps_group.add_argument("--server_num", + type=int, + default=None, + help="number of servers") + ps_group.add_argument("--gloo_port", + type=int, + default=6767, + help="gloo http port") + ps_group.add_argument("--with_gloo", + type=str, + default="1", + help="use gloo or not") # parameter elastic mode elastic_group = parser.add_argument_group("Elastic Parameters") - elastic_group.add_argument( - "--max_restart", - type=int, - default=3, - help="the times can restart. Default 3") + elastic_group.add_argument("--max_restart", + type=int, + default=3, + help="the times can restart. Default 3") elastic_group.add_argument( "--elastic_level", type=int, default=-1, - help="elastic level: -1 disable, 0 failed exit, peers hold, 1 internal restart" + help= + "elastic level: -1 disable, 0 failed exit, peers hold, 1 internal restart" ) elastic_group.add_argument( diff --git a/python/paddle/distributed/launch/context/device.py b/python/paddle/distributed/launch/context/device.py index 30b8cc15385..7df7db28f78 100644 --- a/python/paddle/distributed/launch/context/device.py +++ b/python/paddle/distributed/launch/context/device.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -24,6 +24,7 @@ class DeviceType: class Device(object): + def __init__(self, dtype=None, memory="", labels=""): self._dtype = dtype self._memory = memory diff --git a/python/paddle/distributed/launch/context/event.py b/python/paddle/distributed/launch/context/event.py index 23e8e7a5014..cb39e1529fc 100644 --- a/python/paddle/distributed/launch/context/event.py +++ b/python/paddle/distributed/launch/context/event.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,6 +14,7 @@ class Event(object): + def __init__(self, kind="status", message="", fatal=False): self.kind = kind self.message = message diff --git a/python/paddle/distributed/launch/context/node.py b/python/paddle/distributed/launch/context/node.py index 8082541ffe0..39f42d02107 100644 --- a/python/paddle/distributed/launch/context/node.py +++ b/python/paddle/distributed/launch/context/node.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,6 +20,7 @@ from contextlib import closing class Node(object): + def __init__(self): # self.device = Device.detect_device() self.device = Device.parse_device() diff --git a/python/paddle/distributed/launch/context/resource.py b/python/paddle/distributed/launch/context/resource.py index faffed704c1..d523c3c5cdf 100644 --- a/python/paddle/distributed/launch/context/resource.py +++ b/python/paddle/distributed/launch/context/resource.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,5 +14,6 @@ class Resource(object): + def __init__(self): self.devices = [] diff --git a/python/paddle/distributed/launch/context/status.py b/python/paddle/distributed/launch/context/status.py index cfbf3623ec2..b87b7b3fb82 100644 --- a/python/paddle/distributed/launch/context/status.py +++ b/python/paddle/distributed/launch/context/status.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/distributed/launch/controllers/collective.py b/python/paddle/distributed/launch/controllers/collective.py index 5225fd6e81f..5d2bc8cb07e 100644 --- a/python/paddle/distributed/launch/controllers/collective.py +++ b/python/paddle/distributed/launch/controllers/collective.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,6 +21,7 @@ import time class CollectiveController(Controller): + @classmethod def enable(cls, ctx): # collective is the default mode @@ -54,9 +55,10 @@ class CollectiveController(Controller): 'endpoints': ",".join(endpoints), }) - peer_list, rank = self.master.sync_peers( - '/{}/info'.format(self.job.id), self.pod.name, data, - self.job.replicas, self.pod.rank) + peer_list, rank = self.master.sync_peers('/{}/info'.format(self.job.id), + self.pod.name, data, + self.job.replicas, + self.pod.rank) self.pod.rank = rank if len(peer_list) < 1: @@ -105,6 +107,7 @@ class CollectiveController(Controller): class CollectiveElasticController(CollectiveController): + @classmethod def enable(cls, ctx): if ctx.args.master and ctx.args.master.startswith("etcd://"): @@ -133,8 +136,9 @@ class CollectiveElasticController(CollectiveController): self.ctx.logger.info("Waiting peer ready...") - ok, replicas = self.master.wait_peer_ready( - self.job.replicas_min, self.job.replicas_max, timeout) + ok, replicas = self.master.wait_peer_ready(self.job.replicas_min, + self.job.replicas_max, + timeout) if ok: self.job.replicas = replicas else: diff --git a/python/paddle/distributed/launch/controllers/controller.py b/python/paddle/distributed/launch/controllers/controller.py index f069bfbcd35..a8ae155562a 100644 --- a/python/paddle/distributed/launch/controllers/controller.py +++ b/python/paddle/distributed/launch/controllers/controller.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -32,6 +32,7 @@ class ControleMode: class ControllerBase(object): + def __init__(self, ctx): signal.signal(signal.SIGTERM, self.signal_handler) signal.signal(signal.SIGABRT, self.signal_handler) @@ -110,8 +111,8 @@ class ControllerBase(object): return False # peer failure - if self.ctx.status.is_restarting() and self.master.get_status( - ) != self.ctx.status.COMPLETED: + if self.ctx.status.is_restarting( + ) and self.master.get_status() != self.ctx.status.COMPLETED: self.pod.stop() return False @@ -185,7 +186,8 @@ class Controller(ControllerBase): err=None): c = Container( entrypoint=(entrypoint or self._get_entrypoint()), - env=(self.ctx.get_envs() if use_ctx_env else {}), ) + env=(self.ctx.get_envs() if use_ctx_env else {}), + ) c.outfile, c.errfile = self._get_out_err_file(out, err) c.update_env(envs) return c @@ -203,8 +205,10 @@ class Controller(ControllerBase): log_file = None if not container: - container = self.new_container( - entrypoint=entrypoint, envs=envs, out=log_file, err=log_file) + container = self.new_container(entrypoint=entrypoint, + envs=envs, + out=log_file, + err=log_file) if is_init: self.pod.add_init_container(container) diff --git a/python/paddle/distributed/launch/controllers/master.py b/python/paddle/distributed/launch/controllers/master.py index 742fea9e16d..8e8d31f86dd 100644 --- a/python/paddle/distributed/launch/controllers/master.py +++ b/python/paddle/distributed/launch/controllers/master.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -64,6 +64,7 @@ class Master(object): class HTTPMaster(Master): + def lazy_init(self): if self.initialized: return @@ -81,8 +82,8 @@ class HTTPMaster(Master): self.role = Master.MAIN break except Exception as e: - self.ctx.logger.warning("start master failed {}".format( - e)) + self.ctx.logger.warning( + "start master failed {}".format(e)) time.sleep(0.1) continue else: @@ -172,6 +173,7 @@ class HTTPMaster(Master): class ETCDMaster(Master): + def __init__(self, ctx): super().__init__(ctx) @@ -263,8 +265,9 @@ class ETCDMaster(Master): self.ctx.logger.debug("Heartbeat done") self.client.cancel_watch(beat_watch) - self.beat_thread = threading.Thread( - name='heartbeat', target=_heartbeat, daemon=True) + self.beat_thread = threading.Thread(name='heartbeat', + target=_heartbeat, + daemon=True) self.beat_thread.start() def fetch_peer_alive(self): diff --git a/python/paddle/distributed/launch/controllers/ps.py b/python/paddle/distributed/launch/controllers/ps.py index 037bd313bbc..19429ce1961 100644 --- a/python/paddle/distributed/launch/controllers/ps.py +++ b/python/paddle/distributed/launch/controllers/ps.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,6 +19,7 @@ import os, shutil class PSController(Controller): + @classmethod def enable(cls, ctx): if ctx.args.run_mode == ControleMode.PS or ctx.args.server_num or len( @@ -132,9 +133,10 @@ class PSController(Controller): 'gloo_port': self.ctx.node.get_free_port(), }) - peer_list, rank = self.master.sync_peers( - '/{}/info'.format(self.job.id), self.pod.name, data, - self.job.replicas, self.pod.rank) + peer_list, rank = self.master.sync_peers('/{}/info'.format(self.job.id), + self.pod.name, data, + self.job.replicas, + self.pod.rank) self.ctx.logger.debug("sync peers done {}".format(peer_list)) @@ -171,15 +173,22 @@ class PSController(Controller): for i in range(server_num): e = { - "PADDLE_NNODES": "{}".format(self.job.replicas), - "PADDLE_PSERVERS_IP_PORT_LIST": ",".join(server_endpoints), - "PADDLE_TRAINER_ENDPOINTS": ",".join(trainer_endpoints), + "PADDLE_NNODES": + "{}".format(self.job.replicas), + "PADDLE_PSERVERS_IP_PORT_LIST": + ",".join(server_endpoints), + "PADDLE_TRAINER_ENDPOINTS": + ",".join(trainer_endpoints), "PADDLE_PORT": server_endpoints[i + server_rank_offset].split(":")[1], - "PADDLE_ROLE": "PSERVER", - "TRAINING_ROLE": "PSERVER", - "PADDLE_TRAINERS_NUM": "{}".format(len(trainer_endpoints)), - "POD_IP": self.ctx.node.ip, + "PADDLE_ROLE": + "PSERVER", + "TRAINING_ROLE": + "PSERVER", + "PADDLE_TRAINERS_NUM": + "{}".format(len(trainer_endpoints)), + "POD_IP": + self.ctx.node.ip, } e.update(_gloo_envs) log_tag = "ps.{}".format(i) @@ -187,16 +196,24 @@ class PSController(Controller): for i in range(trainer_num): e = { - "PADDLE_NNODES": "{}".format(self.job.replicas), - "PADDLE_PSERVERS_IP_PORT_LIST": ",".join(server_endpoints), - "PADDLE_TRAINER_ENDPOINTS": ",".join(trainer_endpoints), + "PADDLE_NNODES": + "{}".format(self.job.replicas), + "PADDLE_PSERVERS_IP_PORT_LIST": + ",".join(server_endpoints), + "PADDLE_TRAINER_ENDPOINTS": + ",".join(trainer_endpoints), "PADDLE_PORT": trainer_endpoints[i + trainer_rank_offset].split(":")[1], - "PADDLE_ROLE": "TRAINER", - "TRAINING_ROLE": "TRAINER", - "PADDLE_TRAINER_ID": "{}".format(i + trainer_rank_offset), - "PADDLE_TRAINERS_NUM": "{}".format(len(trainer_endpoints)), - "POD_IP": self.ctx.node.ip, + "PADDLE_ROLE": + "TRAINER", + "TRAINING_ROLE": + "TRAINER", + "PADDLE_TRAINER_ID": + "{}".format(i + trainer_rank_offset), + "PADDLE_TRAINERS_NUM": + "{}".format(len(trainer_endpoints)), + "POD_IP": + self.ctx.node.ip, } e.update(_gloo_envs) log_tag = "trainer.{}".format(i) diff --git a/python/paddle/distributed/launch/controllers/watcher.py b/python/paddle/distributed/launch/controllers/watcher.py index 4d49b924f1e..131d915292e 100644 --- a/python/paddle/distributed/launch/controllers/watcher.py +++ b/python/paddle/distributed/launch/controllers/watcher.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,6 +20,7 @@ from threading import Thread class Watcher(object): + def __init__(self, ctx): self.ctx = ctx diff --git a/python/paddle/distributed/launch/job/container.py b/python/paddle/distributed/launch/job/container.py index a1ad6dbe24e..9f7b1733d1a 100644 --- a/python/paddle/distributed/launch/job/container.py +++ b/python/paddle/distributed/launch/job/container.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -106,8 +106,10 @@ class Container(object): elif self._err: self._stderr = self._get_fd(self._err) or sys.stderr - self._proc = ProcessContext( - self._entrypoint, env=self._env, out=self._stdout, err=self._stderr) + self._proc = ProcessContext(self._entrypoint, + env=self._env, + out=self._stdout, + err=self._stderr) self._proc.start() def terminate(self, force=False): @@ -143,7 +145,8 @@ class Container(object): self._entrypoint, self.exit_code, self.errfile, - self._env, ) + self._env, + ) def logs(self, fn=None, offset=0, whence=1, limit=1000): if not self._log_handler: diff --git a/python/paddle/distributed/launch/job/job.py b/python/paddle/distributed/launch/job/job.py index 31827968ddc..4bad1209c18 100644 --- a/python/paddle/distributed/launch/job/job.py +++ b/python/paddle/distributed/launch/job/job.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,6 +20,7 @@ class JobMode: class Job(object): + def __init__(self, jid='default', mode=JobMode.COLLECTIVE, nnodes="1"): self._mode = mode self._id = jid diff --git a/python/paddle/distributed/launch/job/pod.py b/python/paddle/distributed/launch/job/pod.py index 701adf45f94..cda400f0a32 100644 --- a/python/paddle/distributed/launch/job/pod.py +++ b/python/paddle/distributed/launch/job/pod.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,6 +22,7 @@ import time class PodSepc(object): + def __init__(self): self._name = ''.join( random.choice('abcdefghijklmnopqrstuvwxyz') for _ in range(6)) @@ -41,12 +42,14 @@ class PodSepc(object): class Pod(PodSepc): + def __init__(self): super().__init__() def __str__(self): - return "Pod: {}, replicas {}, status {}".format( - self.name, self.replicas, self.status) + return "Pod: {}, replicas {}, status {}".format(self.name, + self.replicas, + self.status) def failed_container(self): cs = [] diff --git a/python/paddle/distributed/launch/job/status.py b/python/paddle/distributed/launch/job/status.py index ae10c5adb6c..88fd09bbf22 100644 --- a/python/paddle/distributed/launch/job/status.py +++ b/python/paddle/distributed/launch/job/status.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/distributed/launch/main.py b/python/paddle/distributed/launch/main.py index b2c87e737c8..f90fa7401e9 100644 --- a/python/paddle/distributed/launch/main.py +++ b/python/paddle/distributed/launch/main.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/distributed/launch/plugins/__init__.py b/python/paddle/distributed/launch/plugins/__init__.py index 13c09b4c27c..fe8116207e6 100644 --- a/python/paddle/distributed/launch/plugins/__init__.py +++ b/python/paddle/distributed/launch/plugins/__init__.py @@ -41,8 +41,8 @@ def collective_compatible(ctx): hosts = set([h.split(':')[0] for h in eps]) ctx.args.master = eps[0] if ':' in eps[0] else '{}:6768'.format(eps[0]) ctx.args.nnodes = len(hosts) - ctx.logger.info('args reset by env PADDLE_TRAINER_ENDPOINTS\n{}'.format( - eps)) + ctx.logger.info( + 'args reset by env PADDLE_TRAINER_ENDPOINTS\n{}'.format(eps)) ''' if 'DISTRIBUTED_TRAINER_ENDPOINTS' in ctx.envs: eps = ctx.envs['DISTRIBUTED_TRAINER_ENDPOINTS'].split(',') diff --git a/python/paddle/distributed/launch/utils/kv_client.py b/python/paddle/distributed/launch/utils/kv_client.py index e1919541226..a66ca800c58 100644 --- a/python/paddle/distributed/launch/utils/kv_client.py +++ b/python/paddle/distributed/launch/utils/kv_client.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,6 +17,7 @@ import time class KVClient(object): + def __init__(self, endpoint='localhost:2379'): self.endpoint = endpoint if endpoint.startswith( "http://") else "http://{}".format(endpoint) diff --git a/python/paddle/distributed/launch/utils/kv_server.py b/python/paddle/distributed/launch/utils/kv_server.py index 2d7ae15f13d..ddf5685c988 100644 --- a/python/paddle/distributed/launch/utils/kv_server.py +++ b/python/paddle/distributed/launch/utils/kv_server.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,6 +22,7 @@ import json class KVHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): + def do_GET(self): with self.server.kv_lock: ret = {} @@ -68,6 +69,7 @@ class KVHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): class KVServer(HTTPServer, object): + def __init__(self, port): super(KVServer, self).__init__(('', port), KVHandler) self.kv_lock = threading.Lock() @@ -89,6 +91,7 @@ class KVServer(HTTPServer, object): class PKVServer(): + def __init__(self, port): self._server = KVServer(port) diff --git a/python/paddle/distributed/launch/utils/nvsmi.py b/python/paddle/distributed/launch/utils/nvsmi.py index 82a23189ac6..dc07fbc1d21 100644 --- a/python/paddle/distributed/launch/utils/nvsmi.py +++ b/python/paddle/distributed/launch/utils/nvsmi.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,6 +20,7 @@ import shutil class Info(object): + def __repr__(self): return str(self.__dict__) diff --git a/python/paddle/distributed/launch/utils/process_context.py b/python/paddle/distributed/launch/utils/process_context.py index 4d6fa8de794..075536c8a8c 100644 --- a/python/paddle/distributed/launch/utils/process_context.py +++ b/python/paddle/distributed/launch/utils/process_context.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,6 +17,7 @@ import os, sys, signal, time class ProcessContext(object): + def __init__(self, cmd, env=os.environ, @@ -35,12 +36,11 @@ class ProcessContext(object): def _start(self): pre_fn = os.setsid if self._group else None - self._proc = subprocess.Popen( - self._cmd, - env=self._env, - stdout=self._stdout, - stderr=self._stderr, - preexec_fn=self._preexec_fn or pre_fn) + self._proc = subprocess.Popen(self._cmd, + env=self._env, + stdout=self._stdout, + stderr=self._stderr, + preexec_fn=self._preexec_fn or pre_fn) def _close_std(self): try: diff --git a/python/paddle/distributed/metric/__init__.py b/python/paddle/distributed/metric/__init__.py index a5b0f4cb49d..f87fe885824 100644 --- a/python/paddle/distributed/metric/__init__.py +++ b/python/paddle/distributed/metric/__init__.py @@ -13,4 +13,4 @@ # limitations under the License. from .metrics import init_metric # noqa: F401 -from .metrics import print_auc # noqa: F401 +from .metrics import print_auc # noqa: F401 diff --git a/python/paddle/distributed/metric/metrics.py b/python/paddle/distributed/metric/metrics.py index 5685b6f053e..08d185efd97 100644 --- a/python/paddle/distributed/metric/metrics.py +++ b/python/paddle/distributed/metric/metrics.py @@ -50,11 +50,12 @@ def init_metric(metric_ptr, phase = 1 if is_join else 0 if metric_runner['method'] == 'AucCalculator': - metric_ptr.init_metric( - metric_runner['method'], metric_runner['name'], - metric_runner['label'], metric_runner['target'], - cmatch_rank_var, mask_var, uid_var, phase, cmatch_rank_group, - ignore_rank, bucket_size) + metric_ptr.init_metric(metric_runner['method'], + metric_runner['name'], + metric_runner['label'], + metric_runner['target'], cmatch_rank_var, + mask_var, uid_var, phase, cmatch_rank_group, + ignore_rank, bucket_size) elif metric_runner['method'] == 'MultiTaskAucCalculator': metric_ptr.init_metric( metric_runner['method'], metric_runner['name'], @@ -69,11 +70,12 @@ def init_metric(metric_ptr, metric_runner['cmatch_group'], metric_runner['ignore_rank'], bucket_size) elif metric_runner['method'] == 'MaskAucCalculator': - metric_ptr.init_metric( - metric_runner['method'], metric_runner['name'], - metric_runner['label'], metric_runner['target'], - cmatch_rank_var, metric_runner['mask'], uid_var, phase, - cmatch_rank_group, ignore_rank, bucket_size) + metric_ptr.init_metric(metric_runner['method'], + metric_runner['name'], + metric_runner['label'], + metric_runner['target'], cmatch_rank_var, + metric_runner['mask'], uid_var, phase, + cmatch_rank_group, ignore_rank, bucket_size) elif metric_runner['method'] == 'CmatchRankMaskAucCalculator': metric_ptr.init_metric( metric_runner['method'], metric_runner['name'], @@ -82,17 +84,19 @@ def init_metric(metric_ptr, phase, metric_runner['cmatch_group'], metric_runner['ignore_rank'], bucket_size) elif metric_runner['method'] == 'WuAucCalculator': - metric_ptr.init_metric( - metric_runner['method'], metric_runner['name'], - metric_runner['label'], metric_runner['target'], - cmatch_rank_var, mask_var, metric_runner['uid'], phase, - cmatch_rank_group, ignore_rank, bucket_size) + metric_ptr.init_metric(metric_runner['method'], + metric_runner['name'], + metric_runner['label'], + metric_runner['target'], cmatch_rank_var, + mask_var, metric_runner['uid'], phase, + cmatch_rank_group, ignore_rank, bucket_size) else: - metric_ptr.init_metric( - metric_runner['method'], metric_runner['name'], - metric_runner['label'], metric_runner['target'], - cmatch_rank_var, mask_var, phase, cmatch_rank_group, - ignore_rank, 
bucket_size) + metric_ptr.init_metric(metric_runner['method'], + metric_runner['name'], + metric_runner['label'], + metric_runner['target'], cmatch_rank_var, + mask_var, phase, cmatch_rank_group, + ignore_rank, bucket_size) def print_metric(metric_ptr, name): diff --git a/python/paddle/distributed/models/__init__.py b/python/paddle/distributed/models/__init__.py index e1663029ef1..97043fd7ba6 100644 --- a/python/paddle/distributed/models/__init__.py +++ b/python/paddle/distributed/models/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/distributed/models/moe/__init__.py b/python/paddle/distributed/models/moe/__init__.py index e1663029ef1..97043fd7ba6 100644 --- a/python/paddle/distributed/models/moe/__init__.py +++ b/python/paddle/distributed/models/moe/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/python/paddle/distributed/models/moe/utils.py b/python/paddle/distributed/models/moe/utils.py index ea3dc43d0c7..3b955dd2a8d 100644 --- a/python/paddle/distributed/models/moe/utils.py +++ b/python/paddle/distributed/models/moe/utils.py @@ -51,11 +51,10 @@ def _number_count(numbers, upper_range): helper = LayerHelper(op_type, **locals()) out = helper.create_variable_for_type_inference(dtype=numbers.dtype) - helper.append_op( - type=op_type, - inputs={'numbers': numbers}, - outputs={'Out': out}, - attrs={'upper_range': upper_range}) + helper.append_op(type=op_type, + inputs={'numbers': numbers}, + outputs={'Out': out}, + attrs={'upper_range': upper_range}) return out @@ -99,14 +98,13 @@ def _assign_pos(x, cum_count): helper = LayerHelper(op_type, **locals()) out = helper.create_variable_for_type_inference(dtype=cum_count.dtype) - helper.append_op( - type=op_type, - inputs={ - 'X': [x], - 'cum_count': [cum_count], - "eff_num_len": [cum_count[-1]] - }, - outputs={'Out': [out]}) + helper.append_op(type=op_type, + inputs={ + 'X': [x], + 'cum_count': [cum_count], + "eff_num_len": [cum_count[-1]] + }, + outputs={'Out': [out]}) return out @@ -169,12 +167,13 @@ def _limit_by_capacity(expert_count, capacity, n_worker): out = helper.create_variable_for_type_inference( dtype=expert_count.dtype) - helper.append_op( - type=op_type, - inputs={'expert_count': expert_count, - 'capacity': capacity}, - outputs={'Out': out}, - attrs={'n_worker': n_worker}) + helper.append_op(type=op_type, + inputs={ + 'expert_count': expert_count, + 'capacity': capacity + }, + outputs={'Out': out}, + attrs={'n_worker': n_worker}) return out @@ -206,8 +205,9 @@ def _prune_gate_by_capacity(gate_idx, expert_count, n_expert, n_worker): return _C_ops.prune_gate_by_capacity(gate_idx, expert_count, "n_expert", n_expert, "n_worker", n_worker) elif _in_legacy_dygraph(): - return core.ops.prune_gate_by_capacity( - gate_idx, expert_count, "n_expert", n_expert, "n_worker", n_worker) + return core.ops.prune_gate_by_capacity(gate_idx, expert_count, + "n_expert", n_expert, "n_worker", + n_worker) check_variable_and_dtype(gate_idx, 'GateIdx', ['int32', 'int64'], 'paddle.distributed.utils.prune_gate_by_capacity') check_variable_and_dtype(expert_count, 'ExpertCount', ['int32', 'int64'], @@ -216,12 +216,15 @@ def _prune_gate_by_capacity(gate_idx, expert_count, n_expert, n_worker): helper = LayerHelper('prune_gate_by_capacity', **locals()) new_gate_idx = helper.create_variable_for_type_inference( dtype=gate_idx.dtype) - helper.append_op( - type='prune_gate_by_capacity', - inputs={'GateIdx': gate_idx, - "ExpertCount": expert_count}, - outputs={'NewGateIdx': new_gate_idx}, - attrs={"n_expert": n_expert, - "n_worker": n_worker}) + helper.append_op(type='prune_gate_by_capacity', + inputs={ + 'GateIdx': gate_idx, + "ExpertCount": expert_count + }, + outputs={'NewGateIdx': new_gate_idx}, + attrs={ + "n_expert": n_expert, + "n_worker": n_worker + }) return new_gate_idx diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py index f8c5b79e337..79b680ef2d1 100644 --- a/python/paddle/distributed/parallel.py +++ b/python/paddle/distributed/parallel.py @@ -47,7 +47,7 @@ __all__ = [] ParallelStrategy = core.ParallelStrategy -# NOTE(chenweihang): Maintain a global parallel env to avoid +# NOTE(chenweihang): Maintain a global parallel env to avoid # initializing ParallelEnv every time and improve performance _global_parallel_env = None @@ -71,9 +71,10 @@ def _start_kv_server(port, http_server_d, size): def 
_is_cpuonly(backend): check_backend(backend) - if backend in ['auto', 'nccl', 'bkcl', 'hccl', 'heter', 'cncl'] and ( - core.is_compiled_with_cuda() or core.is_compiled_with_xpu() or - core.is_compiled_with_npu() or core.is_compiled_with_mlu()): + if backend in [ + 'auto', 'nccl', 'bkcl', 'hccl', 'heter', 'cncl' + ] and (core.is_compiled_with_cuda() or core.is_compiled_with_xpu() + or core.is_compiled_with_npu() or core.is_compiled_with_mlu()): # passes 'auto' and can use cuda or xpu, use the default logics. so return False return False @@ -159,14 +160,14 @@ def init_parallel_env(): "Currently not a parallel execution environment, `paddle.distributed.init_parallel_env` will not do anything." ) return - # NOTE(xiongkun): support cpu gloo only, add this environment variable to + # NOTE(xiongkun): support cpu gloo only, add this environment variable to # enable cpu only gloo prarllel training) backend = os.environ.get('PADDLE_DISTRI_BACKEND', 'auto') is_cpu_only = _is_cpuonly(backend) - # 1. gpu xpu check, must be gpu or xpu, - if not (is_cpu_only or core.is_compiled_with_cuda() or - core.is_compiled_with_xpu() or core.is_compiled_with_npu() or - core.is_compiled_with_mlu()): + # 1. gpu xpu check, must be gpu or xpu, + if not (is_cpu_only or core.is_compiled_with_cuda() + or core.is_compiled_with_xpu() or core.is_compiled_with_npu() + or core.is_compiled_with_mlu()): raise NotImplementedError( "If you want to use CPU-only version, please use 'gloo' as backend") @@ -220,8 +221,8 @@ def init_parallel_env(): "required to create a process group.") master_addr = os.getenv("MASTER_ADDR", None) master_port = os.getenv("MASTER_PORT", None) - endpoints = ":".join( - [master_addr, master_port]) if master_addr and master_port else None + endpoints = ":".join([master_addr, master_port + ]) if master_addr and master_port else None if endpoints is None: endpoints = os.getenv("PADDLE_MASTER", None) if endpoints is None: @@ -235,28 +236,25 @@ def init_parallel_env(): master_port = int(master_port) is_master = rank == 0 stop_check_timeout = int(os.getenv("FLAGS_stop_check_timeout", "900")) - default_store = core.TCPStore( - master_addr, - master_port, - is_master, - world_size, - stop_check_timeout=stop_check_timeout) + default_store = core.TCPStore(master_addr, + master_port, + is_master, + world_size, + stop_check_timeout=stop_check_timeout) _set_default_store(default_store) - pg = _new_process_group_impl( - backend, - default_store, - rank, - world_size, - _default_group_name, - pg_options=None) + pg = _new_process_group_impl(backend, + default_store, + rank, + world_size, + _default_group_name, + pg_options=None) ranks = list(range(world_size)) - group = Group( - rank, - world_size, - id=0, - ranks=ranks, - pg=pg, - name=_default_group_name) + group = Group(rank, + world_size, + id=0, + ranks=ranks, + pg=pg, + name=_default_group_name) _set_group_map_by_name(_default_group_name, group) _set_group_map(0, group) parallel_helper._set_parallel_ctx(True) @@ -278,9 +276,8 @@ def init_parallel_env(): size = {'_worker': parallel_env.world_size} if backend == "heter": size = {'_worker': len(node_num)} - http_server = Process( - target=_start_kv_server, - args=(int(ep_rank_0[1]), http_server_d, size)) + http_server = Process(target=_start_kv_server, + args=(int(ep_rank_0[1]), http_server_d, size)) http_server.daemon = True http_server_d["running"] = True http_server.start() @@ -328,7 +325,7 @@ def init_parallel_env(): # are separately looking for free ports which sometimes # leads to port-conflict. 
if (is_cpu_only or backend == "heter") and parallel_env.rank == 0: - # compare to init_gloo, we don't need to + # compare to init_gloo, we don't need to # init gloo, because we do this in _init_parallel_ctx; http_server_d["running"] = False http_server.join() diff --git a/python/paddle/distributed/parallel_with_gloo.py b/python/paddle/distributed/parallel_with_gloo.py index 5a6f58e05ba..363de6a5505 100755 --- a/python/paddle/distributed/parallel_with_gloo.py +++ b/python/paddle/distributed/parallel_with_gloo.py @@ -103,9 +103,9 @@ def gloo_init_parallel_env(rank_id, rank_num, server_endpoint): if rank_id == 0: # The scope for worker used by http server is '_worker' size = {'_worker': rank_num} - http_server_proc = Process( - target=_start_kv_server, - args=(int(server_endpoint.split(":")[1]), http_server_status, size)) + http_server_proc = Process(target=_start_kv_server, + args=(int(server_endpoint.split(":")[1]), + http_server_status, size)) http_server_proc.daemon = True http_server_status["running"] = True http_server_proc.start() diff --git a/python/paddle/distributed/passes/__init__.py b/python/paddle/distributed/passes/__init__.py index bfa760698fe..3649d571aa4 100644 --- a/python/paddle/distributed/passes/__init__.py +++ b/python/paddle/distributed/passes/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/distributed/passes/auto_parallel_amp.py b/python/paddle/distributed/passes/auto_parallel_amp.py index 3cd04affa29..3a552d76a2d 100644 --- a/python/paddle/distributed/passes/auto_parallel_amp.py +++ b/python/paddle/distributed/passes/auto_parallel_amp.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -26,10 +26,12 @@ from paddle.fluid.contrib.mixed_precision.fp16_utils import _keep_fp32_input, _k from paddle.fluid.contrib.mixed_precision.fp16_utils import _valid_types, find_true_post_op, find_true_prev_op from paddle.fluid.contrib.mixed_precision.fp16_utils import _is_in_black_varnames, _dtype_to_str, _rename_arg from paddle.distributed.auto_parallel.dist_attribute import OperatorDistributedAttribute + world_process_group = get_world_process_group() class AMPState(object): + def __init__(self, block): self._block = block self._op_fp16_dict = { @@ -88,8 +90,8 @@ class AMPState(object): if in_var.op is None: continue elif in_var.op is op: - prev_op = find_true_prev_op(ops, op, - in_var_name) + prev_op = find_true_prev_op( + ops, op, in_var_name) if prev_op is None: continue else: @@ -166,8 +168,8 @@ class AMPState(object): assert in_var_dist_attr is not None ref_mesh = in_var_dist_attr.process_mesh ref_mapping = in_var_dist_attr.dims_mapping - consume_op_attr.set_input_dist_attr(cast_name, - in_var_dist_attr) + consume_op_attr.set_input_dist_attr( + cast_name, in_var_dist_attr) out_var = self._block.create_var( name=cast_name, @@ -192,8 +194,8 @@ class AMPState(object): else: in_var_dist_attr = consume_op_attr.get_input_dist_attr( in_var.name) - consume_op_attr.set_input_dist_attr(cast_name, - in_var_dist_attr) + consume_op_attr.set_input_dist_attr( + cast_name, in_var_dist_attr) _rename_arg(op, in_var.name, cast_name) else: if op.has_attr('in_dtype'): @@ -297,8 +299,8 @@ class AMPState(object): grad_op.desc._rename_input(in_var_name, cast_name) in_var_dist_attr = consume_op_attr.get_input_dist_attr( in_var_name) - consume_op_attr.set_input_dist_attr(cast_name, - in_var_dist_attr) + consume_op_attr.set_input_dist_attr( + cast_name, in_var_dist_attr) else: assert in_var.dtype == dst_dtype @@ -382,8 +384,8 @@ def _update_backward_cast_ops(params_grads, dist_context): for p, g in params_grads: op = g.op if g.dtype == core.VarDesc.VarType.FP32 and op.type == 'cast': - if int(op.attr('op_role')) == int(OpRole.Backward) and op.has_attr( - 'op_role_var'): + if int(op.attr('op_role')) == int( + OpRole.Backward) and op.has_attr('op_role_var'): op._remove_attr("op_role_var") post_ops = find_true_post_op(main_block.ops, op, g.name) @@ -398,13 +400,12 @@ def _update_backward_cast_ops(params_grads, dist_context): # add new op in the python and cpp at the same time new_op_desc = main_block.desc.append_op() new_op_desc.copy_from(op.desc) - new_op = paddle.fluid.framework.Operator( - block=main_block, - desc=new_op_desc, - type=None, - inputs=None, - outputs=None, - attrs=None) + new_op = paddle.fluid.framework.Operator(block=main_block, + desc=new_op_desc, + type=None, + inputs=None, + outputs=None, + attrs=None) main_block.ops.append(new_op) # dist attr @@ -452,11 +453,10 @@ def _check_and_update_gradient(params_grads, loss_scaling, dist_context): inputs = {'X': grads, 'Scale': loss_scaling} outputs = {'Out': grads, 'FoundInfinite': found_inf} attrs = {'op_role': OpRole.Backward} - new_op = main_block.append_op( - type='check_finite_and_unscale', - inputs=inputs, - outputs=outputs, - attrs=attrs) + new_op = main_block.append_op(type='check_finite_and_unscale', + inputs=inputs, + outputs=outputs, + attrs=attrs) new_op_dist_attr = OperatorDistributedAttribute() new_op_dist_attr.process_mesh = world_process_group.ranks @@ -476,6 +476,7 @@ def _check_and_update_gradient(params_grads, loss_scaling, dist_context): @register_pass("auto_parallel_amp") class AMPPass(PassBase): + def __init__(self): 
super(AMPPass, self).__init__() self.set_attr("loss", None) @@ -514,8 +515,8 @@ class AMPPass(PassBase): return True - # NOTE: why AMPBackwardPass can override apply_single_impl instead of - # apply_impl? AMP is an optimization pass for serial program, + # NOTE: why AMPBackwardPass can override apply_single_impl instead of + # apply_impl? AMP is an optimization pass for serial program, # in distributed scenario, all ranks should have the same modification. def _apply_single_impl(self, main_program, startup_program, context): self.dist_context = self.get_attr("dist_context") @@ -532,12 +533,12 @@ class AMPPass(PassBase): with paddle.static.program_guard(main_program, startup_program): amp_state.cast_forward_program(self.dist_context) amp_state.cast_backward_program(params_grads, self.dist_context) - # TODO (JZ-LIANG)support cast forward program only when inference + # TODO (JZ-LIANG)support cast forward program only when inference self._init_amp_var() self._scale_loss() - if self.get_attr("use_dynamic_loss_scaling") or self.get_attr( - "init_loss_scaling") != 1.0: + if self.get_attr("use_dynamic_loss_scaling" + ) or self.get_attr("init_loss_scaling") != 1.0: grads, found_inf = _check_and_update_gradient( params_grads, self._loss_scaling, self.dist_context) @@ -587,8 +588,8 @@ class AMPPass(PassBase): if loss.dtype != core.VarDesc.VarType.FP32: # cast loss here will change the effective loss tensor for the computation graph - # and therefore will effect all following passes whose logic is based on the loss tensor(Recompute & Gradient Merge), - # so we it is not allowed by now. fixed it in future. + # and therefore will effect all following passes whose logic is based on the loss tensor(Recompute & Gradient Merge), + # so we it is not allowed by now. fixed it in future. raise NotImplementedError( "Loss's generator op is not support in FP16 in Auto Parallel by now, please put that op into your black-list." 
) @@ -598,8 +599,8 @@ class AMPPass(PassBase): loss_dist_attr = self.dist_context.get_tensor_dist_attr_for_program( loss) ref_mesh = loss_op_dist_attr.process_mesh - self.dist_context.set_tensor_dist_attr_for_program(cast_loss, - loss_dist_attr) + self.dist_context.set_tensor_dist_attr_for_program( + cast_loss, loss_dist_attr) loss_op_idx = find_op_index(main_block.desc, loss_op.desc) cast_op = main_block._insert_op( @@ -619,8 +620,8 @@ class AMPPass(PassBase): cast_op, ref_mesh, [-1], self.dist_context) loss = loss.astype('float32') - if self.get_attr("use_dynamic_loss_scaling") or self.get_attr( - "init_loss_scaling") != 1.0: + if self.get_attr("use_dynamic_loss_scaling" + ) or self.get_attr("init_loss_scaling") != 1.0: loss_op_idx = find_op_index(main_block.desc, loss_op.desc) @@ -637,10 +638,14 @@ class AMPPass(PassBase): elementwise_mul_op = main_block._insert_op( loss_op_idx + 1, type='elementwise_mul', - inputs={'X': [loss], - 'Y': [self._loss_scaling]}, + inputs={ + 'X': [loss], + 'Y': [self._loss_scaling] + }, outputs={'Out': [self._scaled_loss]}, - attrs={'op_role': loss_op.all_attrs()[OP_ROLE_KEY], }) + attrs={ + 'op_role': loss_op.all_attrs()[OP_ROLE_KEY], + }) loss_op._set_attr(OP_ROLE_KEY, core.op_proto_and_checker_maker.OpRole.Forward) naive_set_dist_op_attr_for_program_by_mesh_and_mapping( @@ -730,11 +735,10 @@ class AMPPass(PassBase): 'op_role': OpRole.Backward } - new_op = main_block.append_op( - type='update_loss_scaling', - inputs=inputs, - outputs=outputs, - attrs=attrs) + new_op = main_block.append_op(type='update_loss_scaling', + inputs=inputs, + outputs=outputs, + attrs=attrs) new_op_dist_attr = OperatorDistributedAttribute() new_op_dist_attr.process_mesh = world_process_group.ranks diff --git a/python/paddle/distributed/passes/auto_parallel_fp16.py b/python/paddle/distributed/passes/auto_parallel_fp16.py index b01f3975aef..8bfde1cba1c 100644 --- a/python/paddle/distributed/passes/auto_parallel_fp16.py +++ b/python/paddle/distributed/passes/auto_parallel_fp16.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -95,6 +95,7 @@ def _keep_fp32_output(op, out_name): class FP16State(object): + def __init__(self, program, amp_list, @@ -169,8 +170,8 @@ class FP16State(object): if op.desc.original_id() in self.grad_op_to_op_map: fwd_op_id = self.grad_op_to_op_map[op.desc.original_id()] assert fwd_op_id in self._op_fp16_dict, "{}".format(str(op)) - self._op_fp16_dict[op.desc.original_id()] = self._op_fp16_dict[ - fwd_op_id] + self._op_fp16_dict[ + op.desc.original_id()] = self._op_fp16_dict[fwd_op_id] if int(op.attr('op_role')) == 257: self.is_train = True @@ -182,7 +183,7 @@ class FP16State(object): except ValueError as e: var = self.program.global_block().var(var_name) - # NOTE(JZ-LIANG) "array_" is a hack to adopt for ernie3.0 inference, since there is + # NOTE(JZ-LIANG) "array_" is a hack to adopt for ernie3.0 inference, since there is # a trick which make the LOD_TENSOR_ARRAY to the float32 in while block to reset the LOD_TENSOR_ARRAY if var is None or var.type not in _valid_types or "array_" in var_name: return @@ -299,8 +300,9 @@ class FP16State(object): cast_name = in_var.name + '.cast_' + _dtype_to_str( dst_dtype) cast_var = block.vars.get(cast_name) - self.forward_input_cast_ops[op.desc.original_id()] += [( - cast_name, in_var.name, dst_dtype, src_dtype, in_name)] + self.forward_input_cast_ops[op.desc.original_id()] += [ + (cast_name, in_var.name, dst_dtype, src_dtype, in_name) + ] in_var_dist_attr = consume_op_attr.get_input_dist_attr( in_var.name) @@ -367,9 +369,8 @@ class FP16State(object): # rename input assert src_name in op.input( - slot_name), "var: {} not in op's {}. {}".format(src_name, - slot_name, - str(op)) + slot_name), "var: {} not in op's {}. {}".format( + src_name, slot_name, str(op)) src_var_dist_attr = grad_op_attr.get_input_dist_attr(src_name) assert src_var_dist_attr is not None op._rename_input(src_name, cast_name) @@ -394,8 +395,8 @@ class FP16State(object): type=grad.type, persistable=grad.persistable, stop_gradient=grad.stop_gradient) - dist_context.set_tensor_dist_attr_for_program(cast_grad, - grad_dist_attr) + dist_context.set_tensor_dist_attr_for_program( + cast_grad, grad_dist_attr) op._rename_output(grad_name, cast_grad.name) grad_op_attr.set_output_dist_attr(cast_grad.name, grad_dist_attr) @@ -441,11 +442,10 @@ def _check_and_update_gradient(grads, loss_scaling, name, dist_context): inputs = {'X': grads, 'Scale': loss_scaling} outputs = {'Out': grads, 'FoundInfinite': found_inf} attrs = {'op_role': OpRole.Backward} - new_op = main_block.append_op( - type='check_finite_and_unscale', - inputs=inputs, - outputs=outputs, - attrs=attrs) + new_op = main_block.append_op(type='check_finite_and_unscale', + inputs=inputs, + outputs=outputs, + attrs=attrs) new_op_dist_attr = OperatorDistributedAttribute() new_op_dist_attr.process_mesh = world_process_group.ranks @@ -493,11 +493,12 @@ def _set_op_dist_attr_with_ranks(new_op, ranks, block, dist_context): @register_pass("auto_parallel_fp16") class FP16Pass(AMPPass): + def __init__(self): super(FP16Pass, self).__init__() - # NOTE: why FP16Pass can override apply_single_impl instead of - # apply_impl? AMP is an optimization pass for serial program, + # NOTE: why FP16Pass can override apply_single_impl instead of + # apply_impl? AMP is an optimization pass for serial program, # in distributed scenario, all ranks should have the same modification. 
def _apply_single_impl(self, main_program, startup_program, context): self.dist_context = self.get_attr("dist_context") @@ -507,7 +508,7 @@ class FP16Pass(AMPPass): set(self.get_attr("custom_white_list")), set(self.get_attr("custom_black_list")), None) - # NOTE don't not change input data dtype, since it is controled by dataloader + # NOTE don't not change input data dtype, since it is controled by dataloader # and which is out of control of FP16 Pass input_data_var_names = [var.name for var in self.get_attr("input_data")] @@ -519,14 +520,14 @@ class FP16Pass(AMPPass): if is_train: with paddle.static.program_guard(main_program, startup_program): - # TODO (JZ-LIANG)support cast forward program only when inference + # TODO (JZ-LIANG)support cast forward program only when inference self._init_amp_var() self._scale_loss() grads, fp32_grads, fp16_grads = _split_grads(params_grads) - if self.get_attr("use_dynamic_loss_scaling") or self.get_attr( - "init_loss_scaling") != 1.0: + if self.get_attr("use_dynamic_loss_scaling" + ) or self.get_attr("init_loss_scaling") != 1.0: found_infs = [] if fp32_grads: with main_program._backward_role_guard(): @@ -573,8 +574,9 @@ class FP16Pass(AMPPass): base_opt._multi_precision = True if self.get_attr("use_optimizer_fp16"): base_opt._multi_precision = False - if isinstance(base_opt, (paddle.fluid.optimizer.Adam, - paddle.optimizer.AdamW)): + if isinstance( + base_opt, + (paddle.fluid.optimizer.Adam, paddle.optimizer.AdamW)): # with main_program._optimized_guard([]): # found_inf = paddle.tensor.creation._memcpy( # found_inf, paddle.CPUPlace()) diff --git a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py index accac811338..bc40dad8ac0 100644 --- a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py +++ b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -25,6 +25,7 @@ from .pass_base import PassBase, PassType, register_pass from paddle.distributed.auto_parallel.utils import set_var_dist_attr from paddle.distributed.auto_parallel.utils import naive_set_dist_op_attr_for_program_by_mesh_and_mapping from paddle.distributed.auto_parallel.process_group import get_world_process_group + world_process_group = get_world_process_group() @@ -79,57 +80,59 @@ def _remove_op_role_var(param, grad): def _get_gm_cond_var(main_program, k_steps, dist_context): main_block = main_program.global_block() # Add const var - k_step_var = layers.create_global_var( - name="gradient_merge_k", - shape=[1], - value=int(k_steps), - dtype='int32', - persistable=True, - force_cpu=True) + k_step_var = layers.create_global_var(name="gradient_merge_k", + shape=[1], + value=int(k_steps), + dtype='int32', + persistable=True, + force_cpu=True) set_var_dist_attr(dist_context, k_step_var, [-1], world_process_group.ranks) - zero_var = layers.create_global_var( - name="gradient_merge_zero", - shape=[1], - value=int(0), - dtype='int32', - persistable=True, - force_cpu=True) + zero_var = layers.create_global_var(name="gradient_merge_zero", + shape=[1], + value=int(0), + dtype='int32', + persistable=True, + force_cpu=True) set_var_dist_attr(dist_context, zero_var, [-1], world_process_group.ranks) # Add step var & cond var - step_var = layers.create_global_var( - name="gradient_merge_step", - shape=[1], - value=int(0), - dtype='int32', - persistable=True, - force_cpu=True) + step_var = layers.create_global_var(name="gradient_merge_step", + shape=[1], + value=int(0), + dtype='int32', + persistable=True, + force_cpu=True) set_var_dist_attr(dist_context, step_var, [-1], world_process_group.ranks) - cond_var = main_block.create_var( - name="gradient_merge_cond", shape=[1], dtype='bool') + cond_var = main_block.create_var(name="gradient_merge_cond", + shape=[1], + dtype='bool') set_var_dist_attr(dist_context, cond_var, [-1], world_process_group.ranks) with device_guard("cpu"): # step_var = (step_var + 1) % k_step layers.increment(x=step_var, value=1.0, in_place=True) - elementwise_mod_op = main_block.append_op( - type='elementwise_mod', - inputs={'X': step_var, - 'Y': k_step_var}, - outputs={'Out': step_var}, - attrs={'axis': -1, - 'use_mkldnn': False}) + elementwise_mod_op = main_block.append_op(type='elementwise_mod', + inputs={ + 'X': step_var, + 'Y': k_step_var + }, + outputs={'Out': step_var}, + attrs={ + 'axis': -1, + 'use_mkldnn': False + }) naive_set_dist_op_attr_for_program_by_mesh_and_mapping( elementwise_mod_op, world_process_group.ranks, [-1], dist_context) # cond_var = (step_var == 0) - equal_op = main_block.append_op( - type='equal', - inputs={'X': step_var, - 'Y': zero_var}, - outputs={'Out': cond_var}) + equal_op = main_block.append_op(type='equal', + inputs={ + 'X': step_var, + 'Y': zero_var + }, + outputs={'Out': cond_var}) naive_set_dist_op_attr_for_program_by_mesh_and_mapping( equal_op, world_process_group.ranks, [-1], dist_context) @@ -137,9 +140,7 @@ def _get_gm_cond_var(main_program, k_steps, dist_context): def _append_gradient_merge_backward_op( - main_program, - startup_program, - params_grads: List[Tuple[Any, Any]], + main_program, startup_program, params_grads: List[Tuple[Any, Any]], cond_var_name: str, dist_context) -> Tuple[List[Tuple[Any, Any]], Dict[str, Any]]: main_block = main_program.global_block() @@ -162,11 +163,11 @@ def _append_gradient_merge_backward_op( assert (param_var is not None) ref_dist_attr = 
dist_context.get_tensor_dist_attr_for_program(param_var) assert ref_dist_attr is not None - gradient_merge_var = main_block.create_var( - name=param_name + "@GRAD@GradientMerge", - shape=param_var.shape, - dtype=param_var.dtype, - persistable=True) + gradient_merge_var = main_block.create_var(name=param_name + + "@GRAD@GradientMerge", + shape=param_var.shape, + dtype=param_var.dtype, + persistable=True) param_to_gradient_merge[param_name] = gradient_merge_var ref_process_mesh = ref_dist_attr.process_mesh ref_dims_mapping = ref_dist_attr.dims_mapping @@ -179,23 +180,25 @@ def _append_gradient_merge_backward_op( shape=param_var.shape, dtype=param_var.dtype, persistable=True) - startup_block.append_op( - type="fill_constant", - outputs={"Out": startup_gradient_merge_var}, - attrs={ - "shape": param_var.shape, - "dtype": param_var.dtype, - "value": float(0), - }) + startup_block.append_op(type="fill_constant", + outputs={"Out": startup_gradient_merge_var}, + attrs={ + "shape": param_var.shape, + "dtype": param_var.dtype, + "value": float(0), + }) # grad_merge += grad - new_grad_op = main_block.append_op( - type="elementwise_add", - inputs={'X': grad, - 'Y': gradient_merge_var}, - outputs={'Out': gradient_merge_var}, - attrs={'axis': -1, - 'use_mkldnn': False}) + new_grad_op = main_block.append_op(type="elementwise_add", + inputs={ + 'X': grad, + 'Y': gradient_merge_var + }, + outputs={'Out': gradient_merge_var}, + attrs={ + 'axis': -1, + 'use_mkldnn': False + }) new_params_to_grads.append([param, gradient_merge_var]) naive_set_dist_op_attr_for_program_by_mesh_and_mapping( new_grad_op, ref_process_mesh, ref_dims_mapping, dist_context) @@ -203,13 +206,10 @@ def _append_gradient_merge_backward_op( def _create_cond_block_and_update_optimizer( - main_program, - cond_var, - new_params_to_grads: List[Tuple[Any, Any]], - param_to_gradient_merge: Dict[str, Any], - optimize_ops_desc: List[Any], - k_steps, - avg): + main_program, cond_var, new_params_to_grads: List[Tuple[Any, Any]], + param_to_gradient_merge: Dict[str, Any], optimize_ops_desc: List[Any], + k_steps, avg): + def true_apply_gradient(): cur_block_idx = main_program.current_block_idx cur_block = main_program.current_block() @@ -220,15 +220,14 @@ def _create_cond_block_and_update_optimizer( if avg: for param, new_grad in new_params_to_grads: # grad /= k_steps - cur_block.append_op( - type='scale', - inputs={'X': new_grad}, - outputs={'Out': new_grad}, - attrs={ - 'scale': 1.0 / k_steps, - 'bias': 0.0, - 'bias_after_scale': False - }) + cur_block.append_op(type='scale', + inputs={'X': new_grad}, + outputs={'Out': new_grad}, + attrs={ + 'scale': 1.0 / k_steps, + 'bias': 0.0, + 'bias_after_scale': False + }) new_grad.op._set_attr(op_maker.kOpRoleAttrName(), op_maker.OpRole.Optimize) @@ -264,11 +263,10 @@ def _create_cond_block_and_update_optimizer( # clear gradient_merge_vars for param, new_grad in new_params_to_grads: - layers.fill_constant( - shape=new_grad.shape, - dtype=new_grad.dtype, - value=0.0, - out=new_grad) + layers.fill_constant(shape=new_grad.shape, + dtype=new_grad.dtype, + value=0.0, + out=new_grad) new_grad.op._set_attr(op_maker.kOpRoleAttrName(), op_maker.OpRole.Optimize) @@ -292,13 +290,15 @@ def parse_program(main_program, startup_program, params_grads, k_steps, avg, dist_context) # 4 create ConditionalBlock and append gradient merge optimizer ops - _create_cond_block_and_update_optimizer( - main_program, cond_var, new_params_to_grads, param_to_gradient_merge, - optimize_ops_desc, k_steps, avg) + 
_create_cond_block_and_update_optimizer(main_program, cond_var, + new_params_to_grads, + param_to_gradient_merge, + optimize_ops_desc, k_steps, avg) @register_pass("auto_parallel_gradient_merge_pass") class GradientMergePass(PassBase): + def __init__(self): super(GradientMergePass, self).__init__() self.set_attr("k_steps", -1) diff --git a/python/paddle/distributed/passes/auto_parallel_recompute.py b/python/paddle/distributed/passes/auto_parallel_recompute.py index c6d16854462..fcd7d243771 100644 --- a/python/paddle/distributed/passes/auto_parallel_recompute.py +++ b/python/paddle/distributed/passes/auto_parallel_recompute.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -28,6 +28,7 @@ from paddle.distributed.auto_parallel.utils import naive_set_dist_op_attr_for_pr class RecomputeState(ProgramStats): + def __init__(self, block, ops): super(RecomputeState, self).__init__(block=block, ops=ops) self._block = block @@ -70,22 +71,25 @@ class RecomputeState(ProgramStats): flag, min_idx, max_idx = self.is_subgraph( [checkpoints[start_idx]], [checkpoints[start_idx + 1]]) if flag: - min_idx = self._update_segment_start(min_idx, - pre_segment_end_idx) + min_idx = self._update_segment_start( + min_idx, pre_segment_end_idx) segments.append([min_idx, max_idx + 1]) else: - logging.info("Could not recompute op range [{}] - [{}] ". 
- format(min_idx, max_idx + 1)) + logging.info( + "Could not recompute op range [{}] - [{}] ".format( + min_idx, max_idx + 1)) start_idx += 1 for i, (idx1, idx2) in enumerate(segments): logging.info("recompute segment[{}]".format(i)) - logging.info("segment start op: [{}]: [{}] [{}]".format(self._ops[ - idx1].desc.type(), self._ops[idx1].desc.input_arg_names( - ), self._ops[idx1].desc.output_arg_names())) - logging.info("segment end op: [{}]: [{}] [{}]".format(self._ops[ - idx2 - 1].desc.type(), self._ops[idx2 - 1].desc.input_arg_names( - ), self._ops[idx2 - 1].desc.output_arg_names())) + logging.info("segment start op: [{}]: [{}] [{}]".format( + self._ops[idx1].desc.type(), + self._ops[idx1].desc.input_arg_names(), + self._ops[idx1].desc.output_arg_names())) + logging.info("segment end op: [{}]: [{}] [{}]".format( + self._ops[idx2 - 1].desc.type(), + self._ops[idx2 - 1].desc.input_arg_names(), + self._ops[idx2 - 1].desc.output_arg_names())) return segments @@ -125,8 +129,9 @@ class RecomputeState(ProgramStats): # set new seed_var's dist_attr ref_dims_mapping = [-1] ref_process_mesh = cur_op_dist_attr.process_mesh - seed_var_dist_attr = set_var_dist_attr( - dist_context, seed_var, ref_dims_mapping, ref_process_mesh) + seed_var_dist_attr = set_var_dist_attr(dist_context, seed_var, + ref_dims_mapping, + ref_process_mesh) seed = 0 if cur_op.attr("fix_seed") is False else int( cur_op.attr("seed")) @@ -135,8 +140,10 @@ class RecomputeState(ProgramStats): type="seed", inputs={}, outputs={"Out": seed_var}, - attrs={"seed": seed, - "force_cpu": True}) + attrs={ + "seed": seed, + "force_cpu": True + }) # set new seed op's dist_attr naive_set_dist_op_attr_for_program_by_mesh_and_mapping( seed_op, ref_process_mesh, ref_dims_mapping, dist_context) @@ -209,6 +216,7 @@ def _add_needed_descs_to_block(descs, block, main_block, in_memory_vars, @register_pass("auto_parallel_recompute") class RecomputePass(PassBase): + def __init__(self): super(RecomputePass, self).__init__() self.set_attr("checkpoints", None) @@ -254,9 +262,10 @@ class RecomputePass(PassBase): vars_should_be_hold.extend( rc_state.get_out_of_subgraph_vars(segment[0], segment[1])) cross_vars = set(vars_should_be_hold) - set(checkpoints) - logging.info("found [{}] vars which cross recompute segment: [{}]," - "better checkpoints might be set to reduce those vars". - format(len(cross_vars), cross_vars)) + logging.info( + "found [{}] vars which cross recompute segment: [{}]," + "better checkpoints might be set to reduce those vars".format( + len(cross_vars), cross_vars)) vars_should_be_hold.extend(rc_state.get_reserved_vars()) vars_should_be_hold.extend(rc_state.get_input_nodes()) vars_should_be_hold = list(set(vars_should_be_hold)) @@ -304,15 +313,16 @@ class RecomputePass(PassBase): set_var_dist_attr(self._dist_context, rc_var, ref_dims_mapping, ref_process_mesh) # get recomputed segment's descs - segment_descs = _add_needed_descs_to_block( - fwd_ops, buffer_block, main_block, vars_in_memory, - self._dist_context) + segment_descs = _add_needed_descs_to_block(fwd_ops, buffer_block, + main_block, + vars_in_memory, + self._dist_context) # rename recomputed ops' input and output var name for key in var_name_dict: _rename_arg_(segment_descs, key, var_name_dict[key]) # NOTE: one forward op could be correspond to multiple xxx_grad op. - # When traversing all grad_ops in reverse, need to set a flag to indicate + # When traversing all grad_ops in reverse, need to set a flag to indicate # whether the ckpt and its segment_descs can be used. 
ckpt_op = op_path[segment[1] - 1] ckpt_ops_dict[ckpt_op.desc.original_id()] = [True, segment_descs] diff --git a/python/paddle/distributed/passes/auto_parallel_sharding.py b/python/paddle/distributed/passes/auto_parallel_sharding.py index 7729d1c2bd0..3c1f0443e03 100644 --- a/python/paddle/distributed/passes/auto_parallel_sharding.py +++ b/python/paddle/distributed/passes/auto_parallel_sharding.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -38,9 +38,10 @@ _supported_optimizer_type = [ # NOTE we add the "auto_parallel" prefix to the pass in order to # indicate that this pass should obey some constrains by auto_parallel # for example all ops and vars should has dist attr before and after pass -# should use dist op instead of custom comm op +# should use dist op instead of custom comm op @register_pass("auto_parallel_sharding") class ShardingPass(PassBase): + def __init__(self): super(ShardingPass, self).__init__() self.set_attr("dist_context", None) @@ -101,12 +102,12 @@ class ShardingPass(PassBase): if group is not None: self.dp_groups.add(group) - # TODO(JZ-LIANG) allow more than one dp groups in network, support more general distribution + # TODO(JZ-LIANG) allow more than one dp groups in network, support more general distribution # genetated by auto search if len(self.dp_groups) != 1: raise NotImplementedError( - "So far Only and Exactly one data parallel group in network are supported, but got [{}] different data parallel groups". 
- format(len(self.dp_groups))) + "So far Only and Exactly one data parallel group in network are supported, but got [{}] different data parallel groups" + .format(len(self.dp_groups))) def _build_sharding_infos(self, params_grads): @@ -123,7 +124,7 @@ class ShardingPass(PassBase): ) >= self.sharding_world_size, "number of parameters [{}] is not enough to be shard among [{}] ranks".format( len(params_grads), self.sharding_world_size) - # sharding hybrid data parallel: partial sharding param within + # sharding hybrid data parallel: partial sharding param within if dp_group.nranks > self.sharding_world_size: self.partial_sharding = True assert len( @@ -138,8 +139,8 @@ class ShardingPass(PassBase): # TODO(JZ-LIANG) when support multiple dp groups in future, should group param and bind them to corresponding dp group params_in_group = [p for p, g in params_grads] - assert len(params_in_group) == len(set( - params_in_group)), "found duplicated param in params_grads" + assert len(params_in_group) == len( + set(params_in_group)), "found duplicated param in params_grads" sharding_info = ShardingInfo(sharding_group, self.global_rank, params_in_group) self.sharding_infos.append(sharding_info) @@ -307,16 +308,20 @@ class ShardingPass(PassBase): assert main_block.has_var(param.name) assert startup_block.has_var(param.name) - new_op = main_block.append_op( - type='c_broadcast', - inputs={'X': param}, - outputs={'Out': param}, - attrs={ - 'ring_id': sharding_info.group.id, - 'root': sharding_info.get_var_rank(param.name), - 'use_calc_stream': True, - OP_ROLE_KEY: OpRole.Optimize - }) + new_op = main_block.append_op(type='c_broadcast', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': + sharding_info.group.id, + 'root': + sharding_info.get_var_rank( + param.name), + 'use_calc_stream': + True, + OP_ROLE_KEY: + OpRole.Optimize + }) param_dist_attr = self._dist_context.get_tensor_dist_attr_for_program( param) assert param_dist_attr is not None @@ -341,9 +346,10 @@ class ShardingPass(PassBase): input_name = op.input_arg_names[0] base_name = _get_base_name_from_grad_name(input_name) sharding_info = self.varname_to_sharding_info[base_name] - _insert_reduce_op( - main_block, idx, input_name, sharding_info.group.id, - sharding_info.get_var_rank(base_name), self._dist_context) + _insert_reduce_op(main_block, idx, input_name, + sharding_info.group.id, + sharding_info.get_var_rank(base_name), + self._dist_context) if not self.partial_sharding: main_block._remove_op(idx + 1, sync=False) else: @@ -382,11 +388,10 @@ class ShardingPass(PassBase): broadcast_varname = unique_name.generate(input_name + "@BroadCast") input_var = main_block.var(input_name) - new_var = main_block.create_var( - name=broadcast_varname, - shape=input_var.shape, - dtype=input_var.dtype, - persistable=False) + new_var = main_block.create_var(name=broadcast_varname, + shape=input_var.shape, + dtype=input_var.dtype, + persistable=False) ref_dist_attr = self._dist_context.get_tensor_dist_attr_for_program( input_var) out_var_dist_attr = set_var_dist_attr( @@ -395,11 +400,13 @@ class ShardingPass(PassBase): ref_dist_attr.process_mesh) op._rename_input(input_name, broadcast_varname) - _insert_init_and_broadcast_op( - main_block, idx, broadcast_varname, - sharding_info.local_rank, root_rank, - sharding_info.group.id, - op.attr('op_role'), self._dist_context) + _insert_init_and_broadcast_op(main_block, idx, + broadcast_varname, + sharding_info.local_rank, + root_rank, + sharding_info.group.id, + op.attr('op_role'), + 
self._dist_context) for idx, op in reversed(list(enumerate(main_block.ops))): if op.type != "cast": @@ -446,17 +453,16 @@ def _insert_init_and_broadcast_op(block, insert_idx, varname, local_rank, broadcast_var_dist_attr = dist_context.get_tensor_dist_attr_for_program( broadcast_var) - new_op = block._insert_op_without_sync( - insert_idx, - type='c_broadcast', - inputs={'X': varname}, - outputs={'Out': varname}, - attrs={ - 'ring_id': ring_id, - 'root': root_rank, - 'use_calc_stream': True, - OP_ROLE_KEY: op_role - }) + new_op = block._insert_op_without_sync(insert_idx, + type='c_broadcast', + inputs={'X': varname}, + outputs={'Out': varname}, + attrs={ + 'ring_id': ring_id, + 'root': root_rank, + 'use_calc_stream': True, + OP_ROLE_KEY: op_role + }) naive_set_dist_op_attr_for_program_by_mesh_and_mapping( new_op, broadcast_var_dist_attr.process_mesh, broadcast_var_dist_attr.dims_mapping, dist_context) @@ -487,17 +493,17 @@ def _insert_reduce_op(block, use_calc_stream=True): assert root_id >= 0, "root id should be a positive int, but now root id is {}".format( root_id) - new_op = block._insert_op_without_sync( - insert_idx, - type='c_reduce_sum', - inputs={'X': [reduce_var]}, - outputs={'Out': [reduce_var]}, - attrs={ - 'ring_id': ring_id, - 'root_id': root_id, - 'use_calc_stream': use_calc_stream, - OP_ROLE_KEY: op_role - }) + new_op = block._insert_op_without_sync(insert_idx, + type='c_reduce_sum', + inputs={'X': [reduce_var]}, + outputs={'Out': [reduce_var]}, + attrs={ + 'ring_id': ring_id, + 'root_id': root_id, + 'use_calc_stream': + use_calc_stream, + OP_ROLE_KEY: op_role + }) dist_attr = dist_context.get_tensor_dist_attr_for_program( block.var(reduce_var)) @@ -641,6 +647,7 @@ def shard_parameters(params, group_size): class ShardingInfo(object): + def __init__(self, group, rank, params): self.group = group self.params = params diff --git a/python/paddle/distributed/passes/cpp_pass.py b/python/paddle/distributed/passes/cpp_pass.py index 72525255b7e..1d99a93624f 100644 --- a/python/paddle/distributed/passes/cpp_pass.py +++ b/python/paddle/distributed/passes/cpp_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -18,6 +18,7 @@ from paddle.fluid.framework import core, _apply_pass as _apply_cpp_pass @register_pass("fuse_elewise_add_act") class FuseElementwiseAddActPass(CPPPassWrapper): + def __init__(self): super(FuseElementwiseAddActPass, self).__init__() @@ -31,6 +32,7 @@ class FuseElementwiseAddActPass(CPPPassWrapper): @register_pass("fuse_bn_act") class FuseBatchNormActPass(CPPPassWrapper): + def __init__(self): super(FuseBatchNormActPass, self).__init__() @@ -44,6 +46,7 @@ class FuseBatchNormActPass(CPPPassWrapper): @register_pass("fuse_bn_add_act") class FuseBatchNormAddActPass(CPPPassWrapper): + def __init__(self): super(FuseBatchNormAddActPass, self).__init__() @@ -57,6 +60,7 @@ class FuseBatchNormAddActPass(CPPPassWrapper): @register_pass("fuse_relu_depthwise_conv") class FuseReluDepthwiseConvPass(CPPPassWrapper): + def __init__(self): super(FuseReluDepthwiseConvPass, self).__init__() @@ -70,6 +74,7 @@ class FuseReluDepthwiseConvPass(CPPPassWrapper): @register_pass("fuse_optimizer") class FuseOptimizerPass(CPPPassWrapper): + def __init__(self): super(FuseOptimizerPass, self).__init__() @@ -85,6 +90,7 @@ class FuseOptimizerPass(CPPPassWrapper): @register_pass("inplace_addto_op") class InplaceAddtoOpPass(CPPPassWrapper): + def __init__(self): super(InplaceAddtoOpPass, self).__init__() @@ -98,6 +104,7 @@ class InplaceAddtoOpPass(CPPPassWrapper): @register_pass("build_cinn") class BuildCINNPass(CPPPassWrapper): + def __init__(self): super(BuildCINNPass, self).__init__() self.set_attr("allow_ops", []) diff --git a/python/paddle/distributed/passes/fuse_all_reduce.py b/python/paddle/distributed/passes/fuse_all_reduce.py index 317a66c008a..33a58a67c9d 100644 --- a/python/paddle/distributed/passes/fuse_all_reduce.py +++ b/python/paddle/distributed/passes/fuse_all_reduce.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -44,12 +44,12 @@ def find_adjacent_match_sequences(iterable, def insert_fuse_all_reduce_ops(block, reversed_op_indices, input_var_names, output_var_names, dtype, attrs): - fused_var = block.create_var( - name=unique_name.generate("FusedOutput_{}".format(input_var_names[0])), - dtype=dtype) + fused_var = block.create_var(name=unique_name.generate( + "FusedOutput_{}".format(input_var_names[0])), + dtype=dtype) - # FIXME(zengjinle): here we assume that we use - # c_sync_calc_stream/c_sync_comm_stream to do sync. + # FIXME(zengjinle): here we assume that we use + # c_sync_calc_stream/c_sync_comm_stream to do sync. # But someone may use c_wait_compute/c_wait_comm instead. 
if not attrs["use_calc_stream"]: ring_id = attrs["ring_id"] @@ -103,21 +103,21 @@ def insert_fuse_all_reduce_ops(block, reversed_op_indices, input_var_names, } if not attrs["use_calc_stream"]: - block._insert_op_without_sync( - insert_idx, - type="c_sync_calc_stream", - inputs={"X": fused_var}, - outputs={"Out": fused_var, - op_role_key: attrs[op_role_key]}) + block._insert_op_without_sync(insert_idx, + type="c_sync_calc_stream", + inputs={"X": fused_var}, + outputs={ + "Out": fused_var, + op_role_key: attrs[op_role_key] + }) insert_idx += 1 - # c_allreduce_sum should insert - block._insert_op_without_sync( - insert_idx, - type="c_allreduce_sum", - inputs={"X": fused_var}, - outputs={"Out": fused_var}, - attrs=attrs) + # c_allreduce_sum should insert + block._insert_op_without_sync(insert_idx, + type="c_allreduce_sum", + inputs={"X": fused_var}, + outputs={"Out": fused_var}, + attrs=attrs) for op_idx in reversed_op_indices: block._remove_op(op_idx) @@ -186,8 +186,9 @@ def find_all_fuse_all_reduce_groups(block): return False return True - match_seqs = find_adjacent_match_sequences( - collective_ops, is_valid_allreduce_op, is_same_adjacent_op) + match_seqs = find_adjacent_match_sequences(collective_ops, + is_valid_allreduce_op, + is_same_adjacent_op) new_match_seqs = [] for i, j in match_seqs: new_match_seqs.append([collective_op_indices[k] for k in range(i, j)]) @@ -330,6 +331,7 @@ def insert_fuse_all_reduce_by_memory_size(block, groups, max_memory_size): @register_pass("fuse_all_reduce") class FuseAllReducePass(PassBase): + def __init__(self): super(FuseAllReducePass, self).__init__() self.set_attr("max_memory_size", -1) @@ -344,11 +346,11 @@ class FuseAllReducePass(PassBase): def _type(self): return PassType.COMM_OPT - # NOTE: why FuseAllReducePass can override apply_single_impl instead of - # apply_impl? AllReduce is a collective operation, so the program of each - # rank inside the same communication group should have the same - # c_allreduce_sum operations. Therefore, FuseAllReducePass can override - # apply_single_impl directly. + # NOTE: why FuseAllReducePass can override apply_single_impl instead of + # apply_impl? AllReduce is a collective operation, so the program of each + # rank inside the same communication group should have the same + # c_allreduce_sum operations. Therefore, FuseAllReducePass can override + # apply_single_impl directly. def _apply_single_impl(self, main_program, startup_program, context): max_memory_size = self.get_attr("max_memory_size") op_deps = main_program.desc.get_op_deps() @@ -356,8 +358,8 @@ class FuseAllReducePass(PassBase): for i in range(num_blocks): block = main_program.block(i) groups = find_all_fuse_all_reduce_groups(block) - groups = split_fuse_all_reduce_groups_by_deps(block, groups, - op_deps[i]) + groups = split_fuse_all_reduce_groups_by_deps( + block, groups, op_deps[i]) insert_fuse_all_reduce_by_memory_size(block, groups, max_memory_size) main_program._sync_with_cpp() diff --git a/python/paddle/distributed/passes/pass_base.py b/python/paddle/distributed/passes/pass_base.py index 3afca5d6355..b733f886693 100644 --- a/python/paddle/distributed/passes/pass_base.py +++ b/python/paddle/distributed/passes/pass_base.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,6 +19,7 @@ from paddle.fluid.framework import program_guard, _apply_pass as _apply_cpp_pass class PassContext: + def __init__(self): self._applied_passes = [] self._attrs = {} @@ -118,6 +119,7 @@ class PassBase(ABC): def register_pass(name): + def impl(cls): PassBase._register(name, cls) cls.name = name @@ -136,6 +138,7 @@ def new_pass(name, pass_attrs={}): class CPPPassWrapper(PassBase): + def __init__(self): super(CPPPassWrapper, self).__init__() @@ -159,8 +162,8 @@ class CPPPassWrapper(PassBase): def _fusion_opt_last_rule(pass_before, pass_after): - if pass_before._type() == PassType.FUSION_OPT and pass_after._type( - ) != PassType.FUSION_OPT: + if pass_before._type( + ) == PassType.FUSION_OPT and pass_after._type() != PassType.FUSION_OPT: return False else: return True @@ -168,6 +171,7 @@ def _fusion_opt_last_rule(pass_before, pass_after): def _make_rule_from_white_lists_dict(before_white_lists_dict, after_white_lists_dict): + def collect_pass_names(white_lists_dict, result): for k, v in white_lists_dict.items(): result.add(k) @@ -202,8 +206,8 @@ def _make_rule_from_white_lists_dict(before_white_lists_dict, return rule -# The key-value pair (k, [v1, v2, ..., vn]) means the pass k can be -# applied before any of pass [v1, v2, ..., vn] is applied +# The key-value pair (k, [v1, v2, ..., vn]) means the pass k can be +# applied before any of pass [v1, v2, ..., vn] is applied PassBase._BEFORE_WHITE_LISTS_DICT = { "fuse_gradient_merge": ["fuse_all_reduce"], # Add more white lists here @@ -212,7 +216,7 @@ PassBase._BEFORE_WHITE_LISTS_DICT = { # The key-value pair (k, [v1, v2, ..., vn]) means the pass k can be # applied after any of pass [v1, v2, ..., vn] is applied PassBase._AFTER_WHITE_LISTS_DICT = { - # Add more white lists here + # Add more white lists here } PassBase._COMMON_RULES = [ @@ -292,6 +296,7 @@ def _solve_pass_conflict(passes, context): class PassManager: + def __init__(self, passes, context=None, auto_solve_conflict=True): if context is None: context = PassContext() diff --git a/python/paddle/distributed/passes/pass_utils.py b/python/paddle/distributed/passes/pass_utils.py index bd1eddce3bb..6e43930d2e1 100644 --- a/python/paddle/distributed/passes/pass_utils.py +++ b/python/paddle/distributed/passes/pass_utils.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -27,7 +27,7 @@ def list_to_ordered_dict(list_obj, ordered_dict=None): # The inputs of a program are the variables -# that first occur as the input of the op. +# that first occur as the input of the op. 
def get_inputs_of_program(program): visited_vars = set() input_vars = [] diff --git a/python/paddle/distributed/passes/ps_server_pass.py b/python/paddle/distributed/passes/ps_server_pass.py index 30f6542fa25..0b774683387 100755 --- a/python/paddle/distributed/passes/ps_server_pass.py +++ b/python/paddle/distributed/passes/ps_server_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,6 +23,7 @@ from paddle.fluid.layers.learning_rate_scheduler import exponential_decay, noam_ @register_pass("add_lr_decay_table_pass") class AddLrDecayTablePass(PassBase): + def __init__(self): super(AddLrDecayTablePass, self).__init__() @@ -101,8 +102,8 @@ class AddLrDecayTablePass(PassBase): % lr_decay_steps) else: raise ValueError( - "Not supported current LearningRate strategy, please use follow decay strategy: {}". - format(schedler_decay)) + "Not supported current LearningRate strategy, please use follow decay strategy: {}" + .format(schedler_decay)) return decay_main_program, decay_startup_program, lr_name @@ -125,6 +126,7 @@ class AddLrDecayTablePass(PassBase): @register_pass("add_listen_and_serv_pass") class AddListenAndServPass(PassBase): + def __init__(self): super(AddListenAndServPass, self).__init__() @@ -152,12 +154,15 @@ class AddListenAndServPass(PassBase): "rpc_send_thread_num": -1, "rpc_prefetch_thread_num": -1 } - main_program.global_block().append_op( - type="listen_and_serv", inputs={'X': []}, outputs={}, attrs=opt) + main_program.global_block().append_op(type="listen_and_serv", + inputs={'X': []}, + outputs={}, + attrs=opt) @register_pass("add_rpc_global_flags_pass") class AddRpcGlobalFlagsPass(PassBase): + def __init__(self): super(AddRpcGlobalFlagsPass, self).__init__() @@ -173,6 +178,7 @@ class AddRpcGlobalFlagsPass(PassBase): @register_pass("add_optimizer_pass") class AddOptimizerPass(PassBase): + def __init__(self): super(AddOptimizerPass, self).__init__() @@ -188,6 +194,7 @@ class AddOptimizerPass(PassBase): @register_pass("add_geo_optimizer_pass") class AddGeoOptimizerPass(PassBase): + def __init__(self): super(AddGeoOptimizerPass, self).__init__() @@ -203,6 +210,7 @@ class AddGeoOptimizerPass(PassBase): @register_pass("build_pserver_startup_program_pass") class BuildPserverStartupProgramPass(PassBase): + def __init__(self): super(BuildPserverStartupProgramPass, self).__init__() @@ -218,6 +226,7 @@ class BuildPserverStartupProgramPass(PassBase): @register_pass("delete_unused_in_startup_pass") class DeleteUnusedInStartupPass(PassBase): + def __init__(self): super(DeleteUnusedInStartupPass, self).__init__() diff --git a/python/paddle/distributed/passes/ps_trainer_pass.py b/python/paddle/distributed/passes/ps_trainer_pass.py index 876e04b7081..4a015fea30a 100755 --- a/python/paddle/distributed/passes/ps_trainer_pass.py +++ b/python/paddle/distributed/passes/ps_trainer_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -26,6 +26,7 @@ from paddle.fluid.framework import Program, Parameter @register_pass("append_send_ops_pass") class AppendSendOpsPass(PassBase): # 该 pass 被多种模式复用 + def __init__(self): super(AppendSendOpsPass, self).__init__() @@ -49,29 +50,33 @@ class AppendSendOpsPass(PassBase): # 该 pass 被多种模式复用 if ps_mode in [DistributedMode.SYNC, DistributedMode.HALF_ASYNC]: dummy_output = program.global_block().create_var( name=framework.generate_control_dev_var_name()) - program.global_block().append_op( - type="send", - inputs={"X": send_input_vars}, - outputs={"Out": dummy_output}, - attrs={ - "send_varnames": [queue], - "is_sparse": is_sparse, - "table_id": table_id, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) + program.global_block().append_op(type="send", + inputs={"X": send_input_vars}, + outputs={"Out": dummy_output}, + attrs={ + "send_varnames": [queue], + "is_sparse": + is_sparse, + "table_id": + table_id, + RPC_OP_ROLE_ATTR_NAME: + RPC_OP_ROLE_ATTR_VALUE + }) return dummy_output def _append_barrier_op(self, program, dummys, trainer_id): - program.global_block().append_op( - type="send_barrier", - inputs={"X": dummys}, - outputs={"Out": []}, - attrs={ - "trainer_id": trainer_id, - "half_async": True, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) + program.global_block().append_op(type="send_barrier", + inputs={"X": dummys}, + outputs={"Out": []}, + attrs={ + "trainer_id": + trainer_id, + "half_async": + True, + RPC_OP_ROLE_ATTR_NAME: + RPC_OP_ROLE_ATTR_VALUE + }) def _apply_single_impl(self, main_program, startup_program, pass_ctx): attrs = pass_ctx._attrs @@ -92,9 +97,9 @@ class AppendSendOpsPass(PassBase): # 该 pass 被多种模式复用 is_sparse = 1 if send.is_sparse() else 0 is_sparse = 2 if send.is_distributed() else is_sparse dummys.append( - self._append_send_op(main_program, - send.origin_varnames(), merged_name, - is_sparse, send.table_id(), ps_mode)) + self._append_send_op(main_program, send.origin_varnames(), + merged_name, is_sparse, send.table_id(), + ps_mode)) if ps_mode in [DistributedMode.SYNC, DistributedMode.HALF_ASYNC]: trainer_id = get_role_id(attrs['role_maker']) self._append_barrier_op(main_program, dummys, trainer_id) @@ -102,6 +107,7 @@ class AppendSendOpsPass(PassBase): # 该 pass 被多种模式复用 @register_pass("distributed_ops_pass") class DistributedOpsPass(PassBase): + def __init__(self): super(DistributedOpsPass, self).__init__() self.w_2_table_id = {} @@ -147,32 +153,30 @@ class DistributedOpsPass(PassBase): dtype=core.VarDesc.VarType.INT64, persistable=False, stop_gradient=True) - _program.global_block()._insert_op( - index=0, - type='fill_constant', - inputs={}, - outputs={'Out': show}, - attrs={ - 'shape': [1], - 'dtype': show.dtype, - 'value': 1, - }) + _program.global_block()._insert_op(index=0, + type='fill_constant', + inputs={}, + outputs={'Out': show}, + attrs={ + 'shape': [1], + 'dtype': show.dtype, + 'value': 1, + }) clk = _program.global_block().create_var( name="clk", dtype=core.VarDesc.VarType.INT64, persistable=False, stop_gradient=True) - _program.global_block()._insert_op( - index=0, - type='fill_constant', - inputs={}, - outputs={'Out': clk}, - attrs={ - 'shape': [1], - 'dtype': clk.dtype, - 'value': 0, - }) + 
_program.global_block()._insert_op(index=0, + type='fill_constant', + inputs={}, + outputs={'Out': clk}, + attrs={ + 'shape': [1], + 'dtype': clk.dtype, + 'value': 0, + }) for param, ops in push_sparse_ops.items(): all_ops = _program.global_block().ops @@ -194,25 +198,26 @@ class DistributedOpsPass(PassBase): for idx in op_idxs[::-1]: _program.global_block()._remove_op(idx) - _program.global_block().append_op( - type="distributed_push_sparse", - inputs={ - "Ids": inputs, - 'W': w, - "Outputs": outputs, - "Shows": show, - "Clicks": clk - }, - outputs={"Outputs": outputs}, - attrs={ - "is_distributed": is_distributed, - "padding_idx": padding_idx, - "table_id": table_id, - "size": self.emb_size[param], - "use_cvm_op": use_cvm_op - }) + _program.global_block().append_op(type="distributed_push_sparse", + inputs={ + "Ids": inputs, + 'W': w, + "Outputs": outputs, + "Shows": show, + "Clicks": clk + }, + outputs={"Outputs": outputs}, + attrs={ + "is_distributed": + is_distributed, + "padding_idx": padding_idx, + "table_id": table_id, + "size": self.emb_size[param], + "use_cvm_op": use_cvm_op + }) def _pull_sparse_fuse(self, _program, pull_sparse_ops, attrs, send_ctx): + def dag_check_up_and_reorder(program, inputs, outputs): global_block = program.global_block() min_output_index = len(global_block.ops) @@ -373,8 +378,10 @@ class DistributedOpsPass(PassBase): _program.global_block()._insert_op( index=distributed_idx, type="pull_gpups_sparse", - inputs={"Ids": inputs, - 'W': w}, + inputs={ + "Ids": inputs, + 'W': w + }, outputs={"Out": outputs}, attrs={ "size": [w.shape[1] for i in inputs], @@ -385,8 +392,10 @@ class DistributedOpsPass(PassBase): _program.global_block()._insert_op( index=distributed_idx, type="distributed_lookup_table", - inputs={"Ids": inputs, - 'W': w}, + inputs={ + "Ids": inputs, + 'W': w + }, outputs={"Outputs": outputs}, attrs={ "is_distributed": is_distributed, @@ -402,8 +411,10 @@ class DistributedOpsPass(PassBase): _program.global_block()._insert_op( index=distributed_idx, type="distributed_lookup_table", - inputs={"Ids": [inputs[i]], - 'W': w}, + inputs={ + "Ids": [inputs[i]], + 'W': w + }, outputs={"Outputs": [outputs[i]]}, attrs={ "is_distributed": is_distributed, @@ -438,8 +449,8 @@ class DistributedOpsPass(PassBase): for op in _program.global_block().ops: if op.type in SPARSE_GRAD_OP_TYPE_DICT.keys(): param_name = op.input(SPARSE_GRAD_OP_TYPE_DICT[op.type])[0] - if param_name in pull_sparse_ids and op.input("Ids")[ - 0] in pull_sparse_ids[param_name]: + if param_name in pull_sparse_ids and op.input( + "Ids")[0] in pull_sparse_ids[param_name]: ops = push_sparse_ops.get(param_name, []) ops.append(op) push_sparse_ops[param_name] = ops @@ -450,8 +461,8 @@ class DistributedOpsPass(PassBase): attrs = pass_ctx._attrs pull_sparse_ops, push_sparse_ops, use_cvm_op = self._get_pull_sparse_ops( main_program, attrs) - print("is_heter_ps_mode in distributed_ops_pass {}?".format(attrs[ - 'is_heter_ps_mode'])) + print("is_heter_ps_mode in distributed_ops_pass {}?".format( + attrs['is_heter_ps_mode'])) send_ctx = get_the_one_send_context( attrs, split_dense_table=attrs['is_heter_ps_mode']) self._pull_sparse_fuse(main_program, pull_sparse_ops, attrs, send_ctx) @@ -460,6 +471,7 @@ class DistributedOpsPass(PassBase): @register_pass("delete_optimizer_pass") class DeleteOptimizesPass(PassBase): + def __init__(self): super(DeleteOptimizesPass, self).__init__() @@ -493,15 +505,14 @@ class DeleteOptimizesPass(PassBase): def _add_lr_var(self, main_program, attrs): # Todo: hard code for pe - 
lr_var = attrs['origin_main_program'].global_block().vars[ - "learning_rate_0"] - main_program.global_block().create_var( - name=lr_var.name, - shape=lr_var.shape, - dtype=lr_var.dtype, - type=lr_var.type, - lod_level=lr_var.lod_level, - persistable=True) + lr_var = attrs['origin_main_program'].global_block( + ).vars["learning_rate_0"] + main_program.global_block().create_var(name=lr_var.name, + shape=lr_var.shape, + dtype=lr_var.dtype, + type=lr_var.type, + lod_level=lr_var.lod_level, + persistable=True) def _apply_single_impl(self, main_program, startup_program, pass_ctx): attrs = pass_ctx._attrs @@ -516,6 +527,7 @@ class DeleteOptimizesPass(PassBase): @register_pass("delete_extra_optimizer_pass") class DeleteExtraOptimizerPass(PassBase): + def __init__(self): super(DeleteExtraOptimizerPass, self).__init__() @@ -558,6 +570,7 @@ class DeleteExtraOptimizerPass(PassBase): @register_pass("fake_init_ops_pass") class FakeInitOpsPass(PassBase): + def __init__(self): super(FakeInitOpsPass, self).__init__() @@ -584,8 +597,8 @@ class FakeInitOpsPass(PassBase): table_param_init_op.append(op) init_op_num = len(table_param_init_op) if init_op_num != 1: - raise ValueError("table init op num should be 1, now is " + str( - init_op_num)) + raise ValueError("table init op num should be 1, now is " + + str(init_op_num)) table_init_op = table_param_init_op[0] program.global_block().append_op( type="fake_init", @@ -602,6 +615,7 @@ class FakeInitOpsPass(PassBase): @register_pass("ps_gpu_pass") class PsGpuPass(PassBase): + def __init__(self): super(PsGpuPass, self).__init__() @@ -626,8 +640,8 @@ class PsGpuPass(PassBase): insert_index + 1) new_op_desc.copy_from(op_desc) new_op_desc._set_attr(op_role_attr_name, backward) - new_op = paddle.fluid.framework.Operator(program.global_block(), - new_op_desc) + new_op = paddle.fluid.framework.Operator( + program.global_block(), new_op_desc) program.global_block().ops.insert(insert_index + 1, new_op) program.global_block()._sync_with_cpp() @@ -701,6 +715,7 @@ class PsGpuPass(PassBase): @register_pass("ps_transpile_pass") class PsTranspilePass(PassBase): + def __init__(self): super(PsTranspilePass, self).__init__() @@ -714,17 +729,17 @@ class PsTranspilePass(PassBase): attrs = pass_ctx._attrs t = SingleProcessMultiThread() env = get_dist_env() - t.transpile( - startup_program=startup_program, - main_program=main_program, - rank=env["trainer_id"], - endpoints=env["trainer_endpoints"], - current_endpoint=env['current_endpoint'], - wait_port=False) + t.transpile(startup_program=startup_program, + main_program=main_program, + rank=env["trainer_id"], + endpoints=env["trainer_endpoints"], + current_endpoint=env['current_endpoint'], + wait_port=False) @register_pass("split_heter_worker_ops_pass") class SplitHeterWorkerOpsPass(PassBase): + def __init__(self): super(SplitHeterWorkerOpsPass, self).__init__() @@ -762,10 +777,10 @@ class SplitHeterWorkerOpsPass(PassBase): current_device = role_maker._heter_device_type().lower() stage_id = int(role_maker._get_stage_id()) - heter_block_ops_forward = program_block_ops_list[stage_id - 1][ - "forward"] - heter_block_ops_backward = program_block_ops_list[stage_id - 1][ - "backward"] + heter_block_ops_forward = program_block_ops_list[stage_id - + 1]["forward"] + heter_block_ops_backward = program_block_ops_list[stage_id - + 1]["backward"] heter_block = heter_program._create_block(pre_block_idx) optimizer_block.append(heter_block) @@ -787,15 +802,17 @@ class SplitHeterWorkerOpsPass(PassBase): for _, op in 
enumerate(heter_block_ops_backward): block_append_op(heter_program, program, heter_block_bp, op) - bp_entrance_vars = block_var_detail[stage_id - 1]["backward"][ - "entrance"] + bp_entrance_vars = block_var_detail[stage_id - + 1]["backward"]["entrance"] add_vars_by_var_list(bp_entrance_vars, program, heter_program, heter_block_bp) bp_exit_vars = block_var_detail[stage_id - 1]["backward"]["exit"] add_vars_by_var_list(bp_exit_vars, program, heter_program, heter_block_bp) - backward_comm_info = get_communicate_var_info( - program, stage_id, bp_entrance_vars, type="backward") + backward_comm_info = get_communicate_var_info(program, + stage_id, + bp_entrance_vars, + type="backward") grad_to_block_id.append(backward_comm_info["block_input_var_name"] + ":" + str(heter_block_bp.idx)) @@ -804,8 +821,8 @@ class SplitHeterWorkerOpsPass(PassBase): for _, op in enumerate(heter_block_ops_backward): block_append_op(heter_program, program, heter_block, op) - bp_entrance_vars = block_var_detail[stage_id - 1]["backward"][ - "entrance"] + bp_entrance_vars = block_var_detail[stage_id - + 1]["backward"]["entrance"] add_vars_by_var_list(bp_entrance_vars, program, heter_program, heter_block) bp_exit_vars = block_var_detail[stage_id - 1]["backward"]["exit"] @@ -814,11 +831,13 @@ class SplitHeterWorkerOpsPass(PassBase): heter_block_bp = heter_block - forward_comm_info = get_communicate_var_info( - program, stage_id, entrance_vars, type="forward") + forward_comm_info = get_communicate_var_info(program, + stage_id, + entrance_vars, + type="forward") - grad_to_block_id.append(forward_comm_info["block_input_var_name"] + ":" - + str(heter_block.idx)) + grad_to_block_id.append(forward_comm_info["block_input_var_name"] + + ":" + str(heter_block.idx)) first_op_index_bp = len(heter_block_bp.ops) @@ -826,9 +845,11 @@ class SplitHeterWorkerOpsPass(PassBase): static_var = insert_communicate_op(program, role_maker, heter_block, stage_id, first_op_index_fp, block_var_detail, current_device) - static_var_bp = insert_communicate_op( - program, role_maker, heter_block_bp, stage_id, first_op_index_bp, - block_var_detail, current_device, False) + static_var_bp = insert_communicate_op(program, role_maker, + heter_block_bp, stage_id, + first_op_index_bp, + block_var_detail, current_device, + False) # add send op send_grad_var_list = add_send_op( @@ -851,11 +872,10 @@ class SplitHeterWorkerOpsPass(PassBase): RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE } # append the listen_and_serv op - heter_program.global_block().append_op( - type="heter_listen_and_serv", - inputs={'X': []}, - outputs={}, - attrs=attrs) + heter_program.global_block().append_op(type="heter_listen_and_serv", + inputs={'X': []}, + outputs={}, + attrs=attrs) # TODO check heter program def _apply_single_impl(self, main_program, startup_program, pass_ctx): @@ -888,6 +908,7 @@ class SplitHeterWorkerOpsPass(PassBase): @register_pass("split_trainer_ops_pass") class SplitTrainerOpsPass(PassBase): + def __init__(self): super(SplitTrainerOpsPass, self).__init__() @@ -1012,11 +1033,13 @@ class SplitTrainerOpsPass(PassBase): bp_ops_list, block_var_detail) bp_entrance_vars = block_var_detail[0]["backward"]["entrance"] - backward_comm_info = get_communicate_var_info( - origin_program, 1, bp_entrance_vars, type="backward") + backward_comm_info = get_communicate_var_info(origin_program, + 1, + bp_entrance_vars, + type="backward") - grad_to_block_id.append(backward_comm_info["block_input_var_name"] + ":" - + str(backward_block.idx)) + 
grad_to_block_id.append(backward_comm_info["block_input_var_name"] + + ":" + str(backward_block.idx)) optimizer_block.append(backward_block) role_maker = attrs['role_maker'] attrs = { @@ -1032,12 +1055,11 @@ class SplitTrainerOpsPass(PassBase): RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE } # append the listen_and_serv op - program.global_block()._insert_op( - index=0, - type="heter_listen_and_serv", - inputs={'X': []}, - outputs={}, - attrs=attrs) + program.global_block()._insert_op(index=0, + type="heter_listen_and_serv", + inputs={'X': []}, + outputs={}, + attrs=attrs) ## TODO add check for bp block #check_op_device(program.global_block(), DEFAULT_DEVICE) @@ -1065,6 +1087,7 @@ class SplitTrainerOpsPass(PassBase): @register_pass("set_heter_pipeline_opt_pass") class SetHeterPipelineOptPass(PassBase): + def __init__(self): super(SetHeterPipelineOptPass, self).__init__() @@ -1103,6 +1126,7 @@ class SetHeterPipelineOptPass(PassBase): @register_pass("split_fl_ops_pass") class SplitFlOpsPass(PassBase): + def __init__(self): super(SplitFlOpsPass, self).__init__() self.PART_A_DEVICE_FlAG = 'gpu:0' @@ -1265,10 +1289,10 @@ class SplitFlOpsPass(PassBase): def _find_dense_grad_vars(self, bp_op_list): program = self.ori_main_program - bp_op_input, bp_op_output = find_ops_list_input_output(program, - bp_op_list) - return (screen_persistables(program, bp_op_input) + screen_persistables( - program, bp_op_output)) + bp_op_input, bp_op_output = find_ops_list_input_output( + program, bp_op_list) + return (screen_persistables(program, bp_op_input) + + screen_persistables(program, bp_op_output)) def _get_partA_program(self, block): # 1. create block 0 @@ -1294,7 +1318,7 @@ class SplitFlOpsPass(PassBase): # logger.info('bp_op_list: {}'.format(bp_op_list)) second_block = self._get_block_by_idx(bp_op_list + push_sparse_op_list, self.partA_program, 1) - # 2.1. insert partA recv op + # 2.1. insert partA recv op block_input_flag = "backward_joint_{}_{}@fl_ps".format(2, 1) grad_to_block_id = block_input_flag + ":" + str(second_block.idx) attrs = { @@ -1307,12 +1331,11 @@ class SplitFlOpsPass(PassBase): "rpc_exec_thread_num": int(os.getenv("CPU_NUM", 32)), RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE } - second_block._insert_op( - index=0, - type='heter_listen_and_serv', - inputs={'X': []}, - outputs={}, - attrs=attrs) + second_block._insert_op(index=0, + type='heter_listen_and_serv', + inputs={'X': []}, + outputs={}, + attrs=attrs) # 2.2 insert push dense grad op send_ops = find_send_op(self.ori_main_program) # push dense delete_same_ops(block, send_ops) @@ -1338,7 +1361,7 @@ class SplitFlOpsPass(PassBase): op_list3.append(op) op_cnt += 1 - # 1. create block 0 + # 1. create block 0 first_block = self._get_block_by_idx(op_list1, self.partB_program, 0) # 2. 
create block 1 @@ -1365,12 +1388,11 @@ class SplitFlOpsPass(PassBase): "rpc_exec_thread_num": int(os.getenv("CPU_NUM", 32)), RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE } - first_block._insert_op( - index=len(op_list1), - type="heter_listen_and_serv", - inputs={'X': []}, - outputs={}, - attrs=attrs) + first_block._insert_op(index=len(op_list1), + type="heter_listen_and_serv", + inputs={'X': []}, + outputs={}, + attrs=attrs) #logger.info('partB-first_block:{}'.format(first_block)) #logger.info('partB-second_block:{}'.format(second_block)) diff --git a/python/paddle/distributed/ps/the_one_ps.py b/python/paddle/distributed/ps/the_one_ps.py index 2ba9b6b9c5a..e10794085dc 100755 --- a/python/paddle/distributed/ps/the_one_ps.py +++ b/python/paddle/distributed/ps/the_one_ps.py @@ -73,28 +73,29 @@ def check_embedding_dim(accessor_proto, varname, program_id, context): if accessor_proto.accessor_class == "SparseAccessor": if fea_dim != embedding_dim + 2: raise ValueError( - "The fea_dim is wrong, it will be sparse_embedding_dim + 2: {}, but got {}". - format(embedding_dim + 2, fea_dim)) + "The fea_dim is wrong, it will be sparse_embedding_dim + 2: {}, but got {}" + .format(embedding_dim + 2, fea_dim)) else: if fea_dim != embedding_dim: raise ValueError( - "The fea_dim is wrong, it will be sparse_embedding_dim: {}, but got {}". - format(embedding_dim, fea_dim)) + "The fea_dim is wrong, it will be sparse_embedding_dim: {}, but got {}" + .format(embedding_dim, fea_dim)) embedx_dim = accessor_proto.embedx_dim if accessor_proto.accessor_class == "SparseAccessor": if embedx_dim != embedding_dim - 1: raise ValueError( - "The embedx_dim is wrong, it will be sparse_embedding_dim - 1: {}, but got {}". - format(embedding_dim - 1, embedx_dim)) + "The embedx_dim is wrong, it will be sparse_embedding_dim - 1: {}, but got {}" + .format(embedding_dim - 1, embedx_dim)) else: if embedx_dim != embedding_dim - 3: raise ValueError( - "The embedx_dim is wrong, it will be sparse_embedding_dim - 3: {}, but got {}". 
- format(embedding_dim - 3, embedx_dim)) + "The embedx_dim is wrong, it will be sparse_embedding_dim - 3: {}, but got {}" + .format(embedding_dim - 3, embedx_dim)) class Service: + def __init__(self): pass @@ -107,6 +108,7 @@ class Service: class GpuService(Service): + def __init__(self): super(GpuService, self).__init__() @@ -116,6 +118,7 @@ class GpuService(Service): class Accessor: + def __init__(self): self.accessor_class = "" self.optimizer = None @@ -124,8 +127,8 @@ class Accessor: # TableAccessorParameter accessor def _set(self, accessor_proto, varname, program_id, context): - main_program, startup_program, idx = get_program_by_id(context, - program_id) + main_program, startup_program, idx = get_program_by_id( + context, program_id) embedding_dim = 0 for var in main_program.list_vars(): if var.name == varname: @@ -208,6 +211,7 @@ class Accessor: class CommonAccessor(Accessor): + def __init__(self): super(CommonAccessor, self).__init__() self.table_name = '' @@ -229,11 +233,11 @@ class CommonAccessor(Accessor): opt_input_map["adam"] = [("Param", None), ("Moment1", None), ("Moment2", None), ("Beta1Pow", 1), ("Beta2Pow", 1), ("LearningRate", 1)] - opt_input_map["adam_d2sum"] = [ - ("Param", None), ("D2Sum", None), ("G2Sum", None), ("Moment", None), - ("MomentDecayRate", 1), ("AdaDecayRate", 1), ("AdaEpsilon", 1), - ("LearningRate", 1) - ] + opt_input_map["adam_d2sum"] = [("Param", None), ("D2Sum", None), + ("G2Sum", None), ("Moment", None), + ("MomentDecayRate", 1), + ("AdaDecayRate", 1), ("AdaEpsilon", 1), + ("LearningRate", 1)] opt_input_map["sum"] = [("Param", None)] opt_input_map["naive_adagrad"] = [("Param", None), ("G2Sum", 1), ("LearningRate", 1)] @@ -260,8 +264,8 @@ class CommonAccessor(Accessor): self.opt_init_map = opt_init_map def parse_entry(self, varname, program_id, context): - main_program, startup_program, idx = get_program_by_id(context, - program_id) + main_program, startup_program, idx = get_program_by_id( + context, program_id) for op in main_program.global_block().ops: if not is_distributed_sparse_op(op) and not is_sparse_op(op): continue @@ -315,8 +319,8 @@ class CommonAccessor(Accessor): # print("parse_by_optimizer table_id:{} is_datanorm:{}".format( # ctx.table_id(), ctx.is_datanorm_table())) - main_program, startup_program, idx = get_program_by_id(context, - ctx.program_id()) + main_program, startup_program, idx = get_program_by_id( + context, ctx.program_id()) pserver_id = get_role_id(context['role_maker']) pserver_num = len(get_ps_endpoints(context['role_maker'])) optimizer_ops = get_optimize_ops(main_program) @@ -326,8 +330,8 @@ class CommonAccessor(Accessor): for op in optimizer_ops: if ("Param" in op.input_names) and ( - op.input("Param")[0] == - context['grad_name_to_param_name'][grad_name]): + op.input("Param")[0] + == context['grad_name_to_param_name'][grad_name]): oop = op break @@ -390,8 +394,8 @@ class CommonAccessor(Accessor): param = main_program.global_block().vars[ "learning_rate_" + str(idx)] - initializer = self.get_initializer_attr(param.name, - startup_program) + initializer = self.get_initializer_attr( + param.name, startup_program) elif formal_name == "MomentDecayRate": initializer = "fill_constant&0.99" elif formal_name == "AdaDecayRate": @@ -415,8 +419,8 @@ class CommonAccessor(Accessor): param = main_program.global_block().vars[oop.input( formal_name)[0]] - initializer = self.get_initializer_attr(param.name, - startup_program) + initializer = self.get_initializer_attr( + param.name, startup_program) elif formal_name == 
"SummaryDecayRate": initializer = "fill_constant&0.999999" else: @@ -444,8 +448,8 @@ class CommonAccessor(Accessor): pserver_id) dims.append(shape) - initializer = self.get_initializer_attr(param.name, - startup_program) + initializer = self.get_initializer_attr( + param.name, startup_program) initializers.append(initializer) for (attr_varname, type_) in attr_varnames: @@ -472,12 +476,13 @@ class CommonAccessor(Accessor): class Tensor: + def __init__(self, tesnor_dcit): self.tensor_dict = tesnor_dcit def _set(self, tensor_proto): - tensor_proto.main_program_id = self.tensor_dict.get("main_program_id", - 0) + tensor_proto.main_program_id = self.tensor_dict.get( + "main_program_id", 0) tensor_proto.startup_program_id = self.tensor_dict.get( "startup_program_id", 0) tensor_proto.feed_var_name = self.tensor_dict.get("feed_var_name", '') @@ -487,6 +492,7 @@ class Tensor: class Table: + def __init__(self): self.table_class = None self.shard_num = -1 @@ -501,6 +507,7 @@ class Table: class BarrierTable(Table): + def __init__(self, context, idx): super(BarrierTable, self).__init__() self.type = None @@ -536,6 +543,7 @@ class BarrierTable(Table): class TensorTable(Table): + def __init__(self, idx, tensor_dict, role_maker): super(TensorTable, self).__init__() self.idx = idx @@ -549,8 +557,8 @@ class TensorTable(Table): table_proto.accessor.accessor_class = "CommMergeAccessor" - table_proto.common.table_name = self.tensor_dict.get("feed_var_name", - '') + table_proto.common.table_name = self.tensor_dict.get( + "feed_var_name", '') table_proto.common.trainer_num = get_trainers(self.role_maker) tensor = Tensor(self.tensor_dict) @@ -558,6 +566,7 @@ class TensorTable(Table): class SparseTable(Table): + def __init__(self, context, send_ctx): super(SparseTable, self).__init__() self.context = context @@ -568,8 +577,8 @@ class SparseTable(Table): def _set(self, table_proto): ctx = self.ctx - if ctx.is_tensor_table() or len(ctx.origin_varnames()) < 1 or ( - ctx.is_sparse() == False): + if ctx.is_tensor_table() or len( + ctx.origin_varnames()) < 1 or (ctx.is_sparse() == False): return table_proto.table_id = ctx.table_id() table_proto.table_class = self.table_class @@ -610,14 +619,15 @@ class SparseTable(Table): ctx.program_id(), self.context) self.common.parse_by_optimizer(ctx, self.context) - self.common.parse_entry(self.common.table_name, - ctx.program_id(), self.context) + self.common.parse_entry(self.common.table_name, ctx.program_id(), + self.context) self.common.sync = True if self.context['is_sync'] else False self.common._set(table_proto.common) class GeoSparseTable(SparseTable): + def __init__(self, context, send_ctx): super(GeoSparseTable, self).__init__(context, send_ctx) self.table_class = "MemorySparseGeoTable" @@ -626,8 +636,8 @@ class GeoSparseTable(SparseTable): def _set(self, table_proto): ctx = self.ctx - if ctx.is_tensor_table() or len(ctx.origin_varnames()) < 1 or ( - ctx.is_sparse() == False): + if ctx.is_tensor_table() or len( + ctx.origin_varnames()) < 1 or (ctx.is_sparse() == False): return table_proto.table_id = ctx.table_id() table_proto.table_class = self.table_class @@ -641,13 +651,14 @@ class GeoSparseTable(SparseTable): self.common.table_name = self.context['grad_name_to_param_name'][ ctx.origin_varnames()[0]] self.common.parse_by_optimizer(ctx, self.context) - self.common.parse_entry(self.common.table_name, - ctx.program_id(), self.context) + self.common.parse_entry(self.common.table_name, ctx.program_id(), + self.context) self.common.sync = False 
self.common._set(table_proto.common) class DenseTable(Table): + def __init__(self, context, send_ctx): super(DenseTable, self).__init__() self.context = context @@ -656,8 +667,8 @@ class DenseTable(Table): def _set(self, table_proto): ctx = self.ctx - if ctx.is_tensor_table() or len(ctx.origin_varnames()) < 1 or ( - ctx.is_sparse() == True): + if ctx.is_tensor_table() or len( + ctx.origin_varnames()) < 1 or (ctx.is_sparse() == True): return table_proto.table_id = ctx.table_id() @@ -672,14 +683,15 @@ class DenseTable(Table): self.common.table_name = "MergedDense" self.common.parse_by_optimizer(ctx, self.context) - self.common.parse_entry(self.common.table_name, - ctx.program_id(), self.context) + self.common.parse_entry(self.common.table_name, ctx.program_id(), + self.context) self.common.sync = True if self.context['is_sync'] else False self.common._set(table_proto.common) class Server: + def __init__(self): pass @@ -688,6 +700,7 @@ class Server: class DownpourServer(Server): + def __init__(self): super(DownpourServer, self).__init__() @@ -696,6 +709,7 @@ class DownpourServer(Server): class Worker: + def __init__(self): pass @@ -704,6 +718,7 @@ class Worker: class DownpourWorker(Worker): + def __init__(self): super(DownpourWorker, self).__init__() @@ -712,6 +727,7 @@ class DownpourWorker(Worker): class fsClient: + def __init__(self, fs_client_param): self.fs_client_param = fs_client_param @@ -725,6 +741,7 @@ class fsClient: class PsDescBuilder(object): + def __init__(self, context): self.context = context self.is_sync = context['is_sync'] @@ -817,6 +834,7 @@ class PsDescBuilder(object): class TheOnePSRuntime(RuntimeBase): + def __init__(self): super(TheOnePSRuntime, self).__init__() self._communicator = None @@ -839,8 +857,8 @@ class TheOnePSRuntime(RuntimeBase): self.context[ 'is_heter_ps_mode'] = self.role_maker._is_heter_parameter_server_mode self.is_heter_ps_mode = self.context['is_heter_ps_mode'] - self.context['trainer'] = TrainerRuntimeConfig(context[ - 'valid_strategy']) + self.context['trainer'] = TrainerRuntimeConfig( + context['valid_strategy']) self.context['ps_mode'] = self.context['trainer'].mode self.context['use_ps_gpu'] = context['valid_strategy'].a_sync_configs[ 'use_ps_gpu'] @@ -913,8 +931,9 @@ class TheOnePSRuntime(RuntimeBase): main_program._fleet_opt = {} main_program._fleet_opt["use_ps_gpu"] = True gpus_env = os.getenv("FLAGS_selected_gpus") - main_program._fleet_opt[ - "worker_places"] = [int(s) for s in gpus_env.split(",")] + main_program._fleet_opt["worker_places"] = [ + int(s) for s in gpus_env.split(",") + ] def sync_strategy_envs(): kwargs = {} @@ -993,8 +1012,9 @@ class TheOnePSRuntime(RuntimeBase): # for GEO if self.role_maker._is_first_worker() and self.is_heter_ps_mode: # for ps-heter mode load all parameters on first_worker - init_params = get_the_one_recv_context( - self.context, split_dense_table=True, use_origin_program=True) + init_params = get_the_one_recv_context(self.context, + split_dense_table=True, + use_origin_program=True) else: init_params = dense_map @@ -1088,8 +1108,8 @@ class TheOnePSRuntime(RuntimeBase): for var_name in var_names: if var_name not in distributed_varnames: raise ValueError( - "fleet.init server can only load sparse variables in {}". 
- format(distributed_varnames)) + "fleet.init server can only load sparse variables in {}" + .format(distributed_varnames)) load_varnames = var_names if dirname is None or not load_varnames: @@ -1119,6 +1139,7 @@ class TheOnePSRuntime(RuntimeBase): @staticmethod def __exclude_vars(exclude_var_names=[]): + def is_valid(var): if var.name in exclude_var_names: return False @@ -1193,17 +1214,17 @@ class TheOnePSRuntime(RuntimeBase): saved_varnames = sparse_varnames remaining_vars = list( - filter( - TheOnePSRuntime.__exclude_vars(saved_varnames), - main_program.list_vars())) + filter(TheOnePSRuntime.__exclude_vars(saved_varnames), + main_program.list_vars())) import paddle for var in remaining_vars: # if var.name not in recv_dense_varnames: # continue tensor = var.get_value() - paddle.save( - tensor, os.path.join(dirname, var.name), use_binary_format=True) + paddle.save(tensor, + os.path.join(dirname, var.name), + use_binary_format=True) def _ps_inference_save_persistables(self, executor, @@ -1313,16 +1334,14 @@ class TheOnePSRuntime(RuntimeBase): "user_defined_strategy"].trainer_desc_configs["stat_var_names"] generate_vars = [var for var in generate_vars] remaining_vars = list( - filter( - TheOnePSRuntime.__exclude_vars(sparse_names), - infer_program.list_vars())) + filter(TheOnePSRuntime.__exclude_vars(sparse_names), + infer_program.list_vars())) for var in remaining_vars: tensor = var.get_value(scope) - paddle.save( - tensor, - os.path.join(model_path, var.name), - use_binary_format=True) + paddle.save(tensor, + os.path.join(model_path, var.name), + use_binary_format=True) def _save_inference_model(self, *args, **kwargs): self._ps_inference_save_inference_model(*args, **kwargs) @@ -1406,9 +1425,8 @@ class TheOnePSRuntime(RuntimeBase): loaded_varnames = sparse_varnames remaining_vars = list( - filter( - TheOnePSRuntime.__exclude_vars(loaded_varnames), - main_program.list_vars())) + filter(TheOnePSRuntime.__exclude_vars(loaded_varnames), + main_program.list_vars())) if dirname.startswith("afs:") or dirname.startswith("hdfs:"): model_path = "./dnn_plugin" diff --git a/python/paddle/distributed/ps/utils/ps_factory.py b/python/paddle/distributed/ps/utils/ps_factory.py index bea102c837e..d2914b0ac44 100755 --- a/python/paddle/distributed/ps/utils/ps_factory.py +++ b/python/paddle/distributed/ps/utils/ps_factory.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,6 +24,7 @@ __all__ = [ class PsProgramBuilderFactory(object): + def __init__(self): pass diff --git a/python/paddle/distributed/ps/utils/ps_infer_utils.py b/python/paddle/distributed/ps/utils/ps_infer_utils.py index e1663029ef1..97043fd7ba6 100755 --- a/python/paddle/distributed/ps/utils/ps_infer_utils.py +++ b/python/paddle/distributed/ps/utils/ps_infer_utils.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/distributed/ps/utils/ps_program_builder.py b/python/paddle/distributed/ps/utils/ps_program_builder.py index 9e063716758..2d7246d1db9 100755 --- a/python/paddle/distributed/ps/utils/ps_program_builder.py +++ b/python/paddle/distributed/ps/utils/ps_program_builder.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,6 +19,7 @@ from paddle.distributed.passes import new_pass, PassContext class PsProgramBuilder(object): + def __init__(self, pass_ctx): self.pass_ctx = pass_ctx self.attrs = self.pass_ctx._attrs @@ -88,11 +89,12 @@ class PsProgramBuilder(object): elif self.attrs['is_server']: self._build_pserver_programs() self.loss.block.program = self.attrs['_main_server'] - fluid.framework.switch_startup_program(self.attrs[ - '_startup_server']) + fluid.framework.switch_startup_program( + self.attrs['_startup_server']) class GeoPsProgramBuilder(PsProgramBuilder): # 仅 CPU 模式 + def __init__(self, pass_ctx): super(GeoPsProgramBuilder, self).__init__(pass_ctx) if self.ps_mode != DistributedMode.GEO: @@ -117,6 +119,7 @@ class GeoPsProgramBuilder(PsProgramBuilder): # 仅 CPU 模式 class CpuSyncPsProgramBuilder(PsProgramBuilder): + def __init__(self, pass_ctx): super(CpuSyncPsProgramBuilder, self).__init__(pass_ctx) if self.ps_mode != DistributedMode.SYNC and self.ps_mode != DistributedMode.ASYNC: @@ -159,6 +162,7 @@ class CpuSyncPsProgramBuilder(PsProgramBuilder): class CpuAsyncPsProgramBuilder(CpuSyncPsProgramBuilder): + def __init__(self, pass_ctx): super(CpuAsyncPsProgramBuilder, self).__init__(pass_ctx) @@ -195,6 +199,7 @@ class CpuAsyncPsProgramBuilder(CpuSyncPsProgramBuilder): class GpuPsProgramBuilder(PsProgramBuilder): + def __init__(self, pass_ctx): super(GpuPsProgramBuilder, self).__init__(pass_ctx) @@ -227,6 +232,7 @@ class GpuPsProgramBuilder(PsProgramBuilder): class HeterAsyncPsProgramBuilder(PsProgramBuilder): + def __init__(self, pass_ctx): super(HeterAsyncPsProgramBuilder, self).__init__(pass_ctx) @@ -278,17 +284,19 @@ class HeterAsyncPsProgramBuilder(PsProgramBuilder): self._build_trainer_programs() ps_set_heter_pipeline_opt_pass = new_pass( "set_heter_pipeline_opt_pass", self.attrs) - ps_set_heter_pipeline_opt_pass.apply( - [self.cloned_main], [self.cloned_startup], self.pass_ctx) + ps_set_heter_pipeline_opt_pass.apply([self.cloned_main], + [self.cloned_startup], + self.pass_ctx) elif self.attrs['is_server']: self._build_pserver_programs() self.loss.block.program = self.attrs['_main_server'] - fluid.framework.switch_startup_program(self.attrs[ - '_startup_server']) + fluid.framework.switch_startup_program( + self.attrs['_startup_server']) class FlPsProgramBuilder(HeterAsyncPsProgramBuilder): + def __init__(self, pass_ctx): super(FlPsProgramBuilder, self).__init__(pass_ctx) 
@@ -352,12 +360,14 @@ class FlPsProgramBuilder(HeterAsyncPsProgramBuilder): if not self.is_heter_worker: _main_file = ps_log_root_dir + 'final_fl_A_main_program.prototxt' - debug_program(_main_file, self.attrs['origin_main_program'] - ._heter_pipeline_opt['section_program']) + debug_program( + _main_file, self.attrs['origin_main_program']. + _heter_pipeline_opt['section_program']) else: _main_file = ps_log_root_dir + 'final_fl_B_main_program.prototxt' - debug_program(_main_file, self.attrs['origin_main_program'] - ._heter_pipeline_opt['section_program']) + debug_program( + _main_file, self.attrs['origin_main_program']. + _heter_pipeline_opt['section_program']) return @@ -373,6 +383,6 @@ class FlPsProgramBuilder(HeterAsyncPsProgramBuilder): fluid.default_startup_program()._heter_pipeline_opt)) else: self._build_pserver_programs() - fluid.framework.switch_startup_program(self.attrs[ - '_startup_server']) + fluid.framework.switch_startup_program( + self.attrs['_startup_server']) fluid.framework.switch_main_program(self.attrs['_main_server']) diff --git a/python/paddle/distributed/ps/utils/public.py b/python/paddle/distributed/ps/utils/public.py index 223c4e69d9d..a57b30a8c19 100755 --- a/python/paddle/distributed/ps/utils/public.py +++ b/python/paddle/distributed/ps/utils/public.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -61,8 +61,10 @@ DATA_NORM_GRAD_NAME = [x + "@GRAD" for x in DATA_NORM_NAME] def logger_config(log_path, logging_name): logger = logging.getLogger(logging_name) logger.setLevel(level=logging.WARNING) - handler = logging.FileHandler( - log_path, mode='a', encoding='UTF-8', delay=True) + handler = logging.FileHandler(log_path, + mode='a', + encoding='UTF-8', + delay=True) handler.setLevel(logging.INFO) formatter = logging.Formatter( '%(levelname)s - %(asctime)s - %(pathname)s: %(lineno)s - %(message)s') @@ -75,8 +77,8 @@ def logger_config(log_path, logging_name): ps_log_root_dir = './ps_log/' -logger = logger_config( - log_path='./ps_usr_print_log', logging_name='ps_usr_print_log') +logger = logger_config(log_path='./ps_usr_print_log', + logging_name='ps_usr_print_log') class DistributedMode: @@ -88,6 +90,7 @@ class DistributedMode: class TrainerRuntimeConfig(object): + def __init__(self, valid_strategy): self.mode = None num_threads = os.getenv("CPU_NUM", "1") @@ -151,21 +154,22 @@ class TrainerRuntimeConfig(object): send_queue_size = self.runtime_configs[ 'communicator_send_queue_size'] if max_merge_var_num != num_threads: - print('WARNING: In {} mode, communicator_max_merge_var_num ' - 'must be equal to CPU_NUM. But received, ' - 'communicator_max_merge_var_num = {}, CPU_NUM = ' - '{}. communicator_max_merge_var_num will be forced to {}.' - .format(mode_str, max_merge_var_num, num_threads, - num_threads)) + print( + 'WARNING: In {} mode, communicator_max_merge_var_num ' + 'must be equal to CPU_NUM. But received, ' + 'communicator_max_merge_var_num = {}, CPU_NUM = ' + '{}. communicator_max_merge_var_num will be forced to {}.'. 
+ format(mode_str, max_merge_var_num, num_threads, + num_threads)) self.runtime_configs[ 'communicator_max_merge_var_num'] = num_threads if send_queue_size != num_threads: print('WARNING: In {} mode, communicator_send_queue_size ' 'must be equal to CPU_NUM. But received, ' 'communicator_send_queue_size = {}, CPU_NUM = ' - '{}. communicator_send_queue_size will be forced to {}.' - .format(mode_str, send_queue_size, num_threads, - num_threads)) + '{}. communicator_send_queue_size will be forced to {}.'. + format(mode_str, send_queue_size, num_threads, + num_threads)) self.runtime_configs[ 'communicator_send_queue_size'] = num_threads @@ -405,10 +409,9 @@ def get_geo_trainer_send_context(context): var = program.global_block().vars[grad.merged_var.name] var_numel = reduce(lambda x, y: x * y, var.shape[1:]) from paddle.fluid.core import CommContext - sparse_ctx = CommContext(grad_name, [grad_name], - ["127.0.0.1:6071"], [var_numel], - [grad_name], trainer_id, True, True, - is_distributed, idx, False, False, + sparse_ctx = CommContext(grad_name, [grad_name], ["127.0.0.1:6071"], + [var_numel], [grad_name], trainer_id, True, + True, is_distributed, idx, False, False, id(program)) idx += 1 send_ctx[sparse_ctx.var_name()] = sparse_ctx @@ -550,10 +553,8 @@ def find_heter_ops(program, default_device="cpu"): op_list = list(block.ops) sum_op = op_list[var2idx[param_name]] sum_op_inputs = { - sum_op.input_names[0]: [ - block.vars[input] - for input in sum_op.input_arg_names - ] + sum_op.input_names[0]: + [block.vars[input] for input in sum_op.input_arg_names] } sum_op_outputs = { sum_op.output_names[0]: [ @@ -561,12 +562,11 @@ def find_heter_ops(program, default_device="cpu"): for output in sum_op.output_arg_names ] } - block._insert_op( - index=i + 1, - type=sum_op.type, - inputs=sum_op_inputs, - outputs=sum_op_outputs, - attrs=sum_op.all_attrs()) + block._insert_op(index=i + 1, + type=sum_op.type, + inputs=sum_op_inputs, + outputs=sum_op_outputs, + attrs=sum_op.all_attrs()) block._remove_op(var2idx[param_name] + 1) var2idx.pop(param_name) for var_ in var2idx: @@ -604,12 +604,11 @@ def find_heter_ops(program, default_device="cpu"): for output in sum_op.output_arg_names ] } - block._insert_op( - index=i + 1, - type=sum_op.type, - inputs=sum_op_inputs, - outputs=sum_op_outputs, - attrs=sum_op.all_attrs()) + block._insert_op(index=i + 1, + type=sum_op.type, + inputs=sum_op_inputs, + outputs=sum_op_outputs, + attrs=sum_op.all_attrs()) block._remove_op(var2idx[no_grad_var] + 1) var2idx.pop(no_grad_var) for var_ in var2idx: @@ -624,8 +623,8 @@ def find_heter_ops(program, default_device="cpu"): forward_op_type = pre_op.type.split("_grad")[0] if forward_op_type in SPARSE_OP_TYPE_DICT.keys() \ and pre_op.attr('remote_prefetch') is True: - param_name = pre_op.input(SPARSE_OP_TYPE_DICT[ - forward_op_type])[0] + param_name = pre_op.input( + SPARSE_OP_TYPE_DICT[forward_op_type])[0] if param_name == origin_var and op.attr( "op_device") == pre_op.attr("op_device"): continue @@ -725,7 +724,8 @@ def find_heter_ops(program, default_device="cpu"): if len(heter_ops) == 0: warnings.warn( "No heterogeneous OP was found in your program , " - " please using fluid.device_guard() to run OPs on different device.") + " please using fluid.device_guard() to run OPs on different device." 
+ ) total_heter_ops = 0 heter_blocks = 0 @@ -735,8 +735,8 @@ def find_heter_ops(program, default_device="cpu"): for _, heter_block in heter_block_dict.items(): total_heter_ops += len(heter_block) print( - "There are {} OPs in your main_program, and contains {} heter-OPs which is made up of {} heter-blocks.". - format(len(block.ops), total_heter_ops, heter_blocks)) + "There are {} OPs in your main_program, and contains {} heter-OPs which is made up of {} heter-blocks." + .format(len(block.ops), total_heter_ops, heter_blocks)) return origin_porgram, heter_ops, default_ops, program_block_ops @@ -756,9 +756,8 @@ def union_forward_gradient_op(program_block_ops_list): assert block_length % 2 != 0, "the length of program_block_ops_list should be odd" for i in range(0, block_length // 2): block_op_list = {"forward": program_block_ops_list[i]} - block_op_list.update({ - "backward": program_block_ops_list[block_length - 1 - i] - }) + block_op_list.update( + {"backward": program_block_ops_list[block_length - 1 - i]}) union_program_block_ops_list.append(block_op_list) block_op_list = {"forward": [], "backward": []} @@ -776,8 +775,9 @@ def find_block_joints(program, program_block_ops_list, heter_ops): program_block_ops_list) block_var_detail = entrance_exit_check(program, program_block_ops_list, block_var_detail, heter_ops) - block_var_detail = delete_block_useless_exit( - program, program_block_ops_list, block_var_detail) + block_var_detail = delete_block_useless_exit(program, + program_block_ops_list, + block_var_detail) return block_var_detail @@ -822,8 +822,8 @@ def find_entrance_exit_private(program, program_block_ops_list): bp_block_input, bp_block_output = find_ops_list_input_output( program, block_op_list["backward"]) bp_persistables = screen_persistables( - program, bp_block_input) + screen_persistables(program, - bp_block_output) + program, bp_block_input) + screen_persistables( + program, bp_block_output) # find entrance & exit bp_block_private_vars = list(set(bp_block_input) & set(bp_block_output)) bp_block_entrance = list( @@ -871,10 +871,10 @@ def entrance_exit_check(program, program_block_ops_list, block_var_detail, #need_add_vars = find_need_var_from_previous_block( # need_add_vars, block_var_detail, index, heter_ops) - previous_block_private = block_var_detail[index - 1]["forward"][ - "private"] - previous_block_entrance = block_var_detail[index - 1]["forward"][ - "entrance"] + previous_block_private = block_var_detail[index - + 1]["forward"]["private"] + previous_block_entrance = block_var_detail[index - + 1]["forward"]["entrance"] for var in need_add_vars: if var not in previous_block_private and var not in previous_block_entrance: previous_block_entrance.append(var) @@ -900,10 +900,10 @@ def entrance_exit_check(program, program_block_ops_list, block_var_detail, need_ignore_vars.append(var) need_add_vars = list( set(need_add_vars).difference(set(need_ignore_vars))) - previous_block_private = block_var_detail[index + 1]["backward"][ - "private"] - previous_block_entrance = block_var_detail[index + 1]["backward"][ - "entrance"] + previous_block_private = block_var_detail[index + + 1]["backward"]["private"] + previous_block_entrance = block_var_detail[index + + 1]["backward"]["entrance"] for var in need_add_vars: if var not in previous_block_private and var not in previous_block_entrance: previous_block_entrance.append(var) @@ -931,8 +931,8 @@ def delete_block_useless_exit(program, program_block_ops_list, if index - 1 < 0: break current_block_exit = 
block_var_detail[index]["backward"]["exit"] - next_block_entrance = block_var_detail[index - 1]["backward"][ - "entrance"] + next_block_entrance = block_var_detail[index - + 1]["backward"]["entrance"] need_delete_var = [] for var in current_block_exit: if var not in next_block_entrance: @@ -982,8 +982,8 @@ def add_vars_by_var_list(var_name_list, origin_program, program, block): ).vars and var_name not in block.vars: var = origin_program.global_block().vars[var_name] if var.persistable: - program.global_block()._clone_variable( - var, force_persistable=False) + program.global_block()._clone_variable(var, + force_persistable=False) else: block._clone_variable(var, force_persistable=False) @@ -1085,8 +1085,10 @@ def block_append_op(program, origin_program, block, op): if "_grad" not in op.type: # for forward op - return block.append_op( - type=op.type, inputs=inputs, outputs=outputs, attrs=op.all_attrs()) + return block.append_op(type=op.type, + inputs=inputs, + outputs=outputs, + attrs=op.all_attrs()) else: # for grad op op_desc = op.desc @@ -1137,22 +1139,24 @@ def insert_communicate_op(orign_program, comm_info = get_communicate_var_info(orign_program, stage_id - 1, entrance_var, "backward") - heter_block._insert_op( - index=first_op_index, - type="send_and_recv", - inputs={"X": heter_block.vars[entrance_var[0]]}, - outputs={"Out": []}, - attrs={ - "mode": "forward" if is_forward else "backward", - "send_var_name": entrance_var + ["microbatch_id"], - "recv_var_name": [], - "message_name": comm_info["block_input_var_name"], - "next_endpoints": next_heter_worker_endpoints, - "previous_endpoints": previous_heter_worker_endpoints, - "trainer_id": get_role_id(role_maker), - "op_device": device, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) + heter_block._insert_op(index=first_op_index, + type="send_and_recv", + inputs={"X": heter_block.vars[entrance_var[0]]}, + outputs={"Out": []}, + attrs={ + "mode": "forward" if is_forward else "backward", + "send_var_name": + entrance_var + ["microbatch_id"], + "recv_var_name": [], + "message_name": + comm_info["block_input_var_name"], + "next_endpoints": next_heter_worker_endpoints, + "previous_endpoints": + previous_heter_worker_endpoints, + "trainer_id": get_role_id(role_maker), + "op_device": device, + RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE + }) return entrance_var @@ -1182,11 +1186,10 @@ def get_the_one_recv_context(context, param_names.append(param_name) recv_id_maps[ctx.table_id()] = param_names else: - send_ctx = get_the_one_send_context( - context, - split_dense_table=False, - use_origin_program=False, - ep_list=None) + send_ctx = get_the_one_send_context(context, + split_dense_table=False, + use_origin_program=False, + ep_list=None) for idx, (name, ctx) in enumerate(send_ctx.items()): if not ctx.is_sparse(): continue @@ -1239,6 +1242,7 @@ def get_var_mem_size(var): class MergedVariable: + def __init__(self, merged, ordered, offsets): self.merged_var = merged self.ordered_vars = ordered @@ -1342,6 +1346,7 @@ def _is_opt_role_op(op): def get_param_grads(origin_program): + def _get_params_grads(sparse_varnames): block = origin_program.global_block() @@ -1418,6 +1423,7 @@ def find_op_input_output(program, block, op): def add_send_op(program, block, _vars): + def _get_send_op_dict(): send_op_dict = {} send_op_list = find_send_op(program) @@ -1457,16 +1463,16 @@ def add_send_op(program, block, _vars): block.vars[union_var] for union_var in table_dict[table_id]['var_list'] ] - block.append_op( - type="send", - inputs={"X": send_input_vars}, - 
outputs={"Out": dummy_output}, - attrs={ - "send_varnames": table_dict[table_id]['send_varnames'], - "is_sparse": is_sparse, - "table_id": table_id, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) + block.append_op(type="send", + inputs={"X": send_input_vars}, + outputs={"Out": dummy_output}, + attrs={ + "send_varnames": + table_dict[table_id]['send_varnames'], + "is_sparse": is_sparse, + "table_id": table_id, + RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE + }) return send_grad_var_list @@ -1527,8 +1533,8 @@ def is_backward_op(op): def is_forward_op(op): - return op_role_attr_name in op.attr_names and ( - int(op.attr(op_role_attr_name)) == int(op_role.Forward)) + return op_role_attr_name in op.attr_names and (int( + op.attr(op_role_attr_name)) == int(op_role.Forward)) def is_push_sparse_op(op): diff --git a/python/paddle/distributed/sharding/__init__.py b/python/paddle/distributed/sharding/__init__.py index d14e3dd099f..e938c12d5af 100644 --- a/python/paddle/distributed/sharding/__init__.py +++ b/python/paddle/distributed/sharding/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/distributed/sharding/group_sharded.py b/python/paddle/distributed/sharding/group_sharded.py index 4c22028b230..ad270c1a517 100644 --- a/python/paddle/distributed/sharding/group_sharded.py +++ b/python/paddle/distributed/sharding/group_sharded.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -105,8 +105,8 @@ def group_sharded_parallel(model, assert isinstance( optimizer, Optimizer ), "The optimizer must be the instance of paddle.optimizer.Optimizer." - assert level in ['os', 'os_g', 'p_g_os' - ], "The level must be os, os_g or p_g_os." + assert level in ['os', 'os_g', + 'p_g_os'], "The level must be os, os_g or p_g_os." 
def check_dtype(param): return param.dtype == paddle.float16 @@ -125,43 +125,38 @@ def group_sharded_parallel(model, optim=optimizer, group=group, offload=offload) - model = GroupShardedStage2( - model, - optimizer, - group=group, - sync_buffers=sync_buffers, - buffer_max_size=buffer_max_size) + model = GroupShardedStage2(model, + optimizer, + group=group, + sync_buffers=sync_buffers, + buffer_max_size=buffer_max_size) else: - optimizer = ShardingOptimizerStage2( - params=model.parameters(), - optim=optimizer, - group=group, - offload=offload) - model = ShardingStage2( - model, - optimizer, - group=group, - sync_buffers=sync_buffers, - buffer_max_size=buffer_max_size) + optimizer = ShardingOptimizerStage2(params=model.parameters(), + optim=optimizer, + group=group, + offload=offload) + model = ShardingStage2(model, + optimizer, + group=group, + sync_buffers=sync_buffers, + buffer_max_size=buffer_max_size) elif level == 'p_g_os': if in_dygraph_mode(): - model = GroupShardedStage3( - model, - optimizer=optimizer, - group=group, - sync_buffers=sync_buffers, - segment_size=segment_size, - offload=offload, - sync_comm=sync_comm) + model = GroupShardedStage3(model, + optimizer=optimizer, + group=group, + sync_buffers=sync_buffers, + segment_size=segment_size, + offload=offload, + sync_comm=sync_comm) else: - model = ShardingStage3( - model, - optimizer=optimizer, - group=group, - sync_buffers=sync_buffers, - segment_size=segment_size, - offload=offload, - sync_comm=sync_comm) + model = ShardingStage3(model, + optimizer=optimizer, + group=group, + sync_buffers=sync_buffers, + segment_size=segment_size, + offload=offload, + sync_comm=sync_comm) else: raise ValueError("Please enter the correct level.") if params_fp16 and isinstance(scaler, paddle.amp.GradScaler): @@ -238,7 +233,8 @@ def save_group_sharded_model(model, output, optimizer=None): paddle.save(model._layer.state_dict(), output_model) else: raise ValueError( - "Please use the layer which is wrapped with group_sharded_parallel.") + "Please use the layer which is wrapped with group_sharded_parallel." + ) if optimizer is not None: assert hasattr( diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py index 66545a8a249..c0ff2bc273d 100644 --- a/python/paddle/distributed/spawn.py +++ b/python/paddle/distributed/spawn.py @@ -38,6 +38,7 @@ __all__ = [] class ParallelEnvArgs(object): + def __init__(self): # Paddle cluster nodes ips, such as 192.168.0.16,192.168.0.17.. self.cluster_node_ips = None @@ -55,9 +56,9 @@ class ParallelEnvArgs(object): # Print the config or not self.print_config = True - # It's for gpu training and the training process will run - # on the selected_devices, each process is bound to a single GPU. - # And if it's not set, this module will use all the gpu cards + # It's for gpu training and the training process will run + # on the selected_devices, each process is bound to a single GPU. + # And if it's not set, this module will use all the gpu cards # for training. self.selected_devices = None @@ -105,8 +106,8 @@ def _get_default_nprocs(): return multiprocessing.cpu_count() else: raise RuntimeError( - "`paddle.distributed.spawn` does not support parallel training on device `{}` now.". - format(device)) + "`paddle.distributed.spawn` does not support parallel training on device `{}` now." 
+ .format(device)) def _get_default_backend(): @@ -121,8 +122,8 @@ def _get_default_backend(): return 'gloo' else: raise RuntimeError( - "`paddle.distributed.spawn` does not support parallel training on device `{}` now.". - format(device)) + "`paddle.distributed.spawn` does not support parallel training on device `{}` now." + .format(device)) def _get_node_ip(ips): @@ -136,9 +137,9 @@ def _get_node_ip(ips): def _get_subprocess_env_list(nprocs, options): - # NOTE (xiongkun03) Why put backend deduction here ? - # Becase _get_subprocess_env_list is used by many testcases. - # So for campability, we put backend deduction here + # NOTE (xiongkun03) Why put backend deduction here ? + # Becase _get_subprocess_env_list is used by many testcases. + # So for campability, we put backend deduction here # logic for handle backend option if 'backend' not in options or options['backend'] == 'auto': @@ -329,8 +330,8 @@ def _remove_risky_env(): def _set_trainer_env(env_dict, backend): # NOTE(chenweihang): [ Why need set FLAGS_selected_gpus or FLAGS_selected_xpus here? ] - # When the child process starts, it will inherit the configuration of the - # main process and set the FLAGS once, but the environment variable has + # When the child process starts, it will inherit the configuration of the + # main process and set the FLAGS once, but the environment variable has # not been set at this time, which leads to the FLAGS_selected_gpus or FLAGS_selected_xpus # is keep same with mainprocess(usually empty), so manually update the flags here @@ -344,8 +345,8 @@ def _set_trainer_env(env_dict, backend): elif backend == 'cncl': set_flags({'FLAGS_selected_mlus': env_dict['FLAGS_selected_mlus']}) else: - #NOTE(xiongkun) why not raise Error ? - # So far, we added support for CPU parallel, and will be applied when paddle is not + #NOTE(xiongkun) why not raise Error ? + # So far, we added support for CPU parallel, and will be applied when paddle is not # compiled with cuda or xp. just do nothing. pass @@ -371,13 +372,14 @@ def _func_wrapper(func, args, error_queue, return_queue, env_dict, backend): class MultiprocessContext(object): + def __init__(self, processes, error_queues, return_queues): _py_supported_check() self.error_queues = error_queues - # NOTE(chenweihang): The `spawn` method is mainly used - # to wrap the outermost execution function of the program for - # parallel execution. Generally, the return value is not concerned, - # but if the user needs to obtain the return value, users can get + # NOTE(chenweihang): The `spawn` method is mainly used + # to wrap the outermost execution function of the program for + # parallel execution. Generally, the return value is not concerned, + # but if the user needs to obtain the return value, users can get # the return result of each process from context.return_queues self.return_queues = return_queues self.processes = processes @@ -390,8 +392,8 @@ class MultiprocessContext(object): if len(self.sentinels) == 0: return True - ready = multiprocessing.connection.wait( - self.sentinels.keys(), timeout=timeout) + ready = multiprocessing.connection.wait(self.sentinels.keys(), + timeout=timeout) error_index = None for sentinel in ready: @@ -554,12 +556,12 @@ def spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options): """ # NOTE(chenweihang): [ why only supports python3.4+ ? ] # Python supported setting the child process startup method - # since 3.4. 
The previous version can only use the default startup - # method, while the default startup method of Unix is fork, which + # since 3.4. The previous version can only use the default startup + # method, while the default startup method of Unix is fork, which # cannot support CUDA runtime multi-process _py_supported_check() - # Give an error hint when the users enter a configuration option + # Give an error hint when the users enter a configuration option # that does not exist _options_valid_check(options) @@ -568,15 +570,15 @@ def spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options): nprocs = _get_default_nprocs() # NOTE(chenweihang): [ why need get cluster info before run? ] - # when using `paddle.distributed.spawn` start parallel training, - # we should get cluster info before starting subprocess, and pass + # when using `paddle.distributed.spawn` start parallel training, + # we should get cluster info before starting subprocess, and pass # correct info to each subprocess procs_env_list = _get_subprocess_env_list(nprocs, options) # start processes # NOTE(chenweihang): [ why default start method is spawn? ] - # The CUDA runtime does not support the fork start method, - # either the spawn or forkserver start method are required + # The CUDA runtime does not support the fork start method, + # either the spawn or forkserver start method are required # to use CUDA in subprocesses. start_method = options.get('start_method', None) if start_method is None: @@ -589,10 +591,9 @@ def spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options): for i in range(nprocs): error_queue = mp.SimpleQueue() return_queue = mp.SimpleQueue() - process = mp.Process( - target=_func_wrapper, - args=(func, args, error_queue, return_queue, procs_env_list[i], - options['backend'])) + process = mp.Process(target=_func_wrapper, + args=(func, args, error_queue, return_queue, + procs_env_list[i], options['backend'])) process.daemon = daemon process.start() error_queues.append(error_queue) diff --git a/python/paddle/distributed/utils.py b/python/paddle/distributed/utils.py index 30cd63ed80e..ec8ef80d5da 100644 --- a/python/paddle/distributed/utils.py +++ b/python/paddle/distributed/utils.py @@ -33,24 +33,24 @@ from paddle.fluid.framework import _non_static_mode from paddle.fluid.data_feeder import check_variable_and_dtype from paddle import _C_ops -__all__ = [ #noqa - 'get_host_name_ip', - 'Trainer', - 'get_cluster', - 'start_local_trainers', - 'watch_local_trainers', - 'find_free_ports', - 'JobServer', - 'Cluster', - 'Pod', - 'Hdfs', - 'add_arguments', - 'terminate_local_procs', - 'TrainerProc', - 'get_logger', - 'pull_worker_log', - 'global_scatter', - 'global_gather', +__all__ = [ #noqa + 'get_host_name_ip', + 'Trainer', + 'get_cluster', + 'start_local_trainers', + 'watch_local_trainers', + 'find_free_ports', + 'JobServer', + 'Cluster', + 'Pod', + 'Hdfs', + 'add_arguments', + 'terminate_local_procs', + 'TrainerProc', + 'get_logger', + 'pull_worker_log', + 'global_scatter', + 'global_gather', ] @@ -163,16 +163,17 @@ def global_scatter(x, helper = LayerHelper(op_type, **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type=op_type, - inputs={ - 'X': [x], - 'local_count': [local_count], - 'global_count': [global_count], - }, - outputs={'Out': [out]}, - attrs={'ring_id': ring_id, - 'use_calc_stream': use_calc_stream}) + helper.append_op(type=op_type, + inputs={ + 'X': [x], + 'local_count': [local_count], + 'global_count': [global_count], + }, + 
outputs={'Out': [out]}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': use_calc_stream + }) return out @@ -276,18 +277,17 @@ def global_gather(x, helper = LayerHelper(op_type, **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type=op_type, - inputs={ - 'X': [x], - 'local_count': [local_count], - 'global_count': [global_count] - }, - outputs={'Out': [out]}, - attrs={ - 'ring_id': group, - 'use_calc_stream': use_calc_stream, - }) + helper.append_op(type=op_type, + inputs={ + 'X': [x], + 'local_count': [local_count], + 'global_count': [global_count] + }, + outputs={'Out': [out]}, + attrs={ + 'ring_id': group, + 'use_calc_stream': use_calc_stream, + }) return out @@ -362,6 +362,7 @@ def _print_arguments(args): class Hdfs(object): + def __init__(self): self.hdfs_ugi = None self.hdfs_name = None @@ -386,6 +387,7 @@ class Hdfs(object): class Cluster(object): + def __init__(self, hdfs): self.job_server = None self.pods = [] @@ -448,6 +450,7 @@ class Cluster(object): class JobServer(object): + def __init__(self): self.endpoint = None @@ -462,6 +465,7 @@ class JobServer(object): class Trainer(object): + def __init__(self): self.gpus = [] self.endpoint = None @@ -493,6 +497,7 @@ class Trainer(object): class Pod(object): + def __init__(self): self.rank = None self.id = None @@ -631,15 +636,15 @@ def add_arguments(argname, type, default, help, argparser, **kwargs): args = parser.parse_args() """ type = strtobool if type == bool else type - argparser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) + argparser.add_argument("--" + argname, + default=default, + type=type, + help=help + ' Default: %(default)s.', + **kwargs) def find_free_ports(num): + def __free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: s.bind(('', 0)) @@ -712,6 +717,7 @@ def _prepare_trainer_env(cluster, trainer, backend=None): class TrainerProc(object): + def __init__(self): self.proc = None self.log_fn = None @@ -808,14 +814,14 @@ def watch_local_trainers(procs, nranks): raise except SystemExit: logger.error( - "ABORT!!! Out of all {} trainers, the trainer process with rank={} was aborted. Please check its log.". - format(nranks, error_rank)) + "ABORT!!! Out of all {} trainers, the trainer process with rank={} was aborted. Please check its log." + .format(nranks, error_rank)) terminate_local_procs(procs) raise except: logger.error( - "ABORT!!! Out of all {} trainers, the trainer process with rank={} was aborted. Please check its log.". - format(nranks, error_rank)) + "ABORT!!! Out of all {} trainers, the trainer process with rank={} was aborted. Please check its log." 
+ .format(nranks, error_rank)) terminate_local_procs(procs) raise diff --git a/python/paddle/distribution/__init__.py b/python/paddle/distribution/__init__.py index 3a9af812add..64d59b04864 100644 --- a/python/paddle/distribution/__init__.py +++ b/python/paddle/distribution/__init__.py @@ -28,18 +28,9 @@ from paddle.distribution.transformed_distribution import \ from paddle.distribution.uniform import Uniform __all__ = [ # noqa - 'Beta', - 'Categorical', - 'Dirichlet', - 'Distribution', - 'ExponentialFamily', - 'Multinomial', - 'Normal', - 'Uniform', - 'kl_divergence', - 'register_kl', - 'Independent', - 'TransformedDistribution' + 'Beta', 'Categorical', 'Dirichlet', 'Distribution', 'ExponentialFamily', + 'Multinomial', 'Normal', 'Uniform', 'kl_divergence', 'register_kl', + 'Independent', 'TransformedDistribution' ] __all__.extend(transform.__all__) diff --git a/python/paddle/distribution/categorical.py b/python/paddle/distribution/categorical.py index 97a3df490b1..fffcd94ad68 100644 --- a/python/paddle/distribution/categorical.py +++ b/python/paddle/distribution/categorical.py @@ -162,8 +162,8 @@ class Categorical(distribution.Distribution): sample_shape = shape logits = self.logits - sample_index = multinomial( - self._logits_to_probs(logits), num_samples, True) + sample_index = multinomial(self._logits_to_probs(logits), num_samples, + True) # multinomial sample shape is (logits.shape[:-1], num_samples), need to # tanspose to (num_samples, logits.shape[:-1]) @@ -220,11 +220,12 @@ class Categorical(distribution.Distribution): z = paddle.sum(e_logits, axis=-1, keepdim=True) other_z = paddle.sum(other_e_logits, axis=-1, keepdim=True) prob = e_logits / z - kl = paddle.sum(prob * ( - logits - paddle.log(z) - other_logits + paddle.log(other_z)), - axis=-1, - keepdim=True, - name=name) + kl = paddle.sum( + prob * + (logits - paddle.log(z) - other_logits + paddle.log(other_z)), + axis=-1, + keepdim=True, + name=name) return kl @@ -300,17 +301,16 @@ class Categorical(distribution.Distribution): """ name = self.name + '_probs' if len(self._prob.shape) == 1: # batch_shape is empty - return paddle.gather( - self._prob, value.reshape( - [-1], name=name), name=name).reshape( - value.shape, name=name) + return paddle.gather(self._prob, + value.reshape([-1], name=name), + name=name).reshape(value.shape, name=name) else: if len(value.shape) == 1: return paddle.take_along_axis( self._prob, - paddle.reshape( - value, (len(self._prob.shape) - 1) * [1] + [-1], - name=name), + paddle.reshape(value, + (len(self._prob.shape) - 1) * [1] + [-1], + name=name), axis=-1) else: return paddle.take_along_axis(self._prob, value, axis=-1) diff --git a/python/paddle/distribution/constraint.py b/python/paddle/distribution/constraint.py index d094a7607da..4cde3d30a56 100644 --- a/python/paddle/distribution/constraint.py +++ b/python/paddle/distribution/constraint.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,11 +23,13 @@ class Constraint(object): class Real(Constraint): + def __call__(self, value): return value == value class Range(Constraint): + def __init__(self, lower, upper): self._lower = lower self._upper = upper @@ -38,14 +40,16 @@ class Range(Constraint): class Positive(Constraint): + def __call__(self, value): return value >= 0. class Simplex(Constraint): + def __call__(self, value): - return paddle.all(value >= 0, axis=-1) and ( - (value.sum(-1) - 1).abs() < 1e-6) + return paddle.all(value >= 0, + axis=-1) and ((value.sum(-1) - 1).abs() < 1e-6) real = Real() diff --git a/python/paddle/distribution/dirichlet.py b/python/paddle/distribution/dirichlet.py index 740f850b7c1..63466bda7c0 100644 --- a/python/paddle/distribution/dirichlet.py +++ b/python/paddle/distribution/dirichlet.py @@ -125,8 +125,8 @@ class Dirichlet(exponential_family.ExponentialFamily): Args: value (Tensor): Value to be evaluated. """ - return ((paddle.log(value) * (self.concentration - 1.0) - ).sum(-1) + paddle.lgamma(self.concentration.sum(-1)) - + return ((paddle.log(value) * (self.concentration - 1.0)).sum(-1) + + paddle.lgamma(self.concentration.sum(-1)) - paddle.lgamma(self.concentration).sum(-1)) def entropy(self): @@ -139,9 +139,9 @@ class Dirichlet(exponential_family.ExponentialFamily): k = self.concentration.shape[-1] return (paddle.lgamma(self.concentration).sum(-1) - paddle.lgamma(concentration0) - - (k - concentration0) * paddle.digamma(concentration0) - ( - (self.concentration - 1.0 - ) * paddle.digamma(self.concentration)).sum(-1)) + (k - concentration0) * paddle.digamma(concentration0) - + ((self.concentration - 1.0) * + paddle.digamma(self.concentration)).sum(-1)) @property def _natural_parameters(self): @@ -164,9 +164,8 @@ def _dirichlet(concentration, name=None): helper = LayerHelper(op_type, **locals()) out = helper.create_variable_for_type_inference( dtype=concentration.dtype) - helper.append_op( - type=op_type, - inputs={"Alpha": concentration}, - outputs={'Out': out}, - attrs={}) + helper.append_op(type=op_type, + inputs={"Alpha": concentration}, + outputs={'Out': out}, + attrs={}) return out diff --git a/python/paddle/distribution/distribution.py b/python/paddle/distribution/distribution.py index 1c8edfa138d..901f5e88e0c 100644 --- a/python/paddle/distribution/distribution.py +++ b/python/paddle/distribution/distribution.py @@ -177,8 +177,8 @@ class Distribution(object): arg = [arg] if not isinstance(arg, (list, tuple, np.ndarray, tensor.Variable)): raise TypeError( - "Type of input args must be float, list, numpy.ndarray or Tensor, but received type {}". - format(type(arg))) + "Type of input args must be float, list, numpy.ndarray or Tensor, but received type {}" + .format(type(arg))) arg_np = np.array(arg) arg_dtype = arg_np.dtype diff --git a/python/paddle/distribution/exponential_family.py b/python/paddle/distribution/exponential_family.py index e0236f9e6e2..b78e7749704 100644 --- a/python/paddle/distribution/exponential_family.py +++ b/python/paddle/distribution/exponential_family.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -62,8 +62,9 @@ class ExponentialFamily(distribution.Distribution): log_norm = self._log_normalizer(*natural_parameters) if _non_static_mode(): - grads = paddle.grad( - log_norm.sum(), natural_parameters, create_graph=True) + grads = paddle.grad(log_norm.sum(), + natural_parameters, + create_graph=True) else: grads = paddle.static.gradients(log_norm.sum(), natural_parameters) diff --git a/python/paddle/distribution/independent.py b/python/paddle/distribution/independent.py index 3534a31591b..884c34b4b6a 100644 --- a/python/paddle/distribution/independent.py +++ b/python/paddle/distribution/independent.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -51,7 +51,8 @@ class Independent(distribution.Distribution): def __init__(self, base, reinterpreted_batch_rank): if not isinstance(base, distribution.Distribution): raise TypeError( - f"Expected type of 'base' is Distribution, but got {type(base)}") + f"Expected type of 'base' is Distribution, but got {type(base)}" + ) if not (0 < reinterpreted_batch_rank <= len(base.batch_shape)): raise ValueError( f"Expected 0 < reinterpreted_batch_rank <= {len(base.batch_shape)}, but got {reinterpreted_batch_rank}" @@ -60,11 +61,11 @@ class Independent(distribution.Distribution): self._reinterpreted_batch_rank = reinterpreted_batch_rank shape = base.batch_shape + base.event_shape - super(Independent, self).__init__( - batch_shape=shape[:len(base.batch_shape) - - reinterpreted_batch_rank], - event_shape=shape[len(base.batch_shape) - - reinterpreted_batch_rank:]) + super(Independent, + self).__init__(batch_shape=shape[:len(base.batch_shape) - + reinterpreted_batch_rank], + event_shape=shape[len(base.batch_shape) - + reinterpreted_batch_rank:]) @property def mean(self): @@ -78,8 +79,8 @@ class Independent(distribution.Distribution): return self._base.sample(shape) def log_prob(self, value): - return self._sum_rightmost( - self._base.log_prob(value), self._reinterpreted_batch_rank) + return self._sum_rightmost(self._base.log_prob(value), + self._reinterpreted_batch_rank) def prob(self, value): return self.log_prob(value).exp() diff --git a/python/paddle/distribution/kl.py b/python/paddle/distribution/kl.py index 6310214117e..c5ad3f04358 100644 --- a/python/paddle/distribution/kl.py +++ b/python/paddle/distribution/kl.py @@ -83,8 +83,8 @@ def register_kl(cls_p, cls_q): def kl_beta_beta(): pass # insert implementation here """ - if (not issubclass(cls_p, Distribution) or - not issubclass(cls_q, Distribution)): + if (not issubclass(cls_p, Distribution) + or not issubclass(cls_q, Distribution)): raise TypeError('cls_p and cls_q must be subclass of Distribution') def decorator(f): @@ -117,6 +117,7 @@ def _dispatch(cls_p, cls_q): @functools.total_ordering class _Compare(object): 
+ def __init__(self, *classes): self.classes = classes @@ -136,20 +137,20 @@ class _Compare(object): def _kl_beta_beta(p, q): return ((q.alpha.lgamma() + q.beta.lgamma() + (p.alpha + p.beta).lgamma()) - (p.alpha.lgamma() + p.beta.lgamma() + (q.alpha + q.beta).lgamma()) + - ((p.alpha - q.alpha) * p.alpha.digamma()) + ( - (p.beta - q.beta) * p.beta.digamma()) + ( - ((q.alpha + q.beta) - - (p.alpha + p.beta)) * (p.alpha + p.beta).digamma())) + ((p.alpha - q.alpha) * p.alpha.digamma()) + + ((p.beta - q.beta) * p.beta.digamma()) + + (((q.alpha + q.beta) - (p.alpha + p.beta)) * + (p.alpha + p.beta).digamma())) @register_kl(Dirichlet, Dirichlet) def _kl_dirichlet_dirichlet(p, q): return ( (p.concentration.sum(-1).lgamma() - q.concentration.sum(-1).lgamma()) - - ((p.concentration.lgamma() - q.concentration.lgamma()).sum(-1)) + ( - ((p.concentration - q.concentration) * - (p.concentration.digamma() - - p.concentration.sum(-1).digamma().unsqueeze(-1))).sum(-1))) + ((p.concentration.lgamma() - q.concentration.lgamma()).sum(-1)) + + (((p.concentration - q.concentration) * + (p.concentration.digamma() - + p.concentration.sum(-1).digamma().unsqueeze(-1))).sum(-1))) @register_kl(Categorical, Categorical) @@ -186,15 +187,15 @@ def _kl_expfamily_expfamily(p, q): try: if _non_static_mode(): - p_grads = paddle.grad( - p_log_norm, p_natural_params, create_graph=True) + p_grads = paddle.grad(p_log_norm, + p_natural_params, + create_graph=True) else: p_grads = paddle.static.gradients(p_log_norm, p_natural_params) except RuntimeError as e: raise TypeError( - "Cann't compute kl_divergence({cls_p}, {cls_q}) use bregman divergence. Please register_kl({cls_p}, {cls_q}).". - format( - cls_p=type(p).__name__, cls_q=type(q).__name__)) from e + "Cann't compute kl_divergence({cls_p}, {cls_q}) use bregman divergence. Please register_kl({cls_p}, {cls_q})." 
+ .format(cls_p=type(p).__name__, cls_q=type(q).__name__)) from e kl = q._log_normalizer(*q_natural_params) - p_log_norm for p_param, q_param, p_grad in zip(p_natural_params, q_natural_params, diff --git a/python/paddle/distribution/multinomial.py b/python/paddle/distribution/multinomial.py index 837eb53eab1..424ec4b120d 100644 --- a/python/paddle/distribution/multinomial.py +++ b/python/paddle/distribution/multinomial.py @@ -145,9 +145,11 @@ class Multinomial(distribution.Distribution): if not isinstance(shape, Iterable): raise TypeError('sample shape must be Iterable object.') - samples = self._categorical.sample([self.total_count, ] + list(shape)) - return paddle.nn.functional.one_hot( - samples, self.probs.shape[-1]).cast(self.probs.dtype).sum(0) + samples = self._categorical.sample([ + self.total_count, + ] + list(shape)) + return paddle.nn.functional.one_hot(samples, self.probs.shape[-1]).cast( + self.probs.dtype).sum(0) def entropy(self): """entropy of multinomial distribution @@ -155,16 +157,18 @@ class Multinomial(distribution.Distribution): Returns: Tensor: entropy value """ - n = paddle.full( - shape=[1], fill_value=self.total_count, dtype=self.probs.dtype) + n = paddle.full(shape=[1], + fill_value=self.total_count, + dtype=self.probs.dtype) support = paddle.arange( - self.total_count + 1, dtype=self.probs.dtype).reshape((-1, ) + ( - 1, ) * len(self.probs.shape))[1:] + self.total_count + 1, + dtype=self.probs.dtype).reshape((-1, ) + + (1, ) * len(self.probs.shape))[1:] binomial_pmf = paddle.exp(self._binomial_logpmf(n, support)) - return ((n * self._categorical.entropy() - paddle.lgamma(n + 1)) + ( - (binomial_pmf * paddle.lgamma(support + 1)).sum([0, -1]))) + return ((n * self._categorical.entropy() - paddle.lgamma(n + 1)) + + ((binomial_pmf * paddle.lgamma(support + 1)).sum([0, -1]))) def _binomial_logpmf(self, count, value): logits = self._probs_to_logits(self.probs, is_binary=True) @@ -173,8 +177,9 @@ class Multinomial(distribution.Distribution): factor_k = paddle.lgamma(value + 1) factor_nmk = paddle.lgamma(count - value + 1) - norm = (count * _clip_by_zero(logits) + count * - paddle.log1p(paddle.exp(-paddle.abs(logits))) - factor_n) + norm = (count * _clip_by_zero(logits) + + count * paddle.log1p(paddle.exp(-paddle.abs(logits))) - + factor_n) return value * logits - factor_k - factor_nmk - norm diff --git a/python/paddle/distribution/normal.py b/python/paddle/distribution/normal.py index 51a180271c6..71bc98a72de 100644 --- a/python/paddle/distribution/normal.py +++ b/python/paddle/distribution/normal.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -115,13 +115,11 @@ class Normal(distribution.Distribution): else: if isinstance(loc, float) and isinstance(scale, float): self.all_arg_is_float = True - if isinstance( - loc, - np.ndarray) and str(loc.dtype) in ['float32', 'float64']: + if isinstance(loc, np.ndarray) and str( + loc.dtype) in ['float32', 'float64']: self.dtype = loc.dtype - elif isinstance( - scale, - np.ndarray) and str(scale.dtype) in ['float32', 'float64']: + elif isinstance(scale, np.ndarray) and str( + scale.dtype) in ['float32', 'float64']: self.dtype = scale.dtype # pylint: disable=unbalanced-tuple-unpacking self.loc, self.scale = self._to_tensor(loc, scale) @@ -154,8 +152,11 @@ class Normal(distribution.Distribution): self.loc + self.scale, batch_shape + shape, self.dtype, 0.) zero_tmp_reshape = nn.reshape(zero_tmp, output_shape) zero_tmp_shape = nn.shape(zero_tmp_reshape) - normal_random_tmp = nn.gaussian_random( - zero_tmp_shape, mean=0., std=1., seed=seed, dtype=self.dtype) + normal_random_tmp = nn.gaussian_random(zero_tmp_shape, + mean=0., + std=1., + seed=seed, + dtype=self.dtype) output = normal_random_tmp * (zero_tmp_reshape + self.scale) output = elementwise_add(output, self.loc, name=name) return output @@ -188,12 +189,13 @@ class Normal(distribution.Distribution): """ name = self.name + '_entropy' batch_shape = list((self.loc + self.scale).shape) - zero_tmp = tensor.fill_constant_batch_size_like( - self.loc + self.scale, batch_shape, self.dtype, 0.) - return elementwise_add( - 0.5 + zero_tmp, - 0.5 * math.log(2 * math.pi) + nn.log((self.scale + zero_tmp)), - name=name) + zero_tmp = tensor.fill_constant_batch_size_like(self.loc + self.scale, + batch_shape, self.dtype, + 0.) + return elementwise_add(0.5 + zero_tmp, + 0.5 * math.log(2 * math.pi) + nn.log( + (self.scale + zero_tmp)), + name=name) def log_prob(self, value): """Log probability density/mass function. @@ -210,10 +212,10 @@ class Normal(distribution.Distribution): var = self.scale * self.scale log_scale = nn.log(self.scale) - return elementwise_sub( - -1. * ((value - self.loc) * (value - self.loc)) / (2. * var), - log_scale + math.log(math.sqrt(2. * math.pi)), - name=name) + return elementwise_sub(-1. * ((value - self.loc) * (value - self.loc)) / + (2. * var), + log_scale + math.log(math.sqrt(2. * math.pi)), + name=name) def probs(self, value): """Probability density/mass function. @@ -229,10 +231,10 @@ class Normal(distribution.Distribution): value = self._check_values_dtype_in_probs(self.loc, value) var = self.scale * self.scale - return elementwise_div( - ops.exp(-1. * ((value - self.loc) * (value - self.loc)) / - (2. * var)), (math.sqrt(2 * math.pi) * self.scale), - name=name) + return elementwise_div(ops.exp(-1. * ((value - self.loc) * + (value - self.loc)) / (2. * var)), + (math.sqrt(2 * math.pi) * self.scale), + name=name) def kl_divergence(self, other): r"""The KL-divergence between two normal distributions. @@ -275,5 +277,6 @@ class Normal(distribution.Distribution): var_ratio = (var_ratio * var_ratio) t1 = (self.loc - other.loc) / other.scale t1 = (t1 * t1) - return elementwise_add( - 0.5 * var_ratio, 0.5 * (t1 - 1. - nn.log(var_ratio)), name=name) + return elementwise_add(0.5 * var_ratio, + 0.5 * (t1 - 1. 
- nn.log(var_ratio)), + name=name) diff --git a/python/paddle/distribution/transform.py b/python/paddle/distribution/transform.py index 31b1dedbc5f..3fabd27ec34 100644 --- a/python/paddle/distribution/transform.py +++ b/python/paddle/distribution/transform.py @@ -25,19 +25,10 @@ from paddle.distribution import (constraint, distribution, transformed_distribution, variable) __all__ = [ # noqa - 'Transform', - 'AbsTransform', - 'AffineTransform', - 'ChainTransform', - 'ExpTransform', - 'IndependentTransform', - 'PowerTransform', - 'ReshapeTransform', - 'SigmoidTransform', - 'SoftmaxTransform', - 'StackTransform', - 'StickBreakingTransform', - 'TanhTransform' + 'Transform', 'AbsTransform', 'AffineTransform', 'ChainTransform', + 'ExpTransform', 'IndependentTransform', 'PowerTransform', + 'ReshapeTransform', 'SigmoidTransform', 'SoftmaxTransform', + 'StackTransform', 'StickBreakingTransform', 'TanhTransform' ] @@ -147,8 +138,8 @@ class Transform(object): [Tensor|TransformedDistribution|ChainTransform]: The return value. """ if isinstance(input, distribution.Distribution): - return transformed_distribution.TransformedDistribution(input, - [self]) + return transformed_distribution.TransformedDistribution( + input, [self]) if isinstance(input, Transform): return ChainTransform([self, input]) return self.forward(x) @@ -207,8 +198,8 @@ class Transform(object): if not isinstance(x, paddle.fluid.framework.Variable): raise TypeError( f"Expected 'y' is a Tensor or Real, but got {type(x)}.") - if isinstance(x, paddle.fluid.framework.Variable) and x.dim( - ) < self._domain.event_rank: + if isinstance(x, paddle.fluid.framework.Variable + ) and x.dim() < self._domain.event_rank: raise ValueError( f'The dimensions of x({x.dim()}) should be ' f'grater than or equal to {self._domain.event_rank}') @@ -536,9 +527,8 @@ class ChainTransform(Transform): value = 0. event_rank = self._domain.event_rank for t in self.transforms: - value += self._sum_rightmost( - t.forward_log_det_jacobian(x), - event_rank - t._domain.event_rank) + value += self._sum_rightmost(t.forward_log_det_jacobian(x), + event_rank - t._domain.event_rank) x = t.forward(x) event_rank += t._codomain.event_rank - t._domain.event_rank return value diff --git a/python/paddle/distribution/transformed_distribution.py b/python/paddle/distribution/transformed_distribution.py index 2d7aa5886ae..ce386971e5f 100644 --- a/python/paddle/distribution/transformed_distribution.py +++ b/python/paddle/distribution/transformed_distribution.py @@ -112,8 +112,8 @@ class TransformedDistribution(distribution.Distribution): _sum_rightmost(t.forward_log_det_jacobian( x), event_rank-t._domain.event_rank) y = x - log_prob += _sum_rightmost( - self._base.log_prob(y), event_rank - len(self._base.event_shape)) + log_prob += _sum_rightmost(self._base.log_prob(y), + event_rank - len(self._base.event_shape)) return log_prob diff --git a/python/paddle/distribution/uniform.py b/python/paddle/distribution/uniform.py index 5957dab14ef..cbc83eba625 100644 --- a/python/paddle/distribution/uniform.py +++ b/python/paddle/distribution/uniform.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -116,13 +116,11 @@ class Uniform(distribution.Distribution): else: if isinstance(low, float) and isinstance(high, float): self.all_arg_is_float = True - if isinstance( - low, - np.ndarray) and str(low.dtype) in ['float32', 'float64']: + if isinstance(low, np.ndarray) and str( + low.dtype) in ['float32', 'float64']: self.dtype = low.dtype - elif isinstance( - high, - np.ndarray) and str(high.dtype) in ['float32', 'float64']: + elif isinstance(high, np.ndarray) and str( + high.dtype) in ['float32', 'float64']: self.dtype = high.dtype # pylint: disable=unbalanced-tuple-unpacking self.low, self.high = self._to_tensor(low, high) @@ -161,16 +159,16 @@ class Uniform(distribution.Distribution): zero_tmp_reshape = nn.reshape(zero_tmp, output_shape) uniform_random_tmp_reshape = nn.reshape(uniform_random_tmp, output_shape) - output = uniform_random_tmp_reshape * ( - zero_tmp_reshape + self.high - self.low) + output = uniform_random_tmp_reshape * (zero_tmp_reshape + + self.high - self.low) output = elementwise_add(output, self.low, name=name) return output else: output_shape = shape + batch_shape output = nn.uniform_random( output_shape, dtype=self.dtype, min=0., max=1., - seed=seed) * (tensor.zeros( - output_shape, dtype=self.dtype) + (self.high - self.low)) + seed=seed) * (tensor.zeros(output_shape, dtype=self.dtype) + + (self.high - self.low)) output = elementwise_add(output, self.low, name=name) if self.all_arg_is_float: return nn.reshape(output, shape, name=name) @@ -204,8 +202,9 @@ class Uniform(distribution.Distribution): ub_bool = value < self.high lb = tensor.cast(lb_bool, dtype=value.dtype) ub = tensor.cast(ub_bool, dtype=value.dtype) - return elementwise_sub( - nn.log(lb * ub), nn.log(self.high - self.low), name=name) + return elementwise_sub(nn.log(lb * ub), + nn.log(self.high - self.low), + name=name) def probs(self, value): """Probability density/mass function. diff --git a/python/paddle/distribution/variable.py b/python/paddle/distribution/variable.py index 6ece1c3a1d8..b5c3d71d3fa 100644 --- a/python/paddle/distribution/variable.py +++ b/python/paddle/distribution/variable.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -43,11 +43,13 @@ class Variable(object): class Real(Variable): + def __init__(self, event_rank=0): super(Real, self).__init__(False, event_rank, constraint.real) class Positive(Variable): + def __init__(self, event_rank=0): super(Positive, self).__init__(False, event_rank, constraint.positive) @@ -64,8 +66,9 @@ class Independent(Variable): def __init__(self, base, reinterpreted_batch_rank): self._base = base self._reinterpreted_batch_rank = reinterpreted_batch_rank - super(Independent, self).__init__( - base.is_discrete, base.event_rank + reinterpreted_batch_rank) + super(Independent, + self).__init__(base.is_discrete, + base.event_rank + reinterpreted_batch_rank) def constraint(self, value): ret = self._base.constraint(value) @@ -73,11 +76,13 @@ class Independent(Variable): raise ValueError( "Input dimensions must be equal or grater than {}".format( self._reinterpreted_batch_rank)) - return ret.reshape(ret.shape[:ret.dim() - self.reinterpreted_batch_rank] - + (-1, )).all(-1) + return ret.reshape(ret.shape[:ret.dim() - + self.reinterpreted_batch_rank] + + (-1, )).all(-1) class Stack(Variable): + def __init__(self, vars, axis=0): self._vars = vars self._axis = axis diff --git a/python/paddle/fft.py b/python/paddle/fft.py index 10d637ff8b9..f44111cb766 100644 --- a/python/paddle/fft.py +++ b/python/paddle/fft.py @@ -104,8 +104,8 @@ def _check_fft_axes(x, axes): for axis in axes: if not isinstance(axis, int) or axis < -ndim or axis >= ndim: raise ValueError( - "FFT axes {} contains invalid value ({}), it should be in range [-{}, {})". - format(axes, axis, ndim, ndim)) + "FFT axes {} contains invalid value ({}), it should be in range [-{}, {})" + .format(axes, axis, ndim, ndim)) def _resize_fft_input(x, s, axes): @@ -127,11 +127,10 @@ def _resize_fft_input(x, s, axes): slices.append((0, s[i])) if axes_to_slice: - x = paddle.slice( - x, - axes_to_slice, - starts=[item[0] for item in slices], - ends=[item[1] for item in slices]) + x = paddle.slice(x, + axes_to_slice, + starts=[item[0] for item in slices], + ends=[item[1] for item in slices]) if axes_to_pad: padding_widths = [0] * (2 * ndim) for axis, pad in zip(axes_to_pad, paddings): @@ -198,8 +197,13 @@ def fft(x, n=None, axis=-1, norm="backward", name=None): """ if is_integer(x) or is_floating_point(x): - return fft_r2c( - x, n, axis, norm, forward=True, onesided=False, name=name) + return fft_r2c(x, + n, + axis, + norm, + forward=True, + onesided=False, + name=name) else: return fft_c2c(x, n, axis, norm, forward=True, name=name) @@ -262,8 +266,13 @@ def ifft(x, n=None, axis=-1, norm="backward", name=None): """ if is_integer(x) or is_floating_point(x): - return fft_r2c( - x, n, axis, norm, forward=False, onesided=False, name=name) + return fft_r2c(x, + n, + axis, + norm, + forward=False, + onesided=False, + name=name) else: return fft_c2c(x, n, axis, norm, forward=False, name=name) @@ -523,8 +532,13 @@ def fftn(x, s=None, axes=None, norm="backward", name=None): # [-8.-8.j 0.+0.j 0.+0.j 0.-0.j]]] """ if is_integer(x) or is_floating_point(x): - return fftn_r2c( - x, s, axes, norm, forward=True, onesided=False, name=name) + return fftn_r2c(x, + s, + axes, + norm, + forward=True, + onesided=False, + name=name) else: return fftn_c2c(x, s, axes, norm, forward=True, name=name) @@ -587,8 +601,13 @@ def ifftn(x, s=None, axes=None, norm="backward", name=None): """ if is_integer(x) or is_floating_point(x): - return fftn_r2c( - x, s, axes, norm, forward=False, onesided=False, name=name) + return fftn_r2c(x, + s, + axes, + norm, + forward=False, + 
onesided=False, + name=name) else: return fftn_c2c(x, s, axes, norm, forward=False, name=name) @@ -878,13 +897,13 @@ def fft2(x, s=None, axes=(-2, -1), norm="backward", name=None): if s is not None: if not isinstance(s, Sequence) or len(s) != 2: raise ValueError( - "Invalid FFT argument s ({}), it should be a sequence of 2 integers.". - format(s)) + "Invalid FFT argument s ({}), it should be a sequence of 2 integers." + .format(s)) if axes is not None: if not isinstance(axes, Sequence) or len(axes) != 2: raise ValueError( - "Invalid FFT argument axes ({}), it should be a sequence of 2 integers.". - format(axes)) + "Invalid FFT argument axes ({}), it should be a sequence of 2 integers." + .format(axes)) return fftn(x, s, axes, norm, name) @@ -948,13 +967,13 @@ def ifft2(x, s=None, axes=(-2, -1), norm="backward", name=None): if s is not None: if not isinstance(s, Sequence) or len(s) != 2: raise ValueError( - "Invalid FFT argument s ({}), it should be a sequence of 2 integers.". - format(s)) + "Invalid FFT argument s ({}), it should be a sequence of 2 integers." + .format(s)) if axes is not None: if not isinstance(axes, Sequence) or len(axes) != 2: raise ValueError( - "Invalid FFT argument axes ({}), it should be a sequence of 2 integers.". - format(axes)) + "Invalid FFT argument axes ({}), it should be a sequence of 2 integers." + .format(axes)) return ifftn(x, s, axes, norm, name) @@ -1002,13 +1021,13 @@ def rfft2(x, s=None, axes=(-2, -1), norm="backward", name=None): if s is not None: if not isinstance(s, Sequence) or len(s) != 2: raise ValueError( - "Invalid FFT argument s ({}), it should be a sequence of 2 integers.". - format(s)) + "Invalid FFT argument s ({}), it should be a sequence of 2 integers." + .format(s)) if axes is not None: if not isinstance(axes, Sequence) or len(axes) != 2: raise ValueError( - "Invalid FFT argument axes ({}), it should be a sequence of 2 integers.". - format(axes)) + "Invalid FFT argument axes ({}), it should be a sequence of 2 integers." + .format(axes)) return rfftn(x, s, axes, norm, name) @@ -1054,13 +1073,13 @@ def irfft2(x, s=None, axes=(-2, -1), norm="backward", name=None): if s is not None: if not isinstance(s, Sequence) or len(s) != 2: raise ValueError( - "Invalid FFT argument s ({}), it should be a sequence of 2 integers.". - format(s)) + "Invalid FFT argument s ({}), it should be a sequence of 2 integers." + .format(s)) if axes is not None: if not isinstance(axes, Sequence) or len(axes) != 2: raise ValueError( - "Invalid FFT argument axes ({}), it should be a sequence of 2 integers.". - format(axes)) + "Invalid FFT argument axes ({}), it should be a sequence of 2 integers." + .format(axes)) return irfftn(x, s, axes, norm, name) @@ -1107,13 +1126,13 @@ def hfft2(x, s=None, axes=(-2, -1), norm="backward", name=None): if s is not None: if not isinstance(s, Sequence) or len(s) != 2: raise ValueError( - "Invalid FFT argument s ({}), it should be a sequence of 2 integers.". - format(s)) + "Invalid FFT argument s ({}), it should be a sequence of 2 integers." + .format(s)) if axes is not None: if not isinstance(axes, Sequence) or len(axes) != 2: raise ValueError( - "Invalid FFT argument axes ({}), it should be a sequence of 2 integers.". - format(axes)) + "Invalid FFT argument axes ({}), it should be a sequence of 2 integers." 
+ .format(axes)) return hfftn(x, s, axes, norm, name) @@ -1159,13 +1178,13 @@ def ihfft2(x, s=None, axes=(-2, -1), norm="backward", name=None): if s is not None: if not isinstance(s, Sequence) or len(s) != 2: raise ValueError( - "Invalid FFT argument s ({}), it should be a sequence of 2 integers.". - format(s)) + "Invalid FFT argument s ({}), it should be a sequence of 2 integers." + .format(s)) if axes is not None: if not isinstance(axes, Sequence) or len(axes) != 2: raise ValueError( - "Invalid FFT argument axes ({}), it should be a sequence of 2 integers.". - format(axes)) + "Invalid FFT argument axes ({}), it should be a sequence of 2 integers." + .format(axes)) return ihfftn(x, s, axes, norm, name) @@ -1377,14 +1396,18 @@ def fft_c2c(x, n, axis, norm, forward, name): attrs = ('axes', axes, 'normalization', norm, 'forward', forward) out = getattr(_C_ops, op_type)(x, *attrs) else: - inputs = {'X': [x], } + inputs = { + 'X': [x], + } attrs = {'axes': axes, 'normalization': norm, 'forward': forward} helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype) outputs = {"Out": [out]} - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) return out @@ -1408,7 +1431,9 @@ def fft_r2c(x, n, axis, norm, forward, onesided, name): 'onesided', onesided) out = getattr(_C_ops, op_type)(x, *attrs) else: - inputs = {'X': [x], } + inputs = { + 'X': [x], + } attrs = { 'axes': axes, 'normalization': norm, @@ -1420,8 +1445,10 @@ def fft_r2c(x, n, axis, norm, forward, onesided, name): out = helper.create_variable_for_type_inference( _real_to_complex_dtype(dtype)) outputs = {"Out": [out]} - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) return out @@ -1450,7 +1477,9 @@ def fft_c2r(x, n, axis, norm, forward, name): attrs = ('axes', axes, 'normalization', norm, 'forward', forward) out = getattr(_C_ops, op_type)(x, *attrs) else: - inputs = {'X': [x], } + inputs = { + 'X': [x], + } attrs = {'axes': axes, 'normalization': norm, 'forward': forward} if n is not None: attrs['last_dim_size'] = n @@ -1459,8 +1488,10 @@ def fft_c2r(x, n, axis, norm, forward, name): out = helper.create_variable_for_type_inference( _complex_to_real_dtype(dtype)) outputs = {"Out": [out]} - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) return out @@ -1501,14 +1532,18 @@ def fftn_c2c(x, s, axes, norm, forward, name): attrs = ('axes', axes, 'normalization', norm, 'forward', forward) out = getattr(_C_ops, op_type)(x, *attrs) else: - inputs = {'X': [x], } + inputs = { + 'X': [x], + } attrs = {'axes': axes, 'normalization': norm, 'forward': forward} helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype) outputs = {"Out": [out]} - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) return out @@ -1549,7 +1584,9 @@ def fftn_r2c(x, s, axes, norm, forward, onesided, name): 'onesided', onesided) out = getattr(_C_ops, op_type)(x, *attrs) else: - inputs = {'X': [x], } + inputs = { + 'X': [x], + } attrs = { 'axes': axes, 
'normalization': norm, @@ -1561,8 +1598,10 @@ def fftn_r2c(x, s, axes, norm, forward, onesided, name): out = helper.create_variable_for_type_inference( _real_to_complex_dtype(dtype)) outputs = {"Out": [out]} - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) return out @@ -1611,7 +1650,9 @@ def fftn_c2r(x, s, axes, norm, forward, name): attrs = ('axes', axes, 'normalization', norm, 'forward', forward) out = getattr(_C_ops, op_type)(x, *attrs) else: - inputs = {'X': [x], } + inputs = { + 'X': [x], + } attrs = {'axes': axes, 'normalization': norm, 'forward': forward} if s: attrs["last_dim_size"] = s[-1] @@ -1620,6 +1661,8 @@ def fftn_c2r(x, s, axes, norm, forward, name): out = helper.create_variable_for_type_inference( _complex_to_real_dtype(dtype)) outputs = {"Out": [out]} - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) return out diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 8dbeb3eeb27..aa07124ad49 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -23,8 +23,8 @@ core_suffix = 'so' if os.name == 'nt': core_suffix = 'pyd' -legacy_core = os.path.abspath(os.path.dirname( - __file__)) + os.sep + 'core.' + core_suffix +legacy_core = os.path.abspath( + os.path.dirname(__file__)) + os.sep + 'core.' + core_suffix if os.path.exists(legacy_core): sys.stderr.write('Deleting legacy file ' + legacy_core + '\n') try: @@ -159,8 +159,8 @@ def __bootstrap__(): import platform from . import core - # NOTE(zhiqiu): When (1)numpy < 1.19; (2) python < 3.7, - # unittest is always imported in numpy (maybe some versions not). + # NOTE(zhiqiu): When (1)numpy < 1.19; (2) python < 3.7, + # unittest is always imported in numpy (maybe some versions not). # so is_test is True and p2p is not inited. in_test = 'unittest' in sys.modules @@ -170,12 +170,11 @@ def __bootstrap__(): num_threads = 1 if num_threads > 1: - print( - 'WARNING: OMP_NUM_THREADS set to {0}, not 1. The computation ' - 'speed will not be optimized if you use data parallel. It will ' - 'fail if this PaddlePaddle binary is compiled with OpenBlas since' - ' OpenBlas does not support multi-threads.'.format(num_threads), - file=sys.stderr) + print('WARNING: OMP_NUM_THREADS set to {0}, not 1. The computation ' + 'speed will not be optimized if you use data parallel. 
It will ' + 'fail if this PaddlePaddle binary is compiled with OpenBlas since' + ' OpenBlas does not support multi-threads.'.format(num_threads), + file=sys.stderr) print('PLEASE USE OMP_NUM_THREADS WISELY.', file=sys.stderr) os.environ['OMP_NUM_THREADS'] = str(num_threads) @@ -203,7 +202,7 @@ def __bootstrap__(): read_env_flags += [] core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)]) - # Note(zhouwei25): sys may not have argv in some cases, + # Note(zhouwei25): sys may not have argv in some cases, # Such as: use Python/C API to call Python from C++ try: core.init_glog(sys.argv[0]) diff --git a/python/paddle/fluid/average.py b/python/paddle/fluid/average.py index a7d64d37bc7..bb5f4cb84f6 100644 --- a/python/paddle/fluid/average.py +++ b/python/paddle/fluid/average.py @@ -29,8 +29,8 @@ __all__ = ["WeightedAverage"] def _is_number_(var): - return isinstance(var, int) or isinstance(var, float) or (isinstance( - var, np.ndarray) and var.shape == (1, )) + return isinstance(var, int) or isinstance( + var, float) or (isinstance(var, np.ndarray) and var.shape == (1, )) def _is_number_or_matrix_(var): diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index ed3e0bc98ed..0ca69b5f94d 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -38,11 +38,13 @@ __all__ = [ 'gradients', ] -_logger = log_helper.get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = log_helper.get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') class ProgramStats(object): + def __init__(self, block, ops): self.block = block self.ops = ops @@ -122,9 +124,9 @@ class ProgramStats(object): updated_min_idx = min_idx while idx_ > pre_segment_end_idx: if is_amp_cast(self.ops[idx_]): - _logger.info("found amp-cast op: {}, : {}".format(self.ops[ - idx_].desc.type(), self.ops[idx_].desc.input_arg_names()[ - 0])) + _logger.info("found amp-cast op: {}, : {}".format( + self.ops[idx_].desc.type(), + self.ops[idx_].desc.input_arg_names()[0])) updated_min_idx = idx_ idx_ -= 1 else: @@ -137,8 +139,8 @@ class ProgramStats(object): self.op_deps[i] = {"in_ops": [], "out_ops": []} for j, name in enumerate(op.desc.input_arg_names()): if name in self.var_op_deps: - self.op_deps[i]["in_ops"].extend(self.var_op_deps[name][ - "var_as_output_ops"]) + self.op_deps[i]["in_ops"].extend( + self.var_op_deps[name]["var_as_output_ops"]) for j, name in enumerate(op.desc.input_arg_names()): if name in self.var_op_deps: self.var_op_deps[name]["var_as_input_ops"].extend([i]) @@ -209,16 +211,15 @@ class ProgramStats(object): # Setting the force_cpu of seed to true will make the output of seed in cpu memory, # reduce the synchronous copy from GPU to CPU in dropout, and reduce the communication hang - added_op = self.block._insert_op( - index=op.idx, - type='seed', - inputs={}, - outputs={'Out': [added_var]}, - attrs={ - 'seed': seed, - 'op_device': op_device, - 'force_cpu': True - }) + added_op = self.block._insert_op(index=op.idx, + type='seed', + inputs={}, + outputs={'Out': [added_var]}, + attrs={ + 'seed': seed, + 'op_device': op_device, + 'force_cpu': True + }) self.ops.insert(op_idx, added_op) # modify dropout op desc so that it accept a seed var as input op.desc.set_input("Seed", [var_unique_name]) @@ -287,8 +288,8 @@ def _add_descs_to_block(descs, block): def _find_loss_op_(loss): for op in reversed(loss.block.ops): assert isinstance(op, framework.Operator) - if len(op.output_arg_names) == 1 and op.output_arg_names[ 
- 0] == loss.name: + if len(op.output_arg_names + ) == 1 and op.output_arg_names[0] == loss.name: loss.op = op break if loss.op is None: @@ -329,14 +330,16 @@ def _create_op_desc_(op_type, inputs, outputs, attrs): op_desc.set_input( para, list( - map(lambda arg: arg.decode() if isinstance(arg, six.binary_type) else arg, - args))) + map( + lambda arg: arg.decode() + if isinstance(arg, six.binary_type) else arg, args))) for para, args in six.iteritems(outputs): op_desc.set_output( para, list( - map(lambda arg: arg.decode() if isinstance(arg, six.binary_type) else arg, - args))) + map( + lambda arg: arg.decode() + if isinstance(arg, six.binary_type) else arg, args))) op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName() op_device_attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName() @@ -358,12 +361,15 @@ def _create_loss_op_desc_(loss): op_desc = _create_op_desc_( "fill_constant", {}, {"Out": [_append_grad_suffix_(loss.name)]}, { "shape": [1], - "value": 1.0, - "dtype": loss.dtype, - "force_cpu": False, + "value": + 1.0, + "dtype": + loss.dtype, + "force_cpu": + False, core.op_proto_and_checker_maker.kOpRoleAttrName(): - int(core.op_proto_and_checker_maker.OpRole.Backward) | - int(core.op_proto_and_checker_maker.OpRole.Loss), + int(core.op_proto_and_checker_maker.OpRole.Backward) + | int(core.op_proto_and_checker_maker.OpRole.Loss), core.op_proto_and_checker_maker.kOpDeviceAttrName(): loss.op.attr(core.op_proto_and_checker_maker.kOpDeviceAttrName()) }) @@ -383,8 +389,8 @@ def _infer_var_data_type_shape_(grad_var_name, block): else: # TODO(jiabin): Maybe we should not to this to cause some unexpected error on dtype warnings.warn( - "Set grad var: {} dtype to default FP32, since we can't find its related forward var". - format(grad_var_name)) + "Set grad var: {} dtype to default FP32, since we can't find its related forward var" + .format(grad_var_name)) grad_var.set_dtype(core.VarDesc.VarType.FP32) @@ -446,10 +452,11 @@ def _accumulate_gradients_by_sum_op_(var_name, if op_idx not in pending_sum_ops.keys(): pending_sum_ops[op_idx] = [] pending_sum_ops[op_idx].append( - _create_op_desc_("sum", {"X": renamed_vars[var_name]}, { - "Out": [var_name] - }, {"use_mkldnn": False, - "op_device": op_device})) + _create_op_desc_("sum", {"X": renamed_vars[var_name]}, + {"Out": [var_name]}, { + "use_mkldnn": False, + "op_device": op_device + })) renamed_vars[var_name] = [var_name] @@ -472,10 +479,13 @@ def _accumulate_gradients_by_add_ops_(var_name, else: out_name = var_name pending_sum_ops[op_idx].append( - _create_op_desc_("grad_add", {"X": [x_name], - "Y": [y_name]}, {"Out": [out_name]}, - {"use_mkldnn": False, - "op_device": op_device})) + _create_op_desc_("grad_add", { + "X": [x_name], + "Y": [y_name] + }, {"Out": [out_name]}, { + "use_mkldnn": False, + "op_device": op_device + })) renamed_vars[var_name] = [var_name] @@ -585,13 +595,14 @@ def _addup_repetitive_outputs_(op_descs, block_idx, grad_var_to_var=None): for var_name, inputs in six.iteritems(renamed_vars): if len(renamed_vars[var_name]) > 1: if len(renamed_vars[var_name]) > _MAX_ADD_NUM_: - _accumulate_gradients_by_sum_op_( - var_name, renamed_vars, pending_sum_ops, - len(op_descs), var_device[var_name]) + _accumulate_gradients_by_sum_op_(var_name, renamed_vars, + pending_sum_ops, len(op_descs), + var_device[var_name]) else: - _accumulate_gradients_by_add_ops_( - var_name, renamed_vars, pending_sum_ops, - len(op_descs), var_device[var_name]) + _accumulate_gradients_by_add_ops_(var_name, + renamed_vars, pending_sum_ops, 
+ len(op_descs), + var_device[var_name]) # sum_op descs are sorted according to their insert position for key, value in collections.OrderedDict( @@ -642,8 +653,9 @@ def _remove_no_grad_branch_(op_descs, no_grad_set): x_in = _strip_grad_suffix_(arg) # the reason should be: arg can be input of another grad op # and the op is a not-to-remove op - to_insert.append((_create_op_desc_( - "fill_zeros_like", {"X": [x_in]}, {"Out": [arg]}, {}), idx)) + to_insert.append( + (_create_op_desc_("fill_zeros_like", {"X": [x_in]}, + {"Out": [arg]}, {}), idx)) list([op_descs.insert(p[1], p[0]) for p in reversed(to_insert)]) @@ -669,6 +681,7 @@ def _find_not_need_ops(grad_op_descs, forward_ops, input_grad_names_set): """ class Var(object): + def __init__(self, var_name): self.var_name = var_name self.gen_op = None @@ -684,6 +697,7 @@ def _find_not_need_ops(grad_op_descs, forward_ops, input_grad_names_set): self.pendding_ops.append(op) class Op(object): + def __init__(self, op_desc): self.op_desc = op_desc self.inputs = [] @@ -780,8 +794,9 @@ def serialize_op_decs(op_desc): return proto.__str__() -def _append_backward_ops_with_checkpoints_( - block, ops, target_block, no_grad_dict, grad_to_var, checkpoints): +def _append_backward_ops_with_checkpoints_(block, ops, target_block, + no_grad_dict, grad_to_var, + checkpoints): """ Create grad ops with forward ops, and insert them into given block @@ -867,15 +882,15 @@ def _append_backward_ops_with_checkpoints_( for i, (idx1, idx2) in enumerate(recompute_segments): _logger.info("recompute segment[{}]".format(i)) - _logger.info("segment start op: [{}]: [{}]".format(ops[idx1].desc.type( - ), ops[idx1].desc.input_arg_names())) - _logger.info("segment end op: [{}]: [{}]".format(ops[ - idx2 - 1].desc.type(), ops[idx2 - 1].desc.input_arg_names())) + _logger.info("segment start op: [{}]: [{}]".format( + ops[idx1].desc.type(), ops[idx1].desc.input_arg_names())) + _logger.info("segment end op: [{}]: [{}]".format( + ops[idx2 - 1].desc.type(), ops[idx2 - 1].desc.input_arg_names())) _logger.info("recompute segment[{}]".format(i)) - _logger.info("segment start op: [{}]: [{}]".format(ops[idx1].desc.type( - ), ops[idx1].desc.input_arg_names())) - _logger.info("segment end op: [{}]: [{}]".format(ops[ - idx2 - 1].desc.type(), ops[idx2 - 1].desc.input_arg_names())) + _logger.info("segment start op: [{}]: [{}]".format( + ops[idx1].desc.type(), ops[idx1].desc.input_arg_names())) + _logger.info("segment end op: [{}]: [{}]".format( + ops[idx2 - 1].desc.type(), ops[idx2 - 1].desc.input_arg_names())) # 2) go through all forward ops and induct all variables that will be hold in memory vars_should_be_hold = [] @@ -960,13 +975,12 @@ def _append_backward_ops_with_checkpoints_( # we should create the rename var in subprog, otherwise its VarType will be BOOL ref_var = block.program.global_block().var(name) - block.create_var( - name=var_name_dict[name], - shape=ref_var.shape, - dtype=ref_var.dtype, - type=ref_var.type, - persistable=ref_var.persistable, - stop_gradient=ref_var.stop_gradient) + block.create_var(name=var_name_dict[name], + shape=ref_var.shape, + dtype=ref_var.dtype, + type=ref_var.type, + persistable=ref_var.persistable, + stop_gradient=ref_var.stop_gradient) # 3.a. 
add ops in current recompute_segment as forward recomputation ops buffer_descs = _add_needed_descs_to_block(ff_ops, buffer_block, block, @@ -1109,8 +1123,8 @@ def _append_backward_ops_(block, for op_desc in grad_op_desc: assert op_desc.original_id( ) not in distop_context.grad_op_id_to_op_id - distop_context.grad_op_id_to_op_id[op_desc.original_id( - )] = op.desc.original_id() + distop_context.grad_op_id_to_op_id[ + op_desc.original_id()] = op.desc.original_id() if callbacks is not None: assert (isinstance(callbacks, (list, tuple))) @@ -1206,7 +1220,8 @@ def _append_backward_ops_(block, # But this strategy is not suited for while op for some control flow, # for example, for while op, the grads maybe generated in next loop. if input_grad_names_set is not None: - is_grad_name = lambda name: name.find(core.grad_var_suffix()) != -1 or name in input_grad_names_set + is_grad_name = lambda name: name.find(core.grad_var_suffix( + )) != -1 or name in input_grad_names_set is_append_grad = False for op_desc in grad_op_desc: input_grad_names = [ @@ -1356,8 +1371,8 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map): if grad_var_ins: existing_grad_var_ins = [ var for var in grad_var_ins - if block.desc.has_var_recursive(cpt.to_bytes(var)) or var in - parent_op_vars + if block.desc.has_var_recursive(cpt.to_bytes(var)) + or var in parent_op_vars ] if not existing_grad_var_ins: ''' @@ -1458,8 +1473,8 @@ def _get_no_grad_set_name(no_grad_set): % (type(no_grad_var))) else: raise TypeError( - "The type of no_grad_set should be set or list or tuple, but received {}". - format(type(no_grad_set))) + "The type of no_grad_set should be set or list or tuple, but received {}" + .format(type(no_grad_set))) return no_grad_set_name @@ -1577,9 +1592,10 @@ def append_backward(loss, # the loss is from a cloned program. Find loss op manually. 
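
The hunks above show the formatter's two recurring rewrites: a call that no longer fits on one line is split with one keyword argument per line aligned under the opening parenthesis, and binary operators such as "|" or "or" move to the start of the continuation line. The sketch below drives that kind of reformatting programmatically. It assumes the formatter is yapf and that it is installed; the stock pep8 base style is used only because the repository's own style file is not shown in this section, so the exact line breaks may differ from these hunks.

# A sketch only. FormatCode() parses the snippet, it does not execute it,
# so the undefined names inside SRC (helper, op_type, ...) are harmless.
# 'pep8' is an assumed base style, not necessarily the project's config.
from yapf.yapflib.yapf_api import FormatCode

SRC = ("helper.append_op(type=op_type, inputs=inputs, outputs=outputs, "
       "attrs=attrs, stop_gradient=stop_gradient)\n")

result = FormatCode(SRC, style_config='pep8')
# Depending on the yapf version, FormatCode returns either the reformatted
# code or a (code, changed) tuple; handle both.
formatted = result[0] if isinstance(result, tuple) else result
print(formatted)  # the over-long call is wrapped across several lines
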
_find_loss_op_(loss) - loss.op._set_attr(core.op_proto_and_checker_maker.kOpRoleAttrName(), - int(core.op_proto_and_checker_maker.OpRole.Forward) | - int(core.op_proto_and_checker_maker.OpRole.Loss)) + loss.op._set_attr( + core.op_proto_and_checker_maker.kOpRoleAttrName(), + int(core.op_proto_and_checker_maker.OpRole.Forward) + | int(core.op_proto_and_checker_maker.OpRole.Loss)) if callbacks is not None: check_type(callbacks, 'callbacks', (list, tuple), @@ -1685,7 +1701,8 @@ def append_backward(loss, callbacks, input_grad_names_set=input_grad_names_set, op_path_dict=op_path_dict, - distop_context=distop_context, ) + distop_context=distop_context, + ) grad_info_map = dict() @@ -1710,8 +1727,8 @@ def append_backward(loss, 'fluid.backward.append_backward') parameters = [] for i, param in enumerate(parameter_list): - check_type(param, 'parameter_list[%s]' % i, (framework.Variable, - six.string_types), + check_type(param, 'parameter_list[%s]' % i, + (framework.Variable, six.string_types), 'fluid.backward.append_backward') if isinstance(param, framework.Variable): parameters.append(param.name) @@ -1875,9 +1892,9 @@ def _find_op_path_(block, # All the inputs of the block are used if inputs is empty, if inputs: for i, op in enumerate(block.ops): - if _some_in_set_( - op.desc.input_arg_names(), - input_names) and core.has_non_empty_grad_op_maker(op.type): + if _some_in_set_(op.desc.input_arg_names(), + input_names) and core.has_non_empty_grad_op_maker( + op.type): for name in op.desc.output_arg_names(): if name not in no_grad_set: input_names.add(name) @@ -1889,14 +1906,14 @@ def _find_op_path_(block, sub_block_id = op._block_attr_id("sub_block") sub_block = block.program.block(sub_block_id) sub_block_target_names = output_names & set(op.output_arg_names) - sub_block_path = _get_sub_block_path(sub_block, op, - set(), op_path_dict, + sub_block_path = _get_sub_block_path(sub_block, op, set(), + op_path_dict, sub_block_target_names) op_path_dict[sub_block_id] = sub_block_path - if _some_in_set_( - op.desc.output_arg_names(), - output_names) and core.has_non_empty_grad_op_maker(op.type): + if _some_in_set_(op.desc.output_arg_names(), + output_names) and core.has_non_empty_grad_op_maker( + op.type): for name in op.desc.input_arg_names(): if name not in no_grad_set: output_names.add(name) @@ -2000,8 +2017,8 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None): raise ValueError("all targets must be in the same block") if target.shape != grad.shape: raise ValueError( - "The shapes of target and grad are different: %s %s" % ( - target.name, grad.name)) + "The shapes of target and grad are different: %s %s" % + (target.name, grad.name)) target_grad_map[_append_grad_suffix_(target.name)] = grad.name input_grad_names_set.add(grad.name) rename_var_map[grad_name] = grad.name @@ -2031,15 +2048,14 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None): no_grad_dict[0].update(list(map(_append_grad_suffix_, block_no_grad_set))) grad_to_var = dict() grad_info_map = dict() - _append_backward_ops_( - block, - op_path, - block, - no_grad_dict, - grad_to_var, - input_grad_names_set=input_grad_names_set, - op_path_dict=op_path_dict, - rename_var_map=rename_var_map) + _append_backward_ops_(block, + op_path, + block, + no_grad_dict, + grad_to_var, + input_grad_names_set=input_grad_names_set, + op_path_dict=op_path_dict, + rename_var_map=rename_var_map) # Because calc_gradient may be called multiple times, # we need rename the internal gradient variables so that they have @@ 
-2107,8 +2123,9 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None): 'paddle.static.gradients') check_type(inputs, 'inputs', (framework.Variable, list, tuple), 'paddle.static.gradients') - check_type(target_gradients, 'target_gradients', ( - framework.Variable, list, tuple, type(None)), 'paddle.static.gradients') + check_type(target_gradients, 'target_gradients', + (framework.Variable, list, tuple, type(None)), + 'paddle.static.gradients') from ..incubate.autograd.primx import _gradients from ..incubate.autograd.utils import prim_enabled @@ -2180,8 +2197,8 @@ def gradients_with_optimizer(program, optimizer, inputs=None, outputs=None): with program_guard(program, None): pram_grads = [(pram, grad) for pram, grad in zip(inputs, grads) - if isinstance(pram, paddle.fluid.framework.Parameter) and - grad is not None] + if isinstance(pram, paddle.fluid.framework.Parameter) + and grad is not None] optimize_ops = optimizer.apply_gradients(pram_grads) diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 172929608db..df48de8ea29 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -91,6 +91,7 @@ def _squared_l2_norm(x): class BaseErrorClipAttr(object): + def __str__(self): raise NotImplementedError() @@ -165,8 +166,8 @@ def error_clip_callback(block, context): for grad_n in [n for n in op_desc.output_arg_names() if n in grad_to_var]: fwd_var = block._var_recursive(grad_to_var[grad_n]) error_clip = getattr(fwd_var, "error_clip", None) - if not (error_clip is None or isinstance(error_clip, - BaseErrorClipAttr)): + if not (error_clip is None + or isinstance(error_clip, BaseErrorClipAttr)): raise TypeError( "Variable's error_clip should be an instance of BaseErrorClipAttr or None." ) @@ -175,6 +176,7 @@ def error_clip_callback(block, context): class ClipGradBase(object): + def __init__(self): super(ClipGradBase, self).__init__() @@ -526,21 +528,22 @@ class ClipGradByGlobalNorm(ClipGradBase): global_norm_var.append(global_norm_var_fp64) global_norm_var = paddle.add_n(global_norm_var) global_norm_var = layers.sqrt(global_norm_var) - max_global_norm = layers.fill_constant( - shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm) + max_global_norm = layers.fill_constant(shape=[1], + dtype=global_norm_var.dtype, + value=self.clip_norm) need_clip = False if not self.auto_skip_clip: # always apply clip need_clip = True - clip_var = layers.elementwise_div( - x=max_global_norm, - y=layers.elementwise_max( - x=global_norm_var, y=max_global_norm)) + clip_var = layers.elementwise_div(x=max_global_norm, + y=layers.elementwise_max( + x=global_norm_var, + y=max_global_norm)) elif global_norm_var > max_global_norm: # only when global_norm_var > max_global_norm, grad need clip need_clip = True - clip_var = layers.elementwise_div( - x=max_global_norm, y=global_norm_var) + clip_var = layers.elementwise_div(x=max_global_norm, + y=global_norm_var) for p, g in params_grads: if g is None: @@ -550,9 +553,8 @@ class ClipGradByGlobalNorm(ClipGradBase): continue # TODO(wangxi): use inplace elementwise_mul if need_clip: - clip_input = (clip_var.astype('float16') - if g.dtype == core.VarDesc.VarType.FP16 else - clip_var) + clip_input = (clip_var.astype('float16') if g.dtype + == core.VarDesc.VarType.FP16 else clip_var) new_grad = _C_ops.elementwise_mul(g, clip_input) params_and_grads.append((p, new_grad)) else: @@ -621,10 +623,10 @@ class ClipGradByGlobalNorm(ClipGradBase): shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm) - scale_var = 
layers.elementwise_div( - x=max_global_norm, - y=layers.elementwise_max( - x=max_global_norm, y=global_norm_var)) + scale_var = layers.elementwise_div(x=max_global_norm, + y=layers.elementwise_max( + x=max_global_norm, + y=global_norm_var)) param_new_grad_name_dict = dict() for p, g in params_grads: if g is None: @@ -645,20 +647,20 @@ class ClipGradByGlobalNorm(ClipGradBase): # We need to handle the correct block, otherwise will encounter # a 'NotFoundError' during compile time. block = default_main_program().current_block() - block.append_op( - type='elementwise_mul', - inputs={'X': new_g, - 'Y': scale_input}, - outputs={'Out': new_g}) + block.append_op(type='elementwise_mul', + inputs={ + 'X': new_g, + 'Y': scale_input + }, + outputs={'Out': new_g}) if new_g is not g: - block.append_op( - type='cast', - inputs={'X': new_g}, - outputs={'Out': g}, - attrs={ - 'in_dtype': new_g.dtype, - 'out_dtype': g.dtype - }) + block.append_op(type='cast', + inputs={'X': new_g}, + outputs={'Out': g}, + attrs={ + 'in_dtype': new_g.dtype, + 'out_dtype': g.dtype + }) param_new_grad_name_dict[p.name] = g.name params_and_grads.append((p, g)) @@ -694,19 +696,20 @@ class ClipGradByGlobalNorm(ClipGradBase): group_norm_var = layers.sums(input=self.context[self.group_name]) group_norm_var = layers.sqrt(x=group_norm_var) clip_var = self.context[self.group_name + "_clip"] - group_scale_var = layers.elementwise_div( - x=clip_var, - y=layers.elementwise_max( - x=clip_var, y=group_norm_var)) + group_scale_var = layers.elementwise_div(x=clip_var, + y=layers.elementwise_max( + x=clip_var, + y=group_norm_var)) assert group_scale_var.shape == (1, ) self.context[group_scale_name] = group_scale_var # inplace - param.block.append_op( - type='elementwise_mul', - inputs={'X': grad, - 'Y': self.context[group_scale_name]}, - outputs={'Out': grad}) + param.block.append_op(type='elementwise_mul', + inputs={ + 'X': grad, + 'Y': self.context[group_scale_name] + }, + outputs={'Out': grad}) return param, grad @@ -868,7 +871,7 @@ def append_gradient_clip_ops(param_grads): # change wrong mapping relation between param & grad in clip op -# Note: This function is sensitive to the time cost of the network with gradient clipping +# Note: This function is sensitive to the time cost of the network with gradient clipping # and should not be changed easily. If you must change, please test the time cost. def _correct_clip_op_role_var(params_grads, param_new_grad_name_dict): block_id_list = [] diff --git a/python/paddle/fluid/communicator.py b/python/paddle/fluid/communicator.py index d12af8ee723..291a6b58377 100644 --- a/python/paddle/fluid/communicator.py +++ b/python/paddle/fluid/communicator.py @@ -38,6 +38,7 @@ __all__ = ['Communicator', 'LargeScaleKV'] class Communicator(object): + def __init__(self, mode, kwargs=None, envs=None): """ Communicator is used for async distribute training in distribute_transpiler mode. 
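
For reference, the rule that the ClipGradByGlobalNorm hunks above keep re-wrapping is short: compute the global L2 norm over all gradients, then scale every gradient by clip_norm / max(global_norm, clip_norm), which leaves the gradients untouched whenever the global norm is already within the limit. A small NumPy sketch of just that rule (the function and variable names are illustrative, not Paddle API):

import numpy as np

def clip_by_global_norm(grads, clip_norm):
    # global L2 norm across every gradient tensor
    global_norm = np.sqrt(sum(float(np.sum(np.square(g))) for g in grads))
    scale = clip_norm / max(global_norm, clip_norm)
    return [g * scale for g in grads], global_norm

grads = [np.array([3.0, 4.0]), np.array([0.0, 12.0])]  # global norm == 13
clipped, norm = clip_by_global_norm(grads, clip_norm=5.0)
print(norm)                                            # 13.0
print(np.sqrt(sum(np.sum(c * c) for c in clipped)))    # 5.0 after scaling
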
@@ -67,8 +68,8 @@ class Communicator(object): envs = {} else: if mode == DistributedMode.SYNC: - envs["pserver_endpoints"] = ','.join(kwargs[ - "pserver_endpoints"]) + envs["pserver_endpoints"] = ','.join( + kwargs["pserver_endpoints"]) envs["trainers"] = str(kwargs["trainers"]) envs["trainer_id"] = str(kwargs["trainer_id"]) @@ -208,6 +209,7 @@ class Communicator(object): class LargeScaleKV(object): + def __init__(self): self.scale_kv = core.LargeScaleKV() @@ -222,6 +224,7 @@ class LargeScaleKV(object): class HeterClient(object): + def __init__(self, endpoint, previous_endpoint, trainer_id): self.heter_client_ = core.HeterClient(endpoint, previous_endpoint, trainer_id) diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py index 47c64ff8bd6..06f206c36d1 100644 --- a/python/paddle/fluid/compiler.py +++ b/python/paddle/fluid/compiler.py @@ -426,9 +426,9 @@ class CompiledProgram(object): return core.ParallelExecutor( places, self._persistable_vars, - cpt.to_text(self._loss_name) - if self._loss_name else six.u(''), self._scope, self._local_scopes, - self._exec_strategy, self._build_strategy, self._graph) + cpt.to_text(self._loss_name) if self._loss_name else six.u(''), + self._scope, self._local_scopes, self._exec_strategy, + self._build_strategy, self._graph) def _compile_inference(self): return core.create_paddle_predictor(self._infer_config) @@ -477,8 +477,9 @@ class CompiledProgram(object): use_device = DeviceType.XPU else: use_device = DeviceType.CPU - self._executor = self._compile_data_parallel( - use_device=use_device, scope=self._scope, places=self._places) + self._executor = self._compile_data_parallel(use_device=use_device, + scope=self._scope, + places=self._places) return self def _get_places(self, place, place_list): @@ -659,7 +660,9 @@ class IpuStrategy(object): ipu_strategy = static.IpuStrategy() ipu_strategy.set_precision_config(enable_fp16=False) """ - options = {'enable_fp16': enable_fp16, } + options = { + 'enable_fp16': enable_fp16, + } self.set_options(options) def add_custom_op(self, diff --git a/python/paddle/fluid/contrib/decoder/beam_search_decoder.py b/python/paddle/fluid/contrib/decoder/beam_search_decoder.py index 7d22e9796cc..6032238910e 100644 --- a/python/paddle/fluid/contrib/decoder/beam_search_decoder.py +++ b/python/paddle/fluid/contrib/decoder/beam_search_decoder.py @@ -78,10 +78,13 @@ class InitState(object): self._init = init elif init_boot is None: raise ValueError( - 'init_boot must be provided to infer the shape of InitState .\n') + 'init_boot must be provided to infer the shape of InitState .\n' + ) else: - self._init = layers.fill_constant_batch_size_like( - input=init_boot, value=value, shape=shape, dtype=dtype) + self._init = layers.fill_constant_batch_size_like(input=init_boot, + value=value, + shape=shape, + dtype=dtype) self._shape = shape self._value = value @@ -98,6 +101,7 @@ class InitState(object): class _MemoryState(object): + def __init__(self, state_name, rnn_obj, init_state): self._state_name = state_name # each is a rnn.memory self._rnn_obj = rnn_obj @@ -112,6 +116,7 @@ class _MemoryState(object): class _ArrayState(object): + def __init__(self, state_name, block, init_state): self._state_name = state_name self._block = block @@ -127,25 +132,25 @@ class _ArrayState(object): dtype='int64') # initialize counter - self._block.append_op( - type='fill_constant', - inputs={}, - outputs={'Out': [self._counter]}, - attrs={ - 'shape': [1], - 'dtype': self._counter.dtype, - 'value': float(0.0), - 'force_cpu': True - }) + 
self._block.append_op(type='fill_constant', + inputs={}, + outputs={'Out': [self._counter]}, + attrs={ + 'shape': [1], + 'dtype': self._counter.dtype, + 'value': float(0.0), + 'force_cpu': True + }) self._counter.stop_gradient = True # write initial state - block.append_op( - type='write_to_array', - inputs={'X': init_state.value, - 'I': self._counter}, - outputs={'Out': self._state_array}) + block.append_op(type='write_to_array', + inputs={ + 'X': init_state.value, + 'I': self._counter + }, + outputs={'Out': self._state_array}) def get_state(self): state = layers.array_read(array=self._state_array, i=self._counter) @@ -588,18 +593,21 @@ class BeamSearchDecoder(object): self._counter = layers.zeros(shape=[1], dtype='int64') self._counter.stop_gradient = True self._type = _DecoderType.BEAM_SEARCH - self._max_len = layers.fill_constant( - shape=[1], dtype='int64', value=max_len) - self._cond = layers.less_than( - x=self._counter, - y=layers.fill_constant( - shape=[1], dtype='int64', value=max_len)) + self._max_len = layers.fill_constant(shape=[1], + dtype='int64', + value=max_len) + self._cond = layers.less_than(x=self._counter, + y=layers.fill_constant(shape=[1], + dtype='int64', + value=max_len)) self._while_op = layers.While(self._cond) self._state_cell = state_cell self._state_cell._enter_decoder(self) self._status = BeamSearchDecoder.BEFORE_BEAM_SEARCH_DECODER - self._zero_idx = layers.fill_constant( - shape=[1], value=0, dtype='int64', force_cpu=True) + self._zero_idx = layers.fill_constant(shape=[1], + value=0, + dtype='int64', + force_cpu=True) self._array_dict = {} self._array_link = [] self._ids_array = None @@ -632,11 +640,13 @@ class BeamSearchDecoder(object): layers.increment(x=self._counter, value=1.0, in_place=True) for value, array in self._array_link: - layers.array_write( - x=value, i=self._counter, array=array) + layers.array_write(x=value, + i=self._counter, + array=array) - layers.less_than( - x=self._counter, y=self._max_len, cond=self._cond) + layers.less_than(x=self._counter, + y=self._max_len, + cond=self._cond) self._status = BeamSearchDecoder.AFTER_BEAM_SEARCH_DECODER self._state_cell._leave_decoder(self) @@ -649,8 +659,11 @@ class BeamSearchDecoder(object): """ Stop the generation process in advance. Could be used as "break". 
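
The decoder hunks above all sit on the same static-graph while-loop scaffolding: a counter, an upper bound, and a boolean condition variable that layers.less_than refreshes at the end of each iteration; early_stop simply overwrites that condition with False through fill_constant. Below is a minimal sketch of the scaffolding alone, written against the legacy fluid API used throughout this patch; the loop body here does nothing but count.

import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers

paddle.enable_static()

counter = layers.fill_constant(shape=[1], dtype='int64', value=0)
limit = layers.fill_constant(shape=[1], dtype='int64', value=10)
cond = layers.less_than(x=counter, y=limit)

while_op = layers.While(cond=cond)
with while_op.block():
    layers.increment(x=counter, value=1, in_place=True)
    # refresh the condition so the loop can terminate
    layers.less_than(x=counter, y=limit, cond=cond)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
res, = exe.run(fluid.default_main_program(), fetch_list=[counter])
print(res)  # [10]
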
""" - layers.fill_constant( - shape=[1], value=0, dtype='bool', force_cpu=True, out=self._cond) + layers.fill_constant(shape=[1], + value=0, + dtype='bool', + force_cpu=True, + out=self._cond) def decode(self): """ @@ -665,8 +678,8 @@ class BeamSearchDecoder(object): """ with self.block(): prev_ids = self.read_array(init=self._init_ids, is_ids=True) - prev_scores = self.read_array( - init=self._init_scores, is_scores=True) + prev_scores = self.read_array(init=self._init_scores, + is_scores=True) prev_ids_embedding = layers.embedding( input=prev_ids, size=[self._target_dict_dim, self._word_dim], @@ -683,14 +696,14 @@ class BeamSearchDecoder(object): read_var = self.read_array(init=init_var) update_dict[init_var_name] = read_var - feed_var_expanded = layers.sequence_expand(read_var, - prev_scores) + feed_var_expanded = layers.sequence_expand( + read_var, prev_scores) feed_dict[init_var_name] = feed_var_expanded for state_str in self._state_cell._state_names: prev_state = self.state_cell.get_state(state_str) - prev_state_expanded = layers.sequence_expand(prev_state, - prev_scores) + prev_state_expanded = layers.sequence_expand( + prev_state, prev_scores) self.state_cell.set_state(state_str, prev_state_expanded) for i, input_name in enumerate(self._state_cell._inputs): @@ -699,25 +712,23 @@ class BeamSearchDecoder(object): self.state_cell.compute_state(inputs=feed_dict) current_state = self.state_cell.out_state() - current_state_with_lod = layers.lod_reset( - x=current_state, y=prev_scores) + current_state_with_lod = layers.lod_reset(x=current_state, + y=prev_scores) scores = layers.fc(input=current_state_with_lod, size=self._target_dict_dim, act='softmax') topk_scores, topk_indices = layers.topk(scores, k=self._topk_size) - accu_scores = layers.elementwise_add( - x=layers.log(x=topk_scores), - y=layers.reshape( - prev_scores, shape=[-1]), - axis=0) - selected_ids, selected_scores = layers.beam_search( - prev_ids, - prev_scores, - topk_indices, - accu_scores, - self._beam_size, - end_id=1, - level=0) + accu_scores = layers.elementwise_add(x=layers.log(x=topk_scores), + y=layers.reshape(prev_scores, + shape=[-1]), + axis=0) + selected_ids, selected_scores = layers.beam_search(prev_ids, + prev_scores, + topk_indices, + accu_scores, + self._beam_size, + end_id=1, + level=0) with layers.Switch() as switch: with switch.case(layers.is_empty(selected_ids)): @@ -764,11 +775,12 @@ class BeamSearchDecoder(object): name=unique_name.generate('beam_search_decoder_array'), type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, dtype=init.dtype) - parent_block.append_op( - type='write_to_array', - inputs={'X': init, - 'I': self._zero_idx}, - outputs={'Out': array}) + parent_block.append_op(type='write_to_array', + inputs={ + 'X': init, + 'I': self._zero_idx + }, + outputs={'Out': array}) if is_ids: self._ids_array = array @@ -813,11 +825,10 @@ class BeamSearchDecoder(object): if self._status != BeamSearchDecoder.AFTER_BEAM_SEARCH_DECODER: raise ValueError('Output of BeamSearchDecoder object can ' 'only be visited outside the block.') - return layers.beam_search_decode( - ids=self._ids_array, - scores=self._scores_array, - beam_size=self._beam_size, - end_id=self._end_id) + return layers.beam_search_decode(ids=self._ids_array, + scores=self._scores_array, + beam_size=self._beam_size, + end_id=self._end_id) @property def state_cell(self): diff --git a/python/paddle/fluid/contrib/extend_optimizer/extend_optimizer_with_weight_decay.py b/python/paddle/fluid/contrib/extend_optimizer/extend_optimizer_with_weight_decay.py 
index fcc99c07346..6a87bb54d3f 100644 --- a/python/paddle/fluid/contrib/extend_optimizer/extend_optimizer_with_weight_decay.py +++ b/python/paddle/fluid/contrib/extend_optimizer/extend_optimizer_with_weight_decay.py @@ -18,6 +18,7 @@ __all__ = ["extend_with_decoupled_weight_decay"] class DecoupledWeightDecay(object): + def __init__(self, coeff=0.0, apply_decay_param_fun=None, **kwargs): if not isinstance(coeff, float) and \ not isinstance(coeff, framework.Variable): @@ -75,11 +76,10 @@ class DecoupledWeightDecay(object): startup_program=None, parameter_list=None, no_grad_set=None): - params_grads = self.backward( - loss=loss, - startup_program=startup_program, - parameter_list=parameter_list, - no_grad_set=no_grad_set) + params_grads = self.backward(loss=loss, + startup_program=startup_program, + parameter_list=parameter_list, + no_grad_set=no_grad_set) scaled_params = self._scale_parameters(params_grads) for p_grad_sgrad in scaled_params: param, grad, scaled_param = p_grad_sgrad @@ -89,10 +89,9 @@ class DecoupledWeightDecay(object): x=param, y=scaled_param) paddle.fluid.layers.assign(input=updated_param, output=param) - optimize_ops = self.apply_optimize( - loss=loss, - params_grads=params_grads, - startup_program=startup_program) + optimize_ops = self.apply_optimize(loss=loss, + params_grads=params_grads, + startup_program=startup_program) return optimize_ops, params_grads def __str__(self): @@ -146,7 +145,7 @@ def extend_with_decoupled_weight_decay(base_optimizer): """ def __init__(self, weight_decay, apply_decay_param_fun=None, **kwargs): - super(OptimizerWithDecoupledWeightDecay, self).__init__( - weight_decay, apply_decay_param_fun, **kwargs) + super(OptimizerWithDecoupledWeightDecay, + self).__init__(weight_decay, apply_decay_param_fun, **kwargs) return OptimizerWithDecoupledWeightDecay diff --git a/python/paddle/fluid/contrib/layers/metric_op.py b/python/paddle/fluid/contrib/layers/metric_op.py index f76a3283f2f..812f616ef99 100644 --- a/python/paddle/fluid/contrib/layers/metric_op.py +++ b/python/paddle/fluid/contrib/layers/metric_op.py @@ -67,122 +67,136 @@ def ctr_metric_bundle(input, label): assert input.shape == label.shape helper = LayerHelper("ctr_metric_bundle", **locals()) - local_abserr = helper.create_global_variable( - persistable=True, dtype='float32', shape=[1]) - local_sqrerr = helper.create_global_variable( - persistable=True, dtype='float32', shape=[1]) - local_prob = helper.create_global_variable( - persistable=True, dtype='float32', shape=[1]) - local_q = helper.create_global_variable( - persistable=True, dtype='float32', shape=[1]) - local_pos_num = helper.create_global_variable( - persistable=True, dtype='float32', shape=[1]) - local_ins_num = helper.create_global_variable( - persistable=True, dtype='float32', shape=[1]) - - tmp_res_elesub = helper.create_global_variable( - persistable=False, dtype='float32', shape=[-1]) - tmp_res_sigmoid = helper.create_global_variable( - persistable=False, dtype='float32', shape=[-1]) - tmp_ones = helper.create_global_variable( - persistable=False, dtype='float32', shape=[-1]) - - batch_prob = helper.create_global_variable( - persistable=False, dtype='float32', shape=[1]) - batch_abserr = helper.create_global_variable( - persistable=False, dtype='float32', shape=[1]) - batch_sqrerr = helper.create_global_variable( - persistable=False, dtype='float32', shape=[1]) - batch_q = helper.create_global_variable( - persistable=False, dtype='float32', shape=[1]) - batch_pos_num = helper.create_global_variable( - persistable=False, 
dtype='float32', shape=[1]) - batch_ins_num = helper.create_global_variable( - persistable=False, dtype='float32', shape=[1]) + local_abserr = helper.create_global_variable(persistable=True, + dtype='float32', + shape=[1]) + local_sqrerr = helper.create_global_variable(persistable=True, + dtype='float32', + shape=[1]) + local_prob = helper.create_global_variable(persistable=True, + dtype='float32', + shape=[1]) + local_q = helper.create_global_variable(persistable=True, + dtype='float32', + shape=[1]) + local_pos_num = helper.create_global_variable(persistable=True, + dtype='float32', + shape=[1]) + local_ins_num = helper.create_global_variable(persistable=True, + dtype='float32', + shape=[1]) + + tmp_res_elesub = helper.create_global_variable(persistable=False, + dtype='float32', + shape=[-1]) + tmp_res_sigmoid = helper.create_global_variable(persistable=False, + dtype='float32', + shape=[-1]) + tmp_ones = helper.create_global_variable(persistable=False, + dtype='float32', + shape=[-1]) + + batch_prob = helper.create_global_variable(persistable=False, + dtype='float32', + shape=[1]) + batch_abserr = helper.create_global_variable(persistable=False, + dtype='float32', + shape=[1]) + batch_sqrerr = helper.create_global_variable(persistable=False, + dtype='float32', + shape=[1]) + batch_q = helper.create_global_variable(persistable=False, + dtype='float32', + shape=[1]) + batch_pos_num = helper.create_global_variable(persistable=False, + dtype='float32', + shape=[1]) + batch_ins_num = helper.create_global_variable(persistable=False, + dtype='float32', + shape=[1]) for var in [ local_abserr, batch_abserr, local_sqrerr, batch_sqrerr, local_prob, batch_prob, local_q, batch_q, batch_pos_num, batch_ins_num, local_pos_num, local_ins_num ]: - helper.set_variable_initializer( - var, Constant( - value=0.0, force_cpu=True)) - - helper.append_op( - type="elementwise_sub", - inputs={"X": [input], - "Y": [label]}, - outputs={"Out": [tmp_res_elesub]}) - - helper.append_op( - type="squared_l2_norm", - inputs={"X": [tmp_res_elesub]}, - outputs={"Out": [batch_sqrerr]}) - helper.append_op( - type="elementwise_add", - inputs={"X": [batch_sqrerr], - "Y": [local_sqrerr]}, - outputs={"Out": [local_sqrerr]}) - - helper.append_op( - type="l1_norm", - inputs={"X": [tmp_res_elesub]}, - outputs={"Out": [batch_abserr]}) - helper.append_op( - type="elementwise_add", - inputs={"X": [batch_abserr], - "Y": [local_abserr]}, - outputs={"Out": [local_abserr]}) - - helper.append_op( - type="reduce_sum", inputs={"X": [input]}, - outputs={"Out": [batch_prob]}) - helper.append_op( - type="elementwise_add", - inputs={"X": [batch_prob], - "Y": [local_prob]}, - outputs={"Out": [local_prob]}) - helper.append_op( - type="sigmoid", - inputs={"X": [input]}, - outputs={"Out": [tmp_res_sigmoid]}) - helper.append_op( - type="reduce_sum", - inputs={"X": [tmp_res_sigmoid]}, - outputs={"Out": [batch_q]}) - helper.append_op( - type="elementwise_add", - inputs={"X": [batch_q], - "Y": [local_q]}, - outputs={"Out": [local_q]}) - - helper.append_op( - type="reduce_sum", - inputs={"X": [label]}, - outputs={"Out": [batch_pos_num]}) - helper.append_op( - type="elementwise_add", - inputs={"X": [batch_pos_num], - "Y": [local_pos_num]}, - outputs={"Out": [local_pos_num]}) - - helper.append_op( - type='fill_constant_batch_size_like', - inputs={"Input": label}, - outputs={'Out': [tmp_ones]}, - attrs={ - 'shape': [-1, 1], - 'dtype': tmp_ones.dtype, - 'value': float(1.0), - }) - helper.append_op( - type="reduce_sum", - inputs={"X": [tmp_ones]}, - 
outputs={"Out": [batch_ins_num]}) - helper.append_op( - type="elementwise_add", - inputs={"X": [batch_ins_num], - "Y": [local_ins_num]}, - outputs={"Out": [local_ins_num]}) + helper.set_variable_initializer(var, Constant(value=0.0, + force_cpu=True)) + + helper.append_op(type="elementwise_sub", + inputs={ + "X": [input], + "Y": [label] + }, + outputs={"Out": [tmp_res_elesub]}) + + helper.append_op(type="squared_l2_norm", + inputs={"X": [tmp_res_elesub]}, + outputs={"Out": [batch_sqrerr]}) + helper.append_op(type="elementwise_add", + inputs={ + "X": [batch_sqrerr], + "Y": [local_sqrerr] + }, + outputs={"Out": [local_sqrerr]}) + + helper.append_op(type="l1_norm", + inputs={"X": [tmp_res_elesub]}, + outputs={"Out": [batch_abserr]}) + helper.append_op(type="elementwise_add", + inputs={ + "X": [batch_abserr], + "Y": [local_abserr] + }, + outputs={"Out": [local_abserr]}) + + helper.append_op(type="reduce_sum", + inputs={"X": [input]}, + outputs={"Out": [batch_prob]}) + helper.append_op(type="elementwise_add", + inputs={ + "X": [batch_prob], + "Y": [local_prob] + }, + outputs={"Out": [local_prob]}) + helper.append_op(type="sigmoid", + inputs={"X": [input]}, + outputs={"Out": [tmp_res_sigmoid]}) + helper.append_op(type="reduce_sum", + inputs={"X": [tmp_res_sigmoid]}, + outputs={"Out": [batch_q]}) + helper.append_op(type="elementwise_add", + inputs={ + "X": [batch_q], + "Y": [local_q] + }, + outputs={"Out": [local_q]}) + + helper.append_op(type="reduce_sum", + inputs={"X": [label]}, + outputs={"Out": [batch_pos_num]}) + helper.append_op(type="elementwise_add", + inputs={ + "X": [batch_pos_num], + "Y": [local_pos_num] + }, + outputs={"Out": [local_pos_num]}) + + helper.append_op(type='fill_constant_batch_size_like', + inputs={"Input": label}, + outputs={'Out': [tmp_ones]}, + attrs={ + 'shape': [-1, 1], + 'dtype': tmp_ones.dtype, + 'value': float(1.0), + }) + helper.append_op(type="reduce_sum", + inputs={"X": [tmp_ones]}, + outputs={"Out": [batch_ins_num]}) + helper.append_op(type="elementwise_add", + inputs={ + "X": [batch_ins_num], + "Y": [local_ins_num] + }, + outputs={"Out": [local_ins_num]}) return local_sqrerr, local_abserr, local_prob, local_q, local_pos_num, local_ins_num diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index c73ea8b5b0e..e71c73b3914 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -97,18 +97,21 @@ def fused_elemwise_activation(x, helper = LayerHelper('fused_elemwise_activation', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) intermediate_out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='fused_elemwise_activation', - inputs={'X': x, - 'Y': y}, - outputs={'Out': out, - 'IntermediateOut': intermediate_out}, - attrs={ - 'axis': axis, - 'scale': scale, - 'save_intermediate_out': save_intermediate_out, - 'functor_list': functor_list - }) + helper.append_op(type='fused_elemwise_activation', + inputs={ + 'X': x, + 'Y': y + }, + outputs={ + 'Out': out, + 'IntermediateOut': intermediate_out + }, + attrs={ + 'axis': axis, + 'scale': scale, + 'save_intermediate_out': save_intermediate_out, + 'functor_list': functor_list + }) return out @@ -202,30 +205,32 @@ def var_conv_2d(input, filter_param = helper.create_parameter( attr=helper.param_attr, shape=filter_shape, - dtype=dtype, ) + dtype=dtype, + ) conv_res = helper.create_variable_for_type_inference(dtype) - tmp_res = helper.create_variable_for_type_inference( - 
dtype, stop_gradient=True) - - helper.append_op( - type='var_conv_2d', - inputs={ - 'X': input, - 'ROW': row, - 'COLUMN': col, - 'W': filter_param, - }, - outputs={"Out": conv_res, - "Col": tmp_res}, - attrs={ - 'InputChannel': input_channel, - 'OutputChannel': output_channel, - 'StrideH': stride[0], - 'StrideW': stride[1], - 'KernelH': filter_size[0], - 'KernelW': filter_size[1], - }) + tmp_res = helper.create_variable_for_type_inference(dtype, + stop_gradient=True) + + helper.append_op(type='var_conv_2d', + inputs={ + 'X': input, + 'ROW': row, + 'COLUMN': col, + 'W': filter_param, + }, + outputs={ + "Out": conv_res, + "Col": tmp_res + }, + attrs={ + 'InputChannel': input_channel, + 'OutputChannel': output_channel, + 'StrideH': stride[0], + 'StrideW': stride[1], + 'KernelH': filter_size[0], + 'KernelW': filter_size[1], + }) return helper.append_activation(conv_res) @@ -294,25 +299,28 @@ def match_matrix_tensor(x, x_shape = list(x.shape) y_shape = list(y.shape) - assert len(x_shape) == 2 and len(y_shape) == 2 and x_shape[-1] == y_shape[ - -1] + assert len(x_shape) == 2 and len( + y_shape) == 2 and x_shape[-1] == y_shape[-1] weight_shape = [x_shape[-1], channel_num, y_shape[-1]] - w = helper.create_parameter( - attr=helper.param_attr, shape=weight_shape, dtype=dtype, is_bias=False) + w = helper.create_parameter(attr=helper.param_attr, + shape=weight_shape, + dtype=dtype, + is_bias=False) mm_res = helper.create_variable_for_type_inference(dtype) - tmp_res = helper.create_variable_for_type_inference( - dtype, stop_gradient=True) - helper.append_op( - type='match_matrix_tensor', - inputs={ - 'X': x, - 'Y': y, - 'W': w, - }, - outputs={"Out": mm_res, - "Tmp": tmp_res}, - attrs={'dim_t': channel_num}) + tmp_res = helper.create_variable_for_type_inference(dtype, + stop_gradient=True) + helper.append_op(type='match_matrix_tensor', + inputs={ + 'X': x, + 'Y': y, + 'W': w, + }, + outputs={ + "Out": mm_res, + "Tmp": tmp_res + }, + attrs={'dim_t': channel_num}) return helper.append_activation(mm_res), tmp_res @@ -370,17 +378,22 @@ def sequence_topk_avg_pooling(input, row, col, topks, channel_num): """ helper = LayerHelper('sequence_topk_avg_pooling', **locals()) out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) - pos = helper.create_variable_for_type_inference( - dtype=helper.input_dtype(), stop_gradient=True) - helper.append_op( - type='sequence_topk_avg_pooling', - inputs={'X': input, - 'ROW': row, - 'COLUMN': col}, - outputs={'Out': out, - 'pos': pos}, - attrs={'topks': topks, - 'channel_num': channel_num}) + pos = helper.create_variable_for_type_inference(dtype=helper.input_dtype(), + stop_gradient=True) + helper.append_op(type='sequence_topk_avg_pooling', + inputs={ + 'X': input, + 'ROW': row, + 'COLUMN': col + }, + outputs={ + 'Out': out, + 'pos': pos + }, + attrs={ + 'topks': topks, + 'channel_num': channel_num + }) return out @@ -439,16 +452,21 @@ edge_set(${edge_set_type}) : $ { edge_set_comment } dtype = helper.input_dtype('nodes_vector') feature_size = nodes_vector.shape[2] W_shape = [feature_size, 3, output_size, num_filters] - W = helper.create_parameter( - attr=param_attr, shape=W_shape, dtype=dtype, is_bias=False) + W = helper.create_parameter(attr=param_attr, + shape=W_shape, + dtype=dtype, + is_bias=False) out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type='tree_conv', - inputs={'NodesVector': nodes_vector, - 'EdgeSet': edge_set, - 'Filter': W}, - outputs={'Out': out, }, - attrs={'max_depth': max_depth}) + 
helper.append_op(type='tree_conv', + inputs={ + 'NodesVector': nodes_vector, + 'EdgeSet': edge_set, + 'Filter': W + }, + outputs={ + 'Out': out, + }, + attrs={'max_depth': max_depth}) if helper.bias_attr: pre_activation = helper.append_bias_op(out) else: @@ -505,21 +523,24 @@ def fused_embedding_seq_pool(input, is_sparse=False) """ helper = LayerHelper('fused_embedding_seq_pool', **locals()) - w = helper.create_parameter( - attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False) + w = helper.create_parameter(attr=helper.param_attr, + shape=size, + dtype=dtype, + is_bias=False) out = helper.create_variable_for_type_inference(dtype) padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else ( size[0] + padding_idx) - helper.append_op( - type='fused_embedding_seq_pool', - inputs={'Ids': input, - 'W': w}, - outputs={'Out': out}, - attrs={ - 'is_sparse': is_sparse, - 'combiner': combiner, - 'padding_idx': padding_idx - }) + helper.append_op(type='fused_embedding_seq_pool', + inputs={ + 'Ids': input, + 'W': w + }, + outputs={'Out': out}, + attrs={ + 'is_sparse': is_sparse, + 'combiner': combiner, + 'padding_idx': padding_idx + }) return out @@ -589,17 +610,18 @@ def fused_seqpool_cvm(input, for i in range(len(inputs)) ] - helper.append_op( - type="fused_seqpool_cvm", - inputs={"X": inputs, - "CVM": cvm}, - outputs={"Out": outs}, - attrs={ - "pooltype": pool_type.upper(), - "pad_value": pad_value, - "use_cvm": use_cvm, - "cvm_offset": cvm_offset, - }) + helper.append_op(type="fused_seqpool_cvm", + inputs={ + "X": inputs, + "CVM": cvm + }, + outputs={"Out": outs}, + attrs={ + "pooltype": pool_type.upper(), + "pad_value": pad_value, + "use_cvm": use_cvm, + "cvm_offset": cvm_offset, + }) return outs @@ -710,21 +732,24 @@ def multiclass_nms2(bboxes, output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) index = helper.create_variable_for_type_inference(dtype='int') - helper.append_op( - type="multiclass_nms2", - inputs={'BBoxes': bboxes, - 'Scores': scores}, - attrs={ - 'background_label': background_label, - 'score_threshold': score_threshold, - 'nms_top_k': nms_top_k, - 'nms_threshold': nms_threshold, - 'keep_top_k': keep_top_k, - 'nms_eta': nms_eta, - 'normalized': normalized - }, - outputs={'Out': output, - 'Index': index}) + helper.append_op(type="multiclass_nms2", + inputs={ + 'BBoxes': bboxes, + 'Scores': scores + }, + attrs={ + 'background_label': background_label, + 'score_threshold': score_threshold, + 'nms_top_k': nms_top_k, + 'nms_threshold': nms_threshold, + 'keep_top_k': keep_top_k, + 'nms_eta': nms_eta, + 'normalized': normalized + }, + outputs={ + 'Out': output, + 'Index': index + }) output.stop_gradient = True index.stop_gradient = True @@ -787,22 +812,28 @@ def search_pyramid_hash(input, helper = LayerHelper('search_pyramid_hash', **locals()) w_shape = [space_len + rand_len, 1] - w = helper.create_parameter( - attr=param_attr, shape=w_shape, dtype=dtype, is_bias=False) + w = helper.create_parameter(attr=param_attr, + shape=w_shape, + dtype=dtype, + is_bias=False) w.stop_gradient = True input_vars = {'X': input, 'W': w} if white_list_len > 0: wl_shape = [white_list_len, 1] - white_list = helper.create_parameter( - attr=param_attr_wl, shape=wl_shape, dtype=dtype, is_bias=False) + white_list = helper.create_parameter(attr=param_attr_wl, + shape=wl_shape, + dtype=dtype, + is_bias=False) white_list.stop_gradient = True input_vars['WhiteList'] = white_list if black_list_len >= 0: bl_shape = [black_list_len, 1] - black_list = 
helper.create_parameter( - attr=param_attr_bl, shape=bl_shape, dtype=dtype, is_bias=False) + black_list = helper.create_parameter(attr=param_attr_bl, + shape=bl_shape, + dtype=dtype, + is_bias=False) black_list.stop_gradient = True input_vars['BlackList'] = black_list @@ -825,26 +856,27 @@ def search_pyramid_hash(input, res = helper.create_variable_for_type_inference(dtype) drop_pos = helper.create_variable_for_type_inference(dtype) x_temp_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='pyramid_hash', - inputs=input_vars, - outputs={"Out": res, - "X_Temp_Out": x_temp_out, - 'DropPos': drop_pos}, - attrs={ - 'num_emb': num_emb, - 'space_len': space_len, - 'pyramid_layer': pyramid_layer, - 'rand_len': rand_len, - 'drop_out_percent': drop_out_percent, - 'is_training': is_training, - 'use_filter': use_filter, - 'white_list_len': white_list_len, - 'black_list_len': black_list_len, - 'seed': seed, - 'lr': lr, - 'distribute_update_vars': distribute_update_vars_str - }) + helper.append_op(type='pyramid_hash', + inputs=input_vars, + outputs={ + "Out": res, + "X_Temp_Out": x_temp_out, + 'DropPos': drop_pos + }, + attrs={ + 'num_emb': num_emb, + 'space_len': space_len, + 'pyramid_layer': pyramid_layer, + 'rand_len': rand_len, + 'drop_out_percent': drop_out_percent, + 'is_training': is_training, + 'use_filter': use_filter, + 'white_list_len': white_list_len, + 'black_list_len': black_list_len, + 'seed': seed, + 'lr': lr, + 'distribute_update_vars': distribute_update_vars_str + }) return res @@ -902,14 +934,17 @@ def shuffle_batch(x, seed=None): name=unique_name.generate("shuffle_batch_seed"), dtype="int64", persistable=True) - helper.append_op( - type='shuffle_batch', - inputs={'X': x, - 'Seed': seed}, - outputs={'Out': out, - 'ShuffleIdx': shuffle_idx, - 'SeedOut': seed}, - attrs=op_attrs) + helper.append_op(type='shuffle_batch', + inputs={ + 'X': x, + 'Seed': seed + }, + outputs={ + 'Out': out, + 'ShuffleIdx': shuffle_idx, + 'SeedOut': seed + }, + attrs=op_attrs) return out @@ -968,11 +1003,10 @@ def partial_concat(input, start_index=0, length=-1): attrs = {'start_index': start_index, 'length': length} helper = LayerHelper('partial_concat', **locals()) out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) - helper.append_op( - type='partial_concat', - inputs=inputs, - outputs={'Out': [out]}, - attrs=attrs) + helper.append_op(type='partial_concat', + inputs=inputs, + outputs={'Out': [out]}, + attrs=attrs) return out @@ -1025,8 +1059,10 @@ def partial_sum(input, start_index=0, length=-1): attrs['length'] = length helper = LayerHelper('partial_sum', **locals()) out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) - helper.append_op( - type='partial_sum', inputs=inputs, outputs={'Out': [out]}, attrs=attrs) + helper.append_op(type='partial_sum', + inputs=inputs, + outputs={'Out': [out]}, + attrs=attrs) return out @@ -1155,12 +1191,11 @@ def sparse_embedding(input, check_dtype(dtype, 'dtype', ['float32', 'float64'], 'paddle.static.nn.sparse_embedding') - w = helper.create_parameter( - attr=helper.param_attr, - shape=size, - type=core.VarDesc.VarType.SELECTED_ROWS, - dtype=dtype, - is_bias=False) + w = helper.create_parameter(attr=helper.param_attr, + shape=size, + type=core.VarDesc.VarType.SELECTED_ROWS, + dtype=dtype, + is_bias=False) tmp = helper.create_variable_for_type_inference(dtype) @@ -1185,20 +1220,21 @@ def sparse_embedding(input, ) entry_str = entry._to_attr() - helper.append_op( - type='lookup_table', - 
inputs={'Ids': input, - 'W': w}, - outputs={'Out': tmp}, - attrs={ - 'padding_idx': padding_idx, - 'is_sparse': True, - 'is_distributed': True, - 'remote_prefetch': True, - 'is_test': is_test, - 'entry': entry_str, - 'table_class': table_class - }) + helper.append_op(type='lookup_table', + inputs={ + 'Ids': input, + 'W': w + }, + outputs={'Out': tmp}, + attrs={ + 'padding_idx': padding_idx, + 'is_sparse': True, + 'is_distributed': True, + 'remote_prefetch': True, + 'is_test': is_test, + 'entry': entry_str, + 'table_class': table_class + }) return tmp @@ -1266,25 +1302,29 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'): check_dtype(dtype, 'dtype', ['int32', 'int64'], 'fluid.contrib.layers.tdm_child') c_dtype = convert_np_dtype_to_dtype_(dtype) - tree_info = helper.create_parameter( - attr=helper.param_attr, - shape=[node_nums, 3 + child_nums], - dtype=dtype, - default_initializer=Constant(0)) + tree_info = helper.create_parameter(attr=helper.param_attr, + shape=[node_nums, 3 + child_nums], + dtype=dtype, + default_initializer=Constant(0)) tree_info.stop_gradient = True child = helper.create_variable_for_type_inference(dtype=dtype) leaf_mask = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type='tdm_child', - inputs={'X': x, - 'TreeInfo': tree_info}, - outputs={'Child': child, - 'LeafMask': leaf_mask}, - attrs={'child_nums': child_nums, - 'dtype': c_dtype}, - stop_gradient=True) + helper.append_op(type='tdm_child', + inputs={ + 'X': x, + 'TreeInfo': tree_info + }, + outputs={ + 'Child': child, + 'LeafMask': leaf_mask + }, + attrs={ + 'child_nums': child_nums, + 'dtype': c_dtype + }, + stop_gradient=True) return (child, leaf_mask) @@ -1411,23 +1451,21 @@ def tdm_sampler(x, "The number of negative samples must be less than the number of nodes " "in the layer {}, But received negative nums {}, and num of node at layer {} " "is {}, please check your input.".format( - layer_idx, neg_samples_num_list[ - layer_idx], layer_idx, layer_node_num_list[layer_idx])) + layer_idx, neg_samples_num_list[layer_idx], layer_idx, + layer_node_num_list[layer_idx])) assert leaf_node_num < node_nums, "leaf_node_num must be less than total node nums." 
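
Almost every hunk in this file follows the same three-step recipe for emitting an operator into the static graph: build a LayerHelper, create the output variable, then call helper.append_op with explicit inputs, outputs and attrs dictionaries. Below is a minimal sketch of that recipe wrapping the existing elementwise_add operator; the 'X'/'Y'/'Out' names and the 'axis' attribute are taken from the hunks above, while the wrapper function itself is only illustrative.

import paddle
from paddle.fluid.layer_helper import LayerHelper

paddle.enable_static()

def add_via_append_op(x, y, name=None):
    helper = LayerHelper('elementwise_add', **locals())
    out = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(type='elementwise_add',
                     inputs={'X': [x], 'Y': [y]},
                     outputs={'Out': [out]},
                     attrs={'axis': -1})
    return out

x = paddle.static.data(name='x', shape=[2, 3], dtype='float32')
y = paddle.static.data(name='y', shape=[2, 3], dtype='float32')
z = add_via_append_op(x, y)  # appends one elementwise_add op to the default program
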
travel_shape = [leaf_node_num, layer_nums] - travel = helper.create_parameter( - attr=tree_travel_attr, - shape=travel_shape, - dtype=tree_dtype, - default_initializer=Constant(0)) + travel = helper.create_parameter(attr=tree_travel_attr, + shape=travel_shape, + dtype=tree_dtype, + default_initializer=Constant(0)) layer_shape = [node_nums, 1] - layer = helper.create_parameter( - attr=tree_layer_attr, - shape=layer_shape, - dtype=tree_dtype, - default_initializer=Constant(0)) + layer = helper.create_parameter(attr=tree_layer_attr, + shape=layer_shape, + dtype=tree_dtype, + default_initializer=Constant(0)) out = helper.create_variable_for_type_inference(dtype=dtype) out.stop_gradient = True @@ -1438,21 +1476,24 @@ def tdm_sampler(x, mask = helper.create_variable_for_type_inference(dtype=dtype) mask.stop_gradient = True - helper.append_op( - type='tdm_sampler', - inputs={"X": x, - "Travel": travel, - "Layer": layer}, - outputs={'Out': out, - 'Labels': labels, - 'Mask': mask}, - attrs={ - 'neg_samples_num_list': neg_samples_num_list, - 'output_positive': output_positive, - 'layer_offset_lod': tree_layer_offset_lod, - 'seed': seed, - 'dtype': c_dtype - }) + helper.append_op(type='tdm_sampler', + inputs={ + "X": x, + "Travel": travel, + "Layer": layer + }, + outputs={ + 'Out': out, + 'Labels': labels, + 'Mask': mask + }, + attrs={ + 'neg_samples_num_list': neg_samples_num_list, + 'output_positive': output_positive, + 'layer_offset_lod': tree_layer_offset_lod, + 'seed': seed, + 'dtype': c_dtype + }) if output_list: output_list = [] @@ -1466,12 +1507,18 @@ def tdm_sampler(x, for layer_sample_num in neg_samples_num_list: end_offset = start_offset + \ layer_sample_num + positive_flag - layer_samples = slice( - out, axes=[1], starts=[start_offset], ends=[end_offset]) - layer_labels = slice( - labels, axes=[1], starts=[start_offset], ends=[end_offset]) - layer_mask = slice( - mask, axes=[1], starts=[start_offset], ends=[end_offset]) + layer_samples = slice(out, + axes=[1], + starts=[start_offset], + ends=[end_offset]) + layer_labels = slice(labels, + axes=[1], + starts=[start_offset], + ends=[end_offset]) + layer_mask = slice(mask, + axes=[1], + starts=[start_offset], + ends=[end_offset]) layer_samples = reshape(layer_samples, [-1, layer_sample_num + positive_flag, 1]) @@ -1540,28 +1587,32 @@ def rank_attention(input, input_shape = input.shape assert input_shape[1] * max_rank * max_rank == rank_param_shape[0] - rank_param = helper.create_parameter( - attr=rank_param_attr, shape=rank_param_shape, dtype=dtype) + rank_param = helper.create_parameter(attr=rank_param_attr, + shape=rank_param_shape, + dtype=dtype) rank_param.stop_gradient = False output = helper.create_variable_for_type_inference(dtype) - input_help = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True) - ins_rank = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True) - - helper.append_op( - type="rank_attention", - inputs={ - "X": input, - "RankOffset": rank_offset, - "RankParam": rank_param - }, - outputs={"Out": output, - "InputHelp": input_help, - "InsRank": ins_rank}, - attrs={"MaxRank": max_rank, - "MaxSize": max_size}) + input_help = helper.create_variable_for_type_inference(dtype=dtype, + stop_gradient=True) + ins_rank = helper.create_variable_for_type_inference(dtype=dtype, + stop_gradient=True) + + helper.append_op(type="rank_attention", + inputs={ + "X": input, + "RankOffset": rank_offset, + "RankParam": rank_param + }, + outputs={ + "Out": output, + "InputHelp": input_help, + 
"InsRank": ins_rank + }, + attrs={ + "MaxRank": max_rank, + "MaxSize": max_size + }) return output @@ -1614,17 +1665,22 @@ def batch_fc(input, param_size, param_attr, bias_size, bias_attr, act=None): dtype = helper.input_dtype() check_dtype(dtype, 'input', ['float32', 'float64'], 'batch_fc') - w = helper.create_parameter( - attr=param_attr, shape=param_size, dtype=dtype, is_bias=False) - b = helper.create_parameter( - attr=bias_attr, shape=bias_size, dtype=dtype, is_bias=False) + w = helper.create_parameter(attr=param_attr, + shape=param_size, + dtype=dtype, + is_bias=False) + b = helper.create_parameter(attr=bias_attr, + shape=bias_size, + dtype=dtype, + is_bias=False) pre_act = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="batch_fc", - inputs={"Input": input, - "W": w, - "Bias": b}, - outputs={"Out": pre_act}) + helper.append_op(type="batch_fc", + inputs={ + "Input": input, + "W": w, + "Bias": b + }, + outputs={"Out": pre_act}) return helper.append_activation(pre_act) @@ -1663,13 +1719,16 @@ def _pull_box_extended_sparse(input, size, extend_size=64, dtype='float32'): helper.create_variable_for_type_inference(dtype) for i in range(len(inputs)) ] - helper.append_op( - type='pull_box_extended_sparse', - inputs={'Ids': inputs}, - outputs={'Out': outs, - 'OutExtend': outs_extend}, - attrs={'emb_size': size, - 'emb_extended_size': extend_size}) + helper.append_op(type='pull_box_extended_sparse', + inputs={'Ids': inputs}, + outputs={ + 'Out': outs, + 'OutExtend': outs_extend + }, + attrs={ + 'emb_size': size, + 'emb_extended_size': extend_size + }) if len(outs) == 1: return outs[0], outs_extend[0] return outs, outs_extend @@ -1730,11 +1789,10 @@ def bilateral_slice(x, guide, grid, has_offset, name=None): helper = LayerHelper("bilateral_slice", **locals()) out = helper.create_variable_for_type_inference(x.dtype) inputs = {'X': x, 'Guide': guide, 'Grid': grid} - helper.append_op( - type='bilateral_slice', - inputs=inputs, - attrs={'has_offset': has_offset}, - outputs={'Out': out}) + helper.append_op(type='bilateral_slice', + inputs=inputs, + attrs={'has_offset': has_offset}, + outputs={'Out': out}) return out @@ -1800,19 +1858,20 @@ def correlation(x, else: helper = LayerHelper("correlation", **locals()) output = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="correlation", - inputs={"Input1": x, - "Input2": y}, - attrs={ - "pad_size": pad_size, - "kernel_size": kernel_size, - "max_displacement": max_displacement, - "stride1": stride1, - "stride2": stride2, - "corr_type_multiply": corr_type_multiply - }, - outputs={"Output": output}) + helper.append_op(type="correlation", + inputs={ + "Input1": x, + "Input2": y + }, + attrs={ + "pad_size": pad_size, + "kernel_size": kernel_size, + "max_displacement": max_displacement, + "stride1": stride1, + "stride2": stride2, + "corr_type_multiply": corr_type_multiply + }, + outputs={"Output": output}) return output @@ -1939,29 +1998,25 @@ def fused_bn_add_act(x, param_shape = [channel_num] # create parameter - scale = helper.create_parameter( - attr=helper.param_attr, - shape=param_shape, - dtype=bn_param_dtype, - default_initializer=Constant(1.0)) - bias = helper.create_parameter( - attr=helper.bias_attr, - shape=param_shape, - dtype=bn_param_dtype, - is_bias=True) - mean = helper.create_parameter( - attr=ParamAttr( - name=moving_mean_name, initializer=Constant(0.0), trainable=False), - shape=param_shape, - dtype=bn_param_dtype) + scale = helper.create_parameter(attr=helper.param_attr, + 
shape=param_shape, + dtype=bn_param_dtype, + default_initializer=Constant(1.0)) + bias = helper.create_parameter(attr=helper.bias_attr, + shape=param_shape, + dtype=bn_param_dtype, + is_bias=True) + mean = helper.create_parameter(attr=ParamAttr(name=moving_mean_name, + initializer=Constant(0.0), + trainable=False), + shape=param_shape, + dtype=bn_param_dtype) mean.stop_gradient = True - variance = helper.create_parameter( - attr=ParamAttr( - name=moving_variance_name, - initializer=Constant(1.0), - trainable=False), - shape=param_shape, - dtype=bn_param_dtype) + variance = helper.create_parameter(attr=ParamAttr(name=moving_variance_name, + initializer=Constant(1.0), + trainable=False), + shape=param_shape, + dtype=bn_param_dtype) variance.stop_gradient = True # create output @@ -1969,8 +2024,8 @@ def fused_bn_add_act(x, mean_out = mean # variance and variance out share the same memory variance_out = variance - saved_mean = helper.create_variable_for_type_inference( - dtype=bn_param_dtype, stop_gradient=True) + saved_mean = helper.create_variable_for_type_inference(dtype=bn_param_dtype, + stop_gradient=True) saved_variance = helper.create_variable_for_type_inference( dtype=bn_param_dtype, stop_gradient=True) reserve_space = helper.create_variable_for_type_inference( @@ -1995,11 +2050,10 @@ def fused_bn_add_act(x, "ReserveSpace": reserve_space } - helper.append_op( - type="fused_bn_add_activation", - inputs=inputs, - outputs=outputs, - attrs=attrs) + helper.append_op(type="fused_bn_add_activation", + inputs=inputs, + outputs=outputs, + attrs=attrs) return batch_norm_out @@ -2019,21 +2073,25 @@ def pow2_decay_with_linear_warmup(warmup_steps, helper.set_variable_initializer( lr, Constant(value=float(base_lr) / warmup_steps)) - step = helper.create_global_variable( - persistable=True, dtype='int64', shape=[1]) + step = helper.create_global_variable(persistable=True, + dtype='int64', + shape=[1]) helper.set_variable_initializer(step, Constant(value=0)) assert warmup_steps <= total_steps, "warmup_steps cannot be larger than total_steps" - helper.append_op( - type="pow2_decay_with_linear_warmup", - inputs={"LearningRate": lr, - "Step": step}, - outputs={"LearningRateOut": lr, - "StepOut": step}, - attrs={ - "warmup_steps": warmup_steps, - "total_steps": total_steps, - "base_lr": base_lr, - "end_lr": end_lr, - }) + helper.append_op(type="pow2_decay_with_linear_warmup", + inputs={ + "LearningRate": lr, + "Step": step + }, + outputs={ + "LearningRateOut": lr, + "StepOut": step + }, + attrs={ + "warmup_steps": warmup_steps, + "total_steps": total_steps, + "base_lr": base_lr, + "end_lr": end_lr, + }) return lr diff --git a/python/paddle/fluid/contrib/layers/rnn_impl.py b/python/paddle/fluid/contrib/layers/rnn_impl.py index a2dd0835b60..0b14948bff9 100644 --- a/python/paddle/fluid/contrib/layers/rnn_impl.py +++ b/python/paddle/fluid/contrib/layers/rnn_impl.py @@ -128,16 +128,14 @@ class BasicGRUUnit(Layer): gate_bias_attr = self._bias_attr candidate_bias_attr = self._bias_attr - self._gate_bias = self.create_parameter( - attr=gate_bias_attr, - shape=[2 * self._hiden_size], - dtype=self._dtype, - is_bias=True) - self._candidate_bias = self.create_parameter( - attr=candidate_bias_attr, - shape=[self._hiden_size], - dtype=self._dtype, - is_bias=True) + self._gate_bias = self.create_parameter(attr=gate_bias_attr, + shape=[2 * self._hiden_size], + dtype=self._dtype, + is_bias=True) + self._candidate_bias = self.create_parameter(attr=candidate_bias_attr, + shape=[self._hiden_size], + dtype=self._dtype, + 
is_bias=True) def forward(self, input, pre_hidden): concat_input_hidden = layers.concat([input, pre_hidden], 1) @@ -151,8 +149,8 @@ class BasicGRUUnit(Layer): r_hidden = r * pre_hidden - candidate = layers.matmul( - layers.concat([input, r_hidden], 1), self._candidate_weight) + candidate = layers.matmul(layers.concat([input, r_hidden], 1), + self._candidate_weight) candidate = layers.elementwise_add(candidate, self._candidate_bias) c = self._activation(candidate) @@ -304,8 +302,9 @@ def basic_gru(input, mask = None if sequence_length: max_seq_len = layers.shape(input)[0] - mask = layers.sequence_mask( - sequence_length, maxlen=max_seq_len, dtype='float32') + mask = layers.sequence_mask(sequence_length, + maxlen=max_seq_len, + dtype='float32') mask = layers.transpose(mask, [1, 0]) direc_num = 1 @@ -330,10 +329,9 @@ def basic_gru(input, if init_hidden: pre_hidden = rnn.memory(init=init_hidden[i, direc_index]) else: - pre_hidden = rnn.memory( - batch_ref=rnn_input, - shape=[-1, hidden_size], - ref_batch_dim_idx=1) + pre_hidden = rnn.memory(batch_ref=rnn_input, + shape=[-1, hidden_size], + ref_batch_dim_idx=1) new_hidden = unit_list[i](step_input, pre_hidden) @@ -349,7 +347,8 @@ def basic_gru(input, if dropout_prob != None and dropout_prob > 0.0: step_input = layers.dropout( step_input, - dropout_prob=dropout_prob, ) + dropout_prob=dropout_prob, + ) rnn.step_output(step_input) @@ -363,22 +362,26 @@ def basic_gru(input, last_hidden_array.append(last_hidden) last_hidden_output = layers.concat(last_hidden_array, axis=0) - last_hidden_output = layers.reshape( - last_hidden_output, shape=[num_layers, -1, hidden_size]) + last_hidden_output = layers.reshape(last_hidden_output, + shape=[num_layers, -1, hidden_size]) return rnn_output, last_hidden_output # seq_len, batch_size, hidden_size - fw_rnn_out, fw_last_hidden = get_single_direction_output( - input, fw_unit_list, mask, direc_index=0) + fw_rnn_out, fw_last_hidden = get_single_direction_output(input, + fw_unit_list, + mask, + direc_index=0) if bidirectional: bw_input = layers.reverse(input, axis=[0]) bw_mask = None if mask: bw_mask = layers.reverse(mask, axis=[0]) - bw_rnn_out, bw_last_hidden = get_single_direction_output( - bw_input, bw_unit_list, bw_mask, direc_index=1) + bw_rnn_out, bw_last_hidden = get_single_direction_output(bw_input, + bw_unit_list, + bw_mask, + direc_index=1) bw_rnn_out = layers.reverse(bw_rnn_out, axis=[0]) @@ -532,15 +535,14 @@ def basic_lstm(input, else: layer_bias_attr = bias_attr fw_unit_list.append( - BasicLSTMUnit( - new_name, - hidden_size, - param_attr=layer_param_attr, - bias_attr=layer_bias_attr, - gate_activation=gate_activation, - activation=activation, - forget_bias=forget_bias, - dtype=dtype)) + BasicLSTMUnit(new_name, + hidden_size, + param_attr=layer_param_attr, + bias_attr=layer_bias_attr, + gate_activation=gate_activation, + activation=activation, + forget_bias=forget_bias, + dtype=dtype)) if bidirectional: bw_unit_list = [] @@ -557,15 +559,14 @@ def basic_lstm(input, else: layer_bias_attr = param_attr bw_unit_list.append( - BasicLSTMUnit( - new_name, - hidden_size, - param_attr=layer_param_attr, - bias_attr=layer_bias_attr, - gate_activation=gate_activation, - activation=activation, - forget_bias=forget_bias, - dtype=dtype)) + BasicLSTMUnit(new_name, + hidden_size, + param_attr=layer_param_attr, + bias_attr=layer_bias_attr, + gate_activation=gate_activation, + activation=activation, + forget_bias=forget_bias, + dtype=dtype)) if batch_first: input = layers.transpose(input, [1, 0, 2]) @@ -573,8 +574,9 @@ 
def basic_lstm(input, mask = None if sequence_length: max_seq_len = layers.shape(input)[0] - mask = layers.sequence_mask( - sequence_length, maxlen=max_seq_len, dtype='float32') + mask = layers.sequence_mask(sequence_length, + maxlen=max_seq_len, + dtype='float32') mask = layers.transpose(mask, [1, 0]) @@ -605,10 +607,10 @@ def basic_lstm(input, pre_hidden = rnn.memory(init=init_hidden[i, direc_index]) pre_cell = rnn.memory(init=init_cell[i, direc_index]) else: - pre_hidden = rnn.memory( - batch_ref=rnn_input, shape=[-1, hidden_size]) - pre_cell = rnn.memory( - batch_ref=rnn_input, shape=[-1, hidden_size]) + pre_hidden = rnn.memory(batch_ref=rnn_input, + shape=[-1, hidden_size]) + pre_cell = rnn.memory(batch_ref=rnn_input, + shape=[-1, hidden_size]) new_hidden, new_cell = unit_list[i](step_input, pre_hidden, pre_cell) @@ -650,11 +652,11 @@ def basic_lstm(input, last_cell_array.append(last_cell) last_hidden_output = layers.concat(last_hidden_array, axis=0) - last_hidden_output = layers.reshape( - last_hidden_output, shape=[num_layers, -1, hidden_size]) + last_hidden_output = layers.reshape(last_hidden_output, + shape=[num_layers, -1, hidden_size]) last_cell_output = layers.concat(last_cell_array, axis=0) - last_cell_output = layers.reshape( - last_cell_output, shape=[num_layers, -1, hidden_size]) + last_cell_output = layers.reshape(last_cell_output, + shape=[num_layers, -1, hidden_size]) return rnn_output, last_hidden_output, last_cell_output # seq_len, batch_size, hidden_size @@ -788,8 +790,9 @@ class BasicLSTMUnit(Layer): self._bias_attr = bias_attr self._gate_activation = gate_activation or layers.sigmoid self._activation = activation or layers.tanh - self._forget_bias = layers.fill_constant( - [1], dtype=dtype, value=forget_bias) + self._forget_bias = layers.fill_constant([1], + dtype=dtype, + value=forget_bias) self._forget_bias.stop_gradient = False self._dtype = dtype @@ -802,11 +805,10 @@ class BasicLSTMUnit(Layer): shape=[self._input_size + self._hiden_size, 4 * self._hiden_size], dtype=self._dtype) - self._bias = self.create_parameter( - attr=self._bias_attr, - shape=[4 * self._hiden_size], - dtype=self._dtype, - is_bias=True) + self._bias = self.create_parameter(attr=self._bias_attr, + shape=[4 * self._hiden_size], + dtype=self._dtype, + is_bias=True) def forward(self, input, pre_hidden, pre_cell): concat_input_hidden = layers.concat([input, pre_hidden], 1) diff --git a/python/paddle/fluid/contrib/mixed_precision/amp_nn.py b/python/paddle/fluid/contrib/mixed_precision/amp_nn.py index c5b9b9e71f6..62b98e75ea1 100644 --- a/python/paddle/fluid/contrib/mixed_precision/amp_nn.py +++ b/python/paddle/fluid/contrib/mixed_precision/amp_nn.py @@ -51,8 +51,9 @@ def check_finite_and_unscale(x, scale, name=None, float_status=None): 'check_finite_and_unscale') inputs['FloatStatus'] = float_status outputs = {'Out': x, 'FoundInfinite': found_inf} - helper.append_op( - type='check_finite_and_unscale', inputs=inputs, outputs=outputs) + helper.append_op(type='check_finite_and_unscale', + inputs=inputs, + outputs=outputs) return x, found_inf @@ -136,7 +137,9 @@ def update_loss_scaling(x, else: attrs['stop_update'] = stop_update - helper.append_op( - type='update_loss_scaling', inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type='update_loss_scaling', + inputs=inputs, + outputs=outputs, + attrs=attrs) return x diff --git a/python/paddle/fluid/contrib/mixed_precision/bf16/amp_utils.py b/python/paddle/fluid/contrib/mixed_precision/bf16/amp_utils.py index 0fb86593b2d..d2528c0e11e 100644 
--- a/python/paddle/fluid/contrib/mixed_precision/bf16/amp_utils.py +++ b/python/paddle/fluid/contrib/mixed_precision/bf16/amp_utils.py @@ -34,8 +34,9 @@ __all__ = [ "cast_parameters_to_bf16", "convert_float_to_uint16" ] -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') _valid_types = [ core.VarDesc.VarType.LOD_TENSOR, core.VarDesc.VarType.SELECTED_ROWS, @@ -102,15 +103,14 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype): persistable=False, stop_gradient=in_var.stop_gradient) - block._insert_op( - idx, - type="cast", - inputs={"X": in_var}, - outputs={"Out": out_var}, - attrs={ - "in_dtype": in_var.dtype, - "out_dtype": out_var.dtype - }) + block._insert_op(idx, + type="cast", + inputs={"X": in_var}, + outputs={"Out": out_var}, + attrs={ + "in_dtype": in_var.dtype, + "out_dtype": out_var.dtype + }) num_cast_ops += 1 _rename_arg(op, in_var.name, out_var.name) else: @@ -146,18 +146,18 @@ def _insert_cast_post_op(block, op, idx, src_dtype, dest_dtype, target_name, cast_name = target_var.name + '.cast_' + _dtype_to_str(dest_dtype) cast_var = block.vars.get(cast_name) if cast_var is None or cast_var.dtype != dest_dtype: - cast_var = block.create_var( - name=cast_name, - dtype=dest_dtype, - persistable=False, - stop_gradient=target_var.stop_gradient) - block._insert_op( - idx, - type="cast", - inputs={"X": target_var}, - outputs={"Out": cast_var}, - attrs={"in_dtype": target_var.dtype, - "out_dtype": cast_var.dtype}) + cast_var = block.create_var(name=cast_name, + dtype=dest_dtype, + persistable=False, + stop_gradient=target_var.stop_gradient) + block._insert_op(idx, + type="cast", + inputs={"X": target_var}, + outputs={"Out": cast_var}, + attrs={ + "in_dtype": target_var.dtype, + "out_dtype": cast_var.dtype + }) num_cast_ops += 1 op_var_rename_map[block.idx][target_var.name] = cast_var.name @@ -363,8 +363,8 @@ def cast_model_to_bf16(program, out_var.desc.set_dtype(core.VarDesc.VarType.BF16) _logger.debug( - "-- op type: {}, out var name: {}, out var dtype: {} --". 
- format(op.type, out_var_name, out_var.dtype)) + "-- op type: {}, out var name: {}, out var dtype: {} --" + .format(op.type, out_var_name, out_var.dtype)) for attr_name in ['in_dtype', 'out_dtype', 'dtype']: if op.has_attr(attr_name) and op.attr( attr_name) == core.VarDesc.VarType.FP32: diff --git a/python/paddle/fluid/contrib/mixed_precision/bf16/decorator.py b/python/paddle/fluid/contrib/mixed_precision/bf16/decorator.py index 4189abda058..41fce89a9e9 100644 --- a/python/paddle/fluid/contrib/mixed_precision/bf16/decorator.py +++ b/python/paddle/fluid/contrib/mixed_precision/bf16/decorator.py @@ -104,8 +104,9 @@ class OptimizerWithMixedPrecision(object): if loss.dtype != core.VarDesc.VarType.FP32: loss = loss.astype('float32') - params_grads = self._optimizer.backward( - loss, startup_program, parameter_list, no_grad_set, callbacks) + params_grads = self._optimizer.backward(loss, startup_program, + parameter_list, no_grad_set, + callbacks) return params_grads def amp_init(self, @@ -171,10 +172,9 @@ class OptimizerWithMixedPrecision(object): self._to_bf16_var_names) if test_program is not None: if self._use_pure_bf16: - cast_model_to_bf16( - test_program, - amp_lists=self._amp_lists, - use_bf16_guard=self._use_bf16_guard) + cast_model_to_bf16(test_program, + amp_lists=self._amp_lists, + use_bf16_guard=self._use_bf16_guard) elif use_bf16_test: rewrite_program_bf16(test_program, amp_lists=self._amp_lists) @@ -223,11 +223,10 @@ class OptimizerWithMixedPrecision(object): "The decorated optimizer has its own `minimize` method, but it will not be executed." ) - params_grads = self.backward( - loss, - startup_program=startup_program, - parameter_list=parameter_list, - no_grad_set=no_grad_set) + params_grads = self.backward(loss, + startup_program=startup_program, + parameter_list=parameter_list, + no_grad_set=no_grad_set) optimize_ops = self.apply_optimize(loss, startup_program, params_grads) diff --git a/python/paddle/fluid/contrib/mixed_precision/decorator.py b/python/paddle/fluid/contrib/mixed_precision/decorator.py index c3720396e1d..787a4e90a0f 100644 --- a/python/paddle/fluid/contrib/mixed_precision/decorator.py +++ b/python/paddle/fluid/contrib/mixed_precision/decorator.py @@ -171,15 +171,18 @@ class OptimizerWithMixedPrecision(object): # NOTE(zhiqiu): _float_status is only used for NPU. 
if core.is_compiled_with_npu(): - float_status = paddle.static.data( - name="float_status", shape=[8], dtype='float32') + float_status = paddle.static.data(name="float_status", + shape=[8], + dtype='float32') self._train_program.global_block().append_op( type="alloc_float_status", - outputs={"FloatStatus": float_status}, ) + outputs={"FloatStatus": float_status}, + ) self._train_program.global_block().append_op( type="clear_float_status", inputs={"FloatStatus": float_status}, - outputs={"FloatStatusOut": float_status}, ) + outputs={"FloatStatusOut": float_status}, + ) self._float_status = float_status else: self._float_status = None @@ -202,9 +205,10 @@ class OptimizerWithMixedPrecision(object): else: self._scaled_loss = loss - params_grads = self._optimizer.backward( - self._scaled_loss, startup_program, parameter_list, no_grad_set, - callbacks) + params_grads = self._optimizer.backward(self._scaled_loss, + startup_program, + parameter_list, no_grad_set, + callbacks) if self._supports_check_nan_inf(): self._add_cast_ops_to_startup_program(startup_program) return params_grads @@ -221,16 +225,16 @@ class OptimizerWithMixedPrecision(object): continue tmp = block.create_var(dtype=core.VarDesc.VarType.FP32) - block.append_op( - type='assign', inputs={'X': [name]}, outputs={'Out': [tmp]}) - block.append_op( - type='cast', - inputs={'X': [tmp]}, - outputs={'Out': [name]}, - attrs={ - 'in_dtype': core.VarDesc.VarType.FP32, - 'out_dtype': core.VarDesc.VarType.FP16, - }) + block.append_op(type='assign', + inputs={'X': [name]}, + outputs={'Out': [tmp]}) + block.append_op(type='cast', + inputs={'X': [tmp]}, + outputs={'Out': [name]}, + attrs={ + 'in_dtype': core.VarDesc.VarType.FP32, + 'out_dtype': core.VarDesc.VarType.FP16, + }) self._to_fp16_var_names = None def amp_init(self, @@ -342,13 +346,13 @@ class OptimizerWithMixedPrecision(object): real_optimizer = self._optimizer while hasattr(real_optimizer, "inner_opt"): real_optimizer = real_optimizer.inner_opt - if isinstance(real_optimizer, (paddle.fluid.optimizer.Adam, - paddle.optimizer.AdamW)): + if isinstance(real_optimizer, + (paddle.fluid.optimizer.Adam, paddle.optimizer.AdamW)): # NOTE(zhiqiu): Since found_inf needs to be on cpu in adam op, we # copy it in advance to avoid multiple time copies. 
with self._train_program._optimized_guard([]): - found_inf = paddle.tensor.creation._memcpy(found_inf, - paddle.CPUPlace()) + found_inf = paddle.tensor.creation._memcpy( + found_inf, paddle.CPUPlace()) real_optimizer._set_auxiliary_var('found_inf', found_inf) elif hasattr(real_optimizer, "_set_auxiliary_var"): real_optimizer._set_auxiliary_var('found_inf', found_inf) @@ -382,7 +386,9 @@ class OptimizerWithMixedPrecision(object): for p, g in params_grads: with self._train_program._optimized_guard([p, g]): _, found_inf = check_finite_and_unscale( - [g, ], + [ + g, + ], self._loss_scaling, name="find_infinite_scale", float_status=self._float_status) @@ -441,45 +447,42 @@ class OptimizerWithMixedPrecision(object): stop_update = False with self._train_program._optimized_guard([]): if fp32_grads: - update_loss_scaling( - fp32_grads, - found_inf, - self._loss_scaling, - self._num_good_steps, - self._num_bad_steps, - self._incr_every_n_steps, - self._decr_every_n_nan_or_inf, - self._incr_ratio, - self._decr_ratio, - stop_update=stop_update, - name="update_loss_scaling_fp32") + update_loss_scaling(fp32_grads, + found_inf, + self._loss_scaling, + self._num_good_steps, + self._num_bad_steps, + self._incr_every_n_steps, + self._decr_every_n_nan_or_inf, + self._incr_ratio, + self._decr_ratio, + stop_update=stop_update, + name="update_loss_scaling_fp32") stop_update = True if fp16_grads: - update_loss_scaling( - fp16_grads, - found_inf, - self._loss_scaling, - self._num_good_steps, - self._num_bad_steps, - self._incr_every_n_steps, - self._decr_every_n_nan_or_inf, - self._incr_ratio, - self._decr_ratio, - stop_update=stop_update, - name="update_loss_scaling_fp16") + update_loss_scaling(fp16_grads, + found_inf, + self._loss_scaling, + self._num_good_steps, + self._num_bad_steps, + self._incr_every_n_steps, + self._decr_every_n_nan_or_inf, + self._incr_ratio, + self._decr_ratio, + stop_update=stop_update, + name="update_loss_scaling_fp16") else: with self._train_program._optimized_guard([]): - update_loss_scaling( - grads, - found_inf, - self._loss_scaling, - self._num_good_steps, - self._num_bad_steps, - self._incr_every_n_steps, - self._decr_every_n_nan_or_inf, - self._incr_ratio, - self._decr_ratio, - name="update_loss_scaling") + update_loss_scaling(grads, + found_inf, + self._loss_scaling, + self._num_good_steps, + self._num_bad_steps, + self._incr_every_n_steps, + self._decr_every_n_nan_or_inf, + self._incr_ratio, + self._decr_ratio, + name="update_loss_scaling") def apply_optimize(self, loss, startup_program, params_grads): program = loss.block.program @@ -514,11 +517,10 @@ class OptimizerWithMixedPrecision(object): "The decorated optimizer has its own `minimize` method, but it will not be executed." 
) - scaled_params_grads = self.backward( - loss, - startup_program=startup_program, - parameter_list=parameter_list, - no_grad_set=no_grad_set) + scaled_params_grads = self.backward(loss, + startup_program=startup_program, + parameter_list=parameter_list, + no_grad_set=no_grad_set) optimize_ops = self.apply_optimize(loss, startup_program, scaled_params_grads) diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py index 0100866806c..b23c94c7e49 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py @@ -27,8 +27,9 @@ import numpy as np __all__ = ["fp16_guard", "cast_model_to_fp16", "cast_parameters_to_fp16"] -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') _valid_types = [ core.VarDesc.VarType.LOD_TENSOR, core.VarDesc.VarType.SELECTED_ROWS, @@ -147,8 +148,8 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype): num_cast_ops = 0 for in_name in op.input_names: - if src_dtype == core.VarDesc.VarType.FP32 and _keep_fp32_input(op, - in_name): + if src_dtype == core.VarDesc.VarType.FP32 and _keep_fp32_input( + op, in_name): continue for in_var_name in op.input(in_name): in_var = block._find_var_recursive(in_var_name) @@ -185,17 +186,18 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype): persistable=False, stop_gradient=in_var.stop_gradient) - block._insert_op_without_sync( - idx, - type="cast", - inputs={"X": in_var}, - outputs={"Out": out_var}, - attrs={ - "in_dtype": in_var.dtype, - "out_dtype": out_var.dtype, - "op_device": op_device, - "op_role": op.attr("op_role"), - }) + block._insert_op_without_sync(idx, + type="cast", + inputs={"X": in_var}, + outputs={"Out": out_var}, + attrs={ + "in_dtype": in_var.dtype, + "out_dtype": + out_var.dtype, + "op_device": op_device, + "op_role": + op.attr("op_role"), + }) num_cast_ops += 1 _rename_arg(op, in_var.name, out_var.name) else: @@ -231,22 +233,20 @@ def _insert_cast_post_op(block, op, idx, src_dtype, dest_dtype, target_name, cast_name = target_var.name + '.cast_' + _dtype_to_str(dest_dtype) cast_var = block.vars.get(cast_name) if cast_var is None or cast_var.dtype != dest_dtype: - cast_var = block.create_var( - name=cast_name, - dtype=dest_dtype, - persistable=False, - stop_gradient=target_var.stop_gradient) - block._insert_op( - idx, - type="cast", - inputs={"X": target_var}, - outputs={"Out": cast_var}, - attrs={ - "in_dtype": target_var.dtype, - "out_dtype": cast_var.dtype, - "op_device": op.attr("op_device"), - "op_role": op.attr("op_role"), - }) + cast_var = block.create_var(name=cast_name, + dtype=dest_dtype, + persistable=False, + stop_gradient=target_var.stop_gradient) + block._insert_op(idx, + type="cast", + inputs={"X": target_var}, + outputs={"Out": cast_var}, + attrs={ + "in_dtype": target_var.dtype, + "out_dtype": cast_var.dtype, + "op_device": op.attr("op_device"), + "op_role": op.attr("op_role"), + }) num_cast_ops += 1 op_var_rename_map[block.idx][target_var.name] = cast_var.name @@ -474,8 +474,8 @@ def cast_model_to_fp16(program, amp_lists=None, use_fp16_guard=True): out_var.desc.set_dtype(core.VarDesc.VarType.FP16) _logger.debug( - "-- op type: {}, out var name: {}, out var dtype: {} --". 
- format(op.type, out_var_name, out_var.dtype)) + "-- op type: {}, out var name: {}, out var dtype: {} --" + .format(op.type, out_var_name, out_var.dtype)) if op.has_attr('in_dtype') and op.attr( 'in_dtype') == core.VarDesc.VarType.FP32: op._set_attr('in_dtype', core.VarDesc.VarType.FP16) @@ -696,13 +696,12 @@ def update_role_var_grad(main_prog, params_grads): # add new op in the python and cpp at the same time new_op_desc = block.desc.append_op() new_op_desc.copy_from(op.desc) - new_op = framework.Operator( - block=block, - desc=new_op_desc, - type=None, - inputs=None, - outputs=None, - attrs=None) + new_op = framework.Operator(block=block, + desc=new_op_desc, + type=None, + inputs=None, + outputs=None, + attrs=None) block.ops.append(new_op) op_idx = find_op_index(block.desc, op.desc) if op_idx == -1: diff --git a/python/paddle/fluid/contrib/model_stat.py b/python/paddle/fluid/contrib/model_stat.py index 11ab8800f28..ed6d82671f2 100644 --- a/python/paddle/fluid/contrib/model_stat.py +++ b/python/paddle/fluid/contrib/model_stat.py @@ -200,8 +200,8 @@ def _print_summary(summary_table, total): parmas = total['params'] flops = total['flops'] print(summary_table) - print('Total PARAMs: {}({:.4f}M)'.format( - sum(parmas), sum(parmas) / (10**6))) + print('Total PARAMs: {}({:.4f}M)'.format(sum(parmas), + sum(parmas) / (10**6))) print('Total FLOPs: {}({:.2f}G)'.format(sum(flops), sum(flops) / 10**9)) print( "Notice: \n now supported ops include [Conv, DepthwiseConv, FC(mul), BatchNorm, Pool, Activation(sigmoid, tanh, relu, leaky_relu, prelu)]" diff --git a/python/paddle/fluid/contrib/op_frequence.py b/python/paddle/fluid/contrib/op_frequence.py index 68dd0a946b4..ec9b7b1073d 100644 --- a/python/paddle/fluid/contrib/op_frequence.py +++ b/python/paddle/fluid/contrib/op_frequence.py @@ -96,9 +96,11 @@ def op_freq_statistic(program): else: adj_2_op_freq[op_op] = 1 - uni_op_freq = sorted( - uni_op_freq.items(), key=lambda item: item[1], reverse=True) - adj_2_op_freq = sorted( - adj_2_op_freq.items(), key=lambda item: item[1], reverse=True) + uni_op_freq = sorted(uni_op_freq.items(), + key=lambda item: item[1], + reverse=True) + adj_2_op_freq = sorted(adj_2_op_freq.items(), + key=lambda item: item[1], + reverse=True) return uni_op_freq, adj_2_op_freq diff --git a/python/paddle/fluid/contrib/optimizer.py b/python/paddle/fluid/contrib/optimizer.py index 1b3ec21bf3c..9265198485c 100644 --- a/python/paddle/fluid/contrib/optimizer.py +++ b/python/paddle/fluid/contrib/optimizer.py @@ -118,12 +118,11 @@ class Momentum(Optimizer): assert momentum is not None predicate = lambda regular: isinstance(regular, L2DecayRegularizer) py_regular = None if predicate(regularization) else regularization - super(Momentum, self).__init__( - learning_rate=learning_rate, - parameter_list=parameter_list, - regularization=py_regular, - grad_clip=grad_clip, - name=name) + super(Momentum, self).__init__(learning_rate=learning_rate, + parameter_list=parameter_list, + regularization=py_regular, + grad_clip=grad_clip, + name=name) self.type = "momentum" self._momentum = momentum self._use_nesterov = bool(use_nesterov) @@ -141,21 +140,19 @@ class Momentum(Optimizer): var_name = param.name + "_fp32_master" var_name = unique_name.generate(var_name) - var = layers.create_global_var( - name=var_name, - shape=param.shape, - value=0, - dtype='float32', - persistable=True) + var = layers.create_global_var(name=var_name, + shape=param.shape, + value=0, + dtype='float32', + persistable=True) block = self.helper.startup_program.global_block() - 
block.append_op( - type="cast", - inputs={"X": [param]}, - outputs={"Out": [var]}, - attrs={ - "in_dtype": param.dtype, - "out_dtype": core.VarDesc.VarType.FP32 - }) + block.append_op(type="cast", + inputs={"X": [param]}, + outputs={"Out": [var]}, + attrs={ + "in_dtype": param.dtype, + "out_dtype": core.VarDesc.VarType.FP32 + }) self._master_weights[param.name] = var return var @@ -175,10 +172,11 @@ class Momentum(Optimizer): target_param = self._master_weights[ param.name] if find_master else param target_name = target_param.name - if (name not in self._accumulators or - target_name not in self._accumulators[name]): - raise Exception("Accumulator {} does not exist for parameter {}". - format(name, target_name)) + if (name not in self._accumulators + or target_name not in self._accumulators[name]): + raise Exception( + "Accumulator {} does not exist for parameter {}".format( + name, target_name)) return self._accumulators[name][target_name] def _create_accumulators(self, block, parameters): @@ -242,11 +240,10 @@ class Momentum(Optimizer): outputs["MasterParamOut"] = master_weight # create the momentum optimize op - momentum_op = block.append_op( - type=self.type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + momentum_op = block.append_op(type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return momentum_op diff --git a/python/paddle/fluid/contrib/quantize/quantize_transpiler.py b/python/paddle/fluid/contrib/quantize/quantize_transpiler.py index 807d3c6a430..de4c1004086 100644 --- a/python/paddle/fluid/contrib/quantize/quantize_transpiler.py +++ b/python/paddle/fluid/contrib/quantize/quantize_transpiler.py @@ -78,6 +78,7 @@ def quant(x, scale, num_bits): class QuantizeTranspiler(object): + def __init__(self, weight_bits=8, activation_bits=8, @@ -280,19 +281,20 @@ class QuantizeTranspiler(object): raise ValueError("Only support one output, but op %s has" " more than one output." % (op.type)) out_var = block.var(op.output_arg_names[0]) - dequant_var = block.create_var( - name=_dequantized_var_name(out_var.name), - type=out_var.type, - shape=out_var.shape, - dtype=out_var.dtype) + dequant_var = block.create_var(name=_dequantized_var_name( + out_var.name), + type=out_var.type, + shape=out_var.shape, + dtype=out_var.dtype) # insert fake_dequantize_op - dequant_op = block._insert_op( - idx + 1, - type="fake_dequantize_max_abs", - attrs={'max_range': float(max_range)}, - inputs={"X": out_var, - 'Scale': scale_var}, - outputs={"Out": dequant_var}) + dequant_op = block._insert_op(idx + 1, + type="fake_dequantize_max_abs", + attrs={'max_range': float(max_range)}, + inputs={ + "X": out_var, + 'Scale': scale_var + }, + outputs={"Out": dequant_var}) op_out_rename_map[block_id][out_var.name] = dequant_var.name return dequant_var @@ -406,40 +408,37 @@ class QuantizeTranspiler(object): def _insert_quant_abs_max_op(self, block, idx, var, quant_bits): """Insert fake_quantize_abs_max op. 
""" - quant_var = block.create_var( - name=_quantized_var_name(var.name), - type=var.type, - shape=var.shape, - dtype=var.dtype) - scale = block.create_var( - name=_quantized_scale_name(var.name), - type=var.type, - shape=var.shape, - dtype=var.dtype) - quant_op = block._insert_op( - idx, - type='fake_quantize_abs_max', - attrs={'bit_length': quant_bits}, - inputs={'X': var}, - outputs={'Out': quant_var, - 'OutScale': scale}) + quant_var = block.create_var(name=_quantized_var_name(var.name), + type=var.type, + shape=var.shape, + dtype=var.dtype) + scale = block.create_var(name=_quantized_scale_name(var.name), + type=var.type, + shape=var.shape, + dtype=var.dtype) + quant_op = block._insert_op(idx, + type='fake_quantize_abs_max', + attrs={'bit_length': quant_bits}, + inputs={'X': var}, + outputs={ + 'Out': quant_var, + 'OutScale': scale + }) return quant_var, scale def _insert_quant_range_abs_max_op(self, block, idx, var, quant_bits): """Insert fake_quantize_range_abs_max """ - quant_var = block.create_var( - name=_quantized_var_name(var.name), - type=var.type, - shape=var.shape, - dtype=var.dtype) - scale = self.helper.create_parameter( - attr=ParamAttr( - name=_quantized_scale_name(var.name), - initializer=Constant(0.001), - trainable=False), - shape=[1], - dtype=var.dtype) + quant_var = block.create_var(name=_quantized_var_name(var.name), + type=var.type, + shape=var.shape, + dtype=var.dtype) + scale = self.helper.create_parameter(attr=ParamAttr( + name=_quantized_scale_name(var.name), + initializer=Constant(0.001), + trainable=False), + shape=[1], + dtype=var.dtype) scale.stop_gradient = True ins = {'X': var, 'InScale': scale} @@ -451,8 +450,8 @@ class QuantizeTranspiler(object): persistable=True, dtype=var.dtype, shape=[self.window_size]) - self.helper.set_variable_initializer( - scales, initializer=Constant(value=0)) + self.helper.set_variable_initializer(scales, + initializer=Constant(value=0)) ins['Iter'] = self.global_step outs['OutScales'] = scales @@ -463,12 +462,11 @@ class QuantizeTranspiler(object): 'is_test': self.is_test } - quant_op = block._insert_op( - idx, - type='fake_quantize_range_abs_max', - attrs=attrs, - inputs=ins, - outputs=outs) + quant_op = block._insert_op(idx, + type='fake_quantize_range_abs_max', + attrs=attrs, + inputs=ins, + outputs=outs) return quant_var, scale @@ -476,32 +474,30 @@ class QuantizeTranspiler(object): quant_bits): """Insert fake_quantize_moving_average_abs_max """ - quant_var = block.create_var( - name=_quantized_var_name(var.name), - type=var.type, - shape=var.shape, - dtype=var.dtype) + quant_var = block.create_var(name=_quantized_var_name(var.name), + type=var.type, + shape=var.shape, + dtype=var.dtype) state = self.helper.create_global_variable( name=unique_name.generate('state'), persistable=True, dtype=var.dtype, shape=[1]) - self.helper.set_variable_initializer( - state, initializer=Constant(value=1)) + self.helper.set_variable_initializer(state, + initializer=Constant(value=1)) accum = self.helper.create_global_variable( name=unique_name.generate('accum'), persistable=True, dtype=var.dtype, shape=[1]) - self.helper.set_variable_initializer( - accum, initializer=Constant(value=1)) - scale = self.helper.create_parameter( - attr=ParamAttr( - name=_quantized_scale_name(var.name), - initializer=Constant(0.001), - trainable=False), - shape=[1], - dtype=var.dtype) + self.helper.set_variable_initializer(accum, + initializer=Constant(value=1)) + scale = self.helper.create_parameter(attr=ParamAttr( + name=_quantized_scale_name(var.name), + 
initializer=Constant(0.001), + trainable=False), + shape=[1], + dtype=var.dtype) scale.stop_gradient = True ins = {'X': var, 'InScale': scale} @@ -518,12 +514,11 @@ class QuantizeTranspiler(object): 'is_test': self.is_test } - quant_op = block._insert_op( - idx, - type='fake_quantize_moving_average_abs_max', - attrs=attrs, - inputs=ins, - outputs=outs) + quant_op = block._insert_op(idx, + type='fake_quantize_moving_average_abs_max', + attrs=attrs, + inputs=ins, + outputs=outs) return quant_var, scale @@ -537,25 +532,25 @@ class QuantizeTranspiler(object): return self._insert_quant_range_abs_max_op(block, idx, var, quant_bits) elif quant_type == 'moving_average_abs_max': - return self._insert_quant_moving_average_abs_max_op(block, idx, var, - quant_bits) + return self._insert_quant_moving_average_abs_max_op( + block, idx, var, quant_bits) def _insert_dequant_op(self, block, idx, var, scale, quant_bits): """ Insert fake_quantize_op """ - dequant_var = block.create_var( - name=_dequantized_var_name(var.name), - type=var.type, - shape=var.shape, - dtype=var.dtype) + dequant_var = block.create_var(name=_dequantized_var_name(var.name), + type=var.type, + shape=var.shape, + dtype=var.dtype) # insert fake_dequantize_op max_range = (1 << (quant_bits - 1)) - 1 - dequant_op = block._insert_op( - idx, - type="fake_dequantize_max_abs", - attrs={'max_range': float(max_range)}, - inputs={"X": var, - 'Scale': scale}, - outputs={"Out": dequant_var}) + dequant_op = block._insert_op(idx, + type="fake_dequantize_max_abs", + attrs={'max_range': float(max_range)}, + inputs={ + "X": var, + 'Scale': scale + }, + outputs={"Out": dequant_var}) return dequant_var diff --git a/python/paddle/fluid/contrib/slim/quantization/adaround.py b/python/paddle/fluid/contrib/slim/quantization/adaround.py index f6908d7e836..be3201044f6 100644 --- a/python/paddle/fluid/contrib/slim/quantization/adaround.py +++ b/python/paddle/fluid/contrib/slim/quantization/adaround.py @@ -22,24 +22,29 @@ import paddle.fluid as fluid from ....log_helper import get_logger from .utils import load_variable_data, set_variable_data, stable_sigmoid, quant_tensor, dequant_tensor, _channelwise_quant_axis1_ops, calculate_quant_cos_error -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') GAMMA = -0.1 ZETA = 1.1 def compute_soft_rounding(alpha_v): - return fluid.layers.clip( - fluid.layers.sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA, min=0, max=1) + return fluid.layers.clip(fluid.layers.sigmoid(alpha_v) * (ZETA - GAMMA) + + GAMMA, + min=0, + max=1) def compute_soft_rounding_np(alpha_v): - return np.clip( - stable_sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA, a_min=0, a_max=1) + return np.clip(stable_sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA, + a_min=0, + a_max=1) class AdaRoundLoss(object): + def __init__(self, reg_param=0.01, default_beta_range=(20, 2)): self.default_reg_param = reg_param self.default_beta_range = default_beta_range @@ -48,26 +53,29 @@ class AdaRoundLoss(object): square_cost = fluid.layers.square_error_cost(ada_quantized_output, orig_output) recon_loss = fluid.layers.reduce_mean( - fluid.layers.reduce_sum( - square_cost, dim=-1)) + fluid.layers.reduce_sum(square_cost, dim=-1)) return recon_loss def compute_round_loss(self, alpha_v, warm_start, beta): + def round_loss_fn(): # compute rectified sigmoid of parameter 'alpha' which maps it between zero and one h_v = compute_soft_rounding(alpha_v) # calculate 
regularization term - which ensures parameter to converge to exactly zeros and ones # at the end of optimization - reg_term = fluid.layers.reduce_sum(-fluid.layers.pow( - fluid.layers.abs(2 * h_v - 1), factor=beta) + 1) + reg_term = fluid.layers.reduce_sum( + -fluid.layers.pow(fluid.layers.abs(2 * h_v - 1), factor=beta) + + 1) # calculate the rounding loss round_loss = self.default_reg_param * reg_term return round_loss - round_loss = fluid.layers.cond(warm_start, lambda: fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.0), round_loss_fn) + round_loss = fluid.layers.cond( + warm_start, lambda: fluid.layers.fill_constant( + shape=[1], dtype='float32', value=0.0), round_loss_fn) return round_loss @@ -80,15 +88,16 @@ class AdaRoundLoss(object): warm_start_end_iter = warm_start * max_iter # compute relative iteration of current iteration - rel_iter = (cur_iter - warm_start_end_iter) / ( - max_iter - warm_start_end_iter) - beta = end_beta + 0.5 * (start_beta - end_beta) * (1 + np.cos(rel_iter * - np.pi)) + rel_iter = (cur_iter - warm_start_end_iter) / (max_iter - + warm_start_end_iter) + beta = end_beta + 0.5 * (start_beta - + end_beta) * (1 + np.cos(rel_iter * np.pi)) return beta class AdaRound(object): + def __init__(self, scale, weight_tensor, @@ -145,10 +154,9 @@ class AdaRound(object): h_alpha = compute_soft_rounding_np(np_alpha) # Scale the tensor - tensor_scale = quant_tensor( - self.ori_weight_tensor.copy(), - self.scale, - quant_axis=self.quant_axis) + tensor_scale = quant_tensor(self.ori_weight_tensor.copy(), + self.scale, + quant_axis=self.quant_axis) weight_tensor = np.floor(tensor_scale) @@ -160,10 +168,10 @@ class AdaRound(object): weight_tensor_quant = self._calculate_quant_weight() # Dequantize the tensor - weight_tensor_dequant = dequant_tensor( - weight_tensor_quant + self.offset, - self.scale, - quant_axis=self.quant_axis) + weight_tensor_dequant = dequant_tensor(weight_tensor_quant + + self.offset, + self.scale, + quant_axis=self.quant_axis) return weight_tensor_dequant def update_final_weights(self): @@ -171,10 +179,10 @@ class AdaRound(object): return weight_tensor_quant def get_loss(self, beta, warm_start, adaround_out_tensor, orig_out_tensor): - round_loss = self.adaround_loss.compute_round_loss(self.alpha_v, - warm_start, beta) - recon_loss = self.adaround_loss.compute_recon_loss(adaround_out_tensor, - orig_out_tensor) + round_loss = self.adaround_loss.compute_round_loss( + self.alpha_v, warm_start, beta) + recon_loss = self.adaround_loss.compute_recon_loss( + adaround_out_tensor, orig_out_tensor) loss = round_loss + recon_loss losses = { 'loss': loss, @@ -226,29 +234,29 @@ def run_adaround(data_loader, with fluid.program_guard(train_program, startup_program): with fluid.unique_name.guard(): # initialize adaround - adaround = AdaRound( - scale, - weight_var_tensor, - scope=scope, - weight_var_name=weight_var_name, - weight_op_type=weight_op_type, - num_iterations=num_iterations) - orig_out_tensor = fluid.data( - name='orig_out_tensor', - shape=fp32_fetch_list.shape, - dtype='float32') - adaround_out_tensor = fluid.data( - name='adaround_out_tensor', - shape=fp32_fetch_list.shape, - dtype='float32') - beta_tensor = fluid.data( - name='beta', shape=[1], dtype='float32') - warm_start_tensor = fluid.data( - name='warm_start', shape=[1], dtype='bool') - - train_fetches_loss = adaround.get_loss( - beta_tensor, warm_start_tensor, adaround_out_tensor, - orig_out_tensor) + adaround = AdaRound(scale, + weight_var_tensor, + scope=scope, + 
weight_var_name=weight_var_name, + weight_op_type=weight_op_type, + num_iterations=num_iterations) + orig_out_tensor = fluid.data(name='orig_out_tensor', + shape=fp32_fetch_list.shape, + dtype='float32') + adaround_out_tensor = fluid.data(name='adaround_out_tensor', + shape=fp32_fetch_list.shape, + dtype='float32') + beta_tensor = fluid.data(name='beta', + shape=[1], + dtype='float32') + warm_start_tensor = fluid.data(name='warm_start', + shape=[1], + dtype='bool') + + train_fetches_loss = adaround.get_loss(beta_tensor, + warm_start_tensor, + adaround_out_tensor, + orig_out_tensor) optimizer = fluid.optimizer.Adam(learning_rate=lr) loss = train_fetches_loss['loss'] optimizer.minimize(loss) @@ -291,11 +299,9 @@ def run_adaround(data_loader, fetch_list=[v.name for v in train_fetches_loss.values()], return_numpy=True) _logger.info( - "Iter {:d}, lr {:.5f}, loss {:.5f}, loss_round {:.5f}, loss_recon {:.5f}, time {:.5f}s". - format(i, lr, - np.mean(out[0]), - np.mean(out[1]), - np.mean(out[2]), start_time - prev_start_time)) + "Iter {:d}, lr {:.5f}, loss {:.5f}, loss_round {:.5f}, loss_recon {:.5f}, time {:.5f}s" + .format(i, lr, np.mean(out[0]), np.mean(out[1]), + np.mean(out[2]), start_time - prev_start_time)) sys.stdout.flush() if i == num_iterations: break diff --git a/python/paddle/fluid/contrib/slim/quantization/cal_kl_threshold.py b/python/paddle/fluid/contrib/slim/quantization/cal_kl_threshold.py index 390859236d9..69cd3f64061 100644 --- a/python/paddle/fluid/contrib/slim/quantization/cal_kl_threshold.py +++ b/python/paddle/fluid/contrib/slim/quantization/cal_kl_threshold.py @@ -17,8 +17,9 @@ import math import numpy as np from ....log_helper import get_logger -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') __all__ = ['cal_kl_threshold'] @@ -37,8 +38,8 @@ def expand_quantized_bins(quantized_bins, reference_bins): if zero_count == num_merged_bins: avg_bin_ele = 0 else: - avg_bin_ele = quantized_bins[idx] / ( - num_merged_bins - zero_count + 0.0) + avg_bin_ele = quantized_bins[idx] / (num_merged_bins - zero_count + + 0.0) for idx1 in range(j_start, j_end): expanded_quantized_bins[idx1] = (0 if reference_bins[idx1] == 0 else avg_bin_ele) @@ -103,8 +104,8 @@ def cal_kl_threshold(hist, bin_width, bits): j_start = 0 j_end = num_merged_bins for idx in range(quant_range): - candidate_distr_Q_quantized[idx] = sum(candidate_distr_Q[j_start: - j_end]) + candidate_distr_Q_quantized[idx] = sum( + candidate_distr_Q[j_start:j_end]) j_start += num_merged_bins j_end += num_merged_bins if (idx + 1) == quant_range - 1: diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py index 1f7a01f17b0..4ae949bf0fe 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/fuse_utils.py @@ -82,8 +82,8 @@ def _fuse_layers(model, layers_list): layer_list.append(getattr(parent_layer, sub_name)) new_layers = _fuse_func(layer_list) for i, item in enumerate(layers_list): - parent_layer, sub_name = utils.find_parent_layer_and_sub_name(model, - item) + parent_layer, sub_name = utils.find_parent_layer_and_sub_name( + model, item) setattr(parent_layer, sub_name, new_layers[i]) @@ -123,9 +123,10 @@ def _fuse_conv_bn_eval(conv, bn): assert (not (conv.training or bn.training)), "Fusion only for eval!" 
fused_conv = copy.deepcopy(conv) - fused_weight, fused_bias = _fuse_conv_bn_weights( - fused_conv.weight, fused_conv.bias, bn._mean, bn._variance, bn._epsilon, - bn.weight, bn.bias) + fused_weight, fused_bias = _fuse_conv_bn_weights(fused_conv.weight, + fused_conv.bias, bn._mean, + bn._variance, bn._epsilon, + bn.weight, bn.bias) fused_conv.weight.set_value(fused_weight) if fused_conv.bias is None: fused_conv.bias = paddle.create_parameter( @@ -166,9 +167,11 @@ def _fuse_linear_bn_eval(linear, bn): assert (not (linear.training or bn.training)), "Fusion only for eval!" fused_linear = copy.deepcopy(linear) - fused_weight, fused_bias = _fuse_linear_bn_weights( - fused_linear.weight, fused_linear.bias, bn._mean, bn._variance, - bn._epsilon, bn.weight, bn.bias) + fused_weight, fused_bias = _fuse_linear_bn_weights(fused_linear.weight, + fused_linear.bias, + bn._mean, bn._variance, + bn._epsilon, bn.weight, + bn.bias) fused_linear.weight.set_value(fused_weight) if fused_linear.bias is None: fused_linear.bias = paddle.create_parameter( diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py index 5c595a8d38c..cccc5d90fba 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py @@ -31,8 +31,9 @@ from .ptq_registry import PTQRegistry __all__ = ['ImperativePTQ'] -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') class ImperativePTQ(object): @@ -155,12 +156,12 @@ class ImperativePTQ(object): model_filename = basename + INFER_MODEL_SUFFIX params_filename = basename + INFER_PARAMS_SUFFIX - [infer_program, feed_target_names, fetch_targets] = ( - paddle.fluid.io.load_inference_model( - dirname=dirname, - executor=exe, - model_filename=model_filename, - params_filename=params_filename)) + [infer_program, feed_target_names, + fetch_targets] = (paddle.fluid.io.load_inference_model( + dirname=dirname, + executor=exe, + model_filename=model_filename, + params_filename=params_filename)) # Process inference program self._clean_up(infer_program) @@ -168,14 +169,13 @@ class ImperativePTQ(object): self._remove_scale_op(infer_program) # Save final program - paddle.fluid.io.save_inference_model( - dirname=dirname, - feeded_var_names=feed_target_names, - target_vars=fetch_targets, - executor=exe, - main_program=infer_program.clone(), - model_filename=model_filename, - params_filename=params_filename) + paddle.fluid.io.save_inference_model(dirname=dirname, + feeded_var_names=feed_target_names, + target_vars=fetch_targets, + executor=exe, + main_program=infer_program.clone(), + model_filename=model_filename, + params_filename=params_filename) if is_dynamic_mode: paddle.disable_static() @@ -310,8 +310,8 @@ class ImperativePTQ(object): assert hasattr(quant_layer, "_fake_quant_input") assert hasattr(quant_layer._fake_quant_input, "_scale") assert len(in_act_quantizer.thresholds) == 1 - input_threshold = np.array( - [in_act_quantizer.thresholds[0]], dtype=np.float32) + input_threshold = np.array([in_act_quantizer.thresholds[0]], + dtype=np.float32) quant_layer._fake_quant_input._scale.set_value(input_threshold) assert hasattr(quant_layer, "_fake_quant_weight") @@ -319,11 +319,11 @@ class ImperativePTQ(object): assert len(wt_quantizer.thresholds) == 1 weight_threshold = wt_quantizer.thresholds[0] if 
isinstance(weight_threshold, list): - weight_threshold = np.array( - weight_threshold, dtype=np.float32) + weight_threshold = np.array(weight_threshold, + dtype=np.float32) else: - weight_threshold = np.array( - [weight_threshold], dtype=np.float32) + weight_threshold = np.array([weight_threshold], + dtype=np.float32) quant_layer._fake_quant_weight._scale.set_value( weight_threshold) @@ -356,8 +356,8 @@ class ImperativePTQ(object): attr_name = previous_op.output('OutScale')[0] in_threshold = utils.load_variable_data(scope, attr_name) in_threshold = utils.fp_numpy_to_naive(in_threshold) - argname, index = utils._get_input_name_index(op, - in_var_name) + argname, index = utils._get_input_name_index( + op, in_var_name) op._set_attr(argname + str(index) + "_threshold", in_threshold) op._set_attr("with_quant_attr", True) @@ -417,7 +417,8 @@ class ImperativePTQ(object): old_attr_name = argname + str(index) + "_threshold" argname, index = utils._get_output_name_index( - next_op, next_op.output("Out")[0]) + next_op, + next_op.output("Out")[0]) new_attr_name = argname + str(index) + "_threshold" _helper(op, next_op, old_attr_name, new_attr_name) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq_quantizer.py b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq_quantizer.py index 63b35788717..0988f24a183 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq_quantizer.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq_quantizer.py @@ -60,18 +60,20 @@ def combine_abs_max_and_hist(tensor, origin_max, origin_hist, bins, if new_max == 0.0: return origin_max, origin_hist elif origin_max == 0.0: - new_hist, _ = np.histogram( - paddle.abs(tensor).numpy(), range=(0, new_max), bins=bins) + new_hist, _ = np.histogram(paddle.abs(tensor).numpy(), + range=(0, new_max), + bins=bins) new_hist = new_hist.astype(np.float32) return new_max, new_hist elif new_max <= origin_max: - new_hist, _ = np.histogram( - paddle.abs(tensor).numpy(), range=(0, origin_max), bins=bins) + new_hist, _ = np.histogram(paddle.abs(tensor).numpy(), + range=(0, origin_max), + bins=bins) new_hist = new_hist.astype(np.float32) new_hist += origin_hist return origin_max, new_hist else: - # bin_width = origin_max / (bins * upsample_bins) + # bin_width = origin_max / (bins * upsample_bins) # = new_max / (bins * downsample_bins) bin_width = origin_max / (bins * upsample_bins) downsampe_bins = int(math.ceil(new_max / (bins * bin_width))) @@ -87,8 +89,9 @@ def combine_abs_max_and_hist(tensor, origin_max, origin_hist, bins, sampled_hist = (cumsumed_hist - shift_cumsumed_hist) / upsample_bins sampled_hist = sampled_hist.astype(np.float32) - new_hist, _ = np.histogram( - paddle.abs(tensor).numpy(), range=(0, new_max), bins=bins) + new_hist, _ = np.histogram(paddle.abs(tensor).numpy(), + range=(0, new_max), + bins=bins) new_hist = new_hist.astype(np.float32) new_hist += sampled_hist @@ -193,10 +196,9 @@ class BaseHistQuantizer(BaseQuantizer): if abs_max_vals[idx] == 0.0: self.hists.append(None) else: - hist, _ = np.histogram( - paddle.abs(tensor).numpy(), - range=(0., abs_max_vals[idx]), - bins=self.bins) + hist, _ = np.histogram(paddle.abs(tensor).numpy(), + range=(0., abs_max_vals[idx]), + bins=self.bins) hist = hist.astype(np.float32) self.hists.append(hist) else: @@ -228,6 +230,7 @@ class HistQuantizer(BaseHistQuantizer): self.hist_percent = hist_percent def cal_thresholds(self): + def _helper(abs_max, hist, percent): assert hist.ndim == 1 and percent < 1.0 hist = hist / 
np.sum(hist, dtype=np.float64) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index d5c3d9ab82d..29f4707124c 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -37,8 +37,9 @@ from . import fuse_utils __all__ = ['ImperativeQuantAware'] -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') class ImperativeQuantAware(object): @@ -46,19 +47,18 @@ class ImperativeQuantAware(object): Applying quantization aware training (QAT) to the dgraph model. """ - def __init__( - self, - quantizable_layer_type=['Conv2D', 'Linear', 'Conv2DTranspose'], - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max', - weight_bits=8, - activation_bits=8, - moving_rate=0.9, - fuse_conv_bn=False, - weight_preprocess_layer=None, - act_preprocess_layer=None, - weight_quantize_layer=None, - act_quantize_layer=None): + def __init__(self, + quantizable_layer_type=['Conv2D', 'Linear', 'Conv2DTranspose'], + weight_quantize_type='abs_max', + activation_quantize_type='moving_average_abs_max', + weight_bits=8, + activation_bits=8, + moving_rate=0.9, + fuse_conv_bn=False, + weight_preprocess_layer=None, + act_preprocess_layer=None, + weight_quantize_layer=None, + act_quantize_layer=None): """ The constructor for ImperativeQuantAware. @@ -280,18 +280,17 @@ class ImperativeQuantizeInputs(object): logic both for activation inputs and weight inputs. """ - def __init__( - self, - quantizable_layer_type=['Conv2D', 'Linear', 'Conv2DTranspose'], - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max', - weight_bits=8, - activation_bits=8, - moving_rate=0.9, - weight_preprocess_layer=None, - act_preprocess_layer=None, - weight_quantize_layer=None, - act_quantize_layer=None): + def __init__(self, + quantizable_layer_type=['Conv2D', 'Linear', 'Conv2DTranspose'], + weight_quantize_type='abs_max', + activation_quantize_type='moving_average_abs_max', + weight_bits=8, + activation_bits=8, + moving_rate=0.9, + weight_preprocess_layer=None, + act_preprocess_layer=None, + weight_quantize_layer=None, + act_quantize_layer=None): """ The constructor for ImperativeQuantizeInputs. 
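For orientation, the percentile-based cal_thresholds helper above reduces to walking the cumulative histogram of absolute activation values until the requested mass is covered. The sketch below is a minimal numpy approximation of that idea; the function name, the interpolation to the bin edge, and the default percentile are illustrative assumptions, not the patched implementation.

import numpy as np

def percentile_threshold(abs_max, hist, percent=0.9999):
    # Normalize the histogram of |activation| values collected over (0, abs_max).
    assert hist.ndim == 1 and 0.0 < percent < 1.0
    hist = hist / np.sum(hist, dtype=np.float64)
    # Walk the cumulative distribution until `percent` of the mass is covered.
    cumulative = np.cumsum(hist)
    bin_idx = int(np.searchsorted(cumulative, percent))
    # Report the upper edge of that bin as the clipping threshold.
    bin_width = abs_max / hist.shape[0]
    return (bin_idx + 1) * bin_width

# Example: 2048-bin histogram of activations whose observed abs max is 6.0.
hist, _ = np.histogram(np.abs(np.random.randn(10000) * 2.0), bins=2048, range=(0, 6.0))
print(percentile_threshold(6.0, hist.astype(np.float32)))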
@@ -300,9 +299,8 @@ class ImperativeQuantizeInputs(object): super(ImperativeQuantizeInputs, self).__init__() self._quantizable_layer_type = tuple( - utils.layer_name_map[layer] - if layer in utils.layer_name_map else layer - for layer in quantizable_layer_type) + utils.layer_name_map[layer] if layer in + utils.layer_name_map else layer for layer in quantizable_layer_type) for layer in self._quantizable_layer_type: assert not isinstance(layer, str) \ and layer in utils.fake_quant_input_layers, \ @@ -496,12 +494,11 @@ class ImperativeQuantizeOutputs(object): model_filename = basename + INFER_MODEL_SUFFIX params_filename = basename + INFER_PARAMS_SUFFIX - [infer_program, feed_target_names, fetch_targets] = ( - load_inference_model( - dirname=dirname, - executor=exe, - model_filename=model_filename, - params_filename=params_filename)) + [infer_program, feed_target_names, fetch_targets + ] = (load_inference_model(dirname=dirname, + executor=exe, + model_filename=model_filename, + params_filename=params_filename)) self._gather_scales(infer_program, scope, fetch_targets) @@ -528,15 +525,14 @@ class ImperativeQuantizeOutputs(object): clip_extra = True - save_inference_model( - dirname=dirname, - feeded_var_names=feed_target_names, - target_vars=fetch_targets, - executor=exe, - main_program=infer_program.clone(), - model_filename=model_filename, - params_filename=params_filename, - clip_extra=clip_extra) + save_inference_model(dirname=dirname, + feeded_var_names=feed_target_names, + target_vars=fetch_targets, + executor=exe, + main_program=infer_program.clone(), + model_filename=model_filename, + params_filename=params_filename, + clip_extra=clip_extra) if is_dynamic_mode: paddle.disable_static() diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index 758928f8daf..1ac6eec80d9 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -68,7 +68,7 @@ fake_quant_wrap_layers = [ quant_layers.QuantizedConv2DTranspose ] -# The weight format of these layers is Cin * Cout * H * W +# The weight format of these layers is Cin * Cout * H * W spec_channel_axis_layers = [paddle.nn.Conv2DTranspose, paddle.nn.Linear] weight_op_types = [ diff --git a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py index d4c34efb7b9..5c16e0fe273 100644 --- a/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py +++ b/python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py @@ -32,8 +32,9 @@ from . import utils __all__ = ['PostTrainingQuantization', 'WeightQuantization'] -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') def _all_persistable_var_names(program): @@ -84,7 +85,8 @@ def _apply_pass(scope, cpp_graph.set_not_owned('__param_scope__', scope) if attrs: assert attr_values and len(attrs) == len( - attr_values), "Different number of pass attributes and their values." + attr_values + ), "Different number of pass attributes and their values." 
for attr, value in zip(attrs, attr_values): ir_pass.set(attr, value) ir_pass.apply(cpp_graph) @@ -440,18 +442,17 @@ class PostTrainingQuantization(object): scale_dict = self._quantized_var_threshold else: scale_dict = self._quantized_threshold - run_adaround( - self._data_loader, - self._program, - self._fetch_list, - self._executor, - self._scope, - self._place, - self._quantized_op_pairs, - self._weight_op_pairs, - scale_dict, - num_iterations=self._batch_nums, - lr=self._learning_rate) + run_adaround(self._data_loader, + self._program, + self._fetch_list, + self._executor, + self._scope, + self._place, + self._quantized_op_pairs, + self._weight_op_pairs, + scale_dict, + num_iterations=self._batch_nums, + lr=self._learning_rate) def save_quantized_model(self, save_model_path, @@ -472,15 +473,14 @@ class PostTrainingQuantization(object): None ''' clip_extra = True if self._onnx_format else False - io.save_inference_model( - dirname=save_model_path, - model_filename=model_filename, - params_filename=params_filename, - feeded_var_names=self._feed_list, - target_vars=self._fetch_list, - executor=self._executor, - main_program=self._program, - clip_extra=clip_extra) + io.save_inference_model(dirname=save_model_path, + model_filename=model_filename, + params_filename=params_filename, + feeded_var_names=self._feed_list, + target_vars=self._fetch_list, + executor=self._executor, + main_program=self._program, + clip_extra=clip_extra) _logger.info("The quantized model is saved in " + save_model_path) def _load_model_data(self): @@ -502,17 +502,18 @@ class PostTrainingQuantization(object): if self._data_loader is not None: return - self._data_loader = io.DataLoader.from_generator( - feed_list=feed_vars, capacity=3 * self._batch_size, iterable=True) + self._data_loader = io.DataLoader.from_generator(feed_list=feed_vars, + capacity=3 * + self._batch_size, + iterable=True) if self._sample_generator is not None: - self._data_loader.set_sample_generator( - self._sample_generator, - batch_size=self._batch_size, - drop_last=True, - places=self._place) + self._data_loader.set_sample_generator(self._sample_generator, + batch_size=self._batch_size, + drop_last=True, + places=self._place) elif self._batch_generator is not None: - self._data_loader.set_batch_generator( - self._batch_generator, places=self._place) + self._data_loader.set_batch_generator(self._batch_generator, + places=self._place) def _optimize_fp32_model(self): ''' @@ -563,12 +564,10 @@ class PostTrainingQuantization(object): " is not supported for quantization.") # For quantized ops, sample inputs and outputs if op_type in self._quantizable_op_type: - collect_var_name( - utils._get_op_input_var_names(op), - persistable_var_names, op_type) - collect_var_name( - utils._get_op_output_var_names(op), - persistable_var_names, op_type) + collect_var_name(utils._get_op_input_var_names(op), + persistable_var_names, op_type) + collect_var_name(utils._get_op_output_var_names(op), + persistable_var_names, op_type) # collect quanted op output var name for out_var_name in utils._get_op_output_var_names(op): for in_var_name in utils._get_op_input_var_names(op): @@ -577,9 +576,8 @@ class PostTrainingQuantization(object): in_var_name] = out_var_name # For other op, only sample output scale elif op_type in self._out_scale_op_list: - collect_var_name( - utils._get_op_output_var_names(op), - persistable_var_names, op_type) + collect_var_name(utils._get_op_output_var_names(op), + persistable_var_names, op_type) def _set_activation_persistable(self): ''' @@ -823,8 
+821,9 @@ class PostTrainingQuantization(object): min_value = float(np.min(var_tensor)) max_value = float(np.max(var_tensor)) if var_name not in self._sampling_act_abs_min_max: - self._sampling_act_abs_min_max[ - var_name] = [min_value, max_value] + self._sampling_act_abs_min_max[var_name] = [ + min_value, max_value + ] else: if min_value < self._sampling_act_abs_min_max[var_name][0]: self._sampling_act_abs_min_max[var_name][0] = min_value @@ -839,8 +838,9 @@ class PostTrainingQuantization(object): if var_name not in self._sampling_act_histogram: min_val = self._sampling_act_abs_min_max[var_name][0] max_val = self._sampling_act_abs_min_max[var_name][1] - hist, hist_edeges = np.histogram( - [], bins=self._histogram_bins, range=(min_val, max_val)) + hist, hist_edeges = np.histogram([], + bins=self._histogram_bins, + range=(min_val, max_val)) self._sampling_act_histogram[var_name] = [hist, hist_edeges] def _calculate_kl_hist_threshold(self): @@ -944,18 +944,11 @@ class PostTrainingQuantization(object): else: scale_dict = self._quantized_threshold for key, val in scale_dict.items(): - utils.set_variable_data( - self._scope, - self._place, - key + ".scale", - np.array( - [val], dtype=np.float32)) - utils.set_variable_data( - self._scope, - self._place, - key + ".quant_dequant.scale", - np.array( - [val], dtype=np.float32)) + utils.set_variable_data(self._scope, self._place, key + ".scale", + np.array([val], dtype=np.float32)) + utils.set_variable_data(self._scope, self._place, + key + ".quant_dequant.scale", + np.array([val], dtype=np.float32)) if not self._onnx_format: # apply QuantizationFreezePass, and obtain the final quant model @@ -1031,8 +1024,8 @@ class PostTrainingQuantization(object): for block_id in range(len(self._program.blocks)): for op in self._program.blocks[block_id].ops: - if op.type in ( - self._quantizable_op_type + self._out_scale_op_list): + if op.type in (self._quantizable_op_type + + self._out_scale_op_list): out_var_names = utils._get_op_output_var_names(op) for var_name in out_var_names: analysis_and_save_info(op, var_name) @@ -1168,10 +1161,11 @@ class WeightQuantization(object): if generate_test_model: test_model_dir = os.path.join(save_model_dir, "test_model") - self._quantize_weight_to_int( - test_model_dir, save_model_filename, save_params_filename, - quantizable_op_type, weight_bits, weight_quantize_type, True, - threshold_rate) + self._quantize_weight_to_int(test_model_dir, save_model_filename, + save_params_filename, + quantizable_op_type, weight_bits, + weight_quantize_type, True, + threshold_rate) def convert_weight_to_fp16(self, save_model_dir): """ @@ -1209,16 +1203,17 @@ class WeightQuantization(object): if self._params_filename is not None: save_var_map[new_var.name] = new_var else: - save_file_path = os.path.join( - os.path.normpath(save_model_dir), new_var.name) - save_block.append_op( - type='save', - inputs={'X': [new_var]}, - outputs={}, - attrs={ - 'file_path': os.path.normpath(save_file_path), - 'save_as_fp16': True - }) + save_file_path = os.path.join(os.path.normpath(save_model_dir), + new_var.name) + save_block.append_op(type='save', + inputs={'X': [new_var]}, + outputs={}, + attrs={ + 'file_path': + os.path.normpath(save_file_path), + 'save_as_fp16': + True + }) if self._params_filename is not None: save_var_list = [] @@ -1230,14 +1225,15 @@ class WeightQuantization(object): name=unique_name.generate("saved_params")) saved_params_var.desc.set_persistable(True) - save_path = os.path.join( - os.path.normpath(save_model_dir), 
self._params_filename) - save_block.append_op( - type='save_combine', - inputs={'X': save_var_list}, - outputs={'Y': saved_params_var}, - attrs={'file_path': save_path, - 'save_as_fp16': True}) + save_path = os.path.join(os.path.normpath(save_model_dir), + self._params_filename) + save_block.append_op(type='save_combine', + inputs={'X': save_var_list}, + outputs={'Y': saved_params_var}, + attrs={ + 'file_path': save_path, + 'save_as_fp16': True + }) save_program._sync_with_cpp() exe.run(save_program) @@ -1286,14 +1282,13 @@ class WeightQuantization(object): self._weight_channel_wise_abs_max_quantization( scope, place, weight_bits, op, var_name, for_test) - io.save_inference_model( - dirname=save_model_dir, - feeded_var_names=feed_list, - target_vars=fetch_list, - executor=exe, - main_program=program, - model_filename=save_model_filename, - params_filename=save_params_filename) + io.save_inference_model(dirname=save_model_dir, + feeded_var_names=feed_list, + target_vars=fetch_list, + executor=exe, + main_program=program, + model_filename=save_model_filename, + params_filename=save_params_filename) def _weight_abs_max_quantization(self, scope, place, weight_bits, threshold_rate, op, var_name, for_test): @@ -1332,8 +1327,9 @@ class WeightQuantization(object): op._set_attr(var_name + "_quant_scale", [scale]) # Save as list op._set_attr("with_quant_attr", True) - def _weight_channel_wise_abs_max_quantization( - self, scope, place, weight_bits, op, var_name, for_test): + def _weight_channel_wise_abs_max_quantization(self, scope, place, + weight_bits, op, var_name, + for_test): ''' Use channel_wise_abs_max method to quantize weight. ''' @@ -1383,8 +1379,8 @@ class WeightQuantization(object): and quantize the weights. ''' scales = [] - quantized_weight_data = np.zeros_like( - weight_data, dtype=save_weight_dtype) + quantized_weight_data = np.zeros_like(weight_data, + dtype=save_weight_dtype) channel_num = weight_data.shape[0] for i in range(channel_num): scale = np.max(np.abs(weight_data[i])) / quantize_range @@ -1397,8 +1393,8 @@ class WeightQuantization(object): ''' For conv2d and depthwise_conv2d, dequantize the weights to fp32. ''' - dequantized_weight_data = np.zeros_like( - quantized_weight_data, dtype=np.float32) + dequantized_weight_data = np.zeros_like(quantized_weight_data, + dtype=np.float32) for i in range(len(scales)): dequantized_weight_data[i] = \ (quantized_weight_data[i] * scales[i]).astype(np.float32) @@ -1411,8 +1407,8 @@ class WeightQuantization(object): and quantize the weights. ''' scales = [] - quantized_weight_data = np.zeros_like( - weight_data, dtype=save_weight_dtype) + quantized_weight_data = np.zeros_like(weight_data, + dtype=save_weight_dtype) channel_num = weight_data.shape[-1] for i in range(channel_num): scale = np.max(np.abs(weight_data[:, i])) / quantize_range @@ -1425,8 +1421,8 @@ class WeightQuantization(object): ''' For mul, dequantize the weights to fp32. 
''' - dequantized_weight_data = np.zeros_like( - quantized_weight_data, dtype=np.float32) + dequantized_weight_data = np.zeros_like(quantized_weight_data, + dtype=np.float32) for i in range(len(scales)): dequantized_weight_data[:, i] = \ (quantized_weight_data[:, i] * scales[i]).astype(np.float32) @@ -1434,8 +1430,9 @@ class WeightQuantization(object): def _calculate_threshold(self, input, threshold_rate, histogram_bins=5000): input_abs = np.abs(input) - hist, hist_edeges = np.histogram( - input_abs, bins=histogram_bins, range=(0, np.max(input_abs))) + hist, hist_edeges = np.histogram(input_abs, + bins=histogram_bins, + range=(0, np.max(input_abs))) hist = hist / float(sum(hist)) hist_sum = 0 hist_index = 0 diff --git a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py index 348d9149435..220016bd653 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py @@ -131,9 +131,9 @@ class Quant2Int8MkldnnPass(object): def _is_any_of_op_types_quantized(self, op_types, graph): return self._is_any_of_op_types_in_graph( - op_types, graph) and (self._is_quantizing_all_ops() or - any(op_type in self._ops_to_quantize - for op_type in op_types)) + op_types, graph) and (self._is_quantizing_all_ops() + or any(op_type in self._ops_to_quantize + for op_type in op_types)) def _is_conv_quantized(self, graph): return self._is_any_of_op_types_quantized(self._conv_ops, graph) @@ -188,8 +188,9 @@ class Quant2Int8MkldnnPass(object): scale_name = op.input("InScale")[0] output_name = op.output("Out")[0] # Gather new weight scales after folding batchnorm in convolution - scale = np.array(1.0 / self._load_param( - self._scope, scale_name)[0]).astype(np.float64) + scale = np.array( + 1.0 / self._load_param(self._scope, scale_name)[0]).astype( + np.float64) scale[scale == np.Inf] = 0.0 lod_tensor = self._convert_scale2tensor(scale) use_unsigned_int = False @@ -206,13 +207,13 @@ class Quant2Int8MkldnnPass(object): _max_range = np.array(op.op().attr("max_range")).astype( np.float64) self._weight_thresholds[input_name] = np.array( - self._s8_max * self._s8_max / - _max_range).astype(np.float64) + self._s8_max * self._s8_max / _max_range).astype( + np.float64) else: scale_name = op.input("Scales")[0] self._weight_thresholds[input_name] = np.array( - self._load_param(self._scope, scale_name)).astype( - np.float64) + self._load_param(self._scope, + scale_name)).astype(np.float64) return graph @@ -228,12 +229,14 @@ class Quant2Int8MkldnnPass(object): use_unsigned_int = False for output_name in op.op().outputs(): for out_var_name in op.op().output(output_name): - self._add_scale_for_vars( - [out_var_name], use_unsigned_int, scale_lod_tensor) + self._add_scale_for_vars([out_var_name], + use_unsigned_int, + scale_lod_tensor) return graph def _propagate_scales(self, graph): + def _update_scale_op_in_scale(op, input, output): unsigned, tensor = self._var_quant_scales[output] scale = np.array(tensor) * op.op().attr("scale") @@ -299,7 +302,8 @@ class Quant2Int8MkldnnPass(object): fake_quant_out = graph._find_node_by_name(op.outputs, op.output("Out")[0]) fake_quant_out_scale = graph._find_node_by_name( - op.outputs, op.output("OutScale")[0]) + op.outputs, + op.output("OutScale")[0]) next_ops = fake_quant_out.outputs for next_op in next_ops: @@ -332,6 +336,7 @@ class Quant2Int8MkldnnPass(object): ]) def _dequantize_weights(self, 
graph): + def _is_int8_weights(op_node, weight_name): weight_var_name = op_node.input(weight_name)[0] if self._scope.find_var(weight_var_name) is None: @@ -371,8 +376,8 @@ class Quant2Int8MkldnnPass(object): def _update_activations(self, graph): for op in graph.all_op_nodes(): - if op.name() in self._conv_ops and not op.op().has_attr( - "fuse_activation"): + if op.name( + ) in self._conv_ops and not op.op().has_attr("fuse_activation"): activation = "" if op.op().has_attr("fuse_relu") and op.op().attr("fuse_relu"): activation = "relu" @@ -463,8 +468,9 @@ class Quant2Int8MkldnnPass(object): ir_pass.set(attr, value) ir_pass.apply(cpp_graph) if self._debug: - graph.draw('.', '{}_{}_{}'.format(self._pass_group, self._pass_idx, - pass_name), graph.all_op_nodes()) + graph.draw( + '.', '{}_{}_{}'.format(self._pass_group, self._pass_idx, + pass_name), graph.all_op_nodes()) self._remove_unused_var_nodes(graph) self._pass_idx += 1 return graph @@ -506,16 +512,17 @@ class Quant2Int8MkldnnPass(object): return graph def _compute_weight_scales(self, graph): + def _compute_var_scales(ops, w_name, axis): for op in graph.all_op_nodes(): if op.op().type() in ops: weight_var_name = op.input(w_name)[0] weights = np.array( self._load_param(self._scope, weight_var_name)) - scales = 1.0 / np.amax( - np.abs(weights.reshape(weights.shape[0], -1)).astype( + scales = 1.0 / np.amax(np.abs( + weights.reshape(weights.shape[0], -1)).astype( np.float64), - axis=axis) + axis=axis) scales[scales == np.Inf] = 0.0 lod_tensor = self._convert_scale2tensor(scales) @@ -528,20 +535,18 @@ class Quant2Int8MkldnnPass(object): wh = np.array(self._load_param(self._scope, wh_var_name)) OC = wh.shape[0] scale_ur = 1.0 / np.max(np.abs( - np.concatenate( - [ - wx[:, :2 * OC], wh.flatten()[:2 * OC * OC].reshape(OC, 2 - * OC) - ], - axis=0)), + np.concatenate([ + wx[:, :2 * OC], + wh.flatten()[:2 * OC * OC].reshape(OC, 2 * OC) + ], + axis=0)), axis=0) scale_o = 1.0 / np.max(np.abs( - np.concatenate( - [ - wx[:, 2 * OC:], wh.flatten()[2 * OC * OC:].reshape(OC, - OC) - ], - axis=0)), + np.concatenate([ + wx[:, 2 * OC:], + wh.flatten()[2 * OC * OC:].reshape(OC, OC) + ], + axis=0)), axis=0) gru_weights_scale = np.concatenate([scale_ur, @@ -569,8 +574,7 @@ class Quant2Int8MkldnnPass(object): wh = np.array(self._load_param(self._scope, wh_var_name)) lstm_weights_scale = 1.0 / np.max( - np.abs(np.concatenate( - [wx[:, :], wh[:, :]], axis=0)), axis=0) + np.abs(np.concatenate([wx[:, :], wh[:, :]], axis=0)), axis=0) lstm_weights_scale = lstm_weights_scale.astype('float') return self._convert_scale2tensor(lstm_weights_scale) @@ -606,6 +610,7 @@ class Quant2Int8MkldnnPass(object): return self._op_ids_to_skip def _update_relu_output_scales(self, graph): + def _set_unsigned_scale(graph, ops, op_out_name, predicate): ''' Sets the type of an output scale of a passed op type(s) to 'unsigned int8' if the @@ -615,8 +620,8 @@ class Quant2Int8MkldnnPass(object): for op in graph.all_op_nodes(): if op.name() in ops: out_name = op.output(op_out_name)[0] - if out_name in self._var_quant_scales and predicate(op.op( - )): + if out_name in self._var_quant_scales and predicate( + op.op()): is_unsigned, tensor = self._var_quant_scales[out_name] if is_unsigned is False: # If the variable is signed, it means that the scales for this var @@ -651,15 +656,17 @@ class Quant2Int8MkldnnPass(object): graph = self._apply_pass(graph, 'scale_matmul_fuse_pass') graph = self._apply_pass(graph, 'reshape_transpose_matmul_mkldnn_fuse_pass') - graph = self._apply_pass(graph, - 
'reshape_transpose_matmul_v2_mkldnn_fuse_pass') + graph = self._apply_pass( + graph, 'reshape_transpose_matmul_v2_mkldnn_fuse_pass') graph = self._apply_pass( graph, 'cpu_quantize_placement_pass', ['quantize_enabled_op_types', 'quantize_excluded_op_ids'], - [self._ops_to_quantize, self._find_avg_pooling_ids(graph)]) + [self._ops_to_quantize, + self._find_avg_pooling_ids(graph)]) graph = self._apply_pass( graph, 'cpu_quantize_pass', ['quant_var_scales', 'data_layout'], - [self._var_quant_scales, self._get_data_layout(graph)]) + [self._var_quant_scales, + self._get_data_layout(graph)]) graph = self._apply_pass(graph, 'cpu_quantize_squash_pass') graph = self._apply_pass(graph, 'int8_scale_calculation_mkldnn_pass') return graph diff --git a/python/paddle/fluid/contrib/slim/quantization/quant_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/quantization/quant_int8_mkldnn_pass.py index 2ed06a48c29..d56aeb79f3f 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quant_int8_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quant_int8_mkldnn_pass.py @@ -103,8 +103,8 @@ class QuantInt8MkldnnPass(object): if op_node.name() in self._dequantize_type: input_name = op_node.input("X")[0] scale_name = op_node.input("Scale")[0] - self._in_scale[input_name] = self._load_param(self._scope, - scale_name)[0] + self._in_scale[input_name] = self._load_param( + self._scope, scale_name)[0] self._max_range[input_name] = op_node.op().attr("max_range") self._new_output[input_name] = op_node.output("Out")[0] @@ -113,8 +113,8 @@ class QuantInt8MkldnnPass(object): attrs = op_node.op().attr_names() input_name = op_node.input("X")[0] scale_name = op_node.input("InScale")[0] - self._in_scale[input_name] = self._load_param(self._scope, - scale_name)[0] + self._in_scale[input_name] = self._load_param( + self._scope, scale_name)[0] # self._max_range[input_name] = op_node.op().attr("max_range") self._new_output[input_name] = op_node.output("Out")[0] @@ -142,8 +142,8 @@ class QuantInt8MkldnnPass(object): output_name = op_node.output("Output")[0] # Convert int8 range weights to fp32 range weights weight = self._load_param(self._scope, weight_name) - w_fp32 = np.divide( - np.multiply(weight, self._s8_max), self._max_range[output_name]) + w_fp32 = np.divide(np.multiply(weight, self._s8_max), + self._max_range[output_name]) w_fp32 = w_fp32.reshape(weight.shape) self._restore_var(weight_name, w_fp32) input_var_node = graph._find_node_by_name(op_node.inputs, @@ -158,12 +158,13 @@ class QuantInt8MkldnnPass(object): for name in op_node.op().attr_names() } - conv_op_node = graph.create_op_node( - op_type='conv2d', - attrs=attrs, - inputs={'Input': input_var_node, - 'Filter': weight_var_node}, - outputs={'Output': output_var_node}) + conv_op_node = graph.create_op_node(op_type='conv2d', + attrs=attrs, + inputs={ + 'Input': input_var_node, + 'Filter': weight_var_node + }, + outputs={'Output': output_var_node}) # Based on the Quant's scales to calculate the scales of MKL-DNN INT8 conv2d scale_in = self._s8_max / self._in_scale[output_name] @@ -186,8 +187,8 @@ class QuantInt8MkldnnPass(object): output_name = op_node.output("Out")[0] # Convert int8 range weights to fp32 range weights weight = self._load_param(self._scope, weight_name) - w_fp32 = np.divide( - np.multiply(weight, self._s8_max), self._max_range[output_name]) + w_fp32 = np.divide(np.multiply(weight, self._s8_max), + self._max_range[output_name]) w_fp32 = w_fp32.reshape(weight.shape) self._restore_var(weight_name, w_fp32) input_var_node = 
graph._find_node_by_name(op_node.inputs, @@ -202,12 +203,13 @@ class QuantInt8MkldnnPass(object): for name in op_node.op().attr_names() } - mul_op_node = graph.create_op_node( - op_type='mul', - attrs=attrs, - inputs={'X': input_var_node, - 'Y': weight_var_node}, - outputs={'Out': output_var_node}) + mul_op_node = graph.create_op_node(op_type='mul', + attrs=attrs, + inputs={ + 'X': input_var_node, + 'Y': weight_var_node + }, + outputs={'Out': output_var_node}) # Based on the Quant's scales to calculate MKL-DNN INT8 mul's scales scale_in = self._s8_max / self._in_scale[output_name] @@ -233,7 +235,8 @@ class QuantInt8MkldnnPass(object): output_var_node = graph._find_node_by_name(op_node.outputs, op_node.output("Out")[0]) scale_in = self._s8_max / self._load_param( - self._scope, op_node.input("InScale")[0])[0] + self._scope, + op_node.input("InScale")[0])[0] quant_op_node = graph.create_op_node( op_type='quantize', attrs={ diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 17ddedd9d30..eaf9bed3d6f 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -294,17 +294,18 @@ class QuantizationTransformPass(object): else False # if var node is weight and weight_preprocess_func is not None, - # will insert weight preprocess func + # will insert weight preprocess func # to preorocess weight before quantization - # if var node is activation and act_preprocess_func is not None, - # will insert activation preprocess func + # if var node is activation and act_preprocess_func is not None, + # will insert activation preprocess func # to preorocess activation before quantization if is_weight and self._weight_preprocess_func is not None: var_node = self._insert_func( graph, self._weight_preprocess_func, var_node, op) elif not is_weight and self._act_preprocess_func is not None: - var_node = self._insert_func( - graph, self._act_preprocess_func, var_node, op) + var_node = self._insert_func(graph, + self._act_preprocess_func, + var_node, op) # if var node is weight and weight_quantize_func is not None, # will insert weight quantize func to quantize and dequantize weight @@ -396,12 +397,8 @@ class QuantizationTransformPass(object): var_type=core.VarDesc.VarType.LOD_TENSOR, shape=[1], var_dtype=core.VarDesc.VarType.INT64) - _init_var_node( - global_step_in, - np.zeros( - [1], dtype='int64'), - self._scope, - self._place) + _init_var_node(global_step_in, np.zeros([1], dtype='int64'), + self._scope, self._place) global_step_out = graph.create_var_node_from_desc( global_step_in.var()) # The attribute of `op_role` is needed by ParallelExecutor. 
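The fake abs_max quantize/dequantize ops that these passes insert amount to a symmetric round trip through the integer grid, with the tensor's absolute maximum recorded as the scale. The numpy sketch below is a hedged approximation of that arithmetic; the helper name, the epsilon guard, and the exact scale convention are assumptions rather than the operator kernel.

import numpy as np

def fake_quant_dequant_abs_max(x, bits=8):
    # The recorded scale is the tensor's absolute maximum; values are mapped
    # onto the symmetric integer range and straight back, so downstream layers
    # see fp32 tensors that already carry the quantization error.
    qmax = float(2**(bits - 1) - 1)                    # 127 for 8-bit
    threshold = max(float(np.max(np.abs(x))), 1e-8)    # guard against all-zero input
    q = np.clip(np.round(x / threshold * qmax), -qmax, qmax)
    return (q * threshold / qmax).astype(x.dtype), threshold

x = np.random.randn(4, 4).astype(np.float32)
x_qdq, out_scale = fake_quant_dequant_abs_max(x)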
@@ -450,12 +447,9 @@ class QuantizationTransformPass(object): var_dtype=var_node.dtype()) data_type = 'float64' if var_node.dtype( ) == core.VarDesc.VarType.FP64 else 'float32' - _init_var_node( - scale_var_node, - np.zeros( - scale_var_node.shape(), dtype=data_type), - self._scope, - self._place) + _init_var_node(scale_var_node, + np.zeros(scale_var_node.shape(), dtype=data_type), + self._scope, self._place) quant_op_node = graph.create_op_node( op_type='fake_quantize_abs_max', attrs={ @@ -463,8 +457,10 @@ class QuantizationTransformPass(object): 'op_role': core.op_proto_and_checker_maker.OpRole.Forward }, inputs={'X': var_node}, - outputs={'Out': quant_var_node, - 'OutScale': scale_var_node}) + outputs={ + 'Out': quant_var_node, + 'OutScale': scale_var_node + }) graph.link_to(var_node, quant_op_node) graph.link_to(quant_op_node, quant_var_node) graph.link_to(quant_op_node, scale_var_node) @@ -489,12 +485,9 @@ class QuantizationTransformPass(object): var_dtype=var_node.dtype()) data_type = 'float64' if var_node.dtype( ) == core.VarDesc.VarType.FP64 else 'float32' - _init_var_node( - scale_in_node, - np.array( - [_SCALE_DEFAULT_VALUE], dtype=data_type), - self._scope, - self._place) + _init_var_node(scale_in_node, + np.array([_SCALE_DEFAULT_VALUE], dtype=data_type), + self._scope, self._place) scale_out_node = graph.create_var_node_from_desc(scale_in_node.var()) inputs = {'X': var_node, 'InScale': scale_in_node} @@ -509,12 +502,9 @@ class QuantizationTransformPass(object): var_dtype=var_node.dtype()) data_type = 'float64' if var_node.dtype( ) == core.VarDesc.VarType.FP64 else 'float32' - _init_var_node( - scales_node, - np.zeros( - [self._window_size], dtype=data_type), - self._scope, - self._place) + _init_var_node(scales_node, + np.zeros([self._window_size], dtype=data_type), + self._scope, self._place) inputs['Iter'] = self._global_step outputs['OutScales'] = scales_node @@ -557,12 +547,9 @@ class QuantizationTransformPass(object): var_dtype=var_node.dtype()) data_type = 'float64' if var_node.dtype( ) == core.VarDesc.VarType.FP64 else 'float32' - _init_var_node( - scale_in_node, - np.array( - [_SCALE_DEFAULT_VALUE], dtype=data_type), - self._scope, - self._place) + _init_var_node(scale_in_node, + np.array([_SCALE_DEFAULT_VALUE], dtype=data_type), + self._scope, self._place) scale_out_node = graph.create_var_node_from_desc(scale_in_node.var()) ins = {'X': var_node, 'InScale': scale_in_node} @@ -575,27 +562,19 @@ class QuantizationTransformPass(object): shape=[1]) data_type = 'float64' if var_node.dtype( ) == core.VarDesc.VarType.FP64 else 'float32' - _init_var_node( - state_in_node, - np.ones( - [1], dtype=data_type), - self._scope, - self._place) + _init_var_node(state_in_node, np.ones([1], dtype=data_type), + self._scope, self._place) accum_in_node = graph.create_persistable_node( name=unique_name.generate('accum'), var_type=core.VarDesc.VarType.LOD_TENSOR, var_dtype=var_node.dtype(), shape=[1]) - _init_var_node( - accum_in_node, - np.ones( - [1], dtype=data_type), - self._scope, - self._place) - state_out_node = graph.create_var_node_from_desc(state_in_node.var( - )) - accum_out_node = graph.create_var_node_from_desc(accum_in_node.var( - )) + _init_var_node(accum_in_node, np.ones([1], dtype=data_type), + self._scope, self._place) + state_out_node = graph.create_var_node_from_desc( + state_in_node.var()) + accum_out_node = graph.create_var_node_from_desc( + accum_in_node.var()) ins['InState'] = state_in_node ins['InAccum'] = accum_in_node @@ -647,12 +626,9 @@ class 
QuantizationTransformPass(object): var_dtype=var_node.dtype()) data_type = 'float64' if var_node.dtype( ) == core.VarDesc.VarType.FP64 else 'float32' - _init_var_node( - scale_var_node, - np.zeros( - scale_var_node.shape(), dtype=data_type), - self._scope, - self._place) + _init_var_node(scale_var_node, + np.zeros(scale_var_node.shape(), dtype=data_type), + self._scope, self._place) quant_op_node = graph.create_op_node( op_type='fake_channel_wise_quantize_abs_max', attrs={ @@ -662,8 +638,10 @@ class QuantizationTransformPass(object): 'op_role': core.op_proto_and_checker_maker.OpRole.Forward }, inputs={'X': var_node}, - outputs={'Out': quant_var_node, - 'OutScale': scale_var_node}) + outputs={ + 'Out': quant_var_node, + 'OutScale': scale_var_node + }) graph.link_to(var_node, quant_op_node) graph.link_to(quant_op_node, quant_var_node) graph.link_to(quant_op_node, scale_var_node) @@ -687,8 +665,10 @@ class QuantizationTransformPass(object): 'max_range': float(max_range), 'op_role': core.op_proto_and_checker_maker.OpRole.Forward }, - inputs={'X': var_node, - 'Scale': scale_var_node}, + inputs={ + 'X': var_node, + 'Scale': scale_var_node + }, outputs={'Out': dequant_var_node}) graph.link_to(var_node, dequant_op_node) graph.link_to(scale_var_node, dequant_op_node) @@ -714,8 +694,10 @@ class QuantizationTransformPass(object): 'quant_axis': quant_axis, 'op_role': core.op_proto_and_checker_maker.OpRole.Forward }, - inputs={'X': var_node, - 'Scales': scale_var_nodes}, + inputs={ + 'X': var_node, + 'Scales': scale_var_nodes + }, outputs={'Out': dequant_var_node}) graph.link_to(var_node, dequant_op_node) for scale_n in scale_var_nodes: @@ -803,10 +785,9 @@ class QuantizationTransformPass(object): startup_program = Program() with program_guard(tmp_program, startup_program): with unique_name.guard(var_node.name() + "_"): - in_node = data( - var_node.name() + '_tmp_input', - shape=var_node.shape(), - dtype='float32') + in_node = data(var_node.name() + '_tmp_input', + shape=var_node.shape(), + dtype='float32') out_node = func(in_node) graph.out_node_mapping_table[out_node.name] = var_node.name() # loss shape must be 1 when minimize @@ -819,8 +800,8 @@ class QuantizationTransformPass(object): with scope_guard(self._scope): self._exe.run(startup_program) - tmp_graph = IrGraph( - core.Graph(tmp_program.desc), for_test=graph._for_test) + tmp_graph = IrGraph(core.Graph(tmp_program.desc), + for_test=graph._for_test) in_node = tmp_graph._find_node_by_name(tmp_graph.all_var_nodes(), in_node.name) out_node = tmp_graph._find_node_by_name(tmp_graph.all_var_nodes(), @@ -861,9 +842,11 @@ class QuantizationTransformPass(object): # find op's gradient op, such as conv2d_grad op_grad = op_out_grad.outputs[0] target_out_grad_node = graph._find_node_by_name( - graph.all_var_nodes(), target_out_node.name() + "@GRAD") + graph.all_var_nodes(), + target_out_node.name() + "@GRAD") in_node_grad = graph._find_node_by_name( - graph.all_var_nodes(), target_in_node.name() + "@GRAD") + graph.all_var_nodes(), + target_in_node.name() + "@GRAD") in_node_grad_op = in_node_grad.inputs # update op_grad's input graph.update_input_link(var_node, target_out_node, op_grad) @@ -936,6 +919,7 @@ class QuantizationTransformPass(object): class QuantizationFreezePass(object): + def __init__(self, scope, place, @@ -1008,7 +992,8 @@ class QuantizationFreezePass(object): input_arg_name] if input_arg_name not in persistable_vars: scale_v = graph._find_node_by_name( - op_node.outputs, op_node.output('OutScale')[0]) + op_node.outputs, + 
op_node.output('OutScale')[0]) self._quant_var_scale_map[input_arg_name] = scale_v else: # Obtain scale from OutScale var node @@ -1063,8 +1048,8 @@ class QuantizationFreezePass(object): if self._weight_quantize_type == 'channel_wise_abs_max': quant_axis = 1 if op_node.name() in \ utils._channelwise_quant_axis1_ops else 0 - self._insert_post_channel_dequant_op(graph, op_node, - quant_axis) + self._insert_post_channel_dequant_op( + graph, op_node, quant_axis) else: self._insert_post_dequant_op(graph, op_node) @@ -1119,7 +1104,8 @@ class QuantizationFreezePass(object): " more than one output." % (op_node.name())) output_var_node = graph._find_node_by_name( - op_node.outputs, op_node.output_arg_names()[0]) + op_node.outputs, + op_node.output_arg_names()[0]) weight_scale_node = graph.create_persistable_node( name=unique_name.generate('channel_scale'), var_type=core.VarDesc.VarType.LOD_TENSOR, @@ -1127,9 +1113,8 @@ class QuantizationFreezePass(object): var_dtype=output_var_node.dtype()) data_type = 'float64' if output_var_node.dtype( ) == core.VarDesc.VarType.FP64 else 'float32' - _init_var_node(weight_scale_node, - channel_scale.astype(data_type), self._scope, - self._place) + _init_var_node(weight_scale_node, channel_scale.astype(data_type), + self._scope, self._place) dequant_var_node = graph.create_var_node( name=self._dequantized_var_name(output_var_node.name()), var_type=output_var_node.type(), @@ -1192,7 +1177,8 @@ class QuantizationFreezePass(object): " more than one output." % (op_node.name())) output_var_node = graph._find_node_by_name( - op_node.outputs, op_node.output_arg_names()[0]) + op_node.outputs, + op_node.output_arg_names()[0]) dequant_var_node = graph.create_var_node( name=self._dequantized_var_name(output_var_node.name()), var_type=output_var_node.type(), @@ -1204,8 +1190,10 @@ class QuantizationFreezePass(object): 'max_range': float(max_range), 'op_role': core.op_proto_and_checker_maker.OpRole.Forward }, - inputs={'X': output_var_node, - 'Scale': scale_var_node}, + inputs={ + 'X': output_var_node, + 'Scale': scale_var_node + }, outputs={'Out': dequant_var_node}) graph.link_to(output_var_node, dequant_op_node) graph.link_to(scale_var_node, dequant_op_node) @@ -1264,6 +1252,7 @@ class QuantizationFreezePass(object): class ConvertToInt8Pass(object): + def __init__(self, scope, place, quantizable_op_type=None): """ Convert the weights into int8_t type. @@ -1303,8 +1292,8 @@ class ConvertToInt8Pass(object): name = var_node.name() if name in persistable_vars: if name not in input_map: - int8_var_node = self._convert_to_int8(graph, - var_node) + int8_var_node = self._convert_to_int8( + graph, var_node) input_map[name] = int8_var_node graph.update_input_link(var_node, input_map[name], op_node) @@ -1352,6 +1341,7 @@ class ConvertToInt8Pass(object): class TransformForMobilePass(object): + def __init__(self): """ This pass is used to convert the frozen graph for paddle-mobile execution. @@ -1394,6 +1384,7 @@ class TransformForMobilePass(object): class OutScaleForTrainingPass(object): + def __init__(self, scope=None, place=None, moving_rate=0.9): """ This pass is used for calculating output scales of some operators. 
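The scale/state/accum trio created by OutScaleForTrainingPass tracks an exponential moving average of each batch's absolute maximum. A minimal sketch of that update, assuming the 0.9 default moving rate and a purely illustrative helper (not the C++ kernel), is:

import numpy as np

def moving_average_abs_max(x, state, accum, moving_rate=0.9):
    # Per-batch absolute maximum of the activation tensor.
    cur_max = float(np.max(np.abs(x)))
    # `state` counts decayed updates, `accum` accumulates decayed maxima,
    # so their ratio is a bias-corrected moving average of the abs max.
    state = moving_rate * state + 1.0
    accum = moving_rate * accum + cur_max
    scale = accum / state
    return scale, state, accum

state, accum = 1.0, 1.0   # mirrors the np.ones([1]) initialization above
for _ in range(10):
    batch = np.random.randn(32, 64).astype(np.float32)
    scale, state, accum = moving_average_abs_max(batch, state, accum)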
@@ -1441,12 +1432,8 @@ class OutScaleForTrainingPass(object): var_dtype=in_node.dtype()) data_type = 'float64' if in_node.dtype() \ == core.VarDesc.VarType.FP64 else 'float32' - _init_var_node( - scale_node, - np.ones( - [1], dtype=data_type), - self._scope, - self._place) + _init_var_node(scale_node, np.ones([1], dtype=data_type), + self._scope, self._place) ins = {'X': in_node} outs = {'OutScale': scale_node} if not self._is_test: @@ -1455,23 +1442,15 @@ class OutScaleForTrainingPass(object): var_type=core.VarDesc.VarType.LOD_TENSOR, var_dtype=in_node.dtype(), shape=[1]) - _init_var_node( - state_in_node, - np.ones( - [1], dtype=data_type), - self._scope, - self._place) + _init_var_node(state_in_node, np.ones([1], dtype=data_type), + self._scope, self._place) accum_in_node = graph.create_persistable_node( name=unique_name.generate('scale_accum@'), var_type=core.VarDesc.VarType.LOD_TENSOR, var_dtype=in_node.dtype(), shape=[1]) - _init_var_node( - accum_in_node, - np.ones( - [1], dtype=data_type), - self._scope, - self._place) + _init_var_node(accum_in_node, np.ones([1], dtype=data_type), + self._scope, self._place) state_out_node = graph.create_var_node_from_desc( state_in_node.var()) accum_out_node = graph.create_var_node_from_desc( @@ -1509,6 +1488,7 @@ class OutScaleForTrainingPass(object): class OutScaleForInferencePass(object): + def __init__(self, scope=None): """ This pass is used for setting output scales of some operators. @@ -1550,8 +1530,8 @@ class OutScaleForInferencePass(object): # For compatibility, we save output threshold by two methods. op_node.op()._set_attr("out_threshold", float(scale_value)) - argname_index = utils._get_output_name_index(op_node, - var_name) + argname_index = utils._get_output_name_index( + op_node, var_name) assert argname_index is not None, \ var_name + " is not the output of the op" op_node.op()._set_attr(argname_index[0] + str(argname_index[1]) \ @@ -1680,8 +1660,8 @@ class AddQuantDequantPass(object): if op_node.name() in self._quantizable_grad_op_type: for input_name in op_node.input_arg_names(): if input_name in dequantized_vars_map: - in_node = graph._find_node_by_name(op_node.inputs, - input_name) + in_node = graph._find_node_by_name( + op_node.inputs, input_name) dequant_var_node = dequantized_vars_map[input_name] graph.update_input_link(in_node, dequant_var_node, op_node) @@ -1693,11 +1673,11 @@ class AddQuantDequantPass(object): quant_bits): """Insert fake_quantize_dequantize_moving_average_abs_max op. 
""" - quant_var_node = graph.create_var_node( - name="{}.quant_dequant".format(var_node.name()), - var_type=var_node.type(), - shape=var_node.shape(), - var_dtype=var_node.dtype()) + quant_var_node = graph.create_var_node(name="{}.quant_dequant".format( + var_node.name()), + var_type=var_node.type(), + shape=var_node.shape(), + var_dtype=var_node.dtype()) scale_in_node = graph.create_persistable_node( name="{}.quant_dequant.scale".format(var_node.name()), var_type=core.VarDesc.VarType.LOD_TENSOR, @@ -1705,12 +1685,9 @@ class AddQuantDequantPass(object): var_dtype=var_node.dtype()) data_type = 'float64' if var_node.dtype( ) == core.VarDesc.VarType.FP64 else 'float32' - _init_var_node( - scale_in_node, - np.array( - [_SCALE_DEFAULT_VALUE], dtype=data_type), - self._scope, - self._place) + _init_var_node(scale_in_node, + np.array([_SCALE_DEFAULT_VALUE], dtype=data_type), + self._scope, self._place) scale_out_node = graph.create_var_node_from_desc(scale_in_node.var()) ins = {'X': var_node, 'InScale': scale_in_node} @@ -1723,27 +1700,19 @@ class AddQuantDequantPass(object): shape=[1]) data_type = 'float64' if var_node.dtype( ) == core.VarDesc.VarType.FP64 else 'float32' - _init_var_node( - state_in_node, - np.ones( - [1], dtype=data_type), - self._scope, - self._place) + _init_var_node(state_in_node, np.ones([1], dtype=data_type), + self._scope, self._place) accum_in_node = graph.create_persistable_node( name=unique_name.generate('quant_dequant.accum'), var_type=core.VarDesc.VarType.LOD_TENSOR, var_dtype=var_node.dtype(), shape=[1]) - _init_var_node( - accum_in_node, - np.ones( - [1], dtype=data_type), - self._scope, - self._place) - state_out_node = graph.create_var_node_from_desc(state_in_node.var( - )) - accum_out_node = graph.create_var_node_from_desc(accum_in_node.var( - )) + _init_var_node(accum_in_node, np.ones([1], dtype=data_type), + self._scope, self._place) + state_out_node = graph.create_var_node_from_desc( + state_in_node.var()) + accum_out_node = graph.create_var_node_from_desc( + accum_in_node.var()) ins['InState'] = state_in_node ins['InAccum'] = accum_in_node @@ -1810,11 +1779,11 @@ class InsertQuantizeLinear(object): def insert_quant_op(self, graph, var_node): assert var_node.is_var(), '{} is not a var'.format(var_node.name()) - quant_var_node = graph.create_var_node( - name=self._quantized_var_name(var_node.name()), - var_type=var_node.type(), - shape=var_node.shape(), - var_dtype=var_node.dtype()) + quant_var_node = graph.create_var_node(name=self._quantized_var_name( + var_node.name()), + var_type=var_node.type(), + shape=var_node.shape(), + var_dtype=var_node.dtype()) data_type = 'float64' if var_node.dtype( ) == core.VarDesc.VarType.FP64 else 'float32' if self.channel_wise: @@ -1840,12 +1809,9 @@ class InsertQuantizeLinear(object): var_type=core.VarDesc.VarType.LOD_TENSOR, shape=scale_var_node.shape(), var_dtype=core.VarDesc.VarType.INT32) - _init_var_node( - zero_point_node, - np.zeros( - scale_var_node.shape(), dtype="int32"), - self._scope, - self._place) + _init_var_node(zero_point_node, + np.zeros(scale_var_node.shape(), dtype="int32"), + self._scope, self._place) inputs = {"X": var_node, "Scale": scale_var_node} if zero_point_node is not None: @@ -1856,15 +1822,14 @@ class InsertQuantizeLinear(object): if not self._is_test: attrs["is_test"] = self._is_test attrs["op_role"] = core.op_proto_and_checker_maker.OpRole.Forward - scale_out_node = graph.create_var_node_from_desc(scale_var_node.var( - )) + scale_out_node = graph.create_var_node_from_desc( + 
scale_var_node.var()) outputs["OutScale"] = scale_out_node - quant_op_node = graph.create_op_node( - op_type="quantize_linear", - attrs=attrs, - inputs=inputs, - outputs=outputs) + quant_op_node = graph.create_op_node(op_type="quantize_linear", + attrs=attrs, + inputs=inputs, + outputs=outputs) graph.link_to(var_node, quant_op_node) graph.link_to(scale_var_node, quant_op_node) @@ -1891,12 +1856,9 @@ class InsertQuantizeLinear(object): var_type=core.VarDesc.VarType.LOD_TENSOR, shape=scale_var_node.shape(), var_dtype=core.VarDesc.VarType.INT32) - _init_var_node( - zero_point_node, - np.zeros( - scale_var_node.shape(), dtype="int32"), - self._scope, - self._place) + _init_var_node(zero_point_node, + np.zeros(scale_var_node.shape(), dtype="int32"), + self._scope, self._place) inputs = {"X": var_node, "Scale": scale_var_node} if zero_point_node is not None: @@ -1906,11 +1868,10 @@ class InsertQuantizeLinear(object): if not self._is_test: attrs["op_role"] = core.op_proto_and_checker_maker.OpRole.Forward - quant_op_node = graph.create_op_node( - op_type="dequantize_linear", - attrs=attrs, - inputs=inputs, - outputs={"Y": dequant_var_node}) + quant_op_node = graph.create_op_node(op_type="dequantize_linear", + attrs=attrs, + inputs=inputs, + outputs={"Y": dequant_var_node}) graph.link_to(var_node, quant_op_node) graph.link_to(scale_var_node, quant_op_node) @@ -2122,17 +2083,19 @@ class QuantizationTransformPassV2(object): else False # if var node is weight and weight_preprocess_func is not None, - # will insert weight preprocess func + # will insert weight preprocess func # to preorocess weight before quantization - # if var node is activation and act_preprocess_func is not None, - # will insert activation preprocess func + # if var node is activation and act_preprocess_func is not None, + # will insert activation preprocess func # to preorocess activation before quantization if is_weight and self._weight_preprocess_func is not None: - var_node = self._insert_func( - graph, self._weight_preprocess_func, var_node, op) + var_node = self._insert_func(graph, + self._weight_preprocess_func, + var_node, op) elif not is_weight and self._act_preprocess_func is not None: - var_node = self._insert_func( - graph, self._act_preprocess_func, var_node, op) + var_node = self._insert_func(graph, + self._act_preprocess_func, + var_node, op) # if var node is weight and weight_quantize_func is not None, # will insert weight quantize func to quantize and dequantize weight @@ -2144,8 +2107,9 @@ class QuantizationTransformPassV2(object): processed_vars.append(name) continue elif not is_weight and self._act_quantize_func is not None: - target_out_node = self._insert_func( - graph, self._act_quantize_func, var_node, op) + target_out_node = self._insert_func(graph, + self._act_quantize_func, + var_node, op) processed_vars.append(name) continue @@ -2389,8 +2353,8 @@ class AddQuantDequantPassV2(object): if op_node.name() in self._quantizable_grad_op_type: for input_name in op_node.input_arg_names(): if input_name in dequantized_vars_map: - in_node = graph._find_node_by_name(op_node.inputs, - input_name) + in_node = graph._find_node_by_name( + op_node.inputs, input_name) dequant_var_node = dequantized_vars_map[input_name] graph.update_input_link(in_node, dequant_var_node, op_node) @@ -2466,43 +2430,42 @@ class ReplaceFakeQuantDequantPass(object): var_type=core.VarDesc.VarType.LOD_TENSOR, shape=scale_node.shape(), var_dtype=core.VarDesc.VarType.INT32) - _init_var_node( - zero_point_node, - np.zeros( - scale_node.shape(), 
dtype="int32"), - self._scope, - self._place) - - quant_var_node = graph.create_var_node( - name=self._quantized_var_name(x_node.name()), - var_type=x_node.type(), - shape=x_node.shape(), - var_dtype=x_node.dtype()) - quant_op_node = graph.create_op_node( - op_type="quantize_linear", - attrs={"quant_axis": quant_axis, - "bit_length": bit_length}, - inputs={ - "X": x_node, - "Scale": scale_node, - "ZeroPoint": zero_point_node - }, - outputs={"Y": quant_var_node}) + _init_var_node(zero_point_node, + np.zeros(scale_node.shape(), dtype="int32"), + self._scope, self._place) + + quant_var_node = graph.create_var_node(name=self._quantized_var_name( + x_node.name()), + var_type=x_node.type(), + shape=x_node.shape(), + var_dtype=x_node.dtype()) + quant_op_node = graph.create_op_node(op_type="quantize_linear", + attrs={ + "quant_axis": quant_axis, + "bit_length": bit_length + }, + inputs={ + "X": x_node, + "Scale": scale_node, + "ZeroPoint": zero_point_node + }, + outputs={"Y": quant_var_node}) graph.link_to(x_node, quant_op_node) graph.link_to(scale_node, quant_op_node) if zero_point_node is not None: graph.link_to(zero_point_node, quant_op_node) graph.link_to(quant_op_node, quant_var_node) - dequant_op_node = graph.create_op_node( - op_type="dequantize_linear", - attrs={"quant_axis": quant_axis, - "bit_length": bit_length}, - inputs={ - "X": quant_var_node, - "Scale": scale_node, - "ZeroPoint": zero_point_node - }, - outputs={"Y": out_node}) + dequant_op_node = graph.create_op_node(op_type="dequantize_linear", + attrs={ + "quant_axis": quant_axis, + "bit_length": bit_length + }, + inputs={ + "X": quant_var_node, + "Scale": scale_node, + "ZeroPoint": zero_point_node + }, + outputs={"Y": out_node}) graph.link_to(quant_var_node, dequant_op_node) graph.link_to(scale_node, dequant_op_node) if zero_point_node is not None: @@ -2581,7 +2544,8 @@ class QuantWeightPass(object): scale_node = graph._find_node_by_name(_op.inputs, _op.input("Scale")[0]) zero_point_node = graph._find_node_by_name( - _op.inputs, _op.input("ZeroPoint")[0]) + _op.inputs, + _op.input("ZeroPoint")[0]) out_node = graph._find_node_by_name(_op.outputs, _op.output("Y")[0]) diff --git a/python/paddle/fluid/contrib/slim/quantization/quantize_transpiler_v2.py b/python/paddle/fluid/contrib/slim/quantization/quantize_transpiler_v2.py index 32768fff089..892b027de53 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantize_transpiler_v2.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantize_transpiler_v2.py @@ -24,8 +24,9 @@ from ....param_attr import ParamAttr from ....initializer import Constant from ....log_helper import get_logger -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') def find_next_ops(block, var_name): @@ -50,6 +51,7 @@ def load_variable_data(scope, var_name): class QuantizeTranspilerV2(object): + def __init__(self, weight_bits=8, activation_bits=8, @@ -197,15 +199,15 @@ class QuantizeTranspilerV2(object): else self._activation_quantize_type if quant_type == "abs_max": - new_var = self._insert_abs_max_fq_op(block, idx, in_var, - quant_bits) + new_var = self._insert_abs_max_fq_op( + block, idx, in_var, quant_bits) elif quant_type == "moving_average_abs_max": - new_var = self._insert_ma_abs_max_fq_op(block, idx, in_var, - quant_bits, is_test) + new_var = self._insert_ma_abs_max_fq_op( + block, idx, in_var, quant_bits, is_test) elif quant_type == "channel_wise_abs_max": 
ch_axis = 1 if op.type in self._out_ch_axis1_ops else 0 - new_var = self._insert_pc_abs_max_fq_op(block, idx, in_var, - quant_bits, ch_axis) + new_var = self._insert_pc_abs_max_fq_op( + block, idx, in_var, quant_bits, ch_axis) else: _logger.error("Don't support the quant_type: %s" % quant_type) @@ -264,67 +266,62 @@ class QuantizeTranspilerV2(object): """ Inset abs max fake quant op. """ - quant_dequant_var = block.create_var( - type=in_var.type, - name="{}.quant_dequant".format(in_var.name), - shape=in_var.shape, - dtype=in_var.dtype) - scale_var = self._helper.create_parameter( - attr=ParamAttr( - name="{}.quant_dequant.scale".format(in_var.name), - initializer=Constant(0.), - trainable=False), - shape=[1], - dtype=in_var.dtype) + quant_dequant_var = block.create_var(type=in_var.type, + name="{}.quant_dequant".format( + in_var.name), + shape=in_var.shape, + dtype=in_var.dtype) + scale_var = self._helper.create_parameter(attr=ParamAttr( + name="{}.quant_dequant.scale".format(in_var.name), + initializer=Constant(0.), + trainable=False), + shape=[1], + dtype=in_var.dtype) scale_var.stop_gradient = True inputs = {'X': in_var} outputs = {'Out': quant_dequant_var, 'OutScale': scale_var} attrs = {'bit_length': quant_bits} - block._insert_op( - idx, - type='fake_quantize_dequantize_abs_max', - attrs=attrs, - inputs=inputs, - outputs=outputs) + block._insert_op(idx, + type='fake_quantize_dequantize_abs_max', + attrs=attrs, + inputs=inputs, + outputs=outputs) return quant_dequant_var def _insert_ma_abs_max_fq_op(self, block, idx, in_var, quant_bits, is_test): """ Insert moving average abs max fake quant op. """ - quant_dequant_var = block.create_var( - type=in_var.type, - name="{}.quant_dequant".format(in_var.name), - shape=in_var.shape, - dtype=in_var.dtype) - - scale_var = self._helper.create_parameter( - attr=ParamAttr( - name="{}.quant_dequant.scale".format(in_var.name), - initializer=Constant(0.), - trainable=False), - shape=[1], - dtype=in_var.dtype) + quant_dequant_var = block.create_var(type=in_var.type, + name="{}.quant_dequant".format( + in_var.name), + shape=in_var.shape, + dtype=in_var.dtype) + + scale_var = self._helper.create_parameter(attr=ParamAttr( + name="{}.quant_dequant.scale".format(in_var.name), + initializer=Constant(0.), + trainable=False), + shape=[1], + dtype=in_var.dtype) scale_var.stop_gradient = True if not is_test: - state_var = self._helper.create_parameter( - attr=ParamAttr( - name="{}.quant_dequant.state".format(in_var.name), - initializer=Constant(0), - trainable=False), - shape=[1], - dtype=in_var.dtype) + state_var = self._helper.create_parameter(attr=ParamAttr( + name="{}.quant_dequant.state".format(in_var.name), + initializer=Constant(0), + trainable=False), + shape=[1], + dtype=in_var.dtype) state_var.stop_gradient = True - accum_var = self._helper.create_parameter( - attr=ParamAttr( - name="{}.quant_dequant.accum".format(in_var.name), - initializer=Constant(0), - trainable=False), - shape=[1], - dtype=in_var.dtype) + accum_var = self._helper.create_parameter(attr=ParamAttr( + name="{}.quant_dequant.accum".format(in_var.name), + initializer=Constant(0), + trainable=False), + shape=[1], + dtype=in_var.dtype) accum_var.stop_gradient = True attrs = { @@ -340,42 +337,39 @@ class QuantizeTranspilerV2(object): outputs['OutState'] = state_var outputs['OutAccum'] = accum_var - block._insert_op( - idx, - type='fake_quantize_dequantize_moving_average_abs_max', - attrs=attrs, - inputs=inputs, - outputs=outputs) + block._insert_op(idx, + 
type='fake_quantize_dequantize_moving_average_abs_max', + attrs=attrs, + inputs=inputs, + outputs=outputs) return quant_dequant_var def _insert_pc_abs_max_fq_op(self, block, idx, in_var, quant_bits, ch_axis): """ Insert per channel abs max fake quant op. """ - quant_dequant_var = block.create_var( - type=in_var.type, - name="{}.quant_dequant".format(in_var.name), - shape=in_var.shape, - dtype=in_var.dtype) - - scale_var = self._helper.create_parameter( - attr=ParamAttr( - name="{}.quant_dequant.scale".format(in_var.name), - initializer=Constant(0.), - trainable=False), - shape=[in_var.shape[ch_axis]], - dtype=in_var.dtype) + quant_dequant_var = block.create_var(type=in_var.type, + name="{}.quant_dequant".format( + in_var.name), + shape=in_var.shape, + dtype=in_var.dtype) + + scale_var = self._helper.create_parameter(attr=ParamAttr( + name="{}.quant_dequant.scale".format(in_var.name), + initializer=Constant(0.), + trainable=False), + shape=[in_var.shape[ch_axis]], + dtype=in_var.dtype) scale_var.stop_gradient = True inputs = {'X': in_var} outputs = {'Out': quant_dequant_var, 'OutScale': scale_var} attrs = {'bit_length': quant_bits, 'quant_axis': ch_axis} - block._insert_op( - idx, - type='fake_channel_wise_quantize_dequantize_abs_max', - attrs=attrs, - inputs=inputs, - outputs=outputs) + block._insert_op(idx, + type='fake_channel_wise_quantize_dequantize_abs_max', + attrs=attrs, + inputs=inputs, + outputs=outputs) return quant_dequant_var def _insert_ma_abs_max_scale_op(self, @@ -387,13 +381,12 @@ class QuantizeTranspilerV2(object): """ Insert moving average abs max scale op. """ - scale_var = self._helper.create_parameter( - attr=ParamAttr( - name="{}.outscale.scale".format(in_var.name), - initializer=Constant(0.), - trainable=False), - shape=[1], - dtype=in_var.dtype) + scale_var = self._helper.create_parameter(attr=ParamAttr( + name="{}.outscale.scale".format(in_var.name), + initializer=Constant(0.), + trainable=False), + shape=[1], + dtype=in_var.dtype) scale_var.stop_gradient = True attrs = {'moving_rate': self._moving_rate, 'is_test': is_test} @@ -401,22 +394,20 @@ class QuantizeTranspilerV2(object): outputs = {'OutScale': scale_var} if not is_test: - state_var = self._helper.create_parameter( - attr=ParamAttr( - name="{}.outscale.state".format(in_var.name), - initializer=Constant(0), - trainable=False), - shape=[1], - dtype=in_var.dtype) + state_var = self._helper.create_parameter(attr=ParamAttr( + name="{}.outscale.state".format(in_var.name), + initializer=Constant(0), + trainable=False), + shape=[1], + dtype=in_var.dtype) state_var.stop_gradient = True - accum_var = self._helper.create_parameter( - attr=ParamAttr( - name="{}.outscale.accum".format(in_var.name), - initializer=Constant(0), - trainable=False), - shape=[1], - dtype=in_var.dtype) + accum_var = self._helper.create_parameter(attr=ParamAttr( + name="{}.outscale.accum".format(in_var.name), + initializer=Constant(0), + trainable=False), + shape=[1], + dtype=in_var.dtype) accum_var.stop_gradient = True inputs['InState'] = state_var @@ -425,20 +416,18 @@ class QuantizeTranspilerV2(object): outputs['OutAccum'] = accum_var if has_out_var: - out_var = block.create_var( - type=in_var.type, - name="{}.tmp".format(in_var.name), - shape=in_var.shape, - dtype=in_var.dtype) + out_var = block.create_var(type=in_var.type, + name="{}.tmp".format(in_var.name), + shape=in_var.shape, + dtype=in_var.dtype) outputs['Out'] = out_var - block._insert_op( - idx, - type='moving_average_abs_max_scale', - attrs=attrs, - inputs=inputs, - 
outputs=outputs) + block._insert_op(idx, + type='moving_average_abs_max_scale', + attrs=attrs, + inputs=inputs, + outputs=outputs) if has_out_var: return out_var diff --git a/python/paddle/fluid/contrib/slim/tests/convert_model2dot.py b/python/paddle/fluid/contrib/slim/tests/convert_model2dot.py index 0018d81dbf2..3573f53e22d 100644 --- a/python/paddle/fluid/contrib/slim/tests/convert_model2dot.py +++ b/python/paddle/fluid/contrib/slim/tests/convert_model2dot.py @@ -26,19 +26,20 @@ paddle.enable_static() def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument( - '--model_path', type=str, default='', help='A path to a model.') - parser.add_argument( - '--save_graph_dir', - type=str, - default='', - help='A path to save the graph.') + parser.add_argument('--model_path', + type=str, + default='', + help='A path to a model.') + parser.add_argument('--save_graph_dir', + type=str, + default='', + help='A path to save the graph.') parser.add_argument( '--save_graph_name', type=str, default='', - help='A name to save the graph. Default - name from model path will be used' - ) + help= + 'A name to save the graph. Default - name from model path will be used') test_args, args = parser.parse_known_args(namespace=unittest) return test_args, sys.argv[:1] + args @@ -53,9 +54,9 @@ def generate_dot_for_model(model_path, save_graph_dir, save_graph_name): [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(model_path, exe) else: - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model(model_path, exe, - 'model', 'params') + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(model_path, exe, 'model', + 'params') graph = IrGraph(core.Graph(inference_program.desc), for_test=True) if not os.path.exists(save_graph_dir): os.makedirs(save_graph_dir) @@ -64,8 +65,8 @@ def generate_dot_for_model(model_path, save_graph_dir, save_graph_name): save_graph_name = model_name graph.draw(save_graph_dir, save_graph_name, graph.all_op_nodes()) print( - "Success! Generated dot and pdf files for {0} model, that can be found at {1} named {2}.\n". - format(model_name, save_graph_dir, save_graph_name)) + "Success! 
Generated dot and pdf files for {0} model, that can be found at {1} named {2}.\n" + .format(model_name, save_graph_dir, save_graph_name)) if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py index 466cc14eae0..36302aea187 100644 --- a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py +++ b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py @@ -24,8 +24,9 @@ from paddle.nn import BatchNorm1D from paddle.fluid.log_helper import get_logger -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') def fix_model_dict(model): @@ -81,6 +82,7 @@ def train_lenet(lenet, reader, optimizer): class ImperativeLenet(fluid.dygraph.Layer): + def __init__(self, num_classes=10): super(ImperativeLenet, self).__init__() conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") @@ -93,50 +95,36 @@ class ImperativeLenet(fluid.dygraph.Layer): fc_b2_attr = fluid.ParamAttr(name="fc_b_2") fc_b3_attr = fluid.ParamAttr(name="fc_b_3") self.features = Sequential( - Conv2D( - in_channels=1, - out_channels=6, - kernel_size=3, - stride=1, - padding=1, - weight_attr=conv2d_w1_attr, - bias_attr=False), - BatchNorm2D(6), - ReLU(), - MaxPool2D( - kernel_size=2, stride=2), - Conv2D( - in_channels=6, - out_channels=16, - kernel_size=5, - stride=1, - padding=0, - weight_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr), - BatchNorm2D(16), - PReLU(), - MaxPool2D( - kernel_size=2, stride=2)) + Conv2D(in_channels=1, + out_channels=6, + kernel_size=3, + stride=1, + padding=1, + weight_attr=conv2d_w1_attr, + bias_attr=False), BatchNorm2D(6), ReLU(), + MaxPool2D(kernel_size=2, stride=2), + Conv2D(in_channels=6, + out_channels=16, + kernel_size=5, + stride=1, + padding=0, + weight_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr), BatchNorm2D(16), PReLU(), + MaxPool2D(kernel_size=2, stride=2)) self.fc = Sequential( - Linear( - in_features=400, - out_features=120, - weight_attr=fc_w1_attr, - bias_attr=fc_b1_attr), - LeakyReLU(), - Linear( - in_features=120, - out_features=84, - weight_attr=fc_w2_attr, - bias_attr=fc_b2_attr), - Sigmoid(), - Linear( - in_features=84, - out_features=num_classes, - weight_attr=fc_w3_attr, - bias_attr=fc_b3_attr), - Softmax()) + Linear(in_features=400, + out_features=120, + weight_attr=fc_w1_attr, + bias_attr=fc_b1_attr), LeakyReLU(), + Linear(in_features=120, + out_features=84, + weight_attr=fc_w2_attr, + bias_attr=fc_b2_attr), Sigmoid(), + Linear(in_features=84, + out_features=num_classes, + weight_attr=fc_w3_attr, + bias_attr=fc_b3_attr), Softmax()) self.add = paddle.nn.quant.add() self.quant_stub = paddle.nn.quant.QuantStub() @@ -151,6 +139,7 @@ class ImperativeLenet(fluid.dygraph.Layer): class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer): + def __init__(self, num_classes=10): super(ImperativeLenetWithSkipQuant, self).__init__() @@ -164,53 +153,48 @@ class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer): fc_b1_attr = fluid.ParamAttr(name="fc_b_1") fc_b2_attr = fluid.ParamAttr(name="fc_b_2") fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - self.conv2d_0 = Conv2D( - in_channels=1, - out_channels=6, - kernel_size=3, - stride=1, - padding=1, - weight_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr) + self.conv2d_0 = Conv2D(in_channels=1, + out_channels=6, + kernel_size=3, + stride=1, + padding=1, + weight_attr=conv2d_w1_attr, + 
bias_attr=conv2d_b1_attr) self.conv2d_0.skip_quant = True self.batch_norm_0 = BatchNorm2D(6) self.relu_0 = ReLU() self.pool2d_0 = MaxPool2D(kernel_size=2, stride=2) - self.conv2d_1 = Conv2D( - in_channels=6, - out_channels=16, - kernel_size=5, - stride=1, - padding=0, - weight_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr) + self.conv2d_1 = Conv2D(in_channels=6, + out_channels=16, + kernel_size=5, + stride=1, + padding=0, + weight_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr) self.conv2d_1.skip_quant = False self.batch_norm_1 = BatchNorm2D(16) self.relu6_0 = ReLU6() self.pool2d_1 = MaxPool2D(kernel_size=2, stride=2) - self.linear_0 = Linear( - in_features=400, - out_features=120, - weight_attr=fc_w1_attr, - bias_attr=fc_b1_attr) + self.linear_0 = Linear(in_features=400, + out_features=120, + weight_attr=fc_w1_attr, + bias_attr=fc_b1_attr) self.linear_0.skip_quant = True self.leaky_relu_0 = LeakyReLU() - self.linear_1 = Linear( - in_features=120, - out_features=84, - weight_attr=fc_w2_attr, - bias_attr=fc_b2_attr) + self.linear_1 = Linear(in_features=120, + out_features=84, + weight_attr=fc_w2_attr, + bias_attr=fc_b2_attr) self.linear_1.skip_quant = False self.sigmoid_0 = Sigmoid() - self.linear_2 = Linear( - in_features=84, - out_features=num_classes, - weight_attr=fc_w3_attr, - bias_attr=fc_b3_attr) + self.linear_2 = Linear(in_features=84, + out_features=num_classes, + weight_attr=fc_w3_attr, + bias_attr=fc_b3_attr) self.linear_2.skip_quant = False self.softmax_0 = Softmax() @@ -237,6 +221,7 @@ class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer): class ImperativeLinearBn(fluid.dygraph.Layer): + def __init__(self): super(ImperativeLinearBn, self).__init__() @@ -250,11 +235,10 @@ class ImperativeLinearBn(fluid.dygraph.Layer): name="bn_weight", initializer=paddle.nn.initializer.Constant(value=0.5)) - self.linear = Linear( - in_features=10, - out_features=10, - weight_attr=fc_w_attr, - bias_attr=fc_b_attr) + self.linear = Linear(in_features=10, + out_features=10, + weight_attr=fc_w_attr, + bias_attr=fc_b_attr) self.bn = BatchNorm1D(10, weight_attr=bn_w_attr) def forward(self, inputs): @@ -265,6 +249,7 @@ class ImperativeLinearBn(fluid.dygraph.Layer): class ImperativeLinearBn_hook(fluid.dygraph.Layer): + def __init__(self): super(ImperativeLinearBn_hook, self).__init__() @@ -272,8 +257,9 @@ class ImperativeLinearBn_hook(fluid.dygraph.Layer): name="linear_weight", initializer=paddle.nn.initializer.Constant(value=0.5)) - self.linear = Linear( - in_features=10, out_features=10, weight_attr=fc_w_attr) + self.linear = Linear(in_features=10, + out_features=10, + weight_attr=fc_w_attr) self.bn = BatchNorm1D(10) forward_pre = self.linear.register_forward_pre_hook(pre_hook) diff --git a/python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py b/python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py index 188f14f0a69..52ebf463cdd 100644 --- a/python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py +++ b/python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py @@ -43,27 +43,32 @@ def parse_args(): default=0, help='Number of the first minibatches to skip in performance statistics.' 
) - parser.add_argument( - '--quant_model', type=str, default='', help='A path to a Quant model.') - parser.add_argument( - '--fp32_model', type=str, default='', help='A path to an FP32 model.') + parser.add_argument('--quant_model', + type=str, + default='', + help='A path to a Quant model.') + parser.add_argument('--fp32_model', + type=str, + default='', + help='A path to an FP32 model.') parser.add_argument('--infer_data', type=str, default='', help='Data file.') parser.add_argument( '--batch_num', type=int, default=0, - help='Number of batches to process. 0 or less means whole dataset. Default: 0.' + help= + 'Number of batches to process. 0 or less means whole dataset. Default: 0.' ) - parser.add_argument( - '--acc_diff_threshold', - type=float, - default=0.01, - help='Accepted accuracy difference threshold.') + parser.add_argument('--acc_diff_threshold', + type=float, + default=0.01, + help='Accepted accuracy difference threshold.') parser.add_argument( '--ops_to_quantize', type=str, default='', - help='A comma separated list of operators to quantize. Only quantizable operators are taken into account. If the option is not used, an attempt to quantize all quantizable operators will be made.' + help= + 'A comma separated list of operators to quantize. Only quantizable operators are taken into account. If the option is not used, an attempt to quantize all quantizable operators will be made.' ) parser.add_argument( '--op_ids_to_skip', @@ -74,12 +79,12 @@ def parse_args(): '--targets', type=str, default='quant,int8,fp32', - help='A comma separated list of inference types to run ("int8", "fp32", "quant"). Default: "quant,int8,fp32"' + help= + 'A comma separated list of inference types to run ("int8", "fp32", "quant"). Default: "quant,int8,fp32"' ) - parser.add_argument( - '--debug', - action='store_true', - help='If used, the graph of Quant model is drawn.') + parser.add_argument('--debug', + action='store_true', + help='If used, the graph of Quant model is drawn.') test_args, args = parser.parse_known_args(namespace=unittest) return test_args, sys.argv[:1] + args @@ -91,6 +96,7 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase): """ def _reader_creator(self, data_file='data.bin'): + def reader(): with open(data_file, 'rb') as fp: num = fp.read(8) @@ -143,11 +149,14 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase): name = op_node.name() if name in ['depthwise_conv2d']: input_var_node = graph._find_node_by_name( - op_node.inputs, op_node.input("Input")[0]) + op_node.inputs, + op_node.input("Input")[0]) weight_var_node = graph._find_node_by_name( - op_node.inputs, op_node.input("Filter")[0]) + op_node.inputs, + op_node.input("Filter")[0]) output_var_node = graph._find_node_by_name( - graph.all_var_nodes(), op_node.output("Output")[0]) + graph.all_var_nodes(), + op_node.output("Output")[0]) attrs = { name: op_node.op().attr(name) for name in op_node.op().attr_names() @@ -182,12 +191,12 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase): inference_scope = fluid.executor.global_scope() with fluid.scope_guard(inference_scope): if os.path.exists(os.path.join(model_path, '__model__')): - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model(model_path, exe) + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(model_path, exe) else: - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - model_path, exe, 'model', 'params') + 
[inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(model_path, exe, 'model', + 'params') graph = IrGraph(core.Graph(inference_program.desc), for_test=True) if (self._debug): @@ -252,8 +261,8 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase): batch_time = (time.time() - start) * 1000 # in miliseconds outputs.append(out[0]) # Calculate accuracy result - batch_acc1, batch_acc5 = self._get_batch_accuracy(out[0], - labels) + batch_acc1, batch_acc5 = self._get_batch_accuracy( + out[0], labels) infer_accs1.append(batch_acc1) infer_accs5.append(batch_acc5) @@ -266,8 +275,8 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase): appx = ' (warm-up)' if iters <= skip_batch_num else '' _logger.info('batch {0}{5}, acc1: {1:.4f}, acc5: {2:.4f}, ' 'latency: {3:.4f} ms, fps: {4:.2f}'.format( - iters, batch_acc1, batch_acc5, batch_time / - batch_size, fps, appx)) + iters, batch_acc1, batch_acc5, + batch_time / batch_size, fps, appx)) # Postprocess benchmark data batch_latencies = batch_times[skip_batch_num:] @@ -278,8 +287,8 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase): infer_total_time = time.time() - infer_start_time acc1_avg = np.mean(infer_accs1) acc5_avg = np.mean(infer_accs5) - _logger.info('Total inference run time: {:.2f} s'.format( - infer_total_time)) + _logger.info( + 'Total inference run time: {:.2f} s'.format(infer_total_time)) return outputs, acc1_avg, acc5_avg, fps_avg, latency_avg @@ -359,17 +368,18 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase): _logger.info('Batch size: {}'.format(batch_size)) _logger.info('Batch number: {}'.format(batch_num)) _logger.info('Accuracy drop threshold: {}.'.format(acc_diff_threshold)) - _logger.info('Quantized ops: {}.'.format(','.join( - self._quantized_ops) if self._quantized_ops else 'all quantizable')) - _logger.info('Op ids to skip quantization: {}.'.format(','.join( - map(str, self._op_ids_to_skip)) if test_case_args.op_ids_to_skip - else 'none')) + _logger.info( + 'Quantized ops: {}.'.format(','.join(self._quantized_ops) if self. 
+ _quantized_ops else 'all quantizable')) + _logger.info('Op ids to skip quantization: {}.'.format( + ','.join(map(str, self._op_ids_to_skip) + ) if test_case_args.op_ids_to_skip else 'none')) _logger.info('Targets: {}.'.format(','.join(self._targets))) if 'quant' in self._targets: _logger.info('--- Quant prediction start ---') - val_reader = paddle.batch( - self._reader_creator(data_path), batch_size=batch_size) + val_reader = paddle.batch(self._reader_creator(data_path), + batch_size=batch_size) quant_output, quant_acc1, quant_acc5, quant_fps, quant_lat = self._predict( val_reader, quant_model_path, @@ -382,8 +392,8 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase): if 'int8' in self._targets: _logger.info('--- INT8 prediction start ---') - val_reader = paddle.batch( - self._reader_creator(data_path), batch_size=batch_size) + val_reader = paddle.batch(self._reader_creator(data_path), + batch_size=batch_size) int8_output, int8_acc1, int8_acc5, int8_fps, int8_lat = self._predict( val_reader, quant_model_path, @@ -397,8 +407,8 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase): fp32_acc1 = fp32_acc5 = fp32_fps = fp32_lat = -1 if 'fp32' in self._targets and fp32_model_path: _logger.info('--- FP32 prediction start ---') - val_reader = paddle.batch( - self._reader_creator(data_path), batch_size=batch_size) + val_reader = paddle.batch(self._reader_creator(data_path), + batch_size=batch_size) fp32_output, fp32_acc1, fp32_acc5, fp32_fps, fp32_lat = self._predict( val_reader, fp32_model_path, diff --git a/python/paddle/fluid/contrib/slim/tests/quant2_int8_lstm_model.py b/python/paddle/fluid/contrib/slim/tests/quant2_int8_lstm_model.py index 4f4a2ddd4ab..0a9abe61e0e 100644 --- a/python/paddle/fluid/contrib/slim/tests/quant2_int8_lstm_model.py +++ b/python/paddle/fluid/contrib/slim/tests/quant2_int8_lstm_model.py @@ -25,28 +25,35 @@ from save_quant_model import transform_and_save_int8_model def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument( - '--fp32_model', type=str, default='', help='A path to a FP32 model.') - parser.add_argument( - '--quant_model', type=str, default='', help='A path to a quant model.') + parser.add_argument('--fp32_model', + type=str, + default='', + help='A path to a FP32 model.') + parser.add_argument('--quant_model', + type=str, + default='', + help='A path to a quant model.') parser.add_argument('--infer_data', type=str, default='', help='Data file.') parser.add_argument( '--warmup_iter', type=int, default=1, - help='Number of the first iterations to skip in performance statistics.') - parser.add_argument( - '--acc_diff_threshold', - type=float, - default=0.01, - help='Accepted accuracy difference threshold.') - parser.add_argument( - '--num_threads', type=int, default=1, help='Number of threads.') + help='Number of the first iterations to skip in performance statistics.' + ) + parser.add_argument('--acc_diff_threshold', + type=float, + default=0.01, + help='Accepted accuracy difference threshold.') + parser.add_argument('--num_threads', + type=int, + default=1, + help='Number of threads.') parser.add_argument( '--mkldnn_cache_capacity', type=int, default=0, - help='Mkldnn cache capacity. The default value in Python API is 15, which can slow down int8 models. Default 0 means unlimited cache.' + help= + 'Mkldnn cache capacity. The default value in Python API is 15, which can slow down int8 models. Default 0 means unlimited cache.' 
) test_args, args = parser.parse_known_args(namespace=unittest) @@ -54,6 +61,7 @@ def parse_args(): class TestLstmModelPTQ(unittest.TestCase): + def get_warmup_tensor(self, data_path, place): data = [] with open(data_path, 'rb') as in_f: @@ -67,11 +75,11 @@ class TestLstmModelPTQ(unittest.TestCase): seq_len = (alllen >> 16) & 0xFFFF label = in_f.read(4 * label_len) - label = np.frombuffer( - label, dtype=np.int32).reshape([len(label) // 4]) + label = np.frombuffer(label, + dtype=np.int32).reshape([len(label) // 4]) feat = in_f.read(4 * seq_len * 8) - feat = np.frombuffer( - feat, dtype=np.float32).reshape([len(feat) // 4 // 8, 8]) + feat = np.frombuffer(feat, dtype=np.float32).reshape( + [len(feat) // 4 // 8, 8]) lod_feat = [feat.shape[0]] minputs = fluid.create_lod_tensor(feat, [lod_feat], place) @@ -189,22 +197,25 @@ class TestLstmModelPTQ(unittest.TestCase): warmup_iter = test_case_args.warmup_iter acc_diff_threshold = test_case_args.acc_diff_threshold - (fp32_hx_acc, fp32_ctc_acc, fp32_fps) = self.run_program( - fp32_model, infer_data, num_threads, mkldnn_cache_capacity, - warmup_iter, False, False) + (fp32_hx_acc, fp32_ctc_acc, + fp32_fps) = self.run_program(fp32_model, infer_data, num_threads, + mkldnn_cache_capacity, warmup_iter, False, + False) - (int8_hx_acc, int8_ctc_acc, int8_fps) = self.run_program( - fp32_model, infer_data, num_threads, mkldnn_cache_capacity, - warmup_iter, True, True) + (int8_hx_acc, int8_ctc_acc, + int8_fps) = self.run_program(fp32_model, infer_data, num_threads, + mkldnn_cache_capacity, warmup_iter, True, + True) quant_model_save_path = quant_model + "_int8" # transform model to quant2 transform_and_save_int8_model(quant_model, quant_model_save_path, "fusion_lstm,concat") - (quant_hx_acc, quant_ctc_acc, quant_fps) = self.run_program( - quant_model_save_path, infer_data, num_threads, - mkldnn_cache_capacity, warmup_iter, True, False) + (quant_hx_acc, quant_ctc_acc, + quant_fps) = self.run_program(quant_model_save_path, infer_data, + num_threads, mkldnn_cache_capacity, + warmup_iter, True, False) print("FP32: fps {0}, hx_acc {1}, ctc_acc {2}".format( fp32_fps, fp32_hx_acc, fp32_ctc_acc)) diff --git a/python/paddle/fluid/contrib/slim/tests/quant2_int8_nlp_comparison.py b/python/paddle/fluid/contrib/slim/tests/quant2_int8_nlp_comparison.py index 12d1cfcc41d..fecead6d6de 100644 --- a/python/paddle/fluid/contrib/slim/tests/quant2_int8_nlp_comparison.py +++ b/python/paddle/fluid/contrib/slim/tests/quant2_int8_nlp_comparison.py @@ -41,33 +41,39 @@ def parse_args(): default=0, help='Number of the first minibatches to skip in performance statistics.' ) - parser.add_argument( - '--quant_model', type=str, default='', help='A path to a Quant model.') + parser.add_argument('--quant_model', + type=str, + default='', + help='A path to a Quant model.') parser.add_argument( '--fp32_model', type=str, default='', - help='A path to an FP32 model. If empty, the Quant model will be used for FP32 inference.' + help= + 'A path to an FP32 model. If empty, the Quant model will be used for FP32 inference.' ) parser.add_argument('--infer_data', type=str, default='', help='Data file.') - parser.add_argument( - '--labels', type=str, default='', help='File with labels.') + parser.add_argument('--labels', + type=str, + default='', + help='File with labels.') parser.add_argument( '--batch_num', type=int, default=0, - help='Number of batches to process. 0 or less means whole dataset. Default: 0.' + help= + 'Number of batches to process. 0 or less means whole dataset. Default: 0.' 
) - parser.add_argument( - '--acc_diff_threshold', - type=float, - default=0.01, - help='Accepted accuracy difference threshold.') + parser.add_argument('--acc_diff_threshold', + type=float, + default=0.01, + help='Accepted accuracy difference threshold.') parser.add_argument( '--ops_to_quantize', type=str, default='', - help='A comma separated list of operators to quantize. Only quantizable operators are taken into account. If the option is not used, an attempt to quantize all quantizable operators will be made.' + help= + 'A comma separated list of operators to quantize. Only quantizable operators are taken into account. If the option is not used, an attempt to quantize all quantizable operators will be made.' ) parser.add_argument( '--op_ids_to_skip', @@ -78,12 +84,12 @@ def parse_args(): '--targets', type=str, default='quant,int8,fp32', - help='A comma separated list of inference types to run ("int8", "fp32", "quant"). Default: "quant,int8,fp32"' + help= + 'A comma separated list of inference types to run ("int8", "fp32", "quant"). Default: "quant,int8,fp32"' ) - parser.add_argument( - '--debug', - action='store_true', - help='If used, the graph of Quant model is drawn.') + parser.add_argument('--debug', + action='store_true', + help='If used, the graph of Quant model is drawn.') test_args, args = parser.parse_known_args(namespace=unittest) @@ -156,12 +162,12 @@ class QuantInt8NLPComparisonTest(unittest.TestCase): inference_scope = fluid.executor.global_scope() with fluid.scope_guard(inference_scope): if os.path.exists(os.path.join(model_path, '__model__')): - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model(model_path, exe) + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(model_path, exe) else: - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - model_path, exe, 'model', 'params') + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(model_path, exe, 'model', + 'params') graph = IrGraph(core.Graph(inference_program.desc), for_test=True) if (self._debug): @@ -229,8 +235,8 @@ class QuantInt8NLPComparisonTest(unittest.TestCase): ppses = ppses[skip_batch_num:] pps_avg = np.average(ppses) acc_avg = float(np.sum(total_correct)) / float(total_samples) - _logger.info('Total inference run time: {:.2f} s'.format( - infer_total_time)) + _logger.info( + 'Total inference run time: {:.2f} s'.format(infer_total_time)) return acc_avg, pps_avg, latency_avg @@ -310,56 +316,54 @@ class QuantInt8NLPComparisonTest(unittest.TestCase): _logger.info('Batch size: {}'.format(batch_size)) _logger.info('Batch number: {}'.format(batch_num)) _logger.info('Accuracy drop threshold: {}.'.format(acc_diff_threshold)) - _logger.info('Quantized ops: {}.'.format(','.join( - self._quantized_ops) if self._quantized_ops else 'all quantizable')) - _logger.info('Op ids to skip quantization: {}.'.format(','.join( - map(str, self._op_ids_to_skip)) if test_case_args.op_ids_to_skip - else 'none')) + _logger.info( + 'Quantized ops: {}.'.format(','.join(self._quantized_ops) if self. 
+ _quantized_ops else 'all quantizable')) + _logger.info('Op ids to skip quantization: {}.'.format( + ','.join(map(str, self._op_ids_to_skip) + ) if test_case_args.op_ids_to_skip else 'none')) _logger.info('Targets: {}.'.format(','.join(self._targets))) if 'quant' in self._targets: _logger.info('--- Quant prediction start ---') - val_reader = paddle.batch( - self._reader_creator(data_path, labels_path), - batch_size=batch_size) - quant_acc, quant_pps, quant_lat = self._predict( - val_reader, - quant_model_path, - batch_size, - batch_num, - skip_batch_num, - target='quant') + val_reader = paddle.batch(self._reader_creator( + data_path, labels_path), + batch_size=batch_size) + quant_acc, quant_pps, quant_lat = self._predict(val_reader, + quant_model_path, + batch_size, + batch_num, + skip_batch_num, + target='quant') self._print_performance('Quant', quant_pps, quant_lat) self._print_accuracy('Quant', quant_acc) if 'int8' in self._targets: _logger.info('--- INT8 prediction start ---') - val_reader = paddle.batch( - self._reader_creator(data_path, labels_path), - batch_size=batch_size) - int8_acc, int8_pps, int8_lat = self._predict( - val_reader, - quant_model_path, - batch_size, - batch_num, - skip_batch_num, - target='int8') + val_reader = paddle.batch(self._reader_creator( + data_path, labels_path), + batch_size=batch_size) + int8_acc, int8_pps, int8_lat = self._predict(val_reader, + quant_model_path, + batch_size, + batch_num, + skip_batch_num, + target='int8') self._print_performance('INT8', int8_pps, int8_lat) self._print_accuracy('INT8', int8_acc) fp32_acc = fp32_pps = fp32_lat = -1 if 'fp32' in self._targets and fp32_model_path: _logger.info('--- FP32 prediction start ---') - val_reader = paddle.batch( - self._reader_creator(data_path, labels_path), - batch_size=batch_size) - fp32_acc, fp32_pps, fp32_lat = self._predict( - val_reader, - fp32_model_path, - batch_size, - batch_num, - skip_batch_num, - target='fp32') + val_reader = paddle.batch(self._reader_creator( + data_path, labels_path), + batch_size=batch_size) + fp32_acc, fp32_pps, fp32_lat = self._predict(val_reader, + fp32_model_path, + batch_size, + batch_num, + skip_batch_num, + target='fp32') self._print_performance('FP32', fp32_pps, fp32_lat) self._print_accuracy('FP32', fp32_acc) diff --git a/python/paddle/fluid/contrib/slim/tests/quant_int8_image_classification_comparison.py b/python/paddle/fluid/contrib/slim/tests/quant_int8_image_classification_comparison.py index fac41ce8a22..41ddfa513a0 100644 --- a/python/paddle/fluid/contrib/slim/tests/quant_int8_image_classification_comparison.py +++ b/python/paddle/fluid/contrib/slim/tests/quant_int8_image_classification_comparison.py @@ -43,24 +43,25 @@ def parse_args(): default=0, help='Number of the first minibatches to skip in performance statistics.' ) - parser.add_argument( - '--debug', - action='store_true', - help='If used, the graph of Quant model is drawn.') - parser.add_argument( - '--quant_model', type=str, default='', help='A path to a Quant model.') + parser.add_argument('--debug', + action='store_true', + help='If used, the graph of Quant model is drawn.') + parser.add_argument('--quant_model', + type=str, + default='', + help='A path to a Quant model.') parser.add_argument('--infer_data', type=str, default='', help='Data file.') parser.add_argument( '--batch_num', type=int, default=0, - help='Number of batches to process. 0 or less means whole dataset. Default: 0.' + help= + 'Number of batches to process. 0 or less means whole dataset. Default: 0.' 
) - parser.add_argument( - '--acc_diff_threshold', - type=float, - default=0.01, - help='Accepted accuracy difference threshold.') + parser.add_argument('--acc_diff_threshold', + type=float, + default=0.01, + help='Accepted accuracy difference threshold.') test_args, args = parser.parse_known_args(namespace=unittest) return test_args, sys.argv[:1] + args @@ -72,6 +73,7 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase): """ def _reader_creator(self, data_file='data.bin'): + def reader(): with open(data_file, 'rb') as fp: num = fp.read(8) @@ -124,11 +126,14 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase): name = op_node.name() if name in ['depthwise_conv2d']: input_var_node = graph._find_node_by_name( - op_node.inputs, op_node.input("Input")[0]) + op_node.inputs, + op_node.input("Input")[0]) weight_var_node = graph._find_node_by_name( - op_node.inputs, op_node.input("Filter")[0]) + op_node.inputs, + op_node.input("Filter")[0]) output_var_node = graph._find_node_by_name( - graph.all_var_nodes(), op_node.output("Output")[0]) + graph.all_var_nodes(), + op_node.output("Output")[0]) attrs = { name: op_node.op().attr(name) for name in op_node.op().attr_names() @@ -162,19 +167,19 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase): inference_scope = fluid.executor.global_scope() with fluid.scope_guard(inference_scope): if os.path.exists(os.path.join(model_path, '__model__')): - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model(model_path, exe) + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(model_path, exe) else: - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - model_path, exe, 'model', 'params') + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(model_path, exe, 'model', + 'params') graph = IrGraph(core.Graph(inference_program.desc), for_test=True) if (self._debug): graph.draw('.', 'quant_orig', graph.all_op_nodes()) if (transform_to_int8): - mkldnn_int8_pass = QuantInt8MkldnnPass( - _scope=inference_scope, _place=place) + mkldnn_int8_pass = QuantInt8MkldnnPass(_scope=inference_scope, + _place=place) graph = mkldnn_int8_pass.apply(graph) else: graph = self._prepare_for_fp32_mkldnn(graph) @@ -206,8 +211,8 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase): fetch_list=fetch_targets) batch_time = (time.time() - start) * 1000 # in miliseconds outputs.append(out[0]) - batch_acc1, batch_acc5 = self._get_batch_accuracy(out[0], - labels) + batch_acc1, batch_acc5 = self._get_batch_accuracy( + out[0], labels) infer_accs1.append(batch_acc1) infer_accs5.append(batch_acc5) samples = len(data) @@ -219,8 +224,8 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase): appx = ' (warm-up)' if iters <= skip_batch_num else '' _logger.info('batch {0}{5}, acc1: {1:.4f}, acc5: {2:.4f}, ' 'latency: {3:.4f} ms, fps: {4:.2f}'.format( - iters, batch_acc1, batch_acc5, batch_time / - batch_size, fps, appx)) + iters, batch_acc1, batch_acc5, + batch_time / batch_size, fps, appx)) # Postprocess benchmark data batch_latencies = batch_times[skip_batch_num:] @@ -231,8 +236,8 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase): infer_total_time = time.time() - infer_start_time acc1_avg = np.mean(infer_accs1) acc5_avg = np.mean(infer_accs5) - _logger.info('Total inference run time: {:.2f} s'.format( - infer_total_time)) + _logger.info( + 'Total inference 
run time: {:.2f} s'.format(infer_total_time)) return outputs, acc1_avg, acc5_avg, fps_avg, latency_avg @@ -281,8 +286,8 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase): _logger.info('Accuracy drop threshold: {0}.'.format(acc_diff_threshold)) _logger.info('--- Quant FP32 prediction start ---') - val_reader = paddle.batch( - self._reader_creator(data_path), batch_size=batch_size) + val_reader = paddle.batch(self._reader_creator(data_path), + batch_size=batch_size) fp32_output, fp32_acc1, fp32_acc5, fp32_fps, fp32_lat = self._predict( val_reader, quant_model_path, @@ -291,8 +296,8 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase): skip_batch_num, transform_to_int8=False) _logger.info('--- Quant INT8 prediction start ---') - val_reader = paddle.batch( - self._reader_creator(data_path), batch_size=batch_size) + val_reader = paddle.batch(self._reader_creator(data_path), + batch_size=batch_size) int8_output, int8_acc1, int8_acc5, int8_fps, int8_lat = self._predict( val_reader, quant_model_path, diff --git a/python/paddle/fluid/contrib/slim/tests/save_quant_model.py b/python/paddle/fluid/contrib/slim/tests/save_quant_model.py index 73ec8cf3e02..cb15b3da473 100644 --- a/python/paddle/fluid/contrib/slim/tests/save_quant_model.py +++ b/python/paddle/fluid/contrib/slim/tests/save_quant_model.py @@ -27,54 +27,56 @@ paddle.enable_static() def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument( - '--quant_model_path', - type=str, - default='', - help='A path to a Quant model.') - parser.add_argument( - '--int8_model_save_path', - type=str, - default='', - help='Saved optimized and quantized INT8 model') + parser.add_argument('--quant_model_path', + type=str, + default='', + help='A path to a Quant model.') + parser.add_argument('--int8_model_save_path', + type=str, + default='', + help='Saved optimized and quantized INT8 model') parser.add_argument( '--ops_to_quantize', type=str, default='', - help='A comma separated list of operators to quantize. Only quantizable operators are taken into account. If the option is not used, an attempt to quantize all quantizable operators will be made.' + help= + 'A comma separated list of operators to quantize. Only quantizable operators are taken into account. If the option is not used, an attempt to quantize all quantizable operators will be made.' ) parser.add_argument( '--op_ids_to_skip', type=str, default='', help='A comma separated list of operator ids to skip in quantization.') - parser.add_argument( - '--debug', - action='store_true', - help='If used, the graph of Quant model is drawn.') + parser.add_argument('--debug', + action='store_true', + help='If used, the graph of Quant model is drawn.') parser.add_argument( '--quant_model_filename', type=str, default="", - help='The input model`s file name. If empty, search default `__model__` and separate parameter files and use them or in case if not found, attempt loading `model` and `params` files.' + help= + 'The input model`s file name. If empty, search default `__model__` and separate parameter files and use them or in case if not found, attempt loading `model` and `params` files.' ) parser.add_argument( '--quant_params_filename', type=str, default="", - help='If quant_model_filename is empty, this field is ignored. The input model`s all parameters file name. If empty load parameters from separate files.' + help= + 'If quant_model_filename is empty, this field is ignored. The input model`s all parameters file name. 
If empty load parameters from separate files.' ) parser.add_argument( '--save_model_filename', type=str, default="__model__", - help='The name of file to save the inference program itself. If is set None, a default filename __model__ will be used.' + help= + 'The name of file to save the inference program itself. If is set None, a default filename __model__ will be used.' ) parser.add_argument( '--save_params_filename', type=str, default=None, - help='The name of file to save all related parameters. If it is set None, parameters will be saved in separate files' + help= + 'The name of file to save all related parameters. If it is set None, parameters will be saved in separate files' ) test_args, args = parser.parse_known_args(namespace=unittest) @@ -96,18 +98,17 @@ def transform_and_save_int8_model(original_path, with fluid.scope_guard(inference_scope): if not quant_model_filename: if os.path.exists(os.path.join(original_path, '__model__')): - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model(original_path, - exe) + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(original_path, exe) else: - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - original_path, exe, 'model', 'params') + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(original_path, exe, 'model', + 'params') else: - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - original_path, exe, quant_model_filename, - quant_params_filename) + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(original_path, exe, + quant_model_filename, + quant_params_filename) ops_to_quantize_set = set() print(ops_to_quantize) @@ -132,14 +133,13 @@ def transform_and_save_int8_model(original_path, graph = transform_to_mkldnn_int8_pass.apply(graph) inference_program = graph.to_program() with fluid.scope_guard(inference_scope): - fluid.io.save_inference_model( - save_path, - feed_target_names, - fetch_targets, - exe, - inference_program, - model_filename=save_model_filename, - params_filename=save_params_filename) + fluid.io.save_inference_model(save_path, + feed_target_names, + fetch_targets, + exe, + inference_program, + model_filename=save_model_filename, + params_filename=save_params_filename) print( "Success! 
INT8 model obtained from the Quant model can be found at {}\n" .format(save_path)) diff --git a/python/paddle/fluid/contrib/slim/tests/test_graph.py b/python/paddle/fluid/contrib/slim/tests/test_graph.py index 435cefd73e7..d8887e19641 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_graph.py +++ b/python/paddle/fluid/contrib/slim/tests/test_graph.py @@ -31,21 +31,19 @@ os.environ["CPU_NUM"] = "1" def conv_block(): img = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_1 = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) avg_loss = fluid.layers.mean(loss) @@ -53,6 +51,7 @@ def conv_block(): class TestGraph(unittest.TestCase): + def graph_apis(self, use_cuda=False, for_ci=True): main = fluid.Program() startup = fluid.Program() @@ -77,8 +76,8 @@ class TestGraph(unittest.TestCase): exe.run(startup) iters = 5 batch_size = 8 - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=batch_size) feeder = fluid.DataFeeder(feed_list=feeds, place=place) def _train(binary): @@ -101,18 +100,18 @@ class TestGraph(unittest.TestCase): var.set(var_array, place) sum_before = np.sum( - np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor( - ))) + np.array( + fluid.global_scope().find_var('conv2d_1.w_0').get_tensor())) fluid.io._save_persistable_nodes(exe, checkponit_dir, graph) _set_zero('conv2d_1.w_0', fluid.global_scope(), place) set_after = np.sum( - np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor( - ))) + np.array( + fluid.global_scope().find_var('conv2d_1.w_0').get_tensor())) self.assertEqual(set_after, 0) fluid.io._load_persistable_nodes(exe, checkponit_dir, graph) sum_after = np.sum( - np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor( - ))) + np.array( + fluid.global_scope().find_var('conv2d_1.w_0').get_tensor())) self.assertEqual(sum_before, sum_after) marked_nodes = set() diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py index 7b9cd7958b2..5e0269a2717 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py @@ -43,8 +43,9 @@ os.environ["CPU_NUM"] = "1" if core.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') def get_vaild_warning_num(warning, w): @@ -56,6 +57,7 @@ def get_vaild_warning_num(warning, w): class ImperativeLenet(fluid.dygraph.Layer): + 
def __init__(self, num_classes=10): super(ImperativeLenet, self).__init__() conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") @@ -68,50 +70,36 @@ class ImperativeLenet(fluid.dygraph.Layer): fc_b2_attr = fluid.ParamAttr(name="fc_b_2") fc_b3_attr = fluid.ParamAttr(name="fc_b_3") self.features = Sequential( - Conv2D( - in_channels=1, - out_channels=6, - kernel_size=3, - stride=1, - padding=1, - weight_attr=conv2d_w1_attr, - bias_attr=False), - BatchNorm2D(6), - ReLU(), - MaxPool2D( - kernel_size=2, stride=2), - Conv2D( - in_channels=6, - out_channels=16, - kernel_size=5, - stride=1, - padding=0, - weight_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr), - BatchNorm2D(16), - PReLU(), - MaxPool2D( - kernel_size=2, stride=2)) + Conv2D(in_channels=1, + out_channels=6, + kernel_size=3, + stride=1, + padding=1, + weight_attr=conv2d_w1_attr, + bias_attr=False), BatchNorm2D(6), ReLU(), + MaxPool2D(kernel_size=2, stride=2), + Conv2D(in_channels=6, + out_channels=16, + kernel_size=5, + stride=1, + padding=0, + weight_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr), BatchNorm2D(16), PReLU(), + MaxPool2D(kernel_size=2, stride=2)) self.fc = Sequential( - Linear( - in_features=400, - out_features=120, - weight_attr=fc_w1_attr, - bias_attr=fc_b1_attr), - LeakyReLU(), - Linear( - in_features=120, - out_features=84, - weight_attr=fc_w2_attr, - bias_attr=fc_b2_attr), - Sigmoid(), - Linear( - in_features=84, - out_features=num_classes, - weight_attr=fc_w3_attr, - bias_attr=fc_b3_attr), - Softmax()) + Linear(in_features=400, + out_features=120, + weight_attr=fc_w1_attr, + bias_attr=fc_b1_attr), LeakyReLU(), + Linear(in_features=120, + out_features=84, + weight_attr=fc_w2_attr, + bias_attr=fc_b2_attr), Sigmoid(), + Linear(in_features=84, + out_features=num_classes, + weight_attr=fc_w3_attr, + bias_attr=fc_b3_attr), Softmax()) def forward(self, inputs): x = self.features(inputs) @@ -122,6 +110,7 @@ class ImperativeLenet(fluid.dygraph.Layer): class TestImperativeOutSclae(unittest.TestCase): + def func_out_scale_acc(self): seed = 1000 lr = 0.001 @@ -141,10 +130,11 @@ class TestImperativeOutSclae(unittest.TestCase): lenet = fix_model_dict(lenet) imperative_out_scale.quantize(lenet) - reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32, drop_last=True) - adam = AdamOptimizer( - learning_rate=lr, parameter_list=lenet.parameters()) + reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=32, + drop_last=True) + adam = AdamOptimizer(learning_rate=lr, + parameter_list=lenet.parameters()) loss_list = train_lenet(lenet, reader, adam) lenet.eval() @@ -157,14 +147,13 @@ class TestImperativeOutSclae(unittest.TestCase): layer=lenet, path=save_path, input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') + paddle.static.InputSpec(shape=[None, 1, 28, 28], + dtype='float32') ]) for i in range(len(loss_list) - 1): - self.assertTrue( - loss_list[i] > loss_list[i + 1], - msg='Failed to do the imperative qat.') + self.assertTrue(loss_list[i] > loss_list[i + 1], + msg='Failed to do the imperative qat.') def test_out_scale_acc(self): with _test_eager_guard(): @@ -173,6 +162,7 @@ class TestImperativeOutSclae(unittest.TestCase): class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase): + def func_save_quantized_model(self): lr = 0.001 @@ -191,10 +181,11 @@ class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase): imperative_out_scale.quantize(lenet) lenet.set_dict(load_dict) - reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32, drop_last=True) - adam = 
AdamOptimizer( - learning_rate=lr, parameter_list=lenet.parameters()) + reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=32, + drop_last=True) + adam = AdamOptimizer(learning_rate=lr, + parameter_list=lenet.parameters()) loss_list = train_lenet(lenet, reader, adam) lenet.eval() @@ -202,14 +193,13 @@ class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase): layer=lenet, path=save_path, input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') + paddle.static.InputSpec(shape=[None, 1, 28, 28], + dtype='float32') ]) for i in range(len(loss_list) - 1): - self.assertTrue( - loss_list[i] > loss_list[i + 1], - msg='Failed to do the imperative qat.') + self.assertTrue(loss_list[i] > loss_list[i + 1], + msg='Failed to do the imperative qat.') def test_save_quantized_model(self): with _test_eager_guard(): diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py index fad4c8f9d58..402113e5f8d 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py @@ -34,8 +34,9 @@ from paddle.fluid.framework import _test_eager_guard from imperative_test_utils import fix_model_dict, ImperativeLenet, ImperativeLinearBn from imperative_test_utils import ImperativeLinearBn_hook -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') class TestFuseLinearBn(unittest.TestCase): @@ -54,15 +55,15 @@ class TestFuseLinearBn(unittest.TestCase): quant_h = ptq.quantize(model_h, fuse=True, fuse_list=f_l) for name, layer in quant_model.named_sublayers(): if name in f_l: - assert not (isinstance(layer, nn.BatchNorm1D) or - isinstance(layer, nn.BatchNorm2D)) + assert not (isinstance(layer, nn.BatchNorm1D) + or isinstance(layer, nn.BatchNorm2D)) out = model(inputs) out_h = model_h(inputs) out_quant = quant_model(inputs) out_quant_h = quant_h(inputs) cos_sim_func = nn.CosineSimilarity(axis=0) - print('fuse linear+bn', - cos_sim_func(out.flatten(), out_quant.flatten())) + print('fuse linear+bn', cos_sim_func(out.flatten(), + out_quant.flatten())) print(cos_sim_func(out_h.flatten(), out_quant_h.flatten())) @@ -98,8 +99,8 @@ class TestImperativePTQ(unittest.TestCase): def cache_unzipping(self, target_folder, zip_path): if not os.path.exists(target_folder): - cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(target_folder, - zip_path) + cmd = 'mkdir {0} && tar xf {1} -C {0}'.format( + target_folder, zip_path) os.system(cmd) def download_model(self, data_url, data_md5, folder_name): @@ -126,23 +127,23 @@ class TestImperativePTQ(unittest.TestCase): 'batch_norm2d_0': [[0.37673383951187134], [0.44249194860458374]], 're_lu_0': [[0.44249194860458374], [0.25804123282432556]], 'max_pool2d_0': [[0.25804123282432556], [0.25804123282432556]], - 'linear_0': - [[1.7058950662612915], [14.405526161193848], [0.4373355209827423]], + 'linear_0': [[1.7058950662612915], [14.405526161193848], + [0.4373355209827423]], 'add_0': [[1.7058950662612915, 0.0], [1.7058950662612915]], } def model_test(self, model, batch_num=-1, batch_size=8): model.eval() - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) eval_acc_top1_list = [] for batch_id, data in enumerate(test_reader()): x_data = np.array([x[0].reshape(1, 28, 
28) for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) + y_data = np.array([x[1] + for x in data]).astype('int64').reshape(-1, 1) img = paddle.to_tensor(x_data) label = paddle.to_tensor(y_data) @@ -165,11 +166,11 @@ class TestImperativePTQ(unittest.TestCase): def program_test(self, program_path, batch_num=-1, batch_size=8): exe = paddle.static.Executor(paddle.CPUPlace()) - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model(program_path, exe)) + [inference_program, feed_target_names, fetch_targets + ] = (paddle.static.load_inference_model(program_path, exe)) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) top1_correct_num = 0. total_num = 0. @@ -214,11 +215,11 @@ class TestImperativePTQ(unittest.TestCase): self.batch_size) input_spec = [ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') + paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32') ] - self.ptq.save_quantized_model( - model=quant_model, path=self.save_path, input_spec=input_spec) + self.ptq.save_quantized_model(model=quant_model, + path=self.save_path, + input_spec=input_spec) print('Quantized model saved in {%s}' % self.save_path) after_acc_top1 = self.model_test(quant_model, self.batch_num, @@ -234,13 +235,11 @@ class TestImperativePTQ(unittest.TestCase): print('After converted acc_top1: %s' % after_acc_top1) print('Infer acc_top1: %s' % infer_acc_top1) - self.assertTrue( - after_acc_top1 >= self.eval_acc_top1, - msg="The test acc {%f} is less than {%f}." % - (after_acc_top1, self.eval_acc_top1)) - self.assertTrue( - infer_acc_top1 >= after_acc_top1, - msg='The acc is lower after converting model.') + self.assertTrue(after_acc_top1 >= self.eval_acc_top1, + msg="The test acc {%f} is less than {%f}." % + (after_acc_top1, self.eval_acc_top1)) + self.assertTrue(infer_acc_top1 >= after_acc_top1, + msg='The acc is lower after converting model.') end_time = time.time() print("total time: %ss \n" % (end_time - start_time)) @@ -252,6 +251,7 @@ class TestImperativePTQ(unittest.TestCase): class TestImperativePTQfuse(TestImperativePTQ): + def func_ptq(self): start_time = time.time() @@ -270,17 +270,17 @@ class TestImperativePTQfuse(TestImperativePTQ): quant_model = self.ptq.quantize(model, fuse=True, fuse_list=f_l) for name, layer in quant_model.named_sublayers(): if name in f_l: - assert not (isinstance(layer, nn.BatchNorm1D) or - isinstance(layer, nn.BatchNorm2D)) + assert not (isinstance(layer, nn.BatchNorm1D) + or isinstance(layer, nn.BatchNorm2D)) before_acc_top1 = self.model_test(quant_model, self.batch_num, self.batch_size) input_spec = [ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') + paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32') ] - self.ptq.save_quantized_model( - model=quant_model, path=self.save_path, input_spec=input_spec) + self.ptq.save_quantized_model(model=quant_model, + path=self.save_path, + input_spec=input_spec) print('Quantized model saved in {%s}' % self.save_path) after_acc_top1 = self.model_test(quant_model, self.batch_num, @@ -298,15 +298,13 @@ class TestImperativePTQfuse(TestImperativePTQ): #Check whether the quant_model is correct after converting. #The acc of quantized model should be higher than 0.95. - self.assertTrue( - after_acc_top1 >= self.eval_acc_top1, - msg="The test acc {%f} is less than {%f}." 
% - (after_acc_top1, self.eval_acc_top1)) - #Check the saved infer_model.The acc of infer model + self.assertTrue(after_acc_top1 >= self.eval_acc_top1, + msg="The test acc {%f} is less than {%f}." % + (after_acc_top1, self.eval_acc_top1)) + #Check the saved infer_model.The acc of infer model #should not be lower than the one of dygraph model. - self.assertTrue( - infer_acc_top1 >= after_acc_top1, - msg='The acc is lower after converting model.') + self.assertTrue(infer_acc_top1 >= after_acc_top1, + msg='The acc is lower after converting model.') end_time = time.time() print("total time: %ss \n" % (end_time - start_time)) @@ -318,6 +316,7 @@ class TestImperativePTQfuse(TestImperativePTQ): class TestImperativePTQHist(TestImperativePTQ): + def set_vars(self): config = PTQConfig(HistQuantizer(), AbsmaxQuantizer()) self.ptq = ImperativePTQ(config) @@ -327,18 +326,19 @@ class TestImperativePTQHist(TestImperativePTQ): self.eval_acc_top1 = 0.98 self.gt_thresholds = { - 'conv2d_0': - [[0.99853515625], [0.35732391771364225], [0.10933732241392136]], + 'conv2d_0': [[0.99853515625], [0.35732391771364225], + [0.10933732241392136]], 'batch_norm2d_0': [[0.35732391771364225], [0.4291427868761275]], 're_lu_0': [[0.4291427868761275], [0.2359918110742001]], 'max_pool2d_0': [[0.2359918110742001], [0.25665526917146053]], - 'linear_0': - [[1.7037603475152991], [14.395224522473026], [0.4373355209827423]], + 'linear_0': [[1.7037603475152991], [14.395224522473026], + [0.4373355209827423]], 'add_0': [[1.7037603475152991, 0.0], [1.7037603475152991]], } class TestImperativePTQKL(TestImperativePTQ): + def set_vars(self): config = PTQConfig(KLQuantizer(), PerChannelAbsmaxQuantizer()) self.ptq = ImperativePTQ(config) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index 0d035390e2c..0bb246f9ac9 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -41,8 +41,9 @@ os.environ["CPU_NUM"] = "1" if core.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') class TestImperativeQat(unittest.TestCase): @@ -68,21 +69,22 @@ class TestImperativeQat(unittest.TestCase): with fluid.dygraph.guard(): # For CI coverage - conv1 = Conv2D( - in_channels=3, - out_channels=2, - kernel_size=3, - stride=1, - padding=1, - padding_mode='replicate') + conv1 = Conv2D(in_channels=3, + out_channels=2, + kernel_size=3, + stride=1, + padding=1, + padding_mode='replicate') quant_conv1 = QuantizedConv2D(conv1) data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32') quant_conv1(fluid.dygraph.to_variable(data)) conv_transpose = Conv2DTranspose(4, 6, (3, 3)) quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose) - x_var = paddle.uniform( - (2, 4, 8, 8), dtype='float32', min=-1.0, max=1.0) + x_var = paddle.uniform((2, 4, 8, 8), + dtype='float32', + min=-1.0, + max=1.0) quant_conv_transpose(x_var) seed = 1 @@ -93,13 +95,14 @@ class TestImperativeQat(unittest.TestCase): lenet = ImperativeLenet() lenet = fix_model_dict(lenet) imperative_qat.quantize(lenet) - adam = AdamOptimizer( - learning_rate=0.001, parameter_list=lenet.parameters()) + adam = AdamOptimizer(learning_rate=0.001, + parameter_list=lenet.parameters()) - train_reader = paddle.batch( - 
paddle.dataset.mnist.train(), batch_size=32, drop_last=True) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=32, + drop_last=True) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=32) epoch_num = 1 for epoch in range(epoch_num): @@ -107,8 +110,8 @@ class TestImperativeQat(unittest.TestCase): for batch_id, data in enumerate(train_reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(-1, 1) img = fluid.dygraph.to_variable(x_data) label = fluid.dygraph.to_variable(y_data) @@ -122,8 +125,8 @@ class TestImperativeQat(unittest.TestCase): if batch_id % 100 == 0: _logger.info( "Train | At epoch {} step {}: loss = {:}, acc= {:}". - format(epoch, batch_id, - avg_loss.numpy(), acc.numpy())) + format(epoch, batch_id, avg_loss.numpy(), + acc.numpy())) if batch_id == 500: # For shortening CI time break @@ -132,39 +135,41 @@ class TestImperativeQat(unittest.TestCase): for batch_id, data in enumerate(test_reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(-1, 1) img = fluid.dygraph.to_variable(x_data) label = fluid.dygraph.to_variable(y_data) out = lenet(img) - acc_top1 = fluid.layers.accuracy( - input=out, label=label, k=1) - acc_top5 = fluid.layers.accuracy( - input=out, label=label, k=5) + acc_top1 = fluid.layers.accuracy(input=out, + label=label, + k=1) + acc_top5 = fluid.layers.accuracy(input=out, + label=label, + k=5) if batch_id % 100 == 0: eval_acc_top1_list.append(float(acc_top1.numpy())) _logger.info( - "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". - format(epoch, batch_id, - acc_top1.numpy(), acc_top5.numpy())) + "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}" + .format(epoch, batch_id, acc_top1.numpy(), + acc_top5.numpy())) # check eval acc eval_acc_top1 = sum(eval_acc_top1_list) / len( eval_acc_top1_list) print('eval_acc_top1', eval_acc_top1) - self.assertTrue( - eval_acc_top1 > 0.9, - msg="The test acc {%f} is less than 0.9." % eval_acc_top1) + self.assertTrue(eval_acc_top1 > 0.9, + msg="The test acc {%f} is less than 0.9." 
% + eval_acc_top1) # test the correctness of `paddle.jit.save` data = next(test_reader()) test_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) + y_data = np.array([x[1] + for x in data]).astype('int64').reshape(-1, 1) test_img = fluid.dygraph.to_variable(test_data) label = fluid.dygraph.to_variable(y_data) lenet.eval() @@ -177,8 +182,8 @@ class TestImperativeQat(unittest.TestCase): layer=lenet, path=os.path.join(tmpdir, "lenet"), input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') + paddle.static.InputSpec(shape=[None, 1, 28, 28], + dtype='float32') ], onnx_format=self.onnx_format) print('Quantized model saved in %s' % tmpdir) @@ -211,6 +216,7 @@ class TestImperativeQat(unittest.TestCase): class TestImperativeQatONNXFormat(unittest.TestCase): + def set_vars(self): self.weight_quantize_type = 'abs_max' self.activation_quantize_type = 'moving_average_abs_max' diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_amp.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_amp.py index 76a6e11d98d..804c56cfd87 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_amp.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_amp.py @@ -34,8 +34,9 @@ os.environ["CPU_NUM"] = "1" if paddle.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') class TestImperativeQatAmp(unittest.TestCase): @@ -71,8 +72,8 @@ class TestImperativeQatAmp(unittest.TestCase): def cache_unzipping(self, target_folder, zip_path): if not os.path.exists(target_folder): - cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(target_folder, - zip_path) + cmd = 'mkdir {0} && tar xf {1} -C {0}'.format( + target_folder, zip_path) os.system(cmd) def download_model(self, data_url, data_md5, folder_name): @@ -97,17 +98,17 @@ class TestImperativeQatAmp(unittest.TestCase): def model_train(self, model, batch_num=-1, batch_size=32, use_amp=False): model.train() - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=batch_size) - adam = paddle.optimizer.Adam( - learning_rate=0.001, parameters=model.parameters()) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=batch_size) + adam = paddle.optimizer.Adam(learning_rate=0.001, + parameters=model.parameters()) scaler = paddle.amp.GradScaler(init_loss_scaling=500) for batch_id, data in enumerate(train_reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) + y_data = np.array([x[1] + for x in data]).astype('int64').reshape(-1, 1) img = paddle.to_tensor(x_data) label = paddle.to_tensor(y_data) @@ -143,15 +144,15 @@ class TestImperativeQatAmp(unittest.TestCase): def model_test(self, model, batch_num=-1, batch_size=32, use_amp=False): model.eval() - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) acc_top1_list = [] for batch_id, data in enumerate(test_reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) + y_data = np.array([x[1] + for x 
in data]).astype('int64').reshape(-1, 1) img = paddle.to_tensor(x_data) label = paddle.to_tensor(y_data) @@ -202,14 +203,12 @@ class TestImperativeQatAmp(unittest.TestCase): _logger.info('fp32_acc_top1: %f, int8_acc_top1: %f' % (fp32_acc_top1, int8_acc_top1)) - self.assertTrue( - int8_acc_top1 > fp32_acc_top1 - 0.01, - msg='fp32_acc_top1: %f, int8_acc_top1: %f' % - (fp32_acc_top1, int8_acc_top1)) + self.assertTrue(int8_acc_top1 > fp32_acc_top1 - 0.01, + msg='fp32_acc_top1: %f, int8_acc_top1: %f' % + (fp32_acc_top1, int8_acc_top1)) input_spec = [ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') + paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32') ] paddle.jit.save(layer=model, path=self.save_path, input_spec=input_spec) print('Quantized model saved in {%s}' % self.save_path) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py index 94e0681d1f5..3770ee48649 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py @@ -33,11 +33,13 @@ os.environ["CPU_NUM"] = "1" if core.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') class TestImperativeQatChannelWise(TestImperativeQat): + def set_vars(self): self.weight_quantize_type = 'channel_wise_abs_max' self.activation_quantize_type = 'moving_average_abs_max' @@ -48,6 +50,7 @@ class TestImperativeQatChannelWise(TestImperativeQat): class TestImperativeQatChannelWiseONNXFormat(TestImperativeQat): + def set_vars(self): self.weight_quantize_type = 'channel_wise_abs_max' self.activation_quantize_type = 'moving_average_abs_max' diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_fuse.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_fuse.py index d580eb7ae7a..db7f15c4cef 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_fuse.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_fuse.py @@ -33,11 +33,13 @@ os.environ["CPU_NUM"] = "1" if core.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') class TestImperativeQatfuseBN(TestImperativeQat): + def set_vars(self): self.weight_quantize_type = 'abs_max' self.activation_quantize_type = 'moving_average_abs_max' diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py index 0bc80694a12..4d2a990d81d 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py @@ -31,20 +31,24 @@ from paddle.fluid.dygraph import Linear from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose from paddle.fluid.log_helper import get_logger from paddle.fluid.framework import _test_eager_guard + os.environ["CPU_NUM"] = "1" -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + 
fmt='%(asctime)s-%(levelname)s: %(message)s') class PACT(nn.Layer): + def __init__(self, init_value=20): super(PACT, self).__init__() alpha_attr = paddle.ParamAttr( name=self.full_name() + ".pact", initializer=paddle.nn.initializer.Constant(value=init_value)) - self.alpha = self.create_parameter( - shape=[1], attr=alpha_attr, dtype='float32') + self.alpha = self.create_parameter(shape=[1], + attr=alpha_attr, + dtype='float32') def forward(self, x): out_left = paddle.nn.functional.relu(x - self.alpha) @@ -54,24 +58,31 @@ class PACT(nn.Layer): class CustomQAT(nn.Layer): + def __init__(self): super(CustomQAT, self).__init__() - attr = paddle.ParamAttr( - initializer=paddle.nn.initializer.Constant(value=1.0)) - self.u_param = self.create_parameter( - shape=[1], attr=attr, dtype='float32') - self.l_param = self.create_parameter( - shape=[1], attr=attr, dtype='float32') - self.alpha_param = self.create_parameter( - shape=[1], attr=attr, dtype='float32') - self.upper = self.create_parameter( - shape=[1], attr=attr, dtype='float32') + attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant( + value=1.0)) + self.u_param = self.create_parameter(shape=[1], + attr=attr, + dtype='float32') + self.l_param = self.create_parameter(shape=[1], + attr=attr, + dtype='float32') + self.alpha_param = self.create_parameter(shape=[1], + attr=attr, + dtype='float32') + self.upper = self.create_parameter(shape=[1], + attr=attr, + dtype='float32') self.upper.stop_gradient = True - self.lower = self.create_parameter( - shape=[1], attr=attr, dtype='float32') + self.lower = self.create_parameter(shape=[1], + attr=attr, + dtype='float32') self.lower.stop_gradient = True def forward(self, x): + def clip(x, upper, lower): x = x + paddle.nn.functional.relu(lower - x) x = x - paddle.nn.functional.relu(x - upper) @@ -102,6 +113,7 @@ class CustomQAT(nn.Layer): class ModelForConv2dT(nn.Layer): + def __init__(self, num_classes=10): super(ModelForConv2dT, self).__init__() self.features = nn.Conv2DTranspose(4, 6, (3, 3)) @@ -115,34 +127,29 @@ class ModelForConv2dT(nn.Layer): class ImperativeLenet(paddle.nn.Layer): + def __init__(self, num_classes=10, classifier_activation='softmax'): super(ImperativeLenet, self).__init__() self.features = Sequential( - Conv2D( - num_channels=1, - num_filters=6, - filter_size=3, - stride=1, - padding=1), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2), - Conv2D( - num_channels=6, - num_filters=16, - filter_size=5, - stride=1, - padding=0), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2)) + Conv2D(num_channels=1, + num_filters=6, + filter_size=3, + stride=1, + padding=1), + Pool2D(pool_size=2, pool_type='max', pool_stride=2), + Conv2D(num_channels=6, + num_filters=16, + filter_size=5, + stride=1, + padding=0), + Pool2D(pool_size=2, pool_type='max', pool_stride=2)) self.fc = Sequential( - Linear( - input_dim=400, output_dim=120), - Linear( - input_dim=120, output_dim=84), - Linear( - input_dim=84, output_dim=num_classes, - act=classifier_activation)) + Linear(input_dim=400, output_dim=120), + Linear(input_dim=120, output_dim=84), + Linear(input_dim=84, + output_dim=num_classes, + act=classifier_activation)) def forward(self, inputs): x = self.features(inputs) @@ -153,6 +160,7 @@ class ImperativeLenet(paddle.nn.Layer): class TestUserDefinedActPreprocess(unittest.TestCase): + def setUp(self): _logger.info("test act_preprocess") self.imperative_qat = ImperativeQuantAware(act_preprocess_layer=PACT) @@ -196,8 +204,8 @@ class TestUserDefinedActPreprocess(unittest.TestCase): for 
batch_id, data in enumerate(train_reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(-1, 1) img = paddle.to_tensor(x_data) label = paddle.to_tensor(y_data) @@ -211,8 +219,8 @@ class TestUserDefinedActPreprocess(unittest.TestCase): if batch_id % 50 == 0: _logger.info( "Train | At epoch {} step {}: loss = {:}, acc= {:}". - format(epoch, batch_id, - avg_loss.numpy(), acc.numpy())) + format(epoch, batch_id, avg_loss.numpy(), + acc.numpy())) break def test(model): @@ -221,8 +229,8 @@ class TestUserDefinedActPreprocess(unittest.TestCase): for batch_id, data in enumerate(test_reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(-1, 1) img = paddle.to_tensor(x_data) label = paddle.to_tensor(y_data) @@ -237,8 +245,9 @@ class TestUserDefinedActPreprocess(unittest.TestCase): "Test | step {}: acc1 = {:}, acc5 = {:}".format( batch_id, acc_top1.numpy(), acc_top5.numpy())) - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=512, drop_last=True) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=512, + drop_last=True) test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=512) train(lenet) test(lenet) @@ -250,18 +259,21 @@ class TestUserDefinedActPreprocess(unittest.TestCase): class TestUserDefinedWeightPreprocess(TestUserDefinedActPreprocess): + def setUp(self): _logger.info("test weight_preprocess") self.imperative_qat = ImperativeQuantAware(weight_preprocess_layer=PACT) class TestUserDefinedActQuantize(TestUserDefinedActPreprocess): + def setUp(self): _logger.info("test act_quantize") self.imperative_qat = ImperativeQuantAware(act_quantize_layer=CustomQAT) class TestUserDefinedWeightQuantize(TestUserDefinedActPreprocess): + def setUp(self): _logger.info("test weight_quantize") self.imperative_qat = ImperativeQuantAware( diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py index d77134d72a9..e562cc2452a 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py @@ -38,11 +38,13 @@ os.environ["CPU_NUM"] = "1" if core.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') class TestImperativeOutSclae(unittest.TestCase): + def func_out_scale_acc(self): paddle.disable_static() seed = 1000 @@ -51,15 +53,16 @@ class TestImperativeOutSclae(unittest.TestCase): qat = ImperativeQuantAware() np.random.seed(seed) - reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=512, drop_last=True) + reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=512, + drop_last=True) lenet = ImperativeLenetWithSkipQuant() lenet = fix_model_dict(lenet) qat.quantize(lenet) - adam = AdamOptimizer( - learning_rate=lr, parameter_list=lenet.parameters()) + adam = AdamOptimizer(learning_rate=lr, + parameter_list=lenet.parameters()) dynamic_loss_rec = [] lenet.train() loss_list = train_lenet(lenet, reader, adam) @@ 
-69,13 +72,13 @@ class TestImperativeOutSclae(unittest.TestCase): path = "./save_dynamic_quant_infer_model/lenet" save_dir = "./save_dynamic_quant_infer_model" - qat.save_quantized_model( - layer=lenet, - path=path, - input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') - ]) + qat.save_quantized_model(layer=lenet, + path=path, + input_spec=[ + paddle.static.InputSpec( + shape=[None, 1, 28, 28], + dtype='float32') + ]) paddle.enable_static() @@ -85,12 +88,12 @@ class TestImperativeOutSclae(unittest.TestCase): place = core.CPUPlace() exe = fluid.Executor(place) - [inference_program, feed_target_names, fetch_targets] = ( - fluid.io.load_inference_model( - dirname=save_dir, - executor=exe, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX)) + [inference_program, feed_target_names, + fetch_targets] = (fluid.io.load_inference_model( + dirname=save_dir, + executor=exe, + model_filename="lenet" + INFER_MODEL_SUFFIX, + params_filename="lenet" + INFER_PARAMS_SUFFIX)) model_ops = inference_program.global_block().ops conv2d_count, matmul_count = 0, 0 diff --git a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py index 656fb1dda3b..fcf82c2fc89 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py +++ b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py @@ -37,19 +37,21 @@ def init_data(batch_size=32, img_shape=[784], label_range=9): class TestMovingAverageAbsMaxScaleOp(unittest.TestCase): + def check_backward(self, use_cuda): main_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(main_program, startup_program): - image = fluid.layers.data( - name='image', shape=[784], dtype='float32') + image = fluid.layers.data(name='image', + shape=[784], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') fc_tmp = fluid.layers.fc(image, size=10, act='softmax') out_scale = quant_layers.MovingAverageAbsMaxScale( name=fc_tmp.name, dtype=fc_tmp.dtype) fc_tmp_1 = out_scale(fc_tmp) - cross_entropy = fluid.layers.softmax_with_cross_entropy(fc_tmp, - label) + cross_entropy = fluid.layers.softmax_with_cross_entropy( + fc_tmp, label) loss = fluid.layers.reduce_mean(cross_entropy) sgd = fluid.optimizer.SGD(learning_rate=1e-3) sgd.minimize(loss) diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_lstm_model.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_lstm_model.py index 89e0e099f44..1beb0f916d4 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_lstm_model.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_lstm_model.py @@ -33,6 +33,7 @@ np.random.seed(0) class TestPostTrainingQuantization(unittest.TestCase): + def setUp(self): self.download_path = 'int8/download' self.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' + @@ -43,21 +44,21 @@ class TestPostTrainingQuantization(unittest.TestCase): try: os.system("mkdir -p " + self.int8_model_path) except Exception as e: - print("Failed to create {} due to {}".format(self.int8_model_path, - str(e))) + print("Failed to create {} due to {}".format( + self.int8_model_path, str(e))) sys.exit(-1) def tearDown(self): try: os.system("rm -rf {}".format(self.int8_model_path)) except Exception as e: - print("Failed to delete {} due to 
{}".format(self.int8_model_path, - str(e))) + print("Failed to delete {} due to {}".format( + self.int8_model_path, str(e))) def cache_unzipping(self, target_folder, zip_path): if not os.path.exists(target_folder): - cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(target_folder, - zip_path) + cmd = 'mkdir {0} && tar xf {1} -C {0}'.format( + target_folder, zip_path) os.system(cmd) def download_model(self, data_url, data_md5, folder_name): @@ -71,6 +72,7 @@ class TestPostTrainingQuantization(unittest.TestCase): return data_cache_folder def get_batch_reader(self, data_path, place): + def reader(): with open(data_path, 'rb') as in_file: while True: @@ -83,15 +85,14 @@ class TestPostTrainingQuantization(unittest.TestCase): seq_len = (alllen >> 16) & 0xFFFF label = in_file.read(4 * label_len) - label = np.frombuffer( - label, dtype=np.int32).reshape([len(label) // 4]) + label = np.frombuffer(label, dtype=np.int32).reshape( + [len(label) // 4]) if label.shape[0] != 1 or label[0] > 6350: continue feat = in_file.read(4 * seq_len * 8) - feat = np.frombuffer( - feat, - dtype=np.float32).reshape([len(feat) // 4 // 8, 8]) + feat = np.frombuffer(feat, dtype=np.float32).reshape( + [len(feat) // 4 // 8, 8]) lod_feat = [feat.shape[0]] minputs = fluid.create_lod_tensor(feat, [lod_feat], place) @@ -100,6 +101,7 @@ class TestPostTrainingQuantization(unittest.TestCase): return reader def get_simple_reader(self, data_path, place): + def reader(): with open(data_path, 'rb') as in_file: while True: @@ -112,15 +114,14 @@ class TestPostTrainingQuantization(unittest.TestCase): seq_len = (alllen >> 16) & 0xFFFF label = in_file.read(4 * label_len) - label = np.frombuffer( - label, dtype=np.int32).reshape([len(label) // 4]) + label = np.frombuffer(label, dtype=np.int32).reshape( + [len(label) // 4]) if label.shape[0] != 1 or label[0] > 6350: continue feat = in_file.read(4 * seq_len * 8) - feat = np.frombuffer( - feat, - dtype=np.float32).reshape([len(feat) // 4 // 8, 8]) + feat = np.frombuffer(feat, dtype=np.float32).reshape( + [len(feat) // 4 // 8, 8]) lod_feat = [feat.shape[0]] minputs = fluid.create_lod_tensor(feat, [lod_feat], place) @@ -181,18 +182,17 @@ class TestPostTrainingQuantization(unittest.TestCase): scope = fluid.global_scope() batch_generator = self.get_batch_reader(data_path, place) - ptq = PostTrainingQuantization( - executor=exe, - model_dir=model_path, - batch_generator=batch_generator, - batch_nums=batch_nums, - algo=algo, - quantizable_op_type=quantizable_op_type, - round_type=round_type, - is_full_quantize=is_full_quantize, - optimize_model=is_optimize_model, - onnx_format=onnx_format, - is_use_cache_file=is_use_cache_file) + ptq = PostTrainingQuantization(executor=exe, + model_dir=model_path, + batch_generator=batch_generator, + batch_nums=batch_nums, + algo=algo, + quantizable_op_type=quantizable_op_type, + round_type=round_type, + is_full_quantize=is_full_quantize, + optimize_model=is_optimize_model, + onnx_format=onnx_format, + is_use_cache_file=is_use_cache_file) ptq.quantize() ptq.save_quantized_model(self.int8_model_path) @@ -226,10 +226,11 @@ class TestPostTrainingQuantization(unittest.TestCase): print("Start post training quantization for {0} on {1} samples ...". 
format(model_name, quant_iterations)) - self.generate_quantized_model( - fp32_model_path, data_path, algo, round_type, quantizable_op_type, - is_full_quantize, is_use_cache_file, is_optimize_model, - quant_iterations, onnx_format) + self.generate_quantized_model(fp32_model_path, data_path, algo, + round_type, quantizable_op_type, + is_full_quantize, is_use_cache_file, + is_optimize_model, quant_iterations, + onnx_format) print("Start INT8 inference for {0} on {1} samples ...".format( model_name, infer_iterations)) @@ -248,6 +249,7 @@ class TestPostTrainingQuantization(unittest.TestCase): class TestPostTrainingAvgForLSTM(TestPostTrainingQuantization): + def test_post_training_avg(self): model_name = "nlp_lstm_fp32_model" model_url = "https://paddle-inference-dist.cdn.bcebos.com/int8/unittest_model_data/nlp_lstm_fp32_model.tar.gz" @@ -271,6 +273,7 @@ class TestPostTrainingAvgForLSTM(TestPostTrainingQuantization): class TestPostTrainingAvgForLSTMONNXFormat(TestPostTrainingQuantization): + def test_post_training_avg_onnx_format(self): model_name = "nlp_lstm_fp32_model" model_url = "https://paddle-inference-dist.cdn.bcebos.com/int8/unittest_model_data/nlp_lstm_fp32_model.tar.gz" @@ -288,23 +291,22 @@ class TestPostTrainingAvgForLSTMONNXFormat(TestPostTrainingQuantization): infer_iterations = 100 quant_iterations = 10 onnx_format = True - self.run_test( - model_name, - model_url, - model_md5, - data_name, - data_url, - data_md5, - algo, - round_type, - quantizable_op_type, - is_full_quantize, - is_use_cache_file, - is_optimize_model, - diff_threshold, - infer_iterations, - quant_iterations, - onnx_format=onnx_format) + self.run_test(model_name, + model_url, + model_md5, + data_name, + data_url, + data_md5, + algo, + round_type, + quantizable_op_type, + is_full_quantize, + is_use_cache_file, + is_optimize_model, + diff_threshold, + infer_iterations, + quant_iterations, + onnx_format=onnx_format) if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py index 4c3a758f0e3..cb76f4bbac0 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py @@ -32,6 +32,7 @@ np.random.seed(0) class TestPostTrainingQuantization(unittest.TestCase): + def setUp(self): self.download_path = 'int8/download' self.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' + @@ -42,21 +43,21 @@ class TestPostTrainingQuantization(unittest.TestCase): try: os.system("mkdir -p " + self.int8_model_path) except Exception as e: - print("Failed to create {} due to {}".format(self.int8_model_path, - str(e))) + print("Failed to create {} due to {}".format( + self.int8_model_path, str(e))) sys.exit(-1) def tearDown(self): try: os.system("rm -rf {}".format(self.int8_model_path)) except Exception as e: - print("Failed to delete {} due to {}".format(self.int8_model_path, - str(e))) + print("Failed to delete {} due to {}".format( + self.int8_model_path, str(e))) def cache_unzipping(self, target_folder, zip_path): if not os.path.exists(target_folder): - cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(target_folder, - zip_path) + cmd = 'mkdir {0} && tar xf {1} -C {0}'.format( + target_folder, zip_path) os.system(cmd) def download_model(self, data_url, data_md5, folder_name): @@ -82,8 +83,8 @@ class TestPostTrainingQuantization(unittest.TestCase): cnt = 0 periods = [] for 
batch_id, data in enumerate(val_reader()): - image = np.array( - [x[0].reshape(img_shape) for x in data]).astype("float32") + image = np.array([x[0].reshape(img_shape) + for x in data]).astype("float32") input_label = np.array([x[1] for x in data]).astype("int64") t1 = time.time() @@ -125,20 +126,19 @@ class TestPostTrainingQuantization(unittest.TestCase): scope = fluid.global_scope() val_reader = paddle.dataset.mnist.train() - ptq = PostTrainingQuantization( - executor=exe, - model_dir=model_path, - sample_generator=val_reader, - batch_size=batch_size, - batch_nums=batch_nums, - algo=algo, - quantizable_op_type=quantizable_op_type, - round_type=round_type, - is_full_quantize=is_full_quantize, - optimize_model=is_optimize_model, - onnx_format=onnx_format, - skip_tensor_list=skip_tensor_list, - is_use_cache_file=is_use_cache_file) + ptq = PostTrainingQuantization(executor=exe, + model_dir=model_path, + sample_generator=val_reader, + batch_size=batch_size, + batch_nums=batch_nums, + algo=algo, + quantizable_op_type=quantizable_op_type, + round_type=round_type, + is_full_quantize=is_full_quantize, + optimize_model=is_optimize_model, + onnx_format=onnx_format, + skip_tensor_list=skip_tensor_list, + is_use_cache_file=is_use_cache_file) ptq.quantize() ptq.save_quantized_model(self.int8_model_path) @@ -164,30 +164,33 @@ class TestPostTrainingQuantization(unittest.TestCase): print("Start FP32 inference for {0} on {1} images ...".format( model_name, infer_iterations * batch_size)) - (fp32_throughput, fp32_latency, fp32_acc1) = self.run_program( - origin_model_path, batch_size, infer_iterations) + (fp32_throughput, fp32_latency, + fp32_acc1) = self.run_program(origin_model_path, batch_size, + infer_iterations) print("Start INT8 post training quantization for {0} on {1} images ...". format(model_name, quant_iterations * batch_size)) - self.generate_quantized_model( - origin_model_path, algo, round_type, quantizable_op_type, - is_full_quantize, is_use_cache_file, is_optimize_model, batch_size, - quant_iterations, onnx_format, skip_tensor_list) + self.generate_quantized_model(origin_model_path, algo, round_type, + quantizable_op_type, is_full_quantize, + is_use_cache_file, is_optimize_model, + batch_size, quant_iterations, onnx_format, + skip_tensor_list) print("Start INT8 inference for {0} on {1} images ...".format( model_name, infer_iterations * batch_size)) - (int8_throughput, int8_latency, int8_acc1) = self.run_program( - self.int8_model_path, batch_size, infer_iterations) + (int8_throughput, int8_latency, + int8_acc1) = self.run_program(self.int8_model_path, batch_size, + infer_iterations) print("---Post training quantization of {} method---".format(algo)) print( - "FP32 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}.". - format(model_name, batch_size, fp32_throughput, fp32_latency, - fp32_acc1)) + "FP32 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}." + .format(model_name, batch_size, fp32_throughput, fp32_latency, + fp32_acc1)) print( - "INT8 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}.\n". 
- format(model_name, batch_size, int8_throughput, int8_latency, - int8_acc1)) + "INT8 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}.\n" + .format(model_name, batch_size, int8_throughput, int8_latency, + int8_acc1)) sys.stdout.flush() delta_value = fp32_acc1 - int8_acc1 @@ -195,6 +198,7 @@ class TestPostTrainingQuantization(unittest.TestCase): class TestPostTrainingKLForMnist(TestPostTrainingQuantization): + def test_post_training_kl(self): model_name = "mnist_model" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" @@ -216,6 +220,7 @@ class TestPostTrainingKLForMnist(TestPostTrainingQuantization): class TestPostTraininghistForMnist(TestPostTrainingQuantization): + def test_post_training_hist(self): model_name = "mnist_model" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" @@ -237,6 +242,7 @@ class TestPostTraininghistForMnist(TestPostTrainingQuantization): class TestPostTrainingmseForMnist(TestPostTrainingQuantization): + def test_post_training_mse(self): model_name = "mnist_model" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" @@ -258,6 +264,7 @@ class TestPostTrainingmseForMnist(TestPostTrainingQuantization): class TestPostTrainingemdForMnist(TestPostTrainingQuantization): + def test_post_training_mse(self): model_name = "mnist_model" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" @@ -279,6 +286,7 @@ class TestPostTrainingemdForMnist(TestPostTrainingQuantization): class TestPostTrainingavgForMnist(TestPostTrainingQuantization): + def test_post_training_avg(self): model_name = "mnist_model" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" @@ -300,6 +308,7 @@ class TestPostTrainingavgForMnist(TestPostTrainingQuantization): class TestPostTrainingAbsMaxForMnist(TestPostTrainingQuantization): + def test_post_training_abs_max(self): model_name = "mnist_model" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" @@ -321,6 +330,7 @@ class TestPostTrainingAbsMaxForMnist(TestPostTrainingQuantization): class TestPostTrainingmseAdaroundForMnist(TestPostTrainingQuantization): + def test_post_training_mse(self): model_name = "mnist_model" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" @@ -342,6 +352,7 @@ class TestPostTrainingmseAdaroundForMnist(TestPostTrainingQuantization): class TestPostTrainingKLAdaroundForMnist(TestPostTrainingQuantization): + def test_post_training_kl(self): model_name = "mnist_model" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" @@ -363,6 +374,7 @@ class TestPostTrainingKLAdaroundForMnist(TestPostTrainingQuantization): class TestPostTrainingmseForMnistONNXFormat(TestPostTrainingQuantization): + def test_post_training_mse_onnx_format(self): model_name = "mnist_model" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" @@ -378,25 +390,25 @@ class TestPostTrainingmseForMnistONNXFormat(TestPostTrainingQuantization): batch_size = 10 infer_iterations = 50 quant_iterations = 5 - self.run_test( - model_name, - data_url, - data_md5, - algo, - round_type, - quantizable_op_type, - is_full_quantize, - is_use_cache_file, - is_optimize_model, - diff_threshold, - batch_size, - infer_iterations, - quant_iterations, - onnx_format=onnx_format) + self.run_test(model_name, + data_url, + data_md5, + algo, + round_type, + quantizable_op_type, + is_full_quantize, + is_use_cache_file, + is_optimize_model, 
+ diff_threshold, + batch_size, + infer_iterations, + quant_iterations, + onnx_format=onnx_format) class TestPostTrainingmseForMnistONNXFormatFullQuant( TestPostTrainingQuantization): + def test_post_training_mse_onnx_format_full_quant(self): model_name = "mnist_model" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" @@ -412,24 +424,24 @@ class TestPostTrainingmseForMnistONNXFormatFullQuant( batch_size = 10 infer_iterations = 50 quant_iterations = 5 - self.run_test( - model_name, - data_url, - data_md5, - algo, - round_type, - quantizable_op_type, - is_full_quantize, - is_use_cache_file, - is_optimize_model, - diff_threshold, - batch_size, - infer_iterations, - quant_iterations, - onnx_format=onnx_format) + self.run_test(model_name, + data_url, + data_md5, + algo, + round_type, + quantizable_op_type, + is_full_quantize, + is_use_cache_file, + is_optimize_model, + diff_threshold, + batch_size, + infer_iterations, + quant_iterations, + onnx_format=onnx_format) class TestPostTrainingavgForMnistSkipOP(TestPostTrainingQuantization): + def test_post_training_avg_skip_op(self): model_name = "mnist_model" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" @@ -445,21 +457,20 @@ class TestPostTrainingavgForMnistSkipOP(TestPostTrainingQuantization): infer_iterations = 50 quant_iterations = 5 skip_tensor_list = ["fc_0.w_0"] - self.run_test( - model_name, - data_url, - data_md5, - algo, - round_type, - quantizable_op_type, - is_full_quantize, - is_use_cache_file, - is_optimize_model, - diff_threshold, - batch_size, - infer_iterations, - quant_iterations, - skip_tensor_list=skip_tensor_list) + self.run_test(model_name, + data_url, + data_md5, + algo, + round_type, + quantizable_op_type, + is_full_quantize, + is_use_cache_file, + is_optimize_model, + diff_threshold, + batch_size, + infer_iterations, + quant_iterations, + skip_tensor_list=skip_tensor_list) if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py index 56d77f77b50..b36f036d415 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py @@ -82,6 +82,7 @@ def _reader_creator(file_list, color_jitter=False, rotate=False, data_dir=DATA_DIR): + def reader(): with open(file_list) as flist: full_lines = [line.strip() for line in flist] @@ -96,8 +97,10 @@ def _reader_creator(file_list, continue yield img_path, int(label) - mapper = functools.partial( - process_image, mode=mode, color_jitter=color_jitter, rotate=rotate) + mapper = functools.partial(process_image, + mode=mode, + color_jitter=color_jitter, + rotate=rotate) return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE) @@ -108,6 +111,7 @@ def val(data_dir=DATA_DIR): class TestPostTrainingQuantization(unittest.TestCase): + def setUp(self): self.int8_download = 'int8/download' self.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' + @@ -154,13 +158,13 @@ class TestPostTrainingQuantization(unittest.TestCase): try: os.system("rm -rf {}".format(self.int8_model)) except Exception as e: - print("Failed to delete {} due to {}".format(self.int8_model, - str(e))) + print("Failed to delete {} due to {}".format( + self.int8_model, str(e))) def cache_unzipping(self, target_folder, zip_path): if not os.path.exists(target_folder): - cmd = 
'mkdir {0} && tar xf {1} -C {0}'.format(target_folder, - zip_path) + cmd = 'mkdir {0} && tar xf {1} -C {0}'.format( + target_folder, zip_path) os.system(cmd) def download_data(self, data_urls, data_md5s, folder_name, is_model=True): @@ -207,17 +211,18 @@ class TestPostTrainingQuantization(unittest.TestCase): cnt = 0 periods = [] for batch_id, data in enumerate(val_reader()): - image = np.array( - [x[0].reshape(image_shape) for x in data]).astype("float32") + image = np.array([x[0].reshape(image_shape) + for x in data]).astype("float32") label = np.array([x[1] for x in data]).astype("int64") label = label.reshape([-1, 1]) t1 = time.time() - _, acc1, _ = exe.run( - infer_program, - feed={feed_dict[0]: image, - feed_dict[1]: label}, - fetch_list=fetch_targets) + _, acc1, _ = exe.run(infer_program, + feed={ + feed_dict[0]: image, + feed_dict[1]: label + }, + fetch_list=fetch_targets) t2 = time.time() period = t2 - t1 periods.append(period) @@ -248,8 +253,8 @@ class TestPostTrainingQuantization(unittest.TestCase): try: os.system("mkdir " + self.int8_model) except Exception as e: - print("Failed to create {} due to {}".format(self.int8_model, - str(e))) + print("Failed to create {} due to {}".format( + self.int8_model, str(e))) sys.exit(-1) place = fluid.CPUPlace() @@ -257,17 +262,16 @@ class TestPostTrainingQuantization(unittest.TestCase): scope = fluid.global_scope() val_reader = val() - ptq = PostTrainingQuantization( - executor=exe, - sample_generator=val_reader, - model_dir=model_path, - algo=algo, - quantizable_op_type=quantizable_op_type, - round_type=round_type, - is_full_quantize=is_full_quantize, - optimize_model=is_optimize_model, - onnx_format=onnx_format, - is_use_cache_file=is_use_cache_file) + ptq = PostTrainingQuantization(executor=exe, + sample_generator=val_reader, + model_dir=model_path, + algo=algo, + quantizable_op_type=quantizable_op_type, + round_type=round_type, + is_full_quantize=is_full_quantize, + optimize_model=is_optimize_model, + onnx_format=onnx_format, + is_use_cache_file=is_use_cache_file) ptq.quantize() ptq.save_quantized_model(self.int8_model) @@ -291,8 +295,9 @@ class TestPostTrainingQuantization(unittest.TestCase): print("Start FP32 inference for {0} on {1} images ...".format( model, infer_iterations * batch_size)) - (fp32_throughput, fp32_latency, fp32_acc1) = self.run_program( - model_cache_folder + "/model", batch_size, infer_iterations) + (fp32_throughput, fp32_latency, + fp32_acc1) = self.run_program(model_cache_folder + "/model", + batch_size, infer_iterations) print("Start INT8 post training quantization for {0} on {1} images ...". format(model, sample_iterations * batch_size)) @@ -303,16 +308,19 @@ class TestPostTrainingQuantization(unittest.TestCase): print("Start INT8 inference for {0} on {1} images ...".format( model, infer_iterations * batch_size)) - (int8_throughput, int8_latency, int8_acc1) = self.run_program( - self.int8_model, batch_size, infer_iterations) + (int8_throughput, int8_latency, + int8_acc1) = self.run_program(self.int8_model, batch_size, + infer_iterations) print("---Post training quantization of {} method---".format(algo)) print( - "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.". - format(model, batch_size, fp32_throughput, fp32_latency, fp32_acc1)) + "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}." 
+ .format(model, batch_size, fp32_throughput, fp32_latency, + fp32_acc1)) print( - "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.\n". - format(model, batch_size, int8_throughput, int8_latency, int8_acc1)) + "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.\n" + .format(model, batch_size, int8_throughput, int8_latency, + int8_acc1)) sys.stdout.flush() delta_value = fp32_acc1 - int8_acc1 @@ -320,6 +328,7 @@ class TestPostTrainingQuantization(unittest.TestCase): class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization): + def test_post_training_kl_mobilenetv1(self): model = "MobileNet-V1" algo = "KL" @@ -344,6 +353,7 @@ class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization): class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization): + def test_post_training_avg_mobilenetv1(self): model = "MobileNet-V1" algo = "avg" @@ -367,6 +377,7 @@ class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization): class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization): + def test_post_training_hist_mobilenetv1(self): model = "MobileNet-V1" algo = "hist" @@ -390,6 +401,7 @@ class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization): class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization): + def test_post_training_abs_max_mobilenetv1(self): model = "MobileNet-V1" algo = "abs_max" @@ -413,6 +425,7 @@ class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization): class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization): + def test_post_training_onnx_format_mobilenetv1(self): model = "MobileNet-V1" algo = "avg" @@ -431,18 +444,17 @@ class TestPostTrainingAvgONNXFormatForMobilenetv1(TestPostTrainingQuantization): is_optimize_model = True onnx_format = True diff_threshold = 0.05 - self.run_test( - model, - algo, - round_type, - data_urls, - data_md5s, - quantizable_op_type, - is_full_quantize, - is_use_cache_file, - is_optimize_model, - diff_threshold, - onnx_format=onnx_format) + self.run_test(model, + algo, + round_type, + data_urls, + data_md5s, + quantizable_op_type, + is_full_quantize, + is_use_cache_file, + is_optimize_model, + diff_threshold, + onnx_format=onnx_format) if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py index dc12026a21a..c79499100ce 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py @@ -21,6 +21,7 @@ paddle.enable_static() class TestPostTrainingForResnet50(TestPostTrainingQuantization): + def test_post_training_resnet50(self): model = "ResNet-50" algo = "min_max" @@ -40,6 +41,7 @@ class TestPostTrainingForResnet50(TestPostTrainingQuantization): class TestPostTrainingForResnet50ONNXFormat(TestPostTrainingQuantization): + def test_post_training_resnet50(self): model = "ResNet-50" algo = "min_max" @@ -54,18 +56,17 @@ class TestPostTrainingForResnet50ONNXFormat(TestPostTrainingQuantization): is_optimize_model = False diff_threshold = 0.025 onnx_format = True - self.run_test( - model, - algo, - round_type, - data_urls, - data_md5s, - quantizable_op_type, - is_full_quantize, - is_use_cache_file, - is_optimize_model, - diff_threshold, - onnx_format=onnx_format) + self.run_test(model, + algo, + round_type, + 
data_urls, + data_md5s, + quantizable_op_type, + is_full_quantize, + is_use_cache_file, + is_optimize_model, + diff_threshold, + onnx_format=onnx_format) if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_while.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_while.py index 642bcf2a476..f4eaf5d9bc7 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_while.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_while.py @@ -32,6 +32,7 @@ np.random.seed(0) class TestPostTrainingQuantization(unittest.TestCase): + def setUp(self): self.download_path = 'int8/download' self.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' + @@ -42,16 +43,16 @@ class TestPostTrainingQuantization(unittest.TestCase): try: os.system("mkdir -p " + self.int8_model_path) except Exception as e: - print("Failed to create {} due to {}".format(self.int8_model_path, - str(e))) + print("Failed to create {} due to {}".format( + self.int8_model_path, str(e))) sys.exit(-1) def tearDown(self): try: os.system("rm -rf {}".format(self.int8_model_path)) except Exception as e: - print("Failed to delete {} due to {}".format(self.int8_model_path, - str(e))) + print("Failed to delete {} due to {}".format( + self.int8_model_path, str(e))) def cache_unzipping(self, target_folder, zip_path): cmd = 'tar xf {0} -C {1}'.format(zip_path, target_folder) @@ -82,8 +83,8 @@ class TestPostTrainingQuantization(unittest.TestCase): cnt = 0 periods = [] for batch_id, data in enumerate(val_reader()): - image = np.array( - [x[0].reshape(img_shape) for x in data]).astype("float32") + image = np.array([x[0].reshape(img_shape) + for x in data]).astype("float32") input_label = np.array([x[1] for x in data]).astype("int64") t1 = time.time() @@ -147,10 +148,9 @@ class TestPostTrainingQuantization(unittest.TestCase): optimize_model=is_optimize_model, is_use_cache_file=is_use_cache_file) ptq.quantize() - ptq.save_quantized_model( - self.int8_model_path, - model_filename='model.pdmodel', - params_filename='model.pdiparams') + ptq.save_quantized_model(self.int8_model_path, + model_filename='model.pdmodel', + params_filename='model.pdiparams') def run_test(self, model_name, @@ -172,36 +172,37 @@ class TestPostTrainingQuantization(unittest.TestCase): print("Start FP32 inference for {0} on {1} images ...".format( model_name, infer_iterations * batch_size)) - (fp32_throughput, fp32_latency, fp32_acc1) = self.run_program( - origin_model_path, batch_size, infer_iterations) + (fp32_throughput, fp32_latency, + fp32_acc1) = self.run_program(origin_model_path, batch_size, + infer_iterations) print("Start INT8 post training quantization for {0} on {1} images ...". 
format(model_name, quant_iterations * batch_size)) - self.generate_quantized_model( - origin_model_path, - algo, - quantizable_op_type, - is_full_quantize, - is_use_cache_file, - is_optimize_model, - batch_size, - quant_iterations, - is_data_loader=is_data_loader) + self.generate_quantized_model(origin_model_path, + algo, + quantizable_op_type, + is_full_quantize, + is_use_cache_file, + is_optimize_model, + batch_size, + quant_iterations, + is_data_loader=is_data_loader) print("Start INT8 inference for {0} on {1} images ...".format( model_name, infer_iterations * batch_size)) - (int8_throughput, int8_latency, int8_acc1) = self.run_program( - self.int8_model_path, batch_size, infer_iterations) + (int8_throughput, int8_latency, + int8_acc1) = self.run_program(self.int8_model_path, batch_size, + infer_iterations) print("---Post training quantization of {} method---".format(algo)) print( - "FP32 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}.". - format(model_name, batch_size, fp32_throughput, fp32_latency, - fp32_acc1)) + "FP32 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}." + .format(model_name, batch_size, fp32_throughput, fp32_latency, + fp32_acc1)) print( - "INT8 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}.\n". - format(model_name, batch_size, int8_throughput, int8_latency, - int8_acc1)) + "INT8 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}.\n" + .format(model_name, batch_size, int8_throughput, int8_latency, + int8_acc1)) sys.stdout.flush() delta_value = fp32_acc1 - int8_acc1 @@ -209,6 +210,7 @@ class TestPostTrainingQuantization(unittest.TestCase): class TestPostTrainingKLForWhile(TestPostTrainingQuantization): + def test_post_training_kl(self): model_name = "mnist_while" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_while.tar.gz" @@ -229,6 +231,7 @@ class TestPostTrainingKLForWhile(TestPostTrainingQuantization): class TestPostTraininghistForWhile(TestPostTrainingQuantization): + def test_post_training_hist(self): model_name = "mnist_while" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_while.tar.gz" @@ -249,6 +252,7 @@ class TestPostTraininghistForWhile(TestPostTrainingQuantization): class TestPostTrainingmseForWhile(TestPostTrainingQuantization): + def test_post_training_mse(self): model_name = "mnist_while" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_while.tar.gz" @@ -269,6 +273,7 @@ class TestPostTrainingmseForWhile(TestPostTrainingQuantization): class TestPostTrainingavgForWhile(TestPostTrainingQuantization): + def test_post_training_avg(self): model_name = "mnist_while" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_while.tar.gz" @@ -289,6 +294,7 @@ class TestPostTrainingavgForWhile(TestPostTrainingQuantization): class TestPostTrainingMinMaxForWhile(TestPostTrainingQuantization): + def test_post_training_min_max(self): model_name = "mnist_while" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_while.tar.gz" @@ -309,6 +315,7 @@ class TestPostTrainingMinMaxForWhile(TestPostTrainingQuantization): class TestPostTrainingAbsMaxForWhile(TestPostTrainingQuantization): + def test_post_training_abs_max(self): model_name = "mnist_while" data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_while.tar.gz" @@ -326,20 +333,19 @@ class TestPostTrainingAbsMaxForWhile(TestPostTrainingQuantization): is_full_quantize, is_use_cache_file, is_optimize_model, diff_threshold, batch_size, infer_iterations, 
quant_iterations) - self.run_test( - model_name, - data_url, - data_md5, - algo, - quantizable_op_type, - is_full_quantize, - is_use_cache_file, - is_optimize_model, - diff_threshold, - batch_size, - infer_iterations, - quant_iterations, - is_data_loader=True) + self.run_test(model_name, + data_url, + data_md5, + algo, + quantizable_op_type, + is_full_quantize, + is_use_cache_file, + is_optimize_model, + diff_threshold, + batch_size, + infer_iterations, + quant_iterations, + is_data_loader=True) if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py index 04e1decd4af..dea0fcd4897 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestQuant2Int8MkldnnPassMul(unittest.TestCase): + def op_name(self): return "mul" @@ -54,19 +55,17 @@ class TestQuant2Int8MkldnnPassMul(unittest.TestCase): def prepare_program_mul(self, program): block = program.global_block() for name in self.variables_mul: - block.create_var( - name=name, - dtype="float32", - shape=self.variables_mul[name].shape) - - mul_op1 = block.append_op( - type=self.op_name(), - inputs={ - "X": block.var('mul_input'), - "Y": block.var('mul_weights') - }, - outputs={"Out": block.var('mul_output')}, - attrs={'use_mkldnn': self.use_mkldnn}) + block.create_var(name=name, + dtype="float32", + shape=self.variables_mul[name].shape) + + mul_op1 = block.append_op(type=self.op_name(), + inputs={ + "X": block.var('mul_input'), + "Y": block.var('mul_weights') + }, + outputs={"Out": block.var('mul_output')}, + attrs={'use_mkldnn': self.use_mkldnn}) def test_dequantize_op_weights(self): program = fluid.Program() @@ -81,12 +80,11 @@ class TestQuant2Int8MkldnnPassMul(unittest.TestCase): break assert op_node != "", "op of type %s not found" % self.op_name() - qpass = Quant2Int8MkldnnPass( - self.quantized_ops, - _scope=self.scope, - _place=self.place, - _core=core, - _debug=False) + qpass = Quant2Int8MkldnnPass(self.quantized_ops, + _scope=self.scope, + _place=self.place, + _core=core, + _debug=False) qpass._weight_thresholds["mul_output"] = self.mul_output_scale param = self.scope.var("mul_weights").get_tensor() param.set(self.variables_mul["mul_weights"], self.place) @@ -105,11 +103,13 @@ class TestQuant2Int8MkldnnPassMul(unittest.TestCase): class TestQuant2Int8MkldnnPassMatmulV2(TestQuant2Int8MkldnnPassMul): + def op_name(self): return "matmul_v2" class TestQuant2Int8MkldnnPassConv2D(unittest.TestCase): + def setUp(self): self.scope = fluid.Scope() self.place = fluid.CPUPlace() @@ -144,8 +144,9 @@ class TestQuant2Int8MkldnnPassConv2D(unittest.TestCase): def prepare_program_conv2d(self, program): block = program.global_block() for name in self.variables: - block.create_var( - name=name, dtype="float32", shape=self.variables[name].shape) + block.create_var(name=name, + dtype="float32", + shape=self.variables[name].shape) conv2d_op1 = block.append_op( type="conv2d", inputs={ @@ -203,16 +204,16 @@ class TestQuant2Int8MkldnnPassConv2D(unittest.TestCase): graph = IrGraph(core.Graph(program.desc), for_test=True) graph = self.remove_fuse_activation_attribute(graph) self.check_graph_before_pass(graph) - quant2_int8_mkldnn_pass = Quant2Int8MkldnnPass( - self.quantized_ops, - _scope=self.scope, - _place=self.place, - _core=core, - _debug=False) + quant2_int8_mkldnn_pass = 
Quant2Int8MkldnnPass(self.quantized_ops, + _scope=self.scope, + _place=self.place, + _core=core, + _debug=False) graph = quant2_int8_mkldnn_pass._update_activations(graph) self.check_graph_after_pass(graph) class TestQuant2Int8MkldnnPassNearestInterp(unittest.TestCase): + def op_name(self): return "nearest_interp" @@ -268,47 +269,49 @@ class TestQuant2Int8MkldnnPassConv2D(unittest.TestCase): def prepare_program(self, program): block = program.global_block() for name in self.variables: - block.create_var( - name=name, - dtype="float32", - shape=self.variables[name].shape) - block.append_op( - type="conv2d", - inputs={ - "Input": block.var('input'), - 'Filter': block.var('filter') - }, - outputs={"Output": block.var('conv_output')}, - attrs={ - 'strides': self.stride, - 'paddings': self.pad, - 'groups': self.groups, - 'dilations': self.dilations, - 'use_cudnn': self.use_cudnn, - 'use_mkldnn': self.use_mkldnn, - 'data_format': self.data_format, - 'fuse_relu': True - }) - block.append_op( - type=self.op_name(), - inputs={"X": block.var('conv_output'), }, - outputs={"Out": block.var('nearest_interp_output')}, - attrs={ - 'interp_method': self.interp_method, - 'out_h': self.out_h, - 'out_w': self.out_w, - 'scale': self.scale, - 'data_layout': self.data_layout, - 'use_mkldnn': self.use_mkldnn - }) - block.append_op( - type='dropout', - inputs={"X": block.var('nearest_interp_output'), }, - outputs={ - 'Out': block.var('dropout_out'), - 'Mask': block.var('dropout_mask') - }, - attrs={'dropout_prob': self.dropout_prob, }) + block.create_var(name=name, + dtype="float32", + shape=self.variables[name].shape) + block.append_op(type="conv2d", + inputs={ + "Input": block.var('input'), + 'Filter': block.var('filter') + }, + outputs={"Output": block.var('conv_output')}, + attrs={ + 'strides': self.stride, + 'paddings': self.pad, + 'groups': self.groups, + 'dilations': self.dilations, + 'use_cudnn': self.use_cudnn, + 'use_mkldnn': self.use_mkldnn, + 'data_format': self.data_format, + 'fuse_relu': True + }) + block.append_op(type=self.op_name(), + inputs={ + "X": block.var('conv_output'), + }, + outputs={"Out": block.var('nearest_interp_output')}, + attrs={ + 'interp_method': self.interp_method, + 'out_h': self.out_h, + 'out_w': self.out_w, + 'scale': self.scale, + 'data_layout': self.data_layout, + 'use_mkldnn': self.use_mkldnn + }) + block.append_op(type='dropout', + inputs={ + "X": block.var('nearest_interp_output'), + }, + outputs={ + 'Out': block.var('dropout_out'), + 'Mask': block.var('dropout_mask') + }, + attrs={ + 'dropout_prob': self.dropout_prob, + }) def check_graph_after_pass(self, graph): for op in graph.all_op_nodes(): @@ -344,6 +347,7 @@ class TestQuant2Int8MkldnnPassConv2D(unittest.TestCase): self.check_graph_after_pass(graph) class TestQuant2Int8MkldnnPassNearestInterpV2(unittest.TestCase): + def op_name(self): return "nearest_interp_v2" diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_mkldnn_pass.py index 7ee0fd1d3e2..28706d34c63 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_mkldnn_pass.py @@ -30,21 +30,19 @@ os.environ["CPU_NUM"] = "1" def conv_net(img, label): - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_1 = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + 
pool_size=2, + pool_stride=2, + act="relu") conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) avg_loss = fluid.layers.mean(loss) @@ -52,6 +50,7 @@ def conv_net(img, label): class TestMKLDNNTransformBasedFreezePass(unittest.TestCase): + def setUp(self): self.quantizable_op_and_inputs = { 'conv2d': ['Input', 'Filter'], @@ -76,10 +75,12 @@ class TestMKLDNNTransformBasedFreezePass(unittest.TestCase): startup.random_seed = seed with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') + img = fluid.layers.data(name='image', + shape=[1, 28, 28], + dtype='float32') + label = fluid.layers.data(name='label', + shape=[1], + dtype='int64') loss = conv_net(img, label) if not is_test: opt = fluid.optimizer.Adam(learning_rate=0.001) @@ -128,12 +129,11 @@ class TestMKLDNNTransformBasedFreezePass(unittest.TestCase): iters = 5 batch_size = 8 - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=500), - batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=500), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) feeder = fluid.DataFeeder(feed_list=feeds, place=place) # Training the model to get the weights value @@ -158,9 +158,9 @@ class TestMKLDNNTransformBasedFreezePass(unittest.TestCase): for op in test_graph.all_op_nodes(): if op.name().find('quantize') > -1: marked_nodes.add(op) - test_graph.draw('.', 'test_mkldnn' + dev_name + - activation_quant_type + '_' + weight_quant_type, - marked_nodes) + test_graph.draw( + '.', 'test_mkldnn' + dev_name + activation_quant_type + '_' + + weight_quant_type, marked_nodes) mkldnn_program = test_graph.to_program() # Check the transformation weights of conv2d and mul @@ -174,8 +174,9 @@ class TestMKLDNNTransformBasedFreezePass(unittest.TestCase): # output self.check_program(mkldnn_program) if not for_ci: - print('{}: {}'.format('w_mkldnn' + dev_name + activation_quant_type - + '_' + weight_quant_type, np.sum(w_mkldnn))) + print('{}: {}'.format( + 'w_mkldnn' + dev_name + activation_quant_type + '_' + + weight_quant_type, np.sum(w_mkldnn))) def test_mkldnn_graph_cpu_static(self): with fluid.unique_name.guard(): diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py index fe261237f12..c42777d673a 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py @@ -46,6 +46,7 @@ def linear_fc(num): def residual_block(num, quant_skip_pattern=None): + def conv_bn_layer(input, ch_out, filter_size, @@ -53,38 +54,42 @@ def residual_block(num, quant_skip_pattern=None): padding, act='relu', bias_attr=False): - tmp = fluid.layers.conv2d( - input=input, - 
filter_size=filter_size, - num_filters=ch_out, - stride=stride, - padding=padding, - act=None, - bias_attr=bias_attr) + tmp = fluid.layers.conv2d(input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=bias_attr) return fluid.layers.batch_norm(input=tmp, act=act) - data = fluid.layers.data( - name='image', - shape=[1, 1, 32, 32], - dtype='float32', - append_batch_size=False) - label = fluid.layers.data( - name='label', shape=[1, 1], dtype='int64', append_batch_size=False) + data = fluid.layers.data(name='image', + shape=[1, 1, 32, 32], + dtype='float32', + append_batch_size=False) + label = fluid.layers.data(name='label', + shape=[1, 1], + dtype='int64', + append_batch_size=False) hidden = data for _ in six.moves.xrange(num): conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True) short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None) hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu') - matmul_weight = fluid.layers.create_parameter( - shape=[1, 16, 32, 32], dtype='float32') + matmul_weight = fluid.layers.create_parameter(shape=[1, 16, 32, 32], + dtype='float32') hidden = fluid.layers.matmul(hidden, matmul_weight, True, True) if quant_skip_pattern: with fluid.name_scope(quant_skip_pattern): - pool = fluid.layers.pool2d( - input=hidden, pool_size=2, pool_type='avg', pool_stride=2) + pool = fluid.layers.pool2d(input=hidden, + pool_size=2, + pool_type='avg', + pool_stride=2) else: - pool = fluid.layers.pool2d( - input=hidden, pool_size=2, pool_type='avg', pool_stride=2) + pool = fluid.layers.pool2d(input=hidden, + pool_size=2, + pool_type='avg', + pool_stride=2) fc = fluid.layers.fc(input=pool, size=10) loss = fluid.layers.cross_entropy(input=fc, label=label) loss = fluid.layers.mean(loss) @@ -92,23 +97,21 @@ def residual_block(num, quant_skip_pattern=None): def conv_net(img, label, quant_skip_pattern): - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - pool_size=2, - pool_stride=2, - pool_type='max', - act="relu") + conv_pool_1 = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + pool_type='max', + act="relu") conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - pool_size=2, - pool_stride=2, - pool_type='avg', - act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + pool_type='avg', + act="relu") hidden = fluid.layers.fc(input=conv_pool_2, size=100, act='relu') with fluid.name_scope(quant_skip_pattern): prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') @@ -118,6 +121,7 @@ def conv_net(img, label, quant_skip_pattern): class TestQuantizationTransformPass(unittest.TestCase): + def setUp(self): self.quantizable_op_and_inputs = { 'conv2d': ['Input', 'Filter'], @@ -193,8 +197,9 @@ class TestQuantizationTransformPass(unittest.TestCase): self.linear_fc_quant('range_abs_max', 'abs_max', for_ci=True) def test_linear_fc_quant_moving_average_abs_max(self): - self.linear_fc_quant( - 'moving_average_abs_max', 'channel_wise_abs_max', for_ci=True) + self.linear_fc_quant('moving_average_abs_max', + 'channel_wise_abs_max', + for_ci=True) def residual_block_quant(self, activation_quant_type, @@ -236,24 +241,28 @@ class TestQuantizationTransformPass(unittest.TestCase): def 
test_residual_block_abs_max(self): quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul'] - self.residual_block_quant( - 'abs_max', 'abs_max', quantizable_op_type, for_ci=True) + self.residual_block_quant('abs_max', + 'abs_max', + quantizable_op_type, + for_ci=True) def test_residual_block_range_abs_max(self): quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul'] - self.residual_block_quant( - 'range_abs_max', 'abs_max', quantizable_op_type, for_ci=True) + self.residual_block_quant('range_abs_max', + 'abs_max', + quantizable_op_type, + for_ci=True) def test_residual_block_moving_average_abs_max(self): quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul'] - self.residual_block_quant( - 'moving_average_abs_max', - 'channel_wise_abs_max', - quantizable_op_type, - for_ci=True) + self.residual_block_quant('moving_average_abs_max', + 'channel_wise_abs_max', + quantizable_op_type, + for_ci=True) class TestQuantizationFreezePass(unittest.TestCase): + def freeze_graph(self, use_cuda, seed, @@ -262,15 +271,18 @@ class TestQuantizationFreezePass(unittest.TestCase): weight_quant_type='abs_max', for_ci=True, quant_skip_pattern='skip_quant'): + def build_program(main, startup, is_test): main.random_seed = seed startup.random_seed = seed with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') + img = fluid.layers.data(name='image', + shape=[1, 28, 28], + dtype='float32') + label = fluid.layers.data(name='label', + shape=[1], + dtype='int64') loss = conv_net(img, label, quant_skip_pattern) if not is_test: opt = fluid.optimizer.Adam(learning_rate=0.001) @@ -308,14 +320,16 @@ class TestQuantizationFreezePass(unittest.TestCase): for op in main_graph.all_op_nodes(): if op.name().find('quantize') > -1: marked_nodes.add(op) - main_graph.draw('.', 'main' + dev_name + activation_quant_type + '_' - + weight_quant_type, marked_nodes) + main_graph.draw( + '.', 'main' + dev_name + activation_quant_type + '_' + + weight_quant_type, marked_nodes) marked_nodes = set() for op in test_graph.all_op_nodes(): if op.name().find('quantize') > -1: marked_nodes.add(op) - test_graph.draw('.', 'test' + dev_name + activation_quant_type + '_' - + weight_quant_type, marked_nodes) + test_graph.draw( + '.', 'test' + dev_name + activation_quant_type + '_' + + weight_quant_type, marked_nodes) build_strategy = fluid.BuildStrategy() build_strategy.memory_optimize = False @@ -327,12 +341,11 @@ class TestQuantizationFreezePass(unittest.TestCase): iters = 5 batch_size = 8 - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=500), - batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=500), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) feeder = fluid.DataFeeder(feed_list=feeds, place=place) with fluid.scope_guard(scope): for _ in range(iters): @@ -341,9 +354,9 @@ class TestQuantizationFreezePass(unittest.TestCase): feed=feeder.feed(data), fetch_list=[loss]) if not for_ci: - print('{}: {}'.format('loss' + dev_name + - activation_quant_type + '_' + - weight_quant_type, loss_v)) + print('{}: {}'.format( + 'loss' + dev_name + activation_quant_type + '_' + + weight_quant_type, loss_v)) 
test_data = next(test_reader()) with fluid.program_guard(quantized_test_program): @@ -365,9 +378,9 @@ class TestQuantizationFreezePass(unittest.TestCase): for op in test_graph.all_op_nodes(): if op.name().find('quantize') > -1: marked_nodes.add(op) - test_graph.draw('.', 'test_freeze' + dev_name + - activation_quant_type + '_' + weight_quant_type, - marked_nodes) + test_graph.draw( + '.', 'test_freeze' + dev_name + activation_quant_type + '_' + + weight_quant_type, marked_nodes) server_program = test_graph.to_program() with fluid.scope_guard(scope): @@ -376,20 +389,22 @@ class TestQuantizationFreezePass(unittest.TestCase): fetch_list=[loss]) self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3) if not for_ci: - print( - '{}: {}'.format('test_loss1' + dev_name + activation_quant_type - + '_' + weight_quant_type, test_loss1)) - print( - '{}: {}'.format('test_loss2' + dev_name + activation_quant_type - + '_' + weight_quant_type, test_loss2)) + print('{}: {}'.format( + 'test_loss1' + dev_name + activation_quant_type + '_' + + weight_quant_type, test_loss1)) + print('{}: {}'.format( + 'test_loss2' + dev_name + activation_quant_type + '_' + + weight_quant_type, test_loss2)) w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor()) # Maybe failed, this is due to the calculation precision # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant)) if not for_ci: - print('{}: {}'.format('w_freeze' + dev_name + activation_quant_type - + '_' + weight_quant_type, np.sum(w_freeze))) - print('{}: {}'.format('w_quant' + dev_name + activation_quant_type + - '_' + weight_quant_type, np.sum(w_quant))) + print('{}: {}'.format( + 'w_freeze' + dev_name + activation_quant_type + '_' + + weight_quant_type, np.sum(w_freeze))) + print('{}: {}'.format( + 'w_quant' + dev_name + activation_quant_type + '_' + + weight_quant_type, np.sum(w_quant))) # Convert parameter to 8-bit. convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place) @@ -399,8 +414,9 @@ class TestQuantizationFreezePass(unittest.TestCase): for op in test_graph.all_op_nodes(): if op.name().find('quantize') > -1: marked_nodes.add(op) - test_graph.draw('.', 'test_int8' + dev_name + activation_quant_type - + '_' + weight_quant_type, marked_nodes) + test_graph.draw( + '.', 'test_int8' + dev_name + activation_quant_type + '_' + + weight_quant_type, marked_nodes) server_program_int8 = test_graph.to_program() # Save the 8-bit parameter and model file. 
with fluid.scope_guard(scope): @@ -417,10 +433,12 @@ class TestQuantizationFreezePass(unittest.TestCase): self.assertEqual(w_8bit.dtype, np.int8) self.assertEqual(np.sum(w_8bit), np.sum(w_freeze)) if not for_ci: - print('{}: {}'.format('w_8bit' + dev_name + activation_quant_type + - '_' + weight_quant_type, np.sum(w_8bit))) - print('{}: {}'.format('w_freeze' + dev_name + activation_quant_type - + '_' + weight_quant_type, np.sum(w_freeze))) + print('{}: {}'.format( + 'w_8bit' + dev_name + activation_quant_type + '_' + + weight_quant_type, np.sum(w_8bit))) + print('{}: {}'.format( + 'w_freeze' + dev_name + activation_quant_type + '_' + + weight_quant_type, np.sum(w_freeze))) mobile_pass = TransformForMobilePass() mobile_pass.apply(test_graph) @@ -429,9 +447,9 @@ class TestQuantizationFreezePass(unittest.TestCase): for op in test_graph.all_op_nodes(): if op.name().find('quantize') > -1: marked_nodes.add(op) - test_graph.draw('.', 'test_mobile' + dev_name + - activation_quant_type + '_' + weight_quant_type, - marked_nodes) + test_graph.draw( + '.', 'test_mobile' + dev_name + activation_quant_type + '_' + + weight_quant_type, marked_nodes) mobile_program = test_graph.to_program() with fluid.scope_guard(scope): @@ -443,63 +461,56 @@ class TestQuantizationFreezePass(unittest.TestCase): def test_freeze_graph_cuda_dynamic(self): if fluid.core.is_compiled_with_cuda(): with fluid.unique_name.guard(): - self.freeze_graph( - True, - seed=1, - activation_quant_type='abs_max', - weight_quant_type='abs_max', - for_ci=True) + self.freeze_graph(True, + seed=1, + activation_quant_type='abs_max', + weight_quant_type='abs_max', + for_ci=True) with fluid.unique_name.guard(): - self.freeze_graph( - True, - seed=1, - activation_quant_type='abs_max', - weight_quant_type='channel_wise_abs_max', - for_ci=True) + self.freeze_graph(True, + seed=1, + activation_quant_type='abs_max', + weight_quant_type='channel_wise_abs_max', + for_ci=True) def test_freeze_graph_cpu_dynamic(self): with fluid.unique_name.guard(): - self.freeze_graph( - False, - seed=2, - activation_quant_type='abs_max', - weight_quant_type='abs_max', - for_ci=True) - self.freeze_graph( - False, - seed=2, - activation_quant_type='abs_max', - weight_quant_type='channel_wise_abs_max', - for_ci=True) + self.freeze_graph(False, + seed=2, + activation_quant_type='abs_max', + weight_quant_type='abs_max', + for_ci=True) + self.freeze_graph(False, + seed=2, + activation_quant_type='abs_max', + weight_quant_type='channel_wise_abs_max', + for_ci=True) def test_freeze_graph_cuda_static(self): if fluid.core.is_compiled_with_cuda(): with fluid.unique_name.guard(): - self.freeze_graph( - True, - seed=1, - activation_quant_type='range_abs_max', - bias_correction=True, - weight_quant_type='abs_max', - for_ci=True) - self.freeze_graph( - True, - seed=1, - activation_quant_type='range_abs_max', - weight_quant_type='abs_max', - for_ci=True) + self.freeze_graph(True, + seed=1, + activation_quant_type='range_abs_max', + bias_correction=True, + weight_quant_type='abs_max', + for_ci=True) + self.freeze_graph(True, + seed=1, + activation_quant_type='range_abs_max', + weight_quant_type='abs_max', + for_ci=True) self.freeze_graph( True, seed=1, activation_quant_type='moving_average_abs_max', weight_quant_type='abs_max', for_ci=True) - self.freeze_graph( - True, - seed=1, - activation_quant_type='range_abs_max', - weight_quant_type='channel_wise_abs_max', - for_ci=True) + self.freeze_graph(True, + seed=1, + activation_quant_type='range_abs_max', + 
weight_quant_type='channel_wise_abs_max', + for_ci=True) self.freeze_graph( True, seed=1, @@ -516,33 +527,30 @@ class TestQuantizationFreezePass(unittest.TestCase): def test_freeze_graph_cpu_static(self): with fluid.unique_name.guard(): - self.freeze_graph( - False, - seed=2, - activation_quant_type='range_abs_max', - weight_quant_type='abs_max', - for_ci=True) - self.freeze_graph( - False, - seed=2, - activation_quant_type='moving_average_abs_max', - weight_quant_type='abs_max', - for_ci=True) - self.freeze_graph( - False, - seed=2, - activation_quant_type='range_abs_max', - weight_quant_type='channel_wise_abs_max', - for_ci=True) - self.freeze_graph( - False, - seed=2, - activation_quant_type='moving_average_abs_max', - weight_quant_type='channel_wise_abs_max', - for_ci=True) + self.freeze_graph(False, + seed=2, + activation_quant_type='range_abs_max', + weight_quant_type='abs_max', + for_ci=True) + self.freeze_graph(False, + seed=2, + activation_quant_type='moving_average_abs_max', + weight_quant_type='abs_max', + for_ci=True) + self.freeze_graph(False, + seed=2, + activation_quant_type='range_abs_max', + weight_quant_type='channel_wise_abs_max', + for_ci=True) + self.freeze_graph(False, + seed=2, + activation_quant_type='moving_average_abs_max', + weight_quant_type='channel_wise_abs_max', + for_ci=True) def quant_dequant_residual_block(num, quant_skip_pattern=None): + def conv_bn_layer(input, ch_out, filter_size, @@ -550,19 +558,19 @@ def quant_dequant_residual_block(num, quant_skip_pattern=None): padding, act='relu', bias_attr=False): - tmp = fluid.layers.conv2d( - input=input, - filter_size=filter_size, - num_filters=ch_out, - stride=stride, - padding=padding, - act=None, - bias_attr=bias_attr) + tmp = fluid.layers.conv2d(input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=bias_attr) return fluid.layers.batch_norm(input=tmp, act=act) data1 = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32') - data2 = fluid.layers.data( - name='matmul_input', shape=[16, 32, 32], dtype='float32') + data2 = fluid.layers.data(name='matmul_input', + shape=[16, 32, 32], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') hidden = data1 for _ in six.moves.xrange(num): @@ -572,29 +580,43 @@ def quant_dequant_residual_block(num, quant_skip_pattern=None): hidden = fluid.layers.matmul(hidden, data2, True, True) if isinstance(quant_skip_pattern, str): with fluid.name_scope(quant_skip_pattern): - pool1 = fluid.layers.pool2d( - input=hidden, pool_size=2, pool_type='avg', pool_stride=2) - pool2 = fluid.layers.pool2d( - input=hidden, pool_size=2, pool_type='max', pool_stride=2) - pool_add = fluid.layers.elementwise_add( - x=pool1, y=pool2, act='relu') + pool1 = fluid.layers.pool2d(input=hidden, + pool_size=2, + pool_type='avg', + pool_stride=2) + pool2 = fluid.layers.pool2d(input=hidden, + pool_size=2, + pool_type='max', + pool_stride=2) + pool_add = fluid.layers.elementwise_add(x=pool1, + y=pool2, + act='relu') elif isinstance(quant_skip_pattern, list): assert len( quant_skip_pattern ) > 1, 'test config error: the len of quant_skip_pattern list should be greater than 1.' 
with fluid.name_scope(quant_skip_pattern[0]): - pool1 = fluid.layers.pool2d( - input=hidden, pool_size=2, pool_type='avg', pool_stride=2) - pool2 = fluid.layers.pool2d( - input=hidden, pool_size=2, pool_type='max', pool_stride=2) + pool1 = fluid.layers.pool2d(input=hidden, + pool_size=2, + pool_type='avg', + pool_stride=2) + pool2 = fluid.layers.pool2d(input=hidden, + pool_size=2, + pool_type='max', + pool_stride=2) with fluid.name_scope(quant_skip_pattern[1]): - pool_add = fluid.layers.elementwise_add( - x=pool1, y=pool2, act='relu') + pool_add = fluid.layers.elementwise_add(x=pool1, + y=pool2, + act='relu') else: - pool1 = fluid.layers.pool2d( - input=hidden, pool_size=2, pool_type='avg', pool_stride=2) - pool2 = fluid.layers.pool2d( - input=hidden, pool_size=2, pool_type='max', pool_stride=2) + pool1 = fluid.layers.pool2d(input=hidden, + pool_size=2, + pool_type='avg', + pool_stride=2) + pool2 = fluid.layers.pool2d(input=hidden, + pool_size=2, + pool_type='max', + pool_stride=2) pool_add = fluid.layers.elementwise_add(x=pool1, y=pool2, act='relu') fc = fluid.layers.fc(input=pool_add, size=10) loss = fluid.layers.cross_entropy(input=fc, label=label) @@ -603,6 +625,7 @@ def quant_dequant_residual_block(num, quant_skip_pattern=None): class TestAddQuantDequantPass(unittest.TestCase): + def setUp(self): self._target_ops = {'elementwise_add', 'pool2d'} self._target_grad_ops = {'elementwise_add_grad', 'pool2d_grad'} @@ -626,9 +649,9 @@ class TestAddQuantDequantPass(unittest.TestCase): for input_name in op_node.input_arg_names(): in_node = graph._find_node_by_name(op_node.inputs, input_name) - in_nodes_all_not_persistable = ( - in_nodes_all_not_persistable and - not in_node.persistable()) + in_nodes_all_not_persistable = (in_nodes_all_not_persistable + and + not in_node.persistable()) if not in_nodes_all_not_persistable: continue input_names = op_node.input_arg_names() @@ -671,23 +694,25 @@ class TestAddQuantDequantPass(unittest.TestCase): def test_residual_block(self): quantizable_op_type = ['elementwise_add', 'pool2d', 'mul', 'matmul'] - self.residual_block_quant( - quantizable_op_type, skip_pattern=None, for_ci=True) + self.residual_block_quant(quantizable_op_type, + skip_pattern=None, + for_ci=True) def test_residual_block_skip_pattern(self): quantizable_op_type = ['elementwise_add', 'pool2d', 'mul', 'matmul'] - self.residual_block_quant( - quantizable_op_type, skip_pattern='skip_quant', for_ci=True) + self.residual_block_quant(quantizable_op_type, + skip_pattern='skip_quant', + for_ci=True) def test_residual_block_skip_pattern_1(self): quantizable_op_type = ['elementwise_add', 'pool2d', 'mul', 'matmul'] - self.residual_block_quant( - quantizable_op_type, - skip_pattern=['skip_quant1', 'skip_quant2'], - for_ci=True) + self.residual_block_quant(quantizable_op_type, + skip_pattern=['skip_quant1', 'skip_quant2'], + for_ci=True) class TestQuantizationTransformPassV2(unittest.TestCase): + def setUp(self): self.quantizable_op_and_inputs = { 'conv2d': ['Input', 'Filter'], @@ -802,13 +827,17 @@ class TestQuantizationTransformPassV2(unittest.TestCase): def test_residual_block_abs_max(self): quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul'] - self.residual_block_quant( - 'abs_max', 'abs_max', quantizable_op_type, for_ci=True) + self.residual_block_quant('abs_max', + 'abs_max', + quantizable_op_type, + for_ci=True) def test_residual_block_channel_wise_abs_max(self): quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul'] - self.residual_block_quant( - 'abs_max', 
'channel_wise_abs_max', quantizable_op_type, for_ci=True) + self.residual_block_quant('abs_max', + 'channel_wise_abs_max', + quantizable_op_type, + for_ci=True) if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py index ec2c7a91f96..acf3c68600c 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py @@ -34,23 +34,21 @@ os.environ["CPU_NUM"] = "1" def conv_net(img, label): - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - pool_size=2, - pool_stride=2, - pool_type='max', - act="relu") + conv_pool_1 = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + pool_type='max', + act="relu") conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - pool_size=2, - pool_stride=2, - pool_type='avg', - act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + pool_type='avg', + act="relu") hidden = fluid.layers.fc(input=conv_pool_2, size=100, act='relu') prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) @@ -59,21 +57,25 @@ def conv_net(img, label): class TestQuantizationScalePass(unittest.TestCase): + def quantization_scale(self, use_cuda, seed, activation_quant_type, weight_quant_type='abs_max', for_ci=False): + def build_program(main, startup, is_test): main.random_seed = seed startup.random_seed = seed with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') + img = fluid.layers.data(name='image', + shape=[1, 28, 28], + dtype='float32') + label = fluid.layers.data(name='label', + shape=[1], + dtype='int64') loss = conv_net(img, label) if not is_test: opt = fluid.optimizer.Adam(learning_rate=0.0001) @@ -135,10 +137,9 @@ class TestQuantizationScalePass(unittest.TestCase): iters = 5 batch_size = 8 - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=500), - batch_size=batch_size) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=500), + batch_size=batch_size) feeder = fluid.DataFeeder(feed_list=feeds, place=place) with fluid.scope_guard(scope): for _ in range(iters): @@ -169,11 +170,11 @@ class TestQuantizationScalePass(unittest.TestCase): f.write(str(server_program)) with fluid.scope_guard(scope): - fluid.io.save_inference_model( - 'quant_scale_model' + dev_name, ['image', 'label'], [loss], - exe, - server_program, - clip_extra=True) + fluid.io.save_inference_model('quant_scale_model' + dev_name, + ['image', 'label'], [loss], + exe, + server_program, + clip_extra=True) def test_quant_scale_cuda(self): if fluid.core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantize_transpiler_v2.py b/python/paddle/fluid/contrib/slim/tests/test_quantize_transpiler_v2.py index f5eb7d347ca..80fe720504e 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantize_transpiler_v2.py +++ 
b/python/paddle/fluid/contrib/slim/tests/test_quantize_transpiler_v2.py @@ -30,22 +30,20 @@ os.environ["CPU_NUM"] = "1" def conv_net(img, label): - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - pool_size=2, - pool_stride=2, - pool_type='max', - act="relu") - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - pool_size=2, - pool_stride=2, - pool_type='avg', - act="relu") + conv_pool_1 = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + pool_type='max', + act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + pool_type='avg', + act="relu") with fluid.name_scope("skip_quant"): hidden = fluid.layers.fc(input=conv_pool_1, size=100, act='relu') prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') @@ -55,21 +53,25 @@ def conv_net(img, label): class TestQuantizeProgramPass(unittest.TestCase): + def quantize_program(self, use_cuda, seed, activation_quant_type='abs_max', weight_quant_type='abs_max', for_ci=False): + def build_program(main, startup, is_test): main.random_seed = seed startup.random_seed = seed with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') + img = fluid.layers.data(name='image', + shape=[1, 28, 28], + dtype='float32') + label = fluid.layers.data(name='label', + shape=[1], + dtype='int64') loss = conv_net(img, label) if not is_test: opt = fluid.optimizer.Adam(learning_rate=0.0001) @@ -88,8 +90,8 @@ class TestQuantizeProgramPass(unittest.TestCase): test_program = test_program.clone(for_test=True) if not for_ci: - train_graph = IrGraph( - core.Graph(train_program.desc), for_test=False) + train_graph = IrGraph(core.Graph(train_program.desc), + for_test=False) train_graph.draw('.', 'train_program_1') test_graph = IrGraph(core.Graph(test_program.desc), for_test=True) test_graph.draw('.', 'test_program_1') @@ -108,8 +110,8 @@ class TestQuantizeProgramPass(unittest.TestCase): with fluid.scope_guard(scope): exe.run(startup_program) if not for_ci: - train_graph = IrGraph( - core.Graph(train_program.desc), for_test=False) + train_graph = IrGraph(core.Graph(train_program.desc), + for_test=False) train_graph.draw('.', 'train_program_2') test_graph = IrGraph(core.Graph(test_program.desc), for_test=True) test_graph.draw('.', 'test_program_2') @@ -123,8 +125,8 @@ class TestQuantizeProgramPass(unittest.TestCase): iters = 5 batch_size = 8 - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=batch_size) feeder = fluid.DataFeeder(feed_list=feeds, place=place) with fluid.scope_guard(scope): for idx in range(iters): @@ -141,20 +143,19 @@ class TestQuantizeProgramPass(unittest.TestCase): qt.convert(test_program, scope) if not for_ci: with fluid.scope_guard(scope): - fluid.io.save_inference_model( - './infer_model', ['image', 'label'], [loss], - exe, - test_program, - clip_extra=True) + fluid.io.save_inference_model('./infer_model', + ['image', 'label'], [loss], + exe, + test_program, + clip_extra=True) def test_gpu_1(self): if fluid.core.is_compiled_with_cuda(): - self.quantize_program( - use_cuda=True, - seed=1, - activation_quant_type='abs_max', - 
weight_quant_type='abs_max', - for_ci=True) + self.quantize_program(use_cuda=True, + seed=1, + activation_quant_type='abs_max', + weight_quant_type='abs_max', + for_ci=True) def test_gpu_2(self): if fluid.core.is_compiled_with_cuda(): @@ -166,20 +167,18 @@ class TestQuantizeProgramPass(unittest.TestCase): for_ci=True) def test_cpu_1(self): - self.quantize_program( - use_cuda=False, - seed=2, - activation_quant_type='abs_max', - weight_quant_type='abs_max', - for_ci=True) + self.quantize_program(use_cuda=False, + seed=2, + activation_quant_type='abs_max', + weight_quant_type='abs_max', + for_ci=True) def test_cpu_2(self): - self.quantize_program( - use_cuda=False, - seed=2, - activation_quant_type='moving_average_abs_max', - weight_quant_type='channel_wise_abs_max', - for_ci=True) + self.quantize_program(use_cuda=False, + seed=2, + activation_quant_type='moving_average_abs_max', + weight_quant_type='channel_wise_abs_max', + for_ci=True) if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py b/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py index f03d0faa398..96c56529cf1 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py +++ b/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py @@ -36,23 +36,21 @@ os.environ["CPU_NUM"] = "1" def conv_net(img, label): - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - pool_size=2, - pool_stride=2, - pool_type='max', - act="relu") + conv_pool_1 = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + pool_type='max', + act="relu") conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - pool_size=2, - pool_stride=2, - pool_type='avg', - act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + pool_type='avg', + act="relu") hidden = fluid.layers.fc(input=conv_pool_2, size=100, act='relu') prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) @@ -79,6 +77,7 @@ def pact(x, name=None): class TestUserDefinedQuantization(unittest.TestCase): + def quantization_scale(self, use_cuda, seed, @@ -89,16 +88,19 @@ class TestUserDefinedQuantization(unittest.TestCase): weight_preprocess_func=None, act_quantize_func=None, weight_quantize_func=None): + def build_program(main, startup, is_test): main.random_seed = seed startup.random_seed = seed with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') + img = fluid.layers.data(name='image', + shape=[1, 28, 28], + dtype='float32') img.stop_gradient = False - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') + label = fluid.layers.data(name='label', + shape=[1], + dtype='int64') loss = conv_net(img, label) if not is_test: opt = fluid.optimizer.SGD(learning_rate=0.0001) @@ -180,10 +182,9 @@ class TestUserDefinedQuantization(unittest.TestCase): iters = 5 batch_size = 8 - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=500), - batch_size=batch_size) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=500), + batch_size=batch_size) 
feeder = fluid.DataFeeder(feed_list=feeds, place=place) with fluid.scope_guard(scope): for _ in range(iters): diff --git a/python/paddle/fluid/contrib/slim/tests/test_weight_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_weight_quantization_mobilenetv1.py index 744c97c514b..cbe0326c46a 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_weight_quantization_mobilenetv1.py +++ b/python/paddle/fluid/contrib/slim/tests/test_weight_quantization_mobilenetv1.py @@ -46,6 +46,7 @@ def _set_variable_data(scope, place, var_name, np_value): class TestWeightQuantization(unittest.TestCase): + def setUp(self): self.weight_quantization_dir = 'weight_quantization' self.cache_folder = os.path.join(DATA_HOME, @@ -64,8 +65,8 @@ class TestWeightQuantization(unittest.TestCase): def cache_unzipping(self, target_folder, zip_path): if not os.path.exists(target_folder): - cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(target_folder, - zip_path) + cmd = 'mkdir {0} && tar xf {1} -C {0}'.format( + target_folder, zip_path) os.system(cmd) def quantize_to_int(self, model_name, model_data_url, model_data_md5, @@ -94,8 +95,8 @@ class TestWeightQuantization(unittest.TestCase): try: os.system("rm -rf {}".format(save_model_dir)) except Exception as e: - print("Failed to delete {} due to {}".format(save_model_dir, str( - e))) + print("Failed to delete {} due to {}".format( + save_model_dir, str(e))) def convert_to_fp16(self, model_name, model_data_url, model_data_md5, model_filename, params_filename): @@ -123,15 +124,18 @@ class TestWeightQuantization(unittest.TestCase): params_filename, input_data, True) self.assertTrue( - np.allclose( - res_fp32, res_fp16, rtol=1e-5, atol=1e-08, equal_nan=True), + np.allclose(res_fp32, + res_fp16, + rtol=1e-5, + atol=1e-08, + equal_nan=True), msg='Failed to test the accuracy of the fp32 and fp16 model.') try: os.system("rm -rf {}".format(save_model_dir)) except Exception as e: - print("Failed to delete {} due to {}".format(save_model_dir, str( - e))) + print("Failed to delete {} due to {}".format( + save_model_dir, str(e))) def run_models(self, model_dir, model_filename, params_filename, input_data, is_fp16_model): diff --git a/python/paddle/fluid/contrib/sparsity/__init__.py b/python/paddle/fluid/contrib/sparsity/__init__.py index ec288a12871..b08778a707d 100644 --- a/python/paddle/fluid/contrib/sparsity/__init__.py +++ b/python/paddle/fluid/contrib/sparsity/__init__.py @@ -1,12 +1,12 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/fluid/contrib/sparsity/asp.py b/python/paddle/fluid/contrib/sparsity/asp.py index c366af7237d..0710ee9c722 100644 --- a/python/paddle/fluid/contrib/sparsity/asp.py +++ b/python/paddle/fluid/contrib/sparsity/asp.py @@ -1,12 +1,12 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -118,8 +118,8 @@ def set_excluded_layers(param_names, main_program=None): """ if main_program is None: main_program = paddle.static.default_main_program() - ASPHelper.set_excluded_layers( - param_names=param_names, main_program=main_program) + ASPHelper.set_excluded_layers(param_names=param_names, + main_program=main_program) def reset_excluded_layers(main_program=None): @@ -454,16 +454,15 @@ def prune_model(model, n=2, m=4, mask_algo='mask_1d', with_mask=True): place = paddle.CUDAPlace(gpu_id) else: raise TypeError( - "model should be paddle.nn.Layer or paddle.static.Program, but got {}". - format(type(model))) + "model should be paddle.nn.Layer or paddle.static.Program, but got {}" + .format(type(model))) - return prune_func( - place, - model, - n=n, - m=m, - mask_algo=MaskAlgo_mapping[mask_algo], - with_mask=with_mask) + return prune_func(place, + model, + n=n, + m=m, + mask_algo=MaskAlgo_mapping[mask_algo], + with_mask=with_mask) class ProgramASPInfo(object): @@ -624,8 +623,8 @@ class ASPHelper(object): param.set_value(weight_pruned_nparray) if with_mask: - weight_mask_param = asp_info.mask_vars.get(param.name, - None) + weight_mask_param = asp_info.mask_vars.get( + param.name, None) assert weight_mask_param is not None, \ 'Cannot find {} variable, please call sparsity.decorate() to' \ ' decorate your optimizer first!'.format(ASPHelper._get_mask_name(param.name)) @@ -642,13 +641,12 @@ class ASPHelper(object): target_program = param.block.program assert target_program is not None, \ 'Cannot get paddle.static.Program from Paddle.nn.Layer.' - return ASPHelper.prune_model_by_program( - place, - target_program, - n=n, - m=m, - mask_algo=mask_algo, - with_mask=with_mask) + return ASPHelper.prune_model_by_program(place, + target_program, + n=n, + m=m, + mask_algo=mask_algo, + with_mask=with_mask) @staticmethod def _get_mask_name(param_name): @@ -746,10 +744,10 @@ class ASPHelper(object): param_name_no_weight_suffix, None) if func is None: layer_name = param_name_no_weight_suffix[: - param_name_no_weight_suffix. - rfind('_')] - func = supported_layers_and_prune_func_map.get(layer_name, - _default_pruning) + param_name_no_weight_suffix + .rfind('_')] + func = supported_layers_and_prune_func_map.get( + layer_name, _default_pruning) return func @classmethod @@ -859,16 +857,17 @@ class ASPHelper(object): asp_info = cls._get_program_asp_info(main_program) for param in params: if param.name in asp_info.mask_vars: - block.append_op( - type='elementwise_mul', - inputs={"X": param, - 'Y': asp_info.mask_vars[param.name]}, - outputs={'Out': param}, - attrs={ - 'axis': -1, - 'use_mkldnn': False, - OP_ROLE_KEY: int(OpRole.Optimize) - }) + block.append_op(type='elementwise_mul', + inputs={ + "X": param, + 'Y': asp_info.mask_vars[param.name] + }, + outputs={'Out': param}, + attrs={ + 'axis': -1, + 'use_mkldnn': False, + OP_ROLE_KEY: int(OpRole.Optimize) + }) class OptimizerWithSparsityGuarantee(object): @@ -903,12 +902,11 @@ class OptimizerWithSparsityGuarantee(object): list: operators from :attr:`optimizer`.minimize(:attr:`loss`). 
list: pairs of parameters and their gradients. """ - return ASPHelper._minimize( - self._optimizer, - loss, - startup_program=startup_program, - parameter_list=parameter_list, - no_grad_set=no_grad_set) + return ASPHelper._minimize(self._optimizer, + loss, + startup_program=startup_program, + parameter_list=parameter_list, + no_grad_set=no_grad_set) @dygraph_only def step(self): diff --git a/python/paddle/fluid/contrib/sparsity/supported_layer_list.py b/python/paddle/fluid/contrib/sparsity/supported_layer_list.py index 105c2ded9ee..d9d8c262ada 100644 --- a/python/paddle/fluid/contrib/sparsity/supported_layer_list.py +++ b/python/paddle/fluid/contrib/sparsity/supported_layer_list.py @@ -1,12 +1,12 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -30,21 +30,23 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name): # cuSparseLt would prune matrix A along k dimension. # In sparse training, layer weight matrices is viewed sparse matrix A, so # the math fomula should be 'Act(WX + b)'. However, default fomula in PaddlePaddle - # is 'Act(XW + b)'. For enabling SPMMA, weights and inputs should be transposed - # for computing, Act( (W^T X^T)^T + b). Therefore, we have to prune alog k dimension - # of W^T, which is m dimension of W. Moreove, all mask generating functions in - # sparsity/utils is row-major pruning. That is the reason we have to transpose weight - # matrices beforce invoking create_mask. Then we transpose the result mask to make + # is 'Act(XW + b)'. For enabling SPMMA, weights and inputs should be transposed + # for computing, Act( (W^T X^T)^T + b). Therefore, we have to prune alog k dimension + # of W^T, which is m dimension of W. Moreove, all mask generating functions in + # sparsity/utils is row-major pruning. That is the reason we have to transpose weight + # matrices beforce invoking create_mask. Then we transpose the result mask to make # sure its shape to be the same as the input weight. - weight_sparse_mask = sparsity.create_mask( - weight_nparray.T, func_name=func_name, n=n, m=m).T + weight_sparse_mask = sparsity.create_mask(weight_nparray.T, + func_name=func_name, + n=n, + m=m).T weight_pruned_nparray = np.multiply(weight_nparray, weight_sparse_mask) assert sparsity.check_sparsity(weight_pruned_nparray.T, n=n, m=m, func_name=checked_func_name), \ 'Pruning {} weight matrix failure!!!'.format(param_name) return weight_pruned_nparray, weight_sparse_mask -# When value of given key in this DICT is None, +# When value of given key in this DICT is None, # ASP will call default pruning function in pruning stage. _supported_layers_and_prune_func_map_lock = threading.Lock() supported_layers_and_prune_func_map = {} diff --git a/python/paddle/fluid/contrib/sparsity/utils.py b/python/paddle/fluid/contrib/sparsity/utils.py index a28f7fc2b4e..1d0694c4dde 100644 --- a/python/paddle/fluid/contrib/sparsity/utils.py +++ b/python/paddle/fluid/contrib/sparsity/utils.py @@ -1,12 +1,12 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
# Copyright (c) 2021 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -406,8 +406,8 @@ def _compute_valid_2d_patterns(n, m): patterns = patterns + patterns patterns = np.asarray(list(set(permutations(patterns, m)))) - valid = ((patterns.sum(axis=1) <= n).sum(axis=1) == m - ).nonzero()[0].reshape(-1) + valid = ((patterns.sum(axis=1) <= n).sum( + axis=1) == m).nonzero()[0].reshape(-1) valid_patterns = np.empty((valid.shape[0], m, m)) valid_patterns[:] = patterns[valid[:]] @@ -454,9 +454,9 @@ def get_mask_2d_best(mat, n, m): mat_flattern, shape = _reshape_2d(mat, m) mask_flattern = np.ones_like(mat_flattern).reshape(-1, m, m) - pmax = np.argmax( - np.matmul(mat_flattern, patterns.reshape(patterns.shape[0], m * m).T), - axis=1) + pmax = np.argmax(np.matmul(mat_flattern, + patterns.reshape(patterns.shape[0], m * m).T), + axis=1) mask_flattern[:] = patterns[pmax[:]] mask = np.empty(shape) @@ -578,8 +578,8 @@ def check_sparsity(tensor, func_name=CheckMethod.CHECK_1D, n=2, m=4): t = t.reshape(shape[0] * shape[1], shape[2]) # 4d-tensor conv (h, w, in, out) -> (h*w*out, in) in GemmConvKernel Op elif len(shape) == 4: - t = t.transpose([0, 1, 3, 2]).reshape( - [shape[0] * shape[1] * shape[3], shape[2]]) + t = t.transpose([0, 1, 3, + 2]).reshape([shape[0] * shape[1] * shape[3], shape[2]]) else: raise ValueError("The dimension of input tensor is not supported in create_mask, " \ "Only dimension < 4 is supported but got {}".format(len(shape))) diff --git a/python/paddle/fluid/contrib/tests/test_amp_list.py b/python/paddle/fluid/contrib/tests/test_amp_list.py index 9133a404fa0..fb46df13776 100644 --- a/python/paddle/fluid/contrib/tests/test_amp_list.py +++ b/python/paddle/fluid/contrib/tests/test_amp_list.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,6 +18,7 @@ from paddle.fluid.contrib.mixed_precision.fp16_lists import AutoMixedPrecisionLi class TestAMPList(unittest.TestCase): + def test_main(self): custom_white_list = [ 'lookup_table', diff --git a/python/paddle/fluid/contrib/tests/test_bf16_utils.py b/python/paddle/fluid/contrib/tests/test_bf16_utils.py index a1439c487b6..c456b1263ce 100644 --- a/python/paddle/fluid/contrib/tests/test_bf16_utils.py +++ b/python/paddle/fluid/contrib/tests/test_bf16_utils.py @@ -22,6 +22,7 @@ paddle.enable_static() class AMPTest(unittest.TestCase): + def setUp(self): self.bf16_list = copy.copy(amp.bf16.amp_lists.bf16_list) self.fp32_list = copy.copy(amp.bf16.amp_lists.fp32_list) @@ -95,6 +96,7 @@ class AMPTest(unittest.TestCase): class AMPTest2(unittest.TestCase): + def test_amp_lists_(self): # 7. 
w={'lstm'} b={'lstm'} # raise ValueError @@ -113,10 +115,12 @@ class AMPTest2(unittest.TestCase): var1 = block.create_var(name="X", shape=[3], dtype='float32') var2 = block.create_var(name="Y", shape=[3], dtype='float32') var3 = block.create_var(name="Z", shape=[3], dtype='float32') - op1 = block.append_op( - type="abs", inputs={"X": [var1]}, outputs={"Out": [var2]}) - op2 = block.append_op( - type="abs", inputs={"X": [var2]}, outputs={"Out": [var3]}) + op1 = block.append_op(type="abs", + inputs={"X": [var1]}, + outputs={"Out": [var2]}) + op2 = block.append_op(type="abs", + inputs={"X": [var2]}, + outputs={"Out": [var3]}) amp_lists_1 = amp.bf16.AutoMixedPrecisionListsBF16( custom_fp32_varnames={'X'}) assert amp.bf16.amp_utils._is_in_fp32_varnames(op1, amp_lists_1) @@ -132,10 +136,12 @@ class AMPTest2(unittest.TestCase): var1 = block.create_var(name="X", shape=[3], dtype='float32') var2 = block.create_var(name="Y", shape=[3], dtype='float32') var3 = block.create_var(name="Z", shape=[3], dtype='float32') - op1 = block.append_op( - type="abs", inputs={"X": [var1]}, outputs={"Out": [var2]}) - op2 = block.append_op( - type="abs", inputs={"X": [var2]}, outputs={"Out": [var3]}) + op1 = block.append_op(type="abs", + inputs={"X": [var1]}, + outputs={"Out": [var2]}) + op2 = block.append_op(type="abs", + inputs={"X": [var2]}, + outputs={"Out": [var3]}) res = amp.bf16.amp_utils.find_true_post_op(block.ops, op1, "Y") assert (res == [op2]) @@ -146,20 +152,26 @@ class AMPTest2(unittest.TestCase): var1 = block.create_var(name="X", shape=[3], dtype='float32') var2 = block.create_var(name="Y", shape=[3], dtype='float32') - inititializer_op = startup_block._prepend_op( - type="fill_constant", - outputs={"Out": var1}, - attrs={"shape": var1.shape, - "dtype": var1.dtype, - "value": 1.0}) - - op1 = block.append_op( - type="abs", inputs={"X": [var1]}, outputs={"Out": [var2]}) - result = amp.bf16.amp_utils.find_true_post_op( - block.ops, inititializer_op, "X", search_all=False) + inititializer_op = startup_block._prepend_op(type="fill_constant", + outputs={"Out": var1}, + attrs={ + "shape": var1.shape, + "dtype": var1.dtype, + "value": 1.0 + }) + + op1 = block.append_op(type="abs", + inputs={"X": [var1]}, + outputs={"Out": [var2]}) + result = amp.bf16.amp_utils.find_true_post_op(block.ops, + inititializer_op, + "X", + search_all=False) assert (len(result) == 0) - result = amp.bf16.amp_utils.find_true_post_op( - block.ops, inititializer_op, "X", search_all=True) + result = amp.bf16.amp_utils.find_true_post_op(block.ops, + inititializer_op, + "X", + search_all=True) assert (result == [op1]) diff --git a/python/paddle/fluid/contrib/tests/test_correlation.py b/python/paddle/fluid/contrib/tests/test_correlation.py index 50b091415a5..c98cbd1dd93 100644 --- a/python/paddle/fluid/contrib/tests/test_correlation.py +++ b/python/paddle/fluid/contrib/tests/test_correlation.py @@ -55,15 +55,17 @@ def corr(x_1, y1_index = j + pad_size x2_index = x1_index + k y2_index = y1_index + l - output[b, l + d + D * (k + d), i, j] = np.mean( - rinput1[b, x1_index:x1_index + K, y1_index:y1_index - + K] * rinput2[b, x2_index:x2_index + K, - y2_index:y2_index + K]) + output[b, l + d + D * (k + d), i, + j] = np.mean(rinput1[b, x1_index:x1_index + K, + y1_index:y1_index + K] * + rinput2[b, x2_index:x2_index + K, + y2_index:y2_index + K]) return output class TestCorrelationOp(unittest.TestCase): + def test_check_output(self): if not fluid.core.is_compiled_with_cuda(): return @@ -71,38 +73,34 @@ class TestCorrelationOp(unittest.TestCase): 
np.set_printoptions(threshold=np.inf) x_shape = (2, 10, 3, 3) x_type = 'float32' - x1 = fluid.layers.data( - name='x1', - shape=x_shape, - dtype=x_type, - append_batch_size=False, - stop_gradient=False) - x2 = fluid.layers.data( - name='x2', - shape=x_shape, - dtype=x_type, - append_batch_size=False, - stop_gradient=False) + x1 = fluid.layers.data(name='x1', + shape=x_shape, + dtype=x_type, + append_batch_size=False, + stop_gradient=False) + x2 = fluid.layers.data(name='x2', + shape=x_shape, + dtype=x_type, + append_batch_size=False, + stop_gradient=False) x1_np = np.random.randn(2, 3, 4, 5).astype(x_type) x2_np = np.random.randn(2, 3, 4, 5).astype(x_type) - out_np = corr( - x1_np, - x2_np, - pad_size=4, - kernel_size=1, - max_displacement=4, - stride1=1, - stride2=1) - - out = fluid.contrib.correlation( - x1, - x2, - pad_size=4, - kernel_size=1, - max_displacement=4, - stride1=1, - stride2=1) + out_np = corr(x1_np, + x2_np, + pad_size=4, + kernel_size=1, + max_displacement=4, + stride1=1, + stride2=1) + + out = fluid.contrib.correlation(x1, + x2, + pad_size=4, + kernel_size=1, + max_displacement=4, + stride1=1, + stride2=1) loss = fluid.layers.reduce_mean(out) optimizer = fluid.optimizer.Momentum(0.0001, 0.9) @@ -110,30 +108,33 @@ class TestCorrelationOp(unittest.TestCase): place = fluid.CUDAPlace(0) exe = fluid.Executor(place) - res = exe.run(feed={'x1': x1_np, - 'x2': x2_np}, + res = exe.run(feed={ + 'x1': x1_np, + 'x2': x2_np + }, fetch_list=[out.name, loss.name]) self.assertTrue(np.allclose(res[0], out_np)) class Net(fluid.dygraph.Layer): + def __init__(self, name_scope): super(Net, self).__init__(name_scope) def forward(self, x1, x2): - y = fluid.contrib.correlation( - x1, - x2, - pad_size=4, - kernel_size=1, - max_displacement=4, - stride1=1, - stride2=1) + y = fluid.contrib.correlation(x1, + x2, + pad_size=4, + kernel_size=1, + max_displacement=4, + stride1=1, + stride2=1) return y class TestCorrelationOpDyGraph(unittest.TestCase): + def test_check_output(self): if not fluid.core.is_compiled_with_cuda(): return @@ -145,14 +146,13 @@ class TestCorrelationOpDyGraph(unittest.TestCase): with fluid.dygraph.guard(place): x1_np = np.random.randn(2, 3, 4, 5).astype(x_type) x2_np = np.random.randn(2, 3, 4, 5).astype(x_type) - out_np = corr( - x1_np, - x2_np, - pad_size=4, - kernel_size=1, - max_displacement=4, - stride1=1, - stride2=1) + out_np = corr(x1_np, + x2_np, + pad_size=4, + kernel_size=1, + max_displacement=4, + stride1=1, + stride2=1) x1 = to_variable(x1_np) x2 = to_variable(x2_np) diff --git a/python/paddle/fluid/contrib/tests/test_fp16_utils.py b/python/paddle/fluid/contrib/tests/test_fp16_utils.py index 0b51f2dcc86..54753ce4479 100644 --- a/python/paddle/fluid/contrib/tests/test_fp16_utils.py +++ b/python/paddle/fluid/contrib/tests/test_fp16_utils.py @@ -22,6 +22,7 @@ paddle.enable_static() class AMPTest(unittest.TestCase): + def test_find_op_index(self): block = fluid.default_main_program().global_block() op_desc = core.OpDesc() @@ -34,10 +35,12 @@ class AMPTest(unittest.TestCase): var1 = block.create_var(name="X", shape=[3], dtype='float32') var2 = block.create_var(name="Y", shape=[3], dtype='float32') var3 = block.create_var(name="Z", shape=[3], dtype='float32') - op1 = block.append_op( - type="abs", inputs={"X": [var1]}, outputs={"Out": [var2]}) - op2 = block.append_op( - type="abs", inputs={"X": [var2]}, outputs={"Out": [var3]}) + op1 = block.append_op(type="abs", + inputs={"X": [var1]}, + outputs={"Out": [var2]}) + op2 = block.append_op(type="abs", + inputs={"X": [var2]}, 
+ outputs={"Out": [var3]}) res = fp16_utils.find_true_post_op(block.ops, op1, "Y") assert (res == [op2]) diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py index 66af517c3e1..028fd57229e 100644 --- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py +++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py @@ -30,6 +30,7 @@ paddle.enable_static() def resnet_cifar10(input, depth=32): + def conv_bn_layer(input, ch_out, filter_size, @@ -37,14 +38,13 @@ def resnet_cifar10(input, depth=32): padding, act='relu', bias_attr=False): - tmp = fluid.layers.conv2d( - input=input, - filter_size=filter_size, - num_filters=ch_out, - stride=stride, - padding=padding, - act=None, - bias_attr=bias_attr) + tmp = fluid.layers.conv2d(input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=bias_attr) return fluid.layers.batch_norm(input=tmp, act=act) def shortcut(input, ch_in, ch_out, stride): @@ -67,28 +67,33 @@ def resnet_cifar10(input, depth=32): assert (depth - 2) % 6 == 0 n = (depth - 2) // 6 - conv1 = conv_bn_layer( - input=input, ch_out=16, filter_size=3, stride=1, padding=1) + conv1 = conv_bn_layer(input=input, + ch_out=16, + filter_size=3, + stride=1, + padding=1) res1 = layer_warp(basicblock, conv1, 16, 16, n, 1) res2 = layer_warp(basicblock, res1, 16, 32, n, 2) res3 = layer_warp(basicblock, res2, 32, 64, n, 2) - pool = fluid.layers.pool2d( - input=res3, pool_size=8, pool_type='avg', pool_stride=1) + pool = fluid.layers.pool2d(input=res3, + pool_size=8, + pool_type='avg', + pool_stride=1) return pool def vgg16_bn_drop(input): + def conv_block(input, num_filter, groups, dropouts): - return fluid.nets.img_conv_group( - input=input, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act='relu', - conv_with_batchnorm=True, - conv_batchnorm_drop_rate=dropouts, - pool_type='max') + return fluid.nets.img_conv_group(input=input, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act='relu', + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type='max') conv1 = conv_block(input, 64, 2, [0.3, 0]) conv2 = conv_block(conv1, 128, 2, [0.4, 0]) @@ -113,8 +118,9 @@ def train(net_type, use_cuda, save_dirname, is_local): train_program.random_seed = 123 startup_prog.random_seed = 456 with fluid.program_guard(train_program, startup_prog): - images = fluid.layers.data( - name='pixel', shape=data_shape, dtype='float32') + images = fluid.layers.data(name='pixel', + shape=data_shape, + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') if net_type == "vgg": @@ -139,11 +145,10 @@ def train(net_type, use_cuda, save_dirname, is_local): amp_lists = fluid.contrib.mixed_precision.AutoMixedPrecisionLists( custom_black_varnames={"loss", "conv2d_0.w_0"}) - mp_optimizer = decorate( - optimizer=optimizer, - amp_lists=amp_lists, - init_loss_scaling=8.0, - use_dynamic_loss_scaling=True) + mp_optimizer = decorate(optimizer=optimizer, + amp_lists=amp_lists, + init_loss_scaling=8.0, + use_dynamic_loss_scaling=True) mp_optimizer.minimize(avg_cost) loss_scaling = mp_optimizer.get_loss_scaling() @@ -153,11 +158,11 @@ def train(net_type, use_cuda, save_dirname, is_local): PASS_NUM = 1 # no shuffle for unit test - train_reader = paddle.batch( - paddle.dataset.cifar.train10(), batch_size=BATCH_SIZE) 
+ train_reader = paddle.batch(paddle.dataset.cifar.train10(), + batch_size=BATCH_SIZE) - test_reader = paddle.batch( - paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE) + test_reader = paddle.batch(paddle.dataset.cifar.test10(), + batch_size=BATCH_SIZE) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) @@ -173,9 +178,9 @@ def train(net_type, use_cuda, save_dirname, is_local): feed=feeder.feed(data), fetch_list=[scaled_loss, avg_cost]) print( - 'PassID {0:1}, BatchID {1:04}, train loss {2:2.4}, scaled train closs {3:2.4}'. - format(pass_id, batch_id + 1, - float(loss), float(np_scaled_loss))) + 'PassID {0:1}, BatchID {1:04}, train loss {2:2.4}, scaled train closs {3:2.4}' + .format(pass_id, batch_id + 1, float(loss), + float(np_scaled_loss))) if (batch_id % 10) == 0: acc_list = [] avg_loss_list = [] @@ -193,9 +198,9 @@ def train(net_type, use_cuda, save_dirname, is_local): avg_loss_value = numpy.array(avg_loss_list).mean() print( - 'PassID {0:1}, BatchID {1:04}, test loss {2:2.2}, acc {3:2.2}'. - format(pass_id, batch_id + 1, - float(avg_loss_value), float(acc_value))) + 'PassID {0:1}, BatchID {1:04}, test loss {2:2.2}, acc {3:2.2}' + .format(pass_id, batch_id + 1, float(avg_loss_value), + float(acc_value))) if acc_value > 0.08: # Low threshold for speeding up CI fluid.io.save_inference_model( @@ -259,13 +264,12 @@ def infer(use_cuda, save_dirname=None): print("infer results: ", results[0]) - fluid.io.save_inference_model( - save_dirname, - feed_target_names, - fetch_targets, - exe, - inference_program, - clip_extra=True) + fluid.io.save_inference_model(save_dirname, + feed_target_names, + fetch_targets, + exe, + inference_program, + clip_extra=True) def main(net_type, use_cuda, is_local=True): @@ -280,6 +284,7 @@ def main(net_type, use_cuda, is_local=True): class TestImageClassification(unittest.TestCase): + def test_amp_lists(self): white_list = copy.copy( fluid.contrib.mixed_precision.fp16_lists.white_list) @@ -425,15 +430,18 @@ class TestImageClassification(unittest.TestCase): class TestAmpWithNonIterableDataLoader(unittest.TestCase): + def decorate_with_data_loader(self): main_prog = paddle.static.Program() start_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, start_prog): with paddle.fluid.unique_name.guard(): - image = fluid.layers.data( - name='image', shape=[3, 224, 224], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') + image = fluid.layers.data(name='image', + shape=[3, 224, 224], + dtype='float32') + label = fluid.layers.data(name='label', + shape=[1], + dtype='int64') py_reader = fluid.io.DataLoader.from_generator( feed_list=[image, label], capacity=4, @@ -449,11 +457,10 @@ class TestAmpWithNonIterableDataLoader(unittest.TestCase): optimizer = fluid.optimizer.Lamb(learning_rate=0.001) amp_lists = fluid.contrib.mixed_precision.AutoMixedPrecisionLists( custom_black_varnames={"loss", "conv2d_0.w_0"}) - mp_optimizer = decorate( - optimizer=optimizer, - amp_lists=amp_lists, - init_loss_scaling=8.0, - use_dynamic_loss_scaling=True) + mp_optimizer = decorate(optimizer=optimizer, + amp_lists=amp_lists, + init_loss_scaling=8.0, + use_dynamic_loss_scaling=True) mp_optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/contrib/tests/test_model_cast_to_bf16.py b/python/paddle/fluid/contrib/tests/test_model_cast_to_bf16.py index 5362a6ecd16..4682be8114a 100644 --- a/python/paddle/fluid/contrib/tests/test_model_cast_to_bf16.py +++ 
b/python/paddle/fluid/contrib/tests/test_model_cast_to_bf16.py @@ -44,6 +44,7 @@ cutf = convert_uint16_to_float @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestModelCastBF16(unittest.TestCase): + @classmethod def setUpClass(cls): cls.seed = 111 @@ -75,8 +76,8 @@ class TestModelCastBF16(unittest.TestCase): with_lod=False, startup_prog=None): exe = fluid.Executor(core.CPUPlace()) - exe.run(fluid.default_startup_program() - if startup_prog is None else startup_prog) + exe.run(fluid.default_startup_program( + ) if startup_prog is None else startup_prog) prog = fluid.default_main_program() if amp_fun is not None: if startup_prog is not None: @@ -97,10 +98,12 @@ class TestModelCastBF16(unittest.TestCase): nn_bf16 = amp.bf16.convert_float_to_uint16(nn) with self.static_graph(): - t_bf16 = layers.data( - name='t_bf16', shape=[size, size], dtype=np.uint16) - tt_bf16 = layers.data( - name='tt_bf16', shape=[size, size], dtype=np.uint16) + t_bf16 = layers.data(name='t_bf16', + shape=[size, size], + dtype=np.uint16) + tt_bf16 = layers.data(name='tt_bf16', + shape=[size, size], + dtype=np.uint16) t = layers.data(name='t', shape=[size, size], dtype='float32') tt = layers.data(name='tt', shape=[size, size], dtype='float32') @@ -151,27 +154,26 @@ class TestModelCastBF16(unittest.TestCase): amp_fun=_amp_fun, startup_prog=startup_prog ) - self.assertTrue( - static_ret_bf16, np.ones( - [size, size], dtype='float32') * -1.1) + self.assertTrue(static_ret_bf16, + np.ones([size, size], dtype='float32') * -1.1) def test_graph_rewrite(self): self._graph_common(lambda prog: amp.bf16.rewrite_program_bf16( prog, amp.bf16.AutoMixedPrecisionListsBF16( custom_bf16_list={'elementwise_add'}, - custom_fp32_varnames={'elementwise_add_0.tmp_0'}) - )) + custom_fp32_varnames={'elementwise_add_0.tmp_0'}))) def test_graph_cast(self): - self._graph_common(lambda prog, startup_prog: amp.bf16.cast_model_to_bf16( - prog, - startup_prog, - amp.bf16.AutoMixedPrecisionListsBF16( - custom_bf16_list={'elementwise_add'}, - custom_fp32_list={'elementwise_mul'}), - use_bf16_guard=True - ), startup_prog=fluid.default_startup_program()) + self._graph_common( + lambda prog, startup_prog: amp.bf16.cast_model_to_bf16( + prog, + startup_prog, + amp.bf16.AutoMixedPrecisionListsBF16( + custom_bf16_list={'elementwise_add'}, + custom_fp32_list={'elementwise_mul'}), + use_bf16_guard=True), + startup_prog=fluid.default_startup_program()) if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py index 92786f28352..c062a039f28 100644 --- a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py +++ b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py @@ -26,6 +26,7 @@ paddle.enable_static() class RandomDataset(Dataset): + def __init__(self, num_samples, seed=123): super(RandomDataset, self).__init__() np.random.seed(seed) @@ -41,6 +42,7 @@ class RandomDataset(Dataset): def reader_decorator(reader): + def __reader__(): for i in range(len(reader)): yield reader[i] @@ -49,6 +51,7 @@ def reader_decorator(reader): def resnet_cifar10(input, depth=32): + def conv_bn_layer(input, ch_out, filter_size, @@ -56,14 +59,13 @@ def resnet_cifar10(input, depth=32): padding, act='relu', bias_attr=False): - tmp = fluid.layers.conv2d( - input=input, - filter_size=filter_size, - num_filters=ch_out, - stride=stride, - padding=padding, - act=None, - bias_attr=bias_attr) + tmp = 
fluid.layers.conv2d(input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=bias_attr) return fluid.layers.batch_norm(input=tmp, act=act) def shortcut(input, ch_in, ch_out, stride): @@ -86,14 +88,19 @@ def resnet_cifar10(input, depth=32): assert (depth - 2) % 6 == 0 n = (depth - 2) // 6 - conv1 = conv_bn_layer( - input=input, ch_out=16, filter_size=3, stride=1, padding=1) + conv1 = conv_bn_layer(input=input, + ch_out=16, + filter_size=3, + stride=1, + padding=1) with paddle.static.amp.fp16_guard(): res1 = layer_warp(basicblock, conv1, 16, 16, n, 1) res2 = layer_warp(basicblock, res1, 16, 32, n, 2) res3 = layer_warp(basicblock, res2, 32, 64, n, 2) - pool = fluid.layers.pool2d( - input=res3, pool_size=8, pool_type='avg', pool_stride=1) + pool = fluid.layers.pool2d(input=res3, + pool_size=8, + pool_type='avg', + pool_stride=1) return pool @@ -107,24 +114,25 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""): train_program.random_seed = 123 startup_prog.random_seed = 456 with fluid.program_guard(train_program, startup_prog): - images = fluid.layers.data( - name='pixel', shape=data_shape, dtype='float32') + images = fluid.layers.data(name='pixel', + shape=data_shape, + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') net = resnet_cifar10(images) logits = fluid.layers.fc(input=net, size=classdim, act="softmax") - cost = fluid.layers.softmax_with_cross_entropy( - logits, label, return_softmax=False) + cost = fluid.layers.softmax_with_cross_entropy(logits, + label, + return_softmax=False) sum_cost = fluid.layers.reduce_sum(cost) # Test program test_program = train_program.clone(for_test=True) if optimizer == "Adam": - optimizer = paddle.optimizer.AdamW( - learning_rate=0.001, - epsilon=1e-8, - weight_decay=0.0, - multi_precision=True) + optimizer = paddle.optimizer.AdamW(learning_rate=0.001, + epsilon=1e-8, + weight_decay=0.0, + multi_precision=True) elif optimizer == "Lars": optimizer = paddle.fluid.optimizer.LarsMomentumOptimizer( learning_rate=0.001, @@ -147,17 +155,14 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""): optimizer.minimize(sum_cost) - train_reader = paddle.batch( - reader_decorator(RandomDataset( - 16 * 5, seed=123)), - batch_size=16, - drop_last=True) + train_reader = paddle.batch(reader_decorator(RandomDataset(16 * 5, + seed=123)), + batch_size=16, + drop_last=True) - test_reader = paddle.batch( - reader_decorator(RandomDataset( - 4 * 5, seed=456)), - batch_size=4, - drop_last=True) + test_reader = paddle.batch(reader_decorator(RandomDataset(4 * 5, seed=456)), + batch_size=4, + drop_last=True) place = fluid.CUDAPlace(0) exe = fluid.Executor(place) @@ -166,8 +171,9 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""): def train_loop(): exe.run(startup_prog) if use_pure_fp16: - optimizer.amp_init( - place, test_program=test_program, use_fp16_test=True) + optimizer.amp_init(place, + test_program=test_program, + use_fp16_test=True) train_loss_list = [] test_loss_list = [] @@ -195,6 +201,7 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""): class TestImageMultiPrecision(unittest.TestCase): + def test_resnet_pure_fp16(self): if not fluid.core.is_compiled_with_cuda(): return @@ -221,22 +228,18 @@ class TestImageMultiPrecision(unittest.TestCase): use_nesterov=use_nesterov, optimizer=optimizer) - self.assertTrue( - np.allclose( - np.array(train_loss_fp16), - np.array(train_loss_fp32), - rtol=1e-02, - atol=1e-05, - 
equal_nan=True), - msg='Failed to train in pure FP16.') - self.assertTrue( - np.allclose( - np.array(test_loss_fp16), - np.array(test_loss_fp32), - rtol=1e-02, - atol=1e-05, - equal_nan=True), - msg='Failed to test in pure FP16.') + self.assertTrue(np.allclose(np.array(train_loss_fp16), + np.array(train_loss_fp32), + rtol=1e-02, + atol=1e-05, + equal_nan=True), + msg='Failed to train in pure FP16.') + self.assertTrue(np.allclose(np.array(test_loss_fp16), + np.array(test_loss_fp32), + rtol=1e-02, + atol=1e-05, + equal_nan=True), + msg='Failed to test in pure FP16.') do_test(use_nesterov=False) do_test(use_nesterov=True) @@ -254,24 +257,29 @@ class TestImageMultiPrecision(unittest.TestCase): class TestAmpWithNonIterableDataLoader(unittest.TestCase): + def decorate_with_data_loader(self): main_prog = paddle.static.Program() start_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, start_prog): with paddle.fluid.unique_name.guard(): - image = fluid.layers.data( - name='image', shape=[3, 224, 224], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') + image = fluid.layers.data(name='image', + shape=[3, 224, 224], + dtype='float32') + label = fluid.layers.data(name='label', + shape=[1], + dtype='int64') py_reader = fluid.io.DataLoader.from_generator( feed_list=[image, label], capacity=4, iterable=False, use_double_buffer=False) - zero_var = fluid.layers.fill_constant( - shape=[1], dtype='int64', value=0) - one_var = fluid.layers.fill_constant( - shape=[1], dtype='int64', value=1) + zero_var = fluid.layers.fill_constant(shape=[1], + dtype='int64', + value=0) + one_var = fluid.layers.fill_constant(shape=[1], + dtype='int64', + value=1) with fluid.layers.control_flow.Switch() as switch: with switch.case(label != zero_var): fluid.layers.assign(input=zero_var, output=label) diff --git a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py index c3099ec88f2..dd900ff4281 100644 --- a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py +++ b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py @@ -37,6 +37,7 @@ def linear_fc(num): def residual_block(num): + def conv_bn_layer(input, ch_out, filter_size, @@ -44,14 +45,13 @@ def residual_block(num): padding, act='relu', bias_attr=False): - tmp = fluid.layers.conv2d( - input=input, - filter_size=filter_size, - num_filters=ch_out, - stride=stride, - padding=padding, - act=None, - bias_attr=bias_attr) + tmp = fluid.layers.conv2d(input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=bias_attr) return fluid.layers.batch_norm(input=tmp, act=act) data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32') @@ -68,21 +68,19 @@ def residual_block(num): def conv_net(img, label): - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_1 = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") prediction = 
fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) avg_loss = fluid.layers.mean(loss) @@ -90,6 +88,7 @@ def conv_net(img, label): class TestQuantizeTranspiler(unittest.TestCase): + def setUp(self): # since quant_op and dequant_op is not ready, use cos and sin for test self.weight_quant_op_type = 'fake_quantize_abs_max' @@ -180,15 +179,18 @@ class TestQuantizeTranspiler(unittest.TestCase): self.residual_block_quant('range_abs_max') def freeze_program(self, use_cuda, seed): + def build_program(main, startup, is_test): main.random_seed = seed startup.random_seed = seed with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') + img = fluid.layers.data(name='image', + shape=[1, 28, 28], + dtype='float32') + label = fluid.layers.data(name='label', + shape=[1], + dtype='int64') loss = conv_net(img, label) if not is_test: opt = fluid.optimizer.Adam(learning_rate=0.001) @@ -220,12 +222,11 @@ class TestQuantizeTranspiler(unittest.TestCase): class_num = 10 exe.run(startup) - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=500), - batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=500), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) feeder = fluid.DataFeeder(feed_list=feeds, place=place) with fluid.program_guard(main): @@ -250,25 +251,25 @@ class TestQuantizeTranspiler(unittest.TestCase): feed=feeder.feed(test_data), fetch_list=[loss]) self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3) - w_freeze = np.array(fluid.global_scope().find_var('conv2d_1.w_0') - .get_tensor()) + w_freeze = np.array( + fluid.global_scope().find_var('conv2d_1.w_0').get_tensor()) # fail: -432.0 != -433.0, this is due to the calculation precision #self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant)) # Convert parameter to 8-bit. quant_transpiler.convert_to_int8(test_program, place) # Save the 8-bit parameter and model file. - fluid.io.save_inference_model( - 'model_8bit', ['image', 'label'], [loss], - exe, - test_program, - clip_extra=True) + fluid.io.save_inference_model('model_8bit', ['image', 'label'], + [loss], + exe, + test_program, + clip_extra=True) # Test whether the 8-bit parameter and model file can be loaded successfully. - [infer, feed, fetch] = fluid.io.load_inference_model('model_8bit', - exe) + [infer, feed, + fetch] = fluid.io.load_inference_model('model_8bit', exe) # Check the loaded 8-bit weight. 
- w_8bit = np.array(fluid.global_scope().find_var('conv2d_1.w_0.int8') - .get_tensor()) + w_8bit = np.array( + fluid.global_scope().find_var('conv2d_1.w_0.int8').get_tensor()) self.assertEqual(w_8bit.dtype, np.int8) self.assertEqual(np.sum(w_8bit), np.sum(w_freeze)) diff --git a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py index 9eb2fe6cbd1..bbc61d34613 100644 --- a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py +++ b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py @@ -32,14 +32,18 @@ def fake_imdb_reader(word_dict_size, lower_seq_len=100, upper_seq_len=200, class_dim=2): + def __reader__(): for _ in six.moves.range(sample_num): - length = np.random.random_integers( - low=lower_seq_len, high=upper_seq_len, size=[1])[0] - ids = np.random.random_integers( - low=0, high=word_dict_size - 1, size=[length]).astype('int64') - label = np.random.random_integers( - low=0, high=class_dim - 1, size=[1]).astype('int64')[0] + length = np.random.random_integers(low=lower_seq_len, + high=upper_seq_len, + size=[1])[0] + ids = np.random.random_integers(low=0, + high=word_dict_size - 1, + size=[length]).astype('int64') + label = np.random.random_integers(low=0, + high=class_dim - 1, + size=[1]).astype('int64')[0] yield ids, label return __reader__ @@ -74,8 +78,9 @@ def bow_net(data, This model is from https://github.com/PaddlePaddle/models: fluid/PaddleNLP/text_classification/nets.py """ - emb = fluid.layers.embedding( - input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim]) + emb = fluid.layers.embedding(input=data, + is_sparse=is_sparse, + size=[dict_dim, emb_dim]) bow = fluid.layers.sequence_pool(input=emb, pool_type='sum') bow_tanh = fluid.layers.tanh(bow) fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh") @@ -88,6 +93,7 @@ def bow_net(data, class TestWeightDecay(unittest.TestCase): + def setUp(self): # set seed np.random.seed(SEED) @@ -125,16 +131,17 @@ class TestWeightDecay(unittest.TestCase): startup_prog = fluid.framework.Program() with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") avg_cost = model(data, label, self.word_dict_len) AdamW = fluid.contrib.extend_with_decoupled_weight_decay( fluid.optimizer.Adam) - optimizer = AdamW( - learning_rate=self.learning_rate, - weight_decay=self.learning_rate) + optimizer = AdamW(learning_rate=self.learning_rate, + weight_decay=self.learning_rate) optimizer.minimize(avg_cost) param_sum = self.run_program(place, [data, label]) @@ -146,8 +153,10 @@ class TestWeightDecay(unittest.TestCase): startup_prog = fluid.framework.Program() with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") avg_cost = model(data, label, self.word_dict_len) @@ -160,8 +169,8 @@ class TestWeightDecay(unittest.TestCase): for var in main_prog.block(0).all_parameters()] for params in param_list: - updated_p = fluid.layers.elementwise_sub( - x=params[0], y=params[1]) + updated_p = fluid.layers.elementwise_sub(x=params[0], + y=params[1]) 
fluid.layers.assign(input=updated_p, output=params[0]) optimizer.apply_optimize(avg_cost, startup_prog, params_grads) @@ -179,9 +188,10 @@ class TestWeightDecay(unittest.TestCase): self.assertTrue( np.allclose(param_sum1[i], param_sum2[i]), "Current place: {}, i: {}, sum1: {}, sum2: {}".format( - place, i, param_sum1[i][~np.isclose(param_sum1[ - i], param_sum2[i])], param_sum2[i][~np.isclose( - param_sum1[i], param_sum2[i])])) + place, i, param_sum1[i] + [~np.isclose(param_sum1[i], param_sum2[i])], + param_sum2[i] + [~np.isclose(param_sum1[i], param_sum2[i])])) if __name__ == '__main__': diff --git a/python/paddle/fluid/core.py b/python/paddle/fluid/core.py index 625728c0fce..1fa3c769d77 100644 --- a/python/paddle/fluid/core.py +++ b/python/paddle/fluid/core.py @@ -127,9 +127,9 @@ def avx_supported(): # Enable execute permissions PAGE_EXECUTE = ctypes.c_ulong(0x10) pfnVirtualProtect = ctypes.windll.kernel32.VirtualProtect - res = pfnVirtualProtect( - ctypes.c_void_p(address), ONE_PAGE, PAGE_EXECUTE, - ctypes.byref(ctypes.c_ulong(0))) + res = pfnVirtualProtect(ctypes.c_void_p(address), + ONE_PAGE, PAGE_EXECUTE, + ctypes.byref(ctypes.c_ulong(0))) if not res: raise Exception("Failed VirtualProtect") @@ -156,8 +156,8 @@ def avx_supported(): # Convert the code_str into a function that returns uint func, address = asm_func(code_str) retval = func() - ctypes.windll.kernel32.VirtualFree( - ctypes.c_void_p(address), ctypes.c_size_t(0), ONE_PAGE) + ctypes.windll.kernel32.VirtualFree(ctypes.c_void_p(address), + ctypes.c_size_t(0), ONE_PAGE) except Exception as e: sys.stderr.write('Failed getting the AVX flag on Windows.\n' 'The original error is: %s\n' % @@ -170,9 +170,10 @@ def avx_supported(): def run_shell_command(cmd): import subprocess - out, err = subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - shell=True).communicate() + out, err = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=True).communicate() if err: return None else: @@ -232,7 +233,7 @@ def less_than_ver(a, b): return operator.lt(to_list(a), to_list(b)) -# NOTE(zhiqiu): An error may occurs when import paddle in linux platform with glibc < 2.22, +# NOTE(zhiqiu): An error may occurs when import paddle in linux platform with glibc < 2.22, # the error message of which is "dlopen: cannot load any more object with static TLS". 
# This happens when: # (1) the number of dynamic shared librarys (DSO) loaded > 14, diff --git a/python/paddle/fluid/data.py b/python/paddle/fluid/data.py index 31906c465a0..4a15b6a8ea2 100644 --- a/python/paddle/fluid/data.py +++ b/python/paddle/fluid/data.py @@ -115,12 +115,11 @@ def data(name, shape, dtype='float32', lod_level=0): if shape[i] is None: shape[i] = -1 - return helper.create_global_variable( - name=name, - shape=shape, - dtype=dtype, - type=core.VarDesc.VarType.LOD_TENSOR, - stop_gradient=True, - lod_level=lod_level, - is_data=True, - need_check_feed=True) + return helper.create_global_variable(name=name, + shape=shape, + dtype=dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + stop_gradient=True, + lod_level=lod_level, + is_data=True, + need_check_feed=True) diff --git a/python/paddle/fluid/data_feed_desc.py b/python/paddle/fluid/data_feed_desc.py index eaa8985092d..fb4ce735fca 100644 --- a/python/paddle/fluid/data_feed_desc.py +++ b/python/paddle/fluid/data_feed_desc.py @@ -174,8 +174,8 @@ class DataFeedDesc(object): "Only MultiSlotDataFeed needs set_dense_slots, please check your datafeed.proto" ) for name in dense_slots_name: - self.proto_desc.multi_slot_desc.slots[self.__name_to_index[ - name]].is_dense = True + self.proto_desc.multi_slot_desc.slots[ + self.__name_to_index[name]].is_dense = True def set_use_slots(self, use_slots_name): """ @@ -219,8 +219,8 @@ class DataFeedDesc(object): "Only MultiSlotDataFeed needs set_use_slots, please check your datafeed.proto" ) for name in use_slots_name: - self.proto_desc.multi_slot_desc.slots[self.__name_to_index[ - name]].is_used = True + self.proto_desc.multi_slot_desc.slots[ + self.__name_to_index[name]].is_used = True def desc(self): """ diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py index c7a68c6027b..30cfb9f4b85 100644 --- a/python/paddle/fluid/data_feeder.py +++ b/python/paddle/fluid/data_feeder.py @@ -24,6 +24,7 @@ import warnings from .framework import Variable, default_main_program, _current_expected_place, _non_static_mode, _in_eager_without_dygraph_check from .framework import _cpu_num, _cuda_ids + __all__ = ['DataFeeder'] _PADDLE_DTYPE_2_NUMPY_DTYPE = { @@ -172,6 +173,7 @@ def check_shape(shape, class DataToLoDTensorConverter(object): + def __init__(self, place, lod_level, shape, dtype): self.place = place self.lod_level = lod_level @@ -205,8 +207,8 @@ class DataToLoDTensorConverter(object): for s1, s2 in zip(self.shape, shape): if s1 != s2 and s1 >= 0 and s2 >= 0: raise ValueError( - "Shape not match. What is defined in data layer is {}, but receive {}". - format(self.shape, shape)) + "Shape not match. 
What is defined in data layer is {}, but receive {}" + .format(self.shape, shape)) def done(self): arr = np.array(self.data, dtype=self.dtype) @@ -227,6 +229,7 @@ class DataToLoDTensorConverter(object): class BatchedTensorProvider(object): + def __init__(self, feed_list, place, batch_size, generator, drop_last): self.place = place self.batch_size = batch_size @@ -237,11 +240,10 @@ class BatchedTensorProvider(object): for var in feed_list: assert var.lod_level == 0, "lod_level must be 0" self.converters.append( - DataToLoDTensorConverter( - place=self.place, - lod_level=0, - shape=var.shape, - dtype=var.dtype)) + DataToLoDTensorConverter(place=self.place, + lod_level=0, + shape=var.shape, + dtype=var.dtype)) def _done(self): return [c.done() for c in self.converters] @@ -249,8 +251,8 @@ class BatchedTensorProvider(object): def __call__(self): idx = 0 for each_sample in self.generator(): - for each_slot, each_converter in six.moves.zip(each_sample, - self.converters): + for each_slot, each_converter in six.moves.zip( + each_sample, self.converters): each_converter.data.append(each_slot) idx += 1 @@ -383,21 +385,21 @@ class DataFeeder(object): """ converter = [] - for lod_level, shape, dtype in six.moves.zip( - self.feed_lod_level, self.feed_shapes, self.feed_dtypes): + for lod_level, shape, dtype in six.moves.zip(self.feed_lod_level, + self.feed_shapes, + self.feed_dtypes): converter.append( - DataToLoDTensorConverter( - place=self.place, - lod_level=lod_level, - shape=shape, - dtype=dtype)) + DataToLoDTensorConverter(place=self.place, + lod_level=lod_level, + shape=shape, + dtype=dtype)) for each_sample in iterable: assert len(each_sample) == len(converter), ( "The number of fields in data (%d) does not match " + "len(feed_list) (%d)") % (len(each_sample), len(converter)) - for each_converter, each_slot in six.moves.zip(converter, - each_sample): + for each_converter, each_slot in six.moves.zip( + converter, each_sample): each_converter.feed(each_slot) ret_dict = {} for each_name, each_converter in six.moves.zip(self.feed_names, @@ -461,14 +463,12 @@ class DataFeeder(object): """ if isinstance(self.place, core.CUDAPlace): places = [ - core.CUDAPlace(i) - for i in six.moves.xrange( + core.CUDAPlace(i) for i in six.moves.xrange( self._get_number_of_places_(num_places)) ] else: places = [ - core.CPUPlace() - for _ in six.moves.xrange( + core.CPUPlace() for _ in six.moves.xrange( self._get_number_of_places_(num_places)) ] diff --git a/python/paddle/fluid/dataloader/batch_sampler.py b/python/paddle/fluid/dataloader/batch_sampler.py index 3a23c852563..8187faef008 100644 --- a/python/paddle/fluid/dataloader/batch_sampler.py +++ b/python/paddle/fluid/dataloader/batch_sampler.py @@ -148,6 +148,7 @@ class BatchSampler(Sampler): class _InfiniteIterableSampler(object): + def __init__(self, dataset, batch_size=1): assert isinstance( dataset, IterableDataset @@ -277,9 +278,10 @@ class DistributedBatchSampler(BatchSampler): subsampled_indices.extend(indices[i:i + self.batch_size]) indices = indices[len(indices) - last_batch_size:] - subsampled_indices.extend(indices[ - self.local_rank * last_local_batch_size:( - self.local_rank + 1) * last_local_batch_size]) + subsampled_indices.extend( + indices[self.local_rank * + last_local_batch_size:(self.local_rank + 1) * + last_local_batch_size]) return subsampled_indices if self.nranks > 1: diff --git a/python/paddle/fluid/dataloader/dataloader_iter.py b/python/paddle/fluid/dataloader/dataloader_iter.py index 03dacb0396c..0d7fc17da17 100644 --- 
a/python/paddle/fluid/dataloader/dataloader_iter.py +++ b/python/paddle/fluid/dataloader/dataloader_iter.py @@ -194,8 +194,8 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase): self._need_check_feed, self._places, self._use_buffer_reader, True, self._pin_memory) - self._thread = threading.Thread( - target=self._thread_loop, args=(_current_expected_place(), )) + self._thread = threading.Thread(target=self._thread_loop, + args=(_current_expected_place(), )) self._thread.daemon = True self._thread.start() @@ -203,7 +203,7 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase): #NOTE(zhiqiu): Set the expected place for new thread as the same as father thread, # and it will call platform::SetDeviceId() in c++ internally. # If we do not set cudaDeviceId in new thread, the default cudaDeviceId will be 0, - # Which may cost hundreds of MB of GPU memory on CUDAPlace(0) if calling some cuda + # Which may cost hundreds of MB of GPU memory on CUDAPlace(0) if calling some cuda # APIs in this thread. _set_expected_place(legacy_expected_place) @@ -277,12 +277,11 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase): for i in range(len(data)): data[i] = data[i]._move_to_list() data = [ - _restore_batch(d, s) - for d, s in zip(data, self._structure_infos[:len( - self._places)]) + _restore_batch(d, s) for d, s in zip( + data, self._structure_infos[:len(self._places)]) ] - self._structure_infos = self._structure_infos[len( - self._places):] + self._structure_infos = self._structure_infos[ + len(self._places):] # static graph organized data on multi-device with list, if # place number is 1, there is only 1 device, extra the data # from list for devices to be compatible with dygraph mode @@ -341,6 +340,7 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase): class _DataLoaderIterMultiProcess(_DataLoaderIterBase): + def __init__(self, loader): super(_DataLoaderIterMultiProcess, self).__init__(loader) @@ -354,7 +354,7 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): self._data_queue = None # data get from _data_queue will be reordered by _rcvd_idx - # for data order keeping, data index not equal _rcvd_idx + # for data order keeping, data index not equal _rcvd_idx # will be cached in _task_infos self._send_idx = 0 self._rcvd_idx = 0 @@ -392,7 +392,7 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): # create data_queue for workers self._data_queue = multiprocessing.Queue() - # event for workers and thread, thread event is only need + # event for workers and thread, thread event is only need # in multi-processing mode self._workers_done_event = multiprocessing.Event() self._thread_done_event = threading.Event() @@ -434,7 +434,8 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): ] # if only 1 place, do not need to keep order self._blocking_queue = core.init_lod_tensor_blocking_queue( - core.Variable(), self._outstanding_capacity, len(self._places) > 1) + core.Variable(), self._outstanding_capacity, + len(self._places) > 1) self._reader = core.create_py_reader( self._blocking_queue, self._var_names, self._shapes, self._dtypes, self._need_check_feed, self._places, self._use_buffer_reader, True, @@ -442,8 +443,8 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): self._thread_done_event = threading.Event() # thread event is only need in multi-processing mode - self._thread = threading.Thread( - target=self._thread_loop, args=(_current_expected_place(), )) + self._thread = threading.Thread(target=self._thread_loop, + args=(_current_expected_place(), )) 
self._thread.daemon = True self._thread.start() @@ -492,8 +493,8 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): self._try_put_indices() def _shutdown_worker(self, worker_id, shutdown=False): - if self._worker_status[worker_id] or (self._persistent_workers and - shutdown): + if self._worker_status[worker_id] or (self._persistent_workers + and shutdown): self._indices_queues[worker_id].put(None) self._worker_status[worker_id] = False @@ -524,7 +525,7 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): #NOTE(zhiqiu): Set the expected place for new thread as the same as father thread, # and it will call platform::SetDeviceId() in c++ internally. # If we do not set cudaDeviceId in new thread, the default cudaDeviceId will be 0, - # Which may cost hundreds of MB of GPU memory on CUDAPlace(0) if calling some cuda + # Which may cost hundreds of MB of GPU memory on CUDAPlace(0) if calling some cuda # APIs in this thread. _set_expected_place(legacy_expected_place) @@ -548,8 +549,9 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): # LoDTensor not in shared memory is not # serializable, cannot be create in workers for slot in batch: - if isinstance(slot, (paddle.Tensor, - core.eager.Tensor)): + if isinstance( + slot, + (paddle.Tensor, core.eager.Tensor)): slot = slot.value().get_tensor() elif not isinstance(slot, core.LoDTensor): tmp = core.LoDTensor() @@ -570,7 +572,7 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): # For IterableDataset, batch indices is generated infinitely # for each worker to raise StopIteration, but a StopIteration # raising process will discard a batch indices which is count - # in _send_idx but will not increase _rcvd_idx, so we check + # in _send_idx but will not increase _rcvd_idx, so we check # whether the worker is still alive here to skip the discarded # batch indices and increase _rcvd_idx if self._dataset_kind == _DatasetKind.ITER: @@ -749,12 +751,11 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): for i in range(len(data)): data[i] = data[i]._move_to_list() data = [ - _restore_batch(d, s) - for d, s in zip(data, self._structure_infos[:len( - self._places)]) + _restore_batch(d, s) for d, s in zip( + data, self._structure_infos[:len(self._places)]) ] - self._structure_infos = self._structure_infos[len( - self._places):] + self._structure_infos = self._structure_infos[ + len(self._places):] # static graph organized data on multi-device with list, if # place number is 1, there is only 1 device, extra the data # from list for devices to be compatible with dygraph mode diff --git a/python/paddle/fluid/dataloader/fetcher.py b/python/paddle/fluid/dataloader/fetcher.py index ec3240a326b..387032cdfbb 100644 --- a/python/paddle/fluid/dataloader/fetcher.py +++ b/python/paddle/fluid/dataloader/fetcher.py @@ -20,6 +20,7 @@ _WARNING_TO_LOG = True class _DatasetFetcher(object): + def __init__(self, dataset, auto_collate_batch, collate_fn, drop_last): self.dataset = dataset self.auto_collate_batch = auto_collate_batch @@ -67,15 +68,17 @@ class _DatasetFetcher(object): "dtype=float32)]', and in Paddle >= 2.1, data is in format" \ " 'Tensor(shape=(1, 2, 3), dtype=float32)'\n" - logger = get_logger( - "DataLoader", logging.INFO, fmt='%(levelname)s: %(message)s') + logger = get_logger("DataLoader", + logging.INFO, + fmt='%(levelname)s: %(message)s') logger.warning(warn_str) class _IterableDatasetFetcher(_DatasetFetcher): + def __init__(self, dataset, auto_collate_batch, collate_fn, drop_last): - super(_IterableDatasetFetcher, self).__init__( 
- dataset, auto_collate_batch, collate_fn, drop_last) + super(_IterableDatasetFetcher, + self).__init__(dataset, auto_collate_batch, collate_fn, drop_last) self.dataset_iter = iter(dataset) def fetch(self, batch_indices, done_event=None): @@ -91,8 +94,8 @@ class _IterableDatasetFetcher(_DatasetFetcher): else: return None - if len(data) == 0 or (self.drop_last and - len(data) < len(batch_indices)): + if len(data) == 0 or (self.drop_last + and len(data) < len(batch_indices)): raise StopIteration global _WARNING_TO_LOG @@ -109,6 +112,7 @@ class _IterableDatasetFetcher(_DatasetFetcher): class _MapDatasetFetcher(_DatasetFetcher): + def __init__(self, dataset, auto_collate_batch, collate_fn, drop_last): super(_MapDatasetFetcher, self).__init__(dataset, auto_collate_batch, collate_fn, drop_last) diff --git a/python/paddle/fluid/dataloader/sampler.py b/python/paddle/fluid/dataloader/sampler.py index 7207ebcbacf..25a46f3b5df 100644 --- a/python/paddle/fluid/dataloader/sampler.py +++ b/python/paddle/fluid/dataloader/sampler.py @@ -204,7 +204,8 @@ class RandomSampler(Sampler): if self._num_samples is not None and not replacement: raise ValueError( - "num_samples should not be specified while replacement is False") + "num_samples should not be specified while replacement is False" + ) if not isinstance(self.num_samples, int) or self.num_samples <= 0: raise ValueError("num_samples should be a positive integer, " @@ -227,12 +228,13 @@ class RandomSampler(Sampler): yield index else: if self.replacement: - for index in np.random.choice( - np.arange(n), self.num_samples, replace=True).tolist(): + for index in np.random.choice(np.arange(n), + self.num_samples, + replace=True).tolist(): yield index else: - for index in np.random.choice( - np.arange(n), n, replace=False).tolist(): + for index in np.random.choice(np.arange(n), n, + replace=False).tolist(): yield index def __len__(self): diff --git a/python/paddle/fluid/dataloader/worker.py b/python/paddle/fluid/dataloader/worker.py index 6dc3813fa6d..0c3ec898aad 100644 --- a/python/paddle/fluid/dataloader/worker.py +++ b/python/paddle/fluid/dataloader/worker.py @@ -32,6 +32,7 @@ __all__ = ['get_worker_info'] class _IterableDatasetStopIteration(object): + def __init__(self, worker_id): self.worker_id = worker_id @@ -58,6 +59,7 @@ class _DatasetKind(object): class ParentWatchDog(object): + def __init__(self): self._parent_pid = os.getppid() self._parent_alive = True @@ -155,6 +157,7 @@ class WorkerInfo(object): class _WorkerException(object): + def __init__(self, worker_id, exc_info=None): self.worker_id = worker_id exc_info = exc_info or sys.exc_info() @@ -275,8 +278,9 @@ def _worker_loop(dataset, dataset_kind, indices_queue, out_queue, done_event, np.random.seed(_generate_states(int(time.time()), worker_id)) global _worker_info - _worker_info = WorkerInfo( - id=worker_id, num_workers=num_workers, dataset=dataset) + _worker_info = WorkerInfo(id=worker_id, + num_workers=num_workers, + dataset=dataset) init_exception = None try: @@ -300,8 +304,9 @@ def _worker_loop(dataset, dataset_kind, indices_queue, out_queue, done_event, if isinstance(data, _ResumeIteration): out_queue.put((data, None, None)) iterator_drained = False - fetcher = _DatasetKind.create_fetcher( - dataset_kind, dataset, auto_collate_batch, collate_fn, True) + fetcher = _DatasetKind.create_fetcher(dataset_kind, dataset, + auto_collate_batch, + collate_fn, True) continue # None as poison piil, so worker event should be set diff --git a/python/paddle/fluid/dataset.py b/python/paddle/fluid/dataset.py 
index 84064669c0d..8ea3e15ca4d 100644 --- a/python/paddle/fluid/dataset.py +++ b/python/paddle/fluid/dataset.py @@ -17,6 +17,7 @@ from paddle.fluid.proto import data_feed_pb2 from google.protobuf import text_format from . import core from ..utils import deprecated + __all__ = ['DatasetFactory', 'InMemoryDataset', 'QueueDataset'] @@ -388,9 +389,8 @@ class InMemoryDataset(DatasetBase): self.fleet_send_sleep_seconds = None self.trainer_num = -1 - @deprecated( - since="2.0.0", - update_to="paddle.distributed.InMemoryDataset._set_feed_type") + @deprecated(since="2.0.0", + update_to="paddle.distributed.InMemoryDataset._set_feed_type") def set_feed_type(self, data_feed_type): """ Set data_feed_desc @@ -399,9 +399,8 @@ class InMemoryDataset(DatasetBase): if (self.proto_desc.name == "SlotRecordInMemoryDataFeed"): self.dataset = core.Dataset("SlotRecordDataset") - @deprecated( - since="2.0.0", - update_to="paddle.distributed.InMemoryDataset._prepare_to_run") + @deprecated(since="2.0.0", + update_to="paddle.distributed.InMemoryDataset._prepare_to_run") def _prepare_to_run(self): """ Set data_feed_desc before load or shuffle, @@ -424,8 +423,8 @@ class InMemoryDataset(DatasetBase): @deprecated( since="2.0.0", - update_to="paddle.distributed.InMemoryDataset._dynamic_adjust_before_train" - ) + update_to= + "paddle.distributed.InMemoryDataset._dynamic_adjust_before_train") def _dynamic_adjust_before_train(self, thread_num): if not self.is_user_set_queue_num: if self.use_ps_gpu: @@ -446,9 +445,8 @@ class InMemoryDataset(DatasetBase): self.dataset.dynamic_adjust_channel_num(self.thread_num, False) self.dataset.dynamic_adjust_readers_num(self.thread_num) - @deprecated( - since="2.0.0", - update_to="paddle.distributed.InMemoryDataset._set_queue_num") + @deprecated(since="2.0.0", + update_to="paddle.distributed.InMemoryDataset._set_queue_num") def set_queue_num(self, queue_num): """ Set Dataset output queue num, training threads get data from queues @@ -467,9 +465,9 @@ class InMemoryDataset(DatasetBase): self.is_user_set_queue_num = True self.queue_num = queue_num - @deprecated( - since="2.0.0", - update_to="paddle.distributed.InMemoryDataset._set_parse_ins_id") + @deprecated(since="2.0.0", + update_to="paddle.distributed.InMemoryDataset._set_parse_ins_id" + ) def set_parse_ins_id(self, parse_ins_id): """ Set id Dataset need to parse insid @@ -541,9 +539,9 @@ class InMemoryDataset(DatasetBase): """ self.trainer_num = trainer_num - @deprecated( - since="2.0.0", - update_to="paddle.distributed.InMemoryDataset._set_merge_by_sid") + @deprecated(since="2.0.0", + update_to="paddle.distributed.InMemoryDataset._set_merge_by_sid" + ) def set_merge_by_sid(self, merge_by_sid): """ Set if Dataset need to merge sid. If not, one ins means one Pv. 
@@ -656,8 +654,8 @@ class InMemoryDataset(DatasetBase): @deprecated( since="2.0.0", - update_to="paddle.distributed.InMemoryDataset._set_fleet_send_sleep_seconds" - ) + update_to= + "paddle.distributed.InMemoryDataset._set_fleet_send_sleep_seconds") def set_fleet_send_sleep_seconds(self, fleet_send_sleep_seconds=0): """ Set fleet send sleep time, default is 0 @@ -700,8 +698,8 @@ class InMemoryDataset(DatasetBase): @deprecated( since="2.0.0", - update_to="paddle.distributed.InMemoryDataset._set_generate_unique_feasigns" - ) + update_to= + "paddle.distributed.InMemoryDataset._set_generate_unique_feasigns") def set_generate_unique_feasigns(self, generate_uni_feasigns, shard_num): self.dataset.set_generate_unique_feasigns(generate_uni_feasigns) self.gen_uni_feasigns = generate_uni_feasigns @@ -709,12 +707,13 @@ class InMemoryDataset(DatasetBase): @deprecated( since="2.0.0", - update_to="paddle.distributed.InMemoryDataset._generate_local_tables_unlock" - ) + update_to= + "paddle.distributed.InMemoryDataset._generate_local_tables_unlock") def generate_local_tables_unlock(self, table_id, fea_dim, read_thread_num, consume_thread_num, shard_num): - self.dataset.generate_local_tables_unlock( - table_id, fea_dim, read_thread_num, consume_thread_num, shard_num) + self.dataset.generate_local_tables_unlock(table_id, fea_dim, + read_thread_num, + consume_thread_num, shard_num) def set_date(self, date): """ @@ -739,9 +738,8 @@ class InMemoryDataset(DatasetBase): if self.use_ps_gpu and core._is_compiled_with_heterps(): self.psgpu.set_date(year, month, day) - @deprecated( - since="2.0.0", - update_to="paddle.distributed.InMemoryDataset.load_into_memory") + @deprecated(since="2.0.0", + update_to="paddle.distributed.InMemoryDataset.load_into_memory") def load_into_memory(self, is_shuffle=False): """ Load data into memory @@ -794,9 +792,9 @@ class InMemoryDataset(DatasetBase): self.dataset.create_preload_readers() self.dataset.preload_into_memory() - @deprecated( - since="2.0.0", - update_to="paddle.distributed.InMemoryDataset.wait_preload_done") + @deprecated(since="2.0.0", + update_to="paddle.distributed.InMemoryDataset.wait_preload_done" + ) def wait_preload_done(self): """ Wait preload_into_memory done @@ -815,9 +813,8 @@ class InMemoryDataset(DatasetBase): self.dataset.wait_preload_done() self.dataset.destroy_preload_readers() - @deprecated( - since="2.0.0", - update_to="paddle.distributed.InMemoryDataset.local_shuffle") + @deprecated(since="2.0.0", + update_to="paddle.distributed.InMemoryDataset.local_shuffle") def local_shuffle(self): """ Local shuffle @@ -835,9 +832,8 @@ class InMemoryDataset(DatasetBase): """ self.dataset.local_shuffle() - @deprecated( - since="2.0.0", - update_to="paddle.distributed.InMemoryDataset.global_shuffle") + @deprecated(since="2.0.0", + update_to="paddle.distributed.InMemoryDataset.global_shuffle") def global_shuffle(self, fleet=None, thread_num=12): """ Global shuffle. 
@@ -897,9 +893,8 @@ class InMemoryDataset(DatasetBase): else: fleet._role_maker.barrier_worker() - @deprecated( - since="2.0.0", - update_to="paddle.distributed.InMemoryDataset.release_memory") + @deprecated(since="2.0.0", + update_to="paddle.distributed.InMemoryDataset.release_memory") def release_memory(self): """ :api_attr: Static Graph @@ -1063,9 +1058,8 @@ class QueueDataset(DatasetBase): super(QueueDataset, self).__init__() self.proto_desc.name = "MultiSlotDataFeed" - @deprecated( - since="2.0.0", - update_to="paddle.distributed.QueueDataset._prepare_to_run") + @deprecated(since="2.0.0", + update_to="paddle.distributed.QueueDataset._prepare_to_run") def _prepare_to_run(self): """ Set data_feed_desc/thread num/filelist before run, diff --git a/python/paddle/fluid/debugger.py b/python/paddle/fluid/debugger.py index 75dc14a1d75..76b7c3d7dc1 100644 --- a/python/paddle/fluid/debugger.py +++ b/python/paddle/fluid/debugger.py @@ -68,31 +68,30 @@ def repr_lodtensor(proto): level = proto.type.lod_tensor.lod_level reprs = repr_tensor(proto.type.lod_tensor.tensor) - return reprtpl.format( - ttype="LoDTensor" if level > 0 else "Tensor", - name=proto.name, - reprs="level=%d, %s" % (level, reprs) if level > 0 else reprs) + return reprtpl.format(ttype="LoDTensor" if level > 0 else "Tensor", + name=proto.name, + reprs="level=%d, %s" % + (level, reprs) if level > 0 else reprs) def repr_selected_rows(proto): if proto.type.type != framework_pb2.VarType.SELECTED_ROWS: return - return reprtpl.format( - ttype="SelectedRows", - name=proto.name, - reprs=repr_tensor(proto.type.selected_rows)) + return reprtpl.format(ttype="SelectedRows", + name=proto.name, + reprs=repr_tensor(proto.type.selected_rows)) def repr_tensor_array(proto): if proto.type.type != framework_pb2.VarType.LOD_TENSOR_ARRAY: return - return reprtpl.format( - ttype="TensorArray", - name=proto.name, - reprs="level=%d, %s" % (proto.type.tensor_array.lod_level, - repr_tensor(proto.type.lod_tensor.tensor))) + return reprtpl.format(ttype="TensorArray", + name=proto.name, + reprs="level=%d, %s" % + (proto.type.tensor_array.lod_level, + repr_tensor(proto.type.lod_tensor.tensor))) type_handlers = [ @@ -119,6 +118,7 @@ def pprint_program_codes(program_desc): def pprint_block_codes(block_desc, show_backward=False): + def is_op_backward(op_desc): if op_desc.type.endswith('_grad'): return True @@ -155,7 +155,8 @@ def pprint_block_codes(block_desc, show_backward=False): idx=block_desc.idx, pidx=block_desc.parent_idx, vars='\n'.join(var_reprs), - ops='\n'.join(op_reprs), ) + ops='\n'.join(op_reprs), + ) def repr_attr(desc): @@ -187,7 +188,9 @@ def _repr_op_fill_constant(optype, inputs, outputs, attrs): shape=str(attrs['shape'])) -op_repr_handlers = [_repr_op_fill_constant, ] +op_repr_handlers = [ + _repr_op_fill_constant, +] def repr_op(opdesc): @@ -218,12 +221,11 @@ def repr_op(opdesc): res = handler(opdesc.type, inputs, outputs, attr_dict) if res: return res - return tpl.format( - outputs=', '.join(outputs), - optype=opdesc.type, - inputs=', '.join(inputs), - attrs="{%s}" % ','.join(attrs), - is_target=", is_target" if is_target else "") + return tpl.format(outputs=', '.join(outputs), + optype=opdesc.type, + inputs=', '.join(inputs), + attrs="{%s}" % ','.join(attrs), + is_target=", is_target" if is_target else "") def draw_block_graphviz(block, highlights=None, path="./temp.dot"): @@ -251,10 +253,9 @@ def draw_block_graphviz(block, highlights=None, path="./temp.dot"): # TODO(gongwb): format the var.type # create var if var.persistable: - varn = 
graph.add_param( - var.name, - str(var.type).replace("\n", "
", 1), - highlight=need_highlight(var.name)) + varn = graph.add_param(var.name, + str(var.type).replace("\n", "
", 1), + highlight=need_highlight(var.name)) else: varn = graph.add_arg(var.name, highlight=need_highlight(var.name)) vars[var.name] = varn diff --git a/python/paddle/fluid/device_worker.py b/python/paddle/fluid/device_worker.py index 8a5e3584ed8..f0c094a84f7 100644 --- a/python/paddle/fluid/device_worker.py +++ b/python/paddle/fluid/device_worker.py @@ -164,14 +164,14 @@ class Hogwild(DeviceWorker): sparse_len = len(worker.get_desc().sparse_table) for i in range(sparse_len): sparse_table = downpour.sparse_table.add() - sparse_table.table_id = worker.get_desc().sparse_table[ - i].table_id - sparse_table.sparse_key_name.extend(worker.get_desc() - .sparse_table[i].slot_key) - sparse_table.sparse_value_name.extend(worker.get_desc( - ).sparse_table[i].slot_value) - sparse_table.sparse_grad_name.extend(worker.get_desc( - ).sparse_table[i].slot_gradient) + sparse_table.table_id = worker.get_desc( + ).sparse_table[i].table_id + sparse_table.sparse_key_name.extend( + worker.get_desc().sparse_table[i].slot_key) + sparse_table.sparse_value_name.extend( + worker.get_desc().sparse_table[i].slot_value) + sparse_table.sparse_grad_name.extend( + worker.get_desc().sparse_table[i].slot_gradient) sparse_table.fea_dim = \ self._fleet_desc.server_param.downpour_server_param.downpour_table_param[ i].accessor.fea_dim @@ -291,14 +291,14 @@ class DownpourLite(DeviceWorker): sparse_len = len(worker.get_desc().sparse_table) for i in range(sparse_len): sparse_table = downpour.sparse_table.add() - sparse_table.table_id = worker.get_desc().sparse_table[ - i].table_id - sparse_table.sparse_key_name.extend(worker.get_desc() - .sparse_table[i].slot_key) - sparse_table.sparse_value_name.extend(worker.get_desc( - ).sparse_table[i].slot_value) - sparse_table.sparse_grad_name.extend(worker.get_desc( - ).sparse_table[i].slot_gradient) + sparse_table.table_id = worker.get_desc( + ).sparse_table[i].table_id + sparse_table.sparse_key_name.extend( + worker.get_desc().sparse_table[i].slot_key) + sparse_table.sparse_value_name.extend( + worker.get_desc().sparse_table[i].slot_value) + sparse_table.sparse_grad_name.extend( + worker.get_desc().sparse_table[i].slot_gradient) sparse_table.fea_dim = \ self._fleet_desc.server_param.downpour_server_param.downpour_table_param[ i].accessor.fea_dim @@ -400,12 +400,12 @@ class DownpourSGD(DeviceWorker): for i in range(sparse_len): sparse_table = downpour.sparse_table.add() sparse_table.table_id = worker.get_desc().sparse_table[i].table_id - sparse_table.sparse_key_name.extend(worker.get_desc().sparse_table[ - i].slot_key) - sparse_table.sparse_value_name.extend(worker.get_desc() - .sparse_table[i].slot_value) - sparse_table.sparse_grad_name.extend(worker.get_desc().sparse_table[ - i].slot_gradient) + sparse_table.sparse_key_name.extend( + worker.get_desc().sparse_table[i].slot_key) + sparse_table.sparse_value_name.extend( + worker.get_desc().sparse_table[i].slot_value) + sparse_table.sparse_grad_name.extend( + worker.get_desc().sparse_table[i].slot_gradient) if opt_info["use_cvm"] or "no_cvm" in opt_info and opt_info[ "no_cvm"] == True: sparse_table.emb_dim = \ @@ -500,12 +500,12 @@ class DownpourSGDOPT(DeviceWorker): for i in range(sparse_len): sparse_table = downpour.sparse_table.add() sparse_table.table_id = worker.get_desc().sparse_table[i].table_id - sparse_table.sparse_key_name.extend(worker.get_desc().sparse_table[ - i].slot_key) - sparse_table.sparse_value_name.extend(worker.get_desc() - .sparse_table[i].slot_value) - sparse_table.sparse_grad_name.extend(worker.get_desc().sparse_table[ 
- i].slot_gradient) + sparse_table.sparse_key_name.extend( + worker.get_desc().sparse_table[i].slot_key) + sparse_table.sparse_value_name.extend( + worker.get_desc().sparse_table[i].slot_value) + sparse_table.sparse_grad_name.extend( + worker.get_desc().sparse_table[i].slot_gradient) if opt_info["use_cvm"] or "no_cvm" in opt_info and opt_info[ "no_cvm"] == True: sparse_table.emb_dim = \ @@ -569,15 +569,16 @@ class Section(DeviceWorker): # then runs Backward phase for all microbatches. # 1F1B scheduler, which runs forward phase and backward phase altertively # after startup phase. - assert schedule_mode_str in ["F-then-B", "1F1B"], ( - "The schedule mode " + assert schedule_mode_str in [ + "F-then-B", "1F1B" + ], ("The schedule mode " "for pipeline must be one of F-then-B or 1F1B") schedule_mode = 0 if schedule_mode_str == "F-then-B" else 1 section_param.schedule_mode = schedule_mode cfg = section_param.section_config program = pipeline_opt["section_program"] - cfg.program_desc.ParseFromString(program._get_desc() - .serialize_to_string()) + cfg.program_desc.ParseFromString( + program._get_desc().serialize_to_string()) # TODO: why does not work # cfg.program_desc.CopyFrom(program.program._get_desc()) place = pipeline_opt["place"] @@ -616,11 +617,12 @@ class HeterSection(DeviceWorker): "num_pipeline_stages"] cfg = heter_section_param.section_config program = heter_pipeline_opt["section_program"] - cfg.program_desc.ParseFromString(program._get_desc() - .serialize_to_string()) + cfg.program_desc.ParseFromString( + program._get_desc().serialize_to_string()) class DeviceWorkerFactory(object): + def _create_device_worker(self, worker_type): classname = worker_type.capitalize() return globals()[classname]() diff --git a/python/paddle/fluid/distributed/downpour.py b/python/paddle/fluid/distributed/downpour.py index 89e9a6a9076..4d6cc88ea7e 100644 --- a/python/paddle/fluid/distributed/downpour.py +++ b/python/paddle/fluid/distributed/downpour.py @@ -93,14 +93,13 @@ class DownpourSGD(object): param_grads_list = [] for loss_index in range(len(losses)): program_config = ps_param.trainer_param.program_config.add() - program_config.program_id = str( - id(losses[loss_index].block.program)) + program_config.program_id = str(id( + losses[loss_index].block.program)) program_config.pull_sparse_table_id.extend([sparse_table_index]) program_config.push_sparse_table_id.extend([sparse_table_index]) - params_grads = sorted( - append_backward(losses[loss_index], parameter_list, - no_grad_set), - key=lambda x: x[0].name) + params_grads = sorted(append_backward(losses[loss_index], + parameter_list, no_grad_set), + key=lambda x: x[0].name) param_grads_list.append(params_grads) params = [] grads = [] diff --git a/python/paddle/fluid/distributed/fleet.py b/python/paddle/fluid/distributed/fleet.py index cd261195685..6c2bcdc213b 100644 --- a/python/paddle/fluid/distributed/fleet.py +++ b/python/paddle/fluid/distributed/fleet.py @@ -37,8 +37,8 @@ class Fleet(object): def init_pserver(self, opt_info): if "fleet_desc" in opt_info: - self.dist_desc_str_ = text_format.MessageToString(opt_info[ - "fleet_desc"]) + self.dist_desc_str_ = text_format.MessageToString( + opt_info["fleet_desc"]) self.dist_desc_ = opt_info["fleet_desc"] else: print( @@ -54,8 +54,8 @@ class Fleet(object): def init_worker(self, opt_info): if "fleet_desc" in opt_info: - self.dist_desc_str_ = text_format.MessageToString(opt_info[ - "fleet_desc"]) + self.dist_desc_str_ = text_format.MessageToString( + opt_info["fleet_desc"]) self.dist_desc_ = 
opt_info["fleet_desc"] else: print( diff --git a/python/paddle/fluid/distributed/ps_pb2.py b/python/paddle/fluid/distributed/ps_pb2.py index 5c9b2def076..f1262ebae12 100644 --- a/python/paddle/fluid/distributed/ps_pb2.py +++ b/python/paddle/fluid/distributed/ps_pb2.py @@ -16,6 +16,7 @@ # source: ps.proto import sys + _b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode('latin1')) from google.protobuf.internal import enum_type_wrapper from google.protobuf import descriptor as _descriptor @@ -42,15 +43,22 @@ _TABLETYPE = _descriptor.EnumDescriptor( filename=None, file=DESCRIPTOR, values=[ - _descriptor.EnumValueDescriptor( - name='PS_SPARSE_TABLE', index=0, number=0, options=None, type=None), - _descriptor.EnumValueDescriptor( - name='PS_DENSE_TABLE', index=1, number=1, options=None, type=None), + _descriptor.EnumValueDescriptor(name='PS_SPARSE_TABLE', + index=0, + number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_DENSE_TABLE', + index=1, + number=1, + options=None, + type=None), ], containing_type=None, options=None, serialized_start=3489, - serialized_end=3541, ) + serialized_end=3541, +) _sym_db.RegisterEnumDescriptor(_TABLETYPE) TableType = enum_type_wrapper.EnumTypeWrapper(_TABLETYPE) @@ -60,82 +68,77 @@ _PSCMDID = _descriptor.EnumDescriptor( filename=None, file=DESCRIPTOR, values=[ - _descriptor.EnumValueDescriptor( - name='PS_PULL_DENSE_TABLE', - index=0, - number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_PUSH_DENSE_TABLE', - index=1, - number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_PULL_SPARSE_TABLE', - index=2, - number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_PUSH_SPARSE_TABLE', - index=3, - number=3, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_SHRINK_TABLE', index=4, number=4, options=None, type=None), - _descriptor.EnumValueDescriptor( - name='PS_SAVE_ONE_TABLE', - index=5, - number=5, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_SAVE_ALL_TABLE', - index=6, - number=6, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_LOAD_ONE_TABLE', - index=7, - number=7, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_LOAD_ALL_TABLE', - index=8, - number=8, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_CLEAR_ONE_TABLE', - index=9, - number=9, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_CLEAR_ALL_TABLE', - index=10, - number=10, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_PUSH_DENSE_PARAM', - index=11, - number=11, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_STOP_SERVER', index=12, number=12, options=None, - type=None), + _descriptor.EnumValueDescriptor(name='PS_PULL_DENSE_TABLE', + index=0, + number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_PUSH_DENSE_TABLE', + index=1, + number=1, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_PULL_SPARSE_TABLE', + index=2, + number=2, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_PUSH_SPARSE_TABLE', + index=3, + number=3, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_SHRINK_TABLE', + index=4, + number=4, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_SAVE_ONE_TABLE', + index=5, + number=5, + options=None, + type=None), + 
_descriptor.EnumValueDescriptor(name='PS_SAVE_ALL_TABLE', + index=6, + number=6, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_LOAD_ONE_TABLE', + index=7, + number=7, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_LOAD_ALL_TABLE', + index=8, + number=8, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_CLEAR_ONE_TABLE', + index=9, + number=9, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_CLEAR_ALL_TABLE', + index=10, + number=10, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_PUSH_DENSE_PARAM', + index=11, + number=11, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_STOP_SERVER', + index=12, + number=12, + options=None, + type=None), ], containing_type=None, options=None, serialized_start=3544, - serialized_end=3861, ) + serialized_end=3861, +) _sym_db.RegisterEnumDescriptor(_PSCMDID) PsCmdID = enum_type_wrapper.EnumTypeWrapper(_PSCMDID) @@ -161,15 +164,22 @@ _FSCLIENTPARAMETER_FSAPITYPE = _descriptor.EnumDescriptor( filename=None, file=DESCRIPTOR, values=[ - _descriptor.EnumValueDescriptor( - name='HDFS', index=0, number=0, options=None, type=None), - _descriptor.EnumValueDescriptor( - name='AFS', index=1, number=1, options=None, type=None), + _descriptor.EnumValueDescriptor(name='HDFS', + index=0, + number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='AFS', + index=1, + number=1, + options=None, + type=None), ], containing_type=None, options=None, serialized_start=3457, - serialized_end=3487, ) + serialized_end=3487, +) _sym_db.RegisterEnumDescriptor(_FSCLIENTPARAMETER_FSAPITYPE) _PSPARAMETER = _descriptor.Descriptor( @@ -179,38 +189,36 @@ _PSPARAMETER = _descriptor.Descriptor( file=DESCRIPTOR, containing_type=None, fields=[ - _descriptor.FieldDescriptor( - name='worker_class', - full_name='paddle.PSParameter.worker_class', - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='server_class', - full_name='paddle.PSParameter.server_class', - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='worker_class', + full_name='paddle.PSParameter.worker_class', + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor(name='server_class', + full_name='paddle.PSParameter.server_class', + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='instance_class', full_name='paddle.PSParameter.instance_class', @@ -227,38 +235,36 @@ _PSPARAMETER = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), - _descriptor.FieldDescriptor( - name='worker_param', - 
full_name='paddle.PSParameter.worker_param', - index=3, - number=101, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='server_param', - full_name='paddle.PSParameter.server_param', - index=4, - number=102, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='worker_param', + full_name='paddle.PSParameter.worker_param', + index=3, + number=101, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor(name='server_param', + full_name='paddle.PSParameter.server_param', + index=4, + number=102, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='trainer_param', full_name='paddle.PSParameter.trainer_param', @@ -301,7 +307,8 @@ _PSPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=21, - serialized_end=307, ) + serialized_end=307, +) _WORKERPARAMETER = _descriptor.Descriptor( name='WorkerParameter', @@ -336,7 +343,8 @@ _WORKERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=309, - serialized_end=390, ) + serialized_end=390, +) _SERVERPARAMETER = _descriptor.Descriptor( name='ServerParameter', @@ -371,7 +379,8 @@ _SERVERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=392, - serialized_end=473, ) + serialized_end=473, +) _DOWNPOURWORKERPARAMETER = _descriptor.Descriptor( name='DownpourWorkerParameter', @@ -406,7 +415,8 @@ _DOWNPOURWORKERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=475, - serialized_end=554, ) + serialized_end=554, +) _DOWNPOURTRAINERPARAMETER = _descriptor.Descriptor( name='DownpourTrainerParameter', @@ -521,7 +531,8 @@ _DOWNPOURTRAINERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=557, - serialized_end=810, ) + serialized_end=810, +) _PROGRAMCONFIG = _descriptor.Descriptor( name='ProgramConfig', @@ -530,22 +541,21 @@ _PROGRAMCONFIG = _descriptor.Descriptor( file=DESCRIPTOR, containing_type=None, fields=[ - _descriptor.FieldDescriptor( - name='program_id', - full_name='paddle.ProgramConfig.program_id', - index=0, - number=1, - type=9, - cpp_type=9, - label=2, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='program_id', + full_name='paddle.ProgramConfig.program_id', + index=0, + number=1, + type=9, + cpp_type=9, + label=2, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='push_sparse_table_id', full_name='paddle.ProgramConfig.push_sparse_table_id', @@ -620,7 +630,8 @@ _PROGRAMCONFIG = 
_descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=813, - serialized_end=966, ) + serialized_end=966, +) _DENSETABLEPARAMETER = _descriptor.Descriptor( name='DenseTableParameter', @@ -703,7 +714,8 @@ _DENSETABLEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=968, - serialized_end=1091, ) + serialized_end=1091, +) _SPARSETABLEPARAMETER = _descriptor.Descriptor( name='SparseTableParameter', @@ -802,7 +814,8 @@ _SPARSETABLEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=1093, - serialized_end=1215, ) + serialized_end=1215, +) _DOWNPOURSERVERPARAMETER = _descriptor.Descriptor( name='DownpourServerParameter', @@ -853,7 +866,8 @@ _DOWNPOURSERVERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=1218, - serialized_end=1352, ) + serialized_end=1352, +) _SERVERSERVICEPARAMETER = _descriptor.Descriptor( name='ServerServiceParameter', @@ -952,7 +966,8 @@ _SERVERSERVICEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=1355, - serialized_end=1570, ) + serialized_end=1570, +) _TABLEPARAMETER = _descriptor.Descriptor( name='TableParameter', @@ -961,22 +976,21 @@ _TABLEPARAMETER = _descriptor.Descriptor( file=DESCRIPTOR, containing_type=None, fields=[ - _descriptor.FieldDescriptor( - name='table_id', - full_name='paddle.TableParameter.table_id', - index=0, - number=1, - type=4, - cpp_type=4, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='table_id', + full_name='paddle.TableParameter.table_id', + index=0, + number=1, + type=4, + cpp_type=4, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='table_class', full_name='paddle.TableParameter.table_class', @@ -1009,38 +1023,36 @@ _TABLEPARAMETER = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), - _descriptor.FieldDescriptor( - name='accessor', - full_name='paddle.TableParameter.accessor', - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='type', - full_name='paddle.TableParameter.type', - index=4, - number=5, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='accessor', + full_name='paddle.TableParameter.accessor', + index=3, + number=4, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor(name='type', + full_name='paddle.TableParameter.type', + index=4, + number=5, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='compress_in_save', 
full_name='paddle.TableParameter.compress_in_save', @@ -1067,7 +1079,8 @@ _TABLEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=1573, - serialized_end=1764, ) + serialized_end=1764, +) _TABLEACCESSORPARAMETER = _descriptor.Descriptor( name='TableAccessorParameter', @@ -1214,7 +1227,8 @@ _TABLEACCESSORPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=1767, - serialized_end=2136, ) + serialized_end=2136, +) _DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( name='DownpourTableAccessorParameter', @@ -1305,7 +1319,8 @@ _DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( options=None), _descriptor.FieldDescriptor( name='show_click_decay_rate', - full_name='paddle.DownpourTableAccessorParameter.show_click_decay_rate', + full_name= + 'paddle.DownpourTableAccessorParameter.show_click_decay_rate', index=5, number=6, type=2, @@ -1345,7 +1360,8 @@ _DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=2139, - serialized_end=2345, ) + serialized_end=2345, +) _TABLEACCESSORSAVEPARAMETER = _descriptor.Descriptor( name='TableAccessorSaveParameter', @@ -1412,7 +1428,8 @@ _TABLEACCESSORSAVEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=2347, - serialized_end=2430, ) + serialized_end=2430, +) _PSREQUESTMESSAGE = _descriptor.Descriptor( name='PsRequestMessage', @@ -1421,22 +1438,21 @@ _PSREQUESTMESSAGE = _descriptor.Descriptor( file=DESCRIPTOR, containing_type=None, fields=[ - _descriptor.FieldDescriptor( - name='cmd_id', - full_name='paddle.PsRequestMessage.cmd_id', - index=0, - number=1, - type=13, - cpp_type=3, - label=2, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='cmd_id', + full_name='paddle.PsRequestMessage.cmd_id', + index=0, + number=1, + type=13, + cpp_type=3, + label=2, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='table_id', full_name='paddle.PsRequestMessage.table_id', @@ -1453,22 +1469,21 @@ _PSREQUESTMESSAGE = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), - _descriptor.FieldDescriptor( - name='params', - full_name='paddle.PsRequestMessage.params', - index=2, - number=3, - type=12, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='params', + full_name='paddle.PsRequestMessage.params', + index=2, + number=3, + type=12, + cpp_type=9, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='client_id', full_name='paddle.PsRequestMessage.client_id', @@ -1485,22 +1500,21 @@ _PSREQUESTMESSAGE = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), - _descriptor.FieldDescriptor( - name='data', - full_name='paddle.PsRequestMessage.data', - index=4, - number=5, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b(""), - message_type=None, - enum_type=None, - containing_type=None, - 
is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='data', + full_name='paddle.PsRequestMessage.data', + index=4, + number=5, + type=12, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b(""), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), ], extensions=[], nested_types=[], @@ -1511,7 +1525,8 @@ _PSREQUESTMESSAGE = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=2432, - serialized_end=2533, ) + serialized_end=2533, +) _SPARSESGDRULEPARAMETER = _descriptor.Descriptor( name='SparseSGDRuleParameter', @@ -1594,7 +1609,8 @@ _SPARSESGDRULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=2535, - serialized_end=2654, ) + serialized_end=2654, +) _DENSESGDRULEPARAMETER = _descriptor.Descriptor( name='DenseSGDRuleParameter', @@ -1693,7 +1709,8 @@ _DENSESGDRULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=2657, - serialized_end=2882, ) + serialized_end=2882, +) _ADAMSGDPARAMETER = _descriptor.Descriptor( name='AdamSGDParameter', @@ -1792,7 +1809,8 @@ _ADAMSGDPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=2885, - serialized_end=3019, ) + serialized_end=3019, +) _NAIVESGDPARAMETER = _descriptor.Descriptor( name='NaiveSGDParameter', @@ -1843,7 +1861,8 @@ _NAIVESGDPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=3021, - serialized_end=3087, ) + serialized_end=3087, +) _SUMMARYSGDPARAMETER = _descriptor.Descriptor( name='SummarySGDParameter', @@ -1878,7 +1897,8 @@ _SUMMARYSGDPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=3089, - serialized_end=3148, ) + serialized_end=3148, +) _MOVINGAVERAGERULEPARAMETER = _descriptor.Descriptor( name='MovingAverageRuleParameter', @@ -1913,7 +1933,8 @@ _MOVINGAVERAGERULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=3150, - serialized_end=3196, ) + serialized_end=3196, +) _PSRESPONSEMESSAGE = _descriptor.Descriptor( name='PsResponseMessage', @@ -1954,22 +1975,21 @@ _PSRESPONSEMESSAGE = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), - _descriptor.FieldDescriptor( - name='data', - full_name='paddle.PsResponseMessage.data', - index=2, - number=3, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b(""), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='data', + full_name='paddle.PsResponseMessage.data', + index=2, + number=3, + type=12, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b(""), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), ], extensions=[], nested_types=[], @@ -1980,7 +2000,8 @@ _PSRESPONSEMESSAGE = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=3198, - serialized_end=3271, ) + serialized_end=3271, +) _FSCLIENTPARAMETER = _descriptor.Descriptor( name='FsClientParameter', @@ -2005,54 +2026,51 @@ _FSCLIENTPARAMETER = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), - _descriptor.FieldDescriptor( - name='uri', - full_name='paddle.FsClientParameter.uri', - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - 
default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='user', - full_name='paddle.FsClientParameter.user', - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='passwd', - full_name='paddle.FsClientParameter.passwd', - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='uri', + full_name='paddle.FsClientParameter.uri', + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor(name='user', + full_name='paddle.FsClientParameter.user', + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor(name='passwd', + full_name='paddle.FsClientParameter.passwd', + index=3, + number=4, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='buffer_size', full_name='paddle.FsClientParameter.buffer_size', @@ -2104,14 +2122,17 @@ _FSCLIENTPARAMETER = _descriptor.Descriptor( ], extensions=[], nested_types=[], - enum_types=[_FSCLIENTPARAMETER_FSAPITYPE, ], + enum_types=[ + _FSCLIENTPARAMETER_FSAPITYPE, + ], options=None, is_extendable=False, syntax='proto2', extension_ranges=[], oneofs=[], serialized_start=3274, - serialized_end=3487, ) + serialized_end=3487, +) _PSPARAMETER.fields_by_name['worker_param'].message_type = _WORKERPARAMETER _PSPARAMETER.fields_by_name['server_param'].message_type = _SERVERPARAMETER @@ -2193,121 +2214,109 @@ DESCRIPTOR.enum_types_by_name['PsCmdID'] = _PSCMDID PSParameter = _reflection.GeneratedProtocolMessageType( 'PSParameter', (_message.Message, ), - dict( - DESCRIPTOR=_PSPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.PSParameter) - )) + dict(DESCRIPTOR=_PSPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.PSParameter) + )) _sym_db.RegisterMessage(PSParameter) WorkerParameter = _reflection.GeneratedProtocolMessageType( 'WorkerParameter', (_message.Message, ), - dict( - DESCRIPTOR=_WORKERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.WorkerParameter) - )) + dict(DESCRIPTOR=_WORKERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.WorkerParameter) + )) _sym_db.RegisterMessage(WorkerParameter) ServerParameter = _reflection.GeneratedProtocolMessageType( 'ServerParameter', (_message.Message, ), - dict( - DESCRIPTOR=_SERVERPARAMETER, - __module__='ps_pb2' - # 
@@protoc_insertion_point(class_scope:paddle.ServerParameter) - )) + dict(DESCRIPTOR=_SERVERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.ServerParameter) + )) _sym_db.RegisterMessage(ServerParameter) DownpourWorkerParameter = _reflection.GeneratedProtocolMessageType( 'DownpourWorkerParameter', (_message.Message, ), - dict( - DESCRIPTOR=_DOWNPOURWORKERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourWorkerParameter) - )) + dict(DESCRIPTOR=_DOWNPOURWORKERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourWorkerParameter) + )) _sym_db.RegisterMessage(DownpourWorkerParameter) DownpourTrainerParameter = _reflection.GeneratedProtocolMessageType( 'DownpourTrainerParameter', (_message.Message, ), - dict( - DESCRIPTOR=_DOWNPOURTRAINERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourTrainerParameter) - )) + dict(DESCRIPTOR=_DOWNPOURTRAINERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourTrainerParameter) + )) _sym_db.RegisterMessage(DownpourTrainerParameter) ProgramConfig = _reflection.GeneratedProtocolMessageType( 'ProgramConfig', (_message.Message, ), - dict( - DESCRIPTOR=_PROGRAMCONFIG, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.ProgramConfig) - )) + dict(DESCRIPTOR=_PROGRAMCONFIG, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.ProgramConfig) + )) _sym_db.RegisterMessage(ProgramConfig) DenseTableParameter = _reflection.GeneratedProtocolMessageType( 'DenseTableParameter', (_message.Message, ), - dict( - DESCRIPTOR=_DENSETABLEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DenseTableParameter) - )) + dict(DESCRIPTOR=_DENSETABLEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DenseTableParameter) + )) _sym_db.RegisterMessage(DenseTableParameter) SparseTableParameter = _reflection.GeneratedProtocolMessageType( 'SparseTableParameter', (_message.Message, ), - dict( - DESCRIPTOR=_SPARSETABLEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SparseTableParameter) - )) + dict(DESCRIPTOR=_SPARSETABLEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.SparseTableParameter) + )) _sym_db.RegisterMessage(SparseTableParameter) DownpourServerParameter = _reflection.GeneratedProtocolMessageType( 'DownpourServerParameter', (_message.Message, ), - dict( - DESCRIPTOR=_DOWNPOURSERVERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourServerParameter) - )) + dict(DESCRIPTOR=_DOWNPOURSERVERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourServerParameter) + )) _sym_db.RegisterMessage(DownpourServerParameter) ServerServiceParameter = _reflection.GeneratedProtocolMessageType( 'ServerServiceParameter', (_message.Message, ), - dict( - DESCRIPTOR=_SERVERSERVICEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.ServerServiceParameter) - )) + dict(DESCRIPTOR=_SERVERSERVICEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.ServerServiceParameter) + )) _sym_db.RegisterMessage(ServerServiceParameter) TableParameter = _reflection.GeneratedProtocolMessageType( 'TableParameter', (_message.Message, ), - dict( - DESCRIPTOR=_TABLEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.TableParameter) - )) + 
dict(DESCRIPTOR=_TABLEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.TableParameter) + )) _sym_db.RegisterMessage(TableParameter) TableAccessorParameter = _reflection.GeneratedProtocolMessageType( 'TableAccessorParameter', (_message.Message, ), - dict( - DESCRIPTOR=_TABLEACCESSORPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.TableAccessorParameter) - )) + dict(DESCRIPTOR=_TABLEACCESSORPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.TableAccessorParameter) + )) _sym_db.RegisterMessage(TableAccessorParameter) DownpourTableAccessorParameter = _reflection.GeneratedProtocolMessageType( @@ -2333,61 +2342,55 @@ _sym_db.RegisterMessage(TableAccessorSaveParameter) PsRequestMessage = _reflection.GeneratedProtocolMessageType( 'PsRequestMessage', (_message.Message, ), - dict( - DESCRIPTOR=_PSREQUESTMESSAGE, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.PsRequestMessage) - )) + dict(DESCRIPTOR=_PSREQUESTMESSAGE, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.PsRequestMessage) + )) _sym_db.RegisterMessage(PsRequestMessage) SparseSGDRuleParameter = _reflection.GeneratedProtocolMessageType( 'SparseSGDRuleParameter', (_message.Message, ), - dict( - DESCRIPTOR=_SPARSESGDRULEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SparseSGDRuleParameter) - )) + dict(DESCRIPTOR=_SPARSESGDRULEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.SparseSGDRuleParameter) + )) _sym_db.RegisterMessage(SparseSGDRuleParameter) DenseSGDRuleParameter = _reflection.GeneratedProtocolMessageType( 'DenseSGDRuleParameter', (_message.Message, ), - dict( - DESCRIPTOR=_DENSESGDRULEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DenseSGDRuleParameter) - )) + dict(DESCRIPTOR=_DENSESGDRULEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DenseSGDRuleParameter) + )) _sym_db.RegisterMessage(DenseSGDRuleParameter) AdamSGDParameter = _reflection.GeneratedProtocolMessageType( 'AdamSGDParameter', (_message.Message, ), - dict( - DESCRIPTOR=_ADAMSGDPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.AdamSGDParameter) - )) + dict(DESCRIPTOR=_ADAMSGDPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.AdamSGDParameter) + )) _sym_db.RegisterMessage(AdamSGDParameter) NaiveSGDParameter = _reflection.GeneratedProtocolMessageType( 'NaiveSGDParameter', (_message.Message, ), - dict( - DESCRIPTOR=_NAIVESGDPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.NaiveSGDParameter) - )) + dict(DESCRIPTOR=_NAIVESGDPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.NaiveSGDParameter) + )) _sym_db.RegisterMessage(NaiveSGDParameter) SummarySGDParameter = _reflection.GeneratedProtocolMessageType( 'SummarySGDParameter', (_message.Message, ), - dict( - DESCRIPTOR=_SUMMARYSGDPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SummarySGDParameter) - )) + dict(DESCRIPTOR=_SUMMARYSGDPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.SummarySGDParameter) + )) _sym_db.RegisterMessage(SummarySGDParameter) MovingAverageRuleParameter = _reflection.GeneratedProtocolMessageType( @@ -2403,21 +2406,19 @@ _sym_db.RegisterMessage(MovingAverageRuleParameter) PsResponseMessage = _reflection.GeneratedProtocolMessageType( 'PsResponseMessage', 
(_message.Message, ), - dict( - DESCRIPTOR=_PSRESPONSEMESSAGE, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.PsResponseMessage) - )) + dict(DESCRIPTOR=_PSRESPONSEMESSAGE, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.PsResponseMessage) + )) _sym_db.RegisterMessage(PsResponseMessage) FsClientParameter = _reflection.GeneratedProtocolMessageType( 'FsClientParameter', (_message.Message, ), - dict( - DESCRIPTOR=_FSCLIENTPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.FsClientParameter) - )) + dict(DESCRIPTOR=_FSCLIENTPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.FsClientParameter) + )) _sym_db.RegisterMessage(FsClientParameter) DESCRIPTOR.has_options = True diff --git a/python/paddle/fluid/dygraph/amp/auto_cast.py b/python/paddle/fluid/dygraph/amp/auto_cast.py index 5da5dbbd7bd..f441a35ca0f 100644 --- a/python/paddle/fluid/dygraph/amp/auto_cast.py +++ b/python/paddle/fluid/dygraph/amp/auto_cast.py @@ -181,8 +181,8 @@ def check_models(models): for model in models: if not isinstance(model, paddle.nn.Layer): raise RuntimeError( - "Current train mode is pure fp16, models should be paddle.nn.Layer, but receive {}.". - format(type(model))) + "Current train mode is pure fp16, models should be paddle.nn.Layer, but receive {}." + .format(type(model))) if isinstance(model, paddle.DataParallel): raise RuntimeError( "For distributed AMP training, you should first use paddle.amp.decorate() to decotate origin model, and then call paddle.DataParallel get distributed model." @@ -191,11 +191,12 @@ def check_models(models): def check_optimizers(optimizers): for optimizer in optimizers: - if not isinstance(optimizer, (paddle.optimizer.Optimizer, - paddle.fluid.optimizer.Optimizer)): + if not isinstance( + optimizer, + (paddle.optimizer.Optimizer, paddle.fluid.optimizer.Optimizer)): raise RuntimeError( - "Current train mode is pure fp16, optimizers should be paddle.optimizer.Optimizer or paddle.fluid.optimizer.Optimizer, but receive {}.". - format(type(optimizer))) + "Current train mode is pure fp16, optimizers should be paddle.optimizer.Optimizer or paddle.fluid.optimizer.Optimizer, but receive {}." + .format(type(optimizer))) @signature_safe_contextmanager @@ -273,11 +274,11 @@ def amp_guard(enable=True, # check device_type: # NOTE: Now, amp only support gpu for float16 and bfloat16, xpu for float16, mlu for float16, npu for float16. # Maybe we will support cpu for bfloat16. - if enable and not (tracer._expected_place.is_gpu_place() or - tracer._expected_place.is_xpu_place() or - tracer._expected_place.is_mlu_place() or - tracer._expected_place.is_npu_place() or - tracer._expected_place.is_custom_place()): + if enable and not (tracer._expected_place.is_gpu_place() + or tracer._expected_place.is_xpu_place() + or tracer._expected_place.is_mlu_place() + or tracer._expected_place.is_npu_place() + or tracer._expected_place.is_custom_place()): warnings.warn( 'amp_guard can only be enabled on CUDAPlace, XPUPlace, MLUPlace, NPUPlace, and CustomPlace, current place is %s, so it makes no effect.' 
% tracer._expected_place) @@ -384,6 +385,7 @@ def amp_guard(enable=True, class StateDictHook(object): + def __init__(self, save_dtype): self._save_dtype = save_dtype @@ -492,8 +494,9 @@ def amp_decorate(models, if optimizers is not None: # check optimizers optimizers_is_list = False - if isinstance(optimizers, (paddle.optimizer.Optimizer, - paddle.fluid.optimizer.Optimizer)): + if isinstance( + optimizers, + (paddle.optimizer.Optimizer, paddle.fluid.optimizer.Optimizer)): optimizers_is_list = False optimizers = [optimizers] check_optimizers(optimizers) @@ -504,7 +507,7 @@ def amp_decorate(models, raise TypeError( "optimizers must be either a single optimizer or a list of optimizers." ) - # supprot master_weight + # supprot master_weight for idx_opt in range(len(optimizers)): if hasattr(optimizers[idx_opt], '_multi_precision'): if master_weight is False: diff --git a/python/paddle/fluid/dygraph/amp/loss_scaler.py b/python/paddle/fluid/dygraph/amp/loss_scaler.py index df79b5ab5e4..9da69b1e45e 100644 --- a/python/paddle/fluid/dygraph/amp/loss_scaler.py +++ b/python/paddle/fluid/dygraph/amp/loss_scaler.py @@ -104,11 +104,11 @@ class AmpScaler(object): raise ValueError( "current_tracer is None, maybe it is not in imperative mode.") - if enable and not (tracer._expected_place.is_gpu_place() or - tracer._expected_place.is_xpu_place() or - tracer._expected_place.is_mlu_place() or - tracer._expected_place.is_npu_place() or - tracer._expected_place.is_custom_place()): + if enable and not (tracer._expected_place.is_gpu_place() + or tracer._expected_place.is_xpu_place() + or tracer._expected_place.is_mlu_place() + or tracer._expected_place.is_npu_place() + or tracer._expected_place.is_custom_place()): warnings.warn( 'AmpScaler can only be enabled on CUDAPlace, XPUPlace, MLUPlace, NPUPlace and CustomPlace, current place is %s, so it makes no effect.' % tracer._expected_place) @@ -279,15 +279,13 @@ class AmpScaler(object): ] param_grads_fp16 = [ param._grad_ivar() for param in optimizer._parameter_list - if (param._grad_ivar() is not None - ) and (param._grad_ivar().dtype == core.VarDesc.VarType.FP16 - ) + if (param._grad_ivar() is not None) and ( + param._grad_ivar().dtype == core.VarDesc.VarType.FP16) ] param_grads_fp32 = [ param._grad_ivar() for param in optimizer._parameter_list - if (param._grad_ivar() is not None - ) and (param._grad_ivar().dtype == core.VarDesc.VarType.FP32 - ) + if (param._grad_ivar() is not None) and ( + param._grad_ivar().dtype == core.VarDesc.VarType.FP32) ] if core.is_compiled_with_npu(): float_status = _C_ops.alloc_float_status() @@ -332,10 +330,9 @@ class AmpScaler(object): self._decr_count = self._decr_count + 1 if self._decr_count == self._decr_every_n_nan_or_inf: print( - 'Found inf or nan, current scale is: {}, decrease to: {}*{}'. 
- format( - float(self._scale), - float(self._scale), float(self._decr_ratio))) + 'Found inf or nan, current scale is: {}, decrease to: {}*{}' + .format(float(self._scale), float(self._scale), + float(self._decr_ratio))) self._scale = self._scale * self._decr_ratio self._decr_count = 0 else: diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index 6e61f998b28..4e22af9cfdb 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -46,6 +46,7 @@ def in_declarative_mode(): def _switch_to_static_graph_(func): + def __impl__(*args, **kwargs): with framework._dygraph_guard(None): return func(*args, **kwargs) @@ -85,8 +86,8 @@ _functional_dygraph_context_manager = None @signature_safe_contextmanager def param_guard(parameters): # Note: parameters is a reference of self._parameters or self._buffers - if in_declarative_mode() and not framework._non_static_mode( - ) and parameters: + if in_declarative_mode( + ) and not framework._non_static_mode() and parameters: origin_parameters = parameters.copy() for name, var_base in parameters.items(): if isinstance(var_base, list): @@ -124,8 +125,8 @@ def _convert_into_variable(tensor): # non-persistable. See case of `drop_state` in lstm api. is_persistable = len(tensor.shape) > 0 - new_var = tensor._to_static_var( - to_parameter=False, persistable=is_persistable) + new_var = tensor._to_static_var(to_parameter=False, + persistable=is_persistable) return new_var else: return tensor @@ -348,6 +349,7 @@ class no_grad_: """ def __call__(self, func): + @decorator.decorator def _decorate_function(func, *args, **kwargs): with self: @@ -569,8 +571,8 @@ def grad(outputs, for each_var in in_out_list: if _in_eager_without_dygraph_check(): assert isinstance( - each_var, core.eager. - Tensor), "Elements of {} must be Tensor".format(name) + each_var, core.eager.Tensor + ), "Elements of {} must be Tensor".format(name) else: assert isinstance( each_var, @@ -580,8 +582,8 @@ def grad(outputs, else: if _in_eager_without_dygraph_check(): assert isinstance( - in_out_list, core.eager. 
- Tensor), "{} must be Tensor or list of Tensor".format(name) + in_out_list, core.eager.Tensor + ), "{} must be Tensor or list of Tensor".format(name) else: assert isinstance( in_out_list, core.VarBase @@ -632,7 +634,8 @@ def grad(outputs, else: if _in_eager_without_dygraph_check(): raise AssertionError( - "no_grad_vars must be None, Tensor or list/tuple/set of Tensors") + "no_grad_vars must be None, Tensor or list/tuple/set of Tensors" + ) else: raise AssertionError( "no_grad_vars must be None, Variable or list/tuple/set of Variables" @@ -652,15 +655,17 @@ def grad(outputs, assert only_inputs, "only_inputs=False is not supported yet" if _in_eager_without_dygraph_check(): - return core.eager.run_partial_grad( - outputs, inputs, grad_outputs, retain_graph, create_graph, - only_inputs, allow_unused, no_grad_vars) + return core.eager.run_partial_grad(outputs, inputs, grad_outputs, + retain_graph, create_graph, + only_inputs, allow_unused, + no_grad_vars) else: place = core.Place() place.set_place(framework._current_expected_place()) - return core.dygraph_partial_grad( - inputs, outputs, grad_outputs, no_grad_vars, place, create_graph, - retain_graph, allow_unused, only_inputs) + return core.dygraph_partial_grad(inputs, outputs, grad_outputs, + no_grad_vars, place, create_graph, + retain_graph, allow_unused, + only_inputs) @framework.dygraph_only @@ -756,14 +761,13 @@ def to_variable(value, name=None, zero_copy=None, dtype=None): value = value.astype(dtype) if _in_eager_without_dygraph_check(): - return core.eager.Tensor(value, - framework._current_expected_place(), False, - zero_copy, name if name else None, True) + return core.eager.Tensor(value, framework._current_expected_place(), + False, zero_copy, name if name else None, + True) else: - py_var = core.VarBase( - value=value, - place=framework._current_expected_place(), - persistable=False, - zero_copy=zero_copy, - name=name if name else '') + py_var = core.VarBase(value=value, + place=framework._current_expected_place(), + persistable=False, + zero_copy=zero_copy, + name=name if name else '') return py_var diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py index ba5c709b1d8..0fe5d236a58 100644 --- a/python/paddle/fluid/dygraph/checkpoint.py +++ b/python/paddle/fluid/dygraph/checkpoint.py @@ -119,7 +119,7 @@ def save_dygraph(state_dict, model_path): pickle.dump(model_dict, f, protocol=2) -# NOTE(chenweihang): load_dygraph will deprecated in future, we don't +# NOTE(chenweihang): load_dygraph will deprecated in future, we don't # support new loading features for it # TODO(qingqing01): remove dygraph_only to support loading static model. # maybe need to unify the loading interface after 2.0 API is ready. @@ -217,11 +217,11 @@ def load_dygraph(model_path, **configs): if os.path.exists(model_file_path): # Load state dict by `jit.save/io.save_inference_model` save format # NOTE(chenweihang): [ Compatibility of save_inference_model save format ] - # The model saved by `save_inference_model` does not completely correspond to - # the information required by the `state_dict` under the dygraph. - # `save_inference_model` not save structured name, we need to remind + # The model saved by `save_inference_model` does not completely correspond to + # the information required by the `state_dict` under the dygraph. 
+ # `save_inference_model` not save structured name, we need to remind # the user to configure the `use_structured_name` argument when `set_state_dict` - # NOTE(chenweihang): `jit.save` doesn't save optimizer state + # NOTE(chenweihang): `jit.save` doesn't save optimizer state # 1. load program desc & construct _ProgramHolder programs = _construct_program_holders(model_path, @@ -259,11 +259,11 @@ def load_dygraph(model_path, **configs): # load state dict by `io.save_params/persistables` save format # TODO(chenweihang): [ Now only supports loading parameters separately ] # If users save all parameters as one file, the [ variable.name -> variable ] - # mapping info will lost, so users need to give variable list, but users build + # mapping info will lost, so users need to give variable list, but users build # variable list in dygraph mode is difficult, we recommend users to use # paddle.static.load_program_state in this case - # Try to load all the files in the directory in VarBase format, + # Try to load all the files in the directory in VarBase format, # the file name is used as the name of VarBase load_var_list = [] diff --git a/python/paddle/fluid/dygraph/container.py b/python/paddle/fluid/dygraph/container.py index ca40781a5f9..854df393557 100644 --- a/python/paddle/fluid/dygraph/container.py +++ b/python/paddle/fluid/dygraph/container.py @@ -222,8 +222,8 @@ class LayerList(Layer): if isinstance(idx, int): if not (-len(self) <= idx < len(self)): raise IndexError( - 'index {} is out of range, should be an integer in range [{}, {})'. - format(idx, -len(self), len(self))) + 'index {} is out of range, should be an integer in range [{}, {})' + .format(idx, -len(self), len(self))) if idx < 0: idx += len(self) return idx diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/break_continue_transformer.py b/python/paddle/fluid/dygraph/dygraph_to_static/break_continue_transformer.py index 401ad1c8e84..b85a2137dad 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/break_continue_transformer.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/break_continue_transformer.py @@ -60,7 +60,8 @@ class ForToWhileTransformer(gast.NodeTransformer): i += len(new_stmts) return new_stmts raise ValueError( - "parent_node doesn't contain the loop_node in ForToWhileTransformer") + "parent_node doesn't contain the loop_node in ForToWhileTransformer" + ) def get_for_stmt_nodes(self, node): assert isinstance( @@ -74,12 +75,13 @@ class ForToWhileTransformer(gast.NodeTransformer): init_stmts, cond_stmt, body_stmts = stmts_tuple # 2. append break statement - new_cond_stmt = gast.BoolOp( - op=gast.And(), values=[cond_stmt, self.condition_node]) + new_cond_stmt = gast.BoolOp(op=gast.And(), + values=[cond_stmt, self.condition_node]) # 3. 
construct gast.While node - while_node = gast.While( - test=new_cond_stmt, body=body_stmts, orelse=node.orelse) + while_node = gast.While(test=new_cond_stmt, + body=body_stmts, + orelse=node.orelse) init_stmts.append(while_node) return init_stmts @@ -141,17 +143,15 @@ class BreakContinueTransformer(BaseNodeVisitor): assign_false_node = create_fill_constant_node(variable_name, False) self._add_stmt_before_cur_node(loop_node_index, assign_false_node) - cond_var_node = gast.UnaryOp( - op=gast.Not(), - operand=gast.Name( - id=variable_name, - ctx=gast.Load(), - annotation=None, - type_comment=None)) + cond_var_node = gast.UnaryOp(op=gast.Not(), + operand=gast.Name(id=variable_name, + ctx=gast.Load(), + annotation=None, + type_comment=None)) if isinstance(loop_node, gast.While): - loop_node.test = gast.BoolOp( - op=gast.And(), values=[loop_node.test, cond_var_node]) + loop_node.test = gast.BoolOp(op=gast.And(), + values=[loop_node.test, cond_var_node]) elif isinstance(loop_node, gast.For): parent_node = self.ancestor_nodes[loop_node_index - 1] for_to_while = ForToWhileTransformer(parent_node, loop_node, @@ -180,8 +180,9 @@ class BreakContinueTransformer(BaseNodeVisitor): assign_false_node = create_fill_constant_node(variable_name, False) loop_node.body.insert(0, assign_false_node) - def _remove_stmts_after_break_continue( - self, break_continue_node, break_continue_name, loop_node_index): + def _remove_stmts_after_break_continue(self, break_continue_node, + break_continue_name, + loop_node_index): for first_block_index in range( len(self.ancestor_nodes) - 1, loop_node_index - 1, -1): first_block = self.ancestor_nodes[first_block_index] @@ -214,8 +215,9 @@ class BreakContinueTransformer(BaseNodeVisitor): cur_node.orelse, son_node, break_continue_name): continue - def _replace_break_continue_in_stmt_list( - self, stmt_list, break_continue_node, break_continue_name): + def _replace_break_continue_in_stmt_list(self, stmt_list, + break_continue_node, + break_continue_name): i = index_in_list(stmt_list, break_continue_node) if i == -1: return False @@ -233,13 +235,12 @@ class BreakContinueTransformer(BaseNodeVisitor): # No need to add, we consider this as added successfully return True - if_stmt = gast.If(test=gast.UnaryOp( - op=gast.Not(), - operand=gast.Name( - id=break_continue_name, - ctx=gast.Store(), - annotation=None, - type_comment=None)), + if_stmt = gast.If(test=gast.UnaryOp(op=gast.Not(), + operand=gast.Name( + id=break_continue_name, + ctx=gast.Store(), + annotation=None, + type_comment=None)), body=stmt_list[i + 1:], orelse=[]) stmt_list[i + 1:] = [] diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/cast_transformer.py b/python/paddle/fluid/dygraph/dygraph_to_static/cast_transformer.py index 50733e4d896..bf7791c788c 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/cast_transformer.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/cast_transformer.py @@ -39,8 +39,8 @@ class CastTransformer(gast.NodeTransformer): func_str = ast_to_source_code(node.func).strip() if func_str in self._castable_type and len(node.args) > 0: args_str = ast_to_source_code(node.args[0]).strip() - new_func_str = "_jst.convert_var_dtype({}, '{}')".format(args_str, - func_str) + new_func_str = "_jst.convert_var_dtype({}, '{}')".format( + args_str, func_str) new_node = gast.parse(new_func_str).body[0].value return new_node diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/convert_call_func.py b/python/paddle/fluid/dygraph/dygraph_to_static/convert_call_func.py index 
0b009c0049d..cf3383f5d06 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/convert_call_func.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/convert_call_func.py @@ -96,8 +96,8 @@ def is_unsupported(func): if func_in_dict: translator_logger.log( 2, - "Whitelist: {} is part of built-in module and does not have to be transformed.". - format(func)) + "Whitelist: {} is part of built-in module and does not have to be transformed." + .format(func)) return True # NOTE: should be placed before `is_paddle_func` @@ -107,8 +107,8 @@ def is_unsupported(func): if is_paddle_func(func): translator_logger.log( 2, - "Whitelist: {} is part of Paddle module and does not have to be transformed.". - format(func)) + "Whitelist: {} is part of Paddle module and does not have to be transformed." + .format(func)) return True @@ -161,8 +161,8 @@ def convert_call(func): if options is not None and options.not_convert: translator_logger.log( 2, - "{} is not converted when it is decorated by 'paddle.jit.not_to_static'.". - format(func)) + "{} is not converted when it is decorated by 'paddle.jit.not_to_static'." + .format(func)) return func if is_builtin_len(func): @@ -175,7 +175,7 @@ def convert_call(func): return func if inspect.isgeneratorfunction(func): - # NOTE(xiongkun03): inspect.isfunction() will return True even though func is a generator function. + # NOTE(xiongkun03): inspect.isfunction() will return True even though func is a generator function. # If we don't deal generatorfunction here, we will regard it as normal function and get errors in some # occasion. number_of_stars = 30 diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py b/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py index 576baf6cc29..7933ddfe590 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py @@ -219,8 +219,8 @@ def _remove_no_value_return_var(out): align_ret = out[0] if isinstance(align_ret, tuple): for index, item in enumerate(align_ret): - if isinstance(item, Variable) and ( - RETURN_NO_VALUE_VAR_NAME in item.name): + if isinstance(item, Variable) and (RETURN_NO_VALUE_VAR_NAME + in item.name): # return None if index == 0: processed_out = (None, ) + out[1:] @@ -231,8 +231,8 @@ def _remove_no_value_return_var(out): break for index, item in enumerate(processed_out): - if isinstance(item, Variable) and ( - RETURN_NO_VALUE_VAR_NAME in item.name): + if isinstance(item, Variable) and (RETURN_NO_VALUE_VAR_NAME + in item.name): processed_out = processed_out[:index] if not processed_out: @@ -316,8 +316,8 @@ def convert_var_shape(x, idx=None, in_control_flow=False): # # Assume x.shape=[3, -1] in static mode # y = paddle.reshape(x, shape=[1, x.shape[1]]) # ``` - if isinstance(x, Variable) and (in_control_flow or has_negative(x.shape, - idx)): + if isinstance(x, Variable) and (in_control_flow + or has_negative(x.shape, idx)): return nn.shape(x) if idx is None else nn.shape(x)[idx] else: return x.shape if idx is None else x.shape[idx] @@ -549,6 +549,7 @@ def _run_paddle_pop(array, *args): # TODO(liym27): A better way to slice tensor array. # Maybe support start == end for slice op. 
def _slice_tensor_array(array, start, end): + def true_fn(): null_array = create_array("float32") return null_array diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/error.py b/python/paddle/fluid/dygraph/dygraph_to_static/error.py index 69ec89a5af6..c422c5269e7 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/error.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/error.py @@ -114,9 +114,9 @@ class TraceBackFrameRange(OriginInfo): for i in range(len(self.source_code)): # if source_code[i] is empty line between two code line, dont add blank if self.source_code[i]: - self.source_code[i] = ' ' * (blank_count[i] - min_black_count + - BLANK_COUNT_BEFORE_FILE_STR * 2 - ) + self.source_code[i] + self.source_code[i] = ' ' * ( + blank_count[i] - min_black_count + + BLANK_COUNT_BEFORE_FILE_STR * 2) + self.source_code[i] def formated_message(self): msg = ' ' * BLANK_COUNT_BEFORE_FILE_STR + 'File "{}", line {}, in {}\n'.format( @@ -126,6 +126,7 @@ class TraceBackFrameRange(OriginInfo): class SuggestionDict(object): + def __init__(self): # {(keywords): (suggestions)} self.suggestion_dict = { @@ -212,16 +213,16 @@ class ErrorData(object): -1] + 1 if user_code_traceback_index else 0 for filepath, lineno, funcname, code in self.origin_traceback[ paddle_traceback_start_index:]: - traceback_frame = TraceBackFrame( - Location(filepath, lineno), funcname, code) + traceback_frame = TraceBackFrame(Location(filepath, lineno), + funcname, code) message_lines.append(traceback_frame.formated_message()) message_lines.append("") # Step3: Adds error message like "TypeError: dtype must be int32, but received float32". # NOTE: `format_exception` is a list, its length is 1 in most cases, but sometimes its length # is gather than 1, for example, the error_type is IndentationError. 
- format_exception = traceback.format_exception_only(self.error_type, - self.error_value) + format_exception = traceback.format_exception_only( + self.error_type, self.error_value) error_message = [ " " * BLANK_COUNT_BEFORE_FILE_STR + line for line in format_exception @@ -281,8 +282,8 @@ class ErrorData(object): if error_value_lines_strip[i].startswith("File "): re_result = re.search(pattern, error_value_lines_strip[i]) tmp_filepath, lineno_str, function_name = re_result.groups() - code = error_value_lines_strip[i + 1] if i + 1 < len( - error_value_lines_strip) else '' + code = error_value_lines_strip[ + i + 1] if i + 1 < len(error_value_lines_strip) else '' if i == 0: user_filepath = tmp_filepath if tmp_filepath == user_filepath: @@ -299,8 +300,8 @@ class ErrorData(object): traceback_frame = TraceBackFrameRange( Location(filepath, lineno), funcname) else: - traceback_frame = TraceBackFrame( - Location(filepath, lineno), funcname, code) + traceback_frame = TraceBackFrame(Location(filepath, lineno), + funcname, code) error_frame.append(traceback_frame.formated_message()) error_frame.append("") @@ -309,8 +310,8 @@ class ErrorData(object): -1] + 1 if user_code_traceback_index else 0 for filepath, lineno, funcname, code in error_traceback[ paddle_traceback_start_index:]: - traceback_frame = TraceBackFrame( - Location(filepath, lineno), funcname, code) + traceback_frame = TraceBackFrame(Location(filepath, lineno), + funcname, code) error_frame.append(traceback_frame.formated_message()) error_frame.append("") diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/function_spec.py b/python/paddle/fluid/dygraph/dygraph_to_static/function_spec.py index 900541459f6..e8afef09468 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/function_spec.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/function_spec.py @@ -73,8 +73,8 @@ class FunctionSpec(object): """ if len(self._arg_names) < len(args): error_msg = "The decorated function `{}` requires {} arguments: {}, but received {} with {}.".format( - self._dygraph_function.__name__, - len(self._arg_names), self._arg_names, len(args), args) + self._dygraph_function.__name__, len(self._arg_names), + self._arg_names, len(args), args) if args and inspect.isclass(args[0]): error_msg += "\n\tMaybe the function has more than one decorator, we don't support this for now." raise NotImplementedError(error_msg) @@ -91,9 +91,9 @@ class FunctionSpec(object): else: if arg_name not in self._default_kwargs: raise ValueError( - "`{}()` requires `{}` arguments, but not found in input `args`: {} and `kwargs`: {}.". - format(self._dygraph_function.__name__, arg_name, args, - kwargs)) + "`{}()` requires `{}` arguments, but not found in input `args`: {} and `kwargs`: {}." + .format(self._dygraph_function.__name__, arg_name, args, + kwargs)) args.append(self._default_kwargs[arg_name]) return tuple(args), kwargs @@ -136,16 +136,16 @@ class FunctionSpec(object): # So we don't support to deal this case while specificing `input_spec` currently. if kwargs: raise ValueError( - "{} got unexpected keyword arguments: {}. Cannot trace the function when `input_spec` is specificed.". - format(self._dygraph_function.__name__, kwargs)) + "{} got unexpected keyword arguments: {}. Cannot trace the function when `input_spec` is specificed." + .format(self._dygraph_function.__name__, kwargs)) # Note: The length of `input_spec` can be greater than `args`, # because `args` may contains non-tensor value merged form `kwargs` # after `unified_args_and_kwargs`. 
if len(args) < len(self._input_spec): raise ValueError( - "Requires len(arguments) >= len(input_spec), but received len(args):{} < len(InputSpec): {}". - format(len(args), len(self._input_spec))) + "Requires len(arguments) >= len(input_spec), but received len(args):{} < len(InputSpec): {}" + .format(len(args), len(self._input_spec))) # replace argument with corresponding InputSpec. args_with_spec = convert_to_input_spec(args, self._input_spec) @@ -196,8 +196,8 @@ class FunctionSpec(object): """ if not isinstance(input_spec, (tuple, list)): raise TypeError( - "The type(input_spec) should be one of (tuple, list), but received {}.". - format(type_name(input_spec))) + "The type(input_spec) should be one of (tuple, list), but received {}." + .format(type_name(input_spec))) return tuple(input_spec) @@ -289,8 +289,8 @@ def convert_to_input_spec(inputs, input_spec): type(spec), type(input))) if check_length and len(input) < len(spec): raise ValueError( - 'Requires len(inputs) >= len(input_spec), but received len(inputs):{} < len(input_spec):{}'. - format(len(inputs), len(input_spec))) + 'Requires len(inputs) >= len(input_spec), but received len(inputs):{} < len(input_spec):{}' + .format(len(inputs), len(input_spec))) if isinstance(input_spec, (tuple, list)): input_with_spec = [] @@ -307,8 +307,8 @@ def convert_to_input_spec(inputs, input_spec): if isinstance(rest_input, (core.VarBase, np.ndarray)): logging_utils.warn( "The inputs constain `{}` without specificing InputSpec, its shape and dtype will be treated immutable. " - "Please specific InputSpec information in `@to_static` if you expect them as mutable inputs.". - format(type_name(rest_input))) + "Please specific InputSpec information in `@to_static` if you expect them as mutable inputs." + .format(type_name(rest_input))) input_with_spec.extend(inputs[len(input_spec):]) return input_with_spec @@ -317,8 +317,8 @@ def convert_to_input_spec(inputs, input_spec): check_type_and_len(inputs, input_spec, True) for name, input in six.iteritems(inputs): if name in input_spec: - input_with_spec[name] = convert_to_input_spec(input, - input_spec[name]) + input_with_spec[name] = convert_to_input_spec( + input, input_spec[name]) else: input_with_spec[name] = input return input_with_spec diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/grad_transformer.py b/python/paddle/fluid/dygraph/dygraph_to_static/grad_transformer.py index 98045b3aae4..d8d8d0bc043 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/grad_transformer.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/grad_transformer.py @@ -69,8 +69,8 @@ class GradTransformer(gast.NodeTransformer): warnings.warn("paddle.grad has unsupported parameter in jit: " + kw.arg + ", jit will discard it") continue - kw = gast.keyword( - arg=to_static_grad_param[arg_name], value=node.args[i]) + kw = gast.keyword(arg=to_static_grad_param[arg_name], + value=node.args[i]) static_keywords.append(kw) node.func = gast.parse('paddle.static.gradients').body[0].value diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/ifelse_transformer.py b/python/paddle/fluid/dygraph/dygraph_to_static/ifelse_transformer.py index 157822430d2..4c003dd5999 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/ifelse_transformer.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/ifelse_transformer.py @@ -91,6 +91,7 @@ class IfElseTransformer(gast.NodeTransformer): class NameVisitor(gast.NodeVisitor): + def __init__(self, after_node=None, end_node=None): # The start node (exclusive) of the visitor 
self.after_node = after_node @@ -159,8 +160,8 @@ class NameVisitor(gast.NodeVisitor): else: # Blocks the vars in `if.body` and only inserts the vars both created in 'if/else' branch # into name_ids. - new_name_ids = self._find_new_name_ids(body_name_ids, - else_name_ids) + new_name_ids = self._find_new_name_ids( + body_name_ids, else_name_ids) for new_name_id in new_name_ids: before_if_name_ids[new_name_id].append(gast.Store()) @@ -219,14 +220,15 @@ class NameVisitor(gast.NodeVisitor): return copy.deepcopy(self.name_ids) def _find_new_name_ids(self, body_name_ids, else_name_ids): + def is_required_ctx(ctxs, required_ctx): for ctx in ctxs: if isinstance(ctx, required_ctx): return True return False - candidate_name_ids = set(body_name_ids.keys()) & set(else_name_ids.keys( - )) + candidate_name_ids = set(body_name_ids.keys()) & set( + else_name_ids.keys()) store_ctx = gast.Store new_name_ids = set() for name_id in candidate_name_ids: @@ -309,18 +311,18 @@ def parse_cond_args(parent_ids_dict, arg_name_ids.sort() args = [ - gast.Name( - id=name_id, ctx=gast.Load(), annotation=None, type_comment=None) - for name_id in arg_name_ids + gast.Name(id=name_id, + ctx=gast.Load(), + annotation=None, + type_comment=None) for name_id in arg_name_ids ] - arguments = gast.arguments( - args=args, - posonlyargs=[], - vararg=None, - kwonlyargs=[], - kw_defaults=None, - kwarg=None, - defaults=[]) + arguments = gast.arguments(args=args, + posonlyargs=[], + vararg=None, + kwonlyargs=[], + kw_defaults=None, + kwarg=None, + defaults=[]) return arguments @@ -398,9 +400,8 @@ def parse_cond_return(parent_vars_dict, if_vars_dict, else_vars_dict, return vars def _modified_vars(child_dict, parent_dict): - return set([ - var for var in _vars_with_store(child_dict) if var in parent_dict - ]) + return set( + [var for var in _vars_with_store(child_dict) if var in parent_dict]) def _vars_loaded(ids_dict): """ @@ -446,8 +447,8 @@ def parse_cond_return(parent_vars_dict, if_vars_dict, else_vars_dict, new_vars_to_create = new_vars_in_one_of_body_or_orelse & used_vars_after_ifelse | new_vars_in_body_and_orelse # 4. generate return_ids of if/else node. - return_ids = list(modified_vars_from_parent | new_vars_in_body_and_orelse | - new_vars_to_create) + return_ids = list(modified_vars_from_parent | new_vars_in_body_and_orelse + | new_vars_to_create) return_ids.sort() return return_ids, modified_vars_from_parent, new_vars_to_create @@ -515,9 +516,10 @@ def create_convert_ifelse_node(return_name_ids, return gast.Tuple(elts=[], ctx=gast.Load()) gast_names = [ - gast.Name( - id=name_id, ctx=gast.Load(), annotation=None, type_comment=None) - for name_id in name_ids + gast.Name(id=name_id, + ctx=gast.Load(), + annotation=None, + type_comment=None) for name_id in name_ids ] name_node = gast.Tuple(elts=gast_names, ctx=gast.Load()) return name_node @@ -537,14 +539,13 @@ def create_convert_ifelse_node(return_name_ids, convert_ifelse_layer = gast.parse( '_jst.convert_ifelse(' - '{pred}, {true_fn}, {false_fn}, {true_args}, {false_args}, {return_vars})'. 
- format( - pred=ast_to_source_code(pred), - true_fn=true_func_source, - false_fn=false_func_source, - true_args=ast_to_source_code(true_args), - false_args=ast_to_source_code(false_args), - return_vars=ast_to_source_code(return_vars))).body[0].value + '{pred}, {true_fn}, {false_fn}, {true_args}, {false_args}, {return_vars})' + .format(pred=ast_to_source_code(pred), + true_fn=true_func_source, + false_fn=false_func_source, + true_args=ast_to_source_code(true_args), + false_args=ast_to_source_code(false_args), + return_vars=ast_to_source_code(return_vars))).body[0].value if return_name_ids: _, cond_node = create_assign_node(return_name_ids, convert_ifelse_layer) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/logging_utils.py b/python/paddle/fluid/dygraph/dygraph_to_static/logging_utils.py index 4a6d855a893..3ae10997c8e 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/logging_utils.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/logging_utils.py @@ -30,6 +30,7 @@ LOG_AllTransformer = 100 def synchronized(func): + def wrapper(*args, **kwargs): with threading.Lock(): return func(*args, **kwargs) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/loop_transformer.py b/python/paddle/fluid/dygraph/dygraph_to_static/loop_transformer.py index 8014a00bff9..045878ed54e 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/loop_transformer.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/loop_transformer.py @@ -240,7 +240,9 @@ class NameVisitor(gast.NodeVisitor): self.current_seen_vars.add(node) write_context = { - type(gast.Store()), type(gast.AugStore()), type(gast.Del()) + type(gast.Store()), + type(gast.AugStore()), + type(gast.Del()) } for loop_node in self.current_loop: self.in_loop_vars[loop_node].append(node) @@ -581,20 +583,18 @@ class LoopTransformer(gast.NodeTransformer): # 5. create & append condition function node condition_func_node = gast.FunctionDef( name=unique_name.generate(FOR_CONDITION_PREFIX), - args=gast.arguments( - args=[ - gast.Name( - id=name, - ctx=gast.Param(), - annotation=None, - type_comment=None) for name in loop_var_names - ], - posonlyargs=[], - vararg=None, - kwonlyargs=[], - kw_defaults=None, - kwarg=None, - defaults=[]), + args=gast.arguments(args=[ + gast.Name(id=name, + ctx=gast.Param(), + annotation=None, + type_comment=None) for name in loop_var_names + ], + posonlyargs=[], + vararg=None, + kwonlyargs=[], + kw_defaults=None, + kwarg=None, + defaults=[]), body=[gast.Return(value=cond_stmt)], decorator_list=[], returns=None, @@ -613,20 +613,18 @@ class LoopTransformer(gast.NodeTransformer): loop_var_names, ctx=gast.Load(), gen_tuple_if_single=True))) body_func_node = gast.FunctionDef( name=unique_name.generate(FOR_BODY_PREFIX), - args=gast.arguments( - args=[ - gast.Name( - id=name, - ctx=gast.Param(), - annotation=None, - type_comment=None) for name in loop_var_names - ], - posonlyargs=[], - vararg=None, - kwonlyargs=[], - kw_defaults=None, - kwarg=None, - defaults=[]), + args=gast.arguments(args=[ + gast.Name(id=name, + ctx=gast.Param(), + annotation=None, + type_comment=None) for name in loop_var_names + ], + posonlyargs=[], + vararg=None, + kwonlyargs=[], + kw_defaults=None, + kwarg=None, + defaults=[]), body=body_stmts, decorator_list=[], returns=None, @@ -639,8 +637,9 @@ class LoopTransformer(gast.NodeTransformer): new_stmts.append(body_func_node) # 7. 
create & append while loop node - while_loop_nodes = create_while_nodes( - condition_func_node.name, body_func_node.name, loop_var_names) + while_loop_nodes = create_while_nodes(condition_func_node.name, + body_func_node.name, + loop_var_names) new_stmts.extend(while_loop_nodes) return new_stmts @@ -664,20 +663,18 @@ class LoopTransformer(gast.NodeTransformer): condition_func_node = gast.FunctionDef( name=unique_name.generate(WHILE_CONDITION_PREFIX), - args=gast.arguments( - args=[ - gast.Name( - id=name, - ctx=gast.Param(), - annotation=None, - type_comment=None) for name in loop_var_names - ], - posonlyargs=[], - vararg=None, - kwonlyargs=[], - kw_defaults=None, - kwarg=None, - defaults=[]), + args=gast.arguments(args=[ + gast.Name(id=name, + ctx=gast.Param(), + annotation=None, + type_comment=None) for name in loop_var_names + ], + posonlyargs=[], + vararg=None, + kwonlyargs=[], + kw_defaults=None, + kwarg=None, + defaults=[]), body=[gast.Return(value=node.test)], decorator_list=[], returns=None, @@ -696,20 +693,18 @@ class LoopTransformer(gast.NodeTransformer): loop_var_names, ctx=gast.Load(), gen_tuple_if_single=True))) body_func_node = gast.FunctionDef( name=unique_name.generate(WHILE_BODY_PREFIX), - args=gast.arguments( - args=[ - gast.Name( - id=name, - ctx=gast.Param(), - annotation=None, - type_comment=None) for name in loop_var_names - ], - posonlyargs=[], - vararg=None, - kwonlyargs=[], - kw_defaults=None, - kwarg=None, - defaults=[]), + args=gast.arguments(args=[ + gast.Name(id=name, + ctx=gast.Param(), + annotation=None, + type_comment=None) for name in loop_var_names + ], + posonlyargs=[], + vararg=None, + kwonlyargs=[], + kw_defaults=None, + kwarg=None, + defaults=[]), body=new_body, decorator_list=[], returns=None, @@ -721,7 +716,8 @@ class LoopTransformer(gast.NodeTransformer): name, unique_name.generate(GENERATE_VARIABLE_PREFIX)) new_stmts.append(body_func_node) - while_loop_nodes = create_while_nodes( - condition_func_node.name, body_func_node.name, loop_var_names) + while_loop_nodes = create_while_nodes(condition_func_node.name, + body_func_node.name, + loop_var_names) new_stmts.extend(while_loop_nodes) return new_stmts diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/origin_info.py b/python/paddle/fluid/dygraph/dygraph_to_static/origin_info.py index 60043c42121..de126777683 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/origin_info.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/origin_info.py @@ -38,7 +38,8 @@ class Location(object): __slots__ = ( "filepath", "lineno", - "col_offset", ) + "col_offset", + ) def __init__(self, filepath, lineno, col_offset=None): self.filepath = filepath @@ -61,7 +62,8 @@ class OriginInfo(object): __slots__ = ( "location", "function_name", - "source_code", ) + "source_code", + ) def __init__(self, location, function_name, source_code): self.location = location diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py b/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py index 64652dd8e35..43a05cbb2f9 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py @@ -61,8 +61,8 @@ class NestSequence(object): def _get_var_ids(self): var_ids = [] for idx, var in enumerate(self.__input_list): - if isinstance(var, (framework.Variable, core.VarBase, - core.eager.Tensor)): + if isinstance( + var, (framework.Variable, core.VarBase, core.eager.Tensor)): var_ids.append(idx) return var_ids @@ -74,8 
+74,9 @@ class NestSequence(object): if need_check: warning_types = set() for var in self.__input_list: - if not isinstance(var, (framework.Variable, core.VarBase, - core.eager.Tensor)): + if not isinstance( + var, + (framework.Variable, core.VarBase, core.eager.Tensor)): warning_types.add(type(var)) if warning_types: logging_utils.warn( @@ -136,7 +137,11 @@ class PartialProgramLayer: Layer: A Layer object that run all ops internally in static mode. """ - def __init__(self, main_program, inputs, outputs, parameters=None, + def __init__(self, + main_program, + inputs, + outputs, + parameters=None, **kwargs): super(PartialProgramLayer, self).__init__() self._inputs = NestSequence(inputs) @@ -219,8 +224,9 @@ class PartialProgramLayer: """ infer_pure_fp16_program = self._origin_main_program.clone() with program_guard(infer_pure_fp16_program): - cast_model_to_fp16( - infer_pure_fp16_program, self._amp_list, use_fp16_guard=False) + cast_model_to_fp16(infer_pure_fp16_program, + self._amp_list, + use_fp16_guard=False) return infer_pure_fp16_program @@ -343,9 +349,10 @@ class PartialProgramLayer: in_vars, out_vars = self._prepare(inputs) attrs = [ - 'global_block', self.program.desc.block(0), 'start_op_index', 0, - 'end_op_index', self._get_end_op_index(), 'is_test', - not self.training, 'program_id', self.program_id + 'global_block', + self.program.desc.block(0), 'start_op_index', 0, 'end_op_index', + self._get_end_op_index(), 'is_test', not self.training, + 'program_id', self.program_id ] if self._cuda_graph_capture_mode: attrs.extend( @@ -354,11 +361,10 @@ class PartialProgramLayer: self._cast_fp16_if_pure_fp16(in_vars) - _C_ops.run_program( - self._valid_vars(in_vars), - self._valid_vars(self._params), - self._valid_vars(out_vars), self._tmp_scope_vec, self._double_grads, - self._cuda_graph_vec, *attrs) + _C_ops.run_program(self._valid_vars(in_vars), + self._valid_vars(self._params), + self._valid_vars(out_vars), self._tmp_scope_vec, + self._double_grads, self._cuda_graph_vec, *attrs) self.drop_scope_if_no_grad() restored_nest_out = self._restore_out(out_vars) return self._remove_no_value(restored_nest_out) @@ -367,9 +373,9 @@ class PartialProgramLayer: if _in_pure_fp16_guard(): for i, var in enumerate(in_vars): name = var.name - if (self.program.global_block().has_var(name) and - self.program.global_block().var(name).dtype == - paddle.float16): + if (self.program.global_block().has_var(name) + and self.program.global_block().var(name).dtype + == paddle.float16): in_vars[i] = var.astype('float16') in_vars[i].name = name @@ -418,19 +424,17 @@ class PartialProgramLayer: if isinstance(value, np.ndarray): var = None if not framework._in_eager_mode_: - var = core.VarBase( - value=value, - name=self._inputs[i].desc.name(), - persistable=False, - place=expected_place, - zero_copy=True) + var = core.VarBase(value=value, + name=self._inputs[i].desc.name(), + persistable=False, + place=expected_place, + zero_copy=True) else: - var = core.eager.Tensor( - value=value, - name=self._inputs[i].desc.name(), - persistable=False, - place=expected_place, - zero_copy=True) + var = core.eager.Tensor(value=value, + name=self._inputs[i].desc.name(), + persistable=False, + place=expected_place, + zero_copy=True) elif isinstance(value, (core.VarBase, core.eager.Tensor)): # NOTE(Aurelius84): If var is on CPUPlace, it will be transformed multi times # into CUDAPlace when it's as input of multi Ops. 
so we move it in advance @@ -452,14 +456,12 @@ class PartialProgramLayer: var_desc = var.desc varbase = None if not framework._in_eager_mode_: - var_base = core.VarBase(var_desc.dtype(), - var_desc.shape(), + var_base = core.VarBase(var_desc.dtype(), var_desc.shape(), var_desc.name(), var_desc.type(), False) else: - var_base = core.eager.Tensor(var_desc.dtype(), - var_desc.shape(), - var_desc.name(), - var_desc.type(), False) + var_base = core.eager.Tensor(var_desc.dtype(), var_desc.shape(), + var_desc.name(), var_desc.type(), + False) return var_base # Create VarBase to receive output data. @@ -522,8 +524,8 @@ class PartialProgramLayer: return out_vars elif isinstance(out_vars, (tuple, list)): if isinstance(out_vars, tuple): - res = tuple( - var for var in out_vars if not self._is_no_value(var)) + res = tuple(var for var in out_vars + if not self._is_no_value(var)) else: # isinstance(out_vars, list) res = [var for var in out_vars if not self._is_no_value(var)] @@ -585,8 +587,8 @@ class PartialProgramLayer: # self._params constains parameters and buffers with persistable=True. if not isinstance(var, (core.VarBase, core.eager.Tensor)): raise TypeError( - 'Type of self._params[{}] in PartialProgramLayer should be Parameter or Variable, but received {}.'. - format(i, type(var))) + 'Type of self._params[{}] in PartialProgramLayer should be Parameter or Variable, but received {}.' + .format(i, type(var))) param_and_buffer_names_set.add(var.name) for block in main_program.blocks: @@ -632,6 +634,7 @@ def partial_program_from(concrete_program): if inputs and isinstance(inputs[0], layers.Layer): inputs = inputs[1:] - return PartialProgramLayer( - concrete_program.main_program, inputs, concrete_program.outputs, - concrete_program.parameters, **concrete_program.kwargs) + return PartialProgramLayer(concrete_program.main_program, inputs, + concrete_program.outputs, + concrete_program.parameters, + **concrete_program.kwargs) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py index 207cff67a1b..54c2b2216cd 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py @@ -198,11 +198,11 @@ class CacheKey(object): error_msg = "Arguments to a `@paddle.jit.to_static` must be a hashable Python objects (or nested structures of these types)." with_hook = self.kwargs.get("with_hook", False) is_train = self.kwargs.get("is_train", False) - return hash( - (id(self.function_spec), - make_hashable(self.input_args_with_spec, error_msg), - make_hashable(self.input_kwargs_with_spec, error_msg), - self._spec_names_id, self.class_instance, with_hook, is_train)) + return hash((id(self.function_spec), + make_hashable(self.input_args_with_spec, error_msg), + make_hashable(self.input_kwargs_with_spec, + error_msg), self._spec_names_id, + self.class_instance, with_hook, is_train)) def __eq__(self, other): return (type(self) is type(other)) and hash(self) == hash(other) @@ -433,20 +433,19 @@ class StaticFunction(object): if "with_hook" in kwargs: kwargs.pop("with_hook") # 1. unify args/kwargs and replace Tensor with InputSpec if len(args) != len(self._function_spec.args_name): - args, kwargs = self._function_spec.unified_args_and_kwargs(args, - kwargs) + args, kwargs = self._function_spec.unified_args_and_kwargs( + args, kwargs) input_args_with_spec, input_kwargs_with_spec = self._function_spec.args_to_input_spec( args, kwargs) # 2. 
generate cache key - cache_key = CacheKey( - self._function_spec, - input_args_with_spec, - input_kwargs_with_spec, - self._class_instance, - **self._kwargs, - with_hook=with_hook, - is_train=is_train) + cache_key = CacheKey(self._function_spec, + input_args_with_spec, + input_kwargs_with_spec, + self._class_instance, + **self._kwargs, + with_hook=with_hook, + is_train=is_train) # 3. check whether hit the cache or build a new program for the input arguments concrete_program, partial_program_layer = self._program_cache[cache_key] @@ -528,15 +527,15 @@ class StaticFunction(object): flatten(input_spec), flatten(self._function_spec.input_spec)): raise ValueError( - "The `input_spec`: {} used to construct concrete_program is conflict with the `input_spec`: {} in `@paddle.jit.to_static`". - format(input_spec, self._function_spec.input_spec)) + "The `input_spec`: {} used to construct concrete_program is conflict with the `input_spec`: {} in `@paddle.jit.to_static`" + .format(input_spec, self._function_spec.input_spec)) # NOTE(chenweihang): we should always translated program based on the `input_spec` # decorated on forward if it is valid desired_input_spec = self._function_spec.input_spec if input_spec is not None: logging_utils.warn( - "\n\nYou have specified `input_spec` both in function definition (higher priority) and `paddle.jit.save` (will be ignored.)\n\n\t Using: {}\n\n\t Ignore: {}\n". - format(desired_input_spec, input_spec)) + "\n\nYou have specified `input_spec` both in function definition (higher priority) and `paddle.jit.save` (will be ignored.)\n\n\t Using: {}\n\n\t Ignore: {}\n" + .format(desired_input_spec, input_spec)) has_input_spec = (desired_input_spec is not None) if has_input_spec: @@ -547,8 +546,8 @@ class StaticFunction(object): return concrete_program else: raise ValueError( - "No valid transformed program for {}.\n\t Please specific `input_spec` in `@paddle.jit.to_static` or feed input tensor to call the decorated function at once.\n". - format(self._function_spec)) + "No valid transformed program for {}.\n\t Please specific `input_spec` in `@paddle.jit.to_static` or feed input tensor to call the decorated function at once.\n" + .format(self._function_spec)) elif with_hook: cache_key = self._program_cache._recent_cache_key cache_key.kwargs["with_hook"] = True @@ -558,8 +557,8 @@ class StaticFunction(object): # If more than one programs have been cached, return the recent converted program by default. elif cached_program_len > 1: logging_utils.warn( - "Current {} has more than one cached programs: {}, the last traced progam will be return by default.". - format(self._function_spec, cached_program_len)) + "Current {} has more than one cached programs: {}, the last traced progam will be return by default." + .format(self._function_spec, cached_program_len)) cache_key, (concrete_program, partial_layer) = self._program_cache.last() @@ -632,8 +631,8 @@ class HookHelper(object): self.class_instance = class_instance self.with_hook = with_hook self.need_apply_hook = with_hook and isinstance( - self.class_instance, - layers.Layer) and getattr(func, "__name__") == "forward" + self.class_instance, layers.Layer) and getattr( + func, "__name__") == "forward" def apply_pre_hooks(self, inputs): """ @@ -731,8 +730,8 @@ class ConcreteProgram(object): _kwargs = func_spec.to_static_inputs_with_spec( input_kwargs_spec, main_program) if class_instance: - static_inputs = tuple([class_instance] + list( - static_inputs)) + static_inputs = tuple([class_instance] + + list(static_inputs)) # 2. 
Gets all ParamBases and buffered VarBases in the function all_parameters_and_buffers = _extract_indeed_params_buffers( @@ -740,8 +739,9 @@ class ConcreteProgram(object): # 3. Builds program only once and returns the output Variables. with param_guard(get_parameters( - class_instance, False)), param_guard( - get_buffers(class_instance, False)): + class_instance, + False)), param_guard(get_buffers(class_instance, + False)): try: # only for jit.save, do nothing while train and eval process inputs = hook_helper.apply_pre_hooks(static_inputs) @@ -759,21 +759,20 @@ class ConcreteProgram(object): raise if outputs is not None: - need_wrap_into_list = not isinstance(outputs, ( - tuple, list)) or len(outputs) == 1 + need_wrap_into_list = not isinstance( + outputs, (tuple, list)) or len(outputs) == 1 if need_wrap_into_list: outputs = [outputs] main_program = update_op_callstack_with_origin_info(main_program) - return ConcreteProgram( - inputs=static_inputs, - outputs=outputs, - parameters=all_parameters_and_buffers, - function=dygraph_function, - main_program=main_program, - startup_program=startup_program, - **kwargs) + return ConcreteProgram(inputs=static_inputs, + outputs=outputs, + parameters=all_parameters_and_buffers, + function=dygraph_function, + main_program=main_program, + startup_program=startup_program, + **kwargs) def _extract_indeed_params_buffers(class_instance): @@ -795,7 +794,7 @@ class ProgramCache(object): def __init__(self): # {hash_id : (concrete_program, partial_layer)} self._caches = collections.OrderedDict() - # trace mostly recent used program + # trace mostly recent used program self._recent_key = None self._recent_cache_key = None @@ -822,8 +821,8 @@ class ProgramCache(object): if current_tracing_count > MAX_TRACED_PROGRAM_COUNT: logging_utils.warn( "Current traced program number: {} > `max_tracing_count`:{}. Too much cached programs will bring expensive overhead. " - "The reason may be: (1) passing tensors with different shapes, (2) passing python objects instead of tensors.". - format(current_tracing_count, MAX_TRACED_PROGRAM_COUNT)) + "The reason may be: (1) passing tensors with different shapes, (2) passing python objects instead of tensors." 
+ .format(current_tracing_count, MAX_TRACED_PROGRAM_COUNT)) return self._caches[item_id] @@ -1003,9 +1002,9 @@ class ProgramTranslator(object): return dygraph_func(*args, **kwargs) try: function_spec = FunctionSpec(dygraph_func) - cache_key = CacheKey.from_func_and_args(function_spec, args, kwargs, - getattr(dygraph_func, - '__self__', None)) + cache_key = CacheKey.from_func_and_args( + function_spec, args, kwargs, + getattr(dygraph_func, '__self__', None)) _, partial_program_layer = self._program_cache[cache_key] if args and isinstance(args[0], layers.Layer): @@ -1133,9 +1132,9 @@ class ProgramTranslator(object): return dygraph_func(*args, **kwargs) function_spec = FunctionSpec(dygraph_func) - cache_key = CacheKey.from_func_and_args(function_spec, args, kwargs, - getattr(dygraph_func, - '__self__', None)) + cache_key = CacheKey.from_func_and_args( + function_spec, args, kwargs, getattr(dygraph_func, '__self__', + None)) concrete_program, partial_program_layer = self._program_cache[cache_key] # Note: concrete_program hold all input/output infos include non-Variable diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/return_transformer.py b/python/paddle/fluid/dygraph/dygraph_to_static/return_transformer.py index 8ac659dbead..7e387b45c40 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/return_transformer.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/return_transformer.py @@ -211,11 +211,10 @@ class ReturnTransformer(gast.NodeTransformer): value_name = self.return_value_name[node] if value_name is not None: node.body.append( - gast.Return(value=gast.Name( - id=value_name, - ctx=gast.Load(), - annotation=None, - type_comment=None))) + gast.Return(value=gast.Name(id=value_name, + ctx=gast.Load(), + annotation=None, + type_comment=None))) init_names = [ unique_name.generate(RETURN_VALUE_INIT_NAME) for i in range(max_return_length) @@ -224,32 +223,27 @@ class ReturnTransformer(gast.NodeTransformer): create_fill_constant_node(iname, 0.0) for iname in init_names ] if len(init_names) == 1: - return_value_nodes = gast.Name( - id=init_names[0], - ctx=gast.Load(), - annotation=None, - type_comment=None) + return_value_nodes = gast.Name(id=init_names[0], + ctx=gast.Load(), + annotation=None, + type_comment=None) else: # We need to initialize return value as a tuple because control # flow requires some inputs or outputs have same structure - return_value_nodes = gast.Tuple( - elts=[ - gast.Name( - id=iname, - ctx=gast.Load(), - annotation=None, - type_comment=None) for iname in init_names - ], - ctx=gast.Load()) - assign_return_value_node = gast.Assign( - targets=[ - gast.Name( - id=value_name, - ctx=gast.Store(), - annotation=None, - type_comment=None) + return_value_nodes = gast.Tuple(elts=[ + gast.Name(id=iname, + ctx=gast.Load(), + annotation=None, + type_comment=None) for iname in init_names ], - value=return_value_nodes) + ctx=gast.Load()) + assign_return_value_node = gast.Assign(targets=[ + gast.Name(id=value_name, + ctx=gast.Store(), + annotation=None, + type_comment=None) + ], + value=return_value_nodes) node.body.insert(0, assign_return_value_node) node.body[:0] = assign_zero_nodes @@ -276,43 +270,43 @@ class ReturnTransformer(gast.NodeTransformer): if hasattr(ancestor, "body") and index_in_list(ancestor.body, cur_node) != -1: if cur_node == node: - self._replace_return_in_stmt_list( - ancestor.body, cur_node, return_name, max_return_length, - parent_node_of_return) + self._replace_return_in_stmt_list(ancestor.body, cur_node, + return_name, + max_return_length, + 
parent_node_of_return) self._replace_after_node_to_if_in_stmt_list( ancestor.body, cur_node, return_name, parent_node_of_return) - elif hasattr(ancestor, "orelse") and index_in_list(ancestor.orelse, - cur_node) != -1: + elif hasattr(ancestor, "orelse") and index_in_list( + ancestor.orelse, cur_node) != -1: if cur_node == node: - self._replace_return_in_stmt_list( - ancestor.orelse, cur_node, return_name, - max_return_length, parent_node_of_return) + self._replace_return_in_stmt_list(ancestor.orelse, cur_node, + return_name, + max_return_length, + parent_node_of_return) self._replace_after_node_to_if_in_stmt_list( ancestor.orelse, cur_node, return_name, parent_node_of_return) # If return node in while loop, add `not return_name` in gast.While.test if isinstance(ancestor, gast.While): - cond_var_node = gast.UnaryOp( - op=gast.Not(), - operand=gast.Name( - id=return_name, - ctx=gast.Load(), - annotation=None, - type_comment=None)) + cond_var_node = gast.UnaryOp(op=gast.Not(), + operand=gast.Name( + id=return_name, + ctx=gast.Load(), + annotation=None, + type_comment=None)) ancestor.test = gast.BoolOp( op=gast.And(), values=[ancestor.test, cond_var_node]) continue # If return node in for loop, add `not return_name` in gast.While.test if isinstance(ancestor, gast.For): - cond_var_node = gast.UnaryOp( - op=gast.Not(), - operand=gast.Name( - id=return_name, - ctx=gast.Load(), - annotation=None, - type_comment=None)) + cond_var_node = gast.UnaryOp(op=gast.Not(), + operand=gast.Name( + id=return_name, + ctx=gast.Load(), + annotation=None, + type_comment=None)) parent_node = self.ancestor_nodes[ancestor_index - 1] for_to_while = ForToWhileTransformer(parent_node, ancestor, cond_var_node) @@ -363,27 +357,23 @@ class ReturnTransformer(gast.NodeTransformer): # Handle tuple/non-tuple case if max_return_length == 1: assign_nodes.append( - gast.Assign( - targets=[ - gast.Name( - id=self.return_value_name[cur_func_node], - ctx=gast.Store(), - annotation=None, - type_comment=None) - ], - value=gast.Name( - id=no_value_names[0], - ctx=gast.Load(), - annotation=None, - type_comment=None))) + gast.Assign(targets=[ + gast.Name(id=self.return_value_name[cur_func_node], + ctx=gast.Store(), + annotation=None, + type_comment=None) + ], + value=gast.Name(id=no_value_names[0], + ctx=gast.Load(), + annotation=None, + type_comment=None))) else: # max_return_length > 1 which means we should assign tuple fill_tuple = [ - gast.Name( - id=n, - ctx=gast.Load(), - annotation=None, - type_comment=None) for n in no_value_names + gast.Name(id=n, + ctx=gast.Load(), + annotation=None, + type_comment=None) for n in no_value_names ] if return_node.value is not None: if isinstance(return_node.value, gast.Tuple): @@ -392,16 +382,14 @@ class ReturnTransformer(gast.NodeTransformer): fill_tuple.insert(0, return_node.value) assign_nodes.append( - gast.Assign( - targets=[ - gast.Name( - id=self.return_value_name[cur_func_node], - ctx=gast.Store(), - annotation=None, - type_comment=None) - ], - value=gast.Tuple( - elts=fill_tuple, ctx=gast.Load()))) + gast.Assign(targets=[ + gast.Name(id=self.return_value_name[cur_func_node], + ctx=gast.Store(), + annotation=None, + type_comment=None) + ], + value=gast.Tuple(elts=fill_tuple, + ctx=gast.Load()))) else: # In this case we should NOT append RETURN_NO_VALUE placeholder if return_node.value is not None: @@ -412,21 +400,20 @@ class ReturnTransformer(gast.NodeTransformer): RETURN_VALUE_PREFIX) assign_nodes.append( - gast.Assign( - targets=[ - gast.Name( - id=self.return_value_name[cur_func_node], 
- ctx=gast.Store(), - annotation=None, - type_comment=None) - ], - value=return_node.value)) + gast.Assign(targets=[ + gast.Name(id=self.return_value_name[cur_func_node], + ctx=gast.Store(), + annotation=None, + type_comment=None) + ], + value=return_node.value)) stmt_list[i:] = assign_nodes return True - def _replace_after_node_to_if_in_stmt_list( - self, stmt_list, node, return_name, parent_node_of_return): + def _replace_after_node_to_if_in_stmt_list(self, stmt_list, node, + return_name, + parent_node_of_return): i = index_in_list(stmt_list, node) if i < 0 or i >= len(stmt_list): return False @@ -434,13 +421,12 @@ class ReturnTransformer(gast.NodeTransformer): # No need to add, we consider this as added successfully return True - if_stmt = gast.If(test=gast.UnaryOp( - op=gast.Not(), - operand=gast.Name( - id=return_name, - ctx=gast.Store(), - annotation=None, - type_comment=None)), + if_stmt = gast.If(test=gast.UnaryOp(op=gast.Not(), + operand=gast.Name( + id=return_name, + ctx=gast.Store(), + annotation=None, + type_comment=None)), body=stmt_list[i + 1:], orelse=[]) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/static_analysis.py b/python/paddle/fluid/dygraph/dygraph_to_static/static_analysis.py index 98e76c0f46f..82177b343aa 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/static_analysis.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/static_analysis.py @@ -181,8 +181,9 @@ class AstVarEnv(object): self.cur_scope = AstVarScope() def enter_scope(self, scope_name, scope_type): - self.cur_scope = AstVarScope( - scope_name, scope_type, parent_scope=self.cur_scope) + self.cur_scope = AstVarScope(scope_name, + scope_type, + parent_scope=self.cur_scope) return self.cur_scope def exit_scope(self): @@ -351,8 +352,8 @@ class StaticAnalysisVisitor(object): if node.value: node_value_type = self.node_to_wrapper_map[ node.value].node_var_type - if not (node_value_type & - {NodeVarType.UNKNOWN, NodeVarType.STATEMENT}): + if not (node_value_type + & {NodeVarType.UNKNOWN, NodeVarType.STATEMENT}): ret_type = node_value_type if isinstance(node.target, gast.Name): self.node_to_wrapper_map[node.target].node_var_type = ret_type diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/tensor_shape_transformer.py b/python/paddle/fluid/dygraph/dygraph_to_static/tensor_shape_transformer.py index d5b23d2f53b..a04171dfc30 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/tensor_shape_transformer.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/tensor_shape_transformer.py @@ -47,8 +47,9 @@ def create_convert_shape_node(var_shape_node, api_shape_node = gast.parse(convert_var_shape_func).body[0].value if slice_node is not None and not slice_is_num(slice_node): - return gast.Subscript( - value=api_shape_node, slice=slice_node.slice, ctx=gast.Load()) + return gast.Subscript(value=api_shape_node, + slice=slice_node.slice, + ctx=gast.Load()) return api_shape_node if isinstance(var_shape_node, gast.Subscript): @@ -65,12 +66,13 @@ def create_choose_shape_node(attr_shape_name, api_shape_name, slice_node=None): if slice_node is not None and slice_is_num(slice_node): args.append(ast_to_source_code(slice_node.slice).strip()) - choose_shape_func = "_jst.choose_shape_attr_or_api({})".format(",".join( - args)) + choose_shape_func = "_jst.choose_shape_attr_or_api({})".format( + ",".join(args)) choose_shape_node = gast.parse(choose_shape_func).body[0].value if slice_node is not None and not slice_is_num(slice_node): - return gast.Subscript( - value=choose_shape_node, 
slice=slice_node.slice, ctx=gast.Load()) + return gast.Subscript(value=choose_shape_node, + slice=slice_node.slice, + ctx=gast.Load()) return choose_shape_node @@ -226,9 +228,10 @@ class TensorShapeTransformer(gast.NodeTransformer): for field, value in gast.iter_fields(parent_node): if child_node is value: if var_shape_node is child_node: - setattr(parent_node, field, - create_convert_shape_node(var_shape_node, - None, True)) + setattr( + parent_node, field, + create_convert_shape_node( + var_shape_node, None, True)) else: setattr(parent_node, field, var_shape_node) break @@ -283,8 +286,8 @@ class TensorShapeTransformer(gast.NodeTransformer): if isinstance(node, gast.Attribute): # If node is `paddle.shape`, return False - if (node.attr == 'shape' and isinstance(node.value, gast.Name) and - node.value.id == 'paddle'): + if (node.attr == 'shape' and isinstance(node.value, gast.Name) + and node.value.id == 'paddle'): return False if node.attr != 'shape': return False @@ -323,9 +326,8 @@ class TensorShapeTransformer(gast.NodeTransformer): sub_node = gast.parse(sub_node_str).body[0].value update_static_shape_var_node.append( - gast.Assign( - targets=[static_shape_var_node], - value=sub_node)) + gast.Assign(targets=[static_shape_var_node], + value=sub_node)) self.name_to_var_shape[ target_id] = static_shape_var_name @@ -346,16 +348,15 @@ class TensorShapeTransformer(gast.NodeTransformer): idx) sub_node = gast.parse(sub_node_str).body[0].value # Note(Aurelius84): Becuase static_shape_var_name is used in - # eval_if_exist_else_none() as plain string, so it will not + # eval_if_exist_else_none() as plain string, so it will not # be pasred as argument in convert_loop/ifelse. We delcare it # as global var because it has unique name. update_static_shape_var_node.append( gast.Global(names=[static_shape_var_name])) update_static_shape_var_node.append( - gast.Assign( - targets=[static_shape_var_node], - value=sub_node)) + gast.Assign(targets=[static_shape_var_node], + value=sub_node)) self.name_to_var_shape[ target_id] = static_shape_var_name return update_static_shape_var_node @@ -373,16 +374,15 @@ class TensorShapeTransformer(gast.NodeTransformer): static_shape_value_name).body[0].value update_static_shape_var_node = [ - gast.Assign( - targets=[static_shape_var_node], - value=static_shape_value_node) + gast.Assign(targets=[static_shape_var_node], + value=static_shape_value_node) ] self.name_to_var_shape[target_id] = static_shape_var_name elif self._is_var_shape(value_node): # eg: x.shape or x.shape[0] static_shape_var_name = unique_name.generate( STATIC_CONVERT_VAR_SHAPE_SUFFIX) - static_shape_var_node = gast.parse(static_shape_var_name).body[ - 0].value + static_shape_var_node = gast.parse( + static_shape_var_name).body[0].value static_shape_value_node = copy.deepcopy(value_node) # x.shape becomes convert_var_shape_simple(x) static_shape_value_node = ShapeAttributeTransformer().visit( @@ -392,8 +392,7 @@ class TensorShapeTransformer(gast.NodeTransformer): gast.Global(names=[static_shape_var_name]) ] update_static_shape_var_node.append( - gast.Assign( - targets=[static_shape_var_node], - value=static_shape_value_node)) + gast.Assign(targets=[static_shape_var_node], + value=static_shape_value_node)) self.name_to_var_shape[target_id] = static_shape_var_name return update_static_shape_var_node diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/utils.py b/python/paddle/fluid/dygraph/dygraph_to_static/utils.py index 91c2c5dc65a..4a477fb7d7c 100644 --- 
a/python/paddle/fluid/dygraph/dygraph_to_static/utils.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/utils.py @@ -92,14 +92,13 @@ def getfullargspec(target): return inspect.getfullargspec(target) else: argspec = inspect.getargspec(target) - return FullArgSpec( - args=argspec.args, - varargs=argspec.varargs, - varkw=argspec.keywords, - defaults=argspec.defaults, - kwonlyargs=[], - kwonlydefaults=None, - annotations={}) + return FullArgSpec(args=argspec.args, + varargs=argspec.varargs, + varkw=argspec.keywords, + defaults=argspec.defaults, + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) def parse_arg_and_kwargs(function): @@ -190,8 +189,8 @@ def is_api_in_module(node, module_prefix): from paddle.fluid.dygraph import to_variable from paddle import to_tensor - return eval("_is_api_in_module_helper({}, '{}')".format(func_str, - module_prefix)) + return eval("_is_api_in_module_helper({}, '{}')".format( + func_str, module_prefix)) except Exception: return False @@ -241,8 +240,9 @@ def is_control_flow_to_transform(node, """ assert isinstance(node, gast.AST), \ "The type of input node must be gast.AST, but received %s." % type(node) - visitor = IsControlFlowVisitor( - node, static_analysis_visitor, node_var_type_map=var_name_to_type) + visitor = IsControlFlowVisitor(node, + static_analysis_visitor, + node_var_type_map=var_name_to_type) need_to_transform = visitor.transform() return need_to_transform @@ -262,9 +262,9 @@ def to_static_api(dygraph_class): if dygraph_class in dygraph_class_to_static_api: return dygraph_class_to_static_api[dygraph_class] else: - raise NotImplementedError("Paddle dygraph API {} cannot be converted " - "to static graph at present.".format( - dygraph_class)) + raise NotImplementedError( + "Paddle dygraph API {} cannot be converted " + "to static graph at present.".format(dygraph_class)) def _add_keywords_to(node, dygraph_api_name): @@ -275,10 +275,8 @@ def _add_keywords_to(node, dygraph_api_name): ast_keyword.arg = "size" node.keywords.append( - gast.keyword( - arg="num_flatten_dims", - value=gast.Constant( - value=-1, kind=None))) + gast.keyword(arg="num_flatten_dims", + value=gast.Constant(value=-1, kind=None))) if dygraph_api_name == "BilinearTensorProduct": for ast_keyword in node.keywords: @@ -297,15 +295,15 @@ def to_static_ast(node, class_node): assert isinstance(class_node, gast.Call) static_api = to_static_api(class_node.func.attr) - node.func = gast.Attribute( - attr=static_api, - ctx=gast.Load(), - value=gast.Attribute( - attr='layers', - ctx=gast.Load(), - value=gast.Name( - ctx=gast.Load(), id='fluid', annotation=None, - type_comment=None))) + node.func = gast.Attribute(attr=static_api, + ctx=gast.Load(), + value=gast.Attribute(attr='layers', + ctx=gast.Load(), + value=gast.Name( + ctx=gast.Load(), + id='fluid', + annotation=None, + type_comment=None))) update_args_of_func(node, class_node, 'forward') @@ -330,8 +328,8 @@ def update_args_of_func(node, dygraph_node, method_name): import paddle.fluid as fluid if method_name == "__init__" or eval( "issubclass({}, fluid.dygraph.Layer)".format(class_src)): - full_args = eval("inspect.getargspec({}.{})".format(class_src, - method_name)) + full_args = eval("inspect.getargspec({}.{})".format( + class_src, method_name)) full_args_name = [ arg_name for arg_name in full_args[0] if arg_name != "self" ] @@ -394,11 +392,11 @@ def generate_name_node(name_ids, ctx=gast.Load(), gen_tuple_if_single=False): if isinstance(name_ids, six.string_types): name_ids = [name_ids] if not isinstance(name_ids, (list, tuple, 
set)): - raise TypeError('name_ids must be list or tuple or set, but received %s' - % type(type(name_ids))) + raise TypeError( + 'name_ids must be list or tuple or set, but received %s' % + type(type(name_ids))) gast_names = [ - gast.Name( - id=name_id, ctx=ctx, annotation=None, type_comment=None) + gast.Name(id=name_id, ctx=ctx, annotation=None, type_comment=None) for name_id in name_ids ] if len(gast_names) == 1 and not gen_tuple_if_single: @@ -419,13 +417,12 @@ def create_funcDef_node(nodes, name, input_args, return_name_ids): nodes.append(gast.Return(value=generate_name_node(return_name_ids))) else: nodes.append(gast.Return(value=None)) - func_def_node = gast.FunctionDef( - name=name, - args=input_args, - body=nodes, - decorator_list=[], - returns=None, - type_comment=None) + func_def_node = gast.FunctionDef(name=name, + args=input_args, + body=nodes, + decorator_list=[], + returns=None, + type_comment=None) return func_def_node @@ -447,6 +444,7 @@ def create_assign_node(name, node): class RenameTransformer(gast.NodeTransformer): + def __init__(self, node): assert isinstance( node, gast.AST), "RenameTransformer only accepts gast.AST as input" @@ -488,8 +486,10 @@ def ast_to_func(ast_root, dyfunc, delete_on_exit=True): source = ast_to_source_code(ast_root) source = _inject_import_statements() + source - f = tempfile.NamedTemporaryFile( - mode='w', suffix='.py', delete=False, encoding='utf-8') + f = tempfile.NamedTemporaryFile(mode='w', + suffix='.py', + delete=False, + encoding='utf-8') with f: module_name = os.path.basename(f.name[:-3]) f.write(source) @@ -546,8 +546,8 @@ def func_to_source_code(function, dedent=True): """ if not (inspect.isfunction(function) or inspect.ismethod(function)): raise TypeError( - "The type of 'function' should be a function or method, but received {}.". - format(type(function).__name__)) + "The type of 'function' should be a function or method, but received {}." + .format(type(function).__name__)) source_code_list, _ = inspect.getsourcelines(function) # Replace comments with blank lines so that error messages are not misplaced source_code_list = [ @@ -596,8 +596,9 @@ def compare_with_none(node): # node.comparators is a list. 
if isinstance(child, list): child = child[0] - if (isinstance(child, gast.Constant) and child.value is None) or ( - isinstance(child, gast.Name) and child.id == 'None'): + if (isinstance(child, gast.Constant) + and child.value is None) or (isinstance(child, gast.Name) + and child.id == 'None'): return True return False @@ -869,54 +870,46 @@ class ForLoopTuplePreTransformer(gast.NodeTransformer): tuple_iter_name = unique_name.generate( FOR_ITER_TUPLE_INDEX_PREFIX) tuple_var_name = unique_name.generate(FOR_ITER_TUPLE_PREFIX) - node.target = gast.Tuple( - elts=[ - gast.Name( - id=tuple_iter_name, - ctx=gast.Store(), - annotation=None, - type_comment=None), gast.Name( - id=tuple_var_name, - ctx=gast.Store(), - annotation=None, - type_comment=None) - ], - ctx=gast.Store()) + node.target = gast.Tuple(elts=[ + gast.Name(id=tuple_iter_name, + ctx=gast.Store(), + annotation=None, + type_comment=None), + gast.Name(id=tuple_var_name, + ctx=gast.Store(), + annotation=None, + type_comment=None) + ], + ctx=gast.Store()) node.body.insert( 0, - gast.Assign( - targets=[ - gast.Name( - id=out_tuple_name, - ctx=gast.Store(), - annotation=None, - type_comment=None) - ], - value=gast.Tuple( - elts=[ - gast.Name( - id=tuple_iter_name, - ctx=gast.Load(), - annotation=None, - type_comment=None), gast.Name( - id=tuple_var_name, - ctx=gast.Load(), - annotation=None, - type_comment=None) - ], - ctx=gast.Load()))) - elif isinstance(node.target, ( - gast.List, - gast.Tuple)) and len(node.target.elts) >= 2 and isinstance( + gast.Assign(targets=[ + gast.Name(id=out_tuple_name, + ctx=gast.Store(), + annotation=None, + type_comment=None) + ], + value=gast.Tuple(elts=[ + gast.Name(id=tuple_iter_name, + ctx=gast.Load(), + annotation=None, + type_comment=None), + gast.Name(id=tuple_var_name, + ctx=gast.Load(), + annotation=None, + type_comment=None) + ], + ctx=gast.Load()))) + elif isinstance(node.target, (gast.List, gast.Tuple)) and len( + node.target.elts) >= 2 and isinstance( node.target.elts[1], (gast.List, gast.Tuple)): # Inner tuple case inner_tuple_name = unique_name.generate(FOR_ITER_TUPLE_PREFIX) origin_inner_tuple_node = node.target.elts[1] - node.target.elts[1] = gast.Name( - id=inner_tuple_name, - ctx=gast.Store(), - annotation=None, - type_comment=None) + node.target.elts[1] = gast.Name(id=inner_tuple_name, + ctx=gast.Store(), + annotation=None, + type_comment=None) node.body[0:0] = self.tuple_to_stmts(origin_inner_tuple_node, inner_tuple_name) elif self.is_for_iter(node) and isinstance(node.target, @@ -924,11 +917,10 @@ class ForLoopTuplePreTransformer(gast.NodeTransformer): # Non-enumrate case: tuple_name = unique_name.generate(FOR_ITER_TUPLE_PREFIX) origin_tuple_node = node.target - node.target = gast.Name( - id=tuple_name, - ctx=gast.Store(), - annotation=None, - type_comment=None) + node.target = gast.Name(id=tuple_name, + ctx=gast.Store(), + annotation=None, + type_comment=None) node.body[0:0] = self.tuple_to_stmts(origin_tuple_node, tuple_name) return node @@ -1144,8 +1136,8 @@ class ForNodeVisitor(object): if self.args_length == 1: index_init_value_str = '0' else: - index_init_value_str = ast_to_source_code(self.iter_args[ - 0]).strip() + index_init_value_str = ast_to_source_code( + self.iter_args[0]).strip() index_init_var_name = self.iter_var_name else: @@ -1164,8 +1156,8 @@ class ForNodeVisitor(object): if isinstance(self.iter_node, gast.Call) and isinstance( self.iter_node.func, gast.Attribute) and self.iter_node.func.attr == 'numpy': - iter_var_name = 
ast_to_source_code(self.iter_node.func.value).strip( - ) + iter_var_name = ast_to_source_code( + self.iter_node.func.value).strip() else: iter_var_name = ast_to_source_code(self.iter_node).strip() @@ -1196,11 +1188,10 @@ class ForNodeVisitor(object): zip_to_list_node = gast.parse(zip_to_list_str).body[0] new_nodes.append(zip_to_list_node) - self.iter_node = gast.Name( - id=self.iter_zip_to_list_name, - ctx=gast.Load(), - annotation=None, - type_comment=None) + self.iter_node = gast.Name(id=self.iter_zip_to_list_name, + ctx=gast.Load(), + annotation=None, + type_comment=None) return new_nodes @@ -1220,18 +1211,17 @@ class ForNodeVisitor(object): compare_node = self.iter_args[ 0] if self.args_length == 1 else self.iter_args[1] else: - compare_node = gast.Name( - id=self.iter_var_len_name, - ctx=gast.Load(), - annotation=None, - type_comment=None) + compare_node = gast.Name(id=self.iter_var_len_name, + ctx=gast.Load(), + annotation=None, + type_comment=None) return compare_node def _build_step_node(self): if self.is_for_range_iter(): step_node = self.iter_args[ - 2] if self.args_length == 3 else gast.Constant( - value=1, kind=None) + 2] if self.args_length == 3 else gast.Constant(value=1, + kind=None) else: step_node = gast.Constant(value=1, kind=None) return step_node @@ -1248,40 +1238,37 @@ class ForNodeVisitor(object): # range(max, min, -2) # -> # i > min - return gast.Compare( - left=gast.Name( - id=self.iter_var_name - if self.is_for_range_iter() else self.iter_idx_name, - ctx=gast.Load(), - annotation=None, - type_comment=None), - ops=[gast.Gt()], - comparators=[compare_node]) + return gast.Compare(left=gast.Name( + id=self.iter_var_name + if self.is_for_range_iter() else self.iter_idx_name, + ctx=gast.Load(), + annotation=None, + type_comment=None), + ops=[gast.Gt()], + comparators=[compare_node]) else: # eg: # range(min, max, 2) # -> # i < max - return gast.Compare( - left=gast.Name( - id=self.iter_var_name - if self.is_for_range_iter() else self.iter_idx_name, - ctx=gast.Load(), - annotation=None, - type_comment=None), - ops=[gast.Lt()], - comparators=[compare_node]) - - def _build_index_increase_node(self, step_node): - return gast.AugAssign( - target=gast.Name( + return gast.Compare(left=gast.Name( id=self.iter_var_name if self.is_for_range_iter() else self.iter_idx_name, - ctx=gast.Store(), + ctx=gast.Load(), annotation=None, type_comment=None), - op=gast.Add(), - value=step_node) + ops=[gast.Lt()], + comparators=[compare_node]) + + def _build_index_increase_node(self, step_node): + return gast.AugAssign(target=gast.Name( + id=self.iter_var_name + if self.is_for_range_iter() else self.iter_idx_name, + ctx=gast.Store(), + annotation=None, + type_comment=None), + op=gast.Add(), + value=step_node) def _build_assign_var_slice_node(self): var_slice_str = "{}[{}]".format( @@ -1293,15 +1280,12 @@ class ForNodeVisitor(object): return target_node, assign_node def _build_enum_increase_node(self): - return gast.AugAssign( - target=gast.Name( - id=self.enum_idx_name, - ctx=gast.Store(), - annotation=None, - type_comment=None), - op=gast.Add(), - value=gast.Constant( - value=1, kind=None)) + return gast.AugAssign(target=gast.Name(id=self.enum_idx_name, + ctx=gast.Store(), + annotation=None, + type_comment=None), + op=gast.Add(), + value=gast.Constant(value=1, kind=None)) def _get_iter_var_name(self): if self.is_for_range_iter(): diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/variable_trans_func.py b/python/paddle/fluid/dygraph/dygraph_to_static/variable_trans_func.py index 
7ce5aede499..66885536ae4 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/variable_trans_func.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/variable_trans_func.py @@ -65,15 +65,14 @@ def data_layer_not_check(name, shape, dtype='float32', lod_level=0): if shape[i] is None: shape[i] = -1 - return helper.create_global_variable( - name=name, - shape=shape, - dtype=dtype, - type=core.VarDesc.VarType.LOD_TENSOR, - stop_gradient=True, - lod_level=lod_level, - is_data=True, - need_check_feed=False) + return helper.create_global_variable(name=name, + shape=shape, + dtype=dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + stop_gradient=True, + lod_level=lod_level, + is_data=True, + need_check_feed=False) def to_static_variable_gast_node(name): diff --git a/python/paddle/fluid/dygraph/inplace_utils.py b/python/paddle/fluid/dygraph/inplace_utils.py index 5fa38c9d5f0..14e875b8b06 100644 --- a/python/paddle/fluid/dygraph/inplace_utils.py +++ b/python/paddle/fluid/dygraph/inplace_utils.py @@ -23,12 +23,13 @@ from paddle import _C_ops # in dygraph mode. If static mode is used, the inplace mechanism will not be used, and the static method # of the original API will be called. def _inplace_apis_in_dygraph_only_(func): + def __impl__(*args, **kwargs): if not _non_static_mode(): origin_api_name = func.__name__[:-1] warnings.warn( - "In static mode, {}() is the same as {}() and does not perform inplace operation.". - format(func.__name__, origin_api_name)) + "In static mode, {}() is the same as {}() and does not perform inplace operation." + .format(func.__name__, origin_api_name)) origin_func = "{}.{}".format(func.__module__, origin_api_name) return eval(origin_func)(*args, **kwargs) return func(*args, **kwargs) diff --git a/python/paddle/fluid/dygraph/io.py b/python/paddle/fluid/dygraph/io.py index 249c7b6a064..a778cc3a1c6 100644 --- a/python/paddle/fluid/dygraph/io.py +++ b/python/paddle/fluid/dygraph/io.py @@ -206,8 +206,8 @@ def _rename_var_program_desc(program_desc, include=None, exclude=None): is_double_grad_var = "@GRAD" in name_old has_double_grad = has_double_grad or is_double_grad_var should_rename = (include is None or name_old in include) and ( - exclude is None or - name_old not in exclude) and not is_double_grad_var + exclude is None + or name_old not in exclude) and not is_double_grad_var if should_rename: temp_name = name_old.split('_') if len(temp_name) > 1 and temp_name[-1].isnumeric(): @@ -223,8 +223,8 @@ def _rename_var_program_desc(program_desc, include=None, exclude=None): else: name_new = name_old if name_old != name_new: - cur_block._rename_var( - cpt.to_bytes(name_old), cpt.to_bytes(name_new)) + cur_block._rename_var(cpt.to_bytes(name_old), + cpt.to_bytes(name_new)) if not is_double_grad_var: dict_rename_var_old_new[name_old] = name_new dict_rename_var_new_old[name_new] = name_old @@ -244,8 +244,8 @@ def _rename_var_program_desc(program_desc, include=None, exclude=None): for var_name in double_grad_rename_dict: dict_rename_var_old_new[var_name] = double_grad_rename_dict[ var_name] - dict_rename_var_new_old[double_grad_rename_dict[ - var_name]] = var_name + dict_rename_var_new_old[ + double_grad_rename_dict[var_name]] = var_name # Rename on program desc for b_idx in six.moves.range(program_desc.num_blocks()): @@ -254,16 +254,15 @@ def _rename_var_program_desc(program_desc, include=None, exclude=None): op = cur_block.op(op_idx) for input_arg_name in op.input_arg_names(): if input_arg_name in dict_rename_var_old_new: - if input_arg_name != dict_rename_var_old_new[ - 
input_arg_name]: + if input_arg_name != dict_rename_var_old_new[input_arg_name]: op._rename_input( input_arg_name, dict_rename_var_old_new[input_arg_name]) if cur_block.has_var(cpt.to_bytes(input_arg_name)): cur_block._rename_var( cpt.to_bytes(input_arg_name), - cpt.to_bytes(dict_rename_var_old_new[ - input_arg_name])) + cpt.to_bytes( + dict_rename_var_old_new[input_arg_name])) for output_arg_name in op.output_arg_names(): if output_arg_name in dict_rename_var_old_new: if output_arg_name != dict_rename_var_old_new[ @@ -274,8 +273,8 @@ def _rename_var_program_desc(program_desc, include=None, exclude=None): if cur_block.has_var(cpt.to_bytes(output_arg_name)): cur_block._rename_var( cpt.to_bytes(output_arg_name), - cpt.to_bytes(dict_rename_var_old_new[ - output_arg_name])) + cpt.to_bytes( + dict_rename_var_old_new[output_arg_name])) program_desc.flush() return dict_rename_var_new_old, dict_rename_var_old_new @@ -364,8 +363,8 @@ class _ProgramHolder(object): def _preprocess(self, program_desc): # rename persistable variables of 'program_desc' list_persistable_var = _get_persistable_var_names(program_desc) - rename_new_old_dict, _ = _rename_var_program_desc(program_desc, - list_persistable_var) + rename_new_old_dict, _ = _rename_var_program_desc( + program_desc, list_persistable_var) # 1. Prune original program # remove feed, fetch and scale-1 op, remove op_callstack attr ops_to_remove = [] @@ -412,16 +411,16 @@ class _ProgramHolder(object): # 3. Output processing, add scale for outputs tmp_program = _build_program_by_desc(program_desc) # NOTE: [why need append scale for outputs] - # When dealing with some more complex pre-training models, there - # will be situations where the pre-training model has multiple - # fetch outputs. In the scenario of multiple fetch outputs, - # there is a special case where multiple outputs of the model - # may be on the same branch. According to the user's subsequent + # When dealing with some more complex pre-training models, there + # will be situations where the pre-training model has multiple + # fetch outputs. In the scenario of multiple fetch outputs, + # there is a special case where multiple outputs of the model + # may be on the same branch. According to the user's subsequent # use, multiple outputs may be associated with multiple branches. - # These subsequent operations are added in TranslatedLayer is - # agnostic during initialization, which results in subsequent - # gradient accumulation operations that are required on the - # output node in the middle of the branch will not be performed, + # These subsequent operations are added in TranslatedLayer is + # agnostic during initialization, which results in subsequent + # gradient accumulation operations that are required on the + # output node in the middle of the branch will not be performed, # resulting in error, details see pull request: # [https://github.com/PaddlePaddle/Paddle/pull/24627] self._append_scale_to_output(tmp_program) @@ -429,15 +428,15 @@ class _ProgramHolder(object): # 4. Persistable vars processing # - append loaded suffix to persistable vars # NOTE: [why need to append suffix to persistable vars] - # Dygraph and static graph mode use the same naming mechanism. - # If users want to load the model fine-tune, it is possible - # to add the existing Layer in the loaded model to enhance - # the network. For example, the original saved model has linear, - # and later after loading, a new linear is added. 
At this time, - # there will be a problem of duplicate names, so here is unified + # Dygraph and static graph mode use the same naming mechanism. + # If users want to load the model fine-tune, it is possible + # to add the existing Layer in the loaded model to enhance + # the network. For example, the original saved model has linear, + # and later after loading, a new linear is added. At this time, + # there will be a problem of duplicate names, so here is unified # to add the LOADED suffix to the parameters of the model loaded - self._suffix_varname_dict = _get_loaded_var_new_old(program_desc, - rename_new_old_dict) + self._suffix_varname_dict = _get_loaded_var_new_old( + program_desc, rename_new_old_dict) # - get persistable var self._persistable_names = _get_persistable_var_names(program_desc) @@ -451,8 +450,9 @@ class _ProgramHolder(object): with framework.program_guard(program): for i, out in enumerate(self._output_descs): var = program.global_block().var(out.name()) - var = nn.scale( - var, 1., name="translated_layer/scale_{}".format(i)) + var = nn.scale(var, + 1., + name="translated_layer/scale_{}".format(i)) scale_output_vars.append(var) # 2. update output names & descs for i, var in enumerate(scale_output_vars): @@ -468,7 +468,7 @@ class _ProgramHolder(object): # 2. prepare program and related var # NOTE: To reuse backward interfaces, build Program firstly. # Originally, there is no need to build a program, but need to almost - # rewrite a series of methods for append_backward for program_desc. + # rewrite a series of methods for append_backward for program_desc. # Therefore, in order to reuse the method of backward.py, build the program here. program = _build_program_by_desc(program_desc_copy) # 3. Add the outputs which is only used for training and not saved in @@ -498,7 +498,7 @@ class _ProgramHolder(object): # [ TranslatedLayer : Run program in imperative mode ] -# +# # DESIGN IDEA: using an special operator `RunProgram`, execute program inside operator. # # Op's Inputs: @@ -506,21 +506,21 @@ class _ProgramHolder(object): # - the necessary parameters of the network # Op's Outputs: # - the output variable of fetch -# +# # This op receives a complete program desc, internally creates scope # and executor, executes this program. Key points: # -# 1. Data Sharing: +# 1. Data Sharing: # The varBase of the dynamic graph is not in the scope, so before the op # executes the program internally, create persistent variables with the # same name as feed, parameters, and fetch in the scope, and share the # LoDTensor of the op input. -# +# # 2. Forward and Backward Separation: # Because the dynamic graph op performs the forward and backward separately, # in the forward op RunProgram, we only execute the forward part of whole program, # and in the backward op RunProgramGrad, we execute the backward part of program. -# We can not separate the program into forward and backward part, which will +# We can not separate the program into forward and backward part, which will # make some control flow execution logic wrong. 
@@ -537,26 +537,23 @@ def _load_persistable_vars_by_program(model_path, if _is_parameter(each_var, program_holder.infer_program): # create output varbase if framework._in_eager_without_dygraph_check(): - new_var = framework.EagerParamBase( - shape=each_var.shape(), - dtype=each_var.dtype(), - name=each_var.name(), - type=each_var.type(), - persistable=True) + new_var = framework.EagerParamBase(shape=each_var.shape(), + dtype=each_var.dtype(), + name=each_var.name(), + type=each_var.type(), + persistable=True) else: - new_var = framework.ParamBase( - shape=each_var.shape(), - dtype=each_var.dtype(), - name=each_var.name(), - type=each_var.type(), - persistable=True) + new_var = framework.ParamBase(shape=each_var.shape(), + dtype=each_var.dtype(), + name=each_var.name(), + type=each_var.type(), + persistable=True) else: - new_var = framework._varbase_creator( - type=each_var.type(), - name=each_var.name(), - shape=each_var.shape(), - dtype=each_var.dtype(), - persistable=True) + new_var = framework._varbase_creator(type=each_var.type(), + name=each_var.name(), + shape=each_var.shape(), + dtype=each_var.dtype(), + persistable=True) if params_filename is None: framework._dygraph_tracer().trace_op( type='load', @@ -588,7 +585,7 @@ def _load_persistable_vars_by_program(model_path, param.stop_gradient = False # NOTE: [Recovery stop gradient information based on the program] - # After loading the model, the stop_gradient information + # After loading the model, the stop_gradient information # of the original variable is lost, but if a parameter does not # have a corresponding @GRAD variable in the backward program, # it can be said that it is also stop_gradient @@ -617,7 +614,7 @@ def _load_persistable_vars(model_path, var_info_path, program_holder, # NOTE(chenweihang): we need load persistable vars based the program, # because the program may be pruned when `save_inference_model`, some - # var in `extra_var_info` may have been pruned + # var in `extra_var_info` may have been pruned for name in sorted(inv_suffix_varname_dict): if name not in extra_var_info: raise RuntimeError( @@ -646,8 +643,8 @@ def _load_persistable_vars(model_path, var_info_path, program_holder, name=new_name, persistable=True) else: - new_var = framework._varbase_creator( - name=new_name, persistable=True) + new_var = framework._varbase_creator(name=new_name, + persistable=True) new_var.stop_gradient = extra_var_info[name]['stop_gradient'] load_var_dict[new_name] = new_var @@ -660,11 +657,10 @@ def _load_persistable_vars(model_path, var_info_path, program_holder, if len(extra_var_info) != 0: raise ValueError("The model to be loaded is incomplete.") else: - framework._dygraph_tracer().trace_op( - type='load_combine', - inputs={}, - outputs={'Out': load_var_list}, - attrs={'file_path': var_file_path}) + framework._dygraph_tracer().trace_op(type='load_combine', + inputs={}, + outputs={'Out': load_var_list}, + attrs={'file_path': var_file_path}) return load_var_dict @@ -694,8 +690,9 @@ def _construct_program_holders(model_path, model_filename=None): model_file_path = os.path.join(model_path, model_filename) elif filename.endswith(INFER_MODEL_SUFFIX) and filename.startswith( model_name): - parsing_names = filename[len(model_name):-len( - INFER_MODEL_SUFFIX) + 1].split('.') + parsing_names = filename[len(model_name + ):-len(INFER_MODEL_SUFFIX) + + 1].split('.') if len(parsing_names) == 3 and len(parsing_names[1]) > 0: func_name = parsing_names[1] model_file_path = os.path.join(model_path, filename) @@ -737,8 +734,9 @@ def 
_construct_params_and_buffers(model_path, for file_name in os.listdir(model_path): if file_name.startswith(model_name) and file_name.endswith( INFER_PARAMS_SUFFIX): - parsing_names = file_name[len(model_name):-len( - INFER_PARAMS_SUFFIX) + 1].split('.') + parsing_names = file_name[len(model_name + ):-len(INFER_PARAMS_SUFFIX) + + 1].split('.') if len(parsing_names) == 3 and len(parsing_names[1]) > 0: func_name = parsing_names[1] else: @@ -747,14 +745,15 @@ def _construct_params_and_buffers(model_path, continue var_info_path = os.path.join(model_path, var_info_filename) var_dict.update( - _load_persistable_vars(model_path, var_info_path, programs[ - func_name], file_name)) + _load_persistable_vars(model_path, var_info_path, + programs[func_name], file_name)) elif params_filename is not None and not os.path.exists(params_path): # When saving XX, there is only '*.pdmodel' return dict() else: - var_dict = _load_persistable_vars_by_program( - model_path, programs['forward'], params_filename) + var_dict = _load_persistable_vars_by_program(model_path, + programs['forward'], + params_filename) if not append_suffix: var_dict = _remove_varname_suffix(var_dict, programs['forward']) @@ -796,15 +795,14 @@ def _run_dygraph(instance, input, program_holder): place=framework._current_expected_place(), zero_copy=True) else: - var = core.VarBase( - value=value, - name=program_holder.input_descs[i].name(), - persistable=False, - place=framework._current_expected_place(), - zero_copy=True) + var = core.VarBase(value=value, + name=program_holder.input_descs[i].name(), + persistable=False, + place=framework._current_expected_place(), + zero_copy=True) else: var = value - # NOTE: we changed var name here, + # NOTE: we changed var name here, # but it may be an important name set by user var.name = program_holder.input_descs[i].name() input_vars.append(var) @@ -828,15 +826,13 @@ def _run_dygraph(instance, input, program_holder): output_vars = [] for var_desc in program_holder.output_descs: if framework._in_eager_without_dygraph_check(): - var = core.eager.Tensor( - dtype=var_desc.dtype(), - dims=var_desc.shape(), - name=var_desc.name(), - type=var_desc.type(), - persistable=False) + var = core.eager.Tensor(dtype=var_desc.dtype(), + dims=var_desc.shape(), + name=var_desc.name(), + type=var_desc.type(), + persistable=False) else: - var = core.VarBase(var_desc.dtype(), - var_desc.shape(), + var = core.VarBase(var_desc.dtype(), var_desc.shape(), var_desc.name(), var_desc.type(), False) output_vars.append(var) @@ -852,15 +848,13 @@ def _run_dygraph(instance, input, program_holder): double_grad_vars = [] for var_desc in program_holder.double_grad_descs: if framework._in_eager_without_dygraph_check(): - var = core.eager.Tensor( - dtype=var_desc.dtype(), - dims=var_desc.shape(), - name=var_desc.name(), - type=var_desc.type(), - persistable=False) + var = core.eager.Tensor(dtype=var_desc.dtype(), + dims=var_desc.shape(), + name=var_desc.name(), + type=var_desc.type(), + persistable=False) else: - var = core.VarBase(var_desc.dtype(), - var_desc.shape(), + var = core.VarBase(var_desc.dtype(), var_desc.shape(), var_desc.name(), var_desc.type(), False) double_grad_vars.append(var) @@ -870,11 +864,9 @@ def _run_dygraph(instance, input, program_holder): attrs = ('global_block', trace_program.block(0), 'start_op_index', 0, 'end_op_index', end_op_index, 'is_test', instance._is_test, 'program_id', _hash_with_id(trace_program, instance)) - _C_ops.run_program( - _valid_vars(input_vars), - _valid_vars(persistable_vars), - 
_valid_vars(output_vars), tmp_scope_vec, - _valid_vars(double_grad_vars), None, *attrs) + _C_ops.run_program(_valid_vars(input_vars), _valid_vars(persistable_vars), + _valid_vars(output_vars), tmp_scope_vec, + _valid_vars(double_grad_vars), None, *attrs) # NOTE: [ why need set param's gradient type here ] # if user set sparse gradient mode, the param's gradient # will be SelectedRows, not LoDTensor. But tracer will just @@ -885,7 +877,7 @@ def _run_dygraph(instance, input, program_holder): for persistable_var in persistable_vars: grad_var_name = persistable_var.name + core.grad_var_suffix() grad_var = trace_program.block(0).find_var(cpt.to_bytes(grad_var_name)) - # NOTE: cannot find var desc maybe not problem, + # NOTE: cannot find var desc maybe not problem, # such as in batch_norm if grad_var is None: continue @@ -902,8 +894,8 @@ def _run_dygraph(instance, input, program_holder): def drop_scope_if_no_grad(instance, scope_vec): tracer = framework._dygraph_tracer() - scope = scope_vec.value().get_scope() if isinstance(scope_vec, ( - core.VarBase)) else scope_vec[0] + scope = scope_vec.value().get_scope() if isinstance( + scope_vec, (core.VarBase)) else scope_vec[0] if (not instance._is_test) and (not tracer._has_grad): scope.drop_kids() @@ -968,10 +960,9 @@ def _append_block(dest_program, origin_block_idx = dest_program.current_block_idx param_var_names = _collect_current_and_parent_var(dest_program, origin_block_idx) - append_var_from_block_desc_static( - dest_program.block(origin_block_idx), - src_program_desc.block(0), - exclude=param_var_names) + append_var_from_block_desc_static(dest_program.block(origin_block_idx), + src_program_desc.block(0), + exclude=param_var_names) name_inp_desc = [inp.name() for inp in program_holder.input_descs] input_names = [inp.name for inp in input_variables] @@ -1002,10 +993,11 @@ def _append_block(dest_program, else: parent_idx = origin_block_idx dest_block = dest_program._create_block(parent_idx=parent_idx) - append_var_from_block_desc_static( - dest_block, src_block, exclude=param_var_names) - append_ops += append_op_from_block_desc_static(dest_block, - src_block) + append_var_from_block_desc_static(dest_block, + src_block, + exclude=param_var_names) + append_ops += append_op_from_block_desc_static( + dest_block, src_block) dest_program._sync_with_cpp() for op in append_ops: @@ -1072,13 +1064,12 @@ def append_op_from_desc_static(block, op_desc): op_type = op_desc.type() op_append = block.desc.append_op() op_append.copy_from(op_desc) - op = framework.Operator( - block=block, - desc=op_append, - type=op_type, - inputs=None, - outputs=None, - attrs=None) + op = framework.Operator(block=block, + desc=op_append, + type=op_type, + inputs=None, + outputs=None, + attrs=None) block.ops.append(op) return op @@ -1298,8 +1289,8 @@ class TranslatedLayer(layers.Layer): programs = _construct_program_holders(model_path, model_filename) # 2. load layer parameters & buffers - persistable_vars = _construct_params_and_buffers(model_path, programs, - params_filename) + persistable_vars = _construct_params_and_buffers( + model_path, programs, params_filename) # 3. 
construct TranslatedLayer object translated_layer = TranslatedLayer(programs, persistable_vars) @@ -1310,9 +1301,10 @@ class TranslatedLayer(layers.Layer): translated_layer._input_args_names = [ ins.name() for ins in program_holder.input_descs ] - setattr(TranslatedLayer, method_name, - TranslatedLayer._execution_method_creator(method_name, - program_holder)) + setattr( + TranslatedLayer, method_name, + TranslatedLayer._execution_method_creator( + method_name, program_holder)) # 5. set TranslatedLayer's default mode to eval translated_layer.eval() @@ -1321,6 +1313,7 @@ class TranslatedLayer(layers.Layer): @staticmethod def _execution_method_creator(method_name, program_holder): + def __i_m_p_l__(self, *input): program_holder = self._program_holder_dict[__i_m_p_l__.__name__] # When using jit.save, it runs in static graph mode. @@ -1457,10 +1450,9 @@ class TranslatedLayer(layers.Layer): # 2. build input spec by input desc input_spec = [] for var_desc in program_holder.input_descs: - spec = paddle.static.InputSpec( - shape=var_desc.shape(), - dtype=var_desc.dtype(), - name=var_desc.name()) + spec = paddle.static.InputSpec(shape=var_desc.shape(), + dtype=var_desc.dtype(), + name=var_desc.name()) input_spec.append(spec) return input_spec @@ -1472,13 +1464,12 @@ class TranslatedLayer(layers.Layer): # 2. build output spec by output desc output_spec = [] for var_desc in program_holder.output_descs: - # NOTE(chenweihang): InputSpec describes a tensor, not just input. - # Maybe the name is not good enough. Here we use InputSpec to + # NOTE(chenweihang): InputSpec describes a tensor, not just input. + # Maybe the name is not good enough. Here we use InputSpec to # construct the description of Output tensor - spec = paddle.static.InputSpec( - shape=var_desc.shape(), - dtype=var_desc.dtype(), - name=var_desc.name()) + spec = paddle.static.InputSpec(shape=var_desc.shape(), + dtype=var_desc.dtype(), + name=var_desc.name()) output_spec.append(spec) return output_spec diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py index e0e259215c5..b6847efab1d 100644 --- a/python/paddle/fluid/dygraph/jit.py +++ b/python/paddle/fluid/dygraph/jit.py @@ -64,8 +64,8 @@ def _extract_vars(inputs, result_list, err_tag='inputs'): _extract_vars(var, result_list, err_tag) else: raise TypeError( - "The type of 'each element of {}' in fluid.dygraph.jit.TracedLayer.trace must be fluid.Variable, but received {}.". - format(err_tag, type(inputs))) + "The type of 'each element of {}' in fluid.dygraph.jit.TracedLayer.trace must be fluid.Variable, but received {}." + .format(err_tag, type(inputs))) def extract_vars(inputs, err_tag='inputs'): @@ -211,20 +211,19 @@ def declarative(function=None, input_spec=None, build_strategy=None): _, python_func = unwrap_decorators(python_func) # Step 2. copy some attributes from original python function. - static_layer = copy_decorator_attrs( - original_func=python_func, - decorated_obj=StaticFunction( - function=python_func, - input_spec=input_spec, - build_strategy=build_strategy)) + static_layer = copy_decorator_attrs(original_func=python_func, + decorated_obj=StaticFunction( + function=python_func, + input_spec=input_spec, + build_strategy=build_strategy)) return static_layer build_strategy = build_strategy or BuildStrategy() if not isinstance(build_strategy, BuildStrategy): raise TypeError( - "Required type(build_strategy) shall be `paddle.static.BuildStrategy`, but received {}". 
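# A minimal sketch (illustrative function name and shapes, not part of this patch)
# of the `declarative` / `to_static` decoration whose argument checks appear above.
import paddle
from paddle.static import InputSpec

@paddle.jit.to_static(input_spec=[InputSpec(shape=[None, 8], dtype='float32')])
def add_one(x):
    return x + 1.0

y = add_one(paddle.ones([2, 8], dtype='float32'))   # runs the converted program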
- format(type(build_strategy).__name__)) + "Required type(build_strategy) shall be `paddle.static.BuildStrategy`, but received {}" + .format(type(build_strategy).__name__)) # for usage: `declarative(foo, ...)` if function is not None: @@ -232,8 +231,8 @@ def declarative(function=None, input_spec=None, build_strategy=None): if isinstance(function.forward, StaticFunction): class_name = function.__class__.__name__ logging_utils.warn( - "`{}.forward` has already been decorated somewhere. It will be redecorated to replace previous one.". - format(class_name)) + "`{}.forward` has already been decorated somewhere. It will be redecorated to replace previous one." + .format(class_name)) function.forward = decorated(function.forward) return function else: @@ -284,6 +283,7 @@ def not_to_static(func=None): class _SaveLoadConfig(object): + def __init__(self): self._output_spec = None self._model_filename = None @@ -622,6 +622,7 @@ def _remove_save_pre_hook(hook): def _run_save_pre_hooks(func): + def wrapper(layer, path, input_spec=None, **configs): global _save_pre_hooks for hook in _save_pre_hooks: @@ -775,8 +776,8 @@ def save(layer, path, input_spec=None, **configs): "The paddle.jit.save doesn't work when setting ProgramTranslator.enable to False." ) - if not (isinstance(layer, Layer) or inspect.isfunction(layer) or isinstance( - layer, StaticFunction)): + if not (isinstance(layer, Layer) or inspect.isfunction(layer) + or isinstance(layer, StaticFunction)): raise TypeError( "The input of paddle.jit.save should be 'Layer' or 'Function', but received input type is %s." % type(layer)) @@ -837,7 +838,7 @@ def save(layer, path, input_spec=None, **configs): # parse configs configs = _parse_save_configs(configs) # whether outermost layer has pre/post hook, if does, we need also save - # these operators in program. + # these operators in program. with_hook = configs.with_hook scope = core.Scope() @@ -848,7 +849,9 @@ def save(layer, path, input_spec=None, **configs): with_hook = True else: # layer is function - functions = [layer, ] + functions = [ + layer, + ] for attr_func in functions: if isinstance(layer, Layer): static_func = getattr(inner_layer, attr_func, None) @@ -862,8 +865,8 @@ def save(layer, path, input_spec=None, **configs): if inner_input_spec: inner_input_spec = pack_sequence_as(input_spec, inner_input_spec) - static_forward = declarative( - inner_layer.forward, input_spec=inner_input_spec) + static_forward = declarative(inner_layer.forward, + input_spec=inner_input_spec) concrete_program = static_forward.concrete_program_specify_input_spec( with_hook=with_hook) # the input_spec has been used in declarative, which is equal to @@ -882,14 +885,14 @@ def save(layer, path, input_spec=None, **configs): if inner_input_spec: inner_input_spec = pack_sequence_as(input_spec, inner_input_spec) - static_function = declarative( - attr_func, input_spec=inner_input_spec) + static_function = declarative(attr_func, + input_spec=inner_input_spec) concrete_program = static_function.concrete_program if static_function._class_instance is None: warnings.warn( - '`jit.save` will only save the `Program`, not the parameters. If you have to save the parameters, please make sure that {} is a member function of `paddle.nn.Layer` and the saved parameters are in `state_dict`'. - format(layer)) + '`jit.save` will only save the `Program`, not the parameters. 
If you have to save the parameters, please make sure that {} is a member function of `paddle.nn.Layer` and the saved parameters are in `state_dict`' + .format(layer)) dygraph_state_dict = None if isinstance(inner_layer, Layer): @@ -922,8 +925,8 @@ def save(layer, path, input_spec=None, **configs): param_or_buffer_tensor = scope.var( param_or_buffer.name).get_tensor() #src_tensor = param_or_buffer.value().get_tensor() - src_tensor = state_var_dict[param_or_buffer.name].value( - ).get_tensor() + src_tensor = state_var_dict[ + param_or_buffer.name].value().get_tensor() param_or_buffer_tensor._share_data_with(src_tensor) # record var info if param_or_buffer.name not in extra_var_info: @@ -1534,14 +1537,16 @@ class TracedLayer(object): "fluid.dygraph.jit.TracedLayer.save_inference_model") if isinstance(feed, list): for f in feed: - check_type(f, "each element of feed", int, - "fluid.dygraph.jit.TracedLayer.save_inference_model") + check_type( + f, "each element of feed", int, + "fluid.dygraph.jit.TracedLayer.save_inference_model") check_type(fetch, "fetch", (type(None), list), "fluid.dygraph.jit.TracedLayer.save_inference_model") if isinstance(fetch, list): for f in fetch: - check_type(f, "each element of fetch", int, - "fluid.dygraph.jit.TracedLayer.save_inference_model") + check_type( + f, "each element of fetch", int, + "fluid.dygraph.jit.TracedLayer.save_inference_model") clip_extra = kwargs.get('clip_extra', False) # path check file_prefix = os.path.basename(path) @@ -1575,12 +1580,11 @@ class TracedLayer(object): model_filename = file_prefix + INFER_MODEL_SUFFIX params_filename = file_prefix + INFER_PARAMS_SUFFIX - save_inference_model( - dirname=dirname, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - executor=self._exe, - main_program=self._program.clone(), - model_filename=model_filename, - params_filename=params_filename, - clip_extra=clip_extra) + save_inference_model(dirname=dirname, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + executor=self._exe, + main_program=self._program.clone(), + model_filename=model_filename, + params_filename=params_filename, + clip_extra=clip_extra) diff --git a/python/paddle/fluid/dygraph/layer_hooks.py b/python/paddle/fluid/dygraph/layer_hooks.py index f93ba569807..68c3d463e5d 100644 --- a/python/paddle/fluid/dygraph/layer_hooks.py +++ b/python/paddle/fluid/dygraph/layer_hooks.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -37,14 +37,14 @@ def record_program_ops_pre_hook(layer, inputs): """ if not _non_static_mode(): if layer._op_recorder.start < 0: - layer._op_recorder.start = len(default_main_program().current_block( - ).ops) + layer._op_recorder.start = len( + default_main_program().current_block().ops) layer._op_recorder.is_valid = True else: layer._op_recorder.is_valid = False warnings.warn( - "{} has recorded the op information before. Please check whether you call this layer twice.". - format(layer._full_name)) + "{} has recorded the op information before. Please check whether you call this layer twice." 
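# A minimal sketch (illustrative layer, input and output path) of TracedLayer
# tracing and export, whose feed/fetch type checks and save_inference_model call
# are reformatted above; not part of this patch. TracedLayer is exposed as
# paddle.jit.TracedLayer in 2.x releases.
import paddle
from paddle.jit import TracedLayer

net = paddle.nn.Linear(4, 2)
x = paddle.rand([1, 4])
out, static_layer = TracedLayer.trace(net, inputs=[x])   # run once, record program
static_layer.save_inference_model('./traced_infer', feed=[0], fetch=[0])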
+ .format(layer._full_name)) return None diff --git a/python/paddle/fluid/dygraph/layer_object_helper.py b/python/paddle/fluid/dygraph/layer_object_helper.py index 5da9013fb7e..394df321811 100644 --- a/python/paddle/fluid/dygraph/layer_object_helper.py +++ b/python/paddle/fluid/dygraph/layer_object_helper.py @@ -25,6 +25,7 @@ from ..dygraph_utils import _append_activation_in_dygraph class LayerObjectHelper(LayerHelperBase): + def __init__(self, name): super(LayerObjectHelper, self).__init__(name, layer_type=name) @@ -169,11 +170,10 @@ class LayerObjectHelper(LayerHelperBase): return res else: tmp = self.create_variable_for_type_inference(dtype=input_var.dtype) - self.append_op( - type=act_type, - inputs={"X": [input_var]}, - outputs={"Out": [tmp]}, - attrs=act) + self.append_op(type=act_type, + inputs={"X": [input_var]}, + outputs={"Out": [tmp]}, + attrs=act) return tmp def is_instance(self, param, cls): diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 088fed03c35..b67f7d0a91f 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -423,10 +423,9 @@ class Layer(object): return self._helper.create_parameter(temp_attr, shape, dtype, is_bias, default_initializer) - @deprecated( - since="2.0.0", - update_to="paddle.nn.Layer.create_tensor", - reason="New api in create_tensor, easier to use.") + @deprecated(since="2.0.0", + update_to="paddle.nn.Layer.create_tensor", + reason="New api in create_tensor, easier to use.") def create_variable(self, name=None, persistable=None, dtype=None): """ @@ -541,8 +540,7 @@ class Layer(object): """ ret = [ - param - for _, param in self.named_parameters( + param for _, param in self.named_parameters( include_sublayers=include_sublayers) ] return ret @@ -658,8 +656,8 @@ class Layer(object): """ params_set = set() named_sublayers = self.named_sublayers( - prefix=prefix, - include_self=True) if include_sublayers else zip([prefix], [self]) + prefix=prefix, include_self=True) if include_sublayers else zip( + [prefix], [self]) for layer_prefix, sublayer in named_sublayers: params = sublayer._parameters.items() for key, param in params: @@ -703,9 +701,9 @@ class Layer(object): if layer is None: continue layer_prefix = prefix + ('.' if prefix else '') + key - for p, l in layer.named_sublayers( - prefix=layer_prefix, include_self=True, - layers_set=layers_set): + for p, l in layer.named_sublayers(prefix=layer_prefix, + include_self=True, + layers_set=layers_set): yield p, l def register_buffer(self, name, tensor, persistable=True): @@ -762,11 +760,11 @@ class Layer(object): raise KeyError("The name of buffer can not be empty.") elif hasattr(self, name) and name not in self._buffers: raise KeyError("attribute '{}' already exists.".format(name)) - elif tensor is not None and not (type(tensor) == core.VarBase or - type(tensor) == core.eager.Tensor): + elif tensor is not None and not (type(tensor) == core.VarBase + or type(tensor) == core.eager.Tensor): raise TypeError( - "The registered buffer should be a Paddle.Tensor, but received {}.". - format(type(tensor).__name__)) + "The registered buffer should be a Paddle.Tensor, but received {}." 
+ .format(type(tensor).__name__)) else: self._buffers[name] = tensor if persistable: @@ -799,8 +797,7 @@ class Layer(object): """ ret = [ - buffer - for _, buffer in self.named_buffers( + buffer for _, buffer in self.named_buffers( include_sublayers=include_sublayers) ] return ret @@ -843,8 +840,8 @@ class Layer(object): """ buffers_set = set() named_sublayers = self.named_sublayers( - prefix=prefix, - include_self=True) if include_sublayers else zip([prefix], [self]) + prefix=prefix, include_self=True) if include_sublayers else zip( + [prefix], [self]) for layer_prefix, sublayer in named_sublayers: buffers = sublayer._buffers.items() for key, buffer in buffers: @@ -1034,8 +1031,8 @@ class Layer(object): elif parameter is not None and not isinstance(parameter, framework.Parameter): raise TypeError( - "The parameter to be added should be a Parameter, but received {}.". - format(type(parameter).__name__)) + "The parameter to be added should be a Parameter, but received {}." + .format(type(parameter).__name__)) else: if parameter is None: self._parameters[name] = None @@ -1072,8 +1069,9 @@ class Layer(object): return already_registed if not isinstance(attrs, dict): - raise TypeError("attrs should be type(dict), but received {}". - format(type(attrs).__name__)) + raise TypeError( + "attrs should be type(dict), but received {}".format( + type(attrs).__name__)) # NOTE: Overwrite behavior for same key. self._customized_attrs.update(attrs) @@ -1089,8 +1087,8 @@ class Layer(object): post_hook_helper = self.register_forward_post_hook( set_op_customized_attrs_post_hook) if len(self._forward_post_hooks) > 1: - self._forward_post_hooks.move_to_end( - post_hook_helper._hook_id, last=False) + self._forward_post_hooks.move_to_end(post_hook_helper._hook_id, + last=False) assert len(self._op_recorder.hooks) == 1 @@ -1123,6 +1121,7 @@ class Layer(object): return object.__getattribute__(self, name) def __setattr__(self, name, value): + def _remove_if_exist(*dicts): for d in dicts: if name in d: @@ -1147,7 +1146,8 @@ class Layer(object): if value is not None: raise TypeError( "assignment to parameter '{}' should be of type Parameter or None, but got '{}'" - .format(name, type(value).__name__)) + .format(name, + type(value).__name__)) params[name] = None else: layers = self.__dict__.get('_sub_layers', None) @@ -1163,7 +1163,8 @@ class Layer(object): if value is not None: raise TypeError( "assignment to sublayer '{}' should be of type Layer or None, but got '{}'" - .format(name, type(value).__name__)) + .format(name, + type(value).__name__)) layers[name] = None else: _buffers = self.__dict__.get('_buffers', None) @@ -1194,17 +1195,18 @@ class Layer(object): if in_declarative_mode() and _buffers[name] is None: raise RuntimeError( 'In Dy2stat, self.{0} is a buffer and self.{0} is ' - 'not allowed to be set to Variable when self.{0} is None.'. - format(name)) - elif _buffers[name] is None or type( - getattr(self, name)) == core.VarBase: + 'not allowed to be set to Variable when self.{0} is None.' + .format(name)) + elif _buffers[name] is None or type(getattr( + self, name)) == core.VarBase: _buffers[name] = assign(value) else: assign(value, getattr(self, name)) elif value is not None: raise TypeError( "assignment to buffers '{}' should be of type core.VarBase or None, but got '{}'" - .format(name, type(value).__name__)) + .format(name, + type(value).__name__)) else: # Assigning None will remove the buffer, but if re-assign a new varBase to it, # it will be remarked as a buffer with same `persistable` attribute. 
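# A minimal sketch (illustrative layer, not part of this patch) of the attribute
# registration handled by __setattr__ / register_buffer above: sublayers,
# parameters and buffers land in separate containers.
import paddle

class WithBuffer(paddle.nn.Layer):
    def __init__(self):
        super(WithBuffer, self).__init__()
        self.linear = paddle.nn.Linear(3, 3)              # goes into _sub_layers
        self.register_buffer("step", paddle.zeros([1]))   # goes into _buffers

layer = WithBuffer()
print(len(layer.parameters()))                      # 2: sublayer weight and bias
print([name for name, _ in layer.named_buffers()])  # ['step']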
@@ -1454,8 +1456,8 @@ class Layer(object): def _check_match(key, param): state = state_dict.get(key, None) if state is None: - raise ValueError("{} is not found in the provided dict.".format( - key)) + raise ValueError( + "{} is not found in the provided dict.".format(key)) if (isinstance(state, dict) or isinstance(state, list)): if (len(state) != len(param)): raise ValueError("{} receieves the length of {}, " @@ -1507,8 +1509,8 @@ class Layer(object): executor = Executor(_get_device())._default_executor # restore parameter states core._create_loaded_parameter( - [param for param, state in matched_param_state], - global_scope(), executor) + [param for param, state in matched_param_state], global_scope(), + executor) for param, state in matched_param_state: _set_var(param, state) @@ -1560,11 +1562,10 @@ class Layer(object): # [ 0.33960250, 0.96878713]]) ''' - return self._to_impl( - device=device, - dtype=dtype, - blocking=blocking, - include_sublayers=True) + return self._to_impl(device=device, + dtype=dtype, + blocking=blocking, + include_sublayers=True) def _apply(self, func, device, dtype, blocking, include_sublayers=True): if include_sublayers: diff --git a/python/paddle/fluid/dygraph/learning_rate_scheduler.py b/python/paddle/fluid/dygraph/learning_rate_scheduler.py index a6c1993dbbf..4b9c50127f0 100644 --- a/python/paddle/fluid/dygraph/learning_rate_scheduler.py +++ b/python/paddle/fluid/dygraph/learning_rate_scheduler.py @@ -67,7 +67,7 @@ class LearningRateDecay(object): persistable=False) return lr - # Note: If you want to change what optimizer.state_dict stores, just overwrite this functions, + # Note: If you want to change what optimizer.state_dict stores, just overwrite this functions, # "self.step_num" will be stored by default. def state_dict(self): """ @@ -107,8 +107,8 @@ class LearningRateDecay(object): self.__dict__[key] = state_dict[key] else: raise RuntimeError( - "Please check whether state_dict is correct for optimizer. Can't find [ {} ] in state_dict". - format(key)) + "Please check whether state_dict is correct for optimizer. Can't find [ {} ] in state_dict" + .format(key)) if len(state_dict) > len(self.keys): warnings.warn( "There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict" @@ -259,8 +259,8 @@ class NaturalExpDecay(LearningRateDecay): div_res = self.create_lr_var(self.step_num / self.decay_steps) if self.staircase: div_res = layers.floor(div_res) - decayed_lr = self.learning_rate * layers.exp(-1 * self.decay_rate * - div_res) + decayed_lr = self.learning_rate * layers.exp( + -1 * self.decay_rate * div_res) return decayed_lr @@ -510,9 +510,9 @@ class PolynomialDecay(LearningRateDecay): div_res = self.create_lr_var(1.0) tmp_decay_steps = self.decay_steps * div_res else: - tmp_step_num = self.create_lr_var(tmp_step_num - if tmp_step_num < self.decay_steps - else self.decay_steps) + tmp_step_num = self.create_lr_var( + tmp_step_num if tmp_step_num < self.decay_steps else self. + decay_steps) decayed_lr = (self.learning_rate - self.end_learning_rate) * \ ((1 - tmp_step_num / tmp_decay_steps) ** self.power) + self.end_learning_rate @@ -639,8 +639,8 @@ class NoamDecay(LearningRateDecay): from .. 
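# A small numeric sketch (illustrative values, not part of this patch) of the
# NaturalExpDecay rule computed above:
# lr = learning_rate * exp(-decay_rate * step_num / decay_steps),
# with floor() applied to the ratio when staircase=True.
import math

def natural_exp_decay(learning_rate, step_num, decay_steps, decay_rate,
                      staircase=False):
    div_res = step_num / decay_steps
    if staircase:
        div_res = math.floor(div_res)
    return learning_rate * math.exp(-1 * decay_rate * div_res)

print(natural_exp_decay(0.1, step_num=500, decay_steps=1000, decay_rate=0.5))
# roughly 0.0779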
import layers a = self.create_lr_var(self.step_num**-0.5) b = self.create_lr_var((self.warmup_steps**-1.5) * self.step_num) - lr_value = self.learning_rate * (self.d_model - **-0.5) * layers.elementwise_min(a, b) + lr_value = self.learning_rate * (self.d_model** + -0.5) * layers.elementwise_min(a, b) return lr_value @@ -713,15 +713,15 @@ class LinearLrWarmup(LearningRateDecay): learning_rate, int) or isinstance(learning_rate, LearningRateDecay) if not type_check: raise TypeError( - "the type of learning_rate should be [int, float or LearningRateDecay], the current type is {}". - format(learning_rate)) + "the type of learning_rate should be [int, float or LearningRateDecay], the current type is {}" + .format(learning_rate)) self.learning_rate = learning_rate self.warmup_steps = warmup_steps self.start_lr = start_lr assert end_lr > start_lr, "end_lr {} must be greater than start_lr {}".format( end_lr, start_lr) - self.lr_ratio_before_warmup = ( - float(end_lr) - float(start_lr)) / float(warmup_steps) + self.lr_ratio_before_warmup = (float(end_lr) - + float(start_lr)) / float(warmup_steps) def step(self): base_lr = self.learning_rate @@ -913,15 +913,16 @@ class ReduceLROnPlateau(LearningRateDecay): from .. import layers self.cooldown_counter = self.cooldown self.num_bad_epochs = 0 - new_lr = layers.elementwise_max(self.learning_rate * - self.decay_rate, self.min_lr) + new_lr = layers.elementwise_max( + self.learning_rate * self.decay_rate, self.min_lr) if self.learning_rate - new_lr > self.eps: if self.verbose: old_lr = self.learning_rate.numpy()[0] if isinstance( self.learning_rate, Variable) else self.learning_rate print('Epoch {}: reducing learning rate from {} to {}.'. - format(self.epoch_num, old_lr, new_lr.numpy()[0])) + format(self.epoch_num, old_lr, + new_lr.numpy()[0])) self.learning_rate = new_lr def _is_better(self, current, best): diff --git a/python/paddle/fluid/dygraph/math_op_patch.py b/python/paddle/fluid/dygraph/math_op_patch.py index 8a19be640a7..13f11ea161b 100644 --- a/python/paddle/fluid/dygraph/math_op_patch.py +++ b/python/paddle/fluid/dygraph/math_op_patch.py @@ -33,10 +33,10 @@ _supported_int_dtype_ = [ core.VarDesc.VarType.BOOL, ] -# NOTE(chenweihang): We currently do not fully support the type promotion -# between tensors. Parting support here is because the interoperation of -# real and complex numbers in paddle quantum is very frequent, such as the -# binary operation between `float` and `complex64`, so we must support the +# NOTE(chenweihang): We currently do not fully support the type promotion +# between tensors. Parting support here is because the interoperation of +# real and complex numbers in paddle quantum is very frequent, such as the +# binary operation between `float` and `complex64`, so we must support the # correct type promotion on the APIs paddle quantum used. # Now only check in dygraph (paddle quantum based dygraph) # Full type promotion support will need to be fully verified later. @@ -200,6 +200,7 @@ def monkey_patch_math_varbase(): reverse=False, scalar_method=None, call_final_api=False): + def __impl__(self, other_var): # 1. scalar exists cases # we need combine the tensor.dtype and scalar.dtype, cast correct object @@ -217,13 +218,13 @@ def monkey_patch_math_varbase(): other_var = float(other_var) # division is a special case # NOTE(chenweihang): because we cast tensor to float32 instead float64, - # the division result can only guarantee the numerical accuracy of 6 digits - # after the decimal point. 
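# A small numeric sketch (illustrative values, not part of this patch) of the
# NoamDecay rule above:
# lr = learning_rate * d_model**-0.5 * min(step**-0.5, warmup_steps**-1.5 * step).
def noam_decay(learning_rate, d_model, warmup_steps, step_num):
    a = step_num ** -0.5
    b = (warmup_steps ** -1.5) * step_num
    return learning_rate * (d_model ** -0.5) * min(a, b)

print(noam_decay(1.0, d_model=512, warmup_steps=4000, step_num=1000))
# still in the warmup phase, roughly 1.7e-4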
The result of numpy calculation is of float64 type, - # so the calculation result here and the calculation result of numpy are + # the division result can only guarantee the numerical accuracy of 6 digits + # after the decimal point. The result of numpy calculation is of float64 type, + # so the calculation result here and the calculation result of numpy are # different after 6 decimal point. If necessary, we can also use float64 here. # torch's behavior here is consistent with ours - if (op_type == "final_state_divide" or - op_type == "elementwise_div" + if (op_type == "final_state_divide" + or op_type == "elementwise_div" ) and self.dtype in _supported_int_dtype_: self = astype(self, 'float32') # here use `scale` replace `elementwise` to get better performance @@ -246,19 +247,20 @@ def monkey_patch_math_varbase(): other_var = paddle.to_tensor(other_var, dtype='complex64') else: if reverse: - other_var = create_tensor( - other_var, dtype=lhs_dtype, shape=self.shape) + other_var = create_tensor(other_var, + dtype=lhs_dtype, + shape=self.shape) else: # add fill_op - other_var = create_scalar( - value=other_var, dtype=lhs_dtype) + other_var = create_scalar(value=other_var, + dtype=lhs_dtype) # 3. promote types or unify right var type to left var rhs_dtype = other_var.dtype if lhs_dtype != rhs_dtype: if method_name in _supported_promote_complex_types_ and ( - lhs_dtype in _complex_dtypes or - rhs_dtype in _complex_dtypes): + lhs_dtype in _complex_dtypes + or rhs_dtype in _complex_dtypes): # only when lhs_dtype or rhs_dtype is complex type, # the dtype will promote, in other cases, directly # use lhs_dtype, this is consistent will original rule @@ -270,8 +272,8 @@ def monkey_patch_math_varbase(): other_var, promote_dtype) else: warnings.warn( - 'The dtype of left and right variables are not the same, left dtype is {}, but right dtype is {}, the right dtype will convert to {}'. - format(lhs_dtype, rhs_dtype, lhs_dtype)) + 'The dtype of left and right variables are not the same, left dtype is {}, but right dtype is {}, the right dtype will convert to {}' + .format(lhs_dtype, rhs_dtype, lhs_dtype)) other_var = astype(other_var, lhs_dtype) if reverse: @@ -320,54 +322,66 @@ def monkey_patch_math_varbase(): ('ndim', _ndim_), ('size', _size_), ('T', _T_), - ('__add__', _binary_creator_('__add__', 'final_state_add', False, - _scalar_add_, True)) - if framework._in_eager_mode_ else ('__add__', _binary_creator_( - '__add__', 'elementwise_add', False, _scalar_add_)), + ('__add__', + _binary_creator_('__add__', 'final_state_add', False, _scalar_add_, + True)) if framework._in_eager_mode_ else + ('__add__', + _binary_creator_('__add__', 'elementwise_add', False, _scalar_add_)), ## a+b == b+a. 
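# A minimal sketch (illustrative values, not part of this patch) of the
# monkey-patched tensor arithmetic above: dividing an integer tensor first casts
# it to float32 (hence the 6-digit accuracy note), and tensor-scalar ops take the
# `scale` fast path instead of building a second tensor.
import paddle

x = paddle.to_tensor([1, 2, 3], dtype='int64')
y = x / 2            # __truediv__: cast to float32, then elementwise divide
print(y.dtype)       # float32
z = x + 1            # scalar add handled by the scale-based fast path
print(z.numpy())     # [2 3 4]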
Do not need to reverse explicitly - ('__radd__', _binary_creator_('__radd__', 'final_state_add', False, - _scalar_add_, True)) - if framework._in_eager_mode_ else ('__radd__', _binary_creator_( - '__radd__', 'elementwise_add', False, _scalar_add_)), - ('__sub__', _binary_creator_('__sub__', 'final_state_subtract', False, - _scalar_sub_, True)) - if framework._in_eager_mode_ else ('__sub__', _binary_creator_( - '__sub__', 'elementwise_sub', False, _scalar_sub_)), - ('__rsub__', _binary_creator_('__rsub__', 'final_state_subtract', True, - _scalar_rsub_, True)) - if framework._in_eager_mode_ else ('__rsub__', _binary_creator_( - '__rsub__', 'elementwise_sub', True, _scalar_rsub_)), - ('__mul__', _binary_creator_('__mul__', 'final_state_multiply', False, - _scalar_mul_, True)) - if framework._in_eager_mode_ else ('__mul__', _binary_creator_( - '__mul__', 'elementwise_mul', False, _scalar_mul_)), + ('__radd__', + _binary_creator_('__radd__', 'final_state_add', False, _scalar_add_, + True)) if framework._in_eager_mode_ else + ('__radd__', + _binary_creator_('__radd__', 'elementwise_add', False, _scalar_add_)), + ('__sub__', + _binary_creator_('__sub__', 'final_state_subtract', False, + _scalar_sub_, True)) if framework._in_eager_mode_ else + ('__sub__', + _binary_creator_('__sub__', 'elementwise_sub', False, _scalar_sub_)), + ('__rsub__', + _binary_creator_('__rsub__', 'final_state_subtract', True, + _scalar_rsub_, True)) + if framework._in_eager_mode_ else + ('__rsub__', + _binary_creator_('__rsub__', 'elementwise_sub', True, _scalar_rsub_)), + ('__mul__', + _binary_creator_('__mul__', 'final_state_multiply', False, + _scalar_mul_, True)) if framework._in_eager_mode_ else + ('__mul__', + _binary_creator_('__mul__', 'elementwise_mul', False, _scalar_mul_)), ## a*b == b*a. 
Do not need to reverse explicitly - ('__rmul__', _binary_creator_('__rmul__', 'final_state_multiply', False, - _scalar_mul_, True)) - if framework._in_eager_mode_ else ('__rmul__', _binary_creator_( - '__rmul__', 'elementwise_mul', False, _scalar_mul_)), - ('__div__', _binary_creator_('__div__', 'final_state_divide', False, - _scalar_div_, True)) - if framework._in_eager_mode_ else ('__div__', _binary_creator_( - '__div__', 'elementwise_div', False, _scalar_div_)), - ('__truediv__', _binary_creator_('__truediv__', 'final_state_divide', - False, _scalar_div_, True)) - if framework._in_eager_mode_ else ('__truediv__', _binary_creator_( - '__truediv__', 'elementwise_div', False, _scalar_div_)), - ('__rdiv__', _binary_creator_('__rdiv__', 'final_state_divide', True, - None, True)) if framework._in_eager_mode_ - else ('__rdiv__', _binary_creator_('__rdiv__', 'elementwise_div', True, - None)), - ('__rtruediv__', _binary_creator_('rtruediv__', 'final_state_divide', - True, None, True)) - if framework._in_eager_mode_ else ('__rtruediv__', _binary_creator_( - 'rtruediv__', 'elementwise_div', True, None)), + ('__rmul__', + _binary_creator_('__rmul__', 'final_state_multiply', False, + _scalar_mul_, True)) if framework._in_eager_mode_ else + ('__rmul__', + _binary_creator_('__rmul__', 'elementwise_mul', False, _scalar_mul_)), + ('__div__', + _binary_creator_('__div__', 'final_state_divide', False, _scalar_div_, + True)) if framework._in_eager_mode_ else + ('__div__', + _binary_creator_('__div__', 'elementwise_div', False, _scalar_div_)), + ('__truediv__', + _binary_creator_('__truediv__', 'final_state_divide', False, + _scalar_div_, True)) if framework._in_eager_mode_ else + ('__truediv__', + _binary_creator_('__truediv__', 'elementwise_div', False, + _scalar_div_)), + ('__rdiv__', + _binary_creator_('__rdiv__', 'final_state_divide', True, None, True)) + if framework._in_eager_mode_ else + ('__rdiv__', + _binary_creator_('__rdiv__', 'elementwise_div', True, None)), + ('__rtruediv__', + _binary_creator_('rtruediv__', 'final_state_divide', True, None, True)) + if framework._in_eager_mode_ else + ('__rtruediv__', + _binary_creator_('rtruediv__', 'elementwise_div', True, None)), ('__pow__', _binary_creator_('__pow__', 'elementwise_pow', False, None)), ('__rpow__', _binary_creator_('__rpow__', 'elementwise_pow', True, None)), - ('__floordiv__', _binary_creator_('__floordiv__', - 'elementwise_floordiv', False, None)), + ('__floordiv__', + _binary_creator_('__floordiv__', 'elementwise_floordiv', False, None)), ('__mod__', _binary_creator_('__mod__', 'elementwise_mod', False, None)), ('__matmul__', _binary_creator_('__matmul__', "matmul_v2", False, @@ -377,22 +391,25 @@ def monkey_patch_math_varbase(): _binary_creator_('__eq__', 'final_state_equal', False, None, True)) if framework._in_eager_mode_ else ('__eq__', _binary_creator_('__eq__', 'equal', False, None)), - ('__ne__', _binary_creator_('__ne__', 'final_state_not_equal', False, - None, True)) if framework._in_eager_mode_ - else ('__ne__', _binary_creator_('__ne__', 'not_equal', False, None)), - ('__lt__', _binary_creator_('__lt__', 'final_state_less_than', False, - None, True)) if framework._in_eager_mode_ - else ('__lt__', _binary_creator_('__lt__', 'less_than', False, None)), - ('__le__', _binary_creator_('__le__', 'final_state_less_equal', False, - None, True)) if framework._in_eager_mode_ - else ('__le__', _binary_creator_('__le__', 'less_equal', False, None)), - ('__gt__', _binary_creator_('__gt__', 'final_state_greater_than', False, - None, True)) 
+ ('__ne__', + _binary_creator_('__ne__', 'final_state_not_equal', False, None, True)) if framework._in_eager_mode_ else - ('__gt__', _binary_creator_('__gt__', 'greater_than', False, None)), - ('__ge__', _binary_creator_('__ge__', 'final_state_greater_equal', - False, None, True)) + ('__ne__', _binary_creator_('__ne__', 'not_equal', False, None)), + ('__lt__', + _binary_creator_('__lt__', 'final_state_less_than', False, None, True)) if framework._in_eager_mode_ else + ('__lt__', _binary_creator_('__lt__', 'less_than', False, None)), + ('__le__', + _binary_creator_('__le__', 'final_state_less_equal', False, None, + True)) if framework._in_eager_mode_ else + ('__le__', _binary_creator_('__le__', 'less_equal', False, None)), + ('__gt__', + _binary_creator_('__gt__', 'final_state_greater_than', False, None, + True)) if framework._in_eager_mode_ else + ('__gt__', _binary_creator_('__gt__', 'greater_than', False, None)), + ('__ge__', + _binary_creator_('__ge__', 'final_state_greater_equal', False, None, + True)) if framework._in_eager_mode_ else ('__ge__', _binary_creator_('__ge__', 'greater_equal', False, None)), ('__array_ufunc__', None) ] diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 72114a27515..26bda1a34ef 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -196,17 +196,17 @@ class Conv2D(layers.Layer): self._bias_attr = bias_attr self._dtype = dtype - if (self._num_channels == self._groups and - num_filters % self._num_channels == 0 and - not self._use_cudnn and not self._use_mkldnn): + if (self._num_channels == self._groups + and num_filters % self._num_channels == 0 + and not self._use_cudnn and not self._use_mkldnn): self._l_type = 'depthwise_conv2d' else: self._l_type = 'conv2d' # NPU only supports depthwise_conv2d when "input_channel = output_channel = groups" if core.is_compiled_with_npu(): - if (self._num_channels == self._groups and - self._num_channels == self._num_filters): + if (self._num_channels == self._groups + and self._num_channels == self._num_filters): self._l_type = 'depthwise_conv2d' else: self._l_type = 'conv2d' @@ -233,11 +233,10 @@ class Conv2D(layers.Layer): dtype=self._dtype, default_initializer=_get_default_param_initializer()) - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._num_filters], - dtype=self._dtype, - is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=[self._num_filters], + dtype=self._dtype, + is_bias=True) def forward(self, input): if in_dygraph_mode() and self._l_type == "conv2d": @@ -252,12 +251,12 @@ class Conv2D(layers.Layer): return dygraph_utils._append_activation_in_dygraph( pre_act, self._act, use_mkldnn=self._use_mkldnn) - if _non_static_mode() and (self._l_type == 'conv2d' or - self._l_type == 'depthwise_conv2d'): + if _non_static_mode() and (self._l_type == 'conv2d' + or self._l_type == 'depthwise_conv2d'): attrs = ('strides', self._stride, 'paddings', self._padding, - 'dilations', self._dilation, 'groups', self._groups - if self._groups else 1, 'use_cudnn', self._use_cudnn, - 'use_mkldnn', self._use_mkldnn) + 'dilations', self._dilation, 'groups', + self._groups if self._groups else 1, 'use_cudnn', + self._use_cudnn, 'use_mkldnn', self._use_mkldnn) out = _C_ops.conv2d(input, self.weight, *attrs) pre_bias = out @@ -283,25 +282,27 @@ class Conv2D(layers.Layer): pre_bias = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type=self._l_type, - inputs={ - 
'Input': input, - 'Filter': self.weight, - }, - outputs={"Output": pre_bias}, - attrs=attrs) + self._helper.append_op(type=self._l_type, + inputs={ + 'Input': input, + 'Filter': self.weight, + }, + outputs={"Output": pre_bias}, + attrs=attrs) if self.bias is not None: pre_act = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type='elementwise_add', - inputs={'X': [pre_bias], - 'Y': [self.bias]}, - outputs={'Out': [pre_act]}, - attrs={'axis': 1, - 'use_mkldnn': self._use_mkldnn}) + self._helper.append_op(type='elementwise_add', + inputs={ + 'X': [pre_bias], + 'Y': [self.bias] + }, + outputs={'Out': [pre_act]}, + attrs={ + 'axis': 1, + 'use_mkldnn': self._use_mkldnn + }) else: pre_act = pre_bias @@ -470,41 +471,41 @@ class Conv3D(layers.Layer): dtype=self._dtype, default_initializer=_get_default_param_initializer()) - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._num_filters], - dtype=self._dtype, - is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=[self._num_filters], + dtype=self._dtype, + is_bias=True) def forward(self, input): pre_bias = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type='conv3d', - inputs={ - 'Input': input, - 'Filter': self.weight, - }, - outputs={"Output": pre_bias}, - attrs={ - 'strides': self._stride, - 'paddings': self._padding, - 'dilations': self._dilation, - 'groups': self._groups if self._groups else 1, - 'use_cudnn': self._use_cudnn, - 'use_mkldnn': False - }) + self._helper.append_op(type='conv3d', + inputs={ + 'Input': input, + 'Filter': self.weight, + }, + outputs={"Output": pre_bias}, + attrs={ + 'strides': self._stride, + 'paddings': self._padding, + 'dilations': self._dilation, + 'groups': + self._groups if self._groups else 1, + 'use_cudnn': self._use_cudnn, + 'use_mkldnn': False + }) if self.bias is not None: pre_act = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type='elementwise_add', - inputs={'X': [pre_bias], - 'Y': [self.bias]}, - outputs={'Out': [pre_act]}, - attrs={'axis': 1}) + self._helper.append_op(type='elementwise_add', + inputs={ + 'X': [pre_bias], + 'Y': [self.bias] + }, + outputs={'Out': [pre_act]}, + attrs={'axis': 1}) else: pre_act = pre_bias @@ -688,39 +689,42 @@ class Conv3DTranspose(layers.Layer): filter_shape = [self._num_channels, self._num_filters // self._groups ] + self._filter_size - self.weight = self.create_parameter( - dtype=self._dtype, shape=filter_shape, attr=self._param_attr) - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._num_filters], - dtype=self._dtype, - is_bias=True) + self.weight = self.create_parameter(dtype=self._dtype, + shape=filter_shape, + attr=self._param_attr) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=[self._num_filters], + dtype=self._dtype, + is_bias=True) def forward(self, input): pre_bias = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type="conv3d_transpose", - inputs={'Input': [input], - 'Filter': [self.weight]}, - outputs={'Output': pre_bias}, - attrs={ - 'strides': self._stride, - 'paddings': self._padding, - 'dilations': self._dilation, - 'groups': self._groups if self._groups else 1, - 'use_cudnn': self._use_cudnn - }) + self._helper.append_op(type="conv3d_transpose", + inputs={ + 'Input': [input], + 'Filter': [self.weight] + }, + outputs={'Output': pre_bias}, + attrs={ + 'strides': 
self._stride, + 'paddings': self._padding, + 'dilations': self._dilation, + 'groups': + self._groups if self._groups else 1, + 'use_cudnn': self._use_cudnn + }) if self._bias_attr: pre_act = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type='elementwise_add', - inputs={'X': [pre_bias], - 'Y': [self.bias]}, - outputs={'Out': [pre_act]}, - attrs={'axis': 1}) + self._helper.append_op(type='elementwise_add', + inputs={ + 'X': [pre_bias], + 'Y': [self.bias] + }, + outputs={'Out': [pre_act]}, + attrs={'axis': 1}) else: pre_act = pre_bias @@ -910,11 +914,10 @@ class Pool2D(layers.Layer): pool_out = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type=self._l_type, - inputs={"X": input}, - outputs={"Out": pool_out}, - attrs=attrs) + self._helper.append_op(type=self._l_type, + inputs={"X": input}, + outputs={"Out": pool_out}, + attrs=attrs) return pool_out @@ -980,13 +983,14 @@ class Linear(layers.Layer): super(Linear, self).__init__() self._act = act self._dtype = dtype - self.weight = self.create_parameter( - shape=[input_dim, output_dim], - attr=param_attr, - dtype=dtype, - is_bias=False) - self.bias = self.create_parameter( - shape=[output_dim], attr=bias_attr, dtype=dtype, is_bias=True) + self.weight = self.create_parameter(shape=[input_dim, output_dim], + attr=param_attr, + dtype=dtype, + is_bias=False) + self.bias = self.create_parameter(shape=[output_dim], + attr=bias_attr, + dtype=dtype, + is_bias=True) self._use_mkldnn = _global_flags()["FLAGS_use_mkldnn"] @@ -1017,20 +1021,23 @@ class Linear(layers.Layer): inputs = {"X": [input], "Y": [self.weight]} tmp = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type="matmul", inputs=inputs, outputs={"Out": tmp}, attrs=attrs) + self._helper.append_op(type="matmul", + inputs=inputs, + outputs={"Out": tmp}, + attrs=attrs) if self.bias is not None: pre_activation = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type='elementwise_add', - inputs={'X': [tmp], - 'Y': [self.bias]}, - outputs={'Out': [pre_activation]}, - attrs={ - 'axis': len(input.shape) - 1, - 'use_mkldnn': self._use_mkldnn - }) + self._helper.append_op(type='elementwise_add', + inputs={ + 'X': [tmp], + 'Y': [self.bias] + }, + outputs={'Out': [pre_activation]}, + attrs={ + 'axis': len(input.shape) - 1, + 'use_mkldnn': self._use_mkldnn + }) else: pre_activation = tmp return self._helper.append_activation(pre_activation, act=self._act) @@ -1126,12 +1133,11 @@ class InstanceNorm(layers.Layer): dtype=self._dtype, default_initializer=Constant(1.0), is_bias=False) - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[num_channels], - dtype=self._dtype, - default_initializer=Constant(0.0), - is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=[num_channels], + dtype=self._dtype, + default_initializer=Constant(0.0), + is_bias=True) else: self.scale = None self.bias = None @@ -1169,8 +1175,10 @@ class InstanceNorm(layers.Layer): "SavedVariance": [saved_variance] } - self._helper.append_op( - type="instance_norm", inputs=inputs, outputs=outputs, attrs=attrs) + self._helper.append_op(type="instance_norm", + inputs=inputs, + outputs=outputs, + attrs=attrs) return instance_norm_out @@ -1304,38 +1312,34 @@ class BatchNorm(layers.Layer): param_shape = [num_channels] # create parameter - self.weight = self.create_parameter( - attr=self._param_attr, - shape=param_shape, - 
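# A minimal usage sketch (illustrative shapes and values, not part of this patch)
# of the dygraph Linear layer reformatted above; this is the legacy fluid API,
# paddle.nn.Linear is its 2.x replacement.
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    linear = fluid.dygraph.Linear(input_dim=4, output_dim=2, act='relu')
    x = fluid.dygraph.to_variable(np.random.rand(3, 4).astype('float32'))
    y = linear(x)
    print(y.shape)   # [3, 2]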
dtype=self._dtype, - default_initializer=Constant(1.0)) + self.weight = self.create_parameter(attr=self._param_attr, + shape=param_shape, + dtype=self._dtype, + default_initializer=Constant(1.0)) self.weight.stop_gradient = use_global_stats and self._param_attr.learning_rate == 0. - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=param_shape, - dtype=self._dtype, - is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=param_shape, + dtype=self._dtype, + is_bias=True) self.bias.stop_gradient = use_global_stats and self._param_attr.learning_rate == 0. - self._mean = self.create_parameter( - attr=ParamAttr( - name=moving_mean_name, - initializer=Constant(0.0), - trainable=False, - do_model_average=do_model_average_for_mean_and_var), - shape=param_shape, - dtype=self._dtype) + self._mean = self.create_parameter(attr=ParamAttr( + name=moving_mean_name, + initializer=Constant(0.0), + trainable=False, + do_model_average=do_model_average_for_mean_and_var), + shape=param_shape, + dtype=self._dtype) self._mean.stop_gradient = True - self._variance = self.create_parameter( - attr=ParamAttr( - name=moving_variance_name, - initializer=Constant(1.0), - trainable=False, - do_model_average=do_model_average_for_mean_and_var), - shape=param_shape, - dtype=self._dtype) + self._variance = self.create_parameter(attr=ParamAttr( + name=moving_variance_name, + initializer=Constant(1.0), + trainable=False, + do_model_average=do_model_average_for_mean_and_var), + shape=param_shape, + dtype=self._dtype) self._variance.stop_gradient = True self._in_place = in_place @@ -1420,8 +1424,10 @@ class BatchNorm(layers.Layer): if reserve_space is not None: outputs["ReserveSpace"] = [reserve_space] - self._helper.append_op( - type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs) + self._helper.append_op(type="batch_norm", + inputs=inputs, + outputs=outputs, + attrs=attrs) # Currently, we don't support inplace in dygraph mode return self._helper.append_activation(batch_norm_out, self._act) @@ -1514,8 +1520,8 @@ class Dropout(layers.Layer): self._seed = prog.random_seed attrs = { 'dropout_prob': self._dropout_prob, - 'is_test': not self.training - if _non_static_mode() else self._is_test, + 'is_test': + not self.training if _non_static_mode() else self._is_test, 'fix_seed': self._seed is not None, 'seed': self._seed if self._seed is not None else 0, 'dropout_implementation': self._dropout_implementation, @@ -1530,12 +1536,13 @@ class Dropout(layers.Layer): mask = self._helper.create_variable_for_type_inference( dtype=core.VarDesc.VarType.UINT8, stop_gradient=True) - self._helper.append_op( - type='dropout', - inputs={'X': [input]}, - outputs={'Out': [out], - 'Mask': [mask]}, - attrs=attrs) + self._helper.append_op(type='dropout', + inputs={'X': [input]}, + outputs={ + 'Out': [out], + 'Mask': [mask] + }, + attrs=attrs) return out @@ -1667,18 +1674,19 @@ class Embedding(layers.Layer): if self._remote_prefetch: assert self._is_sparse is True and self._is_distributed is False - self.weight = self.create_parameter( - attr=self._param_attr, - shape=self._size, - dtype=self._dtype, - is_bias=False) + self.weight = self.create_parameter(attr=self._param_attr, + shape=self._size, + dtype=self._dtype, + is_bias=False) def forward(self, input): if _non_static_mode(): - return _C_ops.lookup_table_v2( - self.weight, input, 'is_sparse', self._is_sparse, - 'is_distributed', self._is_distributed, 'remote_prefetch', - self._remote_prefetch, 'padding_idx', self._padding_idx) + return 
_C_ops.lookup_table_v2(self.weight, input, 'is_sparse', + self._is_sparse, 'is_distributed', + self._is_distributed, + 'remote_prefetch', + self._remote_prefetch, 'padding_idx', + self._padding_idx) check_variable_and_dtype(input, 'input', ['uint8', 'int8', 'int16', 'int32', 'int64'], @@ -1691,12 +1699,13 @@ class Embedding(layers.Layer): } out = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type='lookup_table_v2', - inputs={'Ids': input, - 'W': self.weight}, - outputs={'Out': out}, - attrs=attrs) + self._helper.append_op(type='lookup_table_v2', + inputs={ + 'Ids': input, + 'W': self.weight + }, + outputs={'Out': out}, + attrs=attrs) return out @@ -1807,11 +1816,10 @@ class LayerNorm(layers.Layer): if self._shift: assert self._bias_attr is not False - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=param_shape, - dtype=self._dtype, - is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=param_shape, + dtype=self._dtype, + is_bias=True) else: if self._bias_attr: logging.warn("bias_attr are only available with shift is True") @@ -1825,10 +1833,11 @@ class LayerNorm(layers.Layer): if input_ndim < normalized_ndim or input_shape[ self._begin_norm_axis:] != self._normalized_shape: str_normalized_shape = str(self._normalized_shape) - raise ValueError( - 'Given normalized_shape is ' + str_normalized_shape + - ', expected input with shape [*, ' + str_normalized_shape[ - 1:] + ', but got input shape ' + str(input_shape)) + raise ValueError('Given normalized_shape is ' + + str_normalized_shape + + ', expected input with shape [*, ' + + str_normalized_shape[1:] + + ', but got input shape ' + str(input_shape)) if _non_static_mode(): if in_dygraph_mode(): @@ -1838,9 +1847,10 @@ class LayerNorm(layers.Layer): return dygraph_utils._append_activation_in_dygraph( pre_act, act=self._act) else: - pre_act, _, _ = _C_ops.layer_norm( - input, self.weight, self.bias, 'epsilon', self._epsilon, - 'begin_norm_axis', self._begin_norm_axis) + pre_act, _, _ = _C_ops.layer_norm(input, self.weight, self.bias, + 'epsilon', self._epsilon, + 'begin_norm_axis', + self._begin_norm_axis) return dygraph_utils._append_activation_in_dygraph( pre_act, act=self._act) @@ -1866,18 +1876,17 @@ class LayerNorm(layers.Layer): layer_norm_out = self._helper.create_variable_for_type_inference( self._dtype) - self._helper.append_op( - type="layer_norm", - inputs=inputs, - outputs={ - "Y": layer_norm_out, - "Mean": mean_out, - "Variance": variance_out, - }, - attrs={ - "epsilon": self._epsilon, - "begin_norm_axis": self._begin_norm_axis - }) + self._helper.append_op(type="layer_norm", + inputs=inputs, + outputs={ + "Y": layer_norm_out, + "Mean": mean_out, + "Variance": variance_out, + }, + attrs={ + "epsilon": self._epsilon, + "begin_norm_axis": self._begin_norm_axis + }) return self._helper.append_activation(layer_norm_out, act=self._act) @@ -2005,21 +2014,25 @@ class GRUUnit(layers.Layer): identity=0, sigmoid=1, tanh=2, - relu=3, ) + relu=3, + ) self.activation = activation_dict[activation] self.gate_activation = activation_dict[gate_activation] self._dtype = dtype size = size // 3 # create weight - self.weight = self.create_parameter( - attr=param_attr, shape=[size, 3 * size], dtype=dtype) + self.weight = self.create_parameter(attr=param_attr, + shape=[size, 3 * size], + dtype=dtype) # create bias bias_size = [1, 3 * size] self._bias_size = bias_size - self.bias = self.create_parameter( - attr=bias_attr, shape=bias_size, dtype=dtype, is_bias=True) + 
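# A minimal usage sketch (illustrative vocabulary size, embedding dim and ids,
# not part of this patch) of the dygraph Embedding layer above, which dispatches
# to lookup_table_v2 as shown.
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding(size=[10, 8])     # 10-word vocab, 8-dim vectors
    ids = fluid.dygraph.to_variable(np.array([[1, 2, 3]], dtype='int64'))
    out = emb(ids)
    print(out.shape)                                # [1, 3, 8]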
self.bias = self.create_parameter(attr=bias_attr, + shape=bias_size, + dtype=dtype, + is_bias=True) def forward(self, input, hidden): if _non_static_mode(): @@ -2044,18 +2057,17 @@ class GRUUnit(layers.Layer): self._dtype) updated_hidden = self._helper.create_variable_for_type_inference( self._dtype) - self._helper.append_op( - type='gru_unit', - inputs=inputs, - outputs={ - 'Gate': gate, - 'ResetHiddenPrev': reset_hidden_pre, - 'Hidden': updated_hidden, - }, - attrs={ - 'activation': self.activation, - 'gate_activation': self.gate_activation, - }) + self._helper.append_op(type='gru_unit', + inputs=inputs, + outputs={ + 'Gate': gate, + 'ResetHiddenPrev': reset_hidden_pre, + 'Hidden': updated_hidden, + }, + attrs={ + 'activation': self.activation, + 'gate_activation': self.gate_activation, + }) return updated_hidden, reset_hidden_pre, gate @@ -2274,11 +2286,12 @@ class NCE(layers.Layer): self._attrs['seed'], 'sampler', self._attrs['sampler'], 'is_sparse', self._attrs['is_sparse'], 'remote_prefetch', self._attrs['remote_prefetch']) - cost, _, _ = _C_ops.nce( - input, label, self.weight, self.bias, - self._inputs['SampleWeight'], self._inputs['CustomDistProbs'], - self._inputs['CustomDistAlias'], - self._inputs['CustomDistAliasProbs'], *attrs) + cost, _, _ = _C_ops.nce(input, label, self.weight, self.bias, + self._inputs['SampleWeight'], + self._inputs['CustomDistProbs'], + self._inputs['CustomDistAlias'], + self._inputs['CustomDistAliasProbs'], + *attrs) return cost / (self._num_neg_samples + 1) check_variable_and_dtype(input, "input", ['float32', 'float64'], "NCE") @@ -2299,15 +2312,14 @@ class NCE(layers.Layer): sample_labels = self._helper.create_variable_for_type_inference( dtype=label.dtype) - self._helper.append_op( - type='nce', - inputs=self._inputs, - outputs={ - 'Cost': cost, - 'SampleLogits': sample_logits, - 'SampleLabels': sample_labels - }, - attrs=self._attrs) + self._helper.append_op(type='nce', + inputs=self._inputs, + outputs={ + 'Cost': cost, + 'SampleLogits': sample_logits, + 'SampleLabels': sample_labels + }, + attrs=self._attrs) return cost / (self._num_neg_samples + 1) @@ -2389,33 +2401,34 @@ class PRelu(layers.Layer): channel, int), "channel argument is required when mode is 'channel'." #NOTE(zhiqiu): The _alpha_shape should be [1, channel] + [1] * len(input_shape[2:]), not [1, channel, 1, 1]. - # However, the suffix 1 in the list is useless, since the tensor is viewed as one demension array during kernel calculation. + # However, the suffix 1 in the list is useless, since the tensor is viewed as one demension array during kernel calculation. # And, input_shape is not required when mode is 'channel', so it is simplified. #NOTE(zhiqiu): Revert shape to [1, channel, 1, 1] for compatibility with saved model of old version. self._alpha_shape = [1, channel, 1, 1] elif mode == 'element': - assert isinstance(input_shape, ( - list, tuple - )), "input_shape argument is required when mode is 'element'." + assert isinstance( + input_shape, + (list, tuple + )), "input_shape argument is required when mode is 'element'." 
self._alpha_shape = [1] + list(input_shape)[1:] else: raise ValueError('mode should be one of all, channel, element.') - self.weight = self.create_parameter( - attr=self._param_attr, - shape=self._alpha_shape, - dtype='float32', - is_bias=False, - default_initializer=Constant(1.0)) + self.weight = self.create_parameter(attr=self._param_attr, + shape=self._alpha_shape, + dtype='float32', + is_bias=False, + default_initializer=Constant(1.0)) def forward(self, input): check_variable_and_dtype(input, 'input', ['float32'], 'PRelu') out = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type="prelu", - inputs={"X": input, - 'Alpha': self.weight}, - attrs={"mode": self._mode}, - outputs={"Out": out}) + self._helper.append_op(type="prelu", + inputs={ + "X": input, + 'Alpha': self.weight + }, + attrs={"mode": self._mode}, + outputs={"Out": out}) return out @@ -2495,22 +2508,19 @@ class BilinearTensorProduct(layers.Layer): self._dtype = dtype param_shape = [self._output_dim, self._input1_dim, self._input2_dim] - self.weight = self.create_parameter( - attr=self._param_attr, - shape=param_shape, - dtype=self._dtype, - is_bias=False) + self.weight = self.create_parameter(attr=self._param_attr, + shape=param_shape, + dtype=self._dtype, + is_bias=False) bias_size = [1, self._output_dim] - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=bias_size, - dtype=self._dtype, - is_bias=True) - - @deprecated( - since="2.0.0", - update_to="paddle.nn.Bilinear", - reason="New name and new args in Bilinear, easier to use.") + self.bias = self.create_parameter(attr=self._bias_attr, + shape=bias_size, + dtype=self._dtype, + is_bias=True) + + @deprecated(since="2.0.0", + update_to="paddle.nn.Bilinear", + reason="New name and new args in Bilinear, easier to use.") def forward(self, x, y): check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'BilinearTensorProduct') @@ -2520,17 +2530,16 @@ class BilinearTensorProduct(layers.Layer): if self.bias is not None: self._inputs["Bias"] = self.bias if self._name is not None: - out = self._helper.create_variable( - name=".".join([self.full_name(), self._name]), - dtype=self._dtype, - persistable=False) + out = self._helper.create_variable(name=".".join( + [self.full_name(), self._name]), + dtype=self._dtype, + persistable=False) else: - out = self._helper.create_variable( - dtype=self._dtype, persistable=False) - self._helper.append_op( - type="bilinear_tensor_product", - inputs=self._inputs, - outputs={"Out": out}) + out = self._helper.create_variable(dtype=self._dtype, + persistable=False) + self._helper.append_op(type="bilinear_tensor_product", + inputs=self._inputs, + outputs={"Out": out}) # add activation return self._helper.append_activation(out, act=self._act) @@ -2684,9 +2693,9 @@ class Conv2DTranspose(layers.Layer): self._output_size = output_size self._dtype = dtype - if (self._num_channels == self._groups and - self._num_filters == self._num_channels and - not self._use_cudnn): + if (self._num_channels == self._groups + and self._num_filters == self._num_channels + and not self._use_cudnn): self._op_type = 'depthwise_conv2d_transpose' else: self._op_type = 'conv2d_transpose' @@ -2711,14 +2720,14 @@ class Conv2DTranspose(layers.Layer): filter_shape = [self._num_channels, self._num_filters // self._groups ] + self._filter_size - self.weight = self.create_parameter( - dtype=self._dtype, shape=filter_shape, attr=self._param_attr) + self.weight = self.create_parameter(dtype=self._dtype, + shape=filter_shape, + 
attr=self._param_attr) - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._num_filters], - dtype=self._dtype, - is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=[self._num_filters], + dtype=self._dtype, + is_bias=True) def forward(self, input): if _non_static_mode(): @@ -2728,10 +2737,10 @@ class Conv2DTranspose(layers.Layer): 'dilations', self._dilation, 'groups', self._groups, 'use_cudnn', self._use_cudnn) pre_bias = out - pre_act = dygraph_utils._append_bias_in_dygraph(pre_bias, self.bias, - 1) - return dygraph_utils._append_activation_in_dygraph( - pre_act, act=self._act) + pre_act = dygraph_utils._append_bias_in_dygraph( + pre_bias, self.bias, 1) + return dygraph_utils._append_activation_in_dygraph(pre_act, + act=self._act) check_variable_and_dtype(input, 'input', ['float16', 'float32', 'float64'], @@ -2749,21 +2758,21 @@ class Conv2DTranspose(layers.Layer): pre_bias = self._helper.create_variable_for_type_inference( dtype=input.dtype) - self._helper.append_op( - type=self._op_type, - inputs=inputs, - outputs={'Output': pre_bias}, - attrs=attrs) + self._helper.append_op(type=self._op_type, + inputs=inputs, + outputs={'Output': pre_bias}, + attrs=attrs) if self.bias is not None: pre_act = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type='elementwise_add', - inputs={'X': [pre_bias], - 'Y': [self.bias]}, - outputs={'Out': [pre_act]}, - attrs={'axis': 1}) + self._helper.append_op(type='elementwise_add', + inputs={ + 'X': [pre_bias], + 'Y': [self.bias] + }, + outputs={'Out': [pre_act]}, + attrs={'axis': 1}) else: pre_act = pre_bias @@ -2826,39 +2835,39 @@ class SequenceConv(layers.Layer): def _build_once(self, input): self._dtype = self._helper.input_dtype(input) filter_shape = [self._filter_size * input.shape[1], self._num_filters] - self.weight = self.create_parameter( - attr=self._param_attr, shape=filter_shape, dtype=self._dtype) + self.weight = self.create_parameter(attr=self._param_attr, + shape=filter_shape, + dtype=self._dtype) - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._num_filters], - dtype=self._dtype, - is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=[self._num_filters], + dtype=self._dtype, + is_bias=True) def forward(self, input): pre_bias = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type='sequence_conv', - inputs={ - 'X': [input], - 'Filter': [self.weight], - }, - outputs={"Out": pre_bias}, - attrs={ - 'contextStride': self._filter_stride, - 'contextStart': -int(self._filter_size // 2), - 'contextLength': self._filter_size - }) + self._helper.append_op(type='sequence_conv', + inputs={ + 'X': [input], + 'Filter': [self.weight], + }, + outputs={"Out": pre_bias}, + attrs={ + 'contextStride': self._filter_stride, + 'contextStart': -int(self._filter_size // 2), + 'contextLength': self._filter_size + }) if self.bias is not None: pre_act = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type='elementwise_add', - inputs={'X': [pre_bias], - 'Y': [self.bias]}, - outputs={'Out': [pre_act]}, - attrs={'axis': 1}) + self._helper.append_op(type='elementwise_add', + inputs={ + 'X': [pre_bias], + 'Y': [self.bias] + }, + outputs={'Out': [pre_act]}, + attrs={'axis': 1}) else: pre_act = pre_bias @@ -2928,19 +2937,19 @@ class RowConv(layers.Layer): def _build_once(self, input): self._dtype = self._helper.input_dtype(input) filter_shape 
= [self._future_context_size + 1, input.shape[1]] - self.weight = self.create_parameter( - attr=self._param_attr, - shape=filter_shape, - dtype=self._dtype, - is_bias=False) + self.weight = self.create_parameter(attr=self._param_attr, + shape=filter_shape, + dtype=self._dtype, + is_bias=False) def forward(self, input): out = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type='row_conv', - inputs={'X': [input], - 'Filter': [self.weight]}, - outputs={'Out': [out]}) + self._helper.append_op(type='row_conv', + inputs={ + 'X': [input], + 'Filter': [self.weight] + }, + outputs={'Out': [out]}) return self._helper.append_activation(out, act=self._act) @@ -3007,17 +3016,15 @@ class GroupNorm(layers.Layer): param_shape = [self._channels] - self.weight = self.create_parameter( - attr=self._param_attr or False, - shape=param_shape, - dtype=self._dtype, - default_initializer=Constant(1.0)) + self.weight = self.create_parameter(attr=self._param_attr or False, + shape=param_shape, + dtype=self._dtype, + default_initializer=Constant(1.0)) - self.bias = self.create_parameter( - attr=self._bias_attr or False, - shape=param_shape, - dtype=self._dtype, - is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr or False, + shape=param_shape, + dtype=self._dtype, + is_bias=True) def forward(self, input): mean_out = self._helper.create_variable_for_type_inference( @@ -3042,16 +3049,17 @@ class GroupNorm(layers.Layer): group_norm_out = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type="group_norm", - inputs=inputs, - outputs={ - "Y": group_norm_out, - "Mean": mean_out, - "Variance": variance_out, - }, - attrs={"epsilon": self._epsilon, - "groups": self._groups}) + self._helper.append_op(type="group_norm", + inputs=inputs, + outputs={ + "Y": group_norm_out, + "Mean": mean_out, + "Variance": variance_out, + }, + attrs={ + "epsilon": self._epsilon, + "groups": self._groups + }) return self._helper.append_activation(group_norm_out, self._act) @@ -3137,18 +3145,18 @@ class SpectralNorm(layers.Layer): h = self._weight_shape[self._dim] w = np.prod(self._weight_shape) // h - self.weight_u = self.create_parameter( - attr=ParamAttr(), - shape=[h], - dtype=self._dtype, - default_initializer=Normal(0., 1.)) + self.weight_u = self.create_parameter(attr=ParamAttr(), + shape=[h], + dtype=self._dtype, + default_initializer=Normal( + 0., 1.)) self.weight_u.stop_gradient = True - self.weight_v = self.create_parameter( - attr=ParamAttr(), - shape=[w], - dtype=self._dtype, - default_initializer=Normal(0., 1.)) + self.weight_v = self.create_parameter(attr=ParamAttr(), + shape=[w], + dtype=self._dtype, + default_initializer=Normal( + 0., 1.)) self.weight_v.stop_gradient = True def forward(self, weight): @@ -3156,15 +3164,16 @@ class SpectralNorm(layers.Layer): 'SpectralNorm') inputs = {'Weight': weight, 'U': self.weight_u, 'V': self.weight_v} out = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type="spectral_norm", - inputs=inputs, - outputs={"Out": out, }, - attrs={ - "dim": self._dim, - "power_iters": self._power_iters, - "eps": self._eps, - }) + self._helper.append_op(type="spectral_norm", + inputs=inputs, + outputs={ + "Out": out, + }, + attrs={ + "dim": self._dim, + "power_iters": self._power_iters, + "eps": self._eps, + }) return out @@ -3236,44 +3245,45 @@ class TreeConv(layers.Layer): self._dtype = dtype w_shape = [self._feature_size, 3, self._output_size, self._num_filters] if 
self._bias_attr: - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._num_filters], - dtype=self._dtype, - is_bias=True) - self.weight = self.create_parameter( - attr=self._param_attr, - shape=w_shape, - dtype=self._dtype, - is_bias=False) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=[self._num_filters], + dtype=self._dtype, + is_bias=True) + self.weight = self.create_parameter(attr=self._param_attr, + shape=w_shape, + dtype=self._dtype, + is_bias=False) def forward(self, nodes_vector, edge_set): check_type(nodes_vector, 'nodes_vector', (Variable), 'TreeConv') check_type(edge_set, 'edge_set', (Variable), 'TreeConv') if self._name: - out = self.create_variable( - name=self._name, dtype=self._dtype, persistable=False) + out = self.create_variable(name=self._name, + dtype=self._dtype, + persistable=False) else: out = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type='tree_conv', - inputs={ - 'NodesVector': nodes_vector, - 'EdgeSet': edge_set, - 'Filter': self.weight - }, - outputs={'Out': out, }, - attrs={'max_depth': self._max_depth}) + self._helper.append_op(type='tree_conv', + inputs={ + 'NodesVector': nodes_vector, + 'EdgeSet': edge_set, + 'Filter': self.weight + }, + outputs={ + 'Out': out, + }, + attrs={'max_depth': self._max_depth}) if self._bias_attr: pre_activation = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type='elementwise_add', - inputs={'X': [out], - 'Y': [self.bias]}, - outputs={'Out': [pre_activation]}, - attrs={'axis': 1}) + self._helper.append_op(type='elementwise_add', + inputs={ + 'X': [out], + 'Y': [self.bias] + }, + outputs={'Out': [pre_activation]}, + attrs={'axis': 1}) else: pre_activation = out return self._helper.append_activation(pre_activation, act=self._act) @@ -3312,6 +3322,7 @@ class Flatten(layers.Layer): self.stop_axis = stop_axis def forward(self, input): - out = paddle.tensor.manipulation.flatten( - input, start_axis=self.start_axis, stop_axis=self.stop_axis) + out = paddle.tensor.manipulation.flatten(input, + start_axis=self.start_axis, + stop_axis=self.stop_axis) return out diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index fe1b56931f8..09036ed942d 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -287,8 +287,7 @@ def _coalesce_tensors(var_groups): for g_var in grad_vars: g_var_shapes.append(g_var.shape) flattened_vars.append( - nn.reshape( - x=g_var, shape=[np.prod(g_var.shape)])) + nn.reshape(x=g_var, shape=[np.prod(g_var.shape)])) coalesced_grad = nn.concat(flattened_vars) coalesced_grads_and_grad_vars.append( [coalesced_grad, grad_vars, g_var_shapes]) @@ -298,12 +297,13 @@ def _coalesce_tensors(var_groups): @framework.dygraph_only def _reshape_inplace(x, shape): x_shape = framework._varbase_creator(dtype=x.dtype) - framework._dygraph_tracer().trace_op( - type="reshape2", - inputs={'X': x}, - outputs={'Out': x, - 'XShape': x_shape}, - attrs={'shape': shape}) + framework._dygraph_tracer().trace_op(type="reshape2", + inputs={'X': x}, + outputs={ + 'Out': x, + 'XShape': x_shape + }, + attrs={'shape': shape}) @framework.dygraph_only @@ -315,8 +315,10 @@ def _split_tensors(coalesced_grads_and_grad_vars): type='split', inputs={'X': coalesced_grad}, outputs={'Out': origin_grad_vars}, - attrs={'sections': grad_var_len, - 'axis': 0}) + attrs={ + 'sections': grad_var_len, + 'axis': 0 + }) for g_var, g_shape in 
zip(origin_grad_vars, grad_shapes): _reshape_inplace(x=g_var, shape=g_shape) assert g_var.shape == g_shape @@ -382,7 +384,7 @@ def sync_params_buffers(model, if is_model_parallel and param.is_distributed: continue - # NOTE(shenliang03): Support situations that do not require synchronization parameters, + # NOTE(shenliang03): Support situations that do not require synchronization parameters, # such as moe's expert parameters if getattr(param, "no_sync", False): continue @@ -397,8 +399,10 @@ def sync_params_buffers(model, coalesced_vars = build_groups(model_vars, 128 * 1024 * 1024) for coalesced_var, _, _ in coalesced_vars: - paddle.distributed.broadcast( - coalesced_var, src=src_rank, group=comm_group, use_calc_stream=True) + paddle.distributed.broadcast(coalesced_var, + src=src_rank, + group=comm_group, + use_calc_stream=True) for coalesced_var, origin_vars, var_shapes in coalesced_vars: var_len = [np.prod(v_shape) for v_shape in var_shapes] @@ -406,8 +410,10 @@ def sync_params_buffers(model, type='split', inputs={'X': coalesced_var}, outputs={'Out': origin_vars}, - attrs={'sections': var_len, - 'axis': 0}) + attrs={ + 'sections': var_len, + 'axis': 0 + }) class DataParallel(layers.Layer): @@ -591,8 +597,8 @@ class DataParallel(layers.Layer): self.var_dtype = core.eager.Tensor if in_dygraph_mode( ) else core.VarBase - # NOTE(chenweihang): The ParallelStrategy here is not strictly a strategy. - # It just stores some environment variables, which can be constructed by + # NOTE(chenweihang): The ParallelStrategy here is not strictly a strategy. + # It just stores some environment variables, which can be constructed by # ParallelEnv. Here it is set as an optional argument. # This parameter is not removed because of compatibility with 1.x writing. if strategy is not None: @@ -614,15 +620,15 @@ class DataParallel(layers.Layer): "ProcessGroup must be an instance of Group in DataParallel." # sync buffer and params - # TODO(liuyuhui) Currently not support xpu. xpu is + # TODO(liuyuhui) Currently not support xpu. xpu is # still broadcasting parameters when calling layer if not paddle.is_compiled_with_xpu(): sync_params_buffers(self._layers) self.comm_buffer_size = int(comm_buffer_size * 1024 * 1024) - # NOTE(shenliang03): We can set environment variables to control - # the size of the group, Default: 1MB. The role of this small group is: - # when the last group allreduce, the overlap cannot work. Making the + # NOTE(shenliang03): We can set environment variables to control + # the size of the group, Default: 1MB. The role of this small group is: + # when the last group allreduce, the overlap cannot work. Making the # the last group small is useful to improve performance. self.last_comm_buffer_size = int(last_comm_buffer_size * 1024 * 1024) @@ -660,7 +666,7 @@ class DataParallel(layers.Layer): def check_layer_sparse(sublayer): if isinstance(sublayer, paddle.nn.layer.common.Embedding): return sublayer._sparse - # NOTE(shenliang03):This is for compatibility. If paddle.fluid.dygraph.Embedding + # NOTE(shenliang03):This is for compatibility. If paddle.fluid.dygraph.Embedding # is removed in the future, the check will also be removed here. 
if isinstance(sublayer, paddle.fluid.dygraph.Embedding): return sublayer._is_sparse @@ -676,9 +682,8 @@ class DataParallel(layers.Layer): [self.last_comm_buffer_size, self.comm_buffer_size]) self._reducer = core.EagerReducer( - trainable_parameters, - list(reversed(self.group_indices)), is_sparse_gradient, - self.group.process_group, + trainable_parameters, list(reversed(self.group_indices)), + is_sparse_gradient, self.group.process_group, [self.last_comm_buffer_size, self.comm_buffer_size], self.find_unused_parameters) elif _in_legacy_dygraph(): @@ -687,9 +692,8 @@ class DataParallel(layers.Layer): [self.last_comm_buffer_size, self.comm_buffer_size]) self._reducer = core.Reducer( - trainable_parameters, - list(reversed(self.group_indices)), is_sparse_gradient, - parallel_helper.__parallel_ctx__clz__, + trainable_parameters, list(reversed(self.group_indices)), + is_sparse_gradient, parallel_helper.__parallel_ctx__clz__, [self.last_comm_buffer_size, self.comm_buffer_size], self.find_unused_parameters) @@ -752,12 +756,12 @@ class DataParallel(layers.Layer): outputs = self._layers(*inputs, **kwargs) if self._strategy.nranks > 1 and framework._dygraph_tracer( )._has_grad and self.grad_need_sync: - self._reducer.prepare_for_backward( - list(self._find_varbase(outputs))) + self._reducer.prepare_for_backward(list( + self._find_varbase(outputs))) return outputs - @deprecated( - since="2.0.0", reason="This method does not need to be called anymore.") + @deprecated(since="2.0.0", + reason="This method does not need to be called anymore.") def scale_loss(self, loss): """ Deprecated method, now ``scale_loss`` is an empty method, @@ -765,8 +769,8 @@ class DataParallel(layers.Layer): """ return loss - @deprecated( - since="2.0.0", reason="This method does not need to be called anymore.") + @deprecated(since="2.0.0", + reason="This method does not need to be called anymore.") def apply_collective_grads(self): """ Deprecated method, now ``apply_collective_grads`` is an empty method, @@ -840,8 +844,8 @@ class DataParallel(layers.Layer): ''' - self._layers.set_state_dict( - state_dict, use_structured_name=use_structured_name) + self._layers.set_state_dict(state_dict, + use_structured_name=use_structured_name) # [aliases] Compatible with old method names set_dict = set_state_dict diff --git a/python/paddle/fluid/dygraph/parallel_helper.py b/python/paddle/fluid/dygraph/parallel_helper.py index 5fe4d4162e6..bc0bb460352 100644 --- a/python/paddle/fluid/dygraph/parallel_helper.py +++ b/python/paddle/fluid/dygraph/parallel_helper.py @@ -14,6 +14,7 @@ import os from ..layers import collective from ..framework import Parameter + __parallel_ctx__clz__ = None diff --git a/python/paddle/fluid/dygraph/rnn.py b/python/paddle/fluid/dygraph/rnn.py index 05a76a8d125..837287faa0f 100644 --- a/python/paddle/fluid/dygraph/rnn.py +++ b/python/paddle/fluid/dygraph/rnn.py @@ -162,21 +162,20 @@ class LSTMCell(Layer): shape=[4 * self._hidden_size, self._hidden_size], dtype=self._dtype) - self._bias_ih = self.create_parameter( - attr=bias_ih_param_attr, - shape=[4 * self._hidden_size], - dtype=self._dtype, - is_bias=True) - self._bias_hh = self.create_parameter( - attr=bias_hh_param_attr, - shape=[4 * self._hidden_size], - dtype=self._dtype, - is_bias=True) + self._bias_ih = self.create_parameter(attr=bias_ih_param_attr, + shape=[4 * self._hidden_size], + dtype=self._dtype, + is_bias=True) + self._bias_hh = self.create_parameter(attr=bias_hh_param_attr, + shape=[4 * self._hidden_size], + dtype=self._dtype, + is_bias=True) else: - 
self._forget_bias = fill_constant( - [1], dtype=dtype, value=forget_bias) + self._forget_bias = fill_constant([1], + dtype=dtype, + value=forget_bias) self._forget_bias.stop_gradient = False self._weight = self.create_parameter( @@ -186,11 +185,10 @@ class LSTMCell(Layer): ], dtype=dtype) - self._bias = self.create_parameter( - attr=self._bias_attr, - shape=[4 * self._hidden_size], - dtype=dtype, - is_bias=True) + self._bias = self.create_parameter(attr=self._bias_attr, + shape=[4 * self._hidden_size], + dtype=dtype, + is_bias=True) def forward(self, input, pre_hidden, pre_cell): @@ -226,9 +224,10 @@ class LSTMCell(Layer): gate_input = elementwise_add(gate_input, self._bias) i, j, f, o = split(gate_input, num_or_sections=4, dim=-1) new_cell = elementwise_add( - elementwise_mul(pre_cell, - self._gate_activation( - elementwise_add(f, self._forget_bias))), + elementwise_mul( + pre_cell, + self._gate_activation(elementwise_add(f, + self._forget_bias))), elementwise_mul(sigmoid(i), tanh(j))) new_hidden = self._activation(new_cell) * self._gate_activation(o) @@ -363,16 +362,14 @@ class GRUCell(Layer): shape=[3 * self._hidden_size, self._hidden_size], dtype=self._dtype) - self._bias_ih = self.create_parameter( - attr=bias_ih_param_attr, - shape=[3 * self._hidden_size], - dtype=self._dtype, - is_bias=True) - self._bias_hh = self.create_parameter( - attr=bias_hh_param_attr, - shape=[3 * self._hidden_size], - dtype=self._dtype, - is_bias=True) + self._bias_ih = self.create_parameter(attr=bias_ih_param_attr, + shape=[3 * self._hidden_size], + dtype=self._dtype, + is_bias=True) + self._bias_hh = self.create_parameter(attr=bias_hh_param_attr, + shape=[3 * self._hidden_size], + dtype=self._dtype, + is_bias=True) else: @@ -403,9 +400,7 @@ class GRUCell(Layer): self._candidate_weight = self.create_parameter( attr=candidate_weight_param_attr, - shape=[ - self._input_size + self._hidden_size, self._hidden_size - ], + shape=[self._input_size + self._hidden_size, self._hidden_size], dtype=dtype) self._gate_bias = self.create_parameter( @@ -455,8 +450,8 @@ class GRUCell(Layer): r_hidden = r * pre_hidden - candidate = matmul( - concat([input, r_hidden], 1), self._candidate_weight) + candidate = matmul(concat([input, r_hidden], 1), + self._candidate_weight) candidate = elementwise_add(candidate, self._candidate_bias) c = self._activation(candidate) diff --git a/python/paddle/fluid/dygraph/tracer.py b/python/paddle/fluid/dygraph/tracer.py index 44a49148ca0..046a98293e8 100644 --- a/python/paddle/fluid/dygraph/tracer.py +++ b/python/paddle/fluid/dygraph/tracer.py @@ -189,15 +189,15 @@ class Tracer(core.Tracer): # Replaced outputs by function returns if isinstance(returns[i], list): for j in range(len(returns[i])): - outputs[retname][j].reconstruct_from_(returns[i][j], - False) + outputs[retname][j].reconstruct_from_( + returns[i][j], False) else: if isinstance(outputs[retname], list): - outputs[retname][0].reconstruct_from_(returns[i], - False) + outputs[retname][0].reconstruct_from_( + returns[i], False) else: - outputs[retname].reconstruct_from_(returns[i], - False) + outputs[retname].reconstruct_from_( + returns[i], False) elif isinstance(returns, list): assert len(outputs.keys()) == 1 key = list(outputs.keys())[0] @@ -277,8 +277,8 @@ class Tracer(core.Tracer): # Replaced outputs by function returns if isinstance(returns[i], list): for j in range(len(returns[i])): - outputs[retname][j].reconstruct_from_(returns[i][j], - False) + outputs[retname][j].reconstruct_from_( + returns[i][j], False) else: 
outputs[retname][0].reconstruct_from_(returns[i], False) elif isinstance(returns, list): @@ -316,8 +316,9 @@ class Tracer(core.Tracer): inplace_map) else: self.trace(type, inputs, outputs, attrs, - framework._current_expected_place(), self._has_grad and - not stop_gradient, inplace_map if inplace_map else {}) + framework._current_expected_place(), self._has_grad + and not stop_gradient, + inplace_map if inplace_map else {}) def train_mode(self): self._train_mode = True diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index d6b50249df0..2422c68622a 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -70,6 +70,7 @@ _already_patch_repr = False def monkey_patch_varbase(): + @switch_to_static_graph def _to_static_var(self, to_parameter=False, **kwargs): """ @@ -258,12 +259,12 @@ def monkey_patch_varbase(): if grad_tensor is not None: if framework._in_eager_mode_: assert isinstance( - grad_tensor, core.eager. - Tensor), "The type of grad_tensor must be paddle.Tensor" + grad_tensor, core.eager.Tensor + ), "The type of grad_tensor must be paddle.Tensor" else: assert isinstance( - grad_tensor, paddle. - Tensor), "The type of grad_tensor must be paddle.Tensor" + grad_tensor, paddle.Tensor + ), "The type of grad_tensor must be paddle.Tensor" assert grad_tensor.shape == self.shape, \ "Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format( grad_tensor.name, grad_tensor.shape, self.name, self.shape) @@ -304,7 +305,8 @@ def monkey_patch_varbase(): @deprecated( since="2.1.0", level=1, - reason="Please use tensor.grad, which returns the tensor value of the gradient." + reason= + "Please use tensor.grad, which returns the tensor value of the gradient." 
) def gradient(self): """ @@ -341,9 +343,9 @@ def monkey_patch_varbase(): new_ivar = self._grad_ivar()._copy_to(core.CPUPlace(), True) if self._grad_ivar().type == core.VarDesc.VarType.SELECTED_ROWS: - return ( - np.array(new_ivar.value().get_selected_rows().get_tensor()), - np.array(new_ivar.value().get_selected_rows().rows())) + return (np.array( + new_ivar.value().get_selected_rows().get_tensor()), + np.array(new_ivar.value().get_selected_rows().rows())) else: return np.array(new_ivar.value().get_tensor()) @@ -715,7 +717,9 @@ def monkey_patch_varbase(): return False def __getitem__(self, item): + def is_list_tuple(index, contain_type): + def _is_list_tuple(item): if isinstance(item, (tuple, list)): for s in item: @@ -743,6 +747,7 @@ def monkey_patch_varbase(): return self._getitem_index_not_tensor(item) def __setitem__(self, item, value): + def contain_tensor_or_list(item): if not isinstance(item, tuple): item = [item] @@ -984,17 +989,25 @@ def monkey_patch_varbase(): if framework._in_eager_mode_ and not hasattr(core, "eager"): return - for method_name, method in ( - ("__bool__", __bool__), ("__nonzero__", __nonzero__), - ("_to_static_var", _to_static_var), ("set_value", set_value), - ("block", block), ("backward", backward), ("clear_grad", clear_grad), - ("inplace_version", inplace_version), ("gradient", gradient), - ("register_hook", register_hook), ("__str__", __str__), - ("__repr__", __str__), ("__deepcopy__", __deepcopy__), - ("__module__", "paddle"), ("__array__", __array__), - ("__getitem__", __getitem__), ("item", item), - ("__setitem__", __setitem__), ("_to", _to), ("values", values), - ("to_dense", to_dense), ("to_sparse_coo", to_sparse_coo)): + for method_name, method in (("__bool__", __bool__), ("__nonzero__", + __nonzero__), + ("_to_static_var", + _to_static_var), ("set_value", set_value), + ("block", block), ("backward", backward), + ("clear_grad", clear_grad), ("inplace_version", + inplace_version), + ("gradient", gradient), ("register_hook", + register_hook), + ("__str__", __str__), ("__repr__", __str__), + ("__deepcopy__", __deepcopy__), ("__module__", + "paddle"), + ("__array__", + __array__), ("__getitem__", + __getitem__), ("item", item), + ("__setitem__", + __setitem__), ("_to", _to), ("values", values), + ("to_dense", to_dense), ("to_sparse_coo", + to_sparse_coo)): if framework._in_eager_mode_: setattr(core.eager.Tensor, method_name, method) else: diff --git a/python/paddle/fluid/entry_attr.py b/python/paddle/fluid/entry_attr.py index 0fbbf7c36e8..37507957042 100644 --- a/python/paddle/fluid/entry_attr.py +++ b/python/paddle/fluid/entry_attr.py @@ -39,6 +39,7 @@ class EntryAttr(object): class ProbabilityEntry(EntryAttr): + def __init__(self, probability): super(ProbabilityEntry, self).__init__() @@ -56,6 +57,7 @@ class ProbabilityEntry(EntryAttr): class CountFilterEntry(EntryAttr): + def __init__(self, count_filter): super(CountFilterEntry, self).__init__() diff --git a/python/paddle/fluid/evaluator.py b/python/paddle/fluid/evaluator.py index 5c8386af3a7..510733d4c1c 100644 --- a/python/paddle/fluid/evaluator.py +++ b/python/paddle/fluid/evaluator.py @@ -33,13 +33,12 @@ __all__ = [ def _clone_var_(block, var): assert isinstance(var, Variable) - return block.create_var( - name=var.name, - shape=var.shape, - dtype=var.dtype, - type=var.type, - lod_level=var.lod_level, - persistable=True) + return block.create_var(name=var.name, + shape=var.shape, + dtype=var.dtype, + type=var.type, + lod_level=var.lod_level, + persistable=True) class Evaluator(object): @@ -89,8 
+88,10 @@ class Evaluator(object): for var in self.states: assert isinstance(var, Variable) g_var = _clone_var_(reset_program.current_block(), var) - layers.fill_constant( - shape=g_var.shape, value=0.0, dtype=g_var.dtype, out=g_var) + layers.fill_constant(shape=g_var.shape, + value=0.0, + dtype=g_var.dtype, + out=g_var) executor.run(reset_program) @@ -115,11 +116,11 @@ class Evaluator(object): Returns: State variable """ - state = self.helper.create_variable( - name="_".join([unique_name.generate(self.helper.name), suffix]), - persistable=True, - dtype=dtype, - shape=shape) + state = self.helper.create_variable(name="_".join( + [unique_name.generate(self.helper.name), suffix]), + persistable=True, + dtype=dtype, + shape=shape) self.states.append(state) return state @@ -158,21 +159,24 @@ class ChunkEvaluator(Evaluator): """ def __init__( - self, - input, - label, - chunk_scheme, - num_chunk_types, - excluded_chunk_types=None, ): + self, + input, + label, + chunk_scheme, + num_chunk_types, + excluded_chunk_types=None, + ): super(ChunkEvaluator, self).__init__("chunk_eval") main_program = self.helper.main_program if main_program.current_block().idx != 0: raise ValueError("You can only invoke Evaluator in root block") - self.num_infer_chunks = self._create_state( - dtype='int64', shape=[1], suffix='num_infer_chunks') - self.num_label_chunks = self._create_state( - dtype='int64', shape=[1], suffix='num_label_chunks') + self.num_infer_chunks = self._create_state(dtype='int64', + shape=[1], + suffix='num_infer_chunks') + self.num_label_chunks = self._create_state(dtype='int64', + shape=[1], + suffix='num_label_chunks') self.num_correct_chunks = self._create_state( dtype='int64', shape=[1], suffix='num_correct_chunks') precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks = layers.chunk_eval( @@ -180,16 +184,14 @@ class ChunkEvaluator(Evaluator): label=label, chunk_scheme=chunk_scheme, num_chunk_types=num_chunk_types, - excluded_chunk_types=excluded_chunk_types, ) - layers.sums( - input=[self.num_infer_chunks, num_infer_chunks], - out=self.num_infer_chunks) - layers.sums( - input=[self.num_label_chunks, num_label_chunks], - out=self.num_label_chunks) - layers.sums( - input=[self.num_correct_chunks, num_correct_chunks], - out=self.num_correct_chunks) + excluded_chunk_types=excluded_chunk_types, + ) + layers.sums(input=[self.num_infer_chunks, num_infer_chunks], + out=self.num_infer_chunks) + layers.sums(input=[self.num_label_chunks, num_label_chunks], + out=self.num_label_chunks) + layers.sums(input=[self.num_correct_chunks, num_correct_chunks], + out=self.num_correct_chunks) self.metrics.extend([precision, recall, f1_score]) @@ -209,10 +211,8 @@ class ChunkEvaluator(Evaluator): num_correct_chunks) / num_label_chunks if num_label_chunks else 0 f1_score = float(2 * precision * recall) / ( precision + recall) if num_correct_chunks else 0 - return np.array( - [precision], dtype='float32'), np.array( - [recall], dtype='float32'), np.array( - [f1_score], dtype='float32') + return np.array([precision], dtype='float32'), np.array( + [recall], dtype='float32'), np.array([f1_score], dtype='float32') class EditDistance(Evaluator): @@ -252,29 +252,31 @@ class EditDistance(Evaluator): if main_program.current_block().idx != 0: raise ValueError("You can only invoke Evaluator in root block") - self.total_distance = self._create_state( - dtype='float32', shape=[1], suffix='total_distance') - self.seq_num = self._create_state( - dtype='int64', shape=[1], suffix='seq_num') - 
self.instance_error = self._create_state( - dtype='int64', shape=[1], suffix='instance_error') - distances, seq_num = layers.edit_distance( - input=input, label=label, ignored_tokens=ignored_tokens) + self.total_distance = self._create_state(dtype='float32', + shape=[1], + suffix='total_distance') + self.seq_num = self._create_state(dtype='int64', + shape=[1], + suffix='seq_num') + self.instance_error = self._create_state(dtype='int64', + shape=[1], + suffix='instance_error') + distances, seq_num = layers.edit_distance(input=input, + label=label, + ignored_tokens=ignored_tokens) zero = layers.fill_constant(shape=[1], value=0.0, dtype='float32') compare_result = layers.equal(distances, zero) compare_result_int = layers.cast(x=compare_result, dtype='int64') seq_right_count = layers.reduce_sum(compare_result_int) - instance_error_count = layers.elementwise_sub( - x=seq_num, y=seq_right_count) + instance_error_count = layers.elementwise_sub(x=seq_num, + y=seq_right_count) total_distance = layers.reduce_sum(distances) - layers.sums( - input=[self.total_distance, total_distance], - out=self.total_distance) + layers.sums(input=[self.total_distance, total_distance], + out=self.total_distance) layers.sums(input=[self.seq_num, seq_num], out=self.seq_num) - layers.sums( - input=[self.instance_error, instance_error_count], - out=self.instance_error) + layers.sums(input=[self.instance_error, instance_error_count], + out=self.instance_error) self.metrics.append(total_distance) self.metrics.append(instance_error_count) @@ -289,10 +291,10 @@ class EditDistance(Evaluator): seq_num = layers.cast(x=seq_num, dtype='float32') instance_error = layers.cast(x=instance_error, dtype='float32') avg_distance = layers.elementwise_div(x=total_distance, y=seq_num) - avg_instance_error = layers.elementwise_div( - x=instance_error, y=seq_num) - result = executor.run( - eval_program, fetch_list=[avg_distance, avg_instance_error]) + avg_instance_error = layers.elementwise_div(x=instance_error, + y=seq_num) + result = executor.run(eval_program, + fetch_list=[avg_distance, avg_instance_error]) return np.array(result[0]), np.array(result[1]) @@ -375,25 +377,26 @@ class DetectionMAP(Evaluator): label = layers.concat([gt_label, gt_box], axis=1) # calculate mean average precision (mAP) of current mini-batch - map = detection.detection_map( - input, - label, - class_num, - background_label, - overlap_threshold=overlap_threshold, - evaluate_difficult=evaluate_difficult, - ap_version=ap_version) + map = detection.detection_map(input, + label, + class_num, + background_label, + overlap_threshold=overlap_threshold, + evaluate_difficult=evaluate_difficult, + ap_version=ap_version) self._create_state(dtype='int32', shape=None, suffix='accum_pos_count') self._create_state(dtype='float32', shape=None, suffix='accum_true_pos') - self._create_state( - dtype='float32', shape=None, suffix='accum_false_pos') + self._create_state(dtype='float32', + shape=None, + suffix='accum_false_pos') self.has_state = None - var = self.helper.create_variable( - persistable=True, dtype='int32', shape=[1]) - self.helper.set_variable_initializer( - var, initializer=Constant(value=int(0))) + var = self.helper.create_variable(persistable=True, + dtype='int32', + shape=[1]) + self.helper.set_variable_initializer(var, + initializer=Constant(value=int(0))) self.has_state = var # calculate accumulative mAP @@ -409,11 +412,10 @@ class DetectionMAP(Evaluator): out_states=self.states, ap_version=ap_version) - layers.fill_constant( - shape=self.has_state.shape, - 
value=1, - dtype=self.has_state.dtype, - out=self.has_state) + layers.fill_constant(shape=self.has_state.shape, + value=1, + dtype=self.has_state.dtype, + out=self.has_state) self.cur_map = map self.accum_map = accum_map @@ -426,6 +428,8 @@ class DetectionMAP(Evaluator): reset_program = Program() with program_guard(main_program=reset_program): var = _clone_var_(reset_program.current_block(), self.has_state) - layers.fill_constant( - shape=var.shape, value=0, dtype=var.dtype, out=var) + layers.fill_constant(shape=var.shape, + value=0, + dtype=var.dtype, + out=var) executor.run(reset_program) diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 862b18dc5a2..0d4acf5fe6d 100755 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -278,8 +278,9 @@ def has_feed_operators(block, feed_targets, feed_holder_name): assert op.desc.input('X')[0] == feed_holder_name feed_target_name = op.desc.output('Out')[0] if feed_target_name not in feed_targets: - raise Exception("'feed_targets' does not have {} variable". - format(feed_target_name)) + raise Exception( + "'feed_targets' does not have {} variable".format( + feed_target_name)) else: break if feed_count > 0 and feed_count != len(feed_targets): @@ -322,8 +323,9 @@ def has_fetch_operators(block, if fetch_target_name not in [ var.desc.name() for var in fetch_targets ]: - raise Exception("'fetch_targets' does not have {} variable". - format(fetch_target_name)) + raise Exception( + "'fetch_targets' does not have {} variable".format( + fetch_target_name)) idx = op.desc.attr('col') assert fetch_target_name == fetch_targets[idx].desc.name() if fetch_count > 0 and fetch_count != len(fetch_targets): @@ -366,6 +368,7 @@ def _fetch_var(name, scope=None, return_numpy=True): def _to_name_str(var): + def _to_str(var): if isinstance(var, Variable): return var.desc.name() @@ -426,7 +429,7 @@ def _prepare_fleet_executor(): def _get_strong_program_cache_key(program, feed, fetch_list): - # NOTE(xiongkun) id(proram) may be duplicate. So add addition var_name as cache key. + # NOTE(xiongkun) id(proram) may be duplicate. So add addition var_name as cache key. 
def _get_varname_from_block(block): block_str = [] for var_name in list(block.vars.keys()): @@ -435,8 +438,8 @@ def _get_strong_program_cache_key(program, feed, fetch_list): inner_program = program._program if isinstance( program, compiler.CompiledProgram) else program - return _get_varname_from_block(inner_program.blocks[0]) + str(id( - program)) + _get_program_cache_key(feed, fetch_list) + return _get_varname_from_block(inner_program.blocks[0]) + str( + id(program)) + _get_program_cache_key(feed, fetch_list) def _get_program_cache_key(feed, fetch_list): @@ -499,6 +502,7 @@ def _as_lodtensor(data, place, dtype=None): class FetchHandler(object): + def __init__(self, var_dict=None, period_secs=60): assert var_dict != None self.var_dict = var_dict @@ -525,6 +529,7 @@ handler = FetchHandlerExample(var_dict=var_dict) class _StandaloneExecutor(object): + def __init__(self, place, main_program, scope): self._place = core.Place() self._place.set_place(place) @@ -610,6 +615,7 @@ class _StandaloneExecutor(object): class _ExecutorCache(object): + def __init__(self, place): # {Program : _StandaloneExecutor} self._place = place @@ -789,11 +795,10 @@ class Executor(object): for i, name in enumerate(feed): if global_block.has_var(name): out = global_block.var(name) - global_block._prepend_op( - type='feed', - inputs={'X': [feed_var]}, - outputs={'Out': [out]}, - attrs={'col': i}) + global_block._prepend_op(type='feed', + inputs={'X': [feed_var]}, + outputs={'Out': [out]}, + attrs={'col': i}) else: warnings.warn( "The variable %s is not found in program. It is not declared or is pruned." @@ -809,13 +814,13 @@ class Executor(object): fetch_op): for i, var in enumerate(fetch_list): assert isinstance(var, Variable) or isinstance( - var, six.string_types), ( - "Wrong type for fetch_list[%s]: %s" % (i, type(var))) - global_block.append_op( - type=fetch_op, - inputs={'X': [var]}, - outputs={'Out': [fetch_var]}, - attrs={'col': i}) + var, + six.string_types), ("Wrong type for fetch_list[%s]: %s" % + (i, type(var))) + global_block.append_op(type=fetch_op, + inputs={'X': [var]}, + outputs={'Out': [fetch_var]}, + attrs={'col': i}) return tmp_program @@ -886,8 +891,9 @@ class Executor(object): elif isinstance(item, tuple): if not isinstance(item[0], (list, tuple)): raise TypeError( - "Requires fetch_list[{}][0] shall be one of (list, tuple) when type(fetch_list[{}]) is `tuple`, but received fetch_list[{}][0]'s type is `{}`.". - format(index, index, index, type(item[0]).__name__)) + "Requires fetch_list[{}][0] shall be one of (list, tuple) when type(fetch_list[{}]) is `tuple`, but received fetch_list[{}][0]'s type is `{}`." 
+ .format(index, index, index, + type(item[0]).__name__)) for i in item[0]: _get_targets(_optimize_ops, _fetch_list, i) else: @@ -1051,8 +1057,8 @@ class Executor(object): # always set to CPU place, since the tensor need to be split # it is fast in CPU feed_tensor = _as_lodtensor(feed[feed_name], - core.CPUPlace(), var.dtype - if var else None) + core.CPUPlace(), + var.dtype if var else None) if need_check_feed: check_feed_shape_type(var, feed_tensor, exe.device_count()) feed_tensor_dict[feed_name] = feed_tensor @@ -1071,8 +1077,8 @@ class Executor(object): feed_name) if need_check_feed else None if not isinstance(tensor, core.LoDTensor): tensor = _as_lodtensor(each[feed_name], - program._places[i], var.dtype - if var else None) + program._places[i], + var.dtype if var else None) if need_check_feed: check_feed_shape_type(var, tensor) res_dict[feed_name] = tensor @@ -1092,9 +1098,8 @@ class Executor(object): "take any effect! Please set the learning rate manually before each batch!" ) else: - exe.feed_and_split_tensor_into_local_scopes({ - lr_sheduler._var_name: lr_tensor - }) + exe.feed_and_split_tensor_into_local_scopes( + {lr_sheduler._var_name: lr_tensor}) fetch_var_names = list(map(_to_name_str, fetch_list)) tensors = exe.run(fetch_var_names, return_merged)._move_to_list() @@ -1282,17 +1287,16 @@ class Executor(object): """ try: - res = self._run_impl( - program=program, - feed=feed, - fetch_list=fetch_list, - feed_var_name=feed_var_name, - fetch_var_name=fetch_var_name, - scope=scope, - return_numpy=return_numpy, - use_program_cache=use_program_cache, - use_prune=use_prune, - return_merged=return_merged) + res = self._run_impl(program=program, + feed=feed, + fetch_list=fetch_list, + feed_var_name=feed_var_name, + fetch_var_name=fetch_var_name, + scope=scope, + return_numpy=return_numpy, + use_program_cache=use_program_cache, + use_prune=use_prune, + return_merged=return_merged) core.update_autotune_status() return res except Exception as e: @@ -1315,20 +1319,20 @@ class Executor(object): # Move prepare here for port conflict with nccl in startup program if self._fleet_executor is None: self._fleet_executor = _prepare_fleet_executor() - return self._run_using_fleet_executor( - program=program, feed=feed, fetch_list=fetch_list) + return self._run_using_fleet_executor(program=program, + feed=feed, + fetch_list=fetch_list) if "startup_program" in program._pipeline_opt: program = program._pipeline_opt["startup_program"] else: - return self._run_pipeline( - program, - fetch_list=fetch_list, - use_program_cache=use_program_cache) + return self._run_pipeline(program, + fetch_list=fetch_list, + use_program_cache=use_program_cache) if isinstance(program, Program) and program._heter_pipeline_opt: #print("program._heter_pipeline_opt: {}".format( # program._heter_pipeline_opt)) - ## change default executor + ## change default executor heter_place = program._heter_pipeline_opt["heter_place"] heter_place = framework._get_paddle_place(heter_place) p = core.Place() @@ -1457,11 +1461,11 @@ class Executor(object): lr_sheduler = program.lr_sheduler lr_value = lr_sheduler() lr_var = program.global_block().vars[lr_sheduler._var_name] - data = np.array( - [lr_value]).astype(convert_dtype(lr_var.dtype)) + data = np.array([lr_value + ]).astype(convert_dtype(lr_var.dtype)) tensor = core.get_variable_tensor(scope, lr_sheduler._var_name) - # NOTE(dev): `set` always call TensorCopySync that is a + # NOTE(dev): `set` always call TensorCopySync that is a # blocking behavior. So we use `_copy_from` to replace it. 
cpu_tensor = _as_lodtensor(data, core.CPUPlace()) tensor._copy_from(cpu_tensor, self.place) @@ -1503,37 +1507,34 @@ class Executor(object): # _graph in program does not support inference since the _graph is optimized # through optimizer.minimize function and should not be used as inference graph # assert not program._graph._is_inference - return self._run_parallel( - program._graph, - scope=scope, - feed=feed, - fetch_list=fetch_list, - fetch_var_name=fetch_var_name, - return_numpy=return_numpy, - return_merged=return_merged) - - return self._run_program( - program, - feed=feed, - fetch_list=fetch_list, - feed_var_name=feed_var_name, - fetch_var_name=fetch_var_name, - scope=scope, - return_numpy=return_numpy, - use_program_cache=use_program_cache) + return self._run_parallel(program._graph, + scope=scope, + feed=feed, + fetch_list=fetch_list, + fetch_var_name=fetch_var_name, + return_numpy=return_numpy, + return_merged=return_merged) + + return self._run_program(program, + feed=feed, + fetch_list=fetch_list, + feed_var_name=feed_var_name, + fetch_var_name=fetch_var_name, + scope=scope, + return_numpy=return_numpy, + use_program_cache=use_program_cache) program._compile(scope, self.place) if program._is_inference: return self._run_inference(program._executor, feed) else: - return self._run_parallel( - program, - scope=scope, - feed=feed, - fetch_list=fetch_list, - fetch_var_name=fetch_var_name, - return_numpy=return_numpy, - return_merged=return_merged) + return self._run_parallel(program, + scope=scope, + feed=feed, + fetch_list=fetch_list, + fetch_var_name=fetch_var_name, + return_numpy=return_numpy, + return_merged=return_merged) def _run_program(self, program, feed, fetch_list, feed_var_name, fetch_var_name, scope, return_numpy, use_program_cache): @@ -1590,12 +1591,11 @@ class Executor(object): ctx = cached_ctx scope = cached_scope else: - program = self._add_feed_fetch_ops( - program=program, - feed=feed, - fetch_list=fetch_list, - feed_var_name=feed_var_name, - fetch_var_name=fetch_var_name) + program = self._add_feed_fetch_ops(program=program, + feed=feed, + fetch_list=fetch_list, + feed_var_name=feed_var_name, + fetch_var_name=fetch_var_name) self._feed_data(program, feed, feed_var_name, scope) if hasattr(program, 'lr_sheduler'): @@ -1625,7 +1625,8 @@ class Executor(object): return exe.run(feed) def _check_fetch_list(self, fetch_list): - is_fetch_var = lambda var: isinstance(var, (Variable, str, six.string_types)) + is_fetch_var = lambda var: isinstance(var, + (Variable, str, six.string_types)) is_tuple_list = lambda var: isinstance(var, (tuple, list)) if fetch_list is None: return [] @@ -1648,8 +1649,9 @@ class Executor(object): res.append(var) else: raise TypeError( - "Require fetch_list[{}] 's type shall be one of (Variable, str), but received {}.". - format(i, type(var).__name__)) + "Require fetch_list[{}] 's type shall be one of (Variable, str), but received {}." + .format(i, + type(var).__name__)) return res @@ -1764,7 +1766,7 @@ class Executor(object): import paddle if dataset is not None: raise RuntimeError("dataset should be None for pipeline mode") - # The following fake dataset is created to call + # The following fake dataset is created to call # the _prepare_trainer api, and it is meaningless. 
data_vars = [] for var in program.global_block().vars.values(): @@ -1790,7 +1792,7 @@ class Executor(object): if dataset is not None: raise RuntimeError( "dataset should be None for heter pipeline mode") - # The following fake dataset is created to call + # The following fake dataset is created to call # the _prepare_trainer api, and it is meaningless. data_vars = [] for var in program.global_block().vars.values(): @@ -1842,15 +1844,14 @@ class Executor(object): 'op_role', core.op_proto_and_checker_maker.OpRole.Optimize) fetch_list = None - scope, trainer = self._prepare_trainer( - program=program, - dataset=dataset, - scope=scope, - thread=thread, - debug=debug, - fetch_list=fetch_list, - fetch_info=fetch_info, - print_period=print_period) + scope, trainer = self._prepare_trainer(program=program, + dataset=dataset, + scope=scope, + thread=thread, + debug=debug, + fetch_list=fetch_list, + fetch_info=fetch_info, + print_period=print_period) trainer._set_infer(is_infer) trainer._gen_trainer_desc() @@ -1958,12 +1959,11 @@ class Executor(object): if fetch_var_name in real_program.global_block().vars: real_fetch_list.append(fetch_var) - real_program = self._add_feed_fetch_ops( - program=real_program, - feed=[], - fetch_list=real_fetch_list, - feed_var_name='feed', - fetch_var_name='fetch') + real_program = self._add_feed_fetch_ops(program=real_program, + feed=[], + fetch_list=real_fetch_list, + feed_var_name='feed', + fetch_var_name='fetch') main_block = real_program.block(0) for op in main_block.ops: # set the op_role of fetch op to Optimize to avoid @@ -1979,15 +1979,14 @@ class Executor(object): program._pipeline_opt["section_program"] = real_program fetch_list = None - scope, trainer = self._prepare_trainer( - program=program, - dataset=dataset, - scope=scope, - thread=thread, - debug=debug, - fetch_list=fetch_list, - fetch_info=fetch_info, - print_period=print_period) + scope, trainer = self._prepare_trainer(program=program, + dataset=dataset, + scope=scope, + thread=thread, + debug=debug, + fetch_list=fetch_list, + fetch_info=fetch_info, + print_period=print_period) trainer._set_infer(is_infer) trainer._gen_trainer_desc() @@ -2038,8 +2037,7 @@ class Executor(object): fleet_opt["dist_strategy"]["pp_degree"] == 1: warnings.warn("Using 1F1B scheduler with pp_degree == 1.") tasks, task_id_to_rank = run1f1b( - program, cur_rank, - fleet_opt.get('num_micro_batches', 1), + program, cur_rank, fleet_opt.get('num_micro_batches', 1), fleet_opt.get('dist_strategy', {}), nrank) elif scheduler == 'Origin': from paddle.distributed.fleet.fleet_executor_utils import origin @@ -2062,8 +2060,7 @@ class Executor(object): # NOTE: the last argument is used to force create some vars in root scope, # won't be used during train. 
self._fleet_executor.init(carrier_id, program.desc, scope, place, - num_micro_batches, tasks, task_id_to_rank, - []) + num_micro_batches, tasks, task_id_to_rank, []) def _run_using_fleet_executor(self, program=None, @@ -2110,10 +2107,9 @@ class Executor(object): feed_task = fleet_opt['tasks'][0] print("Inserting feed ops for task", feed_task.task_id()) feed_program = feed_task.get_program() - feed_program = self._add_feed_ops( - program=feed_program, - feed=real_feed, - feed_var_name=feed_var_name) + feed_program = self._add_feed_ops(program=feed_program, + feed=real_feed, + feed_var_name=feed_var_name) feed_task.set_program(feed_program) # Insert fetch ops @@ -2134,11 +2130,10 @@ class Executor(object): core.op_proto_and_checker_maker.OpRole.Optimize) fetch_task.set_program(fetch_program) - self._prepare_fleet_executor_carrier( - cache_key, - program=cached_program, - scope=cached_scope, - fleet_opt=fleet_opt) + self._prepare_fleet_executor_carrier(cache_key, + program=cached_program, + scope=cached_scope, + fleet_opt=fleet_opt) if feed: # NOTE: don't have to traverse programs in task nodes, @@ -2183,11 +2178,10 @@ class Executor(object): for i, name in enumerate(feed): if global_block.has_var(name): out = global_block.var(name) - global_block._prepend_op( - type='feed', - inputs={'X': [feed_var]}, - outputs={'Out': [out]}, - attrs={'col': i}) + global_block._prepend_op(type='feed', + inputs={'X': [feed_var]}, + outputs={'Out': [out]}, + attrs={'col': i}) else: warnings.warn( "The variable %s is not found in program. It is not declared or is pruned." @@ -2222,13 +2216,13 @@ class Executor(object): fetch_op): for i, var in enumerate(fetch_list): assert isinstance(var, Variable) or isinstance( - var, six.string_types), ( - "Wrong type for fetch_list[%s]: %s" % (i, type(var))) - global_block.append_op( - type=fetch_op, - inputs={'X': [var]}, - outputs={'Out': [fetch_var]}, - attrs={'col': i}) + var, + six.string_types), ("Wrong type for fetch_list[%s]: %s" % + (i, type(var))) + global_block.append_op(type=fetch_op, + inputs={'X': [var]}, + outputs={'Out': [fetch_var]}, + attrs={'col': i}) return tmp_program @@ -2349,15 +2343,14 @@ class Executor(object): fetch_info=None, print_period=100, fetch_handler=None): - scope, trainer = self._prepare_trainer( - program=program, - dataset=None, - scope=scope, - thread=1, - debug=debug, - fetch_list=fetch_list, - fetch_info=fetch_info, - print_period=print_period) + scope, trainer = self._prepare_trainer(program=program, + dataset=None, + scope=scope, + thread=1, + debug=debug, + fetch_list=fetch_list, + fetch_info=fetch_info, + print_period=print_period) trainer._set_infer(False) trainer._gen_trainer_desc() diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index bd453b3ddaa..e0b4f8d19e8 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -84,22 +84,22 @@ _already_patch_varbase = False _global_flags_ = core.globals() # Some explanation of our execution system 2022.03 -# For now we have 3 kinds of execution system, since we refactored dygraph mode to +# For now we have 3 kinds of execution system, since we refactored dygraph mode to # build a fast execution system for dynamic mode. But we can't just remove all legacy -# code once we present the new system for some historical reason. That's why we have +# code once we present the new system for some historical reason. That's why we have # these flags. -# +# # 1. 
_non_static_mode(): -# _non_static_mode means we are now running in legacy dygraph mode or dygraph mode. +# _non_static_mode means we are now running in legacy dygraph mode or dygraph mode. # 2. dygraph_mode(): # This flags inidicates we are now running in dygraph mode which called eager mode before. # 3. _in_legacy_dygraph(): # This flags inidicates we are now running in legacy dygraph mode -# +# # They have a relation ship as below: -# Both dygraph_mode and _in_legacy_dygraph are _non_static_mode, but if you are running in +# Both dygraph_mode and _in_legacy_dygraph are _non_static_mode, but if you are running in # dygraph mode means you are not in _in_legacy_dygraph. -# +# # Why we have to make different of _in_legacy_dygraph and dygraph_mode? # In some performance issue, we find that python if statement cause server performance problem # and we need our new dygraph mode becomes as fast as it could be. That's why we make these flags @@ -396,13 +396,13 @@ def require_version(min_version, max_version=None): return min_version_split = min_version.split('.') - min_version_to_check = min_version_split + zero_version[len( - min_version_split):] + min_version_to_check = min_version_split + zero_version[ + len(min_version_split):] if max_version is not None: max_version_split = max_version.split('.') - max_version_to_check = max_version_split + zero_version[len( - max_version_split):] + max_version_to_check = max_version_split + zero_version[ + len(max_version_split):] if version_cmp(version_installed, max_version_to_check) > 0 or version_cmp( @@ -419,6 +419,7 @@ def require_version(min_version, max_version=None): def _dygraph_not_support_(func): + def __impl__(*args, **kwargs): assert not _non_static_mode( ), "We don't support %s in dynamic graph mode" % func.__name__ @@ -428,6 +429,7 @@ def _dygraph_not_support_(func): def _dygraph_only_(func): + def __impl__(*args, **kwargs): assert _non_static_mode( ), "We only support '%s()' in dynamic graph mode, please call 'paddle.disable_static()' to enter dynamic graph mode." % func.__name__ @@ -437,6 +439,7 @@ def _dygraph_only_(func): def _static_only_(func): + def __impl__(*args, **kwargs): assert not _non_static_mode( ), "In PaddlePaddle 2.x, we turn on dynamic graph mode by default, and '%s()' is only supported in static graph mode. So if you want to use this api, please call 'paddle.enable_static()' before this api to enter static graph mode." % func.__name__ @@ -458,6 +461,7 @@ def _set_pipeline_stage(stage): # TODO(zhiqiu): We should make VarBase consistent with Variable in future, for example, by inheritting # same base class. def _fake_interface_only_(func): + def __impl__(*args, **kwargs): raise AssertionError( "'%s' only can be called by `paddle.Tensor` in dynamic graph mode. Suggestions:\n" @@ -475,6 +479,7 @@ def _fake_interface_only_(func): # NOTE(chenweihang): not using `wrap_decorator` here is because `wrap_decorator` will # move kwargs to args, which doesn't work in this decorate case def deprecate_stat_dict(func): + @functools.wraps(func) def wrapper(*args, **kwargs): if 'stat_dict' in kwargs: @@ -967,6 +972,7 @@ def mlu_places(device_ids=None): class NameScope(object): + def __init__(self, name="", parent=None): self._children = dict() self._name = name @@ -1154,8 +1160,9 @@ def _debug_string_(proto, throw_on_error=True): """ error_fields = list() if not proto.IsInitialized(error_fields) and throw_on_error: - raise ValueError("{0} are not initialized.\nThe message is {1}:\n". 
- format(error_fields, proto)) + raise ValueError( + "{0} are not initialized.\nThe message is {1}:\n".format( + error_fields, proto)) return proto.__str__() @@ -1172,19 +1179,20 @@ def _varbase_creator(type=core.VarDesc.VarType.LOD_TENSOR, if _in_eager_mode_: eager_tensor = core.eager.Tensor( dtype if dtype else core.VarDesc.VarType.FP32, - list(shape) if shape else [], name, type - if type else core.VarDesc.VarType.LOD_TENSOR, True - if persistable else False) + list(shape) if shape else [], name, + type if type else core.VarDesc.VarType.LOD_TENSOR, + True if persistable else False) eager_tensor.retain_grads() return eager_tensor else: return core.VarBase(dtype if dtype else core.VarDesc.VarType.FP32, - list(shape) if shape else [], name, type - if type else core.VarDesc.VarType.LOD_TENSOR, True - if persistable else False) + list(shape) if shape else [], name, + type if type else core.VarDesc.VarType.LOD_TENSOR, + True if persistable else False) class VariableMetaClass(type): + @classmethod def __instancecheck__(cls, instance): t = type(instance) @@ -1197,6 +1205,7 @@ class VariableMetaClass(type): class ParameterMetaClass(VariableMetaClass): + @classmethod def __instancecheck__(cls, instance): t = type(instance) @@ -1394,8 +1403,9 @@ class Variable(object): persistable=self.persistable, stop_gradient=True) - self.block.append_op( - type='share_data', inputs={'X': [self]}, outputs={'Out': [output]}) + self.block.append_op(type='share_data', + inputs={'X': [self]}, + outputs={'Out': [output]}) return output @fake_interface_only @@ -1609,8 +1619,8 @@ class Variable(object): dist_context = get_default_distributed_context() dist_tensor = dist_context.get_dist_tensor_for_program(self) if dist_tensor is not None: - var_str += ", {name} = {value}".format( - name="dist_attr", value=dist_tensor) + var_str += ", {name} = {value}".format(name="dist_attr", + value=dist_tensor) return var_str @@ -1643,8 +1653,8 @@ class Variable(object): print("=============with detail===============") print(new_variable.to_string(True, True)) """ - assert isinstance(throw_on_error, bool) and isinstance(with_details, - bool) + assert isinstance(throw_on_error, bool) and isinstance( + with_details, bool) protostr = self.desc.serialize_to_string() proto = framework_pb2.VarDesc.FromString(six.binary_type(protostr)) res_str = _debug_string_(proto, throw_on_error) @@ -1955,12 +1965,13 @@ class Variable(object): persistable=False, stop_gradient=False) - self.block.append_op( - type='transpose2', - inputs={'X': [self]}, - outputs={'Out': [out], - 'XShape': [input_shape]}, - attrs={'axis': perm}) + self.block.append_op(type='transpose2', + inputs={'X': [self]}, + outputs={ + 'Out': [out], + 'XShape': [input_shape] + }, + attrs={'axis': perm}) return out def clone(self): @@ -1993,8 +2004,9 @@ class Variable(object): persistable=self.persistable, stop_gradient=self.stop_gradient) - self.block.append_op( - type='assign', inputs={'X': [self]}, outputs={'Out': [output]}) + self.block.append_op(type='assign', + inputs={'X': [self]}, + outputs={'Out': [output]}) return output def _set_error_clip(self, error_clip): @@ -2060,8 +2072,8 @@ class Variable(object): start = upper if step < 0 else lower else: start = slice.start - start = max(start + length, lower) if start < 0 else min(start, - upper) + start = max(start + + length, lower) if start < 0 else min(start, upper) # Compute stop. 
if slice.stop is None: @@ -2135,22 +2147,24 @@ class Variable(object): def _sliceVar(self, axes, starts, ends): new_var = self._cloneVar() - self.block.append_op( - type="slice", - inputs={'Input': [self]}, - outputs={'Out': [new_var]}, - attrs={'axes': axes, - 'starts': starts, - 'ends': ends}) + self.block.append_op(type="slice", + inputs={'Input': [self]}, + outputs={'Out': [new_var]}, + attrs={ + 'axes': axes, + 'starts': starts, + 'ends': ends + }) return new_var def _concatVar(self, inputs, axis): new_var = self._cloneVar() - self.block.append_op( - type="concat", - inputs={'X': inputs}, - outputs={'Out': [new_var]}, - attrs={'axis': axis, }) + self.block.append_op(type="concat", + inputs={'X': inputs}, + outputs={'Out': [new_var]}, + attrs={ + 'axis': axis, + }) return new_var def _sliceAndConcatVar(self, item, axis): @@ -2164,13 +2178,13 @@ class Variable(object): vars = [] if step > 0: while start < stop: - vars.append( - self._sliceVar([axis], [start], [start + 1])) + vars.append(self._sliceVar([axis], [start], + [start + 1])) start += step else: while start > stop: - vars.append( - self._sliceVar([axis], [start], [start + 1])) + vars.append(self._sliceVar([axis], [start], + [start + 1])) start += step return self._concatVar(vars, axis) elif isinstance(item, int): @@ -2231,14 +2245,14 @@ class Variable(object): t_load = paddle.load(path+var.name+'.pdtensor') var.set_value(t_load) """ - # The 'framework' is a low-level module, and 'executor' - # can not be imported at the begainning of this file. + # The 'framework' is a low-level module, and 'executor' + # can not be imported at the begainning of this file. # Therefore, the above two modules are dynamically imported. from .executor import global_scope if scope is not None and not isinstance(scope, core._Scope): raise TypeError( - "`scope` should be None or `paddle.static.Scope` type, but received {}.". - format(type(scope))) + "`scope` should be None or `paddle.static.Scope` type, but received {}." + .format(type(scope))) if scope is None: scope = global_scope() @@ -2293,19 +2307,19 @@ class Variable(object): ''' # The 'framework' is a low-level module, and 'executor' - # can not be imported at the begainning of this file. + # can not be imported at the begainning of this file. # Therefore, the above two modules are dynamically imported. from .executor import global_scope if not (isinstance(value, np.ndarray) or hasattr(value, '__array__')): raise TypeError( - "`value` should be `numpy.ndarray` or `LoDTensor`, but received {}.". - format(type(value))) + "`value` should be `numpy.ndarray` or `LoDTensor`, but received {}." + .format(type(value))) if scope is not None and not isinstance(scope, core._Scope): raise TypeError( - "`scope` should be None or `paddle.static.Scope` type, but received {}.". - format(type(scope))) + "`scope` should be None or `paddle.static.Scope` type, but received {}." 
+ .format(type(scope))) if scope is None: scope = global_scope() @@ -2376,8 +2390,9 @@ class Variable(object): name=unique_name.generate_with_ignorable_key(self.name + "_size"), dtype=core.VarDesc.VarType.INT64) - self.block.append_op( - type='size', inputs={'Input': [self]}, outputs={'Out': [output]}) + self.block.append_op(type='size', + inputs={'Input': [self]}, + outputs={'Out': [output]}) return output def _set_attr(self, name, val): @@ -2610,12 +2625,12 @@ class Operator(object): op_maker = core.op_proto_and_checker_maker if op_maker.kOpRoleAttrName() not in op_attrs: - op_attrs[op_maker.kOpRoleAttrName( - )] = self.block.program._op_role + op_attrs[ + op_maker.kOpRoleAttrName()] = self.block.program._op_role role_var_name = op_maker.kOpRoleVarAttrName() - if len(self.block.program. - _op_role_var) != 0 and role_var_name not in op_attrs: + if len(self.block.program._op_role_var + ) != 0 and role_var_name not in op_attrs: op_attrs[role_var_name] = self.block.program._op_role_var if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0: @@ -2636,10 +2651,10 @@ class Operator(object): op_attrs[callstack_var_name] = [] for frame in traceback.extract_stack(): op_attrs[callstack_var_name].append( - ' File "{}", line {}, in {}'.format(frame[0], frame[1], - frame[2])) - op_attrs[callstack_var_name].append(' {}'.format(frame[ - 3])) + ' File "{}", line {}, in {}'.format( + frame[0], frame[1], frame[2])) + op_attrs[callstack_var_name].append(' {}'.format( + frame[3])) self.desc.set_type(type) proto = OpProtoHolder.instance().get_op_proto(type) @@ -2713,9 +2728,10 @@ class Operator(object): if (m.name not in outputs) and m.dispensable: continue if not ((m.name in outputs) or m.dispensable): - raise ValueError(("Incorrect setting for output(s) of " - "operator \"%s\", should set: [%s].") - % (type, m.name)) + raise ValueError( + ("Incorrect setting for output(s) of " + "operator \"%s\", should set: [%s].") % + (type, m.name)) for out_proto in proto.outputs: if out_proto.name not in outputs: continue @@ -2745,8 +2761,8 @@ class Operator(object): raise TypeError("'attrs' should be a dict.") for attr in proto.attrs: attr_name = attr.name - if (attr_name not in op_attrs) or ( - op_attrs[attr_name] is None): + if (attr_name + not in op_attrs) or (op_attrs[attr_name] is None): continue attr_val = op_attrs[attr_name] self._update_desc_attr(attr_name, attr_val) @@ -2863,7 +2879,7 @@ class Operator(object): attrs_str += ", " continue - # it is bytes of serialized protobuf + # it is bytes of serialized protobuf if is_compiled_with_cinn( ) and self.type == 'cinn_launch' and name == 'compilation_key': key = self.desc.attr(name) @@ -2877,8 +2893,9 @@ class Operator(object): else: value = self.desc.attr(name) - a = "{name} = {value}".format( - name=name, type=attr_type, value=value) + a = "{name} = {value}".format(name=name, + type=attr_type, + value=value) attrs_str += a if i != len(attr_names) - 1: @@ -2888,8 +2905,8 @@ class Operator(object): dist_context = get_default_distributed_context() dist_op = dist_context.get_dist_op_for_program(self) if dist_op is not None: - attrs_str += ", {name} = {value}".format( - name="dist_attr", value=dist_op) + attrs_str += ", {name} = {value}".format(name="dist_attr", + value=dist_op) if outputs_str != "{}": op_str = "{outputs} = {op_type}(inputs={inputs}, {attrs})".\ @@ -3298,8 +3315,8 @@ class Block(object): Returns: str: The debug string. 
""" - assert isinstance(throw_on_error, bool) and isinstance(with_details, - bool) + assert isinstance(throw_on_error, bool) and isinstance( + with_details, bool) if with_details: re_add_indent = re.compile(r"\n(.)") res_str = "blocks {\n idx: %d\n parent_idx: %d" % ( @@ -3491,47 +3508,43 @@ class Block(object): d = self.desc.find_var(cpt.to_bytes(new_name)) if var_type == "Parameter": if in_dygraph_mode(): - var = EagerParamBase( - d.shape(), - d.dtype(), - type=orig_var_type, - name=new_name, - stop_gradient=stop_gradient, - trainable=trainable, - optimize_attr=optimize_attr, - regularizer=regularizer, - error_clip=error_clip) + var = EagerParamBase(d.shape(), + d.dtype(), + type=orig_var_type, + name=new_name, + stop_gradient=stop_gradient, + trainable=trainable, + optimize_attr=optimize_attr, + regularizer=regularizer, + error_clip=error_clip) else: if _in_legacy_dygraph(): - var = ParamBase( - d.shape(), - d.dtype(), - type=orig_var_type, - name=new_name, - stop_gradient=stop_gradient, - trainable=trainable, - optimize_attr=optimize_attr, - regularizer=regularizer, - error_clip=error_clip) + var = ParamBase(d.shape(), + d.dtype(), + type=orig_var_type, + name=new_name, + stop_gradient=stop_gradient, + trainable=trainable, + optimize_attr=optimize_attr, + regularizer=regularizer, + error_clip=error_clip) else: - var = Parameter( - self, - d.shape(), - d.dtype(), - type=orig_var_type, - name=new_name, - stop_gradient=stop_gradient, - trainable=trainable, - optimize_attr=optimize_attr, - regularizer=regularizer, - error_clip=error_clip) + var = Parameter(self, + d.shape(), + d.dtype(), + type=orig_var_type, + name=new_name, + stop_gradient=stop_gradient, + trainable=trainable, + optimize_attr=optimize_attr, + regularizer=regularizer, + error_clip=error_clip) elif var_type == "Variable": - var = Variable( - self, - type=orig_var_type, - name=new_name, - error_clip=error_clip, - stop_gradient=stop_gradient) + var = Variable(self, + type=orig_var_type, + name=new_name, + error_clip=error_clip, + stop_gradient=stop_gradient) # rename the python side, _sync_with_cpp will only add # new vars/ops to python side. @@ -3580,8 +3593,8 @@ class Block(object): init_ops_len = len(init_ops) if init_ops_len > 1: raise RuntimeError("param " + param.name + - " is inited by multiple init ops " + str( - init_ops)) + " is inited by multiple init ops " + + str(init_ops)) elif init_ops_len == 1: # TODO already inited, do nothing, should log a warning pass @@ -3604,23 +3617,21 @@ class Block(object): "Op `%s` is executed through `append_op` under the dynamic mode, " "the corresponding API implementation needs to be upgraded to " "using `_C_ops` method." % type, DeprecationWarning) - op = Operator( - block=self, - desc=None, - type=type, - inputs=None, - outputs=None, - attrs=attrs) + op = Operator(block=self, + desc=None, + type=type, + inputs=None, + outputs=None, + attrs=attrs) # record ops in tracer rather than blocks # # TODO(minqiyang): add op stop_gradient support in static mode too. # currently, we only support stop_gradient in dygraph mode. 
- _dygraph_tracer().trace_op(type, - kwargs.get("inputs", {}), - kwargs.get("outputs", {}), attrs - if attrs else {}, + _dygraph_tracer().trace_op(type, kwargs.get("inputs", {}), + kwargs.get("outputs", + {}), attrs if attrs else {}, kwargs.get("stop_gradient", False), inplace_map) else: @@ -3633,13 +3644,12 @@ class Block(object): inputs = kwargs.get("inputs", None) outputs = kwargs.get("outputs", None) with param_guard(inputs), param_guard(outputs): - op = Operator( - block=self, - desc=op_desc, - type=kwargs.get("type", None), - inputs=inputs, - outputs=outputs, - attrs=kwargs.get("attrs", None)) + op = Operator(block=self, + desc=op_desc, + type=kwargs.get("type", None), + inputs=inputs, + outputs=outputs, + attrs=kwargs.get("attrs", None)) self.ops.append(op) @@ -3706,23 +3716,25 @@ class Block(object): if _non_static_mode(): type = kwargs.get("type", None) attrs = kwargs.get("attrs", {}) - op = Operator( - self, None, type=type, inputs=None, outputs=None, attrs=attrs) - - _dygraph_tracer().trace_op(type, - kwargs.get("inputs", {}), - kwargs.get("outputs", {}), attrs - if attrs else {}, + op = Operator(self, + None, + type=type, + inputs=None, + outputs=None, + attrs=attrs) + + _dygraph_tracer().trace_op(type, kwargs.get("inputs", {}), + kwargs.get("outputs", {}), + attrs if attrs else {}, kwargs.get("stop_gradient", False)) else: op_desc = self.desc._prepend_op() - op = Operator( - self, - op_desc, - type=kwargs.get("type", None), - inputs=kwargs.get("inputs", None), - outputs=kwargs.get("outputs", None), - attrs=kwargs.get("attrs", None)) + op = Operator(self, + op_desc, + type=kwargs.get("type", None), + inputs=kwargs.get("inputs", None), + outputs=kwargs.get("outputs", None), + attrs=kwargs.get("attrs", None)) self.ops.insert(0, op) return op @@ -3739,19 +3751,17 @@ class Block(object): if var.has_stop_gradient(): is_stop_gradient = var.stop_gradient() if var.has_is_parameter() and var.is_parameter(): - self.create_parameter( - name=var.name(), - desc=var, - type=var.type(), - shape=var.shape(), - dtype=var.dtype(), - stop_gradient=is_stop_gradient) + self.create_parameter(name=var.name(), + desc=var, + type=var.type(), + shape=var.shape(), + dtype=var.dtype(), + stop_gradient=is_stop_gradient) else: - self.create_var( - name=var.name(), - desc=var, - type=var.type(), - stop_gradient=is_stop_gradient) + self.create_var(name=var.name(), + desc=var, + type=var.type(), + stop_gradient=is_stop_gradient) # sync variables removed from c++ end for var in list(self.vars.keys()): @@ -3835,30 +3845,28 @@ class Block(object): assert isinstance(v, Variable) new_p = None if in_dygraph_mode(): - new_p = EagerParamBase( - shape=v.shape, - dtype=v.dtype, - type=v.type, - lod_level=v.lod_level, - stop_gradient=p.stop_gradient, - trainable=p.trainable, - optimize_attr=p.optimize_attr, - regularizer=p.regularizer, - error_clip=p.error_clip, - name=v.name) + new_p = EagerParamBase(shape=v.shape, + dtype=v.dtype, + type=v.type, + lod_level=v.lod_level, + stop_gradient=p.stop_gradient, + trainable=p.trainable, + optimize_attr=p.optimize_attr, + regularizer=p.regularizer, + error_clip=p.error_clip, + name=v.name) else: if _in_legacy_dygraph(): - new_p = ParamBase( - shape=v.shape, - dtype=v.dtype, - type=v.type, - lod_level=v.lod_level, - stop_gradient=p.stop_gradient, - trainable=p.trainable, - optimize_attr=p.optimize_attr, - regularizer=p.regularizer, - error_clip=p.error_clip, - name=v.name) + new_p = ParamBase(shape=v.shape, + dtype=v.dtype, + type=v.type, + lod_level=v.lod_level, + 
stop_gradient=p.stop_gradient, + trainable=p.trainable, + optimize_attr=p.optimize_attr, + regularizer=p.regularizer, + error_clip=p.error_clip, + name=v.name) else: new_p = Parameter( block=self, @@ -3892,11 +3900,13 @@ class Block(object): ret_var = None # make STEP_SCOPES var can be safely cloned. if var.type == core.VarDesc.VarType.STEP_SCOPES: - ret_var = self.create_var( - name=var.name, persistable=var.persistable, type=var.type) + ret_var = self.create_var(name=var.name, + persistable=var.persistable, + type=var.type) elif var.type == core.VarDesc.VarType.RAW: - ret_var = self.create_var( - name=var.name, persistable=var.persistable, type=var.type) + ret_var = self.create_var(name=var.name, + persistable=var.persistable, + type=var.type) elif var.type == core.VarDesc.VarType.SELECTED_ROWS: ret_var = self.create_var( name=var.name, @@ -3923,7 +3933,7 @@ class Block(object): # some Python Variable and all Python Operators should not be used # again. Because all Python Variables and all Python Operators are # re-constructed inside this method. The underlying VarDesc(OpDesc) -# of some old Python Variables(all old Python Operators) may have +# of some old Python Variables(all old Python Operators) may have # been destructed. def _apply_pass(main_program, startup_program, @@ -4437,8 +4447,7 @@ class IrGraph(object): """ return [ - IrGraph( - self.graph.get_sub_graph(i), for_test=for_test) + IrGraph(self.graph.get_sub_graph(i), for_test=for_test) for i in range(self.graph.sub_graph_size()) ] @@ -4697,9 +4706,9 @@ class IrGraph(object): def _convert_to_pdf(dot_file_path): pdf_save_path = os.path.splitext(dot_file_path)[0] + '.pdf' - exited_code = subprocess.call( - 'dot -Tpdf ' + dot_file_path + ' -o ' + pdf_save_path, - shell=True) + exited_code = subprocess.call('dot -Tpdf ' + dot_file_path + + ' -o ' + pdf_save_path, + shell=True) if exited_code != 0: print('The dot command is needed for creating pdf files.') print('The {} is saved as the dot filetype.'.format( @@ -4905,14 +4914,18 @@ class Program(object): old_var = None kwargs = { - 'type': new_var_desc.type(), - 'name': new_var_desc.name(), - 'shape': get_var_desc_attr_or_none(new_var_desc, "shape", [ + 'type': + new_var_desc.type(), + 'name': + new_var_desc.name(), + 'shape': + get_var_desc_attr_or_none(new_var_desc, "shape", [ core.VarDesc.VarType.LOD_TENSOR, core.VarDesc.VarType.SELECTED_ROWS, core.VarDesc.VarType.LOD_TENSOR_ARRAY, ]), - 'dtype': get_var_desc_attr_or_none(new_var_desc, "dtype", [ + 'dtype': + get_var_desc_attr_or_none(new_var_desc, "dtype", [ core.VarDesc.VarType.LOD_TENSOR, core.VarDesc.VarType.SELECTED_ROWS, core.VarDesc.VarType.LOD_TENSOR_ARRAY, @@ -4922,14 +4935,16 @@ class Program(object): core.VarDesc.VarType.LOD_TENSOR, core.VarDesc.VarType.LOD_TENSOR_ARRAY, ]), - 'error_clip': old_var.error_clip - if old_var is not None else None, - 'stop_gradient': old_var.stop_gradient - if old_var is not None else False, - 'is_data': old_var.is_data - if old_var is not None else False, - 'need_check_feed': new_var_desc.need_check_feed(), - 'belong_to_optimizer': old_var.belong_to_optimizer + 'error_clip': + old_var.error_clip if old_var is not None else None, + 'stop_gradient': + old_var.stop_gradient if old_var is not None else False, + 'is_data': + old_var.is_data if old_var is not None else False, + 'need_check_feed': + new_var_desc.need_check_feed(), + 'belong_to_optimizer': + old_var.belong_to_optimizer if old_var is not None else False, } @@ -5578,9 +5593,8 @@ class Program(object): 
targets_idx.append([t.block.idx, t.idx]) res = Program() - res.desc, pruned_origin_block_id_map = core.prune(self.desc, - set(feeded_var_names), - targets_idx) + res.desc, pruned_origin_block_id_map = core.prune( + self.desc, set(feeded_var_names), targets_idx) res.blocks = [ Block(res, i) for i in six.moves.range(res.desc.num_blocks()) ] @@ -6194,20 +6208,21 @@ class Program(object): paddle.save(prog.state_dict(), path) """ # The 'framework' is a low-level module, and 'executor' - # can not be imported at the begainning of this file. + # can not be imported at the begainning of this file. # Therefore, the above two modules are dynamically imported. from .executor import global_scope if scope is not None and not isinstance(scope, core._Scope): raise TypeError( - "`scope` should be None or `paddle.static.Scope'` type, but received {}.". - format(type(scope))) + "`scope` should be None or `paddle.static.Scope'` type, but received {}." + .format(type(scope))) if scope is None: scope = global_scope() if not isinstance(mode, str): - raise TypeError("Type of `mode` should be string, but received {}.". - format(type(mode))) + raise TypeError( + "Type of `mode` should be string, but received {}.".format( + type(mode))) def is_parameter(var): return isinstance(var, Parameter) @@ -6234,8 +6249,8 @@ class Program(object): return is_parameter(var) or is_belong_to_optimizer(var) else: raise ValueError( - "`mode` string should be 'param', 'opt' or 'all', but received {}.". - format(mode)) + "`mode` string should be 'param', 'opt' or 'all', but received {}." + .format(mode)) var_list = filter(condition, self.list_vars()) @@ -6244,8 +6259,8 @@ class Program(object): var_temp = scope.find_var(var.name) if var_temp is None: raise ValueError( - "Can not find Variable '{}' in the scope. Make sure it is initialized". - format(var.name)) + "Can not find Variable '{}' in the scope. Make sure it is initialized" + .format(var.name)) state_dict[var.name] = var_temp.get_tensor() return state_dict @@ -6315,9 +6330,9 @@ class Program(object): warnings.warn( ("Skip loading for '{}'. ".format(name) + str(err))) else: - warnings.warn(( - "Skip loading for '{0}'. Because '{0}' not in the program.". - format(name))) + warnings.warn( + ("Skip loading for '{0}'. Because '{0}' not in the program." 
+ .format(name))) @six.add_metaclass(ParameterMetaClass) @@ -6366,14 +6381,13 @@ class Parameter(Variable): "Each dimension of shape for Parameter must be greater than 0, but received %s" % list(shape)) - Variable.__init__( - self, - block, - persistable=True, - shape=shape, - dtype=dtype, - type=type, - **kwargs) + Variable.__init__(self, + block, + persistable=True, + shape=shape, + dtype=dtype, + type=type, + **kwargs) self.trainable = kwargs.get('trainable', True) self.optimize_attr = kwargs.get('optimize_attr', {'learning_rate': 1.0}) @@ -6413,8 +6427,8 @@ class Parameter(Variable): debug_str = prog.to_string(throw_on_error=True, with_details=False) print(debug_str) """ - assert isinstance(throw_on_error, bool) and isinstance(with_details, - bool) + assert isinstance(throw_on_error, bool) and isinstance( + with_details, bool) if with_details: res_str = Variable.to_string(self, throw_on_error, True) additional_attr = ("trainable", "optimize_attr", "regularizer", @@ -6477,10 +6491,10 @@ class ParamBase(core.VarBase): name = kwargs.get('name', unique_name.generate('_param_base')) - super(ParamBase, self).__init__(dtype - if dtype else core.VarDesc.VarType.FP32, - list(shape) if shape else [], name, - core.VarDesc.VarType.LOD_TENSOR, True) + super(ParamBase, + self).__init__(dtype if dtype else core.VarDesc.VarType.FP32, + list(shape) if shape else [], name, + core.VarDesc.VarType.LOD_TENSOR, True) trainable = kwargs.get('trainable', True) self.stop_gradient = not trainable @@ -6626,10 +6640,10 @@ class EagerParamBase(_core_eager_eagertensor): if isinstance(shape, core.eager.Tensor): shape = shape.numpy() - super(EagerParamBase, self).__init__( - dtype if dtype else core.VarDesc.VarType.FP32, - list(shape) - if shape else [], name, core.VarDesc.VarType.LOD_TENSOR, True) + super(EagerParamBase, + self).__init__(dtype if dtype else core.VarDesc.VarType.FP32, + list(shape) if shape else [], name, + core.VarDesc.VarType.LOD_TENSOR, True) self.retain_grads() trainable = kwargs.get('trainable', True) @@ -7158,8 +7172,8 @@ def _get_paddle_place(place): return core.MLUPlace(device_id) raise ValueError( - "Paddle supports CPUPlace, CUDAPlace,CUDAPinnedPlace, XPUPlace, IPUPlace, MLUPlace and NPUPlace, but received {}.". - format(place)) + "Paddle supports CPUPlace, CUDAPlace,CUDAPinnedPlace, XPUPlace, IPUPlace, MLUPlace and NPUPlace, but received {}." 
+ .format(place)) def _get_paddle_place_list(places): diff --git a/python/paddle/fluid/graphviz.py b/python/paddle/fluid/graphviz.py index 2b18d854d18..798c9914b79 100644 --- a/python/paddle/fluid/graphviz.py +++ b/python/paddle/fluid/graphviz.py @@ -29,6 +29,7 @@ def crepr(v): class Rank(object): + def __init__(self, kind, name, priority): ''' kind: str @@ -86,31 +87,28 @@ class Graph(object): def compile(self, dot_path): file = open(dot_path, 'w') file.write(self.__str__()) - image_path = os.path.join( - os.path.dirname(dot_path), dot_path[:-3] + "pdf") + image_path = os.path.join(os.path.dirname(dot_path), + dot_path[:-3] + "pdf") cmd = ["dot", "-Tpdf", dot_path, "-o", image_path] - subprocess.Popen( - cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + subprocess.Popen(cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) logging.warning("write block debug graph to {}".format(image_path)) return image_path def show(self, dot_path): image = self.compile(dot_path) cmd = ["open", image] - subprocess.Popen( - cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + subprocess.Popen(cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) def _rank_repr(self): - ranks = sorted( - six.iteritems(self.rank_groups), - key=functools.cmp_to_key( - lambda a, b: a[1].priority > b[1].priority)) + ranks = sorted(six.iteritems(self.rank_groups), + key=functools.cmp_to_key( + lambda a, b: a[1].priority > b[1].priority)) repr = [] for x in ranks: repr.append(str(x[1])) @@ -123,8 +121,8 @@ class Graph(object): ] for attr in self.attrs: - reprs.append("{key}={value};".format( - key=attr, value=crepr(self.attrs[attr]))) + reprs.append("{key}={value};".format(key=attr, + value=crepr(self.attrs[attr]))) reprs.append(self._rank_repr()) @@ -159,6 +157,7 @@ class Node(object): class Edge(object): + def __init__(self, source, target, **attrs): ''' Link source to target. 
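The Graph.compile and Graph.show hunks above (like IrGraph._convert_to_pdf earlier in this patch) all follow one pattern: write DOT source to disk, then shell out to Graphviz's dot to render a PDF. A minimal standalone sketch of that pattern, assuming the dot binary from Graphviz is on PATH; the helper name render_dot is illustrative and not part of the patched code:

import os
import subprocess

def render_dot(dot_source, dot_path):
    # Write the DOT source to disk, mirroring Graph.compile above.
    with open(dot_path, "w") as f:
        f.write(dot_source)
    pdf_path = os.path.splitext(dot_path)[0] + ".pdf"
    # Render with Graphviz; a non-zero exit code usually means dot is not installed.
    exit_code = subprocess.call(["dot", "-Tpdf", dot_path, "-o", pdf_path])
    if exit_code != 0:
        raise RuntimeError("The dot command is needed for creating pdf files.")
    return pdf_path

Passing the command as an argument list sidesteps the shell=True string concatenation that IrGraph._convert_to_pdf uses; both end up invoking the same dot -Tpdf <in> -o <out> call.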
@@ -175,9 +174,9 @@ class Edge(object): repr = "{source} -> {target} {extra}".format( source=self.source.name, target=self.target.name, - extra="" if not self.attrs else - "[" + ','.join("{}={}".format(attr[0], crepr(attr[1])) - for attr in six.iteritems(self.attrs)) + "]") + extra="" if not self.attrs else "[" + + ','.join("{}={}".format(attr[0], crepr(attr[1])) + for attr in six.iteritems(self.attrs)) + "]") return repr @@ -192,7 +191,8 @@ class GraphPreviewGenerator(object): title, layout="dot", concentrate="true", - rankdir="TB", ) + rankdir="TB", + ) self.op_rank = self.graph.rank_group('same', 2) self.param_rank = self.graph.rank_group('same', 1) @@ -221,16 +221,15 @@ class GraphPreviewGenerator(object): ' ', '>', ]) - return self.graph.node( - label, - prefix="param", - description=name, - shape="none", - style="rounded,filled,bold", - width="1.3", - color="#148b97" if not highlight else "orange", - fontcolor="#ffffff", - fontname="Arial") + return self.graph.node(label, + prefix="param", + description=name, + shape="none", + style="rounded,filled,bold", + width="1.3", + color="#148b97" if not highlight else "orange", + fontcolor="#ffffff", + fontname="Arial") def add_op(self, opType, **kwargs): highlight = False @@ -247,26 +246,25 @@ class GraphPreviewGenerator(object): fontname="Arial", fontcolor="#ffffff", width="1.3", - height="0.84", ) + height="0.84", + ) def add_arg(self, name, highlight=False): - return self.graph.node( - crepr(name), - prefix="arg", - description=name, - shape="box", - style="rounded,filled,bold", - fontname="Arial", - fontcolor="#999999", - color="#dddddd" if not highlight else "orange") + return self.graph.node(crepr(name), + prefix="arg", + description=name, + shape="box", + style="rounded,filled,bold", + fontname="Arial", + fontcolor="#999999", + color="#dddddd" if not highlight else "orange") def add_edge(self, source, target, **kwargs): highlight = False if 'highlight' in kwargs: highlight = kwargs['highlight'] del kwargs['highlight'] - return self.graph.edge( - source, - target, - color="#00000" if not highlight else "orange", - **kwargs) + return self.graph.edge(source, + target, + color="#00000" if not highlight else "orange", + **kwargs) diff --git a/python/paddle/fluid/incubate/checkpoint/auto_checkpoint.py b/python/paddle/fluid/incubate/checkpoint/auto_checkpoint.py index 6446642b153..b5dd3222b8f 100644 --- a/python/paddle/fluid/incubate/checkpoint/auto_checkpoint.py +++ b/python/paddle/fluid/incubate/checkpoint/auto_checkpoint.py @@ -69,6 +69,7 @@ def _thread_checker(): class AutoCheckpointChecker(object): + def __init__(self): self._run_env = None self._platform = None @@ -191,6 +192,7 @@ class AutoCheckpointChecker(object): class ExeTrainStatus(SerializableBase): + def __init__(self): self._epoch_no = -1 # start epoch_no self._hash_key = None @@ -263,6 +265,7 @@ class ExeTrainStatus(SerializableBase): class TrainEpochRange(SerializableBase): + def __init__(self, max_epoch_num, name, @@ -319,11 +322,10 @@ class TrainEpochRange(SerializableBase): epoch_no = -1 for i in cp_nos[::-1]: t = TrainEpochRange(self._max_epoch_num, self.name, restored=False) - self._cper.load_checkpoint( - self._checkpoint_path, [t], - self._checker.trainer_id, - checkpoint_no=i, - local_cache_path=self._checker._fs_cache) + self._cper.load_checkpoint(self._checkpoint_path, [t], + self._checker.trainer_id, + checkpoint_no=i, + local_cache_path=self._checker._fs_cache) cps.append(t) logger.debug("look for valid:{} t:{}".format(i, t._serialize())) if epoch_no < 0: @@ 
-343,10 +345,9 @@ class TrainEpochRange(SerializableBase): if g_acp_type == CONST_ACP_TYPE: # get the last one - self._cper.load_checkpoint( - self._checkpoint_path, [self], - self._checker.trainer_id, - local_cache_path=self._checker._fs_cache) + self._cper.load_checkpoint(self._checkpoint_path, [self], + self._checker.trainer_id, + local_cache_path=self._checker._fs_cache) self._restored_from = CONST_CHECKPOINT self._checkpoint_epoch_no = self._epoch_no @@ -359,11 +360,10 @@ class TrainEpochRange(SerializableBase): self._restored_from = CONST_MEMORYINIT return - self._cper.load_checkpoint( - self._checkpoint_path, [self], - self._checker.trainer_id, - checkpoint_no=i, - local_cache_path=self._checker._fs_cache) + self._cper.load_checkpoint(self._checkpoint_path, [self], + self._checker.trainer_id, + checkpoint_no=i, + local_cache_path=self._checker._fs_cache) self._restored_from = CONST_CHECKPOINT self._checkpoint_epoch_no = self._epoch_no @@ -497,9 +497,8 @@ class TrainEpochRange(SerializableBase): logger.debug("save executor checkpoint:{}".format(t._serialize())) if len(self._exe_status) > 0: - self._cper.save_checkpoint( - self._checkpoint_path, [self], - local_cache_path=self._checker._fs_cache) + self._cper.save_checkpoint(self._checkpoint_path, [self], + local_cache_path=self._checker._fs_cache) logger.info("save train_epoch_range checkpoint:{}".format( self._serialize())) @@ -658,11 +657,10 @@ def _auto_checkpoint(exe, prog): if t._restored_from is None: a = CheckpointSaver(g_train_epoch_range._hdfs) m = PaddleModel(exe, program) - a.load_checkpoint( - g_checker.get_exe_checkpoint_path(key), [m], - trainer_id=g_checker.trainer_id, - checkpoint_no=t._checkpoint_no, - local_cache_path=g_checker._fs_cache) + a.load_checkpoint(g_checker.get_exe_checkpoint_path(key), [m], + trainer_id=g_checker.trainer_id, + checkpoint_no=t._checkpoint_no, + local_cache_path=g_checker._fs_cache) t._restored_from = CONST_CHECKPOINT logger.info("load executor checkpoint {}".format(t)) t._exe = exe diff --git a/python/paddle/fluid/incubate/checkpoint/checkpoint_saver.py b/python/paddle/fluid/incubate/checkpoint/checkpoint_saver.py index 08400ab13a2..c8aeb50f157 100644 --- a/python/paddle/fluid/incubate/checkpoint/checkpoint_saver.py +++ b/python/paddle/fluid/incubate/checkpoint/checkpoint_saver.py @@ -16,6 +16,7 @@ from ...compiler import CompiledProgram class SerializableBase(object): + def serialize(self, path): raise NotImplementedError @@ -24,6 +25,7 @@ class SerializableBase(object): class PaddleModel(SerializableBase): + def __init__(self, exe, program): self._exe = exe self._origin_program = program @@ -35,22 +37,21 @@ class PaddleModel(SerializableBase): def serialize(self, path): from ...io import save_persistables - save_persistables( - executor=self._exe, - dirname=path, - main_program=self._program, - filename=self._file_name) + save_persistables(executor=self._exe, + dirname=path, + main_program=self._program, + filename=self._file_name) def deserialize(self, path): from ...io import load_persistables - load_persistables( - executor=self._exe, - dirname=path, - main_program=self._program, - filename=self._file_name) + load_persistables(executor=self._exe, + dirname=path, + main_program=self._program, + filename=self._file_name) class CheckpointSaver(object): + def __init__(self, fs): self._fs = fs self._checkpoint_prefix = "__paddle_checkpoint__" @@ -84,8 +85,9 @@ class CheckpointSaver(object): cache_path = None if self._fs.need_upload_download(): - cache_path = "{}/{}.{}.saved_cache".format( 
- local_cache_path, self._checkpoint_prefix, max_no) + cache_path = "{}/{}.{}.saved_cache".format(local_cache_path, + self._checkpoint_prefix, + max_no) if trainer_id is not None: cache_path = "{}.{}".format(cache_path, trainer_id) @@ -137,8 +139,9 @@ class CheckpointSaver(object): from paddle.distributed.fleet.utils.fs import LocalFS local_fs = LocalFS() if self._fs.need_upload_download(): - cache_path = "{}/{}.{}.load_cache".format( - local_cache_path, self._checkpoint_prefix, checkpoint_no) + cache_path = "{}/{}.{}.load_cache".format(local_cache_path, + self._checkpoint_prefix, + checkpoint_no) if trainer_id is not None: cache_path = "{}.{}".format(cache_path, trainer_id) diff --git a/python/paddle/fluid/incubate/data_generator/__init__.py b/python/paddle/fluid/incubate/data_generator/__init__.py index 7ff80039ae2..0ef851f52e7 100644 --- a/python/paddle/fluid/incubate/data_generator/__init__.py +++ b/python/paddle/fluid/incubate/data_generator/__init__.py @@ -214,6 +214,7 @@ class DataGenerator(object): # add more generalized DataGenerator that can adapt user-defined slot # for example, [(name, float_list), (name, str_list), (name, int_list)] class MultiSlotStringDataGenerator(DataGenerator): + def _gen_str(self, line): ''' Further processing the output of the process() function rewritten by @@ -251,6 +252,7 @@ class MultiSlotStringDataGenerator(DataGenerator): class MultiSlotDataGenerator(DataGenerator): + def _gen_str(self, line): ''' Further processing the output of the process() function rewritten by @@ -302,8 +304,8 @@ class MultiSlotDataGenerator(DataGenerator): for elem in elements: if isinstance(elem, float): self._proto_info[-1] = (name, "float") - elif not isinstance(elem, int) and not isinstance(elem, - long): + elif not isinstance(elem, int) and not isinstance( + elem, long): raise ValueError( "the type of element%s must be in int or float" % type(elem)) @@ -311,7 +313,8 @@ class MultiSlotDataGenerator(DataGenerator): else: if len(line) != len(self._proto_info): raise ValueError( - "the complete field set of two given line are inconsistent.") + "the complete field set of two given line are inconsistent." 
+ ) for index, item in enumerate(line): name, elements = item if not isinstance(name, str): @@ -334,8 +337,8 @@ class MultiSlotDataGenerator(DataGenerator): if self._proto_info[index][1] != "float": if isinstance(elem, float): self._proto_info[index] = (name, "float") - elif not isinstance(elem, int) and not isinstance(elem, - long): + elif not isinstance(elem, int) and not isinstance( + elem, long): raise ValueError( "the type of element%s must be in int or float" % type(elem)) diff --git a/python/paddle/fluid/incubate/fleet/base/role_maker.py b/python/paddle/fluid/incubate/fleet/base/role_maker.py index 90387337faa..f97f46a7c49 100644 --- a/python/paddle/fluid/incubate/fleet/base/role_maker.py +++ b/python/paddle/fluid/incubate/fleet/base/role_maker.py @@ -630,8 +630,8 @@ class GeneralRoleMaker(RoleMakerBase): raise ValueError("TRAINING_ROLE must be PSERVER or TRAINER") self._is_barrier_all = 1 if "PADDLE_IS_BARRIER_ALL_ROLE" in os.environ: - self._is_barrier_all = int(os.environ[ - "PADDLE_IS_BARRIER_ALL_ROLE"]) + self._is_barrier_all = int( + os.environ["PADDLE_IS_BARRIER_ALL_ROLE"]) if training_role == "TRAINER": role = Role.WORKER current_id = int(os.environ["PADDLE_TRAINER_ID"]) @@ -642,9 +642,9 @@ class GeneralRoleMaker(RoleMakerBase): "all": len(worker_endpoints) + len(eplist) } # child process for http server - self._http_server = Process( - target=self.__start_kv_server, - args=(self._http_server_d, size_d)) + self._http_server = Process(target=self.__start_kv_server, + args=(self._http_server_d, + size_d)) self._http_server.daemon = True # set running status to True self._http_server_d["running"] = True diff --git a/python/paddle/fluid/incubate/fleet/collective/__init__.py b/python/paddle/fluid/incubate/fleet/collective/__init__.py index 6466ce4b42e..da4fe609ca3 100644 --- a/python/paddle/fluid/incubate/fleet/collective/__init__.py +++ b/python/paddle/fluid/incubate/fleet/collective/__init__.py @@ -39,16 +39,19 @@ import shutil class LambConfig(object): + def __init__(self): pass class DistFCConfig(object): + def __init__(self): pass class Collective(Fleet): + def __init__(self): super(Collective, self).__init__(Mode.COLLECTIVE) self._local_ip = 0 @@ -186,11 +189,10 @@ class Collective(Fleet): m = PaddleModel(executor, main_program) c = CheckpointSaver(fs) - return c.load_checkpoint( - path, [m, train_status], - trainer_id=trainer_id, - ignore_empty=ignore_empty, - local_cache_path=local_cache_path) + return c.load_checkpoint(path, [m, train_status], + trainer_id=trainer_id, + ignore_empty=ignore_empty, + local_cache_path=local_cache_path) fleet = Collective() @@ -294,27 +296,24 @@ class CollectiveOptimizer(DistributedOptimizer): if strategy.use_local_sgd: strategy.mode = "collective" strategy.collective_mode = "local_sgd" - self._check_condition( - "use_local_sgd", - use_dgc=main_program._enable_dgc, - use_dist_fc=strategy.use_dist_fc, - use_lamb=main_program._use_lamb) + self._check_condition("use_local_sgd", + use_dgc=main_program._enable_dgc, + use_dist_fc=strategy.use_dist_fc, + use_lamb=main_program._use_lamb) if strategy.use_dist_fc: - self._check_condition( - "use_dist_fc", - use_dgc=main_program._enable_dgc, - use_local_sgd=strategy.use_local_sgd, - use_lamb=main_program._use_lamb) + self._check_condition("use_dist_fc", + use_dgc=main_program._enable_dgc, + use_local_sgd=strategy.use_local_sgd, + use_lamb=main_program._use_lamb) assert strategy.dist_fc_config is not None, "DistributedStrategy.dist_fc_config should be set" if strategy._ut4grad_allreduce: strategy.mode = 
"collective" strategy.collective_mode = "grad_allreduce" - self._check_condition( - "_ut4grad_allreduce", - use_dgc=main_program._enable_dgc, - use_lamb=main_program._use_lamb) + self._check_condition("_ut4grad_allreduce", + use_dgc=main_program._enable_dgc, + use_lamb=main_program._use_lamb) if self._strategy.collective_mode=="local_sgd" \ or self._strategy.collective_mode == "grad_allreduce": @@ -346,12 +345,11 @@ class CollectiveOptimizer(DistributedOptimizer): config.hierarchical_allreduce_inter_nranks = self._strategy.hierarchical_allreduce_inter_nranks t = dist_transpiler.DistributeTranspiler(config=config) - t.transpile( - trainer_id=trainer_id, - trainers=worker_endpoints_env, - startup_program=startup_program, - program=main_program, - current_endpoint=current_endpoint) + t.transpile(trainer_id=trainer_id, + trainers=worker_endpoints_env, + startup_program=startup_program, + program=main_program, + current_endpoint=current_endpoint) def _get_node_ips_from_endpoints(self, endpoints): ss = set() diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py index e556a98ed75..1354c317b0a 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py @@ -109,13 +109,15 @@ class FleetTranspiler(Fleet): return kwargs def geo_strategy_envs(): + def get_sparse_attrs(): opt_init_map = {} opt_init_map["gaussian_random"] = ["seed", "mean", "std"] opt_init_map["fill_constant"] = ["value"] opt_init_map["uniform_random"] = ["seed", "min", "max"] - opt_init_map[ - "truncated_gaussian_random"] = ["seed", "mean", "std"] + opt_init_map["truncated_gaussian_random"] = [ + "seed", "mean", "std" + ] dist_varnames = get_sparse_tablenames(self._origin_main_program, True) @@ -129,8 +131,8 @@ class FleetTranspiler(Fleet): init_attrs = [] for value_name in sparse_varnames: - value_var = self._origin_main_program.global_block().vars[ - value_name] + value_var = self._origin_main_program.global_block( + ).vars[value_name] value_attr = [ value_name, ",".join([str(dim) for dim in value_var.shape]) @@ -238,14 +240,13 @@ class FleetTranspiler(Fleet): distribtued_varnames), self.main_program.list_vars())) - fluid.io.load_vars( - self._executor, - main_program=self.main_program, - dirname=model_dir, - vars=remaining_vars) + fluid.io.load_vars(self._executor, + main_program=self.main_program, + dirname=model_dir, + vars=remaining_vars) - self._load_sparse_params( - dirname=model_dir, varnames=sparse_varnames) + self._load_sparse_params(dirname=model_dir, + varnames=sparse_varnames) # todo(tangwei12) load distributed vars # self._load_sparse_params(dirname=model_dir, varnames=distribtued_varnames) @@ -324,7 +325,8 @@ class FleetTranspiler(Fleet): raise ValueError("optimizer must be an instance of Optimizer") if not self._is_initialized: raise ValueError( - "fleet.init(role) to initialize before optimizer.minimize(loss)") + "fleet.init(role) to initialize before optimizer.minimize(loss)" + ) if not strategy: _strategy = StrategyFactory.create_async_strategy() @@ -439,8 +441,9 @@ class FleetTranspiler(Fleet): reshaped_val_map["adamax"] = ["moment_0", "inf_norm_0"] reshaped_val_map["momentum"] = ["velocity_0"] reshaped_val_map["lars_momentum"] = ["velocity_0"] - reshaped_val_map[ - "rmsprop"] = ["momentum_0", "mean_square_0", "mean_grad_0"] + reshaped_val_map["rmsprop"] = 
[ + "momentum_0", "mean_square_0", "mean_grad_0" + ] reshaped_val_map["decayed_adagrad"] = ["moment_0"] reshaped_val_map["ftrl"] = ["squared_0", "linear_0"] @@ -450,8 +453,8 @@ class FleetTranspiler(Fleet): if op not in supported_opts: raise ValueError( - "fleet can not support optimizer: {}, only this can be supported: {}". - format(op, supported_opts)) + "fleet can not support optimizer: {}, only this can be supported: {}" + .format(op, supported_opts)) reshaped_names = [ param_name + "_" + val for val in reshaped_val_map[op] @@ -492,19 +495,23 @@ class FleetTranspiler(Fleet): for var_name in [varname] + reshaped_varnames + origin_varnames: var = self._origin_main_program.global_block().vars[var_name] - block.append_op( - type='recv_save', - attrs={ - "trainer_id": self._role_maker.worker_index(), - "shape": var.shape, - "slice_shapes": - [",".join([str(i) for i in var.shape])], - "slice_varnames": [var.name], - "remote_varnames": [var.name], - "is_sparse": False, - "endpoints": var_ctx.split_endpoints(), - "file_path": os.path.join(dirname, var.name) - }) + block.append_op(type='recv_save', + attrs={ + "trainer_id": + self._role_maker.worker_index(), + "shape": + var.shape, + "slice_shapes": + [",".join([str(i) for i in var.shape])], + "slice_varnames": [var.name], + "remote_varnames": [var.name], + "is_sparse": + False, + "endpoints": + var_ctx.split_endpoints(), + "file_path": + os.path.join(dirname, var.name) + }) executor.run(prog) return local_vars @@ -532,30 +539,37 @@ class FleetTranspiler(Fleet): for section in var_ctx.sections(): slice_shapes.append(str(section) + dims1) - block.append_op( - type='recv_save', - attrs={ - "trainer_id": self._role_maker.worker_index(), - "shape": var.shape, - "slice_shapes": slice_shapes, - "slice_varnames": var_ctx.split_varnames(), - "remote_varnames": var_ctx.split_varnames(), - "is_sparse": True, - "endpoints": var_ctx.split_endpoints(), - "pserver_num": - len(self._role_maker.get_pserver_endpoints()), - "file_path": os.path.join(dirname, var.name) - }) + block.append_op(type='recv_save', + attrs={ + "trainer_id": + self._role_maker.worker_index(), + "shape": + var.shape, + "slice_shapes": + slice_shapes, + "slice_varnames": + var_ctx.split_varnames(), + "remote_varnames": + var_ctx.split_varnames(), + "is_sparse": + True, + "endpoints": + var_ctx.split_endpoints(), + "pserver_num": + len(self._role_maker.get_pserver_endpoints()), + "file_path": + os.path.join(dirname, var.name) + }) for reshaped_varname in reshaped_varnames: - var = self._origin_main_program.global_block().vars[ - reshaped_varname] + var = self._origin_main_program.global_block( + ).vars[reshaped_varname] slice_varnames = [] remote_varnames = [] for i in range(len(var_ctx.split_varnames())): - slice_varnames.append("{}.block{}".format(reshaped_varname, - i)) + slice_varnames.append("{}.block{}".format( + reshaped_varname, i)) remote_varnames.append(reshaped_varname) block.append_op( @@ -574,22 +588,26 @@ class FleetTranspiler(Fleet): }) for origin_varname in origin_varnames: - var = self._origin_main_program.global_block().vars[ - origin_varname] - - block.append_op( - type='recv_save', - attrs={ - "trainer_id": self._role_maker.worker_index(), - "shape": var.shape, - "slice_shapes": - [",".join([str(i) for i in var.shape])], - "slice_varnames": [origin_varname], - "remote_varnames": [origin_varname], - "is_sparse": False, - "endpoints": var_ctx.split_endpoints()[:1], - "file_path": os.path.join(dirname, var.name) - }) + var = self._origin_main_program.global_block( + 
).vars[origin_varname] + + block.append_op(type='recv_save', + attrs={ + "trainer_id": + self._role_maker.worker_index(), + "shape": + var.shape, + "slice_shapes": + [",".join([str(i) for i in var.shape])], + "slice_varnames": [origin_varname], + "remote_varnames": [origin_varname], + "is_sparse": + False, + "endpoints": + var_ctx.split_endpoints()[:1], + "file_path": + os.path.join(dirname, var.name) + }) executor.run(prog) return context.keys() @@ -599,16 +617,15 @@ class FleetTranspiler(Fleet): block = prog.global_block() for name, var_ctx in context.items(): - block.append_op( - type='checkpoint_notify', - attrs={ - "varname": name, - "is_slice": True, - "slice_varnames": var_ctx.split_varnames(), - "remote_varnames": var_ctx.split_varnames(), - "endpoints": var_ctx.split_endpoints(), - "dirname": dirname - }) + block.append_op(type='checkpoint_notify', + attrs={ + "varname": name, + "is_slice": True, + "slice_varnames": var_ctx.split_varnames(), + "remote_varnames": var_ctx.split_varnames(), + "endpoints": var_ctx.split_endpoints(), + "dirname": dirname + }) executor.run(prog) return context.keys() @@ -626,8 +643,9 @@ class FleetTranspiler(Fleet): recv_dense_varnames = self._save_dense_params(executor, dirname, dense_ctx, main_program) - recv_sparse_varnames = self._save_sparse_params( - executor, dirname, sparse_ctx, main_program) + recv_sparse_varnames = self._save_sparse_params(executor, dirname, + sparse_ctx, + main_program) recv_distributed_varnames = self._save_distributed_params( executor, dirname, distributed_ctx, main_program) @@ -636,15 +654,13 @@ class FleetTranspiler(Fleet): recv_sparse_varnames) + list(recv_distributed_varnames) remaining_vars = list( - filter( - FleetTranspiler.__exclude_vars(saved_varnames), - main_program.list_vars())) + filter(FleetTranspiler.__exclude_vars(saved_varnames), + main_program.list_vars())) - fluid.io.save_vars( - executor, - main_program=main_program, - dirname=dirname, - vars=remaining_vars) + fluid.io.save_vars(executor, + main_program=main_program, + dirname=dirname, + vars=remaining_vars) def save_persistables(self, executor, dirname, main_program=None, **kwargs): """ @@ -690,6 +706,7 @@ if you would like to save all variables in a @staticmethod def __exclude_vars(exclude_var_names=[]): + def is_valid(var): if var.name in exclude_var_names: return False @@ -738,10 +755,11 @@ class ParameterServerOptimizer(DistributedOptimizer): if self._mode == PSMode.PSLIB: self._optimizer_name = "Distributed%s" % optimizer.type.capitalize() if optimizer.type != "adam": - print("Currently, distributed optimizer only support Adam" - "Will config built-in adam for you." - "We will support more functions in DistributedOptimizer", - sys.stderr) + print( + "Currently, distributed optimizer only support Adam" + "Will config built-in adam for you." 
+ "We will support more functions in DistributedOptimizer", + sys.stderr) self._optimizer_name = "DistributedAdam" self._optimizer = globals()[self._optimizer_name](optimizer) @@ -779,8 +797,8 @@ class ParameterServerOptimizer(DistributedOptimizer): # for startup program _startup = worker.fake_init_ops_pass(_startup, compiled_config) _startup = worker.init_from_server_pass(_startup, compiled_config) - _startup = worker.delet_extra_optimizes_pass(_startup, - compiled_config) + _startup = worker.delet_extra_optimizes_pass( + _startup, compiled_config) else: _main = worker.append_send_ops_pass(_main, compiled_config) _startup = _startup @@ -803,11 +821,11 @@ class ParameterServerOptimizer(DistributedOptimizer): compiled_config, True) if not compiled_config.is_sync_mode(): - _main = server.delete_unused_in_main_pass(_main, - compiled_config) + _main = server.delete_unused_in_main_pass( + _main, compiled_config) - _startup = server.delete_unused_in_startup_pass(_startup, _main, - compiled_config) + _startup = server.delete_unused_in_startup_pass( + _startup, _main, compiled_config) else: _main = server.add_listen_and_serv_pass(_main, compiled_config) _main = server.add_rpc_global_flags_pass(_main, compiled_config) @@ -818,8 +836,8 @@ class ParameterServerOptimizer(DistributedOptimizer): _startup, _main, compiled_config) _startup = server.large_scale_sparse_pass(_startup, _main, compiled_config, True) - _startup = server.delete_unused_in_startup_pass(_startup, _main, - compiled_config) + _startup = server.delete_unused_in_startup_pass( + _startup, _main, compiled_config) return _main, _startup diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py index 2a9d26daaed..8e40fa81ebb 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/distributed_strategy.py @@ -24,6 +24,7 @@ from paddle.fluid.incubate.fleet.parameter_server.mode import DistributedMode class TrainerRuntimeConfig(object): + def __init__(self): self.mode = None num_threads = os.getenv("CPU_NUM", "1") @@ -46,9 +47,9 @@ class TrainerRuntimeConfig(object): self.runtime_configs['communicator_is_sgd_optimizer'] = os.getenv( "FLAGS_communicator_is_sgd_optimizer", "1") - # not used - self.runtime_configs['rpc_deadline'] = os.getenv("FLAGS_rpc_deadline", - "180000") + # not used + self.runtime_configs['rpc_deadline'] = os.getenv( + "FLAGS_rpc_deadline", "180000") self.runtime_configs['rpc_retry_times'] = os.getenv( "FLAGS_rpc_retry_times", "3") @@ -84,18 +85,18 @@ class TrainerRuntimeConfig(object): print('WARNING: In {} mode, communicator_max_merge_var_num ' 'must be equal to CPU_NUM. But received, ' 'communicator_max_merge_var_num = {}, CPU_NUM = ' - '{}. communicator_max_merge_var_num will be fored to {}.' - .format(mode_str, max_merge_var_num, num_threads, - num_threads)) + '{}. communicator_max_merge_var_num will be fored to {}.'. + format(mode_str, max_merge_var_num, num_threads, + num_threads)) self.runtime_configs[ 'communicator_max_merge_var_num'] = num_threads if send_queue_size != num_threads: print('WARNING: In {} mode, communicator_send_queue_size ' 'must be equal to CPU_NUM. But received, ' 'communicator_send_queue_size = {}, CPU_NUM = ' - '{}. communicator_send_queue_size will be fored to {}.' 
- .format(mode_str, send_queue_size, num_threads, - num_threads)) + '{}. communicator_send_queue_size will be fored to {}.'. + format(mode_str, send_queue_size, num_threads, + num_threads)) self.runtime_configs[ 'communicator_send_queue_size'] = num_threads @@ -127,6 +128,7 @@ class TrainerRuntimeConfig(object): class PSLibRuntimeConfig(object): + def __init__(self): self.runtime_configs = {} @@ -135,6 +137,7 @@ class PSLibRuntimeConfig(object): class DistributedStrategy(object): + def __init__(self): self._program_config = DistributeTranspilerConfig() self._trainer_runtime_config = TrainerRuntimeConfig() @@ -295,6 +298,7 @@ class DistributedStrategy(object): class SyncStrategy(DistributedStrategy): + def __init__(self): super(SyncStrategy, self).__init__() self.check_program_config() @@ -323,6 +327,7 @@ class SyncStrategy(DistributedStrategy): class AsyncStrategy(DistributedStrategy): + def __init__(self): super(AsyncStrategy, self).__init__() self.check_program_config() @@ -349,6 +354,7 @@ class AsyncStrategy(DistributedStrategy): class HalfAsyncStrategy(DistributedStrategy): + def __init__(self): super(HalfAsyncStrategy, self).__init__() self.check_program_config() @@ -376,6 +382,7 @@ class HalfAsyncStrategy(DistributedStrategy): class GeoStrategy(DistributedStrategy): + def __init__(self, update_frequency=100): super(GeoStrategy, self).__init__() self._program_config.geo_sgd_need_push_nums = update_frequency @@ -410,6 +417,7 @@ class GeoStrategy(DistributedStrategy): class StrategyFactory(object): + def __init_(self): pass diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/heter_trainer_pass.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/heter_trainer_pass.py index ebf9395361c..0018b73e264 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/heter_trainer_pass.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/heter_trainer_pass.py @@ -36,8 +36,8 @@ def split_heter_worker_ops_pass(program, config, stage_id, device): 3. create heter worker program, add listen&serv op """ default_deveice = "cpu" - program, heter_ops, _, program_block_ops = find_heter_ops(program, - default_deveice) + program, heter_ops, _, program_block_ops = find_heter_ops( + program, default_deveice) if len(heter_ops) == 0: warnings.warn( "Currently running in Heter Parameter Server mode, but no OP running on heterogeneous devices, Please check your code." 
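The TrainerRuntimeConfig hunks above read communicator tunables from FLAGS_* environment variables and then warn and override communicator_send_queue_size / communicator_max_merge_var_num when they disagree with CPU_NUM. A self-contained sketch of that read-with-default-then-reconcile pattern, reusing the variable names from the hunk; the build_runtime_config helper and the FLAGS_* defaults are assumptions for this sketch, not the patched code:

import os

def build_runtime_config():
    # CPU_NUM with a default of "1" mirrors the hunk above; the FLAGS_* defaults
    # below are assumed for the sketch.
    num_threads = int(os.getenv("CPU_NUM", "1"))
    send_queue_size = int(
        os.getenv("FLAGS_communicator_send_queue_size", str(num_threads)))
    max_merge_var_num = int(
        os.getenv("FLAGS_communicator_max_merge_var_num", str(num_threads)))
    # The hunk above warns and forces the value back to CPU_NUM when the
    # configured value disagrees with it.
    if send_queue_size != num_threads:
        print("WARNING: communicator_send_queue_size will be forced to %d" %
              num_threads)
        send_queue_size = num_threads
    if max_merge_var_num != num_threads:
        print("WARNING: communicator_max_merge_var_num will be forced to %d" %
              num_threads)
        max_merge_var_num = num_threads
    return {
        "communicator_send_queue_size": send_queue_size,
        "communicator_max_merge_var_num": max_merge_var_num,
    }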
diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py index 295f02e73cf..38a4a14b02f 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py @@ -83,6 +83,7 @@ def _get_optimizer_input_shape(op_type, varkey, orig_shape, param_shape): def _append_pserver_non_opt_ops(optimize_block, opt_op, origin_program, config): + def _get_pserver_grad_param_var(var, var_dict): """ Return pserver side grad/param variable, return None @@ -122,7 +123,8 @@ def _append_pserver_non_opt_ops(optimize_block, opt_op, origin_program, config): # for ops like clipping and weight decay, get the split var(xxx.block0) # for inputs / outputs grad_block = _get_pserver_grad_param_var( - var, program.global_block().vars) + var, + program.global_block().vars) if grad_block: varlist[i] = grad_block elif var.name not in program.global_block().vars: @@ -140,7 +142,8 @@ def _append_pserver_non_opt_ops(optimize_block, opt_op, origin_program, config): for i in range(len(varlist)): var = varlist[i] grad_block = _get_pserver_grad_param_var( - var, program.global_block().vars) + var, + program.global_block().vars) if grad_block: varlist[i] = grad_block elif var.name not in program.global_block().vars: @@ -150,11 +153,10 @@ def _append_pserver_non_opt_ops(optimize_block, opt_op, origin_program, config): varlist[i] = program.global_block().vars[var.name] outputs[key] = varlist - return optimize_block.append_op( - type=opt_op.type, - inputs=inputs, - outputs=outputs, - attrs=opt_op.all_attrs()) + return optimize_block.append_op(type=opt_op.type, + inputs=inputs, + outputs=outputs, + attrs=opt_op.all_attrs()) def _append_pserver_ops(optimize_block, opt_op, endpoint, grad_to_block_id, @@ -221,11 +223,10 @@ def _append_pserver_ops(optimize_block, opt_op, endpoint, grad_to_block_id, if not param_block: return - tmpvar = pserver_block.create_var( - name=param_block.name, - persistable=True, - dtype=param_block.dtype, - shape=param_block.shape) + tmpvar = pserver_block.create_var(name=param_block.name, + persistable=True, + dtype=param_block.dtype, + shape=param_block.shape) new_inputs[key] = tmpvar elif key == "LearningRate": @@ -255,22 +256,20 @@ def _append_pserver_ops(optimize_block, opt_op, endpoint, grad_to_block_id, # update accumulator variable shape new_shape = _get_optimizer_input_shape(opt_op.type, key, var.shape, param_var.shape) - tmpvar = pserver_block.create_var( - name=var.name, - persistable=var.persistable, - dtype=var.dtype, - shape=new_shape) + tmpvar = pserver_block.create_var(name=var.name, + persistable=var.persistable, + dtype=var.dtype, + shape=new_shape) new_inputs[key] = tmpvar # change output's ParamOut variable outputs = _get_output_map_from_op(origin_program.global_block().vars, opt_op) outputs["ParamOut"] = new_inputs["Param"] - optimize_block.append_op( - type=opt_op.type, - inputs=new_inputs, - outputs=outputs, - attrs=opt_op.all_attrs()) + optimize_block.append_op(type=opt_op.type, + inputs=new_inputs, + outputs=outputs, + attrs=opt_op.all_attrs()) # record sparse grad to param name if new_inputs["Grad"].type == core.VarDesc.VarType.SELECTED_ROWS: @@ -332,8 +331,10 @@ def add_listen_and_serv_pass(program, config): } # step5 append the listen_and_serv op - program.global_block().append_op( - type="listen_and_serv", inputs={'X': []}, outputs={}, attrs=attrs) + 
program.global_block().append_op(type="listen_and_serv", + inputs={'X': []}, + outputs={}, + attrs=attrs) return program @@ -358,16 +359,16 @@ def add_rpc_global_flags_pass(program, config): def _clone_var(block, var, persistable=True): - return block.create_var( - name=var.name, - shape=var.shape, - dtype=var.dtype, - type=var.type, - lod_level=var.lod_level, - persistable=persistable) + return block.create_var(name=var.name, + shape=var.shape, + dtype=var.dtype, + type=var.type, + lod_level=var.lod_level, + persistable=persistable) def add_optimizer_pass(program, config): + def _append_pserver_grad_merge_ops(optimize_block, grad_varname_for_block, endpoint, grad_to_block_id): trainers = config.get_trainers() @@ -395,12 +396,11 @@ def add_optimizer_pass(program, config): else: merged_var_name = orig_varname - merged_var = pserver_block.create_var( - name=grad_block.name, - persistable=True, - type=grad_block.type, - dtype=grad_block.dtype, - shape=grad_block.shape) + merged_var = pserver_block.create_var(name=grad_block.name, + persistable=True, + type=grad_block.type, + dtype=grad_block.dtype, + shape=grad_block.shape) grad_to_block_id.append(merged_var.name + ":" + str(optimize_block.idx)) if config.is_sync_mode() and trainers > 1: @@ -416,16 +416,14 @@ def add_optimizer_pass(program, config): shape=grad_block.shape) vars2merge.append(per_trainer_var) - optimize_block.append_op( - type="sum", - inputs={"X": vars2merge}, - outputs={"Out": merged_var}, - attrs={"use_mkldnn": False}) - optimize_block.append_op( - type="scale", - inputs={"X": merged_var}, - outputs={"Out": merged_var}, - attrs={"scale": 1.0 / float(trainers)}) + optimize_block.append_op(type="sum", + inputs={"X": vars2merge}, + outputs={"Out": merged_var}, + attrs={"use_mkldnn": False}) + optimize_block.append_op(type="scale", + inputs={"X": merged_var}, + outputs={"Out": merged_var}, + attrs={"scale": 1.0 / float(trainers)}) return merged_var origin_program = config.get_origin_main_program() @@ -660,24 +658,25 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False): grad = main_program.global_block().vars[op.input("Grad")[0]] lr = main_program.global_block().vars[op.input("LearningRate")[0]] - block._insert_op( - opt_idx, - type="lookup_sparse_table_fuse_sgd", - inputs={"Grad": grad, - "LearningRate": lr}, - attrs={ - "is_entry": is_entry, - "tablename": table_name, - "value_names": value_names - }) + block._insert_op(opt_idx, + type="lookup_sparse_table_fuse_sgd", + inputs={ + "Grad": grad, + "LearningRate": lr + }, + attrs={ + "is_entry": is_entry, + "tablename": table_name, + "value_names": value_names + }) elif op.type == "adam": grad = main_program.global_block().vars[op.input("Grad")[0]] lr = main_program.global_block().vars[op.input("LearningRate")[0]] - beta1_pow = main_program.global_block().vars[op.input("Beta1Pow")[ - 0]] - beta2_pow = main_program.global_block().vars[op.input("Beta2Pow")[ - 0]] + beta1_pow = main_program.global_block().vars[op.input("Beta1Pow") + [0]] + beta2_pow = main_program.global_block().vars[op.input("Beta2Pow") + [0]] beta1_pow_o = main_program.global_block().vars[op.output( "Beta1PowOut")[0]] beta2_pow_o = main_program.global_block().vars[op.output( @@ -687,68 +686,71 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False): beta2 = op.attr('beta2') epsilon = op.attr('epsilon') - block._insert_op( - opt_idx, - type="lookup_sparse_table_fuse_adam", - inputs={ - "Grad": grad, - "LearningRate": lr, - "Beta1Pow": beta1_pow, - "Beta2Pow": 
beta2_pow - }, - outputs={ - "Beta1PowOut": beta1_pow_o, - "Beta2PowOut": beta2_pow_o - }, - attrs={ - "beta1": beta1, - "beta2": beta2, - "epsilon": epsilon, - "is_entry": is_entry, - "tablename": table_name, - "value_names": value_names - }) + block._insert_op(opt_idx, + type="lookup_sparse_table_fuse_adam", + inputs={ + "Grad": grad, + "LearningRate": lr, + "Beta1Pow": beta1_pow, + "Beta2Pow": beta2_pow + }, + outputs={ + "Beta1PowOut": beta1_pow_o, + "Beta2PowOut": beta2_pow_o + }, + attrs={ + "beta1": beta1, + "beta2": beta2, + "epsilon": epsilon, + "is_entry": is_entry, + "tablename": table_name, + "value_names": value_names + }) else: raise ValueError("only support sgd/adam optimizer now") def add_large_scale_op(block, global_block, table_name, value_names, acture_names, grad, is_entry, opt_idx): - ids = global_block.create_var( - name="kSparseIDs@{}".format(table_name), - persistable=False, - dtype="int64", - shape=[1, 1], - lod_level=0) + ids = global_block.create_var(name="kSparseIDs@{}".format(table_name), + persistable=False, + dtype="int64", + shape=[1, 1], + lod_level=0) # insert grad split to ids and tensor op - block._insert_op( - opt_idx, - type="lookup_sparse_table_grad_split", - inputs={"Grad": grad}, - outputs={"Row": ids, - "Value": grad}, - attrs={"tablename": table_name, - "is_entry": is_entry}) + block._insert_op(opt_idx, + type="lookup_sparse_table_grad_split", + inputs={"Grad": grad}, + outputs={ + "Row": ids, + "Value": grad + }, + attrs={ + "tablename": table_name, + "is_entry": is_entry + }) # insert read at first vars = [global_block.vars[acture_name] for acture_name in acture_names] - block._insert_op( - opt_idx + 1, - type="lookup_sparse_table_read", - inputs={"Ids": ids}, - outputs={"Out": vars}, - attrs={"tablename": table_name, - "value_names": value_names}) + block._insert_op(opt_idx + 1, + type="lookup_sparse_table_read", + inputs={"Ids": ids}, + outputs={"Out": vars}, + attrs={ + "tablename": table_name, + "value_names": value_names + }) # append write at last inputs = {"Ids": ids, "In": vars} - block.append_op( - type="lookup_sparse_table_write", - inputs=inputs, - outputs={}, - attrs={"tablename": table_name, - "value_names": value_names}) + block.append_op(type="lookup_sparse_table_write", + inputs=inputs, + outputs={}, + attrs={ + "tablename": table_name, + "value_names": value_names + }) op = get_op_by_type(main_program.global_block(), "listen_and_serv") @@ -783,14 +785,13 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False): is_entry = False if entry_attr == "none" else True if fuse: - add_fuse_large_scale_op(opt_block, - program.global_block(), param, - value_names, acture_names, grad, + add_fuse_large_scale_op(opt_block, program.global_block(), + param, value_names, acture_names, grad, is_entry, opt_idx) else: - add_large_scale_op(opt_block, - program.global_block(), param, value_names, - acture_names, grad, is_entry, opt_idx) + add_large_scale_op(opt_block, program.global_block(), param, + value_names, acture_names, grad, is_entry, + opt_idx) else: large_scale_kv_metas = [] for param, blockid in param_blockid_map.items(): @@ -844,8 +845,8 @@ def get_distributed_from_listen_and_serv(program, origin_program): def delete_unused_in_main_pass(program, config): origin_program = config.get_origin_main_program() - sparse_params = get_distributed_from_listen_and_serv(program, - origin_program) + sparse_params = get_distributed_from_listen_and_serv( + program, origin_program) for var in sparse_params: if 
program.global_block().has_var(var): @@ -855,8 +856,8 @@ def delete_unused_in_main_pass(program, config): def delete_unused_in_startup_pass(program, main_program, config): origin_program = config.get_origin_main_program() - sparse_params = get_distributed_from_listen_and_serv(main_program, - origin_program) + sparse_params = get_distributed_from_listen_and_serv( + main_program, origin_program) remove_ops = [] for op in program.global_block().ops: @@ -944,11 +945,10 @@ def build_pserver_startup_program_pass(program, p_main_program, config): ]: op._set_attr("shape", list(new_outputs["Out"].shape)) - program.global_block().append_op( - type=op.type, - inputs=new_inputs, - outputs=new_outputs, - attrs=op.all_attrs()) + program.global_block().append_op(type=op.type, + inputs=new_inputs, + outputs=new_outputs, + attrs=op.all_attrs()) return program @@ -981,17 +981,15 @@ def add_geo_optimizer_pass(program, config): if origin_varname in sparse_tablenames: sparse_grad_to_param.append(":".join([delta_var_name, param.name])) - delta_var = pserver_block.create_var( - name=delta_var_name, - persistable=False, - type=param.type, - dtype=param.dtype, - shape=param.shape) - - per_opt_block.append_op( - type="sum", - inputs={"X": [param, delta_var]}, - outputs={"Out": param}) + delta_var = pserver_block.create_var(name=delta_var_name, + persistable=False, + type=param.type, + dtype=param.dtype, + shape=param.shape) + + per_opt_block.append_op(type="sum", + inputs={"X": [param, delta_var]}, + outputs={"Out": param}) param_to_block_id.append(delta_var_name + ":" + str(per_opt_block.idx)) diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py index b6ec09bab72..6fb0c85d05c 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py @@ -109,6 +109,7 @@ def get_sparse_tablenames(program, is_distributed): class MergedVariable: + def __init__(self, merged, ordered, offsets): self.merged_var = merged self.ordered_vars = ordered @@ -128,6 +129,7 @@ def Singleton(cls): @Singleton class CompileTimeStrategy(object): + def __init__(self, main_program, startup_program, strategy, role_maker): self.min_block_size = 81920 @@ -356,6 +358,7 @@ class CompileTimeStrategy(object): is_sparse, is_send, is_distributed=False): + def get_grad_var_ep(slices): names = [] eps = [] @@ -367,8 +370,8 @@ class CompileTimeStrategy(object): names.append("{}.delta".format(slice.name)) else: names.append(slice.name) - elif is_grad and self.is_sync_mode() and self.get_trainers( - ) > 1: + elif is_grad and self.is_sync_mode( + ) and self.get_trainers() > 1: names.append("{}.trainer_{}".format(slice.name, self.get_role_id())) else: @@ -447,8 +450,7 @@ class CompileTimeStrategy(object): param_ctx.split_endpoints(), param_ctx.sections(), grad_ctx.origin_varnames(), - param_ctx.trainer_id(), - param_ctx.aggregate(), + param_ctx.trainer_id(), param_ctx.aggregate(), param_ctx.is_sparse(), param_ctx.is_distributed()) @@ -623,8 +625,8 @@ class CompileTimeStrategy(object): for merged in merged_dense_pairs: grad = merged[1] origin_varname = grad.merged_var.name - var = self.origin_main_program.global_block().vars[ - origin_varname] + var = self.origin_main_program.global_block( + ).vars[origin_varname] var_numel = reduce(lambda x, y: x * y, var.shape) grad_name = origin_varname aggregate = True @@ -782,13 +784,12 @@ class CompileTimeStrategy(object): if len(split) == 1: 
var_mapping[varname] = [orig_var] - self.var_distributed.add_distributed_var( - origin_var=orig_var, - slice_var=orig_var, - block_id=0, - offset=0, - is_slice=False, - vtype="Param") + self.var_distributed.add_distributed_var(origin_var=orig_var, + slice_var=orig_var, + block_id=0, + offset=0, + is_slice=False, + vtype="Param") else: var_mapping[varname] = [] orig_shape = orig_var.shape @@ -921,8 +922,8 @@ class CompileTimeStrategy(object): # update split_count after aligning split_count = int(math.ceil(var_numel / float(block_size))) for block_id in range(split_count): - curr_block_size = min(block_size, var_numel - ( - (block_id) * block_size)) + curr_block_size = min(block_size, + var_numel - ((block_id) * block_size)) block = vars_metatools.VarBlock(var.name, block_id, curr_block_size) blocks.append(str(block)) @@ -1010,12 +1011,10 @@ class CompileTimeStrategy(object): # create mapping of endpoint->split var to create pserver side program self.param_grad_ep_mapping = collections.OrderedDict() [ - self.param_grad_ep_mapping.update({ - ep: { - "params": [], - "grads": [] - } - }) for ep in self.get_ps_endpoints() + self.param_grad_ep_mapping.update({ep: { + "params": [], + "grads": [] + }}) for ep in self.get_ps_endpoints() ] def _build_var_distributed(self): @@ -1193,9 +1192,10 @@ def _add_lr_decay_table_pass(main_program, compiled_config, lr_decay_steps): lr_decay_main_program, lr_decay_startup_program, lr_name = _get_lr_sheduler_program( compiled_config.origin_main_program.lr_sheduler, lr_param_dict, lr_decay_steps) - compiled_config.add_tensor_table( - "@LR_DECAY_COUNTER@", lr_name, lr_decay_startup_program, - lr_decay_main_program, "GlobalStepTable") + compiled_config.add_tensor_table("@LR_DECAY_COUNTER@", lr_name, + lr_decay_startup_program, + lr_decay_main_program, + "GlobalStepTable") def _get_lr_param_dict(opt_ops): @@ -1260,8 +1260,8 @@ def _get_lr_sheduler_program(lr_sheduler, lr_param_dict, lr_decay_steps): % lr_decay_steps) else: raise ValueError( - "Not supported current LearningRate strategy, please use follow decay strategy: {}". 
- format(schedler_decay)) + "Not supported current LearningRate strategy, please use follow decay strategy: {}" + .format(schedler_decay)) return decay_main_program, decay_startup_program, lr_name diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py index 51e89cc301c..18755212cc1 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py @@ -53,6 +53,7 @@ DEFAULT_DEVICE = 'cpu' def delete_optimizer_pass(program, config): + def _delete_optimizer_op_and_vars(_program, optimize_ops): optimize_vars = [] optimize_op_role_vars = [] @@ -77,15 +78,14 @@ def delete_optimizer_pass(program, config): def _add_lr_var(main_program, compiled_config): # Todo: hard code for pe - lr_var = compiled_config.origin_main_program.global_block().vars[ - "learning_rate_0"] - main_program.global_block().create_var( - name=lr_var.name, - shape=lr_var.shape, - dtype=lr_var.dtype, - type=lr_var.type, - lod_level=lr_var.lod_level, - persistable=True) + lr_var = compiled_config.origin_main_program.global_block( + ).vars["learning_rate_0"] + main_program.global_block().create_var(name=lr_var.name, + shape=lr_var.shape, + dtype=lr_var.dtype, + type=lr_var.type, + lod_level=lr_var.lod_level, + persistable=True) optimizer_ops = _get_optimize_ops(program) lr_ops = _get_lr_ops(program) @@ -126,14 +126,15 @@ def distributed_ops_pass(program, config, use_ps_gpu=False): for op in _program.global_block().ops: if op.type in SPARSE_GRAD_OP_TYPE_DICT.keys(): param_name = op.input(SPARSE_GRAD_OP_TYPE_DICT[op.type])[0] - if param_name in pull_sparse_ids and op.input("Ids")[ - 0] in pull_sparse_ids[param_name]: + if param_name in pull_sparse_ids and op.input( + "Ids")[0] in pull_sparse_ids[param_name]: ops = push_sparse_ops.get(param_name, []) ops.append(op) push_sparse_ops[param_name] = ops return pull_sparse_ops, push_sparse_ops def _pull_sparse_fuse(_program, pull_sparse_ops, use_ps_gpu): + def dag_check_up_and_reorder(program, inputs, outputs): global_block = program.global_block() min_output_index = len(global_block.ops) @@ -294,8 +295,10 @@ def distributed_ops_pass(program, config, use_ps_gpu=False): program.global_block()._insert_op( index=distributed_idx, type="pull_gpups_sparse", - inputs={"Ids": inputs, - 'W': w}, + inputs={ + "Ids": inputs, + 'W': w + }, outputs={"Out": outputs}, attrs={ "size": [w.shape[1] for i in inputs], @@ -306,8 +309,10 @@ def distributed_ops_pass(program, config, use_ps_gpu=False): program.global_block()._insert_op( index=distributed_idx, type="distributed_lookup_table", - inputs={"Ids": inputs, - 'W': w}, + inputs={ + "Ids": inputs, + 'W': w + }, outputs={"Outputs": outputs}, attrs={ "is_distributed": is_distributed, @@ -323,8 +328,10 @@ def distributed_ops_pass(program, config, use_ps_gpu=False): program.global_block()._insert_op( index=distributed_idx, type="distributed_lookup_table", - inputs={"Ids": [inputs[i]], - 'W': w}, + inputs={ + "Ids": [inputs[i]], + 'W': w + }, outputs={"Outputs": [outputs[i]]}, attrs={ "is_distributed": is_distributed, @@ -419,6 +426,7 @@ def distributed_ops_pass(program, config, use_ps_gpu=False): for idx in op_idxs[::-1]: program.global_block()._remove_op(idx) + # if use_ps_gpu: # program.global_block().append_op( # type="push_box_sparse", @@ -431,22 +439,22 @@ def distributed_ops_pass(program, config, use_ps_gpu=False): # "is_sparse": True # }) # else: - 
program.global_block().append_op( - type="distributed_push_sparse", - inputs={ - "Ids": inputs, - 'W': w, - "Outputs": outputs, - "Shows": show, - "Clicks": clk - }, - outputs={"Outputs": outputs}, - attrs={ - "is_distributed": is_distributed, - "padding_idx": padding_idx, - "table_id": table_id, - "size": emb_size[param] - }) + program.global_block().append_op(type="distributed_push_sparse", + inputs={ + "Ids": inputs, + 'W': w, + "Outputs": outputs, + "Shows": show, + "Clicks": clk + }, + outputs={"Outputs": outputs}, + attrs={ + "is_distributed": + is_distributed, + "padding_idx": padding_idx, + "table_id": table_id, + "size": emb_size[param] + }) pull_sparse_ops, push_sparse_ops = _get_pull_sparse_ops(program) _pull_sparse_fuse(program, pull_sparse_ops, use_ps_gpu) @@ -473,29 +481,33 @@ def append_send_ops_pass(program, config): dummy_output = program.global_block().create_var( name=framework.generate_control_dev_var_name()) - program.global_block().append_op( - type="send", - inputs={"X": send_input_vars}, - outputs={"Out": dummy_output}, - attrs={ - "send_varnames": [queue], - "is_sparse": is_sparse, - "table_id": table_id, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) + program.global_block().append_op(type="send", + inputs={"X": send_input_vars}, + outputs={"Out": dummy_output}, + attrs={ + "send_varnames": [queue], + "is_sparse": + is_sparse, + "table_id": + table_id, + RPC_OP_ROLE_ATTR_NAME: + RPC_OP_ROLE_ATTR_VALUE + }) return dummy_output def _append_barrier_op(dummys): - program.global_block().append_op( - type="send_barrier", - inputs={"X": dummys}, - outputs={"Out": []}, - attrs={ - "trainer_id": trainer_id, - "half_async": True, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) + program.global_block().append_op(type="send_barrier", + inputs={"X": dummys}, + outputs={"Out": []}, + attrs={ + "trainer_id": + trainer_id, + "half_async": + True, + RPC_OP_ROLE_ATTR_NAME: + RPC_OP_ROLE_ATTR_VALUE + }) dummys = [] @@ -525,15 +537,17 @@ def init_from_server_pass(program, config): fetch_barrier_out = program.global_block().create_var( name=framework.generate_control_dev_var_name()) - program.global_block().append_op( - type="fetch_barrier", - inputs={}, - outputs={"Out": fetch_barrier_out}, - attrs={ - "endpoints": config.get_ps_endpoints(), - "trainer_id": config.get_role_id(), - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) + program.global_block().append_op(type="fetch_barrier", + inputs={}, + outputs={"Out": fetch_barrier_out}, + attrs={ + "endpoints": + config.get_ps_endpoints(), + "trainer_id": + config.get_role_id(), + RPC_OP_ROLE_ATTR_NAME: + RPC_OP_ROLE_ATTR_VALUE + }) return program @@ -555,8 +569,8 @@ def fake_init_ops_pass(program, config): table_param_init_op.append(op) init_op_num = len(table_param_init_op) if init_op_num != 1: - raise ValueError("table init op num should be 1, now is " + str( - init_op_num)) + raise ValueError("table init op num should be 1, now is " + + str(init_op_num)) table_init_op = table_param_init_op[0] program.global_block().append_op( type="fake_init", @@ -572,6 +586,7 @@ def fake_init_ops_pass(program, config): def ps_gpu_pass(program): + def _add_push_box_sparse_op(program): op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName() backward = core.op_proto_and_checker_maker.OpRole.Backward @@ -744,10 +759,8 @@ def find_heter_ops(program, default_device="cpu"): op_list = list(block.ops) sum_op = op_list[var2idx[param_name]] sum_op_inputs = { - sum_op.input_names[0]: [ - block.vars[input] - for input in 
sum_op.input_arg_names - ] + sum_op.input_names[0]: + [block.vars[input] for input in sum_op.input_arg_names] } sum_op_outputs = { sum_op.output_names[0]: [ @@ -755,12 +768,11 @@ def find_heter_ops(program, default_device="cpu"): for output in sum_op.output_arg_names ] } - block._insert_op( - index=i + 1, - type=sum_op.type, - inputs=sum_op_inputs, - outputs=sum_op_outputs, - attrs=sum_op.all_attrs()) + block._insert_op(index=i + 1, + type=sum_op.type, + inputs=sum_op_inputs, + outputs=sum_op_outputs, + attrs=sum_op.all_attrs()) block._remove_op(var2idx[param_name] + 1) var2idx.pop(param_name) for var_ in var2idx: @@ -798,12 +810,11 @@ def find_heter_ops(program, default_device="cpu"): for output in sum_op.output_arg_names ] } - block._insert_op( - index=i + 1, - type=sum_op.type, - inputs=sum_op_inputs, - outputs=sum_op_outputs, - attrs=sum_op.all_attrs()) + block._insert_op(index=i + 1, + type=sum_op.type, + inputs=sum_op_inputs, + outputs=sum_op_outputs, + attrs=sum_op.all_attrs()) block._remove_op(var2idx[no_grad_var] + 1) var2idx.pop(no_grad_var) for var_ in var2idx: @@ -818,8 +829,8 @@ def find_heter_ops(program, default_device="cpu"): forward_op_type = pre_op.type.split("_grad")[0] if forward_op_type in SPARSE_OP_TYPE_DICT.keys() \ and pre_op.attr('remote_prefetch') is True: - param_name = pre_op.input(SPARSE_OP_TYPE_DICT[ - forward_op_type])[0] + param_name = pre_op.input( + SPARSE_OP_TYPE_DICT[forward_op_type])[0] if param_name == origin_var and op.attr( "op_device") == pre_op.attr("op_device"): continue @@ -919,7 +930,8 @@ def find_heter_ops(program, default_device="cpu"): if len(heter_ops) == 0: warnings.warn( "No heterogeneous OP was found in your program , " - " please using fluid.device_guard() to run OPs on different device.") + " please using fluid.device_guard() to run OPs on different device." + ) total_heter_ops = 0 heter_blocks = 0 @@ -929,8 +941,8 @@ def find_heter_ops(program, default_device="cpu"): for _, heter_block in heter_block_dict.items(): total_heter_ops += len(heter_block) print( - "There are {} OPs in your main_program, and contains {} heter-OPs which is made up of {} heter-blocks.". - format(len(block.ops), total_heter_ops, heter_blocks)) + "There are {} OPs in your main_program, and contains {} heter-OPs which is made up of {} heter-blocks." 
+ .format(len(block.ops), total_heter_ops, heter_blocks)) return origin_porgram, heter_ops, default_ops, program_block_ops @@ -984,25 +996,27 @@ def create_heter_program(program, config, heter_program, program_block_ops_list, for _, op in enumerate(heter_block_ops_backward): block_append_op(heter_program, program, heter_block_bp, op) - bp_entrance_vars = block_var_detail[stage_id - 1]["backward"][ - "entrance"] + bp_entrance_vars = block_var_detail[stage_id - + 1]["backward"]["entrance"] add_vars_by_var_list(bp_entrance_vars, program, heter_program, heter_block_bp) bp_exit_vars = block_var_detail[stage_id - 1]["backward"]["exit"] add_vars_by_var_list(bp_exit_vars, program, heter_program, heter_block_bp) - backward_comm_info = get_communicate_var_info( - program, stage_id, bp_entrance_vars, type="backward") + backward_comm_info = get_communicate_var_info(program, + stage_id, + bp_entrance_vars, + type="backward") - grad_to_block_id.append(backward_comm_info["block_input_var_name"] + ":" - + str(heter_block_bp.idx)) + grad_to_block_id.append(backward_comm_info["block_input_var_name"] + + ":" + str(heter_block_bp.idx)) else: for _, op in enumerate(heter_block_ops_backward): block_append_op(heter_program, program, heter_block, op) - bp_entrance_vars = block_var_detail[stage_id - 1]["backward"][ - "entrance"] + bp_entrance_vars = block_var_detail[stage_id - + 1]["backward"]["entrance"] add_vars_by_var_list(bp_entrance_vars, program, heter_program, heter_block) bp_exit_vars = block_var_detail[stage_id - 1]["backward"]["exit"] @@ -1010,8 +1024,10 @@ def create_heter_program(program, config, heter_program, program_block_ops_list, heter_block_bp = heter_block - forward_comm_info = get_communicate_var_info( - program, stage_id, entrance_vars, type="forward") + forward_comm_info = get_communicate_var_info(program, + stage_id, + entrance_vars, + type="forward") grad_to_block_id.append(forward_comm_info["block_input_var_name"] + ":" + str(heter_block.idx)) @@ -1022,13 +1038,15 @@ def create_heter_program(program, config, heter_program, program_block_ops_list, static_var = insert_communicate_op(program, config, heter_block, stage_id, first_op_index_fp, block_var_detail, current_device) - static_var_bp = insert_communicate_op( - program, config, heter_block_bp, stage_id, first_op_index_bp, - block_var_detail, current_device, False) + static_var_bp = insert_communicate_op(program, config, heter_block_bp, + stage_id, first_op_index_bp, + block_var_detail, current_device, + False) # add send op - send_grad_var_list = add_heter_send_op( - program, heter_program, heter_block_bp, block_var_detail[stage_id - 1]) + send_grad_var_list = add_heter_send_op(program, heter_program, + heter_block_bp, + block_var_detail[stage_id - 1]) # --------------- # add step conter @@ -1063,8 +1081,10 @@ def create_heter_program(program, config, heter_program, program_block_ops_list, RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE } # append the listen_and_serv op - heter_program.global_block().append_op( - type="heter_listen_and_serv", inputs={'X': []}, outputs={}, attrs=attrs) + heter_program.global_block().append_op(type="heter_listen_and_serv", + inputs={'X': []}, + outputs={}, + attrs=attrs) check_heter_compile_time_strategy(program, config, send_grad_var_list) @@ -1098,8 +1118,9 @@ def create_trainer_program(program, origin_program, config, for heter_block_index in range(1, len(program_block_ops_list)): ops_list = program_block_ops_list[heter_block_index][ "forward"] + program_block_ops_list[heter_block_index]["backward"] - 
static_var += replace_ops_by_communicate_op( - program, config, heter_block_index, ops_list, block_var_detail) + static_var += replace_ops_by_communicate_op(program, config, + heter_block_index, ops_list, + block_var_detail) remove_trainer_send_op(program, config, heter_block_index, block_var_detail) @@ -1113,8 +1134,10 @@ def create_trainer_program(program, origin_program, config, bp_ops_list, block_var_detail) bp_entrance_vars = block_var_detail[0]["backward"]["entrance"] - backward_comm_info = get_communicate_var_info( - origin_program, 1, bp_entrance_vars, type="backward") + backward_comm_info = get_communicate_var_info(origin_program, + 1, + bp_entrance_vars, + type="backward") grad_to_block_id.append(backward_comm_info["block_input_var_name"] + ":" + str(backward_block.idx)) @@ -1135,12 +1158,11 @@ def create_trainer_program(program, origin_program, config, RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE } # append the listen_and_serv op - program.global_block()._insert_op( - index=0, - type="heter_listen_and_serv", - inputs={'X': []}, - outputs={}, - attrs=attrs) + program.global_block()._insert_op(index=0, + type="heter_listen_and_serv", + inputs={'X': []}, + outputs={}, + attrs=attrs) ## TODO add check for bp block check_op_device(program.global_block(), DEFAULT_DEVICE) @@ -1171,22 +1193,24 @@ def insert_communicate_op(orign_program, comm_info = get_communicate_var_info(orign_program, stage_id - 1, entrance_var, "backward") - heter_block._insert_op( - index=first_op_index, - type="send_and_recv", - inputs={"X": heter_block.vars[entrance_var[0]]}, - outputs={"Out": []}, - attrs={ - "mode": "forward" if is_forward else "backward", - "send_var_name": entrance_var + ["microbatch_id"], - "recv_var_name": [], - "message_name": comm_info["block_input_var_name"], - "next_endpoints": next_heter_worker_endpoints, - "previous_endpoints": previous_heter_worker_endpoints, - "trainer_id": config.get_role_id(), - "op_device": device, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) + heter_block._insert_op(index=first_op_index, + type="send_and_recv", + inputs={"X": heter_block.vars[entrance_var[0]]}, + outputs={"Out": []}, + attrs={ + "mode": "forward" if is_forward else "backward", + "send_var_name": + entrance_var + ["microbatch_id"], + "recv_var_name": [], + "message_name": + comm_info["block_input_var_name"], + "next_endpoints": next_heter_worker_endpoints, + "previous_endpoints": + previous_heter_worker_endpoints, + "trainer_id": config.get_role_id(), + "op_device": device, + RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE + }) return entrance_var @@ -1269,8 +1293,8 @@ def remove_trainer_send_op(program, config, heter_block_index, need_remove_send_op = [] need_remove_grad_var = [] for op in find_send_op(program): - input_list, _ = find_op_input_output(program, - program.global_block(), op) + input_list, _ = find_op_input_output(program, program.global_block(), + op) for var_name in input_list: origin_var_name = var_name.split("@GRAD")[0] if origin_var_name in persistables: @@ -1283,6 +1307,7 @@ def remove_trainer_send_op(program, config, heter_block_index, def add_heter_send_op(program, heter_program, block, block_var_detail): + def _get_send_op_dict(): send_op_dict = {} send_op_list = find_send_op(program) @@ -1328,16 +1353,16 @@ def add_heter_send_op(program, heter_program, block, block_var_detail): block.vars[union_var] for union_var in table_dict[table_id]['var_list'] ] - block.append_op( - type="send", - inputs={"X": send_input_vars}, - outputs={"Out": dummy_output}, - attrs={ - 
"send_varnames": table_dict[table_id]['send_varnames'], - "is_sparse": is_sparse, - "table_id": table_id, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) + block.append_op(type="send", + inputs={"X": send_input_vars}, + outputs={"Out": dummy_output}, + attrs={ + "send_varnames": + table_dict[table_id]['send_varnames'], + "is_sparse": is_sparse, + "table_id": table_id, + RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE + }) return send_grad_var_list @@ -1454,9 +1479,8 @@ def union_forward_gradient_op(program_block_ops_list): assert block_length % 2 != 0, "the length of program_block_ops_list should be odd" for i in range(0, block_length // 2): block_op_list = {"forward": program_block_ops_list[i]} - block_op_list.update({ - "backward": program_block_ops_list[block_length - 1 - i] - }) + block_op_list.update( + {"backward": program_block_ops_list[block_length - 1 - i]}) union_program_block_ops_list.append(block_op_list) block_op_list = {"forward": [], "backward": []} @@ -1474,8 +1498,9 @@ def find_block_joints(program, program_block_ops_list, heter_ops): program_block_ops_list) block_var_detail = entrance_exit_check(program, program_block_ops_list, block_var_detail, heter_ops) - block_var_detail = delete_block_useless_exit( - program, program_block_ops_list, block_var_detail) + block_var_detail = delete_block_useless_exit(program, + program_block_ops_list, + block_var_detail) return block_var_detail @@ -1506,8 +1531,8 @@ def find_entrance_exit_private(program, program_block_ops_list): bp_block_input, bp_block_output = find_ops_list_input_output( program, block_op_list["backward"]) bp_persistables = screen_persistables( - program, bp_block_input) + screen_persistables(program, - bp_block_output) + program, bp_block_input) + screen_persistables( + program, bp_block_output) # find entrance & exit bp_block_private_vars = list(set(bp_block_input) & set(bp_block_output)) bp_block_entrance = list( @@ -1555,10 +1580,10 @@ def entrance_exit_check(program, program_block_ops_list, block_var_detail, #need_add_vars = find_need_var_from_previous_block( # need_add_vars, block_var_detail, index, heter_ops) - previous_block_private = block_var_detail[index - 1]["forward"][ - "private"] - previous_block_entrance = block_var_detail[index - 1]["forward"][ - "entrance"] + previous_block_private = block_var_detail[index - + 1]["forward"]["private"] + previous_block_entrance = block_var_detail[index - + 1]["forward"]["entrance"] for var in need_add_vars: if var not in previous_block_private and var not in previous_block_entrance: previous_block_entrance.append(var) @@ -1584,10 +1609,10 @@ def entrance_exit_check(program, program_block_ops_list, block_var_detail, need_ignore_vars.append(var) need_add_vars = list( set(need_add_vars).difference(set(need_ignore_vars))) - previous_block_private = block_var_detail[index + 1]["backward"][ - "private"] - previous_block_entrance = block_var_detail[index + 1]["backward"][ - "entrance"] + previous_block_private = block_var_detail[index + + 1]["backward"]["private"] + previous_block_entrance = block_var_detail[index + + 1]["backward"]["entrance"] for var in need_add_vars: if var not in previous_block_private and var not in previous_block_entrance: previous_block_entrance.append(var) @@ -1648,8 +1673,8 @@ def delete_block_useless_exit(program, program_block_ops_list, if index - 1 < 0: break current_block_exit = block_var_detail[index]["backward"]["exit"] - next_block_entrance = block_var_detail[index - 1]["backward"][ - "entrance"] + next_block_entrance = 
block_var_detail[index - + 1]["backward"]["entrance"] need_delete_var = [] for var in current_block_exit: if var not in next_block_entrance: @@ -1693,61 +1718,62 @@ def insert_reshape_op(program, input_var = block.vars[var_name] if new_var_name not in block.vars: - out = block.create_var( - name=new_var_name, - shape=new_var_shape, - dtype=input_var.dtype, - type=input_var.type) + out = block.create_var(name=new_var_name, + shape=new_var_shape, + dtype=input_var.dtype, + type=input_var.type) else: out = block.vars[new_var_name] new_var_shape = out.shape - x_shape = block.create_var( - name="{}.xshape@Heter".format(var_name), dtype=input_var.dtype) - block._insert_op( - index=index, - type="reshape2", - inputs={"X": input_var}, - attrs={'shape': new_var_shape}, - outputs={"Out": out, - "XShape": x_shape}) + x_shape = block.create_var(name="{}.xshape@Heter".format(var_name), + dtype=input_var.dtype) + block._insert_op(index=index, + type="reshape2", + inputs={"X": input_var}, + attrs={'shape': new_var_shape}, + outputs={ + "Out": out, + "XShape": x_shape + }) def insert_send_concat_op(program, block, index, var_name_list, new_var_name, new_var_shape): input_var_list = [block.vars[var_name] for var_name in var_name_list] - out = program.global_block().create_var( - name=new_var_name, - shape=new_var_shape, - dtype=input_var_list[0].dtype, - type=input_var_list[0].type) + out = program.global_block().create_var(name=new_var_name, + shape=new_var_shape, + dtype=input_var_list[0].dtype, + type=input_var_list[0].type) - block._insert_op( - index=index, - type='concat', - inputs={"X": input_var_list}, - outputs={'Out': [out]}, - attrs={'axis': -1, - 'use_stack': False}) + block._insert_op(index=index, + type='concat', + inputs={"X": input_var_list}, + outputs={'Out': [out]}, + attrs={ + 'axis': -1, + 'use_stack': False + }) def insert_recv_slice_op(program, block, index, var_name, var_shape, dtype, type, new_var_name_list, new_var_shape_list): if var_name not in program.global_block().vars: - input_var = program.global_block().create_var( - name=var_name, shape=var_shape, dtype=dtype, type=type) + input_var = program.global_block().create_var(name=var_name, + shape=var_shape, + dtype=dtype, + type=type) else: input_var = program.global_block().vars[var_name] out_list = [] for i in range(len(new_var_name_list)): if new_var_name_list[i] not in block.vars: - out = block.create_var( - name=new_var_name_list[i], - shape=new_var_shape_list[i], - dtype=input_var.dtype, - type=input_var.type) + out = block.create_var(name=new_var_name_list[i], + shape=new_var_shape_list[i], + dtype=input_var.dtype, + type=input_var.type) else: out = block.vars[new_var_name_list[i]] out_list.append(out) @@ -1764,12 +1790,11 @@ def insert_recv_slice_op(program, block, index, var_name, var_shape, dtype, attrs['starts'] = starts attrs['ends'] = ends - block._insert_op( - index=index, - type='slice', - inputs={'Input': input_var}, - attrs=attrs, - outputs={'Out': out_list[i]}) + block._insert_op(index=index, + type='slice', + inputs={'Input': input_var}, + attrs=attrs, + outputs={'Out': out_list[i]}) start_index = end_index index += 1 @@ -1838,8 +1863,10 @@ def block_append_op(program, origin_program, block, op): if "_grad" not in op.type: # for forward op - return block.append_op( - type=op.type, inputs=inputs, outputs=outputs, attrs=op.all_attrs()) + return block.append_op(type=op.type, + inputs=inputs, + outputs=outputs, + attrs=op.all_attrs()) else: # for grad op op_desc = op.desc @@ -1865,8 +1892,8 @@ def 
add_vars_by_var_list(var_name_list, origin_program, program, block): ).vars and var_name not in block.vars: var = origin_program.global_block().vars[var_name] if var.persistable: - program.global_block()._clone_variable( - var, force_persistable=False) + program.global_block()._clone_variable(var, + force_persistable=False) else: block._clone_variable(var, force_persistable=False) diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/vars_metatools.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/vars_metatools.py index c80b4a800bd..f852c1a0311 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/vars_metatools.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/vars_metatools.py @@ -30,6 +30,7 @@ dtype_to_size = { class VarBlock: + def __init__(self, varname, offset, size): self.varname = varname # NOTE: real offset is offset * size diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/__init__.py b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/__init__.py index dd9d7e760a8..3d625d47f30 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/__init__.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/__init__.py @@ -85,8 +85,8 @@ class PSLib(Fleet): if self._role_maker.is_xpu(): local_endpoint = self._role_maker.get_local_endpoint() local_endpoint = local_endpoint.split(":") - self._heter_ptr.start_xpu_service( - str(local_endpoint[0]), int(local_endpoint[1])) + self._heter_ptr.start_xpu_service(str(local_endpoint[0]), + int(local_endpoint[1])) self._role_maker._barrier_all() self.all_ips_ = self._role_maker._all_gather(self._local_ip) # worker_index * 2 is for compatible with older versions of pslib @@ -136,8 +136,9 @@ class PSLib(Fleet): var_name = table.dense_variable_name[i] if scope.find_var(var_name) is None: raise ValueError( - "var " + var_name + " not found in scope, " - + "you should run startup program first") + "var " + var_name + + " not found in scope, " + + "you should run startup program first") var_name_list.append(var_name) if not self._opt_info["use_ps_gpu"]: self._fleet_ptr.init_model(scope, @@ -249,9 +250,10 @@ class PSLib(Fleet): """ - trainer_instance = executor.start_heter_trainer( - program, scope, debug, fetch_list, fetch_info, print_period, - fetch_handler) + trainer_instance = executor.start_heter_trainer(program, scope, debug, + fetch_list, fetch_info, + print_period, + fetch_handler) if self._role_maker.is_xpu(): print("barrier heter") self._role_maker._barrier_heter() @@ -1006,10 +1008,11 @@ class DownpourOptimizer(DistributedOptimizer): self._optimizer = optimizer self._optimizer_name = "Distributed%s" % optimizer.type.capitalize() if optimizer.type != "adam": - print("Currently, distributed optimizer only support Adam" - "Will config built-in adam for you." - "We will support more functions in DistributedOptimizer", - sys.stderr) + print( + "Currently, distributed optimizer only support Adam" + "Will config built-in adam for you." 
+ "We will support more functions in DistributedOptimizer", + sys.stderr) self._optimizer_name = "DistributedAdam" self._distributed_optimizer = globals()[self._optimizer_name](optimizer) @@ -1152,13 +1155,12 @@ class DownpourOptimizer(DistributedOptimizer): t = MultiThread(trans_mode=program_mode) start_program = startup_programs[i] main_program = programs[i] - t.transpile( - startup_program=start_program, - main_program=main_program, - rank=env["trainer_id"], - endpoints=env["trainer_endpoints"], - current_endpoint=env['current_endpoint'], - wait_port=False) + t.transpile(startup_program=start_program, + main_program=main_program, + rank=env["trainer_id"], + endpoints=env["trainer_endpoints"], + current_endpoint=env['current_endpoint'], + wait_port=False) if i > 0: self._remove_collective_ops(start_program, "c_comm_init_all") diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/node.py b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/node.py index 5f0af296441..308261cea06 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/node.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/node.py @@ -109,8 +109,8 @@ class DownpourServer(Server): if table_class == 'DownpourSparseTable' or table_class == 'DownpourSparseSSDTable': table.enable_sparse_table_cache = strategy.get( 'sparse_enable_cache', True) - table.sparse_table_cache_rate = strategy.get('sparse_cache_rate', - 0.00055) + table.sparse_table_cache_rate = strategy.get( + 'sparse_cache_rate', 0.00055) table.sparse_table_cache_file_num = strategy.get( 'sparse_cache_file_num', 16) table.compress_in_save = strategy.get('sparse_compress_in_save', @@ -313,8 +313,8 @@ class DownpourServer(Server): table.compress_in_save = strategy.get('dense_compress_in_save', True) table.accessor.accessor_class = strategy.get( 'dense_accessor_class', "DownpourDenseValueAccessor") - table.accessor.dense_sgd_param.name = strategy.get('dense_optimizer', - "adam") + table.accessor.dense_sgd_param.name = strategy.get( + 'dense_optimizer', "adam") table.accessor.dense_sgd_param.adam.learning_rate = strategy.get( 'dense_learning_rate', 5e-06) table.accessor.dense_sgd_param.adam.avg_decay_rate = strategy.get( @@ -377,8 +377,8 @@ class DownpourServer(Server): table.compress_in_save = strategy.get('datanorm_compress_in_save', True) table.accessor.accessor_class = strategy.get( 'datanorm_accessor_class', 'DownpourDenseValueAccessor') - table.accessor.dense_sgd_param.name = strategy.get('datanorm_operation', - 'summary') + table.accessor.dense_sgd_param.name = strategy.get( + 'datanorm_operation', 'summary') table.accessor.dense_sgd_param.summary.summary_decay_rate = strategy.get( 'datanorm_decay_rate', 0.999999) table.accessor.fea_dim = fea_dim diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py index 9483556d46f..35cda4c34b0 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py @@ -203,7 +203,7 @@ class DistributedAdam(DistributedOptimizerImplBase): return ret_list def _if_last_block(self, op, _equal_dict): - # for conditional_block op + # for conditional_block op cond_str = op.input('Cond')[0] bool_test = False if cond_str.startswith('equal'): @@ -345,25 +345,25 @@ class DistributedAdam(DistributedOptimizerImplBase): if st.get("sparse_embedx_dim") is not None \ and 
strategy.get("use_cvm") == True \ and st["sparse_embedx_dim"] != emb_to_size[table_name] - 3: - raise ValueError("fleet config sparse_embedx_dim=%s not" - " equal to embedding dim - 3 = %s" % - (st["sparse_embedx_dim"], - emb_to_size[table_name] - 3)) + raise ValueError( + "fleet config sparse_embedx_dim=%s not" + " equal to embedding dim - 3 = %s" % + (st["sparse_embedx_dim"], emb_to_size[table_name] - 3)) if st.get("sparse_embedx_dim") is not None \ and strategy.get("use_cvm") == False \ and st["sparse_embedx_dim"] != emb_to_size[table_name] - 1: - raise ValueError("fleet config sparse_embedx_dim=%s not" - " equal to embedding dim - 1 = %s" % - (st["sparse_embedx_dim"], - emb_to_size[table_name] - 1)) + raise ValueError( + "fleet config sparse_embedx_dim=%s not" + " equal to embedding dim - 1 = %s" % + (st["sparse_embedx_dim"], emb_to_size[table_name] - 1)) if st.get("sparse_embedx_dim") is None \ and strategy.get("use_cvm") == True: logger.warning( "sparse embedding dim for table name '{}' is: {}, while sparse_embedx_dim " "with same sparse table name is not set in config_fleet.py. " "Hence automatically set sparse_embedx_dim = {} - 3.". - format(table_name, emb_to_size[table_name], emb_to_size[ - table_name])) + format(table_name, emb_to_size[table_name], + emb_to_size[table_name])) st["sparse_embedx_dim"] = emb_to_size[table_name] - 3 if st.get("sparse_embedx_dim") is None \ and strategy.get("use_cvm") == False: @@ -371,23 +371,23 @@ class DistributedAdam(DistributedOptimizerImplBase): "sparse embedding dim for table name '{}' is: {}, while sparse_embedx_dim " "with same sparse table name is not set in config_fleet.py. " "Hence automatically set sparse_embedx_dim = {} - 1.". - format(table_name, emb_to_size[table_name], emb_to_size[ - table_name])) + format(table_name, emb_to_size[table_name], + emb_to_size[table_name])) st["sparse_embedx_dim"] = emb_to_size[table_name] - 1 elif accessor == "DownpourSparseValueAccessor": if st.get("sparse_embedx_dim") is not None \ and st["sparse_embedx_dim"] != emb_to_size[table_name]: - raise ValueError("fleet config sparse_embedx_dim=%s not" - " equal to embedding dim = %s" % - (st["sparse_embedx_dim"], - emb_to_size[table_name])) + raise ValueError( + "fleet config sparse_embedx_dim=%s not" + " equal to embedding dim = %s" % + (st["sparse_embedx_dim"], emb_to_size[table_name])) if st.get("sparse_embedx_dim") is None: logger.warning( "sparse embedding dim for table name '{}' is: {}, while sparse_embedx_dim " "with same sparse table name is not set in config_fleet.py. 
" "Hence automatically set sparse_embedx_dim = {}.".format( - table_name, emb_to_size[table_name], emb_to_size[ - table_name])) + table_name, emb_to_size[table_name], + emb_to_size[table_name])) st["sparse_embedx_dim"] = emb_to_size[table_name] return strategy @@ -439,9 +439,9 @@ class DistributedAdam(DistributedOptimizerImplBase): parameters = parameter_list[num] prog_id = str(id(loss.block.program)) # param_grads of program - params_grads = sorted( - fluid.backward.append_backward(loss, parameters, no_grad_set), - key=lambda x: x[0].name) + params_grads = sorted(fluid.backward.append_backward( + loss, parameters, no_grad_set), + key=lambda x: x[0].name) flag_use_ps_gpu = strategy.get("use_ps_gpu", False) if flag_use_ps_gpu: @@ -455,7 +455,7 @@ class DistributedAdam(DistributedOptimizerImplBase): embedding_table = self._find_multi_distributed_lookup_table( [loss]) self._remove_optimize_op_for_embedding(loss, embedding_table) - # has condition_block op means multi-task + # has condition_block op means multi-task flag_multi_task = self._has_conditional_block(loss) if flag_multi_task: self._cond_params = dict() @@ -593,25 +593,25 @@ class DistributedAdam(DistributedOptimizerImplBase): or accessor == "DownpourUnitAccessor": if st.get("sparse_embedx_dim") is not None \ and st["sparse_embedx_dim"] != emb_to_size[key] - 3: - raise ValueError("fleet config sparse_embedx_dim=%s not" - " equal to embedding size - 3 = %s" % - (st["sparse_embedx_dim"], - emb_to_size[key] - 3)) + raise ValueError( + "fleet config sparse_embedx_dim=%s not" + " equal to embedding size - 3 = %s" % + (st["sparse_embedx_dim"], emb_to_size[key] - 3)) st["sparse_embedx_dim"] = emb_to_size[key] - 3 elif accessor == "DownpourSparseValueAccessor": if st.get("sparse_embedx_dim") is not None \ and st["sparse_embedx_dim"] != emb_to_size[key]: - raise ValueError("fleet config sparse_embedx_dim=%s not" - " equal to embedding size = %s" % - (st["sparse_embedx_dim"], - emb_to_size[key])) + raise ValueError( + "fleet config sparse_embedx_dim=%s not" + " equal to embedding size = %s" % + (st["sparse_embedx_dim"], emb_to_size[key])) st["sparse_embedx_dim"] = emb_to_size[key] # ServerParameter add all sparse tables for tn in sparse_table_to_index: sparse_table_index = sparse_table_to_index[tn] - st = self._check_config_fleet_with_program_op(strategy, tn, - emb_to_size) + st = self._check_config_fleet_with_program_op( + strategy, tn, emb_to_size) if st.get(tn) is not None: server.add_sparse_table(sparse_table_index, st[tn]) else: @@ -692,22 +692,25 @@ class DistributedAdam(DistributedOptimizerImplBase): if flag_multi_task: server_dense_table_index = dense_table_index if len(root_params_list) > 0: - server.add_dense_table( - server_dense_table_index, root_params_list, - root_grads_list, strategy['dense_table'], - sparse_table_names) + server.add_dense_table(server_dense_table_index, + root_params_list, + root_grads_list, + strategy['dense_table'], + sparse_table_names) server_dense_table_index += 1 for i in range(len(lists_params)): - server.add_dense_table( - server_dense_table_index, lists_params[i], - lists_grads[i], strategy['dense_table'], - sparse_table_names) + server.add_dense_table(server_dense_table_index, + lists_params[i], + lists_grads[i], + strategy['dense_table'], + sparse_table_names) server_dense_table_index += 1 else: - server.add_dense_table( - dense_table_index, params, grads, - strategy['dense_table'], sparse_table_names) + server.add_dense_table(dense_table_index, params, + grads, + strategy['dense_table'], + 
sparse_table_names) else: server.add_dense_table(dense_table_index, params, grads, @@ -716,24 +719,29 @@ class DistributedAdam(DistributedOptimizerImplBase): if flag_multi_task: if len(root_params_list) > 0: - worker.add_dense_table( - dense_table_index, self._learning_rate, - root_params_list, root_grads_list, - dense_start_table_id, sparse_table_names) + worker.add_dense_table(dense_table_index, + self._learning_rate, + root_params_list, + root_grads_list, + dense_start_table_id, + sparse_table_names) dense_table_index += 1 for i in range(len(lists_params)): - worker.add_dense_table( - dense_table_index, self._learning_rate, - lists_params[i], lists_grads[i], - dense_start_table_id, sparse_table_names) + worker.add_dense_table(dense_table_index, + self._learning_rate, + lists_params[i], + lists_grads[i], + dense_start_table_id, + sparse_table_names) dense_table_index += 1 dense_table_index -= 1 else: - worker.add_dense_table( - dense_table_index, self._learning_rate, params, - grads, dense_start_table_id, sparse_table_names) + worker.add_dense_table(dense_table_index, + self._learning_rate, params, + grads, dense_start_table_id, + sparse_table_names) if FLEET_GLOBAL_DICT["enable"]: cur_prog = losses[loss_index].block.program @@ -749,8 +757,8 @@ class DistributedAdam(DistributedOptimizerImplBase): if "pull_dense" in program_configs[ program_id] and "push_dense" in program_configs[ - program_id] and len(program_configs[program_id][ - "pull_dense"]) > 0: + program_id] and len(program_configs[program_id] + ["pull_dense"]) > 0: if flag_multi_task: program_configs[program_id]["pull_dense"].extend( multi_task_dense_tables_pull) @@ -768,10 +776,12 @@ class DistributedAdam(DistributedOptimizerImplBase): program_configs[program_id][ "push_dense"] = multi_task_dense_tables_push else: - program_configs[program_id][ - "pull_dense"] = [dense_table_index] - program_configs[program_id][ - "push_dense"] = [dense_table_index] + program_configs[program_id]["pull_dense"] = [ + dense_table_index + ] + program_configs[program_id]["push_dense"] = [ + dense_table_index + ] if len(data_norm_params) != 0 and len(data_norm_grads) != 0: dense_table_index += 1 @@ -781,15 +791,18 @@ class DistributedAdam(DistributedOptimizerImplBase): data_norm_params, data_norm_grads, strategy['datanorm_table'], sparse_table_names) else: - server.add_data_norm_table( - dense_table_index, self._learning_rate, - data_norm_params, data_norm_grads, None, - sparse_table_names) - - worker.add_dense_table( - dense_table_index, self._learning_rate, - data_norm_params, data_norm_grads, - dense_start_table_id, sparse_table_names) + server.add_data_norm_table(dense_table_index, + self._learning_rate, + data_norm_params, + data_norm_grads, None, + sparse_table_names) + + worker.add_dense_table(dense_table_index, + self._learning_rate, + data_norm_params, + data_norm_grads, + dense_start_table_id, + sparse_table_names) if FLEET_GLOBAL_DICT["enable"]: cur_prog = losses[loss_index].block.program @@ -799,7 +812,8 @@ class DistributedAdam(DistributedOptimizerImplBase): attrs={ "InputNames": [i.name for i in data_norm_grads], - "TableId": dense_table_index, + "TableId": + dense_table_index, "ScaleDataNorm": strategy.get("scale_datanorm", -1) }) diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/ps_pb2.py b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/ps_pb2.py index 363475b3013..eec51ef827c 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/ps_pb2.py +++ 
b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/ps_pb2.py @@ -16,6 +16,7 @@ # source: ps.proto import sys + _b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode('latin1')) from google.protobuf.internal import enum_type_wrapper from google.protobuf import descriptor as _descriptor @@ -42,15 +43,22 @@ _TABLETYPE = _descriptor.EnumDescriptor( filename=None, file=DESCRIPTOR, values=[ - _descriptor.EnumValueDescriptor( - name='PS_SPARSE_TABLE', index=0, number=0, options=None, type=None), - _descriptor.EnumValueDescriptor( - name='PS_DENSE_TABLE', index=1, number=1, options=None, type=None), + _descriptor.EnumValueDescriptor(name='PS_SPARSE_TABLE', + index=0, + number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_DENSE_TABLE', + index=1, + number=1, + options=None, + type=None), ], containing_type=None, options=None, serialized_start=4679, - serialized_end=4731, ) + serialized_end=4731, +) _sym_db.RegisterEnumDescriptor(_TABLETYPE) TableType = enum_type_wrapper.EnumTypeWrapper(_TABLETYPE) @@ -60,103 +68,96 @@ _PSCMDID = _descriptor.EnumDescriptor( filename=None, file=DESCRIPTOR, values=[ - _descriptor.EnumValueDescriptor( - name='PS_PULL_DENSE_TABLE', - index=0, - number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_PUSH_DENSE_TABLE', - index=1, - number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_PULL_SPARSE_TABLE', - index=2, - number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_PUSH_SPARSE_TABLE', - index=3, - number=3, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_SHRINK_TABLE', index=4, number=4, options=None, type=None), - _descriptor.EnumValueDescriptor( - name='PS_SAVE_ONE_TABLE', - index=5, - number=5, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_SAVE_ALL_TABLE', - index=6, - number=6, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_LOAD_ONE_TABLE', - index=7, - number=7, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_LOAD_ALL_TABLE', - index=8, - number=8, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_CLEAR_ONE_TABLE', - index=9, - number=9, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_CLEAR_ALL_TABLE', - index=10, - number=10, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_PUSH_DENSE_PARAM', - index=11, - number=11, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_STOP_SERVER', index=12, number=12, options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_SAVE_ONE_CACHE_TABLE', - index=13, - number=13, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_GET_CACHE_THRESHOLD', - index=14, - number=14, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_CACHE_SHUFFLE', - index=15, - number=15, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_COPY_TABLE', index=16, number=16, options=None, type=None), - _descriptor.EnumValueDescriptor( - name='PS_COPY_TABLE_BY_FEASIGN', - index=17, - number=17, - options=None, - type=None), + _descriptor.EnumValueDescriptor(name='PS_PULL_DENSE_TABLE', + index=0, + number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_PUSH_DENSE_TABLE', + index=1, + number=1, + options=None, + type=None), + 
_descriptor.EnumValueDescriptor(name='PS_PULL_SPARSE_TABLE', + index=2, + number=2, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_PUSH_SPARSE_TABLE', + index=3, + number=3, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_SHRINK_TABLE', + index=4, + number=4, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_SAVE_ONE_TABLE', + index=5, + number=5, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_SAVE_ALL_TABLE', + index=6, + number=6, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_LOAD_ONE_TABLE', + index=7, + number=7, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_LOAD_ALL_TABLE', + index=8, + number=8, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_CLEAR_ONE_TABLE', + index=9, + number=9, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_CLEAR_ALL_TABLE', + index=10, + number=10, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_PUSH_DENSE_PARAM', + index=11, + number=11, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_STOP_SERVER', + index=12, + number=12, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_SAVE_ONE_CACHE_TABLE', + index=13, + number=13, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_GET_CACHE_THRESHOLD', + index=14, + number=14, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_CACHE_SHUFFLE', + index=15, + number=15, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_COPY_TABLE', + index=16, + number=16, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_COPY_TABLE_BY_FEASIGN', + index=17, + number=17, + options=None, + type=None), _descriptor.EnumValueDescriptor( name='PS_PULL_SPARSE_TABLE_WITH_DEPENDENCY', index=18, @@ -169,19 +170,22 @@ _PSCMDID = _descriptor.EnumDescriptor( number=19, options=None, type=None), - _descriptor.EnumValueDescriptor( - name='PS_PRINT_TABLE_STAT', - index=20, - number=20, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_S2S_MSG', index=21, number=101, options=None, type=None), + _descriptor.EnumValueDescriptor(name='PS_PRINT_TABLE_STAT', + index=20, + number=20, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='PS_S2S_MSG', + index=21, + number=101, + options=None, + type=None), ], containing_type=None, options=None, serialized_start=4734, - serialized_end=5304, ) + serialized_end=5304, +) _sym_db.RegisterEnumDescriptor(_PSCMDID) PsCmdID = enum_type_wrapper.EnumTypeWrapper(_PSCMDID) @@ -216,15 +220,22 @@ _FSCLIENTPARAMETER_FSAPITYPE = _descriptor.EnumDescriptor( filename=None, file=DESCRIPTOR, values=[ - _descriptor.EnumValueDescriptor( - name='HDFS', index=0, number=0, options=None, type=None), - _descriptor.EnumValueDescriptor( - name='AFS', index=1, number=1, options=None, type=None), + _descriptor.EnumValueDescriptor(name='HDFS', + index=0, + number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor(name='AFS', + index=1, + number=1, + options=None, + type=None), ], containing_type=None, options=None, serialized_start=4647, - serialized_end=4677, ) + serialized_end=4677, +) _sym_db.RegisterEnumDescriptor(_FSCLIENTPARAMETER_FSAPITYPE) _PSPARAMETER = _descriptor.Descriptor( @@ -234,38 +245,36 @@ _PSPARAMETER = _descriptor.Descriptor( file=DESCRIPTOR, containing_type=None, fields=[ - _descriptor.FieldDescriptor( - 
name='worker_class', - full_name='paddle.PSParameter.worker_class', - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='server_class', - full_name='paddle.PSParameter.server_class', - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='worker_class', + full_name='paddle.PSParameter.worker_class', + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor(name='server_class', + full_name='paddle.PSParameter.server_class', + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='instance_class', full_name='paddle.PSParameter.instance_class', @@ -282,54 +291,51 @@ _PSPARAMETER = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), - _descriptor.FieldDescriptor( - name='init_gflags', - full_name='paddle.PSParameter.init_gflags', - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=True, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='worker_param', - full_name='paddle.PSParameter.worker_param', - index=4, - number=101, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='server_param', - full_name='paddle.PSParameter.server_param', - index=5, - number=102, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='init_gflags', + full_name='paddle.PSParameter.init_gflags', + index=3, + number=4, + type=9, + cpp_type=9, + label=1, + has_default_value=True, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor(name='worker_param', + full_name='paddle.PSParameter.worker_param', + index=4, + number=101, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor(name='server_param', + full_name='paddle.PSParameter.server_param', + index=5, + number=102, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + 
containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='trainer_param', full_name='paddle.PSParameter.trainer_param', @@ -372,7 +378,8 @@ _PSPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=21, - serialized_end=330, ) + serialized_end=330, +) _WORKERPARAMETER = _descriptor.Descriptor( name='WorkerParameter', @@ -407,7 +414,8 @@ _WORKERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=332, - serialized_end=413, ) + serialized_end=413, +) _SERVERPARAMETER = _descriptor.Descriptor( name='ServerParameter', @@ -442,7 +450,8 @@ _SERVERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=415, - serialized_end=496, ) + serialized_end=496, +) _DOWNPOURWORKERPARAMETER = _descriptor.Descriptor( name='DownpourWorkerParameter', @@ -477,7 +486,8 @@ _DOWNPOURWORKERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=498, - serialized_end=577, ) + serialized_end=577, +) _DOWNPOURTRAINERPARAMETER = _descriptor.Descriptor( name='DownpourTrainerParameter', @@ -592,7 +602,8 @@ _DOWNPOURTRAINERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=580, - serialized_end=833, ) + serialized_end=833, +) _PROGRAMCONFIG = _descriptor.Descriptor( name='ProgramConfig', @@ -601,22 +612,21 @@ _PROGRAMCONFIG = _descriptor.Descriptor( file=DESCRIPTOR, containing_type=None, fields=[ - _descriptor.FieldDescriptor( - name='program_id', - full_name='paddle.ProgramConfig.program_id', - index=0, - number=1, - type=9, - cpp_type=9, - label=2, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='program_id', + full_name='paddle.ProgramConfig.program_id', + index=0, + number=1, + type=9, + cpp_type=9, + label=2, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='push_sparse_table_id', full_name='paddle.ProgramConfig.push_sparse_table_id', @@ -691,7 +701,8 @@ _PROGRAMCONFIG = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=836, - serialized_end=989, ) + serialized_end=989, +) _DENSETABLEPARAMETER = _descriptor.Descriptor( name='DenseTableParameter', @@ -774,7 +785,8 @@ _DENSETABLEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=991, - serialized_end=1114, ) + serialized_end=1114, +) _SPARSETABLEPARAMETER = _descriptor.Descriptor( name='SparseTableParameter', @@ -873,7 +885,8 @@ _SPARSETABLEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=1116, - serialized_end=1238, ) + serialized_end=1238, +) _DOWNPOURSERVERPARAMETER = _descriptor.Descriptor( name='DownpourServerParameter', @@ -924,7 +937,8 @@ _DOWNPOURSERVERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=1241, - serialized_end=1375, ) + serialized_end=1375, +) _SERVERSERVICEPARAMETER = _descriptor.Descriptor( name='ServerServiceParameter', @@ -1023,7 +1037,8 @@ _SERVERSERVICEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=1378, - serialized_end=1593, ) + serialized_end=1593, +) _TABLEPARAMETER = _descriptor.Descriptor( 
name='TableParameter', @@ -1032,22 +1047,21 @@ _TABLEPARAMETER = _descriptor.Descriptor( file=DESCRIPTOR, containing_type=None, fields=[ - _descriptor.FieldDescriptor( - name='table_id', - full_name='paddle.TableParameter.table_id', - index=0, - number=1, - type=4, - cpp_type=4, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='table_id', + full_name='paddle.TableParameter.table_id', + index=0, + number=1, + type=4, + cpp_type=4, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='table_class', full_name='paddle.TableParameter.table_class', @@ -1064,54 +1078,51 @@ _TABLEPARAMETER = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), - _descriptor.FieldDescriptor( - name='shard_num', - full_name='paddle.TableParameter.shard_num', - index=2, - number=3, - type=4, - cpp_type=4, - label=1, - has_default_value=True, - default_value=1000, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='accessor', - full_name='paddle.TableParameter.accessor', - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='type', - full_name='paddle.TableParameter.type', - index=4, - number=5, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='shard_num', + full_name='paddle.TableParameter.shard_num', + index=2, + number=3, + type=4, + cpp_type=4, + label=1, + has_default_value=True, + default_value=1000, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor(name='accessor', + full_name='paddle.TableParameter.accessor', + index=3, + number=4, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor(name='type', + full_name='paddle.TableParameter.type', + index=4, + number=5, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='compress_in_save', full_name='paddle.TableParameter.compress_in_save', @@ -1186,7 +1197,8 @@ _TABLEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=1596, - serialized_end=1916, ) + serialized_end=1916, +) _TABLEACCESSORPARAMETER = _descriptor.Descriptor( name='TableAccessorParameter', @@ -1381,7 +1393,8 @@ _TABLEACCESSORPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=1919, - serialized_end=2496, ) + serialized_end=2496, +) _DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( 
name='DownpourTableAccessorParameter', @@ -1472,7 +1485,8 @@ _DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( options=None), _descriptor.FieldDescriptor( name='show_click_decay_rate', - full_name='paddle.DownpourTableAccessorParameter.show_click_decay_rate', + full_name= + 'paddle.DownpourTableAccessorParameter.show_click_decay_rate', index=5, number=6, type=2, @@ -1504,7 +1518,8 @@ _DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( options=None), _descriptor.FieldDescriptor( name='delete_after_unseen_days', - full_name='paddle.DownpourTableAccessorParameter.delete_after_unseen_days', + full_name= + 'paddle.DownpourTableAccessorParameter.delete_after_unseen_days', index=7, number=8, type=2, @@ -1520,7 +1535,8 @@ _DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( options=None), _descriptor.FieldDescriptor( name='ssd_unseenday_threshold', - full_name='paddle.DownpourTableAccessorParameter.ssd_unseenday_threshold', + full_name= + 'paddle.DownpourTableAccessorParameter.ssd_unseenday_threshold', index=8, number=9, type=5, @@ -1544,7 +1560,8 @@ _DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=2499, - serialized_end=2813, ) + serialized_end=2813, +) _TABLEACCESSORSAVEPARAMETER = _descriptor.Descriptor( name='TableAccessorSaveParameter', @@ -1611,7 +1628,8 @@ _TABLEACCESSORSAVEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=2815, - serialized_end=2898, ) + serialized_end=2898, +) _PSREQUESTMESSAGE = _descriptor.Descriptor( name='PsRequestMessage', @@ -1620,22 +1638,21 @@ _PSREQUESTMESSAGE = _descriptor.Descriptor( file=DESCRIPTOR, containing_type=None, fields=[ - _descriptor.FieldDescriptor( - name='cmd_id', - full_name='paddle.PsRequestMessage.cmd_id', - index=0, - number=1, - type=13, - cpp_type=3, - label=2, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='cmd_id', + full_name='paddle.PsRequestMessage.cmd_id', + index=0, + number=1, + type=13, + cpp_type=3, + label=2, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='table_id', full_name='paddle.PsRequestMessage.table_id', @@ -1652,22 +1669,21 @@ _PSREQUESTMESSAGE = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), - _descriptor.FieldDescriptor( - name='params', - full_name='paddle.PsRequestMessage.params', - index=2, - number=3, - type=12, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='params', + full_name='paddle.PsRequestMessage.params', + index=2, + number=3, + type=12, + cpp_type=9, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='client_id', full_name='paddle.PsRequestMessage.client_id', @@ -1684,22 +1700,21 @@ _PSREQUESTMESSAGE = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), - _descriptor.FieldDescriptor( - name='data', - full_name='paddle.PsRequestMessage.data', - index=4, - number=5, - type=12, 
- cpp_type=9, - label=1, - has_default_value=False, - default_value=_b(""), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='data', + full_name='paddle.PsRequestMessage.data', + index=4, + number=5, + type=12, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b(""), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), ], extensions=[], nested_types=[], @@ -1710,7 +1725,8 @@ _PSREQUESTMESSAGE = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=2900, - serialized_end=3001, ) + serialized_end=3001, +) _SPARSESGDRULEPARAMETER = _descriptor.Descriptor( name='SparseSGDRuleParameter', @@ -1793,7 +1809,8 @@ _SPARSESGDRULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=3004, - serialized_end=3137, ) + serialized_end=3137, +) _SPARSECOMMONSGDRULEPARAMETER = _descriptor.Descriptor( name='SparseCommonSGDRuleParameter', @@ -1876,7 +1893,8 @@ _SPARSECOMMONSGDRULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=3140, - serialized_end=3338, ) + serialized_end=3338, +) _SPARSENAIVESGDRULEPARAMETER = _descriptor.Descriptor( name='SparseNaiveSGDRuleParameter', @@ -1943,7 +1961,8 @@ _SPARSENAIVESGDRULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=3340, - serialized_end=3452, ) + serialized_end=3452, +) _SPARSEADAGRADSGDRULEPARAMETER = _descriptor.Descriptor( name='SparseAdagradSGDRuleParameter', @@ -2026,7 +2045,8 @@ _SPARSEADAGRADSGDRULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=3455, - serialized_end=3595, ) + serialized_end=3595, +) _SPARSEADAMSGDPARAMETER = _descriptor.Descriptor( name='SparseAdamSGDParameter', @@ -2141,7 +2161,8 @@ _SPARSEADAMSGDPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=3598, - serialized_end=3798, ) + serialized_end=3798, +) _DENSESGDRULEPARAMETER = _descriptor.Descriptor( name='DenseSGDRuleParameter', @@ -2240,7 +2261,8 @@ _DENSESGDRULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=3801, - serialized_end=4026, ) + serialized_end=4026, +) _ADAMSGDPARAMETER = _descriptor.Descriptor( name='AdamSGDParameter', @@ -2339,7 +2361,8 @@ _ADAMSGDPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=4029, - serialized_end=4201, ) + serialized_end=4201, +) _NAIVESGDPARAMETER = _descriptor.Descriptor( name='NaiveSGDParameter', @@ -2390,7 +2413,8 @@ _NAIVESGDPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=4203, - serialized_end=4277, ) + serialized_end=4277, +) _SUMMARYSGDPARAMETER = _descriptor.Descriptor( name='SummarySGDParameter', @@ -2425,7 +2449,8 @@ _SUMMARYSGDPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=4279, - serialized_end=4338, ) + serialized_end=4338, +) _MOVINGAVERAGERULEPARAMETER = _descriptor.Descriptor( name='MovingAverageRuleParameter', @@ -2460,7 +2485,8 @@ _MOVINGAVERAGERULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=4340, - serialized_end=4386, ) + serialized_end=4386, +) _PSRESPONSEMESSAGE = _descriptor.Descriptor( name='PsResponseMessage', @@ -2501,22 +2527,21 @@ _PSRESPONSEMESSAGE = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), - 
_descriptor.FieldDescriptor( - name='data', - full_name='paddle.PsResponseMessage.data', - index=2, - number=3, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b(""), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='data', + full_name='paddle.PsResponseMessage.data', + index=2, + number=3, + type=12, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b(""), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), ], extensions=[], nested_types=[], @@ -2527,7 +2552,8 @@ _PSRESPONSEMESSAGE = _descriptor.Descriptor( extension_ranges=[], oneofs=[], serialized_start=4388, - serialized_end=4461, ) + serialized_end=4461, +) _FSCLIENTPARAMETER = _descriptor.Descriptor( name='FsClientParameter', @@ -2552,54 +2578,51 @@ _FSCLIENTPARAMETER = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), - _descriptor.FieldDescriptor( - name='uri', - full_name='paddle.FsClientParameter.uri', - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='user', - full_name='paddle.FsClientParameter.user', - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='passwd', - full_name='paddle.FsClientParameter.passwd', - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None), + _descriptor.FieldDescriptor(name='uri', + full_name='paddle.FsClientParameter.uri', + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor(name='user', + full_name='paddle.FsClientParameter.user', + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor(name='passwd', + full_name='paddle.FsClientParameter.passwd', + index=3, + number=4, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), _descriptor.FieldDescriptor( name='buffer_size', full_name='paddle.FsClientParameter.buffer_size', @@ -2651,14 +2674,17 @@ _FSCLIENTPARAMETER = _descriptor.Descriptor( ], extensions=[], nested_types=[], - enum_types=[_FSCLIENTPARAMETER_FSAPITYPE, ], + enum_types=[ + _FSCLIENTPARAMETER_FSAPITYPE, + ], options=None, is_extendable=False, syntax='proto2', extension_ranges=[], oneofs=[], serialized_start=4464, - serialized_end=4677, ) + 
serialized_end=4677, +) _PSPARAMETER.fields_by_name['worker_param'].message_type = _WORKERPARAMETER _PSPARAMETER.fields_by_name['server_param'].message_type = _SERVERPARAMETER @@ -2760,121 +2786,109 @@ DESCRIPTOR.enum_types_by_name['PsCmdID'] = _PSCMDID PSParameter = _reflection.GeneratedProtocolMessageType( 'PSParameter', (_message.Message, ), - dict( - DESCRIPTOR=_PSPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.PSParameter) - )) + dict(DESCRIPTOR=_PSPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.PSParameter) + )) _sym_db.RegisterMessage(PSParameter) WorkerParameter = _reflection.GeneratedProtocolMessageType( 'WorkerParameter', (_message.Message, ), - dict( - DESCRIPTOR=_WORKERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.WorkerParameter) - )) + dict(DESCRIPTOR=_WORKERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.WorkerParameter) + )) _sym_db.RegisterMessage(WorkerParameter) ServerParameter = _reflection.GeneratedProtocolMessageType( 'ServerParameter', (_message.Message, ), - dict( - DESCRIPTOR=_SERVERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.ServerParameter) - )) + dict(DESCRIPTOR=_SERVERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.ServerParameter) + )) _sym_db.RegisterMessage(ServerParameter) DownpourWorkerParameter = _reflection.GeneratedProtocolMessageType( 'DownpourWorkerParameter', (_message.Message, ), - dict( - DESCRIPTOR=_DOWNPOURWORKERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourWorkerParameter) - )) + dict(DESCRIPTOR=_DOWNPOURWORKERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourWorkerParameter) + )) _sym_db.RegisterMessage(DownpourWorkerParameter) DownpourTrainerParameter = _reflection.GeneratedProtocolMessageType( 'DownpourTrainerParameter', (_message.Message, ), - dict( - DESCRIPTOR=_DOWNPOURTRAINERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourTrainerParameter) - )) + dict(DESCRIPTOR=_DOWNPOURTRAINERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourTrainerParameter) + )) _sym_db.RegisterMessage(DownpourTrainerParameter) ProgramConfig = _reflection.GeneratedProtocolMessageType( 'ProgramConfig', (_message.Message, ), - dict( - DESCRIPTOR=_PROGRAMCONFIG, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.ProgramConfig) - )) + dict(DESCRIPTOR=_PROGRAMCONFIG, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.ProgramConfig) + )) _sym_db.RegisterMessage(ProgramConfig) DenseTableParameter = _reflection.GeneratedProtocolMessageType( 'DenseTableParameter', (_message.Message, ), - dict( - DESCRIPTOR=_DENSETABLEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DenseTableParameter) - )) + dict(DESCRIPTOR=_DENSETABLEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DenseTableParameter) + )) _sym_db.RegisterMessage(DenseTableParameter) SparseTableParameter = _reflection.GeneratedProtocolMessageType( 'SparseTableParameter', (_message.Message, ), - dict( - DESCRIPTOR=_SPARSETABLEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SparseTableParameter) - )) + dict(DESCRIPTOR=_SPARSETABLEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.SparseTableParameter) + 
)) _sym_db.RegisterMessage(SparseTableParameter) DownpourServerParameter = _reflection.GeneratedProtocolMessageType( 'DownpourServerParameter', (_message.Message, ), - dict( - DESCRIPTOR=_DOWNPOURSERVERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourServerParameter) - )) + dict(DESCRIPTOR=_DOWNPOURSERVERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourServerParameter) + )) _sym_db.RegisterMessage(DownpourServerParameter) ServerServiceParameter = _reflection.GeneratedProtocolMessageType( 'ServerServiceParameter', (_message.Message, ), - dict( - DESCRIPTOR=_SERVERSERVICEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.ServerServiceParameter) - )) + dict(DESCRIPTOR=_SERVERSERVICEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.ServerServiceParameter) + )) _sym_db.RegisterMessage(ServerServiceParameter) TableParameter = _reflection.GeneratedProtocolMessageType( 'TableParameter', (_message.Message, ), - dict( - DESCRIPTOR=_TABLEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.TableParameter) - )) + dict(DESCRIPTOR=_TABLEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.TableParameter) + )) _sym_db.RegisterMessage(TableParameter) TableAccessorParameter = _reflection.GeneratedProtocolMessageType( 'TableAccessorParameter', (_message.Message, ), - dict( - DESCRIPTOR=_TABLEACCESSORPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.TableAccessorParameter) - )) + dict(DESCRIPTOR=_TABLEACCESSORPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.TableAccessorParameter) + )) _sym_db.RegisterMessage(TableAccessorParameter) DownpourTableAccessorParameter = _reflection.GeneratedProtocolMessageType( @@ -2900,21 +2914,19 @@ _sym_db.RegisterMessage(TableAccessorSaveParameter) PsRequestMessage = _reflection.GeneratedProtocolMessageType( 'PsRequestMessage', (_message.Message, ), - dict( - DESCRIPTOR=_PSREQUESTMESSAGE, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.PsRequestMessage) - )) + dict(DESCRIPTOR=_PSREQUESTMESSAGE, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.PsRequestMessage) + )) _sym_db.RegisterMessage(PsRequestMessage) SparseSGDRuleParameter = _reflection.GeneratedProtocolMessageType( 'SparseSGDRuleParameter', (_message.Message, ), - dict( - DESCRIPTOR=_SPARSESGDRULEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SparseSGDRuleParameter) - )) + dict(DESCRIPTOR=_SPARSESGDRULEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.SparseSGDRuleParameter) + )) _sym_db.RegisterMessage(SparseSGDRuleParameter) SparseCommonSGDRuleParameter = _reflection.GeneratedProtocolMessageType( @@ -2950,51 +2962,46 @@ _sym_db.RegisterMessage(SparseAdagradSGDRuleParameter) SparseAdamSGDParameter = _reflection.GeneratedProtocolMessageType( 'SparseAdamSGDParameter', (_message.Message, ), - dict( - DESCRIPTOR=_SPARSEADAMSGDPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SparseAdamSGDParameter) - )) + dict(DESCRIPTOR=_SPARSEADAMSGDPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.SparseAdamSGDParameter) + )) _sym_db.RegisterMessage(SparseAdamSGDParameter) DenseSGDRuleParameter = _reflection.GeneratedProtocolMessageType( 'DenseSGDRuleParameter', (_message.Message, ), - dict( - 
DESCRIPTOR=_DENSESGDRULEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DenseSGDRuleParameter) - )) + dict(DESCRIPTOR=_DENSESGDRULEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DenseSGDRuleParameter) + )) _sym_db.RegisterMessage(DenseSGDRuleParameter) AdamSGDParameter = _reflection.GeneratedProtocolMessageType( 'AdamSGDParameter', (_message.Message, ), - dict( - DESCRIPTOR=_ADAMSGDPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.AdamSGDParameter) - )) + dict(DESCRIPTOR=_ADAMSGDPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.AdamSGDParameter) + )) _sym_db.RegisterMessage(AdamSGDParameter) NaiveSGDParameter = _reflection.GeneratedProtocolMessageType( 'NaiveSGDParameter', (_message.Message, ), - dict( - DESCRIPTOR=_NAIVESGDPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.NaiveSGDParameter) - )) + dict(DESCRIPTOR=_NAIVESGDPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.NaiveSGDParameter) + )) _sym_db.RegisterMessage(NaiveSGDParameter) SummarySGDParameter = _reflection.GeneratedProtocolMessageType( 'SummarySGDParameter', (_message.Message, ), - dict( - DESCRIPTOR=_SUMMARYSGDPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SummarySGDParameter) - )) + dict(DESCRIPTOR=_SUMMARYSGDPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.SummarySGDParameter) + )) _sym_db.RegisterMessage(SummarySGDParameter) MovingAverageRuleParameter = _reflection.GeneratedProtocolMessageType( @@ -3010,21 +3017,19 @@ _sym_db.RegisterMessage(MovingAverageRuleParameter) PsResponseMessage = _reflection.GeneratedProtocolMessageType( 'PsResponseMessage', (_message.Message, ), - dict( - DESCRIPTOR=_PSRESPONSEMESSAGE, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.PsResponseMessage) - )) + dict(DESCRIPTOR=_PSRESPONSEMESSAGE, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.PsResponseMessage) + )) _sym_db.RegisterMessage(PsResponseMessage) FsClientParameter = _reflection.GeneratedProtocolMessageType( 'FsClientParameter', (_message.Message, ), - dict( - DESCRIPTOR=_FSCLIENTPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.FsClientParameter) - )) + dict(DESCRIPTOR=_FSCLIENTPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.FsClientParameter) + )) _sym_db.RegisterMessage(FsClientParameter) DESCRIPTOR.has_options = True diff --git a/python/paddle/fluid/incubate/fleet/tests/ctr_dataset_reader.py b/python/paddle/fluid/incubate/fleet/tests/ctr_dataset_reader.py index 83343933074..95d47ee9baa 100644 --- a/python/paddle/fluid/incubate/fleet/tests/ctr_dataset_reader.py +++ b/python/paddle/fluid/incubate/fleet/tests/ctr_dataset_reader.py @@ -22,8 +22,9 @@ import paddle import paddle.distributed.fleet as fleet from paddle.fluid.log_helper import get_logger -logger = get_logger( - "paddle", logging.INFO, fmt='%(asctime)s - %(levelname)s - %(message)s') +logger = get_logger("paddle", + logging.INFO, + fmt='%(asctime)s - %(levelname)s - %(message)s') DATA_URL = "http://paddle-ctr-data.bj.bcebos.com/avazu_ctr_data.tgz" DATA_MD5 = "c11df99fbd14e53cd4bfa6567344b26e" @@ -60,7 +61,9 @@ def load_lr_input_record(sent): class DatasetCtrReader(fleet.MultiSlotDataGenerator): + def generate_sample(self, line): + def iter(): fs = line.strip().split('\t') dnn_input = load_dnn_input_record(fs[0]) @@ 
-84,8 +87,7 @@ def prepare_data(): lines = f.readlines() err_info = "wrong meta format" assert len(lines) == 2, err_info - assert 'dnn_input_dim:' in lines[0] and 'lr_input_dim:' in lines[ - 1], err_info + assert 'dnn_input_dim:' in lines[0] and 'lr_input_dim:' in lines[1], err_info res = map(int, [_.split(':')[1] for _ in lines]) res = list(res) dnn_input_dim = res[0] diff --git a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py index 06a90b78fd2..806de1e6da9 100644 --- a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py +++ b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py @@ -25,8 +25,9 @@ from paddle.fluid.log_helper import get_logger import ctr_dataset_reader -logger = get_logger( - "fluid", logging.INFO, fmt='%(asctime)s - %(levelname)s - %(message)s') +logger = get_logger("fluid", + logging.INFO, + fmt='%(asctime)s - %(levelname)s - %(message)s') def parse_args(): @@ -48,16 +49,14 @@ def parse_args(): type=str, default='127.0.0.1:6000', help='The path for model to store (default: 127.0.0.1:6000)') - parser.add_argument( - '--trainer_id', - type=int, - default=0, - help='The path for model to store (default: models)') - parser.add_argument( - '--trainers', - type=int, - default=1, - help='The num of trainers, (default: 1)') + parser.add_argument('--trainer_id', + type=int, + default=0, + help='The path for model to store (default: models)') + parser.add_argument('--trainers', + type=int, + default=1, + help='The num of trainers, (default: 1)') return parser.parse_args() @@ -66,24 +65,21 @@ def model(): dnn_input_dim, lr_input_dim, train_file_path = ctr_dataset_reader.prepare_data( ) """ network definition """ - dnn_data = fluid.layers.data( - name="dnn_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) - lr_data = fluid.layers.data( - name="lr_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) - label = fluid.layers.data( - name="click", - shape=[-1, 1], - dtype="int64", - lod_level=0, - append_batch_size=False) + dnn_data = fluid.layers.data(name="dnn_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + lr_data = fluid.layers.data(name="lr_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + label = fluid.layers.data(name="click", + shape=[-1, 1], + dtype="int64", + lod_level=0, + append_batch_size=False) datas = [dnn_data, lr_data, label] @@ -104,8 +100,8 @@ def model(): input=dnn_out, size=dim, act="relu", - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01)), + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.01)), name='dnn-fc-%d' % i) dnn_out = fc @@ -186,16 +182,15 @@ def train(args): logger.info("epoch {} start".format(epoch_id)) pass_start = time.time() dataset.set_filelist(filelist) - exe.train_from_dataset( - program=fleet.main_program, - dataset=dataset, - fetch_list=[avg_cost], - fetch_info=["cost"], - print_period=100, - debug=False) + exe.train_from_dataset(program=fleet.main_program, + dataset=dataset, + fetch_list=[avg_cost], + fetch_info=["cost"], + print_period=100, + debug=False) pass_time = time.time() - pass_start - logger.info("epoch {} finished, pass_time {}".format(epoch_id, - pass_time)) + logger.info("epoch {} finished, pass_time {}".format( + epoch_id, pass_time)) fleet.stop_worker() diff --git a/python/paddle/fluid/incubate/fleet/utils/fleet_util.py 
b/python/paddle/fluid/incubate/fleet/utils/fleet_util.py index 5fc8fbd0116..48ce51b3724 100644 --- a/python/paddle/fluid/incubate/fleet/utils/fleet_util.py +++ b/python/paddle/fluid/incubate/fleet/utils/fleet_util.py @@ -27,12 +27,14 @@ from paddle.fluid import core from paddle.fluid.log_helper import get_logger from paddle.distributed.fleet.utils.fs import LocalFS, HDFSClient, AFSClient from . import utils + OpRole = core.op_proto_and_checker_maker.OpRole __all__ = ["FleetUtil", "GPUPSUtil"] -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s %(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s %(levelname)s: %(message)s') fleet = None @@ -777,8 +779,10 @@ class FleetUtil(object): suffix_name = "/%s/delta-%s" % (day, pass_id) model_path = output_path.rstrip("/") + suffix_name self.rank0_print("going to save_cache_model %s" % model_path) - key_num = fleet.save_cache_model( - None, model_path, mode=mode, table_id=table_id) + key_num = fleet.save_cache_model(None, + model_path, + mode=mode, + table_id=table_id) self.rank0_print("save_cache_model done") return key_num @@ -809,8 +813,10 @@ class FleetUtil(object): suffix_name = "/%s/base" % day model_path = output_path.rstrip("/") + suffix_name self.rank0_print("going to save_cache_base_model %s" % model_path) - key_num = fleet.save_cache_model( - None, model_path, mode=2, table_id=table_id) + key_num = fleet.save_cache_model(None, + model_path, + mode=2, + table_id=table_id) self.rank0_print("save_cache_base_model done") return key_num @@ -853,8 +859,8 @@ class FleetUtil(object): " not found in scope " + "when pull dense") var_name_list.append(var_name) - fleet._fleet_ptr.pull_dense(scope, - int(table.table_id), var_name_list) + fleet._fleet_ptr.pull_dense(scope, int(table.table_id), + var_name_list) fleet._role_maker._barrier_worker() def save_paddle_inference_model(self, @@ -1022,8 +1028,11 @@ class FleetUtil(object): vars = [program.global_block().var(i) for i in var_names] with fluid.scope_guard(scope): if save_combine: - fluid.io.save_vars( - executor, "./", program, vars=vars, filename=model_name) + fluid.io.save_vars(executor, + "./", + program, + vars=vars, + filename=model_name) else: fluid.io.save_vars(executor, model_name, program, vars=vars) @@ -1431,7 +1440,8 @@ class FleetUtil(object): return [ auc, bucket_error, mae, rmse, return_actual_ctr, predicted_ctr, - copc, mean_predict_qvalue, int(total_ins_num) + copc, mean_predict_qvalue, + int(total_ins_num) ] def print_global_metrics(self, @@ -1523,12 +1533,12 @@ class FleetUtil(object): mean_predict_qvalue, total_ins_num = self.get_global_metrics(\ scope, stat_pos_name, stat_neg_name, sqrerr_name, abserr_name,\ prob_name, q_name, pos_ins_num_name, total_ins_num_name) - self.rank0_print("%s global AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f " - "RMSE=%.6f Actural_CTR=%.6f Predicted_CTR=%.6f " - "COPC=%.6f MEAN Q_VALUE=%.6f Ins number=%s" % - (print_prefix, auc, bucket_error, mae, rmse, - actual_ctr, predicted_ctr, copc, mean_predict_qvalue, - total_ins_num)) + self.rank0_print( + "%s global AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f " + "RMSE=%.6f Actural_CTR=%.6f Predicted_CTR=%.6f " + "COPC=%.6f MEAN Q_VALUE=%.6f Ins number=%s" % + (print_prefix, auc, bucket_error, mae, rmse, actual_ctr, + predicted_ctr, copc, mean_predict_qvalue, total_ins_num)) def program_type_trans(self, prog_dir, prog_fn, is_text): return utils.program_type_trans(prog_dir, prog_fn, is_text) @@ -1609,8 +1619,8 @@ class FleetUtil(object): if self._is_optimizer_op(op): 
break if op.has_attr("op_device"): - cur_attr = op.attr("op_device") if op.attr( - "op_device") != "" else type_cpu + cur_attr = op.attr( + "op_device") if op.attr("op_device") != "" else type_cpu if pre is None or pre != cur_attr: ops_list.append([]) type_list.append(cur_attr) @@ -1700,8 +1710,8 @@ class FleetUtil(object): send_list[i].extend(list(in_from_pre[i + 1])) prog = program.clone() if merged_type_list[i] != type_cpu: - prog = prog._prune_with_input( - list(in_from_pre[i]), list(send_list[i])) + prog = prog._prune_with_input(list(in_from_pre[i]), + list(send_list[i])) program_list.append(prog) else: program_list.append(prog) diff --git a/python/paddle/fluid/incubate/fleet/utils/hdfs.py b/python/paddle/fluid/incubate/fleet/utils/hdfs.py index e5b2129e857..fb1b36e33c5 100644 --- a/python/paddle/fluid/incubate/fleet/utils/hdfs.py +++ b/python/paddle/fluid/incubate/fleet/utils/hdfs.py @@ -36,7 +36,9 @@ __all__ = ["HDFSClient"] def _handle_errors(max_time_out=None): + def decorator(f): + @functools.wraps(f) def handler(*args, **kwargs): o = args[0] @@ -56,13 +58,15 @@ def _handle_errors(max_time_out=None): except ExecuteError as e: if time.time() - start >= time_out: raise FSTimeOut("args:{} timeout:{}".format( - args, time.time() - start)) + args, + time.time() - start)) time.sleep(inter) if time.time() - last_print_time > 30: print("hadoop operator timeout:args:{} timeout:{}".format( - args, time.time() - start)) + args, + time.time() - start)) last_print_time = time.time() return handler @@ -71,6 +75,7 @@ def _handle_errors(max_time_out=None): class HDFSClient(FS): + def __init__( self, hadoop_home, @@ -264,8 +269,8 @@ class HDFSClient(FS): if test_exists: if not self.is_exist(fs_src_path): - raise FSFileNotExistsError("{} is not exists".format( - fs_src_path)) + raise FSFileNotExistsError( + "{} is not exists".format(fs_src_path)) if self.is_exist(fs_dst_path): raise FSFileExistsError("{} exists already".format(fs_dst_path)) diff --git a/python/paddle/fluid/incubate/fleet/utils/http_server.py b/python/paddle/fluid/incubate/fleet/utils/http_server.py index b4ee29a065a..685228f0749 100644 --- a/python/paddle/fluid/incubate/fleet/utils/http_server.py +++ b/python/paddle/fluid/incubate/fleet/utils/http_server.py @@ -32,8 +32,9 @@ def get_logger(name, level, fmt): return logger -_http_server_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_http_server_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') class KVHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): diff --git a/python/paddle/fluid/incubate/fleet/utils/utils.py b/python/paddle/fluid/incubate/fleet/utils/utils.py index 5cb4948a859..3890da7ecec 100644 --- a/python/paddle/fluid/incubate/fleet/utils/utils.py +++ b/python/paddle/fluid/incubate/fleet/utils/utils.py @@ -90,8 +90,8 @@ def check_pruned_program_vars(train_prog, pruned_prog): if fluid.io.is_persistable(v)] pruned_vars = OrderedDict(pruned_vars) pruned_vars_name = [name for name in pruned_vars] - logger.info("persistable vars in pruned program: {}".format( - pruned_vars_name)) + logger.info( + "persistable vars in pruned program: {}".format(pruned_vars_name)) for var_name in pruned_vars: var = pruned_vars[var_name] @@ -108,9 +108,9 @@ def check_pruned_program_vars(train_prog, pruned_prog): continue if var.shape != train_prog_var.shape or var.dtype != train_prog_var.dtype: logger.error( - "variable: {} not match. 
in pruned program shape: {} dtype:{}, in train program shape: {} dtype: {}". - format(var_name, var.shape, var.dtype, train_prog_var.shape, - train_prog_var.dtype)) + "variable: {} not match. in pruned program shape: {} dtype:{}, in train program shape: {} dtype: {}" + .format(var_name, var.shape, var.dtype, train_prog_var.shape, + train_prog_var.dtype)) is_match = False return is_match @@ -120,11 +120,10 @@ def graphviz(block, output_dir="", filename='debug'): pdf_path = os.path.join(output_dir, filename + '.pdf') debugger.draw_block_graphviz(block, path=dot_path) cmd = ["dot", "-Tpdf", dot_path, "-o", pdf_path] - p = subprocess.Popen( - cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + p = subprocess.Popen(cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) p.wait() @@ -136,17 +135,17 @@ def program_type_trans(prog_dir, prog_fn, is_text): def append_save_op(block, var, path): - block.append_op( - type='save', inputs={'X': [var]}, outputs={}, - attrs={'file_path': path}) + block.append_op(type='save', + inputs={'X': [var]}, + outputs={}, + attrs={'file_path': path}) def append_load_op(block, var, path): - block.append_op( - type='load', - inputs={}, - outputs={'Out': [var]}, - attrs={'file_path': path}) + block.append_op(type='load', + inputs={}, + outputs={'Out': [var]}, + attrs={'file_path': path}) def save_var(np_array, var_name, shape_list, dtype, save_path): @@ -229,8 +228,8 @@ def try_load_model_vars(dump_dir, dump_prog_fn, is_text_dump_program, if new_shape != orig_shape: raise RuntimeError( "Shape not matching: the Program requires a parameter with a shape of ({}), " - "while the loaded parameter (namely [ {} ]) has a shape of ({}).". - format(orig_shape, each_var.name, new_shape)) + "while the loaded parameter (namely [ {} ]) has a shape of ({})." + .format(orig_shape, each_var.name, new_shape)) # check feed/fetch vars in program and config fetch_targets_names = [v.name for v in fetch_targets] @@ -242,8 +241,8 @@ def try_load_model_vars(dump_dir, dump_prog_fn, is_text_dump_program, feed_name_list = feed_target_names if feed_config.feeded_vars_names is not None and feed_target_names != feed_config.feeded_vars_names: logger.warning( - "feed vars in program and config are diff: feed in program: {}. feed in config {}.". - format(feed_target_names, feed_config.feeded_vars_names)) + "feed vars in program and config are diff: feed in program: {}. feed in config {}." + .format(feed_target_names, feed_config.feeded_vars_names)) feed_name_list = feed_config.feeded_vars_names # remove feed op in inference_program. new feed op will be added in exe.run global_block = inference_program.global_block() @@ -256,8 +255,8 @@ def try_load_model_vars(dump_dir, dump_prog_fn, is_text_dump_program, global_block._remove_op(index) if fetch_config.fetch_vars_names is not None and fetch_targets_names != fetch_config.fetch_vars_names: logger.warning( - "fetch vars in program and config are diff: fetch in program: {}. fetch in config {}.". - format(fetch_targets_names, fetch_config.fetch_vars_names)) + "fetch vars in program and config are diff: fetch in program: {}. fetch in config {}." + .format(fetch_targets_names, fetch_config.fetch_vars_names)) fetch_list = [ inference_program.global_block().var(i) for i in fetch_config.fetch_vars_names @@ -291,9 +290,9 @@ def try_load_model_vars(dump_dir, dump_prog_fn, is_text_dump_program, var_shape = var.shape[1:] if tensor_shape != var_shape: raise RuntimeError( - "feed variable '{}' shape not match. 
infer program shape: {}. feed tensor shape: {}". - format(feed_config.feeded_vars_names[i], var_shape, - tensor_shape)) + "feed variable '{}' shape not match. infer program shape: {}. feed tensor shape: {}" + .format(feed_config.feeded_vars_names[i], var_shape, + tensor_shape)) if not feed_config.feeded_vars_filelist: logger.info("generate random feed vars.") @@ -303,17 +302,15 @@ def try_load_model_vars(dump_dir, dump_prog_fn, is_text_dump_program, # create fake feed tensor. if lod_level > 1, should create_lod_tensor() if var.lod_level == 0: feed_tensors.append( - np.array( - np.random.random( - tuple([batch_size] + list( - feed_config.feeded_vars_dims[i]))), - dtype=feed_config.feeded_vars_types[i])) + np.array(np.random.random( + tuple([batch_size] + + list(feed_config.feeded_vars_dims[i]))), + dtype=feed_config.feeded_vars_types[i])) elif var.lod_level == 1: - t = np.array( - np.random.random( - tuple([batch_size] + list( - feed_config.feeded_vars_dims[i]))), - dtype=feed_config.feeded_vars_types[i]) + t = np.array(np.random.random( + tuple([batch_size] + + list(feed_config.feeded_vars_dims[i]))), + dtype=feed_config.feeded_vars_types[i]) feed_tensors.append( fluid.create_lod_tensor(t, [[1] * batch_size], place)) else: @@ -354,8 +351,8 @@ def check_not_expected_ops(prog): for op in prog.global_block().ops: if op.type in not_expected_op_types and op.type not in op_types_set: logger.warning( - "find op type '{}' in program, please check if your program is pruned correctly !". - format(op.type)) + "find op type '{}' in program, please check if your program is pruned correctly !" + .format(op.type)) op_types_set.add(op.type) @@ -366,8 +363,8 @@ def check_saved_vars_try_dump(dump_dir, fetch_config, batch_size=1, save_filename=None): - dump_prog = load_program( - os.path.join(dump_dir, dump_prog_fn), is_text_dump_program) + dump_prog = load_program(os.path.join(dump_dir, dump_prog_fn), + is_text_dump_program) saved_params = [ v for v in dump_prog.list_vars() if fluid.io.is_persistable(v) ] diff --git a/python/paddle/fluid/inference/wrapper.py b/python/paddle/fluid/inference/wrapper.py index 950a89d08bc..c81ad03df73 100644 --- a/python/paddle/fluid/inference/wrapper.py +++ b/python/paddle/fluid/inference/wrapper.py @@ -30,9 +30,8 @@ def tensor_copy_from_cpu(self, data): ''' Support input type check based on tensor.copy_from_cpu. 
''' - if isinstance(data, np.ndarray) or (isinstance(data, list) and - len(data) > 0 and - isinstance(data[0], str)): + if isinstance(data, np.ndarray) or (isinstance(data, list) and len(data) > 0 + and isinstance(data[0], str)): self.copy_from_cpu_bind(data) else: raise TypeError( diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index 1c8e3994366..47199fcd1ad 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -133,30 +133,28 @@ class ConstantInitializer(Initializer): """ block = self._check_block(block) - assert (isinstance(var, framework.Variable) or - isinstance(var, framework.EagerParamBase)) + assert (isinstance(var, framework.Variable) + or isinstance(var, framework.EagerParamBase)) assert isinstance(block, framework.Block) if framework._non_static_mode(): - _C_ops.fill_constant(var, 'value', - float(self._value), 'force_cpu', - self._force_cpu, 'dtype', + _C_ops.fill_constant(var, 'value', float(self._value), + 'force_cpu', self._force_cpu, 'dtype', int(var.dtype), 'str_value', str(float(self._value)), 'shape', var.shape) return None else: # fill constant should set the "str_value" to preserve precision - op = block.append_op( - type="fill_constant", - outputs={"Out": var}, - attrs={ - "shape": var.shape, - "dtype": int(var.dtype), - "value": float(self._value), - 'str_value': str(float(self._value)), - 'force_cpu': self._force_cpu - }, - stop_gradient=True) + op = block.append_op(type="fill_constant", + outputs={"Out": var}, + attrs={ + "shape": var.shape, + "dtype": int(var.dtype), + "value": float(self._value), + 'str_value': str(float(self._value)), + 'force_cpu': self._force_cpu + }, + stop_gradient=True) var.op = op return op @@ -233,13 +231,12 @@ class UniformInitializer(Initializer): # to be compatible of fp16 initializers if var.dtype == VarDesc.VarType.FP16: out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate(".".join( - ['uniform_random', var.name, 'tmp'])), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False) + out_var = block.create_var(name=unique_name.generate(".".join( + ['uniform_random', var.name, 'tmp'])), + shape=var.shape, + dtype=out_dtype, + type=VarDesc.VarType.LOD_TENSOR, + persistable=False) else: out_dtype = var.dtype out_var = var @@ -257,29 +254,29 @@ class UniformInitializer(Initializer): out_var._share_underline_tensor_to(var) return None else: - op = block.append_op( - type="uniform_random", - inputs={}, - outputs={"Out": out_var}, - attrs={ - "shape": var.shape, - "dtype": out_dtype, - "min": self._low, - "max": self._high, - "seed": self._seed, - "diag_num": self._diag_num, - "diag_step": self._diag_step, - "diag_val": self._diag_val - }, - stop_gradient=True) + op = block.append_op(type="uniform_random", + inputs={}, + outputs={"Out": out_var}, + attrs={ + "shape": var.shape, + "dtype": out_dtype, + "min": self._low, + "max": self._high, + "seed": self._seed, + "diag_num": self._diag_num, + "diag_step": self._diag_step, + "diag_val": self._diag_val + }, + stop_gradient=True) if var.dtype == VarDesc.VarType.FP16: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, - "out_dtype": var.dtype}) + block.append_op(type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={ + "in_dtype": out_var.dtype, + "out_dtype": var.dtype + }) var.op = op return op @@ -334,13 +331,12 @@ class NormalInitializer(Initializer): # to be 
compatible of fp16 initalizers if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate(".".join( - ['normal_init', var.name, 'tmp'])), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False) + out_var = block.create_var(name=unique_name.generate(".".join( + ['normal_init', var.name, 'tmp'])), + shape=var.shape, + dtype=out_dtype, + type=VarDesc.VarType.LOD_TENSOR, + persistable=False) else: out_dtype = var.dtype out_var = var @@ -362,9 +358,10 @@ class NormalInitializer(Initializer): return None if _in_legacy_dygraph(): - out_var = _C_ops.gaussian_random( - 'shape', var.shape, 'dtype', out_dtype, 'mean', self._mean, - 'std', self._std_dev, 'seed', self._seed, 'use_mkldnn', False) + out_var = _C_ops.gaussian_random('shape', var.shape, 'dtype', + out_dtype, 'mean', self._mean, + 'std', self._std_dev, 'seed', + self._seed, 'use_mkldnn', False) if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: var_tmp = _C_ops.cast(out_var, 'in_dtype', out_var.dtype, @@ -374,26 +371,26 @@ class NormalInitializer(Initializer): out_var._share_underline_tensor_to(var) return None else: - op = block.append_op( - type="gaussian_random", - outputs={"Out": out_var}, - attrs={ - "shape": var.shape, - "dtype": out_dtype, - "mean": self._mean, - "std": self._std_dev, - "seed": self._seed, - "use_mkldnn": False - }, - stop_gradient=True) + op = block.append_op(type="gaussian_random", + outputs={"Out": out_var}, + attrs={ + "shape": var.shape, + "dtype": out_dtype, + "mean": self._mean, + "std": self._std_dev, + "seed": self._seed, + "use_mkldnn": False + }, + stop_gradient=True) if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, - "out_dtype": var.dtype}) + block.append_op(type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={ + "in_dtype": out_var.dtype, + "out_dtype": var.dtype + }) var.op = op return op @@ -446,13 +443,12 @@ class TruncatedNormalInitializer(Initializer): # to be compatible of fp16 initalizers if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate(".".join( - ['truncated_gaussian_random', var.name, 'tmp'])), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False) + out_var = block.create_var(name=unique_name.generate(".".join( + ['truncated_gaussian_random', var.name, 'tmp'])), + shape=var.shape, + dtype=out_dtype, + type=VarDesc.VarType.LOD_TENSOR, + persistable=False) else: out_dtype = var.dtype out_var = var @@ -469,9 +465,11 @@ class TruncatedNormalInitializer(Initializer): return None if _in_legacy_dygraph(): - out_var = _C_ops.truncated_gaussian_random( - 'shape', var.shape, 'dtype', out_dtype, 'mean', self._mean, - 'std', self._std_dev, 'seed', self._seed) + out_var = _C_ops.truncated_gaussian_random('shape', var.shape, + 'dtype', out_dtype, + 'mean', self._mean, + 'std', self._std_dev, + 'seed', self._seed) if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: var_tmp = _C_ops.cast(out_var, 'in_dtype', out_var.dtype, 'out_dtype', var.dtype) @@ -480,25 +478,25 @@ class TruncatedNormalInitializer(Initializer): out_var._share_underline_tensor_to(var) return None else: - op = block.append_op( - type="truncated_gaussian_random", - outputs={"Out": out_var}, - attrs={ - 
"shape": var.shape, - "dtype": out_dtype, - "mean": self._mean, - "std": self._std_dev, - "seed": self._seed - }, - stop_gradient=True) + op = block.append_op(type="truncated_gaussian_random", + outputs={"Out": out_var}, + attrs={ + "shape": var.shape, + "dtype": out_dtype, + "mean": self._mean, + "std": self._std_dev, + "seed": self._seed + }, + stop_gradient=True) if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, - "out_dtype": var.dtype}) + block.append_op(type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={ + "in_dtype": out_var.dtype, + "out_dtype": var.dtype + }) var.op = op return op @@ -588,13 +586,12 @@ class XavierInitializer(Initializer): if var.dtype == VarDesc.VarType.FP16 or ( var.dtype == VarDesc.VarType.BF16 and not self._uniform): out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate(".".join( - ['xavier_init', var.name, 'tmp'])), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False) + out_var = block.create_var(name=unique_name.generate(".".join( + ['xavier_init', var.name, 'tmp'])), + shape=var.shape, + dtype=out_dtype, + type=VarDesc.VarType.LOD_TENSOR, + persistable=False) else: out_dtype = var.dtype out_var = var @@ -613,9 +610,10 @@ class XavierInitializer(Initializer): out_var = _C_ops.final_state_gaussian_random( out_var.shape, 0.0, std, self._seed, out_dtype, place) else: - out_var = _C_ops.gaussian_random( - 'shape', out_var.shape, 'dtype', out_dtype, 'mean', 0.0, - 'std', std, 'seed', self._seed) + out_var = _C_ops.gaussian_random('shape', out_var.shape, + 'dtype', out_dtype, 'mean', + 0.0, 'std', std, 'seed', + self._seed) if var.dtype == VarDesc.VarType.FP16 or ( var.dtype == VarDesc.VarType.BF16 and not self._uniform): @@ -628,40 +626,39 @@ class XavierInitializer(Initializer): else: if self._uniform: limit = math.sqrt(6.0 / float(fan_in + fan_out)) - op = block.append_op( - type="uniform_random", - inputs={}, - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": out_dtype, - "min": -limit, - "max": limit, - "seed": self._seed - }, - stop_gradient=True) + op = block.append_op(type="uniform_random", + inputs={}, + outputs={"Out": out_var}, + attrs={ + "shape": out_var.shape, + "dtype": out_dtype, + "min": -limit, + "max": limit, + "seed": self._seed + }, + stop_gradient=True) else: std = math.sqrt(2.0 / float(fan_in + fan_out)) - op = block.append_op( - type="gaussian_random", - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": out_dtype, - "mean": 0.0, - "std": std, - "seed": self._seed - }, - stop_gradient=True) + op = block.append_op(type="gaussian_random", + outputs={"Out": out_var}, + attrs={ + "shape": out_var.shape, + "dtype": out_dtype, + "mean": 0.0, + "std": std, + "seed": self._seed + }, + stop_gradient=True) if var.dtype == VarDesc.VarType.FP16 or ( var.dtype == VarDesc.VarType.BF16 and not self._uniform): - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, - "out_dtype": var.dtype}) + block.append_op(type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={ + "in_dtype": out_var.dtype, + "out_dtype": var.dtype + }) var.op = op return op @@ -746,13 +743,12 @@ class MSRAInitializer(Initializer): if var.dtype == VarDesc.VarType.FP16 or ( var.dtype == VarDesc.VarType.BF16 and not self._uniform): out_dtype 
= VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate(".".join( - ['masra_init', var.name, 'tmp'])), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False) + out_var = block.create_var(name=unique_name.generate(".".join( + ['masra_init', var.name, 'tmp'])), + shape=var.shape, + dtype=out_dtype, + type=VarDesc.VarType.LOD_TENSOR, + persistable=False) else: out_dtype = var.dtype out_var = var @@ -771,10 +767,11 @@ class MSRAInitializer(Initializer): out_var = _C_ops.final_state_gaussian_random( out_var.shape, 0.0, std, self._seed, out_dtype, place) else: - out_var = _C_ops.gaussian_random( - 'shape', out_var.shape, 'dtype', - int(out_dtype), 'mean', 0.0, 'std', std, 'seed', - self._seed) + out_var = _C_ops.gaussian_random('shape', + out_var.shape, 'dtype', + int(out_dtype), 'mean', + 0.0, 'std', std, 'seed', + self._seed) if var.dtype == VarDesc.VarType.FP16 or ( var.dtype == VarDesc.VarType.BF16 and not self._uniform): @@ -787,41 +784,40 @@ class MSRAInitializer(Initializer): else: if self._uniform: limit = math.sqrt(6.0 / float(fan_in)) - op = block.append_op( - type="uniform_random", - inputs={}, - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": int(out_dtype), - "min": -limit, - "max": limit, - "seed": self._seed - }, - stop_gradient=True) + op = block.append_op(type="uniform_random", + inputs={}, + outputs={"Out": out_var}, + attrs={ + "shape": out_var.shape, + "dtype": int(out_dtype), + "min": -limit, + "max": limit, + "seed": self._seed + }, + stop_gradient=True) else: std = math.sqrt(2.0 / float(fan_in)) - op = block.append_op( - type="gaussian_random", - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": int(out_dtype), - "mean": 0.0, - "std": std, - "seed": self._seed - }, - stop_gradient=True) + op = block.append_op(type="gaussian_random", + outputs={"Out": out_var}, + attrs={ + "shape": out_var.shape, + "dtype": int(out_dtype), + "mean": 0.0, + "std": std, + "seed": self._seed + }, + stop_gradient=True) if var.dtype == VarDesc.VarType.FP16 or ( var.dtype == VarDesc.VarType.BF16 and not self._uniform): - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, - "out_dtype": var.dtype}) + block.append_op(type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={ + "in_dtype": out_var.dtype, + "out_dtype": var.dtype + }) var.op = op return op @@ -918,13 +914,12 @@ class BilinearInitializer(Initializer): VarDesc.VarType.FP16, VarDesc.VarType.BF16, VarDesc.VarType.FP64 ]: out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate(".".join( - ['bilinear_init', var.name, 'tmp'])), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False) + out_var = block.create_var(name=unique_name.generate(".".join( + ['bilinear_init', var.name, 'tmp'])), + shape=var.shape, + dtype=out_dtype, + type=VarDesc.VarType.LOD_TENSOR, + persistable=False) else: out_dtype = var.dtype out_var = var @@ -939,9 +934,8 @@ class BilinearInitializer(Initializer): raise ValueError("The size of input is too big. 
") if framework._non_static_mode(): - _C_ops.assign_value(out_var, 'shape', - list(shape), 'dtype', out_dtype, value_name, - values) + _C_ops.assign_value(out_var, 'shape', list(shape), 'dtype', + out_dtype, value_name, values) if var.dtype in [ VarDesc.VarType.FP16, VarDesc.VarType.BF16, VarDesc.VarType.FP64 @@ -953,25 +947,25 @@ class BilinearInitializer(Initializer): out_var._share_underline_tensor_to(var) return None else: - op = block.append_op( - type='assign_value', - outputs={'Out': [out_var]}, - attrs={ - 'dtype': out_dtype, - 'shape': list(shape), - value_name: values - }) + op = block.append_op(type='assign_value', + outputs={'Out': [out_var]}, + attrs={ + 'dtype': out_dtype, + 'shape': list(shape), + value_name: values + }) if var.dtype in [ VarDesc.VarType.FP16, VarDesc.VarType.BF16, VarDesc.VarType.FP64 ]: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, - "out_dtype": var.dtype}) + block.append_op(type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={ + "in_dtype": out_var.dtype, + "out_dtype": var.dtype + }) var.op = op return op @@ -1023,13 +1017,12 @@ class NumpyArrayInitializer(Initializer): if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: out_dtype = VarDesc.VarType.FP32 np_value = self._value.astype("float32") - out_var = block.create_var( - name=unique_name.generate(".".join( - ['numpy_array_init', var.name, 'tmp'])), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False) + out_var = block.create_var(name=unique_name.generate(".".join( + ['numpy_array_init', var.name, 'tmp'])), + shape=var.shape, + dtype=out_dtype, + type=VarDesc.VarType.LOD_TENSOR, + persistable=False) else: out_var = var out_dtype = var.dtype @@ -1048,9 +1041,8 @@ class NumpyArrayInitializer(Initializer): "saving it to file and 'load_op' to load it") if framework._non_static_mode(): - _C_ops.assign_value(out_var, 'shape', - list(self._value.shape), 'dtype', out_dtype, - value_name, values) + _C_ops.assign_value(out_var, 'shape', list(self._value.shape), + 'dtype', out_dtype, value_name, values) if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: var_tmp = _C_ops.cast(out_var, 'in_dtype', out_var.dtype, 'out_dtype', var.dtype) @@ -1059,23 +1051,23 @@ class NumpyArrayInitializer(Initializer): out_var._share_underline_tensor_to(var) return None else: - op = block.append_op( - type='assign_value', - outputs={'Out': out_var}, - attrs={ - 'dtype': out_dtype, - 'shape': list(self._value.shape), - value_name: values - }, - stop_gradient=True) + op = block.append_op(type='assign_value', + outputs={'Out': out_var}, + attrs={ + 'dtype': out_dtype, + 'shape': list(self._value.shape), + value_name: values + }, + stop_gradient=True) if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, - "out_dtype": var.dtype}) + block.append_op(type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={ + "in_dtype": out_var.dtype, + "out_dtype": var.dtype + }) var.op = op return op @@ -1200,8 +1192,9 @@ def calculate_gain(nonlinearity, param=None): if nonlinearity in recommended_gain.keys(): return recommended_gain[nonlinearity] else: - raise ValueError("nonlinearity function {} is not suppported now.". 
- format(nonlinearity)) + raise ValueError( + "nonlinearity function {} is not suppported now.".format( + nonlinearity)) # We short the class name, since users will use the initializer with the package diff --git a/python/paddle/fluid/input.py b/python/paddle/fluid/input.py index 3e46ac52090..502a89ec36d 100644 --- a/python/paddle/fluid/input.py +++ b/python/paddle/fluid/input.py @@ -116,19 +116,18 @@ def one_hot(input, depth, allow_out_of_range=False): attrs = {'depth': depth, 'allow_out_of_range': allow_out_of_range} else: if not isinstance(depth, Variable): - # user attribute + # user attribute inputs = {'X': input} attrs = {'depth': depth, 'allow_out_of_range': allow_out_of_range} else: depth.stop_gradient = True inputs = {'X': input, 'depth_tensor': depth} attrs = {'allow_out_of_range': allow_out_of_range} - helper.append_op( - type="one_hot_v2", - inputs=inputs, - attrs=attrs, - outputs={'Out': one_hot_out}, - stop_gradient=True) + helper.append_op(type="one_hot_v2", + inputs=inputs, + attrs=attrs, + outputs={'Out': one_hot_out}, + stop_gradient=True) return one_hot_out @@ -317,20 +316,23 @@ def embedding(input, remote_prefetch = is_sparse and (not is_distributed) if remote_prefetch: assert is_sparse is True and is_distributed is False - w = helper.create_parameter( - attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False) + w = helper.create_parameter(attr=helper.param_attr, + shape=size, + dtype=dtype, + is_bias=False) tmp = helper.create_variable_for_type_inference(dtype) padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else ( size[0] + padding_idx) - helper.append_op( - type='lookup_table_v2', - inputs={'Ids': input, - 'W': w}, - outputs={'Out': tmp}, - attrs={ - 'is_sparse': is_sparse, - 'is_distributed': is_distributed, - 'remote_prefetch': remote_prefetch, - 'padding_idx': padding_idx - }) + helper.append_op(type='lookup_table_v2', + inputs={ + 'Ids': input, + 'W': w + }, + outputs={'Out': tmp}, + attrs={ + 'is_sparse': is_sparse, + 'is_distributed': is_distributed, + 'remote_prefetch': remote_prefetch, + 'padding_idx': padding_idx + }) return tmp diff --git a/python/paddle/fluid/install_check.py b/python/paddle/fluid/install_check.py index 111f33e613a..0c621766b37 100644 --- a/python/paddle/fluid/install_check.py +++ b/python/paddle/fluid/install_check.py @@ -31,6 +31,7 @@ __all__ = ['run_check'] class SimpleLayer(Layer): + def __init__(self, input_size): super(SimpleLayer, self).__init__() self._linear1 = nn.Linear( @@ -123,8 +124,9 @@ def run_check(): with executor.scope_guard(scope): with program_guard(train_prog, startup_prog): with unique_name.guard(): - inp0 = layers.data( - name="inp", shape=[2, 2], append_batch_size=False) + inp0 = layers.data(name="inp", + shape=[2, 2], + append_batch_size=False) simple_layer0 = SimpleLayer(input_size=2) out0 = simple_layer0(inp0) param_grads = backward.append_backward( diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 8b25c93d7ce..3d071fce6c7 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -68,11 +68,13 @@ __all__ = [ 'get_program_persistable_vars', ] + reader.__all__ -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') class _open_buffer(object): + def __init__(self, buffer): self.buffer = buffer @@ -81,6 +83,7 @@ class _open_buffer(object): class _buffer_reader(_open_buffer): + def __init__(self, buffer): 
super(_buffer_reader, self).__init__(buffer) self.initial_tell = self.buffer.tell() @@ -92,6 +95,7 @@ class _buffer_reader(_open_buffer): class _buffer_writer(_open_buffer): + def __exit__(self, *args): self.buffer.flush() @@ -110,8 +114,8 @@ def _open_file_buffer(path_or_buffer, mode): elif 'r' in mode: return _buffer_reader(path_or_buffer) else: - raise ValueError("Expected 'r' or 'w' in mode but got {}".format( - mode)) + raise ValueError( + "Expected 'r' or 'w' in mode but got {}".format(mode)) def _is_memory_buffer(buffer): @@ -236,20 +240,18 @@ def get_program_persistable_vars(program): def _clone_var_in_block_(block, var): assert isinstance(var, Variable) if var.desc.type() == core.VarDesc.VarType.LOD_TENSOR: - return block.create_var( - name=var.name, - shape=var.shape, - dtype=var.dtype, - type=var.type, - lod_level=var.lod_level, - persistable=True) + return block.create_var(name=var.name, + shape=var.shape, + dtype=var.dtype, + type=var.type, + lod_level=var.lod_level, + persistable=True) else: - return block.create_var( - name=var.name, - shape=var.shape, - dtype=var.dtype, - type=var.type, - persistable=True) + return block.create_var(name=var.name, + shape=var.shape, + dtype=var.dtype, + type=var.type, + persistable=True) @signature_safe_contextmanager @@ -368,12 +370,11 @@ def save_vars(executor, main_program = _get_valid_program(main_program) if vars is None: - return save_vars( - executor, - main_program=main_program, - dirname=dirname, - vars=list(filter(predicate, main_program.list_vars())), - filename=filename) + return save_vars(executor, + main_program=main_program, + dirname=dirname, + vars=list(filter(predicate, main_program.list_vars())), + filename=filename) else: params_var_name = unique_name.generate("saved_params") # give warning when there is no var in model @@ -393,8 +394,8 @@ def save_vars(executor, continue new_var = _clone_var_in_block_(save_block, each_var) if filename is None and save_to_memory is False: - save_file_path = os.path.join( - os.path.normpath(dirname), new_var.name) + save_file_path = os.path.join(os.path.normpath(dirname), + new_var.name) save_block.append_op( type='save', inputs={'X': [new_var]}, @@ -412,17 +413,16 @@ def save_vars(executor, if save_to_memory is False: save_path = os.path.join(os.path.normpath(dirname), filename) - saved_params = save_block.create_var( - type=core.VarDesc.VarType.RAW, name=params_var_name) + saved_params = save_block.create_var(type=core.VarDesc.VarType.RAW, + name=params_var_name) saved_params.desc.set_persistable(True) - save_block.append_op( - type='save_combine', - inputs={'X': save_var_list}, - outputs={'Y': saved_params}, - attrs={ - 'file_path': save_path, - 'save_to_memory': save_to_memory - }) + save_block.append_op(type='save_combine', + inputs={'X': save_var_list}, + outputs={'Y': saved_params}, + attrs={ + 'file_path': save_path, + 'save_to_memory': save_to_memory + }) # NOTE(zhiqiu): save op will add variable kLookupTablePath in save_program.desc, # which leads to diff on save_program and its desc. 
Call _sync_with_cpp @@ -501,13 +501,12 @@ def save_params(executor, dirname, main_program=None, filename=None): # The parameters weights and bias of the fc layer in the network are going to # be saved in different files in the path "./my_paddle_model" """ - return save_vars( - executor, - dirname=dirname, - main_program=main_program, - vars=None, - predicate=is_parameter, - filename=filename) + return save_vars(executor, + dirname=dirname, + main_program=main_program, + vars=None, + predicate=is_parameter, + filename=filename) def _save_distributed_persistables(executor, dirname, main_program): @@ -581,17 +580,16 @@ def _save_distributed_persistables(executor, dirname, main_program): tmp = [str(dim) for dim in slice.shape] slice_shapes.append(",".join(tmp)) - block.append_op( - type='recv_save', - attrs={ - "trainer_id": 0, - "shape": origin.shape, - "slice_shapes": slice_shapes, - "slice_varnames": slice_varnames, - "remote_varnames": remote_varnames, - "endpoints": endpoints, - "file_path": os.path.join(dirname, origin.name) - }) + block.append_op(type='recv_save', + attrs={ + "trainer_id": 0, + "shape": origin.shape, + "slice_shapes": slice_shapes, + "slice_varnames": slice_varnames, + "remote_varnames": remote_varnames, + "endpoints": endpoints, + "file_path": os.path.join(dirname, origin.name) + }) executor.run(prog) @@ -613,11 +611,14 @@ def _save_distributed_persistables(executor, dirname, main_program): attrs['epmap'] = endpoints attrs['dir'] = lookup_table_filename attrs['lookup_table'] = distributed_lookup_table - block.append_op( - type='checkpoint_notify', inputs={}, outputs={}, attrs=attrs) + block.append_op(type='checkpoint_notify', + inputs={}, + outputs={}, + attrs=attrs) executor.run(prog) def __exclude_vars(exclude_var_names=[]): + def is_valid(var): if var.name in exclude_var_names: return False @@ -652,8 +653,10 @@ def _save_distributed_persistables(executor, dirname, main_program): local_vars = list( filter(__exclude_vars(exclude_var_names), main_program.list_vars())) - save_vars( - executor, main_program=main_program, dirname=dirname, vars=local_vars) + save_vars(executor, + main_program=main_program, + dirname=dirname, + vars=local_vars) if main_program._is_chief: if remote_params_map: @@ -725,16 +728,16 @@ def save_persistables(executor, dirname, main_program=None, filename=None): # "./my_paddle_model" """ if main_program and main_program._is_distributed: - return _save_distributed_persistables( - executor, dirname=dirname, main_program=main_program) + return _save_distributed_persistables(executor, + dirname=dirname, + main_program=main_program) else: - return save_vars( - executor, - dirname=dirname, - main_program=main_program, - vars=None, - predicate=is_persistable, - filename=filename) + return save_vars(executor, + dirname=dirname, + main_program=main_program, + vars=None, + predicate=is_persistable, + filename=filename) def load_vars(executor, @@ -836,12 +839,11 @@ def load_vars(executor, "The type of input main_program is invalid, expected type is fluid.Program, but received %s" % type(main_program)) - load_vars( - executor, - dirname=dirname, - main_program=main_program, - vars=list(filter(predicate, main_program.list_vars())), - filename=filename) + load_vars(executor, + dirname=dirname, + main_program=main_program, + vars=list(filter(predicate, main_program.list_vars())), + filename=filename) else: load_prog = Program() load_block = load_prog.global_block() @@ -868,8 +870,8 @@ def load_vars(executor, continue if isinstance(each_var, Parameter): - 
orig_para_shape[each_var.name] = tuple(each_var.desc.get_shape( - )) + orig_para_shape[each_var.name] = tuple( + each_var.desc.get_shape()) if each_var.type == core.VarDesc.VarType.SELECTED_ROWS: sparse_vars.append(each_var) @@ -902,8 +904,9 @@ def load_vars(executor, var_path = os.path.join(dirname, new_var.name) if not os.path.exists(var_path): - raise ValueError("SelectedRows var {} can not find at {}". - format(new_var.name, var_path)) + raise ValueError( + "SelectedRows var {} can not find at {}".format( + new_var.name, var_path)) if os.path.isfile(var_path): load_block.append_op( @@ -921,26 +924,23 @@ def load_vars(executor, slices = [] for block in blocks: - slice = load_block.create_var( - name=block, - type=new_var.type, - shape=new_var.shape, - dtype=new_var.dtype, - persistable=False) + slice = load_block.create_var(name=block, + type=new_var.type, + shape=new_var.shape, + dtype=new_var.dtype, + persistable=False) slices.append(slice) file_path = os.path.join(var_path, block, "Param") - load_block.append_op( - type='load', - inputs={}, - outputs={'Out': [slice]}, - attrs={'file_path': file_path}) + load_block.append_op(type='load', + inputs={}, + outputs={'Out': [slice]}, + attrs={'file_path': file_path}) - load_block.append_op( - type='lookup_sparse_table_merge', - inputs={'X': slices}, - outputs={'Out': new_var}, - attrs={}) + load_block.append_op(type='lookup_sparse_table_merge', + inputs={'X': slices}, + outputs={'Out': new_var}, + attrs={}) if filename is not None: load_var_list = [] @@ -950,14 +950,13 @@ def load_vars(executor, if vars_from_memory is False: filename = os.path.join(dirname, filename) - load_block.append_op( - type='load_combine', - inputs={}, - outputs={"Out": load_var_list}, - attrs={ - 'file_path': filename, - 'model_from_memory': vars_from_memory - }) + load_block.append_op(type='load_combine', + inputs={}, + outputs={"Out": load_var_list}, + attrs={ + 'file_path': filename, + 'model_from_memory': vars_from_memory + }) executor.run(load_prog) # check var shape @@ -972,8 +971,8 @@ def load_vars(executor, if new_shape != orig_shape: raise RuntimeError( "Variable's shape does not match, the Program requires a parameter with the shape of ({}), " - "while the loaded parameter (namely [ {} ]) has a shape of ({}).". - format(orig_shape, each_var.name, new_shape)) + "while the loaded parameter (namely [ {} ]) has a shape of ({})." 
+ .format(orig_shape, each_var.name, new_shape)) @dygraph_not_support @@ -1030,12 +1029,11 @@ def load_params(executor, dirname, main_program=None, filename=None): fluid.io.load_params(executor=exe, dirname=param_path, main_program=None) """ - load_vars( - executor, - dirname=dirname, - main_program=main_program, - predicate=is_parameter, - filename=filename) + load_vars(executor, + dirname=dirname, + main_program=main_program, + predicate=is_parameter, + filename=filename) @dygraph_not_support @@ -1083,15 +1081,15 @@ def load_persistables(executor, dirname, main_program=None, filename=None): """ if main_program and main_program._is_distributed: - _load_distributed_persistables( - executor, dirname=dirname, main_program=main_program) + _load_distributed_persistables(executor, + dirname=dirname, + main_program=main_program) else: - load_vars( - executor, - dirname=dirname, - main_program=main_program, - predicate=is_persistable, - filename=filename) + load_vars(executor, + dirname=dirname, + main_program=main_program, + predicate=is_persistable, + filename=filename) def _load_distributed_persistables(executor, dirname, main_program=None): @@ -1141,40 +1139,40 @@ def _load_distributed_persistables(executor, dirname, main_program=None): offset = param.offset if is_slice: - slice = load_block.create_var( - name=slice_var.name, - type=slice_var.type, - shape=slice_var.shape, - dtype=slice_var.dtype, - persistable=True) - - load_block.append_op( - type='load', - inputs={}, - outputs={'Out': [slice]}, - attrs={ - 'file_path': os.path.join(dirname, origin_var.name), - 'seek': offset, - 'shape': slice.shape - }) + slice = load_block.create_var(name=slice_var.name, + type=slice_var.type, + shape=slice_var.shape, + dtype=slice_var.dtype, + persistable=True) + + load_block.append_op(type='load', + inputs={}, + outputs={'Out': [slice]}, + attrs={ + 'file_path': + os.path.join(dirname, origin_var.name), + 'seek': + offset, + 'shape': + slice.shape + }) else: - origin = load_block.create_var( - name="{}".format(origin_var.name), - type=origin_var.type, - shape=origin_var.shape, - dtype=origin_var.dtype, - persistable=True) + origin = load_block.create_var(name="{}".format( + origin_var.name), + type=origin_var.type, + shape=origin_var.shape, + dtype=origin_var.dtype, + persistable=True) load_block.append_op( type='load', inputs={}, outputs={'Out': [origin]}, - attrs={ - 'file_path': os.path.join(dirname, origin_var.name) - }) + attrs={'file_path': os.path.join(dirname, origin_var.name)}) load_block.append_op( type='delete_var', - inputs={'X': need_delete_vars}, ) + inputs={'X': need_delete_vars}, + ) executor.run(load_prog) @@ -1203,10 +1201,9 @@ def prepend_feed_ops(inference_program, return global_block = inference_program.global_block() - feed_var = global_block.create_var( - name=feed_holder_name, - type=core.VarDesc.VarType.FEED_MINIBATCH, - persistable=True) + feed_var = global_block.create_var(name=feed_holder_name, + type=core.VarDesc.VarType.FEED_MINIBATCH, + persistable=True) for i, name in enumerate(feed_target_names): if not global_block.has_var(name): @@ -1214,31 +1211,27 @@ def prepend_feed_ops(inference_program, "The feeded_var_names[{i}]: '{name}' doesn't exist in pruned inference program. " "Please check whether '{name}' is a valid feed_var name, or remove it from feeded_var_names " "if '{name}' is not involved in the target_vars calculation.". 
- format( - i=i, name=name)) + format(i=i, name=name)) out = global_block.var(name) - global_block._prepend_op( - type='feed', - inputs={'X': [feed_var]}, - outputs={'Out': [out]}, - attrs={'col': i}) + global_block._prepend_op(type='feed', + inputs={'X': [feed_var]}, + outputs={'Out': [out]}, + attrs={'col': i}) def append_fetch_ops(inference_program, fetch_target_names, fetch_holder_name='fetch'): global_block = inference_program.global_block() - fetch_var = global_block.create_var( - name=fetch_holder_name, - type=core.VarDesc.VarType.FETCH_LIST, - persistable=True) + fetch_var = global_block.create_var(name=fetch_holder_name, + type=core.VarDesc.VarType.FETCH_LIST, + persistable=True) for i, name in enumerate(fetch_target_names): - global_block.append_op( - type='fetch', - inputs={'X': [name]}, - outputs={'Out': [fetch_var]}, - attrs={'col': i}) + global_block.append_op(type='fetch', + inputs={'X': [name]}, + outputs={'Out': [fetch_var]}, + attrs={'col': i}) @static_only @@ -1355,8 +1348,8 @@ def save_inference_model(dirname, if isinstance(target_vars, Variable): target_vars = [target_vars] elif export_for_deployment: - if not (bool(target_vars) and - all(isinstance(var, Variable) for var in target_vars)): + if not (bool(target_vars) + and all(isinstance(var, Variable) for var in target_vars)): raise ValueError("'target_vars' should be a list of Variable.") main_program = _get_valid_program(main_program) @@ -1436,15 +1429,15 @@ def save_inference_model(dirname, paddle.fluid.core.save_op_version_info(main_program.desc) with open(model_basename, "wb") as f: f.write( - main_program._remove_training_info(clip_extra=clip_extra) - .desc.serialize_to_string()) + main_program._remove_training_info( + clip_extra=clip_extra).desc.serialize_to_string()) else: # TODO(panyx0718): Save more information so that it can also be used # for training and more flexible post-processing. with open(model_basename + ".main_program", "wb") as f: f.write( - main_program._remove_training_info(clip_extra=clip_extra) - .desc.serialize_to_string()) + main_program._remove_training_info( + clip_extra=clip_extra).desc.serialize_to_string()) if program_only: warnings.warn( @@ -1788,8 +1781,9 @@ def _unpack_saved_dict(saved_obj, protocol): part_name = key + "@@." + str(i) unpack_infor[key]["slices"].append(part_name) temp_saved_obj[part_name] = value[ - i * MAX_NUMBER_OF_ELEMENT:MAX_NUMBER_OF_ELEMENT - * (i + 1)] + i * + MAX_NUMBER_OF_ELEMENT:MAX_NUMBER_OF_ELEMENT * + (i + 1)] if unpack_infor: for key, value in unpack_infor.items(): @@ -1808,8 +1802,8 @@ def _pack_loaded_dict(load_obj): removes = [] for key, value in load_obj[unpack_info].items(): slices = [load_obj[part] for part in value["slices"]] - load_obj[key] = np.concatenate(slices).reshape(value[ - "OriginShape"]) + load_obj[key] = np.concatenate(slices).reshape( + value["OriginShape"]) removes += value["slices"] for key in removes: load_obj.pop(key) @@ -1820,6 +1814,7 @@ def _pack_loaded_dict(load_obj): @static_only def _legacy_save(param_dict, model_path, protocol=2): + def get_tensor(var): if isinstance(var, (core.VarBase, core.eager.Tensor)): return var.numpy() @@ -1897,8 +1892,9 @@ def save(program, model_path, protocol=4, **configs): type(protocol))) if protocol < 2 or protocol > 4: - raise ValueError("Expected 1<'protocol'<5, but received protocol={}". 
- format(protocol)) + raise ValueError( + "Expected 1<'protocol'<5, but received protocol={}".format( + protocol)) dir_name = os.path.dirname(model_path) if dir_name and not os.path.exists(dir_name): @@ -2011,8 +2007,8 @@ def load(program, model_path, executor=None, var_list=None): # model file save by fluid.save not found, try to load model file saved with # [save_vars, save_params, save_persistables] _logger.debug( - "{} not found, try to load model file saved with [ save_params, save_persistables, save_vars ]". - format(parameter_file_name)) + "{} not found, try to load model file saved with [ save_params, save_persistables, save_vars ]" + .format(parameter_file_name)) if executor is None: raise ValueError( "executor is required when loading model file saved with [ save_params, save_persistables, save_vars ]" @@ -2042,8 +2038,9 @@ def load(program, model_path, executor=None, var_list=None): _logger.warning("variable file [ %s ] not used" % (" ".join(list(binary_file_set)))) try: - load_vars( - executor=executor, dirname=model_path, vars=loaded_var_list) + load_vars(executor=executor, + dirname=model_path, + vars=loaded_var_list) except RuntimeError as e: _logger.error(e) raise e @@ -2069,11 +2066,10 @@ def load(program, model_path, executor=None, var_list=None): dir_name, file_name = os.path.split(model_path) try: - load_vars( - executor=executor, - dirname=dir_name, - vars=var_list, - filename=file_name) + load_vars(executor=executor, + dirname=dir_name, + vars=var_list, + filename=file_name) except RuntimeError as e: _logger.error(e) raise e @@ -2200,8 +2196,8 @@ def load_program_state(model_path, var_list=None): # model file saved with fluid.save is not found, try to load model file saved with # [save_vars, save_params, save_persistables] _logger.debug( - "{} not found, try to load model file saved with [ save_params, save_persistables, save_vars ]". - format(parameter_file_name)) + "{} not found, try to load model file saved with [ save_params, save_persistables, save_vars ]" + .format(parameter_file_name)) var_name_list = [] if var_list is None and os.path.isfile(model_path): @@ -2227,9 +2223,8 @@ def load_program_state(model_path, var_list=None): shape=var.shape, dtype=var.dtype, type=var.type, - lod_level=var.lod_level - if var.desc.type() == core.VarDesc.VarType.LOD_TENSOR else - None, + lod_level=var.lod_level if var.desc.type() + == core.VarDesc.VarType.LOD_TENSOR else None, persistable=True) def _load_vars_with_try_catch(exe, @@ -2238,11 +2233,10 @@ def load_program_state(model_path, var_list=None): filename, raise_error=True): try: - load_vars( - executor=exe, - dirname=dirname, - vars=vars, - filename=filename) + load_vars(executor=exe, + dirname=dirname, + vars=vars, + filename=filename) return True except: error_str = "Failed to load model/variables `%s`, please make sure " \ @@ -2278,21 +2272,21 @@ def load_program_state(model_path, var_list=None): None) else: for var_name in var_name_list: - # NOTE(chenweihang): If identify which files the user wants - # to load from the disk, we load these variables one by one. - # If a file does not exist, we only warn the user that the - # file may be an irrelevant file, but does not throw an error + # NOTE(chenweihang): If identify which files the user wants + # to load from the disk, we load these variables one by one. + # If a file does not exist, we only warn the user that the + # file may be an irrelevant file, but does not throw an error # to ensure that other legal variables can be loaded. 
- temp_var = load_block.create_var( - name=var_name, persistable=True) + temp_var = load_block.create_var(name=var_name, + persistable=True) if _load_vars_with_try_catch(exe, model_path, [temp_var], None, False): loaded_var_list.append(temp_var) res_dict = {} for var in loaded_var_list: - res_dict[var.name] = np.asarray(paddle.fluid.global_scope( - ).find_var(var.name).get_tensor()) + res_dict[var.name] = np.asarray( + paddle.fluid.global_scope().find_var(var.name).get_tensor()) return res_dict @@ -2412,5 +2406,5 @@ def set_program_state(program, state_dict): unused_para_list.append(k) if len(unused_para_list) > 0: warnings.warn( - "This list is not set, Because of Paramerter not found in program. There are: {}". - format(" ".join(unused_para_list))) + "This list is not set, Because of Paramerter not found in program. There are: {}" + .format(" ".join(unused_para_list))) diff --git a/python/paddle/fluid/ir.py b/python/paddle/fluid/ir.py index 2756eac990e..aca134a1df5 100644 --- a/python/paddle/fluid/ir.py +++ b/python/paddle/fluid/ir.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -67,6 +67,7 @@ def _update_grad_persistable(main_program): def apply_build_strategy(main_program, startup_program, build_strategy, pass_attrs): + def update_attr(attrs, attr_types, name, value, typ=None): if name not in attrs: attrs[name] = value @@ -120,7 +121,7 @@ def apply_build_strategy(main_program, startup_program, build_strategy, apply_pass("runtime_context_cache_pass") build_strategy.cache_runtime_context = False if build_strategy.enable_addto and use_cuda: - # NOTE: how to get fetch vars to skip memory optimization? + # NOTE: how to get fetch vars to skip memory optimization? apply_pass("inplace_addto_op_pass") build_strategy.enable_addto = False if build_strategy.enable_inplace: @@ -191,8 +192,8 @@ class RegisterPassHelper(object): op_outs = out.Outputs() if len(op_outs) != 1: raise ValueError( - "Operator '{}' has multiple outputs, please specify one output variable.". - format(out._type)) + "Operator '{}' has multiple outputs, please specify one output variable." + .format(out._type)) for op_out in op_outs.values(): vars.extend(op_out) else: @@ -204,6 +205,7 @@ class RegisterPassHelper(object): return vars, program.current_block().ops def _convert_vars_to_pass_desc(self, patterns, replaces, desc): + def _add_element_conditions(conditions, elements): for element in elements: if element._condition: @@ -250,7 +252,7 @@ class RegisterPassHelper(object): multi_pass_desc = pass_desc_pb2.MultiPassDesc() multi_pass_desc.pass_type = self._pass_type # Traverse all pass pairs and convert them to PassDesc data. - # Here need to add cache in the future. + # Here need to add cache in the future. for (pattern, replace) in self._pass_pairs: pass_desc = multi_pass_desc.pass_descs.add() # Convert ProgramDescs of pattern and replace subgraphs. 
@@ -267,7 +269,9 @@ class RegisterPassHelper(object): class PassDesc(object): + class AttrHelper(object): + def __init__(self, obj, name, element_index=None): self._obj = obj self._name = name @@ -279,8 +283,9 @@ class PassDesc(object): self._mapped = None def __getitem__(self, index): - element = PassDesc.AttrHelper( - self._obj, self._name, element_index=index) + element = PassDesc.AttrHelper(self._obj, + self._name, + element_index=index) self._elements.append(element) return element @@ -373,12 +378,14 @@ class PassDesc(object): raise ValueError( "Index '{}' of operator '{}' is incorrect.".format( index, op)) - return PassDesc.AttrHelper( - ops[index], name, element_index=element_index) + return PassDesc.AttrHelper(ops[index], + name, + element_index=element_index) self._mapped = mapped_op if var is None else mapped_var class VarHelper(paddle.static.Variable): + def __init__(self, *args, **kwargs): block = paddle.static.default_main_program().current_block() self._var = paddle.static.data(*args, **kwargs) @@ -395,6 +402,7 @@ class PassDesc(object): return attr class OpHelper(object): + def __init__(self, type=None): self._type = type @@ -425,8 +433,8 @@ class PassDesc(object): op_outs = in_arg.Outputs() if len(op_outs) != 1: raise ValueError( - "The size of outputs of operator '{}' is not equal 1, please specify one output variable.". - format(in_arg._type)) + "The size of outputs of operator '{}' is not equal 1, please specify one output variable." + .format(in_arg._type)) for op_out in op_outs.values(): op_input.extend(op_out) else: diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py index f60d1a90594..42b67a5a0df 100644 --- a/python/paddle/fluid/layer_helper.py +++ b/python/paddle/fluid/layer_helper.py @@ -28,6 +28,7 @@ from .dygraph_utils import _append_activation_in_dygraph class LayerHelper(LayerHelperBase): + def __init__(self, layer_type, **kwargs): self.kwargs = kwargs name = self.kwargs.get('name', None) @@ -37,8 +38,8 @@ class LayerHelper(LayerHelperBase): if name is None: self.kwargs['name'] = unique_name.generate(layer_type) - super(LayerHelper, self).__init__( - self.kwargs['name'], layer_type=layer_type) + super(LayerHelper, self).__init__(self.kwargs['name'], + layer_type=layer_type) def append_op(self, *args, **kwargs): return self.main_program.current_block().append_op(*args, **kwargs) @@ -125,15 +126,18 @@ class LayerHelper(LayerHelperBase): if not bias_attr: return input_var - b = self.create_parameter( - attr=bias_attr, shape=size, dtype=input_var.dtype, is_bias=True) + b = self.create_parameter(attr=bias_attr, + shape=size, + dtype=input_var.dtype, + is_bias=True) tmp = self.create_variable_for_type_inference(dtype=input_var.dtype) - self.append_op( - type='elementwise_add', - inputs={'X': [input_var], - 'Y': [b]}, - outputs={'Out': [tmp]}, - attrs={'axis': dim_start}) + self.append_op(type='elementwise_add', + inputs={ + 'X': [input_var], + 'Y': [b] + }, + outputs={'Out': [tmp]}, + attrs={'axis': dim_start}) return tmp #TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of act @@ -151,7 +155,8 @@ class LayerHelper(LayerHelperBase): use_cudnn = self.kwargs.get('use_cudnn') act['use_cudnn'] = use_cudnn use_mkldnn = self.kwargs.get( - 'use_mkldnn', _global_flags().get("FLAGS_use_mkldnn", False)) + 'use_mkldnn', + _global_flags().get("FLAGS_use_mkldnn", False)) if use_mkldnn: act['use_mkldnn'] = use_mkldnn act_type = act.pop('type') @@ -161,11 +166,10 @@ class LayerHelper(LayerHelperBase): return res else: tmp = 
self.create_variable_for_type_inference(dtype=input_var.dtype) - self.append_op( - type=act_type, - inputs={"X": [input_var]}, - outputs={"Out": [tmp]}, - attrs=act) + self.append_op(type=act_type, + inputs={"X": [input_var]}, + outputs={"Out": [tmp]}, + attrs=act) return tmp #TODO (jiabin): should we remove this since it has never be used diff --git a/python/paddle/fluid/layer_helper_base.py b/python/paddle/fluid/layer_helper_base.py index 47f0c02d287..cb604b1ce89 100644 --- a/python/paddle/fluid/layer_helper_base.py +++ b/python/paddle/fluid/layer_helper_base.py @@ -83,16 +83,15 @@ class LayerHelperBase(object): """ if isinstance(value, np.ndarray): if _in_eager_without_dygraph_check(): - return core.eager.Tensor(value, - _current_expected_place(), False, - False, name if name else None, True) + return core.eager.Tensor(value, _current_expected_place(), + False, False, name if name else None, + True) else: - py_var = core.VarBase( - value=value, - name=name if name else '', - persistable=False, - place=_current_expected_place(), - zero_copy=False) + py_var = core.VarBase(value=value, + name=name if name else '', + persistable=False, + place=_current_expected_place(), + zero_copy=False) return py_var elif isinstance(value, (core.VarBase, Variable, core.eager.Tensor)): return value @@ -123,37 +122,35 @@ class LayerHelperBase(object): [self.name, 'weight_norm_abs'])), dtype=dtype, persistable=False) - block.append_op( - type='abs', inputs={'X': x}, outputs={'Out': abs_out}) + block.append_op(type='abs', + inputs={'X': x}, + outputs={'Out': abs_out}) pow_out = block.create_var( name=unique_name.generate_with_ignorable_key(".".join( [self.name, 'weight_norm_pow'])), dtype=dtype, persistable=False) - block.append_op( - type='pow', - inputs={'X': abs_out}, - outputs={'Out': pow_out}, - attrs={'factor': float(p)}) + block.append_op(type='pow', + inputs={'X': abs_out}, + outputs={'Out': pow_out}, + attrs={'factor': float(p)}) sum_out = block.create_var( name=unique_name.generate_with_ignorable_key(".".join( [self.name, 'weight_norm_sum'])), dtype=dtype, persistable=False) - block.append_op( - type='reduce_sum', - inputs={'X': pow_out}, - outputs={'Out': sum_out}, - attrs={ - 'dim': dim, - 'keep_dim': keep_dim, - 'reduce_all': True if dim is None else False - }) - block.append_op( - type='pow', - inputs={'X': sum_out}, - outputs={'Out': out}, - attrs={'factor': 1. / p}) + block.append_op(type='reduce_sum', + inputs={'X': pow_out}, + outputs={'Out': sum_out}, + attrs={ + 'dim': dim, + 'keep_dim': keep_dim, + 'reduce_all': True if dim is None else False + }) + block.append_op(type='pow', + inputs={'X': sum_out}, + outputs={'Out': out}, + attrs={'factor': 1. 
/ p}) return out def __reshape_op(x, @@ -166,11 +163,10 @@ class LayerHelperBase(object): [self.name, 'weight_norm_reshape'])), dtype=dtype, persistable=False) - block.append_op( - type='reshape', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'shape': shape}) + block.append_op(type='reshape', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'shape': shape}) return out def __transpose_op(x, @@ -183,11 +179,10 @@ class LayerHelperBase(object): [self.name, 'weight_norm_transpose'])), dtype=dtype, persistable=False) - block.append_op( - type='transpose', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'axis': axis}) + block.append_op(type='transpose', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'axis': axis}) return out def __norm_except_dim(x, @@ -217,10 +212,11 @@ class LayerHelperBase(object): perm = list(range(len(x.shape))) perm[0], perm[dim] = dim, 0 transpose = __transpose_op(x, perm, block=block) - out_shape = [transpose.shape[0]] + [1] * (len(transpose.shape) - - 1) - reshape = __reshape_op( - transpose, shape=[transpose.shape[0], -1], block=block) + out_shape = [transpose.shape[0] + ] + [1] * (len(transpose.shape) - 1) + reshape = __reshape_op(transpose, + shape=[transpose.shape[0], -1], + block=block) norm = __norm_op(reshape, dim=[1], block=block) reshape2 = __reshape_op(norm, shape=out_shape, block=block) __transpose_op(reshape2, perm, out=out, block=block) @@ -228,18 +224,18 @@ class LayerHelperBase(object): def __weight_normalize(g, v, dim): """Calculations for weight normalization""" - norm = __norm_except_dim( - v, dim=dim, block=self.main_program.current_block()) + norm = __norm_except_dim(v, + dim=dim, + block=self.main_program.current_block()) scale = elementwise_div( x=g, y=norm) # The shapes of g and norm are the same. # Currently, elementwise_mul only support broadcast when the shape # of y is a subset of the shape of x. Thus, we reshape y to squeeze # to achieve the subset. - w = elementwise_mul( - x=v, - y=scale if dim is None else reshape( - x=scale, shape=[v.shape[dim]]), - axis=-1 if dim is None else dim) + w = elementwise_mul(x=v, + y=scale if dim is None else reshape( + x=scale, shape=[v.shape[dim]]), + axis=-1 if dim is None else dim) # To serialize the original parameter for inference, maybe a # parameter rather than a variable should be returned. 
return w @@ -268,18 +264,16 @@ class LayerHelperBase(object): dtype=dtype, shape=v_param_shape, **v_param_attr._to_kwargs(with_initializer=True)) - __norm_except_dim( - x=v_param, - out=g_param, - dim=attr.dim, - block=self.startup_program.global_block()) + __norm_except_dim(x=v_param, + out=g_param, + dim=attr.dim, + block=self.startup_program.global_block()) # keep g_param shape to be consistent with that in main_program - __reshape_op( - g_param, - g_param_shape, - out=g_param, - block=self.startup_program.global_block()) + __reshape_op(g_param, + g_param_shape, + out=g_param, + block=self.startup_program.global_block()) # Add weight normalization to main_program g_param = self.main_program.global_block().create_parameter( @@ -316,9 +310,9 @@ class LayerHelperBase(object): return None assert isinstance(attr, ParamAttr) for i, size in enumerate(shape): - assert size > 0, ( - "Expected every dim's size to be larger than 0, " - "but the size of the {}-th dim is {}".format(i, size)) + assert size > 0, ("Expected every dim's size to be larger than 0, " + "but the size of the {}-th dim is {}".format( + i, size)) # set global dtype if not dtype: dtype = self.__dtype @@ -344,8 +338,8 @@ class LayerHelperBase(object): "Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!" ) else: - if not (dtype.startswith("float") or - dtype in ["double", "uint16"]): + if not (dtype.startswith("float") + or dtype in ["double", "uint16"]): raise TypeError( "Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!" ) diff --git a/python/paddle/fluid/layers/collective.py b/python/paddle/fluid/layers/collective.py index 0b4211cbb63..b0e285e036e 100644 --- a/python/paddle/fluid/layers/collective.py +++ b/python/paddle/fluid/layers/collective.py @@ -43,23 +43,25 @@ def _allreduce(x, out=None, reduce_type="sum", sync_mode=False): type=x.type, persistable=x.persistable, stop_gradient=True) - helper.append_op( - type='allreduce', - inputs={'X': [x]}, - outputs={'Out': [out]}, - attrs={"reduce_type": red_typ_int, - "sync_mode": sync_mode}) + helper.append_op(type='allreduce', + inputs={'X': [x]}, + outputs={'Out': [out]}, + attrs={ + "reduce_type": red_typ_int, + "sync_mode": sync_mode + }) return out def _broadcast(x, root, sync_mode=False): helper = LayerHelper("broadcast", **locals()) - helper.append_op( - type='broadcast', - inputs={'X': [x]}, - outputs={'Out': [x]}, - attrs={"sync_mode": sync_mode, - "root": root}) + helper.append_op(type='broadcast', + inputs={'X': [x]}, + outputs={'Out': [x]}, + attrs={ + "sync_mode": sync_mode, + "root": root + }) return x @@ -83,27 +85,27 @@ def _c_allreduce(x, type=x.type, persistable=x.persistable) - helper.append_op( - type=op_type, - inputs={'X': [x]}, - outputs={'Out': [out]}, - attrs={'ring_id': ring_id, - 'use_calc_stream': use_calc_stream}) + helper.append_op(type=op_type, + inputs={'X': [x]}, + outputs={'Out': [out]}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': use_calc_stream + }) return out def _c_broadcast(x, root=0, ring_id=0, use_calc_stream=False): op_type = 'c_broadcast' helper = LayerHelper(op_type, **locals()) - helper.append_op( - type=op_type, - inputs={'X': [x]}, - outputs={'Out': [x]}, - attrs={ - 'root': root, - 'ring_id': ring_id, - 'use_calc_stream': use_calc_stream - }) + helper.append_op(type=op_type, + inputs={'X': [x]}, + outputs={'Out': [x]}, + attrs={ + 'root': root, + 'ring_id': ring_id, 
+ 'use_calc_stream': use_calc_stream + }) return x @@ -128,22 +130,20 @@ def _c_allgather(x, nranks, ring_id=0, use_calc_stream=False): out_shape = list(x.shape[:]) if out_shape[0] > 0: out_shape[0] *= nranks - out = helper.create_variable( - name=unique_name.generate_with_ignorable_key('.'.join( - [x.name, op_type])), - shape=out_shape, - dtype=x.dtype, - type=x.type, - persistable=x.persistable) - helper.append_op( - type=op_type, - inputs={'X': [x]}, - outputs={'Out': [out]}, - attrs={ - 'nranks': nranks, - 'ring_id': ring_id, - 'use_calc_stream': use_calc_stream - }) + out = helper.create_variable(name=unique_name.generate_with_ignorable_key( + '.'.join([x.name, op_type])), + shape=out_shape, + dtype=x.dtype, + type=x.type, + persistable=x.persistable) + helper.append_op(type=op_type, + inputs={'X': [x]}, + outputs={'Out': [out]}, + attrs={ + 'nranks': nranks, + 'ring_id': ring_id, + 'use_calc_stream': use_calc_stream + }) return out @@ -152,30 +152,29 @@ def _c_reducescatter(x, nranks, ring_id=0, use_calc_stream=False): raise TypeError('x must be a Variable') if x.shape[0] > 0 and x.shape[0] % nranks != 0: - raise ValueError('x.shape[0](%d) cannot be evenly divided by nranks(%d)' - % (x.shape[0], nranks)) + raise ValueError( + 'x.shape[0](%d) cannot be evenly divided by nranks(%d)' % + (x.shape[0], nranks)) op_type = 'c_reducescatter' helper = LayerHelper(op_type, **locals()) out_shape = list(x.shape[:]) if out_shape[0] > 0: out_shape[0] //= nranks - out = helper.create_variable( - name=unique_name.generate_with_ignorable_key('.'.join( - [x.name, op_type])), - shape=out_shape, - dtype=x.dtype, - type=x.type, - persistable=x.persistable) - helper.append_op( - type=op_type, - inputs={'X': [x]}, - outputs={'Out': [out]}, - attrs={ - 'nranks': nranks, - 'ring_id': ring_id, - 'use_calc_stream': use_calc_stream - }) + out = helper.create_variable(name=unique_name.generate_with_ignorable_key( + '.'.join([x.name, op_type])), + shape=out_shape, + dtype=x.dtype, + type=x.type, + persistable=x.persistable) + helper.append_op(type=op_type, + inputs={'X': [x]}, + outputs={'Out': [out]}, + attrs={ + 'nranks': nranks, + 'ring_id': ring_id, + 'use_calc_stream': use_calc_stream + }) return out @@ -189,9 +188,8 @@ def _c_sync_calc_stream(x): def _c_sync_comm_stream(x, ring_id): op_type = 'c_sync_comm_stream' helper = LayerHelper(op_type, **locals()) - helper.append_op( - type=op_type, - inputs={'X': [x]}, - outputs={'Out': [x]}, - attrs={'ring_id': ring_id}) + helper.append_op(type=op_type, + inputs={'X': [x]}, + outputs={'Out': [x]}, + attrs={'ring_id': ring_id}) return x diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index d143a6637f8..4c3a4e5e8fc 100755 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -61,11 +61,12 @@ def select_output(input, outputs, mask): check_variable_and_dtype(mask, 'mask', ['int32'], 'select_output') check_type(outputs, 'outputs', (list, tuple), 'select_output') - helper.append_op( - type='select_output', - inputs={'X': input, - 'Mask': mask}, - outputs={'Out': outputs}) + helper.append_op(type='select_output', + inputs={ + 'X': input, + 'Mask': mask + }, + outputs={'Out': outputs}) return outputs @@ -92,13 +93,15 @@ def select_input(inputs, mask): input_shape = inputs[0].shape input_type = inputs[0].type - out = helper.create_variable( - dtype=input_dtype, shape=input_shape, type=input_type) - helper.append_op( - type='select_input', - inputs={'X': inputs, - 'Mask': 
mask}, - outputs={'Out': out}) + out = helper.create_variable(dtype=input_dtype, + shape=input_shape, + type=input_type) + helper.append_op(type='select_input', + inputs={ + 'X': inputs, + 'Mask': mask + }, + outputs={'Out': out}) return out @@ -110,19 +113,20 @@ def select_input_with_buildin_type(inputs, mask): if isinstance(false_var, Variable) and isinstance(true_var, Variable): return select_input(inputs, mask) - elif (isinstance(false_var, (support_ret_buildin_type)) and - isinstance(false_var, type(true_var))): + elif (isinstance(false_var, (support_ret_buildin_type)) + and isinstance(false_var, type(true_var))): if false_var == true_var: return false_var else: inputs = [ - to_static_variable(false_var), to_static_variable(true_var) + to_static_variable(false_var), + to_static_variable(true_var) ] # Deal with the situations like this: false_var is int and true_var is Variable - elif ((isinstance(false_var, support_ret_buildin_type) and - isinstance(true_var, Variable)) or - (isinstance(true_var, support_ret_buildin_type) and - isinstance(false_var, Variable))): + elif ((isinstance(false_var, support_ret_buildin_type) + and isinstance(true_var, Variable)) + or (isinstance(true_var, support_ret_buildin_type) + and isinstance(false_var, Variable))): inputs = [to_static_variable(false_var), to_static_variable(true_var)] warnings.warn( "Return results from different branches in cond are not same type: " @@ -178,15 +182,16 @@ def split_lod_tensor(input, mask, level=0): helper = LayerHelper('split_lod_tensor', **locals()) out_true = helper.create_variable_for_type_inference(dtype=input.dtype) out_false = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='split_lod_tensor', - inputs={ - 'X': input, - 'Mask': mask, - }, - outputs={'OutTrue': out_true, - 'OutFalse': out_false}, - attrs={'level': level}) + helper.append_op(type='split_lod_tensor', + inputs={ + 'X': input, + 'Mask': mask, + }, + outputs={ + 'OutTrue': out_true, + 'OutFalse': out_false + }, + attrs={'level': level}) return out_true, out_false @@ -236,14 +241,15 @@ def merge_lod_tensor(in_true, in_false, x, mask, level=0): check_type(in_false, 'in_false', (Variable, list, tuple, type(None)), 'fluid.layers.merge_lod_tensor') out = helper.create_variable_for_type_inference(dtype=in_true.dtype) - helper.append_op( - type='merge_lod_tensor', - inputs={'X': x, - 'Mask': mask, - 'InTrue': in_true, - 'InFalse': in_false}, - outputs={'Out': out}, - attrs={'level': level}) + helper.append_op(type='merge_lod_tensor', + inputs={ + 'X': x, + 'Mask': mask, + 'InTrue': in_true, + 'InFalse': in_false + }, + outputs={'Out': out}, + attrs={'level': level}) return out @@ -321,21 +327,20 @@ def Print(input, helper = LayerHelper('print' + "_" + input.name, **locals()) output = helper.create_variable_for_type_inference(input.dtype) - helper.append_op( - type='print', - inputs={'In': input}, - outputs={'Out': output}, - attrs={ - 'first_n': first_n, - 'summarize': summarize, - 'message': message or "", - 'print_tensor_name': print_tensor_name, - 'print_tensor_type': print_tensor_type, - 'print_tensor_shape': print_tensor_shape, - 'print_tensor_layout': print_tensor_layout, - 'print_tensor_lod': print_tensor_lod, - 'print_phase': print_phase.upper() - }) + helper.append_op(type='print', + inputs={'In': input}, + outputs={'Out': output}, + attrs={ + 'first_n': first_n, + 'summarize': summarize, + 'message': message or "", + 'print_tensor_name': print_tensor_name, + 'print_tensor_type': print_tensor_type, + 
'print_tensor_shape': print_tensor_shape, + 'print_tensor_layout': print_tensor_layout, + 'print_tensor_lod': print_tensor_lod, + 'print_phase': print_phase.upper() + }) return output @@ -402,11 +407,12 @@ def Assert(cond, data=None, summarize=20, name=None): layer_name = name if name else ('assert_' + cond.name) helper = LayerHelper(layer_name, **locals()) - op = helper.append_op( - type="assert", - inputs={"Cond": cond, - "Data": [] if data is None else list(data)}, - attrs={"summarize": summarize}) + op = helper.append_op(type="assert", + inputs={ + "Cond": cond, + "Data": [] if data is None else list(data) + }, + attrs={"summarize": summarize}) return op @@ -456,8 +462,8 @@ class BlockGuardWithCompletion(BlockGuard): return False self.rnn.status = StaticRNN.AFTER_RNN_BLOCK self.rnn._complete_op() - return super(BlockGuardWithCompletion, self).__exit__(exc_type, exc_val, - exc_tb) + return super(BlockGuardWithCompletion, + self).__exit__(exc_type, exc_val, exc_tb) class StaticRNNMemoryLink(object): @@ -652,23 +658,21 @@ class StaticRNN(object): parent_block = self._parent_block() var_name = unique_name.generate_with_ignorable_key("@".join( [self.helper.name, "memory_boot"])) - boot_var = parent_block.create_var( - name=var_name, - shape=shape, - dtype=batch_ref.dtype, - persistable=False) - - parent_block.append_op( - type="fill_constant_batch_size_like", - inputs={'Input': [batch_ref]}, - outputs={'Out': [boot_var]}, - attrs={ - 'value': init_value, - 'shape': boot_var.shape, - 'dtype': boot_var.dtype, - 'input_dim_idx': ref_batch_dim_idx, - 'output_dim_idx': init_batch_dim_idx - }) + boot_var = parent_block.create_var(name=var_name, + shape=shape, + dtype=batch_ref.dtype, + persistable=False) + + parent_block.append_op(type="fill_constant_batch_size_like", + inputs={'Input': [batch_ref]}, + outputs={'Out': [boot_var]}, + attrs={ + 'value': init_value, + 'shape': boot_var.shape, + 'dtype': boot_var.dtype, + 'input_dim_idx': ref_batch_dim_idx, + 'output_dim_idx': init_batch_dim_idx + }) return self.memory(init=boot_var) else: @@ -677,8 +681,8 @@ class StaticRNN(object): [self.helper.name, "mem"])), dtype=init.dtype, shape=init.shape) - self.memories[pre_mem.name] = StaticRNNMemoryLink( - init=init, pre_mem=pre_mem) + self.memories[pre_mem.name] = StaticRNNMemoryLink(init=init, + pre_mem=pre_mem) return pre_mem def step_input(self, x): @@ -727,8 +731,10 @@ class StaticRNN(object): elif x.shape[0] != -1 and self.seq_len != x.shape[0]: raise ValueError("Static RNN only take fix seq_len input") - ipt = self.helper.create_variable( - name=x.name, dtype=x.dtype, shape=list(x.shape[1:]), type=x.type) + ipt = self.helper.create_variable(name=x.name, + dtype=x.dtype, + shape=list(x.shape[1:]), + type=x.type) self.inputs.append(ipt) return ipt @@ -777,16 +783,15 @@ class StaticRNN(object): check_type(o, "o", Variable, "fluid.layers.StaticRNN.step_output") tmp_o = self.helper.create_variable_for_type_inference(dtype=o.dtype) - self.helper.append_op( - type='rnn_memory_helper', - inputs={'X': [o]}, - outputs={'Out': tmp_o}, - attrs={'dtype': o.dtype}) + self.helper.append_op(type='rnn_memory_helper', + inputs={'X': [o]}, + outputs={'Out': tmp_o}, + attrs={'dtype': o.dtype}) - out_var = self._parent_block().create_var( - name=tmp_o.name, - shape=[self.seq_len] + list(tmp_o.shape), - dtype=tmp_o.dtype) + out_var = self._parent_block().create_var(name=tmp_o.name, + shape=[self.seq_len] + + list(tmp_o.shape), + dtype=tmp_o.dtype) self.outputs.append(out_var) @@ -920,32 +925,33 @@ class 
StaticRNN(object): assert isinstance(mem_var, Variable) new_mem = self.helper.create_variable_for_type_inference( dtype=mem_var.dtype) - rnn_block.append_op( - type='rnn_memory_helper', - inputs={'X': [mem_var]}, - outputs={'Out': [new_mem]}, - attrs={'dtype': mem_var.dtype}) + rnn_block.append_op(type='rnn_memory_helper', + inputs={'X': [mem_var]}, + outputs={'Out': [new_mem]}, + attrs={'dtype': mem_var.dtype}) memories.append(new_mem.name) - parent_block.append_op( - type='recurrent', - inputs={ - 'inputs': inlinks, - 'initial_states': boot_memories, - 'parameters': parameters - }, - outputs={'outputs': outlinks, - 'step_scopes': [step_scope]}, - attrs={ - 'has_states': len(pre_memories) > 0, - 'ex_states': pre_memories, - 'states': memories, - 'sub_block': rnn_block - }) + parent_block.append_op(type='recurrent', + inputs={ + 'inputs': inlinks, + 'initial_states': boot_memories, + 'parameters': parameters + }, + outputs={ + 'outputs': outlinks, + 'step_scopes': [step_scope] + }, + attrs={ + 'has_states': len(pre_memories) > 0, + 'ex_states': pre_memories, + 'states': memories, + 'sub_block': rnn_block + }) class WhileGuard(BlockGuard): + def __init__(self, while_op): if not isinstance(while_op, While): raise TypeError("WhileGuard takes a while op") @@ -1114,8 +1120,8 @@ class While(object): def _complete(self): main_program = self.helper.main_program while_block = main_program.current_block() - parent_block = main_program.block(main_program.current_block() - .parent_idx) + parent_block = main_program.block( + main_program.current_block().parent_idx) inner_outputs = {self.cond_var.name} x_name_list = set() @@ -1134,16 +1140,18 @@ class While(object): parent_block.append_op( type='while', inputs={ - 'X': [ - parent_block._var_recursive(x_name) - for x_name in x_name_list - ], + 'X': + [parent_block._var_recursive(x_name) for x_name in x_name_list], 'Condition': [self.cond_var] }, - outputs={'Out': out_vars, - 'StepScopes': [step_scope]}, - attrs={'sub_block': while_block, - "is_test": self.is_test}) + outputs={ + 'Out': out_vars, + 'StepScopes': [step_scope] + }, + attrs={ + 'sub_block': while_block, + "is_test": self.is_test + }) def assign_skip_lod_tensor_array(input, output): @@ -1156,8 +1164,8 @@ def assign_skip_lod_tensor_array(input, output): if input.type == core.VarDesc.VarType.LOD_TENSOR_ARRAY: main_program = input.block.program - parent_block = main_program.block(main_program.current_block() - .parent_idx) + parent_block = main_program.block( + main_program.current_block().parent_idx) if parent_block and not parent_block._find_var_recursive(input.name): assign(input, output) else: @@ -1260,9 +1268,9 @@ def while_loop(cond, body, loop_vars, is_test=False, name=None): try: assert_same_structure(output_vars, loop_vars, check_types=False) except ValueError as e: - raise ValueError("body in while_loop should return the same arity " - "(length and structure) as loop_vars: {0}".format( - e)) + raise ValueError( + "body in while_loop should return the same arity " + "(length and structure) as loop_vars: {0}".format(e)) now_cond = cond(*output_vars) map_structure(assign_skip_lod_tensor_array, output_vars, loop_vars) assign(now_cond, pre_cond) @@ -1324,14 +1332,12 @@ def lod_rank_table(x, level=0): 'lod_rank_table') helper = LayerHelper("lod_rank_table", **locals()) - table = helper.create_variable( - type=core.VarDesc.VarType.LOD_RANK_TABLE, - name=unique_name.generate("lod_rank_table")) - helper.append_op( - type='lod_rank_table', - inputs={'X': x}, - outputs={'Out': table}, - 
attrs={'level': level}) + table = helper.create_variable(type=core.VarDesc.VarType.LOD_RANK_TABLE, + name=unique_name.generate("lod_rank_table")) + helper.append_op(type='lod_rank_table', + inputs={'X': x}, + outputs={'Out': table}, + attrs={'level': level}) return table @@ -1354,10 +1360,9 @@ def max_sequence_len(rank_table): """ helper = LayerHelper("max_seqence_len", **locals()) res = helper.create_variable_for_type_inference(dtype="int64") - helper.append_op( - type="max_sequence_len", - inputs={"RankTable": rank_table}, - outputs={"Out": res}) + helper.append_op(type="max_sequence_len", + inputs={"RankTable": rank_table}, + outputs={"Out": res}) return res @@ -1405,11 +1410,12 @@ def lod_tensor_to_array(x, table): name=unique_name.generate("lod_tensor_to_array"), type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, dtype=x.dtype) - helper.append_op( - type='lod_tensor_to_array', - inputs={'X': x, - 'RankTable': table}, - outputs={'Out': array}) + helper.append_op(type='lod_tensor_to_array', + inputs={ + 'X': x, + 'RankTable': table + }, + outputs={'Out': array}) return array @@ -1448,11 +1454,12 @@ def array_to_lod_tensor(x, table): helper = LayerHelper("array_to_lod_tensor", **locals()) tmp = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="array_to_lod_tensor", - inputs={'X': x, - 'RankTable': table}, - outputs={'Out': tmp}) + helper.append_op(type="array_to_lod_tensor", + inputs={ + 'X': x, + 'RankTable': table + }, + outputs={'Out': tmp}) return tmp @@ -1484,11 +1491,10 @@ def increment(x, value=1.0, in_place=True): out = helper.create_variable_for_type_inference(dtype=x.dtype) else: out = x - helper.append_op( - type='increment', - inputs={'X': [x]}, - outputs={'Out': [out]}, - attrs={'step': float(value)}) + helper.append_op(type='increment', + inputs={'X': [x]}, + outputs={'Out': [out]}, + attrs={'step': float(value)}) return out @@ -1572,8 +1578,8 @@ def array_write(x, i, array=None): helper = LayerHelper('array_write', **locals()) if array is not None: if not isinstance( - array, - Variable) or array.type != core.VarDesc.VarType.LOD_TENSOR_ARRAY: + array, Variable + ) or array.type != core.VarDesc.VarType.LOD_TENSOR_ARRAY: raise TypeError( "array should be tensor array vairable in array_write Op") if array is None: @@ -1581,11 +1587,12 @@ def array_write(x, i, array=None): name="{0}.out".format(helper.name), type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, dtype=x.dtype) - helper.append_op( - type='write_to_array', - inputs={'X': [x], - 'I': [i]}, - outputs={'Out': [array]}) + helper.append_op(type='write_to_array', + inputs={ + 'X': [x], + 'I': [i] + }, + outputs={'Out': [array]}) return array @@ -1616,16 +1623,16 @@ def create_array(dtype, initialized_list=None): if initialized_list is not None: if not isinstance(initialized_list, (list, tuple)): raise TypeError( - "Require type(initialized_list) should be list/tuple, but received {}". - format(type(initialized_list))) + "Require type(initialized_list) should be list/tuple, but received {}" + .format(type(initialized_list))) array = list(initialized_list) # NOTE: Only support plain list like [x, y,...], not support nested list in static mode. for val in array: if not isinstance(val, Variable): raise TypeError( - "All values in `initialized_list` should be Variable, but recevied {}.". - format(type(val))) + "All values in `initialized_list` should be Variable, but recevied {}." 
+ .format(type(val))) if _non_static_mode(): return array @@ -1689,12 +1696,13 @@ def less_than(x, y, force_cpu=None, cond=None, name=None): if force_cpu is not None: attrs['force_cpu'] = force_cpu - helper.append_op( - type='less_than', - inputs={'X': [x], - 'Y': [y]}, - outputs={'Out': [cond]}, - attrs=attrs) + helper.append_op(type='less_than', + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [cond]}, + attrs=attrs) return cond @@ -1743,12 +1751,13 @@ def less_equal(x, y, cond=None, name=None): attrs = dict() - helper.append_op( - type='less_equal', - inputs={'X': [x], - 'Y': [y]}, - outputs={'Out': [cond]}, - attrs=attrs) + helper.append_op(type='less_equal', + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [cond]}, + attrs=attrs) return cond @@ -1799,12 +1808,13 @@ def greater_than(x, y, cond=None, name=None): if in_dygraph_mode(): return _C_ops.final_state_greater_than(x, y, -1) else: - helper.append_op( - type='greater_than', - inputs={'X': [x], - 'Y': [y]}, - outputs={'Out': [cond]}, - attrs=attrs) + helper.append_op(type='greater_than', + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [cond]}, + attrs=attrs) return cond @@ -1854,12 +1864,13 @@ def greater_equal(x, y, cond=None, name=None): attrs = dict() - helper.append_op( - type='greater_equal', - inputs={'X': [x], - 'Y': [y]}, - outputs={'Out': [cond]}, - attrs=attrs) + helper.append_op(type='greater_equal', + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [cond]}, + attrs=attrs) return cond @@ -1904,9 +1915,12 @@ def equal(x, y, cond=None, name=None): cond = helper.create_variable_for_type_inference(dtype='bool') cond.stop_gradient = True - helper.append_op( - type='equal', inputs={'X': [x], - 'Y': [y]}, outputs={'Out': [cond]}) + helper.append_op(type='equal', + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [cond]}) return cond @@ -1950,9 +1964,12 @@ def not_equal(x, y, cond=None, name=None): cond = helper.create_variable_for_type_inference(dtype='bool') cond.stop_gradient = True - helper.append_op( - type='not_equal', inputs={'X': [x], - 'Y': [y]}, outputs={'Out': [cond]}) + helper.append_op(type='not_equal', + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [cond]}) return cond @@ -2037,11 +2054,12 @@ def array_read(array, i): Variable) or array.type != core.VarDesc.VarType.LOD_TENSOR_ARRAY: raise TypeError("array should be tensor array vairable") out = helper.create_variable_for_type_inference(dtype=array.dtype) - helper.append_op( - type='read_from_array', - inputs={'X': [array], - 'I': [i]}, - outputs={'Out': [out]}) + helper.append_op(type='read_from_array', + inputs={ + 'X': [array], + 'I': [i] + }, + outputs={'Out': [out]}) return out @@ -2075,13 +2093,14 @@ def shrink_memory(x, i, table): check_type(i, 'i', Variable, 'shrink_memory') check_type(table, 'table', Variable, 'shrink_memory') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='shrink_rnn_memory', - inputs={'X': [x], - 'I': [i], - 'RankTable': [table]}, - outputs={'Out': [out]}, - attrs={}) + helper.append_op(type='shrink_rnn_memory', + inputs={ + 'X': [x], + 'I': [i], + 'RankTable': [table] + }, + outputs={'Out': [out]}, + attrs={}) return out @@ -2146,8 +2165,9 @@ def array_length(array): helper = LayerHelper('array_length', **locals()) tmp = helper.create_variable_for_type_inference(dtype='int64') tmp.stop_gradient = True - helper.append_op( - type='lod_array_length', inputs={'X': [array]}, outputs={'Out': [tmp]}) + helper.append_op(type='lod_array_length', + 
inputs={'X': [array]}, + outputs={'Out': [tmp]}) return tmp @@ -2169,8 +2189,8 @@ class ConditionalBlockGuard(BlockGuard): def __exit__(self, exc_type, exc_val, exc_tb): self.block.complete() - return super(ConditionalBlockGuard, self).__exit__(exc_type, exc_val, - exc_tb) + return super(ConditionalBlockGuard, + self).__exit__(exc_type, exc_val, exc_tb) class ConditionalBlock(object): @@ -2216,8 +2236,10 @@ class ConditionalBlock(object): intermediate = set() params = set() - params, intermediate = get_inputs_outputs_in_block( - inside_block, params, intermediate, helper=self.helper) + params, intermediate = get_inputs_outputs_in_block(inside_block, + params, + intermediate, + helper=self.helper) # Todo(liym27) Here assume that all params are in recursive parent block # but when minimize() called in control flow, some params may be in @@ -2240,8 +2262,10 @@ class ConditionalBlock(object): 'Cond': self.inputs, 'Input': param_list, }, - outputs={'Out': out_list, - 'Scope': [step_scope]}, + outputs={ + 'Out': out_list, + 'Scope': [step_scope] + }, attrs={ 'sub_block': inside_block, 'is_scalar_condition': self.is_scalar_condition @@ -2299,8 +2323,8 @@ class ConditionalBlock(object): param_list.append(cpt.to_text(inner_var.name)) grad_op_desc, op_grad_to_var = core.get_grad_op_desc( - conditional_block_op.desc, - cpt.to_text(set()), [grad_sub_block.desc]) + conditional_block_op.desc, cpt.to_text(set()), + [grad_sub_block.desc]) # append op_desc in grad_op_descs to target_block op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName() @@ -2315,9 +2339,8 @@ class ConditionalBlock(object): new_vars = set() for grad_var_name in new_op_desc.output_arg_names(): - if grad_sub_block.desc.has_var_recursive( - cpt.to_bytes(grad_var_name) - ) or grad_var_name == core.empty_var_name(): + if grad_sub_block.desc.has_var_recursive(cpt.to_bytes( + grad_var_name)) or grad_var_name == core.empty_var_name(): continue grad_sub_block.desc.var(cpt.to_bytes(grad_var_name)) new_vars.add(grad_var_name) @@ -2347,8 +2370,9 @@ def copy_var_to_parent_block(var, layer_helper): and parent_block._find_var_recursive(var.name): parent_block_var = var else: - parent_block_var = parent_block.create_var( - dtype=var.dtype, shape=var.shape, type=var.type) + parent_block_var = parent_block.create_var(dtype=var.dtype, + shape=var.shape, + type=var.type) assign(var, parent_block_var) return parent_block_var @@ -2464,8 +2488,8 @@ def cond(pred, true_fn=None, false_fn=None, name=None): if false_fn is not None: if not callable(false_fn): raise TypeError( - "The false_fn in cond must be callable, but received {}". 
- format(type(false_fn).__name__)) + "The false_fn in cond must be callable, but received {}" + .format(type(false_fn).__name__)) return false_fn() return None @@ -2491,8 +2515,8 @@ def cond(pred, true_fn=None, false_fn=None, name=None): raise TypeError( "The false_fn in cond must be callable, but received {}".format( type(false_fn).__name__)) - false_cond_block = ConditionalBlock( - [logical_not(pred)], is_scalar_condition=True) + false_cond_block = ConditionalBlock([logical_not(pred)], + is_scalar_condition=True) with false_cond_block.block(): origin_false_output = false_fn() if origin_false_output is not None: @@ -2520,7 +2544,8 @@ def cond(pred, true_fn=None, false_fn=None, name=None): format(e)) mask = cast(pred, dtype='int32') - merge_func = lambda false_var, true_var : select_input_with_buildin_type([false_var, true_var], mask) + merge_func = lambda false_var, true_var: select_input_with_buildin_type( + [false_var, true_var], mask) merged_output = map_structure(merge_func, false_output, true_output) return merged_output @@ -2618,7 +2643,8 @@ def case(pred_fn_pairs, default=None, name=None): if len(pred_fn) != 2: raise TypeError( _error_message("The tuple's size", "pred_fn_pairs", "case", - "2", str(len(pred_fn)) + "-tuple")) + "2", + str(len(pred_fn)) + "-tuple")) pred, fn = pred_fn if not isinstance(pred, Variable): @@ -2741,12 +2767,11 @@ class Switch(object): else: pre_cond_num = len(self.pre_not_conditions) pre_not_cond = self.pre_not_conditions[pre_cond_num - 1] - new_not_cond = logical_and( - x=pre_not_cond, y=logical_not(x=condition)) + new_not_cond = logical_and(x=pre_not_cond, + y=logical_not(x=condition)) self.pre_not_conditions.append(new_not_cond) cond_block = ConditionalBlock( - [logical_and( - x=pre_not_cond, y=condition)], + [logical_and(x=pre_not_cond, y=condition)], is_scalar_condition=True) return ConditionalBlockGuard(cond_block) @@ -2777,6 +2802,7 @@ class Switch(object): class IfElseBlockGuard(object): + def __init__(self, is_true, ifelse): if not isinstance(ifelse, IfElse): raise TypeError("ifelse must be an instance of IfElse class") @@ -2913,15 +2939,16 @@ class IfElse(object): name=unique_name.generate_with_ignorable_key('ifelse_input' + self.helper.name), dtype=x.dtype) - parent_block.append_op( - type='split_lod_tensor', - inputs={ - 'X': x, - 'Mask': self.cond, - }, - outputs={'OutTrue': out_true, - 'OutFalse': out_false}, - attrs={'level': 0}) + parent_block.append_op(type='split_lod_tensor', + inputs={ + 'X': x, + 'Mask': self.cond, + }, + outputs={ + 'OutTrue': out_true, + 'OutFalse': out_false + }, + attrs={'level': 0}) self.input_table[id(x)] = (out_true, out_false) else: out_true, out_false = self.input_table[id(x)] @@ -2978,12 +3005,11 @@ class IfElse(object): rlist = [] for false_var, true_var in zip(*self.output_table): rlist.append( - merge_lod_tensor( - in_true=true_var, - in_false=false_var, - mask=self.cond, - x=self.cond, - level=0)) + merge_lod_tensor(in_true=true_var, + in_false=false_var, + mask=self.cond, + x=self.cond, + level=0)) return rlist @@ -3173,37 +3199,37 @@ class DynamicRNN(object): name=unique_name.generate('lod_rank_table'), type=core.VarDesc.VarType.LOD_RANK_TABLE) self.lod_rank_table.stop_gradient = True - parent_block.append_op( - type='lod_rank_table', - inputs={"X": x}, - outputs={"Out": self.lod_rank_table}, - attrs={"level": level}) + parent_block.append_op(type='lod_rank_table', + inputs={"X": x}, + outputs={"Out": self.lod_rank_table}, + attrs={"level": level}) self.max_seq_len = parent_block.create_var( 
name=unique_name.generate('dynamic_rnn_max_seq_len'), dtype='int64') self.max_seq_len.stop_gradient = False - parent_block.append_op( - type='max_sequence_len', - inputs={'RankTable': self.lod_rank_table}, - outputs={"Out": self.max_seq_len}) + parent_block.append_op(type='max_sequence_len', + inputs={'RankTable': self.lod_rank_table}, + outputs={"Out": self.max_seq_len}) self.cond.stop_gradient = True - parent_block.append_op( - type='less_than', - inputs={'X': self.step_idx, - 'Y': self.max_seq_len}, - outputs={'Out': self.cond}, - attrs={'force_cpu': True}) + parent_block.append_op(type='less_than', + inputs={ + 'X': self.step_idx, + 'Y': self.max_seq_len + }, + outputs={'Out': self.cond}, + attrs={'force_cpu': True}) input_array = parent_block.create_var( name=unique_name.generate('dynamic_rnn_input_array'), type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, dtype=x.dtype) self.input_array.append((input_array, x.dtype)) - parent_block.append_op( - type='lod_tensor_to_array', - inputs={'X': x, - 'RankTable': self.lod_rank_table}, - outputs={'Out': input_array}) + parent_block.append_op(type='lod_tensor_to_array', + inputs={ + 'X': x, + 'RankTable': self.lod_rank_table + }, + outputs={'Out': input_array}) return array_read(array=input_array, i=self.step_idx) def static_input(self, x): @@ -3342,11 +3368,12 @@ class DynamicRNN(object): name=unique_name.generate("dynamic_rnn_static_input_reordered"), type=core.VarDesc.VarType.LOD_TENSOR, dtype=x.dtype) - parent_block.append_op( - type='reorder_lod_tensor_by_rank', - inputs={'X': [x], - 'RankTable': [self.lod_rank_table]}, - outputs={'Out': [x_reordered]}) + parent_block.append_op(type='reorder_lod_tensor_by_rank', + inputs={ + 'X': [x], + 'RankTable': [self.lod_rank_table] + }, + outputs={'Out': [x_reordered]}) return shrink_memory(x_reordered, self.step_idx, self.lod_rank_table) @signature_safe_contextmanager @@ -3361,8 +3388,10 @@ class DynamicRNN(object): """ if self.status != DynamicRNN.BEFORE_RNN: raise ValueError("rnn.block() can only be invoke once") - self.step_idx = fill_constant( - shape=[1], dtype='int64', value=0, force_cpu=True) + self.step_idx = fill_constant(shape=[1], + dtype='int64', + value=0, + force_cpu=True) self.step_idx.stop_gradient = False self.status = DynamicRNN.IN_RNN with self.while_op.block(): @@ -3372,17 +3401,15 @@ class DynamicRNN(object): for new_mem, mem_array in self.mem_link: array_write(x=new_mem, i=self.step_idx, array=mem_array) - less_than( - x=self.step_idx, - y=self.max_seq_len, - force_cpu=True, - cond=self.cond) + less_than(x=self.step_idx, + y=self.max_seq_len, + force_cpu=True, + cond=self.cond) self.status = DynamicRNN.AFTER_RNN for each_array in self.output_array: self.outputs.append( - array_to_lod_tensor( - x=each_array, table=self.lod_rank_table)) + array_to_lod_tensor(x=each_array, table=self.lod_rank_table)) def __call__(self, *args, **kwargs): """ @@ -3516,26 +3543,27 @@ class DynamicRNN(object): name=unique_name.generate('dynamic_rnn_mem_init_reordered'), type=core.VarDesc.VarType.LOD_TENSOR, dtype=init.dtype) - parent_block.append_op( - type='reorder_lod_tensor_by_rank', - inputs={ - 'X': [init_tensor], - 'RankTable': [self.lod_rank_table] - }, - outputs={'Out': [init_reordered]}) + parent_block.append_op(type='reorder_lod_tensor_by_rank', + inputs={ + 'X': [init_tensor], + 'RankTable': [self.lod_rank_table] + }, + outputs={'Out': [init_reordered]}) init_tensor = init_reordered mem_array = parent_block.create_var( name=unique_name.generate('dynamic_rnn_mem_array'), 
type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, dtype=init.dtype) - parent_block.append_op( - type='write_to_array', - inputs={'X': init_tensor, - 'I': self.zero_idx}, - outputs={'Out': mem_array}) + parent_block.append_op(type='write_to_array', + inputs={ + 'X': init_tensor, + 'I': self.zero_idx + }, + outputs={'Out': mem_array}) retv = array_read(array=mem_array, i=self.step_idx) - retv = shrink_memory( - x=retv, i=self.step_idx, table=self.lod_rank_table) + retv = shrink_memory(x=retv, + i=self.step_idx, + table=self.lod_rank_table) self.mem_dict[retv.name] = mem_array return retv else: @@ -3547,22 +3575,22 @@ class DynamicRNN(object): init = parent_block.create_var( name=unique_name.generate('mem_init'), dtype=dtype) arr, dtype = self.input_array[0] - in0 = parent_block.create_var( - name=unique_name.generate('in0'), dtype=dtype) - parent_block.append_op( - type='read_from_array', - inputs={'X': [arr], - 'I': [self.zero_idx]}, - outputs={'Out': [in0]}) - parent_block.append_op( - type='fill_constant_batch_size_like', - inputs={'Input': [in0]}, - outputs={'Out': [init]}, - attrs={ - 'shape': [-1] + shape, - 'value': float(value), - 'dtype': init.dtype - }) + in0 = parent_block.create_var(name=unique_name.generate('in0'), + dtype=dtype) + parent_block.append_op(type='read_from_array', + inputs={ + 'X': [arr], + 'I': [self.zero_idx] + }, + outputs={'Out': [in0]}) + parent_block.append_op(type='fill_constant_batch_size_like', + inputs={'Input': [in0]}, + outputs={'Out': [init]}, + attrs={ + 'shape': [-1] + shape, + 'value': float(value), + 'dtype': init.dtype + }) return self.memory(init=init) def update_memory(self, ex_mem, new_mem): @@ -3629,16 +3657,15 @@ class DynamicRNN(object): parent_block = self._parent_block_() self.zero_idx = parent_block.create_var( name=unique_name.generate('zero_idx'), dtype='int64') - parent_block.append_op( - type='fill_constant', - inputs={}, - outputs={'Out': [self.zero_idx]}, - attrs={ - 'shape': [1], - 'dtype': self.zero_idx.dtype, - 'value': float(0), - 'force_cpu': True - }) + parent_block.append_op(type='fill_constant', + inputs={}, + outputs={'Out': [self.zero_idx]}, + attrs={ + 'shape': [1], + 'dtype': self.zero_idx.dtype, + 'value': float(0), + 'force_cpu': True + }) def _parent_block_(self): prog = self.helper.main_program @@ -3650,8 +3677,8 @@ class DynamicRNN(object): def _assert_in_rnn_block_(self, method): if self.status != DynamicRNN.IN_RNN: - raise ValueError("{0} can only be invoked inside rnn block.".format( - method)) + raise ValueError( + "{0} can only be invoked inside rnn block.".format(method)) def switch_case(branch_index, branch_fns, default=None, name=None): @@ -3764,16 +3791,16 @@ def switch_case(branch_index, branch_fns, default=None, name=None): if key in keys_of_fns: raise ValueError( - "The key in 'branch_fns' must be unique, but '{}' appears more than once.". - format(key)) + "The key in 'branch_fns' must be unique, but '{}' appears more than once." 
+ .format(key)) else: keys_of_fns.append(key) if not callable(fn): raise TypeError( - _error_message("The type of function for key {}".format( - key), "branch_fns", "switch_case", "callable", type( - fn))) + _error_message( + "The type of function for key {}".format(key), + "branch_fns", "switch_case", "callable", type(fn))) if default is None: default = sorted(branch_fns)[-1][1] @@ -3832,11 +3859,12 @@ def reorder_lod_tensor_by_rank(x, rank_table): helper = LayerHelper('reorder_lod_tensor_by_rank', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='reorder_lod_tensor_by_rank', - inputs={'X': [x], - 'RankTable': [rank_table]}, - outputs={'Out': [out]}) + helper.append_op(type='reorder_lod_tensor_by_rank', + inputs={ + 'X': [x], + 'RankTable': [rank_table] + }, + outputs={'Out': [out]}) return out @@ -3882,6 +3910,7 @@ def is_empty(x, name=None): helper = LayerHelper("is_empty", **locals()) cond = helper.create_variable_for_type_inference(dtype='bool') cond.stop_gradient = True - helper.append_op( - type='is_empty', inputs={'X': [x]}, outputs={'Out': [cond]}) + helper.append_op(type='is_empty', + inputs={'X': [x]}, + outputs={'Out': [cond]}) return cond diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 75b2b26fb9d..f89c95b93a1 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -272,27 +272,26 @@ def retinanet_target_assign(bbox_pred, bbox_inside_weight = helper.create_variable_for_type_inference( dtype=anchor_box.dtype) fg_num = helper.create_variable_for_type_inference(dtype='int32') - helper.append_op( - type="retinanet_target_assign", - inputs={ - 'Anchor': anchor_box, - 'GtBoxes': gt_boxes, - 'GtLabels': gt_labels, - 'IsCrowd': is_crowd, - 'ImInfo': im_info - }, - outputs={ - 'LocationIndex': loc_index, - 'ScoreIndex': score_index, - 'TargetLabel': target_label, - 'TargetBBox': target_bbox, - 'BBoxInsideWeight': bbox_inside_weight, - 'ForegroundNumber': fg_num - }, - attrs={ - 'positive_overlap': positive_overlap, - 'negative_overlap': negative_overlap - }) + helper.append_op(type="retinanet_target_assign", + inputs={ + 'Anchor': anchor_box, + 'GtBoxes': gt_boxes, + 'GtLabels': gt_labels, + 'IsCrowd': is_crowd, + 'ImInfo': im_info + }, + outputs={ + 'LocationIndex': loc_index, + 'ScoreIndex': score_index, + 'TargetLabel': target_label, + 'TargetBBox': target_bbox, + 'BBoxInsideWeight': bbox_inside_weight, + 'ForegroundNumber': fg_num + }, + attrs={ + 'positive_overlap': positive_overlap, + 'negative_overlap': negative_overlap + }) loc_index.stop_gradient = True score_index.stop_gradient = True @@ -434,29 +433,28 @@ def rpn_target_assign(bbox_pred, dtype=anchor_box.dtype) bbox_inside_weight = helper.create_variable_for_type_inference( dtype=anchor_box.dtype) - helper.append_op( - type="rpn_target_assign", - inputs={ - 'Anchor': anchor_box, - 'GtBoxes': gt_boxes, - 'IsCrowd': is_crowd, - 'ImInfo': im_info - }, - outputs={ - 'LocationIndex': loc_index, - 'ScoreIndex': score_index, - 'TargetLabel': target_label, - 'TargetBBox': target_bbox, - 'BBoxInsideWeight': bbox_inside_weight - }, - attrs={ - 'rpn_batch_size_per_im': rpn_batch_size_per_im, - 'rpn_straddle_thresh': rpn_straddle_thresh, - 'rpn_positive_overlap': rpn_positive_overlap, - 'rpn_negative_overlap': rpn_negative_overlap, - 'rpn_fg_fraction': rpn_fg_fraction, - 'use_random': use_random - }) + helper.append_op(type="rpn_target_assign", + inputs={ + 'Anchor': anchor_box, + 
'GtBoxes': gt_boxes, + 'IsCrowd': is_crowd, + 'ImInfo': im_info + }, + outputs={ + 'LocationIndex': loc_index, + 'ScoreIndex': score_index, + 'TargetLabel': target_label, + 'TargetBBox': target_bbox, + 'BBoxInsideWeight': bbox_inside_weight + }, + attrs={ + 'rpn_batch_size_per_im': rpn_batch_size_per_im, + 'rpn_straddle_thresh': rpn_straddle_thresh, + 'rpn_positive_overlap': rpn_positive_overlap, + 'rpn_negative_overlap': rpn_negative_overlap, + 'rpn_fg_fraction': rpn_fg_fraction, + 'use_random': use_random + }) loc_index.stop_gradient = True score_index.stop_gradient = True @@ -608,14 +606,17 @@ def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25): out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="sigmoid_focal_loss", - inputs={"X": x, - "Label": label, - "FgNum": fg_num}, - attrs={"gamma": gamma, - 'alpha': alpha}, - outputs={"Out": out}) + helper.append_op(type="sigmoid_focal_loss", + inputs={ + "X": x, + "Label": label, + "FgNum": fg_num + }, + attrs={ + "gamma": gamma, + 'alpha': alpha + }, + outputs={"Out": out}) return out @@ -714,11 +715,10 @@ def detection_output(loc, return_index=True) """ helper = LayerHelper("detection_output", **locals()) - decoded_box = box_coder( - prior_box=prior_box, - prior_box_var=prior_box_var, - target_box=loc, - code_type='decode_center_size') + decoded_box = box_coder(prior_box=prior_box, + prior_box_var=prior_box_var, + target_box=loc, + code_type='decode_center_size') scores = nn.softmax(input=scores) scores = nn.transpose(scores, perm=[0, 2, 1]) scores.stop_gradient = True @@ -726,35 +726,39 @@ def detection_output(loc, dtype=decoded_box.dtype) if return_index: index = helper.create_variable_for_type_inference(dtype='int') - helper.append_op( - type="multiclass_nms2", - inputs={'Scores': scores, - 'BBoxes': decoded_box}, - outputs={'Out': nmsed_outs, - 'Index': index}, - attrs={ - 'background_label': 0, - 'nms_threshold': nms_threshold, - 'nms_top_k': nms_top_k, - 'keep_top_k': keep_top_k, - 'score_threshold': score_threshold, - 'nms_eta': 1.0, - }) + helper.append_op(type="multiclass_nms2", + inputs={ + 'Scores': scores, + 'BBoxes': decoded_box + }, + outputs={ + 'Out': nmsed_outs, + 'Index': index + }, + attrs={ + 'background_label': 0, + 'nms_threshold': nms_threshold, + 'nms_top_k': nms_top_k, + 'keep_top_k': keep_top_k, + 'score_threshold': score_threshold, + 'nms_eta': 1.0, + }) index.stop_gradient = True else: - helper.append_op( - type="multiclass_nms", - inputs={'Scores': scores, - 'BBoxes': decoded_box}, - outputs={'Out': nmsed_outs}, - attrs={ - 'background_label': 0, - 'nms_threshold': nms_threshold, - 'nms_top_k': nms_top_k, - 'keep_top_k': keep_top_k, - 'score_threshold': score_threshold, - 'nms_eta': 1.0, - }) + helper.append_op(type="multiclass_nms", + inputs={ + 'Scores': scores, + 'BBoxes': decoded_box + }, + outputs={'Out': nmsed_outs}, + attrs={ + 'background_label': 0, + 'nms_threshold': nms_threshold, + 'nms_top_k': nms_top_k, + 'keep_top_k': keep_top_k, + 'score_threshold': score_threshold, + 'nms_eta': 1.0, + }) nmsed_outs.stop_gradient = True if return_index: return nmsed_outs, index @@ -806,12 +810,13 @@ def iou_similarity(x, y, box_normalized=True, name=None): helper = LayerHelper("iou_similarity", **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="iou_similarity", - inputs={"X": x, - "Y": y}, - attrs={"box_normalized": box_normalized}, - outputs={"Out": out}) + helper.append_op(type="iou_similarity", + inputs={ + "X": 
x, + "Y": y + }, + attrs={"box_normalized": box_normalized}, + outputs={"Out": out}) return out @@ -958,11 +963,10 @@ def box_coder(prior_box, attrs['variance'] = prior_box_var else: raise TypeError("Input variance of box_coder must be Variable or lisz") - helper.append_op( - type="box_coder", - inputs=inputs, - attrs=attrs, - outputs={"OutputBox": output_box}) + helper.append_op(type="box_coder", + inputs=inputs, + attrs=attrs, + outputs={"OutputBox": output_box}) return output_box @@ -992,11 +996,10 @@ def polygon_box_transform(input, name=None): helper = LayerHelper("polygon_box_transform", **locals()) output = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type="polygon_box_transform", - inputs={"Input": input}, - attrs={}, - outputs={"Output": output}) + helper.append_op(type="polygon_box_transform", + inputs={"Input": input}, + attrs={}, + outputs={"Output": output}) return output @@ -1125,15 +1128,14 @@ def yolov3_loss(x, "scale_x_y": scale_x_y, } - helper.append_op( - type='yolov3_loss', - inputs=inputs, - outputs={ - 'Loss': loss, - 'ObjectnessMask': objectness_mask, - 'GTMatchMask': gt_match_mask - }, - attrs=attrs) + helper.append_op(type='yolov3_loss', + inputs=inputs, + outputs={ + 'Loss': loss, + 'ObjectnessMask': objectness_mask, + 'GTMatchMask': gt_match_mask + }, + attrs=attrs) return loss @@ -1220,17 +1222,16 @@ def yolo_box(x, "iou_aware_factor": iou_aware_factor } - helper.append_op( - type='yolo_box', - inputs={ - "X": x, - "ImgSize": img_size, - }, - outputs={ - 'Boxes': boxes, - 'Scores': scores, - }, - attrs=attrs) + helper.append_op(type='yolo_box', + inputs={ + "X": x, + "ImgSize": img_size, + }, + outputs={ + 'Boxes': boxes, + 'Scores': scores, + }, + attrs=attrs) return boxes, scores @@ -1303,28 +1304,27 @@ def detection_map(detect_res, true_pos = input_states[1] if input_states is not None else None false_pos = input_states[2] if input_states is not None else None - helper.append_op( - type="detection_map", - inputs={ - 'Label': label, - 'DetectRes': detect_res, - 'HasState': has_state, - 'PosCount': pos_count, - 'TruePos': true_pos, - 'FalsePos': false_pos - }, - outputs={ - 'MAP': map_out, - 'AccumPosCount': accum_pos_count_out, - 'AccumTruePos': accum_true_pos_out, - 'AccumFalsePos': accum_false_pos_out - }, - attrs={ - 'overlap_threshold': overlap_threshold, - 'evaluate_difficult': evaluate_difficult, - 'ap_type': ap_version, - 'class_num': class_num, - }) + helper.append_op(type="detection_map", + inputs={ + 'Label': label, + 'DetectRes': detect_res, + 'HasState': has_state, + 'PosCount': pos_count, + 'TruePos': true_pos, + 'FalsePos': false_pos + }, + outputs={ + 'MAP': map_out, + 'AccumPosCount': accum_pos_count_out, + 'AccumTruePos': accum_true_pos_out, + 'AccumFalsePos': accum_false_pos_out + }, + attrs={ + 'overlap_threshold': overlap_threshold, + 'evaluate_difficult': evaluate_difficult, + 'ap_type': ap_version, + 'class_num': class_num, + }) return map_out @@ -1404,17 +1404,16 @@ def bipartite_match(dist_matrix, match_indices = helper.create_variable_for_type_inference(dtype='int32') match_distance = helper.create_variable_for_type_inference( dtype=dist_matrix.dtype) - helper.append_op( - type='bipartite_match', - inputs={'DistMat': dist_matrix}, - attrs={ - 'match_type': match_type, - 'dist_threshold': dist_threshold, - }, - outputs={ - 'ColToRowMatchIndices': match_indices, - 'ColToRowMatchDist': match_distance - }) + helper.append_op(type='bipartite_match', + inputs={'DistMat': dist_matrix}, + attrs={ + 
'match_type': match_type, + 'dist_threshold': dist_threshold, + }, + outputs={ + 'ColToRowMatchIndices': match_indices, + 'ColToRowMatchDist': match_distance + }) return match_indices, match_distance @@ -1511,16 +1510,17 @@ def target_assign(input, helper = LayerHelper('target_assign', **locals()) out = helper.create_variable_for_type_inference(dtype=input.dtype) out_weight = helper.create_variable_for_type_inference(dtype='float32') - helper.append_op( - type='target_assign', - inputs={ - 'X': input, - 'MatchIndices': matched_indices, - 'NegIndices': negative_indices - }, - outputs={'Out': out, - 'OutWeight': out_weight}, - attrs={'mismatch_value': mismatch_value}) + helper.append_op(type='target_assign', + inputs={ + 'X': input, + 'MatchIndices': matched_indices, + 'NegIndices': negative_indices + }, + outputs={ + 'Out': out, + 'OutWeight': out_weight + }, + attrs={'mismatch_value': mismatch_value}) return out, out_weight @@ -1676,11 +1676,12 @@ def ssd_loss(location, # 2. Compute confidence for mining hard examples # 2.1. Get the target label based on matched indices - gt_label = nn.reshape( - x=gt_label, shape=(len(gt_label.shape) - 1) * (0, ) + (-1, 1)) + gt_label = nn.reshape(x=gt_label, + shape=(len(gt_label.shape) - 1) * (0, ) + (-1, 1)) gt_label.stop_gradient = True - target_label, _ = target_assign( - gt_label, matched_indices, mismatch_value=background_label) + target_label, _ = target_assign(gt_label, + matched_indices, + mismatch_value=background_label) # 2.2. Compute confidence loss. # Reshape confidence to 2D tensor. confidence = __reshape_to_2d(confidence) @@ -1693,39 +1694,38 @@ def ssd_loss(location, actual_shape.stop_gradient = True # shape=(-1, 0) is set for compile-time, the correct shape is set by # actual_shape in runtime. - conf_loss = nn.reshape( - x=conf_loss, shape=(-1, 0), actual_shape=actual_shape) + conf_loss = nn.reshape(x=conf_loss, + shape=(-1, 0), + actual_shape=actual_shape) conf_loss.stop_gradient = True neg_indices = helper.create_variable_for_type_inference(dtype='int32') dtype = matched_indices.dtype updated_matched_indices = helper.create_variable_for_type_inference( dtype=dtype) - helper.append_op( - type='mine_hard_examples', - inputs={ - 'ClsLoss': conf_loss, - 'LocLoss': None, - 'MatchIndices': matched_indices, - 'MatchDist': matched_dist, - }, - outputs={ - 'NegIndices': neg_indices, - 'UpdatedMatchIndices': updated_matched_indices - }, - attrs={ - 'neg_pos_ratio': neg_pos_ratio, - 'neg_dist_threshold': neg_overlap, - 'mining_type': mining_type, - 'sample_size': sample_size, - }) + helper.append_op(type='mine_hard_examples', + inputs={ + 'ClsLoss': conf_loss, + 'LocLoss': None, + 'MatchIndices': matched_indices, + 'MatchDist': matched_dist, + }, + outputs={ + 'NegIndices': neg_indices, + 'UpdatedMatchIndices': updated_matched_indices + }, + attrs={ + 'neg_pos_ratio': neg_pos_ratio, + 'neg_dist_threshold': neg_overlap, + 'mining_type': mining_type, + 'sample_size': sample_size, + }) # 4. Assign classification and regression targets # 4.1. Encoded bbox according to the prior boxes. - encoded_bbox = box_coder( - prior_box=prior_box, - prior_box_var=prior_box_var, - target_box=gt_box, - code_type='encode_center_size') + encoded_bbox = box_coder(prior_box=prior_box, + prior_box_var=prior_box_var, + target_box=gt_box, + code_type='encode_center_size') # 4.2. 
Assign regression targets target_bbox, target_loc_weight = target_assign( encoded_bbox, updated_matched_indices, mismatch_value=background_label) @@ -1888,8 +1888,9 @@ def prior_box(input, """ helper = LayerHelper("prior_box", **locals()) dtype = helper.input_dtype() - check_variable_and_dtype( - input, 'input', ['uint8', 'int8', 'float32', 'float64'], 'prior_box') + check_variable_and_dtype(input, 'input', + ['uint8', 'int8', 'float32', 'float64'], + 'prior_box') def _is_list_or_tuple_(data): return (isinstance(data, list) or isinstance(data, tuple)) @@ -1926,11 +1927,16 @@ def prior_box(input, var = helper.create_variable_for_type_inference(dtype) helper.append_op( type="prior_box", - inputs={"Input": input, - "Image": image}, - outputs={"Boxes": box, - "Variances": var}, - attrs=attrs, ) + inputs={ + "Input": input, + "Image": image + }, + outputs={ + "Boxes": box, + "Variances": var + }, + attrs=attrs, + ) box.stop_gradient = True var.stop_gradient = True return box, var @@ -2106,11 +2112,16 @@ def density_prior_box(input, var = helper.create_variable_for_type_inference(dtype) helper.append_op( type="density_prior_box", - inputs={"Input": input, - "Image": image}, - outputs={"Boxes": box, - "Variances": var}, - attrs=attrs, ) + inputs={ + "Input": input, + "Image": image + }, + outputs={ + "Boxes": box, + "Variances": var + }, + attrs=attrs, + ) box.stop_gradient = True var.stop_gradient = True return box, var @@ -2362,12 +2373,11 @@ def multi_box_head(inputs, # get loc num_loc_output = num_boxes * 4 - mbox_loc = nn.conv2d( - input=input, - num_filters=num_loc_output, - filter_size=kernel_size, - padding=pad, - stride=stride) + mbox_loc = nn.conv2d(input=input, + num_filters=num_loc_output, + filter_size=kernel_size, + padding=pad, + stride=stride) mbox_loc = nn.transpose(mbox_loc, perm=[0, 2, 3, 1]) mbox_loc_flatten = nn.flatten(mbox_loc, axis=1) @@ -2375,12 +2385,11 @@ def multi_box_head(inputs, # get conf num_conf_output = num_boxes * num_classes - conf_loc = nn.conv2d( - input=input, - num_filters=num_conf_output, - filter_size=kernel_size, - padding=pad, - stride=stride) + conf_loc = nn.conv2d(input=input, + num_filters=num_conf_output, + filter_size=kernel_size, + padding=pad, + stride=stride) conf_loc = nn.transpose(conf_loc, perm=[0, 2, 3, 1]) conf_loc_flatten = nn.flatten(conf_loc, axis=1) mbox_confs.append(conf_loc_flatten) @@ -2402,8 +2411,8 @@ def multi_box_head(inputs, mbox_locs_concat = tensor.concat(mbox_locs, axis=1) mbox_locs_concat = nn.reshape(mbox_locs_concat, shape=[0, -1, 4]) mbox_confs_concat = tensor.concat(mbox_confs, axis=1) - mbox_confs_concat = nn.reshape( - mbox_confs_concat, shape=[0, -1, num_classes]) + mbox_confs_concat = nn.reshape(mbox_confs_concat, + shape=[0, -1, num_classes]) box.stop_gradient = True var.stop_gradient = True @@ -2507,9 +2516,12 @@ def anchor_generator(input, helper.append_op( type="anchor_generator", inputs={"Input": input}, - outputs={"Anchors": anchor, - "Variances": var}, - attrs=attrs, ) + outputs={ + "Anchors": anchor, + "Variances": var + }, + attrs=attrs, + ) anchor.stop_gradient = True var.stop_gradient = True return anchor, var @@ -2588,22 +2600,23 @@ def roi_perspective_transform(input, transform_matrix = helper.create_variable_for_type_inference(dtype) out2in_idx = helper.create_variable_for_type_inference(dtype="int32") out2in_w = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="roi_perspective_transform", - inputs={"X": input, - "ROIs": rois}, - outputs={ - "Out": out, - "Out2InIdx": 
out2in_idx, - "Out2InWeights": out2in_w, - "Mask": mask, - "TransformMatrix": transform_matrix - }, - attrs={ - "transformed_height": transformed_height, - "transformed_width": transformed_width, - "spatial_scale": spatial_scale - }) + helper.append_op(type="roi_perspective_transform", + inputs={ + "X": input, + "ROIs": rois + }, + outputs={ + "Out": out, + "Out2InIdx": out2in_idx, + "Out2InWeights": out2in_w, + "Mask": mask, + "TransformMatrix": transform_matrix + }, + attrs={ + "transformed_height": transformed_height, + "transformed_width": transformed_width, + "spatial_scale": spatial_scale + }) return out, mask, transform_matrix @@ -2723,29 +2736,28 @@ def generate_proposal_labels(rpn_rois, } if max_overlap is not None: inputs['MaxOverlap'] = max_overlap - helper.append_op( - type="generate_proposal_labels", - inputs=inputs, - outputs={ - 'Rois': rois, - 'LabelsInt32': labels_int32, - 'BboxTargets': bbox_targets, - 'BboxInsideWeights': bbox_inside_weights, - 'BboxOutsideWeights': bbox_outside_weights, - 'MaxOverlapWithGT': max_overlap_with_gt - }, - attrs={ - 'batch_size_per_im': batch_size_per_im, - 'fg_fraction': fg_fraction, - 'fg_thresh': fg_thresh, - 'bg_thresh_hi': bg_thresh_hi, - 'bg_thresh_lo': bg_thresh_lo, - 'bbox_reg_weights': bbox_reg_weights, - 'class_nums': class_nums, - 'use_random': use_random, - 'is_cls_agnostic': is_cls_agnostic, - 'is_cascade_rcnn': is_cascade_rcnn - }) + helper.append_op(type="generate_proposal_labels", + inputs=inputs, + outputs={ + 'Rois': rois, + 'LabelsInt32': labels_int32, + 'BboxTargets': bbox_targets, + 'BboxInsideWeights': bbox_inside_weights, + 'BboxOutsideWeights': bbox_outside_weights, + 'MaxOverlapWithGT': max_overlap_with_gt + }, + attrs={ + 'batch_size_per_im': batch_size_per_im, + 'fg_fraction': fg_fraction, + 'fg_thresh': fg_thresh, + 'bg_thresh_hi': bg_thresh_hi, + 'bg_thresh_lo': bg_thresh_lo, + 'bbox_reg_weights': bbox_reg_weights, + 'class_nums': class_nums, + 'use_random': use_random, + 'is_cls_agnostic': is_cls_agnostic, + 'is_cascade_rcnn': is_cascade_rcnn + }) rois.stop_gradient = True labels_int32.stop_gradient = True @@ -2880,23 +2892,24 @@ def generate_mask_labels(im_info, gt_classes, is_crowd, gt_segms, rois, mask_int32 = helper.create_variable_for_type_inference( dtype=gt_classes.dtype) - helper.append_op( - type="generate_mask_labels", - inputs={ - 'ImInfo': im_info, - 'GtClasses': gt_classes, - 'IsCrowd': is_crowd, - 'GtSegms': gt_segms, - 'Rois': rois, - 'LabelsInt32': labels_int32 - }, - outputs={ - 'MaskRois': mask_rois, - 'RoiHasMaskInt32': roi_has_mask_int32, - 'MaskInt32': mask_int32 - }, - attrs={'num_classes': num_classes, - 'resolution': resolution}) + helper.append_op(type="generate_mask_labels", + inputs={ + 'ImInfo': im_info, + 'GtClasses': gt_classes, + 'IsCrowd': is_crowd, + 'GtSegms': gt_segms, + 'Rois': rois, + 'LabelsInt32': labels_int32 + }, + outputs={ + 'MaskRois': mask_rois, + 'RoiHasMaskInt32': roi_has_mask_int32, + 'MaskInt32': mask_int32 + }, + attrs={ + 'num_classes': num_classes, + 'resolution': resolution + }) mask_rois.stop_gradient = True roi_has_mask_int32.stop_gradient = True @@ -3028,23 +3041,22 @@ def generate_proposals(scores, rpn_rois_num.stop_gradient = True outputs['RpnRoisNum'] = rpn_rois_num - helper.append_op( - type="generate_proposals", - inputs={ - 'Scores': scores, - 'BboxDeltas': bbox_deltas, - 'ImInfo': im_info, - 'Anchors': anchors, - 'Variances': variances - }, - attrs={ - 'pre_nms_topN': pre_nms_top_n, - 'post_nms_topN': post_nms_top_n, - 'nms_thresh': nms_thresh, - 
'min_size': min_size, - 'eta': eta - }, - outputs=outputs) + helper.append_op(type="generate_proposals", + inputs={ + 'Scores': scores, + 'BboxDeltas': bbox_deltas, + 'ImInfo': im_info, + 'Anchors': anchors, + 'Variances': variances + }, + attrs={ + 'pre_nms_topN': pre_nms_top_n, + 'post_nms_topN': post_nms_top_n, + 'nms_thresh': nms_thresh, + 'min_size': min_size, + 'eta': eta + }, + outputs=outputs) rpn_rois.stop_gradient = True rpn_roi_probs.stop_gradient = True @@ -3253,22 +3265,21 @@ def retinanet_detection_output(bboxes, helper = LayerHelper('retinanet_detection_output', **locals()) output = helper.create_variable_for_type_inference( dtype=helper.input_dtype('scores')) - helper.append_op( - type="retinanet_detection_output", - inputs={ - 'BBoxes': bboxes, - 'Scores': scores, - 'Anchors': anchors, - 'ImInfo': im_info - }, - attrs={ - 'score_threshold': score_threshold, - 'nms_top_k': nms_top_k, - 'nms_threshold': nms_threshold, - 'keep_top_k': keep_top_k, - 'nms_eta': 1., - }, - outputs={'Out': output}) + helper.append_op(type="retinanet_detection_output", + inputs={ + 'BBoxes': bboxes, + 'Scores': scores, + 'Anchors': anchors, + 'ImInfo': im_info + }, + attrs={ + 'score_threshold': score_threshold, + 'nms_top_k': nms_top_k, + 'nms_threshold': nms_threshold, + 'keep_top_k': keep_top_k, + 'nms_eta': 1., + }, + outputs={'Out': output}) output.stop_gradient = True return output @@ -3408,20 +3419,21 @@ def multiclass_nms(bboxes, helper = LayerHelper('multiclass_nms', **locals()) output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) - helper.append_op( - type="multiclass_nms", - inputs={'BBoxes': bboxes, - 'Scores': scores}, - attrs={ - 'background_label': background_label, - 'score_threshold': score_threshold, - 'nms_top_k': nms_top_k, - 'nms_threshold': nms_threshold, - 'nms_eta': nms_eta, - 'keep_top_k': keep_top_k, - 'normalized': normalized - }, - outputs={'Out': output}) + helper.append_op(type="multiclass_nms", + inputs={ + 'BBoxes': bboxes, + 'Scores': scores + }, + attrs={ + 'background_label': background_label, + 'score_threshold': score_threshold, + 'nms_top_k': nms_top_k, + 'nms_threshold': nms_threshold, + 'nms_eta': nms_eta, + 'keep_top_k': keep_top_k, + 'normalized': normalized + }, + outputs={'Out': output}) output.stop_gradient = True return output @@ -3537,21 +3549,22 @@ def locality_aware_nms(bboxes, output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) out = {'Out': output} - helper.append_op( - type="locality_aware_nms", - inputs={'BBoxes': bboxes, - 'Scores': scores}, - attrs={ - 'background_label': background_label, - 'score_threshold': score_threshold, - 'nms_top_k': nms_top_k, - 'nms_threshold': nms_threshold, - 'nms_eta': nms_eta, - 'keep_top_k': keep_top_k, - 'nms_eta': nms_eta, - 'normalized': normalized - }, - outputs={'Out': output}) + helper.append_op(type="locality_aware_nms", + inputs={ + 'BBoxes': bboxes, + 'Scores': scores + }, + attrs={ + 'background_label': background_label, + 'score_threshold': score_threshold, + 'nms_top_k': nms_top_k, + 'nms_threshold': nms_threshold, + 'nms_eta': nms_eta, + 'keep_top_k': keep_top_k, + 'nms_eta': nms_eta, + 'normalized': normalized + }, + outputs={'Out': output}) output.stop_gradient = True return output @@ -3660,22 +3673,25 @@ def matrix_nms(bboxes, helper = LayerHelper('matrix_nms', **locals()) output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) index = helper.create_variable_for_type_inference(dtype='int') - helper.append_op( - type="matrix_nms", - 
inputs={'BBoxes': bboxes, - 'Scores': scores}, - attrs={ - 'background_label': background_label, - 'score_threshold': score_threshold, - 'post_threshold': post_threshold, - 'nms_top_k': nms_top_k, - 'gaussian_sigma': gaussian_sigma, - 'use_gaussian': use_gaussian, - 'keep_top_k': keep_top_k, - 'normalized': normalized - }, - outputs={'Out': output, - 'Index': index}) + helper.append_op(type="matrix_nms", + inputs={ + 'BBoxes': bboxes, + 'Scores': scores + }, + attrs={ + 'background_label': background_label, + 'score_threshold': score_threshold, + 'post_threshold': post_threshold, + 'nms_top_k': nms_top_k, + 'gaussian_sigma': gaussian_sigma, + 'use_gaussian': use_gaussian, + 'keep_top_k': keep_top_k, + 'normalized': normalized + }, + outputs={ + 'Out': output, + 'Index': index + }) output.stop_gradient = True if return_index: @@ -3792,16 +3808,15 @@ def distribute_fpn_proposals(fpn_rois, ] outputs['MultiLevelRoIsNum'] = rois_num_per_level - helper.append_op( - type='distribute_fpn_proposals', - inputs=inputs, - outputs=outputs, - attrs={ - 'min_level': min_level, - 'max_level': max_level, - 'refer_level': refer_level, - 'refer_scale': refer_scale - }) + helper.append_op(type='distribute_fpn_proposals', + inputs=inputs, + outputs=outputs, + attrs={ + 'min_level': min_level, + 'max_level': max_level, + 'refer_level': refer_level, + 'refer_scale': refer_scale + }) if rois_num is not None: return multi_rois, restore_ind, rois_num_per_level return multi_rois, restore_ind @@ -3866,19 +3881,18 @@ def box_decoder_and_assign(prior_box, output_assign_box = helper.create_variable_for_type_inference( dtype=prior_box.dtype) - helper.append_op( - type="box_decoder_and_assign", - inputs={ - "PriorBox": prior_box, - "PriorBoxVar": prior_box_var, - "TargetBox": target_box, - "BoxScore": box_score - }, - attrs={"box_clip": box_clip}, - outputs={ - "DecodeBox": decoded_box, - "OutputAssignBox": output_assign_box - }) + helper.append_op(type="box_decoder_and_assign", + inputs={ + "PriorBox": prior_box, + "PriorBoxVar": prior_box_var, + "TargetBox": target_box, + "BoxScore": box_score + }, + attrs={"box_clip": box_clip}, + outputs={ + "DecodeBox": decoded_box, + "OutputAssignBox": output_assign_box + }) return decoded_box, output_assign_box @@ -3982,11 +3996,10 @@ def collect_fpn_proposals(multi_rois, rois_num = helper.create_variable_for_type_inference(dtype='int32') rois_num.stop_gradient = True outputs['RoisNum'] = rois_num - helper.append_op( - type='collect_fpn_proposals', - inputs=inputs, - outputs=outputs, - attrs={'post_nms_topN': post_nms_top_n}) + helper.append_op(type='collect_fpn_proposals', + inputs=inputs, + outputs=outputs, + attrs={'post_nms_topN': post_nms_top_n}) if rois_num_per_level is not None: return output_rois, rois_num return output_rois diff --git a/python/paddle/fluid/layers/device.py b/python/paddle/fluid/layers/device.py index 42ccdbb8d26..a4b967a8509 100644 --- a/python/paddle/fluid/layers/device.py +++ b/python/paddle/fluid/layers/device.py @@ -37,7 +37,8 @@ def get_places(device_count=None, device_type=None): if device_type is not None: attrs['device_type'] = str(device_type) - helper.append_op( - type='get_places', outputs={"Out": [out_places]}, attrs=attrs) + helper.append_op(type='get_places', + outputs={"Out": [out_places]}, + attrs=attrs) return out_places diff --git a/python/paddle/fluid/layers/distributions.py b/python/paddle/fluid/layers/distributions.py index 4e4c8dfd2a0..757ba0dc885 100644 --- a/python/paddle/fluid/layers/distributions.py +++ 
b/python/paddle/fluid/layers/distributions.py @@ -214,15 +214,14 @@ class Uniform(Distribution): self.low + self.high, batch_shape + shape, self.low.dtype, 0.) uniform_random_tmp = nn.uniform_random_batch_size_like( zero_tmp, zero_tmp.shape, min=0., max=1., seed=seed) - output = uniform_random_tmp * (zero_tmp + self.high - self.low - ) + self.low + output = uniform_random_tmp * (zero_tmp + self.high - + self.low) + self.low return nn.reshape(output, output_shape) else: output_shape = shape + batch_shape - output = nn.uniform_random( - output_shape, seed=seed) * (tensor.zeros( - output_shape, dtype=self.low.dtype) + - (self.high - self.low)) + self.low + output = nn.uniform_random(output_shape, seed=seed) * ( + tensor.zeros(output_shape, dtype=self.low.dtype) + + (self.high - self.low)) + self.low if self.all_arg_is_float: return nn.reshape(output, shape) else: @@ -358,8 +357,10 @@ class Normal(Distribution): zero_tmp = tensor.fill_constant_batch_size_like( self.loc + self.scale, batch_shape + shape, self.loc.dtype, 0.) zero_tmp_shape = nn.shape(zero_tmp) - normal_random_tmp = nn.gaussian_random( - zero_tmp_shape, mean=0., std=1., seed=seed) + normal_random_tmp = nn.gaussian_random(zero_tmp_shape, + mean=0., + std=1., + seed=seed) output = normal_random_tmp * (zero_tmp + self.scale) + self.loc return nn.reshape(output, output_shape) else: @@ -379,8 +380,9 @@ class Normal(Distribution): """ batch_shape = list((self.loc + self.scale).shape) - zero_tmp = tensor.fill_constant_batch_size_like( - self.loc + self.scale, batch_shape, self.loc.dtype, 0.) + zero_tmp = tensor.fill_constant_batch_size_like(self.loc + self.scale, + batch_shape, + self.loc.dtype, 0.) return 0.5 + 0.5 * math.log(2 * math.pi) + nn.log( (self.scale + zero_tmp)) @@ -399,8 +401,9 @@ class Normal(Distribution): var = self.scale * self.scale log_scale = nn.log(self.scale) - return -1. * ((value - self.loc) * (value - self.loc)) / ( - 2. * var) - log_scale - math.log(math.sqrt(2. * math.pi)) + return -1. * ((value - self.loc) * + (value - self.loc)) / (2. * var) - log_scale - math.log( + math.sqrt(2. * math.pi)) def kl_divergence(self, other): """The KL-divergence between two normal distributions. @@ -613,8 +616,7 @@ class MultivariateNormalDiag(Distribution): batch_shape = list(value.shape) one_all = tensor.ones(shape=batch_shape, dtype=self.loc.dtype) one_diag = tensor.diag( - tensor.ones( - shape=[batch_shape[0]], dtype=self.loc.dtype)) + tensor.ones(shape=[batch_shape[0]], dtype=self.loc.dtype)) det_diag = nn.reduce_prod(value + one_all - one_diag) return det_diag @@ -624,8 +626,7 @@ class MultivariateNormalDiag(Distribution): batch_shape = list(value.shape) one_all = tensor.ones(shape=batch_shape, dtype=self.loc.dtype) one_diag = tensor.diag( - tensor.ones( - shape=[batch_shape[0]], dtype=self.loc.dtype)) + tensor.ones(shape=[batch_shape[0]], dtype=self.loc.dtype)) inv_diag = nn.elementwise_pow(value, (one_all - 2 * one_diag)) return inv_diag @@ -637,9 +638,8 @@ class MultivariateNormalDiag(Distribution): Variable: Shannon entropy of Multivariate Normal distribution. The data type is float32. 
""" - entropy = 0.5 * ( - self.scale.shape[0] * - (1.0 + math.log(2 * math.pi)) + nn.log(self._det(self.scale))) + entropy = 0.5 * (self.scale.shape[0] * (1.0 + math.log(2 * math.pi)) + + nn.log(self._det(self.scale))) return entropy diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index c8a5235a586..c24a0477ffc 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -125,14 +125,13 @@ def data(name, if append_batch_size: shape = [-1] + shape # append batch size as -1 - data_var = helper.create_global_variable( - name=name, - shape=shape, - dtype=dtype, - type=type, - stop_gradient=stop_gradient, - lod_level=lod_level, - is_data=True) + data_var = helper.create_global_variable(name=name, + shape=shape, + dtype=dtype, + type=type, + stop_gradient=stop_gradient, + lod_level=lod_level, + is_data=True) return data_var @@ -247,9 +246,9 @@ class ListenAndServ(object): attrs={ 'endpoint': self.endpoint, 'Fanin': self.fan_in, - 'optimize_blocks': [ - current_block - ], # did not support multiple optimize blocks in layers + 'optimize_blocks': + [current_block + ], # did not support multiple optimize blocks in layers 'distributed_mode': DistributedMode.SYNC, # did not support async now in layers 'grad_to_block_id': [""] @@ -283,21 +282,22 @@ def Send(endpoints, send_vars, dummy_output=None, sync=True): helper = LayerHelper("Send", **locals()) rpc_op_role_name = core.op_proto_and_checker_maker.kOpRoleAttrName() - helper.append_op( - type="send", - inputs={"X": send_vars}, - outputs={"Out": dummy_output}, - attrs={ - "endpoints": endpoints, - "epmap": epmap, - rpc_op_role_name: core.op_proto_and_checker_maker.OpRole.RPC - }) + helper.append_op(type="send", + inputs={"X": send_vars}, + outputs={"Out": dummy_output}, + attrs={ + "endpoints": + endpoints, + "epmap": + epmap, + rpc_op_role_name: + core.op_proto_and_checker_maker.OpRole.RPC + }) if sync: - helper.append_op( - type="send_barrier", - inputs={"X": dummy_output}, - outputs={"Out": []}, - attrs={"endpoints": endpoints}) + helper.append_op(type="send_barrier", + inputs={"X": dummy_output}, + outputs={"Out": []}, + attrs={"endpoints": endpoints}) def Recv(endpoints, get_vars, dummy_input=None, sync=True): @@ -326,21 +326,22 @@ def Recv(endpoints, get_vars, dummy_input=None, sync=True): endpoints = list(set(epmap)) helper = LayerHelper("Recv", **locals()) - helper.append_op( - type="recv", - inputs={"X": dummy_input}, - outputs={"Out": get_vars}, - attrs={"endpoints": endpoints, - "epmap": epmap}) + helper.append_op(type="recv", + inputs={"X": dummy_input}, + outputs={"Out": get_vars}, + attrs={ + "endpoints": endpoints, + "epmap": epmap + }) if sync: - helper.append_op( - type="fetch_barrier", - outputs={"Out": get_vars}, - attrs={"endpoints": endpoints}) + helper.append_op(type="fetch_barrier", + outputs={"Out": get_vars}, + attrs={"endpoints": endpoints}) return get_vars def monkey_patch_reader_methods(reader): + def __get_reader__(): scope = global_scope() var = scope.find_var(reader.name) @@ -381,11 +382,10 @@ def _copy_reader_create_op_(block, op): for arg_name in arg_names: new_output_map[param_name].append(block.var(arg_name)) - new_op = block.append_op( - type=op.type, - inputs=new_input_map, - outputs=new_output_map, - attrs=op.all_attrs()) + new_op = block.append_op(type=op.type, + inputs=new_input_map, + outputs=new_output_map, + attrs=op.all_attrs()) return new_op @@ -441,17 +441,16 @@ def _py_reader(capacity, startup_blk = default_startup_program().current_block() 
startup_var = startup_blk.create_var(name=reader_name) - startup_blk.append_op( - type='create_py_reader', - inputs={'blocking_queue': [queue_name]}, - outputs={'Out': [startup_var]}, - attrs={ - 'shape_concat': shape_concat, - 'lod_levels': lod_levels, - 'dtypes': dtype_int, - 'need_check_feed': need_check_feed, - 'ranks': ranks - }) + startup_blk.append_op(type='create_py_reader', + inputs={'blocking_queue': [queue_name]}, + outputs={'Out': [startup_var]}, + attrs={ + 'shape_concat': shape_concat, + 'lod_levels': lod_levels, + 'dtypes': dtype_int, + 'need_check_feed': need_check_feed, + 'ranks': ranks + }) startup_var.desc.set_dtypes(dtypes) startup_var.persistable = True @@ -475,6 +474,7 @@ def _py_reader(capacity, reader.exited = False def start_provide_thread(func): + def __provider_thread__(legacy_expected_place): try: # See _DataLoaderIterSingleProcess._thread_loop() for why set expected place here. @@ -501,8 +501,8 @@ def _py_reader(capacity, logging.warn('Your decorated reader has raised an exception!') six.reraise(*sys.exc_info()) - reader.thread = threading.Thread( - target=__provider_thread__, args=(_current_expected_place(), )) + reader.thread = threading.Thread(target=__provider_thread__, + args=(_current_expected_place(), )) reader.thread.daemon = True reader.thread.start() @@ -518,18 +518,17 @@ def _py_reader(capacity, for dtype, shape, lod_level in zip(dtypes, shapes, lod_levels): name = str(counter) actual_feed_list.append( - data( - name=name, - dtype=dtype, - shape=shape, - lod_level=lod_level)) + data(name=name, + dtype=dtype, + shape=shape, + lod_level=lod_level)) counter += 1 data_names = [feed_data.name for feed_data in actual_feed_list] - feeder = DataFeeder( - feed_list=actual_feed_list, place=core.CPUPlace()) - paddle_reader = feeder.decorate_reader( - paddle_reader, multi_devices=False) + feeder = DataFeeder(feed_list=actual_feed_list, + place=core.CPUPlace()) + paddle_reader = feeder.decorate_reader(paddle_reader, + multi_devices=False) def __tensor_provider__(): for slots in paddle_reader(): @@ -720,13 +719,12 @@ def py_reader(capacity, logging.warn( 'paddle.fluid.layers.py_reader() may be deprecated in the near future. ' 'Please use paddle.fluid.io.DataLoader.from_generator() instead.') - return _py_reader( - capacity=capacity, - shapes=shapes, - dtypes=dtypes, - lod_levels=lod_levels, - name=name, - use_double_buffer=use_double_buffer) + return _py_reader(capacity=capacity, + shapes=shapes, + dtypes=dtypes, + lod_levels=lod_levels, + name=name, + use_double_buffer=use_double_buffer) def create_py_reader_by_data(capacity, @@ -802,25 +800,23 @@ def create_py_reader_by_data(capacity, logging.warn( 'paddle.fluid.layers.create_py_reader_by_data() may be deprecated in the near future. 
' 'Please use paddle.fluid.io.DataLoader.from_generator() instead.') - return _py_reader( - capacity=capacity, - shapes=None, - dtypes=None, - lod_levels=None, - name=name, - use_double_buffer=use_double_buffer, - feed_list=feed_list) + return _py_reader(capacity=capacity, + shapes=None, + dtypes=None, + lod_levels=None, + name=name, + use_double_buffer=use_double_buffer, + feed_list=feed_list) def __create_shared_decorated_reader__(op_type, reader, attrs): var_name = unique_name(op_type) startup_blk = default_startup_program().current_block() startup_var = startup_blk.create_var(name=var_name) - startop_op = startup_blk.append_op( - type=op_type, - inputs={'UnderlyingReader': reader}, - outputs={'Out': [startup_var]}, - attrs=attrs) + startop_op = startup_blk.append_op(type=op_type, + inputs={'UnderlyingReader': reader}, + outputs={'Out': [startup_var]}, + attrs=attrs) startup_var.persistable = True main_prog_block = default_main_program().current_block() main_prog_var = _copy_reader_var_(main_prog_block, startup_var) @@ -832,11 +828,10 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None): new_reader_name = name if name is not None else unique_name(op_type) main_blk = default_main_program().current_block() new_reader = main_blk.create_var(name=new_reader_name) - main_blk.append_op( - type=op_type, - inputs={'UnderlyingReader': reader}, - outputs={'Out': [new_reader]}, - attrs=attrs) + main_blk.append_op(type=op_type, + inputs={'UnderlyingReader': reader}, + outputs={'Out': [new_reader]}, + attrs=attrs) return monkey_patch_reader_methods(new_reader) @@ -869,8 +864,10 @@ def double_buffer(reader, place=None, name=None): if place is not None: attrs['place'] = str(_get_paddle_place(place)).upper() - return __create_unshared_decorated_reader__( - 'create_double_buffer_reader', reader, attrs, name=name) + return __create_unshared_decorated_reader__('create_double_buffer_reader', + reader, + attrs, + name=name) def read_file(reader): @@ -901,12 +898,13 @@ def read_file(reader): """ helper = LayerHelper('read_file') out = [ - helper.create_variable_for_type_inference( - stop_gradient=True, dtype='float32') + helper.create_variable_for_type_inference(stop_gradient=True, + dtype='float32') for _ in range(len(reader.desc.shapes())) ] - helper.append_op( - type='read', inputs={'Reader': [reader]}, outputs={'Out': out}) + helper.append_op(type='read', + inputs={'Reader': [reader]}, + outputs={'Out': out}) if len(out) == 1: return out[0] else: diff --git a/python/paddle/fluid/layers/layer_function_generator.py b/python/paddle/fluid/layers/layer_function_generator.py index ec99f7c64f3..4fe9cbb0874 100755 --- a/python/paddle/fluid/layers/layer_function_generator.py +++ b/python/paddle/fluid/layers/layer_function_generator.py @@ -187,8 +187,8 @@ def generate_layer_fn(op_type): for each in val: if not isinstance(each, Variable): - raise ValueError("input of {0} must be variable".format( - op_type)) + raise ValueError( + "input of {0} must be variable".format(op_type)) if dtype is None: dtype = each.dtype @@ -227,8 +227,8 @@ def generate_layer_fn(op_type): outputs = dict() out = kwargs.pop(_convert_(o_name), []) if out: - out_var = out[0] if (isinstance(out, list) or - isinstance(out, tuple)) else out + out_var = out[0] if (isinstance(out, list) + or isinstance(out, tuple)) else out else: out_var = helper.create_variable_for_type_inference(dtype=dtype) outputs[o_name] = [out_var] @@ -236,8 +236,10 @@ def generate_layer_fn(op_type): outputs[name] = [ 
helper.create_variable_for_type_inference(dtype=dtype) ] - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=kwargs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=kwargs) return helper.append_activation(out_var) func.__name__ = op_type @@ -309,8 +311,8 @@ def generate_inplace_fn(inplace_op_type): op = getattr(_C_ops, inplace_op_type) return op(x) warnings.warn( - "In static mode, {}() is the same as {}() and does not perform inplace operation.". - format(inplace_op_type, origin_op_type)) + "In static mode, {}() is the same as {}() and does not perform inplace operation." + .format(inplace_op_type, origin_op_type)) return generate_activation_fn(origin_op_type)(x, name) func.__name__ = inplace_op_type @@ -323,9 +325,10 @@ Please refer to :ref:`api_fluid_layers_{1}`. def autodoc(comment=""): + def __impl__(func): - func.__doc__ = _generate_doc_string_(OpProtoHolder.instance( - ).get_op_proto(func.__name__)) + comment + func.__doc__ = _generate_doc_string_( + OpProtoHolder.instance().get_op_proto(func.__name__)) + comment return func return __impl__ diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index 924cc35ea9f..e1a65633e60 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -96,16 +96,17 @@ def noam_decay(d_model, warmup_steps, learning_rate=1.0): """ with default_main_program()._lr_schedule_guard(): if _non_static_mode(): - decay = imperate_lr.NoamDecay( - d_model, warmup_steps, learning_rate=learning_rate) + decay = imperate_lr.NoamDecay(d_model, + warmup_steps, + learning_rate=learning_rate) return decay else: global_step = _decay_step_counter(1) a = global_step**-0.5 b = (warmup_steps**-1.5) * global_step - lr_value = learning_rate * (d_model**-0.5) * nn.elementwise_min(a, - b) + lr_value = learning_rate * (d_model**-0.5) * nn.elementwise_min( + a, b) return lr_value @@ -341,20 +342,23 @@ def polynomial_decay(learning_rate, if cycle: div_res = ops.ceil(global_step / decay_steps) - zero_var = tensor.fill_constant( - shape=[1], dtype='float32', value=0.0) - one_var = tensor.fill_constant( - shape=[1], dtype='float32', value=1.0) + zero_var = tensor.fill_constant(shape=[1], + dtype='float32', + value=0.0) + one_var = tensor.fill_constant(shape=[1], + dtype='float32', + value=1.0) with control_flow.Switch() as switch: with switch.case(global_step == zero_var): tensor.assign(input=one_var, output=div_res) decay_steps = decay_steps * div_res else: - decay_steps_var = tensor.fill_constant( - shape=[1], dtype='float32', value=float(decay_steps)) - global_step = nn.elementwise_min( - x=global_step, y=decay_steps_var) + decay_steps_var = tensor.fill_constant(shape=[1], + dtype='float32', + value=float(decay_steps)) + global_step = nn.elementwise_min(x=global_step, + y=decay_steps_var) decayed_lr = (learning_rate - end_learning_rate) * \ ((1 - global_step / decay_steps) ** power) + end_learning_rate @@ -411,32 +415,29 @@ Applies piecewise decay to the initial learning rate. 
else: global_step = _decay_step_counter() - lr = tensor.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=True, - name="learning_rate") + lr = tensor.create_global_var(shape=[1], + value=0.0, + dtype='float32', + persistable=True, + name="learning_rate") with control_flow.Switch() as switch: for i in range(len(boundaries)): - boundary_val = tensor.fill_constant( - shape=[1], - dtype='float32', - value=float(boundaries[i]), - force_cpu=True) + boundary_val = tensor.fill_constant(shape=[1], + dtype='float32', + value=float( + boundaries[i]), + force_cpu=True) with switch.case(global_step < boundary_val): - tensor.fill_constant( - shape=[1], - dtype="float32", - value=float(values[i]), - out=lr) + tensor.fill_constant(shape=[1], + dtype="float32", + value=float(values[i]), + out=lr) with switch.default(): - tensor.fill_constant( - shape=[1], - dtype="float32", - value=float(values[len(values) - 1]), - out=lr) + tensor.fill_constant(shape=[1], + dtype="float32", + value=float(values[len(values) - 1]), + out=lr) return lr @@ -556,12 +557,11 @@ def linear_lr_warmup(learning_rate, warmup_steps, start_lr, end_lr): start_lr, end_lr) return lr else: - lr = tensor.create_global_var( - shape=[1], - value=0.0, - dtype=dtype, - persistable=True, - name="learning_rate_warmup") + lr = tensor.create_global_var(shape=[1], + value=0.0, + dtype=dtype, + persistable=True, + name="learning_rate_warmup") global_step = _decay_step_counter() diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py index 99c0a2e70b7..1ad4e3c4298 100644 --- a/python/paddle/fluid/layers/loss.py +++ b/python/paddle/fluid/layers/loss.py @@ -114,8 +114,9 @@ def center_loss(input, check_variable_and_dtype(label, 'label', ['int32', 'int64'], 'center_loss') centers_shape = [num_classes, input.shape[1]] - centers_param = helper.create_parameter( - attr=param_attr, shape=centers_shape, dtype=dtype) + centers_param = helper.create_parameter(attr=param_attr, + shape=centers_shape, + dtype=dtype) centers_param.stop_gradient = True if isinstance(alpha, Variable): @@ -135,21 +136,22 @@ def center_loss(input, centersdiff = helper.create_variable_for_type_inference(dtype=input.dtype) loss = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='center_loss', - inputs={ - 'X': [input], - 'Label': [label], - 'Centers': [centers_param], - 'CenterUpdateRate': [alpha_param] - }, - outputs={ - 'SampleCenterDiff': [centersdiff], - 'Loss': [loss], - 'CentersOut': [centers_param] - }, - attrs={'cluster_num': num_classes, - 'need_update': update_center}) + helper.append_op(type='center_loss', + inputs={ + 'X': [input], + 'Label': [label], + 'Centers': [centers_param], + 'CenterUpdateRate': [alpha_param] + }, + outputs={ + 'SampleCenterDiff': [centersdiff], + 'Loss': [loss], + 'CentersOut': [centers_param] + }, + attrs={ + 'cluster_num': num_classes, + 'need_update': update_center + }) return loss @@ -197,11 +199,12 @@ def bpr_loss(input, label, name=None): out = helper.create_variable_for_type_inference(dtype=input.dtype) check_variable_and_dtype(input, 'input', ['float16', 'float32', 'float64'], 'bpr_loss') - helper.append_op( - type='bpr_loss', - inputs={'X': [input], - 'Label': [label]}, - outputs={'Y': [out]}) + helper.append_op(type='bpr_loss', + inputs={ + 'X': [input], + 'Label': [label] + }, + outputs={'Y': [out]}) return out @@ -273,8 +276,10 @@ def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex): 'cross_entropy') helper = 
LayerHelper('cross_entropy', **locals()) out = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='cross_entropy', inputs=inputs, outputs={'Y': [out]}, attrs=attrs) + helper.append_op(type='cross_entropy', + inputs=inputs, + outputs={'Y': [out]}, + attrs=attrs) return out @@ -292,13 +297,14 @@ def cross_entropy2(input, label, ignore_index=kIgnoreIndex): out = helper.create_variable_for_type_inference(dtype=input.dtype) xshape = helper.create_variable_for_type_inference(dtype=input.dtype) match_x = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='cross_entropy2', - inputs=inputs, - outputs={'Y': [out], - 'MatchX': [match_x], - 'XShape': [xshape]}, - attrs=attrs) + helper.append_op(type='cross_entropy2', + inputs=inputs, + outputs={ + 'Y': [out], + 'MatchX': [match_x], + 'XShape': [xshape] + }, + attrs=attrs) return out @@ -412,8 +418,9 @@ def edit_distance(input, # [4] """ - return paddle.nn.functional.loss.edit_distance( - input, label, normalized, ignored_tokens, input_length, label_length) + return paddle.nn.functional.loss.edit_distance(input, label, normalized, + ignored_tokens, input_length, + label_length) def warpctc(input, @@ -552,7 +559,8 @@ def warpctc(input, 'blank', blank, 'norm_by_times', - norm_by_times, ) + norm_by_times, + ) return loss_out helper = LayerHelper('warpctc', **locals()) check_variable_and_dtype(input, 'input', ['float32', 'float64'], "warpctc") @@ -569,15 +577,16 @@ def warpctc(input, loss_out = helper.create_variable_for_type_inference(dtype=input.dtype) grad_out = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='warpctc', - inputs=this_inputs, - outputs={'WarpCTCGrad': [grad_out], - 'Loss': [loss_out]}, - attrs={ - 'blank': blank, - 'norm_by_times': norm_by_times, - }) + helper.append_op(type='warpctc', + inputs=this_inputs, + outputs={ + 'WarpCTCGrad': [grad_out], + 'Loss': [loss_out] + }, + attrs={ + 'blank': blank, + 'norm_by_times': norm_by_times, + }) return loss_out @@ -682,18 +691,16 @@ def nce(input, dim = input.shape[1] num_true_class = label.shape[1] - w = helper.create_parameter( - attr=helper.param_attr, - shape=[num_total_classes, dim], - is_bias=False, - dtype=input.dtype) + w = helper.create_parameter(attr=helper.param_attr, + shape=[num_total_classes, dim], + is_bias=False, + dtype=input.dtype) inputs = {} if helper.bias_attr: - b = helper.create_parameter( - attr=helper.bias_attr, - shape=[num_total_classes, 1], - is_bias=True, - dtype=input.dtype) + b = helper.create_parameter(attr=helper.bias_attr, + shape=[num_total_classes, 1], + is_bias=True, + dtype=input.dtype) inputs['Bias'] = b cost = helper.create_variable_for_type_inference(dtype=input.dtype) sample_logits = helper.create_variable_for_type_inference(dtype=input.dtype) @@ -791,15 +798,14 @@ def nce(input, 'remote_prefetch': remote_prefetch } - helper.append_op( - type='nce', - inputs=inputs, - outputs={ - 'Cost': cost, - 'SampleLogits': sample_logits, - 'SampleLabels': sample_labels - }, - attrs=attrs) + helper.append_op(type='nce', + inputs=inputs, + outputs={ + 'Cost': cost, + 'SampleLogits': sample_logits, + 'SampleLabels': sample_labels + }, + attrs=attrs) return cost / (num_neg_samples + 1) @@ -921,17 +927,15 @@ def hsigmoid(input, "With sparse mode, if your models has only small parameter prefetch may cause speed down" ) if not is_custom: - weights = helper.create_parameter( - attr=helper.param_attr, - shape=[num_classes - 1, dim], - is_bias=False, - 
dtype=input.dtype) + weights = helper.create_parameter(attr=helper.param_attr, + shape=[num_classes - 1, dim], + is_bias=False, + dtype=input.dtype) else: - weights = helper.create_parameter( - attr=helper.param_attr, - shape=[num_classes, dim], - is_bias=False, - dtype=input.dtype) + weights = helper.create_parameter(attr=helper.param_attr, + shape=[num_classes, dim], + is_bias=False, + dtype=input.dtype) inputs = { "X": input, "W": weights, @@ -941,30 +945,29 @@ def hsigmoid(input, } if helper.bias_attr: if not is_custom: - bias = helper.create_parameter( - attr=helper.bias_attr, - shape=[num_classes - 1, 1], - is_bias=True, - dtype=input.dtype) + bias = helper.create_parameter(attr=helper.bias_attr, + shape=[num_classes - 1, 1], + is_bias=True, + dtype=input.dtype) inputs['Bias'] = bias else: - bias = helper.create_parameter( - attr=helper.bias_attr, - shape=[num_classes, 1], - is_bias=True, - dtype=input.dtype) + bias = helper.create_parameter(attr=helper.bias_attr, + shape=[num_classes, 1], + is_bias=True, + dtype=input.dtype) inputs['Bias'] = bias - helper.append_op( - type="hierarchical_sigmoid", - inputs=inputs, - outputs={"Out": out, - "PreOut": pre_out, - "W_Out": weights}, - attrs={ - "num_classes": num_classes, - "is_sparse": is_sparse, - "remote_prefetch": remote_prefetch - }) + helper.append_op(type="hierarchical_sigmoid", + inputs=inputs, + outputs={ + "Out": out, + "PreOut": pre_out, + "W_Out": weights + }, + attrs={ + "num_classes": num_classes, + "is_sparse": is_sparse, + "remote_prefetch": remote_prefetch + }) return out @@ -1075,48 +1078,49 @@ def sampled_softmax_with_cross_entropy(logits, logits_dim = helper.create_variable_for_type_inference(dtype=logits.dtype) labels_dim = helper.create_variable_for_type_inference(dtype=label.type) - helper.append_op( - type='sample_logits', - inputs={ - 'Logits': logits, - 'Labels': label, - 'CustomizedSamples': customized_samples, - 'CustomizedProbabilities': customized_probabilities - }, - outputs={ - 'Samples': samples, - 'Probabilities': probabilities, - 'SampledLabels': sampled_label, - 'SampledLogits': sampled_logits, - 'LogitsDim': logits_dim, - 'LabelsDim': labels_dim - }, - attrs={ - 'use_customized_samples': use_customized_samples, - 'uniq': True, - 'remove_accidental_hits': remove_accidental_hits, - 'num_samples': num_samples, - 'seed': seed - }) + helper.append_op(type='sample_logits', + inputs={ + 'Logits': logits, + 'Labels': label, + 'CustomizedSamples': customized_samples, + 'CustomizedProbabilities': customized_probabilities + }, + outputs={ + 'Samples': samples, + 'Probabilities': probabilities, + 'SampledLabels': sampled_label, + 'SampledLogits': sampled_logits, + 'LogitsDim': logits_dim, + 'LabelsDim': labels_dim + }, + attrs={ + 'use_customized_samples': use_customized_samples, + 'uniq': True, + 'remove_accidental_hits': remove_accidental_hits, + 'num_samples': num_samples, + 'seed': seed + }) loss = helper.create_variable_for_type_inference(dtype=logits.dtype) softmax = helper.create_variable_for_type_inference(dtype=logits.dtype) - helper.append_op( - type='one_hot', - inputs={'X': sampled_label}, - attrs={'depth': num_samples + 1}, - outputs={'Out': sampled_softlabel}) - - helper.append_op( - type='softmax_with_cross_entropy', - inputs={'Logits': sampled_logits, - 'Label': sampled_softlabel}, - outputs={'Softmax': softmax, - 'Loss': loss}, - attrs={ - 'soft_label': True, - 'ignore_index': False, - 'numeric_stable_mode': False - }) + helper.append_op(type='one_hot', + inputs={'X': sampled_label}, + 
attrs={'depth': num_samples + 1}, + outputs={'Out': sampled_softlabel}) + + helper.append_op(type='softmax_with_cross_entropy', + inputs={ + 'Logits': sampled_logits, + 'Label': sampled_softlabel + }, + outputs={ + 'Softmax': softmax, + 'Loss': loss + }, + attrs={ + 'soft_label': True, + 'ignore_index': False, + 'numeric_stable_mode': False + }) return loss / num_true @@ -1280,12 +1284,13 @@ def rank_loss(label, left, right, name=None): out = helper.create_variable_for_type_inference("float32") - helper.append_op( - type='rank_loss', - inputs={"Label": label, - "Left": left, - "Right": right}, - outputs={'Out': out}) + helper.append_op(type='rank_loss', + inputs={ + "Label": label, + "Left": left, + "Right": right + }, + outputs={'Out': out}) return out @@ -1330,14 +1335,17 @@ def margin_rank_loss(label, left, right, margin=0.1, name=None): check_variable_and_dtype(label, 'right', ['float32'], 'margin_rank_loss') out = helper.create_variable_for_type_inference(left.dtype) act = helper.create_variable_for_type_inference(left.dtype) - helper.append_op( - type='margin_rank_loss', - inputs={"Label": label, - "X1": left, - "X2": right}, - outputs={'Out': out, - 'Activated': act}, - attrs={'margin': margin}) + helper.append_op(type='margin_rank_loss', + inputs={ + "Label": label, + "X1": left, + "X2": right + }, + outputs={ + 'Out': out, + 'Activated': act + }, + attrs={'margin': margin}) return out @@ -1392,13 +1400,16 @@ def sigmoid_cross_entropy_with_logits(x, out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="sigmoid_cross_entropy_with_logits", - inputs={"X": x, - "Label": label}, - attrs={"ignore_index": ignore_index, - 'normalize': normalize}, - outputs={"Out": out}) + helper.append_op(type="sigmoid_cross_entropy_with_logits", + inputs={ + "X": x, + "Label": label + }, + attrs={ + "ignore_index": ignore_index, + 'normalize': normalize + }, + outputs={"Out": out}) return out @@ -1521,13 +1532,16 @@ def huber_loss(input, label, delta): residual = helper.create_variable_for_type_inference( dtype=helper.input_dtype()) out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) - helper.append_op( - type='huber_loss', - inputs={'X': input, - 'Y': label}, - outputs={'Out': out, - 'Residual': residual}, - attrs={'delta': delta}) + helper.append_op(type='huber_loss', + inputs={ + 'X': input, + 'Y': label + }, + outputs={ + 'Out': out, + 'Residual': residual + }, + attrs={'delta': delta}) return out @@ -1581,12 +1595,13 @@ def kldiv_loss(x, target, reduction='mean', name=None): 'kldiv_loss') check_type(reduction, 'reduction', str, 'kldiv_loss') loss = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='kldiv_loss', - inputs={'X': x, - 'Target': target}, - outputs={'Loss': loss}, - attrs={'reduction': reduction}) + helper.append_op(type='kldiv_loss', + inputs={ + 'X': x, + 'Target': target + }, + outputs={'Loss': loss}, + attrs={'reduction': reduction}) return loss diff --git a/python/paddle/fluid/layers/math_op_patch.py b/python/paddle/fluid/layers/math_op_patch.py index 47b42f65e48..f6810fc0630 100644 --- a/python/paddle/fluid/layers/math_op_patch.py +++ b/python/paddle/fluid/layers/math_op_patch.py @@ -61,6 +61,7 @@ _already_patch_variable = False def monkey_patch_variable(): + def unique_tmp_name(): return unique_name.generate("tmp") @@ -81,16 +82,15 @@ def monkey_patch_variable(): def create_tensor(block, value, dtype, shape): value = float(value) var = create_new_tmp_var(block, dtype) - block.append_op( - 
type="fill_constant", - outputs={'Out': [var]}, - attrs={ - 'dtype': var.dtype, - 'shape': shape, - 'value': value, - 'force_cpu': False - }, - stop_gradient=True) + block.append_op(type="fill_constant", + outputs={'Out': [var]}, + attrs={ + 'dtype': var.dtype, + 'shape': shape, + 'value': value, + 'force_cpu': False + }, + stop_gradient=True) var.stop_gradient = True return var @@ -114,17 +114,16 @@ def monkey_patch_variable(): else: out_shape.append(d) assert batch_dim != -1 - block.append_op( - type='fill_constant_batch_size_like', - outputs={'Out': [var]}, - inputs={'Input': [ref_var]}, - attrs={ - 'shape': out_shape, - 'value': value, - 'input_dim_idx': batch_dim, - 'output_dim_idx': batch_dim - }, - stop_gradient=True) + block.append_op(type='fill_constant_batch_size_like', + outputs={'Out': [var]}, + inputs={'Input': [ref_var]}, + attrs={ + 'shape': out_shape, + 'value': value, + 'input_dim_idx': batch_dim, + 'output_dim_idx': batch_dim + }, + stop_gradient=True) var.stop_gradient = True return var @@ -176,12 +175,13 @@ def monkey_patch_variable(): """ block = current_block(self) out = create_new_tmp_var(block, dtype) - block.append_op( - type="cast", - inputs={"X": [self]}, - outputs={"Out": [out]}, - attrs={"in_dtype": self.dtype, - "out_dtype": out.dtype}) + block.append_op(type="cast", + inputs={"X": [self]}, + outputs={"Out": [out]}, + attrs={ + "in_dtype": self.dtype, + "out_dtype": out.dtype + }) out.stop_gradient = self.stop_gradient return out @@ -198,20 +198,21 @@ def monkey_patch_variable(): type(var))) if self.type != core.VarDesc.VarType.LOD_TENSOR_ARRAY: raise TypeError( - "Only Variable with VarType.LOD_TENSOR_ARRAY support `append` method, but received type: {}". - format(self.type)) + "Only Variable with VarType.LOD_TENSOR_ARRAY support `append` method, but received type: {}" + .format(self.type)) array_write(x=var, i=array_length(self), array=self) def _scalar_op_(var, scale, bias): block = current_block(var) out = create_new_tmp_var(block, var.dtype) - block.append_op( - type="scale", - inputs={"X": [var]}, - outputs={"Out": [out]}, - attrs={"scale": scale, - "bias": bias}) + block.append_op(type="scale", + inputs={"X": [var]}, + outputs={"Out": [out]}, + attrs={ + "scale": scale, + "bias": bias + }) return out def _neg_(var): @@ -258,6 +259,7 @@ def monkey_patch_variable(): op_type, reverse=False, scalar_method=None): + def __impl__(self, other_var): # 1. scalar exists cases # we need combine the tensor.dtype and scalar.dtype, cast correct object @@ -300,18 +302,18 @@ def monkey_patch_variable(): has_batch_size = True break if not has_batch_size: - other_var = create_tensor( - current_block(self), - other_var, - dtype=lhs_dtype, - shape=self.shape) + other_var = create_tensor(current_block(self), + other_var, + dtype=lhs_dtype, + shape=self.shape) else: other_var = create_tensor_with_batchsize( self, other_var, lhs_dtype) else: # add fill_op to current_block - other_var = create_scalar( - current_block(self), value=other_var, dtype=lhs_dtype) + other_var = create_scalar(current_block(self), + value=other_var, + dtype=lhs_dtype) # 3. unify right var type to left var rhs_dtype = safe_get_dtype(other_var) @@ -339,12 +341,13 @@ def monkey_patch_variable(): "%s(X, Y, axis=0) instead of %s. This transitional warning will be dropped in the future." 
% (file_name, line_num, EXPRESSION_MAP[method_name], op_type, op_type, EXPRESSION_MAP[method_name])) - current_block(self).append_op( - type=op_type, - inputs={'X': [self], - 'Y': [other_var]}, - outputs={'Out': out}, - attrs={'axis': axis}) + current_block(self).append_op(type=op_type, + inputs={ + 'X': [self], + 'Y': [other_var] + }, + outputs={'Out': out}, + attrs={'axis': axis}) return out comment = OpProtoHolder.instance().get_op_proto(op_type).comment @@ -369,34 +372,35 @@ def monkey_patch_variable(): ('dim', lambda x: len(x.shape)), ('ndimension', lambda x: len(x.shape)), ('ndim', _ndim_), - ('__add__', _binary_creator_('__add__', 'elementwise_add', False, - _scalar_add_)), + ('__add__', + _binary_creator_('__add__', 'elementwise_add', False, _scalar_add_)), # a+b == b+a. Do not need to reverse explicitly ('__radd__', _binary_creator_('__radd__', 'elementwise_add', False, _scalar_add_)), - ('__sub__', _binary_creator_('__sub__', 'elementwise_sub', False, - _scalar_sub_)), - ('__rsub__', _binary_creator_('__rsub__', 'elementwise_sub', True, - _scalar_rsub_)), - ('__mul__', _binary_creator_('__mul__', 'elementwise_mul', False, - _scalar_mul_)), + ('__sub__', + _binary_creator_('__sub__', 'elementwise_sub', False, _scalar_sub_)), + ('__rsub__', + _binary_creator_('__rsub__', 'elementwise_sub', True, _scalar_rsub_)), + ('__mul__', + _binary_creator_('__mul__', 'elementwise_mul', False, _scalar_mul_)), # a*b == b*a. Do not need to reverse explicitly ('__rmul__', _binary_creator_('__rmul__', 'elementwise_mul', False, _scalar_mul_)), - ('__div__', _binary_creator_('__div__', 'elementwise_div', False, - _scalar_div_)), - ('__truediv__', _binary_creator_('__truediv__', 'elementwise_div', - False, _scalar_div_)), + ('__div__', + _binary_creator_('__div__', 'elementwise_div', False, _scalar_div_)), + ('__truediv__', + _binary_creator_('__truediv__', 'elementwise_div', False, + _scalar_div_)), ('__rdiv__', _binary_creator_('__rdiv__', 'elementwise_div', True, None)), - ('__rtruediv__', _binary_creator_('__rtruediv__', 'elementwise_div', - True, None)), + ('__rtruediv__', + _binary_creator_('__rtruediv__', 'elementwise_div', True, None)), ('__pow__', _binary_creator_('__pow__', 'elementwise_pow', False, None)), ('__rpow__', _binary_creator_('__rpow__', 'elementwise_pow', True, None)), - ('__floordiv__', _binary_creator_('__floordiv__', - 'elementwise_floordiv', False, None)), + ('__floordiv__', + _binary_creator_('__floordiv__', 'elementwise_floordiv', False, None)), ('__mod__', _binary_creator_('__mod__', 'elementwise_mod', False, None)), ('__matmul__', _binary_creator_('__matmul__', "matmul_v2", False, diff --git a/python/paddle/fluid/layers/metric_op.py b/python/paddle/fluid/layers/metric_op.py index 7616e49c48f..57b8411a54f 100644 --- a/python/paddle/fluid/layers/metric_op.py +++ b/python/paddle/fluid/layers/metric_op.py @@ -102,29 +102,29 @@ def accuracy(input, label, k=1, correct=None, total=None): else: attrs = {'k': k} attrs['sorted'] = False - helper.append_op( - type="top_k_v2", - inputs=inputs, - attrs=attrs, - outputs={"Out": [topk_out], - "Indices": [topk_indices]}) + helper.append_op(type="top_k_v2", + inputs=inputs, + attrs=attrs, + outputs={ + "Out": [topk_out], + "Indices": [topk_indices] + }) acc_out = helper.create_variable_for_type_inference(dtype="float32") if correct is None: correct = helper.create_variable_for_type_inference(dtype="int32") if total is None: total = helper.create_variable_for_type_inference(dtype="int32") - helper.append_op( - type="accuracy", - inputs={ - 
"Out": [topk_out], - "Indices": [topk_indices], - "Label": [label] - }, - outputs={ - "Accuracy": [acc_out], - "Correct": [correct], - "Total": [total], - }) + helper.append_op(type="accuracy", + inputs={ + "Out": [topk_out], + "Indices": [topk_indices], + "Label": [label] + }, + outputs={ + "Accuracy": [acc_out], + "Correct": [correct], + "Total": [total], + }) return acc_out @@ -206,8 +206,8 @@ def auc(input, # make tp, tn, fp, fn persistable, so that can accumulate all batches. # for batch auc - # we create slide_step+1 buckets, the first slide_steps buckets store - # historical batch-level values, and the last bucket stores the sum values of + # we create slide_step+1 buckets, the first slide_steps buckets store + # historical batch-level values, and the last bucket stores the sum values of # previous slide_step buckets. # The index of bucket that the newest batch will use is determined by batch_id mod slide_steps, # and batch_id is store in the last posision of following variable @@ -222,54 +222,53 @@ def auc(input, # for global auc # Needn't maintain the batch id - stat_pos = helper.create_global_variable( - persistable=True, dtype='int64', shape=[1, num_thresholds + 1]) - stat_neg = helper.create_global_variable( - persistable=True, dtype='int64', shape=[1, num_thresholds + 1]) + stat_pos = helper.create_global_variable(persistable=True, + dtype='int64', + shape=[1, num_thresholds + 1]) + stat_neg = helper.create_global_variable(persistable=True, + dtype='int64', + shape=[1, num_thresholds + 1]) for var in [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg]: - helper.set_variable_initializer( - var, Constant( - value=0.0, force_cpu=False)) + helper.set_variable_initializer(var, Constant(value=0.0, + force_cpu=False)) # Batch AUC - helper.append_op( - type="auc", - inputs={ - "Predict": [input], - "Label": [label], - "StatPos": [batch_stat_pos], - "StatNeg": [batch_stat_neg] - }, - attrs={ - "curve": curve, - "num_thresholds": num_thresholds, - "slide_steps": slide_steps - }, - outputs={ - "AUC": [batch_auc_out], - "StatPosOut": [batch_stat_pos], - "StatNegOut": [batch_stat_neg] - }) + helper.append_op(type="auc", + inputs={ + "Predict": [input], + "Label": [label], + "StatPos": [batch_stat_pos], + "StatNeg": [batch_stat_neg] + }, + attrs={ + "curve": curve, + "num_thresholds": num_thresholds, + "slide_steps": slide_steps + }, + outputs={ + "AUC": [batch_auc_out], + "StatPosOut": [batch_stat_pos], + "StatNegOut": [batch_stat_neg] + }) # Global AUC - helper.append_op( - type="auc", - inputs={ - "Predict": [input], - "Label": [label], - "StatPos": [stat_pos], - "StatNeg": [stat_neg] - }, - attrs={ - "curve": curve, - "num_thresholds": num_thresholds, - "slide_steps": 0 - }, - outputs={ - "AUC": [auc_out], - "StatPosOut": [stat_pos], - "StatNegOut": [stat_neg] - }) + helper.append_op(type="auc", + inputs={ + "Predict": [input], + "Label": [label], + "StatPos": [stat_pos], + "StatNeg": [stat_neg] + }, + attrs={ + "curve": curve, + "num_thresholds": num_thresholds, + "slide_steps": 0 + }, + outputs={ + "AUC": [auc_out], + "StatPosOut": [stat_pos], + "StatNegOut": [stat_neg] + }) return auc_out, batch_auc_out, [ batch_stat_pos, batch_stat_neg, stat_pos, stat_neg ] diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 7fb9f6057b5..2c3cb903d83 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -215,6 +215,7 @@ def _elementwise_op_in_dygraph(x, act=None, use_mkldnn=False, op_name=None): + def is_inplace(op_name): return 
op_name[-1] == "_" @@ -223,15 +224,17 @@ def _elementwise_op_in_dygraph(x, out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) else: if in_dygraph_mode(): - op = getattr(_C_ops, OP_NAMEMAPPING[op_name] - if not is_inplace(op_name) else op_name) + op = getattr( + _C_ops, + OP_NAMEMAPPING[op_name] if not is_inplace(op_name) else op_name) out = op(x, y) if _in_legacy_dygraph(): op = getattr(_C_ops, op_name) out = op(x, y, 'axis', axis, 'use_mkldnn', use_mkldnn) - return dygraph_utils._append_activation_in_dygraph( - out, act, use_mkldnn=use_mkldnn) + return dygraph_utils._append_activation_in_dygraph(out, + act, + use_mkldnn=use_mkldnn) def fc(input, @@ -369,27 +372,31 @@ def fc(input, reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1) ] + [size] - w = helper.create_parameter( - attr=param_attr, shape=param_shape, dtype=dtype, is_bias=False) + w = helper.create_parameter(attr=param_attr, + shape=param_shape, + dtype=dtype, + is_bias=False) tmp = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="mul", - inputs={"X": input_var, - "Y": w}, - outputs={"Out": tmp}, - attrs={"x_num_col_dims": num_flatten_dims, - "y_num_col_dims": 1}) + helper.append_op(type="mul", + inputs={ + "X": input_var, + "Y": w + }, + outputs={"Out": tmp}, + attrs={ + "x_num_col_dims": num_flatten_dims, + "y_num_col_dims": 1 + }) mul_results.append(tmp) if len(mul_results) == 1: pre_bias = mul_results[0] else: pre_bias = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="sum", - inputs={"X": mul_results}, - outputs={"Out": pre_bias}, - attrs={"use_mkldnn": False}) + helper.append_op(type="sum", + inputs={"X": mul_results}, + outputs={"Out": pre_bias}, + attrs={"use_mkldnn": False}) # add bias pre_activation = helper.append_bias_op(pre_bias, dim_start=num_flatten_dims) # add activation @@ -529,22 +536,25 @@ def embedding(input, remote_prefetch = True if is_sparse else False - w = helper.create_parameter( - attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False) + w = helper.create_parameter(attr=helper.param_attr, + shape=size, + dtype=dtype, + is_bias=False) tmp = helper.create_variable_for_type_inference(dtype) padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else ( size[0] + padding_idx) - helper.append_op( - type='lookup_table', - inputs={'Ids': input, - 'W': w}, - outputs={'Out': tmp}, - attrs={ - 'is_sparse': is_sparse, - 'is_distributed': is_distributed, - 'remote_prefetch': remote_prefetch, - 'padding_idx': padding_idx - }) + helper.append_op(type='lookup_table', + inputs={ + 'Ids': input, + 'W': w + }, + outputs={'Out': tmp}, + attrs={ + 'is_sparse': is_sparse, + 'is_distributed': is_distributed, + 'remote_prefetch': remote_prefetch, + 'padding_idx': padding_idx + }) return tmp @@ -606,14 +616,18 @@ def _pull_sparse(input, 'is_distributed': True } # this is only for compatible with embedding op - w, _ = helper.create_or_get_global_variable( - name=name, shape=[size], dtype=dtype, is_bias=False, persistable=True) - helper.append_op( - type='pull_sparse', - inputs={'Ids': inputs, - 'W': w}, - outputs={'Out': outs}, - attrs=attrs) + w, _ = helper.create_or_get_global_variable(name=name, + shape=[size], + dtype=dtype, + is_bias=False, + persistable=True) + helper.append_op(type='pull_sparse', + inputs={ + 'Ids': inputs, + 'W': w + }, + outputs={'Out': outs}, + attrs=attrs) if len(outs) == 1: return outs[0] return outs @@ -677,14 +691,18 @@ def _pull_sparse_v2(input, 'is_distributed': True } # this is only for compatible 
with embedding op - w, _ = helper.create_or_get_global_variable( - name=name, shape=[size], dtype=dtype, is_bias=False, persistable=True) - helper.append_op( - type='pull_sparse_v2', - inputs={'Ids': inputs, - 'W': w}, - outputs={'Out': outs}, - attrs=attrs) + w, _ = helper.create_or_get_global_variable(name=name, + shape=[size], + dtype=dtype, + is_bias=False, + persistable=True) + helper.append_op(type='pull_sparse_v2', + inputs={ + 'Ids': inputs, + 'W': w + }, + outputs={'Out': outs}, + attrs=attrs) if len(outs) == 1: return outs[0] return outs @@ -736,18 +754,21 @@ def _pull_gpups_sparse(input, helper.create_variable_for_type_inference(dtype) for i in range(len(inputs)) ] - w = helper.create_parameter( - attr=helper.param_attr, shape=[size[0]], dtype=dtype, is_bias=False) - helper.append_op( - type='pull_gpups_sparse', - inputs={'Ids': inputs, - 'W': w}, - outputs={'Out': outs}, - attrs={ - 'size': size, - 'is_distributed': is_distributed, - 'is_sparse': is_sparse - }) + w = helper.create_parameter(attr=helper.param_attr, + shape=[size[0]], + dtype=dtype, + is_bias=False) + helper.append_op(type='pull_gpups_sparse', + inputs={ + 'Ids': inputs, + 'W': w + }, + outputs={'Out': outs}, + attrs={ + 'size': size, + 'is_distributed': is_distributed, + 'is_sparse': is_sparse + }) if len(outs) == 1: return outs[0] return outs @@ -795,18 +816,21 @@ def _pull_box_sparse(input, helper.create_variable_for_type_inference(dtype) for i in range(len(inputs)) ] - w = helper.create_parameter( - attr=helper.param_attr, shape=[size], dtype=dtype, is_bias=False) - helper.append_op( - type='pull_box_sparse', - inputs={'Ids': inputs, - 'W': w}, - outputs={'Out': outs}, - attrs={ - 'size': size, - 'is_distributed': is_distributed, - 'is_sparse': is_sparse - }) + w = helper.create_parameter(attr=helper.param_attr, + shape=[size], + dtype=dtype, + is_bias=False) + helper.append_op(type='pull_box_sparse', + inputs={ + 'Ids': inputs, + 'W': w + }, + outputs={'Out': outs}, + attrs={ + 'size': size, + 'is_distributed': is_distributed, + 'is_sparse': is_sparse + }) if len(outs) == 1: return outs[0] return outs @@ -907,10 +931,9 @@ def linear_chain_crf(input, label, param_attr=None, length=None): check_variable_and_dtype(label, 'label', ['int64'], 'linear_chain_crf') helper = LayerHelper('linear_chain_crf', **locals()) size = input.shape[2] if length else input.shape[1] - transition = helper.create_parameter( - attr=helper.param_attr, - shape=[size + 2, size], - dtype=helper.input_dtype()) + transition = helper.create_parameter(attr=helper.param_attr, + shape=[size + 2, size], + dtype=helper.input_dtype()) alpha = helper.create_variable_for_type_inference( dtype=helper.input_dtype()) emission_exps = helper.create_variable_for_type_inference( @@ -926,15 +949,14 @@ def linear_chain_crf(input, label, param_attr=None, length=None): } if length: this_inputs['Length'] = [length] - helper.append_op( - type='linear_chain_crf', - inputs=this_inputs, - outputs={ - "Alpha": [alpha], - "EmissionExps": [emission_exps], - "TransitionExps": transition_exps, - "LogLikelihood": log_likelihood - }) + helper.append_op(type='linear_chain_crf', + inputs=this_inputs, + outputs={ + "Alpha": [alpha], + "EmissionExps": [emission_exps], + "TransitionExps": transition_exps, + "LogLikelihood": log_likelihood + }) return log_likelihood @@ -999,10 +1021,9 @@ def crf_decoding(input, param_attr, label=None, length=None): inputs = {"Emission": [input], "Transition": transition, "Label": label} if length: inputs['Length'] = length - helper.append_op( 
- type='crf_decoding', - inputs=inputs, - outputs={"ViterbiPath": [viterbi_path]}) + helper.append_op(type='crf_decoding', + inputs=inputs, + outputs={"ViterbiPath": [viterbi_path]}) return viterbi_path @@ -1036,13 +1057,16 @@ def cos_sim(X, Y): out = helper.create_variable_for_type_inference(dtype=X.dtype) xnorm = helper.create_variable_for_type_inference(dtype=X.dtype) ynorm = helper.create_variable_for_type_inference(dtype=X.dtype) - helper.append_op( - type='cos_sim', - inputs={'X': [X], - 'Y': [Y]}, - outputs={'Out': [out], - 'XNorm': [xnorm], - 'YNorm': [ynorm]}) + helper.append_op(type='cos_sim', + inputs={ + 'X': [X], + 'Y': [Y] + }, + outputs={ + 'Out': [out], + 'XNorm': [xnorm], + 'YNorm': [ynorm] + }) return out @@ -1113,15 +1137,16 @@ def dropout(x, return x if _non_static_mode(): - if (seed is None or - seed == 0) and default_main_program().random_seed != 0: + if (seed is None + or seed == 0) and default_main_program().random_seed != 0: seed = default_main_program().random_seed if is_test is None: is_test = not _dygraph_tracer()._train_mode - out, mask = _C_ops.dropout( - x, 'dropout_prob', dropout_prob, 'is_test', is_test, 'fix_seed', - seed is not None, 'seed', seed if seed is not None else 0, - 'dropout_implementation', dropout_implementation) + out, mask = _C_ops.dropout(x, 'dropout_prob', dropout_prob, 'is_test', + is_test, 'fix_seed', seed is not None, + 'seed', seed if seed is not None else 0, + 'dropout_implementation', + dropout_implementation) return out def get_attrs(prog, dropout_prob, is_test, seed): @@ -1146,12 +1171,13 @@ def dropout(x, attrs = get_attrs(helper.main_program, dropout_prob, is_test, seed) - helper.append_op( - type='dropout', - inputs={'X': [x]}, - outputs={'Out': [out], - 'Mask': [mask]}, - attrs=attrs) + helper.append_op(type='dropout', + inputs={'X': [x]}, + outputs={ + 'Out': [out], + 'Mask': [mask] + }, + attrs=attrs) return out @@ -1287,22 +1313,21 @@ def chunk_eval(input, if seq_length is not None: this_input["SeqLength"] = [seq_length] - helper.append_op( - type="chunk_eval", - inputs=this_input, - outputs={ - "Precision": [precision], - "Recall": [recall], - "F1-Score": [f1_score], - "NumInferChunks": [num_infer_chunks], - "NumLabelChunks": [num_label_chunks], - "NumCorrectChunks": [num_correct_chunks] - }, - attrs={ - "num_chunk_types": num_chunk_types, - "chunk_scheme": chunk_scheme, - "excluded_chunk_types": excluded_chunk_types or [] - }) + helper.append_op(type="chunk_eval", + inputs=this_input, + outputs={ + "Precision": [precision], + "Recall": [recall], + "F1-Score": [f1_score], + "NumInferChunks": [num_infer_chunks], + "NumLabelChunks": [num_label_chunks], + "NumCorrectChunks": [num_correct_chunks] + }, + attrs={ + "num_chunk_types": num_chunk_types, + "chunk_scheme": chunk_scheme, + "excluded_chunk_types": excluded_chunk_types or [] + }) return (precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks) @@ -1433,11 +1458,10 @@ def softmax(input, use_cudnn=True, name=None, axis=-1): dtype = helper.input_dtype() softmax_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="softmax", - inputs={"X": input}, - outputs={"Out": softmax_out}, - attrs=attrs) + helper.append_op(type="softmax", + inputs={"X": input}, + outputs={"Out": softmax_out}, + attrs=attrs) return softmax_out @@ -1615,9 +1639,9 @@ def conv2d(input, if groups is None: num_filter_channels = num_channels elif groups <= 0: - raise ValueError("the groups of input must be greater than 0, " - "but received the groups of 
input is {}".format( - groups)) + raise ValueError( + "the groups of input must be greater than 0, " + "but received the groups of input is {}".format(groups)) else: if num_channels % groups != 0: raise ValueError( @@ -1627,12 +1651,12 @@ def conv2d(input, num_filter_channels = num_channels // groups l_type = 'conv2d' - if (num_channels == groups and num_filters % num_channels == 0 and - not use_cudnn): + if (num_channels == groups and num_filters % num_channels == 0 + and not use_cudnn): l_type = 'depthwise_conv2d' - if (num_channels == groups and num_filters % num_channels == 0 and - core.is_compiled_with_rocm()): + if (num_channels == groups and num_filters % num_channels == 0 + and core.is_compiled_with_rocm()): l_type = 'depthwise_conv2d' # NPU only supports depthwise_conv2d when "input_channel = output_channel = groups" @@ -1651,6 +1675,7 @@ def conv2d(input, # padding def _update_padding(padding, data_format): + def is_list_or_tuple(ele): if isinstance(ele, list) or isinstance(ele, tuple): return True @@ -1720,24 +1745,23 @@ def conv2d(input, "FLAGS_conv2d_disable_cudnn")["FLAGS_conv2d_disable_cudnn"]): use_cudnn = False - helper.append_op( - type=l_type, - inputs={ - 'Input': input, - 'Filter': filter_param, - }, - outputs={"Output": pre_bias}, - attrs={ - 'strides': stride, - 'paddings': padding, - 'dilations': dilation, - 'groups': groups, - 'use_cudnn': use_cudnn, - 'use_mkldnn': False, - 'fuse_relu_before_depthwise_conv': False, - "padding_algorithm": padding_algorithm, - "data_format": data_format, - }) + helper.append_op(type=l_type, + inputs={ + 'Input': input, + 'Filter': filter_param, + }, + outputs={"Output": pre_bias}, + attrs={ + 'strides': stride, + 'paddings': padding, + 'dilations': dilation, + 'groups': groups, + 'use_cudnn': use_cudnn, + 'use_mkldnn': False, + 'fuse_relu_before_depthwise_conv': False, + "padding_algorithm": padding_algorithm, + "data_format": data_format, + }) if data_format == 'NCHW': pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2) @@ -1913,8 +1937,8 @@ def conv3d(input, channel_last = (data_format == "NDHWC") if len(input.shape) != 5: raise ValueError( - "Input should be 5D tensor, but received input with the shape of {}". - format(input.shape)) + "Input should be 5D tensor, but received input with the shape of {}" + .format(input.shape)) num_channels = input.shape[4] if channel_last else input.shape[1] if num_channels < 0: raise ValueError( @@ -1925,8 +1949,8 @@ def conv3d(input, num_filter_channels = num_channels elif groups <= 0: raise ValueError( - "the groups of conv3d should be greater than 0. Received groups: {}". - format(groups)) + "the groups of conv3d should be greater than 0. 
Received groups: {}" + .format(groups)) else: if num_channels % groups != 0: raise ValueError( @@ -1940,6 +1964,7 @@ def conv3d(input, dilation = utils.convert_to_list(dilation, 3, 'dilation') def _update_padding(padding, data_format): + def is_list_or_tuple(ele): if isinstance(ele, list) or isinstance(ele, tuple): return True @@ -2011,23 +2036,22 @@ def conv3d(input, pre_bias = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type=l_type, - inputs={ - 'Input': input, - 'Filter': filter_param, - }, - outputs={"Output": pre_bias}, - attrs={ - 'strides': stride, - 'paddings': padding, - 'dilations': dilation, - 'groups': groups, - 'use_cudnn': use_cudnn, - 'use_mkldnn': False, - "padding_algorithm": padding_algorithm, - "data_format": data_format, - }) + helper.append_op(type=l_type, + inputs={ + 'Input': input, + 'Filter': filter_param, + }, + outputs={"Output": pre_bias}, + attrs={ + 'strides': stride, + 'paddings': padding, + 'dilations': dilation, + 'groups': groups, + 'use_cudnn': use_cudnn, + 'use_mkldnn': False, + "padding_algorithm": padding_algorithm, + "data_format": data_format, + }) if data_format == 'NCDHW': pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2) @@ -2179,6 +2203,7 @@ def pool2d(input, pool_stride = utils.convert_to_list(pool_stride, 2, 'pool_stride') def update_padding(padding, data_format): + def is_list_or_tuple(ele): if isinstance(ele, list) or isinstance(ele, tuple): return True @@ -2233,23 +2258,22 @@ def pool2d(input, dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type=op_type, - inputs={"X": input}, - outputs={"Out": pool_out}, - attrs={ - "pooling_type": pool_type, - "ksize": pool_size, - "global_pooling": global_pooling, - "strides": pool_stride, - "paddings": pool_padding, - "padding_algorithm": padding_algorithm, - "use_cudnn": use_cudnn, - "ceil_mode": ceil_mode, - "use_mkldnn": False, - "exclusive": exclusive, - "data_format": data_format, - }) + helper.append_op(type=op_type, + inputs={"X": input}, + outputs={"Out": pool_out}, + attrs={ + "pooling_type": pool_type, + "ksize": pool_size, + "global_pooling": global_pooling, + "strides": pool_stride, + "paddings": pool_padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": use_cudnn, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": exclusive, + "data_format": data_format, + }) return pool_out @@ -2403,6 +2427,7 @@ def pool3d(input, pool_stride = utils.convert_to_list(pool_stride, 3, 'pool_stride') def update_padding(padding, data_format): + def is_list_or_tuple(ele): if isinstance(ele, (list, tuple)): return True @@ -2461,23 +2486,22 @@ def pool3d(input, dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type=op_type, - inputs={"X": input}, - outputs={"Out": pool_out}, - attrs={ - "pooling_type": pool_type, - "ksize": pool_size, - "global_pooling": global_pooling, - "strides": pool_stride, - "paddings": pool_padding, - "padding_algorithm": padding_algorithm, - "use_cudnn": use_cudnn, - "ceil_mode": ceil_mode, - "use_mkldnn": False, - "exclusive": exclusive, - "data_format": data_format, - }) + helper.append_op(type=op_type, + inputs={"X": input}, + outputs={"Out": pool_out}, + attrs={ + "pooling_type": pool_type, + "ksize": pool_size, + "global_pooling": global_pooling, + "strides": pool_stride, + "paddings": pool_padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": use_cudnn, + "ceil_mode": ceil_mode, + 
"use_mkldnn": False, + "exclusive": exclusive, + "data_format": data_format, + }) return pool_out @@ -2615,15 +2639,14 @@ def adaptive_pool2d(input, mask = helper.create_variable_for_type_inference(dtype) outputs["Mask"] = mask - helper.append_op( - type=l_type, - inputs={"X": input}, - outputs=outputs, - attrs={ - "pooling_type": pool_type, - "ksize": pool_size, - "adaptive": True, - }) + helper.append_op(type=l_type, + inputs={"X": input}, + outputs=outputs, + attrs={ + "pooling_type": pool_type, + "ksize": pool_size, + "adaptive": True, + }) return (pool_out, mask) if require_index else pool_out @@ -2775,15 +2798,14 @@ def adaptive_pool3d(input, mask = helper.create_variable_for_type_inference(dtype) outputs["Mask"] = mask - helper.append_op( - type=l_type, - inputs={"X": input}, - outputs=outputs, - attrs={ - "pooling_type": pool_type, - "ksize": pool_size, - "adaptive": True, - }) + helper.append_op(type=l_type, + inputs={"X": input}, + outputs=outputs, + attrs={ + "pooling_type": pool_type, + "ksize": pool_size, + "adaptive": True, + }) return (pool_out, mask) if require_index else pool_out @@ -2939,32 +2961,31 @@ def batch_norm(input, param_shape = [channel_num] # create parameter - scale = helper.create_parameter( - attr=helper.param_attr, - shape=param_shape, - dtype=dtype, - default_initializer=Constant(1.0)) - bias = helper.create_parameter( - attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True) - - mean = helper.create_parameter( - attr=ParamAttr( - name=moving_mean_name, - initializer=Constant(0.0), - trainable=False, - do_model_average=do_model_average_for_mean_and_var), - shape=param_shape, - dtype=dtype) + scale = helper.create_parameter(attr=helper.param_attr, + shape=param_shape, + dtype=dtype, + default_initializer=Constant(1.0)) + bias = helper.create_parameter(attr=helper.bias_attr, + shape=param_shape, + dtype=dtype, + is_bias=True) + + mean = helper.create_parameter(attr=ParamAttr( + name=moving_mean_name, + initializer=Constant(0.0), + trainable=False, + do_model_average=do_model_average_for_mean_and_var), + shape=param_shape, + dtype=dtype) mean.stop_gradient = True - variance = helper.create_parameter( - attr=ParamAttr( - name=moving_variance_name, - initializer=Constant(1.0), - trainable=False, - do_model_average=do_model_average_for_mean_and_var), - shape=param_shape, - dtype=dtype) + variance = helper.create_parameter(attr=ParamAttr( + name=moving_variance_name, + initializer=Constant(1.0), + trainable=False, + do_model_average=do_model_average_for_mean_and_var), + shape=param_shape, + dtype=dtype) variance.stop_gradient = True # create output @@ -3001,11 +3022,12 @@ def batch_norm(input, input, scale, bias, mean, variance, None, mean_out, variance_out, *attrs_) - return dygraph_utils._append_activation_in_dygraph( - batch_norm_out, act=act, use_mkldnn=False) + return dygraph_utils._append_activation_in_dygraph(batch_norm_out, + act=act, + use_mkldnn=False) - saved_mean = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True) + saved_mean = helper.create_variable_for_type_inference(dtype=dtype, + stop_gradient=True) saved_variance = helper.create_variable_for_type_inference( dtype=dtype, stop_gradient=True) reserve_space = None @@ -3048,8 +3070,10 @@ def batch_norm(input, if reserve_space is not None: outputs["ReserveSpace"] = reserve_space - helper.append_op( - type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type="batch_norm", + inputs=inputs, + outputs=outputs, + attrs=attrs) return 
helper.append_activation(batch_norm_out) @@ -3168,32 +3192,31 @@ def inplace_abn(input, param_shape = [channel_num] # create parameter - scale = helper.create_parameter( - attr=helper.param_attr, - shape=param_shape, - dtype=dtype, - default_initializer=Constant(1.0)) - bias = helper.create_parameter( - attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True) - - mean = helper.create_parameter( - attr=ParamAttr( - name=moving_mean_name, - initializer=Constant(0.0), - trainable=False, - do_model_average=do_model_average_for_mean_and_var), - shape=param_shape, - dtype=dtype) + scale = helper.create_parameter(attr=helper.param_attr, + shape=param_shape, + dtype=dtype, + default_initializer=Constant(1.0)) + bias = helper.create_parameter(attr=helper.bias_attr, + shape=param_shape, + dtype=dtype, + is_bias=True) + + mean = helper.create_parameter(attr=ParamAttr( + name=moving_mean_name, + initializer=Constant(0.0), + trainable=False, + do_model_average=do_model_average_for_mean_and_var), + shape=param_shape, + dtype=dtype) mean.stop_gradient = True - variance = helper.create_parameter( - attr=ParamAttr( - name=moving_variance_name, - initializer=Constant(1.0), - trainable=False, - do_model_average=do_model_average_for_mean_and_var), - shape=param_shape, - dtype=dtype) + variance = helper.create_parameter(attr=ParamAttr( + name=moving_variance_name, + initializer=Constant(1.0), + trainable=False, + do_model_average=do_model_average_for_mean_and_var), + shape=param_shape, + dtype=dtype) variance.stop_gradient = True # create output @@ -3235,8 +3258,8 @@ def inplace_abn(input, variance_out, *attrs__) return batch_norm_out - saved_mean = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True) + saved_mean = helper.create_variable_for_type_inference(dtype=dtype, + stop_gradient=True) saved_variance = helper.create_variable_for_type_inference( dtype=dtype, stop_gradient=True) reserve_space = helper.create_variable_for_type_inference( @@ -3273,8 +3296,10 @@ def inplace_abn(input, if reserve_space is not None: outputs["ReserveSpace"] = reserve_space - helper.append_op( - type="inplace_abn", inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type="inplace_abn", + inputs=inputs, + outputs=outputs, + attrs=attrs) return batch_norm_out @@ -3362,29 +3387,27 @@ def instance_norm(input, input_shape = input.shape if len(input.shape) < 2 or len(input.shape) > 5: raise ValueError( - 'expected 2D or 3D or 4D or 5D input (got {}D input, input shape is: {})'. 
- format(len(input.shape), input_shape)) + 'expected 2D or 3D or 4D or 5D input (got {}D input, input shape is: {})' + .format(len(input.shape), input_shape)) channel_num = input_shape[1] param_shape = [channel_num] if param_attr != False and bias_attr != False: # create parameter - scale = helper.create_parameter( - attr=helper.param_attr, - shape=param_shape, - dtype=dtype, - default_initializer=Constant(1.0)) - bias = helper.create_parameter( - attr=helper.bias_attr, - shape=param_shape, - dtype=dtype, - is_bias=True, - default_initializer=Constant(0.0)) + scale = helper.create_parameter(attr=helper.param_attr, + shape=param_shape, + dtype=dtype, + default_initializer=Constant(1.0)) + bias = helper.create_parameter(attr=helper.bias_attr, + shape=param_shape, + dtype=dtype, + is_bias=True, + default_initializer=Constant(0.0)) # create output - saved_mean = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True) + saved_mean = helper.create_variable_for_type_inference(dtype=dtype, + stop_gradient=True) saved_variance = helper.create_variable_for_type_inference( dtype=dtype, stop_gradient=True) @@ -3395,15 +3418,16 @@ def instance_norm(input, inputs["Scale"] = scale inputs["Bias"] = bias - helper.append_op( - type="instance_norm", - inputs=inputs, - outputs={ - "Y": instance_norm_out, - "SavedMean": saved_mean, - "SavedVariance": saved_variance - }, - attrs={"epsilon": epsilon, }) + helper.append_op(type="instance_norm", + inputs=inputs, + outputs={ + "Y": instance_norm_out, + "SavedMean": saved_mean, + "SavedVariance": saved_variance + }, + attrs={ + "epsilon": epsilon, + }) return instance_norm_out @@ -3520,44 +3544,39 @@ def data_norm(input, if name == None: name = "dn" if enable_scale_and_shift: - scale_w = helper.create_parameter( - attr=ParamAttr( - name=name + '.scale_w', - initializer=Constant(value=float(scale_w_default)), - trainable=True), - shape=param_shape, - dtype=input.dtype) - bias = helper.create_parameter( - attr=ParamAttr( - name=name + '.bias', - initializer=Constant(value=float(bias_default)), - trainable=True), - shape=param_shape, - dtype=input.dtype) - # create parameter - batch_size = helper.create_parameter( - attr=ParamAttr( - name=name + '.batch_size', - initializer=Constant(value=float(batch_size_default)), + scale_w = helper.create_parameter(attr=ParamAttr( + name=name + '.scale_w', + initializer=Constant(value=float(scale_w_default)), trainable=True), - shape=param_shape, - dtype=input.dtype) - - batch_sum = helper.create_parameter( - attr=ParamAttr( - name=name + '.batch_sum', - initializer=Constant(value=float(batch_sum_default)), - trainable=True), - shape=param_shape, - dtype=input.dtype) - - batch_square_sum = helper.create_parameter( - attr=ParamAttr( - name=name + '.batch_square_sum', - initializer=Constant(value=float(batch_square_sum_default)), + shape=param_shape, + dtype=input.dtype) + bias = helper.create_parameter(attr=ParamAttr( + name=name + '.bias', + initializer=Constant(value=float(bias_default)), trainable=True), - shape=param_shape, - dtype=input.dtype) + shape=param_shape, + dtype=input.dtype) + # create parameter + batch_size = helper.create_parameter(attr=ParamAttr( + name=name + '.batch_size', + initializer=Constant(value=float(batch_size_default)), + trainable=True), + shape=param_shape, + dtype=input.dtype) + + batch_sum = helper.create_parameter(attr=ParamAttr( + name=name + '.batch_sum', + initializer=Constant(value=float(batch_sum_default)), + trainable=True), + shape=param_shape, + dtype=input.dtype) + + 
batch_square_sum = helper.create_parameter(attr=ParamAttr( + name=name + '.batch_square_sum', + initializer=Constant(value=float(batch_square_sum_default)), + trainable=True), + shape=param_shape, + dtype=input.dtype) means = helper.create_variable(dtype=dtype, stop_gradient=True) scales = helper.create_variable(dtype=dtype, stop_gradient=True) @@ -3583,18 +3602,17 @@ def data_norm(input, if enable_scale_and_shift: inputs["scale_w"] = scale_w inputs["bias"] = bias - helper.append_op( - type="data_norm", - inputs=inputs, - outputs={ - "Y": data_norm_out, - "Means": means, - "Scales": scales, - "BatchSize": batch_size, - "BatchSum": batch_sum, - "BatchSquareSum": batch_square_sum - }, - attrs=attrs) + helper.append_op(type="data_norm", + inputs=inputs, + outputs={ + "Y": data_norm_out, + "Means": means, + "Scales": scales, + "BatchSize": batch_size, + "BatchSum": batch_sum, + "BatchSquareSum": batch_square_sum + }, + attrs=attrs) return helper.append_activation(data_norm_out) @@ -3684,41 +3702,43 @@ def layer_norm(input, param_shape = [reduce(lambda x, y: x * y, input_shape[begin_norm_axis:])] if scale: assert param_attr is not False, "param_attr should not be False when using scale." - scale = helper.create_parameter( - attr=helper.param_attr, - shape=param_shape, - dtype=dtype, - default_initializer=Constant(1.0)) + scale = helper.create_parameter(attr=helper.param_attr, + shape=param_shape, + dtype=dtype, + default_initializer=Constant(1.0)) inputs['Scale'] = scale else: if param_attr: warnings.warn("param_attr is only available with scale is True.") if shift: assert bias_attr is not False, "bias_attr should not be False when using shift." - bias = helper.create_parameter( - attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True) + bias = helper.create_parameter(attr=helper.bias_attr, + shape=param_shape, + dtype=dtype, + is_bias=True) inputs['Bias'] = bias else: if bias_attr: warnings.warn("bias_attr is only available with shift is True.") # create output - mean_out = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True) - variance_out = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True) + mean_out = helper.create_variable_for_type_inference(dtype=dtype, + stop_gradient=True) + variance_out = helper.create_variable_for_type_inference(dtype=dtype, + stop_gradient=True) layer_norm_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="layer_norm", - inputs=inputs, - outputs={ - "Y": layer_norm_out, - "Mean": mean_out, - "Variance": variance_out, - }, - attrs={"epsilon": epsilon, - "begin_norm_axis": begin_norm_axis}) + helper.append_op(type="layer_norm", + inputs=inputs, + outputs={ + "Y": layer_norm_out, + "Mean": mean_out, + "Variance": variance_out, + }, + attrs={ + "epsilon": epsilon, + "begin_norm_axis": begin_norm_axis + }) return helper.append_activation(layer_norm_out) @@ -3792,15 +3812,16 @@ def group_norm(input, channel_num = input_shape[1] if data_layout == 'NCHW' else input_shape[-1] param_shape = [channel_num] if param_attr: - scale = helper.create_parameter( - attr=helper.param_attr, - shape=param_shape, - dtype=dtype, - default_initializer=Constant(1.0)) + scale = helper.create_parameter(attr=helper.param_attr, + shape=param_shape, + dtype=dtype, + default_initializer=Constant(1.0)) inputs['Scale'] = scale if bias_attr: - bias = helper.create_parameter( - attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True) + bias = helper.create_parameter(attr=helper.bias_attr, + 
shape=param_shape, + dtype=dtype, + is_bias=True) inputs['Bias'] = bias # create output @@ -3808,19 +3829,18 @@ def group_norm(input, variance_out = helper.create_variable(dtype=dtype, stop_gradient=True) group_norm_out = helper.create_variable(dtype=dtype) - helper.append_op( - type="group_norm", - inputs=inputs, - outputs={ - "Y": group_norm_out, - "Mean": mean_out, - "Variance": variance_out, - }, - attrs={ - "epsilon": epsilon, - "groups": groups, - "data_layout": data_layout - }) + helper.append_op(type="group_norm", + inputs=inputs, + outputs={ + "Y": group_norm_out, + "Mean": mean_out, + "Variance": variance_out, + }, + attrs={ + "epsilon": epsilon, + "groups": groups, + "data_layout": data_layout + }) return helper.append_activation(group_norm_out) @@ -3906,33 +3926,32 @@ def spectral_norm(weight, dim=0, power_iters=1, eps=1e-12, name=None): h = input_shape[dim] w = np.prod(input_shape) // h - u = helper.create_parameter( - attr=ParamAttr(), - shape=[h], - dtype=dtype, - default_initializer=Normal(0., 1.)) + u = helper.create_parameter(attr=ParamAttr(), + shape=[h], + dtype=dtype, + default_initializer=Normal(0., 1.)) u.stop_gradient = True inputs['U'] = u - v = helper.create_parameter( - attr=ParamAttr(), - shape=[w], - dtype=dtype, - default_initializer=Normal(0., 1.)) + v = helper.create_parameter(attr=ParamAttr(), + shape=[w], + dtype=dtype, + default_initializer=Normal(0., 1.)) inputs['V'] = v v.stop_gradient = True # create output out = helper.create_variable(dtype=dtype) - helper.append_op( - type="spectral_norm", - inputs=inputs, - outputs={"Out": out, }, - attrs={ - "dim": dim, - "power_iters": power_iters, - "eps": eps, - }) + helper.append_op(type="spectral_norm", + inputs=inputs, + outputs={ + "Out": out, + }, + attrs={ + "dim": dim, + "power_iters": power_iters, + "eps": eps, + }) return out @@ -4118,8 +4137,8 @@ def conv2d_transpose(input, input_channel = input.shape[1] if data_format == 'NCHW' else input.shape[-1] op_type = 'conv2d_transpose' - if (input_channel == groups and num_filters == input_channel and - not use_cudnn): + if (input_channel == groups and num_filters == input_channel + and not use_cudnn): op_type = 'depthwise_conv2d_transpose' helper = LayerHelper(op_type, **locals()) @@ -4133,6 +4152,7 @@ def conv2d_transpose(input, raise ValueError("use_cudnn should be True or False") def _update_padding(padding, data_format): + def is_list_or_tuple(ele): if isinstance(ele, list) or isinstance(ele, tuple): return True @@ -4206,31 +4226,33 @@ def conv2d_transpose(input, if groups is None: groups = 1 elif groups <= 0: - raise ValueError("the groups of input must be greater than 0, " - "but received the groups of input is {}".format( - groups)) + raise ValueError( + "the groups of input must be greater than 0, " + "but received the groups of input is {}".format(groups)) filter_shape = [input_channel, num_filters // groups] + filter_size - img_filter = helper.create_parameter( - dtype=input.dtype, shape=filter_shape, attr=helper.param_attr) + img_filter = helper.create_parameter(dtype=input.dtype, + shape=filter_shape, + attr=helper.param_attr) pre_bias = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type=op_type, - inputs={'Input': [input], - 'Filter': [img_filter]}, - outputs={'Output': pre_bias}, - attrs={ - 'output_size': output_size, - 'strides': stride, - 'paddings': padding, - 'padding_algorithm': padding_algorithm, - 'dilations': dilation, - 'groups': groups, - 'use_cudnn': use_cudnn, - 'data_format': data_format - }) + 
helper.append_op(type=op_type, + inputs={ + 'Input': [input], + 'Filter': [img_filter] + }, + outputs={'Output': pre_bias}, + attrs={ + 'output_size': output_size, + 'strides': stride, + 'paddings': padding, + 'padding_algorithm': padding_algorithm, + 'dilations': dilation, + 'groups': groups, + 'use_cudnn': use_cudnn, + 'data_format': data_format + }) if data_format == 'NCHW': pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2) @@ -4429,10 +4451,9 @@ def conv3d_transpose(input, raise TypeError("Input of conv3d_transpose must be Variable") if len(input.shape) != 5: raise ValueError( - "Input should be 5D tensor, but received input with the shape of {}". - format(input.shape)) - input_channel = input.shape[1] if data_format == 'NCDHW' else input.shape[ - -1] + "Input should be 5D tensor, but received input with the shape of {}" + .format(input.shape)) + input_channel = input.shape[1] if data_format == 'NCDHW' else input.shape[-1] stride = utils.convert_to_list(stride, 3, 'stride') dilation = utils.convert_to_list(dilation, 3, 'dilation') @@ -4441,6 +4462,7 @@ def conv3d_transpose(input, raise ValueError("use_cudnn should be True or False") def _update_padding(padding, data_format): + def is_list_or_tuple(ele): if isinstance(ele, list) or isinstance(ele, tuple): return True @@ -4524,16 +4546,18 @@ def conv3d_transpose(input, groups = 1 if groups is None else groups if groups <= 0: raise ValueError( - "the groups of conv3d_transpose should be greater than 0. Received groups: {}". - format(groups)) + "the groups of conv3d_transpose should be greater than 0. Received groups: {}" + .format(groups)) if num_filters % groups != 0: - raise ValueError("Attr(num_filters) must be divisible by groups," - "Received: Attr(num_filters) is {}, the groups is {}". 
- format(num_filters, groups)) + raise ValueError( + "Attr(num_filters) must be divisible by groups," + "Received: Attr(num_filters) is {}, the groups is {}".format( + num_filters, groups)) filter_shape = [input_channel, num_filters // groups] + filter_size - img_filter = helper.create_parameter( - dtype=input.dtype, shape=filter_shape, attr=helper.param_attr) + img_filter = helper.create_parameter(dtype=input.dtype, + shape=filter_shape, + attr=helper.param_attr) if data_format == 'NCDHW': data_format = 'NCHW' @@ -4541,21 +4565,22 @@ def conv3d_transpose(input, data_format = 'NHWC' pre_bias = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type=l_type, - inputs={'Input': [input], - 'Filter': [img_filter]}, - outputs={'Output': pre_bias}, - attrs={ - 'output_size': output_size, - 'strides': stride, - 'paddings': padding, - 'padding_algorithm': padding_algorithm, - 'dilations': dilation, - 'groups': groups, - 'use_cudnn': use_cudnn, - 'data_format': data_format - }) + helper.append_op(type=l_type, + inputs={ + 'Input': [input], + 'Filter': [img_filter] + }, + outputs={'Output': pre_bias}, + attrs={ + 'output_size': output_size, + 'strides': stride, + 'paddings': padding, + 'padding_algorithm': padding_algorithm, + 'dilations': dilation, + 'groups': groups, + 'use_cudnn': use_cudnn, + 'data_format': data_format + }) if data_format == 'NCHW': pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2) @@ -4627,9 +4652,12 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None): return _C_ops.reduce_sum(input, 'dim', dim, 'keep_dim', keep_dim, 'reduce_all', reduce_all) attrs = { - 'dim': dim if dim != None and dim != [] else [0], - 'keep_dim': keep_dim, - 'reduce_all': True + 'dim': + dim if dim != None and dim != [] else [0], + 'keep_dim': + keep_dim, + 'reduce_all': + True if dim == None or dim == [] or len(dim) == len(input.shape) else False } check_variable_and_dtype( @@ -4637,11 +4665,10 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None): 'reduce_sum') helper = LayerHelper('reduce_sum', **locals()) out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) - helper.append_op( - type='reduce_sum', - inputs={'X': input}, - outputs={'Out': out}, - attrs=attrs) + helper.append_op(type='reduce_sum', + inputs={'X': input}, + outputs={'Out': out}, + attrs=attrs) return out @@ -4754,16 +4781,18 @@ def reduce_max(input, dim=None, keep_dim=False, name=None): out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) if dim is not None and not isinstance(dim, list): dim = [dim] - helper.append_op( - type='reduce_max', - inputs={'X': input}, - outputs={'Out': out}, - attrs={ - 'dim': dim if dim != None and dim != [] else [0], - 'keep_dim': keep_dim, - 'reduce_all': True if dim == None or dim == [] or - len(dim) == len(input.shape) else False - }) + helper.append_op(type='reduce_max', + inputs={'X': input}, + outputs={'Out': out}, + attrs={ + 'dim': + dim if dim != None and dim != [] else [0], + 'keep_dim': + keep_dim, + 'reduce_all': + True if dim == None or dim == [] + or len(dim) == len(input.shape) else False + }) return out @@ -4820,16 +4849,18 @@ def reduce_min(input, dim=None, keep_dim=False, name=None): out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) if dim is not None and not isinstance(dim, list): dim = [dim] - helper.append_op( - type='reduce_min', - inputs={'X': input}, - outputs={'Out': out}, - attrs={ - 'dim': dim if dim != None and dim != [] else [0], - 'keep_dim': 
keep_dim, - 'reduce_all': True if dim == None or dim == [] or - len(dim) == len(input.shape) else False - }) + helper.append_op(type='reduce_min', + inputs={'X': input}, + outputs={'Out': out}, + attrs={ + 'dim': + dim if dim != None and dim != [] else [0], + 'keep_dim': + keep_dim, + 'reduce_all': + True if dim == None or dim == [] + or len(dim) == len(input.shape) else False + }) return out @@ -4898,19 +4929,22 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None): dim == None or dim == [] or len(dim) == len(input.shape) else False) helper = LayerHelper('reduce_prod', **locals()) - check_variable_and_dtype( - input, 'input', ['float32', 'float64', 'int32', 'int64'], 'reduce_prod') + check_variable_and_dtype(input, 'input', + ['float32', 'float64', 'int32', 'int64'], + 'reduce_prod') out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) - helper.append_op( - type='reduce_prod', - inputs={'X': input}, - outputs={'Out': out}, - attrs={ - 'dim': dim if dim != None and dim != [] else [0], - 'keep_dim': keep_dim, - 'reduce_all': True if dim == None or dim == [] or - len(dim) == len(input.shape) else False - }) + helper.append_op(type='reduce_prod', + inputs={'X': input}, + outputs={'Out': out}, + attrs={ + 'dim': + dim if dim != None and dim != [] else [0], + 'keep_dim': + keep_dim, + 'reduce_all': + True if dim == None or dim == [] + or len(dim) == len(input.shape) else False + }) return out @@ -4963,16 +4997,18 @@ def reduce_all(input, dim=None, keep_dim=False, name=None): out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) if dim is not None and not isinstance(dim, list): dim = [dim] - helper.append_op( - type='reduce_all', - inputs={'X': input}, - outputs={'Out': out}, - attrs={ - 'dim': dim if dim != None and dim != [] else [0], - 'keep_dim': keep_dim, - 'reduce_all': True if dim == None or dim == [] or - len(dim) == len(input.shape) else False - }) + helper.append_op(type='reduce_all', + inputs={'X': input}, + outputs={'Out': out}, + attrs={ + 'dim': + dim if dim != None and dim != [] else [0], + 'keep_dim': + keep_dim, + 'reduce_all': + True if dim == None or dim == [] + or len(dim) == len(input.shape) else False + }) return out @@ -5024,16 +5060,18 @@ def reduce_any(input, dim=None, keep_dim=False, name=None): out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) if dim is not None and not isinstance(dim, list): dim = [dim] - helper.append_op( - type='reduce_any', - inputs={'X': input}, - outputs={'Out': out}, - attrs={ - 'dim': dim if dim != None and dim != [] else [0], - 'keep_dim': keep_dim, - 'reduce_all': True if dim == None or dim == [] or - len(dim) == len(input.shape) else False - }) + helper.append_op(type='reduce_any', + inputs={'X': input}, + outputs={'Out': out}, + attrs={ + 'dim': + dim if dim != None and dim != [] else [0], + 'keep_dim': + keep_dim, + 'reduce_all': + True if dim == None or dim == [] + or len(dim) == len(input.shape) else False + }) return out @@ -5108,8 +5146,8 @@ def split(input, num_or_sections, dim=-1, name=None): if utils._contain_var(num_or_sections): for index, item in enumerate(num_or_sections): if isinstance(item, Variable): - num_or_sections[index] = num_or_sections[index].numpy()[ - 0] + num_or_sections[index] = num_or_sections[index].numpy( + )[0] attrs += ('sections', list(num_or_sections)) else: attrs += ('sections', list(num_or_sections)) @@ -5154,8 +5192,11 @@ def split(input, num_or_sections, dim=-1, name=None): idx) unk_dim_idx = idx temp_out = 
helper.create_variable_for_type_inference('int32') - fill_constant( - [1], 'int32', dim_size, force_cpu=True, out=temp_out) + fill_constant([1], + 'int32', + dim_size, + force_cpu=True, + out=temp_out) tensor_list.append(temp_out) return tensor_list @@ -5181,8 +5222,8 @@ def split(input, num_or_sections, dim=-1, name=None): dim], 'len(num_or_sections) must not be more than input.shape[dim].' num = len(num_or_sections) attrs['sections'] = list( - map(lambda ele: -1 if isinstance(ele, Variable) else ele, - num_or_sections)) + map(lambda ele: -1 + if isinstance(ele, Variable) else ele, num_or_sections)) if utils._contain_var(num_or_sections): inputs['SectionsTensorList'] = _get_SectionsTensorList( num_or_sections) @@ -5191,8 +5232,10 @@ def split(input, num_or_sections, dim=-1, name=None): helper.create_variable_for_type_inference(dtype=helper.input_dtype()) for i in range(num) ] - helper.append_op( - type='split', inputs=inputs, outputs={'Out': outs}, attrs=attrs) + helper.append_op(type='split', + inputs=inputs, + outputs={'Out': outs}, + attrs=attrs) return outs @@ -5240,8 +5283,8 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): if len(x.shape) == 1: axis = 0 if _non_static_mode(): - _, out = _C_ops.norm(x, 'axis', 1 - if axis is None else axis, 'epsilon', epsilon) + _, out = _C_ops.norm(x, 'axis', 1 if axis is None else axis, 'epsilon', + epsilon) return out check_variable_and_dtype(x, "X", ("float16", "float32", "float64"), "norm") @@ -5249,15 +5292,16 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): helper = LayerHelper("l2_normalize", **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) norm = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="norm", - inputs={"X": x}, - outputs={"Out": out, - "Norm": norm}, - attrs={ - "axis": 1 if axis is None else axis, - "epsilon": epsilon, - }) + helper.append_op(type="norm", + inputs={"X": x}, + outputs={ + "Out": out, + "Norm": norm + }, + attrs={ + "axis": 1 if axis is None else axis, + "epsilon": epsilon, + }) return out @@ -5345,8 +5389,9 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None): def __check_input(x, y): var_names = {'x': x, 'y': y} for name, val in var_names.items(): - check_variable_and_dtype( - val, name, ['float16', 'float32', 'float64'], 'matmul') + check_variable_and_dtype(val, name, + ['float16', 'float32', 'float64'], + 'matmul') x_shape = list(x.shape) y_shape = list(y.shape) if len(x_shape) == 1: @@ -5388,12 +5433,13 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None): helper = LayerHelper('matmul', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='matmul', - inputs={'X': x, - 'Y': y}, - outputs={'Out': out}, - attrs=attrs) + helper.append_op(type='matmul', + inputs={ + 'X': x, + 'Y': y + }, + outputs={'Out': out}, + attrs=attrs) return out @@ -5486,12 +5532,13 @@ def topk(input, k, name=None): values = helper.create_variable_for_type_inference(dtype=input.dtype) indices = helper.create_variable_for_type_inference(dtype="int64") - helper.append_op( - type="top_k", - inputs=inputs, - outputs={"Out": [values], - "Indices": [indices]}, - attrs=attrs) + helper.append_op(type="top_k", + inputs=inputs, + outputs={ + "Out": [values], + "Indices": [indices] + }, + attrs=attrs) values.stop_gradient = True indices.stop_gradient = True return values, indices @@ -5637,28 +5684,32 @@ def ctc_greedy_decoder(input, ctc_out = 
helper.create_variable_for_type_inference(dtype="int64") if input_length is None: - helper.append_op( - type="ctc_align", - inputs={"Input": [topk_indices]}, - outputs={"Output": [ctc_out]}, - attrs={"merge_repeated": True, - "blank": blank}) + helper.append_op(type="ctc_align", + inputs={"Input": [topk_indices]}, + outputs={"Output": [ctc_out]}, + attrs={ + "merge_repeated": True, + "blank": blank + }) return ctc_out else: ctc_out_len = helper.create_variable_for_type_inference(dtype="int64") ctc_input = squeeze(topk_indices, [2]) - helper.append_op( - type="ctc_align", - inputs={"Input": [ctc_input], - "InputLength": [input_length]}, - outputs={"Output": [ctc_out], - "OutputLength": [ctc_out_len]}, - attrs={ - "merge_repeated": True, - "blank": blank, - "padding_value": padding_value - }) + helper.append_op(type="ctc_align", + inputs={ + "Input": [ctc_input], + "InputLength": [input_length] + }, + outputs={ + "Output": [ctc_out], + "OutputLength": [ctc_out_len] + }, + attrs={ + "merge_repeated": True, + "blank": blank, + "padding_value": padding_value + }) return ctc_out, ctc_out_len @@ -5742,12 +5793,13 @@ def transpose(x, perm, name=None): helper = LayerHelper('transpose', **locals()) out = helper.create_variable_for_type_inference(x.dtype) x_shape = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='transpose2', - inputs={'X': [x]}, - outputs={'Out': [out], - 'XShape': [x_shape]}, - attrs={'axis': perm}) + helper.append_op(type='transpose2', + inputs={'X': [x]}, + outputs={ + 'Out': [out], + 'XShape': [x_shape] + }, + attrs={'axis': perm}) return out @@ -5894,8 +5946,10 @@ def im2sequence(input, attrs["out_stride"] = out_stride helper = LayerHelper('im2sequence', **locals()) out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) - helper.append_op( - type='im2sequence', inputs=inputs, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='im2sequence', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) return out @@ -5935,14 +5989,16 @@ def row_conv(input, future_context_size, param_attr=None, act=None): check_variable_and_dtype(input, 'input', ['float32'], 'row_conv') dtype = helper.input_dtype() filter_shape = [future_context_size + 1, input.shape[-1]] - filter_param = helper.create_parameter( - attr=helper.param_attr, shape=filter_shape, dtype=dtype) + filter_param = helper.create_parameter(attr=helper.param_attr, + shape=filter_shape, + dtype=dtype) out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='row_conv', - inputs={'X': [input], - 'Filter': [filter_param]}, - outputs={'Out': [out]}) + helper.append_op(type='row_conv', + inputs={ + 'X': [input], + 'Filter': [filter_param] + }, + outputs={'Out': [out]}) return helper.append_activation(out) @@ -6016,11 +6072,12 @@ def multiplex(inputs, index, name=None): check_variable_and_dtype(index, "index", ['int32', 'int64'], 'multiplex') out = helper.create_variable_for_type_inference(inputs[0].dtype) - helper.append_op( - type='multiplex', - inputs={'X': inputs, - 'Ids': index}, - outputs={'Out': [out]}) + helper.append_op(type='multiplex', + inputs={ + 'X': inputs, + 'Ids': index + }, + outputs={'Out': [out]}) return out @@ -6087,17 +6144,18 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): diff = helper.create_variable_for_type_inference(dtype=x.dtype) loss = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='smooth_l1_loss', - inputs={ - 'X': x, - 'Y': y, - 'InsideWeight': 
inside_weight, - 'OutsideWeight': outside_weight - }, - outputs={'Diff': diff, - 'Out': loss}, - attrs={'sigma': sigma if sigma is not None else 1.0}) + helper.append_op(type='smooth_l1_loss', + inputs={ + 'X': x, + 'Y': y, + 'InsideWeight': inside_weight, + 'OutsideWeight': outside_weight + }, + outputs={ + 'Diff': diff, + 'Out': loss + }, + attrs={'sigma': sigma if sigma is not None else 1.0}) return loss @@ -6209,11 +6267,10 @@ def one_hot(input, depth, allow_out_of_range=False): depth.stop_gradient = True inputs = {'X': input, 'depth_tensor': depth} attrs = {'allow_out_of_range': allow_out_of_range} - helper.append_op( - type="one_hot", - inputs=inputs, - attrs=attrs, - outputs={'Out': one_hot_out}) + helper.append_op(type="one_hot", + inputs=inputs, + attrs=attrs, + outputs={'Out': one_hot_out}) one_hot_out.stop_gradient = True return one_hot_out @@ -6253,9 +6310,9 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1): persistable=True, belong_to_optimizer=True) if is_new_var: - helper.set_variable_initializer( - counter, initializer=Constant( - value=begin - 1, force_cpu=True)) + helper.set_variable_initializer(counter, + initializer=Constant(value=begin - 1, + force_cpu=True)) helper.main_program.global_block()._prepend_op( type='increment', inputs={'X': [counter]}, @@ -6469,12 +6526,13 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): out = x if inplace else helper.create_variable_for_type_inference( dtype=x.dtype) x_shape = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="reshape2", - inputs=inputs, - attrs=attrs, - outputs={"Out": out, - "XShape": x_shape}) + helper.append_op(type="reshape2", + inputs=inputs, + attrs=attrs, + outputs={ + "Out": out, + "XShape": x_shape + }) return helper.append_activation(out) @@ -6546,12 +6604,13 @@ def squeeze(input, axes, name=None): check_type(axes, 'axis/axes', (list, tuple), 'squeeze') out = helper.create_variable_for_type_inference(dtype=input.dtype) x_shape = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type="squeeze2", - inputs={"X": input}, - attrs={"axes": axes}, - outputs={"Out": out, - "XShape": x_shape}) + helper.append_op(type="squeeze2", + inputs={"X": input}, + attrs={"axes": axes}, + outputs={ + "Out": out, + "XShape": x_shape + }) return out @@ -6630,12 +6689,13 @@ def unsqueeze(input, axes, name=None): out = helper.create_variable_for_type_inference(dtype=input.dtype) x_shape = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type="unsqueeze2", - inputs=inputs, - attrs=attrs, - outputs={"Out": out, - "XShape": x_shape}) + helper.append_op(type="unsqueeze2", + inputs=inputs, + attrs=attrs, + outputs={ + "Out": out, + "XShape": x_shape + }) return out @@ -6728,15 +6788,17 @@ def lod_reset(x, y=None, target_lod=None): if y is not None: check_type(y, 'y', (Variable), 'lod_reset') #TODO: check y.lod_level = 0 dtype - helper.append_op( - type="lod_reset", inputs={'X': x, - 'Y': y}, outputs={'Out': out}) + helper.append_op(type="lod_reset", + inputs={ + 'X': x, + 'Y': y + }, + outputs={'Out': out}) elif target_lod is not None: - helper.append_op( - type="lod_reset", - inputs={'X': x}, - attrs={'target_lod': target_lod}, - outputs={'Out': out}) + helper.append_op(type="lod_reset", + inputs={'X': x}, + attrs={'target_lod': target_lod}, + outputs={'Out': out}) else: raise ValueError("y and target_lod should not be both none.") return out @@ -6804,12 +6866,19 @@ def lod_append(x, 
level): #TODO: check y.lod_level = 0 dtype else: attrs['target_lod'] = level - helper.append_op( - type="lod_reset", inputs=inputs, attrs=attrs, outputs={'Out': out}) + helper.append_op(type="lod_reset", + inputs=inputs, + attrs=attrs, + outputs={'Out': out}) return out -def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None, +def lrn(input, + n=5, + k=1.0, + alpha=1e-4, + beta=0.75, + name=None, data_format='NCHW'): r""" :alias_main: paddle.nn.functional.lrn @@ -6879,23 +6948,22 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None, "Attr(data_format) of Op(lrn) got wrong value: received " + data_format + " but only NCHW or NHWC supported.") - mid_out = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True) + mid_out = helper.create_variable_for_type_inference(dtype=dtype, + stop_gradient=True) lrn_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="lrn", - inputs={"X": input}, - outputs={ - "Out": lrn_out, - "MidOut": mid_out, - }, - attrs={ - "n": n, - "k": k, - "alpha": alpha, - "beta": beta, - "data_format": data_format - }) + helper.append_op(type="lrn", + inputs={"X": input}, + outputs={ + "Out": lrn_out, + "MidOut": mid_out, + }, + attrs={ + "n": n, + "k": k, + "alpha": alpha, + "beta": beta, + "data_format": data_format + }) return lrn_out @@ -6963,12 +7031,13 @@ def pad(x, paddings, pad_value=0., name=None): helper = LayerHelper('pad', **locals()) dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='pad', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'paddings': paddings, - 'pad_value': float(pad_value)}) + helper.append_op(type='pad', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'paddings': paddings, + 'pad_value': float(pad_value) + }) return out @@ -7058,12 +7127,13 @@ def pad_constant_like(x, y, pad_value=0., name=None): helper = LayerHelper('pad_constant_like', **locals()) dtype = helper.input_dtype(input_param_name='y') out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='pad_constant_like', - inputs={'X': x, - 'Y': y}, - outputs={'Out': out}, - attrs={'pad_value': float(pad_value)}) + helper.append_op(type='pad_constant_like', + inputs={ + 'X': x, + 'Y': y + }, + outputs={'Out': out}, + attrs={'pad_value': float(pad_value)}) return out @@ -7145,12 +7215,13 @@ def label_smooth(label, helper = LayerHelper("label_smooth", **locals()) label.stop_gradient = True smooth_label = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="label_smooth", - inputs={"X": label, - "PriorDist": prior_dist} if prior_dist else {"X": label}, - outputs={"Out": smooth_label}, - attrs={"epsilon": float(epsilon)}) + helper.append_op(type="label_smooth", + inputs={ + "X": label, + "PriorDist": prior_dist + } if prior_dist else {"X": label}, + outputs={"Out": smooth_label}, + attrs={"epsilon": float(epsilon)}) return smooth_label @@ -7228,9 +7299,10 @@ def roi_pool(input, """ if _non_static_mode(): assert rois_num is not None, "rois_num should not be None in dygraph mode." 
- pool_out, argmaxes = _C_ops.roi_pool( - input, rois, rois_num, "pooled_height", pooled_height, - "pooled_width", pooled_width, "spatial_scale", spatial_scale) + pool_out, argmaxes = _C_ops.roi_pool(input, rois, rois_num, + "pooled_height", pooled_height, + "pooled_width", pooled_width, + "spatial_scale", spatial_scale) return pool_out, argmaxes check_variable_and_dtype(input, 'input', ['float32'], 'roi_pool') @@ -7246,16 +7318,17 @@ def roi_pool(input, } if rois_num is not None: inputs['RoisNum'] = rois_num - helper.append_op( - type="roi_pool", - inputs=inputs, - outputs={"Out": pool_out, - "Argmax": argmaxes}, - attrs={ - "pooled_height": pooled_height, - "pooled_width": pooled_width, - "spatial_scale": spatial_scale - }) + helper.append_op(type="roi_pool", + inputs=inputs, + outputs={ + "Out": pool_out, + "Argmax": argmaxes + }, + attrs={ + "pooled_height": pooled_height, + "pooled_width": pooled_width, + "spatial_scale": spatial_scale + }) return pool_out @@ -7316,15 +7389,17 @@ def roi_align(input, """ if in_dygraph_mode(): assert rois_num is not None, "rois_num should not be None in dygraph mode." - return _C_ops.final_state_roi_align( - input, rois, rois_num, pooled_height, pooled_width, spatial_scale, - sampling_ratio, False) + return _C_ops.final_state_roi_align(input, rois, rois_num, + pooled_height, pooled_width, + spatial_scale, sampling_ratio, + False) if _in_legacy_dygraph(): assert rois_num is not None, "rois_num should not be None in dygraph mode." - align_out = _C_ops.roi_align( - input, rois, rois_num, "pooled_height", pooled_height, - "pooled_width", pooled_width, "spatial_scale", spatial_scale, - "sampling_ratio", sampling_ratio) + align_out = _C_ops.roi_align(input, rois, rois_num, "pooled_height", + pooled_height, "pooled_width", + pooled_width, "spatial_scale", + spatial_scale, "sampling_ratio", + sampling_ratio) return align_out check_variable_and_dtype(input, 'input', ['float32', 'float64'], @@ -7339,16 +7414,15 @@ def roi_align(input, } if rois_num is not None: inputs['RoisNum'] = rois_num - helper.append_op( - type="roi_align", - inputs=inputs, - outputs={"Out": align_out}, - attrs={ - "pooled_height": pooled_height, - "pooled_width": pooled_width, - "spatial_scale": spatial_scale, - "sampling_ratio": sampling_ratio - }) + helper.append_op(type="roi_align", + inputs=inputs, + outputs={"Out": align_out}, + attrs={ + "pooled_height": pooled_height, + "pooled_width": pooled_width, + "spatial_scale": spatial_scale, + "sampling_ratio": sampling_ratio + }) return align_out @@ -7394,8 +7468,10 @@ def dice_loss(input, label, epsilon=0.00001, name=None): predictions = F.softmax(x) loss = F.dice_loss(input=predictions, label=label) """ - return paddle.nn.functional.dice_loss( - input, label, epsilon=epsilon, name=name) + return paddle.nn.functional.dice_loss(input, + label, + epsilon=epsilon, + name=name) def image_resize(input, @@ -7808,8 +7884,11 @@ def image_resize(input, assert (isinstance(dim, int)) temp_out = helper.create_variable_for_type_inference( 'int32') - fill_constant( - [1], 'int32', dim, force_cpu=True, out=temp_out) + fill_constant([1], + 'int32', + dim, + force_cpu=True, + out=temp_out) new_size_tensor.append(temp_out) size_list.append(dim) inputs['SizeTensor'] = new_size_tensor @@ -7892,11 +7971,10 @@ def image_resize(input, return out out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='{}_interp'.format(resample_type), - inputs=inputs, - outputs={"Out": out}, - attrs=attrs) + 
helper.append_op(type='{}_interp'.format(resample_type), + inputs=inputs, + outputs={"Out": out}, + attrs=attrs) return out @@ -8514,16 +8592,15 @@ def resize_nearest(input, """ - return image_resize( - input, - out_shape, - scale, - name, - 'NEAREST', - actual_shape, - align_corners, - align_mode=1, - data_format=data_format) + return image_resize(input, + out_shape, + scale, + name, + 'NEAREST', + actual_shape, + align_corners, + align_mode=1, + data_format=data_format) def image_resize_short(input, out_short_len, resample='BILINEAR'): @@ -8558,8 +8635,8 @@ def image_resize_short(input, out_short_len, resample='BILINEAR'): out_shape = list(hw) out_shape[short_idx] = out_short_len out_shape[long_idx] = int( - float(out_shape[long_idx]) * (float(out_short_len) / float(hw[ - short_idx])) + 0.5) + float(out_shape[long_idx]) * + (float(out_short_len) / float(hw[short_idx])) + 0.5) return image_resize(input=input, out_shape=out_shape, resample=resample) @@ -8626,12 +8703,13 @@ def gather(input, index, overwrite=True): helper = LayerHelper('gather', **locals()) dtype = helper.input_dtype() out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="gather", - inputs={"X": input, - "Index": index}, - outputs={"Out": out}, - attrs={'overwrite': overwrite}) + helper.append_op(type="gather", + inputs={ + "X": input, + "Index": index + }, + outputs={"Out": out}, + attrs={'overwrite': overwrite}) return out @@ -8722,11 +8800,12 @@ def gather_nd(input, index, name=None): helper = LayerHelper('gather_nd', **locals()) dtype = helper.input_dtype() output = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="gather_nd", - inputs={"X": input, - "Index": index}, - outputs={"Out": output}) + helper.append_op(type="gather_nd", + inputs={ + "X": input, + "Index": index + }, + outputs={"Out": output}) return output @@ -8811,13 +8890,14 @@ def scatter(input, index, updates, name=None, overwrite=True): helper = LayerHelper('scatter', **locals()) dtype = helper.input_dtype() out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="scatter", - inputs={"X": input, - "Ids": index, - "Updates": updates}, - attrs={'overwrite': overwrite}, - outputs={"Out": out}) + helper.append_op(type="scatter", + inputs={ + "X": input, + "Ids": index, + "Updates": updates + }, + attrs={'overwrite': overwrite}, + outputs={"Out": out}) return out @@ -8902,12 +8982,13 @@ def scatter_nd_add(ref, index, updates, name=None): helper = LayerHelper('scatter_nd_add', **locals()) dtype = helper.input_dtype(input_param_name='ref') output = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="scatter_nd_add", - inputs={"X": ref, - "Index": index, - "Updates": updates}, - outputs={"Out": output}) + helper.append_op(type="scatter_nd_add", + inputs={ + "X": ref, + "Index": index, + "Updates": updates + }, + outputs={"Out": output}) return output @@ -9001,13 +9082,16 @@ def random_crop(x, shape, seed=None): persistable=True) elif not isinstance(seed, Variable): raise ValueError("'seed' must be a Variable or an int.") - helper.append_op( - type="random_crop", - inputs={"X": x, - "Seed": seed}, - outputs={"Out": out, - "SeedOut": seed}, - attrs=op_attrs) + helper.append_op(type="random_crop", + inputs={ + "X": x, + "Seed": seed + }, + outputs={ + "Out": out, + "SeedOut": seed + }, + attrs=op_attrs) return out @@ -9092,8 +9176,9 @@ def relu(x, name=None): helper = LayerHelper('relu', **locals()) dtype = helper.input_dtype(input_param_name='x') out = 
helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="relu", inputs={"X": helper.input('x')}, outputs={"Out": out}) + helper.append_op(type="relu", + inputs={"X": helper.input('x')}, + outputs={"Out": out}) return out @@ -9163,8 +9248,10 @@ def selu(x, scale=None, alpha=None, name=None): if alpha is not None: attrs["alpha"] = alpha - helper.append_op( - type="selu", inputs={"X": x}, outputs={"Out": out}, attrs=attrs) + helper.append_op(type="selu", + inputs={"X": x}, + outputs={"Out": out}, + attrs=attrs) return out @@ -9221,16 +9308,17 @@ def mean_iou(input, label, num_classes): out_mean_iou = helper.create_variable_for_type_inference(dtype='float32') out_wrong = helper.create_variable_for_type_inference(dtype='int32') out_correct = helper.create_variable_for_type_inference(dtype='int32') - helper.append_op( - type="mean_iou", - inputs={"Predictions": input, - "Labels": label}, - outputs={ - "OutMeanIou": out_mean_iou, - "OutWrong": out_wrong, - "OutCorrect": out_correct - }, - attrs={"num_classes": num_classes}) + helper.append_op(type="mean_iou", + inputs={ + "Predictions": input, + "Labels": label + }, + outputs={ + "OutMeanIou": out_mean_iou, + "OutWrong": out_wrong, + "OutCorrect": out_correct + }, + attrs={"num_classes": num_classes}) return out_mean_iou, out_wrong, out_correct @@ -9333,11 +9421,10 @@ def crop(x, shape=None, offsets=None, name=None): else: attrs['offsets'] = offsets - helper.append_op( - type='crop', - inputs=ipts, - outputs={'Out': out}, - attrs=None if len(attrs) == 0 else attrs) + helper.append_op(type='crop', + inputs=ipts, + outputs={'Out': out}, + attrs=None if len(attrs) == 0 else attrs) return out @@ -9506,8 +9593,11 @@ def crop_tensor(x, shape=None, offsets=None, name=None): else: _attr_shape_check(dim_size) temp_out = helper.create_variable_for_type_inference('int32') - fill_constant( - [1], 'int32', dim_size, force_cpu=True, out=temp_out) + fill_constant([1], + 'int32', + dim_size, + force_cpu=True, + out=temp_out) new_shape_tensor.append(temp_out) shape_attr.append(dim_size) ipts['ShapeTensor'] = new_shape_tensor @@ -9517,11 +9607,10 @@ def crop_tensor(x, shape=None, offsets=None, name=None): _attr_shape_check(dim_size) attrs['shape'] = shape - helper.append_op( - type='crop_tensor', - inputs=ipts, - outputs={'Out': out}, - attrs=None if len(attrs) == 0 else attrs) + helper.append_op(type='crop_tensor', + inputs=ipts, + outputs={'Out': out}, + attrs=None if len(attrs) == 0 else attrs) return out @@ -9595,11 +9684,10 @@ def affine_grid(theta, out_shape, name=None): # ROCM platform do not have MIOPEN kernel for affine_grid attrs['use_cudnn'] = False - helper.append_op( - type='affine_grid', - inputs=ipts, - outputs={'Output': out}, - attrs=None if len(attrs) == 0 else attrs) + helper.append_op(type='affine_grid', + inputs=ipts, + outputs={'Output': out}, + attrs=None if len(attrs) == 0 else attrs) return out @@ -9720,8 +9808,10 @@ def pad2d(input, dtype = helper.input_dtype(input_param_name='input') out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='pad2d', inputs=inputs, outputs={"Out": out}, attrs=attrs) + helper.append_op(type='pad2d', + inputs=inputs, + outputs={"Out": out}, + attrs=attrs) return out @@ -9760,11 +9850,10 @@ def elu(x, alpha=1.0, name=None): helper = LayerHelper('elu', **locals()) check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'elu') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='elu', - inputs={'X': x}, - 
outputs={'Out': out}, - attrs={'alpha': alpha}) + helper.append_op(type='elu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'alpha': alpha}) return out @@ -9802,14 +9891,13 @@ def relu6(x, threshold=6.0, name=None): helper = LayerHelper('relu6', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='relu6', - inputs={'X': x}, - outputs={'Out': out}, - attrs={ - 'threshold': threshold, - 'use_mkldnn': _global_flags()["FLAGS_use_mkldnn"] - }) + helper.append_op(type='relu6', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'threshold': threshold, + 'use_mkldnn': _global_flags()["FLAGS_use_mkldnn"] + }) return out @@ -9859,8 +9947,10 @@ def pow(x, factor=1.0, name=None): attrs['factor'] = factor out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='pow', inputs=inputs, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='pow', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) return out @@ -9900,12 +9990,13 @@ def stanh(x, scale_a=0.67, scale_b=1.7159, name=None): helper = LayerHelper('stanh', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='stanh', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'scale_a': scale_a, - 'scale_b': scale_b}) + helper.append_op(type='stanh', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'scale_a': scale_a, + 'scale_b': scale_b + }) return out @@ -9943,12 +10034,13 @@ def hard_sigmoid(x, slope=0.2, offset=0.5, name=None): helper = LayerHelper('hard_sigmoid', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='hard_sigmoid', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'slope': slope, - 'offset': offset}) + helper.append_op(type='hard_sigmoid', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'slope': slope, + 'offset': offset + }) return out @@ -10029,11 +10121,10 @@ def swish(x, beta=1.0, name=None): helper = LayerHelper('swish', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='swish', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'slope': beta}) + helper.append_op(type='swish', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'slope': beta}) return out @@ -10121,20 +10212,22 @@ def prelu(x, mode, param_attr=None, data_format="NCHW", name=None): ) >= 1, "The size of input shape should be equal or larger than 1 in prelu() when mode is 'element'" alpha_shape = [1] + list(x.shape)[1:] dtype = helper.input_dtype(input_param_name='x') - alpha = helper.create_parameter( - attr=helper.param_attr, - shape=alpha_shape, - dtype=dtype, - is_bias=False, - default_initializer=Constant(0.25)) + alpha = helper.create_parameter(attr=helper.param_attr, + shape=alpha_shape, + dtype=dtype, + is_bias=False, + default_initializer=Constant(0.25)) out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="prelu", - inputs={"X": x, - 'Alpha': alpha}, - attrs={"mode": mode, - "data_format": data_format}, - outputs={"Out": out}) + helper.append_op(type="prelu", + inputs={ + "X": x, + 'Alpha': alpha + }, + attrs={ + "mode": mode, + "data_format": data_format + }, + outputs={"Out": out}) return out @@ -10175,12 +10268,13 @@ def brelu(x, t_min=0.0, t_max=24.0, name=None): helper = LayerHelper('brelu', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='brelu', - inputs={'X': x}, - outputs={'Out': out}, - 
attrs={'t_min': t_min, - 't_max': t_max}) + helper.append_op(type='brelu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 't_min': t_min, + 't_max': t_max + }) return out @@ -10252,11 +10346,10 @@ def soft_relu(x, threshold=40.0, name=None): helper = LayerHelper('soft_relu', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='soft_relu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'threshold': threshold}) + helper.append_op(type='soft_relu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'threshold': threshold}) return out @@ -10340,12 +10433,13 @@ def flatten(x, axis=1, name=None): out = helper.create_variable_for_type_inference(x.dtype) x_shape = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='flatten2', - inputs={"X": x}, - outputs={'Out': out, - 'XShape': x_shape}, - attrs={"axis": axis}) + helper.append_op(type='flatten2', + inputs={"X": x}, + outputs={ + 'Out': out, + 'XShape': x_shape + }, + attrs={"axis": axis}) return out @@ -10443,10 +10537,10 @@ def stack(x, axis=0, name=None): ) == core.VarDesc.VarType.LOD_TENSOR_ARRAY: x = [x] else: - raise TypeError("The type of '%s' in %s must be %s, but received %s" - % ('x', 'stack', - 'list[Tensor], tuple[Tensor] or TensorArray', - type(x))) + raise TypeError( + "The type of '%s' in %s must be %s, but received %s" % + ('x', 'stack', 'list[Tensor], tuple[Tensor] or TensorArray', + type(x))) helper = LayerHelper('stack', **locals()) @@ -10460,19 +10554,21 @@ def stack(x, axis=0, name=None): check_variable_and_dtype(i, 'x', \ ['float16', 'float32', 'float64', 'int32', 'int64'], 'stack') - helper.append_op( - type='tensor_array_to_tensor', - inputs={'X': x[0]}, - outputs={'Out': [out], - 'OutIndex': [out_index]}, - attrs={'axis': axis, - 'use_stack': True}) + helper.append_op(type='tensor_array_to_tensor', + inputs={'X': x[0]}, + outputs={ + 'Out': [out], + 'OutIndex': [out_index] + }, + attrs={ + 'axis': axis, + 'use_stack': True + }) else: - helper.append_op( - type='stack', - inputs={'X': x}, - outputs={'Y': out}, - attrs={'axis': axis}) + helper.append_op(type='stack', + inputs={'X': x}, + outputs={'Y': out}, + attrs={'axis': axis}) return out @@ -10536,16 +10632,21 @@ def filter_by_instag(ins, ins_tag, filter_tag, is_lod, out_val_if_empty=0): out = helper.create_variable_for_type_inference(dtype=ins.dtype) loss_weight = helper.create_variable_for_type_inference(dtype=np.float64) mmap = helper.create_variable_for_type_inference(dtype=ins_tag.dtype) - helper.append_op( - type='filter_by_instag', - inputs={'Ins': ins, - 'Ins_tag': ins_tag, - 'Filter_tag': filter_tag}, - outputs={'Out': out, - 'LossWeight': loss_weight, - 'IndexMap': mmap}, - attrs={'is_lod': is_lod, - 'out_val_if_empty': out_val_if_empty}) + helper.append_op(type='filter_by_instag', + inputs={ + 'Ins': ins, + 'Ins_tag': ins_tag, + 'Filter_tag': filter_tag + }, + outputs={ + 'Out': out, + 'LossWeight': loss_weight, + 'IndexMap': mmap + }, + attrs={ + 'is_lod': is_lod, + 'out_val_if_empty': out_val_if_empty + }) return [out, loss_weight] @@ -10602,12 +10703,13 @@ def unstack(x, axis=0, num=None): for _ in range(num): outs.append(helper.create_variable_for_type_inference(x.dtype)) - helper.append_op( - type='unstack', - inputs={'X': [x]}, - outputs={'Y': outs}, - attrs={'axis': axis, - 'num': num}) + helper.append_op(type='unstack', + inputs={'X': [x]}, + outputs={'Y': outs}, + attrs={ + 'axis': axis, + 'num': num + }) return outs @@ -10721,8 +10823,10 @@ def expand(x, 
expand_times, name=None): dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='expand', inputs=inputs, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='expand', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) return out @@ -10797,8 +10901,9 @@ def expand_as(x, target_tensor, name=None): if _non_static_mode(): return _C_ops.expand_as(x, target_tensor) - check_variable_and_dtype( - x, 'x', ['float32', 'float64', 'int32', 'int64', 'bool'], 'expand_as') + check_variable_and_dtype(x, 'x', + ['float32', 'float64', 'int32', 'int64', 'bool'], + 'expand_as') check_variable_and_dtype(target_tensor, 'target_tensor', ['float32', 'float64', 'int32', 'int64', 'bool'], 'expand_as') @@ -10894,19 +10999,18 @@ def uniform_random_batch_size_like(input, helper = LayerHelper('uniform_random_batch_size_like', **locals()) out = helper.create_variable_for_type_inference(dtype) c_dtype = convert_np_dtype_to_dtype_(dtype) - helper.append_op( - type='uniform_random_batch_size_like', - inputs={'Input': input}, - outputs={'Out': out}, - attrs={ - 'shape': shape, - 'input_dim_idx': input_dim_idx, - 'output_dim_idx': output_dim_idx, - 'min': min, - 'max': max, - 'seed': seed, - 'dtype': c_dtype - }) + helper.append_op(type='uniform_random_batch_size_like', + inputs={'Input': input}, + outputs={'Out': out}, + attrs={ + 'shape': shape, + 'input_dim_idx': input_dim_idx, + 'output_dim_idx': output_dim_idx, + 'min': min, + 'max': max, + 'seed': seed, + 'dtype': c_dtype + }) return out @@ -11017,15 +11121,14 @@ def gaussian_random(shape, if in_dygraph_mode(): shape = utils.convert_shape_to_list(shape) place = _current_expected_place() - return _C_ops.final_state_gaussian_random(shape, - float(mean), + return _C_ops.final_state_gaussian_random(shape, float(mean), float(std), seed, dtype, place) if _in_legacy_dygraph(): shape = utils.convert_shape_to_list(shape) - return _C_ops.gaussian_random('shape', shape, 'mean', - float(mean), 'std', + return _C_ops.gaussian_random('shape', + shape, 'mean', float(mean), 'std', float(std), 'seed', seed, 'dtype', dtype) check_type(shape, 'shape', (list, tuple, Variable), 'gaussian_random/randn') @@ -11039,19 +11142,17 @@ def gaussian_random(shape, 'dtype': dtype, 'use_mkldnn': False } - utils.get_shape_tensor_inputs( - inputs=inputs, - attrs=attrs, - shape=shape, - op_type='gaussian_random/randn') + utils.get_shape_tensor_inputs(inputs=inputs, + attrs=attrs, + shape=shape, + op_type='gaussian_random/randn') helper = LayerHelper('gaussian_random', **locals()) out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='gaussian_random', - inputs=inputs, - outputs={'Out': out}, - attrs=attrs) + helper.append_op(type='gaussian_random', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) return out @@ -11085,13 +11186,14 @@ def sampling_id(x, min=0.0, max=1.0, seed=0, dtype='float32'): helper = LayerHelper('sampling_id', **locals()) out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='sampling_id', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'min': min, - 'max': max, - 'seed': seed}) + helper.append_op(type='sampling_id', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'min': min, + 'max': max, + 'seed': seed + }) return out @@ -11144,19 +11246,18 @@ def gaussian_random_batch_size_like(input, 'fluid.layers.gaussian_random_batch_size_like') out = helper.create_variable_for_type_inference(dtype) c_dtype = 
convert_np_dtype_to_dtype_(dtype) - helper.append_op( - type='gaussian_random_batch_size_like', - inputs={'Input': input}, - outputs={'Out': out}, - attrs={ - 'shape': shape, - 'input_dim_idx': input_dim_idx, - 'output_dim_idx': output_dim_idx, - 'mean': mean, - 'std': std, - 'seed': seed, - 'dtype': c_dtype - }) + helper.append_op(type='gaussian_random_batch_size_like', + inputs={'Input': input}, + outputs={'Out': out}, + attrs={ + 'shape': shape, + 'input_dim_idx': input_dim_idx, + 'output_dim_idx': output_dim_idx, + 'mean': mean, + 'std': std, + 'seed': seed, + 'dtype': c_dtype + }) return out @@ -11371,8 +11472,8 @@ def slice(input, axes, starts, ends): else: raise ValueError( - "Input axes must be a python list or tuple, but reveived {}". - format(type(axes))) + "Input axes must be a python list or tuple, but reveived {}" + .format(type(axes))) infer_flags = list(1 for i in range(len(axes))) @@ -11459,8 +11560,10 @@ def slice(input, axes, starts, ends): attrs['infer_flags'] = infer_flags out = helper.create_variable_for_type_inference( dtype=helper.input_dtype('input')) - helper.append_op( - type='slice', inputs=inputs, attrs=attrs, outputs={'Out': out}) + helper.append_op(type='slice', + inputs=inputs, + attrs=attrs, + outputs={'Out': out}) return out @@ -11675,8 +11778,10 @@ def strided_slice(input, axes, starts, ends, strides): attrs['infer_flags'] = infer_flags out = helper.create_variable_for_type_inference( dtype=helper.input_dtype('input')) - helper.append_op( - type='strided_slice', inputs=inputs, attrs=attrs, outputs={'Out': out}) + helper.append_op(type='strided_slice', + inputs=inputs, + attrs=attrs, + outputs={'Out': out}) return out @@ -11749,11 +11854,10 @@ def shape(input): ], 'shape') helper = LayerHelper('shape', **locals()) out = helper.create_variable_for_type_inference(dtype='int32') - helper.append_op( - type='shape', - inputs={'Input': input}, - outputs={'Out': out}, - stop_gradient=True) + helper.append_op(type='shape', + inputs={'Input': input}, + outputs={'Out': out}, + stop_gradient=True) return out @@ -11849,13 +11953,16 @@ def _elementwise_op(helper): name = helper.kwargs.get('name', None) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type=op_type, - inputs={'X': x, - 'Y': y}, - outputs={'Out': out}, - attrs={'axis': axis, - 'use_mkldnn': use_mkldnn}) + helper.append_op(type=op_type, + inputs={ + 'X': x, + 'Y': y + }, + outputs={'Out': out}, + attrs={ + 'axis': axis, + 'use_mkldnn': use_mkldnn + }) return helper.append_activation(out) @@ -11911,9 +12018,8 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): return dygraph_utils._append_activation_in_dygraph(out) if _non_static_mode(): _scale = scale.numpy().item(0) if isinstance(scale, Variable) else scale - out = _C_ops.scale(x, 'scale', - float(_scale), 'bias', - float(bias), 'bias_after_scale', bias_after_scale) + out = _C_ops.scale(x, 'scale', float(_scale), 'bias', float(bias), + 'bias_after_scale', bias_after_scale) return dygraph_utils._append_activation_in_dygraph(out) check_variable_and_dtype(x, "x", [ @@ -11932,8 +12038,10 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): helper = LayerHelper('scale', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='scale', inputs=inputs, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='scale', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) return helper.append_activation(out) @@ -12112,8 
+12220,11 @@ Examples: """ if _non_static_mode(): - return _elementwise_op_in_dygraph( - x, y, axis=axis, act=act, op_name='elementwise_div') + return _elementwise_op_in_dygraph(x, + y, + axis=axis, + act=act, + op_name='elementwise_div') return _elementwise_op(LayerHelper('elementwise_div', **locals())) @@ -12200,8 +12311,11 @@ Examples: """ if _non_static_mode(): - return _elementwise_op_in_dygraph( - x, y, axis=axis, act=act, op_name='elementwise_sub') + return _elementwise_op_in_dygraph(x, + y, + axis=axis, + act=act, + op_name='elementwise_sub') return _elementwise_op(LayerHelper('elementwise_sub', **locals())) @@ -12289,8 +12403,11 @@ Examples: """ if _non_static_mode(): - return _elementwise_op_in_dygraph( - x, y, axis=axis, act=act, op_name='elementwise_mul') + return _elementwise_op_in_dygraph(x, + y, + axis=axis, + act=act, + op_name='elementwise_mul') return _elementwise_op(LayerHelper('elementwise_mul', **locals())) @@ -12353,8 +12470,11 @@ Examples: """ if _non_static_mode(): - return _elementwise_op_in_dygraph( - x, y, axis=axis, act=act, op_name='elementwise_max') + return _elementwise_op_in_dygraph(x, + y, + axis=axis, + act=act, + op_name='elementwise_max') return _elementwise_op(LayerHelper('elementwise_max', **locals())) @@ -12415,8 +12535,11 @@ Examples: print(z_value)#[[[[0., 0., 0., 0., 0.] .... [0., 0., 0., 0., 0.]]]] """ if _non_static_mode(): - return _elementwise_op_in_dygraph( - x, y, axis=axis, act=act, op_name='elementwise_min') + return _elementwise_op_in_dygraph(x, + y, + axis=axis, + act=act, + op_name='elementwise_min') return _elementwise_op(LayerHelper('elementwise_min', **locals())) @@ -12450,8 +12573,11 @@ Examples: print(z_value) #[2, 243, 16] """ if _non_static_mode(): - return _elementwise_op_in_dygraph( - x, y, axis=axis, act=act, op_name='elementwise_pow') + return _elementwise_op_in_dygraph(x, + y, + axis=axis, + act=act, + op_name='elementwise_pow') return _elementwise_op(LayerHelper('elementwise_pow', **locals())) @@ -12485,8 +12611,11 @@ Examples: print(z_value) #[1, 3, 3] """ if _non_static_mode(): - return _elementwise_op_in_dygraph( - x, y, axis=axis, act=act, op_name='elementwise_mod') + return _elementwise_op_in_dygraph(x, + y, + axis=axis, + act=act, + op_name='elementwise_mod') return _elementwise_op(LayerHelper('elementwise_mod', **locals())) @@ -12521,8 +12650,11 @@ Examples: print(z_value) #[3, 2, 1] """ if _non_static_mode(): - return _elementwise_op_in_dygraph( - x, y, axis=axis, act=act, op_name='elementwise_floordiv') + return _elementwise_op_in_dygraph(x, + y, + axis=axis, + act=act, + op_name='elementwise_floordiv') return _elementwise_op(LayerHelper('elementwise_floordiv', **locals())) @@ -12622,13 +12754,15 @@ def _logical_op(op_name, x, y, out=None, name=None, binary_op=True): return op(x, y) else: return op(x) - check_variable_and_dtype(x, "x", [ - "bool", "int8", "int16", "int32", "int64", "float32", "float64" - ], op_name) + check_variable_and_dtype( + x, "x", + ["bool", "int8", "int16", "int32", "int64", "float32", "float64"], + op_name) if y is not None: - check_variable_and_dtype(y, "y", [ - "bool", "int8", "int16", "int32", "int64", "float32", "float64" - ], op_name) + check_variable_and_dtype( + y, "y", + ["bool", "int8", "int16", "int32", "int64", "float32", "float64"], + op_name) if out is not None: check_type(out, "out", Variable, op_name) @@ -12643,9 +12777,12 @@ def _logical_op(op_name, x, y, out=None, name=None, binary_op=True): out = helper.create_variable_for_type_inference(dtype=x.dtype) if binary_op: - 
helper.append_op( - type=op_name, inputs={"X": x, - "Y": y}, outputs={"Out": out}) + helper.append_op(type=op_name, + inputs={ + "X": x, + "Y": y + }, + outputs={"Out": out}) else: helper.append_op(type=op_name, inputs={"X": x}, outputs={"Out": out}) @@ -12687,8 +12824,12 @@ def logical_and(x, y, out=None, name=None): if in_dygraph_mode(): return _C_ops.final_state_logical_and(x, y) - return _logical_op( - op_name="logical_and", x=x, y=y, name=name, out=out, binary_op=True) + return _logical_op(op_name="logical_and", + x=x, + y=y, + name=name, + out=out, + binary_op=True) def logical_or(x, y, out=None, name=None): @@ -12728,8 +12869,12 @@ def logical_or(x, y, out=None, name=None): """ if in_dygraph_mode(): return _C_ops.final_state_logical_or(x, y) - return _logical_op( - op_name="logical_or", x=x, y=y, name=name, out=out, binary_op=True) + return _logical_op(op_name="logical_or", + x=x, + y=y, + name=name, + out=out, + binary_op=True) def logical_xor(x, y, out=None, name=None): @@ -12770,8 +12915,12 @@ def logical_xor(x, y, out=None, name=None): if in_dygraph_mode(): return _C_ops.final_state_logical_xor(x, y) - return _logical_op( - op_name="logical_xor", x=x, y=y, name=name, out=out, binary_op=True) + return _logical_op(op_name="logical_xor", + x=x, + y=y, + name=name, + out=out, + binary_op=True) @templatedoc() @@ -12804,8 +12953,12 @@ def logical_not(x, out=None, name=None): """ if in_dygraph_mode(): return _C_ops.final_state_logical_not(x) - return _logical_op( - op_name="logical_not", x=x, y=None, name=name, out=out, binary_op=False) + return _logical_op(op_name="logical_not", + x=x, + y=None, + name=name, + out=out, + binary_op=False) @templatedoc() @@ -12845,15 +12998,18 @@ def clip(x, min, max, name=None): name = unique_name.generate_with_ignorable_key(".".join( [helper.name, 'tmp'])) - out = helper.create_variable( - type=x.type, name=name, dtype=x.dtype, persistable=False) + out = helper.create_variable(type=x.type, + name=name, + dtype=x.dtype, + persistable=False) - helper.append_op( - type="clip", - inputs={"X": x}, - attrs={"min": min, - "max": max}, - outputs={"Out": out}) + helper.append_op(type="clip", + inputs={"X": x}, + attrs={ + "min": min, + "max": max + }, + outputs={"Out": out}) return out @@ -12898,14 +13054,15 @@ def clip_by_norm(x, max_norm, name=None): name = unique_name.generate_with_ignorable_key(".".join( [helper.name, 'tmp'])) - out = helper.create_variable( - type=x.type, name=name, dtype=x.dtype, persistable=False) + out = helper.create_variable(type=x.type, + name=name, + dtype=x.dtype, + persistable=False) - helper.append_op( - type="clip_by_norm", - inputs={"X": x}, - attrs={"max_norm": max_norm}, - outputs={"Out": out}) + helper.append_op(type="clip_by_norm", + inputs={"X": x}, + attrs={"max_norm": max_norm}, + outputs={"Out": out}) return out @@ -12944,8 +13101,10 @@ def mean(x, name=None): check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'mean') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="mean", inputs={"X": x}, attrs={}, outputs={"Out": out}) + helper.append_op(type="mean", + inputs={"X": x}, + attrs={}, + outputs={"Out": out}) return out @@ -12975,11 +13134,10 @@ def merge_selected_rows(x, name=None): helper = LayerHelper("merge_selected_rows", **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="merge_selected_rows", - inputs={"X": x}, - attrs={}, - outputs={"Out": out}) + helper.append_op(type="merge_selected_rows", + inputs={"X": 
x}, + attrs={}, + outputs={"Out": out}) return out @@ -13029,9 +13187,13 @@ def mul(x, y, x_num_col_dims=1, y_num_col_dims=1, name=None): check_variable_and_dtype(y, 'y', ['float16', 'float32', 'float64'], 'mul') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="mul", inputs={"X": x, - "Y": y}, attrs=attrs, outputs={"Out": out}) + helper.append_op(type="mul", + inputs={ + "X": x, + "Y": y + }, + attrs=attrs, + outputs={"Out": out}) return out @@ -13170,11 +13332,10 @@ def space_to_depth(x, blocksize, name=None): out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="space_to_depth", - inputs={"X": x}, - attrs={"blocksize": blocksize}, - outputs={"Out": out}) + helper.append_op(type="space_to_depth", + inputs={"X": x}, + attrs={"blocksize": blocksize}, + outputs={"Out": out}) return out @@ -13250,13 +13411,14 @@ def affine_channel(x, check_type(bias, 'bias', (Variable, type(None)), 'affine_channel') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="affine_channel", - inputs={"X": x, - 'Scale': scale, - 'Bias': bias}, - attrs={"data_layout": data_layout}, - outputs={"Out": out}) + helper.append_op(type="affine_channel", + inputs={ + "X": x, + 'Scale': scale, + 'Bias': bias + }, + attrs={"data_layout": data_layout}, + outputs={"Out": out}) return helper.append_activation(out) @@ -13365,12 +13527,13 @@ def similarity_focus(input, axis, indexes, name=None): raise ValueError("indexes can not be empty.") out = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='similarity_focus', - inputs={'X': input}, - outputs={'Out': out}, - attrs={"axis": axis, - "indexes": indexes}) + helper.append_op(type='similarity_focus', + inputs={'X': input}, + outputs={'Out': out}, + attrs={ + "axis": axis, + "indexes": indexes + }) return out @@ -13425,14 +13588,15 @@ def hash(input, hash_size, num_hash=1, name=None): check_type(hash_size, 'hash_size', int, 'hash') check_type(num_hash, 'num_hash', int, 'hash') helper = LayerHelper('hash', **locals()) - out = helper.create_variable_for_type_inference( - helper.input_dtype(), stop_gradient=True) - helper.append_op( - type='hash', - inputs={'X': input}, - outputs={'Out': out}, - attrs={'num_hash': num_hash, - 'mod_by': hash_size}) + out = helper.create_variable_for_type_inference(helper.input_dtype(), + stop_gradient=True) + helper.append_op(type='hash', + inputs={'X': input}, + outputs={'Out': out}, + attrs={ + 'num_hash': num_hash, + 'mod_by': hash_size + }) return out @@ -13539,8 +13703,10 @@ def grid_sampler(x, grid, name=None): attrs = {'use_cudnn': False} if core.is_compiled_with_rocm() else {} - helper.append_op( - type='grid_sampler', inputs=ipts, outputs={'Output': out}, attrs=attrs) + helper.append_op(type='grid_sampler', + inputs=ipts, + outputs={'Output': out}, + attrs=attrs) return out @@ -13642,12 +13808,13 @@ def add_position_encoding(input, alpha, beta, name=None): out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type="add_position_encoding", - inputs={"X": input}, - outputs={"Out": out}, - attrs={"alpha": alpha, - "beta": beta}) + helper.append_op(type="add_position_encoding", + inputs={"X": input}, + outputs={"Out": out}, + attrs={ + "alpha": alpha, + "beta": beta + }) return out @@ -13708,18 +13875,23 @@ def bilinear_tensor_product(x, param_shape = [size, x.shape[1], y.shape[1]] - w = helper.create_parameter( - attr=helper.param_attr, shape=param_shape, dtype=dtype, 
is_bias=False) + w = helper.create_parameter(attr=helper.param_attr, + shape=param_shape, + dtype=dtype, + is_bias=False) out = helper.create_variable_for_type_inference(dtype=dtype) inputs = {"X": x, "Y": y, "Weight": w} if helper.bias_attr: bias_size = [1, size] - bias = helper.create_parameter( - attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True) + bias = helper.create_parameter(attr=helper.bias_attr, + shape=bias_size, + dtype=dtype, + is_bias=True) inputs["Bias"] = bias - helper.append_op( - type="bilinear_tensor_product", inputs=inputs, outputs={"Out": out}) + helper.append_op(type="bilinear_tensor_product", + inputs=inputs, + outputs={"Out": out}) # add activation return helper.append_activation(out) @@ -13769,11 +13941,10 @@ def get_tensor_from_selected_rows(x, name=None): ) helper = LayerHelper('get_tensor_from_selected_rows', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='get_tensor_from_selected_rows', - inputs={'X': x}, - outputs={'Out': out}, - attrs={}) + helper.append_op(type='get_tensor_from_selected_rows', + inputs={'X': x}, + outputs={'Out': out}, + attrs={}) return out @@ -13843,11 +14014,10 @@ def shuffle_channel(x, group, name=None): if not isinstance(group, int): raise TypeError("group must be int type") - helper.append_op( - type="shuffle_channel", - inputs={"X": x}, - outputs={"Out": out}, - attrs={"group": group}) + helper.append_op(type="shuffle_channel", + inputs={"X": x}, + outputs={"Out": out}, + attrs={"group": group}) return out @@ -14185,19 +14355,18 @@ def py_func(func, x, out, backward_func=None, skip_vars_in_backward_input=None): for v in skip_vars_in_backward_input: if not v.name in fwd_in_out: raise ValueError( - 'Variable {} is not found in forward inputs and outputs' - .format(v.name)) + 'Variable {} is not found in forward inputs and outputs'. 
+ format(v.name)) backward_skip_vars.add(v.name) - helper.append_op( - type='py_func', - inputs={'X': x}, - outputs={'Out': out_list}, - attrs={ - 'forward_callable_id': fwd_func_id, - 'backward_callable_id': bwd_func_id, - 'backward_skip_vars': list(backward_skip_vars) - }) + helper.append_op(type='py_func', + inputs={'X': x}, + outputs={'Out': out_list}, + attrs={ + 'forward_callable_id': fwd_func_id, + 'backward_callable_id': bwd_func_id, + 'backward_skip_vars': list(backward_skip_vars) + }) return out @@ -14261,17 +14430,18 @@ def psroi_pool(input, raise TypeError("pooled_width must be int type") dtype = helper.input_dtype() out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='psroi_pool', - inputs={'X': input, - 'ROIs': rois}, - outputs={'Out': out}, - attrs={ - 'output_channels': output_channels, - 'spatial_scale': spatial_scale, - 'pooled_height': pooled_height, - 'pooled_width': pooled_width - }) + helper.append_op(type='psroi_pool', + inputs={ + 'X': input, + 'ROIs': rois + }, + outputs={'Out': out}, + attrs={ + 'output_channels': output_channels, + 'spatial_scale': spatial_scale, + 'pooled_height': pooled_height, + 'pooled_width': pooled_width + }) return out @@ -14345,15 +14515,14 @@ def prroi_pool(input, inputs_op = {'X': input, 'ROIs': rois} if batch_roi_nums is not None: inputs_op['BatchRoINums'] = batch_roi_nums - helper.append_op( - type='prroi_pool', - inputs=inputs_op, - outputs={'Out': out}, - attrs={ - 'spatial_scale': spatial_scale, - 'pooled_height': pooled_height, - 'pooled_width': pooled_width - }) + helper.append_op(type='prroi_pool', + inputs=inputs_op, + outputs={'Out': out}, + attrs={ + 'spatial_scale': spatial_scale, + 'pooled_height': pooled_height, + 'pooled_width': pooled_width + }) return out @@ -14410,11 +14579,10 @@ def pixel_shuffle(x, upscale_factor): if not isinstance(upscale_factor, int): raise TypeError("upscale factor must be int type") - helper.append_op( - type="pixel_shuffle", - inputs={"X": x}, - outputs={"Out": out}, - attrs={"upscale_factor": upscale_factor}) + helper.append_op(type="pixel_shuffle", + inputs={"X": x}, + outputs={"Out": out}, + attrs={"upscale_factor": upscale_factor}) return out @@ -14517,12 +14685,13 @@ def continuous_value_model(input, cvm, use_cvm=True): out = helper.create_variable(dtype=input.dtype) check_variable_and_dtype(input, 'input', ['float16', 'float32', 'float64'], 'cvm') - helper.append_op( - type='cvm', - inputs={'X': [input], - 'CVM': [cvm]}, - outputs={'Y': [out]}, - attrs={"use_cvm": use_cvm}) + helper.append_op(type='cvm', + inputs={ + 'X': [input], + 'CVM': [cvm] + }, + outputs={'Y': [out]}, + attrs={"use_cvm": use_cvm}) return out @@ -14570,10 +14739,9 @@ def where(condition): out = helper.create_variable_for_type_inference( dtype=core.VarDesc.VarType.INT64) - helper.append_op( - type='where_index', - inputs={'Condition': condition}, - outputs={'Out': [out]}) + helper.append_op(type='where_index', + inputs={'Condition': condition}, + outputs={'Out': [out]}) return out @@ -14640,12 +14808,13 @@ def unique(x, dtype='int32'): index = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='unique', - inputs={'X': x}, - attrs={'dtype': convert_np_dtype_to_dtype_(dtype)}, - outputs={'Out': [out], - 'Index': [index]}) + helper.append_op(type='unique', + inputs={'X': x}, + attrs={'dtype': convert_np_dtype_to_dtype_(dtype)}, + outputs={ + 'Out': [out], + 'Index': [index] + }) return out, index @@ -14697,13 +14866,14 @@ def unique_with_counts(x, dtype='int32'): 
count = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='unique_with_counts', - inputs={'X': x}, - attrs={'dtype': convert_np_dtype_to_dtype_(dtype)}, - outputs={'Out': [out], - 'Index': [index], - 'Count': [count]}) + helper.append_op(type='unique_with_counts', + inputs={'X': x}, + attrs={'dtype': convert_np_dtype_to_dtype_(dtype)}, + outputs={ + 'Out': [out], + 'Index': [index], + 'Count': [count] + }) return out, index, count @@ -14904,41 +15074,39 @@ def deformable_conv(input, pre_bias = helper.create_variable_for_type_inference(dtype) if modulated: - helper.append_op( - type='deformable_conv', - inputs={ - 'Input': input, - 'Filter': filter_param, - 'Offset': offset, - 'Mask': mask, - }, - outputs={"Output": pre_bias}, - attrs={ - 'strides': stride, - 'paddings': padding, - 'dilations': dilation, - 'groups': groups, - 'deformable_groups': deformable_groups, - 'im2col_step': im2col_step, - }) + helper.append_op(type='deformable_conv', + inputs={ + 'Input': input, + 'Filter': filter_param, + 'Offset': offset, + 'Mask': mask, + }, + outputs={"Output": pre_bias}, + attrs={ + 'strides': stride, + 'paddings': padding, + 'dilations': dilation, + 'groups': groups, + 'deformable_groups': deformable_groups, + 'im2col_step': im2col_step, + }) else: - helper.append_op( - type='deformable_conv_v1', - inputs={ - 'Input': input, - 'Filter': filter_param, - 'Offset': offset, - }, - outputs={"Output": pre_bias}, - attrs={ - 'strides': stride, - 'paddings': padding, - 'dilations': dilation, - 'groups': groups, - 'deformable_groups': deformable_groups, - 'im2col_step': im2col_step, - }) + helper.append_op(type='deformable_conv_v1', + inputs={ + 'Input': input, + 'Filter': filter_param, + 'Offset': offset, + }, + outputs={"Output": pre_bias}, + attrs={ + 'strides': stride, + 'paddings': padding, + 'dilations': dilation, + 'groups': groups, + 'deformable_groups': deformable_groups, + 'im2col_step': im2col_step, + }) output = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2) return output @@ -15163,24 +15331,27 @@ def deformable_roi_pooling(input, dtype = helper.input_dtype() output = helper.create_variable_for_type_inference(dtype) top_count = helper.create_variable_for_type_inference(dtype='int32') - helper.append_op( - type="deformable_psroi_pooling", - inputs={"Input": input, - "ROIs": rois, - "Trans": trans}, - outputs={"Output": output, - "TopCount": top_count}, - attrs={ - "no_trans": no_trans, - "spatial_scale": spatial_scale, - "output_dim": output_channels, - "group_size": group_size, - "pooled_height": pooled_height, - "pooled_width": pooled_width, - "part_size": part_size, - "sample_per_part": sample_per_part, - "trans_std": trans_std - }) + helper.append_op(type="deformable_psroi_pooling", + inputs={ + "Input": input, + "ROIs": rois, + "Trans": trans + }, + outputs={ + "Output": output, + "TopCount": top_count + }, + attrs={ + "no_trans": no_trans, + "spatial_scale": spatial_scale, + "output_dim": output_channels, + "group_size": group_size, + "pooled_height": pooled_height, + "pooled_width": pooled_width, + "part_size": part_size, + "sample_per_part": sample_per_part, + "trans_std": trans_std + }) return output @@ -15243,17 +15414,16 @@ def shard_index(input, index_num, nshards, shard_id, ignore_value=-1): (shard_id, nshards)) out = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type=op_type, - inputs={'X': [input]}, - outputs={'Out': out}, - attrs={ - 'index_num': index_num, - 'nshards': nshards, - 'shard_id': shard_id, - 
'ignore_value': ignore_value - }, - stop_gradient=True) + helper.append_op(type=op_type, + inputs={'X': [input]}, + outputs={'Out': out}, + attrs={ + 'index_num': index_num, + 'nshards': nshards, + 'shard_id': shard_id, + 'ignore_value': ignore_value + }, + stop_gradient=True) return out @@ -15316,13 +15486,14 @@ def hard_swish(x, threshold=6.0, scale=6.0, offset=3.0, name=None): helper = LayerHelper('hard_swish', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='hard_swish', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'threshold': threshold, - 'scale': scale, - 'offset': offset}) + helper.append_op(type='hard_swish', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'threshold': threshold, + 'scale': scale, + 'offset': offset + }) return out @@ -15398,11 +15569,10 @@ def mish(x, threshold=20, name=None): helper = LayerHelper('mish', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='mish', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'threshold': threshold}) + helper.append_op(type='mish', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'threshold': threshold}) return out @@ -15472,7 +15642,11 @@ def gather_tree(ids, parents): @deprecated(since="2.0.0", update_to="paddle.uniform") @templatedoc() -def uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0, +def uniform_random(shape, + dtype='float32', + min=-1.0, + max=1.0, + seed=0, name=None): """ This OP returns a Tensor filled with random values sampled from a uniform @@ -15553,8 +15727,7 @@ def uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0, if _non_static_mode(): shape = utils.convert_shape_to_list(shape) - return _C_ops.uniform_random('shape', shape, 'min', - float(min), 'max', + return _C_ops.uniform_random('shape', shape, 'min', float(min), 'max', float(max), 'seed', seed, 'dtype', dtype) check_type(shape, 'shape', (list, tuple, Variable), 'uniform_random/rand') @@ -15563,14 +15736,17 @@ def uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0, inputs = dict() attrs = {'seed': seed, 'min': min, 'max': max, 'dtype': dtype} - utils.get_shape_tensor_inputs( - inputs=inputs, attrs=attrs, shape=shape, op_type='uniform_random/rand') + utils.get_shape_tensor_inputs(inputs=inputs, + attrs=attrs, + shape=shape, + op_type='uniform_random/rand') helper = LayerHelper("uniform_random", **locals()) out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="uniform_random", inputs=inputs, attrs=attrs, - outputs={"Out": out}) + helper.append_op(type="uniform_random", + inputs=inputs, + attrs=attrs, + outputs={"Out": out}) utils.try_set_static_shape_tensor(out, shape) return out @@ -15621,9 +15797,8 @@ def unbind(input, axis=0): for i in range(num) ] - helper.append_op( - type="unbind", - inputs={"X": input}, - outputs={"Out": outs}, - attrs={"axis": axis}) + helper.append_op(type="unbind", + inputs={"X": input}, + outputs={"Out": outs}, + attrs={"axis": axis}) return outs diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index d8cd7f6abf6..84d147bc97c 100755 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -92,8 +92,8 @@ for _OP in set(__activations_noattr__): if _OP in __deprecated_func_name__: _new_OP = __deprecated_func_name__[_OP] _func = generate_activation_fn(_OP) - _func = deprecated( - since="2.0.0", update_to="paddle.nn.functional.%s" % (_new_OP))(_func) + _func = 
deprecated(since="2.0.0", + update_to="paddle.nn.functional.%s" % (_new_OP))(_func) globals()[_OP] = _func for _OP in set(__unary_func__): @@ -112,7 +112,8 @@ for _OP in set(__inplace_unary_func__): _func = deprecated(since="2.0.0", update_to="paddle.%s" % (_new_OP))(_func) globals()[_OP] = _func -add_sample_code(globals()["sigmoid"], r""" +add_sample_code( + globals()["sigmoid"], r""" Examples: .. code-block:: python @@ -126,7 +127,8 @@ Examples: """) -add_sample_code(globals()["silu"], r""" +add_sample_code( + globals()["silu"], r""" Examples: .. code-block:: python @@ -140,7 +142,8 @@ Examples: """) -add_sample_code(globals()["logsigmoid"], r""" +add_sample_code( + globals()["logsigmoid"], r""" Examples: .. code-block:: python @@ -154,7 +157,8 @@ Examples: """) -add_sample_code(globals()["exp"], r""" +add_sample_code( + globals()["exp"], r""" Examples: .. code-block:: python @@ -167,7 +171,8 @@ Examples: """) -add_sample_code(globals()["expm1"], r""" +add_sample_code( + globals()["expm1"], r""" Examples: .. code-block:: python @@ -180,7 +185,8 @@ Examples: """) -add_sample_code(globals()["tanh"], r""" +add_sample_code( + globals()["tanh"], r""" Examples: .. code-block:: python @@ -193,7 +199,8 @@ Examples: """) -add_sample_code(globals()["atan"], r""" +add_sample_code( + globals()["atan"], r""" Examples: .. code-block:: python @@ -206,7 +213,8 @@ Examples: """) -add_sample_code(globals()["tanh_shrink"], r""" +add_sample_code( + globals()["tanh_shrink"], r""" Examples: .. code-block:: python @@ -220,7 +228,8 @@ Examples: """) -add_sample_code(globals()["sqrt"], r""" +add_sample_code( + globals()["sqrt"], r""" Examples: .. code-block:: python @@ -233,7 +242,8 @@ Examples: """) -add_sample_code(globals()["rsqrt"], r""" +add_sample_code( + globals()["rsqrt"], r""" Examples: .. code-block:: python @@ -246,7 +256,8 @@ Examples: """) -add_sample_code(globals()["abs"], r""" +add_sample_code( + globals()["abs"], r""" Examples: .. code-block:: python @@ -259,7 +270,8 @@ Examples: """) -add_sample_code(globals()["ceil"], r""" +add_sample_code( + globals()["ceil"], r""" Examples: .. code-block:: python @@ -272,7 +284,8 @@ Examples: """) -add_sample_code(globals()["floor"], r""" +add_sample_code( + globals()["floor"], r""" Examples: .. code-block:: python @@ -285,7 +298,8 @@ Examples: """) -add_sample_code(globals()["cos"], r""" +add_sample_code( + globals()["cos"], r""" Examples: .. code-block:: python @@ -298,7 +312,8 @@ Examples: """) -add_sample_code(globals()["tan"], r""" +add_sample_code( + globals()["tan"], r""" Examples: .. code-block:: python @@ -311,7 +326,8 @@ Examples: """) -add_sample_code(globals()["acos"], r""" +add_sample_code( + globals()["acos"], r""" Examples: .. code-block:: python @@ -324,7 +340,8 @@ Examples: """) -add_sample_code(globals()["sin"], r""" +add_sample_code( + globals()["sin"], r""" Examples: .. code-block:: python @@ -337,7 +354,8 @@ Examples: """) -add_sample_code(globals()["asin"], r""" +add_sample_code( + globals()["asin"], r""" Examples: .. code-block:: python @@ -350,7 +368,8 @@ Examples: """) -add_sample_code(globals()["cosh"], r""" +add_sample_code( + globals()["cosh"], r""" Examples: .. code-block:: python @@ -363,7 +382,8 @@ Examples: """) -add_sample_code(globals()["sinh"], r""" +add_sample_code( + globals()["sinh"], r""" Examples: .. code-block:: python @@ -376,7 +396,8 @@ Examples: """) -add_sample_code(globals()["asinh"], r""" +add_sample_code( + globals()["asinh"], r""" Examples: .. 
code-block:: python @@ -389,7 +410,8 @@ Examples: """) -add_sample_code(globals()["acosh"], r""" +add_sample_code( + globals()["acosh"], r""" Examples: .. code-block:: python @@ -402,7 +424,8 @@ Examples: """) -add_sample_code(globals()["atanh"], r""" +add_sample_code( + globals()["atanh"], r""" Examples: .. code-block:: python @@ -415,7 +438,8 @@ Examples: """) -add_sample_code(globals()["round"], r""" +add_sample_code( + globals()["round"], r""" Examples: .. code-block:: python @@ -428,7 +452,8 @@ Examples: """) -add_sample_code(globals()["reciprocal"], r""" +add_sample_code( + globals()["reciprocal"], r""" Examples: .. code-block:: python @@ -441,7 +466,8 @@ Examples: """) -add_sample_code(globals()["square"], r""" +add_sample_code( + globals()["square"], r""" Examples: .. code-block:: python @@ -454,7 +480,8 @@ Examples: """) -add_sample_code(globals()["lgamma"], r""" +add_sample_code( + globals()["lgamma"], r""" Examples: .. code-block:: python @@ -467,7 +494,8 @@ Examples: """) -add_sample_code(globals()["softplus"], r""" +add_sample_code( + globals()["softplus"], r""" Examples: .. code-block:: python @@ -481,7 +509,8 @@ Examples: """) -add_sample_code(globals()["softsign"], r""" +add_sample_code( + globals()["softsign"], r""" Examples: .. code-block:: python @@ -576,10 +605,9 @@ __all__ += ['cumsum'] _cum_sum_ = generate_layer_fn('cumsum') -@deprecated( - since="2.0.0", - update_to="paddle.cumsum", - reason="New APIs for Paddle 2.0 are coming.") +@deprecated(since="2.0.0", + update_to="paddle.cumsum", + reason="New APIs for Paddle 2.0 are coming.") def cumsum(x, axis=None, exclusive=None, reverse=None): check_type(x, 'x', (Variable), 'cumsum') locals_var = locals().copy() diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index b04cf90e1d8..6b51721aafc 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -131,7 +131,8 @@ class RNNCell(object): if sys.version_info < (3, ): integer_types = ( int, - long, ) + long, + ) else: integer_types = (int, ) check_variable_and_dtype(batch_ref, 'batch_ref', @@ -156,7 +157,8 @@ class RNNCell(object): if sys.version_info < (3, ): integer_types = ( int, - long, ) + long, + ) else: integer_types = (int, ) """For shape, list/tuple of integer is the finest-grained objection""" @@ -167,10 +169,11 @@ class RNNCell(object): # TODO: Add check for the illegal if isinstance(seq, dict): return True - return (isinstance(seq, Sequence) and - not isinstance(seq, six.string_types)) + return (isinstance(seq, Sequence) + and not isinstance(seq, six.string_types)) class Shape(object): + def __init__(self, shape): self.shape = shape if shape[0] == -1 else ([-1] + list(shape)) @@ -507,6 +510,7 @@ def rnn(cell, class ArrayWrapper(object): + def __init__(self, x): self.array = [x] @@ -549,8 +553,9 @@ def _rnn_dynamic_graph(cell, inputs = map_structure(_transpose_batch_time, inputs) if sequence_length is not None: - mask = sequence_lod.sequence_mask( - sequence_length, maxlen=time_steps, dtype=inputs.dtype) + mask = sequence_lod.sequence_mask(sequence_length, + maxlen=time_steps, + dtype=inputs.dtype) mask = nn.transpose(mask, [1, 0]) if is_reverse: @@ -564,9 +569,8 @@ def _rnn_dynamic_graph(cell, step_inputs = map_structure(lambda x: x[i], inputs) step_outputs, new_states = cell(step_inputs, states, **kwargs) if sequence_length is not None: - new_states = map_structure( - partial( - _maybe_copy, step_mask=mask[i]), states, new_states) + new_states = map_structure(partial(_maybe_copy, 
step_mask=mask[i]), + states, new_states) states = new_states outputs = map_structure(lambda x: ArrayWrapper(x), step_outputs) if i == 0 else map_structure( @@ -574,13 +578,11 @@ def _rnn_dynamic_graph(cell, step_outputs, outputs) final_outputs = map_structure( - lambda x: nn.stack(x.array, axis=time_step_index), - outputs) + lambda x: nn.stack(x.array, axis=time_step_index), outputs) if is_reverse: final_outputs = map_structure( - lambda x: tensor.reverse(x, axis=time_step_index), - final_outputs) + lambda x: tensor.reverse(x, axis=time_step_index), final_outputs) final_states = new_states return final_outputs, final_states @@ -638,8 +640,7 @@ def _rnn_static_graph(cell, if sequence_length: step_mask = rnn.step_input(mask) new_states = map_structure( - partial( - _maybe_copy, step_mask=step_mask), states, new_states) + partial(_maybe_copy, step_mask=step_mask), states, new_states) map_structure(rnn.update_memory, states, new_states) flat_outputs = flatten(outputs) @@ -963,15 +964,16 @@ class BeamSearchDecoder(Decoder): expand_times = [1] * len(x.shape) expand_times[1] = beam_size x = paddle.tile(x, expand_times) # [batch_size, beam_size, ...] - x = nn.transpose(x, list(range(2, len(x.shape))) + + x = nn.transpose(x, + list(range(2, len(x.shape))) + [0, 1]) # [..., batch_size, beam_size] # use 0 to copy to avoid wrong shape - x = nn.reshape( - x, shape=[0] * - (len(x.shape) - 2) + [-1]) # [..., batch_size * beam_size] + x = nn.reshape(x, shape=[0] * (len(x.shape) - 2) + + [-1]) # [..., batch_size * beam_size] x = nn.transpose( x, [len(x.shape) - 1] + - list(range(0, len(x.shape) - 1))) # [batch_size * beam_size, ...] + list(range(0, + len(x.shape) - 1))) # [batch_size * beam_size, ...] return x def _split_batch_beams(self, x): @@ -1056,8 +1058,7 @@ class BeamSearchDecoder(Decoder): probs = nn.elementwise_mul( paddle.tile(nn.unsqueeze(finished, [2]), [1, 1, self.vocab_size]), self.noend_mask_tensor, - axis=-1) - nn.elementwise_mul( - probs, (finished - 1), axis=0) + axis=-1) - nn.elementwise_mul(probs, (finished - 1), axis=0) return probs def _gather(self, x, indices, batch_size): @@ -1085,10 +1086,8 @@ class BeamSearchDecoder(Decoder): indices.dtype) if batch_size.dtype != indices.dtype else batch_size batch_size.stop_gradient = True # TODO: remove this batch_pos = paddle.tile( - nn.unsqueeze( - tensor.range( - 0, batch_size, 1, dtype=indices.dtype), [1]), - [1, self.beam_size]) + nn.unsqueeze(tensor.range(0, batch_size, 1, dtype=indices.dtype), + [1]), [1, self.beam_size]) topk_coordinates = nn.stack([batch_pos, indices], axis=2) topk_coordinates.stop_gradient = True return nn.gather_nd(x, topk_coordinates) @@ -1137,22 +1136,22 @@ class BeamSearchDecoder(Decoder): state = flatten(initial_cell_states)[0] self.batch_size = nn.shape(state)[0] - self.start_token_tensor = tensor.fill_constant( - shape=[1], dtype="int64", value=self.start_token) - self.end_token_tensor = tensor.fill_constant( - shape=[1], dtype="int64", value=self.end_token) + self.start_token_tensor = tensor.fill_constant(shape=[1], + dtype="int64", + value=self.start_token) + self.end_token_tensor = tensor.fill_constant(shape=[1], + dtype="int64", + value=self.end_token) init_cell_states = map_structure(self._expand_to_beam_size, initial_cell_states) - init_inputs = paddle.full( - shape=[self.batch_size, self.beam_size], - fill_value=self.start_token_tensor, - dtype=self.start_token_tensor.dtype) + init_inputs = paddle.full(shape=[self.batch_size, self.beam_size], + fill_value=self.start_token_tensor, + 
dtype=self.start_token_tensor.dtype) log_probs = paddle.tile( tensor.assign( - np.array( - [[0.] + [-self.kinf] * (self.beam_size - 1)], - dtype="float32")), [self.batch_size, 1]) + np.array([[0.] + [-self.kinf] * (self.beam_size - 1)], + dtype="float32")), [self.batch_size, 1]) if paddle.get_default_dtype() == "float64": log_probs = tensor.cast(log_probs, "float64") # TODO: remove the restriction of force_cpu @@ -1198,8 +1197,9 @@ class BeamSearchDecoder(Decoder): """ self.vocab_size = logits.shape[-1] - self.vocab_size_tensor = tensor.fill_constant( - shape=[1], dtype="int64", value=self.vocab_size) + self.vocab_size_tensor = tensor.fill_constant(shape=[1], + dtype="int64", + value=self.vocab_size) noend_array = [-self.kinf] * self.vocab_size noend_array[self.end_token] = 0 @@ -1210,8 +1210,9 @@ class BeamSearchDecoder(Decoder): step_log_probs = nn.log(nn.softmax(logits)) step_log_probs = self._mask_probs(step_log_probs, beam_state.finished) - log_probs = nn.elementwise_add( - x=step_log_probs, y=beam_state.log_probs, axis=0) + log_probs = nn.elementwise_add(x=step_log_probs, + y=beam_state.log_probs, + axis=0) # TODO: length penalty scores = log_probs scores = nn.reshape(scores, [-1, self.beam_size * self.vocab_size]) @@ -1230,8 +1231,8 @@ class BeamSearchDecoder(Decoder): self.batch_size) next_lengths = self._gather(beam_state.lengths, beam_indices, self.batch_size) - next_lengths = next_lengths + tensor.cast( - nn.logical_not(next_finished), beam_state.lengths.dtype) + next_lengths = next_lengths + tensor.cast(nn.logical_not(next_finished), + beam_state.lengths.dtype) next_finished = control_flow.logical_or( next_finished, control_flow.equal(token_indices, self.end_token_tensor)) @@ -1345,6 +1346,7 @@ def _dynamic_decode_imperative(decoder, is_test=False, return_length=False, **kwargs): + def _maybe_copy(state, new_state, step_mask): # TODO: use where_op state_dtype = state.dtype @@ -1357,8 +1359,9 @@ def _dynamic_decode_imperative(decoder, # to sum(bool) error. step_mask.stop_gradient = True new_state = nn.elementwise_mul( - state, step_mask, axis=0) - nn.elementwise_mul( - new_state, (step_mask - 1), axis=0) + state, step_mask, axis=0) - nn.elementwise_mul(new_state, + (step_mask - 1), + axis=0) if convert_dtype(state_dtype) in ["bool"]: new_state = tensor.cast(new_state, dtype=state_dtype) return new_state @@ -1371,11 +1374,13 @@ def _dynamic_decode_imperative(decoder, outputs = None step_idx = 0 - step_idx_tensor = tensor.fill_constant( - shape=[1], dtype="int64", value=step_idx) + step_idx_tensor = tensor.fill_constant(shape=[1], + dtype="int64", + value=step_idx) while cond.numpy(): - (step_outputs, next_states, next_inputs, next_finished) = decoder.step( - step_idx_tensor, inputs, states, **kwargs) + (step_outputs, next_states, next_inputs, + next_finished) = decoder.step(step_idx_tensor, inputs, states, + **kwargs) if not decoder.tracks_own_finished: # BeamSearchDecoder would track it own finished, since # beams would be reordered and the finished status of each @@ -1387,8 +1392,8 @@ def _dynamic_decode_imperative(decoder, tensor.assign(next_finished, finished) next_sequence_lengths = nn.elementwise_add( sequence_lengths, - tensor.cast( - control_flow.logical_not(finished), sequence_lengths.dtype)) + tensor.cast(control_flow.logical_not(finished), + sequence_lengths.dtype)) if impute_finished: # rectify the states for the finished. 
next_states = map_structure( lambda x, y: _maybe_copy(x, y, finished), states, @@ -1404,8 +1409,9 @@ def _dynamic_decode_imperative(decoder, lambda x: ArrayWrapper(x), step_outputs) if step_idx == 0 else map_structure( lambda x, x_array: x_array.append(x), step_outputs, outputs) - inputs, states, finished, sequence_lengths = ( - next_inputs, next_states, next_finished, next_sequence_lengths) + inputs, states, finished, sequence_lengths = (next_inputs, next_states, + next_finished, + next_sequence_lengths) control_flow.increment(x=step_idx_tensor, value=1.0, in_place=True) step_idx += 1 @@ -1418,8 +1424,9 @@ def _dynamic_decode_imperative(decoder, final_states = states try: - final_outputs, final_states = decoder.finalize( - final_outputs, final_states, sequence_lengths) + final_outputs, final_states = decoder.finalize(final_outputs, + final_states, + sequence_lengths) except NotImplementedError: pass @@ -1442,15 +1449,17 @@ def _dynamic_decode_declarative(decoder, return_length=False, **kwargs): initial_inputs, initial_states, initial_finished = decoder.initialize(inits) - global_inputs, global_states, global_finished = ( - initial_inputs, initial_states, initial_finished) + global_inputs, global_states, global_finished = (initial_inputs, + initial_states, + initial_finished) global_finished.stop_gradient = True step_idx = tensor.fill_constant(shape=[1], dtype="int64", value=0) cond = control_flow.logical_not((nn.reduce_all(initial_finished))) if max_step_num is not None: - max_step_num = tensor.fill_constant( - shape=[1], dtype="int64", value=max_step_num) + max_step_num = tensor.fill_constant(shape=[1], + dtype="int64", + value=max_step_num) while_op = control_flow.While(cond, is_test=is_test) sequence_lengths = tensor.cast(tensor.zeros_like(initial_finished), "int64") @@ -1479,8 +1488,9 @@ def _dynamic_decode_declarative(decoder, # to sum(bool) error. step_mask.stop_gradient = True new_state = nn.elementwise_mul( - state, step_mask, axis=0) - nn.elementwise_mul( - new_state, (step_mask - 1), axis=0) + state, step_mask, axis=0) - nn.elementwise_mul(new_state, + (step_mask - 1), + axis=0) if convert_dtype(state_dtype) in ["bool"]: new_state = tensor.cast(new_state, dtype=state_dtype) return new_state @@ -1516,14 +1526,14 @@ def _dynamic_decode_declarative(decoder, global_finished) next_sequence_lengths = nn.elementwise_add( sequence_lengths, - tensor.cast( - control_flow.logical_not(global_finished), - sequence_lengths.dtype)) + tensor.cast(control_flow.logical_not(global_finished), + sequence_lengths.dtype)) if impute_finished: # rectify the states for the finished. next_states = map_structure( lambda x, y: _maybe_copy(x, y, global_finished), states, - next_states, ) + next_states, + ) else: warnings.warn( "`next_states` has no `lengths` attribute, the returned `sequence_lengths` would be all zeros." 
@@ -1571,8 +1581,9 @@ def _dynamic_decode_declarative(decoder, states_arrays) try: - final_outputs, final_states = decoder.finalize( - final_outputs, final_states, sequence_lengths) + final_outputs, final_states = decoder.finalize(final_outputs, + final_states, + sequence_lengths) except NotImplementedError: pass @@ -1821,8 +1832,9 @@ class TrainingHelper(DecodeHelper): """ init_finished = control_flow.equal( self.sequence_length, - tensor.fill_constant( - shape=[1], dtype=self.sequence_length.dtype, value=0)) + tensor.fill_constant(shape=[1], + dtype=self.sequence_length.dtype, + value=0)) # TODO: support zero length init_inputs = map_structure( lambda x: x[0] if self.time_major else x[:, 0], self.inputs) @@ -1879,9 +1891,8 @@ class TrainingHelper(DecodeHelper): shape `[batch_size]`. """ # TODO: compatibility of int32 and int64 - time = tensor.cast( - time, - "int32") if convert_dtype(time.dtype) not in ["int32"] else time + time = tensor.cast(time, "int32") if convert_dtype( + time.dtype) not in ["int32"] else time if self.sequence_length.dtype != time.dtype: self.sequence_length = tensor.cast(self.sequence_length, time.dtype) next_time = time + 1 @@ -1889,10 +1900,11 @@ class TrainingHelper(DecodeHelper): def _slice(x): # TODO: use Variable.__getitem__ axes = [0 if self.time_major else 1] - return nn.squeeze( - nn.slice( - x, axes=axes, starts=[next_time], ends=[next_time + 1]), - axes=axes) + return nn.squeeze(nn.slice(x, + axes=axes, + starts=[next_time], + ends=[next_time + 1]), + axes=axes) next_inputs = map_structure(_slice, self.inputs_) return finished, next_inputs, states @@ -1950,8 +1962,9 @@ class GreedyEmbeddingHelper(DecodeHelper): """ self.embedding_fn = embedding_fn self.start_tokens = start_tokens - self.end_token = tensor.fill_constant( - shape=[1], dtype="int64", value=end_token) + self.end_token = tensor.fill_constant(shape=[1], + dtype="int64", + value=end_token) def initialize(self): r""" @@ -2125,8 +2138,9 @@ class SampleEmbeddingHelper(GreedyEmbeddingHelper): # not pass to probs, since sampling_id op does not have corresponding # grad op and thus can not pass. 
probs.stop_gradient = True - sample_ids = nn.sampling_id( - probs, seed=self.seed, dtype=self.start_tokens.dtype) + sample_ids = nn.sampling_id(probs, + seed=self.seed, + dtype=self.start_tokens.dtype) return sample_ids @@ -2253,14 +2267,15 @@ class BasicDecoder(Decoder): cell_outputs, cell_states = self.cell(inputs, states, **kwargs) if self.output_fn is not None: cell_outputs = self.output_fn(cell_outputs) - sample_ids = self.helper.sample( - time=time, outputs=cell_outputs, states=cell_states) + sample_ids = self.helper.sample(time=time, + outputs=cell_outputs, + states=cell_states) sample_ids.stop_gradient = True - (finished, next_inputs, next_states) = self.helper.next_inputs( - time=time, - outputs=cell_outputs, - states=cell_states, - sample_ids=sample_ids) + (finished, next_inputs, + next_states) = self.helper.next_inputs(time=time, + outputs=cell_outputs, + states=cell_states, + sample_ids=sample_ids) outputs = self.OutputWrapper(cell_outputs, sample_ids) return (outputs, next_states, next_inputs, finished) @@ -2396,13 +2411,16 @@ def dynamic_lstm(input, helper = LayerHelper('lstm', **locals()) size = size // 4 - weight = helper.create_parameter( - attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype) + weight = helper.create_parameter(attr=helper.param_attr, + shape=[size, 4 * size], + dtype=dtype) bias_size = [1, 7 * size] if not use_peepholes: bias_size[1] = 4 * size - bias = helper.create_parameter( - attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True) + bias = helper.create_parameter(attr=helper.bias_attr, + shape=bias_size, + dtype=dtype, + is_bias=True) hidden = helper.create_variable_for_type_inference(dtype) cell = helper.create_variable_for_type_inference(dtype) @@ -2419,29 +2437,27 @@ def dynamic_lstm(input, 'The shape of c0 should be (batch_size, %d)' % size inputs['C0'] = c_0 - helper.append_op( - type='lstm', - inputs=inputs, - outputs={ - 'Hidden': hidden, - 'Cell': cell, - 'BatchGate': batch_gate, - 'BatchCellPreAct': batch_cell_pre_act - }, - attrs={ - 'use_peepholes': use_peepholes, - 'is_reverse': is_reverse, - 'gate_activation': gate_activation, - 'cell_activation': cell_activation, - 'candidate_activation': candidate_activation - }) + helper.append_op(type='lstm', + inputs=inputs, + outputs={ + 'Hidden': hidden, + 'Cell': cell, + 'BatchGate': batch_gate, + 'BatchCellPreAct': batch_cell_pre_act + }, + attrs={ + 'use_peepholes': use_peepholes, + 'is_reverse': is_reverse, + 'gate_activation': gate_activation, + 'cell_activation': cell_activation, + 'candidate_activation': candidate_activation + }) return hidden, cell -@deprecated( - since='2.0.0', - update_to='paddle.nn.LSTM', - reason="This API may occur CUDNN errors.") +@deprecated(since='2.0.0', + update_to='paddle.nn.LSTM', + reason="This API may occur CUDNN errors.") def lstm(input, init_h, init_c, @@ -2580,11 +2596,10 @@ def lstm(input, weight_size += input_weight_size + hidden_weight_size weight_size += hidden_size * 8 * num_dirrection - weight = helper.create_parameter( - attr=helper.param_attr, - shape=[weight_size], - dtype=dtype, - default_initializer=default_initializer) + weight = helper.create_parameter(attr=helper.param_attr, + shape=[weight_size], + dtype=dtype, + default_initializer=default_initializer) out = helper.create_variable_for_type_inference(dtype) last_h = helper.create_variable_for_type_inference(dtype) @@ -2595,30 +2610,29 @@ def lstm(input, dtype=core.VarDesc.VarType.UINT8, stop_gradient=True) state_out.persistable = True - helper.append_op( - type='cudnn_lstm', 
- inputs={ - 'Input': input, - 'InitH': init_h, - 'InitC': init_c, - 'W': weight, - }, - outputs={ - 'Out': out, - 'LastH': last_h, - 'LastC': last_c, - 'Reserve': reserve, - 'StateOut': state_out, - }, - attrs={ - 'is_bidirec': is_bidirec, - 'input_size': input_size, - 'hidden_size': hidden_size, - 'num_layers': num_layers, - 'is_test': is_test, - 'dropout_prob': dropout_prob, - 'seed': seed, - }) + helper.append_op(type='cudnn_lstm', + inputs={ + 'Input': input, + 'InitH': init_h, + 'InitC': init_c, + 'W': weight, + }, + outputs={ + 'Out': out, + 'LastH': last_h, + 'LastC': last_c, + 'Reserve': reserve, + 'StateOut': state_out, + }, + attrs={ + 'is_bidirec': is_bidirec, + 'input_size': input_size, + 'hidden_size': hidden_size, + 'num_layers': num_layers, + 'is_test': is_test, + 'dropout_prob': dropout_prob, + 'seed': seed, + }) return out, last_h, last_c @@ -2781,15 +2795,19 @@ def dynamic_lstmp(input, helper = LayerHelper('lstmp', **locals()) size = size // 4 - weight = helper.create_parameter( - attr=helper.param_attr, shape=[proj_size, 4 * size], dtype=dtype) - proj_weight = helper.create_parameter( - attr=helper.param_attr, shape=[size, proj_size], dtype=dtype) + weight = helper.create_parameter(attr=helper.param_attr, + shape=[proj_size, 4 * size], + dtype=dtype) + proj_weight = helper.create_parameter(attr=helper.param_attr, + shape=[size, proj_size], + dtype=dtype) bias_size = [1, 7 * size] if not use_peepholes: bias_size[1] = 4 * size - bias = helper.create_parameter( - attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True) + bias = helper.create_parameter(attr=helper.bias_attr, + shape=bias_size, + dtype=dtype, + is_bias=True) projection = helper.create_variable_for_type_inference(dtype) cell = helper.create_variable_for_type_inference(dtype) @@ -2818,26 +2836,25 @@ def dynamic_lstmp(input, if proj_clip: assert proj_clip >= 0, "proj_clip should not be negative." 
- helper.append_op( - type='lstmp', - inputs=inputs, - outputs={ - 'Projection': projection, - 'Cell': cell, - 'BatchHidden': batch_hidden, - 'BatchGate': batch_gate, - 'BatchCellPreAct': batch_cell_pre_act - }, - attrs={ - 'use_peepholes': use_peepholes, - 'cell_clip': cell_clip, - 'proj_clip': proj_clip, - 'is_reverse': is_reverse, - 'gate_activation': gate_activation, - 'cell_activation': cell_activation, - 'candidate_activation': candidate_activation, - 'proj_activation': proj_activation - }) + helper.append_op(type='lstmp', + inputs=inputs, + outputs={ + 'Projection': projection, + 'Cell': cell, + 'BatchHidden': batch_hidden, + 'BatchGate': batch_gate, + 'BatchCellPreAct': batch_cell_pre_act + }, + attrs={ + 'use_peepholes': use_peepholes, + 'cell_clip': cell_clip, + 'proj_clip': proj_clip, + 'is_reverse': is_reverse, + 'gate_activation': gate_activation, + 'cell_activation': cell_activation, + 'candidate_activation': candidate_activation, + 'proj_activation': proj_activation + }) return projection, cell @@ -2969,16 +2986,19 @@ def dynamic_gru(input, helper = LayerHelper('gru', **locals()) dtype = helper.input_dtype() - weight = helper.create_parameter( - attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype) - bias = helper.create_parameter( - attr=helper.bias_attr, shape=[1, 3 * size], dtype=dtype, is_bias=True) + weight = helper.create_parameter(attr=helper.param_attr, + shape=[size, 3 * size], + dtype=dtype) + bias = helper.create_parameter(attr=helper.bias_attr, + shape=[1, 3 * size], + dtype=dtype, + is_bias=True) batch_size = input.shape[0] inputs = {'Input': input, 'Weight': weight, 'Bias': bias} if h_0: assert h_0.shape == ( - batch_size, size - ), 'The shape of h0 should be(batch_size, %d)' % size + batch_size, + size), 'The shape of h0 should be(batch_size, %d)' % size inputs['H0'] = h_0 hidden = helper.create_variable_for_type_inference(dtype) @@ -2986,21 +3006,20 @@ def dynamic_gru(input, batch_reset_hidden_prev = helper.create_variable_for_type_inference(dtype) batch_hidden = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='gru', - inputs=inputs, - outputs={ - 'Hidden': hidden, - 'BatchGate': batch_gate, - 'BatchResetHiddenPrev': batch_reset_hidden_prev, - 'BatchHidden': batch_hidden - }, - attrs={ - 'is_reverse': is_reverse, - 'gate_activation': gate_activation, - 'activation': candidate_activation, - 'origin_mode': origin_mode - }) + helper.append_op(type='gru', + inputs=inputs, + outputs={ + 'Hidden': hidden, + 'BatchGate': batch_gate, + 'BatchResetHiddenPrev': batch_reset_hidden_prev, + 'BatchHidden': batch_hidden + }, + attrs={ + 'is_reverse': is_reverse, + 'gate_activation': gate_activation, + 'activation': candidate_activation, + 'origin_mode': origin_mode + }) return hidden @@ -3120,7 +3139,8 @@ def gru_unit(input, identity=0, sigmoid=1, tanh=2, - relu=3, ) + relu=3, + ) activation = activation_dict[activation] gate_activation = activation_dict[gate_activation] @@ -3129,8 +3149,9 @@ def gru_unit(input, size = size // 3 # create weight - weight = helper.create_parameter( - attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype) + weight = helper.create_parameter(attr=helper.param_attr, + shape=[size, 3 * size], + dtype=dtype) gate = helper.create_variable_for_type_inference(dtype) reset_hidden_pre = helper.create_variable_for_type_inference(dtype) @@ -3139,8 +3160,10 @@ def gru_unit(input, # create bias if helper.bias_attr: bias_size = [1, 3 * size] - bias = helper.create_parameter( - attr=helper.bias_attr, shape=bias_size, 
dtype=dtype, is_bias=True) + bias = helper.create_parameter(attr=helper.bias_attr, + shape=bias_size, + dtype=dtype, + is_bias=True) inputs['Bias'] = bias helper.append_op( @@ -3384,16 +3407,19 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None): sentence_scores = helper.create_variable_for_type_inference( dtype=scores.dtype) - helper.append_op( - type="beam_search_decode", - inputs={"Ids": ids, - "Scores": scores}, - outputs={ - "SentenceIds": sentence_ids, - "SentenceScores": sentence_scores - }, - attrs={"beam_size": beam_size, - "end_id": end_id}) + helper.append_op(type="beam_search_decode", + inputs={ + "Ids": ids, + "Scores": scores + }, + outputs={ + "SentenceIds": sentence_ids, + "SentenceScores": sentence_scores + }, + attrs={ + "beam_size": beam_size, + "end_id": end_id + }) return sentence_ids, sentence_scores @@ -3491,8 +3517,8 @@ def lstm_unit(x_t, check_variable_and_dtype(x_t, 'x_t', ['float32', 'float64'], 'lstm_unit') check_variable_and_dtype(hidden_t_prev, 'hidden_t_prev', ['float32', 'float64'], 'lstm_unit') - check_variable_and_dtype(cell_t_prev, 'cell_t_prev', - ['float32', 'float64'], 'lstm_unit') + check_variable_and_dtype(cell_t_prev, 'cell_t_prev', ['float32', 'float64'], + 'lstm_unit') if len(x_t.shape) != 2: raise ValueError("Rank of x_t must be 2.") @@ -3524,12 +3550,15 @@ def lstm_unit(x_t, c = helper.create_variable_for_type_inference(dtype) h = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='lstm_unit', - inputs={"X": fc_out, - "C_prev": cell_t_prev}, - outputs={"C": c, - "H": h}, - attrs={"forget_bias": forget_bias}) + helper.append_op(type='lstm_unit', + inputs={ + "X": fc_out, + "C_prev": cell_t_prev + }, + outputs={ + "C": c, + "H": h + }, + attrs={"forget_bias": forget_bias}) return h, c diff --git a/python/paddle/fluid/layers/sequence_lod.py b/python/paddle/fluid/layers/sequence_lod.py index 702e38f3d23..4a213a7a146 100644 --- a/python/paddle/fluid/layers/sequence_lod.py +++ b/python/paddle/fluid/layers/sequence_lod.py @@ -155,24 +155,24 @@ def sequence_conv(input, helper = LayerHelper('sequence_conv', **locals()) dtype = helper.input_dtype() filter_shape = [filter_size * input.shape[1], num_filters] - filter_param = helper.create_parameter( - attr=helper.param_attr, shape=filter_shape, dtype=dtype) + filter_param = helper.create_parameter(attr=helper.param_attr, + shape=filter_shape, + dtype=dtype) pre_bias = helper.create_variable_for_type_inference(dtype) if padding_start is None: padding_start = -int(filter_size // 2) - helper.append_op( - type='sequence_conv', - inputs={ - 'X': [input], - 'Filter': [filter_param], - }, - outputs={"Out": pre_bias}, - attrs={ - 'contextStride': filter_stride, - 'contextStart': padding_start, - 'contextLength': filter_size, - }) + helper.append_op(type='sequence_conv', + inputs={ + 'X': [input], + 'Filter': [filter_param], + }, + outputs={"Out": pre_bias}, + attrs={ + 'contextStride': filter_stride, + 'contextStart': padding_start, + 'contextLength': filter_size, + }) pre_act = helper.append_bias_op(pre_bias) return helper.append_activation(pre_act) @@ -255,11 +255,10 @@ def sequence_softmax(input, use_cudnn=False, name=None): 'sequence_softmax') dtype = helper.input_dtype() softmax_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="sequence_softmax", - inputs={"X": input}, - outputs={"Out": softmax_out}, - attrs={"use_cudnn": use_cudnn}) + helper.append_op(type="sequence_softmax", + inputs={"X": input}, + outputs={"Out": softmax_out}, + 
attrs={"use_cudnn": use_cudnn}) return softmax_out @@ -359,16 +358,17 @@ def sequence_pool(input, pool_type, is_test=False, pad_value=0.0): pool_out = helper.create_variable_for_type_inference(dtype) max_index = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="sequence_pool", - inputs={"X": input}, - outputs={"Out": pool_out, - "MaxIndex": max_index}, - attrs={ - "pooltype": pool_type.upper(), - "is_test": is_test, - "pad_value": pad_value - }) + helper.append_op(type="sequence_pool", + inputs={"X": input}, + outputs={ + "Out": pool_out, + "MaxIndex": max_index + }, + attrs={ + "pooltype": pool_type.upper(), + "is_test": is_test, + "pad_value": pad_value + }) # when pool_type is max, variable max_index is initialized, # so we stop the gradient explicitly here @@ -437,8 +437,9 @@ def sequence_concat(input, name=None): 'fluid.layers.sequence_concat') out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) - helper.append_op( - type='sequence_concat', inputs={'X': input}, outputs={'Out': [out]}) + helper.append_op(type='sequence_concat', + inputs={'X': input}, + outputs={'Out': [out]}) return out @@ -639,12 +640,13 @@ def sequence_slice(input, offset, length, name=None): offset.stop_gradient = True length.stop_gradient = True - helper.append_op( - type="sequence_slice", - inputs={"X": input, - "Offset": offset, - "Length": length}, - outputs={"Out": out}) + helper.append_op(type="sequence_slice", + inputs={ + "X": input, + "Offset": offset, + "Length": length + }, + outputs={"Out": out}) return out @@ -777,12 +779,13 @@ def sequence_expand(x, y, ref_level=-1, name=None): helper = LayerHelper('sequence_expand', **locals()) dtype = helper.input_dtype(input_param_name='x') tmp = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='sequence_expand', - inputs={'X': x, - 'Y': y}, - outputs={'Out': tmp}, - attrs={'ref_level': ref_level}) + helper.append_op(type='sequence_expand', + inputs={ + 'X': x, + 'Y': y + }, + outputs={'Out': tmp}, + attrs={'ref_level': ref_level}) return tmp @@ -899,11 +902,12 @@ def sequence_expand_as(x, y, name=None): helper = LayerHelper('sequence_expand_as', **locals()) dtype = helper.input_dtype(input_param_name='x') tmp = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='sequence_expand_as', - inputs={'X': x, - 'Y': y}, - outputs={'Out': tmp}) + helper.append_op(type='sequence_expand_as', + inputs={ + 'X': x, + 'Y': y + }, + outputs={'Out': tmp}) return tmp @@ -1012,13 +1016,16 @@ def sequence_pad(x, pad_value, maxlen=None, name=None): if maxlen is None: maxlen = -1 - helper.append_op( - type='sequence_pad', - inputs={'X': x, - 'PadValue': pad_value}, - outputs={'Out': out, - 'Length': length}, - attrs={'padded_length': maxlen}) + helper.append_op(type='sequence_pad', + inputs={ + 'X': x, + 'PadValue': pad_value + }, + outputs={ + 'Out': out, + 'Length': length + }, + attrs={'padded_length': maxlen}) return out, length @@ -1091,11 +1098,12 @@ def sequence_unpad(x, length, name=None): length.stop_gradient = True - helper.append_op( - type='sequence_unpad', - inputs={'X': x, - 'Length': length}, - outputs={'Out': out}) + helper.append_op(type='sequence_unpad', + inputs={ + 'X': x, + 'Length': length + }, + outputs={'Out': out}) return out @@ -1155,11 +1163,10 @@ def sequence_reshape(input, new_dim): ['float32', 'float64', 'int32', 'int64'], 'fluid.layers.sequence_reshape') out = helper.create_variable_for_type_inference(helper.input_dtype()) - helper.append_op( - 
type='sequence_reshape', - inputs={'X': [input]}, - outputs={'Out': [out]}, - attrs={'new_dim': new_dim}) + helper.append_op(type='sequence_reshape', + inputs={'X': [input]}, + outputs={'Out': [out]}, + attrs={'new_dim': new_dim}) return out @@ -1245,12 +1252,13 @@ def sequence_scatter(input, index, updates, name=None): dtype = helper.input_dtype() out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="sequence_scatter", - inputs={"X": input, - "Ids": index, - "Updates": updates}, - outputs={"Out": out}) + helper.append_op(type="sequence_scatter", + inputs={ + "X": input, + "Ids": index, + "Updates": updates + }, + outputs={"Out": out}) return out @@ -1312,14 +1320,15 @@ def sequence_enumerate(input, win_size, pad_value=0, name=None): check_variable_and_dtype(input, 'input', ['int32', 'int64'], 'sequence_enumerate') helper = LayerHelper('sequence_enumerate', **locals()) - out = helper.create_variable_for_type_inference( - helper.input_dtype(), stop_gradient=True) - helper.append_op( - type='sequence_enumerate', - inputs={'X': input}, - outputs={'Out': out}, - attrs={'win_size': win_size, - 'pad_value': pad_value}) + out = helper.create_variable_for_type_inference(helper.input_dtype(), + stop_gradient=True) + helper.append_op(type='sequence_enumerate', + inputs={'X': input}, + outputs={'Out': out}, + attrs={ + 'win_size': win_size, + 'pad_value': pad_value + }) return out @@ -1441,9 +1450,8 @@ def sequence_reverse(x, name=None): 'fluid.layers.sequence_reverse') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="sequence_reverse", - inputs={"X": x}, - outputs={"Y": out}, - attrs=dict()) + helper.append_op(type="sequence_reverse", + inputs={"X": x}, + outputs={"Y": out}, + attrs=dict()) return out diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 3b1fcc15ab9..3c2b0442776 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -90,8 +90,9 @@ def create_tensor(dtype, name=None, persistable=False): 'int64' ], 'create_tensor') helper = LayerHelper("create_tensor", **locals()) - return helper.create_variable( - name=helper.name, dtype=dtype, persistable=persistable) + return helper.create_variable(name=helper.name, + dtype=dtype, + persistable=persistable) def create_parameter(shape, @@ -148,8 +149,7 @@ def create_parameter(shape, helper = LayerHelper("create_parameter", **locals()) if attr is None: attr = ParamAttr(name=name) - return helper.create_parameter(attr, shape, - convert_dtype(dtype), is_bias, + return helper.create_parameter(attr, shape, convert_dtype(dtype), is_bias, default_initializer) @@ -206,15 +206,14 @@ def create_global_var(shape, ], 'create_global_var') helper = LayerHelper("global_var", **locals()) - var = helper.create_global_variable( - dtype=dtype, - shape=shape, - persistable=persistable, - name=name, - stop_gradient=True) - helper.set_variable_initializer( - var, initializer=Constant( - value=float(value), force_cpu=force_cpu)) + var = helper.create_global_variable(dtype=dtype, + shape=shape, + persistable=persistable, + name=name, + stop_gradient=True) + helper.set_variable_initializer(var, + initializer=Constant(value=float(value), + force_cpu=force_cpu)) return var @@ -266,12 +265,13 @@ def cast(x, dtype): helper = LayerHelper('cast', **locals()) out = helper.create_variable_for_type_inference( dtype=dtype, stop_gradient=x.stop_gradient) - helper.append_op( - type='cast', - inputs={'X': [x]}, - outputs={'Out': [out]}, 
- attrs={'in_dtype': x.dtype, - 'out_dtype': out.dtype}) + helper.append_op(type='cast', + inputs={'X': [x]}, + outputs={'Out': [out]}, + attrs={ + 'in_dtype': x.dtype, + 'out_dtype': out.dtype + }) return out @@ -352,7 +352,8 @@ def concat(input, axis=0, name=None): 'concat') if x.dtype != input[0].dtype: raise TypeError( - "All the Tensors in the input must have the same data type.") + "All the Tensors in the input must have the same data type." + ) else: input = [input] check_type(axis, 'axis', (int, Variable), 'concat') @@ -360,7 +361,8 @@ def concat(input, axis=0, name=None): if isinstance(axis, Variable): check_dtype( axis.dtype, 'axis', ['int32', 'int64'], 'concat', - "The data type of axis must be int32 or int64 when axis is a Tensor") + "The data type of axis must be int32 or int64 when axis is a Tensor" + ) helper = LayerHelper('concat', **locals()) out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) @@ -373,13 +375,16 @@ def concat(input, axis=0, name=None): assert len(input) == 1, "If the elements of 'input' in concat are Variable(LoDTensorArray), " \ "number of the elements must be 1, but received %s." % len(input) out_index = helper.create_variable_for_type_inference(dtype="int32") - helper.append_op( - type='tensor_array_to_tensor', - inputs={'X': input[0]}, - outputs={'Out': [out], - 'OutIndex': [out_index]}, - attrs={'axis': axis, - 'use_stack': False}) + helper.append_op(type='tensor_array_to_tensor', + inputs={'X': input[0]}, + outputs={ + 'Out': [out], + 'OutIndex': [out_index] + }, + attrs={ + 'axis': axis, + 'use_stack': False + }) else: inputs = {'X': input} attrs = {} @@ -389,8 +394,10 @@ def concat(input, axis=0, name=None): else: attrs['axis'] = axis - helper.append_op( - type='concat', inputs=inputs, outputs={'Out': [out]}, attrs=attrs) + helper.append_op(type='concat', + inputs=inputs, + outputs={'Out': [out]}, + attrs=attrs) return out @@ -493,13 +500,16 @@ def tensor_array_to_tensor(input, axis=1, name=None, use_stack=False): helper = LayerHelper('tensor_array_to_tensor', **locals()) out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) out_index = helper.create_variable_for_type_inference(dtype="int32") - helper.append_op( - type='tensor_array_to_tensor', - inputs={'X': input}, - outputs={'Out': [out], - 'OutIndex': [out_index]}, - attrs={'axis': axis, - 'use_stack': use_stack}) + helper.append_op(type='tensor_array_to_tensor', + inputs={'X': input}, + outputs={ + 'Out': [out], + 'OutIndex': [out_index] + }, + attrs={ + 'axis': axis, + 'use_stack': use_stack + }) return out, out_index @@ -567,14 +577,14 @@ def sums(input, out=None): out = helper.create_variable_for_type_inference( dtype=helper.input_dtype()) else: - check_variable_and_dtype( - out, "out", ['float32', 'float64', 'int32', 'int64'], 'sums') - - helper.append_op( - type='sum', - inputs={'X': input}, - outputs={'Out': out}, - attrs={'use_mkldnn': False}) + check_variable_and_dtype(out, "out", + ['float32', 'float64', 'int32', 'int64'], + 'sums') + + helper.append_op(type='sum', + inputs={'X': input}, + outputs={'Out': out}, + attrs={'use_mkldnn': False}) return out @@ -609,8 +619,9 @@ def assign(input, output=None): result3 = paddle.assign(np.array([[2.5, 2.5], [2.5, 2.5], [2.5, 2.5]], dtype='float32')) # result3 = [[2.5, 2.5], [2.5, 2.5], [2.5, 2.5]] """ helper = LayerHelper('assign', **locals()) - check_type(input, 'input', (Variable, numpy.ndarray, list, tuple, float, - int, bool), 'assign') + check_type(input, 'input', + (Variable, numpy.ndarray, 
list, tuple, float, int, bool), + 'assign') is_inplace = True if output is not None else False if numpy.isscalar(input) and not isinstance(input, str): @@ -641,9 +652,9 @@ def assign(input, output=None): if output is None: output = helper.create_variable_for_type_inference( dtype=input.dtype) - helper.append_op( - type='assign', inputs={'X': [input]}, - outputs={'Out': [output]}) + helper.append_op(type='assign', + inputs={'X': [input]}, + outputs={'Out': [output]}) elif isinstance(input, numpy.ndarray): # Not support [var, var, ...] currently. if len(input.shape) > 0 and any(isinstance(x, Variable) for x in input): @@ -682,18 +693,16 @@ def assign(input, output=None): if output is None: output = helper.create_variable_for_type_inference(dtype=dtype) if _non_static_mode(): - _C_ops.assign_value(output, 'shape', - list(input.shape), 'dtype', dtype, value_name, - values) + _C_ops.assign_value(output, 'shape', list(input.shape), 'dtype', + dtype, value_name, values) else: - helper.append_op( - type='assign_value', - outputs={'Out': [output]}, - attrs={ - 'dtype': dtype, - 'shape': list(input.shape), - value_name: values - }) + helper.append_op(type='assign_value', + outputs={'Out': [output]}, + attrs={ + 'dtype': dtype, + 'shape': list(input.shape), + value_name: values + }) if is_inplace and _non_static_mode(): output._bump_inplace_version() @@ -769,8 +778,9 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None): for item in shape: if not isinstance(item, Variable): shape = list( - map(lambda x: x.numpy().flat[0] if isinstance(x, Variable) else x, - shape)) + map( + lambda x: x.numpy().flat[0] + if isinstance(x, Variable) else x, shape)) break if not isinstance(dtype, core.VarDesc.VarType): @@ -790,10 +800,9 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None): else: attrs['str_value'] = str(float(value.numpy().item(0))) - _C_ops.fill_constant(out, 'value', - float(value), 'force_cpu', force_cpu, 'dtype', - out.dtype, 'str_value', attrs['str_value'], - 'shape', shape) + _C_ops.fill_constant(out, 'value', float(value), 'force_cpu', + force_cpu, 'dtype', out.dtype, 'str_value', + attrs['str_value'], 'shape', shape) out.stop_gradient = True return out @@ -816,18 +825,19 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None): 'fill_constant') helper = LayerHelper("fill_constant", **locals()) - utils.get_shape_tensor_inputs( - inputs=inputs, attrs=attrs, shape=shape, op_type='fill_constant') + utils.get_shape_tensor_inputs(inputs=inputs, + attrs=attrs, + shape=shape, + op_type='fill_constant') if out is None: out = helper.create_variable_for_type_inference(dtype=dtype) attrs['dtype'] = out.dtype - helper.append_op( - type='fill_constant', - inputs=inputs, - outputs={'Out': [out]}, - attrs=attrs, - stop_gradient=True) + helper.append_op(type='fill_constant', + inputs=inputs, + outputs={'Out': [out]}, + attrs=attrs, + stop_gradient=True) out.stop_gradient = True return out @@ -882,8 +892,9 @@ def fill_constant_batch_size_like(input, place = _current_expected_place() if force_cpu: place = core.CPUPlace() - out = _C_ops.final_state_full_batch_size_like( - input, shape, dtype, value, input_dim_idx, output_dim_idx, place) + out = _C_ops.final_state_full_batch_size_like(input, shape, dtype, + value, input_dim_idx, + output_dim_idx, place) out.stop_gradient = True return out @@ -901,11 +912,10 @@ def fill_constant_batch_size_like(input, attrs['str_value'] = str(int(value)) else: attrs['str_value'] = str(float(value)) - 
helper.append_op( - type='fill_constant_batch_size_like', - inputs={'Input': input}, - outputs={'Out': [out]}, - attrs=attrs) + helper.append_op(type='fill_constant_batch_size_like', + inputs={'Input': input}, + outputs={'Out': [out]}, + attrs=attrs) out.stop_gradient = True return out @@ -968,11 +978,10 @@ def argmin(x, axis=0): 'argmin') helper = LayerHelper("arg_min", **locals()) out = helper.create_variable_for_type_inference(VarDesc.VarType.INT64) - helper.append_op( - type='arg_min', - inputs={'X': x}, - outputs={'Out': [out]}, - attrs={'axis': axis}) + helper.append_op(type='arg_min', + inputs={'X': x}, + outputs={'Out': [out]}, + attrs={'axis': axis}) out.stop_gradient = True return out @@ -1031,11 +1040,10 @@ def argmax(x, axis=0): 'argmax') helper = LayerHelper("arg_max", **locals()) out = helper.create_variable_for_type_inference(VarDesc.VarType.INT64) - helper.append_op( - type='arg_max', - inputs={'X': x}, - outputs={'Out': [out]}, - attrs={'axis': axis}) + helper.append_op(type='arg_max', + inputs={'X': x}, + outputs={'Out': [out]}, + attrs={'axis': axis}) out.stop_gradient = True return out @@ -1118,17 +1126,20 @@ def argsort(input, axis=-1, descending=False, name=None): input, 'input', ['float32', 'float64', 'int16', 'int32', 'int64', 'uint8'], 'argsort') helper = LayerHelper("argsort", **locals()) - out = helper.create_variable_for_type_inference( - dtype=input.dtype, stop_gradient=True) - ids = helper.create_variable_for_type_inference( - VarDesc.VarType.INT64, stop_gradient=True) - helper.append_op( - type='argsort', - inputs={'X': input}, - outputs={'Out': out, - 'Indices': ids}, - attrs={'axis': axis, - 'descending': descending}) + out = helper.create_variable_for_type_inference(dtype=input.dtype, + stop_gradient=True) + ids = helper.create_variable_for_type_inference(VarDesc.VarType.INT64, + stop_gradient=True) + helper.append_op(type='argsort', + inputs={'X': input}, + outputs={ + 'Out': out, + 'Indices': ids + }, + attrs={ + 'axis': axis, + 'descending': descending + }) return out, ids @@ -1254,18 +1265,18 @@ def reverse(x, axis): reversed_tensor_array = fluid.layers.reverse(tensor_array, 0) # {[[3, 4, 5]], [[0, 1, 2]]} """ - check_variable_and_dtype( - x, 'x', ('float32', 'float64', 'int32', 'int64', 'uint8'), 'reverse') + check_variable_and_dtype(x, 'x', + ('float32', 'float64', 'int32', 'int64', 'uint8'), + 'reverse') check_type(axis, 'axis', (int, tuple, list), 'reverse') if isinstance(axis, int): axis = [axis] helper = LayerHelper("reverse", **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='reverse', - inputs={'X': x}, - outputs={'Out': [out]}, - attrs={'axis': axis}) + helper.append_op(type='reverse', + inputs={'X': x}, + outputs={'Out': [out]}, + attrs={'axis': axis}) return out @@ -1281,12 +1292,13 @@ def save(x, file_path, overwrite=True): error will be thrown. 
""" helper = LayerHelper("save", **locals()) - helper.append_op( - type="save", - inputs={"input": x}, - outputs={}, - args={"file_path": file_path, - "overwrite": overwrite}) + helper.append_op(type="save", + inputs={"input": x}, + outputs={}, + args={ + "file_path": file_path, + "overwrite": overwrite + }) def save_combine(x, file_path, overwrite=True): @@ -1318,12 +1330,13 @@ def save_combine(x, file_path, overwrite=True): normed = fluid.layers.save_combine([v1, v2], file_path="output") """ helper = LayerHelper("save_combine", **locals()) - helper.append_op( - type="save_combine", - inputs={"input": x}, - outputs={}, - args={"file_path": file_path, - "overwrite": overwrite}) + helper.append_op(type="save_combine", + inputs={"input": x}, + outputs={}, + args={ + "file_path": file_path, + "overwrite": overwrite + }) def load_combine(out, file_path): @@ -1335,11 +1348,10 @@ def load_combine(out, file_path): file_path(str): The path of the disk file. """ helper = LayerHelper("load_combine", **locals()) - helper.append_op( - type="load_combine", - inputs={}, - output={"Out": out}, - args={"file_path": file_path}) + helper.append_op(type="load_combine", + inputs={}, + output={"Out": out}, + args={"file_path": file_path}) def has_inf(x): @@ -1520,12 +1532,13 @@ def range(start, end, step, dtype, name=None): 'range/arange') helper = LayerHelper('range', **locals()) out = helper.create_variable_for_type_inference(dtype, shape=out_shape) - helper.append_op( - type='range', - inputs={'Start': start, - 'End': end, - 'Step': step}, - outputs={'Out': out}) + helper.append_op(type='range', + inputs={ + 'Start': start, + 'End': end, + 'Step': step + }, + outputs={'Out': out}) out.stop_gradient = True if out_shape is not None: out.desc.set_shape(out_shape) @@ -1605,10 +1618,10 @@ def linspace(start, stop, num, dtype=None, name=None): check_dtype(num.dtype, 'num', ['int32'], 'linspace') check_dtype(dtype, 'dtype', ['int32', 'int64', 'float32', 'float64'], 'linspace') - if ((stop_dtype == "float64" or start_dtype == "float64") and - out_dtype in ["float32", "int32"]) or ((stop_dtype == "int64" or - start_dtype == "int64") and - out_dtype == "int32"): + if ((stop_dtype == "float64" or start_dtype == "float64") + and out_dtype in ["float32", "int32"]) or ( + (stop_dtype == "int64" or start_dtype == "int64") + and out_dtype == "int32"): raise ValueError( "The dtype of start/stop is {}/{} but the attr(dtype) of linspace is {}, " "which may cause data type overflows. Please reset attr(dtype) of linspace." 
@@ -1616,13 +1629,14 @@ def linspace(start, stop, num, dtype=None, name=None): out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type='linspace', - inputs={'Start': tensor_start, - 'Stop': tensor_stop, - 'Num': tensor_num}, - attrs={'dtype': dtype}, - outputs={'Out': [out]}) + helper.append_op(type='linspace', + inputs={ + 'Start': tensor_start, + 'Stop': tensor_stop, + 'Num': tensor_num + }, + attrs={'dtype': dtype}, + outputs={'Out': [out]}) if isinstance(num, int): out.desc.set_shape((num, )) return out @@ -1655,8 +1669,9 @@ def zeros_like(x, out=None): """ - check_variable_and_dtype( - x, "x", ['bool', 'float32', 'float64', 'int32', 'int64'], 'ones_like') + check_variable_and_dtype(x, "x", + ['bool', 'float32', 'float64', 'int32', 'int64'], + 'ones_like') helper = LayerHelper("zeros_like", **locals()) if out is None: out = helper.create_variable_for_type_inference(dtype=x.dtype) @@ -1665,8 +1680,9 @@ def zeros_like(x, out=None): out, "out", ['bool', 'float32', 'float64', 'int32', 'int64'], 'zeros_like') - helper.append_op( - type='fill_zeros_like', inputs={'X': [x]}, outputs={'Out': [out]}) + helper.append_op(type='fill_zeros_like', + inputs={'X': [x]}, + outputs={'Out': [out]}) out.stop_gradient = True return out @@ -1712,8 +1728,9 @@ def diag(diagonal): out = helper.create_variable_for_type_inference(dtype=diagonal.dtype) - helper.append_op( - type='diag', inputs={'Diagonal': [diagonal]}, outputs={'Out': [out]}) + helper.append_op(type='diag', + inputs={'Diagonal': [diagonal]}, + outputs={'Out': [out]}) out.stop_gradient = True return out @@ -1782,16 +1799,15 @@ def eye(num_rows, if not isinstance(num_rows, int) or num_rows < 0: raise TypeError("num_rows should be a non-negative int") out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type='eye', - inputs={}, - outputs={'Out': [out]}, - attrs={ - 'num_rows': num_rows, - 'num_columns': num_columns, - 'dtype': dtype - }, - stop_gradient=True) + helper.append_op(type='eye', + inputs={}, + outputs={'Out': [out]}, + attrs={ + 'num_rows': num_rows, + 'num_columns': num_columns, + 'dtype': dtype + }, + stop_gradient=True) if batch_shape is not None: re_shape = [1] * len(batch_shape) @@ -1838,8 +1854,9 @@ def ones_like(x, out=None): data = fluid.layers.ones_like(x) # [1.0, 1.0, 1.0] """ - check_variable_and_dtype( - x, "x", ['bool', 'float32', 'float64', 'int32', 'int64'], 'ones_like') + check_variable_and_dtype(x, "x", + ['bool', 'float32', 'float64', 'int32', 'int64'], + 'ones_like') helper = LayerHelper("ones_like", **locals()) if out is None: @@ -1848,11 +1865,10 @@ def ones_like(x, out=None): check_variable_and_dtype( out, "out", ['bool', 'float32', 'float64', 'int32', 'int64'], 'ones_like') - helper.append_op( - type='fill_any_like', - inputs={'X': [x]}, - attrs={'value': 1.0}, - outputs={'Out': [out]}) + helper.append_op(type='fill_any_like', + inputs={'X': [x]}, + attrs={'value': 1.0}, + outputs={'Out': [out]}) return out diff --git a/python/paddle/fluid/layers/utils.py b/python/paddle/fluid/layers/utils.py index 5d781a437fe..ca11727221f 100644 --- a/python/paddle/fluid/layers/utils.py +++ b/python/paddle/fluid/layers/utils.py @@ -48,14 +48,16 @@ def convert_to_list(value, n, name, dtype=int): passed. """ if isinstance(value, dtype): - return [value, ] * n + return [ + value, + ] * n else: try: value_list = list(value) except TypeError: raise ValueError("The " + name + - "'s type must be list or tuple. Received: " + str( - value)) + "'s type must be list or tuple. 
Received: " + + str(value)) if len(value_list) != n: raise ValueError("The " + name + "'s length must be " + str(n) + ". Received: " + str(value)) @@ -63,12 +65,12 @@ def convert_to_list(value, n, name, dtype=int): try: dtype(single_value) except (ValueError, TypeError): - raise ValueError( - "The " + name + "'s type must be a list or tuple of " + str( - n) + " " + str(dtype) + " . Received: " + str( - value) + " " - "including element " + str(single_value) + " of type" + " " - + str(type(single_value))) + raise ValueError("The " + name + + "'s type must be a list or tuple of " + + str(n) + " " + str(dtype) + " . Received: " + + str(value) + " " + "including element " + str(single_value) + + " of type" + " " + str(type(single_value))) return value_list @@ -148,11 +150,11 @@ def _sequence_like(instance, args): # ordered and plain dicts (e.g., flattening a dict but using a # corresponding `OrderedDict` to pack it back). result = dict(zip(_sorted(instance), args)) - return type(instance)((key, result[key]) - for key in six.iterkeys(instance)) - elif (isinstance(instance, tuple) and hasattr(instance, "_fields") and - isinstance(instance._fields, Sequence) and - all(isinstance(f, six.string_types) for f in instance._fields)): + return type(instance)( + (key, result[key]) for key in six.iterkeys(instance)) + elif (isinstance(instance, tuple) and hasattr(instance, "_fields") + and isinstance(instance._fields, Sequence) + and all(isinstance(f, six.string_types) for f in instance._fields)): # This is a namedtuple return type(instance)(*args) else: @@ -332,9 +334,9 @@ def get_shape_tensor_inputs(inputs, attrs, shape, op_type): shape = cast(shape, 'int32') inputs["ShapeTensor"] = shape elif isinstance(shape, (list, tuple)): - assert len(shape) > 0, ( - "The size of 'shape' in" + op_type + " can't be zero, " - "but received %s." % len(shape)) + assert len(shape) > 0, ("The size of 'shape' in" + op_type + + " can't be zero, " + "but received %s." 
% len(shape)) attrs["shape"] = _get_attr_shape(shape) if _contain_var(shape): inputs['ShapeTensorList'] = _get_shape_tensor(shape) @@ -366,8 +368,8 @@ def convert_shape_to_list(shape): """ if isinstance(shape, (list, tuple)): shape = list( - map(lambda x: x.numpy().flat[0] if isinstance(x, Variable) else x, - shape)) + map(lambda x: x.numpy().flat[0] + if isinstance(x, Variable) else x, shape)) else: shape = shape.numpy().astype(int).tolist() return shape @@ -434,8 +436,8 @@ def try_get_constant_shape_from_tensor(shape_tensor): if shape_tensor.op is not None: generate_op = shape_tensor.op if generate_op.type == 'shape': - var = shape_tensor.block.vars[generate_op.input_arg_names[ - 0]] + var = shape_tensor.block.vars[ + generate_op.input_arg_names[0]] return var.shape except: return None diff --git a/python/paddle/fluid/lod_tensor.py b/python/paddle/fluid/lod_tensor.py index 1c9a1709d3e..ffc94941294 100644 --- a/python/paddle/fluid/lod_tensor.py +++ b/python/paddle/fluid/lod_tensor.py @@ -74,11 +74,10 @@ def create_lod_tensor(data, recursive_seq_lens, place): elif isinstance(data, list): # dtype and shape are not important here, # we only want to reuse code of DataToLoDTensorConverter - converter = DataToLoDTensorConverter( - place=place, - lod_level=len(recursive_seq_lens), - shape=[], - dtype=core.VarDesc.VarType.FP32) + converter = DataToLoDTensorConverter(place=place, + lod_level=len(recursive_seq_lens), + shape=[], + dtype=core.VarDesc.VarType.FP32) new_recursive_seq_lens = [] for seq in data: @@ -114,7 +113,7 @@ def create_lod_tensor(data, recursive_seq_lens, place): def create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low, high): """ - :api_attr: Static Graph + :api_attr: Static Graph Create a LoDTensor containing random integers. diff --git a/python/paddle/fluid/memory_analysis.py b/python/paddle/fluid/memory_analysis.py index 0bcfeed3516..de9a260ada8 100644 --- a/python/paddle/fluid/memory_analysis.py +++ b/python/paddle/fluid/memory_analysis.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -31,8 +31,8 @@ def get_var_and_memory_size(block, var_name, batch_size=None): assert not has_none shape[i] = batch_size has_none = True - assert all( - [s >= 0 for s in shape]), "shape {} is not deterministic".format(shape) + assert all([s >= 0 + for s in shape]), "shape {} is not deterministic".format(shape) mem_size = int(np.prod(shape)) * core.size_of_dtype(var.dtype) return var, mem_size @@ -44,7 +44,7 @@ def pre_allocate_memory(size, place): del t -# NOTE: does not consider inplace yet. +# NOTE: does not consider inplace yet. 
def get_max_memory_info(program, batch_size=None): assert program.num_blocks == 1, "only support to analysis program with only one block" cur_tmp_mem = 0 diff --git a/python/paddle/fluid/metrics.py b/python/paddle/fluid/metrics.py index a3b61f2e911..9ee27e0c3cf 100644 --- a/python/paddle/fluid/metrics.py +++ b/python/paddle/fluid/metrics.py @@ -905,28 +905,30 @@ class DetectionMAP(object): label = layers.concat([gt_label, gt_box], axis=1) # calculate mean average precision (mAP) of current mini-batch - map = detection.detection_map( - input, - label, - class_num, - background_label, - overlap_threshold=overlap_threshold, - evaluate_difficult=evaluate_difficult, - ap_version=ap_version) + map = detection.detection_map(input, + label, + class_num, + background_label, + overlap_threshold=overlap_threshold, + evaluate_difficult=evaluate_difficult, + ap_version=ap_version) states = [] states.append( - self._create_state( - dtype='int32', shape=None, suffix='accum_pos_count')) + self._create_state(dtype='int32', + shape=None, + suffix='accum_pos_count')) states.append( - self._create_state( - dtype='float32', shape=None, suffix='accum_true_pos')) + self._create_state(dtype='float32', + shape=None, + suffix='accum_true_pos')) states.append( - self._create_state( - dtype='float32', shape=None, suffix='accum_false_pos')) + self._create_state(dtype='float32', + shape=None, + suffix='accum_false_pos')) var = self._create_state(dtype='int32', shape=[1], suffix='has_state') - self.helper.set_variable_initializer( - var, initializer=Constant(value=int(0))) + self.helper.set_variable_initializer(var, + initializer=Constant(value=int(0))) self.has_state = var # calculate accumulative mAP @@ -942,11 +944,10 @@ class DetectionMAP(object): out_states=states, ap_version=ap_version) - layers.fill_constant( - shape=self.has_state.shape, - value=1, - dtype=self.has_state.dtype, - out=self.has_state) + layers.fill_constant(shape=self.has_state.shape, + value=1, + dtype=self.has_state.dtype, + out=self.has_state) self.cur_map = map self.accum_map = accum_map @@ -960,11 +961,11 @@ class DetectionMAP(object): shape(tuple|list): the shape of state Returns: State variable """ - state = self.helper.create_variable( - name="_".join([unique_name.generate(self.helper.name), suffix]), - persistable=True, - dtype=dtype, - shape=shape) + state = self.helper.create_variable(name="_".join( + [unique_name.generate(self.helper.name), suffix]), + persistable=True, + dtype=dtype, + shape=shape) return state def get_map_var(self): @@ -986,18 +987,19 @@ class DetectionMAP(object): def _clone_var_(block, var): assert isinstance(var, Variable) - return block.create_var( - name=var.name, - shape=var.shape, - dtype=var.dtype, - type=var.type, - lod_level=var.lod_level, - persistable=var.persistable) + return block.create_var(name=var.name, + shape=var.shape, + dtype=var.dtype, + type=var.type, + lod_level=var.lod_level, + persistable=var.persistable) if reset_program is None: reset_program = Program() with program_guard(main_program=reset_program): var = _clone_var_(reset_program.current_block(), self.has_state) - layers.fill_constant( - shape=var.shape, value=0, dtype=var.dtype, out=var) + layers.fill_constant(shape=var.shape, + value=0, + dtype=var.dtype, + out=var) executor.run(reset_program) diff --git a/python/paddle/fluid/multiprocess_utils.py b/python/paddle/fluid/multiprocess_utils.py index d622172dced..73bba0069cd 100644 --- a/python/paddle/fluid/multiprocess_utils.py +++ b/python/paddle/fluid/multiprocess_utils.py @@ -64,6 
+64,7 @@ class CleanupFuncRegistrar(): @classmethod def register(cls, function, signals=[]): + def _func_exectuor(): if function not in cls._executed_func_set: try: @@ -92,8 +93,8 @@ class CleanupFuncRegistrar(): for sig in signals: orig_handler = signal.signal(sig, _signal_handler) if orig_handler not in (signal.SIG_DFL, signal.SIG_IGN): - if (sig == signal.SIGINT and - orig_handler is signal.default_int_handler): + if (sig == signal.SIGINT + and orig_handler is signal.default_int_handler): continue if orig_handler not in cls._registered_func_set: atexit.register(orig_handler) diff --git a/python/paddle/fluid/net_drawer.py b/python/paddle/fluid/net_drawer.py index fd8f6eaf364..a7323d1ead2 100644 --- a/python/paddle/fluid/net_drawer.py +++ b/python/paddle/fluid/net_drawer.py @@ -46,12 +46,15 @@ OP_STYLE = { VAR_STYLE = {} -GRAPH_STYLE = {"rankdir": "TB", } +GRAPH_STYLE = { + "rankdir": "TB", +} GRAPH_ID = 0 def unique_id(): + def generator(): GRAPH_ID += 1 return GRAPH_ID @@ -112,13 +115,12 @@ def draw_graph(startup_program, main_program, **kwargs): filename = kwargs.get("filename") if filename == None: filename = str(graph_id) + ".gv" - g = Graph( - name=str(graph_id), - filename=filename, - graph_attr=GRAPH_STYLE, - node_attr=OP_STYLE, - edge_attr=VAR_STYLE, - **kwargs) + g = Graph(name=str(graph_id), + filename=filename, + graph_attr=GRAPH_STYLE, + node_attr=OP_STYLE, + edge_attr=VAR_STYLE, + **kwargs) var_dict = {} parse_graph(startup_program, g, var_dict) diff --git a/python/paddle/fluid/nets.py b/python/paddle/fluid/nets.py index e8f8bdd3f9a..abafb48d866 100644 --- a/python/paddle/fluid/nets.py +++ b/python/paddle/fluid/nets.py @@ -117,27 +117,25 @@ def simple_img_conv_pool(input, pool_stride=2, act="relu") """ - conv_out = layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=conv_stride, - padding=conv_padding, - dilation=conv_dilation, - groups=conv_groups, - param_attr=param_attr, - bias_attr=bias_attr, - act=act, - use_cudnn=use_cudnn) - - pool_out = layers.pool2d( - input=conv_out, - pool_size=pool_size, - pool_type=pool_type, - pool_stride=pool_stride, - pool_padding=pool_padding, - global_pooling=global_pooling, - use_cudnn=use_cudnn) + conv_out = layers.conv2d(input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=conv_stride, + padding=conv_padding, + dilation=conv_dilation, + groups=conv_groups, + param_attr=param_attr, + bias_attr=bias_attr, + act=act, + use_cudnn=use_cudnn) + + pool_out = layers.pool2d(input=conv_out, + pool_size=pool_size, + pool_type=pool_type, + pool_stride=pool_stride, + pool_padding=pool_padding, + global_pooling=global_pooling, + use_cudnn=use_cudnn) return pool_out @@ -235,14 +233,13 @@ def img_conv_group(input, if conv_with_batchnorm[i]: local_conv_act = None - tmp = layers.conv2d( - input=tmp, - num_filters=conv_num_filter[i], - filter_size=conv_filter_size[i], - padding=conv_padding[i], - param_attr=param_attr[i], - act=local_conv_act, - use_cudnn=use_cudnn) + tmp = layers.conv2d(input=tmp, + num_filters=conv_num_filter[i], + filter_size=conv_filter_size[i], + padding=conv_padding[i], + param_attr=param_attr[i], + act=local_conv_act, + use_cudnn=use_cudnn) if conv_with_batchnorm[i]: tmp = layers.batch_norm(input=tmp, act=conv_act) @@ -250,12 +247,11 @@ def img_conv_group(input, if abs(drop_rate) > 1e-5: tmp = layers.dropout(x=tmp, dropout_prob=drop_rate) - pool_out = layers.pool2d( - input=tmp, - pool_size=pool_size, - pool_type=pool_type, - pool_stride=pool_stride, - 
use_cudnn=use_cudnn) + pool_out = layers.pool2d(input=tmp, + pool_size=pool_size, + pool_type=pool_type, + pool_stride=pool_stride, + use_cudnn=use_cudnn) return pool_out @@ -321,13 +317,12 @@ def sequence_conv_pool(input, """ check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'input') - conv_out = layers.sequence_conv( - input=input, - num_filters=num_filters, - filter_size=filter_size, - param_attr=param_attr, - bias_attr=bias_attr, - act=act) + conv_out = layers.sequence_conv(input=input, + num_filters=num_filters, + filter_size=filter_size, + param_attr=param_attr, + bias_attr=bias_attr, + act=act) pool_out = layers.sequence_pool(input=conv_out, pool_type=pool_type) return pool_out @@ -468,8 +463,8 @@ def scaled_dot_product_attention(queries, "The dtype of keys, values and queries should be the same." "But received queries.dtype = %s, " " keys.dtype = %s, values.dtype) = %s." % - (convert_dtype(queries.dtype), convert_dtype(keys.dtype), - convert_dtype(values.dtype))) + (convert_dtype(queries.dtype), convert_dtype( + keys.dtype), convert_dtype(values.dtype))) if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): raise ValueError( @@ -542,9 +537,9 @@ def scaled_dot_product_attention(queries, # reshape the 3-D input: [batch_size, max_sequence_length, hidden_dim] # into a 4-D output: # [batch_size, max_sequence_length, num_heads, hidden_size_per_head]. - reshaped = layers.reshape( - x=x, - shape=list(x.shape[:-1]) + [num_heads, hidden_size // num_heads]) + reshaped = layers.reshape(x=x, + shape=list(x.shape[:-1]) + + [num_heads, hidden_size // num_heads]) # permute the dimensions into: # [batch_size, num_heads, max_sequence_len, hidden_size_per_head] @@ -569,13 +564,12 @@ def scaled_dot_product_attention(queries, raise ValueError("Input(x) should be a 4-D Tensor.") trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - return layers.reshape( - x=trans_x, - shape=list( - map(int, [ - trans_x.shape[0], trans_x.shape[1], trans_x.shape[2] * - trans_x.shape[3] - ]))) + return layers.reshape(x=trans_x, + shape=list( + map(int, [ + trans_x.shape[0], trans_x.shape[1], + trans_x.shape[2] * trans_x.shape[3] + ]))) q, k, v = __compute_qkv(queries, keys, values, num_heads) @@ -587,12 +581,13 @@ def scaled_dot_product_attention(queries, scaled_q = layers.scale(x=q, scale=key_dim_per_head**-0.5) product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - weights = layers.reshape( - x=layers.reshape( - x=product, shape=[-1, product.shape[-1]], act="softmax"), - shape=product.shape) + weights = layers.reshape(x=layers.reshape(x=product, + shape=[-1, product.shape[-1]], + act="softmax"), + shape=product.shape) if dropout_rate: - weights = layers.dropout( - weights, dropout_prob=dropout_rate, is_test=False) + weights = layers.dropout(weights, + dropout_prob=dropout_rate, + is_test=False) ctx_multiheads = layers.matmul(weights, v) return __combine_heads(ctx_multiheads) diff --git a/python/paddle/fluid/op.py b/python/paddle/fluid/op.py index ee61ec1c3da..d5be4423775 100644 --- a/python/paddle/fluid/op.py +++ b/python/paddle/fluid/op.py @@ -126,8 +126,8 @@ class OpDescCreationMethod(object): new_attr.longs.extend(user_defined_attr) else: raise NotImplementedError( - "A not supported attribute type: %s." % ( - str(attr.type))) + "A not supported attribute type: %s." 
% + (str(attr.type))) return op_desc @@ -144,6 +144,7 @@ class OpDescCreationMethod(object): class OpInfo(object): + def __init__(self, name, method, inputs, outputs, attrs): self.name = name self.method = method @@ -162,15 +163,17 @@ def create_op_creation_method(op_proto): opdesc = method(*args, **kwargs) return core.Operator.create(opdesc.SerializeToString()) - return OpInfo( - method=__impl__, - name=op_proto.type, - inputs=[(var.name, var.duplicable) for var in op_proto.inputs], - outputs=[(var.name, var.duplicable) for var in op_proto.outputs], - attrs=[attr.name for attr in op_proto.attrs]) + return OpInfo(method=__impl__, + name=op_proto.type, + inputs=[(var.name, var.duplicable) + for var in op_proto.inputs], + outputs=[(var.name, var.duplicable) + for var in op_proto.outputs], + attrs=[attr.name for attr in op_proto.attrs]) class OperatorFactory(object): + def __init__(self): self.op_methods = dict() diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 49fb5399d8a..20e39e89f30 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -130,8 +130,8 @@ class Optimizer(object): # program -> Variable(learning_rate) self._learning_rate_map = dict() if isinstance(self._learning_rate, framework.Variable): - self._learning_rate_map[framework.default_main_program( - )] = self._learning_rate + self._learning_rate_map[ + framework.default_main_program()] = self._learning_rate # Dictionary of accumulators. Some optimizer subclasses need to # allocate and manage extra variables associated with the parameters # to train. These variables are called accumulators. @@ -145,7 +145,7 @@ class Optimizer(object): self._param_device_map = dict() # NOTE(zhiqiu): sometimes we want to add some variables(Tenosr) to the optimizer for a specific optimization, # for example, we want to pass 'found_inf' to adam optimizer so it can skip update when found_inf is True. - # And these variables should not be the parameters of Optimizer's construnctor (because not commonly used). + # And these variables should not be the parameters of Optimizer's construnctor (because not commonly used). # Use _auxiliary_vars together with _set_auxiliary_var/_get_auxiliary_var to achieve that. 
self._auxiliary_vars = dict() @@ -187,11 +187,14 @@ class Optimizer(object): if not isinstance(self._learning_rate, _LearningRateEpochDecay): var_tmp = None - var_temp = framework._varbase_creator( - None, name='global_step', dtype='int32') + var_temp = framework._varbase_creator(None, + name='global_step', + dtype='int32') - tensor.fill_constant( - [1], "int32", self._learning_rate.step_num, out=var_temp) + tensor.fill_constant([1], + "int32", + self._learning_rate.step_num, + out=var_temp) state_dict['global_step'] = var_temp return state_dict @@ -326,8 +329,8 @@ class Optimizer(object): main_prog = framework.default_main_program() main_prog.lr_sheduler = self._learning_rate main_prog.lr_var = lr_var - self._learning_rate_map[framework.default_main_program( - )] = lr_var + self._learning_rate_map[ + framework.default_main_program()] = lr_var lr_value = float(self._learning_rate()) self.helper.set_variable_initializer( @@ -351,8 +354,8 @@ class Optimizer(object): persistable=True) # get learning rate Variable from LearningRateDecay elif isinstance(self._learning_rate, LearningRateDecay): - self._learning_rate_map[framework.default_main_program( - )] = self._learning_rate() + self._learning_rate_map[ + framework.default_main_program()] = self._learning_rate() else: raise TypeError( "optimizer's learning rate must be float or LearningRateDecay" @@ -370,13 +373,13 @@ class Optimizer(object): ) # create learning rate in the current main program - self._learning_rate_map[framework.default_main_program( - )] = layers.create_global_var( - name=unique_name.generate("learning_rate"), - shape=[1], - value=float(self._learning_rate), - dtype='float32' if self._dtype is None else self._dtype, - persistable=True) + self._learning_rate_map[ + framework.default_main_program()] = layers.create_global_var( + name=unique_name.generate("learning_rate"), + shape=[1], + value=float(self._learning_rate), + dtype='float32' if self._dtype is None else self._dtype, + persistable=True) @framework.dygraph_only def set_lr(self, value): @@ -441,22 +444,20 @@ class Optimizer(object): current_lr = self._global_learning_rate() if current_lr is not None: if framework._non_static_mode(): - _C_ops.fill_constant(current_lr, 'value', - float(value), 'dtype', - current_lr.dtype, 'shape', + _C_ops.fill_constant(current_lr, 'value', float(value), + 'dtype', current_lr.dtype, 'shape', list(current_lr.shape)) else: global_block = framework.default_main_program( ).global_block() - global_block.append_op( - type='fill_constant', - outputs={'Out': [current_lr]}, - attrs={ - 'dtype': current_lr.dtype, - 'shape': list(current_lr.shape), - 'value': float(value) - }, - stop_gradient=True) + global_block.append_op(type='fill_constant', + outputs={'Out': [current_lr]}, + attrs={ + 'dtype': current_lr.dtype, + 'shape': list(current_lr.shape), + 'value': float(value) + }, + stop_gradient=True) else: assert len(value.shape) == 1 and value.shape[ 0] == 1, "optimizer's learning rate must be 1-D Tensor with shape[1]" @@ -596,12 +597,13 @@ class Optimizer(object): """ if self._name is not None: name = self._name + "_" + name - if (name in self._accumulators and - param.name in self._accumulators[name]): + if (name in self._accumulators + and param.name in self._accumulators[name]): if framework._non_static_mode(): return self._accumulators[name][param.name] - raise Exception("Accumulator {} already exists for parameter {}". 
- format(name, param.name)) + raise Exception( + "Accumulator {} already exists for parameter {}".format( + name, param.name)) if shape == None: shape = param.shape assert isinstance(self.helper, LayerHelper) @@ -615,8 +617,8 @@ class Optimizer(object): persistable=True, dtype=dtype or param.dtype, type=core.VarDesc.VarType.LOD_TENSOR - if framework._non_static_mode() else (param.type - if type is None else type), + if framework._non_static_mode() else + (param.type if type is None else type), shape=shape, belong_to_optimizer=True) if device is None: @@ -700,10 +702,11 @@ class Optimizer(object): """ if self._name is not None: name = self._name + "_" + name - if (name not in self._accumulators or - param.name not in self._accumulators[name]): - raise Exception("Accumulator {} does not exist for parameter {}". - format(name, param.name)) + if (name not in self._accumulators + or param.name not in self._accumulators[name]): + raise Exception( + "Accumulator {} does not exist for parameter {}".format( + name, param.name)) return self._accumulators[name][param.name] def _get_global_accumulator(self, name): @@ -795,8 +798,8 @@ class Optimizer(object): with param_and_grad[0].block.program._optimized_guard( param_and_grad), name_scope("optimizer"): if param_and_grad[0].trainable is True: - device = self._get_device_for_param(param_and_grad[0] - .name) + device = self._get_device_for_param( + param_and_grad[0].name) with device_guard(device): optimize_op = self._append_optimize_op( target_block, param_and_grad) @@ -925,10 +928,10 @@ class Optimizer(object): Function helper of append_regularization_ops. """ # If no gradient or no regularization is specified, then we don't need to do anything - if grad is None or ((not hasattr(param, 'regularizer') or - (hasattr(param, 'regularizer') and - param.regularizer is None)) and - regularization is None): + if grad is None or ( + (not hasattr(param, 'regularizer') or + (hasattr(param, 'regularizer') and param.regularizer is None)) + and regularization is None): return grad regularization_term = None if hasattr(param, 'regularizer') and param.regularizer is not None: @@ -987,8 +990,8 @@ class Optimizer(object): params_and_grads = [] if framework._non_static_mode(): for param, grad in parameters_and_grads: - new_grad = self._create_regularization_of_grad(param, grad, - regularization) + new_grad = self._create_regularization_of_grad( + param, grad, regularization) params_and_grads.append((param, new_grad)) else: repeate_regularizer = False @@ -1048,40 +1051,38 @@ class Optimizer(object): belong_to_optimizer=True) with program_guard(default_main_program()): - block.append_op( - type="coalesce_tensor", - inputs={"Input": need_flatten_params}, - outputs={ - "Output": need_flatten_params, - "FusedOutput": flatten_param - }, - attrs={ - "copy_data": True, - "use_align": True, - "align_size": self._align_size, - "dtype": need_flatten_params[0].dtype - }) + block.append_op(type="coalesce_tensor", + inputs={"Input": need_flatten_params}, + outputs={ + "Output": need_flatten_params, + "FusedOutput": flatten_param + }, + attrs={ + "copy_data": True, + "use_align": True, + "align_size": self._align_size, + "dtype": need_flatten_params[0].dtype + }) - block.append_op( - type="coalesce_tensor", - inputs={"Input": need_flatten_grads}, - outputs={ - "Output": need_flatten_grads, - "FusedOutput": flatten_grad - }, - attrs={ - "copy_data": True, - "use_align": True, - "align_size": self._align_size, - "dtype": need_flatten_grads[0].dtype - }) + 
block.append_op(type="coalesce_tensor", + inputs={"Input": need_flatten_grads}, + outputs={ + "Output": need_flatten_grads, + "FusedOutput": flatten_grad + }, + attrs={ + "copy_data": True, + "use_align": True, + "align_size": self._align_size, + "dtype": need_flatten_grads[0].dtype + }) #NOTE(zhiqiu): the initializer should be set after coalesce_tensor op, # so the shape of flatten_param and flatten_grad will be inferred. - self.helper.set_variable_initializer( - flatten_param, initializer=Constant(0.0)) - self.helper.set_variable_initializer( - flatten_grad, initializer=Constant(0.0)) + self.helper.set_variable_initializer(flatten_param, + initializer=Constant(0.0)) + self.helper.set_variable_initializer(flatten_grad, + initializer=Constant(0.0)) return [(flatten_param, flatten_grad)] @@ -1233,14 +1234,14 @@ class Optimizer(object): parameter_list = parameter_list if parameter_list \ else self._parameter_list - params_grads = self.backward( - loss, - startup_program=startup_program, - parameter_list=parameter_list, - no_grad_set=no_grad_set) + params_grads = self.backward(loss, + startup_program=startup_program, + parameter_list=parameter_list, + no_grad_set=no_grad_set) - optimize_ops = self.apply_optimize( - loss, startup_program=startup_program, params_grads=params_grads) + optimize_ops = self.apply_optimize(loss, + startup_program=startup_program, + params_grads=params_grads) return optimize_ops, params_grads @@ -1309,12 +1310,11 @@ class SGDOptimizer(Optimizer): multi_precision=False, name=None): assert learning_rate is not None - super(SGDOptimizer, self).__init__( - learning_rate=learning_rate, - parameter_list=parameter_list, - regularization=regularization, - grad_clip=grad_clip, - name=name) + super(SGDOptimizer, self).__init__(learning_rate=learning_rate, + parameter_list=parameter_list, + regularization=regularization, + grad_clip=grad_clip, + name=name) self.type = "sgd" self._use_mkldnn = False self._multi_precision = multi_precision @@ -1328,21 +1328,19 @@ class SGDOptimizer(Optimizer): var_name = param.name + "_fp32_master" var_name = unique_name.generate(var_name) - var = layers.create_global_var( - name=var_name, - shape=param.shape, - value=0, - dtype='float32', - persistable=True) + var = layers.create_global_var(name=var_name, + shape=param.shape, + value=0, + dtype='float32', + persistable=True) block = self.helper.startup_program.global_block() - block.append_op( - type="cast", - inputs={"X": [param]}, - outputs={"Out": [var]}, - attrs={ - "in_dtype": param.dtype, - "out_dtype": core.VarDesc.VarType.FP32 - }) + block.append_op(type="cast", + inputs={"X": [param]}, + outputs={"Out": [var]}, + attrs={ + "in_dtype": param.dtype, + "out_dtype": core.VarDesc.VarType.FP32 + }) self._master_weights[param.name] = var return var @@ -1396,12 +1394,11 @@ class SGDOptimizer(Optimizer): inputs["MasterParam"] = master_weight outputs["MasterParamOut"] = master_weight - sgd_op = block.append_op( - type=self.type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + sgd_op = block.append_op(type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return sgd_op @@ -1488,12 +1485,11 @@ class MomentumOptimizer(Optimizer): name=None): assert learning_rate is not None assert momentum is not None - super(MomentumOptimizer, self).__init__( - learning_rate=learning_rate, - parameter_list=parameter_list, - regularization=regularization, - grad_clip=grad_clip, - name=name) + super(MomentumOptimizer, 
self).__init__(learning_rate=learning_rate, + parameter_list=parameter_list, + regularization=regularization, + grad_clip=grad_clip, + name=name) self.type = "momentum" self._momentum = momentum self._use_nesterov = bool(use_nesterov) @@ -1512,10 +1508,11 @@ class MomentumOptimizer(Optimizer): lr = self._create_param_lr(param_and_grad) master_weight = None if framework._non_static_mode(): - _, _, _ = _C_ops.momentum( - param_and_grad[0], param_and_grad[1], velocity_acc, lr, - master_weight, param_and_grad[0], velocity_acc, master_weight, - 'mu', self._momentum, 'use_nesterov', self._use_nesterov) + _, _, _ = _C_ops.momentum(param_and_grad[0], param_and_grad[1], + velocity_acc, lr, master_weight, + param_and_grad[0], velocity_acc, + master_weight, 'mu', self._momentum, + 'use_nesterov', self._use_nesterov) return None attrs = {"mu": self._momentum, "use_nesterov": self._use_nesterov} @@ -1531,12 +1528,11 @@ class MomentumOptimizer(Optimizer): "VelocityOut": [velocity_acc] } # create the momentum optimize op - momentum_op = block.append_op( - type=self.type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + momentum_op = block.append_op(type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return momentum_op @@ -1629,12 +1625,12 @@ class DGCMomentumOptimizer(Optimizer): assert learning_rate is not None assert momentum is not None - super(DGCMomentumOptimizer, self).__init__( - learning_rate=learning_rate, - parameter_list=parameter_list, - regularization=regularization, - grad_clip=grad_clip, - name=name) + super(DGCMomentumOptimizer, + self).__init__(learning_rate=learning_rate, + parameter_list=parameter_list, + regularization=regularization, + grad_clip=grad_clip, + name=name) self.type = "dgc_momentum" self._momentum = momentum self._use_nesterov = bool(use_nesterov) @@ -1719,12 +1715,11 @@ class DGCMomentumOptimizer(Optimizer): attrs.update({"rampup_begin_step": float(self._rampup_begin_step)}) # create the dgc momentum optimize op - dgc_momentum_op = block.append_op( - type=type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + dgc_momentum_op = block.append_op(type=type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return dgc_momentum_op def _add_auto_increment_var(self, counter_name, begin, step=1): @@ -1732,10 +1727,10 @@ class DGCMomentumOptimizer(Optimizer): counter, is_new_var = helper.create_or_get_global_variable( name=counter_name, dtype='float32', shape=[1], persistable=True) if is_new_var: - helper.set_variable_initializer( - counter, - initializer=Constant( - value=float(begin - 1), force_cpu=True)) + helper.set_variable_initializer(counter, + initializer=Constant( + value=float(begin - 1), + force_cpu=True)) helper.main_program.global_block()._prepend_op( type='increment', inputs={'X': [counter]}, @@ -1751,10 +1746,10 @@ class DGCMomentumOptimizer(Optimizer): counter, is_new_var = helper.create_or_get_global_variable( name=name, dtype='float32', shape=[1], persistable=True) if is_new_var: - helper.set_variable_initializer( - counter, - initializer=Constant( - value=float(value), force_cpu=True)) + helper.set_variable_initializer(counter, + initializer=Constant( + value=float(value), + force_cpu=True)) counter.stop_gradient = True return counter @@ -1767,8 +1762,8 @@ class DGCMomentumOptimizer(Optimizer): self._global_step_var = self._add_auto_increment_var( counter_name=core.dgc.kDGCCounterName(), begin=0) - self._nranks_var = self._add_nranks_var( - 
name=core.dgc.kDGCNRanksName(), value=-1) + self._nranks_var = self._add_nranks_var(name=core.dgc.kDGCNRanksName(), + value=-1) # rampup begin step var for all_reduce_op_handle self._rampup_begin_step_var = tensor.create_global_var( @@ -1790,29 +1785,29 @@ class DGCMomentumOptimizer(Optimizer): v_var = self._add_accumulator(self._v_velocity_acc_str, param_var) - k_var = tensor.create_global_var( - shape=[1], - dtype=param_var.dtype, - persistable=True, - name=param_var.name + core.dgc.kDGCKName(), - value=0.0, - force_cpu=True) - - encoded_var = tensor.create_global_var( - shape=[1], - dtype=param_var.dtype, - persistable=True, - name=param_var.name + core.dgc.kDGCEncodedName(), - value=0.0, - force_cpu=False) - - gather_var = tensor.create_global_var( - shape=[1], - dtype=param_var.dtype, - persistable=True, - name=param_var.name + core.dgc.kDGCGatherName(), - value=0.0, - force_cpu=False) + k_var = tensor.create_global_var(shape=[1], + dtype=param_var.dtype, + persistable=True, + name=param_var.name + + core.dgc.kDGCKName(), + value=0.0, + force_cpu=True) + + encoded_var = tensor.create_global_var(shape=[1], + dtype=param_var.dtype, + persistable=True, + name=param_var.name + + core.dgc.kDGCEncodedName(), + value=0.0, + force_cpu=False) + + gather_var = tensor.create_global_var(shape=[1], + dtype=param_var.dtype, + persistable=True, + name=param_var.name + + core.dgc.kDGCGatherName(), + value=0.0, + force_cpu=False) # del back oprolevarname op_maker = core.op_proto_and_checker_maker @@ -1855,24 +1850,28 @@ class DGCMomentumOptimizer(Optimizer): name = unique_name.generate_with_ignorable_key(".".join( [helper.name, 'tmp'])) - out = helper.create_variable( - type=x.type, name=name, dtype=x.dtype, persistable=False) - - helper.append_op( - type="dgc_clip_by_norm", - inputs={"X": x, - "current_step": self._global_step_var}, - attrs={ - "max_norm": max_norm, - "rampup_begin_step": float(self._rampup_begin_step) - }, - outputs={"Out": out}) + out = helper.create_variable(type=x.type, + name=name, + dtype=x.dtype, + persistable=False) + + helper.append_op(type="dgc_clip_by_norm", + inputs={ + "X": x, + "current_step": self._global_step_var + }, + attrs={ + "max_norm": max_norm, + "rampup_begin_step": float(self._rampup_begin_step) + }, + outputs={"Out": out}) return out def _append_clip_norm(self, grad_var, clip_norm): with grad_var.block.program._backward_role_guard(): - return self._clip_by_norm( - x=grad_var, max_norm=clip_norm, name=grad_var.name) + return self._clip_by_norm(x=grad_var, + max_norm=clip_norm, + name=grad_var.name) def _dgc_op(self, param_var, clip_var, grad_var, u_var, v_var, k_var, encoded_var, gather_var): @@ -1886,34 +1885,40 @@ class DGCMomentumOptimizer(Optimizer): regular_type, regular_coeff = self._get_regularization_param( param_var.regularizer) - dgc_op = block.append_op( - type="dgc", - inputs={ - "U": u_var, - "V": v_var, - "Grad": clip_var, - "Param": param_var, - "current_step": self._global_step_var, - "nranks": self._nranks_var, - }, - outputs={ - "U_out": u_var, - "V_out": v_var, - "EncodeGrad": encoded_var, - "k": k_var, - "Grad_out": grad_var, - "GatherBuff": gather_var, - }, - attrs={ - "m": self._momentum, - "sparsity": self._sparsity, - "use_nesterov": self._use_nesterov, - "rampup_begin_step": float(self._rampup_begin_step), - "rampup_step": float(self._rampup_step), - "regular_coeff": float(regular_coeff), - "regular_type": int(regular_type), - }, - stop_gradient=True) + dgc_op = block.append_op(type="dgc", + inputs={ + "U": u_var, + "V": v_var, + 
"Grad": clip_var, + "Param": param_var, + "current_step": self._global_step_var, + "nranks": self._nranks_var, + }, + outputs={ + "U_out": u_var, + "V_out": v_var, + "EncodeGrad": encoded_var, + "k": k_var, + "Grad_out": grad_var, + "GatherBuff": gather_var, + }, + attrs={ + "m": + self._momentum, + "sparsity": + self._sparsity, + "use_nesterov": + self._use_nesterov, + "rampup_begin_step": + float(self._rampup_begin_step), + "rampup_step": + float(self._rampup_step), + "regular_coeff": + float(regular_coeff), + "regular_type": + int(regular_type), + }, + stop_gradient=True) backward = op_maker.OpRole.Backward dgc_op._set_attr(op_maker.kOpRoleAttrName(), backward) @@ -2039,12 +2044,12 @@ class LarsMomentumOptimizer(Optimizer): rescale_grad=1.0): assert learning_rate is not None assert momentum is not None - super(LarsMomentumOptimizer, self).__init__( - learning_rate=learning_rate, - parameter_list=parameter_list, - regularization=regularization, - grad_clip=grad_clip, - name=name) + super(LarsMomentumOptimizer, + self).__init__(learning_rate=learning_rate, + parameter_list=parameter_list, + regularization=regularization, + grad_clip=grad_clip, + name=name) self.type = "lars_momentum" self._momentum = momentum self._lars_coeff = float(lars_coeff) @@ -2066,21 +2071,19 @@ class LarsMomentumOptimizer(Optimizer): var_name = param.name + '_fp32_master' var_name = unique_name.generate(var_name) - var = layers.create_global_var( - name=var_name, - shape=param.shape, - value=0, - dtype='float32', - persistable=True) + var = layers.create_global_var(name=var_name, + shape=param.shape, + value=0, + dtype='float32', + persistable=True) block = self.helper.startup_program.global_block() - block.append_op( - type="cast", - inputs={"X": [param]}, - outputs={"Out": [var]}, - attrs={ - "in_dtype": param.dtype, - "out_dtype": core.VarDesc.VarType.FP32 - }) + block.append_op(type="cast", + inputs={"X": [param]}, + outputs={"Out": [var]}, + attrs={ + "in_dtype": param.dtype, + "out_dtype": core.VarDesc.VarType.FP32 + }) self._master_weights[param.name] = var return var @@ -2098,10 +2101,11 @@ class LarsMomentumOptimizer(Optimizer): target_param = self._master_weights[ param.name] if find_master else param target_name = target_param.name - if (name not in self._accumulators or - target_name not in self._accumulators[name]): - raise Exception("Accumulator {} does not exist for parameter {}". 
- format(name, target_name)) + if (name not in self._accumulators + or target_name not in self._accumulators[name]): + raise Exception( + "Accumulator {} does not exist for parameter {}".format( + name, target_name)) return self._accumulators[name][target_name] def _create_accumulators(self, block, parameters): @@ -2169,12 +2173,11 @@ class LarsMomentumOptimizer(Optimizer): self._epsilon, "rescale_grad", self._rescale_grad) else: # create the momentum optimize op - momentum_op = block.append_op( - type=self.type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + momentum_op = block.append_op(type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return momentum_op @@ -2254,12 +2257,11 @@ class AdagradOptimizer(Optimizer): initial_accumulator_value=0.0): assert learning_rate is not None assert epsilon is not None - super(AdagradOptimizer, self).__init__( - learning_rate=learning_rate, - parameter_list=parameter_list, - regularization=regularization, - grad_clip=grad_clip, - name=name) + super(AdagradOptimizer, self).__init__(learning_rate=learning_rate, + parameter_list=parameter_list, + regularization=regularization, + grad_clip=grad_clip, + name=name) self.type = "adagrad" self._epsilon = epsilon self.initial_accumulator_value = initial_accumulator_value @@ -2268,10 +2270,9 @@ class AdagradOptimizer(Optimizer): assert isinstance(block, framework.Block) for p in parameters: - self._add_accumulator( - self._moment_acc_str, - p, - fill_value=self.initial_accumulator_value) + self._add_accumulator(self._moment_acc_str, + p, + fill_value=self.initial_accumulator_value) def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, framework.Block) @@ -2483,14 +2484,14 @@ class AdamOptimizer(Optimizer): assert beta1 is not None assert beta2 is not None assert epsilon is not None - super(AdamOptimizer, self).__init__( - learning_rate=learning_rate, - parameter_list=parameter_list, - regularization=regularization, - grad_clip=grad_clip, - flatten_param_grads=flatten_param_grads, - align_size=align_size, - name=name) + super(AdamOptimizer, + self).__init__(learning_rate=learning_rate, + parameter_list=parameter_list, + regularization=regularization, + grad_clip=grad_clip, + flatten_param_grads=flatten_param_grads, + align_size=align_size, + name=name) self.type = "adam" self._beta1 = beta1 self._beta2 = beta2 @@ -2613,12 +2614,11 @@ class AdamOptimizer(Optimizer): else: attrs['epsilon'] = self._epsilon - adam_op = block.append_op( - type=self.type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + adam_op = block.append_op(type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return adam_op @@ -2639,20 +2639,18 @@ class AdamOptimizer(Optimizer): if isinstance(self._beta1, Variable): inputs["Y"] = self._beta1 # use elementwise_mul for better performance - block.append_op( - type="elementwise_mul", - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + block.append_op(type="elementwise_mul", + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) else: attrs['scale'] = self._beta1 - block.append_op( - type="scale", - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + block.append_op(type="scale", + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) inputs = {"X": beta2_pow_acc} outputs = {"Out": beta2_pow_acc} @@ -2660,20 +2658,18 @@ class AdamOptimizer(Optimizer): if 
isinstance(self._beta2, Variable): inputs["Y"] = self._beta2 # use elementwise_mul for better performance - block.append_op( - type="elementwise_mul", - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + block.append_op(type="elementwise_mul", + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) else: attrs['scale'] = self._beta2 - block.append_op( - type="scale", - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + block.append_op(type="scale", + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) class AdamaxOptimizer(Optimizer): @@ -2774,12 +2770,11 @@ class AdamaxOptimizer(Optimizer): assert beta1 is not None assert beta2 is not None assert epsilon is not None - super(AdamaxOptimizer, self).__init__( - learning_rate=learning_rate, - parameter_list=parameter_list, - regularization=regularization, - grad_clip=grad_clip, - name=name) + super(AdamaxOptimizer, self).__init__(learning_rate=learning_rate, + parameter_list=parameter_list, + regularization=regularization, + grad_clip=grad_clip, + name=name) self.type = "adamax" self._beta1 = beta1 self._beta2 = beta2 @@ -2790,11 +2785,10 @@ class AdamaxOptimizer(Optimizer): for p in parameters: self._add_accumulator(self._moment_acc_str, p) self._add_accumulator(self._inf_norm_acc_str, p) - self._add_accumulator( - name=self._beta1_pow_acc_str, - param=p, - fill_value=self._beta1, - shape=[1]) + self._add_accumulator(name=self._beta1_pow_acc_str, + param=p, + fill_value=self._beta1, + shape=[1]) def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, framework.Block) @@ -2843,8 +2837,8 @@ class AdamaxOptimizer(Optimizer): for param, grad in parameters_and_grads: if grad is None or param.trainable is False: continue - with param.block.program._optimized_guard( - [param, grad]), name_scope('adamx'): + with param.block.program._optimized_guard([param, grad + ]), name_scope('adamx'): beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, param) if framework._non_static_mode(): @@ -2855,12 +2849,11 @@ class AdamaxOptimizer(Optimizer): tmp = _C_ops.scale(beta1_pow_acc, "scale", self._beta1) beta1_pow_acc.copy_(tmp, False) else: - block.append_op( - type="scale", - inputs={"X": beta1_pow_acc}, - outputs={"Out": beta1_pow_acc}, - attrs={"scale": self._beta1}, - stop_gradient=True) + block.append_op(type="scale", + inputs={"X": beta1_pow_acc}, + outputs={"Out": beta1_pow_acc}, + attrs={"scale": self._beta1}, + stop_gradient=True) class DpsgdOptimizer(Optimizer): @@ -2918,8 +2911,8 @@ class DpsgdOptimizer(Optimizer): assert clip is not None assert batch_size is not None assert sigma is not None - super(DpsgdOptimizer, self).__init__( - learning_rate=learning_rate, parameter_list=parameter_list) + super(DpsgdOptimizer, self).__init__(learning_rate=learning_rate, + parameter_list=parameter_list) self.type = "dpsgd" self._clip = clip self._batch_size = batch_size @@ -2946,21 +2939,23 @@ class DpsgdOptimizer(Optimizer): self._batch_size, "sigma", self._sigma, "seed", self._seed) else: - dpsgd_op = block.append_op( - type=self.type, - inputs={ - "Param": param_and_grad[0], - "Grad": param_and_grad[1], - "LearningRate": self._create_param_lr(param_and_grad) - }, - outputs={"ParamOut": param_and_grad[0]}, - attrs={ - "clip": self._clip, - "batch_size": self._batch_size, - "sigma": self._sigma, - "seed": self._seed - }, - stop_gradient=True) + dpsgd_op = block.append_op(type=self.type, + inputs={ + "Param": + param_and_grad[0], + "Grad": + 
param_and_grad[1], + "LearningRate": + self._create_param_lr(param_and_grad) + }, + outputs={"ParamOut": param_and_grad[0]}, + attrs={ + "clip": self._clip, + "batch_size": self._batch_size, + "sigma": self._sigma, + "seed": self._seed + }, + stop_gradient=True) return dpsgd_op @@ -3035,12 +3030,12 @@ class DecayedAdagradOptimizer(Optimizer): assert decay is not None assert epsilon is not None - super(DecayedAdagradOptimizer, self).__init__( - learning_rate=learning_rate, - parameter_list=parameter_list, - regularization=regularization, - grad_clip=grad_clip, - name=name) + super(DecayedAdagradOptimizer, + self).__init__(learning_rate=learning_rate, + parameter_list=parameter_list, + regularization=regularization, + grad_clip=grad_clip, + name=name) self.type = "decayed_adagrad" self._decay = decay self._epsilon = epsilon @@ -3058,10 +3053,11 @@ class DecayedAdagradOptimizer(Optimizer): param_and_grad[0]) if framework._non_static_mode(): - _C_ops.decayed_adagrad( - param_and_grad[0], param_and_grad[1], moment_acc, - self._create_param_lr(param_and_grad), param_and_grad[0], - moment_acc, "epsilon", self._epsilon, "decay", self._decay) + _C_ops.decayed_adagrad(param_and_grad[0], param_and_grad[1], + moment_acc, + self._create_param_lr(param_and_grad), + param_and_grad[0], moment_acc, "epsilon", + self._epsilon, "decay", self._decay) else: # Create the decayed adagrad optimizer op decayed_adagrad_op = block.append_op( @@ -3076,8 +3072,10 @@ class DecayedAdagradOptimizer(Optimizer): "ParamOut": param_and_grad[0], "MomentOut": moment_acc }, - attrs={"epsilon": self._epsilon, - "decay": self._decay}, + attrs={ + "epsilon": self._epsilon, + "decay": self._decay + }, stop_gradient=True) return decayed_adagrad_op @@ -3154,12 +3152,11 @@ class AdadeltaOptimizer(Optimizer): raise ValueError("epsilon is not set.") if rho is None: raise ValueError("rho is not set.") - super(AdadeltaOptimizer, self).__init__( - learning_rate=learning_rate, - parameter_list=parameter_list, - regularization=regularization, - grad_clip=grad_clip, - name=name) + super(AdadeltaOptimizer, self).__init__(learning_rate=learning_rate, + parameter_list=parameter_list, + regularization=regularization, + grad_clip=grad_clip, + name=name) self.type = "adadelta" self._epsilon = epsilon self._rho = rho @@ -3189,22 +3186,30 @@ class AdadeltaOptimizer(Optimizer): "rho", self._rho) else: # Create the adadelta optimizer op - adadelta_op = block.append_op( - type=self.type, - inputs={ - "Param": param_and_grad[0], - "Grad": param_and_grad[1], - "AvgSquaredGrad": avg_squared_grad_acc, - "AvgSquaredUpdate": avg_squared_update_acc - }, - outputs={ - "ParamOut": param_and_grad[0], - "AvgSquaredGradOut": avg_squared_grad_acc, - "AvgSquaredUpdateOut": avg_squared_update_acc - }, - attrs={"epsilon": self._epsilon, - "rho": self._rho}, - stop_gradient=True) + adadelta_op = block.append_op(type=self.type, + inputs={ + "Param": + param_and_grad[0], + "Grad": + param_and_grad[1], + "AvgSquaredGrad": + avg_squared_grad_acc, + "AvgSquaredUpdate": + avg_squared_update_acc + }, + outputs={ + "ParamOut": + param_and_grad[0], + "AvgSquaredGradOut": + avg_squared_grad_acc, + "AvgSquaredUpdateOut": + avg_squared_update_acc + }, + attrs={ + "epsilon": self._epsilon, + "rho": self._rho + }, + stop_gradient=True) return adadelta_op @@ -3330,12 +3335,11 @@ class RMSPropOptimizer(Optimizer): regularization=None, grad_clip=None, name=None): - super(RMSPropOptimizer, self).__init__( - learning_rate=learning_rate, - parameter_list=parameter_list, - 
regularization=regularization, - grad_clip=grad_clip, - name=name) + super(RMSPropOptimizer, self).__init__(learning_rate=learning_rate, + parameter_list=parameter_list, + regularization=regularization, + grad_clip=grad_clip, + name=name) if learning_rate is None: raise ValueError("learning_rate is not set.") if rho is None: @@ -3371,12 +3375,13 @@ class RMSPropOptimizer(Optimizer): mean_grad_acc = self._get_accumulator(self._mean_grad_acc_str, param_and_grad[0]) if framework._non_static_mode(): - _C_ops.rmsprop( - param_and_grad[0], mean_square_acc, - self._create_param_lr(param_and_grad), param_and_grad[1], - momentum_acc, param_and_grad[0], momentum_acc, mean_square_acc, - mean_grad_acc, "epsilon", self._epsilon, "decay", self._rho, - "momentum", self._momentum, "centered", self._centered) + _C_ops.rmsprop(param_and_grad[0], mean_square_acc, + self._create_param_lr(param_and_grad), + param_and_grad[1], momentum_acc, param_and_grad[0], + momentum_acc, mean_square_acc, mean_grad_acc, + "epsilon", self._epsilon, "decay", self._rho, + "momentum", self._momentum, "centered", + self._centered) else: rmsprop_op = block.append_op( type=self.type, @@ -3512,12 +3517,11 @@ class FtrlOptimizer(Optimizer): regularization=None, grad_clip=None, name=None): - super(FtrlOptimizer, self).__init__( - learning_rate=learning_rate, - parameter_list=parameter_list, - regularization=regularization, - grad_clip=grad_clip, - name=name) + super(FtrlOptimizer, self).__init__(learning_rate=learning_rate, + parameter_list=parameter_list, + regularization=regularization, + grad_clip=grad_clip, + name=name) if learning_rate is None: raise ValueError("learning_rate is not set.") @@ -3543,33 +3547,37 @@ class FtrlOptimizer(Optimizer): linear_acc = self._get_accumulator(self._linear_acc_str, param_and_grad[0]) if framework._non_static_mode(): - _C_ops.ftrl(param_and_grad[0], squared_acc, linear_acc, - param_and_grad[1], + _C_ops.ftrl(param_and_grad[0], squared_acc, + linear_acc, param_and_grad[1], self._create_param_lr(param_and_grad), param_and_grad[0], squared_acc, linear_acc, "l1", self._l1, "l2", self._l2, "lr_power", self._lr_power) else: - ftrl_op = block.append_op( - type=self.type, - inputs={ - "Param": param_and_grad[0], - "Grad": param_and_grad[1], - "SquaredAccumulator": squared_acc, - "LinearAccumulator": linear_acc, - "LearningRate": self._create_param_lr(param_and_grad), - }, - outputs={ - "ParamOut": param_and_grad[0], - "SquaredAccumOut": squared_acc, - "LinearAccumOut": linear_acc - }, - attrs={ - "l1": self._l1, - "l2": self._l2, - "lr_power": self._lr_power - }, - stop_gradient=True) + ftrl_op = block.append_op(type=self.type, + inputs={ + "Param": + param_and_grad[0], + "Grad": + param_and_grad[1], + "SquaredAccumulator": + squared_acc, + "LinearAccumulator": + linear_acc, + "LearningRate": + self._create_param_lr(param_and_grad), + }, + outputs={ + "ParamOut": param_and_grad[0], + "SquaredAccumOut": squared_acc, + "LinearAccumOut": linear_acc + }, + attrs={ + "l1": self._l1, + "l2": self._l2, + "lr_power": self._lr_power + }, + stop_gradient=True) return ftrl_op @@ -3668,15 +3676,14 @@ class LambOptimizer(AdamOptimizer): assert beta1 is not None assert beta2 is not None assert epsilon is not None - super(LambOptimizer, self).__init__( - learning_rate=learning_rate, - parameter_list=parameter_list, - regularization=regularization, - grad_clip=grad_clip, - beta1=beta1, - beta2=beta2, - epsilon=epsilon, - name=name) + super(LambOptimizer, self).__init__(learning_rate=learning_rate, + 
parameter_list=parameter_list, + regularization=regularization, + grad_clip=grad_clip, + beta1=beta1, + beta2=beta2, + epsilon=epsilon, + name=name) self.type = "lamb" self._weight_decay = lamb_weight_decay self._exclude_from_weight_decay_fn = exclude_from_weight_decay_fn @@ -3711,31 +3718,30 @@ class LambOptimizer(AdamOptimizer): return None # create the lamb optimize op - lamb_op = block.append_op( - type=self.type, - inputs={ - "Param": param_and_grad[0], - "Grad": param_and_grad[1], - "LearningRate": lr, - "Moment1": moment1, - "Moment2": moment2, - "Beta1Pow": beta1_pow_acc, - "Beta2Pow": beta2_pow_acc - }, - outputs={ - "ParamOut": param_and_grad[0], - "Moment1Out": moment1, - "Moment2Out": moment2, - "Beta1PowOut": beta1_pow_acc, - "Beta2PowOut": beta2_pow_acc - }, - attrs={ - "beta1": self._beta1, - "beta2": self._beta2, - "epsilon": self._epsilon, - "weight_decay": weight_decay - }, - stop_gradient=True) + lamb_op = block.append_op(type=self.type, + inputs={ + "Param": param_and_grad[0], + "Grad": param_and_grad[1], + "LearningRate": lr, + "Moment1": moment1, + "Moment2": moment2, + "Beta1Pow": beta1_pow_acc, + "Beta2Pow": beta2_pow_acc + }, + outputs={ + "ParamOut": param_and_grad[0], + "Moment1Out": moment1, + "Moment2Out": moment2, + "Beta1PowOut": beta1_pow_acc, + "Beta2PowOut": beta2_pow_acc + }, + attrs={ + "beta1": self._beta1, + "beta2": self._beta2, + "epsilon": self._epsilon, + "weight_decay": weight_decay + }, + stop_gradient=True) return lamb_op @@ -3857,8 +3863,9 @@ class ModelAverage(Optimizer): name=None): if framework._non_static_mode(): raise Exception("In dygraph, don't support ModelAverage.") - super(ModelAverage, self).__init__( - 0.0, regularization=regularization, name=name) + super(ModelAverage, self).__init__(0.0, + regularization=regularization, + name=name) self.average_window = average_window_rate self.min_average_window = min_average_window self.max_average_window = max_average_window @@ -3927,38 +3934,44 @@ class ModelAverage(Optimizer): sum_1 = self._add_accumulator('sum_1', param) sum_2 = self._add_accumulator('sum_2', param) sum_3 = self._add_accumulator('sum_3', param) - num_accumulates = self._add_accumulator( - 'num_accumulates', param, dtype='int64', shape=[1]) - old_num_accumulates = self._add_accumulator( - 'old_num_accumulates', param, dtype='int64', shape=[1]) - num_updates = self._add_accumulator( - 'num_updates', param, dtype='int64', shape=[1]) - - self.helper.append_op( - type='average_accumulates', - inputs={ - "param": param, - "in_sum_1": sum_1, - "in_sum_2": sum_2, - "in_sum_3": sum_3, - "in_num_accumulates": num_accumulates, - "in_old_num_accumulates": old_num_accumulates, - "in_num_updates": num_updates - }, - outputs={ - "out_sum_1": sum_1, - "out_sum_2": sum_2, - "out_sum_3": sum_3, - "out_num_accumulates": num_accumulates, - "out_old_num_accumulates": old_num_accumulates, - "out_num_updates": num_updates, - }, - attrs={ - "average_window": self.average_window, - "min_average_window": self.min_average_window, - "max_average_window": self.max_average_window, - }, - stop_gradient=True) + num_accumulates = self._add_accumulator('num_accumulates', + param, + dtype='int64', + shape=[1]) + old_num_accumulates = self._add_accumulator('old_num_accumulates', + param, + dtype='int64', + shape=[1]) + num_updates = self._add_accumulator('num_updates', + param, + dtype='int64', + shape=[1]) + + self.helper.append_op(type='average_accumulates', + inputs={ + "param": param, + "in_sum_1": sum_1, + "in_sum_2": sum_2, + "in_sum_3": sum_3, + 
"in_num_accumulates": num_accumulates, + "in_old_num_accumulates": old_num_accumulates, + "in_num_updates": num_updates + }, + outputs={ + "out_sum_1": sum_1, + "out_sum_2": sum_2, + "out_sum_3": sum_3, + "out_num_accumulates": num_accumulates, + "out_old_num_accumulates": + old_num_accumulates, + "out_num_updates": num_updates, + }, + attrs={ + "average_window": self.average_window, + "min_average_window": self.min_average_window, + "max_average_window": self.max_average_window, + }, + stop_gradient=True) @signature_safe_contextmanager def apply(self, executor, need_restore=True): @@ -4181,12 +4194,11 @@ class ExponentialMovingAverage(object): self._params_tmps = [] for param in default_main_program().global_block().all_parameters(): if param.do_model_average != False: - tmp = param.block.create_var( - name=unique_name.generate(".".join( - [self._name + param.name, 'ema_tmp'])), - dtype=param.dtype, - persistable=False, - stop_gradient=True) + tmp = param.block.create_var(name=unique_name.generate(".".join( + [self._name + param.name, 'ema_tmp'])), + dtype=param.dtype, + persistable=False, + stop_gradient=True) self._params_tmps.append((param, tmp)) self._ema_vars = {} @@ -4207,8 +4219,8 @@ class ExponentialMovingAverage(object): # bias correction with layers.control_flow.Switch() as switch: with switch.case(global_step > 0): - layers.assign( - output=param, input=ema / (1.0 - decay_pow)) + layers.assign(output=param, + input=ema / (1.0 - decay_pow)) with switch.default(): layers.assign(output=param, input=ema) @@ -4236,18 +4248,16 @@ class ExponentialMovingAverage(object): layers.tensor.assign(decay_t, decay_var) with switch.default(): layers.tensor.assign( - np.array( - [self._decay], dtype=np.float32), + np.array([self._decay], dtype=np.float32), decay_var) return decay_var def _get_decay_pow(self, block): - global_step = layers.create_global_var( - name=self._step_counter_name, - shape=[1], - value=0, - dtype='int64', - persistable=True) + global_step = layers.create_global_var(name=self._step_counter_name, + shape=[1], + value=0, + dtype='int64', + persistable=True) global_step = layers.cast(global_step, "float32") decay_var = block._clone_variable(self._decay_var) decay_pow_acc = layers.elementwise_pow(decay_var, global_step) @@ -4432,18 +4442,18 @@ class PipelineOptimizer(object): if op.type == "reduce_any": # cast the bool var to int32 to use allreduce_max op temp_var_name = unique_name.generate(out_name + "_cast_int32") - temp_var = block.create_var( - name=temp_var_name, shape=[1], dtype="int32") - block._insert_op( - op_idx + 1 + offset, - type='cast', - inputs={'X': out_var}, - outputs={'Out': temp_var}, - attrs={ - 'in_dtype': out_var.dtype, - 'out_dtype': temp_var.dtype, - self._op_role_key: self._op_role.Optimize - }) + temp_var = block.create_var(name=temp_var_name, + shape=[1], + dtype="int32") + block._insert_op(op_idx + 1 + offset, + type='cast', + inputs={'X': out_var}, + outputs={'Out': temp_var}, + attrs={ + 'in_dtype': out_var.dtype, + 'out_dtype': temp_var.dtype, + self._op_role_key: self._op_role.Optimize + }) offset += 1 block._insert_op( op_idx + 1 + offset, @@ -4458,16 +4468,15 @@ class PipelineOptimizer(object): }) offset += 1 if op.type == "reduce_any": - block._insert_op( - op_idx + 1 + offset, - type='cast', - inputs={'X': temp_var}, - outputs={'Out': out_var}, - attrs={ - 'in_dtype': temp_var.dtype, - 'out_dtype': out_var.dtype, - self._op_role_key: self._op_role.Optimize - }) + block._insert_op(op_idx + 1 + offset, + type='cast', + inputs={'X': temp_var}, 
+ outputs={'Out': out_var}, + attrs={ + 'in_dtype': temp_var.dtype, + 'out_dtype': out_var.dtype, + self._op_role_key: self._op_role.Optimize + }) offset += 1 return offset @@ -4483,7 +4492,7 @@ class PipelineOptimizer(object): # get the global information, so allreduce op is needed. should_insert = False op = block.ops[op_idx] - # For op process vars on all devices, remove its input + # For op process vars on all devices, remove its input # vars not in this block reserved_x = [] if op.type == 'reduce_any' and self._is_optimize_op(op): @@ -4518,8 +4527,8 @@ class PipelineOptimizer(object): vars = op.desc.input_arg_names() + op.desc.output_arg_names() for var in vars: - # a var whose name contains "blocking_queue" - # only exists in startup program + # a var whose name contains "blocking_queue" + # only exists in startup program if var in used_var_set or "_blocking_queue" in var: continue used_var_set.add(var) @@ -4561,8 +4570,8 @@ class PipelineOptimizer(object): self._op_role.Loss) def _is_forward_op(self, op): - return self._op_role_key in op.attr_names and ( - int(op.attr(self._op_role_key)) == int(self._op_role.Forward)) + return self._op_role_key in op.attr_names and (int( + op.attr(self._op_role_key)) == int(self._op_role.Forward)) def _is_backward_op(self, op): return self._op_role_key in op.attr_names and ( @@ -4769,8 +4778,8 @@ class PipelineOptimizer(object): device = post_op.attr(self._op_device_key) assert device, "The post op must have op_device set." op._set_attr(self._op_device_key, device) - elif (op.type == "cast" or - op.type == "scale") and self._is_backward_op(op): + elif (op.type == "cast" + or op.type == "scale") and self._is_backward_op(op): prev_op = self._find_prev_op(idx, op.desc.input("X")[0]) op._set_attr(self._op_device_key, prev_op.attr(self._op_device_key)) elif op.type == "memcpy" and not self._is_optimize_op(op): @@ -4790,8 +4799,8 @@ class PipelineOptimizer(object): elif self._is_loss_op(op): # For loss * loss_scaling op added by AMP offset = 1 - while (not block.ops[idx + offset].has_attr(self._op_device_key) or - not block.ops[idx + offset].attr(self._op_device_key)): + while (not block.ops[idx + offset].has_attr(self._op_device_key) + or not block.ops[idx + offset].attr(self._op_device_key)): offset += 1 device = block.ops[idx + offset].attr(self._op_device_key) assert device, "Please put you program within device_guard scope." @@ -4814,12 +4823,12 @@ class PipelineOptimizer(object): "regularization ops must have two elements." param_name = op_role_var[0] device = self._param_device_map[param_name] - # For sum op added by global gradient clip, it must be + # For sum op added by global gradient clip, it must be # put on all devices - if (op.type == 'sum' or op.type == 'sqrt' or - op.type == 'fill_constant' or - op.type == 'elementwise_max' or - op.type == 'elementwise_div'): + if (op.type == 'sum' or op.type == 'sqrt' + or op.type == 'fill_constant' + or op.type == 'elementwise_max' + or op.type == 'elementwise_div'): device = f"{self._device}:all" op._set_attr(self._op_device_key, device) elif op.type == "alloc_float_status" or op.type == "clear_float_status": @@ -4851,9 +4860,9 @@ class PipelineOptimizer(object): not that attribute set. 
""" for idx, op in enumerate(list(block.ops)): - if (op.type == "create_py_reader" or op.type == "read" or - op.type == "create_double_buffer_reader"): - # Copy read related ops to all section to make them exit + if (op.type == "create_py_reader" or op.type == "read" + or op.type == "create_double_buffer_reader"): + # Copy read related ops to all section to make them exit # after each epoch. # We use "gpu:all" to represent the op should be put on all # sub-programs, such as lr-related ops. Note that: "gpu:all" @@ -4882,13 +4891,13 @@ class PipelineOptimizer(object): ] for op in block.ops: if not op._has_kernel(op.type): - assert op.type == "conditional_block" and ( - op.attr(self._op_role_key) == int(self._op_role.LRSched)), ( + assert op.type == "conditional_block" and (op.attr( + self._op_role_key) == int(self._op_role.LRSched)), ( "Now, the only supported op without kernel is " "conditional_block, and its op role must be LRSched.") - assert op.has_attr(self._op_role_key), ( - "op ({}) has no {} attribute.".format(op.type, - self._op_role_key)) + assert op.has_attr( + self._op_role_key), ("op ({}) has no {} attribute.".format( + op.type, self._op_role_key)) op_role = op.attr(self._op_role_key) assert int(op_role) in valid_op_role_value, \ "op_role {} for op {} must be one of {}".format( @@ -4896,9 +4905,9 @@ class PipelineOptimizer(object): op.type, valid_op_role_value) - assert op.has_attr(self._op_device_key), ( - "op ({}) has no {} attribute.".format(op.type, - self._op_device_key)) + assert op.has_attr( + self._op_device_key), ("op ({}) has no {} attribute.".format( + op.type, self._op_device_key)) device = op.attr(self._op_device_key) assert device, ("op_device attribute for op " @@ -5055,8 +5064,8 @@ class PipelineOptimizer(object): 0] < 0 else var_shape[0] numel = np.prod(var_shape) - use_mp = (self.mp_degree > 1) and ( - numel % self.mp_degree == 0) + use_mp = (self.mp_degree > 1) and (numel % + self.mp_degree == 0) if 'subprog' in var.name: # For recompute, if the checkpoints var is layer_norm_6.tmp_2 @@ -5085,15 +5094,17 @@ class PipelineOptimizer(object): _check_stage(cur_id, prev_id) - block._insert_op_without_sync( - index=index + extra_index_info['index'], - type='c_sync_calc_stream', - inputs={'X': [var]}, - outputs={'Out': [var]}, - attrs={ - self._op_device_key: prev_dev, - self._op_role_key: op_role, - }) + block._insert_op_without_sync(index=index + + extra_index_info['index'], + type='c_sync_calc_stream', + inputs={'X': [var]}, + outputs={'Out': [var]}, + attrs={ + self._op_device_key: + prev_dev, + self._op_role_key: + op_role, + }) extra_index_info['index'] += 1 prefix_name = var.name.split('@')[0] prefix_var = block.var(prefix_name) @@ -5175,9 +5186,8 @@ class PipelineOptimizer(object): "Now only 'F-then-B' and '1F1B' are supported." 
"The given value is {}.".format(self.schedule_mode)) - _insert_send_recv( - int(cur_device.split(':')[1]), - int(prev_device.split(':')[1])) + _insert_send_recv(int(cur_device.split(':')[1]), + int(prev_device.split(':')[1])) block._sync_with_cpp() def _insert_loss_scale(self, block): @@ -5247,8 +5257,8 @@ class PipelineOptimizer(object): # maybe have no optimize # if first_opt_op_idx == len(block.ops): return - if self._is_backward_op(op) and ( - self._op_role_var_key in op.attr_names): + if self._is_backward_op(op) and (self._op_role_var_key + in op.attr_names): op_role_var = op.attr(self._op_role_var_key) if len(op_role_var) == 0: continue assert len(op_role_var) % 2 == 0 @@ -5274,11 +5284,15 @@ class PipelineOptimizer(object): inputs={}, outputs={'Out': [merged_param_grad_var]}, attrs={ - 'shape': merged_param_grad_var.shape, - 'dtype': merged_param_grad_var.dtype, - 'value': float(0), + 'shape': + merged_param_grad_var.shape, + 'dtype': + merged_param_grad_var.dtype, + 'value': + float(0), # a trick to run this op once per mini-batch - self._op_role_key: self._op_role.Optimize.LRSched, + self._op_role_key: + self._op_role.Optimize.LRSched, }) offset += 1 grad_name = op_role_var[i + 1] @@ -5296,16 +5310,18 @@ class PipelineOptimizer(object): cast_grad_var = self._create_var( block, param_grad_var, cast_grad_var_name, dtype) cast_grad_var.persistable = False - block._insert_op( - index=first_opt_op_idx + offset, - type='cast', - inputs={'X': grad_var}, - outputs={'Out': cast_grad_var}, - attrs={ - 'in_dtype': grad_var.dtype, - 'out_dtype': cast_grad_var.dtype, - self._op_role_key: self._op_role.Backward, - }) + block._insert_op(index=first_opt_op_idx + offset, + type='cast', + inputs={'X': grad_var}, + outputs={'Out': cast_grad_var}, + attrs={ + 'in_dtype': + grad_var.dtype, + 'out_dtype': + cast_grad_var.dtype, + self._op_role_key: + self._op_role.Backward, + }) offset += 1 grad_var = cast_grad_var @@ -5314,7 +5330,9 @@ class PipelineOptimizer(object): type='sum', inputs={'X': [merged_param_grad_var, grad_var]}, outputs={'Out': merged_param_grad_var}, - attrs={self._op_role_key: self._op_role.Backward, }) + attrs={ + self._op_role_key: self._op_role.Backward, + }) offset += 1 merged_gradient_names.append(merged_param_grad_name) @@ -5342,24 +5360,23 @@ class PipelineOptimizer(object): grad_var = block.var(grad_name) grad_var.persistable = False - block._insert_op( - index=first_opt_op_idx, - type='cast', - inputs={'X': fp16_grad_var}, - outputs={'Out': grad_var}, - attrs={ - 'in_dtype': fp16_grad_var.dtype, - 'out_dtype': grad_var.dtype, - self._op_role_key: self._op_role.Optimize, - }) + block._insert_op(index=first_opt_op_idx, + type='cast', + inputs={'X': fp16_grad_var}, + outputs={'Out': grad_var}, + attrs={ + 'in_dtype': fp16_grad_var.dtype, + 'out_dtype': grad_var.dtype, + self._op_role_key: self._op_role.Optimize, + }) return merged_gradient_names def _insert_accumulate_gradients_with_fuse(self, main_block, fp16, fused_size, grad_param_pairs, first_opt_op_idx): - grad_param_pairs = self._sort_grad_param_by_dtype(main_block, - grad_param_pairs) + grad_param_pairs = self._sort_grad_param_by_dtype( + main_block, grad_param_pairs) grad_param_segments = [] merged_suffix = '@MERGED@FP16' if fp16 else '@MERGED' @@ -5402,11 +5419,11 @@ class PipelineOptimizer(object): for grad_param_segment in grad_param_segments: grad_segment = grad_param_segment[0] merged_grad_segment = grad_param_segment[2] - fused_grad = main_block.create_var( - name='FusedGrad_{}'.format(grad_segment[0].name), - 
dtype=grad_segment[0].dtype, - persistable=False, - stop_gradient=False) + fused_grad = main_block.create_var(name='FusedGrad_{}'.format( + grad_segment[0].name), + dtype=grad_segment[0].dtype, + persistable=False, + stop_gradient=False) # keep the '.cast_fp16' info in the fuse var name fused_merged_grad_name_prefix = 'FusedMergedGrad.cast_fp16.' if \ merged_grad_segment[0].dtype == paddle.float16 else 'FusedMergedGrad' @@ -5442,8 +5459,10 @@ class PipelineOptimizer(object): first_back_op_idx + offset, type="coalesce_tensor", inputs={"Input": params}, - outputs={"Output": grads, - "FusedOutput": fused_grad}, + outputs={ + "Output": grads, + "FusedOutput": fused_grad + }, attrs={ # Explanation of user_defined_size_of_dtype: # In coalesce op, the align size is 256 bytes @@ -5503,21 +5522,20 @@ class PipelineOptimizer(object): # for fp16 allreduce, cast fp32 grad to fp16 # for fp32 allreduce, cast fp16 grad to fp32 cast_grad_var_name = fused_grad.name + '@TMP' - cast_grad_var = main_block.create_var( - name=cast_grad_var_name, - dtype=dtype, - persistable=False, - stop_gradient=False) - main_block._insert_op( - index=first_opt_op_idx + offset, - type='cast', - inputs={'X': fused_grad}, - outputs={'Out': cast_grad_var}, - attrs={ - 'in_dtype': fused_grad.dtype, - 'out_dtype': cast_grad_var.dtype, - self._op_role_key: self._op_role.Backward, - }) + cast_grad_var = main_block.create_var(name=cast_grad_var_name, + dtype=dtype, + persistable=False, + stop_gradient=False) + main_block._insert_op(index=first_opt_op_idx + offset, + type='cast', + inputs={'X': fused_grad}, + outputs={'Out': cast_grad_var}, + attrs={ + 'in_dtype': fused_grad.dtype, + 'out_dtype': cast_grad_var.dtype, + self._op_role_key: + self._op_role.Backward, + }) offset += 1 fused_grad = cast_grad_var main_block._insert_op( @@ -5536,22 +5554,21 @@ class PipelineOptimizer(object): assert main_block.has_var(fp16_grad_name) fp16_grad = main_block.var(fp16_grad_name) fp32_grad_name = param + core.grad_var_suffix() + '@MERGED' - fp32_grad = main_block.create_var( - name=fp32_grad_name, - dtype=paddle.float32, - shape=real_grad.shape, - persistable=False, - stop_gradient=False) - main_block._insert_op( - index=first_opt_op_idx + offset, - type='cast', - inputs={'X': fp16_grad}, - outputs={'Out': fp32_grad}, - attrs={ - 'in_dtype': paddle.float16, - 'out_dtype': paddle.float32, - self._op_role_key: self._op_role.Optimize, - }) + fp32_grad = main_block.create_var(name=fp32_grad_name, + dtype=paddle.float32, + shape=real_grad.shape, + persistable=False, + stop_gradient=False) + main_block._insert_op(index=first_opt_op_idx + offset, + type='cast', + inputs={'X': fp16_grad}, + outputs={'Out': fp32_grad}, + attrs={ + 'in_dtype': paddle.float16, + 'out_dtype': paddle.float32, + self._op_role_key: + self._op_role.Optimize, + }) offset += 1 # replace the var with it's name, which will be used for inserting allreduce @@ -5584,8 +5601,8 @@ class PipelineOptimizer(object): if first_opt_op_idx == len(main_block.ops): return - if self._is_backward_op(op) and ( - self._op_role_var_key in op.attr_names): + if self._is_backward_op(op) and (self._op_role_var_key + in op.attr_names): op_role_var = op.attr(self._op_role_var_key) if len(op_role_var) == 0: continue @@ -5747,30 +5764,44 @@ class PipelineOptimizer(object): write_block._insert_op( index=0, type='send_v2', - inputs={'X': write_block.var(var_name), }, + inputs={ + 'X': write_block.var(var_name), + }, attrs={ - self._op_device_key: write_device, - 'use_calc_stream': False, + self._op_device_key: + 
write_device, + 'use_calc_stream': + False, # A trick to make the role LRSched to avoid copy every # microbatch - self._op_role_key: self._op_role.LRSched, - 'peer': read_dev_index, - 'ring_id': ring_id + self._op_role_key: + self._op_role.LRSched, + 'peer': + read_dev_index, + 'ring_id': + ring_id }) read_block._insert_op( index=0, type='recv_v2', outputs={'Out': [read_block.var(var_name)]}, attrs={ - 'out_shape': read_block.var(var_name).shape, - 'dtype': read_block.var(var_name).dtype, - self._op_device_key: read_device, - 'use_calc_stream': False, + 'out_shape': + read_block.var(var_name).shape, + 'dtype': + read_block.var(var_name).dtype, + self._op_device_key: + read_device, + 'use_calc_stream': + False, # A trick to make the role LRSched to avoid copy every # microbatch - self._op_role_key: self._op_role.LRSched, - 'peer': write_dev_index, - 'ring_id': ring_id + self._op_role_key: + self._op_role.LRSched, + 'peer': + write_dev_index, + 'ring_id': + ring_id }) read_block._insert_op( index=1, @@ -5778,11 +5809,14 @@ class PipelineOptimizer(object): inputs={'X': [read_block.var(var_name)]}, outputs={'Out': [read_block.var(var_name)]}, attrs={ - self._op_device_key: read_device, + self._op_device_key: + read_device, # A trick to make the role LRSched to avoid copy every # microbatch - self._op_role_key: self._op_role.LRSched, - 'ring_id': ring_id + self._op_role_key: + self._op_role.LRSched, + 'ring_id': + ring_id }) def _is_gradient_clip_op(self, op): @@ -5891,12 +5925,11 @@ class PipelineOptimizer(object): op_outputs = dict() for name in op.output_names: op_outputs[name] = op.output(name) - block._insert_op_without_sync( - index=insert_index, - type=op.type, - inputs=op_inputs, - outputs=op_outputs, - attrs=op.all_attrs()) + block._insert_op_without_sync(index=insert_index, + type=op.type, + inputs=op_inputs, + outputs=op_outputs, + attrs=op.all_attrs()) block._remove_op(i + 1) if op_role == int(self._op_role.Forward): forward_insert_index += 1 @@ -6018,7 +6051,7 @@ class PipelineOptimizer(object): # Step4: Special Case: process persistable vars that exist in # multiple sections - # FIXME + # FIXME # self._process_persistable_vars_in_multi_sections( # main_program, startup_program, program_list) @@ -6034,8 +6067,8 @@ class PipelineOptimizer(object): place_list.append(core.NPUPlace(dev_index % 1)) # Step6: Split startup program - new_startup_program = self._split_startup_program(startup_program, - self.local_rank) + new_startup_program = self._split_startup_program( + startup_program, self.local_rank) startup_program._pipeline_opt = { "startup_program": new_startup_program, @@ -6044,7 +6077,7 @@ class PipelineOptimizer(object): if not self.scale_gradient: self._insert_loss_scale(real_block) if not self.use_sharding: - # Step7: clear gradients before each mini-batch and + # Step7: clear gradients before each mini-batch and # accumulate gradients during backward self._rename_gradient_var_name(real_block) real_block._sync_with_cpp() @@ -6162,11 +6195,12 @@ class RecomputeOptimizer(Optimizer): ), "_checkpoints should be a list of Variable or a list of String" for ckpt in checkpoints: assert ( - isinstance(ckpt, six.string_types) or isinstance(ckpt, Variable) + isinstance(ckpt, six.string_types) + or isinstance(ckpt, Variable) ), "_checkpoints should be a list of Variable or a list of String" self._checkpoints = checkpoints - # should enable offload before calling backward + # should enable offload before calling backward def _enable_offload(self): self.enable_offload = True @@ -6300,16 
+6334,15 @@ class RecomputeOptimizer(Optimizer): dtype=self._main_program.global_block().var(var.name).dtype, persistable=False, stop_gradient=True) - block.append_op( - type='fill_constant', - outputs={'Out': varname}, - attrs={ - "shape": var.shape, - "dtype": var.dtype, - "value": 0.0, - "place_type": 2, - OP_ROLE_KEY: op_role, - }) + block.append_op(type='fill_constant', + outputs={'Out': varname}, + attrs={ + "shape": var.shape, + "dtype": var.dtype, + "value": 0.0, + "place_type": 2, + OP_ROLE_KEY: op_role, + }) return @@ -6343,7 +6376,7 @@ class RecomputeOptimizer(Optimizer): self._insert_async_memcpy_op(idx, varname, pinned_varname, 0, 2) def _insert_sync_op(self, op_idx, checkpoint_name): - # single stream offload no need sync + # single stream offload no need sync pass def _record_fetch_op(self, idx): @@ -6372,7 +6405,7 @@ class RecomputeOptimizer(Optimizer): def _parse_backward(self): self.idx2insertions = {} - # don't offload the last checkpoints, to favor throughput + # don't offload the last checkpoints, to favor throughput self.un_fetch_checkpoint_names = self.sorted_checkpoint_names[:] self.un_fetch_checkpoint_names.pop(-1) need_fetch_checkpoint_names = self.un_fetch_checkpoint_names[:] @@ -6405,12 +6438,12 @@ class RecomputeOptimizer(Optimizer): if self.checkpoint_usage_count[input_var] == 0: # TODO (JZ-LIANG) sync memcpy_stream if extra stream for memcpy second_to_last_fetch_checkpoint = fetched_checkpoint_varname - # there is NO fetch ahead the first checkpoint + # there is NO fetch ahead the first checkpoint if input_var != self.sorted_checkpoint_names[0]: fetched_checkpoint_varname = self._record_fetch_op( idx) - # should check the current used checkpoint is ths last fetch one + # should check the current used checkpoint is ths last fetch one assert second_to_last_fetch_checkpoint == input_var, "Current recompute segment should use [{}] BUT got [{}]".format( second_to_last_fetch_checkpoint, input_var) # rename @@ -6436,8 +6469,8 @@ class RecomputeOptimizer(Optimizer): operation, checkpoint_name = self.idx2insertions[op_idx] if operation == "fetch": self._insert_fetch_op(op_idx, checkpoint_name) - logging.debug("Insert [{}] fetch op.".format( - checkpoint_name)) + logging.debug( + "Insert [{}] fetch op.".format(checkpoint_name)) del self.idx2insertions[op_idx] elif operation == "sync": self._insert_sync_op(op_idx, checkpoint_name) @@ -6450,7 +6483,7 @@ class RecomputeOptimizer(Optimizer): def _parse_forward(self): self.idx2insertions = {} - # don't offload the last checkpoints, faster, less memory saving + # don't offload the last checkpoints, faster, less memory saving self.un_offload_checkpoint_names = self.sorted_checkpoint_names[:] last_checkpoint = self.un_offload_checkpoint_names.pop(-1) need_offload_checkpoint_names = self.un_offload_checkpoint_names[:] @@ -6471,8 +6504,8 @@ class RecomputeOptimizer(Optimizer): self.block.ops), "Could NOT found Forward op in prog" last_offload_checkpoint = None - for i, op in enumerate(self.block.ops[self.fw_strart_op_idx: - self.bw_strart_op_idx]): + for i, op in enumerate( + self.block.ops[self.fw_strart_op_idx:self.bw_strart_op_idx]): idx = self.fw_strart_op_idx + i output_vars = op.desc.output_arg_names() @@ -6504,8 +6537,8 @@ class RecomputeOptimizer(Optimizer): last_offload_checkpoint = output_var else: raise ValueError( - "There should be just ONE op that output checkpoint [{}]". 
- format(output_var)) + "There should be just ONE op that output checkpoint [{}]" + .format(output_var)) # need to sync the last need to offload checkpoint before the last checkpoint as output op if output_var == last_checkpoint: assert len( @@ -6527,7 +6560,7 @@ class RecomputeOptimizer(Optimizer): last_offload_checkpoint) self._record_sync_op(last_usage_idx + 1, last_offload_checkpoint) - # record checkpoint usage + # record checkpoint usage for input_var in input_vars: if input_var in need_offload_checkpoint_names: assert input_var not in self.synced_checkpoints, "checkpoint [{}] used after sync".format( @@ -6552,13 +6585,13 @@ class RecomputeOptimizer(Optimizer): operation, checkpoint_name = self.idx2insertions[op_idx] if operation == "offload": self._insert_offload_op(op_idx, checkpoint_name) - logging.debug("Insert [{}] offload op.".format( - checkpoint_name)) + logging.debug( + "Insert [{}] offload op.".format(checkpoint_name)) del self.idx2insertions[op_idx] elif operation == "sync": self._insert_sync_op(op_idx, checkpoint_name) - logging.debug("Insert [{}] offload_sync op.".format( - checkpoint_name)) + logging.debug( + "Insert [{}] offload_sync op.".format(checkpoint_name)) del self.idx2insertions[op_idx] self.block._sync_with_cpp() @@ -6585,11 +6618,11 @@ class RecomputeOptimizer(Optimizer): with program_guard(self._main_program, startup_program): assert len(self.checkpoint_shape) > 0, ( - "checkpoints shape {} should be an non empty list like: [12, 512, 1024]". - format(self.checkpoint_shape)) + "checkpoints shape {} should be an non empty list like: [12, 512, 1024]" + .format(self.checkpoint_shape)) assert all([ele > 0 for ele in self.checkpoint_shape]), ( - "all ele in checkpoints shape {} should be a determined integer larger than 0". - format(self.checkpoint_shape)) + "all ele in checkpoints shape {} should be a determined integer larger than 0" + .format(self.checkpoint_shape)) self.checkpoint_name2pinned_name = dict() self.checkpoint_name2fetch_name = dict() for checkpoint_varname in self.sorted_checkpoint_names: @@ -6659,8 +6692,8 @@ class RecomputeOptimizer(Optimizer): no_grad_set=None) print("Finished backward") """ - assert (self._checkpoints is not None - ), "You should call _set_checkpoints first" + assert (self._checkpoints + is not None), "You should call _set_checkpoints first" if framework._non_static_mode(): raise NotImplementedError( @@ -6684,11 +6717,10 @@ class RecomputeOptimizer(Optimizer): no_grad_set, checkpoints=checkpoint_vars) else: - params_grads = append_backward( - loss, - parameter_list, - no_grad_set, - checkpoints=checkpoint_vars) + params_grads = append_backward(loss, + parameter_list, + no_grad_set, + checkpoints=checkpoint_vars) if self.enable_offload: self.sorted_checkpoint_names = sorted_checkpoint_names @@ -6738,8 +6770,9 @@ class RecomputeOptimizer(Optimizer): func = self._optimizer.apply_optimize if hasattr( self._optimizer, 'apply_optimize') else self._optimizer._apply_optimize - return func( - loss, startup_program=startup_program, params_grads=params_grads) + return func(loss, + startup_program=startup_program, + params_grads=params_grads) def minimize(self, loss, @@ -6747,19 +6780,19 @@ class RecomputeOptimizer(Optimizer): parameter_list=None, no_grad_set=None): assert isinstance(loss, Variable), "The loss should be an Variable." 
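For readers skimming the reformatted RecomputeOptimizer hunks above, a minimal pure-Python sketch of the checkpoint-and-recompute idea may help: keep only the checkpoint activations during the forward pass and recompute the dropped intermediates when the backward pass needs them. The segment functions below are hypothetical stand-ins, not Paddle APIs.

    import math

    def seg1(x):                 # first segment of the toy network
        a = x * 2.0              # intermediate `a` is NOT kept
        return math.tanh(a)

    def seg2(b):                 # second segment
        c = b * 3.0              # intermediate `c` is NOT kept
        return c * c

    def seg2_backward(b, grad_d):
        c = b * 3.0              # recompute the dropped intermediate
        return (grad_d * 2.0 * c) * 3.0          # d(c*c)/dc * dc/db

    def seg1_backward(x, grad_b):
        a = x * 2.0              # recompute the dropped intermediate
        return grad_b * (1.0 - math.tanh(a) ** 2) * 2.0

    x = 0.5
    b = seg1(x)                  # only the checkpoints (x, b) are stored
    d = seg2(b)
    grad_x = seg1_backward(x, seg2_backward(b, grad_d=1.0))
    print(d, grad_x)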
- assert (self._checkpoints is not None - ), "You should call _set_checkpoints first" + assert (self._checkpoints + is not None), "You should call _set_checkpoints first" if framework._non_static_mode(): raise NotImplementedError( "DyGraph current does not support recompute") - params_grads = self.backward( - loss, - startup_program=startup_program, - parameter_list=parameter_list, - no_grad_set=no_grad_set) + params_grads = self.backward(loss, + startup_program=startup_program, + parameter_list=parameter_list, + no_grad_set=no_grad_set) - optimize_ops = self.apply_optimize( - loss, startup_program=startup_program, params_grads=params_grads) + optimize_ops = self.apply_optimize(loss, + startup_program=startup_program, + params_grads=params_grads) return optimize_ops, params_grads @@ -6857,11 +6890,10 @@ class LookaheadOptimizer(object): for param in params: fast_var = main_block.var(param) assert (fast_var is not None) - slow_var = main_block.create_var( - name=param + "@SLOW", - shape=fast_var.shape, - dtype=fast_var.dtype, - persistable=True) + slow_var = main_block.create_var(name=param + "@SLOW", + shape=fast_var.shape, + dtype=fast_var.dtype, + persistable=True) param_to_slow[param] = slow_var # add some vars to the startup_program @@ -6869,49 +6901,46 @@ class LookaheadOptimizer(object): for param in params: fast_var = startup_block.var(param) assert (fast_var is not None) - slow_var = startup_block.create_var( - name=param + "@SLOW", - shape=fast_var.shape, - dtype=fast_var.dtype, - persistable=True) + slow_var = startup_block.create_var(name=param + "@SLOW", + shape=fast_var.shape, + dtype=fast_var.dtype, + persistable=True) - startup_block.append_op( - type="assign", - inputs={"X": fast_var}, - outputs={"Out": slow_var}) + startup_block.append_op(type="assign", + inputs={"X": fast_var}, + outputs={"Out": slow_var}) with framework.program_guard(main_block.program, startup_program): # Add Var k to main prog and startup prog - k = layers.create_global_var( - name="lookahead_k", - shape=[1], - value=int(self.k), - dtype='int32', - persistable=True) + k = layers.create_global_var(name="lookahead_k", + shape=[1], + value=int(self.k), + dtype='int32', + persistable=True) # Add Var alpha to main prog and startup prog - alpha = layers.create_global_var( - name="lookahead_alpha", - shape=[1], - value=float(self.alpha), - dtype='float32', - persistable=True) + alpha = layers.create_global_var(name="lookahead_alpha", + shape=[1], + value=float(self.alpha), + dtype='float32', + persistable=True) # Add Var step - step = layers.create_global_var( - name="lookahead_step", - shape=[1], - value=int(0), - dtype='int32', - persistable=True) + step = layers.create_global_var(name="lookahead_step", + shape=[1], + value=int(0), + dtype='int32', + persistable=True) layers.increment(x=step, value=1.0, in_place=True) # lookahead - zero_var = layers.fill_constant( - shape=[1], dtype='float32', value=0.0) + zero_var = layers.fill_constant(shape=[1], + dtype='float32', + value=0.0) - one_var = layers.fill_constant( - shape=[1], dtype='float32', value=1.0) + one_var = layers.fill_constant(shape=[1], + dtype='float32', + value=1.0) mod = layers.elementwise_mod(step, k) with layers.control_flow.Switch() as switch: @@ -7001,8 +7030,8 @@ class GradientMergeOptimizer(object): "and one-time optimizer.minimize()") assert (inner_optimizer is not None), "inner optimizer can not be None" - assert (isinstance(k_steps, int) and - k_steps > 0), "k_steps should be a positive integer" + assert (isinstance(k_steps, int) + and 
k_steps > 0), "k_steps should be a positive integer" self.inner_optimizer = inner_optimizer self.k_steps = k_steps @@ -7089,51 +7118,53 @@ class GradientMergeOptimizer(object): def _get_gm_cond_var(self, main_block): # Add const var - k_step_var = layers.create_global_var( - name="gradient_merge_k", - shape=[1], - value=int(self.k_steps), - dtype='int32', - persistable=True, - force_cpu=True) - - zero_var = layers.create_global_var( - name="gradient_merge_zero", - shape=[1], - value=int(0), - dtype='int32', - persistable=True, - force_cpu=True) + k_step_var = layers.create_global_var(name="gradient_merge_k", + shape=[1], + value=int(self.k_steps), + dtype='int32', + persistable=True, + force_cpu=True) + + zero_var = layers.create_global_var(name="gradient_merge_zero", + shape=[1], + value=int(0), + dtype='int32', + persistable=True, + force_cpu=True) # Add step var & cond var - step_var = layers.create_global_var( - name="gradient_merge_step", - shape=[1], - value=int(0), - dtype='int32', - persistable=True, - force_cpu=True) + step_var = layers.create_global_var(name="gradient_merge_step", + shape=[1], + value=int(0), + dtype='int32', + persistable=True, + force_cpu=True) - cond_var = main_block.create_var( - name="gradient_merge_cond", shape=[1], dtype='bool') + cond_var = main_block.create_var(name="gradient_merge_cond", + shape=[1], + dtype='bool') with device_guard("cpu"): # step_var = (step_var + 1) % k_step layers.increment(x=step_var, value=1.0, in_place=True) - main_block.append_op( - type='elementwise_mod', - inputs={'X': step_var, - 'Y': k_step_var}, - outputs={'Out': step_var}, - attrs={'axis': -1, - 'use_mkldnn': False}) + main_block.append_op(type='elementwise_mod', + inputs={ + 'X': step_var, + 'Y': k_step_var + }, + outputs={'Out': step_var}, + attrs={ + 'axis': -1, + 'use_mkldnn': False + }) # cond_var = (step_var == 0) - main_block.append_op( - type='equal', - inputs={'X': step_var, - 'Y': zero_var}, - outputs={'Out': cond_var}) + main_block.append_op(type='equal', + inputs={ + 'X': step_var, + 'Y': zero_var + }, + outputs={'Out': cond_var}) return cond_var @@ -7165,11 +7196,11 @@ class GradientMergeOptimizer(object): param_name = param.name param_var = main_block.var(param_name) assert (param_var is not None) - gradient_merge_var = main_block.create_var( - name=param_name + "@GRAD@GradientMerge", - shape=param_var.shape, - dtype=param_var.dtype, - persistable=True) + gradient_merge_var = main_block.create_var(name=param_name + + "@GRAD@GradientMerge", + shape=param_var.shape, + dtype=param_var.dtype, + persistable=True) param_to_gradient_merge[param_name] = gradient_merge_var startup_gradient_merge_var = startup_block.create_var( @@ -7177,23 +7208,26 @@ class GradientMergeOptimizer(object): shape=param_var.shape, dtype=param_var.dtype, persistable=True) - startup_block.append_op( - type="fill_constant", - outputs={"Out": startup_gradient_merge_var}, - attrs={ - "shape": param_var.shape, - "dtype": param_var.dtype, - "value": float(0), - }) + startup_block.append_op(type="fill_constant", + outputs={"Out": startup_gradient_merge_var}, + attrs={ + "shape": param_var.shape, + "dtype": param_var.dtype, + "value": float(0), + }) # grad_merge += grad new_grad_op = main_block.append_op( type="elementwise_add", - inputs={'X': grad, - 'Y': gradient_merge_var}, + inputs={ + 'X': grad, + 'Y': gradient_merge_var + }, outputs={'Out': gradient_merge_var}, - attrs={'axis': -1, - 'use_mkldnn': False}) + attrs={ + 'axis': -1, + 'use_mkldnn': False + }) 
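As a rough, framework-free illustration of the bookkeeping that GradientMergeOptimizer assembles out of the elementwise_mod / equal / elementwise_add / scale ops above, a micro-batch loop that accumulates gradients for k steps could look like the following; the gradient function and hyper-parameters are made up for the example and are not taken from the diff.

    k_steps = 4          # plays the role of "gradient_merge_k"
    avg = True
    lr = 0.1

    param = 1.0
    grad_merge = 0.0     # the "@GRAD@GradientMerge" accumulator
    step = 0             # the "gradient_merge_step" counter

    def micro_batch_grad(p):                    # hypothetical per-micro-batch gradient
        return 2.0 * p

    for _ in range(8):
        grad_merge += micro_batch_grad(param)   # elementwise_add accumulation
        step = (step + 1) % k_steps             # increment + elementwise_mod
        if step == 0:                           # the "gradient_merge_cond" branch
            if avg:
                grad_merge /= k_steps           # scale by 1/k_steps
            param -= lr * grad_merge            # inner optimizer update
            grad_merge = 0.0                    # fill_constant(0.0) reset
    print(param)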
self._add_gm_op_role_var(new_grad_op, param, gradient_merge_var, cond) new_params_grads.append([param, gradient_merge_var]) @@ -7209,15 +7243,14 @@ class GradientMergeOptimizer(object): if self.avg: for param, new_grad in new_params_grads: # grad /= k_steps - cur_block.append_op( - type='scale', - inputs={'X': new_grad}, - outputs={'Out': new_grad}, - attrs={ - 'scale': 1.0 / self.k_steps, - 'bias': 0.0, - 'bias_after_scale': False - }) + cur_block.append_op(type='scale', + inputs={'X': new_grad}, + outputs={'Out': new_grad}, + attrs={ + 'scale': 1.0 / self.k_steps, + 'bias': 0.0, + 'bias_after_scale': False + }) new_grad.op._set_attr(op_maker.kOpRoleAttrName(), op_maker.OpRole.Backward) @@ -7233,11 +7266,10 @@ class GradientMergeOptimizer(object): # clear gradient_merge_vars for param, new_grad in new_params_grads: - layers.fill_constant( - shape=new_grad.shape, - dtype=new_grad.dtype, - value=0.0, - out=new_grad) + layers.fill_constant(shape=new_grad.shape, + dtype=new_grad.dtype, + value=0.0, + out=new_grad) new_grad.op._set_attr(op_maker.kOpRoleAttrName(), op_maker.OpRole.Optimize) @@ -7253,13 +7285,13 @@ class GradientMergeOptimizer(object): no_grad_set=None): assert isinstance(loss, Variable), "The loss should be an Variable." - params_grads = self.backward( - loss, - startup_program=startup_program, - parameter_list=parameter_list, - no_grad_set=no_grad_set) + params_grads = self.backward(loss, + startup_program=startup_program, + parameter_list=parameter_list, + no_grad_set=no_grad_set) - optimize_ops = self.apply_optimize( - loss, startup_program=startup_program, params_grads=params_grads) + optimize_ops = self.apply_optimize(loss, + startup_program=startup_program, + params_grads=params_grads) return optimize_ops, params_grads diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index a10ce1ce808..6580c82536a 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -295,12 +295,12 @@ class WeightNormParamAttr(ParamAttr): trainable=True, do_model_average=False, need_clip=True): - super(WeightNormParamAttr, self).__init__( - name=name, - initializer=initializer, - learning_rate=learning_rate, - regularizer=regularizer, - trainable=trainable, - do_model_average=do_model_average, - need_clip=need_clip) + super(WeightNormParamAttr, + self).__init__(name=name, + initializer=initializer, + learning_rate=learning_rate, + regularizer=regularizer, + trainable=trainable, + do_model_average=do_model_average, + need_clip=need_clip) self.dim = dim diff --git a/python/paddle/fluid/profiler.py b/python/paddle/fluid/profiler.py index 4d39d388530..5739cdb2f59 100644 --- a/python/paddle/fluid/profiler.py +++ b/python/paddle/fluid/profiler.py @@ -42,8 +42,8 @@ NVPROF_CONFIG = [ since="2.3.0", update_to="paddle.profiler.Profiler", level=1, - reason="Please use new profiler tool, this profiler tool is no longer maintained." -) + reason= + "Please use new profiler tool, this profiler tool is no longer maintained.") @signature_safe_contextmanager def cuda_profiler(output_file, output_mode=None, config=None): """ @@ -121,8 +121,8 @@ def npu_profiler(output_file, config=None): since="2.3.0", update_to="paddle.profiler.Profiler", level=1, - reason="Please use new profiler tool, this profiler tool is no longer maintained." -) + reason= + "Please use new profiler tool, this profiler tool is no longer maintained.") def reset_profiler(): """ Clear the previous time record. 
It works for @@ -149,8 +149,8 @@ def reset_profiler(): since="2.3.0", update_to="paddle.profiler.Profiler", level=1, - reason="Please use new profiler tool, this profiler tool is no longer maintained." -) + reason= + "Please use new profiler tool, this profiler tool is no longer maintained.") def start_profiler(state, tracer_option='Default'): """ Enable the profiler. Uers can use `fluid.profiler.start_profiler` and @@ -223,8 +223,8 @@ def start_profiler(state, tracer_option='Default'): since="2.3.0", update_to="paddle.profiler.Profiler", level=1, - reason="Please use new profiler tool, this profiler tool is no longer maintained." -) + reason= + "Please use new profiler tool, this profiler tool is no longer maintained.") def stop_profiler(sorted_key=None, profile_path='/tmp/profile'): """ Stop the profiler. Uers can use `fluid.profiler.start_profiler` and @@ -286,8 +286,8 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'): since="2.3.0", update_to="paddle.profiler.Profiler", level=1, - reason="Please use new profiler tool, this profiler tool is no longer maintained." -) + reason= + "Please use new profiler tool, this profiler tool is no longer maintained.") @signature_safe_contextmanager def profiler(state, sorted_key=None, diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py index d4be63f275f..ff299bcca9b 100644 --- a/python/paddle/fluid/reader.py +++ b/python/paddle/fluid/reader.py @@ -120,6 +120,7 @@ def _reader_process_loop(batch_reader, data_queue): class DataLoaderBase(object): + def __init__(self): self._places = None @@ -155,6 +156,7 @@ class DataLoaderBase(object): class AuToTune(object): + def __init__(self, loader): self.loader = loader self.max_num_worker = multiprocessing.cpu_count() / 2 @@ -172,12 +174,12 @@ class AuToTune(object): # pick the best num_workers auto_tune_start = time.time() logging.debug("========= DataLoader Auto Tune =========") - logging.debug("User config for DataLoader: " + str( - self.loader.num_workers)) + logging.debug("User config for DataLoader: " + + str(self.loader.num_workers)) best_num_workers = 0 min_cost = float("inf") - logging.debug("Tuning Range for num_workers: 0 ~ " + str( - self.max_num_worker)) + logging.debug("Tuning Range for num_workers: 0 ~ " + + str(self.max_num_worker)) num_workers = 0 while num_workers < self.max_num_worker: auto_tune_loader.num_workers = num_workers @@ -195,10 +197,10 @@ class AuToTune(object): logging.debug("num_workers: " + str(num_workers) + " avg_cost: " + str(avg_cost)) num_workers += 2 - logging.info("auto_tune dataLoader best_num_workers: " + str( - best_num_workers)) - logging.debug("AutoTuning Cost for DataLoader: " + str(time.time( - ) - auto_tune_start) + ' seconds') + logging.info("auto_tune dataLoader best_num_workers: " + + str(best_num_workers)) + logging.debug("AutoTuning Cost for DataLoader: " + + str(time.time() - auto_tune_start) + ' seconds') # tune the default loader's num_workers return best_num_workers @@ -479,8 +481,8 @@ class DataLoader(object): self.places = _convert_places(places) assert num_workers >= 0, "num_workers should be a non-negative value" - if num_workers > 0 and (sys.platform == 'darwin' or - sys.platform == 'win32'): + if num_workers > 0 and (sys.platform == 'darwin' + or sys.platform == 'win32'): warnings.warn( "DataLoader with multi-process mode is not supported on MacOs and Windows currently." 
\ " Please use signle-process mode with num_workers = 0 instead") @@ -523,14 +525,13 @@ class DataLoader(object): "batch_sampler is not given" self.batch_size = batch_size if isinstance(dataset, IterableDataset): - self.batch_sampler = _InfiniteIterableSampler(dataset, - batch_size) + self.batch_sampler = _InfiniteIterableSampler( + dataset, batch_size) else: - self.batch_sampler = BatchSampler( - dataset=dataset, - batch_size=batch_size, - shuffle=shuffle, - drop_last=drop_last) + self.batch_sampler = BatchSampler(dataset=dataset, + batch_size=batch_size, + shuffle=shuffle, + drop_last=drop_last) self.drop_last = drop_last self.auto_collate_batch = self.batch_sampler is not None @@ -978,8 +979,8 @@ class DygraphGeneratorLoader(DataLoaderBase): # NOTE: the multiprocessing in different platform is incompatible, we will solve it later self._use_multiprocess = use_multiprocess - if self._use_multiprocess and (sys.platform == 'darwin' or - sys.platform == 'win32'): + if self._use_multiprocess and (sys.platform == 'darwin' + or sys.platform == 'win32'): warnings.warn( "NOTE: DygraphGeneratorLoader with multiprocess mode is not currently supported on MacOs and Windows." ) @@ -995,7 +996,7 @@ class DygraphGeneratorLoader(DataLoaderBase): self._blocking_queue = None # NOTE: 1. In multiprocess mode, this thread is used to get next batch data from # self._data_queue, then push it into self._blocking_queue; 2. In singleprocess - # mode, this thread is used to get next batch data from self._batch_reader, then + # mode, this thread is used to get next batch data from self._batch_reader, then # push it into self._blocking_queue self._thread = None self._pin_memory = True if use_pinned_memory( @@ -1043,10 +1044,12 @@ class DygraphGeneratorLoader(DataLoaderBase): self._blocking_queue = core.init_lod_tensor_blocking_queue( core.Variable(), self._capacity, False) self._reader = None - self._reader = core.create_py_reader( - self.queue, self._var_names, self._shapes, self._dtypes, - self._need_check_feed, self._places, self._use_double_buffer, True, - self._pin_memory) + self._reader = core.create_py_reader(self.queue, self._var_names, + self._shapes, self._dtypes, + self._need_check_feed, + self._places, + self._use_double_buffer, True, + self._pin_memory) def _start(self): if self._use_multiprocess: @@ -1057,17 +1060,17 @@ class DygraphGeneratorLoader(DataLoaderBase): # add _data_queue into global queue set global multiprocess_queue_set multiprocess_queue_set.add(self._data_queue) - self._process = multiprocessing.Process( - target=_reader_process_loop, - args=(self._batch_reader, self._data_queue)) + self._process = multiprocessing.Process(target=_reader_process_loop, + args=(self._batch_reader, + self._data_queue)) self._process.daemon = True self._process.start() # Set child process signal handler # NOTE: [ avoiding hang ] 1. if the child process dies due to bus error/segfault - # or just hang, the main process will hang waiting for data, so here need to deal + # or just hang, the main process will hang waiting for data, so here need to deal # with SIGSEGV and SIGBUS of child process; 2. if the main process end before child - # process, it shuts the all its daemonic children down with a SIGTERM (instead of + # process, it shuts the all its daemonic children down with a SIGTERM (instead of # joining them without a timeout), so here nedd to deal with SIGTERM. 
core._set_process_pids(id(self), [self._process.pid]) _set_SIGCHLD_handler() @@ -1127,10 +1130,10 @@ class DygraphGeneratorLoader(DataLoaderBase): while not self._thread_done_event.is_set(): try: - # NOTE: [ avoid hanging ] Even with carefully designed data dependencies - # (i.e., a put() always corresponding to a get()), hanging on get() can - # still happen when data in queue is corrupted (e.g., due to - # Queue.cancel_join_thread or unexpected exit). So we set a timeout whenever + # NOTE: [ avoid hanging ] Even with carefully designed data dependencies + # (i.e., a put() always corresponding to a get()), hanging on get() can + # still happen when data in queue is corrupted (e.g., due to + # Queue.cancel_join_thread or unexpected exit). So we set a timeout whenever # we try to get data from `data_queue` # NOTE: [ avoid failed quickly ] Here, the time setting of QUEUE_GET_TIMEOUT # is relatively long, currently it is 60 seconds, because in some models, @@ -1201,10 +1204,10 @@ class DygraphGeneratorLoader(DataLoaderBase): places = _get_paddle_place_list(places) else: places = _get_paddle_place(places) - self.set_sample_list_generator( - paddle.batch( - reader, batch_size=batch_size, drop_last=drop_last), - places=places) + self.set_sample_list_generator(paddle.batch(reader, + batch_size=batch_size, + drop_last=drop_last), + places=places) return self def set_sample_list_generator(self, reader, places=None): @@ -1242,6 +1245,7 @@ class DygraphGeneratorLoader(DataLoaderBase): class GeneratorLoader(DataLoaderBase): + def __init__(self, feed_list=None, capacity=None, @@ -1287,10 +1291,12 @@ class GeneratorLoader(DataLoaderBase): self._queue = core.init_lod_tensor_blocking_queue( core.Variable(), self._capacity, self._keep_order) self._reader = None - self._reader = core.create_py_reader( - self.queue, self._var_names, self._shapes, self._dtypes, - self._need_check_feed, self._places, self._use_double_buffer, - self._drop_last, False) + self._reader = core.create_py_reader(self.queue, self._var_names, + self._shapes, self._dtypes, + self._need_check_feed, + self._places, + self._use_double_buffer, + self._drop_last, False) def _init_non_iterable(self): lod_levels = [] @@ -1314,8 +1320,8 @@ class GeneratorLoader(DataLoaderBase): double_buffer_name = data_loader_unique_name_generator('double_buffer') var = global_scope().var(queue_name) - self._queue = core.init_lod_tensor_blocking_queue(var, self._capacity, - self._keep_order) + self._queue = core.init_lod_tensor_blocking_queue( + var, self._capacity, self._keep_order) if self._keep_order: block = default_main_program().current_block() @@ -1325,17 +1331,16 @@ class GeneratorLoader(DataLoaderBase): reader_var = block.create_var(name=reader_name) dtype_int = [int(t) for t in dtypes] - block.append_op( - type='create_py_reader', - inputs={'blocking_queue': [queue_name]}, - outputs={'Out': [reader_var]}, - attrs={ - 'shape_concat': shape_concat, - 'lod_levels': lod_levels, - 'dtypes': dtype_int, - 'need_check_feed': need_check_feed, - 'ranks': ranks - }) + block.append_op(type='create_py_reader', + inputs={'blocking_queue': [queue_name]}, + outputs={'Out': [reader_var]}, + attrs={ + 'shape_concat': shape_concat, + 'lod_levels': lod_levels, + 'dtypes': dtype_int, + 'need_check_feed': need_check_feed, + 'ranks': ranks + }) reader_var.desc.set_dtypes(dtypes) reader_var.persistable = True @@ -1355,8 +1360,8 @@ class GeneratorLoader(DataLoaderBase): reader = monkey_patch_reader_methods(main_prog_var) if self._use_double_buffer: - double_buffer_reader = 
double_buffer( - reader, name=double_buffer_name) + double_buffer_reader = double_buffer(reader, + name=double_buffer_name) # we return a double buffer reader. However, the reset method comes from # py_reader. double_buffer_reader.reset = reader.reset @@ -1410,6 +1415,7 @@ class GeneratorLoader(DataLoaderBase): self._reset() def _start(self): + def __thread_main__(legacy_expected_place): try: # See _DataLoaderIterSingleProcess._thread_loop() for why set expected place here. @@ -1441,8 +1447,8 @@ class GeneratorLoader(DataLoaderBase): logging.warning('Your reader has raised an exception!') six.reraise(*sys.exc_info()) - self._thread = threading.Thread( - target=__thread_main__, args=(_current_expected_place(), )) + self._thread = threading.Thread(target=__thread_main__, + args=(_current_expected_place(), )) self._thread.daemon = True self._thread.start() @@ -1473,17 +1479,16 @@ class GeneratorLoader(DataLoaderBase): break if has_lod: - self.set_sample_list_generator( - paddle.batch( - reader, batch_size=batch_size, drop_last=drop_last), - places=places) + self.set_sample_list_generator(paddle.batch(reader, + batch_size=batch_size, + drop_last=drop_last), + places=places) else: - reader = BatchedTensorProvider( - feed_list=self._feed_list, - place=core.CPUPlace(), - batch_size=batch_size, - generator=reader, - drop_last=drop_last) + reader = BatchedTensorProvider(feed_list=self._feed_list, + place=core.CPUPlace(), + batch_size=batch_size, + generator=reader, + drop_last=drop_last) self.set_batch_generator(reader, places=places) return self @@ -1493,8 +1498,8 @@ class GeneratorLoader(DataLoaderBase): else: places = _get_paddle_place(places) with program_guard(Program(), Program()): - feeder = DataFeeder( - feed_list=self._feed_list, place=core.CPUPlace()) + feeder = DataFeeder(feed_list=self._feed_list, + place=core.CPUPlace()) paddle_reader = feeder.decorate_reader(reader, multi_devices=False) def __tensor_reader_impl__(): @@ -1694,8 +1699,9 @@ class PyReader(DataLoaderBase): use_double_buffer=True, iterable=True, return_list=False): - self._loader = DataLoader.from_generator( - feed_list, capacity, use_double_buffer, iterable, return_list) + self._loader = DataLoader.from_generator(feed_list, capacity, + use_double_buffer, iterable, + return_list) @property def queue(self): @@ -1978,9 +1984,10 @@ class PyReader(DataLoaderBase): class DatasetLoader(DataLoaderBase): + def __init__(self, dataset, places, drop_last): - assert isinstance(dataset, paddle.distributed.fleet.dataset. - DatasetBase), "dataset must be type of DatasetBase" + assert isinstance(dataset, paddle.distributed.fleet.dataset.DatasetBase + ), "dataset must be type of DatasetBase" assert not _non_static_mode( ), "DatasetLoader is not supported in dygraph mode yet" if isinstance(places, (list, tuple)): @@ -1994,15 +2001,17 @@ class DatasetLoader(DataLoaderBase): "Filelist number of dataset {} must be not less than place number {}".format(len(dataset.filelist), thread_num) if dataset.thread_num != 0 and dataset.thread_num != thread_num: - logging.warn('thread_num {} which is set in Dataset is ignored'. - format(dataset.thread_num)) + logging.warn( + 'thread_num {} which is set in Dataset is ignored'.format( + dataset.thread_num)) dataset._set_thread(thread_num) - if isinstance(dataset, paddle.distributed.fleet.dataset. - InMemoryDataset) and dataset.queue_num > thread_num: - logging.warn("queue_num {} which is set in Dataset is ignored". 
- format(dataset.queue_num)) + if isinstance(dataset, paddle.distributed.fleet.dataset.InMemoryDataset + ) and dataset.queue_num > thread_num: + logging.warn( + "queue_num {} which is set in Dataset is ignored".format( + dataset.queue_num)) dataset._set_queue_num(thread_num) self._dataset = dataset @@ -2012,8 +2021,8 @@ class DatasetLoader(DataLoaderBase): ] self._iterable_dataset = core.IterableDatasetWrapper( - dataset.dataset, use_slots, - _convert_places(places), dataset.proto_desc.batch_size, drop_last) + dataset.dataset, use_slots, _convert_places(places), + dataset.proto_desc.batch_size, drop_last) def __iter__(self): self._dataset._finish_to_run() diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index ed28a2813e2..da0b91cc5c9 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -135,20 +135,21 @@ class L2DecayRegularizer(WeightDecayRegularizer): if framework._non_static_mode(): if framework.in_dygraph_mode(): - return _C_ops.final_state_scale( - param, self._regularization_coeff, 0.0, True) + return _C_ops.final_state_scale(param, + self._regularization_coeff, 0.0, + True) else: return _C_ops.scale(param, "scale", self._regularization_coeff) else: - decay = block.create_var( - dtype=param.dtype, shape=param.shape, lod_level=param.lod_level) + decay = block.create_var(dtype=param.dtype, + shape=param.shape, + lod_level=param.lod_level) # Append Op to calculate decay - block.append_op( - type='scale', - inputs={"X": param}, - outputs={"Out": decay}, - attrs={"scale": self._regularization_coeff}) + block.append_op(type='scale', + inputs={"X": param}, + outputs={"Out": decay}, + attrs={"scale": self._regularization_coeff}) return decay @@ -245,20 +246,21 @@ class L1DecayRegularizer(WeightDecayRegularizer): sign = block.create_var(dtype=param.dtype, shape=param.shape) decay = block.create_var(dtype=param.dtype, shape=param.shape) else: - sign = block.create_var( - dtype=param.dtype, shape=param.shape, lod_level=param.lod_level) - decay = block.create_var( - dtype=param.dtype, shape=param.shape, lod_level=param.lod_level) + sign = block.create_var(dtype=param.dtype, + shape=param.shape, + lod_level=param.lod_level) + decay = block.create_var(dtype=param.dtype, + shape=param.shape, + lod_level=param.lod_level) # Append sign op block.append_op(type='sign', inputs={"X": param}, outputs={"Out": sign}) # Append scale op to the output of sign op - block.append_op( - type='scale', - inputs={"X": sign}, - outputs={"Out": decay}, - attrs={"scale": self._regularization_coeff}) + block.append_op(type='scale', + inputs={"X": sign}, + outputs={"Out": decay}, + attrs={"scale": self._regularization_coeff}) return decay diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py index 9ce90a2bd71..d96e640f77a 100644 --- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py @@ -25,22 +25,25 @@ import sys import os -def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32, +def convolution_net(data, + label, + input_dim, + class_dim=2, + emb_dim=32, hid_dim=32): - emb = fluid.layers.embedding( - input=data, size=[input_dim, emb_dim], is_sparse=True) - conv_3 = fluid.nets.sequence_conv_pool( - input=emb, - num_filters=hid_dim, - filter_size=3, - act="tanh", - pool_type="sqrt") - conv_4 = fluid.nets.sequence_conv_pool( - input=emb, - num_filters=hid_dim, - 
filter_size=4, - act="tanh", - pool_type="sqrt") + emb = fluid.layers.embedding(input=data, + size=[input_dim, emb_dim], + is_sparse=True) + conv_3 = fluid.nets.sequence_conv_pool(input=emb, + num_filters=hid_dim, + filter_size=3, + act="tanh", + pool_type="sqrt") + conv_4 = fluid.nets.sequence_conv_pool(input=emb, + num_filters=hid_dim, + filter_size=4, + act="tanh", + pool_type="sqrt") prediction = fluid.layers.fc(input=[conv_3, conv_4], size=class_dim, act="softmax") @@ -50,10 +53,15 @@ def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32, return avg_cost, accuracy, prediction -def dyn_rnn_lstm(data, label, input_dim, class_dim=2, emb_dim=32, +def dyn_rnn_lstm(data, + label, + input_dim, + class_dim=2, + emb_dim=32, lstm_size=128): - emb = fluid.layers.embedding( - input=data, size=[input_dim, emb_dim], is_sparse=True) + emb = fluid.layers.embedding(input=data, + size=[input_dim, emb_dim], + is_sparse=True) sentence = fluid.layers.fc(input=emb, size=lstm_size, act='tanh') rnn = fluid.layers.DynamicRNN() @@ -67,14 +75,14 @@ def dyn_rnn_lstm(data, label, input_dim, class_dim=2, emb_dim=32, gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False) return gate0 + gate1 - forget_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, - lstm_size)) - input_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, - lstm_size)) - output_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, - lstm_size)) - cell_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, - lstm_size)) + forget_gate = fluid.layers.sigmoid( + x=gate_common(word, prev_hidden, lstm_size)) + input_gate = fluid.layers.sigmoid( + x=gate_common(word, prev_hidden, lstm_size)) + output_gate = fluid.layers.sigmoid( + x=gate_common(word, prev_hidden, lstm_size)) + cell_gate = fluid.layers.sigmoid( + x=gate_common(word, prev_hidden, lstm_size)) cell = forget_gate * prev_cell + input_gate * cell_gate hidden = output_gate * fluid.layers.tanh(x=cell) @@ -99,8 +107,9 @@ def stacked_lstm_net(data, stacked_num=3): assert stacked_num % 2 == 1 - emb = fluid.layers.embedding( - input=data, size=[input_dim, emb_dim], is_sparse=True) + emb = fluid.layers.embedding(input=data, + size=[input_dim, emb_dim], + is_sparse=True) # add bias attr # TODO(qijun) linear act @@ -111,8 +120,9 @@ def stacked_lstm_net(data, for i in range(2, stacked_num + 1): fc = fluid.layers.fc(input=inputs, size=hid_dim) - lstm, cell = fluid.layers.dynamic_lstm( - input=fc, size=hid_dim, is_reverse=(i % 2) == 0) + lstm, cell = fluid.layers.dynamic_lstm(input=fc, + size=hid_dim, + is_reverse=(i % 2) == 0) inputs = [fc, lstm] fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max') @@ -138,23 +148,26 @@ def train(word_dict, dict_dim = len(word_dict) class_dim = 2 - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") if not parallel: - cost, acc_out, prediction = net_method( - data, label, input_dim=dict_dim, class_dim=class_dim) + cost, acc_out, prediction = net_method(data, + label, + input_dim=dict_dim, + class_dim=class_dim) else: raise NotImplementedError() adagrad = fluid.optimizer.Adagrad(learning_rate=0.002) adagrad.minimize(cost) - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=1000), - batch_size=BATCH_SIZE) + train_data = paddle.batch(paddle.reader.shuffle( + 
paddle.dataset.imdb.train(word_dict), buf_size=1000), + batch_size=BATCH_SIZE) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) feeder = fluid.DataFeeder(feed_list=[data, label], place=place) @@ -234,12 +247,11 @@ def infer(word_dict, use_cuda, save_dirname=None): recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] - tensor_words = fluid.create_random_int_lodtensor( - recursive_seq_lens, - base_shape, - place, - low=0, - high=word_dict_len - 1) + tensor_words = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. @@ -258,16 +270,16 @@ def main(word_dict, net_method, use_cuda, parallel=False, save_dirname=None): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - train( - word_dict, - net_method, - use_cuda, - parallel=parallel, - save_dirname=save_dirname) + train(word_dict, + net_method, + use_cuda, + parallel=parallel, + save_dirname=save_dirname) infer(word_dict, use_cuda, save_dirname) class TestUnderstandSentiment(unittest.TestCase): + @classmethod def setUpClass(cls): cls.word_dict = paddle.dataset.imdb.word_dict() @@ -283,19 +295,17 @@ class TestUnderstandSentiment(unittest.TestCase): def test_conv_cpu(self): with self.new_program_scope(): - main( - self.word_dict, - net_method=convolution_net, - use_cuda=False, - save_dirname="understand_sentiment_conv.inference.model") + main(self.word_dict, + net_method=convolution_net, + use_cuda=False, + save_dirname="understand_sentiment_conv.inference.model") def test_conv_cpu_parallel(self): with self.new_program_scope(): - main( - self.word_dict, - net_method=convolution_net, - use_cuda=False, - parallel=True) + main(self.word_dict, + net_method=convolution_net, + use_cuda=False, + parallel=True) @unittest.skip(reason="make CI faster") def test_stacked_lstm_cpu(self): @@ -304,31 +314,29 @@ class TestUnderstandSentiment(unittest.TestCase): self.word_dict, net_method=stacked_lstm_net, use_cuda=False, - save_dirname="understand_sentiment_stacked_lstm.inference.model") + save_dirname="understand_sentiment_stacked_lstm.inference.model" + ) def test_stacked_lstm_cpu_parallel(self): with self.new_program_scope(): - main( - self.word_dict, - net_method=stacked_lstm_net, - use_cuda=False, - parallel=True) + main(self.word_dict, + net_method=stacked_lstm_net, + use_cuda=False, + parallel=True) def test_conv_gpu(self): with self.new_program_scope(): - main( - self.word_dict, - net_method=convolution_net, - use_cuda=True, - save_dirname="understand_sentiment_conv.inference.model") + main(self.word_dict, + net_method=convolution_net, + use_cuda=True, + save_dirname="understand_sentiment_conv.inference.model") def test_conv_gpu_parallel(self): with self.new_program_scope(): - main( - self.word_dict, - net_method=convolution_net, - use_cuda=True, - parallel=True) + main(self.word_dict, + net_method=convolution_net, + use_cuda=True, + parallel=True) @unittest.skip(reason="make CI faster") def test_stacked_lstm_gpu(self): @@ -337,32 +345,30 @@ class TestUnderstandSentiment(unittest.TestCase): self.word_dict, net_method=stacked_lstm_net, use_cuda=True, - save_dirname="understand_sentiment_stacked_lstm.inference.model") + save_dirname="understand_sentiment_stacked_lstm.inference.model" + ) def test_stacked_lstm_gpu_parallel(self): with self.new_program_scope(): 
- main( - self.word_dict, - net_method=stacked_lstm_net, - use_cuda=True, - parallel=True) + main(self.word_dict, + net_method=stacked_lstm_net, + use_cuda=True, + parallel=True) @unittest.skip(reason='make CI faster') def test_dynrnn_lstm_gpu(self): with self.new_program_scope(): - main( - self.word_dict, - net_method=dyn_rnn_lstm, - use_cuda=True, - parallel=False) + main(self.word_dict, + net_method=dyn_rnn_lstm, + use_cuda=True, + parallel=False) def test_dynrnn_lstm_gpu_parallel(self): with self.new_program_scope(): - main( - self.word_dict, - net_method=dyn_rnn_lstm, - use_cuda=True, - parallel=True) + main(self.word_dict, + net_method=dyn_rnn_lstm, + use_cuda=True, + parallel=True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index 4324e582fc9..668373838c0 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -75,15 +75,14 @@ def train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16): amp_lists=amp.bf16.AutoMixedPrecisionListsBF16(), use_bf16_guard=False, use_pure_bf16=pure_bf16) - sgd_optimizer.minimize( - avg_cost, startup_program=fluid.default_startup_program()) + sgd_optimizer.minimize(avg_cost, + startup_program=fluid.default_startup_program()) BATCH_SIZE = 20 - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.uci_housing.train(), buf_size=500), - batch_size=BATCH_SIZE) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.uci_housing.train(), buf_size=500), + batch_size=BATCH_SIZE) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) @@ -93,8 +92,9 @@ def train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16): exe.run(fluid.default_startup_program()) test_prog = main_program.clone(for_test=True) if pure_bf16: - sgd_optimizer.amp_init( - exe.place, test_program=test_prog, use_bf16_test=True) + sgd_optimizer.amp_init(exe.place, + test_program=test_prog, + use_bf16_test=True) PASS_NUM = 100 for pass_id in range(PASS_NUM): @@ -106,10 +106,10 @@ def train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16): avg_loss_value = convert_uint16_to_float(avg_loss_value) if avg_loss_value[0] < 10.0: if save_dirname is not None: - paddle.static.save_inference_model( - save_dirname, [x], [y_predict], - exe, - clip_extra=False) + paddle.static.save_inference_model(save_dirname, [x], + [y_predict], + exe, + clip_extra=False) return if math.isnan(float(avg_loss_value)): sys.exit("got NaN loss, training failed.") @@ -161,18 +161,18 @@ def infer(use_cuda, save_dirname=None, use_bf16=False): # The input data should be >= 0 batch_size = 10 - test_reader = paddle.batch( - paddle.dataset.uci_housing.test(), batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.uci_housing.test(), + batch_size=batch_size) test_data = next(test_reader()) - test_feat = numpy.array( - [data[0] for data in test_data]).astype("float32") + test_feat = numpy.array([data[0] + for data in test_data]).astype("float32") if use_bf16: test_feat = convert_float_to_uint16(test_feat) - test_label = numpy.array( - [data[1] for data in test_data]).astype("float32") + test_label = numpy.array([data[1] + for data in test_data]).astype("float32") assert feed_target_names[0] == 'x' results = exe.run(inference_program, @@ -200,6 +200,7 @@ def main(use_cuda, is_local=True, use_bf16=False, pure_bf16=False): class TestFitALineBase(unittest.TestCase): + @contextlib.contextmanager 
def program_scope_guard(self): prog = fluid.Program() @@ -211,6 +212,7 @@ class TestFitALineBase(unittest.TestCase): class TestFitALine(TestFitALineBase): + def test_cpu(self): with self.program_scope_guard(): main(use_cuda=False) @@ -223,6 +225,7 @@ class TestFitALine(TestFitALineBase): @unittest.skipIf(not fluid.core.supports_bfloat16(), "place does not support BF16 evaluation") class TestFitALineBF16(TestFitALineBase): + def test_bf16(self): with self.program_scope_guard(): main(use_cuda=False, use_bf16=True) diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py index 7c2d5c693a9..7096a16d89f 100644 --- a/python/paddle/fluid/tests/book/test_image_classification.py +++ b/python/paddle/fluid/tests/book/test_image_classification.py @@ -28,6 +28,7 @@ paddle.enable_static() def resnet_cifar10(input, depth=32): + def conv_bn_layer(input, ch_out, filter_size, @@ -35,14 +36,13 @@ def resnet_cifar10(input, depth=32): padding, act='relu', bias_attr=False): - tmp = fluid.layers.conv2d( - input=input, - filter_size=filter_size, - num_filters=ch_out, - stride=stride, - padding=padding, - act=None, - bias_attr=bias_attr) + tmp = fluid.layers.conv2d(input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=bias_attr) return fluid.layers.batch_norm(input=tmp, act=act) def shortcut(input, ch_in, ch_out, stride): @@ -65,28 +65,33 @@ def resnet_cifar10(input, depth=32): assert (depth - 2) % 6 == 0 n = (depth - 2) // 6 - conv1 = conv_bn_layer( - input=input, ch_out=16, filter_size=3, stride=1, padding=1) + conv1 = conv_bn_layer(input=input, + ch_out=16, + filter_size=3, + stride=1, + padding=1) res1 = layer_warp(basicblock, conv1, 16, 16, n, 1) res2 = layer_warp(basicblock, res1, 16, 32, n, 2) res3 = layer_warp(basicblock, res2, 32, 64, n, 2) - pool = fluid.layers.pool2d( - input=res3, pool_size=8, pool_type='avg', pool_stride=1) + pool = fluid.layers.pool2d(input=res3, + pool_size=8, + pool_type='avg', + pool_stride=1) return pool def vgg16_bn_drop(input): + def conv_block(input, num_filter, groups, dropouts): - return fluid.nets.img_conv_group( - input=input, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act='relu', - conv_with_batchnorm=True, - conv_batchnorm_drop_rate=dropouts, - pool_type='max') + return fluid.nets.img_conv_group(input=input, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act='relu', + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type='max') conv1 = conv_block(input, 64, 2, [0.3, 0]) conv2 = conv_block(conv1, 128, 2, [0.4, 0]) @@ -132,13 +137,12 @@ def train(net_type, use_cuda, save_dirname, is_local): BATCH_SIZE = 128 PASS_NUM = 1 - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10(), buf_size=128 * 10), - batch_size=BATCH_SIZE) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.cifar.train10(), buf_size=128 * 10), + batch_size=BATCH_SIZE) - test_reader = paddle.batch( - paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE) + test_reader = paddle.batch(paddle.dataset.cifar.test10(), + batch_size=BATCH_SIZE) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) @@ -168,9 +172,9 @@ def train(net_type, use_cuda, save_dirname, is_local): avg_loss_value = numpy.array(avg_loss_list).mean() print( - 'PassID {0:1}, BatchID 
{1:04}, Test Loss {2:2.2}, Acc {3:2.2}'. - format(pass_id, batch_id + 1, - float(avg_loss_value), float(acc_value))) + 'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}' + .format(pass_id, batch_id + 1, float(avg_loss_value), + float(acc_value))) if acc_value > 0.01: # Low threshold for speeding up CI fluid.io.save_inference_model(save_dirname, ["pixel"], @@ -247,6 +251,7 @@ def main(net_type, use_cuda, is_local=True): class TestImageClassification(unittest.TestCase): + def test_vgg_cuda(self): with self.scope_prog_guard(): main('vgg', use_cuda=True) diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index 568d7518a1e..eee1d7959ee 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -55,26 +55,24 @@ def load_parameter(file_name, h, w): def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, **ignored): # 8 features - predicate_embedding = fluid.layers.embedding( - input=predicate, - size=[pred_dict_len, word_dim], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='vemb') - - mark_embedding = fluid.layers.embedding( - input=mark, - size=[mark_dict_len, mark_dim], - dtype='float32', - is_sparse=IS_SPARSE) + predicate_embedding = fluid.layers.embedding(input=predicate, + size=[pred_dict_len, word_dim], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='vemb') + + mark_embedding = fluid.layers.embedding(input=mark, + size=[mark_dict_len, mark_dim], + dtype='float32', + is_sparse=IS_SPARSE) word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] emb_layers = [ - fluid.layers.embedding( - size=[word_dict_len, word_dim], - input=x, - param_attr=fluid.ParamAttr( - name=embedding_name, trainable=False)) for x in word_input + fluid.layers.embedding(size=[word_dict_len, word_dim], + input=x, + param_attr=fluid.ParamAttr(name=embedding_name, + trainable=False)) + for x in word_input ] emb_layers.append(predicate_embedding) emb_layers.append(mark_embedding) @@ -85,12 +83,11 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, hidden_0 = fluid.layers.sums(input=hidden_0_layers) - lstm_0 = fluid.layers.dynamic_lstm( - input=hidden_0, - size=hidden_dim, - candidate_activation='relu', - gate_activation='sigmoid', - cell_activation='sigmoid') + lstm_0 = fluid.layers.dynamic_lstm(input=hidden_0, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid') # stack L-LSTM and R-LSTM with direct edges input_tmp = [hidden_0, lstm_0] @@ -101,13 +98,12 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, fluid.layers.fc(input=input_tmp[1], size=hidden_dim) ]) - lstm = fluid.layers.dynamic_lstm( - input=mix_hidden, - size=hidden_dim, - candidate_activation='relu', - gate_activation='sigmoid', - cell_activation='sigmoid', - is_reverse=((i % 2) == 1)) + lstm = fluid.layers.dynamic_lstm(input=mix_hidden, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid', + is_reverse=((i % 2) == 1)) input_tmp = [mix_hidden, lstm] @@ -121,40 +117,57 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, def train(use_cuda, save_dirname=None, is_local=True): # define network topology - word = fluid.layers.data( - name='word_data', shape=[1], dtype='int64', lod_level=1) - predicate = fluid.layers.data( - name='verb_data', shape=[1], dtype='int64', 
lod_level=1) - ctx_n2 = fluid.layers.data( - name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1) - ctx_n1 = fluid.layers.data( - name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1) - ctx_0 = fluid.layers.data( - name='ctx_0_data', shape=[1], dtype='int64', lod_level=1) - ctx_p1 = fluid.layers.data( - name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1) - ctx_p2 = fluid.layers.data( - name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1) - mark = fluid.layers.data( - name='mark_data', shape=[1], dtype='int64', lod_level=1) + word = fluid.layers.data(name='word_data', + shape=[1], + dtype='int64', + lod_level=1) + predicate = fluid.layers.data(name='verb_data', + shape=[1], + dtype='int64', + lod_level=1) + ctx_n2 = fluid.layers.data(name='ctx_n2_data', + shape=[1], + dtype='int64', + lod_level=1) + ctx_n1 = fluid.layers.data(name='ctx_n1_data', + shape=[1], + dtype='int64', + lod_level=1) + ctx_0 = fluid.layers.data(name='ctx_0_data', + shape=[1], + dtype='int64', + lod_level=1) + ctx_p1 = fluid.layers.data(name='ctx_p1_data', + shape=[1], + dtype='int64', + lod_level=1) + ctx_p2 = fluid.layers.data(name='ctx_p2_data', + shape=[1], + dtype='int64', + lod_level=1) + mark = fluid.layers.data(name='mark_data', + shape=[1], + dtype='int64', + lod_level=1) feature_out = db_lstm(**locals()) - target = fluid.layers.data( - name='target', shape=[1], dtype='int64', lod_level=1) - crf_cost = fluid.layers.linear_chain_crf( - input=feature_out, - label=target, - param_attr=fluid.ParamAttr( - name='crfw', learning_rate=mix_hidden_lr)) + target = fluid.layers.data(name='target', + shape=[1], + dtype='int64', + lod_level=1) + crf_cost = fluid.layers.linear_chain_crf(input=feature_out, + label=target, + param_attr=fluid.ParamAttr( + name='crfw', + learning_rate=mix_hidden_lr)) avg_cost = fluid.layers.mean(crf_cost) # TODO(qiao) # check other optimizers and check why out will be NAN sgd_optimizer = fluid.optimizer.SGD( - learning_rate=fluid.layers.exponential_decay( - learning_rate=0.01, - decay_steps=100000, - decay_rate=0.5, - staircase=True)) + learning_rate=fluid.layers.exponential_decay(learning_rate=0.01, + decay_steps=100000, + decay_rate=0.5, + staircase=True)) sgd_optimizer.minimize(avg_cost) # TODO(qiao) @@ -162,17 +175,15 @@ def train(use_cuda, save_dirname=None, is_local=True): crf_decode = fluid.layers.crf_decoding( input=feature_out, param_attr=fluid.ParamAttr(name='crfw')) - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.conll05.test(), buf_size=8192), - batch_size=BATCH_SIZE) + train_data = paddle.batch(paddle.reader.shuffle( + paddle.dataset.conll05.test(), buf_size=8192), + batch_size=BATCH_SIZE) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - feeder = fluid.DataFeeder( - feed_list=[ - word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target - ], - place=place) + feeder = fluid.DataFeeder(feed_list=[ + word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target + ], + place=place) exe = fluid.Executor(place) def train_loop(main_program): @@ -195,17 +206,18 @@ def train(use_cuda, save_dirname=None, is_local=True): if batch_id % 10 == 0: print("avg_cost:" + str(cost)) if batch_id != 0: - print("second per batch: " + str((time.time( - ) - start_time) / batch_id)) + print("second per batch: " + + str((time.time() - start_time) / batch_id)) # Set the threshold low to speed up the CI test if float(cost) < 80.0: if save_dirname is not None: # TODO(liuyiqun): Change the target to crf_decode - 
fluid.io.save_inference_model(save_dirname, [ - 'word_data', 'verb_data', 'ctx_n2_data', - 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data', - 'ctx_p2_data', 'mark_data' - ], [feature_out], exe) + fluid.io.save_inference_model( + save_dirname, [ + 'word_data', 'verb_data', 'ctx_n2_data', + 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data', + 'ctx_p2_data', 'mark_data' + ], [feature_out], exe) return batch_id = batch_id + 1 @@ -268,54 +280,46 @@ def infer(use_cuda, save_dirname=None): recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] - word = fluid.create_random_int_lodtensor( - recursive_seq_lens, - base_shape, - place, - low=0, - high=word_dict_len - 1) - pred = fluid.create_random_int_lodtensor( - recursive_seq_lens, - base_shape, - place, - low=0, - high=pred_dict_len - 1) - ctx_n2 = fluid.create_random_int_lodtensor( - recursive_seq_lens, - base_shape, - place, - low=0, - high=word_dict_len - 1) - ctx_n1 = fluid.create_random_int_lodtensor( - recursive_seq_lens, - base_shape, - place, - low=0, - high=word_dict_len - 1) - ctx_0 = fluid.create_random_int_lodtensor( - recursive_seq_lens, - base_shape, - place, - low=0, - high=word_dict_len - 1) - ctx_p1 = fluid.create_random_int_lodtensor( - recursive_seq_lens, - base_shape, - place, - low=0, - high=word_dict_len - 1) - ctx_p2 = fluid.create_random_int_lodtensor( - recursive_seq_lens, - base_shape, - place, - low=0, - high=word_dict_len - 1) - mark = fluid.create_random_int_lodtensor( - recursive_seq_lens, - base_shape, - place, - low=0, - high=mark_dict_len - 1) + word = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) + pred = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=pred_dict_len - 1) + ctx_n2 = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) + ctx_n1 = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) + ctx_0 = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) + ctx_p1 = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) + ctx_p2 = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) + mark = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=mark_dict_len - 1) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. 
@@ -358,6 +362,7 @@ def main(use_cuda, is_local=True): class TestLabelSemanticRoles(unittest.TestCase): + def test_cuda(self): with self.scope_prog_guard(): main(use_cuda=True) diff --git a/python/paddle/fluid/tests/book/test_machine_translation.py b/python/paddle/fluid/tests/book/test_machine_translation.py index a0056ba3bab..f0595d52f7f 100644 --- a/python/paddle/fluid/tests/book/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/test_machine_translation.py @@ -41,14 +41,15 @@ decoder_size = hidden_dim def encoder(is_sparse): # encoder - src_word_id = pd.data( - name="src_word_id", shape=[1], dtype='int64', lod_level=1) - src_embedding = pd.embedding( - input=src_word_id, - size=[dict_size, word_dim], - dtype='float32', - is_sparse=is_sparse, - param_attr=fluid.ParamAttr(name='vemb')) + src_word_id = pd.data(name="src_word_id", + shape=[1], + dtype='int64', + lod_level=1) + src_embedding = pd.embedding(input=src_word_id, + size=[dict_size, word_dim], + dtype='float32', + is_sparse=is_sparse, + param_attr=fluid.ParamAttr(name='vemb')) fc1 = pd.fc(input=src_embedding, size=hidden_dim * 4, act='tanh') lstm_hidden0, lstm_0 = pd.dynamic_lstm(input=fc1, size=hidden_dim * 4) @@ -58,14 +59,15 @@ def encoder(is_sparse): def decoder_train(context, is_sparse): # decoder - trg_language_word = pd.data( - name="target_language_word", shape=[1], dtype='int64', lod_level=1) - trg_embedding = pd.embedding( - input=trg_language_word, - size=[dict_size, word_dim], - dtype='float32', - is_sparse=is_sparse, - param_attr=fluid.ParamAttr(name='vemb')) + trg_language_word = pd.data(name="target_language_word", + shape=[1], + dtype='int64', + lod_level=1) + trg_embedding = pd.embedding(input=trg_language_word, + size=[dict_size, word_dim], + dtype='float32', + is_sparse=is_sparse, + param_attr=fluid.ParamAttr(name='vemb')) rnn = pd.DynamicRNN() with rnn.block(): @@ -98,8 +100,10 @@ def decoder_decode(context, is_sparse): scores_array = pd.create_array('float32') init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2) - init_scores = pd.data( - name="init_scores", shape=[1], dtype="float32", lod_level=2) + init_scores = pd.data(name="init_scores", + shape=[1], + dtype="float32", + lod_level=2) pd.array_write(init_ids, array=ids_array, i=counter) pd.array_write(init_scores, array=scores_array, i=counter) @@ -115,11 +119,10 @@ def decoder_decode(context, is_sparse): # expand the recursive_sequence_lengths of pre_state to be the same with pre_score pre_state_expanded = pd.sequence_expand(pre_state, pre_score) - pre_ids_emb = pd.embedding( - input=pre_ids, - size=[dict_size, word_dim], - dtype='float32', - is_sparse=is_sparse) + pre_ids_emb = pd.embedding(input=pre_ids, + size=[dict_size, word_dim], + dtype='float32', + is_sparse=is_sparse) # use rnn unit to update rnn current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb], @@ -132,17 +135,16 @@ def decoder_decode(context, is_sparse): act='softmax') topk_scores, topk_indices = pd.topk(current_score, k=beam_size) # calculate accumulated scores after topk to reduce computation cost - accu_scores = pd.elementwise_add( - x=pd.log(topk_scores), y=pd.reshape( - pre_score, shape=[-1]), axis=0) - selected_ids, selected_scores = pd.beam_search( - pre_ids, - pre_score, - topk_indices, - accu_scores, - beam_size, - end_id=10, - level=0) + accu_scores = pd.elementwise_add(x=pd.log(topk_scores), + y=pd.reshape(pre_score, shape=[-1]), + axis=0) + selected_ids, selected_scores = pd.beam_search(pre_ids, + pre_score, + topk_indices, + 
accu_scores, + beam_size, + end_id=10, + level=0) pd.increment(x=counter, value=1, in_place=True) @@ -172,8 +174,10 @@ def train_main(use_cuda, is_sparse, is_local=True): context = encoder(is_sparse) rnn_out = decoder_train(context, is_sparse) - label = pd.data( - name="target_language_next_word", shape=[1], dtype='int64', lod_level=1) + label = pd.data(name="target_language_next_word", + shape=[1], + dtype='int64', + lod_level=1) cost = pd.cross_entropy(input=rnn_out, label=label) avg_cost = pd.mean(cost) @@ -183,10 +187,9 @@ def train_main(use_cuda, is_sparse, is_local=True): regularization_coeff=0.1)) optimizer.minimize(avg_cost) - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.wmt14.train(dict_size), buf_size=1000), - batch_size=batch_size) + train_data = paddle.batch(paddle.reader.shuffle( + paddle.dataset.wmt14.train(dict_size), buf_size=1000), + batch_size=batch_size) feed_order = [ 'src_word_id', 'target_language_word', 'target_language_next_word' @@ -252,8 +255,8 @@ def decode_main(use_cuda, is_sparse): exe.run(framework.default_startup_program()) init_ids_data = np.array([1 for _ in range(batch_size)], dtype='int64') - init_scores_data = np.array( - [1. for _ in range(batch_size)], dtype='float32') + init_scores_data = np.array([1. for _ in range(batch_size)], + dtype='float32') init_ids_data = init_ids_data.reshape((batch_size, 1)) init_scores_data = init_scores_data.reshape((batch_size, 1)) init_recursive_seq_lens = [1] * batch_size @@ -264,10 +267,9 @@ def decode_main(use_cuda, is_sparse): init_scores = fluid.create_lod_tensor(init_scores_data, init_recursive_seq_lens, place) - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.wmt14.train(dict_size), buf_size=1000), - batch_size=batch_size) + train_data = paddle.batch(paddle.reader.shuffle( + paddle.dataset.wmt14.train(dict_size), buf_size=1000), + batch_size=batch_size) feed_order = ['src_word_id'] feed_list = [ @@ -305,8 +307,8 @@ def scope_prog_guard(): def inject_test_train(use_cuda, is_sparse): - f_name = 'test_{0}_{1}_train'.format('cuda' if use_cuda else 'cpu', 'sparse' - if is_sparse else 'dense') + f_name = 'test_{0}_{1}_train'.format('cuda' if use_cuda else 'cpu', + 'sparse' if is_sparse else 'dense') def f(*args): with scope_prog_guard(): @@ -316,9 +318,8 @@ def inject_test_train(use_cuda, is_sparse): def inject_test_decode(use_cuda, is_sparse, decorator=None): - f_name = 'test_{0}_{1}_decode'.format('cuda' - if use_cuda else 'cpu', 'sparse' - if is_sparse else 'dense') + f_name = 'test_{0}_{1}_decode'.format('cuda' if use_cuda else 'cpu', + 'sparse' if is_sparse else 'dense') def f(*args): with scope_prog_guard(): @@ -342,8 +343,9 @@ for _use_cuda_ in (False, True): _decorator_ = unittest.skip( reason='Beam Search does not support CUDA!') - inject_test_decode( - is_sparse=_is_sparse_, use_cuda=_use_cuda_, decorator=_decorator_) + inject_test_decode(is_sparse=_is_sparse_, + use_cuda=_use_cuda_, + decorator=_decorator_) if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py index 71c57b85160..5301f9aa760 100644 --- a/python/paddle/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/fluid/tests/book/test_recognize_digits.py @@ -46,21 +46,19 @@ def mlp(img, label): def conv_net(img, label): - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_1 = 
fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") return loss_net(conv_pool_2, label) @@ -96,12 +94,11 @@ def train(nn_type, exe = fluid.Executor(place) - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=500), - batch_size=BATCH_SIZE) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=BATCH_SIZE) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=500), + batch_size=BATCH_SIZE) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=BATCH_SIZE) feeder = fluid.DataFeeder(feed_list=[img, label], place=place) def train_loop(main_program): @@ -143,9 +140,9 @@ def train(nn_type, return else: print( - 'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'. - format(pass_id, batch_id + 1, - float(avg_loss_val), float(acc_val))) + 'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}' + .format(pass_id, batch_id + 1, float(avg_loss_val), + float(acc_val))) if math.isnan(float(avg_loss_val)): sys.exit("got NaN loss, training failed.") raise AssertionError("Loss of recognize digits is too large") @@ -192,8 +189,9 @@ def infer(use_cuda, # data using feed operators), and the fetch_targets (variables that # we want to obtain data from using fetch operators). [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - save_dirname, exe, model_filename, params_filename) + fetch_targets] = fluid.io.load_inference_model(save_dirname, exe, + model_filename, + params_filename) # The input's dimension of conv should be 4-D or 5-D. # Use normilized image pixels as input data, which should be in the range [-1.0, 1.0]. 
@@ -222,19 +220,17 @@ def main(use_cuda, parallel, nn_type, combine): params_filename = "__params_combined__" # call train() with is_local argument to run distributed train - train( - nn_type=nn_type, - use_cuda=use_cuda, - parallel=parallel, - save_dirname=save_dirname, - save_full_dirname=save_full_dirname, - model_filename=model_filename, - params_filename=params_filename) - infer( - use_cuda=use_cuda, - save_dirname=save_dirname, - model_filename=model_filename, - params_filename=params_filename) + train(nn_type=nn_type, + use_cuda=use_cuda, + parallel=parallel, + save_dirname=save_dirname, + save_full_dirname=save_full_dirname, + model_filename=model_filename, + params_filename=params_filename) + infer(use_cuda=use_cuda, + save_dirname=save_dirname, + model_filename=model_filename, + params_filename=params_filename) class TestRecognizeDigits(unittest.TestCase): @@ -242,6 +238,7 @@ class TestRecognizeDigits(unittest.TestCase): def inject_test_method(use_cuda, parallel, nn_type, combine): + def __impl__(self): prog = fluid.Program() startup_prog = fluid.Program() @@ -250,10 +247,9 @@ def inject_test_method(use_cuda, parallel, nn_type, combine): with fluid.program_guard(prog, startup_prog): main(use_cuda, parallel, nn_type, combine) - fn = 'test_{0}_{1}_{2}_{3}'.format(nn_type, 'cuda' - if use_cuda else 'cpu', 'parallel' - if parallel else 'normal', 'combine' - if combine else 'separate') + fn = 'test_{0}_{1}_{2}_{3}'.format(nn_type, 'cuda' if use_cuda else 'cpu', + 'parallel' if parallel else 'normal', + 'combine' if combine else 'separate') setattr(TestRecognizeDigits, fn, __impl__) diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index c2ab249f571..8a4b4c26837 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -41,12 +41,11 @@ def get_usr_combined_features(): uid = layers.data(name='user_id', shape=[1], dtype='int64') - usr_emb = layers.embedding( - input=uid, - dtype='float32', - size=[USR_DICT_SIZE, 32], - param_attr='user_table', - is_sparse=IS_SPARSE) + usr_emb = layers.embedding(input=uid, + dtype='float32', + size=[USR_DICT_SIZE, 32], + param_attr='user_table', + is_sparse=IS_SPARSE) usr_fc = layers.fc(input=usr_emb, size=32) @@ -54,33 +53,30 @@ def get_usr_combined_features(): usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64') - usr_gender_emb = layers.embedding( - input=usr_gender_id, - size=[USR_GENDER_DICT_SIZE, 16], - param_attr='gender_table', - is_sparse=IS_SPARSE) + usr_gender_emb = layers.embedding(input=usr_gender_id, + size=[USR_GENDER_DICT_SIZE, 16], + param_attr='gender_table', + is_sparse=IS_SPARSE) usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64") - usr_age_emb = layers.embedding( - input=usr_age_id, - size=[USR_AGE_DICT_SIZE, 16], - is_sparse=IS_SPARSE, - param_attr='age_table') + usr_age_emb = layers.embedding(input=usr_age_id, + size=[USR_AGE_DICT_SIZE, 16], + is_sparse=IS_SPARSE, + param_attr='age_table') usr_age_fc = layers.fc(input=usr_age_emb, size=16) USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64") - usr_job_emb = layers.embedding( - input=usr_job_id, - size=[USR_JOB_DICT_SIZE, 16], - param_attr='job_table', - is_sparse=IS_SPARSE) + usr_job_emb = 
layers.embedding(input=usr_job_id, + size=[USR_JOB_DICT_SIZE, 16], + param_attr='job_table', + is_sparse=IS_SPARSE) usr_job_fc = layers.fc(input=usr_job_emb, size=16) @@ -98,40 +94,44 @@ def get_mov_combined_features(): mov_id = layers.data(name='movie_id', shape=[1], dtype='int64') - mov_emb = layers.embedding( - input=mov_id, - dtype='float32', - size=[MOV_DICT_SIZE, 32], - param_attr='movie_table', - is_sparse=IS_SPARSE) + mov_emb = layers.embedding(input=mov_id, + dtype='float32', + size=[MOV_DICT_SIZE, 32], + param_attr='movie_table', + is_sparse=IS_SPARSE) mov_fc = layers.fc(input=mov_emb, size=32) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) - category_id = layers.data( - name='category_id', shape=[1], dtype='int64', lod_level=1) + category_id = layers.data(name='category_id', + shape=[1], + dtype='int64', + lod_level=1) - mov_categories_emb = layers.embedding( - input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) + mov_categories_emb = layers.embedding(input=category_id, + size=[CATEGORY_DICT_SIZE, 32], + is_sparse=IS_SPARSE) - mov_categories_hidden = layers.sequence_pool( - input=mov_categories_emb, pool_type="sum") + mov_categories_hidden = layers.sequence_pool(input=mov_categories_emb, + pool_type="sum") MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) - mov_title_id = layers.data( - name='movie_title', shape=[1], dtype='int64', lod_level=1) + mov_title_id = layers.data(name='movie_title', + shape=[1], + dtype='int64', + lod_level=1) - mov_title_emb = layers.embedding( - input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE) + mov_title_emb = layers.embedding(input=mov_title_id, + size=[MOV_TITLE_DICT_SIZE, 32], + is_sparse=IS_SPARSE) - mov_title_conv = nets.sequence_conv_pool( - input=mov_title_emb, - num_filters=32, - filter_size=3, - act="tanh", - pool_type="sum") + mov_title_conv = nets.sequence_conv_pool(input=mov_title_emb, + num_filters=32, + filter_size=3, + act="tanh", + pool_type="sum") concat_embed = layers.concat( input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1) @@ -170,12 +170,11 @@ def train(use_cuda, save_dirname, is_local=True): exe = Executor(place) - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.movielens.train(), buf_size=8192), - batch_size=BATCH_SIZE) - test_reader = paddle.batch( - paddle.dataset.movielens.test(), batch_size=BATCH_SIZE) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.movielens.train(), buf_size=8192), + batch_size=BATCH_SIZE) + test_reader = paddle.batch(paddle.dataset.movielens.test(), + batch_size=BATCH_SIZE) feed_order = [ 'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', 'category_id', @@ -212,10 +211,11 @@ def train(use_cuda, save_dirname, is_local=True): if test_avg_cost < 6.0: # if avg_cost less than 6.0, we think our code is good. 
if save_dirname is not None: - fluid.io.save_inference_model(save_dirname, [ - "user_id", "gender_id", "age_id", "job_id", - "movie_id", "category_id", "movie_title" - ], [scale_infer], exe) + fluid.io.save_inference_model( + save_dirname, [ + "user_id", "gender_id", "age_id", "job_id", + "movie_id", "category_id", "movie_title" + ], [scale_infer], exe) return if math.isnan(float(out[0])): @@ -289,13 +289,11 @@ def infer(use_cuda, save_dirname=None): assert feed_target_names[5] == "category_id" category_id = fluid.create_lod_tensor( - [np.array( - [10, 8, 9], dtype='int64')], [[3]], place) + [np.array([10, 8, 9], dtype='int64')], [[3]], place) assert feed_target_names[6] == "movie_title" movie_title = fluid.create_lod_tensor( - [np.array( - [1069, 4140, 2923, 710, 988], dtype='int64')], [[5]], + [np.array([1069, 4140, 2923, 710, 988], dtype='int64')], [[5]], place) # Construct feed as a dictionary of {feed_target_name: feed_target_data} diff --git a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py index 3791e386ecf..7a31035d2fb 100644 --- a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py +++ b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py @@ -46,18 +46,16 @@ def bi_lstm_encoder(input_seq, hidden_size): input_forward_proj = fluid.layers.fc(input=input_seq, size=hidden_size * 4, bias_attr=True) - forward, _ = fluid.layers.dynamic_lstm( - input=input_forward_proj, - size=hidden_size * 4, - use_peepholes=USE_PEEPHOLES) + forward, _ = fluid.layers.dynamic_lstm(input=input_forward_proj, + size=hidden_size * 4, + use_peepholes=USE_PEEPHOLES) input_backward_proj = fluid.layers.fc(input=input_seq, size=hidden_size * 4, bias_attr=True) - backward, _ = fluid.layers.dynamic_lstm( - input=input_backward_proj, - size=hidden_size * 4, - is_reverse=True, - use_peepholes=USE_PEEPHOLES) + backward, _ = fluid.layers.dynamic_lstm(input=input_backward_proj, + size=hidden_size * 4, + is_reverse=True, + use_peepholes=USE_PEEPHOLES) forward_last = fluid.layers.sequence_last_step(input=forward) backward_first = fluid.layers.sequence_first_step(input=backward) @@ -67,6 +65,7 @@ def bi_lstm_encoder(input_seq, hidden_size): # FIXME(peterzhang2029): Replace this function with the lstm_unit_op. 
def lstm_step(x_t, hidden_t_prev, cell_t_prev, size): + def linear(inputs): return fluid.layers.fc(input=inputs, size=size, bias_attr=True) @@ -76,13 +75,12 @@ def lstm_step(x_t, hidden_t_prev, cell_t_prev, size): cell_tilde = fluid.layers.tanh(x=linear([hidden_t_prev, x_t])) cell_t = fluid.layers.sums(input=[ - fluid.layers.elementwise_mul( - x=forget_gate, y=cell_t_prev), fluid.layers.elementwise_mul( - x=input_gate, y=cell_tilde) + fluid.layers.elementwise_mul(x=forget_gate, y=cell_t_prev), + fluid.layers.elementwise_mul(x=input_gate, y=cell_tilde) ]) - hidden_t = fluid.layers.elementwise_mul( - x=output_gate, y=fluid.layers.tanh(x=cell_t)) + hidden_t = fluid.layers.elementwise_mul(x=output_gate, + y=fluid.layers.tanh(x=cell_t)) return hidden_t, cell_t @@ -104,8 +102,8 @@ def lstm_decoder_without_attention(target_embedding, decoder_boot, context, hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True) cell_mem = rnn.memory(init=cell_init) - decoder_inputs = fluid.layers.concat( - input=[context, current_word], axis=1) + decoder_inputs = fluid.layers.concat(input=[context, current_word], + axis=1) h, c = lstm_step(decoder_inputs, hidden_mem, cell_mem, decoder_size) rnn.update_memory(hidden_mem, h) rnn.update_memory(cell_mem, c) @@ -120,8 +118,10 @@ def lstm_decoder_without_attention(target_embedding, decoder_boot, context, def seq_to_seq_net(): """Construct a seq2seq network.""" - src_word_idx = fluid.layers.data( - name='source_sequence', shape=[1], dtype='int64', lod_level=1) + src_word_idx = fluid.layers.data(name='source_sequence', + shape=[1], + dtype='int64', + lod_level=1) src_embedding = fluid.layers.embedding( input=src_word_idx, @@ -139,8 +139,10 @@ def seq_to_seq_net(): bias_attr=False, act='tanh') - trg_word_idx = fluid.layers.data( - name='target_sequence', shape=[1], dtype='int64', lod_level=1) + trg_word_idx = fluid.layers.data(name='target_sequence', + shape=[1], + dtype='int64', + lod_level=1) trg_embedding = fluid.layers.embedding( input=trg_word_idx, @@ -149,8 +151,10 @@ def seq_to_seq_net(): prediction = lstm_decoder_without_attention(trg_embedding, decoder_boot, encoded_vector, decoder_size) - label = fluid.layers.data( - name='label_sequence', shape=[1], dtype='int64', lod_level=1) + label = fluid.layers.data(name='label_sequence', + shape=[1], + dtype='int64', + lod_level=1) cost = fluid.layers.cross_entropy(input=prediction, label=label) avg_cost = fluid.layers.mean(cost) @@ -163,10 +167,9 @@ def train(use_cuda, save_dirname=None): optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4) optimizer.minimize(avg_cost) - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.wmt14.train(dict_size), buf_size=1000), - batch_size=batch_size) + train_data = paddle.batch(paddle.reader.shuffle( + paddle.dataset.wmt14.train(dict_size), buf_size=1000), + batch_size=batch_size) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = Executor(place) @@ -194,8 +197,8 @@ def train(use_cuda, save_dirname=None): if batch_id > 3: if save_dirname is not None: fluid.io.save_inference_model( - save_dirname, ['source_sequence', - 'target_sequence'], [prediction], exe) + save_dirname, ['source_sequence', 'target_sequence'], + [prediction], exe) return batch_id += 1 @@ -230,10 +233,16 @@ def infer(use_cuda, save_dirname=None): recursive_seq_lens = [[4, 6]] base_shape = [1] # The range of random integers is [low, high] - word_data = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=1) - trg_word = 
fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=1) + word_data = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=1) + trg_word = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=1) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. @@ -264,6 +273,7 @@ def main(use_cuda): class TestRnnEncoderDecoder(unittest.TestCase): + def test_cuda(self): with self.scope_prog_guard(): main(use_cuda=True) diff --git a/python/paddle/fluid/tests/book/test_word2vec_book.py b/python/paddle/fluid/tests/book/test_word2vec_book.py index 650ccc0776a..37d5106e850 100644 --- a/python/paddle/fluid/tests/book/test_word2vec_book.py +++ b/python/paddle/fluid/tests/book/test_word2vec_book.py @@ -54,30 +54,26 @@ def train(target, IS_SPARSE = is_sparse def __network__(words): - embed_first = fluid.layers.embedding( - input=words[0], - size=[dict_size, EMBED_SIZE], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='shared_w') - embed_second = fluid.layers.embedding( - input=words[1], - size=[dict_size, EMBED_SIZE], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='shared_w') - embed_third = fluid.layers.embedding( - input=words[2], - size=[dict_size, EMBED_SIZE], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='shared_w') - embed_forth = fluid.layers.embedding( - input=words[3], - size=[dict_size, EMBED_SIZE], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='shared_w') + embed_first = fluid.layers.embedding(input=words[0], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_second = fluid.layers.embedding(input=words[1], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_third = fluid.layers.embedding(input=words[2], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_forth = fluid.layers.embedding(input=words[3], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') concat_embed = fluid.layers.concat( input=[embed_first, embed_second, embed_third, embed_forth], axis=1) @@ -117,8 +113,8 @@ def train(target, sgd_optimizer.minimize(avg_cost, fluid.default_startup_program()) - train_reader = paddle.batch( - paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) + train_reader = paddle.batch(paddle.dataset.imikolov.train(word_dict, N), + BATCH_SIZE) place = get_place(target) exe = fluid.Executor(place) @@ -138,9 +134,10 @@ def train(target, fetch_list=[avg_cost]) if avg_cost_np[0] < 5.0: if save_dirname is not None and not pure_bf16: - fluid.io.save_inference_model(save_dirname, [ - 'firstw', 'secondw', 'thirdw', 'forthw' - ], [predict_word], exe) + fluid.io.save_inference_model( + save_dirname, + ['firstw', 'secondw', 'thirdw', 'forthw'], + [predict_word], exe) return if math.isnan(float(avg_cost_np[0])): sys.exit("got NaN loss, training failed.") @@ -200,14 +197,26 @@ def infer(target, save_dirname=None): recursive_seq_lens = [[1]] base_shape = [1] # The range of random integers is [low, high] - first_word = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) - second_word = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) - third_word = 
fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) - fourth_word = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) + first_word = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=dict_size - 1) + second_word = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=dict_size - 1) + third_word = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=dict_size - 1) + fourth_word = fluid.create_random_int_lodtensor(recursive_seq_lens, + base_shape, + place, + low=0, + high=dict_size - 1) assert feed_target_names[0] == 'firstw' assert feed_target_names[1] == 'secondw' @@ -274,13 +283,12 @@ def main(target, is_sparse, is_parallel, use_bf16, pure_bf16): # so only inference is turned on. train("cpu", is_sparse, is_parallel, save_dirname) else: - train( - target, - is_sparse, - is_parallel, - save_dirname, - use_bf16=use_bf16, - pure_bf16=pure_bf16) + train(target, + is_sparse, + is_parallel, + save_dirname, + use_bf16=use_bf16, + pure_bf16=pure_bf16) infer(target, save_dirname) @@ -298,11 +306,10 @@ def inject_test_method(target, is_parallel, use_bf16=False, pure_bf16=False): - fn_name = "test_{0}_{1}_{2}{3}".format(target, "sparse" - if is_sparse else "dense", "parallel" - if is_parallel else "normal", - "_purebf16" if pure_bf16 else "_bf16" - if use_bf16 else "") + fn_name = "test_{0}_{1}_{2}{3}".format( + target, "sparse" if is_sparse else "dense", + "parallel" if is_parallel else "normal", + "_purebf16" if pure_bf16 else "_bf16" if use_bf16 else "") def __impl__(*args, **kwargs): prog = fluid.Program() @@ -312,13 +319,13 @@ def inject_test_method(target, with fluid.program_guard(prog, startup_prog): main(target, is_sparse, is_parallel, use_bf16, pure_bf16) - if (not fluid.core.is_compiled_with_cuda() or - target == "cuda") and is_sparse: + if (not fluid.core.is_compiled_with_cuda() + or target == "cuda") and is_sparse: fn = __impl__ else: # skip the other test when on CI server - fn = unittest.skipUnless( - condition=FULL_TEST, reason=SKIP_REASON)(__impl__) + fn = unittest.skipUnless(condition=FULL_TEST, + reason=SKIP_REASON)(__impl__) setattr(W2VTest, fn_name, fn) diff --git a/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_setup.py b/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_setup.py index d52882acfc9..94de1a39ccf 100644 --- a/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_setup.py +++ b/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_setup.py @@ -25,6 +25,7 @@ from setuptools.command.build_ext import build_ext # cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid # for C/ObjC but not for C++ class BuildExt(build_ext): + def build_extensions(self): if '-Wstrict-prototypes' in self.compiler.compiler_so: self.compiler.compiler_so.remove('-Wstrict-prototypes') @@ -74,9 +75,8 @@ custom_kernel_dot_module = Extension( libraries=libs, extra_compile_args=paddle_extra_compile_args) -setup( - name='custom_kernel_dot', - version='1.0', - description='custom kernel fot compiling', - cmdclass={'build_ext': BuildExt}, - ext_modules=[custom_kernel_dot_module]) +setup(name='custom_kernel_dot', + version='1.0', + description='custom kernel fot compiling', + cmdclass={'build_ext': BuildExt}, + ext_modules=[custom_kernel_dot_module]) diff --git a/python/paddle/fluid/tests/custom_kernel/test_custom_kernel_dot.py 
b/python/paddle/fluid/tests/custom_kernel/test_custom_kernel_dot.py index 13d8a29e71b..d1929fef5cc 100644 --- a/python/paddle/fluid/tests/custom_kernel/test_custom_kernel_dot.py +++ b/python/paddle/fluid/tests/custom_kernel/test_custom_kernel_dot.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,6 +21,7 @@ import numpy as np # use dot as test case. class TestCustomKernelDot(unittest.TestCase): + def setUp(self): # compile so and set to current path cur_dir = os.path.dirname(os.path.abspath(__file__)) @@ -48,8 +49,8 @@ class TestCustomKernelDot(unittest.TestCase): self.assertTrue( np.array_equal(out.numpy(), result), - "custom kernel dot out: {},\n numpy dot out: {}".format(out.numpy(), - result)) + "custom kernel dot out: {},\n numpy dot out: {}".format( + out.numpy(), result)) def tearDown(self): del os.environ['CUSTOM_DEVICE_ROOT'] diff --git a/python/paddle/fluid/tests/custom_kernel/test_custom_kernel_load.py b/python/paddle/fluid/tests/custom_kernel/test_custom_kernel_load.py index 1d7b29e8511..a4def8df9e0 100644 --- a/python/paddle/fluid/tests/custom_kernel/test_custom_kernel_load.py +++ b/python/paddle/fluid/tests/custom_kernel/test_custom_kernel_load.py @@ -20,6 +20,7 @@ import numpy as np class TestCustomKernelLoad(unittest.TestCase): + def setUp(self): # compile so and set to current path cur_dir = os.path.dirname(os.path.abspath(__file__)) @@ -32,8 +33,9 @@ class TestCustomKernelLoad(unittest.TestCase): # get paddle lib path and place so paddle_lib_path = '' site_dirs = site.getsitepackages() if hasattr( - site, 'getsitepackages' - ) else [x for x in sys.path if 'site-packages' in x] + site, 'getsitepackages') else [ + x for x in sys.path if 'site-packages' in x + ] for site_dir in site_dirs: lib_dir = os.path.sep.join([site_dir, 'paddle', 'libs']) if os.path.exists(lib_dir): @@ -65,8 +67,8 @@ class TestCustomKernelLoad(unittest.TestCase): self.assertTrue( np.array_equal(out.numpy(), result), - "custom kernel dot out: {},\n numpy dot out: {}".format(out.numpy(), - result)) + "custom kernel dot out: {},\n numpy dot out: {}".format( + out.numpy(), result)) def tearDown(self): cmd = 'rm -rf {}'.format(self.default_path) diff --git a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op_setup.py b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op_setup.py index 8889a56ad20..e751a335d72 100644 --- a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op_setup.py +++ b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op_setup.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -40,11 +40,9 @@ if core.is_compiled_with_nccl(): macros.append(("PADDLE_WITH_NCCL", None)) include_dirs = list(paddle_includes) + [cwd] -setup( - name=os.getenv("MODULE_NAME", "custom_raw_op_kernel_op_setup"), - ext_modules=extension( - sources=sources, - include_dirs=include_dirs, - extra_compile_args=extra_compile_args, - _compile_dir=compile_dir, - define_macros=macros)) +setup(name=os.getenv("MODULE_NAME", "custom_raw_op_kernel_op_setup"), + ext_modules=extension(sources=sources, + include_dirs=include_dirs, + extra_compile_args=extra_compile_args, + _compile_dir=compile_dir, + define_macros=macros)) diff --git a/python/paddle/fluid/tests/custom_op/test_check_abi.py b/python/paddle/fluid/tests/custom_op/test_check_abi.py index baef25d2d11..727e02f62cd 100644 --- a/python/paddle/fluid/tests/custom_op/test_check_abi.py +++ b/python/paddle/fluid/tests/custom_op/test_check_abi.py @@ -21,6 +21,7 @@ import paddle.utils.cpp_extension.extension_utils as utils class TestABIBase(unittest.TestCase): + def test_environ(self): compiler_list = ['gcc', 'cl'] for compiler in compiler_list: @@ -35,6 +36,7 @@ class TestABIBase(unittest.TestCase): class TestCheckCompiler(TestABIBase): + def test_expected_compiler(self): if utils.OS_NAME.startswith('linux'): gt = ['gcc', 'g++', 'gnu-c++', 'gnu-cc'] @@ -85,8 +87,8 @@ class TestCheckCompiler(TestABIBase): self.assertFalse(flag) # check ABI Compatibility WARNING self.assertTrue(len(error) == 1) - self.assertTrue("Failed to check compiler version for" in - str(error[0].message)) + self.assertTrue("Failed to check compiler version for" in str( + error[0].message)) def test_exception_linux(self): # clear environ @@ -106,8 +108,8 @@ class TestCheckCompiler(TestABIBase): self.assertFalse(flag) # check ABI Compatibility WARNING self.assertTrue(len(error) == 1) - self.assertTrue("Failed to check compiler version for" in - str(error[0].message)) + self.assertTrue("Failed to check compiler version for" in str( + error[0].message)) # restore utils._expected_compiler_current_platform = raw_func @@ -136,6 +138,7 @@ class TestCheckCompiler(TestABIBase): class TestRunCMDException(unittest.TestCase): + def test_exception(self): for verbose in [True, False]: with self.assertRaisesRegexp(RuntimeError, "Failed to run command"): diff --git a/python/paddle/fluid/tests/custom_op/test_context_pool.py b/python/paddle/fluid/tests/custom_op/test_context_pool.py index d532b29688b..d4a079ee4fe 100644 --- a/python/paddle/fluid/tests/custom_op/test_context_pool.py +++ b/python/paddle/fluid/tests/custom_op/test_context_pool.py @@ -24,8 +24,8 @@ from paddle.fluid.framework import _test_eager_guard # Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. 
-file = '{}\\context_pool_jit\\context_pool_jit.pyd'.format(get_build_directory( -)) +file = '{}\\context_pool_jit\\context_pool_jit.pyd'.format( + get_build_directory()) if os.name == 'nt' and os.path.isfile(file): cmd = 'del {}'.format(file) run_cmd(cmd, True) @@ -41,6 +41,7 @@ custom_ops = load( class TestContextPool(unittest.TestCase): + def setUp(self): self.devices = ['cpu'] if paddle.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py b/python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py index f6945125243..953ca551906 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py @@ -22,10 +22,10 @@ from utils import paddle_includes, extra_cc_args, extra_nvcc_args from paddle.utils.cpp_extension.extension_utils import run_cmd from paddle.fluid.framework import _test_eager_guard -# Because Windows don't use docker, the shared lib already exists in the +# Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. -file = '{}\\custom_attrs_jit\\custom_attrs_jit.pyd'.format(get_build_directory( -)) +file = '{}\\custom_attrs_jit\\custom_attrs_jit.pyd'.format( + get_build_directory()) if os.name == 'nt' and os.path.isfile(file): cmd = 'del {}'.format(file) run_cmd(cmd, True) @@ -41,6 +41,7 @@ custom_attrs = load( class TestJitCustomAttrs(unittest.TestCase): + def setUp(self): paddle.set_device('cpu') # prepare test value @@ -57,10 +58,11 @@ class TestJitCustomAttrs(unittest.TestCase): def func_attr_value(self): x = paddle.ones([2, 2], dtype='float32') x.stop_gradient = False - out = custom_attrs.attr_test( - x, self.bool_attr, self.int_attr, self.float_attr, self.int64_attr, - self.str_attr, self.int_vec_attr, self.float_vec_attr, - self.int64_vec_attr, self.str_vec_attr) + out = custom_attrs.attr_test(x, self.bool_attr, self.int_attr, + self.float_attr, self.int64_attr, + self.str_attr, self.int_vec_attr, + self.float_vec_attr, self.int64_vec_attr, + self.str_vec_attr) out.stop_gradient = False out.backward() @@ -74,10 +76,12 @@ class TestJitCustomAttrs(unittest.TestCase): def func_const_attr_value(self): x = paddle.ones([2, 2], dtype='float32') x.stop_gradient = False - out = custom_attrs.const_attr_test( - x, self.bool_attr, self.int_attr, self.float_attr, self.int64_attr, - self.str_attr, self.int_vec_attr, self.float_vec_attr, - self.int64_vec_attr, self.str_vec_attr) + out = custom_attrs.const_attr_test(x, self.bool_attr, self.int_attr, + self.float_attr, self.int64_attr, + self.str_attr, self.int_vec_attr, + self.float_vec_attr, + self.int64_vec_attr, + self.str_vec_attr) out.stop_gradient = False out.backward() diff --git a/python/paddle/fluid/tests/custom_op/test_custom_concat.py b/python/paddle/fluid/tests/custom_op/test_custom_concat.py index 2a5d037bdad..83be96a95a8 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_concat.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_concat.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -48,8 +48,7 @@ custom_ops = load( def concat_dynamic(func, dtype, np_inputs, axis_v, with_attr=False): paddle.set_device("cpu") inputs = [ - paddle.to_tensor( - x, dtype=dtype, stop_gradient=False) for x in np_inputs + paddle.to_tensor(x, dtype=dtype, stop_gradient=False) for x in np_inputs ] if with_attr: axis = axis_v @@ -103,6 +102,7 @@ def concat_static(func, dtype, np_inputs, axis_v, with_attr=False): class TestCustomConcatDynamicAxisJit(unittest.TestCase): + def setUp(self): self.dtypes = ['float32', 'float64', 'int32', 'int64'] self.np_inputs = [ @@ -114,8 +114,8 @@ class TestCustomConcatDynamicAxisJit(unittest.TestCase): def check_output(self, out, pd_out, name): self.assertTrue( np.array_equal(out, pd_out), - "custom op {}: {},\n paddle api {}: {}".format(name, out, name, - pd_out)) + "custom op {}: {},\n paddle api {}: {}".format( + name, out, name, pd_out)) def func_dynamic(self): for dtype in self.dtypes: @@ -137,8 +137,9 @@ class TestCustomConcatDynamicAxisJit(unittest.TestCase): def test_static(self): for dtype in self.dtypes: for axis in self.axises: - out, x1_grad, x2_grad = concat_static( - custom_ops.custom_concat, dtype, self.np_inputs, axis) + out, x1_grad, x2_grad = concat_static(custom_ops.custom_concat, + dtype, self.np_inputs, + axis) pd_out, pd_x1_grad, pd_x2_grad = concat_static( paddle.concat, dtype, self.np_inputs, axis) @@ -152,8 +153,9 @@ class TestCustomConcatDynamicAxisJit(unittest.TestCase): out, grad_inputs = concat_dynamic( custom_ops.custom_concat_with_attr, dtype, self.np_inputs, axis, True) - pd_out, pd_grad_inputs = concat_dynamic( - paddle.concat, dtype, self.np_inputs, axis, True) + pd_out, pd_grad_inputs = concat_dynamic(paddle.concat, dtype, + self.np_inputs, axis, + True) self.check_output(out, pd_out, "out") for x_grad, pd_x_grad in zip(grad_inputs, pd_grad_inputs): diff --git a/python/paddle/fluid/tests/custom_op/test_custom_conj.py b/python/paddle/fluid/tests/custom_op/test_custom_conj.py index 5f3c107a9b2..ea916ff55ec 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_conj.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_conj.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -91,6 +91,7 @@ def conj_static(func, shape, dtype, np_input): class TestCustomConjJit(unittest.TestCase): + def setUp(self): self.dtypes = ['float32', 'float64'] self.shape = [2, 20, 2, 3] @@ -98,8 +99,8 @@ class TestCustomConjJit(unittest.TestCase): def check_output(self, out, pd_out, name): self.assertTrue( np.array_equal(out, pd_out), - "custom op {}: {},\n paddle api {}: {}".format(name, out, name, - pd_out)) + "custom op {}: {},\n paddle api {}: {}".format( + name, out, name, pd_out)) def run_dynamic(self, dtype, np_input): out, x_grad = conj_dynamic(custom_ops.custom_conj, dtype, np_input) diff --git a/python/paddle/fluid/tests/custom_op/test_custom_linear.py b/python/paddle/fluid/tests/custom_op/test_custom_linear.py index fba512d511c..2309751659a 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_linear.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_linear.py @@ -56,8 +56,9 @@ def linear_static(func, device, dtype, np_x, np_weight, np_bias): with static.scope_guard(static.Scope()): with static.program_guard(static.Program()): x = static.data(name="x", shape=[None, np_x.shape[1]], dtype=dtype) - weight = static.data( - name="weight", shape=np_weight.shape, dtype=dtype) + weight = static.data(name="weight", + shape=np_weight.shape, + dtype=dtype) bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype) x.stop_gradient = False weight.stop_gradient = False @@ -85,6 +86,7 @@ def linear_static(func, device, dtype, np_x, np_weight, np_bias): class TestCustomLinearJit(unittest.TestCase): + def setUp(self): self.dtypes = ['float32', 'float64'] self.devices = ['cpu'] @@ -97,8 +99,8 @@ class TestCustomLinearJit(unittest.TestCase): def check_output(self, out, pd_out, name): self.assertTrue( np.array_equal(out, pd_out), - "custom op {}: {},\n paddle api {}: {}".format(name, out, name, - pd_out)) + "custom op {}: {},\n paddle api {}: {}".format( + name, out, name, pd_out)) def test_static(self): for device in self.devices: diff --git a/python/paddle/fluid/tests/custom_op/test_custom_raw_op_kernel_op.py b/python/paddle/fluid/tests/custom_op/test_custom_raw_op_kernel_op.py index 4da99b1ea10..f95f57b4b7a 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_raw_op_kernel_op.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_raw_op_kernel_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -36,8 +36,8 @@ def prepare_module_path(): else: site_dir = site.getsitepackages()[0] custom_egg_path = [x for x in os.listdir(site_dir) if MODULE_NAME in x] - assert len(custom_egg_path) == 1, "Matched egg number is %d." % len( - custom_egg_path) + assert len(custom_egg_path + ) == 1, "Matched egg number is %d." % len(custom_egg_path) sys.path.append(os.path.join(site_dir, custom_egg_path[0])) @@ -46,6 +46,7 @@ def prepare_module_path(): # temporarily. 
@unittest.skipIf(os.name == "nt", "Windows does not support yet.") class TestCustomRawReluOp(unittest.TestCase): + @classmethod def setUpClass(cls): path = os.path.dirname(os.path.abspath(__file__)) @@ -77,8 +78,8 @@ class TestCustomRawReluOp(unittest.TestCase): exe = paddle.static.Executor() exe.run(paddle.static.default_startup_program()) - x_np = np.random.uniform( - low=-1.0, high=1.0, size=[2, 3]).astype('float32') + x_np = np.random.uniform(low=-1.0, high=1.0, size=[2, + 3]).astype('float32') y1_value, y2_value = exe.run(paddle.static.default_main_program(), feed={x.name: x_np}, fetch_list=[y1, y2]) diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py index 4980a159225..78078963a7d 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py @@ -72,6 +72,7 @@ class Net(nn.Layer): class TestDygraphModel(unittest.TestCase): + def setUp(self): self.seed = 2021 @@ -96,8 +97,9 @@ class TestDygraphModel(unittest.TestCase): self.model_dy2stat_path = "infer_model/custom_relu_model_dy2sta" # for dy2stat - self.x_spec = paddle.static.InputSpec( - shape=[None, self.in_dim], dtype='float32', name='x') + self.x_spec = paddle.static.InputSpec(shape=[None, self.in_dim], + dtype='float32', + name='x') def func_train_eval(self): for device in self.devices: @@ -107,7 +109,7 @@ class TestDygraphModel(unittest.TestCase): # for train origin_relu_train_out = self.train_model(use_custom_op=False) custom_relu_train_out = self.train_model(use_custom_op=True) - # open this when dy2stat is ready for eager + # open this when dy2stat is ready for eager if _in_legacy_dygraph(): custom_relu_dy2stat_train_out = self.train_model( use_custom_op=True, dy2stat=True) # for to_static @@ -188,6 +190,7 @@ class TestDygraphModel(unittest.TestCase): class TestStaticModel(unittest.TestCase): + def setUp(self): self.seed = 2021 self.in_dim = 10 @@ -217,14 +220,16 @@ class TestStaticModel(unittest.TestCase): def test_train_eval(self): for device in self.devices: # for train - original_relu_train_out = self.train_model( - device, use_custom_op=False) + original_relu_train_out = self.train_model(device, + use_custom_op=False) custom_relu_train_out = self.train_model(device, use_custom_op=True) # using PE - original_relu_train_pe_out = self.train_model( - device, use_custom_op=False, use_pe=True) - custom_relu_train_pe_out = self.train_model( - device, use_custom_op=True, use_pe=True) + original_relu_train_pe_out = self.train_model(device, + use_custom_op=False, + use_pe=True) + custom_relu_train_pe_out = self.train_model(device, + use_custom_op=True, + use_pe=True) self.assertTrue( np.array_equal(original_relu_train_out, custom_relu_train_out)) @@ -233,14 +238,16 @@ class TestStaticModel(unittest.TestCase): custom_relu_train_pe_out)) # for eval - original_relu_eval_out = self.eval_model( - device, use_custom_op=False) + original_relu_eval_out = self.eval_model(device, + use_custom_op=False) custom_relu_eval_out = self.eval_model(device, use_custom_op=True) # using PE - original_relu_eval_pe_out = self.eval_model( - device, use_custom_op=False, use_pe=True) - custom_relu_eval_pe_out = self.eval_model( - device, use_custom_op=True, use_pe=True) + original_relu_eval_pe_out = self.eval_model(device, + use_custom_op=False, + use_pe=True) + custom_relu_eval_pe_out = self.eval_model(device, + use_custom_op=True, + use_pe=True) self.assertTrue( 
np.array_equal(original_relu_eval_out, custom_relu_eval_out)) @@ -258,10 +265,12 @@ class TestStaticModel(unittest.TestCase): with paddle.static.scope_guard(paddle.static.Scope()): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - x = paddle.static.data( - shape=[None, self.in_dim], name='x', dtype='float32') - y = paddle.static.data( - shape=[None, 1], name='y', dtype='float32') + x = paddle.static.data(shape=[None, self.in_dim], + name='x', + dtype='float32') + y = paddle.static.data(shape=[None, 1], + name='y', + dtype='float32') net = Net(self.in_dim, self.out_dim, use_custom_op) out = net(x) @@ -279,8 +288,8 @@ class TestStaticModel(unittest.TestCase): ) if device is 'cpu' else paddle.static.cuda_places() main_program = paddle.static.CompiledProgram( paddle.static.default_main_program( - )).with_data_parallel( - loss_name=loss.name, places=places) + )).with_data_parallel(loss_name=loss.name, + places=places) else: main_program = paddle.static.default_main_program() @@ -289,14 +298,16 @@ class TestStaticModel(unittest.TestCase): y_data = self.labels[batch_id] res = exe.run(main_program, - feed={'x': x_data, - 'y': y_data}, + feed={ + 'x': x_data, + 'y': y_data + }, fetch_list=[out]) # save model paddle.static.save_inference_model( - self.model_path_template.format(use_custom_op, use_pe), - [x], [out], exe) + self.model_path_template.format(use_custom_op, use_pe), [x], + [out], exe) return res[0] diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py index 072b3c6484e..5052a0989bb 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py @@ -47,6 +47,7 @@ custom_module = load( class TestJITLoad(unittest.TestCase): + def setUp(self): self.custom_ops = [ custom_module.custom_relu, custom_module.custom_relu_dup, @@ -84,8 +85,8 @@ class TestJITLoad(unittest.TestCase): for custom_op in self.custom_ops: out, x_grad = custom_relu_dynamic(custom_op, device, dtype, x) - pd_out, pd_x_grad = custom_relu_dynamic(custom_op, device, - dtype, x, False) + pd_out, pd_x_grad = custom_relu_dynamic( + custom_op, device, dtype, x, False) self.assertTrue( np.array_equal(out, pd_out), "custom op out: {},\n paddle api out: {}".format( @@ -132,8 +133,8 @@ class TestJITLoad(unittest.TestCase): "function \"relu_cuda_forward_kernel\" is not implemented for data type `int32`" in str(e)) self.assertTrue( - "python/paddle/fluid/tests/custom_op/custom_relu_op.cu" in - str(e)) + "python/paddle/fluid/tests/custom_op/custom_relu_op.cu" in str( + e)) self.assertTrue(caught_exception) def test_exception(self): diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py index 55c9571d44f..29433b17153 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py @@ -85,9 +85,9 @@ def custom_relu_static_pe(func, device, dtype, np_x, use_func=True): exe.run(static.default_startup_program()) # in static mode, x data has been covered by out - compiled_prog = static.CompiledProgram(static.default_main_program( - )).with_data_parallel( - loss_name=out.name, places=places) + compiled_prog = static.CompiledProgram( + static.default_main_program()).with_data_parallel( + loss_name=out.name, places=places) out_v = exe.run(compiled_prog, feed={'X': np_x}, 
fetch_list=[out.name]) @@ -102,8 +102,9 @@ def custom_relu_static_inference(func, device, np_data, np_label, path_prefix): with static.scope_guard(static.Scope()): with static.program_guard(static.Program()): # simple module - data = static.data( - name='data', shape=[None, 1, 28, 28], dtype='float32') + data = static.data(name='data', + shape=[None, 1, 28, 28], + dtype='float32') label = static.data(name='label', shape=[None, 1], dtype='int64') hidden = static.nn.fc(data, size=128) @@ -123,8 +124,10 @@ def custom_relu_static_inference(func, device, np_data, np_label, path_prefix): # train for i in range(4): avg_loss_v = exe.run(static.default_main_program(), - feed={'data': np_data, - 'label': np_label}, + feed={ + 'data': np_data, + 'label': np_label + }, fetch_list=[avg_loss]) # save inference model @@ -132,8 +135,10 @@ def custom_relu_static_inference(func, device, np_data, np_label, path_prefix): # get train predict value predict_v = exe.run(static.default_main_program(), - feed={'data': np_data, - 'label': np_label}, + feed={ + 'data': np_data, + 'label': np_label + }, fetch_list=[predict]) return predict_v @@ -147,8 +152,10 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True): out = func(t) if use_func else paddle.nn.functional.relu(t) out.stop_gradient = False - dx = paddle.grad( - outputs=[out], inputs=[t], create_graph=True, retain_graph=True) + dx = paddle.grad(outputs=[out], + inputs=[t], + create_graph=True, + retain_graph=True) dx[0].backward() @@ -157,6 +164,7 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True): class TestNewCustomOpSetUpInstall(unittest.TestCase): + def setUp(self): cur_dir = os.path.dirname(os.path.abspath(__file__)) # compile, install the custom op egg into site-packages under background @@ -181,8 +189,8 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): custom_egg_path = [ x for x in os.listdir(site_dir) if 'custom_relu_module_setup' in x ] - assert len(custom_egg_path) == 1, "Matched egg number is %d." % len( - custom_egg_path) + assert len(custom_egg_path + ) == 1, "Matched egg number is %d." 
% len(custom_egg_path) sys.path.append(os.path.join(site_dir, custom_egg_path[0])) # usage: import the package directly @@ -244,8 +252,8 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): for custom_op in self.custom_ops: out, x_grad = custom_relu_dynamic(custom_op, device, dtype, x) - pd_out, pd_x_grad = custom_relu_dynamic(custom_op, device, - dtype, x, False) + pd_out, pd_x_grad = custom_relu_dynamic( + custom_op, device, dtype, x, False) self.assertTrue( np.array_equal(out, pd_out), "custom op out: {},\n paddle api out: {}".format( @@ -266,8 +274,9 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): np_label = np.random.random((1, 1)).astype("int64") path_prefix = "custom_op_inference/custom_relu" for device in self.devices: - predict = custom_relu_static_inference( - self.custom_ops[0], device, np_data, np_label, path_prefix) + predict = custom_relu_static_inference(self.custom_ops[0], device, + np_data, np_label, + path_prefix) # load inference model with static.scope_guard(static.Scope()): exe = static.Executor() @@ -290,14 +299,15 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): from paddle.inference import Config from paddle.inference import create_predictor for device in self.devices: - predict = custom_relu_static_inference( - self.custom_ops[0], device, np_data, np_label, path_prefix) + predict = custom_relu_static_inference(self.custom_ops[0], device, + np_data, np_label, + path_prefix) # load inference model config = Config(path_prefix + ".pdmodel", path_prefix + ".pdiparams") predictor = create_predictor(config) - input_tensor = predictor.get_input_handle(predictor.get_input_names( - )[0]) + input_tensor = predictor.get_input_handle( + predictor.get_input_names()[0]) input_tensor.reshape(np_data.shape) input_tensor.copy_from_cpu(np_data.copy()) predictor.run() @@ -305,8 +315,7 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): predictor.get_output_names()[0]) predict_infer = output_tensor.copy_to_cpu() self.assertTrue( - np.isclose( - predict, predict_infer, rtol=5e-5).any(), + np.isclose(predict, predict_infer, rtol=5e-5).any(), "custom op predict: {},\n custom op infer predict: {}".format( predict, predict_infer)) paddle.disable_static() @@ -323,8 +332,8 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): self.custom_ops[0], device, dtype, x, False) self.assertTrue( np.array_equal(out, pd_out), - "custom op out: {},\n paddle api out: {}".format(out, - pd_out)) + "custom op out: {},\n paddle api out: {}".format( + out, pd_out)) self.assertTrue( np.array_equal(dx_grad, pd_dx_grad), "custom op dx grad: {},\n paddle api dx grad: {}".format( @@ -335,24 +344,22 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): paddle.set_device(device) # data loader transform = Compose( - [Normalize( - mean=[127.5], std=[127.5], data_format='CHW')]) - train_dataset = paddle.vision.datasets.MNIST( - mode='train', transform=transform) - train_loader = paddle.io.DataLoader( - train_dataset, - batch_size=64, - shuffle=True, - drop_last=True, - num_workers=0) + [Normalize(mean=[127.5], std=[127.5], data_format='CHW')]) + train_dataset = paddle.vision.datasets.MNIST(mode='train', + transform=transform) + train_loader = paddle.io.DataLoader(train_dataset, + batch_size=64, + shuffle=True, + drop_last=True, + num_workers=0) for batch_id, (image, _) in enumerate(train_loader()): out = self.custom_ops[0](image) pd_out = paddle.nn.functional.relu(image) self.assertTrue( np.array_equal(out, pd_out), - "custom op out: {},\n paddle api out: {}".format(out, - pd_out)) + 
"custom op out: {},\n paddle api out: {}".format( + out, pd_out)) if batch_id == 5: break diff --git a/python/paddle/fluid/tests/custom_op/test_custom_simple_slice.py b/python/paddle/fluid/tests/custom_op/test_custom_simple_slice.py index 6b1fb7c71ae..4202545759c 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_simple_slice.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_simple_slice.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtaina copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -40,6 +40,7 @@ custom_ops = load( class TestCustomSimpleSliceJit(unittest.TestCase): + def func_slice_output(self): np_x = np.random.random((5, 2)).astype("float32") x = paddle.to_tensor(np_x) diff --git a/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py b/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py index 0fa07abe9d0..56093767993 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -49,8 +49,10 @@ def custom_tanh_double_grad_dynamic(func, device, dtype, np_x): out = func(t) out.stop_gradient = False - dx = paddle.grad( - outputs=[out], inputs=[t], create_graph=True, retain_graph=True) + dx = paddle.grad(outputs=[out], + inputs=[t], + create_graph=True, + retain_graph=True) dx[0].backward() @@ -61,6 +63,7 @@ def custom_tanh_double_grad_dynamic(func, device, dtype, np_x): class TestCustomTanhDoubleGradJit(unittest.TestCase): + def setUp(self): paddle.set_device('cpu') self.dtypes = ['float32', 'float64'] @@ -76,8 +79,8 @@ class TestCustomTanhDoubleGradJit(unittest.TestCase): paddle.tanh, device, dtype, x) self.assertTrue( np.allclose(out, pd_out), - "custom op out: {},\n paddle api out: {}".format(out, - pd_out)) + "custom op out: {},\n paddle api out: {}".format( + out, pd_out)) self.assertTrue( np.allclose(dx_grad, pd_dx_grad), "custom op dx grad: {},\n paddle api dx grad: {}".format( diff --git a/python/paddle/fluid/tests/custom_op/test_dispatch_jit.py b/python/paddle/fluid/tests/custom_op/test_dispatch_jit.py index 0d2cb941eaf..d48d25ea3b1 100644 --- a/python/paddle/fluid/tests/custom_op/test_dispatch_jit.py +++ b/python/paddle/fluid/tests/custom_op/test_dispatch_jit.py @@ -20,7 +20,7 @@ from paddle.utils.cpp_extension import load, get_build_directory from utils import paddle_includes, extra_cc_args from paddle.utils.cpp_extension.extension_utils import run_cmd from paddle.fluid.framework import _test_eager_guard -# Because Windows don't use docker, the shared lib already exists in the +# Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. file = '{}\\dispatch_op\\dispatch_op.pyd'.format(get_build_directory()) if os.name == 'nt' and os.path.isfile(file): @@ -36,6 +36,7 @@ dispatch_op = load( class TestJitDispatch(unittest.TestCase): + def setUp(self): paddle.set_device('cpu') diff --git a/python/paddle/fluid/tests/custom_op/test_multi_out_jit.py b/python/paddle/fluid/tests/custom_op/test_multi_out_jit.py index 4fc9270b0f4..83731de32a4 100644 --- a/python/paddle/fluid/tests/custom_op/test_multi_out_jit.py +++ b/python/paddle/fluid/tests/custom_op/test_multi_out_jit.py @@ -23,7 +23,7 @@ from paddle.utils.cpp_extension import load, get_build_directory from paddle.utils.cpp_extension.extension_utils import run_cmd from utils import paddle_includes, extra_cc_args from paddle.fluid.framework import _test_eager_guard -# Because Windows don't use docker, the shared lib already exists in the +# Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. 
file = '{}\\multi_out_jit\\multi_out_jit.pyd'.format(get_build_directory()) if os.name == 'nt' and os.path.isfile(file): @@ -40,6 +40,7 @@ multi_out_module = load( class TestMultiOutputDtypes(unittest.TestCase): + def setUp(self): self.custom_op = multi_out_module.multi_out self.dtypes = ['float32', 'float64'] @@ -70,11 +71,13 @@ class TestMultiOutputDtypes(unittest.TestCase): # Fake_float64 self.assertTrue('float64' in str(zero_float64.dtype)) self.assertTrue( - np.array_equal(zero_float64, np.zeros([4, 8]).astype('float64'))) + np.array_equal(zero_float64, + np.zeros([4, 8]).astype('float64'))) # ZFake_int32 self.assertTrue('int32' in str(one_int32.dtype)) self.assertTrue( - np.array_equal(one_int32, np.ones([4, 8]).astype('int32'))) + np.array_equal(one_int32, + np.ones([4, 8]).astype('int32'))) def test_static(self): paddle.enable_static() diff --git a/python/paddle/fluid/tests/custom_op/test_sysconfig.py b/python/paddle/fluid/tests/custom_op/test_sysconfig.py index 78c0cdf0316..89ef36931f3 100644 --- a/python/paddle/fluid/tests/custom_op/test_sysconfig.py +++ b/python/paddle/fluid/tests/custom_op/test_sysconfig.py @@ -18,6 +18,7 @@ import paddle class SysConfigTest(unittest.TestCase): + def test_include(self): inc_dir = paddle.sysconfig.get_include() inc_dirs = inc_dir.split(os.sep) diff --git a/python/paddle/fluid/tests/custom_op/utils.py b/python/paddle/fluid/tests/custom_op/utils.py index 2d492da3d97..82361f2a304 100644 --- a/python/paddle/fluid/tests/custom_op/utils.py +++ b/python/paddle/fluid/tests/custom_op/utils.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/python/paddle/fluid/tests/test_beam_search_decoder.py b/python/paddle/fluid/tests/test_beam_search_decoder.py index 301bd0ff003..f37090f67e2 100644 --- a/python/paddle/fluid/tests/test_beam_search_decoder.py +++ b/python/paddle/fluid/tests/test_beam_search_decoder.py @@ -47,13 +47,14 @@ beam_size = 2 def encoder(): # encoder - src_word = layers.data( - name="src_word", shape=[1], dtype='int64', lod_level=1) - src_embedding = layers.embedding( - input=src_word, - size=[dict_size, word_dim], - dtype='float32', - is_sparse=IS_SPARSE) + src_word = layers.data(name="src_word", + shape=[1], + dtype='int64', + lod_level=1) + src_embedding = layers.embedding(input=src_word, + size=[dict_size, word_dim], + dtype='float32', + is_sparse=IS_SPARSE) fc1 = layers.fc(input=src_embedding, size=hidden_dim * 4, act='tanh') lstm_hidden0, lstm_0 = layers.dynamic_lstm(input=fc1, size=hidden_dim * 4) @@ -80,13 +81,14 @@ def decoder_state_cell(context): def decoder_train(state_cell): # decoder - trg_language_word = layers.data( - name="target_word", shape=[1], dtype='int64', lod_level=1) - trg_embedding = layers.embedding( - input=trg_language_word, - size=[dict_size, word_dim], - dtype='float32', - is_sparse=IS_SPARSE) + trg_language_word = layers.data(name="target_word", + shape=[1], + dtype='int64', + lod_level=1) + trg_embedding = layers.embedding(input=trg_language_word, + size=[dict_size, word_dim], + dtype='float32', + is_sparse=IS_SPARSE) decoder = TrainingDecoder(state_cell) @@ -103,24 +105,27 @@ def decoder_train(state_cell): def decoder_decode(state_cell): - init_ids = layers.data( - name="init_ids", shape=[1], dtype="int64", lod_level=2) - init_scores = layers.data( - name="init_scores", shape=[1], dtype="float32", lod_level=2) - - decoder = BeamSearchDecoder( - state_cell=state_cell, - init_ids=init_ids, - init_scores=init_scores, - target_dict_dim=target_dict_dim, - word_dim=word_dim, - input_var_dict={}, - topk_size=topk_size, - sparse_emb=IS_SPARSE, - max_len=max_length, - beam_size=beam_size, - end_id=1, - name=None) + init_ids = layers.data(name="init_ids", + shape=[1], + dtype="int64", + lod_level=2) + init_scores = layers.data(name="init_scores", + shape=[1], + dtype="float32", + lod_level=2) + + decoder = BeamSearchDecoder(state_cell=state_cell, + init_ids=init_ids, + init_scores=init_scores, + target_dict_dim=target_dict_dim, + word_dim=word_dim, + input_var_dict={}, + topk_size=topk_size, + sparse_emb=IS_SPARSE, + max_len=max_length, + beam_size=beam_size, + end_id=1, + name=None) decoder.decode() translation_ids, translation_scores = decoder() @@ -135,18 +140,19 @@ def train_main(use_cuda): context = encoder() state_cell = decoder_state_cell(context) rnn_out = decoder_train(state_cell) - label = layers.data( - name="target_next_word", shape=[1], dtype='int64', lod_level=1) + label = layers.data(name="target_next_word", + shape=[1], + dtype='int64', + lod_level=1) cost = layers.cross_entropy(input=rnn_out, label=label) avg_cost = layers.mean(x=cost) optimizer = fluid.optimizer.Adagrad(learning_rate=1e-3) optimizer.minimize(avg_cost) - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.wmt14.train(dict_size), buf_size=1000), - batch_size=batch_size) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.wmt14.train(dict_size), buf_size=1000), + batch_size=batch_size) feed_order = ['src_word', 'target_word', 'target_next_word'] exe = Executor(place) @@ -186,8 +192,8 @@ def decode_main(use_cuda): exe.run(framework.default_startup_program()) 
init_ids_data = np.array([0 for _ in range(batch_size)], dtype='int64') - init_scores_data = np.array( - [1. for _ in range(batch_size)], dtype='float32') + init_scores_data = np.array([1. for _ in range(batch_size)], + dtype='float32') init_ids_data = init_ids_data.reshape((batch_size, 1)) init_scores_data = init_scores_data.reshape((batch_size, 1)) init_lod = [1] * batch_size @@ -196,10 +202,9 @@ def decode_main(use_cuda): init_ids = fluid.create_lod_tensor(init_ids_data, init_lod, place) init_scores = fluid.create_lod_tensor(init_scores_data, init_lod, place) - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.wmt14.train(dict_size), buf_size=1000), - batch_size=batch_size) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.wmt14.train(dict_size), buf_size=1000), + batch_size=batch_size) feed_order = ['src_word'] feed_list = [ diff --git a/python/paddle/fluid/tests/test_data_feeder.py b/python/paddle/fluid/tests/test_data_feeder.py index d50c57e670b..54ff53e3706 100644 --- a/python/paddle/fluid/tests/test_data_feeder.py +++ b/python/paddle/fluid/tests/test_data_feeder.py @@ -22,6 +22,7 @@ paddle.enable_static() class TestDataFeeder(unittest.TestCase): + def test_lod_level_0_converter(self): img = fluid.layers.data(name='image', shape=[1, 28, 28]) label = fluid.layers.data(name='label', shape=[1], dtype='int64') @@ -42,16 +43,18 @@ class TestDataFeeder(unittest.TestCase): def test_lod_level_1_converter(self): # lod_level = 1 # each sentence has a different number of words - sentences = fluid.layers.data( - name='sentences', shape=[1], dtype='int64', lod_level=1) + sentences = fluid.layers.data(name='sentences', + shape=[1], + dtype='int64', + lod_level=1) label = fluid.layers.data(name='label', shape=[1], dtype='int64') feeder = fluid.DataFeeder([sentences, label], fluid.CPUPlace()) # lod = [[0, 3, 5, 9]] # data = [[1, 2, 3], [4, 5], [6, 7, 8, 9]] # label = [1] * len(data) - result = feeder.feed( - [([1, 2, 3], [1]), ([4, 5], [1]), ([6, 7, 8, 9], [1])]) + result = feeder.feed([([1, 2, 3], [1]), ([4, 5], [1]), + ([6, 7, 8, 9], [1])]) self.assertEqual(result['sentences'].shape(), [9, 1]) self.assertEqual(result['label'].shape(), [3, 1]) @@ -62,16 +65,18 @@ class TestDataFeeder(unittest.TestCase): def test_lod_level_2_converter(self): # lod_level = 2 # paragraphs -> sentences -> words - paragraphs = fluid.layers.data( - name='paragraphs', shape=[1], dtype='int64', lod_level=2) + paragraphs = fluid.layers.data(name='paragraphs', + shape=[1], + dtype='int64', + lod_level=2) label = fluid.layers.data(name='label', shape=[1], dtype='int64') feeder = fluid.DataFeeder([paragraphs, label], fluid.CPUPlace()) # lod = [[0, 2, 3], [0, 3, 5, 9]] # data = [[[1, 2, 3], [4, 5]], [[6, 7, 8, 9]]] # label = [1] * len(data) - result = feeder.feed( - [([[1, 2, 3], [4, 5]], [1]), ([[6, 7, 8, 9]], [1])]) + result = feeder.feed([([[1, 2, 3], [4, 5]], [1]), ([[6, 7, 8, + 9]], [1])]) self.assertEqual(result['paragraphs'].shape(), [9, 1]) self.assertEqual(result['label'].shape(), [2, 1]) diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index c4504550920..046aa4c1f17 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -30,6 +30,7 @@ paddle.enable_static() class LayerTest(unittest.TestCase): + @classmethod def setUpClass(cls): cls.seed = 111 @@ -76,37 +77,35 @@ class LayerTest(unittest.TestCase): class TestDetection(unittest.TestCase): + def 
test_detection_output(self): program = Program() with program_guard(program): - pb = layers.data( - name='prior_box', - shape=[10, 4], - append_batch_size=False, - dtype='float32') - pbv = layers.data( - name='prior_box_var', - shape=[10, 4], - append_batch_size=False, - dtype='float32') - loc = layers.data( - name='target_box', - shape=[2, 10, 4], - append_batch_size=False, - dtype='float32') - scores = layers.data( - name='scores', - shape=[2, 10, 20], - append_batch_size=False, - dtype='float32') - out = layers.detection_output( - scores=scores, loc=loc, prior_box=pb, prior_box_var=pbv) - out2, index = layers.detection_output( - scores=scores, - loc=loc, - prior_box=pb, - prior_box_var=pbv, - return_index=True) + pb = layers.data(name='prior_box', + shape=[10, 4], + append_batch_size=False, + dtype='float32') + pbv = layers.data(name='prior_box_var', + shape=[10, 4], + append_batch_size=False, + dtype='float32') + loc = layers.data(name='target_box', + shape=[2, 10, 4], + append_batch_size=False, + dtype='float32') + scores = layers.data(name='scores', + shape=[2, 10, 20], + append_batch_size=False, + dtype='float32') + out = layers.detection_output(scores=scores, + loc=loc, + prior_box=pb, + prior_box_var=pbv) + out2, index = layers.detection_output(scores=scores, + loc=loc, + prior_box=pb, + prior_box_var=pbv, + return_index=True) self.assertIsNotNone(out) self.assertIsNotNone(out2) self.assertIsNotNone(index) @@ -118,11 +117,10 @@ class TestDetection(unittest.TestCase): with program_guard(program): x = layers.data(name='x', shape=[4], dtype='float32') y = layers.data(name='z', shape=[4], dtype='float32', lod_level=1) - bcoder = layers.box_coder( - prior_box=x, - prior_box_var=[0.1, 0.2, 0.1, 0.2], - target_box=y, - code_type='encode_center_size') + bcoder = layers.box_coder(prior_box=x, + prior_box_var=[0.1, 0.2, 0.1, 0.2], + target_box=y, + code_type='encode_center_size') self.assertIsNotNone(bcoder) print(str(program)) @@ -130,26 +128,28 @@ class TestDetection(unittest.TestCase): program = Program() with program_guard(program): x1 = fluid.data(name='x1', shape=[10, 4], dtype='int32') - y1 = fluid.data( - name='y1', shape=[10, 4], dtype='float32', lod_level=1) + y1 = fluid.data(name='y1', + shape=[10, 4], + dtype='float32', + lod_level=1) x2 = fluid.data(name='x2', shape=[10, 4], dtype='float32') - y2 = fluid.data( - name='y2', shape=[10, 4], dtype='int32', lod_level=1) - - self.assertRaises( - TypeError, - layers.box_coder, - prior_box=x1, - prior_box_var=[0.1, 0.2, 0.1, 0.2], - target_box=y1, - code_type='encode_center_size') - self.assertRaises( - TypeError, - layers.box_coder, - prior_box=x2, - prior_box_var=[0.1, 0.2, 0.1, 0.2], - target_box=y2, - code_type='encode_center_size') + y2 = fluid.data(name='y2', + shape=[10, 4], + dtype='int32', + lod_level=1) + + self.assertRaises(TypeError, + layers.box_coder, + prior_box=x1, + prior_box_var=[0.1, 0.2, 0.1, 0.2], + target_box=y1, + code_type='encode_center_size') + self.assertRaises(TypeError, + layers.box_coder, + prior_box=x2, + prior_box_var=[0.1, 0.2, 0.1, 0.2], + target_box=y2, + code_type='encode_center_size') def test_detection_api(self): program = Program() @@ -158,11 +158,10 @@ class TestDetection(unittest.TestCase): y = layers.data(name='y', shape=[4], dtype='float32') z = layers.data(name='z', shape=[4], dtype='float32', lod_level=1) iou = layers.iou_similarity(x=x, y=y) - bcoder = layers.box_coder( - prior_box=x, - prior_box_var=y, - target_box=z, - code_type='encode_center_size') + bcoder = 
layers.box_coder(prior_box=x, + prior_box_var=y, + target_box=z, + code_type='encode_center_size') self.assertIsNotNone(iou) self.assertIsNotNone(bcoder) @@ -170,17 +169,23 @@ class TestDetection(unittest.TestCase): self.assertIsNotNone(matched_indices) self.assertIsNotNone(matched_dist) - gt = layers.data( - name='gt', shape=[1, 1], dtype='int32', lod_level=1) - trg, trg_weight = layers.target_assign( - gt, matched_indices, mismatch_value=0) + gt = layers.data(name='gt', + shape=[1, 1], + dtype='int32', + lod_level=1) + trg, trg_weight = layers.target_assign(gt, + matched_indices, + mismatch_value=0) self.assertIsNotNone(trg) self.assertIsNotNone(trg_weight) - gt2 = layers.data( - name='gt2', shape=[10, 4], dtype='float32', lod_level=1) - trg, trg_weight = layers.target_assign( - gt2, matched_indices, mismatch_value=0) + gt2 = layers.data(name='gt2', + shape=[10, 4], + dtype='float32', + lod_level=1) + trg, trg_weight = layers.target_assign(gt2, + matched_indices, + mismatch_value=0) self.assertIsNotNone(trg) self.assertIsNotNone(trg_weight) @@ -189,22 +194,24 @@ class TestDetection(unittest.TestCase): def test_ssd_loss(self): program = Program() with program_guard(program): - pb = layers.data( - name='prior_box', - shape=[10, 4], - append_batch_size=False, - dtype='float32') - pbv = layers.data( - name='prior_box_var', - shape=[10, 4], - append_batch_size=False, - dtype='float32') + pb = layers.data(name='prior_box', + shape=[10, 4], + append_batch_size=False, + dtype='float32') + pbv = layers.data(name='prior_box_var', + shape=[10, 4], + append_batch_size=False, + dtype='float32') loc = layers.data(name='target_box', shape=[10, 4], dtype='float32') scores = layers.data(name='scores', shape=[10, 21], dtype='float32') - gt_box = layers.data( - name='gt_box', shape=[4], lod_level=1, dtype='float32') - gt_label = layers.data( - name='gt_label', shape=[1], lod_level=1, dtype='int32') + gt_box = layers.data(name='gt_box', + shape=[4], + lod_level=1, + dtype='float32') + gt_label = layers.data(name='gt_label', + shape=[1], + lod_level=1, + dtype='int32') loss = layers.ssd_loss(loc, scores, gt_box, gt_label, pb, pbv) self.assertIsNotNone(loss) self.assertEqual(loss.shape[-1], 1) @@ -212,69 +219,73 @@ class TestDetection(unittest.TestCase): class TestPriorBox(unittest.TestCase): + def test_prior_box(self): program = Program() with program_guard(program): data_shape = [3, 224, 224] - images = fluid.layers.data( - name='pixel', shape=data_shape, dtype='float32') + images = fluid.layers.data(name='pixel', + shape=data_shape, + dtype='float32') conv1 = fluid.layers.conv2d(images, 3, 3, 2) - box, var = layers.prior_box( - input=conv1, - image=images, - min_sizes=[100.0], - aspect_ratios=[1.], - flip=True, - clip=True) + box, var = layers.prior_box(input=conv1, + image=images, + min_sizes=[100.0], + aspect_ratios=[1.], + flip=True, + clip=True) assert len(box.shape) == 4 assert box.shape == var.shape assert box.shape[3] == 4 class TestPriorBox2(unittest.TestCase): + def test_prior_box(self): program = Program() with program_guard(program): data_shape = [None, 3, None, None] images = fluid.data(name='pixel', shape=data_shape, dtype='float32') conv1 = fluid.layers.conv2d(images, 3, 3, 2) - box, var = layers.prior_box( - input=conv1, - image=images, - min_sizes=[100.0], - aspect_ratios=[1.], - flip=True, - clip=True) + box, var = layers.prior_box(input=conv1, + image=images, + min_sizes=[100.0], + aspect_ratios=[1.], + flip=True, + clip=True) assert len(box.shape) == 4 assert box.shape == var.shape 
assert box.shape[3] == 4 class TestDensityPriorBox(unittest.TestCase): + def test_density_prior_box(self): program = Program() with program_guard(program): data_shape = [3, 224, 224] - images = fluid.layers.data( - name='pixel', shape=data_shape, dtype='float32') + images = fluid.layers.data(name='pixel', + shape=data_shape, + dtype='float32') conv1 = fluid.layers.conv2d(images, 3, 3, 2) - box, var = layers.density_prior_box( - input=conv1, - image=images, - densities=[3, 4], - fixed_sizes=[50., 60.], - fixed_ratios=[1.0], - clip=True) + box, var = layers.density_prior_box(input=conv1, + image=images, + densities=[3, 4], + fixed_sizes=[50., 60.], + fixed_ratios=[1.0], + clip=True) assert len(box.shape) == 4 assert box.shape == var.shape assert box.shape[-1] == 4 class TestAnchorGenerator(unittest.TestCase): + def test_anchor_generator(self): data_shape = [3, 224, 224] - images = fluid.layers.data( - name='pixel', shape=data_shape, dtype='float32') + images = fluid.layers.data(name='pixel', + shape=data_shape, + dtype='float32') conv1 = fluid.layers.conv2d(images, 3, 3, 2) anchor, var = fluid.layers.anchor_generator( input=conv1, @@ -289,6 +300,7 @@ class TestAnchorGenerator(unittest.TestCase): class TestGenerateProposalLabels(unittest.TestCase): + def check_out(self, outs): rois = outs[0] labels_int32 = outs[1] @@ -310,17 +322,27 @@ class TestGenerateProposalLabels(unittest.TestCase): def test_generate_proposal_labels(self): program = Program() with program_guard(program): - rpn_rois = fluid.data( - name='rpn_rois', shape=[4, 4], dtype='float32', lod_level=1) - gt_classes = fluid.data( - name='gt_classes', shape=[6], dtype='int32', lod_level=1) - is_crowd = fluid.data( - name='is_crowd', shape=[6], dtype='int32', lod_level=1) - gt_boxes = fluid.data( - name='gt_boxes', shape=[6, 4], dtype='float32', lod_level=1) + rpn_rois = fluid.data(name='rpn_rois', + shape=[4, 4], + dtype='float32', + lod_level=1) + gt_classes = fluid.data(name='gt_classes', + shape=[6], + dtype='int32', + lod_level=1) + is_crowd = fluid.data(name='is_crowd', + shape=[6], + dtype='int32', + lod_level=1) + gt_boxes = fluid.data(name='gt_boxes', + shape=[6, 4], + dtype='float32', + lod_level=1) im_info = fluid.data(name='im_info', shape=[1, 3], dtype='float32') - max_overlap = fluid.data( - name='max_overlap', shape=[4], dtype='float32', lod_level=1) + max_overlap = fluid.data(name='max_overlap', + shape=[4], + dtype='float32', + lod_level=1) self.class_nums = 5 outs = fluid.layers.generate_proposal_labels( rpn_rois=rpn_rois, @@ -358,62 +380,57 @@ class TestGenerateProposalLabels(unittest.TestCase): class TestGenerateMaskLabels(unittest.TestCase): + def test_generate_mask_labels(self): program = Program() with program_guard(program): - im_info = layers.data( - name='im_info', - shape=[1, 3], - dtype='float32', - lod_level=1, - append_batch_size=False) - gt_classes = layers.data( - name='gt_classes', - shape=[2, 1], - dtype='int32', - lod_level=1, - append_batch_size=False) - is_crowd = layers.data( - name='is_crowd', - shape=[2, 1], - dtype='int32', - lod_level=1, - append_batch_size=False) - gt_segms = layers.data( - name='gt_segms', - shape=[20, 2], - dtype='float32', - lod_level=3, - append_batch_size=False) - rois = layers.data( - name='rois', - shape=[4, 4], - dtype='float32', - lod_level=1, - append_batch_size=False) - labels_int32 = layers.data( - name='labels_int32', - shape=[4, 1], - dtype='int32', - lod_level=1, - append_batch_size=False) + im_info = layers.data(name='im_info', + shape=[1, 3], + 
dtype='float32', + lod_level=1, + append_batch_size=False) + gt_classes = layers.data(name='gt_classes', + shape=[2, 1], + dtype='int32', + lod_level=1, + append_batch_size=False) + is_crowd = layers.data(name='is_crowd', + shape=[2, 1], + dtype='int32', + lod_level=1, + append_batch_size=False) + gt_segms = layers.data(name='gt_segms', + shape=[20, 2], + dtype='float32', + lod_level=3, + append_batch_size=False) + rois = layers.data(name='rois', + shape=[4, 4], + dtype='float32', + lod_level=1, + append_batch_size=False) + labels_int32 = layers.data(name='labels_int32', + shape=[4, 1], + dtype='int32', + lod_level=1, + append_batch_size=False) num_classes = 5 resolution = 14 - outs = fluid.layers.generate_mask_labels( - im_info=im_info, - gt_classes=gt_classes, - is_crowd=is_crowd, - gt_segms=gt_segms, - rois=rois, - labels_int32=labels_int32, - num_classes=num_classes, - resolution=resolution) + outs = fluid.layers.generate_mask_labels(im_info=im_info, + gt_classes=gt_classes, + is_crowd=is_crowd, + gt_segms=gt_segms, + rois=rois, + labels_int32=labels_int32, + num_classes=num_classes, + resolution=resolution) mask_rois, roi_has_mask_int32, mask_int32 = outs assert mask_rois.shape[1] == 4 assert mask_int32.shape[1] == num_classes * resolution * resolution class TestMultiBoxHead(unittest.TestCase): + def test_multi_box_head(self): data_shape = [3, 224, 224] mbox_locs, mbox_confs, box, var = self.multi_box_head_output(data_shape) @@ -424,8 +441,9 @@ class TestMultiBoxHead(unittest.TestCase): assert mbox_locs.shape[1] == mbox_confs.shape[1] def multi_box_head_output(self, data_shape): - images = fluid.layers.data( - name='pixel', shape=data_shape, dtype='float32') + images = fluid.layers.data(name='pixel', + shape=data_shape, + dtype='float32') conv1 = fluid.layers.conv2d(images, 3, 3, 2) conv2 = fluid.layers.conv2d(conv1, 3, 3, 2) conv3 = fluid.layers.conv2d(conv2, 3, 3, 2) @@ -448,19 +466,18 @@ class TestMultiBoxHead(unittest.TestCase): class TestDetectionMAP(unittest.TestCase): + def test_detection_map(self): program = Program() with program_guard(program): - detect_res = layers.data( - name='detect_res', - shape=[10, 6], - append_batch_size=False, - dtype='float32') - label = layers.data( - name='label', - shape=[10, 6], - append_batch_size=False, - dtype='float32') + detect_res = layers.data(name='detect_res', + shape=[10, 6], + append_batch_size=False, + dtype='float32') + label = layers.data(name='label', + shape=[10, 6], + append_batch_size=False, + dtype='float32') map_out = detection.detection_map(detect_res, label, 21) self.assertIsNotNone(map_out) @@ -469,6 +486,7 @@ class TestDetectionMAP(unittest.TestCase): class TestRpnTargetAssign(unittest.TestCase): + def test_rpn_target_assign(self): program = Program() with program_guard(program): @@ -476,54 +494,49 @@ class TestRpnTargetAssign(unittest.TestCase): cls_logits_shape = [10, 50, 2] anchor_shape = [50, 4] - bbox_pred = layers.data( - name='bbox_pred', - shape=bbox_pred_shape, - append_batch_size=False, - dtype='float32') - cls_logits = layers.data( - name='cls_logits', - shape=cls_logits_shape, - append_batch_size=False, - dtype='float32') - anchor_box = layers.data( - name='anchor_box', - shape=anchor_shape, - append_batch_size=False, - dtype='float32') - anchor_var = layers.data( - name='anchor_var', - shape=anchor_shape, - append_batch_size=False, - dtype='float32') - gt_boxes = layers.data( - name='gt_boxes', shape=[4], lod_level=1, dtype='float32') - is_crowd = layers.data( - name='is_crowd', - shape=[1, 10], - 
dtype='int32', - lod_level=1, - append_batch_size=False) - im_info = layers.data( - name='im_info', - shape=[1, 3], - dtype='float32', - lod_level=1, - append_batch_size=False) - outs = layers.rpn_target_assign( - bbox_pred=bbox_pred, - cls_logits=cls_logits, - anchor_box=anchor_box, - anchor_var=anchor_var, - gt_boxes=gt_boxes, - is_crowd=is_crowd, - im_info=im_info, - rpn_batch_size_per_im=256, - rpn_straddle_thresh=0.0, - rpn_fg_fraction=0.5, - rpn_positive_overlap=0.7, - rpn_negative_overlap=0.3, - use_random=False) + bbox_pred = layers.data(name='bbox_pred', + shape=bbox_pred_shape, + append_batch_size=False, + dtype='float32') + cls_logits = layers.data(name='cls_logits', + shape=cls_logits_shape, + append_batch_size=False, + dtype='float32') + anchor_box = layers.data(name='anchor_box', + shape=anchor_shape, + append_batch_size=False, + dtype='float32') + anchor_var = layers.data(name='anchor_var', + shape=anchor_shape, + append_batch_size=False, + dtype='float32') + gt_boxes = layers.data(name='gt_boxes', + shape=[4], + lod_level=1, + dtype='float32') + is_crowd = layers.data(name='is_crowd', + shape=[1, 10], + dtype='int32', + lod_level=1, + append_batch_size=False) + im_info = layers.data(name='im_info', + shape=[1, 3], + dtype='float32', + lod_level=1, + append_batch_size=False) + outs = layers.rpn_target_assign(bbox_pred=bbox_pred, + cls_logits=cls_logits, + anchor_box=anchor_box, + anchor_var=anchor_var, + gt_boxes=gt_boxes, + is_crowd=is_crowd, + im_info=im_info, + rpn_batch_size_per_im=256, + rpn_straddle_thresh=0.0, + rpn_fg_fraction=0.5, + rpn_positive_overlap=0.7, + rpn_negative_overlap=0.3, + use_random=False) pred_scores = outs[0] pred_loc = outs[1] tgt_lbl = outs[2] @@ -542,6 +555,7 @@ class TestRpnTargetAssign(unittest.TestCase): class TestGenerateProposals(LayerTest): + def test_generate_proposals(self): scores_np = np.random.rand(2, 3, 4, 4).astype('float32') bbox_deltas_np = np.random.rand(2, 12, 4, 4).astype('float32') @@ -551,15 +565,19 @@ class TestGenerateProposals(LayerTest): variances_np = np.ones((4, 4, 3, 4)).astype('float32') with self.static_graph(): - scores = fluid.data( - name='scores', shape=[2, 3, 4, 4], dtype='float32') - bbox_deltas = fluid.data( - name='bbox_deltas', shape=[2, 12, 4, 4], dtype='float32') + scores = fluid.data(name='scores', + shape=[2, 3, 4, 4], + dtype='float32') + bbox_deltas = fluid.data(name='bbox_deltas', + shape=[2, 12, 4, 4], + dtype='float32') im_info = fluid.data(name='im_info', shape=[2, 3], dtype='float32') - anchors = fluid.data( - name='anchors', shape=[4, 4, 3, 4], dtype='float32') - variances = fluid.data( - name='var', shape=[4, 4, 3, 4], dtype='float32') + anchors = fluid.data(name='anchors', + shape=[4, 4, 3, 4], + dtype='float32') + variances = fluid.data(name='var', + shape=[4, 4, 3, 4], + dtype='float32') rois, roi_probs, rois_num = fluid.layers.generate_proposals( scores, bbox_deltas, @@ -605,6 +623,7 @@ class TestGenerateProposals(LayerTest): class TestYoloDetection(unittest.TestCase): + def test_yolov3_loss(self): program = Program() with program_guard(program): @@ -612,15 +631,14 @@ class TestYoloDetection(unittest.TestCase): gt_box = layers.data(name='gt_box', shape=[10, 4], dtype='float32') gt_label = layers.data(name='gt_label', shape=[10], dtype='int32') gt_score = layers.data(name='gt_score', shape=[10], dtype='float32') - loss = layers.yolov3_loss( - x, - gt_box, - gt_label, [10, 13, 30, 13], [0, 1], - 10, - 0.7, - 32, - gt_score=gt_score, - use_label_smooth=False) + loss = layers.yolov3_loss(x, + 
gt_box, + gt_label, [10, 13, 30, 13], [0, 1], + 10, + 0.7, + 32, + gt_score=gt_score, + use_label_smooth=False) self.assertIsNotNone(loss) @@ -641,16 +659,15 @@ class TestYoloDetection(unittest.TestCase): gt_box = layers.data(name='gt_box', shape=[10, 4], dtype='float32') gt_label = layers.data(name='gt_label', shape=[10], dtype='int32') gt_score = layers.data(name='gt_score', shape=[10], dtype='float32') - loss = layers.yolov3_loss( - x, - gt_box, - gt_label, [10, 13, 30, 13], [0, 1], - 10, - 0.7, - 32, - gt_score=gt_score, - use_label_smooth=False, - scale_x_y=1.2) + loss = layers.yolov3_loss(x, + gt_box, + gt_label, [10, 13, 30, 13], [0, 1], + 10, + 0.7, + 32, + gt_score=gt_score, + use_label_smooth=False, + scale_x_y=1.2) self.assertIsNotNone(loss) @@ -659,29 +676,38 @@ class TestYoloDetection(unittest.TestCase): with program_guard(program): x = layers.data(name='x', shape=[30, 7, 7], dtype='float32') img_size = layers.data(name='img_size', shape=[2], dtype='int32') - boxes, scores = layers.yolo_box( - x, img_size, [10, 13, 30, 13], 10, 0.01, 32, scale_x_y=1.2) + boxes, scores = layers.yolo_box(x, + img_size, [10, 13, 30, 13], + 10, + 0.01, + 32, + scale_x_y=1.2) self.assertIsNotNone(boxes) self.assertIsNotNone(scores) class TestBoxClip(unittest.TestCase): + def test_box_clip(self): program = Program() with program_guard(program): - input_box = layers.data( - name='input_box', shape=[7, 4], dtype='float32', lod_level=1) + input_box = layers.data(name='input_box', + shape=[7, 4], + dtype='float32', + lod_level=1) im_info = layers.data(name='im_info', shape=[3], dtype='float32') out = layers.box_clip(input_box, im_info) self.assertIsNotNone(out) class TestMulticlassNMS(unittest.TestCase): + def test_multiclass_nms(self): program = Program() with program_guard(program): - bboxes = layers.data( - name='bboxes', shape=[-1, 10, 4], dtype='float32') + bboxes = layers.data(name='bboxes', + shape=[-1, 10, 4], + dtype='float32') scores = layers.data(name='scores', shape=[-1, 10], dtype='float32') output = layers.multiclass_nms(bboxes, scores, 0.3, 400, 200, 0.7) self.assertIsNotNone(output) @@ -689,48 +715,57 @@ class TestMulticlassNMS(unittest.TestCase): def test_multiclass_nms_error(self): program = Program() with program_guard(program): - bboxes1 = fluid.data( - name='bboxes1', shape=[10, 10, 4], dtype='int32') - scores1 = fluid.data( - name='scores1', shape=[10, 10], dtype='float32') - bboxes2 = fluid.data( - name='bboxes2', shape=[10, 10, 4], dtype='float32') + bboxes1 = fluid.data(name='bboxes1', + shape=[10, 10, 4], + dtype='int32') + scores1 = fluid.data(name='scores1', + shape=[10, 10], + dtype='float32') + bboxes2 = fluid.data(name='bboxes2', + shape=[10, 10, 4], + dtype='float32') scores2 = fluid.data(name='scores2', shape=[10, 10], dtype='int32') - self.assertRaises( - TypeError, - layers.multiclass_nms, - bboxes=bboxes1, - scores=scores1, - score_threshold=0.5, - nms_top_k=400, - keep_top_k=200) - self.assertRaises( - TypeError, - layers.multiclass_nms, - bboxes=bboxes2, - scores=scores2, - score_threshold=0.5, - nms_top_k=400, - keep_top_k=200) + self.assertRaises(TypeError, + layers.multiclass_nms, + bboxes=bboxes1, + scores=scores1, + score_threshold=0.5, + nms_top_k=400, + keep_top_k=200) + self.assertRaises(TypeError, + layers.multiclass_nms, + bboxes=bboxes2, + scores=scores2, + score_threshold=0.5, + nms_top_k=400, + keep_top_k=200) class TestMulticlassNMS2(unittest.TestCase): + def test_multiclass_nms2(self): program = Program() with program_guard(program): - bboxes = 
layers.data( - name='bboxes', shape=[-1, 10, 4], dtype='float32') + bboxes = layers.data(name='bboxes', + shape=[-1, 10, 4], + dtype='float32') scores = layers.data(name='scores', shape=[-1, 10], dtype='float32') output = fluid.contrib.multiclass_nms2(bboxes, scores, 0.3, 400, 200, 0.7) - output2, index = fluid.contrib.multiclass_nms2( - bboxes, scores, 0.3, 400, 200, 0.7, return_index=True) + output2, index = fluid.contrib.multiclass_nms2(bboxes, + scores, + 0.3, + 400, + 200, + 0.7, + return_index=True) self.assertIsNotNone(output) self.assertIsNotNone(output2) self.assertIsNotNone(index) class TestCollectFpnPropsals(LayerTest): + def test_collect_fpn_proposals(self): multi_bboxes_np = [] multi_scores_np = [] @@ -748,18 +783,17 @@ class TestCollectFpnPropsals(LayerTest): multi_scores = [] rois_num_per_level = [] for i in range(4): - bboxes = fluid.data( - name='rois' + str(i), - shape=[5, 4], - dtype='float32', - lod_level=1) - scores = fluid.data( - name='scores' + str(i), - shape=[5, 1], - dtype='float32', - lod_level=1) - rois_num = fluid.data( - name='rois_num' + str(i), shape=[None], dtype='int32') + bboxes = fluid.data(name='rois' + str(i), + shape=[5, 4], + dtype='float32', + lod_level=1) + scores = fluid.data(name='scores' + str(i), + shape=[5, 1], + dtype='float32', + lod_level=1) + rois_num = fluid.data(name='rois_num' + str(i), + shape=[None], + dtype='int32') multi_bboxes.append(bboxes) multi_scores.append(scores) @@ -807,50 +841,52 @@ class TestCollectFpnPropsals(LayerTest): self.assertTrue(np.array_equal(rois_num_stat, rois_num_dy)) def test_collect_fpn_proposals_error(self): + def generate_input(bbox_type, score_type, name): multi_bboxes = [] multi_scores = [] for i in range(4): - bboxes = fluid.data( - name='rois' + name + str(i), - shape=[10, 4], - dtype=bbox_type, - lod_level=1) - scores = fluid.data( - name='scores' + name + str(i), - shape=[10, 1], - dtype=score_type, - lod_level=1) + bboxes = fluid.data(name='rois' + name + str(i), + shape=[10, 4], + dtype=bbox_type, + lod_level=1) + scores = fluid.data(name='scores' + name + str(i), + shape=[10, 1], + dtype=score_type, + lod_level=1) multi_bboxes.append(bboxes) multi_scores.append(scores) return multi_bboxes, multi_scores program = Program() with program_guard(program): - bbox1 = fluid.data( - name='rois', shape=[5, 10, 4], dtype='float32', lod_level=1) - score1 = fluid.data( - name='scores', shape=[5, 10, 1], dtype='float32', lod_level=1) + bbox1 = fluid.data(name='rois', + shape=[5, 10, 4], + dtype='float32', + lod_level=1) + score1 = fluid.data(name='scores', + shape=[5, 10, 1], + dtype='float32', + lod_level=1) bbox2, score2 = generate_input('int32', 'float32', '2') - self.assertRaises( - TypeError, - layers.collect_fpn_proposals, - multi_rois=bbox1, - multi_scores=score1, - min_level=2, - max_level=5, - post_nms_top_n=2000) - self.assertRaises( - TypeError, - layers.collect_fpn_proposals, - multi_rois=bbox2, - multi_scores=score2, - min_level=2, - max_level=5, - post_nms_top_n=2000) + self.assertRaises(TypeError, + layers.collect_fpn_proposals, + multi_rois=bbox1, + multi_scores=score1, + min_level=2, + max_level=5, + post_nms_top_n=2000) + self.assertRaises(TypeError, + layers.collect_fpn_proposals, + multi_rois=bbox2, + multi_scores=score2, + min_level=2, + max_level=5, + post_nms_top_n=2000) class TestDistributeFpnProposals(LayerTest): + def test_distribute_fpn_proposals(self): rois_np = np.random.rand(10, 4).astype('float32') rois_num_np = np.array([4, 6]).astype('int32') @@ -865,11 +901,12 @@ class 
TestDistributeFpnProposals(LayerTest): refer_scale=224, rois_num=rois_num) fetch_list = multi_rois + [restore_ind] + rois_num_per_level - output_stat = self.get_static_graph_result( - feed={'rois': rois_np, - 'rois_num': rois_num_np}, - fetch_list=fetch_list, - with_lod=True) + output_stat = self.get_static_graph_result(feed={ + 'rois': rois_np, + 'rois_num': rois_num_np + }, + fetch_list=fetch_list, + with_lod=True) output_stat_np = [] for output in output_stat: output_np = np.array(output) @@ -900,43 +937,52 @@ class TestDistributeFpnProposals(LayerTest): def test_distribute_fpn_proposals_error(self): program = Program() with program_guard(program): - fpn_rois = fluid.data( - name='data_error', shape=[10, 4], dtype='int32', lod_level=1) - self.assertRaises( - TypeError, - layers.distribute_fpn_proposals, - fpn_rois=fpn_rois, - min_level=2, - max_level=5, - refer_level=4, - refer_scale=224) + fpn_rois = fluid.data(name='data_error', + shape=[10, 4], + dtype='int32', + lod_level=1) + self.assertRaises(TypeError, + layers.distribute_fpn_proposals, + fpn_rois=fpn_rois, + min_level=2, + max_level=5, + refer_level=4, + refer_scale=224) class TestBoxDecoderAndAssign(unittest.TestCase): + def test_box_decoder_and_assign(self): program = Program() with program_guard(program): pb = fluid.data(name='prior_box', shape=[None, 4], dtype='float32') pbv = fluid.data(name='prior_box_var', shape=[4], dtype='float32') - loc = fluid.data( - name='target_box', shape=[None, 4 * 81], dtype='float32') - scores = fluid.data( - name='scores', shape=[None, 81], dtype='float32') + loc = fluid.data(name='target_box', + shape=[None, 4 * 81], + dtype='float32') + scores = fluid.data(name='scores', + shape=[None, 81], + dtype='float32') decoded_box, output_assign_box = fluid.layers.box_decoder_and_assign( pb, pbv, loc, scores, 4.135) self.assertIsNotNone(decoded_box) self.assertIsNotNone(output_assign_box) def test_box_decoder_and_assign_error(self): + def generate_input(pb_type, pbv_type, loc_type, score_type, name): - pb = fluid.data( - name='prior_box' + name, shape=[None, 4], dtype=pb_type) - pbv = fluid.data( - name='prior_box_var' + name, shape=[4], dtype=pbv_type) - loc = fluid.data( - name='target_box' + name, shape=[None, 4 * 81], dtype=loc_type) - scores = fluid.data( - name='scores' + name, shape=[None, 81], dtype=score_type) + pb = fluid.data(name='prior_box' + name, + shape=[None, 4], + dtype=pb_type) + pbv = fluid.data(name='prior_box_var' + name, + shape=[4], + dtype=pbv_type) + loc = fluid.data(name='target_box' + name, + shape=[None, 4 * 81], + dtype=loc_type) + scores = fluid.data(name='scores' + name, + shape=[None, 81], + dtype=score_type) return pb, pbv, loc, scores program = Program() @@ -947,30 +993,27 @@ class TestBoxDecoderAndAssign(unittest.TestCase): 'int32', 'float32', '2') pb3, pbv3, loc3, scores3 = generate_input('float32', 'float32', 'float32', 'int32', '3') - self.assertRaises( - TypeError, - layers.box_decoder_and_assign, - prior_box=pb1, - prior_box_var=pbv1, - target_box=loc1, - box_score=scores1, - box_clip=4.0) - self.assertRaises( - TypeError, - layers.box_decoder_and_assign, - prior_box=pb2, - prior_box_var=pbv2, - target_box=loc2, - box_score=scores2, - box_clip=4.0) - self.assertRaises( - TypeError, - layers.box_decoder_and_assign, - prior_box=pb3, - prior_box_var=pbv3, - target_box=loc3, - box_score=scores3, - box_clip=4.0) + self.assertRaises(TypeError, + layers.box_decoder_and_assign, + prior_box=pb1, + prior_box_var=pbv1, + target_box=loc1, + box_score=scores1, + 
box_clip=4.0) + self.assertRaises(TypeError, + layers.box_decoder_and_assign, + prior_box=pb2, + prior_box_var=pbv2, + target_box=loc2, + box_score=scores2, + box_clip=4.0) + self.assertRaises(TypeError, + layers.box_decoder_and_assign, + prior_box=pb3, + prior_box_var=pbv3, + target_box=loc3, + box_score=scores3, + box_clip=4.0) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/test_error_clip.py b/python/paddle/fluid/tests/test_error_clip.py index 7859fca15f6..e3b20c32392 100644 --- a/python/paddle/fluid/tests/test_error_clip.py +++ b/python/paddle/fluid/tests/test_error_clip.py @@ -39,13 +39,12 @@ with fluid.program_guard(main_program=prog): prog_clip = prog.clone() prog_clip.block(0).var(hidden1.name)._set_error_clip( - fluid.clip.ErrorClipByValue( - max=CLIP_MAX, min=CLIP_MIN)) + fluid.clip.ErrorClipByValue(max=CLIP_MAX, min=CLIP_MIN)) avg_cost_clip = prog_clip.block(0).var(avg_cost.name) fluid.backward.append_backward(loss=avg_cost) -fluid.backward.append_backward( - loss=avg_cost_clip, callbacks=[fluid.clip.error_clip_callback]) +fluid.backward.append_backward(loss=avg_cost_clip, + callbacks=[fluid.clip.error_clip_callback]) hidden1_grad = prog.block(0).var(hidden1.name + "@GRAD") hidden1_grad_clip = prog_clip.block(0).var(hidden1.name + "@GRAD") @@ -53,10 +52,9 @@ hidden1_grad_clip = prog_clip.block(0).var(hidden1.name + "@GRAD") hidden2_grad = prog.block(0).var(hidden2.name + "@GRAD") hidden2_grad_clip = prog_clip.block(0).var(hidden2.name + "@GRAD") -train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=8192), - batch_size=BATCH_SIZE) +train_reader = paddle.batch(paddle.reader.shuffle(paddle.dataset.mnist.train(), + buf_size=8192), + batch_size=BATCH_SIZE) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -75,8 +73,7 @@ for data in train_reader(): prog_clip, feed=feeder.feed(data), fetch_list=[hidden1_grad_clip, hidden2_grad_clip]) - if not ((out1.clip( - min=CLIP_MIN, max=CLIP_MAX) == out1_clip).all() and + if not ((out1.clip(min=CLIP_MIN, max=CLIP_MAX) == out1_clip).all() and (out2 == out2_clip).all()): exit(1) diff --git a/python/paddle/fluid/tests/test_if_else_op.py b/python/paddle/fluid/tests/test_if_else_op.py index b7792e5ce27..12d33d1c724 100644 --- a/python/paddle/fluid/tests/test_if_else_op.py +++ b/python/paddle/fluid/tests/test_if_else_op.py @@ -61,18 +61,19 @@ class TestMNISTIfElseOp(unittest.TestCase): prob = layers.fc(input=hidden, size=10, act='softmax') layers.assign(input=prob, output=false_out) - prob = merge_lod_tensor( - in_true=true_out, in_false=false_out, mask=cond, x=image) + prob = merge_lod_tensor(in_true=true_out, + in_false=false_out, + mask=cond, + x=image) loss = layers.cross_entropy(input=prob, label=label) avg_loss = layers.mean(loss) optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer.minimize(avg_loss, startup_prog) - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=8192), - batch_size=10) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=8192), + batch_size=10) place = core.CPUPlace() exe = Executor(place) @@ -86,8 +87,10 @@ class TestMNISTIfElseOp(unittest.TestCase): y_data = np.expand_dims(y_data, axis=1) outs = exe.run(prog, - feed={'x': x_data, - 'y': y_data}, + feed={ + 'x': x_data, + 'y': y_data + }, fetch_list=[avg_loss]) print(outs[0]) if outs[0] < 1.0: @@ -125,10 +128,9 @@ class TestMNISTIfElseOp(unittest.TestCase): optimizer = 
MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer.minimize(avg_loss, startup_prog) - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=8192), - batch_size=200) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=8192), + batch_size=200) place = core.CPUPlace() exe = Executor(place) @@ -142,8 +144,10 @@ class TestMNISTIfElseOp(unittest.TestCase): y_data = y_data.reshape((y_data.shape[0], 1)) outs = exe.run(prog, - feed={'x': x_data, - 'y': y_data}, + feed={ + 'x': x_data, + 'y': y_data + }, fetch_list=[avg_loss]) print(outs[0]) if outs[0] < 1.0: @@ -152,6 +156,7 @@ class TestMNISTIfElseOp(unittest.TestCase): class TestIfElse(unittest.TestCase): + def set_test_case(self): # condiction is: self.data < self.cond_value self.cond_value = 0.5 @@ -171,8 +176,9 @@ class TestIfElse(unittest.TestCase): startup_prog = Program() with program_guard(prog, startup_prog): src = layers.data(name='data', shape=[1], dtype='float32') - cond = layers.fill_constant( - [1], dtype='float32', value=self.cond_value) + cond = layers.fill_constant([1], + dtype='float32', + value=self.cond_value) ifcond = layers.less_than(x=src, y=cond) ie = layers.IfElse(ifcond) with ie.true_block(): @@ -196,8 +202,7 @@ class TestIfElse(unittest.TestCase): o2 = self.numpy_cal() self.assertTrue( - np.allclose( - o1, o2, atol=1e-8), + np.allclose(o1, o2, atol=1e-8), "IfElse result : " + str(o1) + "\n Numpy result :" + str(o2)) def test_cpu(self): @@ -210,6 +215,7 @@ class TestIfElse(unittest.TestCase): class TestIfElseTrueBranch(TestIfElse): + def set_test_case(self): # condiction is: self.data < self.cond_value self.cond_value = 10. @@ -217,6 +223,7 @@ class TestIfElseTrueBranch(TestIfElse): class TestIfElseFalseBranch(TestIfElse): + def set_test_case(self): # condiction is: self.data < self.cond_value self.cond_value = -10. 
@@ -224,13 +231,15 @@ class TestIfElseFalseBranch(TestIfElse): class TestIfElseError(unittest.TestCase): + def test_input_type_error(self): main_program = Program() startup_program = Program() with program_guard(main_program, startup_program): src = layers.data(name='data', shape=[1], dtype='float32') - const_value = layers.fill_constant( - [1], dtype='float32', value=123.0) + const_value = layers.fill_constant([1], + dtype='float32', + value=123.0) ifcond = layers.less_than(x=src, y=const_value) with self.assertRaises(TypeError): ie = layers.IfElse(set()) diff --git a/python/paddle/fluid/tests/test_lod_tensor.py b/python/paddle/fluid/tests/test_lod_tensor.py index e21224c909f..cc97b0eb5ae 100644 --- a/python/paddle/fluid/tests/test_lod_tensor.py +++ b/python/paddle/fluid/tests/test_lod_tensor.py @@ -22,6 +22,7 @@ import unittest class TestLoDTensor(unittest.TestCase): + def test_pybind_recursive_seq_lens(self): tensor = fluid.LoDTensor() recursive_seq_lens = [] @@ -58,8 +59,8 @@ class TestLoDTensor(unittest.TestCase): def test_create_lod_tensor(self): # Create LoDTensor from a list - data = [[np.int64(1), np.int64(2), np.int64(3)], - [np.int64(3), np.int64(4)]] + data = [[np.int64(1), np.int64(2), + np.int64(3)], [np.int64(3), np.int64(4)]] wrong_recursive_seq_lens = [[2, 2]] correct_recursive_seq_lens = [[3, 2]] self.assertRaises(AssertionError, create_lod_tensor, data, @@ -73,8 +74,8 @@ class TestLoDTensor(unittest.TestCase): self.assertTrue( np.array_equal( np.array(tensor), - np.array([1, 2, 3, 3, 4]).reshape(tensor.shape()).astype( - 'int64'))) + np.array([1, 2, 3, 3, + 4]).reshape(tensor.shape()).astype('int64'))) # Create LoDTensor from numpy array data = np.random.random([10, 1]).astype('float64') @@ -133,9 +134,8 @@ class TestLoDTensor(unittest.TestCase): tensor_from_dlpack = fluid.core.from_dlpack(dltensor) self.assertTrue(isinstance(tensor_from_dlpack, fluid.core.Tensor)) self.assertTrue( - np.array_equal( - np.array(tensor_from_dlpack), - np.array([[1], [2], [3], [4]]).astype('int'))) + np.array_equal(np.array(tensor_from_dlpack), + np.array([[1], [2], [3], [4]]).astype('int'))) # when build with cuda if core.is_compiled_with_cuda(): gtensor = fluid.create_lod_tensor( @@ -145,9 +145,8 @@ class TestLoDTensor(unittest.TestCase): gtensor_from_dlpack = fluid.core.from_dlpack(gdltensor) self.assertTrue(isinstance(gtensor_from_dlpack, fluid.core.Tensor)) self.assertTrue( - np.array_equal( - np.array(gtensor_from_dlpack), - np.array([[1], [2], [3], [4]]).astype('int'))) + np.array_equal(np.array(gtensor_from_dlpack), + np.array([[1], [2], [3], [4]]).astype('int'))) def test_as_type(self): tensor = fluid.create_lod_tensor( diff --git a/python/paddle/fluid/tests/test_python_operator_overriding.py b/python/paddle/fluid/tests/test_python_operator_overriding.py index fd9dc961988..50bfdd287b6 100644 --- a/python/paddle/fluid/tests/test_python_operator_overriding.py +++ b/python/paddle/fluid/tests/test_python_operator_overriding.py @@ -27,6 +27,7 @@ paddle.enable_static() class TestPythonOperatorOverride(unittest.TestCase): + def check_result(self, fn, place, dtype): shape = [9, 10] @@ -34,18 +35,26 @@ class TestPythonOperatorOverride(unittest.TestCase): y_data = np.random.random(size=shape).astype(dtype) python_out = fn(x_data, y_data) - x_var = layers.create_global_var( - name='x', shape=shape, value=0.0, dtype=dtype, persistable=True) - y_var = layers.create_global_var( - name='y', shape=shape, value=0.0, dtype=dtype, persistable=True) + x_var = layers.create_global_var(name='x', + 
shape=shape, + value=0.0, + dtype=dtype, + persistable=True) + y_var = layers.create_global_var(name='y', + shape=shape, + value=0.0, + dtype=dtype, + persistable=True) out = fn(x_var, y_var) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) fluid_out = exe.run(fluid.default_main_program(), - feed={'x': x_data, - 'y': y_data}, + feed={ + 'x': x_data, + 'y': y_data + }, fetch_list=[out]) np.testing.assert_array_equal(python_out, fluid_out[0]) diff --git a/python/paddle/fluid/tests/test_sequential.py b/python/paddle/fluid/tests/test_sequential.py index 7446bb83841..09cfbcdd7e3 100644 --- a/python/paddle/fluid/tests/test_sequential.py +++ b/python/paddle/fluid/tests/test_sequential.py @@ -17,6 +17,7 @@ import paddle class TestDataFeeder(unittest.TestCase): + def test_lod_level_1_converter(self): sequential = paddle.nn.Sequential() diff --git a/python/paddle/fluid/tests/unittests/__init__.py b/python/paddle/fluid/tests/unittests/__init__.py index 193b91cdaa1..e427eb51247 100644 --- a/python/paddle/fluid/tests/unittests/__init__.py +++ b/python/paddle/fluid/tests/unittests/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License.p -# Note: On Windows, import form subdirectories such as dirA()->dirB(), current directory +# Note: On Windows, import form subdirectories such as dirA()->dirB(), current directory # will still be dirA(), But is should be dirB(). So it will ModulNotFoundError # please refer to https://stackoverflow.com/questions/8953844/import-module-from-subfolder diff --git a/python/paddle/fluid/tests/unittests/ascend_group.py b/python/paddle/fluid/tests/unittests/ascend_group.py index 851544e1659..1d3f308a611 100644 --- a/python/paddle/fluid/tests/unittests/ascend_group.py +++ b/python/paddle/fluid/tests/unittests/ascend_group.py @@ -46,59 +46,55 @@ def init_communicator(startup_program, main_program, current_endpoint, assert group_rank >= 0 block = startup_program.global_block() - nccl_id_var = block.create_var( - name=unique_name.generate('nccl_id'), - persistable=True, - type=core.VarDesc.VarType.RAW) - block.append_op( - type='c_gen_nccl_id', - inputs={}, - outputs={'Out': nccl_id_var}, - attrs={ - 'rank': group_rank, - 'endpoint': current_endpoint, - 'other_endpoints': other_endpoints, - OP_ROLE_KEY: OpRole.Forward, - }) - block.append_op( - type='c_comm_init', - inputs={'X': nccl_id_var}, - outputs={}, - attrs={ - 'nranks': nranks, - 'rank': group_rank, - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Forward, - }) + nccl_id_var = block.create_var(name=unique_name.generate('nccl_id'), + persistable=True, + type=core.VarDesc.VarType.RAW) + block.append_op(type='c_gen_nccl_id', + inputs={}, + outputs={'Out': nccl_id_var}, + attrs={ + 'rank': group_rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints, + OP_ROLE_KEY: OpRole.Forward, + }) + block.append_op(type='c_comm_init', + inputs={'X': nccl_id_var}, + outputs={}, + attrs={ + 'nranks': nranks, + 'rank': group_rank, + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Forward, + }) # add input op for test fill_var_name = "tensor@Filled" - fill_var = block.create_var( - name=fill_var_name, - shape=[10, 10], - dtype='float32', - persistable=False, - stop_gradient=True) - block.append_op( - type="fill_constant", - outputs={"Out": fill_var_name}, - attrs={ - "shape": [10, 10], - "dtype": fill_var.dtype, - "value": 1.0, - "place_type": 1 - }) + fill_var = block.create_var(name=fill_var_name, + shape=[10, 10], + dtype='float32', + 
persistable=False, + stop_gradient=True) + block.append_op(type="fill_constant", + outputs={"Out": fill_var_name}, + attrs={ + "shape": [10, 10], + "dtype": fill_var.dtype, + "value": 1.0, + "place_type": 1 + }) with fluid.program_guard(main_program): op_type = "c_allreduce_sum" data = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.5) helper = LayerHelper(op_type, **locals()) - helper.append_op( - type=op_type, - inputs={'X': [data]}, - outputs={'Out': [data]}, - attrs={'ring_id': ring_id, - 'use_calc_stream': True}) + helper.append_op(type=op_type, + inputs={'X': [data]}, + outputs={'Out': [data]}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': True + }) print("startup program:", startup_program) print("main program:", main_program) @@ -138,11 +134,10 @@ def train(world_endpoints, world_device_ids, local_device_ids, local_rank): main_program = main_programs[local_rank] loss = Loss(Block(main_program)) optimizer = ascend_optimizer.AscendOptimizer(None, fetch_list=[]) - optimizer.minimize( - loss, - startup_program, - auto_dp=True, - rank_table_file=os.getenv("RANK_TABLE_FILE", None)) + optimizer.minimize(loss, + startup_program, + auto_dp=True, + rank_table_file=os.getenv("RANK_TABLE_FILE", None)) exe = paddle.static.Executor(paddle.CPUPlace()) exe.run(startup_program) diff --git a/python/paddle/fluid/tests/unittests/asp/asp_pruning_base.py b/python/paddle/fluid/tests/unittests/asp/asp_pruning_base.py index e594bc5c34e..1b387c08120 100644 --- a/python/paddle/fluid/tests/unittests/asp/asp_pruning_base.py +++ b/python/paddle/fluid/tests/unittests/asp/asp_pruning_base.py @@ -1,12 +1,12 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -27,16 +27,21 @@ paddle.enable_static() class TestASPHelperPruningBase(unittest.TestCase): + def setUp(self): self.main_program = fluid.Program() self.startup_program = fluid.Program() def build_model(): - img = fluid.data( - name='img', shape=[None, 3, 32, 32], dtype='float32') + img = fluid.data(name='img', + shape=[None, 3, 32, 32], + dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') - hidden = fluid.layers.conv2d( - input=img, num_filters=4, filter_size=3, padding=2, act="relu") + hidden = fluid.layers.conv2d(input=img, + num_filters=4, + filter_size=3, + padding=2, + act="relu") hidden = fluid.layers.fc(input=hidden, size=32, act='relu') prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') return img, label, prediction @@ -57,8 +62,8 @@ class TestASPHelperPruningBase(unittest.TestCase): def run_training_pruning_test(self, get_mask_gen_func, get_mask_check_func): with fluid.program_guard(self.main_program, self.startup_program): loss = fluid.layers.mean( - fluid.layers.cross_entropy( - input=self.predict, label=self.label)) + fluid.layers.cross_entropy(input=self.predict, + label=self.label)) optimizer = paddle.incubate.asp.decorate( fluid.optimizer.SGD(learning_rate=0.01)) optimizer.minimize(loss, self.startup_program) @@ -74,12 +79,13 @@ class TestASPHelperPruningBase(unittest.TestCase): def __pruning_and_checking(self, exe, place, mask_func_name, check_func_name, with_mask): exe.run(self.startup_program) - paddle.incubate.asp.prune_model( - self.main_program, mask_algo=mask_func_name, with_mask=with_mask) + paddle.incubate.asp.prune_model(self.main_program, + mask_algo=mask_func_name, + with_mask=with_mask) for param in self.main_program.global_block().all_parameters(): if ASPHelper._is_supported_layer(self.main_program, param.name): - mat = np.array(fluid.global_scope().find_var(param.name) - .get_tensor()) + mat = np.array(fluid.global_scope().find_var( + param.name).get_tensor()) self.assertTrue( paddle.fluid.contrib.sparsity.check_sparsity( mat.T, func_name=check_func_name, n=2, m=4)) diff --git a/python/paddle/fluid/tests/unittests/asp/test_asp_customized_pruning.py b/python/paddle/fluid/tests/unittests/asp/test_asp_customized_pruning.py index dca56076dbc..4ee7c2a99fb 100644 --- a/python/paddle/fluid/tests/unittests/asp/test_asp_customized_pruning.py +++ b/python/paddle/fluid/tests/unittests/asp/test_asp_customized_pruning.py @@ -1,12 +1,12 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -26,6 +26,7 @@ from paddle.fluid.dygraph.layers import Layer, _convert_camel_to_snake class MyOwnLayer(Layer): + def __init__(self): super(MyOwnLayer, self).__init__() @@ -48,6 +49,7 @@ def my_own_pruning(tensor, m, n, mask_algo, param_name): class TestASPAddSupportedLayer(unittest.TestCase): + def test_add_supported_layer_via_name(self): sparsity.add_supported_layer("test_supported_1") sparsity.add_supported_layer("test_supported_2", my_own_pruning) @@ -67,21 +69,25 @@ class TestASPAddSupportedLayer(unittest.TestCase): class TestASPDynamicCustomerizedPruneFunc(unittest.TestCase): + def setUp(self): paddle.disable_static() class CustomerLayer(paddle.nn.Layer): + def __init__(self): super(CustomerLayer, self).__init__() - self.weight = self.create_parameter( - shape=[32, 32], attr=None, dtype='float32', is_bias=False) + self.weight = self.create_parameter(shape=[32, 32], + attr=None, + dtype='float32', + is_bias=False) self.linear1 = paddle.nn.Linear(32, 32) self.linear2 = paddle.nn.Linear(32, 10) def forward(self, input_): - hidden = paddle.nn.functional.linear( - x=input_, weight=self.weight) + hidden = paddle.nn.functional.linear(x=input_, + weight=self.weight) hidden = self.linear1(hidden) out = self.linear2(hidden) return out @@ -139,8 +145,8 @@ class TestASPDynamicCustomerizedPruneFunc(unittest.TestCase): self.assertLessEqual( np.sum(mat.flatten() - static_tensor.flatten()), 1e-4) self.assertLessEqual( - np.sum(mat_mask.flatten() - static_tensor_mask.flatten( - )), 1e-4) + np.sum(mat_mask.flatten() - + static_tensor_mask.flatten()), 1e-4) else: self.assertTrue( sparsity.check_sparsity( @@ -158,6 +164,7 @@ class TestASPDynamicCustomerizedPruneFunc(unittest.TestCase): class TestASPStaticCustomerizedPruneFunc(unittest.TestCase): + def setUp(self): paddle.enable_static() @@ -167,11 +174,15 @@ class TestASPStaticCustomerizedPruneFunc(unittest.TestCase): self.customer_prefix = "customer_layer" def build_model(): - img = fluid.data( - name='img', shape=[None, 3, 32, 32], dtype='float32') + img = fluid.data(name='img', + shape=[None, 3, 32, 32], + dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') - hidden = fluid.layers.conv2d( - input=img, num_filters=4, filter_size=3, padding=2, act="relu") + hidden = fluid.layers.conv2d(input=img, + num_filters=4, + filter_size=3, + padding=2, + act="relu") hidden = fluid.layers.fc(input=hidden, size=32, act='relu', @@ -198,15 +209,16 @@ class TestASPStaticCustomerizedPruneFunc(unittest.TestCase): def test_inference_pruning(self): self.exe.run(self.startup_program) - sparsity.prune_model( - self.main_program, mask_algo="mask_1d", with_mask=False) + sparsity.prune_model(self.main_program, + mask_algo="mask_1d", + with_mask=False) supported_layer_count = 0 for param in self.main_program.global_block().all_parameters(): - mat = np.array(fluid.global_scope().find_var(param.name).get_tensor( - )) - if sparsity.asp.ASPHelper._is_supported_layer(self.main_program, - param.name): + mat = np.array(fluid.global_scope().find_var( + param.name).get_tensor()) + if sparsity.asp.ASPHelper._is_supported_layer( + self.main_program, param.name): supported_layer_count += 1 if (self.customer_prefix in param.name): self.assertLessEqual( @@ -223,33 +235,34 @@ class TestASPStaticCustomerizedPruneFunc(unittest.TestCase): def test_training_pruning(self): with fluid.program_guard(self.main_program, self.startup_program): loss = fluid.layers.mean( - fluid.layers.cross_entropy( - input=self.predict, label=self.label)) + 
fluid.layers.cross_entropy(input=self.predict, + label=self.label)) optimizer = sparsity.decorate( fluid.optimizer.SGD(learning_rate=0.01)) optimizer.minimize(loss, self.startup_program) self.exe.run(self.startup_program) - sparsity.prune_model( - self.main_program, mask_algo="mask_1d", with_mask=True) + sparsity.prune_model(self.main_program, + mask_algo="mask_1d", + with_mask=True) supported_layer_count = 0 for param in self.main_program.global_block().all_parameters(): - mat = np.array(fluid.global_scope().find_var(param.name).get_tensor( - )) - if sparsity.asp.ASPHelper._is_supported_layer(self.main_program, - param.name): + mat = np.array(fluid.global_scope().find_var( + param.name).get_tensor()) + if sparsity.asp.ASPHelper._is_supported_layer( + self.main_program, param.name): mat_mask = np.array(fluid.global_scope().find_var( - sparsity.asp.ASPHelper._get_mask_name(param.name)) - .get_tensor()) + sparsity.asp.ASPHelper._get_mask_name( + param.name)).get_tensor()) supported_layer_count += 1 if (self.customer_prefix in param.name): self.assertLessEqual( np.sum(mat.flatten() - static_tensor.flatten()), 1e-4) self.assertLessEqual( - np.sum(mat_mask.flatten() - static_tensor_mask.flatten( - )), 1e-4) + np.sum(mat_mask.flatten() - + static_tensor_mask.flatten()), 1e-4) else: self.assertTrue( sparsity.check_sparsity( diff --git a/python/paddle/fluid/tests/unittests/asp/test_asp_optimize_dynamic.py b/python/paddle/fluid/tests/unittests/asp/test_asp_optimize_dynamic.py index e127dca2251..b58fea9b779 100644 --- a/python/paddle/fluid/tests/unittests/asp/test_asp_optimize_dynamic.py +++ b/python/paddle/fluid/tests/unittests/asp/test_asp_optimize_dynamic.py @@ -1,12 +1,12 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -24,10 +24,13 @@ import numpy as np class MyLayer(paddle.nn.Layer): + def __init__(self): super(MyLayer, self).__init__() - self.conv1 = paddle.nn.Conv2D( - in_channels=3, out_channels=2, kernel_size=3, padding=2) + self.conv1 = paddle.nn.Conv2D(in_channels=3, + out_channels=2, + kernel_size=3, + padding=2) self.linear1 = paddle.nn.Linear(1352, 32) self.linear2 = paddle.nn.Linear(32, 32) self.linear3 = paddle.nn.Linear(32, 10) @@ -42,6 +45,7 @@ class MyLayer(paddle.nn.Layer): class TestASPDynamicOptimize(unittest.TestCase): + def setUp(self): self.layer = MyLayer() @@ -106,17 +110,14 @@ class TestASPDynamicOptimize(unittest.TestCase): paddle.incubate.asp.prune_model(self.layer) - imgs = paddle.to_tensor( - np.random.randn(32, 3, 24, 24), - dtype='float32', - place=self.place, - stop_gradient=False) - labels = paddle.to_tensor( - np.random.randint( - 10, size=(32, 1)), - dtype='float32', - place=self.place, - stop_gradient=False) + imgs = paddle.to_tensor(np.random.randn(32, 3, 24, 24), + dtype='float32', + place=self.place, + stop_gradient=False) + labels = paddle.to_tensor(np.random.randint(10, size=(32, 1)), + dtype='float32', + place=self.place, + stop_gradient=False) loss_fn = paddle.nn.MSELoss(reduction='mean') @@ -131,25 +132,23 @@ class TestASPDynamicOptimize(unittest.TestCase): paddle.static.default_main_program(), param.name): mat = param.numpy() self.assertTrue( - paddle.fluid.contrib.sparsity.check_sparsity( - mat.T, n=2, m=4)) + paddle.fluid.contrib.sparsity.check_sparsity(mat.T, + n=2, + m=4)) def test_asp_training_with_amp(self): self.optimizer = paddle.incubate.asp.decorate(self.optimizer) paddle.incubate.asp.prune_model(self.layer) - imgs = paddle.to_tensor( - np.random.randn(32, 3, 24, 24), - dtype='float32', - place=self.place, - stop_gradient=False) - labels = paddle.to_tensor( - np.random.randint( - 10, size=(32, 1)), - dtype='float32', - place=self.place, - stop_gradient=False) + imgs = paddle.to_tensor(np.random.randn(32, 3, 24, 24), + dtype='float32', + place=self.place, + stop_gradient=False) + labels = paddle.to_tensor(np.random.randint(10, size=(32, 1)), + dtype='float32', + place=self.place, + stop_gradient=False) loss_fn = paddle.nn.MSELoss(reduction='mean') scaler = paddle.amp.GradScaler(init_loss_scaling=1024) @@ -167,8 +166,9 @@ class TestASPDynamicOptimize(unittest.TestCase): paddle.static.default_main_program(), param.name): mat = param.numpy() self.assertTrue( - paddle.fluid.contrib.sparsity.check_sparsity( - mat.T, n=2, m=4)) + paddle.fluid.contrib.sparsity.check_sparsity(mat.T, + n=2, + m=4)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/asp/test_asp_optimize_static.py b/python/paddle/fluid/tests/unittests/asp/test_asp_optimize_static.py index b51e28cdcb9..4fdfe21de01 100644 --- a/python/paddle/fluid/tests/unittests/asp/test_asp_optimize_static.py +++ b/python/paddle/fluid/tests/unittests/asp/test_asp_optimize_static.py @@ -1,12 +1,12 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -27,16 +27,21 @@ paddle.enable_static() class TestASPStaticOptimize(unittest.TestCase): + def setUp(self): self.main_program = fluid.Program() self.startup_program = fluid.Program() def build_model(): - img = fluid.data( - name='img', shape=[None, 3, 24, 24], dtype='float32') + img = fluid.data(name='img', + shape=[None, 3, 24, 24], + dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') - hidden = fluid.layers.conv2d( - input=img, num_filters=4, filter_size=3, padding=2, act="relu") + hidden = fluid.layers.conv2d(input=img, + num_filters=4, + filter_size=3, + padding=2, + act="relu") hidden = fluid.layers.fc(input=hidden, size=32, act='relu') prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') return img, label, prediction @@ -44,11 +49,11 @@ class TestASPStaticOptimize(unittest.TestCase): with fluid.program_guard(self.main_program, self.startup_program): self.img, self.label, predict = build_model() self.loss = fluid.layers.mean( - fluid.layers.cross_entropy( - input=predict, label=self.label)) + fluid.layers.cross_entropy(input=predict, label=self.label)) self.optimizer = fluid.optimizer.SGD(learning_rate=0.01) def test_get_not_ASP_relevant_vars(self): + def check_params(params, params_from_asp): if len(params_from_asp) != len(params): return False @@ -105,8 +110,8 @@ class TestASPStaticOptimize(unittest.TestCase): ref[i] == ASPHelper._is_supported_layer(program, name)) def test_decorate(self): - param_names = self.__get_param_names(self.main_program.global_block() - .all_parameters()) + param_names = self.__get_param_names( + self.main_program.global_block().all_parameters()) with fluid.program_guard(self.main_program, self.startup_program): self.optimizer = paddle.incubate.asp.decorate(self.optimizer) self.optimizer.minimize(self.loss, self.startup_program) @@ -130,17 +135,18 @@ class TestASPStaticOptimize(unittest.TestCase): exe.run(self.startup_program) paddle.incubate.asp.prune_model(self.main_program) - data = (np.random.randn(32, 3, 24, 24), np.random.randint( - 10, size=(32, 1))) + data = (np.random.randn(32, 3, 24, + 24), np.random.randint(10, size=(32, 1))) exe.run(self.main_program, feed=feeder.feed([data])) for param in self.main_program.global_block().all_parameters(): if ASPHelper._is_supported_layer(self.main_program, param.name): - mat = np.array(fluid.global_scope().find_var(param.name) - .get_tensor()) + mat = np.array(fluid.global_scope().find_var( + param.name).get_tensor()) self.assertTrue( - paddle.fluid.contrib.sparsity.check_sparsity( - mat.T, n=2, m=4)) + paddle.fluid.contrib.sparsity.check_sparsity(mat.T, + n=2, + m=4)) def test_asp_training_with_amp(self): if core.is_compiled_with_cuda(): @@ -152,23 +158,24 @@ class TestASPStaticOptimize(unittest.TestCase): self.optimizer.minimize(self.loss, self.startup_program) exe = fluid.Executor(place) - feeder = fluid.DataFeeder( - feed_list=[self.img, self.label], place=place) + feeder = fluid.DataFeeder(feed_list=[self.img, self.label], + place=place) exe.run(self.startup_program) paddle.incubate.asp.prune_model(self.main_program) - data = (np.random.randn(32, 3, 24, 24), np.random.randint( - 10, size=(32, 1))) + data = (np.random.randn(32, 3, 24, + 24), np.random.randint(10, size=(32, 1))) exe.run(self.main_program, feed=feeder.feed([data])) for param in self.main_program.global_block().all_parameters(): if ASPHelper._is_supported_layer(self.main_program, param.name): - mat = np.array(fluid.global_scope().find_var(param.name) - .get_tensor()) + mat = 
np.array(fluid.global_scope().find_var( + param.name).get_tensor()) self.assertTrue( - paddle.fluid.contrib.sparsity.check_sparsity( - mat.T, n=2, m=4)) + paddle.fluid.contrib.sparsity.check_sparsity(mat.T, + n=2, + m=4)) def __get_param_names(self, params): param_names = [] diff --git a/python/paddle/fluid/tests/unittests/asp/test_asp_pruning_dynamic.py b/python/paddle/fluid/tests/unittests/asp/test_asp_pruning_dynamic.py index b0fad0b6400..fd592785a28 100644 --- a/python/paddle/fluid/tests/unittests/asp/test_asp_pruning_dynamic.py +++ b/python/paddle/fluid/tests/unittests/asp/test_asp_pruning_dynamic.py @@ -1,12 +1,12 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,10 +24,13 @@ from paddle.fluid.contrib.sparsity.asp import ASPHelper class MyLayer(paddle.nn.Layer): + def __init__(self): super(MyLayer, self).__init__() - self.conv1 = paddle.nn.Conv2D( - in_channels=3, out_channels=2, kernel_size=3, padding=2) + self.conv1 = paddle.nn.Conv2D(in_channels=3, + out_channels=2, + kernel_size=3, + padding=2) self.linear1 = paddle.nn.Linear(1352, 32) self.linear2 = paddle.nn.Linear(32, 10) @@ -40,6 +43,7 @@ class MyLayer(paddle.nn.Layer): class TestASPDynamicPruningBase(unittest.TestCase): + def setUp(self): self.layer = MyLayer() @@ -47,12 +51,12 @@ class TestASPDynamicPruningBase(unittest.TestCase): if core.is_compiled_with_cuda(): place = paddle.CUDAPlace(0) - self.img = paddle.to_tensor( - np.random.uniform( - low=-0.5, high=0.5, size=(32, 3, 24, 24)), - dtype=np.float32, - place=place, - stop_gradient=False) + self.img = paddle.to_tensor(np.random.uniform(low=-0.5, + high=0.5, + size=(32, 3, 24, 24)), + dtype=np.float32, + place=place, + stop_gradient=False) self.set_config() @@ -73,8 +77,9 @@ class TestASPDynamicPruningBase(unittest.TestCase): def __pruning_and_checking(self, with_mask): - paddle.incubate.asp.prune_model( - self.layer, mask_algo=self.mask_gen_func, with_mask=with_mask) + paddle.incubate.asp.prune_model(self.layer, + mask_algo=self.mask_gen_func, + with_mask=with_mask) for param in self.layer.parameters(): if ASPHelper._is_supported_layer( @@ -86,18 +91,21 @@ class TestASPDynamicPruningBase(unittest.TestCase): class TestASPDynamicPruning1D(TestASPDynamicPruningBase): + def set_config(self): self.mask_gen_func = 'mask_1d' self.mask_check_func = paddle.fluid.contrib.sparsity.CheckMethod.CHECK_1D class TestASPDynamicPruning2DBest(TestASPDynamicPruningBase): + def set_config(self): self.mask_gen_func = 'mask_2d_best' self.mask_check_func = paddle.fluid.contrib.sparsity.CheckMethod.CHECK_2D class TestASPDynamicPruning2DGreedy(TestASPDynamicPruningBase): + def set_config(self): self.mask_gen_func = 'mask_2d_greedy' self.mask_check_func = paddle.fluid.contrib.sparsity.CheckMethod.CHECK_2D diff --git a/python/paddle/fluid/tests/unittests/asp/test_asp_pruning_static.py b/python/paddle/fluid/tests/unittests/asp/test_asp_pruning_static.py index a9986f24b02..6f137e086eb 100644 --- a/python/paddle/fluid/tests/unittests/asp/test_asp_pruning_static.py +++ 
b/python/paddle/fluid/tests/unittests/asp/test_asp_pruning_static.py @@ -1,12 +1,12 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -27,16 +27,21 @@ paddle.enable_static() class TestASPStaticPruningBase(unittest.TestCase): + def setUp(self): self.main_program = fluid.Program() self.startup_program = fluid.Program() def build_model(): - img = fluid.data( - name='img', shape=[None, 3, 24, 24], dtype='float32') + img = fluid.data(name='img', + shape=[None, 3, 24, 24], + dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') - hidden = fluid.layers.conv2d( - input=img, num_filters=2, filter_size=3, padding=2, act="relu") + hidden = fluid.layers.conv2d(input=img, + num_filters=2, + filter_size=3, + padding=2, + act="relu") hidden = fluid.layers.fc(input=hidden, size=32, act='softmax') prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') return img, label, prediction @@ -61,8 +66,8 @@ class TestASPStaticPruningBase(unittest.TestCase): def test_training_pruning(self): with fluid.program_guard(self.main_program, self.startup_program): loss = fluid.layers.mean( - fluid.layers.cross_entropy( - input=self.predict, label=self.label)) + fluid.layers.cross_entropy(input=self.predict, + label=self.label)) optimizer = paddle.incubate.asp.decorate( fluid.optimizer.SGD(learning_rate=0.01)) optimizer.minimize(loss, self.startup_program) @@ -76,32 +81,34 @@ class TestASPStaticPruningBase(unittest.TestCase): def __pruning_and_checking(self, exe, place, with_mask): exe.run(self.startup_program) - paddle.incubate.asp.prune_model( - self.main_program, - mask_algo=self.mask_gen_func, - with_mask=with_mask) + paddle.incubate.asp.prune_model(self.main_program, + mask_algo=self.mask_gen_func, + with_mask=with_mask) for param in self.main_program.global_block().all_parameters(): if ASPHelper._is_supported_layer(self.main_program, param.name): - mat = np.array(fluid.global_scope().find_var(param.name) - .get_tensor()) + mat = np.array(fluid.global_scope().find_var( + param.name).get_tensor()) self.assertTrue( paddle.fluid.contrib.sparsity.check_sparsity( mat.T, func_name=self.mask_check_func, n=2, m=4)) class TestASPStaticPruning1D(TestASPStaticPruningBase): + def set_config(self): self.mask_gen_func = 'mask_1d' self.mask_check_func = paddle.fluid.contrib.sparsity.CheckMethod.CHECK_1D class TestASPStaticPruning2DBest(TestASPStaticPruningBase): + def set_config(self): self.mask_gen_func = 'mask_2d_best' self.mask_check_func = paddle.fluid.contrib.sparsity.CheckMethod.CHECK_2D class TestASPStaticPruning2DGreedy(TestASPStaticPruningBase): + def set_config(self): self.mask_gen_func = 'mask_2d_greedy' self.mask_check_func = paddle.fluid.contrib.sparsity.CheckMethod.CHECK_2D diff --git a/python/paddle/fluid/tests/unittests/asp/test_asp_save_load.py b/python/paddle/fluid/tests/unittests/asp/test_asp_save_load.py index 653cbbf8409..710bbcc6582 100644 --- a/python/paddle/fluid/tests/unittests/asp/test_asp_save_load.py +++ 
b/python/paddle/fluid/tests/unittests/asp/test_asp_save_load.py @@ -1,12 +1,12 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,10 +24,13 @@ import numpy as np class MyLayer(paddle.nn.Layer): + def __init__(self): super(MyLayer, self).__init__() - self.conv1 = paddle.nn.Conv2D( - in_channels=3, out_channels=4, kernel_size=3, padding=2) + self.conv1 = paddle.nn.Conv2D(in_channels=3, + out_channels=4, + kernel_size=3, + padding=2) self.linear1 = paddle.nn.Linear(4624, 32) self.linear2 = paddle.nn.Linear(32, 32) self.linear3 = paddle.nn.Linear(32, 10) @@ -42,6 +45,7 @@ class MyLayer(paddle.nn.Layer): class TestASPDynamicOptimize(unittest.TestCase): + def setUp(self): paddle.disable_static() @@ -69,8 +73,7 @@ class TestASPDynamicOptimize(unittest.TestCase): for param_name in asp_info.mask_vars: mask = asp_info.mask_vars[param_name] asp_info.update_mask_vars( - param_name, paddle.ones( - shape=mask.shape, dtype=mask.dtype)) + param_name, paddle.ones(shape=mask.shape, dtype=mask.dtype)) asp_info.update_masks(param_name, np.ones(shape=mask.shape)) net_state_dict = paddle.load(net_path) @@ -79,17 +82,14 @@ class TestASPDynamicOptimize(unittest.TestCase): self.layer.set_state_dict(net_state_dict) self.optimizer.set_state_dict(opt_state_dict) - imgs = paddle.to_tensor( - np.random.randn(64, 3, 32, 32), - dtype='float32', - place=self.place, - stop_gradient=False) - labels = paddle.to_tensor( - np.random.randint( - 10, size=(64, 1)), - dtype='float32', - place=self.place, - stop_gradient=False) + imgs = paddle.to_tensor(np.random.randn(64, 3, 32, 32), + dtype='float32', + place=self.place, + stop_gradient=False) + labels = paddle.to_tensor(np.random.randint(10, size=(64, 1)), + dtype='float32', + place=self.place, + stop_gradient=False) loss_fn = paddle.nn.MSELoss(reduction='mean') @@ -104,11 +104,13 @@ class TestASPDynamicOptimize(unittest.TestCase): paddle.static.default_main_program(), param.name): mat = param.numpy() self.assertTrue( - paddle.fluid.contrib.sparsity.check_sparsity( - mat.T, n=2, m=4)) + paddle.fluid.contrib.sparsity.check_sparsity(mat.T, + n=2, + m=4)) class TestASPStaticOptimize(unittest.TestCase): + def setUp(self): paddle.enable_static() @@ -116,11 +118,15 @@ class TestASPStaticOptimize(unittest.TestCase): self.startup_program = fluid.Program() def build_model(): - img = fluid.data( - name='img', shape=[None, 3, 32, 32], dtype='float32') + img = fluid.data(name='img', + shape=[None, 3, 32, 32], + dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') - hidden = fluid.layers.conv2d( - input=img, num_filters=4, filter_size=3, padding=2, act="relu") + hidden = fluid.layers.conv2d(input=img, + num_filters=4, + filter_size=3, + padding=2, + act="relu") hidden = fluid.layers.fc(input=hidden, size=32, act='relu') prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') return img, label, prediction @@ -128,8 +134,7 @@ class TestASPStaticOptimize(unittest.TestCase): with fluid.program_guard(self.main_program, 
self.startup_program): self.img, self.label, predict = build_model() self.loss = fluid.layers.mean( - fluid.layers.cross_entropy( - input=predict, label=self.label)) + fluid.layers.cross_entropy(input=predict, label=self.label)) self.optimizer = fluid.optimizer.SGD(learning_rate=0.01) self.optimizer = paddle.incubate.asp.decorate(self.optimizer) self.optimizer.minimize(self.loss, self.startup_program) @@ -155,20 +160,21 @@ class TestASPStaticOptimize(unittest.TestCase): state_dict = paddle.load(param_path) prog.set_state_dict(state_dict) - feeder = fluid.DataFeeder( - feed_list=[self.img, self.label], place=self.place) + feeder = fluid.DataFeeder(feed_list=[self.img, self.label], + place=self.place) - data = (np.random.randn(64, 3, 32, 32), np.random.randint( - 10, size=(64, 1))) + data = (np.random.randn(64, 3, 32, + 32), np.random.randint(10, size=(64, 1))) self.exe.run(prog, feed=feeder.feed([data])) for param in prog.global_block().all_parameters(): if ASPHelper._is_supported_layer(prog, param.name): - mat = np.array(fluid.global_scope().find_var(param.name) - .get_tensor()) + mat = np.array(fluid.global_scope().find_var( + param.name).get_tensor()) self.assertTrue( - paddle.fluid.contrib.sparsity.check_sparsity( - mat.T, n=2, m=4)) + paddle.fluid.contrib.sparsity.check_sparsity(mat.T, + n=2, + m=4)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/asp/test_asp_utils.py b/python/paddle/fluid/tests/unittests/asp/test_asp_utils.py index 67ec54367d3..a65721aa0be 100644 --- a/python/paddle/fluid/tests/unittests/asp/test_asp_utils.py +++ b/python/paddle/fluid/tests/unittests/asp/test_asp_utils.py @@ -1,12 +1,12 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,6 +22,7 @@ import numpy as np class TestASPUtils(unittest.TestCase): + def test_get_check_method(self): self.assertEqual( paddle.fluid.contrib.sparsity.CheckMethod.get_checking_method( @@ -59,13 +60,13 @@ class TestASPUtils(unittest.TestCase): for _ in range(10): x = np.random.randint(10, size=(5, 5)) x = paddle.fluid.contrib.sparsity.get_mask_1d(x, 2, 4) - self.assertTrue( - paddle.fluid.contrib.sparsity.check_mask_1d(x, 2, 4)) + self.assertTrue(paddle.fluid.contrib.sparsity.check_mask_1d( + x, 2, 4)) x = np.random.randn(5, 4) x = paddle.fluid.contrib.sparsity.get_mask_1d(x, 2, 4) - self.assertTrue( - paddle.fluid.contrib.sparsity.check_mask_1d(x, 2, 4)) + self.assertTrue(paddle.fluid.contrib.sparsity.check_mask_1d( + x, 2, 4)) def test_check_mask_2d(self): x = np.array([[1.0, 0.0, 0.0, 1.0, 1.0], [0.0, 1.0, 0.0, 0.0, 0.0], @@ -82,27 +83,28 @@ class TestASPUtils(unittest.TestCase): for _ in range(10): x = np.random.randint(10, size=(5, 5)) x = paddle.fluid.contrib.sparsity.get_mask_2d_greedy(x, 2, 4) - self.assertTrue( - paddle.fluid.contrib.sparsity.check_mask_2d(x, 2, 4)) + self.assertTrue(paddle.fluid.contrib.sparsity.check_mask_2d( + x, 2, 4)) x = np.random.randn(5, 4) x = paddle.fluid.contrib.sparsity.get_mask_2d_greedy(x, 2, 4) - self.assertTrue( - paddle.fluid.contrib.sparsity.check_mask_2d(x, 2, 4)) + self.assertTrue(paddle.fluid.contrib.sparsity.check_mask_2d( + x, 2, 4)) def test_get_mask_2d_best(self): for _ in range(10): x = np.random.randint(10, size=(5, 5)) x = paddle.fluid.contrib.sparsity.get_mask_2d_best(x, 2, 4) - self.assertTrue( - paddle.fluid.contrib.sparsity.check_mask_2d(x, 2, 4)) + self.assertTrue(paddle.fluid.contrib.sparsity.check_mask_2d( + x, 2, 4)) x = np.random.randn(5, 4) x = paddle.fluid.contrib.sparsity.get_mask_2d_best(x, 2, 4) - self.assertTrue( - paddle.fluid.contrib.sparsity.check_mask_2d(x, 2, 4)) + self.assertTrue(paddle.fluid.contrib.sparsity.check_mask_2d( + x, 2, 4)) def test_threadsafe_valid_2d_patterns(self): + def get_reference(m=4, n=2): from itertools import permutations @@ -112,8 +114,8 @@ class TestASPUtils(unittest.TestCase): patterns = patterns + patterns patterns = np.asarray(list(set(permutations(patterns, m)))) - valid = ((patterns.sum(axis=1) <= n).sum(axis=1) == m - ).nonzero()[0].reshape(-1) + valid = ((patterns.sum(axis=1) <= n).sum( + axis=1) == m).nonzero()[0].reshape(-1) valid_patterns = np.empty((valid.shape[0], m, m)) valid_patterns[:] = patterns[valid[:]] return valid_patterns @@ -131,8 +133,8 @@ class TestASPUtils(unittest.TestCase): self.assertTrue(reference_key in patterns_map) self.assertTrue(len(patterns_map) == 1) - self.assertTrue((reference_patterns == patterns_map[reference_key]).all( - )) + self.assertTrue( + (reference_patterns == patterns_map[reference_key]).all()) def test_check_sparsity(self): for _ in range(10): @@ -173,16 +175,14 @@ class TestASPUtils(unittest.TestCase): mask, func_name=paddle.fluid.contrib.sparsity.CheckMethod.CHECK_1D, n=2, - m=4), - paddle.fluid.contrib.sparsity.check_mask_1d(mask, 2, 4)) + m=4), paddle.fluid.contrib.sparsity.check_mask_1d(mask, 2, 4)) mask = paddle.fluid.contrib.sparsity.get_mask_2d_best(x_2d, 2, 4) self.assertEqual( paddle.fluid.contrib.sparsity.check_sparsity( mask, func_name=paddle.fluid.contrib.sparsity.CheckMethod.CHECK_2D, n=2, - m=4), - paddle.fluid.contrib.sparsity.check_mask_2d(mask, 2, 4)) + m=4), paddle.fluid.contrib.sparsity.check_mask_2d(mask, 2, 4)) def __test_1D_2D_sparse_mask_generation_methods(self, x): mask = 
paddle.fluid.contrib.sparsity.create_mask( diff --git a/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_dynamic.py b/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_dynamic.py index 3ced15bf158..7aaf1fd33a9 100644 --- a/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_dynamic.py +++ b/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_dynamic.py @@ -22,6 +22,7 @@ import paddle.fluid.core as core import os from paddle.fluid.contrib.sparsity.asp import ASPHelper import numpy as np + cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES') if cuda_visible_devices is None or cuda_visible_devices == "": os.environ['CUDA_VISIBLE_DEVICES'] = '0' @@ -30,6 +31,7 @@ else: class MyLayer(paddle.nn.Layer): + def __init__(self): super(MyLayer, self).__init__() self.linear1 = paddle.nn.Linear(32, 32) @@ -42,6 +44,7 @@ class MyLayer(paddle.nn.Layer): class TestFleetWithASPDynamic(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36213" os.environ["PADDLE_CURRENT_ENDPOINTS"] = "127.0.0.1:36213" @@ -66,17 +69,14 @@ class TestFleetWithASPDynamic(unittest.TestCase): self.optimizer = fleet.distributed_optimizer(self.optimizer) self.layer = fleet.distributed_model(self.layer) - imgs = paddle.to_tensor( - np.random.randn(64, 32), - dtype='float32', - place=self.place, - stop_gradient=False) - labels = paddle.to_tensor( - np.random.randint( - 10, size=(64, 1)), - dtype='float32', - place=self.place, - stop_gradient=False) + imgs = paddle.to_tensor(np.random.randn(64, 32), + dtype='float32', + place=self.place, + stop_gradient=False) + labels = paddle.to_tensor(np.random.randint(10, size=(64, 1)), + dtype='float32', + place=self.place, + stop_gradient=False) loss_fn = paddle.nn.MSELoss(reduction='mean') @@ -91,11 +91,13 @@ class TestFleetWithASPDynamic(unittest.TestCase): paddle.static.default_main_program(), param.name): mat = param.numpy() self.assertTrue( - paddle.fluid.contrib.sparsity.check_sparsity( - mat.T, n=2, m=4)) + paddle.fluid.contrib.sparsity.check_sparsity(mat.T, + n=2, + m=4)) class TestFleetWithASPAMPDynamic(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36213" os.environ["PADDLE_CURRENT_ENDPOINTS"] = "127.0.0.1:36213" @@ -120,17 +122,14 @@ class TestFleetWithASPAMPDynamic(unittest.TestCase): self.optimizer = fleet.distributed_optimizer(self.optimizer) self.layer = fleet.distributed_model(self.layer) - imgs = paddle.to_tensor( - np.random.randn(64, 32), - dtype='float32', - place=self.place, - stop_gradient=False) - labels = paddle.to_tensor( - np.random.randint( - 10, size=(64, 1)), - dtype='float32', - place=self.place, - stop_gradient=False) + imgs = paddle.to_tensor(np.random.randn(64, 32), + dtype='float32', + place=self.place, + stop_gradient=False) + labels = paddle.to_tensor(np.random.randint(10, size=(64, 1)), + dtype='float32', + place=self.place, + stop_gradient=False) loss_fn = paddle.nn.MSELoss(reduction='mean') scaler = paddle.amp.GradScaler(init_loss_scaling=1024) @@ -148,8 +147,9 @@ class TestFleetWithASPAMPDynamic(unittest.TestCase): paddle.static.default_main_program(), param.name): mat = param.numpy() self.assertTrue( - paddle.fluid.contrib.sparsity.check_sparsity( - mat.T, n=2, m=4)) + paddle.fluid.contrib.sparsity.check_sparsity(mat.T, + n=2, + m=4)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_sharding.py b/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_sharding.py index 
d9ddd6c88d7..1feb3e28c13 100644 --- a/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_sharding.py +++ b/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_sharding.py @@ -23,6 +23,7 @@ import os from paddle.static import sparsity from paddle.fluid.contrib.sparsity.asp import ASPHelper import numpy as np + cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES') if cuda_visible_devices is None or cuda_visible_devices == "": os.environ['CUDA_VISIBLE_DEVICES'] = '0' @@ -33,6 +34,7 @@ paddle.enable_static() class TestFleetWithASPSharding(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36213" os.environ["PADDLE_CURRENT_ENDPOINTS"] = "127.0.0.1:36213" @@ -49,8 +51,9 @@ class TestFleetWithASPSharding(unittest.TestCase): def net(self, main_prog, startup_prog): with fluid.program_guard(main_prog, startup_prog): - input_x = paddle.static.data( - name="x", shape=[-1, 32], dtype='float32') + input_x = paddle.static.data(name="x", + shape=[-1, 32], + dtype='float32') input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh') @@ -84,8 +87,8 @@ class TestFleetWithASPSharding(unittest.TestCase): with fluid.program_guard(train_prog, startup_prog): optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) if paddle.fluid.is_compiled_with_cuda(): @@ -105,11 +108,12 @@ class TestFleetWithASPSharding(unittest.TestCase): for param in train_prog.global_block().all_parameters(): if ASPHelper._is_supported_layer(train_prog, param.name): - mat = np.array(fluid.global_scope().find_var(param.name) - .get_tensor()) + mat = np.array(fluid.global_scope().find_var( + param.name).get_tensor()) self.assertTrue( - paddle.fluid.contrib.sparsity.check_sparsity( - mat.T, n=2, m=4)) + paddle.fluid.contrib.sparsity.check_sparsity(mat.T, + n=2, + m=4)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_static.py b/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_static.py index 2023c005140..23110bb7ff7 100644 --- a/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_static.py +++ b/python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp_static.py @@ -23,6 +23,7 @@ import os from paddle.static import sparsity from paddle.fluid.contrib.sparsity.asp import ASPHelper import numpy as np + cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES') if cuda_visible_devices is None or cuda_visible_devices == "": os.environ['CUDA_VISIBLE_DEVICES'] = '0' @@ -33,6 +34,7 @@ paddle.enable_static() class TestFleetWithASPStatic(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36213" os.environ["PADDLE_CURRENT_ENDPOINTS"] = "127.0.0.1:36213" @@ -41,8 +43,9 @@ class TestFleetWithASPStatic(unittest.TestCase): def net(self, main_prog, startup_prog): with fluid.program_guard(main_prog, startup_prog): - input_x = paddle.static.data( - name="x", shape=[-1, 32], dtype='float32') + input_x = paddle.static.data(name="x", + shape=[-1, 32], + dtype='float32') input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh') @@ -62,12 +65,12 @@ class TestFleetWithASPStatic(unittest.TestCase): with fluid.program_guard(train_prog, startup_prog): optimizer = 
paddle.fluid.optimizer.SGD(learning_rate=0.01) - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) - place = fluid.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if paddle.fluid.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) feeder = fluid.DataFeeder(feed_list=[input_x, input_y], place=place) @@ -80,14 +83,16 @@ class TestFleetWithASPStatic(unittest.TestCase): for param in train_prog.global_block().all_parameters(): if ASPHelper._is_supported_layer(train_prog, param.name): - mat = np.array(fluid.global_scope().find_var(param.name) - .get_tensor()) + mat = np.array(fluid.global_scope().find_var( + param.name).get_tensor()) self.assertTrue( - paddle.fluid.contrib.sparsity.check_sparsity( - mat.T, n=2, m=4)) + paddle.fluid.contrib.sparsity.check_sparsity(mat.T, + n=2, + m=4)) class TestFleetWithASPAMPStatic(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36213" os.environ["PADDLE_CURRENT_ENDPOINTS"] = "127.0.0.1:36213" @@ -96,8 +101,9 @@ class TestFleetWithASPAMPStatic(unittest.TestCase): def net(self, main_prog, startup_prog): with fluid.program_guard(main_prog, startup_prog): - input_x = paddle.static.data( - name="x", shape=[-1, 32], dtype='float32') + input_x = paddle.static.data(name="x", + shape=[-1, 32], + dtype='float32') input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh') @@ -118,12 +124,12 @@ class TestFleetWithASPAMPStatic(unittest.TestCase): with fluid.program_guard(train_prog, startup_prog): optimizer = paddle.optimizer.SGD(learning_rate=0.01) - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) - place = fluid.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if paddle.fluid.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) feeder = fluid.DataFeeder(feed_list=[input_x, input_y], place=place) @@ -138,11 +144,12 @@ class TestFleetWithASPAMPStatic(unittest.TestCase): for param in train_prog.global_block().all_parameters(): if ASPHelper._is_supported_layer(train_prog, param.name): - mat = np.array(fluid.global_scope().find_var(param.name) - .get_tensor()) + mat = np.array(fluid.global_scope().find_var( + param.name).get_tensor()) self.assertTrue( - paddle.fluid.contrib.sparsity.check_sparsity( - mat.T, n=2, m=4)) + paddle.fluid.contrib.sparsity.check_sparsity(mat.T, + n=2, + m=4)) def test_with_asp_and_pure_fp16(self): fleet.init(is_collective=True) @@ -158,12 +165,12 @@ class TestFleetWithASPAMPStatic(unittest.TestCase): with paddle.static.amp.fp16_guard(): optimizer = optimizer = paddle.optimizer.Momentum( learning_rate=0.01, multi_precision=True) - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) - place = fluid.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if paddle.fluid.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) feeder = fluid.DataFeeder(feed_list=[input_x, input_y], place=place) @@ -178,11 +185,12 @@ class 
TestFleetWithASPAMPStatic(unittest.TestCase): for param in train_prog.global_block().all_parameters(): if ASPHelper._is_supported_layer(train_prog, param.name): - mat = np.array(fluid.global_scope().find_var(param.name) - .get_tensor()) + mat = np.array(fluid.global_scope().find_var( + param.name).get_tensor()) self.assertTrue( - paddle.fluid.contrib.sparsity.check_sparsity( - mat.T, n=2, m=4)) + paddle.fluid.contrib.sparsity.check_sparsity(mat.T, + n=2, + m=4)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py index 2464882d617..47db2793dc0 100644 --- a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py +++ b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py @@ -54,6 +54,7 @@ def get_random_images_and_labels(image_shape, label_shape): def sample_list_generator_creator(): + def __reader__(): for _ in range(BATCH_NUM): sample_list = [] @@ -67,19 +68,21 @@ def sample_list_generator_creator(): class AutoCheckpointBase(unittest.TestCase): + def _init_env(self, exe, main_prog, startup_prog, minimize=True, iterable=True): + def simple_net(): image = fluid.data(name='image', shape=[-1, 4, 4], dtype='float32') label = fluid.data(name='label', shape=[-1, 1], dtype='int64') fc_tmp = fluid.layers.fc(image, size=CLASS_NUM) - cross_entropy = fluid.layers.softmax_with_cross_entropy(fc_tmp, - label) + cross_entropy = fluid.layers.softmax_with_cross_entropy( + fc_tmp, label) loss = fluid.layers.reduce_mean(cross_entropy) sgd = fluid.optimizer.SGD(learning_rate=1e-3) if minimize: diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_model.py b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_model.py index 8e5221ed5ff..d459ffd6d68 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_model.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_model.py @@ -45,6 +45,7 @@ def get_random_inputs_and_labels(input_shape, label_shape): def batch_generator_creator(): + def __reader__(): for _ in range(batch_size): batch_input, batch_label = get_random_inputs_and_labels( @@ -56,6 +57,7 @@ def batch_generator_creator(): class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -64,14 +66,18 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.linear2 = nn.Linear(d_model, 1, weight_attr, bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) self.dropout = nn.Dropout(dropout_ratio, mode="upscale_in_train") @@ -90,32 +96,31 @@ class MLPLayer(nn.Layer): def mlp_pretrain_forward(train_program, start_program): with static.program_guard(train_program, start_program), utils.unique_name.guard(): - input = static.data( - name="input", - shape=[batch_size, sequence_len, 
hidden_size], - dtype='float32') - label = static.data( - name="label", shape=[batch_size, sequence_len, 1], dtype='float32') - - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mappig": [-1, -1, -1] - }) - - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + input = static.data(name="input", + shape=[batch_size, sequence_len, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, sequence_len, 1], + dtype='float32') + + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mappig": [-1, -1, -1] + }) + + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) predict = mlp(input) error_cost = paddle.nn.functional.square_error_cost(predict, label) loss = paddle.mean(error_cost) - loader = paddle.io.DataLoader.from_generator( - feed_list=[input, label], capacity=4 * batch_size, iterable=True) + loader = paddle.io.DataLoader.from_generator(feed_list=[input, label], + capacity=4 * batch_size, + iterable=True) return loss, train_program, start_program, loader @@ -138,12 +143,11 @@ def train(): loss, train_program, start_program, loader = mlp_pretrain_forward( train_program, start_program) - optimizer = paddle.fluid.optimizer.AdamOptimizer( - learning_rate=0.00001, - beta1=0.9, - beta2=0.999, - epsilon=1e-08, - grad_clip=None) + optimizer = paddle.fluid.optimizer.AdamOptimizer(learning_rate=0.00001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + grad_clip=None) optimizer = fleet.distributed_optimizer(optimizer) _, _, distributed_startup_program, distributed_main_program = optimizer.minimize( diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py index 014a8048364..6bd48fb1963 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py @@ -22,6 +22,7 @@ import numpy as np import paddle.distributed.auto_parallel as auto from auto_parallel_relaunch_model import mlp_pretrain_forward from auto_parallel_relaunch_model import batch_generator_creator + sys.path.append("..") import auto_parallel_gpt_model as modeling from auto_parallel_gpt_model import GPTModel, GPTForPretraining, GPTPretrainingCriterion @@ -31,41 +32,44 @@ def get_gpt_model(train_program, start_program, place, batch_size, sequence_len, vocab_size): modeling.init_global() with static.program_guard(train_program, start_program): - tokens = paddle.static.data( - name="tokens", shape=[batch_size, sequence_len], dtype='int64') - position_ids = paddle.static.data( - name="position_ids", - shape=[batch_size, sequence_len], - dtype='int64') + tokens = paddle.static.data(name="tokens", + shape=[batch_size, sequence_len], + dtype='int64') + position_ids = paddle.static.data(name="position_ids", + shape=[batch_size, sequence_len], + dtype='int64') attention_mask = paddle.static.data( name="attention_mask", shape=[batch_size, 1, sequence_len, sequence_len], dtype='float32') - labels = paddle.static.data( - name="labels", shape=[batch_size, sequence_len], dtype='int64') - loss_mask = paddle.static.data( - name="loss_mask", shape=[batch_size, sequence_len], dtype='float32') + labels = 
paddle.static.data(name="labels", + shape=[batch_size, sequence_len], + dtype='int64') + loss_mask = paddle.static.data(name="loss_mask", + shape=[batch_size, sequence_len], + dtype='float32') data_holder = [tokens, position_ids, attention_mask, labels, loss_mask] - gpt = GPTModel( - vocab_size=1000, - hidden_size=64, - num_hidden_layers=2, - num_attention_heads=8, - intermediate_size=256, - hidden_act="gelu", - hidden_dropout_prob=0.0, - attention_probs_dropout_prob=0.0, - max_position_embeddings=1024, - type_vocab_size=1, - initializer_range=0.02, - pad_token_id=0, - eos_token_id=7, - bos_token_id=0, - eol_token_id=3) - - model = GPTForPretraining( - gpt, vocab_size=1000, hidden_size=64, initializer_range=0.02) + gpt = GPTModel(vocab_size=1000, + hidden_size=64, + num_hidden_layers=2, + num_attention_heads=8, + intermediate_size=256, + hidden_act="gelu", + hidden_dropout_prob=0.0, + attention_probs_dropout_prob=0.0, + max_position_embeddings=1024, + type_vocab_size=1, + initializer_range=0.02, + pad_token_id=0, + eos_token_id=7, + bos_token_id=0, + eol_token_id=3) + + model = GPTForPretraining(gpt, + vocab_size=1000, + hidden_size=64, + initializer_range=0.02) preds = model(tokens, position_ids, attention_mask) criterion = GPTPretrainingCriterion() loss = criterion(preds, labels, loss_mask) @@ -105,12 +109,11 @@ def train(): train_program, start_program, place, batch_size, sequence_len, vocab_size) - optimizer = paddle.fluid.optimizer.AdamOptimizer( - learning_rate=0.00001, - beta1=0.9, - beta2=0.999, - epsilon=1e-08, - grad_clip=None) + optimizer = paddle.fluid.optimizer.AdamOptimizer(learning_rate=0.00001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + grad_clip=None) optimizer = fleet.distributed_optimizer(optimizer) _, _, distributed_startup_program, distributed_main_program = optimizer.minimize( loss, start_program) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_planner.py b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_planner.py index a93663cb95e..20d45e32b7a 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_planner.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_planner.py @@ -29,12 +29,11 @@ def train(): loss, train_program, start_program, loader = mlp_pretrain_forward( train_program, start_program) - optimizer = paddle.fluid.optimizer.AdamOptimizer( - learning_rate=0.00001, - beta1=0.9, - beta2=0.999, - epsilon=1e-08, - grad_clip=None) + optimizer = paddle.fluid.optimizer.AdamOptimizer(learning_rate=0.00001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + grad_clip=None) optimizer = fleet.distributed_optimizer(optimizer) _, _, distributed_startup_program, distributed_main_program = optimizer.minimize( diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py index 23bab5ffa29..e6a730f0a64 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py @@ -47,6 +47,7 @@ paddle.seed(44) class MyDataset(Dataset): + def __init__(self, num_samples): super(MyDataset, self).__init__() self.num_samples = num_samples @@ -61,6 +62,7 @@ class MyDataset(Dataset): class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -69,43 +71,45 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = 
intermediate_size - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.linear2 = nn.Linear(d_model, 1, weight_attr, bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) self.dropout = nn.Dropout(dropout_ratio, mode="upscale_in_train") def forward(self, input): - out = auto.shard_op( - self.norm, dist_attr={"process_mesh": PP_MESH_0})(input)[0] + out = auto.shard_op(self.norm, dist_attr={"process_mesh": + PP_MESH_0})(input)[0] out = self.linear0(input) out = F.gelu(out, approximate=True) - out = auto.shard_op( - self.linear1, dist_attr={"process_mesh": PP_MESH_1})(out)[0] + out = auto.shard_op(self.linear1, dist_attr={"process_mesh": + PP_MESH_1})(out)[0] out = self.dropout(out) out = self.linear2(out) return out def train(): - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) loss = paddle.nn.CrossEntropyLoss() - optimizer = paddle.fluid.optimizer.AdamOptimizer( - learning_rate=0.00001, - beta1=0.9, - beta2=0.999, - epsilon=1e-08, - grad_clip=None) + optimizer = paddle.fluid.optimizer.AdamOptimizer(learning_rate=0.00001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + grad_clip=None) inputs_spec = InputSpec([batch_size, hidden_size], 'float32', 'x') labels_spec = InputSpec([batch_size], 'int64', 'label') @@ -119,11 +123,10 @@ def train(): fleet.init(is_collective=True, strategy=dist_strategy) # init engine - engine = Engine( - mlp, - inputs_spec=inputs_spec, - labels_spec=labels_spec, - strategy=dist_strategy) + engine = Engine(mlp, + inputs_spec=inputs_spec, + labels_spec=labels_spec, + strategy=dist_strategy) engine.prepare(optimizer, loss, metrics=paddle.metric.Accuracy()) # train diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/high_order_grad.py b/python/paddle/fluid/tests/unittests/auto_parallel/high_order_grad.py index cc0acae2fb1..1de44e91a78 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/high_order_grad.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/high_order_grad.py @@ -28,6 +28,7 @@ paddle.seed(1234) class FCNet: + def __init__(self, num_ins, num_outs, num_layers, hidden_size): self.num_ins = num_ins self.num_outs = num_outs @@ -48,10 +49,12 @@ class FCNet: lsize = self.hidden_size rsize = self.hidden_size - w = paddle.static.create_parameter( - shape=[lsize, rsize], dtype="float32", is_bias=False) - b = paddle.static.create_parameter( - shape=[rsize], dtype="float32", is_bias=True) + w = paddle.static.create_parameter(shape=[lsize, rsize], + dtype="float32", + is_bias=False) + b = paddle.static.create_parameter(shape=[rsize], + dtype="float32", + is_bias=True) self.weights.append(w) self.biases.append(b) @@ -65,13 +68,13 @@ class FCNet: class LaplaceModel(paddle.nn.Layer): + def __init__(self, num_ins=2, num_outs=1, num_layers=5, hidden_size=20): super(LaplaceModel, self).__init__() - 
self.net = FCNet( - num_ins=num_ins, - num_outs=num_outs, - num_layers=num_layers, - hidden_size=hidden_size) + self.net = FCNet(num_ins=num_ins, + num_outs=num_outs, + num_layers=num_layers, + hidden_size=hidden_size) def forward(self, inputs, bc_index): inputs.stop_gradient = False @@ -85,6 +88,7 @@ class LaplaceModel(paddle.nn.Layer): class LaplaceDataset: + def __init__(self, num_sample): self.num_sample = num_sample @@ -127,7 +131,8 @@ def main(): # spec inputs_spec = [ - InputSpec([100, 2], 'float32', 'x'), InputSpec([36], 'int64', 'bc_idx') + InputSpec([100, 2], 'float32', 'x'), + InputSpec([36], 'int64', 'bc_idx') ] labels_spec = InputSpec([36, 1], 'float32', 'bc_v') @@ -135,11 +140,10 @@ def main(): dist_strategy.semi_auto = True fleet.init(is_collective=True, strategy=dist_strategy) - engine = Engine( - laplace, - inputs_spec=inputs_spec, - labels_spec=labels_spec, - strategy=dist_strategy) + engine = Engine(laplace, + inputs_spec=inputs_spec, + labels_spec=labels_spec, + strategy=dist_strategy) engine.prepare(optimizer=optimizer, loss=loss_func) res = engine.fit(train_dataset, batch_size=None) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/launch.py b/python/paddle/fluid/tests/unittests/auto_parallel/launch.py index c225fe85cd8..ee9ff484523 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/launch.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/launch.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py index 321b2622862..4ff72173382 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py @@ -79,6 +79,7 @@ cluster_json = """ class TestAutoParallelReLaunch(unittest.TestCase): + def test_relaunch(self): file_dir = os.path.dirname(os.path.abspath(__file__)) cluster_json_path = os.path.join(file_dir, "auto_parallel_cluster.json") @@ -105,8 +106,8 @@ class TestAutoParallelReLaunch(unittest.TestCase): # Remove unnecessary files if os.path.exists(cluster_json_path): os.remove(cluster_json_path) - rank_mapping_json_path = os.path.join(file_dir, - "auto_parallel_rank_mapping.json") + rank_mapping_json_path = os.path.join( + file_dir, "auto_parallel_rank_mapping.json") if os.path.exists(rank_mapping_json_path): os.remove(rank_mapping_json_path) log_path = os.path.join(file_dir, "log") diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_cluster.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_cluster.py index dc22263b520..5b6f898d5b7 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_cluster.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_cluster.py @@ -1967,6 +1967,7 @@ multi_cluster_json = """{ class TestCluster(unittest.TestCase): + def test_single_machine(self): # Build cluster file_dir = os.path.dirname(os.path.abspath(__file__)) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_comm_cost.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_comm_cost.py index 898408becac..0d3f193e8bc 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_comm_cost.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_comm_cost.py @@ -31,6 +31,7 @@ from test_cluster import cluster_json, multi_cluster_json class TestCommOpCost(unittest.TestCase): + def test_comm_cost(self): # Build cluster file_dir = os.path.dirname(os.path.abspath(__file__)) @@ -47,9 +48,10 @@ class TestCommOpCost(unittest.TestCase): comm_context = CommContext(cluster) # Check AllreduceSumCost 128MB ring cost - allreduce_sum_op_desc = build_comm_desc( - "c_allreduce_sum", [0, 1, 2, 3, 4, 5, 6, 7], paddle.float32, - [1, 32 * (10**6)]) + allreduce_sum_op_desc = build_comm_desc("c_allreduce_sum", + [0, 1, 2, 3, 4, 5, 6, 7], + paddle.float32, + [1, 32 * (10**6)]) allreduce_sum_op_cost = AllreduceSumOpCost( op_desc=allreduce_sum_op_desc, comm_context=comm_context) @@ -57,37 +59,37 @@ class TestCommOpCost(unittest.TestCase): allgather_op_desc = build_comm_desc("c_allgather", [0, 1, 2, 3, 4, 5, 6, 7], paddle.float32, [1, 32 * (10**6)]) - allgather_op_cost = AllgatherOpCost( - op_desc=allgather_op_desc, comm_context=comm_context) + allgather_op_cost = AllgatherOpCost(op_desc=allgather_op_desc, + comm_context=comm_context) self.assertTrue(allgather_op_cost.time > 0) # Check BroadcastOpCost cost broadcast_op_desc = build_comm_desc("c_broadcast", [0, 1, 2, 3, 4, 5, 6, 7], paddle.float32, [1, 32 * (10**6)]) - broadcast_op_cost = BroadcastOpCost( - op_desc=broadcast_op_desc, comm_context=comm_context) + broadcast_op_cost = BroadcastOpCost(op_desc=broadcast_op_desc, + comm_context=comm_context) self.assertTrue(broadcast_op_cost.time > 0) # Check SendOpCost cost send_op_desc = build_comm_desc("send_v2", [0, 1], paddle.float32, [1, 32 * (10**6)]) - 
send_op_cost = SendOpCost( - op_desc=send_op_desc, comm_context=comm_context) + send_op_cost = SendOpCost(op_desc=send_op_desc, + comm_context=comm_context) self.assertTrue(send_op_cost.time > 0) # Check RecvOpCost cost recv_op_desc = build_comm_desc("recv_v2", [0, 1], paddle.float32, [1, 32 * (10**6)]) - recv_op_cost = RecvOpCost( - op_desc=recv_op_desc, comm_context=comm_context) + recv_op_cost = RecvOpCost(op_desc=recv_op_desc, + comm_context=comm_context) self.assertTrue(recv_op_cost.time > 0) # Check IdentityOpCost cost identity_op_desc = build_comm_desc("c_identity", [0, 1], paddle.float32, [1, 32 * (10**6)]) - identity_op_cost = IdentityOpCost( - op_desc=identity_op_desc, comm_context=comm_context) + identity_op_cost = IdentityOpCost(op_desc=identity_op_desc, + comm_context=comm_context) self.assertTrue(identity_op_cost.time >= 0) # Remove unnecessary files @@ -122,8 +124,8 @@ class TestCommOpCost(unittest.TestCase): "c_allgather", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], paddle.float32, [1, 32 * (10**6)]) - allgather_op_cost = AllgatherOpCost( - op_desc=allgather_op_desc, comm_context=comm_context) + allgather_op_cost = AllgatherOpCost(op_desc=allgather_op_desc, + comm_context=comm_context) self.assertTrue(allgather_op_cost.time > 0) # Check BroadcastOpCost cost @@ -131,22 +133,22 @@ class TestCommOpCost(unittest.TestCase): "c_broadcast", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], paddle.float32, [1, 32 * (10**6)]) - broadcast_op_cost = BroadcastOpCost( - op_desc=broadcast_op_desc, comm_context=comm_context) + broadcast_op_cost = BroadcastOpCost(op_desc=broadcast_op_desc, + comm_context=comm_context) self.assertTrue(broadcast_op_cost.time > 0) # Check SendOpCost cost send_op_desc = build_comm_desc("send_v2", [0, 1], paddle.float32, [1, 32 * (10**6)]) - send_op_cost = SendOpCost( - op_desc=send_op_desc, comm_context=comm_context) + send_op_cost = SendOpCost(op_desc=send_op_desc, + comm_context=comm_context) self.assertTrue(send_op_cost.time > 0) # Check RecvOpCost cost recv_op_desc = build_comm_desc("recv_v2", [0, 1], paddle.float32, [1, 32 * (10**6)]) - recv_op_cost = RecvOpCost( - op_desc=recv_op_desc, comm_context=comm_context) + recv_op_cost = RecvOpCost(op_desc=recv_op_desc, + comm_context=comm_context) self.assertTrue(recv_op_cost.time > 0) # Remove unnecessary files diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_comp_cost.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_comp_cost.py index af7a44b5aaa..8472354826d 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_comp_cost.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_comp_cost.py @@ -88,6 +88,7 @@ from test_cluster import cluster_json class TestCompOpCost(unittest.TestCase): + def test_comp_cost(self): # Build cluster file_dir = os.path.dirname(os.path.abspath(__file__)) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_converter.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_converter.py index fbadbb7d8c1..22abd6d7995 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_converter.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_converter.py @@ -22,6 +22,7 @@ from paddle.distributed.auto_parallel.converter import Converter class TestConverter(unittest.TestCase): + def test_converter(self): file_dir = os.path.dirname(os.path.abspath(__file__)) launch_model_path = os.path.join(file_dir, "converter.py") diff --git 
a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py index f7718e584f5..24b056e9f5b 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py @@ -43,6 +43,7 @@ def get_random_inputs_and_labels(input_shape, label_shape): def batch_generator_creator(): + def __reader__(): for _ in range(batch_size): batch_input, batch_label = get_random_inputs_and_labels( @@ -54,6 +55,7 @@ def batch_generator_creator(): class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -62,8 +64,8 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - param_initializer = nn.initializer.Normal( - mean=0.0, std=initializer_range) + param_initializer = nn.initializer.Normal(mean=0.0, + std=initializer_range) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) self.linear0 = nn.Linear( @@ -79,20 +81,18 @@ class MLPLayer(nn.Layer): def forward(self, input): out = self.norm(input) - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _g_process_mesh[0], - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _g_process_mesh[0], + "dims_mapping": [-1, 0] + }) out = self.linear0(out) out = F.gelu(out, approximate=True) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _g_process_mesh[1], - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _g_process_mesh[1], + "dims_mapping": [0, -1] + }) out = self.linear1(out) return out @@ -107,62 +107,58 @@ def get_program(): start_program = static.Program() with static.program_guard(train_program, start_program): # input - input = static.data( - name="input", - shape=[batch_size, sequence_len, hidden_size], - dtype='float32') - label = static.data( - name="label", shape=[batch_size, sequence_len, 1], dtype='float32') + input = static.data(name="input", + shape=[batch_size, sequence_len, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, sequence_len, 1], + dtype='float32') data_holder = [input, label] # dataloader - dataloader = paddle.io.DataLoader.from_generator( - feed_list=data_holder, capacity=4 * batch_size, iterable=False) - dataloader.set_batch_generator( - batch_generator_creator(), places=paddle.static.cuda_places()) + dataloader = paddle.io.DataLoader.from_generator(feed_list=data_holder, + capacity=4 * + batch_size, + iterable=False) + dataloader.set_batch_generator(batch_generator_creator(), + places=paddle.static.cuda_places()) # data dist_attr - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _g_process_mesh[0], - "dims_mapping": [0, -1, -1] - }) - auto.shard_tensor( - label, - dist_attr={ - "process_mesh": _g_process_mesh[0], - "dims_mapping": [0, -1, -1] - }) - - mlp_start = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _g_process_mesh[0], + "dims_mapping": [0, -1, -1] + }) + auto.shard_tensor(label, + dist_attr={ + "process_mesh": _g_process_mesh[0], + "dims_mapping": [0, -1, -1] + }) + + mlp_start = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) pred = mlp_start(input) - mlp_mid = 
MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + mlp_mid = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) pred = mlp_mid(pred) - mlp_end = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + mlp_end = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) pred = mlp_end(pred) error_cost = paddle.nn.functional.square_error_cost(pred, label) loss = paddle.mean(error_cost) - optimizer = paddle.optimizer.Adam( - learning_rate=0.00001, - beta1=0.9, - beta2=0.999, - epsilon=1e-08, - grad_clip=None) + optimizer = paddle.optimizer.Adam(learning_rate=0.00001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + grad_clip=None) feed_vars = {"inputs": [input], "labels": [label]} fetch_vars = {"loss": [loss]} @@ -171,6 +167,7 @@ def get_program(): class TestDistributedContext(unittest.TestCase): + def test_backup_restore(self): train_program, start_program, dataloader, loss, optimizer, feed_vars, fetch_vars = get_program( ) @@ -180,18 +177,16 @@ class TestDistributedContext(unittest.TestCase): dist_context.initialize() dist_context._backup(serial=True, dist=True) - dist_context._restore( - serial=True, - serial_mode="to_backup", - dist=True, - dist_mode="to_backup") + dist_context._restore(serial=True, + serial_mode="to_backup", + dist=True, + dist_mode="to_backup") dist_context._backup(serial=True, dist=True) - dist_context._restore( - serial=True, - serial_mode="to_original", - dist=True, - dist_mode="to_original") + dist_context._restore(serial=True, + serial_mode="to_original", + dist=True, + dist_mode="to_original") dist_context._backup(serial=True, dist=True) dist_context._restore(serial=True, dist=True, dist_mode="to_default") diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_pnorm.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_pnorm.py index 946f33b7e4f..74664062303 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_pnorm.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_pnorm.py @@ -29,12 +29,11 @@ def make_program_dp2(): with paddle.static.program_guard(main_program, start_program): x = paddle.static.data(name='x', shape=[4, 5, 6], dtype='float32') x.stop_gradient = False - auto.shard_tensor( - x, - dist_attr={ - "process_mesh": auto.ProcessMesh([0, 1]), - "dims_mapping": [0, -1, -1] - }) + auto.shard_tensor(x, + dist_attr={ + "process_mesh": auto.ProcessMesh([0, 1]), + "dims_mapping": [0, -1, -1] + }) tmp_0 = paddle.norm(x, p=2) return main_program, start_program, tmp_0 @@ -45,12 +44,11 @@ def make_program_serial(): with paddle.static.program_guard(main_program, start_program): x = paddle.static.data(name='x', shape=[4, 5, 6], dtype='float32') x.stop_gradient = False - auto.shard_tensor( - x, - dist_attr={ - "process_mesh": auto.ProcessMesh([0]), - "dims_mapping": [-1, -1, -1] - }) + auto.shard_tensor(x, + dist_attr={ + "process_mesh": auto.ProcessMesh([0]), + "dims_mapping": [-1, -1, -1] + }) tmp_0 = paddle.norm(x, p=2) return main_program, start_program, tmp_0 @@ -81,6 +79,7 @@ def parallelizer(program_func, rank): class TestDistPNorm(unittest.TestCase): + def test_dist_pnorm_dp2(self): for rank in range(2): diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py 
b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py index 8777bf3ff1f..60b43ef9fe3 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py @@ -29,12 +29,11 @@ def make_program_dp2(): with paddle.static.program_guard(main_program, start_program): x = paddle.static.data(name='x', shape=[4, 4, 8], dtype='float32') x.stop_gradient = False - auto.shard_tensor( - x, - dist_attr={ - "process_mesh": auto.ProcessMesh([0, 1]), - "dims_mapping": [0, -1, -1] - }) + auto.shard_tensor(x, + dist_attr={ + "process_mesh": auto.ProcessMesh([0, 1]), + "dims_mapping": [0, -1, -1] + }) tmp_0 = paddle.reshape(x, shape=[0, 0, 4, 2]) tmp_1 = paddle.reshape(tmp_0, shape=[0, 0, 8]) tmp_2 = tmp_1.reshape((tmp_1.shape[0], tmp_1.shape[1], -1)) @@ -61,6 +60,7 @@ def parallelizer(program_func, rank): class TestDistReshape(unittest.TestCase): + def test_dist_reshape_mp2(self): for rank in range(2): diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_slice.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_slice.py index 8af055a09a3..e12fd0f922a 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_slice.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_slice.py @@ -25,12 +25,11 @@ def make_program_dp2(): start_program = paddle.fluid.Program() with paddle.static.program_guard(main_program, start_program): x = paddle.static.data(name='x', shape=[4, 5, 6], dtype='float32') - auto.shard_tensor( - x, - dist_attr={ - "process_mesh": auto.ProcessMesh([0, 1]), - "dims_mapping": [0, -1, -1] - }) + auto.shard_tensor(x, + dist_attr={ + "process_mesh": auto.ProcessMesh([0, 1]), + "dims_mapping": [0, -1, -1] + }) tmp_0 = x[0] tmp_1 = x[:, 0, :] tmp_2 = x[:, :, 1] @@ -43,12 +42,11 @@ def make_program_serial(): start_program = paddle.fluid.Program() with paddle.static.program_guard(main_program, start_program): x = paddle.static.data(name='x', shape=[4, 5, 6], dtype='float32') - auto.shard_tensor( - x, - dist_attr={ - "process_mesh": auto.ProcessMesh([0]), - "dims_mapping": [-1, -1, -1] - }) + auto.shard_tensor(x, + dist_attr={ + "process_mesh": auto.ProcessMesh([0]), + "dims_mapping": [-1, -1, -1] + }) tmp_0 = x[0] tmp_1 = x[:, 0, :] tmp_2 = x[:, :, 1] @@ -78,6 +76,7 @@ def parallelizer(program_func, rank): class TestDistSlice(unittest.TestCase): + def test_dist_slice_dp2(self): for rank in range(2): dist_main_prog, dist_context = parallelizer(make_program_dp2, rank) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py index efcad7eb112..b8ad54cbb79 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py @@ -21,6 +21,7 @@ from paddle.distributed.fleet.launch_utils import run_with_coverage class TestEngineAPI(unittest.TestCase): + def test_engine_api(self): file_dir = os.path.dirname(os.path.abspath(__file__)) launch_model_path = os.path.join(file_dir, "engine_api.py") diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_high_order_grad.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_high_order_grad.py index ab4a34cf99c..9fb1c22d76c 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_high_order_grad.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_high_order_grad.py @@ -21,6 +21,7 @@ 
from paddle.distributed.fleet.launch_utils import run_with_coverage class TestHighOrderGrad(unittest.TestCase): + def test_dp2(self): file_dir = os.path.dirname(os.path.abspath(__file__)) launch_model_path = os.path.join(file_dir, "high_order_grad.py") diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py index c0df01ada58..911f20f1149 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py @@ -35,6 +35,7 @@ def check_cost(cost): class TestCost(unittest.TestCase): + def test_base_cost(self): cost = cost_model.Cost(memory=100, flops=200, time=0.5) self.assertTrue(check_cost(cost)) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_prim_dist_op.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_prim_dist_op.py index f9ab6f37f3c..67894f6dd93 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_prim_dist_op.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_prim_dist_op.py @@ -33,6 +33,7 @@ rank = 0 class TestPrimDistOp(unittest.TestCase): + def setUp(self): self.main_program = paddle.static.Program() self.startup_program = paddle.static.Program() @@ -45,41 +46,42 @@ class TestPrimDistOp(unittest.TestCase): def init_prog(self): # block = self.main_program.global_block() # block = self.main_program.global_block() - self.w = self.layer_help.create_parameter( - dtype="float", shape=[20], attr=None) - self.w_grad = paddle.static.data( - name='w_grad', shape=[20], dtype='float') + self.w = self.layer_help.create_parameter(dtype="float", + shape=[20], + attr=None) + self.w_grad = paddle.static.data(name='w_grad', + shape=[20], + dtype='float') self.tmp1 = paddle.static.data(name='tmp1', shape=[20], dtype='float') self.tmp2 = paddle.static.data(name='tmp2', shape=[20], dtype='float') - self.batch_reduced = paddle.static.data( - name='batch_reduced', shape=[1], dtype='float') + self.batch_reduced = paddle.static.data(name='batch_reduced', + shape=[1], + dtype='float') self.attrs = {} default_dist_context = get_default_distributed_context() _global_process_mesh = auto.ProcessMesh(list(range(nranks))) - tensor_dist_attr = set_var_dist_attr( - default_dist_context, - self.tmp1, [-1], - _global_process_mesh, - mark_annotated=True) - tensor_dist_attr = set_var_dist_attr( - default_dist_context, - self.tmp1, [-1], - _global_process_mesh, - mark_annotated=True) - - op = self.layer_help.append_op( - type="add_p", - inputs={'X': self.tmp1, - 'Y': self.w}, - outputs={'Z': self.w_grad}, - attrs=self.attrs) - - op = self.layer_help.append_op( - type="reduce_p", - inputs={'X': self.tmp2}, - outputs={'Y': self.batch_reduced}, - attrs={"axis": [0]}) + tensor_dist_attr = set_var_dist_attr(default_dist_context, + self.tmp1, [-1], + _global_process_mesh, + mark_annotated=True) + tensor_dist_attr = set_var_dist_attr(default_dist_context, + self.tmp1, [-1], + _global_process_mesh, + mark_annotated=True) + + op = self.layer_help.append_op(type="add_p", + inputs={ + 'X': self.tmp1, + 'Y': self.w + }, + outputs={'Z': self.w_grad}, + attrs=self.attrs) + + op = self.layer_help.append_op(type="reduce_p", + inputs={'X': self.tmp2}, + outputs={'Y': self.batch_reduced}, + attrs={"axis": [0]}) def test_loss_and_grad_allreduce(self): diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_recorder.py 
b/python/paddle/fluid/tests/unittests/auto_parallel/test_recorder.py index ab704a6a257..d9594b95198 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_recorder.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_recorder.py @@ -19,6 +19,7 @@ from paddle.distributed.auto_parallel.tuner import recorder as rd class TestRecorder(unittest.TestCase): + def test_register(self): recorder = rd.MetricsRecorder() recorder.register("metric") @@ -34,8 +35,8 @@ class TestRecorder(unittest.TestCase): recorder = rd.MetricsRecorder() recorder.update("metric", 4, 1000) self.assertEqual(recorder.records["metric"].direction, "min") - self.assertEqual( - recorder.get_records("metric"), [rd.MetricRecord(4, 1000)]) + self.assertEqual(recorder.get_records("metric"), + [rd.MetricRecord(4, 1000)]) def test_get_records(self): recorder = rd.MetricsRecorder() @@ -43,13 +44,12 @@ class TestRecorder(unittest.TestCase): recorder.update("metric", 2, step=1) recorder.update("metric", 3, step=2) recorder.update("metric", 4, step=3) - self.assertEqual( - recorder.get_records("metric"), [ - rd.MetricRecord(1, 0), - rd.MetricRecord(2, 1), - rd.MetricRecord(3, 2), - rd.MetricRecord(4, 3), - ]) + self.assertEqual(recorder.get_records("metric"), [ + rd.MetricRecord(1, 0), + rd.MetricRecord(2, 1), + rd.MetricRecord(3, 2), + rd.MetricRecord(4, 3), + ]) def test_set_records(self): recorder = rd.MetricsRecorder() @@ -60,14 +60,14 @@ class TestRecorder(unittest.TestCase): rd.MetricRecord(2, 1), rd.MetricRecord(3, 2), rd.MetricRecord(4, 3), - ], ) - self.assertEqual( - recorder.get_records("metric"), [ - rd.MetricRecord(1, 0), - rd.MetricRecord(2, 1), - rd.MetricRecord(3, 2), - rd.MetricRecord(4, 3), - ]) + ], + ) + self.assertEqual(recorder.get_records("metric"), [ + rd.MetricRecord(1, 0), + rd.MetricRecord(2, 1), + rd.MetricRecord(3, 2), + rd.MetricRecord(4, 3), + ]) def test_get_best_value(self): recorder = rd.MetricsRecorder() @@ -81,7 +81,8 @@ class TestRecorder(unittest.TestCase): rd.MetricRecord(2, 1), rd.MetricRecord(3, 2), rd.MetricRecord(4, 3), - ], ) + ], + ) self.assertEqual(recorder.get_best_value("metric_min"), 1) recorder.set_records( @@ -91,7 +92,8 @@ class TestRecorder(unittest.TestCase): rd.MetricRecord(2, 1), rd.MetricRecord(3, 2), rd.MetricRecord(4, 3), - ], ) + ], + ) self.assertEqual(recorder.get_best_value("metric_max"), 4) def test_get_best_step(self): @@ -105,7 +107,8 @@ class TestRecorder(unittest.TestCase): rd.MetricRecord(2, 1), rd.MetricRecord(3, 2), rd.MetricRecord(4, 3), - ], ) + ], + ) self.assertEqual(recorder.get_best_step("metric_min"), 0) recorder.register("metric_max", "max") @@ -116,7 +119,8 @@ class TestRecorder(unittest.TestCase): rd.MetricRecord(2, 1), rd.MetricRecord(3, 2), rd.MetricRecord(4, 3), - ], ) + ], + ) self.assertEqual(recorder.get_best_step("metric_max"), 3) def test_get_statistics(self): @@ -142,7 +146,8 @@ class TestRecorder(unittest.TestCase): rd.MetricRecord(2, 1), rd.MetricRecord(3, 2), rd.MetricRecord(4, 3), - ], ) + ], + ) print(recorder.get_state()) new_recorder = rd.MetricsRecorder.from_state(recorder.get_state()) self.assertEqual(new_recorder.records.keys(), recorder.records.keys()) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py index bc1d0a70182..88ad5f98bf7 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py +++ 
b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py @@ -22,6 +22,7 @@ from paddle.distributed.fleet.launch_utils import run_with_coverage class TestPlannerReLaunch(unittest.TestCase): + def test_relaunch_with_planner(self): from test_auto_parallel_relaunch import cluster_json file_dir = os.path.dirname(os.path.abspath(__file__)) @@ -49,8 +50,8 @@ class TestPlannerReLaunch(unittest.TestCase): # Remove unnecessary files if os.path.exists(cluster_json_path): os.remove(cluster_json_path) - rank_mapping_json_path = os.path.join(file_dir, - "auto_parallel_rank_mapping.json") + rank_mapping_json_path = os.path.join( + file_dir, "auto_parallel_rank_mapping.json") if os.path.exists(rank_mapping_json_path): os.remove(rank_mapping_json_path) files_path = [path for path in os.listdir('.') if '.pkl' in path] diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py index 5a7ae87e646..b6fc0d7a1fa 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py @@ -22,6 +22,7 @@ from paddle.distributed.fleet.launch_utils import run_with_coverage class TestPlannerReLaunch(unittest.TestCase): + def test_relaunch_with_planner(self): from test_auto_parallel_relaunch import cluster_json file_dir = os.path.dirname(os.path.abspath(__file__)) @@ -49,8 +50,8 @@ class TestPlannerReLaunch(unittest.TestCase): # Remove unnecessary files if os.path.exists(cluster_json_path): os.remove(cluster_json_path) - rank_mapping_json_path = os.path.join(file_dir, - "auto_parallel_rank_mapping.json") + rank_mapping_json_path = os.path.join( + file_dir, "auto_parallel_rank_mapping.json") if os.path.exists(rank_mapping_json_path): os.remove(rank_mapping_json_path) log_path = os.path.join(file_dir, "log") diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_trial.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_trial.py index fc52d1c394e..e39991fcaa5 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_trial.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_trial.py @@ -19,6 +19,7 @@ from paddle.distributed.auto_parallel.tuner import trial as tr class TestTiral(unittest.TestCase): + def test_trial(self): space = ts.TunableSpace() space.choice("choice", [0, 1, 2, 3], default=2) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_tunable_space.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_tunable_space.py index cb7104f9ef6..f0c6a0b7cdf 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_tunable_space.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_tunable_space.py @@ -18,6 +18,7 @@ from paddle.distributed.auto_parallel.tuner import tunable_space as ts class TestTunableSpace(unittest.TestCase): + def test_fixed(self): space = ts.TunableSpace() fixed = space.fixed("fixed", default=4) @@ -72,8 +73,10 @@ class TestTunableSpace(unittest.TestCase): def test_float_range(self): space = ts.TunableSpace() - float_range = space.float_range( - "float_range", start=0.4, stop=4.4, default=2.0) + float_range = space.float_range("float_range", + start=0.4, + stop=4.4, + default=2.0) self.assertEqual(space.values["float_range"], 2.0) self.assertEqual(len(space.variables), 1) self.assertEqual(space.variables["float_range"].name, "float_range") diff --git 
a/python/paddle/fluid/tests/unittests/auto_parallel/test_tunable_variable.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_tunable_variable.py index ade228f6c49..ce0a076c83e 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_tunable_variable.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_tunable_variable.py @@ -18,6 +18,7 @@ from paddle.distributed.auto_parallel.tuner import tunable_variable as tv class TestTunableVariable(unittest.TestCase): + def test_fixed(self): fixed = tv.Fixed("fixed", True) fixed = tv.Fixed.from_state(fixed.get_state()) @@ -63,8 +64,12 @@ class TestTunableVariable(unittest.TestCase): self.assertIn(int_range.random(1234), [1, 2, 3, 4]) self.assertNotEqual(int_range.default, 4) - int_range = tv.IntRange( - "int_range", start=1, stop=8, step=2, default=3, endpoint=True) + int_range = tv.IntRange("int_range", + start=1, + stop=8, + step=2, + default=3, + endpoint=True) int_range = tv.IntRange.from_state(int_range.get_state()) self.assertEqual(int_range.default, 3) self.assertIn(int_range.random(), [1, 3, 5, 7]) @@ -72,8 +77,10 @@ class TestTunableVariable(unittest.TestCase): self.assertNotEqual(int_range.default, 2) def test_float_range(self): - float_range = tv.FloatRange( - "float_range", start=0.4, stop=4.4, default=2.0) + float_range = tv.FloatRange("float_range", + start=0.4, + stop=4.4, + default=2.0) float_range = tv.FloatRange.from_state(float_range.get_state()) self.assertEqual(float_range.default, 2.0) self.assertGreaterEqual(float_range.random(), 0.4) @@ -81,13 +88,12 @@ class TestTunableVariable(unittest.TestCase): self.assertNotAlmostEqual(float_range.random(), 1) self.assertNotAlmostEqual(float_range.random(), 4.4) - float_range = tv.FloatRange( - "float_range", - start=0.4, - stop=8.4, - step=2.0, - default=3.0, - endpoint=True) + float_range = tv.FloatRange("float_range", + start=0.4, + stop=8.4, + step=2.0, + default=3.0, + endpoint=True) float_range = tv.FloatRange.from_state(float_range.get_state()) self.assertEqual(float_range.default, 3.0) self.assertGreaterEqual(float_range.random(), 0.4) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_completion.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_completion.py index 9989f5bbdc6..3dabe38ff6e 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_completion.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_completion.py @@ -46,6 +46,7 @@ def get_random_inputs_and_labels(input_shape, label_shape): def batch_generator_creator(): + def __reader__(): for _ in range(batch_size): batch_input, batch_label = get_random_inputs_and_labels( @@ -57,6 +58,7 @@ def batch_generator_creator(): class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -65,8 +67,8 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - param_initializer = nn.initializer.Normal( - mean=0.0, std=initializer_range) + param_initializer = nn.initializer.Normal(mean=0.0, + std=initializer_range) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) self.linear0 = nn.Linear( @@ -82,20 +84,18 @@ class MLPLayer(nn.Layer): def forward(self, input): out = self.norm(input) - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _g_process_mesh[0], - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _g_process_mesh[0], + "dims_mapping": [-1, 0] + 
}) out = self.linear0(out) out = F.gelu(out, approximate=True) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _g_process_mesh[1], - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _g_process_mesh[1], + "dims_mapping": [0, -1] + }) out = self.linear1(out) return out @@ -107,17 +107,15 @@ def loop_cond(i, loop_len, input_array): def loop_body(i, loop_len, input_array): pre_input = paddle.tensor.array_read(array=input_array, i=i) - mlp_while0 = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) - - mlp_while1 = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + mlp_while0 = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) + + mlp_while1 = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) output = mlp_while0(pre_input) cur_pred = mlp_while1(output) @@ -142,37 +140,36 @@ def get_program(): loop_len = paddle.full(shape=[1], fill_value=epoch_num, dtype='int64') # input - input = static.data( - name="input", - shape=[batch_size, sequence_len, hidden_size], - dtype='float32') - label = static.data( - name="label", shape=[batch_size, sequence_len, 1], dtype='float32') + input = static.data(name="input", + shape=[batch_size, sequence_len, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, sequence_len, 1], + dtype='float32') data_holder = [input, label] # dataloader - dataloader = paddle.io.DataLoader.from_generator( - feed_list=data_holder, capacity=4 * batch_size, iterable=False) - dataloader.set_batch_generator( - batch_generator_creator(), places=paddle.static.cuda_places()) + dataloader = paddle.io.DataLoader.from_generator(feed_list=data_holder, + capacity=4 * + batch_size, + iterable=False) + dataloader.set_batch_generator(batch_generator_creator(), + places=paddle.static.cuda_places()) # data dist_attr - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _g_process_mesh[0], - "dims_mapping": [-1, -1, -1] - }) - auto.shard_tensor( - label, - dist_attr={ - "process_mesh": _g_process_mesh[0], - "dims_mapping": [-1, -1, -1] - }) - - mlp_start = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _g_process_mesh[0], + "dims_mapping": [-1, -1, -1] + }) + auto.shard_tensor(label, + dist_attr={ + "process_mesh": _g_process_mesh[0], + "dims_mapping": [-1, -1, -1] + }) + + mlp_start = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) pred = mlp_start(input) input_array = paddle.tensor.array_write(pred, i) @@ -182,11 +179,10 @@ def get_program(): loop_vars=[i, loop_len, input_array]) end_pred = paddle.tensor.array_read(array=input_array, i=i) - mlp_end = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + mlp_end = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) pred = mlp_end(end_pred) error_cost = paddle.nn.functional.square_error_cost(pred, label) @@ -196,6 +192,7 @@ def get_program(): class TestMLP(unittest.TestCase): + def test_completer(self): train_program, 
start_program, dataloader, i, loss = get_program() dist_context = DistributedContext() diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py index d296d943330..3c6e086ae7f 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -47,6 +47,7 @@ def get_random_inputs_and_labels(input_shape, label_shape): def batch_generator_creator(): + def __reader__(): for _ in range(batch_size): batch_input, batch_label = get_random_inputs_and_labels( @@ -58,6 +59,7 @@ def batch_generator_creator(): class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -66,8 +68,8 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - param_initializer = nn.initializer.Normal( - mean=0.0, std=initializer_range) + param_initializer = nn.initializer.Normal(mean=0.0, + std=initializer_range) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) self.linear0 = nn.Linear( @@ -83,63 +85,61 @@ class MLPLayer(nn.Layer): def forward(self, input): - auto.shard_tensor( - self.norm.weight, - dist_attr={"process_mesh": _g_process_mesh, - "dims_mapping": [-1]}) - auto.shard_tensor( - self.norm.bias, - dist_attr={"process_mesh": _g_process_mesh, - "dims_mapping": [-1]}) - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _g_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.linear0.bias, - dist_attr={"process_mesh": _g_process_mesh, - "dims_mapping": [0]}) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _g_process_mesh, - "dims_mapping": [0, -1] - }) - auto.shard_tensor( - self.linear1.bias, - dist_attr={"process_mesh": _g_process_mesh, - "dims_mapping": [-1]}) + auto.shard_tensor(self.norm.weight, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1] + }) + auto.shard_tensor(self.norm.bias, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1] + }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.linear0.bias, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [0] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [0, -1] + }) + auto.shard_tensor(self.linear1.bias, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1] + }) out = self.norm(input) - auto.shard_tensor( - out, - dist_attr={ - "process_mesh": _g_process_mesh, - "dims_mapping": [-1, -1, -1] - }) + auto.shard_tensor(out, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1, -1, -1] + }) out = self.linear0(out) - auto.shard_tensor( - out, - dist_attr={ - "process_mesh": _g_process_mesh, - "dims_mapping": [-1, 
-1, 0] - }) + auto.shard_tensor(out, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1, -1, 0] + }) out = F.gelu(out, approximate=True) - auto.shard_tensor( - out, - dist_attr={ - "process_mesh": _g_process_mesh, - "dims_mapping": [-1, -1, 0] - }) + auto.shard_tensor(out, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1, -1, 0] + }) out = self.linear1(out) - auto.shard_tensor( - out, - dist_attr={ - "process_mesh": _g_process_mesh, - "dims_mapping": [-1, -1, -1] - }) + auto.shard_tensor(out, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1, -1, -1] + }) return out @@ -155,95 +155,94 @@ def get_program(): # 循环计数器 i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0) - auto.shard_tensor( - i, - dist_attr={"process_mesh": _g_process_mesh, - "dims_mapping": [-1]}) + auto.shard_tensor(i, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1] + }) # 循环次数 - loop_len = fluid.layers.fill_constant( - shape=[1], dtype='int64', value=epoch_num) - auto.shard_tensor( - loop_len, - dist_attr={"process_mesh": _g_process_mesh, - "dims_mapping": [-1]}) + loop_len = fluid.layers.fill_constant(shape=[1], + dtype='int64', + value=epoch_num) + auto.shard_tensor(loop_len, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1] + }) # input - input = static.data( - name="input", - shape=[batch_size, sequence_len, hidden_size], - dtype='float32') - label = static.data( - name="label", shape=[batch_size, sequence_len, 1], dtype='float32') + input = static.data(name="input", + shape=[batch_size, sequence_len, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, sequence_len, 1], + dtype='float32') data_holder = [input, label] # dataloader - dataloader = paddle.io.DataLoader.from_generator( - feed_list=data_holder, capacity=4 * batch_size, iterable=False) - dataloader.set_batch_generator( - batch_generator_creator(), places=paddle.static.cuda_places()) + dataloader = paddle.io.DataLoader.from_generator(feed_list=data_holder, + capacity=4 * + batch_size, + iterable=False) + dataloader.set_batch_generator(batch_generator_creator(), + places=paddle.static.cuda_places()) # data dist_attr - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _g_process_mesh, - "dims_mapping": [-1, -1, -1] - }) - auto.shard_tensor( - label, - dist_attr={ - "process_mesh": _g_process_mesh, - "dims_mapping": [-1, -1, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1, -1, -1] + }) + auto.shard_tensor(label, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1, -1, -1] + }) # fill constant bsz like tmp = paddle.fluid.layers.fill_constant_batch_size_like( input=input, shape=[-1, 16, 0, 48], dtype='float32', value=0) - auto.shard_tensor( - tmp, - dist_attr={ - "process_mesh": _g_process_mesh, - "dims_mapping": [-1, 0, -1, -1] - }) + auto.shard_tensor(tmp, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1, 0, -1, -1] + }) # model - mlp_start = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + mlp_start = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) pred = mlp_start(input) input_array = fluid.layers.array_write(pred, i) - auto.shard_tensor( - input_array, - dist_attr={ - "process_mesh": _g_process_mesh, - "dims_mapping": [-1, -1, -1] - }) + 
auto.shard_tensor(input_array, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1, -1, -1] + }) cond = fluid.layers.less_than(x=i, y=loop_len) - auto.shard_tensor( - cond, - dist_attr={"process_mesh": _g_process_mesh, - "dims_mapping": [-1]}) + auto.shard_tensor(cond, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1] + }) while_op = fluid.layers.While(cond=cond) with while_op.block(): pre_input = fluid.layers.array_read(array=input_array, i=i) - auto.shard_tensor( - pre_input, - dist_attr={ - "process_mesh": _g_process_mesh, - "dims_mapping": [-1, -1, -1] - }) - - mlp_while = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + auto.shard_tensor(pre_input, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1, -1, -1] + }) + + mlp_while = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) cur_pred = mlp_while(pre_input) # 更新循环条件 @@ -252,33 +251,31 @@ def get_program(): fluid.layers.less_than(x=i, y=loop_len, cond=cond) end_pred = fluid.layers.array_read(array=input_array, i=i) - auto.shard_tensor( - end_pred, - dist_attr={ - "process_mesh": _g_process_mesh, - "dims_mapping": [-1, -1, -1] - }) - - mlp_end = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + auto.shard_tensor(end_pred, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1, -1, -1] + }) + + mlp_end = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) pred = mlp_end(end_pred) error_cost = paddle.nn.functional.square_error_cost(pred, label) - auto.shard_tensor( - error_cost, - dist_attr={ - "process_mesh": _g_process_mesh, - "dims_mapping": [-1, -1, -1] - }) + auto.shard_tensor(error_cost, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1, -1, -1] + }) loss = paddle.mean(error_cost) - auto.shard_tensor( - loss, - dist_attr={"process_mesh": _g_process_mesh, - "dims_mapping": [-1]}) + auto.shard_tensor(loss, + dist_attr={ + "process_mesh": _g_process_mesh, + "dims_mapping": [-1] + }) return train_program, start_program, dataloader, i, loss @@ -437,6 +434,7 @@ def partition(train_program, start_program, dist_context): class TestMLP(unittest.TestCase): + def test_partitioner(self): train_program, start_program, dataloader, i, loss = get_program() @@ -445,8 +443,9 @@ class TestMLP(unittest.TestCase): dist_context) dist_context.block_state.parse_forward_blocks(train_program) - dist_main_prog, dist_startup_prog = partition( - train_program, start_program, dist_context) + dist_main_prog, dist_startup_prog = partition(train_program, + start_program, + dist_context) global_block_ops = dist_main_prog.blocks[0].ops fill_op = None diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py b/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py index ffc222d3492..c3f64e30fc5 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py @@ -41,6 +41,7 @@ PP_MESH_1 = None class MLPLayer(nn.Layer): + def __init__(self, hidden_size=64, intermediate_size=4 * 64, @@ -54,52 +55,50 @@ class MLPLayer(nn.Layer): weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0)) weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1)) bias_attr = 
None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr1, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr0, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr1, + bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) def forward(self, input): if _global_parallel_strategy == "pp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": PP_MESH_0, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": PP_MESH_1, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [-1, -1] + }) elif _global_parallel_strategy == "mp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) out = self.norm(input) out = self.linear0(out) @@ -113,43 +112,40 @@ def mlp_forward(train_program, start_program): utils.unique_name.guard(): batch_size = 4 hidden_size = 64 - input = static.data( - name="input", shape=[batch_size, hidden_size], dtype='float32') - label = static.data( - name="label", shape=[batch_size, 1], dtype='float32') + input = static.data(name="input", + shape=[batch_size, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, 1], + dtype='float32') if _global_parallel_strategy == "pp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": PP_MESH_0, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - label, - dist_attr={ - "process_mesh": PP_MESH_1, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(label, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [-1, -1] + }) elif _global_parallel_strategy == "dp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "mp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) - - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - 
initializer_range=0.02) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) + + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + initializer_range=0.02) predict = mlp(input) error_cost = paddle.nn.functional.square_error_cost(predict, label) loss = paddle.mean(error_cost) @@ -173,6 +169,7 @@ def get_distributed_program(): class TestMLPAutoConvert(unittest.TestCase): + def setUp(self): paddle.seed(2021) random.seed(2021) @@ -201,8 +198,9 @@ class TestMLPAutoConvert(unittest.TestCase): for step in range(20): if step == 10: - save_distributed_checkpoint( - dist_main_prog, ".", dist_attr_path=".") + save_distributed_checkpoint(dist_main_prog, + ".", + dist_attr_path=".") res = exe.run(dist_main_prog, feed={ @@ -253,6 +251,7 @@ class TestMLPAutoConvert(unittest.TestCase): class TestMLPAutoConvert2(unittest.TestCase): + def setUp(self): paddle.seed(2021) random.seed(2021) @@ -340,6 +339,7 @@ class TestMLPAutoConvert2(unittest.TestCase): class TestMLPAutoConvertInvalid(unittest.TestCase): + def setUp(self): paddle.seed(2021) random.seed(2021) @@ -353,14 +353,14 @@ class TestMLPAutoConvertInvalid(unittest.TestCase): _global_process_mesh = auto.ProcessMesh([0, 1]) dist_main_prog, _, _ = get_distributed_program() with self.assertRaises(TypeError): - save_distributed_checkpoint( - dist_main_prog, [""], [""], addition_info=[0]) + save_distributed_checkpoint(dist_main_prog, [""], [""], + addition_info=[0]) with self.assertRaises(ValueError): - save_distributed_checkpoint( - dist_main_prog, [""], [""], addition_info={"step": 0}) + save_distributed_checkpoint(dist_main_prog, [""], [""], + addition_info={"step": 0}) with self.assertRaises(ValueError): - save_distributed_checkpoint( - dist_main_prog, [""], [""], addition_info={"batch": 0.0}) + save_distributed_checkpoint(dist_main_prog, [""], [""], + addition_info={"batch": 0.0}) with self.assertRaises(ValueError): load_checkpoint_into_program(["./model_state_rank.pdmodel"], ["./dist_attr_rank.pdattr"], @@ -369,9 +369,8 @@ class TestMLPAutoConvertInvalid(unittest.TestCase): load_distributed_checkpoint(["./model_state_rank.pdmodel"], ["./dist_attr_rank.pdattr"]) with self.assertRaises(TypeError): - load_distributed_checkpoint({ - "0": "./model_state_rank.pdmodel" - }, {"1": "./dist_attr_rank.pdattr"}) + load_distributed_checkpoint({"0": "./model_state_rank.pdmodel"}, + {"1": "./dist_attr_rank.pdattr"}) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py b/python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py index ed8cb8a23c3..d3a4a4898bf 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py @@ -32,7 +32,9 @@ paddle.distributed.init_parallel_env() class TestDataUnshard(unittest.TestCase): + def test_dp2pp1mp1(self): + def create_model(train_program, start_program): with paddle.static.program_guard(train_program, start_program): @@ -41,41 +43,36 @@ class TestDataUnshard(unittest.TestCase): label = paddle.static.data(name='label', shape=[2, 8]) weight_attr = paddle.ParamAttr( - initializer=nn.initializer.Normal( - mean=0.0, std=0.02)) + initializer=nn.initializer.Normal(mean=0.0, std=0.02)) linear0 = nn.Linear(8, 8, weight_attr) linear1 = nn.Linear(8, 8, weight_attr) - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": MESH_0, - "dims_mapping": [0, -1] - }) - auto.shard_tensor( - label, - dist_attr={ 
- "process_mesh": MESH_0, - "dims_mapping": [0, -1] - }) - auto.shard_tensor( - linear0.weight, - dist_attr={ - "process_mesh": MESH_0, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - linear1.weight, - dist_attr={ - "process_mesh": MESH_0, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": MESH_0, + "dims_mapping": [0, -1] + }) + auto.shard_tensor(label, + dist_attr={ + "process_mesh": MESH_0, + "dims_mapping": [0, -1] + }) + auto.shard_tensor(linear0.weight, + dist_attr={ + "process_mesh": MESH_0, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(linear1.weight, + dist_attr={ + "process_mesh": MESH_0, + "dims_mapping": [-1, -1] + }) linear0_out = linear0(input) gelu_out = F.gelu(linear0_out) linear1_out = linear1(gelu_out) - error_cost = paddle.nn.functional.square_error_cost(linear1_out, - label) + error_cost = paddle.nn.functional.square_error_cost( + linear1_out, label) loss = paddle.mean(error_cost) return train_program, start_program, loss, input, label @@ -88,12 +85,11 @@ class TestDataUnshard(unittest.TestCase): dist_strategy = fleet.DistributedStrategy() dist_strategy.semi_auto = True fleet.init(is_collective=True, strategy=dist_strategy) - optimizer = paddle.fluid.optimizer.AdamOptimizer( - learning_rate=0.00001, - beta1=0.9, - beta2=0.999, - epsilon=1e-08, - grad_clip=None) + optimizer = paddle.fluid.optimizer.AdamOptimizer(learning_rate=0.00001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + grad_clip=None) optimizer = fleet.distributed_optimizer(optimizer) _, _, distributed_startup_program, distributed_main_program = optimizer.minimize( @@ -112,15 +108,17 @@ class TestDataUnshard(unittest.TestCase): label_data = np.random.randint(0, 10, [2, 8]).astype("float32") fetchs = [loss.name, 'input@RESHARD_0'] - loss_np, shard_data_np = exe.run( - distributed_main_program, - feed={"input": input_data, - "label": label_data}, - fetch_list=fetchs) + loss_np, shard_data_np = exe.run(distributed_main_program, + feed={ + "input": input_data, + "label": label_data + }, + fetch_list=fetchs) desired = input_data[worker_index].reshape(shard_data_np.shape) np.testing.assert_allclose(shard_data_np, desired) def dp1pp1mp2(self): + def create_model(train_program, start_program): with paddle.static.program_guard(train_program, start_program): @@ -129,44 +127,39 @@ class TestDataUnshard(unittest.TestCase): label = paddle.static.data(name='label', shape=[8, 8]) weight_attr = paddle.ParamAttr( - initializer=nn.initializer.Normal( - mean=0.0, std=0.02)) + initializer=nn.initializer.Normal(mean=0.0, std=0.02)) linear0 = nn.Linear(8, 8, weight_attr) linear1 = nn.Linear(8, 8, weight_attr) - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": MESH_0, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - label, - dist_attr={ - "process_mesh": MESH_0, - "dims_mapping": [-1, -1] - }) - - auto.shard_tensor( - linear0.weight, - dist_attr={ - "process_mesh": MESH_0, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - linear1.weight, - dist_attr={ - "process_mesh": MESH_0, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": MESH_0, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(label, + dist_attr={ + "process_mesh": MESH_0, + "dims_mapping": [-1, -1] + }) + + auto.shard_tensor(linear0.weight, + dist_attr={ + "process_mesh": MESH_0, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(linear1.weight, + dist_attr={ + "process_mesh": MESH_0, + "dims_mapping": [0, -1] + }) linear0_out = linear0(input) gelu_out = 
F.gelu(linear0_out) linear1_out = linear1(gelu_out) - error_cost = paddle.nn.functional.square_error_cost(linear1_out, - label) + error_cost = paddle.nn.functional.square_error_cost( + linear1_out, label) loss = paddle.mean(error_cost) return train_program, start_program, loss, input, label @@ -179,12 +172,11 @@ class TestDataUnshard(unittest.TestCase): dist_strategy = fleet.DistributedStrategy() dist_strategy.semi_auto = True fleet.init(is_collective=True, strategy=dist_strategy) - optimizer = paddle.fluid.optimizer.AdamOptimizer( - learning_rate=0.00001, - beta1=0.9, - beta2=0.999, - epsilon=1e-08, - grad_clip=None) + optimizer = paddle.fluid.optimizer.AdamOptimizer(learning_rate=0.00001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + grad_clip=None) optimizer = fleet.distributed_optimizer(optimizer) _, _, distributed_startup_program, distributed_main_program = optimizer.minimize( @@ -203,11 +195,12 @@ class TestDataUnshard(unittest.TestCase): label_data = np.random.randint(0, 10, [8, 8]).astype("float32") fetchs = [loss.name, 'input'] - loss_np, shard_data_np = exe.run( - distributed_main_program, - feed={"input": input_data, - "label": label_data}, - fetch_list=fetchs) + loss_np, shard_data_np = exe.run(distributed_main_program, + feed={ + "input": input_data, + "label": label_data + }, + fetch_list=fetchs) desired = input_data.reshape(shard_data_np.shape) np.testing.assert_allclose(shard_data_np, desired) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_gpt_model.py b/python/paddle/fluid/tests/unittests/auto_parallel_gpt_model.py index b1c15c5ce62..4695f6a4a94 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_gpt_model.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_gpt_model.py @@ -76,26 +76,27 @@ class MultiHeadAttention(nn.Layer): if self.fuse: assert self.kdim == embed_dim assert self.vdim == embed_dim - self.qkv_proj = nn.Linear( - embed_dim, 3 * embed_dim, weight_attr, bias_attr=bias_attr) + self.qkv_proj = nn.Linear(embed_dim, + 3 * embed_dim, + weight_attr, + bias_attr=bias_attr) else: - self.q_proj = nn.Linear( - embed_dim, - embed_dim, - weight_attr=weight_attr, - bias_attr=bias_attr) - self.k_proj = nn.Linear( - self.kdim, - embed_dim, - weight_attr=weight_attr, - bias_attr=bias_attr) - self.v_proj = nn.Linear( - self.vdim, - embed_dim, - weight_attr=weight_attr, - bias_attr=bias_attr) - self.out_proj = nn.Linear( - embed_dim, embed_dim, weight_attr=weight_attr, bias_attr=bias_attr) + self.q_proj = nn.Linear(embed_dim, + embed_dim, + weight_attr=weight_attr, + bias_attr=bias_attr) + self.k_proj = nn.Linear(self.kdim, + embed_dim, + weight_attr=weight_attr, + bias_attr=bias_attr) + self.v_proj = nn.Linear(self.vdim, + embed_dim, + weight_attr=weight_attr, + bias_attr=bias_attr) + self.out_proj = nn.Linear(embed_dim, + embed_dim, + weight_attr=weight_attr, + bias_attr=bias_attr) def _fuse_prepare_qkv(self, query): mix_layer = self.qkv_proj(query) @@ -113,33 +114,30 @@ class MultiHeadAttention(nn.Layer): """ q = self.q_proj(query) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + 
"process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) elif _global_parallel_strategy == "mp_pp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": MPPP_MESH_LIST[self.mesh_idx], - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": MPPP_MESH_LIST[self.mesh_idx], + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp_pp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": DPMPPP_MESH_LIST[self.mesh_idx], - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": + DPMPPP_MESH_LIST[self.mesh_idx], + "dims_mapping": [-1, 1] + }) q = tensor.reshape(x=q, shape=[0, 0, self.num_heads, self.head_dim]) q = tensor.transpose(x=q, perm=[0, 2, 1, 3]) if isinstance(cache, self.StaticCache): @@ -167,62 +165,56 @@ class MultiHeadAttention(nn.Layer): """ k = self.k_proj(key) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) elif _global_parallel_strategy == "mp_pp": - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": MPPP_MESH_LIST[self.mesh_idx], - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": MPPP_MESH_LIST[self.mesh_idx], + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp_pp": - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": DPMPPP_MESH_LIST[self.mesh_idx], - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": + DPMPPP_MESH_LIST[self.mesh_idx], + "dims_mapping": [-1, 1] + }) v = self.v_proj(value) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) elif _global_parallel_strategy == "mp_pp": - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": MPPP_MESH_LIST[self.mesh_idx], - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": MPPP_MESH_LIST[self.mesh_idx], + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp_pp": - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": DPMPPP_MESH_LIST[self.mesh_idx], - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": + DPMPPP_MESH_LIST[self.mesh_idx], + "dims_mapping": [-1, 1] + }) k = tensor.reshape(x=k, shape=[0, 0, self.num_heads, self.head_dim]) k = tensor.transpose(x=k, perm=[0, 2, 1, 3]) v = tensor.reshape(x=v, 
shape=[0, 0, self.num_heads, self.head_dim]) @@ -276,17 +268,18 @@ class MultiHeadAttention(nn.Layer): else: q, k, v, cache = self._prepare_qkv(query, key, value, use_cache, cache) - product = layers.matmul( - x=q, y=k, transpose_y=True, alpha=self.head_dim**-0.5) + product = layers.matmul(x=q, + y=k, + transpose_y=True, + alpha=self.head_dim**-0.5) if attn_mask is not None: product = product + attn_mask weights = F.softmax(product) if self.dropout: - weights = F.dropout( - weights, - self.dropout, - training=self.training, - mode="upscale_in_train") + weights = F.dropout(weights, + self.dropout, + training=self.training, + mode="upscale_in_train") out = tensor.matmul(weights, v) # combine heads out = tensor.transpose(out, perm=[0, 2, 1, 3]) @@ -294,33 +287,30 @@ class MultiHeadAttention(nn.Layer): # project to output out = self.out_proj(out) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) elif _global_parallel_strategy == "mp_pp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": MPPP_MESH_LIST[self.mesh_idx], - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": MPPP_MESH_LIST[self.mesh_idx], + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp_pp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": DPMPPP_MESH_LIST[self.mesh_idx], - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": + DPMPPP_MESH_LIST[self.mesh_idx], + "dims_mapping": [1, -1] + }) outs = [out] if self.need_weights: outs.append(weights) @@ -362,36 +352,37 @@ class TransformerDecoder(nn.Layer): new_caches = [] self.checkpoints = [] if _global_parallel_strategy == "pp": - auto.shard_tensor( - output, - dist_attr={ - "process_mesh": PP_MESH_LIST[0], - "dims_mapping": [-1 for i in range(len(output.shape))] - }) + auto.shard_tensor(output, + dist_attr={ + "process_mesh": + PP_MESH_LIST[0], + "dims_mapping": + [-1 for i in range(len(output.shape))] + }) if _global_parallel_strategy == "dp_pp": - auto.shard_tensor( - output, - dist_attr={ - "process_mesh": DPPP_MESH_LIST[0], - "dims_mapping": - [0] + [-1 for i in range(len(output.shape) - 1)] - }) + auto.shard_tensor(output, + dist_attr={ + "process_mesh": + DPPP_MESH_LIST[0], + "dims_mapping": [0] + + [-1 for i in range(len(output.shape) - 1)] + }) if _global_parallel_strategy == "mp_pp": - auto.shard_tensor( - output, - dist_attr={ - "process_mesh": MPPP_MESH_LIST[0], - "dims_mapping": - [-1] + [-1 for i in range(len(output.shape) - 1)] - }) + auto.shard_tensor(output, + dist_attr={ + "process_mesh": + MPPP_MESH_LIST[0], + "dims_mapping": [-1] + + [-1 for i in range(len(output.shape) - 1)] + }) if _global_parallel_strategy == "dp_mp_pp": - auto.shard_tensor( - output, - dist_attr={ - "process_mesh": DPMPPP_MESH_LIST[0], - "dims_mapping": - [0] + [-1 for i in range(len(output.shape) - 1)] - }) + auto.shard_tensor(output, + dist_attr={ + "process_mesh": + 
DPMPPP_MESH_LIST[0], + "dims_mapping": [0] + + [-1 for i in range(len(output.shape) - 1)] + }) for i, mod in enumerate(self.layers): if cache is None: if use_cache: @@ -404,7 +395,8 @@ class TransformerDecoder(nn.Layer): auto.shard_tensor( output, dist_attr={ - "process_mesh": PP_MESH_LIST[mod.mesh_idx], + "process_mesh": + PP_MESH_LIST[mod.mesh_idx], "dims_mapping": [-1 for i in range(len(output.shape))] }) @@ -417,7 +409,8 @@ class TransformerDecoder(nn.Layer): auto.shard_tensor( output, dist_attr={ - "process_mesh": DPPP_MESH_LIST[mod.mesh_idx], + "process_mesh": + DPPP_MESH_LIST[mod.mesh_idx], "dims_mapping": [0] + [-1 for i in range(len(output.shape) - 1)] }) @@ -430,7 +423,8 @@ class TransformerDecoder(nn.Layer): auto.shard_tensor( output, dist_attr={ - "process_mesh": MPPP_MESH_LIST[mod.mesh_idx], + "process_mesh": + MPPP_MESH_LIST[mod.mesh_idx], "dims_mapping": [-1] + [-1 for i in range(len(output.shape) - 1)] }) @@ -443,7 +437,8 @@ class TransformerDecoder(nn.Layer): auto.shard_tensor( output, dist_attr={ - "process_mesh": DPMPPP_MESH_LIST[mod.mesh_idx], + "process_mesh": + DPMPPP_MESH_LIST[mod.mesh_idx], "dims_mapping": [0] + [-1 for i in range(len(output.shape) - 1)] }) @@ -456,41 +451,47 @@ class TransformerDecoder(nn.Layer): new_caches.append(new_cache) else: if _global_parallel_strategy == "pp": - output = auto.shard_op( - mod, - dist_attr={ - "process_mesh": PP_MESH_LIST[mod.mesh_idx] - })(output, memory, tgt_mask, use_cache, cache)[0] + output = auto.shard_op(mod, + dist_attr={ + "process_mesh": + PP_MESH_LIST[mod.mesh_idx] + })(output, memory, tgt_mask, + use_cache, cache)[0] auto.shard_tensor( output, dist_attr={ - "process_mesh": PP_MESH_LIST[mod.mesh_idx], + "process_mesh": + PP_MESH_LIST[mod.mesh_idx], "dims_mapping": [-1 for i in range(len(output.shape))] }) elif _global_parallel_strategy == "dp_pp": - output = auto.shard_op( - mod, - dist_attr={ - "process_mesh": DPPP_MESH_LIST[mod.mesh_idx] - })(output, memory, tgt_mask, use_cache, cache)[0] + output = auto.shard_op(mod, + dist_attr={ + "process_mesh": + DPPP_MESH_LIST[mod.mesh_idx] + })(output, memory, tgt_mask, + use_cache, cache)[0] auto.shard_tensor( output, dist_attr={ - "process_mesh": DPPP_MESH_LIST[mod.mesh_idx], + "process_mesh": + DPPP_MESH_LIST[mod.mesh_idx], "dims_mapping": [0] + [-1 for i in range(len(output.shape) - 1)] }) elif _global_parallel_strategy == "mp_pp": - output = auto.shard_op( - mod, - dist_attr={ - "process_mesh": MPPP_MESH_LIST[mod.mesh_idx] - })(output, memory, tgt_mask, use_cache, cache)[0] + output = auto.shard_op(mod, + dist_attr={ + "process_mesh": + MPPP_MESH_LIST[mod.mesh_idx] + })(output, memory, tgt_mask, + use_cache, cache)[0] auto.shard_tensor( output, dist_attr={ - "process_mesh": MPPP_MESH_LIST[mod.mesh_idx], + "process_mesh": + MPPP_MESH_LIST[mod.mesh_idx], "dims_mapping": [-1] + [-1 for i in range(len(output.shape) - 1)] }) @@ -503,7 +504,8 @@ class TransformerDecoder(nn.Layer): auto.shard_tensor( output, dist_attr={ - "process_mesh": DPMPPP_MESH_LIST[mod.mesh_idx], + "process_mesh": + DPMPPP_MESH_LIST[mod.mesh_idx], "dims_mapping": [0] + [-1 for i in range(len(output.shape) - 1)] }) @@ -517,8 +519,9 @@ class TransformerDecoder(nn.Layer): if _global_parallel_strategy == "pp": output, new_cache = auto.shard_op( mod, - dist_attr={"process_mesh": PP_MESH_LIST[mod.mesh_idx]})( - output, memory, tgt_mask, use_cache, cache) + dist_attr={"process_mesh": PP_MESH_LIST[mod.mesh_idx] + })(output, memory, tgt_mask, use_cache, + cache) auto.shard_tensor( output, dist_attr={ @@ -535,7 
+538,8 @@ class TransformerDecoder(nn.Layer): auto.shard_tensor( output, dist_attr={ - "process_mesh": DPPP_MESH_LIST[mod.mesh_idx], + "process_mesh": + DPPP_MESH_LIST[mod.mesh_idx], "dims_mapping": [0] + [-1 for i in range(len(output.shape) - 1)] }) @@ -548,7 +552,8 @@ class TransformerDecoder(nn.Layer): auto.shard_tensor( output, dist_attr={ - "process_mesh": MPPP_MESH_LIST[mod.mesh_idx], + "process_mesh": + MPPP_MESH_LIST[mod.mesh_idx], "dims_mapping": [-1] + [-1 for i in range(len(output.shape) - 1)] }) @@ -561,7 +566,8 @@ class TransformerDecoder(nn.Layer): auto.shard_tensor( output, dist_attr={ - "process_mesh": DPMPPP_MESH_LIST[mod.mesh_idx], + "process_mesh": + DPMPPP_MESH_LIST[mod.mesh_idx], "dims_mapping": [0] + [-1 for i in range(len(output.shape) - 1)] }) @@ -619,17 +625,20 @@ class TransformerDecoderLayer(nn.Layer): self.normalize_before = normalize_before weight_attrs = _convert_param_attr_to_list(weight_attr, 3) bias_attrs = _convert_param_attr_to_list(bias_attr, 3) - self.self_attn = MultiHeadAttention( - d_model, - nhead, - dropout=attn_dropout, - weight_attr=weight_attrs[0], - bias_attr=bias_attrs[0], - mesh_idx=self.mesh_idx) - self.linear1 = nn.Linear( - d_model, dim_feedforward, weight_attrs[2], bias_attr=bias_attrs[2]) - self.linear2 = nn.Linear( - dim_feedforward, d_model, weight_attrs[2], bias_attr=bias_attrs[2]) + self.self_attn = MultiHeadAttention(d_model, + nhead, + dropout=attn_dropout, + weight_attr=weight_attrs[0], + bias_attr=bias_attrs[0], + mesh_idx=self.mesh_idx) + self.linear1 = nn.Linear(d_model, + dim_feedforward, + weight_attrs[2], + bias_attr=bias_attrs[2]) + self.linear2 = nn.Linear(dim_feedforward, + d_model, + weight_attrs[2], + bias_attr=bias_attrs[2]) self.norm1 = nn.LayerNorm(d_model, epsilon=1e-5) self.norm2 = nn.LayerNorm(d_model, epsilon=1e-5) self.dropout1 = nn.Dropout(dropout, mode="upscale_in_train") @@ -652,72 +661,65 @@ class TransformerDecoderLayer(nn.Layer): if self.normalize_before: tgt = self.norm2(tgt) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) elif _global_parallel_strategy == "mp_pp": - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": MPPP_MESH_LIST[self.mesh_idx], - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": MPPP_MESH_LIST[self.mesh_idx], + "dims_mapping": [-1, 0] + }) if _global_parallel_strategy == "dp_mp_pp": - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": DPMPPP_MESH_LIST[self.mesh_idx], - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": + DPMPPP_MESH_LIST[self.mesh_idx], + "dims_mapping": [-1, 1] + }) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.linear2.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.linear2.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == 
"dp_mp": - auto.shard_tensor( - self.linear2.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.linear2.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) elif _global_parallel_strategy == "mp_pp": - auto.shard_tensor( - self.linear2.weight, - dist_attr={ - "process_mesh": MPPP_MESH_LIST[self.mesh_idx], - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.linear2.weight, + dist_attr={ + "process_mesh": MPPP_MESH_LIST[self.mesh_idx], + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp_pp": - auto.shard_tensor( - self.linear2.weight, - dist_attr={ - "process_mesh": DPMPPP_MESH_LIST[self.mesh_idx], - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.linear2.weight, + dist_attr={ + "process_mesh": + DPMPPP_MESH_LIST[self.mesh_idx], + "dims_mapping": [1, -1] + }) tgt = self.dropout2( - self.linear2(F.gelu( - self.linear1(tgt), approximate=True))) + self.linear2(F.gelu(self.linear1(tgt), approximate=True))) tgt = residual + tgt if not self.normalize_before: tgt = self.norm2(tgt) return tgt if use_cache is False else (tgt, incremental_cache) def gen_cache(self, memory): - incremental_cache = self.self_attn.gen_cache( - memory, type=self.self_attn.Cache) + incremental_cache = self.self_attn.gen_cache(memory, + type=self.self_attn.Cache) return incremental_cache @@ -737,17 +739,15 @@ class GPTEmbeddings(nn.Layer): self.word_embeddings = nn.Embedding( vocab_size, hidden_size, - weight_attr=paddle.ParamAttr( - name="word_embeddings", - initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range))) + weight_attr=paddle.ParamAttr(name="word_embeddings", + initializer=nn.initializer.Normal( + mean=0.0, std=initializer_range))) self.position_embeddings = nn.Embedding( max_position_embeddings, hidden_size, - weight_attr=paddle.ParamAttr( - name="pos_embeddings", - initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range))) + weight_attr=paddle.ParamAttr(name="pos_embeddings", + initializer=nn.initializer.Normal( + mean=0.0, std=initializer_range))) self.dropout = nn.Dropout(hidden_dropout_prob) def forward(self, input_ids, position_ids=None): @@ -757,33 +757,29 @@ class GPTEmbeddings(nn.Layer): position_ids = seq_length - ones input_embedings = self.word_embeddings(input_ids) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.word_embeddings.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.word_embeddings.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.word_embeddings.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.word_embeddings.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) elif _global_parallel_strategy == "mp_pp": - auto.shard_tensor( - self.word_embeddings.weight, - dist_attr={ - "process_mesh": MPPP_MESH_LIST[0], - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.word_embeddings.weight, + dist_attr={ + "process_mesh": MPPP_MESH_LIST[0], + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp_pp": - auto.shard_tensor( - self.word_embeddings.weight, - dist_attr={ - "process_mesh": DPMPPP_MESH_LIST[0], - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.word_embeddings.weight, + dist_attr={ + "process_mesh": 
DPMPPP_MESH_LIST[0], + "dims_mapping": [1, -1] + }) position_embeddings = self.position_embeddings(position_ids) embeddings = input_embedings + position_embeddings embeddings = self.dropout(embeddings) @@ -821,9 +817,10 @@ class GPTModel(nn.Layer): self.pipline_mode = (pp_degree is not None and pp_degree > 1) if self.pipline_mode: self.layer_per_stage = num_hidden_layers // pp_degree - self.embeddings = GPTEmbeddings( - vocab_size, hidden_size, hidden_dropout_prob, - max_position_embeddings, type_vocab_size, self.initializer_range) + self.embeddings = GPTEmbeddings(vocab_size, hidden_size, + hidden_dropout_prob, + max_position_embeddings, + type_vocab_size, self.initializer_range) decoder_layers = nn.LayerList() for i in range(num_hidden_layers): mesh_index = None @@ -831,25 +828,23 @@ class GPTModel(nn.Layer): if self.layer_per_stage is not None: mesh_index = i // self.layer_per_stage decoder_layers.append( - DecoderLayer( - d_model=hidden_size, - nhead=num_attention_heads, - dim_feedforward=intermediate_size, - dropout=hidden_dropout_prob, - activation=hidden_act, - attn_dropout=attention_probs_dropout_prob, - act_dropout=hidden_dropout_prob, - weight_attr=paddle.ParamAttr( - initializer=nn.initializer.Normal( - mean=0.0, std=self.initializer_range)), - bias_attr=None, - mesh_idx=mesh_index)) + DecoderLayer(d_model=hidden_size, + nhead=num_attention_heads, + dim_feedforward=intermediate_size, + dropout=hidden_dropout_prob, + activation=hidden_act, + attn_dropout=attention_probs_dropout_prob, + act_dropout=hidden_dropout_prob, + weight_attr=paddle.ParamAttr( + initializer=nn.initializer.Normal( + mean=0.0, std=self.initializer_range)), + bias_attr=None, + mesh_idx=mesh_index)) Decoder = TransformerDecoder - self.decoder = Decoder( - decoder_layers, - num_hidden_layers, - norm="LayerNorm", - hidden_size=hidden_size) + self.decoder = Decoder(decoder_layers, + num_hidden_layers, + norm="LayerNorm", + hidden_size=hidden_size) self.checkpoints = [] def forward(self, @@ -863,44 +858,44 @@ class GPTModel(nn.Layer): past_length = 0 if cache is not None: past_length = paddle.shape(cache[0].k)[-2] - position_ids = paddle.arange( - past_length, - paddle.shape(input_ids)[-1] + past_length, - dtype='int64') + position_ids = paddle.arange(past_length, + paddle.shape(input_ids)[-1] + + past_length, + dtype='int64') position_ids = position_ids.unsqueeze(0) - position_ids = paddle.fluid.layers.expand_as(position_ids, - input_ids) - embedding_output = self.embeddings( - input_ids=input_ids, position_ids=position_ids) + position_ids = paddle.fluid.layers.expand_as( + position_ids, input_ids) + embedding_output = self.embeddings(input_ids=input_ids, + position_ids=position_ids) if _global_parallel_strategy == "pp": - auto.shard_tensor( - input_ids, - dist_attr={ - "process_mesh": PP_MESH_LIST[0], - "dims_mapping": [-1 for i in range(len(input_ids.shape))] - }) + auto.shard_tensor(input_ids, + dist_attr={ + "process_mesh": + PP_MESH_LIST[0], + "dims_mapping": + [-1 for i in range(len(input_ids.shape))] + }) if _global_parallel_strategy == "dp_pp": - auto.shard_tensor( - input_ids, - dist_attr={ - "process_mesh": DPPP_MESH_LIST[0], - "dims_mapping": - [0] + [-1 for i in range(len(input_ids.shape) - 1)] - }) + auto.shard_tensor(input_ids, + dist_attr={ + "process_mesh": + DPPP_MESH_LIST[0], + "dims_mapping": [0] + + [-1 for i in range(len(input_ids.shape) - 1)] + }) if _global_parallel_strategy == "dp_mp_pp": - auto.shard_tensor( - input_ids, - dist_attr={ - "process_mesh": DPMPPP_MESH_LIST[0], - 
"dims_mapping": - [0] + [-1 for i in range(len(input_ids.shape) - 1)] - }) - encoder_outputs = self.decoder( - embedding_output, - memory=None, - tgt_mask=attention_mask, - use_cache=use_cache, - cache=cache) + auto.shard_tensor(input_ids, + dist_attr={ + "process_mesh": + DPMPPP_MESH_LIST[0], + "dims_mapping": [0] + + [-1 for i in range(len(input_ids.shape) - 1)] + }) + encoder_outputs = self.decoder(embedding_output, + memory=None, + tgt_mask=attention_mask, + use_cache=use_cache, + cache=cache) self.checkpoints.extend(self.decoder.checkpoints) return encoder_outputs @@ -912,19 +907,19 @@ class GPTForPretraining(nn.Layer): """ def __init__( - self, - gpt, - vocab_size=50304, - hidden_size=768, - initializer_range=0.02, ): + self, + gpt, + vocab_size=50304, + hidden_size=768, + initializer_range=0.02, + ): super(GPTForPretraining, self).__init__() self.output_embeddings = nn.Embedding( vocab_size, hidden_size, - weight_attr=paddle.ParamAttr( - name="output_embeddings", - initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range))) + weight_attr=paddle.ParamAttr(name="output_embeddings", + initializer=nn.initializer.Normal( + mean=0.0, std=initializer_range))) self.gpt = gpt def forward(self, @@ -943,8 +938,9 @@ class GPTForPretraining(nn.Layer): encoder_outputs, cached_kvs = outputs[:2] else: encoder_outputs = outputs - logits = paddle.matmul( - encoder_outputs, self.output_embeddings.weight, transpose_y=True) + logits = paddle.matmul(encoder_outputs, + self.output_embeddings.weight, + transpose_y=True) if use_cache: return logits, cached_kvs else: diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_parallelizer.py b/python/paddle/fluid/tests/unittests/auto_parallel_parallelizer.py index 3ddd41158a6..7d738d36789 100755 --- a/python/paddle/fluid/tests/unittests/auto_parallel_parallelizer.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_parallelizer.py @@ -33,6 +33,7 @@ _global_process_mesh = None class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -41,14 +42,18 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.linear2 = nn.Linear(d_model, 1, weight_attr, bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) self.dropout = nn.Dropout(dropout_ratio, mode="upscale_in_train") @@ -70,25 +75,23 @@ def mlp_pretrain_forward(train_program, start_program): batch_size = 4 hidden_size = 1024 sequence_len = 512 - input = static.data( - name="input", - shape=[batch_size, sequence_len, hidden_size], - dtype='float32') - label = static.data( - name="label", shape=[batch_size, sequence_len, 1], dtype='float32') - - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mappig": [-1, -1, -1] - }) - - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + 
input = static.data(name="input", + shape=[batch_size, sequence_len, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, sequence_len, 1], + dtype='float32') + + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mappig": [-1, -1, -1] + }) + + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) predict = mlp(input) @@ -99,6 +102,7 @@ def mlp_pretrain_forward(train_program, start_program): class TestMLPAutoParallelizer(unittest.TestCase): + def test_mlp_serial(self): global _global_process_mesh @@ -116,15 +120,14 @@ class TestMLPAutoParallelizer(unittest.TestCase): train_program = static.Program() start_program = static.Program() - loss, train_program, start_program = mlp_pretrain_forward(train_program, - start_program) - - optimizer = paddle.fluid.optimizer.AdamOptimizer( - learning_rate=0.00001, - beta1=0.9, - beta2=0.999, - epsilon=1e-08, - grad_clip=None) + loss, train_program, start_program = mlp_pretrain_forward( + train_program, start_program) + + optimizer = paddle.fluid.optimizer.AdamOptimizer(learning_rate=0.00001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + grad_clip=None) optimizer = fleet.distributed_optimizer(optimizer) _, _, distributed_startup_program, distributed_main_program = optimizer.minimize( diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py index 35ee4f30da0..12f4cc08b08 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py @@ -39,6 +39,7 @@ PP_MESH_1 = None class MLPLayer(nn.Layer): + def __init__(self, hidden_size=64, intermediate_size=4 * 64, @@ -51,52 +52,50 @@ class MLPLayer(nn.Layer): weight_attr = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) def forward(self, input): if _global_parallel_strategy == "pp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": PP_MESH_0, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": PP_MESH_1, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [-1, -1] + }) elif _global_parallel_strategy == "mp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - 
"process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) out = self.norm(input) out = self.linear0(out) @@ -112,43 +111,40 @@ def mlp_forward(train_program, start_program): batch_size = 4 hidden_size = 64 - input = static.data( - name="input", shape=[batch_size, hidden_size], dtype='float32') - label = static.data( - name="label", shape=[batch_size, 1], dtype='float32') + input = static.data(name="input", + shape=[batch_size, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, 1], + dtype='float32') if _global_parallel_strategy == "pp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": PP_MESH_0, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - label, - dist_attr={ - "process_mesh": PP_MESH_1, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(label, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [-1, -1] + }) elif _global_parallel_strategy == "dp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "mp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) - - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - initializer_range=0.02) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) + + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + initializer_range=0.02) predict = mlp(input) error_cost = paddle.nn.functional.square_error_cost(predict, label) @@ -177,6 +173,7 @@ def get_distributed_program(): class TestMLPSaveLoad(unittest.TestCase): + def setUp(self): paddle.seed(2021) random.seed(2021) diff --git a/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py b/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py index a4c09cd661a..6e097b6335b 100644 --- a/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py +++ b/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py @@ -38,6 +38,7 @@ def make_v(f, inputs): class TestAutogradFunctional(unittest.TestCase): + @classmethod def setUpClass(cls): cls.RAW_INPUTS = { @@ -55,8 +56,8 @@ class TestAutogradFunctional(unittest.TestCase): def gen_input(self, inp, stop_gradient=False): if isinstance(inp, paddle.Tensor): return inp - return paddle.to_tensor( - self.RAW_INPUTS[inp], stop_gradient=stop_gradient) + return paddle.to_tensor(self.RAW_INPUTS[inp], + stop_gradient=stop_gradient) def gen_inputs(self, inputs): if isinstance(inputs, list): @@ -71,6 +72,7 @@ class TestAutogradFunctional(unittest.TestCase): v=None, create_graph=False, allow_unused=False): + def vjp_test(): nonlocal v xs = self.gen_inputs(inputs) @@ -88,18 +90,16 @@ class TestAutogradFunctional(unittest.TestCase): v = 
self.gen_inputs(v) outputs = func(*xs) if v is not None: - inputs_grad = paddle.grad( - outputs, - xs, - v, - create_graph=create_graph, - allow_unused=allow_unused) + inputs_grad = paddle.grad(outputs, + xs, + v, + create_graph=create_graph, + allow_unused=allow_unused) else: - inputs_grad = paddle.grad( - outputs, - xs, - create_graph=create_graph, - allow_unused=allow_unused) + inputs_grad = paddle.grad(outputs, + xs, + create_graph=create_graph, + allow_unused=allow_unused) return outputs, inputs_grad return vjp_test, grad_test @@ -110,6 +110,7 @@ class TestAutogradFunctional(unittest.TestCase): v=None, create_graph=False, allow_unused=False): + def jvp_test(): nonlocal v xs = self.gen_inputs(inputs) @@ -147,6 +148,7 @@ class TestAutogradFunctional(unittest.TestCase): class TestVJP(TestAutogradFunctional): + def func_vjp_i1o1(self): test_cases = [ [reduce, 'A'], # noqa @@ -224,14 +226,14 @@ class TestVJP(TestAutogradFunctional): @utils.place(config.DEVICES) @utils.parameterize( - (utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'expected_exception'), ( - ('v_shape_not_equal_ys', utils.square, np.random.rand(3), - np.random.rand(1), RuntimeError), )) + (utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'expected_exception'), + (('v_shape_not_equal_ys', utils.square, np.random.rand(3), + np.random.rand(1), RuntimeError), )) class TestVJPException(unittest.TestCase): + def func_vjp(self): with self.assertRaises(self.expected_exception): - paddle.autograd.vjp(self.fun, - paddle.to_tensor(self.xs), + paddle.autograd.vjp(self.fun, paddle.to_tensor(self.xs), paddle.to_tensor(self.v)) def test_all_cases(self): @@ -269,6 +271,7 @@ def jac(grad_fn, f, inputs): class TestJVP(TestAutogradFunctional): + def func_jvp_i1o1(self): test_cases = [ [reduce, 'A'], # noqa @@ -329,17 +332,19 @@ class TestJVP(TestAutogradFunctional): ('3d_in_3d_out', utils.square, np.random.rand(2, 3, 4)), ('single_in_single_out', utils.square, np.random.rand(2, 3)), ('multi_in_single_out', paddle.matmul, - (np.random.rand(2, 2), np.random.rand(2, 2))), )) + (np.random.rand(2, 2), np.random.rand(2, 2))), +)) class TestJacobianClassNoBatch(unittest.TestCase): + def setUp(self): self._dtype = self.xs[0].dtype if isinstance( self.xs, typing.Sequence) else self.xs.dtype - self._eps = config.TOLERANCE.get(str(self._dtype)).get( - "first_order_grad").get("eps") - self._rtol = config.TOLERANCE.get(str(self._dtype)).get( - "first_order_grad").get("rtol") - self._atol = config.TOLERANCE.get(str(self._dtype)).get( - "first_order_grad").get("atol") + self._eps = config.TOLERANCE.get(str( + self._dtype)).get("first_order_grad").get("eps") + self._rtol = config.TOLERANCE.get(str( + self._dtype)).get("first_order_grad").get("rtol") + self._atol = config.TOLERANCE.get(str( + self._dtype)).get("first_order_grad").get("atol") def func_jacobian(self): xs = [paddle.to_tensor(x) for x in self.xs] if isinstance( @@ -359,7 +364,8 @@ class TestJacobianClassNoBatch(unittest.TestCase): self._expected.__getitem__(index.value), rtol=self._rtol, atol=self._atol, - err_msg=f'Testcase {index.type} index not passed, value is {index.value}' + err_msg= + f'Testcase {index.type} index not passed, value is {index.value}' ) def _get_expected(self): @@ -379,17 +385,19 @@ class TestJacobianClassNoBatch(unittest.TestCase): @utils.parameterize((utils.TEST_CASE_NAME, 'func', 'xs'), ( ('1d_in_1d_out', utils.square, np.array([[1., 2., 3.], [3., 4., 3.]])), ('3d_in_3d_out', utils.square, np.random.rand(2, 3, 4)), - ('multi_in_single_out', utils.square, np.random.rand(2, 3)), )) + 
('multi_in_single_out', utils.square, np.random.rand(2, 3)), +)) class TestJacobianClassBatchFirst(unittest.TestCase): + def setUp(self): self._dtype = self.xs[0].dtype if isinstance( self.xs, typing.Sequence) else self.xs.dtype - self._eps = config.TOLERANCE.get(str(self._dtype)).get( - "first_order_grad").get("eps") - self._rtol = config.TOLERANCE.get(str(self._dtype)).get( - "first_order_grad").get("rtol") - self._atol = config.TOLERANCE.get(str(self._dtype)).get( - "first_order_grad").get("atol") + self._eps = config.TOLERANCE.get(str( + self._dtype)).get("first_order_grad").get("eps") + self._rtol = config.TOLERANCE.get(str( + self._dtype)).get("first_order_grad").get("rtol") + self._atol = config.TOLERANCE.get(str( + self._dtype)).get("first_order_grad").get("atol") def func_jacobian(self): xs = [paddle.to_tensor(x) for x in self.xs] if isinstance( @@ -398,16 +406,18 @@ class TestJacobianClassBatchFirst(unittest.TestCase): self._expected = self._get_expected() Index = collections.namedtuple('Index', ('type', 'value')) - indexes = ( - Index('all', (slice(0, None, None), slice(0, None, None), - slice(0, None, None))), - Index('row', (slice(0, None, None), 0, slice(0, None, None))), - Index('col', - (slice(0, None, None), slice(0, None, None), 0)), Index( - 'batch', (slice(0, 2, None), slice(0, None, None), - slice(0, None, None))), - Index('multi_row', - (slice(0, 1, None), slice(0, 2, 1), slice(0, None, None)))) + indexes = (Index( + 'all', + (slice(0, None, None), slice(0, None, None), slice(0, None, None))), + Index('row', + (slice(0, None, None), 0, slice(0, None, None))), + Index('col', + (slice(0, None, None), slice(0, None, None), 0)), + Index('batch', (slice(0, 2, None), slice( + 0, None, None), slice(0, None, None))), + Index('multi_row', + (slice(0, 1, None), slice(0, 2, 1), slice( + 0, None, None)))) self.assertEqual(self._actual[:].numpy().dtype, self._expected.dtype) for index in indexes: np.testing.assert_allclose( @@ -415,7 +425,8 @@ class TestJacobianClassBatchFirst(unittest.TestCase): self._expected.__getitem__(index.value), rtol=self._rtol, atol=self._atol, - err_msg=f'Testcase {index.type} index not passed, value is {index.value}' + err_msg= + f'Testcase {index.type} index not passed, value is {index.value}' ) def _get_expected(self): @@ -434,21 +445,23 @@ class TestJacobianClassBatchFirst(unittest.TestCase): class TestHessianClassNoBatch(unittest.TestCase): + @classmethod def setUpClass(self): self.shape = (2, 2) self.dtype = 'float32' self.np_dtype = np.float32 - self.numerical_delta = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("eps") - self.rtol = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("rtol") - self.atol = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("atol") + self.numerical_delta = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("eps") + self.rtol = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("rtol") + self.atol = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("atol") self.x = paddle.rand(shape=self.shape, dtype=self.dtype) self.y = paddle.rand(shape=self.shape, dtype=self.dtype) def func_single_input(self): + def func(x): return paddle.sum(paddle.matmul(x, x)) @@ -462,6 +475,7 @@ class TestHessianClassNoBatch(unittest.TestCase): self.rtol, self.atol) def func_multi_input(self): + def func(x, y): return paddle.sum(paddle.matmul(x, y)) @@ -471,13 +485,13 @@ class TestHessianClassNoBatch(unittest.TestCase): self.x.stop_gradient = False 
self.y.stop_gradient = False hessian = paddle.autograd.Hessian(func, [self.x, self.y]) - np.testing.assert_allclose( - hessian[:].numpy(), - numerical_hessian, - rtol=self.rtol, - atol=self.atol) + np.testing.assert_allclose(hessian[:].numpy(), + numerical_hessian, + rtol=self.rtol, + atol=self.atol) def func_allow_unused_true(self): + def func(x, y): return paddle.sum(paddle.matmul(x, x)) @@ -507,6 +521,7 @@ class TestHessianClassNoBatch(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_out_not_single(self): + def func(x): return x * x @@ -530,6 +545,7 @@ class TestHessianClassNoBatch(unittest.TestCase): class TestHessianClassBatchFirst(unittest.TestCase): + @classmethod def setUpClass(self): self.x_shape = (5, 2) @@ -538,17 +554,18 @@ class TestHessianClassBatchFirst(unittest.TestCase): self.nbatch, self.nrow = 5, 2 self.dtype = 'float32' self.np_dtype = np.float32 - self.numerical_delta = config.TOLERANCE.get(self.dtype).get( - 'second_order_grad').get('eps') - self.rtol = config.TOLERANCE.get(self.dtype).get( - 'second_order_grad').get('rtol') - self.atol = config.TOLERANCE.get(self.dtype).get( - 'second_order_grad').get('atol') + self.numerical_delta = config.TOLERANCE.get( + self.dtype).get('second_order_grad').get('eps') + self.rtol = config.TOLERANCE.get( + self.dtype).get('second_order_grad').get('rtol') + self.atol = config.TOLERANCE.get( + self.dtype).get('second_order_grad').get('atol') self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype) self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype) self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) def func_single_input(self): + def func(x): return paddle.matmul(x * x, self.weight)[:, 0:1] @@ -556,13 +573,15 @@ class TestHessianClassBatchFirst(unittest.TestCase): func, self.x, self.numerical_delta, self.np_dtype) H = paddle.autograd.Hessian(func, self.x, is_batched=True) - actual = utils._np_transpose_matrix_format( - H[:].numpy(), utils.MatrixFormat.BNM, utils.MatrixFormat.NBM) + actual = utils._np_transpose_matrix_format(H[:].numpy(), + utils.MatrixFormat.BNM, + utils.MatrixFormat.NBM) actual = actual.reshape((H.shape[1], -1)) np.testing.assert_allclose(actual, expected, self.rtol, self.atol) def func_multi_input(self): + def func(x, y): return paddle.matmul(x * x * y * y, self.weight)[:, 0:1] @@ -578,12 +597,14 @@ class TestHessianClassBatchFirst(unittest.TestCase): self.x.stop_gradient = False self.y.stop_gradient = False H = paddle.autograd.Hessian(func, [self.x, self.y], is_batched=True) - actual = utils._np_transpose_matrix_format( - H[:].numpy(), utils.MatrixFormat.BNM, utils.MatrixFormat.NBM) + actual = utils._np_transpose_matrix_format(H[:].numpy(), + utils.MatrixFormat.BNM, + utils.MatrixFormat.NBM) np.testing.assert_allclose(actual, expected, self.rtol, self.atol) def func_allow_unused(self): + def func(x, y): return paddle.matmul(x * x, self.weight)[:, 0:1] @@ -595,16 +616,20 @@ class TestHessianClassBatchFirst(unittest.TestCase): (xs_len, xs_len, self.nrow, self.nbatch, self.nrow)) expected = [[n for n in row] for row in expected] expected = utils._np_concat_matrix_sequence(expected) - expected = utils._np_transpose_matrix_format( - expected, utils.MatrixFormat.NBM, utils.MatrixFormat.BNM) + expected = utils._np_transpose_matrix_format(expected, + utils.MatrixFormat.NBM, + utils.MatrixFormat.BNM) - actual = paddle.autograd.Hessian( - func, [self.x, self.y], is_batched=True)[:] + actual = paddle.autograd.Hessian(func, [self.x, self.y], + 
is_batched=True)[:] - np.testing.assert_allclose( - actual, expected, rtol=self.rtol, atol=self.atol) + np.testing.assert_allclose(actual, + expected, + rtol=self.rtol, + atol=self.atol) def func_stop_gradient(self): + def func(x): return paddle.matmul(x * x, self.weight)[:, 0:1] @@ -614,13 +639,15 @@ class TestHessianClassBatchFirst(unittest.TestCase): x = self.x.clone() x.stop_gradient = True H = paddle.autograd.Hessian(func, self.x, is_batched=True)[:] - actual = utils._np_transpose_matrix_format( - H[:].numpy(), utils.MatrixFormat.BNM, utils.MatrixFormat.NBM) + actual = utils._np_transpose_matrix_format(H[:].numpy(), + utils.MatrixFormat.BNM, + utils.MatrixFormat.NBM) actual = actual.reshape((H.shape[1], -1)) np.testing.assert_allclose(actual, expected, self.rtol, self.atol) def func_out_not_single(self): + def func(x): return (x * x) @@ -644,27 +671,30 @@ class TestHessianClassBatchFirst(unittest.TestCase): class TestHessian(unittest.TestCase): + @classmethod def setUpClass(self): self.shape = (2, 2) self.dtype = 'float32' self.np_dtype = np.float32 - self.numerical_delta = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("eps") - self.rtol = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("rtol") - self.atol = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("atol") + self.numerical_delta = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("eps") + self.rtol = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("rtol") + self.atol = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("atol") self.x = paddle.rand(shape=self.shape, dtype=self.dtype) self.y = paddle.rand(shape=self.shape, dtype=self.dtype) def func_single_input(self): + def func(x): return paddle.sum(paddle.matmul(x, x)) - numerical_hessian = _compute_numerical_hessian( - func, self.x, self.numerical_delta, self.np_dtype) + numerical_hessian = _compute_numerical_hessian(func, self.x, + self.numerical_delta, + self.np_dtype) self.x.stop_gradient = False hessian = paddle.autograd.hessian(func, self.x) @@ -672,11 +702,13 @@ class TestHessian(unittest.TestCase): self.rtol, self.atol) def func_multi_input(self): + def func(x, y): return paddle.sum(paddle.matmul(x, y)) - numerical_hessian = _compute_numerical_hessian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) + numerical_hessian = _compute_numerical_hessian(func, [self.x, self.y], + self.numerical_delta, + self.np_dtype) self.x.stop_gradient = False self.y.stop_gradient = False @@ -688,6 +720,7 @@ class TestHessian(unittest.TestCase): self.atol) def func_allow_unused_false(self): + def func(x, y): return paddle.sum(paddle.matmul(x, x)) @@ -700,15 +733,17 @@ class TestHessian(unittest.TestCase): assert error_msg.find("allow_unused") > 0 def func_allow_unused_true(self): + def func(x, y): return paddle.sum(paddle.matmul(x, x)) - numerical_hessian = _compute_numerical_hessian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) + numerical_hessian = _compute_numerical_hessian(func, [self.x, self.y], + self.numerical_delta, + self.np_dtype) self.x.stop_gradient = False self.y.stop_gradient = False - hessian = paddle.autograd.hessian( - func, [self.x, self.y], allow_unused=True) + hessian = paddle.autograd.hessian(func, [self.x, self.y], + allow_unused=True) for i in range(len(hessian)): for j in range(len(hessian[0])): if i == j == 0: @@ -719,11 +754,13 @@ class TestHessian(unittest.TestCase): assert hessian[i][j] is None def func_create_graph_false(self): + 
def func(x): return paddle.sum(paddle.matmul(x, x)) - numerical_hessian = _compute_numerical_hessian( - func, self.x, self.numerical_delta, self.np_dtype) + numerical_hessian = _compute_numerical_hessian(func, self.x, + self.numerical_delta, + self.np_dtype) self.x.stop_gradient = False hessian = paddle.autograd.hessian(func, self.x) assert hessian.stop_gradient == True @@ -742,8 +779,9 @@ class TestHessian(unittest.TestCase): def func(x): return paddle.sum(F.sigmoid(x)) - numerical_hessian = _compute_numerical_hessian( - func, self.x, self.numerical_delta, self.np_dtype) + numerical_hessian = _compute_numerical_hessian(func, self.x, + self.numerical_delta, + self.np_dtype) self.x.stop_gradient = False hessian = paddle.autograd.hessian(func, self.x, create_graph=True) assert hessian.stop_gradient == False @@ -772,22 +810,24 @@ class TestHessian(unittest.TestCase): class TestHessianFloat64(TestHessian): + @classmethod def setUpClass(self): self.shape = (2, 2) self.dtype = 'float64' self.np_dtype = np.float64 - self.numerical_delta = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("eps") - self.rtol = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("rtol") - self.atol = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("atol") + self.numerical_delta = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("eps") + self.rtol = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("rtol") + self.atol = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("atol") self.x = paddle.rand(shape=self.shape, dtype=self.dtype) self.y = paddle.rand(shape=self.shape, dtype=self.dtype) class TestBatchHessian(unittest.TestCase): + @classmethod def setUpClass(self): self.x_shape = (5, 2) @@ -795,17 +835,18 @@ class TestBatchHessian(unittest.TestCase): self.y_shape = (5, 2) self.dtype = 'float32' self.np_dtype = np.float32 - self.numerical_delta = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("eps") - self.rtol = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("rtol") - self.atol = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("atol") + self.numerical_delta = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("eps") + self.rtol = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("rtol") + self.atol = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("atol") self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype) self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype) self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) def func_single_input(self): + def func(x): return paddle.matmul(x * x, self.weight)[:, 0:1] @@ -817,6 +858,7 @@ class TestBatchHessian(unittest.TestCase): self.atol) def func_multi_input(self): + def func(x, y): return paddle.matmul(x * x * y * y, self.weight)[:, 0:1] @@ -833,6 +875,7 @@ class TestBatchHessian(unittest.TestCase): self.rtol, self.atol) def func_allow_unused_false(self): + def func(x, y): return paddle.matmul(x * x, self.weight)[:, 0:1] @@ -845,6 +888,7 @@ class TestBatchHessian(unittest.TestCase): assert error_msg.find("allow_unused") > 0 def func_allow_unused_true(self): + def func(x, y): return paddle.matmul(x * x, self.weight)[:, 0:1] @@ -852,8 +896,8 @@ class TestBatchHessian(unittest.TestCase): func, [self.x, self.y], self.numerical_delta, self.np_dtype) self.x.stop_gradient = False self.y.stop_gradient = False - hessian = paddle.autograd.batch_hessian( - func, [self.x, 
self.y], allow_unused=True) + hessian = paddle.autograd.batch_hessian(func, [self.x, self.y], + allow_unused=True) for i in range(len(hessian)): for j in range(len(hessian[0])): @@ -867,6 +911,7 @@ class TestBatchHessian(unittest.TestCase): assert hessian[i][j] is None def func_create_graph_false(self): + def func(x): return paddle.matmul(x * x, self.weight)[:, 0:1] @@ -885,6 +930,7 @@ class TestBatchHessian(unittest.TestCase): "does not appear") > 0 def func_create_graph_true(self): + def func(x): return paddle.matmul(x * x, self.weight)[:, 0:1] @@ -917,6 +963,7 @@ class TestBatchHessian(unittest.TestCase): class TestBatchHessianFloat64(TestBatchHessian): + @classmethod def setUpClass(self): self.x_shape = (5, 2) @@ -924,41 +971,44 @@ class TestBatchHessianFloat64(TestBatchHessian): self.y_shape = (5, 2) self.dtype = 'float64' self.np_dtype = np.float64 - self.numerical_delta = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("eps") - self.rtol = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("rtol") - self.atol = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("atol") + self.numerical_delta = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("eps") + self.rtol = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("rtol") + self.atol = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("atol") self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype) self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype) self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) class TestVHP(unittest.TestCase): + @classmethod def setUpClass(self): self.shape = (2, 2) self.dtype = 'float32' self.np_dtype = np.float32 - self.numerical_delta = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("eps") - self.rtol = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("rtol") - self.atol = config.TOLERANCE.get(self.dtype).get( - "second_order_grad").get("atol") + self.numerical_delta = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("eps") + self.rtol = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("rtol") + self.atol = config.TOLERANCE.get( + self.dtype).get("second_order_grad").get("atol") self.x = paddle.rand(shape=self.shape, dtype=self.dtype) self.y = paddle.rand(shape=self.shape, dtype=self.dtype) self.vx = paddle.rand(shape=self.shape, dtype=self.dtype) self.vy = paddle.rand(shape=self.shape, dtype=self.dtype) def func_single_input(self): + def func(x): return paddle.sum(paddle.matmul(x, x)) numerical_func_output = func(self.x).numpy() - numerical_vhp = _compute_numerical_vhp( - func, self.x, self.vx, self.numerical_delta, self.np_dtype) + numerical_vhp = _compute_numerical_vhp(func, self.x, self.vx, + self.numerical_delta, + self.np_dtype) self.x.stop_gradient = False func_output, vhp = paddle.autograd.vhp(func, self.x, self.vx) @@ -968,13 +1018,15 @@ class TestVHP(unittest.TestCase): self.atol) def func_multi_input(self): + def func(x, y): return paddle.sum(paddle.matmul(x, y)) numerical_func_output = func(self.x, self.y).numpy() - numerical_vhp = _compute_numerical_vhp( - func, [self.x, self.y], [self.vx, self.vy], self.numerical_delta, - self.np_dtype) + numerical_vhp = _compute_numerical_vhp(func, [self.x, self.y], + [self.vx, self.vy], + self.numerical_delta, + self.np_dtype) self.x.stop_gradient = False self.y.stop_gradient = False @@ -987,14 +1039,15 @@ class TestVHP(unittest.TestCase): self.rtol, self.atol) def 
func_v_default(self): + def func(x, y): return paddle.sum(paddle.matmul(x, y)) numerical_func_output = func(self.x, self.y).numpy() vx = paddle.ones(self.vx.shape, dtype=self.vx.dtype) vy = paddle.ones(self.vy.shape, dtype=self.vy.dtype) - numerical_vhp = _compute_numerical_vhp(func, [self.x, self.y], - [vx, vy], self.numerical_delta, + numerical_vhp = _compute_numerical_vhp(func, [self.x, self.y], [vx, vy], + self.numerical_delta, self.np_dtype) self.x.stop_gradient = False @@ -1007,13 +1060,15 @@ class TestVHP(unittest.TestCase): self.rtol, self.atol) def func_allow_unused_true(self): + def func(x, y): return paddle.sum(paddle.matmul(x, x)) numerical_func_output = func(self.x, self.y).numpy() - numerical_vhp = _compute_numerical_vhp( - func, [self.x, self.y], [self.vx, self.vy], self.numerical_delta, - self.np_dtype) + numerical_vhp = _compute_numerical_vhp(func, [self.x, self.y], + [self.vx, self.vy], + self.numerical_delta, + self.np_dtype) self.x.stop_gradient = False self.y.stop_gradient = False @@ -1031,8 +1086,9 @@ class TestVHP(unittest.TestCase): return paddle.sum(F.sigmoid(x)) numerical_func_output = func(self.x).numpy() - numerical_vhp = _compute_numerical_vhp( - func, self.x, self.vx, self.numerical_delta, self.np_dtype) + numerical_vhp = _compute_numerical_vhp(func, self.x, self.vx, + self.numerical_delta, + self.np_dtype) self.x.stop_gradient = False func_output, vhp = paddle.autograd.vhp(func, self.x, self.vx) @@ -1062,6 +1118,7 @@ class TestVHP(unittest.TestCase): class TestJacobian(unittest.TestCase): + @classmethod def setUpClass(self): self.shape = (4, 4) @@ -1074,6 +1131,7 @@ class TestJacobian(unittest.TestCase): self.y = paddle.rand(shape=self.shape, dtype=self.dtype) def func_single_input_and_single_output(self): + def func(x): return paddle.matmul(x, x) @@ -1085,6 +1143,7 @@ class TestJacobian(unittest.TestCase): self.rtol, self.atol) def func_single_input_and_multi_output(self): + def func(x): return paddle.matmul(x, x), x * x @@ -1098,6 +1157,7 @@ class TestJacobian(unittest.TestCase): self.atol) def func_multi_input_and_single_output(self): + def func(x, y): return paddle.matmul(x, y) @@ -1130,6 +1190,7 @@ class TestJacobian(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_allow_unused_false(self): + def func(x, y): return paddle.matmul(x, x) @@ -1142,6 +1203,7 @@ class TestJacobian(unittest.TestCase): assert error_msg.find("allow_unused") > 0 def func_allow_unused_true(self): + def func(x, y): return paddle.matmul(x, x) @@ -1149,13 +1211,15 @@ class TestJacobian(unittest.TestCase): func, [self.x, self.y], self.numerical_delta, self.np_dtype) self.x.stop_gradient = False self.y.stop_gradient = False - jacobian = paddle.autograd.jacobian( - func, [self.x, self.y], allow_unused=True) - np.testing.assert_allclose( - jacobian[0].numpy(), numerical_jacobian[0][0], self.rtol, self.atol) + jacobian = paddle.autograd.jacobian(func, [self.x, self.y], + allow_unused=True) + np.testing.assert_allclose(jacobian[0].numpy(), + numerical_jacobian[0][0], self.rtol, + self.atol) assert jacobian[1] is None def func_create_graph_false(self): + def func(x, y): return paddle.matmul(x, y) @@ -1177,6 +1241,7 @@ class TestJacobian(unittest.TestCase): "does not appear") > 0 def func_create_graph_true(self): + def func(x, y): return paddle.matmul(x, y) @@ -1184,8 +1249,8 @@ class TestJacobian(unittest.TestCase): func, [self.x, self.y], self.numerical_delta, self.np_dtype) self.x.stop_gradient = False self.y.stop_gradient = False - jacobian = 
paddle.autograd.jacobian( - func, [self.x, self.y], create_graph=True) + jacobian = paddle.autograd.jacobian(func, [self.x, self.y], + create_graph=True) for j in range(len(jacobian)): assert jacobian[j].stop_gradient == False np.testing.assert_allclose(jacobian[j].numpy(), @@ -1217,6 +1282,7 @@ class TestJacobian(unittest.TestCase): class TestJacobianFloat64(TestJacobian): + @classmethod def setUpClass(self): self.shape = (4, 4) @@ -1230,6 +1296,7 @@ class TestJacobianFloat64(TestJacobian): class TestJacobianBatch(unittest.TestCase): + @classmethod def setUpClass(self): self.x_shape = (4, 2) @@ -1245,6 +1312,7 @@ class TestJacobianBatch(unittest.TestCase): self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) def func_batch_single_input_and_batch_single_output(self): + def func(x): return paddle.matmul(paddle.matmul(x, self.weight), self.y) @@ -1254,13 +1322,15 @@ class TestJacobianBatch(unittest.TestCase): self.x.stop_gradient = False batch_jacobian = paddle.autograd.batch_jacobian( func, - self.x, ) + self.x, + ) self.assertTrue( - np.allclose(batch_jacobian.numpy().all(), numerical_jacobian[0][0] - .all())) + np.allclose(batch_jacobian.numpy().all(), + numerical_jacobian[0][0].all())) def func_batch_single_input_and_batch_multi_output(self): + def func(x): return paddle.matmul(paddle.matmul(x, self.weight), self.y), x * x @@ -1270,7 +1340,8 @@ class TestJacobianBatch(unittest.TestCase): self.x.stop_gradient = False batch_jacobian = paddle.autograd.batch_jacobian( func, - self.x, ) + self.x, + ) for i in range(len(batch_jacobian)): np.testing.assert_allclose(batch_jacobian[i].numpy(), @@ -1278,6 +1349,7 @@ class TestJacobianBatch(unittest.TestCase): self.atol) def func_batch_multi_input_and_batch_single_output(self): + def func(x, y): return x * y @@ -1294,6 +1366,7 @@ class TestJacobianBatch(unittest.TestCase): self.atol) def func_batch_multi_input_and_batch_multi_output(self): + def func(x, y): return x * y, x * y @@ -1309,6 +1382,7 @@ class TestJacobianBatch(unittest.TestCase): self.rtol, self.atol) def func_allow_unused_false(self): + def func(x, y): return x * x @@ -1321,6 +1395,7 @@ class TestJacobianBatch(unittest.TestCase): assert error_msg.find("allow_unused") > 0 def func_allow_unused_true(self): + def func(x, y): return x * x @@ -1328,14 +1403,16 @@ class TestJacobianBatch(unittest.TestCase): func, [self.x, self.y], self.numerical_delta, self.np_dtype) self.x.stop_gradient = False self.y.stop_gradient = False - jacobian = paddle.autograd.batch_jacobian( - func, [self.x, self.y], allow_unused=True) + jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y], + allow_unused=True) - np.testing.assert_allclose( - jacobian[0].numpy(), numerical_jacobian[0][0], self.rtol, self.atol) + np.testing.assert_allclose(jacobian[0].numpy(), + numerical_jacobian[0][0], self.rtol, + self.atol) assert jacobian[1] is None def func_create_graph_false(self): + def func(x, y): return x * y @@ -1357,6 +1434,7 @@ class TestJacobianBatch(unittest.TestCase): "does not appear") > 0 def func_create_graph_true(self): + def func(x, y): return x * y @@ -1364,8 +1442,8 @@ class TestJacobianBatch(unittest.TestCase): func, [self.x, self.y], self.numerical_delta, self.np_dtype) self.x.stop_gradient = False self.y.stop_gradient = False - jacobian = paddle.autograd.batch_jacobian( - func, [self.x, self.y], create_graph=True) + jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y], + create_graph=True) for j in range(len(jacobian)): assert jacobian[j].stop_gradient == False 
np.testing.assert_allclose(jacobian[j].numpy(), @@ -1397,6 +1475,7 @@ class TestJacobianBatch(unittest.TestCase): class TestJacobianBatchFloat64(TestJacobianBatch): + @classmethod def setUpClass(self): self.x_shape = (12, 2) @@ -1404,12 +1483,12 @@ class TestJacobianBatchFloat64(TestJacobianBatch): self.y_shape = (12, 2) self.dtype = 'float64' self.np_dtype = np.float64 - self.numerical_delta = config.TOLERANCE.get(self.dtype).get( - 'second_order_grad').get('eps') - self.rtol = config.TOLERANCE.get(self.dtype).get( - 'second_order_grad').get('rtol') - self.atol = config.TOLERANCE.get(self.dtype).get( - 'second_order_grad').get('atol') + self.numerical_delta = config.TOLERANCE.get( + self.dtype).get('second_order_grad').get('eps') + self.rtol = config.TOLERANCE.get( + self.dtype).get('second_order_grad').get('rtol') + self.atol = config.TOLERANCE.get( + self.dtype).get('second_order_grad').get('atol') self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype) self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype) self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) diff --git a/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_static.py b/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_static.py index 8801664fdca..06d3bb5eb24 100644 --- a/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_static.py +++ b/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_static.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -32,24 +32,26 @@ paddle.enable_static() @utils.parameterize((utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'stop_gradient'), ( ('tensor_input', utils.reduce, np.random.rand(2, 3), None, False), ('tensor_sequence_input', utils.reduce, np.random.rand(2, 3), None, False), - ('v_not_none', utils.reduce, np.random.rand(2, 3), np.random.rand(1), - False), - ('xs_stop_gradient', utils.reduce, np.random.rand(2, 3), np.random.rand(1), - True), - ('func_mutmul', utils.matmul, (np.random.rand(3, 2), np.random.rand(2, 3)), - None, False), - ('func_mul', utils.mul, (np.random.rand(3, 3), np.random.rand(3, 3)), None, - False), - ('func_out_two', utils.o2, (np.random.rand(10), np.random.rand(10)), None, - False), )) + ('v_not_none', utils.reduce, np.random.rand(2, + 3), np.random.rand(1), False), + ('xs_stop_gradient', utils.reduce, np.random.rand( + 2, 3), np.random.rand(1), True), + ('func_mutmul', utils.matmul, + (np.random.rand(3, 2), np.random.rand(2, 3)), None, False), + ('func_mul', utils.mul, + (np.random.rand(3, 3), np.random.rand(3, 3)), None, False), + ('func_out_two', utils.o2, + (np.random.rand(10), np.random.rand(10)), None, False), +)) class TestVJP(unittest.TestCase): + def setUp(self): self.dtype = str(self.xs[0].dtype) if isinstance( self.xs, typing.Sequence) else str(self.xs.dtype) - self._rtol = config.TOLERANCE.get(str(self.dtype)).get( - "first_order_grad").get("rtol") - self._atol = config.TOLERANCE.get(str(self.dtype)).get( - "first_order_grad").get("atol") + self._rtol = config.TOLERANCE.get(str( + self.dtype)).get("first_order_grad").get("rtol") + self._atol = config.TOLERANCE.get(str( + self.dtype)).get("first_order_grad").get("atol") def _vjp(self): exe = paddle.static.Executor() @@ -67,8 +69,8 @@ class TestVJP(unittest.TestCase): sp = paddle.static.Program() mp = paddle.static.Program() with paddle.static.program_guard(mp, sp): - feed, static_xs, static_v = gen_static_data_and_feed(self.xs, - self.v, False) + feed, static_xs, static_v = gen_static_data_and_feed( + self.xs, self.v, False) ys = self.fun(*static_xs) if isinstance( static_xs, typing.Sequence) else self.fun(static_xs) xs_grads = paddle.static.gradients(ys, static_xs, static_v) @@ -80,16 +82,19 @@ class TestVJP(unittest.TestCase): expected = self._expected_vjp() self.assertEqual(len(actual), len(expected)) for i in range(len(actual)): - np.testing.assert_allclose( - actual[i], expected[i], rtol=self._rtol, atol=self._atol) + np.testing.assert_allclose(actual[i], + expected[i], + rtol=self._rtol, + atol=self._atol) @utils.place(config.DEVICES) @utils.parameterize( - (utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'expected_exception'), ( - ('v_shape_not_equal_ys', utils.square, np.random.rand(3), - np.random.rand(1), RuntimeError), )) + (utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'expected_exception'), + (('v_shape_not_equal_ys', utils.square, np.random.rand(3), + np.random.rand(1), RuntimeError), )) class TestVJPException(unittest.TestCase): + def setUp(self): self.exe = paddle.static.Executor() @@ -97,8 +102,8 @@ class TestVJPException(unittest.TestCase): sp = paddle.static.Program() mp = paddle.static.Program() with paddle.static.program_guard(mp, sp): - feed, static_xs, static_v = gen_static_data_and_feed(self.xs, - self.v) + feed, static_xs, static_v = gen_static_data_and_feed( + self.xs, self.v) ys, xs_grads = paddle.autograd.vjp(self.fun, static_xs, static_v) self.exe.run(sp) return self.exe.run(mp, feed, fetch_list=[ys, xs_grads]) @@ -194,8 +199,7 @@ def approx_jacobian(f, xs, dtype, eps=1e-5, batch=False): def 
make_tensors(inps): if isinstance(inps, list): xs = [ - paddle.static.data( - f'x{i}', inp.shape, dtype=inp.dtype) + paddle.static.data(f'x{i}', inp.shape, dtype=inp.dtype) for i, inp in enumerate(inps) ] else: @@ -218,6 +222,7 @@ def prepare_data(test, input_shapes, dtype): class TestJacobianFloat32(unittest.TestCase): + @classmethod def setUpClass(self): paddle.enable_static() @@ -228,8 +233,8 @@ class TestJacobianFloat32(unittest.TestCase): self.dtype = 'float32' self.np_dtype = np.float32 prepare_data(self, all_data_shapes, self.dtype) - self.eps = config.TOLERANCE.get(self.dtype).get('first_order_grad').get( - 'eps') + self.eps = config.TOLERANCE.get( + self.dtype).get('first_order_grad').get('eps') # self.rtol = config.TOLERANCE.get(self.dtype).get('first_order_grad').get('rtol') # self.atol = config.TOLERANCE.get(self.dtype).get('first_order_grad').get('atol') # Do't use tolerance in config, which will cause this test case failed. @@ -254,8 +259,11 @@ class TestJacobianFloat32(unittest.TestCase): else: feeds = {'x': inps} pd_jacobians = exe.run(main, feed=feeds, fetch_list=[full_jacobian])[0] - np_jacobians = approx_jacobian( - np_f, inps, self.dtype, self.eps, batch=batch) + np_jacobians = approx_jacobian(np_f, + inps, + self.dtype, + self.eps, + batch=batch) if batch: np_jacobians = utils._np_transpose_matrix_format( np_jacobians, utils.MatrixFormat.NBM, utils.MatrixFormat.BNM) @@ -317,6 +325,7 @@ class TestJacobianFloat32(unittest.TestCase): np.testing.assert_allclose(pd_entry, np_entry, self.rtol, self.atol) def test_square(self): + def pd_f(x): return paddle.multiply(x, x) @@ -328,6 +337,7 @@ class TestJacobianFloat32(unittest.TestCase): self.run_test_by_entries(pd_f, np_f, self.A) def test_mul(self): + def pd_f(x, y): return paddle.multiply(x, y) @@ -338,11 +348,13 @@ class TestJacobianFloat32(unittest.TestCase): self.run_test_by_fullmatrix( pd_f, np_f, - [self.B, self.C], ) + [self.B, self.C], + ) self.run_test_by_rows(pd_f, np_f, [self.B, self.C]) self.run_test_by_entries(pd_f, np_f, [self.B, self.C]) def test_matmul(self): + def pd_f(x, y): return paddle.matmul(x, y) @@ -355,6 +367,7 @@ class TestJacobianFloat32(unittest.TestCase): self.run_test_by_entries(pd_f, np_f, [self.B, self.C]) def test_batch_matmul(self): + def pd_f(x, y): return paddle.matmul(x, y) @@ -368,6 +381,7 @@ class TestJacobianFloat32(unittest.TestCase): class TestJacobianFloat64(TestJacobianFloat32): + @classmethod def setUpClass(self): paddle.enable_static() @@ -377,15 +391,16 @@ class TestJacobianFloat64(TestJacobianFloat32): self.place = fluid.CPUPlace() self.dtype = 'float64' prepare_data(self, all_data_shapes, self.dtype) - self.eps = config.TOLERANCE.get(self.dtype).get('first_order_grad').get( - 'eps') - self.rtol = config.TOLERANCE.get(self.dtype).get( - 'first_order_grad').get('rtol') - self.atol = config.TOLERANCE.get(self.dtype).get( - 'first_order_grad').get('atol') + self.eps = config.TOLERANCE.get( + self.dtype).get('first_order_grad').get('eps') + self.rtol = config.TOLERANCE.get( + self.dtype).get('first_order_grad').get('rtol') + self.atol = config.TOLERANCE.get( + self.dtype).get('first_order_grad').get('atol') class TestHessianFloat32(unittest.TestCase): + @classmethod def setUpClass(self): paddle.enable_static() @@ -395,12 +410,12 @@ class TestHessianFloat32(unittest.TestCase): self.place = fluid.CPUPlace() self.dtype = 'float32' prepare_data(self, all_data_shapes, self.dtype) - self.eps = config.TOLERANCE.get(self.dtype).get( - 'second_order_grad').get('eps') - self.rtol = 
config.TOLERANCE.get(self.dtype).get( - 'second_order_grad').get('rtol') - self.atol = config.TOLERANCE.get(self.dtype).get( - 'second_order_grad').get('atol') + self.eps = config.TOLERANCE.get( + self.dtype).get('second_order_grad').get('eps') + self.rtol = config.TOLERANCE.get( + self.dtype).get('second_order_grad').get('rtol') + self.atol = config.TOLERANCE.get( + self.dtype).get('second_order_grad').get('atol') def run_test_by_fullmatrix(self, pd_f, inps, np_hess, batch=False): main = fluid.Program() @@ -420,6 +435,7 @@ class TestHessianFloat32(unittest.TestCase): np.testing.assert_allclose(pd_hess, np_hess, self.rtol, self.atol) def test_square(self): + def pd_f(x): """Input is a square matrix.""" return paddle.matmul(x, x.T).flatten().sum() @@ -434,6 +450,7 @@ class TestHessianFloat32(unittest.TestCase): class TestHessianFloat64(TestHessianFloat32): + @classmethod def setUpClass(self): paddle.enable_static() @@ -443,12 +460,12 @@ class TestHessianFloat64(TestHessianFloat32): self.place = fluid.CPUPlace() self.dtype = 'float64' prepare_data(self, all_data_shapes, self.dtype) - self.eps = config.TOLERANCE.get(self.dtype).get( - 'second_order_grad').get('eps') - self.rtol = config.TOLERANCE.get(self.dtype).get( - 'second_order_grad').get('rtol') - self.atol = config.TOLERANCE.get(self.dtype).get( - 'second_order_grad').get('atol') + self.eps = config.TOLERANCE.get( + self.dtype).get('second_order_grad').get('eps') + self.rtol = config.TOLERANCE.get( + self.dtype).get('second_order_grad').get('rtol') + self.atol = config.TOLERANCE.get( + self.dtype).get('second_order_grad').get('atol') if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/autograd/test_gradients_and_minimize.py b/python/paddle/fluid/tests/unittests/autograd/test_gradients_and_minimize.py index 092ddb4094d..67ebe01d9f0 100644 --- a/python/paddle/fluid/tests/unittests/autograd/test_gradients_and_minimize.py +++ b/python/paddle/fluid/tests/unittests/autograd/test_gradients_and_minimize.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestGradients(unittest.TestCase): + def test_third_order(self): enable_prim() main = paddle.static.Program() @@ -71,7 +72,9 @@ class TestGradients(unittest.TestCase): prim2orig(main.block(0)) - feed = {x.name: np.array([2.]).astype('float32'), } + feed = { + x.name: np.array([2.]).astype('float32'), + } fetch_list = [grad4.name] # (3*(-5*x^2-16*x-16))/(16*(x+1)^3.5) result = [np.array([-0.27263762711])] @@ -87,6 +90,7 @@ class TestGradients(unittest.TestCase): class TestMinimize(unittest.TestCase): + def model(self, x, w, bias, opt): paddle.seed(0) place = paddle.CPUPlace() @@ -98,10 +102,12 @@ class TestMinimize(unittest.TestCase): with paddle.static.program_guard(main, startup): input_x = paddle.static.data('x', x.shape, dtype=x.dtype) input_x.stop_gradient = False - params_w = paddle.static.create_parameter( - shape=w.shape, dtype=w.dtype, is_bias=False) - params_bias = paddle.static.create_parameter( - shape=bias.shape, dtype=bias.dtype, is_bias=True) + params_w = paddle.static.create_parameter(shape=w.shape, + dtype=w.dtype, + is_bias=False) + params_bias = paddle.static.create_parameter(shape=bias.shape, + dtype=bias.dtype, + is_bias=True) y = paddle.tanh(paddle.matmul(input_x, params_w) + params_bias) loss = paddle.norm(y, p=2) opt = opt @@ -110,9 +116,11 @@ class TestMinimize(unittest.TestCase): prim2orig(main.block(0)) exe.run(startup) grads = exe.run(main, - feed={'x': x, - 'w': w, - 'bias': bias}, + feed={ + 'x': x, + 'w': w, + 'bias': bias + }, 
fetch_list=grads) return grads diff --git a/python/paddle/fluid/tests/unittests/autograd/test_jvp_and_transpose.py b/python/paddle/fluid/tests/unittests/autograd/test_jvp_and_transpose.py index d6ff931a936..f99bb9074c9 100644 --- a/python/paddle/fluid/tests/unittests/autograd/test_jvp_and_transpose.py +++ b/python/paddle/fluid/tests/unittests/autograd/test_jvp_and_transpose.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,6 +24,7 @@ paddle.enable_static() ############################ Test linearize rules ############################ class TestAddPJVPAndTranspose(unittest.TestCase): + def setUp(self): self.main_program = paddle.static.Program() self.startup_program = paddle.static.Program() @@ -68,11 +69,10 @@ class TestAddPJVPAndTranspose(unittest.TestCase): def test_op(self): with paddle.static.program_guard(self.main_program, self.startup_program): - op = self.layer_help.append_op( - type=self.op_type, - inputs=self.prim_input, - outputs=self.prim_output, - attrs=self.prim_attrs) + op = self.layer_help.append_op(type=self.op_type, + inputs=self.prim_input, + outputs=self.prim_output, + attrs=self.prim_attrs) jvp_out = _jvp(op, *self.jvp_args) jvp_out = flatten(jvp_out) @@ -91,6 +91,7 @@ class TestAddPJVPAndTranspose(unittest.TestCase): class TestSubPJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'sub_p' @@ -127,6 +128,7 @@ class TestSubPJVPAndTranspose(TestAddPJVPAndTranspose): class TestMulPJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'mul_p' @@ -149,7 +151,9 @@ class TestMulPJVPAndTranspose(TestAddPJVPAndTranspose): check_dot = lambda v: v is X Z_BAR = paddle.static.data(name='Z_BAR', shape=[5, 6], dtype='int64') self.transpose_args = (check_dot, Z_BAR) - self.transpose_out_shape_map = {0: X, } + self.transpose_out_shape_map = { + 0: X, + } self.all_ops = [ # prim op: @@ -164,6 +168,7 @@ class TestMulPJVPAndTranspose(TestAddPJVPAndTranspose): class TestDivPJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'div_p' @@ -186,7 +191,9 @@ class TestDivPJVPAndTranspose(TestAddPJVPAndTranspose): check_dot = lambda v: v is X Z_BAR = paddle.static.data(name='Z_BAR', shape=[5, 6], dtype='int64') self.transpose_args = (check_dot, Z_BAR) - self.transpose_out_shape_map = {0: X, } + self.transpose_out_shape_map = { + 0: X, + } self.all_ops = [ # prim op: @@ -203,11 +210,14 @@ class TestDivPJVPAndTranspose(TestAddPJVPAndTranspose): class TestSqrtPJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'sqrt_p' X = paddle.static.data(name='X', shape=[5, 6], dtype='int64') - self.prim_input = {'X': X, } + self.prim_input = { + 'X': X, + } self.prim_output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -232,11 +242,14 @@ class TestSqrtPJVPAndTranspose(TestAddPJVPAndTranspose): class TestTanhPJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'tanh_p' X = 
paddle.static.data(name='X', shape=[5, 6], dtype='int64') - self.prim_input = {'X': X, } + self.prim_input = { + 'X': X, + } self.prim_output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -261,11 +274,14 @@ class TestTanhPJVPAndTranspose(TestAddPJVPAndTranspose): class TestReshapePJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'reshape_p' X = paddle.static.data(name='X', shape=[8, 8], dtype='int64') - self.prim_input = {'X': X, } + self.prim_input = { + 'X': X, + } self.prim_output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -281,7 +297,9 @@ class TestReshapePJVPAndTranspose(TestAddPJVPAndTranspose): check_dot = lambda v: v is X Y_BAR = paddle.static.data(name='Y_BAR', shape=[2, 32], dtype='int64') self.transpose_args = (check_dot, Y_BAR) - self.transpose_out_shape_map = {0: X, } + self.transpose_out_shape_map = { + 0: X, + } self.all_ops = [ # prim op: @@ -294,11 +312,14 @@ class TestReshapePJVPAndTranspose(TestAddPJVPAndTranspose): class TestBroadcastPJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'broadcast_p' X = paddle.static.data(name='X', shape=[10, 1], dtype='int64') - self.prim_input = {'X': X, } + self.prim_input = { + 'X': X, + } self.prim_output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -312,10 +333,13 @@ class TestBroadcastPJVPAndTranspose(TestAddPJVPAndTranspose): # Set transpose check_dot = lambda v: v is X - Y_BAR = paddle.static.data( - name='Y_BAR', shape=[2, 10, 7], dtype='int64') + Y_BAR = paddle.static.data(name='Y_BAR', + shape=[2, 10, 7], + dtype='int64') self.transpose_args = (check_dot, Y_BAR) - self.transpose_out_shape_map = {0: X, } + self.transpose_out_shape_map = { + 0: X, + } self.all_ops = [ # prim op: @@ -329,11 +353,14 @@ class TestBroadcastPJVPAndTranspose(TestAddPJVPAndTranspose): class TestTransposePJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'transpose_p' X = paddle.static.data(name='X', shape=[2, 3, 4, 5], dtype='int64') - self.prim_input = {'X': X, } + self.prim_input = { + 'X': X, + } self.prim_output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -341,17 +368,21 @@ class TestTransposePJVPAndTranspose(TestAddPJVPAndTranspose): self.prim_attrs = {'axis': [0, 2, 3, 1]} # Set JVP - X_DOT = paddle.static.data( - name='X_DOT', shape=[2, 3, 4, 5], dtype='int64') + X_DOT = paddle.static.data(name='X_DOT', + shape=[2, 3, 4, 5], + dtype='int64') self.jvp_args = (X_DOT, ) self.jvp_out_shape_map = {0: self.prim_output['Y']} # Set transpose check_dot = lambda v: v is X - Y_BAR = paddle.static.data( - name='Y_BAR', shape=[2, 4, 5, 3], dtype='int64') + Y_BAR = paddle.static.data(name='Y_BAR', + shape=[2, 4, 5, 3], + dtype='int64') self.transpose_args = (check_dot, Y_BAR) - self.transpose_out_shape_map = {0: X, } + self.transpose_out_shape_map = { + 0: X, + } self.all_ops = [ # prim op: @@ -364,11 +395,14 @@ class TestTransposePJVPAndTranspose(TestAddPJVPAndTranspose): class TestSplitPJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'split_p' X = paddle.static.data(name='X', shape=[2, 7, 10], dtype='int64') - self.prim_input = {'X': X, } + self.prim_input = { + 'X': X, + } self.prim_output = { 'YS': [ self.layer_help.create_variable_for_type_inference( @@ -378,8 +412,9 @@ class TestSplitPJVPAndTranspose(TestAddPJVPAndTranspose): self.prim_attrs = 
{'num_or_sections': [2, 3, 4, 1], 'axis': 2} # Set JVP - X_DOT = paddle.static.data( - name='X_DOT', shape=[2, 7, 10], dtype='int64') + X_DOT = paddle.static.data(name='X_DOT', + shape=[2, 7, 10], + dtype='int64') self.jvp_args = (X_DOT, ) self.jvp_out_shape_map = { 0: self.prim_output['YS'][0], @@ -391,17 +426,15 @@ class TestSplitPJVPAndTranspose(TestAddPJVPAndTranspose): # Set transpose check_dot = lambda v: v is X YS_BAR = [ - paddle.static.data( - name='Y_BAR1', shape=[2, 7, 2], dtype='int64'), - paddle.static.data( - name='Y_BAR2', shape=[2, 7, 3], dtype='int64'), - paddle.static.data( - name='Y_BAR3', shape=[2, 7, 4], dtype='int64'), - paddle.static.data( - name='Y_BAR4', shape=[2, 7, 1], dtype='int64'), + paddle.static.data(name='Y_BAR1', shape=[2, 7, 2], dtype='int64'), + paddle.static.data(name='Y_BAR2', shape=[2, 7, 3], dtype='int64'), + paddle.static.data(name='Y_BAR3', shape=[2, 7, 4], dtype='int64'), + paddle.static.data(name='Y_BAR4', shape=[2, 7, 1], dtype='int64'), ] self.transpose_args = (check_dot, YS_BAR) - self.transpose_out_shape_map = {0: X, } + self.transpose_out_shape_map = { + 0: X, + } self.all_ops = [ # prim op: @@ -414,13 +447,16 @@ class TestSplitPJVPAndTranspose(TestAddPJVPAndTranspose): class TestConcatPJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'concat_p' X = paddle.static.data(name='X', shape=[3, 9, 5], dtype='float64') Y = paddle.static.data(name='Y', shape=[3, 2, 5], dtype='float64') Z = paddle.static.data(name='Z', shape=[3, 3, 5], dtype='float64') - self.prim_input = {'XS': [X, Y, Z], } + self.prim_input = { + 'XS': [X, Y, Z], + } self.prim_output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -429,20 +465,18 @@ class TestConcatPJVPAndTranspose(TestAddPJVPAndTranspose): # Set JVP XS_DOT = [ - paddle.static.data( - name='X_DOT1', shape=[3, 9, 5], dtype='float64'), - paddle.static.data( - name='X_DOT2', shape=[3, 2, 5], dtype='float64'), - paddle.static.data( - name='X_DOT3', shape=[3, 3, 5], dtype='float64'), + paddle.static.data(name='X_DOT1', shape=[3, 9, 5], dtype='float64'), + paddle.static.data(name='X_DOT2', shape=[3, 2, 5], dtype='float64'), + paddle.static.data(name='X_DOT3', shape=[3, 3, 5], dtype='float64'), ] self.jvp_args = (XS_DOT, ) self.jvp_out_shape_map = {0: self.prim_output['Y']} # Set transpose check_dot = lambda v: v is X or v is Y or v is Z - Y_BAR = paddle.static.data( - name='Y_BAR', shape=[3, 14, 5], dtype='float64') + Y_BAR = paddle.static.data(name='Y_BAR', + shape=[3, 14, 5], + dtype='float64') self.transpose_args = (check_dot, Y_BAR) self.transpose_out_shape_map = { 0: X, @@ -461,6 +495,7 @@ class TestConcatPJVPAndTranspose(TestAddPJVPAndTranspose): class TestReducePJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'reduce_p' @@ -473,17 +508,21 @@ class TestReducePJVPAndTranspose(TestAddPJVPAndTranspose): self.prim_attrs = {'axis': [2], 'keepdim': False} # Set JVP - X_DOT = paddle.static.data( - name='X_DOT1', shape=[2, 3, 4, 5], dtype='float64') + X_DOT = paddle.static.data(name='X_DOT1', + shape=[2, 3, 4, 5], + dtype='float64') self.jvp_args = (X_DOT, ) self.jvp_out_shape_map = {0: self.prim_output['Y']} # Set transpose check_dot = lambda v: v is X - Y_BAR = paddle.static.data( - name='Y_BAR', shape=[2, 3, 5], dtype='float64') + Y_BAR = paddle.static.data(name='Y_BAR', + shape=[2, 3, 5], + dtype='float64') self.transpose_args = (check_dot, Y_BAR) - self.transpose_out_shape_map = {0: X, } + 
self.transpose_out_shape_map = { + 0: X, + } self.all_ops = [ # prim op: @@ -497,6 +536,7 @@ class TestReducePJVPAndTranspose(TestAddPJVPAndTranspose): class TestMatmulPJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'matmul_p' @@ -519,7 +559,9 @@ class TestMatmulPJVPAndTranspose(TestAddPJVPAndTranspose): check_dot = lambda v: v is X Z_BAR = paddle.static.data(name='Z_BAR', shape=[2, 4], dtype='float64') self.transpose_args = (check_dot, Z_BAR) - self.transpose_out_shape_map = {0: X, } + self.transpose_out_shape_map = { + 0: X, + } self.all_ops = [ # prim op: @@ -535,11 +577,14 @@ class TestMatmulPJVPAndTranspose(TestAddPJVPAndTranspose): class TestSliceSelectPJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'slice_select_p' X = paddle.static.data(name='X', shape=[3, 20], dtype='float64') - self.prim_input = {'X': X, } + self.prim_input = { + 'X': X, + } self.prim_output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -560,7 +605,9 @@ class TestSliceSelectPJVPAndTranspose(TestAddPJVPAndTranspose): check_dot = lambda v: v is X Y_BAR = paddle.static.data(name='Y_BAR', shape=[3, 10], dtype='float64') self.transpose_args = (check_dot, Y_BAR) - self.transpose_out_shape_map = {0: X, } + self.transpose_out_shape_map = { + 0: X, + } self.all_ops = [ # prim op: @@ -574,6 +621,7 @@ class TestSliceSelectPJVPAndTranspose(TestAddPJVPAndTranspose): class TestSliceAssignPJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'slice_assign_p' @@ -616,12 +664,14 @@ class TestSliceAssignPJVPAndTranspose(TestAddPJVPAndTranspose): class TestGatherPJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'gather_p' X = paddle.static.data(name='X', shape=[9, 5], dtype='float64') - IndexTensor = paddle.static.data( - name='IndexTensor', shape=[3], dtype='int32') + IndexTensor = paddle.static.data(name='IndexTensor', + shape=[3], + dtype='int32') self.prim_input = {'X': X, 'IndexTensor': IndexTensor} self.prim_output = { 'Y': @@ -633,14 +683,17 @@ class TestGatherPJVPAndTranspose(TestAddPJVPAndTranspose): X_DOT = paddle.static.data(name='X_DOT', shape=[9, 5], dtype='float64') self.jvp_args = ( X_DOT, - IndexTensor, ) + IndexTensor, + ) self.jvp_out_shape_map = {0: self.prim_output['Y']} # Set transpose check_dot = lambda v: v is X Y_BAR = paddle.static.data(name='Y_BAR', shape=[9, 3], dtype='float64') self.transpose_args = (check_dot, Y_BAR) - self.transpose_out_shape_map = {0: X, } + self.transpose_out_shape_map = { + 0: X, + } self.all_ops = [ # prim op: @@ -654,13 +707,15 @@ class TestGatherPJVPAndTranspose(TestAddPJVPAndTranspose): class TestScatterAddPJVPAndTranspose(TestAddPJVPAndTranspose): + def init_data(self): # Set prim op self.op_type = 'scatter_add_p' X = paddle.static.data(name='X', shape=[9, 5], dtype='float64') Y = paddle.static.data(name='Y', shape=[9, 3], dtype='float64') - IndexTensor = paddle.static.data( - name='IndexTensor', shape=[3], dtype='int32') + IndexTensor = paddle.static.data(name='IndexTensor', + shape=[3], + dtype='int32') self.prim_input = {'X': X, 'Y': Y, 'IndexTensor': IndexTensor} self.prim_output = { 'Z': diff --git a/python/paddle/fluid/tests/unittests/autograd/test_orig2prim.py b/python/paddle/fluid/tests/unittests/autograd/test_orig2prim.py index 24c8febccf5..924292c4a4a 100644 --- a/python/paddle/fluid/tests/unittests/autograd/test_orig2prim.py +++ 
b/python/paddle/fluid/tests/unittests/autograd/test_orig2prim.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,6 +24,7 @@ paddle.enable_static() ############################ Test orig2prim rules ############################ class TestElementWiseAddOrig2Prim(unittest.TestCase): + def setUp(self): self.main_program = paddle.static.Program() self.startup_program = paddle.static.Program() @@ -53,11 +54,10 @@ class TestElementWiseAddOrig2Prim(unittest.TestCase): def test_op(self): with paddle.static.program_guard(self.main_program, self.startup_program): - op = self.layer_help.append_op( - type=self.op_type, - inputs=self.input, - outputs=self.output, - attrs=self.attrs) + op = self.layer_help.append_op(type=self.op_type, + inputs=self.input, + outputs=self.output, + attrs=self.attrs) prim_out = _orig2prim(op, *self.orig2prim_args) all_ops = [op.type for op in self.main_program.block(0).ops] @@ -69,11 +69,14 @@ class TestElementWiseAddOrig2Prim(unittest.TestCase): class TestSqrtOrig2Prim(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'sqrt' X = paddle.static.data(name='X', shape=[7, 8], dtype='float64') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { 'Out': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -87,6 +90,7 @@ class TestSqrtOrig2Prim(TestElementWiseAddOrig2Prim): class TestElementWiseMulOrig2Prim(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'elementwise_mul' X = paddle.static.data(name='X', shape=[8, 8], dtype='float') @@ -106,6 +110,7 @@ class TestElementWiseMulOrig2Prim(TestElementWiseAddOrig2Prim): class TestMatmulV2Orig2Prim(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'matmul_v2' X = paddle.static.data(name='X', shape=[3, 4], dtype='float') @@ -124,11 +129,14 @@ class TestMatmulV2Orig2Prim(TestElementWiseAddOrig2Prim): class TestTanhOrig2Prim(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'tanh' X = paddle.static.data(name='X', shape=[3, 4], dtype='float') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { 'Out': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -141,13 +149,17 @@ class TestTanhOrig2Prim(TestElementWiseAddOrig2Prim): class TestReshape2Orig2Prim(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'reshape2' X = paddle.static.data(name='X', shape=[5, 6], dtype='int64') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { - 'Out': X, + 'Out': + X, 'XShape': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) } @@ -156,19 +168,23 @@ class TestReshape2Orig2Prim(TestElementWiseAddOrig2Prim): self.orig2prim_args = ( None, None, - X, ) + X, + ) self.all_ops = ['reshape2', 'reshape_p', 'fill_constant_p'] # Do not checke XShape self.out_map = {0: self.output['Out']} class TestConcatOrig2Prim(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'concat' X = paddle.static.data(name='X', shape=[5, 6], dtype='int64') Y = paddle.static.data(name='Y', 
shape=[3, 6], dtype='int64') - self.input = {'X': [X, Y], } + self.input = { + 'X': [X, Y], + } self.output = { 'Out': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -177,17 +193,21 @@ class TestConcatOrig2Prim(TestElementWiseAddOrig2Prim): self.orig2prim_args = ( None, - (X, Y), ) + (X, Y), + ) self.all_ops = ['concat', 'concat_p'] self.out_map = {0: self.output['Out']} class TestSliceOrig2Prim(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'slice' X = paddle.static.data(name='X', shape=[5, 6], dtype='int64') - self.input = {'Input': X, } + self.input = { + 'Input': X, + } self.output = { 'Out': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -204,11 +224,14 @@ class TestSliceOrig2Prim(TestElementWiseAddOrig2Prim): class TestFillZerosLikeOrig2Prim(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'fill_zeros_like' X = paddle.static.data(name='X', shape=[5, 6], dtype='int64') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { 'Out': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -221,6 +244,7 @@ class TestFillZerosLikeOrig2Prim(TestElementWiseAddOrig2Prim): class TestSumOrig2Prim(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'sum' X = paddle.static.data(name='X', shape=[5, 6], dtype='int64') @@ -239,11 +263,14 @@ class TestSumOrig2Prim(TestElementWiseAddOrig2Prim): class TestPNormOrig2Prim1(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'p_norm' X = paddle.static.data(name='X', shape=[5, 6], dtype='int64') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { 'Out': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -259,11 +286,14 @@ class TestPNormOrig2Prim1(TestElementWiseAddOrig2Prim): class TestPNormOrig2Prim2(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'p_norm' X = paddle.static.data(name='X', shape=[5, 6], dtype='int64') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { 'Out': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -279,6 +309,7 @@ class TestPNormOrig2Prim2(TestElementWiseAddOrig2Prim): class TestIndexSelectOrig2Prim(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'index_select' X = paddle.static.data(name='X', shape=[5, 6], dtype='int64') @@ -289,16 +320,20 @@ class TestIndexSelectOrig2Prim(TestElementWiseAddOrig2Prim): 'Out': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) } - self.attrs = {'dim': 0, } + self.attrs = { + 'dim': 0, + } self.orig2prim_args = ( Index, - X, ) + X, + ) self.all_ops = ['index_select', 'gather_p'] self.out_map = {0: self.output['Out']} class TestElementwiseSubOrig2Prim(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'elementwise_sub' X = paddle.static.data(name='X', shape=[5, 6], dtype='int32') @@ -309,21 +344,27 @@ class TestElementwiseSubOrig2Prim(TestElementWiseAddOrig2Prim): 'Out': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) } - self.attrs = {'dim': 0, } + self.attrs = { + 'dim': 0, + } self.orig2prim_args = ( X, - Y, ) + Y, + ) self.all_ops = ['elementwise_sub', 'broadcast_p', 'sub_p'] self.out_map = {0: self.output['Out']} class TestScaleOrig2Prim(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'scale' X = paddle.static.data(name='X', shape=[10, 7], dtype='int32') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { 'Out': 
self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -332,7 +373,8 @@ class TestScaleOrig2Prim(TestElementWiseAddOrig2Prim): self.orig2prim_args = ( None, - X, ) + X, + ) self.all_ops = [ 'scale', 'fill_constant_p', 'fill_constant_p', 'mul_p', 'add_p' ] @@ -340,11 +382,14 @@ class TestScaleOrig2Prim(TestElementWiseAddOrig2Prim): class TestAssignOrig2Prim(TestElementWiseAddOrig2Prim): + def init_data(self): self.op_type = 'assign' X = paddle.static.data(name='X', shape=[10, 7], dtype='int32') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { 'Out': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) diff --git a/python/paddle/fluid/tests/unittests/autograd/test_prim2orig.py b/python/paddle/fluid/tests/unittests/autograd/test_prim2orig.py index 15ab016fc54..56a28f38712 100644 --- a/python/paddle/fluid/tests/unittests/autograd/test_prim2orig.py +++ b/python/paddle/fluid/tests/unittests/autograd/test_prim2orig.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,6 +24,7 @@ paddle.enable_static() ############################ Test prim2orig rules ############################ class TestAddPPrim2Orig(unittest.TestCase): + def setUp(self): self.main_program = paddle.static.Program() self.startup_program = paddle.static.Program() @@ -53,11 +54,10 @@ class TestAddPPrim2Orig(unittest.TestCase): def test_op(self): with paddle.static.program_guard(self.main_program, self.startup_program): - op = self.layer_help.append_op( - type=self.op_type, - inputs=self.input, - outputs=self.output, - attrs=self.attrs) + op = self.layer_help.append_op(type=self.op_type, + inputs=self.input, + outputs=self.output, + attrs=self.attrs) orig_out = _prim2orig(op, *self.prim2orig_args) all_ops = [op.type for op in self.main_program.block(0).ops] @@ -68,6 +68,7 @@ class TestAddPPrim2Orig(unittest.TestCase): class TestSubPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'sub_p' X = paddle.static.data(name='X', shape=[7, 8], dtype='float64') @@ -86,6 +87,7 @@ class TestSubPPrim2Orig(TestAddPPrim2Orig): class TestMulPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'mul_p' X = paddle.static.data(name='X', shape=[7, 8], dtype='float64') @@ -104,6 +106,7 @@ class TestMulPPrim2Orig(TestAddPPrim2Orig): class TestDivPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'div_p' X = paddle.static.data(name='X', shape=[7, 8], dtype='float64') @@ -122,11 +125,14 @@ class TestDivPPrim2Orig(TestAddPPrim2Orig): class TestSqrtPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'sqrt_p' X = paddle.static.data(name='X', shape=[7, 8], dtype='float64') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -139,11 +145,14 @@ class TestSqrtPPrim2Orig(TestAddPPrim2Orig): class TestTanhPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'tanh_p' X = paddle.static.data(name='X', shape=[7, 8], dtype='float64') - self.input = {'X': X, } + 
self.input = { + 'X': X, + } self.output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -156,11 +165,14 @@ class TestTanhPPrim2Orig(TestAddPPrim2Orig): class TestReshapePPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'reshape_p' X = paddle.static.data(name='X', shape=[2, 8], dtype='float64') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -173,11 +185,14 @@ class TestReshapePPrim2Orig(TestAddPPrim2Orig): class TestBroadcastPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'broadcast_p' X = paddle.static.data(name='X', shape=[2, 8], dtype='float64') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -190,11 +205,14 @@ class TestBroadcastPPrim2Orig(TestAddPPrim2Orig): class TestTransposePPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'transpose_p' X = paddle.static.data(name='X', shape=[7, 8, 9, 10], dtype='float64') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -207,11 +225,14 @@ class TestTransposePPrim2Orig(TestAddPPrim2Orig): class TestSplitPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'split_p' X = paddle.static.data(name='X', shape=[3, 9, 5], dtype='float64') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { 'YS': [ self.layer_help.create_variable_for_type_inference( @@ -230,13 +251,16 @@ class TestSplitPPrim2Orig(TestAddPPrim2Orig): class TestConcatPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'concat_p' X = paddle.static.data(name='X', shape=[3, 9, 5], dtype='float64') Y = paddle.static.data(name='Y', shape=[2, 9, 5], dtype='float64') Z = paddle.static.data(name='Z', shape=[1, 9, 5], dtype='float64') - self.input = {'XS': [X, Y, Z], } + self.input = { + 'XS': [X, Y, Z], + } self.output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -249,6 +273,7 @@ class TestConcatPPrim2Orig(TestAddPPrim2Orig): class TestReducePPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'reduce_p' X = paddle.static.data(name='X', shape=[3, 9, 5], dtype='float64') @@ -266,6 +291,7 @@ class TestReducePPrim2Orig(TestAddPPrim2Orig): class TestMatmulPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'matmul_p' X = paddle.static.data(name='X', shape=[9, 5], dtype='float64') @@ -284,11 +310,14 @@ class TestMatmulPPrim2Orig(TestAddPPrim2Orig): class TestSliceSelectPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'slice_select_p' X = paddle.static.data(name='X', shape=[9, 5], dtype='float64') - self.input = {'X': X, } + self.input = { + 'X': X, + } self.output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) @@ -301,6 +330,7 @@ class TestSliceSelectPPrim2Orig(TestAddPPrim2Orig): class TestSliceAssignPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'slice_assign_p' X = paddle.static.data(name='X', shape=[9, 5], dtype='float64') @@ -319,40 +349,49 @@ class TestSliceAssignPPrim2Orig(TestAddPPrim2Orig): class TestGatherPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'gather_p' X = paddle.static.data(name='X', shape=[9, 5], dtype='float64') - IndexTensor = paddle.static.data( - name='IndexTensor', shape=[3], dtype='int32') + 
IndexTensor = paddle.static.data(name='IndexTensor', + shape=[3], + dtype='int32') self.input = {'X': X, 'IndexTensor': IndexTensor} self.output = { 'Y': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) } - self.attrs = {'axis': 0, } + self.attrs = { + 'axis': 0, + } self.prim2orig_args = ( IndexTensor, - X, ) + X, + ) self.all_ops = ['gather_p', 'gather'] self.out_map = {self.output['Y']: 0} class TestScatterAddPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'scatter_add_p' X = paddle.static.data(name='X', shape=[9, 5], dtype='float64') Y = paddle.static.data(name='Y', shape=[3, 5], dtype='float64') - IndexTensor = paddle.static.data( - name='IndexTensor', shape=[3], dtype='int32') + IndexTensor = paddle.static.data(name='IndexTensor', + shape=[3], + dtype='int32') self.input = {'X': X, 'Y': Y, 'IndexTensor': IndexTensor} self.output = { 'Z': self.layer_help.create_variable_for_type_inference(dtype=X.dtype) } - self.attrs = {'axis': 0, } + self.attrs = { + 'axis': 0, + } self.prim2orig_args = (IndexTensor, X, Y) self.all_ops = [ @@ -362,6 +401,7 @@ class TestScatterAddPPrim2Orig(TestAddPPrim2Orig): class TestFillConstantPPrim2Orig(TestAddPPrim2Orig): + def init_data(self): self.op_type = 'fill_constant_p' diff --git a/python/paddle/fluid/tests/unittests/autograd/test_primops.py b/python/paddle/fluid/tests/unittests/autograd/test_primops.py index e6a8c4ec3fe..ccbd630bfd0 100644 --- a/python/paddle/fluid/tests/unittests/autograd/test_primops.py +++ b/python/paddle/fluid/tests/unittests/autograd/test_primops.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,10 +15,12 @@ import unittest import numpy as np import paddle -from paddle.incubate.autograd.primops import ( - neg, set_value, add, sub, mul, div, sqrt, tanh, reshape, broadcast, - transpose, split, concat, reduce, matmul, slice_select, slice_assign, - gather, scatter_add, fill_const) +from paddle.incubate.autograd.primops import (neg, set_value, add, sub, mul, + div, sqrt, tanh, reshape, + broadcast, transpose, split, + concat, reduce, matmul, + slice_select, slice_assign, + gather, scatter_add, fill_const) from paddle.incubate.autograd.primx import Transform, topo_path, orig2prim, prim2orig, _gradients from paddle.incubate.autograd.utils import enable_prim, disable_prim, prim_enabled @@ -104,19 +106,29 @@ class TestPyPrimOps(unittest.TestCase): self.assertEqual(matmul_1.dtype, d.dtype) self.assertEqual(matmul_1.shape, (2, 2)) - slice_select_1 = slice_select( - e, axis=[0], starts=[0], ends=[2], strides=[1]) + slice_select_1 = slice_select(e, + axis=[0], + starts=[0], + ends=[2], + strides=[1]) self.assertEqual(slice_select_1.dtype, e.dtype) self.assertEqual(slice_select_1.shape, (2, 2)) - slice_select_2 = slice_select( - d, axis=[0, 1], starts=[0, 1], ends=[2, 3], strides=[1, 2]) + slice_select_2 = slice_select(d, + axis=[0, 1], + starts=[0, 1], + ends=[2, 3], + strides=[1, 2]) self.assertEqual(slice_select_2.dtype, d.dtype) self.assertEqual(slice_select_2.shape, (2, 1)) y = broadcast(b, [2, 2]) - slice_assign_1 = slice_assign( - d, y, axis=[1], starts=[1], ends=[3], strides=[1]) + slice_assign_1 = slice_assign(d, + y, + axis=[1], + starts=[1], + ends=[3], + strides=[1]) self.assertEqual(slice_assign_1.dtype, d.dtype) self.assertEqual(slice_assign_1.shape, d.shape) @@ -138,8 +150,13 @@ class TestPyPrimOps(unittest.TestCase): self.assertEqual(neg_1.shape, b.shape) self.assertEqual(neg_1.dtype, b.dtype) - set_value_1 = set_value( - d, a, axis=[1], starts=[1], ends=[3], strides=[1], out=d) + set_value_1 = set_value(d, + a, + axis=[1], + starts=[1], + ends=[3], + strides=[1], + out=d) self.assertEqual(set_value_1.shape, d.shape) self.assertEqual(set_value_1.dtype, d.dtype) diff --git a/python/paddle/fluid/tests/unittests/autograd/test_transform.py b/python/paddle/fluid/tests/unittests/autograd/test_transform.py index a2b75f5d7bb..08626593e29 100644 --- a/python/paddle/fluid/tests/unittests/autograd/test_transform.py +++ b/python/paddle/fluid/tests/unittests/autograd/test_transform.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestAutoGradTransformForAdd(unittest.TestCase): + def setUp(self): self.main_program = paddle.static.Program() self.startup_program = paddle.static.Program() @@ -36,11 +37,13 @@ class TestAutoGradTransformForAdd(unittest.TestCase): self.xs_shape_map = {0: (20, 40), 1: (20, 40)} # { output_index: output_shape } self.ys_shape_map = {0: (20, 40)} - X0 = paddle.static.data( - name='X0', shape=self.xs_shape_map[0], dtype='float32') + X0 = paddle.static.data(name='X0', + shape=self.xs_shape_map[0], + dtype='float32') X0.stop_gradient = False - X1 = paddle.static.data( - name='X1', shape=self.xs_shape_map[1], dtype='float32') + X1 = paddle.static.data(name='X1', + shape=self.xs_shape_map[1], + dtype='float32') X1.stop_gradient = False A = paddle.tanh(X0) @@ -48,7 +51,9 @@ class TestAutoGradTransformForAdd(unittest.TestCase): Y = paddle.add(A, B) self.orig_xs = [X0, X1] - self.orig_ys = [Y, ] + self.orig_ys = [ + Y, + ] self.orig_ops = ['tanh', 'tanh', 'elementwise_add'] self.orig2prim_ops = ['tanh_p', 'tanh_p', 'add_p'] @@ -134,16 +139,19 @@ class 
TestAutoGradTransformForAdd(unittest.TestCase): class TestAutoGradTransformForMatmul(TestAutoGradTransformForAdd): + def init_data(self): # { input_index: input_shape } self.xs_shape_map = {0: (100, 2), 1: (5, 2)} # { output_index: output_shape } self.ys_shape_map = {0: (100, 5)} - X0 = paddle.static.data( - 'X0', shape=self.xs_shape_map[0], dtype='float32') + X0 = paddle.static.data('X0', + shape=self.xs_shape_map[0], + dtype='float32') X0.stop_gradient = False - X1 = paddle.static.data( - 'X1', shape=self.xs_shape_map[1], dtype='float32') + X1 = paddle.static.data('X1', + shape=self.xs_shape_map[1], + dtype='float32') X1.stop_gradient = False A = paddle.reshape(X1, [2, 5]) @@ -151,7 +159,9 @@ class TestAutoGradTransformForMatmul(TestAutoGradTransformForAdd): Y = paddle.matmul(X0, B) self.orig_xs = [X0, X1] - self.orig_ys = [Y, ] + self.orig_ys = [ + Y, + ] self.orig_ops = ['reshape2', 'scale', 'matmul_v2'] self.orig2prim_ops = [ @@ -210,20 +220,24 @@ class TestAutoGradTransformForMatmul(TestAutoGradTransformForAdd): class TestAutoGradTransformForIndexSelect(TestAutoGradTransformForAdd): + def init_data(self): # { input_index: input_shape } self.xs_shape_map = {0: (7, 8, 9), 1: (8, 1), 2: (7, 8, 9), 3: (3, )} # { output_index: output_shape } self.ys_shape_map = {0: (3, 16, 9)} - X0 = paddle.static.data( - 'X0', shape=self.xs_shape_map[0], dtype='float32') + X0 = paddle.static.data('X0', + shape=self.xs_shape_map[0], + dtype='float32') X0.stop_gradient = False - X1 = paddle.static.data( - 'X1', shape=self.xs_shape_map[1], dtype='float32') + X1 = paddle.static.data('X1', + shape=self.xs_shape_map[1], + dtype='float32') X1.stop_gradient = False - X2 = paddle.static.data( - 'X2', shape=self.xs_shape_map[2], dtype='float32') + X2 = paddle.static.data('X2', + shape=self.xs_shape_map[2], + dtype='float32') X2.stop_gradient = False X3 = paddle.static.data('X3', shape=self.xs_shape_map[3], dtype='int32') X3.stop_gradient = False @@ -235,7 +249,9 @@ class TestAutoGradTransformForIndexSelect(TestAutoGradTransformForAdd): Y = paddle.index_select(D, X3, axis=0) # (3, 16, 9) self.orig_xs = [X0, X1, X2, X3] - self.orig_ys = [Y, ] + self.orig_ys = [ + Y, + ] self.orig_ops = [ 'elementwise_add', 'p_norm', 'elementwise_sub', 'concat', 'index_select' diff --git a/python/paddle/fluid/tests/unittests/autograd/utils.py b/python/paddle/fluid/tests/unittests/autograd/utils.py index 0816b57fbf7..4105ea2672b 100644 --- a/python/paddle/fluid/tests/unittests/autograd/utils.py +++ b/python/paddle/fluid/tests/unittests/autograd/utils.py @@ -65,8 +65,8 @@ def _compute_numerical_jacobian(func, xs, delta, np_dtype): for i in range(fout_size): jac_i = list([] for _ in range(fin_size)) for j in range(fin_size): - jac_i[j] = np.zeros( - (_product(ys[i].shape), _product(xs[j].shape)), dtype=np_dtype) + jac_i[j] = np.zeros((_product(ys[i].shape), _product(xs[j].shape)), + dtype=np_dtype) jacobian[i] = jac_i for j in range(fin_size): @@ -109,16 +109,16 @@ def _compute_numerical_hessian(func, xs, delta, np_dtype): orig = _get_item(xs[j], q) x_pos = orig + delta xs[j] = _set_item(xs[j], q, x_pos) - jacobian_pos = _compute_numerical_jacobian(func, xs, delta, - np_dtype) + jacobian_pos = _compute_numerical_jacobian( + func, xs, delta, np_dtype) x_neg = orig - delta xs[j] = _set_item(xs[j], q, x_neg) - jacobian_neg = _compute_numerical_jacobian(func, xs, delta, - np_dtype) + jacobian_neg = _compute_numerical_jacobian( + func, xs, delta, np_dtype) xs[j] = _set_item(xs[j], q, orig) hessian[i][j][p][q] = ( - jacobian_pos[0][i][0][p] - 
jacobian_neg[0][i][0][p] - ) / delta / 2. + jacobian_pos[0][i][0][p] - + jacobian_neg[0][i][0][p]) / delta / 2. return hessian @@ -197,8 +197,7 @@ def _compute_numerical_batch_hessian(func, xs, delta, np_dtype): mid = len(hessian_res) // 2 for i in range(mid): hessian_result.append( - np.stack( - (hessian_res[i], hessian_res[mid + i]), axis=0)) + np.stack((hessian_res[i], hessian_res[mid + i]), axis=0)) return hessian_result @@ -262,6 +261,7 @@ def unuse(x, y): def nested(x): + def inner(y): return x * y diff --git a/python/paddle/fluid/tests/unittests/benchmark.py b/python/paddle/fluid/tests/unittests/benchmark.py index 9ea95f3e870..14479e7a271 100644 --- a/python/paddle/fluid/tests/unittests/benchmark.py +++ b/python/paddle/fluid/tests/unittests/benchmark.py @@ -27,6 +27,7 @@ from op_test import OpTest class BenchmarkSuite(OpTest): + def timeit_function(self, callback, iters, *args, **kwargs): assert iters != 0, "Iters should >= 1" start = time.time() @@ -46,12 +47,9 @@ class BenchmarkSuite(OpTest): var_name = variable if isinstance( variable, six.string_types) else variable.name self.assertTrue( - np.allclose( - actual_t, expect_t, atol=atol), - "Output (" + var_name + ") has diff" + str(actual_t) + "\n" + - str(expect_t)) - self.assertListEqual(actual.lod(), - expect.lod(), + np.allclose(actual_t, expect_t, atol=atol), "Output (" + + var_name + ") has diff" + str(actual_t) + "\n" + str(expect_t)) + self.assertListEqual(actual.lod(), expect.lod(), "Output (" + var_name + ") has different lod") def _get_input_names(self): @@ -98,13 +96,12 @@ class BenchmarkSuite(OpTest): def timeit_grad_with_place(self, place, iters=100): inputs_to_check = self._get_input_names() output_names = self._get_output_names() - return self.timeit_function( - self._get_gradient, - iters, - inputs_to_check, - place, - output_names, - no_grad_set=None) + return self.timeit_function(self._get_gradient, + iters, + inputs_to_check, + place, + output_names, + no_grad_set=None) def timeit_grad(self, iters=100): places = self._get_places() diff --git a/python/paddle/fluid/tests/unittests/benchmark_sum_op.py b/python/paddle/fluid/tests/unittests/benchmark_sum_op.py index 0e7338b839e..37cc77836a8 100644 --- a/python/paddle/fluid/tests/unittests/benchmark_sum_op.py +++ b/python/paddle/fluid/tests/unittests/benchmark_sum_op.py @@ -25,6 +25,7 @@ from op_test import OpTest class TestSumOp(BenchmarkSuite): + def setUp(self): self.op_type = "sum" self.customize_testcase() diff --git a/python/paddle/fluid/tests/unittests/c_comm_init_op.py b/python/paddle/fluid/tests/unittests/c_comm_init_op.py index ed6a75230c6..52409ccf8c8 100644 --- a/python/paddle/fluid/tests/unittests/c_comm_init_op.py +++ b/python/paddle/fluid/tests/unittests/c_comm_init_op.py @@ -25,6 +25,7 @@ paddle.enable_static() class TestCCommInitOp(unittest.TestCase): + def setUp(self): self.endpoints = os.getenv("PADDLE_TRAINER_ENDPOINTS").split(',') self.current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT") @@ -45,25 +46,23 @@ class TestCCommInitOp(unittest.TestCase): name=fluid.unique_name.generate('nccl_id'), persistable=True, type=fluid.core.VarDesc.VarType.RAW) - block.append_op( - type='c_gen_nccl_id', - inputs={}, - outputs={'Out': nccl_id_var}, - attrs={ - 'rank': self.rank, - 'endpoint': self.current_endpoint, - 'other_endpoints': self.other_endpoints - }) - block.append_op( - type='c_comm_init', - inputs={'X': nccl_id_var}, - outputs={}, - attrs={ - 'nranks': self.nranks, - 'rank': self.rank, - 'ring_id': 0, - 'device_id': self.gpu_id - }) + 
block.append_op(type='c_gen_nccl_id', + inputs={}, + outputs={'Out': nccl_id_var}, + attrs={ + 'rank': self.rank, + 'endpoint': self.current_endpoint, + 'other_endpoints': self.other_endpoints + }) + block.append_op(type='c_comm_init', + inputs={'X': nccl_id_var}, + outputs={}, + attrs={ + 'nranks': self.nranks, + 'rank': self.rank, + 'ring_id': 0, + 'device_id': self.gpu_id + }) self.exe.run(program) diff --git a/python/paddle/fluid/tests/unittests/c_embedding_op_base.py b/python/paddle/fluid/tests/unittests/c_embedding_op_base.py index 8bdeecae456..8b5f1840790 100644 --- a/python/paddle/fluid/tests/unittests/c_embedding_op_base.py +++ b/python/paddle/fluid/tests/unittests/c_embedding_op_base.py @@ -36,6 +36,7 @@ def get_c_embedding(start, end, table, ids): class TestCEmbeddingCPU(OpTest): + def setUp(self): self.init_dtype() self.initcase() @@ -47,8 +48,8 @@ class TestCEmbeddingCPU(OpTest): def initcase(self): self.op_type = "c_embedding" table = np.random.random((17, 64)).astype(self.dtype) - ids = np.random.randint( - low=0, high=17 * 2, size=(2, 4)).astype(self.ids_dtype) + ids = np.random.randint(low=0, high=17 * 2, + size=(2, 4)).astype(self.ids_dtype) self.start_index = 10 self.end_index = self.start_index + 17 @@ -71,6 +72,7 @@ class TestCEmbeddingCPU(OpTest): class TestCEmbeddingOpBase(TestCEmbeddingCPU): + def setUp(self): self.init_dtype() self.initcase() @@ -97,6 +99,7 @@ class TestCEmbeddingOpBase(TestCEmbeddingCPU): class TestCEmbeddingOpFP32(TestCEmbeddingOpBase): + def setUp(self): self.init_dtype() self.initcase() @@ -104,8 +107,8 @@ class TestCEmbeddingOpFP32(TestCEmbeddingOpBase): def initcase(self): self.op_type = "c_embedding" table = np.random.random((17, 64)).astype(self.dtype) - ids = np.random.randint( - low=0, high=17 * 2, size=(2, 4)).astype(self.ids_dtype) + ids = np.random.randint(low=0, high=17 * 2, + size=(2, 4)).astype(self.ids_dtype) self.start_index = 10 ids[0][1] = 12 ids[0][2] = 12 diff --git a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py index 13a7ff6860e..d188ae66545 100644 --- a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py +++ b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py @@ -36,8 +36,8 @@ np.random.seed(0) def generator(): batch_size = 5 for i in range(5): - curr_train_x = np.random.randint( - batch_size, size=(batch_size, 3)).astype("float32") + curr_train_x = np.random.randint(batch_size, + size=(batch_size, 3)).astype("float32") if i >= 2: curr_train_x[0, :] = np.nan curr_train_x[-1, :] = np.inf @@ -94,12 +94,14 @@ def check(use_cuda): for train_data, y_label in generator(): outs = exe.run( main, - feed={'x': train_data, - 'y': y_label}, + feed={ + 'x': train_data, + 'y': y_label + }, fetch_list=[y_predict.name, avg_cost.name, acc_top1.name]) step += 1 - print('iter={:.0f},cost={},acc1={}'.format(step, outs[1][0], - outs[2][0])) + print('iter={:.0f},cost={},acc1={}'.format( + step, outs[1][0], outs[2][0])) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/check_nan_inf_base_dygraph.py b/python/paddle/fluid/tests/unittests/check_nan_inf_base_dygraph.py index dee74fdcb1f..93ccc8b54f7 100644 --- a/python/paddle/fluid/tests/unittests/check_nan_inf_base_dygraph.py +++ b/python/paddle/fluid/tests/unittests/check_nan_inf_base_dygraph.py @@ -33,8 +33,8 @@ np.random.seed(0) def generator(): batch_size = 5 for i in range(5): - curr_train_x = np.random.randint( - batch_size, size=(batch_size, 3)).astype("float32") + curr_train_x = 
np.random.randint(batch_size, + size=(batch_size, 3)).astype("float32") if i >= 2: curr_train_x[0, :] = np.nan curr_train_x[-1, :] = np.inf @@ -47,6 +47,7 @@ def generator(): class TestLayer(nn.Layer): + def __init__(self): super(TestLayer, self).__init__() self.linear1 = nn.Linear(3, 400) @@ -86,8 +87,8 @@ def check(use_cuda): acc_top1 = paddle.metric.accuracy(input=y_pred, label=y, k=1) - print('iter={:.0f}, cost={}, acc1={}'.format( - step, avg_cost.numpy(), acc_top1.numpy())) + print('iter={:.0f}, cost={}, acc1={}'.format(step, avg_cost.numpy(), + acc_top1.numpy())) sgd.step() sgd.clear_grad() diff --git a/python/paddle/fluid/tests/unittests/collective_allgather_api.py b/python/paddle/fluid/tests/unittests/collective_allgather_api.py index 63d7f52c11a..d2a639d0294 100644 --- a/python/paddle/fluid/tests/unittests/collective_allgather_api.py +++ b/python/paddle/fluid/tests/unittests/collective_allgather_api.py @@ -39,14 +39,16 @@ paddle.enable_static() class TestCollectiveAllgatherAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): tensor_list = [] - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') paddle.distributed.all_gather(tensor_list, tindata) return tensor_list diff --git a/python/paddle/fluid/tests/unittests/collective_allgather_op.py b/python/paddle/fluid/tests/unittests/collective_allgather_op.py index f77a97aa915..bbfc35e6c9d 100644 --- a/python/paddle/fluid/tests/unittests/collective_allgather_op.py +++ b/python/paddle/fluid/tests/unittests/collective_allgather_op.py @@ -38,6 +38,7 @@ paddle.enable_static() class TestCollectiveAllGather(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -45,25 +46,26 @@ class TestCollectiveAllGather(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outofgather", dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_allgather", - inputs={'X': tindata}, - attrs={'ring_id': ring_id, - 'nranks': nranks}, - outputs={'Out': toutdata}) - main_prog.global_block().append_op( - type="c_sync_comm_stream", - inputs={'X': toutdata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id}) + main_prog.global_block().append_op(type="c_allgather", + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'nranks': nranks + }, + outputs={'Out': toutdata}) + main_prog.global_block().append_op(type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) return toutdata diff --git a/python/paddle/fluid/tests/unittests/collective_allreduce_api.py b/python/paddle/fluid/tests/unittests/collective_allreduce_api.py index 67242b274fc..c72fd144ed8 100644 --- a/python/paddle/fluid/tests/unittests/collective_allreduce_api.py +++ b/python/paddle/fluid/tests/unittests/collective_allreduce_api.py @@ -39,13 +39,15 @@ paddle.enable_static() class TestCollectiveAllreduceAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program, rank): with 
fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') paddle.distributed.all_reduce(tindata) return [tindata] diff --git a/python/paddle/fluid/tests/unittests/collective_allreduce_new_group_api.py b/python/paddle/fluid/tests/unittests/collective_allreduce_new_group_api.py index 597765cfb98..859161af456 100644 --- a/python/paddle/fluid/tests/unittests/collective_allreduce_new_group_api.py +++ b/python/paddle/fluid/tests/unittests/collective_allreduce_new_group_api.py @@ -39,16 +39,19 @@ paddle.enable_static() class TestCollectiveAllreduceNewGroupAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') gp = paddle.distributed.new_group([0, 1]) - paddle.distributed.all_reduce( - tindata, group=gp, use_calc_stream=False) + paddle.distributed.all_reduce(tindata, + group=gp, + use_calc_stream=False) return [tindata] diff --git a/python/paddle/fluid/tests/unittests/collective_allreduce_op.py b/python/paddle/fluid/tests/unittests/collective_allreduce_op.py index eef59ee3dde..800131a6a6f 100644 --- a/python/paddle/fluid/tests/unittests/collective_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/collective_allreduce_op.py @@ -39,30 +39,30 @@ paddle.enable_static() class TestCollectiveAllreduce(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program): ring_id = 0 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outofallreduce", dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_allreduce_sum", - inputs={'X': tindata}, - attrs={'ring_id': ring_id}, - outputs={'Out': toutdata}) - main_prog.global_block().append_op( - type="c_sync_comm_stream", - inputs={'X': toutdata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id}) + main_prog.global_block().append_op(type="c_allreduce_sum", + inputs={'X': tindata}, + attrs={'ring_id': ring_id}, + outputs={'Out': toutdata}) + main_prog.global_block().append_op(type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) return toutdata diff --git a/python/paddle/fluid/tests/unittests/collective_allreduce_op_wait.py b/python/paddle/fluid/tests/unittests/collective_allreduce_op_wait.py index 61a0ad3bd76..a254ddd6068 100644 --- a/python/paddle/fluid/tests/unittests/collective_allreduce_op_wait.py +++ b/python/paddle/fluid/tests/unittests/collective_allreduce_op_wait.py @@ -39,14 +39,16 @@ paddle.enable_static() class TestCollectiveAllreduce(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program): ring_id = 0 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = 
main_prog.current_block().create_var( name="outofallreduce", dtype='float32', @@ -62,33 +64,32 @@ class TestCollectiveAllreduce(TestCollectiveRunnerBase): 'X': tindata, 'Y': tindata, }, - outputs={'Out': toutdata}, ) + outputs={'Out': toutdata}, + ) main_prog.global_block().append_op( type="elementwise_sub", inputs={ 'X': toutdata, 'Y': tindata, }, - outputs={'Out': toutdata}, ) + outputs={'Out': toutdata}, + ) - main_prog.global_block().append_op( - type='c_wait_compute', - inputs={'X': toutdata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id}) + main_prog.global_block().append_op(type='c_wait_compute', + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) - main_prog.global_block().append_op( - type="c_allreduce_sum", - inputs={'X': toutdata}, - attrs={'ring_id': ring_id}, - outputs={'Out': toutdata}, - attr={'use_calc_stream': False}) + main_prog.global_block().append_op(type="c_allreduce_sum", + inputs={'X': toutdata}, + attrs={'ring_id': ring_id}, + outputs={'Out': toutdata}, + attr={'use_calc_stream': False}) - main_prog.global_block().append_op( - type="c_wait_comm", - inputs={'X': toutdata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id}) + main_prog.global_block().append_op(type="c_wait_comm", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) # tout = tin + tout - tin = tout if True: @@ -98,14 +99,16 @@ class TestCollectiveAllreduce(TestCollectiveRunnerBase): 'X': tindata, 'Y': toutdata, }, - outputs={'Out': toutdata}, ) + outputs={'Out': toutdata}, + ) main_prog.global_block().append_op( type="elementwise_sub", inputs={ 'X': toutdata, 'Y': tindata, }, - outputs={'Out': toutdata}, ) + outputs={'Out': toutdata}, + ) return toutdata diff --git a/python/paddle/fluid/tests/unittests/collective_alltoall_api.py b/python/paddle/fluid/tests/unittests/collective_alltoall_api.py index be18b68a1da..343ba13c4e8 100644 --- a/python/paddle/fluid/tests/unittests/collective_alltoall_api.py +++ b/python/paddle/fluid/tests/unittests/collective_alltoall_api.py @@ -39,13 +39,15 @@ paddle.enable_static() class TestCollectiveAllToAllAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') tindata = paddle.split(tindata, 2, axis=0) tout_data = [] paddle.distributed.alltoall(tindata, tout_data) diff --git a/python/paddle/fluid/tests/unittests/collective_alltoall_api_dygraph.py b/python/paddle/fluid/tests/unittests/collective_alltoall_api_dygraph.py index 02a59aef071..b5994db5cb6 100644 --- a/python/paddle/fluid/tests/unittests/collective_alltoall_api_dygraph.py +++ b/python/paddle/fluid/tests/unittests/collective_alltoall_api_dygraph.py @@ -37,6 +37,7 @@ from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main class TestCollectiveAllToAllAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 diff --git a/python/paddle/fluid/tests/unittests/collective_barrier_api.py b/python/paddle/fluid/tests/unittests/collective_barrier_api.py index dbcc70d540b..1e08c73f8cb 100644 --- a/python/paddle/fluid/tests/unittests/collective_barrier_api.py +++ b/python/paddle/fluid/tests/unittests/collective_barrier_api.py @@ -39,6 +39,7 @@ paddle.enable_static() class 
TestCollectiveBarrierAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 diff --git a/python/paddle/fluid/tests/unittests/collective_broadcast_api.py b/python/paddle/fluid/tests/unittests/collective_broadcast_api.py index 08a3d948906..b928e409f0e 100644 --- a/python/paddle/fluid/tests/unittests/collective_broadcast_api.py +++ b/python/paddle/fluid/tests/unittests/collective_broadcast_api.py @@ -39,13 +39,15 @@ paddle.enable_static() class TestCollectiveBroadcastAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') paddle.distributed.broadcast(tindata, src=1) return [tindata] diff --git a/python/paddle/fluid/tests/unittests/collective_broadcast_op.py b/python/paddle/fluid/tests/unittests/collective_broadcast_op.py index 127f48be618..140df2b91d9 100644 --- a/python/paddle/fluid/tests/unittests/collective_broadcast_op.py +++ b/python/paddle/fluid/tests/unittests/collective_broadcast_op.py @@ -39,6 +39,7 @@ paddle.enable_static() class TestCollectiveBroadcast(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -46,25 +47,26 @@ class TestCollectiveBroadcast(TestCollectiveRunnerBase): ring_id = 0 rootid = 1 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outofbroadcast", dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_broadcast", - inputs={'X': tindata}, - attrs={'ring_id': ring_id, - 'root': rootid}, - outputs={'Out': toutdata}) - main_prog.global_block().append_op( - type="c_sync_comm_stream", - inputs={'X': toutdata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id}) + main_prog.global_block().append_op(type="c_broadcast", + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'root': rootid + }, + outputs={'Out': toutdata}) + main_prog.global_block().append_op(type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) return toutdata diff --git a/python/paddle/fluid/tests/unittests/collective_concat_op.py b/python/paddle/fluid/tests/unittests/collective_concat_op.py index c9de1713e72..2f2e4d699f7 100644 --- a/python/paddle/fluid/tests/unittests/collective_concat_op.py +++ b/python/paddle/fluid/tests/unittests/collective_concat_op.py @@ -38,6 +38,7 @@ paddle.enable_static() class TestCollectiveConcat(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -45,23 +46,23 @@ class TestCollectiveConcat(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outofconcat", dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_concat", - inputs={'X': tindata}, - attrs={ - 'ring_id': ring_id, - 'rank': self.rank, - 'nranks': 
nranks - }, - outputs={'Out': toutdata}) + main_prog.global_block().append_op(type="c_concat", + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'rank': self.rank, + 'nranks': nranks + }, + outputs={'Out': toutdata}) return toutdata diff --git a/python/paddle/fluid/tests/unittests/collective_global_gather.py b/python/paddle/fluid/tests/unittests/collective_global_gather.py index 164abe05934..60909f63211 100644 --- a/python/paddle/fluid/tests/unittests/collective_global_gather.py +++ b/python/paddle/fluid/tests/unittests/collective_global_gather.py @@ -29,6 +29,7 @@ paddle.enable_static() class TestCollectiveGlobalGatherAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -40,12 +41,15 @@ class TestCollectiveGlobalGatherAPI(TestCollectiveAPIRunnerBase): n_expert = 2 world_size = 2 tot_expert = n_expert * world_size - local_input_buf = paddle.static.data( - name="local_input_buf", shape=[-1, in_feat], dtype="float32") - local_expert_count = paddle.static.data( - name="local_expert_count", shape=[tot_expert], dtype="int64") - global_expert_count = paddle.static.data( - name="global_expert_count", shape=[tot_expert], dtype="int64") + local_input_buf = paddle.static.data(name="local_input_buf", + shape=[-1, in_feat], + dtype="float32") + local_expert_count = paddle.static.data(name="local_expert_count", + shape=[tot_expert], + dtype="int64") + global_expert_count = paddle.static.data(name="global_expert_count", + shape=[tot_expert], + dtype="int64") output = paddle.distributed.utils.global_gather( local_input_buf, local_expert_count, global_expert_count) @@ -79,13 +83,12 @@ class TestCollectiveGlobalGatherAPI(TestCollectiveAPIRunnerBase): # Call paddle.distributed.alltoall() under legacy dygraph _enable_legacy_dygraph() np.random.seed(os.getpid()) - local_expert_count = np.random.randint( - 1, 4, size=tot_expert).astype("int64") + local_expert_count = np.random.randint(1, 4, + size=tot_expert).astype("int64") local_expert_count = paddle.to_tensor(local_expert_count) global_expert_count = [] - paddle.distributed.alltoall( - paddle.split( - local_expert_count, 2, axis=0), global_expert_count) + paddle.distributed.alltoall(paddle.split(local_expert_count, 2, axis=0), + global_expert_count) global_expert_count = paddle.concat(global_expert_count, axis=0) global_expert_count = global_expert_count.numpy() local_expert_count = local_expert_count.numpy() diff --git a/python/paddle/fluid/tests/unittests/collective_global_gather_dygraph.py b/python/paddle/fluid/tests/unittests/collective_global_gather_dygraph.py index 20df5f35555..0b264f5ba89 100644 --- a/python/paddle/fluid/tests/unittests/collective_global_gather_dygraph.py +++ b/python/paddle/fluid/tests/unittests/collective_global_gather_dygraph.py @@ -25,6 +25,7 @@ from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main class TestCollectiveGlobalGatherAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -41,8 +42,7 @@ class TestCollectiveGlobalGatherAPI(TestCollectiveAPIRunnerBase): local_expert_count = paddle.to_tensor(local_expert_count) global_expert_count = [] paddle.distributed.alltoall( - paddle.split( - local_expert_count, 2, axis=0), + paddle.split(local_expert_count, 2, axis=0), global_expert_count) global_expert_count = paddle.concat(global_expert_count, axis=0) fwd_expert_count = sum(global_expert_count) diff --git a/python/paddle/fluid/tests/unittests/collective_global_scatter.py b/python/paddle/fluid/tests/unittests/collective_global_scatter.py 
index 74d12b61aca..c4950025877 100644 --- a/python/paddle/fluid/tests/unittests/collective_global_scatter.py +++ b/python/paddle/fluid/tests/unittests/collective_global_scatter.py @@ -28,6 +28,7 @@ paddle.enable_static() class TestCollectiveGlobalScatterAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -39,14 +40,15 @@ class TestCollectiveGlobalScatterAPI(TestCollectiveAPIRunnerBase): n_expert = 2 world_size = 2 tot_expert = n_expert * world_size - local_input_buf = paddle.static.data( - name="local_input_buf", shape=[-1, in_feat], dtype="float32") - local_expert_count = paddle.static.data( - name="local_expert_count", shape=[tot_expert], dtype="int64") + local_input_buf = paddle.static.data(name="local_input_buf", + shape=[-1, in_feat], + dtype="float32") + local_expert_count = paddle.static.data(name="local_expert_count", + shape=[tot_expert], + dtype="int64") global_expert_count = [] paddle.distributed.alltoall( - paddle.split( - local_expert_count, 2, axis=0), + paddle.split(local_expert_count, 2, axis=0), global_expert_count) global_expert_count = paddle.concat(global_expert_count, axis=0) output = paddle.distributed.utils.global_scatter( @@ -75,8 +77,8 @@ class TestCollectiveGlobalScatterAPI(TestCollectiveAPIRunnerBase): n_expert = 2 world_size = 2 tot_expert = n_expert * world_size - local_expert_count = np.random.randint( - 1, 4, size=tot_expert).astype("int64") + local_expert_count = np.random.randint(1, 4, + size=tot_expert).astype("int64") fwd_expert_count = sum(local_expert_count) local_input_buf = np.random.rand(fwd_expert_count, in_feat).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/collective_global_scatter_dygraph.py b/python/paddle/fluid/tests/unittests/collective_global_scatter_dygraph.py index f7e13a87622..82816c899e2 100644 --- a/python/paddle/fluid/tests/unittests/collective_global_scatter_dygraph.py +++ b/python/paddle/fluid/tests/unittests/collective_global_scatter_dygraph.py @@ -25,6 +25,7 @@ from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main class TestCollectiveGlobalScatterAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -45,8 +46,7 @@ class TestCollectiveGlobalScatterAPI(TestCollectiveAPIRunnerBase): local_input_buf = paddle.to_tensor(local_input_buf) global_expert_count = [] paddle.distributed.alltoall( - paddle.split( - local_expert_count, 2, axis=0), + paddle.split(local_expert_count, 2, axis=0), global_expert_count) global_expert_count = paddle.concat(global_expert_count, axis=0) local_input_buf.stop_gradient = False diff --git a/python/paddle/fluid/tests/unittests/collective_identity_op.py b/python/paddle/fluid/tests/unittests/collective_identity_op.py index e024b64e825..a757b0605a5 100644 --- a/python/paddle/fluid/tests/unittests/collective_identity_op.py +++ b/python/paddle/fluid/tests/unittests/collective_identity_op.py @@ -38,6 +38,7 @@ paddle.enable_static() class TestCollectiveIdentity(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -45,20 +46,22 @@ class TestCollectiveIdentity(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outofgather", dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - 
main_prog.global_block().append_op( - type="c_identity", - inputs={'X': tindata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id, - 'nranks': nranks}) + main_prog.global_block().append_op(type="c_identity", + inputs={'X': tindata}, + outputs={'Out': toutdata}, + attrs={ + 'ring_id': ring_id, + 'nranks': nranks + }) return toutdata diff --git a/python/paddle/fluid/tests/unittests/collective_reduce_api.py b/python/paddle/fluid/tests/unittests/collective_reduce_api.py index 41e31146a22..d474dd683dd 100644 --- a/python/paddle/fluid/tests/unittests/collective_reduce_api.py +++ b/python/paddle/fluid/tests/unittests/collective_reduce_api.py @@ -39,13 +39,15 @@ paddle.enable_static() class TestCollectiveReduceAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') paddle.distributed.reduce(tindata, dst=0) return [tindata] diff --git a/python/paddle/fluid/tests/unittests/collective_reduce_op.py b/python/paddle/fluid/tests/unittests/collective_reduce_op.py index 0448c66d132..c39a8a38f48 100644 --- a/python/paddle/fluid/tests/unittests/collective_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/collective_reduce_op.py @@ -39,6 +39,7 @@ paddle.enable_static() class TestCollectiveReduce(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -46,25 +47,26 @@ class TestCollectiveReduce(TestCollectiveRunnerBase): ring_id = 0 rootid = 1 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outofreduce", dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_reduce_sum", - inputs={'X': tindata}, - attrs={'ring_id': ring_id, - 'root_id': rootid}, - outputs={'Out': toutdata}) - main_prog.global_block().append_op( - type="c_sync_comm_stream", - inputs={'X': toutdata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id}) + main_prog.global_block().append_op(type="c_reduce_sum", + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'root_id': rootid + }, + outputs={'Out': toutdata}) + main_prog.global_block().append_op(type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) return toutdata diff --git a/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py b/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py index 7a9e0b148d5..0a1fc2b79a9 100644 --- a/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py +++ b/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py @@ -39,6 +39,7 @@ paddle.enable_static() class TestCollectiveReduce(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -46,28 +47,27 @@ class TestCollectiveReduce(TestCollectiveRunnerBase): ring_id = 0 rootid = 1 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( 
name="outofreduce", dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_reduce_sum", - inputs={'X': tindata}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': True, - 'root_id': rootid - }, - outputs={'Out': toutdata}) - main_prog.global_block().append_op( - type="c_sync_comm_stream", - inputs={'X': toutdata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id}) + main_prog.global_block().append_op(type="c_reduce_sum", + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': True, + 'root_id': rootid + }, + outputs={'Out': toutdata}) + main_prog.global_block().append_op(type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) return toutdata diff --git a/python/paddle/fluid/tests/unittests/collective_reducescatter.py b/python/paddle/fluid/tests/unittests/collective_reducescatter.py index 00d4a1c4cf6..27f7fd506b5 100644 --- a/python/paddle/fluid/tests/unittests/collective_reducescatter.py +++ b/python/paddle/fluid/tests/unittests/collective_reducescatter.py @@ -38,6 +38,7 @@ paddle.enable_static() class TestCollectiveReduceScatter(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -45,8 +46,9 @@ class TestCollectiveReduceScatter(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = fluid.layers.collective._c_reducescatter(tindata, nranks) toutdata = fluid.layers.collective._c_sync_comm_stream(toutdata, 0) return toutdata diff --git a/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py b/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py index 91712e2b50f..a1843394e84 100644 --- a/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py +++ b/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py @@ -39,6 +39,7 @@ paddle.enable_static() class TestCollectiveReduceScatter(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -46,25 +47,26 @@ class TestCollectiveReduceScatter(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outofrs", dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_reducescatter", - inputs={'X': tindata}, - attrs={'ring_id': ring_id, - 'nranks': nranks}, - outputs={'Out': toutdata}) - main_prog.global_block().append_op( - type="c_sync_comm_stream", - inputs={'X': toutdata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id}) + main_prog.global_block().append_op(type="c_reducescatter", + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'nranks': nranks + }, + outputs={'Out': toutdata}) + main_prog.global_block().append_op(type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) return toutdata diff --git a/python/paddle/fluid/tests/unittests/collective_scatter_api.py b/python/paddle/fluid/tests/unittests/collective_scatter_api.py index 643106ff53a..0a0d1e1593e 100644 --- 
a/python/paddle/fluid/tests/unittests/collective_scatter_api.py +++ b/python/paddle/fluid/tests/unittests/collective_scatter_api.py @@ -39,18 +39,19 @@ paddle.enable_static() class TestCollectiveScatterAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", - shape=[10, 1000], - dtype='float32', - append_batch_size=False) - toutdata = layers.fill_constant( - shape=[5, 1000], dtype='float32', value=1.0) + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32', + append_batch_size=False) + toutdata = layers.fill_constant(shape=[5, 1000], + dtype='float32', + value=1.0) tensor_list = None if rank == 1: tensor_list = paddle.split(tindata, 2, axis=0) diff --git a/python/paddle/fluid/tests/unittests/collective_scatter_op.py b/python/paddle/fluid/tests/unittests/collective_scatter_op.py index 7afa4aec639..1434bd3be6a 100644 --- a/python/paddle/fluid/tests/unittests/collective_scatter_op.py +++ b/python/paddle/fluid/tests/unittests/collective_scatter_op.py @@ -39,6 +39,7 @@ paddle.enable_static() class TestCollectiveScatter(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -46,26 +47,27 @@ class TestCollectiveScatter(TestCollectiveRunnerBase): ring_id = 0 rootid = 1 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outofreduce", dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_scatter", - inputs={'X': tindata}, - attrs={'ring_id': ring_id, - 'root': rootid, - 'nranks': 2}, - outputs={'Out': toutdata}) - main_prog.global_block().append_op( - type="c_sync_comm_stream", - inputs={'X': toutdata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id}) + main_prog.global_block().append_op(type="c_scatter", + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'root': rootid, + 'nranks': 2 + }, + outputs={'Out': toutdata}) + main_prog.global_block().append_op(type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) return toutdata diff --git a/python/paddle/fluid/tests/unittests/collective_sendrecv_api.py b/python/paddle/fluid/tests/unittests/collective_sendrecv_api.py index 551537a0ea4..a4e699b64a9 100644 --- a/python/paddle/fluid/tests/unittests/collective_sendrecv_api.py +++ b/python/paddle/fluid/tests/unittests/collective_sendrecv_api.py @@ -39,16 +39,16 @@ paddle.enable_static() class TestCollectiveSendRecvAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", - shape=[10, 1000], - dtype='float32', - append_batch_size=False) + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32', + append_batch_size=False) if rank == 0: paddle.distributed.send(tindata, dst=1) else: diff --git a/python/paddle/fluid/tests/unittests/collective_sendrecv_api_dygraph.py b/python/paddle/fluid/tests/unittests/collective_sendrecv_api_dygraph.py index 10028488e85..8508c3d043c 100644 --- 
a/python/paddle/fluid/tests/unittests/collective_sendrecv_api_dygraph.py +++ b/python/paddle/fluid/tests/unittests/collective_sendrecv_api_dygraph.py @@ -37,6 +37,7 @@ from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main class TestCollectiveSendRecvAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 diff --git a/python/paddle/fluid/tests/unittests/collective_sendrecv_op.py b/python/paddle/fluid/tests/unittests/collective_sendrecv_op.py index 18a7aeccf4c..e19bdab2bb3 100644 --- a/python/paddle/fluid/tests/unittests/collective_sendrecv_op.py +++ b/python/paddle/fluid/tests/unittests/collective_sendrecv_op.py @@ -39,37 +39,36 @@ paddle.enable_static() class TestCollectiveSendRecv(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program): ring_id = self.global_ring_id with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", - shape=[10, 1000], - dtype='float64', - append_batch_size=False) + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float64', + append_batch_size=False) if self.rank == 0: - main_prog.global_block().append_op( - type="send_v2", - inputs={'X': tindata}, - attrs={ - 'ring_id': ring_id, - 'peer': 1, - 'use_calc_stream': True - }) + main_prog.global_block().append_op(type="send_v2", + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'peer': 1, + 'use_calc_stream': True + }) else: - main_prog.global_block().append_op( - type="recv_v2", - outputs={'Out': tindata}, - attrs={ - 'peer': 0, - 'ring_id': ring_id, - 'dtype': tindata.dtype, - 'out_shape': tindata.shape, - 'use_calc_stream': True, - }) + main_prog.global_block().append_op(type="recv_v2", + outputs={'Out': tindata}, + attrs={ + 'peer': 0, + 'ring_id': ring_id, + 'dtype': tindata.dtype, + 'out_shape': + tindata.shape, + 'use_calc_stream': True, + }) return tindata diff --git a/python/paddle/fluid/tests/unittests/collective_sendrecv_op_array.py b/python/paddle/fluid/tests/unittests/collective_sendrecv_op_array.py index 6876a70ce91..ee8c4cce738 100644 --- a/python/paddle/fluid/tests/unittests/collective_sendrecv_op_array.py +++ b/python/paddle/fluid/tests/unittests/collective_sendrecv_op_array.py @@ -39,44 +39,39 @@ paddle.enable_static() class TestCollectiveSendRecv(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program): ring_id = self.global_ring_id with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", - shape=[10, 1000], - dtype='float64', - append_batch_size=False) + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float64', + append_batch_size=False) if self.rank == 0: data1 = fluid.layers.assign( - np.array( - [[0, 1, 2]], dtype='float32')) + np.array([[0, 1, 2]], dtype='float32')) data2 = fluid.layers.assign( - np.array( - [[3, 4, 5]], dtype='float32')) + np.array([[3, 4, 5]], dtype='float32')) elif self.rank == 1: data1 = fluid.layers.assign( - np.array( - [[3, 4, 5]], dtype='float32')) + np.array([[3, 4, 5]], dtype='float32')) data2 = fluid.layers.assign( - np.array( - [[0, 1, 2]], dtype='float32')) + np.array([[0, 1, 2]], dtype='float32')) tensor_array = fluid.layers.create_array(dtype='float32') i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0) fluid.layers.array_write(data1, i, tensor_array) fluid.layers.array_write(data2, i + 1, tensor_array) if self.rank == 0: - 
main_prog.global_block().append_op( - type="send_v2", - inputs={'X': tensor_array}, - attrs={ - 'ring_id': ring_id, - 'peer': 1, - 'use_calc_stream': True - }) + main_prog.global_block().append_op(type="send_v2", + inputs={'X': tensor_array}, + attrs={ + 'ring_id': ring_id, + 'peer': 1, + 'use_calc_stream': True + }) else: main_prog.global_block().append_op( type="recv_v2", diff --git a/python/paddle/fluid/tests/unittests/collective_sendrecv_op_dynamic_shape.py b/python/paddle/fluid/tests/unittests/collective_sendrecv_op_dynamic_shape.py index 093af635f44..45f349ed285 100644 --- a/python/paddle/fluid/tests/unittests/collective_sendrecv_op_dynamic_shape.py +++ b/python/paddle/fluid/tests/unittests/collective_sendrecv_op_dynamic_shape.py @@ -39,39 +39,38 @@ paddle.enable_static() class TestCollectiveSendRecvDynamicShape(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program): ring_id = self.global_ring_id with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", - shape=[10, 1000], - dtype='float64', - append_batch_size=False) + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float64', + append_batch_size=False) if self.rank == 0: - main_prog.global_block().append_op( - type="send_v2", - inputs={'X': tindata}, - attrs={ - 'ring_id': ring_id, - 'peer': 1, - 'use_calc_stream': True, - 'dynamic_shape': True - }) + main_prog.global_block().append_op(type="send_v2", + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'peer': 1, + 'use_calc_stream': True, + 'dynamic_shape': True + }) else: - main_prog.global_block().append_op( - type="recv_v2", - outputs={'Out': tindata}, - attrs={ - 'peer': 0, - 'ring_id': ring_id, - 'dtype': tindata.dtype, - 'out_shape': tindata.shape, - 'use_calc_stream': True, - 'dynamic_shape': True - }) + main_prog.global_block().append_op(type="recv_v2", + outputs={'Out': tindata}, + attrs={ + 'peer': 0, + 'ring_id': ring_id, + 'dtype': tindata.dtype, + 'out_shape': + tindata.shape, + 'use_calc_stream': True, + 'dynamic_shape': True + }) return tindata diff --git a/python/paddle/fluid/tests/unittests/collective_split_op.py b/python/paddle/fluid/tests/unittests/collective_split_op.py index 553955354fe..f899d82d897 100644 --- a/python/paddle/fluid/tests/unittests/collective_split_op.py +++ b/python/paddle/fluid/tests/unittests/collective_split_op.py @@ -38,6 +38,7 @@ paddle.enable_static() class TestCollectiveAllGather(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -45,23 +46,23 @@ class TestCollectiveAllGather(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outofsplit", dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_split", - inputs={'X': tindata}, - attrs={ - 'ring_id': ring_id, - 'rank': self.rank, - 'nranks': nranks - }, - outputs={'Out': toutdata}) + main_prog.global_block().append_op(type="c_split", + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'rank': self.rank, + 'nranks': nranks + }, + outputs={'Out': toutdata}) return toutdata diff --git a/python/paddle/fluid/tests/unittests/column_parallel_linear_api.py 
b/python/paddle/fluid/tests/unittests/column_parallel_linear_api.py index 815018dc4b2..b9ebbdc3807 100644 --- a/python/paddle/fluid/tests/unittests/column_parallel_linear_api.py +++ b/python/paddle/fluid/tests/unittests/column_parallel_linear_api.py @@ -41,6 +41,7 @@ paddle.enable_static() class TestColumnParallelLinearAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -50,8 +51,9 @@ class TestColumnParallelLinearAPI(TestCollectiveAPIRunnerBase): np.random.seed(2020) np_array = np.random.rand(1000, 16) - data = paddle.static.data( - name='tindata', shape=[10, 1000], dtype="float32") + data = paddle.static.data(name='tindata', + shape=[10, 1000], + dtype="float32") paddle.distributed.broadcast(data, src=0) if rank == 0: param_attr = paddle.fluid.ParamAttr( @@ -69,7 +71,8 @@ class TestColumnParallelLinearAPI(TestCollectiveAPIRunnerBase): axis=1, num_partitions=2, weight_attr=param_attr, - bias_attr=True, ) + bias_attr=True, + ) return [linear_out] diff --git a/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py b/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py index 815e77896ed..f96c4589b26 100644 --- a/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py +++ b/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py @@ -63,10 +63,12 @@ def load_lr_input_record(sent): class CtrReader(object): + def __init__(self): pass def _reader_creator(self, filelist): + def get_rand(low=0.0, high=1.0): return random.random() @@ -85,7 +87,9 @@ class CtrReader(object): class DatasetCtrReader(fleet.MultiSlotDataGenerator): + def generate_sample(self, line): + def get_rand(low=0.0, high=1.0): return random.random() @@ -113,8 +117,7 @@ def prepare_data(): lines = f.readlines() err_info = "wrong meta format" assert len(lines) == 2, err_info - assert 'dnn_input_dim:' in lines[0] and 'lr_input_dim:' in lines[ - 1], err_info + assert 'dnn_input_dim:' in lines[0] and 'lr_input_dim:' in lines[1], err_info res = map(int, [_.split(':')[1] for _ in lines]) res = list(res) dnn_input_dim = res[0] @@ -195,8 +198,8 @@ def prepare_fake_data(file_nums=4, file_lines=500): for line_index in range(file_lines - 1): file_str += gen_fake_line() fin.write(file_str) - warnings.warn("Write done ctr_train_data_part_{}".format( - file_index)) + warnings.warn( + "Write done ctr_train_data_part_{}".format(file_index)) file_list = [os.path.join(file_dir, x) for x in os.listdir(file_dir)] assert len(file_list) == file_nums diff --git a/python/paddle/fluid/tests/unittests/decorator_helper.py b/python/paddle/fluid/tests/unittests/decorator_helper.py index 1a5f4540cf0..20e1e49b5e8 100644 --- a/python/paddle/fluid/tests/unittests/decorator_helper.py +++ b/python/paddle/fluid/tests/unittests/decorator_helper.py @@ -20,7 +20,9 @@ __all__ = ['many_times', 'prog_scope'] def many_times(times): + def __impl__(fn): + def __fn__(*args, **kwargs): for _ in range(times): fn(*args, **kwargs) @@ -31,7 +33,9 @@ def many_times(times): def prog_scope(): + def __impl__(fn): + def __fn__(*args, **kwargs): prog = fluid.Program() startup_prog = fluid.Program() diff --git a/python/paddle/fluid/tests/unittests/detected_gpu.py b/python/paddle/fluid/tests/unittests/detected_gpu.py index 8abd44aff71..28e0cc78760 100644 --- a/python/paddle/fluid/tests/unittests/detected_gpu.py +++ b/python/paddle/fluid/tests/unittests/detected_gpu.py @@ -1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,8 +19,8 @@ import paddle.fluid as fluid print("compile with cuda:", fluid.core.is_compiled_with_cuda()) print("get_cuda_device_count:", fluid.core.get_cuda_device_count()) -if fluid.core.is_compiled_with_cuda() and fluid.core.get_cuda_device_count( -) > 0: +if fluid.core.is_compiled_with_cuda( +) and fluid.core.get_cuda_device_count() > 0: sys.exit(0) else: sys.exit(1) diff --git a/python/paddle/fluid/tests/unittests/detected_xpu.py b/python/paddle/fluid/tests/unittests/detected_xpu.py index d7b6f58c941..a1b4b2ec1ec 100644 --- a/python/paddle/fluid/tests/unittests/detected_xpu.py +++ b/python/paddle/fluid/tests/unittests/detected_xpu.py @@ -1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py index de52072d4a8..1360d975603 100644 --- a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py @@ -75,6 +75,7 @@ def cnn_model(data): class TestDistMnist2x2(TestDistRunnerBase): + def get_model(self, batch_size=2, single_device=False): # Input data images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) @@ -87,16 +88,17 @@ class TestDistMnist2x2(TestDistRunnerBase): # Evaluator batch_size_tensor = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size_tensor) + batch_acc = fluid.layers.accuracy(input=predict, + label=label, + total=batch_size_tensor) inference_program = fluid.default_main_program().clone() # Reader - train_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) # Optimization # TODO(typhoonzero): fix distributed adam optimizer diff --git a/python/paddle/fluid/tests/unittests/dist_ctr.py b/python/paddle/fluid/tests/unittests/dist_ctr.py index c5aae1eef18..6cd452ed195 100644 --- a/python/paddle/fluid/tests/unittests/dist_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_ctr.py @@ -30,28 +30,26 @@ fluid.default_main_program().random_seed = 1 class TestDistCTR2x2(TestDistRunnerBase): + def get_model(self, batch_size=2): dnn_input_dim, lr_input_dim = dist_ctr_reader.load_data_meta() """ network definition """ - dnn_data = fluid.layers.data( - name="dnn_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - 
append_batch_size=False) - lr_data = fluid.layers.data( - name="lr_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) - label = fluid.layers.data( - name="click", - shape=[-1, 1], - dtype="int64", - lod_level=0, - append_batch_size=False) + dnn_data = fluid.layers.data(name="dnn_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + lr_data = fluid.layers.data(name="lr_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + label = fluid.layers.data(name="click", + shape=[-1, 1], + dtype="int64", + lod_level=0, + append_batch_size=False) # build dnn model dnn_layer_dims = [128, 64, 32, 1] @@ -63,8 +61,8 @@ class TestDistCTR2x2(TestDistRunnerBase): name="deep_embedding", initializer=fluid.initializer.Constant(value=0.01)), is_sparse=IS_SPARSE) - dnn_pool = fluid.layers.sequence_pool( - input=dnn_embedding, pool_type="sum") + dnn_pool = fluid.layers.sequence_pool(input=dnn_embedding, + pool_type="sum") dnn_out = dnn_pool for i, dim in enumerate(dnn_layer_dims[1:]): fc = fluid.layers.fc( @@ -106,11 +104,10 @@ class TestDistCTR2x2(TestDistRunnerBase): use_lr_decay = bool(os.getenv('LR_DECAY', 0)) lr = 0.0001 if use_lr_decay: - lr = fluid.layers.exponential_decay( - learning_rate=0.0001, - decay_steps=10000, - decay_rate=0.999, - staircase=True) + lr = fluid.layers.exponential_decay(learning_rate=0.0001, + decay_steps=10000, + decay_rate=0.999, + staircase=True) sgd_optimizer = fluid.optimizer.SGD(learning_rate=lr, regularization=regularization) diff --git a/python/paddle/fluid/tests/unittests/dist_ctr_reader.py b/python/paddle/fluid/tests/unittests/dist_ctr_reader.py index c030afdd4ff..4bc231e4eaf 100644 --- a/python/paddle/fluid/tests/unittests/dist_ctr_reader.py +++ b/python/paddle/fluid/tests/unittests/dist_ctr_reader.py @@ -109,6 +109,7 @@ feeding_index = {'dnn_input': 0, 'lr_input': 1, 'click': 2} class Dataset(object): + def train(self): ''' Load trainset. 
@@ -163,8 +164,7 @@ def load_data_meta(): lines = read_data('data.meta.txt') err_info = "wrong meta format" assert len(lines) == 2, err_info - assert 'dnn_input_dim:' in lines[0] and 'lr_input_dim:' in lines[ - 1], err_info + assert 'dnn_input_dim:' in lines[0] and 'lr_input_dim:' in lines[1], err_info res = map(int, [_.split(':')[1] for _ in lines]) res = list(res) logger.info('dnn input dim: %d' % res[0]) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py index be5118f0acc..9508dc6c262 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py @@ -39,6 +39,7 @@ fluid.default_main_program().random_seed = 1 def fake_ctr_reader(): + def reader(): for _ in range(1000): deep = np.random.random_integers(0, 1e5 - 1, size=16).tolist() @@ -66,40 +67,36 @@ class TestDistCTR2x2(FleetDistRunnerBase): """ dnn_input_dim, lr_input_dim = int(1e5), int(1e5) - dnn_data = fluid.layers.data( - name="dnn_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) - lr_data = fluid.layers.data( - name="lr_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) - label = fluid.layers.data( - name="click", - shape=[-1, 1], - dtype="int64", - lod_level=0, - append_batch_size=False) + dnn_data = fluid.layers.data(name="dnn_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + lr_data = fluid.layers.data(name="lr_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + label = fluid.layers.data(name="click", + shape=[-1, 1], + dtype="int64", + lod_level=0, + append_batch_size=False) datas = [dnn_data, lr_data, label] if args.reader == "pyreader": if is_train: - self.reader = fluid.io.PyReader( - feed_list=datas, - capacity=64, - iterable=False, - use_double_buffer=False) + self.reader = fluid.io.PyReader(feed_list=datas, + capacity=64, + iterable=False, + use_double_buffer=False) else: - self.test_reader = fluid.io.PyReader( - feed_list=datas, - capacity=64, - iterable=False, - use_double_buffer=False) + self.test_reader = fluid.io.PyReader(feed_list=datas, + capacity=64, + iterable=False, + use_double_buffer=False) + # build dnn model dnn_layer_dims = [128, 128, 64, 32, 1] @@ -112,8 +109,8 @@ class TestDistCTR2x2(FleetDistRunnerBase): initializer=fluid.initializer.Constant(value=0.01)), is_sparse=True, padding_idx=0) - dnn_pool = fluid.layers.sequence_pool( - input=dnn_embedding, pool_type="sum") + dnn_pool = fluid.layers.sequence_pool(input=dnn_embedding, + pool_type="sum") dnn_out = dnn_pool for i, dim in enumerate(dnn_layer_dims[1:]): fc = fluid.layers.fc( @@ -186,8 +183,8 @@ class TestDistCTR2x2(FleetDistRunnerBase): loss_val = exe.run(program=paddle.static.default_main_program(), fetch_list=[self.avg_cost.name]) loss_val = np.mean(loss_val) - message = "TEST ---> batch_idx: {} loss: {}\n".format(batch_idx, - loss_val) + message = "TEST ---> batch_idx: {} loss: {}\n".format( + batch_idx, loss_val) fleet.util.print_on_rank(message, 0) except fluid.core.EOFException: self.test_reader.reset() @@ -223,8 +220,8 @@ class TestDistCTR2x2(FleetDistRunnerBase): # np.array(loss_val), mode="sum") # loss_all_trainer = fleet.util.all_gather(float(loss_val)) # loss_val = float(reduce_output) / len(loss_all_trainer) - message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id, - loss_val) + message = "TRAIN ---> pass: {} loss: {}\n".format( + epoch_id, loss_val) 
fleet.util.print_on_rank(message, 0) pass_time = time.time() - pass_start @@ -236,8 +233,9 @@ class TestDistCTR2x2(FleetDistRunnerBase): fleet.save_persistables(exe, dirname=dirname) model_dir = tempfile.mkdtemp() - fleet.save_inference_model( - exe, model_dir, [feed.name for feed in self.feeds], self.avg_cost) + fleet.save_inference_model(exe, model_dir, + [feed.name for feed in self.feeds], + self.avg_cost) self.check_model_right(model_dir) shutil.rmtree(model_dir) @@ -256,24 +254,22 @@ class TestDistCTR2x2(FleetDistRunnerBase): dataset = paddle.distributed.QueueDataset() pipe_command = 'python ctr_dataset_reader.py' - dataset.init( - batch_size=batch_size, - use_var=self.feeds, - pipe_command=pipe_command, - thread_num=thread_num) + dataset.init(batch_size=batch_size, + use_var=self.feeds, + pipe_command=pipe_command, + thread_num=thread_num) dataset.set_filelist(filelist) for epoch_id in range(1): pass_start = time.time() dataset.set_filelist(filelist) - exe.train_from_dataset( - program=fluid.default_main_program(), - dataset=dataset, - fetch_list=[self.avg_cost], - fetch_info=["cost"], - print_period=2, - debug=int(os.getenv("Debug", "0"))) + exe.train_from_dataset(program=fluid.default_main_program(), + dataset=dataset, + fetch_list=[self.avg_cost], + fetch_info=["cost"], + print_period=2, + debug=int(os.getenv("Debug", "0"))) pass_time = time.time() - pass_start if os.getenv("SAVE_MODEL") == "1": @@ -317,13 +313,12 @@ class TestDistCTR2x2(FleetDistRunnerBase): for epoch_id in range(1): pass_start = time.time() - exe.train_from_dataset( - program=fluid.default_main_program(), - dataset=dataset, - fetch_list=[self.avg_cost], - fetch_info=["cost"], - print_period=2, - debug=int(os.getenv("Debug", "0"))) + exe.train_from_dataset(program=fluid.default_main_program(), + dataset=dataset, + fetch_list=[self.avg_cost], + fetch_info=["cost"], + print_period=2, + debug=int(os.getenv("Debug", "0"))) pass_time = time.time() - pass_start dataset.release_memory() diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py index 8b3d49a741a..4ecad3e97c6 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py @@ -75,12 +75,12 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2): loss_val = exe.run(program=fleet.main_program, fetch_list=[self.avg_cost.name]) loss_val = np.mean(loss_val) - reduce_output = fleet.util.all_reduce( - np.array(loss_val), mode="sum") + reduce_output = fleet.util.all_reduce(np.array(loss_val), + mode="sum") loss_all_trainer = fleet.util.all_gather(float(loss_val)) loss_val = float(reduce_output) / len(loss_all_trainer) - message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id, - loss_val) + message = "TRAIN ---> pass: {} loss: {}\n".format( + epoch_id, loss_val) fleet.util.print_on_rank(message, 0) pass_time = time.time() - pass_start @@ -88,8 +88,9 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2): self.reader.reset() model_dir = tempfile.mkdtemp() - fleet.save_inference_model( - exe, model_dir, [feed.name for feed in self.feeds], self.avg_cost) + fleet.save_inference_model(exe, model_dir, + [feed.name for feed in self.feeds], + self.avg_cost) self.check_model_right(model_dir) if fleet.is_first_worker(): fleet.save_persistables(executor=exe, dirname=model_dir) @@ -125,13 +126,12 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2): for epoch_id in range(1): pass_start = time.time() dataset.set_filelist(filelist) - 
exe.train_from_dataset( - program=fleet.main_program, - dataset=dataset, - fetch_list=[self.avg_cost], - fetch_info=["cost"], - print_period=2, - debug=int(os.getenv("Debug", "0"))) + exe.train_from_dataset(program=fleet.main_program, + dataset=dataset, + fetch_list=[self.avg_cost], + fetch_info=["cost"], + print_period=2, + debug=int(os.getenv("Debug", "0"))) pass_time = time.time() - pass_start if os.getenv("SAVE_MODEL") == "1": diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_debug_gloo.py b/python/paddle/fluid/tests/unittests/dist_fleet_debug_gloo.py index 7e811408291..d3cf735808d 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_debug_gloo.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_debug_gloo.py @@ -23,6 +23,7 @@ import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.transpiler.distribute_transpiler import DistributeTranspilerConfig + logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s") logger = logging.getLogger("fluid") logger.setLevel(logging.INFO) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py index c6c2537b42c..f714526286c 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py @@ -55,24 +55,21 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase): dnn_input_dim, lr_input_dim = int(1e5), int(1e5) with fluid.device_guard("cpu"): - dnn_data = fluid.layers.data( - name="dnn_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) - lr_data = fluid.layers.data( - name="lr_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) - label = fluid.layers.data( - name="click", - shape=[-1, 1], - dtype="float32", - lod_level=0, - append_batch_size=False) + dnn_data = fluid.layers.data(name="dnn_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + lr_data = fluid.layers.data(name="lr_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + label = fluid.layers.data(name="click", + shape=[-1, 1], + dtype="float32", + lod_level=0, + append_batch_size=False) datas = [dnn_data, lr_data, label] @@ -86,8 +83,8 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase): name="deep_embedding", initializer=fluid.initializer.Constant(value=0.01)), is_sparse=True) - dnn_pool = fluid.layers.sequence_pool( - input=dnn_embedding, pool_type="sum") + dnn_pool = fluid.layers.sequence_pool(input=dnn_embedding, + pool_type="sum") dnn_out = dnn_pool # build lr model @@ -99,8 +96,8 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase): name="wide_embedding", initializer=fluid.initializer.Constant(value=0.01)), is_sparse=True) - lr_pool = fluid.layers.sequence_pool( - input=lr_embbding, pool_type="sum") + lr_pool = fluid.layers.sequence_pool(input=lr_embbding, + pool_type="sum") with fluid.device_guard("gpu"): for i, dim in enumerate(dnn_layer_dims[1:]): @@ -144,8 +141,8 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase): train_file_list = ctr_dataset_reader.prepare_fake_data() exe = fluid.Executor(fluid.CPUPlace()) - real_program = fluid.default_main_program()._heter_pipeline_opt[ - "section_program"] + real_program = fluid.default_main_program( + 
)._heter_pipeline_opt["section_program"] print(real_program) exe.run(fluid.default_startup_program()) @@ -170,13 +167,12 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase): for epoch_id in range(1): pass_start = time.time() dataset.set_filelist(filelist) - exe.train_from_dataset( - program=fluid.default_main_program(), - dataset=dataset, - fetch_list=[self.avg_cost], - fetch_info=["cost"], - print_period=2, - debug=int(os.getenv("Debug", "0"))) + exe.train_from_dataset(program=fluid.default_main_program(), + dataset=dataset, + fetch_list=[self.avg_cost], + fetch_info=["cost"], + print_period=2, + debug=int(os.getenv("Debug", "0"))) pass_time = time.time() - pass_start print("do_dataset_training done. using time {}".format(pass_time)) exe.close() @@ -186,20 +182,19 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase): exe = fluid.Executor() exe.run(fluid.default_startup_program()) fleet.init_worker() - real_program = fluid.default_main_program()._heter_pipeline_opt[ - "section_program"] + real_program = fluid.default_main_program( + )._heter_pipeline_opt["section_program"] print(real_program) thread_num = int(os.getenv("CPU_NUM", 2)) batch_size = 128 pass_start = time.time() - exe.train_from_dataset( - program=fluid.default_main_program(), - fetch_list=[self.avg_cost], - fetch_info=["cost"], - print_period=2, - debug=int(os.getenv("Debug", "0"))) + exe.train_from_dataset(program=fluid.default_main_program(), + fetch_list=[self.avg_cost], + fetch_info=["cost"], + print_period=2, + debug=int(os.getenv("Debug", "0"))) exe.close() pass_time = time.time() - pass_start print("do_dataset_heter_training done. using time {}".format(pass_time)) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py index 575c07390a3..19e278b4f46 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py @@ -67,6 +67,7 @@ def cnn_model(data): class TestFleetMetaOptimizerPrecision(TestDistRunnerBase): + def get_model(self, batch_size=2, single_device=False): # Input data images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) @@ -79,16 +80,17 @@ class TestFleetMetaOptimizerPrecision(TestDistRunnerBase): # Evaluator batch_size_tensor = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size_tensor) + batch_acc = fluid.layers.accuracy(input=predict, + label=label, + total=batch_size_tensor) test_program = fluid.default_main_program().clone(for_test=True) # Reader - train_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) optimizer = paddle.fluid.optimizer.Adam(0.01) if single_device: @@ -98,8 +100,8 @@ class TestFleetMetaOptimizerPrecision(TestDistRunnerBase): fleet.init(role) strategy = paddle.distributed.fleet.DistributedStrategy() strategy.without_graph_optimization = True - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) return test_program, avg_cost, train_reader, test_reader, batch_acc, predict diff --git 
a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py index aaf33d04e6b..cab4484d3e4 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py @@ -67,6 +67,7 @@ def cnn_model(data): class TestFleetMetaOptimizerFuseAllReducePrecision(TestDistRunnerBase): + def get_model(self, batch_size=2, single_device=False): # Input data images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) @@ -79,16 +80,17 @@ class TestFleetMetaOptimizerFuseAllReducePrecision(TestDistRunnerBase): # Evaluator batch_size_tensor = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size_tensor) + batch_acc = fluid.layers.accuracy(input=predict, + label=label, + total=batch_size_tensor) test_program = fluid.default_main_program().clone(for_test=True) # Reader - train_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) optimizer = paddle.fluid.optimizer.Adam(0.01) if single_device: @@ -101,8 +103,8 @@ class TestFleetMetaOptimizerFuseAllReducePrecision(TestDistRunnerBase): strategy.fuse_all_reduce_ops = True strategy._calc_comm_same_stream = False strategy.fuse_grad_size_in_num = 8 - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) return test_program, avg_cost, train_reader, test_reader, batch_acc, predict diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index cfd9887f332..4a43fb44f46 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -54,6 +54,7 @@ fluid.default_main_program().random_seed = 1 def fake_simnet_reader(): + def reader(): for _ in range(1000): q = np.random.random_integers(0, 1500 - 1, size=1).tolist() @@ -69,24 +70,27 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = fluid.layers.reduce_sum(cond) - acc = fluid.layers.elementwise_div( - cond_3, - fluid.layers.fill_constant( - shape=[1], value=batch_size * 1.0, dtype='float64'), - name="simnet_acc") + acc = fluid.layers.elementwise_div(cond_3, + fluid.layers.fill_constant( + shape=[1], + value=batch_size * 1.0, + dtype='float64'), + name="simnet_acc") return acc def get_loss(cos_q_pt, cos_q_nt): loss_op1 = fluid.layers.elementwise_sub( - fluid.layers.fill_constant_batch_size_like( - input=cos_q_pt, shape=[-1, 1], value=margin, dtype='float32'), - cos_q_pt) + fluid.layers.fill_constant_batch_size_like(input=cos_q_pt, + shape=[-1, 1], + value=margin, + dtype='float32'), cos_q_pt) loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) loss_op3 = fluid.layers.elementwise_max( - fluid.layers.fill_constant_batch_size_like( - input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32'), - loss_op2) + 
fluid.layers.fill_constant_batch_size_like(input=loss_op2, + shape=[-1, 1], + value=0.0, + dtype='float32'), loss_op2) avg_cost = fluid.layers.mean(loss_op3) return avg_cost @@ -97,26 +101,31 @@ def train_network(batch_size, is_self_contained_lr=False, is_pyreader=False): # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1) + q = fluid.layers.data(name="query_ids", + shape=[1], + dtype="int64", + lod_level=1) # label data label = fluid.layers.data(name="label", shape=[1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) + pt = fluid.layers.data(name="pos_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1) + nt = fluid.layers.data(name="neg_title_ids", + shape=[1], + dtype="int64", + lod_level=1) datas = [q, label, pt, nt] reader = None if is_pyreader: - reader = fluid.io.PyReader( - feed_list=datas, - capacity=64, - iterable=False, - use_double_buffer=False) + reader = fluid.io.PyReader(feed_list=datas, + capacity=64, + iterable=False, + use_double_buffer=False) # embedding q_emb = fluid.embedding( @@ -137,7 +146,8 @@ def train_network(batch_size, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.01), name="__q_fc__", - learning_rate=base_lr), ) + learning_rate=base_lr), + ) # embedding pt_emb = fluid.embedding( @@ -235,8 +245,8 @@ class TestDistSimnetBow2x2(FleetDistRunnerBase): loss_val = exe.run(program=fluid.default_main_program(), fetch_list=[self.avg_cost.name]) loss_val = np.mean(loss_val) - message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id, - loss_val) + message = "TRAIN ---> pass: {} loss: {}\n".format( + epoch_id, loss_val) fleet.util.print_on_rank(message, 0) pass_time = time.time() - pass_start diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py index 4e21d115612..60b8a7bb6fd 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py @@ -30,6 +30,7 @@ from test_dist_fleet_base import runtime_main, FleetDistRunnerBase def fake_ctr_reader(): + def reader(): for _ in range(1000): deep = np.random.random_integers(0, 1e10, size=16).tolist() @@ -57,33 +58,29 @@ class TestDistCTR2x2(FleetDistRunnerBase): """ dnn_input_dim, lr_input_dim = 10, 10 - dnn_data = fluid.layers.data( - name="dnn_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) - lr_data = fluid.layers.data( - name="lr_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) - label = fluid.layers.data( - name="click", - shape=[-1, 1], - dtype="int64", - lod_level=0, - append_batch_size=False) + dnn_data = fluid.layers.data(name="dnn_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + lr_data = fluid.layers.data(name="lr_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + label = fluid.layers.data(name="click", + shape=[-1, 1], + dtype="int64", + lod_level=0, + append_batch_size=False) datas = [dnn_data, lr_data, label] if args.reader == "pyreader": - self.reader = fluid.io.PyReader( - feed_list=datas, - capacity=64, - iterable=False, - use_double_buffer=False) + self.reader = fluid.io.PyReader(feed_list=datas, + capacity=64, + iterable=False, + use_double_buffer=False) # 
build dnn model initializer = int(os.getenv("INITIALIZER", "0")) @@ -105,10 +102,9 @@ class TestDistCTR2x2(FleetDistRunnerBase): size=[dnn_input_dim, dnn_layer_dims[0]], is_test=inference, entry=entry, - param_attr=fluid.ParamAttr( - name="deep_embedding", initializer=init)) - dnn_pool = fluid.layers.sequence_pool( - input=dnn_embedding, pool_type="sum") + param_attr=fluid.ParamAttr(name="deep_embedding", initializer=init)) + dnn_pool = fluid.layers.sequence_pool(input=dnn_embedding, + pool_type="sum") dnn_out = dnn_pool for i, dim in enumerate(dnn_layer_dims[1:]): fc = fluid.layers.fc( @@ -170,8 +166,8 @@ class TestDistCTR2x2(FleetDistRunnerBase): loss_val = exe.run(program=fluid.default_main_program(), fetch_list=[self.avg_cost.name]) loss_val = np.mean(loss_val) - print("TRAIN ---> pass: {} loss: {}\n".format(epoch_id, - loss_val)) + print("TRAIN ---> pass: {} loss: {}\n".format( + epoch_id, loss_val)) except fluid.core.EOFException: self.reader.reset() diff --git a/python/paddle/fluid/tests/unittests/dist_mnist.py b/python/paddle/fluid/tests/unittests/dist_mnist.py index f63139464e7..cdfec08f9fe 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist.py @@ -76,6 +76,7 @@ def cnn_model(data): class TestDistMnist2x2(TestDistRunnerBase): + def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Input data images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) @@ -88,8 +89,9 @@ class TestDistMnist2x2(TestDistRunnerBase): # Evaluator batch_size_tensor = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size_tensor) + batch_acc = fluid.layers.accuracy(input=predict, + label=label, + total=batch_size_tensor) inference_program = fluid.default_main_program().clone() # Optimization @@ -99,18 +101,19 @@ class TestDistMnist2x2(TestDistRunnerBase): if not use_dgc: opt = fluid.optimizer.Momentum(learning_rate=self.lr, momentum=0.9) else: - opt = fluid.optimizer.DGCMomentumOptimizer( - learning_rate=self.lr, momentum=0.9, rampup_begin_step=2) + opt = fluid.optimizer.DGCMomentumOptimizer(learning_rate=self.lr, + momentum=0.9, + rampup_begin_step=2) # Reader - train_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) if dist_strategy: - dist_opt = fleet.distributed_optimizer( - optimizer=opt, strategy=dist_strategy) + dist_opt = fleet.distributed_optimizer(optimizer=opt, + strategy=dist_strategy) _, param_grads = dist_opt.minimize(avg_cost) else: opt.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py b/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py index d386e75fd88..ca59e33ec9e 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py @@ -49,6 +49,7 @@ def test_merge_reader(repeat_batch_size=8): class TestDistMnist2x2(TestDistRunnerBase): + def get_model(self, batch_size=2): # Input data images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) @@ -61,8 +62,9 @@ class TestDistMnist2x2(TestDistRunnerBase): # Evaluator batch_size_tensor = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( 
- input=predict, label=label, total=batch_size_tensor) + batch_acc = fluid.layers.accuracy(input=predict, + label=label, + total=batch_size_tensor) inference_program = fluid.default_main_program().clone() # Optimization @@ -70,8 +72,8 @@ class TestDistMnist2x2(TestDistRunnerBase): # Reader train_reader = paddle.batch(test_merge_reader, batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) opt.minimize(avg_cost) return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py b/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py index 3198c6cac86..b78dd744a9a 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py @@ -29,6 +29,7 @@ fluid.default_main_program().random_seed = 1 class TestDistMnist2x2(TestDistRunnerBase): + def get_model(self, batch_size=2): # Input data images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) @@ -41,20 +42,21 @@ class TestDistMnist2x2(TestDistRunnerBase): # Evaluator batch_size_tensor = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size_tensor) + batch_acc = fluid.layers.accuracy(input=predict, + label=label, + total=batch_size_tensor) inference_program = fluid.default_main_program().clone() # Optimization - opt = fluid.optimizer.MomentumOptimizer( - learning_rate=0.001, momentum=0.9) + opt = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, + momentum=0.9) opt = FP16AllReduce(opt) # Reader - train_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) opt.minimize(avg_cost) return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge.py b/python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge.py index 66ea24e0bde..50a053f57b8 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge.py @@ -28,6 +28,7 @@ fluid.default_main_program().random_seed = 1 class TestDistMnist2x2(TestDistRunnerBase): + def get_model(self, batch_size=2): # Input data images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) @@ -40,20 +41,21 @@ class TestDistMnist2x2(TestDistRunnerBase): # Evaluator batch_size_tensor = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size_tensor) + batch_acc = fluid.layers.accuracy(input=predict, + label=label, + total=batch_size_tensor) inference_program = fluid.default_main_program().clone() # Optimization - opt = fluid.optimizer.MomentumOptimizer( - learning_rate=0.001, momentum=0.9) + opt = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, + momentum=0.9) opt = fluid.optimizer.GradientMergeOptimizer(opt, 2) # Reader - train_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), 
batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) opt.minimize(avg_cost) return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge_raw_optimizer.py b/python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge_raw_optimizer.py index 733c4267db6..ff31a7016a6 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge_raw_optimizer.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge_raw_optimizer.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,6 +23,7 @@ from dist_mnist import cnn_model class TestDistMnistGradientMergeRawOptimizer(TestDistRunnerBase): + def get_model(self, batch_size=2, single_device=False): paddle.enable_static() paddle.seed(1) @@ -53,8 +54,9 @@ class TestDistMnistGradientMergeRawOptimizer(TestDistRunnerBase): strategy.without_graph_optimization = True fleet.init(is_collective=True, strategy=strategy) - image = paddle.static.data( - name='image', shape=[None, 1, 28, 28], dtype="float32") + image = paddle.static.data(name='image', + shape=[None, 1, 28, 28], + dtype="float32") label = paddle.static.data(name='label', shape=[None, 1], dtype='int64') predict = cnn_model(image) acc = paddle.metric.accuracy(predict, label) @@ -86,10 +88,10 @@ class TestDistMnistGradientMergeRawOptimizer(TestDistRunnerBase): else: assert start_allreduce_idx == 1 - train_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) return test_program, cost, train_reader, test_reader, acc, predict diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_lars.py b/python/paddle/fluid/tests/unittests/dist_mnist_lars.py index 977e17c37f7..31362565c89 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist_lars.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist_lars.py @@ -40,6 +40,7 @@ fluid.default_main_program().random_seed = 1 class TestDistMnist2x2(TestDistRunnerBase): + def get_model(self, batch_size=2): # Input data images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) @@ -52,19 +53,20 @@ class TestDistMnist2x2(TestDistRunnerBase): # Evaluator batch_size_tensor = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size_tensor) + batch_acc = fluid.layers.accuracy(input=predict, + label=label, + total=batch_size_tensor) inference_program = fluid.default_main_program().clone() # Optimization - opt = fluid.optimizer.LarsMomentumOptimizer( - learning_rate=0.001, momentum=0.9) + opt = fluid.optimizer.LarsMomentumOptimizer(learning_rate=0.001, + momentum=0.9) # Reader - train_reader = 
paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) opt.minimize(avg_cost) return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict diff --git a/python/paddle/fluid/tests/unittests/dist_save_load.py b/python/paddle/fluid/tests/unittests/dist_save_load.py index dd010e962e2..175b100990b 100644 --- a/python/paddle/fluid/tests/unittests/dist_save_load.py +++ b/python/paddle/fluid/tests/unittests/dist_save_load.py @@ -40,7 +40,9 @@ from dist_simnet_bow import TestDistSimnetBow2x2, DATA_URL, DATA_MD5 class TestDistSaveLoad2x2(TestDistSimnetBow2x2): + def _load_persistable_vars(self, executor, dirname, program): + def _is_checkpoint_var(var): """ the checkpoint will not save or load all the variables. @@ -68,20 +70,18 @@ class TestDistSaveLoad2x2(TestDistSimnetBow2x2): return var.persistable - io.load_vars( - executor, - dirname=dirname, - main_program=program, - predicate=_is_checkpoint_var, - filename=None) + io.load_vars(executor, + dirname=dirname, + main_program=program, + predicate=_is_checkpoint_var, + filename=None) def run_pserver(self, args): self.get_model(batch_size=2) # NOTE: pserver should not call memory optimize - t = self.get_transpiler(args.trainer_id, - fluid.default_main_program(), args.endpoints, - args.trainers, args.sync_mode, False, - args.current_endpoint) + t = self.get_transpiler(args.trainer_id, fluid.default_main_program(), + args.endpoints, args.trainers, args.sync_mode, + False, args.current_endpoint) pserver_prog = t.get_pserver_program(args.current_endpoint) startup_prog = t.get_startup_program(args.current_endpoint, pserver_prog) @@ -130,11 +130,10 @@ class TestDistSaveLoad2x2(TestDistSimnetBow2x2): else: build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce - exe = fluid.ParallelExecutor( - args.use_cuda, - loss_name=avg_cost.name, - exec_strategy=strategy, - build_strategy=build_stra) + exe = fluid.ParallelExecutor(args.use_cuda, + loss_name=avg_cost.name, + exec_strategy=strategy, + build_strategy=build_stra) feed_var_list = [ var for var in trainer_prog.global_block().vars.values() @@ -167,8 +166,8 @@ class TestDistSaveLoad2x2(TestDistSimnetBow2x2): if need_save and model_dir: io.save_persistables(startup_exe, model_dir, trainer_prog) - var = np.array(fluid.global_scope().find_var('__fc_b__').get_tensor( - )) + var = np.array( + fluid.global_scope().find_var('__fc_b__').get_tensor()) sys.stdout.buffer.write(pickle.dumps(np.ravel(var).tolist())) elif save_mode == "DIST": diff --git a/python/paddle/fluid/tests/unittests/dist_se_resnext.py b/python/paddle/fluid/tests/unittests/dist_se_resnext.py index 5ba40c7c838..ad5d632637e 100644 --- a/python/paddle/fluid/tests/unittests/dist_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/dist_se_resnext.py @@ -49,6 +49,7 @@ train_parameters = { class SE_ResNeXt(): + def __init__(self, layers=50): self.params = train_parameters self.layers = layers @@ -64,56 +65,53 @@ class SE_ResNeXt(): depth = [3, 4, 6, 3] num_filters = [128, 256, 512, 1024] - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') + conv = self.conv_bn_layer(input=input, + 
num_filters=64, + filter_size=7, + stride=2, + act='relu') + conv = fluid.layers.pool2d(input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') elif layers == 101: cardinality = 32 reduction_ratio = 16 depth = [3, 4, 23, 3] num_filters = [128, 256, 512, 1024] - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') + conv = self.conv_bn_layer(input=input, + num_filters=64, + filter_size=7, + stride=2, + act='relu') + conv = fluid.layers.pool2d(input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') elif layers == 152: cardinality = 64 reduction_ratio = 16 depth = [3, 8, 36, 3] num_filters = [128, 256, 512, 1024] - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=3, - stride=2, - act='relu') - conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu') - conv = self.conv_bn_layer( - input=conv, - num_filters=128, - filter_size=3, - stride=1, - act='relu') + conv = self.conv_bn_layer(input=input, + num_filters=64, + filter_size=3, + stride=2, + act='relu') + conv = self.conv_bn_layer(input=conv, + num_filters=64, + filter_size=3, + stride=1, + act='relu') + conv = self.conv_bn_layer(input=conv, + num_filters=128, + filter_size=3, + stride=1, + act='relu') conv = fluid.layers.pool2d( input=conv, pool_size=3, pool_stride=2, pool_padding=1, \ pool_type='max') @@ -127,16 +125,18 @@ class SE_ResNeXt(): cardinality=cardinality, reduction_ratio=reduction_ratio) - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) + pool = fluid.layers.pool2d(input=conv, + pool_size=7, + pool_type='avg', + global_pooling=True) drop = fluid.layers.dropout(x=pool, dropout_prob=0.2) stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0) out = fluid.layers.fc( input=drop, size=class_dim, act='softmax', - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05))) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.05))) return out def shortcut(self, input, ch_out, stride): @@ -149,21 +149,23 @@ class SE_ResNeXt(): def bottleneck_block(self, input, num_filters, stride, cardinality, reduction_ratio): - conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu') - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - groups=cardinality, - act='relu') - conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters * 2, filter_size=1, act=None) - scale = self.squeeze_excitation( - input=conv2, - num_channels=num_filters * 2, - reduction_ratio=reduction_ratio) + conv0 = self.conv_bn_layer(input=input, + num_filters=num_filters, + filter_size=1, + act='relu') + conv1 = self.conv_bn_layer(input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + groups=cardinality, + act='relu') + conv2 = self.conv_bn_layer(input=conv1, + num_filters=num_filters * 2, + filter_size=1, + act=None) + scale = self.squeeze_excitation(input=conv2, + num_channels=num_filters * 2, + reduction_ratio=reduction_ratio) short = self.shortcut(input, num_filters * 2, stride) @@ -185,37 +187,41 @@ class SE_ResNeXt(): groups=groups, act=None, # avoid pserver CPU init differs from GPU - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05)), + 
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.05)), bias_attr=False) return fluid.layers.batch_norm(input=conv, act=act) def squeeze_excitation(self, input, num_channels, reduction_ratio): - pool = fluid.layers.pool2d( - input=input, pool_size=0, pool_type='avg', global_pooling=True) + pool = fluid.layers.pool2d(input=input, + pool_size=0, + pool_type='avg', + global_pooling=True) stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) squeeze = fluid.layers.fc( input=pool, size=num_channels // reduction_ratio, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05)), + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.05)), act='relu') stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) excitation = fluid.layers.fc( input=squeeze, size=num_channels, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05)), + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.05)), act='sigmoid') scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) return scale class DistSeResneXt2x2(TestDistRunnerBase): + def get_model(self, batch_size=2, use_dgc=False): # Input data - image = fluid.layers.data( - name="data", shape=[3, 224, 224], dtype='float32') + image = fluid.layers.data(name="data", + shape=[3, 224, 224], + dtype='float32') label = fluid.layers.data(name="int64", shape=[1], dtype='int64') # Train program @@ -241,24 +247,24 @@ class DistSeResneXt2x2(TestDistRunnerBase): if not use_dgc: optimizer = fluid.optimizer.Momentum( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr), + learning_rate=fluid.layers.piecewise_decay(boundaries=bd, + values=lr), momentum=0.9, regularization=fluid.regularizer.L2Decay(1e-4)) else: optimizer = fluid.optimizer.DGCMomentumOptimizer( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr), + learning_rate=fluid.layers.piecewise_decay(boundaries=bd, + values=lr), momentum=0.9, rampup_begin_step=0, regularization=fluid.regularizer.L2Decay(1e-4)) optimizer.minimize(avg_cost) # Reader - train_reader = paddle.batch( - paddle.dataset.flowers.test(use_xmap=False), batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.flowers.test(use_xmap=False), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.flowers.test(use_xmap=False), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.flowers.test(use_xmap=False), + batch_size=batch_size) return test_program, avg_cost, train_reader, test_reader, acc_top1, out diff --git a/python/paddle/fluid/tests/unittests/dist_sharding_save.py b/python/paddle/fluid/tests/unittests/dist_sharding_save.py index 7d3d934cb45..e31901c8c85 100755 --- a/python/paddle/fluid/tests/unittests/dist_sharding_save.py +++ b/python/paddle/fluid/tests/unittests/dist_sharding_save.py @@ -42,18 +42,20 @@ def runtime_main(): fleet.init(role) with fluid.program_guard(train_prog, startup_prog): with fluid.unique_name.guard(): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=256, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = 
paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() @@ -64,10 +66,10 @@ def runtime_main(): "sharding_degree": 2, } - optimizer = paddle.fluid.optimizer.Momentum( - learning_rate=0.01, momentum=0.9) - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = paddle.fluid.optimizer.Momentum(learning_rate=0.01, + momentum=0.9) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) # execution @@ -76,8 +78,10 @@ def runtime_main(): exe = fluid.Executor(place) exe.run(startup_prog) dirname = "./ut_sharding_save_model" - sharding.utils.save_persistables( - exe, dirname, main_program=train_prog, filename=None) + sharding.utils.save_persistables(exe, + dirname, + main_program=train_prog, + filename=None) out_losses = [] sys.stdout.buffer.write(pickle.dumps(out_losses)) @@ -85,8 +89,8 @@ def runtime_main(): if __name__ == "__main__": #NOTE(liangjianzhong): dist unittest should be imlpement using runtime_main in test_dist_base.py - # but the runtime_main in test_dist_base.py use the fleet, DistributedStrategy from - # paddle.fluid.incubate.fleet.collective which is not support by sharding (paddle.distributed.fleet). + # but the runtime_main in test_dist_base.py use the fleet, DistributedStrategy from + # paddle.fluid.incubate.fleet.collective which is not support by sharding (paddle.distributed.fleet). # this should be update in future. # runtime_main(TestDistMnist2x2) runtime_main() diff --git a/python/paddle/fluid/tests/unittests/dist_text_classification.py b/python/paddle/fluid/tests/unittests/dist_text_classification.py index b96032b92eb..ede62e643d2 100644 --- a/python/paddle/fluid/tests/unittests/dist_text_classification.py +++ b/python/paddle/fluid/tests/unittests/dist_text_classification.py @@ -77,28 +77,30 @@ def conv_net(input, filter_size=window_size, act="tanh", pool_type="max", - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01))) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.01))) fc_0 = fluid.layers.fc( input=[conv_3], size=fc0_dim, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01))) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.01))) prediction = fluid.layers.fc( input=[fc_0], size=class_dim, act="softmax", - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01))) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.01))) return prediction def inference_network(dict_dim): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) out = conv_net(data, dict_dim) return out @@ -119,14 +121,17 @@ def get_optimizer(learning_rate): class TestDistTextClassification2x2(TestDistRunnerBase): + def get_model(self, batch_size=2): vocab = os.path.join(paddle.dataset.common.DATA_HOME, "text_classification", "imdb.vocab") word_dict, dict_dim = get_worddict(vocab) # Input data - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name='label', shape=[1], dtype='int64') # Train program @@ -162,9 +167,9 
@@ def tokenize(pattern): while tf != None: if bool(pattern.match(tf.name)): # newline and punctuations removal and ad-hoc tokenization. - yield tarf.extractfile(tf).read().rstrip(six.b( - "\n\r")).translate( - None, six.b(string.punctuation)).lower().split() + yield tarf.extractfile(tf).read().rstrip( + six.b("\n\r")).translate(None, six.b( + string.punctuation)).lower().split() tf = tarf.next() @@ -198,9 +203,8 @@ def train(word_idx): :return: Training reader creator :rtype: callable """ - return reader_creator( - re.compile(r"train/pos/.*\.txt$"), - re.compile(r"train/neg/.*\.txt$"), word_idx) + return reader_creator(re.compile(r"train/pos/.*\.txt$"), + re.compile(r"train/neg/.*\.txt$"), word_idx) def test(word_idx): @@ -215,9 +219,8 @@ def test(word_idx): :return: Test reader creator :rtype: callable """ - return reader_creator( - re.compile(r"test/pos/.*\.txt$"), - re.compile(r"test/neg/.*\.txt$"), word_idx) + return reader_creator(re.compile(r"test/pos/.*\.txt$"), + re.compile(r"test/neg/.*\.txt$"), word_idx) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index db321f94178..b91e43c53b9 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -193,8 +193,8 @@ input_descs = { # encoder. # The actual data shape of src_slf_attn_bias is: # [batch_size, n_head, max_src_len_in_batch, max_src_len_in_batch] - "src_slf_attn_bias": [(batch_size, ModelHyperParams.n_head, seq_len, - seq_len), "float32"], + "src_slf_attn_bias": + [(batch_size, ModelHyperParams.n_head, seq_len, seq_len), "float32"], # The actual data shape of trg_word is: # [batch_size * max_trg_len_in_batch, 1] "trg_word": [(batch_size, seq_len, long_type(1)), "int64", @@ -206,14 +206,14 @@ input_descs = { # subsequent words in the decoder. # The actual data shape of trg_slf_attn_bias is: # [batch_size, n_head, max_trg_len_in_batch, max_trg_len_in_batch] - "trg_slf_attn_bias": [(batch_size, ModelHyperParams.n_head, seq_len, - seq_len), "float32"], + "trg_slf_attn_bias": + [(batch_size, ModelHyperParams.n_head, seq_len, seq_len), "float32"], # This input is used to remove attention weights on paddings of the source # input in the encoder-decoder attention. # The actual data shape of trg_src_attn_bias is: # [batch_size, n_head, max_trg_len_in_batch, max_src_len_in_batch] - "trg_src_attn_bias": [(batch_size, ModelHyperParams.n_head, seq_len, - seq_len), "float32"], + "trg_src_attn_bias": + [(batch_size, ModelHyperParams.n_head, seq_len, seq_len), "float32"], # This input is used in independent decoder program for inference. # The actual data shape of enc_output is: # [batch_size, max_src_len_in_batch, d_model] @@ -234,31 +234,37 @@ input_descs = { # Names of word embedding table which might be reused for weight sharing. word_emb_param_names = ( "src_word_emb_table", - "trg_word_emb_table", ) + "trg_word_emb_table", +) # Names of position encoding table which will be initialized externally. pos_enc_param_names = ( "src_pos_enc_table", - "trg_pos_enc_table", ) + "trg_pos_enc_table", +) # separated inputs for different usages. 
encoder_data_input_fields = ( "src_word", "src_pos", - "src_slf_attn_bias", ) + "src_slf_attn_bias", +) decoder_data_input_fields = ( "trg_word", "trg_pos", "trg_slf_attn_bias", "trg_src_attn_bias", - "enc_output", ) + "enc_output", +) label_data_input_fields = ( "lbl_word", - "lbl_weight", ) + "lbl_weight", +) # In fast decoder, trg_pos (only containing the current time step) is generated # by ops and trg_slf_attn_bias is not needed. fast_decoder_data_input_fields = ( "trg_word", "init_score", - "trg_src_attn_bias", ) + "trg_src_attn_bias", +) # fast_decoder_util_input_fields = ( # "trg_slf_attn_pre_softmax_shape_delta", @@ -314,21 +320,22 @@ def pad_batch_data(insts, """ return_list = [] max_len = max(len(inst) for inst in insts) - num_token = six.moves.reduce( - lambda x, y: x + y, - [len(inst) for inst in insts]) if return_num_token else 0 + num_token = six.moves.reduce(lambda x, y: x + y, + [len(inst) + for inst in insts]) if return_num_token else 0 # Any token included in dict can be used to pad, since the paddings' loss # will be masked out by weights and make no effect on parameter gradients. inst_data = np.array( [inst + [pad_idx] * (max_len - len(inst)) for inst in insts]) return_list += [inst_data.astype("int64").reshape([-1, 1])] if is_label: # label weight - inst_weight = np.array( - [[1.] * len(inst) + [0.] * (max_len - len(inst)) for inst in insts]) + inst_weight = np.array([[1.] * len(inst) + [0.] * (max_len - len(inst)) + for inst in insts]) return_list += [inst_weight.astype("float32").reshape([-1, 1])] else: # position data inst_pos = np.array([ - list(range(1, len(inst) + 1)) + [0] * (max_len - len(inst)) + list(range(1, + len(inst) + 1)) + [0] * (max_len - len(inst)) for inst in insts ]) return_list += [inst_pos.astype("int64").reshape([-1, 1])] @@ -461,12 +468,11 @@ def test_context(test_program, avg_cost, train_exe, dev_count, data_input_names, strategy = fluid.ExecutionStrategy() strategy.num_threads = 1 - test_exe = fluid.ParallelExecutor( - use_cuda=TrainTaskConfig.use_gpu, - main_program=test_program, - share_vars_from=train_exe, - build_strategy=build_strategy, - exec_strategy=strategy) + test_exe = fluid.ParallelExecutor(use_cuda=TrainTaskConfig.use_gpu, + main_program=test_program, + share_vars_from=train_exe, + build_strategy=build_strategy, + exec_strategy=strategy) def test(exe=test_exe): test_total_cost = 0 @@ -477,8 +483,7 @@ def test_context(test_program, avg_cost, train_exe, dev_count, data_input_names, for batch_id, data in enumerate(test_data()): feed_list = [] for place_id, data_buffer in enumerate( - split_data( - data, num_part=dev_count)): + split_data(data, num_part=dev_count)): data_input_dict, _ = prepare_batch_input( data_buffer, data_input_names, ModelHyperParams.eos_idx, ModelHyperParams.eos_idx, ModelHyperParams.n_head, @@ -536,12 +541,11 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler, strategy = fluid.ExecutionStrategy() strategy.num_threads = 1 - train_exe = fluid.ParallelExecutor( - use_cuda=TrainTaskConfig.use_gpu, - loss_name=sum_cost.name, - main_program=train_progm, - build_strategy=build_strategy, - exec_strategy=strategy) + train_exe = fluid.ParallelExecutor(use_cuda=TrainTaskConfig.use_gpu, + loss_name=sum_cost.name, + main_program=train_progm, + build_strategy=build_strategy, + exec_strategy=strategy) data_input_names = encoder_data_input_fields + decoder_data_input_fields[: -1] + label_data_input_fields @@ -552,10 +556,10 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, 
lr_scheduler, # the best cross-entropy value with label smoothing loss_normalizer = -((1. - TrainTaskConfig.label_smooth_eps) * np.log( - (1. - TrainTaskConfig.label_smooth_eps - )) + TrainTaskConfig.label_smooth_eps * - np.log(TrainTaskConfig.label_smooth_eps / ( - ModelHyperParams.trg_vocab_size - 1) + 1e-20)) + (1. - TrainTaskConfig.label_smooth_eps)) + + TrainTaskConfig.label_smooth_eps * + np.log(TrainTaskConfig.label_smooth_eps / + (ModelHyperParams.trg_vocab_size - 1) + 1e-20)) init = False for pass_id in six.moves.xrange(TrainTaskConfig.pass_num): pass_start_time = time.time() @@ -570,8 +574,7 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler, lr_rate = lr_scheduler.update_learning_rate() for place_id, data_buffer in enumerate( - split_data( - data, num_part=dev_count)): + split_data(data, num_part=dev_count)): data_input_dict, num_token = prepare_batch_input( data_buffer, data_input_names, ModelHyperParams.eos_idx, ModelHyperParams.eos_idx, ModelHyperParams.n_head, @@ -579,9 +582,8 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler, total_num_token += num_token feed_kv_pairs = list(data_input_dict.items()) if TrainTaskConfig.local: - feed_kv_pairs += list({ - lr_scheduler.learning_rate.name: lr_rate - }.items()) + feed_kv_pairs += list( + {lr_scheduler.learning_rate.name: lr_rate}.items()) feed_list.append(dict(feed_kv_pairs)) if not init: @@ -626,6 +628,7 @@ class SortType(object): class Converter(object): + def __init__(self, vocab, beg, end, unk, delimiter): self._vocab = vocab self._beg = beg @@ -641,6 +644,7 @@ class Converter(object): class ComposedConverter(object): + def __init__(self, converters): self._converters = converters @@ -652,6 +656,7 @@ class ComposedConverter(object): class SentenceBatchCreator(object): + def __init__(self, batch_size): self.batch = [] self._batch_size = batch_size @@ -665,6 +670,7 @@ class SentenceBatchCreator(object): class TokenBatchCreator(object): + def __init__(self, batch_size): self.batch = [] self.max_len = -1 @@ -684,6 +690,7 @@ class TokenBatchCreator(object): class SampleInfo(object): + def __init__(self, i, max_len, min_len): self.i = i self.min_len = min_len @@ -691,6 +698,7 @@ class SampleInfo(object): class MinMaxFilter(object): + def __init__(self, max_len, min_len, underlying_creator): self._min_len = min_len self._max_len = max_len @@ -823,21 +831,19 @@ class DataReader(object): def load_src_trg_ids(self, end_mark, fpattern, start_mark, tar_fname, unk_mark): converters = [ - Converter( - vocab=self._src_vocab, - beg=self._src_vocab[start_mark], - end=self._src_vocab[end_mark], - unk=self._src_vocab[unk_mark], - delimiter=self._token_delimiter) + Converter(vocab=self._src_vocab, + beg=self._src_vocab[start_mark], + end=self._src_vocab[end_mark], + unk=self._src_vocab[unk_mark], + delimiter=self._token_delimiter) ] if not self._only_src: converters.append( - Converter( - vocab=self._trg_vocab, - beg=self._trg_vocab[start_mark], - end=self._trg_vocab[end_mark], - unk=self._trg_vocab[unk_mark], - delimiter=self._token_delimiter)) + Converter(vocab=self._trg_vocab, + beg=self._trg_vocab[start_mark], + end=self._trg_vocab[end_mark], + unk=self._trg_vocab[unk_mark], + delimiter=self._token_delimiter)) converters = ComposedConverter(converters) @@ -865,8 +871,9 @@ class DataReader(object): for line in f.extractfile(tar_fname): line = cpt.to_text(line) fields = line.strip("\n").split(self._field_delimiter) - if (not self._only_src and len(fields) == 2) or ( - self._only_src 
and len(fields) == 1): + if (not self._only_src + and len(fields) == 2) or (self._only_src + and len(fields) == 1): yield fields else: for fpath in fpaths: @@ -877,8 +884,9 @@ class DataReader(object): for line in f: line = cpt.to_text(line) fields = line.strip("\n").split(self._field_delimiter) - if (not self._only_src and len(fields) == 2) or ( - self._only_src and len(fields) == 1): + if (not self._only_src + and len(fields) == 2) or (self._only_src + and len(fields) == 1): yield fields @staticmethod @@ -896,8 +904,9 @@ class DataReader(object): def batch_generator(self): # global sort or global shuffle if self._sort_type == SortType.GLOBAL: - infos = sorted( - self._sample_infos, key=lambda x: x.max_len, reverse=True) + infos = sorted(self._sample_infos, + key=lambda x: x.max_len, + reverse=True) else: if self._shuffle: infos = self._sample_infos @@ -1006,8 +1015,8 @@ def multi_head_attention(queries, hidden_size = x.shape[-1] # The value 0 in shape attr means copying the corresponding dimension # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head]) + reshaped = layers.reshape(x=x, + shape=[0, 0, n_head, hidden_size // n_head]) # permute the dimensions into: # [batch_size, n_head, max_sequence_len, hidden_size_per_head] @@ -1039,11 +1048,10 @@ def multi_head_attention(queries, product += attn_bias weights = layers.softmax(product) if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - seed=ModelHyperParams.dropout_seed, - is_test=False) + weights = layers.dropout(weights, + dropout_prob=dropout_rate, + seed=ModelHyperParams.dropout_seed, + is_test=False) out = layers.matmul(weights, v) return out @@ -1102,18 +1110,16 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.): if cmd == "a": # add residual connection out = out + prev_out if prev_out else out elif cmd == "n": # add layer normalization - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.initializer.Constant(1.), - bias_attr=fluid.initializer.Constant(0.)) + out = layers.layer_norm(out, + begin_norm_axis=len(out.shape) - 1, + param_attr=fluid.initializer.Constant(1.), + bias_attr=fluid.initializer.Constant(0.)) elif cmd == "d": # add dropout if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - seed=ModelHyperParams.dropout_seed, - is_test=False) + out = layers.dropout(out, + dropout_prob=dropout_rate, + seed=ModelHyperParams.dropout_seed, + is_test=False) return out @@ -1145,9 +1151,9 @@ def prepare_encoder(src_word, src_word_emb = layers.embedding( src_word, size=[src_vocab_size, src_emb_dim], - param_attr=fluid.ParamAttr( - name=word_emb_param_name, - initializer=fluid.initializer.Normal(0., src_emb_dim**-0.5))) + param_attr=fluid.ParamAttr(name=word_emb_param_name, + initializer=fluid.initializer.Normal( + 0., src_emb_dim**-0.5))) src_word_emb = layers.scale(x=src_word_emb, scale=src_emb_dim**0.5) src_pos_enc = layers.embedding( @@ -1159,17 +1165,16 @@ def prepare_encoder(src_word, initializer=fluid.initializer.ConstantInitializer(0.001))) src_pos_enc.stop_gradient = True enc_input = src_word_emb + src_pos_enc - return layers.dropout( - enc_input, - dropout_prob=dropout_rate, - seed=ModelHyperParams.dropout_seed, - is_test=False) if dropout_rate else enc_input + return layers.dropout(enc_input, + dropout_prob=dropout_rate, + seed=ModelHyperParams.dropout_seed, + is_test=False) if dropout_rate else enc_input -prepare_encoder = partial( 
- prepare_encoder, pos_enc_param_name=pos_enc_param_names[0]) -prepare_decoder = partial( - prepare_encoder, pos_enc_param_name=pos_enc_param_names[1]) +prepare_encoder = partial(prepare_encoder, + pos_enc_param_name=pos_enc_param_names[0]) +prepare_decoder = partial(prepare_encoder, + pos_enc_param_name=pos_enc_param_names[1]) def encoder_layer(enc_input, @@ -1240,12 +1245,14 @@ def decoder_layer(dec_input, d_model, n_head, dropout_rate, - cache, ) + cache, + ) slf_attn_output = post_process_layer( dec_input, slf_attn_output, "dan", # residual connection + dropout + layer normalization - dropout_rate, ) + dropout_rate, + ) enc_attn_output = multi_head_attention( slf_attn_output, enc_output, @@ -1255,21 +1262,25 @@ def decoder_layer(dec_input, d_value, d_model, n_head, - dropout_rate, ) + dropout_rate, + ) enc_attn_output = post_process_layer( slf_attn_output, enc_attn_output, "dan", # residual connection + dropout + layer normalization - dropout_rate, ) + dropout_rate, + ) ffd_output = positionwise_feed_forward( enc_attn_output, d_inner_hid, - d_model, ) + d_model, + ) dec_output = post_process_layer( enc_attn_output, ffd_output, "dan", # residual connection + dropout + layer normalization - dropout_rate, ) + dropout_rate, + ) return dec_output @@ -1293,18 +1304,17 @@ def decoder(dec_input, if caches is not None: cache = caches[i] - dec_output = decoder_layer( - dec_input, - enc_output, - dec_slf_attn_bias, - dec_enc_attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - dropout_rate, - cache=cache) + dec_output = decoder_layer(dec_input, + enc_output, + dec_slf_attn_bias, + dec_enc_attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + cache=cache) dec_input = dec_output return dec_output @@ -1315,30 +1325,30 @@ def make_all_inputs(input_fields): """ inputs = [] for input_field in input_fields: - input_var = layers.data( - name=input_field, - shape=input_descs[input_field][0], - dtype=input_descs[input_field][1], - lod_level=input_descs[input_field][2] - if len(input_descs[input_field]) == 3 else 0, - append_batch_size=False) + input_var = layers.data(name=input_field, + shape=input_descs[input_field][0], + dtype=input_descs[input_field][1], + lod_level=input_descs[input_field][2] + if len(input_descs[input_field]) == 3 else 0, + append_batch_size=False) inputs.append(input_var) return inputs def transformer( - src_vocab_size, - trg_vocab_size, - max_length, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - dropout_rate, - weight_sharing, - label_smooth_eps, ): + src_vocab_size, + trg_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + weight_sharing, + label_smooth_eps, +): if weight_sharing: assert src_vocab_size == src_vocab_size, ( "Vocabularies in source and target should be same for weight sharing." @@ -1356,7 +1366,8 @@ def transformer( d_inner_hid, dropout_rate, weight_sharing, - enc_inputs, ) + enc_inputs, + ) dec_inputs = make_all_inputs(decoder_data_input_fields[:-1]) @@ -1372,20 +1383,19 @@ def transformer( dropout_rate, weight_sharing, dec_inputs, - enc_output, ) + enc_output, + ) # Padding index do not contribute to the total loss. The weights is used to # cancel padding index in calculating the loss. 
label, weights = make_all_inputs(label_data_input_fields) if label_smooth_eps: - label = layers.label_smooth( - label=layers.one_hot( - input=label, depth=trg_vocab_size), - epsilon=label_smooth_eps) + label = layers.label_smooth(label=layers.one_hot(input=label, + depth=trg_vocab_size), + epsilon=label_smooth_eps) cost = layers.softmax_with_cross_entropy( - logits=layers.reshape( - predict, shape=[-1, trg_vocab_size]), + logits=layers.reshape(predict, shape=[-1, trg_vocab_size]), label=label, soft_label=True if label_smooth_eps else False) weighted_cost = cost * weights @@ -1417,14 +1427,13 @@ def wrap_encoder(src_vocab_size, else: src_word, src_pos, src_slf_attn_bias = \ enc_inputs - enc_input = prepare_encoder( - src_word, - src_pos, - src_vocab_size, - d_model, - max_length, - dropout_rate, - word_emb_param_name=word_emb_param_names[0]) + enc_input = prepare_encoder(src_word, + src_pos, + src_vocab_size, + d_model, + max_length, + dropout_rate, + word_emb_param_name=word_emb_param_names[0]) enc_output = encoder(enc_input, src_slf_attn_bias, n_layer, n_head, d_key, d_value, d_model, d_inner_hid, dropout_rate) return enc_output @@ -1454,34 +1463,32 @@ def wrap_decoder(trg_vocab_size, else: trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias = dec_inputs - dec_input = prepare_decoder( - trg_word, - trg_pos, - trg_vocab_size, - d_model, - max_length, - dropout_rate, - word_emb_param_name=word_emb_param_names[0] - if weight_sharing else word_emb_param_names[1]) - dec_output = decoder( - dec_input, - enc_output, - trg_slf_attn_bias, - trg_src_attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - dropout_rate, - caches=caches) + dec_input = prepare_decoder(trg_word, + trg_pos, + trg_vocab_size, + d_model, + max_length, + dropout_rate, + word_emb_param_name=word_emb_param_names[0] + if weight_sharing else word_emb_param_names[1]) + dec_output = decoder(dec_input, + enc_output, + trg_slf_attn_bias, + trg_src_attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + caches=caches) # Return logits for training and probs for inference. if weight_sharing: - predict = layers.matmul( - x=dec_output, - y=fluid.framework._get_var(word_emb_param_names[0]), - transpose_y=True) + predict = layers.matmul(x=dec_output, + y=fluid.framework._get_var( + word_emb_param_names[0]), + transpose_y=True) else: predict = layers.fc(input=dec_output, size=trg_vocab_size, @@ -1494,20 +1501,21 @@ def wrap_decoder(trg_vocab_size, def fast_decode( - src_vocab_size, - trg_vocab_size, - max_in_len, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - dropout_rate, - weight_sharing, - beam_size, - max_out_len, - eos_idx, ): + src_vocab_size, + trg_vocab_size, + max_in_len, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + weight_sharing, + beam_size, + max_out_len, + eos_idx, +): """ Use beam search to decode. Caches will be used to store states of history steps which can make the decoding faster. 
@@ -1519,30 +1527,32 @@ def fast_decode( make_all_inputs(fast_decoder_data_input_fields ) def beam_search(): - max_len = layers.fill_constant( - shape=[1], dtype=start_tokens.dtype, value=max_out_len) - step_idx = layers.fill_constant( - shape=[1], dtype=start_tokens.dtype, value=0) + max_len = layers.fill_constant(shape=[1], + dtype=start_tokens.dtype, + value=max_out_len) + step_idx = layers.fill_constant(shape=[1], + dtype=start_tokens.dtype, + value=0) cond = layers.less_than(x=step_idx, y=max_len) while_op = layers.While(cond) # array states will be stored for each step. - ids = layers.array_write( - layers.reshape(start_tokens, (-1, 1)), step_idx) + ids = layers.array_write(layers.reshape(start_tokens, (-1, 1)), + step_idx) scores = layers.array_write(init_scores, step_idx) # cell states will be overwrited at each step. # caches contains states of history steps to reduce redundant # computation in decoder. caches = [{ - "k": layers.fill_constant_batch_size_like( - input=start_tokens, - shape=[-1, 0, d_model], - dtype=enc_output.dtype, - value=0), - "v": layers.fill_constant_batch_size_like( - input=start_tokens, - shape=[-1, 0, d_model], - dtype=enc_output.dtype, - value=0) + "k": + layers.fill_constant_batch_size_like(input=start_tokens, + shape=[-1, 0, d_model], + dtype=enc_output.dtype, + value=0), + "v": + layers.fill_constant_batch_size_like(input=start_tokens, + shape=[-1, 0, d_model], + dtype=enc_output.dtype, + value=0) } for i in range(n_layer)] with while_op.block(): pre_ids = layers.array_read(array=ids, i=step_idx) @@ -1550,47 +1560,46 @@ def fast_decode( pre_scores = layers.array_read(array=scores, i=step_idx) # sequence_expand can gather sequences according to lod thus can be # used in beam search to sift states corresponding to selected ids. 
- pre_src_attn_bias = layers.sequence_expand( - x=trg_src_attn_bias, y=pre_scores) + pre_src_attn_bias = layers.sequence_expand(x=trg_src_attn_bias, + y=pre_scores) pre_enc_output = layers.sequence_expand(x=enc_output, y=pre_scores) pre_caches = [{ - "k": layers.sequence_expand( - x=cache["k"], y=pre_scores), - "v": layers.sequence_expand( - x=cache["v"], y=pre_scores), + "k": + layers.sequence_expand(x=cache["k"], y=pre_scores), + "v": + layers.sequence_expand(x=cache["v"], y=pre_scores), } for cache in caches] pre_pos = layers.elementwise_mul( x=layers.fill_constant_batch_size_like( - input=pre_enc_output, # can't use pre_ids here since it has lod + input= + pre_enc_output, # can't use pre_ids here since it has lod value=1, shape=[-1, 1, 1], dtype=pre_ids.dtype), - y=layers.increment( - x=step_idx, value=1.0, in_place=False), + y=layers.increment(x=step_idx, value=1.0, in_place=False), axis=0) - logits = wrap_decoder( - trg_vocab_size, - max_in_len, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - dropout_rate, - weight_sharing, - dec_inputs=(pre_ids, pre_pos, None, pre_src_attn_bias), - enc_output=pre_enc_output, - caches=pre_caches) + logits = wrap_decoder(trg_vocab_size, + max_in_len, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + weight_sharing, + dec_inputs=(pre_ids, pre_pos, None, + pre_src_attn_bias), + enc_output=pre_enc_output, + caches=pre_caches) logits = layers.reshape(logits, (-1, trg_vocab_size)) topk_scores, topk_indices = layers.topk( input=layers.softmax(logits), k=beam_size) - accu_scores = layers.elementwise_add( - x=layers.log(topk_scores), - y=layers.reshape( - pre_scores, shape=[-1]), - axis=0) + accu_scores = layers.elementwise_add(x=layers.log(topk_scores), + y=layers.reshape(pre_scores, + shape=[-1]), + axis=0) # beam_search op uses lod to distinguish branches. 
topk_indices = layers.lod_reset(topk_indices, pre_ids) selected_ids, selected_scores = layers.beam_search( @@ -1653,11 +1662,10 @@ def get_model(is_dist, is_async): .noam_decay(ModelHyperParams.d_model, TrainTaskConfig.warmup_steps) - optimizer = fluid.optimizer.Adam( - learning_rate=lr_decay, - beta1=TrainTaskConfig.beta1, - beta2=TrainTaskConfig.beta2, - epsilon=TrainTaskConfig.eps) + optimizer = fluid.optimizer.Adam(learning_rate=lr_decay, + beta1=TrainTaskConfig.beta1, + beta2=TrainTaskConfig.beta2, + epsilon=TrainTaskConfig.eps) optimizer.minimize(sum_cost) return sum_cost, avg_cost, predict, token_num, local_lr_scheduler, test_program @@ -1667,7 +1675,8 @@ def update_args(): src_dict = DataReader.load_dict(TrainTaskConfig.src_vocab_fpath) trg_dict = DataReader.load_dict(TrainTaskConfig.trg_vocab_fpath) dict_args = [ - "src_vocab_size", str(len(src_dict)), "trg_vocab_size", + "src_vocab_size", + str(len(src_dict)), "trg_vocab_size", str(len(trg_dict)), "bos_idx", str(src_dict[TrainTaskConfig.special_token[0]]), "eos_idx", str(src_dict[TrainTaskConfig.special_token[1]]), "unk_idx", @@ -1677,11 +1686,11 @@ def update_args(): class DistTransformer2x2(TestDistRunnerBase): + def run_pserver(self, args): get_model(True, not args.sync_mode) - t = self.get_transpiler(args.trainer_id, - fluid.default_main_program(), args.endpoints, - args.trainers, args.sync_mode) + t = self.get_transpiler(args.trainer_id, fluid.default_main_program(), + args.endpoints, args.trainers, args.sync_mode) pserver_prog = t.get_pserver_program(args.current_endpoint) startup_prog = t.get_startup_program(args.current_endpoint, pserver_prog) diff --git a/python/paddle/fluid/tests/unittests/dist_word2vec.py b/python/paddle/fluid/tests/unittests/dist_word2vec.py index 835306edd0f..744a6d6729a 100644 --- a/python/paddle/fluid/tests/unittests/dist_word2vec.py +++ b/python/paddle/fluid/tests/unittests/dist_word2vec.py @@ -39,6 +39,7 @@ fluid.default_main_program().random_seed = 1 class TestDistWord2vec2x2(TestDistRunnerBase): + def get_model(self, batch_size=2): BATCH_SIZE = batch_size @@ -91,8 +92,8 @@ class TestDistWord2vec2x2(TestDistRunnerBase): act='softmax', param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1))) - cost = fluid.layers.cross_entropy( - input=predict_word, label=words[4]) + cost = fluid.layers.cross_entropy(input=predict_word, + label=words[4]) avg_cost = fluid.layers.mean(cost) return avg_cost, predict_word @@ -100,8 +101,9 @@ class TestDistWord2vec2x2(TestDistRunnerBase): dict_size = len(word_dict) first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64') - second_word = fluid.layers.data( - name='secondw', shape=[1], dtype='int64') + second_word = fluid.layers.data(name='secondw', + shape=[1], + dtype='int64') third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64') forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64') next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') @@ -113,10 +115,10 @@ class TestDistWord2vec2x2(TestDistRunnerBase): sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) sgd_optimizer.minimize(avg_cost) - train_reader = paddle.batch( - paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) - test_reader = paddle.batch( - paddle.dataset.imikolov.test(word_dict, N), BATCH_SIZE) + train_reader = paddle.batch(paddle.dataset.imikolov.train(word_dict, N), + BATCH_SIZE) + test_reader = paddle.batch(paddle.dataset.imikolov.test(word_dict, N), + BATCH_SIZE) return inference_program, avg_cost, 
train_reader, test_reader, None, predict_word diff --git a/python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py b/python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py index 0af7d40a2f0..ee2b180586d 100644 --- a/python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py +++ b/python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -76,6 +76,7 @@ def prune_fwd_bwd_ops(program, start_idx): class GradClipDecorator(ClipGradBase): + def __init__(self, clip, clip_after_allreduce): self.clip = clip self.clip_after_allreduce = clip_after_allreduce @@ -91,17 +92,17 @@ class GradClipDecorator(ClipGradBase): scale = 1.0 / world_size # scale = 1.0 for p, g in params_grads: - block.append_op( - type='c_allreduce_sum', - inputs={'X': [g]}, - outputs={'Out': [g]}, - attrs={'ring_id': 0, - 'use_calc_stream': True}) - block.append_op( - type='scale', - inputs={'X': [g]}, - outputs={'Out': [g]}, - attrs={'scale': scale}) + block.append_op(type='c_allreduce_sum', + inputs={'X': [g]}, + outputs={'Out': [g]}, + attrs={ + 'ring_id': 0, + 'use_calc_stream': True + }) + block.append_op(type='scale', + inputs={'X': [g]}, + outputs={'Out': [g]}, + attrs={'scale': scale}) def _static_clip(self, params_grads): if self.clip_after_allreduce: @@ -114,6 +115,7 @@ class GradClipDecorator(ClipGradBase): class IdentityGradClip(ClipGradBase): + def _dygraph_clip(self, params_grads): return params_grads @@ -130,12 +132,12 @@ def run_model(use_distributed_lamb, use_fp16, use_master_param_norm, **kwargs): with paddle.static.program_guard(main, startup): with paddle.fluid.unique_name.guard(): with paddle.static.amp.fp16_guard(): - image = paddle.static.data( - name='image', - shape=[None, 3, 224, 224], - dtype=paddle.float32) - label = paddle.static.data( - name='label', shape=[None, 1], dtype=paddle.int64) + image = paddle.static.data(name='image', + shape=[None, 3, 224, 224], + dtype=paddle.float32) + label = paddle.static.data(name='label', + shape=[None, 1], + dtype=paddle.int64) model = resnet() pred = model(image) loss_fn = paddle.nn.loss.CrossEntropyLoss() @@ -222,8 +224,8 @@ def run_model(use_distributed_lamb, use_fp16, use_master_param_norm, **kwargs): def reader(): for _ in range(6): - yield dict( - [(grad.name, gen_random_grad_tensor(grad)) for grad in grads]) + yield dict([(grad.name, gen_random_grad_tensor(grad)) + for grad in grads]) scope = paddle.static.Scope() fetch_list = params @@ -253,6 +255,7 @@ def run_model(use_distributed_lamb, use_fp16, use_master_param_norm, **kwargs): class TestDistributedFusedLamb(unittest.TestCase): + @classmethod def setUpClass(cls): if not paddle.is_compiled_with_cuda(): @@ -265,16 +268,18 @@ class TestDistributedFusedLamb(unittest.TestCase): def config(self): clip_after_allreduce = bool( - distutils.util.strtobool( - os.getenv('CLIP_AFTER_ALLREDUCE', 'True'))) + distutils.util.strtobool(os.getenv('CLIP_AFTER_ALLREDUCE', 'True'))) max_global_norm = float(os.getenv('MAX_GLOBAL_NORM', -1.0)) 
gm_steps = int(os.getenv('GRADIENT_MERGE_STEPS', 1)) print('clip_after_allreduce = {}, max_global_norm = {}'.format( clip_after_allreduce, max_global_norm)) return { - 'clip_after_allreduce': clip_after_allreduce, - 'gradient_accumulation_steps': gm_steps, - 'grad_clip': paddle.nn.ClipGradByGlobalNorm(max_global_norm) + 'clip_after_allreduce': + clip_after_allreduce, + 'gradient_accumulation_steps': + gm_steps, + 'grad_clip': + paddle.nn.ClipGradByGlobalNorm(max_global_norm) if max_global_norm > 0 else None, } diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py b/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py index e024ef1d5d1..63abdeef595 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -26,12 +26,14 @@ from dist_pass_test_base import DistPassTestBase import paddle.distributed.fleet as fleet import paddle.distributed.auto_parallel as auto + sys.path.append("..") import auto_parallel_gpt_model as modeling from auto_parallel_gpt_model import GPTModel, GPTForPretraining, GPTPretrainingCriterion class AutoPallelPassTestBase(DistPassTestBase): + def setUp(self): paddle.enable_static() seed = int(os.environ.get('SEED', -1)) @@ -62,10 +64,14 @@ class AutoPallelPassTestBase(DistPassTestBase): fleet.init(is_collective=True, strategy=dist_strategy) def check_main(self, gpus=None, **kwargs): - no_pass_rets = self._distributed_launch( - model=None, apply_pass=False, gpus=gpus, **kwargs) - pass_rets = self._distributed_launch( - model=None, apply_pass=True, gpus=gpus, **kwargs) + no_pass_rets = self._distributed_launch(model=None, + apply_pass=False, + gpus=gpus, + **kwargs) + pass_rets = self._distributed_launch(model=None, + apply_pass=True, + gpus=gpus, + **kwargs) self.check_results(no_pass_rets, pass_rets) def _run_gpu_main(self, model, apply_pass, dump_file, **kwargs): @@ -113,72 +119,71 @@ class AutoPallelPassTestBase(DistPassTestBase): else: raise ValueError("'get_gpt_model' only support dp and mp.") - tokens = paddle.static.data( - name="tokens", shape=[batch_size, sequence_len], dtype='int64') - position_ids = paddle.static.data( - name="position_ids", - shape=[batch_size, sequence_len], - dtype='int64') + tokens = paddle.static.data(name="tokens", + shape=[batch_size, sequence_len], + dtype='int64') + position_ids = paddle.static.data(name="position_ids", + shape=[batch_size, sequence_len], + dtype='int64') attention_mask = paddle.static.data( name="attention_mask", shape=[batch_size, 1, sequence_len, sequence_len], dtype='float32') - labels = paddle.static.data( - name="labels", shape=[batch_size, sequence_len], dtype='int64') - loss_mask = paddle.static.data( - name="loss_mask", shape=[batch_size, sequence_len], dtype='float32') + labels = paddle.static.data(name="labels", + shape=[batch_size, sequence_len], + dtype='int64') + 
loss_mask = paddle.static.data(name="loss_mask", + shape=[batch_size, sequence_len], + dtype='float32') data_holder = [tokens, position_ids, attention_mask, labels, loss_mask] if modeling._global_parallel_strategy == "dp": - auto.shard_tensor( - tokens, - dist_attr={ - "process_mesh": modeling._global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(tokens, + dist_attr={ + "process_mesh": modeling._global_process_mesh, + "dims_mapping": [0, -1] + }) elif modeling._global_parallel_strategy == "pp": - auto.shard_tensor( - tokens, - dist_attr={ - "process_mesh": modeling.PP_MESH_LIST[0], - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - attention_mask, - dist_attr={ - "process_mesh": modeling.PP_MESH_LIST[0], - "dims_mapping": [-1, -1, -1, -1] - }) - - gpt = GPTModel( - vocab_size=1000, - hidden_size=64, - num_hidden_layers=2, - num_attention_heads=8, - intermediate_size=256, - hidden_act="gelu", - hidden_dropout_prob=0.0, - attention_probs_dropout_prob=0.0, - max_position_embeddings=1024, - type_vocab_size=1, - initializer_range=0.02, - pad_token_id=0, - eos_token_id=7, - bos_token_id=0, - eol_token_id=3) - - model = GPTForPretraining( - gpt, vocab_size=1000, hidden_size=64, initializer_range=0.02) + auto.shard_tensor(tokens, + dist_attr={ + "process_mesh": modeling.PP_MESH_LIST[0], + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(attention_mask, + dist_attr={ + "process_mesh": modeling.PP_MESH_LIST[0], + "dims_mapping": [-1, -1, -1, -1] + }) + + gpt = GPTModel(vocab_size=1000, + hidden_size=64, + num_hidden_layers=2, + num_attention_heads=8, + intermediate_size=256, + hidden_act="gelu", + hidden_dropout_prob=0.0, + attention_probs_dropout_prob=0.0, + max_position_embeddings=1024, + type_vocab_size=1, + initializer_range=0.02, + pad_token_id=0, + eos_token_id=7, + bos_token_id=0, + eol_token_id=3) + + model = GPTForPretraining(gpt, + vocab_size=1000, + hidden_size=64, + initializer_range=0.02) preds = model(tokens, position_ids, attention_mask) criterion = GPTPretrainingCriterion() loss = criterion(preds, labels, loss_mask) clip = paddle.nn.ClipGradByNorm(clip_norm=1.0) - optimizer = paddle.fluid.optimizer.AdamOptimizer( - learning_rate=0.00001, - beta1=0.9, - beta2=0.999, - epsilon=1e-08, - grad_clip=clip) + optimizer = paddle.fluid.optimizer.AdamOptimizer(learning_rate=0.00001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + grad_clip=clip) optimizer = fleet.distributed_optimizer(optimizer) startup_program = paddle.static.default_startup_program() _, _, dist_startup_prog, dist_main_prog = optimizer.minimize( @@ -194,13 +199,11 @@ class AutoPallelPassTestBase(DistPassTestBase): loss_mask = [] for _ in range(batch_size): tokens.append( - np.random.randint( - vocab_size, size=sequence_len)) + np.random.randint(vocab_size, size=sequence_len)) position_ids.append(np.arange(sequence_len)) attention_mask.append([np.tril(np.ones(sequence_len))]) labels.append( - np.random.randint( - vocab_size, size=sequence_len)) + np.random.randint(vocab_size, size=sequence_len)) loss_mask.append(np.ones(sequence_len)) yield tokens, position_ids, attention_mask, labels, loss_mask diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/check_pass_conflict_example.py b/python/paddle/fluid/tests/unittests/distributed_passes/check_pass_conflict_example.py index fc0582f7aac..ffb8ea8e381 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/check_pass_conflict_example.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/check_pass_conflict_example.py @@ -1,11 
+1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,6 +19,7 @@ from model_zoo import resnet_model class CheckPassConflictTest1(PassConflictChecker): + def pass_config(self): return [ new_pass("fuse_all_reduce", {"max_memory_size": 1024 * 1024}), @@ -30,6 +31,7 @@ class CheckPassConflictTest1(PassConflictChecker): class CheckPassConflictTest2(PassConflictChecker): + def pass_config(self): return [ new_pass("fuse_elewise_add_act"), diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py b/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py index 786ee06487f..f13439575c9 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -57,6 +57,7 @@ def remove_path_if_exists(path): # NOTE: only support GPU now class DistPassTestBase(unittest.TestCase): + def setUp(self): paddle.enable_static() if paddle.is_compiled_with_cuda(): @@ -85,10 +86,14 @@ class DistPassTestBase(unittest.TestCase): raise NotImplementedError() def check_main(self, model=None, gpus=None, **kwargs): - pass_rets = self._distributed_launch( - model=model, apply_pass=True, gpus=gpus, **kwargs) - no_pass_rets = self._distributed_launch( - model=model, apply_pass=False, gpus=gpus, **kwargs) + pass_rets = self._distributed_launch(model=model, + apply_pass=True, + gpus=gpus, + **kwargs) + no_pass_rets = self._distributed_launch(model=model, + apply_pass=False, + gpus=gpus, + **kwargs) self.check_results(no_pass_rets, pass_rets) def check_results(self, no_pass_rets, pass_rets): @@ -101,12 +106,11 @@ class DistPassTestBase(unittest.TestCase): self.assertTrue(out_var_pass is None) else: self.assertTrue( - np.allclose( - out_var_no_pass, - out_var_pass, - rtol=self.rtol, - atol=self.atol, - equal_nan=self.equal_nan)) + np.allclose(out_var_no_pass, + out_var_pass, + rtol=self.rtol, + atol=self.atol, + equal_nan=self.equal_nan)) @classmethod def _to_var_names(cls, names_or_vars): @@ -238,8 +242,8 @@ class DistPassTestBase(unittest.TestCase): dump_file = '{0}/{1}.bin'.format(output_dir, i) self.assertTrue( os.path.exists(dump_file), - "Pass test failed with apply_pass = {}, please view log in {}". 
- format(apply_pass, output_dir)) + "Pass test failed with apply_pass = {}, please view log in {}" + .format(apply_pass, output_dir)) with open(dump_file, "rb") as f: results.append(pickle.load(f)) return results @@ -249,6 +253,7 @@ class DistPassTestBase(unittest.TestCase): class PassConflictChecker(DistPassTestBase): + def setUp(self): os.environ['DEBUG'] = '1' # to save the debug directory super(PassConflictChecker, self).setUp() @@ -266,16 +271,14 @@ class PassConflictChecker(DistPassTestBase): auto_pass_manager = PassManager(passes, auto_solve_conflict=True) new_passes = auto_pass_manager.passes self.assertEqual( - len(passes), - len(new_passes), + len(passes), len(new_passes), "After solving conflicts, the left passes are: {}".format( auto_pass_manager.names)) for i, (p1, p2) in enumerate(zip(passes, new_passes)): self.assertEqual( - id(p1), - id(p2), - "After solving conflicts, the {}-th pass is different: {} vs {}". - format(i, p1.name, p2.name)) + id(p1), id(p2), + "After solving conflicts, the {}-th pass is different: {} vs {}" + .format(i, p1.name, p2.name)) auto_pass_manager.apply([main_prog], [startup_prog]) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/launch.py b/python/paddle/fluid/tests/unittests/distributed_passes/launch.py index c225fe85cd8..ee9ff484523 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/launch.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/launch.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/model_zoo.py b/python/paddle/fluid/tests/unittests/distributed_passes/model_zoo.py index 7eebee47e59..9a48d117bb1 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/model_zoo.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/model_zoo.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,19 +19,25 @@ from paddle.vision.models import resnet50 as resnet import numpy as np import paddle.nn as nn -__all__ = ['resnet_model', ] +__all__ = [ + 'resnet_model', +] def get_seed_from_env(): return int(os.environ.get("SEED", 0)) -def resnet_model(place, batch_size, image_shape=[3, 224, 224], +def resnet_model(place, + batch_size, + image_shape=[3, 224, 224], num_classes=1000): - image = paddle.static.data( - shape=[batch_size] + image_shape, dtype='float32', name='image') - label = paddle.static.data( - shape=[batch_size, 1], dtype='int64', name='label') + image = paddle.static.data(shape=[batch_size] + image_shape, + dtype='float32', + name='image') + label = paddle.static.data(shape=[batch_size, 1], + dtype='int64', + name='label') model = resnet(pretrained=False) loss_fn = nn.loss.CrossEntropyLoss() pred_out = model(image) @@ -52,8 +58,9 @@ def resnet_model(place, batch_size, image_shape=[3, 224, 224], np.random.seed(seed + rank) for _ in range(10): image_np = np.random.random(size=image.shape).astype('float32') - label_np = np.random.randint( - low=0, high=num_classes, size=label.shape).astype('int64') + label_np = np.random.randint(low=0, + high=num_classes, + size=label.shape).astype('int64') yield image_np, label_np main_program = paddle.static.default_main_program() @@ -62,10 +69,12 @@ def resnet_model(place, batch_size, image_shape=[3, 224, 224], def simple_net(place, batch_size, image_shape=[784], num_classes=10): - image = paddle.static.data( - shape=[batch_size] + image_shape, dtype='float32', name='image') - label = paddle.static.data( - shape=[batch_size, 1], dtype='int64', name='label') + image = paddle.static.data(shape=[batch_size] + image_shape, + dtype='float32', + name='image') + label = paddle.static.data(shape=[batch_size, 1], + dtype='int64', + name='label') linears = [nn.Linear(784, 784) for _ in range(3)] hidden = image for linear in linears: @@ -89,8 +98,9 @@ def simple_net(place, batch_size, image_shape=[784], num_classes=10): np.random.seed(seed + rank) for _ in range(10): image_np = np.random.random(size=image.shape).astype('float32') - label_np = np.random.randint( - low=0, high=num_classes, size=label.shape).astype('int64') + label_np = np.random.randint(low=0, + high=num_classes, + size=label.shape).astype('int64') yield image_np, label_np main_program = paddle.static.default_main_program() diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/pass_run_main.py b/python/paddle/fluid/tests/unittests/distributed_passes/pass_run_main.py index 1dad8796a61..95c670ce909 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/pass_run_main.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/pass_run_main.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -29,13 +29,13 @@ def parse_args(): parser.add_argument( '--class_name', type=str, - help='The test class name. It is the class name that inherits the DistPassTestBase class.' + help= + 'The test class name. It is the class name that inherits the DistPassTestBase class.' 
) - parser.add_argument( - '--apply_pass', - default=False, - action="store_true", - help='Whether to apply distributed passes.') + parser.add_argument('--apply_pass', + default=False, + action="store_true", + help='Whether to apply distributed passes.') parser.add_argument( '--input_file', type=str, diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/ps_pass_test_base.py b/python/paddle/fluid/tests/unittests/distributed_passes/ps_pass_test_base.py index 93a0044a5e4..beddb79fd9e 100755 --- a/python/paddle/fluid/tests/unittests/distributed_passes/ps_pass_test_base.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/ps_pass_test_base.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -28,6 +28,7 @@ import paddle.distributed.fleet as fleet class PsPassTestBase(unittest.TestCase): + def init(self): self.config = {} self.config['ps_mode_config'] = "" diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_amp_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_amp_pass.py index 0507909b132..4585fe997cd 100755 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_amp_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_amp_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,6 +23,7 @@ from auto_parallel_pass_test_base import AutoPallelPassTestBase class TestAMPPass(AutoPallelPassTestBase): + def init(self): if paddle.is_compiled_with_cuda(): paddle.set_flags({'FLAGS_cudnn_deterministic': 1}) @@ -51,8 +52,10 @@ class TestAMPPass(AutoPallelPassTestBase): fleet.init(is_collective=True, strategy=dist_strategy) def test_bs_8(self): - self.check_main( - gpus=[0, 1], batch_size=8, sequence_len=512, vocab_size=1000) + self.check_main(gpus=[0, 1], + batch_size=8, + sequence_len=512, + vocab_size=1000) def get_model(self, place, batch_size, sequence_len, vocab_size): return self.get_gpt_model("mp", place, batch_size, sequence_len, diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_fp16_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_fp16_pass.py index ccc60bc6782..5ac78cc5fec 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_fp16_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_fp16_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,6 +24,7 @@ from test_auto_parallel_amp_pass import TestAMPPass class TestPF16Pass(TestAMPPass): + def apply_passes(self): dist_strategy = fleet.DistributedStrategy() dist_strategy.amp = True diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_recompute_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_recompute_pass.py index 74a751881dd..7afa10d49db 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_recompute_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_recompute_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -26,6 +26,7 @@ from auto_parallel_pass_test_base import AutoPallelPassTestBase class TestRecomputePass(AutoPallelPassTestBase): + def init(self): if paddle.is_compiled_with_cuda(): paddle.set_flags({'FLAGS_cudnn_deterministic': 1}) @@ -45,8 +46,10 @@ class TestRecomputePass(AutoPallelPassTestBase): fleet.init(is_collective=True, strategy=dist_strategy) def test_bs_8(self): - self.check_main( - gpus=[0, 1], batch_size=8, sequence_len=512, vocab_size=1000) + self.check_main(gpus=[0, 1], + batch_size=8, + sequence_len=512, + vocab_size=1000) def get_model(self, place, batch_size, sequence_len, vocab_size): return self.get_gpt_model("mp", place, batch_size, sequence_len, @@ -54,6 +57,7 @@ class TestRecomputePass(AutoPallelPassTestBase): class TestRecomputePassDP(TestRecomputePass): + def get_model(self, place, batch_size, sequence_len, vocab_size): return self.get_gpt_model("dp", place, batch_size, sequence_len, vocab_size) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_sharding_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_sharding_pass.py index 51e87260609..16d63b09643 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_sharding_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_sharding_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,12 +23,14 @@ import paddle.distributed.fleet as fleet import paddle.distributed.auto_parallel as auto from paddle.distributed.passes import new_pass, PassManager from auto_parallel_pass_test_base import AutoPallelPassTestBase + sys.path.append("..") import auto_parallel_gpt_model as modeling from auto_parallel_gpt_model import GPTModel, GPTForPretraining, GPTPretrainingCriterion class TestShardingPass(AutoPallelPassTestBase): + def init(self): if paddle.is_compiled_with_cuda(): paddle.set_flags({'FLAGS_cudnn_deterministic': 1}) @@ -58,8 +60,10 @@ class TestShardingPass(AutoPallelPassTestBase): fleet.init(is_collective=True, strategy=dist_strategy) def test_bs_8(self): - self.check_main( - gpus=[0, 1], batch_size=8, sequence_len=512, vocab_size=1000) + self.check_main(gpus=[0, 1], + batch_size=8, + sequence_len=512, + vocab_size=1000) def get_model(self, place, batch_size, sequence_len, vocab_size): return self.get_gpt_model('dp', place, batch_size, sequence_len, diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_resnet.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_resnet.py index 8430eb615a2..48679116ccf 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_resnet.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_resnet.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,6 +20,7 @@ from model_zoo import resnet_model class TestBuildCINNPass(DistPassTestBase): + def init(self): self.atol = 0.5 self.rtol = 0.0 diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_simple_net.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_simple_net.py index e030420d324..31bc9bd66d0 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_simple_net.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_simple_net.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -20,6 +20,7 @@ from model_zoo import simple_net class TestBuildCINNPass(DistPassTestBase): + def init(self): self.atol = 0.0 self.rtol = 0.0 diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_adam_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_adam_pass.py index 3ca71fb8315..85c3bf321a3 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_adam_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_adam_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,6 +23,7 @@ from dist_pass_test_base import DistPassTestBase class DemoNet(nn.Layer): + def __init__(self): super(DemoNet, self).__init__() @@ -39,13 +40,15 @@ class DemoNet(nn.Layer): class TestFuseAdamPass(DistPassTestBase): + def init(self): self.atol = 1e-4 self.rtol = 1e-4 def get_model(self, place, batch_size=32, image_shape=[224, 224, 3]): - image = paddle.static.data( - shape=[batch_size] + image_shape, dtype='float32', name='image') + image = paddle.static.data(shape=[batch_size] + image_shape, + dtype='float32', + name='image') model = DemoNet() pred_out = model(image) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_all_reduce_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_all_reduce_pass.py index c011815b7d2..06cd2ac6da4 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_all_reduce_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_all_reduce_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,6 +19,7 @@ from model_zoo import resnet_model class TestFuseAllReducePass(DistPassTestBase): + def init(self): self.atol = 0.0 self.rtol = 0.0 diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_bn_act_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_bn_act_pass.py index a7147724fbc..a0090f6d8c3 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_bn_act_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_bn_act_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,6 +23,7 @@ from dist_pass_test_base import DistPassTestBase class BatchNormActNet(nn.Layer): + def __init__(self): super(BatchNormActNet, self).__init__() @@ -39,13 +40,15 @@ class BatchNormActNet(nn.Layer): class TestFuseBatchNormActPass(DistPassTestBase): + def init(self): self.atol = 1e-4 self.rtol = 1e-4 def get_model(self, place, batch_size=32, image_shape=[224, 224, 3]): - image = paddle.static.data( - shape=[batch_size] + image_shape, dtype='float32', name='image') + image = paddle.static.data(shape=[batch_size] + image_shape, + dtype='float32', + name='image') model = BatchNormActNet() pred_out = model(image) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_bn_add_act_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_bn_add_act_pass.py index 1b01260eaf2..eb9a901a40a 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_bn_add_act_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_bn_add_act_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,6 +23,7 @@ from dist_pass_test_base import DistPassTestBase class BatchNormAddActNet(nn.Layer): + def __init__(self): super(BatchNormAddActNet, self).__init__() @@ -43,13 +44,15 @@ class BatchNormAddActNet(nn.Layer): class TestFuseBatchNormAddActPass(DistPassTestBase): + def init(self): self.atol = 1e-4 self.rtol = 1e-4 def get_model(self, place, batch_size=32, image_shape=[224, 224, 3]): - image = paddle.static.data( - shape=[batch_size] + image_shape, dtype='float32', name='image') + image = paddle.static.data(shape=[batch_size] + image_shape, + dtype='float32', + name='image') model = BatchNormAddActNet() pred_out = model(image) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_momentum_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_momentum_pass.py index a0dd634b3ad..11bd4f5d2b1 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_momentum_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_momentum_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,6 +23,7 @@ from dist_pass_test_base import DistPassTestBase class DemoNet(nn.Layer): + def __init__(self): super(DemoNet, self).__init__() @@ -39,13 +40,15 @@ class DemoNet(nn.Layer): class TestFuseAdamPass(DistPassTestBase): + def init(self): self.atol = 1e-4 self.rtol = 1e-4 def get_model(self, place, batch_size=32, image_shape=[224, 224, 3]): - image = paddle.static.data( - shape=[batch_size] + image_shape, dtype='float32', name='image') + image = paddle.static.data(shape=[batch_size] + image_shape, + dtype='float32', + name='image') model = DemoNet() pred_out = model(image) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_relu_depthwise_conv_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_relu_depthwise_conv_pass.py index c07744c882e..0a7442a18d7 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_relu_depthwise_conv_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_relu_depthwise_conv_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,6 +23,7 @@ from dist_pass_test_base import DistPassTestBase class ReluDepthwiseConvNet(nn.Layer): + def __init__(self): super(ReluDepthwiseConvNet, self).__init__() @@ -39,13 +40,15 @@ class ReluDepthwiseConvNet(nn.Layer): class TestFuseReluDepthwiseConvPass(DistPassTestBase): + def init(self): self.atol = 1e-4 self.rtol = 1e-4 def get_model(self, place, batch_size=32, image_shape=[3, 224, 224]): - image = paddle.static.data( - shape=[batch_size] + image_shape, dtype='float32', name='image') + image = paddle.static.data(shape=[batch_size] + image_shape, + dtype='float32', + name='image') model = ReluDepthwiseConvNet() pred_out = model(image) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_sgd_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_sgd_pass.py index 3939bd53739..3e96e9d3440 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_sgd_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_sgd_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,6 +23,7 @@ from dist_pass_test_base import DistPassTestBase class DemoNet(nn.Layer): + def __init__(self): super(DemoNet, self).__init__() @@ -39,13 +40,15 @@ class DemoNet(nn.Layer): class TestFuseAdamPass(DistPassTestBase): + def init(self): self.atol = 1e-4 self.rtol = 1e-4 def get_model(self, place, batch_size=32, image_shape=[224, 224, 3]): - image = paddle.static.data( - shape=[batch_size] + image_shape, dtype='float32', name='image') + image = paddle.static.data(shape=[batch_size] + image_shape, + dtype='float32', + name='image') model = DemoNet() pred_out = model(image) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_gradient_merge_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_gradient_merge_pass.py index 0c324ba8ee9..f856059402e 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_gradient_merge_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_gradient_merge_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -42,6 +42,7 @@ _global_process_mesh = None class MLPLayer(nn.Layer): + def __init__(self, hidden_size=128, intermediate_size=4 * 128, @@ -55,18 +56,30 @@ class MLPLayer(nn.Layer): weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0)) weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr1, bias_attr=bias_attr) - self.linear2 = nn.Linear( - d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr) - self.linear3 = nn.Linear( - dim_feedforward, d_model, weight_attr1, bias_attr=bias_attr) - self.linear4 = nn.Linear( - d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr) - self.linear5 = nn.Linear( - dim_feedforward, d_model, weight_attr1, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr0, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr1, + bias_attr=bias_attr) + self.linear2 = nn.Linear(d_model, + dim_feedforward, + weight_attr0, + bias_attr=bias_attr) + self.linear3 = nn.Linear(dim_feedforward, + d_model, + weight_attr1, + bias_attr=bias_attr) + self.linear4 = nn.Linear(d_model, + dim_feedforward, + weight_attr0, + bias_attr=bias_attr) + self.linear5 = nn.Linear(dim_feedforward, + d_model, + weight_attr1, + bias_attr=bias_attr) self.norm0 = nn.LayerNorm(d_model, epsilon=1e-5) self.norm1 = nn.LayerNorm(d_model, epsilon=1e-5) self.norm2 = nn.LayerNorm(d_model, epsilon=1e-5) @@ -91,17 +104,15 @@ class MLPLayer(nn.Layer): def mlp_forward(input, label, hidden_size): if _global_parallel_strategy == "dp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) - - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - initializer_range=0.02) + auto.shard_tensor(input, + dist_attr={ + 
"process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) + + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + initializer_range=0.02) predict = mlp(input) error_cost = paddle.nn.functional.square_error_cost(predict, label) loss = paddle.mean(error_cost) @@ -109,6 +120,7 @@ def mlp_forward(input, label, hidden_size): class TestGradientMergePass(DistPassTestBase): + def init(self): self._params_grads = None self._config = {"k_steps": 4, "avg": True} @@ -128,20 +140,18 @@ class TestGradientMergePass(DistPassTestBase): fleet.init(is_collective=True, strategy=dist_strategy) def test_result(self): - no_pass_rets = self._distributed_launch( - model=None, - apply_pass=False, - gpus=[0], - gradient_merge=False, - batch_size=32, - max_step=2) - pass_rets = self._distributed_launch( - model=None, - apply_pass=True, - gpus=[0], - gradient_merge=True, - batch_size=8, - max_step=8) + no_pass_rets = self._distributed_launch(model=None, + apply_pass=False, + gpus=[0], + gradient_merge=False, + batch_size=32, + max_step=2) + pass_rets = self._distributed_launch(model=None, + apply_pass=True, + gpus=[0], + gradient_merge=True, + batch_size=8, + max_step=8) """ # avg loss for gradient_merge pass avg_loss = 0 @@ -193,10 +203,12 @@ class TestGradientMergePass(DistPassTestBase): with static.program_guard(train_program, startup_program), \ utils.unique_name.guard(): - input = static.data( - name="input", shape=[batch_size, hidden_size], dtype='float32') - label = static.data( - name="label", shape=[batch_size, 1], dtype='float32') + input = static.data(name="input", + shape=[batch_size, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, 1], + dtype='float32') input.stop_gradient = False loss = mlp_forward(input, label, hidden_size) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_inplace_addto_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_inplace_addto_pass.py index 32a6257a5f6..32bb1ca83a9 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_inplace_addto_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_inplace_addto_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,6 +23,7 @@ from dist_pass_test_base import DistPassTestBase class DemoNet(nn.Layer): + def __init__(self): super(DemoNet, self).__init__() @@ -40,14 +41,16 @@ class DemoNet(nn.Layer): class TestInplaceAddtoPass(DistPassTestBase): + def init(self): self.atol = 0.0 self.rtol = 0.0 paddle.fluid.set_flags({"FLAGS_max_inplace_grad_add": 8}) def get_model(self, place, batch_size=32, image_shape=[224, 224, 3]): - image = paddle.static.data( - shape=[batch_size] + image_shape, dtype='float32', name='image') + image = paddle.static.data(shape=[batch_size] + image_shape, + dtype='float32', + name='image') model = DemoNet() pred_out = model(image) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_ps_server_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_ps_server_pass.py index e9beda446aa..5c46794d8a4 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_ps_server_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_ps_server_pass.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.ps.ps_dnn_trainer import DnnTrainer class TestPsServerPass(PsPassTestBase): + def init(self): pass diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_ps_trainer_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_ps_trainer_pass.py index 054950df1eb..964e13d5371 100755 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_ps_trainer_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_ps_trainer_pass.py @@ -26,6 +26,7 @@ from paddle.fluid.tests.unittests.ps.ps_dnn_trainer import DnnTrainer class TestPsTrainerPass(PsPassTestBase): + def setUp(self): pass diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_white_lists.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_white_lists.py index 37abe1e121f..645fa38099d 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_white_lists.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_white_lists.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -18,6 +18,7 @@ from paddle.distributed.passes.pass_base import _make_rule_from_white_lists_dict class TestConcretePass(PassBase): + def __init__(self): super(TestConcretePass, self).__init__() @@ -33,35 +34,41 @@ class TestConcretePass(PassBase): @register_pass("A") class A(TestConcretePass): + def __init__(self): super(A, self).__init__() @register_pass("B") class B(TestConcretePass): + def __init__(self): super(B, self).__init__() @register_pass("C") class C(TestConcretePass): + def __init__(self): super(C, self).__init__() @register_pass("D") class D(TestConcretePass): + def __init__(self): super(D, self).__init__() @register_pass("E") class E(TestConcretePass): + def __init__(self): super(E, self).__init__() class TestMakeWhiteListsRule(unittest.TestCase): + def test_main(self): before_white_lists = {"A": ["B", "C"]} after_white_lists = {"D": ["C"]} diff --git a/python/paddle/fluid/tests/unittests/distribution/parameterize.py b/python/paddle/fluid/tests/unittests/distribution/parameterize.py index 09aa241b15d..72c9ac03325 100644 --- a/python/paddle/fluid/tests/unittests/distribution/parameterize.py +++ b/python/paddle/fluid/tests/unittests/distribution/parameterize.py @@ -29,6 +29,7 @@ def xrand(shape=(10, 10, 10), dtype=config.DEFAULT_DTYPE, min=1.0, max=10.0): def place(devices, key='place'): + def decorate(cls): module = sys.modules[cls.__module__].__dict__ raw_classes = { @@ -70,7 +71,9 @@ def parameterize_cls(fields, values=None): return decorate -def parameterize_func(input, name_func=None, doc_func=None, +def parameterize_func(input, + name_func=None, + doc_func=None, skip_on_empty=False): doc_func = doc_func or default_doc_func name_func = name_func or default_name_func @@ -90,9 +93,8 @@ def parameterize_func(input, name_func=None, doc_func=None, digits = len(str(len(parameters) - 1)) for num, p in enumerate(parameters): - name = name_func( - f, "{num:0>{digits}}".format( - digits=digits, num=num), p) + name = name_func(f, "{num:0>{digits}}".format(digits=digits, + num=num), p) # If the original function has patches applied by 'mock.patch', # re-construct all patches on the just former decoration layer # of param_as_standalone_func so as not to share @@ -111,7 +113,9 @@ def parameterize_func(input, name_func=None, doc_func=None, def reapply_patches_if_need(func): + def dummy_wrapper(orgfunc): + @wraps(orgfunc) def dummy_func(*args, **kwargs): return orgfunc(*args, **kwargs) @@ -163,6 +167,7 @@ def default_doc_func(func, num, p): def param_as_standalone_func(p, func, name): + @functools.wraps(func) def standalone_func(*a): return func(*(a + p.args), **p.kwargs) @@ -204,6 +209,7 @@ _param = collections.namedtuple("param", "args kwargs") class param(_param): + def __new__(cls, *args, **kwargs): return _param.__new__(cls, args, kwargs) diff --git a/python/paddle/fluid/tests/unittests/distribution/test_dirichlet_op.py b/python/paddle/fluid/tests/unittests/distribution/test_dirichlet_op.py index 3e7662b573e..2e85b47a20e 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_dirichlet_op.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_dirichlet_op.py @@ -25,6 +25,7 @@ import paddle.fluid.dygraph as dg import paddle.static as static import scipy.stats from numpy.random import random as rand + sys.path.append("../") from op_test import OpTest from paddle.fluid import Program, program_guard @@ -55,6 +56,5 @@ class TestDirichletOp(OpTest): scipy.stats.kstest( outs[0][:, 0], # scipy dirichlet have not cdf, use beta to replace it. 
- scipy.stats.beta( - a=self.alpha[0], b=self.alpha[1]).cdf)[0], + scipy.stats.beta(a=self.alpha[0], b=self.alpha[1]).cdf)[0], 0.01) diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution.py index 7a1cb25b96f..028faac6e84 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution.py @@ -28,6 +28,7 @@ paddle.enable_static() class DistributionNumpy(): + def sample(self): raise NotImplementedError @@ -45,6 +46,7 @@ class DistributionNumpy(): class DistributionTestName(unittest.TestCase): + def get_prefix(self, string): return (string.split('.')[0]) @@ -137,9 +139,10 @@ class DistributionTestName(unittest.TestCase): @parameterize.parameterize_cls( (parameterize.TEST_CASE_NAME, 'batch_shape', 'event_shape'), [('test-tuple', (10, 20), (10, 20)), - ('test-list', [100, 100], [100, 200, 300]), ('test-null-eventshape', - (100, 100), ())]) + ('test-list', [100, 100], [100, 200, 300]), + ('test-null-eventshape', (100, 100), ())]) class TestDistributionShape(unittest.TestCase): + def setUp(self): paddle.disable_static() self.dist = paddle.distribution.Distribution( @@ -169,6 +172,7 @@ class TestDistributionShape(unittest.TestCase): class TestDistributionException(unittest.TestCase): + def setUp(self): self._d = paddle.distribution.Distribution() diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_beta.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_beta.py index fb0c37e3d65..1d23b0f79d2 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_beta.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_beta.py @@ -28,6 +28,7 @@ from parameterize import TEST_CASE_NAME, parameterize_cls, place, xrand [('test-scale', 1.0, 2.0), ('test-tensor', xrand(), xrand()), ('test-broadcast', xrand((2, 1)), xrand((2, 5)))]) class TestBeta(unittest.TestCase): + def setUp(self): # scale no need convert to tensor for scale input unittest alpha, beta = self.alpha, self.beta @@ -97,8 +98,8 @@ class TestBeta(unittest.TestCase): ] for case in cases: self.assertTrue( - self._paddle_beta.sample(case.get('input')).shape == - case.get('expect')) + self._paddle_beta.sample(case.get('input')).shape == case.get( + 'expect')) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_beta_static.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_beta_static.py index e8fe0f17600..83b66f5c2b2 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_beta_static.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_beta_static.py @@ -27,11 +27,13 @@ paddle.enable_static() @param.place(config.DEVICES) -@param.parameterize_cls( - (param.TEST_CASE_NAME, 'alpha', 'beta'), [('test-tensor', xrand( - (10, 10)), xrand((10, 10))), ('test-broadcast', xrand((2, 1)), xrand( - (2, 5))), ('test-larger-data', xrand((10, 20)), xrand((10, 20)))]) +@param.parameterize_cls((param.TEST_CASE_NAME, 'alpha', 'beta'), + [('test-tensor', xrand((10, 10)), xrand((10, 10))), + ('test-broadcast', xrand((2, 1)), xrand((2, 5))), + ('test-larger-data', xrand((10, 20)), xrand( + (10, 20)))]) class TestBeta(unittest.TestCase): + def setUp(self): self.program = paddle.static.Program() self.executor = paddle.static.Executor(self.place) @@ -48,23 +50,23 @@ class 
TestBeta(unittest.TestCase): [mean] = self.executor.run(self.program, feed=self.feeds, fetch_list=[self._paddle_beta.mean]) - np.testing.assert_allclose( - mean, - scipy.stats.beta.mean(self.alpha, self.beta), - rtol=RTOL.get(str(self.alpha.dtype)), - atol=ATOL.get(str(self.alpha.dtype))) + np.testing.assert_allclose(mean, + scipy.stats.beta.mean( + self.alpha, self.beta), + rtol=RTOL.get(str(self.alpha.dtype)), + atol=ATOL.get(str(self.alpha.dtype))) def test_variance(self): with paddle.static.program_guard(self.program): - [variance] = self.executor.run( - self.program, - feed=self.feeds, - fetch_list=[self._paddle_beta.variance]) - np.testing.assert_allclose( - variance, - scipy.stats.beta.var(self.alpha, self.beta), - rtol=RTOL.get(str(self.alpha.dtype)), - atol=ATOL.get(str(self.alpha.dtype))) + [variance + ] = self.executor.run(self.program, + feed=self.feeds, + fetch_list=[self._paddle_beta.variance]) + np.testing.assert_allclose(variance, + scipy.stats.beta.var( + self.alpha, self.beta), + rtol=RTOL.get(str(self.alpha.dtype)), + atol=ATOL.get(str(self.alpha.dtype))) def test_prob(self): @@ -79,11 +81,12 @@ class TestBeta(unittest.TestCase): [prob] = self.executor.run(self.program, feed=feeds, fetch_list=[prob]) - np.testing.assert_allclose( - prob, - scipy.stats.beta.pdf(random_number, self.alpha, self.beta), - rtol=RTOL.get(str(self.alpha.dtype)), - atol=ATOL.get(str(self.alpha.dtype))) + np.testing.assert_allclose(prob, + scipy.stats.beta.pdf( + random_number, self.alpha, + self.beta), + rtol=RTOL.get(str(self.alpha.dtype)), + atol=ATOL.get(str(self.alpha.dtype))) def test_log_prob(self): with paddle.static.program_guard(self.program): @@ -95,23 +98,24 @@ class TestBeta(unittest.TestCase): [prob] = self.executor.run(self.program, feed=feeds, fetch_list=[prob]) - np.testing.assert_allclose( - prob, - scipy.stats.beta.logpdf(random_number, self.alpha, self.beta), - rtol=RTOL.get(str(self.alpha.dtype)), - atol=ATOL.get(str(self.alpha.dtype))) + np.testing.assert_allclose(prob, + scipy.stats.beta.logpdf( + random_number, self.alpha, + self.beta), + rtol=RTOL.get(str(self.alpha.dtype)), + atol=ATOL.get(str(self.alpha.dtype))) def test_entropy(self): with paddle.static.program_guard(self.program): - [entropy] = self.executor.run( - self.program, - feed=self.feeds, - fetch_list=[self._paddle_beta.entropy()]) - np.testing.assert_allclose( - entropy, - scipy.stats.beta.entropy(self.alpha, self.beta), - rtol=RTOL.get(str(self.alpha.dtype)), - atol=ATOL.get(str(self.alpha.dtype))) + [entropy + ] = self.executor.run(self.program, + feed=self.feeds, + fetch_list=[self._paddle_beta.entropy()]) + np.testing.assert_allclose(entropy, + scipy.stats.beta.entropy( + self.alpha, self.beta), + rtol=RTOL.get(str(self.alpha.dtype)), + atol=ATOL.get(str(self.alpha.dtype))) def test_sample(self): with paddle.static.program_guard(self.program): diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_categorical.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_categorical.py index f43ac7bea76..24c21d1bd45 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_categorical.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_categorical.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -25,6 +25,7 @@ from test_distribution import DistributionNumpy class CategoricalNumpy(DistributionNumpy): + def __init__(self, logits): self.logits = np.array(logits).astype('float32') @@ -51,6 +52,7 @@ class CategoricalNumpy(DistributionNumpy): class CategoricalTest(unittest.TestCase): + def setUp(self, use_gpu=False, batch_size=3, dims=5): self.use_gpu = use_gpu if not use_gpu: @@ -100,12 +102,15 @@ class CategoricalTest(unittest.TestCase): def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.logits_static = fluid.data( - name='logits', shape=self.logits_shape, dtype='float32') - self.other_logits_static = fluid.data( - name='other_logits', shape=self.logits_shape, dtype='float32') - self.value_static = fluid.data( - name='value', shape=self.value_shape, dtype='int64') + self.logits_static = fluid.data(name='logits', + shape=self.logits_shape, + dtype='float32') + self.other_logits_static = fluid.data(name='other_logits', + shape=self.logits_shape, + dtype='float32') + self.value_static = fluid.data(name='value', + shape=self.value_shape, + dtype='int64') def get_numpy_selected_probs(self, probability): np_probs = np.zeros(self.dist_shape + self.value_shape) @@ -126,20 +131,28 @@ class CategoricalTest(unittest.TestCase): np_entropy = np_categorical.entropy() np_kl = np_categorical.kl_divergence(np_other_categorical) - np.testing.assert_allclose( - entropy, np_entropy, rtol=log_tolerance, atol=log_tolerance) - np.testing.assert_allclose( - kl, np_kl, rtol=log_tolerance, atol=log_tolerance) + np.testing.assert_allclose(entropy, + np_entropy, + rtol=log_tolerance, + atol=log_tolerance) + np.testing.assert_allclose(kl, + np_kl, + rtol=log_tolerance, + atol=log_tolerance) sum_dist = np.sum(self.logits_np, axis=-1, keepdims=True) probability = self.logits_np / sum_dist np_probs = self.get_numpy_selected_probs(probability) np_log_prob = np.log(np_probs) - np.testing.assert_allclose( - probs, np_probs, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - log_prob, np_log_prob, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose(probs, + np_probs, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(log_prob, + np_log_prob, + rtol=tolerance, + atol=tolerance) def test_categorical_distribution_dygraph(self, tolerance=1e-6): paddle.disable_static(self.place) @@ -184,6 +197,7 @@ class CategoricalTest(unittest.TestCase): class CategoricalTest2(CategoricalTest): + def init_numpy_data(self, batch_size, dims): # input logtis is 2-D Tensor with dtype Float64 # value used in probs and log_prob method is 1-D Tensor @@ -199,15 +213,19 @@ class CategoricalTest2(CategoricalTest): def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.logits_static = fluid.data( - name='logits', shape=self.logits_shape, dtype='float64') - self.other_logits_static = fluid.data( - name='other_logits', shape=self.logits_shape, dtype='float64') - self.value_static = fluid.data( - name='value', shape=self.value_shape, dtype='int64') + self.logits_static = fluid.data(name='logits', + shape=self.logits_shape, + dtype='float64') + self.other_logits_static = fluid.data(name='other_logits', + 
shape=self.logits_shape, + dtype='float64') + self.value_static = fluid.data(name='value', + shape=self.value_shape, + dtype='int64') class CategoricalTest3(CategoricalTest): + def init_dynamic_data(self, batch_size, dims): # input logtis is 2-D numpy.ndarray with dtype Float32 # value used in probs and log_prob method is 1-D Tensor @@ -219,11 +237,13 @@ class CategoricalTest3(CategoricalTest): with fluid.program_guard(self.test_program): self.logits_static = self.logits_np self.other_logits_static = self.other_logits_np - self.value_static = fluid.data( - name='value', shape=self.value_shape, dtype='int64') + self.value_static = fluid.data(name='value', + shape=self.value_shape, + dtype='int64') class CategoricalTest4(CategoricalTest): + def init_numpy_data(self, batch_size, dims): # input logtis is 2-D numpy.ndarray with dtype Float64 # value used in probs and log_prob method is 1-D Tensor @@ -246,12 +266,14 @@ class CategoricalTest4(CategoricalTest): with fluid.program_guard(self.test_program): self.logits_static = self.logits_np self.other_logits_static = self.other_logits_np - self.value_static = fluid.data( - name='value', shape=self.value_shape, dtype='int64') + self.value_static = fluid.data(name='value', + shape=self.value_shape, + dtype='int64') # test shape of logits and value used in probs and log_prob method class CategoricalTest5(CategoricalTest): + def init_numpy_data(self, batch_size, dims): # input logtis is 1-D Tensor # value used in probs and log_prob method is 1-D Tensor @@ -272,6 +294,7 @@ class CategoricalTest5(CategoricalTest): class CategoricalTest6(CategoricalTest): + def init_numpy_data(self, batch_size, dims): # input logtis is 2-D Tensor # value used in probs and log_prob method has the same number of batches with input @@ -293,6 +316,7 @@ class CategoricalTest6(CategoricalTest): class CategoricalTest7(CategoricalTest): + def init_numpy_data(self, batch_size, dims): # input logtis is 3-D Tensor # value used in probs and log_prob method has the same number of distribuions with input @@ -315,6 +339,7 @@ class CategoricalTest7(CategoricalTest): class CategoricalTest8(CategoricalTest): + def init_dynamic_data(self, batch_size, dims): # input logtis is 2-D list # value used in probs and log_prob method is 1-D Tensor @@ -326,11 +351,13 @@ class CategoricalTest8(CategoricalTest): with fluid.program_guard(self.test_program): self.logits_static = self.logits_np.tolist() self.other_logits_static = self.other_logits_np.tolist() - self.value_static = fluid.data( - name='value', shape=self.value_shape, dtype='int64') + self.value_static = fluid.data(name='value', + shape=self.value_shape, + dtype='int64') class CategoricalTest9(CategoricalTest): + def init_dynamic_data(self, batch_size, dims): # input logtis is 2-D tuple # value used in probs and log_prob method is 1-D Tensor @@ -342,11 +369,13 @@ class CategoricalTest9(CategoricalTest): with fluid.program_guard(self.test_program): self.logits_static = tuple(self.logits_np.tolist()) self.other_logits_static = tuple(self.other_logits_np.tolist()) - self.value_static = fluid.data( - name='value', shape=self.value_shape, dtype='int64') + self.value_static = fluid.data(name='value', + shape=self.value_shape, + dtype='int64') class DistributionTestError(unittest.TestCase): + def test_distribution_error(self): distribution = Distribution() diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_constraint.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_constraint.py index 
c31d2124193..b927aef8e9b 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_constraint.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_constraint.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -25,6 +25,7 @@ import parameterize as param @param.param_cls((param.TEST_CASE_NAME, 'value'), [('NotImplement', np.random.rand(2, 3))]) class TestConstraint(unittest.TestCase): + def setUp(self): self._constraint = constraint.Constraint() @@ -36,6 +37,7 @@ class TestConstraint(unittest.TestCase): @param.param_cls((param.TEST_CASE_NAME, 'value', 'expect'), [('real', 1., True)]) class TestReal(unittest.TestCase): + def setUp(self): self._constraint = constraint.Real() @@ -46,6 +48,7 @@ class TestReal(unittest.TestCase): @param.param_cls((param.TEST_CASE_NAME, 'lower', 'upper', 'value', 'expect'), [('in_range', 0, 1, 0.5, True), ('out_range', 0, 1, 2, False)]) class TestRange(unittest.TestCase): + def setUp(self): self._constraint = constraint.Range(self.lower, self.upper) @@ -56,6 +59,7 @@ class TestRange(unittest.TestCase): @param.param_cls((param.TEST_CASE_NAME, 'value', 'expect'), [('positive', 1, True), ('negative', -1, False)]) class TestPositive(unittest.TestCase): + def setUp(self): self._constraint = constraint.Positive() @@ -67,6 +71,7 @@ class TestPositive(unittest.TestCase): [('simplex', paddle.to_tensor([0.5, 0.5]), True), ('non_simplex', paddle.to_tensor([-0.5, 0.5]), False)]) class TestSimplex(unittest.TestCase): + def setUp(self): self._constraint = constraint.Simplex() diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_dirichlet.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_dirichlet.py index 9caec312b33..8188b2231f2 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_dirichlet.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_dirichlet.py @@ -31,6 +31,7 @@ import parameterize as param # ('test-multi-dim', config.xrand((10, 20, 30))) ]) class TestDirichlet(unittest.TestCase): + def setUp(self): self._paddle_diric = paddle.distribution.Dirichlet( paddle.to_tensor(self.concentration)) @@ -91,17 +92,18 @@ class TestDirichlet(unittest.TestCase): self.assertTrue( np.all( self._paddle_diric._log_normalizer( - paddle.to_tensor(param.xrand((100, 100, 100)))).numpy() < - 0.0)) + paddle.to_tensor(param.xrand((100, 100, + 100)))).numpy() < 0.0)) @param.place(DEVICES) @param.param_cls((param.TEST_CASE_NAME, 'concentration'), [('test-zero-dim', np.array(1.0))]) class TestDirichletException(unittest.TestCase): + def TestInit(self): with self.assertRaises(ValueError): - paddle.distribution.Dirichlet( - paddle.squeeze(self.concentration)) + paddle.distribution.Dirichlet(paddle.squeeze( + self.concentration)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_dirichlet_static.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_dirichlet_static.py index f7096d295ee..c4630bbd84b 100644 
--- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_dirichlet_static.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_dirichlet_static.py @@ -28,6 +28,7 @@ paddle.enable_static() @parameterize_cls((TEST_CASE_NAME, 'concentration'), [('test-one-dim', np.random.rand(89) + 5.0)]) class TestDirichlet(unittest.TestCase): + def setUp(self): self.program = paddle.static.Program() self.executor = paddle.static.Executor() @@ -95,10 +96,9 @@ class TestDirichlet(unittest.TestCase): def test_entropy(self): with paddle.static.program_guard(self.program): - [out] = self.executor.run( - self.program, - feed=self.feeds, - fetch_list=[self._paddle_diric.entropy()]) + [out] = self.executor.run(self.program, + feed=self.feeds, + fetch_list=[self._paddle_diric.entropy()]) np.testing.assert_allclose( out, scipy.stats.dirichlet.entropy(self.concentration), diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_expfamily.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_expfamily.py index b601ac28584..cc4b8430913 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_expfamily.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_expfamily.py @@ -25,11 +25,12 @@ import parameterize @parameterize.place(config.DEVICES) @parameterize.parameterize_cls( - (parameterize.TEST_CASE_NAME, 'dist'), [('test-mock-exp', - mock.Exponential(rate=paddle.rand( - [100, 200, 99], - dtype=config.DEFAULT_DTYPE)))]) + (parameterize.TEST_CASE_NAME, 'dist'), + [('test-mock-exp', + mock.Exponential( + rate=paddle.rand([100, 200, 99], dtype=config.DEFAULT_DTYPE)))]) class TestExponentialFamily(unittest.TestCase): + def test_entropy(self): np.testing.assert_allclose( self.dist.entropy(), @@ -43,11 +44,12 @@ class TestExponentialFamily(unittest.TestCase): (config.TEST_CASE_NAME, 'dist'), [('test-dummy', mock.DummyExpFamily(0.5, 0.5)), ('test-dirichlet', - paddle.distribution.Dirichlet(paddle.to_tensor(parameterize.xrand()))), ( - 'test-beta', paddle.distribution.Beta( - paddle.to_tensor(parameterize.xrand()), - paddle.to_tensor(parameterize.xrand())))]) + paddle.distribution.Dirichlet(paddle.to_tensor(parameterize.xrand()))), + ('test-beta', + paddle.distribution.Beta(paddle.to_tensor(parameterize.xrand()), + paddle.to_tensor(parameterize.xrand())))]) class TestExponentialFamilyException(unittest.TestCase): + def test_entropy_exception(self): with self.assertRaises(NotImplementedError): paddle.distribution.ExponentialFamily.entropy(self.dist) diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_expfamily_static.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_expfamily_static.py index 28c337b617b..63f1fa81bf1 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_expfamily_static.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_expfamily_static.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -27,6 +27,7 @@ paddle.enable_static() @parameterize.place(config.DEVICES) class TestExponentialFamily(unittest.TestCase): + def setUp(self): self.program = paddle.static.Program() self.executor = paddle.static.Executor() diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_independent.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_independent.py index f67c260cbcc..4f0639a0380 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_independent.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_independent.py @@ -23,10 +23,12 @@ import parameterize as param @param.place(config.DEVICES) -@param.param_cls((param.TEST_CASE_NAME, 'base', 'reinterpreted_batch_rank'), - [('base_beta', paddle.distribution.Beta( - paddle.rand([1, 2]), paddle.rand([1, 2])), 1)]) +@param.param_cls( + (param.TEST_CASE_NAME, 'base', 'reinterpreted_batch_rank'), + [('base_beta', + paddle.distribution.Beta(paddle.rand([1, 2]), paddle.rand([1, 2])), 1)]) class TestIndependent(unittest.TestCase): + def setUp(self): self._t = paddle.distribution.Independent(self.base, self.reinterpreted_batch_rank) @@ -82,6 +84,7 @@ class TestIndependent(unittest.TestCase): [('base_not_transform', '', 1, TypeError), ('rank_less_than_zero', paddle.distribution.Transform(), -1, ValueError)]) class TestIndependentException(unittest.TestCase): + def test_init(self): with self.assertRaises(self.expected_exception): paddle.distribution.IndependentTransform( diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_independent_static.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_independent_static.py index eb078160a03..e0196ecbf13 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_independent_static.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_independent_static.py @@ -27,9 +27,10 @@ paddle.enable_static() @param.place(config.DEVICES) @param.param_cls( (param.TEST_CASE_NAME, 'base', 'reinterpreted_batch_rank', 'alpha', 'beta'), - [('base_beta', paddle.distribution.Beta, 1, np.random.rand(1, 2), - np.random.rand(1, 2))]) + [('base_beta', paddle.distribution.Beta, 1, np.random.rand( + 1, 2), np.random.rand(1, 2))]) class TestIndependent(unittest.TestCase): + def setUp(self): value = np.random.rand(1) self.dtype = value.dtype @@ -63,45 +64,42 @@ class TestIndependent(unittest.TestCase): self.mean, self.variance, self.entropy, self.log_prob, self.base_mean, self.base_variance, self.base_entropy, self.base_log_prob - ] = exe.run( - mp, - feed={'value': value, - 'alpha': self.alpha, - 'beta': self.beta}, - fetch_list=fetch_list) + ] = exe.run(mp, + feed={ + 'value': value, + 'alpha': self.alpha, + 'beta': self.beta + }, + fetch_list=fetch_list) def test_mean(self): - np.testing.assert_allclose( - self.mean, - self.base_mean, - rtol=config.RTOL.get(str(self.dtype)), - atol=config.ATOL.get(str(self.dtype))) + np.testing.assert_allclose(self.mean, + self.base_mean, + rtol=config.RTOL.get(str(self.dtype)), + atol=config.ATOL.get(str(self.dtype))) def test_variance(self): - np.testing.assert_allclose( - self.variance, - self.base_variance, - rtol=config.RTOL.get(str(self.dtype)), - atol=config.ATOL.get(str(self.dtype))) + np.testing.assert_allclose(self.variance, + self.base_variance, + rtol=config.RTOL.get(str(self.dtype)), + atol=config.ATOL.get(str(self.dtype))) def test_entropy(self): - np.testing.assert_allclose( - 
self._np_sum_rightmost(self.base_entropy, - self.reinterpreted_batch_rank), - self.entropy, - rtol=config.RTOL.get(str(self.dtype)), - atol=config.ATOL.get(str(self.dtype))) + np.testing.assert_allclose(self._np_sum_rightmost( + self.base_entropy, self.reinterpreted_batch_rank), + self.entropy, + rtol=config.RTOL.get(str(self.dtype)), + atol=config.ATOL.get(str(self.dtype))) def _np_sum_rightmost(self, value, n): return np.sum(value, tuple(range(-n, 0))) if n > 0 else value def test_log_prob(self): - np.testing.assert_allclose( - self._np_sum_rightmost(self.base_log_prob, - self.reinterpreted_batch_rank), - self.log_prob, - rtol=config.RTOL.get(str(self.dtype)), - atol=config.ATOL.get(str(self.dtype))) + np.testing.assert_allclose(self._np_sum_rightmost( + self.base_log_prob, self.reinterpreted_batch_rank), + self.log_prob, + rtol=config.RTOL.get(str(self.dtype)), + atol=config.ATOL.get(str(self.dtype))) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_multinomial.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_multinomial.py index 851645a96d4..0bec1c5a58c 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_multinomial.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_multinomial.py @@ -31,6 +31,7 @@ import parameterize ('prob-sum-non-one', 10, np.array([2., 3., 5.])), ]) class TestMultinomial(unittest.TestCase): + def setUp(self): self._dist = paddle.distribution.Multinomial( total_count=self.total_count, probs=paddle.to_tensor(self.probs)) @@ -38,29 +39,26 @@ class TestMultinomial(unittest.TestCase): def test_mean(self): mean = self._dist.mean self.assertEqual(mean.numpy().dtype, self.probs.dtype) - np.testing.assert_allclose( - mean, - self._np_mean(), - rtol=config.RTOL.get(str(self.probs.dtype)), - atol=config.ATOL.get(str(self.probs.dtype))) + np.testing.assert_allclose(mean, + self._np_mean(), + rtol=config.RTOL.get(str(self.probs.dtype)), + atol=config.ATOL.get(str(self.probs.dtype))) def test_variance(self): var = self._dist.variance self.assertEqual(var.numpy().dtype, self.probs.dtype) - np.testing.assert_allclose( - var, - self._np_variance(), - rtol=config.RTOL.get(str(self.probs.dtype)), - atol=config.ATOL.get(str(self.probs.dtype))) + np.testing.assert_allclose(var, + self._np_variance(), + rtol=config.RTOL.get(str(self.probs.dtype)), + atol=config.ATOL.get(str(self.probs.dtype))) def test_entropy(self): entropy = self._dist.entropy() self.assertEqual(entropy.numpy().dtype, self.probs.dtype) - np.testing.assert_allclose( - entropy, - self._np_entropy(), - rtol=config.RTOL.get(str(self.probs.dtype)), - atol=config.ATOL.get(str(self.probs.dtype))) + np.testing.assert_allclose(entropy, + self._np_entropy(), + rtol=config.RTOL.get(str(self.probs.dtype)), + atol=config.ATOL.get(str(self.probs.dtype))) def test_sample(self): sample_shape = () @@ -82,10 +80,12 @@ class TestMultinomial(unittest.TestCase): sample_shape = (5000, ) samples = self._dist.sample(sample_shape) sample_mean = samples.mean(axis=0) - # Tolerance value 0.2 is empirical value which is consistent with + # Tolerance value 0.2 is empirical value which is consistent with # TensorFlow - np.testing.assert_allclose( - sample_mean, self._dist.mean, atol=0, rtol=0.20) + np.testing.assert_allclose(sample_mean, + self._dist.mean, + atol=0, + rtol=0.20) def _np_variance(self): probs = self.probs / self.probs.sum(-1, keepdims=True) @@ -106,11 +106,12 @@ class 
TestMultinomial(unittest.TestCase): [ ('value-float', 10, np.array([0.2, 0.3, 0.5]), np.array([2., 3., 5.])), ('value-int', 10, np.array([0.2, 0.3, 0.5]), np.array([2, 3, 5])), - ('value-multi-dim', 10, np.array([[0.3, 0.7], [0.5, 0.5]]), - np.array([[4., 6], [8, 2]])), + ('value-multi-dim', 10, np.array([[0.3, 0.7], [0.5, 0.5] + ]), np.array([[4., 6], [8, 2]])), # ('value-sum-non-n', 10, np.array([0.5, 0.2, 0.3]), np.array([4,5,2])), ]) class TestMultinomialPmf(unittest.TestCase): + def setUp(self): self._dist = paddle.distribution.Multinomial( total_count=self.total_count, probs=paddle.to_tensor(self.probs)) @@ -132,6 +133,7 @@ class TestMultinomialPmf(unittest.TestCase): ('probs_zero_dim', np.array(0)), ]) class TestMultinomialException(unittest.TestCase): + def TestInit(self): with self.assertRaises(ValueError): paddle.distribution.Multinomial(self.total_count, diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_multinomial_static.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_multinomial_static.py index ac86ad8d3e1..f9beb6b7702 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_multinomial_static.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_multinomial_static.py @@ -33,6 +33,7 @@ paddle.enable_static() ('prob-sum-non-one', 5, np.array([2., 3., 5.])), ]) class TestMultinomial(unittest.TestCase): + def setUp(self): startup_program = paddle.static.Program() main_program = paddle.static.Program() @@ -57,28 +58,25 @@ class TestMultinomial(unittest.TestCase): def test_mean(self): self.assertEqual(str(self.mean.dtype).split('.')[-1], self.probs.dtype) - np.testing.assert_allclose( - self.mean, - self._np_mean(), - rtol=config.RTOL.get(str(self.probs.dtype)), - atol=config.ATOL.get(str(self.probs.dtype))) + np.testing.assert_allclose(self.mean, + self._np_mean(), + rtol=config.RTOL.get(str(self.probs.dtype)), + atol=config.ATOL.get(str(self.probs.dtype))) def test_variance(self): self.assertEqual(str(self.var.dtype).split('.')[-1], self.probs.dtype) - np.testing.assert_allclose( - self.var, - self._np_variance(), - rtol=config.RTOL.get(str(self.probs.dtype)), - atol=config.ATOL.get(str(self.probs.dtype))) + np.testing.assert_allclose(self.var, + self._np_variance(), + rtol=config.RTOL.get(str(self.probs.dtype)), + atol=config.ATOL.get(str(self.probs.dtype))) def test_entropy(self): self.assertEqual( str(self.entropy.dtype).split('.')[-1], self.probs.dtype) - np.testing.assert_allclose( - self.entropy, - self._np_entropy(), - rtol=config.RTOL.get(str(self.probs.dtype)), - atol=config.ATOL.get(str(self.probs.dtype))) + np.testing.assert_allclose(self.entropy, + self._np_entropy(), + rtol=config.RTOL.get(str(self.probs.dtype)), + atol=config.ATOL.get(str(self.probs.dtype))) def test_sample(self): self.assertEqual( @@ -107,11 +105,12 @@ class TestMultinomial(unittest.TestCase): [ ('value-float', 5, np.array([0.2, 0.3, 0.5]), np.array([1., 1., 3.])), ('value-int', 5, np.array([0.2, 0.3, 0.5]), np.array([2, 2, 1])), - ('value-multi-dim', 5, np.array([[0.3, 0.7], [0.5, 0.5]]), - np.array([[1., 4.], [2., 3.]])), + ('value-multi-dim', 5, np.array([[0.3, 0.7], [0.5, 0.5] + ]), np.array([[1., 4.], [2., 3.]])), # ('value-sum-non-n', 10, np.array([0.5, 0.2, 0.3]), np.array([4,5,2])), ]) class TestMultinomialPmf(unittest.TestCase): + def setUp(self): startup_program = paddle.static.Program() main_program = paddle.static.Program() @@ -133,12 +132,12 @@ class TestMultinomialPmf(unittest.TestCase): 
fetch_list=fetch_list) def test_prob(self): - np.testing.assert_allclose( - self.pmf, - scipy.stats.multinomial.pmf(self.value, self.total_count, - self.probs), - rtol=config.RTOL.get(str(self.probs.dtype)), - atol=config.ATOL.get(str(self.probs.dtype))) + np.testing.assert_allclose(self.pmf, + scipy.stats.multinomial.pmf( + self.value, self.total_count, + self.probs), + rtol=config.RTOL.get(str(self.probs.dtype)), + atol=config.ATOL.get(str(self.probs.dtype))) @parameterize.place(config.DEVICES) @@ -149,6 +148,7 @@ class TestMultinomialPmf(unittest.TestCase): ('probs_zero_dim', np.array(0)), ]) class TestMultinomialException(unittest.TestCase): + def setUp(self): startup_program = paddle.static.Program() self.main_program = paddle.static.Program() diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_normal.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_normal.py index 0c23e367f98..9e597c3d363 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_normal.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_normal.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -25,6 +25,7 @@ from test_distribution import DistributionNumpy class NormalNumpy(DistributionNumpy): + def __init__(self, loc, scale): self.loc = np.array(loc) self.scale = np.array(scale) @@ -39,8 +40,9 @@ class NormalNumpy(DistributionNumpy): def log_prob(self, value): var = self.scale * self.scale log_scale = np.log(self.scale) - return -((value - self.loc) * (value - self.loc)) / ( - 2. * var) - log_scale - math.log(math.sqrt(2. * math.pi)) + return -((value - self.loc) * + (value - self.loc)) / (2. * var) - log_scale - math.log( + math.sqrt(2. 
* math.pi)) def probs(self, value): var = self.scale * self.scale @@ -60,6 +62,7 @@ class NormalNumpy(DistributionNumpy): class NormalTest(unittest.TestCase): + def setUp(self, use_gpu=False, batch_size=2, dims=3): self.use_gpu = use_gpu if not use_gpu: @@ -105,8 +108,9 @@ class NormalTest(unittest.TestCase): self.static_other_loc = self.other_loc_np self.static_other_scale = self.other_scale_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[], dtype='float32') + self.static_values = layers.data(name='values', + shape=[], + dtype='float32') def compare_with_numpy(self, fetch_list, sample_shape=7, tolerance=1e-6): sample, entropy, log_prob, probs, kl = fetch_list @@ -127,14 +131,22 @@ class NormalTest(unittest.TestCase): log_tolerance = 1e-4 np.testing.assert_equal(sample.shape, np_sample.shape) - np.testing.assert_allclose( - entropy, np_entropy, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - log_prob, np_lp, rtol=log_tolerance, atol=log_tolerance) - np.testing.assert_allclose( - probs, np_p, rtol=log_tolerance, atol=log_tolerance) - np.testing.assert_allclose( - kl, np_kl, rtol=log_tolerance, atol=log_tolerance) + np.testing.assert_allclose(entropy, + np_entropy, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(log_prob, + np_lp, + rtol=log_tolerance, + atol=log_tolerance) + np.testing.assert_allclose(probs, + np_p, + rtol=log_tolerance, + atol=log_tolerance) + np.testing.assert_allclose(kl, + np_kl, + rtol=log_tolerance, + atol=log_tolerance) def test_normal_distribution_dygraph(self, sample_shape=7, tolerance=1e-6): paddle.disable_static(self.place) @@ -182,6 +194,7 @@ class NormalTest(unittest.TestCase): class NormalTest2(NormalTest): + def init_numpy_data(self, batch_size, dims): # loc ans scale are 'int' self.loc_np = int((np.random.ranf() - 0.5) * 8) @@ -197,6 +210,7 @@ class NormalTest2(NormalTest): class NormalTest3(NormalTest): + def init_numpy_data(self, batch_size, dims): # test broadcast: loc is float, scale is numpy.ndarray with dtype 'float32'. self.loc_np = (np.random.ranf() - 0.5) * 4 @@ -218,11 +232,13 @@ class NormalTest3(NormalTest): self.static_other_loc = self.other_loc_np self.static_other_scale = self.other_scale_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float32') class NormalTest4(NormalTest): + def init_numpy_data(self, batch_size, dims): # loc and scale are numpy.ndarray with dtype 'float32'. self.loc_np = np.random.randn(batch_size, dims).astype('float32') @@ -244,11 +260,13 @@ class NormalTest4(NormalTest): self.static_other_loc = self.other_loc_np self.static_other_scale = self.other_scale_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float32') class NormalTest5(NormalTest): + def init_numpy_data(self, batch_size, dims): # loc and scale are numpy.ndarray with dtype 'float64'. 
self.loc_np = np.random.randn(batch_size, dims).astype('float64') @@ -277,11 +295,13 @@ class NormalTest5(NormalTest): self.static_other_loc = self.other_loc_np self.static_other_scale = self.other_scale_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float64') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float64') class NormalTest6(NormalTest): + def init_numpy_data(self, batch_size, dims): # loc and scale are Tensor with dtype 'VarType.FP32'. self.loc_np = np.random.randn(batch_size, dims).astype('float32') @@ -306,19 +326,25 @@ class NormalTest6(NormalTest): def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.static_loc = layers.data( - name='loc', shape=[dims], dtype='float32') - self.static_scale = layers.data( - name='scale', shape=[dims], dtype='float32') - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32') - self.static_other_loc = layers.data( - name='other_loc', shape=[dims], dtype='float32') - self.static_other_scale = layers.data( - name='other_scale', shape=[dims], dtype='float32') + self.static_loc = layers.data(name='loc', + shape=[dims], + dtype='float32') + self.static_scale = layers.data(name='scale', + shape=[dims], + dtype='float32') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float32') + self.static_other_loc = layers.data(name='other_loc', + shape=[dims], + dtype='float32') + self.static_other_scale = layers.data(name='other_scale', + shape=[dims], + dtype='float32') class NormalTest7(NormalTest): + def init_numpy_data(self, batch_size, dims): # loc and scale are Tensor with dtype 'VarType.FP64'. self.loc_np = np.random.randn(batch_size, dims).astype('float64') @@ -338,26 +364,32 @@ class NormalTest7(NormalTest): self.dynamic_loc = paddle.to_tensor(self.loc_np, dtype='float64') self.dynamic_scale = paddle.to_tensor(self.scale_np, dtype='float64') self.dynamic_values = paddle.to_tensor(self.values_np, dtype='float64') - self.dynamic_other_loc = paddle.to_tensor( - self.other_loc_np, dtype='float64') - self.dynamic_other_scale = paddle.to_tensor( - self.other_scale_np, dtype='float64') + self.dynamic_other_loc = paddle.to_tensor(self.other_loc_np, + dtype='float64') + self.dynamic_other_scale = paddle.to_tensor(self.other_scale_np, + dtype='float64') def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.static_loc = layers.data( - name='loc', shape=[dims], dtype='float64') - self.static_scale = layers.data( - name='scale', shape=[dims], dtype='float64') - self.static_values = layers.data( - name='values', shape=[dims], dtype='float64') - self.static_other_loc = layers.data( - name='other_loc', shape=[dims], dtype='float64') - self.static_other_scale = layers.data( - name='other_scale', shape=[dims], dtype='float64') + self.static_loc = layers.data(name='loc', + shape=[dims], + dtype='float64') + self.static_scale = layers.data(name='scale', + shape=[dims], + dtype='float64') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float64') + self.static_other_loc = layers.data(name='other_loc', + shape=[dims], + dtype='float64') + self.static_other_scale = layers.data(name='other_scale', + shape=[dims], + dtype='float64') class NormalTest8(NormalTest): + def init_numpy_data(self, batch_size, dims): # loc and scale are Tensor with dtype 'VarType.FP64'. value's dtype is 'VarType.FP32'. 
self.loc_np = np.random.randn(batch_size, dims).astype('float64') @@ -377,26 +409,32 @@ class NormalTest8(NormalTest): self.dynamic_loc = paddle.to_tensor(self.loc_np, dtype='float64') self.dynamic_scale = paddle.to_tensor(self.scale_np, dtype='float64') self.dynamic_values = paddle.to_tensor(self.values_np) - self.dynamic_other_loc = paddle.to_tensor( - self.other_loc_np, dtype='float64') - self.dynamic_other_scale = paddle.to_tensor( - self.other_scale_np, dtype='float64') + self.dynamic_other_loc = paddle.to_tensor(self.other_loc_np, + dtype='float64') + self.dynamic_other_scale = paddle.to_tensor(self.other_scale_np, + dtype='float64') def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.static_loc = layers.data( - name='loc', shape=[dims], dtype='float64') - self.static_scale = layers.data( - name='scale', shape=[dims], dtype='float64') - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32') - self.static_other_loc = layers.data( - name='other_loc', shape=[dims], dtype='float64') - self.static_other_scale = layers.data( - name='other_scale', shape=[dims], dtype='float64') + self.static_loc = layers.data(name='loc', + shape=[dims], + dtype='float64') + self.static_scale = layers.data(name='scale', + shape=[dims], + dtype='float64') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float32') + self.static_other_loc = layers.data(name='other_loc', + shape=[dims], + dtype='float64') + self.static_other_scale = layers.data(name='other_scale', + shape=[dims], + dtype='float64') class NormalTest9(NormalTest): + def init_numpy_data(self, batch_size, dims): # loc and scale are list. self.loc_np = np.random.randn(batch_size, @@ -422,11 +460,13 @@ class NormalTest9(NormalTest): self.static_other_loc = self.other_loc_np self.static_other_scale = self.other_scale_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float32') class NormalTest10(NormalTest): + def init_numpy_data(self, batch_size, dims): # loc and scale are tuple. 
self.loc_np = tuple( @@ -452,8 +492,9 @@ class NormalTest10(NormalTest): self.static_other_loc = self.other_loc_np self.static_other_scale = self.other_scale_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float32') if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_transform.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_transform.py index b1304a52ef3..8311a10f4d5 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_transform.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_transform.py @@ -25,21 +25,22 @@ import parameterize as param @param.place(config.DEVICES) class TestTransform(unittest.TestCase): + def setUp(self): self._t = transform.Transform() - @param.param_func([ - (paddle.distribution.Distribution(), - paddle.distribution.TransformedDistribution), - (paddle.distribution.ExpTransform(), paddle.distribution.ChainTransform) - ]) + @param.param_func([(paddle.distribution.Distribution(), + paddle.distribution.TransformedDistribution), + (paddle.distribution.ExpTransform(), + paddle.distribution.ChainTransform)]) def test_call(self, input, expected_type): t = transform.Transform() self.assertIsInstance(t(input), expected_type) - @param.param_func( - [(transform.Type.BIJECTION, True), (transform.Type.INJECTION, True), - (transform.Type.SURJECTION, False), (transform.Type.OTHER, False)]) + @param.param_func([(transform.Type.BIJECTION, True), + (transform.Type.INJECTION, True), + (transform.Type.SURJECTION, False), + (transform.Type.OTHER, False)]) def test_is_injective(self, type, expected): transform.Transform._type = type self.assertEqual(self._t._is_injective(), expected) @@ -50,26 +51,26 @@ class TestTransform(unittest.TestCase): def test_codomain(self): self.assertTrue(isinstance(self._t._codomain, variable.Real)) - @param.param_func([(0, TypeError), (paddle.rand((2, 3)), - NotImplementedError)]) + @param.param_func([(0, TypeError), (paddle.rand( + (2, 3)), NotImplementedError)]) def test_forward(self, input, expected): with self.assertRaises(expected): self._t.forward(input) - @param.param_func([(0, TypeError), (paddle.rand((2, 3)), - NotImplementedError)]) + @param.param_func([(0, TypeError), (paddle.rand( + (2, 3)), NotImplementedError)]) def test_inverse(self, input, expected): with self.assertRaises(expected): self._t.inverse(input) - @param.param_func([(0, TypeError), (paddle.rand((2, 3)), - NotImplementedError)]) + @param.param_func([(0, TypeError), (paddle.rand( + (2, 3)), NotImplementedError)]) def test_forward_log_det_jacobian(self, input, expected): with self.assertRaises(expected): self._t.forward_log_det_jacobian(input) - @param.param_func([(0, TypeError), (paddle.rand((2, 3)), - NotImplementedError)]) + @param.param_func([(0, TypeError), (paddle.rand( + (2, 3)), NotImplementedError)]) def test_inverse_log_det_jacobian(self, input, expected): with self.assertRaises(expected): self._t.inverse_log_det_jacobian(input) @@ -87,6 +88,7 @@ class TestTransform(unittest.TestCase): @param.place(config.DEVICES) class TestAbsTransform(unittest.TestCase): + def setUp(self): self._t = transform.AbsTransform() @@ -107,46 +109,44 @@ class TestAbsTransform(unittest.TestCase): (np.array([[1., -1., -0.1], [-3., -0.1, 0]]), np.array([[1., 1., 0.1], [3., 0.1, 0]]))]) def test_forward(self, input, 
expected): - np.testing.assert_allclose( - self._t.forward(paddle.to_tensor(input)).numpy(), - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(self._t.forward( + paddle.to_tensor(input)).numpy(), + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([(np.array(1.), (-np.array(1.), np.array(1.)))]) def test_inverse(self, input, expected): actual0, actual1 = self._t.inverse(paddle.to_tensor(input)) expected0, expected1 = expected - np.testing.assert_allclose( - actual0.numpy(), - expected0, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) - np.testing.assert_allclose( - actual1.numpy(), - expected1, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(actual0.numpy(), + expected0, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(actual1.numpy(), + expected1, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) def test_forward_log_det_jacobian(self): with self.assertRaises(NotImplementedError): self._t.forward_log_det_jacobian(paddle.rand((10, ))) - @param.param_func([(np.array(1.), (np.array(0.), np.array(0.))), ]) + @param.param_func([ + (np.array(1.), (np.array(0.), np.array(0.))), + ]) def test_inverse_log_det_jacobian(self, input, expected): actual0, actual1 = self._t.inverse_log_det_jacobian( paddle.to_tensor(input)) expected0, expected1 = expected - np.testing.assert_allclose( - actual0.numpy(), - expected0, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) - np.testing.assert_allclose( - actual1.numpy(), - expected1, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(actual0.numpy(), + expected0, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(actual1.numpy(), + expected1, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([((), ()), ((2, 3, 5), (2, 3, 5))]) def test_forward_shape(self, shape, expected_shape): @@ -163,9 +163,10 @@ class TestAbsTransform(unittest.TestCase): ('broadcast', np.random.rand(2, 10), np.random.rand(10)), ]) class TestAffineTransform(unittest.TestCase): + def setUp(self): - self._t = transform.AffineTransform( - paddle.to_tensor(self.loc), paddle.to_tensor(self.scale)) + self._t = transform.AffineTransform(paddle.to_tensor(self.loc), + paddle.to_tensor(self.scale)) @param.param_func([ (paddle.rand([1]), 0, TypeError), @@ -253,6 +254,7 @@ class TestAffineTransform(unittest.TestCase): @param.place(config.DEVICES) class TestExpTransform(unittest.TestCase): + def setUp(self): self._t = transform.ExpTransform() @@ -269,35 +271,36 @@ class TestExpTransform(unittest.TestCase): self.assertEqual(self._t._codomain.event_rank, 0) self.assertEqual(self._t._codomain.is_discrete, False) - @param.param_func( - [(np.array([0., 1., 2., 3.]), np.exp(np.array([0., 1., 2., 3.]))), - (np.array([[0., 1., 2., 3.], [-5., 6., 7., 8.]]), - np.exp(np.array([[0., 1., 2., 3.], [-5., 6., 7., 8.]])))]) + @param.param_func([(np.array([0., 1., 2., + 3.]), np.exp(np.array([0., 1., 2., 3.]))), + (np.array([[0., 1., 2., 3.], [-5., 6., 7., 8.]]), + np.exp(np.array([[0., 1., 2., 3.], [-5., 6., 7., + 8.]])))]) def test_forward(self, input, expected): - 
np.testing.assert_allclose( - self._t.forward(paddle.to_tensor(input)).numpy(), - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(self._t.forward( + paddle.to_tensor(input)).numpy(), + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([(np.array([1., 2., 3.]), np.log(np.array([1., 2., 3.]))), (np.array([[1., 2., 3.], [6., 7., 8.]]), np.log(np.array([[1., 2., 3.], [6., 7., 8.]])))]) def test_inverse(self, input, expected): - np.testing.assert_allclose( - self._t.inverse(paddle.to_tensor(input)).numpy(), - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(self._t.inverse( + paddle.to_tensor(input)).numpy(), + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([(np.array([1., 2., 3.]), ), (np.array([[1., 2., 3.], [6., 7., 8.]]), )]) def test_forward_log_det_jacobian(self, input): - np.testing.assert_allclose( - self._t.forward_log_det_jacobian(paddle.to_tensor(input)).numpy(), - self._np_forward_jacobian(input), - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(self._t.forward_log_det_jacobian( + paddle.to_tensor(input)).numpy(), + self._np_forward_jacobian(input), + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) def _np_forward_jacobian(self, x): return x @@ -305,11 +308,11 @@ class TestExpTransform(unittest.TestCase): @param.param_func([(np.array([1., 2., 3.]), ), (np.array([[1., 2., 3.], [6., 7., 8.]]), )]) def test_inverse_log_det_jacobian(self, input): - np.testing.assert_allclose( - self._t.inverse_log_det_jacobian(paddle.to_tensor(input)).numpy(), - self._np_inverse_jacobian(input), - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(self._t.inverse_log_det_jacobian( + paddle.to_tensor(input)).numpy(), + self._np_inverse_jacobian(input), + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) def _np_inverse_jacobian(self, y): return -self._np_forward_jacobian(np.log(y)) @@ -325,19 +328,21 @@ class TestExpTransform(unittest.TestCase): @param.place(config.DEVICES) class TestChainTransform(unittest.TestCase): + @param.param_func([(paddle.distribution.Transform, TypeError), ([0], TypeError)]) def test_init_exception(self, transforms, exception): with self.assertRaises(exception): paddle.distribution.ChainTransform(transforms) - @param.param_func(( - (transform.ChainTransform( - (transform.AbsTransform(), - transform.AffineTransform(paddle.rand([1]), paddle.rand([1])))), - False), (transform.ChainTransform(( - transform.AffineTransform(paddle.rand([1]), paddle.rand([1])), - transform.ExpTransform(), )), True))) + @param.param_func(((transform.ChainTransform( + (transform.AbsTransform(), + transform.AffineTransform(paddle.rand([1]), paddle.rand([1])))), + False), (transform.ChainTransform(( + transform.AffineTransform(paddle.rand([1]), + paddle.rand([1])), + transform.ExpTransform(), + )), True))) def test_is_injective(self, chain, expected): self.assertEqual(chain._is_injective(), expected) @@ -361,74 +366,83 @@ class TestChainTransform(unittest.TestCase): self.assertEqual(input._codomain.event_rank, expected.event_rank) self.assertEqual(input._codomain.is_discrete, expected.is_discrete) - @param.param_func( - 
[(transform.ChainTransform((transform.AffineTransform( - paddle.to_tensor(0.0), paddle.to_tensor(1.0)), - transform.ExpTransform())), - np.array([0., 1., 2., 3.]), np.exp(np.array([0., 1., 2., 3.]) * 1.0)), - (transform.ChainTransform((transform.ExpTransform(), - transform.TanhTransform())), - np.array([[0., -1., 2., -3.], [-5., 6., 7., -8.]]), - np.tanh(np.exp(np.array([[0., -1., 2., -3.], [-5., 6., 7., -8.]]))))]) + @param.param_func([ + (transform.ChainTransform( + (transform.AffineTransform(paddle.to_tensor(0.0), + paddle.to_tensor(1.0)), + transform.ExpTransform())), np.array([0., 1., 2., 3.]), + np.exp(np.array([0., 1., 2., 3.]) * 1.0)), + (transform.ChainTransform( + (transform.ExpTransform(), transform.TanhTransform())), + np.array([[0., -1., 2., -3.], [-5., 6., 7., -8.]]), + np.tanh(np.exp(np.array([[0., -1., 2., -3.], [-5., 6., 7., -8.]])))) + ]) def test_forward(self, chain, input, expected): - np.testing.assert_allclose( - chain.forward(paddle.to_tensor(input)).numpy(), - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(chain.forward( + paddle.to_tensor(input)).numpy(), + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) - @param.param_func( - [(transform.ChainTransform( - (transform.AffineTransform( - paddle.to_tensor(0.0), paddle.to_tensor(-1.0)), + @param.param_func([ + (transform.ChainTransform( + (transform.AffineTransform(paddle.to_tensor(0.0), + paddle.to_tensor(-1.0)), transform.ExpTransform())), np.array([0., 1., 2., 3.]), - np.log(np.array([0., 1., 2., 3.])) / (-1.0)), - (transform.ChainTransform((transform.ExpTransform(), - transform.TanhTransform())), - np.array([[0., 1., 2., 3.], [5., 6., 7., 8.]]), - np.log(np.arctanh(np.array([[0., 1., 2., 3.], [5., 6., 7., 8.]]))))]) + np.log(np.array([0., 1., 2., 3.])) / (-1.0)), + (transform.ChainTransform( + (transform.ExpTransform(), transform.TanhTransform())), + np.array([[0., 1., 2., 3.], [5., 6., 7., 8.]]), + np.log(np.arctanh(np.array([[0., 1., 2., 3.], [5., 6., 7., 8.]])))) + ]) def test_inverse(self, chain, input, expected): - np.testing.assert_allclose( - chain.inverse(paddle.to_tensor(input)).numpy(), - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(chain.inverse( + paddle.to_tensor(input)).numpy(), + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([ (transform.ChainTransform( - (transform.AffineTransform( - paddle.to_tensor(0.0), paddle.to_tensor(-1.0)), + (transform.AffineTransform(paddle.to_tensor(0.0), + paddle.to_tensor(-1.0)), transform.PowerTransform(paddle.to_tensor(2.0)))), np.array([1., 2., 3.]), np.log(2. 
* np.array([1., 2., 3.]))), ]) def test_forward_log_det_jacobian(self, chain, input, expected): - np.testing.assert_allclose( - chain.forward_log_det_jacobian(paddle.to_tensor(input)).numpy(), - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) - - @param.param_func([(transform.ChainTransform((transform.AffineTransform( - paddle.to_tensor(0.0), - paddle.to_tensor(-1.0)), transform.ExpTransform())), (2, 3, 5), - (2, 3, 5)), ]) + np.testing.assert_allclose(chain.forward_log_det_jacobian( + paddle.to_tensor(input)).numpy(), + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) + + @param.param_func([ + (transform.ChainTransform( + (transform.AffineTransform(paddle.to_tensor(0.0), + paddle.to_tensor(-1.0)), + transform.ExpTransform())), (2, 3, 5), (2, 3, 5)), + ]) def test_forward_shape(self, chain, shape, expected_shape): self.assertEqual(chain.forward_shape(shape), expected_shape) - @param.param_func([(transform.ChainTransform((transform.AffineTransform( - paddle.to_tensor(0.0), - paddle.to_tensor(-1.0)), transform.ExpTransform())), (2, 3, 5), - (2, 3, 5)), ]) + @param.param_func([ + (transform.ChainTransform( + (transform.AffineTransform(paddle.to_tensor(0.0), + paddle.to_tensor(-1.0)), + transform.ExpTransform())), (2, 3, 5), (2, 3, 5)), + ]) def test_inverse_shape(self, chain, shape, expected_shape): self.assertEqual(chain.inverse_shape(shape), expected_shape) @param.place(config.DEVICES) @param.param_cls( - (param.TEST_CASE_NAME, 'base', 'reinterpreted_batch_rank', 'x'), - [('rank-over-zero', transform.ExpTransform(), 2, np.random.rand(2, 3, 3)), - ]) + (param.TEST_CASE_NAME, 'base', 'reinterpreted_batch_rank', 'x'), [ + ('rank-over-zero', transform.ExpTransform(), 2, np.random.rand(2, 3, + 3)), + ]) class TestIndependentTransform(unittest.TestCase): + def setUp(self): self._t = transform.IndependentTransform(self.base, self.reinterpreted_batch_rank) @@ -474,16 +488,14 @@ class TestIndependentTransform(unittest.TestCase): def test_forward_log_det_jacobian(self): actual = self._t.forward_log_det_jacobian(paddle.to_tensor(self.x)) - self.assertEqual( - tuple(actual.shape), self.x.shape[:-self.reinterpreted_batch_rank]) - expected = self.base.forward_log_det_jacobian( - paddle.to_tensor(self.x)).sum( - list(range(-self.reinterpreted_batch_rank, 0))) - np.testing.assert_allclose( - actual.numpy(), - expected.numpy(), - rtol=config.RTOL.get(str(self.x.dtype)), - atol=config.ATOL.get(str(self.x.dtype))) + self.assertEqual(tuple(actual.shape), + self.x.shape[:-self.reinterpreted_batch_rank]) + expected = self.base.forward_log_det_jacobian(paddle.to_tensor( + self.x)).sum(list(range(-self.reinterpreted_batch_rank, 0))) + np.testing.assert_allclose(actual.numpy(), + expected.numpy(), + rtol=config.RTOL.get(str(self.x.dtype)), + atol=config.ATOL.get(str(self.x.dtype))) @param.param_func([((), ()), ((2, 3, 5), (2, 3, 5))]) def test_forward_shape(self, shape, expected_shape): @@ -496,6 +508,7 @@ class TestIndependentTransform(unittest.TestCase): @param.place(config.DEVICES) class TestPowerTransform(unittest.TestCase): + def setUp(self): self._t = transform.PowerTransform(paddle.to_tensor(2.)) @@ -516,35 +529,34 @@ class TestPowerTransform(unittest.TestCase): self.assertEqual(self._t._codomain.event_rank, 0) self.assertEqual(self._t._codomain.is_discrete, False) - @param.param_func([(np.array([2.]), np.array([0., -1., 2.]), np.power( - np.array([0., -1., 2.]), - 2.)), (np.array([[0.], [3.]]), np.array([[1., 0.], 
[5., 6.]]), np.power( - np.array([[1., 0.], [5., 6.]]), np.array([[0.], [3.]])))]) + @param.param_func([(np.array([2.]), np.array([0., -1., 2.]), + np.power(np.array([0., -1., 2.]), 2.)), + (np.array([[0.], [3.]]), np.array([[1., 0.], [5., 6.]]), + np.power(np.array([[1., 0.], [5., 6.]]), + np.array([[0.], [3.]])))]) def test_forward(self, power, x, y): t = transform.PowerTransform(paddle.to_tensor(power)) - np.testing.assert_allclose( - t.forward(paddle.to_tensor(x)).numpy(), - y, - rtol=config.RTOL.get(str(x.dtype)), - atol=config.ATOL.get(str(x.dtype))) + np.testing.assert_allclose(t.forward(paddle.to_tensor(x)).numpy(), + y, + rtol=config.RTOL.get(str(x.dtype)), + atol=config.ATOL.get(str(x.dtype))) @param.param_func([(np.array([2.]), np.array([4.]), np.array([2.]))]) def test_inverse(self, power, y, x): t = transform.PowerTransform(paddle.to_tensor(power)) - np.testing.assert_allclose( - t.inverse(paddle.to_tensor(y)).numpy(), - x, - rtol=config.RTOL.get(str(x.dtype)), - atol=config.ATOL.get(str(x.dtype))) + np.testing.assert_allclose(t.inverse(paddle.to_tensor(y)).numpy(), + x, + rtol=config.RTOL.get(str(x.dtype)), + atol=config.ATOL.get(str(x.dtype))) @param.param_func(((np.array([2.]), np.array([3., 1.4, 0.8])), )) def test_forward_log_det_jacobian(self, power, x): t = transform.PowerTransform(paddle.to_tensor(power)) - np.testing.assert_allclose( - t.forward_log_det_jacobian(paddle.to_tensor(x)).numpy(), - self._np_forward_jacobian(power, x), - rtol=config.RTOL.get(str(x.dtype)), - atol=config.ATOL.get(str(x.dtype))) + np.testing.assert_allclose(t.forward_log_det_jacobian( + paddle.to_tensor(x)).numpy(), + self._np_forward_jacobian(power, x), + rtol=config.RTOL.get(str(x.dtype)), + atol=config.ATOL.get(str(x.dtype))) def _np_forward_jacobian(self, alpha, x): return np.abs(np.log(alpha * np.power(x, alpha - 1))) @@ -560,6 +572,7 @@ class TestPowerTransform(unittest.TestCase): @param.place(config.DEVICES) class TestTanhTransform(unittest.TestCase): + def setUp(self): self._t = transform.TanhTransform() @@ -578,36 +591,37 @@ class TestTanhTransform(unittest.TestCase): self.assertEqual(self._t._codomain._constraint._lower, -1) self.assertEqual(self._t._codomain._constraint._upper, 1) - @param.param_func( - [(np.array([0., 1., 2., 3.]), np.tanh(np.array([0., 1., 2., 3.]))), - (np.array([[0., 1., 2., 3.], [-5., 6., 7., 8.]]), - np.tanh(np.array([[0., 1., 2., 3.], [-5., 6., 7., 8.]])))]) + @param.param_func([(np.array([0., 1., 2., + 3.]), np.tanh(np.array([0., 1., 2., 3.]))), + (np.array([[0., 1., 2., 3.], [-5., 6., 7., 8.]]), + np.tanh(np.array([[0., 1., 2., 3.], [-5., 6., 7., + 8.]])))]) def test_forward(self, input, expected): - np.testing.assert_allclose( - self._t.forward(paddle.to_tensor(input)).numpy(), - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) - - @param.param_func( - [(np.array([1., 2., 3.]), np.arctanh(np.array([1., 2., 3.]))), - (np.array([[1., 2., 3.], [6., 7., 8.]]), - np.arctanh(np.array([[1., 2., 3.], [6., 7., 8.]])))]) + np.testing.assert_allclose(self._t.forward( + paddle.to_tensor(input)).numpy(), + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) + + @param.param_func([(np.array([1., 2., + 3.]), np.arctanh(np.array([1., 2., 3.]))), + (np.array([[1., 2., 3.], [6., 7., 8.]]), + np.arctanh(np.array([[1., 2., 3.], [6., 7., 8.]])))]) def test_inverse(self, input, expected): - np.testing.assert_allclose( - self._t.inverse(paddle.to_tensor(input)).numpy(), - expected, - 
rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(self._t.inverse( + paddle.to_tensor(input)).numpy(), + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([(np.array([1., 2., 3.]), ), (np.array([[1., 2., 3.], [6., 7., 8.]]), )]) def test_forward_log_det_jacobian(self, input): - np.testing.assert_allclose( - self._t.forward_log_det_jacobian(paddle.to_tensor(input)).numpy(), - self._np_forward_jacobian(input), - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(self._t.forward_log_det_jacobian( + paddle.to_tensor(input)).numpy(), + self._np_forward_jacobian(input), + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) def _np_forward_jacobian(self, x): return 2. * (np.log(2.) - x - self._np_softplus(-2. * x)) @@ -623,11 +637,11 @@ class TestTanhTransform(unittest.TestCase): @param.param_func([(np.array([1., 2., 3.]), ), (np.array([[1., 2., 3.], [6., 7., 8.]]), )]) def test_inverse_log_det_jacobian(self, input): - np.testing.assert_allclose( - self._t.inverse_log_det_jacobian(paddle.to_tensor(input)).numpy(), - self._np_inverse_jacobian(input), - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(self._t.inverse_log_det_jacobian( + paddle.to_tensor(input)).numpy(), + self._np_inverse_jacobian(input), + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([((), ()), ((2, 3, 5), (2, 3, 5))]) def test_forward_shape(self, shape, expected_shape): @@ -643,6 +657,7 @@ class TestTanhTransform(unittest.TestCase): ('regular_shape', (2, 3), (3, 2)), ]) class TestReshapeTransform(unittest.TestCase): + def setUp(self): self._t = transform.ReshapeTransform(self.in_event_shape, self.out_event_shape) @@ -664,27 +679,24 @@ class TestReshapeTransform(unittest.TestCase): def test_forward(self): x = paddle.ones(self.in_event_shape) - np.testing.assert_allclose( - self._t.forward(x), - paddle.ones(self.out_event_shape), - rtol=config.RTOL.get(str(x.numpy().dtype)), - atol=config.ATOL.get(str(x.numpy().dtype))) + np.testing.assert_allclose(self._t.forward(x), + paddle.ones(self.out_event_shape), + rtol=config.RTOL.get(str(x.numpy().dtype)), + atol=config.ATOL.get(str(x.numpy().dtype))) def test_inverse(self): x = paddle.ones(self.out_event_shape) - np.testing.assert_allclose( - self._t.inverse(x).numpy(), - paddle.ones(self.in_event_shape).numpy(), - rtol=config.RTOL.get(str(x.numpy().dtype)), - atol=config.ATOL.get(str(x.numpy().dtype))) + np.testing.assert_allclose(self._t.inverse(x).numpy(), + paddle.ones(self.in_event_shape).numpy(), + rtol=config.RTOL.get(str(x.numpy().dtype)), + atol=config.ATOL.get(str(x.numpy().dtype))) def test_forward_log_det_jacobian(self): x = paddle.ones(self.in_event_shape) - np.testing.assert_allclose( - self._t.forward_log_det_jacobian(x).numpy(), - paddle.zeros([1]).numpy(), - rtol=config.RTOL.get(str(x.numpy().dtype)), - atol=config.ATOL.get(str(x.numpy().dtype))) + np.testing.assert_allclose(self._t.forward_log_det_jacobian(x).numpy(), + paddle.zeros([1]).numpy(), + rtol=config.RTOL.get(str(x.numpy().dtype)), + atol=config.ATOL.get(str(x.numpy().dtype))) def test_in_event_shape(self): self.assertEqual(self._t.in_event_shape, self.in_event_shape) @@ -710,6 +722,7 @@ def _np_softplus(x, beta=1., threshold=20.): class TestSigmoidTransform(unittest.TestCase): + def 
setUp(self): self._t = transform.SigmoidTransform() @@ -722,33 +735,32 @@ class TestSigmoidTransform(unittest.TestCase): def test_codomain(self): self.assertTrue(isinstance(self._t._codomain, variable.Variable)) - @param.param_func(((np.ones((5, 10)), - 1 / (1 + np.exp(-np.ones((5, 10))))), )) + @param.param_func(((np.ones( + (5, 10)), 1 / (1 + np.exp(-np.ones((5, 10))))), )) def test_forward(self, input, expected): - np.testing.assert_allclose( - self._t.forward(paddle.to_tensor(input)), - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(self._t.forward(paddle.to_tensor(input)), + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) - @param.param_func(( - (np.ones(10), np.log(np.ones(10)) - np.log1p(-np.ones(10))), )) + @param.param_func( + ((np.ones(10), np.log(np.ones(10)) - np.log1p(-np.ones(10))), )) def test_inverse(self, input, expected): - np.testing.assert_allclose( - self._t.inverse(paddle.to_tensor(input)).numpy(), - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) - - @param.param_func(( - (np.ones(10), - -_np_softplus(-np.ones(10)) - _np_softplus(np.ones(10))), )) + np.testing.assert_allclose(self._t.inverse( + paddle.to_tensor(input)).numpy(), + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) + + @param.param_func( + ((np.ones(10), + -_np_softplus(-np.ones(10)) - _np_softplus(np.ones(10))), )) def test_forward_log_det_jacobian(self, input, expected): - np.testing.assert_allclose( - self._t.forward_log_det_jacobian(paddle.to_tensor(input)).numpy(), - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(self._t.forward_log_det_jacobian( + paddle.to_tensor(input)).numpy(), + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([((), ()), ((2, 3, 5), (2, 3, 5))]) def test_forward_shape(self, shape, expected_shape): @@ -760,6 +772,7 @@ class TestSigmoidTransform(unittest.TestCase): class TestSoftmaxTransform(unittest.TestCase): + def setUp(self): self._t = transform.SoftmaxTransform() @@ -774,19 +787,17 @@ class TestSoftmaxTransform(unittest.TestCase): @param.param_func(((np.random.random((5, 10)), ), )) def test_forward(self, input): - np.testing.assert_allclose( - self._t.forward(paddle.to_tensor(input)), - self._np_forward(input), - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(self._t.forward(paddle.to_tensor(input)), + self._np_forward(input), + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func(((np.random.random(10), ), )) def test_inverse(self, input): - np.testing.assert_allclose( - self._t.inverse(paddle.to_tensor(input)), - self._np_inverse(input), - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(self._t.inverse(paddle.to_tensor(input)), + self._np_inverse(input), + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) def _np_forward(self, x): x = np.exp(x - np.max(x, -1, keepdims=True)[0]) @@ -819,6 +830,7 @@ class TestSoftmaxTransform(unittest.TestCase): class TestStickBreakingTransform(unittest.TestCase): + def setUp(self): self._t = transform.StickBreakingTransform() @@ -833,11 +845,11 @@ class 
TestStickBreakingTransform(unittest.TestCase): @param.param_func(((np.random.random((10)), ), )) def test_forward(self, input): - np.testing.assert_allclose( - self._t.inverse(self._t.forward(paddle.to_tensor(input))), - input, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(self._t.inverse( + self._t.forward(paddle.to_tensor(input))), + input, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([((2, 3, 5), (2, 3, 6))]) def test_forward_shape(self, shape, expected_shape): @@ -859,6 +871,7 @@ class TestStickBreakingTransform(unittest.TestCase): ('simple_one_transform', [transform.ExpTransform()], 0), ]) class TestStackTransform(unittest.TestCase): + def setUp(self): self._t = transform.StackTransform(self.transforms, self.axis) @@ -874,22 +887,22 @@ class TestStackTransform(unittest.TestCase): @param.param_func([(np.array([[0., 1., 2., 3.]]), ), (np.array([[-5., 6., 7., 8.]]), )]) def test_forward(self, input): - self.assertEqual( - tuple(self._t.forward(paddle.to_tensor(input)).shape), input.shape) + self.assertEqual(tuple(self._t.forward(paddle.to_tensor(input)).shape), + input.shape) @param.param_func([(np.array([[1., 2., 3.]]), ), (np.array([[6., 7., 8.]], ), )]) def test_inverse(self, input): - self.assertEqual( - tuple(self._t.inverse(paddle.to_tensor(input)).shape), input.shape) + self.assertEqual(tuple(self._t.inverse(paddle.to_tensor(input)).shape), + input.shape) - @param.param_func([(np.array([[1., 2., 3.]]), ), - (np.array([[6., 7., 8.]]), )]) + @param.param_func([(np.array([[1., 2., 3.]]), ), (np.array([[6., 7., + 8.]]), )]) def test_forward_log_det_jacobian(self, input): self.assertEqual( tuple( - self._t.forward_log_det_jacobian(paddle.to_tensor(input)) - .shape), input.shape) + self._t.forward_log_det_jacobian( + paddle.to_tensor(input)).shape), input.shape) @param.param_func([((), ()), ((2, 3, 5), (2, 3, 5))]) def test_forward_shape(self, shape, expected_shape): @@ -902,9 +915,9 @@ class TestStackTransform(unittest.TestCase): def test_axis(self): self.assertEqual(self._t.axis, self.axis) - @param.param_func( - [(0, 0, TypeError), ([0], 0, TypeError), - ([paddle.distribution.ExpTransform()], 'axis', TypeError)]) + @param.param_func([(0, 0, TypeError), ([0], 0, TypeError), + ([paddle.distribution.ExpTransform()], 'axis', TypeError) + ]) def test_init_exception(self, transforms, axis, exc): with self.assertRaises(exc): paddle.distribution.StackTransform(transforms, axis) diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_transform_static.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_transform_static.py index fa5742fb261..00a1f409dad 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_transform_static.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_transform_static.py @@ -26,12 +26,14 @@ paddle.enable_static() @param.place(config.DEVICES) class TestTransform(unittest.TestCase): + def setUp(self): self._t = transform.Transform() - @param.param_func( - [(transform.Type.BIJECTION, True), (transform.Type.INJECTION, True), - (transform.Type.SURJECTION, False), (transform.Type.OTHER, False)]) + @param.param_func([(transform.Type.BIJECTION, True), + (transform.Type.INJECTION, True), + (transform.Type.SURJECTION, False), + (transform.Type.OTHER, False)]) def test_is_injective(self, type, expected): transform.Transform._type = type 
self.assertEqual(self._t._is_injective(), expected) @@ -42,8 +44,8 @@ class TestTransform(unittest.TestCase): def test_codomain(self): self.assertTrue(isinstance(self._t._codomain, variable.Real)) - @param.param_func([(np.array(0), NotImplementedError), (np.random.random( - (2, 3)), NotImplementedError)]) + @param.param_func([(np.array(0), NotImplementedError), + (np.random.random((2, 3)), NotImplementedError)]) def test_forward(self, input, expected): with self.assertRaises(expected): exe = paddle.static.Executor() @@ -57,8 +59,8 @@ class TestTransform(unittest.TestCase): exe.run(sp) exe.run(mp, feed={'input': input}, fetch_list=[output]) - @param.param_func([(np.array(0), NotImplementedError), (np.random.random( - (2, 3)), NotImplementedError)]) + @param.param_func([(np.array(0), NotImplementedError), + (np.random.random((2, 3)), NotImplementedError)]) def test_inverse(self, input, expected): with self.assertRaises(expected): exe = paddle.static.Executor() @@ -72,8 +74,8 @@ class TestTransform(unittest.TestCase): exe.run(sp) exe.run(mp, feed={'input': input}, fetch_list=[output]) - @param.param_func([(np.array(0), NotImplementedError), (paddle.rand( - (2, 3)), NotImplementedError)]) + @param.param_func([(np.array(0), NotImplementedError), + (paddle.rand((2, 3)), NotImplementedError)]) def test_forward_log_det_jacobian(self, input, expected): with self.assertRaises(expected): exe = paddle.static.Executor() @@ -87,8 +89,8 @@ class TestTransform(unittest.TestCase): exe.run(sp) exe.run(mp, feed={'input': input}, fetch_list=[output]) - @param.param_func([(np.array(0), NotImplementedError), (paddle.rand( - (2, 3)), NotImplementedError)]) + @param.param_func([(np.array(0), NotImplementedError), + (paddle.rand((2, 3)), NotImplementedError)]) def test_inverse_log_det_jacobian(self, input, expected): with self.assertRaises(expected): exe = paddle.static.Executor() @@ -115,6 +117,7 @@ class TestTransform(unittest.TestCase): @param.place(config.DEVICES) class TestAbsTransform(unittest.TestCase): + def setUp(self): self._t = transform.AbsTransform() @@ -144,11 +147,10 @@ class TestAbsTransform(unittest.TestCase): output = t.forward(static_input) exe.run(sp) [output] = exe.run(mp, feed={'input': input}, fetch_list=[output]) - np.testing.assert_allclose( - output, - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(output, + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([(np.array([1.]), (-np.array([1.]), np.array([1.])))]) def test_inverse(self, input, expected): @@ -164,16 +166,14 @@ class TestAbsTransform(unittest.TestCase): feed={'input': input}, fetch_list=[actual0, actual1]) expected0, expected1 = expected - np.testing.assert_allclose( - actual0, - expected0, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) - np.testing.assert_allclose( - actual1, - expected1, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(actual0, + expected0, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(actual1, + expected1, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) def test_forward_log_det_jacobian(self): input = np.random.random((10, )) @@ -189,7 +189,9 @@ class TestAbsTransform(unittest.TestCase): exe.run(sp) [output] = exe.run(mp, feed={'input': input}, fetch_list=[output]) - 
@param.param_func([(np.array([1.]), (np.array([0.]), np.array([0.]))), ]) + @param.param_func([ + (np.array([1.]), (np.array([0.]), np.array([0.]))), + ]) def test_inverse_log_det_jacobian(self, input, expected): exe = paddle.static.Executor() sp = paddle.static.Program() @@ -203,16 +205,14 @@ class TestAbsTransform(unittest.TestCase): feed={'input': input}, fetch_list=[actual0, actual1]) expected0, expected1 = expected - np.testing.assert_allclose( - actual0, - expected0, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) - np.testing.assert_allclose( - actual1, - expected1, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(actual0, + expected0, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(actual1, + expected1, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([((), ()), ((2, 3, 5), (2, 3, 5))]) def test_forward_shape(self, shape, expected_shape): @@ -229,6 +229,7 @@ class TestAbsTransform(unittest.TestCase): ('broadcast', np.random.rand(2, 10), np.random.rand(10)), ]) class TestAffineTransform(unittest.TestCase): + def setUp(self): sp = paddle.static.Program() mp = paddle.static.Program() @@ -265,17 +266,17 @@ class TestAffineTransform(unittest.TestCase): self.loc.dtype) output = t.forward(static_input) exe.run(sp) - [output] = exe.run( - mp, - feed={'input': input, - 'loc': self.loc, - 'scale': self.scale}, - fetch_list=[output]) - np.testing.assert_allclose( - output, - self._np_forward(input), - rtol=config.RTOL.get(str(self.loc.dtype)), - atol=config.ATOL.get(str(self.loc.dtype))) + [output] = exe.run(mp, + feed={ + 'input': input, + 'loc': self.loc, + 'scale': self.scale + }, + fetch_list=[output]) + np.testing.assert_allclose(output, + self._np_forward(input), + rtol=config.RTOL.get(str(self.loc.dtype)), + atol=config.ATOL.get(str(self.loc.dtype))) def test_inverse(self): input = np.random.random(self.loc.shape) @@ -291,17 +292,17 @@ class TestAffineTransform(unittest.TestCase): self.loc.dtype) output = t.inverse(static_input) exe.run(sp) - [output] = exe.run( - mp, - feed={'input': input, - 'loc': self.loc, - 'scale': self.scale}, - fetch_list=[output]) - np.testing.assert_allclose( - output, - self._np_inverse(input), - rtol=config.RTOL.get(str(self.loc.dtype)), - atol=config.ATOL.get(str(self.loc.dtype))) + [output] = exe.run(mp, + feed={ + 'input': input, + 'loc': self.loc, + 'scale': self.scale + }, + fetch_list=[output]) + np.testing.assert_allclose(output, + self._np_inverse(input), + rtol=config.RTOL.get(str(self.loc.dtype)), + atol=config.ATOL.get(str(self.loc.dtype))) def _np_forward(self, x): return self.loc + self.scale * x @@ -328,17 +329,17 @@ class TestAffineTransform(unittest.TestCase): static_input = paddle.static.data('input', input.shape, input.dtype) output = t.inverse_log_det_jacobian(static_input) exe.run(sp) - [output] = exe.run( - mp, - feed={'input': input, - 'loc': self.loc, - 'scale': self.scale}, - fetch_list=[output]) - np.testing.assert_allclose( - output, - self._np_inverse_jacobian(input), - rtol=config.RTOL.get(str(self.loc.dtype)), - atol=config.ATOL.get(str(self.loc.dtype))) + [output] = exe.run(mp, + feed={ + 'input': input, + 'loc': self.loc, + 'scale': self.scale + }, + fetch_list=[output]) + np.testing.assert_allclose(output, + self._np_inverse_jacobian(input), + rtol=config.RTOL.get(str(self.loc.dtype)), + 
atol=config.ATOL.get(str(self.loc.dtype))) def test_forward_log_det_jacobian(self): input = np.random.random(self.scale.shape) @@ -353,17 +354,17 @@ class TestAffineTransform(unittest.TestCase): static_input = paddle.static.data('input', input.shape, input.dtype) output = t.forward_log_det_jacobian(static_input) exe.run(sp) - [output] = exe.run( - mp, - feed={'input': input, - 'loc': self.loc, - 'scale': self.scale}, - fetch_list=[output]) - np.testing.assert_allclose( - output, - self._np_forward_jacobian(input), - rtol=config.RTOL.get(str(self.loc.dtype)), - atol=config.ATOL.get(str(self.loc.dtype))) + [output] = exe.run(mp, + feed={ + 'input': input, + 'loc': self.loc, + 'scale': self.scale + }, + fetch_list=[output]) + np.testing.assert_allclose(output, + self._np_forward_jacobian(input), + rtol=config.RTOL.get(str(self.loc.dtype)), + atol=config.ATOL.get(str(self.loc.dtype))) def test_forward_shape(self): shape = self.loc.shape @@ -380,6 +381,7 @@ class TestAffineTransform(unittest.TestCase): @param.place(config.DEVICES) class TestExpTransform(unittest.TestCase): + def setUp(self): self._t = transform.ExpTransform() @@ -396,10 +398,11 @@ class TestExpTransform(unittest.TestCase): self.assertEqual(self._t._codomain.event_rank, 0) self.assertEqual(self._t._codomain.is_discrete, False) - @param.param_func( - [(np.array([0., 1., 2., 3.]), np.exp(np.array([0., 1., 2., 3.]))), - (np.array([[0., 1., 2., 3.], [-5., 6., 7., 8.]]), - np.exp(np.array([[0., 1., 2., 3.], [-5., 6., 7., 8.]])))]) + @param.param_func([(np.array([0., 1., 2., + 3.]), np.exp(np.array([0., 1., 2., 3.]))), + (np.array([[0., 1., 2., 3.], [-5., 6., 7., 8.]]), + np.exp(np.array([[0., 1., 2., 3.], [-5., 6., 7., + 8.]])))]) def test_forward(self, input, expected): exe = paddle.static.Executor() sp = paddle.static.Program() @@ -410,11 +413,10 @@ class TestExpTransform(unittest.TestCase): output = t.forward(static_input) exe.run(sp) [output] = exe.run(mp, feed={'input': input}, fetch_list=[output]) - np.testing.assert_allclose( - output, - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(output, + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([(np.array([1., 2., 3.]), np.log(np.array([1., 2., 3.]))), (np.array([[1., 2., 3.], [6., 7., 8.]]), @@ -429,11 +431,10 @@ class TestExpTransform(unittest.TestCase): output = t.inverse(static_input) exe.run(sp) [output] = exe.run(mp, feed={'input': input}, fetch_list=[output]) - np.testing.assert_allclose( - output, - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(output, + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([(np.array([1., 2., 3.]), ), (np.array([[1., 2., 3.], [6., 7., 8.]]), )]) @@ -447,11 +448,10 @@ class TestExpTransform(unittest.TestCase): output = t.forward_log_det_jacobian(static_input) exe.run(sp) [output] = exe.run(mp, feed={'input': input}, fetch_list=[output]) - np.testing.assert_allclose( - output, - self._np_forward_jacobian(input), - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(output, + self._np_forward_jacobian(input), + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) def _np_forward_jacobian(self, x): return x @@ -468,11 +468,10 @@ class TestExpTransform(unittest.TestCase): output = 
t.inverse_log_det_jacobian(static_input) exe.run(sp) [output] = exe.run(mp, feed={'input': input}, fetch_list=[output]) - np.testing.assert_allclose( - output, - self._np_inverse_jacobian(input), - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(output, + self._np_inverse_jacobian(input), + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) def _np_inverse_jacobian(self, y): return -self._np_forward_jacobian(np.log(y)) @@ -488,13 +487,15 @@ class TestExpTransform(unittest.TestCase): @param.place(config.DEVICES) class TestChainTransform(unittest.TestCase): - @param.param_func(( - (transform.ChainTransform( - (transform.AbsTransform(), - transform.AffineTransform(paddle.rand([1]), paddle.rand([1])))), - False), (transform.ChainTransform(( - transform.AffineTransform(paddle.rand([1]), paddle.rand([1])), - transform.ExpTransform(), )), True))) + + @param.param_func(((transform.ChainTransform( + (transform.AbsTransform(), + transform.AffineTransform(paddle.rand([1]), paddle.rand([1])))), + False), (transform.ChainTransform(( + transform.AffineTransform(paddle.rand([1]), + paddle.rand([1])), + transform.ExpTransform(), + )), True))) def test_is_injective(self, chain, expected): self.assertEqual(chain._is_injective(), expected) @@ -518,11 +519,12 @@ class TestChainTransform(unittest.TestCase): self.assertEqual(input._codomain.event_rank, expected.event_rank) self.assertEqual(input._codomain.is_discrete, expected.is_discrete) - @param.param_func( - [(transform.ChainTransform((transform.ExpTransform(), - transform.TanhTransform())), - np.array([[0., -1., 2., -3.], [-5., 6., 7., -8.]]), - np.tanh(np.exp(np.array([[0., -1., 2., -3.], [-5., 6., 7., -8.]]))))]) + @param.param_func([ + (transform.ChainTransform( + (transform.ExpTransform(), transform.TanhTransform())), + np.array([[0., -1., 2., -3.], [-5., 6., 7., -8.]]), + np.tanh(np.exp(np.array([[0., -1., 2., -3.], [-5., 6., 7., -8.]])))) + ]) def test_forward(self, chain, input, expected): exe = paddle.static.Executor() sp = paddle.static.Program() @@ -533,17 +535,17 @@ class TestChainTransform(unittest.TestCase): output = t.forward(static_input) exe.run(sp) [output] = exe.run(mp, feed={'input': input}, fetch_list=[output]) - np.testing.assert_allclose( - output, - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) - - @param.param_func( - [(transform.ChainTransform((transform.ExpTransform(), - transform.TanhTransform())), - np.array([[0., 1., 2., 3.], [5., 6., 7., 8.]]), - np.log(np.arctanh(np.array([[0., 1., 2., 3.], [5., 6., 7., 8.]]))))]) + np.testing.assert_allclose(output, + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) + + @param.param_func([ + (transform.ChainTransform( + (transform.ExpTransform(), transform.TanhTransform())), + np.array([[0., 1., 2., 3.], [5., 6., 7., 8.]]), + np.log(np.arctanh(np.array([[0., 1., 2., 3.], [5., 6., 7., 8.]])))) + ]) def test_inverse(self, chain, input, expected): exe = paddle.static.Executor() sp = paddle.static.Program() @@ -554,33 +556,38 @@ class TestChainTransform(unittest.TestCase): output = t.inverse(static_input) exe.run(sp) [output] = exe.run(mp, feed={'input': input}, fetch_list=[output]) - np.testing.assert_allclose( - output, - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) - - @param.param_func([(transform.ChainTransform((transform.AffineTransform( - paddle.full([1], 
0.0), - paddle.full([1], -1.0)), transform.ExpTransform())), (2, 3, 5), - (2, 3, 5)), ]) + np.testing.assert_allclose(output, + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) + + @param.param_func([ + (transform.ChainTransform( + (transform.AffineTransform(paddle.full([1], 0.0), + paddle.full([1], -1.0)), + transform.ExpTransform())), (2, 3, 5), (2, 3, 5)), + ]) def test_forward_shape(self, chain, shape, expected_shape): self.assertEqual(chain.forward_shape(shape), expected_shape) - @param.param_func([(transform.ChainTransform((transform.AffineTransform( - paddle.full([1], 0.0), - paddle.full([1], -1.0)), transform.ExpTransform())), (2, 3, 5), - (2, 3, 5)), ]) + @param.param_func([ + (transform.ChainTransform( + (transform.AffineTransform(paddle.full([1], 0.0), + paddle.full([1], -1.0)), + transform.ExpTransform())), (2, 3, 5), (2, 3, 5)), + ]) def test_inverse_shape(self, chain, shape, expected_shape): self.assertEqual(chain.forward_shape(shape), expected_shape) @param.place(config.DEVICES) @param.param_cls( - (param.TEST_CASE_NAME, 'base', 'reinterpreted_batch_rank', 'x'), - [('rank-over-zero', transform.ExpTransform(), 2, np.random.rand(2, 3, 3)), - ]) + (param.TEST_CASE_NAME, 'base', 'reinterpreted_batch_rank', 'x'), [ + ('rank-over-zero', transform.ExpTransform(), 2, np.random.rand(2, 3, + 3)), + ]) class TestIndependentTransform(unittest.TestCase): + def setUp(self): self._t = transform.IndependentTransform(self.base, self.reinterpreted_batch_rank) @@ -619,11 +626,10 @@ class TestIndependentTransform(unittest.TestCase): [output, expected] = exe.run(mp, feed={'input': self.x}, fetch_list=[output, expected]) - np.testing.assert_allclose( - output, - expected, - rtol=config.RTOL.get(str(self.x.dtype)), - atol=config.ATOL.get(str(self.x.dtype))) + np.testing.assert_allclose(output, + expected, + rtol=config.RTOL.get(str(self.x.dtype)), + atol=config.ATOL.get(str(self.x.dtype))) def test_inverse(self): exe = paddle.static.Executor() @@ -640,11 +646,10 @@ class TestIndependentTransform(unittest.TestCase): [output, expected] = exe.run(mp, feed={'input': self.x}, fetch_list=[output, expected]) - np.testing.assert_allclose( - expected, - output, - rtol=config.RTOL.get(str(self.x.dtype)), - atol=config.ATOL.get(str(self.x.dtype))) + np.testing.assert_allclose(expected, + output, + rtol=config.RTOL.get(str(self.x.dtype)), + atol=config.ATOL.get(str(self.x.dtype))) def test_forward_log_det_jacobian(self): exe = paddle.static.Executor() @@ -657,19 +662,18 @@ class TestIndependentTransform(unittest.TestCase): self.x.dtype) output = t.forward_log_det_jacobian(static_input) expected = self.base.forward_log_det_jacobian( - static_input.sum( - list(range(-self.reinterpreted_batch_rank, 0)))) + static_input.sum(list(range(-self.reinterpreted_batch_rank, + 0)))) exe.run(sp) [actual, expected] = exe.run(mp, feed={'input': self.x}, fetch_list=[output, expected]) - self.assertEqual( - tuple(actual.shape), self.x.shape[:-self.reinterpreted_batch_rank]) - np.testing.assert_allclose( - actual, - expected, - rtol=config.RTOL.get(str(self.x.dtype)), - atol=config.ATOL.get(str(self.x.dtype))) + self.assertEqual(tuple(actual.shape), + self.x.shape[:-self.reinterpreted_batch_rank]) + np.testing.assert_allclose(actual, + expected, + rtol=config.RTOL.get(str(self.x.dtype)), + atol=config.ATOL.get(str(self.x.dtype))) @param.param_func([((), ()), ((2, 3, 5), (2, 3, 5))]) def test_forward_shape(self, shape, expected_shape): @@ -682,6 +686,7 @@ class 
TestIndependentTransform(unittest.TestCase): @param.place(config.DEVICES) class TestPowerTransform(unittest.TestCase): + def setUp(self): self._t = transform.PowerTransform(paddle.full([1], 2.)) @@ -702,10 +707,11 @@ class TestPowerTransform(unittest.TestCase): self.assertEqual(self._t._codomain.event_rank, 0) self.assertEqual(self._t._codomain.is_discrete, False) - @param.param_func([(np.array([2.]), np.array([0., -1., 2.]), np.power( - np.array([0., -1., 2.]), - 2.)), (np.array([[0.], [3.]]), np.array([[1., 0.], [5., 6.]]), np.power( - np.array([[1., 0.], [5., 6.]]), np.array([[0.], [3.]])))]) + @param.param_func([(np.array([2.]), np.array([0., -1., 2.]), + np.power(np.array([0., -1., 2.]), 2.)), + (np.array([[0.], [3.]]), np.array([[1., 0.], [5., 6.]]), + np.power(np.array([[1., 0.], [5., 6.]]), + np.array([[0.], [3.]])))]) def test_forward(self, power, input, expected): exe = paddle.static.Executor() sp = paddle.static.Program() @@ -717,14 +723,15 @@ class TestPowerTransform(unittest.TestCase): output = t.forward(static_input) exe.run(sp) [output] = exe.run(mp, - feed={'input': input, - 'power': power}, + feed={ + 'input': input, + 'power': power + }, fetch_list=[output]) - np.testing.assert_allclose( - output, - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(output, + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([(np.array([2.]), np.array([4.]), np.array([2.]))]) def test_inverse(self, power, input, expected): @@ -738,14 +745,15 @@ class TestPowerTransform(unittest.TestCase): output = t.inverse(static_input) exe.run(sp) [output] = exe.run(mp, - feed={'input': input, - 'power': power}, + feed={ + 'input': input, + 'power': power + }, fetch_list=[output]) - np.testing.assert_allclose( - output, - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(output, + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func(((np.array([2.]), np.array([3., 1.4, 0.8])), )) def test_forward_log_det_jacobian(self, power, input): @@ -759,14 +767,15 @@ class TestPowerTransform(unittest.TestCase): output = t.forward_log_det_jacobian(static_input) exe.run(sp) [output] = exe.run(mp, - feed={'input': input, - 'power': power}, + feed={ + 'input': input, + 'power': power + }, fetch_list=[output]) - np.testing.assert_allclose( - output, - self._np_forward_jacobian(power, input), - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(output, + self._np_forward_jacobian(power, input), + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) def _np_forward_jacobian(self, alpha, x): return np.abs(np.log(alpha * np.power(x, alpha - 1))) @@ -782,6 +791,7 @@ class TestPowerTransform(unittest.TestCase): @param.place(config.DEVICES) class TestTanhTransform(unittest.TestCase): + def setUp(self): self._t = transform.TanhTransform() @@ -800,10 +810,11 @@ class TestTanhTransform(unittest.TestCase): self.assertEqual(self._t._codomain._constraint._lower, -1) self.assertEqual(self._t._codomain._constraint._upper, 1) - @param.param_func( - [(np.array([0., 1., 2., 3.]), np.tanh(np.array([0., 1., 2., 3.]))), - (np.array([[0., 1., 2., 3.], [-5., 6., 7., 8.]]), - np.tanh(np.array([[0., 1., 2., 3.], [-5., 6., 7., 8.]])))]) + @param.param_func([(np.array([0., 1., 2., + 3.]), 
np.tanh(np.array([0., 1., 2., 3.]))), + (np.array([[0., 1., 2., 3.], [-5., 6., 7., 8.]]), + np.tanh(np.array([[0., 1., 2., 3.], [-5., 6., 7., + 8.]])))]) def test_forward(self, input, expected): exe = paddle.static.Executor() sp = paddle.static.Program() @@ -814,16 +825,15 @@ class TestTanhTransform(unittest.TestCase): output = t.forward(static_input) exe.run(sp) [output] = exe.run(mp, feed={'input': input}, fetch_list=[output]) - np.testing.assert_allclose( - output, - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) - - @param.param_func( - [(np.array([1., 2., 3.]), np.arctanh(np.array([1., 2., 3.]))), - (np.array([[1., 2., 3.], [6., 7., 8.]]), - np.arctanh(np.array([[1., 2., 3.], [6., 7., 8.]])))]) + np.testing.assert_allclose(output, + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) + + @param.param_func([(np.array([1., 2., + 3.]), np.arctanh(np.array([1., 2., 3.]))), + (np.array([[1., 2., 3.], [6., 7., 8.]]), + np.arctanh(np.array([[1., 2., 3.], [6., 7., 8.]])))]) def test_inverse(self, input, expected): exe = paddle.static.Executor() sp = paddle.static.Program() @@ -834,11 +844,10 @@ class TestTanhTransform(unittest.TestCase): output = t.inverse(static_input) exe.run(sp) [output] = exe.run(mp, feed={'input': input}, fetch_list=[output]) - np.testing.assert_allclose( - output, - expected, - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(output, + expected, + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([(np.array([1., 2., 3.]), ), (np.array([[1., 2., 3.], [6., 7., 8.]]), )]) @@ -852,11 +861,10 @@ class TestTanhTransform(unittest.TestCase): output = t.forward_log_det_jacobian(static_input) exe.run(sp) [output] = exe.run(mp, feed={'input': input}, fetch_list=[output]) - np.testing.assert_allclose( - output, - self._np_forward_jacobian(input), - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(output, + self._np_forward_jacobian(input), + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) def _np_forward_jacobian(self, x): return 2. * (np.log(2.) - x - self._np_softplus(-2. 
* x)) @@ -881,11 +889,10 @@ class TestTanhTransform(unittest.TestCase): output = t.inverse_log_det_jacobian(static_input) exe.run(sp) [output] = exe.run(mp, feed={'input': input}, fetch_list=[output]) - np.testing.assert_allclose( - output, - self._np_inverse_jacobian(input), - rtol=config.RTOL.get(str(input.dtype)), - atol=config.ATOL.get(str(input.dtype))) + np.testing.assert_allclose(output, + self._np_inverse_jacobian(input), + rtol=config.RTOL.get(str(input.dtype)), + atol=config.ATOL.get(str(input.dtype))) @param.param_func([((), ()), ((2, 3, 5), (2, 3, 5))]) def test_forward_shape(self, shape, expected_shape): @@ -901,6 +908,7 @@ class TestTanhTransform(unittest.TestCase): ('regular_shape', (2, 3), (3, 2)), ]) class TestReshapeTransform(unittest.TestCase): + def setUp(self): self._t = transform.ReshapeTransform(self.in_event_shape, self.out_event_shape) @@ -926,11 +934,10 @@ class TestReshapeTransform(unittest.TestCase): exe.run(sp) [output] = exe.run(mp, feed={}, fetch_list=[output]) expected = np.ones(self.out_event_shape) - np.testing.assert_allclose( - output, - expected, - rtol=config.RTOL.get(str(expected.dtype)), - atol=config.ATOL.get(str(expected.dtype))) + np.testing.assert_allclose(output, + expected, + rtol=config.RTOL.get(str(expected.dtype)), + atol=config.ATOL.get(str(expected.dtype))) def test_inverse(self): exe = paddle.static.Executor() @@ -945,11 +952,10 @@ class TestReshapeTransform(unittest.TestCase): [output] = exe.run(mp, feed={}, fetch_list=[output]) expected = np.ones(self.in_event_shape) - np.testing.assert_allclose( - output, - expected, - rtol=config.RTOL.get(str(expected.dtype)), - atol=config.ATOL.get(str(expected.dtype))) + np.testing.assert_allclose(output, + expected, + rtol=config.RTOL.get(str(expected.dtype)), + atol=config.ATOL.get(str(expected.dtype))) def test_forward_log_det_jacobian(self): exe = paddle.static.Executor() @@ -963,11 +969,10 @@ class TestReshapeTransform(unittest.TestCase): exe.run(sp) [output] = exe.run(mp, feed={}, fetch_list=[output]) expected = np.zeros([1]) - np.testing.assert_allclose( - output, - expected, - rtol=config.RTOL.get(str(expected.dtype)), - atol=config.ATOL.get(str(expected.dtype))) + np.testing.assert_allclose(output, + expected, + rtol=config.RTOL.get(str(expected.dtype)), + atol=config.ATOL.get(str(expected.dtype))) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_transformed_distribution.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_transformed_distribution.py index 2f7bb61e38d..c47250195da 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_transformed_distribution.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_transformed_distribution.py @@ -24,12 +24,13 @@ import parameterize as param @param.place(config.DEVICES) @param.param_cls((param.TEST_CASE_NAME, 'base', 'transforms'), - [('base_normal', paddle.distribution.Normal(0., 1.), - [paddle.distribution.ExpTransform()])]) + [('base_normal', paddle.distribution.Normal( + 0., 1.), [paddle.distribution.ExpTransform()])]) class TestIndependent(unittest.TestCase): + def setUp(self): - self._t = paddle.distribution.TransformedDistribution(self.base, - self.transforms) + self._t = paddle.distribution.TransformedDistribution( + self.base, self.transforms) def _np_sum_rightmost(self, value, n): return np.sum(value, tuple(range(-n, 0))) if n > 0 else value diff --git 
a/python/paddle/fluid/tests/unittests/distribution/test_distribution_transformed_distribution_static.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_transformed_distribution_static.py index f07205a6268..4e4bcc1f4d4 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_transformed_distribution_static.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_transformed_distribution_static.py @@ -29,6 +29,7 @@ paddle.enable_static() [('base_normal', paddle.distribution.Normal, [paddle.distribution.ExpTransform()])]) class TestIndependent(unittest.TestCase): + def setUp(self): value = np.array([0.5]) loc = np.array([0.]) @@ -54,17 +55,18 @@ class TestIndependent(unittest.TestCase): [self.actual_log_prob, self.expected_log_prob, self.sample_data] = exe.run( mp, - feed={'value': value, - 'loc': loc, - 'scale': scale}, + feed={ + 'value': value, + 'loc': loc, + 'scale': scale + }, fetch_list=[actual_log_prob, expected_log_prob, sample_data]) def test_log_prob(self): - np.testing.assert_allclose( - self.actual_log_prob, - self.expected_log_prob, - rtol=config.RTOL.get(str(self.dtype)), - atol=config.ATOL.get(str(self.dtype))) + np.testing.assert_allclose(self.actual_log_prob, + self.expected_log_prob, + rtol=config.RTOL.get(str(self.dtype)), + atol=config.ATOL.get(str(self.dtype))) def transformed_log_prob(self, value, base, transforms): log_prob = 0.0 diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_uniform.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_uniform.py index d8fe23b9c1b..3fbb382a240 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_uniform.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_uniform.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -25,6 +25,7 @@ from test_distribution import DistributionNumpy class UniformNumpy(DistributionNumpy): + def __init__(self, low, high): self.low = np.array(low) self.high = np.array(high) @@ -52,6 +53,7 @@ class UniformNumpy(DistributionNumpy): class UniformTest(unittest.TestCase): + def setUp(self, use_gpu=False, batch_size=5, dims=6): self.use_gpu = use_gpu if not use_gpu: @@ -86,8 +88,9 @@ class UniformTest(unittest.TestCase): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[], dtype='float32') + self.static_values = layers.data(name='values', + shape=[], + dtype='float32') def compare_with_numpy(self, fetch_list, sample_shape=7, tolerance=1e-6): sample, entropy, log_prob, probs = fetch_list @@ -99,10 +102,14 @@ class UniformTest(unittest.TestCase): np_p = np_uniform.probs(self.values_np) np.testing.assert_equal(sample.shape, np_sample.shape) - np.testing.assert_allclose( - entropy, np_entropy, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - log_prob, np_lp, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose(entropy, + np_entropy, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(log_prob, + np_lp, + rtol=tolerance, + atol=tolerance) np.testing.assert_allclose(probs, np_p, rtol=tolerance, atol=tolerance) def test_uniform_distribution_dygraph(self, sample_shape=7, tolerance=1e-6): @@ -141,6 +148,7 @@ class UniformTest(unittest.TestCase): class UniformTest2(UniformTest): + def init_numpy_data(self, batch_size, dims): # low ans high are 'int' self.low_np = int(np.random.uniform(-2, 1)) @@ -149,6 +157,7 @@ class UniformTest2(UniformTest): class UniformTest3(UniformTest): + def init_numpy_data(self, batch_size, dims): # test broadcast: low is float, high is numpy.ndarray with dtype 'float32'. self.low_np = np.random.uniform(-2, 1) @@ -160,11 +169,13 @@ class UniformTest3(UniformTest): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float32') class UniformTest4(UniformTest): + def init_numpy_data(self, batch_size, dims): # low and high are numpy.ndarray with dtype 'float32'. self.low_np = np.random.randn(batch_size, dims).astype('float32') @@ -176,11 +187,13 @@ class UniformTest4(UniformTest): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float32') class UniformTest5(UniformTest): + def init_numpy_data(self, batch_size, dims): # low and high are numpy.ndarray with dtype 'float64'. self.low_np = np.random.randn(batch_size, dims).astype('float64') @@ -197,11 +210,13 @@ class UniformTest5(UniformTest): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float64') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float64') class UniformTest6(UniformTest): + def init_numpy_data(self, batch_size, dims): # low and high are Tensor with dtype 'VarType.FP32'. 
self.low_np = np.random.randn(batch_size, dims).astype('float32') @@ -216,15 +231,19 @@ class UniformTest6(UniformTest): def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.static_low = layers.data( - name='low', shape=[dims], dtype='float32') - self.static_high = layers.data( - name='high', shape=[dims], dtype='float32') - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32') + self.static_low = layers.data(name='low', + shape=[dims], + dtype='float32') + self.static_high = layers.data(name='high', + shape=[dims], + dtype='float32') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float32') class UniformTest7(UniformTest): + def init_numpy_data(self, batch_size, dims): # low and high are Tensor with dtype 'VarType.FP64'. self.low_np = np.random.randn(batch_size, dims).astype('float64') @@ -239,15 +258,19 @@ class UniformTest7(UniformTest): def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.static_low = layers.data( - name='low', shape=[dims], dtype='float64') - self.static_high = layers.data( - name='high', shape=[dims], dtype='float64') - self.static_values = layers.data( - name='values', shape=[dims], dtype='float64') + self.static_low = layers.data(name='low', + shape=[dims], + dtype='float64') + self.static_high = layers.data(name='high', + shape=[dims], + dtype='float64') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float64') class UniformTest8(UniformTest): + def init_numpy_data(self, batch_size, dims): # low and high are Tensor with dtype 'VarType.FP64'. value's dtype is 'VarType.FP32'. self.low_np = np.random.randn(batch_size, dims).astype('float64') @@ -262,15 +285,19 @@ class UniformTest8(UniformTest): def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.static_low = layers.data( - name='low', shape=[dims], dtype='float64') - self.static_high = layers.data( - name='high', shape=[dims], dtype='float64') - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32') + self.static_low = layers.data(name='low', + shape=[dims], + dtype='float64') + self.static_high = layers.data(name='high', + shape=[dims], + dtype='float64') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float32') class UniformTest9(UniformTest): + def init_numpy_data(self, batch_size, dims): # low and high are numpy.ndarray with dtype 'float32'. # high < low. @@ -283,11 +310,13 @@ class UniformTest9(UniformTest): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float32') class UniformTest10(UniformTest): + def init_numpy_data(self, batch_size, dims): # low and high are list. self.low_np = np.random.randn(batch_size, @@ -300,29 +329,33 @@ class UniformTest10(UniformTest): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float32') class UniformTest11(UniformTest): + def init_numpy_data(self, batch_size, dims): # low and high are tuple. 
self.low_np = tuple( np.random.randn(batch_size, dims).astype('float32').tolist()) self.high_np = tuple( - np.random.uniform(5.0, 15.0, (batch_size, dims)).astype('float32') - .tolist()) + np.random.uniform(5.0, 15.0, + (batch_size, dims)).astype('float32').tolist()) self.values_np = np.random.randn(batch_size, dims).astype('float32') def init_static_data(self, batch_size, dims): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32') + self.static_values = layers.data(name='values', + shape=[dims], + dtype='float32') class UniformTestSample(unittest.TestCase): + def setUp(self): self.init_param() @@ -340,6 +373,7 @@ class UniformTestSample(unittest.TestCase): class UniformTestSample2(UniformTestSample): + def init_param(self): self.low = -5.0 self.high = 2.0 diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_variable.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_variable.py index 6cd50157207..94558395e00 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_variable.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_variable.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -27,6 +27,7 @@ import parameterize as param (param.TEST_CASE_NAME, 'is_discrete', 'event_rank', 'constraint'), [('NotImplement', False, 0, constraint.Constraint())]) class TestVariable(unittest.TestCase): + def setUp(self): self._var = variable.Variable(self.is_discrete, self.event_rank, self.constraint) @@ -40,10 +41,13 @@ class TestVariable(unittest.TestCase): @param.param_cls((param.TEST_CASE_NAME, 'base', 'rank'), [('real_base', variable.real, 10)]) class TestIndependent(unittest.TestCase): + def setUp(self): self._var = variable.Independent(self.base, self.rank) - @param.param_func([(paddle.rand([2, 3, 4]), ValueError), ]) + @param.param_func([ + (paddle.rand([2, 3, 4]), ValueError), + ]) def test_costraint(self, value, expect): with self.assertRaises(expect): self._var.constraint(value) @@ -52,13 +56,16 @@ class TestIndependent(unittest.TestCase): @param.param_cls((param.TEST_CASE_NAME, 'vars', 'axis'), [('real_base', [variable.real], 10)]) class TestStack(unittest.TestCase): + def setUp(self): self._var = variable.Stack(self.vars, self.axis) def test_is_discrete(self): self.assertEqual(self._var.is_discrete, False) - @param.param_func([(paddle.rand([2, 3, 4]), ValueError), ]) + @param.param_func([ + (paddle.rand([2, 3, 4]), ValueError), + ]) def test_costraint(self, value, expect): with self.assertRaises(expect): self._var.constraint(value) diff --git a/python/paddle/fluid/tests/unittests/distribution/test_kl.py b/python/paddle/fluid/tests/unittests/distribution/test_kl.py index 635f5446c8e..0a957c540be 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_kl.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_kl.py @@ -30,16 +30,18 @@ paddle.set_default_dtype('float64') @param.place(config.DEVICES) @param.parameterize_cls((param.TEST_CASE_NAME, 'a1', 'b1', 'a2', 'b2'), [ - ('test_regular_input', 6.0 * np.random.random((4, 5)) + 1e-4, - 6.0 * np.random.random((4, 5)) + 1e-4, 6.0 * np.random.random( - (4, 5)) + 1e-4, 6.0 * np.random.random((4, 5)) + 1e-4), + ('test_regular_input', 6.0 * np.random.random( + (4, 5)) + 1e-4, 6.0 * np.random.random( + (4, 5)) + 1e-4, 6.0 * np.random.random( + (4, 5)) + 1e-4, 6.0 * np.random.random((4, 5)) + 1e-4), ]) class TestKLBetaBeta(unittest.TestCase): + def setUp(self): - self.p = paddle.distribution.Beta( - paddle.to_tensor(self.a1), paddle.to_tensor(self.b1)) - self.q = paddle.distribution.Beta( - paddle.to_tensor(self.a2), paddle.to_tensor(self.b2)) + self.p = paddle.distribution.Beta(paddle.to_tensor(self.a1), + paddle.to_tensor(self.b1)) + self.q = paddle.distribution.Beta(paddle.to_tensor(self.a2), + paddle.to_tensor(self.b2)) def test_kl_divergence(self): with paddle.fluid.dygraph.guard(self.place): @@ -58,10 +60,11 @@ class TestKLBetaBeta(unittest.TestCase): @param.place(config.DEVICES) @param.param_cls((param.TEST_CASE_NAME, 'conc1', 'conc2'), [ - ('test-regular-input', np.random.random((5, 7, 8, 10)), np.random.random( - (5, 7, 8, 10))), + ('test-regular-input', np.random.random( + (5, 7, 8, 10)), np.random.random((5, 7, 8, 10))), ]) class TestKLDirichletDirichlet(unittest.TestCase): + def setUp(self): self.p = paddle.distribution.Dirichlet(paddle.to_tensor(self.conc1)) self.q = paddle.distribution.Dirichlet(paddle.to_tensor(self.conc2)) @@ -79,10 +82,10 @@ class TestKLDirichletDirichlet(unittest.TestCase): scipy.special.gammaln(np.sum(conc1, -1)) - scipy.special.gammaln(np.sum(conc2, -1)) - np.sum( scipy.special.gammaln(conc1) - scipy.special.gammaln(conc2), -1) - + np.sum((conc1 - conc2) * - 
(scipy.special.digamma(conc1) - - scipy.special.digamma(np.sum(conc1, -1, keepdims=True))), - -1)) + + np.sum( + (conc1 - conc2) * + (scipy.special.digamma(conc1) - + scipy.special.digamma(np.sum(conc1, -1, keepdims=True))), -1)) class DummyDistribution(paddle.distribution.Distribution): @@ -93,25 +96,27 @@ class DummyDistribution(paddle.distribution.Distribution): @param.param_cls((param.TEST_CASE_NAME, 'p', 'q'), [('test-unregister', DummyDistribution(), DummyDistribution)]) class TestDispatch(unittest.TestCase): + def test_dispatch_with_unregister(self): with self.assertRaises(NotImplementedError): paddle.distribution.kl_divergence(self.p, self.q) @param.place(config.DEVICES) -@param.param_cls((param.TEST_CASE_NAME, 'p', 'q'), - [('test-diff-dist', - mock.Exponential(paddle.rand((100, 200, 100)) + 1.0), - mock.Exponential(paddle.rand((100, 200, 100)) + 2.0)), - ('test-same-dist', mock.Exponential(paddle.to_tensor(1.0)), - mock.Exponential(paddle.to_tensor(1.0)))]) +@param.param_cls( + (param.TEST_CASE_NAME, 'p', 'q'), + [('test-diff-dist', mock.Exponential(paddle.rand((100, 200, 100)) + 1.0), + mock.Exponential(paddle.rand((100, 200, 100)) + 2.0)), + ('test-same-dist', mock.Exponential( + paddle.to_tensor(1.0)), mock.Exponential(paddle.to_tensor(1.0)))]) class TestKLExpfamilyExpFamily(unittest.TestCase): + def test_kl_expfamily_expfamily(self): - np.testing.assert_allclose( - paddle.distribution.kl_divergence(self.p, self.q), - kl._kl_expfamily_expfamily(self.p, self.q), - rtol=config.RTOL.get(config.DEFAULT_DTYPE), - atol=config.ATOL.get(config.DEFAULT_DTYPE)) + np.testing.assert_allclose(paddle.distribution.kl_divergence( + self.p, self.q), + kl._kl_expfamily_expfamily(self.p, self.q), + rtol=config.RTOL.get(config.DEFAULT_DTYPE), + atol=config.ATOL.get(config.DEFAULT_DTYPE)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/distribution/test_kl_static.py b/python/paddle/fluid/tests/unittests/distribution/test_kl_static.py index b061650a53b..3bd62e1334b 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_kl_static.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_kl_static.py @@ -30,11 +30,13 @@ paddle.enable_static() @param.place(config.DEVICES) @param.param_cls((param.TEST_CASE_NAME, 'a1', 'b1', 'a2', 'b2'), [ - ('test_regular_input', 6.0 * np.random.random((4, 5)) + 1e-4, - 6.0 * np.random.random((4, 5)) + 1e-4, 6.0 * np.random.random( - (4, 5)) + 1e-4, 6.0 * np.random.random((4, 5)) + 1e-4), + ('test_regular_input', 6.0 * np.random.random( + (4, 5)) + 1e-4, 6.0 * np.random.random( + (4, 5)) + 1e-4, 6.0 * np.random.random( + (4, 5)) + 1e-4, 6.0 * np.random.random((4, 5)) + 1e-4), ]) class TestKLBetaBeta(unittest.TestCase): + def setUp(self): self.mp = paddle.static.Program() self.sp = paddle.static.Program() @@ -63,11 +65,11 @@ class TestKLBetaBeta(unittest.TestCase): feed=self.feeds, fetch_list=[out]) - np.testing.assert_allclose( - out, - self.scipy_kl_beta_beta(self.a1, self.b1, self.a2, self.b2), - rtol=config.RTOL.get(str(self.a1.dtype)), - atol=config.ATOL.get(str(self.a1.dtype))) + np.testing.assert_allclose(out, + self.scipy_kl_beta_beta( + self.a1, self.b1, self.a2, self.b2), + rtol=config.RTOL.get(str(self.a1.dtype)), + atol=config.ATOL.get(str(self.a1.dtype))) def scipy_kl_beta_beta(self, a1, b1, a2, b2): return (scipy.special.betaln(a2, b2) - scipy.special.betaln(a1, b1) + @@ -78,10 +80,11 @@ class TestKLBetaBeta(unittest.TestCase): @param.place(config.DEVICES) @param.param_cls((param.TEST_CASE_NAME, 'conc1', 
'conc2'), [ - ('test-regular-input', np.random.random((5, 7, 8, 10)), np.random.random( - (5, 7, 8, 10))), + ('test-regular-input', np.random.random( + (5, 7, 8, 10)), np.random.random((5, 7, 8, 10))), ]) class TestKLDirichletDirichlet(unittest.TestCase): + def setUp(self): self.mp = paddle.static.Program() self.sp = paddle.static.Program() @@ -114,10 +117,10 @@ class TestKLDirichletDirichlet(unittest.TestCase): scipy.special.gammaln(np.sum(conc1, -1)) - scipy.special.gammaln(np.sum(conc2, -1)) - np.sum( scipy.special.gammaln(conc1) - scipy.special.gammaln(conc2), -1) - + np.sum((conc1 - conc2) * - (scipy.special.digamma(conc1) - - scipy.special.digamma(np.sum(conc1, -1, keepdims=True))), - -1)) + + np.sum( + (conc1 - conc2) * + (scipy.special.digamma(conc1) - + scipy.special.digamma(np.sum(conc1, -1, keepdims=True))), -1)) class DummyDistribution(paddle.distribution.Distribution): @@ -128,6 +131,7 @@ class DummyDistribution(paddle.distribution.Distribution): @param.param_cls((param.TEST_CASE_NAME, 'p', 'q'), [('test-dispatch-exception')]) class TestDispatch(unittest.TestCase): + def setUp(self): self.mp = paddle.static.Program() self.sp = paddle.static.Program() @@ -150,15 +154,18 @@ class TestDispatch(unittest.TestCase): np.random.rand(100, 200, 100) + 2.0), ('test-same-dist', np.array([1.0]), np.array([1.0]))]) class TestKLExpfamilyExpFamily(unittest.TestCase): + def setUp(self): self.mp = paddle.static.Program() self.sp = paddle.static.Program() self.executor = paddle.static.Executor(self.place) with paddle.static.program_guard(self.mp, self.sp): - rate1 = paddle.static.data( - 'rate1', shape=self.rate1.shape, dtype=self.rate1.dtype) - rate2 = paddle.static.data( - 'rate2', shape=self.rate2.shape, dtype=self.rate2.dtype) + rate1 = paddle.static.data('rate1', + shape=self.rate1.shape, + dtype=self.rate1.dtype) + rate2 = paddle.static.data('rate2', + shape=self.rate2.shape, + dtype=self.rate2.dtype) self.p = mock.Exponential(rate1) self.q = mock.Exponential(rate2) self.feeds = {'rate1': self.rate1, 'rate2': self.rate2} diff --git a/python/paddle/fluid/tests/unittests/dygraph_fleet_api.py b/python/paddle/fluid/tests/unittests/dygraph_fleet_api.py index de4457a58fb..a649b3a19eb 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_fleet_api.py +++ b/python/paddle/fluid/tests/unittests/dygraph_fleet_api.py @@ -31,6 +31,7 @@ from paddle.fluid.dygraph.parallel import ParallelEnv class TestDygraphFleetAPI(unittest.TestCase): + def setUp(self): paddle.seed(2022) random.seed(2022) @@ -48,8 +49,8 @@ class TestDygraphFleetAPI(unittest.TestCase): strategy.amp = True strategy.recompute = True fleet.init(is_collective=True, strategy=strategy) - net = paddle.nn.Sequential( - paddle.nn.Linear(10, 1), paddle.nn.Linear(1, 2)) + net = paddle.nn.Sequential(paddle.nn.Linear(10, 1), + paddle.nn.Linear(1, 2)) net = dist.fleet.distributed_model(net) data = np.random.uniform(-1, 1, [30, 10]).astype('float32') data = paddle.to_tensor(data) diff --git a/python/paddle/fluid/tests/unittests/dygraph_group_sharded_api.py b/python/paddle/fluid/tests/unittests/dygraph_group_sharded_api.py index a1a853f006c..34b485a8bd4 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_group_sharded_api.py +++ b/python/paddle/fluid/tests/unittests/dygraph_group_sharded_api.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -35,6 +35,7 @@ batch_size = 100 class MLP(fluid.Layer): + def __init__(self, linear_size=1000, param_attr=None, bias_attr=None): super(MLP, self).__init__() @@ -50,6 +51,7 @@ class MLP(fluid.Layer): def reader_decorator(linear_size=1000): + def __reader__(): for _ in range(100): img = np.random.rand(linear_size).astype('float32') @@ -80,18 +82,20 @@ def train_mlp(model, shard_level, use_pure_fp16, output_dir): model = paddle.amp.decorate(models=model, level='O2', save_dtype='float32') scaler = paddle.amp.GradScaler(init_loss_scaling=32768) - model, optimizer, scaler = group_sharded_parallel( - model=model, optimizer=optimizer, level=shard_level, scaler=scaler) + model, optimizer, scaler = group_sharded_parallel(model=model, + optimizer=optimizer, + level=shard_level, + scaler=scaler) - train_reader = paddle.batch( - reader_decorator(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(reader_decorator(), + batch_size=batch_size, + drop_last=True) - train_loader = paddle.io.DataLoader.from_generator( - capacity=32, - use_double_buffer=True, - iterable=True, - return_list=True, - use_multiprocess=True) + train_loader = paddle.io.DataLoader.from_generator(capacity=32, + use_double_buffer=True, + iterable=True, + return_list=True, + use_multiprocess=True) train_loader.set_sample_list_generator(train_reader) for eop in range(epoch): @@ -102,8 +106,8 @@ def train_mlp(model, shard_level, use_pure_fp16, output_dir): img.stop_gradient = True with paddle.amp.auto_cast(True, level='O2'): out = model(img) - loss = paddle.nn.functional.cross_entropy( - input=out, label=label) + loss = paddle.nn.functional.cross_entropy(input=out, + label=label) avg_loss = paddle.mean(x=loss.cast(dtype=paddle.float32)) if not use_pure_fp16: @@ -129,17 +133,20 @@ def test_sharding_api(): output_dir = tempfile.mkdtemp() # fp16 - stage2_params = train_mlp( - mlp1, shard_level="os_g", use_pure_fp16=True, output_dir=output_dir) - stage3_params = train_mlp( - mlp2, shard_level="p_g_os", use_pure_fp16=True, output_dir=output_dir) + stage2_params = train_mlp(mlp1, + shard_level="os_g", + use_pure_fp16=True, + output_dir=output_dir) + stage3_params = train_mlp(mlp2, + shard_level="p_g_os", + use_pure_fp16=True, + output_dir=output_dir) for i in range(len(stage3_params)): - np.testing.assert_allclose( - stage2_params[i].numpy(), - stage3_params[i].numpy(), - rtol=1e-4, - atol=1e-3) + np.testing.assert_allclose(stage2_params[i].numpy(), + stage3_params[i].numpy(), + rtol=1e-4, + atol=1e-3) shutil.rmtree(output_dir) diff --git a/python/paddle/fluid/tests/unittests/dygraph_group_sharded_api_eager.py b/python/paddle/fluid/tests/unittests/dygraph_group_sharded_api_eager.py index 85a5446cb64..8f6dadb5ce9 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_group_sharded_api_eager.py +++ b/python/paddle/fluid/tests/unittests/dygraph_group_sharded_api_eager.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -35,6 +35,7 @@ batch_size = 100 class MLP(fluid.Layer): + def __init__(self, linear_size=1000, param_attr=None, bias_attr=None): super(MLP, self).__init__() @@ -50,6 +51,7 @@ class MLP(fluid.Layer): def reader_decorator(linear_size=1000): + def __reader__(): for _ in range(100): img = np.random.rand(linear_size).astype('float32') @@ -78,18 +80,20 @@ def train_mlp(model, shard_level, use_pure_fp16, output_dir): model = paddle.amp.decorate(models=model, level='O2', save_dtype='float32') scaler = paddle.amp.GradScaler(init_loss_scaling=32768) - model, optimizer, scaler = group_sharded_parallel( - model=model, optimizer=optimizer, level=shard_level, scaler=scaler) + model, optimizer, scaler = group_sharded_parallel(model=model, + optimizer=optimizer, + level=shard_level, + scaler=scaler) - train_reader = paddle.batch( - reader_decorator(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(reader_decorator(), + batch_size=batch_size, + drop_last=True) - train_loader = paddle.io.DataLoader.from_generator( - capacity=32, - use_double_buffer=True, - iterable=True, - return_list=True, - use_multiprocess=True) + train_loader = paddle.io.DataLoader.from_generator(capacity=32, + use_double_buffer=True, + iterable=True, + return_list=True, + use_multiprocess=True) train_loader.set_sample_list_generator(train_reader) for eop in range(epoch): @@ -100,8 +104,8 @@ def train_mlp(model, shard_level, use_pure_fp16, output_dir): img.stop_gradient = True with paddle.amp.auto_cast(True, level='O2'): out = model(img) - loss = paddle.nn.functional.cross_entropy( - input=out, label=label) + loss = paddle.nn.functional.cross_entropy(input=out, + label=label) avg_loss = paddle.mean(x=loss.cast(dtype=paddle.float32)) if not use_pure_fp16: @@ -128,17 +132,20 @@ def test_sharding_api(): output_dir = tempfile.mkdtemp() # fp16 - stage2_params = train_mlp( - mlp1, shard_level="os_g", use_pure_fp16=True, output_dir=output_dir) - stage3_params = train_mlp( - mlp2, shard_level="p_g_os", use_pure_fp16=True, output_dir=output_dir) + stage2_params = train_mlp(mlp1, + shard_level="os_g", + use_pure_fp16=True, + output_dir=output_dir) + stage3_params = train_mlp(mlp2, + shard_level="p_g_os", + use_pure_fp16=True, + output_dir=output_dir) for i in range(len(stage3_params)): - np.testing.assert_allclose( - stage2_params[i].numpy(), - stage3_params[i].numpy(), - rtol=1e-4, - atol=1e-3) + np.testing.assert_allclose(stage2_params[i].numpy(), + stage3_params[i].numpy(), + rtol=1e-4, + atol=1e-3) shutil.rmtree(output_dir) diff --git a/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage2.py b/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage2.py index 8c07734d513..f4cc451c40a 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage2.py @@ -1,13 +1,13 @@ # -*- coding: UTF-8 -*- # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -40,6 +40,7 @@ paddle.seed(seed) class MLP(fluid.Layer): + def __init__(self, linear_size=1000, param_attr=None, bias_attr=None): super(MLP, self).__init__() @@ -55,6 +56,7 @@ class MLP(fluid.Layer): def reader_decorator(linear_size=1000): + def __reader__(): for _ in range(100): img = np.random.rand(linear_size).astype('float32') @@ -66,14 +68,13 @@ def reader_decorator(linear_size=1000): def optimizer_setting(model, use_pure_fp16, opt_group=False): clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0) - optimizer = paddle.optimizer.AdamW( - parameters=[{ - "params": model.parameters(), - }] if opt_group else model.parameters(), - learning_rate=0.001, - weight_decay=0.00001, - grad_clip=clip, - multi_precision=use_pure_fp16) + optimizer = paddle.optimizer.AdamW(parameters=[{ + "params": model.parameters(), + }] if opt_group else model.parameters(), + learning_rate=0.001, + weight_decay=0.00001, + grad_clip=clip, + multi_precision=use_pure_fp16) return optimizer @@ -89,8 +90,9 @@ def train_mlp(model, if sharding_stage != "dp": group = paddle.distributed.new_group([0, 1], backend="nccl") if opt_group: - optimizer = optimizer_setting( - model=model, use_pure_fp16=use_pure_fp16, opt_group=opt_group) + optimizer = optimizer_setting(model=model, + use_pure_fp16=use_pure_fp16, + opt_group=opt_group) else: optimizer = optimizer_setting(model=model, use_pure_fp16=use_pure_fp16) @@ -98,8 +100,10 @@ def train_mlp(model, optimizer = GroupShardedOptimizerStage2( params=optimizer._parameter_list, optim=optimizer, group=group) - model = GroupShardedStage2( - model, optimizer, group=group, buffer_max_size=2**21) + model = GroupShardedStage2(model, + optimizer, + group=group, + buffer_max_size=2**21) else: model = paddle.DataParallel(model) @@ -112,15 +116,15 @@ def train_mlp(model, "====== Find sharding_stage2_optimizer.minimize() error ======") return - train_reader = paddle.batch( - reader_decorator(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(reader_decorator(), + batch_size=batch_size, + drop_last=True) - train_loader = paddle.io.DataLoader.from_generator( - capacity=32, - use_double_buffer=True, - iterable=True, - return_list=True, - use_multiprocess=True) + train_loader = paddle.io.DataLoader.from_generator(capacity=32, + use_double_buffer=True, + iterable=True, + return_list=True, + use_multiprocess=True) train_loader.set_sample_list_generator(train_reader) if sharding_stage == 2: @@ -175,42 +179,50 @@ def test_dp_stage2(): mlp7.set_state_dict(state_dict) # DP VS stage2 - dp_params = train_mlp( - mlp1, sharding_stage="dp", use_pure_fp16=False, opt_group=False) - stage2_params = train_mlp( - mlp2, sharding_stage=2, use_pure_fp16=False, opt_group=False) + dp_params = train_mlp(mlp1, + sharding_stage="dp", + use_pure_fp16=False, + opt_group=False) + stage2_params = train_mlp(mlp2, + sharding_stage=2, + use_pure_fp16=False, + opt_group=False) for i in range(len(dp_params)): - np.testing.assert_allclose( - dp_params[i].numpy(), stage2_params[i].numpy(), rtol=1e-6) + np.testing.assert_allclose(dp_params[i].numpy(), + stage2_params[i].numpy(), + rtol=1e-6) # stage2 accumulate grad stage2_params = train_mlp(mlp3, sharding_stage=2, accumulate_grad=True) - 
stage2_accumulate_grad = train_mlp( - mlp4, sharding_stage=2, batch_size=20, accumulate_grad=True) + stage2_accumulate_grad = train_mlp(mlp4, + sharding_stage=2, + batch_size=20, + accumulate_grad=True) for i in range(len(stage2_params)): - np.testing.assert_allclose( - stage2_params[i].numpy(), - stage2_accumulate_grad[i].numpy(), - rtol=1e-5, - atol=1e-5) + np.testing.assert_allclose(stage2_params[i].numpy(), + stage2_accumulate_grad[i].numpy(), + rtol=1e-5, + atol=1e-5) # stage2 param list VS param group - stage2_params = train_mlp( - mlp5, sharding_stage=2, use_pure_fp16=False, opt_group=True) + stage2_params = train_mlp(mlp5, + sharding_stage=2, + use_pure_fp16=False, + opt_group=True) for i in range(len(dp_params)): - np.testing.assert_allclose( - dp_params[i].numpy(), stage2_params[i].numpy(), rtol=1e-6) + np.testing.assert_allclose(dp_params[i].numpy(), + stage2_params[i].numpy(), + rtol=1e-6) # save/load model output_dir = tempfile.mkdtemp() model_file = os.path.join(output_dir, "model.pdmodel") optimizer_file = os.path.join(output_dir, "model.pdopt") - model_stage2, optimizer_stage2 = train_mlp( - mlp6, - sharding_stage=2, - use_pure_fp16=False, - opt_group=False, - save_model=True) + model_stage2, optimizer_stage2 = train_mlp(mlp6, + sharding_stage=2, + use_pure_fp16=False, + opt_group=False, + save_model=True) paddle.save(model_stage2.state_dict(), model_file) paddle.save(optimizer_stage2.state_dict(), optimizer_file) m_state_dict = paddle.load(model_file) diff --git a/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage2_offload.py b/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage2_offload.py index b09314ae9e3..060b856505f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage2_offload.py +++ b/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage2_offload.py @@ -1,13 +1,13 @@ # -*- coding: UTF-8 -*- # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -47,19 +47,20 @@ def train_mlp(model, offload=False): scaler = paddle.amp.GradScaler(init_loss_scaling=1024) scaler = GroupShardedScaler(scaler) - optimizer = GroupShardedOptimizerStage2( - params=optimizer._parameter_list, optim=optimizer, offload=offload) + optimizer = GroupShardedOptimizerStage2(params=optimizer._parameter_list, + optim=optimizer, + offload=offload) model = GroupShardedStage2(model, optimizer, buffer_max_size=2**21) - train_reader = paddle.batch( - reader_decorator(linear_size), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(reader_decorator(linear_size), + batch_size=batch_size, + drop_last=True) - train_loader = paddle.io.DataLoader.from_generator( - capacity=32, - use_double_buffer=True, - iterable=True, - return_list=True, - use_multiprocess=True) + train_loader = paddle.io.DataLoader.from_generator(capacity=32, + use_double_buffer=True, + iterable=True, + return_list=True, + use_multiprocess=True) train_loader.set_sample_list_generator(train_reader) for eop in range(epoch): @@ -72,8 +73,8 @@ def train_mlp(model, offload=False): with paddle.amp.auto_cast(True, level='O2'): out = model(img) - loss = paddle.nn.functional.cross_entropy( - input=out, label=label) + loss = paddle.nn.functional.cross_entropy(input=out, + label=label) avg_loss = paddle.mean(x=loss.cast(dtype=paddle.float32)) scaler.scale(avg_loss).backward() @@ -99,11 +100,10 @@ def test_sharding_stage2_offload(): mlp_offload_params = train_mlp(mlp_offload, offload=True) for i in range(len(mlp_params)): - np.testing.assert_allclose( - mlp_params[i].numpy(), - mlp_offload_params[i].numpy(), - rtol=5e-3, - atol=5e-3) + np.testing.assert_allclose(mlp_params[i].numpy(), + mlp_offload_params[i].numpy(), + rtol=5e-3, + atol=5e-3) return diff --git a/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage3.py b/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage3.py index 6c350e63f44..31b56cdfb8c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage3.py @@ -1,13 +1,13 @@ # -*- coding: UTF-8 -*- # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -42,6 +42,7 @@ l2_decay = 1e-4 class MLP(fluid.Layer): + def __init__(self, linear_size=1000, param_attr=None, bias_attr=None): super(MLP, self).__init__() @@ -57,6 +58,7 @@ class MLP(fluid.Layer): def reader_decorator(linear_size=1000): + def __reader__(): for _ in range(100): img = np.random.rand(linear_size).astype('float32') @@ -91,28 +93,31 @@ def train_mlp(model, save_model=False): group = paddle.distributed.new_group([0, 1]) if opt_group: - optimizer = optimizer_setting( - model=model, use_pure_fp16=use_pure_fp16, opt_group=opt_group) + optimizer = optimizer_setting(model=model, + use_pure_fp16=use_pure_fp16, + opt_group=opt_group) else: optimizer = optimizer_setting(model=model, use_pure_fp16=use_pure_fp16) if use_pure_fp16: - model = paddle.amp.decorate( - models=model, level='O2', save_dtype='float32') + model = paddle.amp.decorate(models=model, + level='O2', + save_dtype='float32') scaler = paddle.amp.GradScaler(init_loss_scaling=32768) scaler = GroupShardedScaler(scaler) if sharding_stage == 2: optimizer = GroupShardedOptimizerStage2( params=optimizer._parameter_list, optim=optimizer, group=group) - model = GroupShardedStage2( - model, optimizer, group=group, buffer_max_size=2**21) + model = GroupShardedStage2(model, + optimizer, + group=group, + buffer_max_size=2**21) elif sharding_stage == 3: - model = GroupShardedStage3( - model, - optimizer=optimizer, - group=group, - sync_comm=sync_comm, - segment_size=2**15) + model = GroupShardedStage3(model, + optimizer=optimizer, + group=group, + sync_comm=sync_comm, + segment_size=2**15) # check optimizer.minimize() error if test_minimize: @@ -123,15 +128,15 @@ def train_mlp(model, "====== Find sharding_stage3_optimizer.minimize() error ======") return - train_reader = paddle.batch( - reader_decorator(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(reader_decorator(), + batch_size=batch_size, + drop_last=True) - train_loader = paddle.io.DataLoader.from_generator( - capacity=32, - use_double_buffer=True, - iterable=True, - return_list=True, - use_multiprocess=True) + train_loader = paddle.io.DataLoader.from_generator(capacity=32, + use_double_buffer=True, + iterable=True, + return_list=True, + use_multiprocess=True) train_loader.set_sample_list_generator(train_reader) for eop in range(epoch): @@ -142,8 +147,8 @@ def train_mlp(model, img.stop_gradient = True with paddle.amp.auto_cast(True, level='O2'): out = model(img) - loss = paddle.nn.functional.cross_entropy( - input=out, label=label) + loss = paddle.nn.functional.cross_entropy(input=out, + label=label) avg_loss = paddle.mean(x=loss.cast(dtype=paddle.float32)) if batch_size == 20: @@ -192,75 +197,79 @@ def test_stage2_stage3(): mlp9.set_state_dict(state_dict) mlp10.set_state_dict(state_dict) - # fp32 - stage2_params = train_mlp( - mlp1, sharding_stage=2, use_pure_fp16=False, opt_group=False) - stage3_params = train_mlp( - mlp2, sharding_stage=3, use_pure_fp16=False, opt_group=False) + # fp32 + stage2_params = train_mlp(mlp1, + sharding_stage=2, + use_pure_fp16=False, + opt_group=False) + stage3_params = train_mlp(mlp2, + sharding_stage=3, + use_pure_fp16=False, + opt_group=False) for i in range(len(stage2_params)): - np.testing.assert_allclose( - stage2_params[i].numpy(), - stage3_params[i].numpy(), - rtol=1e-6, - atol=1e-6) + np.testing.assert_allclose(stage2_params[i].numpy(), + stage3_params[i].numpy(), + rtol=1e-6, + atol=1e-6) # fp32 accumulate grad - stage3_params = train_mlp( - mlp3, - sharding_stage=3, - use_pure_fp16=False, - accumulate_grad=True, 
- opt_group=True) - stage3_params_add = train_mlp( - mlp4, - sharding_stage=3, - use_pure_fp16=False, - accumulate_grad=True, - batch_size=20, - opt_group=True) + stage3_params = train_mlp(mlp3, + sharding_stage=3, + use_pure_fp16=False, + accumulate_grad=True, + opt_group=True) + stage3_params_add = train_mlp(mlp4, + sharding_stage=3, + use_pure_fp16=False, + accumulate_grad=True, + batch_size=20, + opt_group=True) for i in range(len(stage3_params)): - np.testing.assert_allclose( - stage3_params[i].numpy(), - stage3_params_add[i].numpy(), - rtol=1e-6, - atol=1e-4) + np.testing.assert_allclose(stage3_params[i].numpy(), + stage3_params_add[i].numpy(), + rtol=1e-6, + atol=1e-4) # fp16 - stage2_params = train_mlp( - mlp5, sharding_stage=2, use_pure_fp16=True, opt_group=False) - stage3_params = train_mlp( - mlp6, sharding_stage=3, use_pure_fp16=True, opt_group=False) + stage2_params = train_mlp(mlp5, + sharding_stage=2, + use_pure_fp16=True, + opt_group=False) + stage3_params = train_mlp(mlp6, + sharding_stage=3, + use_pure_fp16=True, + opt_group=False) for i in range(len(stage2_params)): - np.testing.assert_allclose( - stage2_params[i].numpy(), - stage3_params[i].numpy(), - rtol=1e-4, - atol=1e-3) + np.testing.assert_allclose(stage2_params[i].numpy(), + stage3_params[i].numpy(), + rtol=1e-4, + atol=1e-3) # fp16 sync_comm - stage3_params = train_mlp( - mlp7, sharding_stage=3, use_pure_fp16=True, opt_group=False) - stage3_params_re = train_mlp( - mlp8, - sharding_stage=3, - use_pure_fp16=True, - opt_group=False, - sync_comm=True) + stage3_params = train_mlp(mlp7, + sharding_stage=3, + use_pure_fp16=True, + opt_group=False) + stage3_params_re = train_mlp(mlp8, + sharding_stage=3, + use_pure_fp16=True, + opt_group=False, + sync_comm=True) for i in range(len(stage3_params)): - np.testing.assert_allclose( - stage3_params[i].numpy(), stage3_params_re[i].numpy(), rtol=1e-6) + np.testing.assert_allclose(stage3_params[i].numpy(), + stage3_params_re[i].numpy(), + rtol=1e-6) # save/load model output_dir = tempfile.mkdtemp() model_file = os.path.join(output_dir, "model.pdmodel") optimizer_file = os.path.join(output_dir, "model.pdopt") - model_stage3, optimizer_stage3 = train_mlp( - mlp9, - sharding_stage=3, - use_pure_fp16=False, - opt_group=False, - save_model=True) + model_stage3, optimizer_stage3 = train_mlp(mlp9, + sharding_stage=3, + use_pure_fp16=False, + opt_group=False, + save_model=True) paddle.save(model_stage3.state_dict(), model_file) paddle.save(optimizer_stage3.state_dict(), optimizer_file) m_state_dict = paddle.load(model_file) @@ -270,12 +279,11 @@ def test_stage2_stage3(): shutil.rmtree(output_dir) # check optimizer.minimize() error - train_mlp( - mlp10, - sharding_stage=3, - use_pure_fp16=False, - opt_group=False, - test_minimize=True) + train_mlp(mlp10, + sharding_stage=3, + use_pure_fp16=False, + opt_group=False, + test_minimize=True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage3_offload.py b/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage3_offload.py index 5f9ec5c6e70..da84fb67ca9 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage3_offload.py +++ b/python/paddle/fluid/tests/unittests/dygraph_group_sharded_stage3_offload.py @@ -1,13 +1,13 @@ # -*- coding: UTF-8 -*- # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -37,6 +37,7 @@ l2_decay = 1e-4 class MLP(fluid.Layer): + def __init__(self, linear_size=1000, param_attr=None, bias_attr=None): super(MLP, self).__init__() @@ -52,6 +53,7 @@ class MLP(fluid.Layer): def reader_decorator(linear_size=1000): + def __reader__(): for _ in range(100): img = np.random.rand(linear_size).astype('float32') @@ -63,14 +65,13 @@ def reader_decorator(linear_size=1000): def optimizer_setting(model, use_pure_fp16, opt_group=False): clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0) - optimizer = paddle.optimizer.AdamW( - parameters=[{ - "params": model.parameters() - }] if opt_group else model.parameters(), - learning_rate=0.001, - weight_decay=0.00001, - grad_clip=clip, - multi_precision=use_pure_fp16) + optimizer = paddle.optimizer.AdamW(parameters=[{ + "params": model.parameters() + }] if opt_group else model.parameters(), + learning_rate=0.001, + weight_decay=0.00001, + grad_clip=clip, + multi_precision=use_pure_fp16) return optimizer @@ -85,27 +86,27 @@ def train_mlp(model, optimizer = optimizer_setting(model=model, use_pure_fp16=use_pure_fp16) if use_pure_fp16: - model = paddle.amp.decorate( - models=model, level='O2', save_dtype='float32') + model = paddle.amp.decorate(models=model, + level='O2', + save_dtype='float32') scaler = paddle.amp.GradScaler(init_loss_scaling=32768) scaler = GroupShardedScaler(scaler) - model = GroupShardedStage3( - model, - optimizer=optimizer, - group=group, - offload=offload, - segment_size=2**15) - - train_reader = paddle.batch( - reader_decorator(), batch_size=batch_size, drop_last=True) - - train_loader = paddle.io.DataLoader.from_generator( - capacity=32, - use_double_buffer=True, - iterable=True, - return_list=True, - use_multiprocess=True) + model = GroupShardedStage3(model, + optimizer=optimizer, + group=group, + offload=offload, + segment_size=2**15) + + train_reader = paddle.batch(reader_decorator(), + batch_size=batch_size, + drop_last=True) + + train_loader = paddle.io.DataLoader.from_generator(capacity=32, + use_double_buffer=True, + iterable=True, + return_list=True, + use_multiprocess=True) train_loader.set_sample_list_generator(train_reader) for eop in range(epoch): @@ -116,8 +117,8 @@ def train_mlp(model, img.stop_gradient = True with paddle.amp.auto_cast(True, level='O2'): out = model(img) - loss = paddle.nn.functional.cross_entropy( - input=out, label=label) + loss = paddle.nn.functional.cross_entropy(input=out, + label=label) avg_loss = paddle.mean(x=loss.cast(dtype=paddle.float32)) if accumulate_grad: @@ -165,38 +166,36 @@ def test_stage3_offload(): stage3_params = train_mlp(mlp1, use_pure_fp16=False) stage3_params_offload = train_mlp(mlp2, use_pure_fp16=False, offload=True) for i in range(len(stage3_params)): - np.testing.assert_allclose( - stage3_params[i].numpy(), - stage3_params_offload[i].numpy(), - rtol=1e-6, - atol=1e-8) + np.testing.assert_allclose(stage3_params[i].numpy(), + stage3_params_offload[i].numpy(), + rtol=1e-6, + atol=1e-8) # fp16 offload stage3_params = train_mlp(mlp3, use_pure_fp16=True) stage3_params_offload = train_mlp(mlp4, use_pure_fp16=True, offload=True) for i in range(len(stage3_params)): - np.testing.assert_allclose( - stage3_params[i].numpy(), - 
stage3_params_offload[i].numpy(), - rtol=1e-2, - atol=1e-2) + np.testing.assert_allclose(stage3_params[i].numpy(), + stage3_params_offload[i].numpy(), + rtol=1e-2, + atol=1e-2) # fp32 accumulate grad offload - stage3_params = train_mlp( - mlp5, use_pure_fp16=False, batch_size=20, accumulate_grad=True) - stage3_params_offload = train_mlp( - mlp6, - use_pure_fp16=False, - accumulate_grad=True, - offload=True, - batch_size=20, - convert2cpu=True) + stage3_params = train_mlp(mlp5, + use_pure_fp16=False, + batch_size=20, + accumulate_grad=True) + stage3_params_offload = train_mlp(mlp6, + use_pure_fp16=False, + accumulate_grad=True, + offload=True, + batch_size=20, + convert2cpu=True) for i in range(len(stage3_params)): - np.testing.assert_allclose( - stage3_params[i].numpy(), - stage3_params_offload[i].numpy(), - rtol=1e-6, - atol=1e-8) + np.testing.assert_allclose(stage3_params[i].numpy(), + stage3_params_offload[i].numpy(), + rtol=1e-6, + atol=1e-8) return diff --git a/python/paddle/fluid/tests/unittests/dygraph_sharding_optimizer_stage2.py b/python/paddle/fluid/tests/unittests/dygraph_sharding_optimizer_stage2.py index 0ed9b681fdc..22d001c7c46 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_sharding_optimizer_stage2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_sharding_optimizer_stage2.py @@ -1,13 +1,13 @@ # -*- coding: UTF-8 -*- # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -38,6 +38,7 @@ class_dim = 102 class MLP(fluid.Layer): + def __init__(self, param_attr=None, bias_attr=None): super(MLP, self).__init__() @@ -51,6 +52,7 @@ class MLP(fluid.Layer): def reader_decorator(): + def __reader__(): for _ in range(100): img = np.random.rand(10).astype('float32') @@ -76,18 +78,18 @@ def train_mlp(): mlp = MLP() optimizer = optimizer_setting(parameter_list=mlp.parameters()) - oss_optimizer = ShardingOptimizerStage2( - params=mlp.parameters(), optim=optimizer, group=group) + oss_optimizer = ShardingOptimizerStage2(params=mlp.parameters(), + optim=optimizer, + group=group) # cover grad_storage code trainable_param2align = dict() for p in mlp.parameters(): trainable_param2align[p.name] = 0 - grad_storage = GradStorage( - 10000, - dtype=paddle.float32, - device="gpu", - destination=0, - parm2align=trainable_param2align) + grad_storage = GradStorage(10000, + dtype=paddle.float32, + device="gpu", + destination=0, + parm2align=trainable_param2align) for p in mlp.parameters(): grad_storage.can_add_grad_view(p, trainable_param2align[p.name]) grad_storage.add_grad(p, trainable_param2align[p.name]) @@ -95,15 +97,15 @@ def train_mlp(): grad_storage.rebuild() grad_storage.reset_checked_in() - train_reader = paddle.batch( - reader_decorator(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(reader_decorator(), + batch_size=batch_size, + drop_last=True) - train_loader = paddle.io.DataLoader.from_generator( - capacity=32, - use_double_buffer=True, - iterable=True, - return_list=True, - use_multiprocess=True) + train_loader = paddle.io.DataLoader.from_generator(capacity=32, + use_double_buffer=True, + iterable=True, + return_list=True, + use_multiprocess=True) train_loader.set_sample_list_generator(train_reader) for eop in range(epoch): diff --git a/python/paddle/fluid/tests/unittests/dygraph_sharding_stage2.py b/python/paddle/fluid/tests/unittests/dygraph_sharding_stage2.py index 58432540d1b..756b1bfb607 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_sharding_stage2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_sharding_stage2.py @@ -1,13 +1,13 @@ # -*- coding: UTF-8 -*- # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -48,6 +48,7 @@ paddle.seed(seed) class MLP(fluid.Layer): + def __init__(self, linear_size=1000, param_attr=None, bias_attr=None): super(MLP, self).__init__() @@ -63,6 +64,7 @@ class MLP(fluid.Layer): def reader_decorator(linear_size=1000): + def __reader__(): for _ in range(100): img = np.random.rand(linear_size).astype('float32') @@ -74,14 +76,13 @@ def reader_decorator(linear_size=1000): def optimizer_setting(model, use_pure_fp16, opt_group=False): clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0) - optimizer = paddle.optimizer.AdamW( - parameters=[{ - "params": model.parameters() - }] if opt_group else model.parameters(), - learning_rate=0.001, - weight_decay=0.00001, - grad_clip=clip, - multi_precision=use_pure_fp16) + optimizer = paddle.optimizer.AdamW(parameters=[{ + "params": model.parameters() + }] if opt_group else model.parameters(), + learning_rate=0.001, + weight_decay=0.00001, + grad_clip=clip, + multi_precision=use_pure_fp16) return optimizer @@ -99,30 +100,34 @@ def train_mlp(model, else: group = paddle.distributed.new_group([0, 1]) if opt_group: - optimizer = optimizer_setting( - model=model, use_pure_fp16=use_pure_fp16, opt_group=opt_group) + optimizer = optimizer_setting(model=model, + use_pure_fp16=use_pure_fp16, + opt_group=opt_group) else: optimizer = optimizer_setting(model=model, use_pure_fp16=use_pure_fp16) if sharding_stage == 2: - optimizer = ShardingOptimizerStage2( - params=model.parameters(), optim=optimizer, group=group) - - model = ShardingStage2( - model, optimizer, group=group, buffer_max_size=2**21) + optimizer = ShardingOptimizerStage2(params=model.parameters(), + optim=optimizer, + group=group) + + model = ShardingStage2(model, + optimizer, + group=group, + buffer_max_size=2**21) else: optimizer = fleet.distributed_optimizer(optimizer) model = fleet.distributed_model(model) - train_reader = paddle.batch( - reader_decorator(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(reader_decorator(), + batch_size=batch_size, + drop_last=True) - train_loader = paddle.io.DataLoader.from_generator( - capacity=32, - use_double_buffer=True, - iterable=True, - return_list=True, - use_multiprocess=True) + train_loader = paddle.io.DataLoader.from_generator(capacity=32, + use_double_buffer=True, + iterable=True, + return_list=True, + use_multiprocess=True) train_loader.set_sample_list_generator(train_reader) if sharding_stage == 2: @@ -174,42 +179,50 @@ def test_dp_stage2(): mlp6.set_state_dict(state_dict) # DP VS stage2 - dp_params = train_mlp( - mlp1, sharding_stage="dp", use_pure_fp16=False, opt_group=False) - stage2_params = train_mlp( - mlp2, sharding_stage=2, use_pure_fp16=False, opt_group=False) + dp_params = train_mlp(mlp1, + sharding_stage="dp", + use_pure_fp16=False, + opt_group=False) + stage2_params = train_mlp(mlp2, + sharding_stage=2, + use_pure_fp16=False, + opt_group=False) for i in range(len(dp_params)): - np.testing.assert_allclose( - dp_params[i].numpy(), stage2_params[i].numpy(), rtol=1e-6) + np.testing.assert_allclose(dp_params[i].numpy(), + stage2_params[i].numpy(), + rtol=1e-6) # stage2 accumulate grad stage2_params = train_mlp(mlp3, sharding_stage=2, accumulate_grad=True) - stage2_accumulate_grad = train_mlp( - mlp4, sharding_stage=2, batch_size=20, accumulate_grad=True) + stage2_accumulate_grad = train_mlp(mlp4, + sharding_stage=2, + batch_size=20, + accumulate_grad=True) for i in range(len(stage2_params)): - np.testing.assert_allclose( - stage2_params[i].numpy(), - stage2_accumulate_grad[i].numpy(), - rtol=1e-5, - 
atol=1e-5) + np.testing.assert_allclose(stage2_params[i].numpy(), + stage2_accumulate_grad[i].numpy(), + rtol=1e-5, + atol=1e-5) # stage2 param list VS param group - stage2_params = train_mlp( - mlp5, sharding_stage=2, use_pure_fp16=False, opt_group=True) + stage2_params = train_mlp(mlp5, + sharding_stage=2, + use_pure_fp16=False, + opt_group=True) for i in range(len(dp_params)): - np.testing.assert_allclose( - dp_params[i].numpy(), stage2_params[i].numpy(), rtol=1e-6) + np.testing.assert_allclose(dp_params[i].numpy(), + stage2_params[i].numpy(), + rtol=1e-6) # save/load model output_dir = tempfile.mkdtemp() model_file = os.path.join(output_dir, "model.pdmodel") optimizer_file = os.path.join(output_dir, "model.pdopt") - model_stage2, optimizer_stage2 = train_mlp( - mlp6, - sharding_stage=2, - use_pure_fp16=False, - opt_group=False, - save_model=True) + model_stage2, optimizer_stage2 = train_mlp(mlp6, + sharding_stage=2, + use_pure_fp16=False, + opt_group=False, + save_model=True) paddle.save(model_stage2.state_dict(), model_file) paddle.save(optimizer_stage2.state_dict(), optimizer_file) m_state_dict = paddle.load(model_file) diff --git a/python/paddle/fluid/tests/unittests/dygraph_sharding_stage2_offload.py b/python/paddle/fluid/tests/unittests/dygraph_sharding_stage2_offload.py index cd2d7b3f127..1acdce548d5 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_sharding_stage2_offload.py +++ b/python/paddle/fluid/tests/unittests/dygraph_sharding_stage2_offload.py @@ -1,13 +1,13 @@ # -*- coding: UTF-8 -*- # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -55,19 +55,20 @@ def train_mlp(model, offload=False): scaler = paddle.amp.GradScaler(init_loss_scaling=1024) scaler = ShardingScaler(scaler) - optimizer = ShardingOptimizerStage2( - params=model.parameters(), optim=optimizer, offload=offload) + optimizer = ShardingOptimizerStage2(params=model.parameters(), + optim=optimizer, + offload=offload) model = ShardingStage2(model, optimizer, buffer_max_size=2**21) - train_reader = paddle.batch( - reader_decorator(linear_size), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(reader_decorator(linear_size), + batch_size=batch_size, + drop_last=True) - train_loader = paddle.io.DataLoader.from_generator( - capacity=32, - use_double_buffer=True, - iterable=True, - return_list=True, - use_multiprocess=True) + train_loader = paddle.io.DataLoader.from_generator(capacity=32, + use_double_buffer=True, + iterable=True, + return_list=True, + use_multiprocess=True) train_loader.set_sample_list_generator(train_reader) for eop in range(epoch): @@ -80,8 +81,8 @@ def train_mlp(model, offload=False): with paddle.amp.auto_cast(True, level='O2'): out = model(img) - loss = paddle.nn.functional.cross_entropy( - input=out, label=label) + loss = paddle.nn.functional.cross_entropy(input=out, + label=label) avg_loss = paddle.mean(x=loss.cast(dtype=paddle.float32)) scaler.scale(avg_loss).backward() @@ -106,11 +107,10 @@ def test_sharding_stage2_offload(): mlp_offload_params = train_mlp(mlp_offload, offload=True) for i in range(len(mlp_params)): - np.testing.assert_allclose( - mlp_params[i].numpy(), - mlp_offload_params[i].numpy(), - rtol=5e-3, - atol=5e-3) + np.testing.assert_allclose(mlp_params[i].numpy(), + mlp_offload_params[i].numpy(), + rtol=5e-3, + atol=5e-3) return diff --git a/python/paddle/fluid/tests/unittests/dygraph_sharding_stage3.py b/python/paddle/fluid/tests/unittests/dygraph_sharding_stage3.py index fc4002ef405..c48e7a36424 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_sharding_stage3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_sharding_stage3.py @@ -1,13 +1,13 @@ # -*- coding: UTF-8 -*- # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -42,6 +42,7 @@ l2_decay = 1e-4 class MLP(fluid.Layer): + def __init__(self, linear_size=1000, param_attr=None, bias_attr=None): super(MLP, self).__init__() @@ -57,6 +58,7 @@ class MLP(fluid.Layer): def reader_decorator(linear_size=1000): + def __reader__(): for _ in range(100): img = np.random.rand(linear_size).astype('float32') @@ -91,24 +93,31 @@ def train_mlp(model, save_model=False): group = paddle.distributed.new_group([0, 1]) if opt_group: - optimizer = optimizer_setting( - model=model, use_pure_fp16=use_pure_fp16, opt_group=opt_group) + optimizer = optimizer_setting(model=model, + use_pure_fp16=use_pure_fp16, + opt_group=opt_group) else: optimizer = optimizer_setting(model=model, use_pure_fp16=use_pure_fp16) if use_pure_fp16: - model = paddle.amp.decorate( - models=model, level='O2', save_dtype='float32') + model = paddle.amp.decorate(models=model, + level='O2', + save_dtype='float32') scaler = paddle.amp.GradScaler(init_loss_scaling=32768) scaler = ShardingScaler(scaler) if sharding_stage == 2: - optimizer = ShardingOptimizerStage2( - params=model.parameters(), optim=optimizer, group=group) - model = ShardingStage2( - model, optimizer, group=group, buffer_max_size=2**21) + optimizer = ShardingOptimizerStage2(params=model.parameters(), + optim=optimizer, + group=group) + model = ShardingStage2(model, + optimizer, + group=group, + buffer_max_size=2**21) elif sharding_stage == 3: - model = ShardingStage3( - model, optimizer=optimizer, group=group, sync_comm=sync_comm) + model = ShardingStage3(model, + optimizer=optimizer, + group=group, + sync_comm=sync_comm) # check optimizer.minimize() error if test_minimize: @@ -119,15 +128,15 @@ def train_mlp(model, "====== Find sharding_stage3_optimizer.minimize() error ======") return - train_reader = paddle.batch( - reader_decorator(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(reader_decorator(), + batch_size=batch_size, + drop_last=True) - train_loader = paddle.io.DataLoader.from_generator( - capacity=32, - use_double_buffer=True, - iterable=True, - return_list=True, - use_multiprocess=True) + train_loader = paddle.io.DataLoader.from_generator(capacity=32, + use_double_buffer=True, + iterable=True, + return_list=True, + use_multiprocess=True) train_loader.set_sample_list_generator(train_reader) for eop in range(epoch): @@ -138,8 +147,8 @@ def train_mlp(model, img.stop_gradient = True with paddle.amp.auto_cast(True, level='O2'): out = model(img) - loss = paddle.nn.functional.cross_entropy( - input=out, label=label) + loss = paddle.nn.functional.cross_entropy(input=out, + label=label) avg_loss = paddle.mean(x=loss.cast(dtype=paddle.float32)) if batch_size == 20: @@ -187,75 +196,79 @@ def test_stage2_stage3(): mlp9.set_state_dict(state_dict) mlp10.set_state_dict(state_dict) - # fp32 - stage2_params = train_mlp( - mlp1, sharding_stage=2, use_pure_fp16=False, opt_group=False) - stage3_params = train_mlp( - mlp2, sharding_stage=3, use_pure_fp16=False, opt_group=False) + # fp32 + stage2_params = train_mlp(mlp1, + sharding_stage=2, + use_pure_fp16=False, + opt_group=False) + stage3_params = train_mlp(mlp2, + sharding_stage=3, + use_pure_fp16=False, + opt_group=False) for i in range(len(stage2_params)): - np.testing.assert_allclose( - stage2_params[i].numpy(), - stage3_params[i].numpy(), - rtol=1e-6, - atol=1e-6) + np.testing.assert_allclose(stage2_params[i].numpy(), + stage3_params[i].numpy(), + rtol=1e-6, + atol=1e-6) # fp32 accumulate grad - stage3_params = train_mlp( - mlp3, - sharding_stage=3, - use_pure_fp16=False, - 
accumulate_grad=True, - opt_group=True) - stage3_params_add = train_mlp( - mlp4, - sharding_stage=3, - use_pure_fp16=False, - accumulate_grad=True, - batch_size=20, - opt_group=True) + stage3_params = train_mlp(mlp3, + sharding_stage=3, + use_pure_fp16=False, + accumulate_grad=True, + opt_group=True) + stage3_params_add = train_mlp(mlp4, + sharding_stage=3, + use_pure_fp16=False, + accumulate_grad=True, + batch_size=20, + opt_group=True) for i in range(len(stage3_params)): - np.testing.assert_allclose( - stage3_params[i].numpy(), - stage3_params_add[i].numpy(), - rtol=1e-6, - atol=1e-4) + np.testing.assert_allclose(stage3_params[i].numpy(), + stage3_params_add[i].numpy(), + rtol=1e-6, + atol=1e-4) # fp16 - stage2_params = train_mlp( - mlp5, sharding_stage=2, use_pure_fp16=True, opt_group=False) - stage3_params = train_mlp( - mlp6, sharding_stage=3, use_pure_fp16=True, opt_group=False) + stage2_params = train_mlp(mlp5, + sharding_stage=2, + use_pure_fp16=True, + opt_group=False) + stage3_params = train_mlp(mlp6, + sharding_stage=3, + use_pure_fp16=True, + opt_group=False) for i in range(len(stage2_params)): - np.testing.assert_allclose( - stage2_params[i].numpy(), - stage3_params[i].numpy(), - rtol=1e-4, - atol=1e-3) + np.testing.assert_allclose(stage2_params[i].numpy(), + stage3_params[i].numpy(), + rtol=1e-4, + atol=1e-3) # fp16 sync_comm - stage3_params = train_mlp( - mlp7, sharding_stage=3, use_pure_fp16=True, opt_group=False) - stage3_params_re = train_mlp( - mlp8, - sharding_stage=3, - use_pure_fp16=True, - opt_group=False, - sync_comm=True) + stage3_params = train_mlp(mlp7, + sharding_stage=3, + use_pure_fp16=True, + opt_group=False) + stage3_params_re = train_mlp(mlp8, + sharding_stage=3, + use_pure_fp16=True, + opt_group=False, + sync_comm=True) for i in range(len(stage3_params)): - np.testing.assert_allclose( - stage3_params[i].numpy(), stage3_params_re[i].numpy(), rtol=1e-6) + np.testing.assert_allclose(stage3_params[i].numpy(), + stage3_params_re[i].numpy(), + rtol=1e-6) # save/load model output_dir = tempfile.mkdtemp() model_file = os.path.join(output_dir, "model.pdmodel") optimizer_file = os.path.join(output_dir, "model.pdopt") - model_stage3, optimizer_stage3 = train_mlp( - mlp9, - sharding_stage=3, - use_pure_fp16=False, - opt_group=False, - save_model=True) + model_stage3, optimizer_stage3 = train_mlp(mlp9, + sharding_stage=3, + use_pure_fp16=False, + opt_group=False, + save_model=True) paddle.save(model_stage3.state_dict(), model_file) paddle.save(optimizer_stage3.state_dict(), optimizer_file) m_state_dict = paddle.load(model_file) @@ -265,12 +278,11 @@ def test_stage2_stage3(): shutil.rmtree(output_dir) # check optimizer.minimize() error - train_mlp( - mlp10, - sharding_stage=3, - use_pure_fp16=False, - opt_group=False, - test_minimize=True) + train_mlp(mlp10, + sharding_stage=3, + use_pure_fp16=False, + opt_group=False, + test_minimize=True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/dygraph_sharding_stage3_offload.py b/python/paddle/fluid/tests/unittests/dygraph_sharding_stage3_offload.py index 763a7a8b97f..19c0c91c20d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_sharding_stage3_offload.py +++ b/python/paddle/fluid/tests/unittests/dygraph_sharding_stage3_offload.py @@ -1,13 +1,13 @@ # -*- coding: UTF-8 -*- # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -37,6 +37,7 @@ l2_decay = 1e-4 class MLP(fluid.Layer): + def __init__(self, linear_size=1000, param_attr=None, bias_attr=None): super(MLP, self).__init__() @@ -52,6 +53,7 @@ class MLP(fluid.Layer): def reader_decorator(linear_size=1000): + def __reader__(): for _ in range(100): img = np.random.rand(linear_size).astype('float32') @@ -63,14 +65,13 @@ def reader_decorator(linear_size=1000): def optimizer_setting(model, use_pure_fp16, opt_group=False): clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0) - optimizer = paddle.optimizer.AdamW( - parameters=[{ - "params": model.parameters() - }] if opt_group else model.parameters(), - learning_rate=0.001, - weight_decay=0.00001, - grad_clip=clip, - multi_precision=use_pure_fp16) + optimizer = paddle.optimizer.AdamW(parameters=[{ + "params": model.parameters() + }] if opt_group else model.parameters(), + learning_rate=0.001, + weight_decay=0.00001, + grad_clip=clip, + multi_precision=use_pure_fp16) return optimizer @@ -85,23 +86,26 @@ def train_mlp(model, optimizer = optimizer_setting(model=model, use_pure_fp16=use_pure_fp16) if use_pure_fp16: - model = paddle.amp.decorate( - models=model, level='O2', save_dtype='float32') + model = paddle.amp.decorate(models=model, + level='O2', + save_dtype='float32') scaler = paddle.amp.GradScaler(init_loss_scaling=32768) scaler = ShardingScaler(scaler) - model = ShardingStage3( - model, optimizer=optimizer, group=group, offload=offload) + model = ShardingStage3(model, + optimizer=optimizer, + group=group, + offload=offload) - train_reader = paddle.batch( - reader_decorator(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(reader_decorator(), + batch_size=batch_size, + drop_last=True) - train_loader = paddle.io.DataLoader.from_generator( - capacity=32, - use_double_buffer=True, - iterable=True, - return_list=True, - use_multiprocess=True) + train_loader = paddle.io.DataLoader.from_generator(capacity=32, + use_double_buffer=True, + iterable=True, + return_list=True, + use_multiprocess=True) train_loader.set_sample_list_generator(train_reader) for eop in range(epoch): @@ -112,8 +116,8 @@ def train_mlp(model, img.stop_gradient = True with paddle.amp.auto_cast(True, level='O2'): out = model(img) - loss = paddle.nn.functional.cross_entropy( - input=out, label=label) + loss = paddle.nn.functional.cross_entropy(input=out, + label=label) avg_loss = paddle.mean(x=loss.cast(dtype=paddle.float32)) if accumulate_grad: @@ -160,38 +164,36 @@ def test_stage3_offload(): stage3_params = train_mlp(mlp1, use_pure_fp16=False) stage3_params_offload = train_mlp(mlp2, use_pure_fp16=False, offload=True) for i in range(len(stage3_params)): - np.testing.assert_allclose( - stage3_params[i].numpy(), - stage3_params_offload[i].numpy(), - rtol=1e-6, - atol=1e-8) + np.testing.assert_allclose(stage3_params[i].numpy(), + stage3_params_offload[i].numpy(), + rtol=1e-6, + atol=1e-8) # fp16 offload stage3_params = train_mlp(mlp3, use_pure_fp16=True) stage3_params_offload = train_mlp(mlp4, use_pure_fp16=True, offload=True) for i in range(len(stage3_params)): - np.testing.assert_allclose( - stage3_params[i].numpy(), - stage3_params_offload[i].numpy(), - rtol=1e-2, - atol=1e-2) + 
np.testing.assert_allclose(stage3_params[i].numpy(), + stage3_params_offload[i].numpy(), + rtol=1e-2, + atol=1e-2) # fp32 accumulate grad offload - stage3_params = train_mlp( - mlp5, use_pure_fp16=False, batch_size=20, accumulate_grad=True) - stage3_params_offload = train_mlp( - mlp6, - use_pure_fp16=False, - accumulate_grad=True, - offload=True, - batch_size=20, - convert2cpu=True) + stage3_params = train_mlp(mlp5, + use_pure_fp16=False, + batch_size=20, + accumulate_grad=True) + stage3_params_offload = train_mlp(mlp6, + use_pure_fp16=False, + accumulate_grad=True, + offload=True, + batch_size=20, + convert2cpu=True) for i in range(len(stage3_params)): - np.testing.assert_allclose( - stage3_params[i].numpy(), - stage3_params_offload[i].numpy(), - rtol=1e-6, - atol=1e-8) + np.testing.assert_allclose(stage3_params[i].numpy(), + stage3_params_offload[i].numpy(), + rtol=1e-6, + atol=1e-8) return diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py index b302dd37794..7ee6203fb94 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py @@ -22,6 +22,7 @@ from transformer_dygraph_model import MultiHeadAttention, PrePostProcessLayer class PositionwiseFeedForwardLayer(Layer): + def __init__(self, hidden_act, d_inner_hid, @@ -31,33 +32,35 @@ class PositionwiseFeedForwardLayer(Layer): name=""): super(PositionwiseFeedForwardLayer, self).__init__() - self._i2h = Linear( - input_dim=d_model, - output_dim=d_inner_hid, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0', - act=hidden_act) - - self._h2o = Linear( - input_dim=d_inner_hid, - output_dim=d_model, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') + self._i2h = Linear(input_dim=d_model, + output_dim=d_inner_hid, + param_attr=fluid.ParamAttr( + name=name + '_fc_0.w_0', + initializer=param_initializer), + bias_attr=name + '_fc_0.b_0', + act=hidden_act) + + self._h2o = Linear(input_dim=d_inner_hid, + output_dim=d_model, + param_attr=fluid.ParamAttr( + name=name + '_fc_1.w_0', + initializer=param_initializer), + bias_attr=name + '_fc_1.b_0') self._dropout_rate = dropout_rate def forward(self, x): hidden = self._i2h(x) if self._dropout_rate: - hidden = fluid.layers.dropout( - hidden, dropout_prob=self._dropout_rate, is_test=False) + hidden = fluid.layers.dropout(hidden, + dropout_prob=self._dropout_rate, + is_test=False) out = self._h2o(hidden) return out class EncoderSubLayer(Layer): + def __init__(self, hidden_act, n_head, @@ -78,8 +81,9 @@ class EncoderSubLayer(Layer): self._preprocess_cmd = preprocess_cmd self._postprocess_cmd = postprocess_cmd self._prepostprocess_dropout = prepostprocess_dropout - self._preprocess_layer = PrePostProcessLayer( - self._preprocess_cmd, d_model, prepostprocess_dropout) + self._preprocess_layer = PrePostProcessLayer(self._preprocess_cmd, + d_model, + prepostprocess_dropout) self._multihead_attention_layer = MultiHeadAttention( d_key, d_value, d_model, n_head, attention_dropout, param_initializer) @@ -108,6 +112,7 @@ class EncoderSubLayer(Layer): class EncoderLayer(Layer): + def __init__(self, hidden_act, n_layer, @@ -137,20 +142,19 @@ class EncoderLayer(Layer): self._encoder_sublayers.append( self.add_sublayer( 'esl_%d' % i, - EncoderSubLayer( - 
hidden_act, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd, - postprocess_cmd, - param_initializer, - name=name + '_layer_' + str(i)))) + EncoderSubLayer(hidden_act, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + preprocess_cmd, + postprocess_cmd, + param_initializer, + name=name + '_layer_' + str(i)))) def forward(self, enc_input, attn_bias): for i in range(self._n_layer): @@ -161,6 +165,7 @@ class EncoderLayer(Layer): class BertModelLayer(Layer): + def __init__(self, config, return_pooled_out=True, use_fp16=False): super(BertModelLayer, self).__init__() @@ -183,31 +188,31 @@ class BertModelLayer(Layer): self._param_initializer = fluid.initializer.TruncatedNormal( scale=config['initializer_range']) - self._src_emb = Embedding( - size=[self._voc_size, self._emb_size], - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - dtype=self._dtype) + self._src_emb = Embedding(size=[self._voc_size, self._emb_size], + param_attr=fluid.ParamAttr( + name=self._word_emb_name, + initializer=self._param_initializer), + dtype=self._dtype) self._pos_emb = Embedding( size=[self._max_position_seq_len, self._emb_size], - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer), + param_attr=fluid.ParamAttr(name=self._pos_emb_name, + initializer=self._param_initializer), dtype=self._dtype) - self._sent_emb = Embedding( - size=[self._sent_types, self._emb_size], - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer), - dtype=self._dtype) + self._sent_emb = Embedding(size=[self._sent_types, self._emb_size], + param_attr=fluid.ParamAttr( + name=self._sent_emb_name, + initializer=self._param_initializer), + dtype=self._dtype) - self.pooled_fc = Linear( - input_dim=self._emb_size, - output_dim=self._emb_size, - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0", - act="tanh") + self.pooled_fc = Linear(input_dim=self._emb_size, + output_dim=self._emb_size, + param_attr=fluid.ParamAttr( + name="pooled_fc.w_0", + initializer=self._param_initializer), + bias_attr="pooled_fc.b_0", + act="tanh") self.pre_process_layer = PrePostProcessLayer( "nd", self._emb_size, self._prepostprocess_dropout) @@ -237,12 +242,16 @@ class BertModelLayer(Layer): emb_out = self.pre_process_layer(emb_out) - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) + self_attn_mask = fluid.layers.matmul(x=input_mask, + y=input_mask, + transpose_y=True) + self_attn_mask = fluid.layers.scale(x=self_attn_mask, + scale=10000.0, + bias=-1.0, + bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * + self._n_head, + axis=1) n_head_self_attn_mask.stop_gradient = True enc_output = self._encoder(emb_out, n_head_self_attn_mask) @@ -252,16 +261,19 @@ class BertModelLayer(Layer): # #if not self.return_pooled_out: # return enc_output - next_sent_feat = fluid.layers.slice( - input=enc_output, axes=[1], starts=[0], ends=[1]) + next_sent_feat = fluid.layers.slice(input=enc_output, + axes=[1], + starts=[0], + ends=[1]) next_sent_feat = 
self.pooled_fc(next_sent_feat) - next_sent_feat = fluid.layers.reshape( - next_sent_feat, shape=[-1, self._emb_size]) + next_sent_feat = fluid.layers.reshape(next_sent_feat, + shape=[-1, self._emb_size]) return enc_output, next_sent_feat class PretrainModelLayer(Layer): + def __init__(self, config, return_pooled_out=True, @@ -281,33 +293,32 @@ class PretrainModelLayer(Layer): self.use_fp16 = use_fp16 self._dtype = "float16" if use_fp16 else "float32" - self.bert_layer = BertModelLayer( - config=self.config, return_pooled_out=True, use_fp16=self.use_fp16) + self.bert_layer = BertModelLayer(config=self.config, + return_pooled_out=True, + use_fp16=self.use_fp16) self.pre_process_layer = PrePostProcessLayer( "n", self._emb_size, self._prepostprocess_dropout) - self.pooled_fc = Linear( - input_dim=self._emb_size, - output_dim=self._emb_size, - param_attr=fluid.ParamAttr( - name="mask_lm_trans_fc.w_0", - initializer=self._param_initializer), - bias_attr="mask_lm_trans_fc.b_0", - act="tanh") + self.pooled_fc = Linear(input_dim=self._emb_size, + output_dim=self._emb_size, + param_attr=fluid.ParamAttr( + name="mask_lm_trans_fc.w_0", + initializer=self._param_initializer), + bias_attr="mask_lm_trans_fc.b_0", + act="tanh") self.mask_lm_out_bias_attr = fluid.ParamAttr( name="mask_lm_out_fc.b_0", initializer=fluid.initializer.Constant(value=0.0)) if not self._weight_sharing: - self.out_fc = Linear( - input_dim=self._emb_size, - output_dim=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=self.mask_lm_out_bias_attr) + self.out_fc = Linear(input_dim=self._emb_size, + output_dim=self._voc_size, + param_attr=fluid.ParamAttr( + name="mask_lm_out_fc.w_0", + initializer=self._param_initializer), + bias_attr=self.mask_lm_out_bias_attr) else: self.fc_create_params = self.create_parameter( shape=[self._voc_size], @@ -315,12 +326,12 @@ class PretrainModelLayer(Layer): attr=self.mask_lm_out_bias_attr, is_bias=True) - self.next_sent_fc = Linear( - input_dim=self._emb_size, - output_dim=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") + self.next_sent_fc = Linear(input_dim=self._emb_size, + output_dim=2, + param_attr=fluid.ParamAttr( + name="next_sent_fc.w_0", + initializer=self._param_initializer), + bias_attr="next_sent_fc.b_0") @declarative def forward(self, src_ids, position_ids, sentence_ids, input_mask, @@ -329,24 +340,23 @@ class PretrainModelLayer(Layer): enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids, sentence_ids, input_mask) - reshaped_emb_out = fluid.layers.reshape( - x=enc_output, shape=[-1, self._emb_size]) + reshaped_emb_out = fluid.layers.reshape(x=enc_output, + shape=[-1, self._emb_size]) mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) mask_trans_feat = self.pooled_fc(mask_feat) mask_trans_feat = self.pre_process_layer(mask_trans_feat) if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=self.bert_layer._src_emb._w, - transpose_y=True) + fc_out = fluid.layers.matmul(x=mask_trans_feat, + y=self.bert_layer._src_emb._w, + transpose_y=True) fc_out += self.fc_create_params else: fc_out = self.out_fc(mask_trans_feat) - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) + mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, + label=mask_label) mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) next_sent_fc_out = 
self.next_sent_fc(next_sent_feat) @@ -354,8 +364,8 @@ class PretrainModelLayer(Layer): next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( logits=next_sent_fc_out, label=labels, return_softmax=True) - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) + next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, + label=labels) mean_next_sent_loss = fluid.layers.mean(next_sent_loss) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_utils.py index a18bb34e182..53996775a61 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_utils.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_utils.py @@ -15,6 +15,7 @@ from __future__ import absolute_import, division, print_function import numpy as np import random + SEED = 2020 @@ -88,9 +89,8 @@ def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3): # ensure at least mask one word in a sentence while not mask_flag: - token_index = int( - self_random.randint( - 1, high=len(sent) - 1, size=1)) + token_index = int(self_random.randint(1, high=len(sent) - 1, + size=1)) if sent[token_index] != SEP and sent[token_index] != CLS: mask_label.append(sent[token_index]) sent[token_index] = MASK @@ -132,8 +132,8 @@ def pad_batch_data(insts, if return_input_mask: # This is used to avoid attention on paddings. - input_mask_data = np.array([[1] * len(inst) + [0] * - (max_len - len(inst)) for inst in insts]) + input_mask_data = np.array( + [[1] * len(inst) + [0] * (max_len - len(inst)) for inst in insts]) input_mask_data = np.expand_dims(input_mask_data, axis=-1) return_list += [input_mask_data.astype("float32")] @@ -177,28 +177,26 @@ def prepare_batch_data(insts, # First step: do mask without padding if mask_id >= 0: - out, mask_label, mask_pos = mask( - batch_src_ids, - total_token_num, - vocab_size=voc_size, - CLS=cls_id, - SEP=sep_id, - MASK=mask_id) + out, mask_label, mask_pos = mask(batch_src_ids, + total_token_num, + vocab_size=voc_size, + CLS=cls_id, + SEP=sep_id, + MASK=mask_id) else: out = batch_src_ids # Second step: padding - src_id, self_input_mask = pad_batch_data( - out, pad_idx=pad_id, return_input_mask=True) - pos_id = pad_batch_data( - batch_pos_ids, - pad_idx=pad_id, - return_pos=False, - return_input_mask=False) - sent_id = pad_batch_data( - batch_sent_ids, - pad_idx=pad_id, - return_pos=False, - return_input_mask=False) + src_id, self_input_mask = pad_batch_data(out, + pad_idx=pad_id, + return_input_mask=True) + pos_id = pad_batch_data(batch_pos_ids, + pad_idx=pad_id, + return_pos=False, + return_input_mask=False) + sent_id = pad_batch_data(batch_sent_ids, + pad_idx=pad_id, + return_pos=False, + return_input_mask=False) if mask_id >= 0: return_list = [ @@ -212,6 +210,7 @@ def prepare_batch_data(insts, class DataReader(object): + def __init__(self, batch_size=4096, in_tokens=True, @@ -268,7 +267,9 @@ class DataReader(object): yield token_ids, sent_ids, pos_ids, label def data_generator(self): + def wrapper(): + def reader(): for epoch in range(self.epoch): self.current_epoch = epoch + 1 @@ -292,25 +293,25 @@ class DataReader(object): total_token_num += len(token_ids) else: yield batch, total_token_num - batch, total_token_num, max_len = [parsed_line], len( - token_ids), len(token_ids) + batch, total_token_num, max_len = [ + parsed_line + ], len(token_ids), len(token_ids) if len(batch) > 0: yield batch, total_token_num for batch_data, total_token_num 
in batch_reader( reader, self.batch_size, self.in_tokens): - yield prepare_batch_data( - batch_data, - total_token_num, - voc_size=self.voc_size, - pad_id=self.pad_id, - cls_id=self.cls_id, - sep_id=self.sep_id, - mask_id=self.mask_id, - return_input_mask=True, - return_max_len=False, - return_num_token=False) + yield prepare_batch_data(batch_data, + total_token_num, + voc_size=self.voc_size, + pad_id=self.pad_id, + cls_id=self.cls_id, + sep_id=self.sep_id, + mask_id=self.mask_id, + return_input_mask=True, + return_max_len=False, + return_num_token=False) return wrapper @@ -325,12 +326,11 @@ class ModelHyperParams(object): def get_feed_data_reader(bert_config): args = ModelHyperParams() - data_reader = DataReader( - batch_size=args.batch_size, - in_tokens=args.in_tokens, - voc_size=bert_config['vocab_size'], - epoch=args.epoch, - max_seq_len=args.max_seq_len, - generate_neg_sample=args.generate_neg_sample) + data_reader = DataReader(batch_size=args.batch_size, + in_tokens=args.in_tokens, + voc_size=bert_config['vocab_size'], + epoch=args.epoch, + max_seq_len=args.max_seq_len, + generate_neg_sample=args.generate_neg_sample) return data_reader diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py index cdf478f8710..58482bb9771 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py @@ -21,6 +21,7 @@ from paddle.fluid.dygraph.base import to_variable class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, ch_in, ch_out, @@ -32,26 +33,23 @@ class ConvBNLayer(fluid.dygraph.Layer): is_test=True): super(ConvBNLayer, self).__init__() - self.conv = Conv2D( - num_channels=ch_in, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - param_attr=ParamAttr( - initializer=fluid.initializer.Normal(0., 0.02)), - bias_attr=False, - act=None) + self.conv = Conv2D(num_channels=ch_in, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=groups, + param_attr=ParamAttr( + initializer=fluid.initializer.Normal(0., 0.02)), + bias_attr=False, + act=None) self.batch_norm = BatchNorm( num_channels=ch_out, is_test=is_test, - param_attr=ParamAttr( - initializer=fluid.initializer.Normal(0., 0.02), - regularizer=L2Decay(0.)), - bias_attr=ParamAttr( - initializer=fluid.initializer.Constant(0.0), - regularizer=L2Decay(0.))) + param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02), + regularizer=L2Decay(0.)), + bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0), + regularizer=L2Decay(0.))) self.act = act @@ -64,6 +62,7 @@ class ConvBNLayer(fluid.dygraph.Layer): class DownSample(fluid.dygraph.Layer): + def __init__(self, ch_in, ch_out, @@ -74,13 +73,12 @@ class DownSample(fluid.dygraph.Layer): super(DownSample, self).__init__() - self.conv_bn_layer = ConvBNLayer( - ch_in=ch_in, - ch_out=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - is_test=is_test) + self.conv_bn_layer = ConvBNLayer(ch_in=ch_in, + ch_out=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + is_test=is_test) self.ch_out = ch_out def forward(self, inputs): @@ -89,23 +87,22 @@ class DownSample(fluid.dygraph.Layer): class BasicBlock(fluid.dygraph.Layer): + def __init__(self, ch_in, ch_out, is_test=True): super(BasicBlock, self).__init__() - self.conv1 = ConvBNLayer( - ch_in=ch_in, - ch_out=ch_out, - filter_size=1, - 
stride=1, - padding=0, - is_test=is_test) - self.conv2 = ConvBNLayer( - ch_in=ch_out, - ch_out=ch_out * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test) + self.conv1 = ConvBNLayer(ch_in=ch_in, + ch_out=ch_out, + filter_size=1, + stride=1, + padding=0, + is_test=is_test) + self.conv2 = ConvBNLayer(ch_in=ch_out, + ch_out=ch_out * 2, + filter_size=3, + stride=1, + padding=1, + is_test=is_test) def forward(self, inputs): conv1 = self.conv1(inputs) @@ -115,6 +112,7 @@ class BasicBlock(fluid.dygraph.Layer): class LayerWarp(fluid.dygraph.Layer): + def __init__(self, ch_in, ch_out, count, is_test=True): super(LayerWarp, self).__init__() @@ -123,8 +121,7 @@ class LayerWarp(fluid.dygraph.Layer): for i in range(1, count): res_out = self.add_sublayer( "basic_block_%d" % (i), - BasicBlock( - ch_out * 2, ch_out, is_test=is_test)) + BasicBlock(ch_out * 2, ch_out, is_test=is_test)) self.res_out_list.append(res_out) self.ch_out = ch_out @@ -139,18 +136,18 @@ DarkNet_cfg = {53: ([1, 2, 8, 8, 4])} class DarkNet53_conv_body(fluid.dygraph.Layer): + def __init__(self, ch_in=3, is_test=True): super(DarkNet53_conv_body, self).__init__() self.stages = DarkNet_cfg[53] self.stages = self.stages[0:5] - self.conv0 = ConvBNLayer( - ch_in=ch_in, - ch_out=32, - filter_size=3, - stride=1, - padding=1, - is_test=is_test) + self.conv0 = ConvBNLayer(ch_in=ch_in, + ch_out=32, + filter_size=3, + stride=1, + padding=1, + is_test=is_test) self.downsample0 = DownSample(ch_in=32, ch_out=32 * 2, is_test=is_test) self.darknet53_conv_block_list = [] @@ -159,16 +156,14 @@ class DarkNet53_conv_body(fluid.dygraph.Layer): for i, stage in enumerate(self.stages): conv_block = self.add_sublayer( "stage_%d" % (i), - LayerWarp( - int(ch_in[i]), 32 * (2**i), stage, is_test=is_test)) + LayerWarp(int(ch_in[i]), 32 * (2**i), stage, is_test=is_test)) self.darknet53_conv_block_list.append(conv_block) for i in range(len(self.stages) - 1): downsample = self.add_sublayer( "stage_%d_downsample" % i, - DownSample( - ch_in=32 * (2**(i + 1)), - ch_out=32 * (2**(i + 2)), - is_test=is_test)) + DownSample(ch_in=32 * (2**(i + 1)), + ch_out=32 * (2**(i + 2)), + is_test=is_test)) self.downsample_list.append(downsample) def forward(self, inputs): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py index ecb7d7f6bd1..0c7d2903c36 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py @@ -103,8 +103,7 @@ def dyfunc_with_if_else3(x): def dyfunc_with_if_else_with_list_geneator(x): if 10 > 5: y = paddle.add_n( - [paddle.full( - shape=[2], fill_value=v) for v in range(5)]) + [paddle.full(shape=[2], fill_value=v) for v in range(5)]) else: y = x return y @@ -131,8 +130,9 @@ def nested_if_else(x_v): if fluid.layers.mean(y).numpy()[0] < batch_size: y = fluid.layers.abs(y) else: - tmp = fluid.layers.fill_constant( - [feat_size], dtype='float32', value=-1) + tmp = fluid.layers.fill_constant([feat_size], + dtype='float32', + value=-1) y = y - tmp else: y = x_v - bias @@ -148,13 +148,15 @@ def nested_if_else_2(x): x_shape_0 = x.shape[0] if x_shape_0 < 1: if fluid.layers.shape(y).numpy()[0] < 1: - res = fluid.layers.fill_constant( - value=2, shape=x.shape, dtype="int32") + res = fluid.layers.fill_constant(value=2, + shape=x.shape, + dtype="int32") # `z` is a new var here. 
z = y + 1 else: - res = fluid.layers.fill_constant( - value=3, shape=x.shape, dtype="int32") + res = fluid.layers.fill_constant(value=3, + shape=x.shape, + dtype="int32") else: res = x return res @@ -179,29 +181,32 @@ def nested_if_else_3(x): else: y_shape = fluid.layers.shape(y) if y_shape.numpy()[0] < 1: - res = fluid.layers.fill_constant( - value=2, shape=x.shape, dtype="int32") + res = fluid.layers.fill_constant(value=2, + shape=x.shape, + dtype="int32") # `z` is created in above code block. z = y + 1 else: - res = fluid.layers.fill_constant( - value=3, shape=x.shape, dtype="int32") + res = fluid.layers.fill_constant(value=3, + shape=x.shape, + dtype="int32") # `out` is a new var. out = x + 1 return res class NetWithControlFlowIf(fluid.dygraph.Layer): + def __init__(self, hidden_dim=16): super(NetWithControlFlowIf, self).__init__() self.hidden_dim = hidden_dim self.fc = fluid.dygraph.Linear( input_dim=hidden_dim, output_dim=5, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5))) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.99)), + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.5))) self.alpha = 10. self.constant_vars = {} @@ -210,11 +215,12 @@ class NetWithControlFlowIf(fluid.dygraph.Layer): hidden_dim = input.shape[-1] if hidden_dim != self.hidden_dim: raise ValueError( - "hidden_dim {} of input is not equal to FC.weight[0]: {}" - .format(hidden_dim, self.hidden_dim)) + "hidden_dim {} of input is not equal to FC.weight[0]: {}". + format(hidden_dim, self.hidden_dim)) - self.constant_vars['bias'] = fluid.layers.fill_constant( - [5], dtype='float32', value=1) + self.constant_vars['bias'] = fluid.layers.fill_constant([5], + dtype='float32', + value=1) # Control flow `if` statement fc_out = self.fc(input) if fluid.layers.mean(fc_out).numpy()[0] < 0: @@ -233,8 +239,9 @@ class NetWithControlFlowIf(fluid.dygraph.Layer): [hidden_dim], dtype='float32', value=9) y = fluid.layers.abs(y) else: - tmp = fluid.layers.fill_constant( - [5], dtype='float32', value=-1) + tmp = fluid.layers.fill_constant([5], + dtype='float32', + value=-1) y = y - tmp else: y = fc_out - self.constant_vars['bias'] @@ -245,8 +252,8 @@ class NetWithControlFlowIf(fluid.dygraph.Layer): def if_with_and_or(x_v, label=None): batch_size = fluid.layers.shape(x_v) - if x_v is not None and (fluid.layers.mean(x_v).numpy()[0] > 0 or - label is not None) and batch_size[0] > 1 and True: + if x_v is not None and (fluid.layers.mean(x_v).numpy()[0] > 0 or label + is not None) and batch_size[0] > 1 and True: x_v = x_v - 1 else: x_v = x_v + 1 @@ -289,17 +296,19 @@ def if_with_and_or_3(x, y=None): def if_with_and_or_4(x, y=None): batch_size = fluid.layers.shape(x) mean_res = fluid.layers.mean(x) - if (x is not None and batch_size[0] > 1) or (y is not None and - mean_res.numpy()[0] > 0): + if (x is not None and batch_size[0] > 1) or (y is not None + and mean_res.numpy()[0] > 0): x = x + 1 - if (x is not None or batch_size[0] > 1) and (y is not None or - mean_res.numpy()[0] > 0): + if (x is not None or batch_size[0] > 1) and (y is not None + or mean_res.numpy()[0] > 0): x = x - 1 return x def if_with_class_var(x, y=None): + class Foo(object): + def __init__(self): self.a = 1 self.b = 2 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index 
0a3be447812..8c7f301e9ed 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -29,11 +29,13 @@ from seq2seq_utils import Seq2SeqModelHyperParams as args INF = 1. * 1e5 alpha = 0.6 -uniform_initializer = lambda x: fluid.initializer.UniformInitializer(low=-x, high=x) +uniform_initializer = lambda x: fluid.initializer.UniformInitializer(low=-x, + high=x) zero_constant = fluid.initializer.Constant(0.0) class BasicLSTMUnit(Layer): + def __init__(self, hidden_size, input_size, @@ -59,11 +61,10 @@ class BasicLSTMUnit(Layer): shape=[self._input_size + self._hiden_size, 4 * self._hiden_size], dtype=self._dtype) - self._bias = self.create_parameter( - attr=self._bias_attr, - shape=[4 * self._hiden_size], - dtype=self._dtype, - is_bias=True) + self._bias = self.create_parameter(attr=self._bias_attr, + shape=[4 * self._hiden_size], + dtype=self._dtype, + is_bias=True) def forward(self, input, pre_hidden, pre_cell): concat_input_hidden = layers.concat([input, pre_hidden], 1) @@ -82,6 +83,7 @@ class BasicLSTMUnit(Layer): class BaseModel(fluid.dygraph.Layer): + def __init__(self, hidden_size, src_vocab_size, @@ -130,30 +132,27 @@ class BaseModel(fluid.dygraph.Layer): self.enc_units.append( self.add_sublayer( "enc_units_%d" % i, - BasicLSTMUnit( - hidden_size=self.hidden_size, - input_size=self.hidden_size, - param_attr=param_attr, - bias_attr=bias_attr, - forget_bias=forget_bias))) + BasicLSTMUnit(hidden_size=self.hidden_size, + input_size=self.hidden_size, + param_attr=param_attr, + bias_attr=bias_attr, + forget_bias=forget_bias))) self.dec_units = [] for i in range(num_layers): self.dec_units.append( self.add_sublayer( "dec_units_%d" % i, - BasicLSTMUnit( - hidden_size=self.hidden_size, - input_size=self.hidden_size, - param_attr=param_attr, - bias_attr=bias_attr, - forget_bias=forget_bias))) - - self.fc = fluid.dygraph.nn.Linear( - self.hidden_size, - self.tar_vocab_size, - param_attr=param_attr, - bias_attr=False) + BasicLSTMUnit(hidden_size=self.hidden_size, + input_size=self.hidden_size, + param_attr=param_attr, + bias_attr=bias_attr, + forget_bias=forget_bias))) + + self.fc = fluid.dygraph.nn.Linear(self.hidden_size, + self.tar_vocab_size, + param_attr=param_attr, + bias_attr=False) def _transpose_batch_time(self, x): return fluid.layers.transpose(x, [1, 0] + list(range(2, len(x.shape)))) @@ -191,25 +190,26 @@ class BaseModel(fluid.dygraph.Layer): # NOTE: modify model code about `enc_hidden` and `enc_cell` to transforme dygraph code successfully. # Because nested list can't be transformed now. 
enc_hidden_0 = to_variable( - np.zeros( - (self.batch_size, self.hidden_size), dtype='float32')) + np.zeros((self.batch_size, self.hidden_size), dtype='float32')) enc_cell_0 = to_variable( - np.zeros( - (self.batch_size, self.hidden_size), dtype='float32')) + np.zeros((self.batch_size, self.hidden_size), dtype='float32')) zero = fluid.layers.zeros(shape=[1], dtype="int64") enc_hidden = fluid.layers.create_array(dtype="float32") enc_cell = fluid.layers.create_array(dtype="float32") for i in range(self.num_layers): index = zero + i - enc_hidden = fluid.layers.array_write( - enc_hidden_0, index, array=enc_hidden) - enc_cell = fluid.layers.array_write( - enc_cell_0, index, array=enc_cell) + enc_hidden = fluid.layers.array_write(enc_hidden_0, + index, + array=enc_hidden) + enc_cell = fluid.layers.array_write(enc_cell_0, + index, + array=enc_cell) max_seq_len = src_emb.shape[0] - enc_len_mask = fluid.layers.sequence_mask( - src_sequence_length, maxlen=max_seq_len, dtype="float32") + enc_len_mask = fluid.layers.sequence_mask(src_sequence_length, + maxlen=max_seq_len, + dtype="float32") enc_len_mask = fluid.layers.transpose(enc_len_mask, [1, 0]) # TODO: Because diff exits if call while_loop in static graph. @@ -221,8 +221,9 @@ class BaseModel(fluid.dygraph.Layer): step_mask = enc_len_mask[k] new_enc_hidden, new_enc_cell = [], [] for i in range(self.num_layers): - enc_new_hidden, enc_new_cell = self.enc_units[i]( - enc_step_input, enc_hidden[i], enc_cell[i]) + enc_new_hidden, enc_new_cell = self.enc_units[i](enc_step_input, + enc_hidden[i], + enc_cell[i]) if self.dropout != None and self.dropout > 0.0: enc_step_input = fluid.layers.dropout( enc_new_hidden, @@ -247,8 +248,9 @@ class BaseModel(fluid.dygraph.Layer): step_input = tar_emb[j] new_dec_hidden, new_dec_cell = [], [] for i in range(self.num_layers): - new_hidden, new_cell = self.dec_units[i]( - step_input, dec_hidden[i], dec_cell[i]) + new_hidden, new_cell = self.dec_units[i](step_input, + dec_hidden[i], + dec_cell[i]) new_dec_hidden.append(new_hidden) new_dec_cell.append(new_cell) if self.dropout != None and self.dropout > 0.0: @@ -262,12 +264,14 @@ class BaseModel(fluid.dygraph.Layer): dec_output = fluid.layers.stack(dec_output) dec_output = self.fc(self._transpose_batch_time(dec_output)) - loss = fluid.layers.softmax_with_cross_entropy( - logits=dec_output, label=label, soft_label=False) + loss = fluid.layers.softmax_with_cross_entropy(logits=dec_output, + label=label, + soft_label=False) loss = fluid.layers.squeeze(loss, axes=[2]) max_tar_seq_len = fluid.layers.shape(tar)[1] - tar_mask = fluid.layers.sequence_mask( - tar_sequence_length, maxlen=max_tar_seq_len, dtype='float32') + tar_mask = fluid.layers.sequence_mask(tar_sequence_length, + maxlen=max_tar_seq_len, + dtype='float32') loss = loss * tar_mask loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_sum(loss) @@ -282,25 +286,26 @@ class BaseModel(fluid.dygraph.Layer): src_emb = self.src_embeder(self._transpose_batch_time(src)) enc_hidden_0 = to_variable( - np.zeros( - (self.batch_size, self.hidden_size), dtype='float32')) + np.zeros((self.batch_size, self.hidden_size), dtype='float32')) enc_cell_0 = to_variable( - np.zeros( - (self.batch_size, self.hidden_size), dtype='float32')) + np.zeros((self.batch_size, self.hidden_size), dtype='float32')) zero = fluid.layers.zeros(shape=[1], dtype="int64") enc_hidden = fluid.layers.create_array(dtype="float32") enc_cell = fluid.layers.create_array(dtype="float32") for j in range(self.num_layers): index = zero + j - 
enc_hidden = fluid.layers.array_write( - enc_hidden_0, index, array=enc_hidden) - enc_cell = fluid.layers.array_write( - enc_cell_0, index, array=enc_cell) + enc_hidden = fluid.layers.array_write(enc_hidden_0, + index, + array=enc_hidden) + enc_cell = fluid.layers.array_write(enc_cell_0, + index, + array=enc_cell) max_seq_len = src_emb.shape[0] - enc_len_mask = fluid.layers.sequence_mask( - src_sequence_length, maxlen=max_seq_len, dtype="float32") + enc_len_mask = fluid.layers.sequence_mask(src_sequence_length, + maxlen=max_seq_len, + dtype="float32") enc_len_mask = fluid.layers.transpose(enc_len_mask, [1, 0]) for k in range(args.max_seq_len): @@ -310,8 +315,9 @@ class BaseModel(fluid.dygraph.Layer): new_enc_hidden, new_enc_cell = [], [] for i in range(self.num_layers): - enc_new_hidden, enc_new_cell = self.enc_units[i]( - enc_step_input, enc_hidden[i], enc_cell[i]) + enc_new_hidden, enc_new_cell = self.enc_units[i](enc_step_input, + enc_hidden[i], + enc_cell[i]) if self.dropout != None and self.dropout > 0.0: enc_step_input = fluid.layers.dropout( enc_new_hidden, @@ -329,21 +335,18 @@ class BaseModel(fluid.dygraph.Layer): # beam search batch_beam_shape = (self.batch_size, self.beam_size) - vocab_size_tensor = to_variable(np.full(( - 1), self.tar_vocab_size)).astype("int64") + vocab_size_tensor = to_variable(np.full( + (1), self.tar_vocab_size)).astype("int64") start_token_tensor = to_variable( - np.full( - batch_beam_shape, self.beam_start_token, dtype='int64')) + np.full(batch_beam_shape, self.beam_start_token, dtype='int64')) end_token_tensor = to_variable( - np.full( - batch_beam_shape, self.beam_end_token, dtype='int64')) + np.full(batch_beam_shape, self.beam_end_token, dtype='int64')) step_input = self.tar_embeder(start_token_tensor) beam_finished = to_variable( - np.full( - batch_beam_shape, 0, dtype='float32')) + np.full(batch_beam_shape, 0, dtype='float32')) beam_state_log_probs = to_variable( - np.array( - [[0.] + [-self.kinf] * (self.beam_size - 1)], dtype="float32")) + np.array([[0.] 
+ [-self.kinf] * (self.beam_size - 1)], + dtype="float32")) beam_state_log_probs = fluid.layers.expand(beam_state_log_probs, [self.batch_size, 1]) dec_hidden, dec_cell = enc_hidden, enc_cell @@ -352,9 +355,8 @@ class BaseModel(fluid.dygraph.Layer): batch_pos = fluid.layers.expand( fluid.layers.unsqueeze( - to_variable(np.arange( - 0, self.batch_size, 1, dtype="int64")), [1]), - [1, self.beam_size]) + to_variable(np.arange(0, self.batch_size, 1, dtype="int64")), + [1]), [1, self.beam_size]) predicted_ids = [] parent_ids = [] @@ -370,8 +372,9 @@ class BaseModel(fluid.dygraph.Layer): dec_cell = [self._merge_batch_beams(state) for state in dec_cell] for i in range(self.num_layers): - new_hidden, new_cell = self.dec_units[i]( - step_input, dec_hidden[i], dec_cell[i]) + new_hidden, new_cell = self.dec_units[i](step_input, + dec_hidden[i], + dec_cell[i]) new_dec_hidden.append(new_hidden) new_dec_cell.append(new_cell) if self.dropout != None and self.dropout > 0.0: @@ -389,24 +392,24 @@ class BaseModel(fluid.dygraph.Layer): noend_array = [-self.kinf] * self.tar_vocab_size noend_array[self.beam_end_token] = 0 noend_mask_tensor = to_variable( - np.array( - noend_array, dtype='float32')) + np.array(noend_array, dtype='float32')) step_log_probs = fluid.layers.elementwise_mul( fluid.layers.expand(fluid.layers.unsqueeze(beam_finished, [2]), [1, 1, self.tar_vocab_size]), noend_mask_tensor, axis=-1) - \ fluid.layers.elementwise_mul(step_log_probs, (beam_finished - 1), axis=0) - log_probs = fluid.layers.elementwise_add( - x=step_log_probs, y=beam_state_log_probs, axis=0) + log_probs = fluid.layers.elementwise_add(x=step_log_probs, + y=beam_state_log_probs, + axis=0) scores = fluid.layers.reshape( log_probs, [-1, self.beam_size * self.tar_vocab_size]) - topk_scores, topk_indices = fluid.layers.topk( - input=scores, k=self.beam_size) + topk_scores, topk_indices = fluid.layers.topk(input=scores, + k=self.beam_size) - beam_indices = fluid.layers.elementwise_floordiv(topk_indices, - vocab_size_tensor) - token_indices = fluid.layers.elementwise_mod(topk_indices, - vocab_size_tensor) + beam_indices = fluid.layers.elementwise_floordiv( + topk_indices, vocab_size_tensor) + token_indices = fluid.layers.elementwise_mod( + topk_indices, vocab_size_tensor) next_log_probs = self._gather(scores, topk_indices, batch_pos) x = 0 @@ -451,6 +454,7 @@ class BaseModel(fluid.dygraph.Layer): class AttentionModel(fluid.dygraph.Layer): + def __init__(self, hidden_size, src_vocab_size, @@ -501,12 +505,11 @@ class AttentionModel(fluid.dygraph.Layer): self.enc_units.append( self.add_sublayer( "enc_units_%d" % i, - BasicLSTMUnit( - hidden_size=self.hidden_size, - input_size=self.hidden_size, - param_attr=param_attr, - bias_attr=bias_attr, - forget_bias=forget_bias))) + BasicLSTMUnit(hidden_size=self.hidden_size, + input_size=self.hidden_size, + param_attr=param_attr, + bias_attr=bias_attr, + forget_bias=forget_bias))) self.dec_units = [] for i in range(num_layers): @@ -514,52 +517,50 @@ class AttentionModel(fluid.dygraph.Layer): self.dec_units.append( self.add_sublayer( "dec_units_%d" % i, - BasicLSTMUnit( - hidden_size=self.hidden_size, - input_size=self.hidden_size * 2, - param_attr=ParamAttr( - name="dec_units_%d" % i, - initializer=uniform_initializer( - self.init_scale)), - bias_attr=bias_attr, - forget_bias=forget_bias))) + BasicLSTMUnit(hidden_size=self.hidden_size, + input_size=self.hidden_size * 2, + param_attr=ParamAttr( + name="dec_units_%d" % i, + initializer=uniform_initializer( + self.init_scale)), + bias_attr=bias_attr, 
+ forget_bias=forget_bias))) else: self.dec_units.append( self.add_sublayer( "dec_units_%d" % i, - BasicLSTMUnit( - hidden_size=self.hidden_size, - input_size=self.hidden_size, - param_attr=ParamAttr( - name="dec_units_%d" % i, - initializer=uniform_initializer( - self.init_scale)), - bias_attr=bias_attr, - forget_bias=forget_bias))) + BasicLSTMUnit(hidden_size=self.hidden_size, + input_size=self.hidden_size, + param_attr=ParamAttr( + name="dec_units_%d" % i, + initializer=uniform_initializer( + self.init_scale)), + bias_attr=bias_attr, + forget_bias=forget_bias))) self.attn_fc = fluid.dygraph.nn.Linear( self.hidden_size, self.hidden_size, - param_attr=ParamAttr( - name="self_attn_fc", - initializer=uniform_initializer(self.init_scale)), + param_attr=ParamAttr(name="self_attn_fc", + initializer=uniform_initializer( + self.init_scale)), bias_attr=False) self.concat_fc = fluid.dygraph.nn.Linear( 2 * self.hidden_size, self.hidden_size, - param_attr=ParamAttr( - name="self_concat_fc", - initializer=uniform_initializer(self.init_scale)), + param_attr=ParamAttr(name="self_concat_fc", + initializer=uniform_initializer( + self.init_scale)), bias_attr=False) - self.fc = fluid.dygraph.nn.Linear( - self.hidden_size, - self.tar_vocab_size, - param_attr=ParamAttr( - name="self_fc", - initializer=uniform_initializer(self.init_scale)), - bias_attr=False) + self.fc = fluid.dygraph.nn.Linear(self.hidden_size, + self.tar_vocab_size, + param_attr=ParamAttr( + name="self_fc", + initializer=uniform_initializer( + self.init_scale)), + bias_attr=False) def _transpose_batch_time(self, x): return fluid.layers.transpose(x, [1, 0] + list(range(2, len(x.shape)))) @@ -572,15 +573,16 @@ class AttentionModel(fluid.dygraph.Layer): expand_times = [1] * len(x.shape) expand_times[1] = self.beam_size x = fluid.layers.expand(x, expand_times) # [batch_size, beam_size, ...] - x = fluid.layers.transpose(x, list(range(2, len(x.shape))) + + x = fluid.layers.transpose(x, + list(range(2, len(x.shape))) + [0, 1]) # [..., batch_size, beam_size] # use 0 to copy to avoid wrong shape - x = fluid.layers.reshape( - x, shape=[0] * - (len(x.shape) - 2) + [-1]) # [..., batch_size * beam_size] + x = fluid.layers.reshape(x, shape=[0] * (len(x.shape) - 2) + + [-1]) # [..., batch_size * beam_size] x = fluid.layers.transpose( x, [len(x.shape) - 1] + - list(range(0, len(x.shape) - 1))) # [batch_size * beam_size, ...] + list(range(0, + len(x.shape) - 1))) # [batch_size * beam_size, ...] return x def _split_batch_beams(self, x): @@ -635,27 +637,28 @@ class AttentionModel(fluid.dygraph.Layer): # NOTE: modify model code about `enc_hidden` and `enc_cell` to transforme dygraph code successfully. # Because nested list can't be transformed now. 
enc_hidden_0 = to_variable( - np.zeros( - (self.batch_size, self.hidden_size), dtype='float32')) + np.zeros((self.batch_size, self.hidden_size), dtype='float32')) enc_hidden_0.stop_gradient = True enc_cell_0 = to_variable( - np.zeros( - (self.batch_size, self.hidden_size), dtype='float32')) + np.zeros((self.batch_size, self.hidden_size), dtype='float32')) enc_hidden_0.stop_gradient = True zero = fluid.layers.zeros(shape=[1], dtype="int64") enc_hidden = fluid.layers.create_array(dtype="float32") enc_cell = fluid.layers.create_array(dtype="float32") for i in range(self.num_layers): index = zero + i - enc_hidden = fluid.layers.array_write( - enc_hidden_0, index, array=enc_hidden) - enc_cell = fluid.layers.array_write( - enc_cell_0, index, array=enc_cell) + enc_hidden = fluid.layers.array_write(enc_hidden_0, + index, + array=enc_hidden) + enc_cell = fluid.layers.array_write(enc_cell_0, + index, + array=enc_cell) max_seq_len = src_emb.shape[0] - enc_len_mask = fluid.layers.sequence_mask( - src_sequence_length, maxlen=max_seq_len, dtype="float32") + enc_len_mask = fluid.layers.sequence_mask(src_sequence_length, + maxlen=max_seq_len, + dtype="float32") enc_padding_mask = (enc_len_mask - 1.0) enc_len_mask = fluid.layers.transpose(enc_len_mask, [1, 0]) @@ -669,8 +672,9 @@ class AttentionModel(fluid.dygraph.Layer): step_mask = enc_len_mask[k] new_enc_hidden, new_enc_cell = [], [] for i in range(self.num_layers): - enc_new_hidden, enc_new_cell = self.enc_units[i]( - enc_step_input, enc_hidden[i], enc_cell[i]) + enc_new_hidden, enc_new_cell = self.enc_units[i](enc_step_input, + enc_hidden[i], + enc_cell[i]) if self.dropout != None and self.dropout > 0.0: enc_step_input = fluid.layers.dropout( enc_new_hidden, @@ -691,8 +695,7 @@ class AttentionModel(fluid.dygraph.Layer): # train input_feed = to_variable( - np.zeros( - (self.batch_size, self.hidden_size), dtype='float32')) + np.zeros((self.batch_size, self.hidden_size), dtype='float32')) # NOTE: set stop_gradient here, otherwise grad var is null input_feed.stop_gradient = True dec_hidden, dec_cell = enc_hidden, enc_cell @@ -706,8 +709,9 @@ class AttentionModel(fluid.dygraph.Layer): step_input = fluid.layers.concat([step_input, input_feed], 1) new_dec_hidden, new_dec_cell = [], [] for i in range(self.num_layers): - new_hidden, new_cell = self.dec_units[i]( - step_input, dec_hidden[i], dec_cell[i]) + new_hidden, new_cell = self.dec_units[i](step_input, + dec_hidden[i], + dec_cell[i]) new_dec_hidden.append(new_hidden) new_dec_cell.append(new_cell) if self.dropout != None and self.dropout > 0.0: @@ -727,12 +731,14 @@ class AttentionModel(fluid.dygraph.Layer): dec_output = fluid.layers.stack(dec_output) dec_output = self.fc(self._transpose_batch_time(dec_output)) - loss = fluid.layers.softmax_with_cross_entropy( - logits=dec_output, label=label, soft_label=False) + loss = fluid.layers.softmax_with_cross_entropy(logits=dec_output, + label=label, + soft_label=False) loss = fluid.layers.squeeze(loss, axes=[2]) max_tar_seq_len = fluid.layers.shape(tar)[1] - tar_mask = fluid.layers.sequence_mask( - tar_sequence_length, maxlen=max_tar_seq_len, dtype='float32') + tar_mask = fluid.layers.sequence_mask(tar_sequence_length, + maxlen=max_tar_seq_len, + dtype='float32') loss = loss * tar_mask loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_sum(loss) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_utils.py index 821fea3a67d..34e9aaffdcf 100644 --- 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_utils.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_utils.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function import numpy as np + SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py index affec2f7dfe..a2ec446c728 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py @@ -41,12 +41,12 @@ class EmbeddingLayer(object): """ # TODO(huihuangzheng): The original code set the is_sparse=True, but it # causes crush in dy2stat. Set it to True after fixing it. - emb = Embedding( - size=[self.dict_size, self.emb_dim], - is_sparse=True, - padding_idx=self.padding_idx, - param_attr=attr.ParamAttr( - name=self.name, initializer=fluid.initializer.Xavier())) + emb = Embedding(size=[self.dict_size, self.emb_dim], + is_sparse=True, + padding_idx=self.padding_idx, + param_attr=attr.ParamAttr( + name=self.name, + initializer=fluid.initializer.Xavier())) return emb @@ -327,8 +327,8 @@ class FC(Layer): def _build_once(self, input): i = 0 - for inp, param in self._helper.iter_inputs_and_params(input, - self._param_attr): + for inp, param in self._helper.iter_inputs_and_params( + input, self._param_attr): input_shape = inp.shape param_shape = [ @@ -338,16 +338,17 @@ class FC(Layer): self.__w.append( self.add_parameter( '_w%d' % i, - self.create_parameter( - attr=param, - shape=param_shape, - dtype=self._dtype, - is_bias=False))) + self.create_parameter(attr=param, + shape=param_shape, + dtype=self._dtype, + is_bias=False))) i += 1 size = list([self._size]) - self._b = self.create_parameter( - attr=self._bias_attr, shape=size, dtype=self._dtype, is_bias=True) + self._b = self.create_parameter(attr=self._bias_attr, + shape=size, + dtype=self._dtype, + is_bias=True) # TODO(songyouwei): We should remove _w property @property @@ -382,18 +383,19 @@ class FC(Layer): def forward(self, input): mul_results = list() i = 0 - for inp, param in self._helper.iter_inputs_and_params(input, - self._param_attr): + for inp, param in self._helper.iter_inputs_and_params( + input, self._param_attr): tmp = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type="mul", - inputs={"X": inp, - "Y": self.__w[i]}, - outputs={"Out": tmp}, - attrs={ - "x_num_col_dims": self._num_flatten_dims, - "y_num_col_dims": 1 - }) + self._helper.append_op(type="mul", + inputs={ + "X": inp, + "Y": self.__w[i] + }, + outputs={"Out": tmp}, + attrs={ + "x_num_col_dims": self._num_flatten_dims, + "y_num_col_dims": 1 + }) i += 1 mul_results.append(tmp) @@ -402,21 +404,21 @@ class FC(Layer): else: pre_bias = self._helper.create_variable_for_type_inference( self._dtype) - self._helper.append_op( - type="sum", - inputs={"X": mul_results}, - outputs={"Out": pre_bias}, - attrs={"use_mkldnn": False}) + self._helper.append_op(type="sum", + inputs={"X": mul_results}, + outputs={"Out": pre_bias}, + attrs={"use_mkldnn": False}) if self._b is not None: pre_activation = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type='elementwise_add', - inputs={'X': [pre_bias], - 'Y': [self._b]}, - outputs={'Out': [pre_activation]}, - attrs={'axis': self._num_flatten_dims}) + self._helper.append_op(type='elementwise_add', + 
inputs={ + 'X': [pre_bias], + 'Y': [self._b] + }, + outputs={'Out': [pre_activation]}, + attrs={'axis': self._num_flatten_dims}) else: pre_activation = pre_bias # Currently, we don't support inplace in dygraph mode @@ -482,10 +484,10 @@ class BOW(Layer): # embedding layer left_emb = self.emb_layer(left) right_emb = self.emb_layer(right) - left_emb = fluid.layers.reshape( - left_emb, shape=[-1, self.seq_len, self.bow_dim]) - right_emb = fluid.layers.reshape( - right_emb, shape=[-1, self.seq_len, self.bow_dim]) + left_emb = fluid.layers.reshape(left_emb, + shape=[-1, self.seq_len, self.bow_dim]) + right_emb = fluid.layers.reshape(right_emb, + shape=[-1, self.seq_len, self.bow_dim]) bow_left = fluid.layers.reduce_sum(left_emb, dim=1) bow_right = fluid.layers.reduce_sum(right_emb, dim=1) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py index 5cbaeb0f404..4d919383013 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py @@ -313,8 +313,8 @@ class FC(paddle.nn.Layer): def _build_once(self, input): i = 0 - for inp, param in self._helper.iter_inputs_and_params(input, - self._param_attr): + for inp, param in self._helper.iter_inputs_and_params( + input, self._param_attr): input_shape = inp.shape param_shape = [ @@ -324,16 +324,17 @@ class FC(paddle.nn.Layer): self.__w.append( self.add_parameter( '_w%d' % i, - self.create_parameter( - attr=param, - shape=param_shape, - dtype=self._dtype, - is_bias=False))) + self.create_parameter(attr=param, + shape=param_shape, + dtype=self._dtype, + is_bias=False))) i += 1 size = list([self._size]) - self._b = self.create_parameter( - attr=self._bias_attr, shape=size, dtype=self._dtype, is_bias=True) + self._b = self.create_parameter(attr=self._bias_attr, + shape=size, + dtype=self._dtype, + is_bias=True) # TODO(songyouwei): We should remove _w property @property @@ -368,18 +369,19 @@ class FC(paddle.nn.Layer): def forward(self, input): mul_results = list() i = 0 - for inp, param in self._helper.iter_inputs_and_params(input, - self._param_attr): + for inp, param in self._helper.iter_inputs_and_params( + input, self._param_attr): tmp = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type="mul", - inputs={"X": inp, - "Y": self.__w[i]}, - outputs={"Out": tmp}, - attrs={ - "x_num_col_dims": self._num_flatten_dims, - "y_num_col_dims": 1 - }) + self._helper.append_op(type="mul", + inputs={ + "X": inp, + "Y": self.__w[i] + }, + outputs={"Out": tmp}, + attrs={ + "x_num_col_dims": self._num_flatten_dims, + "y_num_col_dims": 1 + }) i += 1 mul_results.append(tmp) @@ -388,21 +390,21 @@ class FC(paddle.nn.Layer): else: pre_bias = self._helper.create_variable_for_type_inference( self._dtype) - self._helper.append_op( - type="sum", - inputs={"X": mul_results}, - outputs={"Out": pre_bias}, - attrs={"use_mkldnn": False}) + self._helper.append_op(type="sum", + inputs={"X": mul_results}, + outputs={"Out": pre_bias}, + attrs={"use_mkldnn": False}) if self._b is not None: pre_activation = self._helper.create_variable_for_type_inference( dtype=self._dtype) - self._helper.append_op( - type='elementwise_add', - inputs={'X': [pre_bias], - 'Y': [self._b]}, - outputs={'Out': [pre_activation]}, - attrs={'axis': self._num_flatten_dims}) + self._helper.append_op(type='elementwise_add', + inputs={ + 'X': 
[pre_bias], + 'Y': [self._b] + }, + outputs={'Out': [pre_activation]}, + attrs={'axis': self._num_flatten_dims}) else: pre_activation = pre_bias # Currently, we don't support inplace in dygraph mode @@ -455,8 +457,8 @@ class BOW(paddle.nn.Layer): self.seq_len = conf_dict["seq_len"] self.emb_layer = EmbeddingLayer(self.dict_size, self.emb_dim, "emb").ops() - self.bow_layer = paddle.nn.Linear( - in_features=self.bow_dim, out_features=self.bow_dim) + self.bow_layer = paddle.nn.Linear(in_features=self.bow_dim, + out_features=self.bow_dim) self.bow_layer_po = FCLayer(self.bow_dim, None, "fc").ops() self.softmax_layer = FCLayer(2, "softmax", "cos_sim").ops() @@ -469,10 +471,10 @@ class BOW(paddle.nn.Layer): # embedding layer left_emb = self.emb_layer(left) right_emb = self.emb_layer(right) - left_emb = paddle.reshape( - left_emb, shape=[-1, self.seq_len, self.bow_dim]) - right_emb = paddle.reshape( - right_emb, shape=[-1, self.seq_len, self.bow_dim]) + left_emb = paddle.reshape(left_emb, + shape=[-1, self.seq_len, self.bow_dim]) + right_emb = paddle.reshape(right_emb, + shape=[-1, self.seq_len, self.bow_dim]) bow_left = paddle.fluid.layers.reduce_sum(left_emb, dim=1) bow_right = paddle.fluid.layers.reduce_sum(right_emb, dim=1) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_assert.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_assert.py index d4646833ea2..e2f8f69fa83 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_assert.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_assert.py @@ -35,6 +35,7 @@ def dyfunc_assert_non_variable(x=True): class TestAssertVariable(unittest.TestCase): + def _run(self, func, x, with_exception, to_static): ProgramTranslator().enable(to_static) if with_exception: @@ -50,22 +51,28 @@ class TestAssertVariable(unittest.TestCase): self._run(func, x, with_exception, False) def test_non_variable(self): - self._run_dy_static( - dyfunc_assert_non_variable, x=False, with_exception=True) - self._run_dy_static( - dyfunc_assert_non_variable, x=True, with_exception=False) + self._run_dy_static(dyfunc_assert_non_variable, + x=False, + with_exception=True) + self._run_dy_static(dyfunc_assert_non_variable, + x=True, + with_exception=False) def test_bool_variable(self): - self._run_dy_static( - dyfunc_assert_variable, x=numpy.array([False]), with_exception=True) - self._run_dy_static( - dyfunc_assert_variable, x=numpy.array([True]), with_exception=False) + self._run_dy_static(dyfunc_assert_variable, + x=numpy.array([False]), + with_exception=True) + self._run_dy_static(dyfunc_assert_variable, + x=numpy.array([True]), + with_exception=False) def test_int_variable(self): - self._run_dy_static( - dyfunc_assert_variable, x=numpy.array([0]), with_exception=True) - self._run_dy_static( - dyfunc_assert_variable, x=numpy.array([1]), with_exception=False) + self._run_dy_static(dyfunc_assert_variable, + x=numpy.array([0]), + with_exception=True) + self._run_dy_static(dyfunc_assert_variable, + x=numpy.array([1]), + with_exception=False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ast_util.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ast_util.py index 31a50226f0b..00eb25792b2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ast_util.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ast_util.py @@ -38,6 +38,7 @@ class TestAST2Func(unittest.TestCase): return transformed_func def test_ast2func(self): + def 
func(x, y): return x + y @@ -55,6 +56,7 @@ class TestAST2Func(unittest.TestCase): self.assertTrue((true_ret == test_ret).all()) def test_ast2func_static(self): + def func(x): y = fluid.layers.relu(x) loss = fluid.layers.mean(y) @@ -73,8 +75,8 @@ class TestAST2Func(unittest.TestCase): def test_ast2func_error(self): with self.assertRaises(Exception) as e: self.assertRaises(TypeError, ast_to_func("x = a + b", 'foo')) - self.assertTrue("Type of ast_root should be gast.AST or ast.AST" in - str(e.exception)) + self.assertTrue("Type of ast_root should be gast.AST or ast.AST" in str( + e.exception)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py index b86b85bb90f..b818ed95a24 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py @@ -72,6 +72,7 @@ def dyfunc_bool_to_tensor(x): class TestDygraphBasicApi_ToVariable(unittest.TestCase): + def setUp(self): self.input = np.ones(5).astype("int32") self.test_funcs = [ @@ -79,8 +80,8 @@ class TestDygraphBasicApi_ToVariable(unittest.TestCase): dyfunc_float_to_tensor, dyfunc_to_variable, dyfunc_to_variable_2, dyfunc_to_variable_3 ] - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() def get_dygraph_output(self): with fluid.dygraph.guard(): @@ -103,10 +104,9 @@ class TestDygraphBasicApi_ToVariable(unittest.TestCase): self.dygraph_func = func dygraph_res = self.get_dygraph_output() static_res = self.get_static_output() - self.assertTrue( - np.allclose(dygraph_res, static_res), - msg='dygraph is {}\n static_res is {}'.format(dygraph_res, - static_res)) + self.assertTrue(np.allclose(dygraph_res, static_res), + msg='dygraph is {}\n static_res is {}'.format( + dygraph_res, static_res)) # 1. 
test Apis that inherit from layers.Layer @@ -115,14 +115,13 @@ def dyfunc_BilinearTensorProduct(layer1, layer2): input1_dim=5, input2_dim=4, output_dim=1000, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5))) - - res = bilinearTensorProduct( - fluid.dygraph.base.to_variable(layer1), - fluid.dygraph.base.to_variable(layer2)) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.99)), + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.5))) + + res = bilinearTensorProduct(fluid.dygraph.base.to_variable(layer1), + fluid.dygraph.base.to_variable(layer2)) return res @@ -131,10 +130,11 @@ def dyfunc_Conv2D(input): num_channels=3, num_filters=2, filter_size=3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5)), ) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.99)), + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.5)), + ) res = conv2d(input) return res @@ -144,10 +144,11 @@ def dyfunc_Conv3D(input): num_channels=3, num_filters=2, filter_size=3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5)), ) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.99)), + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.5)), + ) res = conv3d(input) return res @@ -158,10 +159,11 @@ def dyfunc_Conv2DTranspose(input): num_filters=12, filter_size=12, use_cudnn=False, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5)), ) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.99)), + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.5)), + ) ret = conv2dTranspose(input) return ret @@ -172,10 +174,11 @@ def dyfunc_Conv3DTranspose(input): num_filters=12, filter_size=12, use_cudnn=False, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5)), ) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.99)), + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.5)), + ) ret = conv3dTranspose(input) return ret @@ -185,19 +188,24 @@ def dyfunc_Linear(input): input_dim=10, output_dim=5, act='relu', - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5)), ) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.99)), + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.5)), + ) res = fc(input) return res def dyfunc_Pool2D(input): - fluid.dygraph.Pool2D( - pool_size=2, pool_type='avg', pool_stride=1, global_pooling=False) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, pool_type='avg', pool_stride=1, global_pooling=False) + fluid.dygraph.Pool2D(pool_size=2, + pool_type='avg', + pool_stride=1, + global_pooling=False) + pool2d = fluid.dygraph.Pool2D(pool_size=2, + pool_type='avg', + pool_stride=1, + global_pooling=False) res = pool2d(input) return 
res @@ -244,13 +252,13 @@ class TestDygraphBasicApi(unittest.TestCase): def test_transformed_static_result(self): dygraph_res = self.get_dygraph_output() static_res = self.get_static_output() - self.assertTrue( - np.allclose(dygraph_res, static_res), - msg='dygraph is {}\n static_res is \n{}'.format(dygraph_res, - static_res)) + self.assertTrue(np.allclose(dygraph_res, static_res), + msg='dygraph is {}\n static_res is \n{}'.format( + dygraph_res, static_res)) class TestDygraphBasicApi_BilinearTensorProduct(TestDygraphBasicApi): + def setUp(self): self.input1 = np.random.random((5, 5)).astype('float32') self.input2 = np.random.random((5, 4)).astype('float32') @@ -279,36 +287,42 @@ class TestDygraphBasicApi_BilinearTensorProduct(TestDygraphBasicApi): class TestDygraphBasicApi_Conv2D(TestDygraphBasicApi): + def setUp(self): self.input = np.random.random((1, 3, 3, 5)).astype('float32') self.dygraph_func = dyfunc_Conv2D class TestDygraphBasicApi_Conv3D(TestDygraphBasicApi): + def setUp(self): self.input = np.random.random((1, 3, 3, 3, 5)).astype('float32') self.dygraph_func = dyfunc_Conv3D class TestDygraphBasicApi_Conv2DTranspose(TestDygraphBasicApi): + def setUp(self): self.input = np.random.random((5, 3, 32, 32)).astype('float32') self.dygraph_func = dyfunc_Conv2DTranspose class TestDygraphBasicApi_Conv3DTranspose(TestDygraphBasicApi): + def setUp(self): self.input = np.random.random((5, 3, 12, 32, 32)).astype('float32') self.dygraph_func = dyfunc_Conv3DTranspose class TestDygraphBasicApi_Linear(TestDygraphBasicApi): + def setUp(self): self.input = np.random.random((4, 3, 10)).astype('float32') self.dygraph_func = dyfunc_Linear class TestDygraphBasicApi_Prelu(TestDygraphBasicApi): + def setUp(self): self.input = np.ones([5, 20, 10, 10]).astype('float32') self.dygraph_func = dyfunc_Prelu @@ -317,41 +331,39 @@ class TestDygraphBasicApi_Prelu(TestDygraphBasicApi): # 2. 
test Apis that inherit from LearningRateDecay def dyfunc_CosineDecay(): base_lr = 0.1 - CosineDecay = fluid.dygraph.CosineDecay( - learning_rate=base_lr, step_each_epoch=10000, epochs=120) + CosineDecay = fluid.dygraph.CosineDecay(learning_rate=base_lr, + step_each_epoch=10000, + epochs=120) lr = CosineDecay() return lr def dyfunc_ExponentialDecay(): base_lr = 0.1 - exponential_decay = fluid.dygraph.ExponentialDecay( - learning_rate=base_lr, - decay_steps=10000, - decay_rate=0.5, - staircase=True) + exponential_decay = fluid.dygraph.ExponentialDecay(learning_rate=base_lr, + decay_steps=10000, + decay_rate=0.5, + staircase=True) lr = exponential_decay() return lr def dyfunc_InverseTimeDecay(): base_lr = 0.1 - inverse_time_decay = fluid.dygraph.InverseTimeDecay( - learning_rate=base_lr, - decay_steps=10000, - decay_rate=0.5, - staircase=True) + inverse_time_decay = fluid.dygraph.InverseTimeDecay(learning_rate=base_lr, + decay_steps=10000, + decay_rate=0.5, + staircase=True) lr = inverse_time_decay() return lr def dyfunc_NaturalExpDecay(): base_lr = 0.1 - natural_exp_decay = fluid.dygraph.NaturalExpDecay( - learning_rate=base_lr, - decay_steps=10000, - decay_rate=0.5, - staircase=True) + natural_exp_decay = fluid.dygraph.NaturalExpDecay(learning_rate=base_lr, + decay_steps=10000, + decay_rate=0.5, + staircase=True) lr = natural_exp_decay() return lr @@ -380,6 +392,7 @@ def dyfunc_PolynomialDecay(): class TestDygraphBasicApi_CosineDecay(unittest.TestCase): + def setUp(self): self.dygraph_func = dyfunc_CosineDecay @@ -406,38 +419,43 @@ class TestDygraphBasicApi_CosineDecay(unittest.TestCase): def test_transformed_static_result(self): dygraph_res = self.get_dygraph_output() static_res = self.get_static_output() - self.assertTrue( - np.allclose(dygraph_res, static_res), - msg='dygraph is {}\n static_res is \n{}'.format(dygraph_res, - static_res)) + self.assertTrue(np.allclose(dygraph_res, static_res), + msg='dygraph is {}\n static_res is \n{}'.format( + dygraph_res, static_res)) class TestDygraphBasicApi_ExponentialDecay(TestDygraphBasicApi_CosineDecay): + def setUp(self): self.dygraph_func = dyfunc_ExponentialDecay class TestDygraphBasicApi_InverseTimeDecay(TestDygraphBasicApi_CosineDecay): + def setUp(self): self.dygraph_func = dyfunc_InverseTimeDecay class TestDygraphBasicApi_NaturalExpDecay(TestDygraphBasicApi_CosineDecay): + def setUp(self): self.dygraph_func = dyfunc_NaturalExpDecay class TestDygraphBasicApi_NoamDecay(TestDygraphBasicApi_CosineDecay): + def setUp(self): self.dygraph_func = dyfunc_NoamDecay class TestDygraphBasicApi_PiecewiseDecay(TestDygraphBasicApi_CosineDecay): + def setUp(self): self.dygraph_func = dyfunc_PiecewiseDecay class TestDygraphBasicApi_PolynomialDecay(TestDygraphBasicApi_CosineDecay): + def setUp(self): self.dygraph_func = dyfunc_PolynomialDecay @@ -451,6 +469,7 @@ def _dygraph_fn(): class TestDygraphApiRecognition(unittest.TestCase): + def setUp(self): self.src = inspect.getsource(_dygraph_fn) self.root = gast.parse(self.src) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py index db533e6379a..f26ed2a6823 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py @@ -29,14 +29,15 @@ from bert_utils import get_bert_config, get_feed_data_reader from predictor_utils import PredictorTools program_translator = ProgramTranslator() -place = fluid.CUDAPlace(0) if 
fluid.is_compiled_with_cuda() else fluid.CPUPlace( -) +place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() SEED = 2020 STEP_NUM = 10 PRINT_STEP = 2 class TestBert(unittest.TestCase): + def setUp(self): self.bert_config = get_bert_config() self.data_reader = get_feed_data_reader(self.bert_config) @@ -56,13 +57,14 @@ class TestBert(unittest.TestCase): fluid.default_main_program().random_seed = SEED fluid.default_startup_program().random_seed = SEED - data_loader = fluid.io.DataLoader.from_generator( - capacity=50, iterable=True) - data_loader.set_batch_generator( - data_reader.data_generator(), places=place) + data_loader = fluid.io.DataLoader.from_generator(capacity=50, + iterable=True) + data_loader.set_batch_generator(data_reader.data_generator(), + places=place) - bert = PretrainModelLayer( - config=bert_config, weight_sharing=False, use_fp16=False) + bert = PretrainModelLayer(config=bert_config, + weight_sharing=False, + use_fp16=False) optimizer = fluid.optimizer.Adam(parameter_list=bert.parameters()) step_idx = 0 @@ -120,12 +122,11 @@ class TestBert(unittest.TestCase): paddle.enable_static() exe = fluid.Executor(place) # load inference model - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - self.model_save_dir, - executor=exe, - model_filename=self.model_filename, - params_filename=self.params_filename) + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(self.model_save_dir, + executor=exe, + model_filename=self.model_filename, + params_filename=self.params_filename) pred_res = exe.run(inference_program, feed=dict(zip(feed_target_names, data)), fetch_list=fetch_targets) @@ -135,8 +136,9 @@ class TestBert(unittest.TestCase): def predict_dygraph(self, bert_config, data): program_translator.enable(False) with fluid.dygraph.guard(place): - bert = PretrainModelLayer( - config=bert_config, weight_sharing=False, use_fp16=False) + bert = PretrainModelLayer(config=bert_config, + weight_sharing=False, + use_fp16=False) model_dict, _ = fluid.dygraph.load_dygraph( self.dy_state_dict_save_path) @@ -145,14 +147,13 @@ class TestBert(unittest.TestCase): input_vars = [fluid.dygraph.to_variable(x) for x in data] src_ids, pos_ids, sent_ids, input_mask, mask_label, mask_pos, labels = input_vars - pred_res = bert( - src_ids=src_ids, - position_ids=pos_ids, - sentence_ids=sent_ids, - input_mask=input_mask, - mask_label=mask_label, - mask_pos=mask_pos, - labels=labels) + pred_res = bert(src_ids=src_ids, + position_ids=pos_ids, + sentence_ids=sent_ids, + input_mask=input_mask, + mask_label=mask_label, + mask_pos=mask_pos, + labels=labels) pred_res = [var.numpy() for var in pred_res] return pred_res @@ -180,14 +181,12 @@ class TestBert(unittest.TestCase): self.data_reader) dygraph_loss, dygraph_ppl = self.train_dygraph(self.bert_config, self.data_reader) - self.assertTrue( - np.allclose(static_loss, dygraph_loss), - msg="static_loss: {} \n dygraph_loss: {}".format(static_loss, - dygraph_loss)) - self.assertTrue( - np.allclose(static_ppl, dygraph_ppl), - msg="static_ppl: {} \n dygraph_ppl: {}".format(static_ppl, - dygraph_ppl)) + self.assertTrue(np.allclose(static_loss, dygraph_loss), + msg="static_loss: {} \n dygraph_loss: {}".format( + static_loss, dygraph_loss)) + self.assertTrue(np.allclose(static_ppl, dygraph_ppl), + msg="static_ppl: {} \n dygraph_ppl: {}".format( + static_ppl, dygraph_ppl)) self.verify_predict() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index bec9b35a7fe..14683b33feb 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -94,6 +94,7 @@ def _get_interp1d_bin_mask(seg_xmin, seg_xmax, tscale, num_sample, class Conv1D(fluid.dygraph.Layer): + def __init__(self, prefix, num_channels=256, @@ -105,25 +106,22 @@ class Conv1D(fluid.dygraph.Layer): super(Conv1D, self).__init__() fan_in = num_channels * size_k * 1 k = 1. / math.sqrt(fan_in) - param_attr = ParamAttr( - name=prefix + "_w", - initializer=fluid.initializer.Uniform( - low=-k, high=k)) - bias_attr = ParamAttr( - name=prefix + "_b", - initializer=fluid.initializer.Uniform( - low=-k, high=k)) - - self._conv2d = fluid.dygraph.Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=(1, size_k), - stride=1, - padding=(0, padding), - groups=groups, - act=act, - param_attr=param_attr, - bias_attr=bias_attr) + param_attr = ParamAttr(name=prefix + "_w", + initializer=fluid.initializer.Uniform(low=-k, + high=k)) + bias_attr = ParamAttr(name=prefix + "_b", + initializer=fluid.initializer.Uniform(low=-k, + high=k)) + + self._conv2d = fluid.dygraph.Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=(1, size_k), + stride=1, + padding=(0, padding), + groups=groups, + act=act, + param_attr=param_attr, + bias_attr=bias_attr) def forward(self, x): x = fluid.layers.unsqueeze(input=x, axes=[2]) @@ -133,6 +131,7 @@ class Conv1D(fluid.dygraph.Layer): class BMN(fluid.dygraph.Layer): + def __init__(self, cfg): super(BMN, self).__init__() @@ -147,49 +146,50 @@ class BMN(fluid.dygraph.Layer): self.hidden_dim_3d = 512 # Base Module - self.b_conv1 = Conv1D( - prefix="Base_1", - num_channels=cfg.feat_dim, - num_filters=self.hidden_dim_1d, - size_k=3, - padding=1, - groups=4, - act="relu") - self.b_conv2 = Conv1D( - prefix="Base_2", - num_filters=self.hidden_dim_1d, - size_k=3, - padding=1, - groups=4, - act="relu") + self.b_conv1 = Conv1D(prefix="Base_1", + num_channels=cfg.feat_dim, + num_filters=self.hidden_dim_1d, + size_k=3, + padding=1, + groups=4, + act="relu") + self.b_conv2 = Conv1D(prefix="Base_2", + num_filters=self.hidden_dim_1d, + size_k=3, + padding=1, + groups=4, + act="relu") # Temporal Evaluation Module - self.ts_conv1 = Conv1D( - prefix="TEM_s1", - num_filters=self.hidden_dim_1d, - size_k=3, - padding=1, - groups=4, - act="relu") - self.ts_conv2 = Conv1D( - prefix="TEM_s2", num_filters=1, size_k=1, padding=0, act="sigmoid") - self.te_conv1 = Conv1D( - prefix="TEM_e1", - num_filters=self.hidden_dim_1d, - size_k=3, - padding=1, - groups=4, - act="relu") - self.te_conv2 = Conv1D( - prefix="TEM_e2", num_filters=1, size_k=1, padding=0, act="sigmoid") + self.ts_conv1 = Conv1D(prefix="TEM_s1", + num_filters=self.hidden_dim_1d, + size_k=3, + padding=1, + groups=4, + act="relu") + self.ts_conv2 = Conv1D(prefix="TEM_s2", + num_filters=1, + size_k=1, + padding=0, + act="sigmoid") + self.te_conv1 = Conv1D(prefix="TEM_e1", + num_filters=self.hidden_dim_1d, + size_k=3, + padding=1, + groups=4, + act="relu") + self.te_conv2 = Conv1D(prefix="TEM_e2", + num_filters=1, + size_k=1, + padding=0, + act="sigmoid") #Proposal Evaluation Module - self.p_conv1 = Conv1D( - prefix="PEM_1d", - num_filters=self.hidden_dim_2d, - size_k=3, - padding=1, - act="relu") + self.p_conv1 = Conv1D(prefix="PEM_1d", + num_filters=self.hidden_dim_2d, + size_k=3, + padding=1, + act="relu") # init to speed up 
sample_mask = get_interp1d_mask(self.tscale, self.dscale, @@ -263,8 +263,8 @@ class BMN(fluid.dygraph.Layer): xp = self.p_conv1(x) # BM layer xp = fluid.layers.matmul(xp, self.sample_mask) - xp = fluid.layers.reshape( - xp, shape=[0, 0, -1, self.dscale, self.tscale]) + xp = fluid.layers.reshape(xp, + shape=[0, 0, -1, self.dscale, self.tscale]) xp = self.p_conv3d1(xp) xp = fluid.layers.squeeze(xp, axes=[2]) @@ -277,6 +277,7 @@ class BMN(fluid.dygraph.Layer): def bmn_loss_func(pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end, cfg): + def _get_mask(cfg): dscale = cfg.dscale tscale = cfg.tscale @@ -286,24 +287,29 @@ def bmn_loss_func(pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end, ] + [0 for i in range(idx)] bm_mask.append(mask_vector) bm_mask = np.array(bm_mask, dtype=np.float32) - self_bm_mask = fluid.layers.create_global_var( - shape=[dscale, tscale], value=0, dtype=DATATYPE, persistable=True) + self_bm_mask = fluid.layers.create_global_var(shape=[dscale, tscale], + value=0, + dtype=DATATYPE, + persistable=True) fluid.layers.assign(bm_mask, self_bm_mask) self_bm_mask.stop_gradient = True return self_bm_mask def tem_loss_func(pred_start, pred_end, gt_start, gt_end): + def bi_loss(pred_score, gt_label): - pred_score = fluid.layers.reshape( - x=pred_score, shape=[-1], inplace=False) - gt_label = fluid.layers.reshape( - x=gt_label, shape=[-1], inplace=False) + pred_score = fluid.layers.reshape(x=pred_score, + shape=[-1], + inplace=False) + gt_label = fluid.layers.reshape(x=gt_label, + shape=[-1], + inplace=False) gt_label.stop_gradient = True pmask = fluid.layers.cast(x=(gt_label > 0.5), dtype=DATATYPE) - num_entries = fluid.layers.cast( - fluid.layers.shape(pmask), dtype=DATATYPE) - num_positive = fluid.layers.cast( - fluid.layers.reduce_sum(pmask), dtype=DATATYPE) + num_entries = fluid.layers.cast(fluid.layers.shape(pmask), + dtype=DATATYPE) + num_positive = fluid.layers.cast(fluid.layers.reduce_sum(pmask), + dtype=DATATYPE) ratio = num_entries / num_positive coef_0 = 0.5 * ratio / (ratio - 1) coef_1 = 0.5 * ratio @@ -334,26 +340,26 @@ def bmn_loss_func(pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end, u_lmask = fluid.layers.cast(x=u_lmask, dtype=DATATYPE) u_lmask = fluid.layers.elementwise_mul(u_lmask, mask) - num_h = fluid.layers.cast( - fluid.layers.reduce_sum(u_hmask), dtype=DATATYPE) - num_m = fluid.layers.cast( - fluid.layers.reduce_sum(u_mmask), dtype=DATATYPE) - num_l = fluid.layers.cast( - fluid.layers.reduce_sum(u_lmask), dtype=DATATYPE) + num_h = fluid.layers.cast(fluid.layers.reduce_sum(u_hmask), + dtype=DATATYPE) + num_m = fluid.layers.cast(fluid.layers.reduce_sum(u_mmask), + dtype=DATATYPE) + num_l = fluid.layers.cast(fluid.layers.reduce_sum(u_lmask), + dtype=DATATYPE) r_m = num_h / num_m u_smmask = fluid.layers.assign( - local_random.uniform(0., 1., [ - gt_iou_map.shape[1], gt_iou_map.shape[2] - ]).astype(DATATYPE)) + local_random.uniform( + 0., 1., + [gt_iou_map.shape[1], gt_iou_map.shape[2]]).astype(DATATYPE)) u_smmask = fluid.layers.elementwise_mul(u_mmask, u_smmask) u_smmask = fluid.layers.cast(x=(u_smmask > (1. - r_m)), dtype=DATATYPE) r_l = num_h / num_l u_slmask = fluid.layers.assign( - local_random.uniform(0., 1., [ - gt_iou_map.shape[1], gt_iou_map.shape[2] - ]).astype(DATATYPE)) + local_random.uniform( + 0., 1., + [gt_iou_map.shape[1], gt_iou_map.shape[2]]).astype(DATATYPE)) u_slmask = fluid.layers.elementwise_mul(u_lmask, u_slmask) u_slmask = fluid.layers.cast(x=(u_slmask > (1. 
- r_l)), dtype=DATATYPE) @@ -388,12 +394,16 @@ def bmn_loss_func(pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end, loss = -1 * (loss_pos + loss_neg) / num_entries return loss - pred_bm_reg = fluid.layers.squeeze( - fluid.layers.slice( - pred_bm, axes=[1], starts=[0], ends=[1]), axes=[1]) - pred_bm_cls = fluid.layers.squeeze( - fluid.layers.slice( - pred_bm, axes=[1], starts=[1], ends=[2]), axes=[1]) + pred_bm_reg = fluid.layers.squeeze(fluid.layers.slice(pred_bm, + axes=[1], + starts=[0], + ends=[1]), + axes=[1]) + pred_bm_cls = fluid.layers.squeeze(fluid.layers.slice(pred_bm, + axes=[1], + starts=[1], + ends=[2]), + axes=[1]) bm_mask = _get_mask(cfg) @@ -433,8 +443,7 @@ def optimizer(cfg, parameter_list): l2_weight_decay = cfg.l2_weight_decay lr = [base_lr, base_lr * lr_decay] optimizer = fluid.optimizer.Adam( - fluid.layers.piecewise_decay( - boundaries=bd, values=lr), + fluid.layers.piecewise_decay(boundaries=bd, values=lr), parameter_list=parameter_list, regularization=fluid.regularizer.L2DecayRegularizer( regularization_coeff=l2_weight_decay)) @@ -442,6 +451,7 @@ def optimizer(cfg, parameter_list): def fake_data_reader(args, mode='train'): + def iou_with_anchors(anchors_min, anchors_max, box_min, box_max): """Compute jaccard score between a box and the anchors. """ @@ -517,8 +527,9 @@ def fake_data_reader(args, mode='train'): for jdx in range(len(anchor_xmin)): match_score_start.append( np.max( - ioa_with_anchors(anchor_xmin[jdx], anchor_xmax[ - jdx], gt_start_bboxs[:, 0], gt_start_bboxs[:, 1]))) + ioa_with_anchors(anchor_xmin[jdx], anchor_xmax[jdx], + gt_start_bboxs[:, 0], gt_start_bboxs[:, + 1]))) match_score_end = [] for jdx in range(len(anchor_xmin)): match_score_end.append( @@ -547,8 +558,8 @@ def fake_data_reader(args, mode='train'): batch_out.append( (video_feat, gt_iou_map, gt_start, gt_end, video_idx)) else: - raise NotImplementedError('mode {} not implemented'.format( - mode)) + raise NotImplementedError( + 'mode {} not implemented'.format(mode)) if len(batch_out) == args.batch_size: yield batch_out batch_out = [] @@ -582,7 +593,9 @@ def val_bmn(model, args): avg_loss = fluid.layers.mean(loss) loss_data += [ - avg_loss.numpy()[0], tem_loss.numpy()[0], pem_reg_loss.numpy()[0], + avg_loss.numpy()[0], + tem_loss.numpy()[0], + pem_reg_loss.numpy()[0], pem_cls_loss.numpy()[0] ] @@ -597,6 +610,7 @@ def val_bmn(model, args): class TestTrain(unittest.TestCase): + def setUp(self): self.args = Args() self.place = fluid.CPUPlace() if not fluid.is_compiled_with_cuda() \ @@ -629,14 +643,14 @@ class TestTrain(unittest.TestCase): for epoch in range(args.epoch): for batch_id, data in enumerate(train_reader()): - video_feat = np.array( - [item[0] for item in data]).astype(DATATYPE) - gt_iou_map = np.array( - [item[1] for item in data]).astype(DATATYPE) - gt_start = np.array( - [item[2] for item in data]).astype(DATATYPE) - gt_end = np.array( - [item[3] for item in data]).astype(DATATYPE) + video_feat = np.array([item[0] + for item in data]).astype(DATATYPE) + gt_iou_map = np.array([item[1] + for item in data]).astype(DATATYPE) + gt_start = np.array([item[2] + for item in data]).astype(DATATYPE) + gt_end = np.array([item[3] + for item in data]).astype(DATATYPE) x_data = to_variable(video_feat) gt_iou_map = to_variable(gt_iou_map) @@ -658,12 +672,14 @@ class TestTrain(unittest.TestCase): bmn.clear_gradients() # log loss data to verify correctness loss_data += [ - avg_loss.numpy()[0], tem_loss.numpy()[0], - pem_reg_loss.numpy()[0], pem_cls_loss.numpy()[0] + avg_loss.numpy()[0], + 
tem_loss.numpy()[0], + pem_reg_loss.numpy()[0], + pem_cls_loss.numpy()[0] ] - if args.log_interval > 0 and ( - batch_id % args.log_interval == 0): + if args.log_interval > 0 and (batch_id % args.log_interval + == 0): print('[TRAIN] Epoch {}, iter {} '.format(epoch, batch_id) + '\tLoss = {}, \ttem_loss = {}, \tpem_reg_loss = {}, \tpem_cls_loss = {}'.format( '%f' % avg_loss.numpy()[0], '%f' % tem_loss.numpy()[0], \ @@ -748,12 +764,11 @@ class TestTrain(unittest.TestCase): paddle.enable_static() exe = fluid.Executor(self.place) # load inference model - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - self.model_save_dir, - executor=exe, - model_filename=self.model_filename, - params_filename=self.params_filename) + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(self.model_save_dir, + executor=exe, + model_filename=self.model_filename, + params_filename=self.params_filename) pred_res = exe.run(inference_program, feed={feed_target_names[0]: data}, fetch_list=fetch_targets) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_break_continue.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_break_continue.py index f67dda3fbd7..79b6880b0d8 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_break_continue.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_break_continue.py @@ -139,7 +139,9 @@ def test_for_in_else(x): def while_loop_class_var(x): + class Foo(object): + def __init__(self): self.a = 3 self.b = 4 @@ -183,10 +185,11 @@ def test_optim_break_in_while(x): class TestContinueInFor(unittest.TestCase): + def setUp(self): self.input = np.zeros((1)).astype('int64') - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.init_dygraph_func() def init_dygraph_func(self): @@ -205,58 +208,67 @@ class TestContinueInFor(unittest.TestCase): def test_transformed_static_result(self): static_res = self.run_static_mode() dygraph_res = self.run_dygraph_mode() - self.assertTrue( - np.allclose(dygraph_res, static_res), - msg='dygraph res is {}\nstatic_res is {}'.format(dygraph_res, - static_res)) + self.assertTrue(np.allclose(dygraph_res, static_res), + msg='dygraph res is {}\nstatic_res is {}'.format( + dygraph_res, static_res)) class TestContinueInForAtEnd(TestContinueInFor): + def init_dygraph_func(self): self.dygraph_func = test_continue_in_for_at_end class TestBreakInFor(TestContinueInFor): + def init_dygraph_func(self): self.dygraph_func = test_break_in_for class TestBreakInForAtEnd(TestContinueInFor): + def init_dygraph_func(self): self.dygraph_func = test_break_in_for_at_end class TestBreakContinueInFor(TestContinueInFor): + def init_dygraph_func(self): self.dygraph_func = test_break_continue_in_for class TestForInElse(TestContinueInFor): + def init_dygraph_func(self): self.dygraph_func = test_for_in_else class TestContinueInWhile(TestContinueInFor): + def init_dygraph_func(self): self.dygraph_func = test_continue_in_while class TestBreakInWhile(TestContinueInWhile): + def init_dygraph_func(self): self.dygraph_func = test_break_in_while class TestWhileLoopClassVar(TestContinueInWhile): + def init_dygraph_func(self): self.dygraph_func = while_loop_class_var class TestOptimBreakInFor(TestContinueInWhile): + def init_dygraph_func(self): self.dygraph_func = test_optim_break_in_for class 
TestOptimBreakInWhile(TestContinueInWhile): + def init_dygraph_func(self): self.dygraph_func = test_optim_break_in_while diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_build_strategy.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_build_strategy.py index 95ea5ad227e..27272985d55 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_build_strategy.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_build_strategy.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,6 +24,7 @@ program_translator = ProgramTranslator() class TestResnetWithPass(unittest.TestCase): + def setUp(self): self.build_strategy = paddle.static.BuildStrategy() self.build_strategy.fuse_elewise_add_act_ops = True @@ -44,24 +45,22 @@ class TestResnetWithPass(unittest.TestCase): st_pre = self.resnet_helper.predict_static(image) dy_jit_pre = self.resnet_helper.predict_dygraph_jit(image) predictor_pre = self.resnet_helper.predict_analysis_inference(image) - self.assertTrue( - np.allclose(dy_pre, st_pre), - msg="dy_pre:\n {}\n, st_pre: \n{}.".format(dy_pre, st_pre)) - self.assertTrue( - np.allclose(dy_jit_pre, st_pre), - msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format(dy_jit_pre, st_pre)) - self.assertTrue( - np.allclose(predictor_pre, st_pre), - msg="predictor_pre:\n {}\n, st_pre: \n{}.".format(predictor_pre, - st_pre)) + self.assertTrue(np.allclose(dy_pre, st_pre), + msg="dy_pre:\n {}\n, st_pre: \n{}.".format( + dy_pre, st_pre)) + self.assertTrue(np.allclose(dy_jit_pre, st_pre), + msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format( + dy_jit_pre, st_pre)) + self.assertTrue(np.allclose(predictor_pre, st_pre), + msg="predictor_pre:\n {}\n, st_pre: \n{}.".format( + predictor_pre, st_pre)) def test_resnet(self): static_loss = self.train(to_static=True) dygraph_loss = self.train(to_static=False) - self.assertTrue( - np.allclose(static_loss, dygraph_loss), - msg="static_loss: {} \n dygraph_loss: {}".format(static_loss, - dygraph_loss)) + self.assertTrue(np.allclose(static_loss, dygraph_loss), + msg="static_loss: {} \n dygraph_loss: {}".format( + static_loss, dygraph_loss)) self.verify_predict() def test_in_static_mode_mkldnn(self): @@ -74,7 +73,9 @@ class TestResnetWithPass(unittest.TestCase): class TestError(unittest.TestCase): + def test_type_error(self): + def foo(x): out = x + 1 return out diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cache_program.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cache_program.py index b72149a29c7..3d2339f58f3 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cache_program.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cache_program.py @@ -28,6 +28,7 @@ from test_fetch_feed import Pool2D, Linear class TestCacheProgram(unittest.TestCase): + def setUp(self): self.batch_num = 5 self.dygraph_class = Pool2D @@ -55,12 +56,14 @@ class TestCacheProgram(unittest.TestCase): cur_out, (tuple, list)) else cur_out.numpy() self.assertTrue( np.allclose(prev_out_numpy, 
cur_out_numpy), - msg='Output in previous batch is {}\n Output in current batch is \n{}' + msg= + 'Output in previous batch is {}\n Output in current batch is \n{}' .format(prev_out_numpy, cur_out_numpy)) self.assertEqual(prev_ops, cur_ops) class TestCacheProgram2(TestCacheProgram): + def setUp(self): self.batch_num = 5 self.dygraph_class = Linear @@ -68,6 +71,7 @@ class TestCacheProgram2(TestCacheProgram): class TestCacheProgramWithOptimizer(unittest.TestCase): + def setUp(self): self.dygraph_class = Linear self.data = np.random.random((4, 10)).astype('float32') @@ -102,10 +106,9 @@ class TestCacheProgramWithOptimizer(unittest.TestCase): def test_with_optimizer(self): dygraph_loss = self.train_dygraph() static_loss = self.train_static() - self.assertTrue( - np.allclose(dygraph_loss, static_loss), - msg='dygraph is {}\n static_res is \n{}'.format(dygraph_loss, - static_loss)) + self.assertTrue(np.allclose(dygraph_loss, static_loss), + msg='dygraph is {}\n static_res is \n{}'.format( + dygraph_loss, static_loss)) def simple_func(x): @@ -115,6 +118,7 @@ def simple_func(x): class TestConvertWithCache(unittest.TestCase): + def test_cache(self): static_func = convert_to_static(simple_func) # Get transformed function from cache. @@ -145,6 +149,7 @@ def sum_under_while(limit): class TestToOutputWithCache(unittest.TestCase): + def test_output(self): with fluid.dygraph.guard(): ret = sum_even_until_limit(80, 10) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cast.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cast.py index b4cc38b3a60..da67b08287c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cast.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cast.py @@ -61,9 +61,10 @@ def test_mix_cast(x): class TestCastBase(unittest.TestCase): + def setUp(self): - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.prepare() self.set_func() @@ -97,6 +98,7 @@ class TestCastBase(unittest.TestCase): class TestIntCast(TestCastBase): + def prepare(self): self.input_shape = (1, ) self.input_dtype = 'float32' @@ -110,6 +112,7 @@ class TestIntCast(TestCastBase): class TestFloatCast(TestCastBase): + def prepare(self): self.input_shape = (8, 16) self.input_dtype = 'bool' @@ -123,6 +126,7 @@ class TestFloatCast(TestCastBase): class TestMixCast(TestCastBase): + def prepare(self): self.input_shape = (8, 32) self.input_dtype = 'float32' @@ -152,6 +156,7 @@ class TestMixCast(TestCastBase): class TestNotVarCast(TestCastBase): + def prepare(self): self.input = 3.14 self.cast_dtype = 'int' diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_container.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_container.py index 74f4a895d15..6ed32e49775 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_container.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_container.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,6 +20,7 @@ import tempfile class BufferLayers(paddle.nn.Layer): + def __init__(self, out_channel): super(BufferLayers, self).__init__() self.out_channel = out_channel @@ -37,6 +38,7 @@ class BufferLayers(paddle.nn.Layer): class SequentialNet(paddle.nn.Layer): + def __init__(self, sub_layer, in_channel, out_channel): super(SequentialNet, self).__init__() self.layer = paddle.nn.Sequential( @@ -50,14 +52,17 @@ class SequentialNet(paddle.nn.Layer): class NestSequentialNet(paddle.nn.Layer): + def __init__(self): super().__init__() group1 = paddle.nn.Sequential( paddle.nn.Linear(10, 10), - paddle.nn.Sigmoid(), ) + paddle.nn.Sigmoid(), + ) group2 = paddle.nn.Sequential( paddle.nn.Linear(10, 3), - paddle.nn.ReLU(), ) + paddle.nn.ReLU(), + ) self.layers = paddle.nn.Sequential(group1, group2) def forward(self, x): @@ -65,6 +70,7 @@ class NestSequentialNet(paddle.nn.Layer): class TestSequential(unittest.TestCase): + def setUp(self): paddle.set_device('cpu') self.seed = 2021 @@ -90,9 +96,9 @@ class TestSequential(unittest.TestCase): out = self.net(x) if to_static: load_out = self._test_load(self.net, x) - self.assertTrue( - np.allclose(load_out, out), - msg='load_out is {}\st_out is {}'.format(load_out, out)) + self.assertTrue(np.allclose(load_out, out), + msg='load_out is {}\st_out is {}'.format( + load_out, out)) return out @@ -100,9 +106,9 @@ class TestSequential(unittest.TestCase): paddle.jit.set_code_level(100) dy_out = self._run(to_static=False) st_out = self._run(to_static=True) - self.assertTrue( - np.allclose(dy_out, st_out), - msg='dygraph_res is {}\nstatic_res is {}'.format(dy_out, st_out)) + self.assertTrue(np.allclose(dy_out, st_out), + msg='dygraph_res is {}\nstatic_res is {}'.format( + dy_out, st_out)) def _test_load(self, net, x): paddle.jit.save(net, self.model_path) @@ -112,6 +118,7 @@ class TestSequential(unittest.TestCase): class TestNestSequential(TestSequential): + def _init_config(self): self.net = NestSequentialNet() self.model_path = os.path.join(self.temp_dir.name, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py index 2e2918facf8..38746337ce3 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py @@ -66,10 +66,11 @@ def dyfunc_with_third_library_logging(x_v): class TestRecursiveCall1(unittest.TestCase): + def setUp(self): self.input = np.random.random([10, 16]).astype('float32') - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.init_test_func() def init_test_func(self): @@ -90,26 +91,26 @@ class TestRecursiveCall1(unittest.TestCase): def test_transformed_static_result(self): static_res = self.get_static_output() dygraph_res = self.get_dygraph_output() - self.assertTrue( - np.allclose(dygraph_res, static_res), - msg='dygraph res is {}\nstatic_res is {}'.format(dygraph_res, - static_res)) + self.assertTrue(np.allclose(dygraph_res, static_res), + msg='dygraph res is {}\nstatic_res is {}'.format( + 
dygraph_res, static_res)) lambda_fun = lambda x: x class MyConvLayer(fluid.dygraph.Layer): + def __init__(self): super(MyConvLayer, self).__init__() self._conv = fluid.dygraph.Conv2D( num_channels=3, num_filters=2, filter_size=3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5))) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.99)), + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.5))) @paddle.jit.to_static def forward(self, inputs): @@ -125,6 +126,7 @@ class MyConvLayer(fluid.dygraph.Layer): class MyLayer(fluid.dygraph.Layer): + def __init__(self): super(MyLayer, self).__init__() @@ -133,10 +135,10 @@ class MyLayer(fluid.dygraph.Layer): input_dim=5, output_dim=1, act='relu', - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5))) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.99)), + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.5))) @paddle.jit.to_static def forward(self, inputs): @@ -146,10 +148,11 @@ class MyLayer(fluid.dygraph.Layer): class TestRecursiveCall2(unittest.TestCase): + def setUp(self): self.input = np.random.random((1, 3, 3, 5)).astype('float32') - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.set_func() def set_func(self): @@ -173,13 +176,13 @@ class TestRecursiveCall2(unittest.TestCase): def test_transformed_static_result(self): dygraph_res = self.get_dygraph_output() static_res = self.get_static_output() - self.assertTrue( - np.allclose(dygraph_res, static_res), - msg='dygraph is {}\n static_res is \n{}'.format(dygraph_res, - static_res)) + self.assertTrue(np.allclose(dygraph_res, static_res), + msg='dygraph is {}\n static_res is \n{}'.format( + dygraph_res, static_res)) class TestThirdPartyLibrary(TestRecursiveCall2): + def set_func(self): self.dygraph_func = dyfunc_with_third_library_logging @@ -205,6 +208,7 @@ def func_convert_then_not_to_static(x): class TestClass(paddle.nn.Layer): + @paddle.jit.not_to_static def called_member(self, x): return paddle.sum(x) @@ -216,6 +220,7 @@ class TestClass(paddle.nn.Layer): class TestNotToConvert(TestRecursiveCall2): + def set_func(self): self.dygraph_func = func_not_to_static @@ -226,16 +231,19 @@ class TestNotToConvert(TestRecursiveCall2): class TestNotToConvert2(TestRecursiveCall2): + def set_func(self): self.dygraph_func = func_convert_then_not_to_static class TestNotToConvert3(TestRecursiveCall2): + def set_func(self): self.dygraph_func = TestClass() class TestDynamicToStaticCode(unittest.TestCase): + def setUp(self): self.set_func() self.set_answer_func() @@ -244,7 +252,9 @@ class TestDynamicToStaticCode(unittest.TestCase): self.func = func_not_to_static def set_answer_func(self): + class StaticCode(): + @paddle.jit.not_to_static def func_not_to_static(x): res = func_sum(x) @@ -270,11 +280,14 @@ class TestDynamicToStaticCode(unittest.TestCase): class TestDynamicToStaticCode2(TestDynamicToStaticCode): + def set_func(self): self.func = func_convert_then_not_to_static def set_answer_func(self): + class StaticCode(): + def func_convert_then_not_to_static(x): y = _jst.convert_call(func_not_to_static)(x) return y diff --git 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call_generator.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call_generator.py index cfe9e191ed4..19645d6fd63 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call_generator.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call_generator.py @@ -40,6 +40,7 @@ def main_func(): class TestConvertGenerator(unittest.TestCase): + def test_raise_error(self): with self.assertRaises(Exception): to_static(main_func)() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_operators.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_operators.py index bb1942692fd..6188d6a786b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_operators.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_operators.py @@ -19,12 +19,14 @@ from paddle.jit.dy2static.convert_operators import eval_if_exist_else_none class CallNotExist(paddle.nn.Layer): + def __call__(self): # call a non-exist API to trigger exception return paddle.nn.not_exist_api class ForwardNotExist(paddle.nn.Layer): + def forward(self): return 0 @@ -34,7 +36,9 @@ setattr(net, "forward", "A string so that convert forward will fail") class TestConvertCall(unittest.TestCase): + def test_class_exception(self): + @paddle.jit.to_static def call_not_exist(): net = CallNotExist() @@ -52,9 +56,10 @@ class TestConvertCall(unittest.TestCase): class TestConvertShapeCompare(unittest.TestCase): + def test_non_variable(self): - self.assertEqual( - paddle.jit.dy2static.convert_shape_compare(1, "<", 2), True) + self.assertEqual(paddle.jit.dy2static.convert_shape_compare(1, "<", 2), + True) self.assertEqual( paddle.jit.dy2static.convert_shape_compare(1, "<", 2, "<=", 3), True) @@ -69,8 +74,9 @@ class TestConvertShapeCompare(unittest.TestCase): raise ValueError("Used for test") self.assertEqual( - paddle.jit.dy2static.convert_shape_compare( - 1, ">", 2, "<=", lambda: error_func()), False) + paddle.jit.dy2static.convert_shape_compare(1, ">", 2, "<=", + lambda: error_func()), + False) self.assertEqual( paddle.jit.dy2static.convert_shape_compare(1, "<", 2, "in", @@ -99,30 +105,30 @@ class TestConvertShapeCompare(unittest.TestCase): x = paddle.static.data(name='x', shape=[3, 2], dtype='float32') y = paddle.static.data(name='y', shape=[3, 2], dtype='float32') self.assertEqual( - paddle.jit.dy2static.convert_shape_compare(x, "is", x, "is not", - y), True) + paddle.jit.dy2static.convert_shape_compare( + x, "is", x, "is not", y), True) self.assertEqual( - paddle.jit.dy2static.convert_shape_compare(x, "is not", x, - "is not", y), False) + paddle.jit.dy2static.convert_shape_compare( + x, "is not", x, "is not", y), False) self.assertEqual( paddle.jit.dy2static.convert_shape_compare(x, "is", x, "is", y), False) eq_out = paddle.jit.dy2static.convert_shape_compare(x, "==", y) not_eq_out = paddle.jit.dy2static.convert_shape_compare(x, "!=", y) - long_eq_out = paddle.jit.dy2static.convert_shape_compare(x, "==", x, - "!=", y) + long_eq_out = paddle.jit.dy2static.convert_shape_compare( + x, "==", x, "!=", y) - place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace() exe = paddle.static.Executor(place) x_y_eq_out = exe.run(feed={ "x": np.ones([3, 2]).astype(np.float32), "y": np.ones([3, 2]).astype(np.float32) }, 
fetch_list=[eq_out, not_eq_out, long_eq_out]) - np.testing.assert_array_equal( - np.array(x_y_eq_out), np.array([[True], [False], [False]])) + np.testing.assert_array_equal(np.array(x_y_eq_out), + np.array([[True], [False], [False]])) set_a_zero = np.ones([3, 2]).astype(np.float32) set_a_zero[0][0] = 0.0 @@ -132,16 +138,16 @@ class TestConvertShapeCompare(unittest.TestCase): "y": set_a_zero }, fetch_list=[eq_out, not_eq_out, long_eq_out]) - np.testing.assert_array_equal( - np.array(x_y_not_eq_out), np.array([[False], [True], [True]])) + np.testing.assert_array_equal(np.array(x_y_not_eq_out), + np.array([[False], [True], [True]])) paddle.disable_static() class TestChooseShapeAttrOrApi(unittest.TestCase): + def test_api_shape_is_none(self): self.assertEqual( - paddle.jit.dy2static.choose_shape_attr_or_api([1, 2], None), - [1, 2]) + paddle.jit.dy2static.choose_shape_attr_or_api([1, 2], None), [1, 2]) self.assertEqual( paddle.jit.dy2static.choose_shape_attr_or_api([1], None), [1]) self.assertEqual( @@ -151,38 +157,37 @@ class TestChooseShapeAttrOrApi(unittest.TestCase): def test_attr_shape_is_int(self): x = paddle.zeros([1, 3, 5, 7]) self.assertEqual( - paddle.jit.dy2static.choose_shape_attr_or_api(x.shape[0], - paddle.shape(x)[0]), - 1) + paddle.jit.dy2static.choose_shape_attr_or_api( + x.shape[0], + paddle.shape(x)[0]), 1) self.assertEqual( - paddle.jit.dy2static.choose_shape_attr_or_api(x.shape[1], - paddle.shape(x)[1]), - 3) + paddle.jit.dy2static.choose_shape_attr_or_api( + x.shape[1], + paddle.shape(x)[1]), 3) self.assertEqual( - paddle.jit.dy2static.choose_shape_attr_or_api(-1, - paddle.shape(x)[0]), + paddle.jit.dy2static.choose_shape_attr_or_api( + -1, + paddle.shape(x)[0]), paddle.shape(x)[0]) self.assertEqual( - paddle.jit.dy2static.choose_shape_attr_or_api(-1, - paddle.shape(x), 0), + paddle.jit.dy2static.choose_shape_attr_or_api( + -1, paddle.shape(x), 0), paddle.shape(x)[0]) def test_positive_attr_shape(self): x = paddle.zeros([1, 3, 5, 7]) self.assertEqual( - paddle.jit.dy2static.choose_shape_attr_or_api(x.shape, - paddle.shape(x)), - x.shape) + paddle.jit.dy2static.choose_shape_attr_or_api( + x.shape, paddle.shape(x)), x.shape) self.assertEqual( - paddle.jit.dy2static.choose_shape_attr_or_api(x.shape, - paddle.shape(x), 3), - x.shape[3]) + paddle.jit.dy2static.choose_shape_attr_or_api( + x.shape, paddle.shape(x), 3), x.shape[3]) def test_negative_attr_shape(self): x = paddle.zeros([7]) self.assertEqual( - paddle.jit.dy2static.choose_shape_attr_or_api([-1], - paddle.shape(x), 0), + paddle.jit.dy2static.choose_shape_attr_or_api([-1], paddle.shape(x), + 0), paddle.shape(x)[0]) self.assertEqual( paddle.jit.dy2static.choose_shape_attr_or_api([-1], @@ -191,6 +196,7 @@ class TestChooseShapeAttrOrApi(unittest.TestCase): class TestEvaIfExistElseNone(unittest.TestCase): + def test_globals(self): global x_shape x_shape = [1, 2, 3] @@ -205,10 +211,10 @@ class TestEvaIfExistElseNone(unittest.TestCase): def foo(): y_shape = [2, 3, 4] - self.assertEqual( - eval_if_exist_else_none('x_shape', globals()), [1, 2, 3]) - self.assertEqual( - eval_if_exist_else_none('y_shape', locals()), [2, 3, 4]) + self.assertEqual(eval_if_exist_else_none('x_shape', globals()), + [1, 2, 3]) + self.assertEqual(eval_if_exist_else_none('y_shape', locals()), + [2, 3, 4]) foo() del x_shape @@ -220,17 +226,18 @@ class TestEvaIfExistElseNone(unittest.TestCase): global y_shape y_shape = [2, 3, 4] - self.assertEqual( - eval_if_exist_else_none('y_shape', globals()), [2, 3, 4]) + 
self.assertEqual(eval_if_exist_else_none('y_shape', globals()), + [2, 3, 4]) self.assertEqual(eval_if_exist_else_none('x_shape', locals()), None) - self.assertEqual( - eval_if_exist_else_none('x_shape', globals()), None) + self.assertEqual(eval_if_exist_else_none('x_shape', globals()), + None) del y_shape foo() def test_none(self): + def foo(): x_shape = [2, 3, 4] return x_shape @@ -239,6 +246,7 @@ class TestEvaIfExistElseNone(unittest.TestCase): class ShapeLayer(paddle.nn.Layer): + def __init__(self): super(ShapeLayer, self).__init__() @@ -253,6 +261,7 @@ class ShapeLayer(paddle.nn.Layer): class TestChooseShapeAttrOrApiWithLayer(unittest.TestCase): + def test_tensor_shape(self): x = paddle.zeros(shape=[4, 1], dtype='float32') net = ShapeLayer() @@ -262,6 +271,7 @@ class TestChooseShapeAttrOrApiWithLayer(unittest.TestCase): class TestIfElseNoValue(unittest.TestCase): + def test_else_ret_none(self): input_x = paddle.to_tensor([[1, 2, 3], [4, 5, 6]]) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py index 8a9a1e19205..2fe985490ba 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py @@ -64,6 +64,7 @@ program_translator = ProgramTranslator() class Cycle_Gan(fluid.dygraph.Layer): + def __init__(self, input_channel, istrain=True): super(Cycle_Gan, self).__init__() @@ -88,11 +89,9 @@ class Cycle_Gan(fluid.dygraph.Layer): cyc_B = self.build_generator_resnet_9blocks_a(fake_A) diff_A = fluid.layers.abs( - fluid.layers.elementwise_sub( - x=input_A, y=cyc_A)) + fluid.layers.elementwise_sub(x=input_A, y=cyc_A)) diff_B = fluid.layers.abs( - fluid.layers.elementwise_sub( - x=input_B, y=cyc_B)) + fluid.layers.elementwise_sub(x=input_B, y=cyc_B)) cyc_A_loss = fluid.layers.reduce_mean(diff_A) * lambda_A cyc_B_loss = fluid.layers.reduce_mean(diff_B) * lambda_B cyc_loss = cyc_A_loss + cyc_B_loss @@ -138,24 +137,23 @@ class Cycle_Gan(fluid.dygraph.Layer): class build_resnet_block(fluid.dygraph.Layer): + def __init__(self, dim, use_bias=False): super(build_resnet_block, self).__init__() - self.conv0 = conv2d( - num_channels=dim, - num_filters=dim, - filter_size=3, - stride=1, - stddev=0.02, - use_bias=False) - self.conv1 = conv2d( - num_channels=dim, - num_filters=dim, - filter_size=3, - stride=1, - stddev=0.02, - relu=False, - use_bias=False) + self.conv0 = conv2d(num_channels=dim, + num_filters=dim, + filter_size=3, + stride=1, + stddev=0.02, + use_bias=False) + self.conv1 = conv2d(num_channels=dim, + num_filters=dim, + filter_size=3, + stride=1, + stddev=0.02, + relu=False, + use_bias=False) self.dim = dim def forward(self, inputs): @@ -168,30 +166,28 @@ class build_resnet_block(fluid.dygraph.Layer): class build_generator_resnet_9blocks(fluid.dygraph.Layer): + def __init__(self, input_channel): super(build_generator_resnet_9blocks, self).__init__() - self.conv0 = conv2d( - num_channels=input_channel, - num_filters=32, - filter_size=7, - stride=1, - padding=0, - stddev=0.02) - self.conv1 = conv2d( - num_channels=32, - num_filters=64, - filter_size=3, - stride=2, - padding=1, - stddev=0.02) - self.conv2 = conv2d( - num_channels=64, - num_filters=128, - filter_size=3, - stride=2, - padding=1, - stddev=0.02) + self.conv0 = conv2d(num_channels=input_channel, + num_filters=32, + filter_size=7, + stride=1, + padding=0, + stddev=0.02) + self.conv1 = conv2d(num_channels=32, + num_filters=64, + 
filter_size=3, + stride=2, + padding=1, + stddev=0.02) + self.conv2 = conv2d(num_channels=64, + num_filters=128, + filter_size=3, + stride=2, + padding=1, + stddev=0.02) self.build_resnet_block_list = [] dim = 128 for i in range(9): @@ -205,25 +201,24 @@ class build_generator_resnet_9blocks(fluid.dygraph.Layer): stride=2, stddev=0.02, padding=[1, 1], - outpadding=[0, 1, 0, 1], ) - self.deconv1 = DeConv2D( - num_channels=32 * 2, - num_filters=32, - filter_size=3, - stride=2, - stddev=0.02, - padding=[1, 1], - outpadding=[0, 1, 0, 1]) - self.conv3 = conv2d( - num_channels=32, - num_filters=input_channel, - filter_size=7, - stride=1, - stddev=0.02, - padding=0, - relu=False, - norm=False, - use_bias=True) + outpadding=[0, 1, 0, 1], + ) + self.deconv1 = DeConv2D(num_channels=32 * 2, + num_filters=32, + filter_size=3, + stride=2, + stddev=0.02, + padding=[1, 1], + outpadding=[0, 1, 0, 1]) + self.conv3 = conv2d(num_channels=32, + num_filters=input_channel, + filter_size=7, + stride=1, + stddev=0.02, + padding=0, + relu=False, + norm=False, + use_bias=True) def forward(self, inputs): pad_input = fluid.layers.pad2d(inputs, [3, 3, 3, 3], mode="reflect") @@ -241,53 +236,49 @@ class build_generator_resnet_9blocks(fluid.dygraph.Layer): class build_gen_discriminator(fluid.dygraph.Layer): + def __init__(self, input_channel): super(build_gen_discriminator, self).__init__() - self.conv0 = conv2d( - num_channels=input_channel, - num_filters=64, - filter_size=4, - stride=2, - stddev=0.02, - padding=1, - norm=False, - use_bias=True, - relufactor=0.2) - self.conv1 = conv2d( - num_channels=64, - num_filters=128, - filter_size=4, - stride=2, - stddev=0.02, - padding=1, - relufactor=0.2) - self.conv2 = conv2d( - num_channels=128, - num_filters=IMAGE_SIZE, - filter_size=4, - stride=2, - stddev=0.02, - padding=1, - relufactor=0.2) - self.conv3 = conv2d( - num_channels=IMAGE_SIZE, - num_filters=512, - filter_size=4, - stride=1, - stddev=0.02, - padding=1, - relufactor=0.2) - self.conv4 = conv2d( - num_channels=512, - num_filters=1, - filter_size=4, - stride=1, - stddev=0.02, - padding=1, - norm=False, - relu=False, - use_bias=True) + self.conv0 = conv2d(num_channels=input_channel, + num_filters=64, + filter_size=4, + stride=2, + stddev=0.02, + padding=1, + norm=False, + use_bias=True, + relufactor=0.2) + self.conv1 = conv2d(num_channels=64, + num_filters=128, + filter_size=4, + stride=2, + stddev=0.02, + padding=1, + relufactor=0.2) + self.conv2 = conv2d(num_channels=128, + num_filters=IMAGE_SIZE, + filter_size=4, + stride=2, + stddev=0.02, + padding=1, + relufactor=0.2) + self.conv3 = conv2d(num_channels=IMAGE_SIZE, + num_filters=512, + filter_size=4, + stride=1, + stddev=0.02, + padding=1, + relufactor=0.2) + self.conv4 = conv2d(num_channels=512, + num_filters=1, + filter_size=4, + stride=1, + stddev=0.02, + padding=1, + norm=False, + relu=False, + use_bias=True) def forward(self, inputs): y = self.conv0(inputs) @@ -320,18 +311,17 @@ class conv2d(fluid.dygraph.Layer): con_bias_attr = fluid.ParamAttr( initializer=fluid.initializer.Constant(0.0)) - self.conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - use_cudnn=use_cudnn, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=stddev)), - bias_attr=con_bias_attr) - # Note(Aurelius84): The calculation of GPU kernel in BN is non-deterministic, + self.conv = Conv2D(num_channels=num_channels, + num_filters=num_filters, + 
filter_size=filter_size, + stride=stride, + padding=padding, + use_cudnn=use_cudnn, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.NormalInitializer( + loc=0.0, scale=stddev)), + bias_attr=con_bias_attr) + # Note(Aurelius84): The calculation of GPU kernel in BN is non-deterministic, # failure rate is 1/100 in Dev but seems incremental in CE platform. # If on GPU, we disable BN temporarily. if fluid.is_compiled_with_cuda(): @@ -361,6 +351,7 @@ class conv2d(fluid.dygraph.Layer): class DeConv2D(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters=64, @@ -389,8 +380,8 @@ class DeConv2D(fluid.dygraph.Layer): padding=padding, use_cudnn=use_cudnn, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=stddev)), + initializer=fluid.initializer.NormalInitializer(loc=0.0, + scale=stddev)), bias_attr=de_bias_attr) if fluid.is_compiled_with_cuda(): norm = False @@ -412,8 +403,10 @@ class DeConv2D(fluid.dygraph.Layer): def forward(self, inputs): conv = self._deconv(inputs) - conv = fluid.layers.pad2d( - conv, paddings=self.outpadding, mode='constant', pad_value=0.0) + conv = fluid.layers.pad2d(conv, + paddings=self.outpadding, + mode='constant', + pad_value=0.0) if self.norm: conv = self.bn(conv) @@ -423,6 +416,7 @@ class DeConv2D(fluid.dygraph.Layer): class ImagePool(object): + def __init__(self, pool_size=50): self.pool = [] self.count = 0 @@ -445,6 +439,7 @@ class ImagePool(object): def reader_creater(): + def reader(): while True: fake_image = np.uint8( @@ -482,15 +477,14 @@ class Args(object): def optimizer_setting(parameters): lr = 0.0002 - optimizer = fluid.optimizer.Adam( - learning_rate=fluid.layers.piecewise_decay( - boundaries=[ - 100 * step_per_epoch, 120 * step_per_epoch, - 140 * step_per_epoch, 160 * step_per_epoch, 180 * step_per_epoch - ], - values=[lr, lr * 0.8, lr * 0.6, lr * 0.4, lr * 0.2, lr * 0.1]), - parameter_list=parameters, - beta1=0.5) + optimizer = fluid.optimizer.Adam(learning_rate=fluid.layers.piecewise_decay( + boundaries=[ + 100 * step_per_epoch, 120 * step_per_epoch, 140 * step_per_epoch, + 160 * step_per_epoch, 180 * step_per_epoch + ], + values=[lr, lr * 0.8, lr * 0.6, lr * 0.4, lr * 0.2, lr * 0.1]), + parameter_list=parameters, + beta1=0.5) return optimizer @@ -534,11 +528,11 @@ def train(args, to_static): s_time = time.time() data_A = np.array( - [data_A[0].reshape(3, IMAGE_SIZE, IMAGE_SIZE)]).astype( - "float32") + [data_A[0].reshape(3, IMAGE_SIZE, + IMAGE_SIZE)]).astype("float32") data_B = np.array( - [data_B[0].reshape(3, IMAGE_SIZE, IMAGE_SIZE)]).astype( - "float32") + [data_B[0].reshape(3, IMAGE_SIZE, + IMAGE_SIZE)]).astype("float32") data_A = to_variable(data_A) data_B = to_variable(data_B) @@ -552,19 +546,19 @@ def train(args, to_static): fake_pool_B = B_pool.pool_image(fake_B).numpy() fake_pool_B = np.array( - [fake_pool_B[0].reshape(3, IMAGE_SIZE, IMAGE_SIZE)]).astype( - "float32") + [fake_pool_B[0].reshape(3, IMAGE_SIZE, + IMAGE_SIZE)]).astype("float32") fake_pool_B = to_variable(fake_pool_B) fake_pool_A = A_pool.pool_image(fake_A).numpy() fake_pool_A = np.array( - [fake_pool_A[0].reshape(3, IMAGE_SIZE, IMAGE_SIZE)]).astype( - "float32") + [fake_pool_A[0].reshape(3, IMAGE_SIZE, + IMAGE_SIZE)]).astype("float32") fake_pool_A = to_variable(fake_pool_A) # optimize the d_A network - rec_B, fake_pool_rec_B = cycle_gan.discriminatorA(data_B, - fake_pool_B) + rec_B, fake_pool_rec_B = cycle_gan.discriminatorA( + data_B, fake_pool_B) d_loss_A = (fluid.layers.square(fake_pool_rec_B) + 
fluid.layers.square(rec_B - 1)) / 2.0 d_loss_A = fluid.layers.reduce_mean(d_loss_A) @@ -574,8 +568,8 @@ def train(args, to_static): cycle_gan.clear_gradients() # optimize the d_B network - rec_A, fake_pool_rec_A = cycle_gan.discriminatorB(data_A, - fake_pool_A) + rec_A, fake_pool_rec_A = cycle_gan.discriminatorB( + data_A, fake_pool_A) d_loss_B = (fluid.layers.square(fake_pool_rec_A) + fluid.layers.square(rec_A - 1)) / 2.0 d_loss_B = fluid.layers.reduce_mean(d_loss_B) @@ -596,8 +590,8 @@ def train(args, to_static): t_time += batch_time if batch_id % args.log_step == 0: print( - "batch: {}\t Batch_time_cost: {}\n g_loss: {}\t d_A_loss: {}\t d_B_loss:{}\n g_A_loss: {}\t g_A_cyc_loss: {}\t g_A_idt_loss: {}\n g_B_loss: {}\t g_B_cyc_loss: {}\t g_B_idt_loss: {}". - format(batch_id, batch_time, *cur_batch_loss)) + "batch: {}\t Batch_time_cost: {}\n g_loss: {}\t d_A_loss: {}\t d_B_loss:{}\n g_A_loss: {}\t g_A_cyc_loss: {}\t g_A_idt_loss: {}\n g_B_loss: {}\t g_B_cyc_loss: {}\t g_B_idt_loss: {}" + .format(batch_id, batch_time, *cur_batch_loss)) if batch_id > args.train_step: break @@ -607,6 +601,7 @@ def train(args, to_static): class TestCycleGANModel(unittest.TestCase): + def setUp(self): self.args = Args() @@ -619,15 +614,14 @@ class TestCycleGANModel(unittest.TestCase): dy_out = self.train(to_static=False) assert_func = np.allclose - # Note(Aurelius84): Because we disable BN on GPU, + # Note(Aurelius84): Because we disable BN on GPU, # but here we enhance the check on CPU by `np.array_equal` # which means the dy_out and st_out shall be exactly same. if not fluid.is_compiled_with_cuda(): assert_func = np.array_equal - self.assertTrue( - assert_func(dy_out, st_out), - msg="dy_out:\n {}\n st_out:\n{}".format(dy_out, st_out)) + self.assertTrue(assert_func(dy_out, st_out), + msg="dy_out:\n {}\n st_out:\n{}".format(dy_out, st_out)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py index 35dfe550552..ef9eff26518 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py @@ -28,6 +28,7 @@ program_trans = ProgramTranslator() class SimpleNet(Layer): + def __init__(self): super(SimpleNet, self).__init__() self.linear = fluid.dygraph.Linear(10, 3) @@ -83,6 +84,7 @@ class SimpleNet(Layer): class TestStaticFunctionInstance(unittest.TestCase): + def test_instance_same_class(self): with fluid.dygraph.guard(fluid.CPUPlace()): net_1 = SimpleNet() @@ -100,6 +102,7 @@ class TestStaticFunctionInstance(unittest.TestCase): class TestInputSpec(unittest.TestCase): + def setUp(self): self.temp_dir = tempfile.TemporaryDirectory() self.model_path = os.path.join(self.temp_dir.name, 'simple_net') @@ -157,12 +160,12 @@ class TestInputSpec(unittest.TestCase): # 2. requires len(input_spec) <= len(args) with self.assertRaises(ValueError): - net.add_func = declarative( - net.add_func, - input_spec=[ - InputSpec([-1, 10]), InputSpec([-1, 10]), - InputSpec([10]) - ]) + net.add_func = declarative(net.add_func, + input_spec=[ + InputSpec([-1, 10]), + InputSpec([-1, 10]), + InputSpec([10]) + ]) net.add_func(x, y) def test_concrete_program(self): @@ -175,10 +178,8 @@ class TestInputSpec(unittest.TestCase): # We can get concrete_program by specificing InputSpec information. Faking input is no need. 
net.add_func = declarative( net.add_func, - input_spec=[ - InputSpec([-1, 10]), InputSpec( - [-1, 10], name='y') - ]) + input_spec=[InputSpec([-1, 10]), + InputSpec([-1, 10], name='y')]) cp1 = net.add_func.concrete_program self.assertTrue(cp1.inputs[-1].shape == (-1, 10)) self.assertTrue(cp1.inputs[-1].name == 'y') @@ -186,8 +187,8 @@ class TestInputSpec(unittest.TestCase): # generate another program net.add_func = declarative( net.add_func, - input_spec=[InputSpec([10]), InputSpec( - [10], name='label')]) + input_spec=[InputSpec([10]), + InputSpec([10], name='label')]) cp2 = net.add_func.concrete_program self.assertTrue(cp2.inputs[-1].shape == (10, )) self.assertTrue(cp2.inputs[-1].name == 'label') @@ -203,6 +204,7 @@ def foo_func(a, b, c=1, d=2): class TestDifferentInputSpecCacheProgram(unittest.TestCase): + def setUp(self): program_trans.enable(True) @@ -248,25 +250,26 @@ class TestDifferentInputSpecCacheProgram(unittest.TestCase): foo = declarative(foo_func) # 1. specific InputSpec for `x`/`y` - concrete_program_1 = foo.get_concrete_program( - InputSpec([None, 10]), InputSpec([10])) + concrete_program_1 = foo.get_concrete_program(InputSpec([None, 10]), + InputSpec([10])) self.assertTrue(len(foo.program_cache) == 1) # 2. specific `c`/`d` explicitly with same default value - concrete_program_2 = foo.get_concrete_program( - InputSpec([None, 10]), InputSpec([10]), 1, 2) + concrete_program_2 = foo.get_concrete_program(InputSpec([None, 10]), + InputSpec([10]), 1, 2) self.assertTrue(concrete_program_2 == concrete_program_1) self.assertTrue(len(foo.program_cache) == 1) # 3. specific `c` = 2 - concrete_program_3 = foo.get_concrete_program( - InputSpec([None, 10]), InputSpec([10]), c=2) + concrete_program_3 = foo.get_concrete_program(InputSpec([None, 10]), + InputSpec([10]), + c=2) self.assertTrue(concrete_program_3 != concrete_program_1) self.assertTrue(len(foo.program_cache) == 2) # 4. specific x.shape = [10] - concrete_program_4 = foo.get_concrete_program( - InputSpec([10]), InputSpec([10])) + concrete_program_4 = foo.get_concrete_program(InputSpec([10]), + InputSpec([10])) self.assertTrue(concrete_program_4 != concrete_program_1) self.assertTrue(len(foo.program_cache) == 3) @@ -276,20 +279,19 @@ class TestDifferentInputSpecCacheProgram(unittest.TestCase): # 6. 
specific unknown kwargs `e`=4 with self.assertRaises(TypeError): - concrete_program_5 = foo.get_concrete_program( - InputSpec([10]), InputSpec([10]), e=4) + concrete_program_5 = foo.get_concrete_program(InputSpec([10]), + InputSpec([10]), + e=4) def test_concrete_program(self): with fluid.dygraph.guard(fluid.CPUPlace()): # usage 1 - foo_1 = paddle.jit.to_static( - foo_func, - input_spec=[ - InputSpec( - [10], name='x'), InputSpec( - [10], name='y') - ]) + foo_1 = paddle.jit.to_static(foo_func, + input_spec=[ + InputSpec([10], name='x'), + InputSpec([10], name='y') + ]) self.assertTrue(isinstance(foo_1.concrete_program, ConcreteProgram)) # usage 2 @@ -304,6 +306,7 @@ class TestDifferentInputSpecCacheProgram(unittest.TestCase): class TestInputDefaultName(unittest.TestCase): + def setUp(self): paddle.disable_static() self.net = SimpleNet() @@ -328,6 +331,7 @@ class TestInputDefaultName(unittest.TestCase): class TestDeclarativeAPI(unittest.TestCase): + def test_error(self): func = declarative(dyfunc_to_variable) @@ -346,6 +350,7 @@ class TestDeclarativeAPI(unittest.TestCase): class TestDecorateModelDirectly(unittest.TestCase): + def setUp(self): paddle.disable_static() program_trans.enable(True) @@ -372,6 +377,7 @@ class TestDecorateModelDirectly(unittest.TestCase): class TestErrorWithInitFromStaticMode(unittest.TestCase): + def test_raise_error(self): # disable imperative paddle.enable_static() @@ -391,6 +397,7 @@ class TestErrorWithInitFromStaticMode(unittest.TestCase): class CallNonForwardFuncNet(paddle.nn.Layer): + def __init__(self): super(CallNonForwardFuncNet, self).__init__() self.sub = CallNonForwardFuncSubNet() @@ -401,6 +408,7 @@ class CallNonForwardFuncNet(paddle.nn.Layer): class CallNonForwardFuncSubNet(paddle.nn.Layer): + def __init__(self): super(CallNonForwardFuncSubNet, self).__init__() self.a = paddle.to_tensor([1, 2]) @@ -411,6 +419,7 @@ class CallNonForwardFuncSubNet(paddle.nn.Layer): class TestCallNonForwardFunc(unittest.TestCase): + def test_call_non_forward(self): paddle.disable_static() net = CallNonForwardFuncNet() @@ -420,6 +429,7 @@ class TestCallNonForwardFunc(unittest.TestCase): class SetBuffersNet1(paddle.nn.Layer): + def __init__(self): super(SetBuffersNet1, self).__init__() self.a = paddle.to_tensor([1]) @@ -431,6 +441,7 @@ class SetBuffersNet1(paddle.nn.Layer): class SetBuffersNet2(paddle.nn.Layer): + def __init__(self): super(SetBuffersNet2, self).__init__() self.b = paddle.to_tensor([2]) @@ -443,6 +454,7 @@ class SetBuffersNet2(paddle.nn.Layer): class TestSetBuffers(unittest.TestCase): + def setUp(self): self.temp_dir = tempfile.TemporaryDirectory() self.model_path = os.path.join(self.temp_dir.name, 'SetBuffersNet1') diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py index a7be8855764..e8999acce0e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py @@ -23,31 +23,30 @@ import paddle.fluid as fluid from paddle.jit import to_static from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTranslator -PLACE = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace( -) +PLACE = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() class SubNetWithDict(fluid.dygraph.Layer): + def __init__(self, hidden_size=16, output_size=16): super(SubNetWithDict, self).__init__() - init_weight = lambda x: 
fluid.ParamAttr(initializer=fluid.initializer.Constant(x)) - - self.q_fc = fluid.dygraph.Linear( - input_dim=hidden_size, - output_dim=output_size, - bias_attr=False, - param_attr=init_weight(0.6)) - self.k_fc = fluid.dygraph.Linear( - input_dim=hidden_size, - output_dim=output_size, - bias_attr=False, - param_attr=init_weight(0.5)) - self.v_fc = fluid.dygraph.Linear( - input_dim=hidden_size, - output_dim=output_size, - bias_attr=False, - param_attr=init_weight(0.2)) + init_weight = lambda x: fluid.ParamAttr(initializer=fluid.initializer. + Constant(x)) + + self.q_fc = fluid.dygraph.Linear(input_dim=hidden_size, + output_dim=output_size, + bias_attr=False, + param_attr=init_weight(0.6)) + self.k_fc = fluid.dygraph.Linear(input_dim=hidden_size, + output_dim=output_size, + bias_attr=False, + param_attr=init_weight(0.5)) + self.v_fc = fluid.dygraph.Linear(input_dim=hidden_size, + output_dim=output_size, + bias_attr=False, + param_attr=init_weight(0.2)) def forward(self, input, cache=None): input = fluid.dygraph.to_variable(input) @@ -70,6 +69,7 @@ class SubNetWithDict(fluid.dygraph.Layer): class MainNetWithDict(fluid.dygraph.Layer): + def __init__(self, batch_size=64, hidden_size=16, output_size=16): super(MainNetWithDict, self).__init__() self.batch_size = batch_size @@ -81,11 +81,13 @@ class MainNetWithDict(fluid.dygraph.Layer): def forward(self, input, max_len=4): input = fluid.dygraph.to_variable(input) cache = { - "k": fluid.layers.fill_constant( + "k": + fluid.layers.fill_constant( shape=[self.batch_size, self.output_size], dtype='float32', value=0), - "v": fluid.layers.fill_constant( + "v": + fluid.layers.fill_constant( shape=[self.batch_size, self.output_size], dtype='float32', value=0), @@ -166,10 +168,11 @@ def test_dic_pop_2(x): class TestDictPop(unittest.TestCase): + def setUp(self): self.input = np.random.random((3)).astype('int32') - self.place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda( - ) else paddle.CPUPlace() + self.place = paddle.CUDAPlace( + 0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace() self._set_test_func() def _set_test_func(self): @@ -192,18 +195,19 @@ class TestDictPop(unittest.TestCase): def test_transformed_result(self): dygraph_res = self._run_dygraph() static_res = self._run_static() - self.assertTrue( - np.allclose(dygraph_res, static_res), - msg='dygraph result is {}\nstatic result is {}'.format(dygraph_res, - static_res)) + self.assertTrue(np.allclose(dygraph_res, static_res), + msg='dygraph result is {}\nstatic result is {}'.format( + dygraph_res, static_res)) class TestDictPop2(TestDictPop): + def _set_test_func(self): self.dygraph_func = test_dic_pop_2 class NetWithDictPop(paddle.nn.Layer): + def __init__(self): super(NetWithDictPop, self).__init__() @@ -220,6 +224,7 @@ class NetWithDictPop(paddle.nn.Layer): class TestDictPop3(TestNetWithDict): + def setUp(self): self.x = np.array([2, 2]).astype('float32') @@ -235,14 +240,15 @@ class TestDictPop3(TestNetWithDict): dygraph_result = self._run_dygraph() static_result = self._run_static() - self.assertTrue( - (dygraph_result == static_result).all(), - msg="dygraph result: {}\nstatic result: {}".format(dygraph_result, - static_result)) + self.assertTrue((dygraph_result == static_result).all(), + msg="dygraph result: {}\nstatic result: {}".format( + dygraph_result, static_result)) class TestDictCmpInFor(unittest.TestCase): + def test_with_for(self): + def func(): pos = [1, 3] neg = [-1, -3] @@ -259,6 +265,7 @@ class TestDictCmpInFor(unittest.TestCase): 
self.assertEqual(paddle.jit.to_static(func)()['minus'], 8) def test_with_for_enumerate(self): + def func(): pos = [1, 3] neg = [-1, -3] diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_drop_path.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_drop_path.py index 7383c834ba9..d5c83235747 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_drop_path.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_drop_path.py @@ -26,6 +26,7 @@ def drop_path(x, training=False): class DropPath(paddle.nn.Layer): + def __init__(self): super(DropPath, self).__init__() @@ -35,6 +36,7 @@ class DropPath(paddle.nn.Layer): class TestTrainEval(unittest.TestCase): + def setUp(self): self.model = DropPath() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_fetch_feed.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_fetch_feed.py index 146608cb07a..555e71ce9a0 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_fetch_feed.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_fetch_feed.py @@ -25,10 +25,13 @@ SEED = 2020 class Pool2D(fluid.dygraph.Layer): + def __init__(self): super(Pool2D, self).__init__() - self.pool2d = fluid.dygraph.Pool2D( - pool_size=2, pool_type='avg', pool_stride=1, global_pooling=False) + self.pool2d = fluid.dygraph.Pool2D(pool_size=2, + pool_type='avg', + pool_stride=1, + global_pooling=False) @declarative def forward(self, x): @@ -41,6 +44,7 @@ class Pool2D(fluid.dygraph.Layer): class Linear(fluid.dygraph.Layer): + def __init__(self, input_dim=10, output_dim=5): super(Linear, self).__init__() self.fc = fluid.dygraph.Linear( @@ -60,6 +64,7 @@ class Linear(fluid.dygraph.Layer): class TestPool2D(unittest.TestCase): + def setUp(self): self.dygraph_class = Pool2D self.data = np.random.random((1, 2, 4, 4)).astype('float32') @@ -87,13 +92,13 @@ class TestPool2D(unittest.TestCase): dygraph_res = self.train_dygraph() static_res = self.train_static() - self.assertTrue( - np.allclose(dygraph_res, static_res), - msg='dygraph_res is {}\n static_res is \n{}'.format(dygraph_res, - static_res)) + self.assertTrue(np.allclose(dygraph_res, static_res), + msg='dygraph_res is {}\n static_res is \n{}'.format( + dygraph_res, static_res)) class TestLinear(TestPool2D): + def setUp(self): self.dygraph_class = Linear self.data = np.random.random((4, 10)).astype('float32') diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_for_enumerate.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_for_enumerate.py index 337e9cd7202..4c69849ccbd 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_for_enumerate.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_for_enumerate.py @@ -37,7 +37,7 @@ def for_in_range(x): return z -# 1. for iter list +# 1. for iter list @paddle.jit.to_static def for_iter_list(x_array): z = fluid.layers.fill_constant([1], 'int32', 0) @@ -292,6 +292,7 @@ def for_tuple_as_enumerate_value(x_array): # 20. test for function in a class class ForwardContainsForLayer(paddle.nn.Layer): + def __init__(self): super(ForwardContainsForLayer, self).__init__() self.high = 5 @@ -307,7 +308,7 @@ class ForwardContainsForLayer(paddle.nn.Layer): return z -# 21. for original list +# 21. for original list @paddle.jit.to_static def for_original_list(): z = fluid.layers.fill_constant([1], 'int32', 0) @@ -327,7 +328,8 @@ def for_original_tuple(): # 23. 
for zip error @paddle.jit.to_static( - input_spec=[InputSpec(shape=[None, 10]), InputSpec(shape=[None, 10])]) + input_spec=[InputSpec(shape=[None, 10]), + InputSpec(shape=[None, 10])]) def for_zip_error(x, y): for i, j in zip(x, y): a = i + j @@ -336,7 +338,8 @@ def for_zip_error(x, y): # 24. for zip @paddle.jit.to_static( - input_spec=[InputSpec(shape=[2, 10]), InputSpec(shape=[2, 10])]) + input_spec=[InputSpec(shape=[2, 10]), + InputSpec(shape=[2, 10])]) def for_zip(x, y): for i, j in zip(x, y): a = i + j @@ -344,9 +347,10 @@ def for_zip(x, y): class TestTransformBase(unittest.TestCase): + def setUp(self): - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.set_input() self.set_test_func() @@ -370,6 +374,7 @@ class TestTransformBase(unittest.TestCase): class TestTransform(TestTransformBase): + def transformed_result_compare(self): dy_outs = self.get_dygraph_output() if not isinstance(dy_outs, (tuple, list)): @@ -384,6 +389,7 @@ class TestTransform(TestTransformBase): class TestTransformForOriginalList(TestTransform): + def _run(self, to_static): program_translator.enable(to_static) with fluid.dygraph.guard(): @@ -391,6 +397,7 @@ class TestTransformForOriginalList(TestTransform): class TestTransformError(TestTransformBase): + def transformed_error(self, etype): with self.assertRaises(etype): dy_out = self.get_dygraph_output() @@ -398,6 +405,7 @@ class TestTransformError(TestTransformBase): class TestForInRange(TestTransform): + def set_input(self): self.input = np.array([5]) @@ -409,6 +417,7 @@ class TestForInRange(TestTransform): class TestForIterList(TestTransform): + def set_test_func(self): self.dygraph_func = for_iter_list @@ -417,16 +426,19 @@ class TestForIterList(TestTransform): class TestForEnumerateSimple(TestForIterList): + def set_test_func(self): self.dygraph_func = for_enumerate_list class TestForInRangeWithBreak(TestForInRange): + def set_test_func(self): self.dygraph_func = for_in_range_with_break class TestForIterVarNumpy(TestTransform): + def set_input(self): self.input = np.array([1, 2, 3, 4, 5]) @@ -438,86 +450,103 @@ class TestForIterVarNumpy(TestTransform): class TestForEnumerateVarNumpy(TestForIterVarNumpy): + def set_test_func(self): self.dygraph_func = for_enumerate_var_numpy class TestForEnumerateVarNumpyWithStart(TestForIterVarNumpy): + def set_test_func(self): self.dygraph_func = for_enumerate_var_numpy_with_start class TestForEnumerateVarNumpyWithBreak(TestForIterVarNumpy): + def set_test_func(self): self.dygraph_func = for_enumerate_var_numpy_with_break class TestForEnumerateVarNumpyWithContinue(TestForIterVarNumpy): + def set_test_func(self): self.dygraph_func = for_enumerate_var_numpy_with_continue class TestForEnumerateVarNumpyWithStartAndBreak(TestForIterVarNumpy): + def set_test_func(self): self.dygraph_func = for_enumerate_var_numpy_with_start_break class TestForEnumerateVarNumpyWithStartAndContinue(TestForIterVarNumpy): + def set_test_func(self): self.dygraph_func = for_enumerate_var_numpy_with_start_continue class TestForIterVar(TestForIterVarNumpy): + def set_test_func(self): self.dygraph_func = for_iter_var class TestForIterVarIdx(TestForIterVarNumpy): + def set_test_func(self): self.dygraph_func = for_iter_var_idx class TestForEnumerateVar(TestForIterVarNumpy): + def set_test_func(self): self.dygraph_func = for_enumerate_var class TestForEnumerateVarWithNestedRange(TestForIterVarNumpy): + def set_test_func(self): 
self.dygraph_func = for_enumerate_var_with_nested_range class TestForIterVarList(TestForInRange): + def set_test_func(self): self.dygraph_func = for_iter_var_list class TestForEnumerateVarList(TestForInRange): + def set_test_func(self): self.dygraph_func = for_enumerate_var_list class TestForTupleAsIterVar(TestForIterVarNumpy): + def set_test_func(self): self.dygraph_func = for_tuple_as_iter_var class TestForTupleAsEnumerateIter(TestForIterVarNumpy): + def set_test_func(self): self.dygraph_func = for_tuple_as_enumerate_iter class TestForTupleAsEnumerateValue(TestForIterVarNumpy): + def set_test_func(self): self.dygraph_func = for_tuple_as_enumerate_value class TestForwardContainsForLayer(TestForIterVarNumpy): + def set_test_func(self): self.dygraph_func = ForwardContainsForLayer() class TestForOriginalList(TestTransformForOriginalList): + def set_test_func(self): self.dygraph_func = for_original_list @@ -526,6 +555,7 @@ class TestForOriginalList(TestTransformForOriginalList): class TestForOriginalTuple(TestTransformForOriginalList): + def set_test_func(self): self.dygraph_func = for_original_tuple @@ -534,6 +564,7 @@ class TestForOriginalTuple(TestTransformForOriginalList): class TestForZip(unittest.TestCase): + def setUp(self): self.temp_dir = tempfile.TemporaryDirectory() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_full_name_usage.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_full_name_usage.py index 4f7fa65ee9c..33b50af7c6d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_full_name_usage.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_full_name_usage.py @@ -46,6 +46,7 @@ def decorated_call_decorated(x): class DoubleDecorated(object): + @classmethod @declarative def double_decorated_func1(self, x): @@ -58,6 +59,7 @@ class DoubleDecorated(object): class TestFullNameDecorator(unittest.TestCase): + def test_run_success(self): x = np.ones([1, 2]).astype("float32") answer = np.zeros([1, 2]).astype("float32") @@ -74,6 +76,7 @@ class TestFullNameDecorator(unittest.TestCase): class TestImportProgramTranslator(unittest.TestCase): + def test_diff_pkg_same_cls(self): dygraph_prog_trans = fluid.dygraph.ProgramTranslator() dy_to_stat_prog_trans = fluid.dygraph.dygraph_to_static.ProgramTranslator( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_function_spec.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_function_spec.py index c242bb34626..9fdb6e7c6d3 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_function_spec.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_function_spec.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestFunctionSpec(unittest.TestCase): + def test_constructor(self): foo_spec = FunctionSpec(foo_func) args_name = foo_spec.args_name @@ -50,10 +51,11 @@ class TestFunctionSpec(unittest.TestCase): self.assertTrue(len(kwargs) == 0) # case 2: foo(a=10, b=20, d=4) - args, kwargs = foo_spec.unified_args_and_kwargs( - [], {'a': 10, - 'b': 20, - 'd': 4}) + args, kwargs = foo_spec.unified_args_and_kwargs([], { + 'a': 10, + 'b': 20, + 'd': 4 + }) self.assertTupleEqual(args, (10, 20, 1, 4)) self.assertTrue(len(kwargs) == 0) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grad.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grad.py index b5160e210c1..f7eccf1f9e7 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grad.py +++ 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grad.py @@ -22,6 +22,7 @@ import tempfile class GradLayer(paddle.nn.Layer): + def __init__(self): super(GradLayer, self).__init__() @@ -34,6 +35,7 @@ class GradLayer(paddle.nn.Layer): class GradLinearLayer(paddle.nn.Layer): + def __init__(self): super(GradLinearLayer, self).__init__() self.linear = paddle.nn.Linear(5, 5, bias_attr=False) @@ -45,12 +47,15 @@ class GradLinearLayer(paddle.nn.Layer): for i in range(10): tmp = self.linear(tmp) out = tmp - dx = paddle.grad( - [out], [x], None, create_graph=True, allow_unused=False)[0] + dx = paddle.grad([out], [x], + None, + create_graph=True, + allow_unused=False)[0] return dx class NoGradLinearLayer(paddle.nn.Layer): + def __init__(self): super(NoGradLinearLayer, self).__init__() self.linear = paddle.nn.Linear(5, 5, bias_attr=False) @@ -67,6 +72,7 @@ class NoGradLinearLayer(paddle.nn.Layer): class TestGrad(unittest.TestCase): + def setUp(self): self.func = GradLayer() self.x = paddle.ones(shape=[10, 2, 5], dtype='float32') @@ -86,6 +92,7 @@ class TestGrad(unittest.TestCase): class TestGradLinear(TestGrad): + def setUp(self): self.func = GradLinearLayer() self.x = paddle.ones(shape=[10, 2, 5], dtype='float32') @@ -102,8 +109,7 @@ class TestGradLinear(TestGrad): def test_save_infer_program(self): input_spec = [ - paddle.static.InputSpec( - shape=[10, 2, 5], dtype='float32') + paddle.static.InputSpec(shape=[10, 2, 5], dtype='float32') ] paddle.jit.save(self.func, self.infer_model_path, input_spec=input_spec) load_func = paddle.jit.load(self.infer_model_path) @@ -134,6 +140,7 @@ class TestGradLinear(TestGrad): class TestNoGradLinear(TestGradLinear): + def setUp(self): self.func = NoGradLinearLayer() self.x = paddle.ones(shape=[10, 2, 5], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grid_generator.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grid_generator.py index ea2964d4c8b..574f65ffeaa 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grid_generator.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grid_generator.py @@ -27,6 +27,7 @@ paddle.seed(2020) class GridGenerator(nn.Layer): + def __init__(self, in_channels, num_fiducial): super(GridGenerator, self).__init__() self.eps = 1e-6 @@ -35,13 +36,14 @@ class GridGenerator(nn.Layer): initializer = nn.initializer.Constant(value=0.0) param_attr = ParamAttr(learning_rate=0.0, initializer=initializer) bias_attr = ParamAttr(learning_rate=0.0, initializer=initializer) - self.fc = nn.Linear( - in_channels, 6, weight_attr=param_attr, bias_attr=bias_attr) + self.fc = nn.Linear(in_channels, + 6, + weight_attr=param_attr, + bias_attr=bias_attr) @paddle.jit.to_static(input_spec=[ - paddle.static.InputSpec( - shape=[None, 3, 32, 100], dtype='float32'), paddle.static.InputSpec( - shape=[32, 100], dtype='float32') + paddle.static.InputSpec(shape=[None, 3, 32, 100], dtype='float32'), + paddle.static.InputSpec(shape=[32, 100], dtype='float32') ]) def forward(self, batch_C_prime, I_r_size): """ @@ -91,17 +93,16 @@ class GridGenerator(nn.Layer): hat_C[i, j] = r hat_C[j, i] = r hat_C = (hat_C**2) * paddle.log(hat_C) - delta_C = paddle.concat( - [ - paddle.concat( - [paddle.ones((F, 1)), C, hat_C], axis=1), - paddle.concat( - [paddle.zeros((2, 3)), paddle.transpose( - C, perm=[1, 0])], - axis=1), paddle.concat( - [paddle.zeros((1, 3)), paddle.ones((1, F))], axis=1) - ], - axis=0) + delta_C = paddle.concat([ + paddle.concat([paddle.ones((F, 1)), C, hat_C], 
axis=1), + paddle.concat( + [paddle.zeros((2, 3)), + paddle.transpose(C, perm=[1, 0])], + axis=1), + paddle.concat([paddle.zeros( + (1, 3)), paddle.ones((1, F))], axis=1) + ], + axis=0) inv_delta_C = paddle.inverse(delta_C) return inv_delta_C @@ -114,8 +115,8 @@ class GridGenerator(nn.Layer): P_diff = P_tile - C_tile rbf_norm = paddle.norm(P_diff, p=2, axis=2, keepdim=False) - rbf = paddle.multiply( - paddle.square(rbf_norm), paddle.log(rbf_norm + eps)) + rbf = paddle.multiply(paddle.square(rbf_norm), + paddle.log(rbf_norm + eps)) P_hat = paddle.concat([paddle.ones((n, 1)), P, rbf], axis=1) return P_hat @@ -128,6 +129,7 @@ class GridGenerator(nn.Layer): class TestGridGenerator(unittest.TestCase): + def setUp(self): self.x = paddle.uniform(shape=[1, 20, 2], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py index 276aa68e895..5ce163c7685 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py @@ -60,36 +60,42 @@ class TestDygraphIfElse(unittest.TestCase): class TestDygraphIfElse2(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = dyfunc_with_if_else2 class TestDygraphIfElse3(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = dyfunc_with_if_else3 class TestDygraphIfElseWithListGenerator(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = dyfunc_with_if_else_with_list_geneator class TestDygraphNestedIfElse(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = nested_if_else class TestDygraphNestedIfElse2(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = nested_if_else_2 class TestDygraphNestedIfElse3(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = nested_if_else_3 @@ -122,6 +128,7 @@ def dyfunc_ifExp_with_while(x): class TestDygraphIfElse6(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = dyfunc_ifExp_with_while @@ -146,48 +153,56 @@ def dyfunc_ifExp(x): class TestDygraphIfElse7(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = dyfunc_ifExp class TestDygraphIfElseWithAndOr(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = if_with_and_or class TestDygraphIfElseWithAndOr1(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = if_with_and_or_1 class TestDygraphIfElseWithAndOr2(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = if_with_and_or_2 class TestDygraphIfElseWithAndOr3(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = if_with_and_or_3 class TestDygraphIfElseWithAndOr4(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = if_with_and_or_4 class TestDygraphIfElseWithClassVar(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = if_with_class_var class TestDygraphIfTensor(TestDygraphIfElse): + def setUp(self): 
self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = if_tensor_case @@ -242,12 +257,14 @@ def call_external_func(x, label=None): class TestAst2FuncWithExternalFunc(TestDygraphIfElse): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.dyfunc = call_external_func class NetWithExternalFunc(fluid.dygraph.Layer): + @declarative def forward(self, x, label=None): if fluid.layers.mean(x) < 0: @@ -268,12 +285,14 @@ def softmax(x): class TestNetWithExternalFunc(TestDygraphIfElseNet): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.Net = NetWithExternalFunc class DiffModeNet1(paddle.nn.Layer): + def __init__(self, mode): super(DiffModeNet1, self).__init__() self.mode = mode @@ -290,6 +309,7 @@ class DiffModeNet1(paddle.nn.Layer): class DiffModeNet2(paddle.nn.Layer): + def __init__(self, mode): super(DiffModeNet2, self).__init__() self.mode = mode @@ -328,23 +348,28 @@ class TestDiffModeNet(unittest.TestCase): return ret.numpy() def test_train_mode(self): - self.assertTrue((self._run( - mode='train', to_static=True) == self._run( - mode='train', to_static=False)).all()) + self.assertTrue( + (self._run(mode='train', + to_static=True) == self._run(mode='train', + to_static=False)).all()) def test_infer_mode(self): - self.assertTrue((self._run( - mode='infer', to_static=True) == self._run( - mode='infer', to_static=False)).all()) + self.assertTrue( + (self._run(mode='infer', + to_static=True) == self._run(mode='infer', + to_static=False)).all()) class TestDiffModeNet2(TestDiffModeNet): + def init_net(self): self.Net = DiffModeNet2 class TestNewVarCreateInOneBranch(unittest.TestCase): + def test_var_used_in_another_for(self): + def case_func(training): # targets and targets_list is dynamically defined by training if training: @@ -367,6 +392,7 @@ class TestNewVarCreateInOneBranch(unittest.TestCase): class TestDy2StIfElseRetInt1(unittest.TestCase): + def setUp(self): self.x = np.random.random([5]).astype('float32') self.dyfunc = dyfunc_ifelse_ret_int1 @@ -385,6 +411,7 @@ class TestDy2StIfElseRetInt1(unittest.TestCase): class TestDy2StIfElseRetInt2(TestDy2StIfElseRetInt1): + def setUp(self): self.x = np.random.random([5]).astype('float32') self.dyfunc = dyfunc_ifelse_ret_int2 @@ -396,6 +423,7 @@ class TestDy2StIfElseRetInt2(TestDy2StIfElseRetInt1): class TestDy2StIfElseRetInt3(TestDy2StIfElseRetInt1): + def setUp(self): self.x = np.random.random([5]).astype('float32') self.dyfunc = dyfunc_ifelse_ret_int3 @@ -406,6 +434,7 @@ class TestDy2StIfElseRetInt3(TestDy2StIfElseRetInt1): class TestDy2StIfElseRetInt4(TestDy2StIfElseRetInt1): + def setUp(self): self.x = np.random.random([5]).astype('float32') self.dyfunc = dyfunc_ifelse_ret_int4 @@ -415,20 +444,22 @@ class TestDy2StIfElseRetInt4(TestDy2StIfElseRetInt1): with self.assertRaises(TypeError): static_func = paddle.jit.to_static(self.dyfunc) out = static_func(self.x) - # Why need set `_in_declarative_mode_` here? - # In Dy2St we use `with _switch_declarative_mode_guard_()` to indicate - # that the code block is under @to_static, but in this UT - # an exception is thrown during Dy2St, making the `_in_declarative_mode_` + # Why need set `_in_declarative_mode_` here? + # In Dy2St we use `with _switch_declarative_mode_guard_()` to indicate + # that the code block is under @to_static, but in this UT + # an exception is thrown during Dy2St, making the `_in_declarative_mode_` # a wrong value. So We need set `_in_declarative_mode_` to False manually. 
paddle.fluid.dygraph.base._in_declarative_mode_ = False ProgramTranslator().enable(False) class IfElseNet(paddle.nn.Layer): + def __init__(self): super(IfElseNet, self).__init__() - self.param = self.create_parameter( - shape=[3, 2], dtype='float32', is_bias=False) + self.param = self.create_parameter(shape=[3, 2], + dtype='float32', + is_bias=False) @paddle.jit.to_static def forward(self, a, b, c): @@ -444,6 +475,7 @@ class IfElseNet(paddle.nn.Layer): class TestDy2StIfElseBackward(unittest.TestCase): + def test_run_backward(self): a = paddle.randn((4, 3), dtype='float32') a.stop_gradient = False diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse_basic.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse_basic.py index 975797a487b..826063cf673 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse_basic.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse_basic.py @@ -39,9 +39,8 @@ class TestGetNameIds(unittest.TestCase): source = textwrap.dedent(self.source) root = gast.parse(source) all_name_ids = get_name_ids([root]) - self.assertDictEqual( - self.transfer_dict(self.all_name_ids), - self.transfer_dict(all_name_ids)) + self.assertDictEqual(self.transfer_dict(self.all_name_ids), + self.transfer_dict(all_name_ids)) def transfer_dict(self, name_ids_dict): new_dict = {} @@ -51,6 +50,7 @@ class TestGetNameIds(unittest.TestCase): class TestGetNameIds2(TestGetNameIds): + def setUp(self): self.source = """ def test_fn(x, y): @@ -64,11 +64,14 @@ class TestGetNameIds2(TestGetNameIds): return z """ self.all_name_ids = { - 'x': [ - gast.Param(), gast.Store(), gast.Load(), gast.Load(), - gast.Load() - ], - 'a': [gast.Store(), gast.Load(), gast.Load()], + 'x': + [gast.Param(), + gast.Store(), + gast.Load(), + gast.Load(), + gast.Load()], + 'a': [gast.Store(), gast.Load(), + gast.Load()], 'y': [ gast.Param(), gast.Load(), @@ -87,6 +90,7 @@ class TestGetNameIds2(TestGetNameIds): class TestGetNameIds3(TestGetNameIds): + def setUp(self): self.source = """ def test_fn(x, y): @@ -119,6 +123,7 @@ class TestGetNameIds3(TestGetNameIds): class TestIsControlFlowIf(unittest.TestCase): + def check_false_case(self, code): code = textwrap.dedent(code) node = gast.parse(code) @@ -248,14 +253,14 @@ class TestIsControlFlowIf(unittest.TestCase): var_name_to_type = {"x": {NodeVarType.TENSOR}} self.assertTrue( - is_control_flow_to_transform( - node_test, var_name_to_type=var_name_to_type)) + is_control_flow_to_transform(node_test, + var_name_to_type=var_name_to_type)) # if x is not a Tensor var_name_to_type = {"x": {NodeVarType.NUMPY_NDARRAY}} self.assertFalse( - is_control_flow_to_transform( - node_test, var_name_to_type=var_name_to_type)) + is_control_flow_to_transform(node_test, + var_name_to_type=var_name_to_type)) def test_raise_error(self): node = "a + b" diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py index a838ac6842a..95432b58a33 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py @@ -31,11 +31,13 @@ import paddle.nn as nn class SimpleReturnLayer(nn.Layer): + def forward(self, x): return x class AddAttrLayer(nn.Layer): + def __init__(self): super(AddAttrLayer, self).__init__() self.attr = None @@ -46,6 +48,7 @@ class AddAttrLayer(nn.Layer): class IsInstanceLayer(nn.Layer): + def __init__(self, layer): 
super(IsInstanceLayer, self).__init__() self.layer = layer @@ -59,6 +62,7 @@ class IsInstanceLayer(nn.Layer): class SequentialLayer(nn.Layer): + def __init__(self, layers): super(SequentialLayer, self).__init__() self.layers = nn.LayerList(layers) @@ -84,6 +88,7 @@ def train(model, to_static): class TestIsinstance(unittest.TestCase): + def test_isinstance_simple_return_layer(self): model = IsInstanceLayer(SimpleReturnLayer()) self._test_model(model) @@ -103,9 +108,8 @@ class TestIsinstance(unittest.TestCase): def _test_model(self, model): st_out = train(model, to_static=True) dy_out = train(model, to_static=False) - self.assertTrue( - np.allclose(dy_out, st_out), - msg="dy_out:\n {}\n st_out:\n{}".format(dy_out, st_out)) + self.assertTrue(np.allclose(dy_out, st_out), + msg="dy_out:\n {}\n st_out:\n{}".format(dy_out, st_out)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py index e0a9a3ad2af..ddda462525f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py @@ -20,6 +20,7 @@ import unittest import os import tempfile + os.environ["CUDA_VISIBLE_DEVICES"] = "2" import paddle @@ -43,6 +44,7 @@ input_specs = [ class DynamicGRU(fluid.dygraph.Layer): + def __init__(self, size, h_0=None, @@ -55,13 +57,12 @@ class DynamicGRU(fluid.dygraph.Layer): init_size=None): super(DynamicGRU, self).__init__() - self.gru_unit = GRUUnit( - size * 3, - param_attr=param_attr, - bias_attr=bias_attr, - activation=candidate_activation, - gate_activation=gate_activation, - origin_mode=origin_mode) + self.gru_unit = GRUUnit(size * 3, + param_attr=param_attr, + bias_attr=bias_attr, + activation=candidate_activation, + gate_activation=gate_activation, + origin_mode=origin_mode) self.size = size self.h_0 = h_0 @@ -81,13 +82,15 @@ class DynamicGRU(fluid.dygraph.Layer): j = i # input_ = inputs[:, j:j+1, :] # original code - input_ = fluid.layers.slice( - inputs, axes=[1], starts=[j], ends=[j + 1]) - input_ = fluid.layers.reshape( - input_, [-1, input_.shape[2]], inplace=False) + input_ = fluid.layers.slice(inputs, + axes=[1], + starts=[j], + ends=[j + 1]) + input_ = fluid.layers.reshape(input_, [-1, input_.shape[2]], + inplace=False) hidden, reset, gate = self.gru_unit(input_, hidden) - hidden_ = fluid.layers.reshape( - hidden, [-1, 1, hidden.shape[1]], inplace=False) + hidden_ = fluid.layers.reshape(hidden, [-1, 1, hidden.shape[1]], + inplace=False) res.append(hidden_) if self.is_reverse: @@ -97,6 +100,7 @@ class DynamicGRU(fluid.dygraph.Layer): class BiGRU(fluid.dygraph.Layer): + def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None): super(BiGRU, self).__init__() @@ -104,8 +108,8 @@ class BiGRU(fluid.dygraph.Layer): input_dim=input_dim, output_dim=grnn_hidden_dim * 3, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=-init_bound, high=init_bound), + initializer=fluid.initializer.Uniform(low=-init_bound, + high=init_bound), regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=1e-4))) @@ -113,8 +117,8 @@ class BiGRU(fluid.dygraph.Layer): size=grnn_hidden_dim, h_0=h_0, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=-init_bound, high=init_bound), + initializer=fluid.initializer.Uniform(low=-init_bound, + high=init_bound), regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=1e-4))) @@ -122,8 +126,8 @@ class 
BiGRU(fluid.dygraph.Layer): input_dim=input_dim, output_dim=grnn_hidden_dim * 3, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=-init_bound, high=init_bound), + initializer=fluid.initializer.Uniform(low=-init_bound, + high=init_bound), regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=1e-4))) @@ -132,8 +136,8 @@ class BiGRU(fluid.dygraph.Layer): is_reverse=True, h_0=h_0, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=-init_bound, high=init_bound), + initializer=fluid.initializer.Uniform(low=-init_bound, + high=init_bound), regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=1e-4))) @@ -149,6 +153,7 @@ class BiGRU(fluid.dygraph.Layer): class LinearChainCRF(fluid.dygraph.Layer): + def __init__(self, param_attr, size=None, is_test=False, dtype='float32'): super(LinearChainCRF, self).__init__() @@ -191,20 +196,22 @@ class LinearChainCRF(fluid.dygraph.Layer): } if length is not None: this_inputs['Length'] = [length] - self._helper.append_op( - type='linear_chain_crf', - inputs=this_inputs, - outputs={ - "Alpha": [alpha], - "EmissionExps": [emission_exps], - "TransitionExps": transition_exps, - "LogLikelihood": log_likelihood - }, - attrs={"is_test": self._is_test, }) + self._helper.append_op(type='linear_chain_crf', + inputs=this_inputs, + outputs={ + "Alpha": [alpha], + "EmissionExps": [emission_exps], + "TransitionExps": transition_exps, + "LogLikelihood": log_likelihood + }, + attrs={ + "is_test": self._is_test, + }) return log_likelihood class CRFDecoding(fluid.dygraph.Layer): + def __init__(self, param_attr, size=None, is_test=False, dtype='float32'): super(CRFDecoding, self).__init__() @@ -239,16 +246,20 @@ class CRFDecoding(fluid.dygraph.Layer): } if length is not None: this_inputs['Length'] = [length] - self._helper.append_op( - type='crf_decoding', - inputs=this_inputs, - outputs={"ViterbiPath": [viterbi_path]}, - attrs={"is_test": self._is_test, }) + self._helper.append_op(type='crf_decoding', + inputs=this_inputs, + outputs={"ViterbiPath": [viterbi_path]}, + attrs={ + "is_test": self._is_test, + }) return viterbi_path class ChunkEval(fluid.dygraph.Layer): - def __init__(self, num_chunk_types, chunk_scheme, + + def __init__(self, + num_chunk_types, + chunk_scheme, excluded_chunk_types=None): super(ChunkEval, self).__init__() self.num_chunk_types = num_chunk_types @@ -257,10 +268,11 @@ class ChunkEval(fluid.dygraph.Layer): def forward(self, input, label, seq_length=None): if _non_static_mode(): - return _C_ops.chunk_eval( - input, label, seq_length, "num_chunk_types", - self.num_chunk_types, "chunk_scheme", self.chunk_scheme, - "excluded_chunk_types", self.excluded_chunk_types or []) + return _C_ops.chunk_eval(input, label, seq_length, + "num_chunk_types", self.num_chunk_types, + "chunk_scheme", self.chunk_scheme, + "excluded_chunk_types", + self.excluded_chunk_types or []) precision = self._helper.create_variable_for_type_inference( dtype="float32") @@ -279,27 +291,30 @@ class ChunkEval(fluid.dygraph.Layer): if seq_length is not None: this_input["SeqLength"] = [seq_length] - self._helper.append_op( - type='chunk_eval', - inputs=this_input, - outputs={ - "Precision": [precision], - "Recall": [recall], - "F1-Score": [f1_score], - "NumInferChunks": [num_infer_chunks], - "NumLabelChunks": [num_label_chunks], - "NumCorrectChunks": [num_correct_chunks] - }, - attrs={ - "num_chunk_types": self.num_chunk_types, - "chunk_scheme": self.chunk_scheme, - "excluded_chunk_types": self.excluded_chunk_types 
or [] - }) + self._helper.append_op(type='chunk_eval', + inputs=this_input, + outputs={ + "Precision": [precision], + "Recall": [recall], + "F1-Score": [f1_score], + "NumInferChunks": [num_infer_chunks], + "NumLabelChunks": [num_label_chunks], + "NumCorrectChunks": [num_correct_chunks] + }, + attrs={ + "num_chunk_types": + self.num_chunk_types, + "chunk_scheme": + self.chunk_scheme, + "excluded_chunk_types": + self.excluded_chunk_types or [] + }) return (precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks) class LexNet(fluid.dygraph.Layer): + def __init__(self, args, length=None): super(LexNet, self).__init__() """ @@ -325,11 +340,11 @@ class LexNet(fluid.dygraph.Layer): self.word_embedding = Embedding( size=[self.vocab_size, self.word_emb_dim], dtype='float32', - param_attr=fluid.ParamAttr( - learning_rate=self.emb_lr, - name="word_emb", - initializer=fluid.initializer.Uniform( - low=-self.init_bound, high=self.init_bound))) + param_attr=fluid.ParamAttr(learning_rate=self.emb_lr, + name="word_emb", + initializer=fluid.initializer.Uniform( + low=-self.init_bound, + high=self.init_bound))) h_0 = np.zeros((args.batch_size, self.grnn_hidden_dim), dtype="float32") h_0 = to_variable(h_0) @@ -340,39 +355,34 @@ class LexNet(fluid.dygraph.Layer): self.bigru_units.append( self.add_sublayer( "bigru_units%d" % i, - BiGRU( - self.grnn_hidden_dim, - self.grnn_hidden_dim, - self.init_bound, - h_0=h_0))) + BiGRU(self.grnn_hidden_dim, + self.grnn_hidden_dim, + self.init_bound, + h_0=h_0))) else: self.bigru_units.append( self.add_sublayer( "bigru_units%d" % i, - BiGRU( - self.grnn_hidden_dim * 2, - self.grnn_hidden_dim, - self.init_bound, - h_0=h_0))) - - self.fc = Linear( - input_dim=self.grnn_hidden_dim * 2, - output_dim=self.num_labels, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=-self.init_bound, high=self.init_bound), - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=1e-4))) - - self.linear_chain_crf = LinearChainCRF( - param_attr=fluid.ParamAttr( - name='linear_chain_crfw', learning_rate=self.crf_lr), - size=self.num_labels) - - self.crf_decoding = CRFDecoding( - param_attr=fluid.ParamAttr( - name='crfw', learning_rate=self.crf_lr), - size=self.num_labels) + BiGRU(self.grnn_hidden_dim * 2, + self.grnn_hidden_dim, + self.init_bound, + h_0=h_0))) + + self.fc = Linear(input_dim=self.grnn_hidden_dim * 2, + output_dim=self.num_labels, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform( + low=-self.init_bound, high=self.init_bound), + regularizer=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=1e-4))) + + self.linear_chain_crf = LinearChainCRF(param_attr=fluid.ParamAttr( + name='linear_chain_crfw', learning_rate=self.crf_lr), + size=self.num_labels) + + self.crf_decoding = CRFDecoding(param_attr=fluid.ParamAttr( + name='crfw', learning_rate=self.crf_lr), + size=self.num_labels) # share weight self.crf_decoding.weight = self.linear_chain_crf.weight @@ -390,8 +400,9 @@ class LexNet(fluid.dygraph.Layer): emission = self.fc(bigru_output) - crf_cost = self.linear_chain_crf( - input=emission, label=target, length=length) + crf_cost = self.linear_chain_crf(input=emission, + label=target, + length=length) avg_cost = fluid.layers.mean(x=crf_cost) crf_decode = self.crf_decoding(input=emission, length=length) return avg_cost, crf_decode @@ -420,8 +431,8 @@ def get_random_input_data(batch_size, vocab_size, num_labels, max_seq_len=64): cur_len = local_random.randint(3, max_seq_len) word_ids = 
local_random.randint(0, vocab_size, [cur_len]).astype('int64').tolist() - label_ids = local_random.randint(0, num_labels, - [cur_len]).astype('int64').tolist() + label_ids = local_random.randint( + 0, num_labels, [cur_len]).astype('int64').tolist() batch.append((word_ids, label_ids)) init_lens.append(cur_len) if len(batch) == batch_size: @@ -446,8 +457,9 @@ def get_random_input_data(batch_size, vocab_size, num_labels, max_seq_len=64): def create_dataloader(reader, place): - data_loader = fluid.io.DataLoader.from_generator( - capacity=16, use_double_buffer=True, iterable=True) + data_loader = fluid.io.DataLoader.from_generator(capacity=16, + use_double_buffer=True, + iterable=True) data_loader.set_sample_list_generator(reader, places=place) @@ -455,10 +467,11 @@ def create_dataloader(reader, place): class TestLACModel(unittest.TestCase): + def setUp(self): self.args = Args() - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.temp_dir = tempfile.TemporaryDirectory() self.model_save_dir = os.path.join(self.temp_dir.name, 'inference') self.model_save_prefix = os.path.join(self.model_save_dir, 'lac') @@ -468,8 +481,8 @@ class TestLACModel(unittest.TestCase): def train(self, args, to_static): program_translator.enable(to_static) - place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.dygraph.guard(place): paddle.seed(SEED) paddle.framework.random._manual_program_seed(SEED) @@ -482,8 +495,8 @@ class TestLACModel(unittest.TestCase): optimizer = fluid.optimizer.AdamOptimizer( learning_rate=args.base_learning_rate, parameter_list=model.parameters()) - chunk_eval = ChunkEval( - int(math.ceil((args.num_labels - 1) / 2.0)), "IOB") + chunk_eval = ChunkEval(int(math.ceil((args.num_labels - 1) / 2.0)), + "IOB") step = 0 chunk_evaluator = fluid.metrics.ChunkEvaluator() @@ -505,8 +518,10 @@ class TestLACModel(unittest.TestCase): if step % args.print_steps == 0: (precision, recall, f1_score, num_infer_chunks, - num_label_chunks, num_correct_chunks) = chunk_eval( - input=crf_decode, label=targets, seq_length=length) + num_label_chunks, + num_correct_chunks) = chunk_eval(input=crf_decode, + label=targets, + seq_length=length) outputs = [avg_cost, precision, recall, f1_score] avg_cost, precision, recall, f1_score = [ np.mean(x.numpy()) for x in outputs @@ -534,28 +549,27 @@ class TestLACModel(unittest.TestCase): def test_train(self): st_out = self.train(self.args, to_static=True) dy_out = self.train(self.args, to_static=False) - self.assertTrue( - np.allclose(dy_out, st_out), - msg="dygraph output:\n{},\nstatic output:\n {}.".format(dy_out, - st_out)) + self.assertTrue(np.allclose(dy_out, st_out), + msg="dygraph output:\n{},\nstatic output:\n {}.".format( + dy_out, st_out)) # Prediction needs trained models, so put `test_predict` at last of `test_train` # self.verify_predict() def verify_predict(self): - reader = get_random_input_data( - self.args.batch_size, self.args.vocab_size, self.args.num_labels) + reader = get_random_input_data(self.args.batch_size, + self.args.vocab_size, + self.args.num_labels) for batch in reader(): batch = [np.vstack(var) for var in zip(*batch)] dy_pre = self.predict_dygraph(batch) st_pre = self.predict_static(batch) dy_jit_pre = self.predict_dygraph_jit(batch) - self.assertTrue( - np.allclose(dy_pre, st_pre), - 
msg="dy_pre:\n {}\n, st_pre: \n{}.".format(dy_pre, st_pre)) - self.assertTrue( - np.allclose(dy_jit_pre, st_pre), - msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format(dy_jit_pre, - st_pre)) + self.assertTrue(np.allclose(dy_pre, st_pre), + msg="dy_pre:\n {}\n, st_pre: \n{}.".format( + dy_pre, st_pre)) + self.assertTrue(np.allclose(dy_jit_pre, st_pre), + msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format( + dy_jit_pre, st_pre)) def predict_dygraph(self, batch): words, targets, length = batch @@ -567,8 +581,8 @@ class TestLACModel(unittest.TestCase): model.set_dict(model_dict) model.eval() - _, pred_res = model( - to_variable(words), to_variable(targets), to_variable(length)) + _, pred_res = model(to_variable(words), to_variable(targets), + to_variable(length)) return pred_res.numpy() @@ -580,19 +594,19 @@ class TestLACModel(unittest.TestCase): paddle.enable_static() exe = fluid.Executor(self.place) # load inference model - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - self.model_save_dir, - executor=exe, - model_filename=self.model_filename, - params_filename=self.params_filename) + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(self.model_save_dir, + executor=exe, + model_filename=self.model_filename, + params_filename=self.params_filename) words, targets, length = batch - pred_res = exe.run( - inference_program, - feed={feed_target_names[0]: words, - feed_target_names[1]: length}, - fetch_list=fetch_targets) + pred_res = exe.run(inference_program, + feed={ + feed_target_names[0]: words, + feed_target_names[1]: length + }, + fetch_list=fetch_targets) return pred_res[0] def predict_dygraph_jit(self, batch): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lambda.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lambda.py index 1ab10461fd2..7eccbedf4d2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lambda.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lambda.py @@ -80,11 +80,12 @@ def call_lambda_with_ifExpr2(x): class TestLambda(unittest.TestCase): + def setUp(self): self.x = np.random.random([10, 16]).astype('float32') self.x = np.array([1, 3]).astype('float32') - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.init_func() def init_func(self): @@ -108,8 +109,8 @@ class TestLambda(unittest.TestCase): def test_ast_to_func(self): for func in self.dyfuncs: - self.assertTrue((self.run_dygraph(func) == self.run_static(func) - ).all()) + self.assertTrue( + (self.run_dygraph(func) == self.run_static(func)).all()) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_layer_hook.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_layer_hook.py index 357d9611053..b06b01a46fe 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_layer_hook.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_layer_hook.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -29,6 +29,7 @@ def forward_pre_hook1(layer, input): class SimpleNet(paddle.nn.Layer): + def __init__(self, ): super(SimpleNet, self).__init__() self.fc1 = paddle.nn.Linear(10, 10) @@ -52,6 +53,7 @@ class SimpleNet(paddle.nn.Layer): class TestNestLayerHook(unittest.TestCase): + def setUp(self): paddle.seed(2022) self.x = paddle.randn([4, 10]) @@ -83,12 +85,12 @@ class TestNestLayerHook(unittest.TestCase): st_out = self.train_net(to_static=True) load_out = self.load_train() print(st_out, dy_out, load_out) - self.assertTrue( - np.allclose(st_out, dy_out), - msg='dygraph_res is {}\nstatic_res is {}'.format(dy_out, st_out)) - self.assertTrue( - np.allclose(st_out, load_out), - msg='load_out is {}\nstatic_res is {}'.format(load_out, st_out)) + self.assertTrue(np.allclose(st_out, dy_out), + msg='dygraph_res is {}\nstatic_res is {}'.format( + dy_out, st_out)) + self.assertTrue(np.allclose(st_out, load_out), + msg='load_out is {}\nstatic_res is {}'.format( + load_out, st_out)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py index 00a1b018376..28f79b57b6b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py @@ -42,9 +42,10 @@ def len_with_lod_tensor_array(x): class TestLen(unittest.TestCase): + def setUp(self): - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.x_data = np.random.random([10, 16]).astype('float32') self.init_func() @@ -69,6 +70,7 @@ class TestLen(unittest.TestCase): class TestLenWithTensorArray(TestLen): + def init_func(self): self.func = len_with_lod_tensor_array @@ -78,11 +80,10 @@ class TestLenWithTensorArray(TestLen): def len_with_selected_rows(place): block = fluid.default_main_program().global_block() # create selected_rows variable - var = block.create_var( - name="X", - dtype="float32", - persistable=True, - type=fluid.core.VarDesc.VarType.SELECTED_ROWS) + var = block.create_var(name="X", + dtype="float32", + persistable=True, + type=fluid.core.VarDesc.VarType.SELECTED_ROWS) # y is Variable(SelectedRows) y = fluid.layers.merge_selected_rows(var) y_len = convert_call(len)(y) @@ -108,9 +109,10 @@ def len_with_selected_rows(place): class TestLenWithSelectedRows(unittest.TestCase): + def setUp(self): - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() def test_len(self): selected_rows_var_len, var_tensor_len = len_with_selected_rows( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py index ba1f5ed2b3e..55dff1c92bb 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py @@ -44,8 +44,7 @@ def test_list_append_in_if(x): a.append(x) else: a.append( - fluid.layers.fill_constant( - shape=[1, 2], 
value=9, dtype="int64")) + fluid.layers.fill_constant(shape=[1, 2], value=9, dtype="int64")) # TODO(Aurelius84): Currently, run_program_op doesn't support output LoDTensorArray. return a[0] @@ -101,8 +100,9 @@ def test_list_append_in_for_loop_with_concat(x, iter_num): def test_list_append_in_while_loop(x, iter_num): x = fluid.dygraph.to_variable(x) - iter_num = fluid.layers.fill_constant( - shape=[1], value=iter_num, dtype="int32") + iter_num = fluid.layers.fill_constant(shape=[1], + value=iter_num, + dtype="int32") a = [] i = 0 while i < iter_num: @@ -113,8 +113,9 @@ def test_list_append_in_while_loop(x, iter_num): def test_list_append_in_while_loop_with_stack(x, iter_num): x = fluid.dygraph.to_variable(x) - iter_num = fluid.layers.fill_constant( - shape=[1], value=iter_num, dtype="int32") + iter_num = fluid.layers.fill_constant(shape=[1], + value=iter_num, + dtype="int32") a = [] i = 0 while i < iter_num.numpy()[0]: @@ -182,8 +183,9 @@ def test_list_pop_in_for_loop(x, iter_num): def test_list_pop_in_while_loop(x, iter_num): x = fluid.dygraph.to_variable(x) - iter_num = fluid.layers.fill_constant( - shape=[1], value=iter_num, dtype="int32") + iter_num = fluid.layers.fill_constant(shape=[1], + value=iter_num, + dtype="int32") a = [] b = [x] b.append(x) @@ -200,9 +202,10 @@ def test_list_pop_in_while_loop(x, iter_num): class TestListWithoutControlFlow(unittest.TestCase): + def setUp(self): - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.init_data() self.init_dygraph_func() @@ -249,16 +252,18 @@ class TestListWithoutControlFlow(unittest.TestCase): for stat_res, dy_res in zip(static_res_list, dygraph_res_list): self.assertTrue( np.allclose(stat_res, dy_res), - msg='dygraph_res is {}\nstatic_res is {}'.format(stat_res, - dy_res)) + msg='dygraph_res is {}\nstatic_res is {}'.format( + stat_res, dy_res)) class TestListInIf(TestListWithoutControlFlow): + def init_dygraph_func(self): self.all_dygraph_funcs = [test_list_append_in_if, test_list_pop_in_if] class TestListInWhileLoop(TestListWithoutControlFlow): + def init_data(self): self.input = np.random.random((3)).astype('int32') self.iter_num = 3 @@ -279,11 +284,13 @@ class TestListInWhileLoop(TestListWithoutControlFlow): class TestListInWhileLoopWithStack(TestListInWhileLoop): + def init_dygraph_func(self): self.all_dygraph_funcs = [test_list_append_in_while_loop_with_stack] class TestListInForLoop(TestListInWhileLoop): + def init_dygraph_func(self): self.all_dygraph_funcs = [ test_list_append_in_for_loop, test_list_pop_in_for_loop @@ -291,11 +298,15 @@ class TestListInForLoop(TestListInWhileLoop): class TestListInForLoopWithConcat(TestListInWhileLoopWithStack): + def init_dygraph_func(self): - self.all_dygraph_funcs = [test_list_append_in_for_loop_with_concat, ] + self.all_dygraph_funcs = [ + test_list_append_in_for_loop_with_concat, + ] class TestListInForLoopWithSubscript(TestListWithoutControlFlow): + def init_dygraph_func(self): self.all_dygraph_funcs = [ test_list_append_in_for_subscript, @@ -307,6 +318,7 @@ class TestListInForLoopWithSubscript(TestListWithoutControlFlow): class ListWithCondNet(paddle.nn.Layer): + def __init__(self): super(ListWithCondNet, self).__init__() @@ -330,6 +342,7 @@ class ListWithCondNet(paddle.nn.Layer): class TestListWithCondGradInferVarType(unittest.TestCase): + def test_to_static(self): net = ListWithCondNet() x = paddle.to_tensor([2, 3, 4], dtype='float32') diff --git 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py index 385b7ce204a..e1ea7d99d82 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py @@ -28,6 +28,7 @@ from unittest import mock class TestLoggingUtils(unittest.TestCase): + def setUp(self): self.verbosity_level = 1 self.code_level = 3 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py index b11e9441c8c..0a510eb81b1 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py @@ -172,10 +172,11 @@ def test_shape_not_equal(x): class TestLogicalBase(unittest.TestCase): + def setUp(self): self.input = np.array([3]).astype('int32') - self.place = paddle.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else paddle.CPUPlace() + self.place = paddle.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else paddle.CPUPlace() self._set_test_func() def _set_test_func(self): @@ -196,69 +197,77 @@ class TestLogicalBase(unittest.TestCase): class TestLogicalNot(TestLogicalBase): + def _set_test_func(self): self.dygraph_func = test_logical_not def test_transformed_result(self): dygraph_res = self._run_dygraph() static_res = self._run_static() - self.assertTrue( - np.allclose(dygraph_res, static_res), - msg='dygraph result is {}\nstatic_result is {}'.format(dygraph_res, - static_res)) + self.assertTrue(np.allclose(dygraph_res, static_res), + msg='dygraph result is {}\nstatic_result is {}'.format( + dygraph_res, static_res)) class TestLogicalNot2(TestLogicalBase): + def _set_test_func(self): self.dygraph_func = test_logical_not_2 def test_transformed_result(self): dygraph_res = self._run_dygraph() static_res = self._run_static() - self.assertTrue( - np.allclose(dygraph_res, static_res), - msg='dygraph result is {}\nstatic_result is {}'.format(dygraph_res, - static_res)) + self.assertTrue(np.allclose(dygraph_res, static_res), + msg='dygraph result is {}\nstatic_result is {}'.format( + dygraph_res, static_res)) class TestLogicalAnd(TestLogicalNot): + def _set_test_func(self): self.dygraph_func = test_logical_and class TestLogicalAnd2(TestLogicalNot): + def _set_test_func(self): self.dygraph_func = test_logical_and_2 class TestLogicalOr(TestLogicalNot): + def _set_test_func(self): self.dygraph_func = test_logical_or class TestLogicalOr2(TestLogicalNot): + def _set_test_func(self): self.dygraph_func = test_logical_or_2 class TestLogicalNotAndOr(TestLogicalNot): + def _set_test_func(self): self.dygraph_func = test_logical_not_and_or class TestShapeEqual(TestLogicalNot): + def _set_test_func(self): self.input = np.ones([1, 2, 3]).astype('float32') self.dygraph_func = test_shape_equal class TestShapeNotEqual(TestLogicalNot): + def _set_test_func(self): self.input = np.ones([1, 2, 3]).astype('float32') self.dygraph_func = test_shape_not_equal class TestCmpopNodeToStr(unittest.TestCase): + def test_exception(self): with self.assertRaises(KeyError): cmpop_node_to_str(gast.Or()) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py index 93eb1247888..56e9cabbef4 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py +++ 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py @@ -150,7 +150,9 @@ def while_loop_bool_op2(x): def while_loop_class_var(x): + class Foo(object): + def __init__(self): self.a = 3 self.b = 4 @@ -176,7 +178,9 @@ def loop_var_contains_property(x): def for_loop_class_var(max_len): + class Foo(object): + def __init__(self): self.a = 3 self.b = 4 @@ -185,8 +189,9 @@ def for_loop_class_var(max_len): foo = Foo() # Use `to_variable` so that static analysis can analyze the type of X is Tensor - max_len = fluid.layers.fill_constant( - shape=[1], value=max_len, dtype="int32") + max_len = fluid.layers.fill_constant(shape=[1], + value=max_len, + dtype="int32") for i in range(max_len): foo.b = fluid.layers.zeros(shape=[1], dtype='float32') @@ -225,13 +230,16 @@ def for_loop_dufunc_with_listcomp(array): class TestNameVisitor(unittest.TestCase): + def setUp(self): self.loop_funcs = [ while_loop_dyfunc, for_loop_dyfunc, while_loop_dyfunc_with_none, for_loop_dufunc_with_listcomp ] self.loop_var_names = [ - set(["i", "x"]), set(["i", "ret", "max_len"]), set(["i", "x"]), + set(["i", "x"]), + set(["i", "ret", "max_len"]), + set(["i", "x"]), set(["j", "array", "res", "x"]) ] self.create_var_names = [set(), set(["ret"]), set(), set(["res", "x"])] @@ -258,7 +266,9 @@ class TestNameVisitor(unittest.TestCase): name_visitor = NameVisitor(gast_root) self.loop_var_names = [ - set(["j", "two"]), set(["i", "three", "b"]), set(["i", "j"]) + set(["j", "two"]), + set(["i", "three", "b"]), + set(["i", "j"]) ] self.create_var_names = [set(), set(["b"]), set()] @@ -275,15 +285,17 @@ class TestNameVisitor(unittest.TestCase): self.assertEqual( create_var_names, self.create_var_names[i], - msg="i = {}\ncreate_var_names : {}, \nexpected create_var_names : {}". - format(i, create_var_names, self.create_var_names[i])) + msg= + "i = {}\ncreate_var_names : {}, \nexpected create_var_names : {}" + .format(i, create_var_names, self.create_var_names[i])) i += 1 class TestTransformWhileLoop(unittest.TestCase): + def setUp(self): - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.x = np.zeros(shape=(1), dtype=np.int32) self._init_dyfunc() @@ -316,49 +328,58 @@ class TestTransformWhileLoop(unittest.TestCase): class TestTransformWhileLoopWithoutTensor(TestTransformWhileLoop): + def _init_dyfunc(self): self.dyfunc = while_loop_dyfunc_without_tensor class TestTransformWhileLoopWithConflicVar(TestTransformWhileLoop): + def _init_dyfunc(self): self.dyfunc = while_loop_dyfun_with_conflict_var class TestTransformWhileLoopWithNone(TestTransformWhileLoop): + def _init_dyfunc(self): self.dyfunc = while_loop_dyfunc_with_none class TestForBreakSingleReturn(TestTransformWhileLoop): + def _init_dyfunc(self): self.dyfunc = for_break_single_return class TestWhileLoopBoolOp(TestTransformWhileLoop): + def _init_dyfunc(self): self.dyfunc = while_loop_bool_op class TestWhileLoopBoolOp2(TestTransformWhileLoop): + def _init_dyfunc(self): self.dyfunc = while_loop_bool_op2 class TestWhileLoopClassVar(TestTransformWhileLoop): + def _init_dyfunc(self): self.dyfunc = while_loop_class_var class TestLoopVarContainsProperty(TestTransformWhileLoop): + def _init_dyfunc(self): self.dyfunc = loop_var_contains_property class TestTransformForLoop(unittest.TestCase): + def setUp(self): - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if 
fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.len = 100 self._init_dyfunc() @@ -384,31 +405,37 @@ class TestTransformForLoop(unittest.TestCase): class TestTransformForLoop2(TestTransformForLoop): + def _init_dyfunc(self): self.dyfunc = for_loop_dyfunc2 class TestTransformForLoop3(TestTransformForLoop): + def _init_dyfunc(self): self.dyfunc = for_loop_dyfunc3 class TestTransformForLoop4(TestTransformForLoop): + def _init_dyfunc(self): self.dyfunc = for_loop_dyfunc4 class TestClassVarInForLoop(TestTransformForLoop): + def _init_dyfunc(self): self.dyfunc = for_loop_class_var class TestVarCreateInForLoop(TestTransformForLoop): + def _init_dyfunc(self): self.dyfunc = var_create_in_for_loop class TestErrorInForLoop(TestTransformForLoop): + def _init_dyfunc(self): self.dyfunc = for_loop_dyfunc_not_support diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lstm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lstm.py index 8d54e199800..60175851561 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lstm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lstm.py @@ -21,10 +21,13 @@ import tempfile class LSTMLayer(nn.Layer): + def __init__(self, in_channels, hidden_size): super(LSTMLayer, self).__init__() - self.cell = nn.LSTM( - in_channels, hidden_size, direction='bidirectional', num_layers=2) + self.cell = nn.LSTM(in_channels, + hidden_size, + direction='bidirectional', + num_layers=2) def forward(self, x): x, _ = self.cell(x) @@ -32,6 +35,7 @@ class LSTMLayer(nn.Layer): class Net(nn.Layer): + def __init__(self, in_channels, hidden_size): super(Net, self).__init__() self.lstm = LSTMLayer(in_channels, hidden_size) @@ -42,6 +46,7 @@ class Net(nn.Layer): class TestLstm(unittest.TestCase): + def setUp(self): self.temp_dir = tempfile.TemporaryDirectory() @@ -64,10 +69,9 @@ class TestLstm(unittest.TestCase): def test_lstm_to_static(self): dygraph_out = self.run_lstm(to_static=False) static_out = self.run_lstm(to_static=True) - self.assertTrue( - np.allclose(dygraph_out, static_out), - msg='dygraph_out is {}\n static_out is \n{}'.format(dygraph_out, - static_out)) + self.assertTrue(np.allclose(dygraph_out, static_out), + msg='dygraph_out is {}\n static_out is \n{}'.format( + dygraph_out, static_out)) def test_save_in_eval(self, with_training=True): paddle.jit.ProgramTranslator().enable(True) @@ -94,23 +98,22 @@ class TestLstm(unittest.TestCase): load_net = paddle.jit.load(model_path) static_out = load_net(x) - self.assertTrue( - np.allclose(dygraph_out.numpy(), static_out.numpy()), - msg='dygraph_out is {}\n static_out is \n{}'.format(dygraph_out, - static_out)) + self.assertTrue(np.allclose(dygraph_out.numpy(), static_out.numpy()), + msg='dygraph_out is {}\n static_out is \n{}'.format( + dygraph_out, static_out)) # switch back into train mode. 
net.train() train_out = net(x) - self.assertTrue( - np.allclose(dygraph_out.numpy(), train_out.numpy()), - msg='dygraph_out is {}\n static_out is \n{}'.format(dygraph_out, - train_out)) + self.assertTrue(np.allclose(dygraph_out.numpy(), train_out.numpy()), + msg='dygraph_out is {}\n static_out is \n{}'.format( + dygraph_out, train_out)) def test_save_without_training(self): self.test_save_in_eval(with_training=False) class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() self.fc = nn.Linear(10, 12) @@ -124,6 +127,7 @@ class LinearNet(nn.Layer): class TestSaveInEvalMode(unittest.TestCase): + def setUp(self): self.temp_dir = tempfile.TemporaryDirectory() @@ -156,13 +160,13 @@ class TestSaveInEvalMode(unittest.TestCase): eval_out = net(x) infer_out = load_net(x) - self.assertTrue( - np.allclose(eval_out.numpy(), infer_out.numpy()), - msg='eval_out is {}\n infer_out is \n{}'.format(eval_out, - infer_out)) + self.assertTrue(np.allclose(eval_out.numpy(), infer_out.numpy()), + msg='eval_out is {}\n infer_out is \n{}'.format( + eval_out, infer_out)) class TestEvalAfterSave(unittest.TestCase): + def setUp(self): self.temp_dir = tempfile.TemporaryDirectory() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py index 2bb3879efb7..35c8b4d9522 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py @@ -40,6 +40,7 @@ if paddle.fluid.is_compiled_with_cuda(): class SimpleImgConvPool(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -59,26 +60,24 @@ class SimpleImgConvPool(fluid.dygraph.Layer): bias_attr=None): super(SimpleImgConvPool, self).__init__() - self._conv2d = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=conv_stride, - padding=conv_padding, - dilation=conv_dilation, - groups=conv_groups, - param_attr=None, - bias_attr=None, - act=act, - use_cudnn=use_cudnn) - - self._pool2d = Pool2D( - pool_size=pool_size, - pool_type=pool_type, - pool_stride=pool_stride, - pool_padding=pool_padding, - global_pooling=global_pooling, - use_cudnn=use_cudnn) + self._conv2d = Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=conv_stride, + padding=conv_padding, + dilation=conv_dilation, + groups=conv_groups, + param_attr=None, + bias_attr=None, + act=act, + use_cudnn=use_cudnn) + + self._pool2d = Pool2D(pool_size=pool_size, + pool_type=pool_type, + pool_stride=pool_stride, + pool_padding=pool_padding, + global_pooling=global_pooling, + use_cudnn=use_cudnn) def forward(self, inputs): x = self._conv2d(inputs) @@ -87,25 +86,33 @@ class SimpleImgConvPool(fluid.dygraph.Layer): class MNIST(fluid.dygraph.Layer): + def __init__(self): super(MNIST, self).__init__() - self._simple_img_conv_pool_1 = SimpleImgConvPool( - 1, 20, 5, 2, 2, act="relu") + self._simple_img_conv_pool_1 = SimpleImgConvPool(1, + 20, + 5, + 2, + 2, + act="relu") - self._simple_img_conv_pool_2 = SimpleImgConvPool( - 20, 50, 5, 2, 2, act="relu") + self._simple_img_conv_pool_2 = SimpleImgConvPool(20, + 50, + 5, + 2, + 2, + act="relu") self.pool_2_shape = 50 * 4 * 4 SIZE = 10 scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5 - self._fc = Linear( - self.pool_2_shape, - 10, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale)), - act="softmax") + self._fc 
= Linear(self.pool_2_shape, + 10, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.NormalInitializer( + loc=0.0, scale=scale)), + act="softmax") def forward(self, inputs, label=None): x = self.inference(inputs) @@ -127,15 +134,15 @@ class MNIST(fluid.dygraph.Layer): class TestMNIST(unittest.TestCase): + def setUp(self): self.epoch_num = 1 self.batch_size = 64 - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() - self.train_reader = paddle.batch( - paddle.dataset.mnist.train(), - batch_size=self.batch_size, - drop_last=True) + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() + self.train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=self.batch_size, + drop_last=True) self.temp_dir = tempfile.TemporaryDirectory() def tearDown(self): @@ -158,17 +165,15 @@ class TestMNISTWithToStatic(TestMNIST): def test_mnist_to_static(self): dygraph_loss = self.train_dygraph() static_loss = self.train_static() - self.assertTrue( - np.allclose(dygraph_loss, static_loss), - msg='dygraph is {}\n static_res is \n{}'.format(dygraph_loss, - static_loss)) + self.assertTrue(np.allclose(dygraph_loss, static_loss), + msg='dygraph is {}\n static_res is \n{}'.format( + dygraph_loss, static_loss)) with _test_eager_guard(): dygraph_loss = self.train_dygraph() static_loss = self.train_static() - self.assertTrue( - np.allclose(dygraph_loss, static_loss), - msg='dygraph is {}\n static_res is \n{}'.format(dygraph_loss, - static_loss)) + self.assertTrue(np.allclose(dygraph_loss, static_loss), + msg='dygraph is {}\n static_res is \n{}'.format( + dygraph_loss, static_loss)) def test_mnist_declarative_cpu_vs_mkldnn(self): dygraph_loss_cpu = self.train_dygraph() @@ -177,10 +182,9 @@ class TestMNISTWithToStatic(TestMNIST): dygraph_loss_mkldnn = self.train_dygraph() finally: fluid.set_flags({'FLAGS_use_mkldnn': False}) - self.assertTrue( - np.allclose(dygraph_loss_cpu, dygraph_loss_mkldnn), - msg='cpu dygraph is {}\n mkldnn dygraph is \n{}'.format( - dygraph_loss_cpu, dygraph_loss_mkldnn)) + self.assertTrue(np.allclose(dygraph_loss_cpu, dygraph_loss_mkldnn), + msg='cpu dygraph is {}\n mkldnn dygraph is \n{}'.format( + dygraph_loss_cpu, dygraph_loss_mkldnn)) def train(self, to_static=False): @@ -191,17 +195,17 @@ class TestMNISTWithToStatic(TestMNIST): mnist = MNIST() if to_static: mnist = paddle.jit.to_static(mnist) - adam = AdamOptimizer( - learning_rate=0.001, parameter_list=mnist.parameters()) + adam = AdamOptimizer(learning_rate=0.001, + parameter_list=mnist.parameters()) for epoch in range(self.epoch_num): start = time() for batch_id, data in enumerate(self.train_reader()): - dy_x_data = np.array( - [x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) + dy_x_data = np.array([ + x[0].reshape(1, 28, 28) for x in data + ]).astype('float32') + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(-1, 1) img = to_variable(dy_x_data) label = to_variable(y_data) @@ -217,9 +221,9 @@ class TestMNISTWithToStatic(TestMNIST): if batch_id % 10 == 0: print( "Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}" - .format(epoch, batch_id, - avg_loss.numpy(), - acc.numpy(), time() - start)) + .format(epoch, batch_id, avg_loss.numpy(), + acc.numpy(), + time() - start)) start = time() if batch_id == 50: mnist.eval() @@ -239,11 +243,10 @@ class TestMNISTWithToStatic(TestMNIST): model_save_prefix = os.path.join(model_save_dir, 
'mnist') model_filename = "mnist" + INFER_MODEL_SUFFIX params_filename = "mnist" + INFER_PARAMS_SUFFIX - fluid.dygraph.jit.save( - layer=model, - path=model_save_prefix, - input_spec=input_spec, - output_spec=[gt_out]) + fluid.dygraph.jit.save(layer=model, + path=model_save_prefix, + input_spec=input_spec, + output_spec=[gt_out]) # load in static mode static_infer_out = self.jit_load_and_run_inference_static( model_save_dir, model_filename, params_filename, inputs) @@ -262,12 +265,11 @@ class TestMNISTWithToStatic(TestMNIST): params_filename, inputs): paddle.enable_static() exe = fluid.Executor(self.place) - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - dirname=model_path, - executor=exe, - model_filename=model_filename, - params_filename=params_filename) + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(dirname=model_path, + executor=exe, + model_filename=model_filename, + params_filename=params_filename) assert len(inputs) == len(feed_target_names) results = exe.run(inference_program, feed=dict(zip(feed_target_names, inputs)), @@ -280,8 +282,9 @@ class TestMNISTWithToStatic(TestMNIST): pred = infer_net(inputs[0]) return pred.numpy() - def predictor_load_and_run_inference_analysis( - self, model_path, model_filename, params_filename, inputs): + def predictor_load_and_run_inference_analysis(self, model_path, + model_filename, + params_filename, inputs): output = PredictorTools(model_path, model_filename, params_filename, inputs) out = output() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_amp.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_amp.py index 573ce1678d5..ad4d64d4b9c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_amp.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_amp.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -25,6 +25,7 @@ if paddle.fluid.is_compiled_with_cuda(): class TestAMP(TestMNIST): + def train_static(self): return self.train(to_static=True) @@ -37,11 +38,9 @@ class TestAMP(TestMNIST): # NOTE(Aurelius84): In static AMP training, there is a grep_list but # dygraph AMP don't. It will bring the numbers of cast_op is different # and leads to loss has a bit diff. 
- self.assertTrue( - np.allclose( - dygraph_loss, static_loss, atol=1e-3), - msg='dygraph is {}\n static_res is \n{}'.format(dygraph_loss, - static_loss)) + self.assertTrue(np.allclose(dygraph_loss, static_loss, atol=1e-3), + msg='dygraph is {}\n static_res is \n{}'.format( + dygraph_loss, static_loss)) def train(self, to_static=False): paddle.seed(SEED) @@ -51,8 +50,8 @@ class TestAMP(TestMNIST): print("Successfully to apply @to_static.") mnist = paddle.jit.to_static(mnist) - adam = AdamOptimizer( - learning_rate=0.001, parameter_list=mnist.parameters()) + adam = AdamOptimizer(learning_rate=0.001, + parameter_list=mnist.parameters()) scaler = paddle.amp.GradScaler(init_loss_scaling=1024) @@ -62,8 +61,8 @@ class TestAMP(TestMNIST): for batch_id, data in enumerate(self.train_reader()): dy_x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(-1, 1) img = paddle.to_tensor(dy_x_data) label = paddle.to_tensor(y_data) @@ -82,8 +81,8 @@ class TestAMP(TestMNIST): if batch_id % 10 == 0: print( "Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}" - .format(epoch, batch_id, - avg_loss.numpy(), acc.numpy(), time() - start)) + .format(epoch, batch_id, avg_loss.numpy(), acc.numpy(), + time() - start)) start = time() if batch_id == 50: break diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_pure_fp16.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_pure_fp16.py index 10ba073f63e..d54231d2c46 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_pure_fp16.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_pure_fp16.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -26,6 +26,7 @@ if paddle.fluid.is_compiled_with_cuda(): class TestPureFP16(TestMNIST): + def train_static(self): return self.train(to_static=True) @@ -37,11 +38,9 @@ class TestPureFP16(TestMNIST): dygraph_loss = self.train_dygraph() static_loss = self.train_static() # NOTE: In pure fp16 training, loss is not stable, so we enlarge atol here. 
- self.assertTrue( - np.allclose( - dygraph_loss, static_loss, atol=1e-3), - msg='dygraph is {}\n static_res is \n{}'.format(dygraph_loss, - static_loss)) + self.assertTrue(np.allclose(dygraph_loss, static_loss, atol=1e-3), + msg='dygraph is {}\n static_res is \n{}'.format( + dygraph_loss, static_loss)) def train(self, to_static=False): np.random.seed(SEED) @@ -58,16 +57,15 @@ class TestPureFP16(TestMNIST): build_strategy.enable_inplace = False mnist = paddle.jit.to_static(mnist, build_strategy=build_strategy) - optimizer = paddle.optimizer.Adam( - learning_rate=0.001, parameters=mnist.parameters()) + optimizer = paddle.optimizer.Adam(learning_rate=0.001, + parameters=mnist.parameters()) scaler = paddle.amp.GradScaler(init_loss_scaling=1024) - mnist, optimizer = paddle.amp.decorate( - models=mnist, - optimizers=optimizer, - level='O2', - save_dtype='float32') + mnist, optimizer = paddle.amp.decorate(models=mnist, + optimizers=optimizer, + level='O2', + save_dtype='float32') loss_data = [] for epoch in range(self.epoch_num): @@ -75,18 +73,17 @@ class TestPureFP16(TestMNIST): for batch_id, data in enumerate(self.train_reader()): dy_x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(-1, 1) img = paddle.to_tensor(dy_x_data) label = paddle.to_tensor(y_data) label.stop_gradient = True - with paddle.amp.auto_cast( - enable=True, - custom_white_list=None, - custom_black_list=None, - level='O2'): + with paddle.amp.auto_cast(enable=True, + custom_white_list=None, + custom_black_list=None, + level='O2'): prediction, acc, avg_loss = mnist(img, label=label) scaled = scaler.scale(avg_loss) @@ -99,8 +96,8 @@ class TestPureFP16(TestMNIST): if batch_id % 2 == 0: print( "Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}" - .format(epoch, batch_id, - avg_loss.numpy(), acc.numpy(), time() - start)) + .format(epoch, batch_id, avg_loss.numpy(), acc.numpy(), + time() - start)) start = time() if batch_id == 10: break diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index 7b98ced95e2..18694f6cdec 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -39,6 +39,7 @@ program_translator = ProgramTranslator() class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, num_channels, filter_size, @@ -52,18 +53,18 @@ class ConvBNLayer(fluid.dygraph.Layer): name=None): super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=ParamAttr( - initializer=MSRA(), name=self.full_name() + "_weights"), - bias_attr=False) + self._conv = Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=ParamAttr(initializer=MSRA(), + name=self.full_name() + + "_weights"), + bias_attr=False) self._batch_norm = BatchNorm( num_filters, @@ -82,6 +83,7 @@ class ConvBNLayer(fluid.dygraph.Layer): class DepthwiseSeparable(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters1, @@ -92,14 +94,14 @@ class 
DepthwiseSeparable(fluid.dygraph.Layer): name=None): super(DepthwiseSeparable, self).__init__() - self._depthwise_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=int(num_filters1 * scale), - filter_size=3, - stride=stride, - padding=1, - num_groups=int(num_groups * scale), - use_cudnn=True) + self._depthwise_conv = ConvBNLayer(num_channels=num_channels, + num_filters=int(num_filters1 * + scale), + filter_size=3, + stride=stride, + padding=1, + num_groups=int(num_groups * scale), + use_cudnn=True) self._pointwise_conv = ConvBNLayer( num_channels=int(num_filters1 * scale), @@ -115,127 +117,118 @@ class DepthwiseSeparable(fluid.dygraph.Layer): class MobileNetV1(fluid.dygraph.Layer): + def __init__(self, scale=1.0, class_dim=1000): super(MobileNetV1, self).__init__() self.scale = scale self.dwsl = [] - self.conv1 = ConvBNLayer( - num_channels=3, - filter_size=3, - channels=3, - num_filters=int(32 * scale), - stride=2, - padding=1) - - dws21 = self.add_sublayer( - sublayer=DepthwiseSeparable( - num_channels=int(32 * scale), - num_filters1=32, - num_filters2=64, - num_groups=32, - stride=1, - scale=scale), - name="conv2_1") + self.conv1 = ConvBNLayer(num_channels=3, + filter_size=3, + channels=3, + num_filters=int(32 * scale), + stride=2, + padding=1) + + dws21 = self.add_sublayer(sublayer=DepthwiseSeparable(num_channels=int( + 32 * scale), + num_filters1=32, + num_filters2=64, + num_groups=32, + stride=1, + scale=scale), + name="conv2_1") self.dwsl.append(dws21) - dws22 = self.add_sublayer( - sublayer=DepthwiseSeparable( - num_channels=int(64 * scale), - num_filters1=64, - num_filters2=128, - num_groups=64, - stride=2, - scale=scale), - name="conv2_2") + dws22 = self.add_sublayer(sublayer=DepthwiseSeparable(num_channels=int( + 64 * scale), + num_filters1=64, + num_filters2=128, + num_groups=64, + stride=2, + scale=scale), + name="conv2_2") self.dwsl.append(dws22) - dws31 = self.add_sublayer( - sublayer=DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=128, - num_groups=128, - stride=1, - scale=scale), - name="conv3_1") + dws31 = self.add_sublayer(sublayer=DepthwiseSeparable(num_channels=int( + 128 * scale), + num_filters1=128, + num_filters2=128, + num_groups=128, + stride=1, + scale=scale), + name="conv3_1") self.dwsl.append(dws31) - dws32 = self.add_sublayer( - sublayer=DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=256, - num_groups=128, - stride=2, - scale=scale), - name="conv3_2") + dws32 = self.add_sublayer(sublayer=DepthwiseSeparable(num_channels=int( + 128 * scale), + num_filters1=128, + num_filters2=256, + num_groups=128, + stride=2, + scale=scale), + name="conv3_2") self.dwsl.append(dws32) - dws41 = self.add_sublayer( - sublayer=DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=256, - num_groups=256, - stride=1, - scale=scale), - name="conv4_1") + dws41 = self.add_sublayer(sublayer=DepthwiseSeparable(num_channels=int( + 256 * scale), + num_filters1=256, + num_filters2=256, + num_groups=256, + stride=1, + scale=scale), + name="conv4_1") self.dwsl.append(dws41) - dws42 = self.add_sublayer( - sublayer=DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=512, - num_groups=256, - stride=2, - scale=scale), - name="conv4_2") + dws42 = self.add_sublayer(sublayer=DepthwiseSeparable(num_channels=int( + 256 * scale), + num_filters1=256, + num_filters2=512, + num_groups=256, + stride=2, + scale=scale), + name="conv4_2") 
self.dwsl.append(dws42) for i in range(5): - tmp = self.add_sublayer( - sublayer=DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=512, - num_groups=512, - stride=1, - scale=scale), - name="conv5_" + str(i + 1)) - self.dwsl.append(tmp) - - dws56 = self.add_sublayer( - sublayer=DepthwiseSeparable( + tmp = self.add_sublayer(sublayer=DepthwiseSeparable( num_channels=int(512 * scale), num_filters1=512, - num_filters2=1024, + num_filters2=512, num_groups=512, - stride=2, + stride=1, scale=scale), - name="conv5_6") + name="conv5_" + str(i + 1)) + self.dwsl.append(tmp) + + dws56 = self.add_sublayer(sublayer=DepthwiseSeparable(num_channels=int( + 512 * scale), + num_filters1=512, + num_filters2=1024, + num_groups=512, + stride=2, + scale=scale), + name="conv5_6") self.dwsl.append(dws56) - dws6 = self.add_sublayer( - sublayer=DepthwiseSeparable( - num_channels=int(1024 * scale), - num_filters1=1024, - num_filters2=1024, - num_groups=1024, - stride=1, - scale=scale), - name="conv6") + dws6 = self.add_sublayer(sublayer=DepthwiseSeparable(num_channels=int( + 1024 * scale), + num_filters1=1024, + num_filters2=1024, + num_groups=1024, + stride=1, + scale=scale), + name="conv6") self.dwsl.append(dws6) self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) - self.out = Linear( - int(1024 * scale), - class_dim, - param_attr=ParamAttr( - initializer=MSRA(), name=self.full_name() + "fc7_weights"), - bias_attr=ParamAttr(name="fc7_offset")) + self.out = Linear(int(1024 * scale), + class_dim, + param_attr=ParamAttr(initializer=MSRA(), + name=self.full_name() + + "fc7_weights"), + bias_attr=ParamAttr(name="fc7_offset")) @declarative def forward(self, inputs): @@ -249,44 +242,43 @@ class MobileNetV1(fluid.dygraph.Layer): class InvertedResidualUnit(fluid.dygraph.Layer): + def __init__( - self, - num_channels, - num_in_filter, - num_filters, - stride, - filter_size, - padding, - expansion_factor, ): + self, + num_channels, + num_in_filter, + num_filters, + stride, + filter_size, + padding, + expansion_factor, + ): super(InvertedResidualUnit, self).__init__() num_expfilter = int(round(num_in_filter * expansion_factor)) - self._expand_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - act=None, - num_groups=1) - - self._bottleneck_conv = ConvBNLayer( - num_channels=num_expfilter, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - act=None, - use_cudnn=True) - - self._linear_conv = ConvBNLayer( - num_channels=num_expfilter, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - num_groups=1) + self._expand_conv = ConvBNLayer(num_channels=num_channels, + num_filters=num_expfilter, + filter_size=1, + stride=1, + padding=0, + act=None, + num_groups=1) + + self._bottleneck_conv = ConvBNLayer(num_channels=num_expfilter, + num_filters=num_expfilter, + filter_size=filter_size, + stride=stride, + padding=padding, + num_groups=num_expfilter, + act=None, + use_cudnn=True) + + self._linear_conv = ConvBNLayer(num_channels=num_expfilter, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + act=None, + num_groups=1) def forward(self, inputs, ifshortcut): y = self._expand_conv(inputs, if_act=True) @@ -298,30 +290,29 @@ class InvertedResidualUnit(fluid.dygraph.Layer): class InvresiBlocks(fluid.dygraph.Layer): + def __init__(self, in_c, t, c, n, s): super(InvresiBlocks, self).__init__() - 
self._first_block = InvertedResidualUnit( - num_channels=in_c, - num_in_filter=in_c, - num_filters=c, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t) + self._first_block = InvertedResidualUnit(num_channels=in_c, + num_in_filter=in_c, + num_filters=c, + stride=s, + filter_size=3, + padding=1, + expansion_factor=t) self._inv_blocks = [] for i in range(1, n): - tmp = self.add_sublayer( - sublayer=InvertedResidualUnit( - num_channels=c, - num_in_filter=c, - num_filters=c, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t), - name=self.full_name() + "_" + str(i + 1)) + tmp = self.add_sublayer(sublayer=InvertedResidualUnit( + num_channels=c, + num_in_filter=c, + num_filters=c, + stride=1, + filter_size=3, + padding=1, + expansion_factor=t), + name=self.full_name() + "_" + str(i + 1)) self._inv_blocks.append(tmp) def forward(self, inputs): @@ -332,6 +323,7 @@ class InvresiBlocks(fluid.dygraph.Layer): class MobileNetV2(fluid.dygraph.Layer): + def __init__(self, class_dim=1000, scale=1.0): super(MobileNetV2, self).__init__() self.scale = scale @@ -348,13 +340,12 @@ class MobileNetV2(fluid.dygraph.Layer): ] #1. conv1 - self._conv1 = ConvBNLayer( - num_channels=3, - num_filters=int(32 * scale), - filter_size=3, - stride=2, - act=None, - padding=1) + self._conv1 = ConvBNLayer(num_channels=3, + num_filters=int(32 * scale), + filter_size=3, + stride=2, + act=None, + padding=1) #2. bottleneck sequences self._invl = [] @@ -363,33 +354,33 @@ class MobileNetV2(fluid.dygraph.Layer): for layer_setting in bottleneck_params_list: t, c, n, s = layer_setting i += 1 - tmp = self.add_sublayer( - sublayer=InvresiBlocks( - in_c=in_c, t=t, c=int(c * scale), n=n, s=s), - name='conv' + str(i)) + tmp = self.add_sublayer(sublayer=InvresiBlocks(in_c=in_c, + t=t, + c=int(c * scale), + n=n, + s=s), + name='conv' + str(i)) self._invl.append(tmp) in_c = int(c * scale) #3. last_conv self._out_c = int(1280 * scale) if scale > 1.0 else 1280 - self._conv9 = ConvBNLayer( - num_channels=in_c, - num_filters=self._out_c, - filter_size=1, - stride=1, - act=None, - padding=0) + self._conv9 = ConvBNLayer(num_channels=in_c, + num_filters=self._out_c, + filter_size=1, + stride=1, + act=None, + padding=0) #4. pool self._pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) #5. 
fc tmp_param = ParamAttr(name=self.full_name() + "fc10_weights") - self._fc = Linear( - self._out_c, - class_dim, - param_attr=tmp_param, - bias_attr=ParamAttr(name="fc10_offset")) + self._fc = Linear(self._out_c, + class_dim, + param_attr=tmp_param, + bias_attr=ParamAttr(name="fc10_offset")) @declarative def forward(self, inputs): @@ -439,8 +430,8 @@ class Args(object): class_dim = 50 print_step = 1 train_step = 10 - place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() model_save_dir = None model_save_prefix = None model_filename = None @@ -486,8 +477,8 @@ def train_mobilenet(args, to_static): t_end = time.time() softmax_out = fluid.layers.softmax(out, use_cudnn=False) - loss = fluid.layers.cross_entropy( - input=softmax_out, label=label) + loss = fluid.layers.cross_entropy(input=softmax_out, + label=label) avg_loss = fluid.layers.mean(x=loss) acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) @@ -523,12 +514,11 @@ def predict_static(args, data): exe = fluid.Executor(args.place) # load inference model - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - args.model_save_dir, - executor=exe, - model_filename=args.model_filename, - params_filename=args.params_filename) + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(args.model_save_dir, + executor=exe, + model_filename=args.model_filename, + params_filename=args.params_filename) pred_res = exe.run(inference_program, feed={feed_target_names[0]: data}, @@ -571,6 +561,7 @@ def predict_analysis_inference(args, data): class TestMobileNet(unittest.TestCase): + def setUp(self): self.args = Args() self.temp_dir = tempfile.TemporaryDirectory() @@ -594,9 +585,8 @@ class TestMobileNet(unittest.TestCase): def assert_same_loss(self, model_name): dy_out = self.train(model_name, to_static=False) st_out = self.train(model_name, to_static=True) - self.assertTrue( - np.allclose(dy_out, st_out), - msg="dy_out: {}, st_out: {}".format(dy_out, st_out)) + self.assertTrue(np.allclose(dy_out, st_out), + msg="dy_out: {}, st_out: {}".format(dy_out, st_out)) def assert_same_predict(self, model_name): self.args.model = model_name @@ -612,17 +602,15 @@ class TestMobileNet(unittest.TestCase): st_pre = predict_static(self.args, image) dy_jit_pre = predict_dygraph_jit(self.args, image) predictor_pre = predict_analysis_inference(self.args, image) - self.assertTrue( - np.allclose(dy_pre, st_pre), - msg="dy_pre:\n {}\n, st_pre: \n{}.".format(dy_pre, st_pre)) - self.assertTrue( - np.allclose(dy_jit_pre, st_pre), - msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format(dy_jit_pre, st_pre)) - self.assertTrue( - np.allclose( - predictor_pre, st_pre, atol=1e-5), - msg="inference_pred_res:\n {}\n, st_pre: \n{}.".format( - predictor_pre, st_pre)) + self.assertTrue(np.allclose(dy_pre, st_pre), + msg="dy_pre:\n {}\n, st_pre: \n{}.".format( + dy_pre, st_pre)) + self.assertTrue(np.allclose(dy_jit_pre, st_pre), + msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format( + dy_jit_pre, st_pre)) + self.assertTrue(np.allclose(predictor_pre, st_pre, atol=1e-5), + msg="inference_pred_res:\n {}\n, st_pre: \n{}.".format( + predictor_pre, st_pre)) def test_mobile_net(self): # MobileNet-V1 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py 
index a39b5d7cd1a..bf8252b56ea 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,6 +21,7 @@ from paddle.static import InputSpec class MySub(paddle.nn.Layer): + def __init__(self): super(MySub, self).__init__() @@ -29,6 +30,7 @@ class MySub(paddle.nn.Layer): class NetWithOpAttr(paddle.nn.Layer): + def __init__(self, in_num, out_num): super(NetWithOpAttr, self).__init__() @@ -53,6 +55,7 @@ class NetWithOpAttr(paddle.nn.Layer): class CheckOpAttr(unittest.TestCase): + def setUp(self): self.in_num = 16 self.out_num = 16 @@ -88,8 +91,8 @@ class CheckOpAttr(unittest.TestCase): self.assertEqual(len(net.linear._forward_pre_hooks), 1) self.assertEqual(len(net.linear._forward_post_hooks), 1) # to_static - net = paddle.jit.to_static( - net, input_spec=[InputSpec.from_tensor(self.x)]) + net = paddle.jit.to_static(net, + input_spec=[InputSpec.from_tensor(self.x)]) # assert attrs have be set. self.check_op_attrs(net.forward.concrete_program.main_program) @@ -103,8 +106,8 @@ class CheckOpAttr(unittest.TestCase): ops = cur_block.ops for op in ops: if op.type not in self.infos: continue - for attr_name, expect_vals in six.iteritems(self.infos[ - op.type]): + for attr_name, expect_vals in six.iteritems( + self.infos[op.type]): op_vals = op.desc.attr(attr_name) if not isinstance(expect_vals, list): expect_vals = [expect_vals] @@ -120,9 +123,8 @@ class CheckOpAttr(unittest.TestCase): def test_set_op_attrs_with_sub_block(self): net = NetWithOpAttr(self.in_num, self.out_num) # set attrs - net.linear._set_op_attrs({ - "int_vals": [0, 0] - }) # test overwrite behavior + net.linear._set_op_attrs({"int_vals": [0, + 0]}) # test overwrite behavior net.linear._set_op_attrs(self.fc_attrs) net.bn._set_op_attrs(self.bn_attrs) net.sub._set_op_attrs(self.sub_attrs) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_param_guard.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_param_guard.py index cd3c76412fe..1a4eca95920 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_param_guard.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_param_guard.py @@ -20,6 +20,7 @@ from paddle.jit import to_static, ProgramTranslator class NetWithParameterList(paddle.nn.Layer): + def __init__(self, in_size, out_size): super(NetWithParameterList, self).__init__() weight = self.create_parameter([in_size, out_size]) @@ -35,6 +36,7 @@ class NetWithParameterList(paddle.nn.Layer): class NetWithParameterListIter(NetWithParameterList): + def __init__(self, in_size, out_size): super(NetWithParameterListIter, self).__init__(in_size, out_size) @@ -49,6 +51,7 @@ class NetWithParameterListIter(NetWithParameterList): class TestParameterList(unittest.TestCase): + def setUp(self): self.seed = 2021 self.iter_num = 5 @@ -77,28 +80,26 @@ class TestParameterList(unittest.TestCase): def test_parameter_list(self): static_loss = self.train(False, 
to_static=True) dygraph_loss = self.train(False, to_static=False) - self.assertTrue( - np.allclose(dygraph_loss, static_loss), - msg='dygraph result is {}\nstatic result is {}'.format(dygraph_loss, - static_loss)) + self.assertTrue(np.allclose(dygraph_loss, static_loss), + msg='dygraph result is {}\nstatic result is {}'.format( + dygraph_loss, static_loss)) def test_parameter_list_iter(self): static_loss = self.train(True, to_static=True) dygraph_loss = self.train(True, to_static=False) - self.assertTrue( - np.allclose(dygraph_loss, static_loss), - msg='dygraph result is {}\nstatic result is {}'.format(dygraph_loss, - static_loss)) + self.assertTrue(np.allclose(dygraph_loss, static_loss), + msg='dygraph result is {}\nstatic result is {}'.format( + dygraph_loss, static_loss)) class NetWithRawParamList(paddle.nn.Layer): + def __init__(self, in_size, out_size): super(NetWithRawParamList, self).__init__() weight = self.add_parameter('w', self.create_parameter([in_size, out_size])) bias = self.add_parameter( - 'b', self.create_parameter( - [out_size], is_bias=True)) + 'b', self.create_parameter([out_size], is_bias=True)) self.params = [weight] self.bias_dict = {'b': bias} @@ -111,6 +112,7 @@ class NetWithRawParamList(paddle.nn.Layer): class TestRawParameterList(unittest.TestCase): + def setUp(self): self.seed = 2021 self.iter_num = 5 @@ -140,13 +142,13 @@ class TestRawParameterList(unittest.TestCase): def test_parameter_list(self): static_loss = self.train(to_static=True) dygraph_loss = self.train(to_static=False) - self.assertTrue( - np.allclose(dygraph_loss, static_loss), - msg='dygraph result is {}\nstatic result is {}'.format(dygraph_loss, - static_loss)) + self.assertTrue(np.allclose(dygraph_loss, static_loss), + msg='dygraph result is {}\nstatic result is {}'.format( + dygraph_loss, static_loss)) class NetWithSubLayerParamList(paddle.nn.Layer): + def __init__(self, sub_layer): super(NetWithSubLayerParamList, self).__init__() self.sub_layer = sub_layer @@ -162,6 +164,7 @@ class NetWithSubLayerParamList(paddle.nn.Layer): class TestSubLayerParameterList(TestRawParameterList): + def init_net(self): fc = paddle.nn.Linear(10, 3) self.net = NetWithSubLayerParamList(fc) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py index 4f55dbd324c..8549d03f7e2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py @@ -55,6 +55,7 @@ def fake_data(shape): class TestWithNestedInput(unittest.TestCase): + def setUp(self): self.x = None self.y = None @@ -63,7 +64,8 @@ class TestWithNestedInput(unittest.TestCase): self.x = fake_data([10, 16]) self.y = [ fake_data([10, 16]), "preprocess_cmd", 64, { - 'z': [fake_data([10, 12]), fake_data([10, 12])], + 'z': [fake_data([10, 12]), + fake_data([10, 12])], 'c': fake_data([10, 10]), 'd': { 'da': 12, @@ -91,6 +93,7 @@ class TestWithNestedInput(unittest.TestCase): class TestWithNestedOutput(unittest.TestCase): + def setUp(self): self.x = None self.y = None @@ -126,6 +129,7 @@ class TestWithNestedOutput(unittest.TestCase): class TestWithTrainAndEval(unittest.TestCase): + def test_switch_eval_and_train(self): program_translator = ProgramTranslator() @@ -155,6 +159,7 @@ class TestWithTrainAndEval(unittest.TestCase): class TestWithNoGrad(unittest.TestCase): + def test_with_no_grad(self): with fluid.dygraph.guard(): linear_net = Linear() @@ 
-170,6 +175,7 @@ class TestWithNoGrad(unittest.TestCase): class GPT2LMHeadModel(fluid.dygraph.Layer): + def __init__(self): super(GPT2LMHeadModel, self).__init__() self.embedding0 = paddle.nn.Embedding(20, 16) @@ -185,6 +191,7 @@ class GPT2LMHeadModel(fluid.dygraph.Layer): class TestPruneUnusedParamInProgram(unittest.TestCase): + def test_prune(self): input_ids = np.array([[15, 11, 6, 3, 18, 13]]).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py index aabfd3b2c48..ae773f36d6f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py @@ -154,10 +154,11 @@ def dyfunc_print_continue_vars(x): class TestPrintBase(unittest.TestCase): + def setUp(self): self.input = numpy.ones(5).astype("int32") - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.set_test_func() def set_test_func(self): @@ -177,6 +178,7 @@ class TestPrintBase(unittest.TestCase): class TestPrintVariable(TestPrintBase): + def set_test_func(self): self.dygraph_func = dyfunc_print_variable @@ -186,31 +188,37 @@ class TestPrintVariable(TestPrintBase): class TestPrintNdArray(TestPrintVariable): + def set_test_func(self): self.dygraph_func = dyfunc_print_ndarray class TestPrintWithFormat(TestPrintVariable): + def set_test_func(self): self.dygraph_func = dyfunc_print_with_format class TestPrintWithFormat2(TestPrintVariable): + def set_test_func(self): self.dygraph_func = dyfunc_print_with_format2 class TestPrintWithIfElse(TestPrintVariable): + def set_test_func(self): self.dygraph_func = dyfunc_print_with_ifelse class TestPrintMultipleVar(TestPrintVariable): + def set_test_func(self): self.dygraph_func = dyfunc_print_multi_vars class TestPrintContinueVar(TestPrintVariable): + def set_test_func(self): self.dygraph_func = dyfunc_print_continue_vars diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py index 4e90c73baa9..b656a4dc595 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py @@ -64,6 +64,7 @@ def get_source_code(func): class StaticCode1(): + def dyfunc_with_if_else(x_v, label=None): __return_value_init_0 = paddle.fluid.layers.fill_constant( shape=[1], dtype='float64', value=0.0, name='__return_value_init_0') @@ -107,8 +108,8 @@ class StaticCode1(): __return_value_0 = _jst.convert_ifelse( _jst.convert_logical_not(__return_0), true_fn_2, false_fn_2, - (__return_0, __return_value_0, - x_v), (__return_value_0, ), (__return_value_0, )) + (__return_0, __return_value_0, x_v), (__return_value_0, ), + (__return_value_0, )) return __return_value_0 @@ -157,12 +158,13 @@ class StaticCode2(): __return_value_1 = _jst.convert_ifelse( _jst.convert_logical_not(__return_2), true_fn_5, false_fn_5, - (__return_2, __return_value_1, - x_v), (__return_value_1, ), (__return_value_1, )) + (__return_2, __return_value_1, x_v), (__return_value_1, ), + (__return_value_1, )) return __return_value_1 class NetWithError(fluid.dygraph.layers.Layer): + @declarative def forward(self, x): linear = fluid.dygraph.Linear(32, 64) @@ -171,6 +173,7 @@ class 
NetWithError(fluid.dygraph.layers.Layer): class TestDygraphToStaticCode(unittest.TestCase): + def setUp(self): # set to print all string diff when assertEqual fails self.maxDiff = None @@ -189,6 +192,7 @@ class TestDygraphToStaticCode(unittest.TestCase): class TestEnableDeclarative(unittest.TestCase): + def setUp(self): self.x = np.random.randn(30, 10, 32).astype('float32') self.weight = np.random.randn(32, 64).astype('float32') @@ -212,8 +216,9 @@ class TestEnableDeclarative(unittest.TestCase): dygraph_output = self.program_translator.get_output( simple_func, self.x, self.weight) self.assertTrue( - np.allclose( - static_output.numpy(), dygraph_output.numpy(), atol=1e-4)) + np.allclose(static_output.numpy(), + dygraph_output.numpy(), + atol=1e-4)) def test_enable_disable_get_func(self): @@ -230,14 +235,14 @@ class TestEnableDeclarative(unittest.TestCase): self.assertTrue(callable(dygraph_func)) dygraph_output = dygraph_func(self.x, self.weight) self.assertTrue( - isinstance(dygraph_output, (fluid.core.VarBase, - fluid.core.eager.Tensor))) + isinstance(dygraph_output, + (fluid.core.VarBase, fluid.core.eager.Tensor))) def test_enable_disable_get_program(self): self.program_translator.enable(True) - static_output = self.program_translator.get_program(simple_func, self.x, - self.weight) + static_output = self.program_translator.get_program( + simple_func, self.x, self.weight) self.assertTrue(isinstance(static_output, tuple)) self.assertEqual(len(static_output), 4) self.assertTrue(isinstance(static_output[0], fluid.Program)) @@ -254,8 +259,8 @@ class TestEnableDeclarative(unittest.TestCase): dygraph_output = self.program_translator.get_program( simple_func, self.x, self.weight) self.assertTrue( - isinstance(dygraph_output, (fluid.core.VarBase, - fluid.core.eager.Tensor))) + isinstance(dygraph_output, + (fluid.core.VarBase, fluid.core.eager.Tensor))) def test_enable_disable_declarative(self): @@ -267,11 +272,13 @@ class TestEnableDeclarative(unittest.TestCase): with fluid.dygraph.guard(): dygraph_output = decorated_simple_func(self.x, self.weight) self.assertTrue( - np.allclose( - static_output.numpy(), dygraph_output.numpy(), atol=1e-4)) + np.allclose(static_output.numpy(), + dygraph_output.numpy(), + atol=1e-4)) class Net(fluid.dygraph.layers.Layer): + def __init__(self): super(Net, self).__init__() @@ -280,6 +287,7 @@ class Net(fluid.dygraph.layers.Layer): class TestErrorWithInitFromStaticMode(unittest.TestCase): + def setUp(self): self.program_translator = ProgramTranslator() self.x = np.random.randn(10, 32).astype('float32') @@ -300,6 +308,7 @@ class TestErrorWithInitFromStaticMode(unittest.TestCase): class SwitchModeNet(paddle.nn.Layer): + def __init__(self): super(SwitchModeNet, self).__init__() @@ -318,6 +327,7 @@ def switch_mode_funciton(): class TestFunctionTrainEvalMode(unittest.TestCase): + def test_switch_mode(self): paddle.disable_static() switch_mode_funciton.eval() @@ -347,6 +357,7 @@ class TestFunctionTrainEvalMode(unittest.TestCase): class TestRemoveCommentInDy2St(unittest.TestCase): + def func_with_comment(self): # Comment1 x = paddle.to_tensor([1, 2, 3]) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py index 7b7ff66343a..75f17e22e46 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py @@ -34,6 +34,7 @@ program_translator = ProgramTranslator() class 
SimpleLSTMRNN(fluid.Layer): + def __init__(self, hidden_size, num_steps, @@ -94,8 +95,9 @@ class SimpleLSTMRNN(fluid.Layer): gate_input = fluid.layers.matmul(x=nn, y=weight_1) gate_input = fluid.layers.elementwise_add(gate_input, bias) - i, j, f, o = fluid.layers.split( - gate_input, num_or_sections=4, dim=-1) + i, j, f, o = fluid.layers.split(gate_input, + num_or_sections=4, + dim=-1) c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid( i) * fluid.layers.tanh(j) m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o) @@ -124,6 +126,7 @@ class SimpleLSTMRNN(fluid.Layer): class PtbModel(fluid.Layer): + def __init__(self, hidden_size, vocab_size, @@ -138,12 +141,11 @@ class PtbModel(fluid.Layer): self.num_layers = num_layers self.num_steps = num_steps self.dropout = dropout - self.simple_lstm_rnn = SimpleLSTMRNN( - hidden_size, - num_steps, - num_layers=num_layers, - init_scale=init_scale, - dropout=dropout) + self.simple_lstm_rnn = SimpleLSTMRNN(hidden_size, + num_steps, + num_layers=num_layers, + init_scale=init_scale, + dropout=dropout) self.embedding = Embedding( size=[vocab_size, hidden_size], dtype='float32', @@ -186,14 +188,15 @@ class PtbModel(fluid.Layer): x_emb, dropout_prob=self.dropout, dropout_implementation='upscale_in_train') - rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, - init_c) + rnn_out, last_hidden, last_cell = self.simple_lstm_rnn( + x_emb, init_h, init_c) projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = fluid.layers.elementwise_add(projection, self.softmax_bias) - loss = fluid.layers.softmax_with_cross_entropy( - logits=projection, label=label, soft_label=False) + loss = fluid.layers.softmax_with_cross_entropy(logits=projection, + label=label, + soft_label=False) loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps]) loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_sum(loss) @@ -220,16 +223,15 @@ def train(place): with fluid.dygraph.guard(place): paddle.seed(SEED) paddle.framework.random._manual_program_seed(SEED) - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale, - dropout=dropout) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale, + dropout=dropout) - sgd = SGDOptimizer( - learning_rate=1e-3, parameter_list=ptb_model.parameters()) + sgd = SGDOptimizer(learning_rate=1e-3, + parameter_list=ptb_model.parameters()) for epoch_id in range(max_epoch): @@ -237,10 +239,10 @@ def train(place): iters = 0.0 total_sample = 0 - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_hidden_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) @@ -255,8 +257,8 @@ def train(place): x = to_variable(x_data) y = to_variable(y_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) out_loss = dy_loss.numpy() dy_loss.backward() @@ -268,8 +270,9 @@ def train(place): total_sample += 1 if step_id % PRINT_STEP == 0: if step_id == 0: - logging.info("epoch %d | step %d, loss %0.3f" % ( - epoch_id, 
step_id, total_loss / total_sample)) + logging.info( + "epoch %d | step %d, loss %0.3f" % + (epoch_id, step_id, total_loss / total_sample)) avg_batch_time = time.time() else: speed = PRINT_STEP / (time.time() - avg_batch_time) @@ -293,6 +296,7 @@ def train_static(place): class TestPtb(unittest.TestCase): + def setUp(self): self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \ else fluid.CPUPlace() @@ -301,16 +305,15 @@ class TestPtb(unittest.TestCase): loss_1, hidden_1, cell_1 = train_static(self.place) loss_2, hidden_2, cell_2 = train_dygraph(self.place) - self.assertTrue( - np.allclose(loss_1, loss_2), - msg="static loss: {} \ndygraph loss: {}".format(loss_1, loss_2)) - self.assertTrue( - np.allclose(hidden_1, hidden_2), - msg="static hidden: {} \ndygraph acc1: {}".format(hidden_1, - hidden_2)) - self.assertTrue( - np.allclose(cell_1, cell_2), - msg="static cell: {} \ndygraph cell: {}".format(cell_1, cell_2)) + self.assertTrue(np.allclose(loss_1, loss_2), + msg="static loss: {} \ndygraph loss: {}".format( + loss_1, loss_2)) + self.assertTrue(np.allclose(hidden_1, hidden_2), + msg="static hidden: {} \ndygraph acc1: {}".format( + hidden_1, hidden_2)) + self.assertTrue(np.allclose(cell_1, cell_2), + msg="static cell: {} \ndygraph cell: {}".format( + cell_1, cell_2)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py index 0d45d7edb27..5d0d488915d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py @@ -28,6 +28,7 @@ program_translator = paddle.jit.ProgramTranslator() class SimpleLSTMRNN(paddle.nn.Layer): + def __init__(self, hidden_size, num_steps, @@ -86,8 +87,9 @@ class SimpleLSTMRNN(paddle.nn.Layer): gate_input = paddle.matmul(x=nn, y=weight_1) gate_input = paddle.add(x=gate_input, y=bias) - i, j, f, o = paddle.split( - x=gate_input, num_or_sections=4, axis=-1) + i, j, f, o = paddle.split(x=gate_input, + num_or_sections=4, + axis=-1) c = pre_cell * paddle.nn.functional.sigmoid( f) + paddle.nn.functional.sigmoid(i) * paddle.tanh(j) m = paddle.tanh(c) * paddle.nn.functional.sigmoid(o) @@ -116,6 +118,7 @@ class SimpleLSTMRNN(paddle.nn.Layer): class PtbModel(paddle.nn.Layer): + def __init__(self, hidden_size, vocab_size, @@ -130,20 +133,19 @@ class PtbModel(paddle.nn.Layer): self.num_layers = num_layers self.num_steps = num_steps self.dropout = dropout - self.simple_lstm_rnn = SimpleLSTMRNN( - hidden_size, - num_steps, - num_layers=num_layers, - init_scale=init_scale, - dropout=dropout) + self.simple_lstm_rnn = SimpleLSTMRNN(hidden_size, + num_steps, + num_layers=num_layers, + init_scale=init_scale, + dropout=dropout) self.embedding = paddle.fluid.dygraph.nn.Embedding( size=[vocab_size, hidden_size], dtype='float32', is_sparse=False, param_attr=paddle.ParamAttr( name='embedding_para', - initializer=paddle.nn.initializer.Uniform( - low=-init_scale, high=init_scale))) + initializer=paddle.nn.initializer.Uniform(low=-init_scale, + high=init_scale))) self.softmax_weight = self.create_parameter( attr=paddle.ParamAttr(), shape=[self.hidden_size, self.vocab_size], @@ -163,23 +165,23 @@ class PtbModel(paddle.nn.Layer): @paddle.jit.to_static def forward(self, input, label, init_hidden, init_cell): - init_h = paddle.reshape( - init_hidden, shape=[self.num_layers, -1, self.hidden_size]) + init_h = paddle.reshape(init_hidden, + 
shape=[self.num_layers, -1, self.hidden_size]) - init_c = paddle.reshape( - init_cell, shape=[self.num_layers, -1, self.hidden_size]) + init_c = paddle.reshape(init_cell, + shape=[self.num_layers, -1, self.hidden_size]) x_emb = self.embedding(input) - x_emb = paddle.reshape( - x_emb, shape=[-1, self.num_steps, self.hidden_size]) + x_emb = paddle.reshape(x_emb, + shape=[-1, self.num_steps, self.hidden_size]) if self.dropout is not None and self.dropout > 0.0: x_emb = paddle.nn.functional.dropout( x_emb, dropout_prob=self.dropout, dropout_implementation='upscale_in_train') - rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, - init_c) + rnn_out, last_hidden, last_cell = self.simple_lstm_rnn( + x_emb, init_h, init_c) projection = paddle.matmul(x=rnn_out, y=self.softmax_weight) projection = paddle.add(x=projection, y=self.softmax_bias) @@ -212,13 +214,12 @@ def train(place): paddle.disable_static(place) paddle.seed(SEED) paddle.framework.random._manual_program_seed(SEED) - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale, - dropout=dropout) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale, + dropout=dropout) sgd = paddle.optimizer.SGD(learning_rate=1e-3, parameters=ptb_model.parameters()) @@ -229,15 +230,19 @@ def train(place): iters = 0.0 total_sample = 0 - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - - init_hidden = paddle.to_tensor( - data=init_hidden_data, dtype=None, place=None, stop_gradient=True) - init_cell = paddle.to_tensor( - data=init_cell_data, dtype=None, place=None, stop_gradient=True) + init_hidden_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') + + init_hidden = paddle.to_tensor(data=init_hidden_data, + dtype=None, + place=None, + stop_gradient=True) + init_cell = paddle.to_tensor(data=init_cell_data, + dtype=None, + place=None, + stop_gradient=True) for step_id in range(batch_num): x_data = np.arange(12).reshape(4, 3).astype('int64') y_data = np.arange(1, 13).reshape(4, 3).astype('int64') @@ -246,10 +251,14 @@ def train(place): x_data = x_data.reshape((-1, num_steps, 1)) y_data = y_data.reshape((-1, num_steps, 1)) - x = paddle.to_tensor( - data=x_data, dtype=None, place=None, stop_gradient=True) - y = paddle.to_tensor( - data=y_data, dtype=None, place=None, stop_gradient=True) + x = paddle.to_tensor(data=x_data, + dtype=None, + place=None, + stop_gradient=True) + y = paddle.to_tensor(data=y_data, + dtype=None, + place=None, + stop_gradient=True) dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, init_cell) @@ -290,6 +299,7 @@ def train_static(place): class TestPtb(unittest.TestCase): + def setUp(self): self.place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda() \ else paddle.CPUPlace() @@ -298,16 +308,15 @@ class TestPtb(unittest.TestCase): loss_1, hidden_1, cell_1 = train_static(self.place) loss_2, hidden_2, cell_2 = train_dygraph(self.place) - self.assertTrue( - np.allclose(loss_1, loss_2), - msg="static loss: {} \ndygraph loss: {}".format(loss_1, loss_2)) - self.assertTrue( - np.allclose(hidden_1, hidden_2), - msg="static hidden: {} \ndygraph acc1: {}".format(hidden_1, - hidden_2)) - self.assertTrue( - 
np.allclose(cell_1, cell_2), - msg="static cell: {} \ndygraph cell: {}".format(cell_1, cell_2)) + self.assertTrue(np.allclose(loss_1, loss_2), + msg="static loss: {} \ndygraph loss: {}".format( + loss_1, loss_2)) + self.assertTrue(np.allclose(hidden_1, hidden_2), + msg="static hidden: {} \ndygraph acc1: {}".format( + hidden_1, hidden_2)) + self.assertTrue(np.allclose(cell_1, cell_2), + msg="static cell: {} \ndygraph cell: {}".format( + cell_1, cell_2)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py index 4ddca0c689e..cc373f07e99 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py @@ -29,6 +29,7 @@ program_translator = ProgramTranslator() class Policy(Layer): + def __init__(self): super(Policy, self).__init__() @@ -188,9 +189,9 @@ def train(args, place, to_static): running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward if i_episode % args.log_interval == 0: print( - 'Episode {}\tLast reward: {:.2f}\tAverage reward: {:.2f}\t loss_probs: {}'. - format(i_episode, ep_reward, running_reward, - loss.numpy()[0])) + 'Episode {}\tLast reward: {:.2f}\tAverage reward: {:.2f}\t loss_probs: {}' + .format(i_episode, ep_reward, running_reward, + loss.numpy()[0])) if i_episode > args.train_step: break @@ -199,6 +200,7 @@ def train(args, place, to_static): class TestDeclarative(unittest.TestCase): + def setUp(self): self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \ else fluid.CPUPlace() @@ -207,9 +209,9 @@ class TestDeclarative(unittest.TestCase): def test_train(self): st_out = train(self.args, self.place, to_static=True) dy_out = train(self.args, self.place, to_static=False) - self.assertTrue( - np.allclose(st_out, dy_out), - msg="dy_out:\n {}\n st_out:\n{}\n".format(dy_out, st_out)) + self.assertTrue(np.allclose(st_out, dy_out), + msg="dy_out:\n {}\n st_out:\n{}\n".format( + dy_out, st_out)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index 1a531c65bbf..553ad00a6d2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -58,6 +58,7 @@ def optimizer_setting(parameter_list=None): class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -67,15 +68,14 @@ class ConvBNLayer(fluid.dygraph.Layer): act=None): super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - bias_attr=False) + self._conv = Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + bias_attr=False) self._batch_norm = BatchNorm(num_filters, act=act) @@ -87,32 +87,29 @@ class ConvBNLayer(fluid.dygraph.Layer): class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, stride, shortcut=True): super(BottleneckBlock, self).__init__() - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu') - 
self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu') - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None) + self.conv0 = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu') + self.conv1 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu') + self.conv2 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None) if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=stride) + self.short = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride) self.shortcut = shortcut @@ -130,12 +127,13 @@ class BottleneckBlock(fluid.dygraph.Layer): y = fluid.layers.elementwise_add(x=short, y=conv2) - layer_helper = fluid.layer_helper.LayerHelper( - self.full_name(), act='relu') + layer_helper = fluid.layer_helper.LayerHelper(self.full_name(), + act='relu') return layer_helper.append_activation(y) class ResNet(fluid.dygraph.Layer): + def __init__(self, layers=50, class_dim=102): super(ResNet, self).__init__() @@ -153,10 +151,15 @@ class ResNet(fluid.dygraph.Layer): num_channels = [64, 256, 512, 1024] num_filters = [64, 128, 256, 512] - self.conv = ConvBNLayer( - num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu') - self.pool2d_max = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.conv = ConvBNLayer(num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu') + self.pool2d_max = Pool2D(pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') self.bottleneck_block_list = [] for block in range(len(depth)): @@ -164,17 +167,17 @@ class ResNet(fluid.dygraph.Layer): for i in range(depth[block]): bottleneck_block = self.add_sublayer( 'bb_%d_%d' % (block, i), - BottleneckBlock( - num_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut)) + BottleneckBlock(num_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut)) self.bottleneck_block_list.append(bottleneck_block) shortcut = True - self.pool2d_avg = Pool2D( - pool_size=7, pool_type='avg', global_pooling=True) + self.pool2d_avg = Pool2D(pool_size=7, + pool_type='avg', + global_pooling=True) self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 4 * 1 * 1 @@ -200,6 +203,7 @@ class ResNet(fluid.dygraph.Layer): def reader_decorator(reader): + def __reader__(): for item in reader(): img = np.array(item[0]).astype('float32').reshape(3, 224, 224) @@ -210,6 +214,7 @@ def reader_decorator(reader): class ResNetHelper: + def __init__(self): self.temp_dir = tempfile.TemporaryDirectory() self.model_save_dir = os.path.join(self.temp_dir.name, 'inference') @@ -231,18 +236,18 @@ class ResNetHelper: paddle.seed(SEED) paddle.framework.random._manual_program_seed(SEED) - train_reader = paddle.batch( - reader_decorator(paddle.dataset.flowers.train(use_xmap=False)), - batch_size=batch_size, - drop_last=True) - data_loader = fluid.io.DataLoader.from_generator( - capacity=5, iterable=True) + train_reader = paddle.batch(reader_decorator( + paddle.dataset.flowers.train(use_xmap=False)), 
+ batch_size=batch_size, + drop_last=True) + data_loader = fluid.io.DataLoader.from_generator(capacity=5, + iterable=True) data_loader.set_sample_list_generator(train_reader) resnet = ResNet() if to_static: - resnet = paddle.jit.to_static( - resnet, build_strategy=build_strategy) + resnet = paddle.jit.to_static(resnet, + build_strategy=build_strategy) optimizer = optimizer_setting(parameter_list=resnet.parameters()) for epoch in range(epoch_num): @@ -258,10 +263,12 @@ class ResNetHelper: pred = resnet(img) loss = fluid.layers.cross_entropy(input=pred, label=label) avg_loss = fluid.layers.mean(x=loss) - acc_top1 = fluid.layers.accuracy( - input=pred, label=label, k=1) - acc_top5 = fluid.layers.accuracy( - input=pred, label=label, k=5) + acc_top1 = fluid.layers.accuracy(input=pred, + label=label, + k=1) + acc_top5 = fluid.layers.accuracy(input=pred, + label=label, + k=5) avg_loss.backward() optimizer.minimize(avg_loss) @@ -308,12 +315,11 @@ class ResNetHelper: def predict_static(self, data): paddle.enable_static() exe = fluid.Executor(place) - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - self.model_save_dir, - executor=exe, - model_filename=self.model_filename, - params_filename=self.params_filename) + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(self.model_save_dir, + executor=exe, + model_filename=self.model_filename, + params_filename=self.params_filename) pred_res = exe.run(inference_program, feed={feed_target_names[0]: data}, @@ -338,6 +344,7 @@ class ResNetHelper: class TestResnet(unittest.TestCase): + def setUp(self): self.resnet_helper = ResNetHelper() @@ -351,24 +358,22 @@ class TestResnet(unittest.TestCase): st_pre = self.resnet_helper.predict_static(image) dy_jit_pre = self.resnet_helper.predict_dygraph_jit(image) predictor_pre = self.resnet_helper.predict_analysis_inference(image) - self.assertTrue( - np.allclose(dy_pre, st_pre), - msg="dy_pre:\n {}\n, st_pre: \n{}.".format(dy_pre, st_pre)) - self.assertTrue( - np.allclose(dy_jit_pre, st_pre), - msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format(dy_jit_pre, st_pre)) - self.assertTrue( - np.allclose(predictor_pre, st_pre), - msg="predictor_pre:\n {}\n, st_pre: \n{}.".format(predictor_pre, - st_pre)) + self.assertTrue(np.allclose(dy_pre, st_pre), + msg="dy_pre:\n {}\n, st_pre: \n{}.".format( + dy_pre, st_pre)) + self.assertTrue(np.allclose(dy_jit_pre, st_pre), + msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format( + dy_jit_pre, st_pre)) + self.assertTrue(np.allclose(predictor_pre, st_pre), + msg="predictor_pre:\n {}\n, st_pre: \n{}.".format( + predictor_pre, st_pre)) def test_resnet(self): static_loss = self.train(to_static=True) dygraph_loss = self.train(to_static=False) - self.assertTrue( - np.allclose(static_loss, dygraph_loss), - msg="static_loss: {} \n dygraph_loss: {}".format(static_loss, - dygraph_loss)) + self.assertTrue(np.allclose(static_loss, dygraph_loss), + msg="static_loss: {} \n dygraph_loss: {}".format( + static_loss, dygraph_loss)) self.verify_predict() def test_in_static_mode_mkldnn(self): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py index 1d45e906cd3..cfdd7d9df51 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py @@ -62,11 +62,10 @@ def train(to_static, build_strategy=None): for batch_id in range(100): 
start_time = time.time() img = paddle.to_tensor( - np.random.random([batch_size, 3, 224, 224]).astype( - 'float32')) + np.random.random([batch_size, 3, 224, + 224]).astype('float32')) label = paddle.to_tensor( - np.random.randint( - 0, 100, [batch_size, 1], dtype='int64')) + np.random.randint(0, 100, [batch_size, 1], dtype='int64')) img.stop_gradient = True label.stop_gradient = True @@ -102,6 +101,7 @@ def train(to_static, build_strategy=None): class TestResnet(unittest.TestCase): + def train(self, to_static): program_translator.enable(to_static) return train(to_static) @@ -109,10 +109,9 @@ class TestResnet(unittest.TestCase): def test_resnet(self): static_loss = self.train(to_static=True) dygraph_loss = self.train(to_static=False) - self.assertTrue( - np.allclose(static_loss, dygraph_loss), - msg="static_loss: {} \n dygraph_loss: {}".format(static_loss, - dygraph_loss)) + self.assertTrue(np.allclose(static_loss, dygraph_loss), + msg="static_loss: {} \n dygraph_loss: {}".format( + static_loss, dygraph_loss)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py index 49d114730e4..fa0460f5200 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py @@ -50,8 +50,10 @@ def train(to_static, build_strategy=None): optimizer = optimizer_setting(parameter_list=resnet.parameters()) scaler = paddle.amp.GradScaler(init_loss_scaling=1024) - resnet, optimizer = paddle.amp.decorate( - models=resnet, optimizers=optimizer, level='O2', save_dtype='float32') + resnet, optimizer = paddle.amp.decorate(models=resnet, + optimizers=optimizer, + level='O2', + save_dtype='float32') for epoch in range(epoch_num): loss_data = [] @@ -65,16 +67,14 @@ def train(to_static, build_strategy=None): img = paddle.to_tensor( np.random.random([batch_size, 3, 224, 224]).astype('float32')) label = paddle.to_tensor( - np.random.randint( - 0, 100, [batch_size, 1], dtype='int64')) + np.random.randint(0, 100, [batch_size, 1], dtype='int64')) img.stop_gradient = True label.stop_gradient = True - with paddle.amp.auto_cast( - enable=True, - custom_white_list=None, - custom_black_list=None, - level='O2'): + with paddle.amp.auto_cast(enable=True, + custom_white_list=None, + custom_black_list=None, + level='O2'): pred = resnet(img) loss = fluid.layers.cross_entropy(input=pred, label=label) avg_loss = fluid.layers.mean(x=pred) @@ -104,6 +104,7 @@ def train(to_static, build_strategy=None): class TestResnet(unittest.TestCase): + def train(self, to_static): program_translator.enable(to_static) build_strategy = paddle.static.BuildStrategy() @@ -117,11 +118,9 @@ class TestResnet(unittest.TestCase): static_loss = self.train(to_static=True) dygraph_loss = self.train(to_static=False) # NOTE: In pure fp16 training, loss is not stable, so we enlarge atol here. 
- self.assertTrue( - np.allclose( - static_loss, dygraph_loss, atol=1e-3), - msg="static_loss: {} \n dygraph_loss: {}".format(static_loss, - dygraph_loss)) + self.assertTrue(np.allclose(static_loss, dygraph_loss, atol=1e-3), + msg="static_loss: {} \n dygraph_loss: {}".format( + static_loss, dygraph_loss)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py index c79a86015eb..0832c5f523f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py @@ -15,6 +15,7 @@ from __future__ import print_function import os + os.environ["FLAGS_enable_eager_mode"] = "0" import math import time @@ -55,6 +56,7 @@ def optimizer_setting(parameter_list=None): class ConvBNLayer(paddle.nn.Layer): + def __init__(self, num_channels, num_filters, @@ -64,14 +66,13 @@ class ConvBNLayer(paddle.nn.Layer): act=None): super(ConvBNLayer, self).__init__() - self._conv = paddle.nn.Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - bias_attr=False) + self._conv = paddle.nn.Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + bias_attr=False) self._batch_norm = paddle.nn.BatchNorm(num_filters, act=act) @@ -83,32 +84,29 @@ class ConvBNLayer(paddle.nn.Layer): class BottleneckBlock(paddle.nn.Layer): + def __init__(self, num_channels, num_filters, stride, shortcut=True): super(BottleneckBlock, self).__init__() - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu') - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu') - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None) + self.conv0 = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu') + self.conv1 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu') + self.conv2 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None) if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=stride) + self.short = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride) self.shortcut = shortcut @@ -126,12 +124,13 @@ class BottleneckBlock(paddle.nn.Layer): y = paddle.add(x=short, y=conv2) - layer_helper = paddle.fluid.layer_helper.LayerHelper( - self.full_name(), act='relu') + layer_helper = paddle.fluid.layer_helper.LayerHelper(self.full_name(), + act='relu') return layer_helper.append_activation(y) class ResNet(paddle.nn.Layer): + def __init__(self, layers=50, class_dim=102): super(ResNet, self).__init__() @@ -149,10 +148,15 @@ class ResNet(paddle.nn.Layer): num_channels = [64, 256, 512, 1024] num_filters = [64, 128, 256, 512] - self.conv = ConvBNLayer( - num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu') - self.pool2d_max = paddle.fluid.dygraph.Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.conv = ConvBNLayer(num_channels=3, + 
num_filters=64, + filter_size=7, + stride=2, + act='relu') + self.pool2d_max = paddle.fluid.dygraph.Pool2D(pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') self.bottleneck_block_list = [] for block in range(len(depth)): @@ -160,17 +164,17 @@ class ResNet(paddle.nn.Layer): for i in range(depth[block]): bottleneck_block = self.add_sublayer( 'bb_%d_%d' % (block, i), - BottleneckBlock( - num_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut)) + BottleneckBlock(num_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut)) self.bottleneck_block_list.append(bottleneck_block) shortcut = True - self.pool2d_avg = paddle.fluid.dygraph.Pool2D( - pool_size=7, pool_type='avg', global_pooling=True) + self.pool2d_avg = paddle.fluid.dygraph.Pool2D(pool_size=7, + pool_type='avg', + global_pooling=True) self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 4 * 1 * 1 @@ -197,6 +201,7 @@ class ResNet(paddle.nn.Layer): def reader_decorator(reader): + def __reader__(): for item in reader(): img = np.array(item[0]).astype('float32').reshape(3, 224, 224) @@ -207,6 +212,7 @@ def reader_decorator(reader): class TestResnet(unittest.TestCase): + def setUp(self): self.temp_dir = tempfile.TemporaryDirectory() @@ -230,12 +236,12 @@ class TestResnet(unittest.TestCase): paddle.seed(SEED) paddle.framework.random._manual_program_seed(SEED) - train_reader = paddle.batch( - reader_decorator(paddle.dataset.flowers.train(use_xmap=False)), - batch_size=batch_size, - drop_last=True) - data_loader = paddle.io.DataLoader.from_generator( - capacity=5, iterable=True) + train_reader = paddle.batch(reader_decorator( + paddle.dataset.flowers.train(use_xmap=False)), + batch_size=batch_size, + drop_last=True) + data_loader = paddle.io.DataLoader.from_generator(capacity=5, + iterable=True) data_loader.set_sample_list_generator(train_reader) resnet = ResNet() @@ -252,8 +258,8 @@ class TestResnet(unittest.TestCase): img, label = data pred = resnet(img) - loss = paddle.nn.functional.cross_entropy( - input=pred, label=label) + loss = paddle.nn.functional.cross_entropy(input=pred, + label=label) avg_loss = paddle.mean(x=loss) acc_top1 = paddle.metric.accuracy(input=pred, label=label, k=1) acc_top5 = paddle.metric.accuracy(input=pred, label=label, k=5) @@ -296,8 +302,10 @@ class TestResnet(unittest.TestCase): resnet.eval() pred_res = resnet( - paddle.to_tensor( - data=data, dtype=None, place=None, stop_gradient=True)) + paddle.to_tensor(data=data, + dtype=None, + place=None, + stop_gradient=True)) ret = pred_res.numpy() paddle.enable_static() @@ -345,24 +353,22 @@ class TestResnet(unittest.TestCase): st_pre = self.predict_static(image) dy_jit_pre = self.predict_dygraph_jit(image) predictor_pre = self.predict_analysis_inference(image) - self.assertTrue( - np.allclose(dy_pre, st_pre), - msg="dy_pre:\n {}\n, st_pre: \n{}.".format(dy_pre, st_pre)) - self.assertTrue( - np.allclose(dy_jit_pre, st_pre), - msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format(dy_jit_pre, st_pre)) - self.assertTrue( - np.allclose(predictor_pre, st_pre), - msg="predictor_pre:\n {}\n, st_pre: \n{}.".format(predictor_pre, - st_pre)) + self.assertTrue(np.allclose(dy_pre, st_pre), + msg="dy_pre:\n {}\n, st_pre: \n{}.".format( + dy_pre, st_pre)) + self.assertTrue(np.allclose(dy_jit_pre, st_pre), + msg="dy_jit_pre:\n {}\n, st_pre: 
\n{}.".format( + dy_jit_pre, st_pre)) + self.assertTrue(np.allclose(predictor_pre, st_pre), + msg="predictor_pre:\n {}\n, st_pre: \n{}.".format( + predictor_pre, st_pre)) def test_resnet(self): static_loss = self.train(to_static=True) dygraph_loss = self.train(to_static=False) - self.assertTrue( - np.allclose(static_loss, dygraph_loss), - msg="static_loss: {} \n dygraph_loss: {}".format(static_loss, - dygraph_loss)) + self.assertTrue(np.allclose(static_loss, dygraph_loss), + msg="static_loss: {} \n dygraph_loss: {}".format( + static_loss, dygraph_loss)) self.verify_predict() def test_in_static_mode_mkldnn(self): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_return.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_return.py index 507133aba98..07e3fe518c2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_return.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_return.py @@ -202,10 +202,11 @@ def test_return_without_paddle_cond(x): class TestReturnBase(unittest.TestCase): + def setUp(self): self.input = np.ones((1)).astype('int32') - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.init_dygraph_func() self.program_translator = ProgramTranslator() @@ -235,91 +236,106 @@ class TestReturnBase(unittest.TestCase): dygraph_res[i], static_res[i])) elif isinstance(dygraph_res, np.ndarray): - self.assertTrue( - np.allclose(dygraph_res, static_res), - msg='dygraph res is {}\nstatic_res is {}'.format(dygraph_res, - static_res)) + self.assertTrue(np.allclose(dygraph_res, static_res), + msg='dygraph res is {}\nstatic_res is {}'.format( + dygraph_res, static_res)) else: self.assertEqual(dygraph_res, static_res) class TestInsideFuncBase(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_inside_func_base class TestReturnIf(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_return_if class TestReturnIfElse(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_return_if_else class TestReturnInWhile(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_return_in_while class TestReturnInFor(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_return_in_for class TestRecursiveReturn(TestReturnBase): + def init_dygraph_func(self): self.input = self.input.astype(np.float32) self.dygraph_func = test_recursive_return class TestReturnDifferentLengthIfBody(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_return_different_length_if_body class TestReturnDifferentLengthElse(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_return_different_length_else class TestNoReturn(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_no_return class TestReturnNone(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_return_none class TestReturnNoVariable(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_return_no_variable class TestReturnListOneValue(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_return_list_one_value class TestReturnListManyValue(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_return_list_many_values class TestReturnTupleOneValue(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_return_tuple_one_value class 
TestReturnTupleManyValue(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_return_tuple_many_values class TestReturnSpecial(TestReturnBase): + def init_dygraph_func(self): self.dygraph_func = test_return_without_paddle_cond diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py index 794aa17038c..6c8216dac55 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py @@ -30,12 +30,13 @@ SEED = 2020 np.random.seed(SEED) -place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace( -) +place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() program_translator = ProgramTranslator() class SimpleFcLayer(fluid.dygraph.Layer): + def __init__(self, fc_size): super(SimpleFcLayer, self).__init__() self._linear = fluid.dygraph.Linear(fc_size, fc_size) @@ -49,6 +50,7 @@ class SimpleFcLayer(fluid.dygraph.Layer): class TestDyToStaticSaveInferenceModel(unittest.TestCase): + def setUp(self): self.temp_dir = tempfile.TemporaryDirectory() @@ -77,18 +79,19 @@ class TestDyToStaticSaveInferenceModel(unittest.TestCase): self.temp_dir.name, "test_dy2stat_inference_in_guard/model") infer_model_dir = os.path.join(self.temp_dir.name, "test_dy2stat_inference_in_guard") - fluid.dygraph.jit.save( - layer=layer, - path=infer_model_prefix, - input_spec=[x], - output_spec=[pred]) + fluid.dygraph.jit.save(layer=layer, + path=infer_model_prefix, + input_spec=[x], + output_spec=[pred]) # Check the correctness of the inference dygraph_out, _ = layer(x) self.check_save_inference_model(layer, [x_data], dygraph_out.numpy()) - self.check_save_inference_model( - layer, [x_data], dygraph_out.numpy(), fetch=[loss]) - self.check_save_inference_model( - layer, [x_data], dygraph_out.numpy(), feed=[x]) + self.check_save_inference_model(layer, [x_data], + dygraph_out.numpy(), + fetch=[loss]) + self.check_save_inference_model(layer, [x_data], + dygraph_out.numpy(), + feed=[x]) def check_save_inference_model(self, model, @@ -105,11 +108,10 @@ class TestDyToStaticSaveInferenceModel(unittest.TestCase): "test_dy2stat_inference") model_filename = "model" + INFER_MODEL_SUFFIX params_filename = "model" + INFER_PARAMS_SUFFIX - fluid.dygraph.jit.save( - layer=model, - path=infer_model_prefix, - input_spec=feed if feed else None, - output_spec=fetch if fetch else None) + fluid.dygraph.jit.save(layer=model, + path=infer_model_prefix, + input_spec=feed if feed else None, + output_spec=fetch if fetch else None) # Check the correctness of the inference infer_out = self.load_and_run_inference(infer_model_dir, model_filename, params_filename, inputs) @@ -119,12 +121,11 @@ class TestDyToStaticSaveInferenceModel(unittest.TestCase): params_filename, inputs): paddle.enable_static() exe = fluid.Executor(place) - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - dirname=model_path, - executor=exe, - model_filename=model_filename, - params_filename=params_filename) + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(dirname=model_path, + executor=exe, + model_filename=model_filename, + params_filename=params_filename) results = exe.run(inference_program, feed=dict(zip(feed_target_names, inputs)), fetch_list=fetch_targets) @@ -133,6 +134,7 @@ class 
TestDyToStaticSaveInferenceModel(unittest.TestCase): class TestPartialProgramRaiseError(unittest.TestCase): + def test_param_type(self): program_translator = ProgramTranslator() program_translator.enable(True) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_load.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_load.py index c5677756f50..cc75dcd949d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_load.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_load.py @@ -27,11 +27,12 @@ from test_fetch_feed import Linear np.random.seed(2020) -place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace( -) +place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() class TestDyToStaticSaveLoad(unittest.TestCase): + def setUp(self): self.temp_dir = tempfile.TemporaryDirectory() self.model_path = os.path.join(self.temp_dir.name, @@ -50,8 +51,8 @@ class TestDyToStaticSaveLoad(unittest.TestCase): program_translator.enable(True) x = fluid.dygraph.to_variable(x_data) net = Linear(32, 64) - adam = AdamOptimizer( - learning_rate=0.1, parameter_list=net.parameters()) + adam = AdamOptimizer(learning_rate=0.1, + parameter_list=net.parameters()) for i in range(batch_num): static_out, static_loss = net(x) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index 7ac1f40de99..965013adf5d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -78,8 +78,9 @@ def optimizer_setting(params, parameter_list): lr = params["lr"] num_epochs = params["num_epochs"] optimizer = fluid.optimizer.Momentum( - learning_rate=fluid.layers.cosine_decay( - learning_rate=lr, step_each_epoch=step, epochs=num_epochs), + learning_rate=fluid.layers.cosine_decay(learning_rate=lr, + step_each_epoch=step, + epochs=num_epochs), momentum=momentum_rate, regularization=fluid.regularizer.L2Decay(l2_decay), parameter_list=parameter_list) @@ -88,6 +89,7 @@ def optimizer_setting(params, parameter_list): class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -97,15 +99,14 @@ class ConvBNLayer(fluid.dygraph.Layer): act=None): super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - bias_attr=False) + self._conv = Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + bias_attr=False) self._batch_norm = BatchNorm(num_filters, act=act) @@ -117,6 +118,7 @@ class ConvBNLayer(fluid.dygraph.Layer): class SqueezeExcitation(fluid.dygraph.Layer): + def __init__(self, num_channels, reduction_ratio): super(SqueezeExcitation, self).__init__() @@ -147,6 +149,7 @@ class SqueezeExcitation(fluid.dygraph.Layer): class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -156,33 +159,29 @@ class BottleneckBlock(fluid.dygraph.Layer): shortcut=True): super(BottleneckBlock, self).__init__() - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu") - self.conv1 = ConvBNLayer( - num_channels=num_filters, - 
num_filters=num_filters, - filter_size=3, - stride=stride, - groups=cardinality, - act="relu") - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 2, - filter_size=1, - act=None) - - self.scale = SqueezeExcitation( - num_channels=num_filters * 2, reduction_ratio=reduction_ratio) + self.conv0 = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act="relu") + self.conv1 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + groups=cardinality, + act="relu") + self.conv2 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters * 2, + filter_size=1, + act=None) + + self.scale = SqueezeExcitation(num_channels=num_filters * 2, + reduction_ratio=reduction_ratio) if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 2, - filter_size=1, - stride=stride) + self.short = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters * 2, + filter_size=1, + stride=stride) self.shortcut = shortcut @@ -204,6 +203,7 @@ class BottleneckBlock(fluid.dygraph.Layer): class SeResNeXt(fluid.dygraph.Layer): + def __init__(self, layers=50, class_dim=102): super(SeResNeXt, self).__init__() @@ -217,52 +217,53 @@ class SeResNeXt(fluid.dygraph.Layer): reduction_ratio = 16 depth = [3, 4, 6, 3] num_filters = [128, 256, 512, 1024] - self.conv0 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.conv0 = ConvBNLayer(num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu') + self.pool = Pool2D(pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') elif layers == 101: cardinality = 32 reduction_ratio = 16 depth = [3, 4, 23, 3] num_filters = [128, 256, 512, 1024] - self.conv0 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.conv0 = ConvBNLayer(num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu') + self.pool = Pool2D(pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') elif layers == 152: cardinality = 64 reduction_ratio = 16 depth = [3, 8, 36, 3] num_filters = [128, 256, 512, 1024] - self.conv0 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=3, - stride=2, - act='relu') - self.conv1 = ConvBNLayer( - num_channels=64, - num_filters=64, - filter_size=3, - stride=1, - act='relu') - self.conv2 = ConvBNLayer( - num_channels=64, - num_filters=128, - filter_size=3, - stride=1, - act='relu') - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.conv0 = ConvBNLayer(num_channels=3, + num_filters=64, + filter_size=3, + stride=2, + act='relu') + self.conv1 = ConvBNLayer(num_channels=64, + num_filters=64, + filter_size=3, + stride=1, + act='relu') + self.conv2 = ConvBNLayer(num_channels=64, + num_filters=128, + filter_size=3, + stride=1, + act='relu') + self.pool = Pool2D(pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') self.bottleneck_block_list = [] num_channels = 64 @@ -273,19 +274,19 @@ class SeResNeXt(fluid.dygraph.Layer): for i in range(depth[block]): bottleneck_block = self.add_sublayer( 'bb_%d_%d' % (block, i), - BottleneckBlock( - num_channels=num_channels, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - 
cardinality=cardinality, - reduction_ratio=reduction_ratio, - shortcut=shortcut)) + BottleneckBlock(num_channels=num_channels, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=cardinality, + reduction_ratio=reduction_ratio, + shortcut=shortcut)) num_channels = bottleneck_block._num_channels_out self.bottleneck_block_list.append(bottleneck_block) shortcut = True - self.pool2d_avg = Pool2D( - pool_size=7, pool_type='avg', global_pooling=True) + self.pool2d_avg = Pool2D(pool_size=7, + pool_type='avg', + global_pooling=True) stdv = 1.0 / math.sqrt(2048 * 1.0) self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 2 * 1 * 1 @@ -325,12 +326,12 @@ class SeResNeXt(fluid.dygraph.Layer): class TestSeResnet(unittest.TestCase): + def setUp(self): - self.train_reader = paddle.batch( - paddle.dataset.flowers.train( - use_xmap=False, cycle=True), - batch_size=BATCH_SIZE, - drop_last=True) + self.train_reader = paddle.batch(paddle.dataset.flowers.train( + use_xmap=False, cycle=True), + batch_size=BATCH_SIZE, + drop_last=True) self.temp_dir = tempfile.TemporaryDirectory() self.model_save_dir = os.path.join(self.temp_dir.name, "inference") @@ -365,12 +366,12 @@ class TestSeResnet(unittest.TestCase): step_idx = 0 speed_list = [] for step_id, data in enumerate(train_reader()): - dy_x_data = np.array( - [x[0].reshape(3, 224, 224) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape( - BATCH_SIZE, 1) + dy_x_data = np.array([ + x[0].reshape(3, 224, 224) for x in data + ]).astype('float32') + y_data = np.array([x[1] + for x in data]).astype('int64').reshape( + BATCH_SIZE, 1) img = to_variable(dy_x_data) label = to_variable(y_data) @@ -406,10 +407,10 @@ class TestSeResnet(unittest.TestCase): step_idx += 1 if step_idx == STEP_NUM: if to_static: - fluid.dygraph.jit.save( - se_resnext, - self.model_save_prefix, [img], - output_spec=[pred]) + fluid.dygraph.jit.save(se_resnext, + self.model_save_prefix, + [img], + output_spec=[pred]) else: fluid.dygraph.save_dygraph( se_resnext.state_dict(), @@ -439,12 +440,11 @@ class TestSeResnet(unittest.TestCase): def predict_static(self, data): paddle.enable_static() exe = fluid.Executor(place) - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - self.model_save_dir, - executor=exe, - model_filename=self.model_filename, - params_filename=self.params_filename) + [inference_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(self.model_save_dir, + executor=exe, + model_filename=self.model_filename, + params_filename=self.params_filename) pred_res = exe.run(inference_program, feed={feed_target_names[0]: data}, @@ -473,12 +473,12 @@ class TestSeResnet(unittest.TestCase): st_pre = self.predict_static(image) dy_jit_pre = self.predict_dygraph_jit(image) predictor_pre = self.predict_analysis_inference(image) - self.assertTrue( - np.allclose(dy_pre, st_pre), - msg="dy_pre:\n {}\n, st_pre: \n{}.".format(dy_pre, st_pre)) - self.assertTrue( - np.allclose(dy_jit_pre, st_pre), - msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format(dy_jit_pre, st_pre)) + self.assertTrue(np.allclose(dy_pre, st_pre), + msg="dy_pre:\n {}\n, st_pre: \n{}.".format( + dy_pre, st_pre)) + self.assertTrue(np.allclose(dy_jit_pre, st_pre), + msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format( + dy_jit_pre, st_pre)) flat_st_pre = st_pre.flatten() flat_predictor_pre = np.array(predictor_pre).flatten() @@ -492,23 +492,23 @@ class TestSeResnet(unittest.TestCase): 
flat_predictor_pre[i], flat_st_pre[i])) def test_check_result(self): - pred_1, loss_1, acc1_1, acc5_1 = self.train( - self.train_reader, to_static=False) - pred_2, loss_2, acc1_2, acc5_2 = self.train( - self.train_reader, to_static=True) - - self.assertTrue( - np.allclose(pred_1, pred_2), - msg="static pred: {} \ndygraph pred: {}".format(pred_1, pred_2)) - self.assertTrue( - np.allclose(loss_1, loss_2), - msg="static loss: {} \ndygraph loss: {}".format(loss_1, loss_2)) - self.assertTrue( - np.allclose(acc1_1, acc1_2), - msg="static acc1: {} \ndygraph acc1: {}".format(acc1_1, acc1_2)) - self.assertTrue( - np.allclose(acc5_1, acc5_2), - msg="static acc5: {} \ndygraph acc5: {}".format(acc5_1, acc5_2)) + pred_1, loss_1, acc1_1, acc5_1 = self.train(self.train_reader, + to_static=False) + pred_2, loss_2, acc1_2, acc5_2 = self.train(self.train_reader, + to_static=True) + + self.assertTrue(np.allclose(pred_1, pred_2), + msg="static pred: {} \ndygraph pred: {}".format( + pred_1, pred_2)) + self.assertTrue(np.allclose(loss_1, loss_2), + msg="static loss: {} \ndygraph loss: {}".format( + loss_1, loss_2)) + self.assertTrue(np.allclose(acc1_1, acc1_2), + msg="static acc1: {} \ndygraph acc1: {}".format( + acc1_1, acc1_2)) + self.assertTrue(np.allclose(acc5_1, acc5_2), + msg="static acc5: {} \ndygraph acc5: {}".format( + acc5_1, acc5_2)) self.verify_predict() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py index b72894fb147..108c060fab8 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py @@ -33,6 +33,7 @@ if fluid.is_compiled_with_cuda(): class SimpleConvPool(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -41,13 +42,12 @@ class SimpleConvPool(fluid.dygraph.Layer): batch_size=None): super(SimpleConvPool, self).__init__() self.batch_size = batch_size - self._conv2d = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - padding=[1, 1], - use_cudnn=use_cudnn, - act='tanh') + self._conv2d = Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + padding=[1, 1], + use_cudnn=use_cudnn, + act='tanh') def forward(self, inputs): x = self._conv2d(inputs) @@ -57,6 +57,7 @@ class SimpleConvPool(fluid.dygraph.Layer): class CNN(fluid.dygraph.Layer): + def __init__(self, dict_dim, batch_size, seq_len): super(CNN, self).__init__() self.dict_dim = dict_dim @@ -68,28 +69,25 @@ class CNN(fluid.dygraph.Layer): self.win_size = [3, self.hid_dim] self.batch_size = batch_size self.seq_len = seq_len - self.embedding = Embedding( - size=[self.dict_dim + 1, self.emb_dim], - dtype='float32', - is_sparse=False) - self._simple_conv_pool_1 = SimpleConvPool( - self.channels, - self.hid_dim, - self.win_size, - batch_size=self.batch_size) - self._fc1 = Linear( - input_dim=self.hid_dim * self.seq_len, - output_dim=self.fc_hid_dim, - act="softmax") - self._fc_prediction = Linear( - input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax") + self.embedding = Embedding(size=[self.dict_dim + 1, self.emb_dim], + dtype='float32', + is_sparse=False) + self._simple_conv_pool_1 = SimpleConvPool(self.channels, + self.hid_dim, + self.win_size, + batch_size=self.batch_size) + self._fc1 = Linear(input_dim=self.hid_dim * self.seq_len, + output_dim=self.fc_hid_dim, + act="softmax") + self._fc_prediction = 
Linear(input_dim=self.fc_hid_dim, + output_dim=self.class_dim, + act="softmax") @declarative def forward(self, inputs, label=None): emb = self.embedding(inputs) - o_np_mask = ( - fluid.layers.reshape(inputs, [-1, 1]) != self.dict_dim).astype( - dtype='float32') + o_np_mask = (fluid.layers.reshape(inputs, [-1, 1]) != + self.dict_dim).astype(dtype='float32') mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim]) emb = emb * mask_emb emb = fluid.layers.reshape( @@ -105,6 +103,7 @@ class CNN(fluid.dygraph.Layer): class BOW(fluid.dygraph.Layer): + def __init__(self, dict_dim, batch_size, seq_len): super(BOW, self).__init__() self.dict_dim = dict_dim @@ -114,23 +113,24 @@ class BOW(fluid.dygraph.Layer): self.class_dim = 2 self.batch_size = batch_size self.seq_len = seq_len - self.embedding = Embedding( - size=[self.dict_dim + 1, self.emb_dim], - dtype='float32', - is_sparse=False) - self._fc1 = Linear( - input_dim=self.hid_dim, output_dim=self.hid_dim, act="tanh") - self._fc2 = Linear( - input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh") - self._fc_prediction = Linear( - input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax") + self.embedding = Embedding(size=[self.dict_dim + 1, self.emb_dim], + dtype='float32', + is_sparse=False) + self._fc1 = Linear(input_dim=self.hid_dim, + output_dim=self.hid_dim, + act="tanh") + self._fc2 = Linear(input_dim=self.hid_dim, + output_dim=self.fc_hid_dim, + act="tanh") + self._fc_prediction = Linear(input_dim=self.fc_hid_dim, + output_dim=self.class_dim, + act="softmax") @declarative def forward(self, inputs, label=None): emb = self.embedding(inputs) - o_np_mask = ( - fluid.layers.reshape(inputs, [-1, 1]) != self.dict_dim).astype( - dtype='float32') + o_np_mask = (fluid.layers.reshape(inputs, [-1, 1]) != + self.dict_dim).astype(dtype='float32') mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim]) emb = emb * mask_emb emb = fluid.layers.reshape(emb, shape=[-1, self.seq_len, self.hid_dim]) @@ -147,6 +147,7 @@ class BOW(fluid.dygraph.Layer): class GRU(fluid.dygraph.Layer): + def __init__(self, dict_dim, batch_size, seq_len): super(GRU, self).__init__() self.dict_dim = dict_dim @@ -156,29 +157,30 @@ class GRU(fluid.dygraph.Layer): self.class_dim = 2 self.batch_size = batch_size self.seq_len = seq_len - self.embedding = Embedding( - size=[self.dict_dim + 1, self.emb_dim], - dtype='float32', - param_attr=fluid.ParamAttr(learning_rate=30), - is_sparse=False) + self.embedding = Embedding(size=[self.dict_dim + 1, self.emb_dim], + dtype='float32', + param_attr=fluid.ParamAttr(learning_rate=30), + is_sparse=False) h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32") h_0 = to_variable(h_0) self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim * 3) - self._fc2 = Linear( - input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh") - self._fc_prediction = Linear( - input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax") + self._fc2 = Linear(input_dim=self.hid_dim, + output_dim=self.fc_hid_dim, + act="tanh") + self._fc_prediction = Linear(input_dim=self.fc_hid_dim, + output_dim=self.class_dim, + act="softmax") self._gru = DynamicGRU(size=self.hid_dim, h_0=h_0) @declarative def forward(self, inputs, label=None): emb = self.embedding(inputs) - o_np_mask = (fluid.layers.reshape(inputs, [-1, 1]) != self.dict_dim - ).astype('float32') + o_np_mask = (fluid.layers.reshape(inputs, [-1, 1]) != + self.dict_dim).astype('float32') mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim]) emb = emb * 
mask_emb - emb = fluid.layers.reshape( - emb, shape=[self.batch_size, -1, self.hid_dim]) + emb = fluid.layers.reshape(emb, + shape=[self.batch_size, -1, self.hid_dim]) fc_1 = self._fc1(emb) gru_hidden = self._gru(fc_1) gru_hidden = fluid.layers.reduce_max(gru_hidden, dim=1) @@ -193,6 +195,7 @@ class GRU(fluid.dygraph.Layer): class BiGRU(fluid.dygraph.Layer): + def __init__(self, dict_dim, batch_size, seq_len): super(BiGRU, self).__init__() self.dict_dim = dict_dim @@ -202,32 +205,35 @@ class BiGRU(fluid.dygraph.Layer): self.class_dim = 2 self.batch_size = batch_size self.seq_len = seq_len - self.embedding = Embedding( - size=[self.dict_dim + 1, self.emb_dim], - dtype='float32', - param_attr=fluid.ParamAttr(learning_rate=30), - is_sparse=False) + self.embedding = Embedding(size=[self.dict_dim + 1, self.emb_dim], + dtype='float32', + param_attr=fluid.ParamAttr(learning_rate=30), + is_sparse=False) h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32") h_0 = to_variable(h_0) self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim * 3) - self._fc2 = Linear( - input_dim=self.hid_dim * 2, output_dim=self.fc_hid_dim, act="tanh") - self._fc_prediction = Linear( - input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax") - self._gru_forward = DynamicGRU( - size=self.hid_dim, h_0=h_0, is_reverse=False) - self._gru_backward = DynamicGRU( - size=self.hid_dim, h_0=h_0, is_reverse=True) + self._fc2 = Linear(input_dim=self.hid_dim * 2, + output_dim=self.fc_hid_dim, + act="tanh") + self._fc_prediction = Linear(input_dim=self.fc_hid_dim, + output_dim=self.class_dim, + act="softmax") + self._gru_forward = DynamicGRU(size=self.hid_dim, + h_0=h_0, + is_reverse=False) + self._gru_backward = DynamicGRU(size=self.hid_dim, + h_0=h_0, + is_reverse=True) @declarative def forward(self, inputs, label=None): emb = self.embedding(inputs) - o_np_mask = (fluid.layers.reshape(inputs, [-1, 1]) != self.dict_dim - ).astype('float32') + o_np_mask = (fluid.layers.reshape(inputs, [-1, 1]) != + self.dict_dim).astype('float32') mask_emb = fluid.layers.expand(o_np_mask, [1, self.hid_dim]) emb = emb * mask_emb - emb = fluid.layers.reshape( - emb, shape=[self.batch_size, -1, self.hid_dim]) + emb = fluid.layers.reshape(emb, + shape=[self.batch_size, -1, self.hid_dim]) fc_1 = self._fc1(emb) gru_forward = self._gru_forward(fc_1) gru_backward = self._gru_backward(fc_1) @@ -258,8 +264,8 @@ def fake_data_reader(class_num, vocab_size, batch_size, padding_size): seq_len = local_random.randint(padding_size // 2, int(padding_size * 1.2)) word_ids = local_random.randint(0, vocab_size, [seq_len]).tolist() - word_ids = word_ids[:padding_size] + [vocab_size] * (padding_size - - seq_len) + word_ids = word_ids[:padding_size] + [vocab_size + ] * (padding_size - seq_len) batch_data.append((word_ids, [label], seq_len)) if len(batch_data) == batch_size: yield batch_data @@ -339,6 +345,7 @@ def train(args, to_static): class TestSentiment(unittest.TestCase): + def setUp(self): self.args = Args() @@ -346,9 +353,9 @@ class TestSentiment(unittest.TestCase): self.args.model_type = model_type st_out = train(self.args, True) dy_out = train(self.args, False) - self.assertTrue( - np.allclose(dy_out, st_out), - msg="dy_out:\n {}\n st_out:\n {}".format(dy_out, st_out)) + self.assertTrue(np.allclose(dy_out, st_out), + msg="dy_out:\n {}\n st_out:\n {}".format( + dy_out, st_out)) def test_train(self): model_types = ['cnn_net', 'bow_net', 'gru_net', 'bigru_net'] diff --git 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py index bc462ab8c95..7ed2d12f5a8 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py @@ -25,8 +25,9 @@ from paddle.fluid.dygraph.dygraph_to_static import ProgramTranslator from seq2seq_dygraph_model import BaseModel, AttentionModel from seq2seq_utils import Seq2SeqModelHyperParams from seq2seq_utils import get_data_iter -place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace( -) + +place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() program_translator = ProgramTranslator() STEP_NUM = 10 PRINT_STEP = 2 @@ -50,23 +51,21 @@ def train(args, attn_model=False): fluid.default_main_program().random_seed = 2020 if attn_model: - model = AttentionModel( - args.hidden_size, - args.src_vocab_size, - args.tar_vocab_size, - args.batch_size, - num_layers=args.num_layers, - init_scale=args.init_scale, - dropout=args.dropout) + model = AttentionModel(args.hidden_size, + args.src_vocab_size, + args.tar_vocab_size, + args.batch_size, + num_layers=args.num_layers, + init_scale=args.init_scale, + dropout=args.dropout) else: - model = BaseModel( - args.hidden_size, - args.src_vocab_size, - args.tar_vocab_size, - args.batch_size, - num_layers=args.num_layers, - init_scale=args.init_scale, - dropout=args.dropout) + model = BaseModel(args.hidden_size, + args.src_vocab_size, + args.tar_vocab_size, + args.batch_size, + num_layers=args.num_layers, + init_scale=args.init_scale, + dropout=args.dropout) gloabl_norm_clip = GradientClipByGlobalNorm(args.max_grad_norm) optimizer = fluid.optimizer.SGD(args.learning_rate, @@ -122,27 +121,25 @@ def infer(args, attn_model=False): with fluid.dygraph.guard(place): if attn_model: - model = AttentionModel( - args.hidden_size, - args.src_vocab_size, - args.tar_vocab_size, - args.batch_size, - beam_size=args.beam_size, - num_layers=args.num_layers, - init_scale=args.init_scale, - dropout=0.0, - mode='beam_search') + model = AttentionModel(args.hidden_size, + args.src_vocab_size, + args.tar_vocab_size, + args.batch_size, + beam_size=args.beam_size, + num_layers=args.num_layers, + init_scale=args.init_scale, + dropout=0.0, + mode='beam_search') else: - model = BaseModel( - args.hidden_size, - args.src_vocab_size, - args.tar_vocab_size, - args.batch_size, - beam_size=args.beam_size, - num_layers=args.num_layers, - init_scale=args.init_scale, - dropout=0.0, - mode='beam_search') + model = BaseModel(args.hidden_size, + args.src_vocab_size, + args.tar_vocab_size, + args.batch_size, + beam_size=args.beam_size, + num_layers=args.num_layers, + init_scale=args.init_scale, + dropout=0.0, + mode='beam_search') model_path = args.attn_model_path if attn_model else args.base_model_path state_dict, _ = fluid.dygraph.load_dygraph(model_path) @@ -161,6 +158,7 @@ def infer(args, attn_model=False): class TestSeq2seq(unittest.TestCase): + def setUp(self): self.args = Seq2SeqModelHyperParams self.temp_dir = tempfile.TemporaryDirectory() @@ -192,19 +190,17 @@ class TestSeq2seq(unittest.TestCase): dygraph_loss = self.run_dygraph(mode="train", attn_model=attn_model) static_loss = self.run_static(mode="train", attn_model=attn_model) result = np.allclose(dygraph_loss, static_loss) - self.assertTrue( - result, - msg="\ndygraph_loss = {} \nstatic_loss = {}".format(dygraph_loss, - static_loss)) + 
self.assertTrue(result, + msg="\ndygraph_loss = {} \nstatic_loss = {}".format( + dygraph_loss, static_loss)) def _test_predict(self, attn_model=False): pred_dygraph = self.run_dygraph(mode="test", attn_model=attn_model) pred_static = self.run_static(mode="test", attn_model=attn_model) result = np.allclose(pred_static, pred_dygraph) - self.assertTrue( - result, - msg="\npred_dygraph = {} \npred_static = {}".format(pred_dygraph, - pred_static)) + self.assertTrue(result, + msg="\npred_dygraph = {} \npred_static = {}".format( + pred_dygraph, pred_static)) def test_base_model(self): self._test_train(attn_model=False) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet.py index ce88ea74af2..2bc344ae95a 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet.py @@ -36,20 +36,22 @@ def create_conf_dict(): def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument( - "--batch_size", - type=int, - default=32, - help="Total examples' number in batch for training.") - parser.add_argument( - "--seq_len", type=int, default=32, help="The length of each sentence.") - parser.add_argument( - "--epoch", type=int, default=1, help="The number of training epoch.") - parser.add_argument( - "--fake_sample_size", - type=int, - default=128, - help="The number of samples of fake data.") + parser.add_argument("--batch_size", + type=int, + default=32, + help="Total examples' number in batch for training.") + parser.add_argument("--seq_len", + type=int, + default=32, + help="The length of each sentence.") + parser.add_argument("--epoch", + type=int, + default=1, + help="The number of training epoch.") + parser.add_argument("--fake_sample_size", + type=int, + default=128, + help="The number of samples of fake data.") args = parser.parse_args([]) return args @@ -70,6 +72,7 @@ vocab = fake_vocabulary() class FakeReaderProcessor(object): + def __init__(self, args, vocab): self.vocab = vocab self.seq_len = args.seq_len @@ -83,6 +86,7 @@ class FakeReaderProcessor(object): np.array([query, pos_title, neg_title]).astype(np.int64)) def get_reader(self, mode, epoch=0): + def reader_with_pairwise(): if mode == "train": for i in range(self.sample_size): @@ -133,11 +137,10 @@ def train(conf_dict, to_static): return_list=True, iterable=True, use_double_buffer=True) - get_train_examples = simnet_process.get_reader( - "train", epoch=args.epoch) + get_train_examples = simnet_process.get_reader("train", + epoch=args.epoch) train_loader.set_sample_list_generator( - paddle.batch( - get_train_examples, batch_size=args.batch_size), place) + paddle.batch(get_train_examples, batch_size=args.batch_size), place) for left, pos_right, neg_right in train_loader(): left = fluid.layers.reshape(left, shape=[-1, 1]) @@ -157,6 +160,7 @@ def train(conf_dict, to_static): class TestSimnet(unittest.TestCase): + def test_dygraph_static_same_loss(self): if fluid.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py index 872d419ff89..f2c72e9932e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py @@ -34,20 +34,22 @@ def create_conf_dict(): def parse_args(): 
parser = argparse.ArgumentParser() - parser.add_argument( - "--batch_size", - type=int, - default=32, - help="Total examples' number in batch for training.") - parser.add_argument( - "--seq_len", type=int, default=32, help="The length of each sentence.") - parser.add_argument( - "--epoch", type=int, default=1, help="The number of training epoch.") - parser.add_argument( - "--fake_sample_size", - type=int, - default=128, - help="The number of samples of fake data.") + parser.add_argument("--batch_size", + type=int, + default=32, + help="Total examples' number in batch for training.") + parser.add_argument("--seq_len", + type=int, + default=32, + help="The length of each sentence.") + parser.add_argument("--epoch", + type=int, + default=1, + help="The number of training epoch.") + parser.add_argument("--fake_sample_size", + type=int, + default=128, + help="The number of samples of fake data.") args = parser.parse_args([]) return args @@ -68,6 +70,7 @@ vocab = fake_vocabulary() class FakeReaderProcessor(object): + def __init__(self, args, vocab): self.vocab = vocab self.seq_len = args.seq_len @@ -81,6 +84,7 @@ class FakeReaderProcessor(object): np.array([query, pos_title, neg_title]).astype(np.int64)) def get_reader(self, mode, epoch=0): + def reader_with_pairwise(): if mode == "train": for i in range(self.sample_size): @@ -114,24 +118,24 @@ def train(conf_dict, to_static): net = BOW(conf_dict) loss = HingeLoss(conf_dict) - optimizer = paddle.optimizer.Adam( - learning_rate=0.001, - beta1=0.9, - beta2=0.999, - epsilon=1e-08, - parameters=net.parameters()) + optimizer = paddle.optimizer.Adam(learning_rate=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + parameters=net.parameters()) metric = paddle.metric.Auc(name="auc") global_step = 0 losses = [] - train_loader = paddle.io.DataLoader.from_generator( - capacity=16, return_list=True, iterable=True, use_double_buffer=True) + train_loader = paddle.io.DataLoader.from_generator(capacity=16, + return_list=True, + iterable=True, + use_double_buffer=True) get_train_examples = simnet_process.get_reader("train", epoch=args.epoch) train_loader.set_sample_list_generator( - paddle.batch( - get_train_examples, batch_size=args.batch_size), place) + paddle.batch(get_train_examples, batch_size=args.batch_size), place) for left, pos_right, neg_right in train_loader(): left = paddle.reshape(left, shape=[-1, 1]) @@ -152,6 +156,7 @@ def train(conf_dict, to_static): class TestSimnet(unittest.TestCase): + def test_dygraph_static_same_loss(self): if paddle.is_compiled_with_cuda(): paddle.fluid.set_flags({"FLAGS_cudnn_deterministic": True}) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_slice.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_slice.py index eecb6d8b758..48dc33cc6c7 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_slice.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_slice.py @@ -98,6 +98,7 @@ def test_set_value(x): class LayerWithSetValue(paddle.nn.Layer): + def __init__(self, input_dim, hidden): super(LayerWithSetValue, self).__init__() self.linear = paddle.nn.Linear(input_dim, hidden) @@ -110,10 +111,11 @@ class LayerWithSetValue(paddle.nn.Layer): class TestSliceWithoutControlFlow(unittest.TestCase): + def setUp(self): self.init_input() - self.place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda( - ) else paddle.CPUPlace() + self.place = paddle.CUDAPlace( + 0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace() self.init_dygraph_func() 
paddle.disable_static() @@ -137,28 +139,31 @@ class TestSliceWithoutControlFlow(unittest.TestCase): def test_transformed_static_result(self): static_res = self.run_static_mode() dygraph_res = self.run_dygraph_mode() - self.assertTrue( - np.allclose(dygraph_res, static_res), - msg='dygraph_res is {}\nstatic_res is {}'.format(dygraph_res, - static_res)) + self.assertTrue(np.allclose(dygraph_res, static_res), + msg='dygraph_res is {}\nstatic_res is {}'.format( + dygraph_res, static_res)) class TestSliceInIf(TestSliceWithoutControlFlow): + def init_dygraph_func(self): self.dygraph_func = test_slice_in_if class TestSliceInWhileLoop(TestSliceWithoutControlFlow): + def init_dygraph_func(self): self.dygraph_func = test_slice_in_while_loop class TestSliceInForLoop(TestSliceWithoutControlFlow): + def init_dygraph_func(self): self.dygraph_func = test_slice_in_for_loop class TestSetValue(TestSliceWithoutControlFlow): + def init_input(self): self.input = np.full([3, 4, 5], 5).astype('float32') @@ -167,6 +172,7 @@ class TestSetValue(TestSliceWithoutControlFlow): class TestSetValueWithLayerAndSave(unittest.TestCase): + def setUp(self): self.temp_dir = tempfile.TemporaryDirectory() self.model_path = os.path.join(self.temp_dir.name, @@ -179,8 +185,10 @@ class TestSetValueWithLayerAndSave(unittest.TestCase): prog_trans.enable(True) model = LayerWithSetValue(input_dim=10, hidden=1) x = paddle.full(shape=[5, 10], fill_value=5.0, dtype="float32") - paddle.jit.save( - layer=model, path=self.model_path, input_spec=[x], output_spec=None) + paddle.jit.save(layer=model, + path=self.model_path, + input_spec=[x], + output_spec=None) class TestSliceSupplementSpecialCase(unittest.TestCase): @@ -213,8 +221,8 @@ class TestSliceSupplementSpecialCase(unittest.TestCase): return inps[::2], inps[::-2] origin_result = func(inps) - sfunc = paddle.jit.to_static( - func, input_spec=[InputSpec(shape=[None, 4, 4])]) + sfunc = paddle.jit.to_static(func, + input_spec=[InputSpec(shape=[None, 4, 4])]) static_result = sfunc(inps) self.assertTrue( @@ -224,6 +232,7 @@ class TestSliceSupplementSpecialCase(unittest.TestCase): class TestPaddleStridedSlice(unittest.TestCase): + def test_compare_paddle_strided_slice_with_numpy(self): paddle.disable_static() array = np.arange(5) @@ -232,8 +241,19 @@ class TestPaddleStridedSlice(unittest.TestCase): s1 = 3 e1 = 1 stride1 = -2 - sl = paddle.strided_slice( - pt, axes=[0, ], starts=[s1, ], ends=[e1, ], strides=[stride1, ]) + sl = paddle.strided_slice(pt, + axes=[ + 0, + ], + starts=[ + s1, + ], + ends=[ + e1, + ], + strides=[ + stride1, + ]) self.assertTrue(array[s1:e1:stride1], sl) @@ -242,20 +262,27 @@ class TestPaddleStridedSlice(unittest.TestCase): s2 = [8, -1] e2 = [1, -5] stride2 = [-2, -3] - sl = paddle.strided_slice( - pt, axes=[0, 1], starts=s2, ends=e2, strides=stride2) + sl = paddle.strided_slice(pt, + axes=[0, 1], + starts=s2, + ends=e2, + strides=stride2) self.assertTrue( - np.array_equal(sl.numpy(), array[s2[0]:e2[0]:stride2[0], s2[1]:e2[ - 1]:stride2[1]])) + np.array_equal( + sl.numpy(), array[s2[0]:e2[0]:stride2[0], + s2[1]:e2[1]:stride2[1]])) array = np.arange(6 * 7 * 8).reshape((6, 7, 8)) pt = paddle.to_tensor(array) s2 = [7, -1] e2 = [2, -5] stride2 = [-2, -3] - sl = paddle.strided_slice( - pt, axes=[0, 2], starts=s2, ends=e2, strides=stride2) + sl = paddle.strided_slice(pt, + axes=[0, 2], + starts=s2, + ends=e2, + strides=stride2) array_slice = array[s2[0]:e2[0]:stride2[0], ::, s2[1]:e2[1]:stride2[1]] self.assertTrue( diff --git 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_spec_names.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_spec_names.py index 361fcbf9c73..7311fd285ab 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_spec_names.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_spec_names.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,6 +19,7 @@ import unittest class Net(Layer): + def __init__(self): super(Net, self).__init__() self.fc = paddle.nn.Linear(16, 3) @@ -36,6 +37,7 @@ class Net(Layer): class TestArgsSpecName(unittest.TestCase): + def read_from_dataset(self): self.x = paddle.randn([4, 2, 8]) self.y = paddle.randn([4, 2, 8]) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_static_analysis.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_static_analysis.py index 388291a51c2..36b2425058f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_static_analysis.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_static_analysis.py @@ -94,6 +94,7 @@ result_var_type4 = { def func_to_test5(): + def inner_int_func(): return 1 @@ -144,7 +145,7 @@ result_var_type6 = { } -def func_to_test7(a: int, b: float, c: paddle.Tensor, d: float='diff'): +def func_to_test7(a: int, b: float, c: paddle.Tensor, d: float = 'diff'): a = True e, f = paddle.shape(c) g: paddle.Tensor = len(c) @@ -171,6 +172,7 @@ result_var_type = [ class TestStaticAnalysis(unittest.TestCase): + def _check_wrapper(self, wrapper, node_to_wrapper_map): self.assertEqual(node_to_wrapper_map[wrapper.node], wrapper) if wrapper.parent is not None: diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_methods.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_methods.py index f06d48c963d..f535cf4c35d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_methods.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_methods.py @@ -27,6 +27,7 @@ def tensor_clone(x): class TestTensorClone(unittest.TestCase): + def _run(self, to_static): prog_trans = paddle.jit.ProgramTranslator() prog_trans.enable(to_static) @@ -36,10 +37,9 @@ class TestTensorClone(unittest.TestCase): def test_tensor_clone(self): dygraph_res = self._run(to_static=False) static_res = self._run(to_static=True) - self.assertTrue( - numpy.allclose(dygraph_res, static_res), - msg='dygraph res is {}\nstatic_res is {}'.format(dygraph_res, - static_res)) + self.assertTrue(numpy.allclose(dygraph_res, static_res), + msg='dygraph res is {}\nstatic_res is {}'.format( + dygraph_res, static_res)) @paddle.jit.to_static @@ -50,6 +50,7 @@ def tensor_numpy(x): class TestTensorDygraphOnlyMethodError(unittest.TestCase): + def _run(self, to_static): prog_trans = paddle.jit.ProgramTranslator() prog_trans.enable(to_static) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_shape.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_shape.py index 
5cf9d7749c3..3e30eb84ed6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_shape.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_shape.py @@ -111,11 +111,13 @@ def dyfunc_with_if_1(x): # `res.shape[0]` is transformed into # `paddle.jit.dy2static.convert_var_shape(res)[0]` if res.shape[0] > 1: - res = fluid.layers.fill_constant( - value=2, shape=x.shape, dtype="int32") + res = fluid.layers.fill_constant(value=2, + shape=x.shape, + dtype="int32") else: - res = fluid.layers.fill_constant( - value=3, shape=x.shape, dtype="int32") + res = fluid.layers.fill_constant(value=3, + shape=x.shape, + dtype="int32") return res @@ -231,10 +233,11 @@ def dyfunc_dict_assign_shape(): # 1. Basic tests without control flow class TestTensorShapeBasic(unittest.TestCase): + def setUp(self): self.input = numpy.ones(5).astype("int32") - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self._set_input_spec() self._set_expected_op_num() self.init_test_func() @@ -262,10 +265,9 @@ class TestTensorShapeBasic(unittest.TestCase): def test_transformed_static_result(self): static_res = self.get_static_output() dygraph_res = self.get_dygraph_output() - self.assertTrue( - numpy.allclose(dygraph_res, static_res), - msg='dygraph res is {}\nstatic_res is {}'.format(dygraph_res, - static_res)) + self.assertTrue(numpy.allclose(dygraph_res, static_res), + msg='dygraph res is {}\nstatic_res is {}'.format( + dygraph_res, static_res)) def _set_expected_op_num(self): self.expected_op_num = 2 @@ -293,6 +295,7 @@ class TestTensorShapeBasic(unittest.TestCase): class TestTensorShapeBasic2(TestTensorShapeBasic): + def init_test_func(self): self.dygraph_func = dyfunc_tensor_shape_2 @@ -303,16 +306,19 @@ class TestTensorShapeBasic2(TestTensorShapeBasic): class TestTensorShapeBasic3(TestTensorShapeBasic): + def init_test_func(self): self.dygraph_func = dyfunc_tensor_shape_3 class TestTensorShapeBasic4(TestTensorShapeBasic): + def init_test_func(self): self.dygraph_func = dyfunc_tensor_shape_4 class TestTensorShapeBasic5(TestTensorShapeBasic): + def init_test_func(self): self.dygraph_func = dyfunc_tensor_shape_5 @@ -323,6 +329,7 @@ class TestTensorShapeBasic5(TestTensorShapeBasic): class TestTensorShapeBasic6(TestTensorShapeBasic): + def init_test_func(self): self.dygraph_func = dyfunc_tensor_shape_6 @@ -333,6 +340,7 @@ class TestTensorShapeBasic6(TestTensorShapeBasic): class TestTupleShape1(TestTensorShapeBasic): + def init_test_func(self): self.input = numpy.ones((5, 7)).astype("int32") self.input_spec = [paddle.static.InputSpec(shape=[5, 7], dtype="int32")] @@ -345,6 +353,7 @@ class TestTupleShape1(TestTensorShapeBasic): class TestTupleShape2(TestTensorShapeBasic): + def init_test_func(self): self.input = numpy.ones((5, 7)).astype("int32") self.input_spec = [paddle.static.InputSpec(shape=[5, 7], dtype="int32")] @@ -357,6 +366,7 @@ class TestTupleShape2(TestTensorShapeBasic): class TestTupleShape3(TestTensorShapeBasic): + def init_test_func(self): self.input = numpy.ones((5, 7)).astype("int32") self.input_spec = [paddle.static.InputSpec(shape=[5, 7], dtype="int32")] @@ -369,6 +379,7 @@ class TestTupleShape3(TestTensorShapeBasic): class TestPaddleShapeApi(TestTensorShapeBasic): + def init_test_func(self): self.input = numpy.ones((5, 7)).astype("int32") self.input_spec = [paddle.static.InputSpec(shape=[5, 7], dtype="int32")] @@ -382,6 +393,7 @@ class 
TestPaddleShapeApi(TestTensorShapeBasic): # 2. Tests with control flow if class TestTensorShapeInIf1(TestTensorShapeBasic): + def init_test_func(self): self.dygraph_func = dyfunc_with_if_1 @@ -392,6 +404,7 @@ class TestTensorShapeInIf1(TestTensorShapeBasic): class TestTensorShapeInIf2(TestTensorShapeBasic): + def init_test_func(self): self.dygraph_func = dyfunc_with_if_2 @@ -403,6 +416,7 @@ class TestTensorShapeInIf2(TestTensorShapeBasic): # 3. Tests with control flow for loop class TestTensorShapeInFor1(TestTensorShapeBasic): + def init_test_func(self): self.dygraph_func = dyfunc_with_for_1 @@ -413,6 +427,7 @@ class TestTensorShapeInFor1(TestTensorShapeBasic): class TestTensorShapeInFor2(TestTensorShapeInFor1): + def init_test_func(self): self.dygraph_func = dyfunc_with_for_2 @@ -423,6 +438,7 @@ class TestTensorShapeInFor2(TestTensorShapeInFor1): class TestTensorShapeInFor3(TestTensorShapeInFor1): + def init_test_func(self): self.dygraph_func = dyfunc_with_for_3 @@ -434,11 +450,13 @@ class TestTensorShapeInFor3(TestTensorShapeInFor1): # 4. Tests with control flow while loop class TestTensorShapeInWhile1(TestTensorShapeInFor1): + def init_test_func(self): self.dygraph_func = dyfunc_with_while_1 class TestTensorShapeInWhile2(TestTensorShapeInFor1): + def init_test_func(self): self.dygraph_func = dyfunc_with_while_2 @@ -449,6 +467,7 @@ class TestTensorShapeInWhile2(TestTensorShapeInFor1): class TestTensorShapeInWhile3(TestTensorShapeBasic): + def init_test_func(self): self.dygraph_func = dyfunc_with_while_3 @@ -459,6 +478,7 @@ class TestTensorShapeInWhile3(TestTensorShapeBasic): class TestTensorShapeInWhile4(TestTensorShapeBasic): + def init_test_func(self): self.dygraph_func = dyfunc_with_while_4 @@ -470,6 +490,7 @@ class TestTensorShapeInWhile4(TestTensorShapeBasic): # 5. 
Test op num for negetive dim class TestOpNumBasicWithTensorShape(unittest.TestCase): + def setUp(self): self._set_input_spec() self._set_test_func() @@ -477,8 +498,7 @@ class TestOpNumBasicWithTensorShape(unittest.TestCase): def _set_input_spec(self): self.input_spec = [ - paddle.static.InputSpec( - shape=[-1, 5], dtype="int32") + paddle.static.InputSpec(shape=[-1, 5], dtype="int32") ] def _set_test_func(self): @@ -511,6 +531,7 @@ class TestOpNumBasicWithTensorShape(unittest.TestCase): class TestOpNumBasicWithTensorShape4(TestOpNumBasicWithTensorShape): + def _set_test_func(self): self.dygraph_func = dyfunc_tensor_shape_4 @@ -521,6 +542,7 @@ class TestOpNumBasicWithTensorShape4(TestOpNumBasicWithTensorShape): class TestOpNumWithTensorShapeTuple1(TestOpNumBasicWithTensorShape): + def _set_test_func(self): self.dygraph_func = dyfunc_tuple_shape_1 @@ -531,6 +553,7 @@ class TestOpNumWithTensorShapeTuple1(TestOpNumBasicWithTensorShape): class TestOpNumWithTensorShapeInIf1(TestOpNumBasicWithTensorShape): + def _set_test_func(self): self.dygraph_func = dyfunc_with_if_1 @@ -541,6 +564,7 @@ class TestOpNumWithTensorShapeInIf1(TestOpNumBasicWithTensorShape): class TestOpNumWithTensorShapeInFor1(TestOpNumBasicWithTensorShape): + def _set_test_func(self): self.dygraph_func = dyfunc_with_for_1 @@ -551,6 +575,7 @@ class TestOpNumWithTensorShapeInFor1(TestOpNumBasicWithTensorShape): class TestOpNumWithTensorShapeInWhile1(TestOpNumBasicWithTensorShape): + def _set_test_func(self): self.dygraph_func = dyfunc_with_while_1 @@ -561,6 +586,7 @@ class TestOpNumWithTensorShapeInWhile1(TestOpNumBasicWithTensorShape): class TestChangeShapeAfterAssign(TestTensorShapeBasic): + def init_test_func(self): self.input = numpy.ones((2, 3)).astype("int32") self.input_spec = [paddle.static.InputSpec(shape=[2, 3], dtype="int32")] @@ -580,13 +606,15 @@ def dyfunc_with_static_convert_var_shape(x): else: # Test for correctly to find `batch_size__static_convert_var_shape_suffix_0` in # deeply nested scope. 
- res = fluid.layers.fill_constant( - value=8, shape=[batch_size], dtype="int32") + res = fluid.layers.fill_constant(value=8, + shape=[batch_size], + dtype="int32") return res class TestFindStatiConvertVarShapeSuffixVar(unittest.TestCase): + def test(self): x_spec = paddle.static.InputSpec(shape=[None, 10]) func = paddle.jit.to_static(dyfunc_with_if_2, input_spec=[x_spec]) @@ -595,6 +623,7 @@ class TestFindStatiConvertVarShapeSuffixVar(unittest.TestCase): class TestPaddleShape(unittest.TestCase): + def test_paddle_shape(self): func = paddle.jit.to_static(dyfunc_len_paddle_shape) func_code = func.code.replace("\n", "").replace(" ", "") diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_transformer.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_transformer.py index c8fe3e39329..32bd9bc5d50 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_transformer.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_transformer.py @@ -26,8 +26,8 @@ import transformer_util as util from transformer_dygraph_model import CrossEntropyCriterion, Transformer, position_encoding_init trainer_count = 1 -place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace( -) +place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() SEED = 10 STEP_NUM = 10 @@ -72,16 +72,16 @@ def train_static(args, batch_generator): # define optimizer learning_rate = fluid.layers.learning_rate_scheduler.noam_decay( args.d_model, args.warmup_steps, args.learning_rate) - optimizer = fluid.optimizer.Adam( - learning_rate=learning_rate, - beta1=args.beta1, - beta2=args.beta2, - epsilon=float(args.eps)) + optimizer = fluid.optimizer.Adam(learning_rate=learning_rate, + beta1=args.beta1, + beta2=args.beta2, + epsilon=float(args.eps)) optimizer.minimize(avg_cost) # the best cross-entropy value with label smoothing loss_normalizer = -((1. - args.label_smooth_eps) * np.log( - (1. - args.label_smooth_eps)) + args.label_smooth_eps * np.log( - args.label_smooth_eps / (args.trg_vocab_size - 1) + 1e-20)) + (1. - args.label_smooth_eps)) + args.label_smooth_eps * + np.log(args.label_smooth_eps / + (args.trg_vocab_size - 1) + 1e-20)) step_idx = 0 total_batch_num = 0 avg_loss = [] @@ -94,8 +94,8 @@ def train_static(args, batch_generator): feed=feed_dict, fetch_list=[sum_cost.name, token_num.name]) if step_idx % args.print_step == 0: - sum_cost_val, token_num_val = np.array(outs[0]), np.array(outs[ - 1]) + sum_cost_val, token_num_val = np.array(outs[0]), np.array( + outs[1]) total_sum_cost = sum_cost_val.sum() total_token_num = token_num_val.sum() total_avg_cost = total_sum_cost / total_token_num @@ -114,8 +114,8 @@ def train_static(args, batch_generator): "normalized loss: %f, ppl: %f, speed: %.2f steps/s" % (step_idx, pass_id, batch_id, total_avg_cost, total_avg_cost - loss_normalizer, - np.exp([min(total_avg_cost, 100)]), - args.print_step / (time.time() - avg_batch_time))) + np.exp([min(total_avg_cost, 100)]), args.print_step / + (time.time() - avg_batch_time))) avg_batch_time = time.time() batch_id += 1 step_idx += 1 @@ -160,8 +160,9 @@ def train_dygraph(args, batch_generator): # the best cross-entropy value with label smoothing loss_normalizer = -( (1. - args.label_smooth_eps) * np.log( - (1. - args.label_smooth_eps)) + args.label_smooth_eps * - np.log(args.label_smooth_eps / (args.trg_vocab_size - 1) + 1e-20)) + (1. 
- args.label_smooth_eps)) + + args.label_smooth_eps * np.log(args.label_smooth_eps / + (args.trg_vocab_size - 1) + 1e-20)) ce_time = [] ce_ppl = [] avg_loss = [] @@ -176,8 +177,8 @@ def train_dygraph(args, batch_generator): logits = transformer(src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias) - sum_cost, avg_cost, token_num = criterion(logits, lbl_word, - lbl_weight) + sum_cost, avg_cost, token_num = criterion( + logits, lbl_word, lbl_weight) avg_cost.backward() optimizer.minimize(avg_cost) transformer.clear_gradients() @@ -196,11 +197,11 @@ def train_dygraph(args, batch_generator): logging.info( "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, " "normalized loss: %f, ppl: %f, speed: %.2f steps/s" - % - (step_idx, pass_id, batch_id, total_avg_cost, - total_avg_cost - loss_normalizer, - np.exp([min(total_avg_cost, 100)]), - args.print_step / (time.time() - avg_batch_time))) + % (step_idx, pass_id, batch_id, total_avg_cost, + total_avg_cost - loss_normalizer, + np.exp([min(total_avg_cost, 100) + ]), args.print_step / + (time.time() - avg_batch_time))) ce_ppl.append(np.exp([min(total_avg_cost, 100)])) avg_batch_time = time.time() batch_id += 1 @@ -310,8 +311,8 @@ def predict_static(args, batch_generator): input_field = util.InputField(input_slots) feed_list = input_field.feed_list - loader = fluid.io.DataLoader.from_generator( - feed_list=feed_list, capacity=10) + loader = fluid.io.DataLoader.from_generator(feed_list=feed_list, + capacity=10) # define model transformer = Transformer( @@ -322,12 +323,11 @@ def predict_static(args, batch_generator): args.postprocess_cmd, args.weight_sharing, args.bos_idx, args.eos_idx) - out_ids, out_scores = transformer.beam_search( - *feed_list, - bos_id=args.bos_idx, - eos_id=args.eos_idx, - beam_size=args.beam_size, - max_len=args.max_out_len) + out_ids, out_scores = transformer.beam_search(*feed_list, + bos_id=args.bos_idx, + eos_id=args.eos_idx, + beam_size=args.beam_size, + max_len=args.max_out_len) # This is used here to set dropout to the test mode. 
test_prog = test_prog.clone(for_test=True) @@ -335,8 +335,8 @@ def predict_static(args, batch_generator): # define the executor and program for training exe = fluid.Executor(place) - util.load(test_prog, - os.path.join(args.save_static_model_path, "transformer"), exe) + util.load(test_prog, os.path.join(args.save_static_model_path, + "transformer"), exe) loader.set_batch_generator(batch_generator, places=place) @@ -372,6 +372,7 @@ def predict_static(args, batch_generator): class TestTransformer(unittest.TestCase): + def setUp(self): self.temp_dir = tempfile.TemporaryDirectory() @@ -401,14 +402,12 @@ class TestTransformer(unittest.TestCase): static_seq_ids, static_scores = predict_static(args, batch_generator) dygraph_seq_ids, dygraph_scores = predict_dygraph(args, batch_generator) - self.assertTrue( - np.allclose(static_seq_ids, static_seq_ids), - msg="static_seq_ids: {} \n dygraph_seq_ids: {}".format( - static_seq_ids, dygraph_seq_ids)) - self.assertTrue( - np.allclose(static_scores, dygraph_scores), - msg="static_scores: {} \n dygraph_scores: {}".format( - static_scores, dygraph_scores)) + self.assertTrue(np.allclose(static_seq_ids, static_seq_ids), + msg="static_seq_ids: {} \n dygraph_seq_ids: {}".format( + static_seq_ids, dygraph_seq_ids)) + self.assertTrue(np.allclose(static_scores, dygraph_scores), + msg="static_scores: {} \n dygraph_scores: {}".format( + static_scores, dygraph_scores)) def test_check_result(self): self._test_train() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py index 37fc78458dd..481858be6f4 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py @@ -33,21 +33,20 @@ np.random.seed(0) def parse_args(): parser = argparse.ArgumentParser("Paddle Video train script") - parser.add_argument( - '--config', - type=str, - default='tsm.yaml', - help='path to config file of model') - parser.add_argument( - '--use_gpu', - type=bool, - default=fluid.is_compiled_with_cuda(), - help='default use gpu.') + parser.add_argument('--config', + type=str, + default='tsm.yaml', + help='path to config file of model') + parser.add_argument('--use_gpu', + type=bool, + default=fluid.is_compiled_with_cuda(), + help='default use gpu.') args = parser.parse_args(['--config', 'tsm.yaml']) return args class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -57,22 +56,20 @@ class ConvBNLayer(fluid.dygraph.Layer): act=None): super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=None, - act=None, - param_attr=fluid.param_attr.ParamAttr(), - bias_attr=False) - - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=fluid.param_attr.ParamAttr(), - bias_attr=fluid.param_attr.ParamAttr()) + self._conv = Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=None, + act=None, + param_attr=fluid.param_attr.ParamAttr(), + bias_attr=False) + + self._batch_norm = BatchNorm(num_filters, + act=act, + param_attr=fluid.param_attr.ParamAttr(), + bias_attr=fluid.param_attr.ParamAttr()) def forward(self, inputs): y = self._conv(inputs) @@ -82,6 +79,7 @@ class ConvBNLayer(fluid.dygraph.Layer): class BottleneckBlock(fluid.dygraph.Layer): + def 
__init__(self, num_channels, num_filters, @@ -90,29 +88,25 @@ class BottleneckBlock(fluid.dygraph.Layer): seg_num=8): super(BottleneckBlock, self).__init__() - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu') - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu') - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None) + self.conv0 = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu') + self.conv1 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu') + self.conv2 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None) if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=stride) + self.short = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride) self.shortcut = shortcut self.seg_num = seg_num self._num_channels_out = int(num_filters * 4) @@ -131,6 +125,7 @@ class BottleneckBlock(fluid.dygraph.Layer): class TSM_ResNet(fluid.dygraph.Layer): + def __init__(self, name_scope, config, mode): super(TSM_ResNet, self).__init__(name_scope) @@ -148,10 +143,15 @@ class TSM_ResNet(fluid.dygraph.Layer): raise NotImplementedError num_filters = [64, 128, 256, 512] - self.conv = ConvBNLayer( - num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu') - self.pool2d_max = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.conv = ConvBNLayer(num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu') + self.pool2d_max = Pool2D(pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') self.bottleneck_block_list = [] num_channels = 64 @@ -161,17 +161,17 @@ class TSM_ResNet(fluid.dygraph.Layer): for i in range(depth[block]): bottleneck_block = self.add_sublayer( 'bb_%d_%d' % (block, i), - BottleneckBlock( - num_channels=num_channels, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - seg_num=self.seg_num)) + BottleneckBlock(num_channels=num_channels, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + seg_num=self.seg_num)) num_channels = int(bottleneck_block._num_channels_out) self.bottleneck_block_list.append(bottleneck_block) shortcut = True - self.pool2d_avg = Pool2D( - pool_size=7, pool_type='avg', global_pooling=True) + self.pool2d_avg = Pool2D(pool_size=7, + pool_type='avg', + global_pooling=True) import math stdv = 1.0 / math.sqrt(2048 * 1.0) @@ -202,6 +202,7 @@ class TSM_ResNet(fluid.dygraph.Layer): class FakeDataReader(object): + def __init__(self, mode, cfg): self.format = cfg.MODEL.format self.num_classes = cfg.MODEL.num_classes @@ -211,8 +212,8 @@ class FakeDataReader(object): self.target_size = cfg[mode.upper()]['target_size'] self.img_mean = np.array(cfg.MODEL.image_mean).reshape( [3, 1, 1]).astype(np.float32) - self.img_std = np.array(cfg.MODEL.image_std).reshape( - [3, 1, 1]).astype(np.float32) + self.img_std = np.array(cfg.MODEL.image_std).reshape([3, 1, 1]).astype( + np.float32) self.batch_size = 1 if sys.platform == 'darwin' or os.name == 'nt' else cfg[ mode.upper()]['batch_size'] @@ -232,6 +233,7 @@ class FakeDataReader(object): 
self.generator_out.append(batch_out) def create_reader(self): + def batch_reader(): for i in range(self.total_iter): yield self.generator_out[i] @@ -251,8 +253,7 @@ def create_optimizer(cfg, params): momentum = cfg.momentum optimizer = fluid.optimizer.Momentum( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr), + learning_rate=fluid.layers.piecewise_decay(boundaries=bd, values=lr), momentum=momentum, regularization=fluid.regularizer.L2Decay(l2_weight_decay), parameter_list=params) @@ -299,13 +300,16 @@ def train(args, fake_data_reader, to_static): labels = to_variable(y_data) labels.stop_gradient = True outputs = video_model(imgs) - loss = fluid.layers.cross_entropy( - input=outputs, label=labels, ignore_index=-1) + loss = fluid.layers.cross_entropy(input=outputs, + label=labels, + ignore_index=-1) avg_loss = fluid.layers.mean(loss) - acc_top1 = fluid.layers.accuracy( - input=outputs, label=labels, k=1) - acc_top5 = fluid.layers.accuracy( - input=outputs, label=labels, k=5) + acc_top1 = fluid.layers.accuracy(input=outputs, + label=labels, + k=1) + acc_top5 = fluid.layers.accuracy(input=outputs, + label=labels, + k=5) avg_loss.backward() optimizer.minimize(avg_loss) @@ -319,20 +323,23 @@ def train(args, fake_data_reader, to_static): print('TRAIN Epoch {}, iter {}, loss = {}, acc1 {}, acc5 {}'. format(epoch, batch_id, avg_loss.numpy()[0], - acc_top1.numpy()[0], acc_top5.numpy()[0])) + acc_top1.numpy()[0], + acc_top5.numpy()[0])) ret.extend([ - avg_loss.numpy()[0], acc_top1.numpy()[0], + avg_loss.numpy()[0], + acc_top1.numpy()[0], acc_top5.numpy()[0] ]) print( 'TRAIN End, Epoch {}, avg_loss= {}, avg_acc1= {}, avg_acc5= {}'. - format(epoch, total_loss / total_sample, total_acc1 / - total_sample, total_acc5 / total_sample)) + format(epoch, total_loss / total_sample, + total_acc1 / total_sample, total_acc5 / total_sample)) return ret class TestTsm(unittest.TestCase): + def test_dygraph_static_same_loss(self): if fluid.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) @@ -340,10 +347,9 @@ class TestTsm(unittest.TestCase): fake_data_reader = FakeDataReader("train", parse_config(args.config)) dygraph_loss = train(args, fake_data_reader, to_static=False) static_loss = train(args, fake_data_reader, to_static=True) - self.assertTrue( - np.allclose(dygraph_loss, static_loss), - msg="dygraph_loss: {} \nstatic_loss: {}".format(dygraph_loss, - static_loss)) + self.assertTrue(np.allclose(dygraph_loss, static_loss), + msg="dygraph_loss: {} \nstatic_loss: {}".format( + dygraph_loss, static_loss)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_typing.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_typing.py index 7017cdda9cd..66b154ee30a 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_typing.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_typing.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -20,6 +20,7 @@ from typing import Tuple, List, Dict, TypeVar class BaseLayer(paddle.nn.Layer): + def __init__(self, in_size, out_size): super(BaseLayer, self).__init__() self._linear = paddle.nn.Linear(in_size, out_size) @@ -31,6 +32,7 @@ class BaseLayer(paddle.nn.Layer): class LinearNetWithTuple(BaseLayer): + def __init__(self, in_size, out_size): super(LinearNetWithTuple, self).__init__(in_size, out_size) @@ -40,6 +42,7 @@ class LinearNetWithTuple(BaseLayer): class LinearNetWithTuple2(BaseLayer): + def __init__(self, in_size, out_size): super(LinearNetWithTuple2, self).__init__(in_size, out_size) @@ -49,6 +52,7 @@ class LinearNetWithTuple2(BaseLayer): class LinearNetWithList(BaseLayer): + def __init__(self, in_size, out_size): super(LinearNetWithList, self).__init__(in_size, out_size) @@ -58,6 +62,7 @@ class LinearNetWithList(BaseLayer): class LinearNetWithDict(BaseLayer): + def __init__(self, in_size, out_size): super(LinearNetWithDict, self).__init__(in_size, out_size) @@ -67,6 +72,7 @@ class LinearNetWithDict(BaseLayer): class TestTyping(unittest.TestCase): + def setUp(self): self.in_num = 16 self.out_num = 16 @@ -99,6 +105,7 @@ class TestTyping(unittest.TestCase): class TestTypingTuple(TestTyping): + def build_net(self): return LinearNetWithTuple2(self.in_num, self.out_num) @@ -109,6 +116,7 @@ class TestTypingTuple(TestTyping): class TestTypingList(TestTyping): + def build_net(self): return LinearNetWithList(self.in_num, self.out_num) @@ -118,6 +126,7 @@ class TestTypingList(TestTyping): class TestTypingDict(TestTyping): + def build_net(self): return LinearNetWithDict(self.in_num, self.out_num) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_utils.py index 747e9f1c0db..6f4fe613db7 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_utils.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_utils.py @@ -25,6 +25,7 @@ from test_program_translator import get_source_code class TestIndexInList(unittest.TestCase): + def test_index_in_list(self): list_to_test = [1, 2, 3, 4, 5] self.assertEqual(index_in_list(list_to_test, 4), 3) @@ -42,6 +43,7 @@ def dyfunc_assign(input): class StaticCode(): + def dyfunc_assign(input): b = 1 a = b @@ -56,6 +58,7 @@ class StaticCode(): class TestSplitAssignTransformer(unittest.TestCase): + def test_code(self): answer = get_source_code(StaticCode.dyfunc_assign) program_translator = ProgramTranslator() @@ -64,6 +67,7 @@ class TestSplitAssignTransformer(unittest.TestCase): class TestIsPaddle(unittest.TestCase): + def fake_module(self): return types.ModuleType('paddlenlp') diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_variable_trans_func.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_variable_trans_func.py index 8500f46d974..377353c0ab6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_variable_trans_func.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_variable_trans_func.py @@ -26,6 +26,7 @@ from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import data_laye class TestDataLayerNotCheck(unittest.TestCase): + def test_create_none_shape(self): main_program = fluid.Program() with fluid.program_guard(main_program): @@ -38,8 +39,8 @@ class TestDataLayerNotCheck(unittest.TestCase): with fluid.program_guard(main_program): d = data_layer_not_check(name="d", shape=(1, 2, 3)) feed_in_data = np.random.uniform(size=[1, 2, 4]).astype(np.float32) - 
place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) ret = exe.run(main_program, feed={d.name: feed_in_data}, @@ -48,6 +49,7 @@ class TestDataLayerNotCheck(unittest.TestCase): class TestVariableTransFunc(unittest.TestCase): + def test_create_fill_constant_node(self): node = create_fill_constant_node("a", 1.0) source = "a = paddle.fluid.layers.fill_constant(shape=[1], dtype='float64', value=1.0, name='a')" diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py index f270c5672af..f510e2dca6f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py @@ -61,8 +61,9 @@ def build_dict(corpus, min_freq=3): word_freq_dict[word] = 0 word_freq_dict[word] += 1 - word_freq_dict = sorted( - word_freq_dict.items(), key=lambda x: x[1], reverse=True) + word_freq_dict = sorted(word_freq_dict.items(), + key=lambda x: x[1], + reverse=True) word2id_dict = dict() word2id_freq = dict() @@ -109,9 +110,10 @@ corpus = convert_corpus_to_id(corpus, word2id_dict) def subsampling(corpus, word2id_freq): + def keep(word_id): - return random.uniform(0, 1) < math.sqrt(1e-4 / word2id_freq[word_id] * - len(corpus)) + return random.uniform(0, 1) < math.sqrt( + 1e-4 / word2id_freq[word_id] * len(corpus)) new_corpus = [] for line in corpus: @@ -136,12 +138,13 @@ def build_data(corpus, window_size = random.randint(1, max_window_size) center_word = line[center_word_idx] - positive_word_range = (max(0, center_word_idx - window_size), min( - len(line) - 1, center_word_idx + window_size)) + positive_word_range = (max(0, center_word_idx - window_size), + min( + len(line) - 1, + center_word_idx + window_size)) positive_word_candidates = [ - line[idx] - for idx in range(positive_word_range[0], positive_word_range[1] - + 1) + line[idx] for idx in range(positive_word_range[0], + positive_word_range[1] + 1) if idx != center_word_idx and line[idx] != line[center_word_idx] ] @@ -203,6 +206,7 @@ def build_batch(dataset, batch_size, epoch_num): class SkipGram(fluid.dygraph.Layer): + def __init__(self, name_scope, vocab_size, embedding_size, init_scale=0.1): super(SkipGram, self).__init__(name_scope) self.vocab_size = vocab_size @@ -259,8 +263,8 @@ def train(to_static): random.seed(0) np.random.seed(0) - place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.dygraph.guard(place): fluid.default_startup_program().random_seed = 1000 fluid.default_main_program().random_seed = 1000 @@ -293,13 +297,13 @@ def train(to_static): class TestWord2Vec(unittest.TestCase): + def test_dygraph_static_same_loss(self): dygraph_loss = train(to_static=False) static_loss = train(to_static=True) - self.assertTrue( - np.allclose(dygraph_loss, static_loss), - msg="dygraph_loss: {} \nstatic_loss: {}".format(dygraph_loss, - static_loss)) + self.assertTrue(np.allclose(dygraph_loss, static_loss), + msg="dygraph_loss: {} \nstatic_loss: {}".format( + dygraph_loss, static_loss)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py index 9326af2952e..ef074447893 100644 --- 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py @@ -47,6 +47,7 @@ class SmoothedValue(object): class FakeDataReader(object): + def __init__(self): self.generator_out = [] self.total_iter = cfg.max_iter @@ -58,13 +59,15 @@ class FakeDataReader(object): point1 = cfg.input_size / 4 point2 = cfg.input_size / 2 gt_boxes = np.array([[point1, point1, point2, point2]]) - gt_labels = np.random.randint( - low=0, high=cfg.class_num, size=[1]) + gt_labels = np.random.randint(low=0, + high=cfg.class_num, + size=[1]) gt_scores = np.zeros([1]) batch_out.append([img, gt_boxes, gt_labels, gt_scores]) self.generator_out.append(batch_out) def reader(self): + def generator(): for i in range(self.total_iter): yield self.generator_out[i] @@ -94,14 +97,16 @@ def train(to_static): learning_rate = cfg.learning_rate values = [learning_rate * (gamma**i) for i in range(step_num + 1)] - lr = fluid.dygraph.PiecewiseDecay( - boundaries=boundaries, values=values, begin=0) + lr = fluid.dygraph.PiecewiseDecay(boundaries=boundaries, + values=values, + begin=0) lr = fluid.layers.linear_lr_warmup( learning_rate=lr, warmup_steps=cfg.warm_up_iter, start_lr=0.0, - end_lr=cfg.learning_rate, ) + end_lr=cfg.learning_rate, + ) optimizer = fluid.optimizer.Momentum( learning_rate=lr, @@ -146,8 +151,8 @@ def train(to_static): total_sample += 1 print("Iter {:d}, loss {:.6f}, time {:.5f}".format( - iter_id, - smoothed_loss.get_mean_value(), start_time - prev_start_time)) + iter_id, smoothed_loss.get_mean_value(), + start_time - prev_start_time)) ret.append(smoothed_loss.get_mean_value()) loss.backward() @@ -159,14 +164,16 @@ def train(to_static): class TestYolov3(unittest.TestCase): + def test_dygraph_static_same_loss(self): dygraph_loss = train(to_static=False) static_loss = train(to_static=True) - self.assertTrue( - np.allclose( - dygraph_loss, static_loss, atol=1e-5, rtol=1e-3), - msg="dygraph_loss: {} \nstatic_loss: {}".format(dygraph_loss, - static_loss)) + self.assertTrue(np.allclose(dygraph_loss, + static_loss, + atol=1e-5, + rtol=1e-3), + msg="dygraph_loss: {} \nstatic_loss: {}".format( + dygraph_loss, static_loss)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index 07e9b1ac62e..ab52d518fe7 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -32,10 +32,10 @@ def position_encoding_init(n_position, d_pos_vec): num_timescales = channels // 2 log_timescale_increment = (np.log(float(1e4) / float(1)) / (num_timescales - 1)) - inv_timescales = np.exp(np.arange( - num_timescales)) * -log_timescale_increment - scaled_time = np.expand_dims(position, 1) * np.expand_dims(inv_timescales, - 0) + inv_timescales = np.exp( + np.arange(num_timescales)) * -log_timescale_increment + scaled_time = np.expand_dims(position, 1) * np.expand_dims( + inv_timescales, 0) signal = np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1) signal = np.pad(signal, [[0, 0], [0, np.mod(channels, 2)]], 'constant') position_enc = signal @@ -43,6 +43,7 @@ def position_encoding_init(n_position, d_pos_vec): class PrePostProcessLayer(Layer): + def __init__(self, process_cmd, d_model, dropout_rate): super(PrePostProcessLayer, self).__init__() self.process_cmd = process_cmd 
@@ -53,8 +54,8 @@ class PrePostProcessLayer(Layer): elif cmd == "n": # add layer normalization self.functors.append( self.add_sublayer( - "layer_norm_%d" % len( - [layer for layer in self.children()]), + "layer_norm_%d" % + len([layer for layer in self.children()]), LayerNorm( normalized_shape=d_model, param_attr=fluid.ParamAttr( @@ -63,8 +64,8 @@ class PrePostProcessLayer(Layer): initializer=fluid.initializer.Constant(0.))))) elif cmd == "d": # add dropout if dropout_rate: - self.functors.append(lambda x: layers.dropout( - x, dropout_prob=dropout_rate)) + self.functors.append( + lambda x: layers.dropout(x, dropout_prob=dropout_rate)) def forward(self, x, residual=None): for i, cmd in enumerate(self.process_cmd): @@ -76,6 +77,7 @@ class PrePostProcessLayer(Layer): class MultiHeadAttention(Layer): + def __init__(self, d_key, d_value, @@ -131,8 +133,10 @@ class MultiHeadAttention(Layer): v = layers.concat([cache_v, v], axis=2) cache["k"], cache["v"] = k, v # scale dot product attention - product = layers.matmul( - x=q, y=k, transpose_y=True, alpha=self.d_model**-0.5) + product = layers.matmul(x=q, + y=k, + transpose_y=True, + alpha=self.d_model**-0.5) if attn_bias is not None: product += attn_bias weights = layers.softmax(product) @@ -146,6 +150,7 @@ class MultiHeadAttention(Layer): class FFN(Layer): + def __init__(self, d_inner_hid, d_model, dropout_rate): super(FFN, self).__init__() self.dropout_rate = dropout_rate @@ -161,6 +166,7 @@ class FFN(Layer): class EncoderLayer(Layer): + def __init__(self, n_head, d_key, @@ -189,8 +195,8 @@ class EncoderLayer(Layer): prepostprocess_dropout) def forward(self, enc_input, attn_bias): - attn_output = self.self_attn( - self.preprocesser1(enc_input), None, None, attn_bias) + attn_output = self.self_attn(self.preprocesser1(enc_input), None, None, + attn_bias) attn_output = self.postprocesser1(attn_output, enc_input) ffn_output = self.ffn(self.preprocesser2(attn_output)) ffn_output = self.postprocesser2(ffn_output, attn_output) @@ -198,6 +204,7 @@ class EncoderLayer(Layer): class Encoder(Layer): + def __init__(self, n_layer, n_head, @@ -234,6 +241,7 @@ class Encoder(Layer): class Embedder(Layer): + def __init__(self, vocab_size, emb_dim, bos_idx=0): super(Embedder, self).__init__() self.word_embedder = Embedding( @@ -248,6 +256,7 @@ class Embedder(Layer): class WrapEncoder(Layer): + def __init__(self, src_vocab_size, max_length, n_layer, n_head, d_key, d_value, d_model, d_inner_hid, prepostprocess_dropout, attention_dropout, relu_dropout, preprocess_cmd, @@ -275,12 +284,14 @@ class WrapEncoder(Layer): emb = word_emb + pos_enc enc_input = layers.dropout( emb, - dropout_prob=self.emb_dropout, ) if self.emb_dropout else emb + dropout_prob=self.emb_dropout, + ) if self.emb_dropout else emb enc_output = self.encoder(enc_input, src_slf_attn_bias) return enc_output class DecoderLayer(Layer): + def __init__(self, n_head, d_key, @@ -318,8 +329,8 @@ class DecoderLayer(Layer): self_attn_bias, cross_attn_bias, cache=None): - self_attn_output = self.self_attn( - self.preprocesser1(dec_input), None, None, self_attn_bias, cache) + self_attn_output = self.self_attn(self.preprocesser1(dec_input), None, + None, self_attn_bias, cache) self_attn_output = self.postprocesser1(self_attn_output, dec_input) cross_attn_output = self.cross_attn( self.preprocesser2(self_attn_output), enc_output, enc_output, @@ -332,6 +343,7 @@ class DecoderLayer(Layer): class Decoder(Layer): + def __init__(self, n_layer, n_head, d_key, d_value, d_model, d_inner_hid, prepostprocess_dropout, 
attention_dropout, relu_dropout, preprocess_cmd, postprocess_cmd): @@ -357,13 +369,14 @@ class Decoder(Layer): caches=None): for i, decoder_layer in enumerate(self.decoder_layers): dec_output = decoder_layer(dec_input, enc_output, self_attn_bias, - cross_attn_bias, None - if caches is None else caches[i]) + cross_attn_bias, + None if caches is None else caches[i]) dec_input = dec_output return self.processer(dec_output) class WrapDecoder(Layer): + def __init__(self, trg_vocab_size, max_length, n_layer, n_head, d_key, d_value, d_model, d_inner_hid, prepostprocess_dropout, attention_dropout, relu_dropout, preprocess_cmd, @@ -389,8 +402,9 @@ class WrapDecoder(Layer): word_embedder.weight, transpose_y=True) else: - self.linear = Linear( - input_dim=d_model, output_dim=trg_vocab_size, bias_attr=False) + self.linear = Linear(input_dim=d_model, + output_dim=trg_vocab_size, + bias_attr=False) def forward(self, trg_word, @@ -406,26 +420,28 @@ class WrapDecoder(Layer): emb = word_emb + pos_enc dec_input = layers.dropout( emb, - dropout_prob=self.emb_dropout, ) if self.emb_dropout else emb + dropout_prob=self.emb_dropout, + ) if self.emb_dropout else emb dec_output = self.decoder(dec_input, enc_output, trg_slf_attn_bias, trg_src_attn_bias, caches) dec_output = layers.reshape( dec_output, - shape=[-1, dec_output.shape[-1]], ) + shape=[-1, dec_output.shape[-1]], + ) logits = self.linear(dec_output) return logits class CrossEntropyCriterion(object): + def __init__(self, label_smooth_eps): self.label_smooth_eps = label_smooth_eps def __call__(self, predict, label, weights): if self.label_smooth_eps: - label_out = layers.label_smooth( - label=layers.one_hot( - input=label, depth=predict.shape[-1]), - epsilon=self.label_smooth_eps) + label_out = layers.label_smooth(label=layers.one_hot( + input=label, depth=predict.shape[-1]), + epsilon=self.label_smooth_eps) cost = layers.softmax_with_cross_entropy( logits=predict, @@ -440,6 +456,7 @@ class CrossEntropyCriterion(object): class Transformer(Layer): + def __init__(self, src_vocab_size, trg_vocab_size, @@ -459,25 +476,29 @@ class Transformer(Layer): bos_id=0, eos_id=1): super(Transformer, self).__init__() - src_word_embedder = Embedder( - vocab_size=src_vocab_size, emb_dim=d_model, bos_idx=bos_id) - self.encoder = WrapEncoder( - src_vocab_size, max_length, n_layer, n_head, d_key, d_value, - d_model, d_inner_hid, prepostprocess_dropout, attention_dropout, - relu_dropout, preprocess_cmd, postprocess_cmd, src_word_embedder) + src_word_embedder = Embedder(vocab_size=src_vocab_size, + emb_dim=d_model, + bos_idx=bos_id) + self.encoder = WrapEncoder(src_vocab_size, max_length, n_layer, n_head, + d_key, d_value, d_model, d_inner_hid, + prepostprocess_dropout, attention_dropout, + relu_dropout, preprocess_cmd, + postprocess_cmd, src_word_embedder) if weight_sharing: assert src_vocab_size == trg_vocab_size, ( "Vocabularies in source and target should be same for weight sharing." 
) trg_word_embedder = src_word_embedder else: - trg_word_embedder = Embedder( - vocab_size=trg_vocab_size, emb_dim=d_model, bos_idx=bos_id) - self.decoder = WrapDecoder( - trg_vocab_size, max_length, n_layer, n_head, d_key, d_value, - d_model, d_inner_hid, prepostprocess_dropout, attention_dropout, - relu_dropout, preprocess_cmd, postprocess_cmd, weight_sharing, - trg_word_embedder) + trg_word_embedder = Embedder(vocab_size=trg_vocab_size, + emb_dim=d_model, + bos_idx=bos_id) + self.decoder = WrapDecoder(trg_vocab_size, max_length, n_layer, n_head, + d_key, d_value, d_model, d_inner_hid, + prepostprocess_dropout, attention_dropout, + relu_dropout, preprocess_cmd, + postprocess_cmd, weight_sharing, + trg_word_embedder) self.trg_vocab_size = trg_vocab_size self.n_layer = n_layer @@ -504,9 +525,10 @@ class Transformer(Layer): eos_id=1, beam_size=4, max_len=256): + def expand_to_beam_size(tensor, beam_size): - tensor = layers.reshape( - tensor, [tensor.shape[0], 1] + list(tensor.shape[1:])) + tensor = layers.reshape(tensor, [tensor.shape[0], 1] + + list(tensor.shape[1:])) tile_dims = [1] * len(tensor.shape) tile_dims[1] = beam_size return layers.expand(tensor, tile_dims) @@ -518,9 +540,9 @@ class Transformer(Layer): list(range(var_dim_in_state, len(tensor.shape))) + list(range(0, var_dim_in_state))) - tensor = layers.reshape(tensor, - [0] * (len(tensor.shape) - var_dim_in_state - ) + [batch_size * beam_size]) + tensor = layers.reshape(tensor, [0] * + (len(tensor.shape) - var_dim_in_state) + + [batch_size * beam_size]) res = layers.transpose( tensor, list( @@ -535,9 +557,9 @@ class Transformer(Layer): tensor, list(range(var_dim_in_state, len(tensor.shape))) + list(range(0, var_dim_in_state))) - tensor = layers.reshape(tensor, - [0] * (len(tensor.shape) - var_dim_in_state - ) + [batch_size, beam_size]) + tensor = layers.reshape(tensor, [0] * + (len(tensor.shape) - var_dim_in_state) + + [batch_size, beam_size]) res = layers.transpose( tensor, list( @@ -548,13 +570,11 @@ class Transformer(Layer): def mask_probs(probs, finished, noend_mask_tensor): finished = layers.cast(finished, dtype=probs.dtype) - probs = layers.elementwise_mul( - layers.expand( - layers.unsqueeze(finished, [2]), - [1, 1, self.trg_vocab_size]), - noend_mask_tensor, - axis=-1) - layers.elementwise_mul( - probs, (finished - 1), axis=0) + probs = layers.elementwise_mul(layers.expand( + layers.unsqueeze(finished, [2]), [1, 1, self.trg_vocab_size]), + noend_mask_tensor, + axis=-1) - layers.elementwise_mul( + probs, (finished - 1), axis=0) return probs def gather(input, indices, batch_pos): @@ -568,32 +588,31 @@ class Transformer(Layer): # constant number inf = float(1. 
* 1e7) max_len = (enc_output.shape[1] + 20) if max_len is None else max_len - vocab_size_tensor = layers.fill_constant( - shape=[1], dtype="int64", value=self.trg_vocab_size) + vocab_size_tensor = layers.fill_constant(shape=[1], + dtype="int64", + value=self.trg_vocab_size) end_token_tensor = to_variable( - np.full( - [batch_size, beam_size], eos_id, dtype="int64")) + np.full([batch_size, beam_size], eos_id, dtype="int64")) noend_array = [-inf] * self.trg_vocab_size noend_array[eos_id] = 0 noend_mask_tensor = to_variable(np.array(noend_array, dtype="float32")) batch_pos = layers.expand( layers.unsqueeze( - to_variable(np.arange( - 0, batch_size, 1, dtype="int64")), [1]), [1, beam_size]) + to_variable(np.arange(0, batch_size, 1, dtype="int64")), [1]), + [1, beam_size]) predict_ids = [] parent_ids = [] ### initialize states of beam search ### log_probs = to_variable( - np.array( - [[0.] + [-inf] * (beam_size - 1)] * batch_size, - dtype="float32")) + np.array([[0.] + [-inf] * (beam_size - 1)] * batch_size, + dtype="float32")) - finished = to_variable( - np.full( - [batch_size, beam_size], 0, dtype="bool")) + finished = to_variable(np.full([batch_size, beam_size], 0, + dtype="bool")) - trg_word = layers.fill_constant( - shape=[batch_size * beam_size, 1], dtype="int64", value=bos_id) + trg_word = layers.fill_constant(shape=[batch_size * beam_size, 1], + dtype="int64", + value=bos_id) trg_src_attn_bias = merge_batch_beams( expand_to_beam_size(trg_src_attn_bias, beam_size)) @@ -602,19 +621,22 @@ class Transformer(Layer): # init states (caches) for transformer, need to be updated according to selected beam caches = [{ - "k": layers.fill_constant( + "k": + layers.fill_constant( shape=[batch_size, beam_size, self.n_head, 0, self.d_key], dtype=enc_output.dtype, value=0), - "v": layers.fill_constant( + "v": + layers.fill_constant( shape=[batch_size, beam_size, self.n_head, 0, self.d_value], dtype=enc_output.dtype, value=0), } for i in range(self.n_layer)] for i in range(max_len): - trg_pos = layers.fill_constant( - shape=trg_word.shape, dtype="int64", value=i) + trg_pos = layers.fill_constant(shape=trg_word.shape, + dtype="int64", + value=i) caches = map_structure(merge_batch_beams, caches) # TODO: modified for dygraph2static logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias, @@ -625,17 +647,18 @@ class Transformer(Layer): step_log_probs = mask_probs(step_log_probs, finished, noend_mask_tensor) - log_probs = layers.elementwise_add( - x=step_log_probs, y=log_probs, axis=0) + log_probs = layers.elementwise_add(x=step_log_probs, + y=log_probs, + axis=0) log_probs = layers.reshape(log_probs, [-1, beam_size * self.trg_vocab_size]) scores = log_probs - topk_scores, topk_indices = fluid.layers.topk( - input=scores, k=beam_size) - beam_indices = fluid.layers.elementwise_floordiv(topk_indices, - vocab_size_tensor) - token_indices = fluid.layers.elementwise_mod(topk_indices, - vocab_size_tensor) + topk_scores, topk_indices = fluid.layers.topk(input=scores, + k=beam_size) + beam_indices = fluid.layers.elementwise_floordiv( + topk_indices, vocab_size_tensor) + token_indices = fluid.layers.elementwise_mod( + topk_indices, vocab_size_tensor) # update states caches = map_structure(lambda x: gather(x, beam_indices, batch_pos), diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py index e264a300d8c..bf06fb12bdd 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py 
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py @@ -32,14 +32,15 @@ def get_input_descs(args, mode="train"): input_descs_train = { "src_word": [(batch_size, seq_len), "int64", 2], "src_pos": [(batch_size, seq_len), "int64"], - "src_slf_attn_bias": - [(batch_size, n_head, seq_len, seq_len), "float32"], + "src_slf_attn_bias": [(batch_size, n_head, seq_len, seq_len), + "float32"], "trg_word": [(batch_size, seq_len), "int64", 2], "trg_pos": [(batch_size, seq_len), "int64"], - "trg_slf_attn_bias": - [(batch_size, n_head, seq_len, seq_len), "float32"], - "trg_src_attn_bias": [(batch_size, n_head, seq_len, seq_len), "float32" - ], # TODO: 1 for predict, seq_len for train + "trg_slf_attn_bias": [(batch_size, n_head, seq_len, seq_len), + "float32"], + "trg_src_attn_bias": + [(batch_size, n_head, seq_len, seq_len), + "float32"], # TODO: 1 for predict, seq_len for train "enc_output": [(batch_size, seq_len, d_model), "float32"], "lbl_word": [(None, 1), "int64"], "lbl_weight": [(None, 1), "float32"], @@ -49,12 +50,12 @@ def get_input_descs(args, mode="train"): input_descs_predict = { "src_word": [(batch_size, seq_len), "int64", 2], "src_pos": [(batch_size, seq_len), "int64"], - "src_slf_attn_bias": - [(batch_size, n_head, seq_len, seq_len), "float32"], + "src_slf_attn_bias": [(batch_size, n_head, seq_len, seq_len), + "float32"], "trg_word": [(batch_size, seq_len), "int64", 2], "trg_pos": [(batch_size, seq_len), "int64"], - "trg_slf_attn_bias": - [(batch_size, n_head, seq_len, seq_len), "float32"], + "trg_slf_attn_bias": [(batch_size, n_head, seq_len, seq_len), + "float32"], "trg_src_attn_bias": [(batch_size, n_head, 1, seq_len), "float32"], "enc_output": [(batch_size, seq_len, d_model), "float32"], "lbl_word": [(None, 1), "int64"], @@ -69,19 +70,23 @@ def get_input_descs(args, mode="train"): encoder_data_input_fields = ( "src_word", "src_pos", - "src_slf_attn_bias", ) + "src_slf_attn_bias", +) decoder_data_input_fields = ( "trg_word", "trg_pos", "trg_slf_attn_bias", "trg_src_attn_bias", - "enc_output", ) + "enc_output", +) label_data_input_fields = ( "lbl_word", - "lbl_weight", ) + "lbl_weight", +) fast_decoder_data_input_fields = ( "trg_word", - "trg_src_attn_bias", ) + "trg_src_attn_bias", +) class ModelHyperParams(object): @@ -220,19 +225,20 @@ def prepare_infer_input(insts, src_pad_idx, bos_idx, n_head): def get_feed_data_reader(args, mode='train'): + def __for_train__(): - train_reader = paddle.batch( - wmt16.train(args.src_vocab_size, args.trg_vocab_size), - batch_size=args.batch_size) + train_reader = paddle.batch(wmt16.train(args.src_vocab_size, + args.trg_vocab_size), + batch_size=args.batch_size) for batch in train_reader(): tensors = prepare_train_input(batch, args.eos_idx, args.eos_idx, args.n_head) yield tensors def __for_test__(): - test_reader = paddle.batch( - wmt16.test(args.src_vocab_size, args.trg_vocab_size), - batch_size=args.batch_size) + test_reader = paddle.batch(wmt16.test(args.src_vocab_size, + args.trg_vocab_size), + batch_size=args.batch_size) for batch in test_reader(): tensors = prepare_infer_input(batch, args.eos_idx, args.eos_idx, args.n_head) @@ -242,16 +248,16 @@ def get_feed_data_reader(args, mode='train'): class InputField(object): + def __init__(self, input_slots): self.feed_list = [] for slot in input_slots: self.feed_list.append( - fluid.layers.data( - name=slot['name'], - shape=slot['shape'], - dtype=slot['dtype'], - lod_level=slot.get('lod_level', 0), - append_batch_size=False)) + fluid.layers.data(name=slot['name'], + 
shape=slot['shape'], + dtype=slot['dtype'], + lod_level=slot.get('lod_level', 0), + append_batch_size=False)) def load(program, model_path, executor=None, var_list=None): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/tsm_config_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/tsm_config_utils.py index 4fedd1b246b..0b37e94b3a2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/tsm_config_utils.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/tsm_config_utils.py @@ -14,6 +14,7 @@ import yaml import logging + logger = logging.getLogger(__name__) CONFIG_SECS = [ @@ -25,6 +26,7 @@ CONFIG_SECS = [ class AttrDict(dict): + def __getattr__(self, key): return self[key] @@ -76,8 +78,8 @@ def merge_configs(cfg, sec, args_dict): def print_configs(cfg, mode): - logger.info("---------------- {:>5} Arguments ----------------".format( - mode)) + logger.info( + "---------------- {:>5} Arguments ----------------".format(mode)) for sec, sec_items in cfg.items(): logger.info("{}:".format(sec)) for k, v in sec_items.items(): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py index bb95bdf9fc6..f1552869a2d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py @@ -32,6 +32,7 @@ from darknet import ConvBNLayer class AttrDict(dict): + def __init__(self, *args, **kwargs): super(AttrDict, self).__init__(*args, **kwargs) @@ -100,7 +101,7 @@ cfg.learning_rate = 0.001 cfg.max_iter = 20 if fluid.is_compiled_with_cuda() else 1 # Disable mixup in last N iter cfg.no_mixup_iter = 10 if fluid.is_compiled_with_cuda() else 1 -# warm up to learning rate +# warm up to learning rate cfg.warm_up_iter = 10 if fluid.is_compiled_with_cuda() else 1 cfg.warm_up_factor = 0. 
# lr steps_with_decay @@ -120,54 +121,49 @@ cfg.class_num = 80 class YoloDetectionBlock(fluid.dygraph.Layer): + def __init__(self, ch_in, channel, is_test=True): super(YoloDetectionBlock, self).__init__() assert channel % 2 == 0, \ "channel {} cannot be divided by 2".format(channel) - self.conv0 = ConvBNLayer( - ch_in=ch_in, - ch_out=channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test) - self.conv1 = ConvBNLayer( - ch_in=channel, - ch_out=channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test) - self.conv2 = ConvBNLayer( - ch_in=channel * 2, - ch_out=channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test) - self.conv3 = ConvBNLayer( - ch_in=channel, - ch_out=channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test) - self.route = ConvBNLayer( - ch_in=channel * 2, - ch_out=channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test) - self.tip = ConvBNLayer( - ch_in=channel, - ch_out=channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test) + self.conv0 = ConvBNLayer(ch_in=ch_in, + ch_out=channel, + filter_size=1, + stride=1, + padding=0, + is_test=is_test) + self.conv1 = ConvBNLayer(ch_in=channel, + ch_out=channel * 2, + filter_size=3, + stride=1, + padding=1, + is_test=is_test) + self.conv2 = ConvBNLayer(ch_in=channel * 2, + ch_out=channel, + filter_size=1, + stride=1, + padding=0, + is_test=is_test) + self.conv3 = ConvBNLayer(ch_in=channel, + ch_out=channel * 2, + filter_size=3, + stride=1, + padding=1, + is_test=is_test) + self.route = ConvBNLayer(ch_in=channel * 2, + ch_out=channel, + filter_size=1, + stride=1, + padding=0, + is_test=is_test) + self.tip = ConvBNLayer(ch_in=channel, + ch_out=channel * 2, + filter_size=3, + stride=1, + padding=1, + is_test=is_test) def forward(self, inputs): out = self.conv0(inputs) @@ -180,6 +176,7 @@ class YoloDetectionBlock(fluid.dygraph.Layer): class Upsample(fluid.dygraph.Layer): + def __init__(self, scale=2): super(Upsample, self).__init__() self.scale = scale @@ -187,20 +184,24 @@ class Upsample(fluid.dygraph.Layer): def forward(self, inputs): # get dynamic upsample output shape shape_nchw = fluid.layers.shape(inputs) - shape_hw = fluid.layers.slice( - shape_nchw, axes=[0], starts=[2], ends=[4]) + shape_hw = fluid.layers.slice(shape_nchw, + axes=[0], + starts=[2], + ends=[4]) shape_hw.stop_gradient = True in_shape = fluid.layers.cast(shape_hw, dtype='int32') out_shape = in_shape * self.scale out_shape.stop_gradient = True # reisze by actual_shape - out = fluid.layers.resize_nearest( - input=inputs, scale=self.scale, actual_shape=out_shape) + out = fluid.layers.resize_nearest(input=inputs, + scale=self.scale, + actual_shape=out_shape) return out class YOLOv3(fluid.dygraph.Layer): + def __init__(self, ch_in, is_train=True, use_random=False): super(YOLOv3, self).__init__() @@ -215,39 +216,36 @@ class YOLOv3(fluid.dygraph.Layer): for i in range(3): yolo_block = self.add_sublayer( "yolo_detecton_block_%d" % (i), - YoloDetectionBlock( - ch_in_list[i], - channel=512 // (2**i), - is_test=not self.is_train)) + YoloDetectionBlock(ch_in_list[i], + channel=512 // (2**i), + is_test=not self.is_train)) self.yolo_blocks.append(yolo_block) num_filters = len(cfg.anchor_masks[i]) * (cfg.class_num + 5) block_out = self.add_sublayer( "block_out_%d" % (i), - Conv2D( - num_channels=1024 // (2**i), - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr( - initializer=fluid.initializer.Normal(0., 0.02)), - bias_attr=ParamAttr( - 
initializer=fluid.initializer.Constant(0.0), - regularizer=L2Decay(0.)))) + Conv2D(num_channels=1024 // (2**i), + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr( + initializer=fluid.initializer.Normal(0., 0.02)), + bias_attr=ParamAttr( + initializer=fluid.initializer.Constant(0.0), + regularizer=L2Decay(0.)))) self.block_outputs.append(block_out) if i < 2: route = self.add_sublayer( "route2_%d" % i, - ConvBNLayer( - ch_in=512 // (2**i), - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not self.is_train))) + ConvBNLayer(ch_in=512 // (2**i), + ch_out=256 // (2**i), + filter_size=1, + stride=1, + padding=0, + is_test=(not self.is_train))) self.route_blocks_2.append(route) self.upsample = Upsample() @@ -313,8 +311,7 @@ class YOLOv3(fluid.dygraph.Layer): name="yolo_box" + str(i)) self.boxes.append(boxes) self.scores.append( - fluid.layers.transpose( - scores, perm=[0, 2, 1])) + fluid.layers.transpose(scores, perm=[0, 2, 1])) self.downsample //= 2 if not self.is_train: @@ -322,14 +319,13 @@ class YOLOv3(fluid.dygraph.Layer): yolo_boxes = fluid.layers.concat(self.boxes, axis=1) yolo_scores = fluid.layers.concat(self.scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=cfg.valid_thresh, - nms_top_k=cfg.nms_topk, - keep_top_k=cfg.nms_posk, - nms_threshold=cfg.nms_thresh, - background_label=-1) + pred = fluid.layers.multiclass_nms(bboxes=yolo_boxes, + scores=yolo_scores, + score_threshold=cfg.valid_thresh, + nms_top_k=cfg.nms_topk, + keep_top_k=cfg.nms_posk, + nms_threshold=cfg.nms_thresh, + background_label=-1) return pred else: return sum(self.losses) diff --git a/python/paddle/fluid/tests/unittests/elastic_demo.py b/python/paddle/fluid/tests/unittests/elastic_demo.py index c5177c0f529..af26abd0d88 100644 --- a/python/paddle/fluid/tests/unittests/elastic_demo.py +++ b/python/paddle/fluid/tests/unittests/elastic_demo.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,9 +15,10 @@ import os, sys import time -sys.stderr.write("{}-DISTRIBUTED_TRAINER_ENDPOINTS={}\n".format(os.environ[ - 'PADDLE_TRAINER_ID'], os.environ['DISTRIBUTED_TRAINER_ENDPOINTS'])) -sys.stderr.write("{}-PADDLE_TRAINERS={}\n".format(os.environ[ - 'PADDLE_TRAINER_ID'], os.environ['PADDLE_TRAINERS'])) +sys.stderr.write("{}-DISTRIBUTED_TRAINER_ENDPOINTS={}\n".format( + os.environ['PADDLE_TRAINER_ID'], + os.environ['DISTRIBUTED_TRAINER_ENDPOINTS'])) +sys.stderr.write("{}-PADDLE_TRAINERS={}\n".format( + os.environ['PADDLE_TRAINER_ID'], os.environ['PADDLE_TRAINERS'])) time.sleep(600) diff --git a/python/paddle/fluid/tests/unittests/fake_reader.py b/python/paddle/fluid/tests/unittests/fake_reader.py index 34a256e15dd..f97884218eb 100644 --- a/python/paddle/fluid/tests/unittests/fake_reader.py +++ b/python/paddle/fluid/tests/unittests/fake_reader.py @@ -21,14 +21,18 @@ def fake_imdb_reader(word_dict_size, lower_seq_len=100, upper_seq_len=200, class_dim=2): + def __reader__(): for _ in six.moves.range(sample_num): - length = np.random.random_integers( - low=lower_seq_len, high=upper_seq_len, size=[1])[0] - ids = np.random.random_integers( - low=0, high=word_dict_size - 1, size=[length]).astype('int64') - label = np.random.random_integers( - low=0, high=class_dim - 1, size=[1]).astype('int64')[0] + length = np.random.random_integers(low=lower_seq_len, + high=upper_seq_len, + size=[1])[0] + ids = np.random.random_integers(low=0, + high=word_dict_size - 1, + size=[length]).astype('int64') + label = np.random.random_integers(low=0, + high=class_dim - 1, + size=[1]).astype('int64')[0] yield ids, label return __reader__ diff --git a/python/paddle/fluid/tests/unittests/feed_data_reader.py b/python/paddle/fluid/tests/unittests/feed_data_reader.py index 1e6016d57bd..9ea7e88f66e 100644 --- a/python/paddle/fluid/tests/unittests/feed_data_reader.py +++ b/python/paddle/fluid/tests/unittests/feed_data_reader.py @@ -18,6 +18,7 @@ from paddle.fluid.framework import Variable def cyclic_reader(reader): + def __reader__(): while True: for data in reader(): @@ -27,6 +28,7 @@ def cyclic_reader(reader): class FeedDataReader(object): + def __init__(self, feed_list, reader): self._feed_list = [] for var in feed_list: diff --git a/python/paddle/fluid/tests/unittests/fft/__init__.py b/python/paddle/fluid/tests/unittests/fft/__init__.py index b9a7651e449..185a92b8d94 100644 --- a/python/paddle/fluid/tests/unittests/fft/__init__.py +++ b/python/paddle/fluid/tests/unittests/fft/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/fluid/tests/unittests/fft/spectral_op_np.py b/python/paddle/fluid/tests/unittests/fft/spectral_op_np.py index b00111f6821..3c48c99af34 100644 --- a/python/paddle/fluid/tests/unittests/fft/spectral_op_np.py +++ b/python/paddle/fluid/tests/unittests/fft/spectral_op_np.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -52,8 +52,8 @@ def _fftc2r(a, n=None, axis=-1, norm=None, forward=None): inv_norm = _get_forward_norm(n, norm) else: inv_norm = _get_backward_norm(n, norm) - output = _raw_fft(a.conj() - if forward else a, n, axis, True, False, inv_norm) + output = _raw_fft(a.conj() if forward else a, n, axis, True, False, + inv_norm) return output diff --git a/python/paddle/fluid/tests/unittests/fft/test_fft.py b/python/paddle/fluid/tests/unittests/fft/test_fft.py index 7ee5a04ece4..a3c62323c2c 100644 --- a/python/paddle/fluid/tests/unittests/fft/test_fft.py +++ b/python/paddle/fluid/tests/unittests/fft/test_fft.py @@ -44,13 +44,14 @@ def rand_x(dims=1, complex=False): shape = [np.random.randint(min_dim_len, max_dim_len) for i in range(dims)] if complex: - return np.random.randn(*shape).astype(dtype) + 1.j * np.random.randn( - *shape).astype(dtype) + return np.random.randn(*shape).astype( + dtype) + 1.j * np.random.randn(*shape).astype(dtype) else: return np.random.randn(*shape).astype(dtype) def place(devices, key='place'): + def decorate(cls): module = sys.modules[cls.__module__].__dict__ raw_classes = { @@ -97,65 +98,66 @@ def parameterize(fields, values=None): @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [('test_x_float64', rand_x(5, np.float64), None, -1, 'backward'), - ('test_x_complex', rand_x( - 5, complex=True), None, -1, - 'backward'), ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), 11, -1, - 'backward'), ('test_n_smaller_than_input_length', rand_x( - 5, min_dim_len=5, complex=True), 3, -1, 'backward'), + ('test_x_complex', rand_x(5, complex=True), None, -1, 'backward'), + ('test_n_grater_input_length', rand_x(5, + max_dim_len=5), 11, -1, 'backward'), + ('test_n_smaller_than_input_length', rand_x( + 5, min_dim_len=5, complex=True), 3, -1, 'backward'), ('test_axis_not_last', rand_x(5), None, 3, 'backward'), ('test_norm_forward', rand_x(5), None, 3, 'forward'), ('test_norm_ortho', rand_x(5), None, 3, 'ortho')]) class TestFft(unittest.TestCase): + def test_fft(self): """Test fft with norm condition """ with paddle.fluid.dygraph.guard(self.place): self.assertTrue( - np.allclose( - scipy.fft.fft(self.x, self.n, self.axis, self.norm), - paddle.fft.fft( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype)))) + np.allclose(scipy.fft.fft(self.x, self.n, self.axis, self.norm), + paddle.fft.fft(paddle.to_tensor(self.x), self.n, + self.axis, self.norm), + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype)))) @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [('test_x_float64', rand_x(5, np.float64), None, -1, 'backward'), - ('test_x_complex', rand_x( - 5, complex=True), None, -1, - 'backward'), ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), 11, -1, - 'backward'), ('test_n_smaller_than_input_length', rand_x( - 5, min_dim_len=5, complex=True), 3, -1, 'backward'), + ('test_x_complex', rand_x(5, complex=True), None, -1, 'backward'), + ('test_n_grater_input_length', rand_x(5, + max_dim_len=5), 11, -1, 
'backward'), + ('test_n_smaller_than_input_length', rand_x( + 5, min_dim_len=5, complex=True), 3, -1, 'backward'), ('test_axis_not_last', rand_x(5), None, 3, 'backward'), ('test_norm_forward', rand_x(5), None, 3, 'forward'), ('test_norm_ortho', rand_x(5), None, 3, 'ortho')]) class TestIfft(unittest.TestCase): + def test_fft(self): """Test ifft with norm condition """ with paddle.fluid.dygraph.guard(self.place): self.assertTrue( - np.allclose( - scipy.fft.ifft(self.x, self.n, self.axis, self.norm), - paddle.fft.ifft( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype)))) + np.allclose(scipy.fft.ifft(self.x, self.n, self.axis, + self.norm), + paddle.fft.ifft(paddle.to_tensor(self.x), self.n, + self.axis, self.norm), + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype)))) @place(DEVICES) -@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ - ('test_n_nagative', rand_x(2), -1, -1, 'backward', ValueError), - ('test_n_zero', rand_x(2), 0, -1, 'backward', ValueError), - ('test_axis_out_of_range', rand_x(1), None, 10, 'backward', ValueError), - ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', ValueError), - ('test_norm_not_in_enum_value', rand_x(2), None, -1, 'random', ValueError) -]) +@parameterize( + (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), + [('test_n_nagative', rand_x(2), -1, -1, 'backward', ValueError), + ('test_n_zero', rand_x(2), 0, -1, 'backward', ValueError), + ('test_axis_out_of_range', rand_x(1), None, 10, 'backward', ValueError), + ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', ValueError), + ('test_norm_not_in_enum_value', rand_x(2), None, -1, 'random', ValueError)] +) class TestFftException(unittest.TestCase): + def test_fft(self): """Test fft with buoudary condition Test case include: @@ -165,56 +167,55 @@ class TestFftException(unittest.TestCase): - norm out of range """ with self.assertRaises(self.expect_exception): - paddle.fft.fft( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.fft(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ - ('test_x_float64', rand_x(5), None, (0, 1), 'backward'), - ('test_x_complex128', rand_x( - 5, complex=True), None, (0, 1), 'backward'), - ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), (6, 6), (0, 1), 'backward'), - ('test_n_smaller_than_input_length', rand_x( - 5, min_dim_len=5, complex=True), (4, 4), (0, 1), 'backward'), - ('test_axis_random', rand_x(5), None, (1, 2), 'backward'), - ('test_axis_none', rand_x(5), None, None, 'backward'), - ('test_norm_forward', rand_x(5), None, (0, 1), 'forward'), - ('test_norm_ortho', rand_x(5), None, (0, 1), 'ortho'), - ]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ + ('test_x_float64', rand_x(5), None, (0, 1), 'backward'), + ('test_x_complex128', rand_x(5, complex=True), None, (0, 1), 'backward'), + ('test_n_grater_input_length', rand_x(5, max_dim_len=5), (6, 6), + (0, 1), 'backward'), + ('test_n_smaller_than_input_length', rand_x(5, min_dim_len=5, complex=True), + (4, 4), (0, 1), 'backward'), + ('test_axis_random', rand_x(5), None, (1, 2), 'backward'), + ('test_axis_none', rand_x(5), None, None, 'backward'), + ('test_norm_forward', rand_x(5), None, (0, 1), 'forward'), + ('test_norm_ortho', rand_x(5), None, (0, 1), 'ortho'), +]) class TestFft2(unittest.TestCase): + def test_fft2(self): """Test fft2 with 
norm condition """ with paddle.fluid.dygraph.guard(self.place): self.assertTrue( - np.allclose( - scipy.fft.fft2(self.x, self.n, self.axis, self.norm), - paddle.fft.fft2( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype)))) + np.allclose(scipy.fft.fft2(self.x, self.n, self.axis, + self.norm), + paddle.fft.fft2(paddle.to_tensor(self.x), self.n, + self.axis, self.norm), + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype)))) @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), - [('test_x_complex_input', rand_x( - 2, complex=True), None, (0, 1), None, - ValueError), ('test_x_1dim_tensor', rand_x(1), None, (0, 1), None, - ValueError), ('test_n_nagative', rand_x(2), -1, (0, 1), - 'backward', ValueError), - ('test_n_len_not_equal_axis', rand_x( - 5, max_dim_len=5), 11, (0, 1), 'backward', - ValueError), ('test_n_zero', rand_x(2), (0, 0), (0, 1), 'backward', - ValueError), ('test_axis_out_of_range', rand_x(2), None, - (0, 1, 2), 'backward', ValueError), + [('test_x_complex_input', rand_x(2, complex=True), None, + (0, 1), None, ValueError), + ('test_x_1dim_tensor', rand_x(1), None, (0, 1), None, ValueError), + ('test_n_nagative', rand_x(2), -1, (0, 1), 'backward', ValueError), + ('test_n_len_not_equal_axis', rand_x(5, max_dim_len=5), 11, + (0, 1), 'backward', ValueError), + ('test_n_zero', rand_x(2), (0, 0), (0, 1), 'backward', ValueError), + ('test_axis_out_of_range', rand_x(2), None, + (0, 1, 2), 'backward', ValueError), ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', ValueError), ('test_axis_not_sequence', rand_x(5), None, -10, 'backward', ValueError), ('test_norm_not_enum', rand_x(2), None, -1, 'random', ValueError)]) class TestFft2Exception(unittest.TestCase): + def test_fft2(self): """Test fft2 with buoudary condition Test case include: @@ -227,58 +228,59 @@ class TestFft2Exception(unittest.TestCase): """ with paddle.fluid.dygraph.guard(self.place): with self.assertRaises(self.expect_exception): - paddle.fft.fft2( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.fft2(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [('test_x_float64', rand_x(5, np.float64), None, None, 'backward'), - ('test_x_complex128', rand_x( - 5, complex=True), None, None, - 'backward'), ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), (6, 6), (1, 2), 'backward'), ( - 'test_n_smaller_input_length', rand_x( - 5, min_dim_len=5, complex=True), (3, 3), (1, 2), 'backward'), - ('test_axis_not_default', rand_x(5), None, (1, 2), - 'backward'), ('test_norm_forward', rand_x(5), None, None, 'forward'), + ('test_x_complex128', rand_x(5, complex=True), None, None, 'backward'), + ('test_n_grater_input_length', rand_x(5, max_dim_len=5), (6, 6), + (1, 2), 'backward'), + ('test_n_smaller_input_length', rand_x(5, min_dim_len=5, complex=True), + (3, 3), (1, 2), 'backward'), + ('test_axis_not_default', rand_x(5), None, (1, 2), 'backward'), + ('test_norm_forward', rand_x(5), None, None, 'forward'), ('test_norm_ortho', rand_x(5), None, None, 'ortho')]) class TestFftn(unittest.TestCase): + def test_fftn(self): """Test fftn with norm condition """ with paddle.fluid.dygraph.guard(self.place): - np.testing.assert_allclose( - scipy.fft.fftn(self.x, self.n, self.axis, self.norm), - paddle.fft.fftn( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), - 
rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype))) + np.testing.assert_allclose(scipy.fft.fftn(self.x, self.n, self.axis, + self.norm), + paddle.fft.fftn(paddle.to_tensor(self.x), + self.n, self.axis, + self.norm), + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype))) @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [('test_x_float64', rand_x(5, np.float64), None, None, 'backward'), - ('test_x_complex128', rand_x( - 5, complex=True), None, None, - 'backward'), ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), (6, 6), (1, 2), 'backward'), ( - 'test_n_smaller_input_length', rand_x( - 5, min_dim_len=5, complex=True), (3, 3), (1, 2), 'backward'), - ('test_axis_not_default', rand_x(5), None, (1, 2), - 'backward'), ('test_norm_forward', rand_x(5), None, None, 'forward'), + ('test_x_complex128', rand_x(5, complex=True), None, None, 'backward'), + ('test_n_grater_input_length', rand_x(5, max_dim_len=5), (6, 6), + (1, 2), 'backward'), + ('test_n_smaller_input_length', rand_x(5, min_dim_len=5, complex=True), + (3, 3), (1, 2), 'backward'), + ('test_axis_not_default', rand_x(5), None, (1, 2), 'backward'), + ('test_norm_forward', rand_x(5), None, None, 'forward'), ('test_norm_ortho', rand_x(5), None, None, 'ortho')]) class TestIFftn(unittest.TestCase): + def test_ifftn(self): """Test ifftn with norm condition """ with paddle.fluid.dygraph.guard(self.place): np.testing.assert_allclose( scipy.fft.ifftn(self.x, self.n, self.axis, self.norm), - paddle.fft.ifftn( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), + paddle.fft.ifftn(paddle.to_tensor(self.x), self.n, self.axis, + self.norm), rtol=RTOL.get(str(self.x.dtype)), atol=ATOL.get(str(self.x.dtype))) @@ -286,67 +288,60 @@ class TestIFftn(unittest.TestCase): @place(DEVICES) @parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ ('test_x_complex128', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.complex128), None, -1, "backward"), - ('test_n_grater_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), 4, -1, - "backward"), - ('test_n_smaller_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), 2, -1, - "backward"), - ('test_axis_not_last', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, 1, - "backward"), - ('test_norm_forward', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, 1, - "forward"), - ('test_norm_ortho', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, -1, - "ortho"), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.complex128), None, -1, "backward"), + ('test_n_grater_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), 4, -1, "backward"), + ('test_n_smaller_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), 2, -1, "backward"), + ('test_axis_not_last', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, 1, "backward"), + ('test_norm_forward', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, 1, "forward"), + ('test_norm_ortho', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, -1, "ortho"), ]) class TestHfft(unittest.TestCase): + def test_hfft(self): """Test hfft with norm condition """ with paddle.fluid.dygraph.guard(self.place): - np.testing.assert_allclose( - scipy.fft.hfft(self.x, self.n, self.axis, self.norm), - paddle.fft.hfft( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), - 
rtol=1e-5, - atol=0) + np.testing.assert_allclose(scipy.fft.hfft(self.x, self.n, self.axis, + self.norm), + paddle.fft.hfft(paddle.to_tensor(self.x), + self.n, self.axis, + self.norm), + rtol=1e-5, + atol=0) @place(DEVICES) @parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ ('test_x_complex128', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.complex128), None, -1, "backward"), - ('test_n_grater_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), 4, -1, - "backward"), - ('test_n_smaller_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), 2, -1, - "backward"), - ('test_axis_not_last', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, -1, - "backward"), - ('test_norm_forward', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, -1, - "forward"), - ('test_norm_ortho', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, -1, - "ortho"), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.complex128), None, -1, "backward"), + ('test_n_grater_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), 4, -1, "backward"), + ('test_n_smaller_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), 2, -1, "backward"), + ('test_axis_not_last', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, -1, "backward"), + ('test_norm_forward', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, -1, "forward"), + ('test_norm_ortho', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, -1, "ortho"), ]) class TestIrfft(unittest.TestCase): + def test_irfft(self): """Test irfft with norm condition """ with paddle.fluid.dygraph.guard(self.place): np.testing.assert_allclose( scipy.fft.irfft(self.x, self.n, self.axis, self.norm), - paddle.fft.irfft( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), + paddle.fft.irfft(paddle.to_tensor(self.x), self.n, self.axis, + self.norm), rtol=1e-5, atol=0) @@ -354,33 +349,29 @@ class TestIrfft(unittest.TestCase): @place(DEVICES) @parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ ('test_x_complex128', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.complex128), None, None, "backward"), - ('test_n_grater_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), [4], None, - "backward"), - ('test_n_smaller_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), [2], None, - "backward"), - ('test_axis_not_last', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, None, - "backward"), - ('test_norm_forward', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, None, - "forward"), - ('test_norm_ortho', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, None, - "ortho"), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.complex128), None, None, "backward"), + ('test_n_grater_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), [4], None, "backward"), + ('test_n_smaller_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), [2], None, "backward"), + ('test_axis_not_last', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, None, "backward"), + ('test_norm_forward', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, None, "forward"), + ('test_norm_ortho', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, 
None, "ortho"), ]) class TestIrfftn(unittest.TestCase): + def test_irfftn(self): """Test irfftn with norm condition """ with paddle.fluid.dygraph.guard(self.place): np.testing.assert_allclose( scipy.fft.irfftn(self.x, self.n, self.axis, self.norm), - paddle.fft.irfftn( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), + paddle.fft.irfftn(paddle.to_tensor(self.x), self.n, self.axis, + self.norm), rtol=1e-5, atol=0) @@ -388,33 +379,29 @@ class TestIrfftn(unittest.TestCase): @place(DEVICES) @parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ ('test_x_complex128', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.complex128), None, None, "backward"), - ('test_n_grater_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), [4], None, - "backward"), - ('test_n_smaller_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), [2], None, - "backward"), - ('test_axis_not_last', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, None, - "backward"), - ('test_norm_forward', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, None, - "forward"), - ('test_norm_ortho', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, None, - "ortho"), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.complex128), None, None, "backward"), + ('test_n_grater_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), [4], None, "backward"), + ('test_n_smaller_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), [2], None, "backward"), + ('test_axis_not_last', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, None, "backward"), + ('test_norm_forward', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, None, "forward"), + ('test_norm_ortho', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, None, "ortho"), ]) class TestHfftn(unittest.TestCase): + def test_hfftn(self): """Test hfftn with norm condition """ with paddle.fluid.dygraph.guard(self.place): np.testing.assert_allclose( scipy.fft.hfftn(self.x, self.n, self.axis, self.norm), - paddle.fft.hfftn( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), + paddle.fft.hfftn(paddle.to_tensor(self.x), self.n, self.axis, + self.norm), rtol=1e-5, atol=0) @@ -422,29 +409,30 @@ class TestHfftn(unittest.TestCase): @place(DEVICES) @parameterize((TEST_CASE_NAME, 'x', 's', 'axis', 'norm'), [ ('test_x_complex128', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.complex128), None, (-2, -1), "backward"), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.complex128), None, (-2, -1), "backward"), ('test_with_s', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), [2, 2], (-2, -1), "backward", ValueError), ('test_axis_not_last', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, (-2, -1), - "backward"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, + (-2, -1), "backward"), ('test_norm_forward', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, (-2, -1), - "forward"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, + (-2, -1), "forward"), ('test_norm_ortho', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, (-2, -1), - "ortho"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, + (-2, -1), "ortho"), ]) class TestHfft2(unittest.TestCase): + def test_hfft2(self): """Test hfft2 with norm 
condition """ with paddle.fluid.dygraph.guard(self.place): np.testing.assert_allclose( scipy.fft.hfft2(self.x, self.s, self.axis, self.norm), - paddle.fft.hfft2( - paddle.to_tensor(self.x), self.s, self.axis, self.norm), + paddle.fft.hfft2(paddle.to_tensor(self.x), self.s, self.axis, + self.norm), rtol=1e-5, atol=0) @@ -452,57 +440,55 @@ class TestHfft2(unittest.TestCase): @place(DEVICES) @parameterize((TEST_CASE_NAME, 'x', 's', 'axis', 'norm'), [ ('test_x_complex128', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.complex128), None, (-2, -1), "backward"), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.complex128), None, (-2, -1), "backward"), ('test_n_equal_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (4, 6), (-2, -1), - "backward"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (4, 6), + (-2, -1), "backward"), ('test_axis_not_last', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, (-2, -1), - "backward"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, + (-2, -1), "backward"), ('test_norm_forward', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, (-2, -1), - "forward"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, + (-2, -1), "forward"), ('test_norm_ortho', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, (-2, -1), - "ortho"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, + (-2, -1), "ortho"), ]) class TestIrfft2(unittest.TestCase): + def test_irfft2(self): """Test irfft2 with norm condition """ with paddle.fluid.dygraph.guard(self.place): np.testing.assert_allclose( scipy.fft.irfft2(self.x, self.s, self.axis, self.norm), - paddle.fft.irfft2( - paddle.to_tensor(self.x), self.s, self.axis, self.norm), + paddle.fft.irfft2(paddle.to_tensor(self.x), self.s, self.axis, + self.norm), rtol=1e-5, atol=0) @place(DEVICES) -@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [( - 'test_bool_input', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype(np.bool8), - None, -1, 'backward', NotImplementedError), ( - 'test_n_nagative', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), -1, -1, - 'backward', ValueError), ( - 'test_n_zero', np.random.randn(4, 4) + 1j * np.random.randn(4, 4), - 0, -1, 'backward', ValueError), ( - 'test_n_type', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - (1, 2, 3), -1, 'backward', ValueError), ( - 'test_axis_out_of_range', - np.random.randn(4) + 1j * np.random.randn(4), None, 10, - 'backward', ValueError), ( - 'test_axis_with_array', - np.random.randn(4) + 1j * np.random.randn(4), None, - (0, 1), 'backward', ValueError), ( - 'test_norm_not_in_enum_value', - np.random.randn(4, 4) + 1j * np.random.randn(4, 4), - None, -1, 'random', ValueError)]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ + ('test_bool_input', + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.bool8), None, -1, 'backward', NotImplementedError), + ('test_n_nagative', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), -1, -1, 'backward', ValueError), + ('test_n_zero', np.random.randn(4, 4) + 1j * np.random.randn(4, 4), 0, -1, + 'backward', ValueError), + ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), + (1, 2, 3), -1, 'backward', ValueError), + ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), + None, 10, 'backward', 
ValueError), + ('test_axis_with_array', np.random.randn(4) + 1j * np.random.randn(4), None, + (0, 1), 'backward', ValueError), + ('test_norm_not_in_enum_value', np.random.randn(4, 4) + + 1j * np.random.randn(4, 4), None, -1, 'random', ValueError) +]) class TestHfftException(unittest.TestCase): + def test_hfft(self): """Test hfft with buoudary condition Test case include: @@ -515,28 +501,27 @@ class TestHfftException(unittest.TestCase): """ with paddle.fluid.dygraph.guard(self.place): with self.assertRaises(self.expect_exception): - paddle.fft.hfft( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.hfft(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), - [('test_n_nagative', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), -1, -1, - 'backward', ValueError), - ('test_n_zero', np.random.randn(4, 4) + 1j * np.random.randn(4, 4), 0, -1, - 'backward', ValueError), - ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - (1, 2), -1, 'backward', ValueError), - ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), - None, 10, 'backward', ValueError), - ('test_axis_with_array', np.random.randn(4) + 1j * np.random.randn(4), - None, (0, 1), 'backward', - ValueError), ('test_norm_not_in_enum_value', - np.random.randn(4, 4) + 1j * np.random.randn(4, 4), None, - None, 'random', ValueError)]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ + ('test_n_nagative', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), -1, -1, 'backward', ValueError), + ('test_n_zero', np.random.randn(4, 4) + 1j * np.random.randn(4, 4), 0, -1, + 'backward', ValueError), + ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), + (1, 2), -1, 'backward', ValueError), + ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), + None, 10, 'backward', ValueError), + ('test_axis_with_array', np.random.randn(4) + 1j * np.random.randn(4), None, + (0, 1), 'backward', ValueError), + ('test_norm_not_in_enum_value', np.random.randn(4, 4) + + 1j * np.random.randn(4, 4), None, None, 'random', ValueError) +]) class TestIrfftException(unittest.TestCase): + def test_irfft(self): """ Test irfft with buoudary condition @@ -549,16 +534,16 @@ class TestIrfftException(unittest.TestCase): """ with paddle.fluid.dygraph.guard(self.place): with self.assertRaises(self.expect_exception): - paddle.fft.irfft( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.irfft(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [('test_bool_input', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.bool8), None, (-2, -1), 'backward', NotImplementedError), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.bool8), None, (-2, -1), 'backward', NotImplementedError), ('test_n_nagative', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (-1, -2), (-2, -1), 'backward', ValueError), @@ -567,16 +552,16 @@ class TestIrfftException(unittest.TestCase): ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), 3, None, 'backward', ValueError), ('test_n_axis_dim', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (1, 2), (-1), - 'backward', ValueError), ('test_axis_out_of_range', - np.random.randn(4) + 1j * np.random.randn(4), - None, (1, 
2), 'backward', ValueError), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (1, 2), + (-1), 'backward', ValueError), + ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), + None, (1, 2), 'backward', ValueError), ('test_axis_type', np.random.randn(4) + 1j * np.random.randn(4), None, -1, - 'backward', - ValueError), ('test_norm_not_in_enum_value', - np.random.randn(4, 4) + 1j * np.random.randn(4, 4), None, - None, 'random', ValueError)]) + 'backward', ValueError), + ('test_norm_not_in_enum_value', np.random.randn(4, 4) + + 1j * np.random.randn(4, 4), None, None, 'random', ValueError)]) class TestHfft2Exception(unittest.TestCase): + def test_hfft2(self): """ Test hfft2 with buoudary condition @@ -590,8 +575,8 @@ class TestHfft2Exception(unittest.TestCase): """ with paddle.fluid.dygraph.guard(self.place): with self.assertRaises(self.expect_exception): - paddle.fft.hfft2( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.hfft2(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) @@ -601,23 +586,23 @@ class TestHfft2Exception(unittest.TestCase): np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (-1, -2), (-2, -1), 'backward', ValueError), ('test_zero_point', - np.random.randn(4, 4, 1) + 1j * np.random.randn(4, 4, 1), None, (-2, -1), - "backward", ValueError), + np.random.randn(4, 4, 1) + 1j * np.random.randn(4, 4, 1), None, + (-2, -1), "backward", ValueError), ('test_n_zero', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (0, 0), (-2, -1), 'backward', ValueError), ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - 3, -1, 'backward', - ValueError), ('test_n_axis_dim', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - (1, 2), (-3, -2, -1), 'backward', ValueError), + 3, -1, 'backward', ValueError), + ('test_n_axis_dim', + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (1, 2), + (-3, -2, -1), 'backward', ValueError), ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), - None, (1, 2), 'backward', ValueError), ( - 'test_axis_type', np.random.randn(4) + 1j * np.random.randn(4), None, - 1, 'backward', - ValueError), ('test_norm_not_in_enum_value', - np.random.randn(4, 4) + 1j * np.random.randn(4, 4), - None, None, 'random', ValueError)]) + None, (1, 2), 'backward', ValueError), + ('test_axis_type', np.random.randn(4) + 1j * np.random.randn(4), None, 1, + 'backward', ValueError), + ('test_norm_not_in_enum_value', np.random.randn(4, 4) + + 1j * np.random.randn(4, 4), None, None, 'random', ValueError)]) class TestIrfft2Exception(unittest.TestCase): + def test_irfft2(self): """ Test irfft2 with buoudary condition @@ -631,16 +616,16 @@ class TestIrfft2Exception(unittest.TestCase): """ with paddle.fluid.dygraph.guard(self.place): with self.assertRaises(self.expect_exception): - paddle.fft.irfft2( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.irfft2(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [('test_bool_input', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.bool8), None, (-2, -1), 'backward', NotImplementedError), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.bool8), None, (-2, -1), 'backward', NotImplementedError), ('test_n_nagative', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (-1, -2), (-2, -1), 'backward', ValueError), @@ -649,17 +634,16 @@ 
class TestIrfft2Exception(unittest.TestCase): ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), 3, -1, 'backward', ValueError), ('test_n_axis_dim', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - (1, 2), (-3, -2, -1), 'backward', - ValueError), ('test_axis_out_of_range', - np.random.randn(4) + 1j * np.random.randn(4), None, - (10, 20), 'backward', ValueError), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (1, 2), + (-3, -2, -1), 'backward', ValueError), + ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), + None, (10, 20), 'backward', ValueError), ('test_axis_type', np.random.randn(4) + 1j * np.random.randn(4), None, 1, - 'backward', - ValueError), ('test_norm_not_in_enum_value', - np.random.randn(4, 4) + 1j * np.random.randn(4, 4), None, - None, 'random', ValueError)]) + 'backward', ValueError), + ('test_norm_not_in_enum_value', np.random.randn(4, 4) + + 1j * np.random.randn(4, 4), None, None, 'random', ValueError)]) class TestHfftnException(unittest.TestCase): + def test_hfftn(self): """Test hfftn with buoudary condition Test case include: @@ -672,8 +656,8 @@ class TestHfftnException(unittest.TestCase): """ with paddle.fluid.dygraph.guard(self.place): with self.assertRaises(self.expect_exception): - paddle.fft.hfftn( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.hfftn(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) @@ -685,18 +669,18 @@ class TestHfftnException(unittest.TestCase): ('test_n_zero', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (0, 0), (-2, -1), 'backward', ValueError), ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - 3, -1, 'backward', - ValueError), ('test_n_axis_dim', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - (1, 2), (-3, -2, -1), 'backward', ValueError), + 3, -1, 'backward', ValueError), + ('test_n_axis_dim', + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (1, 2), + (-3, -2, -1), 'backward', ValueError), ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), None, (10, 20), 'backward', ValueError), ('test_axis_type', np.random.randn(4) + 1j * np.random.randn(4), None, 1, - 'backward', - ValueError), ('test_norm_not_in_enum_value', - np.random.randn(4, 4) + 1j * np.random.randn(4, 4), None, - None, 'random', ValueError)]) + 'backward', ValueError), + ('test_norm_not_in_enum_value', np.random.randn(4, 4) + + 1j * np.random.randn(4, 4), None, None, 'random', ValueError)]) class TestIrfftnException(unittest.TestCase): + def test_irfftn(self): """Test irfftn with buoudary condition Test case include: @@ -708,44 +692,46 @@ class TestIrfftnException(unittest.TestCase): """ with paddle.fluid.dygraph.guard(self.place): with self.assertRaises(self.expect_exception): - paddle.fft.irfftn( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.irfftn(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), - [('test_x_float64', rand_x(5, np.float64), None, -1, 'backward'), ( - 'test_n_grater_than_input_length', rand_x( - 5, max_dim_len=5), 11, -1, 'backward'), - ('test_n_smaller_than_input_length', rand_x( - 5, min_dim_len=5), 3, -1, - 'backward'), ('test_axis_not_last', rand_x(5), None, 3, 'backward'), - ('test_norm_forward', rand_x(5), None, 3, 'forward'), - ('test_norm_ortho', rand_x(5), None, 3, 'ortho')]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), + 
[('test_x_float64', rand_x(5, np.float64), None, -1, 'backward'), + ('test_n_grater_than_input_length', rand_x( + 5, max_dim_len=5), 11, -1, 'backward'), + ('test_n_smaller_than_input_length', rand_x( + 5, min_dim_len=5), 3, -1, 'backward'), + ('test_axis_not_last', rand_x(5), None, 3, 'backward'), + ('test_norm_forward', rand_x(5), None, 3, 'forward'), + ('test_norm_ortho', rand_x(5), None, 3, 'ortho')]) class TestRfft(unittest.TestCase): + def test_rfft(self): """Test rfft with norm condition """ with paddle.fluid.dygraph.guard(self.place): self.assertTrue( - np.allclose( - scipy.fft.rfft(self.x, self.n, self.axis, self.norm), - paddle.fft.rfft( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype)))) + np.allclose(scipy.fft.rfft(self.x, self.n, self.axis, + self.norm), + paddle.fft.rfft(paddle.to_tensor(self.x), self.n, + self.axis, self.norm), + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype)))) @place(DEVICES) -@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ - ('test_n_nagative', rand_x(2), -1, -1, 'backward', ValueError), - ('test_n_zero', rand_x(2), 0, -1, 'backward', ValueError), - ('test_axis_out_of_range', rand_x(1), None, 10, 'backward', ValueError), - ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', ValueError), - ('test_norm_not_in_enum_value', rand_x(2), None, -1, 'random', ValueError) -]) +@parameterize( + (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), + [('test_n_nagative', rand_x(2), -1, -1, 'backward', ValueError), + ('test_n_zero', rand_x(2), 0, -1, 'backward', ValueError), + ('test_axis_out_of_range', rand_x(1), None, 10, 'backward', ValueError), + ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', ValueError), + ('test_norm_not_in_enum_value', rand_x(2), None, -1, 'random', ValueError)] +) class TestRfftException(unittest.TestCase): + def test_rfft(self): """Test rfft with buoudary condition Test case include: @@ -756,54 +742,52 @@ class TestRfftException(unittest.TestCase): - the dimensions of n and axis are different """ with self.assertRaises(self.expect_exception): - paddle.fft.rfft( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.rfft(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ - ('test_x_float64', rand_x(5), None, (0, 1), 'backward'), - ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), (6, 6), (0, 1), 'backward'), - ('test_n_smaller_than_input_length', rand_x( - 5, min_dim_len=5), (4, 4), (0, 1), 'backward'), - ('test_axis_random', rand_x(5), None, (1, 2), 'backward'), - ('test_axis_none', rand_x(5), None, None, 'backward'), - ('test_norm_forward', rand_x(5), None, (0, 1), 'forward'), - ('test_norm_ortho', rand_x(5), None, (0, 1), 'ortho'), - ]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ + ('test_x_float64', rand_x(5), None, (0, 1), 'backward'), + ('test_n_grater_input_length', rand_x(5, max_dim_len=5), (6, 6), + (0, 1), 'backward'), + ('test_n_smaller_than_input_length', rand_x(5, min_dim_len=5), (4, 4), + (0, 1), 'backward'), + ('test_axis_random', rand_x(5), None, (1, 2), 'backward'), + ('test_axis_none', rand_x(5), None, None, 'backward'), + ('test_norm_forward', rand_x(5), None, (0, 1), 'forward'), + ('test_norm_ortho', rand_x(5), None, (0, 1), 'ortho'), +]) class TestRfft2(unittest.TestCase): + def test_rfft2(self): """Test rfft2 with norm condition 
""" with paddle.fluid.dygraph.guard(self.place): self.assertTrue( - np.allclose( - scipy.fft.rfft2(self.x, self.n, self.axis, self.norm), - paddle.fft.rfft2( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype)))) + np.allclose(scipy.fft.rfft2(self.x, self.n, self.axis, + self.norm), + paddle.fft.rfft2(paddle.to_tensor(self.x), self.n, + self.axis, self.norm), + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype)))) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ - ('test_x_complex_input', rand_x( - 2, complex=True), None, (0, 1), 'backward', RuntimeError), - ('test_x_1dim_tensor', rand_x(1), None, (0, 1), 'backward', ValueError), - ('test_n_nagative', rand_x(2), -1, (0, 1), 'backward', ValueError), - ('test_n_zero', rand_x(2), 0, (0, 1), 'backward', ValueError), - ('test_axis_out_of_range', rand_x(2), None, (0, 1, 2), 'backward', - ValueError), - ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', - ValueError), - ('test_axis_not_sequence', rand_x(5), None, -10, 'backward', - ValueError), - ('test_norm_not_enum', rand_x(2), None, -1, 'random', ValueError), - ]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ + ('test_x_complex_input', rand_x(2, complex=True), None, + (0, 1), 'backward', RuntimeError), + ('test_x_1dim_tensor', rand_x(1), None, (0, 1), 'backward', ValueError), + ('test_n_nagative', rand_x(2), -1, (0, 1), 'backward', ValueError), + ('test_n_zero', rand_x(2), 0, (0, 1), 'backward', ValueError), + ('test_axis_out_of_range', rand_x(2), None, + (0, 1, 2), 'backward', ValueError), + ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', ValueError), + ('test_axis_not_sequence', rand_x(5), None, -10, 'backward', ValueError), + ('test_norm_not_enum', rand_x(2), None, -1, 'random', ValueError), +]) class TestRfft2Exception(unittest.TestCase): + def test_rfft2(self): """Test rfft2 with buoudary condition Test case include: @@ -816,49 +800,48 @@ class TestRfft2Exception(unittest.TestCase): """ with paddle.fluid.dygraph.guard(self.place): with self.assertRaises(self.expect_exception): - paddle.fft.rfft2( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.rfft2(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ - ('test_x_float64', rand_x(5, np.float64), None, None, 'backward'), - ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), (6, 6), (1, 2), 'backward'), - ('test_n_smaller_input_length', rand_x( - 5, min_dim_len=5), (3, 3), (1, 2), 'backward'), - ('test_axis_not_default', rand_x(5), None, (1, 2), 'backward'), - ('test_norm_forward', rand_x(5), None, None, 'forward'), - ('test_norm_ortho', rand_x(5), None, None, 'ortho'), - ]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ + ('test_x_float64', rand_x(5, np.float64), None, None, 'backward'), + ('test_n_grater_input_length', rand_x(5, max_dim_len=5), (6, 6), + (1, 2), 'backward'), + ('test_n_smaller_input_length', rand_x(5, min_dim_len=5), (3, 3), + (1, 2), 'backward'), + ('test_axis_not_default', rand_x(5), None, (1, 2), 'backward'), + ('test_norm_forward', rand_x(5), None, None, 'forward'), + ('test_norm_ortho', rand_x(5), None, None, 'ortho'), +]) class TestRfftn(unittest.TestCase): + def test_rfftn(self): """Test rfftn with norm condition """ with paddle.fluid.dygraph.guard(self.place): self.assertTrue( - 
np.allclose( - scipy.fft.rfftn(self.x, self.n, self.axis, self.norm), - paddle.fft.rfftn( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype)))) + np.allclose(scipy.fft.rfftn(self.x, self.n, self.axis, + self.norm), + paddle.fft.rfftn(paddle.to_tensor(self.x), self.n, + self.axis, self.norm), + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype)))) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), - [('test_x_complex', rand_x( - 4, complex=True), None, None, 'backward', - RuntimeError), ('test_n_nagative', rand_x(4), (-1, -1), (1, 2), - 'backward', ValueError), - ('test_n_not_sequence', rand_x(4), -1, None, 'backward', ValueError), - ('test_n_zero', rand_x(4), 0, None, 'backward', ValueError), ( - 'test_axis_out_of_range', rand_x(1), None, [0, 1], 'backward', - ValueError), - ('test_norm_not_in_enum', rand_x(2), None, -1, 'random', ValueError)]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ + ('test_x_complex', rand_x( + 4, complex=True), None, None, 'backward', RuntimeError), + ('test_n_nagative', rand_x(4), (-1, -1), (1, 2), 'backward', ValueError), + ('test_n_not_sequence', rand_x(4), -1, None, 'backward', ValueError), + ('test_n_zero', rand_x(4), 0, None, 'backward', ValueError), + ('test_axis_out_of_range', rand_x(1), None, [0, 1], 'backward', ValueError), + ('test_norm_not_in_enum', rand_x(2), None, -1, 'random', ValueError) +]) class TestRfftnException(unittest.TestCase): + def test_rfftn(self): """Test rfftn with buoudary condition Test case include: @@ -869,43 +852,45 @@ class TestRfftnException(unittest.TestCase): """ with paddle.fluid.dygraph.guard(self.place): with self.assertRaises(self.expect_exception): - paddle.fft.rfftn( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.rfftn(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), - [('test_x_float64', rand_x(5, np.float64), None, -1, 'backward'), ( - 'test_n_grater_than_input_length', rand_x( - 5, max_dim_len=5), 11, -1, 'backward'), - ('test_n_smaller_than_input_length', rand_x( - 5, min_dim_len=5), 3, -1, - 'backward'), ('test_axis_not_last', rand_x(5), None, 3, 'backward'), - ('test_norm_forward', rand_x(5), None, 3, 'forward'), - ('test_norm_ortho', rand_x(5), None, 3, 'ortho')]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), + [('test_x_float64', rand_x(5, np.float64), None, -1, 'backward'), + ('test_n_grater_than_input_length', rand_x( + 5, max_dim_len=5), 11, -1, 'backward'), + ('test_n_smaller_than_input_length', rand_x( + 5, min_dim_len=5), 3, -1, 'backward'), + ('test_axis_not_last', rand_x(5), None, 3, 'backward'), + ('test_norm_forward', rand_x(5), None, 3, 'forward'), + ('test_norm_ortho', rand_x(5), None, 3, 'ortho')]) class TestIhfft(unittest.TestCase): + def test_ihfft(self): """Test ihfft with norm condition """ with paddle.fluid.dygraph.guard(self.place): np.testing.assert_allclose( scipy.fft.ihfft(self.x, self.n, self.axis, self.norm), - paddle.fft.ihfft( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), + paddle.fft.ihfft(paddle.to_tensor(self.x), self.n, self.axis, + self.norm), rtol=RTOL.get(str(self.x.dtype)), atol=ATOL.get(str(self.x.dtype))) @place(DEVICES) -@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ - ('test_n_nagative', rand_x(2), -1, -1, 'backward', ValueError), - 
('test_n_zero', rand_x(2), 0, -1, 'backward', ValueError), - ('test_axis_out_of_range', rand_x(1), None, 10, 'backward', ValueError), - ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', ValueError), - ('test_norm_not_in_enum_value', rand_x(2), None, -1, 'random', ValueError) -]) +@parameterize( + (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), + [('test_n_nagative', rand_x(2), -1, -1, 'backward', ValueError), + ('test_n_zero', rand_x(2), 0, -1, 'backward', ValueError), + ('test_axis_out_of_range', rand_x(1), None, 10, 'backward', ValueError), + ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', ValueError), + ('test_norm_not_in_enum_value', rand_x(2), None, -1, 'random', ValueError)] +) class TestIhfftException(unittest.TestCase): + def test_ihfft(self): """Test ihfft with buoudary condition Test case include: @@ -915,32 +900,32 @@ class TestIhfftException(unittest.TestCase): """ with paddle.fluid.dygraph.guard(self.place): with self.assertRaises(self.expect_exception): - paddle.fft.ihfft( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.ihfft(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ - ('test_x_float64', rand_x(5), None, (0, 1), 'backward'), - ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), (11, 11), (0, 1), 'backward'), - ('test_n_smaller_than_input_length', rand_x( - 5, min_dim_len=5), (1, 1), (0, 1), 'backward'), - ('test_axis_random', rand_x(5), None, (1, 2), 'backward'), - ('test_axis_none', rand_x(5), None, None, 'backward'), - ('test_norm_forward', rand_x(5), None, (0, 1), 'forward'), - ('test_norm_ortho', rand_x(5), None, (0, 1), 'ortho'), - ]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ + ('test_x_float64', rand_x(5), None, (0, 1), 'backward'), + ('test_n_grater_input_length', rand_x(5, max_dim_len=5), (11, 11), + (0, 1), 'backward'), + ('test_n_smaller_than_input_length', rand_x(5, min_dim_len=5), (1, 1), + (0, 1), 'backward'), + ('test_axis_random', rand_x(5), None, (1, 2), 'backward'), + ('test_axis_none', rand_x(5), None, None, 'backward'), + ('test_norm_forward', rand_x(5), None, (0, 1), 'forward'), + ('test_norm_ortho', rand_x(5), None, (0, 1), 'ortho'), +]) class TestIhfft2(unittest.TestCase): + def test_ihfft2(self): """Test ihfft2 with norm condition """ with paddle.fluid.dygraph.guard(self.place): np.testing.assert_allclose( scipy.fft.ihfft2(self.x, self.n, self.axis, self.norm), - paddle.fft.ihfft2( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), + paddle.fft.ihfft2(paddle.to_tensor(self.x), self.n, self.axis, + self.norm), rtol=RTOL.get(str(self.x.dtype)), atol=ATOL.get(str(self.x.dtype))) @@ -948,19 +933,20 @@ class TestIhfft2(unittest.TestCase): @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), - [('test_x_complex_input', rand_x( - 2, complex=True), None, (0, 1), None, ValueError), - ('test_x_1dim_tensor', rand_x(1), None, (0, 1), None, - ValueError), ('test_n_nagative', rand_x(2), -1, (0, 1), 'backward', - ValueError), ('test_n_len_not_equal_axis', rand_x( - 5, max_dim_len=5), 11, (0, 1), 'backward', ValueError), + [('test_x_complex_input', rand_x(2, complex=True), None, + (0, 1), None, ValueError), + ('test_x_1dim_tensor', rand_x(1), None, (0, 1), None, ValueError), + ('test_n_nagative', rand_x(2), -1, (0, 1), 'backward', ValueError), + ('test_n_len_not_equal_axis', rand_x(5, max_dim_len=5), 11, + (0, 1), 'backward', 
ValueError), ('test_n_zero', rand_x(2), (0, 0), (0, 1), 'backward', ValueError), - ('test_axis_out_of_range', rand_x(2), None, (0, 1, 2), 'backward', - ValueError), ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', - ValueError), ('test_axis_not_sequence', rand_x(5), None, - -10, 'backward', ValueError), + ('test_axis_out_of_range', rand_x(2), None, + (0, 1, 2), 'backward', ValueError), + ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', ValueError), + ('test_axis_not_sequence', rand_x(5), None, -10, 'backward', ValueError), ('test_norm_not_enum', rand_x(2), None, -1, 'random', ValueError)]) class TestIhfft2Exception(unittest.TestCase): + def test_ihfft2(self): """Test ihfft2 with buoudary condition Test case include: @@ -973,46 +959,47 @@ class TestIhfft2Exception(unittest.TestCase): """ with paddle.fluid.dygraph.guard(self.place): with self.assertRaises(self.expect_exception): - paddle.fft.ihfft2( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.ihfft2(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [('test_x_float64', rand_x(5, np.float64), None, None, 'backward'), - ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), (11, 11), (0, 1), - 'backward'), ('test_n_smaller_input_length', rand_x( - 5, min_dim_len=5), (1, 1), (0, 1), 'backward'), - ('test_axis_not_default', rand_x(5), None, (1, 2), - 'backward'), ('test_norm_forward', rand_x(5), None, None, 'forward'), + ('test_n_grater_input_length', rand_x(5, max_dim_len=5), (11, 11), + (0, 1), 'backward'), + ('test_n_smaller_input_length', rand_x(5, min_dim_len=5), (1, 1), + (0, 1), 'backward'), + ('test_axis_not_default', rand_x(5), None, (1, 2), 'backward'), + ('test_norm_forward', rand_x(5), None, None, 'forward'), ('test_norm_ortho', rand_x(5), None, None, 'ortho')]) class TestIhfftn(unittest.TestCase): + def test_ihfftn(self): """Test ihfftn with norm condition """ with paddle.fluid.dygraph.guard(self.place): self.assertTrue( - np.allclose( - scipy.fft.ihfftn(self.x, self.n, self.axis, self.norm), - paddle.fft.ihfftn( - paddle.to_tensor(self.x), self.n, self.axis, self.norm), - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype)))) + np.allclose(scipy.fft.ihfftn(self.x, self.n, self.axis, + self.norm), + paddle.fft.ihfftn(paddle.to_tensor(self.x), self.n, + self.axis, self.norm), + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype)))) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), - [('test_x_complex', rand_x( +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ + ('test_x_complex', rand_x( 4, complex=True), None, None, 'backward', RuntimeError), - ('test_n_nagative', rand_x(4), -1, None, 'backward', ValueError), - ('test_n_zero', rand_x(4), 0, None, 'backward', ValueError), ( - 'test_axis_out_of_range', rand_x(1), None, [0, 1], 'backward', - ValueError), - ('test_norm_not_in_enum', rand_x(2), None, -1, 'random', ValueError)]) + ('test_n_nagative', rand_x(4), -1, None, 'backward', ValueError), + ('test_n_zero', rand_x(4), 0, None, 'backward', ValueError), + ('test_axis_out_of_range', rand_x(1), None, [0, 1], 'backward', ValueError), + ('test_norm_not_in_enum', rand_x(2), None, -1, 'random', ValueError) +]) class TestIhfftnException(unittest.TestCase): + def test_ihfftn(self): """Test ihfftn with buoudary condition Test case include: @@ -1023,8 +1010,8 @@ class 
TestIhfftnException(unittest.TestCase): """ with paddle.fluid.dygraph.guard(self.place): with self.assertRaises(self.expect_exception): - paddle.fft.ihfftn( - paddle.to_tensor(self.x), self.n, self.axis, self.norm) + paddle.fft.ihfftn(paddle.to_tensor(self.x), self.n, self.axis, + self.norm) @place(DEVICES) @@ -1033,6 +1020,7 @@ class TestIhfftnException(unittest.TestCase): ('test_with_d', 20, 0.5, 'float32'), ]) class TestFftFreq(unittest.TestCase): + def test_fftfreq(self): """Test fftfreq with norm condition """ @@ -1050,6 +1038,7 @@ class TestFftFreq(unittest.TestCase): ('test_with_d', 20, 0.5, 'float32'), ]) class TestRfftFreq(unittest.TestCase): + def test_rfftfreq(self): """Test rfftfreq with norm condition """ @@ -1070,37 +1059,39 @@ class TestRfftFreq(unittest.TestCase): np.random.randn(5, 5) + 1j * np.random.randn(5, 5), None, 'complex128'), ]) class TestFftShift(unittest.TestCase): + def test_fftshift(self): """Test fftshift with norm condition """ with paddle.fluid.dygraph.guard(self.place): - np.testing.assert_allclose( - scipy.fft.fftshift(self.x, self.axes), - paddle.fft.fftshift(paddle.to_tensor(self.x), - self.axes).numpy(), - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype))) + np.testing.assert_allclose(scipy.fft.fftshift(self.x, self.axes), + paddle.fft.fftshift( + paddle.to_tensor(self.x), + self.axes).numpy(), + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype))) @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'axes'), - [('test_1d', np.random.randn(10), (0, ), - 'float64'), ('test_2d', np.random.randn(10, 10), (0, 1), 'float64'), + [('test_1d', np.random.randn(10), (0, ), 'float64'), + ('test_2d', np.random.randn(10, 10), (0, 1), 'float64'), ('test_2d_with_all_axes', np.random.randn(10, 10), None, 'float64'), ('test_2d_odd_with_all_axes', np.random.randn(5, 5) + 1j * np.random.randn(5, 5), None, 'complex128')]) class TestIfftShift(unittest.TestCase): + def test_ifftshift(self): """Test ifftshift with norm condition """ with paddle.fluid.dygraph.guard(self.place): - np.testing.assert_allclose( - scipy.fft.ifftshift(self.x, self.axes), - paddle.fft.ifftshift(paddle.to_tensor(self.x), - self.axes).numpy(), - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype))) + np.testing.assert_allclose(scipy.fft.ifftshift(self.x, self.axes), + paddle.fft.ifftshift( + paddle.to_tensor(self.x), + self.axes).numpy(), + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype))) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/fft/test_fft_with_static_graph.py b/python/paddle/fluid/tests/unittests/fft/test_fft_with_static_graph.py index 4f19cd06a49..ce0a623aea0 100644 --- a/python/paddle/fluid/tests/unittests/fft/test_fft_with_static_graph.py +++ b/python/paddle/fluid/tests/unittests/fft/test_fft_with_static_graph.py @@ -42,27 +42,27 @@ def stgraph(func, place, x, n, axes, norm): @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), - [('test_x_float64', rand_x(5, np.float64), None, -1, 'backward'), - ('test_x_complex64', rand_x( - 5, np.float64, complex=True), None, -1, - 'backward'), ('test_n_grater_than_input_length', rand_x( - 5, max_dim_len=5), 11, -1, - 'backward'), ('test_n_smaller_than_input_length', rand_x( - 5, min_dim_len=5), 3, -1, 'backward'), - ('test_axis_not_last', rand_x(5), None, 3, 'backward'), - ('test_norm_forward', rand_x(5), None, 3, 'forward'), - ('test_norm_ortho', rand_x(5), None, 3, 'ortho')]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 
'norm'), + [('test_x_float64', rand_x(5, np.float64), None, -1, 'backward'), + ('test_x_complex64', rand_x(5, np.float64, + complex=True), None, -1, 'backward'), + ('test_n_grater_than_input_length', rand_x( + 5, max_dim_len=5), 11, -1, 'backward'), + ('test_n_smaller_than_input_length', rand_x( + 5, min_dim_len=5), 3, -1, 'backward'), + ('test_axis_not_last', rand_x(5), None, 3, 'backward'), + ('test_norm_forward', rand_x(5), None, 3, 'forward'), + ('test_norm_ortho', rand_x(5), None, 3, 'ortho')]) class TestFft(unittest.TestCase): + def test_static_rfft(self): with stgraph(paddle.fft.fft, self.place, self.x, self.n, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.fft(self.x, self.n, self.axis, self.norm), - y, - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype))) + np.testing.assert_allclose(scipy.fft.fft(self.x, self.n, self.axis, + self.norm), + y, + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype))) @place(DEVICES) @@ -70,11 +70,12 @@ class TestFft(unittest.TestCase): (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [('test_n_nagative', rand_x(2), -1, -1, 'backward', ValueError), ('test_n_zero', rand_x(2), 0, -1, 'backward', ValueError), - ('test_axis_out_of_range', rand_x(1), None, 10, 'backward', - ValueError), ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', - ValueError), ('test_norm_not_in_enum_value', rand_x(2), - None, -1, 'random', ValueError)]) + ('test_axis_out_of_range', rand_x(1), None, 10, 'backward', ValueError), + ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', ValueError), + ('test_norm_not_in_enum_value', rand_x(2), None, -1, 'random', ValueError)] +) class TestFftException(unittest.TestCase): + def test_fft(self): with self.assertRaises(self.expect_exception): with stgraph(paddle.fft.fft, self.place, self.x, self.n, self.axis, @@ -83,29 +84,28 @@ class TestFftException(unittest.TestCase): @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ - ('test_x_float64', rand_x(5), None, (0, 1), 'backward'), - ('test_x_complex128', rand_x( - 5, complex=True), None, (0, 1), 'backward'), - ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), (6, 6), (0, 1), 'backward'), - ('test_n_smaller_than_input_length', rand_x( - 5, min_dim_len=5), (4, 4), (0, 1), 'backward'), - ('test_axis_random', rand_x(5), None, (1, 2), 'backward'), - ('test_axis_none', rand_x(5), None, None, 'backward'), - ('test_norm_forward', rand_x(5), None, (0, 1), 'forward'), - ('test_norm_ortho', rand_x(5), None, (0, 1), 'ortho'), - ]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ + ('test_x_float64', rand_x(5), None, (0, 1), 'backward'), + ('test_x_complex128', rand_x(5, complex=True), None, (0, 1), 'backward'), + ('test_n_grater_input_length', rand_x(5, max_dim_len=5), (6, 6), + (0, 1), 'backward'), + ('test_n_smaller_than_input_length', rand_x(5, min_dim_len=5), (4, 4), + (0, 1), 'backward'), + ('test_axis_random', rand_x(5), None, (1, 2), 'backward'), + ('test_axis_none', rand_x(5), None, None, 'backward'), + ('test_norm_forward', rand_x(5), None, (0, 1), 'forward'), + ('test_norm_ortho', rand_x(5), None, (0, 1), 'ortho'), +]) class TestFft2(unittest.TestCase): + def test_static_fft2(self): with stgraph(paddle.fft.fft2, self.place, self.x, self.n, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.fft2(self.x, self.n, self.axis, self.norm), - y, - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype))) + 
np.testing.assert_allclose(scipy.fft.fft2(self.x, self.n, self.axis, + self.norm), + y, + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype))) @place(DEVICES) @@ -116,15 +116,16 @@ class TestFft2(unittest.TestCase): ('test_x_1dim_tensor', rand_x(1), None, (0, 1), 'backward', ValueError), ('test_n_nagative', rand_x(2), -1, (0, 1), 'backward', ValueError), ('test_n_zero', rand_x(2), 0, (0, 1), 'backward', ValueError), - ('test_axis_out_of_range', rand_x(2), None, (0, 1, 2), 'backward', - ValueError), - ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', - ValueError), + ('test_axis_out_of_range', rand_x(2), None, + (0, 1, 2), 'backward', ValueError), + ('test_axis_with_array', rand_x(1), None, + (0, 1), 'backward', ValueError), ('test_axis_not_sequence', rand_x(5), None, -10, 'backward', ValueError), ('test_norm_not_enum', rand_x(2), None, -1, 'random', ValueError) ]) class TestFft2Exception(unittest.TestCase): + def test_static_fft2(self): with self.assertRaises(self.expect_exception): with stgraph(paddle.fft.fft2, self.place, self.x, self.n, self.axis, @@ -136,39 +137,39 @@ class TestFft2Exception(unittest.TestCase): @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [('test_x_float64', rand_x(5, np.float64), None, None, 'backward'), - ('test_x_complex128', rand_x( - 5, np.float64, complex=True), None, None, - 'backward'), ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), (6, 6), (1, 2), - 'backward'), ('test_n_smaller_input_length', rand_x( - 5, min_dim_len=5), (3, 3), (1, 2), 'backward'), - ('test_axis_not_default', rand_x(5), None, (1, 2), - 'backward'), ('test_norm_forward', rand_x(5), None, None, 'forward'), + ('test_x_complex128', rand_x(5, np.float64, + complex=True), None, None, 'backward'), + ('test_n_grater_input_length', rand_x(5, max_dim_len=5), (6, 6), + (1, 2), 'backward'), + ('test_n_smaller_input_length', rand_x(5, min_dim_len=5), (3, 3), + (1, 2), 'backward'), + ('test_axis_not_default', rand_x(5), None, (1, 2), 'backward'), + ('test_norm_forward', rand_x(5), None, None, 'forward'), ('test_norm_ortho', rand_x(5), None, None, 'ortho')]) class TestFftn(unittest.TestCase): + def test_static_fftn(self): with stgraph(paddle.fft.fftn, self.place, self.x, self.n, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.fftn(self.x, self.n, self.axis, self.norm), - y, - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype))) + np.testing.assert_allclose(scipy.fft.fftn(self.x, self.n, self.axis, + self.norm), + y, + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype))) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), - [('test_x_complex', rand_x( - 4, complex=True), None, None, 'backward', - TypeError), ('test_n_nagative', rand_x(4), (-1, -1), (1, 2), 'backward', - ValueError), ('test_n_not_sequence', rand_x(4), -1, None, - 'backward', ValueError), - ('test_n_zero', rand_x(4), 0, None, 'backward', ValueError), - ('test_axis_out_of_range', rand_x(1), None, [0, 1], 'backward', - ValueError), ('test_norm_not_in_enum', rand_x(2), None, -1, 'random', - ValueError)]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ + ('test_x_complex', rand_x(4, + complex=True), None, None, 'backward', TypeError), + ('test_n_nagative', rand_x(4), (-1, -1), (1, 2), 'backward', ValueError), + ('test_n_not_sequence', rand_x(4), -1, None, 'backward', ValueError), + ('test_n_zero', rand_x(4), 0, None, 'backward', ValueError), + 
('test_axis_out_of_range', rand_x(1), None, [0, 1], 'backward', ValueError), + ('test_norm_not_in_enum', rand_x(2), None, -1, 'random', ValueError) +]) class TestRfftnException(unittest.TestCase): + def test_static_rfftn(self): with self.assertRaises(self.expect_exception): with stgraph(paddle.fft.rfftn, self.place, self.x, self.n, @@ -179,23 +180,18 @@ class TestRfftnException(unittest.TestCase): @place(DEVICES) @parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ ('test_x_complex128', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.complex128), None, -1, "backward"), - ('test_n_grater_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), 4, -1, - "backward"), - ('test_n_smaller_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), 2, -1, - "backward"), - ('test_axis_not_last', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, 1, - "backward"), - ('test_norm_forward', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, 1, - "forward"), - ('test_norm_ortho', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, -1, - "ortho"), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.complex128), None, -1, "backward"), + ('test_n_grater_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), 4, -1, "backward"), + ('test_n_smaller_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), 2, -1, "backward"), + ('test_axis_not_last', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, 1, "backward"), + ('test_norm_forward', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, 1, "forward"), + ('test_norm_ortho', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, -1, "ortho"), ]) class TestHfft(unittest.TestCase): """Test hfft with norm condition @@ -204,33 +200,28 @@ class TestHfft(unittest.TestCase): def test_hfft(self): with stgraph(paddle.fft.hfft, self.place, self.x, self.n, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.hfft(self.x, self.n, self.axis, self.norm), - y, - rtol=1e-5, - atol=0) + np.testing.assert_allclose(scipy.fft.hfft(self.x, self.n, self.axis, + self.norm), + y, + rtol=1e-5, + atol=0) @place(DEVICES) @parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ ('test_x_complex128', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.complex128), None, -1, "backward"), - ('test_n_grater_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), 4, -1, - "backward"), - ('test_n_smaller_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), 2, -1, - "backward"), - ('test_axis_not_last', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, -1, - "backward"), - ('test_norm_forward', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, -1, - "forward"), - ('test_norm_ortho', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, -1, - "ortho"), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.complex128), None, -1, "backward"), + ('test_n_grater_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), 4, -1, "backward"), + ('test_n_smaller_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), 2, -1, "backward"), + ('test_axis_not_last', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, -1, "backward"), + ('test_norm_forward', 
np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, -1, "forward"), + ('test_norm_ortho', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, -1, "ortho"), ]) class TestIrfft(unittest.TestCase): """Test irfft with norm condition @@ -239,33 +230,28 @@ class TestIrfft(unittest.TestCase): def test_irfft(self): with stgraph(paddle.fft.irfft, self.place, self.x, self.n, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.irfft(self.x, self.n, self.axis, self.norm), - y, - rtol=1e-5, - atol=0) + np.testing.assert_allclose(scipy.fft.irfft(self.x, self.n, + self.axis, self.norm), + y, + rtol=1e-5, + atol=0) @place(DEVICES) @parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ ('test_x_complex128', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.complex128), None, None, "backward"), - ('test_n_grater_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), [4], None, - "backward"), - ('test_n_smaller_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), [2], None, - "backward"), - ('test_axis_not_last', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, None, - "backward"), - ('test_norm_forward', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, None, - "forward"), - ('test_norm_ortho', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, None, - "ortho"), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.complex128), None, None, "backward"), + ('test_n_grater_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), [4], None, "backward"), + ('test_n_smaller_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), [2], None, "backward"), + ('test_axis_not_last', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, None, "backward"), + ('test_norm_forward', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, None, "forward"), + ('test_norm_ortho', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, None, "ortho"), ]) class Testirfftn(unittest.TestCase): """Test irfftn with norm condition @@ -274,33 +260,28 @@ class Testirfftn(unittest.TestCase): def test_static_irfftn(self): with stgraph(paddle.fft.irfftn, self.place, self.x, self.n, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.irfftn(self.x, self.n, self.axis, self.norm), - y, - rtol=1e-5, - atol=0) + np.testing.assert_allclose(scipy.fft.irfftn(self.x, self.n, + self.axis, self.norm), + y, + rtol=1e-5, + atol=0) @place(DEVICES) @parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ ('test_x_complex128', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.complex128), None, None, "backward"), - ('test_n_grater_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), [4], None, - "backward"), - ('test_n_smaller_than_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), [2], None, - "backward"), - ('test_axis_not_last', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, None, - "backward"), - ('test_norm_forward', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, None, - "forward"), - ('test_norm_ortho', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, None, - "ortho"), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.complex128), None, None, "backward"), + ('test_n_grater_than_input_length', 
np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), [4], None, "backward"), + ('test_n_smaller_than_input_length', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), [2], None, "backward"), + ('test_axis_not_last', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, None, "backward"), + ('test_norm_forward', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, None, "forward"), + ('test_norm_ortho', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), None, None, "ortho"), ]) class Testhfftn(unittest.TestCase): """Test hfftn with norm condition @@ -309,33 +290,33 @@ class Testhfftn(unittest.TestCase): def test_static_hfftn(self): with stgraph(paddle.fft.hfftn, self.place, self.x, self.n, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.hfftn(self.x, self.n, self.axis, self.norm), - y, - rtol=1e-5, - atol=0) + np.testing.assert_allclose(scipy.fft.hfftn(self.x, self.n, + self.axis, self.norm), + y, + rtol=1e-5, + atol=0) @place(DEVICES) @parameterize((TEST_CASE_NAME, 'x', 's', 'axis', 'norm'), [ ('test_x_complex128', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.complex128), None, (-2, -1), "backward"), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.complex128), None, (-2, -1), "backward"), ('test_n_grater_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), [4, 8], (-2, -1), - "backward"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), [4, 8], + (-2, -1), "backward"), ('test_n_smaller_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), [2, 4], (-2, -1), - "backward"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), [2, 4], + (-2, -1), "backward"), ('test_axis_not_last', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, (-2, -1), - "backward"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, + (-2, -1), "backward"), ('test_norm_forward', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, (-2, -1), - "forward"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, + (-2, -1), "forward"), ('test_norm_ortho', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, (-2, -1), - "ortho"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, + (-2, -1), "ortho"), ]) class Testhfft2(unittest.TestCase): """Test hfft2 with norm condition @@ -344,30 +325,30 @@ class Testhfft2(unittest.TestCase): def test_static_hfft2(self): with stgraph(paddle.fft.hfft2, self.place, self.x, self.s, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.hfft2(self.x, self.s, self.axis, self.norm), - y, - rtol=1e-5, - atol=0) + np.testing.assert_allclose(scipy.fft.hfft2(self.x, self.s, + self.axis, self.norm), + y, + rtol=1e-5, + atol=0) @place(DEVICES) @parameterize((TEST_CASE_NAME, 'x', 's', 'axis', 'norm'), [ ('test_x_complex128', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.complex128), None, (-2, -1), "backward"), + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.complex128), None, (-2, -1), "backward"), ('test_n_equal_input_length', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (2, 4), (-2, -1), - "backward"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (2, 4), + (-2, -1), "backward"), ('test_axis_not_last', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, (-2, -1), - "backward"), + np.random.randn(4, 4, 4) + 1j * 
np.random.randn(4, 4, 4), None, + (-2, -1), "backward"), ('test_norm_forward', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, (-2, -1), - "forward"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, + (-2, -1), "forward"), ('test_norm_ortho', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, (-2, -1), - "ortho"), + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), None, + (-2, -1), "ortho"), ]) class TestIrfft2(unittest.TestCase): """Test irfft2 with norm condition @@ -376,34 +357,33 @@ class TestIrfft2(unittest.TestCase): def test_static_irfft2(self): with stgraph(paddle.fft.irfft2, self.place, self.x, self.s, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.irfft2(self.x, self.s, self.axis, self.norm), - y, - rtol=1e-5, - atol=0) + np.testing.assert_allclose(scipy.fft.irfft2(self.x, self.s, + self.axis, self.norm), + y, + rtol=1e-5, + atol=0) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), - [('test_input_dtype', np.random.randn(4, 4, 4), None, -1, 'backward', - TypeError), ('test_bool_input', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.bool8), None, -1, 'backward', TypeError), - ('test_n_nagative', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), -1, -1, - 'backward', ValueError), - ('test_n_zero', np.random.randn(4, 4) + 1j * np.random.randn(4, 4), 0, -1, - 'backward', ValueError), - ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - (1, 2, 3), -1, 'backward', ValueError), - ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), - None, 10, 'backward', ValueError), ( - 'test_axis_with_array', np.random.randn(4) + 1j * np.random.randn(4), - None, (0, 1), 'backward', - ValueError), ('test_norm_not_in_enum_value', - np.random.randn(4, 4) + 1j * np.random.randn(4, 4), - None, -1, 'random', ValueError)]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ + ('test_input_dtype', np.random.randn(4, 4, + 4), None, -1, 'backward', TypeError), + ('test_bool_input', + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.bool8), None, -1, 'backward', TypeError), + ('test_n_nagative', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), -1, -1, 'backward', ValueError), + ('test_n_zero', np.random.randn(4, 4) + 1j * np.random.randn(4, 4), 0, -1, + 'backward', ValueError), + ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), + (1, 2, 3), -1, 'backward', ValueError), + ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), + None, 10, 'backward', ValueError), + ('test_axis_with_array', np.random.randn(4) + 1j * np.random.randn(4), None, + (0, 1), 'backward', ValueError), + ('test_norm_not_in_enum_value', np.random.randn(4, 4) + + 1j * np.random.randn(4, 4), None, -1, 'random', ValueError) +]) class TestHfftException(unittest.TestCase): '''Test hfft with buoudary condition Test case include: @@ -421,26 +401,25 @@ class TestHfftException(unittest.TestCase): @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), - [('test_input_dtype', np.random.randn(4, 4, 4), None, -1, 'backward', - TypeError), ('test_bool_input', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.bool8), None, -1, 'backward', TypeError), - ('test_n_nagative', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), -1, -1, - 'backward', ValueError), - 
('test_n_zero', np.random.randn(4, 4) + 1j * np.random.randn(4, 4), 0, -1, - 'backward', ValueError), - ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - (1, 2), -1, 'backward', ValueError), - ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), - None, 10, 'backward', ValueError), ( - 'test_axis_with_array', np.random.randn(4) + 1j * np.random.randn(4), - None, (0, 1), 'backward', - ValueError), ('test_norm_not_in_enum_value', - np.random.randn(4, 4) + 1j * np.random.randn(4, 4), - None, None, 'random', ValueError)]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ + ('test_input_dtype', np.random.randn(4, 4, + 4), None, -1, 'backward', TypeError), + ('test_bool_input', + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.bool8), None, -1, 'backward', TypeError), + ('test_n_nagative', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), -1, -1, 'backward', ValueError), + ('test_n_zero', np.random.randn(4, 4) + 1j * np.random.randn(4, 4), 0, -1, + 'backward', ValueError), + ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), + (1, 2), -1, 'backward', ValueError), + ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), + None, 10, 'backward', ValueError), + ('test_axis_with_array', np.random.randn(4) + 1j * np.random.randn(4), None, + (0, 1), 'backward', ValueError), + ('test_norm_not_in_enum_value', np.random.randn(4, 4) + + 1j * np.random.randn(4, 4), None, None, 'random', ValueError) +]) class TestIrfftException(unittest.TestCase): '''Test Irfft with buoudary condition Test case include: @@ -461,27 +440,27 @@ class TestIrfftException(unittest.TestCase): @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), - [('test_input_dtype', np.random.randn(4, 4, 4), None, None, 'backward', - TypeError), ('test_bool_input', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.bool8), None, (-2, -1), 'backward', TypeError), + [('test_input_dtype', np.random.randn( + 4, 4, 4), None, None, 'backward', TypeError), + ('test_bool_input', + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.bool8), None, (-2, -1), 'backward', TypeError), ('test_n_nagative', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (-1, -2), (-2, -1), 'backward', ValueError), ('test_n_zero', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (0, 0), (-2, -1), 'backward', ValueError), ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - 3, None, 'backward', - ValueError), ('test_n_axis_dim', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - (1, 2), (-1), 'backward', ValueError), + 3, None, 'backward', ValueError), + ('test_n_axis_dim', + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (1, 2), + (-1), 'backward', ValueError), ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), - None, (1, 2), 'backward', ValueError), ( - 'test_axis_type', np.random.randn(4) + 1j * np.random.randn(4), None, - -1, 'backward', - ValueError), ('test_norm_not_in_enum_value', - np.random.randn(4, 4) + 1j * np.random.randn(4, 4), - None, None, 'random', ValueError)]) + None, (1, 2), 'backward', ValueError), + ('test_axis_type', np.random.randn(4) + 1j * np.random.randn(4), None, -1, + 'backward', ValueError), + ('test_norm_not_in_enum_value', np.random.randn(4, 4) + + 1j * np.random.randn(4, 4), None, None, 'random', ValueError)]) class 
TestHfft2Exception(unittest.TestCase): '''Test hfft2 with buoudary condition Test case include: @@ -502,27 +481,27 @@ class TestHfft2Exception(unittest.TestCase): @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), - [('test_input_dtype', np.random.randn(4, 4, 4), None, None, 'backward', - TypeError), ('test_bool_input', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.bool8), None, (-2, -1), 'backward', TypeError), + [('test_input_dtype', np.random.randn( + 4, 4, 4), None, None, 'backward', TypeError), + ('test_bool_input', + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.bool8), None, (-2, -1), 'backward', TypeError), ('test_n_nagative', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (-1, -2), (-2, -1), 'backward', ValueError), ('test_n_zero', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (0, 0), (-2, -1), 'backward', ValueError), ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - 3, -1, 'backward', - ValueError), ('test_n_axis_dim', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - (1, 2), (-3, -2, -1), 'backward', ValueError), + 3, -1, 'backward', ValueError), + ('test_n_axis_dim', + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (1, 2), + (-3, -2, -1), 'backward', ValueError), ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), - None, (1, 2), 'backward', ValueError), ( - 'test_axis_type', np.random.randn(4) + 1j * np.random.randn(4), None, - 1, 'backward', - ValueError), ('test_norm_not_in_enum_value', - np.random.randn(4, 4) + 1j * np.random.randn(4, 4), - None, None, 'random', ValueError)]) + None, (1, 2), 'backward', ValueError), + ('test_axis_type', np.random.randn(4) + 1j * np.random.randn(4), None, 1, + 'backward', ValueError), + ('test_norm_not_in_enum_value', np.random.randn(4, 4) + + 1j * np.random.randn(4, 4), None, None, 'random', ValueError)]) class TestIrfft2Exception(unittest.TestCase): '''Test irfft2 with buoudary condition Test case include: @@ -543,27 +522,27 @@ class TestIrfft2Exception(unittest.TestCase): @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), - [('test_input_dtype', np.random.randn(4, 4, 4), None, None, 'backward', - TypeError), ('test_bool_input', - (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) - ).astype(np.bool8), None, (-2, -1), 'backward', TypeError), + [('test_input_dtype', np.random.randn( + 4, 4, 4), None, None, 'backward', TypeError), + ('test_bool_input', + (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4)).astype( + np.bool8), None, (-2, -1), 'backward', TypeError), ('test_n_nagative', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (-1, -2), (-2, -1), 'backward', ValueError), ('test_n_zero', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (0, 0), (-2, -1), 'backward', ValueError), ('test_n_type', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - 3, -1, 'backward', - ValueError), ('test_n_axis_dim', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), - (1, 2), (-3, -2, -1), 'backward', ValueError), + 3, -1, 'backward', ValueError), + ('test_n_axis_dim', + np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (1, 2), + (-3, -2, -1), 'backward', ValueError), ('test_axis_out_of_range', np.random.randn(4) + 1j * np.random.randn(4), - None, (10, 20), 'backward', ValueError), ( - 'test_axis_type', np.random.randn(4) + 1j * np.random.randn(4), None, - 1, 
'backward', - ValueError), ('test_norm_not_in_enum_value', - np.random.randn(4, 4) + 1j * np.random.randn(4, 4), - None, None, 'random', ValueError)]) + None, (10, 20), 'backward', ValueError), + ('test_axis_type', np.random.randn(4) + 1j * np.random.randn(4), None, 1, + 'backward', ValueError), + ('test_norm_not_in_enum_value', np.random.randn(4, 4) + + 1j * np.random.randn(4, 4), None, None, 'random', ValueError)]) class TestHfftnException(unittest.TestCase): '''Test hfftn with buoudary condition Test case include: @@ -585,8 +564,8 @@ class TestHfftnException(unittest.TestCase): @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ - ('test_input_dtype', np.random.randn(4, 4, 4), None, None, 'backward', - TypeError), + ('test_input_dtype', np.random.randn( + 4, 4, 4), None, None, 'backward', TypeError), # ('test_bool_input', # (np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4) # ).astype(np.bool8), None, (-2, -1), 'backward', ValueError), @@ -596,9 +575,8 @@ class TestHfftnException(unittest.TestCase): ('test_n_zero', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (0, 0), (-2, -1), 'backward', ValueError), - ('test_n_type', - np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), 3, -1, - 'backward', ValueError), + ('test_n_type', np.random.randn(4, 4, 4) + + 1j * np.random.randn(4, 4, 4), 3, -1, 'backward', ValueError), ('test_n_axis_dim', np.random.randn(4, 4, 4) + 1j * np.random.randn(4, 4, 4), (1, 2), (-3, -2, -1), 'backward', ValueError), @@ -606,9 +584,8 @@ class TestHfftnException(unittest.TestCase): None, (10, 20), 'backward', ValueError), ('test_axis_type', np.random.randn(4) + 1j * np.random.randn(4), None, 1, 'backward', ValueError), - ('test_norm_not_in_enum_value', - np.random.randn(4, 4) + 1j * np.random.randn(4, 4), None, None, - 'random', ValueError) + ('test_norm_not_in_enum_value', np.random.randn(4, 4) + + 1j * np.random.randn(4, 4), None, None, 'random', ValueError) ]) class TestIrfftnException(unittest.TestCase): '''Test irfftn with buoudary condition @@ -628,25 +605,25 @@ class TestIrfftnException(unittest.TestCase): @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), - [('test_x_float64', rand_x(5, np.float64), None, -1, 'backward'), ( - 'test_n_grater_than_input_length', rand_x( - 5, max_dim_len=5), 11, -1, 'backward'), - ('test_n_smaller_than_input_length', rand_x( - 5, min_dim_len=5), 3, -1, - 'backward'), ('test_axis_not_last', rand_x(5), None, 3, 'backward'), - ('test_norm_forward', rand_x(5), None, 3, 'forward'), - ('test_norm_ortho', rand_x(5), None, 3, 'ortho')]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), + [('test_x_float64', rand_x(5, np.float64), None, -1, 'backward'), + ('test_n_grater_than_input_length', rand_x( + 5, max_dim_len=5), 11, -1, 'backward'), + ('test_n_smaller_than_input_length', rand_x( + 5, min_dim_len=5), 3, -1, 'backward'), + ('test_axis_not_last', rand_x(5), None, 3, 'backward'), + ('test_norm_forward', rand_x(5), None, 3, 'forward'), + ('test_norm_ortho', rand_x(5), None, 3, 'ortho')]) class TestRfft(unittest.TestCase): + def test_static_rfft(self): with stgraph(paddle.fft.rfft, self.place, self.x, self.n, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.rfft(self.x, self.n, self.axis, self.norm), - y, - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype))) + np.testing.assert_allclose(scipy.fft.rfft(self.x, self.n, self.axis, + self.norm), + y, + rtol=RTOL.get(str(self.x.dtype)), + 
atol=ATOL.get(str(self.x.dtype))) @place(DEVICES) @@ -654,11 +631,12 @@ class TestRfft(unittest.TestCase): (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [('test_n_nagative', rand_x(2), -1, -1, 'backward', ValueError), ('test_n_zero', rand_x(2), 0, -1, 'backward', ValueError), - ('test_axis_out_of_range', rand_x(1), None, 10, 'backward', - ValueError), ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', - ValueError), ('test_norm_not_in_enum_value', rand_x(2), - None, -1, 'random', ValueError)]) + ('test_axis_out_of_range', rand_x(1), None, 10, 'backward', ValueError), + ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', ValueError), + ('test_norm_not_in_enum_value', rand_x(2), None, -1, 'random', ValueError)] +) class TestRfftException(unittest.TestCase): + def test_rfft(self): with self.assertRaises(self.expect_exception): with stgraph(paddle.fft.rfft, self.place, self.x, self.n, self.axis, @@ -667,48 +645,49 @@ class TestRfftException(unittest.TestCase): @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ - ('test_x_float64', rand_x(5), None, (0, 1), 'backward'), - ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), (6, 6), (0, 1), 'backward'), - ('test_n_smaller_than_input_length', rand_x( - 5, min_dim_len=5), (4, 4), (0, 1), 'backward'), - ('test_axis_random', rand_x(5), None, (1, 2), 'backward'), - ('test_axis_none', rand_x(5), None, None, 'backward'), - ('test_norm_forward', rand_x(5), None, (0, 1), 'forward'), - ('test_norm_ortho', rand_x(5), None, (0, 1), 'ortho'), - ]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ + ('test_x_float64', rand_x(5), None, (0, 1), 'backward'), + ('test_n_grater_input_length', rand_x(5, max_dim_len=5), (6, 6), + (0, 1), 'backward'), + ('test_n_smaller_than_input_length', rand_x(5, min_dim_len=5), (4, 4), + (0, 1), 'backward'), + ('test_axis_random', rand_x(5), None, (1, 2), 'backward'), + ('test_axis_none', rand_x(5), None, None, 'backward'), + ('test_norm_forward', rand_x(5), None, (0, 1), 'forward'), + ('test_norm_ortho', rand_x(5), None, (0, 1), 'ortho'), +]) class TestRfft2(unittest.TestCase): + def test_static_rfft2(self): with stgraph(paddle.fft.rfft2, self.place, self.x, self.n, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.rfft2(self.x, self.n, self.axis, self.norm), - y, - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype))) + np.testing.assert_allclose(scipy.fft.rfft2(self.x, self.n, + self.axis, self.norm), + y, + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype))) @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ - ('test_x_complex_input', rand_x( - 2, complex=True), None, (0, 1), 'backward', TypeError), + ('test_x_complex_input', rand_x(2, complex=True), None, + (0, 1), 'backward', TypeError), # ('test_x_not_tensor', [0, 1], None, (0, 1), 'backward', ValueError), ('test_x_1dim_tensor', rand_x(1), None, (0, 1), 'backward', ValueError), ('test_n_nagative', rand_x(2), -1, (0, 1), 'backward', ValueError), ('test_n_zero', rand_x(2), 0, (0, 1), 'backward', ValueError), - ('test_axis_out_of_range', rand_x(2), None, (0, 1, 2), 'backward', - ValueError), - ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', - ValueError), + ('test_axis_out_of_range', rand_x(2), None, + (0, 1, 2), 'backward', ValueError), + ('test_axis_with_array', rand_x(1), None, + (0, 1), 'backward', ValueError), ('test_axis_not_sequence', rand_x(5), None, -10, 'backward', 
ValueError), ('test_norm_not_enum', rand_x(2), None, -1, 'random', ValueError) ]) class TestRfft2Exception(unittest.TestCase): + def test_static_rfft(self): with self.assertRaises(self.expect_exception): with stgraph(paddle.fft.rfft2, self.place, self.x, self.n, @@ -720,37 +699,37 @@ class TestRfft2Exception(unittest.TestCase): @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [('test_x_float64', rand_x(5, np.float64), None, None, 'backward'), - ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), (6, 6), (1, 2), - 'backward'), ('test_n_smaller_input_length', rand_x( - 5, min_dim_len=5), (3, 3), (1, 2), 'backward'), - ('test_axis_not_default', rand_x(5), None, (1, 2), - 'backward'), ('test_norm_forward', rand_x(5), None, None, 'forward'), + ('test_n_grater_input_length', rand_x(5, max_dim_len=5), (6, 6), + (1, 2), 'backward'), + ('test_n_smaller_input_length', rand_x(5, min_dim_len=5), (3, 3), + (1, 2), 'backward'), + ('test_axis_not_default', rand_x(5), None, (1, 2), 'backward'), + ('test_norm_forward', rand_x(5), None, None, 'forward'), ('test_norm_ortho', rand_x(5), None, None, 'ortho')]) class TestRfftn(unittest.TestCase): + def test_static_rfft(self): with stgraph(paddle.fft.rfftn, self.place, self.x, self.n, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.rfftn(self.x, self.n, self.axis, self.norm), - y, - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype))) + np.testing.assert_allclose(scipy.fft.rfftn(self.x, self.n, + self.axis, self.norm), + y, + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype))) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), - [('test_x_complex', rand_x( - 4, complex=True), None, None, 'backward', - TypeError), ('test_n_nagative', rand_x(4), (-1, -1), (1, 2), 'backward', - ValueError), ('test_n_not_sequence', rand_x(4), -1, None, - 'backward', ValueError), - ('test_n_zero', rand_x(4), 0, None, 'backward', ValueError), - ('test_axis_out_of_range', rand_x(1), None, [0, 1], 'backward', - ValueError), ('test_norm_not_in_enum', rand_x(2), None, -1, 'random', - ValueError)]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ + ('test_x_complex', rand_x(4, + complex=True), None, None, 'backward', TypeError), + ('test_n_nagative', rand_x(4), (-1, -1), (1, 2), 'backward', ValueError), + ('test_n_not_sequence', rand_x(4), -1, None, 'backward', ValueError), + ('test_n_zero', rand_x(4), 0, None, 'backward', ValueError), + ('test_axis_out_of_range', rand_x(1), None, [0, 1], 'backward', ValueError), + ('test_norm_not_in_enum', rand_x(2), None, -1, 'random', ValueError) +]) class TestRfftnException(unittest.TestCase): + def test_static_rfftn(self): with self.assertRaises(self.expect_exception): with stgraph(paddle.fft.rfftn, self.place, self.x, self.n, @@ -759,36 +738,38 @@ class TestRfftnException(unittest.TestCase): @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), - [('test_x_float64', rand_x(5, np.float64), None, -1, 'backward'), ( - 'test_n_grater_than_input_length', rand_x( - 5, max_dim_len=5), 11, -1, 'backward'), - ('test_n_smaller_than_input_length', rand_x( - 5, min_dim_len=5), 3, -1, - 'backward'), ('test_axis_not_last', rand_x(5), None, 3, 'backward'), - ('test_norm_forward', rand_x(5), None, 3, 'forward'), - ('test_norm_ortho', rand_x(5), None, 3, 'ortho')]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), + [('test_x_float64', rand_x(5, np.float64), None, -1, 'backward'), + 
('test_n_grater_than_input_length', rand_x( + 5, max_dim_len=5), 11, -1, 'backward'), + ('test_n_smaller_than_input_length', rand_x( + 5, min_dim_len=5), 3, -1, 'backward'), + ('test_axis_not_last', rand_x(5), None, 3, 'backward'), + ('test_norm_forward', rand_x(5), None, 3, 'forward'), + ('test_norm_ortho', rand_x(5), None, 3, 'ortho')]) class TestIhfft(unittest.TestCase): + def test_static_ihfft(self): with stgraph(paddle.fft.ihfft, self.place, self.x, self.n, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.ihfft(self.x, self.n, self.axis, self.norm), - y, - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype))) + np.testing.assert_allclose(scipy.fft.ihfft(self.x, self.n, + self.axis, self.norm), + y, + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype))) @place(DEVICES) -@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ - ('test_n_nagative', rand_x(2), -1, -1, 'backward', ValueError), - ('test_n_zero', rand_x(2), 0, -1, 'backward', ValueError), - ('test_axis_out_of_range', rand_x(1), None, 10, 'backward', ValueError), - ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', ValueError), - ('test_norm_not_in_enum_value', rand_x(2), None, -1, 'random', ValueError) -]) +@parameterize( + (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), + [('test_n_nagative', rand_x(2), -1, -1, 'backward', ValueError), + ('test_n_zero', rand_x(2), 0, -1, 'backward', ValueError), + ('test_axis_out_of_range', rand_x(1), None, 10, 'backward', ValueError), + ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', ValueError), + ('test_norm_not_in_enum_value', rand_x(2), None, -1, 'random', ValueError)] +) class TestIhfftException(unittest.TestCase): + def test_static_ihfft(self): with self.assertRaises(self.expect_exception): with stgraph(paddle.fft.ihfft, self.place, self.x, self.n, @@ -797,50 +778,51 @@ class TestIhfftException(unittest.TestCase): @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ - ('test_x_float64', rand_x(5), None, (0, 1), 'backward'), - ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), (11, 11), (0, 1), 'backward'), - ('test_n_smaller_than_input_length', rand_x( - 5, min_dim_len=5), (1, 1), (0, 1), 'backward'), - ('test_axis_random', rand_x(5), None, (1, 2), 'backward'), - ('test_axis_none', rand_x(5), None, None, 'backward'), - ('test_norm_forward', rand_x(5), None, (0, 1), 'forward'), - ('test_norm_ortho', rand_x(5), None, (0, 1), 'ortho'), - ]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [ + ('test_x_float64', rand_x(5), None, (0, 1), 'backward'), + ('test_n_grater_input_length', rand_x(5, max_dim_len=5), (11, 11), + (0, 1), 'backward'), + ('test_n_smaller_than_input_length', rand_x(5, min_dim_len=5), (1, 1), + (0, 1), 'backward'), + ('test_axis_random', rand_x(5), None, (1, 2), 'backward'), + ('test_axis_none', rand_x(5), None, None, 'backward'), + ('test_norm_forward', rand_x(5), None, (0, 1), 'forward'), + ('test_norm_ortho', rand_x(5), None, (0, 1), 'ortho'), +]) class TestIhfft2(unittest.TestCase): + def test_static_ihfft2(self): with stgraph(paddle.fft.ihfft2, self.place, self.x, self.n, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.ihfft2(self.x, self.n, self.axis, self.norm), - y, - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype))) + np.testing.assert_allclose(scipy.fft.ihfft2(self.x, self.n, + self.axis, self.norm), + y, + rtol=RTOL.get(str(self.x.dtype)), + 
atol=ATOL.get(str(self.x.dtype))) @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ - ('test_x_complex_input', rand_x( - 2, complex=True), None, (0, 1), None, ValueError), + ('test_x_complex_input', rand_x(2, complex=True), None, + (0, 1), None, ValueError), # ('test_x_not_tensor', [0, 1], None, (0, 1), None, ValueError), ('test_x_1dim_tensor', rand_x(1), None, (0, 1), None, ValueError), ('test_n_nagative', rand_x(2), -1, (0, 1), 'backward', ValueError), - ('test_n_len_not_equal_axis', rand_x( - 5, max_dim_len=5), 11, (0, 1), 'backward', ValueError), + ('test_n_len_not_equal_axis', rand_x(5, max_dim_len=5), 11, + (0, 1), 'backward', ValueError), ('test_n_zero', rand_x(2), (0, 0), (0, 1), 'backward', ValueError), - ('test_axis_out_of_range', rand_x(2), None, (0, 1, 2), 'backward', - ValueError), - ('test_axis_with_array', rand_x(1), None, (0, 1), 'backward', - ValueError), + ('test_axis_out_of_range', rand_x(2), None, + (0, 1, 2), 'backward', ValueError), + ('test_axis_with_array', rand_x(1), None, + (0, 1), 'backward', ValueError), ('test_axis_not_sequence', rand_x(5), None, -10, 'backward', ValueError), ('test_norm_not_enum', rand_x(2), None, -1, 'random', ValueError) ]) class TestIhfft2Exception(unittest.TestCase): + def test_static_ihfft2(self): with self.assertRaises(self.expect_exception): with stgraph(paddle.fft.ihfft2, self.place, self.x, self.n, @@ -852,35 +834,36 @@ class TestIhfft2Exception(unittest.TestCase): @parameterize( (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm'), [('test_x_float64', rand_x(5, np.float64), None, None, 'backward'), - ('test_n_grater_input_length', rand_x( - 5, max_dim_len=5), (11, 11), (0, 1), - 'backward'), ('test_n_smaller_input_length', rand_x( - 5, min_dim_len=5), (1, 1), (0, 1), 'backward'), - ('test_axis_not_default', rand_x(5), None, (1, 2), - 'backward'), ('test_norm_forward', rand_x(5), None, None, 'forward'), + ('test_n_grater_input_length', rand_x(5, max_dim_len=5), (11, 11), + (0, 1), 'backward'), + ('test_n_smaller_input_length', rand_x(5, min_dim_len=5), (1, 1), + (0, 1), 'backward'), + ('test_axis_not_default', rand_x(5), None, (1, 2), 'backward'), + ('test_norm_forward', rand_x(5), None, None, 'forward'), ('test_norm_ortho', rand_x(5), None, None, 'ortho')]) class TestIhfftn(unittest.TestCase): + def test_static_ihfftn(self): with stgraph(paddle.fft.ihfftn, self.place, self.x, self.n, self.axis, self.norm) as y: - np.testing.assert_allclose( - scipy.fft.ihfftn(self.x, self.n, self.axis, self.norm), - y, - rtol=RTOL.get(str(self.x.dtype)), - atol=ATOL.get(str(self.x.dtype))) + np.testing.assert_allclose(scipy.fft.ihfftn(self.x, self.n, + self.axis, self.norm), + y, + rtol=RTOL.get(str(self.x.dtype)), + atol=ATOL.get(str(self.x.dtype))) @place(DEVICES) -@parameterize( - (TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), - [('test_x_complex', rand_x( - 4, complex=True), None, None, 'backward', TypeError), - ('test_n_nagative', rand_x(4), -1, None, 'backward', - ValueError), ('test_n_zero', rand_x(4), 0, None, 'backward', ValueError), - ('test_axis_out_of_range', rand_x(1), None, [0, 1], 'backward', - ValueError), ('test_norm_not_in_enum', rand_x(2), None, -1, 'random', - ValueError)]) +@parameterize((TEST_CASE_NAME, 'x', 'n', 'axis', 'norm', 'expect_exception'), [ + ('test_x_complex', rand_x(4, + complex=True), None, None, 'backward', TypeError), + ('test_n_nagative', rand_x(4), -1, None, 'backward', ValueError), + ('test_n_zero', rand_x(4), 0, None, 'backward', ValueError), + 
('test_axis_out_of_range', rand_x(1), None, [0, 1], 'backward', ValueError), + ('test_norm_not_in_enum', rand_x(2), None, -1, 'random', ValueError) +]) class TestIhfftnException(unittest.TestCase): + def test_static_ihfftn(self): with self.assertRaises(self.expect_exception): with stgraph(paddle.fft.ihfftn, self.place, self.x, self.n, @@ -897,6 +880,7 @@ class TestIhfftnException(unittest.TestCase): np.random.randn(5, 5) + 1j * np.random.randn(5, 5), None, 'complex128'), ]) class TestFftShift(unittest.TestCase): + def test_fftshift(self): """Test fftshift with norm condition """ @@ -916,12 +900,13 @@ class TestFftShift(unittest.TestCase): @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'x', 'axes'), - [('test_1d', np.random.randn(10), (0, ), - 'float64'), ('test_2d', np.random.randn(10, 10), (0, 1), 'float64'), + [('test_1d', np.random.randn(10), (0, ), 'float64'), + ('test_2d', np.random.randn(10, 10), (0, 1), 'float64'), ('test_2d_with_all_axes', np.random.randn(10, 10), None, 'float64'), ('test_2d_odd_with_all_axes', np.random.randn(5, 5) + 1j * np.random.randn(5, 5), None, 'complex128')]) class TestIfftShift(unittest.TestCase): + def test_ifftshift(self): """Test ifftshift with norm condition """ diff --git a/python/paddle/fluid/tests/unittests/fft/test_spectral_op.py b/python/paddle/fluid/tests/unittests/fft/test_spectral_op.py index a84092e36f6..ba409296592 100644 --- a/python/paddle/fluid/tests/unittests/fft/test_spectral_op.py +++ b/python/paddle/fluid/tests/unittests/fft/test_spectral_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -26,6 +26,7 @@ import paddle.fluid.dygraph as dg import paddle.static as static from numpy.random import random as rand from paddle.fluid import Program, program_guard + sys.path.append("../") from op_test import OpTest @@ -72,22 +73,22 @@ def class_name(cls, num, params_dict): return "{}_{}{}".format(cls.__name__, num, suffix and "_" + suffix) -@parameterize((TEST_CASE_NAME, 'x', 'axes', 'norm', 'forward'), [ - ('test_axes_is_sqe_type', (np.random.random( - (12, 14)) + 1j * np.random.random((12, 14))).astype(np.complex128), - [0, 1], 'forward', True), ('test_axis_not_last', (np.random.random( - (4, 4, 4)) + 1j * np.random.random((4, 4, 4))).astype(np.complex128), - (0, 1), "backward", False), - ('test_norm_forward', (np.random.random((12, 14)) + 1j * np.random.random( - (12, 14))).astype(np.complex128), (0, ), "forward", - False), ('test_norm_backward', (np.random.random( - (12, 14)) + 1j * np.random.random((12, 14))).astype(np.complex128), - (0, ), "backward", True), ('test_norm_ortho', (np.random.random( - (12, 14)) + 1j * np.random.random( - (12, 14))).astype(np.complex128), (1, ), "ortho", True) -]) +@parameterize( + (TEST_CASE_NAME, 'x', 'axes', 'norm', 'forward'), + [('test_axes_is_sqe_type', (np.random.random( + (12, 14)) + 1j * np.random.random( + (12, 14))).astype(np.complex128), [0, 1], 'forward', True), + ('test_axis_not_last', (np.random.random( + (4, 4, 4)) + 1j * np.random.random( + (4, 4, 4))).astype(np.complex128), (0, 1), "backward", False), + ('test_norm_forward', (np.random.random((12, 14)) + 1j * np.random.random( + (12, 14))).astype(np.complex128), (0, ), "forward", False), + ('test_norm_backward', (np.random.random((12, 14)) + 1j * np.random.random( + (12, 14))).astype(np.complex128), (0, ), "backward", True), + ('test_norm_ortho', (np.random.random((12, 14)) + 1j * np.random.random( + (12, 14))).astype(np.complex128), (1, ), "ortho", True)]) class TestFFTC2COp(OpTest): - # Because framwork not support complex numerial gradient, we skip gradient check. + # Because framwork not support complex numerial gradient, we skip gradient check. 
no_need_check_grad = True def setUp(self): @@ -110,19 +111,19 @@ class TestFFTC2COp(OpTest): @parameterize( (TEST_CASE_NAME, 'x', 'axes', 'norm', 'forward', 'last_dim_size'), [('test_axes_is_sqe_type', (np.random.random( - (12, 14)) + 1j * np.random.random((12, 14))).astype(np.complex128), - [0, 1], 'forward', True, 26), ('test_axis_not_last', (np.random.random( - (4, 4, 4)) + 1j * np.random.random((4, 4, 4))).astype(np.complex128), - (0, 1), "backward", False, None), + (12, 14)) + 1j * np.random.random( + (12, 14))).astype(np.complex128), [0, 1], 'forward', True, 26), + ('test_axis_not_last', (np.random.random( + (4, 4, 4)) + 1j * np.random.random((4, 4, 4))).astype(np.complex128), + (0, 1), "backward", False, None), ('test_norm_forward', (np.random.random((12, 14)) + 1j * np.random.random( (12, 14))).astype(np.complex128), (0, ), "forward", False, 22), ('test_norm_backward', (np.random.random((12, 14)) + 1j * np.random.random( - (12, 14))).astype(np.complex128), (0, ), "backward", True, - 22), ('test_norm_ortho', (np.random.random( - (12, 14)) + 1j * np.random.random((12, 14))).astype(np.complex128), - (1, ), "ortho", True, 26)]) + (12, 14))).astype(np.complex128), (0, ), "backward", True, 22), + ('test_norm_ortho', (np.random.random((12, 14)) + 1j * np.random.random( + (12, 14))).astype(np.complex128), (1, ), "ortho", True, 26)]) class TestFFTC2ROp(OpTest): - # Because framwork not support complex numerial gradient, we skip gradient check. + # Because framwork not support complex numerial gradient, we skip gradient check. no_need_check_grad = True def setUp(self): @@ -147,17 +148,17 @@ class TestFFTC2ROp(OpTest): @parameterize( (TEST_CASE_NAME, 'x', 'axes', 'norm', 'forward', 'onesided'), [('test_axes_is_sqe_type', np.random.randn(12, 14).astype(np.float64), - (0, 1), 'forward', True, - True), ('test_axis_not_last', np.random.randn(4, 4, 4).astype(np.float64), - (0, 1), "backward", False, True), - ('test_norm_forward', np.random.randn(12, 14).astype(np.float64), (0, 1), - "forward", False, False), - ('test_norm_backward', np.random.randn(12, 14).astype(np.float64), (0, ), - "backward", True, False), ('test_norm_ortho', - np.random.randn(12, 14).astype(np.float64), - (1, ), "ortho", True, False)]) + (0, 1), 'forward', True, True), + ('test_axis_not_last', np.random.randn(4, 4, 4).astype(np.float64), + (0, 1), "backward", False, True), + ('test_norm_forward', np.random.randn(12, 14).astype(np.float64), + (0, 1), "forward", False, False), + ('test_norm_backward', np.random.randn(12, 14).astype(np.float64), + (0, ), "backward", True, False), + ('test_norm_ortho', np.random.randn(12, 14).astype(np.float64), + (1, ), "ortho", True, False)]) class TestFFTR2COp(OpTest): - # Because framwork not support complex numerial gradient, we skip gradient check. + # Because framwork not support complex numerial gradient, we skip gradient check. 
no_need_check_grad = True def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py b/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py index 2b4ae3d60dd..c6a39bd6d04 100644 --- a/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py +++ b/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py @@ -42,24 +42,21 @@ def net(batch_size=4, lr=0.01): dnn_input_dim, lr_input_dim = int(2), int(2) with fluid.device_guard("cpu"): - dnn_data = fluid.layers.data( - name="dnn_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) - lr_data = fluid.layers.data( - name="lr_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) - label = fluid.layers.data( - name="click", - shape=[-1, 1], - dtype="float32", - lod_level=0, - append_batch_size=False) + dnn_data = fluid.layers.data(name="dnn_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + lr_data = fluid.layers.data(name="lr_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + label = fluid.layers.data(name="click", + shape=[-1, 1], + dtype="float32", + lod_level=0, + append_batch_size=False) datas = [dnn_data, lr_data, label] @@ -73,8 +70,8 @@ def net(batch_size=4, lr=0.01): name="deep_embedding", initializer=fluid.initializer.Constant(value=0.01)), is_sparse=True) - dnn_pool = fluid.layers.sequence_pool( - input=dnn_embedding, pool_type="sum") + dnn_pool = fluid.layers.sequence_pool(input=dnn_embedding, + pool_type="sum") dnn_out = dnn_pool # build lr model diff --git a/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py b/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py index d3a396f6baf..fe79bae75f5 100755 --- a/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py +++ b/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py @@ -22,6 +22,7 @@ import paddle.distributed.fleet.base.role_maker as role_maker class TestFleetMetaOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "1" os.environ[ @@ -37,8 +38,8 @@ class TestFleetMetaOptimizer(unittest.TestCase): main_prog_op_types = [op.type for op in main_prog_ops] startup_prog_op_types = [op.type for op in startup_prog_ops] - print("=== debug program and ops in func [{}] ===" - .format(inspect.stack()[1].function)) + print("=== debug program and ops in func [{}] ===".format( + inspect.stack()[1].function)) print(main_prog) print(main_prog_op_types) print(startup_prog) @@ -49,10 +50,12 @@ class TestFleetMetaOptimizer(unittest.TestCase): with fluid.unique_name.guard(): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, @@ -61,14 +64,15 @@ class TestFleetMetaOptimizer(unittest.TestCase): prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() return avg_cost, strategy def pp_net(self, 
main_prog, startup_prog, pp_degree=2): + def fc_block(input_x): fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') @@ -80,10 +84,12 @@ class TestFleetMetaOptimizer(unittest.TestCase): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) with fluid.device_guard("gpu:0"): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') for stage_idx in range(pp_degree): with fluid.device_guard("gpu:" + str(stage_idx)): @@ -93,8 +99,8 @@ class TestFleetMetaOptimizer(unittest.TestCase): prediction = paddle.fluid.layers.fc(input=[input_x], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() @@ -136,12 +142,11 @@ class TestFleetMetaOptimizer(unittest.TestCase): regularization=regularization, grad_clip=grad_clip) elif name == 'adamw': - optimizer = paddle.optimizer.AdamW( - learning_rate=0.01, - weight_decay=0.01, - grad_clip=grad_clip) - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = paddle.optimizer.AdamW(learning_rate=0.01, + weight_decay=0.01, + grad_clip=grad_clip) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(loss) def set_strategy(self, strategy, name): diff --git a/python/paddle/fluid/tests/unittests/gradient_checker.py b/python/paddle/fluid/tests/unittests/gradient_checker.py index defbffe8f20..be1fa92f088 100644 --- a/python/paddle/fluid/tests/unittests/gradient_checker.py +++ b/python/paddle/fluid/tests/unittests/gradient_checker.py @@ -97,8 +97,8 @@ def make_jacobian(x, y_size, np_dtype): return np.zeros((_product(x.shape), y_size), dtype=np_dtype) elif isinstance(x, Sequence): jacobians = list( - filter(lambda t: t is not None, (make_jacobian( - item, y_size, np_dtype) for item in x))) + filter(lambda t: t is not None, + (make_jacobian(item, y_size, np_dtype) for item in x))) return jacobians else: None @@ -186,8 +186,10 @@ def _compute_analytical_jacobian(program, x, y, place, scope): np_type = dtype_to_np_dtype(y.dtype) # create dy Variable in Program - dy = program.global_block().create_var( - name=dy_name, shape=y.shape, dtype=np_type, persistable=True) + dy = program.global_block().create_var(name=dy_name, + shape=y.shape, + dtype=np_type, + persistable=True) # append backward dx = fluid.gradients(y, x, dy) @@ -217,8 +219,8 @@ def _compute_analytical_jacobian(program, x, y, place, scope): if dx_res[j] is not None: jacobian[dx_idx][:, i] = dx_res[j].flatten() else: - jacobian[dx_idx][:, i] = np.zeros( - dx[dx_idx].shape, dtype=np_type).flatten() + jacobian[dx_idx][:, i] = np.zeros(dx[dx_idx].shape, + dtype=np_type).flatten() _set_item(dy_t, i, 0, np_type) @@ -313,8 +315,8 @@ def grad_check(x, analytical.append( _compute_analytical_jacobian(prog, clone_x, clone_y, place, scope)) - for i, (x_idx, - y_idx) in enumerate(product(*[range(len(x)), range(len(y))])): + for i, (x_idx, y_idx) in enumerate( + product(*[range(len(x)), range(len(y))])): a = analytical[y_idx][x_idx] n = numerical[x_idx][y_idx] if not np.allclose(a, n, rtol, 
atol): @@ -373,8 +375,10 @@ def double_grad_check(x, for yi in y: dyi_name = _append_grad_suffix_(yi.name) np_type = dtype_to_np_dtype(yi.dtype) - dy = program.global_block().create_var( - name=dyi_name, shape=yi.shape, dtype=np_type, persistable=True) + dy = program.global_block().create_var(name=dyi_name, + shape=yi.shape, + dtype=np_type, + persistable=True) dy.stop_gradient = False v = np.random.random(size=yi.shape).astype(np_type) set_var_in_scope(scope, place, dyi_name, v) @@ -398,7 +402,7 @@ def double_grad_check(x, grad_check(x, target_grads, x_init, place, program, eps, atol, rtol) -# TODO(jiabin): We currently support only triple grad check here, extend this to support +# TODO(jiabin): We currently support only triple grad check here, extend this to support # higher order differenciation later. @@ -452,8 +456,10 @@ def triple_grad_check(x, for yi in y: dyi_name = _append_grad_suffix_(yi.name) np_type = dtype_to_np_dtype(yi.dtype) - dy = program.global_block().create_var( - name=dyi_name, shape=yi.shape, dtype=np_type, persistable=True) + dy = program.global_block().create_var(name=dyi_name, + shape=yi.shape, + dtype=np_type, + persistable=True) dy.stop_gradient = False v = np.random.random(size=yi.shape).astype(np_type) set_var_in_scope(scope, place, dyi_name, v) @@ -475,11 +481,10 @@ def triple_grad_check(x, for dxi in target_grads: ddxi_name = _append_grad_suffix_(dxi.name) np_type = dtype_to_np_dtype(dxi.dtype) - ddx = program.global_block().create_var( - name=ddxi_name, - shape=dxi.shape, - dtype=np_type, - persistable=True) + ddx = program.global_block().create_var(name=ddxi_name, + shape=dxi.shape, + dtype=np_type, + persistable=True) ddx.stop_gradient = False v = np.random.random(size=dxi.shape).astype(np_type) set_var_in_scope(scope, place, ddxi_name, v) @@ -507,15 +512,14 @@ def triple_grad_check(x, x_init += x_grads_grads_init # x <=> [x, dout, ddx] - grad_check( - x=x, - y=filted_target_grads_grads, - x_init=x_init, - place=place, - program=program, - eps=eps, - atol=atol, - rtol=rtol) + grad_check(x=x, + y=filted_target_grads_grads, + x_init=x_init, + place=place, + program=program, + eps=eps, + atol=atol, + rtol=rtol) def get_static_double_grad(x, @@ -547,8 +551,10 @@ def get_static_double_grad(x, yi = y[i] dyi_name = _append_grad_suffix_(yi.name) np_type = dtype_to_np_dtype(yi.dtype) - dy = program.global_block().create_var( - name=dyi_name, shape=yi.shape, dtype=np_type, persistable=True) + dy = program.global_block().create_var(name=dyi_name, + shape=yi.shape, + dtype=np_type, + persistable=True) dy.stop_gradient = False set_var_in_scope(scope, place, dyi_name, dy_init[i]) y_grads.append(dy) @@ -599,8 +605,10 @@ def get_static_double_grad(x, np_type = dtype_to_np_dtype(yi.dtype) dy_name = _append_grad_suffix_(yi.name) # create dy Variable in Program - dy = program.global_block().create_var( - name=dy_name, shape=yi.shape, dtype=np_type, persistable=True) + dy = program.global_block().create_var(name=dy_name, + shape=yi.shape, + dtype=np_type, + persistable=True) # init dy tensor in scope value = np.ones(yi.shape, dtype=np_type) dy_t = set_var_in_scope(scope, place, dy_name, value) @@ -656,12 +664,11 @@ def get_eager_double_grad(func, dys.append(dy_tensor) # calculate first derivative outputs = func(inputs) - d_inputs = paddle.grad( - outputs=outputs, - inputs=inputs, - grad_outputs=dys, - create_graph=True, - allow_unused=True) + d_inputs = paddle.grad(outputs=outputs, + inputs=inputs, + grad_outputs=dys, + create_graph=True, + allow_unused=True) d_inputs = 
[d_input for d_input in d_inputs if d_input is not None] # calcluate second derivative @@ -678,12 +685,11 @@ def get_eager_double_grad(func, ddy.stop_gradient = False ddys.append(ddy) - dd_inputs = paddle.grad( - outputs=d_inputs, - inputs=inputs, - grad_outputs=ddys, - create_graph=create_graph, - allow_unused=True) + dd_inputs = paddle.grad(outputs=d_inputs, + inputs=inputs, + grad_outputs=ddys, + create_graph=create_graph, + allow_unused=True) if return_mid_result: return dd_inputs, inputs + ddys @@ -790,8 +796,10 @@ def get_static_triple_grad(x, yi = y[i] dyi_name = _append_grad_suffix_(yi.name) np_type = dtype_to_np_dtype(yi.dtype) - dy = program.global_block().create_var( - name=dyi_name, shape=yi.shape, dtype=np_type, persistable=True) + dy = program.global_block().create_var(name=dyi_name, + shape=yi.shape, + dtype=np_type, + persistable=True) dy.stop_gradient = False set_var_in_scope(scope, place, dyi_name, dy_init[i]) y_grads.append(dy) @@ -811,8 +819,12 @@ def get_static_triple_grad(x, value = np.ones(dxi.shape, dtype=np_type) x_grads_grads_init.append(value) - return get_static_double_grad( - x, y, x_init, dy_init=x_grads_grads_init, place=place, program=program) + return get_static_double_grad(x, + y, + x_init, + dy_init=x_grads_grads_init, + place=place, + program=program) def get_eager_triple_grad(func, @@ -832,8 +844,11 @@ def get_eager_triple_grad(func, Returns: A list of numpy array that stores second derivative result calulated by dygraph """ - dd_y, dd_x = get_eager_double_grad( - func, x_init, dy_init, place, return_mid_result=True) + dd_y, dd_x = get_eager_double_grad(func, + x_init, + dy_init, + place, + return_mid_result=True) # calcluate third derivative dddys = [] diff --git a/python/paddle/fluid/tests/unittests/hccl_tools.py b/python/paddle/fluid/tests/unittests/hccl_tools.py index e3628ee5a4e..ab35b36161b 100644 --- a/python/paddle/fluid/tests/unittests/hccl_tools.py +++ b/python/paddle/fluid/tests/unittests/hccl_tools.py @@ -1,13 +1,13 @@ # -*- coding:UTF-8 -*- # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -56,17 +56,17 @@ def parse_args(): "--device_num", type=str, default="[0,8)", - help="The number of the Ascend accelerators used. please note that the Ascend accelerators" + help= + "The number of the Ascend accelerators used. please note that the Ascend accelerators" "used must be continuous, such [0,4) means to use four chips " "0,1,2,3; [0,1) means to use chip 0; The first four chips are" "a group, and the last four chips are a group. 
In addition to" "the [0,8) chips are allowed, other cross-group such as [3,6)" "are prohibited.") - parser.add_argument( - "--visible_devices", - type=str, - default="0,1,2,3,4,5,6,7", - help="will use the visible devices sequentially") + parser.add_argument("--visible_devices", + type=str, + default="0,1,2,3,4,5,6,7", + help="will use the visible devices sequentially") parser.add_argument("--server_ip", type=str, default="", help="server ip") args = parser.parse_args() return args @@ -121,8 +121,8 @@ def main(): pass else: raise ValueError( - "device num {} must be in the same group of [0,4] or [4,8] !". - format(args.device_num)) + "device num {} must be in the same group of [0,4] or [4,8] !" + .format(args.device_num)) device_num_list = list(range(first_num, last_num)) print("device_num_list:", device_num_list) @@ -162,8 +162,11 @@ def main(): # save hccn_table to file table_path = os.getcwd() - table_fn = os.path.join(table_path, 'hccl_{}p_{}_{}.json'.format( - len(device_num_list), "".join(map(str, device_num_list)), server_id)) + table_fn = os.path.join( + table_path, + 'hccl_{}p_{}_{}.json'.format(len(device_num_list), + "".join(map(str, + device_num_list)), server_id)) with open(table_fn, 'w') as table_fp: json.dump(hccn_table, table_fp, indent=4) sys.stdout.flush() diff --git a/python/paddle/fluid/tests/unittests/hdfs_test_utils.py b/python/paddle/fluid/tests/unittests/hdfs_test_utils.py index 69ccc7088b8..2dfa86f9766 100644 --- a/python/paddle/fluid/tests/unittests/hdfs_test_utils.py +++ b/python/paddle/fluid/tests/unittests/hdfs_test_utils.py @@ -25,6 +25,7 @@ java_home = os.environ["JAVA_HOME"] class FSTestBase(unittest.TestCase): + def _test_dirs(self, fs): dir_path = os.path.abspath("./test_dir") fs.delete(dir_path) @@ -220,11 +221,10 @@ class FSTestBase(unittest.TestCase): pass def _test_list_dir(self, fs): - fs = HDFSClient( - "/usr/local/hadoop-2.7.7/", - None, - time_out=15 * 1000, - sleep_inter=100) + fs = HDFSClient("/usr/local/hadoop-2.7.7/", + None, + time_out=15 * 1000, + sleep_inter=100) fs.ls_dir("test_not_exists") def _test_touch(self, fs): diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_communicate_group.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_communicate_group.py index 53d0f95a236..f290705c312 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_communicate_group.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_communicate_group.py @@ -19,6 +19,7 @@ from paddle.distributed import fleet class TestNewGroupAPI(object): + def __init__(self): paddle.distributed.init_parallel_env() topo = fleet.CommunicateTopology(["data", "model", "sharding", "pipe"], @@ -49,27 +50,30 @@ class TestNewGroupAPI(object): tmp = np.array([0, 0, 0]) result = paddle.to_tensor(tmp) - paddle.distributed.scatter( - result, [self.tensor2, self.tensor1], - src=dp_src_rank, - group=dp_gp, - use_calc_stream=True) + paddle.distributed.scatter(result, [self.tensor2, self.tensor1], + src=dp_src_rank, + group=dp_gp, + use_calc_stream=True) if dp_rank == 0: assert np.array_equal(result, self.tensor2) elif dp_rank == 1: assert np.array_equal(result, self.tensor1) print("test scatter api ok") - paddle.distributed.broadcast( - result, src=1, group=dp_gp, use_calc_stream=True) + paddle.distributed.broadcast(result, + src=1, + group=dp_gp, + use_calc_stream=True) assert np.array_equal(result, self.tensor1) print("test broadcast api ok") - paddle.distributed.reduce( - result, dst=dp_src_rank, group=dp_gp, use_calc_stream=True) + 
paddle.distributed.reduce(result, + dst=dp_src_rank, + group=dp_gp, + use_calc_stream=True) if dp_rank == 0: - assert np.array_equal(result, - paddle.add(self.tensor1, self.tensor1)) + assert np.array_equal(result, paddle.add(self.tensor1, + self.tensor1)) elif dp_rank == 1: assert np.array_equal(result, self.tensor1) print("test reduce api ok") @@ -85,8 +89,10 @@ class TestNewGroupAPI(object): print("test wait api ok") result = [] - paddle.distributed.all_gather( - result, self.tensor1, group=dp_gp, use_calc_stream=True) + paddle.distributed.all_gather(result, + self.tensor1, + group=dp_gp, + use_calc_stream=True) assert np.array_equal(result[0], self.tensor1) assert np.array_equal(result[1], self.tensor1) print("test all_gather api ok") diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_inference_helper.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_inference_helper.py index 949d537586f..830b8ccecbe 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_inference_helper.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_inference_helper.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -27,6 +27,7 @@ import paddle.fluid as fluid import paddle.distributed.fleet as fleet from paddle import framework from paddle.distributed.fleet.utils.hybrid_parallel_inference import HybridParallelInferenceHelper + paddle.enable_static() @@ -46,6 +47,7 @@ def numpy_while(x, w1=1.0, w2=2.0, max_len=2): class TestHybridParallelInferenceHelperClass(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() fleet.init(is_collective=True, strategy=strategy) @@ -64,31 +66,29 @@ class TestHybridParallelInferenceHelperClass(unittest.TestCase): with paddle.static.program_guard(main_program, startup_program): with paddle.fluid.device_guard(f'{device}:0'): - X = paddle.static.data( - name='X', shape=[None, 2], dtype='float32') + X = paddle.static.data(name='X', + shape=[None, 2], + dtype='float32') with paddle.fluid.device_guard(f'{device}:all'): - max_len = layers.fill_constant( - shape=[1], - dtype="int64", - value=2, - force_cpu=False, - name="n") - step_idx = layers.fill_constant( - shape=[1], - dtype="int64", - value=0, - force_cpu=False, - name="i") + max_len = layers.fill_constant(shape=[1], + dtype="int64", + value=2, + force_cpu=False, + name="n") + step_idx = layers.fill_constant(shape=[1], + dtype="int64", + value=0, + force_cpu=False, + name="i") data = layers.array_write(X, step_idx) - cond_int = layers.fill_constant( - shape=[1], - dtype="int64", - value=0, - force_cpu=False, - name="cond_int") + cond_int = layers.fill_constant(shape=[1], + dtype="int64", + value=0, + force_cpu=False, + name="cond_int") cond = layers.less_than(x=step_idx, y=max_len) while_op = layers.While(cond, is_test=True) @@ -101,21 +101,19 @@ class TestHybridParallelInferenceHelperClass(unittest.TestCase): with paddle.fluid.device_guard(f'{device}:0'): param_attr = paddle.ParamAttr( initializer=paddle.nn.initializer.Constant(1.0)) - weight1 = paddle.static.create_parameter( - shape=[2, 5], 
- dtype='float32', - attr=param_attr, - is_bias=False) + weight1 = paddle.static.create_parameter(shape=[2, 5], + dtype='float32', + attr=param_attr, + is_bias=False) hidden1 = paddle.matmul(input, weight1) with paddle.fluid.device_guard(f'{device}:1'): param_attr = paddle.ParamAttr( initializer=paddle.nn.initializer.Constant(2.0)) - weight2 = paddle.static.create_parameter( - shape=[5, 2], - dtype='float32', - attr=param_attr, - is_bias=False) + weight2 = paddle.static.create_parameter(shape=[5, 2], + dtype='float32', + attr=param_attr, + is_bias=False) hidden2 = paddle.matmul(hidden1, weight2) layers.array_write(hidden2, i=step_idx, array=data) @@ -142,16 +140,17 @@ class TestHybridParallelInferenceHelperClass(unittest.TestCase): micro_batch_size=2, num_mp=1, num_pp=2, - init_comm=nranks > 1, ) - helper.gen_infer_program( - ['array_write_0.out'], ['cond_int.tmp_0'], debug=True) + init_comm=nranks > 1, + ) + helper.gen_infer_program(['array_write_0.out'], ['cond_int.tmp_0'], + debug=True) exe = paddle.static.Executor(paddle.CUDAPlace(dev_id)) exe.run(startup_program) for step in range(2): - init_data = np.random.uniform( - low=0.0, high=1.0, size=[2, 2]).astype('float32') + init_data = np.random.uniform(low=0.0, high=1.0, + size=[2, 2]).astype('float32') [res] = exe.run(main_program, feed={"X": init_data}, fetch_list=[out]) diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_amp.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_amp.py index 4c966585d5f..e36bc5a2211 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_amp.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_amp.py @@ -23,16 +23,21 @@ import unittest class TestMPClipGrad(TestDistMPTraning): + def build_optimizer(self, model): grad_clip = paddle.nn.ClipGradByGlobalNorm(2.0) - scheduler = paddle.optimizer.lr.ExponentialDecay( - learning_rate=0.001, gamma=0.999, verbose=True) + scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.001, + gamma=0.999, + verbose=True) optimizer = paddle.optimizer.SGD(scheduler, grad_clip=grad_clip, parameters=[{ - 'params': model.parameters(), - 'weight_decay': 0.001, - 'learning_rate': 0.1 + 'params': + model.parameters(), + 'weight_decay': + 0.001, + 'learning_rate': + 0.1 }]) return optimizer diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_clip_grad.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_clip_grad.py index ad95aceaa2c..a3fb8774d78 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_clip_grad.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_clip_grad.py @@ -26,10 +26,12 @@ import logging class TestMPClipGrad(TestDistMPTraning): + def build_optimizer(self, model): grad_clip = paddle.nn.ClipGradByGlobalNorm(2.0) - scheduler = paddle.optimizer.lr.ExponentialDecay( - learning_rate=0.001, gamma=0.999, verbose=True) + scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.001, + gamma=0.999, + verbose=True) optimizer = paddle.optimizer.SGD(scheduler, grad_clip=grad_clip, parameters=model.parameters()) diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_fp16.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_fp16.py index 3e5eedbec9a..449a1f18f78 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_fp16.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_fp16.py @@ -23,19 +23,20 @@ import unittest class TestMPFP16(TestDistMPTraning): + def build_optimizer(self, model): grad_clip = 
paddle.nn.ClipGradByGlobalNorm(1.0) - scheduler = paddle.optimizer.lr.ExponentialDecay( - learning_rate=0.001, gamma=0.999, verbose=True) + scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.001, + gamma=0.999, + verbose=True) optimizer = paddle.optimizer.SGD(scheduler, grad_clip=grad_clip, parameters=model.parameters()) - model, optimizer = paddle.amp.decorate( - models=model, - optimizers=optimizer, - level='O2', - save_dtype='float32') + model, optimizer = paddle.amp.decorate(models=model, + optimizers=optimizer, + level='O2', + save_dtype='float32') return optimizer diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_layers.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_layers.py index 9ae9c14db3f..e9567ae80c0 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_layers.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_layers.py @@ -35,6 +35,7 @@ def set_random_seed(seed): class ColumnLinearNet(fluid.dygraph.Layer): + def __init__(self, input_size, output_size, global_dtype): super(ColumnLinearNet, self).__init__() self.parallel_linear = fleet.meta_parallel.ColumnParallelLinear( @@ -51,6 +52,7 @@ class ColumnLinearNet(fluid.dygraph.Layer): class RowLinearNet(fluid.dygraph.Layer): + def __init__(self, input_size, output_size): super(RowLinearNet, self).__init__() self.parallel_linear = fleet.meta_parallel.RowParallelLinear( @@ -66,10 +68,11 @@ class RowLinearNet(fluid.dygraph.Layer): class EmbeddingNet(fluid.dygraph.Layer): + def __init__(self, vocab_size, hidden_size): super(EmbeddingNet, self).__init__() - self.embedding = fleet.meta_parallel.VocabParallelEmbedding(vocab_size, - hidden_size) + self.embedding = fleet.meta_parallel.VocabParallelEmbedding( + vocab_size, hidden_size) def forward(self, x): output = self.embedding(x) @@ -77,6 +80,7 @@ class EmbeddingNet(fluid.dygraph.Layer): class SimpleMatmul(fluid.dygraph.Layer): + def __init__(self, weight, output_size, global_dtype): super(SimpleMatmul, self).__init__() self.weight = paddle.create_parameter( @@ -96,6 +100,7 @@ class SimpleMatmul(fluid.dygraph.Layer): class SimpleEmbedding(fluid.dygraph.Layer): + def __init__(self, vocab_size, hidden_size, weight): super(SimpleEmbedding, self).__init__() self.embedding = paddle.nn.Embedding( @@ -111,6 +116,7 @@ class SimpleEmbedding(fluid.dygraph.Layer): class TestDistTraning(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() self.model_parallel_size = 2 @@ -211,8 +217,9 @@ class TestDistTraning(unittest.TestCase): optimizer_a.step() optimizer_b.step() - np.testing.assert_allclose( - loss_a.numpy(), loss_b.numpy(), rtol=5e-6) + np.testing.assert_allclose(loss_a.numpy(), + loss_b.numpy(), + rtol=5e-6) def test_parallel_embedding(self): batch_size = 17 @@ -301,8 +308,9 @@ class TestDistTraning(unittest.TestCase): check_group = dist.new_group(list(range(self.model_parallel_size))) integral_data = [] partial_data = data.clone().detach() - paddle.distributed.all_gather( - integral_data, partial_data, group=check_group) + paddle.distributed.all_gather(integral_data, + partial_data, + group=check_group) integral_data = paddle.concat(integral_data, axis=-1) integral_data = integral_data.detach().clone() integral_data.stop_gradient = False @@ -311,20 +319,23 @@ class TestDistTraning(unittest.TestCase): loss_b = model_b(integral_data, label).sum() / batch_size print("loss_a: ", loss_a.numpy(), "loss_b: ", loss_b.numpy()) - np.testing.assert_allclose( - loss_a.numpy(), loss_b.numpy(), rtol=1e-6) + 
np.testing.assert_allclose(loss_a.numpy(), + loss_b.numpy(), + rtol=1e-6) loss_a.backward() loss_b.backward() integral_grad = [] partial_grad = data.grad.clone().detach() - paddle.distributed.all_gather( - integral_grad, partial_grad, group=check_group) + paddle.distributed.all_gather(integral_grad, + partial_grad, + group=check_group) integral_grad = paddle.concat(integral_grad, axis=-1) - np.testing.assert_allclose( - integral_data.grad.numpy(), integral_grad.numpy(), rtol=1e-6) + np.testing.assert_allclose(integral_data.grad.numpy(), + integral_grad.numpy(), + rtol=1e-6) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_model.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_model.py index f9ec49d8817..2e8acc7f6d0 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_model.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_model.py @@ -63,6 +63,7 @@ def parallel_matmul(lm_output, logit_weights, parallel_output): class SimpleMPNet(fluid.dygraph.Layer): + def __init__(self, vocab_size, hidden_size, inner_size, output_size, np_fc1, np_fc2, mp_id): super(SimpleMPNet, self).__init__() @@ -113,6 +114,7 @@ class SimpleMPNet(fluid.dygraph.Layer): class SimpleDPNet(fluid.dygraph.Layer): + def __init__(self, vocab_size, hidden_size, inner_size, output_size, np_fc1, np_fc2): @@ -156,6 +158,7 @@ class SimpleDPNet(fluid.dygraph.Layer): class TestDistMPTraning(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() self.model_parallel_size = 2 @@ -210,13 +213,15 @@ class TestDistMPTraning(unittest.TestCase): for _ in range(5): np_data = np.random.randint(0, vocab_size, ( batch_size, - seq_length, )) + seq_length, + )) batch = paddle.to_tensor(np_data) loss_a = self.train_batch(batch, model_a, optimizer_a, True) loss_b = self.train_batch(batch, model_b, optimizer_b, False) - np.testing.assert_allclose( - loss_a.numpy(), loss_b.numpy(), rtol=1e-6) + np.testing.assert_allclose(loss_a.numpy(), + loss_b.numpy(), + rtol=1e-6) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_random.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_random.py index 59d24066946..32e9fc70898 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_random.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_mp_random.py @@ -26,6 +26,7 @@ import random class TestDistTraning(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() self.model_parallel_size = 2 diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_alexnet.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_alexnet.py index 71e873b0e2f..62747b5e70a 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_alexnet.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_alexnet.py @@ -37,6 +37,7 @@ micro_batch_size = 2 class TestDistPPTraning(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() self.model_parallel_size = 1 @@ -54,8 +55,9 @@ class TestDistPPTraning(unittest.TestCase): fleet.init(is_collective=True, strategy=strategy) def build_optimizer(self, model): - scheduler = paddle.optimizer.lr.PiecewiseDecay( - boundaries=[2], values=[0.001, 0.002], verbose=True) + scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[2], + values=[0.001, 0.002], + verbose=True) optimizer = paddle.optimizer.SGD(learning_rate=scheduler, parameters=model.parameters()) return scheduler, optimizer @@ -88,14 +90,15 @@ 
class TestDistPPTraning(unittest.TestCase): param.set_value(parameters[idx + pp_id * (param_len // 2)]) # construct reader - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=batch_size, + drop_last=True) for step_id, data in enumerate(train_reader()): x_data = np.array([x[0] for x in data]).astype('float32').reshape( batch_size, 1, 28, 28) - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - batch_size, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(batch_size, 1) img = paddle.to_tensor(x_data) label = paddle.to_tensor(y_data) img.stop_gradient = True @@ -113,8 +116,9 @@ class TestDistPPTraning(unittest.TestCase): loss_b = model_b.train_batch([img, label], optimizer_b, scheduler_b) print("loss: ", loss_a.numpy(), loss_b.numpy()) - np.testing.assert_allclose( - loss_a.numpy(), loss_b.numpy(), rtol=5e-5) + np.testing.assert_allclose(loss_a.numpy(), + loss_b.numpy(), + rtol=5e-5) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_amp.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_amp.py index 608bdd7a35d..824dd234b70 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_amp.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_amp.py @@ -37,6 +37,7 @@ micro_batch_size = 2 class TestDistPPTraning(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() self.model_parallel_size = 1 @@ -65,8 +66,9 @@ class TestDistPPTraning(unittest.TestCase): #construct model a model_a = AlexNet(10) - scheduler_a = paddle.optimizer.lr.PiecewiseDecay( - boundaries=[2], values=[0.001, 0.002], verbose=True) + scheduler_a = paddle.optimizer.lr.PiecewiseDecay(boundaries=[2], + values=[0.001, 0.002], + verbose=True) optimizer_a = paddle.optimizer.SGD(learning_rate=scheduler_a, grad_clip=grad_clip, parameters=model_a.parameters()) @@ -80,8 +82,9 @@ class TestDistPPTraning(unittest.TestCase): # construct model b model_b = AlexNetPipeDesc(num_stages=self.pipeline_parallel_size) - scheduler_b = paddle.optimizer.lr.PiecewiseDecay( - boundaries=[2], values=[0.001, 0.002], verbose=True) + scheduler_b = paddle.optimizer.lr.PiecewiseDecay(boundaries=[2], + values=[0.001, 0.002], + verbose=True) optimizer_b = paddle.optimizer.SGD(learning_rate=scheduler_b, grad_clip=grad_clip, parameters=model_b.parameters()) @@ -94,14 +97,15 @@ class TestDistPPTraning(unittest.TestCase): param.set_value(parameters[idx + pp_id * (param_len // 2)]) # construct reader - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=batch_size, + drop_last=True) for step_id, data in enumerate(train_reader()): x_data = np.array([x[0] for x in data]).astype('float32').reshape( batch_size, 1, 28, 28) - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - batch_size, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(batch_size, 1) img = paddle.to_tensor(x_data) label = paddle.to_tensor(y_data) img.stop_gradient = True @@ -119,12 +123,15 @@ class TestDistPPTraning(unittest.TestCase): scheduler_a.step() with paddle.amp.auto_cast(): - loss_b = model_b.train_batch( - [img, label], optimizer_b, scheduler_b, scaler=scaler_b) + loss_b = model_b.train_batch([img, label], + optimizer_b, + scheduler_b, + scaler=scaler_b) print("loss: ", loss_a.numpy(), 
loss_b.numpy()) - np.testing.assert_allclose( - loss_a.numpy(), loss_b.numpy(), rtol=5e-5) + np.testing.assert_allclose(loss_a.numpy(), + loss_b.numpy(), + rtol=5e-5) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_clip_grad.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_clip_grad.py index 430c6e08848..38d2bfabef7 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_clip_grad.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_clip_grad.py @@ -21,10 +21,12 @@ from hybrid_parallel_pp_alexnet import TestDistPPTraning class TestPPClipGrad(TestDistPPTraning): + def build_optimizer(self, model): grad_clip = paddle.nn.ClipGradByGlobalNorm(0.5) - scheduler = paddle.optimizer.lr.PiecewiseDecay( - boundaries=[2], values=[0.001, 0.002], verbose=True) + scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[2], + values=[0.001, 0.002], + verbose=True) optimizer = paddle.optimizer.SGD(learning_rate=scheduler, grad_clip=grad_clip, parameters=model.parameters()) @@ -32,16 +34,18 @@ class TestPPClipGrad(TestDistPPTraning): class TestPPClipGradParamGroup(TestDistPPTraning): + def build_optimizer(self, model): grad_clip = paddle.nn.ClipGradByGlobalNorm(0.5) - scheduler = paddle.optimizer.lr.PiecewiseDecay( - boundaries=[2], values=[0.001, 0.002], verbose=True) - optimizer = paddle.optimizer.Momentum( - learning_rate=scheduler, - grad_clip=grad_clip, - parameters=[{ - "params": model.parameters() - }]) + scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[2], + values=[0.001, 0.002], + verbose=True) + optimizer = paddle.optimizer.Momentum(learning_rate=scheduler, + grad_clip=grad_clip, + parameters=[{ + "params": + model.parameters() + }]) return scheduler, optimizer diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_embedding.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_embedding.py index d2be0cb8072..9c077cb70fd 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_embedding.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_embedding.py @@ -43,26 +43,29 @@ hidden_size = 8 class SimpleNet(Layer): + def __init__(self): super(SimpleNet, self).__init__() self.word_embeddings = nn.Embedding(vocab_size, hidden_size) self.softmax_weight = self.create_parameter( shape=[hidden_size, vocab_size]) - self.softmax_bias = self.create_parameter( - shape=[vocab_size], is_bias=False) + self.softmax_bias = self.create_parameter(shape=[vocab_size], + is_bias=False) def forward(self, x1, x2, y1): x_emb = self.word_embeddings(x1) fc = fluid.layers.matmul(x_emb, self.softmax_weight) fc = fluid.layers.elementwise_add(fc, self.softmax_bias) projection = fluid.layers.reshape(fc, shape=[-1, vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( - logits=projection, label=y1, soft_label=False) + loss = fluid.layers.softmax_with_cross_entropy(logits=projection, + label=y1, + soft_label=False) return loss.mean() class EmbeddingNet(Layer): + def __init__(self): super(EmbeddingNet, self).__init__() self.word_embeddings = nn.Embedding(vocab_size, hidden_size) @@ -74,6 +77,7 @@ class EmbeddingNet(Layer): class MatmulNet(Layer): + def __init__(self): super(MatmulNet, self).__init__() self.softmax_weight = self.create_parameter( @@ -87,6 +91,7 @@ class MatmulNet(Layer): class BiasNet(Layer): + def __init__(self): super(BiasNet, self).__init__() self.softmax_bias = self.create_parameter(shape=[vocab_size]) @@ -99,17 +104,20 @@ class BiasNet(Layer): class LossNet(Layer): + def 
__init__(self): super(LossNet, self).__init__() def forward(self, args, y1): projection, x2 = args - loss = fluid.layers.softmax_with_cross_entropy( - logits=projection, label=y1[0], soft_label=False) + loss = fluid.layers.softmax_with_cross_entropy(logits=projection, + label=y1[0], + soft_label=False) return loss.mean() class SimpleNetPipe(Layer): + def __init__(self): super(SimpleNetPipe, self).__init__() self.features = Sequential(EmbeddingNet(), MatmulNet(), BiasNet()) @@ -120,6 +128,7 @@ class SimpleNetPipe(Layer): class TestDistEmbeddingTraning(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() self.model_parallel_size = 1 @@ -152,10 +161,9 @@ class TestDistEmbeddingTraning(unittest.TestCase): parameters=model_a.parameters()) init_net = SimpleNetPipe() - model_b = PipelineLayer( - layers=init_net.to_layers(), - num_stages=self.pipeline_parallel_size, - loss_fn=LossNet()) + model_b = PipelineLayer(layers=init_net.to_layers(), + num_stages=self.pipeline_parallel_size, + loss_fn=LossNet()) scheduler_b = paddle.optimizer.lr.PiecewiseDecay( boundaries=[2, 3, 4], values=[0.01, 0.02, 0.03, 0.04], verbose=True) diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_fp16.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_fp16.py index 4893960345e..5ad1573d2c1 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_fp16.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_fp16.py @@ -37,6 +37,7 @@ micro_batch_size = 2 class TestDistPPTraning(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() self.model_parallel_size = 1 @@ -65,8 +66,9 @@ class TestDistPPTraning(unittest.TestCase): #construct model a model_a = AlexNet(10) - scheduler_a = paddle.optimizer.lr.PiecewiseDecay( - boundaries=[2], values=[0.001, 0.002], verbose=True) + scheduler_a = paddle.optimizer.lr.PiecewiseDecay(boundaries=[2], + values=[0.001, 0.002], + verbose=True) optimizer_a = paddle.optimizer.SGD(learning_rate=scheduler_a, grad_clip=grad_clip, parameters=model_a.parameters()) @@ -75,8 +77,9 @@ class TestDistPPTraning(unittest.TestCase): # construct model b model_b = AlexNetPipeDesc(num_stages=self.pipeline_parallel_size) - scheduler_b = paddle.optimizer.lr.PiecewiseDecay( - boundaries=[2], values=[0.001, 0.002], verbose=True) + scheduler_b = paddle.optimizer.lr.PiecewiseDecay(boundaries=[2], + values=[0.001, 0.002], + verbose=True) optimizer_b = paddle.optimizer.SGD(learning_rate=scheduler_b, grad_clip=grad_clip, parameters=model_b.parameters()) @@ -89,16 +92,14 @@ class TestDistPPTraning(unittest.TestCase): for idx, param in enumerate(model_b.parameters()): param.set_value(parameters[idx + pp_id * (param_len // 2)]) - model_a, optimizer_a = paddle.amp.decorate( - models=model_a, - optimizers=optimizer_a, - level='O2', - save_dtype='float32') - model_b, optimizer_b = paddle.amp.decorate( - models=model_b, - optimizers=optimizer_b, - level='O2', - save_dtype='float32') + model_a, optimizer_a = paddle.amp.decorate(models=model_a, + optimizers=optimizer_a, + level='O2', + save_dtype='float32') + model_b, optimizer_b = paddle.amp.decorate(models=model_b, + optimizers=optimizer_b, + level='O2', + save_dtype='float32') model_b = fleet.distributed_model(model_b) optimizer_b = fleet.distributed_optimizer(optimizer_b) @@ -106,14 +107,15 @@ class TestDistPPTraning(unittest.TestCase): scaler_b = fleet.distributed_scaler(scaler_b) # construct reader - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=batch_size, 
drop_last=True) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=batch_size, + drop_last=True) for step_id, data in enumerate(train_reader()): x_data = np.array([x[0] for x in data]).astype('float32').reshape( batch_size, 1, 28, 28) - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - batch_size, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(batch_size, 1) img = paddle.to_tensor(x_data) label = paddle.to_tensor(y_data) img.stop_gradient = True @@ -130,12 +132,15 @@ class TestDistPPTraning(unittest.TestCase): scheduler_a.step() with paddle.amp.auto_cast(enable=True, level='O2'): - loss_b = model_b.train_batch( - [img, label], optimizer_b, scheduler_b, scaler=scaler_b) + loss_b = model_b.train_batch([img, label], + optimizer_b, + scheduler_b, + scaler=scaler_b) print("loss: ", loss_a.numpy(), loss_b.numpy()) - np.testing.assert_allclose( - loss_a.numpy(), loss_b.numpy(), rtol=5e-3) + np.testing.assert_allclose(loss_a.numpy(), + loss_b.numpy(), + rtol=5e-3) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_layer.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_layer.py index b30df0e9a2f..c1609c975e6 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_layer.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_layer.py @@ -25,6 +25,7 @@ import paddle.nn.functional as F class ReshapeHelp(Layer): + def __init__(self, shape): super(ReshapeHelp, self).__init__() self.shape = shape @@ -34,30 +35,24 @@ class ReshapeHelp(Layer): class AlexNet(Layer): + def __init__(self, num_classes=10): super(AlexNet, self).__init__() self.features = Sequential( - nn.Conv2D( - 1, 64, kernel_size=11, stride=4, padding=5), + nn.Conv2D(1, 64, kernel_size=11, stride=4, padding=5), nn.ReLU(), - nn.MaxPool2D( - kernel_size=2, stride=2), - nn.Conv2D( - 64, 192, kernel_size=5, padding=2), + nn.MaxPool2D(kernel_size=2, stride=2), + nn.Conv2D(64, 192, kernel_size=5, padding=2), nn.ReLU(), - nn.MaxPool2D( - kernel_size=2, stride=2), - nn.Conv2D( - 192, 384, kernel_size=3, padding=1), + nn.MaxPool2D(kernel_size=2, stride=2), + nn.Conv2D(192, 384, kernel_size=3, padding=1), nn.ReLU(), - nn.Conv2D( - 384, 256, kernel_size=3, padding=1), + nn.Conv2D(384, 256, kernel_size=3, padding=1), nn.ReLU(), - nn.Conv2D( - 256, 256, kernel_size=3, padding=1), + nn.Conv2D(256, 256, kernel_size=3, padding=1), nn.ReLU(), - nn.MaxPool2D( - kernel_size=2, stride=2), ) + nn.MaxPool2D(kernel_size=2, stride=2), + ) self.reshape_layer = ReshapeHelp(shape=[-1, 256]) self.classifier = nn.Linear(256, num_classes) @@ -71,6 +66,7 @@ class AlexNet(Layer): class AlexNetPipe(AlexNet): + def to_layers(self): feat = [self.features[i] for i in range(len(self.features))] loss_fn = [self.reshape_layer, self.classifier] @@ -79,39 +75,33 @@ class AlexNetPipe(AlexNet): class AlexNetPipeDesc(PipelineLayer): + def __init__(self, num_classes=10, **kwargs): self.num_classes = num_classes decs = [ - LayerDesc( - nn.Conv2D, 1, 64, kernel_size=11, stride=4, padding=5), + LayerDesc(nn.Conv2D, 1, 64, kernel_size=11, stride=4, padding=5), LayerDesc(nn.ReLU), - LayerDesc( - nn.MaxPool2D, kernel_size=2, stride=2), - LayerDesc( - nn.Conv2D, 64, 192, kernel_size=5, padding=2), + LayerDesc(nn.MaxPool2D, kernel_size=2, stride=2), + LayerDesc(nn.Conv2D, 64, 192, kernel_size=5, padding=2), F.relu, - LayerDesc( - nn.MaxPool2D, kernel_size=2, stride=2), - LayerDesc( - nn.Conv2D, 192, 384, kernel_size=3, padding=1), + LayerDesc(nn.MaxPool2D, 
kernel_size=2, stride=2), + LayerDesc(nn.Conv2D, 192, 384, kernel_size=3, padding=1), F.relu, - LayerDesc( - nn.Conv2D, 384, 256, kernel_size=3, padding=1), + LayerDesc(nn.Conv2D, 384, 256, kernel_size=3, padding=1), F.relu, - LayerDesc( - nn.Conv2D, 256, 256, kernel_size=3, padding=1), + LayerDesc(nn.Conv2D, 256, 256, kernel_size=3, padding=1), F.relu, - LayerDesc( - nn.MaxPool2D, kernel_size=2, stride=2), - LayerDesc( - ReshapeHelp, shape=[-1, 256]), + LayerDesc(nn.MaxPool2D, kernel_size=2, stride=2), + LayerDesc(ReshapeHelp, shape=[-1, 256]), LayerDesc(nn.Linear, 256, self.num_classes), # classifier ] - super(AlexNetPipeDesc, self).__init__( - layers=decs, loss_fn=nn.CrossEntropyLoss(), **kwargs) + super(AlexNetPipeDesc, self).__init__(layers=decs, + loss_fn=nn.CrossEntropyLoss(), + **kwargs) class TestPipeLayerAPI(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() self.pipeline_parallel_size = 2 @@ -129,10 +119,9 @@ class TestPipeLayerAPI(unittest.TestCase): def test_pipelayer_sequential(self): init_net = AlexNetPipe() - pipe_model = PipelineLayer( - layers=init_net.to_layers(), - num_stages=self.pipeline_parallel_size, - loss_fn=nn.CrossEntropyLoss()) + pipe_model = PipelineLayer(layers=init_net.to_layers(), + num_stages=self.pipeline_parallel_size, + loss_fn=nn.CrossEntropyLoss()) stage_id = self.hcg.get_stage_id() init_parameters = init_net.parameters() pipe_parameters = pipe_model.parameters() diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_recompute.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_recompute.py index ebcac70a3b6..8e364290bae 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_recompute.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_recompute.py @@ -45,6 +45,7 @@ dim_feedforward = 4 * d_model class EmbeddingNet(Layer): + def __init__(self): super(EmbeddingNet, self).__init__() self.word_embeddings = nn.Embedding(vocab_size, hidden_size) @@ -58,6 +59,7 @@ class EmbeddingNet(Layer): class TransformerNet(Layer): + def __init__(self): super(TransformerNet, self).__init__() self.linear1 = nn.Linear(d_model, dim_feedforward) @@ -87,17 +89,20 @@ class TransformerNet(Layer): class EmbeddingPipe(EmbeddingNet): + def forward(self, x): return super().forward(x) class TransformerNetPipe(TransformerNet): + def forward(self, x): output = super().forward(x) return output class CriterionPipe(Layer): + def __init__(self): super(CriterionPipe, self).__init__() @@ -107,6 +112,7 @@ class CriterionPipe(Layer): class ModelPipe(PipelineLayer): + def __init__(self, topology): self.descs = [] self.descs.append(LayerDesc(EmbeddingPipe)) @@ -114,17 +120,17 @@ class ModelPipe(PipelineLayer): for x in range(2): self.descs.append(LayerDesc(TransformerNetPipe)) - super().__init__( - layers=self.descs, - loss_fn=CriterionPipe(), - topology=topology, - seg_method="layer:TransformerNetPipe", - recompute_interval=1, - recompute_partition=False, - recompute_offload=False) + super().__init__(layers=self.descs, + loss_fn=CriterionPipe(), + topology=topology, + seg_method="layer:TransformerNetPipe", + recompute_interval=1, + recompute_partition=False, + recompute_offload=False) class TestDistPPTraning(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() self.model_parallel_size = 1 @@ -151,8 +157,9 @@ class TestDistPPTraning(unittest.TestCase): set_random_seed(1024, dp_id, rank_id) model = ModelPipe(topology) - scheduler = paddle.optimizer.lr.PiecewiseDecay( - boundaries=[2], values=[0.001, 
0.002], verbose=True) + scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[2], + values=[0.001, 0.002], + verbose=True) optimizer = paddle.optimizer.SGD(learning_rate=scheduler, parameters=model.parameters()) diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_save_load.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_save_load.py index e6e27bbb41a..8521ae8b35b 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_save_load.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_save_load.py @@ -33,6 +33,7 @@ vocab_size = 128 class TestDistPPSaveLoadTraning(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() self.model_parallel_size = 1 @@ -59,8 +60,9 @@ class TestDistPPSaveLoadTraning(unittest.TestCase): set_random_seed(1024, dp_id, rank_id) model = ModelPipe(topology) - scheduler = paddle.optimizer.lr.PiecewiseDecay( - boundaries=[2], values=[0.001, 0.002], verbose=True) + scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[2], + values=[0.001, 0.002], + verbose=True) optimizer = paddle.optimizer.SGD(learning_rate=scheduler, parameters=model.parameters()) @@ -81,8 +83,9 @@ class TestDistPPSaveLoadTraning(unittest.TestCase): # construct data test_steps = 5 - np_data = np.random.randint( - 0, vocab_size, size=[test_steps, batch_size, length]) + np_data = np.random.randint(0, + vocab_size, + size=[test_steps, batch_size, length]) origin_loss = [] for step_id in range(5): diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_transformer.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_transformer.py index c4c1e565068..ffe4a063a9c 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_transformer.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_pp_transformer.py @@ -45,15 +45,15 @@ dim_feedforward = 4 * d_model class EmbeddingNet(Layer): + def __init__(self): super(EmbeddingNet, self).__init__() self.word_embeddings = nn.Embedding(vocab_size, hidden_size) self.position_embeddings = nn.Embedding(vocab_size, hidden_size) def forward(self, x): - attention_mask = paddle.tensor.triu( - (paddle.ones( - (length, length), dtype="float32") * -1e9), 1) + attention_mask = paddle.tensor.triu((paddle.ones( + (length, length), dtype="float32") * -1e9), 1) no_used = paddle.ones((3, 3), dtype="int32") @@ -68,6 +68,7 @@ class EmbeddingNet(Layer): class TransformerNet(Layer): + def __init__(self): super(TransformerNet, self).__init__() self.linear1 = nn.Linear(d_model, dim_feedforward) @@ -98,11 +99,13 @@ class TransformerNet(Layer): class EmbeddingPipe(EmbeddingNet): + def forward(self, x): return super().forward(x) class TransformerNetPipe(TransformerNet): + def forward(self, args): x, mask, no_used, p_emb = args[0], args[1], args[2], args[3] @@ -113,6 +116,7 @@ class TransformerNetPipe(TransformerNet): class CriterionPipe(Layer): + def __init__(self): super(CriterionPipe, self).__init__() @@ -122,6 +126,7 @@ class CriterionPipe(Layer): class ModelPipe(PipelineLayer): + def __init__(self, topology): self.descs = [] self.descs.append(LayerDesc(EmbeddingPipe)) @@ -131,14 +136,14 @@ class ModelPipe(PipelineLayer): self.descs.append(lambda x: x[0]) - super().__init__( - layers=self.descs, - loss_fn=CriterionPipe(), - topology=topology, - seg_method="layer:TransformerNetPipe") + super().__init__(layers=self.descs, + loss_fn=CriterionPipe(), + topology=topology, + seg_method="layer:TransformerNetPipe") class TestDistPPTraning(unittest.TestCase): + def setUp(self): strategy = 
fleet.DistributedStrategy() self.model_parallel_size = 1 @@ -165,8 +170,9 @@ class TestDistPPTraning(unittest.TestCase): set_random_seed(1024, dp_id, rank_id) model = ModelPipe(topology) - scheduler = paddle.optimizer.lr.PiecewiseDecay( - boundaries=[2], values=[0.001, 0.002], verbose=True) + scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[2], + values=[0.001, 0.002], + verbose=True) optimizer = paddle.optimizer.SGD(learning_rate=scheduler, parameters=model.parameters()) diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_sharding_model.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_sharding_model.py index 8cb1166cd0d..63bdcbc4d87 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_sharding_model.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_sharding_model.py @@ -57,6 +57,7 @@ def parallel_matmul(lm_output, logit_weights, parallel_output): class SimpleMPNet(fluid.dygraph.Layer): + def __init__(self, vocab_size, hidden_size, inner_size, output_size, np_fc1, np_fc2, mp_id): super(SimpleMPNet, self).__init__() @@ -107,6 +108,7 @@ class SimpleMPNet(fluid.dygraph.Layer): class SimpleDPNet(fluid.dygraph.Layer): + def __init__(self, vocab_size, hidden_size, inner_size, output_size, np_fc1, np_fc2): @@ -150,6 +152,7 @@ class SimpleDPNet(fluid.dygraph.Layer): class TestDistMPTraning(unittest.TestCase): + def setUp(self): random.seed(2021) np.random.seed(2021) @@ -166,7 +169,8 @@ class TestDistMPTraning(unittest.TestCase): self.data = [ np.random.randint(0, vocab_size, ( batch_size, - seq_length, )) for _ in range(STEPS) + seq_length, + )) for _ in range(STEPS) ] def train_batch(self, batch, model, optimizer): @@ -228,21 +232,19 @@ class TestDistMPTraning(unittest.TestCase): model_a = SimpleDPNet(vocab_size, hidden_size, inner_size, output_size, np_fc1, np_fc2) - optimizer_a = self.build_optimizer( - model_a, - strategy=self.strategy, - is_sharding=True, - Optimizer=Optimizer) + optimizer_a = self.build_optimizer(model_a, + strategy=self.strategy, + is_sharding=True, + Optimizer=Optimizer) model_a = fleet.distributed_model(model_a) optimizer_a = fleet.distributed_optimizer(optimizer_a) model_b = SimpleDPNet(vocab_size, hidden_size, inner_size, output_size, np_fc1, np_fc2) - optimizer_b = self.build_optimizer( - model_b, - strategy=self.strategy, - is_sharding=False, - Optimizer=Optimizer) + optimizer_b = self.build_optimizer(model_b, + strategy=self.strategy, + is_sharding=False, + Optimizer=Optimizer) return model_a, optimizer_a, model_b, optimizer_b @@ -257,8 +259,8 @@ class TestDistMPTraning(unittest.TestCase): if idx == 2 and paddle.distributed.get_rank() == 0: self.assertTrue( - set(optimizer_a._inner_opt._inner_optimizer.state_dict() - .keys()) == sharded_accumulators) + set(optimizer_a._inner_opt._inner_optimizer.state_dict(). 
+ keys()) == sharded_accumulators) if paddle.distributed.get_rank() == 0: batch_sharding = paddle.to_tensor(self.data[idx][:2]) @@ -270,10 +272,9 @@ class TestDistMPTraning(unittest.TestCase): loss_b = self.train_batch(batch_single, model_b, optimizer_b) for j in range(len(model_a.parameters())): - np.testing.assert_allclose( - model_a.parameters()[j].numpy(), - model_b.parameters()[j].numpy(), - rtol=1e-6) + np.testing.assert_allclose(model_a.parameters()[j].numpy(), + model_b.parameters()[j].numpy(), + rtol=1e-6) def test_sharding_adam(self): sharded_accumulators = set([ @@ -286,16 +287,16 @@ class TestDistMPTraning(unittest.TestCase): 'linear_0.w_0_beta2_pow_acc_0', 'linear_1.b_0_beta2_pow_acc_0', 'linear_2.b_0_beta2_pow_acc_0', 'embedding_0.w_0_beta2_pow_acc_0' ]) - self.sharding_model( - Optimizer="adam", sharded_accumulators=sharded_accumulators) + self.sharding_model(Optimizer="adam", + sharded_accumulators=sharded_accumulators) def test_sharding_momentum(self): sharded_accumulators = set([ 'linear_6.w_0_velocity_0', 'linear_7.b_0_velocity_0', 'linear_8.b_0_velocity_0', 'embedding_2.w_0_velocity_0' ]) - self.sharding_model( - Optimizer="Momentum", sharded_accumulators=sharded_accumulators) + self.sharding_model(Optimizer="Momentum", + sharded_accumulators=sharded_accumulators) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/hybrid_parallel_shared_weight.py b/python/paddle/fluid/tests/unittests/hybrid_parallel_shared_weight.py index 9253f737bf9..20bdb9f9d68 100644 --- a/python/paddle/fluid/tests/unittests/hybrid_parallel_shared_weight.py +++ b/python/paddle/fluid/tests/unittests/hybrid_parallel_shared_weight.py @@ -48,14 +48,15 @@ hidden_size = 16 class SimpleNet(Layer): + def __init__(self): super(SimpleNet, self).__init__() self.word_embeddings = nn.Embedding(vocab_size, hidden_size) self.softmax_weight = self.create_parameter( shape=[hidden_size, vocab_size]) - self.softmax_bias = self.create_parameter( - shape=[vocab_size], is_bias=False) + self.softmax_bias = self.create_parameter(shape=[vocab_size], + is_bias=False) def forward(self, x1, x2, y1): x_emb = self.word_embeddings(x1) @@ -65,12 +66,14 @@ class SimpleNet(Layer): projection = paddle.matmul(projection, self.word_embeddings.weight) - loss = fluid.layers.softmax_with_cross_entropy( - logits=projection, label=y1, soft_label=False) + loss = fluid.layers.softmax_with_cross_entropy(logits=projection, + label=y1, + soft_label=False) return loss.mean() class EmbeddingPipe(Layer): + def __init__(self): super(EmbeddingPipe, self).__init__() self.word_embeddings = nn.Embedding(vocab_size, hidden_size) @@ -86,6 +89,7 @@ class EmbeddingPipe(Layer): class MatmulNet(Layer): + def __init__(self): super(MatmulNet, self).__init__() self.softmax_weight = self.create_parameter( @@ -99,6 +103,7 @@ class MatmulNet(Layer): class BiasNet(Layer): + def __init__(self): super(BiasNet, self).__init__() self.softmax_bias = self.create_parameter(shape=[vocab_size]) @@ -111,22 +116,26 @@ class BiasNet(Layer): class LossNet(Layer): + def __init__(self): super(LossNet, self).__init__() def forward(self, args, y1): projection = args - loss = fluid.layers.softmax_with_cross_entropy( - logits=projection, label=y1[0], soft_label=False) + loss = fluid.layers.softmax_with_cross_entropy(logits=projection, + label=y1[0], + soft_label=False) return loss.mean() class SimpleNetPipe(PipelineLayer): + def __init__(self, **kwargs): self.descs = [] self.descs.append( - SharedLayerDesc( - 'embed', EmbeddingPipe, 
shared_weight_attr='embedding_weight')) + SharedLayerDesc('embed', + EmbeddingPipe, + shared_weight_attr='embedding_weight')) self.descs.append(LayerDesc(MatmulNet)) self.descs.append(LayerDesc(BiasNet)) @@ -135,17 +144,18 @@ class SimpleNetPipe(PipelineLayer): return paddle.matmul(output[0], embedding.embedding_weight) self.descs.append( - SharedLayerDesc( - 'embed', - EmbeddingPipe, - forward_func=_logits_helper, - shared_weight_attr='embedding_weight')) + SharedLayerDesc('embed', + EmbeddingPipe, + forward_func=_logits_helper, + shared_weight_attr='embedding_weight')) - super(SimpleNetPipe, self).__init__( - layers=self.descs, loss_fn=LossNet(), **kwargs) + super(SimpleNetPipe, self).__init__(layers=self.descs, + loss_fn=LossNet(), + **kwargs) class TestDistEmbeddingTraning(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() self.model_parallel_size = 1 diff --git a/python/paddle/fluid/tests/unittests/init_process_group.py b/python/paddle/fluid/tests/unittests/init_process_group.py index 17887a9d767..c1131d101de 100644 --- a/python/paddle/fluid/tests/unittests/init_process_group.py +++ b/python/paddle/fluid/tests/unittests/init_process_group.py @@ -30,6 +30,7 @@ from paddle.fluid.dygraph.parallel import ParallelEnv class TestProcessGroupFp32(unittest.TestCase): + def setUp(self): self.config() diff --git a/python/paddle/fluid/tests/unittests/interpreter/test_standalone_controlflow.py b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_controlflow.py index 7c1497a4853..eeddcaa5bb5 100644 --- a/python/paddle/fluid/tests/unittests/interpreter/test_standalone_controlflow.py +++ b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_controlflow.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -31,9 +31,10 @@ paddle.enable_static() # and new executor twice and check the result. 
# please override the _get_feeds() and build_prgram() class TestCompatibility(unittest.TestCase): + def setUp(self): - self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + self.place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() self.iter_run = 4 def _get_feed(self): @@ -42,15 +43,20 @@ class TestCompatibility(unittest.TestCase): return None def build_program(self): + def true_func(): - return layers.fill_constant( - shape=[1, 2], dtype='int32', value=1), layers.fill_constant( - shape=[2, 3], dtype='bool', value=True) + return layers.fill_constant(shape=[1, 2], dtype='int32', + value=1), layers.fill_constant( + shape=[2, 3], + dtype='bool', + value=True) def false_func(): - return layers.fill_constant( - shape=[3, 4], dtype='float32', value=3), layers.fill_constant( - shape=[4, 5], dtype='int64', value=2) + return layers.fill_constant(shape=[3, 4], dtype='float32', + value=3), layers.fill_constant( + shape=[4, 5], + dtype='int64', + value=2) main_program = Program() startup_program = Program() @@ -101,12 +107,14 @@ class TestCompatibility(unittest.TestCase): class TestWhile(TestCompatibility): + def _get_feed(self): """ return the feeds """ return None def build_program(self): + def cond(i, ten): return i < ten @@ -117,10 +125,10 @@ class TestWhile(TestCompatibility): main_program = paddle.static.default_main_program() startup_program = paddle.static.default_startup_program() with paddle.static.program_guard(main_program, startup_program): - i = paddle.full( - shape=[1], fill_value=0, dtype='int64') # loop counter - ten = paddle.full( - shape=[1], fill_value=10, dtype='int64') # loop length + i = paddle.full(shape=[1], fill_value=0, + dtype='int64') # loop counter + ten = paddle.full(shape=[1], fill_value=10, + dtype='int64') # loop length i, ten = paddle.static.nn.while_loop(cond, body, [i, ten]) exe = paddle.static.Executor(paddle.CPUPlace()) diff --git a/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py index a4dad5f53f1..7faff7ec181 100644 --- a/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py +++ b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,6 +13,7 @@ # limitations under the License. 
import os + os.environ['FLAGS_use_stream_safe_cuda_allocator'] = "true" import sys import shutil @@ -29,9 +30,10 @@ paddle.enable_static() class LinearTestCase(unittest.TestCase): + def setUp(self): - place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() self.place = core.Place() self.place.set_place(place) @@ -48,33 +50,27 @@ class LinearTestCase(unittest.TestCase): def test_interp_base(self): startup_program, main_program, c = self.build_program() - standaloneexecutor = StandaloneExecutor( - self.place, startup_program.desc, main_program.desc, core.Scope()) - out = standaloneexecutor.run({ - "a": np.ones( - [2, 2], dtype="float32") * 2 - }, [c.name]) + standaloneexecutor = StandaloneExecutor(self.place, + startup_program.desc, + main_program.desc, core.Scope()) + out = standaloneexecutor.run( + {"a": np.ones([2, 2], dtype="float32") * 2}, [c.name]) for i in range(10): - out = standaloneexecutor.run({ - "a": np.ones( - [2, 2], dtype="float32") * i - }, [c.name]) + out = standaloneexecutor.run( + {"a": np.ones([2, 2], dtype="float32") * i}, [c.name]) for i in range(10): - out = standaloneexecutor.run({ - "a": np.ones( - [2, 2], dtype="float32") * i - }, ['a', c.name]) + out = standaloneexecutor.run( + {"a": np.ones([2, 2], dtype="float32") * i}, ['a', c.name]) def test_dry_run(self): startup_program, main_program, c = self.build_program() - standaloneexecutor = StandaloneExecutor( - self.place, startup_program.desc, main_program.desc, core.Scope()) + standaloneexecutor = StandaloneExecutor(self.place, + startup_program.desc, + main_program.desc, core.Scope()) # test for cost_info - cost_info = standaloneexecutor.dry_run({ - "a": np.ones( - [2, 2], dtype="float32") - }) + cost_info = standaloneexecutor.dry_run( + {"a": np.ones([2, 2], dtype="float32")}) self.check_cost_info(cost_info) def check_cost_info(self, cost_info): @@ -120,10 +116,11 @@ def build_program(): class ExecutorStatisticsTestCase(unittest.TestCase): + def setUp(self): self.iter_n = 3 - self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + self.place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() def test_standalone_executor_statistics(self): if os.getenv("FLAGS_static_executor_perfstat_filepath") is None: @@ -221,10 +218,11 @@ class ExecutorStatisticsTestCase(unittest.TestCase): class MultiStreamModelTestCase(unittest.TestCase): + def setUp(self): self.iter_n = 2 - self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + self.place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() def test_result(self): ground_truths = self.run_raw_executor() @@ -274,6 +272,7 @@ class MultiStreamModelTestCase(unittest.TestCase): class SwitchExecutorInterfaceTestCase(MultiStreamModelTestCase): + def run_new_executor(self): paddle.seed(2020) os.environ['FLAGS_USE_STANDALONE_EXECUTOR'] = '1' @@ -291,9 +290,10 @@ class SwitchExecutorInterfaceTestCase(MultiStreamModelTestCase): class SwitchExecutorInterfaceWithFeed(unittest.TestCase): + def setUp(self): - self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + self.place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() self.iter_run = 2 def build_program(self, is_double=False): @@ -325,8 +325,8 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase): if use_compiled: 
main_program = paddle.static.CompiledProgram( - main_program).with_data_parallel( - fetch_vars[0].name, places=[self.place]) + main_program).with_data_parallel(fetch_vars[0].name, + places=[self.place]) if use_str: # test for fetch name fetch_vars = [x.name for x in fetch_vars] @@ -342,11 +342,15 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase): def run_raw_executor(self, feed, use_compiled=False): # run construct program 1 - out1 = self._run( - feed, use_str=False, is_double=False, use_compiled=use_compiled) + out1 = self._run(feed, + use_str=False, + is_double=False, + use_compiled=use_compiled) # run construct program 2 with same executor - out2 = self._run( - feed, use_str=True, is_double=True, use_compiled=use_compiled) + out2 = self._run(feed, + use_str=True, + is_double=True, + use_compiled=use_compiled) return [out1, out2] @@ -396,6 +400,7 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase): class TestException(unittest.TestCase): + def setUp(self): self.place = paddle.CPUPlace() self.fetch_vars = None @@ -407,8 +412,10 @@ class TestException(unittest.TestCase): w = paddle.rand([10, 3]) ids = paddle.static.data(name="id", shape=[5], dtype='int64') data = paddle.static.data(name="data", shape=[3], dtype='float32') - emb = paddle.nn.functional.embedding( - x=ids, weight=w, sparse=False, name="embedding") + emb = paddle.nn.functional.embedding(x=ids, + weight=w, + sparse=False, + name="embedding") emb = emb + data return main_program, startup_program, emb @@ -470,6 +477,7 @@ class TestException(unittest.TestCase): class TestInplaceApiWithDataTransform(unittest.TestCase): + def test_increment(self): if paddle.fluid.core.is_compiled_with_cuda(): with paddle.fluid.device_guard("gpu:0"): diff --git a/python/paddle/fluid/tests/unittests/interpreter/test_standalone_multiply_write.py b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_multiply_write.py index 5e298fc3dc7..8006c59d2ba 100644 --- a/python/paddle/fluid/tests/unittests/interpreter/test_standalone_multiply_write.py +++ b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_multiply_write.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -29,6 +29,7 @@ paddle.enable_static() class TestMultiplyWrite(TestCompatibility): + def _get_feed(self): """ return the feeds """ diff --git a/python/paddle/fluid/tests/unittests/ipu/op_test_ipu.py b/python/paddle/fluid/tests/unittests/ipu/op_test_ipu.py index ad11083b677..5f2a0d59bb8 100644 --- a/python/paddle/fluid/tests/unittests/ipu/op_test_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/op_test_ipu.py @@ -55,6 +55,7 @@ class ExecutionMode(IntEnum): class IPUTest(unittest.TestCase): + @classmethod def setUpClass(cls): # Get random seeds @@ -87,6 +88,7 @@ class IPUTest(unittest.TestCase): # Decorator for static graph building def static_graph(builder): + def wrapper(self, *args, **kwargs): self.scope = paddle.static.Scope() self.main_prog = paddle.static.Program() @@ -116,6 +118,7 @@ class IPUTest(unittest.TestCase): class IPUOpTest(IPUTest): + @classmethod def setUpClass(cls): super().setUpClass() @@ -181,8 +184,9 @@ class IPUOpTest(IPUTest): ipu_strategy.set_precision_config(enable_fp16=True) IPUOpTest.cast_model_to_fp16(self.main_prog) program = paddle.static.IpuCompiledProgram( - self.main_prog, ipu_strategy=ipu_strategy).compile( - self.feed_list, self.fetch_list) + self.main_prog, + ipu_strategy=ipu_strategy).compile(self.feed_list, + self.fetch_list) else: program = self.main_prog @@ -214,8 +218,10 @@ class IPUOpTest(IPUTest): ipu_fp32 = output_dict[ExecutionMode.IPU_FP32] cpu_fp32 = np.asarray(cpu_fp32).astype(np.float32).flatten() ipu_fp32 = np.asarray(ipu_fp32).astype(np.float32).flatten() - pass_check = np.allclose( - ipu_fp32, cpu_fp32, rtol=self.rtol, atol=self.atol) + pass_check = np.allclose(ipu_fp32, + cpu_fp32, + rtol=self.rtol, + atol=self.atol) if not pass_check: max_atol = np.abs(ipu_fp32 - cpu_fp32).max() cpu_fp32_abs = np.abs(cpu_fp32) @@ -231,8 +237,10 @@ class IPUOpTest(IPUTest): if ExecutionMode.IPU_FP16 in output_dict.keys(): ipu_fp16 = output_dict[ExecutionMode.IPU_FP16] ipu_fp16 = np.asarray(ipu_fp16).astype(np.float32).flatten() - pass_check = np.allclose( - ipu_fp16, cpu_fp32, rtol=self.rtol_fp16, atol=self.atol_fp16) + pass_check = np.allclose(ipu_fp16, + cpu_fp32, + rtol=self.rtol_fp16, + atol=self.atol_fp16) if not pass_check: max_atol = np.abs(ipu_fp16 - cpu_fp32).max() cpu_fp32_abs = np.abs(cpu_fp32) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_activation_x_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_activation_x_op_ipu.py index b90c3374db9..19abf74a556 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_activation_x_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_activation_x_op_ipu.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestRelu(IPUOpTest): + def setUp(self): self.set_atol() self.set_test_op() @@ -46,8 +47,9 @@ class TestRelu(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = self.op(x, **self.op_attrs) self.fetch_list = [out.name] @@ -63,24 +65,28 @@ class TestRelu(IPUOpTest): class TestTanh(TestRelu): + def set_test_op(self): self.op = F.tanh self.op_attrs = {} class TestLog(TestRelu): + def set_test_op(self): self.op = paddle.fluid.layers.log self.op_attrs = {} class TestSigmoid(TestRelu): + def set_test_op(self): self.op = F.sigmoid self.op_attrs = {} 
class TestSqrt(TestRelu): + def set_test_op(self): self.op = paddle.fluid.layers.sqrt self.op_attrs = {} diff --git a/python/paddle/fluid/tests/unittests/ipu/test_arg_max_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_arg_max_op_ipu.py index c48ce75ccd9..3612656cea3 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_arg_max_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_arg_max_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -45,8 +46,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.argmax(x, **self.attrs) self.fetch_list = [out.name] @@ -64,6 +66,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {"axis": 0} diff --git a/python/paddle/fluid/tests/unittests/ipu/test_assign_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_assign_op_ipu.py index 1239a97f2f6..3b2034ebe83 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_assign_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_assign_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -40,8 +41,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') x = paddle.assign(x) out = paddle.fluid.layers.elementwise_add(x, x) self.fetch_list = [out.name] @@ -58,6 +60,7 @@ class TestBase(IPUOpTest): class TestAssignFp32Value(TestBase): + def set_data_feed(self): data = np.random.uniform(size=[2, 3, 1]) self.feed_fp32 = {'in_0': data.astype(np.float32)} @@ -68,14 +71,16 @@ class TestAssignFp32Value(TestBase): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') assign = paddle.assign(self.assign_fp32) out = paddle.fluid.layers.elementwise_add(x, assign) self.fetch_list = [out.name] class TestAssignBoolValue(TestBase): + def set_data_feed(self): data = np.random.uniform(size=[2, 3, 1]) self.feed_fp32 = {'in_0': data.astype(np.float32)} @@ -85,8 +90,9 @@ class TestAssignBoolValue(TestBase): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') x = paddle.less_than(x, x) assign = paddle.assign(self.assign_bool) x = paddle.logical_and(x, assign) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_avg_shard_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_avg_shard_ipu.py index cf494034fd8..3f45bf485b8 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_avg_shard_ipu.py +++ 
b/python/paddle/fluid/tests/unittests/ipu/test_avg_shard_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -46,16 +47,25 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - x = paddle.static.nn.conv2d( - x, num_filters=3, filter_size=3, bias_attr=False) - x = paddle.static.nn.conv2d( - x, num_filters=3, filter_size=3, bias_attr=False) - x = paddle.static.nn.conv2d( - x, num_filters=3, filter_size=3, bias_attr=False) - x = paddle.static.nn.conv2d( - x, num_filters=3, filter_size=3, bias_attr=False) + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + x = paddle.static.nn.conv2d(x, + num_filters=3, + filter_size=3, + bias_attr=False) + x = paddle.static.nn.conv2d(x, + num_filters=3, + filter_size=3, + bias_attr=False) + x = paddle.static.nn.conv2d(x, + num_filters=3, + filter_size=3, + bias_attr=False) + x = paddle.static.nn.conv2d(x, + num_filters=3, + filter_size=3, + bias_attr=False) self.fetch_list = [x.name] def run_model(self, exec_mode): diff --git a/python/paddle/fluid/tests/unittests/ipu/test_batch_norm_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_batch_norm_op_ipu.py index adb2abfc474..2d2d3315439 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_batch_norm_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_batch_norm_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -53,10 +54,13 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - x = paddle.static.nn.conv2d( - x, num_filters=3, filter_size=3, bias_attr=False) + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + x = paddle.static.nn.conv2d(x, + num_filters=3, + filter_size=3, + bias_attr=False) x = paddle.fluid.layers.batch_norm(x, **self.attrs) self.fetch_list = [x.name] @@ -72,6 +76,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_atol(self): self.atol = 1e-6 self.rtol = 1e-6 @@ -86,6 +91,7 @@ class TestCase1(TestBase): class TestCase2(TestBase): + def set_atol(self): self.atol = 1e-6 self.rtol = 1e-6 diff --git a/python/paddle/fluid/tests/unittests/ipu/test_cast_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_cast_op_ipu.py index d7b15a44295..f361b779bb3 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_cast_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_cast_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -49,10 +50,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], - shape=self.feed_shape[0], - dtype=self.feed_dtype[0]) + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + 
dtype=self.feed_dtype[0]) out = paddle.cast(x, **self.attrs) self.fetch_list = [out.name] @@ -68,6 +68,7 @@ class TestBase(IPUOpTest): class TestEnableFp16(TestBase): + @property def fp16_enabled(self): return True @@ -86,6 +87,7 @@ class TestEnableFp16(TestBase): class TestCase2(TestBase): + def set_data_feed(self): self.feed_fp32 = { "x": np.random.uniform(size=[1, 3, 3, 3]).astype('float16'), @@ -97,6 +99,7 @@ class TestCase2(TestBase): class TestCase3(TestBase): + def set_data_feed(self): self.feed_fp32 = { "x": np.random.uniform(size=[1, 3, 3, 3]).astype('float32'), @@ -108,6 +111,7 @@ class TestCase3(TestBase): class TestCase4(TestBase): + def set_data_feed(self): self.feed_fp32 = { "x": np.random.uniform(size=[1, 3, 3, 3]).astype('int32'), @@ -119,6 +123,7 @@ class TestCase4(TestBase): class TestCase5(TestBase): + def set_data_feed(self): self.feed_fp32 = { "x": np.random.uniform(size=[1, 3, 3, 3]).astype('float16'), @@ -130,6 +135,7 @@ class TestCase5(TestBase): class TestCase6(TestBase): + def set_data_feed(self): self.feed_fp32 = { "x": np.random.uniform(size=[1, 3, 3, 3]).astype('int32'), @@ -142,6 +148,7 @@ class TestCase6(TestBase): @unittest.skip('float64 is not supported') class TestCase2(TestBase): + def set_op_attrs(self): self.attrs = {} self.attrs['dtype'] = 'float64' @@ -149,6 +156,7 @@ class TestCase2(TestBase): @unittest.skip('skip float16 to float32') class TestCase3(TestBase): + def set_data_feed(self): self.feed_fp32 = { "x": np.random.uniform(size=[1, 3, 3, 3]).astype('float16'), @@ -161,14 +169,16 @@ class TestCase3(TestBase): @unittest.skip('int32 to int8 is not supported') class TestCase4(TestBase): + def set_atol(self): super().set_atol() self.atol = 1 def set_data_feed(self): self.feed_fp32 = { - "x": np.random.randint( - low=1, high=100, size=[1, 3, 3, 3]).astype('int32'), + "x": + np.random.randint(low=1, high=100, size=[1, 3, 3, + 3]).astype('int32'), } def set_op_attrs(self): diff --git a/python/paddle/fluid/tests/unittests/ipu/test_concat_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_concat_op_ipu.py index a5410ab4990..d0160551b93 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_concat_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_concat_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -51,10 +52,12 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='float32') out = paddle.fluid.layers.concat([x, y], **self.attrs) self.fetch_list = [out.name] @@ -70,6 +73,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {"axis": 1} diff --git a/python/paddle/fluid/tests/unittests/ipu/test_conv_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_conv_op_ipu.py index e450621b11d..5a2485e251c 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_conv_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_conv_op_ipu.py @@ -23,6 +23,7 @@ from 
paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -54,8 +55,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') x = paddle.fluid.layers.conv2d(x, **self.attrs) self.fetch_list = [x.name] @@ -71,54 +73,63 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['num_filters'] = 1 class TestCase2(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['filter_size'] = [3, 3] class TestCase2_1(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['filter_size'] = [3, 2] class TestCase3(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['stride'] = [2, 3] class TestCase4(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['dilation'] = [2, 2] class TestCase5(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['groups'] = 3 class TestCase6(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['padding'] = 2 class TestCase7(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['padding'] = [2, 3] class TestCase8(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['padding'] = [1, 2, 2, 3] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_cross_entropy2_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_cross_entropy2_op_ipu.py index d035673e219..ffd4368c089 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_cross_entropy2_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_cross_entropy2_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -47,20 +48,26 @@ class TestBase(IPUOpTest): self.feed_list = list(self.feed_fp32.keys()) def set_op_attrs(self): - self.attrs = {'soft_label': False, } + self.attrs = { + 'soft_label': False, + } @IPUOpTest.static_graph def build_model(self, on_ipu): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype="float32") + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype="float32") if on_ipu: - label = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='int32') + label = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='int32') else: - label = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='int64') - out = paddle.fluid.layers.cross_entropy( - input=x, label=label, **self.attrs) + label = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='int64') + out = paddle.fluid.layers.cross_entropy(input=x, + label=label, + **self.attrs) self.fetch_list = [out.name] def run_model(self, exec_mode): @@ -77,6 +84,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = { 'soft_label': False, @@ -85,6 +93,7 @@ class TestCase1(TestBase): class TestCase2(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[30, 70]) label = 
np.arange(30).reshape([30, 1]) @@ -100,8 +109,11 @@ class TestCase2(TestBase): @unittest.skip("soft_label=True is not supported") class TestCase3(TestBase): + def set_op_attrs(self): - self.attrs = {'soft_label': True, } + self.attrs = { + 'soft_label': True, + } if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ipu/test_cumsum_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_cumsum_op_ipu.py index a0a145fb72b..75cd3c92322 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_cumsum_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_cumsum_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -50,8 +51,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype="float32") + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype="float32") out = paddle.fluid.layers.cumsum(x, **self.attrs) self.fetch_list = [out.name] @@ -67,16 +69,19 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {"exclusive": True, "reverse": False} class TestCase2(TestBase): + def set_op_attrs(self): self.attrs = {"exclusive": False, "reverse": True} class TestCase3(TestBase): + def set_op_attrs(self): self.attrs = {"exclusive": True, "reverse": True} diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dropout_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_dropout_op_ipu.py index 4e3b03ffca0..be96762549d 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dropout_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_dropout_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -48,8 +49,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') x = paddle.fluid.layers.dropout(x, **self.attrs) out = paddle.fluid.layers.elementwise_add(x, x) self.fetch_list = [out.name] @@ -66,6 +68,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = { "dropout_prob": 0.5, @@ -75,6 +78,7 @@ class TestCase1(TestBase): class TestCase2(TestBase): + def set_op_attrs(self): self.attrs = { "dropout_prob": 0.0, diff --git a/python/paddle/fluid/tests/unittests/ipu/test_elemetwise_x_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_elemetwise_x_op_ipu.py index 24082fe49ba..f78f446404d 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_elemetwise_x_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_elemetwise_x_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestMul(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -44,10 +45,12 @@ class TestMul(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], 
shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='float32') out = self.op(x, y, **self.attrs) self.fetch_list = [out.name] @@ -124,36 +127,43 @@ class TestMul(IPUOpTest): class TestAdd(TestMul): + def set_test_op(self): self.op = paddle.fluid.layers.elementwise_add class TestSub(TestMul): + def set_test_op(self): self.op = paddle.fluid.layers.elementwise_sub class TestDiv(TestMul): + def set_test_op(self): self.op = paddle.fluid.layers.elementwise_div class TestMin(TestMul): + def set_test_op(self): self.op = paddle.fluid.layers.elementwise_min class TestMax(TestMul): + def set_test_op(self): self.op = paddle.fluid.layers.elementwise_max class TestPow(TestMul): + def set_test_op(self): self.op = paddle.fluid.layers.elementwise_pow class TestMod(TestMul): + def set_atol(self): self.atol = 1e-7 self.rtol = 1e-5 diff --git a/python/paddle/fluid/tests/unittests/ipu/test_equal_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_equal_op_ipu.py index 56b9a73f080..ad419c2e2bf 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_equal_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_equal_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -51,10 +52,12 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='float32') out = paddle.fluid.layers.equal(x, y, **self.attrs) self.fetch_list = [out.name] @@ -70,6 +73,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_data_feed(self): x = np.ones([1, 10]) y = np.ones([1, 10]) @@ -78,6 +82,7 @@ class TestCase1(TestBase): class TestCase2(TestBase): + def set_data_feed(self): x = np.ones([1, 10]) y = np.arange(0, 10).reshape([1, 10]) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_eval_model_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_eval_model_ipu.py index 30a4a537079..f81f5d7de74 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_eval_model_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_eval_model_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_data_feed() @@ -58,22 +59,25 @@ class TestBase(IPUOpTest): with paddle.static.scope_guard(scope): with paddle.static.program_guard(main_prog, startup_prog): - image = paddle.static.data( - name='image', shape=[1, 3, 10, 10], dtype='float32') - conv1 = paddle.static.nn.conv2d( - image, num_filters=3, filter_size=3, bias_attr=False) + image = paddle.static.data(name='image', + shape=[1, 3, 10, 10], + dtype='float32') + conv1 = paddle.static.nn.conv2d(image, + num_filters=3, + 
filter_size=3, + bias_attr=False) loss = paddle.mean(conv1) weight_decay = self.attrs['weight_decay'] opt = paddle.optimizer.SGD(learning_rate=1e-1, weight_decay=weight_decay) if self.attrs['optimizer'] == 'adam': - opt = paddle.optimizer.Adam( - learning_rate=1e-1, weight_decay=weight_decay) + opt = paddle.optimizer.Adam(learning_rate=1e-1, + weight_decay=weight_decay) elif self.attrs['optimizer'] == 'lamb': - opt = paddle.optimizer.Lamb( - learning_rate=1e-1, lamb_weight_decay=weight_decay) + opt = paddle.optimizer.Lamb(learning_rate=1e-1, + lamb_weight_decay=weight_decay) opt.minimize(loss) if run_ipu: @@ -90,8 +94,8 @@ class TestBase(IPUOpTest): ipu_strategy.set_graph_config(is_training=True) ipu_strategy.set_options({"runtime_options.enable_eval": True}) program = paddle.static.IpuCompiledProgram( - main_prog, ipu_strategy=ipu_strategy).compile(feed_list, - fetch_list) + main_prog, + ipu_strategy=ipu_strategy).compile(feed_list, fetch_list) else: program = main_prog @@ -99,9 +103,8 @@ class TestBase(IPUOpTest): if run_ipu: for epoch in range(200): if epoch == 100: - ipu_strategy.set_options({ - "runtime_options.enable_eval": False - }) + ipu_strategy.set_options( + {"runtime_options.enable_eval": False}) loss_res = exe.run(program, feed=self.feed, fetch_list=[loss]) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_expand_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_expand_op_ipu.py index 211aa4a61a5..872f4a4bef1 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_expand_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_expand_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -45,8 +46,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype="float32") + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype="float32") out = paddle.fluid.layers.expand(x, **self.attrs) self.fetch_list = [out.name] @@ -62,6 +64,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[2, 2]) self.feed_fp32 = {"x": x.astype(np.float32)} @@ -77,12 +80,14 @@ class TestCase1(TestBase): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype="float32") + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype="float32") expand_times = paddle.fluid.layers.fill_constant( shape=[len(self.feed_shape[0])], dtype="int32", value=2) - out = paddle.fluid.layers.expand( - x, expand_times=expand_times, **self.attrs) + out = paddle.fluid.layers.expand(x, + expand_times=expand_times, + **self.attrs) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_fill_any_like_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_fill_any_like_op_ipu.py index b3faabda3cd..a6c49743302 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_fill_any_like_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_fill_any_like_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): 
self.set_atol() self.set_training() @@ -44,8 +45,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') x_fill = paddle.full_like(x, **self.attrs) out = paddle.fluid.layers.elementwise_add(x_fill, x_fill) self.fetch_list = [out.name] @@ -62,6 +64,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {'fill_value': 3, 'dtype': 'int32'} diff --git a/python/paddle/fluid/tests/unittests/ipu/test_fill_constant_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_fill_constant_op_ipu.py index ce457b7abeb..4d4d8835189 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_fill_constant_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_fill_constant_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -65,6 +66,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = { 'name': 'x', diff --git a/python/paddle/fluid/tests/unittests/ipu/test_flatten_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_flatten_op_ipu.py index a8d530f6b77..29dd9510dda 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_flatten_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_flatten_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -45,8 +46,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.flatten(x=x, **self.attrs) self.fetch_list = [out.name] @@ -62,12 +64,14 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {} self.attrs['axis'] = 0 class TestCase2(TestBase): + def set_op_attrs(self): self.attrs = {} self.attrs['axis'] = 2 diff --git a/python/paddle/fluid/tests/unittests/ipu/test_fp16_support_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_fp16_support_ipu.py index 1d3b17dbc2d..0cfe7692250 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_fp16_support_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_fp16_support_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -51,15 +52,22 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - conv1 = paddle.static.nn.conv2d( - x, num_filters=3, filter_size=3, bias_attr=False) - conv2 = paddle.static.nn.conv2d( - x, num_filters=3, filter_size=3, bias_attr=False) + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + conv1 = paddle.static.nn.conv2d(x, + 
num_filters=3, + filter_size=3, + bias_attr=False) + conv2 = paddle.static.nn.conv2d(x, + num_filters=3, + filter_size=3, + bias_attr=False) add1 = conv1 + conv2 - conv3 = paddle.static.nn.conv2d( - add1, num_filters=8, filter_size=8, bias_attr=False) + conv3 = paddle.static.nn.conv2d(add1, + num_filters=8, + filter_size=8, + bias_attr=False) out = paddle.fluid.layers.relu(conv3, **self.attrs) self.fetch_list = [out.name] @@ -75,6 +83,7 @@ class TestBase(IPUOpTest): class TestIntInput(TestBase): + def set_data_feed(self): embedding = np.random.uniform(size=[10, 20]) indice = np.array([1, 3, 5]).astype(np.int32) @@ -89,10 +98,12 @@ class TestIntInput(TestBase): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='int32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='int32') out = paddle.fluid.layers.gather(x, index=y) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_gather_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_gather_op_ipu.py index bbf3ec0ffdf..42ba6babd79 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_gather_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_gather_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -45,10 +46,12 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='int32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='int32') out = paddle.fluid.layers.gather(x, index=y, **self.attrs) self.fetch_list = [out.name] @@ -64,6 +67,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[100]) y = np.array([1, 3, 5]) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_gelu_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_gelu_op_ipu.py index e9721463876..673c7c05032 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_gelu_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_gelu_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -44,8 +45,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.gelu(x, **self.attrs) self.fetch_list = [out.name] @@ -61,6 +63,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_atol(self): self.atol = 1e-10 self.rtol = 1e-6 diff --git 
a/python/paddle/fluid/tests/unittests/ipu/test_gradient_clip_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_gradient_clip_ipu.py index b7567f60cc3..7eea222e5e3 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_gradient_clip_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_gradient_clip_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_data_feed() @@ -61,10 +62,13 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - image = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - conv1 = paddle.static.nn.conv2d( - image, num_filters=3, filter_size=3, bias_attr=False) + image = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + conv1 = paddle.static.nn.conv2d(image, + num_filters=3, + filter_size=3, + bias_attr=False) loss = paddle.mean(conv1) self.fetch_list = [loss.name] @@ -76,13 +80,13 @@ class TestBase(IPUOpTest): weight_decay=weight_decay, grad_clip=clip) elif self.attrs['optimizer'] == 'adam': - opt = paddle.optimizer.Adam( - learning_rate=1e-1, weight_decay=weight_decay, grad_clip=clip) + opt = paddle.optimizer.Adam(learning_rate=1e-1, + weight_decay=weight_decay, + grad_clip=clip) elif self.attrs['optimizer'] == 'lamb': - opt = paddle.optimizer.Lamb( - learning_rate=1e-1, - lamb_weight_decay=weight_decay, - grad_clip=clip) + opt = paddle.optimizer.Lamb(learning_rate=1e-1, + lamb_weight_decay=weight_decay, + grad_clip=clip) else: raise ValueError( f"Not supported optimizer {self.attrs['optimizer']} for test") @@ -100,6 +104,7 @@ class TestBase(IPUOpTest): class TestAdam(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'adam', @@ -108,6 +113,7 @@ class TestAdam(TestBase): class TestLamb(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'lamb', diff --git a/python/paddle/fluid/tests/unittests/ipu/test_greater_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_greater_op_ipu.py index c499bb0bd5f..eb3c0601dd1 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_greater_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_greater_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestGreaterThan(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -36,10 +37,12 @@ class TestGreaterThan(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='float32') out = self.op(x, y, **self.attrs) self.fetch_list = [out.name] @@ -113,11 +116,13 @@ class TestGreaterThan(IPUOpTest): class TestLessThan(TestGreaterThan): + def set_test_op(self): self.op = paddle.fluid.layers.less_than class TestEqual(TestGreaterThan): + def set_test_op(self): self.op = paddle.fluid.layers.equal diff --git a/python/paddle/fluid/tests/unittests/ipu/test_groupnorm_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_groupnorm_op_ipu.py index 
bb984a8d907..4c5098640fd 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_groupnorm_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_groupnorm_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -54,23 +55,30 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') if self.is_training: ch = self.feed_shape[0][1] - conv1 = paddle.static.nn.conv2d( - x, num_filters=ch, filter_size=3, bias_attr=False) + conv1 = paddle.static.nn.conv2d(x, + num_filters=ch, + filter_size=3, + bias_attr=False) scale = paddle.ParamAttr(trainable=True) bias = paddle.ParamAttr(trainable=True) - out = paddle.fluid.layers.nn.group_norm( - conv1, param_attr=scale, bias_attr=bias, **self.attrs) + out = paddle.fluid.layers.nn.group_norm(conv1, + param_attr=scale, + bias_attr=bias, + **self.attrs) loss = paddle.mean(out) adam = paddle.optimizer.Adam(learning_rate=1e-2) adam.minimize(loss) self.fetch_list = [loss.name] else: - out = paddle.fluid.layers.nn.group_norm( - x, param_attr=True, bias_attr=True, **self.attrs) + out = paddle.fluid.layers.nn.group_norm(x, + param_attr=True, + bias_attr=True, + **self.attrs) self.fetch_list = [out.name] def run_model(self, exec_mode): @@ -85,6 +93,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = { "groups": 4, @@ -94,6 +103,7 @@ class TestCase1(TestBase): class TestTrainCase1(TestBase): + def set_training(self): self.is_training = True self.epoch = 20 @@ -101,6 +111,7 @@ class TestTrainCase1(TestBase): @unittest.skipIf(IPUOpTest.use_ipumodel(), "skip for ipumodel") class TestTrainCase2(TestBase): + def set_atol(self): self.atol = 7e-4 self.rtol = 1e-6 diff --git a/python/paddle/fluid/tests/unittests/ipu/test_inference_model_io_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_inference_model_io_ipu.py index 33a63a80e3b..18cd5e30e88 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_inference_model_io_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_inference_model_io_ipu.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_data_feed() @@ -66,16 +67,14 @@ class TestBase(IPUOpTest): with paddle.fluid.unique_name.guard(generator): with paddle.static.scope_guard(scope): with paddle.static.program_guard(main_prog, startup_prog): - x = paddle.static.data( - name=self.feed_list[0], - shape=self.feed_shape[0], - dtype='float32') - conv1 = paddle.static.nn.conv2d( - x, - num_filters=3, - filter_size=3, - bias_attr=False, - name='conv2d') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + conv1 = paddle.static.nn.conv2d(x, + num_filters=3, + filter_size=3, + bias_attr=False, + name='conv2d') loss = paddle.mean(conv1) if self.attrs['is_training']: @@ -98,8 +97,9 @@ class TestBase(IPUOpTest): ipu_strategy.set_graph_config( is_training=self.attrs['is_training']) program = paddle.static.IpuCompiledProgram( - main_prog, ipu_strategy=ipu_strategy).compile( - 
self.feed_list, fetch_list) + main_prog, + ipu_strategy=ipu_strategy).compile(self.feed_list, + fetch_list) result = [] for i in range(self.attrs['steps']): @@ -108,8 +108,11 @@ class TestBase(IPUOpTest): fetch_list=fetch_list) result.append(tmp) - paddle.static.save_inference_model( - self.full_name, x, loss, exe, program=program.org_program) + paddle.static.save_inference_model(self.full_name, + x, + loss, + exe, + program=program.org_program) def _test_load(self, run_ipu): if run_ipu: @@ -118,8 +121,8 @@ class TestBase(IPUOpTest): place = paddle.CPUPlace() exe = paddle.static.Executor(place) - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model(self.full_name, exe)) + [inference_program, feed_target_names, fetch_targets + ] = (paddle.static.load_inference_model(self.full_name, exe)) if run_ipu: feed_list = feed_target_names @@ -146,6 +149,7 @@ class TestBase(IPUOpTest): class TestAdam(TestBase): + def set_op_attrs(self): self.attrs = {} self.attrs['steps'] = 100 @@ -156,6 +160,7 @@ class TestAdam(TestBase): class TestLamb(TestBase): + def set_op_attrs(self): self.attrs = {} self.attrs['steps'] = 100 diff --git a/python/paddle/fluid/tests/unittests/ipu/test_instancenorm_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_instancenorm_op_ipu.py index fa425cbf9f9..3828728a567 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_instancenorm_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_instancenorm_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -50,24 +51,31 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') if self.is_training: ch = self.feed_shape[0][1] - conv1 = paddle.static.nn.conv2d( - x, num_filters=ch, filter_size=3, bias_attr=False) + conv1 = paddle.static.nn.conv2d(x, + num_filters=ch, + filter_size=3, + bias_attr=False) scale = paddle.ParamAttr(trainable=True) bias = paddle.ParamAttr(trainable=True) - out = paddle.fluid.layers.nn.instance_norm( - conv1, param_attr=scale, bias_attr=bias, **self.attrs) + out = paddle.fluid.layers.nn.instance_norm(conv1, + param_attr=scale, + bias_attr=bias, + **self.attrs) loss = paddle.mean(out) adam = paddle.optimizer.Adam(learning_rate=1e-2) adam.minimize(loss) self.fetch_list = [loss.name] else: - out = paddle.fluid.layers.nn.instance_norm( - x, param_attr=True, bias_attr=True, **self.attrs) + out = paddle.fluid.layers.nn.instance_norm(x, + param_attr=True, + bias_attr=True, + **self.attrs) self.fetch_list = [out.name] def run_model(self, exec_mode): @@ -82,6 +90,7 @@ class TestBase(IPUOpTest): class TestTrainCase1(TestBase): + def set_training(self): self.is_training = True self.epoch = 10 diff --git a/python/paddle/fluid/tests/unittests/ipu/test_ipu_shard_api_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_ipu_shard_api_ipu.py index 76ab1a2c3f3..13f146f6fd7 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_ipu_shard_api_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_ipu_shard_api_ipu.py @@ -24,6 +24,7 @@ paddle.enable_static() @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class 
TestIpuShard(unittest.TestCase): + def _test(self): # build graph main_prog = paddle.static.Program() @@ -61,13 +62,13 @@ class TestIpuShard(unittest.TestCase): ipu_index_list = self._test() expected_ipu_index_list = [1, 2, 3, 1, 2, 1, 2] self.assertTrue( - np.allclose( - ipu_index_list, expected_ipu_index_list, atol=0)) + np.allclose(ipu_index_list, expected_ipu_index_list, atol=0)) @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestIpuPipeline(unittest.TestCase): + def _test(self): # build graph main_prog = paddle.static.Program() @@ -106,8 +107,7 @@ class TestIpuPipeline(unittest.TestCase): expected_ipu_index_list = [1, 2, 3, 1, 2, 1, 2] self.assertTrue( - np.allclose( - ipu_index_list, expected_ipu_index_list, atol=0)) + np.allclose(ipu_index_list, expected_ipu_index_list, atol=0)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py index 21a66554067..14128109029 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py @@ -23,6 +23,7 @@ paddle.enable_static() @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestIpuStrategy(unittest.TestCase): + def test_set_options(self): ipu_strategy = paddle.static.IpuStrategy() all_option_names = ipu_strategy._ipu_strategy.get_all_option_names() @@ -78,14 +79,15 @@ class TestIpuStrategy(unittest.TestCase): for k, v in options.items(): ipu_strategy.set_options({k: v}) if (isinstance(v, list)): - assert v.sort() == ipu_strategy.get_option(k).sort( - ), f"set {k} to {v} failed " + assert v.sort() == ipu_strategy.get_option( + k).sort(), f"set {k} to {v} failed " else: assert v == ipu_strategy.get_option( k), f"set {k} to {v} failed " # The custom logger need 2 int as inputs - logger = lambda progress, total: print(f"compile progrss: {progress}/{total}") + logger = lambda progress, total: print( + f"compile progrss: {progress}/{total}") ipu_strategy.set_options({'compilation_progress_logger': logger}) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_layernorm_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_layernorm_op_ipu.py index cab2fa3fde2..e365ffd4e16 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_layernorm_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_layernorm_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -57,23 +58,30 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') if self.is_training: ch = self.feed_shape[0][1] - conv1 = paddle.static.nn.conv2d( - x, num_filters=ch, filter_size=3, bias_attr=False) + conv1 = paddle.static.nn.conv2d(x, + num_filters=ch, + filter_size=3, + bias_attr=False) scale = paddle.ParamAttr(trainable=True) bias = paddle.ParamAttr(trainable=True) - out = paddle.fluid.layers.nn.layer_norm( - conv1, param_attr=scale, bias_attr=bias, **self.attrs) + out = paddle.fluid.layers.nn.layer_norm(conv1, + param_attr=scale, + bias_attr=bias, + **self.attrs) loss = 
paddle.mean(out) self.fetch_list = [loss.name] else: scale = self.attrs['scale'] bias = self.attrs['shift'] - out = paddle.fluid.layers.nn.layer_norm( - x, param_attr=scale, bias_attr=bias, **self.attrs) + out = paddle.fluid.layers.nn.layer_norm(x, + param_attr=scale, + bias_attr=bias, + **self.attrs) self.fetch_list = [out.name] if self.is_training: @@ -83,8 +91,8 @@ class TestBase(IPUOpTest): elif self.optimizer == 'adam': optimizer = paddle.optimizer.Adam(learning_rate=1e-2) elif self.optimizer == 'lamb': - optimizer = paddle.optimizer.Lamb( - learning_rate=1e-2, lamb_weight_decay=0.0) + optimizer = paddle.optimizer.Lamb(learning_rate=1e-2, + lamb_weight_decay=0.0) if optimizer is not None: optimizer.minimize(loss) @@ -101,6 +109,7 @@ class TestBase(IPUOpTest): @unittest.skip('raise error') class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = { "scale": False, @@ -112,6 +121,7 @@ class TestCase1(TestBase): @unittest.skip('raise error') class TestCase2(TestBase): + def set_op_attrs(self): self.attrs = { "scale": True, @@ -122,6 +132,7 @@ class TestCase2(TestBase): class TestCase3(TestBase): + def set_op_attrs(self): self.attrs = { "scale": True, @@ -133,6 +144,7 @@ class TestCase3(TestBase): class TestTrainCase1(TestBase): + def set_op_attrs(self): self.attrs = { "scale": True, @@ -152,6 +164,7 @@ class TestTrainCase1(TestBase): class TestTrainCase3(TestBase): + def set_atol(self): super().set_atol() self.atol = 5e-3 diff --git a/python/paddle/fluid/tests/unittests/ipu/test_log_softmax_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_log_softmax_op_ipu.py index c0e4865b3a6..6711894c7de 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_log_softmax_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_log_softmax_op_ipu.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -47,8 +48,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = F.log_softmax(x, **self.attrs) self.fetch_list = [out.name] @@ -64,6 +66,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_attrs(self): self.attrs = {"axis": 1} diff --git a/python/paddle/fluid/tests/unittests/ipu/test_logical_not_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_logical_not_op_ipu.py index 725d2b3429a..a406fa128fc 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_logical_not_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_logical_not_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -41,8 +42,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype="bool") + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype="bool") out = paddle.fluid.layers.logical_not(x) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_logical_x_op_ipu.py 
b/python/paddle/fluid/tests/unittests/ipu/test_logical_x_op_ipu.py index 55a2c08c1b5..71a75db9ab3 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_logical_x_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_logical_x_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestLogicalAnd(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -40,14 +41,12 @@ class TestLogicalAnd(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], - shape=self.feed_shape[0], - dtype=self.feed_dtype[0]) - y = paddle.static.data( - name=self.feed_list[1], - shape=self.feed_shape[1], - dtype=self.feed_dtype[1]) + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype=self.feed_dtype[0]) + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype=self.feed_dtype[1]) out = self.op(x, y, **self.attrs) self.fetch_list = [out.name] @@ -82,6 +81,7 @@ class TestLogicalAnd(IPUOpTest): class TestLogicalOr(TestLogicalAnd): + def set_test_op(self): self.op = paddle.fluid.layers.logical_or diff --git a/python/paddle/fluid/tests/unittests/ipu/test_lookuptable_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_lookuptable_op_ipu.py index 80636348cfa..27a70329ca1 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_lookuptable_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_lookuptable_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -51,8 +52,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='int64') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='int64') out = paddle.fluid.layers.embedding(x, **self.attrs) if self.is_training: loss = paddle.mean(out) @@ -76,6 +78,7 @@ class TestBase(IPUOpTest): class TestTrainCase1(TestBase): + def set_atol(self): self.atol = 1e-7 self.rtol = 1e-6 diff --git a/python/paddle/fluid/tests/unittests/ipu/test_lookuptable_v2_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_lookuptable_v2_op_ipu.py index 7f021a615af..c15eb3a3b8e 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_lookuptable_v2_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_lookuptable_v2_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -51,8 +52,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='int64') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='int64') embedding = paddle.nn.Embedding(**self.attrs) out = embedding(x) if self.is_training: @@ -77,6 +79,7 @@ class TestBase(IPUOpTest): class TestTrainCase1(TestBase): + def set_atol(self): self.atol = 1e-7 self.rtol = 1e-6 diff --git a/python/paddle/fluid/tests/unittests/ipu/test_lr_sheduler_ipu.py 
b/python/paddle/fluid/tests/unittests/ipu/test_lr_sheduler_ipu.py index 6641efde694..f7a01b7268d 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_lr_sheduler_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_lr_sheduler_ipu.py @@ -21,6 +21,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest class LR_New(LRScheduler): + def __init__(self, learning_rate=1e-5, last_epoch=-1, verbose=False): super(LR_New, self).__init__(learning_rate, last_epoch, verbose) @@ -33,12 +34,16 @@ class LR_New(LRScheduler): @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestConvNet(IPUOpTest): + @IPUOpTest.static_graph def build_model(self): - image = paddle.static.data( - name='image', shape=[1, 3, 10, 10], dtype='float32') - conv1 = paddle.static.nn.conv2d( - image, num_filters=3, filter_size=3, bias_attr=False) + image = paddle.static.data(name='image', + shape=[1, 3, 10, 10], + dtype='float32') + conv1 = paddle.static.nn.conv2d(image, + num_filters=3, + filter_size=3, + bias_attr=False) loss = paddle.mean(conv1) opt = paddle.optimizer.Lamb(learning_rate=LR_New()) @@ -58,8 +63,9 @@ class TestConvNet(IPUOpTest): ipu_strategy = paddle.static.IpuStrategy() ipu_strategy.set_graph_config(is_training=True) program = paddle.static.IpuCompiledProgram( - self.main_prog, ipu_strategy=ipu_strategy).compile( - self.feed_list, self.fetch_list) + self.main_prog, + ipu_strategy=ipu_strategy).compile(self.feed_list, + self.fetch_list) else: program = self.main_prog diff --git a/python/paddle/fluid/tests/unittests/ipu/test_matmul_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_matmul_op_ipu.py index e7e4c000e16..222bb202097 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_matmul_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_matmul_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -50,10 +51,12 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='float32') out = paddle.fluid.layers.matmul(x, y, **self.attrs) self.fetch_list = [out.name] @@ -70,6 +73,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = { "transpose_x": True, @@ -79,6 +83,7 @@ class TestCase1(TestBase): class TestCase2(TestBase): + def set_op_attrs(self): self.attrs = { "transpose_x": True, @@ -94,6 +99,7 @@ class TestCase2(TestBase): class TestCase3(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[5, 4, 3, 2]) y = np.random.uniform(size=[5, 4, 2, 3]) @@ -103,6 +109,7 @@ class TestCase3(TestBase): class TestCase4(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[4, 3, 2]) y = np.random.uniform(size=[4, 2, 3]) @@ -112,6 +119,7 @@ class TestCase4(TestBase): class TestCase5(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[4, 2, 3]) y = np.random.uniform(size=[3, 2]) @@ -121,6 +129,7 @@ class TestCase5(TestBase): class TestCase6(TestBase): + def set_data_feed(self): 
x = np.random.uniform(size=[3]) @@ -130,6 +139,7 @@ class TestCase6(TestBase): @unittest.skip("not supported") class TestCase6_2(TestCase6): + def set_data_feed(self): x = np.random.uniform(size=[3]) @@ -145,6 +155,7 @@ class TestCase6_2(TestCase6): class TestCase7(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[1, 12, 128, 64]) y = np.random.uniform(size=[1, 12, 128, 64]) @@ -157,6 +168,7 @@ class TestCase7(TestBase): class TestCase8(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[3, 1]) y = np.random.uniform(size=[1, 2]) @@ -167,6 +179,7 @@ class TestCase8(TestBase): @unittest.skip("not supported") class TestCase8_2(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[3]) y = np.random.uniform(size=[2]) @@ -184,6 +197,7 @@ class TestCase8_2(TestBase): @unittest.skip("dim > 4 is not supported") class TestCase9(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[6, 5, 4, 2, 3]) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_matmul_serilize_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_matmul_serilize_ipu.py index 0a273e91dd5..8151c553265 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_matmul_serilize_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_matmul_serilize_ipu.py @@ -29,6 +29,7 @@ def set_serialize_factor(serialize_factor): @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -52,14 +53,12 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], - shape=self.feed_shape[0], - dtype=self.feed_dtype[0]) - y = paddle.static.data( - name=self.feed_list[1], - shape=self.feed_shape[1], - dtype=self.feed_dtype[1]) + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype=self.feed_dtype[0]) + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype=self.feed_dtype[1]) # decrator maybe the best choice, but need to modify api out = paddle.matmul(x, y, **self.attrs) set_serialize_factor(4) @@ -89,8 +88,7 @@ class TestBase(IPUOpTest): res0 = self.run_model(False) res1 = self.run_model(True) self.assertTrue( - np.allclose( - res0.flatten(), res1.flatten(), atol=self.atol)) + np.allclose(res0.flatten(), res1.flatten(), atol=self.atol)) self.assertTrue(res0.shape == res1.shape) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_matmul_v2_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_matmul_v2_op_ipu.py index 725f3243e0f..4777c42da13 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_matmul_v2_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_matmul_v2_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -46,10 +47,12 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='float32') out = paddle.matmul(x, y, **self.attrs) 
self.fetch_list = [out.name] @@ -65,6 +68,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = { "transpose_x": True, @@ -73,6 +77,7 @@ class TestCase1(TestBase): class TestCase3(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[5, 4, 2, 3]) y = np.random.uniform(size=[5, 4, 3, 2]) @@ -82,6 +87,7 @@ class TestCase3(TestBase): class TestCase4(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[4, 2, 3]) y = np.random.uniform(size=[4, 3, 2]) @@ -91,6 +97,7 @@ class TestCase4(TestBase): class TestCase5(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[4, 2, 3]) y = np.random.uniform(size=[3, 2]) @@ -100,6 +107,7 @@ class TestCase5(TestBase): class TestCase6(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[3]) y = np.random.uniform(size=[3]) @@ -110,6 +118,7 @@ class TestCase6(TestBase): @unittest.skip("not supported") class TestCase6_2(TestCase6): + def set_data_feed(self): x = np.random.uniform(size=[3]) y = np.random.uniform(size=[3]) @@ -122,6 +131,7 @@ class TestCase6_2(TestCase6): class TestCase7(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[3, 1]) y = np.random.uniform(size=[1, 2]) @@ -132,6 +142,7 @@ class TestCase7(TestBase): @unittest.skip("dim > 4 is not supported") class TestCase8(TestBase): + def set_data_feed(self): self.feed = { "x": np.random.uniform(size=[6, 5, 4, 2, 3]).astype('float32'), diff --git a/python/paddle/fluid/tests/unittests/ipu/test_mean_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_mean_op_ipu.py index c0d7dd1fd17..72c2c9cc3be 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_mean_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_mean_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -44,8 +45,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.mean(x) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_mixed_precision_inference_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_mixed_precision_inference_ipu.py index 9bdf2335560..ba8f9c7bad5 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_mixed_precision_inference_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_mixed_precision_inference_ipu.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_data_feed() @@ -58,8 +59,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') # using fp32 x = paddle.static.nn.conv2d(input=x, num_filters=3, filter_size=3) @@ -110,8 +112,9 @@ class TestBase(IPUOpTest): enable_pipelining=self.enable_pipelining, batches_per_step=self.batches_per_step) program = paddle.static.IpuCompiledProgram( - self.main_prog, 
ipu_strategy=ipu_strategy).compile( - self.feed_list, self.fetch_list) + self.main_prog, + ipu_strategy=ipu_strategy).compile(self.feed_list, + self.fetch_list) else: program = self.main_prog @@ -128,13 +131,15 @@ class TestBase(IPUOpTest): class TestPipline(TestBase): + @IPUOpTest.static_graph def build_model(self, exec_mode): feed_shape = list(self.feed_shape[0]) if self.is_ipu_mode(exec_mode): feed_shape[0] = 1 - x = paddle.static.data( - name=self.feed_list[0], shape=feed_shape, dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=feed_shape, + dtype='float32') with paddle.static.ipu_shard_guard(index=0, stage=0): # using fp32 x = paddle.static.nn.conv2d(input=x, num_filters=3, filter_size=3) @@ -144,8 +149,9 @@ class TestPipline(TestBase): with paddle.static.ipu_shard_guard(index=1, stage=1): # using fp16 with paddle.static.amp.fp16_guard(): - x = paddle.static.nn.conv2d( - input=x, num_filters=6, filter_size=3) + x = paddle.static.nn.conv2d(input=x, + num_filters=6, + filter_size=3) x = paddle.static.nn.batch_norm(x, act='relu') x = F.max_pool2d(x, kernel_size=2, stride=2) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_mixed_precision_training_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_mixed_precision_training_ipu.py index c4ac9cddd7c..4fc3b40f9ab 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_mixed_precision_training_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_mixed_precision_training_ipu.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -63,8 +64,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') # using fp32 x = paddle.static.nn.conv2d(input=x, num_filters=3, filter_size=3) @@ -119,8 +121,9 @@ class TestBase(IPUOpTest): enable_pipelining=self.enable_pipelining, batches_per_step=self.batches_per_step) program = paddle.static.IpuCompiledProgram( - self.main_prog, ipu_strategy=ipu_strategy).compile( - self.feed_list, self.fetch_list) + self.main_prog, + ipu_strategy=ipu_strategy).compile(self.feed_list, + self.fetch_list) else: program = self.main_prog @@ -140,13 +143,15 @@ class TestBase(IPUOpTest): class TestPipline(TestBase): + @IPUOpTest.static_graph def build_model(self, exec_mode): feed_shape = list(self.feed_shape[0]) if self.is_ipu_mode(exec_mode): feed_shape[0] = 1 - x = paddle.static.data( - name=self.feed_list[0], shape=feed_shape, dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=feed_shape, + dtype='float32') with paddle.static.ipu_shard_guard(index=0, stage=0): # using fp32 @@ -157,8 +162,9 @@ class TestPipline(TestBase): with paddle.static.ipu_shard_guard(index=1, stage=1): # using fp16 with paddle.static.amp.fp16_guard(): - x = paddle.static.nn.conv2d( - input=x, num_filters=6, filter_size=3) + x = paddle.static.nn.conv2d(input=x, + num_filters=6, + filter_size=3) x = paddle.static.nn.batch_norm(x, act='relu') x = F.max_pool2d(x, kernel_size=2, stride=2) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_model_parallel_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_model_parallel_ipu.py index 884162d336f..81f5295c7dd 100644 --- 
a/python/paddle/fluid/tests/unittests/ipu/test_model_parallel_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_model_parallel_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -60,14 +61,19 @@ class TestBase(IPUOpTest): bs = self.ipu_bs if run_ipu else self.cpu_bs with paddle.static.scope_guard(scope): with paddle.static.program_guard(main_prog, startup_prog): - image = paddle.static.data( - name='image', shape=[bs, 3, 10, 10], dtype='float32') + image = paddle.static.data(name='image', + shape=[bs, 3, 10, 10], + dtype='float32') with paddle.static.ipu_shard_guard(index=0): - conv1 = paddle.static.nn.conv2d( - image, num_filters=3, filter_size=3, bias_attr=False) + conv1 = paddle.static.nn.conv2d(image, + num_filters=3, + filter_size=3, + bias_attr=False) with paddle.static.ipu_shard_guard(index=1): - conv2 = paddle.static.nn.conv2d( - conv1, num_filters=3, filter_size=3, bias_attr=False) + conv2 = paddle.static.nn.conv2d(conv1, + num_filters=3, + filter_size=3, + bias_attr=False) # should consider influence of bs loss = paddle.mean(conv2) @@ -126,6 +132,7 @@ class TestBase(IPUOpTest): class TestReplicaInference(TestBase): + def set_attrs(self): self.ipu_options = { "batches_per_step": 1, @@ -149,6 +156,7 @@ class TestReplicaInference(TestBase): class TestReplicaCollectiveInference(TestBase): + def set_attrs(self): self.ipu_options = { "batches_per_step": 1, @@ -179,6 +187,7 @@ class TestReplicaCollectiveInference(TestBase): class TestPipelineInference(TestBase): + def set_attrs(self): self.ipu_options = { "batches_per_step": 2, @@ -195,12 +204,13 @@ class TestPipelineInference(TestBase): np_image = np.random.rand(1, 3, 10, 10).astype(np.float32) self.feed_cpu = {"image": np_image} self.feed_ipu = { - "image": np.tile(np_image, - [self.ipu_options['batches_per_step'], 1, 1, 1]) + "image": + np.tile(np_image, [self.ipu_options['batches_per_step'], 1, 1, 1]) } class TestTrainBase(TestBase): + def set_training(self): self.is_training = True self.epoch = 10 @@ -220,6 +230,7 @@ class TestTrainBase(TestBase): class TestReplicaTrain(TestTrainBase): + def set_attrs(self): self.ipu_options = { "batches_per_step": 1, @@ -250,6 +261,7 @@ class TestReplicaTrain(TestTrainBase): class TestReplicaCollectiveTrain(TestTrainBase): + def set_attrs(self): self.ipu_options = { "batches_per_step": 1, @@ -287,6 +299,7 @@ class TestReplicaCollectiveTrain(TestTrainBase): class TestPipelineTrain(TestTrainBase): + def set_attrs(self): self.ipu_options = { "batches_per_step": 3, @@ -315,6 +328,7 @@ class TestPipelineTrain(TestTrainBase): class TestAdamTrain(TestTrainBase): + def set_attrs(self): self.ipu_options = { "batches_per_step": 1, @@ -330,6 +344,7 @@ class TestAdamTrain(TestTrainBase): class TestAdamReplicaTrain(TestReplicaTrain): + def set_attrs(self): self.ipu_options = { "batches_per_step": 1, @@ -345,6 +360,7 @@ class TestAdamReplicaTrain(TestReplicaTrain): class TestAdamPipelineTrain(TestPipelineTrain): + def set_attrs(self): self.ipu_options = { "batches_per_step": 3, @@ -360,6 +376,7 @@ class TestAdamPipelineTrain(TestPipelineTrain): class TestAdamRecomputationTrain(TestPipelineTrain): + def set_attrs(self): self.ipu_options = { "batches_per_step": 3, @@ -376,6 +393,7 @@ class TestAdamRecomputationTrain(TestPipelineTrain): class TestLambTrain(TestAdamTrain): + def set_attrs(self): 
self.ipu_options = { "batches_per_step": 1, @@ -391,6 +409,7 @@ class TestLambTrain(TestAdamTrain): class TestLambReplicaTrain(TestAdamReplicaTrain): + def set_attrs(self): self.ipu_options = { "batches_per_step": 1, @@ -406,6 +425,7 @@ class TestLambReplicaTrain(TestAdamReplicaTrain): class TestLambPipelineTrain(TestAdamPipelineTrain): + def set_attrs(self): self.ipu_options = { "batches_per_step": 3, diff --git a/python/paddle/fluid/tests/unittests/ipu/test_model_pipeline_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_model_pipeline_ipu.py index 7e702399640..27538610a42 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_model_pipeline_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_model_pipeline_ipu.py @@ -26,6 +26,7 @@ SEED = 2021 @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestCastNet(unittest.TestCase): + def _test(self, run_ipu=True): scope = paddle.static.Scope() main_prog = paddle.static.Program() @@ -38,14 +39,19 @@ class TestCastNet(unittest.TestCase): with paddle.static.scope_guard(scope): with paddle.static.program_guard(main_prog, startup_prog): - image = paddle.static.data( - name='image', shape=[1, 3, 10, 10], dtype='float32') + image = paddle.static.data(name='image', + shape=[1, 3, 10, 10], + dtype='float32') with paddle.static.ipu_shard_guard(index=0): - conv1 = paddle.static.nn.conv2d( - image, num_filters=3, filter_size=3, bias_attr=False) + conv1 = paddle.static.nn.conv2d(image, + num_filters=3, + filter_size=3, + bias_attr=False) with paddle.static.ipu_shard_guard(index=1): - conv2 = paddle.static.nn.conv2d( - conv1, num_filters=3, filter_size=3, bias_attr=False) + conv2 = paddle.static.nn.conv2d(conv1, + num_filters=3, + filter_size=3, + bias_attr=False) loss = paddle.mean(conv2) if run_ipu: @@ -59,8 +65,9 @@ class TestCastNet(unittest.TestCase): feed_list = [image.name] fetch_list = [loss.name] ipu_strategy = paddle.static.IpuStrategy() - ipu_strategy.set_graph_config( - num_ipus=2, is_training=False, enable_manual_shard=True) + ipu_strategy.set_graph_config(num_ipus=2, + is_training=False, + enable_manual_shard=True) ipu_strategy.set_pipelining_config(enable_pipelining=False) program = paddle.static.IpuCompiledProgram( main_prog, diff --git a/python/paddle/fluid/tests/unittests/ipu/test_mul_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_mul_op_ipu.py index 583a8941ac6..50be6420a55 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_mul_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_mul_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -49,10 +50,12 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='float32') out = paddle.fluid.layers.mul(x, y, **self.attrs) self.fetch_list = [out.name] @@ -68,6 +71,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[1, 2, 5]) y = np.random.uniform(size=[5, 3]) @@ -82,6 
+86,7 @@ class TestCase1(TestBase): class TestCase2(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[3, 4, 2, 9]) y = np.random.uniform(size=[3, 6, 1, 2, 3]) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_not_equal_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_not_equal_op_ipu.py index a4365c021ff..c796cc7c02b 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_not_equal_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_not_equal_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -51,10 +52,12 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='float32') out = paddle.fluid.layers.not_equal(x, y, **self.attrs) self.fetch_list = [out.name] @@ -70,6 +73,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_data_feed(self): x = np.ones([1, 10]) y = np.ones([1, 10]) @@ -78,6 +82,7 @@ class TestCase1(TestBase): class TestCase2(TestBase): + def set_data_feed(self): x = np.ones([1, 10]) y = np.arange(0, 10).reshape([1, 10]) @@ -88,6 +93,7 @@ class TestCase2(TestBase): @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestScalar(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -98,8 +104,12 @@ class TestScalar(IPUOpTest): def set_data_feed(self): x = np.ones([1, 10]) y = 0.5 - self.feed_fp32 = {"x": x.astype(np.float32), } - self.feed_fp16 = {"x": x.astype(np.float16), } + self.feed_fp32 = { + "x": x.astype(np.float32), + } + self.feed_fp16 = { + "x": x.astype(np.float16), + } def set_feed_attr(self): self.feed_shape = [x.shape for x in self.feed_fp32.values()] @@ -110,8 +120,9 @@ class TestScalar(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = (x != 0.5) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_one_hot_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_one_hot_op_ipu.py index 938654bfafc..6c8c3b11314 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_one_hot_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_one_hot_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -44,8 +45,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='int32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='int32') out = paddle.fluid.layers.one_hot(x, **self.attrs) self.fetch_list = [out.name] @@ -62,6 +64,7 @@ class TestBase(IPUOpTest): @unittest.skip('does not 
support allow_out_of_range=True') class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {"depth": 4, "allow_out_of_range": True} diff --git a/python/paddle/fluid/tests/unittests/ipu/test_one_hot_v2_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_one_hot_v2_op_ipu.py index ec25f378866..8822c352b8b 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_one_hot_v2_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_one_hot_v2_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -44,8 +45,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='int32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='int32') out = paddle.fluid.input.one_hot(x, **self.attrs) self.fetch_list = [out.name] @@ -62,6 +64,7 @@ class TestBase(IPUOpTest): @unittest.skip('does not support allow_out_of_range=True') class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {"depth": 4, "allow_out_of_range": True} diff --git a/python/paddle/fluid/tests/unittests/ipu/test_optimizer_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_optimizer_ipu.py index 060a69e8311..5169eddc703 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_optimizer_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_optimizer_ipu.py @@ -22,6 +22,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_data_feed() @@ -58,22 +59,25 @@ class TestBase(IPUOpTest): with paddle.static.scope_guard(scope): with paddle.static.program_guard(main_prog, startup_prog): - image = paddle.static.data( - name='image', shape=[1, 3, 10, 10], dtype='float32') - conv1 = paddle.static.nn.conv2d( - image, num_filters=3, filter_size=3, bias_attr=False) + image = paddle.static.data(name='image', + shape=[1, 3, 10, 10], + dtype='float32') + conv1 = paddle.static.nn.conv2d(image, + num_filters=3, + filter_size=3, + bias_attr=False) loss = paddle.mean(conv1) weight_decay = self.attrs['weight_decay'] opt = paddle.optimizer.SGD(learning_rate=1e-1, weight_decay=weight_decay) if self.attrs['optimizer'] == 'adam': - opt = paddle.optimizer.Adam( - learning_rate=1e-1, weight_decay=weight_decay) + opt = paddle.optimizer.Adam(learning_rate=1e-1, + weight_decay=weight_decay) elif self.attrs['optimizer'] == 'lamb': - opt = paddle.optimizer.Lamb( - learning_rate=1e-1, lamb_weight_decay=weight_decay) + opt = paddle.optimizer.Lamb(learning_rate=1e-1, + lamb_weight_decay=weight_decay) opt.minimize(loss) if run_ipu: @@ -88,21 +92,19 @@ class TestBase(IPUOpTest): fetch_list = [loss.name] ipu_strategy = paddle.static.IpuStrategy() ipu_strategy.set_graph_config(is_training=True) - ipu_strategy.set_options({ - 'loss_scaling': self.attrs["loss_scaling"] - }) + ipu_strategy.set_options( + {'loss_scaling': self.attrs["loss_scaling"]}) if "use_no_bias_optimizer" in self.attrs.keys(): ipu_strategy.set_options({ "use_no_bias_optimizer": self.attrs["use_no_bias_optimizer"] }) if "accl1_type" in self.attrs.keys(): - ipu_strategy.set_options({ - "accl1_type": self.attrs["accl1_type"] - }) + ipu_strategy.set_options( + {"accl1_type": 
self.attrs["accl1_type"]}) program = paddle.static.IpuCompiledProgram( - main_prog, ipu_strategy=ipu_strategy).compile(feed_list, - fetch_list) + main_prog, + ipu_strategy=ipu_strategy).compile(feed_list, fetch_list) else: program = main_prog @@ -123,6 +125,7 @@ class TestBase(IPUOpTest): @unittest.skip('do not support L2 regularization') class TestSGD(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'sgd', @@ -133,6 +136,7 @@ class TestSGD(TestBase): @unittest.skip('do not support L2 regularization') class TestAdamCase1(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'adam', @@ -142,6 +146,7 @@ class TestAdamCase1(TestBase): class TestAdamCase2(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'adam', @@ -152,6 +157,7 @@ class TestAdamCase2(TestBase): @unittest.skip('cpu do not support AdamNoBias') class TestAdamNoBias(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'adam', @@ -163,6 +169,7 @@ class TestAdamNoBias(TestBase): @unittest.skip('cpu do not support FLOAT16') class TestAdamCase3(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'adam', @@ -174,6 +181,7 @@ class TestAdamCase3(TestBase): @unittest.skip('seems cpu output wrong') class TestLambCase1(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'lamb', @@ -184,6 +192,7 @@ class TestLambCase1(TestBase): @unittest.skip('seems cpu output wrong') class TestLamb(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'lamb', @@ -194,6 +203,7 @@ class TestLamb(TestBase): @unittest.skip('cpu do not support LambNoBias') class TestLambNoBias(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'lamb', @@ -205,6 +215,7 @@ class TestLambNoBias(TestBase): @unittest.skip('cpu do not support FLOAT16') class TestLambCase2(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'lamb', diff --git a/python/paddle/fluid/tests/unittests/ipu/test_pool_avg_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_pool_avg_op_ipu.py index e5df11eb4ef..a9ffeb8dc01 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_pool_avg_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_pool_avg_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -54,8 +55,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.pool2d(x, **self.attrs) self.fetch_list = [out.name] @@ -71,36 +73,42 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_attrs(self): super().set_attrs() self.attrs['pool_size'] = 3 class TestCase1_2(TestBase): + def set_attrs(self): super().set_attrs() self.attrs['pool_size'] = [3, 1] class TestCase2(TestBase): + def set_attrs(self): super().set_attrs() self.attrs['pool_stride'] = 2 class TestCase2_2(TestBase): + def set_attrs(self): super().set_attrs() self.attrs['pool_stride'] = [2, 1] class TestCase3(TestBase): + def set_attrs(self): super().set_attrs() self.attrs['pool_padding'] = [1, 1] class TestCase3_2(TestBase): + def set_attrs(self): super().set_attrs() self.attrs['pool_padding'] = [1, 1, 2, 2] @@ -108,6 +116,7 @@ class TestCase3_2(TestBase): @unittest.skip('the results has a 
positional offset') class TestCase3_3(TestBase): + def set_attrs(self): super().set_attrs() self.attrs['pool_padding'] = [1, 2, 1, 1] @@ -115,6 +124,7 @@ class TestCase3_3(TestBase): @unittest.skip('paddle output has nan') class TestCase3_4(TestBase): + def set_attrs(self): super().set_attrs() self.attrs['pool_size'] = 1 @@ -122,24 +132,28 @@ class TestCase3_4(TestBase): class TestCase4(TestBase): + def set_attrs(self): super().set_attrs() self.attrs['global_pooling'] = True class TestCase5(TestBase): + def set_attrs(self): super().set_attrs() self.attrs['ceil_mode'] = True class TestCase6(TestBase): + def set_attrs(self): super().set_attrs() self.attrs['exclusive'] = False class TestAdaptive(TestBase): + def set_op_attrs(self): self.attrs = { "pool_size": 1, @@ -149,8 +163,9 @@ class TestAdaptive(TestBase): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.adaptive_pool2d(x, **self.attrs) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_pool_max_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_pool_max_op_ipu.py index 41b2b8406dc..e9fec9a0232 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_pool_max_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_pool_max_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -54,8 +55,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.pool2d(x, **self.attrs) self.fetch_list = [out.name] @@ -71,36 +73,42 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['pool_size'] = 3 class TestCase1_2(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['pool_size'] = [3, 1] class TestCase2(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['pool_stride'] = 2 class TestCase2_2(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['pool_stride'] = [2, 1] class TestCase3(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['pool_padding'] = [1, 1] class TestCase3_2(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['pool_padding'] = [1, 1, 2, 2] @@ -108,6 +116,7 @@ class TestCase3_2(TestBase): @unittest.skip('auto_pad is not currently supported') class TestCase3_3(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['pool_padding'] = 'VALID' @@ -115,30 +124,35 @@ class TestCase3_3(TestBase): @unittest.skip('auto_pad is not currently supported') class TestCase3_4(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['pool_padding'] = 'SAME' class TestCase4(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['global_pooling'] = True class TestCase5(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['ceil_mode'] = True class TestCase6(TestBase): + def set_op_attrs(self): super().set_op_attrs() self.attrs['exclusive'] = 
False class TestAdaptive(TestBase): + def set_op_attrs(self): self.attrs = { "pool_size": 1, @@ -148,8 +162,9 @@ class TestAdaptive(TestBase): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.adaptive_pool2d(x, **self.attrs) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_pow_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_pow_op_ipu.py index 5ff1223961b..3f596f951cd 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_pow_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_pow_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -45,8 +46,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.pow(x, **self.attrs) self.fetch_list = [out.name] @@ -62,6 +64,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_data_feed(self): data1 = np.random.uniform(size=[1, 3, 2, 2]) data2 = np.array([2.0]) @@ -80,10 +83,12 @@ class TestCase1(TestBase): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - factor = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + factor = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='float32') out = paddle.fluid.layers.pow(x, factor=factor, **self.attrs) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py index 3189e060d58..1c050d1e485 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -49,10 +50,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], - shape=self.feed_shape[0], - dtype=self.feed_dtype[0]) + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype=self.feed_dtype[0]) out = paddle.fluid.layers.conv2d(x, num_filters=3, filter_size=3) out = paddle.fluid.layers.Print(out, **self.attrs) @@ -75,11 +75,13 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {"message": "input_data"} class TestTrainCase1(TestBase): + def set_op_attrs(self): # "forward" : print forward # "backward" : print forward and backward @@ -93,6 +95,7 @@ class TestTrainCase1(TestBase): @unittest.skip("attrs are not supported") class TestCase2(TestBase): + def set_op_attrs(self): self.attrs = { "first_n": 10, 
diff --git a/python/paddle/fluid/tests/unittests/ipu/test_reduce_x_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_reduce_x_op_ipu.py index 93f96e08fd4..ffa3c6d1550 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_reduce_x_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_reduce_x_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestMean(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -38,8 +39,9 @@ class TestMean(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = self.op(x, **self.attrs) self.fetch_list = [out.name] @@ -123,21 +125,25 @@ class TestMean(IPUOpTest): class TestMax(TestMean): + def set_test_op(self): self.op = paddle.fluid.layers.reduce_max class TestMin(TestMean): + def set_test_op(self): self.op = paddle.fluid.layers.reduce_min class TestProd(TestMean): + def set_test_op(self): self.op = paddle.fluid.layers.reduce_prod class TestSum(TestMean): + def set_test_op(self): self.op = paddle.fluid.layers.reduce_sum diff --git a/python/paddle/fluid/tests/unittests/ipu/test_reshape_inplace_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_reshape_inplace_op_ipu.py index 35be4d98827..9a8c127ab65 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_reshape_inplace_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_reshape_inplace_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -48,8 +49,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') add = paddle.fluid.layers.elementwise_add(x, x) out = paddle.fluid.layers.reshape(add, **self.attrs) self.fetch_list = [out.name] @@ -66,6 +68,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_attrs(self): self.attrs = { "shape": [-1, 0, 10], diff --git a/python/paddle/fluid/tests/unittests/ipu/test_reshape_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_reshape_op_ipu.py index 427e9754023..32cedf0cdda 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_reshape_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_reshape_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -46,8 +47,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.reshape(x=x, **self.attrs) self.fetch_list = [out.name] @@ -63,6 +65,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {} self.attrs['shape'] = [2, 3, 
-1, 2] @@ -70,6 +73,7 @@ class TestCase1(TestBase): class TestCase2(TestBase): + def set_op_attrs(self): self.attrs = {} self.attrs['shape'] = [-1, 0, 3, 2] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_save_load_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_save_load_ipu.py index c8f0961baa4..1b39ead9b84 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_save_load_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_save_load_ipu.py @@ -26,6 +26,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_data_feed() @@ -63,16 +64,14 @@ class TestBase(IPUOpTest): with paddle.fluid.unique_name.guard(generator): with paddle.static.scope_guard(scope): with paddle.static.program_guard(main_prog, startup_prog): - x = paddle.static.data( - name=self.feed_list[0], - shape=self.feed_shape[0], - dtype='float32') - conv1 = paddle.static.nn.conv2d( - x, - num_filters=3, - filter_size=3, - bias_attr=False, - name='conv2d') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + conv1 = paddle.static.nn.conv2d(x, + num_filters=3, + filter_size=3, + bias_attr=False, + name='conv2d') loss = paddle.mean(conv1) # apply optimizer @@ -121,59 +120,69 @@ class TestBase(IPUOpTest): res1 = self._test_base(False) self.assertTrue( - np.allclose( - res0.flatten(), res1.flatten(), atol=self.atol)) + np.allclose(res0.flatten(), res1.flatten(), atol=self.atol)) self.attrs['model_path'].cleanup() class TestMomentum(TestBase): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.Momentum, learning_rate=1e-1) class TestAdam(TestBase): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.Adam, learning_rate=1e-1) class TestLamb(TestBase): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.Lamb, learning_rate=1e-1) class TestAdamW(TestBase): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.AdamW, learning_rate=1e-1) class TestAdamax(TestBase): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.Adamax, learning_rate=1e-1) class TestAdagrad(TestBase): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.Adagrad, learning_rate=1e-1) class TestAdadelta(TestBase): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.Adagrad, learning_rate=1e-1) class TestRMSProp(TestBase): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.RMSProp, learning_rate=1e-1) class TestCenteredRMSProp(TestBase): + def set_optimizer(self): - self.optimizer = partial( - paddle.optimizer.RMSProp, learning_rate=1e-1, centered=True) + self.optimizer = partial(paddle.optimizer.RMSProp, + learning_rate=1e-1, + centered=True) @unittest.skipIf(IPUOpTest.use_ipumodel(), "skip for ipumodel") class TestSGDFP16(TestBase): + def set_attrs(self): self.attrs = {} self.attrs['steps'] = 100 @@ -186,49 +195,59 @@ class TestSGDFP16(TestBase): class TestMomentumFp16(TestSGDFP16): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.Momentum, learning_rate=1e-1) class TestAdamFP16(TestSGDFP16): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.Adam, learning_rate=1e-1) class TestLambFP16(TestSGDFP16): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.Lamb, learning_rate=1e-1) class TestAdamWFP16FP16(TestSGDFP16): + def set_optimizer(self): 
self.optimizer = partial(paddle.optimizer.AdamW, learning_rate=1e-1) class TestAdamaxFP16(TestSGDFP16): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.Adamax, learning_rate=1e-1) class TestAdagradFP16(TestSGDFP16): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.Adagrad, learning_rate=1e-1) class TestAdadeltaFP16(TestSGDFP16): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.Adagrad, learning_rate=1e-1) class TestRMSPropFP16(TestSGDFP16): + def set_optimizer(self): self.optimizer = partial(paddle.optimizer.RMSProp, learning_rate=1e-1) class TestCenteredRMSPropFP16(TestSGDFP16): + def set_optimizer(self): - self.optimizer = partial( - paddle.optimizer.RMSProp, learning_rate=1e-1, centered=True) + self.optimizer = partial(paddle.optimizer.RMSProp, + learning_rate=1e-1, + centered=True) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ipu/test_scale_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_scale_op_ipu.py index f28bcba4cf0..8b6b8425b52 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_scale_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_scale_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -53,8 +54,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.scale(x, **self.attrs) self.fetch_list = [out.name] @@ -70,6 +72,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = { "scale": 5.0, @@ -79,6 +82,7 @@ class TestCase1(TestBase): class TestCase2(TestBase): + def set_op_attrs(self): self.attrs = { "scale": 1.0, @@ -88,6 +92,7 @@ class TestCase2(TestBase): class TestCase3(TestBase): + def set_op_attrs(self): self.attrs = { "scale": 5.0, @@ -97,6 +102,7 @@ class TestCase3(TestBase): class TestCase4(TestBase): + def set_op_attrs(self): self.attrs = { "scale": 1.0, @@ -106,6 +112,7 @@ class TestCase4(TestBase): class TestCase5(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[3, 3, 10, 10]) y = np.array([3.0]) @@ -120,10 +127,12 @@ class TestCase5(TestBase): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='float32') out = paddle.fluid.layers.scale(x, scale=y, **self.attrs) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_scaled_optimizer_state_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_scaled_optimizer_state_ipu.py index 113b316af4e..79527f7a130 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_scaled_optimizer_state_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_scaled_optimizer_state_ipu.py @@ -22,6 +22,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core 
is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -52,18 +53,21 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - image = paddle.static.data( - name='image', shape=[1, 3, 10, 10], dtype='float32') - conv1 = paddle.static.nn.conv2d( - image, num_filters=3, filter_size=3, bias_attr=False) + image = paddle.static.data(name='image', + shape=[1, 3, 10, 10], + dtype='float32') + conv1 = paddle.static.nn.conv2d(image, + num_filters=3, + filter_size=3, + bias_attr=False) loss = paddle.mean(conv1) weight_decay = self.attrs['weight_decay'] - opt = paddle.optimizer.Adam( - learning_rate=1e-1, weight_decay=weight_decay) + opt = paddle.optimizer.Adam(learning_rate=1e-1, + weight_decay=weight_decay) if self.attrs['optimizer'] == 'lamb': - opt = paddle.optimizer.Lamb( - learning_rate=1e-1, lamb_weight_decay=weight_decay) + opt = paddle.optimizer.Lamb(learning_rate=1e-1, + lamb_weight_decay=weight_decay) opt.minimize(loss) self.feed_list = [image.name] self.fetch_list = [loss.name] @@ -74,7 +78,8 @@ class TestBase(IPUOpTest): if self.is_ipu_mode(exec_mode): if "use_no_bias_optimizer" in self.attrs.keys(): ipu_strategy.set_options({ - "use_no_bias_optimizer": self.attrs["use_no_bias_optimizer"] + "use_no_bias_optimizer": + self.attrs["use_no_bias_optimizer"] }) if "scaled_optimizer_state" in self.attrs.keys(): ipu_strategy.set_options({ @@ -92,6 +97,7 @@ class TestBase(IPUOpTest): class TestScaledAdam(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'adam', @@ -107,6 +113,7 @@ class TestScaledAdam(TestBase): @unittest.skip('cpu do not support AdamNoBias') class TestScaledAdamNoBias(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'adam', @@ -118,6 +125,7 @@ class TestScaledAdamNoBias(TestBase): @unittest.skip('cpu do not support LambNoBias') class TestScaledLambNoBias(TestBase): + def set_attrs(self): self.attrs = { "optimizer": 'lamb', diff --git a/python/paddle/fluid/tests/unittests/ipu/test_set_batch_size_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_set_batch_size_ipu.py index 5c61012cace..2af8de38377 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_set_batch_size_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_set_batch_size_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -50,22 +51,31 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - conv1 = paddle.static.nn.conv2d( - x, num_filters=3, filter_size=3, bias_attr=False) - conv2 = paddle.static.nn.conv2d( - conv1, num_filters=3, filter_size=3, bias_attr=False) - conv3 = paddle.static.nn.conv2d( - conv2, num_filters=3, filter_size=3, bias_attr=False) - conv4 = paddle.static.nn.conv2d( - conv3, num_filters=3, filter_size=3, bias_attr=False) + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + conv1 = paddle.static.nn.conv2d(x, + num_filters=3, + filter_size=3, + bias_attr=False) + conv2 = paddle.static.nn.conv2d(conv1, + num_filters=3, + filter_size=3, + bias_attr=False) + conv3 = paddle.static.nn.conv2d(conv2, + num_filters=3, + filter_size=3, + bias_attr=False) + conv4 = paddle.static.nn.conv2d(conv3, + num_filters=3, + filter_size=3, + 
bias_attr=False) self.fetch_list = [conv4.name] def run_model(self, exec_mode): ipu_strategy = paddle.static.IpuStrategy() - ipu_strategy.set_graph_config( - is_training=self.is_training, micro_batch_size=2) + ipu_strategy.set_graph_config(is_training=self.is_training, + micro_batch_size=2) self.run_op_test(exec_mode, ipu_strategy) def test(self): diff --git a/python/paddle/fluid/tests/unittests/ipu/test_slice_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_slice_op_ipu.py index ac8ef3e9d65..3a96d4bb0b9 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_slice_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_slice_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -49,8 +50,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.slice(x, **self.attrs) self.fetch_list = [out.name] @@ -66,6 +68,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = { "axes": [0, 1], @@ -76,6 +79,7 @@ class TestCase1(TestBase): @unittest.skip('dynamic graph is not support on IPU') class TestCase2(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[4, 5, 6]) s = np.array([0, 0, 2]) @@ -96,14 +100,19 @@ class TestCase2(TestBase): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - starts = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='int32') - ends = paddle.static.data( - name=self.feed_list[2], shape=self.feed_shape[2], dtype='int32') - out = paddle.fluid.layers.slice( - x, starts=starts, ends=ends, **self.attrs) + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + starts = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='int32') + ends = paddle.static.data(name=self.feed_list[2], + shape=self.feed_shape[2], + dtype='int32') + out = paddle.fluid.layers.slice(x, + starts=starts, + ends=ends, + **self.attrs) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_softmax_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_softmax_op_ipu.py index 0b2d776cf24..be803e61cf5 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_softmax_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_softmax_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -45,8 +46,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.softmax(x, **self.attrs) self.fetch_list = [out.name] @@ -62,6 +64,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {"axis": 2} diff --git 
a/python/paddle/fluid/tests/unittests/ipu/test_softmax_with_cross_entropy_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_softmax_with_cross_entropy_op_ipu.py index cb1ed6ad930..97b0c25f938 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_softmax_with_cross_entropy_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_softmax_with_cross_entropy_op_ipu.py @@ -24,6 +24,7 @@ import paddle.nn.functional as F @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -48,18 +49,23 @@ class TestBase(IPUOpTest): self.feed_list = list(self.feed_fp32.keys()) def set_op_attrs(self): - self.attrs = {'soft_label': False, } + self.attrs = { + 'soft_label': False, + } @IPUOpTest.static_graph def build_model(self, on_ipu): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype="float32") + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype="float32") if on_ipu: - label = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='int32') + label = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='int32') else: - label = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='int64') + label = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='int64') out = F.softmax_with_cross_entropy(x, label, **self.attrs) self.fetch_list = [out.name] @@ -77,6 +83,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = { 'soft_label': False, @@ -85,6 +92,7 @@ class TestCase1(TestBase): class TestCase2(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[30, 70]) label = np.arange(30).reshape([30, 1]) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_split_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_split_op_ipu.py index 63d9584dae3..76b65a015e9 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_split_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_split_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -44,8 +45,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.split(x, **self.attrs) self.fetch_list = [fetch.name for fetch in out] @@ -63,6 +65,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {"num_or_sections": [2, 8], "axis": 2} diff --git a/python/paddle/fluid/tests/unittests/ipu/test_squeeze_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_squeeze_op_ipu.py index 33950221ad5..1afc79b6a65 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_squeeze_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_squeeze_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -45,8 +46,9 @@ class TestBase(IPUOpTest): 
@IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.squeeze(x, **self.attrs) self.fetch_list = [out.name] @@ -62,11 +64,13 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {"axes": []} class TestCase2(TestBase): + def set_op_attrs(self): self.attrs = {"axes": [-2]} diff --git a/python/paddle/fluid/tests/unittests/ipu/test_stack_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_stack_op_ipu.py index 11a827cee09..1828772c07a 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_stack_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_stack_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -55,12 +56,15 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32') - z = paddle.static.data( - name=self.feed_list[2], shape=self.feed_shape[2], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='float32') + z = paddle.static.data(name=self.feed_list[2], + shape=self.feed_shape[2], + dtype='float32') out = paddle.fluid.layers.stack([x, y, z], **self.attrs) self.fetch_list = [out.name] @@ -76,6 +80,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {"axis": -2} diff --git a/python/paddle/fluid/tests/unittests/ipu/test_sum_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_sum_op_ipu.py index fdc6ce08b6e..084c6865423 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_sum_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_sum_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -46,10 +47,12 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='float32') out = paddle.fluid.layers.sum([x, y], **self.attrs) self.fetch_list = [out.name] @@ -65,6 +68,7 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_data_feed(self): x = np.random.uniform(size=[1, 3, 2, 2]) y = np.random.uniform(size=[1, 3, 2, 2]) @@ -82,12 +86,15 @@ class TestCase1(TestBase): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') - y = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32') - z = 
paddle.static.data( - name=self.feed_list[2], shape=self.feed_shape[2], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') + y = paddle.static.data(name=self.feed_list[1], + shape=self.feed_shape[1], + dtype='float32') + z = paddle.static.data(name=self.feed_list[2], + shape=self.feed_shape[2], + dtype='float32') out = paddle.fluid.layers.sum([x, y, z], **self.attrs) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_topk_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_topk_op_ipu.py index c5331d43f5e..417d9c37675 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_topk_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_topk_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestTopKOp(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -51,14 +52,17 @@ class TestTopKOp(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') if not self.use_k_as_const_variable: topk_values, topk_indices = self.op(x, **self.attrs) else: # !important, popart cannot accept non const tensor - K_t = paddle.fluid.layers.fill_constant( - shape=[1], dtype='int32', value=self.k, name="in_2") + K_t = paddle.fluid.layers.fill_constant(shape=[1], + dtype='int32', + value=self.k, + name="in_2") topk_values, topk_indices = self.op(x, K_t, **self.attrs) self.fetch_list = [topk_values.name, topk_indices.name] @@ -81,12 +85,14 @@ class TestTopKOp(IPUOpTest): class TestCase2(TestTopKOp): + def set_test_op(self): self.op = paddle.topk @unittest.skip("Trying to get data as int64 but it is of type int32") class TestCase3(TestTopKOp): + def set_op_attrs(self): self.use_k_as_const_variable = True self.attrs = {} @@ -95,6 +101,7 @@ class TestCase3(TestTopKOp): @unittest.skip("Trying to get data as int64 but it is of type int32") class TestCase4(TestCase3): + def set_test_op(self): self.op = paddle.topk diff --git a/python/paddle/fluid/tests/unittests/ipu/test_transpose_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_transpose_op_ipu.py index d5fef73a31b..03068d407b2 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_transpose_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_transpose_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -45,8 +46,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.transpose(x, **self.attrs) self.fetch_list = [out.name] @@ -62,11 +64,13 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {"perm": [0, 1, 2, 3]} class TestCase2(TestBase): + def set_data_feed(self): data = np.random.uniform(size=[1, 2, 3, 4, 5]) self.feed_fp32 = {"x": data.astype(np.float32)} diff --git 
a/python/paddle/fluid/tests/unittests/ipu/test_unsqueeze_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_unsqueeze_op_ipu.py index 54cbc571ec6..998eee38b5e 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_unsqueeze_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_unsqueeze_op_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -45,8 +46,9 @@ class TestBase(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='float32') out = paddle.fluid.layers.unsqueeze(x, **self.attrs) self.fetch_list = [out.name] @@ -62,11 +64,13 @@ class TestBase(IPUOpTest): class TestCase1(TestBase): + def set_op_attrs(self): self.attrs = {"axes": -1} class TestCase2(TestBase): + def set_op_attrs(self): self.attrs = {"axes": [1, 2]} diff --git a/python/paddle/fluid/tests/unittests/ipu/test_varname_inplace_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_varname_inplace_ipu.py index 5cc62432dc6..b3535c8cd56 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_varname_inplace_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_varname_inplace_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -55,10 +56,9 @@ class TestBase(IPUOpTest): with paddle.static.scope_guard(scope): with paddle.static.program_guard(main_prog, startup_prog): - x = paddle.static.data( - name=self.feed_list[0], - shape=self.feed_shape[0], - dtype=self.feed_dtype[0]) + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype=self.feed_dtype[0]) add1 = paddle.fluid.layers.elementwise_add(x, x) reshape = paddle.fluid.layers.reshape(add1, **self.attrs) add2 = paddle.fluid.layers.elementwise_add(reshape, reshape) @@ -76,8 +76,8 @@ class TestBase(IPUOpTest): exe = paddle.static.Executor(place) exe.run(startup_prog) scale1_out = main_prog.global_block().ops[4].output("Out")[0] - main_prog.global_block().ops[4]._rename_output(scale1_out, - add2.name) + main_prog.global_block().ops[4]._rename_output( + scale1_out, add2.name) main_prog.global_block().ops[5]._rename_input(scale1_out, add2.name) if run_ipu: @@ -98,8 +98,7 @@ class TestBase(IPUOpTest): res1 = self._test_base(False) self.assertTrue( - np.allclose( - res0.flatten(), res1.flatten(), atol=self.atol)) + np.allclose(res0.flatten(), res1.flatten(), atol=self.atol)) self.assertTrue(res0.shape == res1.shape) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_weight_decay_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_weight_decay_ipu.py index 5e652ce4833..630a00f5a7d 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_weight_decay_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_weight_decay_ipu.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest "core is not compiled with IPU") @unittest.skipIf(IPUOpTest.use_ipumodel(), "skip for ipumodel") class TestBase(IPUOpTest): + def setUp(self): self.set_atol() self.set_data_feed() @@ -50,6 +51,7 @@ class TestBase(IPUOpTest): } def 
_test_optimizer(self, run_ipu=True): + def exclude_fn(param): return param.name.endswith('.w_0') @@ -62,13 +64,16 @@ class TestBase(IPUOpTest): with paddle.static.scope_guard(scope): with paddle.static.program_guard(main_prog, startup_prog): - image = paddle.static.data( - name='image', shape=[1, 3, 10, 10], dtype='float32') + image = paddle.static.data(name='image', + shape=[1, 3, 10, 10], + dtype='float32') bias = paddle.fluid.layers.create_parameter( shape=[1, 3, 10, 10], is_bias=True, dtype='float32') add1 = image + bias - conv1 = paddle.static.nn.conv2d( - add1, num_filters=3, filter_size=3, bias_attr=False) + conv1 = paddle.static.nn.conv2d(add1, + num_filters=3, + filter_size=3, + bias_attr=False) loss = paddle.mean(conv1) opt = paddle.optimizer.Lamb( @@ -90,12 +95,11 @@ class TestBase(IPUOpTest): fetch_list = [loss.name] ipu_strategy = paddle.static.IpuStrategy() ipu_strategy.set_graph_config(is_training=True) - ipu_strategy.set_options({ - 'loss_scaling': self.attrs["loss_scaling"] - }) + ipu_strategy.set_options( + {'loss_scaling': self.attrs["loss_scaling"]}) program = paddle.static.IpuCompiledProgram( - main_prog, ipu_strategy=ipu_strategy).compile(feed_list, - fetch_list) + main_prog, + ipu_strategy=ipu_strategy).compile(feed_list, fetch_list) else: program = main_prog diff --git a/python/paddle/fluid/tests/unittests/ipu/test_weight_sharing_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_weight_sharing_ipu.py index 30e003917ef..52e88119af0 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_weight_sharing_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_weight_sharing_ipu.py @@ -23,6 +23,7 @@ from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") class TestWeightSharing(IPUOpTest): + def setUp(self): self.set_atol() self.set_training() @@ -52,8 +53,9 @@ class TestWeightSharing(IPUOpTest): @IPUOpTest.static_graph def build_model(self): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='int64') + x = paddle.static.data(name=self.feed_list[0], + shape=self.feed_shape[0], + dtype='int64') with paddle.static.ipu_shard_guard(index=0, stage=0): y = paddle.fluid.layers.embedding( input=x, @@ -82,15 +84,15 @@ class TestWeightSharing(IPUOpTest): exe.run(self.startup_prog) if run_ipu: ipu_strategy = paddle.static.IpuStrategy() - ipu_strategy.set_graph_config( - num_ipus=2, - is_training=self.is_training, - enable_manual_shard=True) - ipu_strategy.set_pipelining_config( - enable_pipelining=True, batches_per_step=3) + ipu_strategy.set_graph_config(num_ipus=2, + is_training=self.is_training, + enable_manual_shard=True) + ipu_strategy.set_pipelining_config(enable_pipelining=True, + batches_per_step=3) program = paddle.static.IpuCompiledProgram( - self.main_prog, ipu_strategy=ipu_strategy).compile( - self.feed_list, self.fetch_list) + self.main_prog, + ipu_strategy=ipu_strategy).compile(self.feed_list, + self.fetch_list) else: program = self.main_prog @@ -103,8 +105,7 @@ class TestWeightSharing(IPUOpTest): res1 = self.run_model(True) self.assertTrue( - np.allclose( - res0.flatten(), res1[0].flatten(), atol=self.atol)) + np.allclose(res0.flatten(), res1[0].flatten(), atol=self.atol)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py b/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py index 161c785ef85..1676763a6d8 100755 --- 
a/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py @@ -36,22 +36,20 @@ import hypothesis.strategies as st logging.basicConfig(level=logging.INFO, format="%(message)s") -settings.register_profile( - "ci", - max_examples=100, - suppress_health_check=hypothesis.HealthCheck.all(), - deadline=None, - print_blob=True, - derandomize=True, - report_multiple_bugs=False) -settings.register_profile( - "dev", - max_examples=1000, - suppress_health_check=hypothesis.HealthCheck.all(), - deadline=None, - print_blob=True, - derandomize=True, - report_multiple_bugs=False) +settings.register_profile("ci", + max_examples=100, + suppress_health_check=hypothesis.HealthCheck.all(), + deadline=None, + print_blob=True, + derandomize=True, + report_multiple_bugs=False) +settings.register_profile("dev", + max_examples=1000, + suppress_health_check=hypothesis.HealthCheck.all(), + deadline=None, + print_blob=True, + derandomize=True, + report_multiple_bugs=False) if float(os.getenv('TEST_NUM_PERCENT_CASES', default='1.0')) < 1 or \ os.getenv('HYPOTHESIS_TEST_PROFILE', 'dev') == 'ci': settings.load_profile("ci") @@ -75,6 +73,7 @@ SkipReasons = IgnoreReasons class AutoScanTest(unittest.TestCase): + def __init__(self, *args, **kwargs): np.random.seed(1024) paddle.enable_static() @@ -102,11 +101,9 @@ class AutoScanTest(unittest.TestCase): raise NotImplementedError @abc.abstractmethod - def add_ignore_check_case( - self, - teller: [Callable[[ProgramConfig, paddle_infer.Config], bool]], - reason: IgnoreReasons, - note: str): + def add_ignore_check_case(self, teller: [ + Callable[[ProgramConfig, paddle_infer.Config], bool] + ], reason: IgnoreReasons, note: str): self.ignore_cases.append((teller, reason, note)) def is_program_valid(self, program_config: ProgramConfig) -> bool: @@ -134,9 +131,7 @@ class AutoScanTest(unittest.TestCase): return result @abc.abstractmethod - def assert_tensors_near(self, - atol: float, - rtol: float, + def assert_tensors_near(self, atol: float, rtol: float, tensor: Dict[str, np.array], baseline: Dict[str, np.array]): for key, arr in tensor.items(): @@ -146,8 +141,7 @@ class AutoScanTest(unittest.TestCase): str(baseline[key].shape) + ', but got ' + str(arr.shape)) diff = abs(baseline[key] - arr) self.assertTrue( - np.allclose( - baseline[key], arr, atol=atol, rtol=rtol), + np.allclose(baseline[key], arr, atol=atol, rtol=rtol), "Output has diff, Maximum absolute error: {}".format( np.amax(diff))) @@ -155,17 +149,16 @@ class AutoScanTest(unittest.TestCase): def run_test(self, quant=False): raise NotImplementedError - def generate_op_config(self, - ops_config: List[Dict[str, Any]]) -> List[OpConfig]: + def generate_op_config(self, ops_config: List[Dict[str, + Any]]) -> List[OpConfig]: ops = [] for i in range(len(ops_config)): op_config = ops_config[i] ops.append( - OpConfig( - type=op_config['op_type'], - inputs=op_config['op_inputs'], - outputs=op_config['op_outputs'], - attrs=op_config['op_attrs'])) + OpConfig(type=op_config['op_type'], + inputs=op_config['op_inputs'], + outputs=op_config['op_outputs'], + attrs=op_config['op_attrs'])) return ops @abc.abstractmethod @@ -182,10 +175,10 @@ class AutoScanTest(unittest.TestCase): @abc.abstractmethod def create_inference_config(self, - passes: Optional[List[str]]=None, - use_gpu: bool=False, - use_mkldnn: bool=False, - ir_optim: Optional[bool]=None): + passes: Optional[List[str]] = None, + use_gpu: bool = False, + use_mkldnn: bool = False, + ir_optim: 
Optional[bool] = None): config = paddle_infer.Config() config.switch_ir_debug(True) config.set_optim_cache_dir(self.cache_dir) @@ -203,6 +196,7 @@ class AutoScanTest(unittest.TestCase): class MkldnnAutoScanTest(AutoScanTest): + def __init__(self, *args, **kwargs): super(MkldnnAutoScanTest, self).__init__(*args, **kwargs) @@ -243,10 +237,10 @@ class MkldnnAutoScanTest(AutoScanTest): ignore_flag = True if ignore_info[ 1] == IgnoreReasons.MKLDNN_ACCURACY_ERROR: - self.ignore_log("[MKLDNN_ACCURACY_ERROR] " + - ignore_info[2] + ' ' + ' vs ' + - self.inference_config_str( - pred_config)) + self.ignore_log( + "[MKLDNN_ACCURACY_ERROR] " + ignore_info[2] + + ' ' + ' vs ' + + self.inference_config_str(pred_config)) else: raise NotImplementedError break @@ -269,8 +263,9 @@ class MkldnnAutoScanTest(AutoScanTest): if not ignore_flag: status = False continue - self.success_log('RUN predictor_config ' + self. - inference_config_str(pred_config) + ' done') + self.success_log('RUN predictor_config ' + + self.inference_config_str(pred_config) + + ' done') self.assertTrue(status) @@ -284,6 +279,7 @@ class MkldnnAutoScanTest(AutoScanTest): class PassAutoScanTest(AutoScanTest): + def __init__(self, *args, **kwargs): super(PassAutoScanTest, self).__init__(*args, **kwargs) self.passes = [] @@ -309,8 +305,8 @@ class PassAutoScanTest(AutoScanTest): self.passes[-1] + ".pdmodel") if not os.path.exists(last_passed_program): raise ValueError( - "Cannot find file {}, please make sure that your pass name is correct". - format(last_passed_program)) + "Cannot find file {}, please make sure that your pass name is correct" + .format(last_passed_program)) model_bytes = paddle.static.load_from_file(last_passed_program) pg = paddle.static.deserialize_program(model_bytes) main_block = pg.desc.block(0) @@ -322,7 +318,8 @@ class PassAutoScanTest(AutoScanTest): self.assertTrue( op_list_after_fusion == after_op_list, "Expected operator list after fusion is {}, but now it's {}".format( - op_list_after_fusion, after_op_list), ) + op_list_after_fusion, after_op_list), + ) def run_and_statis(self, quant=False, @@ -344,7 +341,8 @@ class PassAutoScanTest(AutoScanTest): deadline=None, print_blob=True, derandomize=True, - report_multiple_bugs=False, ) + report_multiple_bugs=False, + ) settings.load_profile("ci") assert passes is not None, "Parameter of passes must be defined in function run_and_statis." self.passes = passes @@ -372,8 +370,8 @@ class PassAutoScanTest(AutoScanTest): logging.info("Number of Ran Programs: {}".format(self.num_ran_programs)) logging.info("Number of Ignore Tests: {}".format(self.num_ignore_tests)) successful_ran_programs = int(self.num_ran_programs - - self.num_ignore_tests / max( - self.num_predictor_kinds, 1)) + self.num_ignore_tests / + max(self.num_predictor_kinds, 1)) logging.info( "Number of successfully ran programs approximately equal to {}". format(successful_ran_programs)) @@ -382,14 +380,14 @@ class PassAutoScanTest(AutoScanTest): "satisfied_programs = ran_programs - num_ignore_tests / num_predictor_kinds" ) logging.error( - "At least {} programs need to ran successfully, but now only about {} programs satisfied.". - format(min_success_num, successful_ran_programs)) + "At least {} programs need to ran successfully, but now only about {} programs satisfied." 
+ .format(min_success_num, successful_ran_programs)) assert False used_time = time.time() - start_time if max_duration > 0 and used_time > max_duration: logging.error( - "The duration exceeds {} seconds, if this is necessary, try to set a larger number for parameter `max_duration`.". - format(max_duration)) + "The duration exceeds {} seconds, if this is necessary, try to set a larger number for parameter `max_duration`." + .format(max_duration)) assert False def run_test(self, quant=False, prog_configs=None): @@ -425,10 +423,10 @@ class PassAutoScanTest(AutoScanTest): ignore_flag = True self.num_ignore_tests += 1 if ignore_info[1] == IgnoreReasons.PASS_ACCURACY_ERROR: - self.ignore_log("[PASS_ACCURACY_ERROR] " + - ignore_info[2] + ' ' + ' vs ' + - self.inference_config_str( - pred_config)) + self.ignore_log( + "[PASS_ACCURACY_ERROR] " + ignore_info[2] + + ' ' + ' vs ' + + self.inference_config_str(pred_config)) else: raise NotImplementedError break @@ -443,17 +441,19 @@ class PassAutoScanTest(AutoScanTest): ir_optim=False, use_gpu=pred_config.use_gpu()) try: # baseline - base_result = self.run_test_config( - model, params, prog_config, base_config, feed_data) + base_result = self.run_test_config(model, params, + prog_config, base_config, + feed_data) self.success_log('RUN_BASELINE ' + - self.inference_config_str( - base_config) + ' done') + self.inference_config_str(base_config) + + ' done') if os.path.exists(self.cache_dir): shutil.rmtree(self.cache_dir) - pred_result = self.run_test_config( - model, params, prog_config, pred_config, feed_data) + pred_result = self.run_test_config(model, params, + prog_config, pred_config, + feed_data) self.assert_tensors_near(atol, rtol, pred_result, base_result) if not ignore_flag: @@ -466,8 +466,9 @@ class PassAutoScanTest(AutoScanTest): if not ignore_flag: status = False continue - self.success_log('RUN predictor_config ' + self. - inference_config_str(pred_config) + ' done') + self.success_log('RUN predictor_config ' + + self.inference_config_str(pred_config) + + ' done') status = self.check_op_version() and status self.assertTrue(status) @@ -502,6 +503,7 @@ class PassAutoScanTest(AutoScanTest): class TrtLayerAutoScanTest(AutoScanTest): + class TensorRTParam: ''' TensorRT subgraph engine parameters. 
@@ -539,8 +541,7 @@ class TrtLayerAutoScanTest(AutoScanTest): use_calib_mode=False) self.dynamic_shape = self.DynamicShapeParam({}, {}, {}, False) self.num_percent_cases = float( - os.getenv( - 'TEST_NUM_PERCENT_CASES', default='1.0')) + os.getenv('TEST_NUM_PERCENT_CASES', default='1.0')) # Choose different tests by week np.random.seed(int(time.strftime("%W"))) @@ -582,12 +583,14 @@ class TrtLayerAutoScanTest(AutoScanTest): ] trt_engine_size = sum(op_types) paddle_op_size = op_size - trt_engine_size - self.assertTrue(trt_engine_size == trt_engine_num, - 'trt_engine_num is {}, but got {}!'.format( - trt_engine_size, trt_engine_num)) - self.assertTrue(paddle_op_size == paddle_op_num, - 'paddle_op_num is {}, but got {}!'.format( - paddle_op_size, paddle_op_num)) + self.assertTrue( + trt_engine_size == trt_engine_num, + 'trt_engine_num is {}, but got {}!'.format(trt_engine_size, + trt_engine_num)) + self.assertTrue( + paddle_op_size == paddle_op_num, + 'paddle_op_num is {}, but got {}!'.format(paddle_op_size, + paddle_op_num)) def inference_config_str(self, config: paddle_infer.Config) -> str: dic = {} @@ -651,8 +654,8 @@ class TrtLayerAutoScanTest(AutoScanTest): if isinstance(threshold, float): atol = threshold rtol = 1e-8 - elif isinstance(threshold, list) or isinstance(threshold, - tuple): + elif isinstance(threshold, list) or isinstance( + threshold, tuple): atol = threshold[0] rtol = threshold[1] else: @@ -670,14 +673,14 @@ class TrtLayerAutoScanTest(AutoScanTest): if ignore_info[0](prog_config, pred_config): ignore_flag = True if ignore_info[1] == IgnoreReasons.TRT_NOT_IMPLEMENTED: - self.ignore_log("[TRT_NOT_IMPLEMENTED] " + - ignore_info[2] + ' ' + ' vs ' + - self.inference_config_str( - pred_config)) + self.ignore_log( + "[TRT_NOT_IMPLEMENTED] " + ignore_info[2] + + ' ' + ' vs ' + + self.inference_config_str(pred_config)) elif ignore_info[1] == IgnoreReasons.TRT_NOT_SUPPORT: - self.ignore_log("[TRT_NOT_SUPPORT] " + ignore_info[ - 2] + ' ' + ' vs ' + self.inference_config_str( - pred_config)) + self.ignore_log( + "[TRT_NOT_SUPPORT] " + ignore_info[2] + ' ' + + ' vs ' + self.inference_config_str(pred_config)) else: raise NotImplementedError break @@ -702,15 +705,14 @@ class TrtLayerAutoScanTest(AutoScanTest): if not ignore_flag: status = False continue - self.success_log('RUN predictor_config ' + self. 
- inference_config_str(pred_config) + ' done') + self.success_log('RUN predictor_config ' + + self.inference_config_str(pred_config) + + ' done') self.assertTrue(status) # TODO(wilber): just for backward compatible - def add_skip_case( - self, - teller: [Callable[[ProgramConfig, paddle_infer.Config], bool]], - reason: IgnoreReasons, - note: str): + def add_skip_case(self, teller: [ + Callable[[ProgramConfig, paddle_infer.Config], bool] + ], reason: IgnoreReasons, note: str): self.ignore_cases.append((teller, reason, note)) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py index 88045324b38..91c7a8963c4 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py @@ -30,6 +30,7 @@ from paddle.fluid.core import create_paddle_predictor class InferencePassTest(unittest.TestCase): + def __init__(self, methodName='runTest'): paddle.enable_static() super(InferencePassTest, self).__init__(methodName) @@ -57,8 +58,8 @@ class InferencePassTest(unittest.TestCase): def _save_models(self, dirname, feeded_var_names, target_vars, executor, program, scope): with fluid.scope_guard(scope): - # save models as combined to ensure that - # there won't be too many useless files + # save models as combined to ensure that + # there won't be too many useless files # after finishing a couple of tests. fluid.io.save_inference_model(dirname, feeded_var_names, target_vars, executor, program) @@ -173,18 +174,17 @@ class InferencePassTest(unittest.TestCase): device = "GPU" if use_gpu else "CPU" with fluid.scope_guard(scope): executor.run(self.startup_program) - self._save_models(self.path, - list(self.feeds.keys()), self.fetch_list, executor, - self.main_program, scope) + self._save_models(self.path, list(self.feeds.keys()), self.fetch_list, + executor, self.main_program, scope) paddle_outs = self._get_paddle_outs(executor, self.main_program, scope) inference_outs = self._get_inference_outs( self._get_analysis_config(use_gpu=use_gpu)) - # Check whether the results calculated on CPU and on GPU are the same. + # Check whether the results calculated on CPU and on GPU are the same. self.assertTrue( len(paddle_outs) == len(inference_outs), - "The number of outputs is different between inference and training forward at {}". - format(device)) + "The number of outputs is different between inference and training forward at {}" + .format(device)) for out, inference_out in zip(paddle_outs, inference_outs): paddle_out = np.array(out) @@ -193,22 +193,21 @@ class InferencePassTest(unittest.TestCase): inference_out = inference_out.flatten() self.assertTrue( - np.allclose( - paddle_out, inference_out, atol=atol), + np.allclose(paddle_out, inference_out, atol=atol), "Output has diff between inference and training forward at {} ". format(device)) - # Check whether the trt results and the GPU results are the same. + # Check whether the trt results and the GPU results are the same. 
if use_gpu and self.enable_trt: tensorrt_outputs = self._get_inference_outs( - self._get_analysis_config( - use_gpu=use_gpu, use_trt=self.enable_trt)) + self._get_analysis_config(use_gpu=use_gpu, + use_trt=self.enable_trt)) if self.trt_parameters.use_static: #deserialize tensorrt_outputs = self._get_inference_outs( - self._get_analysis_config( - use_gpu=use_gpu, use_trt=self.enable_trt)) + self._get_analysis_config(use_gpu=use_gpu, + use_trt=self.enable_trt)) self.assertTrue( len(tensorrt_outputs) == len(paddle_outs), @@ -222,15 +221,17 @@ class InferencePassTest(unittest.TestCase): tensorrt_output = tensorrt_output.flatten() self.assertTrue( - np.allclose( - paddle_out, tensorrt_output, rtol=rtol, atol=atol), + np.allclose(paddle_out, + tensorrt_output, + rtol=rtol, + atol=atol), "Output has diff between GPU and TensorRT. ") - # Check whether the mkldnn results and the CPU results are the same. + # Check whether the mkldnn results and the CPU results are the same. if (not use_gpu) and self.enable_mkldnn: mkldnn_outputs = self._get_inference_outs( - self._get_analysis_config( - use_gpu=use_gpu, use_mkldnn=self.enable_mkldnn)) + self._get_analysis_config(use_gpu=use_gpu, + use_mkldnn=self.enable_mkldnn)) self.assertTrue( len(paddle_outs) == len(mkldnn_outputs), @@ -240,8 +241,7 @@ class InferencePassTest(unittest.TestCase): atol = 0.01 for paddle_out, mkldnn_output in zip(paddle_outs, mkldnn_outputs): self.assertTrue( - np.allclose( - np.array(paddle_out), mkldnn_output, atol=atol), + np.allclose(np.array(paddle_out), mkldnn_output, atol=atol), "Output has diff between CPU and MKLDNN. ") class TensorRTParam: diff --git a/python/paddle/fluid/tests/unittests/ir/inference/program_config.py b/python/paddle/fluid/tests/unittests/ir/inference/program_config.py index a8c43daab73..e634dd3dca5 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/program_config.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/program_config.py @@ -34,9 +34,9 @@ class TensorConfig: ''' def __init__(self, - lod: Optional[List[List[int]]]=None, - data_gen: Optional[Callable[..., np.array]]=None, - shape: Optional[List[List[int]]]=None): + lod: Optional[List[List[int]]] = None, + data_gen: Optional[Callable[..., np.array]] = None, + shape: Optional[List[List[int]]] = None): ''' shape: The shape of the tensor. dtype: The data type of the tensor. 
@@ -71,9 +71,9 @@ class OpConfig: type: str, inputs: Dict[str, List[str]], outputs: Dict[str, List[str]], - attrs: Dict[str, Any]=None, - outputs_var_type: Dict[str, VarType]=None, - outputs_dtype: Dict[str, np.dtype]=None, + attrs: Dict[str, Any] = None, + outputs_var_type: Dict[str, VarType] = None, + outputs_dtype: Dict[str, np.dtype] = None, **kwargs): self.type = type self.inputs = inputs @@ -109,9 +109,9 @@ class BlockConfig: def __init__(self, ops: List[OpConfig], vars: List[str], - vars_dtype: Dict[str, np.dtype]=None, - vars_var_type: Dict[str, VarType]=None, - vars_lod_level: Dict[str, int]=None): + vars_dtype: Dict[str, np.dtype] = None, + vars_var_type: Dict[str, VarType] = None, + vars_lod_level: Dict[str, int] = None): self.ops = ops self.vars = vars self.vars_dtype = vars_dtype @@ -165,8 +165,8 @@ class BlockConfig: if op_config.outputs_dtype is not None and v in op_config.outputs_dtype.keys( ): var_desc.set_dtype( - convert_np_dtype_to_dtype_(op_config.outputs_dtype[ - v])) + convert_np_dtype_to_dtype_( + op_config.outputs_dtype[v])) if op_config.type not in _OP_WITHOUT_KERNEL_SET: op_desc.infer_var_type(block_desc) op_desc.infer_shape(block_desc) @@ -176,11 +176,8 @@ class BlockConfig: class ProgramConfig: ''' A config builder for generating a Program. ''' - def __init__(self, - ops: List[OpConfig], - weights: Dict[str, TensorConfig], - inputs: Dict[str, TensorConfig], - outputs: List[str]): + def __init__(self, ops: List[OpConfig], weights: Dict[str, TensorConfig], + inputs: Dict[str, TensorConfig], outputs: List[str]): self.ops = ops # if no weight need to save, we create a place_holder to help seriazlie params. if not weights: @@ -260,12 +257,13 @@ def create_fake_model(program_config): out_var = util_program.global_block().create_var( type=core.VarDesc.VarType.RAW, name="out_var_0") out_var.desc.set_persistable(True) - util_program.global_block().append_op( - type='save_combine', - inputs={'X': in_vars}, - outputs={'Y': out_var}, - attrs={'file_path': '', - 'save_to_memory': True}) + util_program.global_block().append_op(type='save_combine', + inputs={'X': in_vars}, + outputs={'Y': out_var}, + attrs={ + 'file_path': '', + 'save_to_memory': True + }) for op_config in program_config.ops: op_desc = main_block_desc.append_op() op_desc.set_type(op_config.type) @@ -337,11 +335,10 @@ def create_quant_model(model, scope = global_scope() exe = paddle.static.Executor(place) [inference_program, feed_target_names, - fetch_targets] = paddle.static.load_inference_model( - path_prefix=None, - executor=exe, - model_filename=model, - params_filename=params) + fetch_targets] = paddle.static.load_inference_model(path_prefix=None, + executor=exe, + model_filename=model, + params_filename=params) graph = IrGraph(core.Graph(inference_program.desc), for_test=True) out_scale_op_list = [ @@ -489,18 +486,18 @@ def create_quant_model(model, tensor.set(np.ones(tensor.shape(), dtype=np.float32), place) if save: - fluid.io.save_inference_model( - 'test_inference_model', - feed_target_names, - fetch_targets, - exe, - main_program=main_program) + fluid.io.save_inference_model('test_inference_model', + feed_target_names, + fetch_targets, + exe, + main_program=main_program) feed_vars = [ main_program.global_block().var(name) for name in feed_target_names ] - serialized_program = paddle.static.serialize_program( - feed_vars, fetch_targets, program=main_program) + serialized_program = paddle.static.serialize_program(feed_vars, + fetch_targets, + program=main_program) serialized_params = 
paddle.static.serialize_persistables( feed_vars, fetch_targets, executor=exe, program=main_program) return serialized_program, serialized_params diff --git a/python/paddle/fluid/tests/unittests/ir/inference/quant_dequant_test.py b/python/paddle/fluid/tests/unittests/ir/inference/quant_dequant_test.py index 9ea72043379..b42a54e5efe 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/quant_dequant_test.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/quant_dequant_test.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -34,6 +34,7 @@ from paddle.fluid.core import AnalysisConfig class QuantDequantTest(unittest.TestCase): + def __init__(self, methodName='runTest'): super(QuantDequantTest, self).__init__(methodName) paddle.enable_static() @@ -58,7 +59,7 @@ class QuantDequantTest(unittest.TestCase): np.random.seed(1) random.seed(1) - # from Paddle release2.1 + # from Paddle release2.1 def _normalize_program(self, program, feed_vars, fetch_vars): if not isinstance(program, Program): raise TypeError( @@ -111,13 +112,12 @@ class QuantDequantTest(unittest.TestCase): def _save_models(self, dirname, feeded_var_names, target_vars, executor, program, scope): with fluid.scope_guard(scope): - fluid.io.save_inference_model( - dirname, - feeded_var_names, - target_vars, - executor, - program, - clip_extra=True) + fluid.io.save_inference_model(dirname, + feeded_var_names, + target_vars, + executor, + program, + clip_extra=True) def _get_paddle_outs(self, feed, fetch_list, executor, program, scope): ''' @@ -214,8 +214,8 @@ class QuantDequantTest(unittest.TestCase): executor.run(self.startup_program) executor.run(self.test_startup_program) main_graph = IrGraph(core.Graph(self.main_program.desc), for_test=False) - test_graph = IrGraph( - core.Graph(self.test_main_program.desc), for_test=True) + test_graph = IrGraph(core.Graph(self.test_main_program.desc), + for_test=True) transform_pass = QuantizationTransformPass( scope=scope, @@ -240,12 +240,11 @@ class QuantDequantTest(unittest.TestCase): iters = 10 batch_size = 1 - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=500), - batch_size=batch_size) - feeder = fluid.DataFeeder( - feed_list=[self.data, self.label], place=place) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=500), + batch_size=batch_size) + feeder = fluid.DataFeeder(feed_list=[self.data, self.label], + place=place) with fluid.scope_guard(scope): for _ in range(iters): data = next(train_reader()) @@ -266,23 +265,23 @@ class QuantDequantTest(unittest.TestCase): self.main_program = test_graph.to_program() with fluid.scope_guard(scope): - self.main_program = self._normalize_program( - self.main_program, self.data, self.fetch_list) + self.main_program = self._normalize_program(self.main_program, + self.data, + self.fetch_list) - self._save_models(self.path, - list(self.feeds.keys()), self.fetch_list, executor, - self.main_program, scope) + self._save_models(self.path, list(self.feeds.keys()), self.fetch_list, + 
executor, self.main_program, scope) paddle_outs = self._get_paddle_outs(self.feeds, self.fetch_list, executor, self.main_program, scope) inference_outs = self._get_inference_outs( self._get_analysis_config(use_gpu=use_gpu)) - # Check whether the results calculated on CPU and on GPU are the same. + # Check whether the results calculated on CPU and on GPU are the same. self.assertTrue( len(paddle_outs) == len(inference_outs), - "The number of outputs is different between inference and training forward at {}". - format(device)) + "The number of outputs is different between inference and training forward at {}" + .format(device)) for out, inference_out in zip(paddle_outs, inference_outs): paddle_out = np.array(out) @@ -292,22 +291,21 @@ class QuantDequantTest(unittest.TestCase): inference_out = inference_out.flatten() self.assertTrue( - np.allclose( - paddle_out, inference_out, atol=atol), + np.allclose(paddle_out, inference_out, atol=atol), "Output has diff between inference and training forward at {} ". format(device)) - # Check whether the trt results and the GPU results are the same. + # Check whether the trt results and the GPU results are the same. if use_gpu and self.enable_trt: tensorrt_outputs = self._get_inference_outs( - self._get_analysis_config( - use_gpu=use_gpu, use_trt=self.enable_trt)) + self._get_analysis_config(use_gpu=use_gpu, + use_trt=self.enable_trt)) if self.trt_parameters.use_static: #deserialize tensorrt_outputs = self._get_inference_outs( - self._get_analysis_config( - use_gpu=use_gpu, use_trt=self.enable_trt)) + self._get_analysis_config(use_gpu=use_gpu, + use_trt=self.enable_trt)) self.assertTrue( len(tensorrt_outputs) == len(paddle_outs), @@ -322,15 +320,17 @@ class QuantDequantTest(unittest.TestCase): tensorrt_output = tensorrt_output.flatten() self.assertTrue( - np.allclose( - paddle_out, tensorrt_output, rtol=rtol, atol=atol), + np.allclose(paddle_out, + tensorrt_output, + rtol=rtol, + atol=atol), "Output has diff between GPU and TensorRT. ") - # Check whether the mkldnn results and the CPU results are the same. + # Check whether the mkldnn results and the CPU results are the same. if (not use_gpu) and self.enable_mkldnn: mkldnn_outputs = self._get_inference_outs( - self._get_analysis_config( - use_gpu=use_gpu, use_mkldnn=self.enable_mkldnn)) + self._get_analysis_config(use_gpu=use_gpu, + use_mkldnn=self.enable_mkldnn)) self.assertTrue( len(paddle_outs) == len(mkldnn_outputs), @@ -340,8 +340,7 @@ class QuantDequantTest(unittest.TestCase): atol = 0.01 for paddle_out, mkldnn_output in zip(paddle_outs, mkldnn_outputs): self.assertTrue( - np.allclose( - np.array(paddle_out), mkldnn_output, atol=atol), + np.allclose(np.array(paddle_out), mkldnn_output, atol=atol), "Output has diff between CPU and MKLDNN. 
") class TensorRTParam: diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_adaptive_pool2d_convert_global_pass_autoscan.py b/python/paddle/fluid/tests/unittests/ir/inference/test_adaptive_pool2d_convert_global_pass_autoscan.py index a8c3009a5ae..c24a90d5084 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_adaptive_pool2d_convert_global_pass_autoscan.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_adaptive_pool2d_convert_global_pass_autoscan.py @@ -26,53 +26,55 @@ import hypothesis.strategies as st class TestAdaptivePool2dConvertGlobalPass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_config(self, draw): x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=4, + max_size=4)) pooling_type = draw(st.sampled_from(["max", "avg"])) data_format = "NCHW" #trt support this format only strides = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=2)) paddings = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=2)) ceil_mode = draw(st.booleans()) exclusive = draw(st.booleans()) global_pooling = draw(st.booleans()) padding_algorithm = draw(st.sampled_from(["EXPLICIT", "SAME", "VAILD"])) - pool_op = OpConfig( - "pool2d", - inputs={"X": ["input_data"]}, - outputs={"Out": ["pool_output"]}, - ksize=[1, 1], - adaptive=True, - pooling_type=pooling_type, - data_format=data_format, - strides=strides, - paddings=paddings, - ceil_mode=ceil_mode, - global_pooling=global_pooling, - padding_algorithm=padding_algorithm, - exclusive=exclusive) + pool_op = OpConfig("pool2d", + inputs={"X": ["input_data"]}, + outputs={"Out": ["pool_output"]}, + ksize=[1, 1], + adaptive=True, + pooling_type=pooling_type, + data_format=data_format, + strides=strides, + paddings=paddings, + ceil_mode=ceil_mode, + global_pooling=global_pooling, + padding_algorithm=padding_algorithm, + exclusive=exclusive) ops = [pool_op] - program_config = ProgramConfig( - ops=ops, - weights={}, - inputs={"input_data": TensorConfig(shape=x_shape), }, - outputs=["pool_output"]) + program_config = ProgramConfig(ops=ops, + weights={}, + inputs={ + "input_data": + TensorConfig(shape=x_shape), + }, + outputs=["pool_output"]) return program_config @@ -88,11 +90,10 @@ class TestAdaptivePool2dConvertGlobalPass(PassAutoScanTest): yield config, ['pool2d'], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, - max_examples=300, - passes=["adaptive_pool2d_convert_global_pass"], - min_success_num=40) + self.run_and_statis(quant=False, + max_examples=300, + passes=["adaptive_pool2d_convert_global_pass"], + min_success_num=40) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_act_mkldnn_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_act_mkldnn_fuse_pass.py index d029bcd6a7f..1516d1dafd3 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_act_mkldnn_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_act_mkldnn_fuse_pass.py @@ -76,9 +76,9 @@ class TestConvActMkldnnFusePass(PassAutoScanTest): def sample_program_config(self, draw): # 1. 
Generate shape of input:X of conv2d x_shape = draw( - st.lists( - st.integers( - min_value=5, max_value=100), min_size=4, max_size=4)) + st.lists(st.integers(min_value=5, max_value=100), + min_size=4, + max_size=4)) x_shape[1] = draw(st.integers(min_value=5, max_value=10)) # 2. Generate legal attr:data_format of conv2d @@ -86,9 +86,9 @@ class TestConvActMkldnnFusePass(PassAutoScanTest): # 3. Generate legal shape of input:Y of conv2d f_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=5), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=5), + min_size=4, + max_size=4)) if data_format == "NCHW": f_shape[1] = x_shape[1] else: @@ -96,37 +96,35 @@ class TestConvActMkldnnFusePass(PassAutoScanTest): # 4. Generate legal attr:strides of conv2d strides = draw( - st.lists( - st.integers( - min_value=1, max_value=5), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=5), + min_size=2, + max_size=2)) # 5. Generate legal attr:padding_algorithm of conv2d padding_algorithm = draw(st.sampled_from(["EXPLICIT", "SAME", "VALID"])) # 6. Generate legal attr:padding of conv2d padding = draw( - st.lists( - st.integers( - min_value=1, max_value=5), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=5), + min_size=4, + max_size=4)) # 7. Generate legal attr:groups of conv2d groups = draw(st.integers(min_value=1, max_value=3)) # 8. Generate legal attr:dilations of conv2d dilations = draw( - st.lists( - st.integers( - min_value=1, max_value=5), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=5), + min_size=2, + max_size=2)) # 9. Generate legal input:ResidualData of conv2d res_shape = [] if draw(st.booleans()): res_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=100), - min_size=4, - max_size=4)) + st.lists(st.integers(min_value=1, max_value=100), + min_size=4, + max_size=4)) # 10. Generate legal shape of input:bias of conv2d conv_bias_shape = [] @@ -159,17 +157,16 @@ class TestConvActMkldnnFusePass(PassAutoScanTest): act_type = draw( st.sampled_from(["relu", "leaky_relu", "relu6", "swish"])) - conv2d_op = OpConfig( - "conv2d", - inputs=inputs, - outputs={"Output": ["conv2d_out"]}, - strides=strides, - padding_algorithm=padding_algorithm, - paddings=padding, - groups=groups, - dilations=dilations, - data_format=data_format, - use_mkldnn=True) + conv2d_op = OpConfig("conv2d", + inputs=inputs, + outputs={"Output": ["conv2d_out"]}, + strides=strides, + padding_algorithm=padding_algorithm, + paddings=padding, + groups=groups, + dilations=dilations, + data_format=data_format, + use_mkldnn=True) # 11. 
Generate legal attr of act act_op = None @@ -177,33 +174,29 @@ class TestConvActMkldnnFusePass(PassAutoScanTest): if act_type == "relu6": self.passes = ["conv_relu6_mkldnn_fuse_pass"] threshold = draw(st.floats(min_value=1.0, max_value=10.0)) - act_op = OpConfig( - "relu6", - inputs={"X": ["conv2d_out"]}, - outputs={"Out": ["relu_out"]}, - threshold=threshold) + act_op = OpConfig("relu6", + inputs={"X": ["conv2d_out"]}, + outputs={"Out": ["relu_out"]}, + threshold=threshold) if act_type == "leaky_relu": self.passes = ["conv_leaky_relu_mkldnn_fuse_pass"] alpha = draw(st.floats(min_value=0.1, max_value=1.0)) - act_op = OpConfig( - "leaky_relu", - inputs={"X": ["conv2d_out"]}, - outputs={"Out": ["relu_out"]}, - alpha=alpha) + act_op = OpConfig("leaky_relu", + inputs={"X": ["conv2d_out"]}, + outputs={"Out": ["relu_out"]}, + alpha=alpha) if act_type == "relu": self.passes = ["conv_relu_mkldnn_fuse_pass"] - act_op = OpConfig( - "relu", - inputs={"X": ["conv2d_out"]}, - outputs={"Out": ["relu_out"]}) + act_op = OpConfig("relu", + inputs={"X": ["conv2d_out"]}, + outputs={"Out": ["relu_out"]}) if act_type == "swish": self.passes = ["conv_swish_mkldnn_fuse_pass"] beta = draw(st.floats(min_value=0.1, max_value=1.0)) - act_op = OpConfig( - "swish", - inputs={"X": ["conv2d_out"]}, - outputs={"Out": ["swish_out"]}, - beta=beta) + act_op = OpConfig("swish", + inputs={"X": ["conv2d_out"]}, + outputs={"Out": ["swish_out"]}, + beta=beta) ops = [conv2d_op, act_op] @@ -214,7 +207,8 @@ class TestConvActMkldnnFusePass(PassAutoScanTest): "input_x": TensorConfig(shape=x_shape), "residualdata": TensorConfig(shape=res_shape) }, - outputs=ops[-1].outputs["Out"], ) + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_bias_mkldnn_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_bias_mkldnn_fuse_pass.py index a0213c5b1f4..098cec71159 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_bias_mkldnn_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_bias_mkldnn_fuse_pass.py @@ -76,9 +76,9 @@ class TestConvBiasMkldnnFusePass(PassAutoScanTest): def sample_program_config(self, draw): # 1. Generate shape of input:X of conv2d x_shape = draw( - st.lists( - st.integers( - min_value=5, max_value=100), min_size=4, max_size=4)) + st.lists(st.integers(min_value=5, max_value=100), + min_size=4, + max_size=4)) x_shape[1] = draw(st.integers(min_value=5, max_value=10)) # 2. Generate legal attr:data_format of conv2d @@ -86,9 +86,9 @@ class TestConvBiasMkldnnFusePass(PassAutoScanTest): # 3. Generate legal shape of input:Y of conv2d f_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=4, + max_size=4)) if data_format == "NCHW": f_shape[1] = x_shape[1] else: @@ -96,27 +96,27 @@ class TestConvBiasMkldnnFusePass(PassAutoScanTest): # 4. Generate legal attr:strides of conv2d strides = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=2)) # 5. Generate legal attr:padding_algorithm of conv2d padding_algorithm = draw(st.sampled_from(["EXPLICIT", "SAME", "VALID"])) # 6. 
Generate legal attr:padding of conv2d padding = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=4, + max_size=4)) # 7. Generate legal attr:groups of conv2d groups = draw(st.integers(min_value=1, max_value=3)) # 8. Generate legal attr:dilations of conv2d dilations = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=2)) # 9. Generate legal shape of input:bias of elementwise_add bias_shape = [f_shape[0]] @@ -157,24 +157,24 @@ class TestConvBiasMkldnnFusePass(PassAutoScanTest): } use_mkldnn = False - conv2d_op = OpConfig( - "conv2d", - inputs=inputs, - outputs={"Output": ["conv2d_out"]}, - strides=strides, - padding_algorithm=padding_algorithm, - paddings=padding, - groups=groups, - dilations=dilations, - data_format=data_format, - use_mkldnn=use_mkldnn) - - add_op = OpConfig( - "elementwise_add", - inputs={"X": ["conv2d_out"], - "Y": ["bias"]}, - outputs={"Out": ["add_out"]}, - axis=axis) + conv2d_op = OpConfig("conv2d", + inputs=inputs, + outputs={"Output": ["conv2d_out"]}, + strides=strides, + padding_algorithm=padding_algorithm, + paddings=padding, + groups=groups, + dilations=dilations, + data_format=data_format, + use_mkldnn=use_mkldnn) + + add_op = OpConfig("elementwise_add", + inputs={ + "X": ["conv2d_out"], + "Y": ["bias"] + }, + outputs={"Out": ["add_out"]}, + axis=axis) ops = [conv2d_op, add_op] @@ -186,10 +186,9 @@ class TestConvBiasMkldnnFusePass(PassAutoScanTest): return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=350, - passes=["conv_bias_mkldnn_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=350, + passes=["conv_bias_mkldnn_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_bn_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_bn_fuse_pass.py index 67e97b0a375..e23089e7895 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_bn_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_bn_fuse_pass.py @@ -26,10 +26,10 @@ import hypothesis.strategies as st class TestConvBnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # mainly for TRT, which is invalid for current pass test framework!! 
if attrs[0]['data_format'] == "NHWC": @@ -49,17 +49,17 @@ class TestConvBnFusePass(PassAutoScanTest): out_channel = groups * out_channel_factor batch_size = draw(st.integers(min_value=1, max_value=4)) dilations = draw( - st.lists( - st.integers( - min_value=1, max_value=2), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=2), + min_size=2, + max_size=2)) paddings = draw( - st.lists( - st.integers( - min_value=0, max_value=2), min_size=2, max_size=2)) + st.lists(st.integers(min_value=0, max_value=2), + min_size=2, + max_size=2)) strides = draw( - st.lists( - st.integers( - min_value=1, max_value=2), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=2), + min_size=2, + max_size=2)) has_bias = draw(st.booleans()) use_mkldnn = draw(st.booleans()) epsilon = draw(st.floats(min_value=0.0, max_value=0.001)) @@ -94,43 +94,41 @@ class TestConvBnFusePass(PassAutoScanTest): def generate_bn_Var(): return np.random.random(var_shape).astype(np.float32) - conv2d_op = OpConfig( - "conv2d", - inputs={ - "Input": ["conv2d_input"], - "Filter": ["conv2d_weight"], - }, - outputs={"Output": ["conv2d_out"]}, - data_format=data_format, - dilations=dilations, - padding_algorithm=padding_algorithm, - groups=groups, - paddings=paddings, - strides=strides, - use_mkldnn=use_mkldnn, - has_bias=has_bias, - is_test=True) - bn_op = OpConfig( - "batch_norm", - inputs={ - "X": ["conv2d_out"], - "Scale": ["batch_norm_Scale"], - "Bias": ["batch_norm_Bias"], - "Mean": ["batch_norm_Mean"], - "Variance": ["batch_norm_Variance"], - }, - outputs={ - "Y": ["batch_norm_Y"], - "MeanOut": ["batch_norm_Mean"], - "VarianceOut": ["batch_norm_Variance"], - "SavedMean": ["batch_norm_SavedMean"], - "SavedVariance": ["batch_norm_SavedVariance"], - "ReserveSpace": ["batch_norm_ReserveSpace"], - }, - epsilon=epsilon, - trainable_statistics=False, - data_layout=data_format, - is_test=True) + conv2d_op = OpConfig("conv2d", + inputs={ + "Input": ["conv2d_input"], + "Filter": ["conv2d_weight"], + }, + outputs={"Output": ["conv2d_out"]}, + data_format=data_format, + dilations=dilations, + padding_algorithm=padding_algorithm, + groups=groups, + paddings=paddings, + strides=strides, + use_mkldnn=use_mkldnn, + has_bias=has_bias, + is_test=True) + bn_op = OpConfig("batch_norm", + inputs={ + "X": ["conv2d_out"], + "Scale": ["batch_norm_Scale"], + "Bias": ["batch_norm_Bias"], + "Mean": ["batch_norm_Mean"], + "Variance": ["batch_norm_Variance"], + }, + outputs={ + "Y": ["batch_norm_Y"], + "MeanOut": ["batch_norm_Mean"], + "VarianceOut": ["batch_norm_Variance"], + "SavedMean": ["batch_norm_SavedMean"], + "SavedVariance": ["batch_norm_SavedVariance"], + "ReserveSpace": ["batch_norm_ReserveSpace"], + }, + epsilon=epsilon, + trainable_statistics=False, + data_layout=data_format, + is_test=True) if has_bias == True: conv2d_op.inputs["Bias"] = ["conv2d_bias"] ops = [conv2d_op, bn_op] @@ -144,10 +142,14 @@ class TestConvBnFusePass(PassAutoScanTest): weights={ "conv2d_weight": TensorConfig(data_gen=partial(generate_conv2d_Filter)), - "batch_norm_Scale": TensorConfig(data_gen=generate_bn_Scale), - "batch_norm_Bias": TensorConfig(data_gen=generate_bn_Bias), - "batch_norm_Mean": TensorConfig(data_gen=generate_bn_Mean), - "batch_norm_Variance": TensorConfig(data_gen=generate_bn_Var), + "batch_norm_Scale": + TensorConfig(data_gen=generate_bn_Scale), + "batch_norm_Bias": + TensorConfig(data_gen=generate_bn_Bias), + "batch_norm_Mean": + TensorConfig(data_gen=generate_bn_Mean), + "batch_norm_Variance": + 
TensorConfig(data_gen=generate_bn_Var), }, outputs=["batch_norm_Y"]) if has_bias == True: @@ -182,6 +184,7 @@ class TestConvBnFusePass(PassAutoScanTest): yield config, ['conv2d_fusion'], (1e-5, 1e-5) def add_ignore_pass_case(self): + def teller1(program_config, predictor_config): if program_config.ops[0].attrs[ 'data_format'] == "NHWC" and not predictor_config.mkldnn_enabled( @@ -191,8 +194,8 @@ class TestConvBnFusePass(PassAutoScanTest): # mkldnn Output has diff with bias! def teller2(program_config, predictor_config): - return predictor_config.mkldnn_enabled() and program_config.ops[ - 0].attrs['has_bias'] == True + return predictor_config.mkldnn_enabled( + ) and program_config.ops[0].attrs['has_bias'] == True self.add_ignore_check_case( teller1, IgnoreReasons.PASS_ACCURACY_ERROR, @@ -206,7 +209,8 @@ class TestConvBnFusePass(PassAutoScanTest): def test(self): self.run_and_statis( quant=False, - passes=["conv_bn_fuse_pass"], ) + passes=["conv_bn_fuse_pass"], + ) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add2_act_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add2_act_fuse_pass.py index 9dd41bd1c39..56ce8f3ea3b 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add2_act_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add2_act_fuse_pass.py @@ -92,11 +92,9 @@ class TestConvElementwiseAdd2ActPass(PassAutoScanTest): while is_not_valid: # 1. Generate shape of input:X of conv2d x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=100), - min_size=4, - max_size=4)) + st.lists(st.integers(min_value=1, max_value=100), + min_size=4, + max_size=4)) x_shape[1] = draw(st.integers(min_value=1, max_value=10)) # 2. Generate legal attr:data_format of conv2d @@ -104,11 +102,9 @@ class TestConvElementwiseAdd2ActPass(PassAutoScanTest): # 3. Generate legal shape of input:Y of conv2d f_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=7), - min_size=4, - max_size=4)) + st.lists(st.integers(min_value=1, max_value=7), + min_size=4, + max_size=4)) if data_format == "NCHW": f_shape[1] = x_shape[1] else: @@ -116,11 +112,9 @@ class TestConvElementwiseAdd2ActPass(PassAutoScanTest): # 4. Generate legal attr:strides of conv2d strides = draw( - st.lists( - st.integers( - min_value=1, max_value=5), - min_size=2, - max_size=2)) + st.lists(st.integers(min_value=1, max_value=5), + min_size=2, + max_size=2)) # 5. Generate legal attr:padding_algorithm of conv2d padding_algorithm = draw( @@ -128,22 +122,18 @@ class TestConvElementwiseAdd2ActPass(PassAutoScanTest): # 6. Generate legal attr:padding of conv2d padding = draw( - st.lists( - st.integers( - min_value=1, max_value=5), - min_size=4, - max_size=4)) + st.lists(st.integers(min_value=1, max_value=5), + min_size=4, + max_size=4)) # 7. Generate legal attr:groups of conv2d groups = draw(st.integers(min_value=1, max_value=3)) # 8. Generate legal attr:dilations of conv2d dilations = draw( - st.lists( - st.integers( - min_value=1, max_value=5), - min_size=2, - max_size=2)) + st.lists(st.integers(min_value=1, max_value=5), + min_size=2, + max_size=2)) # 9. Generate legal elemntwise_add: X of conv2d bias_2_dict = dict() @@ -179,35 +169,37 @@ class TestConvElementwiseAdd2ActPass(PassAutoScanTest): # 12. 
Generate legal attr:axis of elementwise_add_2 axis_2 = -1 - conv2d_op = OpConfig( - "conv2d", - inputs={"Input": ["input_x"], - "Filter": ["filter"]}, - outputs={"Output": ["conv2d_out"]}, - strides=strides, - padding_algorithm=padding_algorithm, - paddings=padding, - groups=groups, - dilations=dilations, - data_format=data_format) - add_1_op = OpConfig( - "elementwise_add", - inputs={"X": ["conv2d_out"], - "Y": ["bias_1"]}, - outputs={"Out": ["add_1_out"]}, - axis=axis_1) - - add_2_op = OpConfig( - "elementwise_add", - inputs={"X": ["bias_2"], - "Y": ["add_1_out"]}, - outputs={"Out": ["add_out"]}, - axis=axis_2) - - relu_op = OpConfig( - "relu", - inputs={"X": ["add_out"]}, - outputs={"Out": ["relu_out"]}) + conv2d_op = OpConfig("conv2d", + inputs={ + "Input": ["input_x"], + "Filter": ["filter"] + }, + outputs={"Output": ["conv2d_out"]}, + strides=strides, + padding_algorithm=padding_algorithm, + paddings=padding, + groups=groups, + dilations=dilations, + data_format=data_format) + add_1_op = OpConfig("elementwise_add", + inputs={ + "X": ["conv2d_out"], + "Y": ["bias_1"] + }, + outputs={"Out": ["add_1_out"]}, + axis=axis_1) + + add_2_op = OpConfig("elementwise_add", + inputs={ + "X": ["bias_2"], + "Y": ["add_1_out"] + }, + outputs={"Out": ["add_out"]}, + axis=axis_2) + + relu_op = OpConfig("relu", + inputs={"X": ["add_out"]}, + outputs={"Out": ["relu_out"]}) ops = [conv2d_op, add_1_op, add_2_op, relu_op] @@ -221,14 +213,14 @@ class TestConvElementwiseAdd2ActPass(PassAutoScanTest): "input_x": TensorConfig(shape=x_shape), "bias_2": TensorConfig(shape=bias_2_shape) }, - outputs=ops[-1].outputs["Out"], ) + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=300, - passes=["conv_elementwise_add2_act_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=300, + passes=["conv_elementwise_add2_act_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_act_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_act_fuse_pass.py index 0d93ae9a7d2..f1d2192a4c7 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_act_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_act_fuse_pass.py @@ -81,9 +81,9 @@ class TestConvElementwiseAddActPass(PassAutoScanTest): def sample_program_config(self, draw): # 1. Generate shape of input:X of conv2d x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=100), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=100), + min_size=4, + max_size=4)) x_shape[1] = draw(st.integers(min_value=1, max_value=10)) # 2. Generate legal attr:data_format of conv2d @@ -91,9 +91,9 @@ class TestConvElementwiseAddActPass(PassAutoScanTest): # 3. Generate legal shape of input:Y of conv2d f_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=7), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=7), + min_size=4, + max_size=4)) if data_format == "NCHW": f_shape[1] = x_shape[1] else: @@ -101,37 +101,35 @@ class TestConvElementwiseAddActPass(PassAutoScanTest): # 4. Generate legal attr:strides of conv2d strides = draw( - st.lists( - st.integers( - min_value=1, max_value=5), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=5), + min_size=2, + max_size=2)) # 5. 
Generate legal attr:padding_algorithm of conv2d padding_algorithm = draw(st.sampled_from(["EXPLICIT", "SAME", "VALID"])) # 6. Generate legal attr:padding of conv2d padding = draw( - st.lists( - st.integers( - min_value=1, max_value=5), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=5), + min_size=4, + max_size=4)) # 7. Generate legal attr:groups of conv2d groups = draw(st.integers(min_value=1, max_value=3)) # 8. Generate legal attr:dilations of conv2d dilations = draw( - st.lists( - st.integers( - min_value=1, max_value=5), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=5), + min_size=2, + max_size=2)) # 9. Generate legal input:ResidualData of conv2d res_shape = [] if draw(st.booleans()): res_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=100), - min_size=4, - max_size=4)) + st.lists(st.integers(min_value=1, max_value=100), + min_size=4, + max_size=4)) # 10. Generate legal shape of input:bias of elementwise_add bias_shape = [f_shape[0]] @@ -139,29 +137,30 @@ class TestConvElementwiseAddActPass(PassAutoScanTest): # 11. Generate legal attr:axis of elementwise_add axis = 1 - conv2d_op = OpConfig( - "conv2d", - inputs={ - "Input": ["input_x"], - "Filter": ["filter"], - "ResidualData": ["residualdata"] - }, - outputs={"Output": ["conv2d_out"]}, - strides=strides, - padding_algorithm=padding_algorithm, - paddings=padding, - groups=groups, - dilations=dilations, - data_format=data_format) - add_op = OpConfig( - "elementwise_add", - inputs={"X": ["conv2d_out"], - "Y": ["bias"]}, - outputs={"Out": ["add_out"]}, - axis=axis) - - relu_op = OpConfig( - "relu", inputs={"X": ["add_out"]}, outputs={"Out": ["relu_out"]}) + conv2d_op = OpConfig("conv2d", + inputs={ + "Input": ["input_x"], + "Filter": ["filter"], + "ResidualData": ["residualdata"] + }, + outputs={"Output": ["conv2d_out"]}, + strides=strides, + padding_algorithm=padding_algorithm, + paddings=padding, + groups=groups, + dilations=dilations, + data_format=data_format) + add_op = OpConfig("elementwise_add", + inputs={ + "X": ["conv2d_out"], + "Y": ["bias"] + }, + outputs={"Out": ["add_out"]}, + axis=axis) + + relu_op = OpConfig("relu", + inputs={"X": ["add_out"]}, + outputs={"Out": ["relu_out"]}) ops = [conv2d_op, add_op, relu_op] @@ -175,14 +174,14 @@ class TestConvElementwiseAddActPass(PassAutoScanTest): "input_x": TensorConfig(shape=x_shape), "residualdata": TensorConfig(shape=res_shape) }, - outputs=ops[-1].outputs["Out"], ) + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=400, - passes=["conv_elementwise_add_act_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=400, + passes=["conv_elementwise_add_act_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_fuse_pass.py index 0bcee474d13..5b33a18af8e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_fuse_pass.py @@ -26,10 +26,10 @@ import hypothesis.strategies as st class TestConvEltwiseAddFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if 
attrs[0]['data_format'] == "NHWC" and attrs[1]['axis'] != 3: @@ -49,17 +49,17 @@ class TestConvEltwiseAddFusePass(PassAutoScanTest): out_channel = groups * out_channel_factor batch_size = draw(st.integers(min_value=1, max_value=4)) dilations = draw( - st.lists( - st.integers( - min_value=1, max_value=2), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=2), + min_size=2, + max_size=2)) paddings = draw( - st.lists( - st.integers( - min_value=0, max_value=2), min_size=2, max_size=2)) + st.lists(st.integers(min_value=0, max_value=2), + min_size=2, + max_size=2)) strides = draw( - st.lists( - st.integers( - min_value=1, max_value=2), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=2), + min_size=2, + max_size=2)) x_shape = [ batch_size, in_channel, 64, 64 @@ -80,26 +80,26 @@ class TestConvEltwiseAddFusePass(PassAutoScanTest): def generate_scale_bias(): return np.random.random(bias_shape).astype(np.float32) - conv2d_op = OpConfig( - "conv2d", - inputs={ - "Input": ["input_data"], - "Filter": ["conv2d_weight"], - }, - outputs={"Output": ["conv_output"]}, - data_format=data_format, - dilations=dilations, - padding_algorithm=padding_algorithm, - groups=groups, - paddings=paddings, - strides=strides, - is_test=True) - eltwise_op = OpConfig( - "elementwise_add", - inputs={"X": ["conv_output"], - "Y": ["conv2d_bias"]}, - outputs={"Out": ["elementwise_output"]}, - axis=axis) + conv2d_op = OpConfig("conv2d", + inputs={ + "Input": ["input_data"], + "Filter": ["conv2d_weight"], + }, + outputs={"Output": ["conv_output"]}, + data_format=data_format, + dilations=dilations, + padding_algorithm=padding_algorithm, + groups=groups, + paddings=paddings, + strides=strides, + is_test=True) + eltwise_op = OpConfig("elementwise_add", + inputs={ + "X": ["conv_output"], + "Y": ["conv2d_bias"] + }, + outputs={"Out": ["elementwise_output"]}, + axis=axis) ops = [conv2d_op, eltwise_op] program_config = ProgramConfig( @@ -132,7 +132,7 @@ class TestConvEltwiseAddFusePass(PassAutoScanTest): yield config, ['conv2d_fusion'], (1e-4, 1e-4) def add_ignore_pass_case(self): - # If the problem has been fixed, the judgment + # If the problem has been fixed, the judgment # in is_program_valid needs to be deleted!!! def teller1(program_config, predictor_config): if program_config.ops[0].attrs['data_format'] == "NHWC": @@ -149,7 +149,8 @@ class TestConvEltwiseAddFusePass(PassAutoScanTest): def test(self): self.run_and_statis( quant=False, - passes=["conv_elementwise_add_fuse_pass"], ) + passes=["conv_elementwise_add_fuse_pass"], + ) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_eltwiseadd_bn_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_eltwiseadd_bn_fuse_pass.py index c8319a5f3d7..4463f954371 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_eltwiseadd_bn_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_eltwiseadd_bn_fuse_pass.py @@ -107,11 +107,9 @@ class TestConvEltwiseaddBnFusePass(PassAutoScanTest): def sample_program_config(self, draw): # 1. Generate shape of input:X of conv2d x_shape = draw( - st.lists( - st.integers( - min_value=10, max_value=100), - min_size=4, - max_size=4)) + st.lists(st.integers(min_value=10, max_value=100), + min_size=4, + max_size=4)) x_shape[1] = draw(st.integers(min_value=1, max_value=10)) # 2. Generate legal attr:data_format of conv2d @@ -119,9 +117,9 @@ class TestConvEltwiseaddBnFusePass(PassAutoScanTest): # 2. 
Generate legal shape of input:Y of conv2d f_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=7), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=7), + min_size=4, + max_size=4)) if data_format == "NCHW": f_shape[1] = x_shape[1] else: @@ -129,37 +127,35 @@ class TestConvEltwiseaddBnFusePass(PassAutoScanTest): # 3. Generate legal attr:strides of conv2d strides = draw( - st.lists( - st.integers( - min_value=1, max_value=5), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=5), + min_size=2, + max_size=2)) # 4. Generate legal attr:padding_algorithm of conv2d padding_algorithm = draw(st.sampled_from(["EXPLICIT", "SAME", "VALID"])) # 5. Generate legal attr:padding of conv2d padding = draw( - st.lists( - st.integers( - min_value=1, max_value=5), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=5), + min_size=4, + max_size=4)) # 6. Generate legal attr:groups of conv2d groups = draw(st.integers(min_value=1, max_value=3)) # 7. Generate legal attr:dilations of conv2d dilations = draw( - st.lists( - st.integers( - min_value=1, max_value=5), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=5), + min_size=2, + max_size=2)) # 9. Generate legal input:ResidualData of conv2d res_shape = [] if draw(st.booleans()): res_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=100), - min_size=4, - max_size=4)) + st.lists(st.integers(min_value=1, max_value=100), + min_size=4, + max_size=4)) # 10. Generate legal shape of input:bias of elementwise_add bias_shape = [f_shape[0]] @@ -183,51 +179,51 @@ class TestConvEltwiseaddBnFusePass(PassAutoScanTest): epsilon = draw(st.floats(min_value=0.00001, max_value=0.001)) def generate_batch_variance(): - return (0.1 + (1.0 - 0.1) * np.random.random(bn_variance_shape) - ).astype(np.float32) - - conv2d_op = OpConfig( - "conv2d", - inputs={ - "Input": ["input_x"], - "Filter": ["filter"], - "ResidualData": ["residualdata"] - }, - outputs={"Output": ["conv2d_out"]}, - strides=strides, - padding_algorithm=padding_algorithm, - paddings=padding, - groups=groups, - dilations=dilations, - data_format=data_format) - add_op = OpConfig( - "elementwise_add", - inputs={"X": ["conv2d_out"], - "Y": ["bias"]}, - outputs={"Out": ["add_out"]}, - axis=axis) - - bn_op = OpConfig( - "batch_norm", - inputs={ - "X": ["add_out"], - "Scale": ["scale_in"], - "Bias": ["bias_in"], - "Mean": ["mean_in"], - "Variance": ["variance_in"] - }, - outputs={ - "Y": ["y_out"], - "MeanOut": ["mean_in"], - "VarianceOut": ["variance_in"], - "SavedMean": ["SavedMean_out"], - "SavedVariance": ["SavedVariance_out"], - "ReserveSpace": ["ReserveSpace_out"] - }, - epsilon=epsilon, - is_test=True, - trainable_statistics=False, - data_layout=data_format) + return (0.1 + + (1.0 - 0.1) * np.random.random(bn_variance_shape)).astype( + np.float32) + + conv2d_op = OpConfig("conv2d", + inputs={ + "Input": ["input_x"], + "Filter": ["filter"], + "ResidualData": ["residualdata"] + }, + outputs={"Output": ["conv2d_out"]}, + strides=strides, + padding_algorithm=padding_algorithm, + paddings=padding, + groups=groups, + dilations=dilations, + data_format=data_format) + add_op = OpConfig("elementwise_add", + inputs={ + "X": ["conv2d_out"], + "Y": ["bias"] + }, + outputs={"Out": ["add_out"]}, + axis=axis) + + bn_op = OpConfig("batch_norm", + inputs={ + "X": ["add_out"], + "Scale": ["scale_in"], + "Bias": ["bias_in"], + "Mean": ["mean_in"], + "Variance": ["variance_in"] + }, + outputs={ + "Y": ["y_out"], + "MeanOut": 
["mean_in"], + "VarianceOut": ["variance_in"], + "SavedMean": ["SavedMean_out"], + "SavedVariance": ["SavedVariance_out"], + "ReserveSpace": ["ReserveSpace_out"] + }, + epsilon=epsilon, + is_test=True, + trainable_statistics=False, + data_layout=data_format) ops = [conv2d_op, add_op, bn_op] @@ -255,10 +251,9 @@ class TestConvEltwiseaddBnFusePass(PassAutoScanTest): return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=300, - passes=["conv_eltwiseadd_bn_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=300, + passes=["conv_eltwiseadd_bn_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_transpose_bn_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_transpose_bn_fuse_pass.py index 62515fc2177..6ecfa50d653 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_transpose_bn_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_transpose_bn_fuse_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -40,11 +40,10 @@ class TestConvTransposeBnFusePass(PassAutoScanTest): ''' def test(self): - self.run_and_statis( - quant=False, - max_examples=150, - max_duration=250, - passes=["conv_transpose_bn_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=150, + max_duration=250, + passes=["conv_transpose_bn_fuse_pass"]) def sample_program_config(self, draw): # generate random number @@ -54,26 +53,26 @@ class TestConvTransposeBnFusePass(PassAutoScanTest): random_input_dim2 = draw(st.integers(min_value=20, max_value=50)) random_groups = draw(st.integers(min_value=1, max_value=2)) random_dilations = draw( - st.lists( - st.integers( - min_value=1, max_value=3), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=3), + min_size=2, + max_size=2)) random_strides = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=2)) random_paddings = draw( - st.lists( - st.integers( - min_value=0, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=0, max_value=4), + min_size=2, + max_size=2)) random_padding_algorithm = draw( st.sampled_from(["EXPLICIT", "SAME", "VALID"])) random_data_layout = draw(st.sampled_from(["NCHW", "NHWC"])) random_use_mkldnn = draw(st.booleans()) random_output_size = [] random_filter = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=2)) random_out_channel = draw(st.integers(min_value=10, max_value=25)) random_epsilon = draw(st.floats(min_value=0.0, max_value=0.001)) @@ -94,24 +93,24 @@ class TestConvTransposeBnFusePass(PassAutoScanTest): return np.random.random(shape).astype(np.float32) def generate_batch_norm_Scale(): - return np.random.random( - [random_out_channel * random_groups * random_groups]).astype( - np.float32) + return np.random.random([ + random_out_channel * 
random_groups * random_groups + ]).astype(np.float32) def generate_batch_norm_Bias(): - return np.random.random( - [random_out_channel * random_groups * random_groups]).astype( - np.float32) + return np.random.random([ + random_out_channel * random_groups * random_groups + ]).astype(np.float32) def generate_batch_norm_Mean(): - return np.random.random( - [random_out_channel * random_groups * random_groups]).astype( - np.float32) + return np.random.random([ + random_out_channel * random_groups * random_groups + ]).astype(np.float32) def generate_batch_norm_Variance(): - return np.random.random( - [random_out_channel * random_groups * random_groups]).astype( - np.float32) + return np.random.random([ + random_out_channel * random_groups * random_groups + ]).astype(np.float32) # define op conv2d_op = OpConfig( @@ -121,7 +120,9 @@ class TestConvTransposeBnFusePass(PassAutoScanTest): "Filter": ["conv2d_Filter"], #"Bias": ["conv2d_Bias"], }, - outputs={"Output": ["conv2d_Out"], }, + outputs={ + "Output": ["conv2d_Out"], + }, attrs={ 'groups': random_groups, 'dilations': random_dilations, @@ -135,30 +136,31 @@ class TestConvTransposeBnFusePass(PassAutoScanTest): 'is_test': True, }) - batch_norm_op = OpConfig( - type="batch_norm", - inputs={ - "X": ["conv2d_Out"], - "Scale": ["batch_norm_Scale"], - "Bias": ["batch_norm_Bias"], - "Mean": ["batch_norm_Mean"], - "Variance": ["batch_norm_Variance"], - }, - outputs={ - "Y": ["batch_norm_Y"], - "MeanOut": ["batch_norm_Mean"], - "VarianceOut": ["batch_norm_Variance"], - "SavedMean": ["batch_norm_SavedMean"], - "SavedVariance": ["batch_norm_SavedVariance"], - "ReserveSpace": ["batch_norm_ReserveSpace"], - }, - attrs={ - 'epsilon': random_epsilon, - 'is_test': True, - 'trainable_statistics': False, - 'data_layout': random_data_layout, - 'use_mkldnn': random_use_mkldnn, - }) + batch_norm_op = OpConfig(type="batch_norm", + inputs={ + "X": ["conv2d_Out"], + "Scale": ["batch_norm_Scale"], + "Bias": ["batch_norm_Bias"], + "Mean": ["batch_norm_Mean"], + "Variance": ["batch_norm_Variance"], + }, + outputs={ + "Y": ["batch_norm_Y"], + "MeanOut": ["batch_norm_Mean"], + "VarianceOut": ["batch_norm_Variance"], + "SavedMean": ["batch_norm_SavedMean"], + "SavedVariance": + ["batch_norm_SavedVariance"], + "ReserveSpace": + ["batch_norm_ReserveSpace"], + }, + attrs={ + 'epsilon': random_epsilon, + 'is_test': True, + 'trainable_statistics': False, + 'data_layout': random_data_layout, + 'use_mkldnn': random_use_mkldnn, + }) # define model_net model_net = [conv2d_op, batch_norm_op] @@ -169,7 +171,8 @@ class TestConvTransposeBnFusePass(PassAutoScanTest): "conv2d_Input": TensorConfig(data_gen=generate_conv2d_Input), }, weights={ - "conv2d_Filter": TensorConfig(data_gen=generate_conv2d_Filter), + "conv2d_Filter": + TensorConfig(data_gen=generate_conv2d_Filter), "batch_norm_Scale": TensorConfig(data_gen=generate_batch_norm_Scale), "batch_norm_Bias": @@ -195,8 +198,7 @@ class TestConvTransposeBnFusePass(PassAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if attrs[0]['data_format'] == "NHWC": @@ -205,6 +207,7 @@ class TestConvTransposeBnFusePass(PassAutoScanTest): return True def add_ignore_pass_case(self): + def teller1(program_config, predictor_config): if program_config.ops[0].attrs['data_format'] == "NHWC": return True diff --git 
a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_transpose_eltwiseadd_bn_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_transpose_eltwiseadd_bn_fuse_pass.py index 58ae05183a4..29099b9a7a5 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_transpose_eltwiseadd_bn_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_transpose_eltwiseadd_bn_fuse_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -44,11 +44,10 @@ class TestConvTransposeEltwiseaddBnFusePass(PassAutoScanTest): ''' def test(self): - self.run_and_statis( - quant=False, - max_examples=150, - max_duration=250, - passes=["conv_transpose_eltwiseadd_bn_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=150, + max_duration=250, + passes=["conv_transpose_eltwiseadd_bn_fuse_pass"]) def sample_program_config(self, draw): # generate random number @@ -58,26 +57,26 @@ class TestConvTransposeEltwiseaddBnFusePass(PassAutoScanTest): random_input_dim2 = draw(st.integers(min_value=20, max_value=50)) random_groups = draw(st.integers(min_value=1, max_value=2)) random_dilations = draw( - st.lists( - st.integers( - min_value=1, max_value=3), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=3), + min_size=2, + max_size=2)) random_strides = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=2)) random_paddings = draw( - st.lists( - st.integers( - min_value=0, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=0, max_value=4), + min_size=2, + max_size=2)) random_padding_algorithm = draw( st.sampled_from(["EXPLICIT", "SAME", "VALID"])) random_data_layout = draw(st.sampled_from(["NCHW", "NHWC"])) random_use_mkldnn = draw(st.booleans()) random_output_size = [] random_filter = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=2)) random_out_channel = draw(st.integers(min_value=20, max_value=25)) random_epsilon = draw(st.floats(min_value=0.0, max_value=0.001)) @@ -98,84 +97,89 @@ class TestConvTransposeEltwiseaddBnFusePass(PassAutoScanTest): return np.random.random(shape).astype(np.float32) def generate_elementwise_add_Y(): - return np.random.random( - [random_out_channel * random_groups * random_groups]).astype( - np.float32) + return np.random.random([ + random_out_channel * random_groups * random_groups + ]).astype(np.float32) def generate_batch_norm_Scale(): - return np.random.random( - [random_out_channel * random_groups * random_groups]).astype( - np.float32) + return np.random.random([ + random_out_channel * random_groups * random_groups + ]).astype(np.float32) def generate_batch_norm_Bias(): - return np.random.random( - [random_out_channel * random_groups * random_groups]).astype( - np.float32) + return np.random.random([ + random_out_channel * random_groups * random_groups + ]).astype(np.float32) def 
generate_batch_norm_Mean(): - return np.random.random( - [random_out_channel * random_groups * random_groups]).astype( - np.float32) + return np.random.random([ + random_out_channel * random_groups * random_groups + ]).astype(np.float32) def generate_batch_norm_Variance(): - return np.random.random( - [random_out_channel * random_groups * random_groups]).astype( - np.float32) + return np.random.random([ + random_out_channel * random_groups * random_groups + ]).astype(np.float32) # define op - conv2d_op = OpConfig( - type="conv2d_transpose", - inputs={ - "Input": ["conv2d_Input"], - "Filter": ["conv2d_Filter"], - }, - outputs={"Output": ["conv2d_Out"], }, - attrs={ - 'groups': random_groups, - 'dilations': random_dilations, - 'strides': random_strides, - 'paddings': random_paddings, - 'padding_algorithm': random_padding_algorithm, - 'data_format': random_data_layout, - 'output_size': random_output_size, - 'output_padding': random_output_size, - 'use_mkldnn': random_use_mkldnn, - 'is_test': True, - }) - - elementwise_op = OpConfig( - type="elementwise_add", - inputs={ - "X": ["conv2d_Out"], - "Y": ["elementwise_add_Y"], - }, - outputs={"Out": ["elementwise_add_Out"], }, - attrs={'axis': 1, }) - - batch_norm_op = OpConfig( - type="batch_norm", - inputs={ - "X": ["elementwise_add_Out"], - "Scale": ["batch_norm_Scale"], - "Bias": ["batch_norm_Bias"], - "Mean": ["batch_norm_Mean"], - "Variance": ["batch_norm_Variance"], - }, - outputs={ - "Y": ["batch_norm_Y"], - "MeanOut": ["batch_norm_Mean"], - "VarianceOut": ["batch_norm_Variance"], - "SavedMean": ["batch_norm_SavedMean"], - "SavedVariance": ["batch_norm_SavedVariance"], - "ReserveSpace": ["batch_norm_ReserveSpace"], - }, - attrs={ - 'epsilon': random_epsilon, - 'is_test': True, - 'trainable_statistics': False, - 'data_layout': random_data_layout, - 'use_mkldnn': random_use_mkldnn, - }) + conv2d_op = OpConfig(type="conv2d_transpose", + inputs={ + "Input": ["conv2d_Input"], + "Filter": ["conv2d_Filter"], + }, + outputs={ + "Output": ["conv2d_Out"], + }, + attrs={ + 'groups': random_groups, + 'dilations': random_dilations, + 'strides': random_strides, + 'paddings': random_paddings, + 'padding_algorithm': random_padding_algorithm, + 'data_format': random_data_layout, + 'output_size': random_output_size, + 'output_padding': random_output_size, + 'use_mkldnn': random_use_mkldnn, + 'is_test': True, + }) + + elementwise_op = OpConfig(type="elementwise_add", + inputs={ + "X": ["conv2d_Out"], + "Y": ["elementwise_add_Y"], + }, + outputs={ + "Out": ["elementwise_add_Out"], + }, + attrs={ + 'axis': 1, + }) + + batch_norm_op = OpConfig(type="batch_norm", + inputs={ + "X": ["elementwise_add_Out"], + "Scale": ["batch_norm_Scale"], + "Bias": ["batch_norm_Bias"], + "Mean": ["batch_norm_Mean"], + "Variance": ["batch_norm_Variance"], + }, + outputs={ + "Y": ["batch_norm_Y"], + "MeanOut": ["batch_norm_Mean"], + "VarianceOut": ["batch_norm_Variance"], + "SavedMean": ["batch_norm_SavedMean"], + "SavedVariance": + ["batch_norm_SavedVariance"], + "ReserveSpace": + ["batch_norm_ReserveSpace"], + }, + attrs={ + 'epsilon': random_epsilon, + 'is_test': True, + 'trainable_statistics': False, + 'data_layout': random_data_layout, + 'use_mkldnn': random_use_mkldnn, + }) # define model_net model_net = [conv2d_op, elementwise_op, batch_norm_op] @@ -187,7 +191,8 @@ class TestConvTransposeEltwiseaddBnFusePass(PassAutoScanTest): "conv2d_Input": TensorConfig(data_gen=generate_conv2d_Input), }, weights={ - "conv2d_Filter": TensorConfig(data_gen=generate_conv2d_Filter), + 
"conv2d_Filter": + TensorConfig(data_gen=generate_conv2d_Filter), "elementwise_add_Y": TensorConfig(data_gen=generate_elementwise_add_Y), "batch_norm_Scale": @@ -215,8 +220,7 @@ class TestConvTransposeEltwiseaddBnFusePass(PassAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if attrs[0]['data_format'] == "NHWC": @@ -225,6 +229,7 @@ class TestConvTransposeEltwiseaddBnFusePass(PassAutoScanTest): return True def add_ignore_pass_case(self): + def teller1(program_config, predictor_config): if program_config.ops[0].attrs['data_format'] == "NHWC": return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_emb_eltwise_layernorm_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_emb_eltwise_layernorm_fuse_pass.py index 7379a8d333b..8001c76816e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_emb_eltwise_layernorm_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_emb_eltwise_layernorm_fuse_pass.py @@ -57,8 +57,8 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): if program_config.ops[3].attrs['axis'] not in [-1, 2]: return False - if not (program_config.ops[5].attrs['epsilon'] >= 0 and - program_config.ops[5].attrs['epsilon'] <= 0.001): + if not (program_config.ops[5].attrs['epsilon'] >= 0 + and program_config.ops[5].attrs['epsilon'] <= 0.001): return False if program_config.ops[5].attrs['begin_norm_axis'] != 2: @@ -86,17 +86,17 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): def generate_input(attrs): if attrs[0]['op_type'] == 'lookup_table': - return np.random.randint( - 0, - attrs[3]['weight_size'][0], - size=(attrs[3]['batch_size'], attrs[3]['input_dim'], - 1)).astype(np.int64) + return np.random.randint(0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], + attrs[3]['input_dim'], + 1)).astype(np.int64) else: - return np.random.randint( - 0, - attrs[3]['weight_size'][0], - size=(attrs[3]['batch_size'], - attrs[3]['input_dim'])).astype(np.int64) + return np.random.randint(0, + attrs[3]['weight_size'][0], + size=(attrs[3]['batch_size'], + attrs[3]['input_dim'])).astype( + np.int64) def generate_weight1(attrs): # set embedding weight by attrs @@ -105,9 +105,9 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): def generate_weight2(attrs): # set layernorm weight by attrs if attrs[2]['begin_norm_axis'] == 1: - return np.random.random( - attrs[3]['input_dim'] * - attrs[3]['weight_size'][1]).astype(np.float32) + return np.random.random(attrs[3]['input_dim'] * + attrs[3]['weight_size'][1]).astype( + np.float32) else: return np.random.random(attrs[3]['weight_size'][1]).astype( np.float32) @@ -128,68 +128,69 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): 'weight_size': weight_size }] - emb_op1 = OpConfig( - type=attrs[0]['op_type'], - inputs={"Ids": ["input_data1"], - "W": ["embedding_weight1"]}, - outputs={"Out": ["embedding_output1"]}, - attrs={ - 'is_sparse': attrs[0]['is_sparse'], - 'is_distributed': attrs[0]['is_distributed'], - 'padding_idx': attrs[0]['padding_idx'] - }) - emb_op2 = OpConfig( - type=attrs[0]['op_type'], - inputs={"Ids": ["input_data2"], - "W": ["embedding_weight2"]}, - outputs={"Out": ["embedding_output2"]}, - attrs={ - 'is_sparse': attrs[0]['is_sparse'], - 'is_distributed': attrs[0]['is_distributed'], - 'padding_idx': attrs[0]['padding_idx'] - 
}) - emb_op3 = OpConfig( - type=attrs[0]['op_type'], - inputs={"Ids": ["input_data3"], - "W": ["embedding_weight3"]}, - outputs={"Out": ["embedding_output3"]}, - attrs={ - 'is_sparse': attrs[0]['is_sparse'], - 'is_distributed': attrs[0]['is_distributed'], - 'padding_idx': attrs[0]['padding_idx'] - }) - add_op1 = OpConfig( - type='elementwise_add', - inputs={ - "X": [emb_op2.outputs["Out"][0]], - "Y": [emb_op3.outputs["Out"][0]], - }, - outputs={"Out": ["elementwise_add_output1"]}, - attrs={"axis": attrs[1]['axis']}) - add_op2 = OpConfig( - type='elementwise_add', - inputs={ - "X": [add_op1.outputs["Out"][0]], - "Y": [emb_op1.outputs["Out"][0]], - }, - outputs={"Out": ["elementwise_add_output2"]}, - attrs={"axis": attrs[1]['axis']}) - layer_norm_op = OpConfig( - type='layer_norm', - inputs={ - "X": [add_op2.outputs["Out"][0]], - "Bias": ["layer_norm_bias"], - "Scale": ["layer_norm_scale"] - }, - outputs={ - "Y": ["layer_norm_output1"], - "Mean": ["layer_norm_output2"], - "Variance": ["layer_norm_output3"] - }, - attrs={ - 'begin_norm_axis': attrs[2]['begin_norm_axis'], - 'epsilon': attrs[2]['epsilon'] - }) + emb_op1 = OpConfig(type=attrs[0]['op_type'], + inputs={ + "Ids": ["input_data1"], + "W": ["embedding_weight1"] + }, + outputs={"Out": ["embedding_output1"]}, + attrs={ + 'is_sparse': attrs[0]['is_sparse'], + 'is_distributed': attrs[0]['is_distributed'], + 'padding_idx': attrs[0]['padding_idx'] + }) + emb_op2 = OpConfig(type=attrs[0]['op_type'], + inputs={ + "Ids": ["input_data2"], + "W": ["embedding_weight2"] + }, + outputs={"Out": ["embedding_output2"]}, + attrs={ + 'is_sparse': attrs[0]['is_sparse'], + 'is_distributed': attrs[0]['is_distributed'], + 'padding_idx': attrs[0]['padding_idx'] + }) + emb_op3 = OpConfig(type=attrs[0]['op_type'], + inputs={ + "Ids": ["input_data3"], + "W": ["embedding_weight3"] + }, + outputs={"Out": ["embedding_output3"]}, + attrs={ + 'is_sparse': attrs[0]['is_sparse'], + 'is_distributed': attrs[0]['is_distributed'], + 'padding_idx': attrs[0]['padding_idx'] + }) + add_op1 = OpConfig(type='elementwise_add', + inputs={ + "X": [emb_op2.outputs["Out"][0]], + "Y": [emb_op3.outputs["Out"][0]], + }, + outputs={"Out": ["elementwise_add_output1"]}, + attrs={"axis": attrs[1]['axis']}) + add_op2 = OpConfig(type='elementwise_add', + inputs={ + "X": [add_op1.outputs["Out"][0]], + "Y": [emb_op1.outputs["Out"][0]], + }, + outputs={"Out": ["elementwise_add_output2"]}, + attrs={"axis": attrs[1]['axis']}) + layer_norm_op = OpConfig(type='layer_norm', + inputs={ + "X": [add_op2.outputs["Out"][0]], + "Bias": ["layer_norm_bias"], + "Scale": ["layer_norm_scale"] + }, + outputs={ + "Y": ["layer_norm_output1"], + "Mean": ["layer_norm_output2"], + "Variance": ["layer_norm_output3"] + }, + attrs={ + 'begin_norm_axis': + attrs[2]['begin_norm_axis'], + 'epsilon': attrs[2]['epsilon'] + }) program_config = ProgramConfig( ops=[emb_op1, emb_op2, emb_op3, add_op1, add_op2, layer_norm_op], @@ -241,36 +242,39 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): use_static=False, use_calib_mode=False) if program_config.ops[0].type == 'lookup_table': - config.set_trt_dynamic_shape_info({ - "input_data1": [1, 4, 1], - "input_data2": [1, 4, 1], - "input_data3": [1, 4, 1] - }, { - "input_data1": [4, 512, 1], - "input_data2": [4, 512, 1], - "input_data3": [4, 512, 1] - }, { - "input_data1": [2, 128, 1], - "input_data2": [2, 128, 1], - "input_data3": [2, 128, 1] - }) + config.set_trt_dynamic_shape_info( + { + "input_data1": [1, 4, 1], + "input_data2": [1, 4, 1], + "input_data3": [1, 4, 
1] + }, { + "input_data1": [4, 512, 1], + "input_data2": [4, 512, 1], + "input_data3": [4, 512, 1] + }, { + "input_data1": [2, 128, 1], + "input_data2": [2, 128, 1], + "input_data3": [2, 128, 1] + }) else: - config.set_trt_dynamic_shape_info({ - "input_data1": [1, 4], - "input_data2": [1, 4], - "input_data3": [1, 4] - }, { - "input_data1": [4, 512], - "input_data2": [4, 512], - "input_data3": [4, 512] - }, { - "input_data1": [2, 128], - "input_data2": [2, 128], - "input_data3": [2, 128] - }) + config.set_trt_dynamic_shape_info( + { + "input_data1": [1, 4], + "input_data2": [1, 4], + "input_data3": [1, 4] + }, { + "input_data1": [4, 512], + "input_data2": [4, 512], + "input_data3": [4, 512] + }, { + "input_data1": [2, 128], + "input_data2": [2, 128], + "input_data3": [2, 128] + }) yield config, ['fused_embedding_eltwise_layernorm'], (1e-5, 1e-5) def add_ignore_pass_case(self): + def teller1(program_config, predictor_config): if program_config.ops[3].attrs['axis'] in [ -1, 2 @@ -287,11 +291,10 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest): def test(self): # this fuse need to fix, now there's no program can ran successfully - self.run_and_statis( - quant=False, - max_examples=50, - passes=["embedding_eltwise_layernorm_fuse_pass"], - min_success_num=0) + self.run_and_statis(quant=False, + max_examples=50, + passes=["embedding_eltwise_layernorm_fuse_pass"], + min_success_num=0) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_elementwise_layernorm_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_elementwise_layernorm_fuse_pass.py index 26f91092d2a..c6be25f9ff0 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_elementwise_layernorm_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_elementwise_layernorm_fuse_pass.py @@ -48,66 +48,74 @@ class TestFCElementwiseLayerNormFusePass(PassAutoScanTest): def sample_program_config(self, draw): # 1. Generate shape of input:X of fc x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=5)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=5)) x_shape = [2, 1] x_rank = len(x_shape) # 2. Generate attr:in_num_col_dims of fc in_num_col_dims = draw(st.integers(min_value=1, max_value=x_rank - 1)) # 3. Generate legal shape of input:W/bias of fc w_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=2)) w_shape[0] = int(np.prod(x_shape[in_num_col_dims:])) w_shape = [1, 2] - fc_bias_shape = [w_shape[1], ] + fc_bias_shape = [ + w_shape[1], + ] if draw(st.booleans()): fc_bias_shape.insert(0, 1) - fc_bias_shape = [2, ] + fc_bias_shape = [ + 2, + ] fc_out_shape = x_shape[:in_num_col_dims] + w_shape[1:] # 4. Generate legal attr:axis/shape of elementwise_add add_bias_shape = fc_out_shape[:] axis = draw(st.integers(min_value=-1, max_value=0)) # 5. 
Generate legal shape of layer_norm begin_norm_axis = draw( - st.integers( - min_value=1, max_value=len(fc_out_shape) - 1)) + st.integers(min_value=1, max_value=len(fc_out_shape) - 1)) layer_norm_shape = [int(np.prod(fc_out_shape[begin_norm_axis:]))] epsilon = 1e-5 fc_op = OpConfig( "fc", - inputs={"Input": ["fc_x"], - "W": ["fc_w"], - "Bias": ["fc_bias"]}, + inputs={ + "Input": ["fc_x"], + "W": ["fc_w"], + "Bias": ["fc_bias"] + }, outputs={"Out": ["fc_out"]}, in_num_col_dims=in_num_col_dims, padding_weights=False, activation_type="", use_quantizer=False, - use_mkldnn=False, ) + use_mkldnn=False, + ) add_op = OpConfig( "elementwise_add", - inputs={"X": ["fc_out"], - "Y": ["add_bias"]}, - outputs={"Out": ["add_out"]}, - axis=axis, ) - layer_norm_op = OpConfig( - "layer_norm", inputs={ - "X": ["add_out"], - "Scale": ["scale"], - "Bias": ["layer_norm_bias"] - }, - outputs={ - "Y": ["layer_norm_out"], - "Mean": ["layer_norm_mean"], - "Variance": ["layer_norm_var"] + "X": ["fc_out"], + "Y": ["add_bias"] }, - begin_norm_axis=begin_norm_axis, - epsilon=epsilon) + outputs={"Out": ["add_out"]}, + axis=axis, + ) + layer_norm_op = OpConfig("layer_norm", + inputs={ + "X": ["add_out"], + "Scale": ["scale"], + "Bias": ["layer_norm_bias"] + }, + outputs={ + "Y": ["layer_norm_out"], + "Mean": ["layer_norm_mean"], + "Variance": ["layer_norm_var"] + }, + begin_norm_axis=begin_norm_axis, + epsilon=epsilon) ops = [fc_op, add_op, layer_norm_op] program_config = ProgramConfig( @@ -119,15 +127,17 @@ class TestFCElementwiseLayerNormFusePass(PassAutoScanTest): "scale": TensorConfig(shape=layer_norm_shape), "layer_norm_bias": TensorConfig(shape=layer_norm_shape), }, - inputs={"fc_x": TensorConfig(shape=x_shape), }, - outputs=ops[-1].outputs["Y"], ) + inputs={ + "fc_x": TensorConfig(shape=x_shape), + }, + outputs=ops[-1].outputs["Y"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=300, - passes=["fc_elementwise_layernorm_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=300, + passes=["fc_elementwise_layernorm_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_fuse_pass.py index dccc29e75f0..86262aaee10 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_fuse_pass.py @@ -106,19 +106,18 @@ class TestFcFusePass(PassAutoScanTest): def sample_program_config(self, draw): # 1. Generate shape of input:X of mul x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=4)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=4)) # 2. Generate attr:x_num_col_dims/y_num_col_dims of mul x_num_col_dims = draw( - st.integers( - min_value=1, max_value=len(x_shape) - 1)) + st.integers(min_value=1, max_value=len(x_shape) - 1)) y_num_col_dims = 1 # 3. Generate legal shape of input:Y of mul y_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=2)) y_shape[0] = int(np.prod(x_shape[x_num_col_dims:])) # 4. 
Generate legal attr:axis of elementwise_add mul_out_shape = x_shape[:x_num_col_dims] + y_shape[1:] @@ -131,8 +130,7 @@ class TestFcFusePass(PassAutoScanTest): else: max_bias_rank = 1 bias_rank = draw( - st.integers( - min_value=1, max_value=len(mul_out_shape))) + st.integers(min_value=1, max_value=len(mul_out_shape))) bias_shape = mul_out_shape[-1 * bias_rank:] # 6. Random choose if use broadcast for elementwise_add, e.g [3, 4] -> [1, 4] if draw(st.booleans()): @@ -153,23 +151,28 @@ class TestFcFusePass(PassAutoScanTest): # Use function `add_skip_pass_case` to ignore the programs even if they cause bug while runing mul_op = OpConfig( "mul", - inputs={"X": ["mul_x"], - "Y": ["mul_y"]}, + inputs={ + "X": ["mul_x"], + "Y": ["mul_y"] + }, outputs={"Out": ["mul_out"]}, x_num_col_dims=x_num_col_dims, - y_num_col_dims=y_num_col_dims, ) + y_num_col_dims=y_num_col_dims, + ) add_op = OpConfig( "elementwise_add", - inputs={"X": ["mul_out"], - "Y": ["bias"]}, + inputs={ + "X": ["mul_out"], + "Y": ["bias"] + }, outputs={"Out": ["add_out"]}, - axis=axis, ) + axis=axis, + ) ops = [mul_op, add_op] if has_relu: - relu_op = OpConfig( - "relu", - inputs={"X": ["add_out"]}, - outputs={"Out": ["relu_out"]}) + relu_op = OpConfig("relu", + inputs={"X": ["add_out"]}, + outputs={"Out": ["relu_out"]}) ops.append(relu_op) program_config = ProgramConfig( ops=ops, @@ -177,13 +180,17 @@ class TestFcFusePass(PassAutoScanTest): "mul_y": TensorConfig(shape=y_shape), "bias": TensorConfig(shape=bias_shape), }, - inputs={"mul_x": TensorConfig(shape=x_shape), }, - outputs=ops[-1].outputs["Out"], ) + inputs={ + "mul_x": TensorConfig(shape=x_shape), + }, + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, max_examples=500, passes=["fc_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=500, + passes=["fc_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py index f7b43470d40..3da1516e974 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py @@ -21,26 +21,28 @@ from paddle.fluid.core import PassVersionChecker class FcGruFusePassTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): dict_dim, emb_dim = 128, 64 - data = fluid.data( - name='step_data', shape=[None], dtype='int64', lod_level=1) + data = fluid.data(name='step_data', + shape=[None], + dtype='int64', + lod_level=1) emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) hidden_dim = 512 x = fluid.layers.fc(input=emb, size=hidden_dim * 3) - hidden = fluid.layers.dynamic_gru( - input=x, - size=hidden_dim, - bias_attr=True, - origin_mode=False, - is_reverse=True) + hidden = fluid.layers.dynamic_gru(input=x, + size=hidden_dim, + bias_attr=True, + origin_mode=False, + is_reverse=True) batch = 16 lod_tensor = fluid.LoDTensor() - lod_tensor.set(np.random.randint( - 0, dict_dim, size=[batch]).astype("int64"), - fluid.CPUPlace()) + lod_tensor.set( + np.random.randint(0, dict_dim, size=[batch]).astype("int64"), + fluid.CPUPlace()) lod_tensor.set_lod([[0, batch]]) self.feeds = {"step_data": lod_tensor} self.fetch_list = [hidden] @@ -52,26 +54,28 @@ class FcGruFusePassTest(InferencePassTest): class MulGruFusePassTest(InferencePassTest): + def setUp(self): with 
fluid.program_guard(self.main_program, self.startup_program): dict_dim, emb_dim = 128, 64 - data = fluid.data( - name='step_data', shape=[None], dtype='int64', lod_level=1) + data = fluid.data(name='step_data', + shape=[None], + dtype='int64', + lod_level=1) emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) hidden_dim = 512 x = fluid.layers.fc(input=emb, size=hidden_dim * 3, bias_attr=False) - hidden = fluid.layers.dynamic_gru( - input=x, - size=hidden_dim, - bias_attr=True, - origin_mode=False, - is_reverse=True) + hidden = fluid.layers.dynamic_gru(input=x, + size=hidden_dim, + bias_attr=True, + origin_mode=False, + is_reverse=True) batch = 16 lod_tensor = fluid.LoDTensor() - lod_tensor.set(np.random.randint( - 0, dict_dim, size=[batch]).astype("int64"), - fluid.CPUPlace()) + lod_tensor.set( + np.random.randint(0, dict_dim, size=[batch]).astype("int64"), + fluid.CPUPlace()) lod_tensor.set_lod([[0, batch]]) self.feeds = {"step_data": lod_tensor} self.fetch_list = [hidden] diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py index fbb4373dae2..4db2c5f96cd 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py @@ -21,23 +21,26 @@ from paddle.fluid.core import PassVersionChecker class MulLstmFusePassTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): dict_dim, emb_dim = 128, 64 hidden_dim = 512 - data = fluid.data( - name='data', shape=[1], dtype='int64', lod_level=1) + data = fluid.data(name='data', + shape=[1], + dtype='int64', + lod_level=1) emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) x = fluid.layers.fc(input=emb, size=hidden_dim * 4, bias_attr=False) - forward, cell = fluid.layers.dynamic_lstm( - input=x, size=hidden_dim * 4) + forward, cell = fluid.layers.dynamic_lstm(input=x, + size=hidden_dim * 4) batch = 16 lod_tensor = fluid.LoDTensor() - lod_tensor.set(np.random.randint( - 0, dict_dim, size=[batch]).astype("int64"), - fluid.CPUPlace()) + lod_tensor.set( + np.random.randint(0, dict_dim, size=[batch]).astype("int64"), + fluid.CPUPlace()) lod_tensor.set_lod([[0, batch]]) self.feeds = {"data": lod_tensor} self.fetch_list = [forward, cell] diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_flatten2_matmul_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_flatten2_matmul_fuse_pass.py index ba99ac306c7..181ed89c65e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_flatten2_matmul_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_flatten2_matmul_fuse_pass.py @@ -50,9 +50,9 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest): def sample_program_config(self, draw): # 1. Generate shape and attr of flatten2 x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=10), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=10), + min_size=4, + max_size=4)) # [a, b, c, d] => [a, b*c*d] flatten_axis = 1 flatten_shape = [x_shape[0], x_shape[1] * x_shape[2] * x_shape[3]] @@ -64,15 +64,17 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest): # 3. 
Generate legal shape of input:Y of matmul y_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=2)) y_shape[0] = flatten_shape[1] # 4. Generate legal attr:axis of elementwise_add axis = draw(st.integers(min_value=-1, max_value=1)) if axis == 0: - bias_shape = [flatten_shape[0], ] + bias_shape = [ + flatten_shape[0], + ] elif axis == 1: bias_shape = [y_shape[1]] else: @@ -82,14 +84,21 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest): flatten2_op = OpConfig( "flatten2", - inputs={"X": ["flatten2_x"], }, + inputs={ + "X": ["flatten2_x"], + }, axis=flatten_axis, - outputs={"Out": ["flatten2_out"], - "XShape": ["xshape"]}, ) + outputs={ + "Out": ["flatten2_out"], + "XShape": ["xshape"] + }, + ) matmul_op = OpConfig( "matmul", - inputs={"X": ["flatten2_out"], - "Y": ["matmul_y"]}, + inputs={ + "X": ["flatten2_out"], + "Y": ["matmul_y"] + }, outputs={"Out": ["matmul_out"]}, alpha=alpha, transpose_X=transpose_X, @@ -99,14 +108,18 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest): fused_transpose_X=[], fused_transpose_Y=[], fused_reshape_Out=[], - fused_transpose_Out=[], ) + fused_transpose_Out=[], + ) add_op = OpConfig( "elementwise_add", - inputs={"X": ["matmul_out"], - "Y": ["bias"]}, + inputs={ + "X": ["matmul_out"], + "Y": ["bias"] + }, outputs={"Out": ["add_out"]}, - axis=axis, ) + axis=axis, + ) ops = [flatten2_op, matmul_op, add_op] @@ -117,8 +130,11 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest): "matmul_y": TensorConfig(shape=y_shape), "bias": TensorConfig(shape=bias_shape), }, - inputs={"flatten2_x": TensorConfig(shape=x_shape), }, - outputs=ops[-1].outputs["Out"], ) + inputs={ + "flatten2_x": TensorConfig(shape=x_shape), + }, + outputs=ops[-1].outputs["Out"], + ) else: program_config = ProgramConfig( ops=ops, @@ -128,15 +144,15 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest): "matmul_y": TensorConfig(shape=y_shape), "bias": TensorConfig(shape=bias_shape), }, - outputs=ops[-1].outputs["Out"], ) + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=50, - max_duration=1000, - passes=["gpu_cpu_flatten2_matmul_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=50, + max_duration=1000, + passes=["gpu_cpu_flatten2_matmul_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_identity_scale_clean_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_identity_scale_clean_pass.py index 8cacb6d29af..f7265193a85 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_identity_scale_clean_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_identity_scale_clean_pass.py @@ -21,6 +21,7 @@ import hypothesis.strategies as st class TestIdentityScaleCleanPass(PassAutoScanTest): + def sample_predictor_configs(self, program_config): config = self.create_trt_inference_config() config.enable_tensorrt_engine( @@ -39,15 +40,15 @@ class TestIdentityScaleCleanPass(PassAutoScanTest): h = draw(st.integers(min_value=1, max_value=20)) w = draw(st.integers(min_value=1, max_value=20)) - relu_op = OpConfig( - "relu", inputs={"X": ["relu_x"]}, outputs={"Out": ["relu_out"]}) - scale_op = OpConfig( - "scale", - inputs={"X": ["relu_out"]}, - outputs={"Out": ["scale_out"]}, - bias=0., - scale=1., - bias_after_scale=True) + relu_op = OpConfig("relu", + inputs={"X": ["relu_x"]}, + outputs={"Out": ["relu_out"]}) + 
scale_op = OpConfig("scale", + inputs={"X": ["relu_out"]}, + outputs={"Out": ["scale_out"]}, + bias=0., + scale=1., + bias_after_scale=True) program_config = ProgramConfig( ops=[relu_op, scale_op], weights={}, @@ -56,8 +57,8 @@ class TestIdentityScaleCleanPass(PassAutoScanTest): return program_config def test(self): - self.run_and_statis( - max_examples=25, passes=["identity_scale_op_clean_pass"]) + self.run_and_statis(max_examples=25, + passes=["identity_scale_op_clean_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_layer_norm_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_layer_norm_fuse_pass.py index 7409bf17f3c..a56ce98b37a 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_layer_norm_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_layer_norm_fuse_pass.py @@ -75,21 +75,21 @@ class TestFcFusePass(PassAutoScanTest): self.add_ignore_check_case( teller1, IgnoreReasons.PASS_ACCURACY_ERROR, - "Use bad case to test pass.", ) + "Use bad case to test pass.", + ) def sample_program_config(self, draw): - # 1. Generate shape of input:X + # 1. Generate shape of input:X x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=4, max_size=5)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=4, + max_size=5)) x_shape_rank = len(x_shape) # 2. Generate attrs of reduce_mean keep_dim = draw(st.booleans()) reduce_all = False begin_norm_axis = draw( - st.integers( - min_value=1, max_value=x_shape_rank - 1)) + st.integers(min_value=1, max_value=x_shape_rank - 1)) if begin_norm_axis == x_shape_rank - 1 and draw(st.booleans()): reduce_mean_dim = [-1] else: @@ -98,7 +98,9 @@ class TestFcFusePass(PassAutoScanTest): error_test_ratio = draw(st.integers(min_value=1, max_value=10)) if error_test_ratio > 9: keep_dim = True - reduce_mean_dim = [1, ] + reduce_mean_dim = [ + 1, + ] elif error_test_ratio > 8: keep_dim = True begin_norm_axis = 1 @@ -111,20 +113,22 @@ class TestFcFusePass(PassAutoScanTest): pow_axis = -1 def generate_pow_data(): - return np.array([2, ], dtype="float32") + return np.array([ + 2, + ], dtype="float32") # 5. Generate attrs of elementwise_add if keep_dim: add_axis = draw( - st.integers( - min_value=-1, max_value=x_shape_rank - 1)) + st.integers(min_value=-1, max_value=x_shape_rank - 1)) else: add_axis = draw( - st.integers( - min_value=-1, max_value=begin_norm_axis - 1)) + st.integers(min_value=-1, max_value=begin_norm_axis - 1)) def generate_epsilon_data(): - return np.array([1e-5, ], dtype="float32") + return np.array([ + 1e-5, + ], dtype="float32") # 6. 
Generate attrs of elementwise_div div_axis = 0 @@ -142,58 +146,85 @@ class TestFcFusePass(PassAutoScanTest): mean_op1 = OpConfig( "reduce_mean", - inputs={"X": ["x"], }, + inputs={ + "X": ["x"], + }, outputs={"Out": ["mean_out"]}, dim=reduce_mean_dim, keep_dim=keep_dim, - reduce_all=reduce_all, ) + reduce_all=reduce_all, + ) sub_op = OpConfig( "elementwise_sub", - inputs={"X": ["x"], - "Y": ["mean_out"]}, + inputs={ + "X": ["x"], + "Y": ["mean_out"] + }, outputs={"Out": ["sub_out"]}, - axis=sub_axis, ) + axis=sub_axis, + ) pow_op = OpConfig( "elementwise_pow", - inputs={"X": ["sub_out"], - "Y": ["pow_y"]}, + inputs={ + "X": ["sub_out"], + "Y": ["pow_y"] + }, outputs={"Out": ["pow_out"]}, - axis=pow_axis, ) + axis=pow_axis, + ) mean_op2 = OpConfig( "reduce_mean", - inputs={"X": ["pow_out"], }, + inputs={ + "X": ["pow_out"], + }, outputs={"Out": ["mean_out2"]}, dim=reduce_mean_dim, keep_dim=keep_dim, - reduce_all=reduce_all, ) + reduce_all=reduce_all, + ) add_op = OpConfig( "elementwise_add", - inputs={"X": ["mean_out2"], - "Y": ["epsilon_var"]}, + inputs={ + "X": ["mean_out2"], + "Y": ["epsilon_var"] + }, outputs={"Out": ["add_out"]}, - axis=add_axis, ) + axis=add_axis, + ) sqrt_op = OpConfig( "sqrt", - inputs={"X": ["add_out"], }, - outputs={"Out": ["sqrt_out"]}, ) + inputs={ + "X": ["add_out"], + }, + outputs={"Out": ["sqrt_out"]}, + ) div_op = OpConfig( "elementwise_div", - inputs={"X": ["sub_out"], - "Y": ["sqrt_out"]}, + inputs={ + "X": ["sub_out"], + "Y": ["sqrt_out"] + }, outputs={"Out": ["div_out"]}, - axis=div_axis, ) + axis=div_axis, + ) mul_op = OpConfig( "elementwise_mul", - inputs={"X": ["div_out"], - "Y": ["gamma_var"]}, + inputs={ + "X": ["div_out"], + "Y": ["gamma_var"] + }, outputs={"Out": ["mul_out"]}, - axis=mul_axis, ) + axis=mul_axis, + ) add_op2 = OpConfig( "elementwise_add", - inputs={"X": ["mul_out"], - "Y": ["beta_var"]}, + inputs={ + "X": ["mul_out"], + "Y": ["beta_var"] + }, outputs={"Out": ["add_out2"]}, - axis=add_axis2, ) + axis=add_axis2, + ) ops = [ mean_op1, sub_op, pow_op, mean_op2, add_op, sqrt_op, div_op, mul_op, @@ -208,15 +239,19 @@ class TestFcFusePass(PassAutoScanTest): "gamma_var": TensorConfig(shape=gamma_shape), "beta_var": TensorConfig(shape=beta_shape), }, - inputs={"x": TensorConfig(shape=x_shape), }, - outputs=ops[-1].outputs["Out"], ) + inputs={ + "x": TensorConfig(shape=x_shape), + }, + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): self.run_and_statis( quant=False, max_examples=300, - passes=["layer_norm_fuse_pass"], ) + passes=["layer_norm_fuse_pass"], + ) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_to_mul_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_to_mul_pass.py index ce695ec2f01..2dc0556e9e2 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_to_mul_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_to_mul_pass.py @@ -35,11 +35,15 @@ class TestMapMatmulToMulPass(PassAutoScanTest): def sample_predictor_configs(self, program_config): # cpu config = self.create_inference_config(use_gpu=False) - yield config, ["mul", ], (1e-5, 1e-5) + yield config, [ + "mul", + ], (1e-5, 1e-5) # for gpu config = self.create_inference_config(use_gpu=True) - yield config, ["mul", ], (1e-5, 1e-5) + yield config, [ + "mul", + ], (1e-5, 1e-5) # TRT # config = self.create_trt_inference_config() @@ -75,13 +79,13 @@ class TestMapMatmulToMulPass(PassAutoScanTest): def sample_program_config(self, 
draw): # 1. Generate shape and attr of matmul x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=5)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=5)) y_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=2)) y_shape[0] = x_shape[-1] alpha = 1.0 transpose_X = False @@ -89,8 +93,10 @@ class TestMapMatmulToMulPass(PassAutoScanTest): matmul_op = OpConfig( "matmul", - inputs={"X": ["matmul_x"], - "Y": ["matmul_y"]}, + inputs={ + "X": ["matmul_x"], + "Y": ["matmul_y"] + }, outputs={"Out": ["matmul_out"]}, alpha=alpha, transpose_X=transpose_X, @@ -100,24 +106,31 @@ class TestMapMatmulToMulPass(PassAutoScanTest): fused_transpose_X=[], fused_transpose_Y=[], fused_reshape_Out=[], - fused_transpose_Out=[], ) - - ops = [matmul_op, ] - weights = {"matmul_y": TensorConfig(shape=y_shape), } - inputs = {"matmul_x": TensorConfig(shape=x_shape), } + fused_transpose_Out=[], + ) + + ops = [ + matmul_op, + ] + weights = { + "matmul_y": TensorConfig(shape=y_shape), + } + inputs = { + "matmul_x": TensorConfig(shape=x_shape), + } program_config = ProgramConfig( ops=ops, weights=weights, inputs=inputs, - outputs=ops[-1].outputs["Out"], ) + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=100, - passes=["gpu_cpu_map_matmul_to_mul_pass"], - max_duration=180) + self.run_and_statis(quant=False, + max_examples=100, + passes=["gpu_cpu_map_matmul_to_mul_pass"], + max_duration=180) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_matmul_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_matmul_pass.py index fac8b710c8c..2f0de50610f 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_matmul_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_matmul_pass.py @@ -35,11 +35,15 @@ class TestMapMatmulToMulPass(PassAutoScanTest): def sample_predictor_configs(self, program_config): # cpu config = self.create_inference_config(use_gpu=False) - yield config, ["matmul", ], (1e-5, 1e-5) + yield config, [ + "matmul", + ], (1e-5, 1e-5) # for gpu config = self.create_inference_config(use_gpu=True) - yield config, ["matmul", ], (1e-5, 1e-5) + yield config, [ + "matmul", + ], (1e-5, 1e-5) # TRT # config = self.create_trt_inference_config() @@ -71,13 +75,13 @@ class TestMapMatmulToMulPass(PassAutoScanTest): def sample_program_config(self, draw): # 1. 
Generate shape and attr of matmul x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=5)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=5)) y_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=2)) transpose_X = draw(st.booleans()) transpose_Y = draw(st.booleans()) if transpose_X: @@ -96,8 +100,10 @@ class TestMapMatmulToMulPass(PassAutoScanTest): matmul_op = OpConfig( "matmul_v2", - inputs={"X": ["matmul_x"], - "Y": ["matmul_y"]}, + inputs={ + "X": ["matmul_x"], + "Y": ["matmul_y"] + }, outputs={"Out": ["matmul_out"]}, alpha=alpha, trans_x=transpose_X, @@ -107,9 +113,12 @@ class TestMapMatmulToMulPass(PassAutoScanTest): fused_reshape_X=[], fused_reshape_Y=[], fused_transpose_X=[], - fused_transpose_Y=[], ) + fused_transpose_Y=[], + ) - ops = [matmul_op, ] + ops = [ + matmul_op, + ] weights = {} inputs = { "matmul_x": TensorConfig(shape=x_shape), @@ -120,14 +129,14 @@ class TestMapMatmulToMulPass(PassAutoScanTest): ops=ops, weights=weights, inputs=inputs, - outputs=ops[-1].outputs["Out"], ) + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=100, - passes=["gpu_cpu_map_matmul_v2_to_matmul_pass"]) + self.run_and_statis(quant=False, + max_examples=100, + passes=["gpu_cpu_map_matmul_v2_to_matmul_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_mul_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_mul_pass.py index e8a37ebc7ea..d8dd7a0eac9 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_mul_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_mul_pass.py @@ -35,11 +35,15 @@ class TestMapMatmulToMulPass(PassAutoScanTest): def sample_predictor_configs(self, program_config): # cpu config = self.create_inference_config(use_gpu=False) - yield config, ["mul", ], (1e-5, 1e-5) + yield config, [ + "mul", + ], (1e-5, 1e-5) # for gpu config = self.create_inference_config(use_gpu=True) - yield config, ["mul", ], (1e-5, 1e-5) + yield config, [ + "mul", + ], (1e-5, 1e-5) # TRT # config = self.create_trt_inference_config() @@ -71,13 +75,13 @@ class TestMapMatmulToMulPass(PassAutoScanTest): def sample_program_config(self, draw): # 1. 
Generate shape and attr of matmul x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=5)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=5)) y_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=2)) y_shape[0] = x_shape[-1] alpha = 1.0 transpose_X = False @@ -85,8 +89,10 @@ class TestMapMatmulToMulPass(PassAutoScanTest): matmul_op = OpConfig( "matmul_v2", - inputs={"X": ["matmul_x"], - "Y": ["matmul_y"]}, + inputs={ + "X": ["matmul_x"], + "Y": ["matmul_y"] + }, outputs={"Out": ["matmul_out"]}, alpha=alpha, trans_x=transpose_X, @@ -96,23 +102,30 @@ class TestMapMatmulToMulPass(PassAutoScanTest): fused_reshape_X=[], fused_reshape_Y=[], fused_transpose_X=[], - fused_transpose_Y=[], ) - - ops = [matmul_op, ] - weights = {"matmul_y": TensorConfig(shape=y_shape), } - inputs = {"matmul_x": TensorConfig(shape=x_shape), } + fused_transpose_Y=[], + ) + + ops = [ + matmul_op, + ] + weights = { + "matmul_y": TensorConfig(shape=y_shape), + } + inputs = { + "matmul_x": TensorConfig(shape=x_shape), + } program_config = ProgramConfig( ops=ops, weights=weights, inputs=inputs, - outputs=ops[-1].outputs["Out"], ) + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=100, - passes=["gpu_cpu_map_matmul_v2_to_mul_pass"]) + self.run_and_statis(quant=False, + max_examples=100, + passes=["gpu_cpu_map_matmul_v2_to_mul_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_matmul_scale_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_matmul_scale_fuse_pass.py index 9c10ff18fa1..7e3ddf95fb7 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_matmul_scale_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_matmul_scale_fuse_pass.py @@ -37,25 +37,27 @@ class TestMatmulScaleFusePass(PassAutoScanTest): def sample_predictor_configs(self, program_config): # cpu config = self.create_inference_config(use_gpu=False) - yield config, ["matmul", ], (1e-5, 1e-5) + yield config, [ + "matmul", + ], (1e-5, 1e-5) # mkldnn config = self.create_inference_config(use_mkldnn=True) - yield config, ["matmul", ], (1e-5, 1e-5) + yield config, [ + "matmul", + ], (1e-5, 1e-5) def sample_program_config(self, draw): # 1. 
Generate shape and attr of matmul x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=5)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=5)) x_shape_rank = len(x_shape) y_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), - min_size=x_shape_rank, - max_size=x_shape_rank)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=x_shape_rank, + max_size=x_shape_rank)) y_shape_rank = len(y_shape) y_shape[-2] = x_shape[-1] for i in range(y_shape_rank - 3, -1, -1): @@ -73,8 +75,10 @@ class TestMatmulScaleFusePass(PassAutoScanTest): matmul_op = OpConfig( "matmul", - inputs={"X": ["matmul_x"], - "Y": ["matmul_y"]}, + inputs={ + "X": ["matmul_x"], + "Y": ["matmul_y"] + }, outputs={"Out": ["matmul_out"]}, transpose_X=transpose_X, transpose_Y=transpose_Y, @@ -85,25 +89,32 @@ class TestMatmulScaleFusePass(PassAutoScanTest): fused_transpose_Y=[], fused_reshape_Out=[], fused_transpose_Out=[], - head_number=1, ) + head_number=1, + ) is_scale_tensor = draw(st.booleans()) if is_scale_tensor: scale_op = OpConfig( "scale", - inputs={"X": ["matmul_out"], - "ScaleTensor": ["scale_tensor"]}, + inputs={ + "X": ["matmul_out"], + "ScaleTensor": ["scale_tensor"] + }, outputs={"Out": ["scale_out"]}, scale=scale_value, bias=0.0, - bias_after_scale=draw(st.booleans()), ) + bias_after_scale=draw(st.booleans()), + ) else: scale_op = OpConfig( "scale", - inputs={"X": ["matmul_out"], }, + inputs={ + "X": ["matmul_out"], + }, outputs={"Out": ["scale_out"]}, scale=scale_value, bias=0.0, - bias_after_scale=draw(st.booleans()), ) + bias_after_scale=draw(st.booleans()), + ) ops = [matmul_op, scale_op] weights = {} @@ -113,7 +124,9 @@ class TestMatmulScaleFusePass(PassAutoScanTest): "matmul_y": TensorConfig(shape=y_shape), "scale_tensor": TensorConfig(shape=scale_shape) } - inputs = {"matmul_x": TensorConfig(shape=x_shape), } + inputs = { + "matmul_x": TensorConfig(shape=x_shape), + } else: inputs = { "matmul_x": TensorConfig(shape=x_shape), @@ -124,14 +137,16 @@ class TestMatmulScaleFusePass(PassAutoScanTest): ops=ops, weights=weights, inputs=inputs, - outputs=ops[-1].outputs["Out"], ) + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): self.run_and_statis( quant=False, max_examples=100, - passes=["matmul_scale_fuse_pass"], ) + passes=["matmul_scale_fuse_pass"], + ) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_matmul_v2_scale_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_matmul_v2_scale_fuse_pass.py index 47bd5623646..52da377599d 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_matmul_v2_scale_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_matmul_v2_scale_fuse_pass.py @@ -43,21 +43,21 @@ class TestMatmulV2ScaleFusePass(PassAutoScanTest): # mkldnn config = self.create_inference_config(use_mkldnn=True) - yield config, ["matmul_v2", ], (1e-5, 1e-5) + yield config, [ + "matmul_v2", + ], (1e-5, 1e-5) def sample_program_config(self, draw): # 1. 
Generate shape and attr of matmul x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=5)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=5)) x_shape_rank = len(x_shape) y_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), - min_size=x_shape_rank, - max_size=x_shape_rank)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=x_shape_rank, + max_size=x_shape_rank)) y_shape_rank = len(y_shape) y_shape[-2] = x_shape[-1] for i in range(y_shape_rank - 3, -1, -1): @@ -74,8 +74,10 @@ class TestMatmulV2ScaleFusePass(PassAutoScanTest): matmul_v2_op = OpConfig( "matmul_v2", - inputs={"X": ["matmul_x"], - "Y": ["matmul_y"]}, + inputs={ + "X": ["matmul_x"], + "Y": ["matmul_y"] + }, outputs={"Out": ["matmul_out"]}, trans_x=transpose_X, trans_y=transpose_Y, @@ -84,43 +86,56 @@ class TestMatmulV2ScaleFusePass(PassAutoScanTest): fused_transpose_X=[], fused_transpose_Y=[], fused_reshape_Out=[], - fused_transpose_Out=[], ) + fused_transpose_Out=[], + ) is_scale_tensor = draw(st.booleans()) if is_scale_tensor: scale_op = OpConfig( "scale", - inputs={"X": ["matmul_out"], - "ScaleTensor": ["scale_tensor"]}, + inputs={ + "X": ["matmul_out"], + "ScaleTensor": ["scale_tensor"] + }, outputs={"Out": ["scale_out"]}, scale=scale_value, bias=0.0, - bias_after_scale=draw(st.booleans()), ) + bias_after_scale=draw(st.booleans()), + ) else: scale_op = OpConfig( "scale", - inputs={"X": ["matmul_out"], }, + inputs={ + "X": ["matmul_out"], + }, outputs={"Out": ["scale_out"]}, scale=scale_value, bias=0.0, - bias_after_scale=draw(st.booleans()), ) + bias_after_scale=draw(st.booleans()), + ) ops = [matmul_v2_op, scale_op] - weights = {"matmul_y": TensorConfig(shape=y_shape), } + weights = { + "matmul_y": TensorConfig(shape=y_shape), + } if is_scale_tensor: weights["scale_tensor"] = TensorConfig(shape=scale_shape) - inputs = {"matmul_x": TensorConfig(shape=x_shape), } + inputs = { + "matmul_x": TensorConfig(shape=x_shape), + } program_config = ProgramConfig( ops=ops, weights=weights, inputs=inputs, - outputs=ops[-1].outputs["Out"], ) + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): self.run_and_statis( quant=False, max_examples=100, - passes=["matmul_v2_scale_fuse_pass"], ) + passes=["matmul_v2_scale_fuse_pass"], + ) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_batch_norm_act_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_batch_norm_act_fuse_pass.py index 0012ebb05b1..6c17db2caa4 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_batch_norm_act_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_batch_norm_act_fuse_pass.py @@ -26,6 +26,7 @@ import hypothesis.strategies as st class TestScaleMatmulMkldnnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -58,39 +59,40 @@ class TestScaleMatmulMkldnnFusePass(PassAutoScanTest): def generate_weight(): return np.random.random(channel).astype(np.float32) - batch_norm_op = OpConfig( - type="batch_norm", - inputs={ - "X": ["input_data"], - "Bias": ["Bias"], - "Mean": ["Mean"], - "Scale": ["Scale"], - "Variance": ["Variance"] - }, - outputs={ - "Y": ["norm_output"], - "MeanOut": ["Mean"], - "VarianceOut": ["Variance"], - "SavedMean": ["SavedMean"], - "SavedVariance": ["SavedVariance"] - }, - attrs={ - "data_layout": data_layout, - "epsilon": epsilon, - "fuse_with_relu": 
fuse_with_relu, - "is_test": is_test, - "momentum": momentum, - "trainable_statistics": trainable_statistics, - "use_global_stats": use_global_stats, - "use_mkldnn": use_mkldnn1 - }) - - relu_op = OpConfig( - type="relu", - inputs={"X": ["norm_output"]}, - outputs={"Out": ["relu_output"]}, - attrs={"use_cudnn": use_cudnn, - "use_mkldnn": use_mkldnn2}) + batch_norm_op = OpConfig(type="batch_norm", + inputs={ + "X": ["input_data"], + "Bias": ["Bias"], + "Mean": ["Mean"], + "Scale": ["Scale"], + "Variance": ["Variance"] + }, + outputs={ + "Y": ["norm_output"], + "MeanOut": ["Mean"], + "VarianceOut": ["Variance"], + "SavedMean": ["SavedMean"], + "SavedVariance": ["SavedVariance"] + }, + attrs={ + "data_layout": data_layout, + "epsilon": epsilon, + "fuse_with_relu": fuse_with_relu, + "is_test": is_test, + "momentum": momentum, + "trainable_statistics": + trainable_statistics, + "use_global_stats": use_global_stats, + "use_mkldnn": use_mkldnn1 + }) + + relu_op = OpConfig(type="relu", + inputs={"X": ["norm_output"]}, + outputs={"Out": ["relu_output"]}, + attrs={ + "use_cudnn": use_cudnn, + "use_mkldnn": use_mkldnn2 + }) model_net = [batch_norm_op, relu_op] diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv3d_bias_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv3d_bias_fuse_pass.py index ae0ac6a3ecd..3556e5ef334 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv3d_bias_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv3d_bias_fuse_pass.py @@ -26,6 +26,7 @@ import hypothesis.strategies as st class TestConv3dBiasMkldnnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -48,8 +49,8 @@ class TestConv3dBiasMkldnnFusePass(PassAutoScanTest): [attrs[2]['batch_size'], 64, 32, 64, 48]).astype(np.float32) def generate_weight1(): - return np.random.random( - [16, int(48 / groups), 3, 3, 3]).astype(np.float32) + return np.random.random([16, int(48 / groups), 3, 3, + 3]).astype(np.float32) def generate_weight2(): return np.random.random([16]).astype(np.float32) @@ -104,7 +105,8 @@ class TestConv3dBiasMkldnnFusePass(PassAutoScanTest): program_config = ProgramConfig( ops=ops, weights={ - "conv_weight": TensorConfig(data_gen=partial(generate_weight1)), + "conv_weight": + TensorConfig(data_gen=partial(generate_weight1)), "elementwise_weight": TensorConfig(data_gen=partial(generate_weight2)) }, diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv3d_op.py index f6e668ed590..7f75f8ddf4f 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv3d_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv3d_op.py @@ -26,36 +26,41 @@ import hypothesis.strategies as st class TestMkldnnConv3dOp(MkldnnAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self, *args, **kwargs): + def generate_input(*args, **kwargs): if kwargs["data_format"] == "NCDHW": - return np.random.random( - [kwargs["batch_size"], 48, 64, 32, 64]).astype(np.float32) + return np.random.random([kwargs["batch_size"], 48, 64, 32, + 64]).astype(np.float32) else: - return np.random.random( - [kwargs["batch_size"], 64, 32, 64, 48]).astype(np.float32) + return np.random.random([kwargs["batch_size"], 64, 32, 64, + 48]).astype(np.float32) def generate_weight(*args, **kwargs): 
- return np.random.random( - [16, int(48 / kwargs["groups"]), 3, 3, 3]).astype(np.float32) + return np.random.random([16, + int(48 / kwargs["groups"]), 3, 3, + 3]).astype(np.float32) - conv3d_op = OpConfig( - type="conv3d", - inputs={"Input": ["input_data"], - "Filter": ["conv_weight"]}, - outputs={"Output": ["conv_output"]}, - attrs={ - "data_format": kwargs["data_format"], - "dilations": kwargs["dilations"], - "padding_algorithm": kwargs["padding_algorithm"], - "groups": kwargs["groups"], - "paddings": kwargs["paddings"], - "strides": kwargs["strides"], - "is_test": True - }) + conv3d_op = OpConfig(type="conv3d", + inputs={ + "Input": ["input_data"], + "Filter": ["conv_weight"] + }, + outputs={"Output": ["conv_output"]}, + attrs={ + "data_format": kwargs["data_format"], + "dilations": kwargs["dilations"], + "padding_algorithm": + kwargs["padding_algorithm"], + "groups": kwargs["groups"], + "paddings": kwargs["paddings"], + "strides": kwargs["strides"], + "is_test": True + }) program_config = ProgramConfig( ops=[conv3d_op], @@ -82,8 +87,8 @@ class TestMkldnnConv3dOp(MkldnnAutoScanTest): groups=st.sampled_from([2]), paddings=st.sampled_from([[0, 3, 2]]), strides=st.sampled_from([[1, 2, 1]]), - batch_size=st.integers( - min_value=1, max_value=4), ) + batch_size=st.integers(min_value=1, max_value=4), + ) def test(self, *args, **kwargs): self.run_test(*args, **kwargs) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_activation_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_activation_fuse_pass.py index 56cb0748a23..645ca220264 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_activation_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_activation_fuse_pass.py @@ -24,17 +24,18 @@ from paddle.fluid.core import PassVersionChecker class ConvActivationMkldnnFusePassTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 100, 100], dtype="float32") - conv_out = fluid.layers.conv2d( - data, - num_filters=self.conv_num_filters, - filter_size=self.conv_filter_size, - bias_attr=self.conv_bias_attr, - act=self.act) + data = fluid.data(name="data", + shape=[-1, 3, 100, 100], + dtype="float32") + conv_out = fluid.layers.conv2d(data, + num_filters=self.conv_num_filters, + filter_size=self.conv_filter_size, + bias_attr=self.conv_bias_attr, + act=self.act) self.feeds = { "data": np.random.random((1, 3, 100, 100)).astype("float32") @@ -58,6 +59,7 @@ class ConvActivationMkldnnFusePassTest(InferencePassTest): class ConvActivationMkldnnFusePassTest_1(ConvActivationMkldnnFusePassTest): + def set_params(self): self.conv_num_filters = 5 self.conv_filter_size = 5 @@ -67,6 +69,7 @@ class ConvActivationMkldnnFusePassTest_1(ConvActivationMkldnnFusePassTest): class ConvActivationMkldnnFusePassTest_2(ConvActivationMkldnnFusePassTest): + def set_params(self): self.conv_num_filters = 3 self.conv_filter_size = 3 @@ -76,6 +79,7 @@ class ConvActivationMkldnnFusePassTest_2(ConvActivationMkldnnFusePassTest): class ConvActivationMkldnnFusePassTest_3(ConvActivationMkldnnFusePassTest): + def set_params(self): self.conv_num_filters = 5 self.conv_filter_size = 5 @@ -85,6 +89,7 @@ class ConvActivationMkldnnFusePassTest_3(ConvActivationMkldnnFusePassTest): class ConvActivationMkldnnFusePassTest_4(ConvActivationMkldnnFusePassTest): + def set_params(self): self.conv_num_filters = 3 
self.conv_filter_size = 3 @@ -94,6 +99,7 @@ class ConvActivationMkldnnFusePassTest_4(ConvActivationMkldnnFusePassTest): class ConvActivationMkldnnFusePassTest_5(ConvActivationMkldnnFusePassTest): + def set_params(self): self.conv_num_filters = 5 self.conv_filter_size = 5 @@ -103,6 +109,7 @@ class ConvActivationMkldnnFusePassTest_5(ConvActivationMkldnnFusePassTest): class ConvActivationMkldnnFusePassTest_6(ConvActivationMkldnnFusePassTest): + def set_params(self): self.conv_num_filters = 5 self.conv_filter_size = 5 @@ -112,6 +119,7 @@ class ConvActivationMkldnnFusePassTest_6(ConvActivationMkldnnFusePassTest): class ConvHardSigmoidOneDNNFusePassTest(ConvActivationMkldnnFusePassTest): + def set_params(self): self.conv_num_filters = 5 self.conv_filter_size = 5 diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_affine_channel_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_affine_channel_fuse_pass.py index a35b75e69f8..89595f90817 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_affine_channel_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_affine_channel_fuse_pass.py @@ -26,6 +26,7 @@ import hypothesis.strategies as st class TestConvAffineChannelFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -41,17 +42,17 @@ class TestConvAffineChannelFusePass(PassAutoScanTest): out_channel = groups * out_channel_factor batch_size = draw(st.integers(min_value=1, max_value=4)) dilations = draw( - st.lists( - st.integers( - min_value=1, max_value=2), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=2), + min_size=2, + max_size=2)) paddings = draw( - st.lists( - st.integers( - min_value=0, max_value=2), min_size=2, max_size=2)) + st.lists(st.integers(min_value=0, max_value=2), + min_size=2, + max_size=2)) strides = draw( - st.lists( - st.integers( - min_value=1, max_value=2), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=2), + min_size=2, + max_size=2)) has_bias = draw(st.booleans()) x_shape = [ @@ -73,30 +74,28 @@ class TestConvAffineChannelFusePass(PassAutoScanTest): def generate_scale_bias(): return np.random.random(bias_shape).astype(np.float32) - conv2d_op = OpConfig( - "conv2d", - inputs={ - "Input": ["input_data"], - "Filter": ["conv2d_weight"], - }, - outputs={"Output": ["conv_output"]}, - data_format=data_format, - dilations=dilations, - padding_algorithm=padding_algorithm, - groups=groups, - paddings=paddings, - strides=strides, - has_bias=has_bias, - is_test=True) - ac_op = OpConfig( - "affine_channel", - inputs={ - "X": ["conv_output"], - "Scale": ["affine_channel_scale"], - "Bias": ["affine_channel_bias"] - }, - outputs={"Out": ["affine_channel_ouput"]}, - data_layout=data_format) + conv2d_op = OpConfig("conv2d", + inputs={ + "Input": ["input_data"], + "Filter": ["conv2d_weight"], + }, + outputs={"Output": ["conv_output"]}, + data_format=data_format, + dilations=dilations, + padding_algorithm=padding_algorithm, + groups=groups, + paddings=paddings, + strides=strides, + has_bias=has_bias, + is_test=True) + ac_op = OpConfig("affine_channel", + inputs={ + "X": ["conv_output"], + "Scale": ["affine_channel_scale"], + "Bias": ["affine_channel_bias"] + }, + outputs={"Out": ["affine_channel_ouput"]}, + data_layout=data_format) if has_bias == True: conv2d_op.inputs["Bias"] = ["conv2d_bias"] ops = [conv2d_op, ac_op] @@ -109,7 +108,8 @@ class 
TestConvAffineChannelFusePass(PassAutoScanTest): weights={ "conv2d_weight": TensorConfig(data_gen=partial(generate_weight)), - "conv2d_bias": TensorConfig(data_gen=partial(generate_bias)), + "conv2d_bias": + TensorConfig(data_gen=partial(generate_bias)), "affine_channel_scale": TensorConfig(data_gen=partial(generate_scale_bias)), "affine_channel_bias": @@ -126,7 +126,7 @@ class TestConvAffineChannelFusePass(PassAutoScanTest): yield config, ['conv2d', 'elementwise_add'], (1e-4, 1e-4) def add_ignore_pass_case(self): - # If the problem has been fixed, the judgment + # If the problem has been fixed, the judgment # in is_program_valid needs to be deleted!!! def teller1(program_config, predictor_config): if program_config.ops[0].attrs['data_format'] == "NHWC": @@ -135,8 +135,8 @@ class TestConvAffineChannelFusePass(PassAutoScanTest): # mkldnn Output has diff with bias! def teller2(program_config, predictor_config): - return predictor_config.mkldnn_enabled() and program_config.ops[ - 0].attrs['has_bias'] == True + return predictor_config.mkldnn_enabled( + ) and program_config.ops[0].attrs['has_bias'] == True self.add_ignore_check_case( teller1, IgnoreReasons.PASS_ACCURACY_ERROR, @@ -151,7 +151,8 @@ class TestConvAffineChannelFusePass(PassAutoScanTest): def test(self): self.run_and_statis( quant=False, - passes=["conv_affine_channel_mkldnn_fuse_pass"], ) + passes=["conv_affine_channel_mkldnn_fuse_pass"], + ) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_bias_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_bias_fuse_pass.py index 6c8b9d4d3a8..20c754aee95 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_bias_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_bias_fuse_pass.py @@ -25,19 +25,20 @@ from paddle.fluid.core import PassVersionChecker #padding SAME class ConvBiasMkldnnFusePassSamePadTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 100, 100], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 3, 100, 100], + dtype="float32") param_attr = fluid.ParamAttr( initializer=fluid.initializer.Xavier(uniform=False), learning_rate=0.001) - conv_out = fluid.layers.conv2d( - input=data, - num_filters=3, - filter_size=3, - padding="SAME", - bias_attr=param_attr) + conv_out = fluid.layers.conv2d(input=data, + num_filters=3, + filter_size=3, + padding="SAME", + bias_attr=param_attr) self.feeds = { "data": np.random.random((1, 3, 100, 100)).astype("float32") @@ -54,19 +55,20 @@ class ConvBiasMkldnnFusePassSamePadTest(InferencePassTest): #padding VALID class ConvBiasMkldnnFusePassValidPadTest(ConvBiasMkldnnFusePassSamePadTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 100, 100], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 3, 100, 100], + dtype="float32") param_attr = fluid.ParamAttr( initializer=fluid.initializer.Xavier(uniform=False), learning_rate=0.001) - conv_out = fluid.layers.conv2d( - input=data, - num_filters=3, - filter_size=3, - padding="VALID", - bias_attr=param_attr) + conv_out = fluid.layers.conv2d(input=data, + num_filters=3, + filter_size=3, + padding="VALID", + bias_attr=param_attr) self.feeds = { "data": np.random.random((1, 3, 100, 100)).astype("float32") @@ -77,19 +79,20 @@ class 
ConvBiasMkldnnFusePassValidPadTest(ConvBiasMkldnnFusePassSamePadTest): #padding EXPLICT NUMBER class ConvBiasMkldnnFusePassExplictPadTest(ConvBiasMkldnnFusePassSamePadTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 100, 100], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 3, 100, 100], + dtype="float32") param_attr = fluid.ParamAttr( initializer=fluid.initializer.Xavier(uniform=False), learning_rate=0.001) - conv_out = fluid.layers.conv2d( - input=data, - num_filters=3, - filter_size=3, - padding=[2, 4, 6, 8], - bias_attr=param_attr) + conv_out = fluid.layers.conv2d(input=data, + num_filters=3, + filter_size=3, + padding=[2, 4, 6, 8], + bias_attr=param_attr) self.feeds = { "data": np.random.random((1, 3, 100, 100)).astype("float32") @@ -99,23 +102,24 @@ class ConvBiasMkldnnFusePassExplictPadTest(ConvBiasMkldnnFusePassSamePadTest): class ConvBiasMkldnnFusePassGroupTest(ConvBiasMkldnnFusePassSamePadTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 100, 100], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 3, 100, 100], + dtype="float32") param_attr = fluid.ParamAttr( initializer=fluid.initializer.Xavier(uniform=False), learning_rate=0.001) - conv_out = fluid.layers.conv2d( - input=data, - num_filters=3, - filter_size=3, - padding="VALID", - groups=3, - bias_attr=param_attr, - use_cudnn=False, - act="softmax", - data_format="NCHW") + conv_out = fluid.layers.conv2d(input=data, + num_filters=3, + filter_size=3, + padding="VALID", + groups=3, + bias_attr=param_attr, + use_cudnn=False, + act="softmax", + data_format="NCHW") self.feeds = { "data": np.random.random((1, 3, 100, 100)).astype("float32") @@ -126,24 +130,25 @@ class ConvBiasMkldnnFusePassGroupTest(ConvBiasMkldnnFusePassSamePadTest): class ConvBiasMkldnnFusePassDialtionsGroupsTest( ConvBiasMkldnnFusePassSamePadTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 100, 100], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 3, 100, 100], + dtype="float32") param_attr = fluid.ParamAttr( initializer=fluid.initializer.Xavier(uniform=False), learning_rate=0.001) - conv_out = fluid.layers.conv2d( - input=data, - num_filters=3, - filter_size=3, - padding="VALID", - dilation=2, - groups=3, - bias_attr=param_attr, - use_cudnn=False, - act="softmax", - data_format="NCHW") + conv_out = fluid.layers.conv2d(input=data, + num_filters=3, + filter_size=3, + padding="VALID", + dilation=2, + groups=3, + bias_attr=param_attr, + use_cudnn=False, + act="softmax", + data_format="NCHW") self.feeds = { "data": np.random.random((1, 3, 100, 100)).astype("float32") @@ -153,20 +158,20 @@ class ConvBiasMkldnnFusePassDialtionsGroupsTest( class ConvTransposeMkldnnFusePassDialtionsGroupsTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[-1, 3, 5, 5], dtype="float32") param_attr = fluid.ParamAttr( initializer=fluid.initializer.Xavier(uniform=False), learning_rate=0.001) - conv_out = fluid.layers.conv2d_transpose( - input=data, - num_filters=3, - filter_size=3, - padding="SAME", - dilation=1, - bias_attr=param_attr, - use_cudnn=False) + conv_out = fluid.layers.conv2d_transpose(input=data, + num_filters=3, + filter_size=3, + padding="SAME", + dilation=1, + 
bias_attr=param_attr, + use_cudnn=False) self.feeds = {"data": np.random.random((1, 3, 5, 5)).astype("float32")} self.fetch_list = [conv_out] diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_concat_relu_mkldnn_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_concat_relu_mkldnn_fuse_pass.py index 6654fbba264..2a313bbdaa1 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_concat_relu_mkldnn_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_concat_relu_mkldnn_fuse_pass.py @@ -26,6 +26,7 @@ import hypothesis.strategies as st class TestConvConcatReluMkldnnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -41,15 +42,15 @@ class TestConvConcatReluMkldnnFusePass(PassAutoScanTest): def generate_input(attrs): if attrs[0]['data_format'] == "NCHW": - return np.random.random( - [attrs[2]['batch_size'], 48, 64, 64]).astype(np.float32) + return np.random.random([attrs[2]['batch_size'], 48, 64, + 64]).astype(np.float32) else: - return np.random.random( - [attrs[2]['batch_size'], 64, 64, 48]).astype(np.float32) + return np.random.random([attrs[2]['batch_size'], 64, 64, + 48]).astype(np.float32) def generate_weight(): - return np.random.random( - [16, int(48 / groups), 3, 3]).astype(np.float32) + return np.random.random([16, int(48 / groups), 3, + 3]).astype(np.float32) attrs = [{ "data_format": data_format, @@ -142,8 +143,8 @@ class TestConvConcatReluMkldnnFusePass(PassAutoScanTest): yield config, ["conv2d", "conv2d", "concat"], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, passes=["conv_concat_relu_mkldnn_fuse_pass"]) + self.run_and_statis(quant=False, + passes=["conv_concat_relu_mkldnn_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_elementwise_add_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_elementwise_add_fuse_pass.py index 58d09a88061..44b1e8bf064 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_elementwise_add_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_elementwise_add_fuse_pass.py @@ -27,10 +27,10 @@ import hypothesis.strategies as st # the two inputs of elementwise_add are tensor class TestConvElementwiseAddMkldnnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if attrs[1]['data_format'] == "NHWC" and attrs[3]['axis'] == 0: return False @@ -50,56 +50,58 @@ class TestConvElementwiseAddMkldnnFusePass(PassAutoScanTest): def generate_input(): if data_format == "NCHW": - return np.random.random( - [batch_size, 48, 64, 64]).astype(np.float32) + return np.random.random([batch_size, 48, 64, + 64]).astype(np.float32) else: - return np.random.random( - [batch_size, 64, 64, 48]).astype(np.float32) + return np.random.random([batch_size, 64, 64, + 48]).astype(np.float32) def generate_weight(): - return np.random.random( - [48, int(48 / groups), 3, 3]).astype(np.float32) - - relu_op = OpConfig( - type="relu", - inputs={"X": ["input_data"]}, - outputs={"Out": ["relu_out"]}, - attrs={}) - - conv2d_op1 = OpConfig( - type="conv2d", - inputs={"Input": ["relu_out"], - "Filter": ["conv_weight1"]}, - outputs={"Output": ["conv_output1"]}, - 
attrs={ - "data_format": data_format, - "dilations": dilations, - "padding_algorithm": padding_algorithm, - "groups": groups, - "paddings": paddings, - "strides": strides - }) - - conv2d_op2 = OpConfig( - type="conv2d", - inputs={"Input": ["input_data"], - "Filter": ["conv_weight2"]}, - outputs={"Output": ["conv_output2"]}, - attrs={ - "data_format": data_format, - "dilations": dilations, - "padding_algorithm": padding_algorithm, - "groups": groups, - "paddings": paddings, - "strides": strides - }) - - elt_op = OpConfig( - type="elementwise_add", - inputs={"X": ["conv_output1"], - "Y": ["conv_output2"]}, - outputs={"Out": ["elementwise_output"]}, - attrs={'axis': axis}) + return np.random.random([48, int(48 / groups), 3, + 3]).astype(np.float32) + + relu_op = OpConfig(type="relu", + inputs={"X": ["input_data"]}, + outputs={"Out": ["relu_out"]}, + attrs={}) + + conv2d_op1 = OpConfig(type="conv2d", + inputs={ + "Input": ["relu_out"], + "Filter": ["conv_weight1"] + }, + outputs={"Output": ["conv_output1"]}, + attrs={ + "data_format": data_format, + "dilations": dilations, + "padding_algorithm": padding_algorithm, + "groups": groups, + "paddings": paddings, + "strides": strides + }) + + conv2d_op2 = OpConfig(type="conv2d", + inputs={ + "Input": ["input_data"], + "Filter": ["conv_weight2"] + }, + outputs={"Output": ["conv_output2"]}, + attrs={ + "data_format": data_format, + "dilations": dilations, + "padding_algorithm": padding_algorithm, + "groups": groups, + "paddings": paddings, + "strides": strides + }) + + elt_op = OpConfig(type="elementwise_add", + inputs={ + "X": ["conv_output1"], + "Y": ["conv_output2"] + }, + outputs={"Out": ["elementwise_output"]}, + attrs={'axis': axis}) model_net = [relu_op, conv2d_op1, conv2d_op2, elt_op] @@ -121,8 +123,8 @@ class TestConvElementwiseAddMkldnnFusePass(PassAutoScanTest): yield config, ["relu", "conv2d", "conv2d"], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, passes=["conv_elementwise_add_mkldnn_fuse_pass"]) + self.run_and_statis(quant=False, + passes=["conv_elementwise_add_mkldnn_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_gelu_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_gelu_fuse_pass.py index 81bb182802e..65634972117 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_gelu_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_gelu_fuse_pass.py @@ -27,6 +27,7 @@ import hypothesis.strategies as st class TestConvGeluMkldnnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -42,15 +43,15 @@ class TestConvGeluMkldnnFusePass(PassAutoScanTest): def generate_input(): if data_format == "NCHW": - return np.random.random( - [batch_size, 48, 64, 64]).astype(np.float32) + return np.random.random([batch_size, 48, 64, + 64]).astype(np.float32) else: - return np.random.random( - [batch_size, 64, 64, 48]).astype(np.float32) + return np.random.random([batch_size, 64, 64, + 48]).astype(np.float32) def generate_weight(): - return np.random.random( - [16, int(48 / groups), 3, 3]).astype(np.float32) + return np.random.random([16, int(48 / groups), 3, + 3]).astype(np.float32) ops_config = [{ "op_type": "conv2d", diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_hard_sigmoid_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_hard_sigmoid_fuse_pass.py index 
2eb071d6eb8..d62770bf758 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_hard_sigmoid_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_hard_sigmoid_fuse_pass.py @@ -26,6 +26,7 @@ import hypothesis.strategies as st class TestConvHardSigmoidMkldnnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -42,15 +43,15 @@ class TestConvHardSigmoidMkldnnFusePass(PassAutoScanTest): def generate_input(): if data_format == "NCHW": - return np.random.random( - [batch_size, 48, 64, 64]).astype(np.float32) + return np.random.random([batch_size, 48, 64, + 64]).astype(np.float32) else: - return np.random.random( - [batch_size, 64, 64, 48]).astype(np.float32) + return np.random.random([batch_size, 64, 64, + 48]).astype(np.float32) def generate_weight(): - return np.random.random( - [16, int(48 / groups), 3, 3]).astype(np.float32) + return np.random.random([16, int(48 / groups), 3, + 3]).astype(np.float32) ops_config = [{ "op_type": "conv2d", @@ -102,8 +103,8 @@ class TestConvHardSigmoidMkldnnFusePass(PassAutoScanTest): yield config, ["conv2d"], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, passes=["conv_hard_sigmoid_mkldnn_fuse_pass"]) + self.run_and_statis(quant=False, + passes=["conv_hard_sigmoid_mkldnn_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_hard_swish_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_hard_swish_fuse_pass.py index 990489c3213..ad54ca3d91e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_hard_swish_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_hard_swish_fuse_pass.py @@ -26,6 +26,7 @@ import hypothesis.strategies as st class TestConvHardSwishMkldnnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -43,15 +44,15 @@ class TestConvHardSwishMkldnnFusePass(PassAutoScanTest): def generate_input(): if data_format == "NCHW": - return np.random.random( - [batch_size, 48, 64, 64]).astype(np.float32) + return np.random.random([batch_size, 48, 64, + 64]).astype(np.float32) else: - return np.random.random( - [batch_size, 64, 64, 48]).astype(np.float32) + return np.random.random([batch_size, 64, 64, + 48]).astype(np.float32) def generate_weight(): - return np.random.random( - [16, int(48 / groups), 3, 3]).astype(np.float32) + return np.random.random([16, int(48 / groups), 3, + 3]).astype(np.float32) ops_config = [{ "op_type": "conv2d", @@ -104,8 +105,8 @@ class TestConvHardSwishMkldnnFusePass(PassAutoScanTest): yield config, ["conv2d"], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, passes=["conv_hard_swish_mkldnn_fuse_pass"]) + self.run_and_statis(quant=False, + passes=["conv_hard_swish_mkldnn_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_mish_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_mish_fuse_pass.py index b4d2c95087c..365ba5346e3 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_mish_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_mish_fuse_pass.py @@ -21,6 +21,7 @@ import hypothesis.strategies as st class TestConvMishMkldnnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: attrs 
= [op.attrs for op in program_config.ops] # If the problem has been fixed, the judgment @@ -41,15 +42,15 @@ class TestConvMishMkldnnFusePass(PassAutoScanTest): def generate_input(): if data_format == "NCHW": - return np.random.random( - [batch_size, 48, 64, 64]).astype(np.float32) + return np.random.random([batch_size, 48, 64, + 64]).astype(np.float32) else: - return np.random.random( - [batch_size, 64, 64, 48]).astype(np.float32) + return np.random.random([batch_size, 64, 64, + 48]).astype(np.float32) def generate_weight(): - return np.random.random( - [16, int(48 / groups), 3, 3]).astype(np.float32) + return np.random.random([16, int(48 / groups), 3, + 3]).astype(np.float32) ops_config = [{ "op_type": "conv2d", diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_transpose_bias_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_transpose_bias_fuse_pass.py index c5cedac2261..a05cbf5ba35 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_transpose_bias_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_transpose_bias_fuse_pass.py @@ -26,10 +26,10 @@ import hypothesis.strategies as st class TestConvTransposeMkldnnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if attrs[0]['data_format'] == "NCHW" and attrs[1]["axis"] == 3: @@ -51,11 +51,11 @@ class TestConvTransposeMkldnnFusePass(PassAutoScanTest): def generate_input(): if data_format == "NCHW": - return np.random.random( - [batch_size, 16, 64, 64]).astype(np.float32) + return np.random.random([batch_size, 16, 64, + 64]).astype(np.float32) else: - return np.random.random( - [batch_size, 64, 64, 16]).astype(np.float32) + return np.random.random([batch_size, 64, 64, + 16]).astype(np.float32) def generate_weight1(): return np.random.random([16, 16, 3, 3]).astype(np.float32) @@ -63,29 +63,31 @@ class TestConvTransposeMkldnnFusePass(PassAutoScanTest): def generate_weight2(): return np.random.random([16 * groups]).astype(np.float32) - conv2d_op = OpConfig( - type="conv2d_transpose", - inputs={"Input": ["input_data"], - "Filter": ["conv2d_weight"]}, - outputs={"Output": ["conv_output"]}, - attrs={ - "data_format": data_format, - "dilations": dilations, - "padding_algorithm": padding_algorithm, - "groups": groups, - "paddings": paddings, - "strides": strides, - "output_size": [], - "output_padding": [], - "is_test": True - }) - - elt_op = OpConfig( - type="elementwise_add", - inputs={"X": ["conv_output"], - "Y": ["elementwise_weight"]}, - outputs={"Out": ["elementwise_output"]}, - attrs={'axis': axis}) + conv2d_op = OpConfig(type="conv2d_transpose", + inputs={ + "Input": ["input_data"], + "Filter": ["conv2d_weight"] + }, + outputs={"Output": ["conv_output"]}, + attrs={ + "data_format": data_format, + "dilations": dilations, + "padding_algorithm": padding_algorithm, + "groups": groups, + "paddings": paddings, + "strides": strides, + "output_size": [], + "output_padding": [], + "is_test": True + }) + + elt_op = OpConfig(type="elementwise_add", + inputs={ + "X": ["conv_output"], + "Y": ["elementwise_weight"] + }, + outputs={"Out": ["elementwise_output"]}, + attrs={'axis': axis}) model_net = [conv2d_op, elt_op] @@ -109,10 +111,9 @@ class TestConvTransposeMkldnnFusePass(PassAutoScanTest): yield config, ['conv2d_transpose'], (1e-5, 1e-5) 
def test(self): - self.run_and_statis( - quant=False, - max_duration=300, - passes=["conv_transpose_bias_mkldnn_fuse_pass"]) + self.run_and_statis(quant=False, + max_duration=300, + passes=["conv_transpose_bias_mkldnn_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py index 4b36e4b742c..95996f22a86 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py @@ -20,11 +20,13 @@ from paddle.fluid.core import PassVersionChecker class TestMKLDNNCpuBfloat16Pass(InferencePassTest): + def setUp(self): self.init_data() with fluid.program_guard(self.main_program, self.startup_program): - x = fluid.data( - name='x', shape=[-1] + self.shape_x, dtype=self.d_type) + x = fluid.data(name='x', + shape=[-1] + self.shape_x, + dtype=self.d_type) out = fluid.layers.transpose(x, perm=[0, 1, 2, 3]) out = fluid.layers.reshape(out, [0, 0, 0, 0]) out = fluid.layers.fc(out, size=1) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_depthwise_conv_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_depthwise_conv_pass.py index b484a88273b..312b77acaa4 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_depthwise_conv_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_depthwise_conv_pass.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -48,24 +48,24 @@ class DepthwiseConvMKLDNNPass(PassAutoScanTest): random_groups = draw(st.integers(min_value=1, max_value=3)) random_dilations = draw( - st.lists( - st.integers( - min_value=1, max_value=3), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=3), + min_size=2, + max_size=2)) random_strides = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=2)) random_paddings = draw( - st.lists( - st.integers( - min_value=0, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=0, max_value=4), + min_size=2, + max_size=2)) random_padding_algorithm = draw( st.sampled_from(["EXPLICIT", "SAME", "VALID"])) random_data_layout = draw(st.sampled_from(["NCHW", "NHWC"])) random_filter = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=2)) def generate_conv2d_Input(): shape = [random_input_dim1, random_input_dim2] @@ -84,22 +84,23 @@ class DepthwiseConvMKLDNNPass(PassAutoScanTest): return np.random.random(shape).astype(np.float32) # define op - conv2d_op = OpConfig( - type="depthwise_conv2d", - inputs={ - "Input": ["conv2d_Input"], - "Filter": ["conv2d_Filter"], - }, - outputs={"Output": ["conv2d_Out"], }, - attrs={ - 'groups': random_groups, - 'dilations': random_dilations, - 'strides': random_strides, - 'paddings': random_paddings, - 'padding_algorithm': random_padding_algorithm, - 'data_format': random_data_layout, - 'use_mkldnn': True, - }) + conv2d_op = OpConfig(type="depthwise_conv2d", + inputs={ + "Input": ["conv2d_Input"], + "Filter": ["conv2d_Filter"], + }, + outputs={ + "Output": ["conv2d_Out"], + }, + attrs={ + 'groups': random_groups, + 'dilations': random_dilations, + 'strides': random_strides, + 'paddings': random_paddings, + 'padding_algorithm': random_padding_algorithm, + 'data_format': random_data_layout, + 'use_mkldnn': True, + }) # define model_net model_net = [conv2d_op] @@ -124,8 +125,7 @@ class DepthwiseConvMKLDNNPass(PassAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if attrs[0]['data_format'] == "NHWC": @@ -134,6 +134,7 @@ class DepthwiseConvMKLDNNPass(PassAutoScanTest): return True def add_ignore_pass_case(self): + def teller1(program_config, predictor_config): if program_config.ops[0].attrs['data_format'] == "NHWC": return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py index 893bd383343..12f4249a4d6 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py @@ -30,10 +30,12 @@ class ElementwiseActivationMkldnnFusePassTest(InferencePassTest): def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data_A = fluid.data( - name="data_A", shape=[-1, 3, 100, 100], dtype="float32") - data_B = fluid.data( - name="data_B", shape=[-1, 3, 100, 100], dtype="float32") + data_A = fluid.data(name="data_A", + shape=[-1, 3, 100, 100], + dtype="float32") + data_B = fluid.data(name="data_B", + shape=[-1, 3, 100, 100], + dtype="float32") elt_out 
= self.operand(data_A, data_B) if self.act is not None: if self.act_beta is not None: @@ -64,6 +66,7 @@ class ElementwiseActivationMkldnnFusePassTest(InferencePassTest): class ElementwiseActivationMkldnnFusePassTest_Add_Relu( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_add self.act = fluid.layers.relu @@ -71,6 +74,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Relu( class ElementwiseActivationMkldnnFusePassTest_Add_Tanh( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_add self.act = fluid.layers.tanh @@ -78,6 +82,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Tanh( class ElementwiseActivationMkldnnFusePassTest_Add_LeakyRelu( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_add self.act_alpha = 0.2 @@ -86,6 +91,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_LeakyRelu( class ElementwiseActivationMkldnnFusePassTest_Add_Swish( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_add self.act_alpha = 4 @@ -94,6 +100,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Swish( class ElementwiseActivationMkldnnFusePassTest_Add_HardSwish( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_add self.act = fluid.layers.hard_swish @@ -101,6 +108,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_HardSwish( class ElementwiseActivationMkldnnFusePassTest_Add_SQRT( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_add self.act = fluid.layers.sqrt @@ -108,6 +116,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_SQRT( class ElementwiseActivationMkldnnFusePassTest_Add_ABS( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_add self.act = fluid.layers.abs @@ -115,6 +124,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_ABS( class ElementwiseActivationMkldnnFusePassTest_Add_Clip( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_add self.act = fluid.layers.clip @@ -124,6 +134,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Clip( class ElementwiseActivationMkldnnFusePassTest_Add_Gelu( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_add self.act = fluid.layers.gelu @@ -131,6 +142,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Gelu( class ElementwiseActivationMkldnnFusePassTest_Add_Gelu_Tanh( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_add self.act = fluid.layers.gelu @@ -139,6 +151,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Gelu_Tanh( class ElementwiseActivationMkldnnFusePassTest_Add_Relu6( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_add self.act = fluid.layers.relu6 @@ -147,6 +160,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Relu6( class ElementwiseActivationMkldnnFusePassTest_Add_Sigmoid( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_add self.act = fluid.layers.sigmoid @@ -154,6 +168,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Sigmoid( class ElementwiseActivationMkldnnFusePassTest_Sub_Relu( ElementwiseActivationMkldnnFusePassTest): + def 
set_params(self): self.operand = fluid.layers.elementwise_sub self.act = fluid.layers.relu @@ -161,6 +176,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Relu( class ElementwiseActivationMkldnnFusePassTest_Sub_Tanh( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_sub self.act = fluid.layers.tanh @@ -168,6 +184,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Tanh( class ElementwiseActivationMkldnnFusePassTest_Sub_LeakyRelu( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_sub self.act_alpha = 0.2 @@ -176,6 +193,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_LeakyRelu( class ElementwiseActivationMkldnnFusePassTest_Sub_Swish( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_sub self.act = fluid.layers.swish @@ -183,6 +201,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Swish( class ElementwiseActivationMkldnnFusePassTest_Sub_HardSwish( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_sub self.act = fluid.layers.hard_swish @@ -190,6 +209,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_HardSwish( class ElementwiseActivationMkldnnFusePassTest_Sub_ABS( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_sub self.act = fluid.layers.abs @@ -197,6 +217,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_ABS( class ElementwiseActivationMkldnnFusePassTest_Sub_Clip( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_sub self.act = fluid.layers.clip @@ -206,6 +227,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Clip( class ElementwiseActivationMkldnnFusePassTest_Sub_Gelu( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_sub self.act = fluid.layers.gelu @@ -213,6 +235,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Gelu( class ElementwiseActivationMkldnnFusePassTest_Sub_Gelu_Tanh( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_sub self.act = fluid.layers.gelu @@ -221,6 +244,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Gelu_Tanh( class ElementwiseActivationMkldnnFusePassTest_Sub_Relu6( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_sub self.act = fluid.layers.relu6 @@ -229,6 +253,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Relu6( class ElementwiseActivationMkldnnFusePassTest_Sub_Sigmoid( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_sub self.act = fluid.layers.sigmoid @@ -236,6 +261,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Sigmoid( class ElementwiseActivationMkldnnFusePassTest_Mul_Relu( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_mul self.act = fluid.layers.relu @@ -243,6 +269,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Relu( class ElementwiseActivationMkldnnFusePassTest_Mul_Tanh( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_mul self.act = fluid.layers.tanh @@ -250,6 +277,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Tanh( class ElementwiseActivationMkldnnFusePassTest_Mul_LeakyRelu( 
ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_mul self.act_alpha = 0.2 @@ -258,6 +286,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_LeakyRelu( class ElementwiseActivationMkldnnFusePassTest_Mul_Swish( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_mul self.act = fluid.layers.swish @@ -265,6 +294,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Swish( class ElementwiseActivationMkldnnFusePassTest_Mul_HardSwish( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_mul self.act = fluid.layers.hard_swish @@ -272,6 +302,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_HardSwish( class ElementwiseActivationMkldnnFusePassTest_Mul_SQRT( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_mul self.act = fluid.layers.sqrt @@ -279,6 +310,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_SQRT( class ElementwiseActivationMkldnnFusePassTest_Mul_ABS( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_mul self.act = fluid.layers.abs @@ -286,6 +318,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_ABS( class ElementwiseActivationMkldnnFusePassTest_Mul_Clip( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_mul self.act = fluid.layers.clip @@ -295,6 +328,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Clip( class ElementwiseActivationMkldnnFusePassTest_Mul_Gelu( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_mul self.act = fluid.layers.gelu @@ -302,6 +336,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Gelu( class ElementwiseActivationMkldnnFusePassTest_Mul_Gelu_Tanh( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_mul self.act = fluid.layers.gelu @@ -310,6 +345,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Gelu_Tanh( class ElementwiseActivationMkldnnFusePassTest_Mul_Relu6( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_mul self.act = fluid.layers.relu6 @@ -318,6 +354,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Relu6( class ElementwiseActivationMkldnnFusePassTest_Mul_Sigmoid( ElementwiseActivationMkldnnFusePassTest): + def set_params(self): self.operand = fluid.layers.elementwise_mul self.act = fluid.layers.sigmoid diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass_new.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass_new.py index 0f5279b0eda..78393ef59b6 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass_new.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass_new.py @@ -25,6 +25,7 @@ import hypothesis.strategies as st class TestElementWiseAddReluFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -32,8 +33,8 @@ class TestElementWiseAddReluFusePass(PassAutoScanTest): batch_size = draw(st.integers(min_value=1, max_value=4)) def generate_input(): - return np.random.random( - [batch_size, 3, 100, 100]).astype(np.float32) + return np.random.random([batch_size, 3, 100, + 100]).astype(np.float32) ops_config = [{ "op_type": 
"elementwise_add", @@ -74,8 +75,9 @@ class TestElementWiseAddReluFusePass(PassAutoScanTest): yield config, ["elementwise_add"], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, passes=["elt_act_mkldnn_fuse_pass"], min_success_num=4) + self.run_and_statis(quant=False, + passes=["elt_act_mkldnn_fuse_pass"], + min_success_num=4) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_act_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_act_fuse_pass.py index 66bcca51bed..1a30c0f2d3d 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_act_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_act_fuse_pass.py @@ -26,11 +26,13 @@ enable_static() class FCGeluTanhOneDnnFusePassTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 128, 768], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 128, 768], + dtype="float32") fc_out = fluid.layers.fc(input=data, size=3072, num_flatten_dims=2) gelu_out = fluid.layers.gelu(fc_out, approximate=False) @@ -47,11 +49,13 @@ class FCGeluTanhOneDnnFusePassTest(InferencePassTest): class FCGeluErfOneDnnFusePassTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 128, 768], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 128, 768], + dtype="float32") fc_out = fluid.layers.fc(input=data, size=3072, num_flatten_dims=2) gelu_out = fluid.layers.gelu(fc_out, approximate=True) @@ -69,11 +73,13 @@ class FCGeluErfOneDnnFusePassTest(InferencePassTest): class FCTanhOneDnnFusePassTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 128, 768], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 128, 768], + dtype="float32") fc_out = fluid.layers.fc(input=data, size=3072, num_flatten_dims=2) tanh_out = fluid.layers.tanh(fc_out) @@ -91,11 +97,13 @@ class FCTanhOneDnnFusePassTest(InferencePassTest): class FCSigmoidOneDnnFusePassTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 128, 768], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 128, 768], + dtype="float32") fc_out = fluid.layers.fc(input=data, size=3072, num_flatten_dims=2) sigmoid_out = fluid.layers.sigmoid(fc_out) @@ -113,11 +121,13 @@ class FCSigmoidOneDnnFusePassTest(InferencePassTest): class FCHardSwishOneDnnFusePassTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 128, 768], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 128, 768], + dtype="float32") fc_out = fluid.layers.fc(input=data, size=3072, num_flatten_dims=2) hardswish_out = fluid.layers.hard_swish(fc_out) @@ -135,11 +145,13 @@ class FCHardSwishOneDnnFusePassTest(InferencePassTest): class FCMishOneDnnFusePassTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 128, 768], dtype="float32") + data = 
fluid.data(name="data", + shape=[-1, 128, 768], + dtype="float32") fc_out = fluid.layers.fc(input=data, size=3072, num_flatten_dims=2) mish_out = fluid.layers.mish(fc_out) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_elementwise_add_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_elementwise_add_fuse_pass.py index 22b8960497b..61492b1d05d 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_elementwise_add_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_elementwise_add_fuse_pass.py @@ -26,6 +26,7 @@ import hypothesis.strategies as st class TestFCElementwiseAddMkldnnFusePass(PassAutoScanTest): + def sample_program_config(self, draw): axis = draw(st.sampled_from([-1, 0, 1])) fc_as_x = draw(st.sampled_from([True, False])) @@ -41,37 +42,34 @@ class TestFCElementwiseAddMkldnnFusePass(PassAutoScanTest): def generate_fc_bias(): return np.random.random([fc_wei]).astype(np.float32) - relu_op = OpConfig( - type="relu", - inputs={"X": ["input_data"]}, - outputs={"Out": ["relu_out"]}, - attrs={}) - - fc_op = OpConfig( - type="fc", - inputs={ - "Input": ["relu_out"], - "W": ["fc_weight"], - "Bias": ["fc_bias"] - }, - outputs={"Out": ["fc_output"]}, - attrs={ - "use_mkldnn": True, - "padding_weights": False, - "activation_type": "", - "in_num_col_dims": 1, - }) + relu_op = OpConfig(type="relu", + inputs={"X": ["input_data"]}, + outputs={"Out": ["relu_out"]}, + attrs={}) + + fc_op = OpConfig(type="fc", + inputs={ + "Input": ["relu_out"], + "W": ["fc_weight"], + "Bias": ["fc_bias"] + }, + outputs={"Out": ["fc_output"]}, + attrs={ + "use_mkldnn": True, + "padding_weights": False, + "activation_type": "", + "in_num_col_dims": 1, + }) if fc_as_x: inputs = {"X": ["fc_output"], "Y": ["input_data"]} else: inputs = {"X": ["input_data"], "Y": ["fc_output"]} - elt_add_op = OpConfig( - type="elementwise_add", - inputs=inputs, - outputs={"Out": ["elementwise_output"]}, - attrs={'axis': axis}) + elt_add_op = OpConfig(type="elementwise_add", + inputs=inputs, + outputs={"Out": ["elementwise_output"]}, + attrs={'axis': axis}) model_net = [relu_op, fc_op, elt_add_op] @@ -93,8 +91,8 @@ class TestFCElementwiseAddMkldnnFusePass(PassAutoScanTest): yield config, ["relu", "fc"], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, passes=["fc_elementwise_add_mkldnn_fuse_pass"]) + self.run_and_statis(quant=False, + passes=["fc_elementwise_add_mkldnn_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_mish_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_mish_fuse_pass.py index 20a7cddbeb2..dd9321b6a74 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_mish_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_mish_fuse_pass.py @@ -20,16 +20,17 @@ import hypothesis.strategies as st class TestFCMishMkldnnFusePass(PassAutoScanTest): + def sample_program_config(self, draw): x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=128), min_size=2, max_size=3)) + st.lists(st.integers(min_value=1, max_value=128), + min_size=2, + max_size=3)) in_num_col_dims = len(x_shape) - 1 w_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=128), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=128), + min_size=2, + max_size=2)) w_shape[0] = int(np.prod(x_shape[in_num_col_dims:])) fc_bias_shape = [w_shape[1]] @@ -62,14 
+63,17 @@ class TestFCMishMkldnnFusePass(PassAutoScanTest): ops = self.generate_op_config(ops_config) - program_config = ProgramConfig( - ops=ops, - weights={ - "fc_w": TensorConfig(shape=w_shape), - "fc_bias": TensorConfig(shape=fc_bias_shape), - }, - inputs={"fc_x": TensorConfig(shape=x_shape), }, - outputs=["mish_output"]) + program_config = ProgramConfig(ops=ops, + weights={ + "fc_w": + TensorConfig(shape=w_shape), + "fc_bias": + TensorConfig(shape=fc_bias_shape), + }, + inputs={ + "fc_x": TensorConfig(shape=x_shape), + }, + outputs=["mish_output"]) return program_config def sample_predictor_configs(self, program_config): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py index 4215e56de2c..3b7f0162c20 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py @@ -25,17 +25,22 @@ from paddle.fluid.core import PassVersionChecker class MkldnnInplacePassTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): paddle.enable_static() - data = fluid.data( - name="data", shape=[-1, 3, 100, 100], dtype="float32") - conv_out_1 = fluid.layers.conv2d( - data, num_filters=3, filter_size=3, bias_attr=False) + data = fluid.data(name="data", + shape=[-1, 3, 100, 100], + dtype="float32") + conv_out_1 = fluid.layers.conv2d(data, + num_filters=3, + filter_size=3, + bias_attr=False) softmax_out = fluid.layers.softmax(conv_out_1) relu_out = fluid.layers.relu(conv_out_1) - eltwise_out = fluid.layers.elementwise_add( - softmax_out, relu_out, axis=-1) + eltwise_out = fluid.layers.elementwise_add(softmax_out, + relu_out, + axis=-1) self.pass_name = 'mkldnn_inplace_pass' self.feeds = { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_int8_scale_calculation_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_int8_scale_calculation_pass.py index 31415f64725..3d2895cc619 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_int8_scale_calculation_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_int8_scale_calculation_pass.py @@ -20,6 +20,7 @@ import hypothesis.strategies as st class TestInt8ScaleCalculationMkldnnPass(PassAutoScanTest): + def sample_predictor_configs(self, program_config): config = self.create_inference_config(use_gpu=False) config.pass_builder().append_pass("int8_scale_calculation_mkldnn_pass") @@ -56,40 +57,40 @@ class TestInt8ScaleCalculationMkldnnPass(PassAutoScanTest): def sample_program_config(self, draw): x_shape = draw( - st.lists( - st.integers( - min_value=5, max_value=100), min_size=4, max_size=4)) + st.lists(st.integers(min_value=5, max_value=100), + min_size=4, + max_size=4)) x_shape[1] = draw(st.integers(min_value=5, max_value=10)) data_format = draw(st.sampled_from(["NCHW", "NHWC"])) f_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=4, + max_size=4)) if data_format == "NCHW": f_shape[1] = x_shape[1] else: f_shape[1] = x_shape[3] strides = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=2)) padding_algorithm = draw(st.sampled_from(["EXPLICIT", "SAME", "VALID"])) padding = draw( 
- st.lists( - st.integers( - min_value=1, max_value=4), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=4, + max_size=4)) groups = draw(st.integers(min_value=1, max_value=3)) dilations = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=2)) bias_shape = [f_shape[0]] inputs = dict() @@ -111,20 +112,21 @@ class TestInt8ScaleCalculationMkldnnPass(PassAutoScanTest): "Input": ["input_x"], "Filter": ["filter"], } - weights = {"filter": TensorConfig(shape=f_shape), } - - conv2d_op = OpConfig( - "conv2d", - inputs=inputs, - outputs={"Output": ["conv2d_out"]}, - strides=strides, - padding_algorithm=padding_algorithm, - paddings=padding, - groups=groups, - dilations=dilations, - data_format=data_format, - use_mkldnn=use_mkldnn, - mkldnn_data_type="int8") + weights = { + "filter": TensorConfig(shape=f_shape), + } + + conv2d_op = OpConfig("conv2d", + inputs=inputs, + outputs={"Output": ["conv2d_out"]}, + strides=strides, + padding_algorithm=padding_algorithm, + paddings=padding, + groups=groups, + dilations=dilations, + data_format=data_format, + use_mkldnn=use_mkldnn, + mkldnn_data_type="int8") ops = [conv2d_op] @@ -136,10 +138,9 @@ class TestInt8ScaleCalculationMkldnnPass(PassAutoScanTest): return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=100, - passes=["int8_scale_calculation_mkldnn_pass"]) + self.run_and_statis(quant=False, + max_examples=100, + passes=["int8_scale_calculation_mkldnn_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_log_softmax_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_log_softmax_op.py index 3dc0623a112..929863b42a7 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_log_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_log_softmax_op.py @@ -22,25 +22,26 @@ import hypothesis.strategies as st class TestMKLDNNLogSoftmaxOp(MkldnnAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self, *args, **kwargs): + def generate_input(*args, **kwargs): return np.random.random(kwargs['in_shape']).astype(np.float32) - logsoftmax_op = OpConfig( - type="log_softmax", - inputs={"X": ["input_data"]}, - outputs={"Out": ["output_data"]}, - attrs={"axis": kwargs['axis']}) + logsoftmax_op = OpConfig(type="log_softmax", + inputs={"X": ["input_data"]}, + outputs={"Out": ["output_data"]}, + attrs={"axis": kwargs['axis']}) program_config = ProgramConfig( ops=[logsoftmax_op], weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial(generate_input, - *args, **kwargs)), + "input_data": + TensorConfig(data_gen=partial(generate_input, *args, **kwargs)), }, outputs=["output_data"]) @@ -50,11 +51,10 @@ class TestMKLDNNLogSoftmaxOp(MkldnnAutoScanTest): config = self.create_inference_config(use_mkldnn=True) yield config, (1e-5, 1e-5) - @given( - axis=st.sampled_from([-2, -1, 0, 1]), - in_shape=st.lists( - st.integers( - min_value=2, max_value=5), min_size=3, max_size=5)) + @given(axis=st.sampled_from([-2, -1, 0, 1]), + in_shape=st.lists(st.integers(min_value=2, max_value=5), + min_size=3, + max_size=5)) def test(self, *args, **kwargs): self.run_test(quant=False, *args, **kwargs) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py 
b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py index b1ad5804ebc..a22207030c8 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py @@ -22,6 +22,7 @@ from inference_pass_test import InferencePassTest class TestMKLDNNMatmulFuseOp(InferencePassTest): + def init_data(self): self.bs = 8 self.d_type = np.float32 @@ -31,10 +32,12 @@ class TestMKLDNNMatmulFuseOp(InferencePassTest): def make_network(self): with fluid.program_guard(self.main_program, self.startup_program): - x = fluid.data( - name='x', shape=[-1] + self.shape_x, dtype=self.d_type) - y = fluid.data( - name='y', shape=[-1] + self.shape_y, dtype=self.d_type) + x = fluid.data(name='x', + shape=[-1] + self.shape_x, + dtype=self.d_type) + y = fluid.data(name='y', + shape=[-1] + self.shape_y, + dtype=self.d_type) out = fluid.layers.matmul(x, y) out = fluid.layers.transpose(out, perm=[0, 2, 1, 3]) out = fluid.layers.reshape( @@ -60,6 +63,7 @@ class TestMKLDNNMatmulFuseOp(InferencePassTest): class TestMKLDNNMatmulOtherDimsFuseOp(TestMKLDNNMatmulFuseOp): + def init_data(self): self.bs = 8 self.d_type = np.float32 @@ -69,12 +73,15 @@ class TestMKLDNNMatmulOtherDimsFuseOp(TestMKLDNNMatmulFuseOp): class TestMKLDNNMatmulOpNotFusedWrongTransposeAxis(TestMKLDNNMatmulFuseOp): + def make_network(self): with fluid.program_guard(self.main_program, self.startup_program): - x = fluid.data( - name='x', shape=[-1] + self.shape_x, dtype=self.d_type) - y = fluid.data( - name='y', shape=[-1] + self.shape_y, dtype=self.d_type) + x = fluid.data(name='x', + shape=[-1] + self.shape_x, + dtype=self.d_type) + y = fluid.data(name='y', + shape=[-1] + self.shape_y, + dtype=self.d_type) out = fluid.layers.matmul(x, y) out = fluid.layers.transpose(out, perm=[0, 1, 2, 3]) out = fluid.layers.reshape(out, [0, 0, 0, 0]) @@ -83,6 +90,7 @@ class TestMKLDNNMatmulOpNotFusedWrongTransposeAxis(TestMKLDNNMatmulFuseOp): class TestMKLDNNMatmulOpNotFusedBreakPattern(TestMKLDNNMatmulFuseOp): + def init_data(self): self.bs = 7 self.d_type = np.float32 @@ -92,14 +100,16 @@ class TestMKLDNNMatmulOpNotFusedBreakPattern(TestMKLDNNMatmulFuseOp): def make_network(self): with fluid.program_guard(self.main_program, self.startup_program): - x = fluid.data( - name='x', shape=[-1] + self.shape_x, dtype=self.d_type) - y = fluid.data( - name='y', shape=[-1] + self.shape_y, dtype=self.d_type) + x = fluid.data(name='x', + shape=[-1] + self.shape_x, + dtype=self.d_type) + y = fluid.data(name='y', + shape=[-1] + self.shape_y, + dtype=self.d_type) out = fluid.layers.matmul(x, y) out = fluid.layers.transpose(out, perm=[0, 2, 1, 3]) - out = fluid.layers.transpose( - out, perm=[0, 1, 2, 3]) # breaks pattern + out = fluid.layers.transpose(out, perm=[0, 1, 2, + 3]) # breaks pattern out = fluid.layers.reshape( out, [0, 0, self.shape_y[0] * self.shape_y[2]]) out = fluid.layers.fc(out, size=1) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_transpose_reshape_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_transpose_reshape_fuse_pass.py index c0d3ff766b8..a5471eca2c2 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_transpose_reshape_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_transpose_reshape_fuse_pass.py @@ -26,12 +26,12 @@ import hypothesis.strategies as st class 
TestMatmulTransposeReshapeMkldnnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - # If the problem has been fixed, the judgment + # If the problem has been fixed, the judgment # needs to be deleted!!! if 0 in attrs[2]['shape']: return False @@ -67,40 +67,39 @@ class TestMatmulTransposeReshapeMkldnnFusePass(PassAutoScanTest): else: return np.random.random(shape_y).astype(np.float32) - matmul_op = OpConfig( - type="matmul", - inputs={"X": ["input_data1"], - "Y": ["input_data2"]}, - outputs={"Out": ["matmul_output"]}, - attrs={ - "transpose_X": transpose_X, - "transpose_Y": transpose_Y, - "alpha": alpha, - "fused_reshape_X": [], - "fused_reshape_Y": [], - "fused_transpose_X": [], - "fused_transpose_Y": [], - "fused_reshape_Out": [], - "fused_transpose_Out": [] - }) - - transpose2_op = OpConfig( - type="transpose2", - inputs={"X": ["matmul_output"]}, - outputs={ - "Out": ["transpose2_output"], - "XShape": ["transpose2_xshape"] - }, - attrs={'axis': axis}) - - reshape2_op = OpConfig( - type="reshape2", - inputs={"X": ["transpose2_output"]}, - outputs={ - "Out": ["reshape2_output"], - "XShape": ["reshape2_xshape"] - }, - attrs={'shape': shape}) + matmul_op = OpConfig(type="matmul", + inputs={ + "X": ["input_data1"], + "Y": ["input_data2"] + }, + outputs={"Out": ["matmul_output"]}, + attrs={ + "transpose_X": transpose_X, + "transpose_Y": transpose_Y, + "alpha": alpha, + "fused_reshape_X": [], + "fused_reshape_Y": [], + "fused_transpose_X": [], + "fused_transpose_Y": [], + "fused_reshape_Out": [], + "fused_transpose_Out": [] + }) + + transpose2_op = OpConfig(type="transpose2", + inputs={"X": ["matmul_output"]}, + outputs={ + "Out": ["transpose2_output"], + "XShape": ["transpose2_xshape"] + }, + attrs={'axis': axis}) + + reshape2_op = OpConfig(type="reshape2", + inputs={"X": ["transpose2_output"]}, + outputs={ + "Out": ["reshape2_output"], + "XShape": ["reshape2_xshape"] + }, + attrs={'shape': shape}) model_net = [matmul_op, transpose2_op, reshape2_op] @@ -122,8 +121,8 @@ class TestMatmulTransposeReshapeMkldnnFusePass(PassAutoScanTest): yield config, ["matmul"], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, passes=["matmul_transpose_reshape_fuse_pass"]) + self.run_and_statis(quant=False, + passes=["matmul_transpose_reshape_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_transpose_reshape_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_transpose_reshape_fuse_pass.py index 3c6560b3b29..28fe916a6ef 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_transpose_reshape_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_transpose_reshape_fuse_pass.py @@ -26,26 +26,24 @@ import hypothesis.strategies as st class TestMatmulv2TransposeReshapeMkldnnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: if program_config.inputs["input_data1"].shape[ - -4] != 1 and program_config.inputs["input_data2"].shape[ - -4] != 1: + -4] != 1 and program_config.inputs["input_data2"].shape[-4] != 1: if program_config.inputs["input_data1"].shape[ -4] != program_config.inputs["input_data2"].shape[-4]: return False if program_config.inputs["input_data1"].shape[ - -3] != 1 and 
program_config.inputs["input_data2"].shape[ - -3] != 1: + -3] != 1 and program_config.inputs["input_data2"].shape[-3] != 1: if program_config.inputs["input_data1"].shape[ -3] != program_config.inputs["input_data2"].shape[-3]: return False attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - # If the problem has been fixed, the judgment + # If the problem has been fixed, the judgment # needs to be deleted!!! if 0 in attrs[2]['shape']: return False @@ -82,39 +80,38 @@ class TestMatmulv2TransposeReshapeMkldnnFusePass(PassAutoScanTest): else: return np.random.random(shape_y).astype(np.float32) - matmul_op = OpConfig( - type="matmul_v2", - inputs={"X": ["input_data1"], - "Y": ["input_data2"]}, - outputs={"Out": ["matmul_output"]}, - attrs={ - "trans_x": transpose_X, - "trans_y": transpose_Y, - "fused_reshape_X": [], - "fused_reshape_Y": [], - "fused_transpose_X": [], - "fused_transpose_Y": [], - "fused_reshape_Out": [], - "fused_transpose_Out": [] - }) - - transpose2_op = OpConfig( - type="transpose2", - inputs={"X": ["matmul_output"]}, - outputs={ - "Out": ["transpose2_output"], - "XShape": ["transpose2_xshape"] - }, - attrs={'axis': axis}) - - reshape2_op = OpConfig( - type="reshape2", - inputs={"X": ["transpose2_output"]}, - outputs={ - "Out": ["reshape2_output"], - "XShape": ["reshape2_xshape"] - }, - attrs={'shape': shape}) + matmul_op = OpConfig(type="matmul_v2", + inputs={ + "X": ["input_data1"], + "Y": ["input_data2"] + }, + outputs={"Out": ["matmul_output"]}, + attrs={ + "trans_x": transpose_X, + "trans_y": transpose_Y, + "fused_reshape_X": [], + "fused_reshape_Y": [], + "fused_transpose_X": [], + "fused_transpose_Y": [], + "fused_reshape_Out": [], + "fused_transpose_Out": [] + }) + + transpose2_op = OpConfig(type="transpose2", + inputs={"X": ["matmul_output"]}, + outputs={ + "Out": ["transpose2_output"], + "XShape": ["transpose2_xshape"] + }, + attrs={'axis': axis}) + + reshape2_op = OpConfig(type="reshape2", + inputs={"X": ["transpose2_output"]}, + outputs={ + "Out": ["reshape2_output"], + "XShape": ["reshape2_xshape"] + }, + attrs={'shape': shape}) model_net = [matmul_op, transpose2_op, reshape2_op] @@ -132,7 +129,7 @@ class TestMatmulv2TransposeReshapeMkldnnFusePass(PassAutoScanTest): return program_config def sample_predictor_configs(self, program_config): - # gpu_cpu_map_matmul_v2_to_matmul_pass will affect the type of final fused op + # gpu_cpu_map_matmul_v2_to_matmul_pass will affect the type of final fused op fused_op = "matmul_v2" input1_dim1 = program_config.inputs["input_data1"].shape[0] input2_dim1 = program_config.inputs["input_data2"].shape[0] @@ -145,8 +142,8 @@ class TestMatmulv2TransposeReshapeMkldnnFusePass(PassAutoScanTest): yield config, [fused_op], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, passes=["matmul_v2_transpose_reshape_fuse_pass"]) + self.run_and_statis(quant=False, + passes=["matmul_v2_transpose_reshape_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmulv2_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmulv2_op.py index 9fa98045ef3..c144830904a 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmulv2_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmulv2_op.py @@ -26,6 +26,7 @@ import hypothesis.strategies as st class TestMkldnnMatmulv2Op(MkldnnAutoScanTest): + def 
is_program_valid(self, program_config: ProgramConfig) -> bool: if len(program_config.inputs["input_data2"].shape) == 4: if program_config.inputs["input_data1"].shape[ @@ -36,14 +37,14 @@ class TestMkldnnMatmulv2Op(MkldnnAutoScanTest): return False if program_config.inputs["input_data1"].shape[ - -3] != 1 and program_config.inputs["input_data2"].shape[ - -3] != 1: + -3] != 1 and program_config.inputs["input_data2"].shape[-3] != 1: if program_config.inputs["input_data1"].shape[ -3] != program_config.inputs["input_data2"].shape[-3]: return False return True def sample_program_configs(self, *args, **kwargs): + def generate_input(type, *args, **kwargs): transpose_X = kwargs["transpose_X"] transpose_Y = kwargs["transpose_Y"] @@ -83,30 +84,33 @@ class TestMkldnnMatmulv2Op(MkldnnAutoScanTest): else: return np.random.random(shape_y).astype(np.float32) - matmul_op = OpConfig( - type="matmul_v2", - inputs={"X": ["input_data1"], - "Y": ["input_data2"]}, - outputs={"Out": ["matmul_output"]}, - attrs={ - "trans_x": kwargs["transpose_X"], - "trans_y": kwargs["transpose_Y"], - "fused_reshape_X": [], - "fused_reshape_Y": [], - "fused_transpose_X": [], - "fused_transpose_Y": [], - "fused_reshape_Out": [], - "fused_transpose_Out": [] - }) + matmul_op = OpConfig(type="matmul_v2", + inputs={ + "X": ["input_data1"], + "Y": ["input_data2"] + }, + outputs={"Out": ["matmul_output"]}, + attrs={ + "trans_x": kwargs["transpose_X"], + "trans_y": kwargs["transpose_Y"], + "fused_reshape_X": [], + "fused_reshape_Y": [], + "fused_transpose_X": [], + "fused_transpose_Y": [], + "fused_reshape_Out": [], + "fused_transpose_Out": [] + }) program_config = ProgramConfig( ops=[matmul_op], weights={}, inputs={ - "input_data1": TensorConfig(data_gen=partial( - generate_input, "x", *args, **kwargs)), - "input_data2": TensorConfig(data_gen=partial( - generate_input, "y", *args, **kwargs)) + "input_data1": + TensorConfig( + data_gen=partial(generate_input, "x", *args, **kwargs)), + "input_data2": + TensorConfig( + data_gen=partial(generate_input, "y", *args, **kwargs)) }, outputs=["matmul_output"]) @@ -116,17 +120,14 @@ class TestMkldnnMatmulv2Op(MkldnnAutoScanTest): config = self.create_inference_config(use_mkldnn=True) yield config, (1e-5, 1e-5) - @given( - transpose_X=st.booleans(), - transpose_Y=st.booleans(), - y_dim_len=st.sampled_from([3, 4]), - batch_size1=st.integers( - min_value=1, max_value=4), - batch_size2=st.integers( - min_value=1, max_value=4), - channel1=st.sampled_from([1, 16, 32, 64]), - channel2=st.sampled_from([1, 16, 32, 64]), - input_dim=st.sampled_from([16, 32, 64])) + @given(transpose_X=st.booleans(), + transpose_Y=st.booleans(), + y_dim_len=st.sampled_from([3, 4]), + batch_size1=st.integers(min_value=1, max_value=4), + batch_size2=st.integers(min_value=1, max_value=4), + channel1=st.sampled_from([1, 16, 32, 64]), + channel2=st.sampled_from([1, 16, 32, 64]), + input_dim=st.sampled_from([16, 32, 64])) def test(self, *args, **kwargs): self.run_test(*args, **kwargs) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_mish_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_mish_op.py index 83744e0a8bd..2b2759cc651 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_mish_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_mish_op.py @@ -22,32 +22,33 @@ import hypothesis.strategies as st class TestMkldnnMishOp(MkldnnAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: # if mode is channel, and in_shape 
is 1 rank - if len(program_config.inputs['input_data']. - shape) == 1 and program_config.ops[0].attrs['mode'] == 'channel': + if len(program_config.inputs['input_data'].shape + ) == 1 and program_config.ops[0].attrs['mode'] == 'channel': return False return True def sample_program_configs(self, *args, **kwargs): + def generate_input(*args, **kwargs): return np.random.random(kwargs['in_shape']).astype(np.float32) - mish_op = OpConfig( - type="mish", - inputs={"X": ["input_data"]}, - outputs={"Out": ["output_data"]}, - attrs={ - "mode": kwargs['mode'], - "data_format": kwargs['data_format'] - }) + mish_op = OpConfig(type="mish", + inputs={"X": ["input_data"]}, + outputs={"Out": ["output_data"]}, + attrs={ + "mode": kwargs['mode'], + "data_format": kwargs['data_format'] + }) program_config = ProgramConfig( ops=[mish_op], weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial(generate_input, - *args, **kwargs)), + "input_data": + TensorConfig(data_gen=partial(generate_input, *args, **kwargs)), }, outputs=["output_data"]) @@ -57,12 +58,11 @@ class TestMkldnnMishOp(MkldnnAutoScanTest): config = self.create_inference_config(use_mkldnn=True) yield config, (1e-5, 1e-5) - @given( - mode=st.sampled_from(['all', 'channel', 'element']), - data_format=st.sampled_from(['NCHW', 'NHWC']), - in_shape=st.lists( - st.integers( - min_value=1, max_value=32), min_size=1, max_size=4)) + @given(mode=st.sampled_from(['all', 'channel', 'element']), + data_format=st.sampled_from(['NCHW', 'NHWC']), + in_shape=st.lists(st.integers(min_value=1, max_value=32), + min_size=1, + max_size=4)) def test(self, *args, **kwargs): self.run_test(quant=False, *args, **kwargs) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_prelu_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_prelu_op.py index 3839c22ca25..3dde53d84aa 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_prelu_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_prelu_op.py @@ -26,14 +26,16 @@ import hypothesis.strategies as st class TestMkldnnPreluOp(MkldnnAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: # if mode is channel, and in_shape is 1 rank - if len(program_config.inputs['input_data']. 
- shape) == 1 and program_config.ops[0].attrs['mode'] == 'channel': + if len(program_config.inputs['input_data'].shape + ) == 1 and program_config.ops[0].attrs['mode'] == 'channel': return False return True def sample_program_configs(self, *args, **kwargs): + def generate_input(*args, **kwargs): return np.random.random(kwargs['in_shape']).astype(np.float32) @@ -56,15 +58,16 @@ class TestMkldnnPreluOp(MkldnnAutoScanTest): return np.zeros((1)).astype(np.float32) return np.random.random(kwargs['in_shape']).astype(np.float32) - prelu_op = OpConfig( - type="prelu", - inputs={"X": ["input_data"], - "Alpha": ["alpha_weight"]}, - outputs={"Out": ["output_data"]}, - attrs={ - "mode": kwargs['mode'], - "data_format": kwargs['data_format'] - }) + prelu_op = OpConfig(type="prelu", + inputs={ + "X": ["input_data"], + "Alpha": ["alpha_weight"] + }, + outputs={"Out": ["output_data"]}, + attrs={ + "mode": kwargs['mode'], + "data_format": kwargs['data_format'] + }) program_config = ProgramConfig( ops=[prelu_op], @@ -87,12 +90,11 @@ class TestMkldnnPreluOp(MkldnnAutoScanTest): def add_skip_pass_case(self): pass - @given( - mode=st.sampled_from(['all', 'channel', 'element']), - data_format=st.sampled_from(['NCHW', 'NHWC']), - in_shape=st.lists( - st.integers( - min_value=1, max_value=32), min_size=1, max_size=4)) + @given(mode=st.sampled_from(['all', 'channel', 'element']), + data_format=st.sampled_from(['NCHW', 'NHWC']), + in_shape=st.lists(st.integers(min_value=1, max_value=32), + min_size=1, + max_size=4)) def test(self, *args, **kwargs): self.add_skip_pass_case() self.run_test(quant=False, *args, **kwargs) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_fuse_pass.py index 952cd27bbae..ce1ea51cb84 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_fuse_pass.py @@ -29,6 +29,7 @@ num = 32 * 64 class TestReshapeTransposeMatmulMkldnnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -38,8 +39,8 @@ class TestReshapeTransposeMatmulMkldnnFusePass(PassAutoScanTest): alpha = draw(st.floats(min_value=0.01, max_value=2)) axis = draw(st.sampled_from([[0, 2, 1, 3]])) shape = draw( - st.sampled_from([[0, 64, -1, 32], [0, 32, -1, 64], [-1, 32, 1, 64] - ])) + st.sampled_from([[0, 64, -1, 32], [0, 32, -1, 64], [-1, 32, 1, + 64]])) batch_size = draw(st.integers(min_value=1, max_value=4)) channel = draw(st.integers(min_value=1, max_value=64)) input_dim = draw(st.sampled_from([32, 64])) @@ -63,7 +64,7 @@ class TestReshapeTransposeMatmulMkldnnFusePass(PassAutoScanTest): if matmul_shape[i] == -1: matmul_shape[i] = int(abs(input_volume / shape_volume)) - # Only for transpose axis [0, 2, 1, 3] + # Only for transpose axis [0, 2, 1, 3] matmul_shape[1], matmul_shape[2] = matmul_shape[2], matmul_shape[1] if attrs[2]['transpose_X'] and attrs[2]['transpose_Y']: diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py index caf33156fc1..fb8dc034bd5 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py +++ 
b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py @@ -25,23 +25,24 @@ from paddle.fluid.core import PassVersionChecker class TestReshapeTransposeMatmulV2OneDNNFusePass(InferencePassTest): + def setUp(self): self.set_params() self.tranpose_perm = [0, 2, 1, 3] self.pass_name = 'reshape_transpose_matmul_v2_mkldnn_fuse_pass' with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=self.data_shape, dtype="float32") - weight = fluid.layers.create_parameter( - shape=self.weight_shape, dtype="float32") + data = fluid.data(name="data", + shape=self.data_shape, + dtype="float32") + weight = fluid.layers.create_parameter(shape=self.weight_shape, + dtype="float32") reshape = fluid.layers.reshape(data, shape=self.reshape_shape) transpose = fluid.layers.transpose(reshape, self.tranpose_perm) - matmul = paddle.matmul( - transpose, - weight, - transpose_x=self.transpose_x, - transpose_y=self.transpose_y) + matmul = paddle.matmul(transpose, + weight, + transpose_x=self.transpose_x, + transpose_y=self.transpose_y) self.fetch_list = [matmul] self.enable_mkldnn = True @@ -64,6 +65,7 @@ class TestReshapeTransposeMatmulV2OneDNNFusePass(InferencePassTest): class TestReshapeTransposeMatmulV2OneDNNFusePassBroadcast( TestReshapeTransposeMatmulV2OneDNNFusePass): + def set_params(self): self.data_shape = [2, 64, 16] self.weight_shape = [1, 2, 8, 64] diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_scale_matmul_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_scale_matmul_fuse_pass.py index 86acbe615b3..f29e20f6b8d 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_scale_matmul_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_scale_matmul_fuse_pass.py @@ -26,6 +26,7 @@ import hypothesis.strategies as st class TestScaleMatmulMkldnnFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_shape_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_shape_op.py index 8f5d7823cdf..92111062b12 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_shape_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_shape_op.py @@ -22,25 +22,26 @@ import hypothesis.strategies as st class TestMkldnnShapeOp(MkldnnAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self, *args, **kwargs): + def generate_input(*args, **kwargs): - return np.random.random(kwargs['in_shape']).astype(kwargs[ - 'in_dtype']) + return np.random.random(kwargs['in_shape']).astype( + kwargs['in_dtype']) - shape_op = OpConfig( - type="shape", - inputs={"Input": ["input_data"]}, - outputs={"Out": ["output_data"]}) + shape_op = OpConfig(type="shape", + inputs={"Input": ["input_data"]}, + outputs={"Out": ["output_data"]}) program_config = ProgramConfig( ops=[shape_op], weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial(generate_input, - *args, **kwargs)), + "input_data": + TensorConfig(data_gen=partial(generate_input, *args, **kwargs)), }, outputs=["output_data"]) @@ -50,11 +51,10 @@ class TestMkldnnShapeOp(MkldnnAutoScanTest): config = self.create_inference_config(use_mkldnn=True) yield config, (1e-5, 1e-5) - @given( - in_shape=st.lists( - st.integers( - min_value=1, max_value=3), min_size=1, 
max_size=6), - in_dtype=st.sampled_from([np.float32, np.uint16, np.int8, np.uint8])) + @given(in_shape=st.lists(st.integers(min_value=1, max_value=3), + min_size=1, + max_size=6), + in_dtype=st.sampled_from([np.float32, np.uint16, np.int8, np.uint8])) def test(self, *args, **kwargs): self.run_test(quant=False, *args, **kwargs) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_shuffle_channel_detect_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_shuffle_channel_detect_pass.py index 828e92dc034..74c3c34212f 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_shuffle_channel_detect_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_shuffle_channel_detect_pass.py @@ -32,6 +32,7 @@ def product(input): class TestShuffleChannelMKLDNNDetectPass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: input_shape = program_config.inputs['input_data'].shape first_reshape2_shape = program_config.ops[0].attrs['shape'] @@ -130,8 +131,8 @@ class TestShuffleChannelMKLDNNDetectPass(PassAutoScanTest): yield config, ["shuffle_channel"], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, passes=["shuffle_channel_mkldnn_detect_pass"]) + self.run_and_statis(quant=False, + passes=["shuffle_channel_mkldnn_detect_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_shuffle_channel_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_shuffle_channel_op.py index 26655970290..d9050b58ee3 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_shuffle_channel_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_shuffle_channel_op.py @@ -22,25 +22,26 @@ import hypothesis.strategies as st class TestMKLDNNShuffleChannelOp(MkldnnAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self, *args, **kwargs): + def generate_input(*args, **kwargs): return np.random.random(kwargs['in_shape']).astype(np.float32) - shuffle_channel_op = OpConfig( - type="shuffle_channel", - inputs={"X": ["input_data"]}, - outputs={"Out": ["output_data"]}, - attrs={"group": kwargs['group']}) + shuffle_channel_op = OpConfig(type="shuffle_channel", + inputs={"X": ["input_data"]}, + outputs={"Out": ["output_data"]}, + attrs={"group": kwargs['group']}) program_config = ProgramConfig( ops=[shuffle_channel_op], weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial(generate_input, - *args, **kwargs)), + "input_data": + TensorConfig(data_gen=partial(generate_input, *args, **kwargs)), }, outputs=["output_data"]) @@ -50,9 +51,8 @@ class TestMKLDNNShuffleChannelOp(MkldnnAutoScanTest): config = self.create_inference_config(use_mkldnn=True) yield config, (1e-5, 1e-5) - @given( - group=st.sampled_from([1, 2, 8, 32, 128]), - in_shape=st.sampled_from([[5, 512, 2, 3], [2, 256, 5, 4]])) + @given(group=st.sampled_from([1, 2, 8, 32, 128]), + in_shape=st.sampled_from([[5, 512, 2, 3], [2, 256, 5, 4]])) def test(self, *args, **kwargs): self.run_test(quant=False, *args, **kwargs) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_softplus_activation_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_softplus_activation_fuse_pass.py index 83c095baeff..0c25a790138 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_softplus_activation_fuse_pass.py +++ 
b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_softplus_activation_fuse_pass.py @@ -30,8 +30,9 @@ class SoftplusActivationReluOneDNNFusePassTest(InferencePassTest): def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 100, 100], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 3, 100, 100], + dtype="float32") softplus_out = fluid.layers.softplus(data) if self.fuse_activation_beta is not None: activation_out = self.fuse_activation( @@ -62,12 +63,14 @@ class SoftplusActivationReluOneDNNFusePassTest(InferencePassTest): class SoftplusActivationTanhOneDNNFusePassTest( SoftplusActivationReluOneDNNFusePassTest): + def set_params(self): self.fuse_activation = fluid.layers.tanh class SoftplusActivationLeakyReluOneDNNFusePassTest( SoftplusActivationReluOneDNNFusePassTest): + def set_params(self): self.fuse_activation = fluid.layers.leaky_relu self.fuse_activation_alpha = 0.3 @@ -75,6 +78,7 @@ class SoftplusActivationLeakyReluOneDNNFusePassTest( class SoftplusActivationSwishOneDNNFusePassTest( SoftplusActivationReluOneDNNFusePassTest): + def set_params(self): self.fuse_activation = fluid.layers.swish self.fuse_activation_alpha = 3 @@ -82,24 +86,28 @@ class SoftplusActivationSwishOneDNNFusePassTest( class SoftplusActivationHardSwishOneDNNFusePassTest( SoftplusActivationReluOneDNNFusePassTest): + def set_params(self): self.fuse_activation = fluid.layers.hard_swish class SoftplusActivationSqrtOneDNNFusePassTest( SoftplusActivationReluOneDNNFusePassTest): + def set_params(self): self.fuse_activation = fluid.layers.hard_swish class SoftplusActivationAbsOneDNNFusePassTest( SoftplusActivationReluOneDNNFusePassTest): + def set_params(self): self.fuse_activation = fluid.layers.abs class SoftplusActivationClipOneDNNFusePassTest( SoftplusActivationReluOneDNNFusePassTest): + def set_params(self): self.fuse_activation = fluid.layers.clip self.fuse_activation_alpha = 1.1 @@ -108,12 +116,14 @@ class SoftplusActivationClipOneDNNFusePassTest( class SoftplusActivationGeluErfOneDNNFusePassTest( SoftplusActivationReluOneDNNFusePassTest): + def set_params(self): self.fuse_activation = fluid.layers.gelu class SoftplusActivationGeluTanhOneDNNFusePassTest( SoftplusActivationReluOneDNNFusePassTest): + def set_params(self): self.fuse_activation = fluid.layers.gelu self.fuse_activation_alpha = True # simulated "Approximate" attr @@ -121,12 +131,14 @@ class SoftplusActivationGeluTanhOneDNNFusePassTest( class SoftplusActivationRelu6OneDNNFusePassTest( SoftplusActivationReluOneDNNFusePassTest): + def set_params(self): self.fuse_activation = fluid.layers.relu6 class SoftplusActivationSigmoidOneDNNFusePassTest( SoftplusActivationReluOneDNNFusePassTest): + def set_params(self): self.fuse_activation = fluid.layers.sigmoid diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mul_gru_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mul_gru_fuse_pass.py index b5a53770435..b1cf07776ef 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mul_gru_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mul_gru_fuse_pass.py @@ -27,6 +27,7 @@ from functools import reduce class TestMulGruFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -47,66 +48,66 @@ class TestMulGruFusePass(PassAutoScanTest): def generate_weight(shape): return np.full(shape, 0.0001).astype(np.float32) - im2sequence_op = 
OpConfig( - type="im2sequence", - inputs={"X": ["input_data"]}, - outputs={"Out": ["seq_out"]}, - attrs={ - "kernels": [6, 1], - "out_stride": [1, 1], - "paddings": [0, 0, 0, 0], - "strides": [1, 1] - }) - - mul_op = OpConfig( - type="mul", - inputs={"X": ["seq_out"], - "Y": ["mul_weight"]}, - outputs={"Out": ["mul_out"]}, - attrs={"x_num_col_dims": x_col, - "y_num_col_dims": y_col}) + im2sequence_op = OpConfig(type="im2sequence", + inputs={"X": ["input_data"]}, + outputs={"Out": ["seq_out"]}, + attrs={ + "kernels": [6, 1], + "out_stride": [1, 1], + "paddings": [0, 0, 0, 0], + "strides": [1, 1] + }) + + mul_op = OpConfig(type="mul", + inputs={ + "X": ["seq_out"], + "Y": ["mul_weight"] + }, + outputs={"Out": ["mul_out"]}, + attrs={ + "x_num_col_dims": x_col, + "y_num_col_dims": y_col + }) if has_origin_mode: - gru_op = OpConfig( - type="gru", - inputs={ - "Input": ["mul_out"], - "Weight": ["gru_weight"], - "Bias": ["gru_bias"] - }, - outputs={ - "BatchGate": ["batch_gate"], - "BatchHidden": ["batch_hidden"], - "BatchResetHiddenPrev": ["batch_reset"], - "Hidden": ["hidden"] - }, - attrs={ - 'activation': activation, - 'is_reverse': is_reverse, - 'gate_activation': gate_activation, - 'is_test': True, - 'origin_mode': origin_mode - }) + gru_op = OpConfig(type="gru", + inputs={ + "Input": ["mul_out"], + "Weight": ["gru_weight"], + "Bias": ["gru_bias"] + }, + outputs={ + "BatchGate": ["batch_gate"], + "BatchHidden": ["batch_hidden"], + "BatchResetHiddenPrev": ["batch_reset"], + "Hidden": ["hidden"] + }, + attrs={ + 'activation': activation, + 'is_reverse': is_reverse, + 'gate_activation': gate_activation, + 'is_test': True, + 'origin_mode': origin_mode + }) else: - gru_op = OpConfig( - type="gru", - inputs={ - "Input": ["mul_out"], - "Weight": ["gru_weight"], - "Bias": ["gru_bias"] - }, - outputs={ - "BatchGate": ["batch_gate"], - "BatchHidden": ["batch_hidden"], - "BatchResetHiddenPrev": ["batch_reset"], - "Hidden": ["hidden"] - }, - attrs={ - 'activation': activation, - 'is_reverse': is_reverse, - 'gate_activation': gate_activation, - 'is_test': True - }) + gru_op = OpConfig(type="gru", + inputs={ + "Input": ["mul_out"], + "Weight": ["gru_weight"], + "Bias": ["gru_bias"] + }, + outputs={ + "BatchGate": ["batch_gate"], + "BatchHidden": ["batch_hidden"], + "BatchResetHiddenPrev": ["batch_reset"], + "Hidden": ["hidden"] + }, + attrs={ + 'activation': activation, + 'is_reverse': is_reverse, + 'gate_activation': gate_activation, + 'is_test': True + }) model_net = [im2sequence_op, mul_op, gru_op] @@ -132,8 +133,9 @@ class TestMulGruFusePass(PassAutoScanTest): yield config, ["im2sequence", "fusion_gru"], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, max_duration=300, passes=["mul_gru_fuse_pass"]) + self.run_and_statis(quant=False, + max_duration=300, + passes=["mul_gru_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mul_lstm_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mul_lstm_fuse_pass.py index c944abb60c8..959c75d53db 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mul_lstm_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mul_lstm_fuse_pass.py @@ -27,6 +27,7 @@ from functools import reduce class TestMulLstmFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -48,46 +49,47 @@ class TestMulLstmFusePass(PassAutoScanTest): def generate_weight(shape): return np.full(shape, 0.0001).astype(np.float32) 
- im2sequence_op = OpConfig( - type="im2sequence", - inputs={"X": ["input_data"]}, - outputs={"Out": ["seq_out"]}, - attrs={ - "kernels": [6, 1], - "out_stride": [1, 1], - "paddings": [0, 0, 0, 0], - "strides": [1, 1] - }) - - mul_op = OpConfig( - type="mul", - inputs={"X": ["seq_out"], - "Y": ["mul_weight"]}, - outputs={"Out": ["mul_out"]}, - attrs={"x_num_col_dims": x_col, - "y_num_col_dims": y_col}) - - lstm_op = OpConfig( - type="lstm", - inputs={ - "Input": ["mul_out"], - "Weight": ["lstm_weight"], - "Bias": ["lstm_bias"] - }, - outputs={ - "Hidden": ["lstm_hidden"], - "Cell": ["lstm_cell"], - "BatchGate": ["lstm_gate"], - "BatchCellPreAct": ["lstm_batch_cell"] - }, - attrs={ - 'use_peepholes': use_peepholes, - 'is_reverse': is_reverse, - 'gate_activation': gate_activation, - 'cell_activation': cell_activation, - 'candidate_activation': candidate_activation, - 'is_test': True - }) + im2sequence_op = OpConfig(type="im2sequence", + inputs={"X": ["input_data"]}, + outputs={"Out": ["seq_out"]}, + attrs={ + "kernels": [6, 1], + "out_stride": [1, 1], + "paddings": [0, 0, 0, 0], + "strides": [1, 1] + }) + + mul_op = OpConfig(type="mul", + inputs={ + "X": ["seq_out"], + "Y": ["mul_weight"] + }, + outputs={"Out": ["mul_out"]}, + attrs={ + "x_num_col_dims": x_col, + "y_num_col_dims": y_col + }) + + lstm_op = OpConfig(type="lstm", + inputs={ + "Input": ["mul_out"], + "Weight": ["lstm_weight"], + "Bias": ["lstm_bias"] + }, + outputs={ + "Hidden": ["lstm_hidden"], + "Cell": ["lstm_cell"], + "BatchGate": ["lstm_gate"], + "BatchCellPreAct": ["lstm_batch_cell"] + }, + attrs={ + 'use_peepholes': use_peepholes, + 'is_reverse': is_reverse, + 'gate_activation': gate_activation, + 'cell_activation': cell_activation, + 'candidate_activation': candidate_activation, + 'is_test': True + }) model_net = [im2sequence_op, mul_op, lstm_op] @@ -118,8 +120,9 @@ class TestMulLstmFusePass(PassAutoScanTest): yield config, ["im2sequence", "fusion_lstm"], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, max_duration=300, passes=["mul_lstm_fuse_pass"]) + self.run_and_statis(quant=False, + max_duration=300, + passes=["mul_lstm_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_repeated_fc_relu_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_repeated_fc_relu_fuse_pass.py index 1814b53401e..f112ccca878 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_repeated_fc_relu_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_repeated_fc_relu_fuse_pass.py @@ -27,6 +27,7 @@ from functools import reduce class TestRepeatedFcReluFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -53,55 +54,61 @@ class TestRepeatedFcReluFusePass(PassAutoScanTest): 'dim': dim }] - mul_op1 = OpConfig( - type="mul", - inputs={"X": ["input_data"], - "Y": ["mul1_weight"]}, - outputs={"Out": ["mul1_output"]}, - attrs={"x_num_col_dims": x_col, - "y_num_col_dims": y_col}) - - elt_op1 = OpConfig( - type="elementwise_add", - inputs={"X": ["mul1_output"], - "Y": ["elementwise1_weight"]}, - outputs={"Out": ["elementwise1_output"]}, - attrs={"axis": axis}) - - relu_op1 = OpConfig( - type="relu", - inputs={"X": ["elementwise1_output"]}, - outputs={"Out": ["relu1_output"]}, - attrs={}) - - mul_op2 = OpConfig( - type="mul", - inputs={"X": ["relu1_output"], - "Y": ["mul2_weight"]}, - outputs={"Out": ["mul2_output"]}, - attrs={"x_num_col_dims": x_col, - "y_num_col_dims": y_col}) - 
- elt_op2 = OpConfig( - type="elementwise_add", - inputs={"X": ["mul2_output"], - "Y": ["elementwise2_weight"]}, - outputs={"Out": ["elementwise2_output"]}, - attrs={"axis": axis}) - - relu_op2 = OpConfig( - type="relu", - inputs={"X": ["elementwise2_output"]}, - outputs={"Out": ["relu2_output"]}, - attrs={}) + mul_op1 = OpConfig(type="mul", + inputs={ + "X": ["input_data"], + "Y": ["mul1_weight"] + }, + outputs={"Out": ["mul1_output"]}, + attrs={ + "x_num_col_dims": x_col, + "y_num_col_dims": y_col + }) + + elt_op1 = OpConfig(type="elementwise_add", + inputs={ + "X": ["mul1_output"], + "Y": ["elementwise1_weight"] + }, + outputs={"Out": ["elementwise1_output"]}, + attrs={"axis": axis}) + + relu_op1 = OpConfig(type="relu", + inputs={"X": ["elementwise1_output"]}, + outputs={"Out": ["relu1_output"]}, + attrs={}) + + mul_op2 = OpConfig(type="mul", + inputs={ + "X": ["relu1_output"], + "Y": ["mul2_weight"] + }, + outputs={"Out": ["mul2_output"]}, + attrs={ + "x_num_col_dims": x_col, + "y_num_col_dims": y_col + }) + + elt_op2 = OpConfig(type="elementwise_add", + inputs={ + "X": ["mul2_output"], + "Y": ["elementwise2_weight"] + }, + outputs={"Out": ["elementwise2_output"]}, + attrs={"axis": axis}) + + relu_op2 = OpConfig(type="relu", + inputs={"X": ["elementwise2_output"]}, + outputs={"Out": ["relu2_output"]}, + attrs={}) model_net = [mul_op1, elt_op1, relu_op1, mul_op2, elt_op2, relu_op2] program_config = ProgramConfig( ops=model_net, weights={ - "mul1_weight": TensorConfig(data_gen=partial(generate_weight, - [dim, 32])), + "mul1_weight": + TensorConfig(data_gen=partial(generate_weight, [dim, 32])), "mul2_weight": TensorConfig(data_gen=partial(generate_weight, [32, 128])), "elementwise1_weight": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_reshape2_matmul_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_reshape2_matmul_fuse_pass.py index 9bec34df5b6..79652c53e12 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_reshape2_matmul_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_reshape2_matmul_fuse_pass.py @@ -50,9 +50,9 @@ class TestReshape2MatmulFusePass(PassAutoScanTest): def sample_program_config(self, draw): # 1. Generate shape and attr of reshape2 reshape = draw( - st.lists( - st.integers( - min_value=1, max_value=10), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=10), + min_size=2, + max_size=2)) x_shape = reshape + [1, 1] # 2. Generate attr:transpose_X/transpose_Y/alpha of matmul @@ -62,9 +62,9 @@ class TestReshape2MatmulFusePass(PassAutoScanTest): # 3. Generate legal shape of input:Y of matmul y_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=2)) y_shape[0] = x_shape[1] # 4. 
Generate legal attr:axis of elementwise_add @@ -72,13 +72,19 @@ class TestReshape2MatmulFusePass(PassAutoScanTest): if axis == 0 or axis == -1: if draw(st.booleans()): if axis == 0: - bias_shape = [x_shape[0], ] + bias_shape = [ + x_shape[0], + ] else: - bias_shape = [y_shape[1], ] + bias_shape = [ + y_shape[1], + ] else: bias_shape = [x_shape[0], y_shape[1]] elif axis == 1: - bias_shape = [y_shape[1], ] + bias_shape = [ + y_shape[1], + ] if draw(st.integers(min_value=1, max_value=10)) <= 1: bias_shape[-1] = 1 @@ -87,14 +93,21 @@ class TestReshape2MatmulFusePass(PassAutoScanTest): reshape2_op = OpConfig( "reshape2", - inputs={"X": ["reshape2_x"], }, + inputs={ + "X": ["reshape2_x"], + }, shape=reshape, - outputs={"Out": ["reshape2_out"], - "XShape": ["xshape"]}, ) + outputs={ + "Out": ["reshape2_out"], + "XShape": ["xshape"] + }, + ) matmul_op = OpConfig( "matmul", - inputs={"X": ["reshape2_out"], - "Y": ["matmul_y"]}, + inputs={ + "X": ["reshape2_out"], + "Y": ["matmul_y"] + }, outputs={"Out": ["matmul_out"]}, alpha=alpha, transpose_X=transpose_X, @@ -104,14 +117,18 @@ class TestReshape2MatmulFusePass(PassAutoScanTest): fused_transpose_X=[], fused_transpose_Y=[], fused_reshape_Out=[], - fused_transpose_Out=[], ) + fused_transpose_Out=[], + ) add_op = OpConfig( "elementwise_add", - inputs={"X": ["matmul_out"], - "Y": ["bias"]}, + inputs={ + "X": ["matmul_out"], + "Y": ["bias"] + }, outputs={"Out": ["add_out"]}, - axis=axis, ) + axis=axis, + ) ops = [reshape2_op, matmul_op, add_op] @@ -122,8 +139,11 @@ class TestReshape2MatmulFusePass(PassAutoScanTest): "matmul_y": TensorConfig(shape=y_shape), "bias": TensorConfig(shape=bias_shape), }, - inputs={"reshape2_x": TensorConfig(shape=x_shape), }, - outputs=ops[-1].outputs["Out"], ) + inputs={ + "reshape2_x": TensorConfig(shape=x_shape), + }, + outputs=ops[-1].outputs["Out"], + ) else: program_config = ProgramConfig( ops=ops, @@ -133,15 +153,15 @@ class TestReshape2MatmulFusePass(PassAutoScanTest): "matmul_y": TensorConfig(shape=y_shape), "bias": TensorConfig(shape=bias_shape), }, - outputs=ops[-1].outputs["Out"], ) + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=50, - max_duration=1000, - passes=["gpu_cpu_reshape2_matmul_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=50, + max_duration=1000, + passes=["gpu_cpu_reshape2_matmul_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_seq_concat_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_seq_concat_fc_fuse_pass.py index c8e939d3926..de0aed5e204 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_seq_concat_fc_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_seq_concat_fc_fuse_pass.py @@ -27,6 +27,7 @@ from functools import reduce class TestSeqConcatFcFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -48,19 +49,21 @@ class TestSeqConcatFcFusePass(PassAutoScanTest): def generate_weight(shape): return np.random.random(shape).astype(np.float32) - sequence_expand_op1 = OpConfig( - type="sequence_expand", - inputs={"X": ["input_data1"], - "Y": ["input_data2"]}, - outputs={"Out": ["seq_exp1_out"]}, - attrs={"ref_level": ref_level}) - - sequence_expand_op2 = OpConfig( - type="sequence_expand", - inputs={"X": ["input_data1"], - "Y": ["input_data3"]}, - outputs={"Out": ["seq_exp2_out"]}, - attrs={"ref_level": ref_level}) + 
sequence_expand_op1 = OpConfig(type="sequence_expand", + inputs={ + "X": ["input_data1"], + "Y": ["input_data2"] + }, + outputs={"Out": ["seq_exp1_out"]}, + attrs={"ref_level": ref_level}) + + sequence_expand_op2 = OpConfig(type="sequence_expand", + inputs={ + "X": ["input_data1"], + "Y": ["input_data3"] + }, + outputs={"Out": ["seq_exp2_out"]}, + attrs={"ref_level": ref_level}) concat_op = OpConfig( type="concat", @@ -68,27 +71,32 @@ class TestSeqConcatFcFusePass(PassAutoScanTest): outputs={"Out": ["concat_output"]}, attrs={'axis': axis1}) - mul_op = OpConfig( - type="mul", - inputs={"X": ["concat_output"], - "Y": ["mul_weight"]}, - outputs={"Out": ["mul_out"]}, - attrs={"x_num_col_dims": x_col, - "y_num_col_dims": y_col}) - - elt_op = OpConfig( - type="elementwise_add", - inputs={"X": ["mul_out"], - "Y": ["elt_weight"]}, - outputs={"Out": ["elt_out"]}, - attrs={"axis": axis2}) - - act_op = OpConfig( - type=act_type, - inputs={"X": ["elt_out"]}, - outputs={"Out": ["act_out"]}, - attrs={"use_cudnn": use_cudnn, - "use_mkldnn": use_mkldnn}) + mul_op = OpConfig(type="mul", + inputs={ + "X": ["concat_output"], + "Y": ["mul_weight"] + }, + outputs={"Out": ["mul_out"]}, + attrs={ + "x_num_col_dims": x_col, + "y_num_col_dims": y_col + }) + + elt_op = OpConfig(type="elementwise_add", + inputs={ + "X": ["mul_out"], + "Y": ["elt_weight"] + }, + outputs={"Out": ["elt_out"]}, + attrs={"axis": axis2}) + + act_op = OpConfig(type=act_type, + inputs={"X": ["elt_out"]}, + outputs={"Out": ["act_out"]}, + attrs={ + "use_cudnn": use_cudnn, + "use_mkldnn": use_mkldnn + }) model_net = [ sequence_expand_op1, sequence_expand_op2, concat_op, mul_op, elt_op, @@ -104,15 +112,18 @@ class TestSeqConcatFcFusePass(PassAutoScanTest): TensorConfig(data_gen=partial(generate_weight, [dim])) }, inputs={ - "input_data1": TensorConfig( - data_gen=partial(generate_input, [batch_size, 128]), - lod=[[0, 1]]), - "input_data2": TensorConfig( - data_gen=partial(generate_input, [batch_size, 128]), - lod=[[0, 1]]), - "input_data3": TensorConfig( - data_gen=partial(generate_input, [batch_size, 128]), - lod=[[0, 1]]) + "input_data1": + TensorConfig(data_gen=partial(generate_input, + [batch_size, 128]), + lod=[[0, 1]]), + "input_data2": + TensorConfig(data_gen=partial(generate_input, + [batch_size, 128]), + lod=[[0, 1]]), + "input_data3": + TensorConfig(data_gen=partial(generate_input, + [batch_size, 128]), + lod=[[0, 1]]) }, outputs=["act_out"]) @@ -123,6 +134,7 @@ class TestSeqConcatFcFusePass(PassAutoScanTest): yield config, ["fusion_seqexpand_concat_fc"], (1e-5, 1e-5) def add_ignore_pass_case(self): + def teller1(program_config, predictor_config): if program_config.ops[-1].type == "relu": return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py index 769720fb258..4140eb32bb8 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py @@ -27,6 +27,7 @@ from functools import reduce class TestSeqconvEltaddReluFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -45,41 +46,41 @@ class TestSeqconvEltaddReluFusePass(PassAutoScanTest): def generate_weight(shape): return np.random.random(shape).astype(np.float32) - im2sequence_op = OpConfig( - type="im2sequence", - inputs={"X": ["input_data"]}, - outputs={"Out": 
["seq_out"]}, - attrs={ - "kernels": [6, 1], - "out_stride": [1, 1], - "paddings": [0, 0, 0, 0], - "strides": [1, 1] - }) - - sequence_conv_op = OpConfig( - type="sequence_conv", - inputs={"X": ["seq_out"], - "Filter": ["conv_weight"]}, - outputs={"Out": ["conv_out"]}, - attrs={ - "contextLength": contextLength, - "contextStart": contextStart, - "contextStride": contextStride, - "paddingTrainable": paddingTrainable - }) - - elementwise_add_op = OpConfig( - type="elementwise_add", - inputs={"X": ["conv_out"], - "Y": ["elt_weight"]}, - outputs={"Out": ["elt_output"]}, - attrs={'axis': axis}) - - relu_op = OpConfig( - type="relu", - inputs={"X": ["elt_output"]}, - outputs={"Out": ["relu_output"]}, - attrs={}) + im2sequence_op = OpConfig(type="im2sequence", + inputs={"X": ["input_data"]}, + outputs={"Out": ["seq_out"]}, + attrs={ + "kernels": [6, 1], + "out_stride": [1, 1], + "paddings": [0, 0, 0, 0], + "strides": [1, 1] + }) + + sequence_conv_op = OpConfig(type="sequence_conv", + inputs={ + "X": ["seq_out"], + "Filter": ["conv_weight"] + }, + outputs={"Out": ["conv_out"]}, + attrs={ + "contextLength": contextLength, + "contextStart": contextStart, + "contextStride": contextStride, + "paddingTrainable": paddingTrainable + }) + + elementwise_add_op = OpConfig(type="elementwise_add", + inputs={ + "X": ["conv_out"], + "Y": ["elt_weight"] + }, + outputs={"Out": ["elt_output"]}, + attrs={'axis': axis}) + + relu_op = OpConfig(type="relu", + inputs={"X": ["elt_output"]}, + outputs={"Out": ["relu_output"]}, + attrs={}) model_net = [ im2sequence_op, sequence_conv_op, elementwise_add_op, relu_op @@ -88,8 +89,10 @@ class TestSeqconvEltaddReluFusePass(PassAutoScanTest): program_config = ProgramConfig( ops=model_net, weights={ - "conv_weight": TensorConfig(data_gen=partial( - generate_weight, [768 * contextLength, 16])), + "conv_weight": + TensorConfig( + data_gen=partial(generate_weight, [768 * + contextLength, 16])), "elt_weight": TensorConfig(data_gen=partial(generate_weight, [16])) }, @@ -102,12 +105,12 @@ class TestSeqconvEltaddReluFusePass(PassAutoScanTest): def sample_predictor_configs(self, program_config): config = self.create_inference_config() - yield config, ["im2sequence", "fusion_seqconv_eltadd_relu"], (1e-5, - 1e-5) + yield config, ["im2sequence", + "fusion_seqconv_eltadd_relu"], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, passes=["seqconv_eltadd_relu_fuse_pass"]) + self.run_and_statis(quant=False, + passes=["seqconv_eltadd_relu_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_seqpool_cvm_concat_fuse_pass_py.py b/python/paddle/fluid/tests/unittests/ir/inference/test_seqpool_cvm_concat_fuse_pass_py.py index 2e403b99ab8..53333da57e7 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_seqpool_cvm_concat_fuse_pass_py.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_seqpool_cvm_concat_fuse_pass_py.py @@ -27,6 +27,7 @@ from functools import reduce class TestSeqpoolCvmConcatFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -51,76 +52,80 @@ class TestSeqpoolCvmConcatFusePass(PassAutoScanTest): def generate_input3(): return np.random.random([1, 768]).astype(np.float32) - im2sequence_op = OpConfig( - type="im2sequence", - inputs={"X": ["input_data1"]}, - outputs={"Out": ["seq_out"]}, - attrs={ - "kernels": [6, 1], - "out_stride": [1, 1], - "paddings": [0, 0, 0, 0], - "strides": [1, 1] - }) - - sequence_pool_op1 = OpConfig( - 
type="sequence_pool", - inputs={"X": ["seq_out"]}, - outputs={"Out": ["seq_pool1_out"], - "MaxIndex": ["index1_out"]}, - attrs={ - "is_test": is_test, - "pooltype": pooltype, - "pad_value": pad_value1 - }) - - sequence_pool_op2 = OpConfig( - type="sequence_pool", - inputs={"X": ["seq_out"]}, - outputs={"Out": ["seq_pool2_out"], - "MaxIndex": ["index2_out"]}, - attrs={ - "is_test": is_test, - "pooltype": pooltype, - "pad_value": pad_value2 - }) - - sequence_pool_op3 = OpConfig( - type="sequence_pool", - inputs={"X": ["seq_out"]}, - outputs={"Out": ["seq_pool3_out"], - "MaxIndex": ["index3_out"]}, - attrs={ - "is_test": is_test, - "pooltype": pooltype, - "pad_value": pad_value3 - }) - - cvm_op1 = OpConfig( - type="cvm", - inputs={"X": ["seq_pool1_out"], - "CVM": ["input_data2"]}, - outputs={"Y": ["cvm1_out"]}, - attrs={"use_cvm": use_cvm}) - - cvm_op2 = OpConfig( - type="cvm", - inputs={"X": ["seq_pool2_out"], - "CVM": ["input_data2"]}, - outputs={"Y": ["cvm2_out"]}, - attrs={"use_cvm": use_cvm}) - - cvm_op3 = OpConfig( - type="cvm", - inputs={"X": ["seq_pool3_out"], - "CVM": ["input_data2"]}, - outputs={"Y": ["cvm3_out"]}, - attrs={"use_cvm": use_cvm}) - - concat_op = OpConfig( - type="concat", - inputs={"X": ["cvm1_out", "cvm2_out", "cvm3_out"]}, - outputs={"Out": ["concat_output"]}, - attrs={'axis': axis}) + im2sequence_op = OpConfig(type="im2sequence", + inputs={"X": ["input_data1"]}, + outputs={"Out": ["seq_out"]}, + attrs={ + "kernels": [6, 1], + "out_stride": [1, 1], + "paddings": [0, 0, 0, 0], + "strides": [1, 1] + }) + + sequence_pool_op1 = OpConfig(type="sequence_pool", + inputs={"X": ["seq_out"]}, + outputs={ + "Out": ["seq_pool1_out"], + "MaxIndex": ["index1_out"] + }, + attrs={ + "is_test": is_test, + "pooltype": pooltype, + "pad_value": pad_value1 + }) + + sequence_pool_op2 = OpConfig(type="sequence_pool", + inputs={"X": ["seq_out"]}, + outputs={ + "Out": ["seq_pool2_out"], + "MaxIndex": ["index2_out"] + }, + attrs={ + "is_test": is_test, + "pooltype": pooltype, + "pad_value": pad_value2 + }) + + sequence_pool_op3 = OpConfig(type="sequence_pool", + inputs={"X": ["seq_out"]}, + outputs={ + "Out": ["seq_pool3_out"], + "MaxIndex": ["index3_out"] + }, + attrs={ + "is_test": is_test, + "pooltype": pooltype, + "pad_value": pad_value3 + }) + + cvm_op1 = OpConfig(type="cvm", + inputs={ + "X": ["seq_pool1_out"], + "CVM": ["input_data2"] + }, + outputs={"Y": ["cvm1_out"]}, + attrs={"use_cvm": use_cvm}) + + cvm_op2 = OpConfig(type="cvm", + inputs={ + "X": ["seq_pool2_out"], + "CVM": ["input_data2"] + }, + outputs={"Y": ["cvm2_out"]}, + attrs={"use_cvm": use_cvm}) + + cvm_op3 = OpConfig(type="cvm", + inputs={ + "X": ["seq_pool3_out"], + "CVM": ["input_data2"] + }, + outputs={"Y": ["cvm3_out"]}, + attrs={"use_cvm": use_cvm}) + + concat_op = OpConfig(type="concat", + inputs={"X": ["cvm1_out", "cvm2_out", "cvm3_out"]}, + outputs={"Out": ["concat_output"]}, + attrs={'axis': axis}) model_net = [ im2sequence_op, sequence_pool_op1, sequence_pool_op2, @@ -144,8 +149,8 @@ class TestSeqpoolCvmConcatFusePass(PassAutoScanTest): yield config, ["im2sequence", "fusion_seqpool_cvm_concat"], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, passes=["seqpool_cvm_concat_fuse_pass"]) + self.run_and_statis(quant=False, + passes=["seqpool_cvm_concat_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_shuffle_channel_detect_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_shuffle_channel_detect_pass.py index 
1781eb50483..1bcaa4d43e7 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_shuffle_channel_detect_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_shuffle_channel_detect_pass.py @@ -26,10 +26,10 @@ import hypothesis.strategies as st class TestShuffleChannelDetectPass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if attrs[0]['input_shape'] != attrs[2]['shape']: @@ -49,31 +49,34 @@ class TestShuffleChannelDetectPass(PassAutoScanTest): def generate_reshape2_Input(): return np.random.random(x_shape).astype(np.float32) - reshape2_op1 = OpConfig( - "reshape2", - inputs={"X": ["reshape2_input1"], }, - outputs={ - "Out": ["reshape2_output1"], - "XShape": ["reshape2_xshape1"] - }, - shape=shape, - input_shape=x_shape) - transpose2_op = OpConfig( - "transpose2", - inputs={"X": ["reshape2_output1"], }, - outputs={ - "Out": ["transpose2_output"], - "XShape": ["transpose2_xshape"] - }, - axis=axis_v) - reshape2_op2 = OpConfig( - "reshape2", - inputs={"X": ["transpose2_output"], }, - outputs={ - "Out": ["reshape2_output2"], - "XShape": ["reshape2_xshape2"] - }, - shape=x_shape) + reshape2_op1 = OpConfig("reshape2", + inputs={ + "X": ["reshape2_input1"], + }, + outputs={ + "Out": ["reshape2_output1"], + "XShape": ["reshape2_xshape1"] + }, + shape=shape, + input_shape=x_shape) + transpose2_op = OpConfig("transpose2", + inputs={ + "X": ["reshape2_output1"], + }, + outputs={ + "Out": ["transpose2_output"], + "XShape": ["transpose2_xshape"] + }, + axis=axis_v) + reshape2_op2 = OpConfig("reshape2", + inputs={ + "X": ["transpose2_output"], + }, + outputs={ + "Out": ["reshape2_output2"], + "XShape": ["reshape2_xshape2"] + }, + shape=x_shape) ops = [reshape2_op1, transpose2_op, reshape2_op2] program_config = ProgramConfig( @@ -100,7 +103,8 @@ class TestShuffleChannelDetectPass(PassAutoScanTest): def test(self): self.run_and_statis( quant=False, - passes=["shuffle_channel_detect_pass"], ) + passes=["shuffle_channel_detect_pass"], + ) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_simplify_with_basic_ops_pass_autoscan.py b/python/paddle/fluid/tests/unittests/ir/inference/test_simplify_with_basic_ops_pass_autoscan.py index cb55dc64445..1d279a2313f 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_simplify_with_basic_ops_pass_autoscan.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_simplify_with_basic_ops_pass_autoscan.py @@ -26,6 +26,7 @@ import hypothesis.strategies as st class TestSimplifyWithBasicOpsPassUpscale(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -38,32 +39,34 @@ class TestSimplifyWithBasicOpsPassUpscale(PassAutoScanTest): dropout_prob = draw(st.floats(min_value=0.0, max_value=1.0)) seed = draw(st.integers(min_value=0, max_value=512)) x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=4)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=4)) is_test = True - dropout_op = OpConfig( - "dropout", - inputs={"X": ["input_data"]}, - outputs={"Out": ["dropout_output"], - "Mask": ["mask"]}, - fix_seed=fix_seed, - dropout_implementation=dropout_implementation, - dropout_prob=dropout_prob, - seed=seed, - is_test=is_test) - relu_op = OpConfig( - "relu", - inputs={"X": 
["dropout_output"]}, - outputs={"Out": ["relu_out"]}) + dropout_op = OpConfig("dropout", + inputs={"X": ["input_data"]}, + outputs={ + "Out": ["dropout_output"], + "Mask": ["mask"] + }, + fix_seed=fix_seed, + dropout_implementation=dropout_implementation, + dropout_prob=dropout_prob, + seed=seed, + is_test=is_test) + relu_op = OpConfig("relu", + inputs={"X": ["dropout_output"]}, + outputs={"Out": ["relu_out"]}) ops = [dropout_op, relu_op] - program_config = ProgramConfig( - ops=ops, - weights={}, - inputs={"input_data": TensorConfig(shape=x_shape), }, - outputs=["relu_out"]) + program_config = ProgramConfig(ops=ops, + weights={}, + inputs={ + "input_data": + TensorConfig(shape=x_shape), + }, + outputs=["relu_out"]) return program_config @@ -83,14 +86,14 @@ class TestSimplifyWithBasicOpsPassUpscale(PassAutoScanTest): yield config, ['relu'], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, - max_examples=30, - passes=["simplify_with_basic_ops_pass"], - min_success_num=30) + self.run_and_statis(quant=False, + max_examples=30, + passes=["simplify_with_basic_ops_pass"], + min_success_num=30) class TestSimplifyWithBasicOpsPassDowngrade(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -100,32 +103,34 @@ class TestSimplifyWithBasicOpsPassDowngrade(PassAutoScanTest): dropout_prob = draw(st.floats(min_value=0.0, max_value=1.0)) seed = draw(st.integers(min_value=0, max_value=512)) x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=4), min_size=2, max_size=4)) + st.lists(st.integers(min_value=1, max_value=4), + min_size=2, + max_size=4)) is_test = True - dropout_op = OpConfig( - "dropout", - inputs={"X": ["input_data"]}, - outputs={"Out": ["dropout_output"], - "Mask": ["mask"]}, - fix_seed=fix_seed, - dropout_implementation=dropout_implementation, - dropout_prob=dropout_prob, - seed=seed, - is_test=is_test) - relu_op = OpConfig( - "relu", - inputs={"X": ["dropout_output"]}, - outputs={"Out": ["relu_out"]}) + dropout_op = OpConfig("dropout", + inputs={"X": ["input_data"]}, + outputs={ + "Out": ["dropout_output"], + "Mask": ["mask"] + }, + fix_seed=fix_seed, + dropout_implementation=dropout_implementation, + dropout_prob=dropout_prob, + seed=seed, + is_test=is_test) + relu_op = OpConfig("relu", + inputs={"X": ["dropout_output"]}, + outputs={"Out": ["relu_out"]}) ops = [dropout_op, relu_op] - program_config = ProgramConfig( - ops=ops, - weights={}, - inputs={"input_data": TensorConfig(shape=x_shape), }, - outputs=["relu_out"]) + program_config = ProgramConfig(ops=ops, + weights={}, + inputs={ + "input_data": + TensorConfig(shape=x_shape), + }, + outputs=["relu_out"]) return program_config @@ -145,11 +150,10 @@ class TestSimplifyWithBasicOpsPassDowngrade(PassAutoScanTest): yield config, ['scale', 'relu'], (1e-5, 1e-5) def test(self): - self.run_and_statis( - quant=False, - max_examples=30, - passes=["simplify_with_basic_ops_pass"], - min_success_num=30) + self.run_and_statis(quant=False, + max_examples=30, + passes=["simplify_with_basic_ops_pass"], + min_success_num=30) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_squared_mat_sub_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_squared_mat_sub_fuse_pass.py index 64166daa91f..4b9ba91da49 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_squared_mat_sub_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_squared_mat_sub_fuse_pass.py @@ -26,6 +26,7 @@ import 
hypothesis.strategies as st class TestSquaredMatSubFusePass(PassAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -52,95 +53,94 @@ class TestSquaredMatSubFusePass(PassAutoScanTest): else: return np.random.random(shape_y).astype(np.float32) - matmul_op1 = OpConfig( - type="matmul", - inputs={"X": ["input_data1"], - "Y": ["input_data2"]}, - outputs={"Out": ["matmul1_output"]}, - attrs={ - "transpose_X": transpose_X, - "transpose_Y": transpose_Y, - "alpha": alpha1, - "fused_reshape_X": [], - "fused_reshape_Y": [], - "fused_transpose_X": [], - "fused_transpose_Y": [], - "fused_reshape_Out": [], - "fused_transpose_Out": [] - }) - - square_op1 = OpConfig( - type="square", - inputs={"X": ["matmul1_output"]}, - outputs={"Out": ["square1_output"]}, - attrs={}) - - square_op2 = OpConfig( - type="square", - inputs={"X": ["input_data1"]}, - outputs={"Out": ["square2_output"]}, - attrs={}) - - square_op3 = OpConfig( - type="square", - inputs={"X": ["input_data2"]}, - outputs={"Out": ["square3_output"]}, - attrs={}) - - matmul_op2 = OpConfig( - type="matmul", - inputs={"X": ["square2_output"], - "Y": ["square3_output"]}, - outputs={"Out": ["matmul2_output"]}, - attrs={ - "transpose_X": transpose_X, - "transpose_Y": transpose_Y, - "alpha": alpha2, - "fused_reshape_X": [], - "fused_reshape_Y": [], - "fused_transpose_X": [], - "fused_transpose_Y": [], - "fused_reshape_Out": [], - "fused_transpose_Out": [] - }) - - elt_sub_op = OpConfig( - type="elementwise_sub", - inputs={"X": ["square1_output"], - "Y": ["matmul2_output"]}, - outputs={"Out": ["sub_out"]}, - attrs={"axis": axis1}) + matmul_op1 = OpConfig(type="matmul", + inputs={ + "X": ["input_data1"], + "Y": ["input_data2"] + }, + outputs={"Out": ["matmul1_output"]}, + attrs={ + "transpose_X": transpose_X, + "transpose_Y": transpose_Y, + "alpha": alpha1, + "fused_reshape_X": [], + "fused_reshape_Y": [], + "fused_transpose_X": [], + "fused_transpose_Y": [], + "fused_reshape_Out": [], + "fused_transpose_Out": [] + }) + + square_op1 = OpConfig(type="square", + inputs={"X": ["matmul1_output"]}, + outputs={"Out": ["square1_output"]}, + attrs={}) + + square_op2 = OpConfig(type="square", + inputs={"X": ["input_data1"]}, + outputs={"Out": ["square2_output"]}, + attrs={}) + + square_op3 = OpConfig(type="square", + inputs={"X": ["input_data2"]}, + outputs={"Out": ["square3_output"]}, + attrs={}) + + matmul_op2 = OpConfig(type="matmul", + inputs={ + "X": ["square2_output"], + "Y": ["square3_output"] + }, + outputs={"Out": ["matmul2_output"]}, + attrs={ + "transpose_X": transpose_X, + "transpose_Y": transpose_Y, + "alpha": alpha2, + "fused_reshape_X": [], + "fused_reshape_Y": [], + "fused_transpose_X": [], + "fused_transpose_Y": [], + "fused_reshape_Out": [], + "fused_transpose_Out": [] + }) + + elt_sub_op = OpConfig(type="elementwise_sub", + inputs={ + "X": ["square1_output"], + "Y": ["matmul2_output"] + }, + outputs={"Out": ["sub_out"]}, + attrs={"axis": axis1}) if has_str_value: - fill_constant_op = OpConfig( - type="fill_constant", - inputs={}, - outputs={"Out": ["constant_out"]}, - attrs={ - "dtype": 5, - "place_type": place_type, - "str_value": str_value, - "value": value, - "shape": shape - }) + fill_constant_op = OpConfig(type="fill_constant", + inputs={}, + outputs={"Out": ["constant_out"]}, + attrs={ + "dtype": 5, + "place_type": place_type, + "str_value": str_value, + "value": value, + "shape": shape + }) else: - fill_constant_op = OpConfig( - type="fill_constant", - inputs={}, - outputs={"Out": 
["constant_out"]}, - attrs={ - "dtype": 5, - "place_type": place_type, - "value": value, - "shape": shape - }) - - elt_mul_op = OpConfig( - type="elementwise_mul", - inputs={"X": ["sub_out"], - "Y": ["constant_out"]}, - outputs={"Out": ["mul_out"]}, - attrs={"axis": axis2}) + fill_constant_op = OpConfig(type="fill_constant", + inputs={}, + outputs={"Out": ["constant_out"]}, + attrs={ + "dtype": 5, + "place_type": place_type, + "value": value, + "shape": shape + }) + + elt_mul_op = OpConfig(type="elementwise_mul", + inputs={ + "X": ["sub_out"], + "Y": ["constant_out"] + }, + outputs={"Out": ["mul_out"]}, + attrs={"axis": axis2}) model_net = [ matmul_op1, square_op1, square_op2, square_op3, matmul_op2, diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_squeeze2_matmul_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_squeeze2_matmul_fuse_pass.py index 6d9457f3575..e06a242395f 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_squeeze2_matmul_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_squeeze2_matmul_fuse_pass.py @@ -50,9 +50,9 @@ class TestSqueeze2MatmulFusePass(PassAutoScanTest): def sample_program_config(self, draw): # 1. Generate shape of input:X of squeeze2 x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=2)) # axes of squeeze2 == [2, 3] x_shape += [1, 1] axes = [2, 3] @@ -64,9 +64,9 @@ class TestSqueeze2MatmulFusePass(PassAutoScanTest): # 3. Generate legal shape of input:Y of matmul y_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=2)) y_shape[0] = x_shape[1] # 4. 
Generate legal attr:axis of elementwise_add @@ -74,13 +74,19 @@ class TestSqueeze2MatmulFusePass(PassAutoScanTest): if axis == 0 or axis == -1: if draw(st.booleans()): if axis == 0: - bias_shape = [x_shape[0], ] + bias_shape = [ + x_shape[0], + ] else: - bias_shape = [y_shape[1], ] + bias_shape = [ + y_shape[1], + ] else: bias_shape = [x_shape[0], y_shape[1]] elif axis == 1: - bias_shape = [y_shape[1], ] + bias_shape = [ + y_shape[1], + ] if draw(st.integers(min_value=1, max_value=10)) <= 1: bias_shape[-1] = 1 @@ -89,14 +95,21 @@ class TestSqueeze2MatmulFusePass(PassAutoScanTest): squeeze2_op = OpConfig( "squeeze2", - inputs={"X": ["squeeze2_x"], }, + inputs={ + "X": ["squeeze2_x"], + }, axes=axes, - outputs={"Out": ["squeeze2_out"], - "XShape": ["xshape"]}, ) + outputs={ + "Out": ["squeeze2_out"], + "XShape": ["xshape"] + }, + ) matmul_op = OpConfig( "matmul", - inputs={"X": ["squeeze2_out"], - "Y": ["matmul_y"]}, + inputs={ + "X": ["squeeze2_out"], + "Y": ["matmul_y"] + }, outputs={"Out": ["matmul_out"]}, alpha=alpha, transpose_X=transpose_X, @@ -106,14 +119,18 @@ class TestSqueeze2MatmulFusePass(PassAutoScanTest): fused_transpose_X=[], fused_transpose_Y=[], fused_reshape_Out=[], - fused_transpose_Out=[], ) + fused_transpose_Out=[], + ) add_op = OpConfig( "elementwise_add", - inputs={"X": ["matmul_out"], - "Y": ["bias"]}, + inputs={ + "X": ["matmul_out"], + "Y": ["bias"] + }, outputs={"Out": ["add_out"]}, - axis=axis, ) + axis=axis, + ) ops = [squeeze2_op, matmul_op, add_op] @@ -124,8 +141,11 @@ class TestSqueeze2MatmulFusePass(PassAutoScanTest): "matmul_y": TensorConfig(shape=y_shape), "bias": TensorConfig(shape=bias_shape), }, - inputs={"squeeze2_x": TensorConfig(shape=x_shape), }, - outputs=ops[-1].outputs["Out"], ) + inputs={ + "squeeze2_x": TensorConfig(shape=x_shape), + }, + outputs=ops[-1].outputs["Out"], + ) else: program_config = ProgramConfig( ops=ops, @@ -135,15 +155,15 @@ class TestSqueeze2MatmulFusePass(PassAutoScanTest): "matmul_y": TensorConfig(shape=y_shape), "bias": TensorConfig(shape=bias_shape), }, - outputs=ops[-1].outputs["Out"], ) + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=50, - max_duration=1000, - passes=["gpu_cpu_squeeze2_matmul_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=50, + max_duration=1000, + passes=["gpu_cpu_squeeze2_matmul_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py index 64c3042b63c..198c4e5c742 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py @@ -37,12 +37,14 @@ class TestTransposeFlattenConcatFusePass(PassAutoScanTest): """ def sample_predictor_configs(self, program_config): - # TRT + # TRT # after tensorrt_subgraph_pass ,The pass needs to be deleted on TRT # for gpu config = self.create_inference_config(use_gpu=True) - yield config, ["fusion_transpose_flatten_concat", ], (1e-5, 1e-5) + yield config, [ + "fusion_transpose_flatten_concat", + ], (1e-5, 1e-5) def is_program_valid(self, prog_config): concat_axis = prog_config.ops[-1].attrs["axis"] @@ -96,36 +98,39 @@ class TestTransposeFlattenConcatFusePass(PassAutoScanTest): if draw(st.booleans()): trans_axis[j], trans_axis[-1] = trans_axis[-1], trans_axis[j] 
# Generate axis of flatten - flatten_axis = draw( - st.integers( - min_value=0, max_value=x_shape_rank - 1)) + flatten_axis = draw(st.integers(min_value=0, + max_value=x_shape_rank - 1)) for i in range(times): # Generate x_shape of transpose x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=10), - min_size=x_shape_rank, - max_size=x_shape_rank)) + st.lists(st.integers(min_value=1, max_value=10), + min_size=x_shape_rank, + max_size=x_shape_rank)) str_i = str(i) transpose_op = OpConfig( "transpose2", - inputs={"X": ["transpose2_x" + str_i], }, + inputs={ + "X": ["transpose2_x" + str_i], + }, axis=trans_axis, outputs={ "Out": ["trans_out" + str_i], "XShape": ["trans_shape" + str_i] - }, ) + }, + ) ops.append(transpose_op) flatten_op = OpConfig( "flatten2", - inputs={"X": ["trans_out" + str_i], }, + inputs={ + "X": ["trans_out" + str_i], + }, axis=flatten_axis, outputs={ "Out": ["flatten2_out" + str_i], "XShape": ["xshape" + str_i] - }, ) + }, + ) concat_input.append("flatten2_out" + str_i) ops.append(flatten_op) inputs["transpose2_x" + str_i] = TensorConfig(shape=x_shape) @@ -137,7 +142,8 @@ class TestTransposeFlattenConcatFusePass(PassAutoScanTest): "AxisTensor": [], }, outputs={"Out": ["concat_out"]}, - axis=concat_axis, ) + axis=concat_axis, + ) ops.append(concat_op) @@ -145,14 +151,14 @@ class TestTransposeFlattenConcatFusePass(PassAutoScanTest): ops=ops, weights={}, inputs=inputs, - outputs=ops[-1].outputs["Out"], ) + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=300, - passes=["transpose_flatten_concat_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=300, + passes=["transpose_flatten_concat_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py index a1f15de4880..7ba824360ad 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py @@ -24,6 +24,7 @@ from paddle.fluid.core import AnalysisConfig class TensorRTSubgraphPassActivationTest(InferencePassTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( @@ -32,8 +33,9 @@ class TensorRTSubgraphPassActivationTest(InferencePassTest): def setUp(self): self.setUpTensorRTParam() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 32, 32], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 6, 32, 32], + dtype="float32") act_out = self.append_act(data) out = fluid.layers.batch_norm(act_out, is_test=True) self.feeds = { @@ -58,52 +60,62 @@ class TensorRTSubgraphPassActivationTest(InferencePassTest): class TensorRTSubgraphPassLeakyReluTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): return fluid.layers.leaky_relu(x) class TensorRTSubgraphPassRelu6Test(TensorRTSubgraphPassActivationTest): + def append_act(self, x): return fluid.layers.relu6(x) class TensorRTSubgraphPassSoftMaxTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): return fluid.layers.softmax(x) class TensorRTSubgraphPassSigmoidTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): return fluid.layers.sigmoid(x) class TensorRTSubgraphPassHardSwishTest(TensorRTSubgraphPassActivationTest): + def append_act(self, 
x): return fluid.layers.hard_swish(x) class TensorRTSubgraphPassHardSigmoidTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): return fluid.layers.hard_sigmoid(x) -class TensorRTSubgraphPassHardSwishPluginTest( - TensorRTSubgraphPassActivationTest): +class TensorRTSubgraphPassHardSwishPluginTest(TensorRTSubgraphPassActivationTest + ): + def append_act(self, x): return fluid.layers.hard_swish(x, threshold=4.0, scale=8.0) class TensorRTSubgraphPassClipTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): return fluid.layers.clip(x, 0, 1) class TensorRTSubgraphPassTanhTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): return fluid.layers.tanh(x) class TensorRTSubgraphPassSwishTest(TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( @@ -115,6 +127,7 @@ class TensorRTSubgraphPassSwishTest(TensorRTSubgraphPassActivationTest): class TensorRTSubgraphPassSwishFp16SerializeTest( TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( @@ -126,20 +139,21 @@ class TensorRTSubgraphPassSwishFp16SerializeTest( class TensorRTSubgraphPassDynamicSwishFp16SerializeTest( TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False) self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( - { - 'data': [1, 6, 8, 8] - }, {'data': [1, 6, 128, 128]}, {'data': [1, 6, 64, 64]}, False) + {'data': [1, 6, 8, 8]}, {'data': [1, 6, 128, 128]}, + {'data': [1, 6, 64, 64]}, False) def append_act(self, x): return fluid.layers.swish(x) class TensorRTSubgraphPassMishTest(TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( @@ -151,6 +165,7 @@ class TensorRTSubgraphPassMishTest(TensorRTSubgraphPassActivationTest): class TensorRTSubgraphPassMishFp16SerializeTest( TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( @@ -162,49 +177,53 @@ class TensorRTSubgraphPassMishFp16SerializeTest( class TensorRTSubgraphPassDynamicMishFp16SerializeTest( TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False) self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( - { - 'data': [1, 6, 8, 8] - }, {'data': [1, 6, 128, 128]}, {'data': [1, 6, 64, 64]}, False) + {'data': [1, 6, 8, 8]}, {'data': [1, 6, 128, 128]}, + {'data': [1, 6, 64, 64]}, False) def append_act(self, x): return fluid.layers.mish(x) class TensorRTSubgraphPassPreluAllTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): return fluid.layers.prelu(x, mode='all') class TensorRTSubgraphPassPreluChannelTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): return fluid.layers.prelu(x, mode='channel') class TensorRTSubgraphPassPreluElementTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): return fluid.layers.prelu(x, mode='element') class 
TensorRTSubgraphPassPreluDynamicTest(TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( - { - 'data': [1, 6, 8, 8] - }, {'data': [1, 6, 128, 128]}, {'data': [1, 6, 64, 64]}, False) + {'data': [1, 6, 8, 8]}, {'data': [1, 6, 128, 128]}, + {'data': [1, 6, 64, 64]}, False) def append_act(self, x): return fluid.layers.prelu(x, mode='all') class TensorRTSubgraphPassPreluFp16Test(TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( @@ -216,6 +235,7 @@ class TensorRTSubgraphPassPreluFp16Test(TensorRTSubgraphPassActivationTest): class TensorRTSubgraphPassPreluFp16SerializeTest( TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( @@ -227,14 +247,14 @@ class TensorRTSubgraphPassPreluFp16SerializeTest( class TensorRTSubgraphPassPreluFp16DynamicTest( TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False) self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( - { - 'data': [1, 6, 8, 8] - }, {'data': [1, 6, 128, 128]}, {'data': [1, 6, 64, 64]}, False) + {'data': [1, 6, 8, 8]}, {'data': [1, 6, 128, 128]}, + {'data': [1, 6, 64, 64]}, False) def append_act(self, x): return fluid.layers.prelu(x, mode='all') @@ -242,39 +262,41 @@ class TensorRTSubgraphPassPreluFp16DynamicTest( class TensorRTSubgraphPassPreluFp16DynamicSerializeTest( TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False) self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( - { - 'data': [1, 6, 8, 8] - }, {'data': [1, 6, 128, 128]}, {'data': [1, 6, 64, 64]}, False) + {'data': [1, 6, 8, 8]}, {'data': [1, 6, 128, 128]}, + {'data': [1, 6, 64, 64]}, False) def append_act(self, x): return fluid.layers.prelu(x, mode='all') class TensorRTSubgraphPassGeluTest(TensorRTSubgraphPassActivationTest): + def append_act(self, x): return fluid.layers.gelu(x) class TensorRTSubgraphPassGeluDynamicTest(TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( - { - 'data': [1, 6, 8, 8] - }, {'data': [1, 6, 128, 128]}, {'data': [1, 6, 64, 64]}, False) + {'data': [1, 6, 8, 8]}, {'data': [1, 6, 128, 128]}, + {'data': [1, 6, 64, 64]}, False) def append_act(self, x): return fluid.layers.gelu(x) class TensorRTSubgraphPassGeluFp16Test(TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( @@ -286,6 +308,7 @@ class TensorRTSubgraphPassGeluFp16Test(TensorRTSubgraphPassActivationTest): class TensorRTSubgraphPassGeluFp16SerializeTest( 
TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( @@ -295,16 +318,16 @@ class TensorRTSubgraphPassGeluFp16SerializeTest( return fluid.layers.gelu(x) -class TensorRTSubgraphPassGeluFp16DynamicTest( - TensorRTSubgraphPassActivationTest): +class TensorRTSubgraphPassGeluFp16DynamicTest(TensorRTSubgraphPassActivationTest + ): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False) self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( - { - 'data': [1, 6, 8, 8] - }, {'data': [1, 6, 128, 128]}, {'data': [1, 6, 64, 64]}, False) + {'data': [1, 6, 8, 8]}, {'data': [1, 6, 128, 128]}, + {'data': [1, 6, 64, 64]}, False) def append_act(self, x): return fluid.layers.gelu(x) @@ -312,14 +335,14 @@ class TensorRTSubgraphPassGeluFp16DynamicTest( class TensorRTSubgraphPassGeluFp16DynamicSerializeTest( TensorRTSubgraphPassActivationTest): + def setUpTensorRTParam(self): self.enable_trt = True self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False) self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam( - { - 'data': [1, 6, 8, 8] - }, {'data': [1, 6, 128, 128]}, {'data': [1, 6, 64, 64]}, False) + {'data': [1, 6, 8, 8]}, {'data': [1, 6, 128, 128]}, + {'data': [1, 6, 64, 64]}, False) def append_act(self, x): return fluid.layers.gelu(x) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py index 90cdf784b1f..9bfd9edfe98 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py @@ -25,6 +25,7 @@ from paddle.fluid.core import AnalysisConfig class TRTAffineChannelTest(InferencePassTest): + def setUp(self): self.bs = 2 self.channel = 8 @@ -36,7 +37,7 @@ class TRTAffineChannelTest(InferencePassTest): self.enable_trt = True def build(self): - # set min_graph_size to 2, + # set min_graph_size to 2, # because affine channel doesn't support nhwc format self.trt_parameters = InferencePassTest.TensorRTParam( 1 << 30, self.bs, 2, self.precision, self.serialize, False) @@ -62,7 +63,9 @@ class TRTAffineChannelTest(InferencePassTest): out = fluid.layers.batch_norm(affine_channel_out, is_test=True) shape[0] = self.bs - self.feeds = {'in': np.random.random(shape).astype('float32'), } + self.feeds = { + 'in': np.random.random(shape).astype('float32'), + } self.fetch_list = [out] def check_output(self): @@ -99,9 +102,8 @@ class TRTAffineChannelTest(InferencePassTest): max_shape = [self.bs, self.height * 2, self.width * 2, self.channel] opt_shape = [self.bs, self.height, self.width, self.channel] - dynamic_shape_profile = InferencePassTest.DynamicShapeParam({ - 'in': min_shape - }, {'in': max_shape}, {'in': opt_shape}, False) + dynamic_shape_profile = InferencePassTest.DynamicShapeParam( + {'in': min_shape}, {'in': max_shape}, {'in': opt_shape}, False) dynamic_shape_opt = [None, dynamic_shape_profile] for precision, serialize, dynamic_shape in itertools.product( @@ -123,10 +125,10 @@ class TRTAffineChannelTest(InferencePassTest): self.run_test() def test_dynamic(self): - self.dynamic_shape_params = 
InferencePassTest.DynamicShapeParam({ - 'in': [self.bs, self.channel, self.height // 2, self.width // 2] - }, {'in': [self.bs, self.channel, self.height * 2, self.width * 2] - }, {'in': [self.bs, self.channel, self.height, self.width]}, False) + self.dynamic_shape_params = InferencePassTest.DynamicShapeParam( + {'in': [self.bs, self.channel, self.height // 2, self.width // 2]}, + {'in': [self.bs, self.channel, self.height * 2, self.width * 2]}, + {'in': [self.bs, self.channel, self.height, self.width]}, False) self.run_test() def test_nchw_all(self): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py index 1d6f1c2c459..e3cecbe4119 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py @@ -25,6 +25,7 @@ from paddle.fluid.core import AnalysisConfig class TRTAnchorGeneratorBaseTest(InferencePassTest): + def setUp(self): self.bs = 1 self.channel = 16 @@ -49,10 +50,9 @@ class TRTAnchorGeneratorBaseTest(InferencePassTest): 1 << 30, self.bs, min_graph_size, self.precision, self.serialize, False) with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name='data', - shape=[-1, self.channel, self.height, self.width], - dtype='float32') + data = fluid.data(name='data', + shape=[-1, self.channel, self.height, self.width], + dtype='float32') anchor, var = fluid.layers.detection.anchor_generator( data, anchor_sizes=self.anchor_sizes, @@ -70,11 +70,12 @@ class TRTAnchorGeneratorBaseTest(InferencePassTest): self.check_output() def set_dynamic(self): - self.dynamic_shape_params = InferencePassTest.DynamicShapeParam({ - 'data': [self.bs, self.channel, self.height // 2, self.width // 2] - }, { - 'data': [self.bs, self.channel, self.height, self.width] - }, {'data': [self.bs, self.channel, self.height, self.width]}, False) + self.dynamic_shape_params = InferencePassTest.DynamicShapeParam( + { + 'data': + [self.bs, self.channel, self.height // 2, self.width // 2] + }, {'data': [self.bs, self.channel, self.height, self.width]}, + {'data': [self.bs, self.channel, self.height, self.width]}, False) def test_base(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv3d_op.py index 8bca7af3f0d..2fdacdc1e4e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv3d_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv3d_op.py @@ -24,22 +24,23 @@ from paddle.fluid.core import AnalysisConfig class TensorRTSubgraphPassConv3dTest(InferencePassTest): + def setUp(self): self.init_params() self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 6, 32, 32], dtype="float32") - conv_out = fluid.layers.conv3d( - input=data, - num_filters=self.conv_num_filters, - filter_size=self.conv_filter_size, - groups=self.conv_groups, - padding=self.conv_padding, - bias_attr=False, - use_cudnn=self.use_cudnn, - stride=self.stride, - act=None) + data = fluid.data(name="data", + shape=[-1, 3, 6, 32, 32], + dtype="float32") + conv_out = fluid.layers.conv3d(input=data, + num_filters=self.conv_num_filters, + filter_size=self.conv_filter_size, + groups=self.conv_groups, + padding=self.conv_padding, + bias_attr=False, + 
use_cudnn=self.use_cudnn, + stride=self.stride, + act=None) self.feeds = { "data": np.random.random([1, 3, 6, 32, 32]).astype("float32"), } @@ -69,8 +70,9 @@ class TensorRTSubgraphPassConv3dTest(InferencePassTest): PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) -class TensorRTSubgraphPassConv3dValidPaddingTest( - TensorRTSubgraphPassConv3dTest): +class TensorRTSubgraphPassConv3dValidPaddingTest(TensorRTSubgraphPassConv3dTest + ): + def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 @@ -79,6 +81,7 @@ class TensorRTSubgraphPassConv3dValidPaddingTest( class TensorRTSubgraphPassConv3dSamePaddingTest(TensorRTSubgraphPassConv3dTest): + def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 @@ -87,6 +90,7 @@ class TensorRTSubgraphPassConv3dSamePaddingTest(TensorRTSubgraphPassConv3dTest): class TensorRTSubgraphPassConv3dPaddingTest(TensorRTSubgraphPassConv3dTest): + def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 @@ -95,6 +99,7 @@ class TensorRTSubgraphPassConv3dPaddingTest(TensorRTSubgraphPassConv3dTest): class TensorRTSubgraphPassConv3dStrideTest(TensorRTSubgraphPassConv3dTest): + def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 @@ -104,21 +109,22 @@ class TensorRTSubgraphPassConv3dStrideTest(TensorRTSubgraphPassConv3dTest): class DynamicShapeTensorRTSubgraphPassConv3dTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, -1, -1, -1], dtype="float32") - conv_out = fluid.layers.conv3d( - input=data, - num_filters=self.conv_num_filters, - filter_size=self.conv_filter_size, - groups=self.conv_groups, - padding=self.conv_padding, - bias_attr=False, - use_cudnn=self.use_cudnn, - stride=self.stride, - act=None) + data = fluid.data(name="data", + shape=[-1, 6, -1, -1, -1], + dtype="float32") + conv_out = fluid.layers.conv3d(input=data, + num_filters=self.conv_num_filters, + filter_size=self.conv_filter_size, + groups=self.conv_groups, + padding=self.conv_padding, + bias_attr=False, + use_cudnn=self.use_cudnn, + stride=self.stride, + act=None) self.feeds = { "data": np.random.random([1, 6, 32, 32, 8]).astype("float32"), } diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv3d_transpose_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv3d_transpose_op.py index dfec7ef9b4d..6ada9edd18e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv3d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv3d_transpose_op.py @@ -24,11 +24,13 @@ from paddle.fluid.core import AnalysisConfig class TensorRTSubgraphPassConv3dTransposeTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 4, 4, 32, 32], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 4, 4, 32, 32], + dtype="float32") conv_out = fluid.layers.conv3d_transpose( input=data, num_filters=self.conv_num_filters, @@ -64,6 +66,7 @@ class TensorRTSubgraphPassConv3dTransposeTest(InferencePassTest): class TensorRTSubgraphPassConv3dTransposeSamePaddingTest( TensorRTSubgraphPassConv3dTransposeTest): + def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 @@ -74,6 +77,7 @@ class TensorRTSubgraphPassConv3dTransposeSamePaddingTest( class TensorRTSubgraphPassConv3dTransposeMultigroupTest( 
TensorRTSubgraphPassConv3dTransposeTest): + def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 @@ -83,11 +87,13 @@ class TensorRTSubgraphPassConv3dTransposeMultigroupTest( class DynamicShapeTensorRTSubgraphPassConv3dTransposeTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, -1, -1, -1], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 6, -1, -1, -1], + dtype="float32") conv_out = fluid.layers.conv3d_transpose( input=data, num_filters=self.conv_num_filters, diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py index ebbf724d0b4..a934c264e47 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py @@ -24,20 +24,21 @@ from paddle.fluid.core import AnalysisConfig class TensorRTSubgraphPassConvTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 64, 64], dtype="float32") - conv_out = fluid.layers.conv2d( - input=data, - num_filters=self.conv_num_filters, - filter_size=self.conv_filter_size, - groups=self.conv_groups, - padding=self.conv_padding, - bias_attr=False, - use_cudnn=self.use_cudnn, - act=None) + data = fluid.data(name="data", + shape=[-1, 6, 64, 64], + dtype="float32") + conv_out = fluid.layers.conv2d(input=data, + num_filters=self.conv_num_filters, + filter_size=self.conv_filter_size, + groups=self.conv_groups, + padding=self.conv_padding, + bias_attr=False, + use_cudnn=self.use_cudnn, + act=None) self.feeds = { "data": np.random.random([1, 6, 64, 64]).astype("float32"), } @@ -62,6 +63,7 @@ class TensorRTSubgraphPassConvTest(InferencePassTest): class TensorRTSubgraphPassConvValidPaddingTest(TensorRTSubgraphPassConvTest): + def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 @@ -71,6 +73,7 @@ class TensorRTSubgraphPassConvValidPaddingTest(TensorRTSubgraphPassConvTest): class TensorRTSubgraphPassConvSamePaddingTest(InferencePassTest): + def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 @@ -80,6 +83,7 @@ class TensorRTSubgraphPassConvSamePaddingTest(InferencePassTest): class TensorRTSubgraphPassDepthwiseConvTest(TensorRTSubgraphPassConvTest): + def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 @@ -89,6 +93,7 @@ class TensorRTSubgraphPassDepthwiseConvTest(TensorRTSubgraphPassConvTest): class TensorRTSubgraphPassDepthwiseConv2Test(TensorRTSubgraphPassConvTest): + def set_params(self): self.conv_num_filters = 12 self.conv_filter_size = 6 @@ -98,11 +103,13 @@ class TensorRTSubgraphPassDepthwiseConv2Test(TensorRTSubgraphPassConvTest): class TensorRTSubgraphPassConvTransposeTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 64, 64], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 6, 64, 64], + dtype="float32") conv_out = fluid.layers.conv2d_transpose( input=data, num_filters=self.conv_num_filters, @@ -137,6 +144,7 @@ class TensorRTSubgraphPassConvTransposeTest(InferencePassTest): class TensorRTSubgraphPassConvTransposeValidPaddingTest( TensorRTSubgraphPassConvTransposeTest): + def 
set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 @@ -147,6 +155,7 @@ class TensorRTSubgraphPassConvTransposeValidPaddingTest( class TensorRTSubgraphPassConvTransposeSamePaddingTest( TensorRTSubgraphPassConvTransposeTest): + def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 @@ -157,6 +166,7 @@ class TensorRTSubgraphPassConvTransposeSamePaddingTest( class TensorRTSubgraphPassConvTransposeMultiGroupTest( TensorRTSubgraphPassConvTransposeTest): + def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 @@ -167,6 +177,7 @@ class TensorRTSubgraphPassConvTransposeMultiGroupTest( class TensorRTSubgraphPassConvTranspose2Test( TensorRTSubgraphPassConvTransposeTest): + def set_params(self): self.conv_num_filters = 12 self.conv_filter_size = 4 @@ -177,6 +188,7 @@ class TensorRTSubgraphPassConvTranspose2Test( class TensorRTSubgraphPassDepthwiseConvTransposeTest( TensorRTSubgraphPassConvTransposeTest): + def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 4 @@ -186,21 +198,22 @@ class TensorRTSubgraphPassDepthwiseConvTransposeTest( class DynamicShapeTensorRTSubgraphPassConvTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, -1, -1], dtype="float32") - conv_out = fluid.layers.conv2d( - input=data, - num_filters=self.conv_num_filters, - filter_size=self.conv_filter_size, - groups=self.conv_groups, - padding=self.conv_padding, - bias_attr=False, - use_cudnn=self.use_cudnn, - stride=self.stride, - act=None) + data = fluid.data(name="data", + shape=[-1, 6, -1, -1], + dtype="float32") + conv_out = fluid.layers.conv2d(input=data, + num_filters=self.conv_num_filters, + filter_size=self.conv_filter_size, + groups=self.conv_groups, + padding=self.conv_padding, + bias_attr=False, + use_cudnn=self.use_cudnn, + stride=self.stride, + act=None) self.feeds = { "data": np.random.random([32, 6, 64, 64]).astype("float32"), } @@ -241,6 +254,7 @@ class DynamicShapeTensorRTSubgraphPassConvTest(InferencePassTest): class DynamicShapeTensorRTSubgraphPassDepthwiseConvTransposeTest( DynamicShapeTensorRTSubgraphPassConvTest): + def set_params(self): self.conv_num_filters = 6 self.conv_filter_size = 6 diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_quant_dequant_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_quant_dequant_pass.py index eacdb269689..f800d2fc3f4 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_quant_dequant_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_quant_dequant_pass.py @@ -25,24 +25,25 @@ from paddle.fluid.core import AnalysisConfig class QuantDequantTensorRTSubgraphPassConvTest(QuantDequantTest): + def setUp(self): self.set_params() def network(): - self.data = fluid.data( - name='data', shape=[1, 28, 28], dtype='float32') + self.data = fluid.data(name='data', + shape=[1, 28, 28], + dtype='float32') data_reshape = fluid.layers.reshape(self.data, shape=[1, 4, 14, 14]) self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1]) - conv_out = fluid.layers.conv2d( - input=data_reshape, - num_filters=self.conv_num_filters, - filter_size=self.conv_filter_size, - groups=self.conv_groups, - padding=self.conv_padding, - bias_attr=False, - use_cudnn=self.use_cudnn, - act=None) + conv_out = fluid.layers.conv2d(input=data_reshape, + 
num_filters=self.conv_num_filters, + filter_size=self.conv_filter_size, + groups=self.conv_groups, + padding=self.conv_padding, + bias_attr=False, + use_cudnn=self.use_cudnn, + act=None) if self.conv_padding == [1, 1]: cout = fluid.layers.reshape(conv_out, shape=[1, 1, 10816]) elif self.conv_padding == 'VALID': @@ -87,14 +88,17 @@ class QuantDequantTensorRTSubgraphPassConvTest(QuantDequantTest): def test_check_output(self): if core.is_compiled_with_cuda(): use_gpu = True - self.check_output_with_option( - use_gpu, atol=1e-1, flatten=False, rtol=1e-1) + self.check_output_with_option(use_gpu, + atol=1e-1, + flatten=False, + rtol=1e-1) self.assertTrue( PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) class QuantDequantTensorRTSubgraphPassConvValidPaddingTest( QuantDequantTensorRTSubgraphPassConvTest): + def set_params(self): self.conv_num_filters = 64 self.conv_filter_size = 4 @@ -105,6 +109,7 @@ class QuantDequantTensorRTSubgraphPassConvValidPaddingTest( class QuantDequantTensorRTSubgraphPassConvSamePaddingTest( QuantDequantTensorRTSubgraphPassConvTest): + def set_params(self): self.conv_num_filters = 64 self.conv_filter_size = 4 @@ -115,6 +120,7 @@ class QuantDequantTensorRTSubgraphPassConvSamePaddingTest( class QuantDequantTensorRTSubgraphPassDWConvTest( QuantDequantTensorRTSubgraphPassConvTest): + def set_params(self): self.conv_num_filters = 64 self.conv_filter_size = 4 @@ -124,24 +130,25 @@ class QuantDequantTensorRTSubgraphPassDWConvTest( class DynamicShapeQuantDequantTensorRTSubgraphPassConvTest(QuantDequantTest): + def setUp(self): self.set_params() def network(): - self.data = fluid.data( - name='data', shape=[1, 28, 28], dtype='float32') + self.data = fluid.data(name='data', + shape=[1, 28, 28], + dtype='float32') data_reshape = fluid.layers.reshape(self.data, shape=[1, 4, 14, 14]) self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1]) - conv_out = fluid.layers.conv2d( - input=data_reshape, - num_filters=self.conv_num_filters, - filter_size=self.conv_filter_size, - groups=self.conv_groups, - padding=self.conv_padding, - bias_attr=False, - use_cudnn=self.use_cudnn, - act=None) + conv_out = fluid.layers.conv2d(input=data_reshape, + num_filters=self.conv_num_filters, + filter_size=self.conv_filter_size, + groups=self.conv_groups, + padding=self.conv_padding, + bias_attr=False, + use_cudnn=self.use_cudnn, + act=None) cout = fluid.layers.reshape(conv_out, shape=[1, 1, 10816]) result = fluid.layers.relu(cout) loss = fluid.layers.cross_entropy(input=result, label=label_shape) @@ -199,19 +206,23 @@ class DynamicShapeQuantDequantTensorRTSubgraphPassConvTest(QuantDequantTest): def test_check_output(self): if core.is_compiled_with_cuda(): use_gpu = True - self.check_output_with_option( - use_gpu, atol=1e-1, flatten=False, rtol=1e-1) + self.check_output_with_option(use_gpu, + atol=1e-1, + flatten=False, + rtol=1e-1) self.assertTrue( PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) class QuantDequantTensorRTSubgraphPassConvTransposeTest(QuantDequantTest): + def setUp(self): self.set_params() def network(): - self.data = fluid.data( - name='data', shape=[1, 28, 28], dtype='float32') + self.data = fluid.data(name='data', + shape=[1, 28, 28], + dtype='float32') data_reshape = fluid.layers.reshape(self.data, shape=[1, 4, 14, 14]) self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1]) @@ -268,14 +279,17 @@ class 
QuantDequantTensorRTSubgraphPassConvTransposeTest(QuantDequantTest): def test_check_output(self): if core.is_compiled_with_cuda(): use_gpu = True - self.check_output_with_option( - use_gpu, atol=1e-1, flatten=False, rtol=1e-1) + self.check_output_with_option(use_gpu, + atol=1e-1, + flatten=False, + rtol=1e-1) self.assertTrue( PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) class QuantDequantTensorRTSubgraphPassConvTransValidPaddingTest( QuantDequantTensorRTSubgraphPassConvTransposeTest): + def set_params(self): self.conv_num_filters = 64 self.conv_filter_size = 4 @@ -286,6 +300,7 @@ class QuantDequantTensorRTSubgraphPassConvTransValidPaddingTest( class QuantDequantTensorRTSubgraphPassConvTransSamePaddingTest( QuantDequantTensorRTSubgraphPassConvTransposeTest): + def set_params(self): self.conv_num_filters = 64 self.conv_filter_size = 4 @@ -296,6 +311,7 @@ class QuantDequantTensorRTSubgraphPassConvTransSamePaddingTest( class QuantDequantTensorRTSubgraphPassTransDWConvTest( QuantDequantTensorRTSubgraphPassConvTransposeTest): + def set_params(self): self.conv_num_filters = 64 self.conv_filter_size = 4 diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_activation.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_activation.py index c7f724bdaae..a7532ff3e73 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_activation.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_activation.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertActivationTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): if dims == 1: return np.ones([32]).astype(np.float32) @@ -58,7 +60,8 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial( + "input_data": + TensorConfig(data_gen=partial( generate_input1, dims, batch, dics)) }, outputs=["output_data"]) @@ -67,6 +70,7 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 1: self.dynamic_shape.min_input_shape = {"input_data": [1]} @@ -102,8 +106,7 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -118,11 +121,11 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_affine_channel.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_affine_channel.py index 33eb90b9f91..c5958f93ef8 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_affine_channel.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_affine_channel.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertAffineChannelTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(batch, dims, attrs: List[Dict[str, Any]]): if dims == 2: return np.ones([batch, 64]).astype(np.float32) @@ -65,13 +67,16 @@ class TrtConvertAffineChannelTest(TrtLayerAutoScanTest): program_config = ProgramConfig( ops=ops, weights={ - "scale": TensorConfig(data_gen=partial( - generate_weight1, dims, dics)), - "bias": TensorConfig(data_gen=partial( - generate_weight1, dims, dics)) + "scale": + TensorConfig( + data_gen=partial(generate_weight1, dims, dics)), + "bias": + TensorConfig( + data_gen=partial(generate_weight1, dims, dics)) }, inputs={ - "input_data": TensorConfig(data_gen=partial( + "input_data": + TensorConfig(data_gen=partial( generate_input1, batch, dims, dics)) }, outputs=["output_data"]) @@ -80,6 +85,7 @@ class TrtConvertAffineChannelTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 2: self.dynamic_shape.min_input_shape = {"input_data": [1, 32]} @@ -119,8 +125,7 @@ class TrtConvertAffineChannelTest(TrtLayerAutoScanTest): return 0, 3 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -135,11 +140,11 @@ class TrtConvertAffineChannelTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py index 2dd380c53af..0a2877b9a23 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_anchor_generator.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertAnchorGeneratorTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(batch, attrs: List[Dict[str, Any]]): return np.random.random([batch, 3, 64, 64]).astype(np.float32) @@ -61,9 +63,9 @@ class TrtConvertAnchorGeneratorTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig( - data_gen=partial(generate_input1, - batch, dics)) + "input_data": + TensorConfig(data_gen=partial( + generate_input1, batch, dics)) }, outputs=[ "output_anchors", "output_variances" @@ -73,6 +75,7 @@ class TrtConvertAnchorGeneratorTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]} @@ -90,8 +93,7 @@ class TrtConvertAnchorGeneratorTest(TrtLayerAutoScanTest): return 0, 4 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -106,11 +108,11 @@ class TrtConvertAnchorGeneratorTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_arg_max.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_arg_max.py index 719e4488569..82ac600fd1e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_arg_max.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_arg_max.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,6 +22,7 @@ from typing import List class TrtConvertArgMaxTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: input_shape = program_config.inputs["arg_max_input"].shape axis = program_config.ops[0].attrs["axis"] @@ -32,6 +33,7 @@ class TrtConvertArgMaxTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input(rank, batch): dims = [batch] for i in range(rank - 1): @@ -65,7 +67,8 @@ class TrtConvertArgMaxTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "arg_max_input": TensorConfig(data_gen=partial( + "arg_max_input": + TensorConfig(data_gen=partial( generate_input, rank, batch)) }, outputs=["arg_max_out"]) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_batch_norm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_batch_norm.py index 899cf0e2639..fa73ab7c62e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_batch_norm.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_batch_norm.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,10 +22,12 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertBatchNormTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]], batch): if self.dims == 4: if attrs[0]['data_layout'] == "NCHW": @@ -82,30 +84,40 @@ class TrtConvertBatchNormTest(TrtLayerAutoScanTest): "Variance": ["Variance"] }] dics_intputs = [{ - "Bias": TensorConfig(data_gen=partial( + "Bias": + TensorConfig(data_gen=partial( generate_bias, dics, batch)), - "Mean": TensorConfig(data_gen=partial( + "Mean": + TensorConfig(data_gen=partial( generate_mean, dics, batch)), - "Scale": TensorConfig(data_gen=partial( + "Scale": + TensorConfig(data_gen=partial( generate_scale, dics, batch)), - "Variance": TensorConfig(data_gen=partial( + "Variance": + TensorConfig(data_gen=partial( generate_variance, dics, batch)), "MomentumTensor": TensorConfig(data_gen=partial( generate_MomentumTensor, dics, batch)), }, { - "Bias": TensorConfig(data_gen=partial( + "Bias": + TensorConfig(data_gen=partial( generate_bias, dics, batch)), - "Mean": TensorConfig(data_gen=partial( + "Mean": + TensorConfig(data_gen=partial( generate_mean, dics, batch)), - "Scale": TensorConfig(data_gen=partial( + "Scale": + TensorConfig(data_gen=partial( generate_scale, dics, batch)), - "Variance": TensorConfig(data_gen=partial( + "Variance": + TensorConfig(data_gen=partial( generate_variance, dics, batch)) }] ops_config = [{ - "op_type": "batch_norm", - "op_inputs": dics_intput[num_input], + "op_type": + "batch_norm", + "op_inputs": + dics_intput[num_input], "op_outputs": { "Y": ["batch_norm_out"], "MeanOut": ["Mean"], @@ -113,16 +125,17 @@ class TrtConvertBatchNormTest(TrtLayerAutoScanTest): "SavedMean": ["SavedMean"], "SavedVariance": ["SavedVariance"] }, - "op_attrs": dics[0] + "op_attrs": + dics[0] }] 
ops = self.generate_op_config(ops_config) program_config = ProgramConfig( ops=ops, weights=dics_intputs[num_input], inputs={ - "batch_norm_input": TensorConfig( - data_gen=partial(generate_input1, - dics, batch)) + "batch_norm_input": + TensorConfig(data_gen=partial( + generate_input1, dics, batch)) }, outputs=["batch_norm_out"]) @@ -130,6 +143,7 @@ class TrtConvertBatchNormTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 4: if attrs[0]['data_layout'] == "NCHW": @@ -182,8 +196,7 @@ class TrtConvertBatchNormTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape clear_dynamic_shape() @@ -197,13 +210,14 @@ class TrtConvertBatchNormTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): + def teller1(program_config, predictor_config): if len(program_config.weights) == 5: return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_clip.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_clip.py index 1277cde011c..aec2f3efd4f 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_clip.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_clip.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ import unittest class TrtConvertClipTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): if dims == 1: return np.ones([32]).astype(np.float32) @@ -72,13 +74,16 @@ class TrtConvertClipTest(TrtLayerAutoScanTest): program_config = ProgramConfig( ops=ops, weights={ - "Min_": TensorConfig(data_gen=partial( - generate_weight1, dics)), - "Max_": TensorConfig(data_gen=partial( - generate_weight2, dics)) + "Min_": + TensorConfig( + data_gen=partial(generate_weight1, dics)), + "Max_": + TensorConfig( + data_gen=partial(generate_weight2, dics)) }, inputs={ - "input_data": TensorConfig(data_gen=partial( + "input_data": + TensorConfig(data_gen=partial( generate_input1, dims, batch, dics)) }, outputs=["output_data"]) @@ -86,6 +91,7 @@ class TrtConvertClipTest(TrtLayerAutoScanTest): yield program_config def sample_predictor_configs(self, program_config): + def generate_dynamic_shape(attrs): if self.dims == 1: self.dynamic_shape.min_input_shape = {"input_data": [1]} @@ -122,8 +128,7 @@ class TrtConvertClipTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -138,11 +143,11 @@ class TrtConvertClipTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_concat.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_concat.py index ebd2f7724da..e8c9a65bbfc 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_concat.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_concat.py @@ -22,14 +22,14 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertConcatTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights outputs = program_config.outputs attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] #The input dimension should be less than or equal to the set axis. 
if len(inputs['concat_input1'].shape) <= attrs[0]['axis']: @@ -38,6 +38,7 @@ class TrtConvertConcatTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]], batch): if self.dims == 4: return np.ones([batch, 3, 24, 24]).astype(np.float32) @@ -79,33 +80,36 @@ class TrtConvertConcatTest(TrtLayerAutoScanTest): self.dims = dims dics = [{"axis": axis}, {}] dics_intput = [{ - "X": [ - "concat_input1", "concat_input2", - "concat_input3" - ], + "X": + ["concat_input1", "concat_input2", "concat_input3"], "AxisTensor": ["AxisTensor"], }, { - "X": [ - "concat_input1", "concat_input2", - "concat_input3" - ] + "X": + ["concat_input1", "concat_input2", "concat_input3"] }] dics_inputs = [{ - "concat_input1": TensorConfig(data_gen=partial( - generate_input1, dics, batch)), - "concat_input2": TensorConfig(data_gen=partial( - generate_input2, dics, batch)), - "concat_input3": TensorConfig(data_gen=partial( - generate_input3, dics, batch)), - "AxisTensor": TensorConfig(data_gen=partial( - generate_weight1, dics)) + "concat_input1": + TensorConfig( + data_gen=partial(generate_input1, dics, batch)), + "concat_input2": + TensorConfig( + data_gen=partial(generate_input2, dics, batch)), + "concat_input3": + TensorConfig( + data_gen=partial(generate_input3, dics, batch)), + "AxisTensor": + TensorConfig( + data_gen=partial(generate_weight1, dics)) }, { - "concat_input1": TensorConfig(data_gen=partial( - generate_input1, dics, batch)), - "concat_input2": TensorConfig(data_gen=partial( - generate_input2, dics, batch)), - "concat_input3": TensorConfig(data_gen=partial( - generate_input3, dics, batch)) + "concat_input1": + TensorConfig( + data_gen=partial(generate_input1, dics, batch)), + "concat_input2": + TensorConfig( + data_gen=partial(generate_input2, dics, batch)), + "concat_input3": + TensorConfig( + data_gen=partial(generate_input3, dics, batch)) }] ops_config = [{ "op_type": "concat", @@ -126,6 +130,7 @@ class TrtConvertConcatTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.num_input == 0: if self.dims == 4: @@ -285,8 +290,7 @@ class TrtConvertConcatTest(TrtLayerAutoScanTest): return 0, 5 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape clear_dynamic_shape() @@ -300,13 +304,14 @@ class TrtConvertConcatTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): + def teller1(program_config, predictor_config): if len(program_config.inputs) == 4: return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py index 84ef5b4da68..13b9fb6f5c9 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py +++ 
b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,16 +22,16 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertConv2dTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - if inputs['input_data'].shape[1] != weights['conv2d_weight'].shape[ - 1] * attrs[0]['groups']: + if inputs['input_data'].shape[ + 1] != weights['conv2d_weight'].shape[1] * attrs[0]['groups']: return False ver = paddle_infer.get_trt_compile_version() @@ -46,8 +46,8 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest): self.trt_param.workspace_size = 1073741824 def generate_input1(batch, attrs: List[Dict[str, Any]]): - return np.ones( - [batch, attrs[0]['groups'] * 3, 64, 64]).astype(np.float32) + return np.ones([batch, attrs[0]['groups'] * 3, 64, + 64]).astype(np.float32) def generate_weight1(attrs: List[Dict[str, Any]]): return np.random.random([24, 3, 3, 3]).astype(np.float32) @@ -111,6 +111,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): input_groups = attrs[0]['groups'] * 3 self.dynamic_shape.min_input_shape = { @@ -135,8 +136,7 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -154,8 +154,8 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, True), (1e-5, 1e-5) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_fusion.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_fusion.py index 8a9a9909571..1a36ea12e86 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_fusion.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_fusion.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,16 +22,16 @@ import unittest class TrtConvertConv2dFusionTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - if inputs['input_data'].shape[1] != weights['conv2d_weight'].shape[ - 1] * attrs[0]['groups']: + if inputs['input_data'].shape[ + 1] != weights['conv2d_weight'].shape[1] * attrs[0]['groups']: return False if attrs[0]['groups'] <= 1: @@ -49,8 +49,8 @@ class TrtConvertConv2dFusionTest(TrtLayerAutoScanTest): self.trt_param.workspace_size = 1073741824 def generate_input1(batch, attrs: List[Dict[str, Any]]): - return np.ones( - [batch, attrs[0]['groups'] * 3, 64, 64]).astype(np.float32) + return np.ones([batch, attrs[0]['groups'] * 3, 64, + 64]).astype(np.float32) def generate_weight1(attrs: List[Dict[str, Any]]): return np.random.random([24, 3, 3, 3]).astype(np.float32) @@ -108,9 +108,9 @@ class TrtConvertConv2dFusionTest(TrtLayerAutoScanTest): "conv2d_weight": TensorConfig(data_gen=partial( generate_weight1, dics)), - "elementwise_weight": TensorConfig( - data_gen=partial( - generate_weight2, dics)) + "elementwise_weight": + TensorConfig(data_gen=partial( + generate_weight2, dics)) }, inputs={ "input_data": @@ -123,6 +123,7 @@ class TrtConvertConv2dFusionTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): input_groups = attrs[0]['groups'] * 3 self.dynamic_shape.min_input_shape = { @@ -147,8 +148,7 @@ class TrtConvertConv2dFusionTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -166,8 +166,8 @@ class TrtConvertConv2dFusionTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, True), (1e-5, 1e-5) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_transpose.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_transpose.py index 65fc35f9c56..0db05156051 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_transpose.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_transpose.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,16 +22,16 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - if inputs['input_data'].shape[1] != weights['conv2d_weight'].shape[ - 1] * attrs[0]['groups']: + if inputs['input_data'].shape[ + 1] != weights['conv2d_weight'].shape[1] * attrs[0]['groups']: return False if inputs['input_data'].shape[1] != weights['conv2d_weight'].shape[0]: @@ -54,12 +54,12 @@ class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest): def generate_weight1(num_channels, attrs: List[Dict[str, Any]]): if attrs[0]['groups'] == 1: - return np.random.random( - [num_channels, num_channels, 3, 3]).astype(np.float32) + return np.random.random([num_channels, num_channels, 3, + 3]).astype(np.float32) else: return np.random.random( - [num_channels, int(num_channels / 2), 3, 3]).astype( - np.float32) + [num_channels, int(num_channels / 2), 3, + 3]).astype(np.float32) for num_channels in [2, 4, 6]: for batch in [1, 2, 4]: @@ -120,6 +120,7 @@ class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.num_channels == 2: self.dynamic_shape.min_input_shape = { @@ -170,8 +171,7 @@ class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -189,8 +189,8 @@ class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, True), (1e-5, 1e-3) @@ -199,6 +199,7 @@ class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest): # attrs, True), (1e-5, 1e-5) def add_skip_trt_case(self): + def teller1(program_config, predictor_config): if self.trt_param.precision == paddle_infer.PrecisionType.Int8: return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_deformable_conv.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_deformable_conv.py index c692e92861b..d0d8e35fdd0 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_deformable_conv.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_deformable_conv.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,23 +22,23 @@ import unittest class TrtConvertDeformableConvTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - if inputs['input_data'].shape[1] != weights['filter_data'].shape[ - 1] * attrs[0]['groups']: + if inputs['input_data'].shape[ + 1] != weights['filter_data'].shape[1] * attrs[0]['groups']: return False return True def sample_program_configs(self): - def compute_output_size(input_size: List[int], - kernel_sizes: List[int], + + def compute_output_size(input_size: List[int], kernel_sizes: List[int], attrs: List[Dict[str, Any]]): strides = attrs[0]['strides'] paddings = attrs[0]['paddings'] @@ -50,40 +50,40 @@ class TrtConvertDeformableConvTest(TrtLayerAutoScanTest): output_size.append((i + 2 * p - k) // s + 1) return output_size - def generate_input1(batch: int, - input_size: List[int], - kernel_sizes: List[int], - attrs: List[Dict[str, Any]]): + def generate_input1(batch: int, input_size: List[int], + kernel_sizes: List[int], attrs: List[Dict[str, + Any]]): return np.random.random([batch, 3] + input_size).astype(np.float32) - def generate_offset1(batch: int, - input_size: List[int], - kernel_sizes: List[int], - attrs: List[Dict[str, Any]]): + def generate_offset1(batch: int, input_size: List[int], + kernel_sizes: List[int], attrs: List[Dict[str, + Any]]): output_size = compute_output_size(input_size, kernel_sizes, attrs) return np.random.random([batch, 2 * np.prod(kernel_sizes)] + output_size).astype(np.float32) - def generate_mask1(batch: int, - input_size: List[int], - kernel_sizes: List[int], - attrs: List[Dict[str, Any]]): + def generate_mask1(batch: int, input_size: List[int], + kernel_sizes: List[int], attrs: List[Dict[str, + Any]]): output_size = compute_output_size(input_size, kernel_sizes, attrs) return np.random.random([batch, np.prod(kernel_sizes)] + output_size).astype(np.float32) - def generate_filter1(batch: int, - input_size: List[int], - kernel_sizes: List[int], - attrs: List[Dict[str, Any]]): + def generate_filter1(batch: int, input_size: List[int], + kernel_sizes: List[int], attrs: List[Dict[str, + Any]]): return np.random.random([6, 3] + kernel_sizes).astype(np.float32) - for batch in [1, ]: + for batch in [ + 1, + ]: for input_size in [[32, 32]]: for kernel_sizes in [[3, 3]]: for strides in [[1, 1], [2, 2]]: for paddings in [[1, 1], [0, 2]]: - for groups in [1, ]: + for groups in [ + 1, + ]: for dilations in [[1, 1], [2, 2]]: dics = [{ "strides": strides, @@ -126,10 +126,10 @@ class TrtConvertDeformableConvTest(TrtLayerAutoScanTest): TensorConfig(data_gen=partial( generate_offset1, batch, input_size, kernel_sizes, dics)), - "mask_data": TensorConfig( - data_gen=partial( - generate_mask1, batch, - input_size, kernel_sizes, dics)) + "mask_data": + TensorConfig(data_gen=partial( + generate_mask1, batch, input_size, + kernel_sizes, dics)) }, outputs=["output_data"]) @@ -137,6 +137,7 @@ class TrtConvertDeformableConvTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) 
-> (paddle_infer.Config, List[int], float): + def clear_dynamic_shape(): self.dynamic_shape.min_input_shape = {} self.dynamic_shape.max_input_shape = {} @@ -150,8 +151,7 @@ class TrtConvertDeformableConvTest(TrtLayerAutoScanTest): return 1, 4 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_depthwise_conv2d.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_depthwise_conv2d.py index b87b33d3557..f4d6a5f1efa 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_depthwise_conv2d.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_depthwise_conv2d.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,16 +22,16 @@ import unittest class TrtConvertDepthwiseConv2dTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - if inputs['input_data'].shape[1] != weights['conv2d_weight'].shape[ - 1] * attrs[0]['groups']: + if inputs['input_data'].shape[ + 1] != weights['conv2d_weight'].shape[1] * attrs[0]['groups']: return False return True @@ -99,6 +99,7 @@ class TrtConvertDepthwiseConv2dTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if attrs[0]['groups'] == 1: self.dynamic_shape.min_input_shape = { @@ -149,8 +150,7 @@ class TrtConvertDepthwiseConv2dTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -168,8 +168,8 @@ class TrtConvertDepthwiseConv2dTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, True), (1e-5, 1e-5) @@ -178,6 +178,7 @@ class TrtConvertDepthwiseConv2dTest(TrtLayerAutoScanTest): attrs, True), (1e-5, 1e-5) def add_skip_trt_case(self): + def teller1(program_config, predictor_config): if program_config.ops[0].attrs[ 'padding_algorithm'] == "SAME" or program_config.ops[ diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_depthwise_conv2d_transpose.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_depthwise_conv2d_transpose.py index 5f77e7de0df..f32dfdb47c9 100644 --- 
a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_depthwise_conv2d_transpose.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_depthwise_conv2d_transpose.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,16 +22,16 @@ import unittest class TrtConvertDepthwiseConv2dTransposeTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - if inputs['input_data'].shape[1] != weights['conv2d_weight'].shape[ - 1] * attrs[0]['groups']: + if inputs['input_data'].shape[ + 1] != weights['conv2d_weight'].shape[1] * attrs[0]['groups']: return False if inputs['input_data'].shape[1] != weights['conv2d_weight'].shape[1]: @@ -53,12 +53,12 @@ class TrtConvertDepthwiseConv2dTransposeTest(TrtLayerAutoScanTest): self.trt_param.workspace_size = 1073741824 def generate_input1(batch, attrs: List[Dict[str, Any]]): - return np.ones( - [batch, attrs[0]['groups'], 64, 64]).astype(np.float32) + return np.ones([batch, attrs[0]['groups'], 64, + 64]).astype(np.float32) def generate_weight1(attrs: List[Dict[str, Any]]): - return np.random.random( - [attrs[0]['groups'], 1, 3, 3]).astype(np.float32) + return np.random.random([attrs[0]['groups'], 1, 3, + 3]).astype(np.float32) for batch in [1, 2, 4]: for strides in [[1, 1], [2, 2], [1, 2]]: @@ -111,6 +111,7 @@ class TrtConvertDepthwiseConv2dTransposeTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = { "input_data": [1, attrs[0]['groups'], 32, 32], @@ -134,8 +135,7 @@ class TrtConvertDepthwiseConv2dTransposeTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -153,8 +153,8 @@ class TrtConvertDepthwiseConv2dTransposeTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, True), (1e-5, 1e-5) @@ -163,6 +163,7 @@ class TrtConvertDepthwiseConv2dTransposeTest(TrtLayerAutoScanTest): # attrs, True), (1e-5, 1e-5) def add_skip_trt_case(self): + def teller1(program_config, predictor_config): if self.trt_param.precision == paddle_infer.PrecisionType.Int8: return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_dropout.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_dropout.py 
index f9bb4e66f2a..5d8e93ef984 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_dropout.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_dropout.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,10 +22,12 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertDropoutTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): if dims == 1: return np.ones([64]).astype(np.float32) @@ -70,9 +72,9 @@ class TrtConvertDropoutTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig( - data_gen=partial(generate_input1, - dims, batch, dics)) + "input_data": + TensorConfig(data_gen=partial( + generate_input1, dims, batch, dics)) }, outputs=["dropout_output_data"]) @@ -80,6 +82,7 @@ class TrtConvertDropoutTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 1: self.dynamic_shape.min_input_shape = {"input_data": [1]} @@ -118,8 +121,7 @@ class TrtConvertDropoutTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -134,11 +136,11 @@ class TrtConvertDropoutTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): pass diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py index 27d8247aded..2fabc601389 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(shape): return np.random.random(shape).astype(np.float32) @@ -59,7 +61,8 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): TensorConfig(data_gen=partial(generate_weight)) }, inputs={ - "input_data": TensorConfig( + "input_data": + TensorConfig( data_gen=partial(generate_input, shape)), }, outputs=["output_data"]) @@ -68,6 +71,7 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): # The input.dims[1] must be equal to the weight's length. if self.dims == 1: @@ -104,8 +108,7 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -120,11 +123,11 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): pass @@ -136,6 +139,7 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): class TrtConvertElementwiseTest_two_input_without_broadcast( TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs if len(inputs['input_data1'].shape) == 1: @@ -144,6 +148,7 @@ class TrtConvertElementwiseTest_two_input_without_broadcast( return True def sample_program_configs(self): + def generate_input(shape): return np.random.random(shape).astype(np.float32) @@ -172,9 +177,11 @@ class TrtConvertElementwiseTest_two_input_without_broadcast( ops=ops, weights={}, inputs={ - "input_data1": TensorConfig( + "input_data1": + TensorConfig( data_gen=partial(generate_input, shape)), - "input_data2": TensorConfig( + "input_data2": + TensorConfig( data_gen=partial(generate_input, shape)) }, outputs=["output_data"]) @@ -183,6 +190,7 @@ class TrtConvertElementwiseTest_two_input_without_broadcast( def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 1: self.dynamic_shape.min_input_shape = { @@ -243,8 +251,7 @@ class TrtConvertElementwiseTest_two_input_without_broadcast( self.dynamic_shape.opt_input_shape = {} attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -270,6 +277,7 @@ class TrtConvertElementwiseTest_two_input_without_broadcast( class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs if len(inputs['input_data1'].shape) != 
len(inputs['input_data2'].shape): @@ -278,6 +286,7 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input(shape): return np.random.random(shape).astype(np.float32) @@ -332,9 +341,11 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data1": TensorConfig(data_gen=partial( + "input_data1": + TensorConfig(data_gen=partial( generate_input, input1_shape)), - "input_data2": TensorConfig(data_gen=partial( + "input_data2": + TensorConfig(data_gen=partial( generate_input, input2_shape)) }, outputs=["output_data"]) @@ -343,6 +354,7 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): max_shape = [[128], [128, 128], [128, 128, 128], [128, 128, 128, 128]] @@ -368,8 +380,7 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -396,10 +407,12 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(shape): return np.random.random(shape).astype(np.float32) @@ -436,7 +449,8 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest): TensorConfig(data_gen=partial(generate_weight)) }, inputs={ - "input_data": TensorConfig( + "input_data": + TensorConfig( data_gen=partial(generate_input, shape)), }, outputs=["output_data"]) @@ -445,6 +459,7 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): # The input.dims[1] must be equal to the weight's length. 
if self.dims == 1: @@ -483,8 +498,7 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -499,13 +513,14 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): + def teller1(program_config, predictor_config): input_x_names = program_config.ops[0].inputs["X"] for weight_name in program_config.weights: diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_emb_eltwise_layernorm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_emb_eltwise_layernorm.py index 1eecf9c0497..36b63be3453 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_emb_eltwise_layernorm.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_emb_eltwise_layernorm.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,13 +22,15 @@ import unittest class TrtConvertEmbEltwiseLayernormTest1(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(batch, input_size): - return np.random.randint( - 0, 7, size=(batch, input_size, 1)).astype(np.int64) + return np.random.randint(0, 7, size=(batch, input_size, + 1)).astype(np.int64) def generate_weight1(size11, size2): return np.random.randn(size11, size2).astype(np.float32) @@ -75,7 +77,8 @@ class TrtConvertEmbEltwiseLayernormTest1(TrtLayerAutoScanTest): "epsilon": epsilon }] ops_config = [{ - "op_type": type, + "op_type": + type, "op_inputs": { "Ids": ["input_data1"], "W": ["embedding1_weight"] @@ -84,11 +87,12 @@ class TrtConvertEmbEltwiseLayernormTest1(TrtLayerAutoScanTest): "Out": ["embedding1_output"] }, - "op_attrs": dics[0] - if type == "lookup_table" else - dics[1] + "op_attrs": + dics[0] if type + == "lookup_table" else dics[1] }, { - "op_type": type, + "op_type": + type, "op_inputs": { "Ids": ["input_data2"], "W": ["embedding2_weight"] @@ -97,11 +101,12 @@ class TrtConvertEmbEltwiseLayernormTest1(TrtLayerAutoScanTest): "Out": ["embedding2_output"] }, - "op_attrs": dics[0] - if type == "lookup_table" else - dics[1] + "op_attrs": + dics[0] if type + == "lookup_table" else dics[1] }, { - "op_type": type, + "op_type": + type, "op_inputs": { "Ids": ["input_data3"], "W": ["embedding3_weight"] @@ -110,9 +115,9 @@ class TrtConvertEmbEltwiseLayernormTest1(TrtLayerAutoScanTest): "Out": ["embedding3_output"] }, - "op_attrs": dics[0] - if type == "lookup_table" else - dics[1] + "op_attrs": + dics[0] if type + == "lookup_table" else dics[1] }, { "op_type": "elementwise_add", "op_inputs": { @@ -120,39 +125,33 @@ class TrtConvertEmbEltwiseLayernormTest1(TrtLayerAutoScanTest): "Y": ["embedding3_output"] }, "op_outputs": { - "Out": [ - "elementwise_add1_output" - ] + "Out": + ["elementwise_add1_output"] }, "op_attrs": dics[2] }, { "op_type": "elementwise_add", "op_inputs": { - "X": [ - "elementwise_add1_output" - ], + "X": + ["elementwise_add1_output"], "Y": ["embedding1_output"] }, "op_outputs": { - "Out": [ - "elementwise_add2_output" - ] + "Out": + ["elementwise_add2_output"] }, "op_attrs": dics[3] }, { "op_type": "layer_norm", "op_inputs": { - "X": [ - "elementwise_add2_output" - ], - "Bias": - ["layer_norm_bias"], + "X": + ["elementwise_add2_output"], + "Bias": ["layer_norm_bias"], "Scale": ["layer_norm_scale"] }, "op_outputs": { - "Y": - ["layer_norm_output1"], + "Y": ["layer_norm_output1"], "Mean": ["layer_norm_output2"], "Variance": @@ -193,15 +192,18 @@ class TrtConvertEmbEltwiseLayernormTest1(TrtLayerAutoScanTest): size2)) }, inputs={ - "input_data1": TensorConfig( + "input_data1": + TensorConfig( data_gen=partial( generate_input, batch, input_size)), - "input_data2": TensorConfig( + "input_data2": + TensorConfig( data_gen=partial( generate_input, batch, input_size)), - "input_data3": TensorConfig( + "input_data3": + TensorConfig( data_gen=partial( generate_input, batch, input_size)) @@ -212,6 +214,7 @@ class TrtConvertEmbEltwiseLayernormTest1(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = { "input_data1": [1, 4, 1], @@ -235,8 +238,7 @@ class TrtConvertEmbEltwiseLayernormTest1(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} attrs = [ - program_config.ops[i].attrs - for i in 
range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten.py index 7b0089ab9ab..da947dc35df 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,10 +22,12 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertFlattenTest_dim_2(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(batch): return np.random.random([batch, 32]).astype(np.float32) @@ -54,7 +56,8 @@ class TrtConvertFlattenTest_dim_2(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig( + "input_data": + TensorConfig( data_gen=partial(generate_input, batch)) }, outputs=["output_data"]) @@ -63,6 +66,7 @@ class TrtConvertFlattenTest_dim_2(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 8]} self.dynamic_shape.max_input_shape = {"input_data": [4, 64]} @@ -90,8 +94,7 @@ class TrtConvertFlattenTest_dim_2(TrtLayerAutoScanTest): return 0, 3 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -105,21 +108,23 @@ class TrtConvertFlattenTest_dim_2(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() class TrtConvertFlattenTest_dim_3(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(batch): return np.random.random([batch, 32, 64]).astype(np.float32) @@ -148,7 +153,8 @@ class TrtConvertFlattenTest_dim_3(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig( + "input_data": + TensorConfig( data_gen=partial(generate_input, batch)) }, outputs=["output_data"]) @@ -157,6 +163,7 @@ class TrtConvertFlattenTest_dim_3(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape 
= {"input_data": [1, 8, 8]} self.dynamic_shape.max_input_shape = {"input_data": [4, 64, 768]} @@ -184,8 +191,7 @@ class TrtConvertFlattenTest_dim_3(TrtLayerAutoScanTest): return 0, 3 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -199,21 +205,23 @@ class TrtConvertFlattenTest_dim_3(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() class TrtConvertFlattenTest_dim_4(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(batch): return np.random.random([batch, 8, 8, 8]).astype(np.float32) @@ -242,7 +250,8 @@ class TrtConvertFlattenTest_dim_4(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig( + "input_data": + TensorConfig( data_gen=partial(generate_input, batch)) }, outputs=["output_data"]) @@ -251,6 +260,7 @@ class TrtConvertFlattenTest_dim_4(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 4, 4, 4]} self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 64, 64]} @@ -278,8 +288,7 @@ class TrtConvertFlattenTest_dim_4(TrtLayerAutoScanTest): return 0, 3 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -293,21 +302,23 @@ class TrtConvertFlattenTest_dim_4(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() class TrtConvertFlattenTest_dim_5(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(batch): return np.random.random([batch, 8, 8, 8]).astype(np.float32) @@ -336,7 +347,8 @@ class TrtConvertFlattenTest_dim_5(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig( + "input_data": + TensorConfig( data_gen=partial(generate_input, batch)) }, outputs=["output_data"]) @@ -345,6 +357,7 @@ class TrtConvertFlattenTest_dim_5(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 4, 4, 4]} self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 64, 64]} @@ 
-372,8 +385,7 @@ class TrtConvertFlattenTest_dim_5(TrtLayerAutoScanTest): return 0, 3 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -387,11 +399,11 @@ class TrtConvertFlattenTest_dim_5(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten_contiguous_range.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten_contiguous_range.py index a4060349d4b..406f5e1a13c 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten_contiguous_range.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_flatten_contiguous_range.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertFlattenContiguousRangeTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(batch): return np.random.random([2, batch, 4, 8, 3]).astype(np.float32) @@ -54,7 +56,8 @@ class TrtConvertFlattenContiguousRangeTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig( + "input_data": + TensorConfig( data_gen=partial(generate_input, batch)) }, outputs=["output_data"]) @@ -62,6 +65,7 @@ class TrtConvertFlattenContiguousRangeTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [2, 1, 4, 8, 3]} self.dynamic_shape.max_input_shape = {"input_data": [2, 4, 4, 8, 3]} @@ -86,8 +90,7 @@ class TrtConvertFlattenContiguousRangeTest(TrtLayerAutoScanTest): return 0, 3 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -101,11 +104,11 @@ class TrtConvertFlattenContiguousRangeTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather.py index 852bb2ffa84..5405f114651 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,11 +23,11 @@ import unittest class TrtConvertGatherTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if len(inputs['input_data'].shape) <= attrs[0]['axis']: return False @@ -35,6 +35,7 @@ class TrtConvertGatherTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(shape): return np.random.random(shape).astype(np.float32) @@ -74,16 +75,21 @@ class TrtConvertGatherTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial( + "input_data": + TensorConfig(data_gen=partial( generate_input1, shape)), - "index_data": TensorConfig(data_gen=partial( + "index_data": + TensorConfig(data_gen=partial( generate_input2, index)), } if len(input) == 2 else { - "input_data": TensorConfig(data_gen=partial( + "input_data": + TensorConfig(data_gen=partial( generate_input1, shape)), - "index_data": TensorConfig(data_gen=partial( + "index_data": + TensorConfig(data_gen=partial( generate_input2, index)), - "axis_data": TensorConfig(data_gen=partial( + "axis_data": + TensorConfig(data_gen=partial( generate_input3, axis)), }, outputs=["output_data"]) @@ -92,6 +98,7 @@ class TrtConvertGatherTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if len(self.shape) == 1: self.dynamic_shape.min_input_shape = { @@ -161,8 +168,7 @@ class TrtConvertGatherTest(TrtLayerAutoScanTest): return 0, 4 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -188,8 +194,8 @@ class TrtConvertGatherTest(TrtLayerAutoScanTest): def teller1(program_config, predictor_config): if len(self.dynamic_shape.min_input_shape) != 0: inputs = program_config.inputs - if len(inputs['input_data'].shape) == 1 or len(inputs[ - 'index_data'].shape) == 1: + if len(inputs['input_data'].shape) == 1 or len( + inputs['index_data'].shape) == 1: return True return False diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather_nd.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather_nd.py index 6b6a9536d81..9343f1ebd7c 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather_nd.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather_nd.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ import unittest class TrtConvertGatherNdTest_dim_4_1(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(): return np.random.random([2, 32, 64, 64]).astype(np.float32) @@ -58,6 +60,7 @@ class TrtConvertGatherNdTest_dim_4_1(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = { "input_data": [1, 8, 8, 8], @@ -78,8 +81,7 @@ class TrtConvertGatherNdTest_dim_4_1(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -101,10 +103,12 @@ class TrtConvertGatherNdTest_dim_4_1(TrtLayerAutoScanTest): class TrtConvertGatherNdTest_dim_4_1_2(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(): return np.random.random([2, 32, 64, 64]).astype(np.float32) @@ -137,6 +141,7 @@ class TrtConvertGatherNdTest_dim_4_1_2(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = { "input_data": [1, 8, 8, 8], @@ -157,8 +162,7 @@ class TrtConvertGatherNdTest_dim_4_1_2(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -180,10 +184,12 @@ class TrtConvertGatherNdTest_dim_4_1_2(TrtLayerAutoScanTest): class TrtConvertGatherNdTest_dim_4_2(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(): return np.random.random([2, 32, 64, 64]).astype(np.float32) @@ -216,6 +222,7 @@ class TrtConvertGatherNdTest_dim_4_2(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = { "input_data": [1, 8, 8, 8], @@ -236,8 +243,7 @@ class TrtConvertGatherNdTest_dim_4_2(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -259,10 +265,12 @@ class TrtConvertGatherNdTest_dim_4_2(TrtLayerAutoScanTest): class TrtConvertGatherNdTest_dim_4_3(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(): return np.random.random([2, 32, 64, 64]).astype(np.float32) @@ -295,6 +303,7 @@ class TrtConvertGatherNdTest_dim_4_3(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = { "input_data": [1, 8, 8, 8], @@ -315,8 +324,7 @@ class TrtConvertGatherNdTest_dim_4_3(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + 
program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -338,10 +346,12 @@ class TrtConvertGatherNdTest_dim_4_3(TrtLayerAutoScanTest): class TrtConvertGatherNdTest_dim_2_2(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(): return np.random.random([2, 32]).astype(np.float32) @@ -374,6 +384,7 @@ class TrtConvertGatherNdTest_dim_2_2(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = { "input_data": [1, 4], @@ -394,8 +405,7 @@ class TrtConvertGatherNdTest_dim_2_2(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -417,16 +427,18 @@ class TrtConvertGatherNdTest_dim_2_2(TrtLayerAutoScanTest): class TrtConvertGatherNdTest_dim_3_3(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(): return np.random.random([16, 32, 256]).astype(np.float32) def generate_input2(): - return np.array( - [[[2, 5], [3, 8]], [[0, 2], [0, 3]]]).astype(np.int32) + return np.array([[[2, 5], [3, 8]], [[0, 2], [0, + 3]]]).astype(np.int32) ops_config = [{ "op_type": "gather_nd", @@ -454,6 +466,7 @@ class TrtConvertGatherNdTest_dim_3_3(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = { "input_data": [1, 4, 4], @@ -474,8 +487,7 @@ class TrtConvertGatherNdTest_dim_3_3(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py index 448e4e3e71b..29f656130f7 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ import unittest class TrtConvertGeluTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(dims, attrs: List[Dict[str, Any]]): if dims == 1: return np.ones([32]).astype(np.float32) @@ -57,8 +59,9 @@ class TrtConvertGeluTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial( - generate_input1, dims, dics)) + "input_data": + TensorConfig( + data_gen=partial(generate_input1, dims, dics)) }, outputs=["output_data"]) @@ -66,6 +69,7 @@ class TrtConvertGeluTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 1: self.dynamic_shape.min_input_shape = {"input_data": [1]} @@ -112,8 +116,7 @@ class TrtConvertGeluTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -128,11 +131,11 @@ class TrtConvertGeluTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_group_norm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_group_norm.py index 203e86c4b25..da65c3d2198 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_group_norm.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_group_norm.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ import unittest class TrtConvertGroupNormTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(attrs: List[Dict[str, Any]], batch): if attrs[0]['data_layout'] == 'NCHW': return np.random.random([batch, 32, 64, 64]).astype(np.float32) @@ -70,13 +72,16 @@ class TrtConvertGroupNormTest(TrtLayerAutoScanTest): program_config = ProgramConfig( ops=ops, weights={ - "scale_weight": TensorConfig( + "scale_weight": + TensorConfig( data_gen=partial(generate_scale)), - "bias_weight": TensorConfig( + "bias_weight": + TensorConfig( data_gen=partial(generate_bias)) }, inputs={ - "input_data": TensorConfig(data_gen=partial( + "input_data": + TensorConfig(data_gen=partial( generate_input, dics, batch)) }, outputs=["y_output"]) @@ -85,6 +90,7 @@ class TrtConvertGroupNormTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 16, 32, 32]} self.dynamic_shape.max_input_shape = { @@ -107,8 +113,7 @@ class TrtConvertGroupNormTest(TrtLayerAutoScanTest): return 0, 3 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -130,6 +135,7 @@ class TrtConvertGroupNormTest(TrtLayerAutoScanTest): attrs, True), (1e-5, 1e-5) def add_skip_trt_case(self): + def teller1(program_config, predictor_config): if len(self.dynamic_shape.min_input_shape) != 0: return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_hard_sigmoid.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_hard_sigmoid.py index b3f118e9fbf..0980acccb88 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_hard_sigmoid.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_hard_sigmoid.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ import unittest class TrtConvertHardSigmoidTest_dim_2(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(shape): return np.random.random(shape).astype(np.float32) @@ -51,7 +53,8 @@ class TrtConvertHardSigmoidTest_dim_2(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig( + "input_data": + TensorConfig( data_gen=partial(generate_input, shape)) }, outputs=["output_data"]) @@ -60,6 +63,7 @@ class TrtConvertHardSigmoidTest_dim_2(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.input_dim == 2: self.dynamic_shape.min_input_shape = {"input_data": [1, 8]} @@ -86,8 +90,7 @@ class TrtConvertHardSigmoidTest_dim_2(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_hard_swish.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_hard_swish.py index c092d6da868..5f5664d2aa4 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_hard_swish.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_hard_swish.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,12 +22,12 @@ import unittest class TrtConvertHardSwishTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if attrs[0]['threshold'] <= 0 or attrs[0]['scale'] <= 0: @@ -36,6 +36,7 @@ class TrtConvertHardSwishTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]]): return np.ones([1, 3, 32, 32]).astype(np.float32) @@ -64,8 +65,9 @@ class TrtConvertHardSwishTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial( - generate_input1, dics)) + "input_data": + TensorConfig( + data_gen=partial(generate_input1, dics)) }, outputs=["hard_swish_output_data"]) @@ -73,6 +75,7 @@ class TrtConvertHardSwishTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 16, 16]} self.dynamic_shape.max_input_shape = {"input_data": [2, 3, 32, 32]} @@ -87,8 +90,7 @@ class TrtConvertHardSwishTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -103,8 +105,8 @@ class TrtConvertHardSwishTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, True), (1e-5, 1e-5) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py index acd920ccd57..457db86c323 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py @@ -1,12 +1,12 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,10 +23,10 @@ import unittest class TrtConvertInstanceNormTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if attrs[0]['epsilon'] < 0 or attrs[0]['epsilon'] > 0.001: @@ -35,6 +35,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]], shape_input): return np.random.random(shape_input).astype(np.float32) @@ -65,13 +66,16 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest): program_config = ProgramConfig( ops=ops, weights={ - "bias_data": TensorConfig(data_gen=partial( + "bias_data": + TensorConfig(data_gen=partial( generate_input2, dics, shape_input)), - "scale_data": TensorConfig(data_gen=partial( + "scale_data": + TensorConfig(data_gen=partial( generate_input2, dics, shape_input)) }, inputs={ - "input_data": TensorConfig(data_gen=partial( + "input_data": + TensorConfig(data_gen=partial( generate_input1, dics, shape_input)) }, outputs=["y_data"]) @@ -80,6 +84,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.in_dim == 2: self.dynamic_shape.min_input_shape = {"input_data": [1, 4]} @@ -113,8 +118,7 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -129,11 +133,11 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_layer_norm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_layer_norm.py index 13c932d55b8..16c95bff5e3 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_layer_norm.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_layer_norm.py @@ -1,12 +1,12 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,12 +23,12 @@ import unittest class TrtConvertLayerNormTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if attrs[0]['epsilon'] < 0 or attrs[0]['epsilon'] > 0.001: @@ -40,6 +40,7 @@ class TrtConvertLayerNormTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]], shape_input): return np.ones(shape_input).astype(np.float32) @@ -76,14 +77,17 @@ class TrtConvertLayerNormTest(TrtLayerAutoScanTest): program_config = ProgramConfig( ops=ops, weights={ - "bias_data": TensorConfig(data_gen=partial( - generate_input2, dics, shape_input)), - "scale_data": TensorConfig(data_gen=partial( - generate_input2, dics, shape_input)) + "bias_data": + TensorConfig(data_gen=partial(generate_input2, dics, + shape_input)), + "scale_data": + TensorConfig(data_gen=partial(generate_input2, dics, + shape_input)) }, inputs={ - "input_data": TensorConfig(data_gen=partial( - generate_input1, dics, shape_input)) + "input_data": + TensorConfig(data_gen=partial(generate_input1, dics, + shape_input)) }, outputs=["y_data"]) @@ -91,6 +95,7 @@ class TrtConvertLayerNormTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]} @@ -110,8 +115,7 @@ class TrtConvertLayerNormTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -126,11 +130,11 @@ class TrtConvertLayerNormTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-2 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-2 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_leaky_relu.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_leaky_relu.py index c647849fa7e..7f33cfc64a8 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_leaky_relu.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_leaky_relu.py @@ -1,12 +1,12 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,10 +23,12 @@ import unittest class TrtConvertLeakyReluTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(shape): return np.random.random(shape).astype(np.float32) @@ -50,8 +52,9 @@ class TrtConvertLeakyReluTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial( - generate_input1, shape)) + "input_data": + TensorConfig( + data_gen=partial(generate_input1, shape)) }, outputs=["y_data"]) @@ -59,6 +62,7 @@ class TrtConvertLeakyReluTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.input_dim == 2: self.dynamic_shape.min_input_shape = {"input_data": [1, 8]} @@ -90,8 +94,7 @@ class TrtConvertLeakyReluTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -109,8 +112,8 @@ class TrtConvertLeakyReluTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, True), (1e-5, 1e-5) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py index c6f2fa205c7..76fcffad459 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ import unittest class TrtConvertMatmulTest_static(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(shape): return np.random.random(shape).astype(np.float32) @@ -73,9 +75,11 @@ class TrtConvertMatmulTest_static(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input1_data": TensorConfig(data_gen=partial( + "input1_data": + TensorConfig(data_gen=partial( generate_input, input1_shape)), - "input2_data": TensorConfig(data_gen=partial( + "input2_data": + TensorConfig(data_gen=partial( generate_input, input2_shape)) }, outputs=["output_data"]) @@ -84,6 +88,7 @@ class TrtConvertMatmulTest_static(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): pass @@ -104,10 +109,12 @@ class TrtConvertMatmulTest_static(TrtLayerAutoScanTest): class TrtConvertMatmulTest_dynamic(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(shape): return np.random.random(shape).astype(np.float32) @@ -154,9 +161,11 @@ class TrtConvertMatmulTest_dynamic(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input1_data": TensorConfig( + "input1_data": + TensorConfig( data_gen=partial(generate_input, input1_shape)), - "input2_data": TensorConfig( + "input2_data": + TensorConfig( data_gen=partial(generate_input, input2_shape)) }, outputs=["output_data"]) @@ -165,6 +174,7 @@ class TrtConvertMatmulTest_dynamic(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = { "input1_data": [1, 4, 4], @@ -180,8 +190,7 @@ class TrtConvertMatmulTest_dynamic(TrtLayerAutoScanTest): } attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for dynamic_shape diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_mish.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_mish.py index d2b6924a9e9..063fbba1a07 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_mish.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_mish.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ import unittest class TrtConvertMishTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(batch, dim1, dim2, dim3): shape = [batch] if dim1 != 0: @@ -68,20 +70,28 @@ class TrtConvertMishTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig( - data_gen=partial(generate_input, batch, - dim1, dim2, dim3)) + "input_data": + TensorConfig(data_gen=partial( + generate_input, batch, dim1, dim2, + dim3)) }, outputs=["mish_output_data"]) yield program_config def sample_predictor_configs(self, program_config): + def generate_dynamic_shape(attrs): if self.dim1 == 0: - self.dynamic_shape.min_input_shape = {"input_data": [1], } - self.dynamic_shape.max_input_shape = {"input_data": [4], } - self.dynamic_shape.opt_input_shape = {"input_data": [2], } + self.dynamic_shape.min_input_shape = { + "input_data": [1], + } + self.dynamic_shape.max_input_shape = { + "input_data": [4], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [2], + } else: if self.dim2 == 0 and self.dim3 == 0: self.dynamic_shape.min_input_shape = { @@ -123,8 +133,7 @@ class TrtConvertMishTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -139,13 +148,14 @@ class TrtConvertMishTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): + def teller1(program_config, predictor_config): if self.dim1 == 0 and self.dim2 == 0 and self.dim3 == 0: return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms3.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms3.py index b6a3f0c9cb1..03260a22416 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms3.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms3.py @@ -22,6 +22,7 @@ import unittest class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -58,21 +59,23 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest): return config def sample_program_configs(self): + def generate_boxes(batch, num_boxes): - return np.arange( - batch * num_boxes * 4, - dtype=np.float32).reshape([batch, num_boxes, 4]) + return np.arange(batch * num_boxes * 4, + dtype=np.float32).reshape([batch, num_boxes, 4]) def generate_scores(batch, num_boxes, num_classes): - return np.arange( - batch * num_classes * num_boxes, - dtype=np.float32).reshape([batch, num_classes, num_boxes]) + return np.arange(batch * num_classes * num_boxes, + dtype=np.float32).reshape( + [batch, num_classes, num_boxes]) # return np.random.rand(batch, num_classes, num_boxes).astype(np.float32) for batch in [1, 2]: for num_boxes in [4, 12]: for num_classes in [2, 6]: - for score_threshold 
in [0.01, ]: + for score_threshold in [ + 0.01, + ]: ops_config = [{ "op_type": "multiclass_nms3", "op_inputs": { @@ -99,9 +102,11 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_bboxes": TensorConfig(data_gen=partial( + "input_bboxes": + TensorConfig(data_gen=partial( generate_boxes, batch, num_boxes)), - "input_scores": TensorConfig( + "input_scores": + TensorConfig( data_gen=partial(generate_scores, batch, num_boxes, num_classes)) }, @@ -114,6 +119,7 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def clear_dynamic_shape(): self.dynamic_shape.min_input_shape = {} self.dynamic_shape.max_input_shape = {} @@ -123,8 +129,7 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -136,9 +141,7 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest): yield self.create_inference_config(), generate_trt_nodes_num( attrs, False), 1e-2 - def assert_tensors_near(self, - atol: float, - rtol: float, + def assert_tensors_near(self, atol: float, rtol: float, tensor: Dict[str, np.array], baseline: Dict[str, np.array]): # the order of tensorrt outputs are not consistent with paddle @@ -147,12 +150,10 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest): continue if key == "nms_output_boxes": basline_arr = np.array( - sorted( - baseline[key].reshape((-1, 6)), - key=lambda i: [i[0], i[1]])) + sorted(baseline[key].reshape((-1, 6)), + key=lambda i: [i[0], i[1]])) arr = np.array( - sorted( - arr.reshape((-1, 6)), key=lambda i: [i[0], i[1]])) + sorted(arr.reshape((-1, 6)), key=lambda i: [i[0], i[1]])) else: basline_arr = np.array(baseline[key].reshape((-1, 1))) arr = np.array(arr.reshape((-1, 1))) @@ -163,8 +164,7 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest): str(basline_arr.shape) + ', but got ' + str(arr.shape)) diff = abs(basline_arr - arr) self.assertTrue( - np.allclose( - basline_arr, arr, atol=atol, rtol=rtol), + np.allclose(basline_arr, arr, atol=atol, rtol=rtol), "Output has diff, Maximum absolute error: {}".format( np.amax(diff))) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py index 26066be7dc7..9fd60e5f3fe 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(batch, dim1): return np.random.random((batch, dim1, 768)).astype(np.float32) @@ -158,8 +160,7 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest): }, "op_outputs": { "Out": ["transpose21_output"], - "XShape": - ["transpose21_output_xshape"] + "XShape": ["transpose21_output_xshape"] }, "op_attrs": dics[3] }, @@ -203,8 +204,7 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest): }, "op_outputs": { "Out": ["transpose22_output"], - "XShape": - ["transpose22_output_xshape"] + "XShape": ["transpose22_output_xshape"] }, "op_attrs": dics[7] }, @@ -248,8 +248,7 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest): }, "op_outputs": { "Out": ["transpose23_output"], - "XShape": - ["transpose23_output_xshape"] + "XShape": ["transpose23_output_xshape"] }, "op_attrs": dics[11] }, @@ -323,8 +322,7 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest): }, "op_outputs": { "Out": ["transpose24_output"], - "XShape": - ["transpose24_output_xshape"] + "XShape": ["transpose24_output_xshape"] }, "op_attrs": dics[18] }, @@ -339,7 +337,7 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest): }, "op_attrs": dics[19] }, - # In order to fuse ops with + # In order to fuse ops with # multihead_matmul_fuse_pass_v2, the last op # must be mul. { @@ -359,28 +357,35 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest): program_config = ProgramConfig( ops=ops, weights={ - "mul1_weight": TensorConfig( + "mul1_weight": + TensorConfig( data_gen=partial(generate_weight1)), - "mul2_weight": TensorConfig( + "mul2_weight": + TensorConfig( data_gen=partial(generate_weight1)), - "mul3_weight": TensorConfig( + "mul3_weight": + TensorConfig( data_gen=partial(generate_weight1)), - "mul4_weight": TensorConfig( + "mul4_weight": + TensorConfig( data_gen=partial(generate_weight1)), - "elementwise_add1_weight": TensorConfig( + "elementwise_add1_weight": + TensorConfig( data_gen=partial(generate_weight2)), - "elementwise_add2_weight": TensorConfig( + "elementwise_add2_weight": + TensorConfig( data_gen=partial(generate_weight2)), - "elementwise_add3_weight": TensorConfig( + "elementwise_add3_weight": + TensorConfig( data_gen=partial(generate_weight2)), }, inputs={ - "input_data1": TensorConfig( - data_gen=partial(generate_input1, batch, - dim1)), - "input_data2": TensorConfig( - data_gen=partial(generate_input2, - input2_shape)), + "input_data1": + TensorConfig(data_gen=partial( + generate_input1, batch, dim1)), + "input_data2": + TensorConfig(data_gen=partial( + generate_input2, input2_shape)), }, outputs=["mul4_output"]) @@ -388,6 +393,7 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): # The last dim of input1 and input2 should be static. 
self.dynamic_shape.min_input_shape = { @@ -412,8 +418,7 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -431,6 +436,7 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest): yield self.create_inference_config(), (1, 3), (1e-5, 1e-5) def add_skip_trt_case(self): + def teller1(program_config, predictor_config): if self.trt_param.precision == paddle_infer.PrecisionType.Half: return True @@ -466,7 +472,9 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest): class TrtConvertMultiHeadMatmulTestInt8(TrtConvertMultiHeadMatmulTest): + def sample_program_configs(self): + def generate_input1(batch, dim1): return np.random.random((batch, dim1, 768)).astype(np.float32) @@ -608,8 +616,7 @@ class TrtConvertMultiHeadMatmulTestInt8(TrtConvertMultiHeadMatmulTest): }, "op_outputs": { "Out": ["transpose21_output"], - "XShape": - ["transpose21_output_xshape"] + "XShape": ["transpose21_output_xshape"] }, "op_attrs": dics[3] }, @@ -653,8 +660,7 @@ class TrtConvertMultiHeadMatmulTestInt8(TrtConvertMultiHeadMatmulTest): }, "op_outputs": { "Out": ["transpose22_output"], - "XShape": - ["transpose22_output_xshape"] + "XShape": ["transpose22_output_xshape"] }, "op_attrs": dics[7] }, @@ -698,8 +704,7 @@ class TrtConvertMultiHeadMatmulTestInt8(TrtConvertMultiHeadMatmulTest): }, "op_outputs": { "Out": ["transpose23_output"], - "XShape": - ["transpose23_output_xshape"] + "XShape": ["transpose23_output_xshape"] }, "op_attrs": dics[11] }, @@ -773,8 +778,7 @@ class TrtConvertMultiHeadMatmulTestInt8(TrtConvertMultiHeadMatmulTest): }, "op_outputs": { "Out": ["transpose24_output"], - "XShape": - ["transpose24_output_xshape"] + "XShape": ["transpose24_output_xshape"] }, "op_attrs": dics[18] }, @@ -789,7 +793,7 @@ class TrtConvertMultiHeadMatmulTestInt8(TrtConvertMultiHeadMatmulTest): }, "op_attrs": dics[19] }, - # In order to fuse ops with + # In order to fuse ops with # multihead_matmul_fuse_pass_v2, the last op # must be mul. 
{ @@ -809,28 +813,35 @@ class TrtConvertMultiHeadMatmulTestInt8(TrtConvertMultiHeadMatmulTest): program_config = ProgramConfig( ops=ops, weights={ - "mul1_weight": TensorConfig( + "mul1_weight": + TensorConfig( data_gen=partial(generate_weight1)), - "mul2_weight": TensorConfig( + "mul2_weight": + TensorConfig( data_gen=partial(generate_weight1)), - "mul3_weight": TensorConfig( + "mul3_weight": + TensorConfig( data_gen=partial(generate_weight1)), - "mul4_weight": TensorConfig( + "mul4_weight": + TensorConfig( data_gen=partial(generate_weight1)), - "elementwise_add1_weight": TensorConfig( + "elementwise_add1_weight": + TensorConfig( data_gen=partial(generate_weight2)), - "elementwise_add2_weight": TensorConfig( + "elementwise_add2_weight": + TensorConfig( data_gen=partial(generate_weight2)), - "elementwise_add3_weight": TensorConfig( + "elementwise_add3_weight": + TensorConfig( data_gen=partial(generate_weight2)), }, inputs={ - "input_data1": TensorConfig( - data_gen=partial(generate_input1, batch, - dim1)), - "input_data2": TensorConfig( - data_gen=partial(generate_input2, - input2_shape)), + "input_data1": + TensorConfig(data_gen=partial( + generate_input1, batch, dim1)), + "input_data2": + TensorConfig(data_gen=partial( + generate_input2, input2_shape)), }, outputs=["mul4_output"]) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_nearest_interp.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_nearest_interp.py index 56c0b041da2..7cefcb96a3a 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_nearest_interp.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_nearest_interp.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,16 +22,16 @@ import unittest class TrtConvertNearestInterpTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - if attrs[0]['scale'] <= 0 and (attrs[0]['out_h'] <= 0 or - attrs[0]['out_w'] <= 0): + if attrs[0]['scale'] <= 0 and (attrs[0]['out_h'] <= 0 + or attrs[0]['out_w'] <= 0): return False if (attrs[0]['out_h'] <= 0) ^ (attrs[0]['out_w'] <= 0): return False @@ -39,6 +39,7 @@ class TrtConvertNearestInterpTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]]): return np.ones([1, 3, 64, 64]).astype(np.float32) @@ -73,9 +74,9 @@ class TrtConvertNearestInterpTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig( - data_gen=partial(generate_input1, - dics)) + "input_data": + TensorConfig(data_gen=partial( + generate_input1, dics)) }, outputs=["nearest_interp_output_data"]) @@ -83,6 +84,7 @@ class TrtConvertNearestInterpTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]} @@ -97,8 +99,7 @@ class TrtConvertNearestInterpTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -113,13 +114,14 @@ class TrtConvertNearestInterpTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-2 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-2 def add_skip_trt_case(self): + def teller1(program_config, predictor_config): if program_config.ops[0].attrs[ 'scale'] <= 0 and self.dynamic_shape.min_input_shape: diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_nearest_interp_v2.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_nearest_interp_v2.py index cf8b7b3516b..2cd4253cb8f 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_nearest_interp_v2.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_nearest_interp_v2.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ import unittest class TrtConvertNearestInterpV2Test(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(): return np.ones([1, 3, 32, 32]).astype(np.float32) @@ -60,6 +62,7 @@ class TrtConvertNearestInterpV2Test(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]} @@ -74,8 +77,7 @@ class TrtConvertNearestInterpV2Test(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -90,11 +92,11 @@ class TrtConvertNearestInterpV2Test(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-2 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-2 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pad.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pad.py index 446f7717e3b..767854b0fba 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pad.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pad.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,12 +22,12 @@ import unittest class TrtConvertPadTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if attrs[0]['pad_value'] != 0.0: @@ -39,6 +39,7 @@ class TrtConvertPadTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]]): return np.ones([1, 3, 64, 64]).astype(np.float32) @@ -46,8 +47,7 @@ class TrtConvertPadTest(TrtLayerAutoScanTest): return np.random.random([24, 3, 3, 3]).astype(np.float32) for pad_value in [0.0, 1.0, 2.0, -100, 100.0]: - for paddings in [[0, 0, 0, 0, 1, 1, 1, 1], - [0, 0, 0, 0, 1, 2, 3, 4], + for paddings in [[0, 0, 0, 0, 1, 1, 1, 1], [0, 0, 0, 0, 1, 2, 3, 4], [0, 0, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, -1, -1, 1, 1]]: dics = [{"pad_value": pad_value, "paddings": paddings}, {}] @@ -77,6 +77,7 @@ class TrtConvertPadTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]} @@ -94,8 +95,7 @@ class TrtConvertPadTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -110,13 +110,14 @@ class TrtConvertPadTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-2 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-2 def add_skip_trt_case(self): + def teller1(program_config, predictor_config): for x in range(len(program_config.ops[0].attrs['paddings']) - 4): if program_config.ops[0].attrs['paddings'][x] != 0: diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pool2d.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pool2d.py index 89ce1145d74..24e80e01e97 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pool2d.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pool2d.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,6 +22,7 @@ import unittest class TrtConvertPool2dTest(TrtLayerAutoScanTest): + def is_paddings_valid(self, program_config: ProgramConfig) -> bool: exclusive = program_config.ops[0].attrs['exclusive'] paddings = program_config.ops[0].attrs['paddings'] @@ -80,14 +81,16 @@ class TrtConvertPool2dTest(TrtLayerAutoScanTest): }] ops_config = [{ - "op_type": "pool2d", + "op_type": + "pool2d", "op_inputs": { "X": ["input_data"], }, "op_outputs": { "Out": ["output_data"] }, - "op_attrs": dics[0] + "op_attrs": + dics[0] }] ops = self.generate_op_config( ops_config) @@ -108,6 +111,7 @@ class TrtConvertPool2dTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]} @@ -122,8 +126,7 @@ class TrtConvertPool2dTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -138,13 +141,14 @@ class TrtConvertPool2dTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): + def teller(program_config, predictor_config): if program_config.ops[0].attrs['pooling_type'] == 'avg' and \ program_config.ops[0].attrs['global_pooling'] == False and \ diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_prelu.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_prelu.py index 00e3f7feb60..49a750f14dd 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_prelu.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_prelu.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ import unittest class TrtConvertPreluTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input(batch, dim1, dim2, dim3): shape = [batch] if dim1 != 0: @@ -108,16 +110,16 @@ class TrtConvertPreluTest(TrtLayerAutoScanTest): program_config = ProgramConfig( ops=ops, weights={ - "alpha_weight": TensorConfig( - data_gen=partial(generate_alpha, - dics, dim1, dim2, - dim3)) + "alpha_weight": + TensorConfig(data_gen=partial( + generate_alpha, dics, dim1, dim2, + dim3)) }, inputs={ - "input_data": TensorConfig( - data_gen=partial(generate_input, - batch, dim1, dim2, - dim3)), + "input_data": + TensorConfig(data_gen=partial( + generate_input, batch, dim1, dim2, + dim3)), }, outputs=["output_data"]) @@ -125,11 +127,18 @@ class TrtConvertPreluTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dim1 == 0: - self.dynamic_shape.min_input_shape = {"input_data": [1], } - self.dynamic_shape.max_input_shape = {"input_data": [4], } - self.dynamic_shape.opt_input_shape = {"input_data": [2], } + self.dynamic_shape.min_input_shape = { + "input_data": [1], + } + self.dynamic_shape.max_input_shape = { + "input_data": [4], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [2], + } else: if self.dim2 == 0 and self.dim3 == 0: self.dynamic_shape.min_input_shape = { @@ -168,8 +177,7 @@ class TrtConvertPreluTest(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] def generate_trt_nodes_num(attrs, dynamic_shape): @@ -189,11 +197,11 @@ class TrtConvertPreluTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): ver = paddle_infer.get_trt_compile_version() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_mean.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_mean.py index 2e1e04870b9..dc5696a9b79 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_mean.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_mean.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,11 +22,11 @@ import unittest class TrtConvertReduceMeanTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] ## dim should be in (-rank, rank), and not NONE @@ -45,6 +45,7 @@ class TrtConvertReduceMeanTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(dtype, attrs: List[Dict[str, Any]]): if dtype == -1 or dtype == 5: return np.random.random([1, 3, 64, 64]).astype(np.float32) @@ -80,7 +81,8 @@ class TrtConvertReduceMeanTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial( + "input_data": + TensorConfig(data_gen=partial( generate_input1, out_dtype, dics)) }, outputs=["reduce_output_data"]) @@ -92,6 +94,7 @@ class TrtConvertReduceMeanTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]} @@ -115,8 +118,7 @@ class TrtConvertReduceMeanTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -131,8 +133,8 @@ class TrtConvertReduceMeanTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, True), (5e-4, 5e-4) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py index 2a7e673d420..68c3e9bd377 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce_sum.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,11 +23,11 @@ import unittest class TrtConvertReduceSumTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] ## dim should be in (-rank, rank), and not NONE @@ -41,6 +41,7 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(dtype, attrs: List[Dict[str, Any]]): if dtype == -1 or dtype == 5: return np.random.random([1, 3, 64, 64]).astype(np.float32) @@ -76,7 +77,8 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial( + "input_data": + TensorConfig(data_gen=partial( generate_input1, out_dtype, dics)) }, outputs=["reduce_output_data"]) @@ -87,6 +89,7 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest): yield program_config def sample_predictor_configs(self, program_config): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]} @@ -110,8 +113,7 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reshape.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reshape.py index 4355b83557f..e05a78e66b9 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reshape.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reshape.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,10 @@ import unittest class TrtConvertReshapeTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if self.dims == 1: if len(attrs[0]['shape']) != 1: @@ -45,6 +45,7 @@ class TrtConvertReshapeTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]]): if self.dims == 4: return np.ones([1, 2, 4, 6]).astype(np.float32) @@ -66,10 +67,11 @@ class TrtConvertReshapeTest(TrtLayerAutoScanTest): for dims in [4, 3, 2, 1]: for num_input in [0, 1, 2, 3]: - for shape in [[1, 6, 8], [1, 2, 4, 6], [1, 1, 0, 12], - [1, 0, 6], [1, -1, 12], [2, -1], [3, 16], - [3, 4, 4], [48]]: - dics = [{"shape": shape, }, {}] + for shape in [[1, 6, 8], [1, 2, 4, 6], [1, 1, 0, 12], [1, 0, 6], + [1, -1, 12], [2, -1], [3, 16], [3, 4, 4], [48]]: + dics = [{ + "shape": shape, + }, {}] self.num_input = num_input self.dims = dims dics_intput = [{ @@ -89,18 +91,22 @@ class TrtConvertReshapeTest(TrtLayerAutoScanTest): dics_weight = [{ "shape_data": TensorConfig(data_gen=partial(generate_weight1, dics)), - "shapeT1_data": TensorConfig(data_gen=partial( - generate_shapeT1_data, dics)), - "shapeT2_data": TensorConfig(data_gen=partial( - generate_shapeT2_data, dics)) + "shapeT1_data": + TensorConfig( + data_gen=partial(generate_shapeT1_data, dics)), + "shapeT2_data": + TensorConfig( + data_gen=partial(generate_shapeT2_data, dics)) }, { "shape_data": TensorConfig(data_gen=partial(generate_weight1, dics)) }, { - "shapeT1_data": TensorConfig(data_gen=partial( - generate_shapeT1_data, dics)), - "shapeT2_data": TensorConfig(data_gen=partial( - generate_shapeT2_data, dics)) + "shapeT1_data": + TensorConfig( + data_gen=partial(generate_shapeT1_data, dics)), + "shapeT2_data": + TensorConfig( + data_gen=partial(generate_shapeT2_data, dics)) }, {}] ops_config = [{ @@ -116,8 +122,9 @@ class TrtConvertReshapeTest(TrtLayerAutoScanTest): ops=ops, weights=dics_weight[num_input], inputs={ - "reshape_input": TensorConfig(data_gen=partial( - generate_input1, dics)) + "reshape_input": + TensorConfig( + data_gen=partial(generate_input1, dics)) }, outputs=["reshape_out"]) @@ -125,6 +132,7 @@ class TrtConvertReshapeTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 4: self.dynamic_shape.min_input_shape = { @@ -164,8 +172,7 @@ class TrtConvertReshapeTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] if attrs[0]['shape'][0] > 1 and len(attrs[0]['shape']) > 1: pass @@ -182,13 +189,14 @@ class TrtConvertReshapeTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): + def teller1(program_config, predictor_config): if 
len(program_config.weights) >= 1: return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roi_align.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roi_align.py index b2d754337fe..ca12fe876ca 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roi_align.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roi_align.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,10 +22,12 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertRoiAlignTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]], batch): return np.ones([batch, 256, 32, 32]).astype(np.float32) @@ -61,30 +63,34 @@ class TrtConvertRoiAlignTest(TrtLayerAutoScanTest): "ROIs": ["ROIs"] }] program_input = [{ - "roi_align_input": TensorConfig( - data_gen=partial(generate_input1, - dics, batch)), - "ROIs": TensorConfig(data_gen=partial( + "roi_align_input": + TensorConfig(data_gen=partial( + generate_input1, dics, batch)), + "ROIs": + TensorConfig(data_gen=partial( generate_input2, dics, batch)), - "RoisNum": TensorConfig( - data_gen=partial(generate_input3, - dics, batch)) + "RoisNum": + TensorConfig(data_gen=partial( + generate_input3, dics, batch)) }, { - "roi_align_input": TensorConfig( - data_gen=partial(generate_input1, - dics, batch)), - "ROIs": TensorConfig( - data_gen=partial(generate_input2, - dics, batch), - lod=[[32, 3]]) + "roi_align_input": + TensorConfig(data_gen=partial( + generate_input1, dics, batch)), + "ROIs": + TensorConfig(data_gen=partial( + generate_input2, dics, batch), + lod=[[32, 3]]) }] ops_config = [{ - "op_type": "roi_align", - "op_inputs": dics_input[num_input], + "op_type": + "roi_align", + "op_inputs": + dics_input[num_input], "op_outputs": { "Out": ["roi_align_out"] }, - "op_attrs": dics[0] + "op_attrs": + dics[0] }] ops = self.generate_op_config(ops_config) program_config = ProgramConfig( @@ -97,6 +103,7 @@ class TrtConvertRoiAlignTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.num_input == 0: self.dynamic_shape.min_input_shape = { @@ -145,8 +152,7 @@ class TrtConvertRoiAlignTest(TrtLayerAutoScanTest): return 0, 4 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -161,13 +167,14 @@ class TrtConvertRoiAlignTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), 
generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): + def teller1(program_config, predictor_config): if len(program_config.inputs) == 3: return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roll.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roll.py index 1b3d3803661..675054317d9 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roll.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roll.py @@ -22,16 +22,17 @@ import unittest class TrtConvertRollTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]]): return np.ones([1, 56, 56, 192]).astype(np.float32) @@ -67,6 +68,7 @@ class TrtConvertRollTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = { "input_data": [1, 56, 56, 192] @@ -94,8 +96,7 @@ class TrtConvertRollTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -110,11 +111,11 @@ class TrtConvertRollTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-4 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-4 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_scale.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_scale.py index 75783450e86..1765760e15c 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_scale.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_scale.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ import unittest class TrtConvertScaleTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]], batch): if self.dims == 4: return np.ones([batch, 3, 24, 24]).astype(np.float32) @@ -60,27 +62,30 @@ class TrtConvertScaleTest(TrtLayerAutoScanTest): "X": ["scale_input"] }] dics_intputs = [{ - "ScaleTensor": TensorConfig( - data_gen=partial(generate_weight1, - dics)) + "ScaleTensor": + TensorConfig(data_gen=partial( + generate_weight1, dics)) }, {}] ops_config = [{ - "op_type": "scale", - "op_inputs": dics_intput[num_input], + "op_type": + "scale", + "op_inputs": + dics_intput[num_input], "op_outputs": { "Out": ["scale_out"] }, - "op_attrs": dics[0] + "op_attrs": + dics[0] }] ops = self.generate_op_config(ops_config) program_config = ProgramConfig( ops=ops, weights=dics_intputs[num_input], inputs={ - "scale_input": TensorConfig( - data_gen=partial(generate_input1, - dics, batch)) + "scale_input": + TensorConfig(data_gen=partial( + generate_input1, dics, batch)) }, outputs=["scale_out"]) @@ -88,6 +93,7 @@ class TrtConvertScaleTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 4: self.dynamic_shape.min_input_shape = { @@ -121,8 +127,7 @@ class TrtConvertScaleTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -137,13 +142,14 @@ class TrtConvertScaleTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): + def teller1(program_config, predictor_config): if self.num_input == 0: return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_shuffle_channel.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_shuffle_channel.py index c6a81472360..9948b29321d 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_shuffle_channel.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_shuffle_channel.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ import unittest class TrtConvertShuffleChannelTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]], batch): return np.ones([batch, 6, 24, 24]).astype(np.float32) @@ -47,8 +49,9 @@ class TrtConvertShuffleChannelTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "shuffle_channel_input": TensorConfig(data_gen=partial( - generate_input1, dics, batch)) + "shuffle_channel_input": + TensorConfig( + data_gen=partial(generate_input1, dics, batch)) }, outputs=["shuffle_channel_out"]) @@ -56,6 +59,7 @@ class TrtConvertShuffleChannelTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = { "shuffle_channel_input": [1, 6, 24, 24] @@ -79,8 +83,7 @@ class TrtConvertShuffleChannelTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] self.trt_param.max_batch_size = 9 # for static_shape @@ -95,11 +98,11 @@ class TrtConvertShuffleChannelTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): pass diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_skip_layernorm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_skip_layernorm.py index 9f3e7a81777..c99c3cef5b6 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_skip_layernorm.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_skip_layernorm.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,20 +22,20 @@ import unittest class TrtConvertSkipLayernormTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights outputs = program_config.outputs attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] #The input dimension should be less than or equal to the set axis. if attrs[0]['begin_norm_axis'] >= 0: - if len(inputs['skip_layernorm_inputX_data'].shape) <= attrs[0][ - 'begin_norm_axis']: + if len(inputs['skip_layernorm_inputX_data'].shape + ) <= attrs[0]['begin_norm_axis']: return False #2D input is not supported. 
@@ -44,6 +44,7 @@ class TrtConvertSkipLayernormTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]], batch): if self.dims == 4: return np.ones([batch, 6, 128, 768]).astype(np.float32) @@ -94,18 +95,20 @@ class TrtConvertSkipLayernormTest(TrtLayerAutoScanTest): program_config = ProgramConfig( ops=ops, weights={ - "Bias": TensorConfig(data_gen=partial( + "Bias": + TensorConfig(data_gen=partial( generate_weight1, dics)), - "Scale": TensorConfig(data_gen=partial( + "Scale": + TensorConfig(data_gen=partial( generate_weight2, dics)) }, inputs={ - "skip_layernorm_inputX_data": TensorConfig( - data_gen=partial(generate_input1, dics, - batch)), - "skip_layernorm_inputY_data": TensorConfig( - data_gen=partial(generate_input2, dics, - batch)) + "skip_layernorm_inputX_data": + TensorConfig(data_gen=partial( + generate_input1, dics, batch)), + "skip_layernorm_inputY_data": + TensorConfig(data_gen=partial( + generate_input2, dics, batch)) }, outputs=["skip_layernorm_out"]) @@ -113,6 +116,7 @@ class TrtConvertSkipLayernormTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 4: self.dynamic_shape.min_input_shape = { @@ -184,8 +188,7 @@ class TrtConvertSkipLayernormTest(TrtLayerAutoScanTest): return 0, 4 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape clear_dynamic_shape() @@ -200,11 +203,11 @@ class TrtConvertSkipLayernormTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): pass diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_slice.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_slice.py index 86c52dad23a..f82acb204f0 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_slice.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_slice.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,12 +22,12 @@ import unittest class TrtConvertSliceTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] for x in attrs[0]["decrease_axis"]: @@ -42,8 +42,8 @@ class TrtConvertSliceTest(TrtLayerAutoScanTest): else: start = attrs[0]["starts"][x] if attrs[0]["ends"][x] < 0: - end = attrs[0]["ends"][x] + inputs['input_data'].shape[attrs[0][ - "axes"][x]] + end = attrs[0]["ends"][x] + inputs['input_data'].shape[ + attrs[0]["axes"][x]] else: end = attrs[0]["ends"][x] start = max(0, start) @@ -54,6 +54,7 @@ class TrtConvertSliceTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]]): return np.ones([6, 6, 64, 64]).astype(np.float32) @@ -86,8 +87,9 @@ class TrtConvertSliceTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial( - generate_input1, dics)) + "input_data": + TensorConfig( + data_gen=partial(generate_input1, dics)) }, outputs=["slice_output_data"]) @@ -95,6 +97,7 @@ class TrtConvertSliceTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} self.dynamic_shape.max_input_shape = {"input_data": [8, 8, 64, 64]} @@ -124,8 +127,7 @@ class TrtConvertSliceTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] self.trt_param.max_batch_size = 9 # for static_shape @@ -140,11 +142,11 @@ class TrtConvertSliceTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-4 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-4 def test(self): # TODO(inference): fix. diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_softmax.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_softmax.py index 7efaebf00cf..b6cef5ca17b 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_softmax.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_softmax.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,14 +22,14 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertSoftmaxTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights outputs = program_config.outputs attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] #The input dimension should be less than or equal to the set axis. @@ -39,6 +39,7 @@ class TrtConvertSoftmaxTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]], batch): if self.dims == 4: return np.ones([batch, 3, 24, 24]).astype(np.float32) @@ -67,8 +68,9 @@ class TrtConvertSoftmaxTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "softmax_input": TensorConfig(data_gen=partial( - generate_input1, dics, batch)) + "softmax_input": + TensorConfig( + data_gen=partial(generate_input1, dics, batch)) }, outputs=["softmax_out"]) @@ -76,6 +78,7 @@ class TrtConvertSoftmaxTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 4: self.dynamic_shape.min_input_shape = { @@ -111,8 +114,7 @@ class TrtConvertSoftmaxTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape clear_dynamic_shape() @@ -129,11 +131,11 @@ class TrtConvertSoftmaxTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_split.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_split.py index cef84dfbb4e..38ca6963e94 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_split.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_split.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,14 +22,14 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertSplitTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights outputs = program_config.outputs attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # the dimensions of input and axis match if len(inputs['split_input'].shape) <= attrs[0]['axis']: @@ -60,8 +60,9 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest): #Test AxisTensor and SectionsTensorList if self.num_input == 0: - if self.dims == 2 and attrs[0]['sections'] == [10, 14] and len( - outputs) == 2: + if self.dims == 2 and attrs[0]['sections'] == [ + 10, 14 + ] and len(outputs) == 2: return True else: return False @@ -69,6 +70,7 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]], batch): if self.dims == 4: return np.ones([batch, 3, 3, 24]).astype(np.float32) @@ -121,22 +123,25 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest): "AxisTensor": TensorConfig(data_gen=partial( generate_AxisTensor, dics)), - "SectionsTensorList1": TensorConfig( - data_gen=partial( - generate_SectionsTensorList1, - dics)), + "SectionsTensorList1": + TensorConfig(data_gen=partial( + generate_SectionsTensorList1, + dics)), "SectionsTensorList2": TensorConfig(data_gen=partial( generate_SectionsTensorList2, dics)) }, {}] ops_config = [{ - "op_type": "split", - "op_inputs": dics_intput[num_input], + "op_type": + "split", + "op_inputs": + dics_intput[num_input], "op_outputs": { "Out": Out }, - "op_attrs": dics[0] + "op_attrs": + dics[0] }] ops = self.generate_op_config(ops_config) program_config = ProgramConfig( @@ -153,6 +158,7 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 4: self.dynamic_shape.min_input_shape = { @@ -195,8 +201,7 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest): return 0, 5 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] self.trt_param.max_batch_size = 9 # for static_shape @@ -211,13 +216,14 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): + def teller1(program_config, predictor_config): if len(program_config.weights) == 3: return True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_stack.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_stack.py index 062312b0fab..f9641bad34c 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_stack.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_stack.py @@ -22,14 +22,14 @@ import unittest class 
TrtConvertStackTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights outputs = program_config.outputs attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] #The input dimension should be less than the set axis. if len(inputs['stack_input1'].shape) < attrs[0]['axis']: @@ -38,6 +38,7 @@ class TrtConvertStackTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]], batch): if self.dims == 4: return np.ones([batch, 3, 24, 24]).astype(np.float32) @@ -89,12 +90,15 @@ class TrtConvertStackTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "stack_input1": TensorConfig(data_gen=partial( - generate_input1, dics, batch)), - "stack_input2": TensorConfig(data_gen=partial( - generate_input2, dics, batch)), - "stack_input3": TensorConfig(data_gen=partial( - generate_input3, dics, batch)) + "stack_input1": + TensorConfig( + data_gen=partial(generate_input1, dics, batch)), + "stack_input2": + TensorConfig( + data_gen=partial(generate_input2, dics, batch)), + "stack_input3": + TensorConfig( + data_gen=partial(generate_input3, dics, batch)) }, outputs=["stack_output"]) @@ -102,6 +106,7 @@ class TrtConvertStackTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 4: self.dynamic_shape.min_input_shape = { @@ -180,8 +185,7 @@ class TrtConvertStackTest(TrtLayerAutoScanTest): return 0, 5 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape clear_dynamic_shape() @@ -195,11 +199,11 @@ class TrtConvertStackTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def add_skip_trt_case(self): pass diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_strided_slice.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_strided_slice.py index 8bc48047c13..beea119c79f 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_strided_slice.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_strided_slice.py @@ -22,16 +22,17 @@ import unittest class TrtConvertStridedSliceTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]]): return np.ones([1, 56, 56, 192]).astype(np.float32) @@ -66,9 +67,9 @@ class TrtConvertStridedSliceTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": 
TensorConfig( - data_gen=partial(generate_input1, - dics)) + "input_data": + TensorConfig(data_gen=partial( + generate_input1, dics)) }, outputs=["slice_output_data"]) @@ -76,6 +77,7 @@ class TrtConvertStridedSliceTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = { "input_data": [1, 56, 56, 192] @@ -109,8 +111,7 @@ class TrtConvertStridedSliceTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -122,8 +123,8 @@ class TrtConvertStridedSliceTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_swish.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_swish.py index df97e7542b8..1ae92dc527a 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_swish.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_swish.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ import unittest class TrtConvertSwishTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(dims, attrs: List[Dict[str, Any]]): if dims == 1: return np.ones([3]).astype(np.float32) @@ -58,8 +60,9 @@ class TrtConvertSwishTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial( - generate_input1, dims, dics)) + "input_data": + TensorConfig( + data_gen=partial(generate_input1, dims, dics)) }, outputs=["output_data"]) @@ -67,6 +70,7 @@ class TrtConvertSwishTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 1: self.dynamic_shape.min_input_shape = {"input_data": [1]} @@ -104,8 +108,7 @@ class TrtConvertSwishTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -120,11 +123,11 @@ class TrtConvertSwishTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_tile.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_tile.py index cbbd13a7b80..82c707869f8 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_tile.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_tile.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -26,11 +26,11 @@ import hypothesis.strategies as st class TrtConvertTileTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] for x in attrs[0]['repeat_times']: if x <= 0: @@ -39,6 +39,7 @@ class TrtConvertTileTest(TrtLayerAutoScanTest): return True def sample_program_configs(self, *args, **kwargs): + def generate_input1(attrs: List[Dict[str, Any]]): return np.ones([1, 2, 3, 4]).astype(np.float32) @@ -60,8 +61,8 @@ class TrtConvertTileTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial(generate_input1, - dics)) + "input_data": + TensorConfig(data_gen=partial(generate_input1, dics)) }, outputs=["tile_output_data"]) @@ -69,6 +70,7 @@ class TrtConvertTileTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]} self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]} @@ -90,8 +92,7 @@ class TrtConvertTileTest(TrtLayerAutoScanTest): return 0, 3 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -106,11 +107,11 @@ class TrtConvertTileTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-4 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-4 @given(repeat_times=st.sampled_from([[100], [1, 2], [0, 3], [1, 2, 100]])) def test(self, *args, **kwargs): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_transpose.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_transpose.py index 87e81396ab4..e9604925e4a 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_transpose.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_transpose.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,14 +22,14 @@ import unittest class TrtConvertTransposeTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs weights = program_config.weights outputs = program_config.outputs attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] #The shape of input and axis should be equal. 
@@ -39,6 +39,7 @@ class TrtConvertTransposeTest(TrtLayerAutoScanTest): return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]], batch): if self.dims == 4: return np.ones([batch, 3, 24, 24]).astype(np.float32) @@ -50,8 +51,8 @@ class TrtConvertTransposeTest(TrtLayerAutoScanTest): for dims in [2, 3, 4]: for batch in [1, 2, 4]: for axis in [[0, 1, 3, 2], [0, 3, 2, 1], [3, 2, 0, 1], - [0, 1, 2, 3], [0, 1, 2], [2, 0, 1], [1, 0], - [0, 1]]: + [0, 1, 2, 3], [0, 1, 2], [2, 0, 1], [1, 0], [0, + 1]]: self.dims = dims dics = [{"axis": axis}, {}] ops_config = [{ @@ -69,8 +70,9 @@ class TrtConvertTransposeTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "transpose_input": TensorConfig(data_gen=partial( - generate_input1, dics, batch)) + "transpose_input": + TensorConfig( + data_gen=partial(generate_input1, dics, batch)) }, outputs=["transpose_out"]) @@ -78,6 +80,7 @@ class TrtConvertTransposeTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 4: self.dynamic_shape.min_input_shape = { @@ -125,8 +128,7 @@ class TrtConvertTransposeTest(TrtLayerAutoScanTest): return 0, 3 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape clear_dynamic_shape() @@ -140,11 +142,11 @@ class TrtConvertTransposeTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py index 2abf0a1acda..fd4753528ee 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,12 @@ from typing import Optional, List, Callable, Dict, Any, Set class TrtConvertActivationTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): if dims == 1: return np.ones([32]).astype(np.float32) @@ -58,7 +60,8 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "input_data": TensorConfig(data_gen=partial( + "input_data": + TensorConfig(data_gen=partial( generate_input1, dims, batch, dics)) }, outputs=["output_data"]) @@ -67,6 +70,7 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if self.dims == 1: self.dynamic_shape.min_input_shape = {"input_data": [1]} @@ -102,8 +106,7 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): return 1, 2 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape @@ -118,11 +121,11 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 def test(self): self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_yolo_box.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_yolo_box.py index 269523661ee..cebede99e6f 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_yolo_box.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_yolo_box.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,17 +22,19 @@ import unittest class TrtConvertYoloBoxTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]], batch, channel): if attrs[0]['iou_aware'] == True: - return np.ones( - [batch, 3 * (channel + 6), 13, 13]).astype(np.float32) + return np.ones([batch, 3 * (channel + 6), 13, + 13]).astype(np.float32) else: - return np.ones( - [batch, 3 * (channel + 5), 13, 13]).astype(np.float32) + return np.ones([batch, 3 * (channel + 5), 13, + 13]).astype(np.float32) def generate_input2(attrs: List[Dict[str, Any]], batch): return np.random.random([batch, 2]).astype(np.int32) @@ -47,14 +49,20 @@ class TrtConvertYoloBoxTest(TrtLayerAutoScanTest): for iou_aware in [False, True]: for iou_aware_factor in [0.5]: dics = [{ - "class_num": class_num, - "anchors": anchors, + "class_num": + class_num, + "anchors": + anchors, "downsample_ratio": downsample_ratio, - "conf_thresh": conf_thresh, - "clip_bbox": clip_bbox, - "scale_x_y": scale_x_y, - "iou_aware": iou_aware, + "conf_thresh": + conf_thresh, + "clip_bbox": + clip_bbox, + "scale_x_y": + scale_x_y, + "iou_aware": + iou_aware, "iou_aware_factor": iou_aware_factor }, {}] @@ -82,7 +90,8 @@ class TrtConvertYoloBoxTest(TrtLayerAutoScanTest): generate_input1, dics, batch, class_num)), - "imgsize": TensorConfig( + "imgsize": + TensorConfig( data_gen=partial( generate_input2, dics, batch)) @@ -93,6 +102,7 @@ class TrtConvertYoloBoxTest(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): if attrs[0]['iou_aware'] == True: channel = 3 * (attrs[0]['class_num'] + 6) @@ -129,8 +139,7 @@ class TrtConvertYoloBoxTest(TrtLayerAutoScanTest): return 1, 4 attrs = [ - program_config.ops[i].attrs - for i in range(len(program_config.ops)) + program_config.ops[i].attrs for i in range(len(program_config.ops)) ] # for static_shape clear_dynamic_shape() @@ -144,11 +153,11 @@ class TrtConvertYoloBoxTest(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, - True), 1e-3 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-3 def add_skip_trt_case(self): pass diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_yolo_box_head.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_yolo_box_head.py index ece2d187fb9..08a09338bf2 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_yolo_box_head.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_yolo_box_head.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,7 +22,9 @@ import unittest class TrtConvertYoloBoxHeadTest(TrtLayerAutoScanTest): + def sample_program_configs(self): + def generate_input(attrs: List[Dict[str, Any]], batch, shape): gen_shape = shape.copy() gen_shape.insert(0, batch) @@ -53,7 +55,8 @@ class TrtConvertYoloBoxHeadTest(TrtLayerAutoScanTest): ops=ops, weights={}, inputs={ - "yolo_box_head_input": TensorConfig(data_gen=partial( + "yolo_box_head_input": + TensorConfig(data_gen=partial( generate_input, attrs_dict, batch, input_shape[i])) }, outputs=["yolo_box_head_output"]) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_deformable_conv.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_deformable_conv.py index 508095fb801..3bed89e74f5 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_deformable_conv.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_deformable_conv.py @@ -24,15 +24,19 @@ from paddle.fluid.core import AnalysisConfig class TRTDeformableConvTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - input = fluid.data( - name='input', shape=self.input_size, dtype=self.dtype) - offset = fluid.data( - name='offset', shape=self.offset_size, dtype=self.dtype) - mask = fluid.data( - name='mask', shape=self.mask_size, dtype=self.dtype) + input = fluid.data(name='input', + shape=self.input_size, + dtype=self.dtype) + offset = fluid.data(name='offset', + shape=self.offset_size, + dtype=self.dtype) + mask = fluid.data(name='mask', + shape=self.mask_size, + dtype=self.dtype) output = fluid.layers.deformable_conv( input, diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_dynamic_shape.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_dynamic_shape.py index a7ae6a635ec..8b059538738 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_dynamic_shape.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_dynamic_shape.py @@ -24,30 +24,33 @@ from paddle.fluid.core import AnalysisConfig class TRTDynamicShapeTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 16, 16], dtype="float32") - out = fluid.layers.conv2d( - input=data, - num_filters=3, - filter_size=3, - groups=1, - padding=[1, 1], - bias_attr=False, - act=None) + data = fluid.data(name="data", + shape=[-1, 3, 16, 16], + dtype="float32") + out = fluid.layers.conv2d(input=data, + num_filters=3, + filter_size=3, + groups=1, + padding=[1, 1], + bias_attr=False, + act=None) self.feeds = self.set_feeds() self.enable_trt = True self.trt_parameters = TRTDynamicShapeTest.TensorRTParam( 1 << 30, 1, 1, AnalysisConfig.Precision.Float32, False, False) - self.dynamic_shape_params = TRTDynamicShapeTest.DynamicShapeParam({ - 'data': [1, 3, 8, 8] - }, {'data': [1, 3, 32, 32]}, {'data': [1, 3, 16, 16]}, False) + self.dynamic_shape_params = TRTDynamicShapeTest.DynamicShapeParam( + {'data': [1, 3, 8, 8]}, {'data': [1, 3, 32, 32]}, + {'data': [1, 3, 16, 16]}, False) self.fetch_list = [out] def set_feeds(self): - return {"data": np.random.random([1, 3, 16, 
16]).astype("float32"), } + return { + "data": np.random.random([1, 3, 16, 16]).astype("float32"), + } def test_check_output(self): if core.is_compiled_with_cuda(): @@ -56,8 +59,11 @@ class TRTDynamicShapeTest(InferencePassTest): class TRTDynamicShapeOutOfBound1Test(TRTDynamicShapeTest): + def set_feeds(self): - return {"data": np.random.random([1, 3, 64, 16]).astype("float32"), } + return { + "data": np.random.random([1, 3, 64, 16]).astype("float32"), + } def test_check_output(self): if core.is_compiled_with_cuda(): @@ -71,18 +77,21 @@ class TRTDynamicShapeOutOfBound1Test(TRTDynamicShapeTest): # class TRTDynamicShapeOutOfBound2Test(TRTDynamicShapeTest): # def set_feeds(self): # return {"data": np.random.random([2, 3, 16, 16]).astype("float32"), } -# +# # def test_check_output(self): # if core.is_compiled_with_cuda(): # use_gpu = True # with self.assertRaises(Exception): # self.check_output_with_option(use_gpu) -# +# class TRTDynamicShapeOutOfBound3Test(TRTDynamicShapeTest): + def set_feeds(self): - return {"data": np.random.random([1, 3, 4, 16]).astype("float32"), } + return { + "data": np.random.random([1, 3, 4, 16]).astype("float32"), + } def test_check_output(self): if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_elementwise_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_elementwise_op.py index b40daba4868..a989135a64c 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_elementwise_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_elementwise_op.py @@ -26,12 +26,15 @@ from paddle.fluid.core import AnalysisConfig class TensorRTSubgraphPassElementwiseBroadcastTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data1 = fluid.data( - name="data1", shape=[-1, 3, 64, 64], dtype="float32") - data2 = fluid.data( - name="data2", shape=[-1, 3, 64, 1], dtype="float32") + data1 = fluid.data(name="data1", + shape=[-1, 3, 64, 64], + dtype="float32") + data2 = fluid.data(name="data2", + shape=[-1, 3, 64, 1], + dtype="float32") eltwise_out = self.append_eltwise(data1, data2) out = fluid.layers.batch_norm(eltwise_out, is_test=True) self.feeds = { @@ -58,18 +61,21 @@ class TensorRTSubgraphPassElementwiseBroadcastTest(InferencePassTest): class TensorRTSubgraphPassElementwiseBroadcastTest1( TensorRTSubgraphPassElementwiseBroadcastTest): + def append_eltwise(self, data1, data2): return fluid.layers.elementwise_sub(x=data1, y=data2, axis=0) class TensorRTSubgraphPassElementwiseBroadcastTest2( TensorRTSubgraphPassElementwiseBroadcastTest): + def append_eltwise(self, data1, data2): return fluid.layers.elementwise_mul(x=data1, y=data2, axis=0) class TensorRTSubgraphPassElementwiseBroadcastTest3( TensorRTSubgraphPassElementwiseBroadcastTest): + def append_eltwise(self, data1, data2): return fluid.layers.elementwise_div(x=data1, y=data2, axis=0) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py index dd6232fac45..4b086f995fc 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py @@ -25,10 +25,12 @@ from paddle.fluid.core import PassVersionChecker class FCFusePassTRTTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[32, 
128, 2, 2], dtype="float32") + data = fluid.data(name="data", + shape=[32, 128, 2, 2], + dtype="float32") fc_out1 = fluid.layers.fc(input=data, size=128, num_flatten_dims=1, @@ -38,9 +40,9 @@ class FCFusePassTRTTest(InferencePassTest): self.feeds = { "data": np.random.random((32, 128, 2, 2)).astype("float32") } - # Diff occurred between GPU and TRT. - # In order to provide TRT CI ASAP, this test for trt part - # is disabled temporarily. + # Diff occurred between GPU and TRT. + # In order to provide TRT CI ASAP, this test for trt part + # is disabled temporarily. # self.enable_trt = True # self.trt_parameters = FCFusePassTRTTest.TensorRTParam( # 1 << 30, 32, 3, AnalysisConfig.Precision.Float32, False, False) @@ -55,10 +57,12 @@ class FCFusePassTRTTest(InferencePassTest): class FCFusePassTRTStaticDims4Cols1Test(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[32, 128, 32, 8], dtype="float32") + data = fluid.data(name="data", + shape=[32, 128, 32, 8], + dtype="float32") fc_out1 = fluid.layers.fc(input=data, size=64, num_flatten_dims=1, @@ -82,10 +86,12 @@ class FCFusePassTRTStaticDims4Cols1Test(InferencePassTest): class FCFusePassTRTStaticDims4Cols2Test(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[3, 24, 16, 16], dtype="float32") + data = fluid.data(name="data", + shape=[3, 24, 16, 16], + dtype="float32") fc_out1 = fluid.layers.fc(input=data, size=32, num_flatten_dims=2, @@ -109,6 +115,7 @@ class FCFusePassTRTStaticDims4Cols2Test(InferencePassTest): class FCFusePassTRTDynamicDims2Test(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[32, 128], dtype="float32") @@ -123,9 +130,7 @@ class FCFusePassTRTDynamicDims2Test(InferencePassTest): self.trt_parameters = FCFusePassTRTDynamicDims2Test.TensorRTParam( 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) self.dynamic_shape_params = FCFusePassTRTDynamicDims2Test.DynamicShapeParam( - { - 'data': [1, 128] - }, {'data': [64, 128]}, {'data': [32, 128]}, False) + {'data': [1, 128]}, {'data': [64, 128]}, {'data': [32, 128]}, False) self.fetch_list = [out] def test_check_output(self): @@ -137,6 +142,7 @@ class FCFusePassTRTDynamicDims2Test(InferencePassTest): class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32") @@ -151,9 +157,8 @@ class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest): self.trt_parameters = FCFusePassTRTDynamicDims3Cols1Test.TensorRTParam( 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols1Test.DynamicShapeParam( - { - 'data': [1, 128, 32] - }, {'data': [64, 128, 32]}, {'data': [32, 128, 32]}, False) + {'data': [1, 128, 32]}, {'data': [64, 128, 32]}, + {'data': [32, 128, 32]}, False) self.fetch_list = [out] def test_check_output(self): @@ -165,6 +170,7 @@ class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest): class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32") @@ -179,9 +185,8 @@ class 
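The fc_fuse hunks above repeat another pattern: a call such as fluid.data that no longer fits on one line keeps its first keyword argument on the call line and aligns the remaining keywords underneath it. A small sketch of that wrapping, using a hypothetical helper so it runs without Paddle installed:

import numpy as np


def make_placeholder(name, shape, dtype):
    """Hypothetical stand-in for fluid.data; only the call-site
    wrapping matters for this sketch."""
    del name  # unused in the sketch
    return np.zeros(shape, dtype=dtype)


data = make_placeholder(name="data",
                        shape=[32, 128, 32, 8],
                        dtype="float32")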
FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest): self.trt_parameters = FCFusePassTRTDynamicDims3Cols2Test.TensorRTParam( 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols2Test.DynamicShapeParam( - { - 'data': [1, 32, 32] - }, {'data': [64, 256, 32]}, {'data': [32, 128, 32]}, False) + {'data': [1, 32, 32]}, {'data': [64, 256, 32]}, + {'data': [32, 128, 32]}, False) self.fetch_list = [out] def test_check_output(self): @@ -193,10 +198,12 @@ class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest): class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[32, 12, 4, 6], dtype="float32") + data = fluid.data(name="data", + shape=[32, 12, 4, 6], + dtype="float32") fc_out1 = fluid.layers.fc(input=data, size=64, num_flatten_dims=1, @@ -210,9 +217,8 @@ class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest): self.trt_parameters = FCFusePassTRTDynamicDims4Cols1Test.TensorRTParam( 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols1Test.DynamicShapeParam( - { - 'data': [1, 12, 4, 6] - }, {'data': [64, 12, 4, 6]}, {'data': [32, 12, 4, 6]}, False) + {'data': [1, 12, 4, 6]}, {'data': [64, 12, 4, 6]}, + {'data': [32, 12, 4, 6]}, False) self.fetch_list = [out] def test_check_output(self): @@ -224,10 +230,12 @@ class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest): class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[32, 128, 32, 32], dtype="float32") + data = fluid.data(name="data", + shape=[32, 128, 32, 32], + dtype="float32") fc_out1 = fluid.layers.fc(input=data, size=64, num_flatten_dims=2, @@ -241,9 +249,8 @@ class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest): self.trt_parameters = FCFusePassTRTDynamicDims4Cols2Test.TensorRTParam( 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols2Test.DynamicShapeParam( - { - 'data': [1, 64, 32, 32] - }, {'data': [64, 256, 32, 32]}, {'data': [32, 128, 32, 32]}, False) + {'data': [1, 64, 32, 32]}, {'data': [64, 256, 32, 32]}, + {'data': [32, 128, 32, 32]}, False) self.fetch_list = [out] def test_check_output(self): @@ -255,10 +262,12 @@ class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest): class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[32, 128, 32, 32], dtype="float32") + data = fluid.data(name="data", + shape=[32, 128, 32, 32], + dtype="float32") fc_out1 = fluid.layers.fc(input=data, size=64, num_flatten_dims=3, @@ -272,9 +281,8 @@ class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest): self.trt_parameters = FCFusePassTRTDynamicDims4Cols3Test.TensorRTParam( 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols3Test.DynamicShapeParam( - { - 'data': [1, 128, 32, 32] - }, {'data': [64, 128, 32, 32]}, {'data': [32, 128, 32, 32]}, False) + {'data': [1, 128, 32, 32]}, {'data': [64, 128, 32, 32]}, + {'data': [32, 128, 32, 32]}, False) self.fetch_list = [out] def test_check_output(self): diff --git 
a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_quant_dequant_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_quant_dequant_pass.py index 9e1991ae1ae..e62b6557844 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_quant_dequant_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_quant_dequant_pass.py @@ -25,10 +25,13 @@ from paddle.fluid.core import PassVersionChecker class FCQuantDequantFusePassTRTDims3Cols1Test(QuantDequantTest): + def setUp(self): + def network(): - self.data = fluid.data( - name='data', shape=[1, 28, 28], dtype='float32') + self.data = fluid.data(name='data', + shape=[1, 28, 28], + dtype='float32') self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') fc_out = fluid.layers.fc(input=self.data, size=10, @@ -62,10 +65,13 @@ class FCQuantDequantFusePassTRTDims3Cols1Test(QuantDequantTest): { 'data': [1, 28, 28], 'reshape2_1.tmp_0': [1, 1, 10] - }, {'data': [2, 28, 28], - 'reshape2_1.tmp_0': [2, 1, 10]}, - {'data': [1, 28, 28], - 'reshape2_1.tmp_0': [1, 1, 10]}, False) + }, { + 'data': [2, 28, 28], + 'reshape2_1.tmp_0': [2, 1, 10] + }, { + 'data': [1, 28, 28], + 'reshape2_1.tmp_0': [1, 1, 10] + }, False) self.activation_quantize_type = 'moving_average_abs_max' self.weight_quantize_type = 'channel_wise_abs_max' @@ -73,18 +79,23 @@ class FCQuantDequantFusePassTRTDims3Cols1Test(QuantDequantTest): #self.quant_dequant() if core.is_compiled_with_cuda(): use_gpu = True - self.check_output_with_option( - use_gpu, atol=1e-2, flatten=False, rtol=1e-2) + self.check_output_with_option(use_gpu, + atol=1e-2, + flatten=False, + rtol=1e-2) self.assertTrue( PassVersionChecker.IsCompatible( 'quant_conv2d_dequant_fuse_pass')) class FCQuantDequantFusePassTRTDims3Cols2Test(QuantDequantTest): + def setUp(self): + def network(): - self.data = fluid.data( - name='data', shape=[1, 28, 28], dtype='float32') + self.data = fluid.data(name='data', + shape=[1, 28, 28], + dtype='float32') self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') fc_out = fluid.layers.fc(input=self.data, size=28, @@ -119,10 +130,13 @@ class FCQuantDequantFusePassTRTDims3Cols2Test(QuantDequantTest): { 'data': [1, 28, 28], 'reshape2_0.tmp_0': [1, 784] - }, {'data': [4, 28, 28], - 'reshape2_0.tmp_0': - [4, 784]}, {'data': [1, 28, 28], - 'reshape2_0.tmp_0': [1, 784]}, False) + }, { + 'data': [4, 28, 28], + 'reshape2_0.tmp_0': [4, 784] + }, { + 'data': [1, 28, 28], + 'reshape2_0.tmp_0': [1, 784] + }, False) self.activation_quantize_type = 'moving_average_abs_max' self.weight_quantize_type = 'channel_wise_abs_max' @@ -130,18 +144,23 @@ class FCQuantDequantFusePassTRTDims3Cols2Test(QuantDequantTest): #self.quant_dequant() if core.is_compiled_with_cuda(): use_gpu = True - self.check_output_with_option( - use_gpu, atol=1e-1, flatten=False, rtol=1e-1) + self.check_output_with_option(use_gpu, + atol=1e-1, + flatten=False, + rtol=1e-1) self.assertTrue( PassVersionChecker.IsCompatible( 'quant_conv2d_dequant_fuse_pass')) class FCQuantDequantFusePassTRTDims3Cols3Test(QuantDequantTest): + def setUp(self): + def network(): - self.data = fluid.data( - name='data', shape=[1, 28, 28], dtype='float32') + self.data = fluid.data(name='data', + shape=[1, 28, 28], + dtype='float32') self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') label_shape = fluid.layers.reshape(self.label, shape=[1, 1, 1]) reshape_out = fluid.layers.reshape(self.data, shape=[1, 14, 14, 4]) @@ -195,8 +214,10 @@ class 
FCQuantDequantFusePassTRTDims3Cols3Test(QuantDequantTest): #self.quant_dequant() if core.is_compiled_with_cuda(): use_gpu = True - self.check_output_with_option( - use_gpu, atol=1e0, flatten=False, rtol=1e0) + self.check_output_with_option(use_gpu, + atol=1e0, + flatten=False, + rtol=1e0) self.assertTrue( PassVersionChecker.IsCompatible( 'quant_conv2d_dequant_fuse_pass')) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py index 9d0f8857e92..a02cdb6a347 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten2_matmul_fuse_pass.py @@ -64,14 +64,15 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest): self.add_ignore_check_case( teller1, IgnoreReasons.PASS_ACCURACY_ERROR, - "The pass error on TRT while shape of bias is not [out_size].", ) + "The pass error on TRT while shape of bias is not [out_size].", + ) def sample_program_config(self, draw): # 1. Generate shape and attr of flatten2 x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=10), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=10), + min_size=4, + max_size=4)) # [a, b, c, d] => [a, b*c*d] flatten_axis = 1 flatten_shape = [x_shape[0], x_shape[1] * x_shape[2] * x_shape[3]] @@ -83,27 +84,36 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest): # 3. Generate legal shape of input:Y of matmul y_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=2)) y_shape[0] = flatten_shape[1] # 4. Generate legal attr:axis of elementwise_add axis = draw(st.integers(min_value=-1, max_value=1)) if axis == 0: axis = -1 - bias_shape = [y_shape[1], ] + bias_shape = [ + y_shape[1], + ] flatten2_op = OpConfig( "flatten2", - inputs={"X": ["flatten2_x"], }, + inputs={ + "X": ["flatten2_x"], + }, axis=flatten_axis, - outputs={"Out": ["flatten2_out"], - "XShape": ["xshape"]}, ) + outputs={ + "Out": ["flatten2_out"], + "XShape": ["xshape"] + }, + ) matmul_op = OpConfig( "matmul", - inputs={"X": ["flatten2_out"], - "Y": ["matmul_y"]}, + inputs={ + "X": ["flatten2_out"], + "Y": ["matmul_y"] + }, outputs={"Out": ["matmul_out"]}, alpha=alpha, transpose_X=transpose_X, @@ -113,14 +123,18 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest): fused_transpose_X=[], fused_transpose_Y=[], fused_reshape_Out=[], - fused_transpose_Out=[], ) + fused_transpose_Out=[], + ) add_op = OpConfig( "elementwise_add", - inputs={"X": ["matmul_out"], - "Y": ["bias"]}, + inputs={ + "X": ["matmul_out"], + "Y": ["bias"] + }, outputs={"Out": ["add_out"]}, - axis=axis, ) + axis=axis, + ) ops = [flatten2_op, matmul_op, add_op] @@ -130,16 +144,18 @@ class TestFlatten2MatmulFusePass(PassAutoScanTest): "matmul_y": TensorConfig(shape=y_shape), "bias": TensorConfig(shape=bias_shape), }, - inputs={"flatten2_x": TensorConfig(shape=x_shape), }, - outputs=ops[-1].outputs["Out"], ) + inputs={ + "flatten2_x": TensorConfig(shape=x_shape), + }, + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=50, - passes=["trt_flatten2_matmul_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=50, + passes=["trt_flatten2_matmul_fuse_pass"]) if __name__ == "__main__": diff --git 
a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten_op.py index bb28fcf7085..8e5728f63f2 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten_op.py @@ -24,10 +24,12 @@ from paddle.fluid.core import AnalysisConfig class TRTFlattenTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 64, 64], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 6, 64, 64], + dtype="float32") flatten_out = self.append_flatten(data) out = fluid.layers.batch_norm(flatten_out, is_test=True) self.feeds = { @@ -50,10 +52,12 @@ class TRTFlattenTest(InferencePassTest): class TRTFlattenDynamicTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 64, 64], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 6, 64, 64], + dtype="float32") flatten_out = self.append_flatten(data) out = fluid.layers.batch_norm(flatten_out, is_test=True) self.feeds = { @@ -62,11 +66,14 @@ class TRTFlattenDynamicTest(InferencePassTest): self.enable_trt = True self.trt_parameters = TRTFlattenDynamicTest.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) - self.dynamic_shape_params = TRTFlattenDynamicTest.DynamicShapeParam({ - 'data': [2, 6, 64, 64], - 'flatten_0.tmp_0': [2, 6 * 64 * 64] - }, {'data': [2, 6, 64, 64], - 'flatten_0.tmp_0': [2, 6 * 64 * 64]}, { + self.dynamic_shape_params = TRTFlattenDynamicTest.DynamicShapeParam( + { + 'data': [2, 6, 64, 64], + 'flatten_0.tmp_0': [2, 6 * 64 * 64] + }, { + 'data': [2, 6, 64, 64], + 'flatten_0.tmp_0': [2, 6 * 64 * 64] + }, { 'data': [2, 6, 64, 64], 'flatten_0.tmp_0': [2, 6 * 64 * 64] }, False) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_nd_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_nd_op.py index a9d11f8fd18..a9a0b0f327d 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_nd_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_nd_op.py @@ -24,6 +24,7 @@ from paddle.fluid.core import AnalysisConfig class TRTGatherNdTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[-1, 3, 4], dtype="float32") @@ -33,19 +34,24 @@ class TRTGatherNdTest(InferencePassTest): self.feeds = { "data": np.random.random([2, 3, 4]).astype("float32"), - "index": - np.array([[[0, 1], [1, 0]], [[1, 2], [0, 1]]]).astype("int32"), + "index": np.array([[[0, 1], [1, 0]], [[1, 2], + [0, 1]]]).astype("int32"), } self.enable_trt = True self.trt_parameters = TRTGatherNdTest.TensorRTParam( 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) self.fetch_list = [out] - self.dynamic_shape_params = TRTGatherNdTest.DynamicShapeParam({ - 'data': [1, 3, 4], - 'index': [1, 2, 2] - }, {'data': [3, 3, 4], - 'index': [3, 2, 2]}, {'data': [3, 3, 4], - 'index': [3, 2, 2]}, False) + self.dynamic_shape_params = TRTGatherNdTest.DynamicShapeParam( + { + 'data': [1, 3, 4], + 'index': [1, 2, 2] + }, { + 'data': [3, 3, 4], + 'index': [3, 2, 2] + }, { + 'data': [3, 3, 4], + 'index': [3, 2, 2] + }, False) def test_check_output(self): if core.is_compiled_with_cuda(): @@ -56,10 +62,12 @@ class 
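The DynamicShapeParam rewrites in the flatten and gather_nd hunks all take the same shape after yapf: the call breaks after the opening parenthesis, and each of the three shape dictionaries (min, max, opt) is expanded to one entry per line, with the closing and opening braces sharing a '}, {' line. A hedged sketch with a stand-in function, since the real constructor lives in InferencePassTest:

def dynamic_shape_sketch(min_shapes, max_shapes, opt_shapes, flag):
    """Hypothetical stand-in for InferencePassTest.DynamicShapeParam."""
    return {
        "min": min_shapes,
        "max": max_shapes,
        "opt": opt_shapes,
        "flag": flag,
    }


params = dynamic_shape_sketch(
    {
        'data': [1, 3, 4],
        'index': [1, 2, 2]
    }, {
        'data': [3, 3, 4],
        'index': [3, 2, 2]
    }, {
        'data': [3, 3, 4],
        'index': [3, 2, 2]
    }, False)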
TRTGatherNdTest(InferencePassTest): class TRTGatherNdFp16Test(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 1280, 192], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 1280, 192], + dtype="float32") index = fluid.data(name="index", shape=[-1, 1028, 2], dtype="int32") gather_nd = fluid.layers.gather_nd(data, index) out = fluid.layers.batch_norm(gather_nd, is_test=True) @@ -73,13 +81,17 @@ class TRTGatherNdFp16Test(InferencePassTest): self.trt_parameters = TRTGatherNdFp16Test.TensorRTParam( 1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False) self.fetch_list = [out] - self.dynamic_shape_params = TRTGatherNdFp16Test.DynamicShapeParam({ - 'data': [1, 1280, 192], - 'index': [1, 1028, 2] - }, {'data': [3, 1280, 192], - 'index': - [3, 1028, 2]}, {'data': [3, 1280, 192], - 'index': [3, 1028, 2]}, False) + self.dynamic_shape_params = TRTGatherNdFp16Test.DynamicShapeParam( + { + 'data': [1, 1280, 192], + 'index': [1, 1028, 2] + }, { + 'data': [3, 1280, 192], + 'index': [3, 1028, 2] + }, { + 'data': [3, 1280, 192], + 'index': [3, 1028, 2] + }, False) def test_check_output(self, atol=1e-3): if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py index 57c295686f6..9536c8c4e08 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py @@ -24,6 +24,7 @@ from paddle.fluid.core import AnalysisConfig class TRTGatherTest1(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): @@ -40,12 +41,17 @@ class TRTGatherTest1(InferencePassTest): self.enable_trt = True self.trt_parameters = TRTGatherTest1.TensorRTParam( 1 << 30, self.bs, 1, AnalysisConfig.Precision.Float32, False, False) - self.dynamic_shape_params = TRTGatherTest1.DynamicShapeParam({ - 'data': [1, 1], - 'index': [1, 1] - }, {'data': [32, 128], - 'index': [3, 1]}, {'data': [32, 128], - 'index': [3, 1]}, False) + self.dynamic_shape_params = TRTGatherTest1.DynamicShapeParam( + { + 'data': [1, 1], + 'index': [1, 1] + }, { + 'data': [32, 128], + 'index': [3, 1] + }, { + 'data': [32, 128], + 'index': [3, 1] + }, False) self.fetch_list = [out] def set_params(self): @@ -61,6 +67,7 @@ class TRTGatherTest1(InferencePassTest): class TRTGatherTest2(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): @@ -77,12 +84,17 @@ class TRTGatherTest2(InferencePassTest): self.enable_trt = True self.trt_parameters = TRTGatherTest2.TensorRTParam( 1 << 30, self.bs, 1, AnalysisConfig.Precision.Float32, False, False) - self.dynamic_shape_params = TRTGatherTest2.DynamicShapeParam({ - 'data': [2, 4], - 'index': [1] - }, {'data': [256, 256], - 'index': [4]}, {'data': [64, 32], - 'index': [2]}, False) + self.dynamic_shape_params = TRTGatherTest2.DynamicShapeParam( + { + 'data': [2, 4], + 'index': [1] + }, { + 'data': [256, 256], + 'index': [4] + }, { + 'data': [64, 32], + 'index': [2] + }, False) self.fetch_list = [out] def set_params(self): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_group_norm_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_group_norm_op.py index 1bcbbc38c97..de59753d976 100644 --- 
a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_group_norm_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_group_norm_op.py @@ -24,10 +24,12 @@ from paddle.fluid.core import AnalysisConfig class TRTGroupNormTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 512, 12, 12], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 512, 12, 12], + dtype="float32") out = self.append_group_norm(data) self.feeds = { @@ -36,9 +38,9 @@ class TRTGroupNormTest(InferencePassTest): self.enable_trt = True self.trt_parameters = TRTGroupNormTest.TensorRTParam( 1 << 30, 1, 1, AnalysisConfig.Precision.Float32, False, False) - self.dynamic_shape_params = TRTGroupNormTest.DynamicShapeParam({ - 'data': [1, 512, 12, 12] - }, {'data': [1, 512, 12, 12]}, {'data': [1, 512, 12, 12]}, False) + self.dynamic_shape_params = TRTGroupNormTest.DynamicShapeParam( + {'data': [1, 512, 12, 12]}, {'data': [1, 512, 12, 12]}, + {'data': [1, 512, 12, 12]}, False) self.fetch_list = [out] def append_group_norm(self, data): @@ -48,12 +50,11 @@ class TRTGroupNormTest(InferencePassTest): bias_attr = fluid.ParamAttr( name='group_norm_bias', initializer=fluid.initializer.Constant(value=0.0)) - return fluid.layers.group_norm( - data, - groups=32, - epsilon=0.000009999999747378752, - param_attr=param_attr, - bias_attr=bias_attr) + return fluid.layers.group_norm(data, + groups=32, + epsilon=0.000009999999747378752, + param_attr=param_attr, + bias_attr=bias_attr) def test_check_output(self): if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inspector.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inspector.py index 3d4b2dc10c2..c69e0c98a40 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inspector.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inspector.py @@ -28,19 +28,21 @@ import subprocess class TensorRTInspectorTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[1, 16, 16], dtype="float32") - matmul_out = fluid.layers.matmul( - x=data, - y=data, - transpose_x=self.transpose_x, - transpose_y=self.transpose_y, - alpha=self.alpha) + matmul_out = fluid.layers.matmul(x=data, + y=data, + transpose_x=self.transpose_x, + transpose_y=self.transpose_y, + alpha=self.alpha) out = fluid.layers.batch_norm(matmul_out, is_test=True) - self.feeds = {"data": np.ones([1, 16, 16]).astype("float32"), } + self.feeds = { + "data": np.ones([1, 16, 16]).astype("float32"), + } self.enable_trt = True self.trt_parameters = InferencePassTest.TensorRTParam( 1 << 30, 1, 0, AnalysisConfig.Precision.Float32, False, False, True) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_instance_norm_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_instance_norm_op.py index d283465dcba..67e601cc520 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_instance_norm_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_instance_norm_op.py @@ -27,6 +27,7 @@ from paddle.fluid.core import AnalysisConfig class TRTInstanceNormTest(InferencePassTest): + def setUp(self): self.bs = 4 self.channel = 4 @@ -47,7 +48,9 @@ class TRTInstanceNormTest(InferencePassTest): out = fluid.layers.batch_norm(instance_norm_out, is_test=True) shape[0] = self.bs - 
self.feeds = {'in': np.random.random(shape).astype('float32'), } + self.feeds = { + 'in': np.random.random(shape).astype('float32'), + } self.fetch_list = [out] def check_output(self, remove_cache=False): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul.py index 99e99a83877..14b0e9fa145 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul.py @@ -22,19 +22,21 @@ from paddle.fluid.core import AnalysisConfig class TensorRTMatMulDims2Test(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[24, 24], dtype="float32") - matmul_out = fluid.layers.matmul( - x=data, - y=data, - transpose_x=self.transpose_x, - transpose_y=self.transpose_y, - alpha=self.alpha) + matmul_out = fluid.layers.matmul(x=data, + y=data, + transpose_x=self.transpose_x, + transpose_y=self.transpose_y, + alpha=self.alpha) out = fluid.layers.batch_norm(matmul_out, is_test=True) - self.feeds = {"data": np.ones([24, 24]).astype("float32"), } + self.feeds = { + "data": np.ones([24, 24]).astype("float32"), + } self.enable_trt = True self.trt_parameters = TensorRTMatMulDims2Test.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) @@ -54,20 +56,23 @@ class TensorRTMatMulDims2Test(InferencePassTest): class TensorRTMatMulTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 24, 24], dtype="float32") - matmul_out = fluid.layers.matmul( - x=data, - y=data, - transpose_x=self.transpose_x, - transpose_y=self.transpose_y, - alpha=self.alpha) + data = fluid.data(name="data", + shape=[-1, 6, 24, 24], + dtype="float32") + matmul_out = fluid.layers.matmul(x=data, + y=data, + transpose_x=self.transpose_x, + transpose_y=self.transpose_y, + alpha=self.alpha) out = fluid.layers.batch_norm(matmul_out, is_test=True) - self.feeds = {"data": np.ones([1, 6, 24, 24]).astype("float32"), } + self.feeds = { + "data": np.ones([1, 6, 24, 24]).astype("float32"), + } self.enable_trt = True self.trt_parameters = TensorRTMatMulTest.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) @@ -87,6 +92,7 @@ class TensorRTMatMulTest(InferencePassTest): class TensorRTMatMulTransposeXTest(TensorRTMatMulTest): + def set_params(self): self.transpose_x = True self.transpose_y = False @@ -94,6 +100,7 @@ class TensorRTMatMulTransposeXTest(TensorRTMatMulTest): class TensorRTMatMulTransposeYTest(TensorRTMatMulTest): + def set_params(self): self.transpose_x = False self.transpose_y = True @@ -101,6 +108,7 @@ class TensorRTMatMulTransposeYTest(TensorRTMatMulTest): class TensorRTMatMulScaleTest(TensorRTMatMulTest): + def set_params(self): self.transpose_x = False self.transpose_y = False @@ -108,19 +116,20 @@ class TensorRTMatMulScaleTest(TensorRTMatMulTest): class TensorRTMatMulBroadcastTest(InferencePassTest): + def setUp(self): self.set_params() place = fluid.CPUPlace() with fluid.program_guard(self.main_program, self.startup_program): - data_x = fluid.data( - name="data_x", shape=[-1, 6, 24], dtype="float32") + data_x = fluid.data(name="data_x", + shape=[-1, 6, 24], + dtype="float32") data_y = fluid.data(name="data_y", shape=[24, 16], dtype="float32") - matmul_out = fluid.layers.matmul( - x=data_x, - 
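A smaller recurring change, visible in the matmul feeds above: a single-entry feeds dictionary written inline with a trailing comma is expanded onto its own lines. A plain numpy sketch of the expanded form:

import numpy as np

feeds = {
    "data": np.ones([24, 24]).astype("float32"),
}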
y=data_y, - transpose_x=self.transpose_x, - transpose_y=self.transpose_y, - alpha=self.alpha) + matmul_out = fluid.layers.matmul(x=data_x, + y=data_y, + transpose_x=self.transpose_x, + transpose_y=self.transpose_y, + alpha=self.alpha) out = fluid.layers.batch_norm(matmul_out, is_test=True) self.feeds = { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul_quant_dequant.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul_quant_dequant.py index adf9ce4aead..01f65b54bd4 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul_quant_dequant.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul_quant_dequant.py @@ -23,19 +23,20 @@ from paddle.fluid.core import AnalysisConfig class TensorRTMatMulQuantDequantDims3Test(QuantDequantTest): + def setUp(self): self.set_params() def network(): - self.data = fluid.data( - name='data', shape=[1, 28, 28], dtype='float32') + self.data = fluid.data(name='data', + shape=[1, 28, 28], + dtype='float32') self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') - matmul_out = fluid.layers.matmul( - x=self.data, - y=self.data, - transpose_x=self.transpose_x, - transpose_y=self.transpose_y, - alpha=self.alpha) + matmul_out = fluid.layers.matmul(x=self.data, + y=self.data, + transpose_x=self.transpose_x, + transpose_y=self.transpose_y, + alpha=self.alpha) fc_out = fluid.layers.fc(input=matmul_out, size=10, num_flatten_dims=1, @@ -76,14 +77,17 @@ class TensorRTMatMulQuantDequantDims3Test(QuantDequantTest): #self.quant_dequant() if core.is_compiled_with_cuda(): use_gpu = True - self.check_output_with_option( - use_gpu, atol=1, flatten=False, rtol=1e-1) + self.check_output_with_option(use_gpu, + atol=1, + flatten=False, + rtol=1e-1) self.assertTrue( PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) class TensorRTMatMulQuantDequantDims3TransposeXTest( TensorRTMatMulQuantDequantDims3Test): + def set_params(self): self.transpose_x = True self.transpose_y = False @@ -92,6 +96,7 @@ class TensorRTMatMulQuantDequantDims3TransposeXTest( class TensorRTMatMulQuantDequantDims3TransposeYTest( TensorRTMatMulQuantDequantDims3Test): + def set_params(self): self.transpose_x = False self.transpose_y = True @@ -100,6 +105,7 @@ class TensorRTMatMulQuantDequantDims3TransposeYTest( class TensorRTMatMulQuantDequantDims3TransposeXYTest( TensorRTMatMulQuantDequantDims3Test): + def set_params(self): self.transpose_x = True self.transpose_y = True @@ -107,20 +113,21 @@ class TensorRTMatMulQuantDequantDims3TransposeXYTest( class TensorRTMatMulQuantDequantDims4Test(QuantDequantTest): + def setUp(self): self.set_params() def network(): - self.data = fluid.data( - name='data', shape=[1, 28, 28], dtype='float32') + self.data = fluid.data(name='data', + shape=[1, 28, 28], + dtype='float32') self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') reshape_out = fluid.layers.reshape(self.data, shape=[1, 4, 14, 14]) - matmul_out = fluid.layers.matmul( - x=reshape_out, - y=reshape_out, - transpose_x=self.transpose_x, - transpose_y=self.transpose_y, - alpha=self.alpha) + matmul_out = fluid.layers.matmul(x=reshape_out, + y=reshape_out, + transpose_x=self.transpose_x, + transpose_y=self.transpose_y, + alpha=self.alpha) out = fluid.layers.batch_norm(matmul_out, is_test=True) fc_out = fluid.layers.fc(input=matmul_out, size=10, @@ -162,14 +169,17 @@ class TensorRTMatMulQuantDequantDims4Test(QuantDequantTest): #self.quant_dequant() if core.is_compiled_with_cuda(): use_gpu = True - 
self.check_output_with_option( - use_gpu, atol=1, flatten=False, rtol=1e-1) + self.check_output_with_option(use_gpu, + atol=1, + flatten=False, + rtol=1e-1) self.assertTrue( PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) class TensorRTMatMulQuantDequantDims4TransposeXTest( TensorRTMatMulQuantDequantDims4Test): + def set_params(self): self.transpose_x = True self.transpose_y = False @@ -178,6 +188,7 @@ class TensorRTMatMulQuantDequantDims4TransposeXTest( class TensorRTMatMulQuantDequantDims4TransposeYTest( TensorRTMatMulQuantDequantDims4Test): + def set_params(self): self.transpose_x = False self.transpose_y = True @@ -186,6 +197,7 @@ class TensorRTMatMulQuantDequantDims4TransposeYTest( class TensorRTMatMulQuantDequantDims4TransposeXYTest( TensorRTMatMulQuantDequantDims4Test): + def set_params(self): self.transpose_x = True self.transpose_y = True @@ -193,19 +205,20 @@ class TensorRTMatMulQuantDequantDims4TransposeXYTest( class TensorRTMatMulQuantDequantDims3DynamicTest(QuantDequantTest): + def setUp(self): self.set_params() def network(): - self.data = fluid.data( - name='data', shape=[-1, 28, 28], dtype='float32') + self.data = fluid.data(name='data', + shape=[-1, 28, 28], + dtype='float32') self.label = fluid.data(name='label', shape=[1, 1], dtype='int64') - matmul_out = fluid.layers.matmul( - x=self.data, - y=self.data, - transpose_x=self.transpose_x, - transpose_y=self.transpose_y, - alpha=self.alpha) + matmul_out = fluid.layers.matmul(x=self.data, + y=self.data, + transpose_x=self.transpose_x, + transpose_y=self.transpose_y, + alpha=self.alpha) out = fluid.layers.batch_norm(matmul_out, is_test=True) fc_out = fluid.layers.fc(input=matmul_out, size=10, @@ -236,9 +249,8 @@ class TensorRTMatMulQuantDequantDims3DynamicTest(QuantDequantTest): self.trt_parameters = TensorRTMatMulQuantDequantDims3DynamicTest.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Int8, False, False) self.dynamic_shape_params = TensorRTMatMulQuantDequantDims3DynamicTest.DynamicShapeParam( - { - 'data': [1, 28, 28] - }, {'data': [4, 28, 28]}, {'data': [3, 28, 28]}, False) + {'data': [1, 28, 28]}, {'data': [4, 28, 28]}, {'data': [3, 28, 28]}, + False) self.activation_quantize_type = 'moving_average_abs_max' self.weight_quantize_type = 'channel_wise_abs_max' @@ -251,14 +263,17 @@ class TensorRTMatMulQuantDequantDims3DynamicTest(QuantDequantTest): #self.quant_dequant() if core.is_compiled_with_cuda(): use_gpu = True - self.check_output_with_option( - use_gpu, atol=1, flatten=False, rtol=1e-1) + self.check_output_with_option(use_gpu, + atol=1, + flatten=False, + rtol=1e-1) self.assertTrue( PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) class TensorRTMatMulQuantDequantDims4TransposeXDynamicTest( TensorRTMatMulQuantDequantDims3DynamicTest): + def set_params(self): self.transpose_x = True self.transpose_y = False @@ -267,6 +282,7 @@ class TensorRTMatMulQuantDequantDims4TransposeXDynamicTest( class TensorRTMatMulQuantDequantDims4TransposeYDynamicTest( TensorRTMatMulQuantDequantDims3DynamicTest): + def set_params(self): self.transpose_x = False self.transpose_y = True @@ -275,6 +291,7 @@ class TensorRTMatMulQuantDequantDims4TransposeYDynamicTest( class TensorRTMatMulQuantDequantDims4TransposeXYDynamicTest( TensorRTMatMulQuantDequantDims3DynamicTest): + def set_params(self): self.transpose_x = True self.transpose_y = True diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_multiclass_nms3_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_multiclass_nms3_op.py 
index 8540555497d..1911155ca70 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_multiclass_nms3_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_multiclass_nms3_op.py @@ -131,8 +131,8 @@ def multiclass_nms(bboxes, score_threshold, 'nms_top_k', nms_top_k, 'nms_threshold', nms_threshold, 'keep_top_k', keep_top_k, 'nms_eta', nms_eta, 'normalized', normalized) - output, index, nms_rois_num = core.ops.multiclass_nms3(bboxes, scores, - rois_num, *attrs) + output, index, nms_rois_num = core.ops.multiclass_nms3( + bboxes, scores, rois_num, *attrs) if not return_index: index = None return output, nms_rois_num, index @@ -153,19 +153,18 @@ def multiclass_nms(bboxes, dtype='int32') outputs['NmsRoisNum'] = nms_rois_num - helper.append_op( - type="multiclass_nms3", - inputs=inputs, - attrs={ - 'background_label': background_label, - 'score_threshold': score_threshold, - 'nms_top_k': nms_top_k, - 'nms_threshold': nms_threshold, - 'keep_top_k': keep_top_k, - 'nms_eta': nms_eta, - 'normalized': normalized - }, - outputs=outputs) + helper.append_op(type="multiclass_nms3", + inputs=inputs, + attrs={ + 'background_label': background_label, + 'score_threshold': score_threshold, + 'nms_top_k': nms_top_k, + 'nms_threshold': nms_threshold, + 'keep_top_k': keep_top_k, + 'nms_eta': nms_eta, + 'normalized': normalized + }, + outputs=outputs) output.stop_gradient = True index.stop_gradient = True if not return_index: @@ -177,6 +176,7 @@ def multiclass_nms(bboxes, class TensorRTMultiClassNMS3Test(InferencePassTest): + def setUp(self): self.enable_trt = True self.enable_tensorrt_varseqlen = True @@ -197,12 +197,12 @@ class TensorRTMultiClassNMS3Test(InferencePassTest): def build(self): with fluid.program_guard(self.main_program, self.startup_program): - boxes = fluid.data( - name='bboxes', shape=[-1, self.num_boxes, 4], dtype='float32') - scores = fluid.data( - name='scores', - shape=[-1, self.num_classes, self.num_boxes], - dtype='float32') + boxes = fluid.data(name='bboxes', + shape=[-1, self.num_boxes, 4], + dtype='float32') + scores = fluid.data(name='scores', + shape=[-1, self.num_classes, self.num_boxes], + dtype='float32') multiclass_nms_out, _, _ = multiclass_nms( bboxes=boxes, scores=scores, @@ -244,10 +244,12 @@ class TensorRTMultiClassNMS3Test(InferencePassTest): } opt_shape = max_shape dynamic_shape_opt = [ - None, InferencePassTest.DynamicShapeParam({ - 'bboxes': [1, 1, 4], - 'scores': [1, 1, 1] - }, max_shape, opt_shape, False) + None, + InferencePassTest.DynamicShapeParam( + { + 'bboxes': [1, 1, 4], + 'scores': [1, 1, 1] + }, max_shape, opt_shape, False) ] for precision, serialize, dynamic_shape in itertools.product( precision_opt, serialize_opt, dynamic_shape_opt): @@ -281,10 +283,11 @@ class TensorRTMultiClassNMS3Test(InferencePassTest): 'scores': [self.bs, self.num_classes, self.num_boxes], } opt_shape = max_shape - self.dynamic_shape_params = InferencePassTest.DynamicShapeParam({ - 'bboxes': [1, 1, 4], - 'scores': [1, 1, 1] - }, max_shape, opt_shape, False) + self.dynamic_shape_params = InferencePassTest.DynamicShapeParam( + { + 'bboxes': [1, 1, 4], + 'scores': [1, 1, 1] + }, max_shape, opt_shape, False) self.run_test() def test_background(self): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_multiclass_nms_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_multiclass_nms_op.py index 5c9ad5de5a7..5e04241f149 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_multiclass_nms_op.py +++ 
b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_multiclass_nms_op.py @@ -23,6 +23,7 @@ from paddle.fluid.core import AnalysisConfig class TensorRTMultiClassNMSTest(InferencePassTest): + def setUp(self): self.enable_trt = True self.enable_tensorrt_varseqlen = True @@ -42,12 +43,12 @@ class TensorRTMultiClassNMSTest(InferencePassTest): def build(self): with fluid.program_guard(self.main_program, self.startup_program): - boxes = fluid.data( - name='bboxes', shape=[-1, self.num_boxes, 4], dtype='float32') - scores = fluid.data( - name='scores', - shape=[-1, self.num_classes, self.num_boxes], - dtype='float32') + boxes = fluid.data(name='bboxes', + shape=[-1, self.num_boxes, 4], + dtype='float32') + scores = fluid.data(name='scores', + shape=[-1, self.num_classes, self.num_boxes], + dtype='float32') multiclass_nms_out = fluid.layers.multiclass_nms( bboxes=boxes, scores=scores, @@ -88,10 +89,12 @@ class TensorRTMultiClassNMSTest(InferencePassTest): } opt_shape = max_shape dynamic_shape_opt = [ - None, InferencePassTest.DynamicShapeParam({ - 'bboxes': [1, 1, 4], - 'scores': [1, 1, 1] - }, max_shape, opt_shape, False) + None, + InferencePassTest.DynamicShapeParam( + { + 'bboxes': [1, 1, 4], + 'scores': [1, 1, 1] + }, max_shape, opt_shape, False) ] for precision, serialize, dynamic_shape in itertools.product( precision_opt, serialize_opt, dynamic_shape_opt): @@ -125,10 +128,11 @@ class TensorRTMultiClassNMSTest(InferencePassTest): 'scores': [self.bs, self.num_classes, self.num_boxes], } opt_shape = max_shape - self.dynamic_shape_params = InferencePassTest.DynamicShapeParam({ - 'bboxes': [1, 1, 4], - 'scores': [1, 1, 1] - }, max_shape, opt_shape, False) + self.dynamic_shape_params = InferencePassTest.DynamicShapeParam( + { + 'bboxes': [1, 1, 4], + 'scores': [1, 1, 1] + }, max_shape, opt_shape, False) self.run_test() def test_background(self): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_nearest_interp_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_nearest_interp_op.py index 04631534ada..7aba95a0399 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_nearest_interp_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_nearest_interp_op.py @@ -24,6 +24,7 @@ from paddle.fluid.core import AnalysisConfig class TRTNearestInterpTest(InferencePassTest): + def setUp(self): self.set_params() @@ -53,7 +54,9 @@ class TRTNearestInterpTest(InferencePassTest): self.channels ] - self.feeds = {'data': np.random.random(shape).astype('float32'), } + self.feeds = { + 'data': np.random.random(shape).astype('float32'), + } self.enable_trt = True self.trt_parameters = TRTNearestInterpTest.TensorRTParam( 1 << 30, self.bs, 1, AnalysisConfig.Precision.Float32, False, False) @@ -71,16 +74,14 @@ class TRTNearestInterpTest(InferencePassTest): def append_nearest_interp(self, data): if self.scale > 0.: - return fluid.layers.resize_nearest( - data, - scale=self.scale, - align_corners=self.align_corners, - data_format=self.data_layout) - return fluid.layers.resize_nearest( - data, - out_shape=self.resize_shape, - align_corners=self.align_corners, - data_format=self.data_layout) + return fluid.layers.resize_nearest(data, + scale=self.scale, + align_corners=self.align_corners, + data_format=self.data_layout) + return fluid.layers.resize_nearest(data, + out_shape=self.resize_shape, + align_corners=self.align_corners, + data_format=self.data_layout) def test_check_output(self): if core.is_compiled_with_cuda(): @@ -91,6 +92,7 @@ class 
TRTNearestInterpTest(InferencePassTest): class TRTNearestInterpTest1(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = -1 @@ -102,6 +104,7 @@ class TRTNearestInterpTest1(TRTNearestInterpTest): class TRTNearestInterpTest2(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = 2. @@ -113,6 +116,7 @@ class TRTNearestInterpTest2(TRTNearestInterpTest): class TRTNearestInterpTest3(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = 0 @@ -124,6 +128,7 @@ class TRTNearestInterpTest3(TRTNearestInterpTest): class TRTNearestInterpTest4(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = -1 @@ -135,6 +140,7 @@ class TRTNearestInterpTest4(TRTNearestInterpTest): class TRTNearestInterpTest5(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = -1 @@ -146,6 +152,7 @@ class TRTNearestInterpTest5(TRTNearestInterpTest): class TRTNearestInterpTest6(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = 2. @@ -157,6 +164,7 @@ class TRTNearestInterpTest6(TRTNearestInterpTest): class TRTNearestInterpTest7(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = -1 @@ -168,6 +176,7 @@ class TRTNearestInterpTest7(TRTNearestInterpTest): class TRTNearestInterpTest8(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = -1 @@ -179,6 +188,7 @@ class TRTNearestInterpTest8(TRTNearestInterpTest): class TRTNearestInterpTest9(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = -1 diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_nearest_interp_v2_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_nearest_interp_v2_op.py index 73c1c5d3618..1496b96ce21 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_nearest_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_nearest_interp_v2_op.py @@ -25,6 +25,7 @@ from paddle.fluid.core import AnalysisConfig class TRTNearestInterpTest(InferencePassTest): + def setUp(self): self.set_params() @@ -54,7 +55,9 @@ class TRTNearestInterpTest(InferencePassTest): self.channels ] - self.feeds = {'data': np.random.random(shape).astype('float32'), } + self.feeds = { + 'data': np.random.random(shape).astype('float32'), + } self.enable_trt = True self.trt_parameters = TRTNearestInterpTest.TensorRTParam( 1 << 30, self.bs, 1, AnalysisConfig.Precision.Float32, False, False) @@ -71,18 +74,16 @@ class TRTNearestInterpTest(InferencePassTest): def append_nearest_interp(self, data): if self.scale > 0.: - return F.interpolate( - data, - scale_factor=self.scale, - align_corners=self.align_corners, - mode='nearest', - data_format=self.data_layout) - return F.interpolate( - data, - size=self.resize_shape, - align_corners=self.align_corners, - mode='nearest', - data_format=self.data_layout) + return F.interpolate(data, + scale_factor=self.scale, + align_corners=self.align_corners, + mode='nearest', + data_format=self.data_layout) + return F.interpolate(data, + size=self.resize_shape, + align_corners=self.align_corners, + mode='nearest', + data_format=self.data_layout) def test_check_output(self): if core.is_compiled_with_cuda(): @@ -93,6 +94,7 @@ class TRTNearestInterpTest(InferencePassTest): class TRTNearestInterpTest1(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = 2. 
@@ -104,6 +106,7 @@ class TRTNearestInterpTest1(TRTNearestInterpTest): class TRTNearestInterpTest2(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = -1 @@ -115,6 +118,7 @@ class TRTNearestInterpTest2(TRTNearestInterpTest): class TRTNearestInterpTest3(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = -1 @@ -126,6 +130,7 @@ class TRTNearestInterpTest3(TRTNearestInterpTest): class TRTNearestInterpTest4(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = 2. @@ -137,6 +142,7 @@ class TRTNearestInterpTest4(TRTNearestInterpTest): class TRTNearestInterpTest5(TRTNearestInterpTest): + def set_params(self): self.bs = 4 self.scale = -1 diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py index 060f6c6c5f0..0a61b83b8ce 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py @@ -23,10 +23,12 @@ from paddle.fluid.core import AnalysisConfig class PadOpTRTTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[1, 3, 128, 128], dtype="float32") + data = fluid.data(name="data", + shape=[1, 3, 128, 128], + dtype="float32") pad_out = fluid.layers.pad(x=data, paddings=[0, 0, 0, 0, 0, 1, 1, 2], pad_value=0.0) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py index 6fbddcf5a1f..22f278d6d5d 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py @@ -26,6 +26,7 @@ from paddle.fluid.core import AnalysisConfig class TensorRTPool3dTest(InferencePassTest): + def setUp(self): self.bs = 1 self.channel = 3 @@ -43,7 +44,8 @@ class TensorRTPool3dTest(InferencePassTest): self.serialize = False self.precision = AnalysisConfig.Precision.Float32 self.feeds = { - 'data': np.random.random( + 'data': + np.random.random( [self.bs, self.channel, self.depth, self.height, self.width]).astype('float32'), } @@ -61,15 +63,14 @@ class TensorRTPool3dTest(InferencePassTest): name='data', shape=[-1, self.channel, self.depth, self.height, self.width], dtype='float32') - pool_out = fluid.layers.pool3d( - input=data, - pool_size=self.pool_size, - pool_type=self.pool_type, - pool_stride=self.pool_stride, - pool_padding=self.pool_padding, - global_pooling=self.global_pooling, - ceil_mode=self.ceil_mode, - exclusive=self.exclusive) + pool_out = fluid.layers.pool3d(input=data, + pool_size=self.pool_size, + pool_type=self.pool_type, + pool_stride=self.pool_stride, + pool_padding=self.pool_padding, + global_pooling=self.global_pooling, + ceil_mode=self.ceil_mode, + exclusive=self.exclusive) #out = fluid.layers.batch_norm(pool_out, is_test=True) self.fetch_list = [pool_out] @@ -91,25 +92,27 @@ class TensorRTPool3dTest(InferencePassTest): AnalysisConfig.Precision.Float32, AnalysisConfig.Precision.Half ] serialize_options = [False, True] - dynamic_shape_profile = InferencePassTest.DynamicShapeParam({ - 'data': [ - self.bs, self.channel, self.depth // 2, self.height // 2, - self.width // 2 - ] - }, { - 'data': - [self.bs, self.channel, self.depth, self.height, self.width] - }, { - 'data': - [self.bs, self.channel, self.depth, self.height, self.width] - }, False) + dynamic_shape_profile = 
InferencePassTest.DynamicShapeParam( + { + 'data': [ + self.bs, self.channel, self.depth // 2, self.height // 2, + self.width // 2 + ] + }, { + 'data': + [self.bs, self.channel, self.depth, self.height, self.width] + }, { + 'data': + [self.bs, self.channel, self.depth, self.height, self.width] + }, False) dynamic_shape_options = [None, dynamic_shape_profile] for precision, serialize, dynamic_shape in itertools.product( precision_options, serialize_options, dynamic_shape_options): is_dynamic = True if dynamic_shape_options is not None else False - with self.subTest('Precision: {}, Serialize: {}, Dynamic: {}'. - format(precision, serialize, is_dynamic)): + with self.subTest( + 'Precision: {}, Serialize: {}, Dynamic: {}'.format( + precision, serialize, is_dynamic)): self.precision = precision self.serialize = serialize self.dynamic_shape_params = dynamic_shape @@ -117,6 +120,7 @@ class TensorRTPool3dTest(InferencePassTest): class TensorRTAvgPool3dTest(TensorRTPool3dTest): + def set_extra_config(self): self.pool_size = 2 self.pool_type = 'avg' @@ -128,6 +132,7 @@ class TensorRTAvgPool3dTest(TensorRTPool3dTest): class TensorRTGlobalPool3dTest(TensorRTPool3dTest): + def set_extra_config(self): self.pool_size = 2 self.pool_type = 'max' @@ -139,6 +144,7 @@ class TensorRTGlobalPool3dTest(TensorRTPool3dTest): class TensorRTCeilPool3dTest(TensorRTPool3dTest): + def set_extra_config(self): self.pool_size = 2 self.pool_type = 'max' @@ -150,6 +156,7 @@ class TensorRTCeilPool3dTest(TensorRTPool3dTest): class TensorRTExclusivePool3dTest(TensorRTPool3dTest): + def set_extra_config(self): self.pool_size = 2 self.pool_type = 'max' @@ -161,6 +168,7 @@ class TensorRTExclusivePool3dTest(TensorRTPool3dTest): class TensorRTSamePaddingPool3dTest(InferencePassTest): + def set_extra_config(self): self.pool_size = 2 self.pool_type = 'max' @@ -172,6 +180,7 @@ class TensorRTSamePaddingPool3dTest(InferencePassTest): class TensorRTValidPaddingPool3dTest(InferencePassTest): + def set_extra_config(self): self.pool_size = 2 self.pool_type = 'max' @@ -183,6 +192,7 @@ class TensorRTValidPaddingPool3dTest(InferencePassTest): class TensorRTAdaptiveAvgPool3DTest(InferencePassTest): + def setUp(self): self.bs = 1 self.channel = 3 @@ -193,7 +203,8 @@ class TensorRTAdaptiveAvgPool3DTest(InferencePassTest): self.serialize = False self.precision = AnalysisConfig.Precision.Float32 self.feeds = { - 'data': np.random.random( + 'data': + np.random.random( [self.bs, self.channel, self.depth, self.height, self.width]).astype('float32'), } @@ -230,25 +241,27 @@ class TensorRTAdaptiveAvgPool3DTest(InferencePassTest): AnalysisConfig.Precision.Float32, AnalysisConfig.Precision.Half ] serialize_options = [False, True] - dynamic_shape_profile = InferencePassTest.DynamicShapeParam({ - 'data': [ - self.bs, self.channel, self.depth // 2, self.height // 2, - self.width // 2 - ] - }, { - 'data': - [self.bs, self.channel, self.depth, self.height, self.width] - }, { - 'data': - [self.bs, self.channel, self.depth, self.height, self.width] - }, False) + dynamic_shape_profile = InferencePassTest.DynamicShapeParam( + { + 'data': [ + self.bs, self.channel, self.depth // 2, self.height // 2, + self.width // 2 + ] + }, { + 'data': + [self.bs, self.channel, self.depth, self.height, self.width] + }, { + 'data': + [self.bs, self.channel, self.depth, self.height, self.width] + }, False) dynamic_shape_options = [None, dynamic_shape_profile] for precision, serialize, dynamic_shape in itertools.product( precision_options, serialize_options, dynamic_shape_options): 
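The pool3d hunks above also show how yapf re-wraps a long subTest message: instead of breaking before .format, the format string and its arguments are kept together inside the enclosing call. A short runnable sketch of the same reflow, with placeholder option lists in place of the real precision and shape profiles:

import itertools
import unittest


class SubTestFormatSketch(unittest.TestCase):

    def test_combinations(self):
        precision_options = ["Float32", "Half"]
        serialize_options = [False, True]
        dynamic_shape_options = [None, "profile"]
        for precision, serialize, dynamic_shape in itertools.product(
                precision_options, serialize_options, dynamic_shape_options):
            is_dynamic = dynamic_shape is not None
            with self.subTest(
                    'Precision: {}, Serialize: {}, Dynamic: {}'.format(
                        precision, serialize, is_dynamic)):
                self.assertIn(precision, precision_options)


if __name__ == '__main__':
    unittest.main()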
is_dynamic = True if dynamic_shape_options is not None else False - with self.subTest('Precision: {}, Serialize: {}, Dynamic: {}'. - format(precision, serialize, is_dynamic)): + with self.subTest( + 'Precision: {}, Serialize: {}, Dynamic: {}'.format( + precision, serialize, is_dynamic)): self.precision = precision self.serialize = serialize self.dynamic_shape_params = dynamic_shape @@ -256,6 +269,7 @@ class TensorRTAdaptiveAvgPool3DTest(InferencePassTest): class TensorRTAdaptiveMaxPool3DTest(InferencePassTest): + def setUp(self): self.bs = 1 self.channel = 3 @@ -266,7 +280,8 @@ class TensorRTAdaptiveMaxPool3DTest(InferencePassTest): self.serialize = False self.precision = AnalysisConfig.Precision.Float32 self.feeds = { - 'data': np.random.random( + 'data': + np.random.random( [self.bs, self.channel, self.depth, self.height, self.width]).astype('float32'), } @@ -303,25 +318,27 @@ class TensorRTAdaptiveMaxPool3DTest(InferencePassTest): AnalysisConfig.Precision.Float32, AnalysisConfig.Precision.Half ] serialize_options = [False, True] - dynamic_shape_profile = InferencePassTest.DynamicShapeParam({ - 'data': [ - self.bs, self.channel, self.depth // 2, self.height // 2, - self.width // 2 - ] - }, { - 'data': - [self.bs, self.channel, self.depth, self.height, self.width] - }, { - 'data': - [self.bs, self.channel, self.depth, self.height, self.width] - }, False) + dynamic_shape_profile = InferencePassTest.DynamicShapeParam( + { + 'data': [ + self.bs, self.channel, self.depth // 2, self.height // 2, + self.width // 2 + ] + }, { + 'data': + [self.bs, self.channel, self.depth, self.height, self.width] + }, { + 'data': + [self.bs, self.channel, self.depth, self.height, self.width] + }, False) dynamic_shape_options = [None, dynamic_shape_profile] for precision, serialize, dynamic_shape in itertools.product( precision_options, serialize_options, dynamic_shape_options): is_dynamic = True if dynamic_shape_options is not None else False - with self.subTest('Precision: {}, Serialize: {}, Dynamic: {}'. 
- format(precision, serialize, is_dynamic)): + with self.subTest( + 'Precision: {}, Serialize: {}, Dynamic: {}'.format( + precision, serialize, is_dynamic)): self.precision = precision self.serialize = serialize self.dynamic_shape_params = dynamic_shape diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py index d71937f986e..3812642d2a5 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py @@ -25,6 +25,7 @@ from paddle.fluid.core import AnalysisConfig class TensorRTPoolTest(InferencePassTest): + def setUp(self): self.bs = 1 self.channel = 2 @@ -55,19 +56,17 @@ class TensorRTPoolTest(InferencePassTest): 1 << 30, self.bs, 0, self.precision, self.serialize, False) with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name='data', - shape=[-1, self.channel, self.height, self.width], - dtype='float32') - pool_out = fluid.layers.pool2d( - input=data, - pool_size=self.pool_size, - pool_type=self.pool_type, - pool_stride=self.pool_stride, - pool_padding=self.pool_padding, - global_pooling=self.global_pooling, - ceil_mode=self.ceil_mode, - exclusive=self.exclusive) + data = fluid.data(name='data', + shape=[-1, self.channel, self.height, self.width], + dtype='float32') + pool_out = fluid.layers.pool2d(input=data, + pool_size=self.pool_size, + pool_type=self.pool_type, + pool_stride=self.pool_stride, + pool_padding=self.pool_padding, + global_pooling=self.global_pooling, + ceil_mode=self.ceil_mode, + exclusive=self.exclusive) out = fluid.layers.batch_norm(pool_out, is_test=True) self.fetch_list = [out] @@ -100,8 +99,9 @@ class TensorRTPoolTest(InferencePassTest): for precision, serialize, dynamic_shape in itertools.product( precision_options, serialize_options, dynamic_shape_options): is_dynamic = True if dynamic_shape_options is not None else False - with self.subTest('Precision: {}, Serialize: {}, Dynamic: {}'. 
- format(precision, serialize, is_dynamic)): + with self.subTest( + 'Precision: {}, Serialize: {}, Dynamic: {}'.format( + precision, serialize, is_dynamic)): self.precision = precision self.serialize = serialize self.dynamic_shape = dynamic_shape @@ -109,6 +109,7 @@ class TensorRTPoolTest(InferencePassTest): class TensorRTAvgPoolTest(TensorRTPoolTest): + def set_extra_config(self): self.pool_size = 2 self.pool_type = 'avg' @@ -120,6 +121,7 @@ class TensorRTAvgPoolTest(TensorRTPoolTest): class TensorRTAvgCeilPoolTest(TensorRTPoolTest): + def set_extra_config(self): self.pool_size = 2 self.pool_type = 'avg' @@ -131,6 +133,7 @@ class TensorRTAvgCeilPoolTest(TensorRTPoolTest): class TensorRTGlobalPoolTest(TensorRTPoolTest): + def set_extra_config(self): self.pool_size = 2 self.pool_type = 'max' @@ -142,6 +145,7 @@ class TensorRTGlobalPoolTest(TensorRTPoolTest): class TensorRTCeilPoolTest(TensorRTPoolTest): + def set_extra_config(self): self.pool_size = 2 self.pool_type = 'max' @@ -153,6 +157,7 @@ class TensorRTCeilPoolTest(TensorRTPoolTest): class TensorRTExclusivePoolTest(TensorRTPoolTest): + def set_extra_config(self): self.pool_size = 2 self.pool_type = 'max' @@ -164,6 +169,7 @@ class TensorRTExclusivePoolTest(TensorRTPoolTest): class TensorRTSamePaddingPoolTest(InferencePassTest): + def set_extra_config(self): self.pool_size = 2 self.pool_type = 'max' @@ -175,6 +181,7 @@ class TensorRTSamePaddingPoolTest(InferencePassTest): class TensorRTValidPaddingPoolTest(InferencePassTest): + def set_extra_config(self): self.pool_size = 2 self.pool_type = 'max' diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py index 7ccbe673fd6..1086e1428e0 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py @@ -24,12 +24,15 @@ from paddle.fluid.core import AnalysisConfig class TRTReduceMeanTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, -1, -1], dtype="float32") - reduce_mean = fluid.layers.reduce_mean( - data, dim=[2, -1], keep_dim=True) + data = fluid.data(name="data", + shape=[-1, 3, -1, -1], + dtype="float32") + reduce_mean = fluid.layers.reduce_mean(data, + dim=[2, -1], + keep_dim=True) out = fluid.layers.batch_norm(reduce_mean, is_test=True) self.feeds = { @@ -39,9 +42,9 @@ class TRTReduceMeanTest(InferencePassTest): self.trt_parameters = TRTReduceMeanTest.TensorRTParam( 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) self.fetch_list = [out] - self.dynamic_shape_params = TRTReduceMeanTest.DynamicShapeParam({ - 'data': [1, 3, 16, 16] - }, {'data': [3, 3, 56, 56]}, {'data': [3, 3, 56, 56]}, False) + self.dynamic_shape_params = TRTReduceMeanTest.DynamicShapeParam( + {'data': [1, 3, 16, 16]}, {'data': [3, 3, 56, 56]}, + {'data': [3, 3, 56, 56]}, False) def test_check_output(self): if core.is_compiled_with_cuda(): @@ -52,10 +55,12 @@ class TRTReduceMeanTest(InferencePassTest): class TRTReduceMeanAllNoBatchTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, -1, -1], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 3, -1, -1], + dtype="float32") reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) out = 
fluid.layers.batch_norm(reduce_mean, is_test=True) @@ -67,9 +72,8 @@ class TRTReduceMeanAllNoBatchTest(InferencePassTest): 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) self.fetch_list = [out] self.dynamic_shape_params = TRTReduceMeanAllNoBatchTest.DynamicShapeParam( - { - 'data': [1, 3, 16, 16] - }, {'data': [3, 3, 56, 56]}, {'data': [3, 3, 56, 56]}, False) + {'data': [1, 3, 16, 16]}, {'data': [3, 3, 56, 56]}, + {'data': [3, 3, 56, 56]}, False) def test_check_output(self): if core.is_compiled_with_cuda(): @@ -80,12 +84,15 @@ class TRTReduceMeanAllNoBatchTest(InferencePassTest): class TRTReduceMeanTestFP16(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, -1, -1], dtype="float32") - reduce_mean = fluid.layers.reduce_mean( - data, dim=[2, -1], keep_dim=True) + data = fluid.data(name="data", + shape=[-1, 3, -1, -1], + dtype="float32") + reduce_mean = fluid.layers.reduce_mean(data, + dim=[2, -1], + keep_dim=True) out = fluid.layers.batch_norm(reduce_mean, is_test=True) self.feeds = { @@ -95,9 +102,9 @@ class TRTReduceMeanTestFP16(InferencePassTest): self.trt_parameters = TRTReduceMeanTestFP16.TensorRTParam( 1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False) self.fetch_list = [out] - self.dynamic_shape_params = TRTReduceMeanTestFP16.DynamicShapeParam({ - 'data': [1, 3, 16, 16] - }, {'data': [3, 3, 56, 56]}, {'data': [3, 3, 56, 56]}, False) + self.dynamic_shape_params = TRTReduceMeanTestFP16.DynamicShapeParam( + {'data': [1, 3, 16, 16]}, {'data': [3, 3, 56, 56]}, + {'data': [3, 3, 56, 56]}, False) def test_check_output(self): if core.is_compiled_with_cuda(): @@ -108,10 +115,12 @@ class TRTReduceMeanTestFP16(InferencePassTest): class TRTReduceMeanAllTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 56, 56], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 3, 56, 56], + dtype="float32") reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) out = fluid.layers.batch_norm(reduce_mean, is_test=True) @@ -122,9 +131,9 @@ class TRTReduceMeanAllTest(InferencePassTest): self.trt_parameters = TRTReduceMeanAllTest.TensorRTParam( 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) self.fetch_list = [out] - self.dynamic_shape_params = TRTReduceMeanAllTest.DynamicShapeParam({ - 'data': [1, 3, 56, 56] - }, {'data': [3, 3, 56, 56]}, {'data': [3, 3, 56, 56]}, False) + self.dynamic_shape_params = TRTReduceMeanAllTest.DynamicShapeParam( + {'data': [1, 3, 56, 56]}, {'data': [3, 3, 56, 56]}, + {'data': [3, 3, 56, 56]}, False) def test_check_output(self): if core.is_compiled_with_cuda(): @@ -135,12 +144,15 @@ class TRTReduceMeanAllTest(InferencePassTest): class TRTReduceMeanTestStatic(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[3, 3, 56, 56], dtype="float32") - reduce_mean = fluid.layers.reduce_mean( - data, dim=[2, -1], keep_dim=True) + data = fluid.data(name="data", + shape=[3, 3, 56, 56], + dtype="float32") + reduce_mean = fluid.layers.reduce_mean(data, + dim=[2, -1], + keep_dim=True) out = fluid.layers.batch_norm(reduce_mean, is_test=True) self.feeds = { @@ -160,10 +172,12 @@ class TRTReduceMeanTestStatic(InferencePassTest): class TRTReduceMeanStaticAllTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, 
self.startup_program): - data = fluid.data( - name="data", shape=[4, 3, 56, 56], dtype="float32") + data = fluid.data(name="data", + shape=[4, 3, 56, 56], + dtype="float32") reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) out = fluid.layers.batch_norm(reduce_mean, is_test=True) @@ -184,10 +198,12 @@ class TRTReduceMeanStaticAllTest(InferencePassTest): class TRTReduceMeanStaticFP16(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[4, 3, 56, 56], dtype="float32") + data = fluid.data(name="data", + shape=[4, 3, 56, 56], + dtype="float32") reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) out = fluid.layers.batch_norm(reduce_mean, is_test=True) @@ -208,10 +224,12 @@ class TRTReduceMeanStaticFP16(InferencePassTest): class TRTReduceMeanFP16Static(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[4, 3, 56, 56], dtype="float32") + data = fluid.data(name="data", + shape=[4, 3, 56, 56], + dtype="float32") reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) out = fluid.layers.batch_norm(reduce_mean, is_test=True) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_sum_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_sum_op.py index fbe944cd7f3..2e413bde5f7 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_sum_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_sum_op.py @@ -24,12 +24,15 @@ from paddle.fluid.core import AnalysisConfig class TRTReduceSumTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 10, 192], dtype="float32") - reduce_sum = fluid.layers.reduce_sum( - data, dim=[2, -1], keep_dim=True) + data = fluid.data(name="data", + shape=[-1, 3, 10, 192], + dtype="float32") + reduce_sum = fluid.layers.reduce_sum(data, + dim=[2, -1], + keep_dim=True) out = fluid.layers.batch_norm(reduce_sum, is_test=True) self.feeds = { @@ -39,9 +42,9 @@ class TRTReduceSumTest(InferencePassTest): self.trt_parameters = TRTReduceSumTest.TensorRTParam( 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) self.fetch_list = [out] - self.dynamic_shape_params = TRTReduceSumTest.DynamicShapeParam({ - 'data': [1, 3, 8, 8] - }, {'data': [3, 3, 10, 192]}, {'data': [3, 3, 10, 192]}, False) + self.dynamic_shape_params = TRTReduceSumTest.DynamicShapeParam( + {'data': [1, 3, 8, 8]}, {'data': [3, 3, 10, 192]}, + {'data': [3, 3, 10, 192]}, False) def test_check_output(self): if core.is_compiled_with_cuda(): @@ -52,10 +55,12 @@ class TRTReduceSumTest(InferencePassTest): class TRTReduceSumAllTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 10, 192], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 3, 10, 192], + dtype="float32") reduce_sum = fluid.layers.reduce_sum(data, keep_dim=True) out = fluid.layers.batch_norm(reduce_sum, is_test=True) @@ -66,9 +71,9 @@ class TRTReduceSumAllTest(InferencePassTest): self.trt_parameters = TRTReduceSumAllTest.TensorRTParam( 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) self.fetch_list = [out] - self.dynamic_shape_params = TRTReduceSumAllTest.DynamicShapeParam({ - 'data': [1, 3, 8, 8] - }, {'data': [3, 3, 10, 
192]}, {'data': [3, 3, 10, 192]}, False) + self.dynamic_shape_params = TRTReduceSumAllTest.DynamicShapeParam( + {'data': [1, 3, 8, 8]}, {'data': [3, 3, 10, 192]}, + {'data': [3, 3, 10, 192]}, False) def test_check_output(self): if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reshape2_matmul_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reshape2_matmul_fuse_pass.py index ecfc5c9dac0..d2dca92345a 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reshape2_matmul_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reshape2_matmul_fuse_pass.py @@ -64,14 +64,15 @@ class TestReshape2MatmulFusePass(PassAutoScanTest): self.add_ignore_check_case( teller1, IgnoreReasons.PASS_ACCURACY_ERROR, - "The pass error on TRT while shape of bias is not [out_size].", ) + "The pass error on TRT while shape of bias is not [out_size].", + ) def sample_program_config(self, draw): # 1. Generate shape and attr of reshape2 reshape = draw( - st.lists( - st.integers( - min_value=1, max_value=10), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=10), + min_size=2, + max_size=2)) x_shape = reshape + [1, 1] # 2. Generate attr:transpose_X/transpose_Y/alpha of matmul @@ -81,16 +82,18 @@ class TestReshape2MatmulFusePass(PassAutoScanTest): # 3. Generate legal shape of input:Y of matmul y_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=2)) y_shape[0] = x_shape[1] # 4. Generate legal attr:axis of elementwise_add axis = draw(st.integers(min_value=-1, max_value=1)) if axis == 0: axis = -1 - bias_shape = [y_shape[1], ] + bias_shape = [ + y_shape[1], + ] # if axis == -1: # if draw(st.booleans()): # bias_shape = [y_shape[1], ] @@ -99,14 +102,21 @@ class TestReshape2MatmulFusePass(PassAutoScanTest): reshape2_op = OpConfig( "reshape2", - inputs={"X": ["reshape2_x"], }, + inputs={ + "X": ["reshape2_x"], + }, shape=reshape, - outputs={"Out": ["reshape2_out"], - "XShape": ["xshape"]}, ) + outputs={ + "Out": ["reshape2_out"], + "XShape": ["xshape"] + }, + ) matmul_op = OpConfig( "matmul", - inputs={"X": ["reshape2_out"], - "Y": ["matmul_y"]}, + inputs={ + "X": ["reshape2_out"], + "Y": ["matmul_y"] + }, outputs={"Out": ["matmul_out"]}, alpha=alpha, transpose_X=transpose_X, @@ -116,14 +126,18 @@ class TestReshape2MatmulFusePass(PassAutoScanTest): fused_transpose_X=[], fused_transpose_Y=[], fused_reshape_Out=[], - fused_transpose_Out=[], ) + fused_transpose_Out=[], + ) add_op = OpConfig( "elementwise_add", - inputs={"X": ["matmul_out"], - "Y": ["bias"]}, + inputs={ + "X": ["matmul_out"], + "Y": ["bias"] + }, outputs={"Out": ["add_out"]}, - axis=axis, ) + axis=axis, + ) ops = [reshape2_op, matmul_op, add_op] @@ -133,16 +147,18 @@ class TestReshape2MatmulFusePass(PassAutoScanTest): "matmul_y": TensorConfig(shape=y_shape), "bias": TensorConfig(shape=bias_shape), }, - inputs={"reshape2_x": TensorConfig(shape=x_shape), }, - outputs=ops[-1].outputs["Out"], ) + inputs={ + "reshape2_x": TensorConfig(shape=x_shape), + }, + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=50, - passes=["trt_reshape2_matmul_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=50, + passes=["trt_reshape2_matmul_fuse_pass"]) if __name__ == "__main__": diff --git 
a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reshape_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reshape_op.py index 0522df3a921..8fcf993e271 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reshape_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reshape_op.py @@ -24,6 +24,7 @@ from paddle.fluid.core import AnalysisConfig class TRTReshapeTest(InferencePassTest): + def setUp(self): self.bs = 1 self.input_shape = [16, 3, 8] @@ -33,8 +34,9 @@ class TRTReshapeTest(InferencePassTest): self.input_shape[2] ] with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name='data', shape=self.data_shape, dtype='float32') + data = fluid.data(name='data', + shape=self.data_shape, + dtype='float32') reshape_out = self.append_reshape(data, self.reshape) out = fluid.layers.batch_norm(reshape_out, is_test=True) self.feeds = { @@ -57,6 +59,7 @@ class TRTReshapeTest(InferencePassTest): class TRTReshapeTest1(TRTReshapeTest): + def setUp(self): self.bs = 2 self.input_shape = [23, 13, 12] @@ -66,8 +69,9 @@ class TRTReshapeTest1(TRTReshapeTest): self.input_shape[2] ] with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name='data', shape=self.data_shape, dtype='float32') + data = fluid.data(name='data', + shape=self.data_shape, + dtype='float32') reshape_out = self.append_reshape(data, self.reshape) out = fluid.layers.batch_norm(reshape_out, is_test=True) self.feeds = { @@ -80,6 +84,7 @@ class TRTReshapeTest1(TRTReshapeTest): class TRTReshapeTest2(TRTReshapeTest): + def setUp(self): self.bs = 2 self.input_shape = [23, 13, 12] @@ -89,8 +94,9 @@ class TRTReshapeTest2(TRTReshapeTest): self.input_shape[2] ] with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name='data', shape=self.data_shape, dtype='float32') + data = fluid.data(name='data', + shape=self.data_shape, + dtype='float32') reshape_out = fluid.layers.reshape(x=data, shape=self.reshape) out = fluid.layers.batch_norm(reshape_out, is_test=True) self.feeds = { @@ -103,6 +109,7 @@ class TRTReshapeTest2(TRTReshapeTest): class TRTReshapeTest3(TRTReshapeTest): + def setUp(self): self.bs = 1 self.input_shape = [7, 16, 27] @@ -112,8 +119,9 @@ class TRTReshapeTest3(TRTReshapeTest): self.input_shape[2] ] with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name='data', shape=self.data_shape, dtype='float32') + data = fluid.data(name='data', + shape=self.data_shape, + dtype='float32') bn_out = fluid.layers.batch_norm(data, is_test=True) out = self.append_reshape(bn_out, self.reshape) self.feeds = { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_roi_align_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_roi_align_op.py index 37f17661dbc..f644a0954e4 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_roi_align_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_roi_align_op.py @@ -24,6 +24,7 @@ from paddle.fluid.core import AnalysisConfig class TRTRoiAlignTest(InferencePassTest): + def setUp(self): self.bs = 2 self.num_rois = 4 @@ -41,8 +42,10 @@ class TRTRoiAlignTest(InferencePassTest): with fluid.program_guard(self.main_program, self.startup_program): data_shape = [-1, self.channel, self.height, self.width] data = fluid.data(name='data', shape=data_shape, dtype='float32') - rois = fluid.data( - name='rois', shape=[-1, 4], dtype='float32', lod_level=1) + rois = 
fluid.data(name='rois', + shape=[-1, 4], + dtype='float32', + lod_level=1) roi_align_out = fluid.layers.roi_align(data, rois) out = fluid.layers.batch_norm(roi_align_out, is_test=True) @@ -75,11 +78,13 @@ class TRTRoiAlignTest(InferencePassTest): self.bs, self.channel, self.height // 2, self.width // 2 ] min_shape_spec['rois'] = [1, 4] - max_shape_spec[ - 'data'] = [self.bs, self.channel, self.height * 2, self.width * 2] + max_shape_spec['data'] = [ + self.bs, self.channel, self.height * 2, self.width * 2 + ] max_shape_spec['rois'] = [self.bs * self.num_rois, 4] - opt_shape_spec[ - 'data'] = [self.bs, self.channel, self.height, self.width] + opt_shape_spec['data'] = [ + self.bs, self.channel, self.height, self.width + ] opt_shape_spec['rois'] = [self.bs * self.num_rois, 4] self.dynamic_shape_params = InferencePassTest.DynamicShapeParam( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_scale_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_scale_op.py index 4530e04d8de..752fe3ac146 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_scale_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_scale_op.py @@ -24,21 +24,26 @@ from paddle.fluid.core import AnalysisConfig class TRTScaleTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[-1, 512], dtype="float32") scale_out = self.append_scale(data) out = fluid.layers.batch_norm(scale_out, is_test=True) - self.feeds = {"data": np.random.random([1, 512]).astype("float32"), } + self.feeds = { + "data": np.random.random([1, 512]).astype("float32"), + } self.enable_trt = True self.trt_parameters = TRTScaleTest.TensorRTParam( 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) self.fetch_list = [out] def append_scale(self, data): - return fluid.layers.scale( - x=data, scale=2.0, bias=-1.0, bias_after_scale=False) + return fluid.layers.scale(x=data, + scale=2.0, + bias=-1.0, + bias_after_scale=False) def test_check_output(self): if core.is_compiled_with_cuda(): @@ -49,10 +54,12 @@ class TRTScaleTest(InferencePassTest): class TRTScaleShape2Test(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 512, 512], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 512, 512], + dtype="float32") scale_out = self.append_scale(data) out = fluid.layers.batch_norm(scale_out, is_test=True) @@ -65,8 +72,10 @@ class TRTScaleShape2Test(InferencePassTest): self.fetch_list = [out] def append_scale(self, data): - return fluid.layers.scale( - x=data, scale=2.0, bias=-1.0, bias_after_scale=False) + return fluid.layers.scale(x=data, + scale=2.0, + bias=-1.0, + bias_after_scale=False) def test_check_output(self): if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py index e9c304496af..ced6c706592 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py @@ -22,10 +22,12 @@ from paddle.fluid.core import AnalysisConfig class ShuffleChannelFuseTRTPassTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - 
name="data", shape=[-1, 6, 64, 64], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 6, 64, 64], + dtype="float32") reshape1 = fluid.layers.reshape(x=data, shape=[-1, 2, 3, 64, 64]) trans = fluid.layers.transpose(x=reshape1, perm=[0, 2, 1, 3, 4]) reshape2 = fluid.layers.reshape(x=trans, shape=[-1, 6, 64, 64]) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_dynamic_plugin.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_dynamic_plugin.py index 7b4b84724e8..531b4e3df45 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_dynamic_plugin.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_dynamic_plugin.py @@ -24,6 +24,7 @@ from paddle.fluid.core import AnalysisConfig #normal starts && ends class SlicePluginTRTDynamicTest(InferencePassTest): + def setUpSliceParams(self): self.params_axes = [1, 3] self.params_starts = [0, 1] @@ -34,9 +35,8 @@ class SlicePluginTRTDynamicTest(InferencePassTest): 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) self.enable_trt = True self.dynamic_shape_params = SlicePluginTRTDynamicTest.DynamicShapeParam( - { - 'data': [1, 1, 1, 1] - }, {'data': [8, 8, 8, 8]}, {'data': [8, 8, 8, 8]}, False) + {'data': [1, 1, 1, 1]}, {'data': [8, 8, 8, 8]}, + {'data': [8, 8, 8, 8]}, False) def setUp(self): self.setUpSliceParams() @@ -46,8 +46,10 @@ class SlicePluginTRTDynamicTest(InferencePassTest): axes = self.params_axes starts = self.params_starts ends = self.params_ends - slice_out = fluid.layers.slice( - data, axes=axes, starts=starts, ends=ends) + slice_out = fluid.layers.slice(data, + axes=axes, + starts=starts, + ends=ends) self.feeds = { "data": np.random.random((3, 3, 3, 3)).astype("float32"), @@ -66,6 +68,7 @@ class SlicePluginTRTDynamicTest(InferencePassTest): class SlicePluginTRTDynamicBoundTest(SlicePluginTRTDynamicTest): + def setUpSliceParams(self): self.params_axes = [1, 3] self.params_starts = [0, 1] @@ -76,12 +79,12 @@ class SlicePluginTRTDynamicBoundTest(SlicePluginTRTDynamicTest): 1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False) self.enable_trt = True self.dynamic_shape_params = SlicePluginTRTDynamicBoundTest.DynamicShapeParam( - { - 'data': [1, 1, 1, 1] - }, {'data': [8, 8, 8, 8]}, {'data': [8, 8, 8, 8]}, False) + {'data': [1, 1, 1, 1]}, {'data': [8, 8, 8, 8]}, + {'data': [8, 8, 8, 8]}, False) class SlicePluginTRTDynamicNegativeBoundTest(SlicePluginTRTDynamicTest): + def setUpSliceParams(self): self.params_axes = [1, 3] self.params_starts = [-5, 1] @@ -92,9 +95,8 @@ class SlicePluginTRTDynamicNegativeBoundTest(SlicePluginTRTDynamicTest): 1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False) self.enable_trt = True self.dynamic_shape_params = SlicePluginTRTDynamicNegativeBoundTest.DynamicShapeParam( - { - 'data': [1, 1, 1, 1] - }, {'data': [8, 8, 8, 8]}, {'data': [8, 8, 8, 8]}, False) + {'data': [1, 1, 1, 1]}, {'data': [8, 8, 8, 8]}, + {'data': [8, 8, 8, 8]}, False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py index 98232838ee0..a1249c04c27 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py @@ -24,6 +24,7 @@ from paddle.fluid.core import AnalysisConfig #normal starts && ends class SlicePluginTRTTest(InferencePassTest): + def setUpSliceParams(self): self.params_axes = 
[1, 3] self.params_starts = [0, 1] @@ -42,8 +43,10 @@ class SlicePluginTRTTest(InferencePassTest): axes = self.params_axes starts = self.params_starts ends = self.params_ends - slice_out = fluid.layers.slice( - data, axes=axes, starts=starts, ends=ends) + slice_out = fluid.layers.slice(data, + axes=axes, + starts=starts, + ends=ends) out = fluid.layers.batch_norm(slice_out, is_test=True) self.feeds = { @@ -64,6 +67,7 @@ class SlicePluginTRTTest(InferencePassTest): #negative starts && ends class SlicePluginTRTTestNegativeStartsAndEnds(SlicePluginTRTTest): + def setUpSliceParams(self): self.params_axes = [2, 3] self.params_starts = [-3, -2] @@ -72,6 +76,7 @@ class SlicePluginTRTTestNegativeStartsAndEnds(SlicePluginTRTTest): #exceeded bound starts && ends class SlicePluginTRTTestStartsAndEndsBoundCheck(SlicePluginTRTTest): + def setUpSliceParams(self): self.params_axes = [2, 3] self.params_starts = [-5, -2] @@ -80,6 +85,7 @@ class SlicePluginTRTTestStartsAndEndsBoundCheck(SlicePluginTRTTest): #fp16 class SlicePluginTRTTestFp16(SlicePluginTRTTest): + def setUpTensorRTParams(self): self.trt_parameters = SlicePluginTRTTest.TensorRTParam( 1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False) @@ -87,6 +93,7 @@ class SlicePluginTRTTestFp16(SlicePluginTRTTest): class StaticSlicePluginTRTTestFp16(SlicePluginTRTTest): + def setUpTensorRTParams(self): self.trt_parameters = SlicePluginTRTTest.TensorRTParam( 1 << 30, 32, 1, AnalysisConfig.Precision.Half, True, False) @@ -94,6 +101,7 @@ class StaticSlicePluginTRTTestFp16(SlicePluginTRTTest): class StaticSlicePluginTRTTestFp32(SlicePluginTRTTest): + def setUpTensorRTParams(self): self.trt_parameters = SlicePluginTRTTest.TensorRTParam( 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, True, False) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_squeeze2_matmul_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_squeeze2_matmul_fuse_pass.py index d2791737a1c..a52dd0aed84 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_squeeze2_matmul_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_squeeze2_matmul_fuse_pass.py @@ -64,14 +64,15 @@ class TestSqueeze2MatmulFusePass(PassAutoScanTest): self.add_ignore_check_case( teller1, IgnoreReasons.PASS_ACCURACY_ERROR, - "The pass error on TRT while shape of bias is not [out_size].", ) + "The pass error on TRT while shape of bias is not [out_size].", + ) def sample_program_config(self, draw): # 1. Generate shape of input:X of squeeze2 x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=2)) # axes of squeeze2 == [2, 3] x_shape += [1, 1] axes = [2, 3] @@ -83,16 +84,18 @@ class TestSqueeze2MatmulFusePass(PassAutoScanTest): # 3. Generate legal shape of input:Y of matmul y_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=8), min_size=2, max_size=2)) + st.lists(st.integers(min_value=1, max_value=8), + min_size=2, + max_size=2)) y_shape[0] = x_shape[1] # 4. 
Generate legal attr:axis of elementwise_add axis = draw(st.integers(min_value=-1, max_value=1)) if axis == 0: axis = -1 - bias_shape = [y_shape[1], ] + bias_shape = [ + y_shape[1], + ] # if axis == -1: # if draw(st.booleans()): # bias_shape = [y_shape[1], ] @@ -101,14 +104,21 @@ class TestSqueeze2MatmulFusePass(PassAutoScanTest): squeeze2_op = OpConfig( "squeeze2", - inputs={"X": ["squeeze2_x"], }, + inputs={ + "X": ["squeeze2_x"], + }, axes=axes, - outputs={"Out": ["squeeze2_out"], - "XShape": ["xshape"]}, ) + outputs={ + "Out": ["squeeze2_out"], + "XShape": ["xshape"] + }, + ) matmul_op = OpConfig( "matmul", - inputs={"X": ["squeeze2_out"], - "Y": ["matmul_y"]}, + inputs={ + "X": ["squeeze2_out"], + "Y": ["matmul_y"] + }, outputs={"Out": ["matmul_out"]}, alpha=alpha, transpose_X=transpose_X, @@ -118,14 +128,18 @@ class TestSqueeze2MatmulFusePass(PassAutoScanTest): fused_transpose_X=[], fused_transpose_Y=[], fused_reshape_Out=[], - fused_transpose_Out=[], ) + fused_transpose_Out=[], + ) add_op = OpConfig( "elementwise_add", - inputs={"X": ["matmul_out"], - "Y": ["bias"]}, + inputs={ + "X": ["matmul_out"], + "Y": ["bias"] + }, outputs={"Out": ["add_out"]}, - axis=axis, ) + axis=axis, + ) ops = [squeeze2_op, matmul_op, add_op] program_config = ProgramConfig( @@ -134,16 +148,18 @@ class TestSqueeze2MatmulFusePass(PassAutoScanTest): "matmul_y": TensorConfig(shape=y_shape), "bias": TensorConfig(shape=bias_shape), }, - inputs={"squeeze2_x": TensorConfig(shape=x_shape), }, - outputs=ops[-1].outputs["Out"], ) + inputs={ + "squeeze2_x": TensorConfig(shape=x_shape), + }, + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=50, - passes=["trt_squeeze2_matmul_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=50, + passes=["trt_squeeze2_matmul_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py index 23a3d191401..2472ff027e3 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py @@ -24,10 +24,12 @@ from paddle.fluid.core import AnalysisConfig class TensorRTSubgraphPassFcTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 64, 64], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 6, 64, 64], + dtype="float32") fc_out = fluid.layers.fc(input=[data], act=None, size=1000) reshape_out = fluid.layers.reshape(x=fc_out, shape=[1, 1000]) self.feeds = { @@ -48,12 +50,15 @@ class TensorRTSubgraphPassFcTest(InferencePassTest): class TensorRTSubgraphPassConcatTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data1 = fluid.data( - name="data1", shape=[-1, 3, 64, 64], dtype="float32") - data2 = fluid.data( - name="data2", shape=[-1, 3, 64, 64], dtype="float32") + data1 = fluid.data(name="data1", + shape=[-1, 3, 64, 64], + dtype="float32") + data2 = fluid.data(name="data2", + shape=[-1, 3, 64, 64], + dtype="float32") concat_out = fluid.layers.concat([data1, data2], axis=2) out = fluid.layers.batch_norm(concat_out, is_test=True) self.feeds = { @@ -74,10 +79,12 @@ class TensorRTSubgraphPassConcatTest(InferencePassTest): class TensorRTSubgraphPassSplitTest(InferencePassTest): + def 
setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 64, 64], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 3, 64, 64], + dtype="float32") split_out = fluid.layers.split(data, dim=-1, num_or_sections=2) out = fluid.layers.batch_norm(split_out[0], is_test=True) self.feeds = { @@ -97,10 +104,12 @@ class TensorRTSubgraphPassSplitTest(InferencePassTest): class TensorRTSubgraphPassSplitSerializeTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 64, 64], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 3, 64, 64], + dtype="float32") split_out = fluid.layers.split(data, dim=-1, num_or_sections=2) out = fluid.layers.batch_norm(split_out[0], is_test=True) self.feeds = { @@ -122,10 +131,12 @@ class TensorRTSubgraphPassSplitSerializeTest(InferencePassTest): class TensorRTSubgraphPassDynamicSplitFp16SerializeTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 64, 64], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 3, 64, 64], + dtype="float32") split_out = fluid.layers.split(data, dim=-1, num_or_sections=2) out = fluid.layers.batch_norm(split_out[0], is_test=True) self.feeds = { @@ -135,9 +146,8 @@ class TensorRTSubgraphPassDynamicSplitFp16SerializeTest(InferencePassTest): self.trt_parameters = TensorRTSubgraphPassSplitTest.TensorRTParam( 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False) self.dynamic_shape_params = TensorRTSubgraphPassDynamicSplitFp16SerializeTest.DynamicShapeParam( - { - 'data': [1, 3, 8, 64] - }, {'data': [1, 3, 512, 64]}, {'data': [1, 3, 256, 64]}, False) + {'data': [1, 3, 8, 64]}, {'data': [1, 3, 512, 64]}, + {'data': [1, 3, 256, 64]}, False) self.fetch_list = [out] def test_check_output(self): @@ -151,18 +161,21 @@ class TensorRTSubgraphPassDynamicSplitFp16SerializeTest(InferencePassTest): class TensorRTSubgraphPassInstanceNormTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 64, 64], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 3, 64, 64], + dtype="float32") param_attr = fluid.ParamAttr( name='instance_norm_w', initializer=fluid.initializer.Constant(value=1.0)) bias_attr = fluid.ParamAttr( name='instance_norm_b', initializer=fluid.initializer.Constant(value=0.0)) - out = fluid.layers.instance_norm( - input=data, param_attr=param_attr, bias_attr=bias_attr) + out = fluid.layers.instance_norm(input=data, + param_attr=param_attr, + bias_attr=bias_attr) self.feeds = { "data": np.random.random([1, 3, 64, 64]).astype("float32"), } @@ -180,10 +193,12 @@ class TensorRTSubgraphPassInstanceNormTest(InferencePassTest): class TensorRTSubgraphPassTransposeTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 64, 64], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 6, 64, 64], + dtype="float32") transpose_out = self.append_transpose(data) out = fluid.layers.batch_norm(transpose_out, is_test=True) self.feeds = { @@ -206,13 +221,15 @@ class TensorRTSubgraphPassTransposeTest(InferencePassTest): class TensorRTSubgraphPassLayerNormTest(InferencePassTest): + def setUp(self): self.set_params() with 
fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 64, 64], dtype="float32") - out = fluid.layers.layer_norm( - data, begin_norm_axis=self.begin_norm_axis) + data = fluid.data(name="data", + shape=[-1, 3, 64, 64], + dtype="float32") + out = fluid.layers.layer_norm(data, + begin_norm_axis=self.begin_norm_axis) self.feeds = { "data": np.random.random([1, 3, 64, 64]).astype("float32"), } @@ -233,13 +250,15 @@ class TensorRTSubgraphPassLayerNormTest(InferencePassTest): class TensorRTSubgraphPassLayerNormDynamicTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 64, 64], dtype="float32") - out = fluid.layers.layer_norm( - data, begin_norm_axis=self.begin_norm_axis) + data = fluid.data(name="data", + shape=[-1, 3, 64, 64], + dtype="float32") + out = fluid.layers.layer_norm(data, + begin_norm_axis=self.begin_norm_axis) self.feeds = { "data": np.random.random([1, 3, 64, 64]).astype("float32"), } @@ -253,7 +272,11 @@ class TensorRTSubgraphPassLayerNormDynamicTest(InferencePassTest): self.dynamic_shape_params = TensorRTSubgraphPassLayerNormDynamicTest.DynamicShapeParam( { 'data': [1, 3, 64, 64], - }, {'data': [8, 8, 64, 64], }, {'data': [4, 4, 64, 64], }, False) + }, { + 'data': [8, 8, 64, 64], + }, { + 'data': [4, 4, 64, 64], + }, False) def set_params(self): self.begin_norm_axis = 2 @@ -272,6 +295,7 @@ class TensorRTSubgraphPassLayerNormDynamicTest(InferencePassTest): class TensorRTSubgraphPassLayerNormDynamicFP16Test( TensorRTSubgraphPassLayerNormDynamicTest): + def set_params(self): self.begin_norm_axis = 2 self.precision = AnalysisConfig.Precision.Half @@ -289,23 +313,28 @@ class TensorRTSubgraphPassLayerNormDynamicFP16Test( class TensorRTSubgraphPassLayerNormBeginNormAxis2Test( TensorRTSubgraphPassLayerNormTest): + def set_params(self): self.begin_norm_axis = 2 class TensorRTSubgraphPassLayerNormBeginNormAxis3Test( TensorRTSubgraphPassLayerNormTest): + def set_params(self): self.begin_norm_axis = 3 class TensorRTSubgraphPassElementwiseTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data1 = fluid.data( - name="data1", shape=[-1, 3, 64, 64], dtype="float32") - data2 = fluid.data( - name="data2", shape=[-1, 3, 64, 64], dtype="float32") + data1 = fluid.data(name="data1", + shape=[-1, 3, 64, 64], + dtype="float32") + data2 = fluid.data(name="data2", + shape=[-1, 3, 64, 64], + dtype="float32") eltwise_out = self.append_eltwise(data1, data2) out = fluid.layers.batch_norm(eltwise_out, is_test=True) self.feeds = { @@ -328,14 +357,16 @@ class TensorRTSubgraphPassElementwiseTest(InferencePassTest): PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) -class TensorRTSubgraphPassElementwiseMulTest( - TensorRTSubgraphPassElementwiseTest): +class TensorRTSubgraphPassElementwiseMulTest(TensorRTSubgraphPassElementwiseTest + ): + def append_eltwise(self, data1, data2): return fluid.layers.elementwise_mul(x=data1, y=data2) class TensorRTSubgraphPassElementwiseSerializeTest( TensorRTSubgraphPassElementwiseTest): + def setUp(self): super(TensorRTSubgraphPassElementwiseSerializeTest, self).setUp() self.trt_parameters = TensorRTSubgraphPassElementwiseTest.TensorRTParam( @@ -349,10 +380,12 @@ class TensorRTSubgraphPassElementwiseSerializeTest( class TensorRTSubgraphPassElementwiseBroadcastDynamicTest(InferencePassTest): + def setUp(self): with 
fluid.program_guard(self.main_program, self.startup_program): - data1 = fluid.data( - name="data1", shape=[-1, 3, 64, 64], dtype="float32") + data1 = fluid.data(name="data1", + shape=[-1, 3, 64, 64], + dtype="float32") data2 = fluid.data(name="data2", shape=[64, 64], dtype="float32") eltwise_out = self.append_eltwise(data1, data2) out = fluid.layers.batch_norm(eltwise_out, is_test=True) @@ -367,10 +400,13 @@ class TensorRTSubgraphPassElementwiseBroadcastDynamicTest(InferencePassTest): { 'data1': [1, 3, 8, 64], 'data2': [8, 64] - }, {'data1': [1, 3, 512, 64], - 'data2': - [512, 64]}, {'data1': [1, 3, 256, 64], - 'data2': [256, 64]}, False) + }, { + 'data1': [1, 3, 512, 64], + 'data2': [512, 64] + }, { + 'data1': [1, 3, 256, 64], + 'data2': [256, 64] + }, False) self.fetch_list = [out] def append_eltwise(self, data1, data2): @@ -387,10 +423,12 @@ class TensorRTSubgraphPassElementwiseBroadcastDynamicTest(InferencePassTest): class TensorRTSubgraphPassShuffleChannelTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 64, 64], dtype="float32") + data = fluid.data(name="data", + shape=[-1, 6, 64, 64], + dtype="float32") sc_out = fluid.layers.shuffle_channel(data, group=3) out = fluid.layers.batch_norm(sc_out, is_test=True) self.feeds = { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_tile_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_tile_op.py index 8ec6bb50908..78ef0838ca2 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_tile_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_tile_op.py @@ -25,10 +25,12 @@ from paddle.fluid.core import AnalysisConfig class TRTTileTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[4, 3, 224, 256], dtype="float32") + data = fluid.data(name="data", + shape=[4, 3, 224, 256], + dtype="float32") tile_out = paddle.tile(x=data, repeat_times=[1, 1, 1, 1]) out = fluid.layers.batch_norm(tile_out, is_test=True) @@ -49,6 +51,7 @@ class TRTTileTest(InferencePassTest): class TRTTileExpandTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32") @@ -72,6 +75,7 @@ class TRTTileExpandTest(InferencePassTest): class TRTTileExpandStaticTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32") @@ -95,6 +99,7 @@ class TRTTileExpandStaticTest(InferencePassTest): class TRTTileExpandHalfTest(InferencePassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32") diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py index b15035c3c4d..3a15f0dcf34 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py @@ -21,19 +21,22 @@ from paddle.fluid.core import AnalysisConfig class TransposeFlattenConcatFusePassTRTTest(InferencePassTest): + def setUp(self): with 
fluid.program_guard(self.main_program, self.startup_program): - data1 = fluid.data( - name="data1", shape=[8, 32, 128], dtype="float32") - data2 = fluid.data( - name="data2", shape=[8, 32, 128], dtype="float32") + data1 = fluid.data(name="data1", + shape=[8, 32, 128], + dtype="float32") + data2 = fluid.data(name="data2", + shape=[8, 32, 128], + dtype="float32") trans1 = fluid.layers.transpose(data1, perm=[0, 2, 1]) trans2 = fluid.layers.transpose(data2, perm=[0, 2, 1]) flatt1 = fluid.layers.flatten(trans1) flatt2 = fluid.layers.flatten(trans2) concat_out = fluid.layers.concat([flatt1, flatt2], axis=1) - # There is no parameters for above structure. - # Hence, append a batch_norm to avoid failure caused by load_combined. + # There is no parameters for above structure. + # Hence, append a batch_norm to avoid failure caused by load_combined. reshape_out = fluid.layers.reshape(concat_out, [-1, 0, 1, 1]) out = fluid.layers.batch_norm(reshape_out, is_test=True) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_tuned_dynamic_shape.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_tuned_dynamic_shape.py index 4a5090fa498..041676e38e8 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_tuned_dynamic_shape.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_tuned_dynamic_shape.py @@ -17,12 +17,14 @@ from __future__ import print_function import unittest import numpy as np import paddle + paddle.enable_static() import paddle.fluid as fluid from paddle.inference import Config, Predictor, create_predictor class TRTTunedDynamicShapeTest(unittest.TestCase): + def get_model(self): place = fluid.CUDAPlace(0) exe = fluid.Executor(place) @@ -30,16 +32,16 @@ class TRTTunedDynamicShapeTest(unittest.TestCase): main_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(main_program, startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 64, 64], dtype="float32") - conv_out = fluid.layers.conv2d( - input=data, - num_filters=3, - filter_size=3, - groups=1, - padding=0, - bias_attr=False, - act=None) + data = fluid.data(name="data", + shape=[-1, 6, 64, 64], + dtype="float32") + conv_out = fluid.layers.conv2d(input=data, + num_filters=3, + filter_size=3, + groups=1, + padding=0, + bias_attr=False, + act=None) exe.run(startup_program) serialized_program = paddle.static.serialize_program( data, conv_out, program=main_program) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_yolo_box_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_yolo_box_op.py index b0124f055b4..670d246cd41 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_yolo_box_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_yolo_box_op.py @@ -24,19 +24,22 @@ from paddle.fluid.core import AnalysisConfig class TRTYoloBoxTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): image_shape = [self.bs, self.channel, self.height, self.width] image = fluid.data(name='image', shape=image_shape, dtype='float32') - image_size = fluid.data( - name='image_size', shape=[self.bs, 2], dtype='int32') + image_size = fluid.data(name='image_size', + shape=[self.bs, 2], + dtype='int32') boxes, scores = self.append_yolobox(image, image_size) self.feeds = { - 'image': np.random.random(image_shape).astype('float32'), - 'image_size': np.random.randint( - 32, 64, size=(self.bs, 2)).astype('int32'), + 'image': + 
np.random.random(image_shape).astype('float32'), + 'image_size': + np.random.randint(32, 64, size=(self.bs, 2)).astype('int32'), } self.enable_trt = True self.trt_parameters = TRTYoloBoxTest.TensorRTParam( @@ -54,13 +57,12 @@ class TRTYoloBoxTest(InferencePassTest): self.downsample_ratio = 32 def append_yolobox(self, image, image_size): - return fluid.layers.yolo_box( - x=image, - img_size=image_size, - class_num=self.class_num, - anchors=self.anchors, - conf_thresh=self.conf_thresh, - downsample_ratio=self.downsample_ratio) + return fluid.layers.yolo_box(x=image, + img_size=image_size, + class_num=self.class_num, + anchors=self.anchors, + conf_thresh=self.conf_thresh, + downsample_ratio=self.downsample_ratio) def test_check_output(self): if core.is_compiled_with_cuda(): @@ -71,13 +73,15 @@ class TRTYoloBoxTest(InferencePassTest): class TRTYoloBoxFP16Test(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): image_shape = [self.bs, self.channel, self.height, self.width] image = fluid.data(name='image', shape=image_shape, dtype='float32') - image_size = fluid.data( - name='image_size', shape=[self.bs, 2], dtype='int32') + image_size = fluid.data(name='image_size', + shape=[self.bs, 2], + dtype='int32') boxes, scores = self.append_yolobox(image, image_size) self.feeds = { @@ -100,13 +104,12 @@ class TRTYoloBoxFP16Test(InferencePassTest): self.downsample_ratio = 32 def append_yolobox(self, image, image_size): - return fluid.layers.yolo_box( - x=image, - img_size=image_size, - class_num=self.class_num, - anchors=self.anchors, - conf_thresh=self.conf_thresh, - downsample_ratio=self.downsample_ratio) + return fluid.layers.yolo_box(x=image, + img_size=image_size, + class_num=self.class_num, + anchors=self.anchors, + conf_thresh=self.conf_thresh, + downsample_ratio=self.downsample_ratio) def test_check_output(self): if core.is_compiled_with_cuda(): @@ -117,19 +120,22 @@ class TRTYoloBoxFP16Test(InferencePassTest): class TRTYoloBoxIoUAwareTest(InferencePassTest): + def setUp(self): self.set_params() with fluid.program_guard(self.main_program, self.startup_program): image_shape = [self.bs, self.channel, self.height, self.width] image = fluid.data(name='image', shape=image_shape, dtype='float32') - image_size = fluid.data( - name='image_size', shape=[self.bs, 2], dtype='int32') + image_size = fluid.data(name='image_size', + shape=[self.bs, 2], + dtype='int32') boxes, scores = self.append_yolobox(image, image_size) self.feeds = { - 'image': np.random.random(image_shape).astype('float32'), - 'image_size': np.random.randint( - 32, 64, size=(self.bs, 2)).astype('int32'), + 'image': + np.random.random(image_shape).astype('float32'), + 'image_size': + np.random.randint(32, 64, size=(self.bs, 2)).astype('int32'), } self.enable_trt = True self.trt_parameters = TRTYoloBoxTest.TensorRTParam( @@ -149,15 +155,14 @@ class TRTYoloBoxIoUAwareTest(InferencePassTest): self.iou_aware_factor = 0.5 def append_yolobox(self, image, image_size): - return fluid.layers.yolo_box( - x=image, - img_size=image_size, - class_num=self.class_num, - anchors=self.anchors, - conf_thresh=self.conf_thresh, - downsample_ratio=self.downsample_ratio, - iou_aware=self.iou_aware, - iou_aware_factor=self.iou_aware_factor) + return fluid.layers.yolo_box(x=image, + img_size=image_size, + class_num=self.class_num, + anchors=self.anchors, + conf_thresh=self.conf_thresh, + downsample_ratio=self.downsample_ratio, + iou_aware=self.iou_aware, + 
iou_aware_factor=self.iou_aware_factor) def test_check_output(self): if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_unsqueeze2_eltwise_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_unsqueeze2_eltwise_fuse_pass.py index 81acd9856cf..e69091ed855 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_unsqueeze2_eltwise_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_unsqueeze2_eltwise_fuse_pass.py @@ -46,14 +46,16 @@ class TestUnsqueezeEltwiseFusePass(PassAutoScanTest): precision_mode=paddle_infer.PrecisionType.Float32, use_static=False, use_calib_mode=False) - yield config, ['elementwise_mul', ], (1e-5, 1e-5) + yield config, [ + 'elementwise_mul', + ], (1e-5, 1e-5) def sample_program_config(self, draw): # 1. Generate shape and attr of mul x_shape = draw( - st.lists( - st.integers( - min_value=1, max_value=10), min_size=4, max_size=4)) + st.lists(st.integers(min_value=1, max_value=10), + min_size=4, + max_size=4)) axis = -1 # 2. Generate legal shape and attr of input:Y of unsqueeze2 @@ -68,14 +70,20 @@ class TestUnsqueezeEltwiseFusePass(PassAutoScanTest): "AxesTensorList": [] }, axes=unsqueeze2_axes, - outputs={"Out": ["unsqueeze2_out"], - "XShape": ["xshape"]}, ) + outputs={ + "Out": ["unsqueeze2_out"], + "XShape": ["xshape"] + }, + ) mul_op = OpConfig( "elementwise_mul", - inputs={"Y": ["unsqueeze2_out"], - "X": ["mul_x"]}, + inputs={ + "Y": ["unsqueeze2_out"], + "X": ["mul_x"] + }, axis=axis, - outputs={"Out": ["mul_out"]}, ) + outputs={"Out": ["mul_out"]}, + ) ops = [ unsqueeze2_op, @@ -89,14 +97,14 @@ class TestUnsqueezeEltwiseFusePass(PassAutoScanTest): "mul_x": TensorConfig(shape=x_shape), "unsqueeze2_x": TensorConfig(shape=y_shape), }, - outputs=ops[-1].outputs["Out"], ) + outputs=ops[-1].outputs["Out"], + ) return program_config def test(self): - self.run_and_statis( - quant=False, - max_examples=300, - passes=["unsqueeze2_eltwise_fuse_pass"]) + self.run_and_statis(quant=False, + max_examples=300, + passes=["unsqueeze2_eltwise_fuse_pass"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_yolo_box_post.py b/python/paddle/fluid/tests/unittests/ir/inference/test_yolo_box_post.py index 2fb83fb0392..b009152071e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_yolo_box_post.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_yolo_box_post.py @@ -17,6 +17,7 @@ import numpy as np import paddle from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper + paddle.enable_static() @@ -48,23 +49,22 @@ def yolo_box_post(box0, } outputs = {'Out': output, 'NmsRoisNum': nms_rois_num} - helper.append_op( - type="yolo_box_post", - inputs=inputs, - attrs={ - 'anchors0': anchors0, - 'anchors1': anchors1, - 'anchors2': anchors2, - 'class_num': class_num, - 'conf_thresh': conf_thresh, - 'downsample_ratio0': downsample_ratio0, - 'downsample_ratio1': downsample_ratio1, - 'downsample_ratio2': downsample_ratio2, - 'clip_bbox': clip_bbox, - 'scale_x_y': scale_x_y, - 'nms_threshold': nms_threshold - }, - outputs=outputs) + helper.append_op(type="yolo_box_post", + inputs=inputs, + attrs={ + 'anchors0': anchors0, + 'anchors1': anchors1, + 'anchors2': anchors2, + 'class_num': class_num, + 'conf_thresh': conf_thresh, + 'downsample_ratio0': downsample_ratio0, + 'downsample_ratio1': downsample_ratio1, + 'downsample_ratio2': downsample_ratio2, + 'clip_bbox': clip_bbox, + 'scale_x_y': scale_x_y, + 'nms_threshold': 
nms_threshold + }, + outputs=outputs) output.stop_gradient = True nms_rois_num.stop_gradient = True return output, nms_rois_num @@ -73,6 +73,7 @@ def yolo_box_post(box0, @unittest.skipIf(not paddle.is_compiled_with_cuda(), "only support cuda kernel.") class TestYoloBoxPost(unittest.TestCase): + def test_yolo_box_post(self): place = paddle.CUDAPlace(0) program = paddle.static.Program() diff --git a/python/paddle/fluid/tests/unittests/ir/pass_test.py b/python/paddle/fluid/tests/unittests/ir/pass_test.py index e92821387ae..56e31aa705f 100644 --- a/python/paddle/fluid/tests/unittests/ir/pass_test.py +++ b/python/paddle/fluid/tests/unittests/ir/pass_test.py @@ -28,6 +28,7 @@ from paddle.fluid.backward import append_backward class PassTest(unittest.TestCase): + @classmethod def setUpClass(self): self.main_program = fluid.Program() @@ -184,8 +185,9 @@ class PassTest(unittest.TestCase): self.assertTrue( self.num_fused_ops == acctual_num_fused_ops, "Checking of the number of fused operator < {} > failed. " - "Expected: {}, Received: {}".format( - self.fused_op_type, self.num_fused_ops, acctual_num_fused_ops)) + "Expected: {}, Received: {}".format(self.fused_op_type, + self.num_fused_ops, + acctual_num_fused_ops)) def check_program(self, program=None): ''' diff --git a/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py b/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py index 711891216b6..3eab8578260 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py +++ b/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py @@ -24,10 +24,11 @@ paddle.enable_static() np.random.seed(0) -@unittest.skipIf(not paddle.is_compiled_with_cuda() or - paddle.get_cudnn_version() < 8000, +@unittest.skipIf(not paddle.is_compiled_with_cuda() + or paddle.get_cudnn_version() < 8000, "only support with cuda and cudnn version is at least 8.0.") class TestFuseResNetUnit(unittest.TestCase): + def test_fuse_resenet_unit(self): place = paddle.CUDAPlace(0) program = paddle.static.Program() @@ -35,10 +36,14 @@ class TestFuseResNetUnit(unittest.TestCase): with paddle.static.amp.fp16_guard(): with paddle.static.program_guard(program, startup_program): x = paddle.static.data("x", [1, 64, 64, 8]) - conv2d = paddle.nn.Conv2D( - 8, 32, 1, bias_attr=False, data_format='NHWC') - batch_norm = paddle.nn.BatchNorm( - 32, act='relu', data_layout='NHWC') + conv2d = paddle.nn.Conv2D(8, + 32, + 1, + bias_attr=False, + data_format='NHWC') + batch_norm = paddle.nn.BatchNorm(32, + act='relu', + data_layout='NHWC') out = batch_norm(conv2d(x)) graph = core.Graph(program.desc) core.get_pass("fuse_resnet_unit").apply(graph) @@ -47,8 +52,9 @@ class TestFuseResNetUnit(unittest.TestCase): after_params = paddle.static.amp.cast_model_to_fp16(after_program) exe = paddle.static.Executor(place) exe.run(startup_program) - paddle.static.amp.cast_parameters_to_fp16( - place, program, to_fp16_var_names=params) + paddle.static.amp.cast_parameters_to_fp16(place, + program, + to_fp16_var_names=params) paddle.static.amp.cast_parameters_to_fp16( place, after_program, to_fp16_var_names=after_params) feed = {"x": np.random.randn(1, 64, 64, 8).astype("float16")} diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py index aa31bc2a35d..6a573e7beac 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py +++ 
b/python/paddle/fluid/tests/unittests/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py @@ -21,81 +21,89 @@ import paddle.fluid.core as core class EmbEltwiseLayerNormFusePassTest(PassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - word_id = fluid.layers.data( - name="word_id", - shape=[1, 128, 1], - dtype="int64", - append_batch_size=False) - pos_id = fluid.layers.data( - name="pos_id", - shape=[1, 128, 1], - dtype="int64", - append_batch_size=False) - sent_id = fluid.layers.data( - name="sent_id", - shape=[1, 128, 1], - dtype="int64", - append_batch_size=False) - word_emb = fluid.layers.embedding( - input=word_id, size=(128, 768), dtype='float32') - pos_emb = fluid.layers.embedding( - input=pos_id, size=(128, 768), dtype='float32') - sent_emb = fluid.layers.embedding( - input=sent_id, size=(128, 768), dtype='float32') + word_id = fluid.layers.data(name="word_id", + shape=[1, 128, 1], + dtype="int64", + append_batch_size=False) + pos_id = fluid.layers.data(name="pos_id", + shape=[1, 128, 1], + dtype="int64", + append_batch_size=False) + sent_id = fluid.layers.data(name="sent_id", + shape=[1, 128, 1], + dtype="int64", + append_batch_size=False) + word_emb = fluid.layers.embedding(input=word_id, + size=(128, 768), + dtype='float32') + pos_emb = fluid.layers.embedding(input=pos_id, + size=(128, 768), + dtype='float32') + sent_emb = fluid.layers.embedding(input=sent_id, + size=(128, 768), + dtype='float32') add1 = fluid.layers.elementwise_add(word_emb, pos_emb) add2 = fluid.layers.elementwise_add(add1, sent_emb) hidden1 = fluid.layers.layer_norm(input=add2, begin_norm_axis=2) - id1 = fluid.layers.data( - name="id1", - shape=[1, 128, 1], - dtype="int64", - append_batch_size=False) - id2 = fluid.layers.data( - name="id2", - shape=[1, 128, 1], - dtype="int64", - append_batch_size=False) - id3 = fluid.layers.data( - name="id3", - shape=[1, 128, 1], - dtype="int64", - append_batch_size=False) - id4 = fluid.layers.data( - name="id4", - shape=[1, 128, 1], - dtype="int64", - append_batch_size=False) - emb1 = fluid.layers.embedding( - input=id1, size=(128, 768), dtype='float32') - emb2 = fluid.layers.embedding( - input=id2, size=(128, 768), dtype='float32') - emb3 = fluid.layers.embedding( - input=id3, size=(128, 768), dtype='float32') - emb4 = fluid.layers.embedding( - input=id4, size=(128, 768), dtype='float32') + id1 = fluid.layers.data(name="id1", + shape=[1, 128, 1], + dtype="int64", + append_batch_size=False) + id2 = fluid.layers.data(name="id2", + shape=[1, 128, 1], + dtype="int64", + append_batch_size=False) + id3 = fluid.layers.data(name="id3", + shape=[1, 128, 1], + dtype="int64", + append_batch_size=False) + id4 = fluid.layers.data(name="id4", + shape=[1, 128, 1], + dtype="int64", + append_batch_size=False) + emb1 = fluid.layers.embedding(input=id1, + size=(128, 768), + dtype='float32') + emb2 = fluid.layers.embedding(input=id2, + size=(128, 768), + dtype='float32') + emb3 = fluid.layers.embedding(input=id3, + size=(128, 768), + dtype='float32') + emb4 = fluid.layers.embedding(input=id4, + size=(128, 768), + dtype='float32') add_1 = fluid.layers.elementwise_add(emb1, emb2) add_2 = fluid.layers.elementwise_add(add_1, emb3) add_3 = fluid.layers.elementwise_add(add_2, emb4) hidden_1 = fluid.layers.layer_norm(input=add_3, begin_norm_axis=2) self.feeds = { - "word_id": np.random.randint( - low=0, high=128, size=(1, 128, 1)).astype("int64"), - "pos_id": np.random.randint( - low=0, high=128, size=(1, 128, 1)).astype("int64"), - "sent_id": 
np.random.randint( - low=0, high=128, size=(1, 128, 1)).astype("int64"), - "id1": np.random.randint( - low=0, high=128, size=(1, 128, 1)).astype("int64"), - "id2": np.random.randint( - low=0, high=128, size=(1, 128, 1)).astype("int64"), - "id3": np.random.randint( - low=0, high=128, size=(1, 128, 1)).astype("int64"), - "id4": np.random.randint( - low=0, high=128, size=(1, 128, 1)).astype("int64"), + "word_id": + np.random.randint(low=0, high=128, + size=(1, 128, 1)).astype("int64"), + "pos_id": + np.random.randint(low=0, high=128, + size=(1, 128, 1)).astype("int64"), + "sent_id": + np.random.randint(low=0, high=128, + size=(1, 128, 1)).astype("int64"), + "id1": + np.random.randint(low=0, high=128, + size=(1, 128, 1)).astype("int64"), + "id2": + np.random.randint(low=0, high=128, + size=(1, 128, 1)).astype("int64"), + "id3": + np.random.randint(low=0, high=128, + size=(1, 128, 1)).astype("int64"), + "id4": + np.random.randint(low=0, high=128, + size=(1, 128, 1)).astype("int64"), } self.fetch_list = [hidden1, hidden_1] self.pass_names = "embedding_eltwise_layernorm_fuse_pass" diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py index cb485609b55..060d63cc332 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py @@ -21,10 +21,13 @@ import paddle.fluid.core as core class FCFusePassTest(PassTest): + def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[32, 128], dtype="float32", lod_level=0) + data = fluid.data(name="data", + shape=[32, 128], + dtype="float32", + lod_level=0) tmp_0 = fluid.layers.fc(input=data, size=128, num_flatten_dims=1, diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py index 84d7bb5c969..10b861fad54 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py @@ -22,12 +22,12 @@ import paddle.fluid.core as core class FusionGroupPassTest(PassTest): + def build_program(self, dtype): with fluid.program_guard(self.main_program, self.startup_program): self.feed_vars = self._prepare_feed_vars([32, 128], dtype, 2) self.feed_vars.append( - fluid.data( - name="data2", shape=[128, 128], dtype=dtype)) + fluid.data(name="data2", shape=[128, 128], dtype=dtype)) # subgraph with only 1 op node tmp_0 = self.feed_vars[0] * self.feed_vars[1] @@ -78,6 +78,7 @@ class FusionGroupPassTest(PassTest): class FusionGroupPassComplicatedTest(FusionGroupPassTest): + def build_program(self, dtype): with fluid.program_guard(self.main_program, self.startup_program): self.feed_vars = self._prepare_feed_vars([32, 64], dtype, 5) @@ -96,12 +97,12 @@ class FusionGroupPassComplicatedTest(FusionGroupPassTest): class FusionGroupPassInplaceTest(FusionGroupPassTest): + def build_program(self, dtype): with fluid.program_guard(self.main_program, self.startup_program): self.feed_vars = self._prepare_feed_vars([32, 128], dtype, 3) self.feed_vars.append( - fluid.data( - name="data3", shape=[128, 32], dtype=dtype)) + fluid.data(name="data3", shape=[128, 32], dtype=dtype)) # subgraph with 3 op node tmp_0 = self.feed_vars[0] - self.feed_vars[1] @@ -114,6 +115,7 @@ class FusionGroupPassInplaceTest(FusionGroupPassTest): class FusionGroupPassTestFP64(FusionGroupPassTest): + def 
setUp(self): self.build_program("float64") self.feeds = self._feed_random_data(self.feed_vars) @@ -122,12 +124,12 @@ class FusionGroupPassTestFP64(FusionGroupPassTest): class FusionGroupPassTestCastAndFP16(FusionGroupPassTest): + def build_program(self, dtype): with fluid.program_guard(self.main_program, self.startup_program): self.feed_vars = self._prepare_feed_vars([32, 128], dtype, 2) self.feed_vars.append( - fluid.data( - name="data2", shape=[128, 128], dtype=dtype)) + fluid.data(name="data2", shape=[128, 128], dtype=dtype)) # subgraph with 2 op nodes tmp_0 = self.feed_vars[0] * self.feed_vars[1] @@ -150,12 +152,12 @@ class FusionGroupPassTestCastAndFP16(FusionGroupPassTest): class FusionGroupPassSumTest(FusionGroupPassTest): + def build_program(self, dtype): with fluid.program_guard(self.main_program, self.startup_program): self.feed_vars = self._prepare_feed_vars([32, 128], dtype, 3) self.feed_vars.append( - fluid.data( - name="data3", shape=[128, 128], dtype=dtype)) + fluid.data(name="data3", shape=[128, 128], dtype=dtype)) # subgraph with 2 op nodes tmp_0 = layers.sum( @@ -172,6 +174,7 @@ class FusionGroupPassSumTest(FusionGroupPassTest): class FusionGroupPassCastTest(FusionGroupPassTest): + def build_program(self, dtype): with fluid.program_guard(self.main_program, self.startup_program): self.feed_vars = self._prepare_feed_vars([2, 2], dtype, 2) @@ -193,14 +196,17 @@ class FusionGroupPassCastTest(FusionGroupPassTest): class FusionGroupPassFillConstantTest(FusionGroupPassTest): + def build_program(self, dtype): with fluid.program_guard(self.main_program, self.startup_program): self.feed_vars = self._prepare_feed_vars([2, 2], dtype, 2) tmp_0 = layers.elementwise_add(self.feed_vars[0], self.feed_vars[1]) tmp_1 = layers.fill_constant(shape=[2, 2], dtype=dtype, value=2.0) - tmp_2 = layers.scale( - tmp_1, scale=3.0, bias=1.0, bias_after_scale=True) + tmp_2 = layers.scale(tmp_1, + scale=3.0, + bias=1.0, + bias_after_scale=True) tmp_3 = layers.elementwise_mul(tmp_2, tmp_0) self.append_gradients(tmp_3) diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_generate_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_generate_pass.py index 2a7c2768e27..7c6ab5d9462 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_generate_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_generate_pass.py @@ -23,7 +23,9 @@ import numpy as np # 1: relu(X=ewadd(X=mul(X=x, Y=w), Y=b)) => fc(Input=x, W=w, Bias=b) @ir.RegisterPass def generate_fc_fuse(): + def create_pass_pair(with_relu): + def pattern(x, w, b): mul = ir.PassDesc.OP.mul(X=x, Y=w) ewadd = ir.PassDesc.OP.elementwise_add(X=mul, Y=b) @@ -34,8 +36,8 @@ def generate_fc_fuse(): def replace(x, w, b): fc = ir.PassDesc.OP.fc(Input=x, W=w, Bias=b) - fc.Attr("in_num_col_dims").MappedPattern( - op="mul", name="x_num_col_dims") + fc.Attr("in_num_col_dims").MappedPattern(op="mul", + name="x_num_col_dims") if with_relu: fc.SetAttr("activation_type", "relu") return fc @@ -55,6 +57,7 @@ def multi_add_to_sum_v1(): @ir.RegisterPass def multi_add_to_sum_v2(): + def pattern(x, y, z): ewadd1 = ir.PassDesc.OP.elementwise_add(X=x, Y=y) ewadd2 = ir.PassDesc.OP.elementwise_add(X=ewadd1, Y=z) @@ -78,6 +81,7 @@ def multi_add_to_sum_v3(): 'y2': InputSpec([32, 48]) }) def generate_combine_mul_v1(): + def pattern(x, y1, y2): mul1 = paddle.matmul(x, y1) mul2 = paddle.matmul(x, y2) @@ -95,6 +99,7 @@ def generate_combine_mul_v1(): @ir.RegisterPass def generate_combine_mul_v2(): + def pattern(x, y1, y2): mul1 = ir.PassDesc.OP.matmul_v2(X=x, Y=y1) mul2 
= ir.PassDesc.OP.matmul_v2(X=x, Y=y2) @@ -113,6 +118,7 @@ def generate_combine_mul_v2(): # reshape(reshape(x)) => x @ir.RegisterPass(input_specs={'x': InputSpec([10, 16, 16])}) def generate_simplify_inference_v1(): + def pattern(x): transpose = paddle.transpose(x, [0, 2, 1]) return paddle.transpose(transpose, [0, 2, 1]) @@ -122,6 +128,7 @@ def generate_simplify_inference_v1(): @ir.RegisterPass def generate_simplify_inference_v2(): + def pattern(x): op1 = ir.PassDesc.OP.transpose2 op2 = ir.PassDesc.OP.transpose2 @@ -133,6 +140,7 @@ def generate_simplify_inference_v2(): @ir.RegisterPass def generate_layer_norm_fuse_pass(): + def pattern(x, gamma, beta): gamma.Attr("shape").Size().EQ(1) gamma.Attr("shape")[0].EQ(x.Attr("shape")[-1]) @@ -167,6 +175,7 @@ def generate_layer_norm_fuse_pass(): @ir.RegisterPass def unimplemented_operand_exception(): + def pattern(x, y): return ir.PassDesc.OP.elementwise_add(X=x, Y=y) @@ -180,6 +189,7 @@ def unimplemented_operand_exception(): @ir.RegisterPass def unimplemented_operation_exception(): + def pattern(x, y): return ir.PassDesc.OP.elementwise_add(X=x, Y=y) @@ -198,6 +208,7 @@ def get_multi_pass_desc_from_str(s): class TestGeneratePass(unittest.TestCase): + def convert_ops_to_op_dicts(self, ops): op_dicts = dict() for op in ops: @@ -226,12 +237,13 @@ class TestGeneratePass(unittest.TestCase): core.get_pass("unimplemented_operation_exception").apply(graph) def test_generate_fc_fuse(self): + def _check_fc_fuse_pass(pass_desc, with_relu): pattern_op_dicts = self.convert_ops_to_op_dicts(pass_desc.pattern) replace_op_dicts = self.convert_ops_to_op_dicts(pass_desc.replace) self.assertEqual(len(pattern_op_dicts.get("mul", [])), 1) - self.assertEqual( - len(pattern_op_dicts.get("elementwise_add", [])), 1) + self.assertEqual(len(pattern_op_dicts.get("elementwise_add", [])), + 1) if with_relu: self.assertEqual(len(pattern_op_dicts.get("relu", [])), 1) pattern_op_num = 3 # relu, ewadd, mul @@ -312,8 +324,9 @@ class TestGeneratePass(unittest.TestCase): } before_out1, before_out2 = executor.run( program, feed=feed, fetch_list=[out1.name, out2.name]) - after_out1, after_out2 = executor.run( - after_program, feed=feed, fetch_list=[out1.name, out2.name]) + after_out1, after_out2 = executor.run(after_program, + feed=feed, + fetch_list=[out1.name, out2.name]) self.assertTrue(np.allclose(before_out1, after_out1)) self.assertTrue(np.allclose(before_out2, after_out2)) @@ -368,10 +381,12 @@ class TestGeneratePass(unittest.TestCase): startup_program = paddle.static.Program() with paddle.static.program_guard(program, startup_program): x = paddle.static.data("x", [3, 64, 120], "float32") - gamma = paddle.static.create_parameter( - shape=[120], dtype="float32", is_bias=True) - beta = paddle.static.create_parameter( - shape=[120], dtype="float32", is_bias=True) + gamma = paddle.static.create_parameter(shape=[120], + dtype="float32", + is_bias=True) + beta = paddle.static.create_parameter(shape=[120], + dtype="float32", + is_bias=True) x_sub_mean = x - paddle.mean(x, axis=-1, keepdim=True) std_dev = paddle.mean(x_sub_mean.pow(2), axis=-1, keepdim=True) diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_graph_to_program_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_graph_to_program_pass.py index da6cc4ed64f..1815fe16fdb 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_graph_to_program_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_graph_to_program_pass.py @@ -33,6 +33,7 @@ def IRGraph_to_program(ir_graph): class 
GraphToProgramPassTest(unittest.TestCase): + def check_vars_equal(self, o_block, c_block): o_params = sorted(o_block.all_parameters(), key=lambda p: p.name) c_params = sorted(c_block.all_parameters(), key=lambda p: p.name) @@ -70,11 +71,12 @@ class GraphToProgramPassTest(unittest.TestCase): o_attr = o_attrs[attr_idx] c_attr = c_attrs[attr_idx] self.assertEqual(o_attr, c_attr) - self.assertEqual( - o_op.desc.attr_type(o_attr), c_op.desc.attr_type(c_attr)) + self.assertEqual(o_op.desc.attr_type(o_attr), + c_op.desc.attr_type(c_attr)) class SingleGraphToProgramPass(GraphToProgramPassTest): + def setUp(self): self.origin_program = self.build_program() ir_graph = program_to_IRGraph(self.origin_program) @@ -91,10 +93,10 @@ class SingleGraphToProgramPass(GraphToProgramPassTest): return program def test_check_parameter(self): - origin_parameter = sorted( - self.origin_program.all_parameters(), key=lambda p: p.name) - converted_parameter = sorted( - self.converted_program.all_parameters(), key=lambda p: p.name) + origin_parameter = sorted(self.origin_program.all_parameters(), + key=lambda p: p.name) + converted_parameter = sorted(self.converted_program.all_parameters(), + key=lambda p: p.name) self.assertEqual(len(origin_parameter), len(converted_parameter)) diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_skip_layernorm_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_skip_layernorm_pass.py index 0aac6650f52..25b5fa6ffb7 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_skip_layernorm_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_skip_layernorm_pass.py @@ -22,13 +22,18 @@ import paddle.fluid.core as core class SkipLayerNormFusePassTest(PassTest): + def setUp(self): paddle.enable_static() with fluid.program_guard(self.main_program, self.startup_program): - x = fluid.data( - name="x", shape=[128, 768], dtype="float32", lod_level=0) - y = fluid.data( - name="y", shape=[128, 768], dtype="float32", lod_level=0) + x = fluid.data(name="x", + shape=[128, 768], + dtype="float32", + lod_level=0) + y = fluid.data(name="y", + shape=[128, 768], + dtype="float32", + lod_level=0) elementwise_out = fluid.layers.elementwise_add(x=x, y=y) out = fluid.layers.layer_norm(input=elementwise_out) diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py b/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py index 49ca89a35f4..0c9170242e7 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -29,10 +29,13 @@ paddle.enable_static() class TestQuantizationSubGraph(unittest.TestCase): + def build_graph_with_sub_graph(self): + def linear_fc(num): - data = fluid.layers.data( - name='image', shape=[1, 32, 32], dtype='float32') + data = fluid.layers.data(name='image', + shape=[1, 32, 32], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') hidden = data for _ in six.moves.xrange(num): @@ -57,7 +60,7 @@ class TestQuantizationSubGraph(unittest.TestCase): out = layers.cond(pred, true_func, false_func) core_graph = core.Graph(main_program.desc) - # We should create graph for test, otherwise it will throw a + # We should create graph for test, otherwise it will throw a # error that it cannot find the node of "STEP_COUNTER" graph = IrGraph(core_graph, for_test=True) sub_graph = graph.get_sub_graph(0) diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_yolo_box_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_yolo_box_pass.py index 02fb8902204..b710436a511 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_yolo_box_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_yolo_box_pass.py @@ -17,6 +17,7 @@ import numpy as np import paddle from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper + paddle.enable_static() @@ -36,19 +37,18 @@ def multiclass_nms(bboxes, inputs = {'BBoxes': bboxes, 'Scores': scores} outputs = {'Out': output, 'Index': index, 'NmsRoisNum': nms_rois_num} - helper.append_op( - type="multiclass_nms3", - inputs=inputs, - attrs={ - 'background_label': background_label, - 'score_threshold': score_threshold, - 'nms_top_k': nms_top_k, - 'nms_threshold': nms_threshold, - 'keep_top_k': keep_top_k, - 'nms_eta': nms_eta, - 'normalized': normalized - }, - outputs=outputs) + helper.append_op(type="multiclass_nms3", + inputs=inputs, + attrs={ + 'background_label': background_label, + 'score_threshold': score_threshold, + 'nms_top_k': nms_top_k, + 'nms_threshold': nms_threshold, + 'keep_top_k': keep_top_k, + 'nms_eta': nms_eta, + 'normalized': normalized + }, + outputs=outputs) output.stop_gradient = True index.stop_gradient = True @@ -56,6 +56,7 @@ def multiclass_nms(bboxes, class TestYoloBoxPass(unittest.TestCase): + def test_yolo_box_pass(self): program = paddle.static.Program() with paddle.static.program_guard(program): diff --git a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py index ea125ccf3fc..cc3e1c2961e 100644 --- a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py +++ b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py @@ -33,12 +33,13 @@ os.environ['CPU_NUM'] = '2' class BuildIrMemOptBase(unittest.TestCase): + def setup_reader(self): self.batch_size = 32 self.word_dict = paddle.dataset.imdb.word_dict() - self.train_reader = paddle.batch( - paddle.dataset.imdb.train(self.word_dict), - batch_size=self.batch_size) + self.train_reader = paddle.batch(paddle.dataset.imdb.train( + self.word_dict), + batch_size=self.batch_size) def check_network_convergence(self, network, @@ -58,8 +59,10 @@ class BuildIrMemOptBase(unittest.TestCase): fluid.default_startup_program().random_seed = 100 fluid.default_main_program().random_seed = 100 - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") @@ -78,8 
+81,8 @@ class BuildIrMemOptBase(unittest.TestCase): exe.run(fluid.default_startup_program()) train_cp = compiler.CompiledProgram(fluid.default_main_program()) - train_cp = train_cp.with_data_parallel( - loss_name=cost.name, build_strategy=build_strategy) + train_cp = train_cp.with_data_parallel(loss_name=cost.name, + build_strategy=build_strategy) fetch_list = [cost.name] begin = time.time() @@ -99,8 +102,8 @@ class BuildIrMemOptBase(unittest.TestCase): break end = time.time() - print("%.4f Instance per second" % ( - (self.batch_size * iter) / (end - begin))) + print("%.4f Instance per second" % ((self.batch_size * iter) / + (end - begin))) print(first_loss, last_loss) avg_last_loss_val = np.array(last_loss).mean() @@ -113,6 +116,7 @@ class BuildIrMemOptBase(unittest.TestCase): class TestIrMemOptBase(BuildIrMemOptBase): + def setUp(self): self.network = None @@ -130,11 +134,9 @@ class TestIrMemOptBase(BuildIrMemOptBase): cur_first_loss, cur_last_loss = self.check_network_convergence( self.network) - self.assertAlmostEquals( - np.mean(baseline_last_loss), - np.mean(cur_last_loss), - delta=1e-6) - self.assertAlmostEquals( - np.mean(baseline_first_loss), - np.mean(cur_first_loss), - delta=1e-6) + self.assertAlmostEquals(np.mean(baseline_last_loss), + np.mean(cur_last_loss), + delta=1e-6) + self.assertAlmostEquals(np.mean(baseline_first_loss), + np.mean(cur_first_loss), + delta=1e-6) diff --git a/python/paddle/fluid/tests/unittests/launch_function_helper.py b/python/paddle/fluid/tests/unittests/launch_function_helper.py index 04626844401..d5eb73057b9 100644 --- a/python/paddle/fluid/tests/unittests/launch_function_helper.py +++ b/python/paddle/fluid/tests/unittests/launch_function_helper.py @@ -59,6 +59,7 @@ def wait(procs, timeout=30): def _find_free_port(port_set): + def __free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: s.bind(('', 0)) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/check_flags_mkldnn_ops_on_off.py b/python/paddle/fluid/tests/unittests/mkldnn/check_flags_mkldnn_ops_on_off.py index 11b8858b6b1..e13a15e35cf 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/check_flags_mkldnn_ops_on_off.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/check_flags_mkldnn_ops_on_off.py @@ -21,6 +21,7 @@ import os from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.framework import _global_flags from paddle.fluid.framework import _enable_legacy_dygraph + _enable_legacy_dygraph() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/mkldnn_op_test.py b/python/paddle/fluid/tests/unittests/mkldnn/mkldnn_op_test.py index ab9dc2455af..6bceff485fd 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/mkldnn_op_test.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/mkldnn_op_test.py @@ -20,9 +20,8 @@ import paddle.fluid as fluid def __assert_close(test_case, tensor, np_array, msg, atol=1e-4): - test_case.assertTrue( - np.allclose( - np.array(tensor), np_array, atol=atol), msg) + test_case.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), + msg) def check_if_mkldnn_primitives_exist_in_bwd(test_case, op_type, x, out, @@ -37,18 +36,20 @@ def check_if_mkldnn_primitives_exist_in_bwd(test_case, op_type, x, out, with fluid.program_guard(program): block = program.global_block() for name in ground_truth: - block.create_var( - name=name, dtype=np.float32, shape=ground_truth[name].shape) + block.create_var(name=name, + dtype=np.float32, + shape=ground_truth[name].shape) - op = block.append_op( - type=op_type, - inputs={'X': 
block.var('x'), }, - outputs={'Out': block.var('out')}, - attrs={'use_mkldnn': True}) + op = block.append_op(type=op_type, + inputs={ + 'X': block.var('x'), + }, + outputs={'Out': block.var('out')}, + attrs={'use_mkldnn': True}) # Generate backward op_desc - grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(op.desc, - set(), []) + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( + op.desc, set(), []) grad_op_desc = grad_op_desc_list[0] new_op_desc = block.desc.append_op() new_op_desc.copy_from(grad_op_desc) @@ -73,8 +74,9 @@ def check_if_mkldnn_primitives_exist_in_bwd(test_case, op_type, x, out, __assert_close(test_case, x_grad, out[0], 'x@GRAD') -def check_if_mkldnn_batchnorm_primitives_exist_in_bwd( - test_case, var_dict, place, shape, data_layout): +def check_if_mkldnn_batchnorm_primitives_exist_in_bwd(test_case, var_dict, + place, shape, + data_layout): var_names = [ 'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean', @@ -85,8 +87,9 @@ def check_if_mkldnn_batchnorm_primitives_exist_in_bwd( with fluid.program_guard(program): block = program.global_block() for name in ground_truth: - block.create_var( - name=name, dtype='float32', shape=ground_truth[name].shape) + block.create_var(name=name, + dtype='float32', + shape=ground_truth[name].shape) bn_op = block.append_op( type="batch_norm", inputs={ @@ -112,8 +115,9 @@ def check_if_mkldnn_batchnorm_primitives_exist_in_bwd( "fuse_with_relu": test_case.fuse_with_relu, "use_global_stats": test_case.use_global_stats }) - block.create_var( - name='y@GRAD', dtype='float32', shape=var_dict['y'].shape) + block.create_var(name='y@GRAD', + dtype='float32', + shape=var_dict['y'].shape) # generate backward op_desc grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_bf16_mkldnn_op.py index ac851bf9feb..aeb40ed9502 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_bf16_mkldnn_op.py @@ -28,6 +28,7 @@ from paddle.fluid.tests.unittests.test_gelu_op import gelu @OpTestTool.skip_if_not_cpu_bf16() @six.add_metaclass(abc.ABCMeta) class MKLDNNBF16ActivationOp(object): + @abc.abstractmethod def config(self): pass @@ -72,6 +73,7 @@ class MKLDNNBF16ActivationOp(object): class TestMKLDNNSigmoidBF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "sigmoid" @@ -83,6 +85,7 @@ class TestMKLDNNSigmoidBF16Op(MKLDNNBF16ActivationOp, TestActivation): class TestMKLDNNSqrtBF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "sqrt" @@ -97,6 +100,7 @@ class TestMKLDNNSqrtBF16Op(MKLDNNBF16ActivationOp, TestActivation): class TestMKLDNNGeluErfBF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "gelu" @@ -110,11 +114,13 @@ class TestMKLDNNGeluErfBF16Op(MKLDNNBF16ActivationOp, TestActivation): class TestMKLDNNGeluErfDim2BF16Op(TestMKLDNNGeluErfBF16Op): + def init_data(self): self.x = np.random.uniform(-1, 1, [11, 17]).astype(np.float32) class TestMKLDNNGeluTanhBF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "gelu" @@ -133,11 +139,13 @@ class TestMKLDNNGeluTanhBF16Op(MKLDNNBF16ActivationOp, TestActivation): class TestMKLDNNGeluTanhDim2BF16Op(TestMKLDNNGeluTanhBF16Op): + def init_data(self): self.x = np.random.uniform(-1, 1, [11, 17]).astype(np.float32) class 
TestMKLDNNReluBF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "relu" @@ -149,6 +157,7 @@ class TestMKLDNNReluBF16Op(MKLDNNBF16ActivationOp, TestActivation): class TestMKLDNNMishBF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "mish" @@ -156,13 +165,14 @@ class TestMKLDNNMishBF16Op(MKLDNNBF16ActivationOp, TestActivation): return x * np.tanh(np.log(1 + np.exp(x))) def op_grad(self, dout, x): - omega = np.exp(3 * x) + 4 * np.exp(2 * x) + np.exp(x) * (4 * x + 6 - ) + 4 * (x + 1) + omega = np.exp( + 3 * x) + 4 * np.exp(2 * x) + np.exp(x) * (4 * x + 6) + 4 * (x + 1) delta = np.exp(2 * x) + 2 * np.exp(x) + 2 return dout * ((np.exp(x) * omega) / delta**2) class TestMKLDNNRelu6BF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "relu6" @@ -174,6 +184,7 @@ class TestMKLDNNRelu6BF16Op(MKLDNNBF16ActivationOp, TestActivation): class TestMKLDNNLeakyReluBF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "leaky_relu" @@ -189,6 +200,7 @@ class TestMKLDNNLeakyReluBF16Op(MKLDNNBF16ActivationOp, TestActivation): class TestMKLDNNSwishBF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "swish" @@ -207,6 +219,7 @@ class TestMKLDNNSwishBF16Op(MKLDNNBF16ActivationOp, TestActivation): class TestMKLDNNHardSwishBF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "hard_swish" @@ -220,6 +233,7 @@ class TestMKLDNNHardSwishBF16Op(MKLDNNBF16ActivationOp, TestActivation): class TestMKLDNNTanhBF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "tanh" @@ -231,6 +245,7 @@ class TestMKLDNNTanhBF16Op(MKLDNNBF16ActivationOp, TestActivation): class TestMKLDNNAbsBF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "abs" @@ -242,6 +257,7 @@ class TestMKLDNNAbsBF16Op(MKLDNNBF16ActivationOp, TestActivation): class TestMKLDNNEluBF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "elu" @@ -257,6 +273,7 @@ class TestMKLDNNEluBF16Op(MKLDNNBF16ActivationOp, TestActivation): class TestMKLDNNExpBF16Op(MKLDNNBF16ActivationOp, TestActivation): + def config(self): self.op_type = "exp" diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py index 44263b89e16..6796773ae65 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py @@ -25,6 +25,7 @@ from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd class TestMKLDNNReluDim2(TestRelu): + def setUp(self): super(TestMKLDNNReluDim2, self).setUp() @@ -35,6 +36,7 @@ class TestMKLDNNReluDim2(TestRelu): class TestMKLDNNRelu6Dim2(TestRelu6): + def setUp(self): super(TestMKLDNNRelu6Dim2, self).setUp() self.attrs.update({"use_mkldnn": True}) @@ -44,6 +46,7 @@ class TestMKLDNNRelu6Dim2(TestRelu6): class TestMKLDNNLeakyReluDim2(TestLeakyRelu): + def setUp(self): super(TestMKLDNNLeakyReluDim2, self).setUp() @@ -54,6 +57,7 @@ class TestMKLDNNLeakyReluDim2(TestLeakyRelu): class TestMKLDNNGeluDim2(TestActivation): + def setUp(self): self.op_type = "gelu" self.dtype = np.float32 @@ -67,6 +71,7 @@ class TestMKLDNNGeluDim2(TestActivation): class TestMKLDNNGeluDim2Approx(TestActivation): + def setUp(self): self.op_type = "gelu" self.dtype = np.float32 @@ -80,6 +85,7 @@ class TestMKLDNNGeluDim2Approx(TestActivation): 
class TestMKLDNNTanhDim2(TestTanh): + def setUp(self): super(TestMKLDNNTanhDim2, self).setUp() @@ -90,6 +96,7 @@ class TestMKLDNNTanhDim2(TestTanh): class TestMKLDNNSqrtDim2(TestSqrt): + def setUp(self): super(TestMKLDNNSqrtDim2, self).setUp() @@ -100,6 +107,7 @@ class TestMKLDNNSqrtDim2(TestSqrt): class TestMKLDNNAbsDim2(TestAbs): + def setUp(self): super(TestMKLDNNAbsDim2, self).setUp() self.attrs = {"use_mkldnn": True} @@ -109,6 +117,7 @@ class TestMKLDNNAbsDim2(TestAbs): class TestMKLDNNSwishDim2(TestSwish): + def setUp(self): super(TestMKLDNNSwishDim2, self).setUp() @@ -120,6 +129,7 @@ class TestMKLDNNSwishDim2(TestSwish): class TestMKLDNNHardSwishDim2(TestHardSwish): + def setUp(self): super(TestMKLDNNHardSwishDim2, self).setUp() @@ -130,12 +140,14 @@ class TestMKLDNNHardSwishDim2(TestHardSwish): class TestMKLDNNSigmoidDim2(TestSigmoid): + def setUp(self): super(TestMKLDNNSigmoidDim2, self).setUp() self.attrs = {"use_mkldnn": True} class TestMKLDNNReluDim4(TestRelu): + def setUp(self): super(TestMKLDNNReluDim4, self).setUp() @@ -153,6 +165,7 @@ class TestMKLDNNReluDim4(TestRelu): class TestMKLDNNLeakyReluDim4(TestLeakyRelu): + def setUp(self): super(TestMKLDNNLeakyReluDim4, self).setUp() @@ -170,6 +183,7 @@ class TestMKLDNNLeakyReluDim4(TestLeakyRelu): class TestMKLDNNGeluDim4(TestActivation): + def setUp(self): self.op_type = "gelu" self.dtype = np.float32 @@ -183,6 +197,7 @@ class TestMKLDNNGeluDim4(TestActivation): class TestMKLDNNGeluDim4Approx(TestActivation): + def setUp(self): self.op_type = "gelu" self.dtype = np.float32 @@ -198,6 +213,7 @@ class TestMKLDNNGeluDim4Approx(TestActivation): @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestMKLDNNGeluBf16Dim4(TestActivation): + def setUp(self): self.op_type = "gelu" self.dtype = np.uint16 @@ -219,6 +235,7 @@ class TestMKLDNNGeluBf16Dim4(TestActivation): @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestMKLDNNGeluBf16Dim4Approx(TestActivation): + def setUp(self): self.op_type = "gelu" self.dtype = np.uint16 @@ -238,6 +255,7 @@ class TestMKLDNNGeluBf16Dim4Approx(TestActivation): class TestMKLDNNTanhDim4(TestTanh): + def setUp(self): super(TestMKLDNNTanhDim4, self).setUp() @@ -249,6 +267,7 @@ class TestMKLDNNTanhDim4(TestTanh): class TestMKLDNNSqrtDim4(TestSqrt): + def setUp(self): super(TestMKLDNNSqrtDim4, self).setUp() @@ -260,6 +279,7 @@ class TestMKLDNNSqrtDim4(TestSqrt): class TestMKLDNNAbsDim4(TestAbs): + def setUp(self): super(TestMKLDNNAbsDim4, self).setUp() @@ -275,6 +295,7 @@ class TestMKLDNNAbsDim4(TestAbs): class TestMKLDNNSwishDim4(TestSwish): + def setUp(self): super(TestMKLDNNSwishDim4, self).setUp() @@ -297,6 +318,7 @@ def ref_hardswish(x, threshold=6.0, scale=6.0, offset=3.0): class TestMKLDNNHardSwishDim4(TestHardSwish): + def setUp(self): super(TestMKLDNNHardSwishDim4, self).setUp() @@ -318,6 +340,7 @@ class TestMKLDNNHardSwishDim4(TestHardSwish): class TestMKLDNNMish(TestActivation): + def setUp(self): self.op_type = "mish" self.dtype = np.float32 @@ -331,6 +354,7 @@ class TestMKLDNNMish(TestActivation): class TestMKLDNNRound(TestActivation): + def setUp(self): self.op_type = "round" @@ -343,6 +367,7 @@ class TestMKLDNNRound(TestActivation): class TestMKLDNNSigmoidDim4(TestSigmoid): + def setUp(self): super(TestMKLDNNSigmoidDim4, self).setUp() @@ -354,6 +379,7 @@ class TestMKLDNNSigmoidDim4(TestSigmoid): class TestMKLDNNEluDefaultAlpha(TestActivation): + def setUp(self): self.op_type = "elu" self.set_alpha() @@ 
-372,11 +398,13 @@ class TestMKLDNNEluDefaultAlpha(TestActivation): class TestMKLDNNEluCustomAlpha(TestMKLDNNEluDefaultAlpha): + def set_alpha(self): self.alpha = 2.5 class TestMKLDNNExpOp(TestActivation): + def setUp(self): self.op_type = "exp" x = np.random.random((5, 5, 4)).astype("float32") @@ -388,6 +416,7 @@ class TestMKLDNNExpOp(TestActivation): # Check if primitives already exist in backward class TestMKLDNNAbsPrimitivesAlreadyExist(unittest.TestCase): + def setUp(self): super(TestMKLDNNAbsPrimitivesAlreadyExist, self).setUp() @@ -403,8 +432,9 @@ class TestMKLDNNAbsPrimitivesAlreadyExist(unittest.TestCase): return out_grad * np.sign(x) def test_check(self): - check_if_mkldnn_primitives_exist_in_bwd( - self, self.op_type, self.x, self.out, self.out_grad, self.x_grad) + check_if_mkldnn_primitives_exist_in_bwd(self, self.op_type, self.x, + self.out, self.out_grad, + self.x_grad) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_batch_norm_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_batch_norm_mkldnn_op.py index 85b398f6842..3c6640822ae 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_batch_norm_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_batch_norm_mkldnn_op.py @@ -28,6 +28,7 @@ _set_use_system_allocator(True) class TestMKLDNNBatchNormOpTraining(TestBatchNormOpTraining): + def init_kernel_type(self): self.use_mkldnn = True self.data_formats = ["NCHW"] @@ -44,19 +45,23 @@ class TestMKLDNNBatchNormOpTraining(TestBatchNormOpTraining): mean_out = saved_mean * (1. - momentum) + momentum * mean variance_out = saved_variance * (1. - momentum) + momentum * variance # run backward - x_grad, scale_grad, bias_grad = _reference_grad( - x, y_grad, scale, saved_mean, saved_variance, epsilon, data_layout) + x_grad, scale_grad, bias_grad = _reference_grad(x, y_grad, scale, + saved_mean, + saved_variance, epsilon, + data_layout) return y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad class TestMKLDNNBatchNormOpTraining_NHWC(TestMKLDNNBatchNormOpTraining): + def init_kernel_type(self): self.use_mkldnn = True self.data_formats = ["NHWC"] class TestMKLDNNBatchNormOpExistedPrimitives(TestMKLDNNBatchNormOpTraining): + def init_test_case(self): TestMKLDNNBatchNormOpTraining.init_test_case(self) self.fetch_list = ['y', 'x@GRAD'] @@ -82,11 +87,12 @@ class TestMKLDNNBatchNormOpExistedPrimitives(TestMKLDNNBatchNormOpTraining): var_dict['x@GRAD'] = x_grad var_dict['scale@GRAD'] = scale_grad var_dict['bias@GRAD'] = bias_grad - check_if_mkldnn_batchnorm_primitives_exist_in_bwd(self, var_dict, place, - shape, data_layout) + check_if_mkldnn_batchnorm_primitives_exist_in_bwd( + self, var_dict, place, shape, data_layout) class TestMKLDNNBatchNormOpInference(TestBatchNormOpInference): + def init_kernel_type(self): self.use_mkldnn = True @@ -97,6 +103,7 @@ class TestMKLDNNBatchNormOpInference(TestBatchNormOpInference): class TestMKLDNNBatchNormOpInference_NHWC(TestMKLDNNBatchNormOpInference): + def test_check_output(self): place = core.CPUPlace() data_format = "NHWC" @@ -104,6 +111,7 @@ class TestMKLDNNBatchNormOpInference_NHWC(TestMKLDNNBatchNormOpInference): class TestMKLDNNBatchNormOpWithReluInference(TestBatchNormOpInference): + def init_kernel_type(self): self.use_mkldnn = True self.fuse_with_relu = True diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_bilinear_interp_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_bilinear_interp_mkldnn_op.py index 
e740efa14c5..707f98d753b 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_bilinear_interp_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_bilinear_interp_mkldnn_op.py @@ -59,9 +59,10 @@ def bilinear_interp_mkldnn_np(input, input_h1_w0 = input[:, :, h1, w0] input_h0_w1 = input[:, :, h0, w1] input_h1_w1 = input[:, :, h1, w1] - out[:, :, oh, ow] = input_h0_w0 * (1 - Wh) * ( - 1 - Ww) + input_h1_w0 * Wh * (1 - Ww) + input_h0_w1 * ( - 1 - Wh) * Ww + input_h1_w1 * Wh * Ww + out[:, :, oh, + ow] = input_h0_w0 * (1 - Wh) * (1 - Ww) + input_h1_w0 * Wh * ( + 1 - Ww) + input_h0_w1 * (1 - + Wh) * Ww + input_h1_w1 * Wh * Ww if data_layout == "NHWC": out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC @@ -71,6 +72,7 @@ def bilinear_interp_mkldnn_np(input, @skip_check_grad_ci(reason="Haven not implement interpolate grad kernel.") class TestBilinearInterpMKLDNNOp(OpTest): + def init_test_case(self): pass @@ -129,6 +131,7 @@ class TestBilinearInterpMKLDNNOp(OpTest): class TestBilinearInterpOpMKLDNNNHWC(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [3, 2, 32, 16] self.out_h = 27 @@ -138,6 +141,7 @@ class TestBilinearInterpOpMKLDNNNHWC(TestBilinearInterpMKLDNNOp): class TestBilinearNeighborInterpMKLDNNCase2(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [3, 3, 9, 6] self.out_h = 12 @@ -146,6 +150,7 @@ class TestBilinearNeighborInterpMKLDNNCase2(TestBilinearInterpMKLDNNOp): class TestBilinearNeighborInterpDataLayout(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [2, 4, 4, 5] self.out_h = 6 @@ -155,6 +160,7 @@ class TestBilinearNeighborInterpDataLayout(TestBilinearInterpMKLDNNOp): class TestBilinearNeighborInterpCase3(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [1, 1, 32, 64] self.out_h = 64 @@ -163,6 +169,7 @@ class TestBilinearNeighborInterpCase3(TestBilinearInterpMKLDNNOp): class TestBilinearNeighborInterpCase4(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [4, 1, 7, 8] self.out_h = 1 @@ -172,6 +179,7 @@ class TestBilinearNeighborInterpCase4(TestBilinearInterpMKLDNNOp): class TestBilinearNeighborInterpCase5(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [1, 1, 9, 6] self.out_h = 12 @@ -181,6 +189,7 @@ class TestBilinearNeighborInterpCase5(TestBilinearInterpMKLDNNOp): class TestBilinearNeighborInterpCase6(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [1, 1, 32, 64] self.out_h = 64 @@ -190,6 +199,7 @@ class TestBilinearNeighborInterpCase6(TestBilinearInterpMKLDNNOp): class TestBilinearNeighborInterpSame(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [2, 3, 32, 64] self.out_h = 32 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_bilinear_interp_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_bilinear_interp_v2_mkldnn_op.py index e3b0639289a..8a9455690f4 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_bilinear_interp_v2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_bilinear_interp_v2_mkldnn_op.py @@ -59,9 +59,10 @@ def bilinear_interp_mkldnn_np(input, input_h1_w0 = input[:, :, h1, w0] input_h0_w1 = input[:, :, h0, w1] input_h1_w1 = input[:, :, h1, w1] - out[:, :, oh, ow] = input_h0_w0 * (1 - Wh) * ( - 1 - Ww) + input_h1_w0 * Wh * (1 - Ww) + input_h0_w1 * ( - 1 - Wh) * Ww + input_h1_w1 * Wh * Ww + out[:, :, oh, + ow] = input_h0_w0 * (1 - Wh) * (1 - Ww) + input_h1_w0 * Wh * ( + 1 - Ww) + 
input_h0_w1 * (1 - + Wh) * Ww + input_h1_w1 * Wh * Ww if data_layout == "NHWC": out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC @@ -71,6 +72,7 @@ def bilinear_interp_mkldnn_np(input, @skip_check_grad_ci(reason="Haven not implement interpolate grad kernel.") class TestBilinearInterpMKLDNNOp(OpTest): + def init_test_case(self): pass @@ -146,6 +148,7 @@ class TestBilinearInterpMKLDNNOp(OpTest): class TestBilinearInterpOpMKLDNNNHWC(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [3, 2, 32, 16] self.out_h = 27 @@ -155,6 +158,7 @@ class TestBilinearInterpOpMKLDNNNHWC(TestBilinearInterpMKLDNNOp): class TestBilinearNeighborInterpMKLDNNCase2(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [3, 3, 9, 6] self.out_h = 12 @@ -162,6 +166,7 @@ class TestBilinearNeighborInterpMKLDNNCase2(TestBilinearInterpMKLDNNOp): class TestBilinearNeighborInterpCase3(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [1, 1, 32, 64] self.out_h = 64 @@ -170,6 +175,7 @@ class TestBilinearNeighborInterpCase3(TestBilinearInterpMKLDNNOp): class TestBilinearNeighborInterpCase4(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [1, 1, 32, 64] self.out_h = 64 @@ -179,6 +185,7 @@ class TestBilinearNeighborInterpCase4(TestBilinearInterpMKLDNNOp): class TestBilinearNeighborInterpCase5(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [1, 1, 9, 6] self.out_h = 12 @@ -187,6 +194,7 @@ class TestBilinearNeighborInterpCase5(TestBilinearInterpMKLDNNOp): class TestBilinearNeighborInterpCase6(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [1, 1, 32, 64] self.out_h = 64 @@ -196,6 +204,7 @@ class TestBilinearNeighborInterpCase6(TestBilinearInterpMKLDNNOp): class TestBilinearNeighborInterpSame(TestBilinearInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [2, 3, 32, 64] self.out_h = 32 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_cast_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_cast_mkldnn_op.py index 95de37fdc02..331fa871897 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_cast_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_cast_mkldnn_op.py @@ -27,6 +27,7 @@ from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestCastBF16ToFP32MKLDNNOp(OpTest): + def init_data(self): self.out = np.random.random(size=[10, 10]).astype("float32") self.x = convert_float_to_uint16(self.out) @@ -35,7 +36,8 @@ class TestCastBF16ToFP32MKLDNNOp(OpTest): self.init_data() self.inputs = {'X': self.x} self.outputs = {'Out': self.out} - prepare_dtype = lambda x: int(core.VarDesc.VarType.BF16 if x.dtype != np.float32 else core.VarDesc.VarType.FP32) + prepare_dtype = lambda x: int(core.VarDesc.VarType.BF16 if x.dtype != np + .float32 else core.VarDesc.VarType.FP32) self.attrs = { 'in_dtype': prepare_dtype(self.x), 'out_dtype': prepare_dtype(self.out), @@ -56,18 +58,21 @@ class TestCastBF16ToFP32MKLDNNOp(OpTest): class TestCastFP32ToBF16MKLDNNOp(TestCastBF16ToFP32MKLDNNOp): + def init_data(self): self.x = np.random.random(size=[2, 6]).astype("float32") self.out = convert_float_to_uint16(self.x) class TestCastBF16ToBF16MKLDNNOp(TestCastBF16ToFP32MKLDNNOp): + def init_data(self): self.x = np.random.random(size=[6, 13]).astype("uint16") self.out = self.x class 
TestCastFP32ToFP32MKLDNNOp(TestCastBF16ToFP32MKLDNNOp): + def init_data(self): self.x = np.random.random(size=[7, 15]).astype("float32") self.out = self.x diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_clip_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_clip_mkldnn_op.py index 97a91375318..adfd0613bd3 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_clip_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_clip_mkldnn_op.py @@ -22,6 +22,7 @@ import paddle.fluid.core as core @OpTestTool.skip_if_not_cpu_bf16() class TestClipOneDNNOp(OpTest): + def setUp(self): self.op_type = "clip" self.set_inputs() @@ -57,16 +58,19 @@ class TestClipOneDNNOp(OpTest): class TestClipMinAsInputOneDNNOp(TestClipOneDNNOp): + def set_additional_inputs(self): self.inputs['Min'] = np.array([6.8]).astype('float32') class TestClipMaxAsInputOneDNNOp(TestClipOneDNNOp): + def set_additional_inputs(self): self.inputs['Max'] = np.array([9.1]).astype('float32') class TestClipMaxAndMinAsInputsOneDNNOp(TestClipOneDNNOp): + def set_additional_inputs(self): self.inputs['Max'] = np.array([8.5]).astype('float32') self.inputs['Min'] = np.array([7.1]).astype('float32') @@ -74,8 +78,10 @@ class TestClipMaxAndMinAsInputsOneDNNOp(TestClipOneDNNOp): # BF16 TESTS def create_bf16_test_class(parent): + @OpTestTool.skip_if_not_cpu_bf16() class TestClipBF16OneDNNOp(parent): + def set_inputs(self): self.x_fp32 = np.random.random((10, 10)).astype(np.float32) * 25 self.inputs = {'X': convert_float_to_uint16(self.x_fp32)} diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_bf16_mkldnn_op.py index e53afaa57be..a39f14b0b3a 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_bf16_mkldnn_op.py @@ -26,6 +26,7 @@ from paddle import enable_static @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestConcatBf16Op(OpTest): + def setUp(self): self.op_type = "concat" self.use_mkldnn = True @@ -43,8 +44,8 @@ class TestConcatBf16Op(OpTest): self.sections = [self.x0.shape[self.axis]] * 2 self.sections[1] += self.x1.shape[self.axis] - self.output = np.concatenate( - (self.x0, self.x1, self.x2), axis=self.axis).astype(np.uint16) + self.output = np.concatenate((self.x0, self.x1, self.x2), + axis=self.axis).astype(np.uint16) self.outputs = {'Out': self.output} def calculate_grads(self): @@ -85,6 +86,7 @@ class TestConcatBf16Op(OpTest): class TestAxis1Case(TestConcatBf16Op): + def init_axis(self): self.axis = 1 @@ -98,6 +100,7 @@ class TestAxis1Case(TestConcatBf16Op): class TestAxis2Case(TestConcatBf16Op): + def init_axis(self): self.axis = 2 @@ -111,6 +114,7 @@ class TestAxis2Case(TestConcatBf16Op): class TestAxis3Case(TestConcatBf16Op): + def init_axis(self): self.axis = 3 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_int8_mkldnn_op.py index ef2fa1c1cc2..900de9ab9ca 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_int8_mkldnn_op.py @@ -20,6 +20,7 @@ from paddle.fluid.tests.unittests.op_test import OpTest class TestConcatOp(OpTest): + def setUp(self): self.op_type = "concat" self.use_mkldnn = True @@ -30,8 +31,8 @@ class TestConcatOp(OpTest): self.inputs = {'X': [('x0', self.x0), ('x1', 
self.x1), ('x2', self.x2)]} self.attrs = {'axis': self.axis, 'use_mkldnn': True} - self.output = np.concatenate( - (self.x0, self.x1, self.x2), axis=self.axis).astype('int') + self.output = np.concatenate((self.x0, self.x1, self.x2), + axis=self.axis).astype('int') self.outputs = {'Out': self.output} @@ -59,6 +60,7 @@ class TestConcatOp(OpTest): class TestConcatOp2(TestConcatOp): + def init_test_data(self): self.x0 = (np.random.randint(0, 100, self.x0_shape)).astype('uint8') self.x1 = (np.random.randint(0, 50, self.x1_shape)).astype('uint8') @@ -78,6 +80,7 @@ def create_test_int8_class(parent): #--------------------test concat s8/u8 in with axis 1-------------------- class TestAxis1Case(parent): + def init_axis(self): self.axis = 1 @@ -89,6 +92,7 @@ def create_test_int8_class(parent): #--------------------test concat s8/u8 in with axis 2-------------------- class TestAxis2Case(parent): + def init_axis(self): self.axis = 2 @@ -97,9 +101,11 @@ def create_test_int8_class(parent): self.x1_shape = [2, 3, 5, 5] self.x2_shape = [2, 3, 6, 5] + #--------------------test concat s8/u8 in with axis 3-------------------- class TestAxis3Case(parent): + def init_axis(self): self.axis = 3 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py index 7fc8f1d3080..18b5705ec01 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py @@ -24,6 +24,7 @@ from paddle import enable_static class TestConcatAxis0OneDNNOp(OpTest): + def setUp(self): self.op_type = "concat" self.mkldnn_data_type = "float32" @@ -38,8 +39,8 @@ class TestConcatAxis0OneDNNOp(OpTest): 'mkldnn_data_type': self.mkldnn_data_type } - self.output = np.concatenate( - (self.x0, self.x1, self.x2), axis=self.axis).astype(self.dtype) + self.output = np.concatenate((self.x0, self.x1, self.x2), + axis=self.axis).astype(self.dtype) self.outputs = {'Out': self.output} @@ -70,6 +71,7 @@ class TestConcatAxis0OneDNNOp(OpTest): class TestConcatAxis1OneDNNOp(TestConcatAxis0OneDNNOp): + def init_axis(self): self.axis = 1 @@ -80,6 +82,7 @@ class TestConcatAxis1OneDNNOp(TestConcatAxis0OneDNNOp): class TestConcatAxis2OneDNNOp(TestConcatAxis0OneDNNOp): + def init_axis(self): self.axis = 2 @@ -90,6 +93,7 @@ class TestConcatAxis2OneDNNOp(TestConcatAxis0OneDNNOp): class TestConcatAxis3OneDNNOp(TestConcatAxis0OneDNNOp): + def init_axis(self): self.axis = 3 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_bf16_mkldnn_op.py index 702d26b073b..02c6a5c3dae 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_bf16_mkldnn_op.py @@ -32,6 +32,7 @@ def conv2d_residual_naive(out, residual): @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestConv2DBF16Op(TestConv2DOp): + def setUp(self): self.op_type = "conv2d" self.use_cudnn = False @@ -63,8 +64,9 @@ class TestConv2DBF16Op(TestConv2DOp): self.inputs_fp32 = {'Input': self.input, 'Filter': self.filter} - conv_out, _, _, _, _ = conv2d_forward_naive( - self.input, self.filter, self.groups, self.conv2d_param) + conv_out, _, _, _, _ = conv2d_forward_naive(self.input, self.filter, + self.groups, + self.conv2d_param) self.conv_output_float = conv_out if self.fuse_residual: @@ -88,9 +90,10 @@ class 
TestConv2DBF16Op(TestConv2DOp): self.filter = convert_float_to_uint16(self.filter) self.inputs = { - 'Input': self.input, - 'Filter': OpTest.np_dtype_to_fluid_dtype( - self.filter.astype(self.weight_type)) + 'Input': + self.input, + 'Filter': + OpTest.np_dtype_to_fluid_dtype(self.filter.astype(self.weight_type)) } if self.fuse_residual: @@ -156,6 +159,7 @@ class TestConv2DBF16Op(TestConv2DOp): @OpTestTool.skip_if_not_cpu_bf16() class TestConv2DWithGradBF16Op(TestConv2DBF16Op): + def init_fuse_relu(self): self.fuse_activation = None @@ -233,8 +237,8 @@ def conv_backward(dout, x, w, params): for l in range(W_out): for ic in range(IC): dweights[oc, ic, i, j] += x_padded[ - n, ic, i + k * stride[0], j + l * stride[ - 1]] * dout[n, oc, k, l] + n, ic, i + k * stride[0], + j + l * stride[1]] * dout[n, oc, k, l] dx_padded = np.pad(dx, ((0, ), (0, ), (padding, ), (padding, )), 'constant') @@ -250,9 +254,10 @@ def conv_backward(dout, x, w, params): for kh in range(KH): for kw in range(KW): for ic in range(IC): - dx_padded[n, ic, stride[0] * i + kh, stride[1] * - j + kw] += dout[n, oc, i, j] * w[ - oc, ic, kh, kw] + dx_padded[n, ic, stride[0] * i + kh, + stride[1] * j + + kw] += dout[n, oc, i, j] * w[oc, ic, + kh, kw] if padding == 0: dx = dx_padded @@ -263,18 +268,21 @@ def conv_backward(dout, x, w, params): class TestConv2DBF16WithPadding1(TestConv2DWithGradBF16Op): + def init_test_case(self): TestConv2DWithGradBF16Op.init_test_case(self) self.pad = [1, 1] class TestConv2DBF16WithStride2(TestConv2DWithGradBF16Op): + def init_test_case(self): TestConv2DWithGradBF16Op.init_test_case(self) self.stride = [2, 3] class TestConv2D(TestConv2DBF16Op): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -289,6 +297,7 @@ class TestConv2D(TestConv2DBF16Op): class TestWithPad(TestConv2D): + def init_test_case(self): TestConv2D.init_test_case(self) self.pad = [1, 1] @@ -296,11 +305,13 @@ class TestWithPad(TestConv2D): class TestWithGroup(TestConv2D): + def init_group(self): self.groups = 3 class TestWithStride(TestConv2DBF16Op): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -315,6 +326,7 @@ class TestWithStride(TestConv2DBF16Op): class TestWithDilations(TestConv2DBF16Op): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -330,6 +342,7 @@ class TestWithDilations(TestConv2DBF16Op): class TestWith1x1ForceFP32Output(TestConv2DBF16Op): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -346,6 +359,7 @@ class TestWith1x1ForceFP32Output(TestConv2DBF16Op): class TestWithInput1x1Filter1x1(TestConv2DBF16Op): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py index 6fc01488c7e..111def512ee 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py @@ -30,6 +30,7 @@ def conv2d_forward_refer(input, filter, group, conv_param): @unittest.skipIf(not core.supports_int8(), "place does not support int8 computation") class TestConv2DInt8Op(TestConv2DOp): + def setUp(self): self.op_type = "conv2d" self.use_cudnn = False @@ -71,9 +72,9 @@ class TestConv2DInt8Op(TestConv2DOp): scale_output_shift = scale_output_shift / avx_scale def conv2d_forward_refer_helper(input_): - return conv2d_forward_refer( - input_.astype(np.int32), filter_int, self.groups, - 
conv2d_param).astype(np.float32) * scale_output_shift + return conv2d_forward_refer(input_.astype(np.int32), filter_int, + self.groups, conv2d_param).astype( + np.float32) * scale_output_shift def residual_helper(init_low, init_high, output_): input_residual_ = np.random.randint( @@ -123,8 +124,7 @@ class TestConv2DInt8Op(TestConv2DOp): output = np.round(output).astype(self.dsttype) self.inputs = { - 'Input': - OpTest.np_dtype_to_fluid_dtype(input.astype(self.srctype)), + 'Input': OpTest.np_dtype_to_fluid_dtype(input.astype(self.srctype)), 'Filter': OpTest.np_dtype_to_fluid_dtype(filter) } if self.fuse_residual: @@ -154,8 +154,9 @@ class TestConv2DInt8Op(TestConv2DOp): def test_check_output(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_output_with_place( - core.CPUPlace(), atol=0, check_dygraph=False) + self.check_output_with_place(core.CPUPlace(), + atol=0, + check_dygraph=False) def test_check_grad(self): pass @@ -194,6 +195,7 @@ class TestConv2DInt8Op(TestConv2DOp): class TestConv2D(TestConv2DInt8Op): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -209,6 +211,7 @@ class TestConv2D(TestConv2DInt8Op): class TestWithHardSwish(TestConv2D): + def init_fuse_activation(self): self.fuse_activation = "hard_swish" self.fuse_alpha = 0 @@ -216,6 +219,7 @@ class TestWithHardSwish(TestConv2D): class TestWithRelu6(TestConv2D): + def init_fuse_activation(self): self.fuse_activation = "relu6" self.fuse_alpha = 6 @@ -223,6 +227,7 @@ class TestWithRelu6(TestConv2D): class TestWithSwish(TestConv2D): + def init_fuse_activation(self): self.fuse_activation = "swish" self.fuse_alpha = 1 @@ -230,6 +235,7 @@ class TestWithSwish(TestConv2D): class TestWithLeakyRelu(TestConv2D): + def init_fuse_activation(self): self.fuse_activation = "leaky_relu" self.fuse_alpha = 0.02 @@ -237,6 +243,7 @@ class TestWithLeakyRelu(TestConv2D): class TestWithPad(TestConv2D): + def init_test_case(self): TestConv2D.init_test_case(self) self.pad = [1, 1] @@ -244,11 +251,13 @@ class TestWithPad(TestConv2D): class TestWithGroup(TestConv2D): + def init_group(self): self.groups = 3 class TestWithStride(TestConv2DInt8Op): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -264,6 +273,7 @@ class TestWithStride(TestConv2DInt8Op): class TestWithDilations(TestConv2DInt8Op): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -280,6 +290,7 @@ class TestWithDilations(TestConv2DInt8Op): class TestWith1x1(TestConv2DInt8Op): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -295,6 +306,7 @@ class TestWith1x1(TestConv2DInt8Op): class TestWithInput1x1Filter1x1(TestConv2DInt8Op): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -325,31 +337,37 @@ def create_test_int8_class(parent): # --------------------test conv2d s8 in and u8 out-------------------- class TestS8U8Case(parent): + def init_data_type(self): init_data_type_with_fusion(self, np.int8, "relu", False) # --------------------test conv2d s8 in and s8 out-------------------- class TestS8S8Case(parent): + def init_data_type(self): init_data_type_with_fusion(self, np.int8, "", False) # --------------------test conv2d u8 in and s8 out-------------------- class TestU8S8Case(parent): + def init_data_type(self): init_data_type_with_fusion(self, np.uint8, "", False) # --------------------test conv2d u8 in and u8 out without residual fuse-------------------- class TestU8U8Case(parent): + def init_data_type(self): init_data_type_with_fusion(self, np.uint8, "relu", False) # 
--------------------test conv2d s8 in and s8 out with residual fuse-------------------- class TestS8S8ResCase(parent): + def init_data_type(self): init_data_type_with_fusion(self, np.int8, "", True) # --------------------test conv2d u8 in and s8 out with residual fuse-------------------- class TestU8S8ResCase(parent): + def init_data_type(self): init_data_type_with_fusion(self, np.uint8, "", True) @@ -358,10 +376,10 @@ def create_test_int8_class(parent): cls_name_u8s8 = "{0}_relu_{1}_residual_0".format(parent.__name__, "0") cls_name_u8u8 = "{0}_relu_{1}_residual_0".format(parent.__name__, "1") - cls_name_s8s8_re_1 = "{0}_relu_{1}_residual_{2}".format(parent.__name__, - "0", "1") - cls_name_u8s8_re_1 = "{0}_relu_{1}_residual_{2}".format(parent.__name__, - "0", "1") + cls_name_s8s8_re_1 = "{0}_relu_{1}_residual_{2}".format( + parent.__name__, "0", "1") + cls_name_u8s8_re_1 = "{0}_relu_{1}_residual_{2}".format( + parent.__name__, "0", "1") TestS8U8Case.__name__ = cls_name_s8u8 TestS8S8Case.__name__ = cls_name_s8s8 TestU8S8Case.__name__ = cls_name_u8s8 @@ -379,11 +397,12 @@ def create_test_int8_class(parent): if os.name != 'nt': # --------------------test conv2d s8 in and u8 out with residual fuse-------------------- class TestS8U8ResCase(parent): + def init_data_type(self): init_data_type_with_fusion(self, np.int8, "relu", True) - cls_name_s8u8_re_1 = "{0}_relu_{1}_residual_{2}".format(parent.__name__, - "1", "1") + cls_name_s8u8_re_1 = "{0}_relu_{1}_residual_{2}".format( + parent.__name__, "1", "1") TestS8U8ResCase.__name__ = cls_name_s8u8_re_1 globals()[cls_name_s8u8_re_1] = TestS8U8ResCase @@ -398,6 +417,7 @@ create_test_int8_class(TestWithInput1x1Filter1x1) class TestConv2DOp_AsyPadding_INT_MKLDNN(TestConv2DInt8Op): + def init_kernel_type(self): self.use_mkldnn = True @@ -407,12 +427,14 @@ class TestConv2DOp_AsyPadding_INT_MKLDNN(TestConv2DInt8Op): class TestConv2DOp_Same_INT_MKLDNN(TestConv2DOp_AsyPadding_INT_MKLDNN): + def init_paddings(self): self.pad = [0, 0] self.padding_algorithm = "SAME" class TestConv2DOp_Valid_INT_MKLDNN(TestConv2DOp_AsyPadding_INT_MKLDNN): + def init_paddings(self): self.pad = [1, 1] self.padding_algorithm = "VALID" diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py index 39f55fb45b8..0471c295ad4 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py @@ -38,6 +38,7 @@ def conv2d_residual_naive(out, residual): class TestConv2DMKLDNNOp(TestConv2DOp): + def init_group(self): self.groups = 1 @@ -106,6 +107,7 @@ class TestConv2DMKLDNNOp(TestConv2DOp): @skip_check_grad_ci( reason="Fusion is for inference only, check_grad is not required.") class TestWithbreluFusion(TestConv2DMKLDNNOp): + def init_test_case(self): TestConv2DMKLDNNOp.init_test_case(self) self.fuse_activation = "relu6" @@ -116,6 +118,7 @@ class TestWithbreluFusion(TestConv2DMKLDNNOp): @skip_check_grad_ci( reason="Fusion is for inference only, check_grad is not required.") class TestWithFuse(TestConv2DMKLDNNOp): + def init_test_case(self): TestConv2DMKLDNNOp.init_test_case(self) self.pad = [1, 1] @@ -126,6 +129,7 @@ class TestWithFuse(TestConv2DMKLDNNOp): class TestWithPadWithBias(TestConv2DMKLDNNOp): + def init_test_case(self): TestConv2DMKLDNNOp.init_test_case(self) self.pad = [1, 1] @@ -133,6 +137,7 @@ class TestWithPadWithBias(TestConv2DMKLDNNOp): class TestWithStride(TestConv2DMKLDNNOp): + def 
init_test_case(self): TestConv2DMKLDNNOp.init_test_case(self) self.pad = [1, 1] @@ -141,6 +146,7 @@ class TestWithStride(TestConv2DMKLDNNOp): class TestWithGroup(TestConv2DMKLDNNOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -154,12 +160,14 @@ class TestWithGroup(TestConv2DMKLDNNOp): class TestWith1x1(TestConv2DMKLDNNOp): + def init_test_case(self): TestConv2DMKLDNNOp.init_test_case(self) self.filter_size = [40, 3, 1, 1] class TestWithInput1x1Filter1x1(TestConv2DMKLDNNOp): + def init_test_case(self): TestConv2DMKLDNNOp.init_test_case(self) self.input_size = [2, 60, 1, 1] # NCHW @@ -172,6 +180,7 @@ class TestWithInput1x1Filter1x1(TestConv2DMKLDNNOp): class TestConv2DOp_AsyPadding_MKLDNN(TestConv2DOp_v2): + def init_kernel_type(self): self.use_mkldnn = True self.dtype = np.float32 @@ -182,18 +191,21 @@ class TestConv2DOp_AsyPadding_MKLDNN(TestConv2DOp_v2): class TestConv2DOp_Same_MKLDNN(TestConv2DOp_AsyPadding_MKLDNN): + def init_paddings(self): self.pad = [0, 0] self.padding_algorithm = "SAME" class TestConv2DOp_Valid_MKLDNN(TestConv2DOp_AsyPadding_MKLDNN): + def init_paddings(self): self.pad = [1, 1] self.padding_algorithm = "VALID" class TestConv2DOp_Valid_NHWC_MKLDNN(TestConv2DOp_Valid_MKLDNN): + def init_data_format(self): self.data_format = "NHWC" @@ -203,18 +215,21 @@ class TestConv2DOp_Valid_NHWC_MKLDNN(TestConv2DOp_Valid_MKLDNN): class TestConv2DOp_Same_NHWC_MKLDNN(TestConv2DOp_Valid_NHWC_MKLDNN): + def init_paddings(self): self.pad = [0, 0] self.padding_algorithm = "SAME" class TestConv2DOp_AsyPadding_NHWC_MKLDNN(TestConv2DOp_Valid_NHWC_MKLDNN): + def init_paddings(self): self.pad = [0, 0, 1, 2] self.padding_algorithm = "EXPLICIT" class TestMKLDNNDilations(TestConv2DMKLDNNOp): + def init_test_case(self): TestConv2DMKLDNNOp.init_test_case(self) self.pad = [0, 0] @@ -234,6 +249,7 @@ class TestMKLDNNDilations(TestConv2DMKLDNNOp): # TODO(chenweihang): To solve the coverage problem, add this unittest, # remove this unittest after new executor set to default executor class TestConv2dMKLDNNByNewExecutor(TestConv2DMKLDNNOp): + def test_check_output_by_new_executor(self): os.environ['FLAGS_USE_STANDALONE_EXECUTOR'] = '1' self.test_check_output() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_bf16_mkldnn_op.py index c6b7c175d90..1f2fba8d609 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_bf16_mkldnn_op.py @@ -34,6 +34,7 @@ def conv2d_bias_naive(out, bias): @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestConv2DTransposeBF16MKLDNNOp(OpTest): + def test_check_output(self): self.check_output_with_place(core.CPUPlace()) @@ -133,6 +134,7 @@ class TestConv2DTransposeBF16MKLDNNOp(OpTest): class TestMKLDNNFuseBias(TestConv2DTransposeBF16MKLDNNOp): + def init_test_case(self): super(TestMKLDNNFuseBias, self).init_test_case() self.pad = [1, 1] @@ -141,6 +143,7 @@ class TestMKLDNNFuseBias(TestConv2DTransposeBF16MKLDNNOp): class TestMKLDNNWithPad(TestConv2DTransposeBF16MKLDNNOp): + def init_test_case(self): super(TestMKLDNNWithPad, self).init_test_case() self.pad = [1, 1] @@ -148,6 +151,7 @@ class TestMKLDNNWithPad(TestConv2DTransposeBF16MKLDNNOp): class TestMKLDNNWithStride(TestConv2DTransposeBF16MKLDNNOp): + def init_test_case(self): super(TestMKLDNNWithStride, self).init_test_case() self.pad = 
[1, 1] @@ -156,6 +160,7 @@ class TestMKLDNNWithStride(TestConv2DTransposeBF16MKLDNNOp): class TestMKLDNNWithAsymPad(TestConv2DTransposeBF16MKLDNNOp): + def init_test_case(self): super(TestMKLDNNWithAsymPad, self).init_test_case() self.pad = [0, 0, 1, 2] @@ -163,6 +168,7 @@ class TestMKLDNNWithAsymPad(TestConv2DTransposeBF16MKLDNNOp): class TestMKLDNNWithSamePad(TestConv2DTransposeBF16MKLDNNOp): + def init_test_case(self): super(TestMKLDNNWithSamePad, self).init_test_case() self.pad = [0, 0] @@ -170,6 +176,7 @@ class TestMKLDNNWithSamePad(TestConv2DTransposeBF16MKLDNNOp): class TestMKLDNNWithValidPad(TestConv2DTransposeBF16MKLDNNOp): + def init_test_case(self): super(TestMKLDNNWithValidPad, self).init_test_case() self.pad = [1, 1] @@ -177,6 +184,7 @@ class TestMKLDNNWithValidPad(TestConv2DTransposeBF16MKLDNNOp): class TestMKLDNNWithValidPad_NHWC(TestMKLDNNWithValidPad): + def init_test_case(self): super(TestMKLDNNWithValidPad_NHWC, self).init_test_case() self.data_format = 'NHWC' @@ -186,6 +194,7 @@ class TestMKLDNNWithValidPad_NHWC(TestMKLDNNWithValidPad): class TestConv2DTransposeMKLDNNWithDilationsExplicitPad( TestConv2DTransposeBF16MKLDNNOp): + def init_test_case(self): super(TestConv2DTransposeMKLDNNWithDilationsExplicitPad, self).init_test_case() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py index a36fc28013b..05c7cf18152 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py @@ -31,6 +31,7 @@ def conv2d_bias_naive(out, bias): class TestConv2DTransposeMKLDNNOp(TestConv2DTransposeOp): + def test_check_grad(self): return @@ -89,6 +90,7 @@ class TestConv2DTransposeMKLDNNOp(TestConv2DTransposeOp): class TestMKLDNNFuseBias(TestConv2DTransposeMKLDNNOp): + def init_test_case(self): TestConv2DTransposeMKLDNNOp.init_test_case(self) self.pad = [1, 1] @@ -97,6 +99,7 @@ class TestMKLDNNFuseBias(TestConv2DTransposeMKLDNNOp): class TestMKLDNNWithPad(TestConv2DTransposeMKLDNNOp): + def init_test_case(self): TestConv2DTransposeMKLDNNOp.init_test_case(self) self.pad = [1, 1] @@ -104,6 +107,7 @@ class TestMKLDNNWithPad(TestConv2DTransposeMKLDNNOp): class TestMKLDNNWithStride(TestConv2DTransposeMKLDNNOp): + def init_test_case(self): TestConv2DTransposeMKLDNNOp.init_test_case(self) self.pad = [1, 1] @@ -112,6 +116,7 @@ class TestMKLDNNWithStride(TestConv2DTransposeMKLDNNOp): class TestMKLDNNWithAsymPad(TestConv2DTransposeMKLDNNOp): + def init_test_case(self): TestConv2DTransposeMKLDNNOp.init_test_case(self) self.pad = [0, 0, 1, 2] @@ -119,6 +124,7 @@ class TestMKLDNNWithAsymPad(TestConv2DTransposeMKLDNNOp): class TestMKLDNNWithSamePad(TestConv2DTransposeMKLDNNOp): + def init_test_case(self): TestConv2DTransposeMKLDNNOp.init_test_case(self) self.pad = [0, 0] @@ -126,6 +132,7 @@ class TestMKLDNNWithSamePad(TestConv2DTransposeMKLDNNOp): class TestMKLDNNWithValidPad(TestConv2DTransposeMKLDNNOp): + def init_test_case(self): TestConv2DTransposeMKLDNNOp.init_test_case(self) self.pad = [1, 1] @@ -133,6 +140,7 @@ class TestMKLDNNWithValidPad(TestConv2DTransposeMKLDNNOp): class TestMKLDNNWithValidPad_NHWC(TestMKLDNNWithValidPad): + def init_test_case(self): super(TestMKLDNNWithValidPad_NHWC, self).init_test_case() self.data_format = "NHWC" @@ -142,6 +150,7 @@ class TestMKLDNNWithValidPad_NHWC(TestMKLDNNWithValidPad): class TestConv2DTransposeMKLDNNWithDilationsExplicitPad( 
TestConv2DTransposeMKLDNNOp): + def init_test_case(self): TestConv2DTransposeMKLDNNOp.init_test_case(self) self.stride = [2, 1] @@ -155,6 +164,7 @@ class TestConv2DTransposeMKLDNNWithDilationsExplicitPad( class TestMKLDNNWithGroups(TestConv2DTransposeMKLDNNOp): + def init_test_case(self): TestConv2DTransposeMKLDNNOp.init_test_case(self) self.pad = [1, 1] @@ -165,6 +175,7 @@ class TestMKLDNNWithGroups(TestConv2DTransposeMKLDNNOp): class TestMKLDNNWithGroups_NHWC(TestConv2DTransposeMKLDNNOp): + def init_test_case(self): TestConv2DTransposeMKLDNNOp.init_test_case(self) self.pad = [1, 1] diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_conv3d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv3d_mkldnn_op.py index dcaee49558b..ae2abb18f13 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_conv3d_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv3d_mkldnn_op.py @@ -20,6 +20,7 @@ from paddle.fluid.tests.unittests.test_conv3d_op import TestConv3DOp, TestCase1, class TestMKLDNN(TestConv3DOp): + def init_kernel_type(self): self.use_mkldnn = True self.data_format = "NCHW" @@ -27,6 +28,7 @@ class TestMKLDNN(TestConv3DOp): class TestMKLDNNCase1(TestCase1): + def init_kernel_type(self): self.use_mkldnn = True self.data_format = "NCHW" @@ -34,6 +36,7 @@ class TestMKLDNNCase1(TestCase1): class TestMKLDNNGroup1(TestWithGroup1): + def init_kernel_type(self): self.use_mkldnn = True self.data_format = "NCHW" @@ -41,6 +44,7 @@ class TestMKLDNNGroup1(TestWithGroup1): class TestMKLDNNGroup2(TestWithGroup2): + def init_kernel_type(self): self.use_mkldnn = True self.data_format = "NCHW" @@ -48,6 +52,7 @@ class TestMKLDNNGroup2(TestWithGroup2): class TestMKLDNNWith1x1(TestWith1x1): + def init_kernel_type(self): self.use_mkldnn = True self.data_format = "NCHW" @@ -55,6 +60,7 @@ class TestMKLDNNWith1x1(TestWith1x1): class TestMKLDNNWithInput1x1Filter1x1(TestWithInput1x1Filter1x1): + def init_kernel_type(self): self.use_mkldnn = True self.data_format = "NCHW" @@ -62,6 +68,7 @@ class TestMKLDNNWithInput1x1Filter1x1(TestWithInput1x1Filter1x1): class TestConv3DOp_AsyPadding_MKLDNN(TestConv3DOp): + def init_kernel_type(self): self.use_mkldnn = True self.data_format = "NCHW" @@ -73,6 +80,7 @@ class TestConv3DOp_AsyPadding_MKLDNN(TestConv3DOp): class TestConv3DOp_Same_MKLDNN(TestConv3DOp_AsyPadding_MKLDNN): + def init_paddings(self): self.pad = [0, 0, 0] self.padding_algorithm = "SAME" @@ -84,6 +92,7 @@ class TestConv3DOp_Same_MKLDNN(TestConv3DOp_AsyPadding_MKLDNN): class TestConv3DOp_Valid_MKLDNN(TestConv3DOp_AsyPadding_MKLDNN): + def init_paddings(self): self.pad = [1, 1, 1] self.padding_algorithm = "VALID" diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py index fae52ab833b..fcd1f26d72c 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py @@ -21,6 +21,7 @@ import paddle class TestDeQuantizeOp(OpTest): + def setUp(self): self.op_type = 'dequantize' self.scale = 127.0 @@ -46,12 +47,12 @@ class TestDeQuantizeOp(OpTest): def prepare_input_int8(self): if self.data_type == 'int8': # input data values are integers from interval [-128, 128) - self.input = (np.random.randint(0, 256, self.input_size) - 128 - ).astype(self.data_type) + self.input = (np.random.randint(0, 256, self.input_size) - + 128).astype(self.data_type) else: # input data values are 
integers from interval [0, 256) - self.input = (np.random.randint( - 0, 256, self.input_size)).astype(self.data_type) + self.input = (np.random.randint(0, 256, self.input_size)).astype( + self.data_type) self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(self.input)} self.attrs = {'Scale': self.scale, 'Shift': self.shift} @@ -88,6 +89,7 @@ class TestDeQuantizeOp(OpTest): class TestDeQuantizeOp1(TestDeQuantizeOp): + def set_scale(self): self.scale = 1.5 @@ -96,6 +98,7 @@ class TestDeQuantizeOp1(TestDeQuantizeOp): class TestDeQuantizeOp2(TestDeQuantizeOp): + def set_scale(self): self.scale = 0.8 @@ -104,6 +107,7 @@ class TestDeQuantizeOp2(TestDeQuantizeOp): class TestDeQuantizeOpBf16(TestDeQuantizeOp): + def set_scale(self): self.scale = 1.0 @@ -114,6 +118,7 @@ class TestDeQuantizeOpBf16(TestDeQuantizeOp): # 2-dim input # P - positive input, with shift class TestDeQuantizeOpShift_2_P(TestDeQuantizeOp): + def set_data_type(self): self.data_type = 'uint8' @@ -130,6 +135,7 @@ class TestDeQuantizeOpShift_2_P(TestDeQuantizeOp): # 2-dim input # N - negative input, with shift class TestDeQuantizeOpShift_2_N(TestDeQuantizeOpShift_2_P): + def set_data_type(self): self.data_type = 'int8' @@ -145,22 +151,26 @@ class TestDeQuantizeOpShift_2_N(TestDeQuantizeOpShift_2_P): # 3-dim input class TestDeQuantizeOpShift_3_P(TestDeQuantizeOpShift_2_P): + def set_input_size(self): self.input_size = [2, 3, 4] class TestDeQuantizeOpShift_3_N(TestDeQuantizeOpShift_2_N): + def set_input_size(self): self.input_size = [2, 3, 4] # 4-dim input class TestDeQuantizeOpShift_4_P(TestDeQuantizeOpShift_2_P): + def set_input_size(self): self.input_size = [2, 3, 4, 5] class TestDeQuantizeOpShift_4_N(TestDeQuantizeOpShift_2_N): + def set_input_size(self): self.input_size = [2, 3, 4, 5] diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_bf16_mkldnn_op.py index 3a20ffde7a1..3a9f535a833 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_bf16_mkldnn_op.py @@ -23,6 +23,7 @@ from paddle import enable_static @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestElementwiseAddBf16MklDNNOp(OpTest): + def setUp(self): self.op_type = "elementwise_add" self.use_mkldnn = True @@ -47,32 +48,30 @@ class TestElementwiseAddBf16MklDNNOp(OpTest): # elementwise_add grad (no braodcasting) is just passing upper gradients to either X or Y or both def test_check_grad_normal(self): - self.check_grad_with_place( - core.CPUPlace(), ["X", "Y"], - "Out", - check_dygraph=False, - user_defined_grads=[self.x, self.x], - user_defined_grad_outputs=[self.x_bf16]) + self.check_grad_with_place(core.CPUPlace(), ["X", "Y"], + "Out", + check_dygraph=False, + user_defined_grads=[self.x, self.x], + user_defined_grad_outputs=[self.x_bf16]) def test_check_grad_ingore_x(self): - self.check_grad_with_place( - core.CPUPlace(), ["Y"], - "Out", - check_dygraph=False, - user_defined_grads=[self.y], - user_defined_grad_outputs=[self.y_bf16]) + self.check_grad_with_place(core.CPUPlace(), ["Y"], + "Out", + check_dygraph=False, + user_defined_grads=[self.y], + user_defined_grad_outputs=[self.y_bf16]) def test_check_grad_ingore_y(self): - self.check_grad_with_place( - core.CPUPlace(), ["X"], - "Out", - check_dygraph=False, - user_defined_grads=[self.x], - user_defined_grad_outputs=[self.x_bf16]) + 
self.check_grad_with_place(core.CPUPlace(), ["X"], + "Out", + check_dygraph=False, + user_defined_grads=[self.x], + user_defined_grad_outputs=[self.x_bf16]) + +class TestElementwiseAddBroadCastingBf16MklDNNOp(TestElementwiseAddBf16MklDNNOp + ): -class TestElementwiseAddBroadCastingBf16MklDNNOp( - TestElementwiseAddBf16MklDNNOp): def generate_data(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 100]).astype(np.float32) self.y = np.random.uniform(1, 2, [100]).astype(np.float32) @@ -90,9 +89,8 @@ class TestElementwiseAddBroadCastingBf16MklDNNOp( core.CPUPlace(), ["X", "Y"], "Out", check_dygraph=False, - user_defined_grads=[ - self.x, self.compute_reduced_gradients(self.x) - ], + user_defined_grads=[self.x, + self.compute_reduced_gradients(self.x)], user_defined_grad_outputs=[self.x_bf16]) def test_check_grad_ingore_x(self): diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_mkldnn_op.py index 585ae38875c..2ae717d64a3 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_mkldnn_op.py @@ -21,6 +21,7 @@ from paddle import enable_static class TestMKLDNNElementwiseAddOp(TestElementwiseAddOp): + def init_kernel_type(self): self.use_mkldnn = True @@ -29,6 +30,7 @@ class TestMKLDNNElementwiseAddOp(TestElementwiseAddOp): class TestMKLDNNElementwiseAddOp2(TestMKLDNNElementwiseAddOp): + def init_input_output(self): self.x = np.random.random((100, )).astype(self.dtype) self.y = np.random.random((100, )).astype(self.dtype) @@ -36,6 +38,7 @@ class TestMKLDNNElementwiseAddOp2(TestMKLDNNElementwiseAddOp): class TestMKLDNNElementwiseAddOp3(TestMKLDNNElementwiseAddOp): + def init_input_output(self): self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) @@ -43,6 +46,7 @@ class TestMKLDNNElementwiseAddOp3(TestMKLDNNElementwiseAddOp): class TestMKLDNNElementwiseAddOp4(TestMKLDNNElementwiseAddOp): + def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 32]).astype(self.dtype) self.y = np.random.uniform(1, 2, [4, 32]).astype(self.dtype) @@ -57,6 +61,7 @@ class TestMKLDNNElementwiseAddOp4(TestMKLDNNElementwiseAddOp): class TestMKLDNNElementwiseAddOp5(TestMKLDNNElementwiseAddOp): + def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 100]).astype(self.dtype) self.y = np.random.uniform(1, 2, [100]).astype(self.dtype) @@ -64,6 +69,7 @@ class TestMKLDNNElementwiseAddOp5(TestMKLDNNElementwiseAddOp): class TestMKLDNNElementwiseAddOp_broadcast_3(TestMKLDNNElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -74,6 +80,7 @@ class TestMKLDNNElementwiseAddOp_broadcast_3(TestMKLDNNElementwiseAddOp): class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestMKLDNNElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 12).astype(self.dtype) self.y = np.random.rand(2, 2, 10, 12).astype(self.dtype) @@ -99,6 +106,7 @@ class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestMKLDNNElementwiseAddOp): @skip_check_grad_ci( reason="oneDNN's int8 elementwise_ops don't implemend grad kernel.") class TestInt8(TestElementwiseAddOp): + def init_kernel_type(self): self.use_mkldnn = True self._cpu_only = True @@ -132,6 +140,7 @@ class TestInt8(TestElementwiseAddOp): class 
TestInt8Scales(TestInt8): + def quantize(self, tensor, dt="int8"): max_int = 127.0 if dt == "int8" else 255.0 scale = max_int / np.abs(np.amax(tensor)) @@ -156,11 +165,12 @@ class TestInt8Scales(TestInt8): # TODO(wangzhongpu): support mkldnn op in dygraph mode self.init_scales() int_atol = 1 # different quantization techniques - self.check_output( - check_dygraph=(self.use_mkldnn == False), atol=int_atol) + self.check_output(check_dygraph=(self.use_mkldnn == False), + atol=int_atol) class TestUint8Scales(TestInt8Scales): + def init_input_output(self): self.x_f = np.random.random((100, )).astype("float") self.y_f = np.random.random((100, )).astype("float") diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_div_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_div_mkldnn_op.py index a3c41d2f034..55b32e1088c 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_div_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_div_mkldnn_op.py @@ -24,6 +24,7 @@ import paddle.fluid.core as core @OpTestTool.skip_if(not (isinstance(_current_expected_place(), core.CPUPlace)), "GPU is not supported") class TestMKLDNNElementwiseDivOp(OpTest): + def setUp(self): self.op_type = "elementwise_div" self.init_dtype() @@ -65,6 +66,7 @@ class TestMKLDNNElementwiseDivOp(OpTest): class TestMKLDNNElementwiseDivOp2(TestMKLDNNElementwiseDivOp): + def init_input_output(self): self.x = np.random.uniform(0.1, 1, [100]).astype(self.dtype) self.y = np.random.uniform(0.1, 1, [100]).astype(self.dtype) @@ -72,6 +74,7 @@ class TestMKLDNNElementwiseDivOp2(TestMKLDNNElementwiseDivOp): class TestMKLDNNElementwiseDivOp3(TestMKLDNNElementwiseDivOp): + def init_input_output(self): self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) @@ -79,6 +82,7 @@ class TestMKLDNNElementwiseDivOp3(TestMKLDNNElementwiseDivOp): class TestMKLDNNElementwiseDivOp4(TestMKLDNNElementwiseDivOp): + def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 32]).astype(self.dtype) self.y = np.random.uniform(1, 2, [4, 32]).astype(self.dtype) @@ -93,6 +97,7 @@ class TestMKLDNNElementwiseDivOp4(TestMKLDNNElementwiseDivOp): class TestMKLDNNElementwiseDivOp5(TestMKLDNNElementwiseDivOp): + def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 100]).astype(self.dtype) self.y = np.random.uniform(1, 2, [100]).astype(self.dtype) @@ -108,6 +113,7 @@ class TestMKLDNNElementwiseDivOp5(TestMKLDNNElementwiseDivOp): @OpTestTool.skip_if_not_cpu_bf16() class TestBf16(TestMKLDNNElementwiseDivOp): + def setUp(self): self.op_type = "elementwise_div" self.init_dtype() @@ -134,24 +140,23 @@ class TestBf16(TestMKLDNNElementwiseDivOp): self.check_output_with_place(core.CPUPlace()) def test_check_grad_normal(self): - self.check_grad_with_place( - core.CPUPlace(), ["X", "Y"], - "Out", - user_defined_grads=[ - np.divide(self.x, self.y), np.divide( - (np.multiply(-self.x, self.x)), np.multiply(self.y, self.y)) - ], - user_defined_grad_outputs=[self.x_bf16]) + self.check_grad_with_place(core.CPUPlace(), ["X", "Y"], + "Out", + user_defined_grads=[ + np.divide(self.x, self.y), + np.divide((np.multiply(-self.x, self.x)), + np.multiply(self.y, self.y)) + ], + user_defined_grad_outputs=[self.x_bf16]) def test_check_grad_ignore_x(self): - self.check_grad_with_place( - core.CPUPlace(), ["Y"], - "Out", - user_defined_grads=[ - np.divide((np.multiply(-self.x, self.y)), - np.multiply(self.y, 
self.y)) - ], - user_defined_grad_outputs=[self.y_bf16]) + self.check_grad_with_place(core.CPUPlace(), ["Y"], + "Out", + user_defined_grads=[ + np.divide((np.multiply(-self.x, self.y)), + np.multiply(self.y, self.y)) + ], + user_defined_grad_outputs=[self.y_bf16]) def test_check_grad_ignore_y(self): self.check_grad_with_place( @@ -162,6 +167,7 @@ class TestBf16(TestMKLDNNElementwiseDivOp): class TestBf16Broadcasting(TestBf16): + def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 100]).astype(self.dtype) self.y = np.random.uniform(1, 2, [100]).astype(self.dtype) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_bf16_mkldnn_op.py index b67ae17ba3a..232c1afef4d 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_bf16_mkldnn_op.py @@ -23,6 +23,7 @@ from paddle import enable_static @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestElementwiseMulBf16MklDNNOp(OpTest): + def setUp(self): self.op_type = "elementwise_mul" self.use_mkldnn = True @@ -45,14 +46,14 @@ class TestElementwiseMulBf16MklDNNOp(OpTest): self.check_output_with_place(core.CPUPlace()) def test_check_grad_normal(self): - self.check_grad_with_place( - core.CPUPlace(), ["X", "Y"], - "Out", - check_dygraph=False, - user_defined_grads=[ - np.multiply(self.x, self.y), np.multiply(self.x, self.x) - ], - user_defined_grad_outputs=[self.x_bf16]) + self.check_grad_with_place(core.CPUPlace(), ["X", "Y"], + "Out", + check_dygraph=False, + user_defined_grads=[ + np.multiply(self.x, self.y), + np.multiply(self.x, self.x) + ], + user_defined_grad_outputs=[self.x_bf16]) def test_check_grad_ingore_x(self): self.check_grad_with_place( @@ -71,8 +72,9 @@ class TestElementwiseMulBf16MklDNNOp(OpTest): user_defined_grad_outputs=[self.x_bf16]) -class TestElementwiseMulBroadcastingBf16MklDNNOp( - TestElementwiseMulBf16MklDNNOp): +class TestElementwiseMulBroadcastingBf16MklDNNOp(TestElementwiseMulBf16MklDNNOp + ): + def generate_data(self): self.x = np.random.uniform(1, 2, [1, 2, 3, 100]).astype(np.float32) self.y = np.random.uniform(1, 2, [100]).astype(np.float32) @@ -85,7 +87,7 @@ class TestElementwiseMulBroadcastingBf16MklDNNOp( part_sum = np.add.reduceat(part_sum, [0], axis=2) return part_sum.flatten() - # TODO(jczaja): elementwise_mul bf16 grad got some potential + # TODO(jczaja): elementwise_mul bf16 grad got some potential # accuracy problems that need to be explained def test_check_grad_normal(self): pass diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_mkldnn_op.py index f2648e5b723..f369f8587b8 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_mkldnn_op.py @@ -21,6 +21,7 @@ from paddle import enable_static class TestMKLDNNElementwiseMulOp(ElementwiseMulOp): + def init_kernel_type(self): self.use_mkldnn = True @@ -29,6 +30,7 @@ class TestMKLDNNElementwiseMulOp(ElementwiseMulOp): class TestMKLDNNElementwiseMulOp2(TestMKLDNNElementwiseMulOp): + def init_input_output(self): self.x = np.random.random((100, )).astype(self.dtype) self.y = np.random.random((100, )).astype(self.dtype) @@ -36,6 +38,7 @@ class 
TestMKLDNNElementwiseMulOp2(TestMKLDNNElementwiseMulOp): class TestMKLDNNElementwiseMulOp3(TestMKLDNNElementwiseMulOp): + def init_input_output(self): self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) @@ -43,6 +46,7 @@ class TestMKLDNNElementwiseMulOp3(TestMKLDNNElementwiseMulOp): class TestMKLDNNElementwiseMulOp4(TestMKLDNNElementwiseMulOp): + def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 32]).astype(self.dtype) self.y = np.random.uniform(1, 2, [4, 32]).astype(self.dtype) @@ -57,6 +61,7 @@ class TestMKLDNNElementwiseMulOp4(TestMKLDNNElementwiseMulOp): class TestMKLDNNElementwiseMulOp5(TestMKLDNNElementwiseMulOp): + def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 100]).astype(self.dtype) self.y = np.random.uniform(1, 2, [100]).astype(self.dtype) @@ -79,6 +84,7 @@ class TestMKLDNNElementwiseMulOp5(TestMKLDNNElementwiseMulOp): @skip_check_grad_ci( reason="oneDNN's int8 elementwise_ops don't implemend grad kernel.") class TestInt8(ElementwiseMulOp): + def init_kernel_type(self): self.use_mkldnn = True self._cpu_only = True @@ -112,6 +118,7 @@ class TestInt8(ElementwiseMulOp): class TestInt8Scales(TestInt8): + def quantize(self, tensor, dt="int8"): max_int = 127.0 if dt == "int8" else 255.0 scale = max_int / np.abs(np.amax(tensor)) @@ -136,11 +143,12 @@ class TestInt8Scales(TestInt8): # TODO(wangzhongpu): support mkldnn op in dygraph mode self.init_scales() int_atol = 1 # different quantization techniques - self.check_output( - check_dygraph=(self.use_mkldnn == False), atol=int_atol) + self.check_output(check_dygraph=(self.use_mkldnn == False), + atol=int_atol) class TestUint8Scales(TestInt8Scales): + def init_input_output(self): self.x_f = np.random.random((100, )).astype("float") self.y_f = np.random.random((100, )).astype("float") diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_mkldnn_op.py index 62c8c9571b7..e70cc8e3779 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_mkldnn_op.py @@ -24,6 +24,7 @@ import paddle.fluid.core as core @OpTestTool.skip_if(not (isinstance(_current_expected_place(), core.CPUPlace)), "GPU is not supported") class TestMKLDNNElementwiseSubOp(OpTest): + def setUp(self): self.op_type = "elementwise_sub" self.init_dtype() @@ -65,6 +66,7 @@ class TestMKLDNNElementwiseSubOp(OpTest): class TestMKLDNNElementwiseSubOp2(TestMKLDNNElementwiseSubOp): + def init_input_output(self): self.x = np.random.random((100, )).astype(self.dtype) self.y = np.random.random((100, )).astype(self.dtype) @@ -72,6 +74,7 @@ class TestMKLDNNElementwiseSubOp2(TestMKLDNNElementwiseSubOp): class TestMKLDNNElementwiseSubOp3(TestMKLDNNElementwiseSubOp): + def init_input_output(self): self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) @@ -79,6 +82,7 @@ class TestMKLDNNElementwiseSubOp3(TestMKLDNNElementwiseSubOp): class TestMKLDNNElementwiseSubOp4(TestMKLDNNElementwiseSubOp): + def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 32]).astype(self.dtype) self.y = np.random.uniform(1, 2, [4, 32]).astype(self.dtype) @@ -86,6 +90,7 @@ class TestMKLDNNElementwiseSubOp4(TestMKLDNNElementwiseSubOp): class 
TestMKLDNNElementwiseSubOp5(TestMKLDNNElementwiseSubOp): + def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 100]).astype(self.dtype) self.y = np.random.uniform(1, 2, [100]).astype(self.dtype) @@ -93,6 +98,7 @@ class TestMKLDNNElementwiseSubOp5(TestMKLDNNElementwiseSubOp): class TestMKLDNNElementwiseSubOp_broadcast(TestMKLDNNElementwiseSubOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -103,6 +109,7 @@ class TestMKLDNNElementwiseSubOp_broadcast(TestMKLDNNElementwiseSubOp): class TestElementwiseSubOp_xsize_lessthan_ysize_sub(TestMKLDNNElementwiseSubOp): + def init_input_output(self): self.x = np.random.rand(10, 12).astype(self.dtype) self.y = np.random.rand(2, 2, 10, 12).astype(self.dtype) @@ -123,6 +130,7 @@ class TestElementwiseSubOp_xsize_lessthan_ysize_sub(TestMKLDNNElementwiseSubOp): @OpTestTool.skip_if_not_cpu_bf16() class TestBf16(TestMKLDNNElementwiseSubOp): + def setUp(self): self.op_type = "elementwise_sub" self.init_dtype() @@ -149,28 +157,26 @@ class TestBf16(TestMKLDNNElementwiseSubOp): self.check_output_with_place(core.CPUPlace()) def test_check_grad_normal(self): - self.check_grad_with_place( - core.CPUPlace(), ["X", "Y"], - "Out", - user_defined_grads=[self.x, -self.x], - user_defined_grad_outputs=[self.x_bf16]) + self.check_grad_with_place(core.CPUPlace(), ["X", "Y"], + "Out", + user_defined_grads=[self.x, -self.x], + user_defined_grad_outputs=[self.x_bf16]) def test_check_grad_ignore_x(self): - self.check_grad_with_place( - core.CPUPlace(), ["Y"], - "Out", - user_defined_grads=[-self.y], - user_defined_grad_outputs=[self.y_bf16]) + self.check_grad_with_place(core.CPUPlace(), ["Y"], + "Out", + user_defined_grads=[-self.y], + user_defined_grad_outputs=[self.y_bf16]) def test_check_grad_ignore_y(self): - self.check_grad_with_place( - core.CPUPlace(), ["X"], - "Out", - user_defined_grads=[self.x], - user_defined_grad_outputs=[self.x_bf16]) + self.check_grad_with_place(core.CPUPlace(), ["X"], + "Out", + user_defined_grads=[self.x], + user_defined_grad_outputs=[self.x_bf16]) class TestBf16Broadcasting(TestBf16): + def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 100]).astype(self.dtype) self.y = np.random.uniform(1, 2, [100]).astype(self.dtype) @@ -186,9 +192,8 @@ class TestBf16Broadcasting(TestBf16): self.check_grad_with_place( core.CPUPlace(), ["X", "Y"], "Out", - user_defined_grads=[ - self.x, self.compute_reduced_gradients(self.x) - ], + user_defined_grads=[self.x, + self.compute_reduced_gradients(self.x)], user_defined_grad_outputs=[self.x_bf16]) def test_check_grad_ignore_x(self): @@ -200,6 +205,7 @@ class TestBf16Broadcasting(TestBf16): class TestInt8(TestMKLDNNElementwiseSubOp): + def init_kernel_type(self): self.use_mkldnn = True self._cpu_only = True diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py index 6229b7f559b..b179571e8f0 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool, convert_flo @OpTestTool.skip_if(core.is_compiled_with_cuda(), "CUDA required dygraph so oneDNN UT must be skipped") class TestExpandV2OneDNNOp(OpTest): + def setUp(self): self.op_type = "expand_v2" self.init_data() @@ -53,6 +54,7 @@ class 
TestExpandV2OneDNNOp(OpTest): class TestExpandV2ExpandDimOneDNNOp(TestExpandV2OneDNNOp): + def init_data(self): self.ori_shape = [120] self.shape = [2, 120] @@ -60,6 +62,7 @@ class TestExpandV2ExpandDimOneDNNOp(TestExpandV2OneDNNOp): class TestExpandV2CopyScenarioOneDNNOp(TestExpandV2OneDNNOp): + def init_data(self): self.ori_shape = (2, 10, 5) self.shape = (2, 10, 5) @@ -67,6 +70,7 @@ class TestExpandV2CopyScenarioOneDNNOp(TestExpandV2OneDNNOp): class TestExpandV2CopyScenarioShapeNotGivenOneDNNOp(TestExpandV2OneDNNOp): + def init_data(self): self.ori_shape = (2, 4, 5, 7) self.shape = (-1, -1, -1, -1) @@ -74,6 +78,7 @@ class TestExpandV2CopyScenarioShapeNotGivenOneDNNOp(TestExpandV2OneDNNOp): class TestExpandV2ExpandShapesTensor1OneDNNOp(TestExpandV2OneDNNOp): + def init_data(self): self.ori_shape = [100, 1] self.expand_times = [1, 2] @@ -93,6 +98,7 @@ class TestExpandV2ExpandShapesTensor1OneDNNOp(TestExpandV2OneDNNOp): class TestExpandV2ExpandShapesTensor2OneDNNOp( TestExpandV2ExpandShapesTensor1OneDNNOp): + def init_data(self): self.ori_shape = [12, 14] self.expand_times = [1, 1] @@ -101,6 +107,7 @@ class TestExpandV2ExpandShapesTensor2OneDNNOp( class TestExpandV2ShapesTensorOneDNNOp(TestExpandV2OneDNNOp): + def init_data(self): self.ori_shape = [100] self.expand_times = [2, 1] @@ -113,8 +120,10 @@ class TestExpandV2ShapesTensorOneDNNOp(TestExpandV2OneDNNOp): # BF16 TESTS def create_expand_v2_bf16_test_class(parent): + @OpTestTool.skip_if_not_cpu_bf16() class TestExpandV2BF16OneDNNOp(parent): + def set_inputs(self): self.attrs['mkldnn_data_type'] = 'bfloat16' self.inputs = {"X": convert_float_to_uint16(self.x)} diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fc_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fc_bf16_mkldnn_op.py index 1104372c741..0cb069dd14b 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fc_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fc_bf16_mkldnn_op.py @@ -27,6 +27,7 @@ def fully_connected_naive(input, weights, bias_data): class MatrixGenerate: + def __init__(self, mb, ic, oc, h, w): self.input = np.random.random((mb, ic * h * w)).astype(np.float32) self.weights = np.random.random((ic * h * w, oc)).astype(np.float32) @@ -35,6 +36,7 @@ class MatrixGenerate: @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestFcBf16MklDNNOp(OpTest): + def generate_data(self): self.matrix = MatrixGenerate(1, 10, 15, 3, 3) self.bias = np.random.random(15).astype("float32") @@ -75,6 +77,7 @@ class TestFcBf16MklDNNOp(OpTest): class TestFCMKLDNNOp1(TestFcBf16MklDNNOp): + def generate_data(self): self.matrix = MatrixGenerate(2, 15, 48, 2, 2) self.bias = np.random.random(48).astype(np.float32) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fc_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fc_mkldnn_op.py index e96b8cf8191..84de7246965 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fc_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fc_mkldnn_op.py @@ -25,12 +25,14 @@ def fully_connected_naive(input, weights, bias_data): class MatrixGenerate: + def __init__(self, mb, ic, oc, h, w): self.input = np.random.random((mb, ic * h * w)).astype("float32") self.weights = np.random.random((ic * h * w, oc)).astype("float32") class TestFCMKLDNNOp(OpTest): + def create_data(self): self.matrix = MatrixGenerate(1, 10, 15, 3, 3) self.bias = np.random.random(15).astype("float32") @@ -49,8 +51,9 @@ class TestFCMKLDNNOp(OpTest): 
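The FC oneDNN tests above and below build their expected outputs with fully_connected_naive; its body is not part of this patch, so the sketch below only assumes the usual affine reference (out = input @ weights + bias), with shapes following MatrixGenerate. The helper name carries a _sketch suffix to make clear it is not the test suite's function.

    import numpy as np

    def fully_connected_naive_sketch(input_, weights, bias_data):
        # assumed reference: row-major matmul plus a bias broadcast over rows
        return np.dot(input_, weights) + bias_data

    x = np.random.random((1, 10 * 3 * 3)).astype("float32")   # input shape used by MatrixGenerate(1, 10, 15, 3, 3)
    w = np.random.random((10 * 3 * 3, 15)).astype("float32")  # matching weights (ic*h*w, oc)
    b = np.random.random(15).astype("float32")
    out = fully_connected_naive_sketch(x, w, b)                # shape (1, 15)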
self.attrs = {'use_mkldnn': self.use_mkldnn} self.outputs = { - 'Out': fully_connected_naive(self.matrix.input, self.matrix.weights, - self.bias) + 'Out': + fully_connected_naive(self.matrix.input, self.matrix.weights, + self.bias) } def test_check_output(self): @@ -65,6 +68,7 @@ class TestFCMKLDNNOp(OpTest): class TestFCMKLDNNOp1(TestFCMKLDNNOp): + def create_data(self): self.matrix = MatrixGenerate(2, 15, 48, 2, 2) self.bias = np.random.random(48).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fill_constant_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fill_constant_mkldnn_op.py index d729efbb0fb..27400abcf7f 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fill_constant_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fill_constant_mkldnn_op.py @@ -22,6 +22,7 @@ import paddle @OpTestTool.skip_if_not_cpu_bf16() class TestFillConstant2DOneDNNOp(OpTest): + def setUp(self): self.op_type = "fill_constant" self.dtype = np.float32 @@ -63,14 +64,16 @@ class TestFillConstant2DOneDNNOp(OpTest): self.check_output() -class TestFillZerosLike4DShapeTensorPriorityOneDNNOp( - TestFillConstant2DOneDNNOp): +class TestFillZerosLike4DShapeTensorPriorityOneDNNOp(TestFillConstant2DOneDNNOp + ): + def set_inputs(self): self.inputs = {'ShapeTensor': np.array([5, 6, 7, 8]).astype("int32")} class TestFillZerosLike4DShapeTensorListPriorityOneDNNOp( TestFillConstant2DOneDNNOp): + def set_inputs(self): shape = (4, 5, 6, 7) self.shape_tensor_list = [] @@ -82,13 +85,15 @@ class TestFillZerosLike4DShapeTensorListPriorityOneDNNOp( class TestFillZerosLike2DStringValueInfOneDNNOp(TestFillConstant2DOneDNNOp): + def set_attrs(self): self.str_value = "inf" self.attrs = {'shape': (10, 13), 'use_mkldnn': True, 'str_value': "inf"} -class TestFillZerosLike2DStringValueMinusInfOneDNNOp( - TestFillConstant2DOneDNNOp): +class TestFillZerosLike2DStringValueMinusInfOneDNNOp(TestFillConstant2DOneDNNOp + ): + def set_attrs(self): self.str_value = "-inf" self.attrs = { @@ -99,6 +104,7 @@ class TestFillZerosLike2DStringValueMinusInfOneDNNOp( class TestFillZerosLike2DStringValueFloatOneDNNOp(TestFillConstant2DOneDNNOp): + def set_attrs(self): self.str_value = "0.123" self.attrs = { @@ -110,6 +116,7 @@ class TestFillZerosLike2DStringValueFloatOneDNNOp(TestFillConstant2DOneDNNOp): class TestFillZerosLike2DValueTensorPriorityOneDNNOp( TestFillZerosLike2DStringValueFloatOneDNNOp): + def set_inputs(self): self.inputs = {'ValueTensor': np.atleast_1d(2.25).astype("float32")} diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_flags_mkldnn_ops_on_off.py b/python/paddle/fluid/tests/unittests/mkldnn/test_flags_mkldnn_ops_on_off.py index 4e52b7b08cf..a8d2d42ebe3 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_flags_mkldnn_ops_on_off.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_flags_mkldnn_ops_on_off.py @@ -23,6 +23,7 @@ import re class TestFlagsUseMkldnn(unittest.TestCase): + def setUp(self): self._python_interp = sys.executable self._python_interp += " check_flags_mkldnn_ops_on_off.py" @@ -38,11 +39,10 @@ class TestFlagsUseMkldnn(unittest.TestCase): def flags_use_mkl_dnn_common(self, e): cmd = self._python_interp env = dict(self.env, **e) - proc = subprocess.Popen( - cmd.split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=env) + proc = subprocess.Popen(cmd.split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env) out, err = proc.communicate() returncode = proc.returncode diff --git 
a/python/paddle/fluid/tests/unittests/mkldnn/test_flags_use_mkldnn.py b/python/paddle/fluid/tests/unittests/mkldnn/test_flags_use_mkldnn.py index 0974d6357fc..d86a9467053 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_flags_use_mkldnn.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_flags_use_mkldnn.py @@ -23,6 +23,7 @@ import re class TestFlagsUseMkldnn(unittest.TestCase): + def setUp(self): self._python_interp = sys.executable self._python_interp += " check_flags_use_mkldnn.py" @@ -47,11 +48,10 @@ class TestFlagsUseMkldnn(unittest.TestCase): def test_flags_use_mkl_dnn(self): cmd = self._python_interp - proc = subprocess.Popen( - cmd.split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=self.env) + proc = subprocess.Popen(cmd.split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self.env) out, err = proc.communicate() returncode = proc.returncode diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_flatten_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_flatten_mkldnn_op.py index c01f244004e..dc750335ea5 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_flatten_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_flatten_mkldnn_op.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool, convert_flo @OpTestTool.skip_if_not_cpu_bf16() class TestFlattenOneDNNOp(OpTest): + def setUp(self): self.set_op_type() self.init_test_case() @@ -51,6 +52,7 @@ class TestFlattenOneDNNOp(OpTest): class TestFlattenOneDNNOp1(TestFlattenOneDNNOp): + def init_test_case(self): self.in_shape = (3, 2, 2, 10) self.axis = 0 @@ -58,6 +60,7 @@ class TestFlattenOneDNNOp1(TestFlattenOneDNNOp): class TestFlattenOneDNNOpSixDims(TestFlattenOneDNNOp): + def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) self.axis = 4 @@ -65,23 +68,28 @@ class TestFlattenOneDNNOpSixDims(TestFlattenOneDNNOp): class TestFlatten2OneDNNOp(TestFlattenOneDNNOp): + def set_op_type(self): self.op_type = "flatten2" class TestFlatten2OneDNNOp1(TestFlattenOneDNNOp1): + def set_op_type(self): self.op_type = "flatten2" class TestFlatten2OneDNNOpSixDims(TestFlattenOneDNNOpSixDims): + def set_op_type(self): self.op_type = "flatten2" # BF16 TESTS def create_flatten_bf16_test_classes(parent): + class TestFlatten2BF16OneDNNOp(parent): + def set_inputs(self): self.dtype = np.uint16 self.inputs = { @@ -93,22 +101,22 @@ def create_flatten_bf16_test_classes(parent): self.dx = np.reshape(self.dout, self.ori_shape) def test_check_output(self): - self.check_output_with_place( - core.CPUPlace(), no_check_set=["XShape"]) + self.check_output_with_place(core.CPUPlace(), + no_check_set=["XShape"]) def test_check_grad(self): self.calculate_grads() - self.check_grad_with_place( - core.CPUPlace(), ["X"], - "Out", - user_defined_grads=[self.dx], - user_defined_grad_outputs=[self.dout]) + self.check_grad_with_place(core.CPUPlace(), ["X"], + "Out", + user_defined_grads=[self.dx], + user_defined_grad_outputs=[self.dout]) cls_name = "{0}_{1}".format(parent.__name__, "Flatten2_BF16") TestFlatten2BF16OneDNNOp.__name__ = cls_name globals()[cls_name] = TestFlatten2BF16OneDNNOp class TestFlattenBF16OneDNNOp(parent): + def set_op_type(self): self.dtype = np.uint16 self.op_type = "flatten" diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py index ef26a27d05e..b4b30d1dbca 100644 --- 
a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py @@ -26,6 +26,7 @@ from paddle.fluid.tests.unittests.test_fusion_lstm_op import fc, ACTIVATION @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestFusionGRUBF16MKLDNNOp(OpTest): + def set_confs(self): pass @@ -76,10 +77,11 @@ class TestFusionGRUBF16MKLDNNOp(OpTest): N, self.D).astype('float32') if self.with_h0 else np.zeros( (N, self.D), dtype='float32') - _, _, _, hidden = fusion_gru( - x_fp32, self.lod, h0_fp32, wx_fp32, wh_fp32, bias, self.is_reverse, - self.origin_mode, ACTIVATION[self.act_state], - ACTIVATION[self.act_gate]) + _, _, _, hidden = fusion_gru(x_fp32, self.lod, h0_fp32, wx_fp32, + wh_fp32, bias, self.is_reverse, + self.origin_mode, + ACTIVATION[self.act_state], + ACTIVATION[self.act_gate]) hidden_bf16 = convert_float_to_uint16(hidden) @@ -121,16 +123,19 @@ class TestFusionGRUBF16MKLDNNOp(OpTest): class TestFusionGRUINT8MKLDNNOp2(TestFusionGRUBF16MKLDNNOp): + def set_confs(self): self.origin_mode = False class TestFusionGRUINT8MKLDNNOp3(TestFusionGRUBF16MKLDNNOp): + def set_confs(self): self.with_bias = False class TestFusionGRUINT8MKLDNNBF16WeightsOp(TestFusionGRUBF16MKLDNNOp): + def set_confs(self): self.weights_dtype = 'bf16' diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py index 4fda51e9e05..fee53dc3483 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py @@ -20,6 +20,7 @@ from paddle.fluid.tests.unittests.test_fusion_lstm_op import fc, ACTIVATION class TestFusionGRUINT8MKLDNNOp(OpTest): + def set_confs(self): pass @@ -62,20 +63,19 @@ class TestFusionGRUINT8MKLDNNOp(OpTest): # Scales shape in oneDNN: [3, OC] s8_max = 127.0 scale_ur = s8_max / np.max(np.abs( - np.concatenate( - [ - wx[:, :2 * self.OC], wh.flatten()[:2 * self.OC * self.OC] - .reshape(self.OC, 2 * self.OC) - ], - axis=0)), + np.concatenate([ + wx[:, :2 * self.OC], + wh.flatten()[:2 * self.OC * self.OC].reshape( + self.OC, 2 * self.OC) + ], + axis=0)), axis=0) scale_o = s8_max / np.max(np.abs( - np.concatenate( - [ - wx[:, 2 * self.OC:], wh.flatten()[2 * self.OC * self.OC:] - .reshape(self.OC, self.OC) - ], - axis=0)), + np.concatenate([ + wx[:, 2 * self.OC:], + wh.flatten()[2 * self.OC * self.OC:].reshape(self.OC, self.OC) + ], + axis=0)), axis=0) scale_weights = np.concatenate([scale_ur, scale_o]).astype('float') @@ -128,21 +128,25 @@ class TestFusionGRUINT8MKLDNNOp(OpTest): class TestFusionGRUINT8MKLDNNOp2(TestFusionGRUINT8MKLDNNOp): + def set_confs(self): self.force_fp32_output = False class TestFusionGRUINT8MKLDNNOp3(TestFusionGRUINT8MKLDNNOp): + def set_confs(self): self.origin_mode = False class TestFusionGRUINT8MKLDNNOp4(TestFusionGRUINT8MKLDNNOp): + def set_confs(self): self.with_bias = False class TestFusionGRUINT8MKLDNNOp5(TestFusionGRUINT8MKLDNNOp): + def set_confs(self): self.with_h0 = False diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_mkldnn_op.py index 3c70380493d..2910a2c05c3 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_mkldnn_op.py @@ -18,35 +18,41 
@@ from paddle.fluid.tests.unittests.test_fusion_gru_op import TestFusionGRUOp class TestFusionGRUMKLDNNOp(TestFusionGRUOp): + def set_confs(self): self.use_mkldnn = True class TestFusionGRUMKLDNNOpNoInitial(TestFusionGRUOp): + def set_confs(self): self.with_h0 = False self.use_mkldnn = True class TestFusionGRUMKLDNNOpNoBias(TestFusionGRUOp): + def set_confs(self): self.with_bias = False self.use_mkldnn = True class TestFusionGRUMKLDNNOpReverse(TestFusionGRUOp): + def set_confs(self): self.is_reverse = True self.use_mkldnn = True class TestFusionGRUMKLDNNOpOriginMode(TestFusionGRUOp): + def set_confs(self): self.origin_mode = True self.use_mkldnn = True class TestFusionGRUMKLDNNOpMD1(TestFusionGRUOp): + def set_confs(self): self.M = 36 self.D = 8 @@ -54,6 +60,7 @@ class TestFusionGRUMKLDNNOpMD1(TestFusionGRUOp): class TestFusionGRUMKLDNNOpMD2(TestFusionGRUOp): + def set_confs(self): self.M = 8 self.D = 8 @@ -61,6 +68,7 @@ class TestFusionGRUMKLDNNOpMD2(TestFusionGRUOp): class TestFusionGRUMKLDNNOpMD3(TestFusionGRUOp): + def set_confs(self): self.M = 17 self.D = 15 @@ -68,6 +76,7 @@ class TestFusionGRUMKLDNNOpMD3(TestFusionGRUOp): class TestFusionGRUMKLDNNOpBS1(TestFusionGRUOp): + def set_confs(self): self.lod = [[3]] self.D = 16 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py index d07eda32599..e094f8a844f 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py @@ -26,14 +26,16 @@ from paddle.fluid.tests.unittests.test_fusion_gru_op import fusion_gru @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestFusionLSTMBF16ONEDNNOp(OpTest): + def set_confs(self): pass def test_check_output(self): for use_seq in {True, False}: self.attrs['use_seq'] = use_seq - self.check_output( - check_dygraph=False, no_check_set=["Cell"], atol=2e-2) + self.check_output(check_dygraph=False, + no_check_set=["Cell"], + atol=2e-2) def setUp(self): self.op_type = 'fusion_lstm' @@ -137,21 +139,25 @@ class TestFusionLSTMBF16ONEDNNOp(OpTest): class TestFusionLSTMBF16ONEDNNPeepholesOp(TestFusionLSTMBF16ONEDNNOp): + def set_confs(self): self.use_peepholes = True class TestFusionLSTMBF16ONEDNNInitializedStateOp(TestFusionLSTMBF16ONEDNNOp): + def set_confs(self): self.has_initial_state = True class TestFusionLSTMBF16ONEDNNReverseOp(TestFusionLSTMBF16ONEDNNOp): + def set_confs(self): self.is_reverse = True class TestFusionLSTMBF16ONEDNNBF16WeightsOp(TestFusionLSTMBF16ONEDNNOp): + def set_confs(self): self.weights_dtype = 'bf16' diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py index 12f8c01783d..8d3b4db1714 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py @@ -19,6 +19,7 @@ from paddle.fluid.tests.unittests.test_fusion_lstm_op import fc, ACTIVATION, fus class TestFusionLSTMINT8MKLDNNOp(OpTest): + def set_confs(self): pass @@ -58,8 +59,7 @@ class TestFusionLSTMINT8MKLDNNOp(OpTest): s8_max = 127.0 scale_weights = s8_max / np.max( - np.abs(np.concatenate( - [wx[:, :], wh[:, :]], axis=0)), axis=0) + np.abs(np.concatenate([wx[:, :], wh[:, :]], axis=0)), axis=0) scale_weights = scale_weights.astype('float') 
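The INT8 fusion LSTM hunk just above derives one s8 scale per weight column by stacking the input-to-hidden and hidden-to-hidden matrices and taking the column-wise absolute maximum. A minimal, self-contained sketch of that computation follows; M and OC are placeholder sizes, not values taken from the test.

    import numpy as np

    M, OC = 6, 4            # placeholder input width and hidden size
    s8_max = 127.0
    wx = np.random.uniform(-1, 1, (M, 4 * OC)).astype(np.float32)   # input-to-hidden weights
    wh = np.random.uniform(-1, 1, (OC, 4 * OC)).astype(np.float32)  # hidden-to-hidden weights

    # Column-wise absolute maximum over the stacked matrices gives one scale
    # per gate output channel, as in the hunk above.
    scale_weights = s8_max / np.max(
        np.abs(np.concatenate([wx[:, :], wh[:, :]], axis=0)), axis=0)
    assert scale_weights.shape == (4 * OC,)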
@@ -80,10 +80,11 @@ class TestFusionLSTMINT8MKLDNNOp(OpTest): h0 = np.zeros((N, self.OC)).astype('float32') c0 = np.zeros((N, self.OC)).astype('float32') - hidden_f32, c = fusion_lstm( - x_f32, self.lod, wx, bx, h0, c0, wh, w_b, w_c, self.is_reverse, - ACTIVATION[self.act_gate], ACTIVATION[self.act_cell], - ACTIVATION[self.act_cand]) + hidden_f32, c = fusion_lstm(x_f32, self.lod, wx, bx, h0, c0, wh, w_b, + w_c, self.is_reverse, + ACTIVATION[self.act_gate], + ACTIVATION[self.act_cell], + ACTIVATION[self.act_cand]) self.inputs = { 'X': (x_u8, self.lod), @@ -128,23 +129,25 @@ class TestFusionLSTMINT8MKLDNNOp(OpTest): def test_check_output(self): for use_seq in {True, False}: self.attrs['use_seq'] = use_seq - self.check_output( - check_dygraph=False, - no_check_set=["Cell"], - atol=self.error_margin) + self.check_output(check_dygraph=False, + no_check_set=["Cell"], + atol=self.error_margin) class TestFusionLSTMINT8MKLDNNOp2(TestFusionLSTMINT8MKLDNNOp): + def set_confs(self): self.force_fp32_output = True class TestFusionLSTMINT8MKLDNNOp4(TestFusionLSTMINT8MKLDNNOp): + def set_confs(self): self.is_reverse = True class TestFusionLSTMINT8MKLDNNOp5(TestFusionLSTMINT8MKLDNNOp): + def set_confs(self): self.has_initial_state = True diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_mkldnn_op.py index 9988a033a7d..6c48ba9b46a 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_mkldnn_op.py @@ -18,6 +18,7 @@ from paddle.fluid.tests.unittests.test_fusion_lstm_op import TestFusionLSTMOp class TestFusionLSTMONEDNNOp(TestFusionLSTMOp): + def set_conf(self): self.use_mkldnn = True @@ -28,12 +29,14 @@ class TestFusionLSTMONEDNNOp(TestFusionLSTMOp): class TestFusionLSTMONEDNNOpReverse(TestFusionLSTMONEDNNOp): + def set_conf(self): self.is_reverse = True self.use_mkldnn = True class TestFusionLSTMONEDNNOpInitReverse(TestFusionLSTMONEDNNOp): + def set_conf(self): self.has_initial_state = True self.is_reverse = True @@ -41,6 +44,7 @@ class TestFusionLSTMONEDNNOpInitReverse(TestFusionLSTMONEDNNOp): class TestFusionLSTMONEDNNOpMD1(TestFusionLSTMONEDNNOp): + def set_conf(self): self.M = 36 self.D = 8 @@ -48,6 +52,7 @@ class TestFusionLSTMONEDNNOpMD1(TestFusionLSTMONEDNNOp): class TestFusionLSTMONEDNNOpMD2(TestFusionLSTMONEDNNOp): + def set_conf(self): self.M = 8 self.D = 8 @@ -55,6 +60,7 @@ class TestFusionLSTMONEDNNOpMD2(TestFusionLSTMONEDNNOp): class TestFusionLSTMONEDNNOpMD3(TestFusionLSTMONEDNNOp): + def set_conf(self): self.M = 15 self.D = 3 @@ -62,6 +68,7 @@ class TestFusionLSTMONEDNNOpMD3(TestFusionLSTMONEDNNOp): class TestFusionLSTMONEDNNOpBS1(TestFusionLSTMONEDNNOp): + def set_conf(self): self.lod = [[3]] self.D = 16 @@ -69,6 +76,7 @@ class TestFusionLSTMONEDNNOpBS1(TestFusionLSTMONEDNNOp): class TestFusionLSTMONEDNNOpPeepholesInit(TestFusionLSTMONEDNNOp): + def set_conf(self): self.use_peepholes = True self.has_initial_state = True diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_gaussian_random_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_gaussian_random_mkldnn_op.py index a65efa6deb0..b0b9ddf879a 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_gaussian_random_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_gaussian_random_mkldnn_op.py @@ -20,11 +20,13 @@ from paddle.fluid.tests.unittests.test_gaussian_random_op import TestGaussianRan class 
TestMKLDNNGaussianRandomOpSeed10(TestGaussianRandomOp): + def init_kernel_type(self): self.use_mkldnn = True class TestMKLDNNGaussianRandomOpSeed0(TestGaussianRandomOp): + def setUp(self): TestGaussianRandomOp.setUp(self) self.use_mkldnn = True diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_bf16_mkldnn_op.py index dc881a57521..2cad7cd8cc7 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_bf16_mkldnn_op.py @@ -36,10 +36,10 @@ _set_use_system_allocator(True) @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestLayerNormBF16MKLDNNOp(TestLayerNormMKLDNNOp): + def __assert_close(self, tensor, np_array, msg, rtol=2e-02, atol=2): self.assertTrue( - np.allclose( - np.array(tensor), np_array, rtol=rtol, atol=atol), msg) + np.allclose(np.array(tensor), np_array, rtol=rtol, atol=atol), msg) def check_forward(self, shape, @@ -83,15 +83,13 @@ class TestLayerNormBF16MKLDNNOp(TestLayerNormMKLDNNOp): # scale and bias are fp32 and other vars are of bf16 for name in ground_truth: if name == 'x_bf16' or name == 'y_bf16': - block.create_var( - name=name, - dtype='uint16', - shape=ground_truth[name].shape) + block.create_var(name=name, + dtype='uint16', + shape=ground_truth[name].shape) else: - block.create_var( - name=name, - dtype='float32', - shape=ground_truth[name].shape) + block.create_var(name=name, + dtype='float32', + shape=ground_truth[name].shape) inputs = {"X": block.var('x_bf16')} if with_scale_bias: @@ -130,8 +128,9 @@ class TestLayerNormBF16MKLDNNOp(TestLayerNormMKLDNNOp): self.__assert_close(variance, out[2], "variance", 1e-3) def test_check_forward_with_is_test(self): - self.check_forward( - shape=[2, 3, 4, 5], begin_norm_axis=3, with_is_test=True) + self.check_forward(shape=[2, 3, 4, 5], + begin_norm_axis=3, + with_is_test=True) # TODO (jczaja): Enable those to test when enabling training using bf16 def test_check_forward_with_scale_and_bias(self): diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_mkldnn_op.py index d20fb003ee9..d36b5cc9e64 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_mkldnn_op.py @@ -52,6 +52,7 @@ def _reference_layer_norm_naive(x, scale, beta, epsilon, begin_norm_axis=1): class TestLayerNormMKLDNNOp(unittest.TestCase): + def setUp(self): self.use_mkldnn = True @@ -95,8 +96,9 @@ class TestLayerNormMKLDNNOp(unittest.TestCase): block = program.global_block() for name in ground_truth: - block.create_var( - name=name, dtype='float32', shape=ground_truth[name].shape) + block.create_var(name=name, + dtype='float32', + shape=ground_truth[name].shape) inputs = {"X": block.var('x')} if with_scale_bias: @@ -138,12 +140,14 @@ class TestLayerNormMKLDNNOp(unittest.TestCase): self.check_forward(shape=[2, 3, 4, 5], begin_norm_axis=3) def test_check_forward_without_scale_and_bias(self): - self.check_forward( - shape=[2, 3, 4, 5], begin_norm_axis=3, with_scale_bias=False) + self.check_forward(shape=[2, 3, 4, 5], + begin_norm_axis=3, + with_scale_bias=False) def test_check_forward_with_is_test(self): - self.check_forward( - shape=[2, 3, 4, 5], begin_norm_axis=3, with_is_test=True) + self.check_forward(shape=[2, 3, 4, 5], + begin_norm_axis=3, + 
with_is_test=True) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_log_softmax_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_log_softmax_mkldnn_op.py index 7477eaf3339..89de5198101 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_log_softmax_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_log_softmax_mkldnn_op.py @@ -22,6 +22,7 @@ from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool, convert_flo @OpTestTool.skip_if_not_cpu_bf16() class TestLogSoftmaxOneDNNOp(OpTest): + def setUp(self): self.op_type = 'log_softmax' self.set_dtype() @@ -52,38 +53,45 @@ class TestLogSoftmaxOneDNNOp(OpTest): class TestLogSoftmax1DOneDNNOp(TestLogSoftmaxOneDNNOp): + def set_shape(self): self.shape = [100] class TestLogSoftmax3DOneDNNOp(TestLogSoftmaxOneDNNOp): + def set_shape(self): self.shape = [12, 10, 3] class TestLogSoftmax5DOneDNNOp(TestLogSoftmaxOneDNNOp): + def set_shape(self): self.shape = [2, 3, 4, 5, 6] class TestLogSoftmaxPositiveAxisOneDNNOp(TestLogSoftmaxOneDNNOp): + def set_axis(self): self.axis = 2 # BF16 TESTS class TestLogSoftmax1DBF16OneDNNOp(TestLogSoftmax1DOneDNNOp): + def set_dtype(self): self.dtype = np.uint16 -class TestLogSoftmaxPositiveAxisBF16OneDNNOp( - TestLogSoftmaxPositiveAxisOneDNNOp): +class TestLogSoftmaxPositiveAxisBF16OneDNNOp(TestLogSoftmaxPositiveAxisOneDNNOp + ): + def set_dtype(self): self.dtype = np.uint16 class TestLogSoftmax5DBF16OneDNNOp(TestLogSoftmax5DOneDNNOp): + def set_shape(self): self.shape = [2, 3, 4, 5, 6] diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_lrn_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_lrn_mkldnn_op.py index 088b4fb5905..9941f567af2 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_lrn_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_lrn_mkldnn_op.py @@ -20,6 +20,7 @@ import paddle.fluid as fluid class TestLRNMKLDNNOp(TestLRNOp): + def get_attrs(self): attrs = TestLRNOp.get_attrs(self) attrs['use_mkldnn'] = True @@ -28,26 +29,33 @@ class TestLRNMKLDNNOp(TestLRNOp): def test_check_output(self): # We cannot validate MidOut as LRN REF has diffrent meaning in it # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_output( - atol=0.002, no_check_set=['MidOut'], check_dygraph=False) + self.check_output(atol=0.002, + no_check_set=['MidOut'], + check_dygraph=False) def test_check_grad_normal(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_grad( - ['X'], 'Out', max_relative_error=0.01, check_dygraph=False) + self.check_grad(['X'], + 'Out', + max_relative_error=0.01, + check_dygraph=False) class TestLRNMKLDNNOpWithIsTest(TestLRNMKLDNNOp): + def get_attrs(self): attrs = TestLRNMKLDNNOp.get_attrs(self) attrs['is_test'] = True return attrs def test_check_grad_normal(self): + def check_raise_is_test(): try: - self.check_grad( - ['X'], 'Out', max_relative_error=0.01, check_dygraph=False) + self.check_grad(['X'], + 'Out', + max_relative_error=0.01, + check_dygraph=False) except Exception as e: t = \ "is_test attribute should be set to False in training phase." 
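The hunks above and below all make the same mechanical change: yapf re-wraps multi-line calls so that continuation arguments sit in the column of the opening parenthesis instead of one indentation level in. A minimal sketch of reproducing that pass with yapf's Python API follows; the sample snippet and the style_config value are illustrative assumptions, since in this PR the formatting is actually driven by the pre-commit hook and the repository's own yapf configuration.

# Illustrative sketch only, not part of the patch.
# FormatCode may return a plain string or a (code, changed) tuple depending on
# the installed yapf version, so both cases are handled below.
from yapf.yapflib.yapf_api import FormatCode

sample = (
    "self.check_output(\n"
    "    check_dygraph=False,\n"
    "    no_check_set=['Cell'],\n"
    "    atol=self.error_margin)\n"
)

result = FormatCode(sample, style_config='pep8')  # style_config is an assumption
formatted = result[0] if isinstance(result, tuple) else result
print(formatted)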
@@ -58,6 +66,7 @@ class TestLRNMKLDNNOpWithIsTest(TestLRNMKLDNNOp): class TestLRNMKLDNNOpNHWC(TestLRNMKLDNNOp): + def init_test_case(self): self.data_format = 'NHWC' diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_bf16_mkldnn_op.py index dba63be27b4..a16a5f3fdff 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_bf16_mkldnn_op.py @@ -25,6 +25,7 @@ from paddle import enable_static @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestMatmulBf16MklDNNOp(OpTest): + def generate_data(self): self.x_fp32 = np.random.random((25, 2, 2)).astype(np.float32) self.y_fp32 = np.random.random((25, 2, 2)).astype(np.float32) @@ -83,10 +84,10 @@ class TestMatmulBf16MklDNNOp(OpTest): x_transpose_axes = [1, 0] if self.x_fp32.ndim == 2 else [0, 2, 1] y_transpose_axes = [1, 0] if self.y_fp32.ndim == 2 else [0, 2, 1] - x = np.transpose(self.x_fp32, x_transpose_axes) if self.attrs[ - 'transpose_X'] is True else self.x_fp32 - y = np.transpose(self.y_fp32, y_transpose_axes) if self.attrs[ - 'transpose_Y'] is True else self.y_fp32 + x = np.transpose(self.x_fp32, x_transpose_axes + ) if self.attrs['transpose_X'] is True else self.x_fp32 + y = np.transpose(self.y_fp32, y_transpose_axes + ) if self.attrs['transpose_Y'] is True else self.y_fp32 dout = self.alpha * np.matmul(x, y) @@ -110,6 +111,7 @@ class TestMatmulBf16MklDNNOp(OpTest): class TestDnnlMatMulOpAlpha(TestMatmulBf16MklDNNOp): + def generate_data(self): self.x_fp32 = np.random.random((17, 2, 3)).astype(np.float32) self.y_fp32 = np.random.random((17, 3, 2)).astype(np.float32) @@ -118,6 +120,7 @@ class TestDnnlMatMulOpAlpha(TestMatmulBf16MklDNNOp): class TestDnnlMatMulOp2D(TestMatmulBf16MklDNNOp): + def generate_data(self): self.x_fp32 = np.random.random((12, 9)).astype(np.float32) self.y_fp32 = np.random.random((9, 12)).astype(np.float32) @@ -125,6 +128,7 @@ class TestDnnlMatMulOp2D(TestMatmulBf16MklDNNOp): class TestDnnlMatMulOpTransposeX(TestMatmulBf16MklDNNOp): + def generate_data(self): self.x_fp32 = np.random.random((12, 9)).astype(np.float32) self.y_fp32 = np.random.random((12, 9)).astype(np.float32) @@ -140,6 +144,7 @@ class TestDnnlMatMulOpTransposeX(TestMatmulBf16MklDNNOp): class TestDnnlMatMulOpTransposeY(TestMatmulBf16MklDNNOp): + def generate_data(self): self.x_fp32 = np.random.random((12, 9)).astype(np.float32) self.y_fp32 = np.random.random((12, 9)).astype(np.float32) @@ -155,6 +160,7 @@ class TestDnnlMatMulOpTransposeY(TestMatmulBf16MklDNNOp): class TestMatmulBf16MklDNNForceFp32Output(TestMatmulBf16MklDNNOp): + def generate_data(self): self.x_fp32 = np.random.random((12, 9)).astype(np.float32) self.y_fp32 = np.random.random((9, 12)).astype(np.float32) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_mkldnn_op.py index 634288c3e87..af838d7826e 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_mkldnn_op.py @@ -20,6 +20,7 @@ from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci class TestDnnlMatMulOp(OpTest): + def generate_data(self): self.x = np.random.random((25, 2, 2)).astype("float32") self.y = np.random.random((25, 2, 2)).astype("float32") @@ -48,11 +49,13 @@ class TestDnnlMatMulOp(OpTest): class 
TestDnnlMatMulWithGradOp(TestDnnlMatMulOp): + def test_check_grad(self): self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-2) class TestDnnlMatMulOpMixedDims1(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((17, 2, 3)).astype("float32") self.y = np.random.random((3, 4)).astype("float32") @@ -60,6 +63,7 @@ class TestDnnlMatMulOpMixedDims1(TestDnnlMatMulWithGradOp): class TestDnnlMatMulOpMixedDimsYWiderTransposeY(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((8, 2, 3)).astype("float32") self.y = np.random.random((4, 3)).astype("float32") @@ -70,6 +74,7 @@ class TestDnnlMatMulOpMixedDimsYWiderTransposeY(TestDnnlMatMulWithGradOp): class TestDnnlMatMulOpMixedDimsYWiderTransposeX(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((8, 3, 2)).astype("float32") self.y = np.random.random((3, 4)).astype("float32") @@ -80,28 +85,31 @@ class TestDnnlMatMulOpMixedDimsYWiderTransposeX(TestDnnlMatMulWithGradOp): class TestDnnlMatMulOpMixedDimsXWiderTransposeXY(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((8, 3, 2)).astype("float32") self.y = np.random.random((4, 3)).astype("float32") - self.out = np.matmul( - np.transpose(self.x, (0, 2, 1)), np.transpose(self.y)) + self.out = np.matmul(np.transpose(self.x, (0, 2, 1)), + np.transpose(self.y)) def set_attributes(self): self.attrs = {'transpose_X': True, 'transpose_Y': True} class TestDnnlMatMulOpMixedDimsYWiderTransposeXY(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((3, 2)).astype("float32") self.y = np.random.random((8, 4, 3)).astype("float32") - self.out = np.matmul( - np.transpose(self.x), np.transpose(self.y, (0, 2, 1))) + self.out = np.matmul(np.transpose(self.x), + np.transpose(self.y, (0, 2, 1))) def set_attributes(self): self.attrs = {'transpose_X': True, 'transpose_Y': True} class TestDnnlMatMulOpMixedDimsXWiderTransposeX(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((5, 4)).astype("float32") self.y = np.random.random((8, 5, 4)).astype("float32") @@ -112,6 +120,7 @@ class TestDnnlMatMulOpMixedDimsXWiderTransposeX(TestDnnlMatMulWithGradOp): class TestDnnlMatMulOpVectorMultiply(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((5)).astype("float32") self.y = np.random.random((5)).astype("float32") @@ -119,6 +128,7 @@ class TestDnnlMatMulOpVectorMultiply(TestDnnlMatMulWithGradOp): class TestDnnlMatMulOpVectorMultiplyTranspose(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((5)).astype("float32") x_resized = np.copy(self.x) @@ -133,6 +143,7 @@ class TestDnnlMatMulOpVectorMultiplyTranspose(TestDnnlMatMulWithGradOp): class TestDnnlMatMulOpMixedDims2(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((2, 3)).astype("float32") self.y = np.random.random((17, 3, 4)).astype("float32") @@ -140,6 +151,7 @@ class TestDnnlMatMulOpMixedDims2(TestDnnlMatMulWithGradOp): class TestDnnlMatMulOpAlpha(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((17, 2, 3)).astype("float32") self.y = np.random.random((17, 3, 2)).astype("float32") @@ -148,6 +160,7 @@ class TestDnnlMatMulOpAlpha(TestDnnlMatMulWithGradOp): class TestDnnlMatMulOp2D(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((12, 9)).astype("float32") self.y = np.random.random((9, 12)).astype("float32") @@ -155,6 +168,7 @@ class 
TestDnnlMatMulOp2D(TestDnnlMatMulWithGradOp): class TestDnnlMatMulOpTransposeX(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((12, 9)).astype("float32") self.y = np.random.random((12, 9)).astype("float32") @@ -165,6 +179,7 @@ class TestDnnlMatMulOpTransposeX(TestDnnlMatMulWithGradOp): class TestDnnlMatMulOpTransposeY(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((12, 9)).astype("float32") self.y = np.random.random((12, 9)).astype("float32") @@ -175,6 +190,7 @@ class TestDnnlMatMulOpTransposeY(TestDnnlMatMulWithGradOp): class TestDnnlMatMulOpTransposeY3D(TestDnnlMatMulWithGradOp): + def generate_data(self): self.x = np.random.random((17, 3, 2)).astype("float32") self.y = np.random.random((17, 3, 2)).astype("float32") @@ -185,6 +201,7 @@ class TestDnnlMatMulOpTransposeY3D(TestDnnlMatMulWithGradOp): class TestDnnlMatMulOpInt8NoScales(TestDnnlMatMulOp): + def generate_data(self): self.x = np.random.random((12, 9)).astype("int8") self.y = np.random.random((9, 12)).astype("int8") @@ -223,6 +240,7 @@ class TestDnnlMatMulOpInt8(TestDnnlMatMulOp): class TestDnnlMatMulOpInt8ForceFP32(TestDnnlMatMulOpInt8): + def generate_data(self): x_float = np.random.random((12, 9)).astype("float32") self.x_scale, self.x = self.quantize(x_float) @@ -242,6 +260,7 @@ class TestDnnlMatMulOpInt8ForceFP32(TestDnnlMatMulOpInt8): class TestDnnlMatMulOpInt8ForceFP32BasicScales(TestDnnlMatMulOp): + def generate_data(self): self.x = np.random.randint(0, 3, (12, 9)).astype("int8") self.y = np.random.randint(0, 3, (9, 12)).astype("int8") @@ -253,6 +272,7 @@ class TestDnnlMatMulOpInt8ForceFP32BasicScales(TestDnnlMatMulOp): @skip_check_grad_ci(reason="DNNL's MatMul doesn't implement grad kernel.") class TestReshapeTransposeMatMulOp(OpTest): + def init_data_type(self): self.data_type_ = 'float32' @@ -300,6 +320,7 @@ class TestReshapeTransposeMatMulOp(OpTest): class TestReshapeTransposeMatMulOp4DXFloat(TestReshapeTransposeMatMulOp): + def generate_data(self): self.x = np.random.random([2, 128, 768]).astype("float32") self.y = np.random.random([2, 128, 768]).astype("float32").reshape( @@ -314,11 +335,13 @@ class TestReshapeTransposeMatMulOp4DXFloat(TestReshapeTransposeMatMulOp): class TestReshapeTransposeMatMulOp4DXInt8(TestReshapeTransposeMatMulOp4DXFloat): + def init_data_type(self): self.data_type_ = 'int8' class TestReshapeTransposeMatMulOp4DYFloat(TestReshapeTransposeMatMulOp): + def generate_data(self): self.x = np.random.random([2, 128, 768]).astype("float32").reshape( [2, 128, 12, 64]).transpose([0, 2, 1, 3]) @@ -328,15 +351,18 @@ class TestReshapeTransposeMatMulOp4DYFloat(TestReshapeTransposeMatMulOp): self.fused_transpose_Y = [0, 2, 1, 3] self.fused_reshape_Y = [0, 0, 12, 64] self.out = np.matmul( - self.x, self.y.reshape([2, 128, 12, 64]).transpose([0, 2, 3, 1])) + self.x, + self.y.reshape([2, 128, 12, 64]).transpose([0, 2, 3, 1])) class TestReshapeTransposeMatMulOp4DYInt8(TestReshapeTransposeMatMulOp4DYFloat): + def init_data_type(self): self.data_type_ = 'int8' class TestReshapeTransposeMatMulOp4DXYFloat(TestReshapeTransposeMatMulOp): + def generate_data(self): self.x = np.random.random([2, 128, 768]).astype("float32") self.y = np.random.random([2, 128, 768]).astype("float32") @@ -349,13 +375,15 @@ class TestReshapeTransposeMatMulOp4DXYFloat(TestReshapeTransposeMatMulOp): self.y.reshape([2, 128, 12, 64]).transpose([0, 2, 3, 1])) -class TestReshapeTransposeMatMulOp4DXYInt8( - TestReshapeTransposeMatMulOp4DXYFloat): +class 
TestReshapeTransposeMatMulOp4DXYInt8(TestReshapeTransposeMatMulOp4DXYFloat + ): + def init_data_type(self): self.data_type_ = 'int8' class TestReshapeTransposeMatMulOp2DXFloat(TestReshapeTransposeMatMulOp): + def generate_data(self): self.x = np.random.random([2, 5, 10]).astype("float32") self.y = np.random.random([2, 5, 10]).astype("float32").reshape( @@ -365,16 +393,18 @@ class TestReshapeTransposeMatMulOp2DXFloat(TestReshapeTransposeMatMulOp): self.fused_transpose_Y = [] self.fused_reshape_Y = [] self.out = np.matmul( - self.x.reshape([10, 10]).transpose([1, 0]), - self.y.transpose([1, 0])) + self.x.reshape([10, 10]).transpose([1, 0]), self.y.transpose([1, + 0])) class TestReshapeTransposeMatMulOp2DXInt8(TestReshapeTransposeMatMulOp2DXFloat): + def init_data_type(self): self.data_type_ = 'int8' class TestReshapeTransposeMatMulOp2DYFloat(TestReshapeTransposeMatMulOp): + def generate_data(self): self.x = np.random.random([2, 5, 10]).astype("float32").reshape( [10, 10]).transpose([1, 0]) @@ -387,11 +417,13 @@ class TestReshapeTransposeMatMulOp2DYFloat(TestReshapeTransposeMatMulOp): class TestReshapeTransposeMatMulOp2DYInt8(TestReshapeTransposeMatMulOp2DYFloat): + def init_data_type(self): self.data_type_ = 'int8' class TestReshapeTransposeMatMulOp3DXFloat(TestReshapeTransposeMatMulOp): + def generate_data(self): self.x = np.random.random([2, 2, 5, 5]).astype("float32") self.y = np.random.random([2, 2, 5, 5]).astype("float32").reshape( @@ -406,11 +438,13 @@ class TestReshapeTransposeMatMulOp3DXFloat(TestReshapeTransposeMatMulOp): class TestReshapeTransposeMatMulOp3DXInt8(TestReshapeTransposeMatMulOp3DXFloat): + def init_data_type(self): self.data_type_ = 'int8' class TestReshapeTransposeMatMulOp3DYFloat(TestReshapeTransposeMatMulOp): + def generate_data(self): self.x = np.random.random([2, 2, 5, 5]).astype(self.data_type_).reshape( [2, 10, 5]).transpose([0, 2, 1]) @@ -423,12 +457,14 @@ class TestReshapeTransposeMatMulOp3DYFloat(TestReshapeTransposeMatMulOp): class TestReshapeTransposeMatMulOp3DYInt8(TestReshapeTransposeMatMulOp3DYFloat): + def init_data_type(self): self.data_type_ = 'int8' @skip_check_grad_ci(reason="Tests inference only optimization.") class TestMatMulOpTransposeReshapeEmptyFloat(OpTest): + def init_data_type(self): self.data_type_ = np.float32 @@ -479,12 +515,14 @@ class TestMatMulOpTransposeReshapeEmptyFloat(OpTest): class TestMatMulOpTransposeReshapeIntEmptyInt( TestMatMulOpTransposeReshapeEmptyFloat): + def init_data_type(self): self.data_type_ = np.int8 class TestMatMulOpTransposeReshapeBasicFloat( TestMatMulOpTransposeReshapeEmptyFloat): + def generate_data(self): self.bs = 8 self.x = np.random.random([self.bs, 12, 128, @@ -501,12 +539,14 @@ class TestMatMulOpTransposeReshapeBasicFloat( class TestMatMulOpTransposeReshapeBasicInt( TestMatMulOpTransposeReshapeBasicFloat): + def init_data_type(self): self.data_type_ = np.int8 class TestMatMulOpTransposeReshapeOtherDimFloat( TestMatMulOpTransposeReshapeBasicFloat): + def generate_data(self): self.bs = 11 self.x = np.random.random([self.bs, 12, 14, 18]).astype(self.data_type_) @@ -515,6 +555,7 @@ class TestMatMulOpTransposeReshapeOtherDimFloat( class TestMatMulOpTransposeReshapeOtherDimInt( TestMatMulOpTransposeReshapeOtherDimFloat): + def init_data_type(self): self.data_type_ = np.int8 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_v2_mkldnn_op.py index 69cee49c3ec..6f45da4e31e 100644 --- 
a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_v2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_v2_mkldnn_op.py @@ -59,6 +59,7 @@ def reference_matmul(X, Y, transpose_x=False, transpose_y=False): class TestMatMulV2VectorXVectorOneDNNOp(OpTest): + def config(self): self.x_shape = (100, ) self.y_shape = (100, ) @@ -102,6 +103,7 @@ class TestMatMulV2VectorXVectorOneDNNOp(OpTest): class TestMatMulV2VectorXMatrixTransposeYOneDNNOp( TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (100, ) self.y_shape = (1, 3, 2, 100) @@ -110,6 +112,7 @@ class TestMatMulV2VectorXMatrixTransposeYOneDNNOp( class TestMatMulV2VectorXMatrixOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (100, ) self.y_shape = (1, 1, 100, 2) @@ -119,6 +122,7 @@ class TestMatMulV2VectorXMatrixOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): class TestMatMulV2MatrixXVectorTransposeXOneDNNOp( TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (1, 1, 100, 1) self.y_shape = (100, ) @@ -127,6 +131,7 @@ class TestMatMulV2MatrixXVectorTransposeXOneDNNOp( class TestMatMulV2MatrixXVectorOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (1, 2, 1, 100) self.y_shape = (100, ) @@ -135,6 +140,7 @@ class TestMatMulV2MatrixXVectorOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): class TestMatMulV2MatrixXMatrixOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (1, 1, 2, 100) self.y_shape = (1, 1, 100, 1) @@ -144,6 +150,7 @@ class TestMatMulV2MatrixXMatrixOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): class TestMatMulV2MatrixXMatrixTransposeYOneDNNOp( TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (1, 1, 1, 100) self.y_shape = (2, 1, 2, 100) @@ -152,6 +159,7 @@ class TestMatMulV2MatrixXMatrixTransposeYOneDNNOp( class TestMatMulV2MatrixXMatrix2OneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (2, 1, 12, 9) self.y_shape = (1, 3, 9, 12) @@ -160,6 +168,7 @@ class TestMatMulV2MatrixXMatrix2OneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): class TestMatMulV2MatrixXMatrix3OneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (2, 1, 2, 100) self.y_shape = (1, 1, 100, 2) @@ -169,6 +178,7 @@ class TestMatMulV2MatrixXMatrix3OneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): class TestMatMulV2MatrixXMatrixTranposeXOneDNNOp2( TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (2, 1, 4, 25) self.y_shape = (1, 1, 4, 25) @@ -178,6 +188,7 @@ class TestMatMulV2MatrixXMatrixTranposeXOneDNNOp2( class TestMatMulV2MatrixXMatrixTranposeX2OneDNNOp3( TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (2, 2, 7, 4) self.y_shape = (2, 2, 7, 5) @@ -187,6 +198,7 @@ class TestMatMulV2MatrixXMatrixTranposeX2OneDNNOp3( class TestMatMulV2MatrixXMatrixTransposeX3OneDNNOp( TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (3, 1, 6, 7) self.y_shape = (1, 2, 6, 9) @@ -195,6 +207,7 @@ class TestMatMulV2MatrixXMatrixTransposeX3OneDNNOp( class TestMatMulV2MatrixXMatrix4OneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (3, 1, 6, 6) self.y_shape = (1, 2, 6, 9) @@ -203,6 +216,7 @@ class TestMatMulV2MatrixXMatrix4OneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): class TestMatMulV2VectorXMatrix5DOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (100) self.y_shape = (1, 2, 2, 100, 2) @@ -211,6 +225,7 @@ class 
TestMatMulV2VectorXMatrix5DOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): class TestMatMulV2Matrix3DXVectorOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (2, 1, 100) self.y_shape = (100) @@ -220,6 +235,7 @@ class TestMatMulV2Matrix3DXVectorOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): class TestMatMulV2MatrixXMatrixTransposeXTransposeYOneDNNOp( TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (3, 1, 10, 8) self.y_shape = (1, 2, 9, 10) @@ -229,6 +245,7 @@ class TestMatMulV2MatrixXMatrixTransposeXTransposeYOneDNNOp( class TestMatMulV2MatrixXMatrixTransposeY2OneDNNOp( TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (3, 1, 10, 10) self.y_shape = (1, 2, 9, 10) @@ -238,6 +255,7 @@ class TestMatMulV2MatrixXMatrixTransposeY2OneDNNOp( class TestMatMulV2MatrixXMatrix5DTranposeYOneDNNOp( TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (1, 3, 1, 10, 10) self.y_shape = (3, 1, 2, 9, 10) @@ -246,6 +264,7 @@ class TestMatMulV2MatrixXMatrix5DTranposeYOneDNNOp( class TestMatMulV2MatrixXMatrix6Dx2DOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (1, 1, 2, 1, 8, 9) self.y_shape = (9, 12) @@ -254,6 +273,7 @@ class TestMatMulV2MatrixXMatrix6Dx2DOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): class TestMatMulV2MatrixXMatrix2Dx5DOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (20, 5) self.y_shape = (1, 2, 1, 5, 11) @@ -263,6 +283,7 @@ class TestMatMulV2MatrixXMatrix2Dx5DOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): class TestMatMulV2MatrixXMatrix4Dx3DTransposeXOneDNNOp( TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (5, 4, 15, 10) self.y_shape = (1, 15, 20) @@ -272,6 +293,7 @@ class TestMatMulV2MatrixXMatrix4Dx3DTransposeXOneDNNOp( class TestMatMulV2MatrixXMatrix3Dx4DTransposeYOneDNNOp( TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (2, 10, 15) self.y_shape = (4, 2, 20, 15) @@ -281,6 +303,7 @@ class TestMatMulV2MatrixXMatrix3Dx4DTransposeYOneDNNOp( class TestMatMulV2MatrixXMatrix5Dx3DTransposeXTransposeYOneDNNOp( TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (4, 3, 2, 15, 10) self.y_shape = (1, 20, 15) @@ -289,6 +312,7 @@ class TestMatMulV2MatrixXMatrix5Dx3DTransposeXTransposeYOneDNNOp( class TestMatMulV2MatrixXMatrix3Dx4DOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): + def config(self): self.x_shape = (1, 1, 32, 16) self.y_shape = (16, 16, 16) @@ -298,8 +322,10 @@ class TestMatMulV2MatrixXMatrix3Dx4DOneDNNOp(TestMatMulV2VectorXVectorOneDNNOp): # BF16 TESTS def create_bf16_test_class(parent): + @OpTestTool.skip_if_not_cpu_bf16() class TestMatMulV2Bf16OneDNNOp(parent): + def set_inputs(self, x, y): self.inputs = { 'X': convert_float_to_uint16(x), @@ -348,10 +374,10 @@ def create_bf16_test_class(parent): x_transpose_axes = self.shape_transpose_axes[self.x_fp32.ndim] y_transpose_axes = self.shape_transpose_axes[self.y_fp32.ndim] - x = np.transpose(self.x_fp32, x_transpose_axes) if self.attrs[ - 'trans_x'] is True else self.x_fp32 - y = np.transpose(self.y_fp32, y_transpose_axes) if self.attrs[ - 'trans_y'] is True else self.y_fp32 + x = np.transpose(self.x_fp32, x_transpose_axes + ) if self.attrs['trans_x'] is True else self.x_fp32 + y = np.transpose(self.y_fp32, y_transpose_axes + ) if self.attrs['trans_y'] is True else self.y_fp32 dout = np.matmul(x, y) @@ -383,15 +409,13 @@ def create_bf16_test_class(parent): if is_broadcast: x_reduce_axis = [] y_reduce_axis = [] - for index, ( - first, 
second - ) in enumerate(zip(x_shape[0:-2], self.dx.shape[0:-2])): + for index, (first, second) in enumerate( + zip(x_shape[0:-2], self.dx.shape[0:-2])): if first != second: x_reduce_axis.append(index) - for index, ( - first, second - ) in enumerate(zip(y_shape[0:-2], self.dy.shape[0:-2])): + for index, (first, second) in enumerate( + zip(y_shape[0:-2], self.dy.shape[0:-2])): if first != second: y_reduce_axis.append(index) @@ -438,23 +462,27 @@ create_bf16_test_class(TestMatMulV2MatrixXMatrix2Dx5DOneDNNOp) class TestMatMulV2OpTransposeReshapeEmptyFloat( TestMatMulOpTransposeReshapeEmptyFloat): + def set_op_type(self): self.op_type = "matmul_v2" class TestMatMulV2OpTransposeReshapeBasicFloat( TestMatMulOpTransposeReshapeBasicFloat): + def set_op_type(self): self.op_type = "matmul_v2" class TestMatMulV2OpTransposeReshapeOtherDimFloat( TestMatMulOpTransposeReshapeOtherDimFloat): + def set_op_type(self): self.op_type = "matmul_v2" class TestMatMulV2OpReshapeTranspose(TestReshapeTransposeMatMulOp): + def set_op_type_and_transpose_y_name(self): self.op_type = "matmul_v2" self.transpose_y_name = "trans_y" @@ -462,6 +490,7 @@ class TestMatMulV2OpReshapeTranspose(TestReshapeTransposeMatMulOp): class TestMatMulV2OpReshapeTranspose4DXFloat( TestReshapeTransposeMatMulOp4DXFloat): + def set_op_type_and_transpose_y_name(self): self.op_type = "matmul_v2" self.transpose_y_name = "trans_y" @@ -469,6 +498,7 @@ class TestMatMulV2OpReshapeTranspose4DXFloat( class TestMatMulV2OpReshapeTranspose4DYFloat( TestReshapeTransposeMatMulOp4DYFloat): + def set_op_type_and_transpose_y_name(self): self.op_type = "matmul_v2" self.transpose_y_name = "trans_y" @@ -476,6 +506,7 @@ class TestMatMulV2OpReshapeTranspose4DYFloat( class TestMatMulV2OpReshapeTranspose4DXYFloat( TestReshapeTransposeMatMulOp4DXYFloat): + def set_op_type_and_transpose_y_name(self): self.op_type = "matmul_v2" self.transpose_y_name = "trans_y" @@ -483,6 +514,7 @@ class TestMatMulV2OpReshapeTranspose4DXYFloat( class TestMatMulV2OpReshapeTranspose2DXFloat( TestReshapeTransposeMatMulOp2DXFloat): + def set_op_type_and_transpose_y_name(self): self.op_type = "matmul_v2" self.transpose_y_name = "trans_y" @@ -490,6 +522,7 @@ class TestMatMulV2OpReshapeTranspose2DXFloat( class TestMatMulV2OpReshapeTranspose2DYFloat( TestReshapeTransposeMatMulOp2DYFloat): + def set_op_type_and_transpose_y_name(self): self.op_type = "matmul_v2" self.transpose_y_name = "trans_y" @@ -497,6 +530,7 @@ class TestMatMulV2OpReshapeTranspose2DYFloat( class TestMatMulV2OpReshapeTranspose3DXFloat( TestReshapeTransposeMatMulOp3DXFloat): + def set_op_type_and_transpose_y_name(self): self.op_type = "matmul_v2" self.transpose_y_name = "trans_y" @@ -504,6 +538,7 @@ class TestMatMulV2OpReshapeTranspose3DXFloat( class TestMatMulV2OpReshapeTranspose3DYFloat( TestReshapeTransposeMatMulOp3DYFloat): + def set_op_type_and_transpose_y_name(self): self.op_type = "matmul_v2" self.transpose_y_name = "trans_y" diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_mul_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_mul_int8_mkldnn_op.py index 9265d5f7edf..67d06e7b22c 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_mul_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_mul_int8_mkldnn_op.py @@ -25,9 +25,11 @@ from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci @skip_check_grad_ci( - reason="mul_mkldnn_op does not implement grad operator, check_grad is not required." 
+ reason= + "mul_mkldnn_op does not implement grad operator, check_grad is not required." ) class TestMKLDNNMulOpS8S8(OpTest): + def setUp(self): self.op_type = "mul" self.init_kernel_type() @@ -78,8 +80,9 @@ class TestMKLDNNMulOpS8S8(OpTest): def test_check_output(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_output_with_place( - core.CPUPlace(), atol=0, check_dygraph=False) + self.check_output_with_place(core.CPUPlace(), + atol=0, + check_dygraph=False) ''' @@ -88,6 +91,7 @@ class TestMKLDNNMulOpS8S8(OpTest): class TestMKLDNNMulOpS8U8(TestMKLDNNMulOpS8S8): + def init_data_type(self): self.srctype = np.uint8 self.dsttype = np.float32 if self.force_fp32 else np.int8 @@ -99,6 +103,7 @@ class TestMKLDNNMulOpS8U8(TestMKLDNNMulOpS8S8): class TestMKLDNNMulOpS8S8WithFlatten(TestMKLDNNMulOpS8S8): + def setUp(self): self.op_type = "mul" self.init_kernel_type() @@ -154,6 +159,7 @@ class TestMKLDNNMulOpS8S8WithFlatten(TestMKLDNNMulOpS8S8): class TestMKLDNNMulOpS8U8WithFlatten(TestMKLDNNMulOpS8S8WithFlatten): + def init_data_type(self): self.srctype = np.uint8 self.dsttype = np.float32 if self.force_fp32 else np.int8 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_mul_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_mul_mkldnn_op.py index a0581d79120..f4e7bd78e23 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_mul_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_mul_mkldnn_op.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 @OpTestTool.skip_if_not_cpu_bf16() class TestMulOneDNNOp(OpTest): + def setUp(self): self.op_type = "mul" self.attrs = {'use_mkldnn': True} @@ -39,9 +40,8 @@ class TestMulOneDNNOp(OpTest): self.inputs = {'X': self.x, 'Y': self.y} - output = np.dot( - np.reshape(self.x_fp32, self.np_x_shape), - np.reshape(self.y_fp32, self.np_y_shape)) + output = np.dot(np.reshape(self.x_fp32, self.np_x_shape), + np.reshape(self.y_fp32, self.np_y_shape)) self.outputs = {'Out': np.reshape(output, self.out_shape)} def init_shapes_and_attrs(self): @@ -70,6 +70,7 @@ class TestMulOneDNNOp(OpTest): class TestMulXNumColDims2OneDNNOp(TestMulOneDNNOp): + def init_shapes_and_attrs(self): self.x_shape = (6, 7, 5) self.y_shape = (5, 21) @@ -83,6 +84,7 @@ class TestMulXNumColDims2OneDNNOp(TestMulOneDNNOp): class TestMulYNumColDims2OneDNNOp(TestMulOneDNNOp): + def init_shapes_and_attrs(self): self.x_shape = (20, 6) self.y_shape = (2, 3, 21) @@ -96,6 +98,7 @@ class TestMulYNumColDims2OneDNNOp(TestMulOneDNNOp): class TestMulYAndXNumColDims2OneDNNOp(TestMulOneDNNOp): + def init_shapes_and_attrs(self): self.x_shape = (10, 5, 6) self.y_shape = (2, 3, 21) @@ -110,6 +113,7 @@ class TestMulYAndXNumColDims2OneDNNOp(TestMulOneDNNOp): class TestMulBF16OneDNNOp(TestMulOneDNNOp): + def init_inputs_dtype(self): self.x = convert_float_to_uint16(self.x) self.y = convert_float_to_uint16(self.y) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_multi_gru_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_multi_gru_mkldnn_op.py index 04941ef22ac..4c117129475 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_multi_gru_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_multi_gru_mkldnn_op.py @@ -42,6 +42,7 @@ def multi_gru( class TestMultiGruMkldnnOp(OpTest): + def set_confs(self): pass @@ -96,9 +97,8 @@ class TestMultiGruMkldnnOp(OpTest): wx.append(np.random.rand(IC, 3 * OC).astype('float32')) wh.append(np.random.rand(OC, 3 * 
OC).astype('float32')) bias.append( - np.random.rand(1, 3 * OC).astype('float32') - if self.with_bias else np.zeros( - (1, 3 * OC), dtype='float32')) + np.random.rand(1, 3 * OC).astype('float32') if self. + with_bias else np.zeros((1, 3 * OC), dtype='float32')) h0.append(np.zeros((N, OC), dtype='float32')) self.inputs['WeightX'] = [('wx' + str(i), wx[i]) @@ -116,20 +116,20 @@ class TestMultiGruMkldnnOp(OpTest): OC = self.OCs[layer] for j in range(2): scale_ur = s8_max / np.max(np.abs( - np.concatenate( - [ - wx[2 * layer + j][:, :2 * OC], wh[2 * layer + j] - .flatten()[:2 * OC * OC].reshape(OC, 2 * OC) - ], - axis=0)), + np.concatenate([ + wx[2 * layer + j][:, :2 * OC], + wh[2 * layer + j].flatten()[:2 * OC * OC].reshape( + OC, 2 * OC) + ], + axis=0)), axis=0) scale_o = s8_max / np.max(np.abs( - np.concatenate( - [ - wx[2 * layer + j][:, 2 * OC:], wh[2 * layer + j] - .flatten()[2 * OC * OC:].reshape(OC, OC) - ], - axis=0)), + np.concatenate([ + wx[2 * layer + j][:, 2 * OC:], + wh[2 * layer + j].flatten()[2 * OC * OC:].reshape( + OC, OC) + ], + axis=0)), axis=0) scale_weights.append( @@ -167,11 +167,13 @@ class TestMultiGruMkldnnOp(OpTest): class TestMultiGruMkldnnOpNoBias(TestMultiGruMkldnnOp): + def set_confs(self): self.with_bias = False class TestMultiGruMkldnnOpLayers2(TestMultiGruMkldnnOp): + def set_confs(self): self.layers = 2 self.ICs = [2, 6] @@ -179,6 +181,7 @@ class TestMultiGruMkldnnOpLayers2(TestMultiGruMkldnnOp): class TestMultiGruMkldnnOpLayers3(TestMultiGruMkldnnOp): + def set_confs(self): self.layers = 3 self.ICs = [2, 6, 12] @@ -186,60 +189,71 @@ class TestMultiGruMkldnnOpLayers3(TestMultiGruMkldnnOp): class TestMultiGruMkldnnOpOriginMode(TestMultiGruMkldnnOp): + def set_confs(self): self.origin_mode = True class TestMultiGruMkldnnInt8Op(TestMultiGruMkldnnOp): + def set_dtype(self): self.dtype = 'int8' class TestMultiGruMkldnnInt8OpForceFP32Output(TestMultiGruMkldnnInt8Op): + def set_force_fp32_output(self): self.force_fp32_output = True class TestMultiGruMkldnnInt8OpNoBias(TestMultiGruMkldnnOpNoBias): + def set_dtype(self): self.dtype = 'int8' class TestMultiGruMkldnnInt8OpNoBiasForceFP32Output( TestMultiGruMkldnnInt8OpNoBias): + def set_force_fp32_output(self): self.force_fp32_output = True class TestMultiGruMkldnnInt8OpLayers2(TestMultiGruMkldnnOpLayers2): + def set_dtype(self): self.dtype = 'int8' class TestMultiGruMkldnnInt8OpLayers2ForceFP32Output( TestMultiGruMkldnnInt8OpLayers2): + def set_force_fp32_output(self): self.force_fp32_output = True class TestMultiGruMkldnnInt8OpLayers3(TestMultiGruMkldnnOpLayers3): + def set_dtype(self): self.dtype = 'int8' class TestMultiGruMkldnnInt8OpLayers3ForceFP32Output( TestMultiGruMkldnnInt8OpLayers3): + def set_force_fp32_output(self): self.force_fp32_output = True class TestMultiGruMkldnnInt8OpOriginMode(TestMultiGruMkldnnOpOriginMode): + def set_dtype(self): self.dtype = 'int8' class TestMultiGruMkldnnInt8OpOriginModeForceFP32Output( TestMultiGruMkldnnInt8OpOriginMode): + def set_force_fp32_output(self): self.force_fp32_output = True diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_mkldnn_op.py index a802ef4c612..f2d0dd9101e 100755 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_mkldnn_op.py @@ -60,6 +60,7 @@ def nearest_neighbor_interp_mkldnn_np(X, @skip_check_grad_ci(reason="Haven not implement interpolate grad 
kernel.") class TestNearestInterpMKLDNNOp(OpTest): + def init_test_case(self): pass @@ -105,9 +106,10 @@ class TestNearestInterpMKLDNNOp(OpTest): out_h = self.out_h out_w = self.out_w - output_np = nearest_neighbor_interp_mkldnn_np( - input_np, out_h, out_w, self.out_size, self.actual_shape, - self.data_layout) + output_np = nearest_neighbor_interp_mkldnn_np(input_np, out_h, out_w, + self.out_size, + self.actual_shape, + self.data_layout) self.inputs = {'X': input_np} if self.out_size is not None: @@ -129,6 +131,7 @@ class TestNearestInterpMKLDNNOp(OpTest): class TestNearestInterpOpMKLDNNNHWC(TestNearestInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [3, 2, 32, 16] self.out_h = 27 @@ -138,6 +141,7 @@ class TestNearestInterpOpMKLDNNNHWC(TestNearestInterpMKLDNNOp): class TestNearestNeighborInterpMKLDNNCase2(TestNearestInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [3, 3, 9, 6] self.out_h = 12 @@ -146,6 +150,7 @@ class TestNearestNeighborInterpMKLDNNCase2(TestNearestInterpMKLDNNOp): class TestNearestNeighborInterpCase3(TestNearestInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [1, 1, 32, 64] self.out_h = 64 @@ -154,6 +159,7 @@ class TestNearestNeighborInterpCase3(TestNearestInterpMKLDNNOp): class TestNearestNeighborInterpCase4(TestNearestInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [1, 1, 32, 64] self.out_h = 64 @@ -163,6 +169,7 @@ class TestNearestNeighborInterpCase4(TestNearestInterpMKLDNNOp): class TestNearestNeighborInterpSame(TestNearestInterpMKLDNNOp): + def init_test_case(self): self.input_shape = [2, 3, 32, 64] self.out_h = 32 @@ -171,15 +178,19 @@ class TestNearestNeighborInterpSame(TestNearestInterpMKLDNNOp): def create_test_class(parent): + class TestFp32Case(parent): + def init_data_type(self): self.dtype = np.float32 class TestInt8Case(parent): + def init_data_type(self): self.dtype = np.int8 class TestUint8Case(parent): + def init_data_type(self): self.dtype = np.uint8 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_v2_mkldnn_op.py index d72a1d53d3a..075792e3a51 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_v2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_v2_mkldnn_op.py @@ -61,6 +61,7 @@ def nearest_neighbor_interp_mkldnn_np(X, @skip_check_grad_ci(reason="Haven not implement interpolate grad kernel.") @OpTestTool.skip_if_not_cpu_bf16() class TestNearestInterpV2MKLDNNOp(OpTest): + def init_test_case(self): pass @@ -120,9 +121,10 @@ class TestNearestInterpV2MKLDNNOp(OpTest): out_h = self.out_h out_w = self.out_w - output_np = nearest_neighbor_interp_mkldnn_np( - input_np, out_h, out_w, self.out_size, self.actual_shape, - self.data_layout) + output_np = nearest_neighbor_interp_mkldnn_np(input_np, out_h, out_w, + self.out_size, + self.actual_shape, + self.data_layout) if isinstance(self.scale, float): self.scale = [self.scale] @@ -150,6 +152,7 @@ class TestNearestInterpV2MKLDNNOp(OpTest): class TestNearestInterpOpV2MKLDNNNHWC(TestNearestInterpV2MKLDNNOp): + def init_test_case(self): self.input_shape = [3, 2, 32, 16] self.out_h = 27 @@ -159,6 +162,7 @@ class TestNearestInterpOpV2MKLDNNNHWC(TestNearestInterpV2MKLDNNOp): class TestNearestNeighborInterpV2MKLDNNCase2(TestNearestInterpV2MKLDNNOp): + def init_test_case(self): self.input_shape = [3, 3, 9, 6] self.out_h = 12 @@ -166,6 +170,7 @@ class 
TestNearestNeighborInterpV2MKLDNNCase2(TestNearestInterpV2MKLDNNOp): class TestNearestNeighborInterpV2MKLDNNCase3(TestNearestInterpV2MKLDNNOp): + def init_test_case(self): self.input_shape = [1, 1, 32, 64] self.out_h = 64 @@ -174,6 +179,7 @@ class TestNearestNeighborInterpV2MKLDNNCase3(TestNearestInterpV2MKLDNNOp): class TestNearestNeighborInterpV2MKLDNNCase4(TestNearestInterpV2MKLDNNOp): + def init_test_case(self): self.input_shape = [1, 1, 32, 64] self.out_h = 64 @@ -183,6 +189,7 @@ class TestNearestNeighborInterpV2MKLDNNCase4(TestNearestInterpV2MKLDNNOp): class TestNearestNeighborInterpV2MKLDNNSame(TestNearestInterpV2MKLDNNOp): + def init_test_case(self): self.input_shape = [2, 3, 32, 64] self.out_h = 32 @@ -191,19 +198,24 @@ class TestNearestNeighborInterpV2MKLDNNSame(TestNearestInterpV2MKLDNNOp): def create_test_class(parent): + class TestFp32Case(parent): + def init_data_type(self): self.dtype = np.float32 class TestBf16Case(parent): + def init_data_type(self): self.dtype = np.uint16 class TestInt8Case(parent): + def init_data_type(self): self.dtype = np.int8 class TestUint8Case(parent): + def init_data_type(self): self.dtype = np.uint8 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_bf16_mkldnn_op.py index 5430c1598f8..794871ba5c1 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_bf16_mkldnn_op.py @@ -25,6 +25,7 @@ from paddle import enable_static @OpTestTool.skip_if_not_cpu_bf16() class TestPoolBf16MklDNNOpGrad(TestPool2D_Op_Mixin, OpTest): + def init_kernel_type(self): self.use_mkldnn = True @@ -36,10 +37,11 @@ class TestPoolBf16MklDNNOpGrad(TestPool2D_Op_Mixin, OpTest): self.attrs['mkldnn_data_type'] = "bfloat16" self.x_fp32 = np.random.random(self.shape).astype(np.float32) - output = self.pool2D_forward_naive( - self.x_fp32, self.ksize, self.strides, self.paddings, - self.global_pool, self.ceil_mode, self.exclusive, self.adaptive, - "float32").astype(np.float32) + output = self.pool2D_forward_naive(self.x_fp32, self.ksize, + self.strides, self.paddings, + self.global_pool, self.ceil_mode, + self.exclusive, self.adaptive, + "float32").astype(np.float32) self.inputs = {'X': convert_float_to_uint16(self.x_fp32)} self.outputs = {'Out': convert_float_to_uint16(output)} @@ -48,25 +50,27 @@ class TestPoolBf16MklDNNOpGrad(TestPool2D_Op_Mixin, OpTest): self.check_output_with_place(core.CPUPlace()) def test_check_grad(self): - x_grad = pool2d_backward_naive( - self.x_fp32, - ksize=self.ksize, - strides=self.strides, - paddings=self.paddings, - global_pool=self.global_pool, - ceil_mode=False, - exclusive=self.exclusive, - adaptive=self.adaptive, - data_format=self.data_format, - pool_type=self.pool_type, - padding_algorithm=self.padding_algorithm) + x_grad = pool2d_backward_naive(self.x_fp32, + ksize=self.ksize, + strides=self.strides, + paddings=self.paddings, + global_pool=self.global_pool, + ceil_mode=False, + exclusive=self.exclusive, + adaptive=self.adaptive, + data_format=self.data_format, + pool_type=self.pool_type, + padding_algorithm=self.padding_algorithm) x_grad = x_grad / np.prod(self.outputs['Out'].shape) - self.check_grad_with_place( - core.CPUPlace(), set(['X']), 'Out', user_defined_grads=[x_grad]) + self.check_grad_with_place(core.CPUPlace(), + set(['X']), + 'Out', + user_defined_grads=[x_grad]) @OpTestTool.skip_if_not_cpu_bf16() class TestPoolBf16MklDNNOp(TestPool2D_Op_Mixin, OpTest): 
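The surrounding pool2d bf16 hunks build inputs and expected outputs through convert_float_to_uint16 and compare them against a float32 reference pool. For orientation, a minimal NumPy sketch of that conversion idea is below: bfloat16 is the upper half of the IEEE float32 bit pattern, stored as uint16. The helper name and the truncating rounding are assumptions made for illustration; the real utility in op_test may round to nearest instead.

# Illustrative sketch only: keep the top 16 bits of each float32 value, which is
# the bfloat16 bit pattern, and store it in a uint16 array (truncation, no rounding).
import numpy as np

def float32_to_bf16_bits(x):
    x = np.ascontiguousarray(x, dtype=np.float32)
    return (x.view(np.uint32) >> 16).astype(np.uint16)

print(float32_to_bf16_bits(np.array([1.0, 0.5, 3.140625], dtype=np.float32)))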
+ def init_kernel_type(self): self.use_mkldnn = True @@ -75,10 +79,11 @@ class TestPoolBf16MklDNNOp(TestPool2D_Op_Mixin, OpTest): self.dtype = np.uint16 input = np.random.random(self.shape).astype(np.float32) - output = (self.pool2D_forward_naive( - input, self.ksize, self.strides, self.paddings, self.global_pool, - self.ceil_mode, self.exclusive, self.adaptive, - "float32")).astype(np.float32) + output = (self.pool2D_forward_naive(input, self.ksize, self.strides, + self.paddings, self.global_pool, + self.ceil_mode, self.exclusive, + self.adaptive, + "float32")).astype(np.float32) self.inputs = {'X': convert_float_to_uint16(input)} self.outputs = {'Out': convert_float_to_uint16(output)} @@ -91,6 +96,7 @@ class TestPoolBf16MklDNNOp(TestPool2D_Op_Mixin, OpTest): class TestCase1Avg(TestPoolBf16MklDNNOp): + def init_test_case(self): self.shape = [2, 3, 7, 7] self.ksize = [3, 3] @@ -105,6 +111,7 @@ class TestCase1Avg(TestPoolBf16MklDNNOp): class TestCase2Avg(TestPoolBf16MklDNNOp): + def init_test_case(self): self.shape = [2, 3, 7, 7] self.ksize = [3, 3] @@ -119,24 +126,28 @@ class TestCase2Avg(TestPoolBf16MklDNNOp): class TestCase0Max(TestPoolBf16MklDNNOp): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase1Max(TestCase1Avg): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase2Max(TestCase2Avg): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase1PadZeroExclusiveAvgGrad(TestPoolBf16MklDNNOpGrad): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -155,23 +166,27 @@ class TestCase1PadZeroExclusiveAvgGrad(TestPoolBf16MklDNNOpGrad): class TestCase2PadOneNonExclusiveAvgGrad(TestCase1PadZeroExclusiveAvgGrad): + def init_exclusive(self): self.exclusive = False class TestCase0InitialMaxGrad(TestPoolBf16MklDNNOpGrad): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase1PadZeroExclusiveMaxGrad(TestCase1PadZeroExclusiveAvgGrad): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase2PadOneNonExclusiveMaxGrad(TestCase2PadOneNonExclusiveAvgGrad): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_int8_mkldnn_op.py index 639cb570a84..30bdbcbe78b 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_int8_mkldnn_op.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, avg_pool2 class TestPool2DMKLDNNInt8_Op(TestPool2D_Op): + def init_kernel_type(self): self.use_mkldnn = True @@ -32,26 +33,29 @@ class TestPool2DMKLDNNInt8_Op(TestPool2D_Op): def setUp(self): TestPool2D_Op.setUp(self) - assert self.dtype in [np.int8, np.uint8 - ], 'Dtype should be int8 or uint8' + assert self.dtype in [np.int8, + np.uint8], 'Dtype should be int8 or uint8' input = np.random.randint(0, 100, self.shape).astype(self.dtype) - output = (self.pool2D_forward_naive( - input, self.ksize, self.strides, self.paddings, self.global_pool, - self.ceil_mode, self.exclusive, self.adaptive, - self.dtype)).astype(self.dtype) + output = (self.pool2D_forward_naive(input, 
self.ksize, self.strides, + self.paddings, self.global_pool, + self.ceil_mode, self.exclusive, + self.adaptive, + self.dtype)).astype(self.dtype) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)} self.outputs = {'Out': output} def test_check_output(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_output_with_place( - core.CPUPlace(), atol=1e-5, check_dygraph=False) + self.check_output_with_place(core.CPUPlace(), + atol=1e-5, + check_dygraph=False) def test_check_grad(self): pass class TestCase1Avg(TestPool2DMKLDNNInt8_Op): + def init_test_case(self): self.shape = [2, 3, 7, 7] self.ksize = [3, 3] @@ -66,6 +70,7 @@ class TestCase1Avg(TestPool2DMKLDNNInt8_Op): class TestCase2Avg(TestPool2DMKLDNNInt8_Op): + def init_test_case(self): self.shape = [2, 3, 7, 7] self.ksize = [3, 3] @@ -80,29 +85,35 @@ class TestCase2Avg(TestPool2DMKLDNNInt8_Op): class TestCase0Max(TestPool2DMKLDNNInt8_Op): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase1Max(TestCase1Avg): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase2Max(TestCase2Avg): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive def create_test_s8_u8_class(parent): + class TestS8Case(parent): + def init_data_type(self): self.dtype = np.int8 class TestU8Case(parent): + def init_data_type(self): self.dtype = np.uint8 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py index 3f80bdc1651..6d39b27b1ae 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py @@ -20,7 +20,9 @@ from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, TestCase1 def create_test_mkldnn_use_ceil_class(parent): + class TestMKLDNNPool2DUseCeilCase(parent): + def init_kernel_type(self): self.use_mkldnn = True @@ -41,7 +43,9 @@ create_test_mkldnn_use_ceil_class(TestCase2) def create_test_mkldnn_class(parent): + class TestMKLDNNCase(parent): + def init_kernel_type(self): self.use_mkldnn = True @@ -62,6 +66,7 @@ create_test_mkldnn_class(TestCase5) class TestAvgPoolAdaptive(TestPool2D_Op): + def init_adaptive(self): self.adaptive = True @@ -84,6 +89,7 @@ class TestAvgPoolAdaptive(TestPool2D_Op): class TestAvgPoolAdaptive2(TestAvgPoolAdaptive): + def init_test_case(self): self.ksize = [2, 3] self.strides = [1, 1] @@ -93,6 +99,7 @@ class TestAvgPoolAdaptive2(TestAvgPoolAdaptive): class TestAvgPoolAdaptive3(TestAvgPoolAdaptive): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -102,6 +109,7 @@ class TestAvgPoolAdaptive3(TestAvgPoolAdaptive): class TestAsymPad(TestPool2D_Op): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -127,68 +135,81 @@ class TestAsymPad(TestPool2D_Op): class TestAsymPadCase1(TestAsymPad): + def init_paddings(self): self.paddings = [1, 1, 0, 0] class TestAsymPadCase2(TestAsymPad): + def init_paddings(self): self.paddings = [1, 0, 1, 2] class TestAsymPadCase3(TestAsymPad): + def init_paddings(self): self.paddings = [1, 2, 1, 2] class TestAsymPadCase4(TestAsymPad): + def init_paddings(self): self.paddings = [1, 0, 1, 2] class TestAsymPadCase5(TestAsymPad): + def init_paddings(self): self.paddings = [2, 2, 1, 2] class TestAsymPadMaxCase1(TestAsymPadCase1): + def init_pool_type(self): 
self.pool_type = "max" class TestAsymPadMaxCase2(TestAsymPadCase2): + def init_pool_type(self): self.pool_type = "max" class TestAsymPadMaxCase3(TestAsymPadCase3): + def init_pool_type(self): self.pool_type = "max" class TestAsymPadMaxCase4(TestAsymPadCase4): + def init_pool_type(self): self.pool_type = "max" class TestAsymPadMaxCase5(TestAsymPadCase5): + def init_pool_type(self): self.pool_type = "max" class TestAsymPadSame(TestAsymPad): + def init_paddings(self): self.paddings = [0, 0] self.padding_algorithm = "SAME" class TestAsymPadValid(TestAsymPad): + def init_paddings(self): self.paddings = [0, 0, 0, 0] self.padding_algorithm = "VALID" class TestAsymPadValidNHWC(TestAsymPadValid): + def init_data_format(self): self.data_format = "NHWC" diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_prelu_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_prelu_mkldnn_op.py index 901aa200a37..ab6a4f4c06b 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_prelu_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_prelu_mkldnn_op.py @@ -42,6 +42,7 @@ def ref_prelu(x, weight, mode): class TestPReluModeChannelOneDNNOp(OpTest): + def init_attrs(self): self.mode = "element" self.alpha = np.random.random((1, 4, 5, 5)).astype("float32") @@ -70,6 +71,7 @@ class TestPReluModeChannelOneDNNOp(OpTest): class TestPReluModeAllOneDNNOp(TestPReluModeChannelOneDNNOp): + def init_attrs(self): self.mode = "all" self.alpha = np.random.random((1, 1, 1, 1)).astype("float32") @@ -81,12 +83,14 @@ class TestPReluModeAllOneDNNOp(TestPReluModeChannelOneDNNOp): class TestPReluModeElementOneDNNOp(TestPReluModeChannelOneDNNOp): + def init_attrs(self): self.mode = "element" self.alpha = np.random.random((1, 4, 5, 5)).astype("float32") class TestPReluModeChannel3DOneDNNOp(TestPReluModeChannelOneDNNOp): + def init_attrs(self): self.mode = "channel" self.x = np.random.random((1, 100, 1)).astype("float32") @@ -94,6 +98,7 @@ class TestPReluModeChannel3DOneDNNOp(TestPReluModeChannelOneDNNOp): class TestPReluModeChannelAlpha1DOneDNNOp(TestPReluModeChannelOneDNNOp): + def init_attrs(self): self.mode = "channel" self.x = np.random.random((1, 100, 1)).astype("float32") @@ -101,6 +106,7 @@ class TestPReluModeChannelAlpha1DOneDNNOp(TestPReluModeChannelOneDNNOp): class TestPReluModeAllAlpha1DOneDNNOp(TestPReluModeAllOneDNNOp): + def init_attrs(self): self.mode = "channel" self.x = np.random.random((1, 1, 100)).astype("float32") @@ -109,8 +115,10 @@ class TestPReluModeAllAlpha1DOneDNNOp(TestPReluModeAllOneDNNOp): # BF16 TESTS def create_bf16_test_class(parent): + @OpTestTool.skip_if_not_cpu_bf16() class TestPReluBF16OneDNNOp(parent): + def set_inputs(self, ): self.inputs = { 'X': convert_float_to_uint16(self.x), diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py index c92d870565f..e0c28115d13 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py @@ -21,6 +21,7 @@ import paddle class TestQuantizeOp(OpTest): + def setUp(self): self.op_type = 'quantize' self.scale = 255.0 @@ -39,12 +40,12 @@ class TestQuantizeOp(OpTest): def prepare_input(self): if self.is_negative: # input data values are from interval [-1.0, 1.0) - self.input = (2 * np.random.random_sample(self.input_size) - 1 - ).astype('float32') + self.input = (2 * np.random.random_sample(self.input_size) - + 1).astype('float32') else: # 
input data values are from interval [0.0, 1.0) - self.input = ( - np.random.random_sample(self.input_size)).astype('float32') + self.input = (np.random.random_sample( + self.input_size)).astype('float32') self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(self.input)} self.attrs = { @@ -56,8 +57,8 @@ class TestQuantizeOp(OpTest): def prepare_output(self): input_data_type = 'int8' if self.is_negative else 'uint8' - output = np.rint(self.input * self.scale + self.shift).astype( - input_data_type) + output = np.rint(self.input * self.scale + + self.shift).astype(input_data_type) self.outputs = {'Output': output} def test_check_output(self): @@ -90,6 +91,7 @@ class TestQuantizeOp(OpTest): class TestQuantizeOp1(TestQuantizeOp): + def set_scale(self): self.scale = 127.0 @@ -98,6 +100,7 @@ class TestQuantizeOp1(TestQuantizeOp): class TestQuantizeOp2(TestQuantizeOp): + def set_scale(self): self.scale = 255.0 @@ -108,6 +111,7 @@ class TestQuantizeOp2(TestQuantizeOp): # 2-dim input # P - positive input class TestQuantizeOpShift_NCHW_2_P(TestQuantizeOp): + def set_output_format(self): self.output_format = 'NCHW' @@ -127,6 +131,7 @@ class TestQuantizeOpShift_NCHW_2_P(TestQuantizeOp): # 2-dim input # N - negative input class TestQuantizeOpShift_NCHW_2_N(TestQuantizeOpShift_NCHW_2_P): + def set_is_negative(self): self.is_nagative = True @@ -138,53 +143,63 @@ class TestQuantizeOpShift_NCHW_2_N(TestQuantizeOpShift_NCHW_2_P): class TestQuantizeOpShift_NHWC_2_P(TestQuantizeOpShift_NCHW_2_P): + def set_output_format(self): self.output_format = 'NHWC' class TestQuantizeOpShift_NHWC_2_N(TestQuantizeOpShift_NCHW_2_N): + def set_output_format(self): self.output_format = 'NHWC' # 3-dim input class TestQuantizeOpShift_NCHW_3_P(TestQuantizeOpShift_NCHW_2_P): + def set_input_size(self): self.input_size = [2, 3, 4] class TestQuantizeOpShift_NCHW_3_N(TestQuantizeOpShift_NCHW_2_N): + def set_input_size(self): self.input_size = [2, 3, 4] class TestQuantizeOpShift_NHWC_3_P(TestQuantizeOpShift_NCHW_3_P): + def set_output_format(self): self.output_format = 'NHWC' class TestQuantizeOpShift_NHWC_3_N(TestQuantizeOpShift_NCHW_3_N): + def set_output_format(self): self.output_format = 'NHWC' # 4-dim input class TestQuantizeOpShift_NCHW_4_P(TestQuantizeOpShift_NCHW_2_P): + def set_input_size(self): self.input_size = [2, 3, 4, 5] class TestQuantizeOpShift_NCHW_4_N(TestQuantizeOpShift_NCHW_2_N): + def set_input_size(self): self.input_size = [2, 3, 4, 5] class TestQuantizeOpShift_NHWC_4_P(TestQuantizeOpShift_NCHW_4_P): + def set_output_format(self): self.output_format = 'NHWC' class TestQuantizeOpShift_NHWC_4_N(TestQuantizeOpShift_NCHW_4_N): + def set_output_format(self): self.output_format = 'NHWC' diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_bf16_mkldnn_op.py index d1a65767903..1176a805646 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_bf16_mkldnn_op.py @@ -20,11 +20,13 @@ from paddle.fluid.tests.unittests.op_test import OpTestTool, OpTest, skip_check_ import paddle.fluid.core as core import paddle.fluid as fluid import paddle + paddle.enable_static() @OpTestTool.skip_if_not_cpu_bf16() class TestReduceSumDefaultBF16OneDNNOp(OpTest): + def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -70,6 +72,7 @@ class TestReduceSumDefaultBF16OneDNNOp(OpTest): class 
TestReduceDefaultWithGradBF16OneDNNOp(TestReduceSumDefaultBF16OneDNNOp): + def test_check_grad(self): self.calculate_grads() self.check_grad_with_place( @@ -82,6 +85,7 @@ class TestReduceDefaultWithGradBF16OneDNNOp(TestReduceSumDefaultBF16OneDNNOp): class TestReduceSum4DReduceAllDimAttributeBF16OneDNNOp( TestReduceDefaultWithGradBF16OneDNNOp): + def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -94,6 +98,7 @@ class TestReduceSum4DReduceAllDimAttributeBF16OneDNNOp( class TestReduceSum4DReduceAllWithoutReduceAllAttributeNegativeDimsBF16OneDNNOp( TestReduceDefaultWithGradBF16OneDNNOp): + def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -106,6 +111,7 @@ class TestReduceSum4DReduceAllWithoutReduceAllAttributeNegativeDimsBF16OneDNNOp( class TestReduceSum5DReduceAllKeepDimsBF16OneDNNOp( TestReduceDefaultWithGradBF16OneDNNOp): + def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -116,8 +122,9 @@ class TestReduceSum5DReduceAllKeepDimsBF16OneDNNOp( self.outputs = {'Out': self.x_fp32.sum(keepdims=self.attrs['keep_dim'])} -class TestReduceSum4DReduceAllBF16OneDNNOp( - TestReduceDefaultWithGradBF16OneDNNOp): +class TestReduceSum4DReduceAllBF16OneDNNOp(TestReduceDefaultWithGradBF16OneDNNOp + ): + def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -178,6 +185,7 @@ class TestReduceMin3DBF16OneDNNOp(TestReduceSumDefaultBF16OneDNNOp): class TestReduceMean3DBF16OneDNNOp(TestReduceDefaultWithGradBF16OneDNNOp): + def setUp(self): self.op_type = "reduce_mean" self.use_mkldnn = True @@ -189,6 +197,7 @@ class TestReduceMean3DBF16OneDNNOp(TestReduceDefaultWithGradBF16OneDNNOp): class TestReduceMean4DBF16OneDNNOp(TestReduceDefaultWithGradBF16OneDNNOp): + def setUp(self): self.op_type = "reduce_mean" self.use_mkldnn = True @@ -197,7 +206,8 @@ class TestReduceMean4DBF16OneDNNOp(TestReduceDefaultWithGradBF16OneDNNOp): self.inputs = {'X': self.x_bf16} self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [0, 1]} self.outputs = { - 'Out': self.x_fp32.sum(axis=tuple(self.attrs['dim'])) / + 'Out': + self.x_fp32.sum(axis=tuple(self.attrs['dim'])) / (self.x_fp32.shape[0] * self.x_fp32.shape[1]) } diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_mkldnn_op.py index 7b0bb706aec..23687aec9ef 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_mkldnn_op.py @@ -20,6 +20,7 @@ import paddle class TestReduceSumDefaultOneDNNOp(OpTest): + def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -32,11 +33,13 @@ class TestReduceSumDefaultOneDNNOp(OpTest): class TestReduceDefaultWithGradOneDNNOp(TestReduceSumDefaultOneDNNOp): + def test_check_grad(self): self.check_grad(['X'], 'Out') class TestReduceSum4DOneDNNOp(TestReduceDefaultWithGradOneDNNOp): + def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -49,6 +52,7 @@ class TestReduceSum4DOneDNNOp(TestReduceDefaultWithGradOneDNNOp): class TestReduceSum4DReduceAllDimAttributeBF16OneDNNOp( TestReduceDefaultWithGradOneDNNOp): + def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -60,19 +64,22 @@ class TestReduceSum4DReduceAllDimAttributeBF16OneDNNOp( class TestReduceSum5DKeepDimsOneDNNOp(TestReduceDefaultWithGradOneDNNOp): + def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True self.inputs = {'X': np.random.random((2, 5, 3, 2, 2)).astype("float32")} self.attrs = {'dim': 
(2, 3, 4), 'keep_dim': True, 'use_mkldnn': True} self.outputs = { - 'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']), - keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].sum(axis=tuple(self.attrs['dim']), + keepdims=self.attrs['keep_dim']) } -class TestReduceSum5DReduceAllKeepDimsOneDNNOp( - TestReduceDefaultWithGradOneDNNOp): +class TestReduceSum5DReduceAllKeepDimsOneDNNOp(TestReduceDefaultWithGradOneDNNOp + ): + def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -84,6 +91,7 @@ class TestReduceSum5DReduceAllKeepDimsOneDNNOp( class TestReduceSum4DReduceAllOneDNNOp(TestReduceDefaultWithGradOneDNNOp): + def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -95,6 +103,7 @@ class TestReduceSum4DReduceAllOneDNNOp(TestReduceDefaultWithGradOneDNNOp): @OpTestTool.skip_if_not_cpu() class TestReduceSum4DNoReduceSimpleCopyOneDNNOp( TestReduceDefaultWithGradOneDNNOp): + def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -153,6 +162,7 @@ class TestReduceMin3DOneDNNOp(TestReduceSumDefaultOneDNNOp): class TestReduceMean3DOneDNNOp(TestReduceDefaultWithGradOneDNNOp): + def setUp(self): self.op_type = "reduce_mean" self.use_mkldnn = True @@ -164,6 +174,7 @@ class TestReduceMean3DOneDNNOp(TestReduceDefaultWithGradOneDNNOp): class TestReduceMean4DReduceAllOneDNNOp(TestReduceDefaultWithGradOneDNNOp): + def setUp(self): self.op_type = "reduce_mean" self.use_mkldnn = True diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py index 88aebac42e8..336ee80c1fc 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py @@ -23,6 +23,7 @@ from mkldnn_op_test import format_reorder class TestReQuantizeOp(OpTest): + def set_input_size(self): self.input_size = [1, 1, 10, 10] self.format_reorder = format_reorder @@ -44,12 +45,12 @@ class TestReQuantizeOp(OpTest): def prepare_input(self): if self.input_data_type == 'int8': # input data values are integers from interval [-128, 128) - self.input = (np.random.randint(0, 256, self.input_size) - 128 - ).astype(self.input_data_type) + self.input = (np.random.randint(0, 256, self.input_size) - + 128).astype(self.input_data_type) else: # input data values are integers from interval [0, 256) - self.input = (np.random.randint( - 0, 256, self.input_size)).astype(self.input_data_type) + self.input = (np.random.randint(0, 256, self.input_size)).astype( + self.input_data_type) self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(self.input)} self.attrs = { @@ -112,24 +113,28 @@ class TestReQuantizeOp(OpTest): class TestReQuantizeOp_S8_SameScales(TestReQuantizeOp): + def set_scales(self): self.scale_in = 127.0 self.scale_out = 127.0 class TestReQuantizeOp_S8_DifferentScales_1(TestReQuantizeOp): + def set_scales(self): self.scale_in = 127.0 self.scale_out = 100.0 class TestReQuantizeOp_S8_DifferentScales_2(TestReQuantizeOp): + def set_scales(self): self.scale_in = 100.0 self.scale_out = 127.0 class TestReQuantizeOp_S8_ZeroInputScale(TestReQuantizeOp): + def set_scales(self): self.scale_in = 0.0 self.scale_out = 127.0 @@ -144,6 +149,7 @@ class TestReQuantizeOp_S8_ZeroInputScale(TestReQuantizeOp): class TestReQuantizeOp_S8_ZeroOutputScale(TestReQuantizeOp): + def set_scales(self): self.scale_in = 127.0 self.scale_out = 0.0 @@ -161,18 +167,21 @@ class TestReQuantizeOp_S8_ZeroOutputScale(TestReQuantizeOp): class 
TestReQuantizeOp_U8_SameScales(TestReQuantizeOp_S8_SameScales): + def set_input_data_type(self): self.input_data_type = 'uint8' class TestReQuantizeOp_U8_DifferentScales_1( TestReQuantizeOp_S8_DifferentScales_1): + def set_input_data_type(self): self.input_data_type = 'uint8' class TestReQuantizeOp_U8_DifferentScales_2( TestReQuantizeOp_S8_DifferentScales_2): + def set_input_data_type(self): self.input_data_type = 'uint8' @@ -181,6 +190,7 @@ class TestReQuantizeOp_U8_DifferentScales_2( class TestReQuantizeOp_S8_WithShift(TestReQuantizeOp): + def set_scales(self): self.scale_in = 60.0 self.scale_out = 127.0 @@ -196,6 +206,7 @@ class TestReQuantizeOp_S8_WithShift(TestReQuantizeOp): class TestReQuantizeOp_S8_WithOutputShift(TestReQuantizeOp): + def set_scales(self): self.scale_in = 127.0 self.scale_out = 60.0 @@ -209,6 +220,7 @@ class TestReQuantizeOp_S8_WithOutputShift(TestReQuantizeOp): class TestReQuantizeOp_U8_SameScales_SameShift(TestReQuantizeOp_U8_SameScales): + def set_shifts(self): self.shift_in = 128.0 self.shift_out = 128.0 @@ -216,6 +228,7 @@ class TestReQuantizeOp_U8_SameScales_SameShift(TestReQuantizeOp_U8_SameScales): class TestReQuantizeOp_U8_SameScales_DifferentShift_1( TestReQuantizeOp_U8_SameScales): + def set_shifts(self): self.shift_in = 60.0 self.shift_out = 128.0 @@ -223,6 +236,7 @@ class TestReQuantizeOp_U8_SameScales_DifferentShift_1( class TestReQuantizeOp_U8_SameScales_DifferentShift_2( TestReQuantizeOp_U8_SameScales): + def set_shifts(self): self.shift_in = 128.0 self.shift_out = 60.0 @@ -230,6 +244,7 @@ class TestReQuantizeOp_U8_SameScales_DifferentShift_2( class TestReQuantizeOp_U8_DifferentScales_1_SameShift( TestReQuantizeOp_U8_DifferentScales_1): + def set_shifts(self): self.shift_in = 128.0 self.shift_out = 128.0 @@ -237,6 +252,7 @@ class TestReQuantizeOp_U8_DifferentScales_1_SameShift( class TestReQuantizeOp_U8_DifferentScales_2_SameShift( TestReQuantizeOp_U8_DifferentScales_2): + def set_shifts(self): self.shift_in = 128.0 self.shift_out = 128.0 @@ -244,6 +260,7 @@ class TestReQuantizeOp_U8_DifferentScales_2_SameShift( class TestReQuantizeOp_U8_DifferentScales_1_DifferentShift_1( TestReQuantizeOp_U8_DifferentScales_1): + def set_shifts(self): self.shift_in = 128.0 self.shift_out = 60.0 @@ -251,6 +268,7 @@ class TestReQuantizeOp_U8_DifferentScales_1_DifferentShift_1( class TestReQuantizeOp_U8_DifferentScales_2_DifferentShift_1( TestReQuantizeOp_U8_DifferentScales_2): + def set_shifts(self): self.shift_in = 128.0 self.shift_out = 60.0 @@ -258,6 +276,7 @@ class TestReQuantizeOp_U8_DifferentScales_2_DifferentShift_1( class TestReQuantizeOp_U8_DifferentScales_1_DifferentShift_2( TestReQuantizeOp_U8_DifferentScales_1): + def set_shifts(self): self.shift_in = 60.0 self.shift_out = 128.0 @@ -265,6 +284,7 @@ class TestReQuantizeOp_U8_DifferentScales_1_DifferentShift_2( class TestReQuantizeOp_U8_DifferentScales_2_DifferentShift_2( TestReQuantizeOp_U8_DifferentScales_2): + def set_shifts(self): self.shift_in = 60.0 self.shift_out = 128.0 @@ -274,6 +294,7 @@ class TestReQuantizeOp_U8_DifferentScales_2_DifferentShift_2( class TestReQuantizeOp_2DimFormat(TestReQuantizeOp): + def format_reorder_2Dim(self, out, size): return out @@ -286,6 +307,7 @@ class TestReQuantizeOp_2DimFormat(TestReQuantizeOp): class TestReQuantizeOpReused(TestReQuantizeOp): + def setUp(self): # self.input_size = [1, 1, 10, 10] self.input_size = [1, 1, 2, 2] @@ -317,18 +339,20 @@ class TestReQuantizeOpReused(TestReQuantizeOp): with fluid.program_guard(program): block = program.global_block() for name 
in variables: - block.create_var( - name=name, dtype="int8", shape=variables[name].shape) - block.append_op( - type="requantize", - inputs={'Input': block.var('input'), }, - outputs={"Output": block.var('output')}, - attrs={ - 'Scale_in': self.scale_in, - 'Scale_out': self.scale_out, - 'Shift_in': self.shift_in, - 'Shift_out': self.shift_out - }) + block.create_var(name=name, + dtype="int8", + shape=variables[name].shape) + block.append_op(type="requantize", + inputs={ + 'Input': block.var('input'), + }, + outputs={"Output": block.var('output')}, + attrs={ + 'Scale_in': self.scale_in, + 'Scale_out': self.scale_out, + 'Shift_in': self.shift_in, + 'Shift_out': self.shift_out + }) place = core.CPUPlace() exe = fluid.Executor(place) for i in range(2): @@ -336,15 +360,15 @@ class TestReQuantizeOpReused(TestReQuantizeOp): feed={'input': variables['input']}, fetch_list=['output']) - self.assertTrue( - np.allclose( - variables['output'], out[0], atol=1e-4), 'output') + self.assertTrue(np.allclose(variables['output'], out[0], atol=1e-4), + 'output') # ---------------test reused requantize op, no shift------------------------ class TestReQuantizeOpReused_WithShift(TestReQuantizeOpReused): + def set_input_data_type(self): self.input_data_type = 'uint8' diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_bf16_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_bf16_op.py index ae844834154..fe335931361 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_bf16_op.py @@ -26,6 +26,7 @@ from paddle import enable_static @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestReshapeBf16Op(OpTest): + def setUp(self): self.op_type = "reshape2" self.use_mkldnn = False @@ -58,14 +59,14 @@ class TestReshapeBf16Op(OpTest): self.check_output_with_place(core.CPUPlace(), no_check_set=['XShape']) def test_check_grad(self): - self.check_grad_with_place( - core.CPUPlace(), ["X"], - "Out", - check_dygraph=False, - user_defined_grads=[self.input_data_fp32], - user_defined_grad_outputs=[ - self.inputs["X"].reshape(self.infered_shape) - ]) + self.check_grad_with_place(core.CPUPlace(), ["X"], + "Out", + check_dygraph=False, + user_defined_grads=[self.input_data_fp32], + user_defined_grad_outputs=[ + self.inputs["X"].reshape( + self.infered_shape) + ]) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_mkldnn_op.py index 78e5af3311b..828d190735a 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_mkldnn_op.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool, convert_flo @OpTestTool.skip_if(core.is_compiled_with_cuda(), "CUDA has to be skipped because it forces dygraph") class TestReshape2OneDNNOp(OpTest): + def setUp(self): self.init_data() self.set_op_type() @@ -64,6 +65,7 @@ class TestReshape2OneDNNOp(OpTest): class TestReshape2OneDNNOpDimInfer1(TestReshape2OneDNNOp): + def init_data(self): self.ori_shape = (5, 25) self.new_shape = (5, -1, 5) @@ -71,6 +73,7 @@ class TestReshape2OneDNNOpDimInfer1(TestReshape2OneDNNOp): class TestReshape2OneDNNOpDimInfer2(TestReshape2OneDNNOp): + def init_data(self): self.ori_shape = (6, 20) self.new_shape = (0, -1, 20) @@ -87,6 +90,7 @@ class 
TestReshape2OneDNNOpDimInfer2(TestReshape2OneDNNOp): class TestReshape2OneDNNOp_attr_OnlyShape(TestReshape2OneDNNOp): + def set_additional_inputs(self): self.inputs["Shape"] = np.array(self.new_shape, dtype="int32") @@ -107,6 +111,7 @@ class TestReshape2OneDNNOp_attr_OnlyShape(TestReshape2OneDNNOp): class TestReshape2OneDNNOpDimInfer1_attr_OnlyShape( TestReshape2OneDNNOp_attr_OnlyShape): + def init_data(self): self.ori_shape = (5, 20) self.new_shape = (5, -1, 10) @@ -115,6 +120,7 @@ class TestReshape2OneDNNOpDimInfer1_attr_OnlyShape( class TestReshape2OneDNNOpDimInfer1_attr_ShapeTensor(TestReshape2OneDNNOp): + def set_additional_inputs(self): shape_tensor = [] for index, ele in enumerate(self.new_shape): @@ -132,6 +138,7 @@ class TestReshape2OneDNNOpDimInfer1_attr_ShapeTensor(TestReshape2OneDNNOp): class TestReshape2OneDNNOpDimInfer1_attr_ShapeTensorAndShape( TestReshape2OneDNNOpDimInfer1_attr_ShapeTensor): + def set_additional_inputs(self): shape_tensor = [] for index, ele in enumerate(self.new_shape): @@ -143,6 +150,7 @@ class TestReshape2OneDNNOpDimInfer1_attr_ShapeTensorAndShape( class TestReshapeOneDNNOp(TestReshape2OneDNNOp): + def set_op_type(self): self.op_type = "reshape" @@ -154,6 +162,7 @@ class TestReshapeOneDNNOp(TestReshape2OneDNNOp): class TestReshapeOneDNNOpDimInfer1(TestReshapeOneDNNOp): + def init_data(self): self.ori_shape = (5, 25) self.new_shape = (5, -1, 5) @@ -161,6 +170,7 @@ class TestReshapeOneDNNOpDimInfer1(TestReshapeOneDNNOp): class TestReshapeOneDNNOp_attr_OnlyShape(TestReshape2OneDNNOp_attr_OnlyShape): + def set_op_type(self): self.op_type = "reshape" @@ -173,6 +183,7 @@ class TestReshapeOneDNNOp_attr_OnlyShape(TestReshape2OneDNNOp_attr_OnlyShape): class TestReshapeOneDNNOpDimInfer1_attr_OnlyShape( TestReshapeOneDNNOp_attr_OnlyShape): + def init_data(self): self.ori_shape = (5, 20) self.new_shape = (5, -1, 10) @@ -182,8 +193,10 @@ class TestReshapeOneDNNOpDimInfer1_attr_OnlyShape( # BF16 TESTS def create_reshape_bf16_test_classes(parent): + @OpTestTool.skip_if_not_cpu_bf16() class TestReshape2BF16OneDNNOp(parent): + def set_inputs(self): self.dtype = np.uint16 self.inputs = {"X": convert_float_to_uint16(self.x)} @@ -193,22 +206,22 @@ def create_reshape_bf16_test_classes(parent): self.dx = np.reshape(self.dout, self.ori_shape) def test_check_output(self): - self.check_output_with_place( - core.CPUPlace(), no_check_set=["XShape"]) + self.check_output_with_place(core.CPUPlace(), + no_check_set=["XShape"]) def test_check_grad(self): self.calculate_grads() - self.check_grad_with_place( - core.CPUPlace(), ["X"], - "Out", - user_defined_grads=[self.dx], - user_defined_grad_outputs=[self.dout]) + self.check_grad_with_place(core.CPUPlace(), ["X"], + "Out", + user_defined_grads=[self.dx], + user_defined_grad_outputs=[self.dout]) cls_name = "{0}_{1}".format(parent.__name__, "Reshape2_BF16") TestReshape2BF16OneDNNOp.__name__ = cls_name globals()[cls_name] = TestReshape2BF16OneDNNOp class TestReshapeBF16OneDNNOp(TestReshape2BF16OneDNNOp): + def set_op_type(self): self.dtype = np.uint16 self.op_type = "reshape" diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_scale_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_scale_bf16_mkldnn_op.py index 8e9f989f06c..496d6c393a4 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_scale_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_scale_bf16_mkldnn_op.py @@ -27,6 +27,7 @@ import paddle.fluid.core as core @unittest.skipIf(core.is_compiled_with_cuda(), "core is compiled 
with CUDA which has no BF implementation") class TestScaleOpBF16(OpTest): + def setUp(self): self.op_type = "scale" self.x_fp32 = np.random.random((10, 10)).astype(np.float32) @@ -65,6 +66,7 @@ class TestScaleOpBF16(OpTest): class TestScaleOpBF16BiasNotAfterScale(TestScaleOpBF16): + def setUp(self): self.op_type = "scale" self.x_fp32 = np.random.random((10, 10)).astype(np.float32) @@ -84,6 +86,7 @@ class TestScaleOpBF16BiasNotAfterScale(TestScaleOpBF16): class TestScaleOpBF16ScaleTensor(TestScaleOpBF16): + def setUp(self): self.op_type = "scale" self.scale = -2.3 @@ -99,6 +102,7 @@ class TestScaleOpBF16ScaleTensor(TestScaleOpBF16): class TestScaleOpBF16ScaleTensorNotBiasAfterScale(TestScaleOpBF16): + def setUp(self): self.op_type = "scale" self.scale = 1.2 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_scale_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_scale_mkldnn_op.py index 528b55dcd87..50a5e917985 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_scale_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_scale_mkldnn_op.py @@ -22,6 +22,7 @@ import paddle.fluid as fluid class TestScaleOp(OpTest): + def setUp(self): self.op_type = "scale" self.inputs = {'X': np.random.random((10, 10)).astype(np.float32)} @@ -39,6 +40,7 @@ class TestScaleOp(OpTest): class TestScaleOpBiasNotAfterScale(OpTest): + def setUp(self): self.op_type = "scale" self.inputs = {'X': np.random.random((10, 10)).astype(np.float32)} @@ -61,6 +63,7 @@ class TestScaleOpBiasNotAfterScale(OpTest): class TestScaleOpScaleTensor(OpTest): + def setUp(self): self.op_type = "scale" self.scale = -2.3 @@ -79,6 +82,7 @@ class TestScaleOpScaleTensor(OpTest): class TestScaleOpScaleTensorNotBiasAfterScale(OpTest): + def setUp(self): self.op_type = "scale" self.scale = -1.2 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_shape_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_shape_mkldnn_op.py index 41e6344a0a1..44f2e30d4fc 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_shape_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_shape_mkldnn_op.py @@ -24,6 +24,7 @@ from paddle.fluid.op import Operator @OpTestTool.skip_if_not_cpu_bf16() class TestShape3DFP32OneDNNOp(OpTest): + def setUp(self): self.op_type = "shape" self.config() @@ -40,18 +41,21 @@ class TestShape3DFP32OneDNNOp(OpTest): class TestShape6DBF16OneDNNOp(TestShape3DFP32OneDNNOp): + def config(self): self.shape = [10, 2, 3, 4, 5, 2] self.dtype = np.uint16 class TestShape9DINT8OneDNNOp(TestShape3DFP32OneDNNOp): + def config(self): self.shape = [1, 2, 3, 4, 5, 6, 7, 8, 9] self.dtype = np.int8 class TestShape2DUINT8OneDNNOp(TestShape3DFP32OneDNNOp): + def config(self): self.shape = [7, 11] self.dtype = np.uint8 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_shuffle_channel_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_shuffle_channel_mkldnn_op.py index 1d657817503..edbd19285ca 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_shuffle_channel_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_shuffle_channel_mkldnn_op.py @@ -24,6 +24,7 @@ import paddle.fluid.core as core @OpTestTool.skip_if_not_cpu_bf16() class TestShuffleChannelOneDNNOp(OpTest): + def setUp(self): self.op_type = "shuffle_channel" self.set_dtype() @@ -48,11 +49,13 @@ class TestShuffleChannelOneDNNOp(OpTest): class TestShuffleChannelSingleGroupOneDNNOp(TestShuffleChannelOneDNNOp): + def set_group(self): self.group = 1 class 
TestShuffleChannelBF16OneDNNOp(TestShuffleChannelOneDNNOp): + def set_dtype(self): self.dtype = np.uint16 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_slice_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_slice_mkldnn_op.py index 443e4d90c3a..6b5bfe21550 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_slice_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_slice_mkldnn_op.py @@ -26,6 +26,7 @@ import paddle @OpTestTool.skip_if(core.is_compiled_with_cuda(), "CUDA required dygraph so oneDNN UT must be skipped") class TestSliceOneDNNOp(OpTest): + def setUp(self): self.op_type = "slice" self.config() @@ -62,6 +63,7 @@ class TestSliceOneDNNOp(OpTest): class TestSliceOneDNNOp1(TestSliceOneDNNOp): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float32") self.starts = [-3, 0, 2] @@ -72,6 +74,7 @@ class TestSliceOneDNNOp1(TestSliceOneDNNOp): class TestSliceOneDNNOp2(TestSliceOneDNNOp): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float32") self.starts = [-3, 0, 2] @@ -82,6 +85,7 @@ class TestSliceOneDNNOp2(TestSliceOneDNNOp): class TestSliceDecrease1AxisOneDNNOp(TestSliceOneDNNOp): + def set_attrs(self): self.attrs['decrease_axis'] = self.decrease_axis @@ -96,6 +100,7 @@ class TestSliceDecrease1AxisOneDNNOp(TestSliceOneDNNOp): class TestSliceDecrease2AxesOneDNNOp(TestSliceDecrease1AxisOneDNNOp): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float32") self.starts = [1, 0, 2] @@ -107,6 +112,7 @@ class TestSliceDecrease2AxesOneDNNOp(TestSliceDecrease1AxisOneDNNOp): class TestSliceDecrease3AxesOneDNNOp(TestSliceDecrease1AxisOneDNNOp): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float32") self.starts = [-1, 0, 2] @@ -118,6 +124,7 @@ class TestSliceDecrease3AxesOneDNNOp(TestSliceDecrease1AxisOneDNNOp): class TestSliceDecrease4AxesOneDNNOp(TestSliceDecrease1AxisOneDNNOp): + def config(self): self.input = np.random.random([3, 4, 5, 7]).astype("float32") self.starts = [0, 1, 2, 3] @@ -129,6 +136,7 @@ class TestSliceDecrease4AxesOneDNNOp(TestSliceDecrease1AxisOneDNNOp): class TestSlice5DOneDNNOp(TestSliceDecrease1AxisOneDNNOp): + def config(self): self.input = np.random.random([3, 4, 5, 6, 7]).astype("float32") self.starts = [-1] @@ -140,6 +148,7 @@ class TestSlice5DOneDNNOp(TestSliceDecrease1AxisOneDNNOp): class TestSlice3DOneDNNOp(TestSliceDecrease1AxisOneDNNOp): + def config(self): self.input = np.random.random([5, 4, 5]).astype("float32") self.starts = [-1] @@ -152,6 +161,7 @@ class TestSlice3DOneDNNOp(TestSliceDecrease1AxisOneDNNOp): class TestSliceOneDNNOp_decs_dim_starts_ListTensor( TestSliceDecrease1AxisOneDNNOp): + def set_inputs(self): starts_tensor = [] for index, ele in enumerate(self.starts): @@ -169,6 +179,7 @@ class TestSliceOneDNNOp_decs_dim_starts_ListTensor( class TestSlice4DInferDimsOneDNNOp(TestSliceDecrease1AxisOneDNNOp): + def config(self): self.input = np.random.random([1, 1, 10, 10]).astype("float32") self.starts = [1, 2] @@ -180,6 +191,7 @@ class TestSlice4DInferDimsOneDNNOp(TestSliceDecrease1AxisOneDNNOp): class TestSlice4DInferDimsOneDNNOp2(TestSliceDecrease1AxisOneDNNOp): + def config(self): self.input = np.random.random([1, 1, 10, 10]).astype("float32") self.starts = [4, 2] @@ -192,8 +204,10 @@ class TestSlice4DInferDimsOneDNNOp2(TestSliceDecrease1AxisOneDNNOp): # BF16 TESTS def create_bf16_test_class(parent): + @OpTestTool.skip_if_not_cpu_bf16() class TestSliceBF16OneDNNOp(parent): + def set_inputs(self): self.dtype 
= np.uint16 self.inputs = {'Input': convert_float_to_uint16(self.input)} @@ -208,8 +222,8 @@ def create_bf16_test_class(parent): for i in range(len(self.axes)): begin[self.axes[i]] = self.starts[i] end[self.axes[i]] = self.ends[i] - self.dx[begin[0]:end[0], begin[1]:end[1], begin[2]:end[2], begin[3]: - end[3]] = self.dout + self.dx[begin[0]:end[0], begin[1]:end[1], begin[2]:end[2], + begin[3]:end[3]] = self.dout def test_check_output(self): self.check_output_with_place(core.CPUPlace()) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_bf16_mkldnn_op.py index e9b0cafd114..ca61f961b7a 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_bf16_mkldnn_op.py @@ -32,6 +32,7 @@ def stable_softmax(x): @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestSoftmaxMKLDNNOp(TestSoftmaxOp): + def get_x_shape(self): return [10, 10] @@ -65,26 +66,31 @@ class TestSoftmaxMKLDNNOp(TestSoftmaxOp): class TestSoftmaxMKLDNNOp2(TestSoftmaxOp2): + def init_kernel_type(self): self.use_mkldnn = True class TestSoftmaxMKLDNNOp3(TestSoftmaxOp3): + def init_kernel_type(self): self.use_mkldnn = True class TestSoftmaxMKLDNNOp4(TestSoftmaxOp4): + def init_kernel_type(self): self.use_mkldnn = True class TestSoftmaxMKLDNNOp5(TestSoftmaxOp5): + def init_kernel_type(self): self.use_mkldnn = True class TestSoftmaxMKLDNNOp6(TestSoftmaxOp6): + def init_kernel_type(self): self.use_mkldnn = True diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py index 13c1883af61..ccd43d48baf 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py @@ -30,6 +30,7 @@ def stable_softmax(x): class TestSoftmaxMKLDNNOp(TestSoftmaxOp): + def get_x_shape(self): return [10, 10] @@ -69,46 +70,53 @@ class TestSoftmaxMKLDNNOp(TestSoftmaxOp): if self.use_cudnn or self.dtype == np.float16: place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_grad_with_place( - place, ["X"], - "Out", - max_relative_error=0.01, - check_dygraph=False) + self.check_grad_with_place(place, ["X"], + "Out", + max_relative_error=0.01, + check_dygraph=False) else: - self.check_grad( - ["X"], "Out", max_relative_error=0.01, check_dygraph=False) + self.check_grad(["X"], + "Out", + max_relative_error=0.01, + check_dygraph=False) def init_kernel_type(self): self.use_mkldnn = True class TestSoftmaxMKLDNNOp2(TestSoftmaxOp2): + def init_kernel_type(self): self.use_mkldnn = True class TestSoftmaxMKLDNNOp3(TestSoftmaxOp3): + def init_kernel_type(self): self.use_mkldnn = True class TestSoftmaxMKLDNNOp4(TestSoftmaxOp4): + def init_kernel_type(self): self.use_mkldnn = True class TestSoftmaxMKLDNNOp5(TestSoftmaxOp5): + def init_kernel_type(self): self.use_mkldnn = True class TestSoftmaxMKLDNNOp6(TestSoftmaxOp6): + def init_kernel_type(self): self.use_mkldnn = True # Check if primitives already exist in backward class TestSoftmaxMKLDNNPrimitivesAlreadyExist(unittest.TestCase): + def setUp(self): super(TestSoftmaxMKLDNNPrimitivesAlreadyExist, self).setUp() @@ -124,8 +132,9 @@ class TestSoftmaxMKLDNNPrimitivesAlreadyExist(unittest.TestCase): return out * (out_grad - np.dot(out, out_grad)) def test_check(self): - 
check_if_mkldnn_primitives_exist_in_bwd( - self, self.op_type, self.x, self.out, self.out_grad, self.x_grad) + check_if_mkldnn_primitives_exist_in_bwd(self, self.op_type, self.x, + self.out, self.out_grad, + self.x_grad) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py index c2911114e49..23803ae2898 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py @@ -32,6 +32,7 @@ def ref_softplus(x, beta, threshold): @OpTestTool.skip_if_not_cpu_bf16() class TestSoftplusOneDNNOp(OpTest): + def setUp(self): self.op_type = "softplus" self.beta = 1 @@ -61,44 +62,52 @@ class TestSoftplusOneDNNOp(OpTest): class TestSoftplus4DOneDNNOp(TestSoftplusOneDNNOp): + def config(self): self.x_shape = (10, 5, 4, 2) class TestSoftplus6DOneDNNOp(TestSoftplusOneDNNOp): + def config(self): self.x_shape = (3, 2, 2, 5, 4, 2) class TestSoftplus6DExtendedFunctorOneDNNOp(TestSoftplusOneDNNOp): + def config(self): self.x_shape = (3, 5, 2, 5, 4, 2) self.beta = 2.5 class TestSoftplus3DExtendedFunctorOneDNNOp(TestSoftplusOneDNNOp): + def config(self): self.x_shape = (20, 4, 2) self.beta = 0.4 class TestSoftplusBF16OneDNNOp(TestSoftplusOneDNNOp): + def set_dtype(self): self.dtype = np.uint16 class TestSoftplus4DBF16OneDNNOp(TestSoftplus4DOneDNNOp): + def set_dtype(self): self.dtype = np.uint16 class TestSoftplus6DBF16OneDNNOp(TestSoftplus6DOneDNNOp): + def set_dtype(self): self.dtype = np.uint16 class TestSoftplus3DExtendedFunctorBF16OneDNNOp( TestSoftplus3DExtendedFunctorOneDNNOp): + def set_dtype(self): self.dtype = np.uint16 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_split_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_split_bf16_mkldnn_op.py index 4cb559fc154..f6fbc460754 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_split_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_split_bf16_mkldnn_op.py @@ -26,6 +26,7 @@ from paddle.fluid.tests.unittests.op_test import OpTest @unittest.skipIf(core.is_compiled_with_cuda(), "core is compiled with CUDA which has no BF implementation") class TestSplitSectionsBF16OneDNNOp(OpTest): + def init_data(self): self.x = np.random.random((4, 5, 6)).astype("uint16") self.axis = 1 @@ -74,6 +75,7 @@ class TestSplitSectionsBF16OneDNNOp(OpTest): class TestSplitNumBF16OneDNNOp(TestSplitSectionsBF16OneDNNOp): + def init_data(self): self.x = np.random.random((4, 8, 5, 3)).astype("uint16") self.axis = 1 @@ -84,6 +86,7 @@ class TestSplitNumBF16OneDNNOp(TestSplitSectionsBF16OneDNNOp): class TestSplitNumAxisTensorBF16OneDNNOp(TestSplitSectionsBF16OneDNNOp): + def init_data(self): self.x = np.random.random((4, 5, 6)).astype("uint16") self.axis = None @@ -95,6 +98,7 @@ class TestSplitNumAxisTensorBF16OneDNNOp(TestSplitSectionsBF16OneDNNOp): class TestSplitSectionsTensorBF16OneDNNOp(TestSplitSectionsBF16OneDNNOp): + def init_data(self): self.x = np.random.random((4, 5, 6)).astype("uint16") self.axis = 1 @@ -109,6 +113,7 @@ class TestSplitSectionsTensorBF16OneDNNOp(TestSplitSectionsBF16OneDNNOp): class TestSplitOpUnknownSectionBF16OneDNNOp(TestSplitSectionsBF16OneDNNOp): + def init_data(self): self.x = np.random.random((4, 5, 6)).astype("uint16") self.axis = 2 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_split_mkldnn_op.py 
b/python/paddle/fluid/tests/unittests/mkldnn/test_split_mkldnn_op.py index 55b56434f3e..c7c4413da6c 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_split_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_split_mkldnn_op.py @@ -22,6 +22,7 @@ from paddle.fluid.tests.unittests.op_test import OpTest class TestSplitSectionsOneDNNOp(OpTest): + def init_data(self): self.x = np.random.random((4, 5, 6)).astype("float32") self.axis = 1 @@ -60,6 +61,7 @@ class TestSplitSectionsOneDNNOp(OpTest): # test with attr(num) class TestSplitNumOneDNNOp(TestSplitSectionsOneDNNOp): + def init_data(self): self.x = np.random.random((4, 8, 5, 3)).astype("float32") self.axis = 1 @@ -73,6 +75,7 @@ class TestSplitNumOneDNNOp(TestSplitSectionsOneDNNOp): class TestSplitNumAxisTensorOneDNNOp(TestSplitSectionsOneDNNOp): + def init_data(self): self.x = np.random.random((4, 5, 6)).astype("float32") self.axis = None @@ -85,6 +88,7 @@ class TestSplitNumAxisTensorOneDNNOp(TestSplitSectionsOneDNNOp): # attr(sections) is list containing Tensor class TestSplitSectionsTensorOneDNNOp(TestSplitSectionsOneDNNOp): + def init_data(self): self.x = np.random.random((4, 5, 6)).astype("float32") self.axis = 1 @@ -99,6 +103,7 @@ class TestSplitSectionsTensorOneDNNOp(TestSplitSectionsOneDNNOp): class TestSplitOpUnknownSectionOneDNNOp(TestSplitSectionsOneDNNOp): + def init_data(self): self.x = np.random.random((4, 5, 6)).astype("float32") self.axis = 2 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_squeeze2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_squeeze2_mkldnn_op.py index 489d8510380..61729178a92 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_squeeze2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_squeeze2_mkldnn_op.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool, convert_flo @OpTestTool.skip_if(core.is_compiled_with_cuda(), "CUDA has to be skipped because it forces dygraph") class TestSqueeze2OneDNNOp(OpTest): + def set_op_type(self): self.op_type = "squeeze2" @@ -60,6 +61,7 @@ class TestSqueeze2OneDNNOp(OpTest): class TestSqueezeOneDNNOp(TestSqueeze2OneDNNOp): + def set_op_type(self): self.op_type = "squeeze" @@ -71,6 +73,7 @@ class TestSqueezeOneDNNOp(TestSqueeze2OneDNNOp): class TestSqueeze2OneDNNOp1(TestSqueeze2OneDNNOp): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = (0, -2) @@ -78,6 +81,7 @@ class TestSqueeze2OneDNNOp1(TestSqueeze2OneDNNOp): class TestSqueezeOneDNNOp1(TestSqueezeOneDNNOp): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = (0, -2) @@ -85,6 +89,7 @@ class TestSqueezeOneDNNOp1(TestSqueezeOneDNNOp): class TestSqueeze2OneDNNOp2(TestSqueeze2OneDNNOp): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = () @@ -92,6 +97,7 @@ class TestSqueeze2OneDNNOp2(TestSqueeze2OneDNNOp): class TestSqueezeOneDNNOp2(TestSqueezeOneDNNOp): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = () @@ -99,6 +105,7 @@ class TestSqueezeOneDNNOp2(TestSqueezeOneDNNOp): class TestSqueeze2OneDNNOp3(TestSqueeze2OneDNNOp): + def init_test_case(self): self.ori_shape = (25, 1, 1, 4, 1) self.axes = (1, -1) @@ -106,6 +113,7 @@ class TestSqueeze2OneDNNOp3(TestSqueeze2OneDNNOp): class TestSqueezeOneDNNOp3(TestSqueezeOneDNNOp): + def init_test_case(self): self.ori_shape = (25, 1, 1, 4, 1) self.axes = (1, -1) @@ -114,8 +122,10 @@ class TestSqueezeOneDNNOp3(TestSqueezeOneDNNOp): # BF16 TESTS def create_squeeze_bf16_test_classes(parent): + 
@OpTestTool.skip_if_not_cpu_bf16() class TestSqueeze2BF16OneDNNOp(parent): + def set_inputs(self): self.dtype = np.uint16 self.inputs = {"X": convert_float_to_uint16(self.x)} @@ -126,17 +136,17 @@ def create_squeeze_bf16_test_classes(parent): def test_check_grad(self): self.calculate_grads() - self.check_grad_with_place( - core.CPUPlace(), ["X"], - "Out", - user_defined_grads=[self.dx], - user_defined_grad_outputs=[self.dout]) + self.check_grad_with_place(core.CPUPlace(), ["X"], + "Out", + user_defined_grads=[self.dx], + user_defined_grad_outputs=[self.dout]) cls_name = "{0}_{1}".format(parent.__name__, "Squeeze2_BF16") TestSqueeze2BF16OneDNNOp.__name__ = cls_name globals()[cls_name] = TestSqueeze2BF16OneDNNOp class TestSqueezeBF16OneDNNOp(TestSqueeze2BF16OneDNNOp): + def set_op_type(self): self.dtype = np.uint16 self.op_type = "squeeze" diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_stack_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_stack_mkldnn_op.py index f7424014c21..432ceafcfd0 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_stack_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_stack_mkldnn_op.py @@ -22,6 +22,7 @@ import paddle.fluid.core as core @OpTestTool.skip_if_not_cpu() class TestStack2DOneDNNOp(OpTest): + def initDefaultParameters(self): self.num_inputs = 4 self.input_dim = (2, 2) @@ -65,18 +66,21 @@ class TestStack2DOneDNNOp(OpTest): class TestStack1DOneDNNOp(TestStack2DOneDNNOp): + def initParameters(self): self.input_dim = (100) self.axis = 0 class TestStack1DAxis1OneDNNOp(TestStack2DOneDNNOp): + def initParameters(self): self.input_dim = (100) self.axis = 1 class TestStack2DAxisLastOneDNNOp(TestStack2DOneDNNOp): + def initParameters(self): self.input_dim = (13, 24) self.num_inputs = 5 @@ -84,12 +88,14 @@ class TestStack2DAxisLastOneDNNOp(TestStack2DOneDNNOp): class TestStack3DAxisNegativeOneDNNOp(TestStack2DOneDNNOp): + def initParameters(self): self.input_dim = (10, 128, 128) self.axis = -2 class TestStack3DOneDNNOp(TestStack2DOneDNNOp): + def initParameters(self): self.input_dim = (10, 128, 128) self.num_inputs = 3 @@ -97,6 +103,7 @@ class TestStack3DOneDNNOp(TestStack2DOneDNNOp): class TestStack4DOneDNNOp(TestStack2DOneDNNOp): + def initParameters(self): self.input_dim = (2, 2, 2, 2) self.num_inputs = 3 @@ -104,6 +111,7 @@ class TestStack4DOneDNNOp(TestStack2DOneDNNOp): class TestStack5DOneDNNOp(TestStack2DOneDNNOp): + def initParameters(self): self.input_dim = (2, 3, 4, 5, 6) self.num_inputs = 6 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_sum_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_sum_bf16_mkldnn_op.py index c71baad0c70..34c1c7bc249 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_sum_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_sum_bf16_mkldnn_op.py @@ -26,6 +26,7 @@ import paddle.fluid.op as fluid_op @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestSumBF16MKLDNN(TestSumOp): + def setUp(self): self.op_type = "sum" self.use_mkldnn = True diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_sum_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_sum_mkldnn_op.py index 1a87b1cea53..33d9af4e0e2 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_sum_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_sum_mkldnn_op.py @@ -22,6 +22,7 @@ import paddle.fluid.op as fluid_op class TestSumMKLDNN(TestSumOp): + def setUp(self): self.op_type = 
"sum" self.init_data_type() @@ -47,6 +48,7 @@ class TestSumMKLDNN(TestSumOp): class TestMKLDNNSumInplaceOp(unittest.TestCase): + def setUp(self): self.op_type = "sum" self.init_data_type() @@ -70,15 +72,16 @@ class TestMKLDNNSumInplaceOp(unittest.TestCase): tensor = var.get_tensor() tensor.set(var_value, place) - sum_op = fluid_op.Operator( - "sum", X=["x0", "x1"], Out=out_var_name, use_mkldnn=True) + sum_op = fluid_op.Operator("sum", + X=["x0", "x1"], + Out=out_var_name, + use_mkldnn=True) expected_out = np.array(self.x0 + self.x1) sum_op.run(scope, place) out = scope.find_var("x0").get_tensor() out_array = np.array(out) self.assertTrue( - np.allclose( - expected_out, out_array, atol=1e-5), + np.allclose(expected_out, out_array, atol=1e-5), "Inplace sum_mkldnn_op output has diff with expected output") def test_check_grad(self): diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_bf16_mkldnn_op.py index 72efa0aa99e..45f8aca4f98 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_bf16_mkldnn_op.py @@ -24,6 +24,7 @@ from paddle import enable_static @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestTransposeOp(OpTest): + def setUp(self): self.op_type = "transpose2" self.use_mkldnn = True @@ -57,6 +58,7 @@ class TestTransposeOp(OpTest): class TestBF16Case(TestTransposeOp): + def init_test_case(self): self.shape = (2, 4, 6, 8) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_int8_mkldnn_op.py index 6437226bf4c..756630913db 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_int8_mkldnn_op.py @@ -22,6 +22,7 @@ from mkldnn_op_test import format_reorder class TestTransposeOp(OpTest): + def setUp(self): self.init_op_type() self.initTestCase() @@ -49,27 +50,31 @@ class TestTransposeOp(OpTest): def test_check_output(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_output_with_place( - core.CPUPlace(), 1e-5, no_check_set=['XShape'], check_dygraph=False) + self.check_output_with_place(core.CPUPlace(), + 1e-5, + no_check_set=['XShape'], + check_dygraph=False) def initTestCase(self): self.shape = (2, 3, 4, 5) def initInputData(self): - self.input_data = ( - np.random.randint(0, 100, self.shape) - 50).astype(np.int8) + self.input_data = (np.random.randint(0, 100, self.shape) - 50).astype( + np.int8) class TestINT8Case(TestTransposeOp): + def initTestCase(self): self.shape = (2, 4, 6, 8) def initInputData(self): - self.input_data = ( - np.random.randint(0, 100, self.shape) - 50).astype(np.int8) + self.input_data = (np.random.randint(0, 100, self.shape) - 50).astype( + np.int8) class TestUINT8Case(TestTransposeOp): + def initTestCase(self): self.shape = (1, 3, 5, 7) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_mkldnn_op.py index 0d670898dd7..18573baa554 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_mkldnn_op.py @@ -21,6 +21,7 @@ import numpy as np class TestTransposeMKLDNN(TestTransposeOp): + def setUp(self): self.init_op_type() 
self.initTestCase() @@ -53,36 +54,42 @@ class TestTransposeMKLDNN(TestTransposeOp): class TestCase0MKLDNN(TestTransposeMKLDNN): + def initTestCase(self): self.shape = (100, ) self.axis = (0, ) class TestCase1a(TestTransposeMKLDNN): + def initTestCase(self): self.shape = (3, 4, 10) self.axis = (0, 2, 1) class TestCase1b(TestTransposeMKLDNN): + def initTestCase(self): self.shape = (3, 4, 10) self.axis = (2, 1, 0) class TestCase2(TestTransposeMKLDNN): + def initTestCase(self): self.shape = (2, 3, 4, 5) self.axis = (0, 2, 3, 1) class TestCase3(TestTransposeMKLDNN): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6) self.axis = (4, 2, 3, 1, 0) class TestCase4(TestTransposeMKLDNN): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6, 1) self.axis = (4, 2, 3, 1, 0, 5) diff --git a/python/paddle/fluid/tests/unittests/mlu/c_comm_init_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/c_comm_init_op_mlu.py index e91f28e3b1d..1f343bb5321 100644 --- a/python/paddle/fluid/tests/unittests/mlu/c_comm_init_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/c_comm_init_op_mlu.py @@ -25,6 +25,7 @@ paddle.enable_static() class TestCCommInitOp(unittest.TestCase): + def setUp(self): self.endpoints = os.getenv("PADDLE_TRAINER_ENDPOINTS").split(',') self.current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT") @@ -45,25 +46,23 @@ class TestCCommInitOp(unittest.TestCase): name=fluid.unique_name.generate('cncl_id'), persistable=True, type=fluid.core.VarDesc.VarType.RAW) - block.append_op( - type='c_gen_cncl_id', - inputs={}, - outputs={'Out': cncl_id_var}, - attrs={ - 'rank': self.rank, - 'endpoint': self.current_endpoint, - 'other_endpoints': self.other_endpoints - }) - block.append_op( - type='c_comm_init', - inputs={'X': cncl_id_var}, - outputs={}, - attrs={ - 'nranks': self.nranks, - 'rank': self.rank, - 'ring_id': 0, - 'device_id': self.mlu_id - }) + block.append_op(type='c_gen_cncl_id', + inputs={}, + outputs={'Out': cncl_id_var}, + attrs={ + 'rank': self.rank, + 'endpoint': self.current_endpoint, + 'other_endpoints': self.other_endpoints + }) + block.append_op(type='c_comm_init', + inputs={'X': cncl_id_var}, + outputs={}, + attrs={ + 'nranks': self.nranks, + 'rank': self.rank, + 'ring_id': 0, + 'device_id': self.mlu_id + }) self.exe.run(program) diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_allgather_api.py b/python/paddle/fluid/tests/unittests/mlu/collective_allgather_api.py index 50ae6b1a169..b30d055e5f4 100755 --- a/python/paddle/fluid/tests/unittests/mlu/collective_allgather_api.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_allgather_api.py @@ -39,14 +39,16 @@ paddle.enable_static() class TestCollectiveAllgatherAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): tensor_list = [] - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') paddle.distributed.all_gather(tensor_list, tindata) return tensor_list diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_allgather_op.py b/python/paddle/fluid/tests/unittests/mlu/collective_allgather_op.py index f67b3fbcc6a..591376deb1b 100755 --- a/python/paddle/fluid/tests/unittests/mlu/collective_allgather_op.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_allgather_op.py @@ -38,6 +38,7 @@ paddle.enable_static() class TestCollectiveAllgather(TestCollectiveRunnerBase): + 
def __init__(self): self.global_ring_id = 0 @@ -45,25 +46,26 @@ class TestCollectiveAllgather(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outofallgather", dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_allgather", - inputs={'X': tindata}, - attrs={'ring_id': ring_id, - 'nranks': nranks}, - outputs={'Out': toutdata}) - main_prog.global_block().append_op( - type="c_sync_comm_stream", - inputs={'X': toutdata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id}) + main_prog.global_block().append_op(type="c_allgather", + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'nranks': nranks + }, + outputs={'Out': toutdata}) + main_prog.global_block().append_op(type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) return toutdata diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_api.py b/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_api.py index ebe4e71d22f..51df37d38d4 100644 --- a/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_api.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_api.py @@ -39,13 +39,15 @@ paddle.enable_static() class TestCollectiveAllreduceAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') paddle.distributed.all_reduce(tindata) return [tindata] diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_op.py b/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_op.py index 404ed1235d2..c839e3213f4 100644 --- a/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_op.py @@ -39,30 +39,30 @@ paddle.enable_static() class TestCollectiveAllreduce(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program, col_type): ring_id = 0 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outof" + col_type, dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_" + col_type, - inputs={'X': tindata}, - attrs={'ring_id': ring_id}, - outputs={'Out': toutdata}) - main_prog.global_block().append_op( - type="c_sync_comm_stream", - inputs={'X': toutdata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id}) + main_prog.global_block().append_op(type="c_" + col_type, + inputs={'X': tindata}, + attrs={'ring_id': ring_id}, + outputs={'Out': toutdata}) + main_prog.global_block().append_op(type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) return toutdata diff --git 
a/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_api.py b/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_api.py index 2002909ea2e..c608dcc6165 100644 --- a/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_api.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_api.py @@ -39,13 +39,15 @@ paddle.enable_static() class TestCollectiveBroadcastAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype="float32") + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype="float32") paddle.distributed.broadcast(tindata, src=1) return [tindata] diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_op.py b/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_op.py index 49bc6a6c4bb..21da1aaa656 100755 --- a/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_op.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_op.py @@ -39,6 +39,7 @@ paddle.enable_static() class TestCollectiveBroadcast(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -46,25 +47,26 @@ class TestCollectiveBroadcast(TestCollectiveRunnerBase): ring_id = 0 rootid = 1 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outofbroadcast", dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_broadcast", - inputs={'X': tindata}, - attrs={'ring_id': ring_id, - 'root': rootid}, - outputs={'Out': toutdata}) - main_prog.global_block().append_op( - type="c_sync_comm_stream", - inputs={'X': toutdata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id}) + main_prog.global_block().append_op(type="c_broadcast", + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'root': rootid + }, + outputs={'Out': toutdata}) + main_prog.global_block().append_op(type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) return toutdata diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_reduce_api.py b/python/paddle/fluid/tests/unittests/mlu/collective_reduce_api.py index f987a71abda..b1824dcba8a 100644 --- a/python/paddle/fluid/tests/unittests/mlu/collective_reduce_api.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_reduce_api.py @@ -39,13 +39,15 @@ paddle.enable_static() class TestCollectiveReduceAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') paddle.distributed.reduce(tindata, dst=0) return [tindata] diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_reduce_op.py b/python/paddle/fluid/tests/unittests/mlu/collective_reduce_op.py index 05fc17a5c7d..48352dca085 100644 --- a/python/paddle/fluid/tests/unittests/mlu/collective_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_reduce_op.py @@ -39,6 +39,7 @@ 
paddle.enable_static() class TestCollectiveReduce(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -46,25 +47,26 @@ class TestCollectiveReduce(TestCollectiveRunnerBase): ring_id = 0 rootid = 1 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outof" + col_type, dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_" + col_type, - inputs={'X': tindata}, - attrs={'ring_id': ring_id, - 'root_id': rootid}, - outputs={'Out': toutdata}) - main_prog.global_block().append_op( - type="c_sync_comm_stream", - inputs={'X': toutdata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id}) + main_prog.global_block().append_op(type="c_" + col_type, + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'root_id': rootid + }, + outputs={'Out': toutdata}) + main_prog.global_block().append_op(type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) return toutdata diff --git a/python/paddle/fluid/tests/unittests/mlu/multi_process_mlu.py b/python/paddle/fluid/tests/unittests/mlu/multi_process_mlu.py index 9ea550a8452..782475ff8cb 100644 --- a/python/paddle/fluid/tests/unittests/mlu/multi_process_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/multi_process_mlu.py @@ -45,7 +45,7 @@ def train_abort(prefix): if trainer_id == 0: try: - # train abort + # train abort exit(1) except SystemExit: name = "abort>>> selected_mlus:{} worker_endpoints:{} trainers_num:{} current_endpoint:{} trainer_id:{}"\ diff --git a/python/paddle/fluid/tests/unittests/mlu/test_abs_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_abs_op_mlu.py index 0c33bd6b1ad..65b2d6f1226 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_abs_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_abs_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append('..') from op_test import OpTest import paddle.fluid.core as core @@ -30,6 +31,7 @@ np.random.seed(10) class TestAbs(OpTest): + def setUp(self): self.op_type = "abs" self.set_mlu() @@ -56,11 +58,12 @@ class TestAbs(OpTest): self.check_output_with_place(self.place) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X'], ['Out'], check_eager=False) + self.check_grad_with_place(self.place, ['X'], ['Out'], + check_eager=False) class TestAbsHalf(OpTest): + def setUp(self): self.op_type = "abs" self.set_mlu() @@ -87,8 +90,8 @@ class TestAbsHalf(OpTest): self.check_output_with_place(self.place) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X'], ['Out'], check_eager=False) + self.check_grad_with_place(self.place, ['X'], ['Out'], + check_eager=False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/mlu/test_accuracy_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_accuracy_op_mlu.py index 5e5c4c9a301..e3754224b24 100755 --- a/python/paddle/fluid/tests/unittests/mlu/test_accuracy_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_accuracy_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append('..') from op_test import OpTest import paddle @@ -27,6 +28,7 @@ 
paddle.enable_static() class TestAccuracyOp(OpTest): + def setUp(self): self.op_type = "accuracy" self.place = paddle.device.MLUPlace(0) @@ -58,6 +60,7 @@ class TestAccuracyOp(OpTest): class TestAccuracyOpFp16(TestAccuracyOp): + def init_dtype(self): self.dtype = np.float16 @@ -66,13 +69,15 @@ class TestAccuracyOpFp16(TestAccuracyOp): class TestAccuracyOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of accuracy_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.MLUPlace(0)) - label = fluid.layers.data( - name='label', shape=[-1, 1], dtype="int32") + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.MLUPlace(0)) + label = fluid.layers.data(name='label', + shape=[-1, 1], + dtype="int32") self.assertRaises(TypeError, fluid.layers.accuracy, x1, label) self.assertRaises(TypeError, paddle.metric.accuracy, x1, label) # The input dtype of accuracy_op must be float32 or float64. @@ -85,13 +90,17 @@ class TestAccuracyOpError(unittest.TestCase): class TestAccuracyAPI1(unittest.TestCase): + def setUp(self): - self.predictions = paddle.static.data( - shape=[2, 5], name="predictions", dtype="float32") - self.label = paddle.static.data( - shape=[2, 1], name="labels", dtype="int32") - self.result = paddle.static.accuracy( - input=self.predictions, label=self.label, k=1) + self.predictions = paddle.static.data(shape=[2, 5], + name="predictions", + dtype="float32") + self.label = paddle.static.data(shape=[2, 1], + name="labels", + dtype="int32") + self.result = paddle.static.accuracy(input=self.predictions, + label=self.label, + k=1) self.input_predictions = np.array( [[0.2, 0.1, 0.4, 0.1, 0.1], [0.2, 0.3, 0.1, 0.15, 0.25]], dtype="float32") @@ -109,6 +118,7 @@ class TestAccuracyAPI1(unittest.TestCase): class TestAccuracyAPI2(unittest.TestCase): + def test_api(self): with fluid.dygraph.guard(): predictions = paddle.to_tensor( @@ -121,6 +131,7 @@ class TestAccuracyAPI2(unittest.TestCase): class TestAccuracyAPI(unittest.TestCase): + def test_api(self): with fluid.dygraph.guard(): predictions = paddle.to_tensor( diff --git a/python/paddle/fluid/tests/unittests/mlu/test_adam_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_adam_op_mlu.py index f30a391f653..4354883a442 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_adam_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_adam_op_mlu.py @@ -15,6 +15,7 @@ import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2022 class TestAdam(OpTest): + def setUp(self): self.set_mlu() self.op_type = "adam" @@ -78,6 +80,7 @@ class TestAdam(OpTest): class TestAdamWithEpsilonTensor(OpTest): + def setUp(self): self.set_mlu() self.op_type = "adam" @@ -132,6 +135,7 @@ class TestAdamWithEpsilonTensor(OpTest): class TestAdamOpWithSkipUpdate(OpTest): + def setUp(self): self.set_mlu() self.op_type = "adam" @@ -184,6 +188,7 @@ class TestAdamOpWithSkipUpdate(OpTest): class TestAdamOpWithGlobalBetaPow(OpTest): + def setUp(self): self.set_mlu() self.op_type = "adam" @@ -241,6 +246,7 @@ class TestAdamOpWithGlobalBetaPow(OpTest): class TestNet(unittest.TestCase): + def _test(self, run_mlu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -255,8 +261,9 @@ class TestNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", 
shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.pow(sum, 2.0) @@ -280,12 +287,13 @@ class TestNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_adamw_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_adamw_op_mlu.py index d2827725a20..5c69cdb7409 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_adamw_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_adamw_op_mlu.py @@ -15,6 +15,7 @@ import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2022 class TestAdamW(OpTest): + def setUp(self): self.set_mlu() self.op_type = "adamw" @@ -84,6 +86,7 @@ class TestAdamW(OpTest): class TestAdamOpWithSkipUpdate(OpTest): + def setUp(self): self.set_mlu() self.op_type = "adamw" @@ -136,6 +139,7 @@ class TestAdamOpWithSkipUpdate(OpTest): class TestAdamOpWithoutDecay(OpTest): + def setUp(self): self.set_mlu() self.op_type = "adamw" @@ -188,6 +192,7 @@ class TestAdamOpWithoutDecay(OpTest): class TestNet(unittest.TestCase): + def _test(self, run_mlu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -202,8 +207,9 @@ class TestNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.pow(sum, 2.0) @@ -227,12 +233,13 @@ class TestNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_amp_check_finite_and_scale_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_amp_check_finite_and_scale_op_mlu.py index 57fa56acd68..9e0fdbbd208 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_amp_check_finite_and_scale_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_amp_check_finite_and_scale_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -26,6 +27,7 @@ SEED = 2022 class TestCheckFiniteAndUnscaleOp(OpTest): + def setUp(self): self.set_mlu() self.op_type = "check_finite_and_unscale" @@ -54,6 +56,7 @@ class TestCheckFiniteAndUnscaleOp(OpTest): class TestCheckFiniteAndUnscaleOpWithNan(TestCheckFiniteAndUnscaleOp): + 
def init_test_case(self): x = np.random.random((129, 129)).astype(self.dtype) x[128][128] = np.nan @@ -66,12 +69,13 @@ class TestCheckFiniteAndUnscaleOpWithNan(TestCheckFiniteAndUnscaleOp): } def test_check_output(self): - # When input contains nan, do not check the output, + # When input contains nan, do not check the output, # since the output may be nondeterministic and will be discarded. self.check_output_with_place(self.place, no_check_set=['Out']) class TestCheckFiniteAndUnscaleOpWithInf(TestCheckFiniteAndUnscaleOp): + def init_test_case(self): x = np.random.random((129, 129)).astype(self.dtype) x[128][128] = np.inf @@ -84,12 +88,13 @@ class TestCheckFiniteAndUnscaleOpWithInf(TestCheckFiniteAndUnscaleOp): } def test_check_output(self): - # When input contains inf, do not check the output, + # When input contains inf, do not check the output, # since the output may be nondeterministic and will be discarded. self.check_output_with_place(self.place, no_check_set=['Out']) class TestCheckFiniteAndUnscaleOpMultiInput(TestCheckFiniteAndUnscaleOp): + def init_test_case(self): x0 = np.random.random((129, 129)).astype(self.dtype) x1 = np.random.random((129, 129)).astype(self.dtype) @@ -103,6 +108,7 @@ class TestCheckFiniteAndUnscaleOpMultiInput(TestCheckFiniteAndUnscaleOp): class TestCheckFiniteAndUnscaleOpMultiInputWithNan(TestCheckFiniteAndUnscaleOp): + def init_test_case(self): x0 = np.random.random((129, 129)).astype(self.dtype) x0[128][128] = np.nan @@ -116,12 +122,13 @@ class TestCheckFiniteAndUnscaleOpMultiInputWithNan(TestCheckFiniteAndUnscaleOp): } def test_check_output(self): - # When input contains inf, do not check the output, + # When input contains inf, do not check the output, # since the output may be nondeterministic and will be discarded. self.check_output_with_place(self.place, no_check_set=['Out']) class TestCheckFiniteAndUnscaleOpMultiInputWithInf(TestCheckFiniteAndUnscaleOp): + def init_test_case(self): x0 = np.random.random((129, 129)).astype(self.dtype) x0[128][128] = np.nan @@ -136,7 +143,7 @@ class TestCheckFiniteAndUnscaleOpMultiInputWithInf(TestCheckFiniteAndUnscaleOp): } def test_check_output(self): - # When input contains inf, do not check the output, + # When input contains inf, do not check the output, # since the output may be nondeterministic and will be discarded. 
self.check_output_with_place(self.place, no_check_set=['Out']) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_assign_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_assign_op_mlu.py index 85302ad76da..8aaba7b2580 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_assign_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_assign_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -26,6 +27,7 @@ SEED = 2022 class TestAssign(OpTest): + def setUp(self): self.set_mlu() self.op_type = "assign" diff --git a/python/paddle/fluid/tests/unittests/mlu/test_assign_value_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_assign_value_op_mlu.py index 5ee9d369e0f..1bf2504c863 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_assign_value_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_assign_value_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy import sys + sys.path.append("..") import op_test @@ -30,6 +31,7 @@ numpy.random.seed(2022) class TestAssignValueMLUOp(op_test.OpTest): + def setUp(self): self.set_mlu() self.op_type = "assign_value" @@ -55,21 +57,24 @@ class TestAssignValueMLUOp(op_test.OpTest): class TestAssignValueMLUOp2(TestAssignValueMLUOp): + def init_data(self): self.value = numpy.random.random(size=(2, 5)).astype(numpy.int32) self.attrs["int32_values"] = [int(v) for v in self.value.flat] class TestAssignValueMLUOp3(TestAssignValueMLUOp): + def init_data(self): self.value = numpy.random.random(size=(2, 5)).astype(numpy.int64) self.attrs["int64_values"] = [int(v) for v in self.value.flat] class TestAssignValueMLUOp4(TestAssignValueMLUOp): + def init_data(self): - self.value = numpy.random.choice( - a=[False, True], size=(2, 5)).astype(numpy.bool) + self.value = numpy.random.choice(a=[False, True], + size=(2, 5)).astype(numpy.bool) self.attrs["bool_values"] = [int(v) for v in self.value.flat] diff --git a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py index 4cbff21dfc4..86f044b9d3d 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py @@ -22,6 +22,7 @@ import paddle.fluid.core as core from paddle.fluid.op import Operator import paddle.fluid as fluid import sys + sys.path.append('..') from op_test import OpTest, _set_use_system_allocator from paddle.fluid.framework import grad_var_name @@ -159,9 +160,9 @@ def _reference_grad(x, y_grad, scale, mean, var, epsilon, data_format): x = np.transpose(x, (0, 2, 3, 1)) y_grad = np.transpose(y_grad, (0, 2, 3, 1)) - x_grad = scale * (y_grad - np.mean( - y_grad, axis=(0, 1, 2)) - (x - mean) * np.mean( - y_grad * (x - mean), axis=(0, 1, 2)) / + x_grad = scale * (y_grad - np.mean(y_grad, axis=(0, 1, 2)) - + (x - mean) * np.mean(y_grad * + (x - mean), axis=(0, 1, 2)) / (var + epsilon)) / np.sqrt(var + epsilon) grad_scale = np.sum(y_grad * (x - mean) / np.sqrt(var + epsilon), axis=(0, 1, 2)) @@ -189,6 +190,7 @@ def create_or_get_tensor(scope, var_name, var, place): def set_output_grad(scope, outputs, place, feed_dict=None): + def __set_tensor__(name, data=None): out_tensor = scope.find_var(name).get_tensor() grad_tensor = scope.var(grad_var_name(name)).get_tensor() @@ -210,6 +212,7 @@ def set_output_grad(scope, outputs, place, feed_dict=None): 
class TestBatchNormOpInference(unittest.TestCase): + def setUp(self): self.dtype = np.float32 self.fuse_with_relu = False @@ -254,8 +257,8 @@ class TestBatchNormOpInference(unittest.TestCase): OpTest.np_dtype_to_fluid_dtype(x_val), place) scale_tensor = create_or_get_tensor( - scope, "scale_val", - OpTest.np_dtype_to_fluid_dtype(scale_val), place) + scope, "scale_val", OpTest.np_dtype_to_fluid_dtype(scale_val), + place) bias_tensor = create_or_get_tensor( scope, "bias_val", OpTest.np_dtype_to_fluid_dtype(bias_val), place) mean_tensor = create_or_get_tensor(scope, "mean", @@ -297,13 +300,12 @@ class TestBatchNormOpInference(unittest.TestCase): batch_norm_op.run(scope, place) # check inference result - self.__assert_close( - y_tensor, - y_out, - "inference output are different at " + str(place) + ", " + - data_layout + ", " + str(np.dtype(dtype)) + - str(np.array(y_tensor)) + str(y_out), - atol=1e-3) + self.__assert_close(y_tensor, + y_out, + "inference output are different at " + str(place) + + ", " + data_layout + ", " + str(np.dtype(dtype)) + + str(np.array(y_tensor)) + str(y_out), + atol=1e-3) def test_check_output(self): places = [core.CPUPlace()] @@ -321,6 +323,7 @@ class TestBatchNormOpInference(unittest.TestCase): class TestFP16BatchNormOpInference(TestBatchNormOpInference): + def setUp(self): self.dtype = np.float16 self.fuse_with_relu = False @@ -339,6 +342,7 @@ class TestFP16BatchNormOpInference(TestBatchNormOpInference): class TestBatchNormOpTraining(unittest.TestCase): + def setUp(self): self.fuse_with_relu = False self.data_formats = ["NCHW", "NHWC"] @@ -368,8 +372,9 @@ class TestBatchNormOpTraining(unittest.TestCase): variance_out = var_ref * (1. - momentum) + momentum * variance saved_variance = 1. / np.sqrt(var_ref + epsilon) # run backward - x_grad, scale_grad, bias_grad = _reference_grad( - x, y_grad, scale, saved_mean, var_ref, epsilon, data_layout) + x_grad, scale_grad, bias_grad = _reference_grad(x, y_grad, scale, + saved_mean, var_ref, + epsilon, data_layout) return y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad @@ -385,6 +390,7 @@ class TestBatchNormOpTraining(unittest.TestCase): return mean, variance def test_forward_backward(self): + def test_with_place(place, data_layout, shape): # attr epsilon = self.epsilon @@ -423,10 +429,9 @@ class TestBatchNormOpTraining(unittest.TestCase): with fluid.program_guard(program): block = program.global_block() for name in ground_truth: - block.create_var( - name=name, - dtype='float32', - shape=ground_truth[name].shape) + block.create_var(name=name, + dtype='float32', + shape=ground_truth[name].shape) inputs = { "X": block.var('x'), "Scale": block.var('scale'), @@ -456,11 +461,10 @@ class TestBatchNormOpTraining(unittest.TestCase): } block.create_var(name="reserve_space", dtype='float32') outputs["ReserveSpace"] = block.var('reserve_space') - bn_op = block.append_op( - type="batch_norm", - inputs=inputs, - outputs=outputs, - attrs=attrs) + bn_op = block.append_op(type="batch_norm", + inputs=inputs, + outputs=outputs, + attrs=attrs) block.create_var(name='y@GRAD', dtype='float32', shape=y.shape) # generate backward op_desc @@ -492,8 +496,10 @@ class TestBatchNormOpTraining(unittest.TestCase): for id, name in enumerate(self.fetch_list): if name == 'variance': - self.__assert_close( - var_dict[name], out[id], name, atol=1e-3) + self.__assert_close(var_dict[name], + out[id], + name, + atol=1e-3) continue self.__assert_close(var_dict[name], out[id], name) print("op test forward passed: ", 
str(place), data_layout) @@ -512,6 +518,7 @@ class TestBatchNormOpTraining(unittest.TestCase): class TestBatchNormOpTrainingCase1(TestBatchNormOpTraining): + def init_test_case(self): self.use_global_stats = False self.no_grad_set = set(['scale@GRAD', 'bias@GRAD']) @@ -519,6 +526,7 @@ class TestBatchNormOpTrainingCase1(TestBatchNormOpTraining): class TestBatchNormOpTrainingCase2(TestBatchNormOpTraining): + def init_test_case(self): self.use_global_stats = False self.no_grad_set = set() @@ -530,6 +538,7 @@ class TestBatchNormOpTrainingCase2(TestBatchNormOpTraining): class TestBatchNormOpTrainingCase3(TestBatchNormOpTraining): + def init_test_case(self): self.use_global_stats = False self.no_grad_set = set(['x@GRAD']) @@ -537,6 +546,7 @@ class TestBatchNormOpTrainingCase3(TestBatchNormOpTraining): class TestBatchNormOpTrainingMomentumVariable(TestBatchNormOpTraining): + def init_test_case(self): self.use_momentum_variable = True self.use_global_stats = False @@ -548,6 +558,7 @@ class TestBatchNormOpTrainingMomentumVariable(TestBatchNormOpTraining): class TestBatchNormOpFreezeStatsTraining(TestBatchNormOpTraining): + def init_test_case(self): self.use_global_stats = True self.no_grad_set = set() @@ -602,6 +613,7 @@ class TestBatchNormOpFreezeStatsTraining(TestBatchNormOpTraining): class TestBatchNormOpFreezeStatsAndScaleBiasTraining( TestBatchNormOpFreezeStatsTraining): + def init_test_case(self): self.use_global_stats = True self.no_grad_set = set(['scale@GRAD', 'bias@GRAD']) @@ -609,11 +621,12 @@ class TestBatchNormOpFreezeStatsAndScaleBiasTraining( class TestBatchNormOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of batch_norm must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.batch_norm, x1) # the input dtype of batch_norm must be float16 or float32 or float64 @@ -623,12 +636,13 @@ class TestBatchNormOpError(unittest.TestCase): class TestDygraphBatchNormAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): batch_norm = fluid.dygraph.BatchNorm(10) # the input of BatchNorm must be Variable. 
- x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) self.assertRaises(TypeError, batch_norm, x1) # the input dtype of BatchNorm must be float16 or float32 or float64 @@ -638,6 +652,7 @@ class TestDygraphBatchNormAPIError(unittest.TestCase): class TestDygraphBatchNormTrainableStats(unittest.TestCase): + def test_dygraph(self): places = [fluid.CPUPlace()] if core.is_compiled_with_mlu(): @@ -686,6 +701,7 @@ class TestDygraphBatchNormTrainableStats(unittest.TestCase): class TestDygraphBatchNormOpenReserveSpace(unittest.TestCase): + def test_reservespace(self): with program_guard(Program(), Program()): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py index 7dd9dcdee57..b0fec2bdd0f 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py @@ -19,6 +19,7 @@ import paddle.fluid.core as core from paddle.fluid.op import Operator import paddle.fluid as fluid import sys + sys.path.append("..") from op_test import OpTest, _set_use_system_allocator from paddle.fluid.framework import grad_var_name @@ -30,6 +31,7 @@ paddle.enable_static() class TestBatchNorm(unittest.TestCase): + def test_name(self): places = [fluid.CPUPlace()] if core.is_compiled_with_mlu(): @@ -124,8 +126,9 @@ class TestBatchNorm(unittest.TestCase): def compute_v4(x): with fluid.dygraph.guard(p): - bn = paddle.nn.BatchNorm2D( - shape[1], weight_attr=False, bias_attr=False) + bn = paddle.nn.BatchNorm2D(shape[1], + weight_attr=False, + bias_attr=False) y = bn(fluid.dygraph.to_variable(x)) return y.numpy() @@ -173,6 +176,7 @@ class TestBatchNorm(unittest.TestCase): class TestBatchNormChannelLast(unittest.TestCase): + def setUp(self): self.original_dtyep = paddle.get_default_dtype() paddle.set_default_dtype("float32") @@ -196,8 +200,7 @@ class TestBatchNormChannelLast(unittest.TestCase): y2 = net2(channel_first_x) y2 = paddle.transpose(y2, [0, 2, 1]) self.assertEqual( - np.allclose( - y1.numpy(), y2.numpy(), atol=1e-07), True) + np.allclose(y1.numpy(), y2.numpy(), atol=1e-07), True) def test_2d(self): for p in self.places: @@ -212,8 +215,7 @@ class TestBatchNormChannelLast(unittest.TestCase): y2 = net2(channel_first_x) y2 = paddle.transpose(y2, [0, 2, 3, 1]) self.assertEqual( - np.allclose( - y1.numpy(), y2.numpy(), atol=1e-07), True) + np.allclose(y1.numpy(), y2.numpy(), atol=1e-07), True) def test_3d(self): for p in self.places: @@ -228,8 +230,7 @@ class TestBatchNormChannelLast(unittest.TestCase): y2 = net2(channel_first_x) y2 = paddle.transpose(y2, [0, 2, 3, 4, 1]) self.assertEqual( - np.allclose( - y1.numpy(), y2.numpy(), atol=1e-07), True) + np.allclose(y1.numpy(), y2.numpy(), atol=1e-07), True) # res = np.allclose(y1.numpy(), y2.numpy()) # if res == False: # np.savetxt("./y1.txt", y1.numpy().flatten(), fmt='%.10f', delimiter='\n') @@ -238,6 +239,7 @@ class TestBatchNormChannelLast(unittest.TestCase): class TestBatchNormUseGlobalStats(unittest.TestCase): + def setUp(self): self.places = [fluid.CPUPlace()] if core.is_compiled_with_mlu(): diff --git a/python/paddle/fluid/tests/unittests/mlu/test_cast_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_cast_op_mlu.py index 10356b124b2..6ba62b11499 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_cast_op_mlu.py +++ 
b/python/paddle/fluid/tests/unittests/mlu/test_cast_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest @@ -29,6 +30,7 @@ paddle.enable_static() class TestCastOpFp32ToFp16(OpTest): + def setUp(self): ipt = np.random.random(size=[10, 10]) self.inputs = {'X': ipt.astype('float32')} @@ -47,6 +49,7 @@ class TestCastOpFp32ToFp16(OpTest): class TestCastOpFp16ToFp32(OpTest): + def setUp(self): ipt = np.random.random(size=[10, 10]) self.inputs = {'X': ipt.astype('float16')} @@ -64,6 +67,7 @@ class TestCastOpFp16ToFp32(OpTest): class TestCastOpInt32ToInt32(OpTest): + def setUp(self): ipt = np.random.randint(1000, size=(10, 10)) self.inputs = {'X': ipt.astype('int32')} @@ -81,6 +85,7 @@ class TestCastOpInt32ToInt32(OpTest): class TestCastOpInt32ToFp32(OpTest): + def setUp(self): ipt = np.random.randint(1000, size=[10, 10]) self.inputs = {'X': ipt.astype('int32')} @@ -98,6 +103,7 @@ class TestCastOpInt32ToFp32(OpTest): class TestCastOpInt16ToFp64(OpTest): + def setUp(self): ipt = np.random.randint(1000, size=[10, 10]) self.inputs = {'X': ipt.astype('int16')} @@ -115,11 +121,12 @@ class TestCastOpInt16ToFp64(OpTest): class TestCastOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of cast_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.MLUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.MLUPlace(0)) self.assertRaises(TypeError, fluid.layers.cast, x1, 'int32') diff --git a/python/paddle/fluid/tests/unittests/mlu/test_coalesce_tensor_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_coalesce_tensor_op_mlu.py index 854ac0b6826..72783e3ca19 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_coalesce_tensor_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_coalesce_tensor_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append('..') from op_test import OpTest from paddle.fluid import core @@ -27,6 +28,7 @@ paddle.enable_static() class TestAllocContinuousSpace(OpTest): + def setUp(self): self.op_type = "coalesce_tensor" self.dtype, self.fluid_dtype = self.init_dtype() @@ -82,13 +84,13 @@ class TestAllocContinuousSpace(OpTest): return outputs, coalesce_tensor_var def test_check_output(self): - self.check_output_with_place( - place=paddle.device.MLUPlace(0), - no_check_set=["FusedOutput"], - atol=1e-5) + self.check_output_with_place(place=paddle.device.MLUPlace(0), + no_check_set=["FusedOutput"], + atol=1e-5) class TestAllocContinuousSpace2(TestAllocContinuousSpace): + def init_attr(self): return { "copy_data": False, @@ -99,10 +101,9 @@ class TestAllocContinuousSpace2(TestAllocContinuousSpace): } def test_check_output(self): - self.check_output_with_place( - place=paddle.device.MLUPlace(0), - no_check_set=["FusedOutput"], - atol=1e-5) + self.check_output_with_place(place=paddle.device.MLUPlace(0), + no_check_set=["FusedOutput"], + atol=1e-5) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_allgather.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_allgather.py index 09166e15aac..65902811732 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_allgather.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_allgather.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestCAllgatherOp(TestDistBase): 
+ def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_allgather_api_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_allgather_api_mlu.py index 576c310cc3a..be3dedefc59 100755 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_allgather_api_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_allgather_api_mlu.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCollectiveAllgatherAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_api_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_api_mlu.py index 447498b9022..8b3accc5050 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_api_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_api_mlu.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCollectiveAllreduceAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_max.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_max.py index bd04e6e2dc6..02901d2d511 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_max.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_max.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestCAllreduceOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_min.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_min.py index 4b16146e2ee..b8bae97f4f9 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_min.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_min.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestCAllreduceOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_prod.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_prod.py index 0c6ea566cfa..519715142fa 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_prod.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_prod.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestCAllreduceOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_sum.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_sum.py index a7a3984f4e5..04ddff84f3d 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_sum.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_allreduce_sum.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestCAllreduceOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_api_base_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_api_base_mlu.py index 9fae73a2540..04332b061f8 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_api_base_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_api_base_mlu.py @@ -41,6 +41,7 @@ def DataTypeCast(date_type): class TestCollectiveAPIRunnerBase(object): + def get_model(self, train_prog, startup_prog, rank, indata=None): raise NotImplementedError( "get model should be implemented by child class.") @@ -95,6 +96,7 @@ from contextlib import closing class 
TestDistBase(unittest.TestCase): + def setUp(self): self._port_set = set() self._trainers = 2 @@ -103,6 +105,7 @@ class TestDistBase(unittest.TestCase): self._python_interp = sys.executable def _find_free_port(self): + def __free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: @@ -145,18 +148,16 @@ class TestDistBase(unittest.TestCase): tr1_cmd = tr_cmd % (self._python_interp, model_file) tr0_pipe = open("/tmp/tr0_err_%d.log" % os.getpid(), "w") tr1_pipe = open("/tmp/tr1_err_%d.log" % os.getpid(), "w") - #print(tr0_cmd) - tr0_proc = subprocess.Popen( - tr0_cmd.strip().split(), - stdout=subprocess.PIPE, - stderr=tr0_pipe, - env=env0) - - tr1_proc = subprocess.Popen( - tr0_cmd.strip().split(), - stdout=subprocess.PIPE, - stderr=tr1_pipe, - env=env1) + #print(tr0_cmd) + tr0_proc = subprocess.Popen(tr0_cmd.strip().split(), + stdout=subprocess.PIPE, + stderr=tr0_pipe, + env=env0) + + tr1_proc = subprocess.Popen(tr0_cmd.strip().split(), + stdout=subprocess.PIPE, + stderr=tr1_pipe, + env=env1) tr0_out, tr0_err = tr0_proc.communicate() tr1_out, tr1_err = tr1_proc.communicate() @@ -200,8 +201,8 @@ class TestDistBase(unittest.TestCase): required_envs["GLOG_v"] = "3" required_envs["GLOG_logtostderr"] = "1" required_envs["GLOO_LOG_LEVEL"] = "TRACE" - tr0_out, tr1_out, pid0, pid1 = self._run_cluster(model_file, - required_envs) + tr0_out, tr1_out, pid0, pid1 = self._run_cluster( + model_file, required_envs) np_data_type = DataTypeCast(data_type) np.random.seed(pid0) input1 = np.random.random((10, 1000)).astype(np_data_type) @@ -214,11 +215,9 @@ class TestDistBase(unittest.TestCase): elif col_type == "allreduce": need_result = input1 + input2 self.assertTrue( - np.allclose( - tr0_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr0_out, need_result, rtol=1e-05, atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out, need_result, rtol=1e-05, atol=1e-05)) elif col_type == "reduce": need_result = input1 + input2 self.assertTrue(np.allclose(tr0_out, need_result)) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_base_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_base_mlu.py index f63daaf66ac..4ec1e7f7528 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_base_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_base_mlu.py @@ -53,6 +53,7 @@ def DataTypeCast(date_type): class TestCollectiveRunnerBase(object): + def get_model(self, train_prog, startup_prog, col_type): raise NotImplementedError( "get model should be implemented by child class.") @@ -63,9 +64,8 @@ class TestCollectiveRunnerBase(object): not_ready_endpoints = [] for ep in endpoints: ip_port = ep.split(":") - with closing( - socket.socket(socket.AF_INET, - socket.SOCK_STREAM)) as sock: + with closing(socket.socket(socket.AF_INET, + socket.SOCK_STREAM)) as sock: sock.settimeout(2) sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) if hasattr(socket, 'SO_REUSEPORT'): @@ -78,13 +78,14 @@ class TestCollectiveRunnerBase(object): not_ready_endpoints.append(ep) if not all_ok: sys.stderr.write("server not ready, wait 3 sec to retry...\n") - sys.stderr.write("not ready endpoints:" + str( - not_ready_endpoints) + "\n") + sys.stderr.write("not ready endpoints:" + + str(not_ready_endpoints) + "\n") sys.stderr.flush() time.sleep(3) else: break + #endpoints should be ["ip1:port1","ip2:port2"] def initCommunicator(self, program, rank, nranks, wait_port, @@ -94,30 +95,27 @@ class 
TestCollectiveRunnerBase(object): if rank == 0 and wait_port: self.wait_server_ready(other_endpoints) block = program.global_block() - cncl_id_var = block.create_var( - name=nameGen.generate('cncl_id'), - persistable=True, - type=core.VarDesc.VarType.RAW) - - block.append_op( - type='c_gen_cncl_id', - inputs={}, - outputs={'Out': cncl_id_var}, - attrs={ - 'rank': rank, - 'endpoint': current_endpoint, - 'other_endpoints': other_endpoints - }) - - block.append_op( - type='c_comm_init', - inputs={'X': cncl_id_var}, - outputs={}, - attrs={ - 'nranks': nranks, - 'rank': rank, - 'ring_id': self.global_ring_id - }) + cncl_id_var = block.create_var(name=nameGen.generate('cncl_id'), + persistable=True, + type=core.VarDesc.VarType.RAW) + + block.append_op(type='c_gen_cncl_id', + inputs={}, + outputs={'Out': cncl_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints + }) + + block.append_op(type='c_comm_init', + inputs={'X': cncl_id_var}, + outputs={}, + attrs={ + 'nranks': nranks, + 'rank': rank, + 'ring_id': self.global_ring_id + }) def run_trainer(self, args): train_prog = fluid.Program() @@ -162,6 +160,7 @@ from contextlib import closing class TestDistBase(unittest.TestCase): + def setUp(self): self._port_set = set() self._trainers = 2 @@ -170,6 +169,7 @@ class TestDistBase(unittest.TestCase): self._python_interp = sys.executable def _find_free_port(self): + def __free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: @@ -210,17 +210,15 @@ class TestDistBase(unittest.TestCase): tr0_pipe = open("/tmp/tr0_err.log", "wb") tr1_pipe = open("/tmp/tr1_err.log", "wb") - tr0_proc = subprocess.Popen( - tr0_cmd.strip().split(), - stdout=subprocess.PIPE, - stderr=tr0_pipe, - env=env0) + tr0_proc = subprocess.Popen(tr0_cmd.strip().split(), + stdout=subprocess.PIPE, + stderr=tr0_pipe, + env=env0) - tr1_proc = subprocess.Popen( - tr0_cmd.strip().split(), - stdout=subprocess.PIPE, - stderr=tr1_pipe, - env=env1) + tr1_proc = subprocess.Popen(tr0_cmd.strip().split(), + stdout=subprocess.PIPE, + stderr=tr1_pipe, + env=env1) tr0_out, tr0_err = tr0_proc.communicate() tr1_out, tr1_err = tr1_proc.communicate() @@ -252,8 +250,8 @@ class TestDistBase(unittest.TestCase): if check_error_log: required_envs["GLOG_v"] = "3" required_envs["GLOG_logtostderr"] = "1" - tr0_out, tr1_out, pid0, pid1 = self._run_cluster(model_file, - required_envs) + tr0_out, tr1_out, pid0, pid1 = self._run_cluster( + model_file, required_envs) np_data_type = DataTypeCast(data_type) np.random.seed(pid0) input1 = np.random.random((10, 1000)).astype(np_data_type) @@ -266,35 +264,27 @@ class TestDistBase(unittest.TestCase): elif col_type == "allreduce_sum": need_result = input1 + input2 self.assertTrue( - np.allclose( - tr0_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr0_out, need_result, rtol=1e-05, atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out, need_result, rtol=1e-05, atol=1e-05)) elif col_type == "allreduce_prod": need_result = input1 * input2 self.assertTrue( - np.allclose( - tr0_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr0_out, need_result, rtol=1e-05, atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out, need_result, rtol=1e-05, atol=1e-05)) elif col_type == "allreduce_max": need_result = np.maximum(input1, input2) self.assertTrue( - np.allclose( - tr0_out, need_result, rtol=1e-05, atol=1e-05)) + 
np.allclose(tr0_out, need_result, rtol=1e-05, atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out, need_result, rtol=1e-05, atol=1e-05)) elif col_type == "allreduce_min": need_result = np.minimum(input1, input2) self.assertTrue( - np.allclose( - tr0_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr0_out, need_result, rtol=1e-05, atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out, need_result, rtol=1e-05, atol=1e-05)) elif col_type == "reduce_sum": need_result = input1 + input2 self.assertTrue(np.allclose(tr1_out, need_result)) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_broadcast.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_broadcast.py index d9f3aca0314..537f125e41b 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_broadcast.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_broadcast.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestCBroadcastOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_broadcast_api_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_broadcast_api_mlu.py index 95919f33328..b85a37841e8 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_broadcast_api_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_broadcast_api_mlu.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCollectiveBroadcastAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_api_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_api_mlu.py index dc4b0993306..43a9728e543 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_api_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_api_mlu.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCollectiveReduceAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_max.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_max.py index 5da899c581f..e341c10dea0 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_max.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_max.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestCReduceOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_min.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_min.py index 21fea55eff7..932e3a86846 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_min.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_min.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestCReduceOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_prod.py b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_prod.py index 86d52a8c326..335979ee261 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_prod.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_prod.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestCReduceOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_sum.py 
b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_sum.py index 7028a0f29e8..2b873f1266f 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_sum.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_sum.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestCReduceOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/mlu/test_compare_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_compare_op_mlu.py index 87997acce02..ea3b39817e5 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_compare_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_compare_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -25,7 +26,9 @@ from paddle.fluid import Program, program_guard def create_test_class(op_type, typename, callback): + class Cls(OpTest): + def setUp(self): self.set_mlu() self.place = paddle.MLUPlace(0) @@ -76,18 +79,22 @@ def create_test_class(op_type, typename, callback): def test_broadcast_api_1(self): paddle.enable_static() with program_guard(Program(), Program()): - x = paddle.static.data( - name='x', shape=[1, 2, 1, 3], dtype=typename) - y = paddle.static.data( - name='y', shape=[1, 2, 3], dtype=typename) + x = paddle.static.data(name='x', + shape=[1, 2, 1, 3], + dtype=typename) + y = paddle.static.data(name='y', + shape=[1, 2, 3], + dtype=typename) op = eval("paddle.%s" % (self.op_type)) out = op(x, y) exe = paddle.static.Executor(self.place) input_x = np.arange(1, 7).reshape((1, 2, 1, 3)).astype(typename) input_y = np.arange(0, 6).reshape((1, 2, 3)).astype(typename) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) @@ -95,18 +102,22 @@ def create_test_class(op_type, typename, callback): def test_broadcast_api_2(self): paddle.enable_static() with program_guard(Program(), Program()): - x = paddle.static.data( - name='x', shape=[1, 2, 3], dtype=typename) - y = paddle.static.data( - name='y', shape=[1, 2, 1, 3], dtype=typename) + x = paddle.static.data(name='x', + shape=[1, 2, 3], + dtype=typename) + y = paddle.static.data(name='y', + shape=[1, 2, 1, 3], + dtype=typename) op = eval("paddle.%s" % (self.op_type)) out = op(x, y) exe = paddle.static.Executor(self.place) input_x = np.arange(0, 6).reshape((1, 2, 3)).astype(typename) input_y = np.arange(1, 7).reshape((1, 2, 1, 3)).astype(typename) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) @@ -122,8 +133,10 @@ def create_test_class(op_type, typename, callback): input_x = np.arange(0, 5).reshape((5)).astype(typename) input_y = np.array([5, 3, 2]).reshape((3, 1)).astype(typename) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_concat_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_concat_op_mlu.py index ba37fcee154..d4ebe0d16ef 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_concat_op_mlu.py +++ 
b/python/paddle/fluid/tests/unittests/mlu/test_concat_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestConcatOp(OpTest): + def setUp(self): self.set_mlu() self.op_type = "concat" @@ -43,8 +45,8 @@ class TestConcatOp(OpTest): self.actual_axis = self.axis self.outputs = { - 'Out': np.concatenate( - (self.x0, self.x1, self.x2), axis=self.actual_axis) + 'Out': + np.concatenate((self.x0, self.x1, self.x2), axis=self.actual_axis) } def set_mlu(self): @@ -69,6 +71,7 @@ class TestConcatOp(OpTest): class TestConcatOp2(TestConcatOp): + def init_test_data(self): self.x0 = np.random.random((2, 3, 4, 5)).astype(self.dtype) self.x1 = np.random.random((2, 3, 4, 5)).astype(self.dtype) @@ -79,6 +82,7 @@ class TestConcatOp2(TestConcatOp): @skip_check_grad_ci( reason="The function 'check_grad' for large inputs is too slow.") class TestConcatOp3(TestConcatOp): + def init_test_data(self): self.x0 = np.random.random((1, 256, 170, 256)).astype(self.dtype) self.x1 = np.random.random((1, 128, 170, 256)).astype(self.dtype) @@ -90,9 +94,11 @@ class TestConcatOp3(TestConcatOp): @skip_check_grad_ci( - reason="This test will meet fetch error when there is a null grad. The detailed information is in PR#17015." + reason= + "This test will meet fetch error when there is a null grad. The detailed information is in PR#17015." ) class TestConcatOp4(TestConcatOp): + def init_test_data(self): self.x0 = np.random.random((2, 3, 4, 5)).astype(self.dtype) self.x1 = np.random.random((2, 3, 4, 5)).astype(self.dtype) @@ -104,6 +110,7 @@ class TestConcatOp4(TestConcatOp): class TestConcatOp5(TestConcatOp): + def init_test_data(self): self.x0 = np.random.random((5, 1, 4, 5)).astype(self.dtype) self.x1 = np.random.random((5, 2, 4, 5)).astype(self.dtype) @@ -113,7 +120,9 @@ class TestConcatOp5(TestConcatOp): #----------------Concat Fp16---------------- def create_test_fp16(parent): + class TestConcatFp16(parent): + def init_dtype(self): self.dtype = np.float16 @@ -131,7 +140,9 @@ create_test_fp16(TestConcatOp5) #----------------Concat Int64---------------- def create_test_int64(parent): + class TestConcatInt64(parent): + def init_dtype(self): self.dtype = np.int64 @@ -152,7 +163,9 @@ create_test_int64(TestConcatOp5) #----------------Concat Int32---------------- def create_test_int32(parent): + class TestConcatInt32(parent): + def init_dtype(self): self.dtype = np.int32 @@ -173,7 +186,9 @@ create_test_int32(TestConcatOp5) #----------------Concat AxisTensor---------------- def create_test_AxisTensor(parent): + class TestConcatAxisTensor(parent): + def setUp(self): self.op_type = "concat" self.init_dtype() @@ -192,8 +207,9 @@ def create_test_AxisTensor(parent): self.actual_axis = self.axis self.outputs = { - 'Out': np.concatenate( - (self.x0, self.x1, self.x2), axis=self.actual_axis) + 'Out': + np.concatenate((self.x0, self.x1, self.x2), + axis=self.actual_axis) } self.place = paddle.device.MLUPlace(0) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_conv2d_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_conv2d_op_mlu.py index b09d892554b..79200ab572b 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_conv2d_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_conv2d_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle import 
paddle.fluid.core as core @@ -29,7 +30,9 @@ paddle.enable_static() def create_test_channel_last_class(parent): + class TestChannelLastCase(parent): + def init_data_format(self): self.data_format = "NHWC" @@ -43,7 +46,9 @@ def create_test_channel_last_class(parent): def create_test_padding_SAME_class(parent): + class TestPaddingSMAECase(parent): + def init_paddings(self): self.pad = [0, 0] self.padding_algorithm = "SAME" @@ -54,7 +59,9 @@ def create_test_padding_SAME_class(parent): def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): self.pad = [1, 1] self.padding_algorithm = "VALID" @@ -65,7 +72,9 @@ def create_test_padding_VALID_class(parent): def create_test_fp16_class(parent): + class TestFp16Case(parent): + def init_dtype(self): self.dtype = np.float16 @@ -75,6 +84,7 @@ def create_test_fp16_class(parent): class TestConv2DOp(OpTest): + def set_mlu(self): self.__class__.use_mlu = True self.place = paddle.device.MLUPlace(0) @@ -103,12 +113,11 @@ class TestConv2DOp(OpTest): input = np.random.random(self.input_size).astype(self.dtype) filter = np.random.uniform(-1, 1, self.filter_size).astype(self.dtype) - output, _, _, _, _ = conv2d_forward_naive( - input, - filter, - self.groups, - conv2d_param, - data_format=self.data_format) + output, _, _, _, _ = conv2d_forward_naive(input, + filter, + self.groups, + conv2d_param, + data_format=self.data_format) output = output.astype(self.dtype) self.inputs = { @@ -130,31 +139,28 @@ class TestConv2DOp(OpTest): def test_check_grad(self): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, {'Input', 'Filter'}, - 'Output', - max_relative_error=0.03, - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, {'Input', 'Filter'}, + 'Output', + max_relative_error=0.03, + numeric_place=paddle.CPUPlace()) def test_check_grad_no_filter(self): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, ['Input'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Filter']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter']), + numeric_place=paddle.CPUPlace()) def test_check_grad_no_input(self): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, ['Filter'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Input']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input']), + numeric_place=paddle.CPUPlace()) def init_test_case(self): self.pad = [0, 0] @@ -172,6 +178,7 @@ class TestConv2DOp(OpTest): class TestWithPad(TestConv2DOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -182,6 +189,7 @@ class TestWithPad(TestConv2DOp): class TestWithStride(TestConv2DOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -192,6 +200,7 @@ class TestWithStride(TestConv2DOp): class TestWithGroup(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -203,6 +212,7 @@ class TestWithGroup(TestConv2DOp): class TestWith1x1(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -213,12 +223,13 @@ class TestWith1x1(TestConv2DOp): def init_group(self): # FIXME: Supporting group = 3 in this case. 
- # NOTE(wangran16): There is an unknown error (acl error code is : 507015) + # NOTE(wangran16): There is an unknown error (acl error code is : 507015) # when group = 3, which needs to be fixed. self.groups = 1 class TestWithDepthWise5x5(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -232,6 +243,7 @@ class TestWithDepthWise5x5(TestConv2DOp): class TestWithDepthWise7x7(TestConv2DOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -245,6 +257,7 @@ class TestWithDepthWise7x7(TestConv2DOp): class TestWithDilation(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -263,6 +276,7 @@ class TestWithDilation(TestConv2DOp): class TestWithInput1x1Filter1x1(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -276,6 +290,7 @@ class TestWithInput1x1Filter1x1(TestConv2DOp): class TestConv2DOp_v2(OpTest): + def set_mlu(self): self.__class__.use_mlu = True self.place = paddle.device.MLUPlace(0) @@ -300,9 +315,10 @@ class TestConv2DOp_v2(OpTest): input = np.random.random(self.input_size).astype(self.dtype) filter = np.random.uniform(-1, 1, self.filter_size).astype(self.dtype) - output, _, _, _, _ = conv2d_forward_naive( - input, filter, self.groups, conv2d_param, self.padding_algorithm, - self.data_format) + output, _, _, _, _ = conv2d_forward_naive(input, filter, self.groups, + conv2d_param, + self.padding_algorithm, + self.data_format) output = output.astype(self.dtype) self.inputs = { @@ -325,30 +341,27 @@ class TestConv2DOp_v2(OpTest): def test_check_grad(self): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, {'Input', 'Filter'}, - 'Output', - max_relative_error=0.02, - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, {'Input', 'Filter'}, + 'Output', + max_relative_error=0.02, + numeric_place=paddle.CPUPlace()) def test_check_grad_no_filter(self): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, ['Input'], - 'Output', - max_relative_error=0.02, - no_grad_set=set(['Filter']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Input'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Filter']), + numeric_place=paddle.CPUPlace()) def test_check_grad_no_input(self): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, ['Filter'], - 'Output', - no_grad_set=set(['Input']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Filter'], + 'Output', + no_grad_set=set(['Input']), + numeric_place=paddle.CPUPlace()) def init_test_case(self): self.pad = [0, 0] @@ -379,12 +392,14 @@ class TestConv2DOp_v2(OpTest): class TestConv2DOp_AsyPadding(TestConv2DOp_v2): + def init_paddings(self): self.pad = [0, 0, 1, 2] self.padding_algorithm = "EXPLICIT" class TestWithPad_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 3, 5, 5] # NCHW @@ -398,6 +413,7 @@ class TestWithPad_AsyPadding(TestConv2DOp_v2): class TestWithStride_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [2, 2] self.input_size = [2, 3, 6, 6] # NCHW @@ -411,6 +427,7 @@ class TestWithStride_AsyPadding(TestConv2DOp_v2): class TestWithGroup_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 2] @@ -422,6 +439,7 @@ class TestWithGroup_AsyPadding(TestConv2DOp_v2): class TestWith1x1_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 
1] self.input_size = [2, 3, 5, 5] # NCHW @@ -438,6 +456,7 @@ class TestWith1x1_AsyPadding(TestConv2DOp_v2): class TestWithDepthWise3x3_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [3, 4, 10, 10] # NCHW @@ -459,6 +478,7 @@ class TestWithDepthWise3x3_AsyPadding(TestConv2DOp_v2): class TestWithDepthWise5x5_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 4, 10, 10] # NCHW @@ -475,6 +495,7 @@ class TestWithDepthWise5x5_AsyPadding(TestConv2DOp_v2): class TestWithDepthWise7x7_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [2, 2] self.input_size = [2, 8, 10, 10] # NCHW @@ -491,6 +512,7 @@ class TestWithDepthWise7x7_AsyPadding(TestConv2DOp_v2): class TestWithDilation_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 3, 10, 10] # NCHW @@ -512,6 +534,7 @@ class TestWithDilation_AsyPadding(TestConv2DOp_v2): class TestWithInput1x1Filter1x1_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [100, 1, 1, 1] # NCHW diff --git a/python/paddle/fluid/tests/unittests/mlu/test_cumsum_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_cumsum_op_mlu.py index 5b7ce30728c..3ef23367eeb 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_cumsum_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_cumsum_op_mlu.py @@ -26,6 +26,7 @@ paddle.enable_static() class TestMLUCumSumOp(OpTest): + def setUp(self): self.op_type = "cumsum" self.set_mlu() @@ -49,17 +50,18 @@ class TestMLUCumSumOp(OpTest): class TestMLUCumSumOp2(TestMLUCumSumOp): + def init_testcase(self): self.attrs = {'axis': -1, 'reverse': True} self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} self.outputs = { - 'Out': np.flip( - np.flip( - self.inputs['X'], axis=2).cumsum(axis=2), axis=2) + 'Out': np.flip(np.flip(self.inputs['X'], axis=2).cumsum(axis=2), + axis=2) } class TestMLUCumSumOp3(TestMLUCumSumOp): + def init_testcase(self): self.attrs = {'axis': 1} self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} @@ -67,6 +69,7 @@ class TestMLUCumSumOp3(TestMLUCumSumOp): class TestMLUCumSumOp4(TestMLUCumSumOp): + def init_testcase(self): self.attrs = {'axis': 0} self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} @@ -74,98 +77,107 @@ class TestMLUCumSumOp4(TestMLUCumSumOp): class TestMLUCumSumOp5(TestMLUCumSumOp): + def init_testcase(self): self.inputs = {'X': np.random.random((5, 20)).astype(self.dtype)} self.outputs = {'Out': self.inputs['X'].cumsum(axis=1)} class TestMLUCumSumOp7(TestMLUCumSumOp): + def init_testcase(self): self.inputs = {'X': np.random.random((100)).astype(self.dtype)} self.outputs = {'Out': self.inputs['X'].cumsum(axis=0)} class TestNPUCumSumExclusive1(TestMLUCumSumOp): + def init_testcase(self): self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((4, 5, 65)).astype(self.dtype) self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (4, 5, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + np.concatenate((np.zeros( + (4, 5, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), + axis=2) } class TestNPUCumSumExclusive2(TestMLUCumSumOp): + def init_testcase(self): self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((1, 1, 888)).astype(self.dtype) self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (1, 1, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), - 
axis=2) + 'Out': + np.concatenate((np.zeros( + (1, 1, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), + axis=2) } class TestNPUCumSumExclusive3(TestMLUCumSumOp): + def init_testcase(self): self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((4, 5, 888)).astype(self.dtype) self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (4, 5, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + np.concatenate((np.zeros( + (4, 5, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), + axis=2) } class TestNPUCumSumExclusive4(TestMLUCumSumOp): + def init_testcase(self): self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((1, 1, 3049)).astype(self.dtype) self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (1, 1, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + np.concatenate((np.zeros( + (1, 1, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), + axis=2) } class TestNPUCumSumExclusive5(TestMLUCumSumOp): + def init_testcase(self): self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((4, 5, 3096)).astype(self.dtype) self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (4, 5, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + np.concatenate((np.zeros( + (4, 5, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), + axis=2) } class TestNPUCumSumReverseExclusive(TestMLUCumSumOp): + def init_testcase(self): self.attrs = {'axis': 2, 'reverse': True, "exclusive": True} a = np.random.random((4, 5, 6)).astype(self.dtype) self.inputs = {'X': a} a = np.flip(a, axis=2) self.outputs = { - 'Out': np.concatenate( - (np.flip( - a[:, :, :-1].cumsum(axis=2), axis=2), np.zeros( - (4, 5, 1), dtype=self.dtype)), + 'Out': + np.concatenate( + (np.flip(a[:, :, :-1].cumsum(axis=2), + axis=2), np.zeros((4, 5, 1), dtype=self.dtype)), axis=2) } class TestNPUCumSumWithFlatten1(TestMLUCumSumOp): + def init_testcase(self): self.attrs = {'flatten': True} self.inputs = {'X': np.random.random((5, 6)).astype(self.dtype)} @@ -173,6 +185,7 @@ class TestNPUCumSumWithFlatten1(TestMLUCumSumOp): class TestNPUCumSumWithFlatten2(TestMLUCumSumOp): + def init_testcase(self): self.attrs = {'flatten': True} self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} diff --git a/python/paddle/fluid/tests/unittests/mlu/test_dropout_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_dropout_op_mlu.py index f8984f5c6df..e9d172c8941 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_dropout_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_dropout_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -28,6 +29,7 @@ SEED = 2022 class TestDropoutOp(OpTest): + def setUp(self): self.op_type = "dropout" self.set_mlu() @@ -163,6 +165,7 @@ class TestDropoutOpInference(OpTest): @skip_check_grad_ci(reason="For inference, check_grad is not required.") class TestDropoutOpInference2(TestDropoutOpInference): + def setUp(self): self.op_type = "dropout" self.set_mlu() @@ -184,8 +187,7 @@ class TestDropoutOpWithSeed(TestDropoutOp): self.init_dtype() self.inputs = { "X": np.random.random((32, 64)).astype(self.dtype), - "Seed": np.asarray( - [125], dtype="int32") + "Seed": np.asarray([125], dtype="int32") } self.attrs = { 'dropout_prob': 0.0, @@ -210,6 +212,7 @@ class TestDropoutOpFp16(TestDropoutOp): class 
TestDropoutAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace(), paddle.device.MLUPlace(0)] @@ -217,36 +220,43 @@ class TestDropoutAPI(unittest.TestCase): def check_static_result(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): input = fluid.data(name="input", shape=[40, 40], dtype="float32") - res1 = paddle.nn.functional.dropout( - x=input, p=0., training=False, mode='upscale_in_train') - res2 = paddle.nn.functional.dropout( - x=input, p=0., axis=0, training=True, mode='upscale_in_train') - res3 = paddle.nn.functional.dropout( - x=input, p=0., axis=0, training=False, mode='upscale_in_train') - res4 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=[0, 1], - training=True, - mode='upscale_in_train') - res5 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=[0, 1], - training=False, - mode='upscale_in_train') - res6 = paddle.nn.functional.dropout( - x=input, p=1., training=True, mode='upscale_in_train') + res1 = paddle.nn.functional.dropout(x=input, + p=0., + training=False, + mode='upscale_in_train') + res2 = paddle.nn.functional.dropout(x=input, + p=0., + axis=0, + training=True, + mode='upscale_in_train') + res3 = paddle.nn.functional.dropout(x=input, + p=0., + axis=0, + training=False, + mode='upscale_in_train') + res4 = paddle.nn.functional.dropout(x=input, + p=0., + axis=[0, 1], + training=True, + mode='upscale_in_train') + res5 = paddle.nn.functional.dropout(x=input, + p=0., + axis=[0, 1], + training=False, + mode='upscale_in_train') + res6 = paddle.nn.functional.dropout(x=input, + p=1., + training=True, + mode='upscale_in_train') res7 = paddle.fluid.layers.dropout( x=input, dropout_prob=0., dropout_implementation='upscale_in_train') - res8 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=(0, 1), - training=False, - mode='upscale_in_train') + res8 = paddle.nn.functional.dropout(x=input, + p=0., + axis=(0, 1), + training=False, + mode='upscale_in_train') in_np = np.random.random([40, 40]).astype("float32") res_np = in_np diff --git a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_add_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_add_op_mlu.py index 3dc711c7d75..2a0d74d5000 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_add_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_add_op_mlu.py @@ -18,6 +18,7 @@ import numpy as np import paddle import paddle.fluid.core as core import sys + sys.path.append('..') from op_test import OpTest, skip_check_grad_ci import paddle.fluid as fluid @@ -27,6 +28,7 @@ paddle.enable_static() class TestElementwiseAddOp(OpTest): + def set_mlu(self): self.place = paddle.device.MLUPlace(0) self.__class__.use_mlu = True @@ -51,26 +53,25 @@ class TestElementwiseAddOp(OpTest): def test_check_grad_normal(self): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, ['X', 'Y'], 'Out', max_relative_error=0.01) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + max_relative_error=0.01) def test_check_grad_ingore_x(self): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, ['Y'], - 'Out', - no_grad_set=set("X"), - max_relative_error=0.01) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + no_grad_set=set("X"), + max_relative_error=0.01) def test_check_grad_ingore_y(self): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, ['X'], - 'Out', - no_grad_set=set('Y'), - max_relative_error=0.01) + 
self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set('Y'), + max_relative_error=0.01) def init_input_output(self): self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) @@ -85,6 +86,7 @@ class TestElementwiseAddOp(OpTest): class TestFP16ElementwiseAddOp(TestElementwiseAddOp): + def init_dtype(self): self.dtype = np.float16 @@ -95,6 +97,7 @@ class TestFP16ElementwiseAddOp(TestElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseAddOp_scalar(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -104,6 +107,7 @@ class TestElementwiseAddOp_scalar(TestElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestFP16ElementwiseAddOp_scalar(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -113,6 +117,7 @@ class TestFP16ElementwiseAddOp_scalar(TestFP16ElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1,1) to test broadcast.") class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1, 1).astype(self.dtype) @@ -122,6 +127,7 @@ class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1,1) to test broadcast.") class TestFP16ElementwiseAddOp_scalar2(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1, 1).astype(self.dtype) @@ -129,6 +135,7 @@ class TestFP16ElementwiseAddOp_scalar2(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_Vector(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.random((100, )).astype(self.dtype) self.y = np.random.random((100, )).astype(self.dtype) @@ -136,6 +143,7 @@ class TestElementwiseAddOp_Vector(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_Vector(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.random((100, )).astype(self.dtype) self.y = np.random.random((100, )).astype(self.dtype) @@ -143,6 +151,7 @@ class TestFP16ElementwiseAddOp_Vector(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -153,6 +162,7 @@ class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_0(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -163,6 +173,7 @@ class TestFP16ElementwiseAddOp_broadcast_0(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 100, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -173,6 +184,7 @@ class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_1(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 100, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -183,6 +195,7 @@ class 
TestFP16ElementwiseAddOp_broadcast_1(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -190,6 +203,7 @@ class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_2(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -197,6 +211,7 @@ class TestFP16ElementwiseAddOp_broadcast_2(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12, 1).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -207,6 +222,7 @@ class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_3(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -217,6 +233,7 @@ class TestFP16ElementwiseAddOp_broadcast_3(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 1, 2).astype(self.dtype) self.y = np.random.rand(100, 1).astype(self.dtype) @@ -227,6 +244,7 @@ class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_4(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 1, 2).astype(self.dtype) self.y = np.random.rand(100, 1).astype(self.dtype) @@ -237,6 +255,7 @@ class TestFP16ElementwiseAddOp_broadcast_4(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 3, 12).astype(self.dtype) self.y = np.random.rand(10, 1, 12).astype(self.dtype) @@ -244,6 +263,7 @@ class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_5(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 3, 12).astype(self.dtype) self.y = np.random.rand(10, 1, 12).astype(self.dtype) @@ -251,6 +271,7 @@ class TestFP16ElementwiseAddOp_broadcast_5(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype) self.y = np.random.rand(2, 12, 1, 5).astype(self.dtype) @@ -258,6 +279,7 @@ class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(1, 1, 20, 5).astype(self.dtype) self.y = np.random.rand(20, 5, 1, 1).astype(self.dtype) @@ -265,6 +287,7 @@ class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_6(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype) self.y = np.random.rand(2, 12, 1, 5).astype(self.dtype) @@ -272,6 +295,7 @@ class TestFP16ElementwiseAddOp_broadcast_6(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -282,6 +306,7 @@ class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): class 
TestFP16ElementwiseAddOp_rowwise_add_0(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -294,6 +319,7 @@ class TestFP16ElementwiseAddOp_rowwise_add_0(TestFP16ElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 1).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -306,6 +332,7 @@ class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestFP16ElementwiseAddOp_rowwise_add_1(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 1).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -316,6 +343,7 @@ class TestFP16ElementwiseAddOp_rowwise_add_1(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100, 1, 1).astype(self.dtype) @@ -326,6 +354,7 @@ class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_channelwise_add(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100, 1, 1).astype(self.dtype) @@ -336,6 +365,7 @@ class TestFP16ElementwiseAddOp_channelwise_add(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(1, 1, 100).astype(self.dtype) @@ -346,6 +376,7 @@ class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): class TestElementwiseFP16AddOp_commonuse_add1(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(1, 1, 100).astype(self.dtype) @@ -356,6 +387,7 @@ class TestElementwiseFP16AddOp_commonuse_add1(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 3, 1, 4).astype(self.dtype) self.y = np.random.rand(10, 1, 12, 1).astype(self.dtype) @@ -366,6 +398,7 @@ class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 12).astype(self.dtype) self.y = np.random.rand(2, 2, 10, 12).astype(self.dtype) @@ -376,6 +409,7 @@ class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): class TestElementwiseAddOp_same_shape_ysize_large(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 1, 12).astype(self.dtype) self.y = np.random.rand(10, 2, 12).astype(self.dtype) @@ -386,13 +420,14 @@ class TestElementwiseAddOp_same_shape_ysize_large(TestElementwiseAddOp): class TestElementwiseAddOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of elementwise_add must be Variable. 
- x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.MLUPlace(0)) - y1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.MLUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.MLUPlace(0)) + y1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.MLUPlace(0)) self.assertRaises(TypeError, fluid.layers.elementwise_add, x1, y1) # the input dtype of elementwise_add must be float16 or float32 @@ -402,6 +437,7 @@ class TestElementwiseAddOpError(unittest.TestCase): class TestAddApi(unittest.TestCase): + def _executed_api(self, x, y, name=None): return paddle.add(x, y, name) @@ -445,11 +481,13 @@ class TestAddApi(unittest.TestCase): class TestAddInplaceApi(TestAddApi): + def _executed_api(self, x, y, name=None): return x.add_(y, name) class TestAddInplaceBroadcastSuccess(unittest.TestCase): + def init_data(self): self.x_numpy = np.random.rand(2, 3, 4).astype('float32') self.y_numpy = np.random.rand(3, 4).astype('float32') @@ -466,18 +504,21 @@ class TestAddInplaceBroadcastSuccess(unittest.TestCase): class TestAddInplaceBroadcastSuccess2(TestAddInplaceBroadcastSuccess): + def init_data(self): self.x_numpy = np.random.rand(1, 2, 3, 1).astype('float32') self.y_numpy = np.random.rand(3, 1).astype('float32') class TestAddInplaceBroadcastSuccess3(TestAddInplaceBroadcastSuccess): + def init_data(self): self.x_numpy = np.random.rand(2, 3, 1, 5).astype('float32') self.y_numpy = np.random.rand(1, 3, 1, 5).astype('float32') class TestAddInplaceBroadcastError(unittest.TestCase): + def init_data(self): self.x_numpy = np.random.rand(3, 4).astype('float32') self.y_numpy = np.random.rand(2, 3, 4).astype('float32') @@ -496,18 +537,21 @@ class TestAddInplaceBroadcastError(unittest.TestCase): class TestAddInplaceBroadcastError2(TestAddInplaceBroadcastError): + def init_data(self): self.x_numpy = np.random.rand(2, 1, 4).astype('float32') self.y_numpy = np.random.rand(2, 3, 4).astype('float32') class TestAddInplaceBroadcastError3(TestAddInplaceBroadcastError): + def init_data(self): self.x_numpy = np.random.rand(5, 2, 1, 4).astype('float32') self.y_numpy = np.random.rand(2, 3, 4).astype('float32') class TestBoolAddFloatElementwiseAddop(unittest.TestCase): + def test_static_add(self): paddle.enable_static() a = 1.5 diff --git a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_div_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_div_op_mlu.py index 8fdac75c4c1..c3eadc341f3 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_div_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_div_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -28,6 +29,7 @@ SEED = 2022 class TestElementwiseDiv(OpTest): + def setUp(self): self.set_mlu() self.op_type = "elementwise_div" @@ -56,25 +58,25 @@ class TestElementwiseDiv(OpTest): self.check_output_with_place(self.place) def test_check_grad_normal(self): - self.check_grad_with_place( - self.place, ['X', 'Y'], 'Out', max_relative_error=0.05) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + max_relative_error=0.05) def test_check_grad_ingore_x(self): - self.check_grad_with_place( - self.place, ['Y'], - 'Out', - max_relative_error=0.05, - no_grad_set=set("X")) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + max_relative_error=0.05, + 
no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad_with_place( - self.place, ['X'], - 'Out', - max_relative_error=0.05, - no_grad_set=set("Y")) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.05, + no_grad_set=set("Y")) class TestElementwiseDivFp16(OpTest): + def setUp(self): self.set_mlu() self.op_type = "elementwise_div" @@ -107,6 +109,7 @@ class TestElementwiseDivFp16(OpTest): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestTestElementwiseDiv_scalar(TestElementwiseDiv): + def setUp(self): self.set_mlu() self.op_type = "elementwise_div" @@ -118,6 +121,7 @@ class TestTestElementwiseDiv_scalar(TestElementwiseDiv): class TestTestElementwiseDiv_Vector(TestElementwiseDiv): + def setUp(self): self.set_mlu() self.op_type = "elementwise_div" @@ -129,6 +133,7 @@ class TestTestElementwiseDiv_Vector(TestElementwiseDiv): class TestTestElementwiseDiv_broadcast_0(TestElementwiseDiv): + def setUp(self): self.set_mlu() self.op_type = "elementwise_div" @@ -139,12 +144,13 @@ class TestTestElementwiseDiv_broadcast_0(TestElementwiseDiv): self.attrs = {'axis': 0} self.outputs = { - 'Out': - np.divide(self.inputs['X'], self.inputs['Y'].reshape(100, 1, 1)) + 'Out': np.divide(self.inputs['X'], + self.inputs['Y'].reshape(100, 1, 1)) } class TestTestElementwiseDiv_broadcast_1(TestElementwiseDiv): + def setUp(self): self.set_mlu() self.op_type = "elementwise_div" @@ -155,12 +161,13 @@ class TestTestElementwiseDiv_broadcast_1(TestElementwiseDiv): self.attrs = {'axis': 1} self.outputs = { - 'Out': - np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 100, 1)) + 'Out': np.divide(self.inputs['X'], + self.inputs['Y'].reshape(1, 100, 1)) } class TestTestElementwiseDiv_broadcast_2(TestElementwiseDiv): + def setUp(self): self.set_mlu() self.op_type = "elementwise_div" @@ -170,12 +177,13 @@ class TestTestElementwiseDiv_broadcast_2(TestElementwiseDiv): } self.outputs = { - 'Out': - np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 1, 100)) + 'Out': np.divide(self.inputs['X'], + self.inputs['Y'].reshape(1, 1, 100)) } class TestTestElementwiseDiv_broadcast_3(TestElementwiseDiv): + def setUp(self): self.set_mlu() self.op_type = "elementwise_div" @@ -192,6 +200,7 @@ class TestTestElementwiseDiv_broadcast_3(TestElementwiseDiv): class TestTestElementwiseDiv_broadcast_4(TestElementwiseDiv): + def setUp(self): self.set_mlu() self.op_type = "elementwise_div" @@ -203,6 +212,7 @@ class TestTestElementwiseDiv_broadcast_4(TestElementwiseDiv): class TestTestElementwiseDiv_broadcast_5(TestElementwiseDiv): + def setUp(self): self.set_mlu() self.op_type = "elementwise_div" @@ -214,6 +224,7 @@ class TestTestElementwiseDiv_broadcast_5(TestElementwiseDiv): class TestTestElementwiseDiv_commonuse_1(TestElementwiseDiv): + def setUp(self): self.set_mlu() self.op_type = "elementwise_div" @@ -225,6 +236,7 @@ class TestTestElementwiseDiv_commonuse_1(TestElementwiseDiv): class TestTestElementwiseDiv_commonuse_2(TestElementwiseDiv): + def setUp(self): self.set_mlu() self.op_type = "elementwise_div" @@ -236,6 +248,7 @@ class TestTestElementwiseDiv_commonuse_2(TestElementwiseDiv): class TestTestElementwiseDiv_xsize_lessthan_ysize(TestElementwiseDiv): + def setUp(self): self.set_mlu() self.op_type = "elementwise_div" diff --git a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_mul_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_mul_op_mlu.py index bc8a08c39ff..dd7be15b812 100644 --- 
a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_mul_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_mul_op_mlu.py @@ -24,6 +24,7 @@ from paddle.fluid import Program, compiler, program_guard from paddle.fluid.op import Operator import sys + sys.path.append('..') from op_test import OpTest, skip_check_grad_ci @@ -31,6 +32,7 @@ paddle.enable_static() class ElementwiseMulOp(OpTest): + def init_kernel_type(self): self.__class__.use_mlu = True self.place = paddle.device.MLUPlace(0) @@ -58,12 +60,14 @@ class ElementwiseMulOp(OpTest): self.check_grad_with_place(self.place, ['X', 'Y'], 'Out') def test_check_grad_ingore_x(self): - self.check_grad_with_place( - self.place, ['Y'], 'Out', no_grad_set=set("X")) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', no_grad_set=set('Y')) + self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set('Y')) def init_input_output(self): self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) @@ -80,6 +84,7 @@ class ElementwiseMulOp(OpTest): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseMulOp_scalar(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -91,6 +96,7 @@ class TestElementwiseMulOp_scalar(ElementwiseMulOp): class TestElementwiseMulOp_Vector(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -102,6 +108,7 @@ class TestElementwiseMulOp_Vector(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -112,6 +119,7 @@ class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -127,6 +135,7 @@ class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -141,6 +150,7 @@ class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -156,6 +166,7 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_4(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -167,6 +178,7 @@ class TestElementwiseMulOp_broadcast_4(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_5(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -178,11 +190,13 @@ class TestElementwiseMulOp_broadcast_5(ElementwiseMulOp): class TestElementwiseMulOpFp16(ElementwiseMulOp): + def init_dtype(self): self.dtype = np.float16 class TestElementwiseMulOp_commonuse_1(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -194,6 +208,7 @@ class TestElementwiseMulOp_commonuse_1(ElementwiseMulOp): class TestElementwiseMulOp_commonuse_2(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -205,6 +220,7 @@ class TestElementwiseMulOp_commonuse_2(ElementwiseMulOp): class TestElementwiseMulOp_xsize_lessthan_ysize(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" 
self.inputs = { @@ -221,13 +237,14 @@ class TestElementwiseMulOp_xsize_lessthan_ysize(ElementwiseMulOp): class TestElementwiseMulOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of elementwise_mul must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) - y1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) + y1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.elementwise_mul, x1, y1) # the input dtype of elementwise_mul must be float16 or float32 or int32 diff --git a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_sub_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_sub_op_mlu.py index 9ca5359e05f..a406317a96a 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_sub_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_sub_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -28,6 +29,7 @@ SEED = 2022 class TestElementwiseSubOp(OpTest): + def setUp(self): self.set_mlu() self.op_type = "elementwise_sub" @@ -64,23 +66,22 @@ class TestElementwiseSubOp(OpTest): self.check_grad_with_place(self.place, ['X', 'Y'], 'Out') def test_check_grad_ingore_x(self): - self.check_grad_with_place( - self.place, ['Y'], - 'Out', - max_relative_error=0.005, - no_grad_set=set("X")) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + max_relative_error=0.005, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad_with_place( - self.place, ['X'], - 'Out', - max_relative_error=0.005, - no_grad_set=set('Y')) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.005, + no_grad_set=set('Y')) @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseSubOp_scalar(TestElementwiseSubOp): + def setUp(self): self.set_mlu() self.op_type = "elementwise_sub" @@ -92,6 +93,7 @@ class TestElementwiseSubOp_scalar(TestElementwiseSubOp): class TestElementwiseSubOp_Vector(TestElementwiseSubOp): + def setUp(self): self.set_mlu() self.op_type = "elementwise_sub" @@ -103,6 +105,7 @@ class TestElementwiseSubOp_Vector(TestElementwiseSubOp): class TestElementwiseSubOp_broadcast_0(TestElementwiseSubOp): + def setUp(self): self.set_mlu() self.op_type = "elementwise_sub" @@ -117,6 +120,7 @@ class TestElementwiseSubOp_broadcast_0(TestElementwiseSubOp): class TestElementwiseSubOp_broadcast_1(TestElementwiseSubOp): + def setUp(self): self.set_mlu() self.op_type = "elementwise_sub" @@ -131,6 +135,7 @@ class TestElementwiseSubOp_broadcast_1(TestElementwiseSubOp): class TestElementwiseSubOp_broadcast_2(TestElementwiseSubOp): + def setUp(self): self.set_mlu() self.op_type = "elementwise_sub" @@ -144,6 +149,7 @@ class TestElementwiseSubOp_broadcast_2(TestElementwiseSubOp): class TestElementwiseSubOp_broadcast_3(TestElementwiseSubOp): + def setUp(self): self.set_mlu() self.op_type = "elementwise_sub" @@ -158,6 +164,7 @@ class TestElementwiseSubOp_broadcast_3(TestElementwiseSubOp): class TestElementwiseSubOp_broadcast_4(TestElementwiseSubOp): + def setUp(self): self.set_mlu() self.op_type = "elementwise_sub" @@ -169,6 +176,7 
@@ class TestElementwiseSubOp_broadcast_4(TestElementwiseSubOp): class TestElementwiseSubOp_commonuse_1(TestElementwiseSubOp): + def setUp(self): self.set_mlu() self.op_type = "elementwise_sub" @@ -180,6 +188,7 @@ class TestElementwiseSubOp_commonuse_1(TestElementwiseSubOp): class TestElementwiseSubOp_commonuse_2(TestElementwiseSubOp): + def setUp(self): self.set_mlu() self.op_type = "elementwise_sub" @@ -191,6 +200,7 @@ class TestElementwiseSubOp_commonuse_2(TestElementwiseSubOp): class TestElementwiseSubOp_xsize_lessthan_ysize(TestElementwiseSubOp): + def setUp(self): self.set_mlu() self.op_type = "elementwise_sub" diff --git a/python/paddle/fluid/tests/unittests/mlu/test_fill_any_like_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_fill_any_like_op_mlu.py index 065c8072d4c..4847a6a42d9 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_fill_any_like_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_fill_any_like_op_mlu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import paddle @@ -26,6 +27,7 @@ paddle.enable_static() class TestFillAnyLikeOp(OpTest): + def setUp(self): self.init_dtype() self.set_mlu() @@ -54,21 +56,25 @@ class TestFillAnyLikeOp(OpTest): class TestFillAnyLikeOp2(TestFillAnyLikeOp): + def set_value(self): self.value = -0.0 class TestFillAnyLikeOp3(TestFillAnyLikeOp): + def set_value(self): self.value = 1.0 class TestFillAnyLikeOp4(TestFillAnyLikeOp): + def set_value(self): self.value = 1e-9 class TestFillAnyLikeOp5(TestFillAnyLikeOp): + def set_value(self): if self.dtype == "float16": self.value = 0.05 @@ -77,6 +83,7 @@ class TestFillAnyLikeOp5(TestFillAnyLikeOp): class TestFillAnyLikeOpInt32(TestFillAnyLikeOp): + def init_dtype(self): self.dtype = np.int32 @@ -85,6 +92,7 @@ class TestFillAnyLikeOpInt32(TestFillAnyLikeOp): class TestFillAnyLikeOpInt64(TestFillAnyLikeOp): + def init_dtype(self): self.dtype = np.int64 @@ -93,6 +101,7 @@ class TestFillAnyLikeOpInt64(TestFillAnyLikeOp): class TestFillAnyLikeOpFloat32(TestFillAnyLikeOp): + def init_dtype(self): self.dtype = np.float32 diff --git a/python/paddle/fluid/tests/unittests/mlu/test_fill_constant_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_fill_constant_op_mlu.py index a43b7d0164d..604dbf4ddbc 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_fill_constant_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_fill_constant_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append('..') from op_test import OpTest, convert_float_to_uint16 @@ -32,6 +33,7 @@ paddle.enable_static() # Situation 1: Attr(shape) is a list(without tensor) class TestFillConstantOp1(OpTest): + def setUp(self): '''Test fill_constant op with specified value ''' @@ -49,6 +51,7 @@ class TestFillConstantOp1(OpTest): class TestFillConstantOp2(OpTest): + def setUp(self): '''Test fill_constant op with default value ''' @@ -66,6 +69,7 @@ class TestFillConstantOp2(OpTest): class TestFillConstantOp3(OpTest): + def setUp(self): '''Test fill_constant op with specified int64 value ''' @@ -83,6 +87,7 @@ class TestFillConstantOp3(OpTest): class TestFillConstantOp4(OpTest): + def setUp(self): '''Test fill_constant op with specified int value ''' @@ -100,14 +105,17 @@ class TestFillConstantOp4(OpTest): class TestFillConstantOpWithSelectedRows(unittest.TestCase): + def check_with_place(self, place): scope = core.Scope() # create Out Variable out = scope.var('Out').get_selected_rows() # create 
and run fill_constant_op operator - fill_constant_op = Operator( - "fill_constant", shape=[123, 92], value=3.8, Out='Out') + fill_constant_op = Operator("fill_constant", + shape=[123, 92], + value=3.8, + Out='Out') fill_constant_op.run(scope, place) # get result from Out @@ -127,6 +135,7 @@ class TestFillConstantOpWithSelectedRows(unittest.TestCase): # Situation 2: Attr(shape) is a list(with tensor) class TestFillConstantOp1_ShapeTensorList(OpTest): + def setUp(self): '''Test fill_constant op with specified value ''' @@ -154,6 +163,7 @@ class TestFillConstantOp1_ShapeTensorList(OpTest): class TestFillConstantOp2_ShapeTensorList(OpTest): + def setUp(self): '''Test fill_constant op with default value ''' @@ -180,6 +190,7 @@ class TestFillConstantOp2_ShapeTensorList(OpTest): class TestFillConstantOp3_ShapeTensorList(TestFillConstantOp1_ShapeTensorList): + def init_data(self): self.shape = [123, 92] self.infer_shape = [123, -1] @@ -187,6 +198,7 @@ class TestFillConstantOp3_ShapeTensorList(TestFillConstantOp1_ShapeTensorList): class TestFillConstantOp4_ShapeTensorList(TestFillConstantOp1_ShapeTensorList): + def init_data(self): self.shape = [123, 92] self.infer_shape = [123, -1] @@ -195,6 +207,7 @@ class TestFillConstantOp4_ShapeTensorList(TestFillConstantOp1_ShapeTensorList): # Situation 3: shape is a tensor class TestFillConstantOp1_ShapeTensor(OpTest): + def setUp(self): '''Test fill_constant op with specified value ''' @@ -218,6 +231,7 @@ class TestFillConstantOp1_ShapeTensor(OpTest): # Situation 4: value is a tensor class TestFillConstantOp1_ValueTensor(OpTest): + def setUp(self): '''Test fill_constant op with specified value ''' @@ -246,6 +260,7 @@ class TestFillConstantOp1_ValueTensor(OpTest): # Situation 5: value is a tensor class TestFillConstantOp2_ValueTensor(OpTest): + def setUp(self): '''Test fill_constant op with specified value ''' @@ -273,43 +288,56 @@ class TestFillConstantOp2_ValueTensor(OpTest): # Test python API class TestFillConstantAPI(unittest.TestCase): + def test_api(self): positive_2_int32 = fluid.layers.fill_constant([1], "int32", 2) positive_2_int64 = fluid.layers.fill_constant([1], "int64", 2) - shape_tensor_int32 = fluid.data( - name="shape_tensor_int32", shape=[2], dtype="int32") - shape_tensor_int64 = fluid.data( - name="shape_tensor_int64", shape=[2], dtype="int64") - - out_1 = fluid.layers.fill_constant( - shape=[1, 2], dtype="float32", value=1.1) - - out_2 = fluid.layers.fill_constant( - shape=[1, positive_2_int32], dtype="float32", value=1.1) - - out_3 = fluid.layers.fill_constant( - shape=[1, positive_2_int64], dtype="float32", value=1.1) - - out_4 = fluid.layers.fill_constant( - shape=shape_tensor_int32, dtype="float32", value=1.1) - - out_5 = fluid.layers.fill_constant( - shape=shape_tensor_int64, dtype="float32", value=1.1) - - out_6 = fluid.layers.fill_constant( - shape=shape_tensor_int64, dtype=np.float32, value=1.1) - - val1 = fluid.layers.fill_constant( - shape=[1], dtype=np.float32, value=1.1) - val2 = fluid.layers.fill_constant( - shape=[1], dtype=np.float64, value=1.1) - out_7 = fluid.layers.fill_constant( - shape=shape_tensor_int64, dtype=np.float32, value=val1) - - out_8 = fluid.layers.fill_constant( - shape=shape_tensor_int64, dtype=np.float32, value=val2) + shape_tensor_int32 = fluid.data(name="shape_tensor_int32", + shape=[2], + dtype="int32") + shape_tensor_int64 = fluid.data(name="shape_tensor_int64", + shape=[2], + dtype="int64") + + out_1 = fluid.layers.fill_constant(shape=[1, 2], + dtype="float32", + value=1.1) + + out_2 = 
fluid.layers.fill_constant(shape=[1, positive_2_int32], + dtype="float32", + value=1.1) + + out_3 = fluid.layers.fill_constant(shape=[1, positive_2_int64], + dtype="float32", + value=1.1) + + out_4 = fluid.layers.fill_constant(shape=shape_tensor_int32, + dtype="float32", + value=1.1) + + out_5 = fluid.layers.fill_constant(shape=shape_tensor_int64, + dtype="float32", + value=1.1) + + out_6 = fluid.layers.fill_constant(shape=shape_tensor_int64, + dtype=np.float32, + value=1.1) + + val1 = fluid.layers.fill_constant(shape=[1], + dtype=np.float32, + value=1.1) + val2 = fluid.layers.fill_constant(shape=[1], + dtype=np.float64, + value=1.1) + out_7 = fluid.layers.fill_constant(shape=shape_tensor_int64, + dtype=np.float32, + value=val1) + + out_8 = fluid.layers.fill_constant(shape=shape_tensor_int64, + dtype=np.float32, + value=val2) exe = fluid.Executor(place=fluid.CPUPlace()) res_1, res_2, res_3, res_4, res_5, res_6, res_7, res_8 = exe.run( @@ -318,9 +346,7 @@ class TestFillConstantAPI(unittest.TestCase): "shape_tensor_int32": np.array([1, 2]).astype("int32"), "shape_tensor_int64": np.array([1, 2]).astype("int64"), }, - fetch_list=[ - out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8 - ]) + fetch_list=[out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8]) assert np.array_equal(res_1, np.full([1, 2], 1.1, dtype="float32")) assert np.array_equal(res_2, np.full([1, 2], 1.1, dtype="float32")) @@ -333,6 +359,7 @@ class TestFillConstantAPI(unittest.TestCase): class TestFillConstantImperative(unittest.TestCase): + def test_api(self): with fluid.dygraph.guard(): data1 = np.array([1, 2]).astype('int32') @@ -341,26 +368,26 @@ class TestFillConstantImperative(unittest.TestCase): shape = fluid.dygraph.to_variable(data1) val = fluid.dygraph.to_variable(data2) value = fluid.dygraph.to_variable(data3) - res1 = fluid.layers.fill_constant( - shape=[1, 2], dtype='float32', value=1.1) - res2 = fluid.layers.fill_constant( - shape=shape, dtype='float32', value=1.1) - res3 = fluid.layers.fill_constant( - shape=shape, dtype='float32', value=val) - res4 = fluid.layers.fill_constant( - shape=shape, dtype='int32', value=value) - assert np.array_equal( - res1.numpy(), np.full( - [1, 2], 1.1, dtype="float32")) - assert np.array_equal( - res2.numpy(), np.full( - [1, 2], 1.1, dtype="float32")) - assert np.array_equal( - res3.numpy(), np.full( - [1, 2], 1.1, dtype="float32")) - assert np.array_equal( - res4.numpy(), np.full( - [1, 2], 88, dtype="int32")) + res1 = fluid.layers.fill_constant(shape=[1, 2], + dtype='float32', + value=1.1) + res2 = fluid.layers.fill_constant(shape=shape, + dtype='float32', + value=1.1) + res3 = fluid.layers.fill_constant(shape=shape, + dtype='float32', + value=val) + res4 = fluid.layers.fill_constant(shape=shape, + dtype='int32', + value=value) + assert np.array_equal(res1.numpy(), + np.full([1, 2], 1.1, dtype="float32")) + assert np.array_equal(res2.numpy(), + np.full([1, 2], 1.1, dtype="float32")) + assert np.array_equal(res3.numpy(), + np.full([1, 2], 1.1, dtype="float32")) + assert np.array_equal(res4.numpy(), + np.full([1, 2], 88, dtype="int32")) def test_nan(self): with fluid.dygraph.guard(): @@ -380,45 +407,42 @@ class TestFillConstantImperative(unittest.TestCase): class TestFillConstantOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): #for ci coverage x1 = fluid.layers.data(name='x1', shape=[1], dtype="int16") - self.assertRaises( - TypeError, - fluid.layers.fill_constant, - shape=[1], - value=5, - dtype='uint4') - - self.assertRaises( - 
TypeError, - fluid.layers.fill_constant, - shape=[1.1], - value=5, - dtype='float32', - out=x1) + self.assertRaises(TypeError, + fluid.layers.fill_constant, + shape=[1], + value=5, + dtype='uint4') + + self.assertRaises(TypeError, + fluid.layers.fill_constant, + shape=[1.1], + value=5, + dtype='float32', + out=x1) # The argument dtype of fill_constant_op must be one of bool, float16, #float32, float64, uint8, int16, int32 or int64 x2 = fluid.layers.data(name='x2', shape=[1], dtype="int32") - self.assertRaises( - TypeError, - fluid.layers.fill_constant, - shape=[1], - value=5, - dtype='float64', - out=x2) + self.assertRaises(TypeError, + fluid.layers.fill_constant, + shape=[1], + value=5, + dtype='float64', + out=x2) x3 = np.random.randn(100, 100).astype('int32') - self.assertRaises( - TypeError, - fluid.layers.fill_constant, - shape=[100, 100], - value=5, - dtype='float64', - out=x3) + self.assertRaises(TypeError, + fluid.layers.fill_constant, + shape=[100, 100], + value=5, + dtype='float64', + out=x3) # The argument shape's type of fill_constant_op must be list, tuple or Variable. def test_shape_type(): @@ -434,18 +458,22 @@ class TestFillConstantOpError(unittest.TestCase): # The shape dtype of fill_constant_op must be int32 or int64. def test_shape_tensor_dtype(): - shape = fluid.data( - name="shape_tensor", shape=[2], dtype="float32") - fluid.layers.fill_constant( - shape=shape, dtype="float32", value=1) + shape = fluid.data(name="shape_tensor", + shape=[2], + dtype="float32") + fluid.layers.fill_constant(shape=shape, + dtype="float32", + value=1) self.assertRaises(TypeError, test_shape_tensor_dtype) def test_shape_tensor_list_dtype(): - shape = fluid.data( - name="shape_tensor_list", shape=[1], dtype="bool") - fluid.layers.fill_constant( - shape=[shape, 2], dtype="float32", value=1) + shape = fluid.data(name="shape_tensor_list", + shape=[1], + dtype="bool") + fluid.layers.fill_constant(shape=[shape, 2], + dtype="float32", + value=1) self.assertRaises(TypeError, test_shape_tensor_list_dtype) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_flatten2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_flatten2_op_mlu.py index b5f79a92b19..df0d6f23a23 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_flatten2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_flatten2_op_mlu.py @@ -19,12 +19,15 @@ import numpy as np import paddle.fluid as fluid import paddle import sys + sys.path.append("..") from op_test import OpTest + paddle.enable_static() class TestFlattenOp(OpTest): + def setUp(self): self.place = paddle.device.MLUPlace(0) self.__class__.use_mlu = True @@ -53,6 +56,7 @@ class TestFlattenOp(OpTest): class TestFlattenOp1(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.axis = 0 @@ -60,6 +64,7 @@ class TestFlattenOp1(TestFlattenOp): class TestFlattenOpWithDefaultAxis(TestFlattenOp): + def init_test_case(self): self.in_shape = (10, 2, 2, 3) self.new_shape = (10, 12) @@ -69,6 +74,7 @@ class TestFlattenOpWithDefaultAxis(TestFlattenOp): class TestFlattenOpSixDims(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) self.axis = 4 @@ -76,6 +82,7 @@ class TestFlattenOpSixDims(TestFlattenOp): class TestStaticFlattenInferShapePythonAPI(unittest.TestCase): + def execute_api(self, x, axis=1): return fluid.layers.flatten(x, axis=axis) @@ -83,13 +90,15 @@ class TestStaticFlattenInferShapePythonAPI(unittest.TestCase): paddle.enable_static() main_prog = paddle.static.Program() with 
paddle.static.program_guard(main_prog, paddle.static.Program()): - x = paddle.static.data( - name="x", shape=[-1, 3, -1, -1], dtype='float32') + x = paddle.static.data(name="x", + shape=[-1, 3, -1, -1], + dtype='float32') out = self.execute_api(x, axis=2) self.assertTrue((-1, -1) == out.shape) class TestFlatten2OpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): input_data = np.random.random((3, 2, 4, 5)).astype("float64") @@ -102,8 +111,9 @@ class TestFlatten2OpError(unittest.TestCase): def test_type(): # dtype must be float32, float64, int8, int32, int64, uint8. - x2 = fluid.layers.data( - name='x2', shape=[3, 2, 4, 5], dtype='float16') + x2 = fluid.layers.data(name='x2', + shape=[3, 2, 4, 5], + dtype='float16') fluid.layers.flatten(x2, axis=1) self.assertRaises(TypeError, test_type) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_flatten_contigous_range_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_flatten_contigous_range_op_mlu.py index 8b14494ea09..1474ec35637 100755 --- a/python/paddle/fluid/tests/unittests/mlu/test_flatten_contigous_range_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_flatten_contigous_range_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -26,6 +27,7 @@ paddle.enable_static() class TestFlattenOp(OpTest): + def setUp(self): self.set_mlu() self.op_type = "flatten_contiguous_range" @@ -66,6 +68,7 @@ class TestFlattenOp(OpTest): class TestFlattenOp_1(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 1 @@ -80,6 +83,7 @@ class TestFlattenOp_1(TestFlattenOp): class TestFlattenOp_2(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -94,6 +98,7 @@ class TestFlattenOp_2(TestFlattenOp): class TestFlattenOp_3(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -108,6 +113,7 @@ class TestFlattenOp_3(TestFlattenOp): class TestFlattenOp_4(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = -2 @@ -122,6 +128,7 @@ class TestFlattenOp_4(TestFlattenOp): class TestFlattenOp_5(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 2 @@ -136,6 +143,7 @@ class TestFlattenOp_5(TestFlattenOp): class TestFlattenOpSixDims(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) self.start_axis = 3 @@ -150,6 +158,7 @@ class TestFlattenOpSixDims(TestFlattenOp): class TestFlattenOp_Float32(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -165,6 +174,7 @@ class TestFlattenOp_Float32(TestFlattenOp): class TestFlattenOp_int32(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -183,6 +193,7 @@ class TestFlattenOp_int32(TestFlattenOp): class TestFlattenOp_uint8(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -201,6 +212,7 @@ class TestFlattenOp_uint8(TestFlattenOp): class TestFlattenOp_int8(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -219,6 +231,7 @@ class TestFlattenOp_int8(TestFlattenOp): class TestFlattenOp_int64(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -237,6 +250,7 @@ class 
TestFlattenOp_int64(TestFlattenOp): class TestFlatten2OpError(unittest.TestCase): + def test_errors(self): image_shape = (2, 3, 4, 4) x = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * @@ -244,22 +258,25 @@ class TestFlatten2OpError(unittest.TestCase): x = x.astype('float32') def test_ValueError1(): - x_var = paddle.static.data( - name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data(name="x", + shape=image_shape, + dtype='float32') out = paddle.flatten(x_var, start_axis=2, stop_axis=1) self.assertRaises(ValueError, test_ValueError1) def test_ValueError2(): - x_var = paddle.static.data( - name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data(name="x", + shape=image_shape, + dtype='float32') paddle.flatten(x_var, start_axis=10, stop_axis=1) self.assertRaises(ValueError, test_ValueError2) def test_ValueError3(): - x_var = paddle.static.data( - name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data(name="x", + shape=image_shape, + dtype='float32') paddle.flatten(x_var, start_axis=2, stop_axis=10) self.assertRaises(ValueError, test_ValueError3) @@ -269,8 +286,9 @@ class TestFlatten2OpError(unittest.TestCase): x2 = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * image_shape[3]).reshape(image_shape) / 100. x2 = x2.astype('float16') - x2_var = paddle.fluid.data( - name='x2', shape=[3, 2, 4, 5], dtype='float16') + x2_var = paddle.fluid.data(name='x2', + shape=[3, 2, 4, 5], + dtype='float16') paddle.flatten(x2_var) self.assertRaises(TypeError, test_type) @@ -282,6 +300,7 @@ class TestFlatten2OpError(unittest.TestCase): class TestStaticFlattenPythonAPI(unittest.TestCase): + def execute_api(self, x, start_axis=0, stop_axis=-1): return paddle.flatten(x, start_axis, stop_axis) @@ -291,8 +310,9 @@ class TestStaticFlattenPythonAPI(unittest.TestCase): main_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, paddle.static.Program()): - x = paddle.static.data( - name="x", shape=[2, 3, 4, 4], dtype='float32') + x = paddle.static.data(name="x", + shape=[2, 3, 4, 4], + dtype='float32') out = self.execute_api(x, start_axis=-2, stop_axis=-1) exe = paddle.static.Executor(place=paddle.MLUPlace(0)) @@ -301,11 +321,13 @@ class TestStaticFlattenPythonAPI(unittest.TestCase): class TestStaticInplaceFlattenPythonAPI(TestStaticFlattenPythonAPI): + def execute_api(self, x, start_axis=0, stop_axis=-1): return x.flatten_(start_axis, stop_axis) class TestFlattenPython(unittest.TestCase): + def test_python_api(self): image_shape = (2, 3, 4, 4) x = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * diff --git a/python/paddle/fluid/tests/unittests/mlu/test_flatten_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_flatten_op_mlu.py index a5503de7cca..5a884cc89cc 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_flatten_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_flatten_op_mlu.py @@ -17,15 +17,18 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle import paddle.fluid as fluid + paddle.enable_static() class TestFlattenOp(OpTest): + def setUp(self): self.place = paddle.device.MLUPlace(0) self.__class__.use_mlu = True @@ -51,6 +54,7 @@ class TestFlattenOp(OpTest): class TestFlattenOp1(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 2, 10) self.axis = 0 @@ -58,6 +62,7 @@ class TestFlattenOp1(TestFlattenOp): class TestFlattenOpWithDefaultAxis(TestFlattenOp): + def 
init_test_case(self): self.in_shape = (10, 2, 2, 3) self.new_shape = (10, 12) @@ -67,6 +72,7 @@ class TestFlattenOpWithDefaultAxis(TestFlattenOp): class TestFlattenOpSixDims(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) self.axis = 4 diff --git a/python/paddle/fluid/tests/unittests/mlu/test_gather_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_gather_op_mlu.py index f0aff986fa1..6c6ddda303d 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_gather_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_gather_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append('..') from op_test import OpTest, convert_float_to_uint16 import paddle @@ -35,6 +36,7 @@ def gather_numpy(x, index, axis): class TestGatherOp(OpTest): + def setUp(self): self.op_type = "gather" self.place = paddle.MLUPlace(0) @@ -65,6 +67,7 @@ class TestGatherOp(OpTest): class TestCase1(TestGatherOp): + def config(self): """ For one dimension input @@ -76,6 +79,7 @@ class TestCase1(TestGatherOp): class TestCase2(TestGatherOp): + def config(self): """ For int64_t index type @@ -87,6 +91,7 @@ class TestCase2(TestGatherOp): class API_TestDygraphGather(unittest.TestCase): + def test_out1(self): paddle.disable_static() input_1 = np.array([[1, 2], [3, 4], [5, 6]]).astype('int32') @@ -124,6 +129,7 @@ class API_TestDygraphGather(unittest.TestCase): class TestGathertError(unittest.TestCase): + def test_error1(self): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): @@ -132,8 +138,9 @@ class TestGathertError(unittest.TestCase): x = paddle.fluid.data(shape=shape, dtype='int8', name='x') axis = paddle.fluid.data(shape=[1], dtype='float32', name='axis') index = paddle.fluid.data(shape=shape, dtype='int32', name='index') - index_float = paddle.fluid.data( - shape=shape, dtype='float32', name='index_float') + index_float = paddle.fluid.data(shape=shape, + dtype='float32', + name='index_float') def test_x_type(): paddle.gather(x, index) @@ -161,8 +168,9 @@ class TestGathertError(unittest.TestCase): shape = [8, 9, 6] x = fluid.data(shape=shape, dtype='int8', name='x') index = fluid.data(shape=shape, dtype='int32', name='mask') - index_float = fluid.data( - shape=shape, dtype='float32', name='index_float') + index_float = fluid.data(shape=shape, + dtype='float32', + name='index_float') def test_x_type(): paddle.fluid.layers.gather(x, index) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_gaussian_random_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_gaussian_random_op_mlu.py index 6f64196a586..9f755de6872 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_gaussian_random_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_gaussian_random_op_mlu.py @@ -22,6 +22,7 @@ import paddle.fluid.core as core from paddle.fluid.op import Operator from paddle.fluid.executor import Executor import sys + sys.path.append('..') from op_test import OpTest import paddle @@ -30,6 +31,7 @@ paddle.enable_static() class TestGaussianRandomOp(OpTest): + def setUp(self): self.op_type = "gaussian_random" self.place = paddle.device.MLUPlace(0) @@ -63,13 +65,12 @@ class TestGaussianRandomOp(OpTest): hist2, _ = np.histogram(data, range=(-3, 5)) hist2 = hist2.astype("float32") hist2 /= float(outs[0].size) - self.assertTrue( - np.allclose( - hist, hist2, rtol=0, atol=0.01), - "hist: " + str(hist) + " hist2: " + str(hist2)) + self.assertTrue(np.allclose(hist, hist2, rtol=0, 
atol=0.01), + "hist: " + str(hist) + " hist2: " + str(hist2)) class TestMeanStdAreInt(TestGaussianRandomOp): + def set_attrs(self): self.mean = 1 self.std = 2 diff --git a/python/paddle/fluid/tests/unittests/mlu/test_gelu_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_gelu_op_mlu.py index c62d30d43c0..2cf89789bfc 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_gelu_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_gelu_op_mlu.py @@ -18,6 +18,7 @@ import numpy as np from scipy import special import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -33,6 +34,7 @@ def np_gelu(x): class TestGelu(OpTest): + def setUp(self): self.set_mlu() self.op_type = "gelu" @@ -57,11 +59,13 @@ class TestGelu(OpTest): self.check_output_with_place(self.place, atol=1e-3) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', max_relative_error=0.007) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.007) class TestGeluFp16(OpTest): + def setUp(self): self.set_mlu() self.op_type = "gelu" @@ -88,6 +92,7 @@ class TestGeluFp16(OpTest): class TestGeluNet(unittest.TestCase): + def _test(self, run_mlu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -102,8 +107,9 @@ class TestGeluNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') c = paddle.multiply(a, b) @@ -127,12 +133,13 @@ class TestGeluNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_layer_norm_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_layer_norm_op_mlu.py index 8b32692020c..5df59be28a8 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_layer_norm_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_layer_norm_op_mlu.py @@ -23,6 +23,7 @@ import paddle.fluid as fluid import paddle.nn.functional as F from functools import reduce import sys + sys.path.append('..') from op_test import _set_use_system_allocator from paddle.fluid import Program, program_guard @@ -37,6 +38,7 @@ _set_use_system_allocator(True) class TestLayerNormOp(unittest.TestCase): + def setUp(self): self.use_cudnn = True self.place = paddle.device.MLUPlace(0) @@ -52,6 +54,7 @@ class TestLayerNormOp(unittest.TestCase): has_bias=True, y_grad_scale=1.0, use_mkldnn=False): + def test_with_place(place, shape, begin_norm_axis, @@ -68,8 +71,8 @@ class TestLayerNormOp(unittest.TestCase): np.float32) if has_scale else None bias = np.random.random_sample(scale_shape).astype( np.float32) if has_bias else None - y_grad = (np.random.random_sample(x_shape) * - y_grad_scale).astype(np.float32) + y_grad = (np.random.random_sample(x_shape) * y_grad_scale).astype( + np.float32) # reference forward & backward y, mean, variance = 
_reference_layer_norm_naive( @@ -90,10 +93,9 @@ class TestLayerNormOp(unittest.TestCase): with fluid.program_guard(program): block = program.global_block() for name in ground_truth: - block.create_var( - name=name, - dtype='float32', - shape=ground_truth[name].shape) + block.create_var(name=name, + dtype='float32', + shape=ground_truth[name].shape) inputs = {"X": block.var('x')} fetch_list = [ 'y', @@ -163,83 +165,79 @@ class TestLayerNormOp(unittest.TestCase): def test_check_forward_backward_with_scale_and_bias(self): self.check_forward_backward(shape=[1, 3, 4, 5], begin_norm_axis=1) self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=1) - self.check_forward_backward( - shape=[2, 3, 4, 5], - begin_norm_axis=1, - has_scale=False, - has_bias=True) - self.check_forward_backward( - shape=[2, 3, 4, 5], - begin_norm_axis=1, - has_scale=True, - has_bias=False) - self.check_forward_backward( - shape=[2, 3, 4, 5], - begin_norm_axis=1, - has_scale=False, - has_bias=False) + self.check_forward_backward(shape=[2, 3, 4, 5], + begin_norm_axis=1, + has_scale=False, + has_bias=True) + self.check_forward_backward(shape=[2, 3, 4, 5], + begin_norm_axis=1, + has_scale=True, + has_bias=False) + self.check_forward_backward(shape=[2, 3, 4, 5], + begin_norm_axis=1, + has_scale=False, + has_bias=False) self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3) - self.check_forward_backward( - shape=[92, 513, 129], begin_norm_axis=2, y_grad_scale=0.1) + self.check_forward_backward(shape=[92, 513, 129], + begin_norm_axis=2, + y_grad_scale=0.1) self.check_forward_backward(shape=[3, 34, 1134], begin_norm_axis=2) - self.check_forward_backward( - shape=[92, 513, 1134], begin_norm_axis=2, y_grad_scale=0.1) - self.check_forward_backward( - shape=[92, 513, 1134], - begin_norm_axis=2, - has_scale=False, - has_bias=True, - y_grad_scale=0.1) - self.check_forward_backward( - shape=[92, 513, 1134], - begin_norm_axis=2, - has_scale=True, - has_bias=False, - y_grad_scale=0.1) - self.check_forward_backward( - shape=[92, 513, 1134], - begin_norm_axis=2, - has_scale=False, - has_bias=False, - y_grad_scale=0.1) - self.check_forward_backward( - shape=[512, 1024], begin_norm_axis=1, has_scale=True, has_bias=True) + self.check_forward_backward(shape=[92, 513, 1134], + begin_norm_axis=2, + y_grad_scale=0.1) + self.check_forward_backward(shape=[92, 513, 1134], + begin_norm_axis=2, + has_scale=False, + has_bias=True, + y_grad_scale=0.1) + self.check_forward_backward(shape=[92, 513, 1134], + begin_norm_axis=2, + has_scale=True, + has_bias=False, + y_grad_scale=0.1) + self.check_forward_backward(shape=[92, 513, 1134], + begin_norm_axis=2, + has_scale=False, + has_bias=False, + y_grad_scale=0.1) + self.check_forward_backward(shape=[512, 1024], + begin_norm_axis=1, + has_scale=True, + has_bias=True) class TestLayerNormAPI(unittest.TestCase): + def test_case(self): - x = fluid.layers.data( - name='x', - shape=[64, 32, 256], - dtype='float32', - append_batch_size=False) - x = fluid.layers.layer_norm( - x, - scale=True, - shift=True, - begin_norm_axis=1, - epsilon=1e-05, - param_attr=None, - bias_attr=None) - x = fluid.layers.layer_norm( - x, - scale=False, - shift=False, - begin_norm_axis=1, - epsilon=1e-05, - param_attr=None, - bias_attr=None) - x = fluid.layers.layer_norm( - x, - scale=False, - shift=False, - begin_norm_axis=1, - epsilon=1e-05, - param_attr="scale", - bias_attr="shift") + x = fluid.layers.data(name='x', + shape=[64, 32, 256], + dtype='float32', + append_batch_size=False) + x = fluid.layers.layer_norm(x, + 
scale=True, + shift=True, + begin_norm_axis=1, + epsilon=1e-05, + param_attr=None, + bias_attr=None) + x = fluid.layers.layer_norm(x, + scale=False, + shift=False, + begin_norm_axis=1, + epsilon=1e-05, + param_attr=None, + bias_attr=None) + x = fluid.layers.layer_norm(x, + scale=False, + shift=False, + begin_norm_axis=1, + epsilon=1e-05, + param_attr="scale", + bias_attr="shift") class TestDygraphLayerNormAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): paddle.enable_static() @@ -255,6 +253,7 @@ class TestDygraphLayerNormAPIError(unittest.TestCase): class TestFP16ScaleBiasLayerNorm(unittest.TestCase): + def check_main(self, x_np, weight_np, bias_np, dtype): paddle.disable_static() @@ -297,6 +296,7 @@ class TestFP16ScaleBiasLayerNorm(unittest.TestCase): class TestGetSetKeepLayerNormScaleBiasFP32Flag(unittest.TestCase): + def test_main(self): self.assertTrue(_keep_layer_norm_scale_bias_to_fp32()) _keep_layer_norm_scale_bias_to_fp32(False) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_leaky_relu_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_leaky_relu_op_mlu.py index ec2150fceb1..0aad79eb61f 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_leaky_relu_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_leaky_relu_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest from test_activation_op import ref_leaky_relu @@ -28,6 +29,7 @@ SEED = 2021 class TestLeadyRelu(OpTest): + def setUp(self): self.set_mlu() self.op_type = "leaky_relu" @@ -63,28 +65,33 @@ class TestLeadyRelu(OpTest): def test_check_grad(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['X'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.006) else: self.check_grad_with_place(self.place, ['X'], 'Out') class TestLeadyReluFP16(TestLeadyRelu): + def init_dtype(self): self.dtype = np.float16 class TestLeadyRelu2(TestLeadyRelu): + def set_attrs(self): self.attrs = {'alpha': 0.5} class TestLeadyRelu3(TestLeadyRelu): + def set_attrs(self): self.attrs = {'alpha': -0.5} class TestLeakyReluNet(unittest.TestCase): + def _test(self, run_mlu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -97,8 +104,9 @@ class TestLeakyReluNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): x = paddle.static.data(name="x", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') y = paddle.nn.functional.leaky_relu(x) @@ -122,8 +130,10 @@ class TestLeakyReluNet(unittest.TestCase): for epoch in range(100): pred_res, loss_res = exe.run(main_prog, - feed={"x": x_np, - "label": label_np}, + feed={ + "x": x_np, + "label": label_np + }, fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( diff --git a/python/paddle/fluid/tests/unittests/mlu/test_log_softmax_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_log_softmax_op_mlu.py index dea6391b8ba..a1d594b93d0 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_log_softmax_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_log_softmax_op_mlu.py @@ -41,6 +41,7 @@ def ref_log_softmax_grad(x, axis): class TestLogSoftmaxOp(OpTest): + def setUp(self): self.op_type = 
'log_softmax' self.set_mlu() @@ -69,21 +70,24 @@ class TestLogSoftmaxOp(OpTest): self.check_output_with_place(self.place) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X'], ['Out'], user_defined_grads=[self.x_grad]) + self.check_grad_with_place(self.place, ['X'], ['Out'], + user_defined_grads=[self.x_grad]) class TestLogSoftmaxShape(TestLogSoftmaxOp): + def set_attrs(self): self.shape = [12, 10] class TestLogSoftmaxAxis(TestLogSoftmaxOp): + def set_attrs(self): self.axis = 1 class TestNNLogSoftmaxAPI(unittest.TestCase): + def setUp(self): self.set_mlu() self.x_shape = [2, 3, 4, 5] @@ -118,6 +122,7 @@ class TestNNLogSoftmaxAPI(unittest.TestCase): class TestNNFunctionalLogSoftmaxAPI(unittest.TestCase): + def setUp(self): self.set_mlu() self.x_shape = [2, 3, 4, 5] diff --git a/python/paddle/fluid/tests/unittests/mlu/test_lookup_table_v2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_lookup_table_v2_op_mlu.py index f9a08ba4c9b..17ef85dd2bd 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_lookup_table_v2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_lookup_table_v2_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2022 class TestLookupTableV2(OpTest): + def setUp(self): self.set_mlu() self.op_type = "lookup_table_v2" @@ -36,8 +38,9 @@ class TestLookupTableV2(OpTest): self.init_padding_idx() np.random.seed(SEED) w = np.random.random([self.vocab, self.dim]).astype(self.dtype) - x = np.random.randint( - 0, self.vocab, size=(self.bsz, self.seqlen)).astype(self.ids_dtype) + x = np.random.randint(0, self.vocab, + size=(self.bsz, + self.seqlen)).astype(self.ids_dtype) out = w[x] if self.padding_idx != -1: out[np.squeeze(x == self.padding_idx)] = np.zeros(self.dim) @@ -77,8 +80,9 @@ class TestLookupTableV2(OpTest): def test_check_grad(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['W'], 'Out', max_relative_error=0.01) + self.check_grad_with_place(self.place, ['W'], + 'Out', + max_relative_error=0.01) else: self.check_grad_with_place(self.place, ['W'], 'Out') @@ -97,6 +101,7 @@ class TestLookupTableV2FP16(TestLookupTableV2): class TestLookupTableV2Dim32(TestLookupTableV2): + def init_dims(self): self.bsz = 6 self.seqlen = 8 @@ -125,11 +130,13 @@ class TestLookupTableV2Dim32FP16(TestLookupTableV2): class TestLookupTableV2WithPadding(TestLookupTableV2): + def init_padding_idx(self): self.padding_idx = np.random.randint(0, self.vocab) class TestLookupTableV2WithPadding1(TestLookupTableV2): + def init_padding_idx(self): self.padding_idx = np.random.randint(0, self.vocab) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_matmul_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_matmul_op_mlu.py index adfff112e6b..e8e69440ab4 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_matmul_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_matmul_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -294,7 +295,9 @@ class TestMatMulOp13(TestMatMulOp): #--------------------test matmul fp16-------------------- def create_test_fp16_class(parent, atol=0.001, max_relative_error=2.5): + class TestMatMulOpFp16Case(parent): + def init_kernel_type(self): self.dtype = np.float16 @@ -302,10 +305,9 @@ def 
create_test_fp16_class(parent, atol=0.001, max_relative_error=2.5): self.check_output_with_place(self.place, atol=atol) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X', 'Y'], - 'Out', - max_relative_error=max_relative_error) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + max_relative_error=max_relative_error) cls_name = "{0}_{1}".format(parent.__name__, "Fp16") TestMatMulOpFp16Case.__name__ = cls_name diff --git a/python/paddle/fluid/tests/unittests/mlu/test_matmul_v2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_matmul_v2_op_mlu.py index 011769c29db..85c73aa78ce 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_matmul_v2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_matmul_v2_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -291,7 +292,9 @@ class TestMatMuklOpBroadcast2(TestMatMulV2Op): def create_test_fp16_class(parent, atol=0.001, max_relative_error=2.5): + class TestMatMulOpFp16Case(parent): + def init_kernel_type(self): self.dtype = np.float16 @@ -299,10 +302,9 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=2.5): self.check_output_with_place(self.place, atol=atol) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X', 'Y'], - 'Out', - max_relative_error=max_relative_error) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + max_relative_error=max_relative_error) cls_name = "{0}_{1}".format(parent.__name__, "Fp16") TestMatMulOpFp16Case.__name__ = cls_name @@ -329,6 +331,7 @@ create_test_fp16_class(TestMatMuklOp17) class TestMatMulV2API(unittest.TestCase): + def setUp(self): self.places = [paddle.CPUPlace()] if paddle.is_compiled_with_mlu(): @@ -346,8 +349,10 @@ class TestMatMulV2API(unittest.TestCase): exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), - feed={"input_x": x_np, - "input_y": y_np}, + feed={ + "input_x": x_np, + "input_y": y_np + }, fetch_list=[result]) def test_static(self): diff --git a/python/paddle/fluid/tests/unittests/mlu/test_mean_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_mean_op_mlu.py index 36419327db6..2b296b2d7dc 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_mean_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_mean_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestMean(OpTest): + def setUp(self): self.set_mlu() self.place = paddle.device.MLUPlace(0) @@ -55,6 +57,7 @@ class TestMean(OpTest): class TestMeanFP16(OpTest): + def setUp(self): self.set_mlu() self.place = paddle.MLUPlace(0) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_merged_momentum_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_merged_momentum_op_mlu.py index f3699da15b5..31eb98b7a88 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_merged_momentum_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_merged_momentum_op_mlu.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,6 +13,7 @@ # limitations under the License. import sys + sys.path.append('..') import unittest import paddle @@ -47,22 +48,21 @@ def run_momentum_op(params, } param_vars = [ - helper.create_variable( - persistable=True, shape=p.shape, dtype=p.dtype) for p in params + helper.create_variable(persistable=True, + shape=p.shape, + dtype=p.dtype) for p in params ] grad_vars = [ - helper.create_variable( - shape=g.shape, dtype=g.dtype) for g in grads + helper.create_variable(shape=g.shape, dtype=g.dtype) for g in grads ] velocity_vars = [ - helper.create_variable( - persistable=True, shape=v.shape, dtype=v.dtype) - for v in velocitys + helper.create_variable(persistable=True, + shape=v.shape, + dtype=v.dtype) for v in velocitys ] - lr_var = helper.create_variable( - persistable=True, - shape=learning_rate.shape, - dtype=learning_rate.dtype) + lr_var = helper.create_variable(persistable=True, + shape=learning_rate.shape, + dtype=learning_rate.dtype) feed_dict = OrderedDict() @@ -81,14 +81,15 @@ def run_momentum_op(params, if multi_precision: master_param_vars = [ - helper.create_variable( - persistable=True, shape=p.shape, dtype=p.dtype) - for p in master_params + helper.create_variable(persistable=True, + shape=p.shape, + dtype=p.dtype) for p in master_params ] feed_dict.update( - OrderedDict([(mp_var.name, mp_val) - for mp_var, mp_val in zip(master_param_vars, - master_params)])) + OrderedDict([ + (mp_var.name, mp_val) + for mp_var, mp_val in zip(master_param_vars, master_params) + ])) # CPUPlace does not use MasterParam if isinstance(place, paddle.CUDAPlace): fetch_list = fetch_list + [ @@ -110,8 +111,10 @@ def run_momentum_op(params, if multi_precision: inputs['MasterParam'] = master_param_vars[i] outputs['MasterParamOut'] = master_param_vars[i] - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) else: inputs = { 'Param': param_vars, @@ -123,8 +126,10 @@ def run_momentum_op(params, if multi_precision: inputs['MasterParam'] = master_param_vars outputs['MasterParamOut'] = master_param_vars - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) exe = paddle.static.Executor(place) with paddle.static.scope_guard(paddle.static.Scope()): @@ -154,22 +159,21 @@ def run_momentum_op2(params, helper = LayerHelper(op_type, **locals()) param_vars = [ - helper.create_variable( - persistable=True, shape=p.shape, dtype=p.dtype) for p in params + helper.create_variable(persistable=True, + shape=p.shape, + dtype=p.dtype) for p in params ] grad_vars = [ - helper.create_variable( - shape=g.shape, dtype=g.dtype) for g in grads + helper.create_variable(shape=g.shape, dtype=g.dtype) for g in grads ] velocity_vars = [ - helper.create_variable( - persistable=True, shape=v.shape, dtype=v.dtype) - for v in velocitys + helper.create_variable(persistable=True, + shape=v.shape, + dtype=v.dtype) for v in velocitys ] - lr_var = helper.create_variable( - persistable=True, - shape=learning_rate.shape, - dtype=learning_rate.dtype) + lr_var = helper.create_variable(persistable=True, + 
shape=learning_rate.shape, + dtype=learning_rate.dtype) feed_dict = OrderedDict() @@ -188,14 +192,15 @@ def run_momentum_op2(params, if multi_precision: master_param_vars = [ - helper.create_variable( - persistable=True, shape=p.shape, dtype=p.dtype) - for p in master_params + helper.create_variable(persistable=True, + shape=p.shape, + dtype=p.dtype) for p in master_params ] feed_dict.update( - OrderedDict([(mp_var.name, mp_val) - for mp_var, mp_val in zip(master_param_vars, - master_params)])) + OrderedDict([ + (mp_var.name, mp_val) + for mp_var, mp_val in zip(master_param_vars, master_params) + ])) # CPUPlace does not use MasterParam if isinstance(place, paddle.CUDAPlace): fetch_list = fetch_list + [ @@ -225,8 +230,10 @@ def run_momentum_op2(params, 'regularization_method': 'l2_decay', 'regularization_coeff': 2.0, } - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) else: inputs = { 'Param': param_vars, @@ -239,16 +246,22 @@ def run_momentum_op2(params, inputs['MasterParam'] = master_param_vars outputs['MasterParamOut'] = master_param_vars attrs = { - 'mu': mu, - 'multi_precision': multi_precision, - 'rescale_grad': rescale_grad, - 'use_nesterov': use_nesterov, + 'mu': + mu, + 'multi_precision': + multi_precision, + 'rescale_grad': + rescale_grad, + 'use_nesterov': + use_nesterov, 'regularization_method': ['l2_decay' for i in range(len(param_vars))], 'regularization_coeff': [2.0 for i in range(len(param_vars))], } - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) exe = paddle.static.Executor(place) with paddle.static.scope_guard(paddle.static.Scope()): @@ -257,6 +270,7 @@ def run_momentum_op2(params, class TestMergedMomentum(unittest.TestCase): + def setUp(self): paddle.enable_static() self.shapes = [[3, 4], [2, 7], [5, 6], [7, 8]] @@ -286,18 +300,17 @@ class TestMergedMomentum(unittest.TestCase): self.shapes, multi_precision, self.seed, place) def run_op(use_merged): - # MLU Momentum Op does not support rescale_grad + # MLU Momentum Op does not support rescale_grad rescale_grad = 1.0 - return run_momentum_op( - params, - grads, - velocitys, - master_params, - learning_rate, - place, - multi_precision, - rescale_grad=rescale_grad, - use_merged=use_merged) + return run_momentum_op(params, + grads, + velocitys, + master_params, + learning_rate, + place, + multi_precision, + rescale_grad=rescale_grad, + use_merged=use_merged) outs1 = run_op(True) outs2 = run_op(False) @@ -310,6 +323,7 @@ class TestMergedMomentum(unittest.TestCase): class TestMergedMomentum2(unittest.TestCase): + def setUp(self): paddle.enable_static() self.shapes = [[3, 4], [2, 7], [5, 6], [7, 8]] @@ -339,19 +353,18 @@ class TestMergedMomentum2(unittest.TestCase): self.shapes, multi_precision, self.seed, place) def run_op(use_nesterov, use_merged): - # MLU Momentum Op does not support rescale_grad + # MLU Momentum Op does not support rescale_grad rescale_grad = 1.0 - return run_momentum_op2( - params, - grads, - velocitys, - master_params, - learning_rate, - place, - multi_precision, - rescale_grad=rescale_grad, - use_merged=use_merged, - use_nesterov=use_nesterov) + return run_momentum_op2(params, + grads, + velocitys, + master_params, + learning_rate, + place, + multi_precision, + rescale_grad=rescale_grad, + use_merged=use_merged, + use_nesterov=use_nesterov) outs1 = run_op(use_nesterov=True, 
use_merged=True) outs2 = run_op(use_nesterov=True, use_merged=False) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py index a2cd69fee32..abe16155d03 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py @@ -19,6 +19,7 @@ import numpy as np import paddle.fluid.core as core from paddle.fluid.op import Operator import sys + sys.path.append('..') from op_test import OpTest import paddle @@ -30,6 +31,7 @@ paddle.enable_static() class TestMomentumOp1(OpTest): + def setUp(self): self.op_type = "momentum" self.dtype = np.float32 @@ -74,6 +76,7 @@ class TestMomentumOp1(OpTest): class TestMomentumOpFp16(TestMomentumOp1): + def init_dtype(self): self.dtype = np.float16 @@ -121,14 +124,16 @@ class TestMomentumOp2(OpTest): class TestMomentumV2(unittest.TestCase): + def test_momentum_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") a = paddle.to_tensor(value) linear = paddle.nn.Linear(13, 5) # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Momentum( - learning_rate=0.01, momentum=0.9, parameters=linear.parameters()) + adam = paddle.optimizer.Momentum(learning_rate=0.01, + momentum=0.9, + parameters=linear.parameters()) out = linear(a) out.backward() adam.step() @@ -145,13 +150,13 @@ class TestMomentumV2(unittest.TestCase): cost = fluid.layers.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) - rms_optimizer = paddle.optimizer.Momentum( - learning_rate=0.1, momentum=0.9) + rms_optimizer = paddle.optimizer.Momentum(learning_rate=0.1, + momentum=0.9) rms_optimizer.minimize(avg_cost) fetch_list = [avg_cost] - train_reader = paddle.batch( - paddle.dataset.uci_housing.train(), batch_size=1) + train_reader = paddle.batch(paddle.dataset.uci_housing.train(), + batch_size=1) feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) @@ -159,12 +164,14 @@ class TestMomentumV2(unittest.TestCase): exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list) def test_raise_error(self): - self.assertRaises( - ValueError, paddle.optimizer.Momentum, learning_rate=None) + self.assertRaises(ValueError, + paddle.optimizer.Momentum, + learning_rate=None) self.assertRaises(ValueError, paddle.optimizer.Momentum, momentum=None) class TestMomentumOpWithDecay(OpTest): + def setUp(self): self.op_type = "momentum" self.place = paddle.device.MLUPlace(0) @@ -219,6 +226,7 @@ class TestMomentumOpWithDecay(OpTest): class TestMomentumOpWithDecayFP16(TestMomentumOpWithDecay): + def init_config(self): self.dtype = np.float16 @@ -227,11 +235,13 @@ class TestMomentumOpWithDecayFP16(TestMomentumOpWithDecay): class TestMomentumOpWithDecay2(TestMomentumOpWithDecay): + def init_config(self): self.use_nesterov = False class TestMomentumOpWithDecayAPI(unittest.TestCase): + def _test_momentum_dygraph_common(self, regularization): paddle.disable_static() inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") @@ -268,8 +278,8 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase): momentum_optimizer.minimize(avg_cost) fetch_list = [avg_cost] - train_reader = paddle.batch( - paddle.dataset.uci_housing.train(), batch_size=1) + train_reader = paddle.batch(paddle.dataset.uci_housing.train(), + batch_size=1) feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) exe = fluid.Executor(place) 
exe.run(fluid.default_startup_program()) @@ -278,14 +288,17 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase): class TestFusedMomentumWithDecayAPI(unittest.TestCase): + def get_program(self, weight_attr, bias_attr=False): main_program = paddle.static.Program() startup_program = paddle.static.Program() - with paddle.static.program_guard( - main_program=main_program, startup_program=startup_program): + with paddle.static.program_guard(main_program=main_program, + startup_program=startup_program): x = paddle.static.data(name='x', shape=[10, 10]) - linear = paddle.nn.Linear( - 10, 10, weight_attr=weight_attr, bias_attr=bias_attr) + linear = paddle.nn.Linear(10, + 10, + weight_attr=weight_attr, + bias_attr=bias_attr) out = linear(x) loss = paddle.mean(out) optimizer = paddle.optimizer.Momentum( @@ -349,10 +362,11 @@ class TestFusedMomentumWithDecayAPI(unittest.TestCase): class TestMomentumOpVsMomentumOpWithDecayAPI(unittest.TestCase): + def __update_params(self, momentum, linear): for i in range(10): - inp = paddle.full( - shape=[2, 2], fill_value=i, dtype='float32').astype("float32") + inp = paddle.full(shape=[2, 2], fill_value=i, + dtype='float32').astype("float32") inp = paddle.to_tensor(inp) out = linear(inp) loss = paddle.mean(out) @@ -401,6 +415,7 @@ class TestMomentumOpVsMomentumOpWithDecayAPI(unittest.TestCase): class TestMomentumV2Group(TestMomentumV2): + def test_momentum_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") @@ -408,18 +423,22 @@ class TestMomentumV2Group(TestMomentumV2): linear_1 = paddle.nn.Linear(13, 5) linear_2 = paddle.nn.Linear(5, 3) # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Momentum( - learning_rate=0.01, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001, - 'learning_rate': 0.1, - 'momentum': 0.99 - }], - weight_decay=0.1, - momentum=0.9) + adam = paddle.optimizer.Momentum(learning_rate=0.01, + parameters=[{ + 'params': + linear_1.parameters() + }, { + 'params': + linear_2.parameters(), + 'weight_decay': + 0.001, + 'learning_rate': + 0.1, + 'momentum': + 0.99 + }], + weight_decay=0.1, + momentum=0.9) out = linear_1(a) out = linear_2(out) out.backward() @@ -428,6 +447,7 @@ class TestMomentumV2Group(TestMomentumV2): class TestMultiTensorMomentumDygraph(unittest.TestCase): + def _momentum_optimize_dygraph(self, place, use_param_attr=False, @@ -494,8 +514,7 @@ class TestMultiTensorMomentumDygraph(unittest.TestCase): self.assertEqual(np.allclose(output1, output2, rtol=1e-05), True) for idx in range(len(params1)): self.assertEqual( - np.allclose( - params1[idx], params2[idx], rtol=1e-05), True) + np.allclose(params1[idx], params2[idx], rtol=1e-05), True) def _check_with_param_arrt(self, place, use_amp): output1, params1 = self._momentum_optimize_dygraph( @@ -511,8 +530,7 @@ class TestMultiTensorMomentumDygraph(unittest.TestCase): self.assertEqual(np.allclose(output1, output2, rtol=1e-05), True) for idx in range(len(params1)): self.assertEqual( - np.allclose( - params1[idx], params2[idx], rtol=1e-05), True) + np.allclose(params1[idx], params2[idx], rtol=1e-05), True) def _check_with_param_group(self, place, use_amp): output1, params1 = self._momentum_optimize_dygraph( @@ -528,8 +546,7 @@ class TestMultiTensorMomentumDygraph(unittest.TestCase): self.assertEqual(np.allclose(output1, output2, rtol=1e-05), True) for idx in range(len(params1)): self.assertEqual( - np.allclose( - params1[idx], params2[idx], rtol=1e-05), True) + 
np.allclose(params1[idx], params2[idx], rtol=1e-05), True) def test_main(self): for place in self._get_places(): @@ -542,6 +559,7 @@ class TestMultiTensorMomentumDygraph(unittest.TestCase): class TestMultiTensorMomentumStatic(unittest.TestCase): + def _momentum_optimize_static(self, place, use_amp=False, @@ -554,8 +572,8 @@ class TestMultiTensorMomentumStatic(unittest.TestCase): exe = paddle.static.Executor(place=paddle.device.MLUPlace(0)) train_program = paddle.static.Program() startup_program = paddle.static.Program() - optimizer = paddle.optimizer.Momentum( - multi_precision=use_amp, use_multi_tensor=use_multi_tensor) + optimizer = paddle.optimizer.Momentum(multi_precision=use_amp, + use_multi_tensor=use_multi_tensor) if use_amp: optimizer = paddle.static.amp.decorate( optimizer, @@ -565,11 +583,13 @@ class TestMultiTensorMomentumStatic(unittest.TestCase): use_fp16_guard=False) with paddle.static.program_guard(train_program, startup_program): if use_amp: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float16') + data = paddle.static.data(shape=[2, 2], + name='X', + dtype='float16') else: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float32') + data = paddle.static.data(shape=[2, 2], + name='X', + dtype='float32') hidden = paddle.static.nn.fc(x=data, size=10) loss = paddle.fluid.layers.mean(hidden) optimizer.minimize(loss) @@ -592,14 +612,15 @@ class TestMultiTensorMomentumStatic(unittest.TestCase): return places def _check_with_place_amp(self, place, use_amp): - output1 = self._momentum_optimize_static( - place=place, use_amp=use_amp, use_multi_tensor=True) - output2 = self._momentum_optimize_static( - place=place, use_amp=use_amp, use_multi_tensor=False) + output1 = self._momentum_optimize_static(place=place, + use_amp=use_amp, + use_multi_tensor=True) + output2 = self._momentum_optimize_static(place=place, + use_amp=use_amp, + use_multi_tensor=False) for idx in range(len(output1)): self.assertEqual( - np.allclose( - output1[idx], output2[idx], rtol=1e-05), True) + np.allclose(output1[idx], output2[idx], rtol=1e-05), True) def test_main(self): for place in self._get_places(): diff --git a/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py index a56e9ff7558..9af31dcf73f 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py @@ -18,6 +18,7 @@ import unittest import numpy as np import math import sys + sys.path.append('..') from op_test import OpTest import paddle @@ -30,6 +31,7 @@ paddle.enable_static() class TestOneHotOp(OpTest): + def setUp(self): self.place = paddle.device.MLUPlace(0) self.__class__.use_mlu = True @@ -55,6 +57,7 @@ class TestOneHotOp(OpTest): class TestOneHotOp_attr(OpTest): + def setUp(self): self.place = paddle.device.MLUPlace(0) self.__class__.use_mlu = True @@ -82,6 +85,7 @@ class TestOneHotOp_attr(OpTest): class TestOneHotOp_default_dtype(OpTest): + def setUp(self): self.place = paddle.device.MLUPlace(0) self.__class__.use_mlu = True @@ -107,6 +111,7 @@ class TestOneHotOp_default_dtype(OpTest): class TestOneHotOp_default_dtype_attr(OpTest): + def setUp(self): self.place = paddle.device.MLUPlace(0) self.__class__.use_mlu = True @@ -132,6 +137,7 @@ class TestOneHotOp_default_dtype_attr(OpTest): class TestOneHotOp_exception(unittest.TestCase): + def setUp(self): self.place = paddle.device.MLUPlace(0) self.__class__.use_mlu = True @@ -149,18 +155,18 @@ class 
TestOneHotOp_exception(unittest.TestCase): def test_check_output(self): program = Program() with program_guard(program): - x = fluid.layers.data( - name='x', shape=[self.dimension], dtype='float32', lod_level=1) + x = fluid.layers.data(name='x', + shape=[self.dimension], + dtype='float32', + lod_level=1) block = program.current_block() - one_hot_out = block.create_var( - name="one_hot_out", - type=core.VarDesc.VarType.LOD_TENSOR, - dtype='float32') - block.append_op( - type='one_hot', - inputs={'X': x}, - attrs={'depth': self.depth}, - outputs={'Out': one_hot_out}) + one_hot_out = block.create_var(name="one_hot_out", + type=core.VarDesc.VarType.LOD_TENSOR, + dtype='float32') + block.append_op(type='one_hot', + inputs={'X': x}, + attrs={'depth': self.depth}, + outputs={'Out': one_hot_out}) exe = fluid.Executor(self.place) def run(): @@ -172,6 +178,7 @@ class TestOneHotOp_exception(unittest.TestCase): class TestOneHotOpApi(unittest.TestCase): + def setUp(self): self.place = paddle.device.MLUPlace(0) self.__class__.use_mlu = True @@ -207,12 +214,15 @@ class TestOneHotOpApi(unittest.TestCase): exe = fluid.Executor(self.place) exe.run(fluid.default_startup_program()) - ret = exe.run(feed={'label': label_data, }, + ret = exe.run(feed={ + 'label': label_data, + }, fetch_list=[one_hot_label], return_numpy=False) class BadInputTestOnehotV2(unittest.TestCase): + def setUp(self): self.place = paddle.device.MLUPlace(0) self.__class__.use_mlu = True @@ -221,11 +231,10 @@ class BadInputTestOnehotV2(unittest.TestCase): with fluid.program_guard(fluid.Program()): def test_bad_x(): - label = fluid.layers.data( - name="label", - shape=[4], - append_batch_size=False, - dtype="float32") + label = fluid.layers.data(name="label", + shape=[4], + append_batch_size=False, + dtype="float32") one_hot_label = fluid.one_hot(input=label, depth=4) self.assertRaises(TypeError, test_bad_x) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_pool2d_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_pool2d_op_mlu.py index 1be3d2d85a4..d33646cbfa3 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_pool2d_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_pool2d_op_mlu.py @@ -23,6 +23,7 @@ import paddle.fluid.core as core import paddle.fluid as fluid from paddle.fluid import Program, program_guard import sys + sys.path.append('..') from op_test import OpTest from test_pool2d_op import pool2D_forward_naive, avg_pool2D_forward_naive, max_pool2D_forward_naive, adaptive_start_index, adaptive_end_index @@ -47,8 +48,8 @@ def pool2d_backward_navie(x, for input_size, filter_size, stride_size in zip(input_shape, pool_size, pool_stride): out_size = int((input_size + stride_size - 1) / stride_size) - pad_sum = np.max(( - (out_size - 1) * stride_size + filter_size - input_size, 0)) + pad_sum = np.max( + ((out_size - 1) * stride_size + filter_size - input_size, 0)) pad_0 = int(pad_sum / 2) pad_1 = int(pad_sum - pad_0) padding.append(pad_0) @@ -129,19 +130,19 @@ def pool2d_backward_navie(x, if pool_type == 'avg': if (exclusive or adaptive): - field_size = (in_h_end - in_h_start) * ( - in_w_end - in_w_start) - x_grad[:, :, in_h_start:in_h_end, in_w_start: - in_w_end] += 1 / field_size + field_size = (in_h_end - in_h_start) * (in_w_end - + in_w_start) + x_grad[:, :, in_h_start:in_h_end, + in_w_start:in_w_end] += 1 / field_size elif pool_type == 'max': for n in range(N): for c in range(C): - idx = np.argmax(x[n, c, in_h_start:in_h_end, in_w_start: - in_w_end].flatten()) + idx = np.argmax(x[n, c, 
in_h_start:in_h_end, + in_w_start:in_w_end].flatten()) idx_h = idx // (in_w_end - in_w_start) idx_w = idx % (in_w_end - in_w_start) - x_grad[n, c, in_h_start + idx_h, in_w_start + - idx_w] += 1 + x_grad[n, c, in_h_start + idx_h, + in_w_start + idx_w] += 1 if data_format == "NHWC": x_grad = x_grad.transpose([0, 2, 3, 1]) @@ -149,6 +150,7 @@ def pool2d_backward_navie(x, class TestPool2D_Op_Mixin(object): + def setUp(self): self.place = paddle.device.MLUPlace(0) self.__class__.use_mlu = True @@ -166,10 +168,12 @@ class TestPool2D_Op_Mixin(object): self.init_shape() input = np.random.random(self.shape).astype(self.dtype) - output = pool2D_forward_naive( - input, self.ksize, self.strides, self.paddings, self.global_pool, - self.ceil_mode, self.exclusive, self.adaptive, self.data_format, - self.pool_type, self.padding_algorithm).astype(self.dtype) + output = pool2D_forward_naive(input, self.ksize, self.strides, + self.paddings, self.global_pool, + self.ceil_mode, self.exclusive, + self.adaptive, self.data_format, + self.pool_type, + self.padding_algorithm).astype(self.dtype) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)} self.attrs = { @@ -191,25 +195,23 @@ class TestPool2D_Op_Mixin(object): self.check_output_with_place(self.place) def test_check_grad(self): - x_grad = pool2d_backward_navie( - self.inputs["X"], - ksize=self.ksize, - strides=self.strides, - paddings=self.paddings, - global_pool=self.global_pool, - ceil_mode=False, - exclusive=self.exclusive, - adaptive=self.adaptive, - data_format=self.data_format, - pool_type=self.pool_type, - padding_algorithm=self.padding_algorithm) + x_grad = pool2d_backward_navie(self.inputs["X"], + ksize=self.ksize, + strides=self.strides, + paddings=self.paddings, + global_pool=self.global_pool, + ceil_mode=False, + exclusive=self.exclusive, + adaptive=self.adaptive, + data_format=self.data_format, + pool_type=self.pool_type, + padding_algorithm=self.padding_algorithm) x_grad = x_grad / np.prod(self.outputs['Out'].shape) - self.check_grad_with_place( - self.place, - set(['X']), - 'Out', - max_relative_error=0.06, - user_defined_grads=[x_grad]) + self.check_grad_with_place(self.place, + set(['X']), + 'Out', + max_relative_error=0.06, + user_defined_grads=[x_grad]) def init_data_format(self): self.data_format = "NCHW" @@ -250,6 +252,7 @@ class TestPool2D_Op(TestPool2D_Op_Mixin, OpTest): class TestCase1(TestPool2D_Op): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -269,6 +272,7 @@ class TestCase1(TestPool2D_Op): class TestCase2(TestPool2D_Op): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -288,25 +292,30 @@ class TestCase2(TestPool2D_Op): class TestCase3(TestPool2D_Op): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase4(TestCase1): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase5(TestCase2): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive def create_test_fp16_class(parent): + class TestFp16Case(parent): + def init_data_type(self): self.dtype = np.float16 @@ -330,7 +339,9 @@ create_test_fp16_class(TestCase5) def create_test_use_ceil_class(parent): + class TestPool2DUseCeilCase(parent): + def init_ceil_mode(self): self.ceil_mode = True @@ -344,16 +355,19 @@ create_test_use_ceil_class(TestCase2) class TestAvgInclude(TestCase2): + def init_exclusive(self): self.exclusive = False class 
TestAvgPoolAdaptive(TestCase1): + def init_adaptive(self): self.adaptive = True class TestAvgPoolAdaptiveAsyOutSize(TestCase1): + def init_adaptive(self): self.adaptive = True @@ -370,6 +384,7 @@ class TestAvgPoolAdaptiveAsyOutSize(TestCase1): class TestPool2D_AsyPadding(TestPool2D_Op): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -380,6 +395,7 @@ class TestPool2D_AsyPadding(TestPool2D_Op): class TestCase1_AsyPadding(TestCase1): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -390,6 +406,7 @@ class TestCase1_AsyPadding(TestCase1): class TestCase2_AsyPadding(TestCase2): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -400,6 +417,7 @@ class TestCase2_AsyPadding(TestCase2): class TestCase3_AsyPadding(TestCase3): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -410,6 +428,7 @@ class TestCase3_AsyPadding(TestCase3): class TestCase4_AsyPadding(TestCase4): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -420,6 +439,7 @@ class TestCase4_AsyPadding(TestCase4): class TestCase5_AsyPadding((TestCase5)): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -434,6 +454,7 @@ create_test_use_ceil_class(TestCase2_AsyPadding) class TestAvgInclude_AsyPadding(TestCase2): + def init_exclusive(self): self.exclusive = False @@ -447,6 +468,7 @@ class TestAvgInclude_AsyPadding(TestCase2): class TestAvgPoolAdaptive_AsyPadding(TestCase1): + def init_adaptive(self): self.adaptive = True @@ -461,6 +483,7 @@ class TestAvgPoolAdaptive_AsyPadding(TestCase1): #----------- test channel_last -------------- class TestPool2D_channel_last(TestPool2D_Op): + def init_data_format(self): self.data_format = "NHWC" @@ -469,6 +492,7 @@ class TestPool2D_channel_last(TestPool2D_Op): class TestCase1_channel_last(TestCase1): + def init_data_format(self): self.data_format = "NHWC" @@ -477,6 +501,7 @@ class TestCase1_channel_last(TestCase1): class TestCase2_channel_last(TestCase2): + def init_data_format(self): self.data_format = "NHWC" @@ -485,6 +510,7 @@ class TestCase2_channel_last(TestCase2): class TestCase3_channel_last(TestCase3): + def init_data_format(self): self.data_format = "NHWC" @@ -493,6 +519,7 @@ class TestCase3_channel_last(TestCase3): class TestCase4_channel_last(TestCase4): + def init_data_format(self): self.data_format = "NHWC" @@ -501,6 +528,7 @@ class TestCase4_channel_last(TestCase4): class TestCase5_channel_last(TestCase5): + def init_data_format(self): self.data_format = "NHWC" @@ -513,11 +541,13 @@ create_test_use_ceil_class(TestCase2_channel_last) class TestCase5_Max(TestCase2): + def init_pool_type(self): self.pool_type = "max" class TestCase5_channel_last_Max(TestCase5_Max): + def init_data_format(self): self.data_format = "NHWC" @@ -526,16 +556,19 @@ class TestCase5_channel_last_Max(TestCase5_Max): class TestAvgInclude_channel_last(TestCase2_channel_last): + def init_exclusive(self): self.exclusive = False class TestAvgPoolAdaptive_channel_last(TestCase1_channel_last): + def init_adaptive(self): self.adaptive = True class TestPool2D_AsyPadding_channel_last(TestPool2D_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -544,6 +577,7 @@ class TestPool2D_AsyPadding_channel_last(TestPool2D_AsyPadding): class TestCase1_AsyPadding_channel_last(TestCase1_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -552,6 +586,7 @@ class TestCase1_AsyPadding_channel_last(TestCase1_AsyPadding): class 
TestCase2_AsyPadding_channel_last(TestCase2_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -560,6 +595,7 @@ class TestCase2_AsyPadding_channel_last(TestCase2_AsyPadding): class TestCase3_AsyPadding_channel_last(TestCase3_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -568,6 +604,7 @@ class TestCase3_AsyPadding_channel_last(TestCase3_AsyPadding): class TestCase4_AsyPadding_channel_last(TestCase4_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -576,6 +613,7 @@ class TestCase4_AsyPadding_channel_last(TestCase4_AsyPadding): class TestCase5_AsyPadding_channel_last(TestCase5_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -588,6 +626,7 @@ create_test_use_ceil_class(TestCase2_AsyPadding_channel_last) class TestAvgInclude_AsyPadding_channel_last(TestAvgInclude_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -599,7 +638,9 @@ class TestAvgInclude_AsyPadding_channel_last(TestAvgInclude_AsyPadding): def create_test_padding_SAME_class(parent): + class TestPaddingSMAECase(parent): + def init_paddings(self): self.paddings = [0, 0] self.padding_algorithm = "SAME" @@ -625,7 +666,9 @@ create_test_padding_SAME_class(TestCase5_channel_last) def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): self.paddings = [1, 1] self.padding_algorithm = "VALID" @@ -651,6 +694,7 @@ create_test_padding_VALID_class(TestCase5_channel_last) class TestCase1_strides(TestCase1): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 2] @@ -664,107 +708,96 @@ create_test_padding_SAME_class(TestCase1_strides) # ----- test API class TestPool2DAPI(unittest.TestCase): + def test_api(self): x_NHWC = np.random.random([2, 5, 5, 3]).astype("float32") x_NCHW = np.random.random([2, 3, 5, 5]).astype("float32") - input_NHWC = fluid.layers.data( - name="input_NHWC", - shape=[2, 5, 5, 3], - append_batch_size=False, - dtype="float32") - - input_NCHW = fluid.layers.data( - name="input_NCHW", - shape=[2, 3, 5, 5], - append_batch_size=False, - dtype="float32") - - input_NHWC_negetive = fluid.layers.data( - name="input_NHWC_negetive", - shape=[2, -1, 5, 3], - append_batch_size=False, - dtype="float32") - - input_NCHW_negetive = fluid.layers.data( - name="input_NCHW_negetive", - shape=[2, 3, -1, -1], - append_batch_size=False, - dtype="float32") + input_NHWC = fluid.layers.data(name="input_NHWC", + shape=[2, 5, 5, 3], + append_batch_size=False, + dtype="float32") + + input_NCHW = fluid.layers.data(name="input_NCHW", + shape=[2, 3, 5, 5], + append_batch_size=False, + dtype="float32") + + input_NHWC_negetive = fluid.layers.data(name="input_NHWC_negetive", + shape=[2, -1, 5, 3], + append_batch_size=False, + dtype="float32") + + input_NCHW_negetive = fluid.layers.data(name="input_NCHW_negetive", + shape=[2, 3, -1, -1], + append_batch_size=False, + dtype="float32") ksize = [3, 3] - out_1 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[1, 1], - data_format="NHWC") - - out_2 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="avg", - pool_padding=[[0, 0], [1, 1], [1, 1], [0, 0]], - data_format="NHWC") - - out_3 = fluid.layers.pool2d( - input=input_NCHW, - pool_size=ksize, - pool_type="avg", - pool_padding=[[0, 0], [0, 0], [1, 1], [1, 1]], - data_format="NCHW") - - out_4 = fluid.layers.pool2d( - input=input_NCHW, - pool_size=ksize, - pool_type="avg", - pool_padding=[1, 2, 1, 0], - data_format="NCHW") 
+ out_1 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[1, 1], + data_format="NHWC") + + out_2 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="avg", + pool_padding=[[0, 0], [1, 1], [1, 1], + [0, 0]], + data_format="NHWC") + + out_3 = fluid.layers.pool2d(input=input_NCHW, + pool_size=ksize, + pool_type="avg", + pool_padding=[[0, 0], [0, 0], [1, 1], + [1, 1]], + data_format="NCHW") + + out_4 = fluid.layers.pool2d(input=input_NCHW, + pool_size=ksize, + pool_type="avg", + pool_padding=[1, 2, 1, 0], + data_format="NCHW") # test VALID - out_5 = fluid.layers.pool2d( - input=input_NCHW, - pool_size=ksize, - pool_type="avg", - pool_padding="VALID", - data_format="NCHW") - - out_6 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="max", - pool_padding="VALID", - data_format="NHWC") + out_5 = fluid.layers.pool2d(input=input_NCHW, + pool_size=ksize, + pool_type="avg", + pool_padding="VALID", + data_format="NCHW") + + out_6 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding="VALID", + data_format="NHWC") # test SAME - out_7 = fluid.layers.pool2d( - input=input_NCHW, - pool_size=[4, 4], - pool_type="avg", - pool_padding="SAME", - data_format="NCHW") - - out_8 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=[4, 4], - pool_type="max", - pool_padding="SAME", - data_format="NHWC") + out_7 = fluid.layers.pool2d(input=input_NCHW, + pool_size=[4, 4], + pool_type="avg", + pool_padding="SAME", + data_format="NCHW") + + out_8 = fluid.layers.pool2d(input=input_NHWC, + pool_size=[4, 4], + pool_type="max", + pool_padding="SAME", + data_format="NHWC") # test negetive - out_9 = fluid.layers.pool2d( - input=input_NHWC_negetive, - pool_size=ksize, - pool_type="avg", - pool_padding=[0, 0], - data_format="NHWC") + out_9 = fluid.layers.pool2d(input=input_NHWC_negetive, + pool_size=ksize, + pool_type="avg", + pool_padding=[0, 0], + data_format="NHWC") assert out_9.shape == (2, -1, 3, 3) - out_10 = fluid.layers.pool2d( - input=input_NCHW_negetive, - pool_size=ksize, - pool_type="avg", - pool_padding=[0, 0], - data_format="NCHW") + out_10 = fluid.layers.pool2d(input=input_NCHW_negetive, + pool_size=ksize, + pool_type="avg", + pool_padding=[0, 0], + data_format="NCHW") assert out_10.shape == (2, 3, -1, -1) exe = fluid.Executor(place=fluid.MLUPlace(0)) @@ -776,52 +809,44 @@ class TestPool2DAPI(unittest.TestCase): "input_NHWC_negetive": x_NHWC, "input_NCHW_negetive": x_NCHW }, - fetch_list=[ - out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8 - ]) + fetch_list=[out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8]) assert np.allclose( res_1, - pool2D_forward_naive( - x=x_NHWC, - ksize=ksize, - pool_type="max", - strides=[1, 1], - paddings=[1, 1], - data_format="NHWC")) + pool2D_forward_naive(x=x_NHWC, + ksize=ksize, + pool_type="max", + strides=[1, 1], + paddings=[1, 1], + data_format="NHWC")) assert np.allclose( res_2, - pool2D_forward_naive( - x=x_NHWC, - ksize=ksize, - pool_type="avg", - strides=[1, 1], - paddings=[1, 1, 1, 1], - data_format="NHWC")) - assert np.allclose( - res_3, - pool2D_forward_naive( - x=x_NCHW, - ksize=ksize, - pool_type="avg", - strides=[1, 1], - paddings=[1, 1, 1, 1], - data_format="NCHW"), - rtol=0.07, - atol=1e-05) - - assert np.allclose( - res_4, - pool2D_forward_naive( - x=x_NCHW, - ksize=ksize, - pool_type="avg", - strides=[1, 1], - paddings=[1, 2, 1, 0], - data_format="NCHW"), - rtol=0.07, - atol=1e-05) + pool2D_forward_naive(x=x_NHWC, + 
ksize=ksize, + pool_type="avg", + strides=[1, 1], + paddings=[1, 1, 1, 1], + data_format="NHWC")) + assert np.allclose(res_3, + pool2D_forward_naive(x=x_NCHW, + ksize=ksize, + pool_type="avg", + strides=[1, 1], + paddings=[1, 1, 1, 1], + data_format="NCHW"), + rtol=0.07, + atol=1e-05) + + assert np.allclose(res_4, + pool2D_forward_naive(x=x_NCHW, + ksize=ksize, + pool_type="avg", + strides=[1, 1], + paddings=[1, 2, 1, 0], + data_format="NCHW"), + rtol=0.07, + atol=1e-05) # VALID assert np.allclose( @@ -838,182 +863,170 @@ class TestPool2DAPI(unittest.TestCase): atol=1e-05) assert np.allclose( res_6, - pool2D_forward_naive( - x=x_NHWC, - ksize=ksize, - pool_type="max", - strides=[1, 1], - paddings=[10, 20], - padding_algorithm="VALID", - data_format="NHWC")) + pool2D_forward_naive(x=x_NHWC, + ksize=ksize, + pool_type="max", + strides=[1, 1], + paddings=[10, 20], + padding_algorithm="VALID", + data_format="NHWC")) # SAME - assert np.allclose( - res_7, - pool2D_forward_naive( - x=x_NCHW, - ksize=[4, 4], - pool_type="avg", - strides=[1, 1], - paddings=[10, 20], - padding_algorithm="SAME", - data_format="NCHW"), - rtol=0.07, - atol=1e-05) + assert np.allclose(res_7, + pool2D_forward_naive(x=x_NCHW, + ksize=[4, 4], + pool_type="avg", + strides=[1, 1], + paddings=[10, 20], + padding_algorithm="SAME", + data_format="NCHW"), + rtol=0.07, + atol=1e-05) assert np.allclose( res_8, - pool2D_forward_naive( - x=x_NHWC, - ksize=[4, 4], - pool_type="max", - strides=[1, 1], - paddings=[10, 20], - padding_algorithm="SAME", - data_format="NHWC")) + pool2D_forward_naive(x=x_NHWC, + ksize=[4, 4], + pool_type="max", + strides=[1, 1], + paddings=[10, 20], + padding_algorithm="SAME", + data_format="NHWC")) class TestPool2DAPI_Error(unittest.TestCase): + def test_api(self): - input_NHWC = fluid.layers.data( - name="input_NHWC", - shape=[2, 5, 5, 3], - append_batch_size=False, - dtype="float32") + input_NHWC = fluid.layers.data(name="input_NHWC", + shape=[2, 5, 5, 3], + append_batch_size=False, + dtype="float32") ksize = [3, 3] # data_format value error def run_2(): - out_2 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[1, 1], - data_format="NHWCC") + out_2 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[1, 1], + data_format="NHWCC") self.assertRaises(ValueError, run_2) # padding str value error def run_3(): - out_3 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="max", - pool_padding="VALIDSAME", - data_format="NHWC") + out_3 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding="VALIDSAME", + data_format="NHWC") self.assertRaises(ValueError, run_3) # padding str valid and ceil_mode value error def run_4(): - out_4 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="max", - pool_padding="VALID", - ceil_mode=True, - data_format="NHWC") + out_4 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding="VALID", + ceil_mode=True, + data_format="NHWC") self.assertRaises(ValueError, run_4) # padding with 8 ele. 
value error def run_5(): - out_5 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[[1, 1], [0, 0], [0, 0], [1, 1]], - data_format="NHWC") + out_5 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[[1, 1], [0, 0], [0, 0], + [1, 1]], + data_format="NHWC") self.assertRaises(ValueError, run_5) class TestDygraphPool2DAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of Pool2D must be Variable. data1 = np.random.random((3, 32, 32, 5)).astype('float32') - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='max', - pool_stride=1, - global_pooling=False) + pool2d = fluid.dygraph.Pool2D(pool_size=2, + pool_type='max', + pool_stride=1, + global_pooling=False) self.assertRaises(TypeError, pool2d, data1) - # the input dtype of mlu Pool2D must be float16 or float32 - data2 = fluid.layers.data( - name='x1', shape=[3, 32, 32, 5], dtype="int32") + # the input dtype of mlu Pool2D must be float16 or float32 + data2 = fluid.layers.data(name='x1', + shape=[3, 32, 32, 5], + dtype="int32") self.assertRaises(TypeError, pool2d, data2) def test_data_format_error(self): with program_guard(Program(), Program()): # the data_format must be 'NCHW' or 'NHWC' data1 = np.random.random((3, 32, 32, 5)).astype('float32') - self.assertRaises( - ValueError, - fluid.dygraph.Pool2D, - pool_size=2, - pool_type='max', - pool_stride=1, - global_pooling=False, - data_format='NWHC') + self.assertRaises(ValueError, + fluid.dygraph.Pool2D, + pool_size=2, + pool_type='max', + pool_stride=1, + global_pooling=False, + data_format='NWHC') class TestDygraphPool2DAPI(unittest.TestCase): + def test_nhwc(self): with fluid.dygraph.guard(): data = np.random.random((3, 32, 32, 5)).astype('float32') x = fluid.dygraph.to_variable(data) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='max', - pool_stride=1, - pool_padding=[0, 0], - global_pooling=False, - data_format='NHWC') + pool2d = fluid.dygraph.Pool2D(pool_size=2, + pool_type='max', + pool_stride=1, + pool_padding=[0, 0], + global_pooling=False, + data_format='NHWC') out1 = pool2d(x) - out2 = pool2D_forward_naive( - data, [2, 2], [1, 1], - paddings=[0, 0], - pool_type='max', - data_format='NHWC') + out2 = pool2D_forward_naive(data, [2, 2], [1, 1], + paddings=[0, 0], + pool_type='max', + data_format='NHWC') self.assertTrue(np.allclose(out1.numpy(), out2)) def test_lower_case(self): with fluid.dygraph.guard(): data = np.random.random((3, 32, 32, 5)).astype('float32') x = fluid.dygraph.to_variable(data) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='max', - pool_stride=1, - pool_padding=[0, 0], - global_pooling=False, - data_format='nhwc') + pool2d = fluid.dygraph.Pool2D(pool_size=2, + pool_type='max', + pool_stride=1, + pool_padding=[0, 0], + global_pooling=False, + data_format='nhwc') out1 = pool2d(x) - out2 = pool2D_forward_naive( - data, [2, 2], [1, 1], - paddings=[0, 0], - pool_type='max', - data_format='NHWC') + out2 = pool2D_forward_naive(data, [2, 2], [1, 1], + paddings=[0, 0], + pool_type='max', + data_format='NHWC') self.assertTrue(np.allclose(out1.numpy(), out2)) def test_upper_case(self): with fluid.dygraph.guard(): data = np.random.random((3, 32, 32, 5)).astype('float32') x = fluid.dygraph.to_variable(data) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='MAX', - pool_stride=1, - pool_padding=[0, 0], - global_pooling=False, - data_format='nhwc') + pool2d = fluid.dygraph.Pool2D(pool_size=2, + 
pool_type='MAX', + pool_stride=1, + pool_padding=[0, 0], + global_pooling=False, + data_format='nhwc') out1 = pool2d(x) - out2 = pool2D_forward_naive( - data, [2, 2], [1, 1], - paddings=[0, 0], - pool_type='max', - data_format='NHWC') + out2 = pool2D_forward_naive(data, [2, 2], [1, 1], + paddings=[0, 0], + pool_type='max', + data_format='NHWC') self.assertTrue(np.allclose(out1.numpy(), out2)) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_reduce_max_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_reduce_max_op_mlu.py index ef33719d368..372f2bd6ad4 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_reduce_max_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_reduce_max_op_mlu.py @@ -130,8 +130,9 @@ class TestReduceMaxOpWithOutDtype_fp16(TestMLUReduceMaxOp): 'out_dtype': int(core.VarDesc.VarType.FP16) } self.outputs = { - 'Out': self.inputs['X'].max( - axis=tuple(self.attrs['dim'])).astype(np.float16) + 'Out': + self.inputs['X'].max(axis=tuple(self.attrs['dim'])).astype( + np.float16) } def init_dtype(self): @@ -158,8 +159,9 @@ class TestReduceMaxOpWithOutDtype_fp32(TestMLUReduceMaxOp): 'out_dtype': int(core.VarDesc.VarType.FP32) } self.outputs = { - 'Out': self.inputs['X'].max( - axis=tuple(self.attrs['dim'])).astype(np.float32) + 'Out': + self.inputs['X'].max(axis=tuple(self.attrs['dim'])).astype( + np.float32) } def init_dtype(self): diff --git a/python/paddle/fluid/tests/unittests/mlu/test_reduce_mean_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_reduce_mean_op_mlu.py index c0be644c791..5fa30f400f2 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_reduce_mean_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_reduce_mean_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -26,6 +27,7 @@ paddle.enable_static() class TestMeanOp(OpTest): + def set_mlu(self): self.__class__.use_mlu = True self.place = paddle.device.MLUPlace(0) @@ -44,6 +46,7 @@ class TestMeanOp(OpTest): class TestMeanOp5D(TestMeanOp): + def setUp(self): self.set_mlu() self.op_type = "reduce_mean" @@ -54,6 +57,7 @@ class TestMeanOp5D(TestMeanOp): class TestMeanOp6D(TestMeanOp): + def setUp(self): self.set_mlu() self.op_type = "reduce_mean" @@ -64,6 +68,7 @@ class TestMeanOp6D(TestMeanOp): class TestMeanOp8D(TestMeanOp): + def setUp(self): self.set_mlu() self.op_type = "reduce_mean" @@ -75,6 +80,7 @@ class TestMeanOp8D(TestMeanOp): class Test1DReduce(TestMeanOp): + def setUp(self): self.set_mlu() self.op_type = "reduce_mean" @@ -83,6 +89,7 @@ class Test1DReduce(TestMeanOp): class Test2DReduce0(Test1DReduce): + def setUp(self): self.set_mlu() self.op_type = "reduce_mean" @@ -92,6 +99,7 @@ class Test2DReduce0(Test1DReduce): class Test2DReduce1(Test1DReduce): + def setUp(self): self.set_mlu() self.op_type = "reduce_mean" @@ -103,6 +111,7 @@ class Test2DReduce1(Test1DReduce): class Test3DReduce0(Test1DReduce): + def setUp(self): self.set_mlu() self.op_type = "reduce_mean" @@ -114,6 +123,7 @@ class Test3DReduce0(Test1DReduce): class Test3DReduce1(Test1DReduce): + def setUp(self): self.set_mlu() self.op_type = "reduce_mean" @@ -125,6 +135,7 @@ class Test3DReduce1(Test1DReduce): class Test3DReduce2(Test1DReduce): + def setUp(self): self.set_mlu() self.op_type = "reduce_mean" @@ -136,6 +147,7 @@ class Test3DReduce2(Test1DReduce): class Test3DReduce3(Test1DReduce): + def setUp(self): self.set_mlu() self.op_type = "reduce_mean" @@ -147,18 
+159,21 @@ class Test3DReduce3(Test1DReduce): class TestKeepDimReduce(Test1DReduce): + def setUp(self): self.set_mlu() self.op_type = "reduce_mean" self.inputs = {'X': np.random.random((5, 6, 10)).astype("float32")} self.attrs = {'dim': [1], 'keep_dim': True} self.outputs = { - 'Out': self.inputs['X'].mean( - axis=tuple(self.attrs['dim']), keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].mean(axis=tuple(self.attrs['dim']), + keepdims=self.attrs['keep_dim']) } class TestKeepDim8DReduce(Test1DReduce): + def setUp(self): self.set_mlu() self.op_type = "reduce_mean" @@ -167,12 +182,14 @@ class TestKeepDim8DReduce(Test1DReduce): } self.attrs = {'dim': (3, 4, 5), 'keep_dim': True} self.outputs = { - 'Out': self.inputs['X'].mean( - axis=tuple(self.attrs['dim']), keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].mean(axis=tuple(self.attrs['dim']), + keepdims=self.attrs['keep_dim']) } class TestReduceAll(Test1DReduce): + def setUp(self): self.set_mlu() self.op_type = "reduce_mean" diff --git a/python/paddle/fluid/tests/unittests/mlu/test_reduce_min_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_reduce_min_op_mlu.py index 284f8f984c2..a2f8007973c 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_reduce_min_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_reduce_min_op_mlu.py @@ -130,8 +130,9 @@ class TestReduceMinOpWithOutDtype_fp16(TestMLUReduceMinOp): 'out_dtype': int(core.VarDesc.VarType.FP16) } self.outputs = { - 'Out': self.inputs['X'].min( - axis=tuple(self.attrs['dim'])).astype(np.float16) + 'Out': + self.inputs['X'].min(axis=tuple(self.attrs['dim'])).astype( + np.float16) } def init_dtype(self): @@ -158,8 +159,9 @@ class TestReduceMinOpWithOutDtype_fp32(TestMLUReduceMinOp): 'out_dtype': int(core.VarDesc.VarType.FP32) } self.outputs = { - 'Out': self.inputs['X'].min( - axis=tuple(self.attrs['dim'])).astype(np.float32) + 'Out': + self.inputs['X'].min(axis=tuple(self.attrs['dim'])).astype( + np.float32) } def init_dtype(self): diff --git a/python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py index d2729d77aba..ab984187443 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -25,6 +26,7 @@ paddle.enable_static() class TestMLUReduceSumOp(OpTest): + def setUp(self): self.init_op_type() self.initTestCase() @@ -39,8 +41,9 @@ class TestMLUReduceSumOp(OpTest): self.outputs = {'Out': self.inputs['X'].sum()} else: self.outputs = { - 'Out': self.inputs['X'].sum(axis=self.axis, - keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].sum(axis=self.axis, + keepdims=self.attrs['keep_dim']) } def set_mlu(self): @@ -65,66 +68,77 @@ class TestMLUReduceSumOp(OpTest): class TestSumOp5D(TestMLUReduceSumOp): + def initTestCase(self): self.shape = (1, 2, 5, 6, 10) self.axis = (0, ) class TestSumOp6D(TestMLUReduceSumOp): + def initTestCase(self): self.shape = (1, 1, 2, 5, 6, 10) self.axis = (0, ) class TestSumOp8D(TestMLUReduceSumOp): + def initTestCase(self): self.shape = (1, 3, 1, 2, 1, 4, 3, 10) self.axis = (0, 3) class Test1DReduce(TestMLUReduceSumOp): + def initTestCase(self): self.shape = 120 self.axis = (0, ) class Test2DReduce0(TestMLUReduceSumOp): + def initTestCase(self): self.shape = (20, 10) self.axis = 
(0, ) class Test2DReduce1(TestMLUReduceSumOp): + def initTestCase(self): self.shape = (20, 10) self.axis = (1, ) class Test3DReduce0(TestMLUReduceSumOp): + def initTestCase(self): self.shape = (5, 6, 7) self.axis = (1, ) class Test3DReduce1(TestMLUReduceSumOp): + def initTestCase(self): self.shape = (5, 6, 7) self.axis = (2, ) class Test3DReduce2(TestMLUReduceSumOp): + def initTestCase(self): self.shape = (5, 6, 7) self.axis = (-2, ) class Test3DReduce3(TestMLUReduceSumOp): + def initTestCase(self): self.shape = (5, 6, 7) self.axis = (1, 2) class TestKeepDimReduce(TestMLUReduceSumOp): + def initTestCase(self): self.shape = (5, 6, 10) self.axis = (1, ) @@ -132,6 +146,7 @@ class TestKeepDimReduce(TestMLUReduceSumOp): class TestKeepDim8DReduce(TestMLUReduceSumOp): + def initTestCase(self): self.shape = (2, 5, 3, 2, 2, 3, 4, 2) self.axis = (3, 4, 5) @@ -139,6 +154,7 @@ class TestKeepDim8DReduce(TestMLUReduceSumOp): class TestReduceAll(TestMLUReduceSumOp): + def initTestCase(self): self.shape = (5, 6, 2, 10) self.axis = (0, ) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_relu6_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_relu6_op_mlu.py index 54b1afd0363..ffb6fee30f5 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_relu6_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_relu6_op_mlu.py @@ -20,6 +20,7 @@ from op_test import OpTest import numpy as np import unittest import sys + sys.path.append("..") paddle.enable_static() @@ -34,6 +35,7 @@ def ref_relu6(x, threshold=6.0): class TestRelu6(OpTest): + def setUp(self): self.set_mlu() self.op_type = "relu6" @@ -63,6 +65,7 @@ class TestRelu6(OpTest): class TestRelu6Float16(TestRelu6): + def set_mlu(self): self.__class__.use_mlu = True self.__class__.no_need_check_grad = True @@ -75,6 +78,7 @@ class TestRelu6Float16(TestRelu6): class TestReluNeg(TestRelu6): + def setUp(self): self.set_mlu() self.op_type = "relu6" @@ -101,6 +105,7 @@ class TestReluNeg(TestRelu6): class TestRelu6Net(unittest.TestCase): + def _test(self, run_mlu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -115,8 +120,9 @@ class TestRelu6Net(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.nn.functional.relu6(sum) @@ -140,12 +146,13 @@ class TestRelu6Net(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_relu_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_relu_op_mlu.py index 25c50f67949..495711e5303 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_relu_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_relu_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import 
paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestRelu(OpTest): + def setUp(self): self.set_mlu() self.op_type = "relu" @@ -52,6 +54,7 @@ class TestRelu(OpTest): class TestReluFp16(OpTest): + def setUp(self): self.set_mlu() self.op_type = "relu" @@ -78,6 +81,7 @@ class TestReluFp16(OpTest): class TestReluNeg(OpTest): + def setUp(self): self.set_mlu() self.op_type = "relu" @@ -103,6 +107,7 @@ class TestReluNeg(OpTest): class TestReluNet(unittest.TestCase): + def _test(self, run_mlu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -117,8 +122,9 @@ class TestReluNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.nn.functional.relu(sum) @@ -142,12 +148,13 @@ class TestReluNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_reshape2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_reshape2_op_mlu.py index 9cff269913f..2fe28af81b1 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_reshape2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_reshape2_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2022 class TestReshape2(OpTest): + def setUp(self): self.set_mlu() self.op_type = "reshape2" @@ -56,6 +58,7 @@ class TestReshape2(OpTest): class TestReshape2_case2(TestReshape2): + def init_data(self): self.ori_shape = (2, 100) self.new_shape = (-1, 10) @@ -63,6 +66,7 @@ class TestReshape2_case2(TestReshape2): class TestReshape2_case3(TestReshape2): + def init_data(self): self.ori_shape = (100, 5, 6) self.new_shape = (-1, 0, 3) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_scale_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_scale_op_mlu.py index 53254c738d9..aed58a352f4 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_scale_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_scale_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append('..') from op_test import OpTest import paddle @@ -29,6 +30,7 @@ paddle.enable_static() class TestScaleOp(OpTest): + def setUp(self): self.op_type = "scale" self.place = paddle.device.MLUPlace(0) @@ -49,6 +51,7 @@ class TestScaleOp(OpTest): class TestScaleOpScaleVariable(OpTest): + def setUp(self): self.op_type = "scale" self.place = paddle.device.MLUPlace(0) @@ -71,6 +74,7 @@ class TestScaleOpScaleVariable(OpTest): class TestScaleOpSelectedRows(unittest.TestCase): + def init_dtype_type(self): pass @@ -129,7 +133,9 @@ class TestScaleOpSelectedRows(unittest.TestCase): class TestScaleRaiseError(unittest.TestCase): + def test_errors(self): + def 
test_type(): fluid.layers.scale([10]) @@ -140,6 +146,7 @@ class TestScaleRaiseError(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_mlu(), "core is not compiled with MLU") class TestScaleFp16Op(TestScaleOp): + def init_dtype_type(self): self.dtype = np.float16 @@ -150,6 +157,7 @@ class TestScaleFp16Op(TestScaleOp): @unittest.skipIf(not core.is_compiled_with_mlu(), "core is not compiled with MLU") class TestScaleFp16OpSelectedRows(TestScaleOpSelectedRows): + def init_dtype_type(self): self.dtype = np.float16 @@ -163,6 +171,7 @@ class TestScaleFp16OpSelectedRows(TestScaleOpSelectedRows): class TestScaleApiStatic(unittest.TestCase): + def _executed_api(self, x, scale=1.0, bias=0.0): return paddle.scale(x, scale, bias) @@ -180,11 +189,13 @@ class TestScaleApiStatic(unittest.TestCase): class TestScaleInplaceApiStatic(TestScaleApiStatic): + def _executed_api(self, x, scale=1.0, bias=0.0): return x.scale_(scale, bias) class TestScaleApiDygraph(unittest.TestCase): + def _executed_api(self, x, scale=1.0, bias=0.0): return paddle.scale(x, scale, bias) @@ -198,6 +209,7 @@ class TestScaleApiDygraph(unittest.TestCase): class TestScaleInplaceApiDygraph(TestScaleApiDygraph): + def _executed_api(self, x, scale=1.0, bias=0.0): return x.scale_(scale, bias) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_sigmoid_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_sigmoid_op_mlu.py index f4c5612377e..5438e3955d3 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_sigmoid_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_sigmoid_op_mlu.py @@ -26,6 +26,7 @@ SEED = 2021 class TestMLUSigmoid(OpTest): + def setUp(self): self.op_type = "sigmoid" self.set_mlu() @@ -42,8 +43,9 @@ class TestMLUSigmoid(OpTest): self.check_output_with_place(self.place) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', max_relative_error=0.01) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.01) def set_mlu(self): self.__class__.use_mlu = True @@ -54,6 +56,7 @@ class TestMLUSigmoid(OpTest): class TestMLUSigmoidFp16(TestMLUSigmoid): + def test_check_output(self): self.check_output_with_place(self.place, atol=1e-3) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_slice_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_slice_op_mlu.py index 44532ddceb7..a074a9d91a8 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_slice_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_slice_op_mlu.py @@ -18,6 +18,7 @@ import unittest import numpy as np import paddle.fluid.core as core import sys + sys.path.append('..') from op_test import OpTest import paddle.fluid as fluid @@ -30,6 +31,7 @@ paddle.enable_static() # Situation 1: starts(list, no tensor), ends(list, no tensor) # 1.1 without attr(decrease) class TestSliceOp(OpTest): + def setUp(self): self.op_type = "slice" self.set_mlu() @@ -55,8 +57,9 @@ class TestSliceOp(OpTest): self.check_output_with_place(self.place) def test_check_grad_normal(self): - self.check_grad_with_place( - self.place, ['Input'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(self.place, ['Input'], + 'Out', + max_relative_error=0.006) def set_mlu(self): self.__class__.use_mlu = True @@ -64,6 +67,7 @@ class TestSliceOp(OpTest): class TestCase1(TestSliceOp): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float32") self.starts = [-3, 0, 2] @@ -74,6 +78,7 @@ class TestCase1(TestSliceOp): class TestCase2(TestSliceOp): + def config(self): self.input 
= np.random.random([3, 4, 5, 6]).astype("float32") self.starts = [-3, 0, 2] @@ -85,6 +90,7 @@ class TestCase2(TestSliceOp): # 1.2 with attr(decrease) class TestSliceOp_decs_dim(OpTest): + def setUp(self): self.op_type = "slice" self.set_mlu() @@ -112,8 +118,9 @@ class TestSliceOp_decs_dim(OpTest): self.check_output_with_place(self.place) def test_check_grad_normal(self): - self.check_grad_with_place( - self.place, ['Input'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(self.place, ['Input'], + 'Out', + max_relative_error=0.006) def set_mlu(self): self.__class__.use_mlu = True @@ -121,6 +128,7 @@ class TestSliceOp_decs_dim(OpTest): class TestSliceOp_decs_dim_2(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float32") self.starts = [1, 0, 2] @@ -132,6 +140,7 @@ class TestSliceOp_decs_dim_2(TestSliceOp_decs_dim): class TestSliceOp_decs_dim_3(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float32") self.starts = [-1, 0, 2] @@ -143,6 +152,7 @@ class TestSliceOp_decs_dim_3(TestSliceOp_decs_dim): class TestSliceOp_decs_dim_4(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 7]).astype("float32") self.starts = [0, 1, 2, 3] @@ -154,6 +164,7 @@ class TestSliceOp_decs_dim_4(TestSliceOp_decs_dim): class TestSliceOp_decs_dim_5(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float32") self.starts = [-1] @@ -165,6 +176,7 @@ class TestSliceOp_decs_dim_5(TestSliceOp_decs_dim): class TestSliceOp_decs_dim_6(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float32") self.starts = [0, 1, 2, 3] @@ -178,6 +190,7 @@ class TestSliceOp_decs_dim_6(TestSliceOp_decs_dim): # Situation 2: starts(list, have tensor), ends(list, no tensor) # without attr(decrease) class TestSliceOp_starts_ListTensor(OpTest): + def setUp(self): self.op_type = "slice" self.set_mlu() @@ -211,8 +224,9 @@ class TestSliceOp_starts_ListTensor(OpTest): self.check_output_with_place(self.place) def test_check_grad_normal(self): - self.check_grad_with_place( - self.place, ['Input'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(self.place, ['Input'], + 'Out', + max_relative_error=0.006) def set_mlu(self): self.__class__.use_mlu = True @@ -222,6 +236,7 @@ class TestSliceOp_starts_ListTensor(OpTest): # Situation 2: starts(list, have tensor), ends(list, no tensor) # with attr(decrease) class TestSliceOp_decs_dim_starts_ListTensor(OpTest): + def setUp(self): self.op_type = "slice" self.set_mlu() @@ -258,8 +273,9 @@ class TestSliceOp_decs_dim_starts_ListTensor(OpTest): self.check_output_with_place(self.place) def test_check_grad_normal(self): - self.check_grad_with_place( - self.place, ['Input'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(self.place, ['Input'], + 'Out', + max_relative_error=0.006) def set_mlu(self): self.__class__.use_mlu = True @@ -268,6 +284,7 @@ class TestSliceOp_decs_dim_starts_ListTensor(OpTest): class TestSliceOp_decs_dim_5_starts_ListTensor( TestSliceOp_decs_dim_starts_ListTensor): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float32") self.starts = [-1] @@ -283,6 +300,7 @@ class TestSliceOp_decs_dim_5_starts_ListTensor( # Situation 3: starts(tensor), ends(list, no tensor) # with attr(decrease) class TestSliceOp_decs_dim_starts_OneTensor(OpTest): + def setUp(self): self.op_type = "slice" self.__class__.use_mlu = True @@ -290,8 +308,7 @@ 
class TestSliceOp_decs_dim_starts_OneTensor(OpTest): self.config() self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype="int32") + "StartsTensor": np.array(self.starts, dtype="int32") } self.outputs = {'Out': self.out} self.attrs = { @@ -315,13 +332,15 @@ class TestSliceOp_decs_dim_starts_OneTensor(OpTest): self.check_output_with_place(self.place) def test_check_grad_normal(self): - self.check_grad_with_place( - self.place, ['Input'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(self.place, ['Input'], + 'Out', + max_relative_error=0.006) # Situation 4: starts(tensor), ends(tensor) # without attr(decrease) class TestSliceOp_starts_OneTensor_ends_OneTensor(OpTest): + def setUp(self): self.op_type = "slice" self.__class__.use_mlu = True @@ -330,10 +349,8 @@ class TestSliceOp_starts_OneTensor_ends_OneTensor(OpTest): self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype="int64"), - "EndsTensor": np.array( - self.ends, dtype="int32") + "StartsTensor": np.array(self.starts, dtype="int64"), + "EndsTensor": np.array(self.ends, dtype="int32") } self.outputs = {'Out': self.out} self.attrs = { @@ -355,13 +372,15 @@ class TestSliceOp_starts_OneTensor_ends_OneTensor(OpTest): self.check_output_with_place(self.place) def test_check_grad_normal(self): - self.check_grad_with_place( - self.place, ['Input'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(self.place, ['Input'], + 'Out', + max_relative_error=0.006) # Situation 5: starts(tensor), ends(tensor) # with attr(decrease) class TestSliceOp_decs_dim_starts_and_ends_OneTensor(OpTest): + def setUp(self): self.op_type = "slice" self.__class__.use_mlu = True @@ -369,10 +388,8 @@ class TestSliceOp_decs_dim_starts_and_ends_OneTensor(OpTest): self.config() self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype="int32"), - "EndsTensor": np.array( - self.ends, dtype="int32") + "StartsTensor": np.array(self.starts, dtype="int32"), + "EndsTensor": np.array(self.ends, dtype="int32") } self.outputs = {'Out': self.out} self.attrs = { @@ -396,13 +413,15 @@ class TestSliceOp_decs_dim_starts_and_ends_OneTensor(OpTest): self.check_output_with_place(self.place) def test_check_grad_normal(self): - self.check_grad_with_place( - self.place, ['Input'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(self.place, ['Input'], + 'Out', + max_relative_error=0.006) # Situation 6: starts(tensor), ends(list, have tensor) # without attr(decrease) class TestSliceOp_starts_OneTensor_ends_ListTensor(OpTest): + def setUp(self): self.op_type = "slice" self.__class__.use_mlu = True @@ -416,8 +435,7 @@ class TestSliceOp_starts_OneTensor_ends_ListTensor(OpTest): self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype="int32"), + "StartsTensor": np.array(self.starts, dtype="int32"), 'EndsTensorList': ends_tensor } self.outputs = {'Out': self.out} @@ -442,12 +460,14 @@ class TestSliceOp_starts_OneTensor_ends_ListTensor(OpTest): self.check_output_with_place(self.place) def test_check_grad_normal(self): - self.check_grad_with_place( - self.place, ['Input'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(self.place, ['Input'], + 'Out', + max_relative_error=0.006) # Test float16 class TestFP16(OpTest): + def setUp(self): self.op_type = "slice" self.__class__.use_mlu = True @@ -475,11 +495,13 @@ class TestFP16(OpTest): self.check_output_with_place(self.place, atol=1e-5) def test_check_grad_normal(self): - 
self.check_grad_with_place( - self.place, ['Input'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(self.place, ['Input'], + 'Out', + max_relative_error=0.006) class TestFP16_2(OpTest): + def setUp(self): self.op_type = "slice" self.__class__.use_mlu = True @@ -507,27 +529,24 @@ class TestFP16_2(OpTest): self.check_output_with_place(self.place, atol=1e-5) def test_check_grad_normal(self): - self.check_grad_with_place( - self.place, ['Input'], - 'Out', - max_relative_error=0.006, - numeric_grad_delta=0.5) + self.check_grad_with_place(self.place, ['Input'], + 'Out', + max_relative_error=0.006, + numeric_grad_delta=0.5) class TestSliceApiWithTensor(unittest.TestCase): + def test_starts_ends_is_tensor(self): with paddle.fluid.dygraph.guard(): a = paddle.rand(shape=[4, 5, 6], dtype='float32') axes = [0, 1, 2] starts = [-3, 0, 2] ends = [3, 2, 4] - a_1 = paddle.slice( - a, - axes=axes, - starts=paddle.to_tensor( - starts, dtype='int32'), - ends=paddle.to_tensor( - ends, dtype='int32')) + a_1 = paddle.slice(a, + axes=axes, + starts=paddle.to_tensor(starts, dtype='int32'), + ends=paddle.to_tensor(ends, dtype='int32')) a_2 = paddle.slice(a, axes=axes, starts=starts, ends=ends) self.assertTrue(np.array_equal(a_1.numpy(), a_2.numpy())) @@ -550,6 +569,7 @@ class TestSliceApiWithTensor(unittest.TestCase): class TestImperativeVarBaseGetItem(unittest.TestCase): + def test_getitem_with_long(self): with fluid.dygraph.guard(): data = np.random.random((2, 80, 16128)).astype('float32') @@ -561,6 +581,7 @@ class TestImperativeVarBaseGetItem(unittest.TestCase): self.assertEqual(sliced.shape, [2, 78, 78]) def test_getitem_with_float(self): + def test_float_in_slice_item(): with fluid.dygraph.guard(): data = np.random.random((2, 80, 16128)).astype('float32') @@ -579,6 +600,7 @@ class TestImperativeVarBaseGetItem(unittest.TestCase): class TestInferShape(unittest.TestCase): + def test(self): x = paddle.ones(shape=[3, 4, 5]) x.desc.set_shape([3, -1, 5]) @@ -594,7 +616,9 @@ class TestInferShape(unittest.TestCase): x_arr = np.arange(0, 24, dtype=np.float32).reshape([2, 3, 4]) x = paddle.to_tensor(x_arr) - pp_slice = paddle.slice(x, [100, ], [0], [1]) + pp_slice = paddle.slice(x, [ + 100, + ], [0], [1]) np_slice = x_arr[:, :, 0:1] self.assertTrue(np.array_equal(pp_slice, np_slice)) @@ -606,13 +630,9 @@ class TestInferShape(unittest.TestCase): x = paddle.to_tensor(np.reshape(x_arr, (0, 0, 0))) starts = paddle.to_tensor( - np.reshape( - np.array( - [], dtype=np.int32), (0, ))) + np.reshape(np.array([], dtype=np.int32), (0, ))) ends = paddle.to_tensor( - np.reshape( - np.array( - [], dtype=np.int32), (0, ))) + np.reshape(np.array([], dtype=np.int32), (0, ))) with self.assertRaises(ValueError): paddle.slice(x, [-1000000], starts, ends) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_softmax_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_softmax_op_mlu.py index 54acafcf0df..766b88aa154 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_softmax_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_softmax_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append('..') from op_test import OpTest import paddle.fluid.core as core @@ -48,6 +49,7 @@ def ref_softmax(x, axis=None, dtype=None): class TestSoftmaxOp(OpTest): + def get_x_shape(self): return [10, 10] @@ -68,7 +70,9 @@ class TestSoftmaxOp(OpTest): self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.outputs = {'Out': out} - self.attrs = {'axis': 
self.axis, } + self.attrs = { + 'axis': self.axis, + } def init_kernel_type(self): pass @@ -77,16 +81,19 @@ class TestSoftmaxOp(OpTest): self.check_output_with_place(self.place) def test_check_grad(self): - self.check_grad_with_place( - self.place, ["X"], "Out", max_relative_error=0.01) + self.check_grad_with_place(self.place, ["X"], + "Out", + max_relative_error=0.01) class TestSoftmaxOp2(TestSoftmaxOp): + def get_x_shape(self): return [2, 3, 4, 5] class TestSoftmaxOp3(TestSoftmaxOp): + def get_x_shape(self): return [2, 3, 4, 5] @@ -95,6 +102,7 @@ class TestSoftmaxOp3(TestSoftmaxOp): class TestSoftmaxOp4(TestSoftmaxOp): + def get_x_shape(self): return [2, 3, 4, 5] @@ -103,6 +111,7 @@ class TestSoftmaxOp4(TestSoftmaxOp): class TestSoftmaxOp5(TestSoftmaxOp): + def get_x_shape(self): return [2, 3, 4, 5] @@ -111,6 +120,7 @@ class TestSoftmaxOp5(TestSoftmaxOp): class TestSoftmaxOp6(TestSoftmaxOp): + def get_x_shape(self): return [2, 3, 4, 5] @@ -119,6 +129,7 @@ class TestSoftmaxOp6(TestSoftmaxOp): class TestSoftmaxAPI(unittest.TestCase): + def setUp(self): self.place = paddle.MLUPlace(0) self.x_np = np.random.uniform(-1., 1., [2, 3, 4, 5]).astype('float32') @@ -171,16 +182,19 @@ class TestSoftmaxAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, self.softmax, 1) # The input dtype must be float16, float32 - x_int32 = paddle.fluid.data( - name='x_int32', shape=[2, 3], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[2, 3], + dtype='int32') self.assertRaises(TypeError, self.softmax, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[2, 3], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[2, 3], + dtype='float16') self.softmax(x_fp16) class TestSoftmaxInplaceAPI(TestSoftmaxAPI): + def executed_api(self): self.softmax = F.softmax_ diff --git a/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py index e626b6a0937..f112cd6f66f 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -29,6 +30,7 @@ SEED = 2021 class TestSoftmaxWithCrossEntropyOp(OpTest): + def set_mlu(self): self.__class__.use_mlu = True @@ -90,14 +92,14 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): if self.dtype == np.float16: return # fp32 has low precision, cpu and mlu both need to relax the max_relative_error if using fp32 - self.check_grad_with_place( - self.place, ['Logits'], - 'Loss', - numeric_grad_delta=0.001, - max_relative_error=0.5) + self.check_grad_with_place(self.place, ['Logits'], + 'Loss', + numeric_grad_delta=0.001, + max_relative_error=0.5) class TestPowNet(unittest.TestCase): + def _test(self, run_mlu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -112,8 +114,9 @@ class TestPowNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = 
paddle.add(a, b) z = paddle.pow(sum, 2.0) @@ -137,12 +140,13 @@ class TestPowNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_spawn_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_spawn_mlu.py index 773063c7a8a..e52b5ee301c 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_spawn_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_spawn_mlu.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -25,6 +25,7 @@ from paddle.fluid import core class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() self._linear1 = nn.Linear(10, 10) @@ -62,6 +63,7 @@ def train(print_result=False): class TestSpawn(unittest.TestCase): + def test_nprocs_greater_than_device_num_error(self): with self.assertRaises(RuntimeError): _get_subprocess_env_list(nprocs=100, options=dict()) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_split_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_split_op_mlu.py index b8363545d22..2728473f550 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_split_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_split_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestCase1(OpTest): + def setUp(self): self.set_mlu() self.set_example() @@ -35,8 +37,9 @@ class TestCase1(OpTest): self.place = paddle.device.MLUPlace(0) ipt = self.x.astype(self.dtype) axis = self.axis if isinstance(self.axis, int) else int(self.axis[0]) - tmp_outs = np.split( - ipt, axis=axis, indices_or_sections=self.num_or_sections) + tmp_outs = np.split(ipt, + axis=axis, + indices_or_sections=self.num_or_sections) tmp_outs = [o.astype(self.dtype) for o in tmp_outs] self.outputs = {'Out': []} self.outs = [] @@ -63,6 +66,7 @@ class TestCase1(OpTest): class TestCase2(TestCase1): + def set_example(self): self.dtype = "float32" self.x = np.random.random((20, 4, 50)) @@ -71,6 +75,7 @@ class TestCase2(TestCase1): class TestCase4(TestCase1): + def set_example(self): self.dtype = "float16" self.x = np.random.random((4, 50, 20)) @@ -80,6 +85,7 @@ class TestCase4(TestCase1): # Test Sections class TestCase5(TestCase1): + def set_example(self): super().set_example() self.x = np.random.random((2, 10, 4)) @@ -92,6 +98,7 @@ class TestCase5(TestCase1): class API_TestSplit(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data = fluid.layers.data('data', shape=[-1, 10], dtype='float32') @@ -106,6 +113,7 @@ class 
API_TestSplit(unittest.TestCase): class API_TestSplit2(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data = fluid.layers.data('data', shape=[-1, 10], dtype='float32') @@ -120,6 +128,7 @@ class API_TestSplit2(unittest.TestCase): class API_TestDygraphSplit(unittest.TestCase): + def test_out1(self): with fluid.dygraph.guard(paddle.MLUPlace(0)): input_1 = np.random.random([4, 6, 6]).astype("int32") @@ -151,6 +160,7 @@ class API_TestDygraphSplit(unittest.TestCase): # attr(axis) is Tensor class TestSplitOp_AxisTensor(OpTest): + def setUp(self): self._set_op_type() self.dtype = self.get_dtype() @@ -186,6 +196,7 @@ class TestSplitOp_AxisTensor(OpTest): class TestSplitOp_SectionsTensor(OpTest): + def setUp(self): self._set_op_type() self.dtype = self.get_dtype() diff --git a/python/paddle/fluid/tests/unittests/mlu/test_squeeze2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_squeeze2_op_mlu.py index 51606c36cfd..6a555ed63b8 100755 --- a/python/paddle/fluid/tests/unittests/mlu/test_squeeze2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_squeeze2_op_mlu.py @@ -15,6 +15,7 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import numpy as np import paddle @@ -25,6 +26,7 @@ paddle.enable_static() # Correct: General. class TestSqueezeOp(OpTest): + def setUp(self): self.init_test_case() self.set_mlu() @@ -57,6 +59,7 @@ class TestSqueezeOp(OpTest): # Correct: There is mins axis. class TestSqueezeOp1(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = (0, -2) @@ -65,6 +68,7 @@ class TestSqueezeOp1(TestSqueezeOp): # Correct: No axes input. class TestSqueezeOp2(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = () @@ -73,6 +77,7 @@ class TestSqueezeOp2(TestSqueezeOp): # Correct: Just part of axes be squeezed. class TestSqueezeOp3(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (6, 1, 5, 1, 4, 1) self.axes = (1, -1) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_squeeze_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_squeeze_op_mlu.py index 10703182c0a..dc60ab96d20 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_squeeze_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_squeeze_op_mlu.py @@ -15,6 +15,7 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import numpy as np @@ -30,13 +31,16 @@ paddle.enable_static() # Correct: General. class TestSqueezeOp(OpTest): + def setUp(self): self.op_type = "squeeze" self.init_test_case() self.set_mlu() self.inputs = {"X": np.random.random(self.ori_shape).astype("float64")} self.init_attrs() - self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape), } + self.outputs = { + "Out": self.inputs["X"].reshape(self.new_shape), + } def set_mlu(self): self.__class__.use_mlu = True @@ -58,6 +62,7 @@ class TestSqueezeOp(OpTest): class TestSqueezeBF16Op(OpTest): + def setUp(self): self.op_type = "squeeze" self.dtype = np.uint16 @@ -85,6 +90,7 @@ class TestSqueezeBF16Op(OpTest): # Correct: There is mins axis. class TestSqueezeOp1(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (1, 3, 1, 40) self.axes = (0, -2) @@ -93,14 +99,16 @@ class TestSqueezeOp1(TestSqueezeOp): # Correct: No axes input. class TestSqueezeOp2(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = () self.new_shape = (20, 5) -# Correct: Just part of axes be squeezed. 
+# Correct: Just part of axes be squeezed. class TestSqueezeOp3(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (6, 1, 5, 1, 4, 1) self.axes = (1, -1) @@ -109,6 +117,7 @@ class TestSqueezeOp3(TestSqueezeOp): # Correct: The demension of axis is not of size 1 remains unchanged. class TestSqueezeOp4(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (6, 1, 5, 1, 4, 1) self.axes = (1, 2) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_sum_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_sum_op_mlu.py index e9db14de46a..428401408bf 100755 --- a/python/paddle/fluid/tests/unittests/mlu/test_sum_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_sum_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestSum1(OpTest): + def setUp(self): self.set_mlu() self.init_dtype() @@ -54,6 +56,7 @@ class TestSum1(OpTest): class TestSum2(OpTest): + def setUp(self): self.set_mlu() self.init_dtype() @@ -66,7 +69,7 @@ class TestSum2(OpTest): x3 = np.random.random((3, 3)).astype(self.dtype) self.inputs = {'X': [("x0", x0), ("x1", x1), ("x2", x2), ("x3", x3)]} # There will be a problem if just using `y=x0+x1+x2+x3` to calculate the - # summation result as the reference standard result. The reason is that + # summation result as the reference standard result. The reason is that # numpy's fp16 data has precision loss when doing `add` operation. # For example, the results of `x0+x1+x2+x3` is different from that of # `x3+x2+x1+x0` if the dtype is fp16. @@ -88,6 +91,7 @@ class TestSum2(OpTest): class TestSum3(OpTest): + def setUp(self): self.set_mlu() self.init_dtype() diff --git a/python/paddle/fluid/tests/unittests/mlu/test_tanh_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_tanh_op_mlu.py index a5aeeac0ffb..e1023a94bec 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_tanh_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_tanh_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestTanh(OpTest): + def setUp(self): self.set_mlu() self.op_type = "tanh" @@ -58,6 +60,7 @@ class TestTanh(OpTest): class TestTanhFp16(OpTest): + def setUp(self): self.set_mlu() self.op_type = "tanh" @@ -84,6 +87,7 @@ class TestTanhFp16(OpTest): class TestTanhNet(unittest.TestCase): + def _test(self, run_mlu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -98,8 +102,9 @@ class TestTanhNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') c = paddle.multiply(a, b) d = paddle.tanh(c) @@ -123,12 +128,13 @@ class TestTanhNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | 
Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_top_k_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_top_k_op_mlu.py index 366f783ce0d..33caf2ff522 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_top_k_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_top_k_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append('..') from op_test import OpTest import paddle @@ -26,6 +27,7 @@ paddle.enable_static() class TestTopkOp(OpTest): + def setUp(self): self.variable_k = False self.set_args() @@ -66,6 +68,7 @@ class TestTopkOp(OpTest): class TestTopkFP16Op(TestTopkOp): + def init_dtype(self): self.dtype = np.float16 diff --git a/python/paddle/fluid/tests/unittests/mlu/test_top_k_v2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_top_k_v2_op_mlu.py index 8979344bd45..57081f1a545 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_top_k_v2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_top_k_v2_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append('..') from op_test import OpTest import paddle @@ -40,6 +41,7 @@ def numpy_topk(x, k=1, axis=-1, largest=True): class TestTopkOp(OpTest): + def init_args(self): self.k = 3 self.axis = 1 @@ -55,8 +57,10 @@ class TestTopkOp(OpTest): self.init_args() self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest} - output, indices = numpy_topk( - self.input_data, axis=self.axis, k=self.k, largest=self.largest) + output, indices = numpy_topk(self.input_data, + axis=self.axis, + k=self.k, + largest=self.largest) self.outputs = {'Out': output, 'Indices': indices} def test_check_output(self): @@ -65,6 +69,7 @@ class TestTopkOp(OpTest): class TestTopkOp1(TestTopkOp): + def init_args(self): self.k = 3 self.axis = 0 @@ -72,6 +77,7 @@ class TestTopkOp1(TestTopkOp): class TestTopkOp2(TestTopkOp): + def init_args(self): self.k = 4 self.axis = 0 @@ -79,6 +85,7 @@ class TestTopkOp2(TestTopkOp): class TestTopkOp3(OpTest): + def init_args(self): self.k = 6 self.axis = 1 @@ -91,12 +98,15 @@ class TestTopkOp3(OpTest): self.init_args() self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest} - output, indices = numpy_topk( - self.input_data, axis=self.axis, k=self.k, largest=self.largest) + output, indices = numpy_topk(self.input_data, + axis=self.axis, + k=self.k, + largest=self.largest) self.outputs = {'Out': output, 'Indices': indices} class TestTopkOp4(TestTopkOp): + def init_args(self): self.k = 3 self.axis = 1 @@ -111,12 +121,15 @@ class TestTopkOp4(TestTopkOp): self.init_args() self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest} - output, indices = numpy_topk( - self.input_data, axis=self.axis, k=self.k, largest=self.largest) + output, indices = numpy_topk(self.input_data, + axis=self.axis, + k=self.k, + largest=self.largest) self.outputs = {'Out': output, 'Indices': indices} class TestTopkOp5(TestTopkOp): + def init_args(self): self.k = 3 self.axis = 1 @@ -131,12 +144,15 @@ class TestTopkOp5(TestTopkOp): self.init_args() self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest} - output, indices = numpy_topk( - self.input_data, axis=self.axis, k=self.k, largest=self.largest) + output, 
indices = numpy_topk(self.input_data, + axis=self.axis, + k=self.k, + largest=self.largest) self.outputs = {'Out': output, 'Indices': indices} class TestTopkOp6(OpTest): + def init_args(self): self.k = 100 self.axis = 1 @@ -151,12 +167,15 @@ class TestTopkOp6(OpTest): self.init_args() self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest} - output, indices = numpy_topk( - self.input_data, axis=self.axis, k=self.k, largest=self.largest) + output, indices = numpy_topk(self.input_data, + axis=self.axis, + k=self.k, + largest=self.largest) self.outputs = {'Out': output, 'Indices': indices} class TestTopKAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.dtype = np.float32 @@ -195,15 +214,16 @@ class TestTopKAPI(unittest.TestCase): numpy_result = numpy_topk(self.input_data, k=2, axis=-1, largest=False) self.assertTrue(np.allclose(paddle_result[0].numpy(), numpy_result[0])) self.assertTrue(np.allclose(paddle_result[1].numpy(), numpy_result[1])) - # test case for basic test case 6 for the partial sort + # test case for basic test case 6 for the partial sort paddle_result = paddle.topk(large_input_tensor, k=1, axis=-1) numpy_result = numpy_topk(self.large_input_data, k=1, axis=-1) self.assertTrue(np.allclose(paddle_result[0].numpy(), numpy_result[0])) self.assertTrue(np.allclose(paddle_result[1].numpy(), numpy_result[1])) - # test case for basic test case 7 for the unsorted + # test case for basic test case 7 for the unsorted paddle_result = paddle.topk(input_tensor, k=2, axis=1, sorted=False) - sort_paddle = numpy_topk( - np.array(paddle_result[0].numpy()), axis=1, k=2) + sort_paddle = numpy_topk(np.array(paddle_result[0].numpy()), + axis=1, + k=2) numpy_result = numpy_topk(self.input_data, k=2, axis=1) self.assertTrue(np.allclose(sort_paddle[0], numpy_result[0])) @@ -211,10 +231,12 @@ class TestTopKAPI(unittest.TestCase): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - input_tensor = paddle.static.data( - name="x", shape=[6, 7, 8], dtype="float32") - large_input_tensor = paddle.static.data( - name="large_x", shape=[2, 1030], dtype="float32") + input_tensor = paddle.static.data(name="x", + shape=[6, 7, 8], + dtype="float32") + large_input_tensor = paddle.static.data(name="large_x", + shape=[2, 1030], + dtype="float32") k_tensor = paddle.static.data(name="k", shape=[1], dtype="int32") result1 = paddle.topk(input_tensor, k=2) result2 = paddle.topk(input_tensor, k=2, axis=-1) @@ -228,17 +250,18 @@ class TestTopKAPI(unittest.TestCase): exe = paddle.static.Executor(place) input_data = np.random.rand(10, 20).astype("float32") large_input_data = np.random.rand(2, 100).astype("float32") - paddle_result = exe.run( - feed={ - "x": self.input_data, - "large_x": self.large_input_data, - "k": np.array([2]).astype("int32") - }, - fetch_list=[ - result1[0], result1[1], result2[0], result2[1], result3[0], - result3[1], result4[0], result4[1], result5[0], result5[1], - result6[0], result6[1], result7[0], result7[1] - ]) + paddle_result = exe.run(feed={ + "x": self.input_data, + "large_x": self.large_input_data, + "k": np.array([2]).astype("int32") + }, + fetch_list=[ + result1[0], result1[1], result2[0], + result2[1], result3[0], result3[1], + result4[0], result4[1], result5[0], + result5[1], result6[0], result6[1], + result7[0], result7[1] + ]) numpy_result = numpy_topk(self.input_data, k=2) self.assertTrue(np.allclose(paddle_result[0], numpy_result[0])) 
self.assertTrue(np.allclose(paddle_result[1], numpy_result[1])) @@ -248,12 +271,16 @@ class TestTopKAPI(unittest.TestCase): numpy_result = numpy_topk(self.input_data, k=2, axis=1) self.assertTrue(np.allclose(paddle_result[4], numpy_result[0])) self.assertTrue(np.allclose(paddle_result[5], numpy_result[1])) - numpy_result = numpy_topk( - self.input_data, k=2, axis=1, largest=False) + numpy_result = numpy_topk(self.input_data, + k=2, + axis=1, + largest=False) self.assertTrue(np.allclose(paddle_result[6], numpy_result[0])) self.assertTrue(np.allclose(paddle_result[7], numpy_result[1])) - numpy_result = numpy_topk( - self.input_data, k=2, axis=-1, largest=False) + numpy_result = numpy_topk(self.input_data, + k=2, + axis=-1, + largest=False) self.assertTrue(np.allclose(paddle_result[8], numpy_result[0])) self.assertTrue(np.allclose(paddle_result[9], numpy_result[1])) numpy_result = numpy_topk(self.large_input_data, k=1, axis=-1) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py index 6f1bda477f0..bcb41283de9 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append('..') from op_test import OpTest, convert_float_to_uint16 import paddle @@ -28,12 +29,15 @@ paddle.enable_static() class TestTransposeOp(OpTest): + def setUp(self): self.init_op_type() self.initKernelType() self.initTestCase() self.inputs = {'X': np.random.random(self.shape).astype("float32")} - self.attrs = {'axis': list(self.axis), } + self.attrs = { + 'axis': list(self.axis), + } self.outputs = {'Out': self.inputs['X'].transpose(self.axis)} def init_op_type(self): @@ -55,71 +59,83 @@ class TestTransposeOp(OpTest): class TestCase0(TestTransposeOp): + def initTestCase(self): self.shape = (100, ) self.axis = (0, ) class TestCase1(TestTransposeOp): + def initTestCase(self): self.shape = (3, 4, 10) self.axis = (0, 2, 1) class TestCase2(TestTransposeOp): + def initTestCase(self): self.shape = (2, 3, 4, 5) self.axis = (0, 2, 3, 1) class TestCase3(TestTransposeOp): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6) self.axis = (4, 2, 3, 1, 0) class TestCase4(TestTransposeOp): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6, 1) self.axis = (4, 2, 3, 1, 0, 5) class TestCase5(TestTransposeOp): + def initTestCase(self): self.shape = (2, 16, 96) self.axis = (0, 2, 1) class TestCase6(TestTransposeOp): + def initTestCase(self): self.shape = (2, 10, 12, 16) self.axis = (3, 1, 2, 0) class TestCase7(TestTransposeOp): + def initTestCase(self): self.shape = (2, 10, 2, 16) self.axis = (0, 1, 3, 2) class TestCase8(TestTransposeOp): + def initTestCase(self): self.shape = (2, 3, 2, 3, 2, 4, 3, 3) self.axis = (0, 1, 3, 2, 4, 5, 6, 7) class TestCase9(TestTransposeOp): + def initTestCase(self): self.shape = (2, 3, 2, 3, 2, 4, 3, 3) self.axis = (6, 1, 3, 5, 0, 2, 4, 7) class TestTransposeOpBool(TestTransposeOp): + def test_check_grad(self): pass class TestTransposeOpBool1D(TestTransposeOpBool): + def initTestCase(self): self.shape = (100, ) self.axis = (0, ) @@ -128,6 +144,7 @@ class TestTransposeOpBool1D(TestTransposeOpBool): class TestTransposeOpBool2D(TestTransposeOpBool): + def initTestCase(self): self.shape = (3, 40) self.axis = (1, 0) @@ -136,6 +153,7 @@ class TestTransposeOpBool2D(TestTransposeOpBool): class 
TestTransposeOpBool3D(TestTransposeOpBool): + def initTestCase(self): self.shape = (3, 4, 10) self.axis = (0, 2, 1) @@ -144,6 +162,7 @@ class TestTransposeOpBool3D(TestTransposeOpBool): class TestTransposeOpBool4D(TestTransposeOpBool): + def initTestCase(self): self.shape = (2, 3, 4, 5) self.axis = (0, 2, 3, 1) @@ -152,6 +171,7 @@ class TestTransposeOpBool4D(TestTransposeOpBool): class TestTransposeOpBool5D(TestTransposeOpBool): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6) self.axis = (4, 2, 3, 1, 0) @@ -160,6 +180,7 @@ class TestTransposeOpBool5D(TestTransposeOpBool): class TestTransposeOpBool6D(TestTransposeOpBool): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6, 1) self.axis = (4, 2, 3, 1, 0, 5) @@ -168,6 +189,7 @@ class TestTransposeOpBool6D(TestTransposeOpBool): class TestTransposeOpBool7D(TestTransposeOpBool): + def initTestCase(self): self.shape = (2, 3, 2, 3, 2, 4, 3) self.axis = (0, 1, 3, 2, 4, 5, 6) @@ -176,6 +198,7 @@ class TestTransposeOpBool7D(TestTransposeOpBool): class TestTransposeOpBool8D(TestTransposeOpBool): + def initTestCase(self): self.shape = (2, 3, 2, 3, 2, 4, 3, 3) self.axis = (6, 1, 3, 5, 0, 2, 4, 7) @@ -184,6 +207,7 @@ class TestTransposeOpBool8D(TestTransposeOpBool): class TestTransposeOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): @@ -216,6 +240,7 @@ class TestTransposeOpError(unittest.TestCase): class TestTransposeApi(unittest.TestCase): + def test_static_out(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): @@ -252,6 +277,7 @@ class TestTransposeApi(unittest.TestCase): class TestTAPI(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program()): data = fluid.data(shape=[10], dtype="float32", name="data") @@ -318,6 +344,7 @@ class TestTAPI(unittest.TestCase): class TestMoveAxis(unittest.TestCase): + def test_moveaxis1(self): x_np = np.random.randn(2, 3, 4, 5, 7).astype('float32') expected = np.moveaxis(x_np, [0, 4, 3, 2], [1, 3, 2, 0]) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_uniform_random_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_uniform_random_op_mlu.py index 3847b010c14..70289853e89 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_uniform_random_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_uniform_random_op_mlu.py @@ -18,6 +18,7 @@ import sys import subprocess import unittest import numpy as np + sys.path.append("..") from op_test import OpTest import paddle @@ -40,6 +41,7 @@ def output_hist(out): class TestMLUUniformRandomOp(OpTest): + def setUp(self): self.set_mlu() self.op_type = "uniform_random" @@ -69,12 +71,12 @@ class TestMLUUniformRandomOp(OpTest): def verify_output(self, outs): hist, prob = self.output_hist(np.array(outs[0])) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestMLUUniformRandomOpSelectedRows(unittest.TestCase): + def get_places(self): places = [core.CPUPlace()] if core.is_compiled_with_mlu(): @@ -89,19 +91,17 @@ class TestMLUUniformRandomOpSelectedRows(unittest.TestCase): scope = core.Scope() out = scope.var("X").get_selected_rows() paddle.seed(10) - op = Operator( - "uniform_random", - Out="X", - shape=[1000, 784], - min=-5.0, - max=10.0, - seed=10) + op = Operator("uniform_random", + Out="X", + shape=[1000, 784], + min=-5.0, + max=10.0, + seed=10) op.run(scope, place) 
self.assertEqual(out.get_tensor().shape(), [1000, 784]) hist, prob = output_hist(np.array(out.get_tensor())) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/mlu/test_unsqueeze2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_unsqueeze2_op_mlu.py index 0ed5eb7e8a9..0dc498bf6e9 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_unsqueeze2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_unsqueeze2_op_mlu.py @@ -15,6 +15,7 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import numpy as np @@ -27,6 +28,7 @@ paddle.enable_static() # Correct: General. class TestUnsqueezeOp(OpTest): + def setUp(self): self.init_test_case() self.set_mlu() @@ -59,6 +61,7 @@ class TestUnsqueezeOp(OpTest): # Correct: Single input index. class TestUnsqueezeOp1(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (-1, ) @@ -67,6 +70,7 @@ class TestUnsqueezeOp1(TestUnsqueezeOp): # Correct: Mixed input axis. class TestUnsqueezeOp2(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (0, -1) @@ -75,6 +79,7 @@ class TestUnsqueezeOp2(TestUnsqueezeOp): # Correct: There is duplicated axis. class TestUnsqueezeOp3(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (0, 3, 3) @@ -83,6 +88,7 @@ class TestUnsqueezeOp3(TestUnsqueezeOp): # Correct: Reversed axes. class TestUnsqueezeOp4(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (3, 1, 1) @@ -91,6 +97,7 @@ class TestUnsqueezeOp4(TestUnsqueezeOp): # axes is a list(with tensor) class TestUnsqueezeOp_AxesTensorList(OpTest): + def setUp(self): self.init_test_case() self.set_mlu() @@ -131,6 +138,7 @@ class TestUnsqueezeOp_AxesTensorList(OpTest): class TestUnsqueezeOp1_AxesTensorList(TestUnsqueezeOp_AxesTensorList): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (-1, ) @@ -138,6 +146,7 @@ class TestUnsqueezeOp1_AxesTensorList(TestUnsqueezeOp_AxesTensorList): class TestUnsqueezeOp2_AxesTensorList(TestUnsqueezeOp_AxesTensorList): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (0, -1) @@ -145,6 +154,7 @@ class TestUnsqueezeOp2_AxesTensorList(TestUnsqueezeOp_AxesTensorList): class TestUnsqueezeOp3_AxesTensorList(TestUnsqueezeOp_AxesTensorList): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (0, 3, 3) @@ -152,6 +162,7 @@ class TestUnsqueezeOp3_AxesTensorList(TestUnsqueezeOp_AxesTensorList): class TestUnsqueezeOp4_AxesTensorList(TestUnsqueezeOp_AxesTensorList): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (3, 1, 1) @@ -160,6 +171,7 @@ class TestUnsqueezeOp4_AxesTensorList(TestUnsqueezeOp_AxesTensorList): # axes is a Tensor class TestUnsqueezeOp_AxesTensor(OpTest): + def setUp(self): self.init_test_case() self.set_mlu() @@ -195,6 +207,7 @@ class TestUnsqueezeOp_AxesTensor(OpTest): class TestUnsqueezeOp1_AxesTensor(TestUnsqueezeOp_AxesTensor): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (-1, ) @@ -202,6 +215,7 @@ class TestUnsqueezeOp1_AxesTensor(TestUnsqueezeOp_AxesTensor): class TestUnsqueezeOp2_AxesTensor(TestUnsqueezeOp_AxesTensor): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (0, -1) @@ -209,6 +223,7 @@ class TestUnsqueezeOp2_AxesTensor(TestUnsqueezeOp_AxesTensor): 
class TestUnsqueezeOp3_AxesTensor(TestUnsqueezeOp_AxesTensor): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (0, 3, 3) @@ -216,6 +231,7 @@ class TestUnsqueezeOp3_AxesTensor(TestUnsqueezeOp_AxesTensor): class TestUnsqueezeOp4_AxesTensor(TestUnsqueezeOp_AxesTensor): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (3, 1, 1) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_unsqueeze_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_unsqueeze_op_mlu.py index d75a2f4d21a..47ab0c47290 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_unsqueeze_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_unsqueeze_op_mlu.py @@ -15,6 +15,7 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import numpy as np @@ -27,6 +28,7 @@ paddle.enable_static() # Correct: General. class TestUnsqueezeOp(OpTest): + def setUp(self): self.init_test_case() self.set_mlu() @@ -56,6 +58,7 @@ class TestUnsqueezeOp(OpTest): # Correct: Single input index. class TestUnsqueezeOp1(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (-1, ) @@ -64,6 +67,7 @@ class TestUnsqueezeOp1(TestUnsqueezeOp): # Correct: Mixed input axis. class TestUnsqueezeOp2(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (0, -1) @@ -72,6 +76,7 @@ class TestUnsqueezeOp2(TestUnsqueezeOp): # Correct: There is duplicated axis. class TestUnsqueezeOp3(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (0, 3, 3) @@ -80,6 +85,7 @@ class TestUnsqueezeOp3(TestUnsqueezeOp): # Correct: Reversed axes. class TestUnsqueezeOp4(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (3, 1, 1) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_unstack_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_unstack_op_mlu.py index a75a6aa1dfc..23ed85926a0 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_unstack_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_unstack_op_mlu.py @@ -16,6 +16,7 @@ from __future__ import print_function import numpy as np import sys + sys.path.append("..") from op_test import OpTest import unittest @@ -25,6 +26,7 @@ paddle.enable_static() class TestUnStackOpBase(OpTest): + def initDefaultParameters(self): self.input_dim = (5, 6, 7) self.axis = 0 @@ -74,21 +76,25 @@ class TestUnStackOpBase(OpTest): class TestStackOp3(TestUnStackOpBase): + def initParameters(self): self.axis = -1 class TestStackOp4(TestUnStackOpBase): + def initParameters(self): self.axis = -3 class TestStackOp5(TestUnStackOpBase): + def initParameters(self): self.axis = 1 class TestStackOp6(TestUnStackOpBase): + def initParameters(self): self.axis = 2 diff --git a/python/paddle/fluid/tests/unittests/multi_process.py b/python/paddle/fluid/tests/unittests/multi_process.py index f999ce803a5..fa6b7200f32 100644 --- a/python/paddle/fluid/tests/unittests/multi_process.py +++ b/python/paddle/fluid/tests/unittests/multi_process.py @@ -44,7 +44,7 @@ def train_abort(prefix): if trainer_id == 0: try: - # train abort + # train abort exit(1) except SystemExit: name = "abort>>> selected_gpus:{} worker_endpoints:{} trainers_num:{} current_endpoint:{} trainer_id:{}"\ diff --git a/python/paddle/fluid/tests/unittests/my_data_generator.py b/python/paddle/fluid/tests/unittests/my_data_generator.py index ac906b32561..00fd636467c 100644 --- a/python/paddle/fluid/tests/unittests/my_data_generator.py +++ 
b/python/paddle/fluid/tests/unittests/my_data_generator.py @@ -22,7 +22,9 @@ import paddle.distributed.fleet as fleet class MyDataset(fleet.MultiSlotDataGenerator): + def generate_sample(self, line): + def data_iter(): elements = line.strip().split()[0:] output = [("show", [int(elements[0])]), diff --git a/python/paddle/fluid/tests/unittests/new_group.py b/python/paddle/fluid/tests/unittests/new_group.py index c9c4acc3220..56ef510c304 100644 --- a/python/paddle/fluid/tests/unittests/new_group.py +++ b/python/paddle/fluid/tests/unittests/new_group.py @@ -18,6 +18,7 @@ import paddle class TestNewGroupAPI(object): + def __init__(self): paddle.distributed.init_parallel_env() d1 = np.array([1, 2, 3]) @@ -32,26 +33,27 @@ class TestNewGroupAPI(object): tmp = np.array([0, 0, 0]) result = paddle.to_tensor(tmp) - paddle.distributed.scatter( - result, [self.tensor2, self.tensor1], - src=0, - group=gp, - use_calc_stream=True) + paddle.distributed.scatter(result, [self.tensor2, self.tensor1], + src=0, + group=gp, + use_calc_stream=True) if gp.rank == 0: assert np.array_equal(result, self.tensor2) elif gp.rank == 1: assert np.array_equal(result, self.tensor1) print("test scatter api ok") - paddle.distributed.broadcast( - result, src=1, group=gp, use_calc_stream=True) + paddle.distributed.broadcast(result, + src=1, + group=gp, + use_calc_stream=True) assert np.array_equal(result, self.tensor1) print("test broadcast api ok") paddle.distributed.reduce(result, dst=0, group=gp, use_calc_stream=True) if gp.rank == 0: - assert np.array_equal(result, - paddle.add(self.tensor1, self.tensor1)) + assert np.array_equal(result, paddle.add(self.tensor1, + self.tensor1)) elif gp.rank == 1: assert np.array_equal(result, self.tensor1) print("test reduce api ok") @@ -67,8 +69,10 @@ class TestNewGroupAPI(object): print("test wait api ok") result = [] - paddle.distributed.all_gather( - result, self.tensor1, group=gp, use_calc_stream=True) + paddle.distributed.all_gather(result, + self.tensor1, + group=gp, + use_calc_stream=True) assert np.array_equal(result[0], self.tensor1) assert np.array_equal(result[1], self.tensor1) print("test all_gather api ok") diff --git a/python/paddle/fluid/tests/unittests/npu/collective_identity_op_npu.py b/python/paddle/fluid/tests/unittests/npu/collective_identity_op_npu.py index a85bd4fccc3..d5e8a5dd55a 100644 --- a/python/paddle/fluid/tests/unittests/npu/collective_identity_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/collective_identity_op_npu.py @@ -38,6 +38,7 @@ paddle.enable_static() class TestCollectiveIdentity(TestCollectiveRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -45,20 +46,22 @@ class TestCollectiveIdentity(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32') + tindata = layers.data(name="tindata", + shape=[10, 1000], + dtype='float32') toutdata = main_prog.current_block().create_var( name="outofgather", dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, stop_gradient=False) - main_prog.global_block().append_op( - type="c_identity", - inputs={'X': tindata}, - outputs={'Out': toutdata}, - attrs={'ring_id': ring_id, - 'nranks': nranks}) + main_prog.global_block().append_op(type="c_identity", + inputs={'X': tindata}, + outputs={'Out': toutdata}, + attrs={ + 'ring_id': ring_id, + 'nranks': nranks + }) return toutdata diff --git a/python/paddle/fluid/tests/unittests/npu/process_group_hccl.py 
b/python/paddle/fluid/tests/unittests/npu/process_group_hccl.py index 37a24885be1..88ab49ea9ad 100644 --- a/python/paddle/fluid/tests/unittests/npu/process_group_hccl.py +++ b/python/paddle/fluid/tests/unittests/npu/process_group_hccl.py @@ -39,6 +39,7 @@ def init_process_group(strategy=None): class TestProcessGroupFp32(unittest.TestCase): + def setUp(self): paddle.seed(2022) random.seed(2022) @@ -234,6 +235,7 @@ class TestProcessGroupFp32(unittest.TestCase): class TestProcessGroupFp16(TestProcessGroupFp32): + def setUp(self): paddle.seed(2022) random.seed(2022) diff --git a/python/paddle/fluid/tests/unittests/npu/sync_batch_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/sync_batch_norm_op_npu.py index 361efebce91..dd2868af0fe 100644 --- a/python/paddle/fluid/tests/unittests/npu/sync_batch_norm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/sync_batch_norm_op_npu.py @@ -18,6 +18,7 @@ import numpy as np import argparse import os import sys + sys.path.append("..") import signal import time @@ -43,6 +44,7 @@ paddle.enable_static() class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -66,11 +68,10 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase): use_cudnn = False with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - data = fluid.layers.data( - name='input', - shape=self.dshape, - dtype=self.dtype, - append_batch_size=False) + data = fluid.layers.data(name='input', + shape=self.dshape, + dtype=self.dtype, + append_batch_size=False) conv = fluid.layers.conv2d( input=data, num_filters=32, diff --git a/python/paddle/fluid/tests/unittests/npu/test_abs_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_abs_op_npu.py index 3c16a24b331..48a07610983 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_abs_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_abs_op_npu.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,6 +17,7 @@ from __future__ import print_function, division import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -26,6 +27,7 @@ paddle.enable_static() class TestNPUAbs(OpTest): + def setUp(self): self.op_type = "abs" self.set_npu() diff --git a/python/paddle/fluid/tests/unittests/npu/test_accuracy_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_accuracy_op_npu.py index 0f55c8b5914..8c7d6fcfb3e 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_accuracy_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_accuracy_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestAccuracy(OpTest): + def setUp(self): self.op_type = "accuracy" self.set_npu() @@ -62,6 +64,7 @@ class TestAccuracy(OpTest): class TestAccuracy2(TestAccuracy): + def setUp(self): self.op_type = "accuracy" self.set_npu() @@ -86,6 +89,7 @@ class TestAccuracy2(TestAccuracy): class TestAccuracyType(TestAccuracy): + def setUp(self): self.op_type = "accuracy" self.set_npu() @@ -110,6 +114,7 @@ class TestAccuracyType(TestAccuracy): class TestAccuracyType2(TestAccuracy): + def setUp(self): self.op_type = "accuracy" self.set_npu() diff --git a/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py index 4899938766f..92cd3025b07 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py @@ -15,6 +15,7 @@ import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestAdam(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -78,6 +80,7 @@ class TestAdam(OpTest): class TestAdamWithEpsilonTensor(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -132,6 +135,7 @@ class TestAdamWithEpsilonTensor(OpTest): class TestAdamOpWithSkipUpdate(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -184,6 +188,7 @@ class TestAdamOpWithSkipUpdate(OpTest): class TestAdamOpWithGlobalBetaPow(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -241,6 +246,7 @@ class TestAdamOpWithGlobalBetaPow(OpTest): class TestNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -255,8 +261,9 @@ class TestNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.pow(sum, 2.0) @@ -280,12 +287,13 @@ class TestNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) @@ -301,6 +309,7 
@@ class TestNet(unittest.TestCase): class TestNetWithEpsilonTensor(unittest.TestCase): + def _test(self, place, use_tensor=True, @@ -331,8 +340,9 @@ class TestNetWithEpsilonTensor(unittest.TestCase): with paddle.utils.unique_name.guard(): a = paddle.static.data(name="a", shape=[2, 2], dtype='float32') b = paddle.static.data(name="b", shape=[2, 2], dtype='float32') - label = paddle.static.data( - name="label", shape=[2, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[2, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.pow(sum, 2.0) @@ -378,12 +388,11 @@ class TestNetWithEpsilonTensor(unittest.TestCase): align_size=256, grad_clip=clip) else: - adam = paddle.optimizer.Adam( - learning_rate=0.01, - beta1=beta1, - beta2=beta2, - epsilon=epsilon, - grad_clip=clip) + adam = paddle.optimizer.Adam(learning_rate=0.01, + beta1=beta1, + beta2=beta2, + epsilon=epsilon, + grad_clip=clip) else: if use_fluid_api: adam = fluid.optimizer.Adam( @@ -396,12 +405,11 @@ class TestNetWithEpsilonTensor(unittest.TestCase): align_size=256, grad_clip=clip) else: - adam = fluid.optimizer.Adam( - learning_rate=0.01, - beta1=beta1_init, - beta2=beta2_init, - epsilon=epsilon_init, - grad_clip=clip) + adam = fluid.optimizer.Adam(learning_rate=0.01, + beta1=beta1_init, + beta2=beta2_init, + epsilon=epsilon_init, + grad_clip=clip) adam.minimize(loss) @@ -412,12 +420,13 @@ class TestNetWithEpsilonTensor(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(10): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) paddle.disable_static() @@ -431,9 +440,10 @@ class TestNetWithEpsilonTensor(unittest.TestCase): for use_fluid_api in [True, False]: for use_global_beta_pow in [True, False]: for flatten_param_grads in [True, False]: - pred, loss = self._test( - place, use_tensor, use_fluid_api, - use_global_beta_pow, flatten_param_grads) + pred, loss = self._test(place, use_tensor, + use_fluid_api, + use_global_beta_pow, + flatten_param_grads) preds.append(pred) losses.append(loss) for pred in preds: diff --git a/python/paddle/fluid/tests/unittests/npu/test_adamw_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_adamw_op_npu.py index 78ee572d11f..8a0966339e8 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_adamw_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_adamw_op_npu.py @@ -15,6 +15,7 @@ import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestAdamW(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -84,6 +86,7 @@ class TestAdamW(OpTest): class TestAdamOpWithSkipUpdate(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -136,6 +139,7 @@ class TestAdamOpWithSkipUpdate(OpTest): class TestAdamOpWithoutDecay(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -188,6 +192,7 @@ class TestAdamOpWithoutDecay(OpTest): class TestNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -202,8 +207,9 @@ class TestNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = 
paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.pow(sum, 2.0) @@ -227,12 +233,13 @@ class TestNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_amp_check_finite_and_scale_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_amp_check_finite_and_scale_op_npu.py index 604eb32db0a..d67b1084579 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_amp_check_finite_and_scale_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_amp_check_finite_and_scale_op_npu.py @@ -15,6 +15,7 @@ import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -26,6 +27,7 @@ paddle.enable_static() class TestCheckFiniteAndUnscale(unittest.TestCase): + def get_prog(self): paddle.enable_static() main_program = paddle.static.Program() @@ -33,8 +35,9 @@ class TestCheckFiniteAndUnscale(unittest.TestCase): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') scale = paddle.static.data(name="scale", shape=[1], dtype='float32') - float_status = paddle.static.data( - name="status", shape=[8], dtype='float32') + float_status = paddle.static.data(name="status", + shape=[8], + dtype='float32') main_program.global_block().append_op( type="alloc_float_status", outputs={"FloatStatus": float_status}) @@ -43,8 +46,9 @@ class TestCheckFiniteAndUnscale(unittest.TestCase): inputs={"FloatStatus": float_status}, outputs={"FloatStatusOut": float_status}) c = paddle.fluid.layers.elementwise_div(a, b) - out, found_inf = check_finite_and_unscale( - [c], scale, float_status=float_status) + out, found_inf = check_finite_and_unscale([c], + scale, + float_status=float_status) return main_program, out, found_inf, float_status @@ -54,9 +58,11 @@ class TestCheckFiniteAndUnscale(unittest.TestCase): exe = fluid.Executor(place) out_, founf_inf_, float_status_ = exe.run( main_program, - feed={"a": a, - "b": b, - "scale": scale}, + feed={ + "a": a, + "b": b, + "scale": scale + }, fetch_list=[out, found_inf, float_status]) print(float_status_) return out_, founf_inf_ @@ -94,6 +100,7 @@ class TestCheckFiniteAndUnscale(unittest.TestCase): class TestCheckFiniteAndUnscaleClearFloatStatus(unittest.TestCase): + def get_prog(self): paddle.enable_static() main_program = paddle.static.Program() @@ -101,8 +108,9 @@ class TestCheckFiniteAndUnscaleClearFloatStatus(unittest.TestCase): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') scale = paddle.static.data(name="scale", shape=[1], dtype='float32') - float_status = paddle.static.data( - name="status", shape=[8], dtype='float32') + float_status = paddle.static.data(name="status", + shape=[8], + dtype='float32') 
main_program.global_block().append_op( type="alloc_float_status", outputs={"FloatStatus": float_status}) @@ -111,8 +119,9 @@ class TestCheckFiniteAndUnscaleClearFloatStatus(unittest.TestCase): inputs={"FloatStatus": float_status}, outputs={"FloatStatusOut": float_status}) c = paddle.fluid.layers.elementwise_div(a, b) - out, found_inf = check_finite_and_unscale( - [c], scale, float_status=float_status) + out, found_inf = check_finite_and_unscale([c], + scale, + float_status=float_status) main_program.global_block().append_op( type="alloc_float_status", outputs={"FloatStatus": float_status}) @@ -121,8 +130,9 @@ class TestCheckFiniteAndUnscaleClearFloatStatus(unittest.TestCase): inputs={"FloatStatus": float_status}, outputs={"FloatStatusOut": float_status}) d = paddle.fluid.layers.elementwise_add(a, b) - out, found_inf = check_finite_and_unscale( - [d], scale, float_status=float_status) + out, found_inf = check_finite_and_unscale([d], + scale, + float_status=float_status) return main_program, out, found_inf, float_status @@ -132,9 +142,11 @@ class TestCheckFiniteAndUnscaleClearFloatStatus(unittest.TestCase): exe = fluid.Executor(place) out_, founf_inf_, float_status_ = exe.run( main_program, - feed={"a": a, - "b": b, - "scale": scale}, + feed={ + "a": a, + "b": b, + "scale": scale + }, fetch_list=[out, found_inf, float_status]) print(float_status_) return out_, founf_inf_ diff --git a/python/paddle/fluid/tests/unittests/npu/test_arg_max_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_arg_max_op_npu.py index c6135383721..12da1794e4c 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_arg_max_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_arg_max_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ paddle.enable_static() class BaseTestCase(OpTest): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -52,6 +54,7 @@ class BaseTestCase(OpTest): # test argmax, dtype: float16 class TestArgMaxFloat16Case1(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4, 5) @@ -60,6 +63,7 @@ class TestArgMaxFloat16Case1(BaseTestCase): class TestArgMaxFloat16Case2(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4, 5) @@ -68,6 +72,7 @@ class TestArgMaxFloat16Case2(BaseTestCase): class TestArgMaxFloat16Case3(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4, 5) @@ -76,6 +81,7 @@ class TestArgMaxFloat16Case3(BaseTestCase): class TestArgMaxFloat16Case4(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4, 5) @@ -84,6 +90,7 @@ class TestArgMaxFloat16Case4(BaseTestCase): class TestArgMaxFloat16Case5(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4) @@ -92,6 +99,7 @@ class TestArgMaxFloat16Case5(BaseTestCase): class TestArgMaxFloat16Case6(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4) @@ -100,6 +108,7 @@ class TestArgMaxFloat16Case6(BaseTestCase): class TestArgMaxFloat16Case7(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4) @@ -108,6 +117,7 @@ class TestArgMaxFloat16Case7(BaseTestCase): class TestArgMaxFloat16Case8(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (1, ) @@ -116,6 +126,7 @@ class TestArgMaxFloat16Case8(BaseTestCase): class 
TestArgMaxFloat16Case9(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (2, ) @@ -124,6 +135,7 @@ class TestArgMaxFloat16Case9(BaseTestCase): class TestArgMaxFloat16Case10(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, ) @@ -133,6 +145,7 @@ class TestArgMaxFloat16Case10(BaseTestCase): # test argmax, dtype: float32 class TestArgMaxFloat32Case1(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4, 5) @@ -141,6 +154,7 @@ class TestArgMaxFloat32Case1(BaseTestCase): class TestArgMaxFloat32Case2(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4, 5) @@ -149,6 +163,7 @@ class TestArgMaxFloat32Case2(BaseTestCase): class TestArgMaxFloat32Case3(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4, 5) @@ -157,6 +172,7 @@ class TestArgMaxFloat32Case3(BaseTestCase): class TestArgMaxFloat32Case4(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4, 5) @@ -165,6 +181,7 @@ class TestArgMaxFloat32Case4(BaseTestCase): class TestArgMaxFloat32Case5(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4) @@ -173,6 +190,7 @@ class TestArgMaxFloat32Case5(BaseTestCase): class TestArgMaxFloat32Case6(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4) @@ -181,6 +199,7 @@ class TestArgMaxFloat32Case6(BaseTestCase): class TestArgMaxFloat32Case7(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4) @@ -189,6 +208,7 @@ class TestArgMaxFloat32Case7(BaseTestCase): class TestArgMaxFloat32Case8(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (1, ) @@ -197,6 +217,7 @@ class TestArgMaxFloat32Case8(BaseTestCase): class TestArgMaxFloat32Case9(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (2, ) @@ -205,6 +226,7 @@ class TestArgMaxFloat32Case9(BaseTestCase): class TestArgMaxFloat32Case10(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, ) @@ -213,6 +235,7 @@ class TestArgMaxFloat32Case10(BaseTestCase): class BaseTestComplex1_1(OpTest): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -233,8 +256,7 @@ class BaseTestComplex1_1(OpTest): 'dtype': int(core.VarDesc.VarType.INT32) } self.outputs = { - 'Out': np.argmax( - self.x, axis=self.axis).astype("int32") + 'Out': np.argmax(self.x, axis=self.axis).astype("int32") } def test_check_output(self): @@ -242,6 +264,7 @@ class BaseTestComplex1_1(OpTest): class BaseTestComplex1_2(OpTest): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -262,8 +285,7 @@ class BaseTestComplex1_2(OpTest): 'dtype': int(core.VarDesc.VarType.INT32) } self.outputs = { - 'Out': np.argmax( - self.x, axis=self.axis).astype("int32") + 'Out': np.argmax(self.x, axis=self.axis).astype("int32") } def test_check_output(self): @@ -271,6 +293,7 @@ class BaseTestComplex1_2(OpTest): class TestArgMaxAPI(unittest.TestCase): + def initTestCase(self): self.dims = (3, 4, 5) self.dtype = 'float32' @@ -282,6 +305,7 @@ class TestArgMaxAPI(unittest.TestCase): self.place = [paddle.NPUPlace(0)] def test_dygraph_api(self): + def run(place): paddle.disable_static(place) np.random.seed(2021) @@ -289,8 +313,8 @@ class TestArgMaxAPI(unittest.TestCase): tensor_input = paddle.to_tensor(numpy_input) numpy_output = np.argmax(numpy_input, axis=self.axis) paddle_output = paddle.argmax(tensor_input, 
axis=self.axis) - self.assertEqual( - np.allclose(numpy_output, paddle_output.numpy()), True) + self.assertEqual(np.allclose(numpy_output, paddle_output.numpy()), + True) paddle.enable_static() for place in self.place: @@ -298,6 +322,7 @@ class TestArgMaxAPI(unittest.TestCase): class TestArgMaxAPI_2(unittest.TestCase): + def initTestCase(self): self.dims = (3, 4, 5) self.dtype = 'float32' @@ -310,17 +335,19 @@ class TestArgMaxAPI_2(unittest.TestCase): self.place = [paddle.NPUPlace(0)] def test_dygraph_api(self): + def run(place): paddle.disable_static(place) np.random.seed(2021) numpy_input = (np.random.random(self.dims)).astype(self.dtype) tensor_input = paddle.to_tensor(numpy_input) - numpy_output = np.argmax( - numpy_input, axis=self.axis).reshape(1, 4, 5) - paddle_output = paddle.argmax( - tensor_input, axis=self.axis, keepdim=self.keep_dims) - self.assertEqual( - np.allclose(numpy_output, paddle_output.numpy()), True) + numpy_output = np.argmax(numpy_input, + axis=self.axis).reshape(1, 4, 5) + paddle_output = paddle.argmax(tensor_input, + axis=self.axis, + keepdim=self.keep_dims) + self.assertEqual(np.allclose(numpy_output, paddle_output.numpy()), + True) self.assertEqual(numpy_output.shape, paddle_output.numpy().shape) paddle.enable_static() @@ -329,6 +356,7 @@ class TestArgMaxAPI_2(unittest.TestCase): class TestArgMaxAPI_3(unittest.TestCase): + def initTestCase(self): self.dims = (1, 9) self.dtype = 'float32' @@ -339,6 +367,7 @@ class TestArgMaxAPI_3(unittest.TestCase): self.place = [paddle.NPUPlace(0)] def test_dygraph_api(self): + def run(place): paddle.disable_static(place) np.random.seed(2021) @@ -346,8 +375,8 @@ class TestArgMaxAPI_3(unittest.TestCase): tensor_input = paddle.to_tensor(numpy_input) numpy_output = np.argmax(numpy_input).reshape([1]) paddle_output = paddle.argmax(tensor_input) - self.assertEqual( - np.allclose(numpy_output, paddle_output.numpy()), True) + self.assertEqual(np.allclose(numpy_output, paddle_output.numpy()), + True) self.assertEqual(numpy_output.shape, paddle_output.numpy().shape) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/npu/test_arg_min_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_arg_min_op_npu.py index 455f92b8ed6..b129c673c32 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_arg_min_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_arg_min_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -26,6 +27,7 @@ paddle.enable_static() class BaseTestCase(OpTest): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4) @@ -51,6 +53,7 @@ class BaseTestCase(OpTest): # test argmin, dtype: float16 class TestArgMinFloat16Case1(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4, 5) @@ -59,6 +62,7 @@ class TestArgMinFloat16Case1(BaseTestCase): class TestArgMinFloat16Case2(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4, 5) @@ -67,6 +71,7 @@ class TestArgMinFloat16Case2(BaseTestCase): class TestArgMinFloat16Case3(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4, 5) @@ -75,6 +80,7 @@ class TestArgMinFloat16Case3(BaseTestCase): class TestArgMinFloat16Case4(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4, 5) @@ -83,6 +89,7 @@ class TestArgMinFloat16Case4(BaseTestCase): class TestArgMinFloat16Case5(BaseTestCase): + def 
initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4) @@ -91,6 +98,7 @@ class TestArgMinFloat16Case5(BaseTestCase): class TestArgMinFloat16Case6(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4) @@ -99,6 +107,7 @@ class TestArgMinFloat16Case6(BaseTestCase): class TestArgMinFloat16Case7(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4) @@ -107,6 +116,7 @@ class TestArgMinFloat16Case7(BaseTestCase): class TestArgMinFloat16Case8(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (1, ) @@ -115,6 +125,7 @@ class TestArgMinFloat16Case8(BaseTestCase): class TestArgMinFloat16Case9(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (2, ) @@ -123,6 +134,7 @@ class TestArgMinFloat16Case9(BaseTestCase): class TestArgMinFloat16Case10(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, ) @@ -132,6 +144,7 @@ class TestArgMinFloat16Case10(BaseTestCase): # test argmin, dtype: float32 class TestArgMinFloat32Case1(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4, 5) @@ -140,6 +153,7 @@ class TestArgMinFloat32Case1(BaseTestCase): class TestArgMinFloat32Case2(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4, 5) @@ -148,6 +162,7 @@ class TestArgMinFloat32Case2(BaseTestCase): class TestArgMinFloat32Case3(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4, 5) @@ -156,6 +171,7 @@ class TestArgMinFloat32Case3(BaseTestCase): class TestArgMinFloat32Case4(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4, 5) @@ -164,6 +180,7 @@ class TestArgMinFloat32Case4(BaseTestCase): class TestArgMinFloat32Case5(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4) @@ -172,6 +189,7 @@ class TestArgMinFloat32Case5(BaseTestCase): class TestArgMinFloat32Case6(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4) @@ -180,6 +198,7 @@ class TestArgMinFloat32Case6(BaseTestCase): class TestArgMinFloat32Case7(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4) @@ -188,6 +207,7 @@ class TestArgMinFloat32Case7(BaseTestCase): class TestArgMinFloat32Case8(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (1, ) @@ -196,6 +216,7 @@ class TestArgMinFloat32Case8(BaseTestCase): class TestArgMinFloat32Case9(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (2, ) @@ -204,6 +225,7 @@ class TestArgMinFloat32Case9(BaseTestCase): class TestArgMinFloat32Case10(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, ) @@ -212,6 +234,7 @@ class TestArgMinFloat32Case10(BaseTestCase): class TestArgMinAPI(unittest.TestCase): + def initTestCase(self): self.dims = (3, 4, 5) self.dtype = 'float32' @@ -223,6 +246,7 @@ class TestArgMinAPI(unittest.TestCase): self.place = [paddle.NPUPlace(0)] def test_dygraph_api(self): + def run(place): paddle.disable_static(place) np.random.seed(2021) @@ -230,8 +254,8 @@ class TestArgMinAPI(unittest.TestCase): tensor_input = paddle.to_tensor(numpy_input) numpy_output = np.argmin(numpy_input, axis=self.axis) paddle_output = paddle.argmin(tensor_input, axis=self.axis) - self.assertEqual( - np.allclose(numpy_output, paddle_output.numpy()), True) + self.assertEqual(np.allclose(numpy_output, paddle_output.numpy()), + True) paddle.enable_static() for place in 
self.place: @@ -239,6 +263,7 @@ class TestArgMinAPI(unittest.TestCase): class TestArgMaxAPI_2(unittest.TestCase): + def initTestCase(self): self.dims = (3, 4, 5) self.dtype = 'float32' @@ -251,17 +276,19 @@ class TestArgMaxAPI_2(unittest.TestCase): self.place = [paddle.NPUPlace(0)] def test_dygraph_api(self): + def run(place): paddle.disable_static(place) np.random.seed(2021) numpy_input = (np.random.random(self.dims)).astype(self.dtype) tensor_input = paddle.to_tensor(numpy_input) - numpy_output = np.argmin( - numpy_input, axis=self.axis).reshape(1, 4, 5) - paddle_output = paddle.argmin( - tensor_input, axis=self.axis, keepdim=self.keep_dims) - self.assertEqual( - np.allclose(numpy_output, paddle_output.numpy()), True) + numpy_output = np.argmin(numpy_input, + axis=self.axis).reshape(1, 4, 5) + paddle_output = paddle.argmin(tensor_input, + axis=self.axis, + keepdim=self.keep_dims) + self.assertEqual(np.allclose(numpy_output, paddle_output.numpy()), + True) self.assertEqual(numpy_output.shape, paddle_output.numpy().shape) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/npu/test_argsort_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_argsort_op_npu.py index ebabea93dd0..59a5f35c99e 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_argsort_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_argsort_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -32,6 +33,7 @@ paddle.enable_static() class TestArgsortOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "argsort" @@ -50,11 +52,9 @@ class TestArgsortOp(OpTest): def get_output(self): if self.descending: self.indices = np.flip( - np.argsort( - self.x, kind='heapsort', axis=self.axis), self.axis) + np.argsort(self.x, kind='heapsort', axis=self.axis), self.axis) self.sorted_x = np.flip( - np.sort( - self.x, kind='heapsort', axis=self.axis), self.axis) + np.sort(self.x, kind='heapsort', axis=self.axis), self.axis) else: self.indices = np.argsort(self.x, kind='heapsort', axis=self.axis) self.sorted_x = np.sort(self.x, kind='heapsort', axis=self.axis) @@ -80,68 +80,80 @@ class TestArgsortOp(OpTest): class TestArgsortOpAxis0NPU(TestArgsortOp): + def init_axis(self): self.axis = 0 class TestArgsortOpAxis1NPU(TestArgsortOp): + def init_axis(self): self.axis = 1 class TestArgsortOpAxis2NPU(TestArgsortOp): + def init_axis(self): self.axis = 2 class TestArgsortOpAxisNeg1NPU(TestArgsortOp): + def init_axis(self): self.axis = -1 class TestArgsortOpAxisNeg2NPU(TestArgsortOp): + def init_axis(self): self.axis = -2 class TestArgsortOpDescendingAxisNPU(TestArgsortOp): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis0NPU(TestArgsortOpAxis0NPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis1NPU(TestArgsortOpAxis1NPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis2NPU(TestArgsortOpAxis2NPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxisNeg1NPU(TestArgsortOpAxisNeg1NPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxisNeg2NPU(TestArgsortOpAxisNeg2NPU): + def init_direction(self): self.descending = True -# liurui25: argsort of npu has bug with type fp32, -# it will change the type from fp32 to fp16, +# liurui25: argsort of npu has bug with type fp32, +# it will change the type from fp32 to fp16, 
# so the check_output_with_place add thw atol # this test is only used to test the grad # issue: https://gitee.com/ascend/modelzoo/issues/I44I7K class TestArgsortOpAxis0NPUFP32(TestArgsortOp): + def init_axis(self): self.axis = 0 @@ -155,62 +167,74 @@ class TestArgsortOpAxis0NPUFP32(TestArgsortOp): self.__class__.use_npu = True def test_check_grad(self): - self.check_grad_with_place( - self.place, ["X"], "Out", max_relative_error=0.03) + self.check_grad_with_place(self.place, ["X"], + "Out", + max_relative_error=0.03) class TestArgsortOpAxis1NPUFP32(TestArgsortOpAxis0NPUFP32): + def init_axis(self): self.axis = 1 class TestArgsortOpAxis2NPUFP32(TestArgsortOpAxis0NPUFP32): + def init_axis(self): self.axis = 2 class TestArgsortOpAxisNeg1NPUFP32(TestArgsortOpAxis0NPUFP32): + def init_axis(self): self.axis = -1 class TestArgsortOpAxisNeg2NPUFP32(TestArgsortOpAxis0NPUFP32): + def init_axis(self): self.axis = -2 class TestArgsortOpDescendingAxisNPUFP32(TestArgsortOpAxis0NPUFP32): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis0NPUFP32(TestArgsortOpAxis0NPUFP32): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis1NPUFP32(TestArgsortOpAxis1NPUFP32): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis2NPUFP32(TestArgsortOpAxis2NPUFP32): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxisNeg1NPUFP32(TestArgsortOpAxisNeg1NPUFP32): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxisNeg2NPUFP32(TestArgsortOpAxisNeg2NPUFP32): + def init_direction(self): self.descending = True # test cases for int64 class TestArgsortOpAxis0NPUINT64(TestArgsortOp): + def setUp(self): self.set_npu() self.op_type = "argsort" @@ -220,9 +244,10 @@ class TestArgsortOpAxis0NPUINT64(TestArgsortOp): self.init_axis() self.init_direction() - self.x = np.random.randint( - low=-100, high=100, size=self.input_shape, - dtype=self.dtype).astype(self.dtype) + self.x = np.random.randint(low=-100, + high=100, + size=self.input_shape, + dtype=self.dtype).astype(self.dtype) self.inputs = {"X": self.x} self.attrs = {"axis": self.axis, "descending": self.descending} self.get_output() @@ -242,51 +267,61 @@ class TestArgsortOpAxis0NPUINT64(TestArgsortOp): class TestArgsortOpAxis1NPUINT64(TestArgsortOpAxis0NPUINT64): + def init_axis(self): self.axis = 1 class TestArgsortOpAxis2NPUINT64(TestArgsortOpAxis0NPUINT64): + def init_axis(self): self.axis = 2 class TestArgsortOpAxisNeg1NPUINT64(TestArgsortOpAxis0NPUINT64): + def init_axis(self): self.axis = -1 class TestArgsortOpAxisNeg2NPUINT64(TestArgsortOpAxis0NPUINT64): + def init_axis(self): self.axis = -2 class TestArgsortOpDescendingAxisNPUINT64(TestArgsortOpAxis0NPUINT64): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis0NPUINT64(TestArgsortOpAxis0NPUINT64): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis1NPUINT64(TestArgsortOpAxis1NPUINT64): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis2NPUINT64(TestArgsortOpAxis2NPUINT64): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxisNeg1NPUINT64(TestArgsortOpAxisNeg1NPUINT64): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxisNeg2NPUINT64(TestArgsortOpAxisNeg2NPUINT64): + def init_direction(self): self.descending = True diff --git a/python/paddle/fluid/tests/unittests/npu/test_assign_op_npu.py 
b/python/paddle/fluid/tests/unittests/npu/test_assign_op_npu.py index 14133d5a385..a070a63e7ed 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_assign_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_assign_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestAssign(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) diff --git a/python/paddle/fluid/tests/unittests/npu/test_assign_value_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_assign_value_op_npu.py index 71d4b45e61b..808996d355f 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_assign_value_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_assign_value_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy import sys + sys.path.append("..") import op_test @@ -30,6 +31,7 @@ numpy.random.seed(2021) class TestAssignValueNPUOp(op_test.OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -56,31 +58,35 @@ class TestAssignValueNPUOp(op_test.OpTest): class TestAssignValueNPUOp2(TestAssignValueNPUOp): + def init_data(self): self.value = numpy.random.random(size=(2, 5)).astype(numpy.int32) self.attrs["int32_values"] = [int(v) for v in self.value.flat] class TestAssignValueNPUOp3(TestAssignValueNPUOp): + def init_data(self): self.value = numpy.random.random(size=(2, 5)).astype(numpy.int64) self.attrs["int64_values"] = [int(v) for v in self.value.flat] class TestAssignValueNPUOp4(TestAssignValueNPUOp): + def init_data(self): - self.value = numpy.random.choice( - a=[False, True], size=(2, 5)).astype(numpy.bool) + self.value = numpy.random.choice(a=[False, True], + size=(2, 5)).astype(numpy.bool) self.attrs["bool_values"] = [int(v) for v in self.value.flat] class TestAssignApi(unittest.TestCase): + def setUp(self): self.init_dtype() - self.value = ( - -100 + 200 * numpy.random.random(size=(2, 5))).astype(self.dtype) - self.place = fluid.NPUPlace(0) if fluid.core.is_compiled_with_npu( - ) else fluid.CPUPlace() + self.value = (-100 + 200 * numpy.random.random(size=(2, 5))).astype( + self.dtype) + self.place = fluid.NPUPlace( + 0) if fluid.core.is_compiled_with_npu() else fluid.CPUPlace() def init_dtype(self): self.dtype = "float32" @@ -93,29 +99,31 @@ class TestAssignApi(unittest.TestCase): exe = fluid.Executor(self.place) [fetched_x] = exe.run(main_program, feed={}, fetch_list=[x]) - self.assertTrue( - numpy.array_equal(fetched_x, self.value), - "fetch_x=%s val=%s" % (fetched_x, self.value)) + self.assertTrue(numpy.array_equal(fetched_x, self.value), + "fetch_x=%s val=%s" % (fetched_x, self.value)) self.assertEqual(fetched_x.dtype, self.value.dtype) class TestAssignApi2(TestAssignApi): + def init_dtype(self): self.dtype = "int32" class TestAssignApi3(TestAssignApi): + def init_dtype(self): self.dtype = "int64" class TestAssignApi4(TestAssignApi): + def setUp(self): self.init_dtype() - self.value = numpy.random.choice( - a=[False, True], size=(2, 5)).astype(numpy.bool) - self.place = fluid.NPUPlace(0) if fluid.core.is_compiled_with_npu( - ) else fluid.CPUPlace() + self.value = numpy.random.choice(a=[False, True], + size=(2, 5)).astype(numpy.bool) + self.place = fluid.NPUPlace( + 0) if fluid.core.is_compiled_with_npu() else fluid.CPUPlace() def init_dtype(self): self.dtype = "bool" diff --git a/python/paddle/fluid/tests/unittests/npu/test_atan_op_npu.py 
b/python/paddle/fluid/tests/unittests/npu/test_atan_op_npu.py index a18b8a03075..b06481d3f73 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_atan_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_atan_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 1024 class TestAtan(OpTest): + def setUp(self): self.set_npu() self.op_type = "atan" @@ -74,11 +76,13 @@ class TestAtan(OpTest): class TestAtanShape(TestAtan): + def set_attrs(self): self.shape = [12, 23, 10] class TestAtanFloat16(TestAtan): + def set_attrs(self): self.dtype = np.float16 diff --git a/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py index e01b2b691a2..c6b7fada1fb 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle import paddle.fluid.core as core @@ -32,6 +33,7 @@ paddle.enable_static() class TestBatchNormOpInference(unittest.TestCase): + def setUp(self): self.dtype = np.float32 self.init_kernel_type() @@ -81,8 +83,9 @@ class TestBatchNormOpInference(unittest.TestCase): with fluid.program_guard(program): block = program.global_block() for name in ground_truth: - block.create_var( - name=name, dtype="float32", shape=ground_truth[name].shape) + block.create_var(name=name, + dtype="float32", + shape=ground_truth[name].shape) inputs = { "X": block.var("x"), "Scale": block.var("scale"), @@ -106,8 +109,10 @@ class TestBatchNormOpInference(unittest.TestCase): } block.create_var(name="reserve_space", dtype='float32') outputs["ReserveSpace"] = block.var('reserve_space') - bn_op = block.append_op( - type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs) + bn_op = block.append_op(type="batch_norm", + inputs=inputs, + outputs=outputs, + attrs=attrs) program._sync_with_cpp() @@ -132,6 +137,7 @@ class TestBatchNormOpInference(unittest.TestCase): class TestFP16BatchNormOpInference(TestBatchNormOpInference): + def setUp(self): self.dtype = np.float16 self.init_kernel_type() @@ -139,6 +145,7 @@ class TestFP16BatchNormOpInference(TestBatchNormOpInference): class TestBatchNormOpTraining(unittest.TestCase): + def set_npu(self): self.__class__.use_npu = True @@ -177,8 +184,9 @@ class TestBatchNormOpTraining(unittest.TestCase): variance_out = var_ref * (1. - momentum) + momentum * variance saved_variance = 1. 
/ np.sqrt(var_ref + epsilon) # run backward - x_grad, scale_grad, bias_grad = _reference_grad( - x, y_grad, scale, saved_mean, var_ref, epsilon, data_layout) + x_grad, scale_grad, bias_grad = _reference_grad(x, y_grad, scale, + saved_mean, var_ref, + epsilon, data_layout) return y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad @@ -194,6 +202,7 @@ class TestBatchNormOpTraining(unittest.TestCase): return mean, variance def test_forward_backward(self): + def test_with_place(place, data_layout, shape): # attr epsilon = self.epsilon @@ -246,10 +255,9 @@ class TestBatchNormOpTraining(unittest.TestCase): with fluid.program_guard(program): block = program.global_block() for name in ground_truth: - block.create_var( - name=name, - dtype='float32', - shape=ground_truth[name].shape) + block.create_var(name=name, + dtype='float32', + shape=ground_truth[name].shape) inputs = { "X": block.var('x'), "Scale": block.var('scale'), @@ -279,11 +287,10 @@ class TestBatchNormOpTraining(unittest.TestCase): } block.create_var(name="reserve_space", dtype='float32') outputs["ReserveSpace"] = block.var('reserve_space') - bn_op = block.append_op( - type="batch_norm", - inputs=inputs, - outputs=outputs, - attrs=attrs) + bn_op = block.append_op(type="batch_norm", + inputs=inputs, + outputs=outputs, + attrs=attrs) block.create_var(name='y@GRAD', dtype=self.dtype, shape=y.shape) # generate backward op_desc @@ -315,8 +322,10 @@ class TestBatchNormOpTraining(unittest.TestCase): for id, name in enumerate(self.fetch_list): if name == 'variance': - self.__assert_close( - var_dict[name], out[id], name, atol=1e-3) + self.__assert_close(var_dict[name], + out[id], + name, + atol=1e-3) continue self.__assert_close(var_dict[name], out[id], name) print("op test forward passed: ", str(place), data_layout) @@ -330,11 +339,13 @@ class TestBatchNormOpTraining(unittest.TestCase): class TestFP16BatchNormOpTraining(TestBatchNormOpTraining): + def init_dtype(self): self.dtype = np.float16 class TestBatchNormOpTrainingCase1(TestBatchNormOpTraining): + def init_test_case(self): self.use_global_stats = False self.no_grad_set = set(['scale@GRAD', 'bias@GRAD']) @@ -342,6 +353,7 @@ class TestBatchNormOpTrainingCase1(TestBatchNormOpTraining): class TestBatchNormOpTrainingMomentumVariable(TestBatchNormOpTraining): + def init_test_case(self): self.use_momentum_variable = True self.use_global_stats = False @@ -353,6 +365,7 @@ class TestBatchNormOpTrainingMomentumVariable(TestBatchNormOpTraining): class TestBatchNormOpFreezeStatsTraining(TestBatchNormOpTraining): + def init_test_case(self): self.use_global_stats = True self.no_grad_set = set() @@ -436,6 +449,7 @@ class TestBatchNormOpFreezeStatsTraining(TestBatchNormOpTraining): class TestBatchNormOpFreezeStatsAndScaleBiasTraining( TestBatchNormOpFreezeStatsTraining): + def init_test_case(self): self.use_global_stats = True self.no_grad_set = set(['scale@GRAD', 'bias@GRAD']) @@ -443,6 +457,7 @@ class TestBatchNormOpFreezeStatsAndScaleBiasTraining( class TestDygraphBatchNormTrainableStats(unittest.TestCase): + def test_dygraph(self): places = [fluid.NPUPlace(0)] for p in places: diff --git a/python/paddle/fluid/tests/unittests/npu/test_bce_loss_npu.py b/python/paddle/fluid/tests/unittests/npu/test_bce_loss_npu.py index 7c3d32647ae..b7a5cd2405e 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_bce_loss_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_bce_loss_npu.py @@ -19,6 +19,7 @@ import paddle.fluid as fluid import numpy as np import unittest 
import sys + sys.path.append("..") from op_test import OpTest @@ -33,23 +34,27 @@ def test_static_layer(place, prog = paddle.static.Program() startup_prog = paddle.static.Program() with paddle.static.program_guard(prog, startup_prog): - input = paddle.fluid.data( - name='input', shape=input_np.shape, dtype='float32') - label = paddle.fluid.data( - name='label', shape=label_np.shape, dtype='float32') + input = paddle.fluid.data(name='input', + shape=input_np.shape, + dtype='float32') + label = paddle.fluid.data(name='label', + shape=label_np.shape, + dtype='float32') if weight_np is not None: - weight = paddle.fluid.data( - name='weight', shape=weight_np.shape, dtype='float32') - bce_loss = paddle.nn.loss.BCELoss( - weight=weight, reduction=reduction) + weight = paddle.fluid.data(name='weight', + shape=weight_np.shape, + dtype='float32') + bce_loss = paddle.nn.loss.BCELoss(weight=weight, + reduction=reduction) else: bce_loss = paddle.nn.loss.BCELoss(reduction=reduction) res = bce_loss(input, label) exe = paddle.static.Executor(place) static_result = exe.run(prog, - feed={"input": input_np, - "label": label_np} - if weight_np is None else { + feed={ + "input": input_np, + "label": label_np + } if weight_np is None else { "input": input_np, "label": label_np, "weight": weight_np @@ -66,23 +71,30 @@ def test_static_functional(place, prog = paddle.static.Program() startup_prog = paddle.static.Program() with paddle.static.program_guard(prog, startup_prog): - input = paddle.fluid.data( - name='input', shape=input_np.shape, dtype='float32') - label = paddle.fluid.data( - name='label', shape=label_np.shape, dtype='float32') + input = paddle.fluid.data(name='input', + shape=input_np.shape, + dtype='float32') + label = paddle.fluid.data(name='label', + shape=label_np.shape, + dtype='float32') if weight_np is not None: - weight = paddle.fluid.data( - name='weight', shape=weight_np.shape, dtype='float32') - res = paddle.nn.functional.binary_cross_entropy( - input, label, weight=weight, reduction=reduction) + weight = paddle.fluid.data(name='weight', + shape=weight_np.shape, + dtype='float32') + res = paddle.nn.functional.binary_cross_entropy(input, + label, + weight=weight, + reduction=reduction) else: - res = paddle.nn.functional.binary_cross_entropy( - input, label, reduction=reduction) + res = paddle.nn.functional.binary_cross_entropy(input, + label, + reduction=reduction) exe = paddle.static.Executor(place) static_result = exe.run(prog, - feed={"input": input_np, - "label": label_np} - if weight_np is None else { + feed={ + "input": input_np, + "label": label_np + } if weight_np is None else { "input": input_np, "label": label_np, "weight": weight_np @@ -119,11 +131,14 @@ def test_dygraph_functional(place, if weight_np is not None: weight = paddle.to_tensor(weight_np) - dy_res = paddle.nn.functional.binary_cross_entropy( - input, label, weight=weight, reduction=reduction) + dy_res = paddle.nn.functional.binary_cross_entropy(input, + label, + weight=weight, + reduction=reduction) else: - dy_res = paddle.nn.functional.binary_cross_entropy( - input, label, reduction=reduction) + dy_res = paddle.nn.functional.binary_cross_entropy(input, + label, + reduction=reduction) dy_result = dy_res.numpy() paddle.enable_static() return dy_result @@ -148,6 +163,7 @@ def calc_bceloss(input_np, label_np, reduction='mean', weight_np=None): class TestBCELoss(unittest.TestCase): + def test_BCELoss(self): input_np = np.random.uniform(0.1, 0.8, size=(20, 30)).astype(np.float32) label_np = np.random.randint(0, 2, size=(20, 
30)).astype(np.float32) @@ -165,8 +181,8 @@ class TestBCELoss(unittest.TestCase): self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) self.assertTrue(np.allclose(dy_result, expected)) - static_functional = test_static_functional(place, input_np, - label_np, reduction) + static_functional = test_static_functional( + place, input_np, label_np, reduction) dy_functional = test_dygraph_functional(place, input_np, label_np, reduction) self.assertTrue(np.allclose(static_functional, expected)) @@ -174,43 +190,57 @@ class TestBCELoss(unittest.TestCase): self.assertTrue(np.allclose(dy_functional, expected)) def test_BCELoss_weight(self): - input_np = np.random.uniform( - 0.1, 0.8, size=(2, 3, 4, 10)).astype(np.float32) - label_np = np.random.randint( - 0, 2, size=(2, 3, 4, 10)).astype(np.float32) + input_np = np.random.uniform(0.1, 0.8, + size=(2, 3, 4, 10)).astype(np.float32) + label_np = np.random.randint(0, 2, + size=(2, 3, 4, 10)).astype(np.float32) weight_np = np.random.random(size=(3, 4, 10)).astype(np.float32) - place = fluid.NPUPlace(0) if fluid.core.is_compiled_with_npu( - ) else fluid.CPUPlace() + place = fluid.NPUPlace( + 0) if fluid.core.is_compiled_with_npu() else fluid.CPUPlace() for reduction in ['sum', 'mean', 'none']: - static_result = test_static_layer( - place, input_np, label_np, reduction, weight_np=weight_np) - dy_result = test_dygraph_layer( - place, input_np, label_np, reduction, weight_np=weight_np) - expected = calc_bceloss( - input_np, label_np, reduction, weight_np=weight_np) + static_result = test_static_layer(place, + input_np, + label_np, + reduction, + weight_np=weight_np) + dy_result = test_dygraph_layer(place, + input_np, + label_np, + reduction, + weight_np=weight_np) + expected = calc_bceloss(input_np, + label_np, + reduction, + weight_np=weight_np) self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) self.assertTrue(np.allclose(dy_result, expected)) - static_functional = test_static_functional( - place, input_np, label_np, reduction, weight_np=weight_np) - dy_functional = test_dygraph_functional( - place, input_np, label_np, reduction, weight_np=weight_np) + static_functional = test_static_functional(place, + input_np, + label_np, + reduction, + weight_np=weight_np) + dy_functional = test_dygraph_functional(place, + input_np, + label_np, + reduction, + weight_np=weight_np) self.assertTrue(np.allclose(static_functional, expected)) self.assertTrue(np.allclose(static_functional, dy_functional)) self.assertTrue(np.allclose(dy_functional, expected)) def test_BCELoss_error(self): paddle.disable_static(paddle.NPUPlace(0)) - self.assertRaises( - ValueError, paddle.nn.loss.BCELoss, reduction="unsupport reduction") + self.assertRaises(ValueError, + paddle.nn.loss.BCELoss, + reduction="unsupport reduction") input = paddle.to_tensor([[0.1, 0.3]], dtype='float32') label = paddle.to_tensor([[0.0, 1.0]], dtype='float32') - self.assertRaises( - ValueError, - paddle.nn.functional.binary_cross_entropy, - input=input, - label=label, - reduction="unsupport reduction") + self.assertRaises(ValueError, + paddle.nn.functional.binary_cross_entropy, + input=input, + label=label, + reduction="unsupport reduction") paddle.enable_static() @@ -219,6 +249,7 @@ def bce_loss(input, label): class TestBceLossOp(OpTest): + def setUp(self): self.set_npu() self.init_test_case() @@ -245,11 +276,13 @@ class TestBceLossOp(OpTest): class TestBceLossOpCase1(OpTest): + def init_test_cast(self): 
self.shape = [2, 3, 4, 5] class TestBceLossOpCase2(OpTest): + def init_test_cast(self): self.shape = [2, 3, 20] diff --git a/python/paddle/fluid/tests/unittests/npu/test_beam_search_decode_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_beam_search_decode_op_npu.py index 647bd29ffae..0a45cec0d0c 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_beam_search_decode_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_beam_search_decode_op_npu.py @@ -44,37 +44,28 @@ class TestBeamSearchDecodeNPUOp(unittest.TestCase): # beam_size = 2, end_id = 1 # start with start_id [ - self.append_lod_tensor( - array, [[0, 1, 2], [0, 1, 2]], np.array( - [0, 0], dtype=dtype)) + self.append_lod_tensor(array, [[0, 1, 2], [0, 1, 2]], + np.array([0, 0], dtype=dtype)) for array, dtype in ((ids, "int64"), (scores, "float32")) ] [ - self.append_lod_tensor( - array, [[0, 1, 2], [0, 2, 4]], - np.array( - [2, 3, 4, 5], dtype=dtype)) + self.append_lod_tensor(array, [[0, 1, 2], [0, 2, 4]], + np.array([2, 3, 4, 5], dtype=dtype)) for array, dtype in ((ids, "int64"), (scores, "float32")) ] [ - self.append_lod_tensor( - array, [[0, 2, 4], [0, 2, 2, 4, 4]], - np.array( - [3, 1, 5, 4], dtype=dtype)) + self.append_lod_tensor(array, [[0, 2, 4], [0, 2, 2, 4, 4]], + np.array([3, 1, 5, 4], dtype=dtype)) for array, dtype in ((ids, "int64"), (scores, "float32")) ] [ - self.append_lod_tensor( - array, [[0, 2, 4], [0, 1, 2, 3, 4]], - np.array( - [1, 1, 3, 5], dtype=dtype)) + self.append_lod_tensor(array, [[0, 2, 4], [0, 1, 2, 3, 4]], + np.array([1, 1, 3, 5], dtype=dtype)) for array, dtype in ((ids, "int64"), (scores, "float32")) ] [ - self.append_lod_tensor( - array, [[0, 2, 4], [0, 0, 0, 2, 2]], - np.array( - [5, 1], dtype=dtype)) + self.append_lod_tensor(array, [[0, 2, 4], [0, 0, 0, 2, 2]], + np.array([5, 1], dtype=dtype)) for array, dtype in ((ids, "int64"), (scores, "float32")) ] @@ -90,7 +81,8 @@ class TestBeamSearchDecodeNPUOp(unittest.TestCase): SentenceIds="sentence_ids", SentenceScores="sentence_scores", beam_size=2, - end_id=1, ) + end_id=1, + ) beam_search_decode_op.run(self.scope, self.place) @@ -101,8 +93,8 @@ class TestBeamSearchDecodeNPUOp(unittest.TestCase): expected_data = np.array( [0, 2, 3, 1, 0, 2, 1, 0, 4, 5, 3, 5, 0, 4, 5, 3, 1], "int64") self.assertTrue(np.array_equal(np.array(sentence_ids), expected_data)) - self.assertTrue( - np.array_equal(np.array(sentence_scores), expected_data)) + self.assertTrue(np.array_equal(np.array(sentence_scores), + expected_data)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/npu/test_beam_search_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_beam_search_op_npu.py index 14e4fbb73fd..dcfa60e7462 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_beam_search_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_beam_search_op_npu.py @@ -16,6 +16,7 @@ from __future__ import print_function import paddle import sys + sys.path.append("..") from op_test import OpTest import unittest @@ -26,6 +27,7 @@ paddle.enable_static() class TestBeamSearchNPUOp(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -57,19 +59,18 @@ class TestBeamSearchNPUOp(OpTest): self.beam_size = 2 self.is_accumulated = True self.pre_ids = np.array([[1], [2], [3], [4]], dtype='int64') - self.ids = np.array( - [[4, 2, 5], [2, 1, 3], [3, 5, 2], [8, 2, 1]], dtype='int64') + self.ids = np.array([[4, 2, 5], [2, 1, 3], [3, 5, 2], [8, 2, 1]], + dtype='int64') self.lod = [[2, 2], [1, 1, 1, 1]] self.out_lod = [[2, 2], [1, 1, 
1, 1]] self.offset_lod = [[0, 2, 4], [0, 1, 2, 3, 4]] - self.score = np.array( - [ - [0.5, 0.3, 0.2], - [0.6, 0.3, 0.1], - [0.9, 0.5, 0.1], - [0.7, 0.5, 0.1], - ], - dtype='float32') + self.score = np.array([ + [0.5, 0.3, 0.2], + [0.6, 0.3, 0.1], + [0.9, 0.5, 0.1], + [0.7, 0.5, 0.1], + ], + dtype='float32') self.pre_score = np.array([[0.1], [0.2], [0.3], [0.4]], dtype='float32') self.selected_ids = np.array([4, 2, 3, 8])[:, np.newaxis] self.selected_scores = np.array([0.5, 0.6, 0.9, 0.7])[:, np.newaxis] @@ -80,6 +81,7 @@ class TestBeamSearchNPUOp(OpTest): class TestBeamSearchNPUOp2(TestBeamSearchNPUOp): + def init_data(self): self.beam_size = 2 self.is_accumulated = True @@ -88,13 +90,13 @@ class TestBeamSearchNPUOp2(TestBeamSearchNPUOp): self.lod = [[2, 2], [1, 1, 1, 1]] self.out_lod = [[2, 2], [2, 0, 1, 1]] self.offset_lod = [[0, 2, 4], [0, 2, 2, 3, 4]] - self.score = np.array( - [ - [0.6, 0.9], - [0.5, 0.3], - [0.9, 0.5], - [0.1, 0.7], - ], dtype='float32') + self.score = np.array([ + [0.6, 0.9], + [0.5, 0.3], + [0.9, 0.5], + [0.1, 0.7], + ], + dtype='float32') self.pre_score = np.array([[0.1], [0.2], [0.3], [0.4]], dtype='float32') self.selected_ids = np.array([4, 2, 3, 1])[:, np.newaxis] self.selected_scores = np.array([0.6, 0.9, 0.9, 0.7])[:, np.newaxis] @@ -102,6 +104,7 @@ class TestBeamSearchNPUOp2(TestBeamSearchNPUOp): class TestBeamSearchNPUOp3(TestBeamSearchNPUOp): + def init_data(self): # end_id = 0 self.beam_size = 2 @@ -111,13 +114,13 @@ class TestBeamSearchNPUOp3(TestBeamSearchNPUOp): self.lod = [[2, 2], [1, 1, 1, 1]] self.out_lod = [[2, 2], [1, 1, 0, 2]] self.offset_lod = [[0, 2, 4], [0, 1, 2, 2, 4]] - self.score = np.array( - [ - [0.6, 0.9], - [0.5, 0.3], - [0.9, 0.5], - [0.6, 0.7], - ], dtype='float32') + self.score = np.array([ + [0.6, 0.9], + [0.5, 0.3], + [0.9, 0.5], + [0.6, 0.7], + ], + dtype='float32') self.pre_score = np.array([[0.1], [1.2], [0.5], [0.4]], dtype='float32') self.selected_ids = np.array([2, 0, 8, 1])[:, np.newaxis] self.selected_scores = np.array([0.9, 1.2, 0.6, 0.7])[:, np.newaxis] @@ -125,6 +128,7 @@ class TestBeamSearchNPUOp3(TestBeamSearchNPUOp): class TestBeamSearchNPUOp4(TestBeamSearchNPUOp): + def init_data(self): # is_accumulated = False self.beam_size = 2 @@ -134,21 +138,22 @@ class TestBeamSearchNPUOp4(TestBeamSearchNPUOp): self.lod = [[2, 2], [1, 1, 1, 1]] self.out_lod = [[2, 2], [0, 2, 1, 1]] self.offset_lod = [[0, 2, 4], [0, 0, 2, 3, 4]] - self.score = np.array( - [ - [0.6, 0.9], - [0.5, 0.3], - [0.9, 0.5], - [0.1, 0.7], - ], dtype='float32') + self.score = np.array([ + [0.6, 0.9], + [0.5, 0.3], + [0.9, 0.5], + [0.1, 0.7], + ], + dtype='float32') self.pre_score = np.array([[0.1], [2.2], [0.3], [0.4]], dtype='float32') self.selected_ids = np.array([7, 3, 3, 1])[:, np.newaxis] - self.selected_scores = np.array( - [1.50685, 0.996027, 0.194639, 0.043325])[:, np.newaxis] + self.selected_scores = np.array([1.50685, 0.996027, 0.194639, + 0.043325])[:, np.newaxis] self.parent_idx = np.array([1, 1, 2, 3]) class TestBeamSearchNPUOp5(TestBeamSearchNPUOp): + def init_data(self): # beam_size = 1 self.beam_size = 1 @@ -158,13 +163,13 @@ class TestBeamSearchNPUOp5(TestBeamSearchNPUOp): self.lod = [[1, 1, 1, 1], [1, 1, 1, 1]] self.out_lod = [[1, 1, 1, 1], [1, 1, 1, 1]] self.offset_lod = [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]] - self.score = np.array( - [ - [0.6, 0.9], - [0.5, 0.3], - [0.9, 0.5], - [0.1, 0.7], - ], dtype='float32') + self.score = np.array([ + [0.6, 0.9], + [0.5, 0.3], + [0.9, 0.5], + [0.1, 0.7], + ], + dtype='float32') self.pre_score = 
np.array([[0.1], [0.2], [0.3], [0.4]], dtype='float32') self.selected_ids = np.array([2, 7, 3, 1])[:, np.newaxis] self.selected_scores = np.array([0.9, 0.5, 0.9, 0.7])[:, np.newaxis] diff --git a/python/paddle/fluid/tests/unittests/npu/test_bilinear_interp_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_bilinear_interp_v2_op_npu.py index 6da49b8d84d..44cf417228a 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_bilinear_interp_v2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_bilinear_interp_v2_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle.fluid.core as core @@ -30,6 +31,7 @@ paddle.enable_static() class TestBilinearInterpOp(OpTest): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -127,6 +129,7 @@ class TestBilinearInterpOp(OpTest): class TestBilinearInterpCaseFP16(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpCaseFP16, self).init_test_case() self.dtype = 'float16' @@ -134,6 +137,7 @@ class TestBilinearInterpCaseFP16(TestBilinearInterpOp): class TestBilinearInterpCase1(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpCase1, self).init_test_case() self.input_shape = [4, 1, 7, 8] @@ -143,6 +147,7 @@ class TestBilinearInterpCase1(TestBilinearInterpOp): class TestBilinearInterpCase2(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpCase2, self).init_test_case() self.input_shape = [3, 3, 9, 6] @@ -152,6 +157,7 @@ class TestBilinearInterpCase2(TestBilinearInterpOp): class TestBilinearInterpCase3(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpCase3, self).init_test_case() self.input_shape = [1, 1, 32, 64] @@ -161,6 +167,7 @@ class TestBilinearInterpCase3(TestBilinearInterpOp): class TestBilinearInterpCase4(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpCase4, self).init_test_case() self.input_shape = [4, 1, 7, 8] @@ -171,6 +178,7 @@ class TestBilinearInterpCase4(TestBilinearInterpOp): class TestBilinearInterpCase5(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpCase5, self).init_test_case() self.input_shape = [3, 3, 9, 6] @@ -181,6 +189,7 @@ class TestBilinearInterpCase5(TestBilinearInterpOp): class TestBilinearInterpCase6(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpCase6, self).init_test_case() self.input_shape = [1, 1, 32, 64] @@ -191,6 +200,7 @@ class TestBilinearInterpCase6(TestBilinearInterpOp): class TestBilinearInterpCase7(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpCase7, self).init_test_case() self.input_shape = [1, 1, 32, 64] @@ -200,6 +210,7 @@ class TestBilinearInterpCase7(TestBilinearInterpOp): class TestBilinearInterpSame(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpSame, self).init_test_case() self.input_shape = [2, 3, 32, 64] @@ -209,6 +220,7 @@ class TestBilinearInterpSame(TestBilinearInterpOp): class TestBilinearInterpActualShape(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpActualShape, self).init_test_case() self.input_shape = [3, 2, 32, 16] @@ -219,6 +231,7 @@ class TestBilinearInterpActualShape(TestBilinearInterpOp): class TestBilinearInterpDataLayout(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpDataLayout, self).init_test_case() self.input_shape = [2, 5, 5, 3] @@ 
-230,24 +243,28 @@ class TestBilinearInterpDataLayout(TestBilinearInterpOp): class TestBilinearInterpOtherMethod1(TestBilinearInterpOp): + def set_align_mode(self): self.align_corners = False self.align_mode = 1 class TestBilinearInterpWithMethod2(TestBilinearInterpOp): + def set_align_mode(self): self.align_corners = False self.align_mode = 0 class TestBilinearInterpWithMethod3(TestBilinearInterpOp): + def set_align_mode(self): self.align_corners = True self.align_mode = 0 class TestBilinearInterpScale1(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpScale1, self).init_test_case() self.input_shape = [2, 3, 5, 7] @@ -257,6 +274,7 @@ class TestBilinearInterpScale1(TestBilinearInterpOp): class TestBilinearInterpScale2(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpScale2, self).init_test_case() self.input_shape = [2, 3, 5, 7] @@ -266,6 +284,7 @@ class TestBilinearInterpScale2(TestBilinearInterpOp): class TestBilinearInterpZero(TestBilinearInterpOp): + def init_test_case(self): super(TestBilinearInterpZero, self).init_test_case() self.input_shape = [2, 3, 5, 7] diff --git a/python/paddle/fluid/tests/unittests/npu/test_box_coder_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_box_coder_op_npu.py index 4d4d61ace84..7febcaba45c 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_box_coder_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_box_coder_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import math import paddle @@ -41,8 +42,9 @@ def box_decoder(t_box, p_box, pb_v, output_box, norm, axis=0): pb_y = pb_y.reshape(shape) if pb_v.ndim == 2: - var_shape = (1, pb_v.shape[0], pb_v.shape[1]) if axis == 0 else ( - pb_v.shape[0], 1, pb_v.shape[1]) + var_shape = (1, pb_v.shape[0], + pb_v.shape[1]) if axis == 0 else (pb_v.shape[0], 1, + pb_v.shape[1]) pb_v = pb_v.reshape(var_shape) if pb_v.ndim == 1: tb_x = pb_v[0] * t_box[:, :, 0] * pb_w + pb_x @@ -112,6 +114,7 @@ def batch_box_coder(p_box, pb_v, t_box, lod, code_type, norm, axis=0): @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestBoxCoderOp(OpTest): + def setUp(self): self.op_type = "box_coder" self.set_npu() @@ -195,9 +198,10 @@ class TestBoxCoderOp(OpTest): self.attrs['axis'] = self.axis def set_outputs(self): - output_box = batch_box_coder( - self.prior_box, self.prior_box_var, self.target_box, self.lod[0], - self.code_type, self.box_normalized, self.axis) + output_box = batch_box_coder(self.prior_box, self.prior_box_var, + self.target_box, self.lod[0], + self.code_type, self.box_normalized, + self.axis) self.outputs = {'OutputBox': output_box.astype(self.dtype)} def test_check_output(self): @@ -205,6 +209,7 @@ class TestBoxCoderOp(OpTest): class TestBoxCoderOpWithoutBoxVar(TestBoxCoderOp): + def set_init_config(self): super(TestBoxCoderOpWithoutBoxVar, self).set_init_config() self.without_prior_box_var = True @@ -212,6 +217,7 @@ class TestBoxCoderOpWithoutBoxVar(TestBoxCoderOp): class TestBoxCoderOpWithLoD(TestBoxCoderOp): + def set_init_config(self): super(TestBoxCoderOpWithLoD, self).set_init_config() self.M = 20 @@ -222,24 +228,28 @@ class TestBoxCoderOpWithLoD(TestBoxCoderOp): class TestBoxCoderOpWithLoDWithVariance(TestBoxCoderOpWithLoD): + def set_init_config(self): super(TestBoxCoderOpWithLoDWithVariance, self).set_init_config() self.use_variance = True class TestBoxCoderOpWithAxis(TestBoxCoderOp): + def 
set_init_config(self): super(TestBoxCoderOpWithAxis, self).set_init_config() self.axis = 1 class TestBoxCoderOpWithVariance(TestBoxCoderOp): + def set_init_config(self): super(TestBoxCoderOpWithVariance, self).set_init_config() self.use_variance = True class TestBoxCoderOpFP16(TestBoxCoderOp): + def init_dtype(self): self.dtype = np.float16 diff --git a/python/paddle/fluid/tests/unittests/npu/test_c_embedding_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_c_embedding_op_npu.py index 533a3fd12fd..586aa513c46 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_c_embedding_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_c_embedding_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/npu/test_c_identity_npu.py b/python/paddle/fluid/tests/unittests/npu/test_c_identity_npu.py index 9ea52a88d98..59a4f6e8cb6 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_c_identity_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_c_identity_npu.py @@ -24,13 +24,15 @@ paddle.enable_static() class TestIdentityOp(TestDistBase): + def _setup_config(self): pass def test_identity(self, col_type="identity"): dist_env = os.environ - self.check_with_place( - "collective_identity_op_npu.py", col_type, need_envs=dist_env) + self.check_with_place("collective_identity_op_npu.py", + col_type, + need_envs=dist_env) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/npu/test_cast_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_cast_op_npu.py index 0d79d9b0723..7761d2f6ede 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_cast_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_cast_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -29,6 +30,7 @@ SEED = 2021 @skip_check_grad_ci(reason="[skip NPU cast grad check] not implemented yet.") class TestCast1(OpTest): + def setUp(self): self.set_npu() self.op_type = "cast" @@ -52,6 +54,7 @@ class TestCast1(OpTest): @skip_check_grad_ci(reason="[skip NPU cast grad check] not implemented yet.") class TestCast2(OpTest): + def setUp(self): self.set_npu() self.op_type = "cast" @@ -75,6 +78,7 @@ class TestCast2(OpTest): @skip_check_grad_ci(reason="[skip NPU cast grad check] not implemented yet.") class TestCast3(OpTest): + def setUp(self): self.set_npu() self.op_type = "cast" diff --git a/python/paddle/fluid/tests/unittests/npu/test_clip_by_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_clip_by_norm_op_npu.py index d71fc142ade..2af58509874 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_clip_by_norm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_clip_by_norm_op_npu.py @@ -20,6 +20,7 @@ import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard import sys + sys.path.append("..") from op_test import OpTest @@ -27,6 +28,7 @@ paddle.enable_static() class TestClipByNormOp(OpTest): + def setUp(self): self.set_npu() self.max_relative_error = 0.006 @@ -35,7 +37,9 @@ class TestClipByNormOp(OpTest): input = np.random.random(self.shape).astype(self.dtype) input[np.abs(input) < self.max_relative_error] = 0.5 self.op_type = "clip_by_norm" - self.inputs = {'X': input, } + self.inputs = { + 'X': input, + } self.attrs = {} 
self.attrs['max_norm'] = self.max_norm norm = np.sqrt(np.sum(np.square(input))) @@ -61,24 +65,28 @@ class TestClipByNormOp(OpTest): class TestCase1(TestClipByNormOp): + def initTestCase(self): self.shape = (100, ) self.max_norm = 1e20 class TestCase2(TestClipByNormOp): + def initTestCase(self): self.shape = (16, 16) self.max_norm = 0.1 class TestCase3(TestClipByNormOp): + def initTestCase(self): self.shape = (4, 8, 16) self.max_norm = 1.0 class TestClipByNormOpFp16(TestClipByNormOp): + def init_dtype(self): self.dtype = np.float16 @@ -87,18 +95,21 @@ class TestClipByNormOpFp16(TestClipByNormOp): class TestClipByNormOpFp16Case1(TestClipByNormOpFp16): + def initTestCase(self): self.shape = (100, ) self.max_norm = 1e20 class TestClipByNormOpFp16Case2(TestClipByNormOpFp16): + def initTestCase(self): self.shape = (16, 16) self.max_norm = 0.1 class TestClipByNormOpFp16Case3(TestClipByNormOpFp16): + def initTestCase(self): self.shape = (4, 8, 16) self.max_norm = 1.0 diff --git a/python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py index 65dcc73aa46..cf6af6462d0 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py @@ -20,11 +20,13 @@ import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard import sys + sys.path.append("..") from op_test import OpTest class TestClipOp(OpTest): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -75,6 +77,7 @@ class TestClipOp(OpTest): class TestCase1(TestClipOp): + def initTestCase(self): self.shape = (8, 16, 8) self.max = 0.7 @@ -82,6 +85,7 @@ class TestCase1(TestClipOp): class TestCase2(TestClipOp): + def initTestCase(self): self.shape = (8, 16) self.max = 1.0 @@ -89,6 +93,7 @@ class TestCase2(TestClipOp): class TestCase3(TestClipOp): + def initTestCase(self): self.shape = (4, 8, 16) self.max = 0.7 @@ -96,6 +101,7 @@ class TestCase3(TestClipOp): class TestCase4(TestClipOp): + def initTestCase(self): self.shape = (4, 8, 8) self.max = 0.7 @@ -105,6 +111,7 @@ class TestCase4(TestClipOp): class TestCase5(TestClipOp): + def initTestCase(self): self.shape = (4, 8, 16) self.max = 0.5 @@ -112,6 +119,7 @@ class TestCase5(TestClipOp): class TestClipOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): @@ -131,6 +139,7 @@ class TestClipOpError(unittest.TestCase): class TestClipAPI(unittest.TestCase): + def _executed_api(self, x, min=None, max=None): return paddle.clip(x, min, max) @@ -142,8 +151,8 @@ class TestClipAPI(unittest.TestCase): min = fluid.data(name='min', shape=[1], dtype='float32') max = fluid.data(name='max', shape=[1], dtype='float32') - place = fluid.NPUPlace(0) if fluid.core.is_compiled_with_npu( - ) else fluid.CPUPlace() + place = fluid.NPUPlace( + 0) if fluid.core.is_compiled_with_npu() else fluid.CPUPlace() exe = fluid.Executor(place) out_1 = self._executed_api(images, min=min, max=max) @@ -162,9 +171,7 @@ class TestClipAPI(unittest.TestCase): "min": np.array([0.2]).astype('float32'), "max": np.array([0.8]).astype('float32') }, - fetch_list=[ - out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8 - ]) + fetch_list=[out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8]) self.assertTrue(np.allclose(res1, data.clip(0.2, 0.8))) self.assertTrue(np.allclose(res2, data.clip(0.2, 0.9))) @@ -178,8 +185,8 @@ class TestClipAPI(unittest.TestCase): def test_clip_dygraph(self): 
paddle.disable_static() - place = fluid.NPUPlace(0) if fluid.core.is_compiled_with_npu( - ) else fluid.CPUPlace() + place = fluid.NPUPlace( + 0) if fluid.core.is_compiled_with_npu() else fluid.CPUPlace() paddle.disable_static(place) data_shape = [1, 9, 9, 4] data = np.random.random(data_shape).astype('float32') @@ -207,6 +214,7 @@ class TestClipAPI(unittest.TestCase): class TestInplaceClipAPI(TestClipAPI): + def _executed_api(self, x, min=None, max=None): return x.clip_(min, max) diff --git a/python/paddle/fluid/tests/unittests/npu/test_coalesce_tensor_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_coalesce_tensor_op_npu.py index 93a969bf10f..313ab90c93f 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_coalesce_tensor_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_coalesce_tensor_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -29,6 +30,7 @@ alignment = 512 class TestAllocContinuousSpace(OpTest): + def setUp(self): self.__class__.use_npu = True self.op_type = "coalesce_tensor" @@ -80,10 +82,12 @@ class TestAllocContinuousSpace(OpTest): self.check_output_with_place( place=paddle.NPUPlace(0), no_check_set=["FusedOutput"], - atol=1e-5, ) + atol=1e-5, + ) class TestAllocContinuousSpace2(TestAllocContinuousSpace): + def init_attr(self): return { "copy_data": True, @@ -98,7 +102,8 @@ class TestAllocContinuousSpace2(TestAllocContinuousSpace): self.check_output_with_place( place=paddle.NPUPlace(0), no_check_set=["FusedOutput"], - atol=1e-5, ) + atol=1e-5, + ) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/npu/test_collective_base_npu.py b/python/paddle/fluid/tests/unittests/npu/test_collective_base_npu.py index 774423a8be1..69f3b1bcbe4 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_collective_base_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_collective_base_npu.py @@ -31,6 +31,7 @@ from six import string_types class TestCollectiveRunnerBase(object): + def get_model(self, train_prog, startup_prog): raise NotImplementedError( "get model should be implemented by child class.") @@ -42,9 +43,8 @@ class TestCollectiveRunnerBase(object): not_ready_endpoints = [] for ep in endpoints: ip_port = ep.split(":") - with closing( - socket.socket(socket.AF_INET, - socket.SOCK_STREAM)) as sock: + with closing(socket.socket(socket.AF_INET, + socket.SOCK_STREAM)) as sock: sock.settimeout(2) sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) if hasattr(socket, 'SO_REUSEPORT'): @@ -57,13 +57,14 @@ class TestCollectiveRunnerBase(object): not_ready_endpoints.append(ep) if not all_ok: sys.stderr.write("server not ready, wait 3 sec to retry...\n") - sys.stderr.write("not ready endpoints:" + str( - not_ready_endpoints) + "\n") + sys.stderr.write("not ready endpoints:" + + str(not_ready_endpoints) + "\n") sys.stderr.flush() time.sleep(3) else: break + #endpoints should be ["ip1:port1","ip2:port2"] def initCommunicator(self, program, rank, nranks, wait_port, @@ -73,29 +74,26 @@ class TestCollectiveRunnerBase(object): if rank == 0 and wait_port: self.wait_server_ready(other_endpoints) block = program.global_block() - hccl_id_var = block.create_var( - name=nameGen.generate('hccl_id'), - persistable=True, - type=core.VarDesc.VarType.RAW) - block.append_op( - type='c_gen_hccl_id', - inputs={}, - outputs={'Out': hccl_id_var}, - attrs={ - 'rank': rank, - 'endpoint': current_endpoint, - 'other_endpoints': 
other_endpoints - }) - block.append_op( - type='c_comm_init_hccl', - inputs={'X': hccl_id_var}, - outputs={}, - attrs={ - 'rank': rank, - 'ring_id': self.global_ring_id, - 'device_id': int(os.getenv("FLAGS_selected_npus")), - 'rank_ids': nranks - }) + hccl_id_var = block.create_var(name=nameGen.generate('hccl_id'), + persistable=True, + type=core.VarDesc.VarType.RAW) + block.append_op(type='c_gen_hccl_id', + inputs={}, + outputs={'Out': hccl_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints + }) + block.append_op(type='c_comm_init_hccl', + inputs={'X': hccl_id_var}, + outputs={}, + attrs={ + 'rank': rank, + 'ring_id': self.global_ring_id, + 'device_id': int(os.getenv("FLAGS_selected_npus")), + 'rank_ids': nranks + }) def run_trainer(self, args): train_prog = fluid.Program() @@ -138,6 +136,7 @@ from contextlib import closing class TestDistBase(unittest.TestCase): + def setUp(self): self._port_set = set() self._trainers = 2 @@ -146,6 +145,7 @@ class TestDistBase(unittest.TestCase): self._python_interp = sys.executable def _find_free_port(self): + def __free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: @@ -185,18 +185,16 @@ class TestDistBase(unittest.TestCase): tr1_cmd = tr_cmd % (self._python_interp, model_file) tr0_pipe = open("/tmp/tr0_err.log", "wb") tr1_pipe = open("/tmp/tr1_err.log", "wb") - #print(tr0_cmd) - tr0_proc = subprocess.Popen( - tr0_cmd.strip().split(), - stdout=subprocess.PIPE, - stderr=tr0_pipe, - env=env0) - - tr1_proc = subprocess.Popen( - tr0_cmd.strip().split(), - stdout=subprocess.PIPE, - stderr=tr1_pipe, - env=env1) + #print(tr0_cmd) + tr0_proc = subprocess.Popen(tr0_cmd.strip().split(), + stdout=subprocess.PIPE, + stderr=tr0_pipe, + env=env0) + + tr1_proc = subprocess.Popen(tr0_cmd.strip().split(), + stdout=subprocess.PIPE, + stderr=tr1_pipe, + env=env1) tr0_out, tr0_err = tr0_proc.communicate() tr1_out, tr1_err = tr1_proc.communicate() diff --git a/python/paddle/fluid/tests/unittests/npu/test_collective_process_group_hccl.py b/python/paddle/fluid/tests/unittests/npu/test_collective_process_group_hccl.py index 9b2c6fae15e..d3d5ab76a94 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_collective_process_group_hccl.py +++ b/python/paddle/fluid/tests/unittests/npu/test_collective_process_group_hccl.py @@ -16,11 +16,13 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestProcessGroup(TestMultipleGpus): + def test_process_group_nccl(self): self.run_mnist_2gpu('process_group_hccl.py') diff --git a/python/paddle/fluid/tests/unittests/npu/test_compare_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_compare_op_npu.py index 66ce81756fc..ba2e3a083f3 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_compare_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_compare_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -25,7 +26,9 @@ from paddle.fluid import Program, program_guard def create_test_class(op_type, typename, callback): + class Cls(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -76,18 +79,22 @@ def create_test_class(op_type, typename, callback): def test_broadcast_api_1(self): paddle.enable_static() with program_guard(Program(), Program()): - x = paddle.static.data( - name='x', 
shape=[1, 2, 1, 3], dtype=typename) - y = paddle.static.data( - name='y', shape=[1, 2, 3], dtype=typename) + x = paddle.static.data(name='x', + shape=[1, 2, 1, 3], + dtype=typename) + y = paddle.static.data(name='y', + shape=[1, 2, 3], + dtype=typename) op = eval("paddle.%s" % (self.op_type)) out = op(x, y) exe = paddle.static.Executor(self.place) input_x = np.arange(1, 7).reshape((1, 2, 1, 3)).astype(typename) input_y = np.arange(0, 6).reshape((1, 2, 3)).astype(typename) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) @@ -95,18 +102,22 @@ def create_test_class(op_type, typename, callback): def test_broadcast_api_2(self): paddle.enable_static() with program_guard(Program(), Program()): - x = paddle.static.data( - name='x', shape=[1, 2, 3], dtype=typename) - y = paddle.static.data( - name='y', shape=[1, 2, 1, 3], dtype=typename) + x = paddle.static.data(name='x', + shape=[1, 2, 3], + dtype=typename) + y = paddle.static.data(name='y', + shape=[1, 2, 1, 3], + dtype=typename) op = eval("paddle.%s" % (self.op_type)) out = op(x, y) exe = paddle.static.Executor(self.place) input_x = np.arange(0, 6).reshape((1, 2, 3)).astype(typename) input_y = np.arange(1, 7).reshape((1, 2, 1, 3)).astype(typename) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) @@ -122,8 +133,10 @@ def create_test_class(op_type, typename, callback): input_x = np.arange(0, 5).reshape((5)).astype(typename) input_y = np.array([5, 3, 2]).reshape((3, 1)).astype(typename) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) diff --git a/python/paddle/fluid/tests/unittests/npu/test_concat_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_concat_op_npu.py index f9eecefdfb2..4fff3ab5fa0 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_concat_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_concat_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestConcatOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "concat" @@ -43,8 +45,8 @@ class TestConcatOp(OpTest): self.actual_axis = self.axis self.outputs = { - 'Out': np.concatenate( - (self.x0, self.x1, self.x2), axis=self.actual_axis) + 'Out': + np.concatenate((self.x0, self.x1, self.x2), axis=self.actual_axis) } def set_npu(self): @@ -69,6 +71,7 @@ class TestConcatOp(OpTest): class TestConcatOp2(TestConcatOp): + def init_test_data(self): self.x0 = np.random.random((2, 3, 4, 5)).astype(self.dtype) self.x1 = np.random.random((2, 3, 4, 5)).astype(self.dtype) @@ -79,6 +82,7 @@ class TestConcatOp2(TestConcatOp): @skip_check_grad_ci( reason="The function 'check_grad' for large inputs is too slow.") class TestConcatOp3(TestConcatOp): + def init_test_data(self): self.x0 = np.random.random((1, 256, 170, 256)).astype(self.dtype) self.x1 = np.random.random((1, 128, 170, 256)).astype(self.dtype) @@ -90,9 +94,11 @@ class TestConcatOp3(TestConcatOp): @skip_check_grad_ci( - 
reason="This test will meet fetch error when there is a null grad. The detailed information is in PR#17015." + reason= + "This test will meet fetch error when there is a null grad. The detailed information is in PR#17015." ) class TestConcatOp4(TestConcatOp): + def init_test_data(self): self.x0 = np.random.random((2, 3, 4, 5)).astype(self.dtype) self.x1 = np.random.random((2, 3, 4, 5)).astype(self.dtype) @@ -104,6 +110,7 @@ class TestConcatOp4(TestConcatOp): class TestConcatOp5(TestConcatOp): + def init_test_data(self): self.x0 = np.random.random((5, 1, 4, 5)).astype(self.dtype) self.x1 = np.random.random((5, 2, 4, 5)).astype(self.dtype) @@ -113,7 +120,9 @@ class TestConcatOp5(TestConcatOp): #----------------Concat Fp16---------------- def create_test_fp16(parent): + class TestConcatFp16(parent): + def init_dtype(self): self.dtype = np.float16 @@ -131,7 +140,9 @@ create_test_fp16(TestConcatOp5) #----------------Concat Int64---------------- def create_test_int64(parent): + class TestConcatInt64(parent): + def init_dtype(self): self.dtype = np.int64 @@ -170,8 +181,9 @@ class TestConcatAPIWithLoDTensorArray(unittest.TestCase): with fluid.program_guard(self.program): input = fluid.layers.assign(self.x) tensor_array = fluid.layers.create_array(dtype='float32') - zero = fluid.layers.fill_constant( - shape=[1], value=0, dtype="int64") + zero = fluid.layers.fill_constant(shape=[1], + value=0, + dtype="int64") for i in range(self.iter_num): fluid.layers.array_write(input, zero + i, tensor_array) @@ -208,9 +220,8 @@ class TestConcatAPIWithLoDTensorArray(unittest.TestCase): res = exe.run(self.program, fetch_list=self.out_var) self.assertTrue( np.array_equal( - res[0], - np.concatenate( - [self.x] * self.iter_num, axis=self.axis))) + res[0], np.concatenate([self.x] * self.iter_num, + axis=self.axis))) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/npu/test_conv2d_op_depthwise_conv_npu.py b/python/paddle/fluid/tests/unittests/npu/test_conv2d_op_depthwise_conv_npu.py index 2e15a1eac2b..6c300acfe48 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_conv2d_op_depthwise_conv_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_conv2d_op_depthwise_conv_npu.py @@ -19,6 +19,7 @@ import numpy as np import paddle import paddle.fluid as fluid import sys + sys.path.append("..") from op_test import OpTest from test_conv2d_op import conv2d_forward_naive @@ -31,7 +32,9 @@ SEED = 2021 def create_test_channel_last_class(parent): + class TestChannelLastCase(parent): + def init_data_format(self): self.data_format = "NHWC" @@ -45,7 +48,9 @@ def create_test_channel_last_class(parent): def create_test_padding_SAME_class(parent): + class TestPaddingSMAECase(parent): + def init_paddings(self): self.pad = [0, 0] self.padding_algorithm = "SAME" @@ -56,7 +61,9 @@ def create_test_padding_SAME_class(parent): def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): self.pad = [1, 1] self.padding_algorithm = "VALID" @@ -67,7 +74,9 @@ def create_test_padding_VALID_class(parent): def create_test_fp16_class(parent): + class TestFp16Case(parent): + def init_data_type(self): self.dtype = np.float16 @@ -77,6 +86,7 @@ def create_test_fp16_class(parent): class TestDepthwiseConvNPU(OpTest): + def setUp(self): self.set_npu() self.op_type = "depthwise_conv2d" @@ -134,47 +144,41 @@ class TestDepthwiseConvNPU(OpTest): def test_check_grad(self): if self.dilations[0] == 1 and self.dilations[1] == 1: if self.dtype == np.float16: - 
self.check_grad_with_place( - self.place, {'Input', 'Filter'}, - 'Output', - max_relative_error=0.9) + self.check_grad_with_place(self.place, {'Input', 'Filter'}, + 'Output', + max_relative_error=0.9) else: - self.check_grad_with_place( - self.place, {'Input', 'Filter'}, - 'Output', - max_relative_error=0.03, - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, {'Input', 'Filter'}, + 'Output', + max_relative_error=0.03, + numeric_place=paddle.CPUPlace()) def test_check_grad_no_filter(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['Input'], - 'Output', - no_grad_set=set(['Filter']), - max_relative_error=0.9) + self.check_grad_with_place(self.place, ['Input'], + 'Output', + no_grad_set=set(['Filter']), + max_relative_error=0.9) else: - self.check_grad_with_place( - self.place, ['Input'], - 'Output', - no_grad_set=set(['Filter']), - max_relative_error=0.03, - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Input'], + 'Output', + no_grad_set=set(['Filter']), + max_relative_error=0.03, + numeric_place=paddle.CPUPlace()) def test_check_grad_no_input(self): if self.dilations[0] == 1 and self.dilations[1] == 1: if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['Filter'], - 'Output', - no_grad_set=set(['Input']), - max_relative_error=0.9) + self.check_grad_with_place(self.place, ['Filter'], + 'Output', + no_grad_set=set(['Input']), + max_relative_error=0.9) else: - self.check_grad_with_place( - self.place, ['Filter'], - 'Output', - no_grad_set=set(['Input']), - max_relative_error=0.03, - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Filter'], + 'Output', + no_grad_set=set(['Input']), + max_relative_error=0.03, + numeric_place=paddle.CPUPlace()) def init_data_format(self): self.data_format = "NCHW" @@ -187,6 +191,7 @@ class TestDepthwiseConvNPU(OpTest): class TestDepthwiseConvNPU2(TestDepthwiseConvNPU): + def init_test_case(self): self.pad = [1, 1] self.dilations = [1, 1] @@ -199,6 +204,7 @@ class TestDepthwiseConvNPU2(TestDepthwiseConvNPU): class TestDepthwiseConvNPU3(TestDepthwiseConvNPU): + def init_test_case(self): self.pad = [1, 1] self.dilations = [1, 1] @@ -211,6 +217,7 @@ class TestDepthwiseConvNPU3(TestDepthwiseConvNPU): class TestDepthwiseConvNPU4(TestDepthwiseConvNPU): + def init_test_case(self): self.pad = [1, 1] self.dilations = [1, 1] @@ -223,6 +230,7 @@ class TestDepthwiseConvNPU4(TestDepthwiseConvNPU): class TestDepthwiseConvNPU_Padding(OpTest): + def setUp(self): self.op_type = "depthwise_conv2d" self.dtype = np.float32 @@ -242,9 +250,10 @@ class TestDepthwiseConvNPU_Padding(OpTest): input = np.random.random(self.input_size).astype(self.dtype) filter = np.random.uniform(-1, 1, self.filter_size).astype(self.dtype) - output, _, _, _, _ = conv2d_forward_naive( - input, filter, self.groups, conv2d_param, self.padding_algorithm, - self.data_format) + output, _, _, _, _ = conv2d_forward_naive(input, filter, self.groups, + conv2d_param, + self.padding_algorithm, + self.data_format) output = output.astype(self.dtype) self.inputs = { @@ -281,46 +290,40 @@ class TestDepthwiseConvNPU_Padding(OpTest): def test_check_grad(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, {'Input', 'Filter'}, - 'Output', - max_relative_error=1.2) + self.check_grad_with_place(self.place, {'Input', 'Filter'}, + 'Output', + max_relative_error=1.2) else: - self.check_grad_with_place( - self.place, {'Input', 'Filter'}, - 'Output', - 
max_relative_error=0.03, - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, {'Input', 'Filter'}, + 'Output', + max_relative_error=0.03, + numeric_place=paddle.CPUPlace()) def test_check_grad_no_filter(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['Input'], - 'Output', - max_relative_error=0.7, - no_grad_set=set(['Filter'])) + self.check_grad_with_place(self.place, ['Input'], + 'Output', + max_relative_error=0.7, + no_grad_set=set(['Filter'])) else: - self.check_grad_with_place( - self.place, ['Input'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Filter']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter']), + numeric_place=paddle.CPUPlace()) def test_check_grad_no_input(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['Filter'], - 'Output', - max_relative_error=0.8, - no_grad_set=set(['Input'])) + self.check_grad_with_place(self.place, ['Filter'], + 'Output', + max_relative_error=0.8, + no_grad_set=set(['Input'])) else: - self.check_grad_with_place( - self.place, ['Filter'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Input']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input']), + numeric_place=paddle.CPUPlace()) def init_data_format(self): self.data_format = "NCHW" @@ -337,6 +340,7 @@ class TestDepthwiseConvNPU_Padding(OpTest): class TestDepthwiseConvNPU2_Padding(TestDepthwiseConvNPU_Padding): + def init_test_case(self): self.pad = [1, 1, 0, 1] self.dilations = [1, 1] @@ -353,6 +357,7 @@ class TestDepthwiseConvNPU2_Padding(TestDepthwiseConvNPU_Padding): class TestDepthwiseConvNPU3_Padding(TestDepthwiseConvNPU_Padding): + def init_test_case(self): self.pad = [1, 1, 0, 1] self.dilations = [1, 1] diff --git a/python/paddle/fluid/tests/unittests/npu/test_conv2d_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_conv2d_op_npu.py index 4070d0267d9..c2244fb9a6e 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_conv2d_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_conv2d_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle import paddle.fluid.core as core @@ -29,7 +30,9 @@ paddle.enable_static() def create_test_channel_last_class(parent): + class TestChannelLastCase(parent): + def init_data_format(self): self.data_format = "NHWC" @@ -43,7 +46,9 @@ def create_test_channel_last_class(parent): def create_test_padding_SAME_class(parent): + class TestPaddingSMAECase(parent): + def init_paddings(self): self.pad = [0, 0] self.padding_algorithm = "SAME" @@ -54,7 +59,9 @@ def create_test_padding_SAME_class(parent): def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): self.pad = [1, 1] self.padding_algorithm = "VALID" @@ -65,7 +72,9 @@ def create_test_padding_VALID_class(parent): def create_test_fp16_class(parent): + class TestFp16Case(parent): + def init_dtype(self): self.dtype = np.float16 @@ -75,6 +84,7 @@ def create_test_fp16_class(parent): class TestConv2DOp(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -102,12 +112,11 @@ class TestConv2DOp(OpTest): input = np.random.random(self.input_size).astype(self.dtype) filter = np.random.uniform(-1, 1, self.filter_size).astype(self.dtype) - 
output, _, _, _, _ = conv2d_forward_naive( - input, - filter, - self.groups, - conv2d_param, - data_format=self.data_format) + output, _, _, _, _ = conv2d_forward_naive(input, + filter, + self.groups, + conv2d_param, + data_format=self.data_format) output = output.astype(self.dtype) self.inputs = { @@ -127,27 +136,24 @@ class TestConv2DOp(OpTest): self.check_output_with_place(fluid.NPUPlace(0), atol=1e-2) def test_check_grad(self): - self.check_grad_with_place( - fluid.NPUPlace(0), {'Input', 'Filter'}, - 'Output', - max_relative_error=0.03, - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(fluid.NPUPlace(0), {'Input', 'Filter'}, + 'Output', + max_relative_error=0.03, + numeric_place=paddle.CPUPlace()) def test_check_grad_no_filter(self): - self.check_grad_with_place( - fluid.NPUPlace(0), ['Input'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Filter']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(fluid.NPUPlace(0), ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter']), + numeric_place=paddle.CPUPlace()) def test_check_grad_no_input(self): - self.check_grad_with_place( - fluid.NPUPlace(0), ['Filter'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Input']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(fluid.NPUPlace(0), ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input']), + numeric_place=paddle.CPUPlace()) def init_test_case(self): self.pad = [0, 0] @@ -165,6 +171,7 @@ class TestConv2DOp(OpTest): class TestWithPad(TestConv2DOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -175,6 +182,7 @@ class TestWithPad(TestConv2DOp): class TestWithStride(TestConv2DOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -185,6 +193,7 @@ class TestWithStride(TestConv2DOp): class TestWithGroup(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -196,6 +205,7 @@ class TestWithGroup(TestConv2DOp): class TestWith1x1(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -206,12 +216,13 @@ class TestWith1x1(TestConv2DOp): def init_group(self): # FIXME: Supporting group = 3 in this case. - # NOTE(wangran16): There is an unknown error (acl error code is : 507015) + # NOTE(wangran16): There is an unknown error (acl error code is : 507015) # when group = 3, which needs to be fixed. 
self.groups = 1 class TestWithDepthWise5x5(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -225,6 +236,7 @@ class TestWithDepthWise5x5(TestConv2DOp): class TestWithDepthWise7x7(TestConv2DOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -238,6 +250,7 @@ class TestWithDepthWise7x7(TestConv2DOp): class TestWithDilation(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -254,6 +267,7 @@ class TestWithDilation(TestConv2DOp): class TestWithInput1x1Filter1x1(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -267,6 +281,7 @@ class TestWithInput1x1Filter1x1(TestConv2DOp): class TestConv2DOp_v2(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -293,9 +308,10 @@ class TestConv2DOp_v2(OpTest): input = np.random.random(self.input_size).astype(self.dtype) filter = np.random.uniform(-1, 1, self.filter_size).astype(self.dtype) - output, _, _, _, _ = conv2d_forward_naive( - input, filter, self.groups, conv2d_param, self.padding_algorithm, - self.data_format) + output, _, _, _, _ = conv2d_forward_naive(input, filter, self.groups, + conv2d_param, + self.padding_algorithm, + self.data_format) output = output.astype(self.dtype) self.inputs = { @@ -317,45 +333,39 @@ class TestConv2DOp_v2(OpTest): def test_check_grad(self): if self.dtype == np.float16: - self.check_grad_with_place( - paddle.NPUPlace(0), {'Input', 'Filter'}, - 'Output', - max_relative_error=1.1) + self.check_grad_with_place(paddle.NPUPlace(0), {'Input', 'Filter'}, + 'Output', + max_relative_error=1.1) else: - self.check_grad_with_place( - paddle.NPUPlace(0), {'Input', 'Filter'}, - 'Output', - max_relative_error=0.02, - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(paddle.NPUPlace(0), {'Input', 'Filter'}, + 'Output', + max_relative_error=0.02, + numeric_place=paddle.CPUPlace()) def test_check_grad_no_filter(self): if self.dtype == np.float16: - self.check_grad_with_place( - paddle.NPUPlace(0), ['Input'], - 'Output', - max_relative_error=0.99, - no_grad_set=set(['Filter'])) + self.check_grad_with_place(paddle.NPUPlace(0), ['Input'], + 'Output', + max_relative_error=0.99, + no_grad_set=set(['Filter'])) else: - self.check_grad_with_place( - paddle.NPUPlace(0), ['Input'], - 'Output', - max_relative_error=0.02, - no_grad_set=set(['Filter']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(paddle.NPUPlace(0), ['Input'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Filter']), + numeric_place=paddle.CPUPlace()) def test_check_grad_no_input(self): if self.dtype == np.float16: - self.check_grad_with_place( - paddle.NPUPlace(0), ['Filter'], - 'Output', - max_relative_error=0.99, - no_grad_set=set(['Input'])) + self.check_grad_with_place(paddle.NPUPlace(0), ['Filter'], + 'Output', + max_relative_error=0.99, + no_grad_set=set(['Input'])) else: - self.check_grad_with_place( - paddle.NPUPlace(0), ['Filter'], - 'Output', - no_grad_set=set(['Input']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(paddle.NPUPlace(0), ['Filter'], + 'Output', + no_grad_set=set(['Input']), + numeric_place=paddle.CPUPlace()) def init_test_case(self): self.pad = [0, 0] @@ -386,12 +396,14 @@ class TestConv2DOp_v2(OpTest): class TestConv2DOp_AsyPadding(TestConv2DOp_v2): + def init_paddings(self): self.pad = [0, 0, 1, 2] self.padding_algorithm = "EXPLICIT" class TestWithPad_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 3, 5, 5] # 
NCHW @@ -405,6 +417,7 @@ class TestWithPad_AsyPadding(TestConv2DOp_v2): class TestWithStride_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [2, 2] self.input_size = [2, 3, 6, 6] # NCHW @@ -418,6 +431,7 @@ class TestWithStride_AsyPadding(TestConv2DOp_v2): class TestWithGroup_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 2] @@ -429,6 +443,7 @@ class TestWithGroup_AsyPadding(TestConv2DOp_v2): class TestWith1x1_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 3, 5, 5] # NCHW @@ -445,6 +460,7 @@ class TestWith1x1_AsyPadding(TestConv2DOp_v2): class TestWithDepthWise3x3_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [3, 4, 10, 10] # NCHW @@ -464,6 +480,7 @@ class TestWithDepthWise3x3_AsyPadding(TestConv2DOp_v2): class TestWithDepthWise5x5_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 4, 10, 10] # NCHW @@ -480,6 +497,7 @@ class TestWithDepthWise5x5_AsyPadding(TestConv2DOp_v2): class TestWithDepthWise7x7_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [2, 2] self.input_size = [2, 8, 10, 10] # NCHW @@ -496,6 +514,7 @@ class TestWithDepthWise7x7_AsyPadding(TestConv2DOp_v2): class TestWithDilation_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 3, 10, 10] # NCHW @@ -515,6 +534,7 @@ class TestWithDilation_AsyPadding(TestConv2DOp_v2): class TestWithInput1x1Filter1x1_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [100, 1, 1, 1] # NCHW diff --git a/python/paddle/fluid/tests/unittests/npu/test_conv2d_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_conv2d_transpose_op_npu.py index a603f6c9238..c11a583e853 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_conv2d_transpose_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_conv2d_transpose_op_npu.py @@ -20,6 +20,7 @@ import paddle.nn as nn import paddle.fluid.core as core import paddle.fluid as fluid import sys + sys.path.append("..") from op_test import OpTest @@ -29,6 +30,7 @@ paddle.enable_static() class TestConv2DTransposeOp(OpTest): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -78,29 +80,26 @@ class TestConv2DTransposeOp(OpTest): def test_check_grad_no_input(self): if self.need_check_grad: - self.check_grad_with_place( - self.place, ['Filter'], - 'Output', - no_grad_set=set(['Input']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Filter'], + 'Output', + no_grad_set=set(['Input']), + numeric_place=paddle.CPUPlace()) def test_check_grad_no_filter(self): if self.need_check_grad: - self.check_grad_with_place( - self.place, ['Input'], - 'Output', - no_grad_set=set(['Filter']), - max_relative_error=0.006, - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Input'], + 'Output', + no_grad_set=set(['Filter']), + max_relative_error=0.006, + numeric_place=paddle.CPUPlace()) def test_check_grad(self): if self.need_check_grad: - self.check_grad_with_place( - self.place, - set(['Input', 'Filter']), - 'Output', - max_relative_error=0.02, - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, + set(['Input', 'Filter']), + 'Output', + max_relative_error=0.02, + numeric_place=paddle.CPUPlace()) def init_test_case(self): self.pad = [0, 0] @@ -119,6 +118,7 @@ class 
TestConv2DTransposeOp(OpTest): class TestWithSymmetricPad(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -130,12 +130,14 @@ class TestWithSymmetricPad(TestConv2DTransposeOp): class TestWithSymmetricPad_FP16(TestWithSymmetricPad): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithAsymmetricPad(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 0, 1, 2] self.stride = [1, 1] @@ -147,12 +149,14 @@ class TestWithAsymmetricPad(TestConv2DTransposeOp): class TestWithAsymmetricPad_FP16(TestWithAsymmetricPad): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithSAMEPad(TestConv2DTransposeOp): + def init_test_case(self): self.stride = [2, 1] self.dilations = [1, 2] @@ -164,12 +168,14 @@ class TestWithSAMEPad(TestConv2DTransposeOp): class TestWithSAMEPad_FP16(TestWithSAMEPad): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithVALIDPad(TestConv2DTransposeOp): + def init_test_case(self): self.stride = [1, 1] self.dilations = [1, 1] @@ -181,12 +187,14 @@ class TestWithVALIDPad(TestConv2DTransposeOp): class TestWithVALIDPad_FP16(TestWithVALIDPad): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithGroups(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -198,12 +206,14 @@ class TestWithGroups(TestConv2DTransposeOp): class TestWithGroups_FP16(TestWithGroups): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithStride(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -215,12 +225,14 @@ class TestWithStride(TestConv2DTransposeOp): class TestWithStride_FP16(TestWithStride): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithDilation(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -232,12 +244,14 @@ class TestWithDilation(TestConv2DTransposeOp): class TestWithDilation_FP16(TestWithDilation): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithEvenUpsample(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [2, 2] self.stride = [2, 2] @@ -250,12 +264,14 @@ class TestWithEvenUpsample(TestConv2DTransposeOp): class TestWithEvenUpsample_FP16(TestWithEvenUpsample): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithEvenUpsampleOutputPadding(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [2, 2] self.stride = [2, 2] @@ -268,12 +284,14 @@ class TestWithEvenUpsampleOutputPadding(TestConv2DTransposeOp): class TestWithEvenUpsampleOutputPadding_FP16(TestWithEvenUpsampleOutputPadding): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class Test_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -286,12 +304,14 @@ class Test_NHWC(TestConv2DTransposeOp): class Test_NHWC_FP16(Test_NHWC): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithSymmetricPad_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -304,12 +324,14 @@ class TestWithSymmetricPad_NHWC(TestConv2DTransposeOp): class TestWithSymmetricPad_NHWC_FP16(TestWithSymmetricPad_NHWC): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class 
TestWithAsymmetricPad_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 0, 1, 2] self.stride = [1, 1] @@ -322,12 +344,14 @@ class TestWithAsymmetricPad_NHWC(TestConv2DTransposeOp): class TestWithAsymmetricPad_NHWC_FP16(TestWithAsymmetricPad_NHWC): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithGroups_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -340,12 +364,14 @@ class TestWithGroups_NHWC(TestConv2DTransposeOp): class TestWithGroups_NHWC_FP16(TestWithGroups_NHWC): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithStride_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -358,12 +384,14 @@ class TestWithStride_NHWC(TestConv2DTransposeOp): class TestWithStride_NHWC_FP16(TestWithStride_NHWC): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithDilation_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -376,12 +404,14 @@ class TestWithDilation_NHWC(TestConv2DTransposeOp): class TestWithDilation_NHWC_FP16(TestWithDilation_NHWC): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithEvenUpsample_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [2, 2] self.stride = [2, 2] @@ -395,12 +425,14 @@ class TestWithEvenUpsample_NHWC(TestConv2DTransposeOp): class TestWithEvenUpsample_NHWC_FP16(TestWithEvenUpsample_NHWC): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestWithEvenUpsample_NHWC_output_padding(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [2, 2] self.stride = [2, 2] @@ -415,64 +447,63 @@ class TestWithEvenUpsample_NHWC_output_padding(TestConv2DTransposeOp): class TestWithEvenUpsample_NHWC_output_padding_FP16( TestWithEvenUpsample_NHWC_output_padding): + def init_dtype(self): self.dtype = np.float16 self.need_check_grad = False class TestConv2DTransposeAPI(unittest.TestCase): + def test_case1(self): - data1 = fluid.layers.data( - name='data1', shape=[3, 5, 5], dtype='float32') - data2 = fluid.layers.data( - name='data2', shape=[5, 5, 3], dtype='float32') - out1 = fluid.layers.conv2d_transpose( - input=data1, - groups=1, - num_filters=6, - filter_size=3, - data_format='NCHW') - out2 = fluid.layers.conv2d_transpose( - input=data2, - groups=1, - num_filters=6, - filter_size=3, - data_format='NHWC') - out3 = fluid.layers.conv2d_transpose( - input=data1, - groups=1, - num_filters=6, - filter_size=3, - padding=[[0, 0], [1, 1], [1, 1], [0, 0]], - data_format='NHWC') - out4 = fluid.layers.conv2d_transpose( - input=data1, - groups=3, - num_filters=6, - filter_size=3, - padding=[[0, 0], [0, 0], [2, 1], [0, 0]], - data_format='NCHW') - out5 = fluid.layers.conv2d_transpose( - input=data2, - groups=1, - num_filters=6, - filter_size=3, - padding='SAME', - data_format='NCHW') - out6 = fluid.layers.conv2d_transpose( - input=data1, - groups=1, - num_filters=6, - filter_size=3, - padding='VALID', - data_format='NHWC') - out7 = fluid.layers.conv2d_transpose( - input=data1, - groups=1, - num_filters=6, - output_size=[7, 7], - padding=[0, 0], - data_format='NHWC') + data1 = fluid.layers.data(name='data1', + shape=[3, 5, 5], + dtype='float32') + data2 = fluid.layers.data(name='data2', + shape=[5, 5, 3], + dtype='float32') + out1 = fluid.layers.conv2d_transpose(input=data1, + groups=1, + num_filters=6, + 
filter_size=3, + data_format='NCHW') + out2 = fluid.layers.conv2d_transpose(input=data2, + groups=1, + num_filters=6, + filter_size=3, + data_format='NHWC') + out3 = fluid.layers.conv2d_transpose(input=data1, + groups=1, + num_filters=6, + filter_size=3, + padding=[[0, 0], [1, 1], [1, 1], + [0, 0]], + data_format='NHWC') + out4 = fluid.layers.conv2d_transpose(input=data1, + groups=3, + num_filters=6, + filter_size=3, + padding=[[0, 0], [0, 0], [2, 1], + [0, 0]], + data_format='NCHW') + out5 = fluid.layers.conv2d_transpose(input=data2, + groups=1, + num_filters=6, + filter_size=3, + padding='SAME', + data_format='NCHW') + out6 = fluid.layers.conv2d_transpose(input=data1, + groups=1, + num_filters=6, + filter_size=3, + padding='VALID', + data_format='NHWC') + out7 = fluid.layers.conv2d_transpose(input=data1, + groups=1, + num_filters=6, + output_size=[7, 7], + padding=[0, 0], + data_format='NHWC') data1_np = np.random.random((2, 3, 5, 5)).astype("float32") data2_np = np.random.random((2, 5, 5, 3)).astype("float32") @@ -480,12 +511,13 @@ class TestConv2DTransposeAPI(unittest.TestCase): place = core.NPUPlace(0) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - results = exe.run( - fluid.default_main_program(), - feed={"data1": data1_np, - "data2": data2_np}, - fetch_list=[out1, out2, out3, out4, out5, out6, out7], - return_numpy=True) + results = exe.run(fluid.default_main_program(), + feed={ + "data1": data1_np, + "data2": data2_np + }, + fetch_list=[out1, out2, out3, out4, out5, out6, out7], + return_numpy=True) self.assertIsNotNone(results[0]) self.assertIsNotNone(results[1]) self.assertIsNotNone(results[2]) @@ -496,6 +528,7 @@ class TestConv2DTransposeAPI(unittest.TestCase): class TestConv2DTransposeRepr(unittest.TestCase): + def test_case(self): paddle.disable_static(paddle.NPUPlace(0)) x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1., max=1.) 
diff --git a/python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py index d7821f07669..779a75dddb4 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py @@ -18,6 +18,7 @@ import unittest import numpy as np import sys + sys.path.append("..") import paddle import paddle.fluid.core as core @@ -30,7 +31,9 @@ paddle.enable_static() def create_test_padding_SAME_class(parent): + class TestPaddingSMAECase(parent): + def init_paddings(self): self.pad = [0, 0, 0] self.padding_algorithm = "SAME" @@ -41,7 +44,9 @@ def create_test_padding_SAME_class(parent): def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): self.pad = [1, 1, 1] self.padding_algorithm = "VALID" @@ -52,7 +57,9 @@ def create_test_padding_VALID_class(parent): def create_test_channel_last_class(parent): + class TestChannelLastCase(parent): + def init_data_format(self): self.data_format = "NDHWC" @@ -66,7 +73,9 @@ def create_test_channel_last_class(parent): def create_test_fp16_class(parent): + class TestFp16Case(parent): + def init_dtype(self): self.dtype = np.float16 @@ -76,6 +85,7 @@ def create_test_fp16_class(parent): class TestConv3DOp(OpTest): + def setUp(self): self.op_type = "conv3d" self.set_npu() @@ -97,7 +107,8 @@ class TestConv3DOp(OpTest): input, filter, self.groups, - conv3d_param, ).astype(self.dtype) + conv3d_param, + ).astype(self.dtype) self.inputs = { 'Input': OpTest.np_dtype_to_fluid_dtype(input), @@ -119,33 +130,30 @@ class TestConv3DOp(OpTest): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, {'Input', 'Filter'}, - 'Output', - max_relative_error=0.03, - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, {'Input', 'Filter'}, + 'Output', + max_relative_error=0.03, + numeric_place=paddle.CPUPlace()) def test_check_grad_no_filter(self): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, ['Input'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Filter']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter']), + numeric_place=paddle.CPUPlace()) def test_check_grad_no_input(self): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, ['Filter'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Input']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input']), + numeric_place=paddle.CPUPlace()) def set_npu(self): self.__class__.use_npu = True @@ -173,6 +181,7 @@ class TestConv3DOp(OpTest): class TestCase1(TestConv3DOp): + def init_test_case(self): self.pad = [1, 1, 1] self.stride = [1, 1, 1] @@ -186,6 +195,7 @@ class TestCase1(TestConv3DOp): class TestConv3DOp_2(OpTest): + def setUp(self): self.op_type = "conv3d" self.set_npu() @@ -231,33 +241,30 @@ class TestConv3DOp_2(OpTest): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, {'Input', 'Filter'}, - 'Output', - max_relative_error=0.03, - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, {'Input', 'Filter'}, + 'Output', + max_relative_error=0.03, + numeric_place=paddle.CPUPlace()) def test_check_grad_no_filter(self): if self.dtype == np.float16: return - self.check_grad_with_place( 
- self.place, ['Input'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Filter']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter']), + numeric_place=paddle.CPUPlace()) def test_check_grad_no_input(self): if self.dtype == np.float16: return - self.check_grad_with_place( - self.place, ['Filter'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Input']), - numeric_place=paddle.CPUPlace()) + self.check_grad_with_place(self.place, ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input']), + numeric_place=paddle.CPUPlace()) def set_npu(self): self.__class__.use_npu = True @@ -291,6 +298,7 @@ class TestConv3DOp_2(OpTest): class TestConv3DOp_AsyPadding(TestConv3DOp_2): + def init_test_case(self): self.stride = [1, 1, 2] self.input_size = [2, 3, 4, 4, 4] # NCDHW @@ -304,6 +312,7 @@ class TestConv3DOp_AsyPadding(TestConv3DOp_2): class TestConv3DOp_DiffDataInDiffDim(TestConv3DOp_2): + def init_test_case(self): self.stride = [1, 1, 2] self.input_size = [2, 3, 4, 5, 5] # NCDHW @@ -317,6 +326,7 @@ class TestConv3DOp_DiffDataInDiffDim(TestConv3DOp_2): class TestCase1_AsyPadding(TestConv3DOp_2): + def init_test_case(self): self.stride = [1, 1, 1] self.input_size = [2, 3, 4, 4, 4] # NCDHW @@ -331,210 +341,196 @@ class TestCase1_AsyPadding(TestConv3DOp_2): # --------- test python API --------------- class TestConv3DAPI(unittest.TestCase): + def test_api(self): - input_NDHWC = fluid.layers.data( - name="input_NDHWC", - shape=[2, 5, 5, 5, 3], - append_batch_size=False, - dtype="float32") - - input_NCDHW = fluid.layers.data( - name="input_NCDHW", - shape=[2, 3, 5, 5, 3], - append_batch_size=False, - dtype="float32") - - fluid.layers.conv3d( - input=input_NDHWC, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding=0, - dilation=[1, 1, 1], - groups=1, - data_format="NCDHW") - - fluid.layers.conv3d( - input=input_NCDHW, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding=[1, 2, 1, 0, 1, 0], - dilation=[1, 1, 1], - groups=1, - data_format="NCDHW") - - fluid.layers.conv3d( - input=input_NCDHW, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding=[[0, 0], [0, 0], [1, 1], [1, 1], [1, 1]], - dilation=[1, 1, 1], - groups=1, - data_format="NCDHW") - - fluid.layers.conv3d( - input=input_NDHWC, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding=[[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]], - dilation=[1, 1, 1], - groups=1, - data_format="NDHWC") - - fluid.layers.conv3d( - input=input_NCDHW, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding="SAME", - dilation=[1, 1, 1], - groups=1, - data_format="NCDHW") - - fluid.layers.conv3d( - input=input_NCDHW, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding="VALID", - dilation=[1, 1, 1], - groups=1, - data_format="NCDHW") + input_NDHWC = fluid.layers.data(name="input_NDHWC", + shape=[2, 5, 5, 5, 3], + append_batch_size=False, + dtype="float32") + + input_NCDHW = fluid.layers.data(name="input_NCDHW", + shape=[2, 3, 5, 5, 3], + append_batch_size=False, + dtype="float32") + + fluid.layers.conv3d(input=input_NDHWC, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding=0, + dilation=[1, 1, 1], + groups=1, + data_format="NCDHW") + + fluid.layers.conv3d(input=input_NCDHW, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding=[1, 2, 1, 0, 1, 0], + dilation=[1, 1, 1], + groups=1, + 
data_format="NCDHW") + + fluid.layers.conv3d(input=input_NCDHW, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding=[[0, 0], [0, 0], [1, 1], [1, 1], [1, 1]], + dilation=[1, 1, 1], + groups=1, + data_format="NCDHW") + + fluid.layers.conv3d(input=input_NDHWC, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding=[[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]], + dilation=[1, 1, 1], + groups=1, + data_format="NDHWC") + + fluid.layers.conv3d(input=input_NCDHW, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding="SAME", + dilation=[1, 1, 1], + groups=1, + data_format="NCDHW") + + fluid.layers.conv3d(input=input_NCDHW, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding="VALID", + dilation=[1, 1, 1], + groups=1, + data_format="NCDHW") class TestConv3DAPI_Error(unittest.TestCase): + def test_api(self): - input = fluid.layers.data( - name="input", - shape=[2, 5, 5, 5, 4], - append_batch_size=False, - dtype="float32") + input = fluid.layers.data(name="input", + shape=[2, 5, 5, 5, 4], + append_batch_size=False, + dtype="float32") # ValueError: cudnn def run_1(): - fluid.layers.conv3d( - input=input, - num_filters=3, - filter_size=3, - stride=1, - padding=0, - dilation=1, - groups=1, - use_cudnn=[0], - data_format="NCDHW") + fluid.layers.conv3d(input=input, + num_filters=3, + filter_size=3, + stride=1, + padding=0, + dilation=1, + groups=1, + use_cudnn=[0], + data_format="NCDHW") self.assertRaises(ValueError, run_1) # ValueError: data_format def run_2(): - fluid.layers.conv3d( - input=input, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding=0, - dilation=[1, 1, 1], - groups=1, - use_cudnn=False, - data_format="NCHWC") + fluid.layers.conv3d(input=input, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding=0, + dilation=[1, 1, 1], + groups=1, + use_cudnn=False, + data_format="NCHWC") self.assertRaises(ValueError, run_2) # ValueError: padding def run_3(): - fluid.layers.conv3d( - input=input, - num_filters=3, - filter_size=3, - stride=1, - padding="SAMEE", - dilation=1, - groups=1, - use_cudnn=False, - data_format="NCDHW") + fluid.layers.conv3d(input=input, + num_filters=3, + filter_size=3, + stride=1, + padding="SAMEE", + dilation=1, + groups=1, + use_cudnn=False, + data_format="NCDHW") self.assertRaises(ValueError, run_3) def run_4(): - fluid.layers.conv3d( - input=input, - num_filters=3, - filter_size=3, - stride=1, - padding=[[0, 1], [0, 0], [0, 1], [0, 1], [0, 1]], - dilation=1, - groups=1, - use_cudnn=False, - data_format="NCDHW") + fluid.layers.conv3d(input=input, + num_filters=3, + filter_size=3, + stride=1, + padding=[[0, 1], [0, 0], [0, 1], [0, 1], [0, + 1]], + dilation=1, + groups=1, + use_cudnn=False, + data_format="NCDHW") self.assertRaises(ValueError, run_4) def run_5(): - fluid.layers.conv3d( - input=input, - num_filters=3, - filter_size=0, - stride=0, - padding=[[0, 1], [0, 1], [0, 1], [0, 1], [0, 1]], - dilation=1, - groups=1, - use_cudnn=False, - data_format="NDHWC") + fluid.layers.conv3d(input=input, + num_filters=3, + filter_size=0, + stride=0, + padding=[[0, 1], [0, 1], [0, 1], [0, 1], [0, + 1]], + dilation=1, + groups=1, + use_cudnn=False, + data_format="NDHWC") self.assertRaises(ValueError, run_5) # ValueError: channel dimmention - x = fluid.layers.data( - name="x", - shape=[2, 5, 5, 5, -1], - append_batch_size=False, - dtype="float32") + x = fluid.layers.data(name="x", + shape=[2, 5, 5, 5, -1], + append_batch_size=False, + dtype="float32") def run_6(): - 
fluid.layers.conv3d( - input=x, - num_filters=3, - filter_size=3, - stride=1, - padding=0, - dilation=1, - groups=1, - use_cudnn=False, - data_format="NDHWC") + fluid.layers.conv3d(input=x, + num_filters=3, + filter_size=3, + stride=1, + padding=0, + dilation=1, + groups=1, + use_cudnn=False, + data_format="NDHWC") self.assertRaises(ValueError, run_6) # ValueError: groups def run_7(): - fluid.layers.conv3d( - input=input, - num_filters=3, - filter_size=3, - stride=1, - padding=0, - dilation=1, - groups=3, - use_cudnn=False, - data_format="NDHWC") + fluid.layers.conv3d(input=input, + num_filters=3, + filter_size=3, + stride=1, + padding=0, + dilation=1, + groups=3, + use_cudnn=False, + data_format="NDHWC") self.assertRaises(ValueError, run_7) # ValueError: filter num def run_8(): - fluid.layers.conv3d( - input=input, - num_filters=0, - filter_size=0, - stride=0, - padding=0, - dilation=0, - groups=1, - use_cudnn=False, - data_format="NDHWC") + fluid.layers.conv3d(input=input, + num_filters=0, + filter_size=0, + stride=0, + padding=0, + dilation=0, + groups=1, + use_cudnn=False, + data_format="NDHWC") self.assertRaises(ValueError, run_8) diff --git a/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py index a4769442b08..44baf7a547c 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestCos(OpTest): + def setUp(self): self.set_npu() self.op_type = "cos" @@ -55,6 +57,7 @@ class TestCos(OpTest): class TestCosFp16(OpTest): + def setUp(self): self.set_npu() self.op_type = "cos" @@ -81,6 +84,7 @@ class TestCosFp16(OpTest): class TestCosNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -95,8 +99,9 @@ class TestCosNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') c = paddle.multiply(a, b) d = paddle.cos(c) @@ -120,12 +125,13 @@ class TestCosNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_crop_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_crop_op_npu.py index 02168aeb71d..6398d7d1ed5 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_crop_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_crop_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -29,6 +30,7 @@ np.random.seed(10) class TestCropOp(OpTest): + def setUp(self): self.set_npu() 
self.place = paddle.NPUPlace(0) @@ -74,6 +76,7 @@ class TestCropOp(OpTest): class TestCase1(TestCropOp): + def initTestCase(self): self.x_shape = (16, 8, 32) self.crop_shape = [2, 2, 3] @@ -81,6 +84,7 @@ class TestCase1(TestCropOp): class TestCase2(TestCropOp): + def initTestCase(self): self.x_shape = (15, 8) self.crop_shape = [15, 8] @@ -88,6 +92,7 @@ class TestCase2(TestCropOp): class TestCase3(TestCropOp): + def initTestCase(self): self.x_shape = (4, 10) self.crop_shape = [2, 3] @@ -96,6 +101,7 @@ class TestCase3(TestCropOp): class TestCase4(TestCropOp): + def initTestCase(self): self.x_shape = (10, 9, 14) self.crop_shape = [3, 3, 5] @@ -103,6 +109,7 @@ class TestCase4(TestCropOp): class TestCase5(TestCropOp): + def initTestCase(self): self.x_shape = (10, 9, 14) self.crop_shape = [3, 3, 5] @@ -111,6 +118,7 @@ class TestCase5(TestCropOp): class TestCase6(TestCropOp): + def initTestCase(self): self.x_shape = (10, 9, 14) self.crop_shape = [3, 3, 5] @@ -121,6 +129,7 @@ class TestCase6(TestCropOp): class TestCase7(TestCropOp): + def initTestCase(self): self.x_shape = (10, 9, 14) self.crop_shape = [3, 3, 5] @@ -130,6 +139,7 @@ class TestCase7(TestCropOp): class TestCase8(TestCropOp): + def initTestCase(self): self.x_shape = (10, 9, 14) self.crop_shape = [3, 3, 5] @@ -138,6 +148,7 @@ class TestCase8(TestCropOp): class TestCase9(TestCropOp): + def initTestCase(self): self.x_shape = (10, 9, 14) self.crop_shape = [3, 3, 5] @@ -146,6 +157,7 @@ class TestCase9(TestCropOp): class TestCase10(TestCropOp): + def initTestCase(self): self.x_shape = (10, 9, 14) self.crop_shape = [3, 3, 5] diff --git a/python/paddle/fluid/tests/unittests/npu/test_cumsum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_cumsum_op_npu.py index 9289da6641e..9cf22adbb75 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_cumsum_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_cumsum_op_npu.py @@ -26,6 +26,7 @@ paddle.enable_static() class TestCumsumOp(unittest.TestCase): + def run_cases(self): data_np = np.arange(12).reshape(3, 4) data = paddle.to_tensor(data_np) @@ -96,6 +97,7 @@ class TestCumsumOp(unittest.TestCase): class TestNPUCumSumOp1(OpTest): + def setUp(self): self.op_type = "cumsum" self.set_npu() @@ -119,17 +121,18 @@ class TestNPUCumSumOp1(OpTest): class TestNPUCumSumOp2(TestNPUCumSumOp1): + def init_testcase(self): self.attrs = {'axis': -1, 'reverse': True} self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} self.outputs = { - 'Out': np.flip( - np.flip( - self.inputs['X'], axis=2).cumsum(axis=2), axis=2) + 'Out': np.flip(np.flip(self.inputs['X'], axis=2).cumsum(axis=2), + axis=2) } class TestNPUCumSumOp3(TestNPUCumSumOp1): + def init_testcase(self): self.attrs = {'axis': 1} self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} @@ -137,6 +140,7 @@ class TestNPUCumSumOp3(TestNPUCumSumOp1): class TestNPUCumSumOp4(TestNPUCumSumOp1): + def init_testcase(self): self.attrs = {'axis': 0} self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} @@ -144,98 +148,107 @@ class TestNPUCumSumOp4(TestNPUCumSumOp1): class TestNPUCumSumOp5(TestNPUCumSumOp1): + def init_testcase(self): self.inputs = {'X': np.random.random((5, 20)).astype(self.dtype)} self.outputs = {'Out': self.inputs['X'].cumsum(axis=1)} class TestNPUCumSumOp7(TestNPUCumSumOp1): + def init_testcase(self): self.inputs = {'X': np.random.random((100)).astype(self.dtype)} self.outputs = {'Out': self.inputs['X'].cumsum(axis=0)} class TestNPUCumSumExclusive1(TestNPUCumSumOp1): + def init_testcase(self): 
self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((4, 5, 65)).astype(self.dtype) self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (4, 5, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + np.concatenate((np.zeros( + (4, 5, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), + axis=2) } class TestNPUCumSumExclusive2(TestNPUCumSumOp1): + def init_testcase(self): self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((1, 1, 888)).astype(self.dtype) self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (1, 1, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + np.concatenate((np.zeros( + (1, 1, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), + axis=2) } class TestNPUCumSumExclusive3(TestNPUCumSumOp1): + def init_testcase(self): self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((4, 5, 888)).astype(self.dtype) self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (4, 5, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + np.concatenate((np.zeros( + (4, 5, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), + axis=2) } class TestNPUCumSumExclusive4(TestNPUCumSumOp1): + def init_testcase(self): self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((1, 1, 3049)).astype(self.dtype) self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (1, 1, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + np.concatenate((np.zeros( + (1, 1, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), + axis=2) } class TestNPUCumSumExclusive5(TestNPUCumSumOp1): + def init_testcase(self): self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((4, 5, 3096)).astype(self.dtype) self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (4, 5, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + np.concatenate((np.zeros( + (4, 5, 1), dtype=self.dtype), a[:, :, :-1].cumsum(axis=2)), + axis=2) } class TestNPUCumSumReverseExclusive(TestNPUCumSumOp1): + def init_testcase(self): self.attrs = {'axis': 2, 'reverse': True, "exclusive": True} a = np.random.random((4, 5, 6)).astype(self.dtype) self.inputs = {'X': a} a = np.flip(a, axis=2) self.outputs = { - 'Out': np.concatenate( - (np.flip( - a[:, :, :-1].cumsum(axis=2), axis=2), np.zeros( - (4, 5, 1), dtype=self.dtype)), + 'Out': + np.concatenate( + (np.flip(a[:, :, :-1].cumsum(axis=2), + axis=2), np.zeros((4, 5, 1), dtype=self.dtype)), axis=2) } class TestNPUCumSumWithFlatten1(TestNPUCumSumOp1): + def init_testcase(self): self.attrs = {'flatten': True} self.inputs = {'X': np.random.random((5, 6)).astype(self.dtype)} @@ -243,6 +256,7 @@ class TestNPUCumSumWithFlatten1(TestNPUCumSumOp1): class TestNPUCumSumWithFlatten2(TestNPUCumSumOp1): + def init_testcase(self): self.attrs = {'flatten': True} self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} @@ -251,21 +265,22 @@ class TestNPUCumSumWithFlatten2(TestNPUCumSumOp1): #----------------Cumsum Int64---------------- class TestNPUCumSumOpInt64(TestNPUCumSumOp1): + def init_testcase(self): self.attrs = {'axis': -1, 'reverse': True} self.inputs = { - 'X': np.random.randint( - 1, 10000, size=(5, 6, 10)).astype(self.dtype) + 'X': np.random.randint(1, 10000, size=(5, 6, 10)).astype(self.dtype) } self.outputs = { - 'Out': np.flip( - np.flip( - self.inputs['X'], axis=2).cumsum(axis=2), axis=2) + 'Out': 
np.flip(np.flip(self.inputs['X'], axis=2).cumsum(axis=2), + axis=2) } def create_test_int64(parent): + class TestCumSumInt64(parent): + def init_dtype(self): self.dtype = np.int64 diff --git a/python/paddle/fluid/tests/unittests/npu/test_density_prior_box_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_density_prior_box_op_npu.py index a190aa9b6f2..7271644ce82 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_density_prior_box_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_density_prior_box_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import math import paddle @@ -28,6 +29,7 @@ np.random.seed(2021) class TestNpuDensityPriorBoxOp(OpTest): + def set_data(self): self.init_test_params() self.init_test_input() @@ -89,8 +91,8 @@ class TestNpuDensityPriorBoxOp(OpTest): if len(self.fixed_sizes) > 0 and len(self.densities) > 0: for density in self.densities: if len(self.fixed_ratios) > 0: - self.num_priors += len(self.fixed_ratios) * (pow(density, - 2)) + self.num_priors += len(self.fixed_ratios) * (pow( + density, 2)) self.offset = 0.5 self.atol = 1e-5 @@ -149,6 +151,7 @@ class TestNpuDensityPriorBoxOp(OpTest): class TestNpuDensityPriorBoxFlatten(TestNpuDensityPriorBoxOp): + def set_density(self): self.densities = [3, 4] self.fixed_sizes = [1.0, 2.0] @@ -161,6 +164,7 @@ class TestNpuDensityPriorBoxFlatten(TestNpuDensityPriorBoxOp): class TestNpuDensityPriorBoxOp1(TestNpuDensityPriorBoxOp): + def set_density(self): super(TestNpuDensityPriorBoxOp1, self).set_density() self.layer_w = 1 @@ -168,6 +172,7 @@ class TestNpuDensityPriorBoxOp1(TestNpuDensityPriorBoxOp): class TestNpuDensityPriorBoxOp2(TestNpuDensityPriorBoxOp): + def set_density(self): super(TestNpuDensityPriorBoxOp2, self).set_density() self.layer_w = 15 @@ -177,12 +182,14 @@ class TestNpuDensityPriorBoxOp2(TestNpuDensityPriorBoxOp): class TestNpuDensityPriorBoxOp3(TestNpuDensityPriorBoxOp): + def set_density(self): super(TestNpuDensityPriorBoxOp3, self).set_density() self.fixed_ratios = [1.0, 4.0] class TestNpuDensityPriorBoxOpFP16(TestNpuDensityPriorBoxOp): + def init_dtype(self): self.dtype = np.float16 diff --git a/python/paddle/fluid/tests/unittests/npu/test_dropout_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_dropout_op_npu.py index fea8502f2d7..bca1d631c8e 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_dropout_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_dropout_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -29,6 +30,7 @@ EPOCH = 100 class TestDropoutOp(OpTest): + def setUp(self): self.op_type = "dropout" self.set_npu() @@ -164,6 +166,7 @@ class TestDropoutOpInference(OpTest): @skip_check_grad_ci(reason="For inference, check_grad is not required.") class TestDropoutOpInference2(TestDropoutOpInference): + def setUp(self): self.op_type = "dropout" self.set_npu() @@ -185,8 +188,7 @@ class TestDropoutOpWithSeed(TestDropoutOp): self.init_dtype() self.inputs = { "X": np.random.random((32, 64)).astype(self.dtype), - "Seed": np.asarray( - [125], dtype="int32") + "Seed": np.asarray([125], dtype="int32") } self.attrs = { 'dropout_prob': 0.0, @@ -211,6 +213,7 @@ class TestDropoutOpFp16(TestDropoutOp): class TestDropoutAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace(), paddle.NPUPlace(0)] @@ 
-218,36 +221,43 @@ class TestDropoutAPI(unittest.TestCase): def check_static_result(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): input = fluid.data(name="input", shape=[40, 40], dtype="float32") - res1 = paddle.nn.functional.dropout( - x=input, p=0., training=False, mode='upscale_in_train') - res2 = paddle.nn.functional.dropout( - x=input, p=0., axis=0, training=True, mode='upscale_in_train') - res3 = paddle.nn.functional.dropout( - x=input, p=0., axis=0, training=False, mode='upscale_in_train') - res4 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=[0, 1], - training=True, - mode='upscale_in_train') - res5 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=[0, 1], - training=False, - mode='upscale_in_train') - res6 = paddle.nn.functional.dropout( - x=input, p=1., training=True, mode='upscale_in_train') + res1 = paddle.nn.functional.dropout(x=input, + p=0., + training=False, + mode='upscale_in_train') + res2 = paddle.nn.functional.dropout(x=input, + p=0., + axis=0, + training=True, + mode='upscale_in_train') + res3 = paddle.nn.functional.dropout(x=input, + p=0., + axis=0, + training=False, + mode='upscale_in_train') + res4 = paddle.nn.functional.dropout(x=input, + p=0., + axis=[0, 1], + training=True, + mode='upscale_in_train') + res5 = paddle.nn.functional.dropout(x=input, + p=0., + axis=[0, 1], + training=False, + mode='upscale_in_train') + res6 = paddle.nn.functional.dropout(x=input, + p=1., + training=True, + mode='upscale_in_train') res7 = paddle.fluid.layers.dropout( x=input, dropout_prob=0., dropout_implementation='upscale_in_train') - res8 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=(0, 1), - training=False, - mode='upscale_in_train') + res8 = paddle.nn.functional.dropout(x=input, + p=0., + axis=(0, 1), + training=False, + mode='upscale_in_train') in_np = np.random.random([40, 40]).astype("float32") res_np = in_np diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_add_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_add_op_npu.py index f24c6c455a0..0883fca0794 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_add_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_add_op_npu.py @@ -16,6 +16,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from paddle.fluid import Program, program_guard @@ -28,6 +29,7 @@ paddle.enable_static() class TestElementwiseAddOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "elementwise_add" @@ -73,13 +75,15 @@ class TestElementwiseAddOp(OpTest): self.place, ['X', 'Y'], 'Out', - max_relative_error=0.15, ) + max_relative_error=0.15, + ) else: self.check_grad_with_place( self.place, ['X', 'Y'], 'Out', - max_relative_error=0.006, ) + max_relative_error=0.006, + ) def test_check_grad_ingore_x(self): if self.dtype == np.int64: @@ -91,14 +95,16 @@ class TestElementwiseAddOp(OpTest): ['Y'], 'Out', no_grad_set=set("X"), - max_relative_error=0.92, ) + max_relative_error=0.92, + ) else: self.check_grad_with_place( self.place, ['Y'], 'Out', no_grad_set=set("X"), - max_relative_error=0.006, ) + max_relative_error=0.006, + ) def test_check_grad_ingore_y(self): if self.dtype == np.int64: @@ -110,22 +116,26 @@ class TestElementwiseAddOp(OpTest): ['X'], 'Out', no_grad_set=set("Y"), - max_relative_error=0.8, ) + max_relative_error=0.8, + ) else: self.check_grad_with_place( self.place, ['X'], 'Out', no_grad_set=set("Y"), - max_relative_error=0.006, ) + 
max_relative_error=0.006, + ) class TestFP16ElementwiseAddOp(TestElementwiseAddOp): + def init_dtype(self): self.dtype = np.float16 class TestINT64ElementwiseAddOp(TestElementwiseAddOp): + def init_dtype(self): self.dtype = np.int64 @@ -133,6 +143,7 @@ class TestINT64ElementwiseAddOp(TestElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseAddOp_scalar(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -142,6 +153,7 @@ class TestElementwiseAddOp_scalar(TestElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestFP16ElementwiseAddOp_scalar(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -151,6 +163,7 @@ class TestFP16ElementwiseAddOp_scalar(TestFP16ElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1,1) to test broadcast.") class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1, 1).astype(self.dtype) @@ -160,6 +173,7 @@ class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1,1) to test broadcast.") class TestFP16ElementwiseAddOp_scalar2(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1, 1).astype(self.dtype) @@ -167,6 +181,7 @@ class TestFP16ElementwiseAddOp_scalar2(TestFP16ElementwiseAddOp): class TestAddAPI(unittest.TestCase): + def test_name(self): with paddle.static.program_guard(paddle.static.Program()): x = paddle.static.data(name="x", shape=[2, 3], dtype="float32") @@ -191,8 +206,10 @@ class TestAddAPI(unittest.TestCase): place = paddle.NPUPlace(0) exe = paddle.static.Executor(place) - x_value, y_value, z_value = exe.run(feed={"x": x_np, - "y": y_np}, + x_value, y_value, z_value = exe.run(feed={ + "x": x_np, + "y": y_np + }, fetch_list=[x, y, z]) z_expected = np.array([3., 8., 6.]) @@ -211,24 +228,28 @@ class TestAddAPI(unittest.TestCase): class TestAddError(unittest.TestCase): + def test_errors(self): with paddle.static.program_guard(paddle.static.Program()): # the input of elementwise_add must be Variable. 
- x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.NPUPlace(0)) - y1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.NPUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.NPUPlace(0)) + y1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.NPUPlace(0)) self.assertRaises(TypeError, paddle.add, x1, y1) # the input dtype must be float16 or float32 or float64 or int32 or int64 - x2 = paddle.static.data( - name='x2', shape=[3, 4, 5, 6], dtype="uint8") - y2 = paddle.static.data( - name='y2', shape=[3, 4, 5, 6], dtype="uint8") + x2 = paddle.static.data(name='x2', + shape=[3, 4, 5, 6], + dtype="uint8") + y2 = paddle.static.data(name='y2', + shape=[3, 4, 5, 6], + dtype="uint8") self.assertRaises(TypeError, paddle.add, x2, y2) class TestElementwiseAddOp_Vector(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.random((100, )).astype(self.dtype) self.y = np.random.random((100, )).astype(self.dtype) @@ -236,6 +257,7 @@ class TestElementwiseAddOp_Vector(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_Vector(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.random((100, )).astype(self.dtype) self.y = np.random.random((100, )).astype(self.dtype) @@ -243,6 +265,7 @@ class TestFP16ElementwiseAddOp_Vector(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -253,6 +276,7 @@ class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_0(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -263,6 +287,7 @@ class TestFP16ElementwiseAddOp_broadcast_0(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 100, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -273,6 +298,7 @@ class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_1(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 100, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -283,6 +309,7 @@ class TestFP16ElementwiseAddOp_broadcast_1(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -290,6 +317,7 @@ class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_2(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -297,6 +325,7 @@ class TestFP16ElementwiseAddOp_broadcast_2(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12, 1).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -307,6 +336,7 @@ class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_3(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12, 
3).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -317,6 +347,7 @@ class TestFP16ElementwiseAddOp_broadcast_3(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 1, 2).astype(self.dtype) self.y = np.random.rand(100, 1).astype(self.dtype) @@ -327,6 +358,7 @@ class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_4(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 1, 2).astype(self.dtype) self.y = np.random.rand(100, 1).astype(self.dtype) @@ -337,6 +369,7 @@ class TestFP16ElementwiseAddOp_broadcast_4(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 3, 12).astype(self.dtype) self.y = np.random.rand(10, 1, 12).astype(self.dtype) @@ -344,6 +377,7 @@ class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_5(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 3, 12).astype(self.dtype) self.y = np.random.rand(10, 1, 12).astype(self.dtype) @@ -351,6 +385,7 @@ class TestFP16ElementwiseAddOp_broadcast_5(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype) self.y = np.random.rand(2, 12, 1, 5).astype(self.dtype) @@ -358,6 +393,7 @@ class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(1, 1, 20, 5).astype(self.dtype) self.y = np.random.rand(20, 5, 1, 1).astype(self.dtype) @@ -365,6 +401,7 @@ class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_6(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype) self.y = np.random.rand(2, 12, 1, 5).astype(self.dtype) @@ -372,6 +409,7 @@ class TestFP16ElementwiseAddOp_broadcast_6(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -382,6 +420,7 @@ class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_rowwise_add_0(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -394,6 +433,7 @@ class TestFP16ElementwiseAddOp_rowwise_add_0(TestFP16ElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 1).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -406,6 +446,7 @@ class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestFP16ElementwiseAddOp_rowwise_add_1(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 1).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -416,6 +457,7 @@ class TestFP16ElementwiseAddOp_rowwise_add_1(TestFP16ElementwiseAddOp): class 
TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100, 1, 1).astype(self.dtype) @@ -426,6 +468,7 @@ class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_channelwise_add(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100, 1, 1).astype(self.dtype) @@ -436,6 +479,7 @@ class TestFP16ElementwiseAddOp_channelwise_add(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(1, 1, 100).astype(self.dtype) @@ -446,6 +490,7 @@ class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): class TestElementwiseFP16AddOp_commonuse_add1(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(1, 1, 100).astype(self.dtype) @@ -456,6 +501,7 @@ class TestElementwiseFP16AddOp_commonuse_add1(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 3, 1, 4).astype(self.dtype) self.y = np.random.rand(10, 1, 12, 1).astype(self.dtype) @@ -466,6 +512,7 @@ class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 12).astype(self.dtype) self.y = np.random.rand(2, 2, 10, 12).astype(self.dtype) @@ -476,6 +523,7 @@ class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): class TestElementwiseAddOp_same_shape_ysize_large(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 1, 12).astype(self.dtype) self.y = np.random.rand(10, 2, 12).astype(self.dtype) @@ -486,13 +534,14 @@ class TestElementwiseAddOp_same_shape_ysize_large(TestElementwiseAddOp): class TestElementwiseAddOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of elementwise_add must be Variable. 
- x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.NPUPlace(0)) - y1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.NPUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.NPUPlace(0)) + y1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.NPUPlace(0)) self.assertRaises(TypeError, fluid.layers.elementwise_add, x1, y1) # the input dtype of elementwise_add must be float16 or float32 or float64 or int32 or int64 @@ -503,6 +552,7 @@ class TestElementwiseAddOpError(unittest.TestCase): class TestAddApi(unittest.TestCase): + def _executed_api(self, x, y, name=None): return paddle.add(x, y, name) @@ -546,11 +596,13 @@ class TestAddApi(unittest.TestCase): class TestAddInplaceApi(TestAddApi): + def _executed_api(self, x, y, name=None): return x.add_(y, name) class TestAddInplaceBroadcastSuccess(unittest.TestCase): + def init_data(self): self.x_numpy = np.random.rand(2, 3, 4).astype('float') self.y_numpy = np.random.rand(3, 4).astype('float') @@ -567,18 +619,21 @@ class TestAddInplaceBroadcastSuccess(unittest.TestCase): class TestAddInplaceBroadcastSuccess2(TestAddInplaceBroadcastSuccess): + def init_data(self): self.x_numpy = np.random.rand(1, 2, 3, 1).astype('float') self.y_numpy = np.random.rand(3, 1).astype('float') class TestAddInplaceBroadcastSuccess3(TestAddInplaceBroadcastSuccess): + def init_data(self): self.x_numpy = np.random.rand(2, 3, 1, 5).astype('float') self.y_numpy = np.random.rand(1, 3, 1, 5).astype('float') class TestAddInplaceBroadcastError(unittest.TestCase): + def init_data(self): self.x_numpy = np.random.rand(3, 4).astype('float') self.y_numpy = np.random.rand(2, 3, 4).astype('float') @@ -597,12 +652,14 @@ class TestAddInplaceBroadcastError(unittest.TestCase): class TestAddInplaceBroadcastError2(TestAddInplaceBroadcastError): + def init_data(self): self.x_numpy = np.random.rand(2, 1, 4).astype('float') self.y_numpy = np.random.rand(2, 3, 4).astype('float') class TestAddInplaceBroadcastError3(TestAddInplaceBroadcastError): + def init_data(self): self.x_numpy = np.random.rand(5, 2, 1, 4).astype('float') self.y_numpy = np.random.rand(2, 3, 4).astype('float') diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py index 07c22868d5a..9dcf4aa707c 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestElementwiseDiv(OpTest): + def setUp(self): self.set_npu() self.op_type = "elementwise_div" @@ -60,7 +62,8 @@ class TestElementwiseDiv(OpTest): self.place, ['X', 'Y'], 'Out', - max_relative_error=0.007, ) + max_relative_error=0.007, + ) def test_check_grad_ingore_x(self): self.check_grad_with_place( @@ -68,14 +71,17 @@ class TestElementwiseDiv(OpTest): ['Y'], 'Out', max_relative_error=0.007, - no_grad_set=set("X"), ) + no_grad_set=set("X"), + ) def test_check_grad_ingore_y(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', no_grad_set=set("Y")) + self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set("Y")) class TestElementwiseDivFp16(OpTest): + def setUp(self): self.set_npu() self.op_type = "elementwise_div" @@ -106,6 +112,7 @@ 
class TestElementwiseDivFp16(OpTest): class TestElementwiseDivNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -124,8 +131,9 @@ class TestElementwiseDivNet(unittest.TestCase): b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') c = paddle.static.data(name="c", shape=[32, 32], dtype='float32') d = paddle.static.data(name="d", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') e = paddle.multiply(a, b) f = paddle.multiply(c, d) @@ -175,6 +183,7 @@ class TestElementwiseDivNet(unittest.TestCase): class TestFloatStatus(unittest.TestCase): + def test_overflow(self): paddle.disable_static() paddle.set_device('npu') diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_floordiv_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_floordiv_op_npu.py index 36d282a3d06..3edf270566d 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_floordiv_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_floordiv_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -25,6 +26,7 @@ paddle.enable_static() class TestElementwiseFloorDiv(OpTest): + def setUp(self): self.op_type = "elementwise_floordiv" self.set_npu() @@ -55,6 +57,7 @@ class TestElementwiseFloorDiv(OpTest): class TestElementwiseFloorDiv2(TestElementwiseFloorDiv): + def init_dtype(self): self.dtype = "int32" diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py index cbfc07f3544..6d368361597 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -80,6 +81,7 @@ def ComputeGrad(x, y, out, axis): class TestElementwiseMaxOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "elementwise_max" @@ -105,8 +107,8 @@ class TestElementwiseMaxOp(OpTest): def init_input_output(self): self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) sgn = np.random.choice([-1, 1], [13, 17]).astype(self.dtype) - self.y = self.x + sgn * np.random.uniform(0.1, 1, - [13, 17]).astype(self.dtype) + self.y = self.x + sgn * np.random.uniform(0.1, 1, [13, 17]).astype( + self.dtype) self.out = np.maximum(self.x, self.y) def init_axis(self): @@ -119,15 +121,18 @@ class TestElementwiseMaxOp(OpTest): self.check_grad_with_place(self.place, ['X', 'Y'], 'Out') def test_check_grad_ingore_x(self): - self.check_grad_with_place( - self.place, ['Y'], 'Out', no_grad_set=set("X")) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', no_grad_set=set("Y")) + self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set("Y")) class TestElementwiseMaxOp_int32(TestElementwiseMaxOp): + def init_dtype(self): self.dtype = np.int32 @@ -143,6 +148,7 @@ class TestElementwiseMaxOp_int32(TestElementwiseMaxOp): class TestElementwiseMaxOp_scalar(TestElementwiseMaxOp): + 
def init_input_output(self): self.x = np.random.random_integers(-5, 5, [2, 3, 20]).astype(self.dtype) self.y = np.array([0.5]).astype(self.dtype) @@ -150,6 +156,7 @@ class TestElementwiseMaxOp_scalar(TestElementwiseMaxOp): class TestElementwiseMaxOp_vector(TestElementwiseMaxOp): + def init_input_output(self): self.x = np.random.random((100, )).astype(self.dtype) sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) @@ -159,6 +166,7 @@ class TestElementwiseMaxOp_vector(TestElementwiseMaxOp): class TestElementwiseMaxOp_broadcast_0(TestElementwiseMaxOp): + def init_input_output(self): self.x = np.random.uniform(0.5, 1, (100, 5, 2)).astype(self.dtype) sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) @@ -171,6 +179,7 @@ class TestElementwiseMaxOp_broadcast_0(TestElementwiseMaxOp): class TestElementwiseMaxOp_broadcast_1(TestElementwiseMaxOp): + def init_input_output(self): self.x = np.random.uniform(0.5, 1, (2, 100, 3)).astype(self.dtype) sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) @@ -183,22 +192,21 @@ class TestElementwiseMaxOp_broadcast_1(TestElementwiseMaxOp): def test_check_grad_ingore_x(self): _, dy = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[dy]) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[dy]) def test_check_grad_ingore_y(self): dx, _ = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['X'], - 'Out', - no_grad_set=set("Y"), - user_defined_grads=[dx]) + self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set("Y"), + user_defined_grads=[dx]) class TestElementwiseMaxOp_broadcast_2(TestElementwiseMaxOp): + def init_input_output(self): self.x = np.random.uniform(0.5, 1, (2, 3, 100)).astype(self.dtype) sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) @@ -208,27 +216,27 @@ class TestElementwiseMaxOp_broadcast_2(TestElementwiseMaxOp): def test_check_grad_normal(self): dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy]) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + user_defined_grads=[dx, dy]) def test_check_grad_ingore_x(self): _, dy = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[dy]) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[dy]) def test_check_grad_ingore_y(self): dx, _ = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['X'], - 'Out', - no_grad_set=set("Y"), - user_defined_grads=[dx]) + self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set("Y"), + user_defined_grads=[dx]) class TestElementwiseMaxOp_broadcast_3(TestElementwiseMaxOp): + def init_input_output(self): self.x = np.random.uniform(0.5, 1, (2, 50, 2, 1)).astype(self.dtype) sgn = np.random.choice([-1, 1], (50, 2)).astype(self.dtype) @@ -241,6 +249,7 @@ class TestElementwiseMaxOp_broadcast_3(TestElementwiseMaxOp): class TestElementwiseMaxOp_broadcast_4(TestElementwiseMaxOp): + def init_input_output(self): self.x = np.random.uniform(0.5, 1, (2, 3, 4, 5)).astype(self.dtype) sgn = np.random.choice([-1, 1], (2, 3, 1, 5)).astype(self.dtype) @@ -250,6 +259,7 @@ class TestElementwiseMaxOp_broadcast_4(TestElementwiseMaxOp): class 
TestElementwiseMaxOp_broadcast_5(TestElementwiseMaxOp): + def init_input_output(self): self.x = np.random.uniform(0.5, 1, (2, 3, 4, 5)).astype(self.dtype) sgn = np.random.choice([-1, 1], (2, 3, 1, 1)).astype(self.dtype) @@ -259,6 +269,7 @@ class TestElementwiseMaxOp_broadcast_5(TestElementwiseMaxOp): class TestElementwiseMaxNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -273,8 +284,9 @@ class TestElementwiseMaxNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') c = paddle.maximum(a, b) @@ -297,12 +309,13 @@ class TestElementwiseMaxNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py index e191224df81..2ddd7b4069d 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -29,6 +30,7 @@ SEED = 2021 class TestElementwiseMinOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "elementwise_min" @@ -51,8 +53,8 @@ class TestElementwiseMinOp(OpTest): # to avoid them being too close to each other. 
self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) self.sgn = np.random.choice([-1, 1], [13, 17]).astype(self.dtype) - self.y = self.x + self.sgn * np.random.uniform( - 0.1, 1, [13, 17]).astype(self.dtype) + self.y = self.x + self.sgn * np.random.uniform(0.1, 1, [13, 17]).astype( + self.dtype) self.out = np.minimum(self.x, self.y) self.axis = -1 @@ -64,59 +66,64 @@ class TestElementwiseMinOp(OpTest): def test_check_grad_normal(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['X', 'Y'], 'Out', max_relative_error=0.5) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + max_relative_error=0.5) else: self.check_grad_with_place( self.place, ['X', 'Y'], - 'Out', ) + 'Out', + ) def test_check_grad_ingore_x(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['Y'], - 'Out', - no_grad_set=set("X"), - max_relative_error=0.9) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + no_grad_set=set("X"), + max_relative_error=0.9) else: self.check_grad_with_place( self.place, ['Y'], 'Out', - no_grad_set=set("X"), ) + no_grad_set=set("X"), + ) def test_check_grad_ingore_y(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['X'], - 'Out', - no_grad_set=set("Y"), - max_relative_error=0.1) + self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set("Y"), + max_relative_error=0.1) else: self.check_grad_with_place( self.place, ['X'], 'Out', - no_grad_set=set("Y"), ) + no_grad_set=set("Y"), + ) class TestElementwiseMinOpFp16(TestElementwiseMinOp): + def init_dtype(self): self.dtype = np.float16 class TestElementwiseMinOp_Vector(TestElementwiseMinOp): + def init_input_output(self): self.x = np.random.uniform(1, 2, (100, )).astype(self.dtype) self.sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) - self.y = self.x + self.sgn * np.random.uniform(0.1, 1, ( - 100, )).astype(self.dtype) + self.y = self.x + self.sgn * np.random.uniform(0.1, 1, (100, )).astype( + self.dtype) self.out = np.minimum(self.x, self.y) self.axis = -1 class TestElementwiseMinOpFp16_Vector(TestElementwiseMinOp_Vector): + def init_dtype(self): self.dtype = np.float16 @@ -124,6 +131,7 @@ class TestElementwiseMinOpFp16_Vector(TestElementwiseMinOp_Vector): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseMinOp_scalar(TestElementwiseMinOp): + def init_input_output(self): self.x = np.random.random_integers(-5, 5, [10, 3, 4]).astype(self.dtype) self.y = np.array([0.5]).astype(self.dtype) @@ -134,11 +142,13 @@ class TestElementwiseMinOp_scalar(TestElementwiseMinOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseMinOpFp16_scalar(TestElementwiseMinOp_scalar): + def init_dtype(self): self.dtype = np.float16 class TestElementwiseMinOp_broadcast(TestElementwiseMinOp): + def init_input_output(self): self.x = np.random.uniform(0.5, 1, (2, 3, 100)).astype(self.dtype) self.sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) @@ -149,11 +159,13 @@ class TestElementwiseMinOp_broadcast(TestElementwiseMinOp): class TestElementwiseMinOpFp16_broadcast(TestElementwiseMinOp_broadcast): + def init_dtype(self): self.dtype = np.float16 class TestElementwiseMinOpNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -168,8 +180,9 @@ class TestElementwiseMinOpNet(unittest.TestCase): with paddle.static.program_guard(main_prog, 
startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') c = paddle.minimum(a, b) @@ -192,12 +205,13 @@ class TestElementwiseMinOpNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_mod_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_mod_op_npu.py index d6551e84080..763f5db52b2 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_mod_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_mod_op_npu.py @@ -17,6 +17,7 @@ import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest @@ -29,6 +30,7 @@ paddle.enable_static() class TestElementwiseModOp(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -68,11 +70,13 @@ class TestElementwiseModOp(OpTest): class TestElementwiseModOpInt64(TestElementwiseModOp): + def init_dtype(self): self.dtype = np.int64 class TestElementwiseModOp_scalar(TestElementwiseModOp): + def init_input_output(self): scale_x = random.randint(0, 100000000) scale_y = random.randint(1, 100000000) @@ -82,6 +86,7 @@ class TestElementwiseModOp_scalar(TestElementwiseModOp): class TestElementwiseModOpFloat(TestElementwiseModOp): + def init_dtype(self): self.dtype = np.float32 @@ -95,6 +100,7 @@ class TestElementwiseModOpFloat(TestElementwiseModOp): class TestElementwiseModOpDouble(TestElementwiseModOpFloat): + def init_dtype(self): self.dtype = np.float64 @@ -103,6 +109,7 @@ class TestElementwiseModOpDouble(TestElementwiseModOpFloat): class TestElementwiseModOpFP16(TestElementwiseModOpFloat): + def init_dtype(self): self.dtype = np.float16 @@ -111,6 +118,7 @@ class TestElementwiseModOpFP16(TestElementwiseModOpFloat): class TestElementwiseModOp_broadcast_0(TestElementwiseModOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -121,6 +129,7 @@ class TestElementwiseModOp_broadcast_0(TestElementwiseModOp): class TestElementwiseModOp_broadcast_1(TestElementwiseModOp): + def init_input_output(self): self.x = np.random.rand(2, 100, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -131,6 +140,7 @@ class TestElementwiseModOp_broadcast_1(TestElementwiseModOp): class TestElementwiseModOp_broadcast_2(TestElementwiseModOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -141,6 +151,7 @@ class TestElementwiseModOp_broadcast_2(TestElementwiseModOp): class TestRemainderOp(unittest.TestCase): + def test_name(self): paddle.set_device('npu:0') with fluid.program_guard(fluid.Program()): diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_mul_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_mul_op_npu.py index 92bbc9f536d..abdf43e98db 100644 
--- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_mul_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_mul_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -26,6 +27,7 @@ paddle.enable_static() class ElementwiseMulOp(OpTest): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -53,12 +55,14 @@ class ElementwiseMulOp(OpTest): self.check_grad_with_place(self.place, ['X', 'Y'], 'Out') def test_check_grad_ingore_x(self): - self.check_grad_with_place( - self.place, ['Y'], 'Out', no_grad_set=set("X")) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', no_grad_set=set('Y')) + self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set('Y')) def init_input_output(self): self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) @@ -75,6 +79,7 @@ class ElementwiseMulOp(OpTest): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseMulOp_scalar(ElementwiseMulOp): + def setUp(self): self.set_npu() self.op_type = "elementwise_mul" @@ -86,6 +91,7 @@ class TestElementwiseMulOp_scalar(ElementwiseMulOp): class TestElementwiseMulOp_Vector(ElementwiseMulOp): + def setUp(self): self.set_npu() self.op_type = "elementwise_mul" @@ -97,6 +103,7 @@ class TestElementwiseMulOp_Vector(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -107,6 +114,7 @@ class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp): + def setUp(self): self.set_npu() self.op_type = "elementwise_mul" @@ -122,6 +130,7 @@ class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp): + def setUp(self): self.set_npu() self.op_type = "elementwise_mul" @@ -136,6 +145,7 @@ class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp): + def setUp(self): self.set_npu() self.op_type = "elementwise_mul" @@ -151,6 +161,7 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_4(ElementwiseMulOp): + def setUp(self): self.set_npu() self.op_type = "elementwise_mul" @@ -162,6 +173,7 @@ class TestElementwiseMulOp_broadcast_4(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_5(ElementwiseMulOp): + def setUp(self): self.set_npu() self.op_type = "elementwise_mul" @@ -175,11 +187,13 @@ class TestElementwiseMulOp_broadcast_5(ElementwiseMulOp): @unittest.skipIf(not paddle.is_compiled_with_npu(), "paddle is not compiled with NPU") class TestElementwiseMulOpFp16(ElementwiseMulOp): + def init_dtype(self): self.dtype = np.float16 class TestElementwiseMulOp_commonuse_1(ElementwiseMulOp): + def setUp(self): self.set_npu() self.op_type = "elementwise_mul" @@ -191,6 +205,7 @@ class TestElementwiseMulOp_commonuse_1(ElementwiseMulOp): class TestElementwiseMulOp_commonuse_2(ElementwiseMulOp): + def setUp(self): self.set_npu() self.op_type = "elementwise_mul" @@ -202,6 +217,7 @@ class TestElementwiseMulOp_commonuse_2(ElementwiseMulOp): class TestElementwiseMulOp_xsize_lessthan_ysize(ElementwiseMulOp): 
+ def setUp(self): self.set_npu() self.op_type = "elementwise_mul" diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py index 907e149c8b2..f197f9bd381 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py @@ -20,6 +20,7 @@ from op_test import OpTest import numpy as np import unittest import sys + sys.path.append("..") paddle.enable_static() @@ -79,6 +80,7 @@ def ComputeGrad(x, y, out, axis): class TestElementwisePow(OpTest): + def setUp(self): self.set_npu() self.op_type = "elementwise_pow" @@ -115,27 +117,27 @@ class TestElementwisePow(OpTest): def test_check_grad_normal(self): dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy]) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + user_defined_grads=[dx, dy]) def test_check_grad_ingore_x(self): _, dy = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[dy]) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[dy]) def test_check_grad_ingore_y(self): dx, _ = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['X'], - 'Out', - no_grad_set=set("Y"), - user_defined_grads=[dx]) + self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set("Y"), + user_defined_grads=[dx]) class TestElementwisePowFp16(TestElementwisePow): + def init_input_output(self): np.random.seed(SEED) self.x = np.random.uniform(1, 2, [11, 17]).astype(self.dtype) @@ -154,6 +156,7 @@ class TestElementwisePowFp16(TestElementwisePow): class TestElementwisePowDouble(TestElementwisePow): + def init_input_output(self): np.random.seed(SEED) self.x = np.random.uniform(1, 2, [11, 17]).astype(self.dtype) @@ -172,6 +175,7 @@ class TestElementwisePowDouble(TestElementwisePow): class TestElementwisePowOp_broadcast_0(TestElementwisePow): + def init_axis(self): self.axis = 1 @@ -183,27 +187,27 @@ class TestElementwisePowOp_broadcast_0(TestElementwisePow): def test_check_grad_normal(self): dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy]) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + user_defined_grads=[dx, dy]) def test_check_grad_ingore_x(self): _, dy = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[dy]) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[dy]) def test_check_grad_ingore_y(self): dx, _ = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['X'], - 'Out', - no_grad_set=set("Y"), - user_defined_grads=[dx]) + self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set("Y"), + user_defined_grads=[dx]) class TestElementwisePowOp_broadcast_1(TestElementwisePow): + def init_axis(self): self.axis = 1 @@ -215,27 +219,27 @@ class TestElementwisePowOp_broadcast_1(TestElementwisePow): def test_check_grad_normal(self): dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['X', 'Y'], 'Out', 
user_defined_grads=[dx, dy]) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + user_defined_grads=[dx, dy]) def test_check_grad_ingore_x(self): _, dy = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[dy]) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[dy]) def test_check_grad_ingore_y(self): dx, _ = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['X'], - 'Out', - no_grad_set=set("Y"), - user_defined_grads=[dx]) + self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set("Y"), + user_defined_grads=[dx]) class TestElementwisePowOp_broadcast_2(TestElementwisePow): + def init_axis(self): self.axis = 0 @@ -247,27 +251,27 @@ class TestElementwisePowOp_broadcast_2(TestElementwisePow): def test_check_grad_normal(self): dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy]) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + user_defined_grads=[dx, dy]) def test_check_grad_ingore_x(self): _, dy = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[dy]) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[dy]) def test_check_grad_ingore_y(self): dx, _ = ComputeGrad(self.x, self.y, self.out, self.axis) - self.check_grad_with_place( - self.place, ['X'], - 'Out', - no_grad_set=set("Y"), - user_defined_grads=[dx]) + self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set("Y"), + user_defined_grads=[dx]) class TestElementwisePowNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -282,8 +286,9 @@ class TestElementwisePowNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') c = paddle.pow(a, b) @@ -306,12 +311,13 @@ class TestElementwisePowNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_sub_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_sub_op_npu.py index fac2bc66ff4..58ccc04a0f4 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_sub_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_sub_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestElementwiseSubOp(OpTest): + def setUp(self): self.set_npu() self.op_type = 
"elementwise_sub" @@ -91,16 +93,19 @@ class TestElementwiseSubOp(OpTest): class TestElementwiseSubOpInt32(TestElementwiseSubOp): + def init_dtype(self): self.dtype = np.int32 class TestElementwiseSubOpInt64(TestElementwiseSubOp): + def init_dtype(self): self.dtype = np.int64 class TestSubtractAPI(unittest.TestCase): + def test_name(self): with paddle.static.program_guard(paddle.static.Program()): x = paddle.static.data(name="x", shape=[2, 3], dtype="float32") @@ -125,8 +130,10 @@ class TestSubtractAPI(unittest.TestCase): place = paddle.NPUPlace(0) exe = paddle.static.Executor(place) - x_value, y_value, z_value = exe.run(feed={"x": x_np, - "y": y_np}, + x_value, y_value, z_value = exe.run(feed={ + "x": x_np, + "y": y_np + }, fetch_list=[x, y, z]) z_expected = np.array([1., -2., 2.]) @@ -145,24 +152,28 @@ class TestSubtractAPI(unittest.TestCase): class TestSubtractError(unittest.TestCase): + def test_errors(self): with paddle.static.program_guard(paddle.static.Program()): # the input of elementwise_add must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.NPUPlace(0)) - y1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.NPUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.NPUPlace(0)) + y1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.NPUPlace(0)) self.assertRaises(TypeError, paddle.subtract, x1, y1) # the input dtype must be float16 or float32 or float64 or int32 or int64 - x2 = paddle.static.data( - name='x2', shape=[3, 4, 5, 6], dtype="uint8") - y2 = paddle.static.data( - name='y2', shape=[3, 4, 5, 6], dtype="uint8") + x2 = paddle.static.data(name='x2', + shape=[3, 4, 5, 6], + dtype="uint8") + y2 = paddle.static.data(name='y2', + shape=[3, 4, 5, 6], + dtype="uint8") self.assertRaises(TypeError, paddle.subtract, x2, y2) class TestSubtractNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -177,8 +188,9 @@ class TestSubtractNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) c = paddle.assign(b) @@ -202,12 +214,13 @@ class TestSubtractNet(unittest.TestCase): for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_exp_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_exp_op_npu.py index 6be2fe0086b..288239801a1 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_exp_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_exp_op_npu.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -31,6 +31,7 @@ SEED = 2049 class TestExpNPUOP(OpTest): + def setUp(self): self.set_npu() @@ -63,6 +64,7 @@ class TestExpNPUOP(OpTest): class TestExpNPUOPFloat64(TestExpNPUOP): + def init_dtype(self): self.dtype = np.float64 diff --git a/python/paddle/fluid/tests/unittests/npu/test_expand_as_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_expand_as_v2_op_npu.py index 99edc25f769..6a1a67645f7 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_expand_as_v2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_expand_as_v2_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ np.random.seed(10) class TestExpandAsOpRank1(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -50,6 +52,7 @@ class TestExpandAsOpRank1(OpTest): class TestExpandAsOpRank2(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -73,6 +76,7 @@ class TestExpandAsOpRank2(OpTest): class TestExpandAsOpRank3(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -96,6 +100,7 @@ class TestExpandAsOpRank3(OpTest): class TestExpandAsOpRank4(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -120,24 +125,28 @@ class TestExpandAsOpRank4(OpTest): # Test python API class TestExpandAsV2API(unittest.TestCase): + def test_api(self): input1 = np.random.random([12, 14]).astype("float32") input2 = np.random.random([2, 12, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32") + x = fluid.layers.data(name='x', + shape=[12, 14], + append_batch_size=False, + dtype="float32") - y = fluid.layers.data( - name='target_tensor', - shape=[2, 12, 14], - append_batch_size=False, - dtype="float32") + y = fluid.layers.data(name='target_tensor', + shape=[2, 12, 14], + append_batch_size=False, + dtype="float32") out_1 = paddle.expand_as(x, y=y) exe = fluid.Executor(place=fluid.NPUPlace(0)) res_1 = exe.run(fluid.default_main_program(), - feed={"x": input1, - "target_tensor": input2}, + feed={ + "x": input1, + "target_tensor": input2 + }, fetch_list=[out_1]) assert np.array_equal(res_1[0], np.tile(input1, (2, 1, 1))) diff --git a/python/paddle/fluid/tests/unittests/npu/test_expand_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_expand_op_npu.py index 83b65630d80..5613afe1827 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_expand_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_expand_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestExpand(OpTest): + def setUp(self): self.set_npu() self.op_type = "expand" @@ -55,6 +57,7 @@ class TestExpand(OpTest): class TestExpandV2(TestExpand): + def setUp(self): self.set_npu() self.op_type = "expand" @@ -82,6 +85,7 @@ class TestExpandFp16(TestExpand): class TestExpandNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() 
@@ -94,8 +98,9 @@ class TestExpandNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 1], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') res = paddle.fluid.layers.expand(a, [1, 32]) loss = res.sum() @@ -113,8 +118,10 @@ class TestExpandNet(unittest.TestCase): for epoch in range(100): loss_res = exe.run(main_prog, - feed={"a": a_np, - "label": label_np}, + feed={ + "a": a_np, + "label": label_np + }, fetch_list=[loss]) if epoch % 10 == 0: print("Epoch {} | Loss: {}".format(epoch, loss)) @@ -134,6 +141,7 @@ class TestExpandNet(unittest.TestCase): class TestExpand_expand_times_all_one(TestExpand): + def setUp(self): self.set_npu() self.op_type = "expand" diff --git a/python/paddle/fluid/tests/unittests/npu/test_expand_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_expand_v2_op_npu.py index fd0b9850308..058f146de12 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_expand_v2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_expand_v2_op_npu.py @@ -16,6 +16,7 @@ from __future__ import print_function import unittest import sys import numpy as np + sys.path.append("..") from op_test import OpTest import paddle.fluid as fluid @@ -29,6 +30,7 @@ np.random.seed(10) # CANN Op Support X: float16, float32, int32, int8 ,uint8 # Situation 1: shape is a list(without tensor) class TestExpandV2NPUOpRank1(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -57,6 +59,7 @@ class TestExpandV2NPUOpRank1(OpTest): class TestExpandV2OpRank2_DimExpanding(TestExpandV2NPUOpRank1): + def init_data(self): self.ori_shape = [120] self.shape = [2, 120] @@ -64,6 +67,7 @@ class TestExpandV2OpRank2_DimExpanding(TestExpandV2NPUOpRank1): class TestExpandV2OpRank2(TestExpandV2NPUOpRank1): + def init_data(self): self.ori_shape = [1, 140] self.shape = [12, 140] @@ -71,6 +75,7 @@ class TestExpandV2OpRank2(TestExpandV2NPUOpRank1): class TestExpandV2OpRank3_Corner(TestExpandV2NPUOpRank1): + def init_data(self): self.ori_shape = (2, 10, 5) self.shape = (2, 10, 5) @@ -78,6 +83,7 @@ class TestExpandV2OpRank3_Corner(TestExpandV2NPUOpRank1): class TestExpandV2OpRank4(TestExpandV2NPUOpRank1): + def init_data(self): self.ori_shape = (2, 4, 5, 7) self.shape = (-1, -1, -1, -1) @@ -85,6 +91,7 @@ class TestExpandV2OpRank4(TestExpandV2NPUOpRank1): class TestExpandV2OpRank5(TestExpandV2NPUOpRank1): + def init_data(self): self.ori_shape = (2, 4, 1, 15) self.shape = (2, -1, 4, -1) @@ -92,6 +99,7 @@ class TestExpandV2OpRank5(TestExpandV2NPUOpRank1): class TestExpandV2OpRank6(TestExpandV2NPUOpRank1): + def init_data(self): self.ori_shape = (4, 1, 30) self.shape = (2, -1, 4, 30) @@ -100,6 +108,7 @@ class TestExpandV2OpRank6(TestExpandV2NPUOpRank1): # Situation 2: shape is a list(with tensor) class TestExpandV2OpNPURank1_tensor_attr(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -135,8 +144,9 @@ class TestExpandV2OpNPURank1_tensor_attr(OpTest): self.check_grad_with_place(self.place, ['X'], 'Out') -class TestExpandV2OpRank2_Corner_tensor_attr( - TestExpandV2OpNPURank1_tensor_attr): +class TestExpandV2OpRank2_Corner_tensor_attr(TestExpandV2OpNPURank1_tensor_attr + ): + def init_data(self): self.ori_shape = [12, 14] self.expand_times = [1, 1] @@ -146,6 +156,7 @@ class TestExpandV2OpRank2_Corner_tensor_attr( # Situation 3: shape is a tensor class 
TestExpandV2NPUOpRank1_tensor(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -179,6 +190,7 @@ class TestExpandV2NPUOpRank1_tensor(OpTest): # Situation 4: input x is float16 # skip grad check for float16 class TestExpandV2OpFloat(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -201,6 +213,7 @@ class TestExpandV2OpFloat(OpTest): # Situation 5: input x is int32 # skip grad check for int32 class TestExpandV2OpInteger(OpTest): + def init_dtype(self): self.dtype = 'int32' @@ -209,8 +222,7 @@ class TestExpandV2OpInteger(OpTest): self.place = paddle.NPUPlace(0) self.op_type = "expand_v2" self.inputs = { - 'X': np.random.randint( - 10, size=(2, 4, 20)).astype(self.dtype) + 'X': np.random.randint(10, size=(2, 4, 20)).astype(self.dtype) } self.attrs = {'shape': [2, 4, 20]} output = np.tile(self.inputs['X'], (1, 1, 1)) @@ -225,11 +237,13 @@ class TestExpandV2OpInteger(OpTest): class TesstExpandV2OpInt64(TestExpandV2OpInteger): + def init_dtype(self): self.dtype = 'int64' class TesstExpandV2OpBool(TestExpandV2OpInteger): + def init_dtype(self): self.dtype = 'bool' @@ -244,10 +258,11 @@ class TesstExpandV2OpBool(TestExpandV2OpInteger): class TestExpandV2Error(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], paddle.NPUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + paddle.NPUPlace(0)) shape = [2, 2] self.assertRaises(TypeError, paddle.tensor.expand, x1, shape) x2 = fluid.layers.data(name='x2', shape=[2], dtype="uint8") @@ -259,21 +274,20 @@ class TestExpandV2Error(unittest.TestCase): # Test python API class TestExpandV2API(unittest.TestCase): + def test_static(self): with fluid.program_guard(fluid.Program(), fluid.Program()): input = np.random.random([12, 14]).astype("float32") - x = fluid.layers.data( - name='x', - shape=[12, 14], - append_batch_size=False, - dtype="float32") + x = fluid.layers.data(name='x', + shape=[12, 14], + append_batch_size=False, + dtype="float32") positive_2 = fluid.layers.fill_constant([1], "int32", 12) - expand_shape = fluid.layers.data( - name="expand_shape", - shape=[2], - append_batch_size=False, - dtype="int32") + expand_shape = fluid.layers.data(name="expand_shape", + shape=[2], + append_batch_size=False, + dtype="int32") out_1 = paddle.expand(x, shape=[12, 14]) out_2 = paddle.expand(x, shape=[positive_2, 14]) @@ -284,7 +298,8 @@ class TestExpandV2API(unittest.TestCase): exe = fluid.Executor(place=paddle.NPUPlace(0)) res_1, res_2, res_3 = exe.run(fluid.default_main_program(), feed={ - "x": input, + "x": + input, "expand_shape": np.array([12, 14]).astype("int32") }, diff --git a/python/paddle/fluid/tests/unittests/npu/test_eye_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_eye_op_npu.py index abe981399a9..210d27c3743 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_eye_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_eye_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -29,6 +30,7 @@ np.random.seed(10) class TestEyeOp(OpTest): + def setUp(self): ''' Test eye op with specified shape @@ -73,35 +75,41 @@ class TestEyeOp(OpTest): class TestEyeOp1(TestEyeOp): + def initTestCase(self): self.num_rows = 50 class TestEyeOp2(TestEyeOp): + def initTestCase(self): self.num_rows = 50 self.dtype = np.int32 class TestEyeOp3(TestEyeOp): + def 
initTestCase(self): self.num_rows = 50 self.dtype = np.float16 class TestEyeOp4(TestEyeOp): + def initTestCase(self): self.num_rows = 1 self.num_columns = 99 class TestEyeOp5(TestEyeOp): + def initTestCase(self): self.num_rows = 100 self.num_columns = 100 class TestEyeOp6(TestEyeOp): + def initTestCase(self): self.num_rows = 100 self.num_columns = 100 @@ -109,6 +117,7 @@ class TestEyeOp6(TestEyeOp): class API_TestTensorEye(unittest.TestCase): + def test_out(self): with paddle.static.program_guard(paddle.static.Program()): data = paddle.eye(10) diff --git a/python/paddle/fluid/tests/unittests/npu/test_fill_any_like_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_fill_any_like_op_npu.py index c3074db1aaf..5b602ca7c0f 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_fill_any_like_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_fill_any_like_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ paddle.enable_static() class TestFillAnyLikeNPUOp(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -52,40 +54,47 @@ class TestFillAnyLikeNPUOp(OpTest): class TestFillAnyLikeNPUOpInt32(TestFillAnyLikeNPUOp): + def init(self): self.dtype = np.int32 self.value = -1 class TestFillAnyLikeNPUOpInt64(TestFillAnyLikeNPUOp): + def init(self): self.dtype = np.int64 self.value = -1 class TestFillAnyLikeNPUOpFloat32(TestFillAnyLikeNPUOp): + def init(self): self.dtype = np.float32 self.value = 0.09 class TestFillAnyLikeNPUOpFloat16(TestFillAnyLikeNPUOp): + def init(self): self.dtype = np.float16 self.value = 0.05 class TestFillAnyLikeNPUOpValue1(TestFillAnyLikeNPUOp): + def init(self): self.value = 1.0 class TestFillAnyLikeNPUOpValue2(TestFillAnyLikeNPUOp): + def init(self): self.value = 1e-9 class TestFillAnyLikeNPUOpShape(TestFillAnyLikeNPUOp): + def init(self): self.shape = [12, 10] diff --git a/python/paddle/fluid/tests/unittests/npu/test_fill_constant_batch_size_like_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_fill_constant_batch_size_like_op_npu.py index 615fe6f7645..01130b68246 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_fill_constant_batch_size_like_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_fill_constant_batch_size_like_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestFillConstantBatchSizeLike(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -84,6 +86,7 @@ class TestFillConstantBatchSizeLike(OpTest): class TestFillConstantBatchSizeLike2(TestFillConstantBatchSizeLike): + def init_shape(self): # test shape self.input_shape = [4, 5, 6, 7] @@ -92,6 +95,7 @@ class TestFillConstantBatchSizeLike2(TestFillConstantBatchSizeLike): class TestFillConstantBatchSizeLike3(TestFillConstantBatchSizeLike): + def init_value(self): # use 'str_value' rather than 'value' self.value = 3.8 @@ -100,6 +104,7 @@ class TestFillConstantBatchSizeLike3(TestFillConstantBatchSizeLike): class TestFillConstantBatchSizeLike4(TestFillConstantBatchSizeLike): + def init_value(self): # str_value = 'inf' self.value = 3.8 @@ -108,6 +113,7 @@ class TestFillConstantBatchSizeLike4(TestFillConstantBatchSizeLike): class TestFillConstantBatchSizeLike5(TestFillConstantBatchSizeLike): + def init_value(self): # str_value = 
'-inf' self.value = 3.8 @@ -116,6 +122,7 @@ class TestFillConstantBatchSizeLike5(TestFillConstantBatchSizeLike): class TestFillConstantBatchSizeLike6(TestFillConstantBatchSizeLike): + def init_dtype(self): self.dtype = core.VarDesc.VarType.FP16 self.output_dtype = np.float16 @@ -125,17 +132,20 @@ class TestFillConstantBatchSizeLike6(TestFillConstantBatchSizeLike): class TestFillConstantBatchSizeLike7(TestFillConstantBatchSizeLike): + def init_dtype(self): self.dtype = core.VarDesc.VarType.INT32 self.output_dtype = np.int32 class TestFillConstantBatchSizeLike8(TestFillConstantBatchSizeLike): + def init_force_cpu(self): self.force_cpu = True class TestFillConstantBatchSizeLike9(TestFillConstantBatchSizeLike): + def init_shape(self): self.input_shape = [4, 5] self.shape = [123, 92] diff --git a/python/paddle/fluid/tests/unittests/npu/test_fill_constant_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_fill_constant_op_npu.py index 152a4548055..d661f953cf9 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_fill_constant_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_fill_constant_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestFillConstant(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -49,6 +51,7 @@ class TestFillConstant(OpTest): class TestFillConstantInt(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -73,6 +76,7 @@ class TestFillConstantInt(OpTest): class TestFillConstantInt64(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -97,6 +101,7 @@ class TestFillConstantInt64(OpTest): class TestFillConstantFP16(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -121,6 +126,7 @@ class TestFillConstantFP16(OpTest): class TestFillConstantBool(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -145,6 +151,7 @@ class TestFillConstantBool(OpTest): class TestFillConstantWithPlaceType(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) diff --git a/python/paddle/fluid/tests/unittests/npu/test_fill_zeros_like_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_fill_zeros_like_op_npu.py index e00aa6971eb..f9f338a7310 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_fill_zeros_like_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_fill_zeros_like_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -25,6 +26,7 @@ paddle.enable_static() class TestFillZerosLikeOp(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -44,26 +46,31 @@ class TestFillZerosLikeOp(OpTest): class TestFillZerosLikeOpBool(TestFillZerosLikeOp): + def init_dtype(self): self.dtype = np.bool class TestFillZerosLikeOpFp16(TestFillZerosLikeOp): + def init_dtype(self): self.dtype = np.float16 class TestFillZerosLikeOpFp64(TestFillZerosLikeOp): + def init_dtype(self): self.dtype = np.float64 class TestFillZerosLikeOpInt32(TestFillZerosLikeOp): + def init_dtype(self): self.dtype = np.int32 class TestFillZerosLikeOpInt64(TestFillZerosLikeOp): + def init_dtype(self): self.dtype = np.int64 diff --git a/python/paddle/fluid/tests/unittests/npu/test_flags_check_nan_inf_npu.py 
b/python/paddle/fluid/tests/unittests/npu/test_flags_check_nan_inf_npu.py index 66c39062dc7..69c586fb2d8 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_flags_check_nan_inf_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_flags_check_nan_inf_npu.py @@ -15,6 +15,7 @@ import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -26,6 +27,7 @@ paddle.enable_static() class TestCheckFiniteAndUnscale(unittest.TestCase): + def setUp(self): fluid.set_flags({'FLAGS_check_nan_inf': True}) diff --git a/python/paddle/fluid/tests/unittests/npu/test_flatten2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_flatten2_op_npu.py index acd7ca77016..a415c8be71c 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_flatten2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_flatten2_op_npu.py @@ -16,15 +16,18 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import numpy as np import paddle import paddle.fluid as fluid from op_test import OpTest + paddle.enable_static() class TestFlatten2Op(OpTest): + def setUp(self): self.set_npu() self.op_type = "flatten2" @@ -56,6 +59,7 @@ class TestFlatten2Op(OpTest): class TestFlatten2OpWithCornerAxis(TestFlatten2Op): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.axis = 0 @@ -63,6 +67,7 @@ class TestFlatten2OpWithCornerAxis(TestFlatten2Op): class TestFlatten2OpWithDefaultAxis(TestFlatten2Op): + def init_test_case(self): self.in_shape = (10, 2, 2, 3) self.new_shape = (10, 12) @@ -72,6 +77,7 @@ class TestFlatten2OpWithDefaultAxis(TestFlatten2Op): class TestFlatten2OpSixDims(TestFlatten2Op): + def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) self.axis = 4 diff --git a/python/paddle/fluid/tests/unittests/npu/test_flatten_contiguous_range_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_flatten_contiguous_range_op_npu.py index 742d156c7f5..3f90c8b19b4 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_flatten_contiguous_range_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_flatten_contiguous_range_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -26,6 +27,7 @@ paddle.enable_static() class TestFlattenOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "flatten_contiguous_range" @@ -65,6 +67,7 @@ class TestFlattenOp(OpTest): class TestFlattenOp_1(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 1 @@ -79,6 +82,7 @@ class TestFlattenOp_1(TestFlattenOp): class TestFlattenOp_2(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -93,6 +97,7 @@ class TestFlattenOp_2(TestFlattenOp): class TestFlattenOp_3(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -107,6 +112,7 @@ class TestFlattenOp_3(TestFlattenOp): class TestFlattenOp_4(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = -2 @@ -121,6 +127,7 @@ class TestFlattenOp_4(TestFlattenOp): class TestFlattenOp_5(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 2 @@ -135,6 +142,7 @@ class TestFlattenOp_5(TestFlattenOp): class TestFlattenOpSixDims(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) self.start_axis = 3 @@ -149,6 +157,7 @@ class 
TestFlattenOpSixDims(TestFlattenOp): class TestFlattenOp_Float32(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -164,6 +173,7 @@ class TestFlattenOp_Float32(TestFlattenOp): class TestFlattenOp_int32(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -182,6 +192,7 @@ class TestFlattenOp_int32(TestFlattenOp): class TestFlattenOp_uint8(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -200,6 +211,7 @@ class TestFlattenOp_uint8(TestFlattenOp): class TestFlattenOp_int8(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -218,6 +230,7 @@ class TestFlattenOp_int8(TestFlattenOp): class TestFlattenOp_int64(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -236,6 +249,7 @@ class TestFlattenOp_int64(TestFlattenOp): class TestFlatten2OpError(unittest.TestCase): + def test_errors(self): image_shape = (2, 3, 4, 4) x = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * @@ -243,22 +257,25 @@ class TestFlatten2OpError(unittest.TestCase): x = x.astype('float32') def test_ValueError1(): - x_var = paddle.static.data( - name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data(name="x", + shape=image_shape, + dtype='float32') out = paddle.flatten(x_var, start_axis=2, stop_axis=1) self.assertRaises(ValueError, test_ValueError1) def test_ValueError2(): - x_var = paddle.static.data( - name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data(name="x", + shape=image_shape, + dtype='float32') paddle.flatten(x_var, start_axis=10, stop_axis=1) self.assertRaises(ValueError, test_ValueError2) def test_ValueError3(): - x_var = paddle.static.data( - name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data(name="x", + shape=image_shape, + dtype='float32') paddle.flatten(x_var, start_axis=2, stop_axis=10) self.assertRaises(ValueError, test_ValueError3) @@ -268,8 +285,9 @@ class TestFlatten2OpError(unittest.TestCase): x2 = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * image_shape[3]).reshape(image_shape) / 100. 
x2 = x2.astype('float16') - x2_var = paddle.fluid.data( - name='x2', shape=[3, 2, 4, 5], dtype='float16') + x2_var = paddle.fluid.data(name='x2', + shape=[3, 2, 4, 5], + dtype='float16') paddle.flatten(x2_var) self.assertRaises(TypeError, test_type) @@ -281,6 +299,7 @@ class TestFlatten2OpError(unittest.TestCase): class TestStaticFlattenPythonAPI(unittest.TestCase): + def execute_api(self, x, start_axis=0, stop_axis=-1): return paddle.flatten(x, start_axis, stop_axis) @@ -290,8 +309,9 @@ class TestStaticFlattenPythonAPI(unittest.TestCase): main_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, paddle.static.Program()): - x = paddle.static.data( - name="x", shape=[2, 3, 4, 4], dtype='float32') + x = paddle.static.data(name="x", + shape=[2, 3, 4, 4], + dtype='float32') out = self.execute_api(x, start_axis=-2, stop_axis=-1) exe = paddle.static.Executor(place=paddle.NPUPlace(0)) @@ -300,11 +320,13 @@ class TestStaticFlattenPythonAPI(unittest.TestCase): class TestStaticInplaceFlattenPythonAPI(TestStaticFlattenPythonAPI): + def execute_api(self, x, start_axis=0, stop_axis=-1): return x.flatten_(start_axis, stop_axis) class TestFlattenPython(unittest.TestCase): + def test_python_api(self): image_shape = (2, 3, 4, 4) x = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * diff --git a/python/paddle/fluid/tests/unittests/npu/test_float_status_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_float_status_op_npu.py index 206641dab5c..71764aad47c 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_float_status_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_float_status_op_npu.py @@ -15,6 +15,7 @@ import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -22,6 +23,7 @@ import paddle._C_ops as ops class TestGetFloatStatusOp(unittest.TestCase): + def setUp(self): device = paddle.set_device('npu') @@ -62,6 +64,7 @@ class TestGetFloatStatusOp(unittest.TestCase): class TestClearFloatStatusOp(unittest.TestCase): + def setUp(self): device = paddle.set_device('npu') diff --git a/python/paddle/fluid/tests/unittests/npu/test_gather_nd_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_gather_nd_op_npu.py index acb4ffd686f..5f33d735816 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_gather_nd_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_gather_nd_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle.fluid as fluid @@ -38,6 +39,7 @@ def gather_nd_grad(x, index): def test_class1(op_type, typename): + class TestGatherNdOpWithEmptyIndex(OpTest): #Index has empty element, which means copy entire tensor @@ -72,7 +74,9 @@ def test_class1(op_type, typename): def test_class2(op_type, typename): + class TestGatherNdOpWithIndex1(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -99,6 +103,7 @@ def test_class2(op_type, typename): def test_class3(op_type, typename): + class TestGatherNdOpWithLowIndex(OpTest): #Index has low rank, X has high rank @@ -123,8 +128,9 @@ def test_class3(op_type, typename): if typename == "float16" or typename == "int64": self.__class__.no_need_check_grad = True else: - self.check_grad_with_place( - self.place, ['X'], 'Out', user_defined_grads=[self.x_grad]) + self.check_grad_with_place(self.place, ['X'], + 'Out', + user_defined_grads=[self.x_grad]) cls_name = 
"{0}_{1}_3".format(op_type, typename) TestGatherNdOpWithLowIndex.__name__ = cls_name @@ -132,6 +138,7 @@ def test_class3(op_type, typename): def test_class4(op_type, typename): + class TestGatherNdOpIndex1(OpTest): #Index has low rank, X has high rank @@ -164,6 +171,7 @@ def test_class4(op_type, typename): def test_class5(op_type, typename): + class TestGatherNdOpWithSameIndexAsX(OpTest): #Index has same rank as X's rank @@ -195,6 +203,7 @@ def test_class5(op_type, typename): def test_class6(op_type, typename): + class TestGatherNdOpWithHighRankSame(OpTest): #Both Index and X have high rank, and Rank(Index) = Rank(X) @@ -204,8 +213,8 @@ def test_class6(op_type, typename): self.op_type = "gather_nd" shape = (5, 2, 3, 1, 10) xnp = np.random.rand(*shape).astype(typename) - index = np.vstack([np.random.randint( - 0, s, size=2) for s in shape]).T + index = np.vstack([np.random.randint(0, s, size=2) + for s in shape]).T self.inputs = {'X': xnp, 'Index': index.astype("int32")} self.outputs = {'Out': xnp[tuple(index.T)]} @@ -228,6 +237,7 @@ def test_class6(op_type, typename): def test_class7(op_type, typename): + class TestGatherNdOpWithHighRankDiff(OpTest): #Both Index and X have high rank, Rank(Index) < Rank(X) @@ -238,8 +248,7 @@ def test_class7(op_type, typename): shape = (2, 3, 4, 1, 10) xnp = np.random.rand(*shape).astype(typename) index = np.vstack( - [np.random.randint( - 0, s, size=200) for s in shape]).T + [np.random.randint(0, s, size=200) for s in shape]).T index_re = index.reshape([20, 5, 2, 5]) self.inputs = {'X': xnp, 'Index': index_re.astype("int32")} @@ -263,6 +272,7 @@ def test_class7(op_type, typename): class TestGatherNdAPI(unittest.TestCase): + def test_imperative(self): paddle.disable_static() input_1 = np.array([[1, 2], [3, 4], [5, 6]]) diff --git a/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py index daca3d88460..28b8ab9b25f 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -35,6 +36,7 @@ def gather_numpy(x, index, axis): class TestGatherOp(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -58,7 +60,8 @@ class TestGatherOp(OpTest): self.place, ['X'], 'Out', - max_relative_error=0.006, ) + max_relative_error=0.006, + ) def config(self): """ @@ -71,6 +74,7 @@ class TestGatherOp(OpTest): class TestCase1(TestGatherOp): + def config(self): """ For one dimension input @@ -82,6 +86,7 @@ class TestCase1(TestGatherOp): class API_TestGather(unittest.TestCase): + def test_out1(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data1 = fluid.layers.data('data1', shape=[-1, 2], dtype='float32') @@ -91,8 +96,10 @@ class API_TestGather(unittest.TestCase): exe = fluid.Executor(place) input = np.array([[1, 2], [3, 4], [5, 6]]) index_1 = np.array([1, 2]) - result, = exe.run(feed={"data1": input, - "index": index_1}, + result, = exe.run(feed={ + "data1": input, + "index": index_1 + }, fetch_list=[out]) expected_output = np.array([[3, 4], [5, 6]]) self.assertTrue(np.allclose(result, expected_output)) @@ -107,14 +114,17 @@ class API_TestGather(unittest.TestCase): exe = paddle.static.Executor(place) x_np = np.array([[1, 2], [3, 4], [5, 6]]).astype('float32') index_np = np.array([1, 1]).astype('int32') - result, = 
exe.run(feed={"x": x_np, - "index": index_np}, + result, = exe.run(feed={ + "x": x_np, + "index": index_np + }, fetch_list=[out]) expected_output = gather_numpy(x_np, index_np, axis=0) self.assertTrue(np.allclose(result, expected_output)) class TestGatherGrad(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -127,8 +137,9 @@ class TestGatherGrad(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[8192, 768], dtype='float32') - index = paddle.static.data( - name="index", shape=[1232, 1], dtype='int32') + index = paddle.static.data(name="index", + shape=[1232, 1], + dtype='int32') a.stop_gradient = False b = paddle.gather(a, index) @@ -148,8 +159,10 @@ class TestGatherGrad(unittest.TestCase): for epoch in range(100): pred_res, loss_res = exe.run(main_prog, - feed={"a": a_np, - "index": index_np}, + feed={ + "a": a_np, + "index": index_np + }, fetch_list=[b, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( diff --git a/python/paddle/fluid/tests/unittests/npu/test_gaussian_random_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_gaussian_random_op_npu.py index 11f64b8fc7d..470982b9e70 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_gaussian_random_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_gaussian_random_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import sys import unittest import numpy as np + sys.path.append("..") import paddle import paddle.fluid as fluid @@ -27,6 +28,7 @@ paddle.enable_static() class TestNPUGaussianRandomOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "gaussian_random" @@ -68,10 +70,8 @@ class TestNPUGaussianRandomOp(OpTest): hist2, _ = np.histogram(data, range=(-3, 5)) hist2 = hist2.astype("float32") hist2 /= float(outs[0].size) - self.assertTrue( - np.allclose( - hist, hist2, rtol=0, atol=0.01), - "hist: " + str(hist) + " hist2: " + str(hist2)) + self.assertTrue(np.allclose(hist, hist2, rtol=0, atol=0.01), + "hist: " + str(hist) + " hist2: " + str(hist2)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py index 760ce59812e..a779e797808 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py @@ -18,6 +18,7 @@ import numpy as np from scipy import special import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -33,6 +34,7 @@ def np_gelu(x): class TestGelu(OpTest): + def setUp(self): self.set_npu() self.op_type = "gelu" @@ -57,11 +59,13 @@ class TestGelu(OpTest): self.check_output_with_place(self.place, atol=1e-3) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', max_relative_error=0.007) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.007) class TestGeluFp16(OpTest): + def setUp(self): self.set_npu() self.op_type = "gelu" @@ -88,6 +92,7 @@ class TestGeluFp16(OpTest): class TestGeluNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -102,8 +107,9 @@ class TestGeluNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 
32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') c = paddle.multiply(a, b) @@ -127,12 +133,13 @@ class TestGeluNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_group_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_group_norm_op_npu.py index 9ab1161be36..a5830325c83 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_group_norm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_group_norm_op_npu.py @@ -17,6 +17,7 @@ import unittest import numpy as np import sys + sys.path.append("..") from operator import mul @@ -46,6 +47,7 @@ def group_norm_naive(x, scale, bias, epsilon, groups, data_layout): class TestGroupNormOpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): @@ -57,8 +59,9 @@ class TestGroupNormOpError(unittest.TestCase): self.assertRaises(TypeError, test_x_type) def test_x_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[2, 100, 3, 5], dtype='int32') + x2 = fluid.layers.data(name='x2', + shape=[2, 100, 3, 5], + dtype='int32') groups = 2 fluid.layers.group_norm(x2, groups) @@ -66,6 +69,7 @@ class TestGroupNormOpError(unittest.TestCase): class TestGroupNormOp(OpTest): + def setUp(self): self.set_npu() self.op_type = 'group_norm' @@ -86,9 +90,10 @@ class TestGroupNormOp(OpTest): input = np.transpose(input, (0, 2, 3, 1)) scale = np.random.random([self.shape[1]]).astype(self.dtype) bias = np.random.random([self.shape[1]]).astype(self.dtype) - output, mean, var = group_norm_naive( - input, scale, bias, self.attrs['epsilon'], self.attrs['groups'], - self.data_format) + output, mean, var = group_norm_naive(input, scale, bias, + self.attrs['epsilon'], + self.attrs['groups'], + self.data_format) self.inputs = { 'X': OpTest.np_dtype_to_fluid_dtype(input), @@ -130,45 +135,53 @@ class TestGroupNormOp(OpTest): class TestGroupNormOp1(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 1 class TestGroupNormOp2(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 4 class TestGroupNormOpBigEps1(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 1 self.attrs['epsilon'] = 0.5 class TestGroupNormOpBigEps2(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 4 self.attrs['epsilon'] = 0.5 class TestGroupNormOpBigEps3(TestGroupNormOp): + def init_test_case(self): self.attrs['epsilon'] = 0.5 class TestGroupNormOp1_With_NHWC(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 1 self.data_format = "NHWC" class TestGroupNormOp2_With_NHWC(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 4 self.data_format = "NHWC" class TestGroupNormOpBigEps1_With_NHWC(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 1 self.attrs['epsilon'] = 0.5 @@ -176,6 +189,7 @@ class TestGroupNormOpBigEps1_With_NHWC(TestGroupNormOp): class TestGroupNormOpBigEps2_With_NHWC(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 
4 self.attrs['epsilon'] = 0.5 @@ -183,17 +197,20 @@ class TestGroupNormOpBigEps2_With_NHWC(TestGroupNormOp): class TestGroupNormOpBigEps3_With_NHWC(TestGroupNormOp): + def init_test_case(self): self.attrs['epsilon'] = 0.5 self.data_format = "NHWC" class TestGroupNormOpFP16(TestGroupNormOp): + def init_dtype(self): self.dtype = np.float16 class TestGroupNormOpFP16_With_NHWC(TestGroupNormOp): + def init_dtype(self): self.dtype = np.float16 @@ -207,8 +224,9 @@ class TestGroupNormException(unittest.TestCase): data = fluid.data(name='data', shape=[None, 3, 3, 4], dtype="float64") def attr_data_format(): - out = fluid.layers.group_norm( - input=data, groups=2, data_layout="NDHW") + out = fluid.layers.group_norm(input=data, + groups=2, + data_layout="NDHW") self.assertRaises(ValueError, attr_data_format) diff --git a/python/paddle/fluid/tests/unittests/npu/test_hard_sigmoid_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_hard_sigmoid_op_npu.py index f1d89cb8d56..a83618392a1 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_hard_sigmoid_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_hard_sigmoid_op_npu.py @@ -32,6 +32,7 @@ def ref_hardsigmoid(x, slope=0.166666666666667, offset=0.5): class TestNPUHardSigmoid(OpTest): + def setUp(self): paddle.enable_static() @@ -74,18 +75,21 @@ class TestNPUHardSigmoid(OpTest): class TestNPUHardSigmoid2(TestNPUHardSigmoid): + def set_attrs(self): self.slope = 0.2 self.offset = 0.5 class TestNPUHardSigmoid3(TestNPUHardSigmoid): + def set_attrs(self): self.slope = 0.2 self.offset = 0.4 class TestNPUHardSigmoidFp16(TestNPUHardSigmoid): + def test_check_output(self): self.check_output_with_place(self.place, atol=1e-3) @@ -142,12 +146,14 @@ class TestHardsigmoidAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.hardsigmoid, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.hardsigmoid, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.hardsigmoid(x_fp16) diff --git a/python/paddle/fluid/tests/unittests/npu/test_hard_swish_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_hard_swish_op_npu.py index 9495cdb8a55..4e83700da78 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_hard_swish_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_hard_swish_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -33,6 +34,7 @@ def ref_hard_swish_grad(x, threshold=6.0, scale=6.0, offset=3.0): class TestHardSwishNPU(OpTest): + def setUp(self): paddle.enable_static() @@ -48,8 +50,10 @@ class TestHardSwishNPU(OpTest): #the same with TestAbs x[np.abs(x + offset) < 0.005] = 0.02 x[np.abs(x - threshold + offset) < 0.005] = threshold - offset + 0.02 - out = (x * (np.minimum(np.maximum(x + offset, 0.), threshold) / - scale)).astype(self.dtype) + out = ( + x * + (np.minimum(np.maximum(x + offset, 0.), threshold) / scale)).astype( + self.dtype) self.x_grad = ref_hard_swish_grad(x, threshold, scale, offset) self.inputs = {'X': x} @@ -67,14 +71,16 @@ class TestHardSwishNPU(OpTest): def test_check_grad(self): # There is a problem that precision of grad result using float32 - # can't satisfy the default precision requirement - # when compared with numeric_grads, but the results on + # can't satisfy the default precision requirement + # when compared with numeric_grads, but the results on # NPU and CPU are same (verified in TestHardSwishNPUWithCPU) - self.check_grad_with_place( - self.place, ['X'], 'Out', user_defined_grads=[self.x_grad]) + self.check_grad_with_place(self.place, ['X'], + 'Out', + user_defined_grads=[self.x_grad]) class TestHardSwishNPUFp16(TestHardSwishNPU): + def test_check_output(self): self.check_output_with_place(self.place) @@ -84,6 +90,7 @@ class TestHardSwishNPUFp16(TestHardSwishNPU): # test the result of hard_swish and hard_swish_grad on CPU and NPU class TestHardSwishNPUWithCPU(unittest.TestCase): + def setUp(self): paddle.disable_static() diff --git a/python/paddle/fluid/tests/unittests/npu/test_huber_loss_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_huber_loss_op_npu.py index a9c195bb8cd..a8fe42b294f 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_huber_loss_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_huber_loss_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -37,6 +38,7 @@ def huber_loss_forward(val, delta): @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestHuberLossOp(OpTest): + def setUp(self): self.set_npu() self.op_type = 'huber_loss' @@ -84,36 +86,38 @@ class TestHuberLossOp(OpTest): self.check_grad_with_place(self.place, ['X', 'Y'], 'Out') def test_check_grad_ingore_x(self): - self.check_grad_with_place( - self.place, ['Y'], - 'Out', - max_relative_error=0.008, - no_grad_set=set("residual")) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + 
max_relative_error=0.008, + no_grad_set=set("residual")) def test_check_grad_ingore_y(self): - self.check_grad_with_place( - self.place, ['X'], - 'Out', - max_relative_error=0.008, - no_grad_set=set('residual')) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.008, + no_grad_set=set('residual')) def TestHuberLossOp1(TestHuberLossOp): + def set_shape(self): return (64) def TestHuberLossOp2(TestHuberLossOp): + def set_shape(self): return (6, 6) def TestHuberLossOp3(TestHuberLossOp): + def set_shape(self): return (6, 6, 1) def TestHuberLossOpFP16(TestHuberLossOp): + def init_dtype(self): self.dtype = np.float16 @@ -121,6 +125,7 @@ def TestHuberLossOpFP16(TestHuberLossOp): @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestHuberLossOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input and label must be Variable diff --git a/python/paddle/fluid/tests/unittests/npu/test_increment_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_increment_op_npu.py index 626dbfc52a7..e86f5625398 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_increment_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_increment_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -30,6 +31,7 @@ NPUPlace = 0 class TestIncrement(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(NPUPlace) @@ -56,6 +58,7 @@ class TestIncrement(OpTest): class TestIncrementFP16(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(NPUPlace) @@ -82,6 +85,7 @@ class TestIncrementFP16(OpTest): class TestIncrementINT64(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(NPUPlace) @@ -108,6 +112,7 @@ class TestIncrementINT64(OpTest): class TestIncrementInplace(unittest.TestCase): + def test_npu(self): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -126,7 +131,9 @@ class TestIncrementInplace(unittest.TestCase): exe = paddle.static.Executor(place) exe.run(startup_prog) - b_value = exe.run(main_prog, feed={"a": a_np, }, fetch_list=[b]) + b_value = exe.run(main_prog, feed={ + "a": a_np, + }, fetch_list=[b]) print('input a id is : {}'.format(id(a))) print('input b id is : {}'.format(id(b))) diff --git a/python/paddle/fluid/tests/unittests/npu/test_index_sample_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_index_sample_op_npu.py index 9b890d22ada..6ce647efc2d 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_index_sample_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_index_sample_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import sys import unittest import numpy as np + sys.path.append("..") from op_test import OpTest @@ -27,6 +28,7 @@ paddle.enable_static() class TestIndexSampleOp(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -35,9 +37,10 @@ class TestIndexSampleOp(OpTest): self.op_type = "index_sample" self.config() xnp = np.random.random(self.x_shape).astype(self.dtype) - indexnp = np.random.randint( - low=0, high=self.x_shape[1], - size=self.index_shape).astype(self.index_type) + indexnp = np.random.randint(low=0, + high=self.x_shape[1], + size=self.index_shape).astype( + self.index_type) self.inputs = {'X': xnp, 'Index': indexnp} index_array = [] for i in range(self.index_shape[0]): @@ -64,6 +67,7 @@ class 
TestIndexSampleOp(OpTest): class TestCase1(TestIndexSampleOp): + def config(self): """ For one dimension input @@ -75,6 +79,7 @@ class TestCase1(TestIndexSampleOp): class TestCase2(TestIndexSampleOp): + def config(self): """ For int64_t index type @@ -86,6 +91,7 @@ class TestCase2(TestIndexSampleOp): class TestCase3(TestIndexSampleOp): + def config(self): """ For int index type @@ -97,6 +103,7 @@ class TestCase3(TestIndexSampleOp): class TestCase4(TestIndexSampleOp): + def config(self): """ For int64 index type @@ -108,6 +115,7 @@ class TestCase4(TestIndexSampleOp): class TestCase5(TestIndexSampleOp): + def config(self): """ For float16 x type @@ -123,6 +131,7 @@ class TestCase5(TestIndexSampleOp): class TestCase6(TestCase5): + def config(self): """ For int32 x type @@ -135,6 +144,7 @@ class TestCase6(TestCase5): class TestCase7(TestCase5): + def config(self): """ For int64 x type @@ -147,6 +157,7 @@ class TestCase7(TestCase5): class TestIndexSampleShape(unittest.TestCase): + def test_shape(self): paddle.enable_static() # create x value @@ -157,8 +168,8 @@ class TestIndexSampleShape(unittest.TestCase): # create index value index_shape = (2, 3) index_type = "int32" - index_np = np.random.randint( - low=0, high=x_shape[1], size=index_shape).astype(index_type) + index_np = np.random.randint(low=0, high=x_shape[1], + size=index_shape).astype(index_type) x = fluid.data(name='x', shape=[-1, 5], dtype='float32') index = fluid.data(name='index', shape=[-1, 3], dtype='int32') @@ -173,18 +184,18 @@ class TestIndexSampleShape(unittest.TestCase): class TestIndexSampleDynamic(unittest.TestCase): + def test_result(self): with fluid.dygraph.guard(paddle.NPUPlace(0)): - x = paddle.to_tensor( - [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], - [9.0, 10.0, 11.0, 12.0]], - dtype='float32') - index = paddle.to_tensor( - [[0, 1, 2], [1, 2, 3], [0, 0, 0]], dtype='int32') + x = paddle.to_tensor([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], + [9.0, 10.0, 11.0, 12.0]], + dtype='float32') + index = paddle.to_tensor([[0, 1, 2], [1, 2, 3], [0, 0, 0]], + dtype='int32') out_z1 = paddle.index_sample(x, index) - except_output = np.array( - [[1.0, 2.0, 3.0], [6.0, 7.0, 8.0], [9.0, 9.0, 9.0]]) + except_output = np.array([[1.0, 2.0, 3.0], [6.0, 7.0, 8.0], + [9.0, 9.0, 9.0]]) assert out_z1.numpy().all() == except_output.all() diff --git a/python/paddle/fluid/tests/unittests/npu/test_index_select_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_index_select_op_npu.py index 57293ad5e56..5428bf1e657 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_index_select_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_index_select_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestNPUIndexSelect(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -34,11 +36,10 @@ class TestNPUIndexSelect(OpTest): self.config() x_np = np.random.random(self.x_shape).astype(self.x_type) - index_np = np.random.randint( - low=0, - high=self.x_shape[self.dim], - size=self.index_size, - dtype=self.index_type) + index_np = np.random.randint(low=0, + high=self.x_shape[self.dim], + size=self.index_size, + dtype=self.index_type) # compute real output as baseline. 
outer_loop = np.prod(self.x_shape[:self.dim]) @@ -77,6 +78,7 @@ class TestNPUIndexSelect(OpTest): class TestNPUIndexSelectCase2(TestNPUIndexSelect): + def config(self): self.dim = -2 self.x_type = np.float32 @@ -86,6 +88,7 @@ class TestNPUIndexSelectCase2(TestNPUIndexSelect): class TestNPUIndexSelectCase3(TestNPUIndexSelect): + def config(self): self.dim = 0 self.x_type = np.float32 @@ -95,6 +98,7 @@ class TestNPUIndexSelectCase3(TestNPUIndexSelect): class TestNPUIndexSelectCase4(TestNPUIndexSelect): + def config(self): self.dim = -1 self.x_type = np.float32 @@ -104,6 +108,7 @@ class TestNPUIndexSelectCase4(TestNPUIndexSelect): class TestNPUIndexSelectAPI(unittest.TestCase): + def input_data(self): self.data_x = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [9.0, 10.0, 11.0, 12.0]]).astype('float32') @@ -120,8 +125,10 @@ class TestNPUIndexSelectAPI(unittest.TestCase): index = paddle.static.data(name='index', shape=[3], dtype='int32') z = paddle.index_select(x, index, axis=1) exe = paddle.static.Executor(paddle.NPUPlace(0)) - res, = exe.run(feed={'x': self.data_x, - 'index': self.data_index}, + res, = exe.run(feed={ + 'x': self.data_x, + 'index': self.data_index + }, fetch_list=[z.name], return_numpy=False) expect_out = np.array([[1.0, 2.0, 2.0], [5.0, 6.0, 6.0], @@ -134,8 +141,10 @@ class TestNPUIndexSelectAPI(unittest.TestCase): index = paddle.static.data(name='index', shape=[3], dtype='int32') z = paddle.index_select(x, index) exe = paddle.static.Executor(paddle.NPUPlace(0)) - res, = exe.run(feed={'x': self.data_x, - 'index': self.data_index}, + res, = exe.run(feed={ + 'x': self.data_x, + 'index': self.data_index + }, fetch_list=[z.name], return_numpy=False) expect_out = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], diff --git a/python/paddle/fluid/tests/unittests/npu/test_iou_similarity_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_iou_similarity_op_npu.py index 22042ce4920..80ddda34df0 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_iou_similarity_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_iou_similarity_op_npu.py @@ -18,6 +18,7 @@ import unittest import numpy as np import numpy.random as random import sys + sys.path.append("..") import math import paddle @@ -29,6 +30,7 @@ np.random.seed(2021) class TestNpuIouSimilarityOp(OpTest): + def setUp(self): self.op_type = "iou_similarity" self.set_npu() @@ -104,6 +106,7 @@ class TestNpuIouSimilarityOp(OpTest): class TestNpuIouSimilarityOpWithLoD(TestNpuIouSimilarityOp): + def set_init_config(self): super(TestNpuIouSimilarityOpWithLoD, self).set_init_config() self.box_normalized = True @@ -111,6 +114,7 @@ class TestNpuIouSimilarityOpWithLoD(TestNpuIouSimilarityOp): class TestNpuIouSimilarityOpWithBoxNormalized(TestNpuIouSimilarityOp): + def set_init_config(self): super(TestNpuIouSimilarityOpWithBoxNormalized, self).set_init_config() self.box_normalized = True @@ -118,6 +122,7 @@ class TestNpuIouSimilarityOpWithBoxNormalized(TestNpuIouSimilarityOp): def TestNpuIouSimilarityOpFp16(TestNpuIouSimilarityOp): + def init_dtype(self): self.dtype = np.float16 diff --git a/python/paddle/fluid/tests/unittests/npu/test_is_empty_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_is_empty_op_npu.py index 09801b0f5ec..4f903e10630 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_is_empty_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_is_empty_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") 
from op_test import OpTest import paddle @@ -27,6 +28,7 @@ paddle.enable_static() @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestEmpty(OpTest): + def setUp(self): self.set_npu() self.init_dtype() @@ -51,6 +53,7 @@ class TestEmpty(OpTest): @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestNotEmpty(TestEmpty): + def set_data(self): self.inputs = {'X': np.array([])} self.outputs = {'Out': np.array([True])} @@ -59,6 +62,7 @@ class TestNotEmpty(TestEmpty): @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestIsEmptyOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), @@ -73,16 +77,18 @@ class TestIsEmptyOpError(unittest.TestCase): def test_type(): # dtype must be float32, float16 in NPU - x3 = paddle.static.data( - name="x3", shape=[4, 32, 32], dtype="bool") + x3 = paddle.static.data(name="x3", + shape=[4, 32, 32], + dtype="bool") res = paddle.is_empty(x=x3) self.assertRaises(TypeError, test_type) def test_name_type(): # name type must be string. - x4 = paddle.static.data( - name="x4", shape=[3, 2], dtype="float32") + x4 = paddle.static.data(name="x4", + shape=[3, 2], + dtype="float32") res = paddle.is_empty(x=x4, name=1) self.assertRaises(TypeError, test_name_type) @@ -91,6 +97,7 @@ class TestIsEmptyOpError(unittest.TestCase): @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestIsEmptyOpDygraph(unittest.TestCase): + def test_dygraph(self): paddle.disable_static(paddle.NPUPlace(0)) input = paddle.rand(shape=[4, 32, 32], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/npu/test_kldiv_loss_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_kldiv_loss_op_npu.py index 7ed1775fa5e..3d9ba6c4407 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_kldiv_loss_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_kldiv_loss_op_npu.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,6 +17,7 @@ from __future__ import print_function, division import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ paddle.enable_static() class TestKLDivLossOp(OpTest): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -55,11 +57,10 @@ class TestKLDivLossOp(OpTest): self.check_output_with_place(self.place) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X'], - 'Loss', - no_grad_set=set(["Target"]), - max_relative_error=0.15) + self.check_grad_with_place(self.place, ['X'], + 'Loss', + no_grad_set=set(["Target"]), + max_relative_error=0.15) def initTestCase(self): self.x_shape = (4, 5, 5) @@ -67,24 +68,28 @@ class TestKLDivLossOp(OpTest): class TestKLDivLossOp2(TestKLDivLossOp): + def initTestCase(self): self.x_shape = (3, 2, 7, 7) self.reduction = 'none' class TestKLDivLossOp3(TestKLDivLossOp): + def initTestCase(self): self.x_shape = (2, 3, 5, 7, 9) self.reduction = 'mean' class TestKLDivLossOp4(TestKLDivLossOp): + def initTestCase(self): self.x_shape = (5, 20) self.reduction = 'sum' class TestKLDivLossOp_fp16(TestKLDivLossOp): + def init_dtype(self): self.dtype = 'float16' @@ -94,15 +99,15 @@ class TestKLDivLossOp_fp16(TestKLDivLossOp): def test_check_grad(self): input_grad = -self.inputs['Target'] * ( self.inputs['Target'] > 0) / self.inputs['Target'].shape[0] - self.check_grad_with_place( - self.place, ['X'], - 'Loss', - no_grad_set=set(["Target"]), - max_relative_error=0.2, - user_defined_grads=[input_grad]) + self.check_grad_with_place(self.place, ['X'], + 'Loss', + no_grad_set=set(["Target"]), + max_relative_error=0.2, + user_defined_grads=[input_grad]) class TestKLDivLossDygraph(unittest.TestCase): + def run_kl_loss(self, reduction, shape=(5, 20)): x = np.random.uniform(-10, 10, shape).astype('float32') target = np.random.uniform(-10, 10, shape).astype('float32') @@ -110,8 +115,8 @@ class TestKLDivLossDygraph(unittest.TestCase): with paddle.fluid.dygraph.guard(paddle.NPUPlace(0)): kldiv_criterion = paddle.nn.KLDivLoss(reduction) - pred_loss = kldiv_criterion( - paddle.to_tensor(x), paddle.to_tensor(target)) + pred_loss = kldiv_criterion(paddle.to_tensor(x), + paddle.to_tensor(target)) self.assertTrue(np.allclose(pred_loss.numpy(), gt_loss)) def test_kl_loss_batchmean(self): @@ -137,6 +142,7 @@ class TestKLDivLossDygraph(unittest.TestCase): class TestKLDivLossTypePromotion(unittest.TestCase): + def test_kl_div_promotion(self): with paddle.fluid.dygraph.guard(paddle.NPUPlace(0)): x1 = paddle.rand([5, 20], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/npu/test_label_smooth_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_label_smooth_op_npu.py index d02ddae461b..f298c64d8d2 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_label_smooth_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_label_smooth_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -29,6 +30,7 @@ SEED = 2021 @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestLabelSmoothOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "label_smooth" @@ -78,13 +80,15 @@ class TestLabelSmoothOp(OpTest): def test_check_grad(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['X'], 'Out', max_relative_error=0.5) + 
self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.5) else: self.check_grad_with_place(self.place, ['X'], 'Out') class TestLabelSmoothOpWithPriorDist(TestLabelSmoothOp): + def set_inputs(self): super(TestLabelSmoothOpWithPriorDist, self).set_inputs() label_dim = self.inputs['X'].shape[-1] @@ -93,33 +97,39 @@ class TestLabelSmoothOpWithPriorDist(TestLabelSmoothOp): class TestLabelSmoothOp3D(TestLabelSmoothOp): + def set_inputs(self): super(TestLabelSmoothOp3D, self).set_inputs() self.inputs['X'].reshape([2, -1, self.inputs['X'].shape[-1]]) class TestLabelSmoothOpWithPriorDist3D(TestLabelSmoothOpWithPriorDist): + def set_inputs(self): super(TestLabelSmoothOpWithPriorDist3D, self).set_inputs() self.inputs['X'].reshape([2, -1, self.inputs['X'].shape[-1]]) class TestLabelSmoothOpFP16(TestLabelSmoothOp): + def init_dtype(self): self.dtype = np.float16 class TestLabelSmoothOpWithPriorDistFP16(TestLabelSmoothOpWithPriorDist): + def init_dtype(self): self.dtype = np.float16 class TestLabelSmoothOp3DFP16(TestLabelSmoothOp3D): + def init_dtype(self): self.dtype = np.float16 class TestLabelSmoothOpWithPriorDist3DFP16(TestLabelSmoothOpWithPriorDist3D): + def init_dtype(self): self.dtype = np.float16 diff --git a/python/paddle/fluid/tests/unittests/npu/test_layer_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_layer_norm_op_npu.py index 0345ac1f206..5295ed50555 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_layer_norm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_layer_norm_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest from functools import reduce @@ -37,6 +38,7 @@ _set_use_system_allocator(False) class TestLayerNormOp(unittest.TestCase): + def setUp(self): self.use_cudnn = True self.set_npu() @@ -52,9 +54,9 @@ class TestLayerNormOp(unittest.TestCase): def __assert_close(self, tensor, np_array, msg, atol=1e-4): self.assertTrue( - np.allclose( - np.array(tensor).astype(np_array.dtype), np_array, atol=atol), - msg) + np.allclose(np.array(tensor).astype(np_array.dtype), + np_array, + atol=atol), msg) def check_forward_backward(self, shape, @@ -63,6 +65,7 @@ class TestLayerNormOp(unittest.TestCase): has_bias=True, y_grad_scale=1.0, use_mkldnn=False): + def test_with_place(place, shape, begin_norm_axis, @@ -79,8 +82,8 @@ class TestLayerNormOp(unittest.TestCase): np.float32) if has_scale else None bias = np.random.random_sample(scale_shape).astype( np.float32) if has_bias else None - y_grad = (np.random.random_sample(x_shape) * - y_grad_scale).astype(self.dtype) + y_grad = (np.random.random_sample(x_shape) * y_grad_scale).astype( + self.dtype) # reference forward & backward y, mean, variance = _reference_layer_norm_naive( @@ -101,10 +104,9 @@ class TestLayerNormOp(unittest.TestCase): with fluid.program_guard(program): block = program.global_block() for name in ground_truth: - block.create_var( - name=name, - dtype=self.dtype, - shape=ground_truth[name].shape) + block.create_var(name=name, + dtype=self.dtype, + shape=ground_truth[name].shape) inputs = {"X": block.var('x')} fetch_list = [ 'y', @@ -171,25 +173,23 @@ class TestLayerNormOp(unittest.TestCase): def test_check_forward_backward_with_scale_and_bias(self): self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=1) - self.check_forward_backward( - shape=[2, 3, 4, 5], - begin_norm_axis=1, - has_scale=False, - has_bias=True) - self.check_forward_backward( - shape=[2, 
3, 4, 5], - begin_norm_axis=1, - has_scale=True, - has_bias=False) - self.check_forward_backward( - shape=[2, 3, 4, 5], - begin_norm_axis=1, - has_scale=False, - has_bias=False) + self.check_forward_backward(shape=[2, 3, 4, 5], + begin_norm_axis=1, + has_scale=False, + has_bias=True) + self.check_forward_backward(shape=[2, 3, 4, 5], + begin_norm_axis=1, + has_scale=True, + has_bias=False) + self.check_forward_backward(shape=[2, 3, 4, 5], + begin_norm_axis=1, + has_scale=False, + has_bias=False) self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3) class TestLayerNormOpFP16(TestLayerNormOp): + def init_dtype(self): self.dtype = np.float16 self.atol = 1e-2 diff --git a/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py index a0472f9611e..d285d82f9d9 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest from test_activation_op import ref_leaky_relu @@ -28,6 +29,7 @@ SEED = 2021 class TestLeadyRelu(OpTest): + def setUp(self): self.set_npu() self.op_type = "leaky_relu" @@ -63,28 +65,33 @@ class TestLeadyRelu(OpTest): def test_check_grad(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['X'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.006) else: self.check_grad_with_place(self.place, ['X'], 'Out') class TestLeadyReluFP16(TestLeadyRelu): + def init_dtype(self): self.dtype = np.float16 class TestLeadyRelu2(TestLeadyRelu): + def set_attrs(self): self.attrs = {'alpha': 0.5} class TestLeadyRelu3(TestLeadyRelu): + def set_attrs(self): self.attrs = {'alpha': -0.5} class TestLeakyReluNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -97,8 +104,9 @@ class TestLeakyReluNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): x = paddle.static.data(name="x", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') y = paddle.nn.functional.leaky_relu(x) @@ -122,8 +130,10 @@ class TestLeakyReluNet(unittest.TestCase): for epoch in range(100): pred_res, loss_res = exe.run(main_prog, - feed={"x": x_np, - "label": label_np}, + feed={ + "x": x_np, + "label": label_np + }, fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( diff --git a/python/paddle/fluid/tests/unittests/npu/test_log_loss_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_log_loss_op_npu.py index ff1b0e53dfe..2f93b1c223e 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_log_loss_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_log_loss_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -32,6 +33,7 @@ def sigmoid_array(x): @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestLogLossOp(OpTest): + def setUp(self): self.set_npu() self.op_type = 'log_loss' @@ -78,6 +80,7 @@ class TestLogLossOp(OpTest): @unittest.skipIf(not 
paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestLogLossOpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program()): diff --git a/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py index 5da3cb0ce56..e6724a28354 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestLog(OpTest): + def setUp(self): self.set_npu() self.op_type = "log" @@ -55,6 +57,7 @@ class TestLog(OpTest): class TestLogFp16(OpTest): + def setUp(self): self.set_npu() self.op_type = "log" @@ -81,6 +84,7 @@ class TestLogFp16(OpTest): class TestLogNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -95,8 +99,9 @@ class TestLogNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') c = paddle.multiply(a, b) d = paddle.log(c) @@ -120,12 +125,13 @@ class TestLogNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_log_softmax_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_log_softmax_op_npu.py index 10ec8621ffa..8971f888b65 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_log_softmax_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_log_softmax_op_npu.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -16,6 +16,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -29,6 +30,7 @@ paddle.enable_static() class TestLogSoftmaxNPUOp(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -63,17 +65,18 @@ class TestLogSoftmaxNPUOp(OpTest): def test_check_grad(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['X'], ['Out'], - user_defined_grads=[self.x_grad], - max_relative_error=0.02) + self.check_grad_with_place(self.place, ['X'], ['Out'], + user_defined_grads=[self.x_grad], + max_relative_error=0.02) else: - self.check_grad_with_place( - self.place, ['X'], ['Out'], user_defined_grads=[self.x_grad]) + self.check_grad_with_place(self.place, ['X'], ['Out'], + user_defined_grads=[self.x_grad]) def test_class(op_type, typename): + class TestLogSoftmaxShape(TestLogSoftmaxNPUOp): + def set_attrs(self): self.shape = [12, 10] @@ -86,7 +89,9 @@ def test_class(op_type, typename): def test_class2(op_type, typename): + class TestLogSoftmaxAxis(TestLogSoftmaxNPUOp): + def set_attrs(self): self.axis = 0 @@ -105,6 +110,7 @@ for _typename in {np.float32, np.float16}: class TestNNLogSoftmaxAPI(unittest.TestCase): + def setUp(self): self.x_shape = [2, 3, 4, 5] self.x = np.random.uniform(-1., 1., self.x_shape).astype(np.float32) @@ -137,6 +143,7 @@ class TestNNLogSoftmaxAPI(unittest.TestCase): class TestNNFunctionalLogSoftmaxAPI(unittest.TestCase): + def setUp(self): self.x_shape = [2, 3, 4, 5] self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32) diff --git a/python/paddle/fluid/tests/unittests/npu/test_logical_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_logical_op_npu.py index add7d725821..bbf9bd2bf0c 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_logical_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_logical_op_npu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import op_test import unittest @@ -158,10 +159,10 @@ def test(unit_test, use_npu=False, test_error=False): META_DATA = dict(TEST_META_WRONG_SHAPE_DATA) for shape_data in META_DATA.values(): for data_type in SUPPORTED_DTYPES: - meta_data['x_np'] = np_data_generator( - shape_data['x_shape'], dtype=data_type) - meta_data['y_np'] = np_data_generator( - shape_data['y_shape'], dtype=data_type) + meta_data['x_np'] = np_data_generator(shape_data['x_shape'], + dtype=data_type) + meta_data['y_np'] = np_data_generator(shape_data['y_shape'], + dtype=data_type) if meta_data['binary_op'] and test_error: # catch C++ Exception unit_test.assertRaises(BaseException, run_static, @@ -176,11 +177,12 @@ def test(unit_test, use_npu=False, test_error=False): else: np_result = np_op(meta_data['x_np']) unit_test.assertTrue((static_result == np_result).all()) - unit_test.assertTrue((dygraph_result.numpy() == np_result).all( - )) + unit_test.assertTrue( + (dygraph_result.numpy() == np_result).all()) def test_type_error(unit_test, use_npu, type_str_map): + def check_type(op_str, x, y, binary_op): op = getattr(paddle, op_str) error_type = ValueError @@ -215,10 +217,12 @@ def test_type_error(unit_test, use_npu, type_str_map): startup_program = paddle.static.Program() main_program = paddle.static.Program() with paddle.static.program_guard(main_program, startup_program): - x = paddle.static.data( - name='x', shape=[10], dtype=type_str_map['x']) - y = paddle.static.data( - name='y', shape=[10], dtype=type_str_map['y']) + x = 
paddle.static.data(name='x', + shape=[10], + dtype=type_str_map['x']) + y = paddle.static.data(name='y', + shape=[10], + dtype=type_str_map['y']) check_type(meta_data['op_str'], x, y, binary_op) @@ -230,6 +234,7 @@ def type_map_factory(): class TestCPU(unittest.TestCase): + def test(self): test(self) @@ -243,6 +248,7 @@ class TestCPU(unittest.TestCase): class TestNPU(unittest.TestCase): + def test(self): test(self, True) diff --git a/python/paddle/fluid/tests/unittests/npu/test_lookup_table_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_lookup_table_v2_op_npu.py index 8ec9eb1cf35..100cad468e3 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_lookup_table_v2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_lookup_table_v2_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestLookupTableV2(OpTest): + def setUp(self): self.set_npu() self.op_type = "lookup_table_v2" @@ -37,8 +39,9 @@ class TestLookupTableV2(OpTest): self.init_padding_idx() np.random.seed(SEED) w = np.random.random([self.vocab, self.dim]).astype(self.dtype) - x = np.random.randint( - 0, self.vocab, size=(self.bsz, self.seqlen)).astype(self.ids_dtype) + x = np.random.randint(0, self.vocab, + size=(self.bsz, + self.seqlen)).astype(self.ids_dtype) out = w[x] if self.padding_idx != -1: out[np.squeeze(x == self.padding_idx)] = np.zeros(self.dim) @@ -77,8 +80,9 @@ class TestLookupTableV2(OpTest): def test_check_grad(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['W'], 'Out', max_relative_error=0.01) + self.check_grad_with_place(self.place, ['W'], + 'Out', + max_relative_error=0.01) else: self.check_grad_with_place(self.place, ['W'], 'Out') @@ -96,6 +100,7 @@ class TestLookupTableV2FP16(TestLookupTableV2): class TestLookupTableV2Dim32(TestLookupTableV2): + def init_dims(self): self.bsz = 6 self.seqlen = 8 @@ -123,11 +128,13 @@ class TestLookupTableV2Dim32FP16(TestLookupTableV2): class TestLookupTableV2WithPadding(TestLookupTableV2): + def init_padding_idx(self): self.padding_idx = np.random.randint(0, self.vocab) class TestLookupTableV2WithPadding1(TestLookupTableV2): + def init_padding_idx(self): self.padding_idx = np.random.randint(0, self.vocab) diff --git a/python/paddle/fluid/tests/unittests/npu/test_masked_select_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_masked_select_op_npu.py index 13078aea690..7cd9df1f2eb 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_masked_select_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_masked_select_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -34,6 +35,7 @@ def np_masked_select(x, mask): class TestMaskedSelectOp(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -63,28 +65,33 @@ class TestMaskedSelectOp(OpTest): class TestMaskedSelectOp1(TestMaskedSelectOp): + def init(self): self.shape = (6, 8, 9, 18) class TestMaskedSelectOp2(TestMaskedSelectOp): + def init(self): self.shape = (168, ) class TestMaskedSelectOpFp16(TestMaskedSelectOp): + def init_dtype(self): self.dtype = np.float16 def test_check_grad(self): x_grad = self.inputs['Mask'].astype(self.dtype) x_grad = x_grad * (1 / x_grad.sum()) - self.check_grad_with_place( - self.place, ['X'], 'Y', 
user_defined_grads=[x_grad]) + self.check_grad_with_place(self.place, ['X'], + 'Y', + user_defined_grads=[x_grad]) @skip_check_grad_ci(reason="get_numeric_gradient not support int32") class TestMaskedSelectOpInt32(TestMaskedSelectOp): + def init_dtype(self): self.dtype = np.int32 @@ -94,6 +101,7 @@ class TestMaskedSelectOpInt32(TestMaskedSelectOp): @skip_check_grad_ci(reason="get_numeric_gradient not support int64") class TestMaskedSelectOpInt64(TestMaskedSelectOp): + def init_dtype(self): self.dtype = np.int64 @@ -102,6 +110,7 @@ class TestMaskedSelectOpInt64(TestMaskedSelectOp): class TestMaskedSelectAPI(unittest.TestCase): + def test_imperative_mode(self): paddle.disable_static(paddle.NPUPlace(0)) shape = (88, 6, 8) @@ -127,13 +136,16 @@ class TestMaskedSelectAPI(unittest.TestCase): exe = paddle.static.Executor(place=paddle.NPUPlace(0)) res = exe.run(paddle.static.default_main_program(), - feed={"x": np_x, - "mask": np_mask}, + feed={ + "x": np_x, + "mask": np_mask + }, fetch_list=[out]) self.assertEqual(np.allclose(res, np_out), True) class TestMaskedSelectError(unittest.TestCase): + def test_error(self): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): @@ -141,8 +153,9 @@ class TestMaskedSelectError(unittest.TestCase): shape = [8, 9, 6] x = paddle.fluid.data(shape=shape, dtype='float32', name='x') mask = paddle.fluid.data(shape=shape, dtype='bool', name='mask') - mask_float = paddle.fluid.data( - shape=shape, dtype='float32', name='mask_float') + mask_float = paddle.fluid.data(shape=shape, + dtype='float32', + name='mask_float') np_x = np.random.random(shape).astype('float32') np_mask = np.array(np.random.randint(2, size=shape, dtype=bool)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_matmul_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_matmul_op_npu.py index a8dc0c137c3..841521fecdd 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_matmul_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_matmul_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -269,7 +270,9 @@ class TestMatMulOp13(TestMatMulOp): #--------------------test matmul alpha-------------------- def create_test_alpha_class(parent): + class TestMatMulOpAlphaCase(parent): + def init_alpha(self): self.alpha = 0.125 @@ -294,7 +297,9 @@ create_test_alpha_class(TestMatMulOp13) #--------------------test matmul fp16-------------------- def create_test_fp16_class(parent, atol=0.001, max_relative_error=2.5): + class TestMatMulOpFp16Case(parent): + def init_kernel_type(self): self.dtype = np.float16 @@ -302,10 +307,9 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=2.5): self.check_output_with_place(self.place, atol=atol) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X', 'Y'], - 'Out', - max_relative_error=max_relative_error) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + max_relative_error=max_relative_error) cls_name = "{0}_{1}".format(parent.__name__, "Fp16") TestMatMulOpFp16Case.__name__ = cls_name diff --git a/python/paddle/fluid/tests/unittests/npu/test_matmulv2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_matmulv2_op_npu.py index 23ca0cf1f49..a607c3035a9 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_matmulv2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_matmulv2_op_npu.py @@ -17,6 +17,7 @@ from __future__ import 
print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -291,7 +292,9 @@ class TestMatMulOpBroadcast2(TestMatMulV2Op): def create_test_fp16_class(parent, atol=0.001, max_relative_error=2.5): + class TestMatMulOpFp16Case(parent): + def init_kernel_type(self): self.dtype = np.float16 @@ -299,10 +302,9 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=2.5): self.check_output_with_place(self.place, atol=atol) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X', 'Y'], - 'Out', - max_relative_error=max_relative_error) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + max_relative_error=max_relative_error) cls_name = "{0}_{1}".format(parent.__name__, "Fp16") TestMatMulOpFp16Case.__name__ = cls_name @@ -329,6 +331,7 @@ create_test_fp16_class(TestMatMulOp17) class TestMatMulV2API(unittest.TestCase): + def setUp(self): self.places = [paddle.CPUPlace()] if paddle.is_compiled_with_npu(): @@ -346,8 +349,10 @@ class TestMatMulV2API(unittest.TestCase): exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), - feed={"input_x": x_np, - "input_y": y_np}, + feed={ + "input_x": x_np, + "input_y": y_np + }, fetch_list=[result]) def test_static(self): diff --git a/python/paddle/fluid/tests/unittests/npu/test_mean_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_mean_op_npu.py index e69c2fd84dd..a6936541f5b 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_mean_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_mean_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestMean(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -55,6 +57,7 @@ class TestMean(OpTest): class TestMeanFP16(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) diff --git a/python/paddle/fluid/tests/unittests/npu/test_memcpy_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_memcpy_op_npu.py index 6500a8c8cd8..d11d83f47cc 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_memcpy_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_memcpy_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -29,51 +30,47 @@ SEED = 2021 class TestMemcpy_FillConstant(unittest.TestCase): + def get_prog(self): paddle.enable_static() main_program = Program() with program_guard(main_program): cpu_var_name = "tensor@Cpu" npu_var_name = "tensor@Npu" - cpu_var = main_program.global_block().create_var( - name=cpu_var_name, - shape=[10, 10], - dtype='float32', - persistable=False, - stop_gradient=True) - npu_var = main_program.global_block().create_var( - name=npu_var_name, - shape=[10, 10], - dtype='float32', - persistable=False, - stop_gradient=True) - main_program.global_block().append_op( - type="fill_constant", - outputs={"Out": npu_var_name}, - attrs={ - "shape": [10, 10], - "dtype": npu_var.dtype, - "value": 1.0, - "place_type": 4 - }) - main_program.global_block().append_op( - type="fill_constant", - outputs={"Out": cpu_var_name}, - attrs={ - "shape": [10, 10], - "dtype": cpu_var.dtype, - "value": 0.0, - "place_type": 0 - }) + cpu_var = main_program.global_block().create_var(name=cpu_var_name, + shape=[10, 10], + dtype='float32', + 
persistable=False, + stop_gradient=True) + npu_var = main_program.global_block().create_var(name=npu_var_name, + shape=[10, 10], + dtype='float32', + persistable=False, + stop_gradient=True) + main_program.global_block().append_op(type="fill_constant", + outputs={"Out": npu_var_name}, + attrs={ + "shape": [10, 10], + "dtype": npu_var.dtype, + "value": 1.0, + "place_type": 4 + }) + main_program.global_block().append_op(type="fill_constant", + outputs={"Out": cpu_var_name}, + attrs={ + "shape": [10, 10], + "dtype": cpu_var.dtype, + "value": 0.0, + "place_type": 0 + }) return main_program, npu_var, cpu_var def test_npu_cpoy_to_cpu(self): main_program, npu_var, cpu_var = self.get_prog() - main_program.global_block().append_op( - type='memcpy', - inputs={'X': npu_var}, - outputs={'Out': cpu_var}, - attrs={'dst_place_type': 0}) + main_program.global_block().append_op(type='memcpy', + inputs={'X': npu_var}, + outputs={'Out': cpu_var}, + attrs={'dst_place_type': 0}) place = fluid.NPUPlace(0) exe = fluid.Executor(place) npu_, cpu_ = exe.run(main_program, @@ -84,11 +81,10 @@ class TestMemcpy_FillConstant(unittest.TestCase): def test_cpu_cpoy_npu(self): main_program, npu_var, cpu_var = self.get_prog() - main_program.global_block().append_op( - type='memcpy', - inputs={'X': cpu_var}, - outputs={'Out': npu_var}, - attrs={'dst_place_type': 4}) + main_program.global_block().append_op(type='memcpy', + inputs={'X': cpu_var}, + outputs={'Out': npu_var}, + attrs={'dst_place_type': 4}) place = fluid.NPUPlace(0) exe = fluid.Executor(place) npu_, cpu_ = exe.run(main_program, diff --git a/python/paddle/fluid/tests/unittests/npu/test_merged_momentum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_merged_momentum_op_npu.py index 96a15fc1caa..dce642cc063 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_merged_momentum_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_merged_momentum_op_npu.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,6 +13,7 @@ # limitations under the License. 
import sys + sys.path.append('..') import unittest import paddle @@ -47,22 +48,21 @@ def run_momentum_op(params, } param_vars = [ - helper.create_variable( - persistable=True, shape=p.shape, dtype=p.dtype) for p in params + helper.create_variable(persistable=True, + shape=p.shape, + dtype=p.dtype) for p in params ] grad_vars = [ - helper.create_variable( - shape=g.shape, dtype=g.dtype) for g in grads + helper.create_variable(shape=g.shape, dtype=g.dtype) for g in grads ] velocity_vars = [ - helper.create_variable( - persistable=True, shape=v.shape, dtype=v.dtype) - for v in velocitys + helper.create_variable(persistable=True, + shape=v.shape, + dtype=v.dtype) for v in velocitys ] - lr_var = helper.create_variable( - persistable=True, - shape=learning_rate.shape, - dtype=learning_rate.dtype) + lr_var = helper.create_variable(persistable=True, + shape=learning_rate.shape, + dtype=learning_rate.dtype) feed_dict = OrderedDict() @@ -81,14 +81,15 @@ def run_momentum_op(params, if multi_precision: master_param_vars = [ - helper.create_variable( - persistable=True, shape=p.shape, dtype=p.dtype) - for p in master_params + helper.create_variable(persistable=True, + shape=p.shape, + dtype=p.dtype) for p in master_params ] feed_dict.update( - OrderedDict([(mp_var.name, mp_val) - for mp_var, mp_val in zip(master_param_vars, - master_params)])) + OrderedDict([ + (mp_var.name, mp_val) + for mp_var, mp_val in zip(master_param_vars, master_params) + ])) # CPUPlace does not use MasterParam if isinstance(place, paddle.CUDAPlace): fetch_list = fetch_list + [ @@ -110,8 +111,10 @@ def run_momentum_op(params, if multi_precision: inputs['MasterParam'] = master_param_vars[i] outputs['MasterParamOut'] = master_param_vars[i] - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) else: inputs = { 'Param': param_vars, @@ -123,8 +126,10 @@ def run_momentum_op(params, if multi_precision: inputs['MasterParam'] = master_param_vars outputs['MasterParamOut'] = master_param_vars - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) exe = paddle.static.Executor(place) with paddle.static.scope_guard(paddle.static.Scope()): @@ -154,22 +159,21 @@ def run_momentum_op2(params, helper = LayerHelper(op_type, **locals()) param_vars = [ - helper.create_variable( - persistable=True, shape=p.shape, dtype=p.dtype) for p in params + helper.create_variable(persistable=True, + shape=p.shape, + dtype=p.dtype) for p in params ] grad_vars = [ - helper.create_variable( - shape=g.shape, dtype=g.dtype) for g in grads + helper.create_variable(shape=g.shape, dtype=g.dtype) for g in grads ] velocity_vars = [ - helper.create_variable( - persistable=True, shape=v.shape, dtype=v.dtype) - for v in velocitys + helper.create_variable(persistable=True, + shape=v.shape, + dtype=v.dtype) for v in velocitys ] - lr_var = helper.create_variable( - persistable=True, - shape=learning_rate.shape, - dtype=learning_rate.dtype) + lr_var = helper.create_variable(persistable=True, + shape=learning_rate.shape, + dtype=learning_rate.dtype) feed_dict = OrderedDict() @@ -188,14 +192,15 @@ def run_momentum_op2(params, if multi_precision: master_param_vars = [ - helper.create_variable( - persistable=True, shape=p.shape, dtype=p.dtype) - for p in master_params + helper.create_variable(persistable=True, + shape=p.shape, + dtype=p.dtype) for p in master_params 
] feed_dict.update( - OrderedDict([(mp_var.name, mp_val) - for mp_var, mp_val in zip(master_param_vars, - master_params)])) + OrderedDict([ + (mp_var.name, mp_val) + for mp_var, mp_val in zip(master_param_vars, master_params) + ])) # CPUPlace does not use MasterParam if isinstance(place, paddle.CUDAPlace): fetch_list = fetch_list + [ @@ -225,8 +230,10 @@ def run_momentum_op2(params, 'regularization_method': 'l2_decay', 'regularization_coeff': 2.0, } - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) else: inputs = { 'Param': param_vars, @@ -239,16 +246,22 @@ def run_momentum_op2(params, inputs['MasterParam'] = master_param_vars outputs['MasterParamOut'] = master_param_vars attrs = { - 'mu': mu, - 'multi_precision': multi_precision, - 'rescale_grad': rescale_grad, - 'use_nesterov': use_nesterov, + 'mu': + mu, + 'multi_precision': + multi_precision, + 'rescale_grad': + rescale_grad, + 'use_nesterov': + use_nesterov, 'regularization_method': ['l2_decay' for i in range(len(param_vars))], 'regularization_coeff': [2.0 for i in range(len(param_vars))], } - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) exe = paddle.static.Executor(place) with paddle.static.scope_guard(paddle.static.Scope()): @@ -257,6 +270,7 @@ def run_momentum_op2(params, class TestMergedMomentum(unittest.TestCase): + def setUp(self): paddle.enable_static() self.shapes = [[3, 4], [2, 7], [5, 6], [7, 8]] @@ -286,18 +300,17 @@ class TestMergedMomentum(unittest.TestCase): self.shapes, multi_precision, self.seed, place) def run_op(use_merged): - # NPU Momentum Op does not support rescale_grad + # NPU Momentum Op does not support rescale_grad rescale_grad = 1.0 - return run_momentum_op( - params, - grads, - velocitys, - master_params, - learning_rate, - place, - multi_precision, - rescale_grad=rescale_grad, - use_merged=use_merged) + return run_momentum_op(params, + grads, + velocitys, + master_params, + learning_rate, + place, + multi_precision, + rescale_grad=rescale_grad, + use_merged=use_merged) outs1 = run_op(True) outs2 = run_op(False) @@ -310,6 +323,7 @@ class TestMergedMomentum(unittest.TestCase): class TestMergedMomentum2(unittest.TestCase): + def setUp(self): paddle.enable_static() self.shapes = [[3, 4], [2, 7], [5, 6], [7, 8]] @@ -339,19 +353,18 @@ class TestMergedMomentum2(unittest.TestCase): self.shapes, multi_precision, self.seed, place) def run_op(use_nesterov, use_merged): - # NPU Momentum Op does not support rescale_grad + # NPU Momentum Op does not support rescale_grad rescale_grad = 1.0 - return run_momentum_op2( - params, - grads, - velocitys, - master_params, - learning_rate, - place, - multi_precision, - rescale_grad=rescale_grad, - use_merged=use_merged, - use_nesterov=use_nesterov) + return run_momentum_op2(params, + grads, + velocitys, + master_params, + learning_rate, + place, + multi_precision, + rescale_grad=rescale_grad, + use_merged=use_merged, + use_nesterov=use_nesterov) outs1 = run_op(use_nesterov=True, use_merged=True) outs2 = run_op(use_nesterov=True, use_merged=False) diff --git a/python/paddle/fluid/tests/unittests/npu/test_meshgrid_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_meshgrid_op_npu.py index 39802602bf5..a4d388d2ed4 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_meshgrid_op_npu.py +++ 
b/python/paddle/fluid/tests/unittests/npu/test_meshgrid_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle.fluid as fluid @@ -27,6 +28,7 @@ paddle.enable_static() class TestMeshgridOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "meshgrid" @@ -71,32 +73,41 @@ class TestMeshgridOp(OpTest): @skip_check_grad_ci( reason="The backward test is not supported for float16 type on NPU.") class TestMeshgridOpFP16(TestMeshgridOp): + def get_dtype(self): return "float16" class TestMeshgridOpINT32(TestMeshgridOp): + def get_dtype(self): return "int32" class TestMeshgridOpINT64(TestMeshgridOp): + def get_dtype(self): return "int64" class TestMeshgridOp2(TestMeshgridOp): + def get_x_shape(self): return [100, 300] class TestMeshgridOp3(unittest.TestCase): + def test_api(self): x = fluid.data(shape=[100], dtype='int32', name='x') y = fluid.data(shape=[200], dtype='int32', name='y') - input_1 = np.random.randint(0, 100, [100, ]).astype('int32') - input_2 = np.random.randint(0, 100, [200, ]).astype('int32') + input_1 = np.random.randint(0, 100, [ + 100, + ]).astype('int32') + input_2 = np.random.randint(0, 100, [ + 200, + ]).astype('int32') out_1 = np.reshape(input_1, [100, 1]) out_1 = np.broadcast_to(out_1, [100, 200]) @@ -106,8 +117,10 @@ class TestMeshgridOp3(unittest.TestCase): exe = fluid.Executor(place=fluid.NPUPlace(0)) grid_x, grid_y = paddle.tensor.meshgrid(x, y) res_1, res_2 = exe.run(fluid.default_main_program(), - feed={'x': input_1, - 'y': input_2}, + feed={ + 'x': input_1, + 'y': input_2 + }, fetch_list=[grid_x, grid_y]) self.assertTrue(np.allclose(res_1, out_1)) @@ -115,12 +128,17 @@ class TestMeshgridOp3(unittest.TestCase): class TestMeshgridOp4(unittest.TestCase): + def test_list_input(self): x = fluid.data(shape=[100], dtype='int32', name='x') y = fluid.data(shape=[200], dtype='int32', name='y') - input_1 = np.random.randint(0, 100, [100, ]).astype('int32') - input_2 = np.random.randint(0, 100, [200, ]).astype('int32') + input_1 = np.random.randint(0, 100, [ + 100, + ]).astype('int32') + input_2 = np.random.randint(0, 100, [ + 200, + ]).astype('int32') out_1 = np.reshape(input_1, [100, 1]) out_1 = np.broadcast_to(out_1, [100, 200]) @@ -130,8 +148,10 @@ class TestMeshgridOp4(unittest.TestCase): exe = fluid.Executor(place=fluid.NPUPlace(0)) grid_x, grid_y = paddle.tensor.meshgrid([x, y]) res_1, res_2 = exe.run(fluid.default_main_program(), - feed={'x': input_1, - 'y': input_2}, + feed={ + 'x': input_1, + 'y': input_2 + }, fetch_list=[grid_x, grid_y]) self.assertTrue(np.allclose(res_1, out_1)) @@ -139,12 +159,17 @@ class TestMeshgridOp4(unittest.TestCase): class TestMeshgridOp5(unittest.TestCase): + def test_tuple_input(self): x = fluid.data(shape=[100], dtype='int32', name='x') y = fluid.data(shape=[200], dtype='int32', name='y') - input_1 = np.random.randint(0, 100, [100, ]).astype('int32') - input_2 = np.random.randint(0, 100, [200, ]).astype('int32') + input_1 = np.random.randint(0, 100, [ + 100, + ]).astype('int32') + input_2 = np.random.randint(0, 100, [ + 200, + ]).astype('int32') out_1 = np.reshape(input_1, [100, 1]) out_1 = np.broadcast_to(out_1, [100, 200]) @@ -154,8 +179,10 @@ class TestMeshgridOp5(unittest.TestCase): exe = fluid.Executor(place=fluid.NPUPlace(0)) grid_x, grid_y = paddle.tensor.meshgrid((x, y)) res_1, res_2 = exe.run(fluid.default_main_program(), - feed={'x': input_1, - 'y': input_2}, + feed={ + 'x': input_1, + 
'y': input_2 + }, fetch_list=[grid_x, grid_y]) self.assertTrue(np.allclose(res_1, out_1)) @@ -163,10 +190,15 @@ class TestMeshgridOp5(unittest.TestCase): class TestMeshgridOp6(unittest.TestCase): + def test_api_with_dygraph(self): paddle.disable_static(paddle.NPUPlace(0)) - input_3 = np.random.randint(0, 100, [100, ]).astype('int32') - input_4 = np.random.randint(0, 100, [200, ]).astype('int32') + input_3 = np.random.randint(0, 100, [ + 100, + ]).astype('int32') + input_4 = np.random.randint(0, 100, [ + 200, + ]).astype('int32') out_3 = np.reshape(input_3, [100, 1]) out_3 = np.broadcast_to(out_3, [100, 200]) @@ -183,10 +215,15 @@ class TestMeshgridOp6(unittest.TestCase): class TestMeshgridOp7(unittest.TestCase): + def test_api_with_dygraph_list_input(self): paddle.disable_static(paddle.NPUPlace(0)) - input_3 = np.random.randint(0, 100, [100, ]).astype('int32') - input_4 = np.random.randint(0, 100, [200, ]).astype('int32') + input_3 = np.random.randint(0, 100, [ + 100, + ]).astype('int32') + input_4 = np.random.randint(0, 100, [ + 200, + ]).astype('int32') out_3 = np.reshape(input_3, [100, 1]) out_3 = np.broadcast_to(out_3, [100, 200]) @@ -203,10 +240,15 @@ class TestMeshgridOp7(unittest.TestCase): class TestMeshgridOp8(unittest.TestCase): + def test_api_with_dygraph_tuple_input(self): paddle.disable_static(paddle.NPUPlace(0)) - input_3 = np.random.randint(0, 100, [100, ]).astype('int32') - input_4 = np.random.randint(0, 100, [200, ]).astype('int32') + input_3 = np.random.randint(0, 100, [ + 100, + ]).astype('int32') + input_4 = np.random.randint(0, 100, [ + 200, + ]).astype('int32') out_3 = np.reshape(input_3, [100, 1]) out_3 = np.broadcast_to(out_3, [100, 200]) diff --git a/python/paddle/fluid/tests/unittests/npu/test_mixed_precision_npu.py b/python/paddle/fluid/tests/unittests/npu/test_mixed_precision_npu.py index 193b9eb4e0a..26a74b7b736 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_mixed_precision_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_mixed_precision_npu.py @@ -15,6 +15,7 @@ import unittest import sys import paddle + sys.path.append("..") import test_mixed_precision @@ -22,6 +23,7 @@ paddle.enable_static() class AMPTestNpu(test_mixed_precision.AMPTest): + def setUp(self): self.place = paddle.NPUPlace(0) diff --git a/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py index b8c261c2555..6c2e24bb163 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -29,6 +30,7 @@ paddle.enable_static() class TestMomentumOp1(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -75,6 +77,7 @@ class TestMomentumOp1(OpTest): class TestMomentumOpFp16(TestMomentumOp1): + def init_dtype(self): self.dtype = np.float16 @@ -83,20 +86,23 @@ class TestMomentumOpFp16(TestMomentumOp1): class TestMomentumOp2(TestMomentumOp1): + def init_case(self): self.shape = (123, 321) self.use_nesterov = True class TestMomentumV2(unittest.TestCase): + def test_momentum_dygraph(self): paddle.disable_static(place=fluid.NPUPlace(0)) value = np.arange(26).reshape(2, 13).astype("float32") a = paddle.to_tensor(value) linear = paddle.nn.Linear(13, 5) # This can be any optimizer supported by dygraph. 
- adam = paddle.optimizer.Momentum( - learning_rate=0.01, momentum=0.9, parameters=linear.parameters()) + adam = paddle.optimizer.Momentum(learning_rate=0.01, + momentum=0.9, + parameters=linear.parameters()) out = linear(a) out.backward() adam.step() @@ -113,13 +119,13 @@ class TestMomentumV2(unittest.TestCase): cost = fluid.layers.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) - rms_optimizer = paddle.optimizer.Momentum( - learning_rate=0.1, momentum=0.9) + rms_optimizer = paddle.optimizer.Momentum(learning_rate=0.1, + momentum=0.9) rms_optimizer.minimize(avg_cost) fetch_list = [avg_cost] - train_reader = paddle.batch( - paddle.dataset.uci_housing.train(), batch_size=1) + train_reader = paddle.batch(paddle.dataset.uci_housing.train(), + batch_size=1) feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) @@ -127,12 +133,14 @@ class TestMomentumV2(unittest.TestCase): exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list) def test_raise_error(self): - self.assertRaises( - ValueError, paddle.optimizer.Momentum, learning_rate=None) + self.assertRaises(ValueError, + paddle.optimizer.Momentum, + learning_rate=None) self.assertRaises(ValueError, paddle.optimizer.Momentum, momentum=None) class TestMomentumOpWithDecay(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -189,6 +197,7 @@ class TestMomentumOpWithDecay(OpTest): class TestMomentumOpWithDecayFP16(TestMomentumOpWithDecay): + def init_config(self): self.dtype = np.float16 @@ -198,11 +207,13 @@ class TestMomentumOpWithDecayFP16(TestMomentumOpWithDecay): class TestMomentumOpWithDecay2(TestMomentumOpWithDecay): + def init_config(self): self.use_nesterov = False class TestMomentumOpWithDecayAPI(unittest.TestCase): + def _test_momentum_dygraph_common(self, regularization): paddle.disable_static(fluid.NPUPlace(0)) inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") @@ -239,8 +250,8 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase): momentum_optimizer.minimize(avg_cost) fetch_list = [avg_cost] - train_reader = paddle.batch( - paddle.dataset.uci_housing.train(), batch_size=1) + train_reader = paddle.batch(paddle.dataset.uci_housing.train(), + batch_size=1) feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) @@ -249,10 +260,11 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase): class TestMomentumOpVsMomentumOpWithDecayAPI(unittest.TestCase): + def __update_params(self, momentum, linear): for i in range(10): - inp = paddle.full( - shape=[2, 2], fill_value=i, dtype='float32').astype("float32") + inp = paddle.full(shape=[2, 2], fill_value=i, + dtype='float32').astype("float32") inp = paddle.to_tensor(inp) out = linear(inp) loss = paddle.mean(out) @@ -298,6 +310,7 @@ class TestMomentumOpVsMomentumOpWithDecayAPI(unittest.TestCase): class TestMomentumV2Group(TestMomentumV2): + def test_momentum_dygraph(self): paddle.disable_static(place=fluid.NPUPlace(0)) value = np.arange(26).reshape(2, 13).astype("float32") @@ -305,18 +318,22 @@ class TestMomentumV2Group(TestMomentumV2): linear_1 = paddle.nn.Linear(13, 5) linear_2 = paddle.nn.Linear(5, 3) # This can be any optimizer supported by dygraph. 
- adam = paddle.optimizer.Momentum( - learning_rate=0.01, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001, - 'learning_rate': 0.1, - 'momentum': 0.99 - }], - weight_decay=0.1, - momentum=0.9) + adam = paddle.optimizer.Momentum(learning_rate=0.01, + parameters=[{ + 'params': + linear_1.parameters() + }, { + 'params': + linear_2.parameters(), + 'weight_decay': + 0.001, + 'learning_rate': + 0.1, + 'momentum': + 0.99 + }], + weight_decay=0.1, + momentum=0.9) out = linear_1(a) out = linear_2(out) out.backward() diff --git a/python/paddle/fluid/tests/unittests/npu/test_mul_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_mul_op_npu.py index b6e3134439d..c4adebcda6f 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_mul_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_mul_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -59,7 +60,8 @@ class TestMul(OpTest): self.place, ['X', 'Y'], 'Out', - max_relative_error=0.0065, ) + max_relative_error=0.0065, + ) def test_check_grad_ingore_x(self): self.check_grad_with_place( @@ -67,7 +69,8 @@ class TestMul(OpTest): ['Y'], 'Out', no_grad_set=set("X"), - max_relative_error=0.0065, ) + max_relative_error=0.0065, + ) def test_check_grad_ingore_y(self): self.check_grad_with_place( @@ -75,12 +78,14 @@ class TestMul(OpTest): ['X'], 'Out', no_grad_set=set("Y"), - max_relative_error=0.0065, ) + max_relative_error=0.0065, + ) @skip_check_grad_ci( reason="Don't support grad checking for NPU OP with FP16 data type.") class TestMulFP16(TestMul): + def init_dtype(self): self.dtype = np.float16 @@ -119,6 +124,7 @@ class TestMul2(TestMul): @skip_check_grad_ci( reason="Don't support grad checking for NPU OP with FP16 data type.") class TestMul2FP16(TestMul2): + def init_dtype(self): self.dtype = np.float16 @@ -157,6 +163,7 @@ class TestMul3(TestMul): @skip_check_grad_ci( reason="Don't support grad checking for NPU OP with FP16 data type.") class TestMul3FP16(TestMul3): + def init_dtype(self): self.dtype = np.float16 @@ -195,6 +202,7 @@ class TestMul4(TestMul): @skip_check_grad_ci( reason="Don't support grad checking for NPU OP with FP16 data type.") class TestMul4FP16(TestMul4): + def init_dtype(self): self.dtype = np.float16 @@ -209,6 +217,7 @@ class TestMul4FP16(TestMul4): class TestMulNet(unittest.TestCase): + def init_dtype(self): self.dtype = np.float32 @@ -230,8 +239,9 @@ class TestMulNet(unittest.TestCase): b = paddle.static.data(name="b", shape=[2, 3], dtype=self.dtype) c = paddle.static.data(name="c", shape=[3, 2], dtype=self.dtype) d = paddle.static.data(name="d", shape=[3, 2], dtype=self.dtype) - label = paddle.static.data( - name="label", shape=[2, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[2, 1], + dtype='int64') sum_1 = paddle.add(a, b) sum_2 = paddle.add(c, d) @@ -280,6 +290,7 @@ class TestMulNet(unittest.TestCase): class TestMulNet3_2(unittest.TestCase): + def init_dtype(self): self.dtype = np.float32 @@ -301,8 +312,9 @@ class TestMulNet3_2(unittest.TestCase): b = paddle.static.data(name="b", shape=[2, 3, 4], dtype=self.dtype) c = paddle.static.data(name="c", shape=[12, 5], dtype=self.dtype) d = paddle.static.data(name="d", shape=[12, 5], dtype=self.dtype) - label = paddle.static.data( - name="label", shape=[2, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[2, 1], + 
dtype='int64') sum_1 = paddle.add(a, b) sum_2 = paddle.add(c, d) @@ -346,12 +358,13 @@ class TestMulNet3_2(unittest.TestCase): cpu_pred, cpu_loss = self._test(False) npu_pred, npu_loss = self._test(True) - self.assertTrue(np.allclose( - npu_pred, cpu_pred, atol=1e-5)) # atol needed on cann 20.3 + self.assertTrue(np.allclose(npu_pred, cpu_pred, + atol=1e-5)) # atol needed on cann 20.3 self.assertTrue(np.allclose(npu_loss, cpu_loss, atol=1e-5)) class TestMulNet3_2_xc2(unittest.TestCase): + def init_dtype(self): self.dtype = np.float32 @@ -373,8 +386,9 @@ class TestMulNet3_2_xc2(unittest.TestCase): b = paddle.static.data(name="b", shape=[2, 3, 4], dtype=self.dtype) c = paddle.static.data(name="c", shape=[4, 5], dtype=self.dtype) d = paddle.static.data(name="d", shape=[4, 5], dtype=self.dtype) - label = paddle.static.data( - name="label", shape=[2, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[2, 1], + dtype='int64') sum_1 = paddle.add(a, b) sum_2 = paddle.add(c, d) @@ -424,6 +438,7 @@ class TestMulNet3_2_xc2(unittest.TestCase): class TestMulNet4_2(unittest.TestCase): + def init_dtype(self): self.dtype = np.float32 @@ -445,8 +460,9 @@ class TestMulNet4_2(unittest.TestCase): b = paddle.static.data(name="b", shape=[12, 5], dtype=self.dtype) c = paddle.static.data(name="c", shape=[12, 5], dtype=self.dtype) d = paddle.static.data(name="d", shape=[12, 5], dtype=self.dtype) - label = paddle.static.data( - name="label", shape=[2, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[2, 1], + dtype='int64') sum_1 = paddle.add(a, b) # [12, 5] sum_2 = paddle.add(c, d) # [12, 5] @@ -493,8 +509,8 @@ class TestMulNet4_2(unittest.TestCase): cpu_pred, cpu_loss = self._test(False) npu_pred, npu_loss = self._test(True) - self.assertTrue(np.allclose( - npu_pred, cpu_pred, atol=1e-5)) # atol needed on cann 20.3 + self.assertTrue(np.allclose(npu_pred, cpu_pred, + atol=1e-5)) # atol needed on cann 20.3 self.assertTrue(np.allclose(npu_loss, cpu_loss, atol=1e-5)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_multinomial_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_multinomial_op_npu.py index 28833a7dc1d..036e6a0a7f9 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_multinomial_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_multinomial_op_npu.py @@ -19,6 +19,7 @@ import paddle import paddle.fluid as fluid from paddle.fluid import core import sys + sys.path.append("..") from op_test import OpTest import numpy as np @@ -48,6 +49,7 @@ def sample_output_two_dimension(out, shape): class TestMultinomialOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "multinomial" @@ -65,8 +67,8 @@ class TestMultinomialOp(OpTest): self.attrs = {"num_samples": 100000, "replacement": True} def test_check_output(self): - self.check_output_customized( - self.verify_output, custom_place=self.place) + self.check_output_customized(self.verify_output, + custom_place=self.place) def sample_output(self, out): return sample_output_one_dimension(out, 4) @@ -76,12 +78,12 @@ class TestMultinomialOp(OpTest): prob = self.input_np / self.input_np.sum(axis=-1, keepdims=True) sample_prob = self.sample_output(np.array(outs[0])) self.assertTrue( - np.allclose( - sample_prob, prob, rtol=0, atol=0.01), + np.allclose(sample_prob, prob, rtol=0, atol=0.01), "sample_prob: " + str(sample_prob) + "\nprob: " + str(prob)) class TestMultinomialOp2(TestMultinomialOp): + def init_data(self): # input probability is a matrix self.input_np = np.random.rand(3, 4) @@ -93,6 +95,7 
@@ class TestMultinomialOp2(TestMultinomialOp): class TestMultinomialOp3(TestMultinomialOp): + def init_data(self): # replacement is False. number of samples must be less than number of categories. self.input_np = np.random.rand(1000) @@ -108,6 +111,7 @@ class TestMultinomialOp3(TestMultinomialOp): class TestMultinomialApi(unittest.TestCase): + def test_dygraph(self): # input probability is a vector, and replacement is True paddle.set_device('npu:0') @@ -119,8 +123,7 @@ class TestMultinomialApi(unittest.TestCase): sample_prob = sample_output_one_dimension(out.numpy(), 4) prob = x_numpy / x_numpy.sum(axis=-1, keepdims=True) self.assertTrue( - np.allclose( - sample_prob, prob, rtol=0, atol=0.01), + np.allclose(sample_prob, prob, rtol=0, atol=0.01), "sample_prob: " + str(sample_prob) + "\nprob: " + str(prob)) paddle.enable_static() @@ -135,8 +138,7 @@ class TestMultinomialApi(unittest.TestCase): sample_prob = sample_output_two_dimension(out.numpy(), [3, 4]) prob = x_numpy / x_numpy.sum(axis=-1, keepdims=True) self.assertTrue( - np.allclose( - sample_prob, prob, rtol=0, atol=0.01), + np.allclose(sample_prob, prob, rtol=0, atol=0.01), "sample_prob: " + str(sample_prob) + "\nprob: " + str(prob)) paddle.enable_static() @@ -181,22 +183,24 @@ class TestMultinomialApi(unittest.TestCase): sample_prob = sample_output_one_dimension(out, 4) prob = x_np / x_np.sum(axis=-1, keepdims=True) self.assertTrue( - np.allclose( - sample_prob, prob, rtol=0, atol=0.01), + np.allclose(sample_prob, prob, rtol=0, atol=0.01), "sample_prob: " + str(sample_prob) + "\nprob: " + str(prob)) class TestMultinomialAlias(unittest.TestCase): + def test_alias(self): paddle.set_device('npu:0') x = paddle.rand([4]) out1 = paddle.multinomial(x, num_samples=10, replacement=True) out2 = paddle.tensor.multinomial(x, num_samples=10, replacement=True) - out3 = paddle.tensor.random.multinomial( - x, num_samples=10, replacement=True) + out3 = paddle.tensor.random.multinomial(x, + num_samples=10, + replacement=True) class TestMultinomialError(unittest.TestCase): + def setUp(self): paddle.set_device('npu:0') paddle.disable_static() @@ -205,6 +209,7 @@ class TestMultinomialError(unittest.TestCase): paddle.enable_static() def test_num_sample(self): + def test_num_sample_less_than_0(): x = paddle.rand([4]) out = paddle.multinomial(x, num_samples=-2) @@ -212,6 +217,7 @@ class TestMultinomialError(unittest.TestCase): self.assertRaises(ValueError, test_num_sample_less_than_0) def test_input_probs_dim(self): + def test_dim_larger_than_2(): x = paddle.rand([2, 3, 3]) out = paddle.multinomial(x) diff --git a/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_op_npu.py index c6f85c8dee4..c17b8461bd1 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ paddle.enable_static() class TestNearestInterpOp(OpTest): + def setUp(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -52,9 +54,10 @@ class TestNearestInterpOp(OpTest): out_h = self.out_h out_w = self.out_w - output_np = nearest_neighbor_interp_np( - input_np, out_h, out_w, self.out_size, self.actual_shape, - self.align_corners, self.data_layout) + output_np = nearest_neighbor_interp_np(input_np, out_h, out_w, + 
self.out_size, self.actual_shape, + self.align_corners, + self.data_layout) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size @@ -88,6 +91,7 @@ class TestNearestInterpOp(OpTest): class TestNearestNeighborInterpCase1(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [4, 1, 7, 8] @@ -98,6 +102,7 @@ class TestNearestNeighborInterpCase1(TestNearestInterpOp): class TestNearestNeighborInterpCase2(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 3, 9, 6] @@ -107,11 +112,14 @@ class TestNearestNeighborInterpCase2(TestNearestInterpOp): self.align_corners = False def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', in_place=True, max_relative_error=0.006) + self.check_grad_with_place(self.place, ['X'], + 'Out', + in_place=True, + max_relative_error=0.006) class TestNearestNeighborInterpCase3(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [1, 1, 32, 64] @@ -122,6 +130,7 @@ class TestNearestNeighborInterpCase3(TestNearestInterpOp): class TestNearestNeighborInterpCase4(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [4, 1, 7, 8] @@ -133,6 +142,7 @@ class TestNearestNeighborInterpCase4(TestNearestInterpOp): class TestNearestNeighborInterpCase5(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 3, 9, 6] @@ -144,6 +154,7 @@ class TestNearestNeighborInterpCase5(TestNearestInterpOp): class TestNearestNeighborInterpCase6(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [1, 1, 32, 64] @@ -155,6 +166,7 @@ class TestNearestNeighborInterpCase6(TestNearestInterpOp): class TestNearestNeighborInterpSame(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [2, 3, 32, 64] @@ -165,6 +177,7 @@ class TestNearestNeighborInterpSame(TestNearestInterpOp): class TestNearestNeighborInterpActualShape(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 32, 16] @@ -176,6 +189,7 @@ class TestNearestNeighborInterpActualShape(TestNearestInterpOp): class TestNearestNeighborInterpDataLayout(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [2, 4, 4, 5] @@ -188,6 +202,7 @@ class TestNearestNeighborInterpDataLayout(TestNearestInterpOp): class TestNearestInterpOpUint8(OpTest): + def setUp(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -196,8 +211,8 @@ class TestNearestInterpOpUint8(OpTest): self.actual_shape = None self.init_test_case() self.op_type = "nearest_interp" - input_np = np.random.randint( - low=0, high=256, size=self.input_shape).astype("uint8") + input_np = np.random.randint(low=0, high=256, + size=self.input_shape).astype("uint8") if self.scale > 0: out_h = int(self.input_shape[2] * self.scale) @@ -234,6 +249,7 @@ class TestNearestInterpOpUint8(OpTest): class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [2, 3, 32, 64] @@ -244,6 +260,7 @@ class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8): class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [4, 1, 7, 8] @@ 
-255,6 +272,7 @@ class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8): class TestNearestNeighborInterpScale1(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 7, 5] @@ -266,6 +284,7 @@ class TestNearestNeighborInterpScale1(TestNearestInterpOp): class TestNearestNeighborInterpScale2(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 5, 7] @@ -277,6 +296,7 @@ class TestNearestNeighborInterpScale2(TestNearestInterpOp): class TestNearestNeighborInterpScale3(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 7, 5] @@ -288,6 +308,7 @@ class TestNearestNeighborInterpScale3(TestNearestInterpOp): class TestNearestInterpOp_attr_tensor(OpTest): + def setUp(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -350,6 +371,7 @@ class TestNearestInterpOp_attr_tensor(OpTest): # out_size is a tensor list class TestNearestInterp_attr_tensor_Case1(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 3, 9, 6] @@ -362,6 +384,7 @@ class TestNearestInterp_attr_tensor_Case1(TestNearestInterpOp_attr_tensor): # out_size is a 1-D tensor class TestNearestInterp_attr_tensor_Case2(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 32, 16] @@ -375,6 +398,7 @@ class TestNearestInterp_attr_tensor_Case2(TestNearestInterpOp_attr_tensor): # scale is a 1-D tensor class TestNearestInterp_attr_tensor_Case3(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 32, 16] @@ -387,6 +411,7 @@ class TestNearestInterp_attr_tensor_Case3(TestNearestInterpOp_attr_tensor): class TestNearestAPI(unittest.TestCase): + def test_case(self): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") y = fluid.data(name="y", shape=[2, 6, 6, 3], dtype="float32") @@ -394,19 +419,27 @@ class TestNearestAPI(unittest.TestCase): dim = fluid.data(name="dim", shape=[1], dtype="int32") shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") actual_size = fluid.data(name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32") - - out1 = fluid.layers.resize_nearest( - y, out_shape=[12, 12], data_format='NHWC', align_corners=False) - out2 = fluid.layers.resize_nearest( - x, out_shape=[12, dim], align_corners=False) - out3 = fluid.layers.resize_nearest( - x, out_shape=shape_tensor, align_corners=False) - out4 = fluid.layers.resize_nearest( - x, out_shape=[4, 4], actual_shape=actual_size, align_corners=False) - out5 = fluid.layers.resize_nearest( - x, scale=scale_tensor, align_corners=False) + scale_tensor = fluid.data(name="scale_tensor", + shape=[1], + dtype="float32") + + out1 = fluid.layers.resize_nearest(y, + out_shape=[12, 12], + data_format='NHWC', + align_corners=False) + out2 = fluid.layers.resize_nearest(x, + out_shape=[12, dim], + align_corners=False) + out3 = fluid.layers.resize_nearest(x, + out_shape=shape_tensor, + align_corners=False) + out4 = fluid.layers.resize_nearest(x, + out_shape=[4, 4], + actual_shape=actual_size, + align_corners=False) + out5 = fluid.layers.resize_nearest(x, + scale=scale_tensor, + align_corners=False) x_data = np.random.random((2, 3, 6, 6)).astype("float32") dim_data = np.array([12]).astype("int32") @@ -429,8 +462,10 @@ class 
TestNearestAPI(unittest.TestCase): fetch_list=[out1, out2, out3, out4, out5], return_numpy=True) - expect_res = nearest_neighbor_interp_np( - x_data, out_h=12, out_w=12, align_corners=False) + expect_res = nearest_neighbor_interp_np(x_data, + out_h=12, + out_w=12, + align_corners=False) self.assertTrue( np.allclose(results[0], np.transpose(expect_res, (0, 2, 3, 1)))) for i in range(len(results) - 1): @@ -438,13 +473,15 @@ class TestNearestAPI(unittest.TestCase): class TestNearestInterpException(unittest.TestCase): + def test_exception(self): input = fluid.data(name="input", shape=[1, 3, 6, 6], dtype="float32") def attr_data_format(): # for 4-D input, data_format can only be NCHW or NHWC - out = fluid.layers.resize_nearest( - input, out_shape=[4, 8], data_format='NDHWC') + out = fluid.layers.resize_nearest(input, + out_shape=[4, 8], + data_format='NDHWC') def attr_scale_type(): out = fluid.layers.resize_nearest(input, scale='scale') diff --git a/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_v2_op_npu.py index ec51dcf3f8e..5c5a0538388 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_v2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_v2_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle.fluid.core as core @@ -31,6 +32,7 @@ paddle.enable_static() class TestNearestInterpOp(OpTest): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -68,9 +70,11 @@ class TestNearestInterpOp(OpTest): output_h = self.out_h output_w = self.out_w - output_np = nearest_neighbor_interp_np( - input_np, output_h, output_w, scale_h, scale_w, self.out_size, - self.actual_shape, self.align_corners, self.data_layout) + output_np = nearest_neighbor_interp_np(input_np, output_h, output_w, + scale_h, scale_w, self.out_size, + self.actual_shape, + self.align_corners, + self.data_layout) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size @@ -97,17 +101,15 @@ class TestNearestInterpOp(OpTest): def test_check_grad(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['X'], - 'Out', - in_place=True, - max_relative_error=0.02) + self.check_grad_with_place(self.place, ['X'], + 'Out', + in_place=True, + max_relative_error=0.02) else: - self.check_grad_with_place( - self.place, ['X'], - 'Out', - in_place=True, - max_relative_error=0.006) + self.check_grad_with_place(self.place, ['X'], + 'Out', + in_place=True, + max_relative_error=0.006) def init_dtype(self): self.dtype = np.float32 @@ -123,11 +125,13 @@ class TestNearestInterpOp(OpTest): class TestNearestNeighborInterpFP16(TestNearestInterpOp): + def init_dtype(self): self.dtype = np.float16 class TestNearestNeighborInterpCase1(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [4, 1, 7, 8] @@ -138,6 +142,7 @@ class TestNearestNeighborInterpCase1(TestNearestInterpOp): class TestNearestNeighborInterpCase2(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 3, 9, 6] @@ -148,6 +153,7 @@ class TestNearestNeighborInterpCase2(TestNearestInterpOp): class TestNearestNeighborInterpCase3(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [1, 1, 32, 64] @@ -158,6 +164,7 @@ class 
TestNearestNeighborInterpCase3(TestNearestInterpOp): class TestNearestNeighborInterpCase4(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [4, 1, 7, 8] @@ -169,6 +176,7 @@ class TestNearestNeighborInterpCase4(TestNearestInterpOp): class TestNearestNeighborInterpCase5(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 3, 9, 6] @@ -180,6 +188,7 @@ class TestNearestNeighborInterpCase5(TestNearestInterpOp): class TestNearestNeighborInterpCase6(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [1, 1, 32, 64] @@ -191,6 +200,7 @@ class TestNearestNeighborInterpCase6(TestNearestInterpOp): class TestNearestNeighborInterpSame(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [2, 3, 32, 64] @@ -201,6 +211,7 @@ class TestNearestNeighborInterpSame(TestNearestInterpOp): class TestNearestNeighborInterpActualShape(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 32, 16] @@ -212,6 +223,7 @@ class TestNearestNeighborInterpActualShape(TestNearestInterpOp): class TestNearestNeighborInterpScale1(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 7, 5] @@ -223,6 +235,7 @@ class TestNearestNeighborInterpScale1(TestNearestInterpOp): class TestNearestNeighborInterpScale2(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 5, 7] @@ -234,6 +247,7 @@ class TestNearestNeighborInterpScale2(TestNearestInterpOp): class TestNearestNeighborInterpScale3(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 7, 5] @@ -245,6 +259,7 @@ class TestNearestNeighborInterpScale3(TestNearestInterpOp): class TestNearestInterpOp_attr_tensor(OpTest): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -323,6 +338,7 @@ class TestNearestInterpOp_attr_tensor(OpTest): # out_size is a tensor list class TestNearestInterp_attr_tensor_Case1(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 3, 9, 6] @@ -335,6 +351,7 @@ class TestNearestInterp_attr_tensor_Case1(TestNearestInterpOp_attr_tensor): # out_size is a 1-D tensor class TestNearestInterp_attr_tensor_Case2(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 32, 16] @@ -348,6 +365,7 @@ class TestNearestInterp_attr_tensor_Case2(TestNearestInterpOp_attr_tensor): # scale is a 1-D tensor class TestNearestInterp_attr_tensor_Case3(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 32, 16] @@ -360,6 +378,7 @@ class TestNearestInterp_attr_tensor_Case3(TestNearestInterpOp_attr_tensor): class TestNearestInterpOpAPI_dy(unittest.TestCase): + def test_case(self): import paddle if core.is_compiled_with_npu(): @@ -371,13 +390,14 @@ class TestNearestInterpOpAPI_dy(unittest.TestCase): scale_np = np.array([2, 2]).astype("int64") input_x = paddle.to_tensor(input_data) scale = paddle.to_tensor(scale_np) - expect_res = nearest_neighbor_interp_np( - input_data, out_h=12, out_w=12, align_corners=False) - out = interpolate( - x=input_x, - scale_factor=scale, - mode="nearest", - align_corners=False) + expect_res = nearest_neighbor_interp_np(input_data, + out_h=12, + 
out_w=12, + align_corners=False) + out = interpolate(x=input_x, + scale_factor=scale, + mode="nearest", + align_corners=False) self.assertTrue(np.allclose(out.numpy(), expect_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_norm_op_npu.py index 8e28b3fe413..3934ea3b9bb 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_norm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_norm_op_npu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -25,6 +26,7 @@ from paddle.fluid.tests.unittests.test_norm_op import l2_norm class TestNPUNormOp(OpTest): + def setUp(self): paddle.enable_static() self.set_npu() @@ -54,11 +56,13 @@ class TestNPUNormOp(OpTest): self.check_output_with_place(self.place) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.006) class TestNPUNormOp2(TestNPUNormOp): + def init_test_case(self): self.shape = [5, 3, 9, 7] self.axis = 0 @@ -66,6 +70,7 @@ class TestNPUNormOp2(TestNPUNormOp): class TestNPUNormOp3(TestNPUNormOp): + def init_test_case(self): self.shape = [5, 3, 2, 7] self.axis = -1 @@ -75,6 +80,7 @@ class TestNPUNormOp3(TestNPUNormOp): @skip_check_grad_ci(reason="'check_grad' on large inputs is too slow, " + "however it is desirable to cover the forward pass") class TestNPUNormOp4(TestNPUNormOp): + def init_test_case(self): self.shape = [128, 1024, 14, 14] self.axis = 2 @@ -87,6 +93,7 @@ class TestNPUNormOp4(TestNPUNormOp): @skip_check_grad_ci(reason="'check_grad' on large inputs is too slow, " + "however it is desirable to cover the forward pass") class TestNPUNormOp5(TestNPUNormOp): + def init_test_case(self): self.shape = [2048, 2048] self.axis = 1 @@ -97,6 +104,7 @@ class TestNPUNormOp5(TestNPUNormOp): class API_NormTest(unittest.TestCase): + def test_errors(self): paddle.enable_static() with fluid.program_guard(fluid.Program()): @@ -109,6 +117,7 @@ class API_NormTest(unittest.TestCase): class TestNPUNormOpFP16(TestNPUNormOp): + def set_npu(self): self.__class__.use_npu = True self.__class__.no_need_check_grad = True diff --git a/python/paddle/fluid/tests/unittests/npu/test_npu_place.py b/python/paddle/fluid/tests/unittests/npu/test_npu_place.py index 91e0c29e106..2d0432204c8 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_npu_place.py +++ b/python/paddle/fluid/tests/unittests/npu/test_npu_place.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestNpuPlace(unittest.TestCase): + def test(self): p = core.Place() p.set_place(paddle.NPUPlace(0)) @@ -32,6 +33,7 @@ class TestNpuPlace(unittest.TestCase): class TestNpuPlaceError(unittest.TestCase): + def test_static(self): # NPU is not supported in ParallelExecutor prog = paddle.static.Program() diff --git a/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py index c92fffb2d26..0c77eb8217b 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import sys import unittest import numpy as np + sys.path.append("..") from op_test import OpTest @@ -29,6 +30,7 @@ paddle.enable_static() class TestOneHotOp(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -57,6 +59,7 @@ class 
TestOneHotOp(OpTest): class TestOneHotOp_attr(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -84,6 +87,7 @@ class TestOneHotOp_attr(OpTest): class TestOneHotOp_default_dtype(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -112,6 +116,7 @@ class TestOneHotOp_default_dtype(OpTest): class TestOneHotOp_default_dtype_attr(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -139,6 +144,7 @@ class TestOneHotOp_default_dtype_attr(OpTest): class TestOneHotOp_out_of_range(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -162,6 +168,7 @@ class TestOneHotOp_out_of_range(OpTest): class TestOneHotOp_dtype_int64(OpTest): + def set_npu(self): self.__class__.use_npu = True diff --git a/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py index e511286cc2d..d250dbfd2ba 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import sys import unittest import numpy as np + sys.path.append("..") from op_test import OpTest @@ -29,6 +30,7 @@ paddle.enable_static() class TestOneHotOp(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -56,6 +58,7 @@ class TestOneHotOp(OpTest): class TestOneHotOp_non_lod(OpTest): + def setUp(self): self.op_type = 'one_hot_v2' depth = 10 @@ -79,6 +82,7 @@ class TestOneHotOp_non_lod(OpTest): class TestOneHotOp_attr(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -106,6 +110,7 @@ class TestOneHotOp_attr(OpTest): class TestOneHotOp_default_dtype(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -133,6 +138,7 @@ class TestOneHotOp_default_dtype(OpTest): class TestOneHotOp_default_dtype_attr(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -160,6 +166,7 @@ class TestOneHotOp_default_dtype_attr(OpTest): class TestOneHotOp_out_of_range(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -182,6 +189,7 @@ class TestOneHotOp_out_of_range(OpTest): class TestOneHotOp_dtype_int64(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -204,6 +212,7 @@ class TestOneHotOp_dtype_int64(OpTest): class TestOneHotOpApi(unittest.TestCase): + def test_api(self): depth = 10 self._run(depth) @@ -230,7 +239,9 @@ class TestOneHotOpApi(unittest.TestCase): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - ret = exe.run(feed={'label': label_data, }, + ret = exe.run(feed={ + 'label': label_data, + }, fetch_list=[one_hot_label], return_numpy=False) diff --git a/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py index a7ca4edc524..5560b8bbd14 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py @@ -15,6 +15,7 @@ import sys import unittest import numpy as np + sys.path.append("..") import paddle @@ -25,6 +26,7 @@ paddle.enable_static() class TestPnormOp(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -51,8 +53,9 @@ class TestPnormOp(OpTest): self.check_output_with_place(paddle.NPUPlace(0)) def test_check_grad(self): - self.check_grad_with_place( - paddle.NPUPlace(0), ['X'], 'Out', user_defined_grads=self.gradient) + self.check_grad_with_place(paddle.NPUPlace(0), ['X'], + 'Out', + user_defined_grads=self.gradient) def init_test_case(self): self.shape = [2, 3, 4, 5] 
@@ -95,6 +98,7 @@ class TestPnormOp(OpTest): class TestPnormOp2(TestPnormOp): + def init_test_case(self): self.shape = [3, 20, 3] self.axis = 2 @@ -105,6 +109,7 @@ class TestPnormOp2(TestPnormOp): class TestPnormOp3(TestPnormOp): + def init_test_case(self): self.shape = [3, 20, 3] self.axis = 2 @@ -115,6 +120,7 @@ class TestPnormOp3(TestPnormOp): class TestPnormOp4(TestPnormOp3): + def init_test_case(self): self.shape = [3, 20, 3] self.axis = 2 @@ -125,6 +131,7 @@ class TestPnormOp4(TestPnormOp3): class TestPnormOp5(TestPnormOp3): + def init_test_case(self): self.shape = [3, 20, 3] self.axis = 2 @@ -135,6 +142,7 @@ class TestPnormOp5(TestPnormOp3): class TestPnormOp6(TestPnormOp3): + def init_test_case(self): self.shape = [2, 3, 4, 5] self.axis = 1 @@ -145,26 +153,31 @@ class TestPnormOp6(TestPnormOp3): class TestPnormOpfp16(TestPnormOp): + def init_dtype(self): self.dtype = "float16" class TestPnormOp2fp16(TestPnormOp2): + def init_dtype(self): self.dtype = "float16" class TestPnormOp3fp16(TestPnormOp3): + def init_dtype(self): self.dtype = "float16" class TestPnormOp4fp16(TestPnormOp4): + def init_dtype(self): self.dtype = "float16" class TestPnormOp5fp16(TestPnormOp5): + def init_dtype(self): self.dtype = "float16" diff --git a/python/paddle/fluid/tests/unittests/npu/test_pad3d_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_pad3d_op_npu.py index 234ceb2f0b7..12ade62af4d 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_pad3d_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_pad3d_op_npu.py @@ -15,6 +15,7 @@ import unittest import numpy as np import sys + sys.path.append("..") import op_test import paddle @@ -26,6 +27,7 @@ import paddle.fluid as fluid class TestPad3dNPUOp(op_test.OpTest): + def setUp(self): paddle.enable_static() self.__class__.use_npu = True @@ -42,11 +44,11 @@ class TestPad3dNPUOp(op_test.OpTest): self.attrs = {} if self.variable_paddings: self.attrs['paddings'] = [] - self.inputs['Paddings'] = np.array(self.paddings).flatten().astype( - "int32") + self.inputs['Paddings'] = np.array( + self.paddings).flatten().astype("int32") else: - self.attrs['paddings'] = np.array(self.paddings).flatten().astype( - "int32") + self.attrs['paddings'] = np.array( + self.paddings).flatten().astype("int32") self.attrs['value'] = self.value self.attrs['mode'] = self.mode self.attrs['data_format'] = self.data_format @@ -87,6 +89,7 @@ class TestPad3dNPUOp(op_test.OpTest): class TestCase1(TestPad3dNPUOp): + def initTestCase(self): self.shape = (3, 4, 5, 6, 7) self.paddings = [0, 1, 2, 3, 4, 5] @@ -99,6 +102,7 @@ class TestCase1(TestPad3dNPUOp): class TestCase2(TestPad3dNPUOp): + def initTestCase(self): self.shape = (4, 5, 6, 7, 8) self.paddings = [1, 1, 1, 1, 1, 1] @@ -107,6 +111,7 @@ class TestCase2(TestPad3dNPUOp): class TestPadAPI(unittest.TestCase): + def _get_numpy_out(self, input_data, pad, @@ -163,8 +168,8 @@ class TestPadAPI(unittest.TestCase): def test_static(self): paddle.enable_static() - self.place = fluid.NPUPlace(0) if fluid.core.is_compiled_with_npu( - ) else fluid.CPUPlace() + self.place = fluid.NPUPlace( + 0) if fluid.core.is_compiled_with_npu() else fluid.CPUPlace() with program_guard(Program(), Program()): input_shape = (1, 2, 3, 4, 5) pad = [1, 2, 1, 1, 3, 4] @@ -187,10 +192,16 @@ class TestPadAPI(unittest.TestCase): feed={"x": input_data}, fetch_list=[result1, result2]) - np_out1 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NCDHW") - np_out2 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NDHWC") + 
np_out1 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NCDHW") + np_out2 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NDHWC") self.assertTrue(np.allclose(fetches[0], np_out1)) self.assertTrue(np.allclose(fetches[1], np_out2)) @@ -205,10 +216,16 @@ class TestPadAPI(unittest.TestCase): input_data = np.random.rand(*input_shape).astype(np.float32) tensor_data = paddle.to_tensor(input_data) - np_out1 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NCDHW") - np_out2 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NDHWC") + np_out1 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NCDHW") + np_out2 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NDHWC") y1 = F.pad(tensor_data, pad=pad, @@ -235,10 +252,16 @@ class TestPadAPI(unittest.TestCase): input_data = np.random.rand(*input_shape).astype(np.float32) tensor_data = paddle.to_tensor(input_data) - np_out1 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NCHW") - np_out2 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NHWC") + np_out1 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NCHW") + np_out2 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NHWC") y1 = F.pad(tensor_data, pad=pad, @@ -265,10 +288,16 @@ class TestPadAPI(unittest.TestCase): input_data = np.random.rand(*input_shape).astype(np.float32) tensor_data = paddle.to_tensor(input_data) - np_out1 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NCL") - np_out2 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NLC") + np_out1 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NCL") + np_out2 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NLC") y1 = F.pad(tensor_data, pad=pad, @@ -286,6 +315,7 @@ class TestPadAPI(unittest.TestCase): class TestPad1dAPI(unittest.TestCase): + def _get_numpy_out(self, input_data, pad, @@ -318,26 +348,30 @@ class TestPad1dAPI(unittest.TestCase): input_data = np.random.rand(*input_shape).astype(np.float32) pad_constant = nn.Pad1D(padding=pad, mode="constant", value=value) - pad_constant_int = nn.Pad1D( - padding=pad_int, mode="constant", value=value) + pad_constant_int = nn.Pad1D(padding=pad_int, + mode="constant", + value=value) data = paddle.to_tensor(input_data) output = pad_constant(data) - np_out = self._get_numpy_out( - input_data, pad, "constant", value=value, data_format="NCL") + np_out = self._get_numpy_out(input_data, + pad, + "constant", + value=value, + data_format="NCL") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_constant_int(data) - np_out = self._get_numpy_out( - input_data, [pad_int] * 2, - "constant", - value=value, - data_format="NCL") + np_out = self._get_numpy_out(input_data, [pad_int] * 2, + "constant", + value=value, + data_format="NCL") self.assertTrue(np.allclose(output.numpy(), np_out)) class TestPad2dAPI(unittest.TestCase): + def _get_numpy_out(self, input_data, pad, @@ -372,26 +406,30 @@ class TestPad2dAPI(unittest.TestCase): input_data = np.random.rand(*input_shape).astype(np.float32) pad_constant = nn.Pad2D(padding=pad, mode="constant", value=value) - pad_constant_int = nn.Pad2D( - padding=pad_int, mode="constant", value=value) + pad_constant_int = nn.Pad2D(padding=pad_int, + mode="constant", + value=value) data = paddle.to_tensor(input_data) output = pad_constant(data) - np_out = self._get_numpy_out( - 
input_data, pad, "constant", value=value, data_format="NCHW") + np_out = self._get_numpy_out(input_data, + pad, + "constant", + value=value, + data_format="NCHW") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_constant_int(data) - np_out = self._get_numpy_out( - input_data, [pad_int] * 4, - "constant", - value=value, - data_format="NCHW") + np_out = self._get_numpy_out(input_data, [pad_int] * 4, + "constant", + value=value, + data_format="NCHW") self.assertTrue(np.allclose(output.numpy(), np_out)) class TestPad3dAPI(unittest.TestCase): + def _get_numpy_out(self, input_data, pad, @@ -428,27 +466,32 @@ class TestPad3dAPI(unittest.TestCase): input_data = np.random.rand(*input_shape).astype(np.float32) pad_constant = nn.Pad3D(padding=pad, mode="constant", value=value) - pad_constant_int = nn.Pad3D( - padding=pad_int, mode="constant", value=value) + pad_constant_int = nn.Pad3D(padding=pad_int, + mode="constant", + value=value) data = paddle.to_tensor(input_data) output = pad_constant(data) - np_out = self._get_numpy_out( - input_data, pad, "constant", value=value, data_format="NCDHW") + np_out = self._get_numpy_out(input_data, + pad, + "constant", + value=value, + data_format="NCDHW") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_constant_int(data) - np_out = self._get_numpy_out( - input_data, [pad_int] * 6, - "constant", - value=value, - data_format="NCDHW") + np_out = self._get_numpy_out(input_data, [pad_int] * 6, + "constant", + value=value, + data_format="NCDHW") self.assertTrue(np.allclose(output.numpy(), np_out)) class TestPad3dOpNpuError(unittest.TestCase): + def test_errors(self): + def test_value(): input_shape = (1, 2, 3, 4, 5) data = np.random.rand(*input_shape).astype(np.float32) @@ -495,12 +538,14 @@ class TestPad3dOpNpuError(unittest.TestCase): class TestPadDataformatError(unittest.TestCase): + def test_errors(self): + def test_ncl(): input_shape = (1, 2, 3, 4) pad = paddle.to_tensor(np.array([2, 1, 2, 1]).astype('int32')) - data = np.arange( - np.prod(input_shape), dtype=np.float64).reshape(input_shape) + 1 + data = np.arange(np.prod(input_shape), + dtype=np.float64).reshape(input_shape) + 1 my_pad = nn.Pad1D(padding=pad, mode="replicate", data_format="NCL") data = paddle.to_tensor(data) result = my_pad(data) @@ -508,8 +553,8 @@ class TestPadDataformatError(unittest.TestCase): def test_nchw(): input_shape = (1, 2, 4) pad = paddle.to_tensor(np.array([2, 1, 2, 1]).astype('int32')) - data = np.arange( - np.prod(input_shape), dtype=np.float64).reshape(input_shape) + 1 + data = np.arange(np.prod(input_shape), + dtype=np.float64).reshape(input_shape) + 1 my_pad = nn.Pad1D(padding=pad, mode="replicate", data_format="NCHW") data = paddle.to_tensor(data) result = my_pad(data) @@ -517,10 +562,11 @@ class TestPadDataformatError(unittest.TestCase): def test_ncdhw(): input_shape = (1, 2, 3, 4) pad = paddle.to_tensor(np.array([2, 1, 2, 1]).astype('int32')) - data = np.arange( - np.prod(input_shape), dtype=np.float64).reshape(input_shape) + 1 - my_pad = nn.Pad1D( - padding=pad, mode="replicate", data_format="NCDHW") + data = np.arange(np.prod(input_shape), + dtype=np.float64).reshape(input_shape) + 1 + my_pad = nn.Pad1D(padding=pad, + mode="replicate", + data_format="NCDHW") data = paddle.to_tensor(data) result = my_pad(data) diff --git a/python/paddle/fluid/tests/unittests/npu/test_pad_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_pad_op_npu.py index d1d2e8b3467..13c99f993f9 100644 --- 
a/python/paddle/fluid/tests/unittests/npu/test_pad_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_pad_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,21 +29,25 @@ paddle.enable_static() class TestPadOp(OpTest): + def setUp(self): self.op_type = "pad" self.set_npu() self.init_dtype() self.initTestCase() - self.inputs = {'X': np.random.random(self.shape).astype(self.dtype), } + self.inputs = { + 'X': np.random.random(self.shape).astype(self.dtype), + } self.attrs = {} self.attrs['paddings'] = np.array(self.paddings).flatten() self.attrs['pad_value'] = self.pad_value self.outputs = { - 'Out': np.pad(self.inputs['X'], - self.paddings, - mode='constant', - constant_values=self.pad_value) + 'Out': + np.pad(self.inputs['X'], + self.paddings, + mode='constant', + constant_values=self.pad_value) } def test_check_output(self): @@ -50,8 +55,9 @@ class TestPadOp(OpTest): def test_check_grad_normal(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['X'], 'Out', max_relative_error=0.6) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.6) else: self.check_grad_with_place(self.place, ['X'], 'Out') @@ -69,6 +75,7 @@ class TestPadOp(OpTest): class TestCase1(TestPadOp): + def initTestCase(self): self.shape = (2, 3, 4, 5) self.paddings = [(0, 1), (2, 3), (2, 1), (1, 1)] @@ -76,6 +83,7 @@ class TestCase1(TestPadOp): class TestCase2(TestPadOp): + def initTestCase(self): self.shape = (5, 5, 5) self.paddings = [(0, 0), (0, 0), (1, 2)] @@ -83,6 +91,7 @@ class TestCase2(TestPadOp): class TestCase3(TestPadOp): + def initTestCase(self): self.shape = (100) self.paddings = [(0, 1)] @@ -93,7 +102,9 @@ class TestCase3(TestPadOp): def create_test_fp16(parent): + class TestPadFp16(parent): + def init_dtype(self): self.dtype = np.float16 @@ -109,6 +120,7 @@ create_test_fp16(TestCase3) class TestPadOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): input_data = np.random.random((2, 2)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/npu/test_parallel_dygraph_mnist_npu.py b/python/paddle/fluid/tests/unittests/npu/test_parallel_dygraph_mnist_npu.py index 1d09bd93e9b..76980bf8478 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_parallel_dygraph_mnist_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_parallel_dygraph_mnist_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import os import sys import unittest + sys.path.append("..") from test_dist_base import TestDistBase @@ -51,14 +52,19 @@ need_envs = { os.getenv("ASCEND_AICPU_PATH", "/usr/local/Ascend/nnae/latest"), "ASCEND_OPP_PATH": os.getenv("ASCEND_OPP_PATH", "/usr/local/Ascend/nnae/latest/opp"), - "HCCL_CONNECT_TIMEOUT": "7200", - "HCCL_WHITELIST_DISABLE": "1", - "HCCL_SECURITY_MODE": "1", - "RANK_TABLE_FILE": "rank_table_file.json", + "HCCL_CONNECT_TIMEOUT": + "7200", + "HCCL_WHITELIST_DISABLE": + "1", + "HCCL_SECURITY_MODE": + "1", + "RANK_TABLE_FILE": + "rank_table_file.json", } class TestParallelDygraphMnistNPU(TestDistBase): + def _setup_config(self): self._sync_mode = False self._hccl_mode = True @@ -78,6 +84,7 @@ class TestParallelDygraphMnistNPU(TestDistBase): class TestFleetDygraphMnistNPU(TestParallelDygraphMnistNPU): + def _setup_config(self): self._sync_mode = False self._hccl_mode = True diff --git a/python/paddle/fluid/tests/unittests/npu/test_pool2d_op_npu.py 
b/python/paddle/fluid/tests/unittests/npu/test_pool2d_op_npu.py index 4822abc3b25..3e7d1fd80ee 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_pool2d_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_pool2d_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import sys import unittest import numpy as np + sys.path.append("..") import paddle @@ -30,7 +31,9 @@ paddle.enable_static() def create_test_padding_SAME_class(parent): + class TestPaddingSMAECase(parent): + def init_paddings(self): self.paddings = [0, 0] self.padding_algorithm = "SAME" @@ -41,7 +44,9 @@ def create_test_padding_SAME_class(parent): def create_test_use_ceil_class(parent): + class TestPool2DUseCeilCase(parent): + def init_ceil_mode(self): self.ceil_mode = True @@ -51,7 +56,9 @@ def create_test_use_ceil_class(parent): def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): self.paddings = [1, 1] self.padding_algorithm = "VALID" @@ -62,7 +69,9 @@ def create_test_padding_VALID_class(parent): def create_test_fp16_class(parent): + class TestFp16Case(parent): + def init_kernel_type(self): self.use_cudnn = False self.dtype = np.float16 @@ -89,8 +98,8 @@ def pool2d_backward_navie(x, for input_size, filter_size, stride_size in zip(input_shape, pool_size, pool_stride): out_size = int((input_size + stride_size - 1) / stride_size) - pad_sum = np.max(( - (out_size - 1) * stride_size + filter_size - input_size, 0)) + pad_sum = np.max( + ((out_size - 1) * stride_size + filter_size - input_size, 0)) pad_0 = int(pad_sum / 2) pad_1 = int(pad_sum - pad_0) padding.append(pad_0) @@ -171,19 +180,19 @@ def pool2d_backward_navie(x, if pool_type == 'avg': if (exclusive or adaptive): - field_size = (in_h_end - in_h_start) * ( - in_w_end - in_w_start) - x_grad[:, :, in_h_start:in_h_end, in_w_start: - in_w_end] += 1 / field_size + field_size = (in_h_end - in_h_start) * (in_w_end - + in_w_start) + x_grad[:, :, in_h_start:in_h_end, + in_w_start:in_w_end] += 1 / field_size elif pool_type == 'max': for n in range(N): for c in range(C): - idx = np.argmax(x[n, c, in_h_start:in_h_end, in_w_start: - in_w_end].flatten()) + idx = np.argmax(x[n, c, in_h_start:in_h_end, + in_w_start:in_w_end].flatten()) idx_h = idx // (in_w_end - in_w_start) idx_w = idx % (in_w_end - in_w_start) - x_grad[n, c, in_h_start + idx_h, in_w_start + - idx_w] += 1 + x_grad[n, c, in_h_start + idx_h, + in_w_start + idx_w] += 1 if data_format == "NHWC": x_grad = x_grad.transpose([0, 2, 3, 1]) @@ -191,6 +200,7 @@ def pool2d_backward_navie(x, class TestPool2D_Op(OpTest): + def setUp(self): self.set_npu() self.op_type = "pool2d" @@ -210,12 +220,14 @@ class TestPool2D_Op(OpTest): input = np.random.random(self.shape).astype(self.dtype) if self.pool_type == "max": - input = np.array([x for x in range(np.prod(self.shape))]).reshape( - self.shape).astype(self.dtype) - output = pool2D_forward_naive( - input, self.ksize, self.strides, self.paddings, self.global_pool, - self.ceil_mode, self.exclusive, self.adaptive, self.data_format, - self.pool_type, self.padding_algorithm).astype(self.dtype) + input = np.array([x for x in range(np.prod(self.shape)) + ]).reshape(self.shape).astype(self.dtype) + output = pool2D_forward_naive(input, self.ksize, self.strides, + self.paddings, self.global_pool, + self.ceil_mode, self.exclusive, + self.adaptive, self.data_format, + self.pool_type, + self.padding_algorithm).astype(self.dtype) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)} self.attrs = { @@ -278,28 +290,27 
@@ class TestPool2D_Op(OpTest): self.check_output_with_place(fluid.NPUPlace(0), atol=1e-3) def test_check_grad(self): - x_grad = pool2d_backward_navie( - self.inputs["X"], - ksize=self.ksize, - strides=self.strides, - paddings=self.paddings, - global_pool=self.global_pool, - ceil_mode=False, - exclusive=self.exclusive, - adaptive=self.adaptive, - data_format=self.data_format, - pool_type=self.pool_type, - padding_algorithm=self.padding_algorithm) + x_grad = pool2d_backward_navie(self.inputs["X"], + ksize=self.ksize, + strides=self.strides, + paddings=self.paddings, + global_pool=self.global_pool, + ceil_mode=False, + exclusive=self.exclusive, + adaptive=self.adaptive, + data_format=self.data_format, + pool_type=self.pool_type, + padding_algorithm=self.padding_algorithm) x_grad = x_grad / np.prod(self.outputs['Out'].shape) - self.check_grad_with_place( - fluid.NPUPlace(0), - set(['X']), - 'Out', - max_relative_error=0.06, - user_defined_grads=[x_grad]) + self.check_grad_with_place(fluid.NPUPlace(0), + set(['X']), + 'Out', + max_relative_error=0.06, + user_defined_grads=[x_grad]) class TestCase1(TestPool2D_Op): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -319,6 +330,7 @@ class TestCase1(TestPool2D_Op): class TestCase2(TestPool2D_Op): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -338,29 +350,34 @@ class TestCase2(TestPool2D_Op): class TestCase3(TestPool2D_Op): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase4(TestCase1): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase5(TestCase2): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestAvgInclude(TestCase2): + def init_exclusive(self): self.exclusive = False class TestAvgPoolAdaptive(TestCase1): + def init_adaptive(self): self.adaptive = True @@ -374,6 +391,7 @@ class TestAvgPoolAdaptive(TestCase1): class TestAvgPoolAdaptiveAsyOutSize(TestCase1): + def init_adaptive(self): self.adaptive = True @@ -390,6 +408,7 @@ class TestAvgPoolAdaptiveAsyOutSize(TestCase1): #-------test pool2d with asymmetric padding----- class TestPool2D_AsyPadding(TestPool2D_Op): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -400,6 +419,7 @@ class TestPool2D_AsyPadding(TestPool2D_Op): class TestCase1_AsyPadding(TestCase1): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -410,6 +430,7 @@ class TestCase1_AsyPadding(TestCase1): class TestCase2_AsyPadding(TestCase2): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -420,6 +441,7 @@ class TestCase2_AsyPadding(TestCase2): class TestCase3_AsyPadding(TestCase3): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -430,6 +452,7 @@ class TestCase3_AsyPadding(TestCase3): class TestCase4_AsyPadding(TestCase4): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -440,6 +463,7 @@ class TestCase4_AsyPadding(TestCase4): class TestCase5_AsyPadding((TestCase5)): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -450,6 +474,7 @@ class TestCase5_AsyPadding((TestCase5)): class TestAvgInclude_AsyPadding(TestCase2): + def init_exclusive(self): self.exclusive = False @@ -463,6 +488,7 @@ class TestAvgInclude_AsyPadding(TestCase2): class TestAvgPoolAdaptive_AsyPadding(TestCase1): + def init_adaptive(self): self.adaptive = True @@ -477,6 +503,7 @@ class 
TestAvgPoolAdaptive_AsyPadding(TestCase1): #----------- test channel_last -------------- class TestPool2D_channel_last(TestPool2D_Op): + def init_data_format(self): self.data_format = "NHWC" @@ -485,6 +512,7 @@ class TestPool2D_channel_last(TestPool2D_Op): class TestCase1_channel_last(TestCase1): + def init_data_format(self): self.data_format = "NHWC" @@ -493,6 +521,7 @@ class TestCase1_channel_last(TestCase1): class TestCase2_channel_last(TestCase2): + def init_data_format(self): self.data_format = "NHWC" @@ -501,6 +530,7 @@ class TestCase2_channel_last(TestCase2): class TestCase3_channel_last(TestCase3): + def init_data_format(self): self.data_format = "NHWC" @@ -509,6 +539,7 @@ class TestCase3_channel_last(TestCase3): class TestCase4_channel_last(TestCase4): + def init_data_format(self): self.data_format = "NHWC" @@ -517,6 +548,7 @@ class TestCase4_channel_last(TestCase4): class TestCase5_channel_last(TestCase5): + def init_data_format(self): self.data_format = "NHWC" @@ -525,11 +557,13 @@ class TestCase5_channel_last(TestCase5): class TestCase5_Max(TestCase2): + def init_pool_type(self): self.pool_type = "max" class TestCase5_channel_last_Max(TestCase5_Max): + def init_data_format(self): self.data_format = "NHWC" @@ -538,11 +572,13 @@ class TestCase5_channel_last_Max(TestCase5_Max): class TestAvgInclude_channel_last(TestCase2_channel_last): + def init_exclusive(self): self.exclusive = False class TestAvgPoolAdaptive_channel_last(TestCase1_channel_last): + def init_adaptive(self): self.adaptive = True @@ -555,6 +591,7 @@ class TestAvgPoolAdaptive_channel_last(TestCase1_channel_last): class TestPool2D_AsyPadding_channel_last(TestPool2D_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -563,6 +600,7 @@ class TestPool2D_AsyPadding_channel_last(TestPool2D_AsyPadding): class TestCase1_AsyPadding_channel_last(TestCase1_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -571,6 +609,7 @@ class TestCase1_AsyPadding_channel_last(TestCase1_AsyPadding): class TestCase2_AsyPadding_channel_last(TestCase2_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -579,6 +618,7 @@ class TestCase2_AsyPadding_channel_last(TestCase2_AsyPadding): class TestCase3_AsyPadding_channel_last(TestCase3_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -587,6 +627,7 @@ class TestCase3_AsyPadding_channel_last(TestCase3_AsyPadding): class TestCase4_AsyPadding_channel_last(TestCase4_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -595,6 +636,7 @@ class TestCase4_AsyPadding_channel_last(TestCase4_AsyPadding): class TestCase5_AsyPadding_channel_last(TestCase5_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -603,6 +645,7 @@ class TestCase5_AsyPadding_channel_last(TestCase5_AsyPadding): class TestAvgInclude_AsyPadding_channel_last(TestAvgInclude_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -610,8 +653,9 @@ class TestAvgInclude_AsyPadding_channel_last(TestAvgInclude_AsyPadding): self.shape = [2, 7, 7, 3] -class TestAvgPoolAdaptive_AsyPadding_channel_last( - TestAvgPoolAdaptive_AsyPadding): +class TestAvgPoolAdaptive_AsyPadding_channel_last(TestAvgPoolAdaptive_AsyPadding + ): + def init_data_format(self): self.data_format = "NHWC" @@ -620,6 +664,7 @@ class TestAvgPoolAdaptive_AsyPadding_channel_last( class TestCase1_strides(TestCase1): + def init_test_case(self): self.ksize = [3, 3] # fixme: CANN AvgPoolGradV3 dose not support asymmetric strides diff --git 
a/python/paddle/fluid/tests/unittests/npu/test_pow_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_pow_op_npu.py index a188953d70c..6274ba53781 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_pow_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_pow_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestPow(OpTest): + def setUp(self): self.set_npu() self.op_type = "pow" @@ -55,6 +57,7 @@ class TestPow(OpTest): class TestPowFp16(OpTest): + def setUp(self): self.set_npu() self.op_type = "pow" @@ -81,6 +84,7 @@ class TestPowFp16(OpTest): class TestPowNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -95,8 +99,9 @@ class TestPowNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.pow(sum, 2.0) @@ -120,12 +125,13 @@ class TestPowNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_prior_box_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_prior_box_op_npu.py index 47b78d30820..cfd78c2b05b 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_prior_box_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_prior_box_op_npu.py @@ -26,6 +26,7 @@ paddle.enable_static() class TestNPUPriorBox(OpTest): + def setUp(self): self.op_type = "prior_box" self.set_npu() @@ -93,8 +94,8 @@ class TestNPUPriorBox(OpTest): self.flip = True self.set_min_max_aspect_ratios_order() self.real_aspect_ratios = [1, 2.0, 1.0 / 2.0, 3.0, 1.0 / 3.0] - self.aspect_ratios = np.array( - self.aspect_ratios, dtype=np.float).flatten() + self.aspect_ratios = np.array(self.aspect_ratios, + dtype=np.float).flatten() self.variances = [0.1, 0.1, 0.2, 0.2] self.variances = np.array(self.variances, dtype=np.float).flatten() @@ -132,22 +133,22 @@ class TestNPUPriorBox(OpTest): ar = self.real_aspect_ratios[r] c_w = min_size * math.sqrt(ar) / 2 c_h = (min_size / math.sqrt(ar)) / 2 - out_boxes[h, w, idx, :] = [ - (c_x - c_w) / self.image_w, (c_y - c_h) / - self.image_h, (c_x + c_w) / self.image_w, - (c_y + c_h) / self.image_h - ] + out_boxes[h, w, + idx, :] = [(c_x - c_w) / self.image_w, + (c_y - c_h) / self.image_h, + (c_x + c_w) / self.image_w, + (c_y + c_h) / self.image_h] idx += 1 if len(self.max_sizes) > 0: max_size = self.max_sizes[s] # second prior: aspect_ratio = 1, c_w = c_h = math.sqrt(min_size * max_size) / 2 - out_boxes[h, w, idx, :] = [ - (c_x - c_w) / self.image_w, (c_y - c_h) / - self.image_h, (c_x + c_w) / self.image_w, - (c_y + c_h) / self.image_h - ] + out_boxes[h, w, + idx, :] = [(c_x - c_w) / self.image_w, + (c_y - c_h) / 
self.image_h, + (c_x + c_w) / self.image_w, + (c_y + c_h) / self.image_h] idx += 1 else: c_w = c_h = min_size / 2. @@ -160,11 +161,11 @@ class TestNPUPriorBox(OpTest): max_size = self.max_sizes[s] # second prior: aspect_ratio = 1, c_w = c_h = math.sqrt(min_size * max_size) / 2 - out_boxes[h, w, idx, :] = [ - (c_x - c_w) / self.image_w, (c_y - c_h) / - self.image_h, (c_x + c_w) / self.image_w, - (c_y + c_h) / self.image_h - ] + out_boxes[h, w, + idx, :] = [(c_x - c_w) / self.image_w, + (c_y - c_h) / self.image_h, + (c_x + c_w) / self.image_w, + (c_y + c_h) / self.image_h] idx += 1 # rest of priors @@ -174,29 +175,31 @@ class TestNPUPriorBox(OpTest): continue c_w = min_size * math.sqrt(ar) / 2 c_h = (min_size / math.sqrt(ar)) / 2 - out_boxes[h, w, idx, :] = [ - (c_x - c_w) / self.image_w, (c_y - c_h) / - self.image_h, (c_x + c_w) / self.image_w, - (c_y + c_h) / self.image_h - ] + out_boxes[h, w, + idx, :] = [(c_x - c_w) / self.image_w, + (c_y - c_h) / self.image_h, + (c_x + c_w) / self.image_w, + (c_y + c_h) / self.image_h] idx += 1 # clip the prior's coordidate such that it is within[0, 1] if self.clip: out_boxes = np.clip(out_boxes, 0.0, 1.0) # set the variance. - out_var = np.tile(self.variances, (self.layer_h, self.layer_w, - self.num_priors, 1)) + out_var = np.tile(self.variances, + (self.layer_h, self.layer_w, self.num_priors, 1)) self.out_boxes = out_boxes.astype('float32') self.out_var = out_var.astype('float32') class TestNPUPriorBoxWithoutMaxSize(TestNPUPriorBox): + def set_max_sizes(self): self.max_sizes = [] class TestNPUPriorBoxWithoutSpecifiedOutOrder(TestNPUPriorBox): + def set_min_max_aspect_ratios_order(self): self.min_max_aspect_ratios_order = False self.atol = 1e-1 diff --git a/python/paddle/fluid/tests/unittests/npu/test_randperm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_randperm_op_npu.py index 4ec353c55de..02b2b2caf86 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_randperm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_randperm_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -58,31 +59,36 @@ class TestRandpermOp(OpTest): def verify_output(self, outs): out_np = np.array(outs[0]) - self.assertTrue( - check_randperm_out(self.n, out_np), msg=error_msg(out_np)) + self.assertTrue(check_randperm_out(self.n, out_np), + msg=error_msg(out_np)) class TestRandpermOpN(TestRandpermOp): + def init_attrs(self): self.n = 10000 class TestRandpermOpInt32(TestRandpermOp): + def init_attrs(self): self.dtype = "int32" class TestRandpermOpFloat32(TestRandpermOp): + def init_attrs(self): self.dtype = "float32" class TestRandpermOpFloat64(TestRandpermOp): + def init_attrs(self): self.dtype = "float64" class TestRandpermOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): self.assertRaises(ValueError, paddle.randperm, -3) @@ -90,6 +96,7 @@ class TestRandpermOpError(unittest.TestCase): class TestRandpermAPI(unittest.TestCase): + def test_out(self): n = 10 place = paddle.NPUPlace(0) @@ -107,14 +114,15 @@ class TestRandpermAPI(unittest.TestCase): class TestRandpermImperative(unittest.TestCase): + def test_out(self): paddle.disable_static(paddle.NPUPlace(0)) n = 10 for dtype in ['int32', np.int64, 'float32', 'float64']: data_p = paddle.randperm(n, dtype) data_np = data_p.numpy() - self.assertTrue( - check_randperm_out(n, data_np), msg=error_msg(data_np)) + 
self.assertTrue(check_randperm_out(n, data_np), + msg=error_msg(data_np)) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/npu/test_range_npu.py b/python/paddle/fluid/tests/unittests/npu/test_range_npu.py index c6700a19c52..d9663a3a151 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_range_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_range_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -25,6 +26,7 @@ paddle.enable_static() class TestRangeOp(OpTest): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -40,8 +42,9 @@ class TestRangeOp(OpTest): } self.outputs = { - 'Out': np.arange(self.case[0], self.case[1], - self.case[2]).astype(self.dtype) + 'Out': + np.arange(self.case[0], self.case[1], + self.case[2]).astype(self.dtype) } def init_config(self): @@ -53,42 +56,49 @@ class TestRangeOp(OpTest): class TestFloatRangeOpCase0(TestRangeOp): + def init_config(self): self.dtype = np.float32 self.case = (0, 5, 1) class TestInt32RangeOpCase0(TestRangeOp): + def init_config(self): self.dtype = np.int32 self.case = (0, 5, 2) class TestInt32RangeOpCase1(TestRangeOp): + def init_config(self): self.dtype = np.int32 self.case = (10, 1, -2) class TestInt32RangeOpCase2(TestRangeOp): + def init_config(self): self.dtype = np.int32 self.case = (-1, -10, -2) class TestInt64RangeOpCase0(TestRangeOp): + def init_config(self): self.dtype = np.int64 self.case = (0, 5, 2) class TestInt64RangeOpCase1(TestRangeOp): + def init_config(self): self.dtype = np.int64 self.case = (10, 1, -2) class TestInt64RangeOpCase2(TestRangeOp): + def init_config(self): self.dtype = np.int64 self.case = (-1, -10, -2) diff --git a/python/paddle/fluid/tests/unittests/npu/test_reciprocal_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reciprocal_op_npu.py index 899d4ef43bd..87e1c488024 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_reciprocal_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_reciprocal_op_npu.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
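The hunks in these test files all come from the same mechanical yapf pass: a blank line is inserted after each class header, and call arguments that were previously wrapped with a hanging indent are split one per line and aligned under the opening parenthesis. A minimal sketch of reproducing that formatting locally follows; it assumes yapf is installed, uses the built-in "pep8" style as a stand-in for the repository's actual .style.yapf, and formats a simplified, hypothetical snippet (TestPowExample is not code from this patch).

# Minimal sketch, not part of the patch: run yapf programmatically on a
# snippet shaped like the tests above. Assumptions: yapf is available and
# "pep8" only approximates the project's real style file; the exact output
# depends on the yapf version and configuration.
from yapf.yapflib.yapf_api import FormatCode

# The snippet is only formatted, never executed, so the paddle/OpTest
# imports it mentions are not needed here.
unformatted = (
    "class TestPowExample(OpTest):\n"
    "    def setUp(self):\n"
    "        label = paddle.static.data(\n"
    "            name='label', shape=[32, 1], dtype='int64')\n"
)

# In the yapf releases current when this patch was made, FormatCode returns
# a (formatted_source, changed) tuple.
formatted, changed = FormatCode(unformatted, style_config="pep8")
print(formatted)

With the repository's own .style.yapf the output should resemble the "+" side of the hunks above: keyword arguments one per line, aligned with the opening parenthesis, and a blank line between the class statement and setUp. The built-in "pep8" style used in this sketch may differ in where it places that blank line.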
@@ -17,13 +17,16 @@ from __future__ import print_function, division import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle + paddle.enable_static() class TestNPUReciprocal(OpTest): + def setUp(self): self.op_type = "reciprocal" self.set_npu() @@ -40,8 +43,9 @@ class TestNPUReciprocal(OpTest): self.check_output_with_place(self.place) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', max_relative_error=0.01) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.01) def set_npu(self): self.__class__.use_npu = True @@ -52,6 +56,7 @@ class TestNPUReciprocal(OpTest): class TestNPUReciprocalFp64(TestNPUReciprocal): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -63,6 +68,7 @@ class TestNPUReciprocalFp64(TestNPUReciprocal): @skip_check_grad_ci( reason="The backward test is not supported for float16 type on NPU.") class TestNPUReciprocalFp16(TestNPUReciprocal): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) diff --git a/python/paddle/fluid/tests/unittests/npu/test_reduce_any_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reduce_any_op_npu.py index 1a30d139528..ae871b09989 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_reduce_any_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_reduce_any_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -29,6 +30,7 @@ paddle.enable_static() class TestAny8DOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "reduce_any" @@ -48,6 +50,7 @@ class TestAny8DOp(OpTest): class TestAnyOpWithDim(OpTest): + def setUp(self): self.set_npu() self.op_type = "reduce_any" @@ -64,6 +67,7 @@ class TestAnyOpWithDim(OpTest): class TestAny8DOpWithDim(OpTest): + def setUp(self): self.set_npu() self.op_type = "reduce_any" @@ -83,6 +87,7 @@ class TestAny8DOpWithDim(OpTest): class TestAnyOpWithKeepDim(OpTest): + def setUp(self): self.set_npu() self.op_type = "reduce_any" @@ -90,8 +95,8 @@ class TestAnyOpWithKeepDim(OpTest): self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")} self.attrs = {'dim': (1), 'keep_dim': True} self.outputs = { - 'Out': np.expand_dims( - self.inputs['X'].any(axis=self.attrs['dim']), axis=1) + 'Out': + np.expand_dims(self.inputs['X'].any(axis=self.attrs['dim']), axis=1) } def set_npu(self): @@ -102,6 +107,7 @@ class TestAnyOpWithKeepDim(OpTest): class TestAny8DOpWithKeepDim(OpTest): + def setUp(self): self.set_npu() self.op_type = "reduce_any" @@ -112,8 +118,8 @@ class TestAny8DOpWithKeepDim(OpTest): } self.attrs = {'dim': (1), 'keep_dim': True} self.outputs = { - 'Out': np.expand_dims( - self.inputs['X'].any(axis=self.attrs['dim']), axis=1) + 'Out': + np.expand_dims(self.inputs['X'].any(axis=self.attrs['dim']), axis=1) } def set_npu(self): diff --git a/python/paddle/fluid/tests/unittests/npu/test_reduce_max_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reduce_max_op_npu.py index 68a28ea72e1..64f66476542 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_reduce_max_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_reduce_max_op_npu.py @@ -194,8 +194,9 @@ class TestReduceMaxOpWithOutDtype_fp16(TestNPUReduceMaxOp): 'out_dtype': int(core.VarDesc.VarType.FP16) } self.outputs = { - 'Out': self.inputs['X'].max( - 
axis=tuple(self.attrs['dim'])).astype(np.float16) + 'Out': + self.inputs['X'].max(axis=tuple(self.attrs['dim'])).astype( + np.float16) } def test_check_output(self): @@ -219,8 +220,9 @@ class TestReduceMaxOpWithOutDtype_fp32(TestNPUReduceMaxOp): 'out_dtype': int(core.VarDesc.VarType.FP32) } self.outputs = { - 'Out': self.inputs['X'].max( - axis=tuple(self.attrs['dim'])).astype(np.float32) + 'Out': + self.inputs['X'].max(axis=tuple(self.attrs['dim'])).astype( + np.float32) } @@ -241,8 +243,9 @@ class TestReduceMaxOpWithOutDtype_fp64(TestNPUReduceMaxOp): 'out_dtype': int(core.VarDesc.VarType.FP64) } self.outputs = { - 'Out': self.inputs['X'].max( - axis=tuple(self.attrs['dim'])).astype(np.float64) + 'Out': + self.inputs['X'].max(axis=tuple(self.attrs['dim'])).astype( + np.float64) } @@ -263,8 +266,9 @@ class TestReduceMaxOpWithOutDtype_fp32_2(TestNPUReduceMaxOp): 'out_dtype': int(core.VarDesc.VarType.FP32) } self.outputs = { - 'Out': self.inputs['X'].max( - axis=tuple(self.attrs['dim'])).astype(np.float32) + 'Out': + self.inputs['X'].max(axis=tuple(self.attrs['dim'])).astype( + np.float32) } def init_dtype(self): @@ -288,8 +292,9 @@ class TestReduceMaxOpInt64(TestNPUReduceMaxOp): 'out_dtype': int(core.VarDesc.VarType.INT64) } self.outputs = { - 'Out': self.inputs['X'].max( - axis=tuple(self.attrs['dim'])).astype(np.float32) + 'Out': + self.inputs['X'].max(axis=tuple(self.attrs['dim'])).astype( + np.float32) } def init_dtype(self): diff --git a/python/paddle/fluid/tests/unittests/npu/test_reduce_mean_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reduce_mean_op_npu.py index ed27c335a4e..3a2f70f0d37 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_reduce_mean_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_reduce_mean_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -26,6 +27,7 @@ paddle.enable_static() class TestMeanOp(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -43,6 +45,7 @@ class TestMeanOp(OpTest): class TestMeanOp5D(TestMeanOp): + def setUp(self): self.set_npu() self.op_type = "reduce_mean" @@ -53,6 +56,7 @@ class TestMeanOp5D(TestMeanOp): class TestMeanOp6D(TestMeanOp): + def setUp(self): self.set_npu() self.op_type = "reduce_mean" @@ -63,6 +67,7 @@ class TestMeanOp6D(TestMeanOp): class TestMeanOp8D(TestMeanOp): + def setUp(self): self.set_npu() self.op_type = "reduce_mean" @@ -74,6 +79,7 @@ class TestMeanOp8D(TestMeanOp): class Test1DReduce(TestMeanOp): + def setUp(self): self.set_npu() self.op_type = "reduce_mean" @@ -82,6 +88,7 @@ class Test1DReduce(TestMeanOp): class Test2DReduce0(Test1DReduce): + def setUp(self): self.set_npu() self.op_type = "reduce_mean" @@ -91,6 +98,7 @@ class Test2DReduce0(Test1DReduce): class Test2DReduce1(Test1DReduce): + def setUp(self): self.set_npu() self.op_type = "reduce_mean" @@ -102,6 +110,7 @@ class Test2DReduce1(Test1DReduce): class Test3DReduce0(Test1DReduce): + def setUp(self): self.set_npu() self.op_type = "reduce_mean" @@ -113,6 +122,7 @@ class Test3DReduce0(Test1DReduce): class Test3DReduce1(Test1DReduce): + def setUp(self): self.set_npu() self.op_type = "reduce_mean" @@ -124,6 +134,7 @@ class Test3DReduce1(Test1DReduce): class Test3DReduce2(Test1DReduce): + def setUp(self): self.set_npu() self.op_type = "reduce_mean" @@ -135,6 +146,7 @@ class Test3DReduce2(Test1DReduce): class Test3DReduce3(Test1DReduce): + def setUp(self): self.set_npu() self.op_type = 
"reduce_mean" @@ -146,18 +158,21 @@ class Test3DReduce3(Test1DReduce): class TestKeepDimReduce(Test1DReduce): + def setUp(self): self.set_npu() self.op_type = "reduce_mean" self.inputs = {'X': np.random.random((5, 6, 10)).astype("float32")} self.attrs = {'dim': [1], 'keep_dim': True} self.outputs = { - 'Out': self.inputs['X'].mean( - axis=tuple(self.attrs['dim']), keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].mean(axis=tuple(self.attrs['dim']), + keepdims=self.attrs['keep_dim']) } class TestKeepDim8DReduce(Test1DReduce): + def setUp(self): self.set_npu() self.op_type = "reduce_mean" @@ -166,12 +181,14 @@ class TestKeepDim8DReduce(Test1DReduce): } self.attrs = {'dim': (3, 4, 5), 'keep_dim': True} self.outputs = { - 'Out': self.inputs['X'].mean( - axis=tuple(self.attrs['dim']), keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].mean(axis=tuple(self.attrs['dim']), + keepdims=self.attrs['keep_dim']) } class TestReduceAll(Test1DReduce): + def setUp(self): self.set_npu() self.op_type = "reduce_mean" diff --git a/python/paddle/fluid/tests/unittests/npu/test_reduce_min_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reduce_min_op_npu.py index bbf23e1be3e..85d1fe94781 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_reduce_min_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_reduce_min_op_npu.py @@ -194,8 +194,9 @@ class TestReduceMinOpWithOutDtype_fp16(TestNPUReduceMinOp): 'out_dtype': int(core.VarDesc.VarType.FP16) } self.outputs = { - 'Out': self.inputs['X'].min( - axis=tuple(self.attrs['dim'])).astype(np.float16) + 'Out': + self.inputs['X'].min(axis=tuple(self.attrs['dim'])).astype( + np.float16) } def test_check_output(self): @@ -219,8 +220,9 @@ class TestReduceMinOpWithOutDtype_fp32(TestNPUReduceMinOp): 'out_dtype': int(core.VarDesc.VarType.FP32) } self.outputs = { - 'Out': self.inputs['X'].min( - axis=tuple(self.attrs['dim'])).astype(np.float32) + 'Out': + self.inputs['X'].min(axis=tuple(self.attrs['dim'])).astype( + np.float32) } @@ -241,8 +243,9 @@ class TestReduceMinOpWithOutDtype_fp64(TestNPUReduceMinOp): 'out_dtype': int(core.VarDesc.VarType.FP64) } self.outputs = { - 'Out': self.inputs['X'].min( - axis=tuple(self.attrs['dim'])).astype(np.float64) + 'Out': + self.inputs['X'].min(axis=tuple(self.attrs['dim'])).astype( + np.float64) } @@ -263,8 +266,9 @@ class TestReduceMinOpWithOutDtype_fp32_2(TestNPUReduceMinOp): 'out_dtype': int(core.VarDesc.VarType.FP32) } self.outputs = { - 'Out': self.inputs['X'].min( - axis=tuple(self.attrs['dim'])).astype(np.float32) + 'Out': + self.inputs['X'].min(axis=tuple(self.attrs['dim'])).astype( + np.float32) } def init_dtype(self): @@ -288,8 +292,9 @@ class TestReduceMinOpInt64(TestNPUReduceMinOp): 'out_dtype': int(core.VarDesc.VarType.INT64) } self.outputs = { - 'Out': self.inputs['X'].min( - axis=tuple(self.attrs['dim'])).astype(np.float32) + 'Out': + self.inputs['X'].min(axis=tuple(self.attrs['dim'])).astype( + np.float32) } def init_dtype(self): diff --git a/python/paddle/fluid/tests/unittests/npu/test_reduce_prod_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reduce_prod_op_npu.py index 59f181be5ed..c32e105b02a 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_reduce_prod_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_reduce_prod_op_npu.py @@ -27,6 +27,7 @@ paddle.enable_static() class TestNPUReduceProd(OpTest): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -50,6 +51,7 @@ class TestNPUReduceProd(OpTest): class 
TestNPUReduceProd2(TestNPUReduceProd): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -61,6 +63,7 @@ class TestNPUReduceProd2(TestNPUReduceProd): class TestNPUReduceProd3(TestNPUReduceProd): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -72,6 +75,7 @@ class TestNPUReduceProd3(TestNPUReduceProd): class TestNPUReduceProd6D(TestNPUReduceProd): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -87,6 +91,7 @@ class TestNPUReduceProd6D(TestNPUReduceProd): class TestNPUReduceProd8D(TestNPUReduceProd): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -102,6 +107,7 @@ class TestNPUReduceProd8D(TestNPUReduceProd): class TestReduceAll(TestNPUReduceProd): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -113,6 +119,7 @@ class TestReduceAll(TestNPUReduceProd): class TestNPUReduceProdWithOutDtype_bool(TestNPUReduceProd): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -127,6 +134,7 @@ class TestNPUReduceProdWithOutDtype_bool(TestNPUReduceProd): class TestNPUReduceProdWithOutDtype_int16(TestNPUReduceProd): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -135,12 +143,14 @@ class TestNPUReduceProdWithOutDtype_int16(TestNPUReduceProd): self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.INT16)} self.outputs = { - 'Out': self.inputs['X'].prod( - axis=tuple(self.attrs['dim'])).astype(np.int16) + 'Out': + self.inputs['X'].prod(axis=tuple(self.attrs['dim'])).astype( + np.int16) } class TestNPUReduceProdWithOutDtype_int32(TestNPUReduceProd): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -149,12 +159,14 @@ class TestNPUReduceProdWithOutDtype_int32(TestNPUReduceProd): self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.INT32)} self.outputs = { - 'Out': self.inputs['X'].prod( - axis=tuple(self.attrs['dim'])).astype(np.int32) + 'Out': + self.inputs['X'].prod(axis=tuple(self.attrs['dim'])).astype( + np.int32) } class TestNPUReduceProdWithOutDtype_int64(TestNPUReduceProd): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -163,12 +175,14 @@ class TestNPUReduceProdWithOutDtype_int64(TestNPUReduceProd): self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.INT64)} self.outputs = { - 'Out': self.inputs['X'].prod( - axis=tuple(self.attrs['dim'])).astype(np.int64) + 'Out': + self.inputs['X'].prod(axis=tuple(self.attrs['dim'])).astype( + np.int64) } class TestNPUReduceProdWithOutDtype_fp16(TestNPUReduceProd): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -177,8 +191,9 @@ class TestNPUReduceProdWithOutDtype_fp16(TestNPUReduceProd): self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.FP16)} self.outputs = { - 'Out': self.inputs['X'].prod( - axis=tuple(self.attrs['dim'])).astype(np.float16) + 'Out': + self.inputs['X'].prod(axis=tuple(self.attrs['dim'])).astype( + np.float16) } def test_check_output(self): @@ -186,6 +201,7 @@ class TestNPUReduceProdWithOutDtype_fp16(TestNPUReduceProd): class TestNPUReduceProdWithOutDtype_fp32(TestNPUReduceProd): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -194,12 +210,14 @@ class TestNPUReduceProdWithOutDtype_fp32(TestNPUReduceProd): self.inputs = {'X': np.random.random((5, 6, 
10)).astype(self.dtype)} self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.FP32)} self.outputs = { - 'Out': self.inputs['X'].prod( - axis=tuple(self.attrs['dim'])).astype(np.float32) + 'Out': + self.inputs['X'].prod(axis=tuple(self.attrs['dim'])).astype( + np.float32) } class TestNPUReduceProdWithOutDtype_fp64(TestNPUReduceProd): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -208,13 +226,15 @@ class TestNPUReduceProdWithOutDtype_fp64(TestNPUReduceProd): self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.FP64)} self.outputs = { - 'Out': self.inputs['X'].prod( - axis=tuple(self.attrs['dim'])).astype(np.float64) + 'Out': + self.inputs['X'].prod(axis=tuple(self.attrs['dim'])).astype( + np.float64) } @skip_check_grad_ci(reason="right now not implement grad op") class TestNPUReduceProdWithOutDtype_fp32_2(TestNPUReduceProd): + def setUp(self): self.op_type = "reduce_prod" self.set_npu() @@ -223,8 +243,9 @@ class TestNPUReduceProdWithOutDtype_fp32_2(TestNPUReduceProd): self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)} self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.FP32)} self.outputs = { - 'Out': self.inputs['X'].prod( - axis=tuple(self.attrs['dim'])).astype(np.float32) + 'Out': + self.inputs['X'].prod(axis=tuple(self.attrs['dim'])).astype( + np.float32) } def init_dtype(self): diff --git a/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py index bd7ce2a040c..632defd7f0e 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestReduceSum(OpTest): + def setUp(self): np.random.seed(SEED) self.set_npu() @@ -46,8 +48,9 @@ class TestReduceSum(OpTest): self.outputs = {'Out': self.inputs['X'].sum()} else: self.outputs = { - 'Out': self.inputs['X'].sum(axis=self.axis, - keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].sum(axis=self.axis, + keepdims=self.attrs['keep_dim']) } def set_npu(self): @@ -78,11 +81,13 @@ class TestReduceSum(OpTest): class TestReduceSum2(OpTest): + def init_dtype(self): self.dtype = np.int32 class TestReduceSumNet(unittest.TestCase): + def set_reduce_sum_function(self, x): # keep_dim = False return paddle.fluid.layers.reduce_sum(x, dim=-1) @@ -101,8 +106,9 @@ class TestReduceSumNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[2, 3, 4], dtype='float32') b = paddle.static.data(name="b", shape=[2, 3, 4], dtype='float32') - label = paddle.static.data( - name="label", shape=[2, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[2, 1], + dtype='int64') a_1 = fluid.layers.fc(input=a, size=4, num_flatten_dims=2, act=None) b_1 = fluid.layers.fc(input=b, size=4, num_flatten_dims=2, act=None) @@ -127,12 +133,13 @@ class TestReduceSumNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + 
fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) @@ -148,12 +155,14 @@ class TestReduceSumNet(unittest.TestCase): class TestReduceSumNet2(TestReduceSumNet): + def set_reduce_sum_function(self, x): # keep_dim = True return paddle.fluid.layers.reduce_sum(x, dim=-1, keep_dim=True) class TestReduceSumNet3(TestReduceSumNet): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -185,8 +194,10 @@ class TestReduceSumNet3(TestReduceSumNet): for epoch in range(100): loss_res = exe.run(main_prog, - feed={"a": a_np, - "b": b_np}, + feed={ + "a": a_np, + "b": b_np + }, fetch_list=[loss]) if epoch % 10 == 0: print("Epoch {} | Loss: {}".format(epoch, loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py index b1cb5e02a73..1bf503a3779 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py @@ -20,6 +20,7 @@ from op_test import OpTest import numpy as np import unittest import sys + sys.path.append("..") paddle.enable_static() @@ -34,6 +35,7 @@ def ref_relu6(x, threshold=6.0): class TestRelu6(OpTest): + def setUp(self): self.set_npu() self.op_type = "relu6" @@ -63,6 +65,7 @@ class TestRelu6(OpTest): class TestRelu6Float16(TestRelu6): + def set_npu(self): self.__class__.use_npu = True self.__class__.no_need_check_grad = True @@ -75,6 +78,7 @@ class TestRelu6Float16(TestRelu6): class TestReluNeg(TestRelu6): + def setUp(self): self.set_npu() self.op_type = "relu6" @@ -101,6 +105,7 @@ class TestReluNeg(TestRelu6): class TestRelu6Net(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -115,8 +120,9 @@ class TestRelu6Net(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.nn.functional.relu6(sum) @@ -140,12 +146,13 @@ class TestRelu6Net(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py index c909b14b514..f5f95deffba 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestRelu(OpTest): + def setUp(self): self.set_npu() self.op_type = "relu" @@ -53,18 +55,21 @@ class TestRelu(OpTest): def test_check_grad(self): if self.dtype == np.float16: - 
self.check_grad_with_place( - self.place, ['X'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.006) else: self.check_grad_with_place(self.place, ['X'], 'Out') class TestReluFp16(TestRelu): + def init_dtype(self): self.dtype = np.float16 class TestReluNeg(OpTest): + def setUp(self): self.set_npu() self.op_type = "relu" @@ -94,6 +99,7 @@ class TestReluNeg(OpTest): class TestReluNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -108,8 +114,9 @@ class TestReluNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.nn.functional.relu(sum) @@ -133,12 +140,13 @@ class TestReluNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_reshape2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reshape2_op_npu.py index 520de15f4df..12819252710 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_reshape2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_reshape2_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestReshape2(OpTest): + def setUp(self): self.set_npu() self.op_type = "reshape2" @@ -56,6 +58,7 @@ class TestReshape2(OpTest): class TestReshape2_case2(TestReshape2): + def init_data(self): self.ori_shape = (2, 100) self.new_shape = (-1, 10) @@ -63,6 +66,7 @@ class TestReshape2_case2(TestReshape2): class TestReshape2_case3(TestReshape2): + def init_data(self): self.ori_shape = (100, 5, 6) self.new_shape = (-1, 0, 3) diff --git a/python/paddle/fluid/tests/unittests/npu/test_rmsprop_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_rmsprop_op_npu.py index 8bdf841c5cf..d71c1453c33 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_rmsprop_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_rmsprop_op_npu.py @@ -16,6 +16,7 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") from op_test import OpTest import numpy as np @@ -29,6 +30,7 @@ SEED = 2021 class TestNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -43,8 +45,9 @@ class TestNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) z = 
paddle.pow(sum, 2.0) @@ -68,12 +71,13 @@ class TestNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) @@ -89,6 +93,7 @@ class TestNet(unittest.TestCase): class TestCenteredNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -103,8 +108,9 @@ class TestCenteredNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.pow(sum, 2.0) @@ -128,12 +134,13 @@ class TestCenteredNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_roi_align_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_roi_align_op_npu.py index 9ca2856886e..1073e645c3e 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_roi_align_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_roi_align_op_npu.py @@ -18,6 +18,7 @@ import unittest import numpy as np import math import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ np.random.seed(1243) class TestROIAlignNPUOp(OpTest): + def set_data(self): self.init_test_case() self.make_rois() @@ -73,8 +75,8 @@ class TestROIAlignNPUOp(OpTest): bilinear_pos = np.zeros( [self.channels, self.pooled_height, self.pooled_width, count, 4], np.float32) - bilinear_w = np.zeros( - [self.pooled_height, self.pooled_width, count, 4], np.float32) + bilinear_w = np.zeros([self.pooled_height, self.pooled_width, count, 4], + np.float32) for ph in range(self.pooled_width): for pw in range(self.pooled_height): c = 0 @@ -195,6 +197,7 @@ class TestROIAlignNPUOp(OpTest): class TestROIAlignOpWithMinusSample(TestROIAlignNPUOp): + def init_test_case(self): self.batch_size = 3 self.channels = 3 diff --git a/python/paddle/fluid/tests/unittests/npu/test_sampling_id_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sampling_id_op_npu.py index 836d2b6d311..399ac0d2522 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sampling_id_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sampling_id_op_npu.py @@ -15,6 +15,7 @@ import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest, _set_use_system_allocator @@ -27,6 +28,7 @@ _set_use_system_allocator(False) class TestSamplingIdShape(unittest.TestCase): + def test_shape(self): paddle.enable_static() x = fluid.layers.data(name='x', shape=[3], dtype='float32') @@ -37,8 +39,7 @@ 
class TestSamplingIdShape(unittest.TestCase): exe.run(fluid.default_startup_program()) feed = { - 'x': np.array( - [[0.2, 0.3, 0.5], [0.2, 0.3, 0.4]], dtype='float32') + 'x': np.array([[0.2, 0.3, 0.5], [0.2, 0.3, 0.4]], dtype='float32') } output_np = exe.run(feed=feed, fetch_list=[output])[0] diff --git a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py index 3bdf8146fb2..bb21b1024e3 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py @@ -16,6 +16,7 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import paddle import paddle.fluid as fluid @@ -37,54 +38,62 @@ paddle.enable_static() class TestNPUSaveLoadBase(TestSaveLoadBase): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_npu() else paddle.NPUPlace(0) class TestNPUSaveLoadPartial(TestSaveLoadPartial): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_npu() else paddle.NPUPlace(0) class TestNPUSaveLoadSetStateDict(TestSaveLoadSetStateDict): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_npu() else paddle.NPUPlace(0) class TestNPUProgramStatePartial(TestProgramStatePartial): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_npu() else paddle.NPUPlace(0) class TestNPULoadFromOldInterface(TestLoadFromOldInterface): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_npu() else paddle.NPUPlace(0) class TestNPULoadFromOldInterfaceSingleFile(TestLoadFromOldInterfaceSingleFile): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_npu() else paddle.NPUPlace(0) class TestNPUProgramStateOldSave(TestProgramStateOldSave): + def setUp(self): self.test_dygraph = False def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_npu() else paddle.NPUPlace(0) class TestNPUProgramStateOldSaveSingleModel(TestProgramStateOldSaveSingleModel): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_npu() else paddle.NPUPlace(0) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/npu/test_scale_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_scale_op_npu.py index 424c4ca0ff3..f8db47345a7 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_scale_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_scale_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestScale(OpTest): + def setUp(self): self.set_npu() self.op_type = "scale" @@ 
-34,13 +36,14 @@ class TestScale(OpTest): self.init_dtype() self.inputs = { - 'X': OpTest.np_dtype_to_fluid_dtype( + 'X': + OpTest.np_dtype_to_fluid_dtype( np.random.random((10, 10)).astype(self.dtype)) } self.attrs = {'scale': -2.3, 'bias': 0, 'bias_after_scale': True} self.outputs = { - 'Out': (self.inputs['X'] * - self.dtype(self.attrs['scale'])).astype(self.dtype) + 'Out': (self.inputs['X'] * self.dtype(self.attrs['scale'])).astype( + self.dtype) } def set_npu(self): @@ -54,21 +57,25 @@ class TestScale(OpTest): class TestFP16Scale(TestScale): + def init_dtype(self): self.dtype = np.float16 class TestScaleInt(TestScale): + def init_dtype(self): self.dtype = np.int32 class TestScaleInt64(TestScale): + def init_dtype(self): self.dtype = np.int64 class TestBiasAfterScale(OpTest): + def setUp(self): self.set_npu() self.op_type = "scale" @@ -76,7 +83,8 @@ class TestBiasAfterScale(OpTest): self.init_dtype() self.inputs = { - 'X': OpTest.np_dtype_to_fluid_dtype( + 'X': + OpTest.np_dtype_to_fluid_dtype( np.random.random((10, 10)).astype(self.dtype)) } self.attrs = {'scale': -2.3, 'bias': 0, 'bias_after_scale': False} diff --git a/python/paddle/fluid/tests/unittests/npu/test_scatter_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_scatter_op_npu.py index c3536546419..1eb85db274c 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_scatter_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_scatter_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestCast1_FP32(OpTest): + def setUp(self): self.set_npu() self.op_type = "scatter" @@ -51,6 +53,7 @@ class TestCast1_FP32(OpTest): class TestCast_INT32(OpTest): + def setUp(self): self.set_npu() self.op_type = "scatter" @@ -74,6 +77,7 @@ class TestCast_INT32(OpTest): class TestCast2_FP32(OpTest): + def setUp(self): self.set_npu() self.op_type = "scatter" @@ -97,6 +101,7 @@ class TestCast2_FP32(OpTest): class TestCast3_FP32(OpTest): + def setUp(self): self.set_npu() self.op_type = "scatter" @@ -121,6 +126,7 @@ class TestCast3_FP32(OpTest): class TestCast_INT64(OpTest): + def setUp(self): self.set_npu() self.op_type = "scatter" diff --git a/python/paddle/fluid/tests/unittests/npu/test_seed_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_seed_op_npu.py index 85a1e0594ba..37d77e84dba 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_seed_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_seed_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ paddle.enable_static() class TestSeedOpFixSeed(OpTest): + def setUp(self): self.set_npu() self.op_type = "seed" @@ -42,6 +44,7 @@ class TestSeedOpFixSeed(OpTest): class TestSeedOpDiffSeed(OpTest): + def setUp(self): self.set_npu() self.op_type = "seed" diff --git a/python/paddle/fluid/tests/unittests/npu/test_sequence_mask_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sequence_mask_npu.py index 21440de9fdd..850dbfa1fd3 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sequence_mask_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sequence_mask_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ 
paddle.enable_static() class SequenceMaskTestBase(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -57,13 +59,13 @@ class SequenceMaskTestBase(OpTest): def calc_ground_truth_mask(self): maxlen = np.max(self.x) if self.maxlen < 0 else self.maxlen shape = self.x.shape + (maxlen, ) - index_broadcast = np.broadcast_to( - np.reshape( - range(maxlen), newshape=[1] * self.x.ndim + [-1]), - shape=shape) - x_broadcast = np.broadcast_to( - np.reshape( - self.x, newshape=self.x.shape + (-1, )), shape=shape) + index_broadcast = np.broadcast_to(np.reshape( + range(maxlen), newshape=[1] * self.x.ndim + [-1]), + shape=shape) + x_broadcast = np.broadcast_to(np.reshape(self.x, + newshape=self.x.shape + + (-1, )), + shape=shape) return (index_broadcast < x_broadcast).astype(self.mask_dtype) def test_check_output(self): @@ -71,36 +73,43 @@ class SequenceMaskTestBase(OpTest): class SequenceMaskTest1(SequenceMaskTestBase): + def initParameters(self): self.mask_dtype = 'bool' class SequenceMaskTest2(SequenceMaskTestBase): + def initParameters(self): self.mask_dtype = 'uint8' class SequenceMaskTest3(SequenceMaskTestBase): + def initParameters(self): self.mask_dtype = 'int32' class SequenceMaskTest4(SequenceMaskTestBase): + def initParameters(self): self.mask_dtype = 'float32' class SequenceMaskTest5(SequenceMaskTestBase): + def initParameters(self): self.mask_dtype = 'float64' class SequenceMaskTest6(SequenceMaskTestBase): + def initParameters(self): self.maxlen = -1 class SequenceMaskTestBase_tensor_attr(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -128,13 +137,13 @@ class SequenceMaskTestBase_tensor_attr(OpTest): def calc_ground_truth_mask(self): maxlen = np.max(self.x) if self.maxlen < 0 else self.maxlen shape = self.x.shape + (maxlen, ) - index_broadcast = np.broadcast_to( - np.reshape( - range(maxlen), newshape=[1] * self.x.ndim + [-1]), - shape=shape) - x_broadcast = np.broadcast_to( - np.reshape( - self.x, newshape=self.x.shape + (-1, )), shape=shape) + index_broadcast = np.broadcast_to(np.reshape( + range(maxlen), newshape=[1] * self.x.ndim + [-1]), + shape=shape) + x_broadcast = np.broadcast_to(np.reshape(self.x, + newshape=self.x.shape + + (-1, )), + shape=shape) return (index_broadcast < x_broadcast).astype(self.mask_dtype) def test_check_output(self): @@ -142,31 +151,37 @@ class SequenceMaskTestBase_tensor_attr(OpTest): class SequenceMaskTest1_tensor_attr(SequenceMaskTestBase_tensor_attr): + def initParameters(self): self.mask_dtype = 'bool' class SequenceMaskTest2_tensor_attr(SequenceMaskTestBase_tensor_attr): + def initParameters(self): self.mask_dtype = 'uint8' class SequenceMaskTest3_tensor_attr(SequenceMaskTestBase_tensor_attr): + def initParameters(self): self.mask_dtype = 'int32' class SequenceMaskTest4_tensor_attr(SequenceMaskTestBase_tensor_attr): + def initParameters(self): self.mask_dtype = 'float32' class SequenceMaskTest5_tensor_attr(SequenceMaskTestBase_tensor_attr): + def initParameters(self): self.mask_dtype = 'float64' class TestSequenceMaskOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): input_data = np.random.uniform(1, 5, [4]).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/npu/test_set_value_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_set_value_op_npu.py index 421ea1df4cf..969c7ee2fbc 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_set_value_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_set_value_op_npu.py @@ -17,6 +17,7 @@ from __future__ import 
print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -25,6 +26,7 @@ from paddle.fluid import core class TestSetValueBase(unittest.TestCase): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -55,6 +57,7 @@ class TestSetValueBase(unittest.TestCase): class TestSetValueApi(TestSetValueBase): + def _run_static(self): paddle.enable_static() with paddle.static.program_guard(self.program): @@ -80,17 +83,16 @@ class TestSetValueApi(TestSetValueBase): self._get_answer() error_msg = "\nIn {} mode: \nExpected res = \n{}, \n\nbut received : \n{}" - self.assertTrue( - (self.data == static_out).all(), - msg=error_msg.format("static", self.data, static_out)) - self.assertTrue( - (self.data == dynamic_out).all(), - msg=error_msg.format("dynamic", self.data, dynamic_out)) + self.assertTrue((self.data == static_out).all(), + msg=error_msg.format("static", self.data, static_out)) + self.assertTrue((self.data == dynamic_out).all(), + msg=error_msg.format("dynamic", self.data, dynamic_out)) # 1. Test different type of item: int, Python slice, Paddle Tensor # 1.1 item is int class TestSetValueItemInt(TestSetValueApi): + def _call_setitem(self, x): x[0] = self.value @@ -101,6 +103,7 @@ class TestSetValueItemInt(TestSetValueApi): # 1.2 item is slice # 1.2.1 step is 1 class TestSetValueItemSlice(TestSetValueApi): + def _call_setitem(self, x): x[0:2] = self.value @@ -109,6 +112,7 @@ class TestSetValueItemSlice(TestSetValueApi): class TestSetValueItemSlice2(TestSetValueApi): + def _call_setitem(self, x): x[0:-1] = self.value @@ -117,6 +121,7 @@ class TestSetValueItemSlice2(TestSetValueApi): class TestSetValueItemSlice3(TestSetValueApi): + def _call_setitem(self, x): x[0:-1, 0:2] = self.value @@ -125,6 +130,7 @@ class TestSetValueItemSlice3(TestSetValueApi): class TestSetValueItemSlice4(TestSetValueApi): + def _call_setitem(self, x): x[0:, 1:2, :] = self.value @@ -152,6 +158,7 @@ class TestSetValueItemSlice4(TestSetValueApi): # 1.2.2 step > 1 class TestSetValueItemSliceStep(TestSetValueApi): + def set_shape(self): self.shape = [5, 5, 5] @@ -163,6 +170,7 @@ class TestSetValueItemSliceStep(TestSetValueApi): class TestSetValueItemSliceStep2(TestSetValueApi): + def set_shape(self): self.shape = [7, 5, 5] @@ -174,6 +182,7 @@ class TestSetValueItemSliceStep2(TestSetValueApi): class TestSetValueItemSliceStep3(TestSetValueApi): + def _call_setitem(self, x): x[0:-1, 0:2, ::2] = self.value @@ -182,6 +191,7 @@ class TestSetValueItemSliceStep3(TestSetValueApi): class TestSetValueItemSliceStep4(TestSetValueApi): + def _call_setitem(self, x): x[0:, 1:2:2, :] = self.value @@ -191,6 +201,7 @@ class TestSetValueItemSliceStep4(TestSetValueApi): # 1.2.3 step < 0 class TestSetValueItemSliceNegetiveStep(TestSetValueApi): + def set_shape(self): self.shape = [5, 2] @@ -205,6 +216,7 @@ class TestSetValueItemSliceNegetiveStep(TestSetValueApi): class TestSetValueItemSliceNegetiveStep2(TestSetValueApi): + def set_shape(self): self.shape = [5] @@ -219,6 +231,7 @@ class TestSetValueItemSliceNegetiveStep2(TestSetValueApi): class TestSetValueItemSliceNegetiveStep3(TestSetValueApi): + def set_shape(self): self.shape = [3] @@ -233,6 +246,7 @@ class TestSetValueItemSliceNegetiveStep3(TestSetValueApi): class TestSetValueItemSliceNegetiveStep4(TestSetValueApi): + def set_shape(self): self.shape = [3, 4, 5] @@ -247,6 +261,7 @@ class TestSetValueItemSliceNegetiveStep4(TestSetValueApi): class TestSetValueItemEllipsis1(TestSetValueApi): + def 
_call_setitem(self, x): x[0:, ..., 1:] = self.value @@ -255,6 +270,7 @@ class TestSetValueItemEllipsis1(TestSetValueApi): class TestSetValueItemEllipsis2(TestSetValueApi): + def _call_setitem(self, x): x[0:, ...] = self.value @@ -263,6 +279,7 @@ class TestSetValueItemEllipsis2(TestSetValueApi): class TestSetValueItemEllipsis3(TestSetValueApi): + def _call_setitem(self, x): x[..., 1:] = self.value @@ -271,6 +288,7 @@ class TestSetValueItemEllipsis3(TestSetValueApi): class TestSetValueItemEllipsis4(TestSetValueApi): + def _call_setitem(self, x): x[...] = self.value @@ -280,6 +298,7 @@ class TestSetValueItemEllipsis4(TestSetValueApi): # 1.4 item is Paddle Tensor class TestSetValueItemTensor(TestSetValueApi): + def _call_setitem(self, x): zero = paddle.full([1], 0, dtype="int32") x[zero] = self.value @@ -289,6 +308,7 @@ class TestSetValueItemTensor(TestSetValueApi): class TestSetValueItemTensor2(TestSetValueApi): + def _call_setitem(self, x): zero = paddle.full([1], 0, dtype="int32") two = paddle.full([1], 2, dtype="int64") @@ -299,6 +319,7 @@ class TestSetValueItemTensor2(TestSetValueApi): class TestSetValueItemTensor3(TestSetValueApi): + def _call_setitem(self, x): zero = paddle.full([1], 0, dtype="int32") two = paddle.full([1], 2, dtype="int64") @@ -309,6 +330,7 @@ class TestSetValueItemTensor3(TestSetValueApi): class TestSetValueItemTensor4(TestSetValueApi): + def _call_setitem(self, x): zero = paddle.full([1], 0, dtype="int32") two = paddle.full([1], 2, dtype="int64") @@ -319,6 +341,7 @@ class TestSetValueItemTensor4(TestSetValueApi): class TestSetValueItemTensor5(TestSetValueApi): + def _call_setitem(self, x): zero = paddle.full([1], 0, dtype="int32") two = paddle.full([1], 2, dtype="int64") @@ -329,6 +352,7 @@ class TestSetValueItemTensor5(TestSetValueApi): class TestSetValueItemTensor6(TestSetValueApi): + def set_shape(self): self.shape = [3, 4, 5] @@ -343,6 +367,7 @@ class TestSetValueItemTensor6(TestSetValueApi): # 1.5 item is None class TestSetValueItemNone1(TestSetValueApi): + def _call_setitem(self, x): x[None] = self.value @@ -351,6 +376,7 @@ class TestSetValueItemNone1(TestSetValueApi): class TestSetValueItemNone2(TestSetValueApi): + def _call_setitem(self, x): x[0, None, 1] = self.value @@ -359,6 +385,7 @@ class TestSetValueItemNone2(TestSetValueApi): class TestSetValueItemNone3(TestSetValueApi): + def _call_setitem(self, x): x[:, None, None, 1] = self.value @@ -367,6 +394,7 @@ class TestSetValueItemNone3(TestSetValueApi): class TestSetValueItemNone4(TestSetValueApi): + def _call_setitem(self, x): x[0, 0, None, 1] = self.value @@ -375,6 +403,7 @@ class TestSetValueItemNone4(TestSetValueApi): class TestSetValueItemNone5(TestSetValueApi): + def _call_setitem(self, x): x[0, None, 0, None, 1] = self.value @@ -383,6 +412,7 @@ class TestSetValueItemNone5(TestSetValueApi): class TestSetValueItemNone6(TestSetValueApi): + def _call_setitem(self, x): x[None, 0, 0, None, 0] = self.value @@ -391,6 +421,7 @@ class TestSetValueItemNone6(TestSetValueApi): class TestSetValueItemNone7(TestSetValueApi): + def _call_setitem(self, x): x[:, None, 1] = np.zeros(self.shape)[:, None, 0] @@ -399,6 +430,7 @@ class TestSetValueItemNone7(TestSetValueApi): class TestSetValueItemNone8(TestSetValueApi): + def _call_setitem(self, x): x[:, 1, None] = np.zeros(self.shape)[:, 0, None] @@ -407,6 +439,7 @@ class TestSetValueItemNone8(TestSetValueApi): class TestSetValueItemNone9(TestSetValueApi): + def _call_setitem(self, x): x[None, :, 1, ..., None] = np.zeros(self.shape)[0, 0, :, None] @@ -416,6 +449,7 @@ class 
TestSetValueItemNone9(TestSetValueApi): # 1.5 item is list or Tensor of bol class TestSetValueItemBool1(TestSetValueApi): + def _call_setitem(self, x): x[[True, False]] = self.value @@ -424,6 +458,7 @@ class TestSetValueItemBool1(TestSetValueApi): class TestSetValueItemBool2(TestSetValueApi): + def _call_setitem(self, x): x[[False, False]] = self.value @@ -432,6 +467,7 @@ class TestSetValueItemBool2(TestSetValueApi): class TestSetValueItemBool3(TestSetValueApi): + def _call_setitem(self, x): x[[False, True]] = np.zeros(self.shape[2]) @@ -440,6 +476,7 @@ class TestSetValueItemBool3(TestSetValueApi): class TestSetValueItemBool4(TestSetValueApi): + def _call_setitem(self, x): idx = paddle.assign(np.array([False, True])) x[idx] = np.zeros(self.shape[2]) @@ -449,17 +486,19 @@ class TestSetValueItemBool4(TestSetValueApi): class TestSetValueItemBool5(TestSetValueApi): + def _call_setitem(self, x): idx = paddle.assign( np.array([[False, True, False], [True, True, False]])) x[idx] = self.value def _get_answer(self): - self.data[np.array([[False, True, False], [True, True, False] - ])] = self.value + self.data[np.array([[False, True, False], [True, True, + False]])] = self.value class TestSetValueItemBool6(TestSetValueApi): + def _call_setitem(self, x): x[0, ...] = 0 x[x > 0] = self.value @@ -470,7 +509,9 @@ class TestSetValueItemBool6(TestSetValueApi): def create_test_value_int32(parent): + class TestValueInt(parent): + def set_value(self): self.value = 7 @@ -490,7 +531,9 @@ create_test_value_int32(TestSetValueItemSlice4) def create_test_value_int64(parent): + class TestValueInt(parent): + def set_value(self): self.value = 7 @@ -510,7 +553,9 @@ create_test_value_int64(TestSetValueItemSlice4) def create_test_value_tensor_fp32(parent): + class TestValueInt(parent): + def set_dtype(self): self.dtype = "float32" @@ -535,6 +580,7 @@ create_test_value_tensor_fp32(TestSetValueItemSlice4) # 3. 
Test different shape of value class TestSetValueValueShape1(TestSetValueApi): + def set_value(self): self.value = np.array([3, 4, 5, 6]) # shape is (4,) @@ -546,6 +592,7 @@ class TestSetValueValueShape1(TestSetValueApi): class TestSetValueValueShape2(TestSetValueApi): + def set_value(self): self.value = np.array([[3, 4, 5, 6]]) # shape is (1,4) @@ -557,9 +604,10 @@ class TestSetValueValueShape2(TestSetValueApi): class TestSetValueValueShape3(TestSetValueApi): + def set_value(self): - self.value = np.array( - [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]]) # shape is (3,4) + self.value = np.array([[1, 1, 1, 1], [2, 2, 2, 2], + [3, 3, 3, 3]]) # shape is (3,4) def _call_setitem(self, x): x[0] = self.value @@ -569,10 +617,11 @@ class TestSetValueValueShape3(TestSetValueApi): class TestSetValueValueShape4(TestSetValueApi): + def set_value(self): - self.value = np.array( - [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]]).astype( - self.dtype) # shape is (3,4) + self.value = np.array([[1, 1, 1, 1], [2, 2, 2, 2], + [3, 3, 3, + 3]]).astype(self.dtype) # shape is (3,4) def _call_setitem(self, x): x[0] = paddle.assign(self.value) # x is Paddle.Tensor @@ -582,6 +631,7 @@ class TestSetValueValueShape4(TestSetValueApi): class TestSetValueValueShape5(TestSetValueApi): + def set_value(self): self.value = np.array([3, 3, 3]).astype(self.dtype) diff --git a/python/paddle/fluid/tests/unittests/npu/test_sgd_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sgd_op_npu.py index 99061cba8d2..1a3d0b1dbdf 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sgd_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sgd_op_npu.py @@ -15,6 +15,7 @@ import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -25,6 +26,7 @@ SEED = 2021 class TestSGD(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -52,6 +54,7 @@ class TestSGD(OpTest): class TestNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -66,8 +69,9 @@ class TestNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.pow(sum, 2.0) @@ -91,12 +95,13 @@ class TestNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_shape_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_shape_op_npu.py index 0adfb69cd63..a4f42750585 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_shape_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_shape_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestShape(OpTest): + def 
setUp(self): self.set_npu() self.op_type = "shape" @@ -52,21 +54,25 @@ class TestShape(OpTest): class TestShape_fp16(TestShape): + def init_dtype(self): self.dtype = np.float16 class TestShape_double(TestShape): + def init_dtype(self): self.dtype = np.float64 class TestShape_int32(TestShape): + def init_dtype(self): self.dtype = np.int32 class TestShape_int64(TestShape): + def init_dtype(self): self.dtype = np.int64 diff --git a/python/paddle/fluid/tests/unittests/npu/test_shard_index_op.py b/python/paddle/fluid/tests/unittests/npu/test_shard_index_op.py index ce7e962624a..afa3e1a5819 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_shard_index_op.py +++ b/python/paddle/fluid/tests/unittests/npu/test_shard_index_op.py @@ -18,6 +18,7 @@ import unittest import numpy as np import math import sys + sys.path.append("..") from op_test import OpTest import paddle.fluid as fluid @@ -25,6 +26,7 @@ import paddle.fluid.core as core import paddle.fluid.framework as framework from paddle.fluid.framework import Program, program_guard import paddle + paddle.enable_static() SEED = 2021 @@ -58,6 +60,7 @@ def common_setup(self, index_num, nshards, shard_id, ignore_value): class TestShardIndexShardId0Op(OpTest): + def setUp(self): common_setup(self, 20, 2, 0, -1) @@ -66,16 +69,19 @@ class TestShardIndexShardId0Op(OpTest): class TestShardIndexShardId1Op(TestShardIndexShardId0Op): + def setUp(self): common_setup(self, 20, 2, 1, -1) class TestShardIndexIgnoreValueOp(TestShardIndexShardId0Op): + def setUp(self): common_setup(self, 20, 2, 0, -2) class TestShardIndexNotEvenlyDividedOp(TestShardIndexShardId0Op): + def setUp(self): common_setup(self, 15, 2, 1, -1) diff --git a/python/paddle/fluid/tests/unittests/npu/test_sigmoid_cross_entropy_with_logits_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sigmoid_cross_entropy_with_logits_op_npu.py index 913633b725b..777d96afdd8 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sigmoid_cross_entropy_with_logits_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sigmoid_cross_entropy_with_logits_op_npu.py @@ -41,11 +41,13 @@ class TestSigmoidCrossEntropyWithLogitsOp1(OpTest): batch_size = 64 num_classes = 20 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, (batch_size, num_classes)) - .astype(self.dtype)), - 'Label': np.random.randint(0, 2, (batch_size, num_classes)) - .astype(self.dtype) + 'X': + logit( + np.random.uniform(0, 1, (batch_size, num_classes)).astype( + self.dtype)), + 'Label': + np.random.randint(0, 2, + (batch_size, num_classes)).astype(self.dtype) } # Fw Pass is implemented as elementwise sigmoid followed by @@ -72,8 +74,8 @@ class TestSigmoidCrossEntropyWithLogitsOp1(OpTest): @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") -class TestSigmoidCrossEntropyWithLogitsOp3( - TestSigmoidCrossEntropyWithLogitsOp1): +class TestSigmoidCrossEntropyWithLogitsOp3(TestSigmoidCrossEntropyWithLogitsOp1 + ): """Test sigmoid_cross_entropy_with_logit_op with probabalistic label """ @@ -85,11 +87,13 @@ class TestSigmoidCrossEntropyWithLogitsOp3( batch_size = 64 num_classes = 20 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, (batch_size, num_classes)) - .astype(self.dtype)), - 'Label': np.random.uniform(0, 1, (batch_size, num_classes)) - .astype(self.dtype) + 'X': + logit( + np.random.uniform(0, 1, (batch_size, num_classes)).astype( + self.dtype)), + 'Label': + np.random.uniform(0, 1, + (batch_size, num_classes)).astype(self.dtype) } # Fw Pass is implemented as elementwise sigmoid 
followed by @@ -103,8 +107,8 @@ class TestSigmoidCrossEntropyWithLogitsOp3( @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") -class TestSigmoidCrossEntropyWithLogitsOp5( - TestSigmoidCrossEntropyWithLogitsOp1): +class TestSigmoidCrossEntropyWithLogitsOp5(TestSigmoidCrossEntropyWithLogitsOp1 + ): """Test sigmoid_cross_entropy_with_logit_op with probabalistic label """ @@ -116,11 +120,14 @@ class TestSigmoidCrossEntropyWithLogitsOp5( batch_size = [10, 10] num_classes = 20 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, tuple(batch_size + [num_classes])) - .astype(self.dtype)), - 'Label': np.random.uniform(0, 1, tuple(batch_size + [num_classes])) - .astype(self.dtype) + 'X': + logit( + np.random.uniform(0, 1, + tuple(batch_size + [num_classes])).astype( + self.dtype)), + 'Label': + np.random.uniform(0, 1, tuple(batch_size + [num_classes])).astype( + self.dtype) } # Fw Pass is implemented as elementwise sigmoid followed by @@ -134,8 +141,8 @@ class TestSigmoidCrossEntropyWithLogitsOp5( @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") -class TestSigmoidCrossEntropyWithLogitsOp6( - TestSigmoidCrossEntropyWithLogitsOp1): +class TestSigmoidCrossEntropyWithLogitsOp6(TestSigmoidCrossEntropyWithLogitsOp1 + ): """Test sigmoid_cross_entropy_with_logit_op with binary label """ @@ -147,11 +154,14 @@ class TestSigmoidCrossEntropyWithLogitsOp6( batch_size = [10, 10] num_classes = 20 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, tuple(batch_size + [num_classes])) - .astype(self.dtype)), - 'Label': np.random.randint(0, 2, tuple(batch_size + [num_classes])) - .astype(self.dtype) + 'X': + logit( + np.random.uniform(0, 1, + tuple(batch_size + [num_classes])).astype( + self.dtype)), + 'Label': + np.random.randint(0, 2, tuple(batch_size + [num_classes])).astype( + self.dtype) } # Fw Pass is implemented as elementwise sigmoid followed by diff --git a/python/paddle/fluid/tests/unittests/npu/test_sigmoid_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sigmoid_op_npu.py index 489f8bfb116..4525fc41105 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sigmoid_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sigmoid_op_npu.py @@ -28,6 +28,7 @@ SEED = 2021 @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestNPUSigmoid(OpTest): + def setUp(self): self.op_type = "sigmoid" self.set_npu() @@ -44,8 +45,9 @@ class TestNPUSigmoid(OpTest): self.check_output_with_place(self.place) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', max_relative_error=0.01) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.01) def set_npu(self): self.__class__.use_npu = True @@ -58,6 +60,7 @@ class TestNPUSigmoid(OpTest): @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestNPUSigmoidFp16(TestNPUSigmoid): + def test_check_output(self): self.check_output_with_place(self.place, atol=1e-3) diff --git a/python/paddle/fluid/tests/unittests/npu/test_sin_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sin_op_npu.py index 437f5c35e97..da1fd633a48 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sin_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sin_op_npu.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -30,7 +30,9 @@ paddle.enable_static() def test_class(op_type, typename): + class TestSin(OpTest): + def setUp(self): self.op_type = "sin" self.__class__.use_npu = True diff --git a/python/paddle/fluid/tests/unittests/npu/test_size_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_size_op_npu.py index 80721cbd66a..76fc5846534 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_size_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_size_op_npu.py @@ -15,6 +15,7 @@ import unittest import numpy as np import sys + sys.path.append("..") import paddle import paddle.fluid as fluid @@ -24,6 +25,7 @@ paddle.enable_static() class TestSizeOp(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -46,42 +48,49 @@ class TestSizeOp(OpTest): class TestSizeOp1(TestSizeOp): + def config(self): self.shape = [2] self.dtype = np.float64 class TestSizeOp2(TestSizeOp): + def config(self): self.shape = [2, 3] self.dtype = np.float32 class TestSizeOp3(TestSizeOp): + def config(self): self.shape = [2, 3, 100] self.dtype = np.float16 class TestSizeOp4(TestSizeOp): + def config(self): self.shape = [2**10] self.dtype = np.bool class TestSizeOp5(TestSizeOp): + def config(self): self.shape = [7, 8, 9, 10] self.dtype = np.int64 class TestSizeOp6(TestSizeOp): + def config(self): self.shape = [] self.dtype = np.int64 class TestSizeAPI(unittest.TestCase): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -107,10 +116,12 @@ class TestSizeAPI(unittest.TestCase): "x_2": input_2, }, fetch_list=[out_1, out_2]) - assert (np.array_equal( - res_1, np.array([np.size(input_1)]).astype("int64"))) - assert (np.array_equal( - res_2, np.array([np.size(input_2)]).astype("int64"))) + assert (np.array_equal(res_1, + np.array([np.size(input_1) + ]).astype("int64"))) + assert (np.array_equal(res_2, + np.array([np.size(input_2) + ]).astype("int64"))) def test_size_imperative(self): paddle.disable_static(self.place) diff --git a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py index a5b203b6eea..e0ad94361ad 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ EPOCH = 100 class TestSliceOp(OpTest): + def setUp(self): self.op_type = "slice" self.set_npu() @@ -65,13 +67,15 @@ class TestSliceOp(OpTest): def test_check_grad_normal(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['Input'], 'Out', max_relative_error=0.02) + self.check_grad_with_place(self.place, ['Input'], + 'Out', + max_relative_error=0.02) else: self.check_grad_with_place(self.place, ['Input'], 'Out') class TestSliceOp2(TestSliceOp): + def config(self): self.input = np.random.random([10, 5, 6]).astype(self.dtype) self.starts = [0] @@ -82,6 +86,7 @@ class TestSliceOp2(TestSliceOp): class 
TestSliceOpFp16(TestSliceOp): + def init_dtype(self): self.dtype = np.float16 @@ -92,6 +97,7 @@ class TestSliceOpFp16(TestSliceOp): class TestSliceOpTensor(TestSliceOp): + def setUp(self): self.op_type = "slice" self.set_npu() @@ -120,6 +126,7 @@ class TestSliceOpTensor(TestSliceOp): class TestSliceOpTensor2(TestSliceOpTensor): + def setUp(self): self.op_type = "slice" self.set_npu() @@ -148,6 +155,7 @@ class TestSliceOpTensor2(TestSliceOpTensor): class TestSliceOpFp16Tensor(TestSliceOpTensor): + def init_dtype(self): self.dtype = np.float16 @@ -158,6 +166,7 @@ class TestSliceOpFp16Tensor(TestSliceOpTensor): class TestSliceOpTensorList(TestSliceOp): + def setUp(self): self.op_type = "slice" self.set_npu() @@ -197,6 +206,7 @@ class TestSliceOpTensorList(TestSliceOp): class TestSliceOpTensorList2(TestSliceOpTensorList): + def setUp(self): self.op_type = "slice" self.set_npu() @@ -236,6 +246,7 @@ class TestSliceOpTensorList2(TestSliceOpTensorList): class TestSliceOpFp16TensorList(TestSliceOpTensorList): + def init_dtype(self): self.dtype = np.float16 @@ -246,6 +257,7 @@ class TestSliceOpFp16TensorList(TestSliceOpTensorList): class TestSliceNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -262,8 +274,9 @@ class TestSliceNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=data_shape, dtype='float32') b = paddle.static.data(name="b", shape=data_shape, dtype='float32') - label = paddle.static.data( - name="label", shape=[batch_size, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[batch_size, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.slice(sum, axes=[0, 1], starts=[0, 0], ends=[33, 2]) @@ -286,12 +299,13 @@ class TestSliceNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(EPOCH): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) @@ -307,6 +321,7 @@ class TestSliceNet(unittest.TestCase): class TestSliceOpDecsDim(OpTest): + def setUp(self): self.op_type = "slice" self.set_npu() @@ -352,18 +367,21 @@ class TestSliceOpDecsDim(OpTest): def test_check_grad_normal(self): if self.dtype == np.float16: - self.check_grad_with_place( - self.place, ['Input'], 'Out', max_relative_error=0.5) + self.check_grad_with_place(self.place, ['Input'], + 'Out', + max_relative_error=0.5) else: self.check_grad_with_place(self.place, ['Input'], 'Out') class TestSliceOpDecsDimFp16(TestSliceOpDecsDim): + def init_dtype(self): self.dtype = np.float16 class TestSliceOpDecsDim2(TestSliceOpDecsDim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype) self.starts = [1, 0, 2] @@ -375,6 +393,7 @@ class TestSliceOpDecsDim2(TestSliceOpDecsDim): class TestSliceOpDecsDim3(TestSliceOpDecsDim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype) self.starts = [-1, 0, 2] @@ -386,6 +405,7 @@ class TestSliceOpDecsDim3(TestSliceOpDecsDim): class TestSliceOpDecsDim4(TestSliceOpDecsDim): + def config(self): self.input = np.random.random([3, 4, 5, 7]).astype(self.dtype) self.starts = [0, 1, 2, 3] @@ -397,6 +417,7 @@ class 
TestSliceOpDecsDim4(TestSliceOpDecsDim): class TestSliceOpDecsDim5(TestSliceOpDecsDim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype) self.starts = [-1] @@ -408,6 +429,7 @@ class TestSliceOpDecsDim5(TestSliceOpDecsDim): class TestSliceOpDecsDim6(TestSliceOpDecsDim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype) self.starts = [0, 1, 2, 3] @@ -419,11 +441,11 @@ class TestSliceOpDecsDim6(TestSliceOpDecsDim): class TestSliceOpDecsDimStartsTensor(TestSliceOpDecsDim): + def set_inputs(self): self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype='int32') + "StartsTensor": np.array(self.starts, dtype='int32') } def set_attrs(self): @@ -446,18 +468,18 @@ class TestSliceOpDecsDimStartsTensor(TestSliceOpDecsDim): class TestSliceOpDecsDimStartsTensorFP16(TestSliceOpDecsDimStartsTensor): + def init_dtype(self): self.dtype = np.float16 class TestSliceOpDecsDimStartsTensorStartsAndEndsTensor(TestSliceOpDecsDim): + def set_inputs(self): self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype='int64'), - "EndsTensor": np.array( - self.ends, dtype='int32') + "StartsTensor": np.array(self.starts, dtype='int64'), + "EndsTensor": np.array(self.ends, dtype='int32') } def set_attrs(self): @@ -481,11 +503,13 @@ class TestSliceOpDecsDimStartsTensorStartsAndEndsTensor(TestSliceOpDecsDim): class TestSliceOpDecsDimStartsTensorStartsAndEndsTensorFP16( TestSliceOpDecsDimStartsTensorStartsAndEndsTensor): + def init_dtype(self): self.dtype = np.float16 class TestSliceOpDecsDimStartsListTensor(TestSliceOpDecsDim): + def set_inputs(self): starts_tensor = [] for index, ele in enumerate(self.starts): @@ -516,6 +540,7 @@ class TestSliceOpDecsDimStartsListTensor(TestSliceOpDecsDim): class TestSliceOpDecsDimStartsListTensor2(TestSliceOpDecsDimStartsListTensor): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype) self.starts = [-1] @@ -528,13 +553,15 @@ class TestSliceOpDecsDimStartsListTensor2(TestSliceOpDecsDimStartsListTensor): self.starts_infer = [-1] -class TestSliceOpDecsDimStartsListTensorFP16( - TestSliceOpDecsDimStartsListTensor): +class TestSliceOpDecsDimStartsListTensorFP16(TestSliceOpDecsDimStartsListTensor + ): + def init_dtype(self): self.dtype = np.float16 class TestSliceOpInt64(OpTest): + def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -554,8 +581,8 @@ class TestSliceOpInt64(OpTest): } def config(self): - self.input = np.random.randint( - 100, size=(3, 4, 5, 6)).astype(self.dtype) + self.input = np.random.randint(100, + size=(3, 4, 5, 6)).astype(self.dtype) self.starts = [1, 0, 2] self.ends = [3, 3, 4] self.axes = [0, 1, 2] @@ -570,6 +597,7 @@ class TestSliceOpInt64(OpTest): class TestSliceOpTensorInt64(TestSliceOpInt64): + def setUp(self): self.op_type = "slice" self.set_npu() @@ -589,8 +617,8 @@ class TestSliceOpTensorInt64(TestSliceOpInt64): } def config(self): - self.input = np.random.randint( - 100, size=(3, 4, 5, 6)).astype(self.dtype) + self.input = np.random.randint(100, + size=(3, 4, 5, 6)).astype(self.dtype) self.starts = np.array([1, 0, 2]).astype('int32') self.ends = np.array([3, 3, 4]).astype('int32') self.axes = [0, 1, 2] diff --git a/python/paddle/fluid/tests/unittests/npu/test_smooth_l1_loss_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_smooth_l1_loss_op_npu.py index 8c20f25061b..1ba4e711c1d 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_smooth_l1_loss_op_npu.py +++ 
b/python/paddle/fluid/tests/unittests/npu/test_smooth_l1_loss_op_npu.py @@ -17,10 +17,12 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle import paddle.fluid as fluid + paddle.enable_static() @@ -33,6 +35,7 @@ def smooth_l1_loss_forward(val, sigma2): class TestSmoothL1LossOp1(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -60,25 +63,25 @@ class TestSmoothL1LossOp1(OpTest): self.check_output_with_place(self.place) def test_check_grad_normal(self): - self.check_grad_with_place( - self.place, ['X', 'Y'], 'Out', max_relative_error=0.02) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + max_relative_error=0.02) def test_check_grad_ingore_x(self): - self.check_grad_with_place( - self.place, ['Y'], - 'Out', - max_relative_error=0.03, - no_grad_set=set("X")) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + max_relative_error=0.03, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad_with_place( - self.place, ['X'], - 'Out', - max_relative_error=0.03, - no_grad_set=set('Y')) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.03, + no_grad_set=set('Y')) class TestSmoothL1LossOp2(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -110,32 +113,34 @@ class TestSmoothL1LossOp2(OpTest): self.check_output_with_place(self.place) def test_check_grad_normal(self): - self.check_grad_with_place( - self.place, ['X', 'Y'], 'Out', max_relative_error=0.03) + self.check_grad_with_place(self.place, ['X', 'Y'], + 'Out', + max_relative_error=0.03) def test_check_grad_ingore_x(self): - self.check_grad_with_place( - self.place, ['Y'], - 'Out', - max_relative_error=0.03, - no_grad_set=set(['X', 'InsideWeight', 'OutsideWeight'])) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + max_relative_error=0.03, + no_grad_set=set( + ['X', 'InsideWeight', 'OutsideWeight'])) def test_check_grad_ingore_y(self): - self.check_grad_with_place( - self.place, ['X'], - 'Out', - max_relative_error=0.03, - no_grad_set=set(['Y', 'InsideWeight', 'OutsideWeight'])) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.03, + no_grad_set=set( + ['Y', 'InsideWeight', 'OutsideWeight'])) class TestSmoothL1LossOpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): # The input type of accuracy_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.NPUPlace(0)) - y1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.NPUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.NPUPlace(0)) + y1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.NPUPlace(0)) self.assertRaises(TypeError, fluid.layers.smooth_l1, x1, y1) # The input dtype of accuracy_op must be float32 or float64. 
x2 = fluid.layers.data(name='x2', shape=[4], dtype="int32") diff --git a/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py index f2a9ef2bee0..9d734eac48b 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestSoftmax(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -53,6 +55,7 @@ class TestSoftmax(OpTest): class TestSoftmaxNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -67,8 +70,9 @@ class TestSoftmaxNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[4, 32], dtype='float32') b = paddle.static.data(name="b", shape=[4, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[4, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[4, 1], + dtype='int64') c = paddle.multiply(a, b) d = paddle.sqrt(c) @@ -97,12 +101,13 @@ class TestSoftmaxNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py index f0ca7788345..f6f3d746d80 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -29,6 +30,7 @@ SEED = 2021 class TestSoftmaxWithCrossEntropyOp(OpTest): + def set_npu(self): self.__class__.use_npu = True @@ -88,14 +90,14 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): def test_check_grad(self): # fp32 has low precision, cpu and npu both need to relax the max_relative_error if using fp32 - self.check_grad_with_place( - self.place, ['Logits'], - 'Loss', - numeric_grad_delta=0.001, - max_relative_error=0.5) + self.check_grad_with_place(self.place, ['Logits'], + 'Loss', + numeric_grad_delta=0.001, + max_relative_error=0.5) class TestPowNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -110,8 +112,9 @@ class TestPowNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.pow(sum, 2.0) @@ -135,12 +138,13 @@ class 
TestPowNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_split_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_split_op_npu.py index fd48ec958e4..3a06e0566d4 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_split_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_split_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -30,6 +31,7 @@ SEED = 2021 @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestCase1(OpTest): + def setUp(self): self.set_npu() self.set_example() @@ -37,8 +39,9 @@ class TestCase1(OpTest): self.place = paddle.NPUPlace(0) ipt = self.x.astype(self.dtype) axis = self.axis if isinstance(self.axis, int) else int(self.axis[0]) - tmp_outs = np.split( - ipt, axis=axis, indices_or_sections=self.num_or_sections) + tmp_outs = np.split(ipt, + axis=axis, + indices_or_sections=self.num_or_sections) tmp_outs = [o.astype(self.dtype) for o in tmp_outs] self.outputs = {'Out': []} self.outs = [] @@ -68,6 +71,7 @@ class TestCase1(OpTest): class TestCase2(TestCase1): + def set_example(self): self.dtype = "float32" self.x = np.random.random((20, 4, 50)) @@ -76,6 +80,7 @@ class TestCase2(TestCase1): class TestCase4(TestCase1): + def set_example(self): self.dtype = "float16" self.x = np.random.random((4, 50, 20)) @@ -85,6 +90,7 @@ class TestCase4(TestCase1): # Test Sections class TestCase5(TestCase1): + def set_example(self): super().set_example() self.x = np.random.random((2, 10, 4)) @@ -97,6 +103,7 @@ class TestCase5(TestCase1): class API_TestSplit(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data = fluid.layers.data('data', shape=[-1, 10], dtype='float32') @@ -111,6 +118,7 @@ class API_TestSplit(unittest.TestCase): class API_TestSplit2(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data = fluid.layers.data('data', shape=[-1, 10], dtype='float32') @@ -125,6 +133,7 @@ class API_TestSplit2(unittest.TestCase): class API_TestDygraphSplit(unittest.TestCase): + def test_out1(self): with fluid.dygraph.guard(paddle.NPUPlace(0)): input_1 = np.random.random([4, 6, 6]).astype("int32") diff --git a/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py index 24b34fa625c..0ac775135e3 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestSqrt(OpTest): + def setUp(self): self.set_npu() self.op_type = "sqrt" @@ -58,6 +60,7 @@ class TestSqrt(OpTest): class TestSqrtFp16(OpTest): + def setUp(self): self.set_npu() self.op_type = "sqrt" @@ -84,6 +87,7 @@ class TestSqrtFp16(OpTest): class 
TestSqrtNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -98,8 +102,9 @@ class TestSqrtNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') c = paddle.multiply(a, b) d = paddle.sqrt(c) @@ -123,12 +128,13 @@ class TestSqrtNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py index 170f6b6ca4f..49dd0c94eb0 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestSquare(OpTest): + def setUp(self): self.set_npu() self.op_type = "square" @@ -55,6 +57,7 @@ class TestSquare(OpTest): class TestSquareFp16(OpTest): + def setUp(self): self.set_npu() self.op_type = "square" @@ -81,6 +84,7 @@ class TestSquareFp16(OpTest): class TestSquareNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -95,8 +99,9 @@ class TestSquareNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') c = paddle.multiply(a, b) d = paddle.square(c) @@ -120,12 +125,13 @@ class TestSquareNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_squared_l2_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_squared_l2_norm_op_npu.py index d3ee8df1cd1..af285349208 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_squared_l2_norm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_squared_l2_norm_op_npu.py @@ -18,6 +18,7 @@ import numpy as np import unittest from numpy import linalg as LA import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -47,10 +48,9 @@ class TestL2LossOp(OpTest): 
self.check_output_with_place(place=self.place) def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X'], - 'Out', - max_relative_error=self.max_relative_error) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=self.max_relative_error) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/npu/test_squeeze_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_squeeze_op_npu.py index 2e741c8d8a5..827fb0344d8 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_squeeze_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_squeeze_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -30,13 +31,16 @@ paddle.enable_static() class TestSqueezeOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "squeeze" self.init_test_case() self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")} self.init_attrs() - self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape), } + self.outputs = { + "Out": self.inputs["X"].reshape(self.new_shape), + } def set_npu(self): self.__class__.use_npu = True @@ -60,6 +64,7 @@ class TestSqueezeOp(OpTest): class TestSqueezeOp1(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (1, 3, 1, 40) self.axes = (0, -2) @@ -70,16 +75,18 @@ class TestSqueezeOp1(TestSqueezeOp): class TestSqueezeOp2(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = () self.new_shape = (20, 5) -# Correct: Just part of axes be squeezed. +# Correct: Just part of axes be squeezed. class TestSqueezeOp3(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (6, 1, 5, 1, 4, 1) self.axes = (1, -1) @@ -90,6 +97,7 @@ class TestSqueezeOp3(TestSqueezeOp): class TestSqueezeOp4(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (6, 1, 5, 1, 4, 1) self.axes = (1, 2) @@ -97,12 +105,13 @@ class TestSqueezeOp4(TestSqueezeOp): class TestSqueezeOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): # The input type of softmax_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], paddle.NPUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + paddle.NPUPlace(0)) self.assertRaises(TypeError, paddle.squeeze, x1) # The input axes of squeeze must be list. 
x2 = paddle.static.data(name='x2', shape=[4], dtype="int32") @@ -113,6 +122,7 @@ class TestSqueezeOpError(unittest.TestCase): class API_TestSqueeze(unittest.TestCase): + def setUp(self): self.executed_api() @@ -123,8 +133,9 @@ class API_TestSqueeze(unittest.TestCase): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data1 = paddle.static.data( - 'data1', shape=[-1, 1, 10], dtype='float64') + data1 = paddle.static.data('data1', + shape=[-1, 1, 10], + dtype='float64') result_squeeze = self.squeeze(data1, axis=[1]) place = paddle.NPUPlace(0) exe = paddle.static.Executor(place) @@ -136,11 +147,13 @@ class API_TestSqueeze(unittest.TestCase): class API_TestStaticSqueeze_(API_TestSqueeze): + def executed_api(self): self.squeeze = paddle.squeeze_ class API_TestDygraphSqueeze(unittest.TestCase): + def setUp(self): self.executed_api() @@ -199,12 +212,14 @@ class API_TestDygraphSqueeze(unittest.TestCase): class API_TestDygraphSqueezeInplace(API_TestDygraphSqueeze): + def executed_api(self): self.squeeze = paddle.squeeze_ # Correct: General. class TestSqueeze2Op(OpTest): + def setUp(self): self.set_npu() self.op_type = "squeeze2" @@ -220,8 +235,8 @@ class TestSqueeze2Op(OpTest): self.__class__.use_npu = True def test_check_output(self): - self.check_output_with_place( - paddle.NPUPlace(0), no_check_set=['XShape']) + self.check_output_with_place(paddle.NPUPlace(0), + no_check_set=['XShape']) def test_check_grad(self): self.check_grad_with_place(paddle.NPUPlace(0), ["X"], "Out") @@ -237,6 +252,7 @@ class TestSqueeze2Op(OpTest): # Correct: There is mins axis. class TestSqueeze2Op1(TestSqueeze2Op): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = (0, -2) @@ -245,14 +261,16 @@ class TestSqueeze2Op1(TestSqueeze2Op): # Correct: No axes input. class TestSqueeze2Op2(TestSqueeze2Op): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = () self.new_shape = (20, 5) -# Correct: Just part of axes be squeezed. +# Correct: Just part of axes be squeezed. 
class TestSqueeze2Op3(TestSqueeze2Op): + def init_test_case(self): self.ori_shape = (6, 1, 5, 1, 4, 1) self.axes = (1, -1) diff --git a/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py index af5648f8f39..ae20f642a28 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ paddle.enable_static() class TestStackOpBase(OpTest): + def initDefaultParameters(self): self.num_inputs = 4 self.input_dim = (5, 6, 7) @@ -78,41 +80,49 @@ class TestStackOpBase(OpTest): class TestStackOp1(TestStackOpBase): + def initParameters(self): self.num_inputs = 16 class TestStackOp2(TestStackOpBase): + def initParameters(self): self.num_inputs = 20 class TestStackOp3(TestStackOpBase): + def initParameters(self): self.axis = -1 class TestStackOp4(TestStackOpBase): + def initParameters(self): self.axis = -4 class TestStackOp5(TestStackOpBase): + def initParameters(self): self.axis = 1 class TestStackOp6(TestStackOpBase): + def initParameters(self): self.axis = 3 class TestStackOpINT32(TestStackOpBase): + def init_dtype(self): self.dtype = np.int32 class TestStackOpINT64(TestStackOpBase): + def init_dtype(self): self.dtype = np.int64 @@ -148,9 +158,8 @@ class TestStackAPIWithLoDTensorArray(unittest.TestCase): exe = fluid.Executor(self.place) res = exe.run(self.program, fetch_list=self.out_var) self.assertTrue( - np.array_equal( - res[0], np.stack( - [self.x] * self.iter_num, axis=self.axis))) + np.array_equal(res[0], + np.stack([self.x] * self.iter_num, axis=self.axis))) class TestTensorStackAPIWithLoDTensorArray(unittest.TestCase): @@ -184,12 +193,12 @@ class TestTensorStackAPIWithLoDTensorArray(unittest.TestCase): exe = fluid.Executor(self.place) res = exe.run(self.program, fetch_list=self.out_var) self.assertTrue( - np.array_equal( - res[0], np.stack( - [self.x] * self.iter_num, axis=self.axis))) + np.array_equal(res[0], + np.stack([self.x] * self.iter_num, axis=self.axis))) class API_test(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data1 = fluid.layers.data('data1', shape=[1, 2], dtype='float32') @@ -201,11 +210,12 @@ class API_test(unittest.TestCase): input1 = np.random.random([1, 2]).astype('float32') input2 = np.random.random([1, 2]).astype('float32') input3 = np.random.random([1, 2]).astype('float32') - result, = exe.run( - feed={"data1": input1, - "data2": input2, - "data3": input3}, - fetch_list=[result_stack]) + result, = exe.run(feed={ + "data1": input1, + "data2": input2, + "data3": input3 + }, + fetch_list=[result_stack]) expected_result = np.stack([input1, input2, input3], axis=0) self.assertTrue(np.allclose(expected_result, result)) @@ -216,6 +226,7 @@ class API_test(unittest.TestCase): class API_DygraphTest(unittest.TestCase): + def test_out(self): data1 = np.array([[1.0, 2.0]]) data2 = np.array([[3.0, 4.0]]) diff --git a/python/paddle/fluid/tests/unittests/npu/test_strided_slice_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_strided_slice_op_npu.py index 1260017da93..bf32653455c 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_strided_slice_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_strided_slice_op_npu.py @@ -14,6 +14,7 @@ import sys import numpy as np + 
sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import unittest @@ -57,13 +58,15 @@ def strided_slice_native_forward(input, axes, starts, ends, strides): class TestStridedSliceOp(OpTest): + def setUp(self): self.initTestCase() self.set_npu() self.place = paddle.NPUPlace(0) self.op_type = 'strided_slice' - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) self.inputs = {'Input': self.input} self.outputs = {'Out': self.output} @@ -94,6 +97,7 @@ class TestStridedSliceOp(OpTest): class TestStridedSliceOp1(TestStridedSliceOp): + def initTestCase(self): self.input = np.random.rand(100) self.axes = [0] @@ -104,6 +108,7 @@ class TestStridedSliceOp1(TestStridedSliceOp): class TestStridedSliceOp2(TestStridedSliceOp): + def initTestCase(self): self.input = np.random.rand(100) self.axes = [0] @@ -114,6 +119,7 @@ class TestStridedSliceOp2(TestStridedSliceOp): class TestStridedSliceOp3(TestStridedSliceOp): + def initTestCase(self): self.input = np.random.rand(100) self.axes = [0] @@ -124,6 +130,7 @@ class TestStridedSliceOp3(TestStridedSliceOp): class TestStridedSliceOp4(TestStridedSliceOp): + def initTestCase(self): self.input = np.random.rand(3, 4, 10) self.axes = [0, 1, 2] @@ -134,6 +141,7 @@ class TestStridedSliceOp4(TestStridedSliceOp): class TestStridedSliceOp5(TestStridedSliceOp): + def initTestCase(self): self.input = np.random.rand(5, 5, 5) self.axes = [0, 1, 2] @@ -144,6 +152,7 @@ class TestStridedSliceOp5(TestStridedSliceOp): class TestStridedSliceOp6(TestStridedSliceOp): + def initTestCase(self): self.input = np.random.rand(5, 5, 5) self.axes = [0, 1, 2] @@ -154,6 +163,7 @@ class TestStridedSliceOp6(TestStridedSliceOp): class TestStridedSliceOp7(TestStridedSliceOp): + def initTestCase(self): self.input = np.random.rand(5, 5, 5) self.axes = [0, 1, 2] @@ -164,6 +174,7 @@ class TestStridedSliceOp7(TestStridedSliceOp): class TestStridedSliceOp8(TestStridedSliceOp): + def initTestCase(self): self.input = np.random.rand(1, 100, 1) self.axes = [1] @@ -174,6 +185,7 @@ class TestStridedSliceOp8(TestStridedSliceOp): class TestStridedSliceOp9(TestStridedSliceOp): + def initTestCase(self): self.input = np.random.rand(1, 100, 1) self.axes = [1] @@ -184,6 +196,7 @@ class TestStridedSliceOp9(TestStridedSliceOp): class TestStridedSliceOp10(TestStridedSliceOp): + def initTestCase(self): self.input = np.random.rand(10, 10) self.axes = [0, 1] @@ -194,6 +207,7 @@ class TestStridedSliceOp10(TestStridedSliceOp): class TestStridedSliceOp11(TestStridedSliceOp): + def initTestCase(self): self.input = np.random.rand(3, 3, 3, 4) self.axes = [0, 1, 2, 3] @@ -204,6 +218,7 @@ class TestStridedSliceOp11(TestStridedSliceOp): class TestStridedSliceOp12(TestStridedSliceOp): + def initTestCase(self): self.input = np.random.rand(3, 3, 3, 4, 5) self.axes = [0, 1, 2, 3, 4] @@ -214,6 +229,7 @@ class TestStridedSliceOp12(TestStridedSliceOp): class TestStridedSliceOp13(TestStridedSliceOp): + def initTestCase(self): self.input = np.random.rand(3, 3, 3, 6, 7, 8) self.axes = [0, 1, 2, 3, 4, 5] @@ -224,11 +240,13 @@ class TestStridedSliceOp13(TestStridedSliceOp): class TestStridedSliceOpBool(TestStridedSliceOp): + def test_check_grad(self): pass class TestStridedSliceOpBool1D(TestStridedSliceOpBool): + def initTestCase(self): self.input = np.random.rand(100).astype("bool") self.axes = [0] @@ -239,6 +257,7 @@ class 
TestStridedSliceOpBool1D(TestStridedSliceOpBool): class TestStridedSliceOpBool2D(TestStridedSliceOpBool): + def initTestCase(self): self.input = np.random.rand(10, 10).astype("bool") self.axes = [0, 1] @@ -249,6 +268,7 @@ class TestStridedSliceOpBool2D(TestStridedSliceOpBool): class TestStridedSliceOpBool3D(TestStridedSliceOpBool): + def initTestCase(self): self.input = np.random.rand(3, 4, 10).astype("bool") self.axes = [0, 1, 2] @@ -259,6 +279,7 @@ class TestStridedSliceOpBool3D(TestStridedSliceOpBool): class TestStridedSliceOpBool4D(TestStridedSliceOpBool): + def initTestCase(self): self.input = np.random.rand(3, 3, 3, 4).astype("bool") self.axes = [0, 1, 2, 3] @@ -269,6 +290,7 @@ class TestStridedSliceOpBool4D(TestStridedSliceOpBool): class TestStridedSliceOpBool5D(TestStridedSliceOpBool): + def initTestCase(self): self.input = np.random.rand(3, 3, 3, 4, 5).astype("bool") self.axes = [0, 1, 2, 3, 4] @@ -279,6 +301,7 @@ class TestStridedSliceOpBool5D(TestStridedSliceOpBool): class TestStridedSliceOpBool6D(TestStridedSliceOpBool): + def initTestCase(self): self.input = np.random.rand(3, 3, 3, 6, 7, 8).astype("bool") self.axes = [0, 1, 2, 3, 4, 5] @@ -289,6 +312,7 @@ class TestStridedSliceOpBool6D(TestStridedSliceOpBool): class TestStridedSliceOp_starts_ListTensor(OpTest): + def setUp(self): self.place = paddle.NPUPlace(0) self.op_type = "strided_slice" @@ -320,8 +344,9 @@ class TestStridedSliceOp_starts_ListTensor(OpTest): self.axes = [0, 1, 2] self.strides = [1, 1, 1] self.infer_flags = [1, -1, 1] - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) self.starts_infer = [1, 10, 2] @@ -333,6 +358,7 @@ class TestStridedSliceOp_starts_ListTensor(OpTest): class TestStridedSliceOp_ends_ListTensor(OpTest): + def setUp(self): self.place = paddle.NPUPlace(0) self.op_type = "strided_slice" @@ -364,8 +390,9 @@ class TestStridedSliceOp_ends_ListTensor(OpTest): self.axes = [0, 1, 2] self.strides = [1, 1, 2] self.infer_flags = [1, -1, 1] - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) self.ends_infer = [3, 1, 4] @@ -377,6 +404,7 @@ class TestStridedSliceOp_ends_ListTensor(OpTest): class TestStridedSliceOp_starts_Tensor(OpTest): + def setUp(self): self.place = paddle.NPUPlace(0) self.op_type = "strided_slice" @@ -385,8 +413,7 @@ class TestStridedSliceOp_starts_Tensor(OpTest): self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype="int32") + "StartsTensor": np.array(self.starts, dtype="int32") } self.outputs = {'Out': self.output} self.attrs = { @@ -407,8 +434,9 @@ class TestStridedSliceOp_starts_Tensor(OpTest): self.axes = [0, 1, 2] self.strides = [1, 1, 1] self.infer_flags = [-1, -1, -1] - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) def test_check_output(self): self.check_output_with_place(self.place) @@ -418,6 +446,7 @@ class TestStridedSliceOp_starts_Tensor(OpTest): class TestStridedSliceOp_ends_Tensor(OpTest): + def setUp(self): self.place = paddle.NPUPlace(0) self.op_type = "strided_slice" @@ -426,8 +455,7 @@ class TestStridedSliceOp_ends_Tensor(OpTest): self.inputs 
= { 'Input': self.input, - "EndsTensor": np.array( - self.ends, dtype="int32") + "EndsTensor": np.array(self.ends, dtype="int32") } self.outputs = {'Out': self.output} self.attrs = { @@ -448,8 +476,9 @@ class TestStridedSliceOp_ends_Tensor(OpTest): self.axes = [0, 1, 2] self.strides = [1, 1, 1] self.infer_flags = [-1, -1, -1] - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) def test_check_output(self): self.check_output_with_place(self.place) @@ -459,6 +488,7 @@ class TestStridedSliceOp_ends_Tensor(OpTest): class TestStridedSliceOp_listTensor_Tensor(OpTest): + def setUp(self): self.place = paddle.NPUPlace(0) self.op_type = "strided_slice" @@ -472,8 +502,7 @@ class TestStridedSliceOp_listTensor_Tensor(OpTest): self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype="int32"), + "StartsTensor": np.array(self.starts, dtype="int32"), "EndsTensorList": ends_tensor } self.outputs = {'Out': self.output} @@ -495,8 +524,9 @@ class TestStridedSliceOp_listTensor_Tensor(OpTest): self.axes = [0, 1, 2] self.strides = [1, 1, 1] self.infer_flags = [-1, -1, -1] - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) def test_check_output(self): self.check_output_with_place(self.place) @@ -506,6 +536,7 @@ class TestStridedSliceOp_listTensor_Tensor(OpTest): class TestStridedSliceOp_strides_Tensor(OpTest): + def setUp(self): self.place = paddle.NPUPlace(0) self.op_type = "strided_slice" @@ -514,8 +545,7 @@ class TestStridedSliceOp_strides_Tensor(OpTest): self.inputs = { 'Input': self.input, - "StridesTensor": np.array( - self.strides, dtype="int32") + "StridesTensor": np.array(self.strides, dtype="int32") } self.outputs = {'Out': self.output} self.attrs = { @@ -536,8 +566,9 @@ class TestStridedSliceOp_strides_Tensor(OpTest): self.axes = [0, 1, 2] self.strides = [1, -1, 1] self.infer_flags = [-1, -1, -1] - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) def test_check_output(self): self.check_output_with_place(self.place) @@ -548,42 +579,48 @@ class TestStridedSliceOp_strides_Tensor(OpTest): # Test python API class TestStridedSliceAPI(unittest.TestCase): + def test_1(self): input = np.random.random([3, 4, 5, 6]).astype("float64") minus_1 = fluid.layers.fill_constant([1], "int32", -1) minus_3 = fluid.layers.fill_constant([1], "int32", -3) - starts = fluid.layers.data( - name='starts', shape=[3], dtype='int32', append_batch_size=False) - ends = fluid.layers.data( - name='ends', shape=[3], dtype='int32', append_batch_size=False) - strides = fluid.layers.data( - name='strides', shape=[3], dtype='int32', append_batch_size=False) - - x = fluid.layers.data( - name="x", - shape=[3, 4, 5, 6], - append_batch_size=False, - dtype="float64") - out_1 = fluid.layers.strided_slice( - x, - axes=[0, 1, 2], - starts=[-3, 0, 2], - ends=[3, 100, -1], - strides=[1, 1, 1]) - out_2 = fluid.layers.strided_slice( - x, - axes=[0, 1, 3], - starts=[minus_3, 0, 2], - ends=[3, 100, -1], - strides=[1, 1, 1]) - out_3 = fluid.layers.strided_slice( - x, - axes=[0, 1, 3], - starts=[minus_3, 0, 2], - ends=[3, 100, minus_1], - 
strides=[1, 1, 1]) - out_4 = fluid.layers.strided_slice( - x, axes=[0, 1, 2], starts=starts, ends=ends, strides=strides) + starts = fluid.layers.data(name='starts', + shape=[3], + dtype='int32', + append_batch_size=False) + ends = fluid.layers.data(name='ends', + shape=[3], + dtype='int32', + append_batch_size=False) + strides = fluid.layers.data(name='strides', + shape=[3], + dtype='int32', + append_batch_size=False) + + x = fluid.layers.data(name="x", + shape=[3, 4, 5, 6], + append_batch_size=False, + dtype="float64") + out_1 = fluid.layers.strided_slice(x, + axes=[0, 1, 2], + starts=[-3, 0, 2], + ends=[3, 100, -1], + strides=[1, 1, 1]) + out_2 = fluid.layers.strided_slice(x, + axes=[0, 1, 3], + starts=[minus_3, 0, 2], + ends=[3, 100, -1], + strides=[1, 1, 1]) + out_3 = fluid.layers.strided_slice(x, + axes=[0, 1, 3], + starts=[minus_3, 0, 2], + ends=[3, 100, minus_1], + strides=[1, 1, 1]) + out_4 = fluid.layers.strided_slice(x, + axes=[0, 1, 2], + starts=starts, + ends=ends, + strides=strides) out_5 = x[-3:3, 0:100:2, -1:2:-1] out_6 = x[minus_3:3:1, 0:100:2, :, minus_1:2:minus_1] @@ -613,8 +650,11 @@ class TestStridedSliceAPI(unittest.TestCase): starts = [-3, 0, 2] ends = [3, 2, 4] strides_1 = [1, 1, 1] - sliced_1 = paddle.strided_slice( - x, axes=axes, starts=starts, ends=ends, strides=strides_1) + sliced_1 = paddle.strided_slice(x, + axes=axes, + starts=starts, + ends=ends, + strides=strides_1) assert sliced_1.shape == (3, 2, 2, 2) diff --git a/python/paddle/fluid/tests/unittests/npu/test_sum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sum_op_npu.py index 1ea8504ceec..eb2594206ae 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_sum_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sum_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestSum1(OpTest): + def setUp(self): self.set_npu() self.init_dtype() @@ -54,6 +56,7 @@ class TestSum1(OpTest): class TestSum2(OpTest): + def setUp(self): self.set_npu() self.init_dtype() @@ -66,7 +69,7 @@ class TestSum2(OpTest): x3 = np.random.random((3, 3)).astype(self.dtype) self.inputs = {'X': [("x0", x0), ("x1", x1), ("x2", x2), ("x3", x3)]} # There will be a problem if just using `y=x0+x1+x2+x3` to calculate the - # summation result as the reference standard result. The reason is that + # summation result as the reference standard result. The reason is that # numpy's fp16 data has precision loss when doing `add` operation. # For example, the results of `x0+x1+x2+x3` is different from that of # `x3+x2+x1+x0` if the dtype is fp16. 
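As a quick aside on the comment above about fp16 precision: the standalone NumPy sketch below (illustrative only, assuming nothing beyond NumPy, and not part of the test file) shows why the reference sum in TestSum2 is accumulated in one fixed order, since regrouping the same fp16 additions can change the rounded result.

    import numpy as np

    np.random.seed(2021)
    xs = [np.random.random((3, 3)).astype(np.float16) for _ in range(4)]

    # Every intermediate sum is rounded back to fp16, so different groupings
    # of the same additions can accumulate different rounding error.
    forward = ((xs[0] + xs[1]) + xs[2]) + xs[3]
    backward = ((xs[3] + xs[2]) + xs[1]) + xs[0]

    print(np.array_equal(forward, backward))  # not guaranteed to be True in fp16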
@@ -88,6 +91,7 @@ class TestSum2(OpTest): class TestSum3(OpTest): + def setUp(self): self.set_npu() self.init_dtype() diff --git a/python/paddle/fluid/tests/unittests/npu/test_swish_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_swish_op_npu.py index c7c488625be..3267820eff5 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_swish_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_swish_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from paddle.fluid.tests.unittests.op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 1024 class TestSwishOp(OpTest): + def setUp(self): self.op_type = "swish" self.set_npu() @@ -49,11 +51,10 @@ class TestSwishOp(OpTest): dx = beta * out + expit(x) * (1 - beta * out) dx = dx / x.size - self.check_grad_with_place( - self.place, ['X'], - 'Out', - max_relative_error=0.01, - user_defined_grads=[dx]) + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.01, + user_defined_grads=[dx]) def set_npu(self): self.__class__.use_npu = True @@ -64,6 +65,7 @@ class TestSwishOp(OpTest): class TestSwishOpFp16(TestSwishOp): + def test_check_output(self): self.check_output_with_place(self.place, atol=1e-3) diff --git a/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_base_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_base_npu.py index dfd8680c442..4d81e97a9d0 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_base_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_base_npu.py @@ -20,6 +20,7 @@ import argparse import os import six import sys + sys.path.append("..") import subprocess import traceback @@ -43,6 +44,7 @@ SEED = 10 class TestSyncBatchNormRunnerBase(object): + def get_model(self, main, startup, @@ -61,9 +63,8 @@ class TestSyncBatchNormRunnerBase(object): not_ready_endpoints = [] for ep in endpoints: ip_port = ep.split(":") - with closing( - socket.socket(socket.AF_INET, - socket.SOCK_STREAM)) as sock: + with closing(socket.socket(socket.AF_INET, + socket.SOCK_STREAM)) as sock: sock.settimeout(2) sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) if hasattr(socket, 'SO_REUSEPORT'): @@ -76,13 +77,14 @@ class TestSyncBatchNormRunnerBase(object): not_ready_endpoints.append(ep) if not all_ok: sys.stderr.write("server not ready, wait 3 sec to retry...\n") - sys.stderr.write("not ready endpoints:" + str( - not_ready_endpoints) + "\n") + sys.stderr.write("not ready endpoints:" + + str(not_ready_endpoints) + "\n") sys.stderr.flush() time.sleep(3) else: break + #endpoints should be ["ip1:port1","ip2:port2"] def initCommunicator(self, program, rank, nranks, wait_port, @@ -92,29 +94,26 @@ class TestSyncBatchNormRunnerBase(object): if rank == 0 and wait_port: self.wait_server_ready(other_endpoints) block = program.global_block() - hccl_id_var = block.create_var( - name=nameGen.generate('hccl_id'), - persistable=True, - type=core.VarDesc.VarType.RAW) - block.append_op( - type='c_gen_hccl_id', - inputs={}, - outputs={'Out': hccl_id_var}, - attrs={ - 'rank': rank, - 'endpoint': current_endpoint, - 'other_endpoints': other_endpoints - }) - block.append_op( - type='c_comm_init_hccl', - inputs={'X': hccl_id_var}, - outputs={}, - attrs={ - 'rank': rank, - 'ring_id': self.global_ring_id, - 'device_id': int(os.getenv("FLAGS_selected_npus")), - 'rank_ids': nranks - }) + hccl_id_var = block.create_var(name=nameGen.generate('hccl_id'), + persistable=True, + 
type=core.VarDesc.VarType.RAW) + block.append_op(type='c_gen_hccl_id', + inputs={}, + outputs={'Out': hccl_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints + }) + block.append_op(type='c_comm_init_hccl', + inputs={'X': hccl_id_var}, + outputs={}, + attrs={ + 'rank': rank, + 'ring_id': self.global_ring_id, + 'device_id': int(os.getenv("FLAGS_selected_npus")), + 'rank_ids': nranks + }) def run_trainer(self, args): device_id = int(os.getenv("FLAGS_selected_npus", "0")) @@ -339,8 +338,8 @@ class TestSyncBatchNormRunnerBase(object): self.initCommunicator(startup_prog, rank, nranks, True, current_endpoint, endpoints) - sys.stderr.write("after init, startup_prog: " + startup_prog.to_string( - True) + "\n") + sys.stderr.write("after init, startup_prog: " + + startup_prog.to_string(True) + "\n") train_prog.global_seed(SEED) train_prog._sync_with_cpp() startup_prog.global_seed(SEED) @@ -350,8 +349,8 @@ class TestSyncBatchNormRunnerBase(object): self.rank = rank outs = self.get_model(train_prog, startup_prog, place, layout, SEED, True, only_forward) - sys.stderr.write("after get_model, train_prog: " + train_prog.to_string( - True) + "\n") + sys.stderr.write("after get_model, train_prog: " + + train_prog.to_string(True) + "\n") sys.stderr.write("after get_model, startup_prog: " + startup_prog.to_string(True) + "\n") @@ -405,6 +404,7 @@ from contextlib import closing class TestDistBase(unittest.TestCase): + def setUp(self): self._port_set = set() self._trainers = 2 @@ -413,6 +413,7 @@ class TestDistBase(unittest.TestCase): self._python_interp = sys.executable def _find_free_port(self): + def __free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: @@ -454,18 +455,16 @@ class TestDistBase(unittest.TestCase): tr0_pipe = open("/tmp/tr0_err.log", "wb") tr1_pipe = open("/tmp/tr1_err.log", "wb") # print(tr0_cmd) - # print(tr1_cmd) - tr0_proc = subprocess.Popen( - tr0_cmd.strip().split(), - stdout=subprocess.PIPE, - stderr=tr0_pipe, - env=env0) - - tr1_proc = subprocess.Popen( - tr0_cmd.strip().split(), - stdout=subprocess.PIPE, - stderr=tr1_pipe, - env=env1) + # print(tr1_cmd) + tr0_proc = subprocess.Popen(tr0_cmd.strip().split(), + stdout=subprocess.PIPE, + stderr=tr0_pipe, + env=env0) + + tr1_proc = subprocess.Popen(tr0_cmd.strip().split(), + stdout=subprocess.PIPE, + stderr=tr1_pipe, + env=env1) tr0_out, tr0_err = tr0_proc.communicate() tr1_out, tr1_err = tr1_proc.communicate() diff --git a/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_op_npu_baseline.py b/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_op_npu_baseline.py index 54a78ea2d52..27926e032f5 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_op_npu_baseline.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_op_npu_baseline.py @@ -18,6 +18,7 @@ import numpy as np import paddle import os import sys + sys.path.append("..") from paddle.fluid.tests.unittests.op_test import OpTest, _set_use_system_allocator @@ -29,13 +30,15 @@ paddle.enable_static() class TestSyncBatchNormOp(TestDistBase): + def _setup_config(self): pass def test_identity(self, col_type="identity"): dist_env = os.environ - self.check_with_place( - "sync_batch_norm_op_npu.py", col_type, need_envs=dist_env) + self.check_with_place("sync_batch_norm_op_npu.py", + col_type, + need_envs=dist_env) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_op_npu_extra.py 
b/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_op_npu_extra.py index bafe45b77da..8fe46e3f414 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_op_npu_extra.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_op_npu_extra.py @@ -18,6 +18,7 @@ import numpy as np import paddle import os import sys + sys.path.append("..") import paddle @@ -33,28 +34,29 @@ paddle.enable_static() class TestDygraphSyncBatchNormAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): my_sync_batch_norm = paddle.nn.SyncBatchNorm(10) - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.NPUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.NPUPlace(0)) self.assertRaises(TypeError, my_sync_batch_norm, x1) - # the input dtype of SyncBatchNorm must be float16 or float32 + # the input dtype of SyncBatchNorm must be float16 or float32 # float16 only can be set on GPU place and NPU place x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32") self.assertRaises(TypeError, my_sync_batch_norm, x2) class TestConvertSyncBatchNorm(unittest.TestCase): + def test_convert(self): with program_guard(Program(), Program()): - compare_model = paddle.nn.Sequential( - paddle.nn.Conv2D(3, 5, 3), - paddle.nn.BatchNorm2D(5), paddle.nn.BatchNorm2D(5)) + compare_model = paddle.nn.Sequential(paddle.nn.Conv2D(3, 5, 3), + paddle.nn.BatchNorm2D(5), + paddle.nn.BatchNorm2D(5)) model = paddle.nn.Sequential( - paddle.nn.Conv2D(3, 5, 3), - paddle.nn.BatchNorm2D(5), + paddle.nn.Conv2D(3, 5, 3), paddle.nn.BatchNorm2D(5), paddle.nn.BatchNorm2D( 5, weight_attr=fluid.ParamAttr(name='bn.scale'), @@ -67,8 +69,11 @@ class TestConvertSyncBatchNorm(unittest.TestCase): class TestConvertSyncBatchNormCast1(unittest.TestCase): + def test_convert(self): + class Net(nn.Layer): + def __init__(self): super(Net, self).__init__() self.conv1 = nn.Conv2D(3, 5, 3) @@ -93,6 +98,7 @@ class TestConvertSyncBatchNormCast1(unittest.TestCase): class TestDygraphSyncBatchNormDataFormatError(unittest.TestCase): + def test_errors(self): with fluid.dygraph.guard(fluid.NPUPlace(0)): my_sync_batch_norm = paddle.nn.SyncBatchNorm(10, data_format='CN') diff --git a/python/paddle/fluid/tests/unittests/npu/test_take_along_axis_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_take_along_axis_op_npu.py index 4aad02f7df0..450cb542943 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_take_along_axis_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_take_along_axis_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ paddle.enable_static() @unittest.skip(reason="Skip unsupported ut, need paddle surpport cann 5.0.4+") class TestTakeAlongAxisOp(OpTest): + def setUp(self): self.set_npu() self.init_data() @@ -59,13 +61,14 @@ class TestTakeAlongAxisOp(OpTest): self.x_type = "float64" self.x_shape = (5, 5, 5) self.index_type = "int32" - self.index = np.array( - [[[1]], [[1]], [[2]], [[4]], [[3]]]).astype(self.index_type) + self.index = np.array([[[1]], [[1]], [[2]], [[4]], + [[3]]]).astype(self.index_type) self.axis = 2 self.axis_type = "int64" class TestCase1(TestTakeAlongAxisOp): + def init_data(self): self.x_type = "float64" self.x_shape = (5, 5, 5) @@ -77,6 +80,7 @@ class TestCase1(TestTakeAlongAxisOp): @unittest.skip(reason="Skip unsupported ut, need paddle 
surpport cann 5.0.4+") class TestTakeAlongAxisAPI(unittest.TestCase): + def setUp(self): np.random.seed(0) self.shape = [3, 3] @@ -93,8 +97,10 @@ class TestTakeAlongAxisAPI(unittest.TestCase): index = paddle.fluid.data('Index', self.index_shape, "int64") out = paddle.take_along_axis(x, index, self.axis) exe = paddle.static.Executor(self.place) - res = exe.run(feed={'X': self.x_np, - 'Index': self.index_np}, + res = exe.run(feed={ + 'X': self.x_np, + 'Index': self.index_np + }, fetch_list=[out]) out_ref = np.array( np.take_along_axis(self.x_np, self.index_np, self.axis)) @@ -114,12 +120,13 @@ class TestTakeAlongAxisAPI(unittest.TestCase): @unittest.skip(reason="Skip unsupported ut, need paddle surpport cann 5.0.4+") class TestTakeAlongAxisAPICase1(TestTakeAlongAxisAPI): + def setUp(self): np.random.seed(0) self.shape = [2, 2] self.index_shape = [4, 2] - self.index_np = np.array( - [[0, 0], [1, 0], [0, 0], [1, 0]]).astype('int64') + self.index_np = np.array([[0, 0], [1, 0], [0, 0], [1, + 0]]).astype('int64') self.x_np = np.random.random(self.shape).astype(np.float32) self.place = paddle.NPUPlace(0) self.axis = 0 diff --git a/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py index 375eef12291..e26f713f00f 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -27,6 +28,7 @@ SEED = 2021 class TestTanh(OpTest): + def setUp(self): self.set_npu() self.op_type = "tanh" @@ -58,6 +60,7 @@ class TestTanh(OpTest): class TestTanhFp16(OpTest): + def setUp(self): self.set_npu() self.op_type = "tanh" @@ -84,6 +87,7 @@ class TestTanhFp16(OpTest): class TestTanhNet(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -98,8 +102,9 @@ class TestTanhNet(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): a = paddle.static.data(name="a", shape=[32, 32], dtype='float32') b = paddle.static.data(name="b", shape=[32, 32], dtype='float32') - label = paddle.static.data( - name="label", shape=[32, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[32, 1], + dtype='int64') c = paddle.multiply(a, b) d = paddle.tanh(c) @@ -123,12 +128,13 @@ class TestTanhNet(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(100): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ + "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) if epoch % 10 == 0: print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_tile_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_tile_op_npu.py index 0e61fa00fdf..7caacf738ec 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_tile_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_tile_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -30,6 +31,7 @@ np.random.seed(10) #Situation 1: repeat_times is a list (without tensor) class 
TestTileOpRank1(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -57,36 +59,42 @@ class TestTileOpRank1(OpTest): #with dimension expanding class TestTileOpRank2Expanding(TestTileOpRank1): + def init_data(self): self.ori_shape = [120] self.repeat_times = [2, 2] class TestTileOpRank2(TestTileOpRank1): + def init_data(self): self.ori_shape = [12, 14] self.repeat_times = [2, 3] class TestTileOpRank3_Corner(TestTileOpRank1): + def init_data(self): self.ori_shape = (2, 10, 5) self.repeat_times = (1, 1, 1) class TestTileOpRank3_Corner2(TestTileOpRank1): + def init_data(self): self.ori_shape = (2, 10, 5) self.repeat_times = (2, 2) class TestTileOpRank3(TestTileOpRank1): + def init_data(self): self.ori_shape = (2, 4, 15) self.repeat_times = (2, 1, 4) class TestTileOpRank4(TestTileOpRank1): + def init_data(self): self.ori_shape = (2, 4, 5, 7) self.repeat_times = (3, 2, 1, 2) @@ -94,6 +102,7 @@ class TestTileOpRank4(TestTileOpRank1): # Situation 2: repeat_times is a list (with tensor) class TestTileOpRank1_tensor_attr(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -128,6 +137,7 @@ class TestTileOpRank1_tensor_attr(OpTest): class TestTileOpRank2_Corner_tensor_attr(TestTileOpRank1_tensor_attr): + def init_data(self): self.ori_shape = [12, 14] self.repeat_times = [1, 1] @@ -135,6 +145,7 @@ class TestTileOpRank2_Corner_tensor_attr(TestTileOpRank1_tensor_attr): class TestTileOpRank2_attr_tensor(TestTileOpRank1_tensor_attr): + def init_data(self): self.ori_shape = [12, 14] self.repeat_times = [2, 3] @@ -143,6 +154,7 @@ class TestTileOpRank2_attr_tensor(TestTileOpRank1_tensor_attr): # Situation 3: repeat_times is a tensor class TestTileOpRank1_tensor(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -172,6 +184,7 @@ class TestTileOpRank1_tensor(OpTest): class TestTileOpRank2_tensor(TestTileOpRank1_tensor): + def init_data(self): self.ori_shape = [12, 14] self.repeat_times = [2, 3] @@ -179,13 +192,13 @@ class TestTileOpRank2_tensor(TestTileOpRank1_tensor): # Situation 4: input x is Integer class TestTileOpInteger(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) self.op_type = "tile" self.inputs = { - 'X': np.random.randint( - 10, size=(4, 4, 5)).astype("int32") + 'X': np.random.randint(10, size=(4, 4, 5)).astype("int32") } self.attrs = {'repeat_times': [2, 1, 4]} output = np.tile(self.inputs['X'], (2, 1, 4)) @@ -200,13 +213,13 @@ class TestTileOpInteger(OpTest): # Situation 5: input x is Integer class TestTileOpInt64_t(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) self.op_type = "tile" self.inputs = { - 'X': np.random.randint( - 10, size=(2, 4, 5)).astype("int64") + 'X': np.random.randint(10, size=(2, 4, 5)).astype("int64") } self.attrs = {'repeat_times': [2, 1, 4]} output = np.tile(self.inputs['X'], (2, 1, 4)) @@ -221,6 +234,7 @@ class TestTileOpInt64_t(OpTest): # Situation 6: input x is Bool class TestTileOpBool(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -239,6 +253,7 @@ class TestTileOpBool(OpTest): # Test python API class TestTileAPI(unittest.TestCase): + def test_api(self): with fluid.dygraph.guard(paddle.NPUPlace(0)): np_x = np.random.random([12, 14]).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/npu/test_top_k_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_top_k_op_npu.py index c8a620d9dbb..f05e4f19d8e 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_top_k_op_npu.py +++ 
b/python/paddle/fluid/tests/unittests/npu/test_top_k_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -29,6 +30,7 @@ SEED = 2021 class TestTopk(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -40,8 +42,8 @@ class TestTopk(OpTest): [0.96527182, 0.34851612, 0.12959783]]).astype(self.dtype) self.inputs = {'X': x} - np_out = np.array( - [[0.88745828], [0.82196718], [0.96527182]]).astype(self.dtype) + np_out = np.array([[0.88745828], [0.82196718], + [0.96527182]]).astype(self.dtype) np_indices = np.array([[1], [0], [0]]) self.attrs = {'k': 1, "axis": -1} @@ -59,6 +61,7 @@ class TestTopk(OpTest): class TestTopkV2(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -89,6 +92,7 @@ class TestTopkV2(OpTest): class TestTopkV3(OpTest): + def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) @@ -97,8 +101,10 @@ class TestTopkV3(OpTest): self.init_dtype() self.set_input_data() self.set_attrs() - output, indices = numpy_topk( - self.input_data, axis=self.axis, k=self.k, largest=True) + output, indices = numpy_topk(self.input_data, + axis=self.axis, + k=self.k, + largest=True) self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis} @@ -119,8 +125,8 @@ class TestTopkV3(OpTest): self.axis = 1 def set_input_data(self): - self.input_data = np.random.choice( - 10000, size=(10, 20), replace=False).astype(self.dtype) + self.input_data = np.random.choice(10000, size=(10, 20), + replace=False).astype(self.dtype) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/npu/test_top_k_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_top_k_v2_op_npu.py index a8242be855c..86a58cfae09 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_top_k_v2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_top_k_v2_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -40,6 +41,7 @@ def numpy_topk(x, k=1, axis=-1, largest=True): class TestTopkV2NPUOp(OpTest): + def setUp(self): paddle.enable_static() self.op_type = "top_k_v2" @@ -48,8 +50,10 @@ class TestTopkV2NPUOp(OpTest): self.set_dtype() self.set_input_data() self.set_attrs() - output, indices = numpy_topk( - self.input_data, axis=self.axis, k=self.k, largest=self.largest) + output, indices = numpy_topk(self.input_data, + axis=self.axis, + k=self.k, + largest=self.largest) self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest} @@ -64,8 +68,8 @@ class TestTopkV2NPUOp(OpTest): self.largest = True def set_input_data(self): - self.input_data = np.random.choice( - 10000, size=(10, 20), replace=False).astype(self.dtype) + self.input_data = np.random.choice(10000, size=(10, 20), + replace=False).astype(self.dtype) def test_check_output(self): self.__class__.no_need_check_grad = True @@ -80,6 +84,7 @@ class TestTopkV2NPUOp(OpTest): class TestTopkV2OpFloat16(TestTopkV2NPUOp): + def set_attrs(self): self.k = 3 self.axis = 1 @@ -93,6 +98,7 @@ class TestTopkV2OpFloat16(TestTopkV2NPUOp): class TestTopkV2OP1Int32(TestTopkV2NPUOp): + def set_attrs(self): self.k = 3 self.axis = 0 @@ -100,6 +106,7 @@ class TestTopkV2OP1Int32(TestTopkV2NPUOp): class TestTopkV2OP2Int32(TestTopkV2NPUOp): + def set_attrs(self): self.k = 4 self.axis = 0 @@ -107,6 +114,7 @@ 
class TestTopkV2OP2Int32(TestTopkV2NPUOp): class TestTopkV2OP3Int32(TestTopkV2NPUOp): + def set_attrs(self): self.k = 6 self.axis = 1 @@ -114,6 +122,7 @@ class TestTopkV2OP3Int32(TestTopkV2NPUOp): class TestTopkV2OP4Int32(TestTopkV2NPUOp): + def set_attrs(self): self.k = 3 self.axis = 1 @@ -121,26 +130,31 @@ class TestTopkV2OP4Int32(TestTopkV2NPUOp): class TestTopkV2Op1Int64(TestTopkV2OP1Int32): + def set_dtype(self): self.dtype = np.int64 class TestTopkV2Op2Int64(TestTopkV2OP2Int32): + def set_dtype(self): self.dtype = np.int64 class TestTopkV2Op3Int64(TestTopkV2OP3Int32): + def set_dtype(self): self.dtype = np.int64 class TestTopkV2Op4Int64(TestTopkV2OP4Int32): + def set_dtype(self): self.dtype = np.int64 class TestTopkV2Op1Float32(TestTopkV2OP1Int32): + def set_dtype(self): self.dtype = np.float32 @@ -149,6 +163,7 @@ class TestTopkV2Op1Float32(TestTopkV2OP1Int32): class TestTopkV2Op2Float32(TestTopkV2OP2Int32): + def set_dtype(self): self.dtype = np.float32 @@ -157,6 +172,7 @@ class TestTopkV2Op2Float32(TestTopkV2OP2Int32): class TestTopkV2Op3Float32(TestTopkV2OP3Int32): + def set_dtype(self): self.dtype = np.float32 @@ -165,6 +181,7 @@ class TestTopkV2Op3Float32(TestTopkV2OP3Int32): class TestTopkV2Op4Float32(TestTopkV2OP4Int32): + def set_dtype(self): self.dtype = np.float32 @@ -173,6 +190,7 @@ class TestTopkV2Op4Float32(TestTopkV2OP4Int32): class TestTopkV2Op1Float64(TestTopkV2OP1Int32): + def set_dtype(self): self.dtype = np.float64 @@ -181,6 +199,7 @@ class TestTopkV2Op1Float64(TestTopkV2OP1Int32): class TestTopkV2Op2Float64(TestTopkV2OP2Int32): + def set_dtype(self): self.dtype = np.float64 @@ -189,6 +208,7 @@ class TestTopkV2Op2Float64(TestTopkV2OP2Int32): class TestTopkV2Op3Float64(TestTopkV2OP3Int32): + def set_dtype(self): self.dtype = np.float64 @@ -197,6 +217,7 @@ class TestTopkV2Op3Float64(TestTopkV2OP3Int32): class TestTopkV2Op4Float64(TestTopkV2OP4Int32): + def set_dtype(self): self.dtype = np.float64 @@ -205,6 +226,7 @@ class TestTopkV2Op4Float64(TestTopkV2OP4Int32): class TestTopKAPI(unittest.TestCase): + def setUp(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -248,15 +270,16 @@ class TestTopKAPI(unittest.TestCase): self.assertTrue(np.allclose(paddle_result[0].numpy(), numpy_result[0])) self.assertTrue(np.allclose(paddle_result[1].numpy(), numpy_result[1])) - # test case for basic test case 6 for the partial sort + # test case for basic test case 6 for the partial sort paddle_result = paddle.topk(large_input_tensor, k=1, axis=-1) numpy_result = numpy_topk(self.large_input_data, k=1, axis=-1) self.assertTrue(np.allclose(paddle_result[0].numpy(), numpy_result[0])) self.assertTrue(np.allclose(paddle_result[1].numpy(), numpy_result[1])) - # test case for basic test case 7 for the unsorted + # test case for basic test case 7 for the unsorted paddle_result = paddle.topk(input_tensor, k=2, axis=1, sorted=False) - sort_paddle = numpy_topk( - np.array(paddle_result[0].numpy()), axis=1, k=2) + sort_paddle = numpy_topk(np.array(paddle_result[0].numpy()), + axis=1, + k=2) numpy_result = numpy_topk(self.input_data, k=2, axis=1) self.assertTrue(np.allclose(sort_paddle[0], numpy_result[0])) @@ -264,10 +287,12 @@ class TestTopKAPI(unittest.TestCase): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - input_tensor = paddle.static.data( - name="x", shape=[6, 7, 8], dtype="float64") - large_input_tensor = paddle.static.data( - name="large_x", shape=[2, 1030], dtype="float64") + input_tensor = 
paddle.static.data(name="x", + shape=[6, 7, 8], + dtype="float64") + large_input_tensor = paddle.static.data(name="large_x", + shape=[2, 1030], + dtype="float64") k_tensor = paddle.static.data(name="k", shape=[1], dtype="int32") result1 = paddle.topk(input_tensor, k=2) result2 = paddle.topk(input_tensor, k=2, axis=-1) @@ -281,17 +306,18 @@ class TestTopKAPI(unittest.TestCase): exe = paddle.static.Executor(place) input_data = np.random.rand(10, 20).astype("float64") large_input_data = np.random.rand(2, 100).astype("float64") - paddle_result = exe.run( - feed={ - "x": self.input_data, - "large_x": self.large_input_data, - "k": np.array([2]).astype("int32") - }, - fetch_list=[ - result1[0], result1[1], result2[0], result2[1], result3[0], - result3[1], result4[0], result4[1], result5[0], result5[1], - result6[0], result6[1], result7[0], result7[1] - ]) + paddle_result = exe.run(feed={ + "x": self.input_data, + "large_x": self.large_input_data, + "k": np.array([2]).astype("int32") + }, + fetch_list=[ + result1[0], result1[1], result2[0], + result2[1], result3[0], result3[1], + result4[0], result4[1], result5[0], + result5[1], result6[0], result6[1], + result7[0], result7[1] + ]) numpy_result = numpy_topk(self.input_data, k=2) self.assertTrue(np.allclose(paddle_result[0], numpy_result[0])) self.assertTrue(np.allclose(paddle_result[1], numpy_result[1])) @@ -304,13 +330,17 @@ class TestTopKAPI(unittest.TestCase): self.assertTrue(np.allclose(paddle_result[4], numpy_result[0])) self.assertTrue(np.allclose(paddle_result[5], numpy_result[1])) - numpy_result = numpy_topk( - self.input_data, k=2, axis=1, largest=False) + numpy_result = numpy_topk(self.input_data, + k=2, + axis=1, + largest=False) self.assertTrue(np.allclose(paddle_result[6], numpy_result[0])) self.assertTrue(np.allclose(paddle_result[7], numpy_result[1])) - numpy_result = numpy_topk( - self.input_data, k=2, axis=-1, largest=False) + numpy_result = numpy_topk(self.input_data, + k=2, + axis=-1, + largest=False) self.assertTrue(np.allclose(paddle_result[8], numpy_result[0])) self.assertTrue(np.allclose(paddle_result[9], numpy_result[1])) diff --git a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py index b1a6bfcdaaa..a5548b5ea12 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest, _set_use_system_allocator import paddle @@ -26,6 +27,7 @@ paddle.enable_static() class TestTransposeOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "transpose2" @@ -55,66 +57,77 @@ class TestTransposeOp(OpTest): class TestCase0(TestTransposeOp): + def init_shape_axis(self): self.shape = (100, ) self.axis = (0, ) class TestCase1(TestTransposeOp): + def init_shape_axis(self): self.shape = (3, 4, 10) self.axis = (0, 2, 1) class TestCase2(TestTransposeOp): + def init_shape_axis(self): self.shape = (2, 3, 4, 5) self.axis = (0, 2, 3, 1) class TestCase3(TestTransposeOp): + def init_shape_axis(self): self.shape = (2, 3, 4, 5, 6) self.axis = (4, 2, 3, 1, 0) class TestCase4(TestTransposeOp): + def init_shape_axis(self): self.shape = (2, 3, 4, 5, 6, 1) self.axis = (4, 2, 3, 1, 0, 5) class TestCase5(TestTransposeOp): + def init_shape_axis(self): self.shape = (2, 16, 96) self.axis = (0, 2, 1) class TestCase6(TestTransposeOp): + def 
init_shape_axis(self): self.shape = (2, 10, 12, 16) self.axis = (3, 1, 2, 0) class TestCase7(TestTransposeOp): + def init_shape_axis(self): self.shape = (2, 10, 2, 16) self.axis = (0, 1, 3, 2) class TestCase8(TestTransposeOp): + def init_shape_axis(self): self.shape = (2, 3, 2, 3, 2, 4, 3, 3) self.axis = (0, 1, 3, 2, 4, 5, 6, 7) class TestCase9(TestTransposeOp): + def init_shape_axis(self): self.shape = (2, 3, 2, 3, 2, 4, 3, 3) self.axis = (6, 1, 3, 5, 0, 2, 4, 7) class TestTransposeOpFP16(TestTransposeOp): + def init_dtype(self): self.dtype = np.float16 @@ -123,6 +136,7 @@ class TestTransposeOpFP16(TestTransposeOp): class TestTransposeOpInt64(TestTransposeOp): + def init_dtype(self): self.dtype = np.int64 diff --git a/python/paddle/fluid/tests/unittests/npu/test_tril_triu_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_tril_triu_op_npu.py index 8239dd4f3fa..b3d5fa9a6b5 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_tril_triu_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_tril_triu_op_npu.py @@ -42,7 +42,8 @@ class TestNPUTrilTriu(OpTest): 'lower': True if self.real_op_type == 'tril' else False, } self.outputs = { - 'Out': self.real_np_op(self.X, self.diagonal) + 'Out': + self.real_np_op(self.X, self.diagonal) if self.diagonal else self.real_np_op(self.X) } @@ -78,15 +79,17 @@ def case_generator(op_type, Xshape, diagonal, expected): } class FailureCase(unittest.TestCase): + def test_failure(self): paddle.enable_static() data = fluid.data(shape=Xshape, dtype='float32', name=cls_name) - with self.assertRaisesRegexp( - eval(expected.split(':')[-1]), errmsg[expected]): + with self.assertRaisesRegexp(eval(expected.split(':')[-1]), + errmsg[expected]): getattr(tensor, op_type)(x=data, diagonal=diagonal) class SuccessCase(TestNPUTrilTriu): + def initTestCase(self): paddle.enable_static() @@ -100,7 +103,7 @@ def case_generator(op_type, Xshape, diagonal, expected): ### NOTE: meaningful diagonal is [1 - min(H, W), max(H, W) -1] -### test the diagonal just at the border, upper/lower the border, +### test the diagonal just at the border, upper/lower the border, ### negative/positive integer within range and a zero cases = { 'success': { @@ -126,8 +129,9 @@ for _op_type in ['tril', 'triu']: for _expected, _params in cases.items(): for _Xshape, _diaglist in _params.items(): list( - map(lambda _diagonal: case_generator(_op_type, _Xshape, _diagonal, _expected), - _diaglist)) + map( + lambda _diagonal: case_generator( + _op_type, _Xshape, _diagonal, _expected), _diaglist)) class TestTrilTriuOpAPI(unittest.TestCase): @@ -151,7 +155,8 @@ class TestTrilTriuOpAPI(unittest.TestCase): tril_out, triu_out = exe.run( fluid.default_main_program(), feed={"x": data}, - fetch_list=[tril_out, triu_out], ) + fetch_list=[tril_out, triu_out], + ) self.assertTrue(np.allclose(tril_out, np.tril(data))) self.assertTrue(np.allclose(triu_out, np.triu(data))) @@ -189,6 +194,7 @@ class TestTrilTriuOpAPI(unittest.TestCase): # @skip_check_grad_ci(reason="[NPU does not support grad right now.") class TestNPUTrilTriu_bool(TestNPUTrilTriu): + def test_check_output(self): self.check_output_with_place(self.place) diff --git a/python/paddle/fluid/tests/unittests/npu/test_truncated_gaussian_random_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_truncated_gaussian_random_op_npu.py index de94e7febac..0ce6deb42e0 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_truncated_gaussian_random_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_truncated_gaussian_random_op_npu.py @@ -17,6 
+17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -30,6 +31,7 @@ SEED = 2021 class TestTruncatedNormal(unittest.TestCase): + def _test(self, run_npu=True): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -44,10 +46,12 @@ class TestTruncatedNormal(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): weight_attr = paddle.framework.ParamAttr( name="linear_weight", - initializer=paddle.nn.initializer.TruncatedNormal( - mean=0.0, std=2.0)) - linear = paddle.nn.Linear( - 2, 2, weight_attr=weight_attr, bias_attr=False) + initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, + std=2.0)) + linear = paddle.nn.Linear(2, + 2, + weight_attr=weight_attr, + bias_attr=False) if run_npu: place = paddle.NPUPlace(0) diff --git a/python/paddle/fluid/tests/unittests/npu/test_uniform_random_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_uniform_random_op_npu.py index 0e21c59432b..7f2c2753b9b 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_uniform_random_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_uniform_random_op_npu.py @@ -18,6 +18,7 @@ import sys import subprocess import unittest import numpy as np + sys.path.append("..") from op_test import OpTest import paddle @@ -40,6 +41,7 @@ def output_hist(out): class TestNPUUniformRandomOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "uniform_random" @@ -69,12 +71,12 @@ class TestNPUUniformRandomOp(OpTest): def verify_output(self, outs): hist, prob = self.output_hist(np.array(outs[0])) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestNPUUniformRandomOpSelectedRows(unittest.TestCase): + def get_places(self): places = [core.CPUPlace()] if core.is_compiled_with_npu(): @@ -89,19 +91,17 @@ class TestNPUUniformRandomOpSelectedRows(unittest.TestCase): scope = core.Scope() out = scope.var("X").get_selected_rows() paddle.seed(10) - op = Operator( - "uniform_random", - Out="X", - shape=[1000, 784], - min=-5.0, - max=10.0, - seed=10) + op = Operator("uniform_random", + Out="X", + shape=[1000, 784], + min=-5.0, + max=10.0, + seed=10) op.run(scope, place) self.assertEqual(out.get_tensor().shape(), [1000, 784]) hist, prob = output_hist(np.array(out.get_tensor())) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/npu/test_unsqueeze_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_unsqueeze_op_npu.py index cebfed1629a..3f7783d4959 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_unsqueeze_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_unsqueeze_op_npu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -29,6 +30,7 @@ paddle.enable_static() # unsqueeze class TestUnsqueezeOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "unsqueeze" @@ -37,7 +39,9 @@ class TestUnsqueezeOp(OpTest): self.x = np.random.random(self.ori_shape).astype("float32") self.inputs = {"X": OpTest.np_dtype_to_fluid_dtype(self.x)} self.init_attrs() - self.outputs = {"Out": self.x.reshape(self.new_shape), } 
+ self.outputs = { + "Out": self.x.reshape(self.new_shape), + } def set_npu(self): self.__class__.use_npu = True @@ -58,6 +62,7 @@ class TestUnsqueezeOp(OpTest): class TestUnsqueezeOp1(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (3, 40) self.axes = (0, -2) @@ -66,22 +71,25 @@ class TestUnsqueezeOp1(TestUnsqueezeOp): # No axes input. class TestUnsqueezeOp2(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = () self.new_shape = (1, 20, 5) -# Just part of axes be squeezed. +# Just part of axes be squeezed. class TestUnsqueezeOp3(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (6, 5, 1, 4) self.axes = (1, -1) self.new_shape = (6, 1, 5, 1, 4, 1) -# unsqueeze 2 +# unsqueeze 2 class TestUnsqueeze2Op(OpTest): + def setUp(self): self.set_npu() self.op_type = "unsqueeze2" @@ -115,6 +123,7 @@ class TestUnsqueeze2Op(OpTest): # Correct: There is mins axis. class TestUnsqueeze2Op1(TestUnsqueeze2Op): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (0, -2) @@ -123,14 +132,16 @@ class TestUnsqueeze2Op1(TestUnsqueeze2Op): # Correct: No axes input. class TestUnsqueeze2Op2(TestUnsqueeze2Op): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = () self.new_shape = (1, 20, 5) -# Correct: Just part of axes be squeezed. +# Correct: Just part of axes be squeezed. class TestUnsqueeze2Op3(TestUnsqueeze2Op): + def init_test_case(self): self.ori_shape = (6, 5, 1, 4) self.axes = (1, -1) diff --git a/python/paddle/fluid/tests/unittests/npu/test_unstack_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_unstack_op_npu.py index 097f31c7246..32519e3e4b6 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_unstack_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_unstack_op_npu.py @@ -16,6 +16,7 @@ from __future__ import print_function import numpy as np import sys + sys.path.append("..") from op_test import OpTest import unittest @@ -25,6 +26,7 @@ paddle.enable_static() class TestUnStackOpBase(OpTest): + def initDefaultParameters(self): self.input_dim = (5, 6, 7) self.axis = 0 @@ -74,21 +76,25 @@ class TestUnStackOpBase(OpTest): class TestStackOp3(TestUnStackOpBase): + def initParameters(self): self.axis = -1 class TestStackOp4(TestUnStackOpBase): + def initParameters(self): self.axis = -3 class TestStackOp5(TestUnStackOpBase): + def initParameters(self): self.axis = 1 class TestStackOp6(TestUnStackOpBase): + def initParameters(self): self.axis = 2 diff --git a/python/paddle/fluid/tests/unittests/npu/test_update_loss_scaling_min_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_update_loss_scaling_min_op_npu.py index 18e2db7f6b1..21be9e295d2 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_update_loss_scaling_min_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_update_loss_scaling_min_op_npu.py @@ -16,6 +16,7 @@ import unittest import numpy as np import sys import os + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ SEED = 2021 class TestUpdateLossScalingOpMinLossScalingBad(TestUpdateLossScalingOpBad): + def setUp(self): self.set_npu() self.op_type = "update_loss_scaling" diff --git a/python/paddle/fluid/tests/unittests/npu/test_update_loss_scaling_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_update_loss_scaling_op_npu.py index 1388adf609f..5299369ff17 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_update_loss_scaling_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_update_loss_scaling_op_npu.py @@ -15,6 +15,7 @@ 
import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -26,6 +27,7 @@ SEED = 2021 class TestUpdateLossScalingOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "update_loss_scaling" @@ -73,6 +75,7 @@ class TestUpdateLossScalingOp(OpTest): class TestUpdateLossScalingOpBad(TestUpdateLossScalingOp): + def setUp(self): self.set_npu() self.op_type = "update_loss_scaling" @@ -102,17 +105,21 @@ class TestUpdateLossScalingOpBad(TestUpdateLossScalingOp): class TestUpdateLossScalingLayer(unittest.TestCase): + def loss_scaling_check(self, use_npu=True, scope=fluid.Scope()): a = fluid.data(name="a", shape=[1024, 1024], dtype='float32') b = fluid.data(name="b", shape=[512, 128], dtype='float32') x = [a, b] found_inf = fluid.data(name="found_inf", shape=[1], dtype='bool') - prev_loss_scaling = fluid.data( - name="prev_loss_scaling", shape=[1], dtype='float32') - num_good_steps = fluid.data( - name="num_good_steps", shape=[1], dtype='int32') - num_bad_steps = fluid.data( - name="num_bad_steps", shape=[1], dtype='int32') + prev_loss_scaling = fluid.data(name="prev_loss_scaling", + shape=[1], + dtype='float32') + num_good_steps = fluid.data(name="num_good_steps", + shape=[1], + dtype='int32') + num_bad_steps = fluid.data(name="num_bad_steps", + shape=[1], + dtype='int32') a_v = np.random.random([1024, 1024]).astype('float32') b_v = np.random.random([512, 128]).astype('float32') @@ -126,17 +133,16 @@ class TestUpdateLossScalingLayer(unittest.TestCase): incr_ratio = 2 decr_ratio = 0.8 - result = amp_nn.update_loss_scaling( - x, - found_inf, - prev_loss_scaling, - num_good_steps, - num_bad_steps, - incr_every_n_steps, - decr_every_n_nan_or_inf, - incr_ratio, - decr_ratio, - name="update_loss_scaling") + result = amp_nn.update_loss_scaling(x, + found_inf, + prev_loss_scaling, + num_good_steps, + num_bad_steps, + incr_every_n_steps, + decr_every_n_nan_or_inf, + incr_ratio, + decr_ratio, + name="update_loss_scaling") place = paddle.NPUPlace(0) if use_npu else fluid.CPUPlace() exe = fluid.Executor(place) @@ -168,12 +174,15 @@ class TestUpdateLossScalingLayer(unittest.TestCase): b = fluid.data(name="b", shape=[512, 128], dtype='float32') x = [a, b] found_inf = fluid.data(name="found_inf", shape=[1], dtype='bool') - prev_loss_scaling = fluid.data( - name="prev_loss_scaling", shape=[1], dtype='float32') - num_good_steps = fluid.data( - name="num_good_steps", shape=[1], dtype='int32') - num_bad_steps = fluid.data( - name="num_bad_steps", shape=[1], dtype='int32') + prev_loss_scaling = fluid.data(name="prev_loss_scaling", + shape=[1], + dtype='float32') + num_good_steps = fluid.data(name="num_good_steps", + shape=[1], + dtype='int32') + num_bad_steps = fluid.data(name="num_bad_steps", + shape=[1], + dtype='int32') a_v = np.random.random([1024, 1024]).astype('float32') b_v = np.random.random([512, 128]).astype('float32') @@ -190,17 +199,16 @@ class TestUpdateLossScalingLayer(unittest.TestCase): incr_ratio = 2 decr_ratio = 0.8 - result = amp_nn.update_loss_scaling( - x, - found_inf, - prev_loss_scaling, - num_good_steps, - num_bad_steps, - incr_every_n_steps, - decr_every_n_nan_or_inf, - incr_ratio, - decr_ratio, - name="update_loss_scaling") + result = amp_nn.update_loss_scaling(x, + found_inf, + prev_loss_scaling, + num_good_steps, + num_bad_steps, + incr_every_n_steps, + decr_every_n_nan_or_inf, + incr_ratio, + decr_ratio, + name="update_loss_scaling") place = paddle.NPUPlace(0) if use_npu else fluid.CPUPlace() exe = fluid.Executor(place) diff --git 
a/python/paddle/fluid/tests/unittests/npu/test_where_index_npu.py b/python/paddle/fluid/tests/unittests/npu/test_where_index_npu.py index 20d7fb6879d..6790afc9af0 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_where_index_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_where_index_npu.py @@ -18,6 +18,7 @@ import numpy as np import unittest import paddle import sys + sys.path.append("..") from op_test import OpTest from paddle.fluid.op import Operator @@ -28,6 +29,7 @@ paddle.enable_static() class TestWhereIndexOp(OpTest): + def setUp(self): self.set_npu() self.op_type = "where_index" @@ -38,7 +40,9 @@ class TestWhereIndexOp(OpTest): self.check_output_with_place(self.place) def init_config(self): - self.inputs = {'Condition': np.array([True, False, True]), } + self.inputs = { + 'Condition': np.array([True, False, True]), + } self.outputs = {'Out': np.array([[0], [2]], dtype='int64')} @@ -47,42 +51,54 @@ class TestWhereIndexOp(OpTest): class TestNotBool(TestWhereIndexOp): + def init_config(self): - self.inputs = {'Condition': np.array([1, 0, 8]), } + self.inputs = { + 'Condition': np.array([1, 0, 8]), + } self.outputs = {'Out': np.array([[0], [2]], dtype='int64')} class TestAllFalse(TestWhereIndexOp): + def init_config(self): - self.inputs = {'Condition': np.array([False, False, False]), } + self.inputs = { + 'Condition': np.array([False, False, False]), + } self.outputs = {'Out': np.array([], dtype='int64')} class TestRank2(TestWhereIndexOp): + def init_config(self): - self.inputs = {'Condition': np.array([[True, False], [False, True]]), } + self.inputs = { + 'Condition': np.array([[True, False], [False, True]]), + } self.outputs = {'Out': np.array([[0, 0], [1, 1]], dtype='int64')} class TestRank3(TestWhereIndexOp): + def init_config(self): self.inputs = { - 'Condition': np.array([[[True, False], [False, True]], - [[False, True], [True, False]], - [[False, False], [False, True]]]), + 'Condition': + np.array([[[True, False], [False, True]], + [[False, True], [True, False]], + [[False, False], [False, True]]]), } self.outputs = { - 'Out': np.array( - [[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0], [2, 1, 1]], - dtype='int64') + 'Out': + np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0], [2, 1, 1]], + dtype='int64') } class TestWhereOpError(unittest.TestCase): + def test_api(self): with program_guard(Program(), Program()): cond = fluid.layers.data(name='cond', shape=[4], dtype='bool') @@ -95,7 +111,9 @@ class TestWhereOpError(unittest.TestCase): class TestWhereRaiseError(unittest.TestCase): + def test_errors(self): + def test_type(): fluid.layers.where([10]) diff --git a/python/paddle/fluid/tests/unittests/npu/test_where_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_where_op_npu.py index cf877ff2872..c90bf0cb493 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_where_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_where_op_npu.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,6 +17,7 @@ from __future__ import print_function, division import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest import paddle @@ -28,6 +29,7 @@ paddle.enable_static() class TestNPUWhereOp(OpTest): + def setUp(self): self.op_type = "where" self.set_npu() @@ -52,6 +54,7 @@ class TestNPUWhereOp(OpTest): class TestNPUWhereOp2(TestNPUWhereOp): + def init_config(self): self.x = np.random.uniform(-5, 5, (60, 2)).astype("float64") self.y = np.random.uniform(-5, 5, (60, 2)).astype("float64") @@ -59,6 +62,7 @@ class TestNPUWhereOp2(TestNPUWhereOp): class TestNPUWhereOp3(TestNPUWhereOp): + def init_config(self): self.x = np.random.uniform(-3, 5, (20, 2, 4)).astype("float64") self.y = np.random.uniform(-3, 5, (20, 2, 4)).astype("float64") @@ -66,6 +70,7 @@ class TestNPUWhereOp3(TestNPUWhereOp): class TestNPUWhereAPI(unittest.TestCase): + def setUp(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) @@ -90,8 +95,9 @@ class TestNPUWhereAPI(unittest.TestCase): train_prog = fluid.Program() startup = fluid.Program() with fluid.program_guard(train_prog, startup): - cond = fluid.data( - name='cond', shape=self.shape, dtype='bool') + cond = fluid.data(name='cond', + shape=self.shape, + dtype='bool') x = fluid.data(name='x', shape=self.shape, dtype='float32') y = fluid.data(name='y', shape=self.shape, dtype='float32') @@ -109,12 +115,13 @@ class TestNPUWhereAPI(unittest.TestCase): fetch_list.append(x.grad_name) if y_stop_gradient is False: fetch_list.append(y.grad_name) - out = exe.run( - train_prog, - feed={'cond': self.cond, - 'x': self.x, - 'y': self.y}, - fetch_list=fetch_list) + out = exe.run(train_prog, + feed={ + 'cond': self.cond, + 'x': self.x, + 'y': self.y + }, + fetch_list=fetch_list) assert np.array_equal(out[0], self.out) if x_stop_gradient is False: @@ -134,21 +141,24 @@ class TestNPUWhereAPI(unittest.TestCase): x = fluid.layers.data(name='x', shape=[4, 1], dtype='float32') y = fluid.layers.data(name='y', shape=[4, 2], dtype='float32') x_i = np.array([[0.9383, 0.1983, 3.2, 1.2]]).astype("float32") - y_i = np.array([[1.0, 1.0, 1.0, 1.0], - [1.0, 1.0, 1.0, 1.0]]).astype("float32") + y_i = np.array([[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, + 1.0]]).astype("float32") result = paddle.where(x > 1, x=x, y=y) exe = fluid.Executor(self.place) exe.run(startup) out = exe.run(train_prog, - feed={'x': x_i, - 'y': y_i}, + feed={ + 'x': x_i, + 'y': y_i + }, fetch_list=[result]) assert np.array_equal(out[0], np.where(x_i > 1, x_i, y_i)) class TestWhereDygraphAPI(unittest.TestCase): + def test_api(self): with fluid.dygraph.guard(paddle.NPUPlace(0)): x_i = np.array([0.9383, 0.1983, 3.2, 1.2]).astype("float64") diff --git a/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py index a388761d5e3..22918347a2d 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py @@ -28,13 +28,20 @@ paddle.enable_static() class TestWhileOp(unittest.TestCase): + def simple_net(self): - d0 = layers.data( - "d0", shape=[10], append_batch_size=False, dtype='float32') - d1 = layers.data( - "d1", shape=[10], append_batch_size=False, dtype='float32') - d2 = layers.data( - "d2", shape=[10], append_batch_size=False, dtype='float32') + d0 = layers.data("d0", + shape=[10], + append_batch_size=False, + dtype='float32') + d1 = layers.data("d1", + shape=[10], + append_batch_size=False, + dtype='float32') + d2 = layers.data("d2", 
+ shape=[10], + append_batch_size=False, + dtype='float32') # fill_constant npu op doesn't support int64 i = layers.zeros(shape=[1], dtype='int32') i = layers.cast(i, 'int64') @@ -102,9 +109,11 @@ class TestWhileOp(unittest.TestCase): for i in range(3): d.append(numpy.random.random(size=[10]).astype('float32')) - outs = exe.run(feed={'d0': d[0], - 'd1': d[1], - 'd2': d[2]}, + outs = exe.run(feed={ + 'd0': d[0], + 'd1': d[1], + 'd2': d[2] + }, fetch_list=[sum_result]) self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01) diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index fe1dbf3b927..ded9f188472 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -46,14 +46,16 @@ from testsuite import ( create_op, set_input, append_input_output, - append_loss_ops, ) + append_loss_ops, +) from white_list import ( op_accuracy_white_list, check_shape_white_list, compile_vs_runtime_white_list, no_check_set_white_list, op_threshold_white_list, - no_grad_set_white_list, ) + no_grad_set_white_list, +) # For switch new eager mode globally g_is_in_eager = _in_eager_without_dygraph_check() @@ -88,11 +90,12 @@ def check_out_dtype(api_fn, in_specs, expect_dtypes, target_index=0, **configs): shape, dtype = spec else: raise ValueError( - "Value of in_specs[{}] should contains two elements: [shape, dtype]". - format(index)) + "Value of in_specs[{}] should contains two elements: [shape, dtype]" + .format(index)) input_t.append( - paddle.static.data( - name='data_%s' % index, shape=shape, dtype=dtype)) + paddle.static.data(name='data_%s' % index, + shape=shape, + dtype=dtype)) out = api_fn(*input_t, **configs) out_dtype = fluid.data_feeder.convert_dtype(out.dtype) @@ -112,8 +115,8 @@ def _set_use_system_allocator(value=None): def randomize_probability(batch_size, class_num, dtype='float32'): - prob = np.random.uniform( - 0.1, 1.0, size=(batch_size, class_num)).astype(dtype) + prob = np.random.uniform(0.1, 1.0, + size=(batch_size, class_num)).astype(dtype) prob_sum = prob.sum(axis=1) for i in six.moves.xrange(len(prob)): prob[i] /= prob_sum[i] @@ -152,8 +155,9 @@ def get_numeric_gradient(place, elif tensor_to_check_dtype == core.VarDesc.VarType.COMPLEX128: tensor_tp_check_dtype = np.complex128 else: - raise ValueError("Not supported data type " + str(tensor_to_check_dtype) - + ", tensor name : " + str(input_to_check)) + raise ValueError("Not supported data type " + + str(tensor_to_check_dtype) + ", tensor name : " + + str(input_to_check)) def get_output(): sum = [] @@ -178,10 +182,10 @@ def get_numeric_gradient(place, elif tensor_to_check._dtype() == core.VarDesc.VarType.BF16: numpy_tensor = np.array(tensor).astype(np.uint16) numpy_tensor = numpy_tensor.flatten() - return struct.unpack('= class_interval[i], unique_label < class_interval[i + 1]) - pos_class_center_per_device.append(unique_label[index] - class_interval[ - i]) + pos_class_center_per_device.append(unique_label[index] - + class_interval[i]) unique_label_per_device.append(unique_label[index]) num_samples_per_device = [] @@ -67,6 +67,7 @@ def class_center_sample_numpy(label, classes_list, num_samples): class TestParallelClassCenterSampleOp(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() fleet.init(is_collective=True, strategy=strategy) @@ -90,8 +91,9 @@ class TestParallelClassCenterSampleOp(unittest.TestCase): classes_list = np.random.randint(10, 15, (nranks, )) num_class = np.sum(classes_list) 
- np_label = np.random.randint( - 0, num_class, (batch_size, ), dtype=dtype) + np_label = np.random.randint(0, + num_class, (batch_size, ), + dtype=dtype) label = paddle.to_tensor(np_label, dtype=dtype) np_remapped_label, np_sampled_class_center_per_device = class_center_sample_numpy( np_label, classes_list, num_samples) diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_control_flow_different.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_control_flow_different.py index 26c9944abd6..a05de58363c 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_control_flow_different.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_control_flow_different.py @@ -28,14 +28,14 @@ np.random.seed(2021) class SimpleNet(fluid.Layer): + def __init__(self, hidden_size, vocab_size, is_sparse=False): super(SimpleNet, self).__init__() self.hidden_size = hidden_size self.vocab_size = vocab_size - self.embedding = Embedding( - size=[self.vocab_size, self.hidden_size], - dtype='float32', - is_sparse=is_sparse) + self.embedding = Embedding(size=[self.vocab_size, self.hidden_size], + dtype='float32', + is_sparse=is_sparse) self.lin_a = paddle.nn.Linear(self.hidden_size, self.vocab_size) self.lin_b = paddle.nn.Linear(self.vocab_size, 1) @@ -60,8 +60,10 @@ class SimpleNet(fluid.Layer): projection = paddle.reshape(projection, shape=[-1, 1]) output = paddle.gather(projection, emb_mask_inds) target = paddle.gather(label, emb_mask_inds) - loss_box = F.smooth_l1_loss( - output, target, reduction='sum', delta=1.0) + loss_box = F.smooth_l1_loss(output, + target, + reduction='sum', + delta=1.0) loss_box = loss_box / len(conf) return loss_box @@ -73,29 +75,33 @@ batch_num = 2000 hidden_size = 5 vocab_size = 100 -conf_dataset = [[0], [0], [0], [0], [1], [0], [1], [0], [0], [1], [0], [1], - [1], [1], [1], [1], [1], [1], [1], [1], [1], [0], [0], [1]] +conf_dataset = [[0], [0], [0], [0], [1], [0], [1], [0], [0], [1], [0], [1], [1], + [1], [1], [1], [1], [1], [1], [1], [1], [0], [0], [1]] def fake_sample_reader(): + def __reader__(): for i in range(batch_num): x_data = np.random.randint(0, vocab_size) y_data = np.random.random_sample((1, )).astype('float32') - conf_data = np.array(conf_dataset[i % len(conf_dataset)]).astype( - 'int64') + conf_data = np.array( + conf_dataset[i % len(conf_dataset)]).astype('int64') yield x_data, y_data, conf_data return __reader__ class TestSimpleNet(TestParallelDyGraphRunnerBase): + def get_model(self): - model = SimpleNet( - hidden_size=hidden_size, vocab_size=vocab_size, is_sparse=False) + model = SimpleNet(hidden_size=hidden_size, + vocab_size=vocab_size, + is_sparse=False) - train_reader = paddle.batch( - fake_sample_reader(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(fake_sample_reader(), + batch_size=batch_size, + drop_last=True) optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters()) diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_control_flow_same.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_control_flow_same.py index 3157d5e4129..4f780925389 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_control_flow_same.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_control_flow_same.py @@ -36,14 +36,15 @@ batch_num = 1000 class SimpleNet(fluid.Layer): + def __init__(self): super(SimpleNet, self).__init__() - self.net_a = paddle.nn.Sequential( - paddle.nn.Linear(10, 20), - paddle.nn.Linear(20, 20), paddle.nn.Linear(20, 5)) - self.net_b = 
paddle.nn.Sequential( - paddle.nn.Linear(10, 20), - paddle.nn.Linear(20, 20), paddle.nn.Linear(20, 5)) + self.net_a = paddle.nn.Sequential(paddle.nn.Linear(10, 20), + paddle.nn.Linear(20, 20), + paddle.nn.Linear(20, 5)) + self.net_b = paddle.nn.Sequential(paddle.nn.Linear(10, 20), + paddle.nn.Linear(20, 20), + paddle.nn.Linear(20, 5)) self.net_unused = Linear(10, 20) self.step = 0 @@ -57,6 +58,7 @@ class SimpleNet(fluid.Layer): def fake_sample_reader(): + def __reader__(): for i in range(batch_num): x_data = np.random.random_sample((10, )).astype('float32') @@ -66,10 +68,12 @@ def fake_sample_reader(): class TestSimpleNet(TestParallelDyGraphRunnerBase): + def get_model(self): model = SimpleNet() - train_reader = paddle.batch( - fake_sample_reader(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(fake_sample_reader(), + batch_size=batch_size, + drop_last=True) optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters()) return model, train_reader, optimizer diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_with_pylayer.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_with_pylayer.py index 8ce2275868b..049c3a0858a 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_with_pylayer.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_with_pylayer.py @@ -31,6 +31,7 @@ out_dim = 10 class cus_tanh(PyLayer): + @staticmethod def forward(ctx, x): y = paddle.tanh(x) @@ -45,6 +46,7 @@ class cus_tanh(PyLayer): class cus_tanh_eager(EagerPyLayer): + @staticmethod def forward(ctx, x): y = paddle.tanh(x) @@ -59,6 +61,7 @@ class cus_tanh_eager(EagerPyLayer): class SimpleNet(paddle.nn.Layer): + def __init__(self, train_id, model_id): super(SimpleNet, self).__init__() self.w = self.create_parameter(shape=[in_dim, batch], dtype="float32") @@ -82,6 +85,7 @@ class SimpleNet(paddle.nn.Layer): class TestDistTraning(unittest.TestCase): + def test_multiple_gpus(self): self.trainer_id = dist.get_rank() dist.init_parallel_env() diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check.py index 781d606f33b..b4f94559504 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check.py @@ -33,16 +33,17 @@ out_dim = 20 class SimpleNet(fluid.Layer): + def __init__(self, train_id): super(SimpleNet, self).__init__() - self.w1 = self.create_parameter( - shape=[in_dim, out_dim], dtype="float32") - self.w2 = self.create_parameter( - shape=[in_dim, out_dim], dtype="float32") + self.w1 = self.create_parameter(shape=[in_dim, out_dim], + dtype="float32") + self.w2 = self.create_parameter(shape=[in_dim, out_dim], + dtype="float32") self.share_net = Linear(out_dim, 10) - self.unused_param = self.create_parameter( - shape=[out_dim, in_dim], dtype="float64") + self.unused_param = self.create_parameter(shape=[out_dim, in_dim], + dtype="float64") # just for test sync_params_buffers self.register_buffer("queue", paddle.randn([10, 5])) @@ -53,8 +54,8 @@ class SimpleNet(fluid.Layer): def forward(self, x): is_use = (paddle.equal_all( - x, paddle.ones(shape=(batch, in_dim))).numpy()[0] and - self.trainer_id == 1) + x, paddle.ones(shape=(batch, in_dim))).numpy()[0] + and self.trainer_id == 1) if is_use: tmp = paddle.matmul(x, self.w1) @@ -65,6 +66,7 @@ class SimpleNet(fluid.Layer): class 
TestDistTraning(unittest.TestCase): + def test_multiple_gpus(self): dist.init_parallel_env() self.trainer_id = dist.get_rank() diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check_in_eager_mode.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check_in_eager_mode.py index db41236dd5c..debc9e90e07 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check_in_eager_mode.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check_in_eager_mode.py @@ -37,16 +37,17 @@ out_dim = 20 class SimpleNet(fluid.Layer): + def __init__(self, train_id): super(SimpleNet, self).__init__() - self.w1 = self.create_parameter( - shape=[in_dim, out_dim], dtype="float32") - self.w2 = self.create_parameter( - shape=[in_dim, out_dim], dtype="float32") + self.w1 = self.create_parameter(shape=[in_dim, out_dim], + dtype="float32") + self.w2 = self.create_parameter(shape=[in_dim, out_dim], + dtype="float32") self.share_net = Linear(out_dim, 10) - self.unused_param = self.create_parameter( - shape=[out_dim, in_dim], dtype="float64") + self.unused_param = self.create_parameter(shape=[out_dim, in_dim], + dtype="float64") # just for test sync_params_buffers # self.register_buffer("queue", paddle.randn([10, 5])) @@ -57,8 +58,8 @@ class SimpleNet(fluid.Layer): def forward(self, x): is_use = (paddle.equal_all( - x, paddle.ones(shape=(batch, in_dim))).numpy()[0] and - self.trainer_id == 1) + x, paddle.ones(shape=(batch, in_dim))).numpy()[0] + and self.trainer_id == 1) if is_use: tmp = paddle.matmul(x, self.w1) @@ -69,6 +70,7 @@ class SimpleNet(fluid.Layer): class TestDistTraning(unittest.TestCase): + def test_multiple_gpus(self): self.trainer_id = dist.get_rank() with _test_eager_guard(): @@ -80,10 +82,12 @@ class TestDistTraning(unittest.TestCase): state_dict = model_a.state_dict() model_b.set_state_dict(state_dict) - model_a = paddle.DataParallel( - model_a, find_unused_parameters=True, group=self.pg) - model_b = paddle.DataParallel( - model_b, find_unused_parameters=True, group=self.pg) + model_a = paddle.DataParallel(model_a, + find_unused_parameters=True, + group=self.pg) + model_b = paddle.DataParallel(model_b, + find_unused_parameters=True, + group=self.pg) ones_input = paddle.ones(shape=(batch, in_dim)) ones_input.stop_gradient = True diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py index b083e76897c..93ca1fa5b56 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py @@ -33,6 +33,7 @@ from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase class SimpleImgConvPool(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -52,25 +53,23 @@ class SimpleImgConvPool(fluid.dygraph.Layer): bias_attr=None): super(SimpleImgConvPool, self).__init__() - self._conv2d = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=conv_stride, - padding=conv_padding, - dilation=conv_dilation, - groups=conv_groups, - param_attr=None, - bias_attr=None, - use_cudnn=use_cudnn) - - self._pool2d = Pool2D( - pool_size=pool_size, - pool_type=pool_type, - pool_stride=pool_stride, - pool_padding=pool_padding, - global_pooling=global_pooling, - use_cudnn=use_cudnn) + self._conv2d = Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=conv_stride, + padding=conv_padding, + 
dilation=conv_dilation, + groups=conv_groups, + param_attr=None, + bias_attr=None, + use_cudnn=use_cudnn) + + self._pool2d = Pool2D(pool_size=pool_size, + pool_type=pool_type, + pool_stride=pool_stride, + pool_padding=pool_padding, + global_pooling=global_pooling, + use_cudnn=use_cudnn) def forward(self, inputs): x = self._conv2d(inputs) @@ -79,25 +78,33 @@ class SimpleImgConvPool(fluid.dygraph.Layer): class MNIST(fluid.dygraph.Layer): + def __init__(self): super(MNIST, self).__init__() - self._simple_img_conv_pool_1 = SimpleImgConvPool( - 1, 20, 5, 2, 2, act="relu") + self._simple_img_conv_pool_1 = SimpleImgConvPool(1, + 20, + 5, + 2, + 2, + act="relu") - self._simple_img_conv_pool_2 = SimpleImgConvPool( - 20, 50, 5, 2, 2, act="relu") + self._simple_img_conv_pool_2 = SimpleImgConvPool(20, + 50, + 5, + 2, + 2, + act="relu") self.pool_2_shape = 50 * 4 * 4 SIZE = 10 scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5 - self._fc = Linear( - self.pool_2_shape, - 10, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale)), - act="softmax") + self._fc = Linear(self.pool_2_shape, + 10, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.NormalInitializer( + loc=0.0, scale=scale)), + act="softmax") def forward(self, inputs, label): x = self._simple_img_conv_pool_1(inputs) @@ -110,20 +117,22 @@ class MNIST(fluid.dygraph.Layer): class TestMnist(TestParallelDyGraphRunnerBase): + def get_model(self): model = MNIST() - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=2, drop_last=True) - opt = paddle.optimizer.Adam( - learning_rate=1e-3, parameters=model.parameters()) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=2, + drop_last=True) + opt = paddle.optimizer.Adam(learning_rate=1e-3, + parameters=model.parameters()) return model, train_reader, opt def run_one_loop(self, model, opt, data): batch_size = len(data) dy_x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(batch_size, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(batch_size, 1) img = to_variable(dy_x_data) label = to_variable(y_data) label.stop_gradient = True diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync.py index 9a3b5ee2f0f..5544ad1da16 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync.py @@ -39,6 +39,7 @@ batch_num = 1000 class SimpleNet(fluid.Layer): + def __init__(self): super(SimpleNet, self).__init__() self.net_a = Linear(input_dim=10, output_dim=20) @@ -53,10 +54,12 @@ class SimpleNet(fluid.Layer): class TestNoSync(TestParallelDyGraphRunnerBase): + def get_model(self): model = SimpleNet() - train_reader = paddle.batch( - fake_sample_reader(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(fake_sample_reader(), + batch_size=batch_size, + drop_last=True) optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters()) return model, train_reader, optimizer @@ -146,6 +149,7 @@ class TestNoSync(TestParallelDyGraphRunnerBase): def fake_sample_reader(): + def __reader__(): for i in range(batch_num): x_data = np.random.random_sample((10, )).astype('float32') diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync_control_flow.py 
b/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync_control_flow.py index 8b3e1b9aedd..b33ef5165fe 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync_control_flow.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync_control_flow.py @@ -38,6 +38,7 @@ batch_num = 1000 class SimpleNetControlFlow(fluid.Layer): + def __init__(self): super(SimpleNetControlFlow, self).__init__() self.net_a = Linear(input_dim=10, output_dim=20) @@ -56,10 +57,12 @@ class SimpleNetControlFlow(fluid.Layer): class TestNoSyncControlFlow(TestNoSync): + def get_model(self): model = SimpleNetControlFlow() - train_reader = paddle.batch( - fake_sample_reader(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(fake_sample_reader(), + batch_size=batch_size, + drop_last=True) optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters()) return model, train_reader, optimizer @@ -74,6 +77,7 @@ class TestNoSyncControlFlow(TestNoSync): def fake_sample_reader(): + def __reader__(): for i in range(batch_num): x_data = np.random.random_sample((10, )).astype('float32') diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync_gradient_check.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync_gradient_check.py index 642ea14d8a8..f7da6cb2aae 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync_gradient_check.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync_gradient_check.py @@ -32,16 +32,17 @@ out_dim = 20 class SimpleNet(fluid.Layer): + def __init__(self, train_id): super(SimpleNet, self).__init__() - self.w1 = self.create_parameter( - shape=[in_dim, out_dim], dtype="float32") - self.w2 = self.create_parameter( - shape=[in_dim, out_dim], dtype="float32") + self.w1 = self.create_parameter(shape=[in_dim, out_dim], + dtype="float32") + self.w2 = self.create_parameter(shape=[in_dim, out_dim], + dtype="float32") self.share_net = Linear(out_dim, 1) - self.unused_param = self.create_parameter( - shape=[out_dim, in_dim], dtype="float32") + self.unused_param = self.create_parameter(shape=[out_dim, in_dim], + dtype="float32") # just for test sync_params_buffers self.register_buffer("queue", paddle.randn([10, 5])) @@ -52,8 +53,8 @@ class SimpleNet(fluid.Layer): def forward(self, x): is_use = (paddle.equal_all( - x, paddle.ones(shape=(batch, in_dim))).numpy()[0] and - self.trainer_id == 1) + x, paddle.ones(shape=(batch, in_dim))).numpy()[0] + and self.trainer_id == 1) if is_use: tmp = paddle.matmul(x, self.w1) @@ -64,6 +65,7 @@ class SimpleNet(fluid.Layer): class TestDistTraning(unittest.TestCase): + def test_multiple_gpus(self): self.trainer_id = dist.get_rank() dist.init_parallel_env() diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync_unused_params.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync_unused_params.py index 5aecf13bc15..9f28e2ce4c5 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync_unused_params.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync_unused_params.py @@ -38,6 +38,7 @@ batch_num = 1000 class SimpleNetUnusedParam(fluid.Layer): + def __init__(self): super(SimpleNetUnusedParam, self).__init__() self.net_a = Linear(input_dim=10, output_dim=20) @@ -55,10 +56,12 @@ class SimpleNetUnusedParam(fluid.Layer): class TestNoSyncUnusedParam(TestNoSync): + def get_model(self): model = SimpleNetUnusedParam() - train_reader = paddle.batch( - fake_sample_reader(), batch_size=batch_size, 
drop_last=True) + train_reader = paddle.batch(fake_sample_reader(), + batch_size=batch_size, + drop_last=True) optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters()) return model, train_reader, optimizer @@ -73,6 +76,7 @@ class TestNoSyncUnusedParam(TestNoSync): def fake_sample_reader(): + def __reader__(): for i in range(batch_num): x_data = np.random.random_sample((10, )).astype('float32') diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_none_var.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_none_var.py index fc0246a9720..7c8c40850d9 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_none_var.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_none_var.py @@ -35,14 +35,15 @@ batch_num = 1000 class SimpleNet(fluid.Layer): + def __init__(self): super(SimpleNet, self).__init__() - self.net_a = paddle.nn.Sequential( - paddle.nn.Linear(10, 20), - paddle.nn.Linear(20, 20), paddle.nn.Linear(20, 5)) - self.net_b = paddle.nn.Sequential( - paddle.nn.Linear(10, 20), - paddle.nn.Linear(20, 20), paddle.nn.Linear(20, 5)) + self.net_a = paddle.nn.Sequential(paddle.nn.Linear(10, 20), + paddle.nn.Linear(20, 20), + paddle.nn.Linear(20, 5)) + self.net_b = paddle.nn.Sequential(paddle.nn.Linear(10, 20), + paddle.nn.Linear(20, 20), + paddle.nn.Linear(20, 5)) self.step = 0 def forward(self, x): @@ -50,6 +51,7 @@ class SimpleNet(fluid.Layer): def fake_sample_reader(): + def __reader__(): for i in range(batch_num): x_data = np.random.random_sample((10, )).astype('float32') @@ -59,10 +61,12 @@ def fake_sample_reader(): class TestSimpleNet(TestParallelDyGraphRunnerBase): + def get_model(self): model = SimpleNet() - train_reader = paddle.batch( - fake_sample_reader(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(fake_sample_reader(), + batch_size=batch_size, + drop_last=True) optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters()) return model, train_reader, optimizer diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_se_resnext.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_se_resnext.py index 5e50e915d73..6ee04dd342b 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_se_resnext.py @@ -68,15 +68,17 @@ def optimizer_setting(params, parameter_list=None): num_epochs = params["num_epochs"] if fluid._non_static_mode(): optimizer = fluid.optimizer.Momentum( - learning_rate=fluid.layers.cosine_decay( - learning_rate=lr, step_each_epoch=step, epochs=num_epochs), + learning_rate=fluid.layers.cosine_decay(learning_rate=lr, + step_each_epoch=step, + epochs=num_epochs), momentum=momentum_rate, regularization=fluid.regularizer.L2Decay(l2_decay), parameter_list=parameter_list) else: optimizer = fluid.optimizer.Momentum( - learning_rate=fluid.layers.cosine_decay( - learning_rate=lr, step_each_epoch=step, epochs=num_epochs), + learning_rate=fluid.layers.cosine_decay(learning_rate=lr, + step_each_epoch=step, + epochs=num_epochs), momentum=momentum_rate, regularization=fluid.regularizer.L2Decay(l2_decay)) @@ -84,6 +86,7 @@ def optimizer_setting(params, parameter_list=None): class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -93,15 +96,14 @@ class ConvBNLayer(fluid.dygraph.Layer): act=None): super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, 
- padding=(filter_size - 1) // 2, - groups=groups, - act=None, - bias_attr=False) + self._conv = Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + bias_attr=False) # disable BatchNorm in multi-card. disable LayerNorm because of complex input_shape # self._batch_norm = BatchNorm(num_filters, act=act) @@ -114,6 +116,7 @@ class ConvBNLayer(fluid.dygraph.Layer): class SqueezeExcitation(fluid.dygraph.Layer): + def __init__(self, num_channels, reduction_ratio): super(SqueezeExcitation, self).__init__() @@ -144,6 +147,7 @@ class SqueezeExcitation(fluid.dygraph.Layer): class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -153,33 +157,29 @@ class BottleneckBlock(fluid.dygraph.Layer): shortcut=True): super(BottleneckBlock, self).__init__() - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu") - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - groups=cardinality, - act="relu") - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 2, - filter_size=1, - act=None) - - self.scale = SqueezeExcitation( - num_channels=num_filters * 2, reduction_ratio=reduction_ratio) + self.conv0 = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act="relu") + self.conv1 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + groups=cardinality, + act="relu") + self.conv2 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters * 2, + filter_size=1, + act=None) + + self.scale = SqueezeExcitation(num_channels=num_filters * 2, + reduction_ratio=reduction_ratio) if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 2, - filter_size=1, - stride=stride) + self.short = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters * 2, + filter_size=1, + stride=stride) self.shortcut = shortcut @@ -201,6 +201,7 @@ class BottleneckBlock(fluid.dygraph.Layer): class SeResNeXt(fluid.dygraph.Layer): + def __init__(self, layers=50, class_dim=102): super(SeResNeXt, self).__init__() @@ -214,52 +215,53 @@ class SeResNeXt(fluid.dygraph.Layer): reduction_ratio = 16 depth = [3, 4, 6, 3] num_filters = [128, 256, 512, 1024] - self.conv0 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.conv0 = ConvBNLayer(num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu') + self.pool = Pool2D(pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') elif layers == 101: cardinality = 32 reduction_ratio = 16 depth = [3, 4, 23, 3] num_filters = [128, 256, 512, 1024] - self.conv0 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.conv0 = ConvBNLayer(num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu') + self.pool = Pool2D(pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') elif layers == 152: cardinality = 64 reduction_ratio = 16 depth = [3, 8, 36, 3] num_filters = [128, 256, 512, 1024] - self.conv0 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=3, - 
stride=2, - act='relu') - self.conv1 = ConvBNLayer( - num_channels=64, - num_filters=64, - filter_size=3, - stride=1, - act='relu') - self.conv2 = ConvBNLayer( - num_channels=64, - num_filters=128, - filter_size=3, - stride=1, - act='relu') - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.conv0 = ConvBNLayer(num_channels=3, + num_filters=64, + filter_size=3, + stride=2, + act='relu') + self.conv1 = ConvBNLayer(num_channels=64, + num_filters=64, + filter_size=3, + stride=1, + act='relu') + self.conv2 = ConvBNLayer(num_channels=64, + num_filters=128, + filter_size=3, + stride=1, + act='relu') + self.pool = Pool2D(pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') self.bottleneck_block_list = [] num_channels = 64 @@ -268,19 +270,19 @@ class SeResNeXt(fluid.dygraph.Layer): for i in range(depth[block]): bottleneck_block = self.add_sublayer( 'bb_%d_%d' % (block, i), - BottleneckBlock( - num_channels=num_channels, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=cardinality, - reduction_ratio=reduction_ratio, - shortcut=shortcut)) + BottleneckBlock(num_channels=num_channels, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=cardinality, + reduction_ratio=reduction_ratio, + shortcut=shortcut)) num_channels = bottleneck_block._num_channels_out self.bottleneck_block_list.append(bottleneck_block) shortcut = True - self.pool2d_avg = Pool2D( - pool_size=7, pool_type='avg', global_pooling=True) + self.pool2d_avg = Pool2D(pool_size=7, + pool_type='avg', + global_pooling=True) stdv = 1.0 / math.sqrt(2048 * 1.0) self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 2 * 1 * 1 @@ -310,14 +312,14 @@ class SeResNeXt(fluid.dygraph.Layer): class TestSeResNeXt(TestParallelDyGraphRunnerBase): + def get_model(self): model = SeResNeXt() - train_reader = paddle.batch( - paddle.dataset.flowers.test(use_xmap=False), - batch_size=train_parameters["batch_size"], - drop_last=True) - optimizer = optimizer_setting( - train_parameters, parameter_list=model.parameters()) + train_reader = paddle.batch(paddle.dataset.flowers.test(use_xmap=False), + batch_size=train_parameters["batch_size"], + drop_last=True) + optimizer = optimizer_setting(train_parameters, + parameter_list=model.parameters()) return model, train_reader, optimizer def run_one_loop(self, model, opt, data): diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_shared_unused_var.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_shared_unused_var.py index facac33e4c6..a0906383a4f 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_shared_unused_var.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_shared_unused_var.py @@ -28,6 +28,7 @@ paddle.seed(1024) class SimpleNet(fluid.Layer): + def __init__(self): # bias is unused parameters, and it share with net_a super(SimpleNet, self).__init__() @@ -44,6 +45,7 @@ batch_num = 1000 def fake_sample_reader(): + def __reader__(): for i in range(batch_num): x_data = np.random.random_sample((10, )).astype('float32') @@ -53,10 +55,12 @@ def fake_sample_reader(): class TestSimpleNet(TestParallelDyGraphRunnerBase): + def get_model(self): model = SimpleNet() - train_reader = paddle.batch( - fake_sample_reader(), batch_size=batch_size, drop_last=True) + train_reader = paddle.batch(fake_sample_reader(), + batch_size=batch_size, + drop_last=True) optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters()) return 
model, train_reader, optimizer diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py index 33ae0acf43d..53cc8b12677 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py @@ -25,6 +25,7 @@ from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase class SimpleNet(fluid.Layer): + def __init__(self, hidden_size, vocab_size, @@ -62,8 +63,9 @@ class SimpleNet(fluid.Layer): fc = fluid.layers.matmul(x_emb, self.softmax_weight) fc = fluid.layers.elementwise_add(fc, self.softmax_bias) projection = fluid.layers.reshape(fc, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( - logits=projection, label=label, soft_label=False) + loss = fluid.layers.softmax_with_cross_entropy(logits=projection, + label=label, + soft_label=False) loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps]) loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_sum(loss) @@ -81,6 +83,7 @@ init_scale = 0.1 def fake_sample_reader(): + def __reader__(): for i in range(batch_num): x_data = np.arange(num_steps).astype('int64') @@ -91,16 +94,17 @@ def fake_sample_reader(): class TestSparseEmbedding(TestParallelDyGraphRunnerBase): + def get_model(self): - model = SimpleNet( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_steps=num_steps, - init_scale=init_scale, - is_sparse=True) - - train_reader = paddle.batch( - fake_sample_reader(), batch_size=batch_size, drop_last=True) + model = SimpleNet(hidden_size=hidden_size, + vocab_size=vocab_size, + num_steps=num_steps, + init_scale=init_scale, + is_sparse=True) + + train_reader = paddle.batch(fake_sample_reader(), + batch_size=batch_size, + drop_last=True) optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters()) diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_fp64.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_fp64.py index b341a227285..e6ef94f7bb0 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_fp64.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_fp64.py @@ -19,10 +19,12 @@ import paddle from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase from paddle.nn import Layer, Embedding + paddle.set_default_dtype("float64") class SimpleNet(Layer): + def __init__(self, hidden_size, vocab_size, @@ -40,8 +42,8 @@ class SimpleNet(Layer): self.hidden_size, sparse=True, weight_attr=paddle.ParamAttr( - initializer=paddle.nn.initializer.Uniform( - low=-init_scale, high=init_scale))) + initializer=paddle.nn.initializer.Uniform(low=-init_scale, + high=init_scale))) self.softmax_weight = self.create_parameter( attr=paddle.ParamAttr(), shape=[self.hidden_size, self.vocab_size], @@ -85,6 +87,7 @@ init_scale = 0.1 def fake_sample_reader(): + def __reader__(): for i in range(batch_num): x_data = np.arange(num_steps).astype('int64') @@ -95,16 +98,17 @@ def fake_sample_reader(): class TestSparseEmbeddingFP64(TestParallelDyGraphRunnerBase): + def get_model(self): - model = SimpleNet( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_steps=num_steps, - init_scale=init_scale, - is_sparse=True) - - train_reader = paddle.batch( - fake_sample_reader(), batch_size=batch_size, drop_last=True) + model = SimpleNet(hidden_size=hidden_size, + 
vocab_size=vocab_size, + num_steps=num_steps, + init_scale=init_scale, + is_sparse=True) + + train_reader = paddle.batch(fake_sample_reader(), + batch_size=batch_size, + drop_last=True) optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters()) diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_over_height.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_over_height.py index 61749a24c98..7d3ef413f13 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_over_height.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_over_height.py @@ -31,16 +31,17 @@ init_scale = 0.1 class TestSparseEmbeddingOverHeight(TestSparseEmbedding): + def get_model(self): - model = SimpleNet( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_steps=num_steps, - init_scale=init_scale, - is_sparse=True) - - train_reader = paddle.batch( - fake_sample_reader(), batch_size=batch_size, drop_last=True) + model = SimpleNet(hidden_size=hidden_size, + vocab_size=vocab_size, + num_steps=num_steps, + init_scale=init_scale, + is_sparse=True) + + train_reader = paddle.batch(fake_sample_reader(), + batch_size=batch_size, + drop_last=True) optimizer = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=model.parameters()) diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py index d525009fbd7..a8e099137a3 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py @@ -33,6 +33,7 @@ from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase class TestLayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -42,28 +43,27 @@ class TestLayer(fluid.dygraph.Layer): act=None): super(TestLayer, self).__init__() - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - bias_attr=False) + self._conv = Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + bias_attr=False) self._sync_batch_norm = SyncBatchNorm(num_filters) - self._conv2 = Conv2D( - in_channels=num_filters, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - bias_attr=False) + self._conv2 = Conv2D(in_channels=num_filters, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + bias_attr=False) - self._sync_batch_norm2 = SyncBatchNorm( - num_filters, weight_attr=False, bias_attr=False) + self._sync_batch_norm2 = SyncBatchNorm(num_filters, + weight_attr=False, + bias_attr=False) def forward(self, inputs): y = self._conv(inputs) @@ -75,14 +75,14 @@ class TestLayer(fluid.dygraph.Layer): class TestSyncBatchNorm(TestParallelDyGraphRunnerBase): + def get_model(self): model = TestLayer(3, 64, 7) - train_reader = paddle.batch( - paddle.dataset.flowers.test(use_xmap=False), - batch_size=32, - drop_last=True) - opt = fluid.optimizer.Adam( - learning_rate=1e-3, parameter_list=model.parameters()) + train_reader = paddle.batch(paddle.dataset.flowers.test(use_xmap=False), + batch_size=32, + drop_last=True) + opt = 
fluid.optimizer.Adam(learning_rate=1e-3, + parameter_list=model.parameters()) return model, train_reader, opt def run_one_loop(self, model, opt, data): diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_transformer.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_transformer.py index f149637641a..922c424e178 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_transformer.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_transformer.py @@ -162,32 +162,38 @@ input_descs = { # Names of word embedding table which might be reused for weight sharing. word_emb_param_names = ( "src_word_emb_table", - "trg_word_emb_table", ) + "trg_word_emb_table", +) # Names of position encoding table which will be initialized externally. pos_enc_param_names = ( "src_pos_enc_table", - "trg_pos_enc_table", ) + "trg_pos_enc_table", +) # separated inputs for different usages. encoder_data_input_fields = ( "src_word", "src_pos", - "src_slf_attn_bias", ) + "src_slf_attn_bias", +) decoder_data_input_fields = ( "trg_word", "trg_pos", "trg_slf_attn_bias", "trg_src_attn_bias", - "enc_output", ) + "enc_output", +) label_data_input_fields = ( "lbl_word", - "lbl_weight", ) + "lbl_weight", +) # In fast decoder, trg_pos (only containing the current time step) is generated # by ops and trg_slf_attn_bias is not needed. fast_decoder_data_input_fields = ( "trg_word", # "init_score", # "init_idx", - "trg_src_attn_bias", ) + "trg_src_attn_bias", +) def position_encoding_init(n_position, d_pos_vec): @@ -199,10 +205,10 @@ def position_encoding_init(n_position, d_pos_vec): num_timescales = channels // 2 log_timescale_increment = (np.log(float(1e4) / float(1)) / (num_timescales - 1)) - inv_timescales = np.exp(np.arange( - num_timescales)) * -log_timescale_increment - scaled_time = np.expand_dims(position, 1) * np.expand_dims(inv_timescales, - 0) + inv_timescales = np.exp( + np.arange(num_timescales)) * -log_timescale_increment + scaled_time = np.expand_dims(position, 1) * np.expand_dims( + inv_timescales, 0) signal = np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1) signal = np.pad(signal, [[0, 0], [0, np.mod(channels, 2)]], 'constant') position_enc = signal @@ -216,6 +222,7 @@ pos_inp2 = position_encoding_init(ModelHyperParams.max_length, class PrePostProcessLayer(Layer): + def __init__(self, d_model, process_cmd, shape_len=None): super(PrePostProcessLayer, self).__init__() for cmd in process_cmd: @@ -244,6 +251,7 @@ class PrePostProcessLayer(Layer): class PositionwiseFeedForwardLayer(Layer): + def __init__(self, d_inner_hid, d_hid, dropout_rate): super(PositionwiseFeedForwardLayer, self).__init__() self._i2h = Linear(d_hid, d_inner_hid, act="relu") @@ -253,16 +261,16 @@ class PositionwiseFeedForwardLayer(Layer): def forward(self, x): hidden = self._i2h(x) if self._dropout_rate: - hidden = fluid.layers.dropout( - hidden, - dropout_prob=self._dropout_rate, - seed=ModelHyperParams.dropout_seed, - is_test=False) + hidden = fluid.layers.dropout(hidden, + dropout_prob=self._dropout_rate, + seed=ModelHyperParams.dropout_seed, + is_test=False) out = self._h2o(hidden) return out class MultiHeadAttentionLayer(Layer): + def __init__(self, d_key, d_value, @@ -304,11 +312,10 @@ class MultiHeadAttentionLayer(Layer): transpose_v = fluid.layers.transpose(x=reshaped_v, perm=[0, 2, 1, 3]) # scale dot product attention - product = fluid.layers.matmul( - x=transpose_q, - y=transpose_k, - transpose_y=True, - alpha=self._d_model**-0.5) + product = fluid.layers.matmul(x=transpose_q, + 
y=transpose_k, + transpose_y=True, + alpha=self._d_model**-0.5) if attn_bias is not None: product += attn_bias weights = fluid.layers.softmax(product) @@ -337,6 +344,7 @@ class MultiHeadAttentionLayer(Layer): class EncoderSubLayer(Layer): + def __init__(self, n_head, d_key, @@ -358,14 +366,16 @@ class EncoderSubLayer(Layer): self._preprocess_cmd, 3) self._multihead_attention_layer = MultiHeadAttentionLayer( d_key, d_value, d_model, n_head, attention_dropout) - self._postprocess_layer = PrePostProcessLayer( - d_model, self._postprocess_cmd, None) + self._postprocess_layer = PrePostProcessLayer(d_model, + self._postprocess_cmd, + None) self._preprocess_layer2 = PrePostProcessLayer(d_model, self._preprocess_cmd, 3) self._positionwise_feed_forward = PositionwiseFeedForwardLayer( d_inner_hid, d_model, relu_dropout) - self._postprocess_layer2 = PrePostProcessLayer( - d_model, self._postprocess_cmd, None) + self._postprocess_layer2 = PrePostProcessLayer(d_model, + self._postprocess_cmd, + None) def forward(self, enc_input, attn_bias): pre_process_multihead = self._preprocess_layer( @@ -385,6 +395,7 @@ class EncoderSubLayer(Layer): class EncoderLayer(Layer): + def __init__(self, n_layer, n_head, @@ -424,6 +435,7 @@ class EncoderLayer(Layer): class PrepareEncoderDecoderLayer(Layer): + def __init__(self, src_vocab_size, src_emb_dim, @@ -437,13 +449,13 @@ class PrepareEncoderDecoderLayer(Layer): self._src_emb_dim = src_emb_dim self._src_vocab_size = src_vocab_size self._dropout_rate = dropout_rate - self._input_emb = Embedding( - size=[src_vocab_size, src_emb_dim], - is_sparse=is_sparse, - padding_idx=0, - param_attr=fluid.ParamAttr( - name=word_emb_param_name, - initializer=fluid.initializer.Normal(0., src_emb_dim**-0.5))) + self._input_emb = Embedding(size=[src_vocab_size, src_emb_dim], + is_sparse=is_sparse, + padding_idx=0, + param_attr=fluid.ParamAttr( + name=word_emb_param_name, + initializer=fluid.initializer.Normal( + 0., src_emb_dim**-0.5))) if pos_enc_param_name is pos_enc_param_names[0]: pos_inp = pos_inp1 @@ -459,8 +471,8 @@ class PrepareEncoderDecoderLayer(Layer): def forward(self, src_word, src_pos): src_word_emb = self._input_emb(src_word) - src_word_emb = fluid.layers.scale( - x=src_word_emb, scale=self._src_emb_dim**0.5) + src_word_emb = fluid.layers.scale(x=src_word_emb, + scale=self._src_emb_dim**0.5) # # TODO change this to fit dynamic length input src_pos_emb = self._pos_emb(src_pos) src_pos_emb.stop_gradient = True @@ -473,6 +485,7 @@ class PrepareEncoderDecoderLayer(Layer): class WrapEncoderLayer(Layer): + def __init__(self, src_vocab_size, max_length, @@ -515,6 +528,7 @@ class WrapEncoderLayer(Layer): class DecoderSubLayer(Layer): + def __init__(self, n_head, d_key, @@ -565,10 +579,11 @@ class DecoderSubLayer(Layer): postprocess_cmd, None) def forward(self, dec_input, enc_output, slf_attn_bias, dec_enc_attn_bias): - pre_process_rlt = self._pre_process_layer( - None, dec_input, self._preprocess_cmd, self._prepostprcess_dropout) - slf_attn_output = self._multihead_attention_layer(pre_process_rlt, None, - None, slf_attn_bias) + pre_process_rlt = self._pre_process_layer(None, dec_input, + self._preprocess_cmd, + self._prepostprcess_dropout) + slf_attn_output = self._multihead_attention_layer( + pre_process_rlt, None, None, slf_attn_bias) slf_attn_output_pp = self._post_process_layer( dec_input, slf_attn_output, self._postprocess_cmd, self._prepostprcess_dropout) @@ -577,9 +592,10 @@ class DecoderSubLayer(Layer): self._prepostprcess_dropout) enc_attn_output_pp = 
self._multihead_attention_layer2( pre_process_rlt2, enc_output, enc_output, dec_enc_attn_bias) - enc_attn_output = self._post_process_layer2( - slf_attn_output_pp, enc_attn_output_pp, self._postprocess_cmd, - self._prepostprcess_dropout) + enc_attn_output = self._post_process_layer2(slf_attn_output_pp, + enc_attn_output_pp, + self._postprocess_cmd, + self._prepostprcess_dropout) pre_process_rlt3 = self._pre_process_layer3(None, enc_attn_output, self._preprocess_cmd, self._prepostprcess_dropout) @@ -591,6 +607,7 @@ class DecoderSubLayer(Layer): class DecoderLayer(Layer): + def __init__(self, n_layer, n_head, @@ -616,25 +633,25 @@ class DecoderLayer(Layer): self._decoder_sub_layers.append( self.add_sublayer( 'dsl_%d' % i, - DecoderSubLayer( - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd, - postprocess_cmd, - cache=None if caches is None else caches[i], - gather_idx=gather_idx))) + DecoderSubLayer(n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + preprocess_cmd, + postprocess_cmd, + cache=None if caches is None else caches[i], + gather_idx=gather_idx))) def forward(self, dec_input, enc_output, dec_slf_attn_bias, dec_enc_attn_bias): for i in range(self._n_layer): - tmp_dec_output = self._decoder_sub_layers[i]( - dec_input, enc_output, dec_slf_attn_bias, dec_enc_attn_bias) + tmp_dec_output = self._decoder_sub_layers[i](dec_input, enc_output, + dec_slf_attn_bias, + dec_enc_attn_bias) dec_input = tmp_dec_output dec_output = self._pre_process_layer(None, tmp_dec_output, @@ -644,6 +661,7 @@ class DecoderLayer(Layer): class WrapDecoderLayer(Layer): + def __init__(self, trg_vocab_size, max_length, @@ -675,20 +693,19 @@ class WrapDecoderLayer(Layer): is_sparse=is_sparse, word_emb_param_name=word_emb_param_names[1], pos_enc_param_name=pos_enc_param_names[1]) - self._decoder_layer = DecoderLayer( - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd, - postprocess_cmd, - caches=caches, - gather_idx=gather_idx) + self._decoder_layer = DecoderLayer(n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + preprocess_cmd, + postprocess_cmd, + caches=caches, + gather_idx=gather_idx) self._weight_sharing = weight_sharing if not weight_sharing: self._fc = Linear(d_model, trg_vocab_size, bias_attr=False) @@ -718,6 +735,7 @@ class WrapDecoderLayer(Layer): class TransFormer(Layer): + def __init__(self, src_vocab_size, trg_vocab_size, @@ -745,38 +763,36 @@ class TransFormer(Layer): assert src_vocab_size == trg_vocab_size, ( "Vocabularies in source and target should be same for weight sharing." 
) - self._wrap_encoder_layer = WrapEncoderLayer( - src_vocab_size, - max_length, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd, - postprocess_cmd, - weight_sharing, - is_sparse=is_sparse) - self._wrap_decoder_layer = WrapDecoderLayer( - trg_vocab_size, - max_length, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd, - postprocess_cmd, - weight_sharing, - is_sparse=is_sparse) + self._wrap_encoder_layer = WrapEncoderLayer(src_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + preprocess_cmd, + postprocess_cmd, + weight_sharing, + is_sparse=is_sparse) + self._wrap_decoder_layer = WrapDecoderLayer(trg_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + preprocess_cmd, + postprocess_cmd, + weight_sharing, + is_sparse=is_sparse) if weight_sharing: self._wrap_decoder_layer._prepare_decoder_layer._input_emb.weight = self._wrap_encoder_layer._prepare_encoder_layer._input_emb.weight @@ -786,8 +802,8 @@ class TransFormer(Layer): predict = self._wrap_decoder_layer(dec_inputs, enc_output) if self._label_smooth_eps: label_out = fluid.layers.label_smooth( - label=fluid.layers.one_hot( - input=label, depth=self._trg_vocab_size), + label=fluid.layers.one_hot(input=label, + depth=self._trg_vocab_size), epsilon=self._label_smooth_eps) cost = fluid.layers.softmax_with_cross_entropy( @@ -807,32 +823,39 @@ batch_num = 5 def fake_data_reader(): + def __reader__(): iteration = TrainTaskConfig.batch_size * batch_num for _ in six.moves.range(iteration): # random data np.random.seed = 90 - src_word_np = np.arange(1, seq_len + 1).reshape( - [seq_len]).astype('int64') - src_pos_np = np.random.randint( - 1, seq_len, size=(seq_len), dtype='int64') - src_slf_attn_bias_np = np.random.randn( - ModelHyperParams.n_head, seq_len, seq_len).astype('float32') - - trg_word_np = np.arange(1, seq_len + 1).reshape( - [seq_len]).astype('int64') - trg_pos_np = np.random.randint( - 1, seq_len, size=(seq_len), dtype='int64') - trg_slf_attn_bias_np = np.random.randn( - ModelHyperParams.n_head, seq_len, seq_len).astype('float32') - trg_src_attn_bias_np = np.random.randn( - ModelHyperParams.n_head, seq_len, seq_len).astype('float32') - - lbl_word_np = np.random.randint( - 1, - ModelHyperParams.src_vocab_size - 1, - size=(seq_len, 1), - dtype='int64') + src_word_np = np.arange(1, seq_len + 1).reshape([seq_len + ]).astype('int64') + src_pos_np = np.random.randint(1, + seq_len, + size=(seq_len), + dtype='int64') + src_slf_attn_bias_np = np.random.randn(ModelHyperParams.n_head, + seq_len, + seq_len).astype('float32') + + trg_word_np = np.arange(1, seq_len + 1).reshape([seq_len + ]).astype('int64') + trg_pos_np = np.random.randint(1, + seq_len, + size=(seq_len), + dtype='int64') + trg_slf_attn_bias_np = np.random.randn(ModelHyperParams.n_head, + seq_len, + seq_len).astype('float32') + trg_src_attn_bias_np = np.random.randn(ModelHyperParams.n_head, + seq_len, + seq_len).astype('float32') + + lbl_word_np = np.random.randint(1, + ModelHyperParams.src_vocab_size - 1, + size=(seq_len, 1), + dtype='int64') # Note(chenweihang): weight will introduce diff, so use constant here lbl_weight_np = np.ones((seq_len, 1)).astype('int64') @@ -875,8 +898,9 @@ 
def np_to_variable(data): var_inputs.append(to_variable(data_inputs[i], name=field)) enc_inputs = var_inputs[0:len(encoder_data_input_fields)] - dec_inputs = var_inputs[len(encoder_data_input_fields):len( - encoder_data_input_fields) + len(decoder_data_input_fields[:-1])] + dec_inputs = var_inputs[len(encoder_data_input_fields + ):len(encoder_data_input_fields) + + len(decoder_data_input_fields[:-1])] label = var_inputs[-2] weights = var_inputs[-1] @@ -887,39 +911,38 @@ naive_optimize = True class TestTransformer(TestParallelDyGraphRunnerBase): + def get_model(self): - model = TransFormer( - ModelHyperParams.src_vocab_size, - ModelHyperParams.trg_vocab_size, - ModelHyperParams.max_length + 1, - ModelHyperParams.n_layer, - ModelHyperParams.n_head, - ModelHyperParams.d_key, - ModelHyperParams.d_value, - ModelHyperParams.d_model, - ModelHyperParams.d_inner_hid, - ModelHyperParams.prepostprocess_dropout, - ModelHyperParams.attention_dropout, - ModelHyperParams.relu_dropout, - ModelHyperParams.preprocess_cmd, - ModelHyperParams.postprocess_cmd, - ModelHyperParams.weight_sharing, - TrainTaskConfig.label_smooth_eps, - is_sparse=True) + model = TransFormer(ModelHyperParams.src_vocab_size, + ModelHyperParams.trg_vocab_size, + ModelHyperParams.max_length + 1, + ModelHyperParams.n_layer, + ModelHyperParams.n_head, + ModelHyperParams.d_key, + ModelHyperParams.d_value, + ModelHyperParams.d_model, + ModelHyperParams.d_inner_hid, + ModelHyperParams.prepostprocess_dropout, + ModelHyperParams.attention_dropout, + ModelHyperParams.relu_dropout, + ModelHyperParams.preprocess_cmd, + ModelHyperParams.postprocess_cmd, + ModelHyperParams.weight_sharing, + TrainTaskConfig.label_smooth_eps, + is_sparse=True) train_reader = paddle.batch(fake_data_reader(), TrainTaskConfig.batch_size) if naive_optimize: optimizer = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=model.parameters()) else: - optimizer = fluid.optimizer.Adam( - learning_rate=NoamDecay(ModelHyperParams.d_model, - TrainTaskConfig.warmup_steps, - TrainTaskConfig.learning_rate), - beta1=TrainTaskConfig.beta1, - beta2=TrainTaskConfig.beta2, - epsilon=TrainTaskConfig.eps, - parameter_list=model.parameters()) + optimizer = fluid.optimizer.Adam(learning_rate=NoamDecay( + ModelHyperParams.d_model, TrainTaskConfig.warmup_steps, + TrainTaskConfig.learning_rate), + beta1=TrainTaskConfig.beta1, + beta2=TrainTaskConfig.beta2, + epsilon=TrainTaskConfig.eps, + parameter_list=model.parameters()) return model, train_reader, optimizer diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_unused_variables.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_unused_variables.py index b4dd03aecfa..a88a36838a5 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_unused_variables.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_unused_variables.py @@ -22,6 +22,7 @@ from paddle.nn import Layer, Embedding class SimpleNet(Layer): + def __init__(self, hidden_size, vocab_size, @@ -39,8 +40,8 @@ class SimpleNet(Layer): self.hidden_size, sparse=is_sparse, weight_attr=paddle.ParamAttr( - initializer=paddle.nn.initializer.Uniform( - low=-init_scale, high=init_scale))) + initializer=paddle.nn.initializer.Uniform(low=-init_scale, + high=init_scale))) self.softmax_weight = self.create_parameter( attr=paddle.ParamAttr(), shape=[self.hidden_size, self.vocab_size], @@ -88,6 +89,7 @@ init_scale = 0.1 def fake_sample_reader(): + def __reader__(): for i in range(batch_num): x_data = np.arange(num_steps).astype('int64') @@ -98,16 +100,17 @@ def 
fake_sample_reader(): class TestSparseEmbeddingUnusedVars(TestParallelDyGraphRunnerBase): + def get_model(self): - model = SimpleNet( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_steps=num_steps, - init_scale=init_scale, - is_sparse=False) - - train_reader = paddle.batch( - fake_sample_reader(), batch_size=batch_size, drop_last=True) + model = SimpleNet(hidden_size=hidden_size, + vocab_size=vocab_size, + num_steps=num_steps, + init_scale=init_scale, + is_sparse=False) + + train_reader = paddle.batch(fake_sample_reader(), + batch_size=batch_size, + drop_last=True) optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters()) diff --git a/python/paddle/fluid/tests/unittests/parallel_embedding_api.py b/python/paddle/fluid/tests/unittests/parallel_embedding_api.py index 8907adbf46a..1f3d173228d 100644 --- a/python/paddle/fluid/tests/unittests/parallel_embedding_api.py +++ b/python/paddle/fluid/tests/unittests/parallel_embedding_api.py @@ -41,6 +41,7 @@ paddle.enable_static() class TestParallelEmbeddingAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -54,8 +55,9 @@ class TestParallelEmbeddingAPI(TestCollectiveAPIRunnerBase): paddle.seed(2020) data_in = paddle.randint(0, size[0], shape=(10, 4)) - data = paddle.static.data( - name='tindata', shape=[10, 1000], dtype="float32") + data = paddle.static.data(name='tindata', + shape=[10, 1000], + dtype="float32") per_part_size = size[0] // 2 if rank == 0: param_attr = paddle.fluid.ParamAttr( @@ -66,12 +68,11 @@ class TestParallelEmbeddingAPI(TestCollectiveAPIRunnerBase): initializer=paddle.fluid.initializer.NumpyArrayInitializer( np_array[per_part_size:size[0], :]), ) - emb_out = paddle.distributed.split( - data_in, - size, - operation="embedding", - num_partitions=2, - weight_attr=param_attr) + emb_out = paddle.distributed.split(data_in, + size, + operation="embedding", + num_partitions=2, + weight_attr=param_attr) return [data_in, emb_out] diff --git a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py index 2633a599256..46ab8f88511 100644 --- a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py +++ b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py @@ -32,6 +32,7 @@ DeviceType = core.DeviceType class TestParallelExecutorBase(unittest.TestCase): + @classmethod def check_network_convergence(cls, method, @@ -52,6 +53,7 @@ class TestParallelExecutorBase(unittest.TestCase): optimizer=fluid.optimizer.Adam, use_fast_executor=False, enable_sequential_execution=False): + def run_executor(exe, binary, feed, fetch_list): if feed_data_reader is None: res = exe.run(binary, feed=feed, fetch_list=fetch_list) @@ -102,17 +104,21 @@ class TestParallelExecutorBase(unittest.TestCase): os.environ.get('CPU_NUM', multiprocessing.cpu_count())) begin = time.time() - first_loss, = run_executor( - exe=exe, binary=binary, feed=feed_dict, fetch_list=[loss.name]) + first_loss, = run_executor(exe=exe, + binary=binary, + feed=feed_dict, + fetch_list=[loss.name]) for _ in range(iter): run_executor(exe=exe, binary=binary, feed=feed_dict, fetch_list=[]) - last_loss, = run_executor( - exe=exe, binary=binary, feed=feed_dict, fetch_list=[loss.name]) + last_loss, = run_executor(exe=exe, + binary=binary, + feed=feed_dict, + fetch_list=[loss.name]) end = time.time() if batch_size is not None: - print("%.4f Instance per second" % ( - (batch_size * iter + 2) / (end - begin))) + print("%.4f Instance per 
second" % ((batch_size * iter + 2) / + (end - begin))) avg_last_loss_val = np.array(last_loss).mean() avg_first_loss_val = np.array(first_loss).mean() diff --git a/python/paddle/fluid/tests/unittests/parallel_margin_cross_entropy.py b/python/paddle/fluid/tests/unittests/parallel_margin_cross_entropy.py index 83db08fc615..b77a04d8eea 100644 --- a/python/paddle/fluid/tests/unittests/parallel_margin_cross_entropy.py +++ b/python/paddle/fluid/tests/unittests/parallel_margin_cross_entropy.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -35,6 +35,7 @@ def set_random_seed(seed): class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase): + def setUp(self): strategy = fleet.DistributedStrategy() fleet.init(is_collective=True, strategy=strategy) @@ -62,31 +63,31 @@ class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase): for num_class_per_card in num_class_per_cards: num_class = np.sum(num_class_per_card) - for margin1, margin2, margin3, scale in zip(margin1s, margin2s, - margin3s, scales): + for margin1, margin2, margin3, scale in zip( + margin1s, margin2s, margin3s, scales): for _ in range(5): np_label = np.random.randint(0, num_class, (batch_size, )) label = paddle.to_tensor(np_label, dtype="int64") - input = paddle.randn( - shape=[batch_size, feature_length], dtype=dtype) + input = paddle.randn(shape=[batch_size, feature_length], + dtype=dtype) input.stop_gradient = False input_l2 = paddle.sqrt( - paddle.sum( - paddle.square(input), axis=1, keepdim=True)) + paddle.sum(paddle.square(input), + axis=1, + keepdim=True)) norm_input = paddle.divide(input, input_l2) weight = paddle.randn( - shape=[ - feature_length, num_class_per_card[rank_id] - ], + shape=[feature_length, num_class_per_card[rank_id]], dtype=dtype) weight.stop_gradient = False weight_l2 = paddle.sqrt( - paddle.sum( - paddle.square(weight), axis=0, keepdim=True)) + paddle.sum(paddle.square(weight), + axis=0, + keepdim=True)) norm_weight = paddle.divide(weight, weight_l2) data = paddle.matmul(norm_input, norm_weight) @@ -96,12 +97,12 @@ class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase): num_class_per_card[:rank_id]) if rank_id > 0 else 0 end = np.sum(num_class_per_card[:rank_id + 1]) - integral_data = np.zeros( - (batch_size, num_class), dtype=dtype) - integral_data[:, sta:end] = data.clone().detach().numpy( - ) - integral_data = paddle.to_tensor( - integral_data, dtype=dtype) + integral_data = np.zeros((batch_size, num_class), + dtype=dtype) + integral_data[:, + sta:end] = data.clone().detach().numpy() + integral_data = paddle.to_tensor(integral_data, + dtype=dtype) paddle.distributed.all_reduce( integral_data, @@ -141,18 +142,17 @@ class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase): label=paddle.reshape(label, (-1, 1)), return_softmax=True) - np.testing.assert_allclose( - loss_a.numpy(), - loss_b.numpy(), - rtol=1e-5, - atol=1e-7) + np.testing.assert_allclose(loss_a.numpy(), + loss_b.numpy(), + rtol=1e-5, + atol=1e-7) - integral_prob = np.zeros( - (batch_size, num_class), dtype=dtype) + integral_prob = 
np.zeros((batch_size, num_class), + dtype=dtype) integral_prob[:, sta:end] = softmax_a.clone().detach( ).numpy() - integral_prob = paddle.to_tensor( - integral_prob, dtype=dtype) + integral_prob = paddle.to_tensor(integral_prob, + dtype=dtype) paddle.distributed.all_reduce( integral_prob, op=paddle.distributed.ReduceOp.SUM, @@ -160,32 +160,30 @@ class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase): integral_prob = integral_prob.detach().clone() integral_prob.stop_gradient = False - np.testing.assert_allclose( - integral_prob.numpy(), - softmax_b.numpy(), - rtol=1e-5, - atol=1e-6) + np.testing.assert_allclose(integral_prob.numpy(), + softmax_b.numpy(), + rtol=1e-5, + atol=1e-6) loss_a = loss_a.sum() / batch_size loss_b = loss_b.sum() / batch_size loss_a.backward() loss_b.backward() - integral_grad = np.zeros( - (batch_size, num_class), dtype=dtype) + integral_grad = np.zeros((batch_size, num_class), + dtype=dtype) integral_grad[:, sta:end] = data.grad.clone().detach() - integral_grad = paddle.to_tensor( - integral_grad, dtype=dtype) + integral_grad = paddle.to_tensor(integral_grad, + dtype=dtype) paddle.distributed.all_reduce( integral_grad, op=paddle.distributed.ReduceOp.SUM, group=check_group) - np.testing.assert_allclose( - integral_data.grad.numpy(), - integral_grad.numpy(), - rtol=1e-5, - atol=1e-7) + np.testing.assert_allclose(integral_data.grad.numpy(), + integral_grad.numpy(), + rtol=1e-5, + atol=1e-7) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/pipeline_mnist.py b/python/paddle/fluid/tests/unittests/pipeline_mnist.py index 37e992c4d13..90238f56eea 100644 --- a/python/paddle/fluid/tests/unittests/pipeline_mnist.py +++ b/python/paddle/fluid/tests/unittests/pipeline_mnist.py @@ -85,11 +85,13 @@ def cnn_model(data): class TestDistMnist2x2(TestDistRunnerBase): + def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Input data with fluid.device_guard("gpu:0"): - images = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype=DTYPE) + images = fluid.layers.data(name='pixel', + shape=[1, 28, 28], + dtype=DTYPE) label = fluid.layers.data(name='label', shape=[1], dtype='int64') if dist_strategy: @@ -107,8 +109,9 @@ class TestDistMnist2x2(TestDistRunnerBase): # Evaluator with fluid.device_guard("gpu:1"): batch_size_tensor = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size_tensor) + batch_acc = fluid.layers.accuracy(input=predict, + label=label, + total=batch_size_tensor) inference_program = fluid.default_main_program().clone() base_lr = self.lr @@ -125,10 +128,10 @@ class TestDistMnist2x2(TestDistRunnerBase): acc_steps = 2 # accumulated steps for pipeline if dist_strategy: # Reader - train_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) fleet.init(is_collective=True) strategy = fleet.DistributedStrategy() strategy.pipeline = True @@ -138,16 +141,16 @@ class TestDistMnist2x2(TestDistRunnerBase): 'schedule_mode': '1F1B', 'accumulate_steps': acc_steps } - dist_opt = fleet.distributed_optimizer( - optimizer=opt, strategy=strategy) + dist_opt = fleet.distributed_optimizer(optimizer=opt, + strategy=strategy) dist_opt.minimize(avg_cost) else: opt.minimize(avg_cost) # Reader - 
train_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size * acc_steps) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size * acc_steps) + train_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size * acc_steps) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size * acc_steps) if dist_strategy: return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict, data_loader diff --git a/python/paddle/fluid/tests/unittests/pipeline_mnist_multi_device.py b/python/paddle/fluid/tests/unittests/pipeline_mnist_multi_device.py index 7211bd3e92f..3ec8dfb4485 100644 --- a/python/paddle/fluid/tests/unittests/pipeline_mnist_multi_device.py +++ b/python/paddle/fluid/tests/unittests/pipeline_mnist_multi_device.py @@ -85,11 +85,13 @@ def cnn_model(data): class TestDistMnist2x2(TestDistRunnerBase): + def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Input data with fluid.device_guard("gpu:0"): - images = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype=DTYPE) + images = fluid.layers.data(name='pixel', + shape=[1, 28, 28], + dtype=DTYPE) label = fluid.layers.data(name='label', shape=[1], dtype='int64') if dist_strategy: @@ -107,8 +109,9 @@ class TestDistMnist2x2(TestDistRunnerBase): # Evaluator with fluid.device_guard("gpu:1"): batch_size_tensor = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size_tensor) + batch_acc = fluid.layers.accuracy(input=predict, + label=label, + total=batch_size_tensor) inference_program = fluid.default_main_program().clone() base_lr = self.lr @@ -125,10 +128,10 @@ class TestDistMnist2x2(TestDistRunnerBase): acc_steps = 2 # accumulated steps for pipeline if dist_strategy: # Reader - train_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) fleet.init(is_collective=True) strategy = fleet.DistributedStrategy() strategy.pipeline = True @@ -138,16 +141,16 @@ class TestDistMnist2x2(TestDistRunnerBase): 'schedule_mode': 'F-then-B', 'accumulate_steps': acc_steps } - dist_opt = fleet.distributed_optimizer( - optimizer=opt, strategy=strategy) + dist_opt = fleet.distributed_optimizer(optimizer=opt, + strategy=strategy) dist_opt.minimize(avg_cost) else: opt.minimize(avg_cost) # Reader - train_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size * acc_steps) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size * acc_steps) + train_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size * acc_steps) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size * acc_steps) if dist_strategy: return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict, data_loader diff --git a/python/paddle/fluid/tests/unittests/pipeline_mnist_one_device.py b/python/paddle/fluid/tests/unittests/pipeline_mnist_one_device.py index 41b3ad34103..cfc5a4904ac 100644 --- a/python/paddle/fluid/tests/unittests/pipeline_mnist_one_device.py +++ b/python/paddle/fluid/tests/unittests/pipeline_mnist_one_device.py @@ -76,14 +76,16 @@ def cnn_model(data): class TestDistMnist2x2(TestDistRunnerBase): + def get_model(self, 
batch_size=2, use_dgc=False, dist_strategy=None): # Input data device_id = 0 if dist_strategy: fleet.init(is_collective=True) with fluid.device_guard("gpu:0"): - images = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype=DTYPE) + images = fluid.layers.data(name='pixel', + shape=[1, 28, 28], + dtype=DTYPE) label = fluid.layers.data(name='label', shape=[1], dtype='int64') if dist_strategy: @@ -101,8 +103,9 @@ class TestDistMnist2x2(TestDistRunnerBase): # Evaluator with fluid.device_guard("gpu:0"): batch_size_tensor = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size_tensor) + batch_acc = fluid.layers.accuracy(input=predict, + label=label, + total=batch_size_tensor) inference_program = fluid.default_main_program().clone() base_lr = self.lr @@ -114,10 +117,10 @@ class TestDistMnist2x2(TestDistRunnerBase): opt = fluid.optimizer.Momentum(learning_rate=lr_val, momentum=0.9) # Reader - train_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) if dist_strategy: strategy = fleet.DistributedStrategy() @@ -126,8 +129,8 @@ class TestDistMnist2x2(TestDistRunnerBase): 'schedule_mode': 'F-then-B', 'micro_batch_size': batch_size } - dist_opt = fleet.distributed_optimizer( - optimizer=opt, strategy=strategy) + dist_opt = fleet.distributed_optimizer(optimizer=opt, + strategy=strategy) dist_opt.minimize(avg_cost) else: opt.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/process_group_gloo.py b/python/paddle/fluid/tests/unittests/process_group_gloo.py index 9be8a35f1ae..f18d73842bd 100644 --- a/python/paddle/fluid/tests/unittests/process_group_gloo.py +++ b/python/paddle/fluid/tests/unittests/process_group_gloo.py @@ -30,6 +30,7 @@ from paddle.fluid.dygraph.parallel import ParallelEnv class TestProcessGroupFp32(unittest.TestCase): + def setUp(self): paddle.seed(2022) random.seed(2022) diff --git a/python/paddle/fluid/tests/unittests/process_group_nccl.py b/python/paddle/fluid/tests/unittests/process_group_nccl.py index 3667633d3b3..1635eb6c951 100644 --- a/python/paddle/fluid/tests/unittests/process_group_nccl.py +++ b/python/paddle/fluid/tests/unittests/process_group_nccl.py @@ -39,6 +39,7 @@ def init_process_group(strategy=None): class TestProcessGroupFp32(unittest.TestCase): + def setUp(self): paddle.seed(2022) random.seed(2022) @@ -87,13 +88,15 @@ class TestProcessGroupFp32(unittest.TestCase): max_result = paddle.maximum(tensor_x, tensor_y) if pg.rank() == 0: - task = dist.all_reduce( - tensor_x, dist.ReduceOp.MAX, use_calc_stream=False) + task = dist.all_reduce(tensor_x, + dist.ReduceOp.MAX, + use_calc_stream=False) task.wait() assert np.array_equal(tensor_x, max_result) else: - task = dist.all_reduce( - tensor_y, dist.ReduceOp.MAX, use_calc_stream=False) + task = dist.all_reduce(tensor_y, + dist.ReduceOp.MAX, + use_calc_stream=False) task.wait() assert np.array_equal(tensor_y, max_result) @@ -110,13 +113,15 @@ class TestProcessGroupFp32(unittest.TestCase): min_result = paddle.minimum(tensor_x, tensor_y) if pg.rank() == 0: - task = dist.all_reduce( - tensor_x, dist.ReduceOp.MIN, use_calc_stream=False) + task = dist.all_reduce(tensor_x, + dist.ReduceOp.MIN, + use_calc_stream=False) task.wait() assert np.array_equal(tensor_x, 
min_result) else: - task = dist.all_reduce( - tensor_y, dist.ReduceOp.MIN, use_calc_stream=False) + task = dist.all_reduce(tensor_y, + dist.ReduceOp.MIN, + use_calc_stream=False) task.wait() assert np.array_equal(tensor_y, min_result) @@ -133,13 +138,15 @@ class TestProcessGroupFp32(unittest.TestCase): prod_result = np.multiply(x, y) if pg.rank() == 0: - task = dist.all_reduce( - tensor_x, dist.ReduceOp.PROD, use_calc_stream=False) + task = dist.all_reduce(tensor_x, + dist.ReduceOp.PROD, + use_calc_stream=False) task.wait() assert np.array_equal(tensor_x, prod_result) else: - task = dist.all_reduce( - tensor_y, dist.ReduceOp.PROD, use_calc_stream=False) + task = dist.all_reduce(tensor_y, + dist.ReduceOp.PROD, + use_calc_stream=False) task.wait() assert np.array_equal(tensor_y, prod_result) @@ -195,10 +202,12 @@ class TestProcessGroupFp32(unittest.TestCase): # rank 1 else: tensor_out_list = [ - paddle.empty_like(tensor_x), paddle.empty_like(tensor_x) + paddle.empty_like(tensor_x), + paddle.empty_like(tensor_x) ] - task = dist.all_gather( - tensor_out_list, tensor_y, use_calc_stream=False) + task = dist.all_gather(tensor_out_list, + tensor_y, + use_calc_stream=False) paddle.device.cuda.synchronize() tensor_out = paddle.concat(tensor_out_list) out_1 = paddle.slice(tensor_out, [0], [0], [out_shape[0] // 2]) @@ -215,8 +224,9 @@ class TestProcessGroupFp32(unittest.TestCase): # rank 1 else: tensor_out_list = [] - task = dist.all_gather( - tensor_out_list, tensor_y, use_calc_stream=False) + task = dist.all_gather(tensor_out_list, + tensor_y, + use_calc_stream=False) paddle.device.cuda.synchronize() tensor_out = paddle.concat(tensor_out_list) out_1 = paddle.slice(tensor_out, [0], [0], [out_shape[0] // 2]) @@ -322,13 +332,17 @@ class TestProcessGroupFp32(unittest.TestCase): max_result = paddle.maximum(tensor_x, tensor_y) if pg.rank() == 0: - task = dist.reduce( - tensor_x, 0, dist.ReduceOp.MAX, use_calc_stream=False) + task = dist.reduce(tensor_x, + 0, + dist.ReduceOp.MAX, + use_calc_stream=False) task.wait() assert np.array_equal(tensor_x, max_result) else: - task = dist.reduce( - tensor_y, 0, dist.ReduceOp.MAX, use_calc_stream=False) + task = dist.reduce(tensor_y, + 0, + dist.ReduceOp.MAX, + use_calc_stream=False) task.wait() print("test reduce max api ok") @@ -344,13 +358,17 @@ class TestProcessGroupFp32(unittest.TestCase): min_result = paddle.minimum(tensor_x, tensor_y) if pg.rank() == 0: - task = dist.reduce( - tensor_x, 0, dist.ReduceOp.MIN, use_calc_stream=False) + task = dist.reduce(tensor_x, + 0, + dist.ReduceOp.MIN, + use_calc_stream=False) task.wait() assert np.array_equal(tensor_x, min_result) else: - task = dist.reduce( - tensor_y, 0, dist.ReduceOp.MIN, use_calc_stream=False) + task = dist.reduce(tensor_y, + 0, + dist.ReduceOp.MIN, + use_calc_stream=False) task.wait() print("test reduce min api ok") @@ -366,13 +384,17 @@ class TestProcessGroupFp32(unittest.TestCase): prod_result = np.multiply(x, y) if pg.rank() == 0: - task = dist.reduce( - tensor_x, 0, dist.ReduceOp.PROD, use_calc_stream=False) + task = dist.reduce(tensor_x, + 0, + dist.ReduceOp.PROD, + use_calc_stream=False) task.wait() assert np.array_equal(tensor_x, prod_result) else: - task = dist.reduce( - tensor_y, 0, dist.ReduceOp.PROD, use_calc_stream=False) + task = dist.reduce(tensor_y, + 0, + dist.ReduceOp.PROD, + use_calc_stream=False) task.wait() print("test reduce prod api ok") @@ -386,8 +408,9 @@ class TestProcessGroupFp32(unittest.TestCase): tensor_y = paddle.to_tensor(y) if pg.rank() == 0: in_1, in_2 = 
paddle.split(tensor_x, 2) - task = dist.scatter( - tensor_y, [in_1, in_2], 0, use_calc_stream=True) + task = dist.scatter(tensor_y, [in_1, in_2], + 0, + use_calc_stream=True) #task.wait() paddle.device.cuda.synchronize() # rank 1 @@ -440,6 +463,7 @@ class TestProcessGroupFp32(unittest.TestCase): class TestProcessGroupFp16(TestProcessGroupFp32): + def setUp(self): paddle.seed(2022) random.seed(2022) diff --git a/python/paddle/fluid/tests/unittests/ps/__init__.py b/python/paddle/fluid/tests/unittests/ps/__init__.py index 1f919f0f05b..5a5bd1e0048 100644 --- a/python/paddle/fluid/tests/unittests/ps/__init__.py +++ b/python/paddle/fluid/tests/unittests/ps/__init__.py @@ -12,6 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License.p -# Note: On Windows, import form subdirectories such as dirA()->dirB(), current directory +# Note: On Windows, import form subdirectories such as dirA()->dirB(), current directory # will still be dirA(), But is should be dirB(). So it will ModulNotFoundError # please refer to https://stackoverflow.com/questions/8953844/import-module-from-subfolder diff --git a/python/paddle/fluid/tests/unittests/ps/dataset_generator_A.py b/python/paddle/fluid/tests/unittests/ps/dataset_generator_A.py index 9aa7452423f..1ab4b3580d6 100755 --- a/python/paddle/fluid/tests/unittests/ps/dataset_generator_A.py +++ b/python/paddle/fluid/tests/unittests/ps/dataset_generator_A.py @@ -23,6 +23,7 @@ categorical_range_ = range(14, 40) class CriteoDataset(dg.MultiSlotDataGenerator): + def generate_sample(self, line): """ Read the data line by line and process it as a dictionary diff --git a/python/paddle/fluid/tests/unittests/ps/dataset_generator_B.py b/python/paddle/fluid/tests/unittests/ps/dataset_generator_B.py index d76897a240c..76b2468592d 100755 --- a/python/paddle/fluid/tests/unittests/ps/dataset_generator_B.py +++ b/python/paddle/fluid/tests/unittests/ps/dataset_generator_B.py @@ -23,6 +23,7 @@ categorical_range_ = range(14, 40) class CriteoDataset(dg.MultiSlotDataGenerator): + def generate_sample(self, line): """ Read the data line by line and process it as a dictionary diff --git a/python/paddle/fluid/tests/unittests/ps/fl_ps_trainer.py b/python/paddle/fluid/tests/unittests/ps/fl_ps_trainer.py index 6e9eefe879d..6018060bba5 100755 --- a/python/paddle/fluid/tests/unittests/ps/fl_ps_trainer.py +++ b/python/paddle/fluid/tests/unittests/ps/fl_ps_trainer.py @@ -108,14 +108,13 @@ def fl_ps_train(): # A 方和 B 方如果要以文件粒度 shuffle 时,则需要固定同一个种子 dataset.set_filelist(file_list) start_time = time.time() - exe.train_from_dataset( - program=fluid.default_main_program(), - dataset=dataset, - print_period=2, - debug=False) + exe.train_from_dataset(program=fluid.default_main_program(), + dataset=dataset, + print_period=2, + debug=False) end_time = time.time() - print("trainer epoch %d finished, use time=%d\n" % ( - (epoch), end_time - start_time)) + print("trainer epoch %d finished, use time=%d\n" % + ((epoch), end_time - start_time)) exe.close() _runtime_handle._stop_worker() print("Fl partyA Trainer Success!") @@ -125,17 +124,17 @@ def fl_ps_train(): _runtime_handle._init_worker() inputs = [feeds_list[0], feeds_list[-1]] # 顺序务必要和 dataset_generator_B.py 中保持一致 - dataset, file_list = get_dataset( - inputs, config, "python dataset_generator_B.py", "heter_worker") + dataset, file_list = get_dataset(inputs, config, + "python dataset_generator_B.py", + "heter_worker") print("fluid.default_main_program: {}".format( 
fluid.default_main_program()._heter_pipeline_opt)) for epoch in range(epoch_num): dataset.set_filelist(file_list) - exe.train_from_dataset( - program=fluid.default_main_program(), - dataset=dataset, - print_period=2, - debug=False) + exe.train_from_dataset(program=fluid.default_main_program(), + dataset=dataset, + print_period=2, + debug=False) exe.close() _runtime_handle._stop_worker() print("Fl partB Trainer Success!") diff --git a/python/paddle/fluid/tests/unittests/ps/ps_dnn_trainer.py b/python/paddle/fluid/tests/unittests/ps/ps_dnn_trainer.py index a2ec563efd8..2d430cac648 100755 --- a/python/paddle/fluid/tests/unittests/ps/ps_dnn_trainer.py +++ b/python/paddle/fluid/tests/unittests/ps/ps_dnn_trainer.py @@ -26,6 +26,7 @@ import warnings import ast import numpy as np import struct + sys.path.append("..") from ps_dnn_model import StaticModel @@ -43,6 +44,7 @@ def is_distributed_env(): class YamlHelper(object): + def load_yaml(self, yaml_file, other_part=None): part_list = ["runner", "hyper_parameters"] if other_part: @@ -121,8 +123,8 @@ class YamlHelper(object): for k, v in envs.items(): max_k = max(max_k, len(k)) - h_format = " " + "|{{:>{}s}}{}{{:^{}s}}|\n".format(max_k, " " * - spacing, max_v) + h_format = " " + "|{{:>{}s}}{}{{:^{}s}}|\n".format( + max_k, " " * spacing, max_v) l_format = " " + "|{{:>{}s}}{{}}{{:^{}s}}|\n".format(max_k, max_v) length = max_k + max_v + spacing @@ -260,29 +262,45 @@ def get_model(config): def parse_args(): parser = argparse.ArgumentParser("PsTest train script") - parser.add_argument( - '-m', '--config_yaml', type=str, required=True, help='config file path') - parser.add_argument( - '-bf16', - '--pure_bf16', - type=ast.literal_eval, - default=False, - help="whether use bf16") - - parser.add_argument( - '--run_minimize', type=int, default=0, help="test single pass") - parser.add_argument( - '--run_single_pass', type=int, default=0, help="test single pass") - parser.add_argument( - '--run_the_one_ps', type=int, default=0, help="test the_one_ps") - parser.add_argument( - '--debug_new_minimize', type=int, default=0, help="test single pass") - parser.add_argument( - '--debug_new_pass', type=int, default=0, help="test single pass") - parser.add_argument( - '--applied_pass_name', type=str, default="", help="test single pass") - parser.add_argument( - '--debug_the_one_ps', type=int, default=0, help="test the_one_ps") + parser.add_argument('-m', + '--config_yaml', + type=str, + required=True, + help='config file path') + parser.add_argument('-bf16', + '--pure_bf16', + type=ast.literal_eval, + default=False, + help="whether use bf16") + + parser.add_argument('--run_minimize', + type=int, + default=0, + help="test single pass") + parser.add_argument('--run_single_pass', + type=int, + default=0, + help="test single pass") + parser.add_argument('--run_the_one_ps', + type=int, + default=0, + help="test the_one_ps") + parser.add_argument('--debug_new_minimize', + type=int, + default=0, + help="test single pass") + parser.add_argument('--debug_new_pass', + type=int, + default=0, + help="test single pass") + parser.add_argument('--applied_pass_name', + type=str, + default="", + help="test single pass") + parser.add_argument('--debug_the_one_ps', + type=int, + default=0, + help="test the_one_ps") args = parser.parse_args() args.abs_dir = os.path.dirname(os.path.abspath(args.config_yaml)) @@ -307,6 +325,7 @@ def bf16_to_fp32(val): class DnnTrainer(object): + def __init__(self, config): self.metrics = {} self.config = config @@ -368,8 +387,8 @@ class DnnTrainer(object): 
debug_program(_main_file, loss.block.program) elif self.role_maker._is_heter_worker(): _main_file = ps_log_root_dir + sync_mode + '_run_minimize' + '_debug:_' + str( - self.config[ - 'debug_new_minimize']) + '_heter_worker_main.prototxt' + self.config['debug_new_minimize'] + ) + '_heter_worker_main.prototxt' debug_program(_main_file, loss.block.program) def run_single_pass(self): @@ -413,14 +432,14 @@ class DnnTrainer(object): _main = worker.append_send_ops_pass(_main, compiled_config) if fleet.is_server(): - _main_file = ps_log_root_dir + sync_mode + "_" + str(config[ - "applied_pass_name"]) + '_debug:_' + str(self.config[ - 'debug_new_pass']) + '_server_main.prototxt' + _main_file = ps_log_root_dir + sync_mode + "_" + str( + config["applied_pass_name"]) + '_debug:_' + str( + self.config['debug_new_pass']) + '_server_main.prototxt' debug_program(_main_file, _main) elif fleet.is_worker(): - _main_file = ps_log_root_dir + sync_mode + "_" + str(config[ - "applied_pass_name"]) + '_debug:_' + str(self.config[ - 'debug_new_pass']) + '_worker_main.prototxt' + _main_file = ps_log_root_dir + sync_mode + "_" + str( + config["applied_pass_name"]) + '_debug:_' + str( + self.config['debug_new_pass']) + '_worker_main.prototxt' debug_program(_main_file, _main) def run_the_one_ps(self): @@ -451,14 +470,16 @@ class DnnTrainer(object): if fleet.is_worker(): worker_desc = _runtime_handle.ps_desc_builder.build_worker_desc( ) - with open(ps_log_root_dir + sync_mode + '_' + - 'new_worker_ps_desc', 'w') as f: + with open( + ps_log_root_dir + sync_mode + '_' + + 'new_worker_ps_desc', 'w') as f: f.write(worker_desc) if fleet.is_server(): server_desc = _runtime_handle.ps_desc_builder.build_server_desc( ) - with open(ps_log_root_dir + sync_mode + '_' + - 'new_server_ps_desc', 'w') as f: + with open( + ps_log_root_dir + sync_mode + '_' + + 'new_server_ps_desc', 'w') as f: f.write(server_desc) else: diff --git a/python/paddle/fluid/tests/unittests/ps/test_fl_ps.py b/python/paddle/fluid/tests/unittests/ps/test_fl_ps.py index 2dc5b919d0d..c2fc55efdfe 100755 --- a/python/paddle/fluid/tests/unittests/ps/test_fl_ps.py +++ b/python/paddle/fluid/tests/unittests/ps/test_fl_ps.py @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
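All of the hunks above are mechanical yapf re-wraps with no behavioural change. As a rough illustration of how the aligned-argument layout (e.g. the exe.train_from_dataset(...) call) is produced, the sketch below runs yapf programmatically; the 'pep8' base style is only an assumption for the example, since the repository's actual .style.yapf is not shown in this section.

# Minimal sketch of invoking yapf on one of the calls rewritten above.
# Assumes yapf is installed (pip install yapf); the style choice is a guess,
# not the project's real configuration.
from yapf.yapflib.yapf_api import FormatCode

source = (
    "exe.train_from_dataset(program=fluid.default_main_program(), "
    "dataset=dataset, print_period=2, debug=False)\n"
)

formatted, changed = FormatCode(source, style_config='pep8')
print(changed)    # True when yapf had to rewrap the call
print(formatted)  # with these defaults the keyword arguments are usually split
                  # one per line, aligned under the opening parenthesis
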
@@ -21,6 +21,7 @@ import os class FlPsTest(unittest.TestCase): + def test_launch_fl_ps(self): pass ''' diff --git a/python/paddle/fluid/tests/unittests/ps/test_the_one_ps.py b/python/paddle/fluid/tests/unittests/ps/test_the_one_ps.py index 6752ea081a0..628d0d94ece 100755 --- a/python/paddle/fluid/tests/unittests/ps/test_the_one_ps.py +++ b/python/paddle/fluid/tests/unittests/ps/test_the_one_ps.py @@ -31,6 +31,7 @@ from google.protobuf import text_format class TestTheOnePs(PsPassTestBase): + def setUp(self): pass diff --git a/python/paddle/fluid/tests/unittests/ps_dnn_model.py b/python/paddle/fluid/tests/unittests/ps_dnn_model.py index f41f03297c9..d54e6cd643d 100755 --- a/python/paddle/fluid/tests/unittests/ps_dnn_model.py +++ b/python/paddle/fluid/tests/unittests/ps_dnn_model.py @@ -20,6 +20,7 @@ import paddle.distributed.fleet as fleet class DNNLayer(nn.Layer): + def __init__(self, sparse_feature_number, sparse_feature_dim, @@ -90,6 +91,7 @@ class DNNLayer(nn.Layer): class FlDNNLayer(nn.Layer): + def __init__(self, sparse_feature_number, sparse_feature_dim, @@ -202,8 +204,8 @@ class FlDNNLayer(nn.Layer): def interactive_layer(self, bottom_a, bottom_b): with paddle.fluid.device_guard( self.PART_B_JOINT_OP_DEVICE_FlAG): # joint point - interactive = paddle.fluid.layers.elementwise_add(bottom_a, - bottom_b) + interactive = paddle.fluid.layers.elementwise_add( + bottom_a, bottom_b) return interactive def top_layer(self, interactive, label_input): @@ -219,8 +221,8 @@ class FlDNNLayer(nn.Layer): num_thresholds=2**12, slide_steps=20) - cost = paddle.nn.functional.cross_entropy( - input=y_top, label=label_input) + cost = paddle.nn.functional.cross_entropy(input=y_top, + label=label_input) avg_cost = paddle.mean(x=cost) return auc, avg_cost @@ -238,6 +240,7 @@ class FlDNNLayer(nn.Layer): class StaticModel(): + def __init__(self, config): self.cost = None self.infer_target_var = None @@ -265,14 +268,14 @@ class StaticModel(): self.fc_sizes = self.config.get("hyper_parameters.fc_sizes") def create_feeds(self, is_infer=False): - dense_input = paddle.static.data( - name="dense_input", - shape=[None, self.dense_input_dim], - dtype="float32") + dense_input = paddle.static.data(name="dense_input", + shape=[None, self.dense_input_dim], + dtype="float32") sparse_input_ids = [ - paddle.static.data( - name="C" + str(i), shape=[None, 1], dtype="int64") + paddle.static.data(name="C" + str(i), + shape=[None, 1], + dtype="int64") for i in range(1, self.sparse_inputs_slots) ] @@ -287,13 +290,12 @@ class StaticModel(): self.dense_input = input[-1] sparse_number = self.sparse_inputs_slots - 1 - dnn_model = DNNLayer( - self.sparse_feature_number, - self.sparse_feature_dim, - self.dense_input_dim, - sparse_number, - self.fc_sizes, - sync_mode=self.sync_mode) + dnn_model = DNNLayer(self.sparse_feature_number, + self.sparse_feature_dim, + self.dense_input_dim, + sparse_number, + self.fc_sizes, + sync_mode=self.sync_mode) raw_predict_2d = dnn_model.forward(self.sparse_inputs, self.dense_input) predict_2d = paddle.nn.functional.softmax(raw_predict_2d) self.predict = predict_2d @@ -309,8 +311,8 @@ class StaticModel(): fetch_dict = {'auc': auc} return fetch_dict - cost = paddle.nn.functional.cross_entropy( - input=raw_predict_2d, label=self.label_input) + cost = paddle.nn.functional.cross_entropy(input=raw_predict_2d, + label=self.label_input) avg_cost = paddle.mean(x=cost) self._cost = avg_cost @@ -323,12 +325,11 @@ class StaticModel(): self.dense_input = input[-1] self.sparse_number = self.sparse_inputs_slots - 1 - 
fl_dnn_model = FlDNNLayer( - self.sparse_feature_number, - self.sparse_feature_dim, - self.dense_input_dim, - self.sparse_number, - sync_mode=self.sync_mode) + fl_dnn_model = FlDNNLayer(self.sparse_feature_number, + self.sparse_feature_dim, + self.dense_input_dim, + self.sparse_number, + sync_mode=self.sync_mode) auc, avg_cost = fl_dnn_model.forward(self.sparse_inputs, self.dense_input, self.label_input) diff --git a/python/paddle/fluid/tests/unittests/py_precise_roi_pool.py b/python/paddle/fluid/tests/unittests/py_precise_roi_pool.py index aa7b8420f48..29721c86200 100644 --- a/python/paddle/fluid/tests/unittests/py_precise_roi_pool.py +++ b/python/paddle/fluid/tests/unittests/py_precise_roi_pool.py @@ -17,6 +17,7 @@ import numpy as np class PyPrRoIPool(object): + def __init__(self): pass @@ -34,36 +35,32 @@ class PyPrRoIPool(object): beta = y0 - float(s_h) lim_alpha = x1 - float(s_w) lim_beta = y1 - float(s_h) - tmp = ( - lim_alpha - 0.5 * lim_alpha * lim_alpha - alpha + 0.5 * alpha * - alpha) * ( - lim_beta - 0.5 * lim_beta * lim_beta - beta + 0.5 * beta * beta) + tmp = (lim_alpha - 0.5 * lim_alpha * lim_alpha - alpha + + 0.5 * alpha * alpha) * (lim_beta - 0.5 * lim_beta * lim_beta - + beta + 0.5 * beta * beta) sum_out += self._PrRoIPoolingGetData(this_data, s_h, s_w, h0, w0) * tmp alpha = float(e_w) - x1 lim_alpha = float(e_w) - x0 - tmp = ( - lim_alpha - 0.5 * lim_alpha * lim_alpha - alpha + 0.5 * alpha * - alpha) * ( - lim_beta - 0.5 * lim_beta * lim_beta - beta + 0.5 * beta * beta) + tmp = (lim_alpha - 0.5 * lim_alpha * lim_alpha - alpha + + 0.5 * alpha * alpha) * (lim_beta - 0.5 * lim_beta * lim_beta - + beta + 0.5 * beta * beta) sum_out += self._PrRoIPoolingGetData(this_data, s_h, e_w, h0, w0) * tmp alpha = x0 - float(s_w) beta = float(e_h) - y1 lim_alpha = x1 - float(s_w) lim_beta = float(e_h) - y0 - tmp = ( - lim_alpha - 0.5 * lim_alpha * lim_alpha - alpha + 0.5 * alpha * - alpha) * ( - lim_beta - 0.5 * lim_beta * lim_beta - beta + 0.5 * beta * beta) + tmp = (lim_alpha - 0.5 * lim_alpha * lim_alpha - alpha + + 0.5 * alpha * alpha) * (lim_beta - 0.5 * lim_beta * lim_beta - + beta + 0.5 * beta * beta) sum_out += self._PrRoIPoolingGetData(this_data, e_h, s_w, h0, w0) * tmp alpha = float(e_w) - x1 lim_alpha = float(e_w) - x0 - tmp = ( - lim_alpha - 0.5 * lim_alpha * lim_alpha - alpha + 0.5 * alpha * - alpha) * ( - lim_beta - 0.5 * lim_beta * lim_beta - beta + 0.5 * beta * beta) + tmp = (lim_alpha - 0.5 * lim_alpha * lim_alpha - alpha + + 0.5 * alpha * alpha) * (lim_beta - 0.5 * lim_beta * lim_beta - + beta + 0.5 * beta * beta) sum_out += self._PrRoIPoolingGetData(this_data, e_h, e_w, h0, w0) * tmp return sum_out @@ -141,9 +138,10 @@ class PyPrRoIPool(object): w_iter + 1, max(win_start_h, float(h_iter)), max(win_start_w, float(w_iter)), - min(win_end_h, float(h_iter) + 1.0), - min(win_end_w, float(w_iter + 1.0)), - height, width) + min(win_end_h, + float(h_iter) + 1.0), + min(win_end_w, + float(w_iter + 1.0)), height, width) out_data[i, c, ph, pw] = sum_out / win_size diff --git a/python/paddle/fluid/tests/unittests/rnn/convert.py b/python/paddle/fluid/tests/unittests/rnn/convert.py index 645f67fca27..1a3e571269d 100644 --- a/python/paddle/fluid/tests/unittests/rnn/convert.py +++ b/python/paddle/fluid/tests/unittests/rnn/convert.py @@ -53,10 +53,9 @@ def convert_params_for_net_static(np_net, paddle_net, place): def get_params_for_cell(np_cell, num_layers, idx): state = np_cell.parameters - weight_list = [ - ('{}.weight_{}'.format(num_layers, idx), state['weight_ih']), - 
('{}.weight_{}'.format(num_layers, idx + 1), state['weight_hh']) - ] + weight_list = [('{}.weight_{}'.format(num_layers, idx), state['weight_ih']), + ('{}.weight_{}'.format(num_layers, + idx + 1), state['weight_hh'])] bias_list = [('{}.bias_{}'.format(num_layers, idx), state['bias_ih']), ('{}.bias_{}'.format(num_layers, idx + 1), state['bias_hh'])] return weight_list, bias_list diff --git a/python/paddle/fluid/tests/unittests/rnn/rnn_numpy.py b/python/paddle/fluid/tests/unittests/rnn/rnn_numpy.py index dd1e18b89d2..fbdc3ec8a48 100644 --- a/python/paddle/fluid/tests/unittests/rnn/rnn_numpy.py +++ b/python/paddle/fluid/tests/unittests/rnn/rnn_numpy.py @@ -17,11 +17,13 @@ import math class LayerMixin(object): + def __call__(self, *args, **kwargs): return self.forward(*args, **kwargs) class LayerListMixin(LayerMixin): + def __init__(self, layers=None): self._layers = list(layers) if layers else [] @@ -33,6 +35,7 @@ class LayerListMixin(LayerMixin): class SimpleRNNCell(LayerMixin): + def __init__(self, input_size, hidden_size, @@ -49,10 +52,10 @@ class SimpleRNNCell(LayerMixin): self.parameters = dict() std = 1.0 / math.sqrt(hidden_size) - self.weight_ih = np.random.uniform(-std, std, ( - hidden_size, input_size)).astype(dtype) - self.weight_hh = np.random.uniform(-std, std, ( - hidden_size, hidden_size)).astype(dtype) + self.weight_ih = np.random.uniform( + -std, std, (hidden_size, input_size)).astype(dtype) + self.weight_hh = np.random.uniform( + -std, std, (hidden_size, hidden_size)).astype(dtype) self.parameters['weight_ih'] = self.weight_ih self.parameters['weight_hh'] = self.weight_hh if bias: @@ -85,16 +88,17 @@ class SimpleRNNCell(LayerMixin): class GRUCell(LayerMixin): + def __init__(self, input_size, hidden_size, bias=True, dtype="float64"): self.input_size = input_size self.hidden_size = hidden_size self.bias = bias self.parameters = dict() std = 1.0 / math.sqrt(hidden_size) - self.weight_ih = np.random.uniform(-std, std, ( - 3 * hidden_size, input_size)).astype(dtype) - self.weight_hh = np.random.uniform(-std, std, ( - 3 * hidden_size, hidden_size)).astype(dtype) + self.weight_ih = np.random.uniform( + -std, std, (3 * hidden_size, input_size)).astype(dtype) + self.weight_hh = np.random.uniform( + -std, std, (3 * hidden_size, hidden_size)).astype(dtype) self.parameters['weight_ih'] = self.weight_ih self.parameters['weight_hh'] = self.weight_hh if bias: @@ -133,16 +137,17 @@ class GRUCell(LayerMixin): class LSTMCell(LayerMixin): + def __init__(self, input_size, hidden_size, bias=True, dtype="float64"): self.input_size = input_size self.hidden_size = hidden_size self.bias = bias self.parameters = dict() std = 1.0 / math.sqrt(hidden_size) - self.weight_ih = np.random.uniform(-std, std, ( - 4 * hidden_size, input_size)).astype(dtype) - self.weight_hh = np.random.uniform(-std, std, ( - 4 * hidden_size, hidden_size)).astype(dtype) + self.weight_ih = np.random.uniform( + -std, std, (4 * hidden_size, input_size)).astype(dtype) + self.weight_hh = np.random.uniform( + -std, std, (4 * hidden_size, hidden_size)).astype(dtype) self.parameters['weight_ih'] = self.weight_ih self.parameters['weight_hh'] = self.weight_hh if bias: @@ -327,6 +332,7 @@ def concat_states(states, bidirectional=False, state_components=1): class RNN(LayerMixin): + def __init__(self, cell, is_reverse=False, time_major=False): super(RNN, self).__init__() self.cell = cell @@ -347,6 +353,7 @@ class RNN(LayerMixin): class BiRNN(LayerMixin): + def __init__(self, cell_fw, cell_bw, time_major=False): super(BiRNN, self).__init__() 
self.cell_fw = cell_fw @@ -371,6 +378,7 @@ class BiRNN(LayerMixin): class RNNMixin(LayerListMixin): + def forward(self, inputs, initial_states=None, sequence_length=None): batch_index = 1 if self.time_major else 0 batch_size = inputs.shape[batch_index] @@ -404,6 +412,7 @@ class RNNMixin(LayerListMixin): class SimpleRNN(RNNMixin): + def __init__(self, input_size, hidden_size, @@ -417,27 +426,36 @@ class SimpleRNN(RNNMixin): bidirectional_list = ["bidirectional", "bidirect"] if direction in ["forward"]: is_reverse = False - cell = SimpleRNNCell( - input_size, hidden_size, nonlinearity=nonlinearity, dtype=dtype) + cell = SimpleRNNCell(input_size, + hidden_size, + nonlinearity=nonlinearity, + dtype=dtype) self.append(RNN(cell, is_reverse, time_major)) for i in range(1, num_layers): - cell = SimpleRNNCell( - hidden_size, - hidden_size, - nonlinearity=nonlinearity, - dtype=dtype) + cell = SimpleRNNCell(hidden_size, + hidden_size, + nonlinearity=nonlinearity, + dtype=dtype) self.append(RNN(cell, is_reverse, time_major)) elif direction in bidirectional_list: - cell_fw = SimpleRNNCell( - input_size, hidden_size, nonlinearity=nonlinearity, dtype=dtype) - cell_bw = SimpleRNNCell( - input_size, hidden_size, nonlinearity=nonlinearity, dtype=dtype) + cell_fw = SimpleRNNCell(input_size, + hidden_size, + nonlinearity=nonlinearity, + dtype=dtype) + cell_bw = SimpleRNNCell(input_size, + hidden_size, + nonlinearity=nonlinearity, + dtype=dtype) self.append(BiRNN(cell_fw, cell_bw, time_major)) for i in range(1, num_layers): - cell_fw = SimpleRNNCell( - 2 * hidden_size, hidden_size, nonlinearity, dtype=dtype) - cell_bw = SimpleRNNCell( - 2 * hidden_size, hidden_size, nonlinearity, dtype=dtype) + cell_fw = SimpleRNNCell(2 * hidden_size, + hidden_size, + nonlinearity, + dtype=dtype) + cell_bw = SimpleRNNCell(2 * hidden_size, + hidden_size, + nonlinearity, + dtype=dtype) self.append(BiRNN(cell_fw, cell_bw, time_major)) else: raise ValueError( @@ -454,6 +472,7 @@ class SimpleRNN(RNNMixin): class LSTM(RNNMixin): + def __init__(self, input_size, hidden_size, @@ -495,6 +514,7 @@ class LSTM(RNNMixin): class GRU(RNNMixin): + def __init__(self, input_size, hidden_size, diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells.py index cade4b850cd..33dca32b76c 100644 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells.py @@ -13,6 +13,7 @@ # limitations under the License. 
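The rnn_numpy.py hunks above only re-wrap the parameter initialisation of the reference cells; for orientation, the sketch below shows the standard Elman recurrence that a SimpleRNN-style cell computes. It is a simplified, self-contained illustration, not the file's actual forward code.

import numpy as np

def simple_rnn_step(x, h_prev, weight_ih, weight_hh, bias_ih, bias_hh):
    """One Elman-RNN step: h_t = tanh(W_ih x + b_ih + W_hh h_{t-1} + b_hh)."""
    return np.tanh(x @ weight_ih.T + bias_ih + h_prev @ weight_hh.T + bias_hh)

# Shapes mirror the reference cell: weight_ih is (hidden, input),
# weight_hh is (hidden, hidden), biases are (hidden,).
hidden, inputs, batch = 32, 16, 4
rng = np.random.default_rng(2022)
std = 1.0 / np.sqrt(hidden)
w_ih = rng.uniform(-std, std, (hidden, inputs))
w_hh = rng.uniform(-std, std, (hidden, hidden))
b_ih = rng.uniform(-std, std, hidden)
b_hh = rng.uniform(-std, std, hidden)

h = simple_rnn_step(rng.standard_normal((batch, inputs)),
                    np.zeros((batch, hidden)), w_ih, w_hh, b_ih, b_hh)
print(h.shape)  # (4, 32)
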
import paddle + paddle.framework.set_default_dtype("float64") import numpy as np @@ -23,6 +24,7 @@ from convert import convert_params_for_cell class TestSimpleRNNCell(unittest.TestCase): + def __init__(self, bias=True, place="cpu"): super(TestSimpleRNNCell, self).__init__(methodName="runTest") self.bias = bias @@ -32,8 +34,10 @@ class TestSimpleRNNCell(unittest.TestCase): def setUp(self): paddle.disable_static(self.place) rnn1 = SimpleRNNCell(16, 32, bias=self.bias) - rnn2 = paddle.nn.SimpleRNNCell( - 16, 32, bias_ih_attr=self.bias, bias_hh_attr=self.bias) + rnn2 = paddle.nn.SimpleRNNCell(16, + 32, + bias_ih_attr=self.bias, + bias_hh_attr=self.bias) convert_params_for_cell(rnn1, rnn2) self.rnn1 = rnn1 @@ -61,6 +65,7 @@ class TestSimpleRNNCell(unittest.TestCase): np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) def test_errors(self): + def test_zero_hidden_size(): cell = paddle.nn.SimpleRNNCell(-1, 0) @@ -73,6 +78,7 @@ class TestSimpleRNNCell(unittest.TestCase): class TestGRUCell(unittest.TestCase): + def __init__(self, bias=True, place="cpu"): super(TestGRUCell, self).__init__(methodName="runTest") self.bias = bias @@ -82,8 +88,10 @@ class TestGRUCell(unittest.TestCase): def setUp(self): paddle.disable_static(self.place) rnn1 = GRUCell(16, 32, bias=self.bias) - rnn2 = paddle.nn.GRUCell( - 16, 32, bias_ih_attr=self.bias, bias_hh_attr=self.bias) + rnn2 = paddle.nn.GRUCell(16, + 32, + bias_ih_attr=self.bias, + bias_hh_attr=self.bias) convert_params_for_cell(rnn1, rnn2) self.rnn1 = rnn1 @@ -111,6 +119,7 @@ class TestGRUCell(unittest.TestCase): np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) def test_errors(self): + def test_zero_hidden_size(): cell = paddle.nn.GRUCell(-1, 0) @@ -123,6 +132,7 @@ class TestGRUCell(unittest.TestCase): class TestLSTMCell(unittest.TestCase): + def __init__(self, bias=True, place="cpu"): super(TestLSTMCell, self).__init__(methodName="runTest") self.bias = bias @@ -131,8 +141,10 @@ class TestLSTMCell(unittest.TestCase): def setUp(self): rnn1 = LSTMCell(16, 32, bias=self.bias) - rnn2 = paddle.nn.LSTMCell( - 16, 32, bias_ih_attr=self.bias, bias_hh_attr=self.bias) + rnn2 = paddle.nn.LSTMCell(16, + 32, + bias_ih_attr=self.bias, + bias_hh_attr=self.bias) convert_params_for_cell(rnn1, rnn2) self.rnn1 = rnn1 @@ -147,9 +159,9 @@ class TestLSTMCell(unittest.TestCase): prev_c = np.random.randn(4, 32) y1, (h1, c1) = rnn1(x, (prev_h, prev_c)) - y2, (h2, c2) = rnn2( - paddle.to_tensor(x), - (paddle.to_tensor(prev_h), paddle.to_tensor(prev_c))) + y2, (h2, + c2) = rnn2(paddle.to_tensor(x), + (paddle.to_tensor(prev_h), paddle.to_tensor(prev_c))) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) @@ -165,6 +177,7 @@ class TestLSTMCell(unittest.TestCase): np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) def test_errors(self): + def test_zero_hidden_size(): cell = paddle.nn.LSTMCell(-1, 0) diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells_static.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells_static.py index bb15b271349..b4a5887c593 100644 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells_static.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells_static.py @@ -13,6 +13,7 @@ # limitations under the License. 
import paddle + paddle.framework.set_default_dtype("float64") paddle.enable_static() @@ -24,6 +25,7 @@ from rnn_numpy import SimpleRNNCell, LSTMCell, GRUCell class TestSimpleRNNCell(unittest.TestCase): + def __init__(self, bias=True, place="cpu"): super(TestSimpleRNNCell, self).__init__(methodName="runTest") self.bias = bias @@ -37,8 +39,10 @@ class TestSimpleRNNCell(unittest.TestCase): sp = paddle.static.Program() with paddle.fluid.unique_name.guard(): with paddle.static.program_guard(mp, sp): - rnn2 = paddle.nn.SimpleRNNCell( - 16, 32, bias_ih_attr=self.bias, bias_hh_attr=self.bias) + rnn2 = paddle.nn.SimpleRNNCell(16, + 32, + bias_ih_attr=self.bias, + bias_hh_attr=self.bias) place = self.place exe = paddle.static.Executor(place) @@ -119,6 +123,7 @@ class TestSimpleRNNCell(unittest.TestCase): class TestGRUCell(unittest.TestCase): + def __init__(self, bias=True, place="cpu"): super(TestGRUCell, self).__init__(methodName="runTest") self.bias = bias @@ -132,8 +137,10 @@ class TestGRUCell(unittest.TestCase): sp = paddle.static.Program() with paddle.fluid.unique_name.guard(): with paddle.static.program_guard(mp, sp): - rnn2 = paddle.nn.GRUCell( - 16, 32, bias_ih_attr=self.bias, bias_hh_attr=self.bias) + rnn2 = paddle.nn.GRUCell(16, + 32, + bias_ih_attr=self.bias, + bias_hh_attr=self.bias) place = self.place exe = paddle.static.Executor(place) @@ -215,6 +222,7 @@ class TestGRUCell(unittest.TestCase): class TestLSTMCell(unittest.TestCase): + def __init__(self, bias=True, place="cpu"): super(TestLSTMCell, self).__init__(methodName="runTest") self.bias = bias @@ -228,8 +236,10 @@ class TestLSTMCell(unittest.TestCase): sp = paddle.static.Program() with paddle.fluid.unique_name.guard(): with paddle.static.program_guard(mp, sp): - rnn2 = paddle.nn.LSTMCell( - 16, 32, bias_ih_attr=self.bias, bias_hh_attr=self.bias) + rnn2 = paddle.nn.LSTMCell(16, + 32, + bias_ih_attr=self.bias, + bias_hh_attr=self.bias) place = self.place exe = paddle.static.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_cudnn_params_packing.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_cudnn_params_packing.py index 0712d5be23e..f4dbc3bbbc7 100644 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_cudnn_params_packing.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_cudnn_params_packing.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,12 +18,15 @@ from unittest import TestCase def create_model(): hidden_size = 32 - bilstm = paddle.nn.LSTM( - hidden_size, hidden_size, num_layers=1, direction='bidirectional') + bilstm = paddle.nn.LSTM(hidden_size, + hidden_size, + num_layers=1, + direction='bidirectional') return bilstm class TestRNNProgramClone(TestCase): + def setUp(self): paddle.enable_static() @@ -35,14 +38,14 @@ class TestRNNProgramClone(TestCase): # test a typical case in static graph usage: create two nearly # identical program with a shared startup program to share their # parameters - # + # # when creating a parameter, the name is checked. 
If there is already # a parameter with the same name, which is the output of a operator # (i.e. its creator), its re-creation is skipped. - # + # # but if that parameter has been the output of more than one operator, # an exception is raised. For special cases, white list is added. - # flattening rnn's parameters for the need to call cudnn kernel is such + # flattening rnn's parameters for the need to call cudnn kernel is such # a case. with paddle.static.program_guard(train_program, startup_prog): with paddle.fluid.unique_name.guard(): diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py index 263efedc714..83a50c2a447 100755 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py @@ -13,6 +13,7 @@ # limitations under the License. import paddle + paddle.set_default_dtype("float64") from paddle.fluid.layers import sequence_mask @@ -26,6 +27,7 @@ bidirectional_list = ["bidirectional", "bidirect"] class TestSimpleRNN(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): super(TestSimpleRNN, self).__init__("runTest") self.time_major = time_major @@ -38,10 +40,16 @@ class TestSimpleRNN(unittest.TestCase): # `__init__` to avoid using an error device set by another test case. place = paddle.set_device(self.place) paddle.disable_static(place) - rnn1 = SimpleRNN( - 16, 32, 2, time_major=self.time_major, direction=self.direction) - rnn2 = paddle.nn.SimpleRNN( - 16, 32, 2, time_major=self.time_major, direction=self.direction) + rnn1 = SimpleRNN(16, + 32, + 2, + time_major=self.time_major, + direction=self.direction) + rnn2 = paddle.nn.SimpleRNN(16, + 32, + 2, + time_major=self.time_major, + direction=self.direction) convert_params_for_net(rnn1, rnn2) self.rnn1 = rnn1 @@ -107,6 +115,7 @@ class TestSimpleRNN(unittest.TestCase): class TestGRU(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): super(TestGRU, self).__init__("runTest") self.time_major = time_major @@ -194,6 +203,7 @@ class TestGRU(unittest.TestCase): class TestLSTM(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): super(TestLSTM, self).__init__("runTest") self.time_major = time_major @@ -206,10 +216,16 @@ class TestLSTM(unittest.TestCase): # `__init__` to avoid using an error device set by another test case. 
place = paddle.set_device(self.place) paddle.disable_static(place) - rnn1 = LSTM( - 16, 32, 2, time_major=self.time_major, direction=self.direction) - rnn2 = paddle.nn.LSTM( - 16, 32, 2, time_major=self.time_major, direction=self.direction) + rnn1 = LSTM(16, + 32, + 2, + time_major=self.time_major, + direction=self.direction) + rnn2 = paddle.nn.LSTM(16, + 32, + 2, + time_major=self.time_major, + direction=self.direction) convert_params_for_net(rnn1, rnn2) self.rnn1 = rnn1 @@ -226,9 +242,9 @@ class TestLSTM(unittest.TestCase): prev_c = np.random.randn(2 * self.num_directions, 4, 32) y1, (h1, c1) = rnn1(x, (prev_h, prev_c)) - y2, (h2, c2) = rnn2( - paddle.to_tensor(x), - (paddle.to_tensor(prev_h), paddle.to_tensor(prev_c))) + y2, (h2, + c2) = rnn2(paddle.to_tensor(x), + (paddle.to_tensor(prev_h), paddle.to_tensor(prev_c))) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) @@ -287,6 +303,7 @@ def predict_test_util(place, mode, stop_gradient=True): np.random.seed(123) class Net(paddle.nn.Layer): + def __init__(self): super(Net, self).__init__() self.rnn = getattr(paddle.nn, mode)(16, @@ -308,8 +325,8 @@ def predict_test_util(place, mode, stop_gradient=True): y = y * mask loss = paddle.mean(y) loss.backward() - optimizer = paddle.optimizer.Adam( - learning_rate=0.1, parameters=rnn.parameters()) + optimizer = paddle.optimizer.Adam(learning_rate=0.1, + parameters=rnn.parameters()) optimizer.step() rnn.eval() y, _ = rnn(x) @@ -318,8 +335,7 @@ def predict_test_util(place, mode, stop_gradient=True): rnn.train() rnn = paddle.jit.to_static( - rnn, [paddle.static.InputSpec( - shape=[None, None, 16], dtype=x.dtype)]) + rnn, [paddle.static.InputSpec(shape=[None, None, 16], dtype=x.dtype)]) paddle.jit.save(rnn, "./inference/%s_infer" % mode) paddle.enable_static() @@ -327,9 +343,9 @@ def predict_test_util(place, mode, stop_gradient=True): new_scope = paddle.static.Scope() with paddle.static.scope_guard(new_scope): exe = paddle.static.Executor(place) - [inference_program, feed_target_names, - fetch_targets] = paddle.static.load_inference_model( - "./inference/%s_infer" % mode, exe) + [inference_program, feed_target_names, fetch_targets + ] = paddle.static.load_inference_model("./inference/%s_infer" % mode, + exe) results = exe.run(inference_program, feed={feed_target_names[0]: x.numpy()}, fetch_list=fetch_targets) diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets_static.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets_static.py index 5de539ebf39..436bf0b6ea0 100755 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets_static.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets_static.py @@ -13,8 +13,10 @@ # limitations under the License. import paddle + paddle.set_default_dtype("float64") from paddle.fluid.layers import sequence_mask + paddle.enable_static() import numpy as np @@ -27,6 +29,7 @@ bidirectional_list = ["bidirectional", "bidirect"] class TestSimpleRNN(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): super(TestSimpleRNN, self).__init__("runTest") self.time_major = time_major @@ -38,19 +41,21 @@ class TestSimpleRNN(unittest.TestCase): # Since `set_device` is global, set `set_device` in `setUp` rather than # `__init__` to avoid using an error device set by another test case. 
place = paddle.set_device(self.place) - rnn1 = SimpleRNN( - 16, 32, 2, time_major=self.time_major, direction=self.direction) + rnn1 = SimpleRNN(16, + 32, + 2, + time_major=self.time_major, + direction=self.direction) mp = paddle.static.Program() sp = paddle.static.Program() with paddle.fluid.unique_name.guard(): with paddle.static.program_guard(mp, sp): - rnn2 = paddle.nn.SimpleRNN( - 16, - 32, - 2, - time_major=self.time_major, - direction=self.direction) + rnn2 = paddle.nn.SimpleRNN(16, + 32, + 2, + time_major=self.time_major, + direction=self.direction) exe = paddle.static.Executor(place) scope = paddle.fluid.Scope() @@ -171,6 +176,7 @@ class TestSimpleRNN(unittest.TestCase): class TestGRU(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): super(TestGRU, self).__init__("runTest") self.time_major = time_major @@ -317,6 +323,7 @@ class TestGRU(unittest.TestCase): class TestLSTM(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): super(TestLSTM, self).__init__("runTest") self.time_major = time_major @@ -328,19 +335,21 @@ class TestLSTM(unittest.TestCase): # Since `set_device` is global, set `set_device` in `setUp` rather than # `__init__` to avoid using an error device set by another test case. place = paddle.set_device(self.place) - rnn1 = LSTM( - 16, 32, 2, time_major=self.time_major, direction=self.direction) + rnn1 = LSTM(16, + 32, + 2, + time_major=self.time_major, + direction=self.direction) mp = paddle.static.Program() sp = paddle.static.Program() with paddle.fluid.unique_name.guard(): with paddle.static.program_guard(mp, sp): - rnn2 = paddle.nn.LSTM( - 16, - 32, - 2, - time_major=self.time_major, - direction=self.direction) + rnn2 = paddle.nn.LSTM(16, + 32, + 2, + time_major=self.time_major, + direction=self.direction) exe = paddle.static.Executor(place) scope = paddle.fluid.Scope() diff --git a/python/paddle/fluid/tests/unittests/rnn/test_wrappers.py b/python/paddle/fluid/tests/unittests/rnn/test_wrappers.py index 85aebf86ed9..2442e6b7a3b 100755 --- a/python/paddle/fluid/tests/unittests/rnn/test_wrappers.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_wrappers.py @@ -13,6 +13,7 @@ # limitations under the License. 
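Besides re-wrapping call arguments, the other change repeated throughout these test files is the blank line yapf inserts between a class statement and its first method (the lone '+' after each class header). The snippet below shows the before/after shape of that change; attributing it to yapf's blank_line_before_nested_class_or_def option is an assumption, as the project's style configuration is not part of this section.

# Illustration only (not taken from the patch): the blank line added between a
# class statement and its first method, mirroring the '+' lines above.

class WithoutBlankLine(object):      # shape before formatting
    def setUp(self):
        pass


class WithBlankLine(object):         # shape after formatting

    def setUp(self):
        pass
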
import paddle + paddle.set_default_dtype("float64") from paddle.fluid.layers import sequence_mask @@ -24,6 +25,7 @@ from rnn_numpy import GRUCell, RNN, BiRNN class TestRNNWrapper(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): super(TestRNNWrapper, self).__init__("runTest") self.time_major = time_major @@ -102,6 +104,7 @@ class TestRNNWrapper(unittest.TestCase): class TestBiRNNWrapper(unittest.TestCase): + def __init__(self, time_major=True, place="cpu"): super(TestBiRNNWrapper, self).__init__("runTest") self.time_major = time_major diff --git a/python/paddle/fluid/tests/unittests/row_parallel_linear_api.py b/python/paddle/fluid/tests/unittests/row_parallel_linear_api.py index a24c0874482..61d643916f7 100644 --- a/python/paddle/fluid/tests/unittests/row_parallel_linear_api.py +++ b/python/paddle/fluid/tests/unittests/row_parallel_linear_api.py @@ -41,6 +41,7 @@ paddle.enable_static() class TestRowParallelLinearAPI(TestCollectiveAPIRunnerBase): + def __init__(self): self.global_ring_id = 0 @@ -50,8 +51,9 @@ class TestRowParallelLinearAPI(TestCollectiveAPIRunnerBase): np.random.seed(2020) np_array = np.random.rand(1000, 16) - data = paddle.static.data( - name='tindata', shape=[10, 1000], dtype="float32") + data = paddle.static.data(name='tindata', + shape=[10, 1000], + dtype="float32") paddle.distributed.broadcast(data, src=0) data = paddle.split(data, 2, axis=1)[rank] if rank == 0: @@ -70,7 +72,8 @@ class TestRowParallelLinearAPI(TestCollectiveAPIRunnerBase): axis=0, num_partitions=2, weight_attr=param_attr, - bias_attr=True, ) + bias_attr=True, + ) return [linear_out] diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_concat.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_concat.py index 34b6f6dc8e5..5dca2198990 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_concat.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_concat.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("../") from op_test import OpTest @@ -25,6 +26,7 @@ from paddle import fluid class TestSequenceConcat(OpTest): + def setLoD(self): self.lod1 = [7, 3] self.lod2 = [12, 8] @@ -52,6 +54,7 @@ class TestSequenceConcat(OpTest): class TestSequenceConcatCase2(TestSequenceConcat): + def setLoD(self): self.lod1 = [10, 0] self.lod2 = [12, 8] @@ -59,6 +62,7 @@ class TestSequenceConcatCase2(TestSequenceConcat): class TestSequenceConcatCase3(TestSequenceConcat): + def setLoD(self): self.lod1 = [10, 0] self.lod2 = [20, 0] @@ -66,6 +70,7 @@ class TestSequenceConcatCase3(TestSequenceConcat): class TestSequenceConcatCase4(TestSequenceConcat): + def setLoD(self): self.lod1 = [0, 10] self.lod2 = [0, 20] @@ -73,6 +78,7 @@ class TestSequenceConcatCase4(TestSequenceConcat): class TestSequenceConcatCase5(TestSequenceConcat): + def setLoD(self): self.lod1 = [0, 10] self.lod2 = [20, 0] @@ -80,7 +86,9 @@ class TestSequenceConcatCase5(TestSequenceConcat): class TestSequenceConcatOpError(unittest.TestCase): + def test_errors(self): + def test_input_list(): # the input type must be list x_data = fluid.layers.data(name='x', shape=[4], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_conv.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_conv.py index ed804d701a8..e79b66c9990 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_conv.py +++ 
b/python/paddle/fluid/tests/unittests/sequence/test_sequence_conv.py @@ -18,6 +18,7 @@ import unittest import numpy as np import random import sys + sys.path.append("../") from op_test import OpTest @@ -46,8 +47,8 @@ def seqconv(x, [offset[i] - in_begin, offset[i + 1] - offset[i]]) if padding_trainable: sub_w = padding_data[j:j + pad_size, :] - col[offset[i]:offset[i] + pad_size, j * M:(j + 1) * - M] = sub_w + col[offset[i]:offset[i] + pad_size, + j * M:(j + 1) * M] = sub_w out_begin = offset[i] + pad_size in_begin = offset[i] @@ -58,8 +59,8 @@ def seqconv(x, sub_w = padding_data[begin_pad + context_start + j - pad_size:begin_pad + context_start + j, :] - col[offset[i + 1] - pad_size:offset[i + 1], j * M:(j + 1) * - M] = sub_w + col[offset[i + 1] - pad_size:offset[i + 1], + j * M:(j + 1) * M] = sub_w in_end = offset[i + 1] out_end = offset[i + 1] - pad_size if in_end <= in_begin: @@ -70,6 +71,7 @@ def seqconv(x, class TestSeqProject(OpTest): + def setUp(self): self.init_test_case() self.op_type = 'sequence_conv' @@ -83,8 +85,8 @@ class TestSeqProject(OpTest): return # one level, batch size - x = np.random.uniform(0.1, 1, [self.input_size[0], - self.input_size[1]]).astype('float32') + x = np.random.uniform( + 0.1, 1, [self.input_size[0], self.input_size[1]]).astype('float32') w = np.random.uniform(0.1, 1, [ self.context_length * self.input_size[1], self.output_represention ]).astype('float32') @@ -124,51 +126,48 @@ class TestSeqProject(OpTest): def test_check_grad(self): if self.padding_trainable: - self.check_grad( - set(self.inputs_val), 'Out', max_relative_error=0.05) + self.check_grad(set(self.inputs_val), + 'Out', + max_relative_error=0.05) def test_check_grad_input(self): - self.check_grad( - ['X'], - 'Out', - max_relative_error=0.05, - no_grad_set=set(self.inputs_val_no_x)) + self.check_grad(['X'], + 'Out', + max_relative_error=0.05, + no_grad_set=set(self.inputs_val_no_x)) def test_check_grad_padding_data(self): if self.padding_trainable: - self.check_grad( - ['PaddingData'], 'Out', no_grad_set=set(['X', 'Filter'])) + self.check_grad(['PaddingData'], + 'Out', + no_grad_set=set(['X', 'Filter'])) def test_check_grad_Filter(self): - self.check_grad( - ['Filter'], - 'Out', - max_relative_error=0.05, - no_grad_set=set(self.inputs_val_no_f)) + self.check_grad(['Filter'], + 'Out', + max_relative_error=0.05, + no_grad_set=set(self.inputs_val_no_f)) def test_check_grad_input_filter(self): if self.padding_trainable: - self.check_grad( - ['X', 'Filter'], - 'Out', - max_relative_error=0.05, - no_grad_set=set(['PaddingData'])) + self.check_grad(['X', 'Filter'], + 'Out', + max_relative_error=0.05, + no_grad_set=set(['PaddingData'])) def test_check_grad_padding_input(self): if self.padding_trainable: - self.check_grad( - self.inputs_val_no_f, - 'Out', - max_relative_error=0.05, - no_grad_set=set(['Filter'])) + self.check_grad(self.inputs_val_no_f, + 'Out', + max_relative_error=0.05, + no_grad_set=set(['Filter'])) def test_check_grad_padding_filter(self): if self.padding_trainable: - self.check_grad( - self.inputs_val_no_x, - 'Out', - max_relative_error=0.05, - no_grad_set=set(['X'])) + self.check_grad(self.inputs_val_no_x, + 'Out', + max_relative_error=0.05, + no_grad_set=set(['X'])) def init_test_case(self): self.input_row = 11 @@ -187,6 +186,7 @@ class TestSeqProject(OpTest): class TestSeqProjectCase1(TestSeqProject): + def init_test_case(self): self.input_row = 11 self.context_start = -1 @@ -204,6 +204,7 @@ class TestSeqProjectCase1(TestSeqProject): class 
TestSeqProjectCase2Len0(TestSeqProject): + def init_test_case(self): self.input_row = 11 self.context_start = -1 @@ -221,6 +222,7 @@ class TestSeqProjectCase2Len0(TestSeqProject): class TestSeqProjectCase3(TestSeqProject): + def init_test_case(self): self.input_row = 25 self.context_start = 2 @@ -241,12 +243,15 @@ class TestSeqProjectCase3(TestSeqProject): class TestSeqConvApi(unittest.TestCase): + def test_api(self): import paddle.fluid as fluid x = fluid.layers.data('x', shape=[32], lod_level=1) - y = fluid.layers.sequence_conv( - input=x, num_filters=2, filter_size=3, padding_start=None) + y = fluid.layers.sequence_conv(input=x, + num_filters=2, + filter_size=3, + padding_start=None) place = fluid.CPUPlace() x_tensor = fluid.create_lod_tensor( diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_enumerate_op.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_enumerate_op.py index 9878e6f7413..c2832127573 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_enumerate_op.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_enumerate_op.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("../") from op_test import OpTest @@ -39,6 +40,7 @@ def sequence_enumerate(input_seq, in_lod, win_size, pad_value): class TestSequenceEnumerateOp(OpTest): + def setUp(self): self.op_type = "sequence_enumerate" self.init_test_case() @@ -60,6 +62,7 @@ class TestSequenceEnumerateOp(OpTest): class TesSequenceEnumerateOpInt64(TestSequenceEnumerateOp): + def init_test_case(self): self.in_seq = np.random.randint(0, 10, (30, 1)).astype("int64") self.lod = [[9, 4, 11, 6]] @@ -71,6 +74,7 @@ class TesSequenceEnumerateOpInt64(TestSequenceEnumerateOp): class TestSequenceEnumerateOpLargeWinSize(TestSequenceEnumerateOp): + def init_test_case(self): self.in_seq = np.random.randint(0, 10, (30, 1)).astype("int32") self.lod = [[9, 4, 11, 6]] @@ -82,6 +86,7 @@ class TestSequenceEnumerateOpLargeWinSize(TestSequenceEnumerateOp): class TestSequenceEnumerateOpMaxWinSize(TestSequenceEnumerateOp): + def init_test_case(self): self.in_seq = np.random.randint(0, 10, (30, 1)).astype("int32") self.lod = [[9, 4, 11, 6]] @@ -93,6 +98,7 @@ class TestSequenceEnumerateOpMaxWinSize(TestSequenceEnumerateOp): class TestSequenceEnumerateOpLargePadValue(TestSequenceEnumerateOp): + def init_test_case(self): self.in_seq = np.random.randint(0, 10, (30, 1)).astype("int32") self.lod = [[9, 4, 11, 6]] @@ -104,6 +110,7 @@ class TestSequenceEnumerateOpLargePadValue(TestSequenceEnumerateOp): class TestSequenceEnumerateOpLargePadValueSeqLen0(TestSequenceEnumerateOp): + def init_test_case(self): self.in_seq = np.random.randint(0, 10, (30, 1)).astype("int32") self.lod = [[0, 14, 0, 16, 0]] diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_erase_op.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_erase_op.py index 9e060201fe8..6e9023d03c5 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_erase_op.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_erase_op.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("../") from op_test import OpTest @@ -37,6 +38,7 @@ def sequence_erase(in_seq, lod0, tokens): class TestSequenceEraseOpInt32(OpTest): + def setUp(self): self.op_type = "sequence_erase" in_seq = np.random.randint(0, 10, (30, 1)).astype("int32") @@ -52,6 +54,7 @@ class 
TestSequenceEraseOpInt32(OpTest): class TestSequenceEraseOpInt32LoD2(OpTest): + def setUp(self): self.op_type = "sequence_erase" in_seq = np.random.randint(0, 10, (30, 1)).astype("int32") @@ -67,6 +70,7 @@ class TestSequenceEraseOpInt32LoD2(OpTest): class TestSequenceEraseOpInt64(OpTest): + def setUp(self): self.op_type = "sequence_erase" in_seq = np.random.randint(0, 10, (30, 1)).astype("int64") @@ -82,6 +86,7 @@ class TestSequenceEraseOpInt64(OpTest): class TestSequenceEraseOpInt64SeqLen0(OpTest): + def setUp(self): self.op_type = "sequence_erase" in_seq = np.random.randint(0, 10, (30, 1)).astype("int64") @@ -97,6 +102,7 @@ class TestSequenceEraseOpInt64SeqLen0(OpTest): class TestSequenceEraseOpEmpty(OpTest): + def setUp(self): self.op_type = "sequence_erase" in_seq = np.random.randint(0, 10, (30, 1)).astype("int32") diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_expand.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_expand.py index b3d877a0cd6..3e75b40baf8 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_expand.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_expand.py @@ -17,11 +17,13 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("../") from op_test import OpTest class TestSequenceExpand(OpTest): + def set_data(self): x_data = np.random.uniform(0.1, 1, [3, 40]).astype('float64') y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float64') @@ -81,6 +83,7 @@ class TestSequenceExpand(OpTest): class TestSequenceExpandCase1(TestSequenceExpand): + def set_data(self): x_data = np.random.uniform(0.1, 1, [5, 20]).astype('float64') y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float64') @@ -90,6 +93,7 @@ class TestSequenceExpandCase1(TestSequenceExpand): class TestSequenceExpandCase2(TestSequenceExpand): + def set_data(self): x_data = np.random.uniform(0.1, 1, [1, 2, 50]).astype('float64') x_lod = [[1]] @@ -100,6 +104,7 @@ class TestSequenceExpandCase2(TestSequenceExpand): class TestSequenceExpandCase3(TestSequenceExpand): + def set_data(self): x_data = np.random.uniform(0.1, 1, [4, 25]).astype('float64') x_lod = [[1, 1, 1, 1]] @@ -109,6 +114,7 @@ class TestSequenceExpandCase3(TestSequenceExpand): class TestSequenceExpandCase4(TestSequenceExpand): + def set_data(self): data = np.random.uniform(0.1, 1, [5 * 20, 1]) x_data = np.array(data).reshape([5, 20]).astype('float64') @@ -119,6 +125,7 @@ class TestSequenceExpandCase4(TestSequenceExpand): class TestSequenceExpandCase5(TestSequenceExpand): + def set_data(self): x_data = np.random.uniform(0.1, 1, [6, 20]).astype('float64') y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float64') @@ -128,6 +135,7 @@ class TestSequenceExpandCase5(TestSequenceExpand): class TestSequenceExpandCase6(TestSequenceExpand): + def set_data(self): x_data = np.random.uniform(0.1, 1, [4, 25]).astype('float64') x_lod = [[1, 1, 0, 1, 1]] diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_expand_as.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_expand_as.py index 98996e21e1c..2cab179b3c5 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_expand_as.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_expand_as.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("../") import paddle.fluid as fluid from op_test import OpTest @@ -24,6 +25,7 @@ from paddle.fluid import Program, 
program_guard class TestSequenceExpandAs(OpTest): + def setUp(self): self.op_type = 'sequence_expand_as' self.set_data() @@ -60,6 +62,7 @@ class TestSequenceExpandAs(OpTest): class TestSequenceExpandAsCase1(TestSequenceExpandAs): + def set_data(self): x_data = np.random.uniform(0.1, 1, [5, 20]).astype('float64') x_lod = [[2, 3]] @@ -69,6 +72,7 @@ class TestSequenceExpandAsCase1(TestSequenceExpandAs): class TestSequenceExpandAsCase2(TestSequenceExpandAs): + def set_data(self): x_data = np.random.uniform(0.1, 1, [5, 20]).astype('float64') x_lod = [[2, 3]] @@ -78,6 +82,7 @@ class TestSequenceExpandAsCase2(TestSequenceExpandAs): class TestSequenceExpandAsCase3(TestSequenceExpandAs): + def set_data(self): x_data = np.random.uniform(0.1, 1, [1, 2, 50]).astype('float64') x_lod = [[1]] @@ -87,6 +92,7 @@ class TestSequenceExpandAsCase3(TestSequenceExpandAs): class TestSequenceExpandAsOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input x must be Variable diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_first_step.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_first_step.py index 0e7f9202fde..8d21ad789ea 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_first_step.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_first_step.py @@ -18,11 +18,13 @@ import numpy as np import copy import unittest import sys + sys.path.append("../") from op_test import OpTest class TestSequenceFirstStepOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): @@ -35,12 +37,11 @@ class TestSequenceFirstStepOpError(unittest.TestCase): def test_input_dtype(): # the dtype of input must be int64 - type_data = fluid.layers.data( - name='type_data', - shape=[7, 1], - append_batch_size=False, - dtype='int64', - lod_level=1) + type_data = fluid.layers.data(name='type_data', + shape=[7, 1], + append_batch_size=False, + dtype='int64', + lod_level=1) fluid.layers.sequence_last_step(type_data) self.assertRaises(TypeError, test_input_dtype) diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_last_step.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_last_step.py index ea3a29a832e..0e8fe66d749 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_last_step.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_last_step.py @@ -18,11 +18,13 @@ import numpy as np import copy import unittest import sys + sys.path.append("../") from op_test import OpTest class TestSequenceLastStepOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): @@ -35,12 +37,11 @@ class TestSequenceLastStepOpError(unittest.TestCase): def test_input_dtype(): # the dtype of input must be int64 - type_data = fluid.layers.data( - name='type_data', - shape=[7, 1], - append_batch_size=False, - dtype='int64', - lod_level=1) + type_data = fluid.layers.data(name='type_data', + shape=[7, 1], + append_batch_size=False, + dtype='int64', + lod_level=1) fluid.layers.sequence_last_step(type_data) self.assertRaises(TypeError, test_input_dtype) diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_mask.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_mask.py index de41235fd37..af733edfb61 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_mask.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_mask.py @@ -19,11 +19,13 @@ import numpy as np 
import copy import unittest import sys + sys.path.append("../") from op_test import OpTest class SequenceMaskTestBase(OpTest): + def initDefaultParameters(self): self.op_type = 'sequence_mask' self.maxlen = 10 @@ -49,13 +51,13 @@ class SequenceMaskTestBase(OpTest): def calc_ground_truth_mask(self): maxlen = np.max(self.x) if self.maxlen < 0 else self.maxlen shape = self.x.shape + (maxlen, ) - index_broadcast = np.broadcast_to( - np.reshape( - range(maxlen), newshape=[1] * self.x.ndim + [-1]), - shape=shape) - x_broadcast = np.broadcast_to( - np.reshape( - self.x, newshape=self.x.shape + (-1, )), shape=shape) + index_broadcast = np.broadcast_to(np.reshape( + range(maxlen), newshape=[1] * self.x.ndim + [-1]), + shape=shape) + x_broadcast = np.broadcast_to(np.reshape(self.x, + newshape=self.x.shape + + (-1, )), + shape=shape) return (index_broadcast < x_broadcast).astype(self.mask_dtype) def test_check_output(self): @@ -63,36 +65,43 @@ class SequenceMaskTestBase(OpTest): class SequenceMaskTest1(SequenceMaskTestBase): + def initParameters(self): self.mask_dtype = 'bool' class SequenceMaskTest2(SequenceMaskTestBase): + def initParameters(self): self.mask_dtype = 'uint8' class SequenceMaskTest3(SequenceMaskTestBase): + def initParameters(self): self.mask_dtype = 'int32' class SequenceMaskTest4(SequenceMaskTestBase): + def initParameters(self): self.mask_dtype = 'float32' class SequenceMaskTest5(SequenceMaskTestBase): + def initParameters(self): self.mask_dtype = 'float64' class SequenceMaskTest6(SequenceMaskTestBase): + def initParameters(self): self.maxlen = -1 class SequenceMaskTestBase_tensor_attr(OpTest): + def initDefaultParameters(self): self.op_type = 'sequence_mask' self.maxlen = 10 @@ -116,13 +125,13 @@ class SequenceMaskTestBase_tensor_attr(OpTest): def calc_ground_truth_mask(self): maxlen = np.max(self.x) if self.maxlen < 0 else self.maxlen shape = self.x.shape + (maxlen, ) - index_broadcast = np.broadcast_to( - np.reshape( - range(maxlen), newshape=[1] * self.x.ndim + [-1]), - shape=shape) - x_broadcast = np.broadcast_to( - np.reshape( - self.x, newshape=self.x.shape + (-1, )), shape=shape) + index_broadcast = np.broadcast_to(np.reshape( + range(maxlen), newshape=[1] * self.x.ndim + [-1]), + shape=shape) + x_broadcast = np.broadcast_to(np.reshape(self.x, + newshape=self.x.shape + + (-1, )), + shape=shape) return (index_broadcast < x_broadcast).astype(self.mask_dtype) def test_check_output(self): @@ -130,31 +139,37 @@ class SequenceMaskTestBase_tensor_attr(OpTest): class SequenceMaskTest1_tensor_attr(SequenceMaskTestBase_tensor_attr): + def initParameters(self): self.mask_dtype = 'bool' class SequenceMaskTest2_tensor_attr(SequenceMaskTestBase_tensor_attr): + def initParameters(self): self.mask_dtype = 'uint8' class SequenceMaskTest3_tensor_attr(SequenceMaskTestBase_tensor_attr): + def initParameters(self): self.mask_dtype = 'int32' class SequenceMaskTest4_tensor_attr(SequenceMaskTestBase_tensor_attr): + def initParameters(self): self.mask_dtype = 'float32' class SequenceMaskTest5_tensor_attr(SequenceMaskTestBase_tensor_attr): + def initParameters(self): self.mask_dtype = 'float64' class TestSequenceMaskOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): input_data = np.random.uniform(1, 5, [4]).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_pad_op.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_pad_op.py index 7d2ba834de1..934e0ebe8fd 100644 --- 
a/python/paddle/fluid/tests/unittests/sequence/test_sequence_pad_op.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_pad_op.py @@ -15,6 +15,7 @@ import unittest import numpy as np import sys + sys.path.append("../") from op_test import OpTest @@ -23,6 +24,7 @@ import paddle.fluid.core as core class TestSequencePadOp(OpTest): + def set_attr(self): self.x_shape = [12, 10] self.x_len_lod = [[2, 3, 4, 3]] @@ -53,8 +55,8 @@ class TestSequencePadOp(OpTest): x_data = self.inputs['X'][0] pad_value_data = self.inputs['PadValue'] if pad_value_data.shape == (1, ): - pad_value_data = np.broadcast_to( - pad_value_data, shape=x_data.shape[1:]) + pad_value_data = np.broadcast_to(pad_value_data, + shape=x_data.shape[1:]) padded_sequences = [] start_idx = 0 for l in x_len_lod_0: @@ -84,6 +86,7 @@ class TestSequencePadOp(OpTest): class TestSequencePadOp2(TestSequencePadOp): + def set_attr(self): self.x_shape = [12, 10] self.x_len_lod = [[2, 3, 4, 3]] @@ -93,6 +96,7 @@ class TestSequencePadOp2(TestSequencePadOp): class TestSequencePadOp3(TestSequencePadOp): + def set_attr(self): self.x_shape = [12, 10] self.x_len_lod = [[2, 3, 4, 3]] @@ -102,6 +106,7 @@ class TestSequencePadOp3(TestSequencePadOp): class TestSequencePadOp4(TestSequencePadOp): + def set_attr(self): self.x_shape = [12, 10] self.x_len_lod = [[2, 3, 4, 3]] @@ -111,6 +116,7 @@ class TestSequencePadOp4(TestSequencePadOp): class TestSequencePadOp5(TestSequencePadOp): + def set_attr(self): self.x_shape = [12, 2, 5] self.x_len_lod = [[2, 3, 4, 3]] @@ -120,6 +126,7 @@ class TestSequencePadOp5(TestSequencePadOp): class TestSequencePadOp6(TestSequencePadOp): + def set_attr(self): self.x_shape = [12, 2, 5] self.x_len_lod = [[2, 3, 4, 3]] @@ -129,6 +136,7 @@ class TestSequencePadOp6(TestSequencePadOp): class TestSequencePadOp7(TestSequencePadOp): + def set_attr(self): self.x_shape = [12, 2, 5] self.x_len_lod = [[2, 3, 4, 3]] @@ -138,6 +146,7 @@ class TestSequencePadOp7(TestSequencePadOp): class TestSequencePadOp8(TestSequencePadOp): + def set_attr(self): self.x_shape = [12, 2, 5] self.x_len_lod = [[0, 8, 0, 4, 0]] @@ -147,29 +156,35 @@ class TestSequencePadOp8(TestSequencePadOp): class TestSequencePadOpError(unittest.TestCase): + def test_error(self): + def test_x_variable(): # the input x type must be Variable x = np.random.random((2, 4)).astype("float32") - pad_value = fluid.layers.assign(input=np.array( - [0.0], dtype=np.float32)) + pad_value = fluid.layers.assign( + input=np.array([0.0], dtype=np.float32)) fluid.layers.sequence_pad(x=x, pad_value=pad_value) self.assertRaises(TypeError, test_x_variable) def test_pad_value_variable(): - x1 = fluid.layers.data( - name='x1', shape=[10, 5], dtype='float32', lod_level=1) + x1 = fluid.layers.data(name='x1', + shape=[10, 5], + dtype='float32', + lod_level=1) pad_value1 = np.array([0.0], dtype=np.float32) fluid.layers.sequence_pad(x=x1, pad_value=pad_value1) self.assertRaises(TypeError, test_pad_value_variable) def test_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[10, 5], dtype='int16', lod_level=1) - pad_value2 = fluid.layers.assign(input=np.array( - [0.0], dtype=np.int32)) + x2 = fluid.layers.data(name='x2', + shape=[10, 5], + dtype='int16', + lod_level=1) + pad_value2 = fluid.layers.assign( + input=np.array([0.0], dtype=np.int32)) fluid.layers.sequence_pad(x=x2, pad_value=pad_value2) self.assertRaises(TypeError, test_dtype) diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_pool.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_pool.py index 
499955df8f1..eff40454c4e 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_pool.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_pool.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("../") from op_test import OpTest, skip_check_grad_ci from test_reorder_lod_tensor import convert_to_offset @@ -54,6 +55,7 @@ def compute_seqpool_sqrt(x, offset, out, pad_value=0.0): class TestSeqAvgPool(OpTest): + def set_lod(self): return [[11]] @@ -95,54 +97,62 @@ class TestSeqAvgPool(OpTest): class TestSeqAvgPoolBatch1(TestSeqAvgPool): + def set_lod(self): return [[11]] def set_lod_data(self): lod = self.set_lod() - x, _ = self.get_sequence_batch_size_1_input( - lod=lod, shape=[lod[0][0], 23]) + x, _ = self.get_sequence_batch_size_1_input(lod=lod, + shape=[lod[0][0], 23]) return x class TestSeqAvgPoolInstance0(TestSeqAvgPool): + def set_lod(self): return [[0, 0, 4, 0, 3, 0, 0, 5, 0, 0]] def set_lod_data(self): lod = self.set_lod() - x, _ = self.get_sequence_instance_size_0_input( - lod=lod, shape=[sum(lod[0]), 10]) + x, _ = self.get_sequence_instance_size_0_input(lod=lod, + shape=[sum(lod[0]), 10]) return x class TestSeqAvgPoolLen0(TestSeqAvgPool): + def set_lod(self): return [[0, 4, 0, 7, 0]] class TestSeqAvgPoolLen0LoDLevel2(TestSeqAvgPool): + def set_lod(self): return [[2, 0, 1, 2], [0, 4, 0, 7, 0]] class TestSeqSumPool(TestSeqAvgPool): + def compute(self, x, offset, out): self.attrs = {"pad_value": 0.1, 'pooltype': "SUM"} compute_seqpool_sum(x, offset, out, self.attrs["pad_value"]) class TestSeqSumPoolLen0(TestSeqSumPool): + def set_lod(self): return [[0, 4, 0, 7, 0]] class TestSeqSumPoolLen0LoDLevel2(TestSeqSumPool): + def set_lod(self): return [[2, 0, 1, 2], [0, 4, 0, 7, 0]] class TestSeqMaxPool(TestSeqAvgPool): + def set_lod(self): return [[13]] @@ -175,32 +185,38 @@ class TestSeqMaxPool(TestSeqAvgPool): class TestSeqMaxPoolLen0(TestSeqMaxPool): + def set_lod(self): return [[0, 1, 1, 5, 6, 0]] class TestSeqMaxPoolLen0LoDLevel2(TestSeqMaxPool): + def set_lod(self): return [[2, 0, 3, 1], [0, 1, 1, 5, 6, 0]] class TestSeqSqrtPool(TestSeqAvgPool): + def compute(self, x, offset, out): self.attrs = {"pad_value": 0.0, 'pooltype': "SQRT"} compute_seqpool_sqrt(x, offset, out, self.attrs["pad_value"]) class TestSeqSqrtPoolLen0(TestSeqSqrtPool): + def set_lod(self): return [[0, 7, 0, 2, 2, 0]] class TestSeqSqrtPoolLen0LoDLevel2(TestSeqSqrtPool): + def set_lod(self): return [[1, 2, 0, 3], [0, 7, 0, 2, 2, 0]] class TestSeqLastPool(TestSeqAvgPool): + def compute(self, x, offset, out): self.attrs = {"pad_value": 0.0, 'pooltype': "LAST"} level = len(offset) - 1 @@ -213,16 +229,19 @@ class TestSeqLastPool(TestSeqAvgPool): class TestSeqLastPoolLen0(TestSeqLastPool): + def set_lod(self): return [[0, 3, 4, 0, 4, 0]] class TestSeqLastPoolLen0LoDLevel2(TestSeqLastPool): + def set_lod(self): return [[1, 0, 2, 3], [0, 3, 4, 0, 4, 0]] class TestSeqFirstPool(TestSeqAvgPool): + def compute(self, x, offset, out): self.attrs = {"pad_value": 0.3, 'pooltype': "FIRST"} level = len(offset) - 1 @@ -235,16 +254,19 @@ class TestSeqFirstPool(TestSeqAvgPool): class TestSeqFirstPoolLen0(TestSeqFirstPool): + def set_lod(self): return [[0, 2, 0, 3, 6, 0]] class TestSeqFirstPoolLen0LoDLevel2(TestSeqFirstPool): + def set_lod(self): return [[1, 0, 2, 3], [0, 2, 0, 3, 6, 0]] class TestSeqAvgPool2D(TestSeqAvgPool): + def set_lod(self): return [[4, 1, 3, 5]] @@ -273,16 +295,19 @@ class TestSeqAvgPool2D(TestSeqAvgPool): class 
TestSeqAvgPool2DLen0(TestSeqAvgPool2D): + def set_lod(self): return [[0, 5, 0, 8, 0]] class TestSeqAvgPool2DLen0LoDLevel2(TestSeqAvgPool2D): + def set_lod(self): return [[1, 0, 4], [0, 5, 0, 8, 0]] class TestSeqSumPool2D(TestSeqAvgPool2D): + def compute(self, x, offset, out): self.attrs = {"pad_value": 0.2, 'pooltype': "SUM"} level = len(offset) - 1 @@ -296,16 +321,19 @@ class TestSeqSumPool2D(TestSeqAvgPool2D): class TestSeqSumPool2DLen0(TestSeqSumPool2D): + def set_lod(self): return [[0, 8, 0, 5, 0]] class TestSeqSumPool2DLen0LoDLevel2(TestSeqSumPool2D): + def set_lod(self): return [[1, 0, 4], [0, 8, 0, 5, 0]] class TestSeqSqrtPool2D(TestSeqAvgPool2D): + def compute(self, x, offset, out): self.attrs = {"pad_value": 0.0, 'pooltype': "SQRT"} level = len(offset) - 1 @@ -326,21 +354,26 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D): out = out[0] self.outputs['MaxIndex'] = \ np.zeros(out.shape).astype('int32') - self.check_grad( - ["X"], "Out", max_relative_error=0.06, check_dygraph=False) + self.check_grad(["X"], + "Out", + max_relative_error=0.06, + check_dygraph=False) class TestSeqSqrtPool2DLen0(TestSeqSqrtPool2D): + def set_lod(self): return [[0, 8, 0, 5, 0]] class TestSeqSqrtPool2DLen0LoDLevel2(TestSeqSqrtPool2D): + def set_lod(self): return [[1, 0, 2, 2], [0, 8, 0, 5, 0]] class TestSeqMaxPool2D(TestSeqAvgPool2D): + def set_lod(self): return [[4, 1, 3, 5]] @@ -374,11 +407,13 @@ class TestSeqMaxPool2D(TestSeqAvgPool2D): class TestSeqMaxPool2DLen0(TestSeqMaxPool2D): + def set_lod(self): return [[0, 3, 0, 10, 0]] class TestSeqMaxPool2DLen0LoDLevel2(TestSeqMaxPool2D): + def set_lod(self): return [[1, 0, 2, 2], [0, 3, 0, 10, 0]] @@ -386,6 +421,7 @@ class TestSeqMaxPool2DLen0LoDLevel2(TestSeqMaxPool2D): @skip_check_grad_ci(reason="Grad computation does not apply to Sequence MAX " "Pool executed when is_test is true.") class TestSeqMaxPool2DInference(TestSeqMaxPool2D): + def compute(self, x, offset, out): self.attrs = {"pad_value": 1.0, 'pooltype': "MAX", 'is_test': True} level = len(offset) - 1 @@ -404,16 +440,19 @@ class TestSeqMaxPool2DInference(TestSeqMaxPool2D): class TestSeqMaxPool2DInferenceLen0(TestSeqMaxPool2DInference): + def set_lod(self): return [[0, 3, 0, 10, 0]] class TestSeqMaxPool2DInferenceLen0LoDLevel2(TestSeqMaxPool2DInference): + def set_lod(self): return [[1, 0, 2, 2], [0, 3, 0, 10, 0]] class TestSeqLastPool2D(TestSeqAvgPool2D): + def compute(self, x, offset, out): self.attrs = {"pad_value": 0.0, 'pooltype': "LAST"} level = len(offset) - 1 @@ -427,16 +466,19 @@ class TestSeqLastPool2D(TestSeqAvgPool2D): class TestSeqLastPool2DLen0(TestSeqLastPool2D): + def set_lod(self): return [[0, 3, 0, 1, 9, 0]] class TestSeqLastPool2DLen0LoDLevel2(TestSeqLastPool2D): + def set_lod(self): return [[1, 0, 2, 3], [0, 3, 0, 1, 9, 0]] class TestSeqFirstPool2D(TestSeqAvgPool2D): + def compute(self, x, offset, out): self.attrs = {"pad_value": 0.0, 'pooltype': "FIRST"} level = len(offset) - 1 @@ -450,11 +492,13 @@ class TestSeqFirstPool2D(TestSeqAvgPool2D): class TestSeqFirstPool2DLen0(TestSeqFirstPool2D): + def set_lod(self): return [[0, 3, 0, 3, 7, 0]] class TestSeqFirstPool2DLen0LoDLevel2(TestSeqFirstPool2D): + def set_lod(self): return [[1, 0, 2, 3], [0, 3, 0, 3, 7, 0]] diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_reshape.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_reshape.py index 6540c6a0944..7a20f70c2da 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_reshape.py +++ 
b/python/paddle/fluid/tests/unittests/sequence/test_sequence_reshape.py @@ -18,6 +18,7 @@ import unittest import numpy as np import math import sys + sys.path.append("../") from op_test import OpTest @@ -25,6 +26,7 @@ import paddle.fluid as fluid class TestSequenceReshape(OpTest): + def init_data(self): self.dimension = 12 self.x_lod = [[4, 1, 3, 3]] @@ -58,6 +60,7 @@ class TestSequenceReshape(OpTest): class TestSequenceReshape_reduce(TestSequenceReshape): + def init_data(self): self.dimension = 24 self.x_lod = [[4, 2, 2, 4]] @@ -65,6 +68,7 @@ class TestSequenceReshape_reduce(TestSequenceReshape): class TestSequenceReshape_same(TestSequenceReshape): + def init_data(self): self.dimension = 12 self.x_lod = [[4, 2, 2, 4]] @@ -72,6 +76,7 @@ class TestSequenceReshape_same(TestSequenceReshape): class TestSequenceReshape_reduce_seq_len0(TestSequenceReshape): + def init_data(self): self.dimension = 24 self.x_lod = [[0, 6, 0, 2, 4]] @@ -79,6 +84,7 @@ class TestSequenceReshape_reduce_seq_len0(TestSequenceReshape): class TestSequenceReshape_reduce_seq_len0_case1(TestSequenceReshape): + def init_data(self): self.dimension = 24 self.x_lod = [[0, 2, 8, 2, 0]] @@ -86,7 +92,9 @@ class TestSequenceReshape_reduce_seq_len0_case1(TestSequenceReshape): class TestSequenceReshapeOpError(unittest.TestCase): + def test_error(self): + def test_variable(): x = np.random.random((2, 4)).astype("float32") fluid.layers.sequence_reshape(x=x, new_dim=4) @@ -94,12 +102,11 @@ class TestSequenceReshapeOpError(unittest.TestCase): self.assertRaises(TypeError, test_variable) def test_dtype(): - x1 = fluid.layers.data( - name='x1', - shape=[2, 6], - append_batch_size=False, - dtype='float16', - lod_level=1) + x1 = fluid.layers.data(name='x1', + shape=[2, 6], + append_batch_size=False, + dtype='float16', + lod_level=1) fluid.layers.sequence_reshape(x=x1, new_dim=4) self.assertRaises(TypeError, test_dtype) diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_reverse.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_reverse.py index 4ffec9737af..8e1b447f92f 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_reverse.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_reverse.py @@ -17,11 +17,13 @@ import paddle.fluid as fluid import paddle.fluid.core as core import numpy as np import sys + sys.path.append("../") from op_test import OpTest class TestSequenceReverseBase(OpTest): + def initParameters(self): pass @@ -34,8 +36,16 @@ class TestSequenceReverseBase(OpTest): self.x = np.random.random(self.size).astype(self.dtype) self.y = self.get_output() - self.inputs = {'X': (self.x, [self.lod, ]), } - self.outputs = {'Y': (self.y, [self.lod, ]), } + self.inputs = { + 'X': (self.x, [ + self.lod, + ]), + } + self.outputs = { + 'Y': (self.y, [ + self.lod, + ]), + } def get_output(self): tmp_x = np.reshape(self.x, newshape=[self.x.shape[0], -1]) @@ -56,31 +66,37 @@ class TestSequenceReverseBase(OpTest): class TestSequenceReserve1(TestSequenceReverseBase): + def initParameters(self): self.size = (12, 10) self.lod = [4, 5, 3] class TestSequenceReverse2(TestSequenceReverseBase): + def initParameters(self): self.size = (12, 10) self.lod = [12] class TestSequenceReverse3(TestSequenceReverseBase): + def initParameters(self): self.size = (12, 10) self.lod = [3, 0, 6, 3] class TestSequenceReverse4(TestSequenceReverseBase): + def initParameters(self): self.size = (12, 10) self.lod = [0, 2, 10, 0] class TestSequenceReverseOpError(unittest.TestCase): + def test_error(self): + def 
test_variable(): # the input type must be Variable x_data = np.random.random((2, 4)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_scatter_op.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_scatter_op.py index 1cc78c85b50..2b8e8261829 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_scatter_op.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_scatter_op.py @@ -15,11 +15,13 @@ import unittest import numpy as np import sys + sys.path.append("../") from op_test import OpTest class TestSequenceScatterOp(OpTest): + def init_lod(self): return [[30, 50, 40]] @@ -55,31 +57,37 @@ class TestSequenceScatterOp(OpTest): class TestSequenceScatterOpSeqLen0(TestSequenceScatterOp): + def init_lod(self): return [[60, 60, 00]] class TestSequenceScatterOpSeqLen0Case1(TestSequenceScatterOp): + def init_lod(self): return [[0, 60, 60]] class TestSequenceScatterOpSeqLen0Case2(TestSequenceScatterOp): + def init_lod(self): return [[60, 0, 60]] class TestSequenceScatterOpSeqLen0Case3(TestSequenceScatterOp): + def init_lod(self): return [[120, 0, 0]] class TestSequenceScatterOpSeqLen0Case4(TestSequenceScatterOp): + def init_lod(self): return [[0, 120, 0]] class TestSequenceScatterOpSeqLen0Case5(TestSequenceScatterOp): + def init_lod(self): return [[0, 0, 120]] diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_slice_op.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_slice_op.py index 4d254ea6d4f..b961bdc4e85 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_slice_op.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_slice_op.py @@ -17,11 +17,13 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("../") from op_test import OpTest class TestSequenceSliceOp(OpTest): + def set_data(self): self.init_test_case() # only supprot one level LoD @@ -61,6 +63,7 @@ class TestSequenceSliceOp(OpTest): class TestSequenceSliceOpSeqlen0Case0(TestSequenceSliceOp): + def init_test_case(self): self.x_dim = (100, 3, 2) self.x_lod = [[20, 30, 0, 30, 20]] @@ -69,6 +72,7 @@ class TestSequenceSliceOpSeqlen0Case0(TestSequenceSliceOp): class TestSequenceSliceOpSeqlen0Case1(TestSequenceSliceOp): + def init_test_case(self): self.x_dim = (100, 3, 2) self.x_lod = [[0, 70, 0, 30, 0]] @@ -77,6 +81,7 @@ class TestSequenceSliceOpSeqlen0Case1(TestSequenceSliceOp): class TestSequenceSliceOpSeqlen0Case2(TestSequenceSliceOp): + def init_test_case(self): self.x_dim = (100, 3, 2) self.x_lod = [[0, 100, 0, 0, 0]] diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_softmax_op.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_softmax_op.py index cb92a68bde6..db07a0eaa2a 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_softmax_op.py @@ -18,12 +18,14 @@ import unittest import numpy as np import paddle.fluid.core as core import sys + sys.path.append("../") from op_test import OpTest from test_softmax_op import stable_softmax class TestSequenceSoftmaxOp(OpTest): + def setUp(self): self.op_type = "sequence_softmax" self.use_cudnn = False @@ -45,7 +47,9 @@ class TestSequenceSoftmaxOp(OpTest): self.inputs = {"X": (x, self.lod)} self.outputs = {"Out": out} - self.attrs = {'use_cudnn': self.use_cudnn, } + self.attrs = { + 'use_cudnn': self.use_cudnn, + } def init_lod(self): self.lod = [[40, 10, 30, 30]] @@ 
-72,21 +76,25 @@ class TestSequenceSoftmaxOp(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSequenceSoftmaxCUDNNOp(TestSequenceSoftmaxOp): + def init_op_type(self): self.use_cudnn = True class TestSequenceSoftmaxOpSeqLen0Case0(TestSequenceSoftmaxOp): + def init_lod(self): self.lod = [[40, 0, 40, 30]] class TestSequenceSoftmaxOpSeqLen0Case1(TestSequenceSoftmaxOp): + def init_lod(self): self.lod = [[0, 40, 70, 0]] class TestSequenceSoftmaxOpSeqLen0Case2(TestSequenceSoftmaxOp): + def init_lod(self): self.lod = [[0, 0, 0, 110]] diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_topk_avg_pooling.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_topk_avg_pooling.py index fe9aa5ad025..55a0c8d0bc3 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_topk_avg_pooling.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_topk_avg_pooling.py @@ -18,11 +18,13 @@ import unittest import numpy as np from copy import deepcopy import sys + sys.path.append("../") from op_test import OpTest class TestSequenceTopkAvgPoolingOp(OpTest): + def setUp(self): self.init_op_type() self.set_data() @@ -72,8 +74,9 @@ class TestSequenceTopkAvgPoolingOp(OpTest): for k_idx in range(len(topks)): for k in range(topks[k_idx]): if pos_data[pos_idx + k] != -1: - gradient[in_idx + pos_data[ - pos_idx + k]] += dout_val / topks[k_idx] + gradient[in_idx + + pos_data[pos_idx + + k]] += dout_val / topks[k_idx] in_offset += row_size * col_size pos_offset += row_size * max_k return gradient @@ -109,10 +112,10 @@ class TestSequenceTopkAvgPoolingOp(OpTest): offset += col_lod[0][idx] - out_tmp = out_tmp.reshape([channel_num, -1, len(topks)]).transpose( - 1, 0, 2) - pos_tmp = pos_tmp.reshape([channel_num, -1, max_k]).transpose(1, 0, - 2) + out_tmp = out_tmp.reshape([channel_num, -1, + len(topks)]).transpose(1, 0, 2) + pos_tmp = pos_tmp.reshape([channel_num, -1, + max_k]).transpose(1, 0, 2) out = np.vstack( (out, out_tmp.reshape([-1, len(topks) * channel_num]))) pos = np.hstack((pos, pos_tmp.flatten())) @@ -148,6 +151,7 @@ class TestSequenceTopkAvgPoolingOp(OpTest): class TestSequenceTopkAvgPoolingOpCase1(TestSequenceTopkAvgPoolingOp): + def set_data(self): topks = [2, 3] channel_num = 5 @@ -161,8 +165,11 @@ class TestSequenceTopkAvgPoolingOpCase1(TestSequenceTopkAvgPoolingOp): x = fluid.layers.data(name='x', shape=[1], lod_level=1) row = fluid.layers.data(name='row', shape=[10], lod_level=1) col = fluid.layers.data(name='col', shape=[10], lod_level=1) - topk_avg = fluid.contrib.sequence_topk_avg_pooling( - input=x, row=row, col=col, topks=[1, 3, 5], channel_num=5) + topk_avg = fluid.contrib.sequence_topk_avg_pooling(input=x, + row=row, + col=col, + topks=[1, 3, 5], + channel_num=5) place = fluid.CPUPlace() x_tensor = fluid.create_lod_tensor( @@ -174,12 +181,13 @@ class TestSequenceTopkAvgPoolingOpCase1(TestSequenceTopkAvgPoolingOp): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - ret = exe.run( - feed={'x': x_tensor, - 'row': row_tensor, - 'col': col_tensor}, - fetch_list=[topk_avg], - return_numpy=False) + ret = exe.run(feed={ + 'x': x_tensor, + 'row': row_tensor, + 'col': col_tensor + }, + fetch_list=[topk_avg], + return_numpy=False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_unpad_op.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_unpad_op.py index 1d212296227..ab60fbfde33 100644 --- 
a/python/paddle/fluid/tests/unittests/sequence/test_sequence_unpad_op.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_unpad_op.py @@ -16,6 +16,7 @@ import unittest import six import numpy as np import sys + sys.path.append("../") from op_test import OpTest @@ -23,6 +24,7 @@ import paddle.fluid as fluid class TestSequenceUnpadOp(OpTest): + def init(self): self.length = [2, 3, 4] self.x_shape = (3, 40) @@ -59,6 +61,7 @@ class TestSequenceUnpadOp(OpTest): class TestSequenceUnpadOp2(TestSequenceUnpadOp): + def init(self): self.length = [2, 3, 4] self.x_shape = (3, 5, 4, 3) @@ -66,6 +69,7 @@ class TestSequenceUnpadOp2(TestSequenceUnpadOp): class TestSequenceUnpadOp3(TestSequenceUnpadOp): + def init(self): self.length = [5, 2, 3, 4] self.x_shape = (4, 5, 3, 3, 6) @@ -73,6 +77,7 @@ class TestSequenceUnpadOp3(TestSequenceUnpadOp): class TestSequenceUnpadOp4(TestSequenceUnpadOp): + def init(self): self.length = [5, 0, 0, 4] self.x_shape = (4, 5, 3, 3, 6) @@ -80,6 +85,7 @@ class TestSequenceUnpadOp4(TestSequenceUnpadOp): class TestSequenceUnpadOp5(TestSequenceUnpadOp): + def init(self): self.length = [0, 4, 3, 0] self.x_shape = (4, 5, 3, 3, 6) @@ -87,7 +93,9 @@ class TestSequenceUnpadOp5(TestSequenceUnpadOp): class TestSequenceUnpadOpError(unittest.TestCase): + def test_error(self): + def test_x_variable(): x = np.random.random((10, 5)).astype("float64") len = fluid.data(name='length2', shape=[10], dtype='int64') diff --git a/python/paddle/fluid/tests/unittests/seresnext_net.py b/python/paddle/fluid/tests/unittests/seresnext_net.py index 1f02562dcb4..b014a079b80 100644 --- a/python/paddle/fluid/tests/unittests/seresnext_net.py +++ b/python/paddle/fluid/tests/unittests/seresnext_net.py @@ -14,6 +14,7 @@ from __future__ import print_function import paddle.fluid as fluid + fluid.core._set_eager_deletion_mode(-1, -1, False) import paddle.fluid.layers.ops as ops @@ -22,6 +23,7 @@ from simple_nets import init_data from seresnext_test_base import DeviceType import math import os + os.environ['CPU_NUM'] = str(4) os.environ['FLAGS_cudnn_deterministic'] = str(1) @@ -48,8 +50,8 @@ def squeeze_excitation(input, num_channels, reduction_ratio): # input=input, pool_size=0, pool_type='avg', global_pooling=True) conv = input shape = conv.shape - reshape = fluid.layers.reshape( - x=conv, shape=[-1, shape[1], shape[2] * shape[3]]) + reshape = fluid.layers.reshape(x=conv, + shape=[-1, shape[1], shape[2] * shape[3]]) pool = fluid.layers.reduce_mean(input=reshape, dim=2) squeeze = fluid.layers.fc(input=pool, @@ -62,18 +64,21 @@ def squeeze_excitation(input, num_channels, reduction_ratio): return scale -def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1, +def conv_bn_layer(input, + num_filters, + filter_size, + stride=1, + groups=1, act=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - use_cudnn=(not remove_cudnn_conv), - bias_attr=False) + conv = fluid.layers.conv2d(input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + use_cudnn=(not remove_cudnn_conv), + bias_attr=False) return conv if remove_bn else fluid.layers.batch_norm( input=conv, act=act, momentum=0.1) @@ -93,21 +98,23 @@ def shortcut(input, ch_out, stride): def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio): # The number of first 1x1 convolutional channels for each 
bottleneck build block # was halved to reduce the compution cost. - conv0 = conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu') - conv1 = conv_bn_layer( - input=conv0, - num_filters=num_filters * 2, - filter_size=3, - stride=stride, - groups=cardinality, - act='relu') - conv2 = conv_bn_layer( - input=conv1, num_filters=num_filters * 2, filter_size=1, act=None) - scale = squeeze_excitation( - input=conv2, - num_channels=num_filters * 2, - reduction_ratio=reduction_ratio) + conv0 = conv_bn_layer(input=input, + num_filters=num_filters, + filter_size=1, + act='relu') + conv1 = conv_bn_layer(input=conv0, + num_filters=num_filters * 2, + filter_size=3, + stride=stride, + groups=cardinality, + act='relu') + conv2 = conv_bn_layer(input=conv1, + num_filters=num_filters * 2, + filter_size=1, + act=None) + scale = squeeze_excitation(input=conv2, + num_channels=num_filters * 2, + reduction_ratio=reduction_ratio) short = shortcut(input, num_filters * 2, stride) @@ -122,14 +129,26 @@ def SE_ResNeXt50Small(use_feed): img = fluid.layers.data(name='image', shape=img_shape, dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') - conv = conv_bn_layer( - input=img, num_filters=16, filter_size=3, stride=2, act='relu') - conv = conv_bn_layer( - input=conv, num_filters=16, filter_size=3, stride=1, act='relu') - conv = conv_bn_layer( - input=conv, num_filters=16, filter_size=3, stride=1, act='relu') - conv = fluid.layers.pool2d( - input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + conv = conv_bn_layer(input=img, + num_filters=16, + filter_size=3, + stride=2, + act='relu') + conv = conv_bn_layer(input=conv, + num_filters=16, + filter_size=3, + stride=1, + act='relu') + conv = conv_bn_layer(input=conv, + num_filters=16, + filter_size=3, + stride=1, + act='relu') + conv = fluid.layers.pool2d(input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') cardinality = 32 reduction_ratio = 16 @@ -138,16 +157,15 @@ def SE_ResNeXt50Small(use_feed): for block in range(len(depth)): for i in range(depth[block]): - conv = bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=cardinality, - reduction_ratio=reduction_ratio) + conv = bottleneck_block(input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=cardinality, + reduction_ratio=reduction_ratio) shape = conv.shape - reshape = fluid.layers.reshape( - x=conv, shape=[-1, shape[1], shape[2] * shape[3]]) + reshape = fluid.layers.reshape(x=conv, + shape=[-1, shape[1], shape[2] * shape[3]]) pool = fluid.layers.reduce_mean(input=reshape, dim=2) dropout = pool if remove_dropout else fluid.layers.dropout( x=pool, dropout_prob=0.2, seed=1) @@ -160,8 +178,9 @@ def SE_ResNeXt50Small(use_feed): def optimizer(learning_rate=0.01): optimizer = fluid.optimizer.Momentum( - learning_rate=cosine_decay( - learning_rate=learning_rate, step_each_epoch=2, epochs=1), + learning_rate=cosine_decay(learning_rate=learning_rate, + step_each_epoch=2, + epochs=1), momentum=0.9, regularization=fluid.regularizer.L2Decay(1e-4)) return optimizer @@ -187,10 +206,9 @@ gpu_img, gpu_label = init_data( batch_size=batch_size(use_device=DeviceType.CUDA), img_shape=img_shape, label_range=999) -cpu_img, cpu_label = init_data( - batch_size=batch_size(use_device=DeviceType.CPU), - img_shape=img_shape, - label_range=999) +cpu_img, cpu_label = init_data(batch_size=batch_size(use_device=DeviceType.CPU), + 
img_shape=img_shape, + label_range=999) feed_dict_gpu = {"image": gpu_img, "label": gpu_label} feed_dict_cpu = {"image": cpu_img, "label": cpu_label} diff --git a/python/paddle/fluid/tests/unittests/seresnext_test_base.py b/python/paddle/fluid/tests/unittests/seresnext_test_base.py index bf33adcf486..f9113520131 100644 --- a/python/paddle/fluid/tests/unittests/seresnext_test_base.py +++ b/python/paddle/fluid/tests/unittests/seresnext_test_base.py @@ -21,6 +21,7 @@ import numpy as np class TestResnetBase(TestParallelExecutorBase): + def _compare_result_with_origin_model(self, check_func, use_device, @@ -51,7 +52,9 @@ class TestResnetBase(TestParallelExecutorBase): for loss in zip(func_1_last_loss, func_2_last_loss): self.assertAlmostEquals(loss[0], loss[1], delta=delta2) else: - self.assertAlmostEquals( - np.mean(func_1_first_loss), func_2_first_loss[0], delta=1e-5) - self.assertAlmostEquals( - np.mean(func_1_last_loss), func_2_last_loss[0], delta=delta2) + self.assertAlmostEquals(np.mean(func_1_first_loss), + func_2_first_loss[0], + delta=1e-5) + self.assertAlmostEquals(np.mean(func_1_last_loss), + func_2_last_loss[0], + delta=delta2) diff --git a/python/paddle/fluid/tests/unittests/simnet_dataset_reader.py b/python/paddle/fluid/tests/unittests/simnet_dataset_reader.py index 737677ccf90..62cfa5453d4 100644 --- a/python/paddle/fluid/tests/unittests/simnet_dataset_reader.py +++ b/python/paddle/fluid/tests/unittests/simnet_dataset_reader.py @@ -29,5 +29,6 @@ logger.setLevel(logging.INFO) class DatasetSimnetReader(fleet.MultiSlotDataGenerator): + def generate_sample(self, line): pass diff --git a/python/paddle/fluid/tests/unittests/simple_nets.py b/python/paddle/fluid/tests/unittests/simple_nets.py index 7f22df67d1b..b9e38d21da8 100644 --- a/python/paddle/fluid/tests/unittests/simple_nets.py +++ b/python/paddle/fluid/tests/unittests/simple_nets.py @@ -23,8 +23,8 @@ def simple_fc_net_with_inputs(img, label, class_num=10): hidden, size=100, act='relu', - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0))) + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=1.0))) prediction = fluid.layers.fc(hidden, size=class_num, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) loss = fluid.layers.mean(loss) @@ -44,8 +44,8 @@ def batchnorm_fc_with_inputs(img, label, class_num=10): hidden, size=200, act='relu', - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0))) + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=1.0))) hidden = fluid.layers.batch_norm(input=hidden) @@ -73,11 +73,14 @@ def bow_net(use_feed, This model is from https://github.com/PaddlePaddle/models: fluid/PaddleNLP/text_classification/nets.py """ - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") - emb = fluid.layers.embedding( - input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim]) + emb = fluid.layers.embedding(input=data, + is_sparse=is_sparse, + size=[dict_dim, emb_dim]) bow = fluid.layers.sequence_pool(input=emb, pool_type='sum') bow_tanh = fluid.layers.tanh(bow) fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh") diff --git a/python/paddle/fluid/tests/unittests/spawn_runner_base.py b/python/paddle/fluid/tests/unittests/spawn_runner_base.py index 11f8cd559d1..42d8f50ea3f 100644 --- 
a/python/paddle/fluid/tests/unittests/spawn_runner_base.py +++ b/python/paddle/fluid/tests/unittests/spawn_runner_base.py @@ -32,6 +32,7 @@ class SpawnAssistTestArgs(object): class TestDistSpawnRunner(unittest.TestCase): + def setUp(self): # NOTE(chenweihang): keep consistent with # TestDistBase.check_with_place @@ -43,11 +44,10 @@ class TestDistSpawnRunner(unittest.TestCase): def _run_parallel(self, model, args): args.update_method = "nccl2" - context = paddle.distributed.spawn( - func=model.run_trainer_with_spawn, - args=(args, ), - nprocs=self.nprocs, - join=True) + context = paddle.distributed.spawn(func=model.run_trainer_with_spawn, + args=(args, ), + nprocs=self.nprocs, + join=True) result_list = [] for res_queue in context.return_queues: result_list.append(res_queue.get()) @@ -55,10 +55,10 @@ class TestDistSpawnRunner(unittest.TestCase): def check_dist_result_with_spawn(self, test_class, delta=1e-3): with _test_eager_guard(): - self.check_dist_result_with_spawn_func( - test_class=test_class, delta=delta) - self.check_dist_result_with_spawn_func( - test_class=test_class, delta=delta) + self.check_dist_result_with_spawn_func(test_class=test_class, + delta=delta) + self.check_dist_result_with_spawn_func(test_class=test_class, + delta=delta) def check_dist_result_with_spawn_func(self, test_class, delta=1e-3): # 0. prepare model and args @@ -85,6 +85,7 @@ class TestDistSpawnRunner(unittest.TestCase): loss, dist_loss, delta=delta, - msg="The results of single-card execution and multi-card execution are inconsistent." + msg= + "The results of single-card execution and multi-card execution are inconsistent." "signal-card loss is:\n{}\nmulti-card average loss is:\n{}\n". format(loss, dist_loss)) diff --git a/python/paddle/fluid/tests/unittests/static_model_parallel_by_col.py b/python/paddle/fluid/tests/unittests/static_model_parallel_by_col.py index 6596eca4d39..e1f8185e704 100644 --- a/python/paddle/fluid/tests/unittests/static_model_parallel_by_col.py +++ b/python/paddle/fluid/tests/unittests/static_model_parallel_by_col.py @@ -61,14 +61,13 @@ def create_model(data, rank): np_bias_part = np_bias[start_col:start_col + OUT_SIZE // 2] weight_attr, bias_attr = get_param_attr(np_weight_part, np_bias_part) - result = paddle.distributed.split( - data, - size=(IN_SIZE, OUT_SIZE), - operation='linear', - axis=1, - num_partitions=MODEL_PARALLEL_SIZE, - weight_attr=weight_attr, - bias_attr=bias_attr) + result = paddle.distributed.split(data, + size=(IN_SIZE, OUT_SIZE), + operation='linear', + axis=1, + num_partitions=MODEL_PARALLEL_SIZE, + weight_attr=weight_attr, + bias_attr=bias_attr) else: weight_attr, bias_attr = get_param_attr(np_weight, np_bias) result = fluid.layers.fc(data, @@ -81,10 +80,12 @@ def create_model(data, rank): class TestModelParallel(TestDistRunnerBase): + def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Input data - data_in = fluid.data( - name='data_in', shape=[batch_size, IN_SIZE], dtype=DTYPE) + data_in = fluid.data(name='data_in', + shape=[batch_size, IN_SIZE], + dtype=DTYPE) if dist_strategy: data_loader = fluid.io.DataLoader.from_generator( @@ -104,8 +105,8 @@ class TestModelParallel(TestDistRunnerBase): opt = fluid.optimizer.SGD(0.1) if dist_strategy: - dist_opt = fleet.distributed_optimizer( - optimizer=opt, strategy=strategy) + dist_opt = fleet.distributed_optimizer(optimizer=opt, + strategy=strategy) dist_opt.minimize(avg_cost) else: opt.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/static_model_parallel_by_row.py 
b/python/paddle/fluid/tests/unittests/static_model_parallel_by_row.py index fd886e16ced..26ed65ce5fa 100644 --- a/python/paddle/fluid/tests/unittests/static_model_parallel_by_row.py +++ b/python/paddle/fluid/tests/unittests/static_model_parallel_by_row.py @@ -60,14 +60,13 @@ def create_model(data, rank): np_weight_part = np_weight[start_row:start_row + IN_SIZE // 2, :] weight_attr, bias_attr = get_param_attr(np_weight_part, np_bias) - result = paddle.distributed.split( - data, - size=(IN_SIZE, OUT_SIZE), - operation='linear', - axis=0, - num_partitions=MODEL_PARALLEL_SIZE, - weight_attr=weight_attr, - bias_attr=bias_attr) + result = paddle.distributed.split(data, + size=(IN_SIZE, OUT_SIZE), + operation='linear', + axis=0, + num_partitions=MODEL_PARALLEL_SIZE, + weight_attr=weight_attr, + bias_attr=bias_attr) else: weight_attr, bias_attr = get_param_attr(np_weight, np_bias) result = fluid.layers.fc( @@ -82,10 +81,12 @@ def create_model(data, rank): class TestModelParallel(TestDistRunnerBase): + def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Input data - data_in = fluid.data( - name='data_in', shape=[batch_size, IN_SIZE], dtype=DTYPE) + data_in = fluid.data(name='data_in', + shape=[batch_size, IN_SIZE], + dtype=DTYPE) if dist_strategy: data_loader = fluid.io.DataLoader.from_generator( @@ -105,8 +106,8 @@ class TestModelParallel(TestDistRunnerBase): opt = fluid.optimizer.SGD(0.1) if dist_strategy: - dist_opt = fleet.distributed_optimizer( - optimizer=opt, strategy=strategy) + dist_opt = fleet.distributed_optimizer(optimizer=opt, + strategy=strategy) dist_opt.minimize(avg_cost) else: opt.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/static_model_parallel_embedding.py b/python/paddle/fluid/tests/unittests/static_model_parallel_embedding.py index 4a98792f8a0..d72e61940f8 100644 --- a/python/paddle/fluid/tests/unittests/static_model_parallel_embedding.py +++ b/python/paddle/fluid/tests/unittests/static_model_parallel_embedding.py @@ -55,27 +55,30 @@ def create_model(data, rank): operation='linear', axis=0, num_partitions=MODEL_PARALLEL_SIZE, - weight_attr=paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - np_weight_part)), - bias_attr=False, ) + weight_attr=paddle.ParamAttr(initializer=fluid.initializer. 
+ NumpyArrayInitializer(np_weight_part)), + bias_attr=False, + ) else: result = fluid.layers.fc( data, size=OUT_SIZE, param_attr=paddle.ParamAttr( initializer=fluid.initializer.NumpyArrayInitializer(np_weight)), - bias_attr=False, ) + bias_attr=False, + ) predict = paddle.sum(result) return predict class TestModelParallel(TestDistRunnerBase): + def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Input data - data_in = fluid.data( - name='data_in', shape=[batch_size, IN_SIZE], dtype=DTYPE) + data_in = fluid.data(name='data_in', + shape=[batch_size, IN_SIZE], + dtype=DTYPE) if dist_strategy: data_loader = fluid.io.DataLoader.from_generator( @@ -95,8 +98,8 @@ class TestModelParallel(TestDistRunnerBase): opt = fluid.optimizer.SGD(0.1) if dist_strategy: - dist_opt = fleet.distributed_optimizer( - optimizer=opt, strategy=strategy) + dist_opt = fleet.distributed_optimizer(optimizer=opt, + strategy=strategy) dist_opt.minimize(avg_cost) else: opt.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_attention.py b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_attention.py index 4dc3fe6eab6..908af43e008 100644 --- a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_attention.py +++ b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_attention.py @@ -45,6 +45,7 @@ def _set_var_distributed(var): class ParallelFusedMultiHeadAttention(Layer): + def __init__(self, embed_dim, num_heads, @@ -106,11 +107,10 @@ class ParallelFusedMultiHeadAttention(Layer): attr=linear_weight_attr, dtype=self._dtype, is_bias=False) - self.linear_bias = self.create_parameter( - shape=[embed_dim], - attr=linear_bias_attr, - dtype=self._dtype, - is_bias=True) + self.linear_bias = self.create_parameter(shape=[embed_dim], + attr=linear_bias_attr, + dtype=self._dtype, + is_bias=True) # tensor model parallel if nranks > 1: @@ -126,8 +126,9 @@ class ParallelFusedMultiHeadAttention(Layer): attr=pre_ln_scale_attr, shape=[embed_dim], default_initializer=Constant(value=1.0)) - self.pre_ln_bias = self.create_parameter( - attr=pre_ln_bias_attr, shape=[embed_dim], is_bias=True) + self.pre_ln_bias = self.create_parameter(attr=pre_ln_bias_attr, + shape=[embed_dim], + is_bias=True) self.ln_scale = None self.ln_bias = None else: @@ -137,8 +138,9 @@ class ParallelFusedMultiHeadAttention(Layer): attr=ln_scale_attr, shape=[embed_dim], default_initializer=Constant(value=1.0)) - self.ln_bias = self.create_parameter( - attr=ln_bias_attr, shape=[embed_dim], is_bias=True) + self.ln_bias = self.create_parameter(attr=ln_bias_attr, + shape=[embed_dim], + is_bias=True) self.dropout_rate = dropout_rate self.attn_dropout_rate = attn_dropout_rate @@ -187,11 +189,11 @@ def create_model(data, rank): np.random.seed(2021) pre_ln_w = np.random.uniform(-1, 1, size=(hidden, )).astype(DTYPE) pre_ln_b = np.random.uniform(-1, 1, size=(hidden, )).astype(DTYPE) - qkv_w = np.random.uniform( - -1, 1, size=(3, n_head, d_key, hidden)).astype(DTYPE) + qkv_w = np.random.uniform(-1, 1, + size=(3, n_head, d_key, hidden)).astype(DTYPE) qkv_b = np.random.uniform(-1, 1, size=(3, n_head, d_key)).astype(DTYPE) - linear_w = np.random.uniform( - -1, 1, size=(n_head * d_key, hidden)).astype(DTYPE) + linear_w = np.random.uniform(-1, 1, + size=(n_head * d_key, hidden)).astype(DTYPE) linear_b = np.random.uniform(-1, 1, size=(hidden, )).astype(DTYPE) data.stop_gradient = False @@ -206,42 +208,40 @@ def create_model(data, rank): qkv_w_attr, qkv_b_attr = get_param_attr(col_qkv_w, 
col_qkv_b) linear_w_attr, linear_b_attr = get_param_attr(row_linear_w, linear_b) - attn = ParallelFusedMultiHeadAttention( - hidden, - n_head, - dropout_rate=0.0, - attn_dropout_rate=0.0, - normalize_before=False, - qkv_weight_attr=qkv_w_attr, - qkv_bias_attr=qkv_b_attr, - linear_weight_attr=linear_w_attr, - linear_bias_attr=linear_b_attr, - pre_ln_scale_attr=pre_ln_w_attr, - pre_ln_bias_attr=pre_ln_b_attr, - ln_scale_attr=pre_ln_w_attr, - ln_bias_attr=pre_ln_b_attr, - nranks=MODEL_PARALLEL_SIZE, - ring_id=0) + attn = ParallelFusedMultiHeadAttention(hidden, + n_head, + dropout_rate=0.0, + attn_dropout_rate=0.0, + normalize_before=False, + qkv_weight_attr=qkv_w_attr, + qkv_bias_attr=qkv_b_attr, + linear_weight_attr=linear_w_attr, + linear_bias_attr=linear_b_attr, + pre_ln_scale_attr=pre_ln_w_attr, + pre_ln_bias_attr=pre_ln_b_attr, + ln_scale_attr=pre_ln_w_attr, + ln_bias_attr=pre_ln_b_attr, + nranks=MODEL_PARALLEL_SIZE, + ring_id=0) result = attn(data) else: pre_ln_w_attr, pre_ln_b_attr = get_param_attr(pre_ln_w, pre_ln_b) qkv_w_attr, qkv_b_attr = get_param_attr(qkv_w, qkv_b) linear_w_attr, linear_b_attr = get_param_attr(linear_w, linear_b) - attn = ParallelFusedMultiHeadAttention( - hidden, - n_head, - dropout_rate=0.0, - attn_dropout_rate=0.0, - normalize_before=False, - qkv_weight_attr=qkv_w_attr, - qkv_bias_attr=qkv_b_attr, - linear_weight_attr=linear_w_attr, - linear_bias_attr=linear_b_attr, - pre_ln_scale_attr=pre_ln_w_attr, - pre_ln_bias_attr=pre_ln_b_attr, - ln_scale_attr=pre_ln_w_attr, - ln_bias_attr=pre_ln_b_attr) + attn = ParallelFusedMultiHeadAttention(hidden, + n_head, + dropout_rate=0.0, + attn_dropout_rate=0.0, + normalize_before=False, + qkv_weight_attr=qkv_w_attr, + qkv_bias_attr=qkv_b_attr, + linear_weight_attr=linear_w_attr, + linear_bias_attr=linear_b_attr, + pre_ln_scale_attr=pre_ln_w_attr, + pre_ln_bias_attr=pre_ln_b_attr, + ln_scale_attr=pre_ln_w_attr, + ln_bias_attr=pre_ln_b_attr) result = attn(data) predict = paddle.sum(result) @@ -249,11 +249,13 @@ def create_model(data, rank): class TestModelParallel(TestDistRunnerBase): + def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Input data seq_len = 2 - data_in = fluid.data( - name='data_in', shape=[batch_size, seq_len, hidden], dtype=DTYPE) + data_in = fluid.data(name='data_in', + shape=[batch_size, seq_len, hidden], + dtype=DTYPE) if dist_strategy: data_loader = fluid.io.DataLoader.from_generator( @@ -273,8 +275,8 @@ class TestModelParallel(TestDistRunnerBase): opt = fluid.optimizer.SGD(0.1) if dist_strategy: - dist_opt = fleet.distributed_optimizer( - optimizer=opt, strategy=strategy) + dist_opt = fleet.distributed_optimizer(optimizer=opt, + strategy=strategy) dist_opt.minimize(avg_cost) else: opt.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_feedforward.py b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_feedforward.py index ad570fc0acf..a5af3cd877c 100644 --- a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_feedforward.py +++ b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_feedforward.py @@ -56,7 +56,8 @@ def fused_feedforward(x, seed = None if mode not in ('downscale_in_infer', 'upscale_in_train'): raise ValueError( - "mode argument should be 'downscale_in_infer' or 'upscale_in_train'") + "mode argument should be 'downscale_in_infer' or 'upscale_in_train'" + ) mode = 'downgrade_in_infer' if mode == 'downscale_in_infer' else mode #semantic transfer helper = LayerHelper("fused_feedforward") @@ 
-71,69 +72,68 @@ def fused_feedforward(x, 'uint8', stop_gradient=True) dropout2_mask = helper.create_variable_for_type_inference( 'uint8', stop_gradient=True) - ln1_mean = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - ln1_variance = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - ln2_mean = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - ln2_variance = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - linear1_out = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - ln1_out = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - dropout1_out = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - dropout2_out = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) + ln1_mean = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + ln1_variance = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + ln2_mean = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + ln2_variance = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + linear1_out = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + ln1_out = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + dropout1_out = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + dropout2_out = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) if (seed is None or seed == 0) and helper.main_program.random_seed != 0: seed = helper.main_program.random_seed - helper.append_op( - type='fused_feedforward', - inputs={ - 'X': x, - 'Linear1Weight': linear1_weight, - 'Linear1Bias': linear1_bias, - 'Linear2Weight': linear2_weight, - 'Linear2Bias': linear2_bias, - 'Ln1Scale': ln1_scale, - 'Ln1Bias': ln1_bias, - 'Ln2Scale': ln2_scale, - 'Ln2Bias': ln2_bias, - }, - outputs={ - 'Out': out, - 'Dropout1Mask': dropout1_mask, - 'Dropout2Mask': dropout2_mask, - 'Ln1Mean': ln1_mean, - 'Ln1Variance': ln1_variance, - 'Ln2Mean': ln2_mean, - 'Ln2Variance': ln2_variance, - 'Linear1Out': linear1_out, - 'Ln1Out': ln1_out, - 'Dropout1Out': dropout1_out, - 'Dropout2Out': dropout2_out, - }, - attrs={ - 'dropout1_rate': dropout1_rate, - 'dropout2_rate': dropout2_rate, - 'act_method': activation, - 'pre_layer_norm': pre_layer_norm, - 'ln1_epsilon': ln1_epsilon, - 'ln2_epsilon': ln2_epsilon, - 'dropout1_is_test': not training, - 'dropout2_is_test': not training, - 'dropout1_fix_seed': seed is not None, - 'dropout2_fix_seed': seed is not None, - 'dropout1_seed': seed if seed is not None else 0, - 'dropout2_seed': seed if seed is not None else 0, - 'dropout1_implementation': mode, - 'dropout2_implementation': mode, - 'ring_id': ring_id, - }) + helper.append_op(type='fused_feedforward', + inputs={ + 'X': x, + 'Linear1Weight': linear1_weight, + 'Linear1Bias': linear1_bias, + 'Linear2Weight': linear2_weight, + 'Linear2Bias': linear2_bias, + 'Ln1Scale': ln1_scale, + 'Ln1Bias': ln1_bias, + 'Ln2Scale': ln2_scale, + 'Ln2Bias': ln2_bias, + }, + outputs={ + 'Out': out, + 'Dropout1Mask': dropout1_mask, + 'Dropout2Mask': dropout2_mask, + 'Ln1Mean': ln1_mean, + 'Ln1Variance': ln1_variance, + 'Ln2Mean': ln2_mean, + 'Ln2Variance': ln2_variance, + 'Linear1Out': linear1_out, + 'Ln1Out': ln1_out, + 'Dropout1Out': dropout1_out, + 'Dropout2Out': dropout2_out, + }, + attrs={ + 'dropout1_rate': dropout1_rate, + 'dropout2_rate': 
dropout2_rate, + 'act_method': activation, + 'pre_layer_norm': pre_layer_norm, + 'ln1_epsilon': ln1_epsilon, + 'ln2_epsilon': ln2_epsilon, + 'dropout1_is_test': not training, + 'dropout2_is_test': not training, + 'dropout1_fix_seed': seed is not None, + 'dropout2_fix_seed': seed is not None, + 'dropout1_seed': seed if seed is not None else 0, + 'dropout2_seed': seed if seed is not None else 0, + 'dropout1_implementation': mode, + 'dropout2_implementation': mode, + 'ring_id': ring_id, + }) return out @@ -151,6 +151,7 @@ def _set_var_distributed(var): class ParallelFusedFeedForward(Layer): + def __init__(self, d_model, dim_feedforward, @@ -196,11 +197,10 @@ class ParallelFusedFeedForward(Layer): attr=linear1_weight_attr, dtype=self._dtype, is_bias=False) - self._linear1_bias = self.create_parameter( - shape=[dim_feedforward], - attr=linear1_bias_attr, - dtype=self._dtype, - is_bias=True) + self._linear1_bias = self.create_parameter(shape=[dim_feedforward], + attr=linear1_bias_attr, + dtype=self._dtype, + is_bias=True) self._linear2_weight = self.create_parameter( shape=[dim_feedforward, d_model], @@ -208,11 +208,10 @@ class ParallelFusedFeedForward(Layer): dtype=self._dtype, is_bias=False) - self._linear2_bias = self.create_parameter( - shape=[d_model], - attr=linear2_bias_attr, - dtype=self._dtype, - is_bias=True) + self._linear2_bias = self.create_parameter(shape=[d_model], + attr=linear2_bias_attr, + dtype=self._dtype, + is_bias=True) if nranks > 1: assert ring_id != -1 @@ -227,8 +226,9 @@ class ParallelFusedFeedForward(Layer): attr=ln1_scale_attr, is_bias=False, default_initializer=Constant(1.0)) - self._ln1_bias = self.create_parameter( - shape=[d_model], attr=ln1_bias_attr, is_bias=True) + self._ln1_bias = self.create_parameter(shape=[d_model], + attr=ln1_bias_attr, + is_bias=True) self._ln2_scale = None self._ln2_bias = None else: @@ -239,31 +239,31 @@ class ParallelFusedFeedForward(Layer): attr=ln2_scale_attr, is_bias=False, default_initializer=Constant(1.0)) - self._ln2_bias = self.create_parameter( - shape=[d_model], attr=ln2_bias_attr, is_bias=True) + self._ln2_bias = self.create_parameter(shape=[d_model], + attr=ln2_bias_attr, + is_bias=True) self.name = name def forward(self, src, cache=None): - out = fused_feedforward( - src, - self._linear1_weight, - self._linear2_weight, - self._linear1_bias, - self._linear2_bias, - self._ln1_scale, - self._ln1_bias, - self._ln2_scale, - self._ln2_bias, - dropout1_rate=self._act_dropout_rate, - dropout2_rate=self._dropout_rate, - activation=self._act_method, - ln1_epsilon=self._epsilon, - ln2_epsilon=self._epsilon, - pre_layer_norm=self._normalize_before, - training=self.training, - ring_id=self._ring_id, - name=self.name) + out = fused_feedforward(src, + self._linear1_weight, + self._linear2_weight, + self._linear1_bias, + self._linear2_bias, + self._ln1_scale, + self._ln1_bias, + self._ln2_scale, + self._ln2_bias, + dropout1_rate=self._act_dropout_rate, + dropout2_rate=self._dropout_rate, + activation=self._act_method, + ln1_epsilon=self._epsilon, + ln2_epsilon=self._epsilon, + pre_layer_norm=self._normalize_before, + training=self.training, + ring_id=self._ring_id, + name=self.name) return out @@ -295,20 +295,19 @@ def create_model(data, rank): w0_attr, b0_attr = get_param_attr(col_w0, col_b0) w1_attr, b1_attr = get_param_attr(row_w1, b1) - ffn = ParallelFusedFeedForward( - IN_SIZE, - OUT_SIZE, - dropout_rate=0.0, - activation='gelu', - normalize_before=True, - linear1_weight_attr=w0_attr, - linear1_bias_attr=b0_attr, - 
linear2_weight_attr=w1_attr, - linear2_bias_attr=b1_attr, - ln1_scale_attr=ln_w_attr, - ln1_bias_attr=ln_b_attr, - nranks=MODEL_PARALLEL_SIZE, - ring_id=0) + ffn = ParallelFusedFeedForward(IN_SIZE, + OUT_SIZE, + dropout_rate=0.0, + activation='gelu', + normalize_before=True, + linear1_weight_attr=w0_attr, + linear1_bias_attr=b0_attr, + linear2_weight_attr=w1_attr, + linear2_bias_attr=b1_attr, + ln1_scale_attr=ln_w_attr, + ln1_bias_attr=ln_b_attr, + nranks=MODEL_PARALLEL_SIZE, + ring_id=0) #ffn.eval() result = ffn(data) else: @@ -316,18 +315,17 @@ def create_model(data, rank): w0_attr, b0_attr = get_param_attr(w0, b0) w1_attr, b1_attr = get_param_attr(w1, b1) - ffn = ParallelFusedFeedForward( - IN_SIZE, - OUT_SIZE, - dropout_rate=0.0, - activation='gelu', - normalize_before=True, - linear1_weight_attr=w0_attr, - linear1_bias_attr=b0_attr, - linear2_weight_attr=w1_attr, - linear2_bias_attr=b1_attr, - ln1_scale_attr=ln_w_attr, - ln1_bias_attr=ln_b_attr) + ffn = ParallelFusedFeedForward(IN_SIZE, + OUT_SIZE, + dropout_rate=0.0, + activation='gelu', + normalize_before=True, + linear1_weight_attr=w0_attr, + linear1_bias_attr=b0_attr, + linear2_weight_attr=w1_attr, + linear2_bias_attr=b1_attr, + ln1_scale_attr=ln_w_attr, + ln1_bias_attr=ln_b_attr) #ffn.eval() result = ffn(data) @@ -336,11 +334,13 @@ def create_model(data, rank): class TestModelParallel(TestDistRunnerBase): + def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Input data seq_len = 2 - data_in = fluid.data( - name='data_in', shape=[batch_size, seq_len, IN_SIZE], dtype=DTYPE) + data_in = fluid.data(name='data_in', + shape=[batch_size, seq_len, IN_SIZE], + dtype=DTYPE) if dist_strategy: data_loader = fluid.io.DataLoader.from_generator( @@ -360,8 +360,8 @@ class TestModelParallel(TestDistRunnerBase): opt = fluid.optimizer.SGD(0.1) if dist_strategy: - dist_opt = fleet.distributed_optimizer( - optimizer=opt, strategy=strategy) + dist_opt = fleet.distributed_optimizer(optimizer=opt, + strategy=strategy) dist_opt.minimize(avg_cost) else: opt.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_multi_transformer.py b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_multi_transformer.py index f9c5d4d78c8..5387580f2cd 100644 --- a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_multi_transformer.py +++ b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_multi_transformer.py @@ -51,11 +51,11 @@ def create_model(data, rank): np.random.seed(2021) ln_w = np.random.uniform(-1, 1, size=(hidden, )).astype(DTYPE) ln_b = np.random.uniform(-1, 1, size=(hidden, )).astype(DTYPE) - qkv_w = np.random.uniform( - -1, 1, size=(3, num_head, dim_head, hidden)).astype(DTYPE) + qkv_w = np.random.uniform(-1, 1, size=(3, num_head, dim_head, + hidden)).astype(DTYPE) qkv_b = np.random.uniform(-1, 1, size=(3, num_head, dim_head)).astype(DTYPE) - linear_w = np.random.uniform( - -1, 1, size=(num_head * dim_head, hidden)).astype(DTYPE) + linear_w = np.random.uniform(-1, 1, size=(num_head * dim_head, + hidden)).astype(DTYPE) linear_b = np.random.uniform(-1, 1, size=(hidden, )).astype(DTYPE) ffn_ln_w = np.random.uniform(-1, 1, size=(hidden, )).astype(DTYPE) @@ -145,11 +145,13 @@ def create_model(data, rank): class TestModelParallel(TestDistRunnerBase): + def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Input data seq_len = 2 - data_in = fluid.data( - name='data_in', shape=[batch_size, seq_len, hidden], dtype=DTYPE) + data_in = 
fluid.data(name='data_in', + shape=[batch_size, seq_len, hidden], + dtype=DTYPE) if dist_strategy: data_loader = fluid.io.DataLoader.from_generator( @@ -169,8 +171,8 @@ class TestModelParallel(TestDistRunnerBase): opt = fluid.optimizer.SGD(0.1) if dist_strategy: - dist_opt = fleet.distributed_optimizer( - optimizer=opt, strategy=strategy) + dist_opt = fleet.distributed_optimizer(optimizer=opt, + strategy=strategy) dist_opt.minimize(avg_cost) else: opt.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_Tensor_type.py b/python/paddle/fluid/tests/unittests/test_Tensor_type.py index c40981c0737..176fceb310d 100644 --- a/python/paddle/fluid/tests/unittests/test_Tensor_type.py +++ b/python/paddle/fluid/tests/unittests/test_Tensor_type.py @@ -22,6 +22,7 @@ from paddle.fluid.framework import _test_eager_guard class TensorTypeTest(unittest.TestCase): + def func_type_totensor(self): paddle.disable_static() inx = np.array([1, 2]) diff --git a/python/paddle/fluid/tests/unittests/test_accuracy_op.py b/python/paddle/fluid/tests/unittests/test_accuracy_op.py index 10ab76e4bfb..a03f4fa088a 100755 --- a/python/paddle/fluid/tests/unittests/test_accuracy_op.py +++ b/python/paddle/fluid/tests/unittests/test_accuracy_op.py @@ -23,6 +23,7 @@ from paddle.fluid import compiler, Program, program_guard class TestAccuracyOp(OpTest): + def setUp(self): self.op_type = "accuracy" self.dtype = np.float32 @@ -52,6 +53,7 @@ class TestAccuracyOp(OpTest): class TestAccuracyOpFp16(TestAccuracyOp): + def init_dtype(self): self.dtype = np.float16 @@ -60,13 +62,15 @@ class TestAccuracyOpFp16(TestAccuracyOp): class TestAccuracyOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of accuracy_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) - label = fluid.layers.data( - name='label', shape=[-1, 1], dtype="int32") + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) + label = fluid.layers.data(name='label', + shape=[-1, 1], + dtype="int32") self.assertRaises(TypeError, fluid.layers.accuracy, x1, label) self.assertRaises(TypeError, paddle.metric.accuracy, x1, label) # The input dtype of accuracy_op must be float32 or float64. 
@@ -79,13 +83,17 @@ class TestAccuracyOpError(unittest.TestCase): class TestAccuracyAPI1(unittest.TestCase): + def setUp(self): - self.predictions = paddle.static.data( - shape=[2, 5], name="predictions", dtype="float32") - self.label = paddle.static.data( - shape=[2, 1], name="labels", dtype="int64") - self.result = paddle.static.accuracy( - input=self.predictions, label=self.label, k=1) + self.predictions = paddle.static.data(shape=[2, 5], + name="predictions", + dtype="float32") + self.label = paddle.static.data(shape=[2, 1], + name="labels", + dtype="int64") + self.result = paddle.static.accuracy(input=self.predictions, + label=self.label, + k=1) self.input_predictions = np.array( [[0.2, 0.1, 0.4, 0.1, 0.1], [0.2, 0.3, 0.1, 0.15, 0.25]], dtype="float32") @@ -103,6 +111,7 @@ class TestAccuracyAPI1(unittest.TestCase): class TestAccuracyAPI2(unittest.TestCase): + def test_api(self): with fluid.dygraph.guard(): predictions = paddle.to_tensor( @@ -115,6 +124,7 @@ class TestAccuracyAPI2(unittest.TestCase): class TestAccuracyAPI(unittest.TestCase): + def test_api(self): with fluid.dygraph.guard(): predictions = paddle.to_tensor( diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py index 41e1f98e5f6..b512aef4f93 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py @@ -29,6 +29,7 @@ from decorator_helper import prog_scope class TestSigmoidTripleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 3, 7, 9] @@ -39,8 +40,11 @@ class TestSigmoidTripleGradCheck(unittest.TestCase): y = layers.sigmoid(x) x_arr = np.random.random(shape).astype(dtype) x_arr[np.abs(x_arr) < 0.005] = 0.002 - gradient_checker.triple_grad_check( - [x], y, x_init=x_arr, place=place, eps=eps) + gradient_checker.triple_grad_check([x], + y, + x_init=x_arr, + place=place, + eps=eps) def test_grad(self): paddle.enable_static() @@ -52,6 +56,7 @@ class TestSigmoidTripleGradCheck(unittest.TestCase): class TestSigmoidDoubleGradCheck(unittest.TestCase): + def sigmoid_wrapper(self, x): return fluid.layers.sigmoid(x[0]) @@ -82,6 +87,7 @@ class TestSigmoidDoubleGradCheck(unittest.TestCase): class TestTanhTripleGradCheck(unittest.TestCase): + def tanh_wrapper(self, x): return paddle.tanh(x[0]) @@ -112,6 +118,7 @@ class TestTanhTripleGradCheck(unittest.TestCase): class TestTanhDoubleGradCheck(unittest.TestCase): + def tanh_wrapper(self, x): return paddle.tanh(x[0]) @@ -142,6 +149,7 @@ class TestTanhDoubleGradCheck(unittest.TestCase): class TestAbsDoubleGradCheck(unittest.TestCase): + def abs_wrapper(self, x): return paddle.abs(x[0]) @@ -172,6 +180,7 @@ class TestAbsDoubleGradCheck(unittest.TestCase): class TestReluDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 3, 7, 9] @@ -184,8 +193,11 @@ class TestReluDoubleGradCheck(unittest.TestCase): x_arr = np.random.uniform(-1, 1, shape).astype(dtype) x_arr[np.abs(x_arr) < 0.005] = 0.02 - gradient_checker.double_grad_check( - [x], y, x_init=x_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x], + y, + x_init=x_arr, + place=place, + eps=eps) def test_grad(self): paddle.enable_static() @@ -197,6 +209,7 @@ class TestReluDoubleGradCheck(unittest.TestCase): class TestLeakyReluDoubleGradCheck(unittest.TestCase): + def leaky_relu_wrapper(self, x): return paddle.nn.functional.leaky_relu(x[0], negative_slope=0.2) @@ -214,10 +227,16 @@ class 
TestLeakyReluDoubleGradCheck(unittest.TestCase): x_arr = np.random.uniform(-1, 1, shape).astype(dtype) x_arr[np.abs(x_arr) < 0.005] = 0.02 - gradient_checker.double_grad_check( - [x], y, x_init=x_arr, place=place, eps=eps) - gradient_checker.double_grad_check_for_dygraph( - self.leaky_relu_wrapper, [x], y, x_init=x_arr, place=place) + gradient_checker.double_grad_check([x], + y, + x_init=x_arr, + place=place, + eps=eps) + gradient_checker.double_grad_check_for_dygraph(self.leaky_relu_wrapper, + [x], + y, + x_init=x_arr, + place=place) def test_grad(self): paddle.enable_static() @@ -229,6 +248,7 @@ class TestLeakyReluDoubleGradCheck(unittest.TestCase): class TestELUDoubleGradCheck(unittest.TestCase): + def elu_wrapper(self, x): return paddle.nn.functional.elu(x[0], alpha=0.2) @@ -263,6 +283,7 @@ class TestELUDoubleGradCheck(unittest.TestCase): class TestCELUDoubleGradCheck(unittest.TestCase): + def celu_wrapper(self, x): return paddle.nn.functional.celu(x[0], alpha=0.2) @@ -297,6 +318,7 @@ class TestCELUDoubleGradCheck(unittest.TestCase): class TestSqrtDoubleGradCheck(unittest.TestCase): + def sqrt_wrapper(self, x): return paddle.sqrt(x[0]) @@ -312,10 +334,15 @@ class TestSqrtDoubleGradCheck(unittest.TestCase): y = layers.sqrt(x) x_arr = np.random.uniform(0.1, 1, shape).astype(dtype) - gradient_checker.double_grad_check( - [x], y, x_init=x_arr, place=place, eps=eps) - gradient_checker.double_grad_check_for_dygraph( - self.sqrt_wrapper, [x], y, x_init=x_arr, place=place) + gradient_checker.double_grad_check([x], + y, + x_init=x_arr, + place=place, + eps=eps) + gradient_checker.double_grad_check_for_dygraph(self.sqrt_wrapper, [x], + y, + x_init=x_arr, + place=place) def test_grad(self): paddle.enable_static() @@ -327,6 +354,7 @@ class TestSqrtDoubleGradCheck(unittest.TestCase): class TestRsqrtDoubleGradCheck(unittest.TestCase): + def rsqrt_wrapper(self, x): return paddle.rsqrt(x[0]) @@ -342,10 +370,15 @@ class TestRsqrtDoubleGradCheck(unittest.TestCase): y = layers.rsqrt(x) x_arr = np.random.uniform(0.1, 1, shape).astype(dtype) - gradient_checker.double_grad_check( - [x], y, x_init=x_arr, place=place, eps=eps) - gradient_checker.double_grad_check_for_dygraph( - self.rsqrt_wrapper, [x], y, x_init=x_arr, place=place) + gradient_checker.double_grad_check([x], + y, + x_init=x_arr, + place=place, + eps=eps) + gradient_checker.double_grad_check_for_dygraph(self.rsqrt_wrapper, [x], + y, + x_init=x_arr, + place=place) def test_grad(self): paddle.enable_static() @@ -357,6 +390,7 @@ class TestRsqrtDoubleGradCheck(unittest.TestCase): class TestSquareDoubleGradCheck(unittest.TestCase): + def square_wrapper(self, x): return paddle.square(x[0]) @@ -389,6 +423,7 @@ class TestSquareDoubleGradCheck(unittest.TestCase): class TestAbsDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. 
@@ -405,8 +440,11 @@ class TestAbsDoubleGradCheck(unittest.TestCase): # we should avoid this x_arr[np.abs(x_arr) < 0.005] = 0.02 - gradient_checker.double_grad_check( - [x], y, x_init=x_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x], + y, + x_init=x_arr, + place=place, + eps=eps) def test_grad(self): paddle.enable_static() @@ -418,6 +456,7 @@ class TestAbsDoubleGradCheck(unittest.TestCase): class TestLogDoubleGradCheck(unittest.TestCase): + def log_wrapper(self, x): return paddle.log(x[0]) diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index 7be3b300d55..7dde0483823 100755 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -31,22 +31,26 @@ paddle.enable_static() class TestSqrtOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of sqrt op must be Variable or numpy.ndarray. in1 = 1 self.assertRaises(TypeError, fluid.layers.sqrt, in1) # The input dtype of sqrt op must be float16, float32, float64. - in2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32") + in2 = fluid.layers.data(name='input2', + shape=[12, 10], + dtype="int32") self.assertRaises(TypeError, fluid.layers.sqrt, in2) - in3 = fluid.layers.data( - name='input3', shape=[12, 10], dtype="float16") + in3 = fluid.layers.data(name='input3', + shape=[12, 10], + dtype="float16") fluid.layers.sqrt(x=in3) class TestActivation(OpTest): + def setUp(self): self.op_type = "exp" self.init_dtype() @@ -83,6 +87,7 @@ class TestActivation(OpTest): class TestExpm1(TestActivation): + def setUp(self): self.op_type = "expm1" self.python_api = paddle.expm1 @@ -103,6 +108,7 @@ class TestExpm1(TestActivation): class TestExpm1API(unittest.TestCase): + def init_dtype(self): self.dtype = 'float64' self.shape = [11, 17] @@ -132,6 +138,7 @@ class TestExpm1API(unittest.TestCase): run(place) def test_dygraph_api(self): + def run(place): paddle.disable_static(place) X = paddle.to_tensor(self.x) @@ -151,6 +158,7 @@ class TestExpm1API(unittest.TestCase): class TestParameter(object): + def test_out_name(self): with fluid.program_guard(fluid.Program()): np_x = np.array([0.1]) @@ -176,6 +184,7 @@ class TestParameter(object): class TestSigmoid(TestActivation): + def setUp(self): self.op_type = "sigmoid" self.init_dtype() @@ -199,6 +208,7 @@ class TestSigmoid(TestActivation): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSigmoidBF16(OpTest): + def setUp(self): self.op_type = "sigmoid" self.init_dtype() @@ -225,6 +235,7 @@ class TestSigmoidBF16(OpTest): class TestSilu(TestActivation): + def setUp(self): self.op_type = "silu" self.init_dtype() @@ -281,16 +292,19 @@ class TestSiluAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.silu, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = paddle.fluid.data( - name='x_int32', shape=[11, 17], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[11, 17], + dtype='int32') self.assertRaises(TypeError, F.silu, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[11, 17], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[11, 17], + dtype='float16') F.silu(x_fp16) class TestLogSigmoid(TestActivation): + def setUp(self): self.op_type = "logsigmoid" self.init_dtype() @@ -356,16 +370,19 @@ class TestLogSigmoidAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.log_sigmoid, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[11, 17], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[11, 17], + dtype='int32') self.assertRaises(TypeError, F.log_sigmoid, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[11, 17], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[11, 17], + dtype='float16') F.log_sigmoid(x_fp16) class TestTanh(TestActivation, TestParameter): + def setUp(self): self.op_type = "tanh" self.init_dtype() @@ -442,12 +459,14 @@ class TestTanhAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, self.tanh, 1) # The input dtype must be float16, float32. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, self.tanh, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') self.tanh(x_fp16) @@ -458,6 +477,7 @@ class TestTanhInplaceAPI(TestTanhAPI): class TestAtan(TestActivation, TestParameter): + def setUp(self): self.op_type = "atan" self.init_dtype() @@ -495,6 +515,7 @@ class TestAtan(TestActivation, TestParameter): class TestSinh(TestActivation): + def setUp(self): self.op_type = "sinh" self.init_dtype() @@ -524,11 +545,10 @@ class TestSinh(TestActivation): with fluid.program_guard(fluid.Program(), fluid.Program()): input_x = np.random.uniform(0.1, 1, test_data_shape).astype("float32") - data_x = fluid.layers.data( - name="data_x", - shape=test_data_shape, - append_batch_size=False, - dtype="float32") + data_x = fluid.layers.data(name="data_x", + shape=test_data_shape, + append_batch_size=False, + dtype="float32") pd_sinh_out = fluid.layers.sinh(data_x) exe = fluid.Executor(place=fluid.CPUPlace()) @@ -554,6 +574,7 @@ class TestSinh(TestActivation): class TestSinhOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program()): # The input type must be Variable. 
@@ -567,6 +588,7 @@ class TestSinhOpError(unittest.TestCase): class TestCosh(TestActivation): + def setUp(self): self.op_type = "cosh" self.init_dtype() @@ -596,11 +618,10 @@ class TestCosh(TestActivation): with fluid.program_guard(fluid.Program(), fluid.Program()): input_x = np.random.uniform(0.1, 1, test_data_shape).astype("float32") - data_x = fluid.layers.data( - name="data_x", - shape=test_data_shape, - append_batch_size=False, - dtype="float32") + data_x = fluid.layers.data(name="data_x", + shape=test_data_shape, + append_batch_size=False, + dtype="float32") pd_cosh_out = paddle.cosh(data_x) exe = fluid.Executor(place=fluid.CPUPlace()) @@ -626,6 +647,7 @@ class TestCosh(TestActivation): class TestCoshOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program()): # The input type must be Variable. @@ -644,6 +666,7 @@ def ref_tanhshrink(x): class TestTanhshrink(TestActivation): + def setUp(self): self.op_type = "tanh_shrink" self.init_dtype() @@ -709,12 +732,14 @@ class TestTanhshrinkAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.tanhshrink, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.tanhshrink, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.tanhshrink(x_fp16) @@ -725,6 +750,7 @@ def ref_hardshrink(x, threshold): class TestHardShrink(TestActivation): + def setUp(self): self.op_type = "hard_shrink" self.init_dtype() @@ -749,6 +775,7 @@ class TestHardShrink(TestActivation): class TestHardShrink_threshold_negative(TestHardShrink): + def set_attrs(self): self.threshold = -0.1 @@ -808,12 +835,14 @@ class TestHardShrinkAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.hardshrink, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.hardshrink, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.hardshrink(x_fp16) @@ -870,12 +899,14 @@ class TestHardtanhAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.hardtanh, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.hardtanh, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.hardtanh(x_fp16) @@ -887,6 +918,7 @@ def ref_softshrink(x, threshold=0.5): class TestSoftshrink(TestActivation): + def setUp(self): self.op_type = "softshrink" self.check_eager = True @@ -957,20 +989,24 @@ class TestSoftshrinkAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.softshrink, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.softshrink, x_int32) # The threshold must be no less than zero - x_fp32 = paddle.fluid.data( - name='x_fp32', shape=[12, 10], dtype='float32') + x_fp32 = paddle.fluid.data(name='x_fp32', + shape=[12, 10], + dtype='float32') self.assertRaises(ValueError, F.softshrink, x_fp32, -1.0) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.softshrink(x_fp16) class TestSqrt(TestActivation, TestParameter): + def setUp(self): self.op_type = "sqrt" self.python_api = paddle.sqrt @@ -995,6 +1031,7 @@ class TestSqrt(TestActivation, TestParameter): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSqrtBF16(OpTest): + def setUp(self): self.op_type = "sqrt" self.python_api = paddle.sqrt @@ -1022,6 +1059,7 @@ class TestSqrtBF16(OpTest): class TestRsqrt(TestActivation): + def setUp(self): self.op_type = "rsqrt" self.python_api = paddle.rsqrt @@ -1037,11 +1075,14 @@ class TestRsqrt(TestActivation): def test_check_grad(self): if self.dtype == np.float16: return - self.check_grad( - ['X'], 'Out', max_relative_error=0.0005, check_eager=True) + self.check_grad(['X'], + 'Out', + max_relative_error=0.0005, + check_eager=True) class TestAbs(TestActivation): + def setUp(self): self.op_type = "abs" self.init_dtype() @@ -1065,6 +1106,7 @@ class TestAbs(TestActivation): class TestCeil(TestActivation): + def setUp(self): self.op_type = "ceil" self.check_eager = True @@ -1084,6 +1126,7 @@ class TestCeil(TestActivation): class TestFloor(TestActivation): + def setUp(self): self.op_type = "floor" self.check_eager = True @@ -1105,6 +1148,7 @@ class TestFloor(TestActivation): class TestCos(TestActivation): + def setUp(self): self.op_type = "cos" self.init_dtype() @@ -1123,6 +1167,7 @@ class TestCos(TestActivation): class TestTan(TestActivation): + def setUp(self): np.random.seed(1024) self.op_type = "tan" @@ -1174,6 +1219,7 @@ class TestTan(TestActivation): class TestAcos(TestActivation): + def setUp(self): self.op_type = "acos" self.init_dtype() @@ -1192,6 +1238,7 @@ class TestAcos(TestActivation): class TestSin(TestActivation, TestParameter): + def setUp(self): self.op_type = "sin" self.init_dtype() @@ -1210,6 +1257,7 @@ class TestSin(TestActivation, TestParameter): class TestAsin(TestActivation): + def setUp(self): self.op_type = "asin" self.init_dtype() @@ -1228,6 +1276,7 @@ class TestAsin(TestActivation): class TestAcosh(TestActivation): + def setUp(self): self.op_type = "acosh" self.init_dtype() @@ -1246,6 +1295,7 @@ class TestAcosh(TestActivation): class TestAsinh(TestActivation): + def setUp(self): self.op_type = "asinh" self.init_dtype() @@ -1264,6 +1314,7 @@ class TestAsinh(TestActivation): class TestAtanh(TestActivation): + def setUp(self): self.op_type = "atanh" self.init_dtype() @@ -1282,6 +1333,7 @@ class TestAtanh(TestActivation): class TestRound(TestActivation): + def setUp(self): self.op_type = "round" self.check_eager = True @@ -1300,6 +1352,7 @@ class TestRound(TestActivation): class TestRelu(TestActivation): + def setUp(self): self.op_type = "relu" self.init_dtype() @@ -1368,12 +1421,14 @@ class TestReluAPI(unittest.TestCase): # The input type must be Variable. 
self.assertRaises(TypeError, self.relu, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[10, 12], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[10, 12], + dtype='int32') self.assertRaises(TypeError, self.relu, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[10, 12], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[10, 12], + dtype='float16') self.relu(x_fp16) @@ -1390,6 +1445,7 @@ def ref_leaky_relu(x, alpha=0.01): class TestLeakyRelu(TestActivation): + def get_alpha(self): return 0.02 @@ -1415,16 +1471,19 @@ class TestLeakyRelu(TestActivation): class TestLeakyReluAlpha1(TestLeakyRelu): + def get_alpha(self): return 2 class TestLeakyReluAlpha2(TestLeakyRelu): + def get_alpha(self): return -0.01 class TestLeakyReluAlpha3(TestLeakyRelu): + def get_alpha(self): return -2.0 @@ -1485,25 +1544,28 @@ class TestLeakyReluAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.leaky_relu, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.leaky_relu, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.leaky_relu(x_fp16) def gelu(x, approximate): if approximate: - y_ref = 0.5 * x * (1.0 + np.tanh( - np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3)))) + y_ref = 0.5 * x * ( + 1.0 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3)))) else: y_ref = 0.5 * x * (1 + erf(x / np.sqrt(2))) return y_ref.astype(x.dtype) class TestGeluApproximate(TestActivation): + def setUp(self): self.op_type = "gelu" self.init_dtype() @@ -1523,6 +1585,7 @@ class TestGeluApproximate(TestActivation): class TestGelu(TestActivation): + def setUp(self): self.op_type = "gelu" self.init_dtype() @@ -1586,16 +1649,19 @@ class TestGELUAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.gelu, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[11, 17], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[11, 17], + dtype='int32') self.assertRaises(TypeError, F.gelu, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[11, 17], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[11, 17], + dtype='float16') F.gelu(x_fp16) class TestBRelu(TestActivation): + def setUp(self): self.op_type = "brelu" self.init_dtype() @@ -1657,8 +1723,9 @@ class TestBreluAPI(unittest.TestCase): x_int32 = fluid.data(name='x_int32', shape=[12, 10], dtype='int32') self.assertRaises(TypeError, fluid.layers.brelu, x_int32) # support the input dtype is float16 - x_fp16 = fluid.layers.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = fluid.layers.data(name='x_fp16', + shape=[12, 10], + dtype='float16') fluid.layers.brelu(x_fp16) @@ -1670,6 +1737,7 @@ def ref_relu6(x, threshold=6.0): class TestRelu6(TestActivation): + def setUp(self): self.op_type = "relu6" self.init_dtype() @@ -1738,12 +1806,14 @@ class TestRelu6API(unittest.TestCase): # The input type must be Variable. 
self.assertRaises(TypeError, F.relu6, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.relu6, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.relu6(x_fp16) @@ -1753,6 +1823,7 @@ def ref_hardswish(x, threshold=6.0, scale=6.0, offset=3.0): class TestHardSwish(TestActivation): + def setUp(self): self.op_type = 'hard_swish' self.init_dtype() @@ -1834,12 +1905,14 @@ class TestHardswishAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.hardswish, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.hardswish, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.hardswish(x_fp16) def test_api_eager_dygraph(self): @@ -1849,6 +1922,7 @@ class TestHardswishAPI(unittest.TestCase): class TestSoftRelu(TestActivation): + def setUp(self): self.op_type = "soft_relu" self.init_dtype() @@ -1875,6 +1949,7 @@ class TestSoftRelu(TestActivation): class TestSoftReluOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program()): # The input type must be Variable. @@ -1893,6 +1968,7 @@ def elu(x, alpha): class TestELU(TestActivation): + def setUp(self): self.op_type = "elu" self.init_dtype() @@ -1917,6 +1993,7 @@ class TestELU(TestActivation): class TestELUAlpha(TestELU): + def get_alpha(self): return -0.2 @@ -1972,12 +2049,14 @@ class TestELUAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, self.elu, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[10, 12], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[10, 12], + dtype='int32') self.assertRaises(TypeError, self.elu, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[10, 12], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[10, 12], + dtype='float16') self.elu(x_fp16) @@ -1999,6 +2078,7 @@ def celu(x, alpha): class TestCELU(TestActivation): + def setUp(self): self.op_type = "celu" self.init_dtype() @@ -2069,16 +2149,19 @@ class TestCELUAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, self.celu, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = paddle.fluid.data( - name='x_int32', shape=[10, 12], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[10, 12], + dtype='int32') self.assertRaises(TypeError, self.celu, x_int32) # The alpha must be not equal 0 - x_fp32 = paddle.fluid.data( - name='x_fp32', shape=[10, 12], dtype='float32') + x_fp32 = paddle.fluid.data(name='x_fp32', + shape=[10, 12], + dtype='float32') self.assertRaises(ZeroDivisionError, F.celu, x_fp32, 0) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[10, 12], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[10, 12], + dtype='float16') self.celu(x_fp16) def test_api_eager_dygraph(self): @@ -2088,6 +2171,7 @@ class TestCELUAPI(unittest.TestCase): class TestReciprocal(TestActivation): + def setUp(self): self.op_type = "reciprocal" self.python_api = paddle.reciprocal @@ -2110,6 +2194,7 @@ class TestReciprocal(TestActivation): class TestLog(TestActivation): + def setUp(self): self.op_type = "log" self.check_eager = True @@ -2129,16 +2214,21 @@ class TestLog(TestActivation): self.check_grad(['X'], 'Out', check_eager=True) def test_error(self): - in1 = fluid.layers.data( - name="in1", shape=[11, 17], append_batch_size=False, dtype="int32") - in2 = fluid.layers.data( - name="in2", shape=[11, 17], append_batch_size=False, dtype="int64") + in1 = fluid.layers.data(name="in1", + shape=[11, 17], + append_batch_size=False, + dtype="int32") + in2 = fluid.layers.data(name="in2", + shape=[11, 17], + append_batch_size=False, + dtype="int64") self.assertRaises(TypeError, fluid.layers.log, in1) self.assertRaises(TypeError, fluid.layers.log, in2) class TestLog2(TestActivation): + def setUp(self): self.op_type = "log2" self.check_eager = True @@ -2167,8 +2257,9 @@ class TestLog2(TestActivation): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): input_x = np.random.uniform(0.1, 1, [11, 17]).astype("float64") - data_x = paddle.static.data( - name="data_x", shape=[11, 17], dtype="float64") + data_x = paddle.static.data(name="data_x", + shape=[11, 17], + dtype="float64") out1 = paddle.log2(data_x) exe = paddle.static.Executor(place=fluid.CPUPlace()) @@ -2190,6 +2281,7 @@ class TestLog2(TestActivation): class TestLog10(TestActivation): + def setUp(self): self.op_type = "log10" self.check_eager = True @@ -2218,8 +2310,9 @@ class TestLog10(TestActivation): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): input_x = np.random.uniform(0.1, 1, [11, 17]).astype("float64") - data_x = paddle.static.data( - name="data_x", shape=[11, 17], dtype="float64") + data_x = paddle.static.data(name="data_x", + shape=[11, 17], + dtype="float64") out1 = paddle.log10(data_x) exe = paddle.static.Executor(place=paddle.CPUPlace()) @@ -2241,6 +2334,7 @@ class TestLog10(TestActivation): class TestLog1p(TestActivation): + def setUp(self): self.op_type = "log1p" self.check_eager = True @@ -2262,11 +2356,10 @@ class TestLog1p(TestActivation): def test_api(self): with fluid.program_guard(fluid.Program(), fluid.Program()): input_x = np.random.uniform(0.1, 1, [11, 17]).astype("float64") - data_x = fluid.layers.data( - name="data_x", - shape=[11, 17], - append_batch_size=False, - dtype="float64") + data_x = fluid.layers.data(name="data_x", + shape=[11, 17], + append_batch_size=False, + dtype="float64") out1 = paddle.log1p(data_x) exe = fluid.Executor(place=fluid.CPUPlace()) @@ -2288,6 +2381,7 @@ class TestLog1p(TestActivation): class TestSquare(TestActivation): + def 
setUp(self): self.op_type = "square" self.python_api = paddle.square @@ -2303,8 +2397,10 @@ class TestSquare(TestActivation): def test_check_grad(self): if self.dtype == np.float16: return - self.check_grad( - ['X'], 'Out', max_relative_error=0.007, check_eager=True) + self.check_grad(['X'], + 'Out', + max_relative_error=0.007, + check_eager=True) def test_check_output(self): self.check_output(check_eager=True) @@ -2313,6 +2409,7 @@ class TestSquare(TestActivation): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSquareBF16(OpTest): + def setUp(self): self.op_type = "square" self.python_api = paddle.square @@ -2336,11 +2433,14 @@ class TestSquareBF16(OpTest): def test_check_grad(self): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['X'], 'Out', numeric_grad_delta=0.5, check_eager=True) + self.check_grad_with_place(place, ['X'], + 'Out', + numeric_grad_delta=0.5, + check_eager=True) class TestPow(TestActivation): + def setUp(self): self.op_type = "pow" self.python_api = paddle.pow @@ -2365,6 +2465,7 @@ class TestPow(TestActivation): class TestPow_factor_tensor(TestActivation): + def setUp(self): self.op_type = "pow" self.check_eager = False @@ -2393,13 +2494,14 @@ class TestPow_factor_tensor(TestActivation): def test_api(self): input = np.random.uniform(1, 2, [11, 17]).astype("float32") - x = fluid.layers.data( - name="x", shape=[11, 17], append_batch_size=False, dtype="float32") - res = fluid.layers.data( - name="res", - shape=[11, 17], - append_batch_size=False, - dtype="float32") + x = fluid.layers.data(name="x", + shape=[11, 17], + append_batch_size=False, + dtype="float32") + res = fluid.layers.data(name="res", + shape=[11, 17], + append_batch_size=False, + dtype="float32") factor_1 = 2.0 factor_2 = fluid.layers.fill_constant([1], "float32", 3.0) @@ -2420,20 +2522,22 @@ class TestPow_factor_tensor(TestActivation): assert np.allclose(res_6, np.power(input, 3)) def test_error(self): - in1 = fluid.layers.data( - name="in1", shape=[11, 17], append_batch_size=False, dtype="int32") - in2 = fluid.layers.data( - name="in2", shape=[11, 17], append_batch_size=False, dtype="int64") - in3 = fluid.layers.data( - name="in3", - shape=[11, 17], - append_batch_size=False, - dtype="float32") - in4 = fluid.layers.data( - name="in4", - shape=[11, 17], - append_batch_size=False, - dtype="float64") + in1 = fluid.layers.data(name="in1", + shape=[11, 17], + append_batch_size=False, + dtype="int32") + in2 = fluid.layers.data(name="in2", + shape=[11, 17], + append_batch_size=False, + dtype="int64") + in3 = fluid.layers.data(name="in3", + shape=[11, 17], + append_batch_size=False, + dtype="float32") + in4 = fluid.layers.data(name="in4", + shape=[11, 17], + append_batch_size=False, + dtype="float64") factor_1 = fluid.layers.fill_constant([1], "float64", 3.0) @@ -2449,6 +2553,7 @@ def ref_stanh(x, scale_a=0.67, scale_b=1.7159): class TestSTanh(TestActivation): + def get_scale_a(self): return 0.67 @@ -2477,11 +2582,13 @@ class TestSTanh(TestActivation): class TestSTanhScaleA(TestSTanh): + def get_scale_a(self): return 2.0 class TestSTanhScaleB(TestSTanh): + def get_scale_b(self): return 0.5 @@ -2538,21 +2645,25 @@ class TestSTanhAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, paddle.stanh, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, paddle.stanh, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') paddle.stanh(x_fp16) class TestSTanhAPIScaleA(TestSTanhAPI): + def get_scale_a(self): return 2.0 class TestSTanhAPIScaleB(TestSTanhAPI): + def get_scale_b(self): return 0.5 @@ -2565,6 +2676,7 @@ def ref_softplus(x, beta=1, threshold=20): class TestSoftplus(TestActivation): + def setUp(self): self.op_type = "softplus" self.init_dtype() @@ -2588,6 +2700,7 @@ class TestSoftplus(TestActivation): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftplusBF16(OpTest): + def setUp(self): self.op_type = "softplus" self.init_dtype() @@ -2664,12 +2777,14 @@ class TestSoftplusAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.softplus, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.softplus, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.softplus(x_fp16) @@ -2679,6 +2794,7 @@ def ref_softsign(x): class TestSoftsign(TestActivation): + def setUp(self): self.op_type = "softsign" self.init_dtype() @@ -2743,12 +2859,14 @@ class TestSoftsignAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.softsign, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.softsign, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.softsign(x_fp16) @@ -2758,6 +2876,7 @@ def ref_thresholded_relu(x, threshold=1.0): class TestThresholdedRelu(TestActivation): + def setUp(self): self.op_type = "thresholded_relu" self.init_dtype() @@ -2828,12 +2947,14 @@ class TestThresholdedReluAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.thresholded_relu, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.thresholded_relu, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.thresholded_relu(x_fp16) @@ -2842,6 +2963,7 @@ def ref_hardsigmoid(x, slope=0.166666666666667, offset=0.5): class TestHardSigmoid(TestActivation): + def setUp(self): self.op_type = "hard_sigmoid" self.dtype = 'float64' @@ -2869,11 +2991,13 @@ class TestHardSigmoid(TestActivation): class TestHardSigmoidFP32(TestHardSigmoid): + def set_attrs(self): self.dtype = 'float32' class TestHardSigmoidSlopeOffset(TestHardSigmoid): + def set_attrs(self): self.slope = 0.2 self.offset = 0.4 @@ -2929,12 +3053,14 @@ class TestHardsigmoidAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.hardsigmoid, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.hardsigmoid, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.hardsigmoid(x_fp16) @@ -2944,6 +3070,7 @@ def ref_swish(x): class TestSwish(TestActivation): + def setUp(self): self.op_type = "swish" self.python_api = paddle.nn.functional.swish @@ -3018,12 +3145,14 @@ class TestSwishAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.swish, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.swish, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.swish(x_fp16) @@ -3034,6 +3163,7 @@ def ref_mish(x, threshold=20.): class TestMish(TestActivation): + def setUp(self): self.op_type = "mish" self.python_api = paddle.fluid.layers.nn.mish @@ -3102,26 +3232,32 @@ class TestMishAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.mish, 1) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.mish, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.mish(x_fp16) #------------------ Test Error Activation---------------------- def create_test_error_class(op_type): + class TestOpErrors(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): op = getattr(fluid.layers, op_type) # The input dtype of op_type must be float32, float64. 
- in1 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32") - in2 = fluid.layers.data( - name='input3', shape=[12, 10], dtype="int64") + in1 = fluid.layers.data(name='input2', + shape=[12, 10], + dtype="int32") + in2 = fluid.layers.data(name='input3', + shape=[12, 10], + dtype="int64") self.assertRaises(TypeError, op, in1) self.assertRaises(TypeError, op, in2) @@ -3150,9 +3286,11 @@ create_test_error_class('atanh') #------------------ Test Cudnn Activation---------------------- def create_test_act_cudnn_class(parent, atol=1e-3, grad_atol=1e-3): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestActCudnn(parent): + def init_kernel_type(self): self.attrs = {"use_cudnn": True} @@ -3172,9 +3310,11 @@ def create_test_act_fp16_class(parent, atol=1e-3, grad_check=True, grad_atol=0.80): + @unittest.skipIf(not paddle.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestActFp16(parent): + def init_dtype(self): self.dtype = np.float16 @@ -3188,8 +3328,9 @@ def create_test_act_fp16_class(parent, place = core.CUDAPlace(0) support_fp16 = core.is_float16_supported(place) if support_fp16 and grad_check: - self.check_grad_with_place( - place, ['X'], 'Out', max_relative_error=grad_atol) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=grad_atol) cls_name = "{0}_{1}".format(parent.__name__, "fp16") TestActFp16.__name__ = cls_name @@ -3253,9 +3394,11 @@ def create_test_act_bf16_class(parent, atol=1e-2, grad_check=True, grad_atol=0.80): + @unittest.skipIf(not paddle.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestActBF16(parent): + def init_dtype(self): self.dtype = np.uint16 @@ -3265,8 +3408,9 @@ def create_test_act_bf16_class(parent, def test_check_grad(self): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['X'], 'Out', max_relative_error=grad_atol) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=grad_atol) cls_name = "{0}_{1}".format(parent.__name__, "bf16") TestActBF16.__name__ = cls_name diff --git a/python/paddle/fluid/tests/unittests/test_activation_sparse_op.py b/python/paddle/fluid/tests/unittests/test_activation_sparse_op.py index 5c07a544ca1..cbc32bbc4a1 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_sparse_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_sparse_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -24,10 +24,11 @@ import paddle class TestSparseSquareOp(unittest.TestCase): + def check_with_place(self, place): scope = core.Scope() - # create and initialize Grad Variable + # create and initialize Grad Variable height = 10 rows = [0, 4, 7] self.row_numel = 12 @@ -61,10 +62,11 @@ class TestSparseSquareOp(unittest.TestCase): class TestSparseSqrtOp(unittest.TestCase): + def check_with_place(self, place): scope = core.Scope() - # create and initialize Grad Variable + # create and initialize Grad Variable height = 10 rows = [0, 4, 7] self.row_numel = 12 diff --git a/python/paddle/fluid/tests/unittests/test_adadelta_op.py b/python/paddle/fluid/tests/unittests/test_adadelta_op.py index 44dd3d60bdc..5d96dc38a71 100644 --- a/python/paddle/fluid/tests/unittests/test_adadelta_op.py +++ b/python/paddle/fluid/tests/unittests/test_adadelta_op.py @@ -22,6 +22,7 @@ import paddle.fluid as fluid class TestAdadeltaOp1(OpTest): + def setUp(self): self.op_type = "adadelta" param = np.random.uniform(-1, 1, (102, 105)).astype("float32") @@ -47,8 +48,8 @@ class TestAdadeltaOp1(OpTest): (1 - rho) * np.square(grad) update = -np.multiply( np.sqrt( - np.divide(avg_squared_update + epsilon, avg_squared_grad_out + - epsilon)), grad) + np.divide(avg_squared_update + epsilon, + avg_squared_grad_out + epsilon)), grad) avg_squared_update_out = rho * avg_squared_update + \ (1 - rho) * np.square(update) @@ -92,8 +93,8 @@ class TestAdadeltaOp2(OpTest): (1 - rho) * np.square(grad) update = -np.multiply( np.sqrt( - np.divide(avg_squared_update + epsilon, avg_squared_grad_out + - epsilon)), grad) + np.divide(avg_squared_update + epsilon, + avg_squared_grad_out + epsilon)), grad) avg_squared_update_out = rho * avg_squared_update + \ (1 - rho) * np.square(update) @@ -111,16 +112,16 @@ class TestAdadeltaOp2(OpTest): class TestAdadeltaV2(unittest.TestCase): + def test_adadelta_dygraph(self): paddle.disable_static(paddle.CPUPlace()) value = np.arange(26).reshape(2, 13).astype("float32") a = paddle.to_tensor(value) linear = paddle.nn.Linear(13, 5) # This can be any optimizer supported by dygraph. 
- adam = paddle.optimizer.Adadelta( - learning_rate=0.01, - parameters=linear.parameters(), - weight_decay=0.01) + adam = paddle.optimizer.Adadelta(learning_rate=0.01, + parameters=linear.parameters(), + weight_decay=0.01) out = linear(a) out.backward() adam.step() @@ -141,8 +142,8 @@ class TestAdadeltaV2(unittest.TestCase): rms_optimizer.minimize(avg_cost) fetch_list = [avg_cost] - train_reader = paddle.batch( - paddle.dataset.uci_housing.train(), batch_size=1) + train_reader = paddle.batch(paddle.dataset.uci_housing.train(), + batch_size=1) feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) @@ -151,16 +152,18 @@ class TestAdadeltaV2(unittest.TestCase): def test_raise_error(self): self.assertRaises(ValueError, paddle.optimizer.Adadelta, None) - self.assertRaises( - ValueError, paddle.optimizer.Adadelta, learning_rate=0.1, rho=None) - self.assertRaises( - ValueError, - paddle.optimizer.Adadelta, - learning_rate=0.1, - epsilon=None) + self.assertRaises(ValueError, + paddle.optimizer.Adadelta, + learning_rate=0.1, + rho=None) + self.assertRaises(ValueError, + paddle.optimizer.Adadelta, + learning_rate=0.1, + epsilon=None) class TestAdadeltaV2Group(TestAdadeltaV2): + def test_adadelta_dygraph(self): paddle.disable_static(paddle.CPUPlace()) value = np.arange(26).reshape(2, 13).astype("float32") @@ -168,15 +171,17 @@ class TestAdadeltaV2Group(TestAdadeltaV2): linear_1 = paddle.nn.Linear(13, 5) linear_2 = paddle.nn.Linear(5, 5) # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Adadelta( - learning_rate=0.01, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001, - }], - weight_decay=0.1) + adam = paddle.optimizer.Adadelta(learning_rate=0.01, + parameters=[{ + 'params': + linear_1.parameters() + }, { + 'params': + linear_2.parameters(), + 'weight_decay': + 0.001, + }], + weight_decay=0.1) out = linear_1(a) out = linear_2(out) out.backward() diff --git a/python/paddle/fluid/tests/unittests/test_adagrad_op.py b/python/paddle/fluid/tests/unittests/test_adagrad_op.py index ae047e602d1..4f290d4befa 100644 --- a/python/paddle/fluid/tests/unittests/test_adagrad_op.py +++ b/python/paddle/fluid/tests/unittests/test_adagrad_op.py @@ -86,10 +86,11 @@ class TestAdagradOp2(OpTest): class TestSparseAdagradOp(unittest.TestCase): + def check_with_place(self, place): scope = core.Scope() - # create and initialize Grad Variable + # create and initialize Grad Variable height = 10 rows = [0, 4, 7, 4] row_numel = 12 @@ -120,15 +121,14 @@ class TestSparseAdagradOp(unittest.TestCase): moment.set(moment_np_array, place) # create and run sgd operator - adagrad_op = Operator( - "adagrad", - Param='Param', - Grad='Grad', - ParamOut='Param', - Moment='Moment', - MomentOut='Moment', - LearningRate='LearningRate', - epsilon=2.0) + adagrad_op = Operator("adagrad", + Param='Param', + Grad='Grad', + ParamOut='Param', + Moment='Moment', + MomentOut='Moment', + LearningRate='LearningRate', + epsilon=2.0) adagrad_op.run(scope, place) @@ -152,34 +152,31 @@ class TestSparseAdagradOp(unittest.TestCase): def get_out(param, lr, grad, m, epsilon): return param - lr * grad / (math.sqrt(m) + epsilon) - self.assertAlmostEqual( - get_out(5.0, 2.0, 2.0, 6.0, 2.0), - result_array[rows[0], 0], - places=5) - self.assertAlmostEqual( - get_out(5.0, 2.0, 1.0, 3.0, 2.0), - result_array[rows[0], 2], - places=5) - self.assertAlmostEqual( - get_out(5.0, 2.0, 0.0, 2.0, 2.0), result_array[1, 
0], places=5) + self.assertAlmostEqual(get_out(5.0, 2.0, 2.0, 6.0, 2.0), + result_array[rows[0], 0], + places=5) + self.assertAlmostEqual(get_out(5.0, 2.0, 1.0, 3.0, 2.0), + result_array[rows[0], 2], + places=5) + self.assertAlmostEqual(get_out(5.0, 2.0, 0.0, 2.0, 2.0), + result_array[1, 0], + places=5) # grad_merge = 1.0 + 1.0 # m = 6.0 - self.assertAlmostEqual( - get_out(5.0, 2.0, 2.0, 6.0, 2.0), - result_array[rows[1], 10], - places=5) - - self.assertAlmostEqual( - get_out(5.0, 2.0, 0.0, 2.0, 2.0), result_array[5, 8], places=5) - self.assertAlmostEqual( - get_out(5.0, 2.0, 1.0, 3.0, 2.0), - result_array[rows[2], 1], - places=5) - self.assertAlmostEqual( - get_out(5.0, 2.0, 4.0, 18.0, 2.0), - result_array[rows[2], 8], - places=5) + self.assertAlmostEqual(get_out(5.0, 2.0, 2.0, 6.0, 2.0), + result_array[rows[1], 10], + places=5) + + self.assertAlmostEqual(get_out(5.0, 2.0, 0.0, 2.0, 2.0), + result_array[5, 8], + places=5) + self.assertAlmostEqual(get_out(5.0, 2.0, 1.0, 3.0, 2.0), + result_array[rows[2], 1], + places=5) + self.assertAlmostEqual(get_out(5.0, 2.0, 4.0, 18.0, 2.0), + result_array[rows[2], 8], + places=5) def test_sparse_adagrad(self): places = [core.CPUPlace()] diff --git a/python/paddle/fluid/tests/unittests/test_adagrad_op_v2.py b/python/paddle/fluid/tests/unittests/test_adagrad_op_v2.py index c6a69c0723c..3096dc33a11 100644 --- a/python/paddle/fluid/tests/unittests/test_adagrad_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_adagrad_op_v2.py @@ -24,20 +24,22 @@ import math class TestAdagradOpV2(unittest.TestCase): + def test_v20_coverage(self): paddle.disable_static() inp = paddle.rand(shape=[10, 10]) linear = paddle.nn.Linear(10, 10) out = linear(inp) loss = paddle.mean(out) - adagrad = paddle.optimizer.Adagrad( - learning_rate=0.1, parameters=linear.parameters()) + adagrad = paddle.optimizer.Adagrad(learning_rate=0.1, + parameters=linear.parameters()) out.backward() adagrad.step() adagrad.clear_grad() class TestAdagradOpV2Group(TestAdagradOpV2): + def test_v20_coverage(self): paddle.disable_static() inp = paddle.rand(shape=[10, 10]) @@ -46,15 +48,17 @@ class TestAdagradOpV2Group(TestAdagradOpV2): out = linear_1(inp) out = linear_2(out) loss = paddle.mean(out) - adagrad = paddle.optimizer.Adagrad( - learning_rate=0.01, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001, - }], - weight_decay=0.1) + adagrad = paddle.optimizer.Adagrad(learning_rate=0.01, + parameters=[{ + 'params': + linear_1.parameters() + }, { + 'params': + linear_2.parameters(), + 'weight_decay': + 0.001, + }], + weight_decay=0.1) out.backward() adagrad.step() adagrad.clear_grad() diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index d254cd286e6..61597562a4a 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -25,6 +25,7 @@ from paddle.fluid.framework import _test_eager_guard class TestAdamOp1(OpTest): + def setUp(self): '''Test Adam Op with supplied attributes ''' @@ -70,6 +71,7 @@ class TestAdamOp1(OpTest): class TestAdamOp2(OpTest): + def set_shape(self): self.shape = (102, 105) @@ -119,11 +121,13 @@ class TestAdamOp2(OpTest): class TestAdamOnlyTailOp(TestAdamOp2): + def set_shape(self): self.shape = (3) class TestAdamOpMultipleSteps(OpTest): + def setUp(self): '''Test Adam Operator with supplied attributes ''' @@ -294,13 +298,13 @@ def adam_step_sparse(inputs, attributes, height, rows, 
row_numel, np_grad, param_out = np.zeros(shape=[height, row_numel]) def update_row(row_id, update_value): - moment1_out[row_id] = beta1 * moment1[row_id] + (1 - beta1 - ) * update_value + moment1_out[row_id] = beta1 * moment1[row_id] + (1 - + beta1) * update_value moment2_out[row_id] = beta2 * moment2[row_id] + ( 1 - beta2) * np.square(update_value) lr_t = lr * np.sqrt(1 - beta2_pow) / (1 - beta1_pow) - param_out[row_id] = param[row_id] - lr_t * (moment1_out[row_id] / ( - np.sqrt(moment2_out[row_id]) + epsilon)) + param_out[row_id] = param[row_id] - lr_t * ( + moment1_out[row_id] / (np.sqrt(moment2_out[row_id]) + epsilon)) if lazy_mode: for idx, row_id in enumerate(rows): @@ -316,6 +320,7 @@ def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad, class TestSparseAdamOp(unittest.TestCase): + def setup(self, scope, place, lazy_mode): beta1 = 0.78 beta2 = 0.836 @@ -409,6 +414,7 @@ class TestSparseAdamOp(unittest.TestCase): class TestAdamOpBetaVariable(OpTest): + def setUp(self): '''Test Adam Op with beta as Variable ''' @@ -456,6 +462,7 @@ class TestAdamOpBetaVariable(OpTest): class TestAdamOpBetaEpsilonVariable(OpTest): + def setUp(self): '''Test Adam Op with beta/epsilon as Variable ''' @@ -504,6 +511,7 @@ class TestAdamOpBetaEpsilonVariable(OpTest): class TestAdamOpWithGlobalBetaPow(OpTest): + def setUp(self): '''Test Adam Op with global_beta_pow ''' @@ -555,6 +563,7 @@ class TestAdamOpWithGlobalBetaPow(OpTest): class TestAdamOpWithSkipUpdate(OpTest): + def setUp(self): '''Test Adam Op with global_beta_pow ''' @@ -604,6 +613,7 @@ class TestAdamOpWithSkipUpdate(OpTest): class TestAdamOpV2(unittest.TestCase): + def test_adam_op(self): place = fluid.CPUPlace() shape = [2, 3, 8, 8] @@ -616,17 +626,20 @@ class TestAdamOpV2(unittest.TestCase): conv = fluid.layers.conv2d(data, 8, 3) loss = fluid.layers.reduce_mean(conv) - beta1 = fluid.layers.create_global_var( - shape=[1], value=0.85, dtype='float32', persistable=True) - beta2 = fluid.layers.create_global_var( - shape=[1], value=0.95, dtype='float32', persistable=True) + beta1 = fluid.layers.create_global_var(shape=[1], + value=0.85, + dtype='float32', + persistable=True) + beta2 = fluid.layers.create_global_var(shape=[1], + value=0.95, + dtype='float32', + persistable=True) betas = [beta1, beta2] - opt = paddle.optimizer.Adam( - learning_rate=1e-5, - beta1=beta1, - beta2=beta2, - weight_decay=0.01, - epsilon=1e-8) + opt = paddle.optimizer.Adam(learning_rate=1e-5, + beta1=beta1, + beta2=beta2, + weight_decay=0.01, + epsilon=1e-8) opt.minimize(loss) exe.run(startup) @@ -640,8 +653,8 @@ class TestAdamOpV2(unittest.TestCase): a = fluid.dygraph.to_variable(value) linear = fluid.Linear(13, 5, dtype="float32") - adam = paddle.optimizer.Adam( - learning_rate=0.01, parameters=linear.parameters()) + adam = paddle.optimizer.Adam(learning_rate=0.01, + parameters=linear.parameters()) out = linear(a) out.backward() adam.step() @@ -672,8 +685,8 @@ class TestAdamOpV2(unittest.TestCase): with self.assertRaises(TypeError): learning_rate = np.array([0.01]).astype("float32") learning_rate = paddle.to_tensor(learning_rate) - adam = paddle.optimizer.Adam( - learning_rate=learning_rate, parameters=emb.parameters()) + adam = paddle.optimizer.Adam(learning_rate=learning_rate, + parameters=emb.parameters()) params = adam.get_opti_var_name_list() assert (params is not None) @@ -685,8 +698,9 @@ class TestAdamOpV2(unittest.TestCase): a = fluid.dygraph.to_variable(value) linear = fluid.Linear(13, 5, dtype="float32") clip = 
fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0) - adam = paddle.optimizer.Adam( - 0.1, parameters=linear.parameters(), grad_clip=clip) + adam = paddle.optimizer.Adam(0.1, + parameters=linear.parameters(), + grad_clip=clip) out = linear(a) out.backward() adam.step() @@ -703,8 +717,9 @@ class TestAdamOpV2(unittest.TestCase): cur_lr = adam.get_lr() assert (lr == cur_lr) with self.assertRaises(TypeError): - lr_var = paddle.fluid.layers.create_global_var( - shape=[1], value=lr, dtype='float32') + lr_var = paddle.fluid.layers.create_global_var(shape=[1], + value=lr, + dtype='float32') adam.set_lr(lr_var) paddle.enable_static() @@ -712,14 +727,17 @@ class TestAdamOpV2(unittest.TestCase): paddle.disable_static() linear = paddle.nn.Linear(10, 10) with self.assertRaises(ValueError): - adam = paddle.optimizer.Adam( - 0.1, beta1=-1, parameters=linear.parameters()) + adam = paddle.optimizer.Adam(0.1, + beta1=-1, + parameters=linear.parameters()) with self.assertRaises(ValueError): - adam = paddle.optimizer.Adam( - 0.1, beta2=-1, parameters=linear.parameters()) + adam = paddle.optimizer.Adam(0.1, + beta2=-1, + parameters=linear.parameters()) with self.assertRaises(ValueError): - adam = paddle.optimizer.Adam( - 0.1, epsilon=-1, parameters=linear.parameters()) + adam = paddle.optimizer.Adam(0.1, + epsilon=-1, + parameters=linear.parameters()) paddle.enable_static() def test_adam_op_with_sparse_input_and_weight_decay(self): @@ -728,8 +746,9 @@ class TestAdamOpV2(unittest.TestCase): x_data = np.arange(0, 10).reshape((10, 1)).astype(np.int64) x = paddle.to_tensor(x_data, stop_gradient=False) emb = paddle.nn.Embedding(10, 10, sparse=True) - adam = paddle.optimizer.Adam( - 0.001, parameters=emb.parameters(), weight_decay=0.01) + adam = paddle.optimizer.Adam(0.001, + parameters=emb.parameters(), + weight_decay=0.01) with self.assertRaises(RuntimeError): out = emb(x) @@ -747,6 +766,7 @@ class TestAdamOpV2(unittest.TestCase): class TestAdamOptimizer(unittest.TestCase): + def _test(self, place, use_tensor=True, @@ -777,8 +797,9 @@ class TestAdamOptimizer(unittest.TestCase): with paddle.utils.unique_name.guard(): a = paddle.static.data(name="a", shape=[2, 2], dtype='float32') b = paddle.static.data(name="b", shape=[2, 2], dtype='float32') - label = paddle.static.data( - name="label", shape=[2, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[2, 1], + dtype='int64') sum = paddle.add(a, b) z = paddle.pow(sum, 2.0) @@ -824,12 +845,11 @@ class TestAdamOptimizer(unittest.TestCase): align_size=256, grad_clip=clip) else: - adam = paddle.optimizer.Adam( - learning_rate=0.01, - beta1=beta1, - beta2=beta2, - epsilon=epsilon, - grad_clip=clip) + adam = paddle.optimizer.Adam(learning_rate=0.01, + beta1=beta1, + beta2=beta2, + epsilon=epsilon, + grad_clip=clip) else: if use_fluid_api: adam = fluid.optimizer.Adam( @@ -842,12 +862,11 @@ class TestAdamOptimizer(unittest.TestCase): align_size=256, grad_clip=clip) else: - adam = fluid.optimizer.Adam( - learning_rate=0.01, - beta1=beta1_init, - beta2=beta2_init, - epsilon=epsilon_init, - grad_clip=clip) + adam = fluid.optimizer.Adam(learning_rate=0.01, + beta1=beta1_init, + beta2=beta2_init, + epsilon=epsilon_init, + grad_clip=clip) adam.minimize(loss) @@ -858,12 +877,13 @@ class TestAdamOptimizer(unittest.TestCase): print("Start run on {}".format(place)) for epoch in range(10): - pred_res, loss_res = exe.run( - main_prog, - feed={"a": a_np, - "b": b_np, - "label": label_np}, - fetch_list=[prediction, loss]) + pred_res, loss_res = exe.run(main_prog, + feed={ 
+ "a": a_np, + "b": b_np, + "label": label_np + }, + fetch_list=[prediction, loss]) print("Epoch {} | Prediction[0]: {}, Loss: {}".format( epoch, pred_res[0], loss_res)) paddle.disable_static() @@ -877,9 +897,10 @@ class TestAdamOptimizer(unittest.TestCase): for use_fluid_api in [True, False]: for use_global_beta_pow in [True, False]: for flatten_param_grads in [True, False]: - pred, loss = self._test( - place, use_tensor, use_fluid_api, - use_global_beta_pow, flatten_param_grads) + pred, loss = self._test(place, use_tensor, + use_fluid_api, + use_global_beta_pow, + flatten_param_grads) preds.append(pred) losses.append(loss) for pred in preds: @@ -913,8 +934,9 @@ class TestAdamOptimizer(unittest.TestCase): cost = fluid.layers.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) - adam = fluid.optimizer.AdamOptimizer( - 0.01, flatten_param_grads=True, align_size=256) + adam = fluid.optimizer.AdamOptimizer(0.01, + flatten_param_grads=True, + align_size=256) adam.minimize(avg_cost) paddle.disable_static() @@ -937,14 +959,13 @@ class TestAdamOptimizer(unittest.TestCase): adam = fluid.optimizer.Adam(use_global_beta_pow=True) adam.minimize(loss) self.assertRaises(Exception, adam._get_global_accumulator, 'tmp') - adam._add_global_accumulator( - 'tmp', type=core.VarDesc.VarType.LOD_TENSOR) + adam._add_global_accumulator('tmp', + type=core.VarDesc.VarType.LOD_TENSOR) adam._get_global_accumulator('tmp') - self.assertRaises( - Exception, - adam._add_global_accumulator, - adam._beta1_pow_acc_str, - type=core.VarDesc.VarType.LOD_TENSOR) + self.assertRaises(Exception, + adam._add_global_accumulator, + adam._beta1_pow_acc_str, + type=core.VarDesc.VarType.LOD_TENSOR) paddle.disable_static() def test_adam_save_load(self): @@ -955,12 +976,12 @@ class TestAdamOptimizer(unittest.TestCase): state_dict = linear.state_dict() fluid.save_dygraph(state_dict, "paddle_dy") - scheduler = paddle.optimizer.lr.NoamDecay( - d_model=0.01, warmup_steps=100, verbose=True) - adam = paddle.fluid.optimizer.Adam( - learning_rate=scheduler, - parameter_list=linear.parameters(), - use_global_beta_pow=True) + scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, + warmup_steps=100, + verbose=True) + adam = paddle.fluid.optimizer.Adam(learning_rate=scheduler, + parameter_list=linear.parameters(), + use_global_beta_pow=True) adam.minimize(b) state_dict = adam.state_dict() fluid.save_dygraph(state_dict, "paddle_dy") @@ -981,8 +1002,9 @@ class TestAdamOptimizer(unittest.TestCase): state_dict = linear.state_dict() fluid.save_dygraph(state_dict, "paddle_dy") - scheduler = paddle.optimizer.lr.NoamDecay( - d_model=0.01, warmup_steps=100, verbose=True) + scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, + warmup_steps=100, + verbose=True) adam = paddle.fluid.optimizer.Adam( learning_rate=scheduler, parameter_list=linear.parameters(), @@ -1001,13 +1023,14 @@ class TestAdamOptimizer(unittest.TestCase): self.assertRaises(AssertionError, adam2.set_state_dict, opt_state_dict) adam3 = get_opt('float32', [10, 10]) # shape not match - opt_state_dict['beta1_pow_acc_0'] = np.array( - [0.9, 0.9], dtype='float32') + opt_state_dict['beta1_pow_acc_0'] = np.array([0.9, 0.9], + dtype='float32') self.assertRaises(AssertionError, adam3.set_state_dict, opt_state_dict) paddle.enable_static() class TestAdamOpV2Group(TestAdamOpV2): + def test_adam_op(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") @@ -1015,17 +1038,16 @@ class TestAdamOpV2Group(TestAdamOpV2): linear_1 = paddle.nn.Linear(13, 
5) linear_2 = paddle.nn.Linear(5, 3) # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Adam( - learning_rate=0.01, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001, - 'beta1': 0.1, - 'beta2': 0.99 - }], - weight_decay=0.1) + adam = paddle.optimizer.Adam(learning_rate=0.01, + parameters=[{ + 'params': linear_1.parameters() + }, { + 'params': linear_2.parameters(), + 'weight_decay': 0.001, + 'beta1': 0.1, + 'beta2': 0.99 + }], + weight_decay=0.1) out = linear_1(a) out = linear_2(out) out.backward() @@ -1034,6 +1056,7 @@ class TestAdamOpV2Group(TestAdamOpV2): class TestMultiTensorAdam(unittest.TestCase): + def _adam_optimize_dygraph(self, place, use_param_attr=False, @@ -1056,20 +1079,22 @@ class TestMultiTensorAdam(unittest.TestCase): model = paddle.nn.Linear(5, 5) if not use_param_group: - optimizer = paddle.optimizer.Adam( - parameters=model.parameters(), - use_multi_tensor=use_multi_tensor, - multi_precision=use_amp) + optimizer = paddle.optimizer.Adam(parameters=model.parameters(), + use_multi_tensor=use_multi_tensor, + multi_precision=use_amp) else: - optimizer = paddle.optimizer.Adam( - parameters=[{ - 'params': model.parameters(), - 'weight_decay': 0.001, - 'beta1': 0.1, - 'beta2': 0.99 - }], - use_multi_tensor=use_multi_tensor, - multi_precision=use_amp) + optimizer = paddle.optimizer.Adam(parameters=[{ + 'params': + model.parameters(), + 'weight_decay': + 0.001, + 'beta1': + 0.1, + 'beta2': + 0.99 + }], + use_multi_tensor=use_multi_tensor, + multi_precision=use_amp) for idx in range(2): if place == 'gpu' and use_amp == True: @@ -1105,8 +1130,8 @@ class TestMultiTensorAdam(unittest.TestCase): exe = paddle.static.Executor(place=place) train_program = paddle.static.Program() startup_program = paddle.static.Program() - optimizer = paddle.optimizer.Adam( - multi_precision=use_amp, use_multi_tensor=use_multi_tensor) + optimizer = paddle.optimizer.Adam(multi_precision=use_amp, + use_multi_tensor=use_multi_tensor) if use_amp: optimizer = paddle.static.amp.decorate( optimizer, @@ -1116,11 +1141,13 @@ class TestMultiTensorAdam(unittest.TestCase): use_fp16_guard=False) with paddle.static.program_guard(train_program, startup_program): if use_amp: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float16') + data = paddle.static.data(shape=[2, 2], + name='X', + dtype='float16') else: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float32') + data = paddle.static.data(shape=[2, 2], + name='X', + dtype='float32') hidden = paddle.static.nn.fc(x=data, size=10) loss = paddle.fluid.layers.mean(hidden) optimizer.minimize(loss) @@ -1151,59 +1178,54 @@ class TestMultiTensorAdam(unittest.TestCase): output_dygraph2, params_dygraph2 = self._adam_optimize_dygraph( place=place, use_amp=use_amp, use_multi_tensor=False) self.assertEqual( - np.allclose( - output_dygraph1, output_dygraph2, rtol=1e-05), True) + np.allclose(output_dygraph1, output_dygraph2, rtol=1e-05), True) for idx in range(len(params_dygraph1)): self.assertEqual( - np.allclose( - params_dygraph1[idx], params_dygraph2[idx], rtol=1e-05), - True) + np.allclose(params_dygraph1[idx], + params_dygraph2[idx], + rtol=1e-05), True) # test static mode - output_static1 = self._adam_optimize_static( - place=place, use_amp=use_amp, use_multi_tensor=True) - output_static2 = self._adam_optimize_static( - place=place, use_amp=use_amp, use_multi_tensor=False) + output_static1 = self._adam_optimize_static(place=place, + use_amp=use_amp, + 
use_multi_tensor=True) + output_static2 = self._adam_optimize_static(place=place, + use_amp=use_amp, + use_multi_tensor=False) for idx in range(len(output_static1)): self.assertEqual( - np.allclose( - output_static1[idx], output_static2[idx], rtol=1e-05), - True) + np.allclose(output_static1[idx], + output_static2[idx], + rtol=1e-05), True) def _check_with_param_arrt(self, place, use_amp): - output1, params1 = self._adam_optimize_dygraph( - place=place, - use_amp=use_amp, - use_param_attr=True, - use_multi_tensor=True) - output2, params2 = self._adam_optimize_dygraph( - place=place, - use_amp=use_amp, - use_param_attr=True, - use_multi_tensor=False) + output1, params1 = self._adam_optimize_dygraph(place=place, + use_amp=use_amp, + use_param_attr=True, + use_multi_tensor=True) + output2, params2 = self._adam_optimize_dygraph(place=place, + use_amp=use_amp, + use_param_attr=True, + use_multi_tensor=False) self.assertEqual(np.allclose(output1, output2, rtol=1e-05), True) for idx in range(len(params1)): self.assertEqual( - np.allclose( - params1[idx], params2[idx], rtol=1e-05), True) + np.allclose(params1[idx], params2[idx], rtol=1e-05), True) def _check_with_param_group(self, place, use_amp): - output1, params1 = self._adam_optimize_dygraph( - place=place, - use_amp=use_amp, - use_param_group=True, - use_multi_tensor=True) - output2, params2 = self._adam_optimize_dygraph( - place=place, - use_amp=use_amp, - use_param_group=True, - use_multi_tensor=False) + output1, params1 = self._adam_optimize_dygraph(place=place, + use_amp=use_amp, + use_param_group=True, + use_multi_tensor=True) + output2, params2 = self._adam_optimize_dygraph(place=place, + use_amp=use_amp, + use_param_group=True, + use_multi_tensor=False) self.assertEqual(np.allclose(output1, output2, rtol=1e-05), True) for idx in range(len(params1)): self.assertEqual( - np.allclose( - params1[idx], params2[idx], rtol=1e-05), True) + np.allclose(params1[idx], params2[idx], rtol=1e-05), True) def test_main(self): for place in self._get_places(): diff --git a/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py b/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py index 5ad83179e3c..cc57293a7fa 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py +++ b/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py @@ -39,8 +39,8 @@ def main_test_func(place, dtype): adam_optimizer.minimize(avg_cost) fetch_list = [avg_cost] - train_reader = fluid.io.batch( - paddle.dataset.uci_housing.train(), batch_size=1) + train_reader = fluid.io.batch(paddle.dataset.uci_housing.train(), + batch_size=1) feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) @@ -49,6 +49,7 @@ def main_test_func(place, dtype): class AdamFp32Test(unittest.TestCase): + def setUp(self): self.dtype = 'float32' @@ -58,6 +59,7 @@ class AdamFp32Test(unittest.TestCase): class AdamFp64Test(AdamFp32Test): + def setUp(self): self.dtype = 'float64' diff --git a/python/paddle/fluid/tests/unittests/test_adamax_api.py b/python/paddle/fluid/tests/unittests/test_adamax_api.py index 1698ac90a9f..dc8f1f969e7 100644 --- a/python/paddle/fluid/tests/unittests/test_adamax_api.py +++ b/python/paddle/fluid/tests/unittests/test_adamax_api.py @@ -23,15 +23,15 @@ from paddle.fluid.framework import _test_eager_guard class TestAdamaxAPI(unittest.TestCase): + def func_adamax_api_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 
13).astype("float32") a = paddle.to_tensor(value) linear = paddle.nn.Linear(13, 5) - adam = paddle.optimizer.Adamax( - learning_rate=0.01, - parameters=linear.parameters(), - weight_decay=0.01) + adam = paddle.optimizer.Adamax(learning_rate=0.01, + parameters=linear.parameters(), + weight_decay=0.01) out = linear(a) out.backward() adam.step() @@ -56,12 +56,11 @@ class TestAdamaxAPI(unittest.TestCase): loss = paddle.mean(conv) beta1 = 0.85 beta2 = 0.95 - opt = paddle.optimizer.Adamax( - learning_rate=1e-5, - beta1=beta1, - beta2=beta2, - weight_decay=0.01, - epsilon=1e-8) + opt = paddle.optimizer.Adamax(learning_rate=1e-5, + beta1=beta1, + beta2=beta2, + weight_decay=0.01, + epsilon=1e-8) opt.minimize(loss) exe.run(startup) @@ -76,6 +75,7 @@ class TestAdamaxAPI(unittest.TestCase): class TestAdamaxAPIGroup(TestAdamaxAPI): + def func_adamax_api_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") @@ -83,17 +83,21 @@ class TestAdamaxAPIGroup(TestAdamaxAPI): linear_1 = paddle.nn.Linear(13, 5) linear_2 = paddle.nn.Linear(5, 3) # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Adamax( - learning_rate=0.01, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001, - 'beta1': 0.1, - 'beta2': 0.99 - }], - weight_decay=0.1) + adam = paddle.optimizer.Adamax(learning_rate=0.01, + parameters=[{ + 'params': + linear_1.parameters() + }, { + 'params': + linear_2.parameters(), + 'weight_decay': + 0.001, + 'beta1': + 0.1, + 'beta2': + 0.99 + }], + weight_decay=0.1) out = linear_1(a) out = linear_2(out) out.backward() diff --git a/python/paddle/fluid/tests/unittests/test_adamax_op.py b/python/paddle/fluid/tests/unittests/test_adamax_op.py index 8ce7656acfa..3c8be0529d1 100644 --- a/python/paddle/fluid/tests/unittests/test_adamax_op.py +++ b/python/paddle/fluid/tests/unittests/test_adamax_op.py @@ -20,6 +20,7 @@ from op_test import OpTest class TestAdamaxOp1(OpTest): + def setUp(self): '''Test Adamax Operator with supplied attributes ''' @@ -47,8 +48,8 @@ class TestAdamaxOp1(OpTest): self.attrs = {'beta1': beta1, 'beta2': beta2, 'epsilon': epsilon} - param_out, moment_out, inf_norm_out = adamax_step(self.inputs, - self.attrs) + param_out, moment_out, inf_norm_out = adamax_step( + self.inputs, self.attrs) self.outputs = { 'ParamOut': param_out, @@ -101,6 +102,7 @@ class TestAdamaxOp2(OpTest): class TestAdamaxOpMultipleSteps(OpTest): + def setUp(self): '''Test Adamax Operator with supplied attributes ''' @@ -132,8 +134,8 @@ class TestAdamaxOpMultipleSteps(OpTest): def test_check_output(self): for _ in range(self.num_steps): - param_out, moment_out, inf_norm_out = adamax_step(self.inputs, - self.attrs) + param_out, moment_out, inf_norm_out = adamax_step( + self.inputs, self.attrs) self.outputs = { 'ParamOut': param_out, @@ -185,19 +187,23 @@ def adamax_step(inputs, attributes): class TestAdamaxOpV2(unittest.TestCase): + def test_adamax_op_invalid_input(self): import paddle paddle.disable_static() linear = paddle.nn.Linear(10, 10) with self.assertRaises(ValueError): - adam = paddle.optimizer.Adamax( - 0.1, beta1=-1, parameters=linear.parameters()) + adam = paddle.optimizer.Adamax(0.1, + beta1=-1, + parameters=linear.parameters()) with self.assertRaises(ValueError): - adam = paddle.optimizer.Adamax( - 0.1, beta2=-1, parameters=linear.parameters()) + adam = paddle.optimizer.Adamax(0.1, + beta2=-1, + parameters=linear.parameters()) with self.assertRaises(ValueError): - adam = 
paddle.optimizer.Adamax( - 0.1, epsilon=-1, parameters=linear.parameters()) + adam = paddle.optimizer.Adamax(0.1, + epsilon=-1, + parameters=linear.parameters()) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_adamw_op.py b/python/paddle/fluid/tests/unittests/test_adamw_op.py index 225bd35a8ec..2ece3d2d8dd 100644 --- a/python/paddle/fluid/tests/unittests/test_adamw_op.py +++ b/python/paddle/fluid/tests/unittests/test_adamw_op.py @@ -60,6 +60,7 @@ def adamw_step(inputs, attributes): class TestAdamW(OpTest): + def setUp(self): '''Test AdamW Op with supplied attributes ''' @@ -113,6 +114,7 @@ class TestAdamW(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestAdamW2(OpTest): + def setUp(self): '''Test AdamW Op with supplied attributes ''' @@ -149,8 +151,8 @@ class TestAdamW2(OpTest): "with_decay": True } - param_out, moment1_out, moment2_out = adamw_step(self.inputs, - self.attrs) + param_out, moment1_out, moment2_out = adamw_step( + self.inputs, self.attrs) self.outputs = { 'Moment1Out': moment1_out, @@ -165,16 +167,16 @@ class TestAdamW2(OpTest): class TestAdamWOp(unittest.TestCase): + def test_adamw_op_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") a = paddle.to_tensor(value) linear = paddle.nn.Linear(13, 5) - adam = paddle.optimizer.AdamW( - learning_rate=0.01, - parameters=linear.parameters(), - apply_decay_param_fun=lambda name: True, - weight_decay=0.01) + adam = paddle.optimizer.AdamW(learning_rate=0.01, + parameters=linear.parameters(), + apply_decay_param_fun=lambda name: True, + weight_decay=0.01) for _ in range(2): out = linear(a) @@ -187,11 +189,10 @@ class TestAdamWOp(unittest.TestCase): value = np.arange(26).reshape(2, 13).astype("float32") a = paddle.to_tensor(value) linear = paddle.nn.Linear(13, 5) - adam = paddle.optimizer.AdamW( - learning_rate=0.0, - parameters=linear.parameters(), - apply_decay_param_fun=lambda name: True, - weight_decay=0.01) + adam = paddle.optimizer.AdamW(learning_rate=0.0, + parameters=linear.parameters(), + apply_decay_param_fun=lambda name: True, + weight_decay=0.01) assert (adam.__str__() is not None) def test_adamw_op(self): @@ -207,17 +208,20 @@ class TestAdamWOp(unittest.TestCase): conv = fluid.layers.conv2d(data, 8, 3) loss = paddle.mean(conv) - beta1 = fluid.layers.create_global_var( - shape=[1], value=0.85, dtype='float32', persistable=True) - beta2 = fluid.layers.create_global_var( - shape=[1], value=0.95, dtype='float32', persistable=True) + beta1 = fluid.layers.create_global_var(shape=[1], + value=0.85, + dtype='float32', + persistable=True) + beta2 = fluid.layers.create_global_var(shape=[1], + value=0.95, + dtype='float32', + persistable=True) betas = [beta1, beta2] - opt = paddle.optimizer.AdamW( - learning_rate=1e-5, - beta1=beta1, - beta2=beta2, - weight_decay=0.01, - epsilon=1e-8) + opt = paddle.optimizer.AdamW(learning_rate=1e-5, + beta1=beta1, + beta2=beta2, + weight_decay=0.01, + epsilon=1e-8) opt.minimize(loss) exe.run(startup) @@ -230,14 +234,17 @@ class TestAdamWOp(unittest.TestCase): paddle.disable_static() linear = paddle.nn.Linear(10, 10) with self.assertRaises(ValueError): - adam = paddle.optimizer.AdamW( - 0.1, beta1=-1, parameters=linear.parameters()) + adam = paddle.optimizer.AdamW(0.1, + beta1=-1, + parameters=linear.parameters()) with self.assertRaises(ValueError): - adam = paddle.optimizer.AdamW( - 0.1, beta2=-1, parameters=linear.parameters()) + adam = paddle.optimizer.AdamW(0.1, + 
beta2=-1, + parameters=linear.parameters()) with self.assertRaises(ValueError): - adam = paddle.optimizer.AdamW( - 0.1, epsilon=-1, parameters=linear.parameters()) + adam = paddle.optimizer.AdamW(0.1, + epsilon=-1, + parameters=linear.parameters()) def test_api_eager_dygraph(self): with _test_eager_guard(): @@ -246,22 +253,25 @@ class TestAdamWOp(unittest.TestCase): class TestAdamWOpGroup(TestAdamWOp): + def test_adamw_op_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") a = paddle.to_tensor(value) linear_1 = paddle.nn.Linear(13, 5) linear_2 = paddle.nn.Linear(5, 3) - adam = paddle.optimizer.AdamW( - learning_rate=0.01, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001 - }], - apply_decay_param_fun=lambda name: True, - weight_decay=0.01) + adam = paddle.optimizer.AdamW(learning_rate=0.01, + parameters=[{ + 'params': + linear_1.parameters() + }, { + 'params': + linear_2.parameters(), + 'weight_decay': + 0.001 + }], + apply_decay_param_fun=lambda name: True, + weight_decay=0.01) for _ in range(2): out = linear_1(a) @@ -272,6 +282,7 @@ class TestAdamWOpGroup(TestAdamWOp): class TestAdamWOpMultiPrecison(unittest.TestCase): + def _test_adamw_op_dygraph_place_amp(self, place, use_amp=False): paddle.disable_static() paddle.seed(10) @@ -281,14 +292,17 @@ class TestAdamWOpMultiPrecison(unittest.TestCase): model = paddle.nn.Linear(5, 5) - optimizer = paddle.optimizer.AdamW( - parameters=[{ - 'params': model.parameters(), - 'weight_decay': 0.001, - 'beta1': 0.1, - 'beta2': 0.99 - }], - multi_precision=use_amp) + optimizer = paddle.optimizer.AdamW(parameters=[{ + 'params': + model.parameters(), + 'weight_decay': + 0.001, + 'beta1': + 0.1, + 'beta2': + 0.99 + }], + multi_precision=use_amp) for idx in range(2): if place == 'gpu' and use_amp == True: @@ -324,19 +338,19 @@ class TestAdamWOpMultiPrecison(unittest.TestCase): class TestAdamWOpError(unittest.TestCase): + def test_api_errors(self): + def test_weight_decay_dtype(): linear = paddle.nn.Linear(13, 5) - adam = paddle.optimizer.AdamW( - learning_rate=0.01, - parameters=linear.parameters(), - weight_decay=1) + adam = paddle.optimizer.AdamW(learning_rate=0.01, + parameters=linear.parameters(), + weight_decay=1) def test_parameters_dtype1(): - adam = paddle.optimizer.AdamW( - learning_rate=0.01, - parameters=paddle.randn((5, 5)), - weight_decay=0.1) + adam = paddle.optimizer.AdamW(learning_rate=0.01, + parameters=paddle.randn((5, 5)), + weight_decay=0.1) def test_parameters_dtype2(): linear = paddle.nn.Linear(13, 5) @@ -346,8 +360,9 @@ class TestAdamWOpError(unittest.TestCase): weight_decay=0.1) def test_parameters_dtype3(): - adam = paddle.optimizer.AdamW( - learning_rate=0.01, parameters=None, weight_decay=0.1) + adam = paddle.optimizer.AdamW(learning_rate=0.01, + parameters=None, + weight_decay=0.1) def test_parameters_dtype4(): linear = paddle.nn.Linear(13, 5) @@ -358,18 +373,16 @@ class TestAdamWOpError(unittest.TestCase): def test_learning_rate_dtype(): linear = paddle.nn.Linear(13, 5) - adam = paddle.optimizer.AdamW( - learning_rate=1, - parameters=linear.parameters(), - weight_decay=0.1) + adam = paddle.optimizer.AdamW(learning_rate=1, + parameters=linear.parameters(), + weight_decay=0.1) def test_grad_clip_dtype(): linear = paddle.nn.Linear(13, 5) - adam = paddle.optimizer.AdamW( - learning_rate=0.01, - parameters=linear.parameters(), - weight_decay=0.1, - grad_clip=0.1) + adam = paddle.optimizer.AdamW(learning_rate=0.01, + 
parameters=linear.parameters(), + weight_decay=0.1, + grad_clip=0.1) self.assertRaises(TypeError, test_weight_decay_dtype) self.assertRaises(TypeError, test_parameters_dtype1) @@ -381,6 +394,7 @@ class TestAdamWOpError(unittest.TestCase): class TestAdamWOpGroupWithLR(TestAdamWOp): + def test_adamw_op_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") @@ -422,6 +436,7 @@ def simple_lr_setting(param, decay_rate, n_layers): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestAdamWOpLayerwiseLR(TestAdamWOp): + def setUp(self): random.seed(2022) np.random.seed(2022) @@ -460,16 +475,15 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp): beta1 = 0.9 beta2 = 0.999 - opt = paddle.optimizer.AdamW( - learning_rate=learning_rate, - parameters=[{ - 'params': linear1.parameters() - }, { - 'params': linear2.parameters(), - }], - apply_decay_param_fun=lambda name: True, - weight_decay=weight_decay, - lr_ratio=simple_lr_fun) + opt = paddle.optimizer.AdamW(learning_rate=learning_rate, + parameters=[{ + 'params': linear1.parameters() + }, { + 'params': linear2.parameters(), + }], + apply_decay_param_fun=lambda name: True, + weight_decay=weight_decay, + lr_ratio=simple_lr_fun) def get_numpy_output(param, grad, moment1, moment2, lr_ratio, t): np_inputs = { @@ -490,8 +504,8 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp): "coeff": weight_decay, "with_decay": True } - param_out, moment1_out, moment2_out = adamw_step(np_inputs, - np_attrs) + param_out, moment1_out, moment2_out = adamw_step( + np_inputs, np_attrs) return param_out, moment1_out, moment2_out for i in range(5): @@ -503,20 +517,16 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp): out.backward() fc1_w, fc1_w_mon1, fc1_w_mon2 = get_numpy_output( - fc1_w, - np.array(linear1.weight.grad), fc1_w_mon1, fc1_w_mon2, + fc1_w, np.array(linear1.weight.grad), fc1_w_mon1, fc1_w_mon2, simple_lr_fun(linear1.weight), i + 1) fc1_b, fc1_b_mon1, fc1_b_mon2 = get_numpy_output( - fc1_b, - np.array(linear1.bias.grad), fc1_b_mon1, fc1_b_mon2, + fc1_b, np.array(linear1.bias.grad), fc1_b_mon1, fc1_b_mon2, simple_lr_fun(linear1.bias), i + 1) fc2_w, fc2_w_mon1, fc2_w_mon2 = get_numpy_output( - fc2_w, - np.array(linear2.weight.grad), fc2_w_mon1, fc2_w_mon2, + fc2_w, np.array(linear2.weight.grad), fc2_w_mon1, fc2_w_mon2, simple_lr_fun(linear2.weight), i + 1) fc2_b, fc2_b_mon1, fc2_b_mon2 = get_numpy_output( - fc2_b, - np.array(linear2.bias.grad), fc2_b_mon1, fc2_b_mon2, + fc2_b, np.array(linear2.bias.grad), fc2_b_mon1, fc2_b_mon2, simple_lr_fun(linear2.bias), i + 1) opt.step() @@ -552,10 +562,14 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp): bias_attr2 = paddle.framework.ParamAttr( name="linear_1.b_0", initializer=paddle.nn.initializer.Constant(value=1.0)) - linear1 = paddle.nn.Linear( - 10, 32, weight_attr=weight_attr1, bias_attr=bias_attr1) - linear2 = paddle.nn.Linear( - 32, 1, weight_attr=weight_attr2, bias_attr=bias_attr2) + linear1 = paddle.nn.Linear(10, + 32, + weight_attr=weight_attr1, + bias_attr=bias_attr1) + linear2 = paddle.nn.Linear(32, + 1, + weight_attr=weight_attr2, + bias_attr=bias_attr2) out = linear1(x) out = linear2(out) @@ -572,16 +586,16 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp): cost = fluid.layers.square_error_cost(input=out, label=y) avg_cost = fluid.layers.mean(cost) - simple_lr_fun = partial( - simple_lr_setting, decay_rate=0.8, n_layers=2) + simple_lr_fun = partial(simple_lr_setting, + decay_rate=0.8, + n_layers=2) - opt = paddle.optimizer.AdamW( - learning_rate=learning_rate, - 
beta1=beta1, - beta2=beta2, - weight_decay=weight_decay, - epsilon=epsilon, - lr_ratio=simple_lr_fun) + opt = paddle.optimizer.AdamW(learning_rate=learning_rate, + beta1=beta1, + beta2=beta2, + weight_decay=weight_decay, + epsilon=epsilon, + lr_ratio=simple_lr_fun) opt.minimize(avg_cost) def get_numpy_output(param, grad, moment1, moment2, lr_ratio, t): @@ -603,8 +617,8 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp): "coeff": weight_decay, "with_decay": True } - param_out, moment1_out, moment2_out = adamw_step(np_inputs, - np_attrs) + param_out, moment1_out, moment2_out = adamw_step( + np_inputs, np_attrs) return param_out, moment1_out, moment2_out fetch_list1 = [ @@ -625,12 +639,16 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp): outputs = np.random.random(size=[8, 1]).astype('float32') param = exe.run(test_prog, - feed={"x": inputs, - "y": outputs}, + feed={ + "x": inputs, + "y": outputs + }, fetch_list=fetch_list1) params_and_gras = exe.run(train_prog, - feed={"x": inputs, - "y": outputs}, + feed={ + "x": inputs, + "y": outputs + }, fetch_list=fetch_list2) fc1_w = param[0] diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py index 47658518551..204a16668ad 100644 --- a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py @@ -47,8 +47,8 @@ def avg_pool1D_forward_naive(x, if adaptive: L_out = ksize[0] else: - L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1 - ) // strides[0] + 1 if ceil_mode else ( + L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - + 1) // strides[0] + 1 if ceil_mode else ( L - ksize[0] + 2 * paddings[0]) // strides[0] + 1 out = np.zeros((N, C, L_out)) @@ -64,15 +64,16 @@ def avg_pool1D_forward_naive(x, field_size = (r_end - r_start) \ if (exclusive or adaptive) else (ksize[0]) if data_type == np.int8 or data_type == np.uint8: - out[:, :, i] = (np.rint( - np.sum(x_masked, axis=(2, 3)) / field_size)).astype(data_type) + out[:, :, i] = (np.rint(np.sum(x_masked, axis=(2, 3)) / + field_size)).astype(data_type) else: - out[:, :, i] = (np.sum(x_masked, axis=(2)) / - field_size).astype(data_type) + out[:, :, + i] = (np.sum(x_masked, axis=(2)) / field_size).astype(data_type) return out class TestPool1D_API(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -84,8 +85,11 @@ class TestPool1D_API(unittest.TestCase): input_np = np.random.random([2, 3, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) result = F.adaptive_avg_pool1d(input, output_size=16) - result_np = avg_pool1D_forward_naive( - input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True) + result_np = avg_pool1D_forward_naive(input_np, + ksize=[16], + strides=[0], + paddings=[0], + adaptive=True) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -94,8 +98,9 @@ class TestPool1D_API(unittest.TestCase): result = ada_max_pool1d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) - result = paddle.nn.functional.common.interpolate( - input, mode="area", size=16) + result = paddle.nn.functional.common.interpolate(input, + mode="area", + size=16) self.assertTrue(np.allclose(result.numpy(), result_np)) def check_adaptive_avg_static_results(self, place): @@ -104,8 +109,11 @@ class TestPool1D_API(unittest.TestCase): result = F.adaptive_avg_pool1d(input, output_size=16) input_np = np.random.random([2, 3, 32]).astype("float32") - result_np = avg_pool1D_forward_naive( - 
input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True) + result_np = avg_pool1D_forward_naive(input_np, + ksize=[16], + strides=[2], + paddings=[0], + adaptive=True) exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py index 2b104041f94..2531834b217 100644 --- a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py @@ -33,7 +33,9 @@ def adaptive_end_index(index, input_size, output_size): return int(np.ceil((index + 1) * input_size / output_size)) -def adaptive_pool2d_forward(x, output_size, data_format='NCHW', +def adaptive_pool2d_forward(x, + output_size, + data_format='NCHW', pool_type="avg"): N = x.shape[0] @@ -68,16 +70,16 @@ def adaptive_pool2d_forward(x, output_size, data_format='NCHW', if data_format == 'NCHW': x_masked = x[:, :, in_h_start:in_h_end, in_w_start:in_w_end] if pool_type == 'avg': - field_size = ( - (in_h_end - in_h_start) * (in_w_end - in_w_start)) + field_size = ((in_h_end - in_h_start) * + (in_w_end - in_w_start)) out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size elif pool_type == 'max': out[:, :, i, j] = np.max(x_masked, axis=(2, 3)) elif data_format == 'NHWC': x_masked = x[:, in_h_start:in_h_end, in_w_start:in_w_end, :] if pool_type == 'avg': - field_size = ( - (in_h_end - in_h_start) * (in_w_end - in_w_start)) + field_size = ((in_h_end - in_h_start) * + (in_w_end - in_w_start)) out[:, i, j, :] = np.sum(x_masked, axis=(1, 2)) / field_size elif pool_type == 'max': out[:, i, j, :] = np.max(x_masked, axis=(1, 2)) @@ -85,25 +87,29 @@ def adaptive_pool2d_forward(x, output_size, data_format='NCHW', class TestAdaptiveAvgPool2DAPI(unittest.TestCase): + def setUp(self): self.x_np = np.random.random([2, 3, 7, 7]).astype("float32") - self.res_1_np = adaptive_pool2d_forward( - x=self.x_np, output_size=[3, 3], pool_type="avg") + self.res_1_np = adaptive_pool2d_forward(x=self.x_np, + output_size=[3, 3], + pool_type="avg") - self.res_2_np = adaptive_pool2d_forward( - x=self.x_np, output_size=5, pool_type="avg") + self.res_2_np = adaptive_pool2d_forward(x=self.x_np, + output_size=5, + pool_type="avg") - self.res_3_np = adaptive_pool2d_forward( - x=self.x_np, output_size=[2, 5], pool_type="avg") + self.res_3_np = adaptive_pool2d_forward(x=self.x_np, + output_size=[2, 5], + pool_type="avg") - self.res_4_np = adaptive_pool2d_forward( - x=self.x_np, - output_size=[3, 3], - pool_type="avg", - data_format="NHWC") + self.res_4_np = adaptive_pool2d_forward(x=self.x_np, + output_size=[3, 3], + pool_type="avg", + data_format="NHWC") - self.res_5_np = adaptive_pool2d_forward( - x=self.x_np, output_size=[None, 3], pool_type="avg") + self.res_5_np = adaptive_pool2d_forward(x=self.x_np, + output_size=[None, 3], + pool_type="avg") def test_static_graph(self): for use_cuda in ([False, True] @@ -112,25 +118,26 @@ class TestAdaptiveAvgPool2DAPI(unittest.TestCase): paddle.enable_static() x = paddle.fluid.data(name="x", shape=[2, 3, 7, 7], dtype="float32") - out_1 = paddle.nn.functional.adaptive_avg_pool2d( - x=x, output_size=[3, 3]) + out_1 = paddle.nn.functional.adaptive_avg_pool2d(x=x, + output_size=[3, 3]) out_2 = paddle.nn.functional.adaptive_avg_pool2d(x=x, output_size=5) - out_3 = paddle.nn.functional.adaptive_avg_pool2d( - x=x, output_size=[2, 5]) + out_3 = paddle.nn.functional.adaptive_avg_pool2d(x=x, + output_size=[2, 5]) - out_4 
= paddle.nn.functional.adaptive_avg_pool2d( - x=x, output_size=[3, 3], data_format="NHWC") + out_4 = paddle.nn.functional.adaptive_avg_pool2d(x=x, + output_size=[3, 3], + data_format="NHWC") out_5 = paddle.nn.functional.adaptive_avg_pool2d( x=x, output_size=[None, 3]) exe = paddle.static.Executor(place=place) - [res_1, res_2, res_3, res_4, res_5] = exe.run( - fluid.default_main_program(), - feed={"x": self.x_np}, - fetch_list=[out_1, out_2, out_3, out_4, out_5]) + [res_1, res_2, res_3, res_4, + res_5] = exe.run(fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_4, out_5]) assert np.allclose(res_1, self.res_1_np) @@ -149,22 +156,24 @@ class TestAdaptiveAvgPool2DAPI(unittest.TestCase): paddle.disable_static(place=place) x = paddle.to_tensor(self.x_np) - out_1 = paddle.nn.functional.adaptive_avg_pool2d( - x=x, output_size=[3, 3]) + out_1 = paddle.nn.functional.adaptive_avg_pool2d(x=x, + output_size=[3, 3]) out_2 = paddle.nn.functional.adaptive_avg_pool2d(x=x, output_size=5) - out_3 = paddle.nn.functional.adaptive_avg_pool2d( - x=x, output_size=[2, 5]) + out_3 = paddle.nn.functional.adaptive_avg_pool2d(x=x, + output_size=[2, 5]) - out_4 = paddle.nn.functional.adaptive_avg_pool2d( - x=x, output_size=[3, 3], data_format="NHWC") + out_4 = paddle.nn.functional.adaptive_avg_pool2d(x=x, + output_size=[3, 3], + data_format="NHWC") out_5 = paddle.nn.functional.adaptive_avg_pool2d( x=x, output_size=[None, 3]) - out_6 = paddle.nn.functional.interpolate( - x=x, mode="area", size=[2, 5]) + out_6 = paddle.nn.functional.interpolate(x=x, + mode="area", + size=[2, 5]) assert np.allclose(out_1.numpy(), self.res_1_np) @@ -180,25 +189,29 @@ class TestAdaptiveAvgPool2DAPI(unittest.TestCase): class TestAdaptiveAvgPool2DClassAPI(unittest.TestCase): + def setUp(self): self.x_np = np.random.random([2, 3, 7, 7]).astype("float32") - self.res_1_np = adaptive_pool2d_forward( - x=self.x_np, output_size=[3, 3], pool_type="avg") + self.res_1_np = adaptive_pool2d_forward(x=self.x_np, + output_size=[3, 3], + pool_type="avg") - self.res_2_np = adaptive_pool2d_forward( - x=self.x_np, output_size=5, pool_type="avg") + self.res_2_np = adaptive_pool2d_forward(x=self.x_np, + output_size=5, + pool_type="avg") - self.res_3_np = adaptive_pool2d_forward( - x=self.x_np, output_size=[2, 5], pool_type="avg") + self.res_3_np = adaptive_pool2d_forward(x=self.x_np, + output_size=[2, 5], + pool_type="avg") - self.res_4_np = adaptive_pool2d_forward( - x=self.x_np, - output_size=[3, 3], - pool_type="avg", - data_format="NHWC") + self.res_4_np = adaptive_pool2d_forward(x=self.x_np, + output_size=[3, 3], + pool_type="avg", + data_format="NHWC") - self.res_5_np = adaptive_pool2d_forward( - x=self.x_np, output_size=[None, 3], pool_type="avg") + self.res_5_np = adaptive_pool2d_forward(x=self.x_np, + output_size=[None, 3], + pool_type="avg") def test_static_graph(self): for use_cuda in ([False, True] @@ -216,8 +229,8 @@ class TestAdaptiveAvgPool2DClassAPI(unittest.TestCase): adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D(output_size=[2, 5]) out_3 = adaptive_avg_pool(x=x) - adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D( - output_size=[3, 3], data_format="NHWC") + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D(output_size=[3, 3], + data_format="NHWC") out_4 = adaptive_avg_pool(x=x) adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D( @@ -225,10 +238,10 @@ class TestAdaptiveAvgPool2DClassAPI(unittest.TestCase): out_5 = adaptive_avg_pool(x=x) exe = paddle.static.Executor(place=place) - [res_1, res_2, res_3, 
res_4, res_5] = exe.run( - fluid.default_main_program(), - feed={"x": self.x_np}, - fetch_list=[out_1, out_2, out_3, out_4, out_5]) + [res_1, res_2, res_3, res_4, + res_5] = exe.run(fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_4, out_5]) assert np.allclose(res_1, self.res_1_np) @@ -256,8 +269,8 @@ class TestAdaptiveAvgPool2DClassAPI(unittest.TestCase): adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D(output_size=[2, 5]) out_3 = adaptive_avg_pool(x=x) - adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D( - output_size=[3, 3], data_format="NHWC") + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D(output_size=[3, 3], + data_format="NHWC") out_4 = adaptive_avg_pool(x=x) adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D( diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py index deb45da8a01..98258b3558f 100755 --- a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py @@ -76,19 +76,19 @@ def adaptive_pool3d_forward(x, w_end = adaptive_end_index(j, W, output_size[2]) if data_format == 'NCDHW': - x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start: - w_end] + x_masked = x[:, :, d_start:d_end, h_start:h_end, + w_start:w_end] if pool_type == 'avg': field_size = (d_end - d_start) * (h_end - h_start) * ( w_end - w_start) - out[:, :, k, i, j] = np.sum(x_masked, - axis=(2, 3, 4)) / field_size + out[:, :, k, i, + j] = np.sum(x_masked, axis=(2, 3, 4)) / field_size elif pool_type == 'max': out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4)) elif data_format == 'NDHWC': - x_masked = x[:, d_start:d_end, h_start:h_end, w_start: - w_end, :] + x_masked = x[:, d_start:d_end, h_start:h_end, + w_start:w_end, :] if pool_type == 'avg': field_size = (d_end - d_start) * (h_end - h_start) * ( w_end - w_start) @@ -100,33 +100,38 @@ def adaptive_pool3d_forward(x, class TestAdaptiveAvgPool3DAPI(unittest.TestCase): + def setUp(self): self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32") - self.res_1_np = adaptive_pool3d_forward( - x=self.x_np, output_size=[3, 3, 3], pool_type="avg") + self.res_1_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[3, 3, 3], + pool_type="avg") - self.res_2_np = adaptive_pool3d_forward( - x=self.x_np, output_size=5, pool_type="avg") + self.res_2_np = adaptive_pool3d_forward(x=self.x_np, + output_size=5, + pool_type="avg") - self.res_3_np = adaptive_pool3d_forward( - x=self.x_np, output_size=[2, 3, 5], pool_type="avg") + self.res_3_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[2, 3, 5], + pool_type="avg") - self.res_4_np = adaptive_pool3d_forward( - x=self.x_np, - output_size=[3, 3, 3], - pool_type="avg", - data_format="NDHWC") + self.res_4_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[3, 3, 3], + pool_type="avg", + data_format="NDHWC") - self.res_5_np = adaptive_pool3d_forward( - x=self.x_np, output_size=[None, 3, None], pool_type="avg") + self.res_5_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[None, 3, None], + pool_type="avg") def test_static_graph(self): for use_cuda in ([False, True] if core.is_compiled_with_cuda() else [False]): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.enable_static() - x = paddle.fluid.data( - name="x", shape=[2, 3, 5, 7, 7], dtype="float32") + x = paddle.fluid.data(name="x", + shape=[2, 3, 5, 7, 7], + dtype="float32") out_1 = paddle.nn.functional.adaptive_avg_pool3d( x=x, 
output_size=[3, 3, 3]) @@ -143,10 +148,10 @@ class TestAdaptiveAvgPool3DAPI(unittest.TestCase): x=x, output_size=[None, 3, None]) exe = paddle.static.Executor(place=place) - [res_1, res_2, res_3, res_4, res_5] = exe.run( - fluid.default_main_program(), - feed={"x": self.x_np}, - fetch_list=[out_1, out_2, out_3, out_4, out_5]) + [res_1, res_2, res_3, res_4, + res_5] = exe.run(fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_4, out_5]) assert np.allclose(res_1, self.res_1_np) @@ -179,8 +184,9 @@ class TestAdaptiveAvgPool3DAPI(unittest.TestCase): out_5 = paddle.nn.functional.adaptive_avg_pool3d( x=x, output_size=[None, 3, None]) - out_6 = paddle.nn.functional.interpolate( - x=x, mode="area", size=[2, 3, 5]) + out_6 = paddle.nn.functional.interpolate(x=x, + mode="area", + size=[2, 3, 5]) assert np.allclose(out_1.numpy(), self.res_1_np) @@ -196,33 +202,38 @@ class TestAdaptiveAvgPool3DAPI(unittest.TestCase): class TestAdaptiveAvgPool3DClassAPI(unittest.TestCase): + def setUp(self): self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32") - self.res_1_np = adaptive_pool3d_forward( - x=self.x_np, output_size=[3, 3, 3], pool_type="avg") + self.res_1_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[3, 3, 3], + pool_type="avg") - self.res_2_np = adaptive_pool3d_forward( - x=self.x_np, output_size=5, pool_type="avg") + self.res_2_np = adaptive_pool3d_forward(x=self.x_np, + output_size=5, + pool_type="avg") - self.res_3_np = adaptive_pool3d_forward( - x=self.x_np, output_size=[2, 3, 5], pool_type="avg") + self.res_3_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[2, 3, 5], + pool_type="avg") - self.res_4_np = adaptive_pool3d_forward( - x=self.x_np, - output_size=[3, 3, 3], - pool_type="avg", - data_format="NDHWC") + self.res_4_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[3, 3, 3], + pool_type="avg", + data_format="NDHWC") - self.res_5_np = adaptive_pool3d_forward( - x=self.x_np, output_size=[None, 3, None], pool_type="avg") + self.res_5_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[None, 3, None], + pool_type="avg") def test_static_graph(self): for use_cuda in ([False, True] if core.is_compiled_with_cuda() else [False]): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.enable_static() - x = paddle.fluid.data( - name="x", shape=[2, 3, 5, 7, 7], dtype="float32") + x = paddle.fluid.data(name="x", + shape=[2, 3, 5, 7, 7], + dtype="float32") adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3D( output_size=[3, 3, 3]) @@ -244,10 +255,10 @@ class TestAdaptiveAvgPool3DClassAPI(unittest.TestCase): out_5 = adaptive_avg_pool(x=x) exe = paddle.static.Executor(place=place) - [res_1, res_2, res_3, res_4, res_5] = exe.run( - fluid.default_main_program(), - feed={"x": self.x_np}, - fetch_list=[out_1, out_2, out_3, out_4, out_5]) + [res_1, res_2, res_3, res_4, + res_5] = exe.run(fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_4, out_5]) assert np.allclose(res_1, self.res_1_np) diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py index 2a0415722be..db577ec5378 100644 --- a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py @@ -45,8 +45,8 @@ def max_pool1D_forward_naive(x, if adaptive: L_out = ksize[0] else: - L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1 - ) // strides[0] + 1 if 
ceil_mode else ( + L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - + 1) // strides[0] + 1 if ceil_mode else ( L - ksize[0] + 2 * paddings[0]) // strides[0] + 1 out = np.zeros((N, C, L_out)) @@ -64,6 +64,7 @@ def max_pool1D_forward_naive(x, class TestPool1D_API(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -76,8 +77,11 @@ class TestPool1D_API(unittest.TestCase): input = fluid.dygraph.to_variable(input_np) result = F.adaptive_max_pool1d(input, output_size=16) - result_np = max_pool1D_forward_naive( - input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True) + result_np = max_pool1D_forward_naive(input_np, + ksize=[16], + strides=[0], + paddings=[0], + adaptive=True) self.assertTrue(np.allclose(result.numpy(), result_np)) ada_max_pool1d_dg = paddle.nn.layer.AdaptiveMaxPool1D( @@ -91,8 +95,11 @@ class TestPool1D_API(unittest.TestCase): result = F.adaptive_max_pool1d(input, output_size=16) input_np = np.random.random([2, 3, 32]).astype("float32") - result_np = max_pool1D_forward_naive( - input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True) + result_np = max_pool1D_forward_naive(input_np, + ksize=[16], + strides=[2], + paddings=[0], + adaptive=True) exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), @@ -107,14 +114,14 @@ class TestPool1D_API(unittest.TestCase): class TestOutDtype(unittest.TestCase): + def test_max_pool(self): api_fn = F.adaptive_max_pool1d shape = [1, 3, 32] - check_out_dtype( - api_fn, - in_specs=[(shape, )], - expect_dtypes=['float32', 'float64'], - output_size=16) + check_out_dtype(api_fn, + in_specs=[(shape, )], + expect_dtypes=['float32', 'float64'], + output_size=16) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py index 037475e1669..f92b47a8d6d 100644 --- a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py @@ -34,7 +34,9 @@ def adaptive_end_index(index, input_size, output_size): return int(np.ceil((index + 1) * input_size / output_size)) -def adaptive_pool2d_forward(x, output_size, data_format='NCHW', +def adaptive_pool2d_forward(x, + output_size, + data_format='NCHW', pool_type="max"): N = x.shape[0] @@ -69,16 +71,16 @@ def adaptive_pool2d_forward(x, output_size, data_format='NCHW', if data_format == 'NCHW': x_masked = x[:, :, in_h_start:in_h_end, in_w_start:in_w_end] if pool_type == 'avg': - field_size = ( - (in_h_end - in_h_start) * (in_w_end - in_w_start)) + field_size = ((in_h_end - in_h_start) * + (in_w_end - in_w_start)) out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size elif pool_type == 'max': out[:, :, i, j] = np.max(x_masked, axis=(2, 3)) elif data_format == 'NHWC': x_masked = x[:, in_h_start:in_h_end, in_w_start:in_w_end, :] if pool_type == 'avg': - field_size = ( - (in_h_end - in_h_start) * (in_w_end - in_w_start)) + field_size = ((in_h_end - in_h_start) * + (in_w_end - in_w_start)) out[:, i, j, :] = np.sum(x_masked, axis=(1, 2)) / field_size elif pool_type == 'max': out[:, i, j, :] = np.max(x_masked, axis=(1, 2)) @@ -86,16 +88,20 @@ def adaptive_pool2d_forward(x, output_size, data_format='NCHW', class TestAdaptiveMaxPool2DAPI(unittest.TestCase): + def setUp(self): self.x_np = np.random.random([2, 3, 7, 7]).astype("float32") - self.res_1_np = adaptive_pool2d_forward( - x=self.x_np, output_size=[3, 3], pool_type="max") + self.res_1_np = 
adaptive_pool2d_forward(x=self.x_np, + output_size=[3, 3], + pool_type="max") - self.res_2_np = adaptive_pool2d_forward( - x=self.x_np, output_size=5, pool_type="max") + self.res_2_np = adaptive_pool2d_forward(x=self.x_np, + output_size=5, + pool_type="max") - self.res_3_np = adaptive_pool2d_forward( - x=self.x_np, output_size=[2, 5], pool_type="max") + self.res_3_np = adaptive_pool2d_forward(x=self.x_np, + output_size=[2, 5], + pool_type="max") """ self.res_4_np = adaptive_pool2d_forward( x=self.x_np, @@ -103,8 +109,9 @@ class TestAdaptiveMaxPool2DAPI(unittest.TestCase): pool_type="max", data_format="NHWC") """ - self.res_5_np = adaptive_pool2d_forward( - x=self.x_np, output_size=[None, 3], pool_type="max") + self.res_5_np = adaptive_pool2d_forward(x=self.x_np, + output_size=[None, 3], + pool_type="max") def test_static_graph(self): for use_cuda in ([False, True] @@ -113,13 +120,13 @@ class TestAdaptiveMaxPool2DAPI(unittest.TestCase): paddle.enable_static() x = paddle.fluid.data(name="x", shape=[2, 3, 7, 7], dtype="float32") - out_1 = paddle.nn.functional.adaptive_max_pool2d( - x=x, output_size=[3, 3]) + out_1 = paddle.nn.functional.adaptive_max_pool2d(x=x, + output_size=[3, 3]) out_2 = paddle.nn.functional.adaptive_max_pool2d(x=x, output_size=5) - out_3 = paddle.nn.functional.adaptive_max_pool2d( - x=x, output_size=[2, 5]) + out_3 = paddle.nn.functional.adaptive_max_pool2d(x=x, + output_size=[2, 5]) #out_4 = paddle.nn.functional.adaptive_max_pool2d( # x=x, output_size=[3, 3], data_format="NHWC") @@ -128,10 +135,10 @@ class TestAdaptiveMaxPool2DAPI(unittest.TestCase): x=x, output_size=[None, 3]) exe = paddle.static.Executor(place=place) - [res_1, res_2, res_3, res_5] = exe.run( - fluid.default_main_program(), - feed={"x": self.x_np}, - fetch_list=[out_1, out_2, out_3, out_5]) + [res_1, res_2, res_3, + res_5] = exe.run(fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_5]) assert np.allclose(res_1, self.res_1_np) @@ -150,13 +157,14 @@ class TestAdaptiveMaxPool2DAPI(unittest.TestCase): paddle.disable_static(place=place) x = paddle.to_tensor(self.x_np) - out_1 = paddle.nn.functional.adaptive_max_pool2d( - x=x, return_mask=False, output_size=[3, 3]) + out_1 = paddle.nn.functional.adaptive_max_pool2d(x=x, + return_mask=False, + output_size=[3, 3]) out_2 = paddle.nn.functional.adaptive_max_pool2d(x=x, output_size=5) - out_3 = paddle.nn.functional.adaptive_max_pool2d( - x=x, output_size=[2, 5]) + out_3 = paddle.nn.functional.adaptive_max_pool2d(x=x, + output_size=[2, 5]) #out_4 = paddle.nn.functional.adaptive_max_pool2d( # x=x, output_size=[3, 3], data_format="NHWC") @@ -176,16 +184,20 @@ class TestAdaptiveMaxPool2DAPI(unittest.TestCase): class TestAdaptiveMaxPool2DClassAPI(unittest.TestCase): + def setUp(self): self.x_np = np.random.random([2, 3, 7, 7]).astype("float32") - self.res_1_np = adaptive_pool2d_forward( - x=self.x_np, output_size=[3, 3], pool_type="max") + self.res_1_np = adaptive_pool2d_forward(x=self.x_np, + output_size=[3, 3], + pool_type="max") - self.res_2_np = adaptive_pool2d_forward( - x=self.x_np, output_size=5, pool_type="max") + self.res_2_np = adaptive_pool2d_forward(x=self.x_np, + output_size=5, + pool_type="max") - self.res_3_np = adaptive_pool2d_forward( - x=self.x_np, output_size=[2, 5], pool_type="max") + self.res_3_np = adaptive_pool2d_forward(x=self.x_np, + output_size=[2, 5], + pool_type="max") #self.res_4_np = adaptive_pool2d_forward( # x=self.x_np, @@ -193,8 +205,9 @@ class 
TestAdaptiveMaxPool2DClassAPI(unittest.TestCase): # pool_type="max", # data_format="NHWC") - self.res_5_np = adaptive_pool2d_forward( - x=self.x_np, output_size=[None, 3], pool_type="max") + self.res_5_np = adaptive_pool2d_forward(x=self.x_np, + output_size=[None, 3], + pool_type="max") def test_static_graph(self): for use_cuda in ([False, True] @@ -221,10 +234,10 @@ class TestAdaptiveMaxPool2DClassAPI(unittest.TestCase): out_5 = adaptive_max_pool(x=x) exe = paddle.static.Executor(place=place) - [res_1, res_2, res_3, res_5] = exe.run( - fluid.default_main_program(), - feed={"x": self.x_np}, - fetch_list=[out_1, out_2, out_3, out_5]) + [res_1, res_2, res_3, + res_5] = exe.run(fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_5]) assert np.allclose(res_1, self.res_1_np) @@ -272,14 +285,14 @@ class TestAdaptiveMaxPool2DClassAPI(unittest.TestCase): class TestOutDtype(unittest.TestCase): + def test_max_pool(self): api_fn = F.adaptive_max_pool2d shape = [1, 3, 32, 32] - check_out_dtype( - api_fn, - in_specs=[(shape, )], - expect_dtypes=['float32', 'float64'], - output_size=16) + check_out_dtype(api_fn, + in_specs=[(shape, )], + expect_dtypes=['float32', 'float64'], + output_size=16) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py index 2a8fe51ae7f..0f4a89c4761 100755 --- a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py @@ -77,19 +77,19 @@ def adaptive_pool3d_forward(x, w_end = adaptive_end_index(j, W, output_size[2]) if data_format == 'NCDHW': - x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start: - w_end] + x_masked = x[:, :, d_start:d_end, h_start:h_end, + w_start:w_end] if pool_type == 'avg': field_size = (d_end - d_start) * (h_end - h_start) * ( w_end - w_start) - out[:, :, k, i, j] = np.sum(x_masked, - axis=(2, 3, 4)) / field_size + out[:, :, k, i, + j] = np.sum(x_masked, axis=(2, 3, 4)) / field_size elif pool_type == 'max': out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4)) elif data_format == 'NDHWC': - x_masked = x[:, d_start:d_end, h_start:h_end, w_start: - w_end, :] + x_masked = x[:, d_start:d_end, h_start:h_end, + w_start:w_end, :] if pool_type == 'avg': field_size = (d_end - d_start) * (h_end - h_start) * ( w_end - w_start) @@ -101,33 +101,38 @@ def adaptive_pool3d_forward(x, class TestAdaptiveMaxPool3DAPI(unittest.TestCase): + def setUp(self): self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32") - self.res_1_np = adaptive_pool3d_forward( - x=self.x_np, output_size=[3, 3, 3], pool_type="max") + self.res_1_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[3, 3, 3], + pool_type="max") - self.res_2_np = adaptive_pool3d_forward( - x=self.x_np, output_size=5, pool_type="max") + self.res_2_np = adaptive_pool3d_forward(x=self.x_np, + output_size=5, + pool_type="max") - self.res_3_np = adaptive_pool3d_forward( - x=self.x_np, output_size=[2, 3, 5], pool_type="max") + self.res_3_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[2, 3, 5], + pool_type="max") - self.res_4_np = adaptive_pool3d_forward( - x=self.x_np, - output_size=[3, 3, 3], - pool_type="max", - data_format="NDHWC") + self.res_4_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[3, 3, 3], + pool_type="max", + data_format="NDHWC") - self.res_5_np = adaptive_pool3d_forward( - x=self.x_np, output_size=[None, 3, None], pool_type="max") + 
self.res_5_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[None, 3, None], + pool_type="max") def test_static_graph(self): for use_cuda in ([False, True] if core.is_compiled_with_cuda() else [False]): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.enable_static() - x = paddle.fluid.data( - name="x", shape=[2, 3, 5, 7, 7], dtype="float32") + x = paddle.fluid.data(name="x", + shape=[2, 3, 5, 7, 7], + dtype="float32") out_1 = paddle.nn.functional.adaptive_max_pool3d( x=x, output_size=[3, 3, 3]) @@ -144,10 +149,10 @@ class TestAdaptiveMaxPool3DAPI(unittest.TestCase): x=x, output_size=[None, 3, None]) exe = paddle.static.Executor(place=place) - [res_1, res_2, res_3, res_5] = exe.run( - fluid.default_main_program(), - feed={"x": self.x_np}, - fetch_list=[out_1, out_2, out_3, out_5]) + [res_1, res_2, res_3, + res_5] = exe.run(fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_5]) assert np.allclose(res_1, self.res_1_np) @@ -192,16 +197,20 @@ class TestAdaptiveMaxPool3DAPI(unittest.TestCase): class TestAdaptiveMaxPool3DClassAPI(unittest.TestCase): + def setUp(self): self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32") - self.res_1_np = adaptive_pool3d_forward( - x=self.x_np, output_size=[3, 3, 3], pool_type="max") + self.res_1_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[3, 3, 3], + pool_type="max") - self.res_2_np = adaptive_pool3d_forward( - x=self.x_np, output_size=5, pool_type="max") + self.res_2_np = adaptive_pool3d_forward(x=self.x_np, + output_size=5, + pool_type="max") - self.res_3_np = adaptive_pool3d_forward( - x=self.x_np, output_size=[2, 3, 5], pool_type="max") + self.res_3_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[2, 3, 5], + pool_type="max") # self.res_4_np = adaptive_pool3d_forward( # x=self.x_np, @@ -209,16 +218,18 @@ class TestAdaptiveMaxPool3DClassAPI(unittest.TestCase): # pool_type="max", # data_format="NDHWC") - self.res_5_np = adaptive_pool3d_forward( - x=self.x_np, output_size=[None, 3, None], pool_type="max") + self.res_5_np = adaptive_pool3d_forward(x=self.x_np, + output_size=[None, 3, None], + pool_type="max") def test_static_graph(self): for use_cuda in ([False, True] if core.is_compiled_with_cuda() else [False]): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.enable_static() - x = paddle.fluid.data( - name="x", shape=[2, 3, 5, 7, 7], dtype="float32") + x = paddle.fluid.data(name="x", + shape=[2, 3, 5, 7, 7], + dtype="float32") adaptive_max_pool = paddle.nn.AdaptiveMaxPool3D( output_size=[3, 3, 3]) @@ -240,10 +251,10 @@ class TestAdaptiveMaxPool3DClassAPI(unittest.TestCase): out_5 = adaptive_max_pool(x=x) exe = paddle.static.Executor(place=place) - [res_1, res_2, res_3, res_5] = exe.run( - fluid.default_main_program(), - feed={"x": self.x_np}, - fetch_list=[out_1, out_2, out_3, out_5]) + [res_1, res_2, res_3, + res_5] = exe.run(fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_5]) assert np.allclose(res_1, self.res_1_np) @@ -293,14 +304,14 @@ class TestAdaptiveMaxPool3DClassAPI(unittest.TestCase): class TestOutDtype(unittest.TestCase): + def test_max_pool(self): api_fn = F.adaptive_max_pool3d shape = [1, 3, 32, 32, 32] - check_out_dtype( - api_fn, - in_specs=[(shape, )], - expect_dtypes=['float32', 'float64'], - output_size=16) + check_out_dtype(api_fn, + in_specs=[(shape, )], + expect_dtypes=['float32', 'float64'], + output_size=16) if __name__ == '__main__': diff --git 
a/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py b/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py index 5424a1447b8..14c201d7606 100644 --- a/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py +++ b/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py @@ -32,8 +32,9 @@ def add_position_encoding(input, alpha=1.0, beta=1.0): for i in range(batch_size): for j in range(max_length): for k in range(half_shape): - val = j / pow(10000.0, k * 1.0 / ( - half_shape - 1)) if half_shape > 1 else j / 10000.0 + val = j / pow( + 10000.0, k * 1.0 / + (half_shape - 1)) if half_shape > 1 else j / 10000.0 out[i, j, k] = \ input[i, j, k] * alpha + math.sin(val) * beta out[i, j, half_shape + k] = \ @@ -54,7 +55,9 @@ class TestAddPositionEncodingTensorOp(OpTest): self.dtype = np.float64 self.init_input_output() - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(self.x), } + self.inputs = { + 'X': OpTest.np_dtype_to_fluid_dtype(self.x), + } self.outputs = {'Out': self.out} self.attrs = {'alpha': self.alpha, 'beta': self.beta} @@ -94,7 +97,9 @@ class TestAddPositionEncodingLoDTensorOp(OpTest): self.dtype = np.float64 self.init_input_output() - self.inputs = {'X': (self.x, self.lod), } + self.inputs = { + 'X': (self.x, self.lod), + } self.outputs = {'Out': (self.out, self.lod)} self.attrs = {'alpha': self.alpha, 'beta': self.beta} @@ -129,8 +134,9 @@ class TestAddPositionEncodingLoDTensorOp(OpTest): max_length = self.lod[0][i] for j in range(max_length): for k in range(half_shape): - val = j / pow(10000.0, k * 1.0 / ( - half_shape - 1)) if half_shape > 1 else j / 10000.0 + val = j / pow( + 10000.0, k * 1.0 / + (half_shape - 1)) if half_shape > 1 else j / 10000.0 pos = start + j self.out[pos, k] = \ self.x[pos, k] * self.alpha + math.sin(val) * self.beta @@ -140,19 +146,22 @@ class TestAddPositionEncodingLoDTensorOp(OpTest): class TestAddPositionEncodingOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): input_data = np.random.random((4, 16, 8)).astype("float32") def test_Variable(): # the input type must be Variable - fluid.layers.add_position_encoding( - input=input_data, alpha=1.0, beta=1.0) + fluid.layers.add_position_encoding(input=input_data, + alpha=1.0, + beta=1.0) self.assertRaises(TypeError, test_Variable) class TestAddPositionEncodingOpDygraph(unittest.TestCase): + def test_dygraph(self): paddle.disable_static() tensor = np.random.randn(16, 32, 64) diff --git a/python/paddle/fluid/tests/unittests/test_add_reader_dependency.py b/python/paddle/fluid/tests/unittests/test_add_reader_dependency.py index a1205f7092a..c7479e059b4 100644 --- a/python/paddle/fluid/tests/unittests/test_add_reader_dependency.py +++ b/python/paddle/fluid/tests/unittests/test_add_reader_dependency.py @@ -21,15 +21,15 @@ import time def inplace_add(x, bias): helper = LayerHelper('scale', **locals()) - helper.append_op( - type='scale', - inputs={'X': [x]}, - outputs={'Out': [x]}, - attrs={'bias': bias}) + helper.append_op(type='scale', + inputs={'X': [x]}, + outputs={'Out': [x]}, + attrs={'bias': bias}) return x class TestAddReaderDependency(unittest.TestCase): + def setUp(self): self.batch_num = 3 self.sleep_time = 2 @@ -54,11 +54,12 @@ class TestAddReaderDependency(unittest.TestCase): def data_source(): for _ in range(self.batch_num): time.sleep(self.sleep_time) # sleep some times - yield np.random.uniform( - low=-1, high=1, size=[1]).astype('float32'), + yield np.random.uniform(low=-1, high=1, + 
size=[1]).astype('float32'), - persistable_in = fluid.data( - name='persistable_in', dtype='float32', shape=[1]) + persistable_in = fluid.data(name='persistable_in', + dtype='float32', + shape=[1]) persistable_in.persistable = True persistable_in = inplace_add(persistable_in, bias=1) @@ -97,6 +98,7 @@ class TestAddReaderDependency(unittest.TestCase): class TestAddReaderDependencyWithoutDoubleBuffer(TestAddReaderDependency): + def setUp(self): self.batch_num = 3 self.sleep_time = 2 diff --git a/python/paddle/fluid/tests/unittests/test_addmm_op.py b/python/paddle/fluid/tests/unittests/test_addmm_op.py index bea7588acd3..da2e2335f7f 100644 --- a/python/paddle/fluid/tests/unittests/test_addmm_op.py +++ b/python/paddle/fluid/tests/unittests/test_addmm_op.py @@ -65,102 +65,91 @@ class TestAddMMOpError(unittest.TestCase): with program_guard(Program(), Program()): # The input type of addmm_op must be Variable. - input = fluid.create_lod_tensor( - np.array([[-1, -1], [-1, -1]]), [[2]], fluid.CPUPlace()) - x1 = fluid.create_lod_tensor( - np.array([[-1, -1], [-1, -1]]), [[2]], fluid.CPUPlace()) - x2 = fluid.create_lod_tensor( - np.array([[-1, -1], [-1, -1]]), [[2]], fluid.CPUPlace()) + input = fluid.create_lod_tensor(np.array([[-1, -1], [-1, -1]]), + [[2]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1, -1], [-1, -1]]), [[2]], + fluid.CPUPlace()) + x2 = fluid.create_lod_tensor(np.array([[-1, -1], [-1, -1]]), [[2]], + fluid.CPUPlace()) self.assertRaises(TypeError, paddle.addmm, input, x1, x2) # The input dtype of mul_op must be float32 or float64. - input = fluid.layers.data( - name='input', - shape=[4, 4], - dtype="int32", - append_batch_size=False) - x3 = fluid.layers.data( - name='x3', shape=[4, 4], dtype="int32", append_batch_size=False) - x4 = fluid.layers.data( - name='x4', shape=[4, 4], dtype="int32", append_batch_size=False) + input = fluid.layers.data(name='input', + shape=[4, 4], + dtype="int32", + append_batch_size=False) + x3 = fluid.layers.data(name='x3', + shape=[4, 4], + dtype="int32", + append_batch_size=False) + x4 = fluid.layers.data(name='x4', + shape=[4, 4], + dtype="int32", + append_batch_size=False) self.assertRaises(TypeError, paddle.addmm, input, x3, x4) # x and y dimension mismatch - x5 = fluid.layers.data( - name='x5', - shape=[4, 5], - dtype="float32", - append_batch_size=False) - x6 = fluid.layers.data( - name='x6', - shape=[4, 4], - dtype="float32", - append_batch_size=False) + x5 = fluid.layers.data(name='x5', + shape=[4, 5], + dtype="float32", + append_batch_size=False) + x6 = fluid.layers.data(name='x6', + shape=[4, 4], + dtype="float32", + append_batch_size=False) self.assertRaises(ValueError, paddle.addmm, input, x5, x6) # input and x are not broadcastable - x7 = fluid.layers.data( - name='x7', - shape=[4, 4], - dtype="float32", - append_batch_size=False) - x8 = fluid.layers.data( - name='x8', - shape=[4, 4], - dtype="float32", - append_batch_size=False) - input1 = fluid.layers.data( - name='input1', - shape=[2, 4], - dtype="float32", - append_batch_size=False) + x7 = fluid.layers.data(name='x7', + shape=[4, 4], + dtype="float32", + append_batch_size=False) + x8 = fluid.layers.data(name='x8', + shape=[4, 4], + dtype="float32", + append_batch_size=False) + input1 = fluid.layers.data(name='input1', + shape=[2, 4], + dtype="float32", + append_batch_size=False) self.assertRaises(ValueError, paddle.addmm, input1, x7, x8) # input and x are not broadcastable - x9 = fluid.layers.data( - name='x9', - shape=[4, 4], - dtype="float32", - append_batch_size=False) 
- x10 = fluid.layers.data( - name='x10', - shape=[4, 4], - dtype="float32", - append_batch_size=False) - input2 = fluid.layers.data( - name='input2', - shape=[1, 2], - dtype="float32", - append_batch_size=False) + x9 = fluid.layers.data(name='x9', + shape=[4, 4], + dtype="float32", + append_batch_size=False) + x10 = fluid.layers.data(name='x10', + shape=[4, 4], + dtype="float32", + append_batch_size=False) + input2 = fluid.layers.data(name='input2', + shape=[1, 2], + dtype="float32", + append_batch_size=False) self.assertRaises(ValueError, paddle.addmm, input2, x9, x10) - x11 = fluid.layers.data( - name='x11', - shape=[4, 4], - dtype="float32", - append_batch_size=False) - x12 = fluid.layers.data( - name='x12', - shape=[4, 4], - dtype="float32", - append_batch_size=False) - input3 = fluid.layers.data( - name='input3', - shape=[4, 2], - dtype="float32", - append_batch_size=False) + x11 = fluid.layers.data(name='x11', + shape=[4, 4], + dtype="float32", + append_batch_size=False) + x12 = fluid.layers.data(name='x12', + shape=[4, 4], + dtype="float32", + append_batch_size=False) + input3 = fluid.layers.data(name='input3', + shape=[4, 2], + dtype="float32", + append_batch_size=False) self.assertRaises(ValueError, paddle.addmm, input3, x11, x12) - x13 = fluid.layers.data( - name='x13', - shape=[4, 4], - dtype="float32", - append_batch_size=False) - x14 = fluid.layers.data( - name='x14', - shape=[4, 4], - dtype="float32", - append_batch_size=False) - input4 = fluid.layers.data( - name='input4', - shape=[3, 1], - dtype="float32", - append_batch_size=False) + x13 = fluid.layers.data(name='x13', + shape=[4, 4], + dtype="float32", + append_batch_size=False) + x14 = fluid.layers.data(name='x14', + shape=[4, 4], + dtype="float32", + append_batch_size=False) + input4 = fluid.layers.data(name='input4', + shape=[3, 1], + dtype="float32", + append_batch_size=False) self.assertRaises(ValueError, paddle.addmm, input4, x13, x14) @@ -259,6 +248,7 @@ class TestAddMMOp4(OpTest): class TestAddMMOp5(unittest.TestCase): + def test_api_with_dygraph(self): np_input = np.random.random((20, 30)).astype(np.float32) np_x = np.random.random((20, 6)).astype(np.float32) @@ -273,6 +263,7 @@ class TestAddMMOp5(unittest.TestCase): class TestAddMMAPI(unittest.TestCase): + def test_api_error(self): data_x = np.ones((2, 2)).astype(np.float32) data_y = np.ones((2, 2)).astype(np.float32) @@ -285,8 +276,11 @@ class TestAddMMAPI(unittest.TestCase): x = paddle.to_tensor(data_x_wrong) y = paddle.to_tensor(data_y) input = paddle.to_tensor(data_input) - out = paddle.tensor.addmm( - input=input, x=x, y=y, beta=0.5, alpha=5.0) + out = paddle.tensor.addmm(input=input, + x=x, + y=y, + beta=0.5, + alpha=5.0) self.assertRaises(ValueError, test_error1) @@ -295,8 +289,11 @@ class TestAddMMAPI(unittest.TestCase): x = paddle.to_tensor(data_x_wrong) y = paddle.to_tensor(data_y) input = paddle.to_tensor(data_input) - out = paddle.tensor.addmm( - input=input, x=x, y=y, beta=0.5, alpha=5.0) + out = paddle.tensor.addmm(input=input, + x=x, + y=y, + beta=0.5, + alpha=5.0) self.assertRaises(ValueError, test_error2) @@ -305,8 +302,11 @@ class TestAddMMAPI(unittest.TestCase): x = paddle.to_tensor(data_x) y = paddle.to_tensor(data_y) input = paddle.to_tensor(data_input_wrong) - out = paddle.tensor.addmm( - input=input, x=x, y=y, beta=0.5, alpha=5.0) + out = paddle.tensor.addmm(input=input, + x=x, + y=y, + beta=0.5, + alpha=5.0) self.assertRaises(ValueError, test_error3) @@ -315,8 +315,11 @@ class TestAddMMAPI(unittest.TestCase): x = paddle.to_tensor(data_x) 
y = paddle.to_tensor(data_y) input = paddle.to_tensor(data_input_wrong) - out = paddle.tensor.addmm( - input=input, x=x, y=y, beta=0.5, alpha=5.0) + out = paddle.tensor.addmm(input=input, + x=x, + y=y, + beta=0.5, + alpha=5.0) self.assertRaises(ValueError, test_error4) @@ -334,10 +337,13 @@ class TestAddMMAPI(unittest.TestCase): x = paddle.to_tensor(data_x) y = paddle.to_tensor(data_y) input = paddle.to_tensor(data_input) - paddle_output = paddle.tensor.addmm( - input=input, x=x, y=y, beta=data_beta, alpha=data_alpha) - numpy_output = data_beta * data_input + data_alpha * np.dot(data_x, - data_y) + paddle_output = paddle.tensor.addmm(input=input, + x=x, + y=y, + beta=data_beta, + alpha=data_alpha) + numpy_output = data_beta * data_input + data_alpha * np.dot( + data_x, data_y) self.assertEqual(np.allclose(numpy_output, paddle_output.numpy()), True) @@ -355,10 +361,13 @@ class TestAddMMAPI(unittest.TestCase): x = paddle.to_tensor(data_x) y = paddle.to_tensor(data_y) input = paddle.to_tensor(data_input) - paddle_output = paddle.tensor.addmm( - input=input, x=x, y=y, beta=data_beta, alpha=data_alpha) - numpy_output = data_beta * data_input + data_alpha * np.dot(data_x, - data_y) + paddle_output = paddle.tensor.addmm(input=input, + x=x, + y=y, + beta=data_beta, + alpha=data_alpha) + numpy_output = data_beta * data_input + data_alpha * np.dot( + data_x, data_y) self.assertEqual(np.allclose(numpy_output, paddle_output.numpy()), True) @@ -376,10 +385,13 @@ class TestAddMMAPI(unittest.TestCase): x = paddle.to_tensor(data_x) y = paddle.to_tensor(data_y) input = paddle.to_tensor(data_input) - paddle_output = paddle.tensor.addmm( - input=input, x=x, y=y, beta=data_beta, alpha=data_alpha) - numpy_output = data_beta * data_input + data_alpha * np.dot(data_x, - data_y) + paddle_output = paddle.tensor.addmm(input=input, + x=x, + y=y, + beta=data_beta, + alpha=data_alpha) + numpy_output = data_beta * data_input + data_alpha * np.dot( + data_x, data_y) self.assertEqual(np.allclose(numpy_output, paddle_output.numpy()), True) diff --git a/python/paddle/fluid/tests/unittests/test_affine_channel_op.py b/python/paddle/fluid/tests/unittests/test_affine_channel_op.py index 6157314b1f0..e22f53a2393 100644 --- a/python/paddle/fluid/tests/unittests/test_affine_channel_op.py +++ b/python/paddle/fluid/tests/unittests/test_affine_channel_op.py @@ -36,6 +36,7 @@ def affine_channel(x, scale, bias, layout): class TestAffineChannelOp(OpTest): + def setUp(self): self.op_type = "affine_channel" self.init_test_case() @@ -69,6 +70,7 @@ class TestAffineChannelOp(OpTest): class TestAffineChannelOpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program()): @@ -79,28 +81,32 @@ class TestAffineChannelOpError(unittest.TestCase): self.assertRaises(TypeError, test_x_type) def test_x_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[None, 1, 2, 2], dtype='int32') + x2 = fluid.layers.data(name='x2', + shape=[None, 1, 2, 2], + dtype='int32') fluid.layers.affine_channel(x2) self.assertRaises(TypeError, test_x_dtype) def test_scale_type(): - x3 = fluid.layers.data( - name='x3', shape=[None, 1, 2, 2], dtype='float32') + x3 = fluid.layers.data(name='x3', + shape=[None, 1, 2, 2], + dtype='float32') fluid.layers.affine_channel(x3, scale=1) self.assertRaises(TypeError, test_scale_type) def test_bias_type(): - x4 = fluid.layers.data( - name='x4', shape=[None, 1, 2, 2], dtype='float32') + x4 = fluid.layers.data(name='x4', + shape=[None, 1, 2, 2], + dtype='float32') fluid.layers.affine_channel(x4, bias=1) 
self.assertRaises(TypeError, test_bias_type) class TestAffineChannelNHWC(TestAffineChannelOp): + def init_test_case(self): self.shape = [2, 3, 3, 100] self.C = 100 @@ -114,6 +120,7 @@ class TestAffineChannelNHWC(TestAffineChannelOp): class TestAffineChannel2D(TestAffineChannelOp): + def init_test_case(self): self.shape = [2, 100] self.C = 100 diff --git a/python/paddle/fluid/tests/unittests/test_affine_grid_function.py b/python/paddle/fluid/tests/unittests/test_affine_grid_function.py index 6ca13c7a729..61ecc6a8f12 100644 --- a/python/paddle/fluid/tests/unittests/test_affine_grid_function.py +++ b/python/paddle/fluid/tests/unittests/test_affine_grid_function.py @@ -22,6 +22,7 @@ import unittest class AffineGridTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', theta_shape=(20, 2, 3), @@ -48,8 +49,9 @@ class AffineGridTestCase(unittest.TestCase): start = fluid.Program() with fluid.unique_name.guard(): with fluid.program_guard(main, start): - theta_var = fluid.data( - "input", self.theta_shape, dtype=self.dtype) + theta_var = fluid.data("input", + self.theta_shape, + dtype=self.dtype) y_var = fluid.layers.affine_grid(theta_var, self.output_shape) feed_dict = {"input": self.theta} exe = fluid.Executor(place) @@ -63,12 +65,12 @@ class AffineGridTestCase(unittest.TestCase): start = fluid.Program() with fluid.unique_name.guard(): with fluid.program_guard(main, start): - theta_var = fluid.data( - "input", self.theta_shape, dtype=self.dtype) - y_var = F.affine_grid( - theta_var, - self.output_shape, - align_corners=self.align_corners) + theta_var = fluid.data("input", + self.theta_shape, + dtype=self.dtype) + y_var = F.affine_grid(theta_var, + self.output_shape, + align_corners=self.align_corners) feed_dict = {"input": self.theta} exe = fluid.Executor(place) exe.run(start) @@ -80,10 +82,11 @@ class AffineGridTestCase(unittest.TestCase): theta_var = dg.to_variable( self.theta) if not self.invalid_theta else "invalid" output_shape = dg.to_variable( - self. 
- output_shape) if self.variable_output_shape else self.output_shape - y_var = F.affine_grid( - theta_var, output_shape, align_corners=self.align_corners) + self.output_shape + ) if self.variable_output_shape else self.output_shape + y_var = F.affine_grid(theta_var, + output_shape, + align_corners=self.align_corners) y_np = y_var.numpy() return y_np @@ -106,6 +109,7 @@ class AffineGridTestCase(unittest.TestCase): class AffineGridErrorTestCase(AffineGridTestCase): + def runTest(self): place = fluid.CPUPlace() with dg.guard(place): @@ -119,21 +123,18 @@ def add_cases(suite): suite.addTest(AffineGridTestCase(methodName='runTest', align_corners=False)) suite.addTest( - AffineGridTestCase( - methodName='runTest', variable_output_shape=True)) + AffineGridTestCase(methodName='runTest', variable_output_shape=True)) suite.addTest( - AffineGridTestCase( - methodName='runTest', - theta_shape=(20, 2, 3), - output_shape=[20, 1, 7, 7], - align_corners=True)) + AffineGridTestCase(methodName='runTest', + theta_shape=(20, 2, 3), + output_shape=[20, 1, 7, 7], + align_corners=True)) def add_error_cases(suite): suite.addTest( - AffineGridErrorTestCase( - methodName='runTest', output_shape="not_valid")) + AffineGridErrorTestCase(methodName='runTest', output_shape="not_valid")) suite.addTest( AffineGridErrorTestCase( methodName='runTest', diff --git a/python/paddle/fluid/tests/unittests/test_affine_grid_op.py b/python/paddle/fluid/tests/unittests/test_affine_grid_op.py index 8277256009e..9c5b2e9971e 100644 --- a/python/paddle/fluid/tests/unittests/test_affine_grid_op.py +++ b/python/paddle/fluid/tests/unittests/test_affine_grid_op.py @@ -26,14 +26,12 @@ def AffineGrid(theta, size, align_corners): if not align_corners: h_factor = (h - 1) / float(h) w_factor = (w - 1) / float(w) - h_idx = np.repeat( - np.linspace(-1, 1, h)[np.newaxis, :], w, - axis=0).T[:, :, np.newaxis] * h_factor - w_idx = np.repeat( - np.linspace(-1, 1, w)[np.newaxis, :], h, - axis=0)[:, :, np.newaxis] * w_factor - grid = np.concatenate( - [w_idx, h_idx, np.ones([h, w, 1])], axis=2) # h * w * 3 + h_idx = np.repeat(np.linspace(-1, 1, h)[np.newaxis, :], w, + axis=0).T[:, :, np.newaxis] * h_factor + w_idx = np.repeat(np.linspace(-1, 1, w)[np.newaxis, :], h, + axis=0)[:, :, np.newaxis] * w_factor + grid = np.concatenate([w_idx, h_idx, np.ones([h, w, 1])], + axis=2) # h * w * 3 grid = np.repeat(grid[np.newaxis, :], size[0], axis=0) # n * h * w *3 ret = np.zeros([n, h * w, 2]) @@ -41,11 +39,13 @@ def AffineGrid(theta, size, align_corners): for i in range(len(theta)): ret[i] = np.dot(grid[i].reshape([h * w, 3]), theta[i]) -# print ret.reshape([h * w, 2]).astype("float32") + +# print ret.reshape([h * w, 2]).astype("float32") return ret.reshape([n, h, w, 2]).astype("float32") class TestAffineGridOp(OpTest): + def setUp(self): self.initTestCase() self.op_type = "affine_grid" @@ -78,6 +78,7 @@ class TestAffineGridOp(OpTest): class TestAffineGridOpCase1(TestAffineGridOp): + def initTestCase(self): self.theta_shape = (20, 2, 3) self.output_shape = np.array([20, 2, 5, 7]).astype("int32") @@ -89,6 +90,7 @@ class TestAffineGridOpCase1(TestAffineGridOp): class TestAffineGridOpCase2(TestAffineGridOp): + def initTestCase(self): self.theta_shape = (20, 2, 3) self.output_shape = np.array([20, 2, 5, 7]).astype("int32") @@ -98,6 +100,7 @@ class TestAffineGridOpCase2(TestAffineGridOp): class TestAffineGridOpCase3(TestAffineGridOp): + def initTestCase(self): self.theta_shape = (20, 2, 3) self.output_shape = np.array([20, 2, 5, 7]).astype("int32") @@ -107,6 +110,7 @@ 
class TestAffineGridOpCase3(TestAffineGridOp): class TestAffineGridOpCase4(TestAffineGridOp): + def initTestCase(self): self.theta_shape = (25, 2, 3) self.output_shape = np.array([25, 2, 5, 6]).astype("int32") diff --git a/python/paddle/fluid/tests/unittests/test_allclose_layer.py b/python/paddle/fluid/tests/unittests/test_allclose_layer.py index 1e080c80367..66afbcfe209 100644 --- a/python/paddle/fluid/tests/unittests/test_allclose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_allclose_layer.py @@ -20,16 +20,28 @@ from paddle.fluid.framework import _test_eager_guard class TestAllcloseLayer(unittest.TestCase): + def allclose_check(self, use_cuda, dtype='float32'): a = fluid.data(name="a", shape=[2], dtype=dtype) b = fluid.data(name="b", shape=[2], dtype=dtype) - result = paddle.allclose( - a, b, rtol=1e-05, atol=1e-08, equal_nan=False, name="ignore_nan") - result_nan = paddle.allclose( - a, b, rtol=1e-05, atol=1e-08, equal_nan=True, name="equal_nan") - result_corner = paddle.allclose( - a, b, rtol=0.01, atol=0.0, name="corner_case") + result = paddle.allclose(a, + b, + rtol=1e-05, + atol=1e-08, + equal_nan=False, + name="ignore_nan") + result_nan = paddle.allclose(a, + b, + rtol=1e-05, + atol=1e-08, + equal_nan=True, + name="equal_nan") + result_corner = paddle.allclose(a, + b, + rtol=0.01, + atol=0.0, + name="corner_case") place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) @@ -37,24 +49,30 @@ class TestAllcloseLayer(unittest.TestCase): x = np.array([10000., 1e-07]).astype(dtype) y = np.array([10000.1, 1e-08]).astype(dtype) - result_v, result_nan_v = exe.run(feed={'a': x, - 'b': y}, + result_v, result_nan_v = exe.run(feed={ + 'a': x, + 'b': y + }, fetch_list=[result, result_nan]) self.assertEqual(result_v[0], False) self.assertEqual(result_nan_v[0], False) x = np.array([10000., 1e-08]).astype(dtype) y = np.array([10000.1, 1e-09]).astype(dtype) - result_v, result_nan_v = exe.run(feed={'a': x, - 'b': y}, + result_v, result_nan_v = exe.run(feed={ + 'a': x, + 'b': y + }, fetch_list=[result, result_nan]) self.assertEqual(result_v[0], True) self.assertEqual(result_nan_v[0], True) x = np.array([1.0, float('nan')]).astype(dtype) y = np.array([1.0, float('nan')]).astype(dtype) - result_v, result_nan_v = exe.run(feed={'a': x, - 'b': y}, + result_v, result_nan_v = exe.run(feed={ + 'a': x, + 'b': y + }, fetch_list=[result, result_nan]) self.assertEqual(result_v[0], False) self.assertEqual(result_nan_v[0], True) @@ -111,68 +129,68 @@ class TestAllcloseLayer(unittest.TestCase): with fluid.dygraph.guard(): x_v_1 = paddle.to_tensor(x_1) y_v_1 = paddle.to_tensor(y_1) - ret_1 = paddle.allclose( - x_v_1, - y_v_1, - rtol=1e-05, - atol=1e-08, - equal_nan=False, - name='test_1') + ret_1 = paddle.allclose(x_v_1, + y_v_1, + rtol=1e-05, + atol=1e-08, + equal_nan=False, + name='test_1') self.assertEqual(ret_1.numpy()[0], False) - ret_1 = paddle.allclose( - x_v_1, - y_v_1, - rtol=1e-05, - atol=1e-08, - equal_nan=True, - name='test_2') + ret_1 = paddle.allclose(x_v_1, + y_v_1, + rtol=1e-05, + atol=1e-08, + equal_nan=True, + name='test_2') self.assertEqual(ret_1.numpy()[0], False) x_v_2 = paddle.to_tensor(x_2) y_v_2 = paddle.to_tensor(y_2) - ret_2 = paddle.allclose( - x_v_2, - y_v_2, - rtol=1e-05, - atol=1e-08, - equal_nan=False, - name='test_3') + ret_2 = paddle.allclose(x_v_2, + y_v_2, + rtol=1e-05, + atol=1e-08, + equal_nan=False, + name='test_3') self.assertEqual(ret_2.numpy()[0], True) - ret_2 = paddle.allclose( - x_v_2, - y_v_2, - rtol=1e-05, - atol=1e-08, - 
equal_nan=True, - name='test_4') + ret_2 = paddle.allclose(x_v_2, + y_v_2, + rtol=1e-05, + atol=1e-08, + equal_nan=True, + name='test_4') self.assertEqual(ret_2.numpy()[0], True) x_v_3 = paddle.to_tensor(x_3) y_v_3 = paddle.to_tensor(y_3) - ret_3 = paddle.allclose( - x_v_3, - y_v_3, - rtol=1e-05, - atol=1e-08, - equal_nan=False, - name='test_5') + ret_3 = paddle.allclose(x_v_3, + y_v_3, + rtol=1e-05, + atol=1e-08, + equal_nan=False, + name='test_5') self.assertEqual(ret_3.numpy()[0], False) - ret_3 = paddle.allclose( - x_v_3, - y_v_3, - rtol=1e-05, - atol=1e-08, - equal_nan=True, - name='test_6') + ret_3 = paddle.allclose(x_v_3, + y_v_3, + rtol=1e-05, + atol=1e-08, + equal_nan=True, + name='test_6') self.assertEqual(ret_3.numpy()[0], True) # for corner case x_v_4 = paddle.to_tensor(x_4) y_v_4 = paddle.to_tensor(y_4) - ret_4 = paddle.allclose( - x_v_4, y_v_4, rtol=0.01, atol=0.0, name='test_7') + ret_4 = paddle.allclose(x_v_4, + y_v_4, + rtol=0.01, + atol=0.0, + name='test_7') self.assertEqual(ret_4.numpy()[0], False) x_v_5 = paddle.to_tensor(x_5) y_v_5 = paddle.to_tensor(y_5) - ret_5 = paddle.allclose( - x_v_5, y_v_5, rtol=0.015, atol=0.0, name='test_8') + ret_5 = paddle.allclose(x_v_5, + y_v_5, + rtol=0.015, + atol=0.0, + name='test_8') self.assertEqual(ret_5.numpy()[0], True) def test_dygraph_mode(self): diff --git a/python/paddle/fluid/tests/unittests/test_allclose_op.py b/python/paddle/fluid/tests/unittests/test_allclose_op.py index ec1c5363fcd..26351abe802 100644 --- a/python/paddle/fluid/tests/unittests/test_allclose_op.py +++ b/python/paddle/fluid/tests/unittests/test_allclose_op.py @@ -19,6 +19,7 @@ import paddle class TestAllcloseOp(OpTest): + def set_args(self): self.input = np.array([10000., 1e-07]).astype("float32") self.other = np.array([10000.1, 1e-08]).astype("float32") @@ -38,13 +39,13 @@ class TestAllcloseOp(OpTest): } self.attrs = {'equal_nan': self.equal_nan} self.outputs = { - 'Out': np.array([ - np.allclose( - self.inputs['Input'], - self.inputs['Other'], - rtol=self.rtol, - atol=self.atol, - equal_nan=self.equal_nan) + 'Out': + np.array([ + np.allclose(self.inputs['Input'], + self.inputs['Other'], + rtol=self.rtol, + atol=self.atol, + equal_nan=self.equal_nan) ]) } @@ -53,7 +54,9 @@ class TestAllcloseOp(OpTest): class TestAllcloseOpException(TestAllcloseOp): + def test_check_output(self): + def test_rtol_num(): self.inputs['Rtol'] = np.array([1e-05, 1e-05]).astype("float64") self.inputs['Atol'] = np.array([1e-08]).astype("float64") @@ -84,6 +87,7 @@ class TestAllcloseOpException(TestAllcloseOp): class TestAllcloseOpSmallNum(TestAllcloseOp): + def set_args(self): self.input = np.array([10000., 1e-08]).astype("float32") self.other = np.array([10000.1, 1e-09]).astype("float32") @@ -93,6 +97,7 @@ class TestAllcloseOpSmallNum(TestAllcloseOp): class TestAllcloseOpNanFalse(TestAllcloseOp): + def set_args(self): self.input = np.array([1.0, float('nan')]).astype("float32") self.other = np.array([1.0, float('nan')]).astype("float32") @@ -102,6 +107,7 @@ class TestAllcloseOpNanFalse(TestAllcloseOp): class TestAllcloseOpNanTrue(TestAllcloseOp): + def set_args(self): self.input = np.array([1.0, float('nan')]).astype("float32") self.other = np.array([1.0, float('nan')]).astype("float32") @@ -111,6 +117,7 @@ class TestAllcloseOpNanTrue(TestAllcloseOp): class TestAllcloseDygraph(unittest.TestCase): + def test_api_case(self): paddle.disable_static() x_data = np.random.rand(10, 10) @@ -124,7 +131,9 @@ class TestAllcloseDygraph(unittest.TestCase): class 
TestAllcloseError(unittest.TestCase): + def test_input_dtype(self): + def test_x_dtype(): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): @@ -164,6 +173,7 @@ class TestAllcloseError(unittest.TestCase): class TestAllcloseOpFloat32(TestAllcloseOp): + def set_args(self): self.input = np.array([10.1]).astype("float32") self.other = np.array([10]).astype("float32") @@ -173,6 +183,7 @@ class TestAllcloseOpFloat32(TestAllcloseOp): class TestAllcloseOpFloat64(TestAllcloseOp): + def set_args(self): self.input = np.array([10.1]).astype("float64") self.other = np.array([10]).astype("float64") @@ -182,6 +193,7 @@ class TestAllcloseOpFloat64(TestAllcloseOp): class TestAllcloseOpLargeDimInput(TestAllcloseOp): + def set_args(self): self.input = np.array(np.zeros([2048, 1024])).astype("float64") self.other = np.array(np.zeros([2048, 1024])).astype("float64") diff --git a/python/paddle/fluid/tests/unittests/test_allgather.py b/python/paddle/fluid/tests/unittests/test_allgather.py index 9bb34d3db43..ed7e531ffad 100644 --- a/python/paddle/fluid/tests/unittests/test_allgather.py +++ b/python/paddle/fluid/tests/unittests/test_allgather.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestAllGatherOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_allreduce.py b/python/paddle/fluid/tests/unittests/test_allreduce.py index 660f559535c..d3e783b9fe3 100644 --- a/python/paddle/fluid/tests/unittests/test_allreduce.py +++ b/python/paddle/fluid/tests/unittests/test_allreduce.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestAllReduceOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_amp_check_finite_and_scale_op.py b/python/paddle/fluid/tests/unittests/test_amp_check_finite_and_scale_op.py index fbacaa3d5ce..d5ea02e6570 100644 --- a/python/paddle/fluid/tests/unittests/test_amp_check_finite_and_scale_op.py +++ b/python/paddle/fluid/tests/unittests/test_amp_check_finite_and_scale_op.py @@ -19,6 +19,7 @@ import paddle.fluid as fluid class TestCheckFiniteAndUnscaleOp(OpTest): + def setUp(self): self.op_type = "check_finite_and_unscale" self.init_dtype() @@ -39,6 +40,7 @@ class TestCheckFiniteAndUnscaleOp(OpTest): class TestCheckFiniteAndUnscaleOpWithNan(OpTest): + def setUp(self): self.op_type = "check_finite_and_unscale" self.init_dtype() @@ -56,12 +58,13 @@ class TestCheckFiniteAndUnscaleOpWithNan(OpTest): self.dtype = np.float32 def test_check_output(self): - # When input contains nan, do not check the output, + # When input contains nan, do not check the output, # since the output may be nondeterministic and will be discarded. self.check_output(no_check_set=['Out']) class TestCheckFiniteAndUnscaleOpWithInf(OpTest): + def setUp(self): self.op_type = "check_finite_and_unscale" self.init_dtype() @@ -79,7 +82,7 @@ class TestCheckFiniteAndUnscaleOpWithInf(OpTest): self.dtype = np.float32 def test_check_output(self): - # When input contains inf, do not check the output, + # When input contains inf, do not check the output, # since the output may be nondeterministic and will be discarded. 
self.check_output(no_check_set=['Out']) diff --git a/python/paddle/fluid/tests/unittests/test_anchor_generator_op.py b/python/paddle/fluid/tests/unittests/test_anchor_generator_op.py index d31eaa0114c..4cc77beef8c 100644 --- a/python/paddle/fluid/tests/unittests/test_anchor_generator_op.py +++ b/python/paddle/fluid/tests/unittests/test_anchor_generator_op.py @@ -46,10 +46,11 @@ def anchor_generator_in_python(input_feat, anchor_sizes, aspect_ratios, scale_h = anchor_size / stride[1] w = scale_w * base_w h = scale_h * base_h - out_anchors[h_idx, w_idx, idx, :] = [ - (x_ctr - 0.5 * (w - 1)), (y_ctr - 0.5 * (h - 1)), - (x_ctr + 0.5 * (w - 1)), (y_ctr + 0.5 * (h - 1)) - ] + out_anchors[h_idx, w_idx, + idx, :] = [(x_ctr - 0.5 * (w - 1)), + (y_ctr - 0.5 * (h - 1)), + (x_ctr + 0.5 * (w - 1)), + (y_ctr + 0.5 * (h - 1))] idx += 1 # set the variance. @@ -60,6 +61,7 @@ def anchor_generator_in_python(input_feat, anchor_sizes, aspect_ratios, class TestAnchorGeneratorOp(OpTest): + def set_data(self): self.init_test_params() self.init_test_input() diff --git a/python/paddle/fluid/tests/unittests/test_angle_op.py b/python/paddle/fluid/tests/unittests/test_angle_op.py index 05397c2434d..d21eb61b77d 100644 --- a/python/paddle/fluid/tests/unittests/test_angle_op.py +++ b/python/paddle/fluid/tests/unittests/test_angle_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,6 +21,7 @@ from op_test import OpTest import paddle from paddle.fluid import dygraph from paddle import static + paddle.enable_static() @@ -39,6 +40,7 @@ def angle_grad(x, dout): class TestAngleOpFloat(OpTest): + def setUp(self): self.op_type = "angle" self.dtype = "float64" @@ -51,15 +53,16 @@ class TestAngleOpFloat(OpTest): self.check_output() def test_check_grad(self): - self.check_grad( - ['X'], - 'Out', - user_defined_grads=[ - angle_grad(self.x, np.ones_like(self.x) / self.x.size) - ]) + self.check_grad(['X'], + 'Out', + user_defined_grads=[ + angle_grad(self.x, + np.ones_like(self.x) / self.x.size) + ]) class TestAngleOpComplex(OpTest): + def setUp(self): self.op_type = "angle" self.dtype = "complex128" @@ -74,15 +77,16 @@ class TestAngleOpComplex(OpTest): self.check_output() def test_check_grad(self): - self.check_grad( - ['X'], - 'Out', - user_defined_grads=[ - angle_grad(self.x, np.ones_like(self.x) / self.x.size) - ]) + self.check_grad(['X'], + 'Out', + user_defined_grads=[ + angle_grad(self.x, + np.ones_like(self.x) / self.x.size) + ]) class TestAngleAPI(unittest.TestCase): + def setUp(self): self.x = np.random.randn(2, 3) + 1j * np.random.randn(2, 3) self.out = np.angle(self.x) diff --git a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py index 85fe8b76e02..54a83d2a5ec 100644 --- a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py +++ b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -26,8 +26,9 @@ def get_resnet50_model(): main = paddle.static.Program() startup = paddle.static.Program() with paddle.static.program_guard(main, startup): - image = paddle.static.data( - name="image", shape=[None, 3, 224, 224], dtype="float32") + image = paddle.static.data(name="image", + shape=[None, 3, 224, 224], + dtype="float32") label = paddle.static.data(name="label", shape=[None, 1], dtype="int64") model = resnet50() loss_fn = CrossEntropyLoss() @@ -47,6 +48,7 @@ def global_block_contains_op(program, op_type): class TestApplyPassToProgram(unittest.TestCase): + def setUp(self): paddle.enable_static() @@ -72,6 +74,7 @@ class TestApplyPassToProgram(unittest.TestCase): class TestIRPassBase(unittest.TestCase): + def setUp(self): paddle.enable_static() if paddle.is_compiled_with_cuda(): @@ -183,12 +186,13 @@ class TestIRPassBase(unittest.TestCase): for idx in range(batch_num): feed = { - image.name: np.random.rand(*image_shape).astype('float32'), - label.name: np.random.randint( - low=0, - high=self.num_classes, - size=label_shape, - dtype='int64'), + image.name: + np.random.rand(*image_shape).astype('float32'), + label.name: + np.random.randint(low=0, + high=self.num_classes, + size=label_shape, + dtype='int64'), } with paddle.static.scope_guard(scope1): loss_value1 = self.executor.run(main1, diff --git a/python/paddle/fluid/tests/unittests/test_arange.py b/python/paddle/fluid/tests/unittests/test_arange.py index d62c08b072b..b6236033f8b 100644 --- a/python/paddle/fluid/tests/unittests/test_arange.py +++ b/python/paddle/fluid/tests/unittests/test_arange.py @@ -23,6 +23,7 @@ from op_test import OpTest class TestArangeOp(OpTest): + def setUp(self): self.op_type = "range" self.init_config() @@ -33,8 +34,9 @@ class TestArangeOp(OpTest): } self.outputs = { - 'Out': np.arange(self.case[0], self.case[1], - self.case[2]).astype(self.dtype) + 'Out': + np.arange(self.case[0], self.case[1], + self.case[2]).astype(self.dtype) } def init_config(self): @@ -46,42 +48,48 @@ class TestArangeOp(OpTest): class TestFloatArangeOp(TestArangeOp): + def init_config(self): self.dtype = np.float32 self.case = (0, 5, 1) class TestInt32ArangeOp(TestArangeOp): + def init_config(self): self.dtype = np.int32 self.case = (0, 5, 2) class TestFloat64ArangeOp(TestArangeOp): + def init_config(self): self.dtype = np.float64 self.case = (10, 1, -2) class TestInt64ArangeOp(TestArangeOp): + def init_config(self): self.dtype = np.int64 self.case = (-1, -10, -2) class TestArangeOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): self.assertRaises(TypeError, paddle.arange, 10, dtype='int8') class TestArangeAPI(unittest.TestCase): + def test_out(self): with program_guard(Program(), Program()): x1 = paddle.arange(0, 5, 1, 'float32') - place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() exe = paddle.static.Executor(place) out = exe.run(fetch_list=[x1]) @@ -90,9 +98,10 @@ class TestArangeAPI(unittest.TestCase): class 
TestArangeImperative(unittest.TestCase): + def test_out(self): - place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() paddle.disable_static(place) x1 = paddle.arange(0, 5, 1) x2 = paddle.tensor.arange(5) diff --git a/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py b/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py index cbcb4af9269..6056f8f2106 100644 --- a/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py @@ -24,6 +24,7 @@ from paddle.fluid import Program, program_guard class BaseTestCase(OpTest): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4, 5) @@ -45,6 +46,7 @@ class BaseTestCase(OpTest): class TestCase0(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4, 5) @@ -53,6 +55,7 @@ class TestCase0(BaseTestCase): class TestCase1(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4) @@ -61,6 +64,7 @@ class TestCase1(BaseTestCase): class TestCase2(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4) @@ -71,6 +75,7 @@ class TestCase2(BaseTestCase): @unittest.skipIf(not paddle.is_compiled_with_cuda(), "FP16 test runs only on GPU") class TestCase0FP16(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4, 5) @@ -81,6 +86,7 @@ class TestCase0FP16(BaseTestCase): @unittest.skipIf(not paddle.is_compiled_with_cuda(), "FP16 test runs only on GPU") class TestCase1FP16(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (3, 4) @@ -89,6 +95,7 @@ class TestCase1FP16(BaseTestCase): class TestCase2_1(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, 4) @@ -97,6 +104,7 @@ class TestCase2_1(BaseTestCase): class TestCase3(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, ) @@ -105,6 +113,7 @@ class TestCase3(BaseTestCase): class TestCase4(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (1, ) @@ -113,6 +122,7 @@ class TestCase4(BaseTestCase): class TestCase3_(BaseTestCase): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (3, ) @@ -120,6 +130,7 @@ class TestCase3_(BaseTestCase): class BaseTestComplex1_1(OpTest): + def initTestCase(self): self.op_type = 'arg_max' self.dims = (4, 5, 6) @@ -134,17 +145,16 @@ class BaseTestComplex1_1(OpTest): self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} if self.op_type == "arg_min": self.outputs = { - 'Out': np.argmin( - self.x, axis=self.axis).asdtype("int32") + 'Out': np.argmin(self.x, axis=self.axis).asdtype("int32") } else: self.outputs = { - 'Out': np.argmax( - self.x, axis=self.axis).asdtype("int32") + 'Out': np.argmax(self.x, axis=self.axis).asdtype("int32") } class BaseTestComplex1_2(OpTest): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (4, 5, 6) @@ -159,17 +169,16 @@ class BaseTestComplex1_2(OpTest): self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} if self.op_type == "arg_min": self.outputs = { - 'Out': np.argmin( - self.x, axis=self.axis).asdtype("int32") + 'Out': np.argmin(self.x, axis=self.axis).asdtype("int32") } else: self.outputs = { - 'Out': np.argmax( - self.x, axis=self.axis).asdtype("int32") + 'Out': np.argmax(self.x, axis=self.axis).asdtype("int32") } class BaseTestComplex2_1(OpTest): + def initTestCase(self): self.op_type = 'arg_max' 
self.dims = (4, 5, 6) @@ -185,17 +194,20 @@ class BaseTestComplex2_1(OpTest): self.attrs = {'keep_dims': True} if self.op_type == "arg_min": self.outputs = { - 'Out': np.argmin( - self.x, axis=self.axis).asdtype("int32").reshape(4, 5, 1) + 'Out': + np.argmin(self.x, + axis=self.axis).asdtype("int32").reshape(4, 5, 1) } else: self.outputs = { - 'Out': np.argmax( - self.x, axis=self.axis).asdtype("int32").reshape(4, 5, 1) + 'Out': + np.argmax(self.x, + axis=self.axis).asdtype("int32").reshape(4, 5, 1) } class BaseTestComplex2_2(OpTest): + def initTestCase(self): self.op_type = 'arg_min' self.dims = (4, 5, 6) @@ -211,13 +223,15 @@ class BaseTestComplex2_2(OpTest): self.attrs = {'keep_dims': True} if self.op_type == "arg_min": self.outputs = { - 'Out': np.argmin( - self.x, axis=self.axis).asdtype("int32").reshape(4, 5, 1) + 'Out': + np.argmin(self.x, + axis=self.axis).asdtype("int32").reshape(4, 5, 1) } else: self.outputs = { - 'Out': np.argmax( - self.x, axis=self.axis).asdtype("int32").reshape(4, 5, 1) + 'Out': + np.argmax(self.x, + axis=self.axis).asdtype("int32").reshape(4, 5, 1) } diff --git a/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py b/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py index 74f76030a29..83d49acf88f 100644 --- a/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py @@ -24,7 +24,9 @@ from paddle.fluid import Program, program_guard def create_kernel_case(op_type, numpy_op_type): + class ArgMinMaxKernelBaseCase(OpTest): + def initTestCase(self): self.op_type = op_type self.numpy_op_type = numpy_op_type @@ -46,30 +48,35 @@ def create_kernel_case(op_type, numpy_op_type): self.check_output() class ArgMinMaxKernelCase0(ArgMinMaxKernelBaseCase): + def initTestCase(self): self.op_type = op_type self.numpy_op_type = numpy_op_type self.axis = 1 class ArgMinMaxKernelCase1(ArgMinMaxKernelBaseCase): + def initTestCase(self): self.op_type = op_type self.numpy_op_type = numpy_op_type self.axis = 2 class ArgMinMaxKernelCase2(ArgMinMaxKernelBaseCase): + def initTestCase(self): self.op_type = op_type self.numpy_op_type = numpy_op_type self.axis = -1 class ArgMinMaxKernelCase3(ArgMinMaxKernelBaseCase): + def initTestCase(self): self.op_type = op_type self.numpy_op_type = numpy_op_type self.axis = -2 class ArgMinMaxKernelCase4(ArgMinMaxKernelBaseCase): + def setUp(self): self.initTestCase() self.dims = (4, 5, 6) @@ -79,11 +86,11 @@ def create_kernel_case(op_type, numpy_op_type): self.attrs = {"axis": self.axis, "keepdims": True} self.numpy_op = eval("np.%s" % (numpy_op_type)) self.outputs = { - 'Out': self.numpy_op( - self.x, axis=self.axis).reshape((1, 5, 6)) + 'Out': self.numpy_op(self.x, axis=self.axis).reshape((1, 5, 6)) } class ArgMinMaxKernelCase5(ArgMinMaxKernelBaseCase): + def setUp(self): self.initTestCase() self.dims = (4) @@ -93,11 +100,11 @@ def create_kernel_case(op_type, numpy_op_type): self.attrs = {"axis": self.axis, "flatten": True} self.numpy_op = eval("np.%s" % (numpy_op_type)) self.outputs = { - 'Out': self.numpy_op( - self.x.flatten(), axis=self.axis) + 'Out': self.numpy_op(self.x.flatten(), axis=self.axis) } class ArgMinMaxKernelCase6(ArgMinMaxKernelBaseCase): + def setUp(self): self.initTestCase() self.dims = (4) @@ -107,9 +114,7 @@ def create_kernel_case(op_type, numpy_op_type): self.attrs = {"axis": self.axis, "flatten": True, "keepdims": True} self.numpy_op = eval("np.%s" % (numpy_op_type)) self.outputs = { - 'Out': - np.array(self.numpy_op( - self.x.flatten(), 
axis=self.axis)) + 'Out': np.array(self.numpy_op(self.x.flatten(), axis=self.axis)) } cls_name = "ArgMinMaxKernelBaseCase_%s" % (op_type) @@ -150,7 +155,9 @@ for op_type, numpy_op_type in zip(['arg_max', 'arg_min'], ['argmax', 'argmin']): def create_test_case(op_type): + class ArgMaxMinTestCase(unittest.TestCase): + def setUp(self): np.random.seed(123) self.input_data = np.random.rand(10, 10).astype("float32") @@ -164,8 +171,9 @@ def create_test_case(op_type): def run_static(self, place): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): - data_var = paddle.static.data( - name="data", shape=[10, 10], dtype="float32") + data_var = paddle.static.data(name="data", + shape=[10, 10], + dtype="float32") op = eval("paddle.%s" % (op_type)) result = op(data_var) exe = paddle.static.Executor(place) @@ -176,8 +184,9 @@ def create_test_case(op_type): True) with paddle.static.program_guard(paddle.static.Program()): - data_var = paddle.static.data( - name="data", shape=[10, 10], dtype="float32") + data_var = paddle.static.data(name="data", + shape=[10, 10], + dtype="float32") op = eval("paddle.%s" % (op_type)) result = op(data_var, axis=1) exe = paddle.static.Executor(place) @@ -187,8 +196,9 @@ def create_test_case(op_type): self.assertTrue((result_data == expected_data).all(), True) with paddle.static.program_guard(paddle.static.Program()): - data_var = paddle.static.data( - name="data", shape=[10, 10], dtype="float32") + data_var = paddle.static.data(name="data", + shape=[10, 10], + dtype="float32") op = eval("paddle.%s" % (op_type)) result = op(data_var, axis=-1) exe = paddle.static.Executor(place) @@ -198,22 +208,24 @@ def create_test_case(op_type): self.assertTrue((result_data == expected_data).all(), True) with paddle.static.program_guard(paddle.static.Program()): - data_var = paddle.static.data( - name="data", shape=[10, 10], dtype="float32") + data_var = paddle.static.data(name="data", + shape=[10, 10], + dtype="float32") op = eval("paddle.%s" % (op_type)) result = op(data_var, axis=-1, keepdim=True) exe = paddle.static.Executor(place) result_data = exe.run(feed={"data": self.input_data}, fetch_list=[result]) - expected_data = self.numpy_op( - self.input_data, axis=-1).reshape((10, 1)) + expected_data = self.numpy_op(self.input_data, axis=-1).reshape( + (10, 1)) self.assertTrue((result_data == expected_data).all(), True) with paddle.static.program_guard(paddle.static.Program()): op = eval("paddle.%s" % (op_type)) - data_var = paddle.static.data( - name="data", shape=[10, 10], dtype="float32") + data_var = paddle.static.data(name="data", + shape=[10, 10], + dtype="float32") result = op(data_var, axis=-1, name="test_arg_api") self.assertTrue("test_arg_api" in result.name) @@ -222,28 +234,28 @@ def create_test_case(op_type): op = eval("paddle.%s" % (op_type)) data_tensor = paddle.to_tensor(self.input_data) - #case 1 + #case 1 result_data = op(data_tensor) excepted_data = self.numpy_op(self.input_data) self.assertTrue((result_data.numpy() == excepted_data).all(), True) - #case 2 + #case 2 result_data = op(data_tensor, axis=1) excepted_data = self.numpy_op(self.input_data, axis=1) self.assertTrue((result_data.numpy() == excepted_data).all(), True) - #case 3 + #case 3 result_data = op(data_tensor, axis=-1) excepted_data = self.numpy_op(self.input_data, axis=-1) self.assertTrue((result_data.numpy() == excepted_data).all(), True) - #case 4 + #case 4 result_data = op(data_tensor, axis=-1, keepdim=True) excepted_data = self.numpy_op(self.input_data, axis=-1) excepted_data = 
excepted_data.reshape((10, 1)) self.assertTrue((result_data.numpy() == excepted_data).all(), True) - #case 5 + #case 5 result_data = op(data_tensor, axis=-1, keepdim=True, dtype="int32") self.assertTrue(result_data.numpy().dtype == np.int32) @@ -278,6 +290,7 @@ for op_type in ['argmin', 'argmax']: class TestArgMinMaxOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): @@ -295,43 +308,49 @@ class TestArgMinMaxOpError(unittest.TestCase): self.assertRaises(TypeError, test_argmin_x_type) def test_argmax_attr_type(): - data = paddle.static.data( - name="test_argmax", shape=[10], dtype="float32") + data = paddle.static.data(name="test_argmax", + shape=[10], + dtype="float32") output = paddle.argmax(x=data, dtype="float32") self.assertRaises(TypeError, test_argmax_attr_type) def test_argmin_attr_type(): - data = paddle.static.data( - name="test_argmax", shape=[10], dtype="float32") + data = paddle.static.data(name="test_argmax", + shape=[10], + dtype="float32") output = paddle.argmin(x=data, dtype="float32") self.assertRaises(TypeError, test_argmin_attr_type) def test_argmax_axis_type(): - data = paddle.static.data( - name="test_argmax", shape=[10], dtype="float32") + data = paddle.static.data(name="test_argmax", + shape=[10], + dtype="float32") output = paddle.argmax(x=data, axis=1.2) self.assertRaises(TypeError, test_argmax_axis_type) def test_argmin_axis_type(): - data = paddle.static.data( - name="test_argmin", shape=[10], dtype="float32") + data = paddle.static.data(name="test_argmin", + shape=[10], + dtype="float32") output = paddle.argmin(x=data, axis=1.2) self.assertRaises(TypeError, test_argmin_axis_type) def test_argmax_dtype_type(): - data = paddle.static.data( - name="test_argmax", shape=[10], dtype="float32") + data = paddle.static.data(name="test_argmax", + shape=[10], + dtype="float32") output = paddle.argmax(x=data, dtype=None) self.assertRaises(ValueError, test_argmax_dtype_type) def test_argmin_dtype_type(): - data = paddle.static.data( - name="test_argmin", shape=[10], dtype="float32") + data = paddle.static.data(name="test_argmin", + shape=[10], + dtype="float32") output = paddle.argmin(x=data, dtype=None) self.assertRaises(ValueError, test_argmin_dtype_type) diff --git a/python/paddle/fluid/tests/unittests/test_argsort_op.py b/python/paddle/fluid/tests/unittests/test_argsort_op.py index 874d66112bd..50350e88795 100644 --- a/python/paddle/fluid/tests/unittests/test_argsort_op.py +++ b/python/paddle/fluid/tests/unittests/test_argsort_op.py @@ -31,6 +31,7 @@ np.random.seed(123) class PyArgsort(object): + def __init__(self, input_shape, axis, descending, dtype): self.x = np.random.random(input_shape).astype(dtype) self.label = np.random.random(input_shape).astype(dtype) @@ -43,20 +44,17 @@ class PyArgsort(object): def forward(self): if self.descending: self.indices = np.flip( - np.argsort( - self.x, kind='quicksort', axis=self.axis), self.axis) + np.argsort(self.x, kind='quicksort', axis=self.axis), self.axis) self.sorted_x = np.flip( - np.sort( - self.x, kind='quicksort', axis=self.axis), self.axis) + np.sort(self.x, kind='quicksort', axis=self.axis), self.axis) else: self.indices = np.argsort(self.x, kind='quicksort', axis=self.axis) self.sorted_x = np.sort(self.x, kind='quicksort', axis=self.axis) self.loss = self.sorted_x * self.label self.loss = np.sum(self.loss) - out = (np.array( - self.indices, dtype=self.indices.dtype), np.array( - self.sorted_x, dtype=self.sorted_x.dtype), np.array( - [self.loss], 
dtype=self.loss.dtype)) + out = (np.array(self.indices, dtype=self.indices.dtype), + np.array(self.sorted_x, dtype=self.sorted_x.dtype), + np.array([self.loss], dtype=self.loss.dtype)) return out @@ -67,6 +65,7 @@ def create_tensor(np_data, place): class TestArgsortOpCPU(unittest.TestCase): + def setup_program(self): self.main_program = Program() self.startup_program = Program() @@ -86,11 +85,13 @@ class TestArgsortOpCPU(unittest.TestCase): self.descending, self.dtype) with fluid.program_guard(self.main_program, self.startup_program): - x = fluid.layers.data( - name="x", shape=self.input_shape, dtype=self.dtype) + x = fluid.layers.data(name="x", + shape=self.input_shape, + dtype=self.dtype) x.stop_gradient = False - label = fluid.layers.data( - name="label", shape=self.input_shape, dtype=self.dtype) + label = fluid.layers.data(name="label", + shape=self.input_shape, + dtype=self.dtype) self.sorted_x, self.index = fluid.layers.argsort( input=x, axis=self.axis, descending=self.descending) self.sorted_x.stop_gradient = False @@ -133,12 +134,12 @@ class TestArgsortOpCPU(unittest.TestCase): ana_grad = [np.array(x) for x in self.backward()] num_grad = self.get_numerical_gradient(delta=numeric_grad_delta) - self.assert_is_close( - num_grad, - ana_grad, - 'x', - max_relative_error=max_relative_error, - msg_prefix="Gradient Check On %s" % str(self.place)) + self.assert_is_close(num_grad, + ana_grad, + 'x', + max_relative_error=max_relative_error, + msg_prefix="Gradient Check On %s" % + str(self.place)) def check_forward(self): pd_outputs = self.forward() @@ -146,8 +147,7 @@ class TestArgsortOpCPU(unittest.TestCase): for pd_output, py_output in zip(pd_outputs, py_outputs): self.assertEqual(pd_output.shape, py_output.shape) self.assertTrue( - np.allclose( - pd_output, py_output, atol=0, equal_nan=False)) + np.allclose(pd_output, py_output, atol=0, equal_nan=False)) def get_numerical_gradient(self, delta=1e-7): if self.dtype == 'float16': @@ -204,6 +204,7 @@ class TestArgsortOpCPU(unittest.TestCase): class TestArgsortOpGPU(TestArgsortOpCPU): + def init_place(self): if core.is_compiled_with_cuda(): self.place = core.CUDAPlace(0) @@ -212,120 +213,144 @@ class TestArgsortOpGPU(TestArgsortOpCPU): class TestArgsortOpAxis0CPU(TestArgsortOpCPU): + def init_axis(self): self.axis = 0 class TestArgsortOpAxis0GPU(TestArgsortOpGPU): + def init_axis(self): self.axis = 0 class TestArgsortOpAxis1CPU(TestArgsortOpCPU): + def init_axis(self): self.axis = 1 class TestArgsortOpAxis1GPU(TestArgsortOpGPU): + def init_axis(self): self.axis = 1 class TestArgsortOpAxis2CPU(TestArgsortOpCPU): + def init_axis(self): self.axis = 2 class TestArgsortOpAxis2GPU(TestArgsortOpGPU): + def init_axis(self): self.axis = 2 class TestArgsortOpAxisNeg1CPU(TestArgsortOpCPU): + def init_axis(self): self.axis = -1 class TestArgsortOpAxisNeg1GPU(TestArgsortOpGPU): + def init_axis(self): self.axis = -1 class TestArgsortOpAxisNeg2CPU(TestArgsortOpCPU): + def init_axis(self): self.axis = -2 class TestArgsortOpAxisNeg2GPU(TestArgsortOpGPU): + def init_axis(self): self.axis = -2 class TestArgsortOpDescendingAxisCPU(TestArgsortOpCPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxisGPU(TestArgsortOpGPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis0CPU(TestArgsortOpAxis0CPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis0GPU(TestArgsortOpAxis0GPU): + def init_direction(self): self.descending = True class 
TestArgsortOpDescendingAxis1CPU(TestArgsortOpAxis1CPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis1GPU(TestArgsortOpAxis1GPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis2CPU(TestArgsortOpAxis2CPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis2GPU(TestArgsortOpAxis2GPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxisNeg1CPU(TestArgsortOpAxisNeg1CPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxisNeg1GPU(TestArgsortOpAxisNeg1GPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxisNeg2CPU(TestArgsortOpAxisNeg2CPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxisNeg2GPU(TestArgsortOpAxisNeg2GPU): + def init_direction(self): self.descending = True class TestArgsortErrorOnCPU(unittest.TestCase): + def setUp(self): self.place = core.CPUPlace() def test_error(self): + def test_fluid_var_type(): with fluid.program_guard(fluid.Program()): x = [1] @@ -340,6 +365,7 @@ class TestArgsortErrorOnCPU(unittest.TestCase): class TestArgsortErrorOnGPU(TestArgsortErrorOnCPU): + def setUp(self): if core.is_compiled_with_cuda(): self.place = core.CUDAPlace(0) @@ -348,8 +374,11 @@ class TestArgsortErrorOnGPU(TestArgsortErrorOnCPU): class TestArgsort(unittest.TestCase): + def init(self): - self.input_shape = [10000, ] + self.input_shape = [ + 10000, + ] self.axis = 0 def setUp(self): @@ -362,8 +391,9 @@ class TestArgsort(unittest.TestCase): def test_api(self): with fluid.program_guard(fluid.Program()): - input = fluid.data( - name="input", shape=self.input_shape, dtype="float64") + input = fluid.data(name="input", + shape=self.input_shape, + dtype="float64") output = paddle.argsort(input, axis=self.axis) output2 = paddle.argsort(input, axis=self.axis, descending=True) @@ -380,26 +410,32 @@ class TestArgsort(unittest.TestCase): class TestArgsort2(TestArgsort): + def init(self): self.input_shape = [10000, 1] self.axis = 0 class TestArgsort3(TestArgsort): + def init(self): self.input_shape = [1, 10000] self.axis = 1 class TestArgsort4(TestArgsort): + def init(self): self.input_shape = [2, 3, 4] self.axis = 1 class TestArgsortImperative(unittest.TestCase): + def init(self): - self.input_shape = [10000, ] + self.input_shape = [ + 10000, + ] self.axis = 0 def setUp(self): @@ -425,24 +461,28 @@ class TestArgsortImperative(unittest.TestCase): class TestArgsortImperative2(TestArgsortImperative): + def init(self): self.input_shape = [10000, 1] self.axis = 0 class TestArgsortImperative3(TestArgsortImperative): + def init(self): self.input_shape = [1, 10000] self.axis = 1 class TestArgsortImperative4(TestArgsortImperative): + def init(self): self.input_shape = [2, 3, 4] self.axis = 1 class TestArgsortWithInputNaN(unittest.TestCase): + def init(self): self.axis = 0 diff --git a/python/paddle/fluid/tests/unittests/test_array_read_write_op.py b/python/paddle/fluid/tests/unittests/test_array_read_write_op.py index b02cf67f4b2..8ed220daf03 100644 --- a/python/paddle/fluid/tests/unittests/test_array_read_write_op.py +++ b/python/paddle/fluid/tests/unittests/test_array_read_write_op.py @@ -60,12 +60,12 @@ def _test_read_write(x): class TestArrayReadWrite(unittest.TestCase): + def test_read_write(self): x = [ - layers.data( - name='x0', shape=[100]), layers.data( - name='x1', shape=[100]), layers.data( - name='x2', shape=[100]) + layers.data(name='x0', shape=[100]), + 
layers.data(name='x1', shape=[100]), + layers.data(name='x2', shape=[100]) ] for each_x in x: each_x.stop_gradient = False @@ -75,9 +75,11 @@ class TestArrayReadWrite(unittest.TestCase): place = core.CPUPlace() exe = Executor(place) - outs = exe.run(feed={'x0': tensor, - 'x1': tensor, - 'x2': tensor}, + outs = exe.run(feed={ + 'x0': tensor, + 'x1': tensor, + 'x2': tensor + }, fetch_list=[a_sum, x_sum], scope=core.Scope()) self.assertEqual(outs[0], outs[1]) @@ -91,12 +93,12 @@ class TestArrayReadWrite(unittest.TestCase): map(default_main_program().global_block().var, [each_x.name + "@GRAD" for each_x in x])) g_out = [ - item.sum() - for item in exe.run( - feed={'x0': tensor, - 'x1': tensor, - 'x2': tensor}, - fetch_list=g_vars) + item.sum() for item in exe.run(feed={ + 'x0': tensor, + 'x1': tensor, + 'x2': tensor + }, + fetch_list=g_vars) ] g_out_sum = numpy.array(g_out).sum() @@ -117,8 +119,8 @@ class TestArrayReadWrite(unittest.TestCase): total_sum_dygraph = layers.sums( input=[a_sum_dygraph, x_sum_dygraph]) - total_sum_scaled_dygraph = layers.scale( - x=total_sum_dygraph, scale=1 / 6.0) + total_sum_scaled_dygraph = layers.scale(x=total_sum_dygraph, + scale=1 / 6.0) total_sum_scaled_dygraph.backward() g_out_dygraph = [ item._grad_ivar().numpy().sum() for item in x_dygraph @@ -129,32 +131,40 @@ class TestArrayReadWrite(unittest.TestCase): class TestArrayReadWriteOpError(unittest.TestCase): + def _test_errors(self, use_fluid_api=True): if use_fluid_api: with program_guard(Program(), Program()): x1 = numpy.random.randn(2, 4).astype('int32') - x2 = fluid.layers.fill_constant( - shape=[1], dtype='int32', value=1) + x2 = fluid.layers.fill_constant(shape=[1], + dtype='int32', + value=1) x3 = numpy.random.randn(2, 4).astype('int32') - self.assertRaises( - TypeError, fluid.layers.array_read, array=x1, i=x2) - self.assertRaises( - TypeError, fluid.layers.array_write, array=x1, i=x2, out=x3) + self.assertRaises(TypeError, + fluid.layers.array_read, + array=x1, + i=x2) + self.assertRaises(TypeError, + fluid.layers.array_write, + array=x1, + i=x2, + out=x3) else: with program_guard(Program(), Program()): x1 = numpy.random.randn(2, 4).astype('int32') x2 = paddle.ones(shape=[1], dtype='int32') x3 = numpy.random.randn(2, 4).astype('int32') - self.assertRaises( - TypeError, paddle.tensor.array_read, array=x1, i=x2) - self.assertRaises( - TypeError, - paddle.tensor.array_write, - array=x1, - i=x2, - out=x3) + self.assertRaises(TypeError, + paddle.tensor.array_read, + array=x1, + i=x2) + self.assertRaises(TypeError, + paddle.tensor.array_write, + array=x1, + i=x2, + out=x3) def test_fluid_api(self): self._test_errors(use_fluid_api=True) @@ -164,6 +174,7 @@ class TestArrayReadWriteOpError(unittest.TestCase): class TestArrayReadWriteApi(unittest.TestCase): + def test_api(self): paddle.disable_static() arr = paddle.tensor.create_array(dtype="float32") diff --git a/python/paddle/fluid/tests/unittests/test_ascend_trigger.py b/python/paddle/fluid/tests/unittests/test_ascend_trigger.py index 644b550bc42..eb55962d606 100644 --- a/python/paddle/fluid/tests/unittests/test_ascend_trigger.py +++ b/python/paddle/fluid/tests/unittests/test_ascend_trigger.py @@ -28,11 +28,10 @@ class TestAscendTriggerOP(unittest.TestCase): with fluid.program_guard(program): x = fluid.data(name='x', shape=[1], dtype='int64', lod_level=0) y = fluid.data(name='y', shape=[1], dtype='int64', lod_level=0) - block.append_op( - type="ascend_trigger", - inputs={"FeedList": [x]}, - outputs={"FetchList": [y]}, - attrs={'graph_idx': 0}) + 
block.append_op(type="ascend_trigger", + inputs={"FeedList": [x]}, + outputs={"FetchList": [y]}, + attrs={'graph_idx': 0}) exe = paddle.static.Executor(paddle.CPUPlace()) try: diff --git a/python/paddle/fluid/tests/unittests/test_assert_op.py b/python/paddle/fluid/tests/unittests/test_assert_op.py index f7ab991de56..5c6cbba0c2d 100644 --- a/python/paddle/fluid/tests/unittests/test_assert_op.py +++ b/python/paddle/fluid/tests/unittests/test_assert_op.py @@ -21,6 +21,7 @@ import unittest class TestAssertOp(unittest.TestCase): + def run_network(self, net_func): main_program = fluid.Program() startup_program = fluid.Program() @@ -30,32 +31,39 @@ class TestAssertOp(unittest.TestCase): exe.run(main_program) def test_assert_true(self): + def net_func(): - condition = layers.fill_constant( - shape=[1], dtype='bool', value=True) + condition = layers.fill_constant(shape=[1], + dtype='bool', + value=True) layers.Assert(condition, []) self.run_network(net_func) def test_assert_false(self): + def net_func(): - condition = layers.fill_constant( - shape=[1], dtype='bool', value=False) + condition = layers.fill_constant(shape=[1], + dtype='bool', + value=False) layers.Assert(condition) with self.assertRaises(ValueError): self.run_network(net_func) def test_assert_cond_numel_error(self): + def net_func(): - condition = layers.fill_constant( - shape=[1, 2], dtype='bool', value=True) + condition = layers.fill_constant(shape=[1, 2], + dtype='bool', + value=True) layers.Assert(condition, []) with self.assertRaises(ValueError): self.run_network(net_func) def test_assert_print_data(self): + def net_func(): zero = layers.fill_constant(shape=[1], dtype='int64', value=0) one = layers.fill_constant(shape=[1], dtype='int64', value=1) @@ -67,6 +75,7 @@ class TestAssertOp(unittest.TestCase): self.run_network(net_func) def test_assert_summary(self): + def net_func(): x = layers.fill_constant(shape=[10], dtype='float32', value=2.0) condition = layers.reduce_max(x) < 1.0 @@ -77,6 +86,7 @@ class TestAssertOp(unittest.TestCase): self.run_network(net_func) def test_assert_summary_greater_than_size(self): + def net_func(): x = layers.fill_constant(shape=[2, 3], dtype='float32', value=2.0) condition = layers.reduce_max(x) < 1.0 diff --git a/python/paddle/fluid/tests/unittests/test_assign_op.py b/python/paddle/fluid/tests/unittests/test_assign_op.py index e1fae5d5aa5..c35d7940a8a 100644 --- a/python/paddle/fluid/tests/unittests/test_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_assign_op.py @@ -26,6 +26,7 @@ from paddle.fluid.backward import append_backward class TestAssignOp(op_test.OpTest): + def setUp(self): self.python_api = paddle.assign self.op_type = "assign" @@ -45,6 +46,7 @@ class TestAssignOp(op_test.OpTest): class TestAssignFP16Op(op_test.OpTest): + def setUp(self): self.python_api = paddle.assign self.op_type = "assign" @@ -64,6 +66,7 @@ class TestAssignFP16Op(op_test.OpTest): class TestAssignOpWithLoDTensorArray(unittest.TestCase): + def test_assign_LoDTensorArray(self): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) main_program = Program() @@ -71,8 +74,9 @@ class TestAssignOpWithLoDTensorArray(unittest.TestCase): with program_guard(main_program): x = fluid.data(name='x', shape=[100, 10], dtype='float32') x.stop_gradient = False - y = fluid.layers.fill_constant( - shape=[100, 10], dtype='float32', value=1) + y = fluid.layers.fill_constant(shape=[100, 10], + dtype='float32', + value=1) z = fluid.layers.elementwise_add(x=x, y=y) i = fluid.layers.fill_constant(shape=[1], dtype='int64', 
value=0) init_array = fluid.layers.array_write(x=z, i=i) @@ -82,8 +86,8 @@ class TestAssignOpWithLoDTensorArray(unittest.TestCase): append_backward(mean) fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) feed_x = np.random.random(size=(100, 10)).astype('float32') ones = np.ones((100, 10)).astype('float32') @@ -96,11 +100,12 @@ class TestAssignOpWithLoDTensorArray(unittest.TestCase): class TestAssignOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The type of input must be Variable or numpy.ndarray. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.assign, x1) # When the type of input is numpy.ndarray, the dtype of input must be float32, int32. x2 = np.array([[2.5, 2.5]], dtype='uint8') @@ -108,14 +113,16 @@ class TestAssignOpError(unittest.TestCase): class TestAssignOApi(unittest.TestCase): + def test_assign_LoDTensorArray(self): main_program = Program() startup_program = Program() with program_guard(main_program): x = fluid.data(name='x', shape=[100, 10], dtype='float32') x.stop_gradient = False - y = fluid.layers.fill_constant( - shape=[100, 10], dtype='float32', value=1) + y = fluid.layers.fill_constant(shape=[100, 10], + dtype='float32', + value=1) z = fluid.layers.elementwise_add(x=x, y=y) i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0) init_array = fluid.layers.array_write(x=z, i=i) @@ -124,8 +131,8 @@ class TestAssignOApi(unittest.TestCase): mean = fluid.layers.mean(sums) append_backward(mean) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) feed_x = np.random.random(size=(100, 10)).astype('float32') ones = np.ones((100, 10)).astype('float32') @@ -212,12 +219,13 @@ class TestAssignOApi(unittest.TestCase): class TestAssignOpErrorApi(unittest.TestCase): + def test_errors(self): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with program_guard(Program(), Program()): # The type of input must be Variable or numpy.ndarray. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) self.assertRaises(TypeError, paddle.assign, x1) # When the type of input is numpy.ndarray, the dtype of input must be float32, int32. 
x2 = np.array([[2.5, 2.5]], dtype='uint8') diff --git a/python/paddle/fluid/tests/unittests/test_assign_pos_op.py b/python/paddle/fluid/tests/unittests/test_assign_pos_op.py index 46761063b8a..3458ce64eca 100644 --- a/python/paddle/fluid/tests/unittests/test_assign_pos_op.py +++ b/python/paddle/fluid/tests/unittests/test_assign_pos_op.py @@ -68,6 +68,7 @@ def assert_allclose(res, out, cum_count): def get_redefined_allclose(cum_count): + def redefined_allclose(x, y, *args, **kwargs): return assert_allclose(x, y, cum_count) @@ -77,6 +78,7 @@ def get_redefined_allclose(cum_count): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestAssignPosOpInt64(op_test.OpTest): + def setUp(self): x = np.random.randint(0, 16, size=(100, 2)).astype("int64") y = count(x, 16) @@ -98,6 +100,7 @@ class TestAssignPosOpInt64(op_test.OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestAssignPosAPI(unittest.TestCase): + def setUp(self): self.x = np.random.randint(0, 16, size=(100, 2)).astype("int64") y = count(self.x, 16) @@ -109,12 +112,15 @@ class TestAssignPosAPI(unittest.TestCase): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): x = paddle.fluid.data('x', self.x.shape, dtype="int64") - cum_count = paddle.fluid.data( - 'cum_count', self.cum_count.shape, dtype="int64") + cum_count = paddle.fluid.data('cum_count', + self.cum_count.shape, + dtype="int64") out = utils._assign_pos(x, cum_count) exe = paddle.static.Executor(self.place) - res = exe.run(feed={'x': self.x, - "cum_count": self.cum_count}, + res = exe.run(feed={ + 'x': self.x, + "cum_count": self.cum_count + }, fetch_list=[out]) assert_allclose(res[0], self.out, self.cum_count) diff --git a/python/paddle/fluid/tests/unittests/test_assign_value_op.py b/python/paddle/fluid/tests/unittests/test_assign_value_op.py index 2abdbdc5940..423f70085b8 100644 --- a/python/paddle/fluid/tests/unittests/test_assign_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_assign_value_op.py @@ -24,6 +24,7 @@ import paddle.fluid.layers as layers class TestAssignValueOp(op_test.OpTest): + def setUp(self): self.op_type = "assign_value" self.inputs = {} @@ -43,31 +44,35 @@ class TestAssignValueOp(op_test.OpTest): class TestAssignValueOp2(TestAssignValueOp): + def init_data(self): self.value = numpy.random.random(size=(2, 5)).astype(numpy.int32) self.attrs["int32_values"] = [int(v) for v in self.value.flat] class TestAssignValueOp3(TestAssignValueOp): + def init_data(self): self.value = numpy.random.random(size=(2, 5)).astype(numpy.int64) self.attrs["int64_values"] = [int(v) for v in self.value.flat] class TestAssignValueOp4(TestAssignValueOp): + def init_data(self): - self.value = numpy.random.choice( - a=[False, True], size=(2, 5)).astype(numpy.bool) + self.value = numpy.random.choice(a=[False, True], + size=(2, 5)).astype(numpy.bool) self.attrs["bool_values"] = [int(v) for v in self.value.flat] class TestAssignApi(unittest.TestCase): + def setUp(self): self.init_dtype() - self.value = ( - -100 + 200 * numpy.random.random(size=(2, 5))).astype(self.dtype) - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.value = (-100 + 200 * numpy.random.random(size=(2, 5))).astype( + self.dtype) + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() def init_dtype(self): self.dtype = "float32" @@ -80,29 +85,31 @@ class TestAssignApi(unittest.TestCase): exe = 
fluid.Executor(self.place) [fetched_x] = exe.run(main_program, feed={}, fetch_list=[x]) - self.assertTrue( - numpy.array_equal(fetched_x, self.value), - "fetch_x=%s val=%s" % (fetched_x, self.value)) + self.assertTrue(numpy.array_equal(fetched_x, self.value), + "fetch_x=%s val=%s" % (fetched_x, self.value)) self.assertEqual(fetched_x.dtype, self.value.dtype) class TestAssignApi2(TestAssignApi): + def init_dtype(self): self.dtype = "int32" class TestAssignApi3(TestAssignApi): + def init_dtype(self): self.dtype = "int64" class TestAssignApi4(TestAssignApi): + def setUp(self): self.init_dtype() - self.value = numpy.random.choice( - a=[False, True], size=(2, 5)).astype(numpy.bool) - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.value = numpy.random.choice(a=[False, True], + size=(2, 5)).astype(numpy.bool) + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() def init_dtype(self): self.dtype = "bool" diff --git a/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py index 17507c70d90..9dee8088ecd 100644 --- a/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py @@ -38,21 +38,19 @@ def convolutional_neural_network(use_py_reader): iterable=False, use_double_buffer=False) - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_1 = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) @@ -69,8 +67,8 @@ def test(): place = fluid.CPUPlace() exe = fluid.Executor(place) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=BATCH_SIZE) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=BATCH_SIZE) array, img, label, prediction, avg_loss, acc, py_reader = convolutional_neural_network( use_py_reader=False) @@ -113,10 +111,9 @@ def train(use_cuda, thread_num, cpu_num): optimizer.minimize(avg_loss) print("Adam optimizer minimize done.") - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=500), - batch_size=BATCH_SIZE) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=500), + batch_size=BATCH_SIZE) print("declared train reader done.") place = fluid.CPUPlace() @@ -138,12 +135,11 @@ def train(use_cuda, thread_num, cpu_num): exec_strategy.num_iteration_per_run = 10 main_program = fluid.default_main_program() - pe = fluid.ParallelExecutor( - use_cuda=False, - loss_name=avg_loss.name, - main_program=main_program, - build_strategy=build_strategy, - exec_strategy=exec_strategy) + pe = fluid.ParallelExecutor(use_cuda=False, + loss_name=avg_loss.name, + main_program=main_program, + build_strategy=build_strategy, + exec_strategy=exec_strategy) print("declare 
parallel executor done.") py_reader.set_sample_list_generator(train_reader) @@ -172,24 +168,24 @@ def train(use_cuda, thread_num, cpu_num): class TestAsyncSSAGraphExecutor(unittest.TestCase): + def test_check_async_ssa_exe_train(self): step_list = [] for cpu_num in [1, 2, 4]: print("run cpu_num -> " + str(cpu_num)) with fluid.scope_guard(fluid.core.Scope()): - with fluid.program_guard( - main_program=fluid.Program(), - startup_program=fluid.Program()): + with fluid.program_guard(main_program=fluid.Program(), + startup_program=fluid.Program()): start_time = time.time() - step = train( - use_cuda=False, thread_num=cpu_num, cpu_num=cpu_num) + step = train(use_cuda=False, + thread_num=cpu_num, + cpu_num=cpu_num) end_time = time.time() step_list.append(step) print("cpu_num -> " + str(cpu_num) + " step -> " + str(step) + " time -> " + str(end_time - start_time)) - with fluid.program_guard( - main_program=fluid.Program(), - startup_program=fluid.Program()): + with fluid.program_guard(main_program=fluid.Program(), + startup_program=fluid.Program()): test() assert abs(int(step_list[0] / 2) - int(step_list[1])) < 5 assert abs(int(step_list[1] / 2) - int(step_list[2])) < 5 diff --git a/python/paddle/fluid/tests/unittests/test_atan2_op.py b/python/paddle/fluid/tests/unittests/test_atan2_op.py index ca0e2d2ba6d..90e2a37453f 100644 --- a/python/paddle/fluid/tests/unittests/test_atan2_op.py +++ b/python/paddle/fluid/tests/unittests/test_atan2_op.py @@ -34,6 +34,7 @@ def atan2_grad(x1, x2, dout): class TestAtan2(OpTest): + def setUp(self): self.op_type = "atan2" self.python_api = paddle.atan2 @@ -57,36 +58,40 @@ class TestAtan2(OpTest): class TestAtan2_float(TestAtan2): + def init_dtype(self): self.dtype = np.float32 def test_check_grad(self): if self.dtype not in [np.int32, np.int64]: - self.check_grad( - ['X1', 'X2'], - 'Out', - user_defined_grads=atan2_grad(self.inputs['X1'], - self.inputs['X2'], - 1 / self.inputs['X1'].size), - check_eager=True) + self.check_grad(['X1', 'X2'], + 'Out', + user_defined_grads=atan2_grad( + self.inputs['X1'], self.inputs['X2'], + 1 / self.inputs['X1'].size), + check_eager=True) class TestAtan2_float16(TestAtan2_float): + def init_dtype(self): self.dtype = np.float16 class TestAtan2_int32(TestAtan2_float): + def init_dtype(self): self.dtype = np.int32 class TestAtan2_int64(TestAtan2_float): + def init_dtype(self): self.dtype = np.int64 class TestAtan2API(unittest.TestCase): + def init_dtype(self): self.dtype = 'float64' self.shape = [11, 17] @@ -117,6 +122,7 @@ class TestAtan2API(unittest.TestCase): run(place) def test_dygraph_api(self): + def run(place): paddle.disable_static(place) X1 = paddle.to_tensor(self.x1) diff --git a/python/paddle/fluid/tests/unittests/test_attention_lstm_op.py b/python/paddle/fluid/tests/unittests/test_attention_lstm_op.py index a5fb80b0970..053b716e95f 100644 --- a/python/paddle/fluid/tests/unittests/test_attention_lstm_op.py +++ b/python/paddle/fluid/tests/unittests/test_attention_lstm_op.py @@ -46,8 +46,8 @@ def attention_lstm( start_offset = 0 for bid in range(N): seq_len = lod[0][bid] - xi = np.copy(x[start_offset:start_offset + seq_len, :]).reshape(seq_len, - M) + xi = np.copy(x[start_offset:start_offset + seq_len, :]).reshape( + seq_len, M) prev_cell = np.copy(c0[bid]).reshape([1, D]) prev_hidden = np.copy(h0[bid]).reshape([1, D]) for step in range(seq_len): @@ -88,6 +88,7 @@ def attention_lstm( class TestAttentionLSTMOp(OpTest): + def set_conf(self): pass @@ -156,11 +157,13 @@ class TestAttentionLSTMOp(OpTest): class 
TestAttentionOpNonInit(TestAttentionLSTMOp): + def set_conf(self): self.has_initial_hidden = False class TestAttentionOpAct(TestAttentionLSTMOp): + def set_conf(self): self.M = 3 self.D = 2 @@ -170,24 +173,28 @@ class TestAttentionOpAct(TestAttentionLSTMOp): class TestAttentionOpMD1(TestAttentionLSTMOp): + def set_conf(self): self.M = 36 self.D = 8 class TestAttentionOpMD2(TestAttentionLSTMOp): + def set_conf(self): self.M = 8 self.D = 8 class TestAttentionOpMD3(TestAttentionLSTMOp): + def set_conf(self): self.M = 15 self.D = 30 class TestAttentionOpBS1(TestAttentionLSTMOp): + def set_conf(self): self.lod = [[5]] self.M = 16 @@ -195,11 +202,13 @@ class TestAttentionOpBS1(TestAttentionLSTMOp): class TestAttentionOpBS2(TestAttentionLSTMOp): + def set_conf(self): self.lod = [[3, 6]] class TestAttentionOpBS5(TestAttentionLSTMOp): + def set_conf(self): self.lod = [[3, 2, 4, 7, 5]] diff --git a/python/paddle/fluid/tests/unittests/test_auc_op.py b/python/paddle/fluid/tests/unittests/test_auc_op.py index 6568da5d00c..c2c206905e3 100644 --- a/python/paddle/fluid/tests/unittests/test_auc_op.py +++ b/python/paddle/fluid/tests/unittests/test_auc_op.py @@ -22,6 +22,7 @@ import paddle.fluid as fluid class TestAucOp(OpTest): + def setUp(self): self.op_type = "auc" pred = np.random.random((128, 2)).astype("float32") @@ -29,10 +30,10 @@ class TestAucOp(OpTest): num_thresholds = 200 slide_steps = 1 - stat_pos = np.zeros((1 + slide_steps) * (num_thresholds + 1) + 1, - ).astype("int64") - stat_neg = np.zeros((1 + slide_steps) * (num_thresholds + 1) + 1, - ).astype("int64") + stat_pos = np.zeros( + (1 + slide_steps) * (num_thresholds + 1) + 1, ).astype("int64") + stat_neg = np.zeros( + (1 + slide_steps) * (num_thresholds + 1) + 1, ).astype("int64") self.inputs = { 'Predict': pred, @@ -66,6 +67,7 @@ class TestAucOp(OpTest): class TestGlobalAucOp(OpTest): + def setUp(self): self.op_type = "auc" pred = np.random.random((128, 2)).astype("float32") @@ -106,6 +108,7 @@ class TestGlobalAucOp(OpTest): class TestAucOpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): @@ -117,8 +120,9 @@ class TestAucOpError(unittest.TestCase): self.assertRaises(TypeError, test_type1) def test_type2(): - data2 = fluid.data( - name="input2", shape=[-1, 2], dtype="float32") + data2 = fluid.data(name="input2", + shape=[-1, 2], + dtype="float32") label2 = fluid.data(name="label2", shape=[-1], dtype="float32") result2 = fluid.layers.auc(input=data2, label=label2) diff --git a/python/paddle/fluid/tests/unittests/test_auc_single_pred_op.py b/python/paddle/fluid/tests/unittests/test_auc_single_pred_op.py index 5093dc1f990..aba58e3593d 100644 --- a/python/paddle/fluid/tests/unittests/test_auc_single_pred_op.py +++ b/python/paddle/fluid/tests/unittests/test_auc_single_pred_op.py @@ -21,6 +21,7 @@ from paddle.fluid import metrics class TestAucSinglePredOp(OpTest): + def setUp(self): self.op_type = "auc" pred = np.random.random((128, 2)).astype("float32") @@ -29,10 +30,10 @@ class TestAucSinglePredOp(OpTest): num_thresholds = 200 slide_steps = 1 - stat_pos = np.zeros((1 + slide_steps) * (num_thresholds + 1) + 1, - ).astype("int64") - stat_neg = np.zeros((1 + slide_steps) * (num_thresholds + 1) + 1, - ).astype("int64") + stat_pos = np.zeros( + (1 + slide_steps) * (num_thresholds + 1) + 1, ).astype("int64") + stat_neg = np.zeros( + (1 + slide_steps) * (num_thresholds + 1) + 1, ).astype("int64") self.inputs = { 'Predict': pred0, @@ -68,6 +69,7 @@ class TestAucSinglePredOp(OpTest): class 
TestAucGlobalSinglePredOp(OpTest): + def setUp(self): self.op_type = "auc" pred = np.random.random((128, 2)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py index 3faf7f68620..ce1dfa74364 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py @@ -36,6 +36,7 @@ logger = get_logger() class AutoCheckPointACLBase(AutoCheckpointBase): + def setUp(self): get_logger() logger.info("enter tests") @@ -203,6 +204,7 @@ class AutoCheckPointACLBase(AutoCheckpointBase): class AutoCheckpointTest(AutoCheckPointACLBase): + def setUp(self): get_logger() logger.info("enter tests") diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py index fca1baf85e5..c9172e74f28 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py @@ -37,6 +37,7 @@ logger = get_logger() class AutoCheckpointTest1(AutoCheckPointACLBase): + def setUp(self): get_logger() logger.info("enter tests") diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py index 0c17807a689..22b3c15053a 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py @@ -37,6 +37,7 @@ logger = get_logger() class AutoCheckpointTest2(AutoCheckPointACLBase): + def setUp(self): get_logger() logger.info("enter tests") diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py index ca103be59b9..8d847fe9704 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py @@ -37,6 +37,7 @@ logger = get_logger() class AutoCheckpointTest3(AutoCheckPointACLBase): + def setUp(self): get_logger() logger.info("enter tests") diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py index 3eeff91ff2d..c0aa13aa03f 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py @@ -37,6 +37,7 @@ logger = get_logger() class AutoCheckpointTestDist(AutoCheckPointACLBase): + def setUp(self): get_logger() logger.info("enter tests") diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py index f8c12f89051..da7f2af169d 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py @@ -37,6 +37,7 @@ logger = get_logger() class AutoCheckpointTestMul(AutoCheckPointACLBase): + def setUp(self): get_logger() logger.info("enter tests") diff --git a/python/paddle/fluid/tests/unittests/test_auto_growth_gpu_memory_limit.py b/python/paddle/fluid/tests/unittests/test_auto_growth_gpu_memory_limit.py index 3ff67a923a2..948aa58990d 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_growth_gpu_memory_limit.py +++ b/python/paddle/fluid/tests/unittests/test_auto_growth_gpu_memory_limit.py @@ -23,6 +23,7 @@ if fluid.is_compiled_with_cuda(): class 
TestBase(unittest.TestCase): + def setUp(self): if fluid.is_compiled_with_cuda(): self._limit = fluid.core.globals()['FLAGS_gpu_memory_limit_mb'] @@ -35,8 +36,7 @@ class TestBase(unittest.TestCase): place = fluid.CUDAPlace(0) t = fluid.LoDTensor() - t.set(np.ndarray( - [int(self._limit / 2), other_dim], dtype='float32'), + t.set(np.ndarray([int(self._limit / 2), other_dim], dtype='float32'), place) del t diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_api.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_api.py index 7d94139e9a8..f4a02679b32 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_api.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_api.py @@ -29,6 +29,7 @@ process_mesh2 = [[0, 1, 2], [3, 4, 5]] class SimpleNet(nn.Layer): + def __init__(self, vocab_size=128, hidden_size=4): super(SimpleNet, self).__init__() self.word_embeddings = nn.Embedding(vocab_size, hidden_size) @@ -37,10 +38,11 @@ class SimpleNet(nn.Layer): def forward(self, x, y): # Test shard_tensor interface with dist_attr arg - x = dist.shard_tensor( - x, - dist_attr={"process_mesh": process_mesh1, - "dims_mapping": [0, -1]}) + x = dist.shard_tensor(x, + dist_attr={ + "process_mesh": process_mesh1, + "dims_mapping": [0, -1] + }) emb_out = self.word_embeddings(x) # Test shard_tensor interface with no dist_attr arg y = dist.shard_tensor(y) @@ -51,15 +53,18 @@ class SimpleNet(nn.Layer): class TestAutoParallelAPI(unittest.TestCase): + def test_api(self): dist_context = get_default_distributed_context() net = SimpleNet() data1 = fluid.layers.fill_constant(shape=[2, 4], value=1, dtype="int64") - data2 = fluid.layers.fill_constant( - shape=[2, 4], value=2, dtype="float32") - data3 = fluid.layers.fill_constant( - shape=[2, 4], value=4, dtype="float32") + data2 = fluid.layers.fill_constant(shape=[2, 4], + value=2, + dtype="float32") + data3 = fluid.layers.fill_constant(shape=[2, 4], + value=4, + dtype="float32") x, y = net.forward(data1, data2) @@ -86,17 +91,16 @@ class TestAutoParallelAPI(unittest.TestCase): # Test shard_op interface with dist_attr dims_mapping1 = [0, 1] dims_mapping2 = [-1, 0] - dist_add = dist.shard_op( - paddle.add, - dist_attr={ - data2: { - "process_mesh": process_mesh2, - "dims_mapping": dims_mapping1 - }, - data3: { - "dims_mapping": dims_mapping2 - } - }) + dist_add = dist.shard_op(paddle.add, + dist_attr={ + data2: { + "process_mesh": process_mesh2, + "dims_mapping": dims_mapping1 + }, + data3: { + "dims_mapping": dims_mapping2 + } + }) results = dist_add(data2, data3) ops = paddle.static.default_main_program().block(0).ops last_op = ops[-1] diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_autoconvert.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_autoconvert.py index 131f2d299b5..0390176fb58 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_autoconvert.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_autoconvert.py @@ -21,6 +21,7 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestAutoParallelAutoConvert(TestMultipleGpus): + def test_auto_parallel_autoconvert(self): self.run_mnist_2gpu('auto_parallel_autoconvert.py') diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py index 55b36654437..7ef5516bc04 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py @@ -200,6 
+200,7 @@ cluster_json = """ class TestAutoParallelCluster(unittest.TestCase): + def test_cluster(self): cluster_json_file = "" cluster_json_object = json.loads(cluster_json) diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py index bc4f1671f4e..393d79557a9 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py @@ -33,6 +33,7 @@ from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr from paddle.distributed.auto_parallel.utils import append_distributed_attr_suffix from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed.auto_parallel.dist_context import set_default_distributed_context + paddle.enable_static() _global_parallel_strategy = None _global_process_mesh = None @@ -40,6 +41,7 @@ _global_process_mesh2 = None class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -48,57 +50,55 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) self.dropout = nn.Dropout(dropout_ratio, mode="upscale_in_train") def forward(self, input): if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) elif _global_parallel_strategy == "pp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh2, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + 
"process_mesh": _global_process_mesh2, + "dims_mapping": [1, -1] + }) out = self.norm(input) out = self.linear0(out) @@ -115,36 +115,33 @@ def mlp_pretrain_forward(train_program, start_program): batch_size = 4 hidden_size = 1024 sequence_len = 512 - input = static.data( - name="input", - shape=[batch_size, sequence_len, hidden_size], - dtype='float32') + input = static.data(name="input", + shape=[batch_size, sequence_len, hidden_size], + dtype='float32') if _global_parallel_strategy == "dp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1, -1] - }) - - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1, -1] + }) + + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) out = mlp(input) return train_program, start_program class TestMLPAutoCompletion(unittest.TestCase): + def test_mlp_dp(self): global _global_parallel_strategy _global_parallel_strategy = "dp" @@ -153,8 +150,8 @@ class TestMLPAutoCompletion(unittest.TestCase): train_program = static.Program() start_program = static.Program() dist_context = DistributedContext() - train_program, start_program = mlp_pretrain_forward(train_program, - start_program) + train_program, start_program = mlp_pretrain_forward( + train_program, start_program) completer = Completer(dist_context) complete_train_program = completer.complete_forward_annotation( train_program) @@ -169,8 +166,8 @@ class TestMLPAutoCompletion(unittest.TestCase): train_program = static.Program() start_program = static.Program() dist_context = DistributedContext() - train_program, start_program = mlp_pretrain_forward(train_program, - start_program) + train_program, start_program = mlp_pretrain_forward( + train_program, start_program) completer = Completer(dist_context) complete_train_program = completer.complete_forward_annotation( train_program) @@ -186,8 +183,8 @@ class TestMLPAutoCompletion(unittest.TestCase): train_program = static.Program() start_program = static.Program() dist_context = DistributedContext() - train_program, start_program = mlp_pretrain_forward(train_program, - start_program) + train_program, start_program = mlp_pretrain_forward( + train_program, start_program) completer = Completer(dist_context) complete_train_program = completer.complete_forward_annotation( train_program) @@ -245,6 +242,7 @@ class TestMLPAutoCompletion(unittest.TestCase): class AttentionLayer(nn.Layer): + def __init__(self, hidden_size=1024, sequence_len=512, @@ -266,34 +264,40 @@ class AttentionLayer(nn.Layer): self.initializer_range = initializer_range self.training = True self.attn_mask = None - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.q_proj = nn.Linear( - self.embed_dim, self.embed_dim, weight_attr, bias_attr=bias_attr) - self.k_proj = nn.Linear( - self.kdim, self.embed_dim, weight_attr, bias_attr=bias_attr) - 
self.v_proj = nn.Linear( - self.vdim, self.embed_dim, weight_attr, bias_attr=bias_attr) - self.out_proj = nn.Linear( - self.embed_dim, self.embed_dim, weight_attr, bias_attr=bias_attr) + self.q_proj = nn.Linear(self.embed_dim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) + self.k_proj = nn.Linear(self.kdim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) + self.v_proj = nn.Linear(self.vdim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) + self.out_proj = nn.Linear(self.embed_dim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) def forward(self, input): if _global_parallel_strategy == "dp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1, -1] + }) q = self.q_proj(input) q = tensor.reshape(x=q, shape=[0, 0, self.num_heads, self.head_dim]) @@ -303,43 +307,37 @@ class AttentionLayer(nn.Layer): v = self.v_proj(input) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) k = tensor.reshape(x=k, shape=[0, 0, self.num_heads, self.head_dim]) k = tensor.transpose(x=k, perm=[0, 2, 1, 3]) @@ -347,8 +345,10 @@ class AttentionLayer(nn.Layer): v = tensor.transpose(x=v, perm=[0, 2, 1, 3]) # scale dot product attention - product = layers.matmul( - x=q, y=k, transpose_y=True, alpha=self.head_dim**-0.5) + product = layers.matmul(x=q, + y=k, + transpose_y=True, + alpha=self.head_dim**-0.5) if self.attn_mask is not None: product = product + self.attn_mask @@ -356,11 +356,10 @@ class AttentionLayer(nn.Layer): weights = F.softmax(product) if self.dropout_ratio: - weights = F.dropout( - weights, - self.dropout_ratio, 
- training=self.training, - mode="upscale_in_train") + weights = F.dropout(weights, + self.dropout_ratio, + training=self.training, + mode="upscale_in_train") out = tensor.matmul(weights, v) @@ -371,19 +370,17 @@ class AttentionLayer(nn.Layer): # project to output out = self.out_proj(out) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) return out @@ -394,23 +391,22 @@ def attn_pretrain_forward(train_program, start_program): batch_size = 4 hidden_size = 1024 sequence_len = 512 - input = static.data( - name="query", - shape=[batch_size, sequence_len, hidden_size], - dtype='float32') - attn = AttentionLayer( - hidden_size=hidden_size, - sequence_len=sequence_len, - intermediate_size=4 * hidden_size, - num_heads=16, - dropout_ratio=0.1, - initializer_range=0.02) + input = static.data(name="query", + shape=[batch_size, sequence_len, hidden_size], + dtype='float32') + attn = AttentionLayer(hidden_size=hidden_size, + sequence_len=sequence_len, + intermediate_size=4 * hidden_size, + num_heads=16, + dropout_ratio=0.1, + initializer_range=0.02) out = attn(input) return train_program, start_program class TestAttentionAutoCompletion(unittest.TestCase): + def test_attn_dp(self): global _global_parallel_strategy _global_parallel_strategy = "dp" @@ -419,8 +415,8 @@ class TestAttentionAutoCompletion(unittest.TestCase): train_program = static.Program() start_program = static.Program() dist_context = DistributedContext() - train_program, start_program = attn_pretrain_forward(train_program, - start_program) + train_program, start_program = attn_pretrain_forward( + train_program, start_program) completer = Completer(dist_context) complete_train_program = completer.complete_forward_annotation( train_program) @@ -437,8 +433,8 @@ class TestAttentionAutoCompletion(unittest.TestCase): train_program = static.Program() start_program = static.Program() dist_context = DistributedContext() - train_program, start_program = attn_pretrain_forward(train_program, - start_program) + train_program, start_program = attn_pretrain_forward( + train_program, start_program) completer = Completer(dist_context) complete_train_program = completer.complete_forward_annotation( train_program) @@ -454,8 +450,8 @@ class TestAttentionAutoCompletion(unittest.TestCase): train_program = static.Program() start_program = static.Program() dist_context = DistributedContext() - train_program, start_program = attn_pretrain_forward(train_program, - start_program) + train_program, start_program = attn_pretrain_forward( + train_program, start_program) completer = Completer(dist_context) complete_train_program = completer.complete_forward_annotation( train_program) @@ -463,6 +459,7 @@ class TestAttentionAutoCompletion(unittest.TestCase): class DecoderLayer(nn.Layer): + def __init__(self, vocab_size=32768, hidden_size=1024, @@ -492,29 +489,37 @@ class DecoderLayer(nn.Layer): self.word_embeddings = nn.Embedding( self.vocab_size, self.hidden_size, - weight_attr=paddle.ParamAttr( - name="word_embeddings", - 
initializer=nn.initializer.Normal( - mean=0.0, std=self.initializer_range))) + weight_attr=paddle.ParamAttr(name="word_embeddings", + initializer=nn.initializer.Normal( + mean=0.0, + std=self.initializer_range))) self.position_embeddings = nn.Embedding( self.max_position_embeddings, self.hidden_size, - weight_attr=paddle.ParamAttr( - name="pos_embeddings", - initializer=nn.initializer.Normal( - mean=0.0, std=self.initializer_range))) + weight_attr=paddle.ParamAttr(name="pos_embeddings", + initializer=nn.initializer.Normal( + mean=0.0, + std=self.initializer_range))) weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( mean=0.0, std=self.initializer_range)) bias_attr = None - self.q_proj = nn.Linear( - self.embed_dim, self.embed_dim, weight_attr, bias_attr=bias_attr) - self.k_proj = nn.Linear( - self.kdim, self.embed_dim, weight_attr, bias_attr=bias_attr) - self.v_proj = nn.Linear( - self.vdim, self.embed_dim, weight_attr, bias_attr=bias_attr) - self.out_proj = nn.Linear( - self.embed_dim, self.embed_dim, weight_attr, bias_attr=bias_attr) + self.q_proj = nn.Linear(self.embed_dim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) + self.k_proj = nn.Linear(self.kdim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) + self.v_proj = nn.Linear(self.vdim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) + self.out_proj = nn.Linear(self.embed_dim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) intermediate_size = 4 * self.hidden_size d_model = self.hidden_size @@ -522,10 +527,14 @@ class DecoderLayer(nn.Layer): weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( mean=0.0, std=self.initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.norm1 = nn.LayerNorm(d_model, epsilon=1e-5) self.norm2 = nn.LayerNorm(d_model, epsilon=1e-5) self.dropout1 = nn.Dropout(self.dropout_ratio) @@ -534,37 +543,33 @@ class DecoderLayer(nn.Layer): def forward(self, input_ids, position_ids): if _global_parallel_strategy == "dp": - auto.shard_tensor( - input_ids, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(input_ids, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - input_ids, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(input_ids, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) input_embeddings = self.word_embeddings(input_ids) position_embeddings = self.position_embeddings(position_ids) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.word_embeddings.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.word_embeddings.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.word_embeddings.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.word_embeddings.weight, + dist_attr={ + "process_mesh": _global_process_mesh, 
+ "dims_mapping": [1, -1] + }) embeddings = input_embeddings + position_embeddings embeddings = self.dropout1(embeddings) @@ -581,43 +586,37 @@ class DecoderLayer(nn.Layer): v = self.v_proj(target) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) k = tensor.reshape(x=k, shape=[0, 0, self.num_heads, self.head_dim]) k = tensor.transpose(x=k, perm=[0, 2, 1, 3]) @@ -625,8 +624,10 @@ class DecoderLayer(nn.Layer): v = tensor.transpose(x=v, perm=[0, 2, 1, 3]) # scale dot product attention - product = layers.matmul( - x=q, y=k, transpose_y=True, alpha=self.head_dim**-0.5) + product = layers.matmul(x=q, + y=k, + transpose_y=True, + alpha=self.head_dim**-0.5) if self.attn_mask is not None: product = product + self.attn_mask @@ -634,11 +635,10 @@ class DecoderLayer(nn.Layer): weights = F.softmax(product) if self.dropout_ratio: - weights = F.dropout( - weights, - self.dropout_ratio, - training=self.training, - mode="upscale_in_train") + weights = F.dropout(weights, + self.dropout_ratio, + training=self.training, + mode="upscale_in_train") out = tensor.matmul(weights, v) @@ -650,19 +650,17 @@ class DecoderLayer(nn.Layer): out = self.out_proj(out) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) # Add residual residual = embeddings + self.dropout2(out) @@ -676,31 +674,27 @@ class DecoderLayer(nn.Layer): out3 = self.linear1(out2) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": 
_global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) # Add residual final = residual + self.dropout3(out3) @@ -713,27 +707,27 @@ def decoder_pretrain_forward(train_program, start_program): batch_size = 4 hidden_size = 1024 sequence_len = 512 - input_ids = static.data( - name="input_ids", shape=[batch_size, sequence_len], dtype='int64') - position_ids = static.data( - name="position_ids", - shape=[batch_size, sequence_len], - dtype='int64') - decoder = DecoderLayer( - vocab_size=32768, - hidden_size=hidden_size, - sequence_len=sequence_len, - max_position_embeddings=512, - intermediate_size=4 * hidden_size, - num_heads=16, - dropout_ratio=0.1, - initializer_range=0.02) + input_ids = static.data(name="input_ids", + shape=[batch_size, sequence_len], + dtype='int64') + position_ids = static.data(name="position_ids", + shape=[batch_size, sequence_len], + dtype='int64') + decoder = DecoderLayer(vocab_size=32768, + hidden_size=hidden_size, + sequence_len=sequence_len, + max_position_embeddings=512, + intermediate_size=4 * hidden_size, + num_heads=16, + dropout_ratio=0.1, + initializer_range=0.02) out = decoder(input_ids, position_ids) return train_program, start_program class TestDecoderLayerAutoCompletion(unittest.TestCase): + def test_decoder_dp(self): global _global_parallel_strategy _global_parallel_strategy = "dp" @@ -742,8 +736,8 @@ class TestDecoderLayerAutoCompletion(unittest.TestCase): train_program = static.Program() start_program = static.Program() dist_context = DistributedContext() - train_program, start_program = decoder_pretrain_forward(train_program, - start_program) + train_program, start_program = decoder_pretrain_forward( + train_program, start_program) completer = Completer(dist_context) complete_train_program = completer.complete_forward_annotation( train_program) @@ -758,8 +752,8 @@ class TestDecoderLayerAutoCompletion(unittest.TestCase): train_program = static.Program() start_program = static.Program() dist_context = DistributedContext() - train_program, start_program = decoder_pretrain_forward(train_program, - start_program) + train_program, start_program = decoder_pretrain_forward( + train_program, start_program) completer = Completer(dist_context) complete_train_program = completer.complete_forward_annotation( train_program) @@ -775,8 +769,8 @@ class TestDecoderLayerAutoCompletion(unittest.TestCase): train_program = static.Program() start_program = static.Program() dist_context = DistributedContext() - train_program, start_program = decoder_pretrain_forward(train_program, - start_program) + train_program, start_program = decoder_pretrain_forward( + 
train_program, start_program) completer = Completer(dist_context) complete_train_program = completer.complete_forward_annotation( train_program) diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py index 1a9f70b3528..ab110c929f5 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py @@ -78,17 +78,27 @@ class MultiHeadAttention(nn.Layer): if self.fuse: assert self.kdim == embed_dim assert self.vdim == embed_dim - self.qkv_proj = nn.Linear( - embed_dim, 3 * embed_dim, weight_attr, bias_attr=bias_attr) + self.qkv_proj = nn.Linear(embed_dim, + 3 * embed_dim, + weight_attr, + bias_attr=bias_attr) else: - self.q_proj = nn.Linear( - embed_dim, embed_dim, weight_attr, bias_attr=bias_attr) - self.k_proj = nn.Linear( - self.kdim, embed_dim, weight_attr, bias_attr=bias_attr) - self.v_proj = nn.Linear( - self.vdim, embed_dim, weight_attr, bias_attr=bias_attr) - self.out_proj = nn.Linear( - embed_dim, embed_dim, weight_attr, bias_attr=bias_attr) + self.q_proj = nn.Linear(embed_dim, + embed_dim, + weight_attr, + bias_attr=bias_attr) + self.k_proj = nn.Linear(self.kdim, + embed_dim, + weight_attr, + bias_attr=bias_attr) + self.v_proj = nn.Linear(self.vdim, + embed_dim, + weight_attr, + bias_attr=bias_attr) + self.out_proj = nn.Linear(embed_dim, + embed_dim, + weight_attr, + bias_attr=bias_attr) def _fuse_prepare_qkv(self, query): mix_layer = self.qkv_proj(query) @@ -107,19 +117,17 @@ class MultiHeadAttention(nn.Layer): q = self.q_proj(query) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) q = tensor.reshape(x=q, shape=[0, 0, self.num_heads, self.head_dim]) q = tensor.transpose(x=q, perm=[0, 2, 1, 3]) @@ -152,36 +160,32 @@ class MultiHeadAttention(nn.Layer): k = self.k_proj(key) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) v = self.v_proj(value) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + 
auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) k = tensor.reshape(x=k, shape=[0, 0, self.num_heads, self.head_dim]) k = tensor.transpose(x=k, perm=[0, 2, 1, 3]) @@ -237,19 +241,20 @@ class MultiHeadAttention(nn.Layer): q, k, v, cache = self._prepare_qkv(query, key, value, use_cache, cache) # scale dot product attention - product = layers.matmul( - x=q, y=k, transpose_y=True, alpha=self.head_dim**-0.5) + product = layers.matmul(x=q, + y=k, + transpose_y=True, + alpha=self.head_dim**-0.5) if attn_mask is not None: product = product + attn_mask weights = F.softmax(product) if self.dropout: - weights = F.dropout( - weights, - self.dropout, - training=self.training, - mode="upscale_in_train") + weights = F.dropout(weights, + self.dropout, + training=self.training, + mode="upscale_in_train") out = tensor.matmul(weights, v) @@ -261,19 +266,17 @@ class MultiHeadAttention(nn.Layer): out = self.out_proj(out) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) outs = [out] if self.need_weights: @@ -395,24 +398,21 @@ class TransformerDecoderLayer(nn.Layer): weight_attrs = _convert_param_attr_to_list(weight_attr, 3) bias_attrs = _convert_param_attr_to_list(bias_attr, 3) - self.self_attn = MultiHeadAttention( - d_model, - nhead, - dropout=attn_dropout, - weight_attr=weight_attrs[0], - bias_attr=bias_attrs[0], - topo=topo) + self.self_attn = MultiHeadAttention(d_model, + nhead, + dropout=attn_dropout, + weight_attr=weight_attrs[0], + bias_attr=bias_attrs[0], + topo=topo) if topo is None or topo.mp_info.size == 1: - self.linear1 = nn.Linear( - d_model, - dim_feedforward, - weight_attrs[2], - bias_attr=bias_attrs[2]) - self.linear2 = nn.Linear( - dim_feedforward, - d_model, - weight_attrs[2], - bias_attr=bias_attrs[2]) + self.linear1 = nn.Linear(d_model, + dim_feedforward, + weight_attrs[2], + bias_attr=bias_attrs[2]) + self.linear2 = nn.Linear(dim_feedforward, + d_model, + weight_attrs[2], + bias_attr=bias_attrs[2]) self.norm1 = nn.LayerNorm(d_model, epsilon=1e-5) self.norm2 = nn.LayerNorm(d_model, epsilon=1e-5) @@ -440,34 +440,30 @@ class TransformerDecoderLayer(nn.Layer): tgt = self.norm2(tgt) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.linear2.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.linear2.weight, + dist_attr={ + 
"process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.linear2.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.linear2.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) # tgt = self.dropout2( # self.linear2(F.gelu( @@ -483,8 +479,8 @@ class TransformerDecoderLayer(nn.Layer): return tgt if use_cache is False else (tgt, incremental_cache) def gen_cache(self, memory): - incremental_cache = self.self_attn.gen_cache( - memory, type=self.self_attn.Cache) + incremental_cache = self.self_attn.gen_cache(memory, + type=self.self_attn.Cache) return incremental_cache @@ -506,17 +502,16 @@ class GPTEmbeddings(nn.Layer): self.word_embeddings = nn.Embedding( vocab_size, hidden_size, - weight_attr=paddle.ParamAttr( - name="word_embeddings", - initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range))) + weight_attr=paddle.ParamAttr(name="word_embeddings", + initializer=nn.initializer.Normal( + mean=0.0, + std=initializer_range))) self.position_embeddings = nn.Embedding( max_position_embeddings, hidden_size, - weight_attr=paddle.ParamAttr( - name="pos_embeddings", - initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range))) + weight_attr=paddle.ParamAttr(name="pos_embeddings", + initializer=nn.initializer.Normal( + mean=0.0, std=initializer_range))) self.dropout = nn.Dropout(hidden_dropout_prob) @@ -529,19 +524,17 @@ class GPTEmbeddings(nn.Layer): input_embedings = self.word_embeddings(input_ids) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.word_embeddings.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.word_embeddings.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.word_embeddings.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.word_embeddings.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) position_embeddings = self.position_embeddings(position_ids) embeddings = input_embedings + position_embeddings @@ -580,37 +573,36 @@ class GPTModel(nn.Layer): if self.pipline_mode: self.layer_per_stage = num_hidden_layers // self.topo.pp_info.size - self.embeddings = GPTEmbeddings( - vocab_size, hidden_size, hidden_dropout_prob, - max_position_embeddings, type_vocab_size, self.initializer_range, - topo) + self.embeddings = GPTEmbeddings(vocab_size, hidden_size, + hidden_dropout_prob, + max_position_embeddings, + type_vocab_size, self.initializer_range, + topo) decoder_layers = nn.LayerList() for i in range(num_hidden_layers): DecoderLayer = TransformerDecoderLayer decoder_layers.append( - DecoderLayer( - d_model=hidden_size, - nhead=num_attention_heads, - dim_feedforward=intermediate_size, - dropout=hidden_dropout_prob, - activation=hidden_act, - attn_dropout=attention_probs_dropout_prob, - act_dropout=hidden_dropout_prob, - weight_attr=paddle.ParamAttr( - initializer=nn.initializer.Normal( - mean=0.0, std=self.initializer_range)), - bias_attr=None, - topo=topo)) + DecoderLayer(d_model=hidden_size, + nhead=num_attention_heads, + dim_feedforward=intermediate_size, + dropout=hidden_dropout_prob, + activation=hidden_act, + attn_dropout=attention_probs_dropout_prob, + 
act_dropout=hidden_dropout_prob, + weight_attr=paddle.ParamAttr( + initializer=nn.initializer.Normal( + mean=0.0, std=self.initializer_range)), + bias_attr=None, + topo=topo)) Decoder = TransformerDecoder - self.decoder = Decoder( - decoder_layers, - num_hidden_layers, - norm="LayerNorm", - hidden_size=hidden_size, - topo=topo) + self.decoder = Decoder(decoder_layers, + num_hidden_layers, + norm="LayerNorm", + hidden_size=hidden_size, + topo=topo) self.checkpoints = [] @@ -625,29 +617,27 @@ class GPTModel(nn.Layer): length = paddle.shape(input_ids)[1] # Use bool mask attention_mask = paddle.tensor.tril( - paddle.ones( - (length, length), - dtype=self.embeddings.word_embeddings.weight.dtype)) + paddle.ones((length, length), + dtype=self.embeddings.word_embeddings.weight.dtype)) if position_ids is None: past_length = 0 if cache is not None: past_length = paddle.shape(cache[0].k)[-2] - position_ids = paddle.arange( - past_length, - paddle.shape(input_ids)[-1] + past_length, - dtype='int64') + position_ids = paddle.arange(past_length, + paddle.shape(input_ids)[-1] + + past_length, + dtype='int64') position_ids = position_ids.unsqueeze(0) # .expand_as(input_ids) - position_ids = paddle.fluid.layers.expand_as(position_ids, - input_ids) - embedding_output = self.embeddings( - input_ids=input_ids, position_ids=position_ids) + position_ids = paddle.fluid.layers.expand_as( + position_ids, input_ids) + embedding_output = self.embeddings(input_ids=input_ids, + position_ids=position_ids) # TODO, use registered buffer - causal_mask = paddle.tensor.triu( - paddle.ones((paddle.shape(input_ids)[-1], - paddle.shape(input_ids)[-1])) * -1e9, - diagonal=1) + causal_mask = paddle.tensor.triu(paddle.ones( + (paddle.shape(input_ids)[-1], paddle.shape(input_ids)[-1])) * -1e9, + diagonal=1) if attention_mask is not None: attention_mask = attention_mask + causal_mask @@ -657,12 +647,11 @@ class GPTModel(nn.Layer): # The tensor returned by triu not in static graph. 
attention_mask.stop_gradient = True - encoder_outputs = self.decoder( - embedding_output, - memory=None, - tgt_mask=attention_mask, - use_cache=use_cache, - cache=cache) + encoder_outputs = self.decoder(embedding_output, + memory=None, + tgt_mask=attention_mask, + use_cache=use_cache, + cache=cache) self.checkpoints.extend(self.decoder.checkpoints) return encoder_outputs @@ -686,8 +675,9 @@ class GPTForPretraining(nn.Layer): input_parallel = paddle.distributed.collective._c_identity( lm_output, group=None) - logits = paddle.matmul( - input_parallel, logit_weights, transpose_y=True) + logits = paddle.matmul(input_parallel, + logit_weights, + transpose_y=True) if parallel_output: return logits @@ -750,50 +740,49 @@ def gpt_pretrain_forward(train_program, start_program): start_program), utils.unique_name.guard(): batch_size = 16 sequence_len = 512 - input_ids = static.data( - name="input_ids", shape=[batch_size, sequence_len], dtype='int64') - position_ids = static.data( - name="position_ids", - shape=[batch_size, sequence_len], - dtype='int64') + input_ids = static.data(name="input_ids", + shape=[batch_size, sequence_len], + dtype='int64') + position_ids = static.data(name="position_ids", + shape=[batch_size, sequence_len], + dtype='int64') attention_mask = static.data( name="attention_mask", shape=[batch_size, 1, sequence_len, sequence_len], dtype='float64') - labels = static.data( - name="labels", shape=[batch_size, sequence_len], dtype='int64') - loss_mask = static.data( - name="loss_mask", shape=[batch_size, sequence_len], dtype='float64') + labels = static.data(name="labels", + shape=[batch_size, sequence_len], + dtype='int64') + loss_mask = static.data(name="loss_mask", + shape=[batch_size, sequence_len], + dtype='float64') if _global_parallel_strategy == "dp": - auto.shard_tensor( - input_ids, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(input_ids, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - input_ids, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) - - gpt = GPTModel( - vocab_size=32768, - hidden_size=1024, - num_hidden_layers=2, - num_attention_heads=16, - intermediate_size=4096, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=1024, - type_vocab_size=16, - initializer_range=0.02, - pad_token_id=0, - topo=None) + auto.shard_tensor(input_ids, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) + + gpt = GPTModel(vocab_size=32768, + hidden_size=1024, + num_hidden_layers=2, + num_attention_heads=16, + intermediate_size=4096, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=1024, + type_vocab_size=16, + initializer_range=0.02, + pad_token_id=0, + topo=None) model = GPTForPretraining(gpt) @@ -807,6 +796,7 @@ def gpt_pretrain_forward(train_program, start_program): class TestGPTAutoCompletion(unittest.TestCase): + def test_gpt_dp(self): global _global_parallel_strategy _global_parallel_strategy = "dp" @@ -816,8 +806,8 @@ class TestGPTAutoCompletion(unittest.TestCase): train_program = static.Program() start_program = static.Program() dist_context = DistributedContext() - train_program, start_program = gpt_pretrain_forward(train_program, - start_program) + train_program, start_program = gpt_pretrain_forward( + train_program, start_program) 
completer = Completer(dist_context) complete_train_program = completer.complete_forward_annotation( train_program) @@ -832,8 +822,8 @@ class TestGPTAutoCompletion(unittest.TestCase): train_program = static.Program() start_program = static.Program() dist_context = DistributedContext() - train_program, start_program = gpt_pretrain_forward(train_program, - start_program) + train_program, start_program = gpt_pretrain_forward( + train_program, start_program) completer = Completer(dist_context) complete_train_program = completer.complete_forward_annotation( train_program) @@ -849,8 +839,8 @@ class TestGPTAutoCompletion(unittest.TestCase): train_program = static.Program() start_program = static.Program() dist_context = DistributedContext() - train_program, start_program = gpt_pretrain_forward(train_program, - start_program) + train_program, start_program = gpt_pretrain_forward( + train_program, start_program) completer = Completer(dist_context) complete_train_program = completer.complete_forward_annotation( train_program) diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py index d05e4938793..bb8642d569e 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py @@ -46,6 +46,7 @@ device = "gpu" if core.is_compiled_with_cuda() else "cpu" class MLPLayer(nn.Layer): + def __init__(self, hidden_size=256, intermediate_size=4 * 256, @@ -54,28 +55,34 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) self.is_distributed = is_distributed def forward(self, input): if self.is_distributed: - auto.shard_tensor( - self.linear0.weight, - dist_attr={"process_mesh": PP_MESH_0, - "dims_mapping": [-1, 1]}) - auto.shard_tensor( - self.linear1.weight, - dist_attr={"process_mesh": PP_MESH_1, - "dims_mapping": [1, -1]}) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [1, -1] + }) out = self.norm(input) out = self.linear0(out) @@ -89,8 +96,9 @@ def get_single_node_data(): train_program = paddle.static.Program() startup_program = paddle.static.Program() - loss, train_program, startup_program = mlp_forward( - train_program, startup_program, is_distributed=False) + loss, train_program, startup_program = mlp_forward(train_program, + startup_program, + is_distributed=False) cost_model = core.CostModel() cost_data = cost_model.profile_measure(train_program, startup_program, @@ -112,31 +120,36 @@ def mlp_forward(train_program, start_program, is_distributed=True): hidden_size = 256 sequence_len = 128 if is_distributed: - input = static.data( - name="input", 
shape=[batch_size, hidden_size], dtype='float32') - label = static.data( - name="label", shape=[batch_size, 1], dtype='float32') + input = static.data(name="input", + shape=[batch_size, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, 1], + dtype='float32') else: - input = paddle.ones( - name="input", shape=[batch_size, hidden_size], dtype='float32') - label = paddle.ones( - name="label", shape=[batch_size, 1], dtype='float32') + input = paddle.ones(name="input", + shape=[batch_size, hidden_size], + dtype='float32') + label = paddle.ones(name="label", + shape=[batch_size, 1], + dtype='float32') if is_distributed: - auto.shard_tensor( - input, - dist_attr={"process_mesh": PP_MESH_0, - "dims_mapping": [0, -1]}) - auto.shard_tensor( - label, - dist_attr={"process_mesh": PP_MESH_1, - "dims_mapping": [0, -1]}) - - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - initializer_range=0.02, - is_distributed=is_distributed) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [0, -1] + }) + auto.shard_tensor(label, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [0, -1] + }) + + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + initializer_range=0.02, + is_distributed=is_distributed) predict = mlp(input) error_cost = paddle.nn.functional.square_error_cost(predict, label) @@ -160,13 +173,12 @@ def get_dist_prog(train_program, startup_program, dist_context, rank_id): train_program) dist_context.block_state.parse_forward_blocks(complete_train_program) - params_grads = parallelizer._generate_backward( - complete_train_program, - startup_program, - loss, - parameter_list=None, - no_grad_set=None, - callbacks=None) + params_grads = parallelizer._generate_backward(complete_train_program, + startup_program, + loss, + parameter_list=None, + no_grad_set=None, + callbacks=None) # logical partition partitioner = Partitioner(dist_context, rank_id) @@ -207,18 +219,18 @@ def check_empty_program_memory(cost): class TestCostModel(unittest.TestCase): + def test_empty_program_cost_model(self): empty_program = paddle.static.Program() startup_program = paddle.static.Program() standalone_cost_data = [{}] empty_pp_cfg = None cluster = None - cost = estimate_cost( - [empty_program], - cluster=cluster, - pipeline_config=empty_pp_cfg, - standalone_cost_data=standalone_cost_data, - batch_size=1) + cost = estimate_cost([empty_program], + cluster=cluster, + pipeline_config=empty_pp_cfg, + standalone_cost_data=standalone_cost_data, + batch_size=1) self.assertTrue(check_empty_program_runtime(cost)) self.assertTrue(check_empty_program_memory(cost)) @@ -237,12 +249,11 @@ class TestCostModel(unittest.TestCase): resharder.reshard() dist_program.append(distributed_program) cluster = None - cost = estimate_cost( - dist_program, - cluster=cluster, - pipeline_config=pp_cfg, - standalone_cost_data=standalone_cost_data, - batch_size=4) + cost = estimate_cost(dist_program, + cluster=cluster, + pipeline_config=pp_cfg, + standalone_cost_data=standalone_cost_data, + batch_size=4) self.assertTrue(check_runtime_estimation(cost)) self.assertTrue(check_memory_estimation(cost)) diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_data_unshard.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_data_unshard.py index 6cc953dfdee..c8753002aa6 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_data_unshard.py +++ 
b/python/paddle/fluid/tests/unittests/test_auto_parallel_data_unshard.py @@ -21,6 +21,7 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestAutoParallelDataUnshard(TestMultipleGpus): + def test_auto_parallel_data_unshard(self): self.run_mnist_2gpu('auto_parallel_data_unshard.py') diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py index 29575dc76c2..ca69535049c 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py @@ -49,13 +49,12 @@ def get_dist_prog(train_program, ) if complete_train_program is None else complete_train_program dist_context.block_state.parse_forward_blocks(complete_train_program) - params_grads = parallelizer._generate_backward( - complete_train_program, - startup_program, - loss, - parameter_list=None, - no_grad_set=None, - callbacks=None) + params_grads = parallelizer._generate_backward(complete_train_program, + startup_program, + loss, + parameter_list=None, + no_grad_set=None, + callbacks=None) # logical partition partitioner = Partitioner(dist_context, rank_id) @@ -69,6 +68,7 @@ def get_dist_prog(train_program, class TestDistributedTensor(unittest.TestCase): + def test_new_local_tensor(self): test_auto_parallel_reshard._global_process_mesh = auto.ProcessMesh( mesh=[0, 1]) @@ -130,40 +130,46 @@ class TestDistributedTensor(unittest.TestCase): topology = [2, 3] global_sizes = [6, 6] - # rank 0 [(0, 2), (0, 3)] + # rank 0 [(0, 2), (0, 3)] # rank 1 [(2, 4), (0, 3)] # rank 4 [(2, 4), (3, 6)] rank = 0 - local_sizes = DistributedTensor.get_local_sizes( - global_sizes, dims_mapping, topology, processes) + local_sizes = DistributedTensor.get_local_sizes(global_sizes, + dims_mapping, topology, + processes) self.assertEqual(local_sizes, [2, 3]) local_offsets = DistributedTensor.get_local_offsets( global_sizes, dims_mapping, topology, processes, rank) self.assertEqual(local_offsets, [0, 0]) - local_shard = DistributedTensor.get_local_shard( - global_sizes, dims_mapping, topology, processes, rank) + local_shard = DistributedTensor.get_local_shard(global_sizes, + dims_mapping, topology, + processes, rank) self.assertEqual(local_shard, [(0, 2), (0, 3)]) rank = 1 - local_sizes = DistributedTensor.get_local_sizes( - global_sizes, dims_mapping, topology, processes) + local_sizes = DistributedTensor.get_local_sizes(global_sizes, + dims_mapping, topology, + processes) self.assertEqual(local_sizes, [2, 3]) local_offsets = DistributedTensor.get_local_offsets( global_sizes, dims_mapping, topology, processes, rank) self.assertEqual(local_offsets, [2, 0]) - local_shard = DistributedTensor.get_local_shard( - global_sizes, dims_mapping, topology, processes, rank) + local_shard = DistributedTensor.get_local_shard(global_sizes, + dims_mapping, topology, + processes, rank) self.assertEqual(local_shard, [(2, 4), (0, 3)]) rank = 4 - local_sizes = DistributedTensor.get_local_sizes( - global_sizes, dims_mapping, topology, processes) + local_sizes = DistributedTensor.get_local_sizes(global_sizes, + dims_mapping, topology, + processes) self.assertEqual(local_sizes, [2, 3]) local_offsets = DistributedTensor.get_local_offsets( global_sizes, dims_mapping, topology, processes, rank) self.assertEqual(local_offsets, [2, 3]) - local_shard = DistributedTensor.get_local_shard( - global_sizes, dims_mapping, topology, processes, rank) + local_shard = 
DistributedTensor.get_local_shard(global_sizes, + dims_mapping, topology, + processes, rank) self.assertEqual(local_shard, [(2, 4), (3, 6)]) # global sizes @@ -177,11 +183,12 @@ class TestDistributedTensor(unittest.TestCase): tensor_dist_attr.dims_mapping = [1, 0] tensor_dist_attr.process_mesh = auto.ProcessMesh( mesh=[[0, 1, 2], [3, 4, 5]]) - serial_tensor = paddle.static.data( - name="data", shape=[6, 6], dtype='float32') + serial_tensor = paddle.static.data(name="data", + shape=[6, 6], + dtype='float32') dist_tensor = DistributedTensor(serial_tensor, tensor_dist_attr) - # rank 0 [(0, 2), (0, 3)] + # rank 0 [(0, 2), (0, 3)] # rank 1 [(2, 4), (0, 3)] # rank 4 [(2, 4), (3, 6)] rank = 0 diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py index bbf7e3a4672..3d69924fd6f 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py @@ -23,6 +23,7 @@ from paddle.distributed.auto_parallel.graph import Graph class TestAutoParallelGraph(unittest.TestCase): + def test_graph(self): graph = Graph(name="foo") self.assertEqual(graph.attrs["name"], "foo") diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py index 45b9defeb7c..a147b0f1f37 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py @@ -375,6 +375,7 @@ cluster_json = """ class MLPLayer(nn.Layer): + def __init__(self, hidden_size=64, intermediate_size=4 * 64, @@ -392,54 +393,57 @@ class MLPLayer(nn.Layer): weight_attr2 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr2)) weight_attr3 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr3)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr1, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr0, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr1, + bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) - self.linear2 = nn.Linear( - d_model, dim_feedforward, weight_attr2, bias_attr=bias_attr) - self.linear3 = nn.Linear( - dim_feedforward, d_model, weight_attr3, bias_attr=bias_attr) + self.linear2 = nn.Linear(d_model, + dim_feedforward, + weight_attr2, + bias_attr=bias_attr) + self.linear3 = nn.Linear(dim_feedforward, + d_model, + weight_attr3, + bias_attr=bias_attr) def forward(self, input): if _global_parallel_strategy == "dp_mp_pp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh[0], - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh[0], - "dims_mapping": [1, -1] - }) - auto.shard_tensor( - self.linear2.weight, - dist_attr={ - "process_mesh": _global_process_mesh[1], - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.linear3.weight, - dist_attr={ - "process_mesh": _global_process_mesh[1], - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh[0], + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh[0], + "dims_mapping": [1, -1] + }) + 
auto.shard_tensor(self.linear2.weight, + dist_attr={ + "process_mesh": _global_process_mesh[1], + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.linear3.weight, + dist_attr={ + "process_mesh": _global_process_mesh[1], + "dims_mapping": [1, -1] + }) out = self.norm(input) out = self.linear0(out) out = F.gelu(out, approximate=True) out = self.linear1(out) - auto.shard_tensor( - out, - dist_attr={ - "process_mesh": _global_process_mesh[1], - "dims_mapping": [0, -1] - }) + auto.shard_tensor(out, + dist_attr={ + "process_mesh": _global_process_mesh[1], + "dims_mapping": [0, -1] + }) out = self.linear2(out) out = F.gelu(out, approximate=True) out = self.linear3(out) @@ -451,22 +455,22 @@ def mlp_forward(train_program, start_program): utils.unique_name.guard(): batch_size = 4 hidden_size = 64 - input = static.data( - name="input", shape=[batch_size, hidden_size], dtype='float32') - label = static.data( - name="label", shape=[batch_size, 1], dtype='float32') + input = static.data(name="input", + shape=[batch_size, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, 1], + dtype='float32') if _global_parallel_strategy == "dp_mp_pp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh[0], - "dims_mapping": [0, -1] - }) - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - initializer_range=0.02) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh[0], + "dims_mapping": [0, -1] + }) + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + initializer_range=0.02) predict = mlp(input) error_cost = paddle.nn.functional.square_error_cost(predict, label) loss = paddle.mean(error_cost) @@ -487,13 +491,12 @@ def get_dist_prog(train_program, startup_program, dist_context, rank_id): complete_train_program = completer.complete_forward_annotation( train_program) dist_context.block_state.parse_forward_blocks(complete_train_program) - params_grads = parallelizer._generate_backward( - complete_train_program, - startup_program, - loss, - parameter_list=None, - no_grad_set=None, - callbacks=None) + params_grads = parallelizer._generate_backward(complete_train_program, + startup_program, + loss, + parameter_list=None, + no_grad_set=None, + callbacks=None) partitioner = Partitioner(dist_context, rank_id) dist_train_program, dist_startup_prog, dist_params_grads = partitioner.partition( @@ -523,6 +526,7 @@ def get_device_local_ids(machine): class TestAutoParallelMapper(unittest.TestCase): + def test_mapper_dp_mp_pp(self): cluster_json_file = "" cluster_json_object = json.loads(cluster_json) @@ -564,8 +568,8 @@ class TestAutoParallelMapper(unittest.TestCase): self.assertTrue(is_in_machine(device_ids[0], machine)) machine_mapped_ranks.add(rank) machine_mapped_device_local_ids.add(device_ids[0]) - self.assertEqual( - len(machine_mapped_ranks), len(machine_mapped_device_local_ids)) + self.assertEqual(len(machine_mapped_ranks), + len(machine_mapped_device_local_ids)) all_mapped_ranks.update(machine_mapped_ranks) self.assertEqual(set(processes), all_mapped_ranks) @@ -596,24 +600,30 @@ class TestAutoParallelMapper(unittest.TestCase): broadcast_op = train_program.global_block().append_op( type="c_broadcast", inputs={'X': input}, - attrs={'ring_id': ring_id, - 'root': root_id}, + attrs={ + 'ring_id': ring_id, + 'root': root_id + }, outputs={'Out': output}) self.assertEqual(get_comm_volume(broadcast_op, 0, 1), 400) self.assertEqual(get_comm_volume(broadcast_op, 1, 0), None) 
allgather_op = train_program.global_block().append_op( type="c_allgather", inputs={'X': input}, - attrs={'ring_id': ring_id, - 'nranks': nranks}, + attrs={ + 'ring_id': ring_id, + 'nranks': nranks + }, outputs={'Out': output}) self.assertEqual(get_comm_volume(allgather_op, 0, 1), 400) self.assertEqual(get_comm_volume(allgather_op, 0, 0), None) reduce_op = train_program.global_block().append_op( type="c_reduce_sum", inputs={'X': input}, - attrs={'ring_id': ring_id, - 'root_id': root_id}, + attrs={ + 'ring_id': ring_id, + 'root_id': root_id + }, outputs={'Out': output}) self.assertEqual(get_comm_volume(reduce_op, 0, 1), None) self.assertEqual(get_comm_volume(reduce_op, 1, 0), 400) diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py index ef8780a020f..80135b62885 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py @@ -96,13 +96,13 @@ def initialization_check(mode, dist_context, dist_startup_prog, serial_startup_prog, var_need_broadcast, process_mesh, mp_parallel_axis, dp_parallel_axis): if 'mp' in mode: - group_ranks = _get_comm_group( - process_mesh.processes, process_mesh.topology, mp_parallel_axis, 3) + group_ranks = _get_comm_group(process_mesh.processes, + process_mesh.topology, mp_parallel_axis, + 3) mp_ring_id = new_process_group(group_ranks).id broadcast_ops = [ - op for op in dist_startup_prog.global_block().ops - if (op.type == "c_broadcast" and op.desc.attr("ring_id") == - mp_ring_id) + op for op in dist_startup_prog.global_block().ops if + (op.type == "c_broadcast" and op.desc.attr("ring_id") == mp_ring_id) ] broadcast_varnames = sorted( [op.desc.output_arg_names()[0] for op in broadcast_ops]) @@ -110,14 +110,14 @@ def initialization_check(mode, dist_context, dist_startup_prog, return False if 'dp' in mode: - group_ranks = _get_comm_group( - process_mesh.processes, process_mesh.topology, dp_parallel_axis, 3) + group_ranks = _get_comm_group(process_mesh.processes, + process_mesh.topology, dp_parallel_axis, + 3) dp_ring_id = new_process_group(group_ranks).id nparam = len(serial_startup_prog.all_parameters()) nbroadcast_dp = len([ - op for op in dist_startup_prog.global_block().ops - if (op.type == "c_broadcast" and op.desc.attr("ring_id") == - dp_ring_id) + op for op in dist_startup_prog.global_block().ops if + (op.type == "c_broadcast" and op.desc.attr("ring_id") == dp_ring_id) ]) if nparam != nbroadcast_dp: return False @@ -226,7 +226,7 @@ def distributed_attr_check_for_dist_op(serial_main_prog, dist_main_prog, equal = check_equal_var_dist_attr(serial_out_dist_attr, out_dist_attr) - # check op's dist_attr + # check op's dist_attr equal = check_equal_dist_op_attr(dist_context, dist_main_prog, serial_op, dist_ops, dist_op_idx[i]) @@ -251,6 +251,7 @@ def distributed_attr_check_for_program(dist_main_prog, dist_context): class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -259,57 +260,55 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = 
nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) self.dropout = nn.Dropout(dropout_ratio, mode="upscale_in_train") def forward(self, input): if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) else: - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) out = self.norm(input) out = self.linear0(out) @@ -326,36 +325,33 @@ def mlp_pretrain_forward(train_program, start_program): batch_size = 4 hidden_size = 1024 sequence_len = 512 - input = static.data( - name="input", - shape=[batch_size, sequence_len, hidden_size], - dtype='float32') + input = static.data(name="input", + shape=[batch_size, sequence_len, hidden_size], + dtype='float32') if _global_parallel_strategy == "dp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1, -1] - }) - - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - dropout_ratio=0.1, - initializer_range=0.02) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1, -1] + }) + + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + dropout_ratio=0.1, + initializer_range=0.02) out = mlp(input) return train_program, start_program class TestMLPAutoPartitioner(unittest.TestCase): + def test_mlp_dp(self): global _global_parallel_strategy _global_parallel_strategy = "dp" @@ -379,18 +375,17 @@ class TestMLPAutoPartitioner(unittest.TestCase): dist_ops = [op.type for op in dist_ops] self.assertTrue(serial_ops == 
dist_ops) - # parameter initialization + # parameter initialization var_need_broadcast = [] self.assertTrue( - initialization_check( - _global_parallel_strategy, - dist_context, - dist_startup_prog, - serial_startup_prog, - var_need_broadcast, - _global_process_mesh, - mp_parallel_axis=None, - dp_parallel_axis=0)) + initialization_check(_global_parallel_strategy, + dist_context, + dist_startup_prog, + serial_startup_prog, + var_need_broadcast, + _global_process_mesh, + mp_parallel_axis=None, + dp_parallel_axis=0)) def test_mlp_mp(self): global _global_parallel_strategy @@ -430,19 +425,18 @@ class TestMLPAutoPartitioner(unittest.TestCase): ] self.assertTrue(dist_ops == ref_ops) - # parameter initialization + # parameter initialization var_need_broadcast = sorted( ['layer_norm_0.b_0', 'layer_norm_0.w_0', 'linear_1.b_0']) self.assertTrue( - initialization_check( - _global_parallel_strategy, - dist_context, - dist_startup_prog, - serial_startup_prog, - var_need_broadcast, - _global_process_mesh, - mp_parallel_axis=0, - dp_parallel_axis=None)) + initialization_check(_global_parallel_strategy, + dist_context, + dist_startup_prog, + serial_startup_prog, + var_need_broadcast, + _global_process_mesh, + mp_parallel_axis=0, + dp_parallel_axis=None)) # check var and op all have dist_attr in dist_main_program self.assertTrue( @@ -498,15 +492,14 @@ class TestMLPAutoPartitioner(unittest.TestCase): var_need_broadcast = sorted( ['layer_norm_0.b_0', 'layer_norm_0.w_0', 'linear_1.b_0']) self.assertTrue( - initialization_check( - _global_parallel_strategy, - dist_context, - dist_startup_prog, - serial_startup_prog, - var_need_broadcast, - _global_process_mesh, - mp_parallel_axis=1, - dp_parallel_axis=0)) + initialization_check(_global_parallel_strategy, + dist_context, + dist_startup_prog, + serial_startup_prog, + var_need_broadcast, + _global_process_mesh, + mp_parallel_axis=1, + dp_parallel_axis=0)) # check var and op all have dist_attr in dist_main_program self.assertTrue( @@ -521,6 +514,7 @@ class TestMLPAutoPartitioner(unittest.TestCase): class AttentionLayer(nn.Layer): + def __init__(self, hidden_size=1024, sequence_len=512, @@ -542,34 +536,40 @@ class AttentionLayer(nn.Layer): self.initializer_range = initializer_range self.training = True self.attn_mask = None - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.q_proj = nn.Linear( - self.embed_dim, self.embed_dim, weight_attr, bias_attr=bias_attr) - self.k_proj = nn.Linear( - self.kdim, self.embed_dim, weight_attr, bias_attr=bias_attr) - self.v_proj = nn.Linear( - self.vdim, self.embed_dim, weight_attr, bias_attr=bias_attr) - self.out_proj = nn.Linear( - self.embed_dim, self.embed_dim, weight_attr, bias_attr=bias_attr) + self.q_proj = nn.Linear(self.embed_dim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) + self.k_proj = nn.Linear(self.kdim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) + self.v_proj = nn.Linear(self.vdim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) + self.out_proj = nn.Linear(self.embed_dim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) def forward(self, input): if _global_parallel_strategy == "dp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": 
[0, -1, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1, -1] + }) q = self.q_proj(input) q = tensor.reshape(x=q, shape=[0, 0, self.num_heads, self.head_dim]) @@ -579,43 +579,37 @@ class AttentionLayer(nn.Layer): v = self.v_proj(input) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) k = tensor.reshape(x=k, shape=[0, 0, self.num_heads, self.head_dim]) k = tensor.transpose(x=k, perm=[0, 2, 1, 3]) @@ -623,8 +617,10 @@ class AttentionLayer(nn.Layer): v = tensor.transpose(x=v, perm=[0, 2, 1, 3]) # scale dot product attention - product = layers.matmul( - x=q, y=k, transpose_y=True, alpha=self.head_dim**-0.5) + product = layers.matmul(x=q, + y=k, + transpose_y=True, + alpha=self.head_dim**-0.5) if self.attn_mask is not None: product = product + self.attn_mask @@ -632,11 +628,10 @@ class AttentionLayer(nn.Layer): weights = F.softmax(product) if self.dropout_ratio: - weights = F.dropout( - weights, - self.dropout_ratio, - training=self.training, - mode="upscale_in_train") + weights = F.dropout(weights, + self.dropout_ratio, + training=self.training, + mode="upscale_in_train") out = tensor.matmul(weights, v) @@ -647,19 +642,17 @@ class AttentionLayer(nn.Layer): # project to output out = self.out_proj(out) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": 
_global_process_mesh, + "dims_mapping": [1, -1] + }) return out @@ -670,23 +663,22 @@ def attn_pretrain_forward(train_program, start_program): batch_size = 4 hidden_size = 1024 sequence_len = 512 - input = static.data( - name="query", - shape=[batch_size, sequence_len, hidden_size], - dtype='float32') - attn = AttentionLayer( - hidden_size=hidden_size, - sequence_len=sequence_len, - intermediate_size=4 * hidden_size, - num_heads=16, - dropout_ratio=0.1, - initializer_range=0.02) + input = static.data(name="query", + shape=[batch_size, sequence_len, hidden_size], + dtype='float32') + attn = AttentionLayer(hidden_size=hidden_size, + sequence_len=sequence_len, + intermediate_size=4 * hidden_size, + num_heads=16, + dropout_ratio=0.1, + initializer_range=0.02) out = attn(input) return train_program, start_program class TestAttentionAutoPartitioner(unittest.TestCase): + def test_attn_dp(self): global _global_parallel_strategy _global_parallel_strategy = "dp" @@ -709,18 +701,17 @@ class TestAttentionAutoPartitioner(unittest.TestCase): dist_ops = [op.type for op in dist_ops] self.assertTrue(serial_ops == dist_ops) - # parameter initialization + # parameter initialization var_need_broadcast = [] self.assertTrue( - initialization_check( - _global_parallel_strategy, - dist_context, - dist_startup_prog, - serial_startup_prog, - var_need_broadcast, - _global_process_mesh, - mp_parallel_axis=None, - dp_parallel_axis=0)) + initialization_check(_global_parallel_strategy, + dist_context, + dist_startup_prog, + serial_startup_prog, + var_need_broadcast, + _global_process_mesh, + mp_parallel_axis=None, + dp_parallel_axis=0)) def test_attn_mp(self): global _global_parallel_strategy @@ -765,18 +756,17 @@ class TestAttentionAutoPartitioner(unittest.TestCase): ] self.assertTrue(dist_ops == ref_ops) - # parameter initialization + # parameter initialization var_need_broadcast = ['linear_3.b_0'] self.assertTrue( - initialization_check( - _global_parallel_strategy, - dist_context, - dist_startup_prog, - serial_startup_prog, - var_need_broadcast, - _global_process_mesh, - mp_parallel_axis=0, - dp_parallel_axis=None)) + initialization_check(_global_parallel_strategy, + dist_context, + dist_startup_prog, + serial_startup_prog, + var_need_broadcast, + _global_process_mesh, + mp_parallel_axis=0, + dp_parallel_axis=None)) # check var and op all have dist_attr in dist_main_program self.assertTrue( @@ -833,18 +823,17 @@ class TestAttentionAutoPartitioner(unittest.TestCase): ] self.assertTrue(dist_ops == ref_ops) - # parameter initialization + # parameter initialization var_need_broadcast = ['linear_3.b_0'] self.assertTrue( - initialization_check( - _global_parallel_strategy, - dist_context, - dist_startup_prog, - serial_startup_prog, - var_need_broadcast, - _global_process_mesh, - mp_parallel_axis=1, - dp_parallel_axis=0)) + initialization_check(_global_parallel_strategy, + dist_context, + dist_startup_prog, + serial_startup_prog, + var_need_broadcast, + _global_process_mesh, + mp_parallel_axis=1, + dp_parallel_axis=0)) # check var and op all have dist_attr in dist_main_program self.assertTrue( @@ -859,6 +848,7 @@ class TestAttentionAutoPartitioner(unittest.TestCase): class DecoderLayer(nn.Layer): + def __init__(self, vocab_size=32768, hidden_size=1024, @@ -888,29 +878,37 @@ class DecoderLayer(nn.Layer): self.word_embeddings = nn.Embedding( self.vocab_size, self.hidden_size, - weight_attr=paddle.ParamAttr( - name="word_embeddings", - initializer=nn.initializer.Normal( - mean=0.0, std=self.initializer_range))) + 
weight_attr=paddle.ParamAttr(name="word_embeddings", + initializer=nn.initializer.Normal( + mean=0.0, + std=self.initializer_range))) self.position_embeddings = nn.Embedding( self.max_position_embeddings, self.hidden_size, - weight_attr=paddle.ParamAttr( - name="pos_embeddings", - initializer=nn.initializer.Normal( - mean=0.0, std=self.initializer_range))) + weight_attr=paddle.ParamAttr(name="pos_embeddings", + initializer=nn.initializer.Normal( + mean=0.0, + std=self.initializer_range))) weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( mean=0.0, std=self.initializer_range)) bias_attr = None - self.q_proj = nn.Linear( - self.embed_dim, self.embed_dim, weight_attr, bias_attr=bias_attr) - self.k_proj = nn.Linear( - self.kdim, self.embed_dim, weight_attr, bias_attr=bias_attr) - self.v_proj = nn.Linear( - self.vdim, self.embed_dim, weight_attr, bias_attr=bias_attr) - self.out_proj = nn.Linear( - self.embed_dim, self.embed_dim, weight_attr, bias_attr=bias_attr) + self.q_proj = nn.Linear(self.embed_dim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) + self.k_proj = nn.Linear(self.kdim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) + self.v_proj = nn.Linear(self.vdim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) + self.out_proj = nn.Linear(self.embed_dim, + self.embed_dim, + weight_attr, + bias_attr=bias_attr) intermediate_size = 4 * self.hidden_size d_model = self.hidden_size @@ -918,10 +916,14 @@ class DecoderLayer(nn.Layer): weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( mean=0.0, std=self.initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) self.dropout1 = nn.Dropout(self.dropout_ratio) self.dropout2 = nn.Dropout(self.dropout_ratio, mode="upscale_in_train") @@ -929,37 +931,33 @@ class DecoderLayer(nn.Layer): def forward(self, input_ids, position_ids): if _global_parallel_strategy == "dp": - auto.shard_tensor( - input_ids, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(input_ids, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - input_ids, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(input_ids, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) input_embeddings = self.word_embeddings(input_ids) position_embeddings = self.position_embeddings(position_ids) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.word_embeddings.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.word_embeddings.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.word_embeddings.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.word_embeddings.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) embeddings = 
input_embeddings + position_embeddings embeddings = self.dropout1(embeddings) @@ -976,43 +974,37 @@ class DecoderLayer(nn.Layer): v = self.v_proj(target) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) k = tensor.reshape(x=k, shape=[0, 0, self.num_heads, self.head_dim]) k = tensor.transpose(x=k, perm=[0, 2, 1, 3]) @@ -1020,8 +1012,10 @@ class DecoderLayer(nn.Layer): v = tensor.transpose(x=v, perm=[0, 2, 1, 3]) # scale dot product attention - product = layers.matmul( - x=q, y=k, transpose_y=True, alpha=self.head_dim**-0.5) + product = layers.matmul(x=q, + y=k, + transpose_y=True, + alpha=self.head_dim**-0.5) if self.attn_mask is not None: product = product + self.attn_mask @@ -1029,11 +1023,10 @@ class DecoderLayer(nn.Layer): weights = F.softmax(product) if self.dropout_ratio: - weights = F.dropout( - weights, - self.dropout_ratio, - training=self.training, - mode="upscale_in_train") + weights = F.dropout(weights, + self.dropout_ratio, + training=self.training, + mode="upscale_in_train") out = tensor.matmul(weights, v) @@ -1045,26 +1038,23 @@ class DecoderLayer(nn.Layer): out = self.out_proj(out) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) else: - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) # Add residual residual = 
embeddings + self.dropout2(out) @@ -1078,31 +1068,27 @@ class DecoderLayer(nn.Layer): out3 = self.linear1(out2) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) # Add residual final = residual + self.dropout3(out3) @@ -1115,27 +1101,27 @@ def decoder_pretrain_forward(train_program, start_program): batch_size = 4 hidden_size = 1024 sequence_len = 512 - input_ids = static.data( - name="input_ids", shape=[batch_size, sequence_len], dtype='int64') - position_ids = static.data( - name="position_ids", - shape=[batch_size, sequence_len], - dtype='int64') - decoder = DecoderLayer( - vocab_size=32768, - hidden_size=hidden_size, - sequence_len=sequence_len, - max_position_embeddings=512, - intermediate_size=4 * hidden_size, - num_heads=16, - dropout_ratio=0.1, - initializer_range=0.02) + input_ids = static.data(name="input_ids", + shape=[batch_size, sequence_len], + dtype='int64') + position_ids = static.data(name="position_ids", + shape=[batch_size, sequence_len], + dtype='int64') + decoder = DecoderLayer(vocab_size=32768, + hidden_size=hidden_size, + sequence_len=sequence_len, + max_position_embeddings=512, + intermediate_size=4 * hidden_size, + num_heads=16, + dropout_ratio=0.1, + initializer_range=0.02) out = decoder(input_ids, position_ids) return train_program, start_program class TestDecoderLayerPartitioner(unittest.TestCase): + def test_decoder_dp_mp(self): global _global_parallel_strategy _global_parallel_strategy = "dp_mp" @@ -1191,21 +1177,20 @@ class TestDecoderLayerPartitioner(unittest.TestCase): ] self.assertTrue(dist_ops == ref_ops) - # parameter initialization + # parameter initialization var_need_broadcast = sorted([ 'linear_3.b_0', 'pos_embeddings', 'layer_norm_0.b_0', 'layer_norm_0.w_0', 'linear_5.b_0' ]) self.assertTrue( - initialization_check( - _global_parallel_strategy, - dist_context, - dist_startup_prog, - serial_startup_prog, - var_need_broadcast, - _global_process_mesh, - mp_parallel_axis=1, - dp_parallel_axis=0)) + initialization_check(_global_parallel_strategy, + dist_context, + dist_startup_prog, + serial_startup_prog, + var_need_broadcast, + _global_process_mesh, + mp_parallel_axis=1, + dp_parallel_axis=0)) # check var and op all have dist_attr in dist_main_program self.assertTrue( diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py index 07d94d1b76f..96738a46662 100644 --- 
a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py @@ -125,17 +125,27 @@ class MultiHeadAttention(nn.Layer): if self.fuse: assert self.kdim == embed_dim assert self.vdim == embed_dim - self.qkv_proj = nn.Linear( - embed_dim, 3 * embed_dim, weight_attr, bias_attr=bias_attr) + self.qkv_proj = nn.Linear(embed_dim, + 3 * embed_dim, + weight_attr, + bias_attr=bias_attr) else: - self.q_proj = nn.Linear( - embed_dim, embed_dim, weight_attr, bias_attr=bias_attr) - self.k_proj = nn.Linear( - self.kdim, embed_dim, weight_attr, bias_attr=bias_attr) - self.v_proj = nn.Linear( - self.vdim, embed_dim, weight_attr, bias_attr=bias_attr) - self.out_proj = nn.Linear( - embed_dim, embed_dim, weight_attr, bias_attr=bias_attr) + self.q_proj = nn.Linear(embed_dim, + embed_dim, + weight_attr, + bias_attr=bias_attr) + self.k_proj = nn.Linear(self.kdim, + embed_dim, + weight_attr, + bias_attr=bias_attr) + self.v_proj = nn.Linear(self.vdim, + embed_dim, + weight_attr, + bias_attr=bias_attr) + self.out_proj = nn.Linear(embed_dim, + embed_dim, + weight_attr, + bias_attr=bias_attr) def _fuse_prepare_qkv(self, query): mix_layer = self.qkv_proj(query) @@ -154,19 +164,17 @@ class MultiHeadAttention(nn.Layer): q = self.q_proj(query) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.q_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.q_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) q = tensor.reshape(x=q, shape=[0, 0, self.num_heads, self.head_dim]) q = tensor.transpose(x=q, perm=[0, 2, 1, 3]) @@ -199,36 +207,32 @@ class MultiHeadAttention(nn.Layer): k = self.k_proj(key) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.k_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.k_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) v = self.v_proj(value) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.v_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.v_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) k = tensor.reshape(x=k, shape=[0, 0, self.num_heads, self.head_dim]) k = tensor.transpose(x=k, perm=[0, 2, 1, 3]) @@ -284,19 +288,20 @@ class MultiHeadAttention(nn.Layer): q, k, v, cache = self._prepare_qkv(query, key, value, use_cache, cache) # scale 
dot product attention - product = layers.matmul( - x=q, y=k, transpose_y=True, alpha=self.head_dim**-0.5) + product = layers.matmul(x=q, + y=k, + transpose_y=True, + alpha=self.head_dim**-0.5) if attn_mask is not None: product = product + attn_mask weights = F.softmax(product) if self.dropout: - weights = F.dropout( - weights, - self.dropout, - training=self.training, - mode="upscale_in_train") + weights = F.dropout(weights, + self.dropout, + training=self.training, + mode="upscale_in_train") out = tensor.matmul(weights, v) @@ -308,19 +313,17 @@ class MultiHeadAttention(nn.Layer): out = self.out_proj(out) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.out_proj.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.out_proj.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) outs = [out] if self.need_weights: @@ -442,24 +445,21 @@ class TransformerDecoderLayer(nn.Layer): weight_attrs = _convert_param_attr_to_list(weight_attr, 3) bias_attrs = _convert_param_attr_to_list(bias_attr, 3) - self.self_attn = MultiHeadAttention( - d_model, - nhead, - dropout=attn_dropout, - weight_attr=weight_attrs[0], - bias_attr=bias_attrs[0], - topo=topo) + self.self_attn = MultiHeadAttention(d_model, + nhead, + dropout=attn_dropout, + weight_attr=weight_attrs[0], + bias_attr=bias_attrs[0], + topo=topo) if topo is None or topo.mp_info.size == 1: - self.linear1 = nn.Linear( - d_model, - dim_feedforward, - weight_attrs[2], - bias_attr=bias_attrs[2]) - self.linear2 = nn.Linear( - dim_feedforward, - d_model, - weight_attrs[2], - bias_attr=bias_attrs[2]) + self.linear1 = nn.Linear(d_model, + dim_feedforward, + weight_attrs[2], + bias_attr=bias_attrs[2]) + self.linear2 = nn.Linear(dim_feedforward, + d_model, + weight_attrs[2], + bias_attr=bias_attrs[2]) self.norm1 = nn.LayerNorm(d_model, epsilon=1e-5) self.norm2 = nn.LayerNorm(d_model, epsilon=1e-5) @@ -487,34 +487,30 @@ class TransformerDecoderLayer(nn.Layer): tgt = self.norm2(tgt) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 0] - }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 0] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, 1] - }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, 1] + }) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.linear2.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.linear2.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.linear2.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.linear2.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) # tgt = 
self.dropout2( # self.linear2(F.gelu( @@ -530,8 +526,8 @@ class TransformerDecoderLayer(nn.Layer): return tgt if use_cache is False else (tgt, incremental_cache) def gen_cache(self, memory): - incremental_cache = self.self_attn.gen_cache( - memory, type=self.self_attn.Cache) + incremental_cache = self.self_attn.gen_cache(memory, + type=self.self_attn.Cache) return incremental_cache @@ -553,17 +549,16 @@ class GPTEmbeddings(nn.Layer): self.word_embeddings = nn.Embedding( vocab_size, hidden_size, - weight_attr=paddle.ParamAttr( - name="word_embeddings", - initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range))) + weight_attr=paddle.ParamAttr(name="word_embeddings", + initializer=nn.initializer.Normal( + mean=0.0, + std=initializer_range))) self.position_embeddings = nn.Embedding( max_position_embeddings, hidden_size, - weight_attr=paddle.ParamAttr( - name="pos_embeddings", - initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range))) + weight_attr=paddle.ParamAttr(name="pos_embeddings", + initializer=nn.initializer.Normal( + mean=0.0, std=initializer_range))) self.dropout = nn.Dropout(hidden_dropout_prob) @@ -576,19 +571,17 @@ class GPTEmbeddings(nn.Layer): input_embedings = self.word_embeddings(input_ids) if _global_parallel_strategy == "mp": - auto.shard_tensor( - self.word_embeddings.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(self.word_embeddings.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - self.word_embeddings.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [1, -1] - }) + auto.shard_tensor(self.word_embeddings.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [1, -1] + }) position_embeddings = self.position_embeddings(position_ids) embeddings = input_embedings + position_embeddings @@ -627,37 +620,36 @@ class GPTModel(nn.Layer): if self.pipline_mode: self.layer_per_stage = num_hidden_layers // self.topo.pp_info.size - self.embeddings = GPTEmbeddings( - vocab_size, hidden_size, hidden_dropout_prob, - max_position_embeddings, type_vocab_size, self.initializer_range, - topo) + self.embeddings = GPTEmbeddings(vocab_size, hidden_size, + hidden_dropout_prob, + max_position_embeddings, + type_vocab_size, self.initializer_range, + topo) decoder_layers = nn.LayerList() for i in range(num_hidden_layers): DecoderLayer = TransformerDecoderLayer decoder_layers.append( - DecoderLayer( - d_model=hidden_size, - nhead=num_attention_heads, - dim_feedforward=intermediate_size, - dropout=hidden_dropout_prob, - activation=hidden_act, - attn_dropout=attention_probs_dropout_prob, - act_dropout=hidden_dropout_prob, - weight_attr=paddle.ParamAttr( - initializer=nn.initializer.Normal( - mean=0.0, std=self.initializer_range)), - bias_attr=None, - topo=topo)) + DecoderLayer(d_model=hidden_size, + nhead=num_attention_heads, + dim_feedforward=intermediate_size, + dropout=hidden_dropout_prob, + activation=hidden_act, + attn_dropout=attention_probs_dropout_prob, + act_dropout=hidden_dropout_prob, + weight_attr=paddle.ParamAttr( + initializer=nn.initializer.Normal( + mean=0.0, std=self.initializer_range)), + bias_attr=None, + topo=topo)) Decoder = TransformerDecoder - self.decoder = Decoder( - decoder_layers, - num_hidden_layers, - norm="LayerNorm", - hidden_size=hidden_size, - topo=topo) + self.decoder = Decoder(decoder_layers, + num_hidden_layers, + 
norm="LayerNorm", + hidden_size=hidden_size, + topo=topo) self.checkpoints = [] @@ -672,29 +664,27 @@ class GPTModel(nn.Layer): length = paddle.shape(input_ids)[1] # Use bool mask attention_mask = paddle.tensor.tril( - paddle.ones( - (length, length), - dtype=self.embeddings.word_embeddings.weight.dtype)) + paddle.ones((length, length), + dtype=self.embeddings.word_embeddings.weight.dtype)) if position_ids is None: past_length = 0 if cache is not None: past_length = paddle.shape(cache[0].k)[-2] - position_ids = paddle.arange( - past_length, - paddle.shape(input_ids)[-1] + past_length, - dtype='int64') + position_ids = paddle.arange(past_length, + paddle.shape(input_ids)[-1] + + past_length, + dtype='int64') position_ids = position_ids.unsqueeze(0) # .expand_as(input_ids) - position_ids = paddle.fluid.layers.expand_as(position_ids, - input_ids) - embedding_output = self.embeddings( - input_ids=input_ids, position_ids=position_ids) + position_ids = paddle.fluid.layers.expand_as( + position_ids, input_ids) + embedding_output = self.embeddings(input_ids=input_ids, + position_ids=position_ids) # TODO, use registered buffer - causal_mask = paddle.tensor.triu( - paddle.ones((paddle.shape(input_ids)[-1], - paddle.shape(input_ids)[-1])) * -1e9, - diagonal=1) + causal_mask = paddle.tensor.triu(paddle.ones( + (paddle.shape(input_ids)[-1], paddle.shape(input_ids)[-1])) * -1e9, + diagonal=1) if attention_mask is not None: attention_mask = attention_mask + causal_mask @@ -704,12 +694,11 @@ class GPTModel(nn.Layer): # The tensor returned by triu not in static graph. attention_mask.stop_gradient = True - encoder_outputs = self.decoder( - embedding_output, - memory=None, - tgt_mask=attention_mask, - use_cache=use_cache, - cache=cache) + encoder_outputs = self.decoder(embedding_output, + memory=None, + tgt_mask=attention_mask, + use_cache=use_cache, + cache=cache) self.checkpoints.extend(self.decoder.checkpoints) return encoder_outputs @@ -733,8 +722,9 @@ class GPTForPretraining(nn.Layer): input_parallel = paddle.distributed.collective._c_identity( lm_output, group=None) - logits = paddle.matmul( - input_parallel, logit_weights, transpose_y=True) + logits = paddle.matmul(input_parallel, + logit_weights, + transpose_y=True) if parallel_output: return logits @@ -797,50 +787,49 @@ def gpt_pretrain_forward(train_program, startup_program): startup_program), utils.unique_name.guard(): batch_size = 16 sequence_len = 512 - input_ids = static.data( - name="input_ids", shape=[batch_size, sequence_len], dtype='int64') - position_ids = static.data( - name="position_ids", - shape=[batch_size, sequence_len], - dtype='int64') + input_ids = static.data(name="input_ids", + shape=[batch_size, sequence_len], + dtype='int64') + position_ids = static.data(name="position_ids", + shape=[batch_size, sequence_len], + dtype='int64') attention_mask = static.data( name="attention_mask", shape=[batch_size, 1, sequence_len, sequence_len], dtype='float64') - labels = static.data( - name="labels", shape=[batch_size, sequence_len], dtype='int64') - loss_mask = static.data( - name="loss_mask", shape=[batch_size, sequence_len], dtype='float64') + labels = static.data(name="labels", + shape=[batch_size, sequence_len], + dtype='int64') + loss_mask = static.data(name="loss_mask", + shape=[batch_size, sequence_len], + dtype='float64') if _global_parallel_strategy == "dp": - auto.shard_tensor( - input_ids, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(input_ids, + dist_attr={ + 
"process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) elif _global_parallel_strategy == "dp_mp": - auto.shard_tensor( - input_ids, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) - - gpt = GPTModel( - vocab_size=32768, - hidden_size=768, - num_hidden_layers=2, - num_attention_heads=12, - intermediate_size=4096, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=1024, - type_vocab_size=16, - initializer_range=0.02, - pad_token_id=0, - topo=None) + auto.shard_tensor(input_ids, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) + + gpt = GPTModel(vocab_size=32768, + hidden_size=768, + num_hidden_layers=2, + num_attention_heads=12, + intermediate_size=4096, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=1024, + type_vocab_size=16, + initializer_range=0.02, + pad_token_id=0, + topo=None) model = GPTForPretraining(gpt) @@ -854,18 +843,21 @@ def gpt_pretrain_forward(train_program, startup_program): class FakeStrategy(object): + def __init__(self): self.amp = False self.recompute = False class FakeFleet(object): + def __init__(self): self.user_defined_optimizer = None self._user_defined_strategy = FakeStrategy() class TestGPTPartitioner(unittest.TestCase): + def test_gpt_dp_mp(self): global _global_parallel_strategy _global_parallel_strategy = "dp_mp" @@ -888,13 +880,12 @@ class TestGPTPartitioner(unittest.TestCase): dist_context.block_state.parse_forward_blocks(complete_train_program) # serial backward pass - params_grads = parallelizer._generate_backward( - complete_train_program, - startup_program, - loss, - parameter_list=None, - no_grad_set=None, - callbacks=None) + params_grads = parallelizer._generate_backward(complete_train_program, + startup_program, + loss, + parameter_list=None, + no_grad_set=None, + callbacks=None) rank_id = 3 partitioner = Partitioner(dist_context, rank_id) @@ -919,7 +910,7 @@ class TestGPTPartitioner(unittest.TestCase): # from paddle.distributed.auto_parallel.completion import Completer # completer = Completer() # completer.complete_forward_annotation(auto_parallel_main_prog) - # fw.write(str(auto_parallel_main_prog)) + # fw.write(str(auto_parallel_main_prog)) nrank = 4 # col parallel weights = [ @@ -953,25 +944,27 @@ class TestGPTPartitioner(unittest.TestCase): mp_parallel_axis = 1 dp_parallel_axis = 0 - group_ranks = _get_comm_group( - process_mesh.processes, process_mesh.topology, mp_parallel_axis, 3) + group_ranks = _get_comm_group(process_mesh.processes, + process_mesh.topology, mp_parallel_axis, + 3) mp_ring_id = new_process_group(group_ranks).id - group_ranks = _get_comm_group( - process_mesh.processes, process_mesh.topology, dp_parallel_axis, 3) + group_ranks = _get_comm_group(process_mesh.processes, + process_mesh.topology, dp_parallel_axis, + 3) dp_ring_id = new_process_group(group_ranks).id tensor_parallel_allreduce_vars = sorted([ op.desc.output_arg_names()[0].split("@")[0] for op in auto_parallel_main_prog.global_block().ops - if (op.type == "c_allreduce_sum" and op.attr('op_role') == 1 and - op.desc.attr("ring_id") == mp_ring_id) + if (op.type == "c_allreduce_sum" and op.attr('op_role') == 1 + and op.desc.attr("ring_id") == mp_ring_id) ]) data_parallel_allreduce_vars = sorted([ op.desc.output_arg_names()[0].split("@")[0] for op in auto_parallel_main_prog.global_block().ops - if (op.type == "c_allreduce_sum" and op.desc.attr("ring_id") == - 
dp_ring_id) + if (op.type == "c_allreduce_sum" + and op.desc.attr("ring_id") == dp_ring_id) ]) self.assertTrue(all_params == data_parallel_allreduce_vars) diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py index 9888d2c68f1..93c5ded0c10 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py @@ -39,6 +39,7 @@ PP_MESH_1 = None class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -46,43 +47,43 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) def forward(self, input): if _global_parallel_strategy == "pp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": PP_MESH_0, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": PP_MESH_1, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [-1, -1] + }) else: - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) out = self.norm(input) out = self.linear0(out) @@ -98,43 +99,40 @@ def mlp_forward(train_program, start_program): batch_size = 4 hidden_size = 1024 sequence_len = 512 - input = static.data( - name="input", shape=[batch_size, hidden_size], dtype='float32') - label = static.data( - name="label", shape=[batch_size, 1], dtype='float32') + input = static.data(name="input", + shape=[batch_size, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, 1], + dtype='float32') if _global_parallel_strategy == "pp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": PP_MESH_0, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - label, - dist_attr={ - "process_mesh": PP_MESH_1, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(label, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [-1, -1] + }) elif _global_parallel_strategy == "dp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) 
+ auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) else: - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) - - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - initializer_range=0.02) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) + + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + initializer_range=0.02) predict = mlp(input) error_cost = paddle.nn.functional.square_error_cost(predict, label) @@ -164,16 +162,15 @@ def get_dist_prog(train_program, if change_process_mesh: global PP_MESH_1 dist_context.get_tensor_dist_attr_for_program( - train_program.global_block().vars[ - "gelu_0.tmp_0"]).process_mesh = PP_MESH_1 + train_program.global_block( + ).vars["gelu_0.tmp_0"]).process_mesh = PP_MESH_1 - params_grads = parallelizer._generate_backward( - complete_train_program, - startup_program, - loss, - parameter_list=None, - no_grad_set=None, - callbacks=None) + params_grads = parallelizer._generate_backward(complete_train_program, + startup_program, + loss, + parameter_list=None, + no_grad_set=None, + callbacks=None) # logical partition partitioner = Partitioner(dist_context, rank_id) @@ -227,8 +224,7 @@ def check_send_recv_result(dist_main_prog, rank_id): for idx, op in enumerate(ops): if op.type == "send_v2" and "gelu_0.tmp_0@GRAD" in op.input_arg_names: send_result = True - if op.type == "recv_v2" and "gelu_0.tmp_0" in op.output_arg_names[ - 0]: + if op.type == "recv_v2" and "gelu_0.tmp_0" in op.output_arg_names[0]: recv_result = True return send_result and recv_result @@ -269,6 +265,7 @@ def check_initialization_for_dp(dist_startup_prog): class TestMLPReshard(unittest.TestCase): + def test_complete_backward_annotation(self): global _global_process_mesh _global_process_mesh = auto.ProcessMesh(mesh=[0, 1]) diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py index 62f25c5d4a0..7544ff4571c 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py @@ -38,6 +38,7 @@ PP_MESH_1 = auto.ProcessMesh([[2, 3], [6, 7]]) class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -45,25 +46,31 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) def forward(self, input): - auto.shard_tensor( - self.linear0.weight, - dist_attr={"process_mesh": PP_MESH_0, - "dims_mapping": [-1, 1]}) - auto.shard_tensor( - self.linear1.weight, - dist_attr={"process_mesh": PP_MESH_1, - "dims_mapping": [1, -1]}) + 
auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [-1, 1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [1, -1] + }) out = self.norm(input) out = self.linear0(out) @@ -79,24 +86,27 @@ def mlp_forward(train_program, start_program): batch_size = 4 hidden_size = 1024 sequence_len = 512 - input = static.data( - name="input", shape=[batch_size, hidden_size], dtype='float32') - label = static.data( - name="label", shape=[batch_size, 1], dtype='float32') - - auto.shard_tensor( - input, - dist_attr={"process_mesh": PP_MESH_0, - "dims_mapping": [0, -1]}) - auto.shard_tensor( - label, - dist_attr={"process_mesh": PP_MESH_1, - "dims_mapping": [0, -1]}) - - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - initializer_range=0.02) + input = static.data(name="input", + shape=[batch_size, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, 1], + dtype='float32') + + auto.shard_tensor(input, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [0, -1] + }) + auto.shard_tensor(label, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [0, -1] + }) + + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + initializer_range=0.02) predict = mlp(input) error_cost = paddle.nn.functional.square_error_cost(predict, label) @@ -121,13 +131,12 @@ def get_dist_prog(train_program, startup_program, dist_context, rank_id): complete_train_program = completer.complete_forward_annotation( train_program) dist_context.block_state.parse_forward_blocks(complete_train_program) - params_grads = parallelizer._generate_backward( - complete_train_program, - startup_program, - loss, - parameter_list=None, - no_grad_set=None, - callbacks=None) + params_grads = parallelizer._generate_backward(complete_train_program, + startup_program, + loss, + parameter_list=None, + no_grad_set=None, + callbacks=None) # logical partition partitioner = Partitioner(dist_context, rank_id) @@ -155,8 +164,7 @@ def check_send_recv_result(dist_main_prog, rank_id): for idx, op in enumerate(ops): if op.type == "send_v2" and "gelu_0.tmp_0@GRAD" in op.input_arg_names: send_result = True - if op.type == "recv_v2" and "gelu_0.tmp_0" in op.output_arg_names[ - 0]: + if op.type == "recv_v2" and "gelu_0.tmp_0" in op.output_arg_names[0]: recv_result = True return send_result and recv_result @@ -172,6 +180,7 @@ def check_initialization_for_dpmppp(dist_startup_prog): class TestMLPReshard(unittest.TestCase): + def test_mlp_dpmppp(self): train_program = paddle.static.Program() startup_program = paddle.static.Program() diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py index 5f9c2ec2371..0e647a3db5b 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py @@ -38,6 +38,7 @@ PP_MESH_1 = auto.ProcessMesh([2, 3]) class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -45,42 +46,51 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr 
= None self.word_embeddings = nn.Embedding( hidden_size, hidden_size, - weight_attr=paddle.ParamAttr( - name="word_embeddings", - initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range))) - - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) - self.linear2 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + weight_attr=paddle.ParamAttr(name="word_embeddings", + initializer=nn.initializer.Normal( + mean=0.0, std=initializer_range))) + + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) + self.linear2 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) def forward(self, input): - auto.shard_tensor( - self.word_embeddings.weight, - dist_attr={"process_mesh": PP_MESH_0, - "dims_mapping": [0, -1]}) - auto.shard_tensor( - self.linear0.weight, - dist_attr={"process_mesh": PP_MESH_0, - "dims_mapping": [-1, 0]}) - auto.shard_tensor( - self.linear1.weight, - dist_attr={"process_mesh": PP_MESH_1, - "dims_mapping": [0, -1]}) - auto.shard_tensor( - self.linear2.weight, - dist_attr={"process_mesh": PP_MESH_1, - "dims_mapping": [0, -1]}) + auto.shard_tensor(self.word_embeddings.weight, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [0, -1] + }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [-1, 0] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [0, -1] + }) + auto.shard_tensor(self.linear2.weight, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [0, -1] + }) w_out = self.word_embeddings(input) out = self.linear0(w_out) gelu_out = F.gelu(out, approximate=True) @@ -98,21 +108,24 @@ def mlp_forward(train_program, start_program): hidden_size = 1024 sequence_len = 512 input = static.data(name="input", shape=[batch_size], dtype='int32') - label = static.data( - name="label", shape=[batch_size, 1], dtype='float32') - - auto.shard_tensor( - input, dist_attr={"process_mesh": PP_MESH_0, - "dims_mapping": [-1]}) - auto.shard_tensor( - label, - dist_attr={"process_mesh": PP_MESH_1, - "dims_mapping": [-1, -1]}) - - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - initializer_range=0.02) + label = static.data(name="label", + shape=[batch_size, 1], + dtype='float32') + + auto.shard_tensor(input, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [-1] + }) + auto.shard_tensor(label, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [-1, -1] + }) + + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + initializer_range=0.02) predict = mlp(input) error_cost = paddle.nn.functional.square_error_cost(predict, label) @@ -137,13 +150,12 @@ def get_dist_prog(train_program, startup_program, dist_context, rank_id): complete_train_program = completer.complete_forward_annotation( train_program) dist_context.block_state.parse_forward_blocks(complete_train_program) - params_grads = parallelizer._generate_backward( - complete_train_program, - startup_program, - loss, - parameter_list=None, - no_grad_set=None, - callbacks=None) + params_grads = parallelizer._generate_backward(complete_train_program, + startup_program, + loss, + parameter_list=None, + no_grad_set=None, + callbacks=None) # logical 
partition partitioner = Partitioner(dist_context, rank_id) @@ -171,8 +183,7 @@ def check_send_recv_result(dist_main_prog, rank_id): if op.type == "send_v2" and "gelu_0.tmp_0@GRAD" in op.input_arg_names[ 0]: send_result = True - if op.type == "recv_v2" and "gelu_0.tmp_0" in op.output_arg_names[ - 0]: + if op.type == "recv_v2" and "gelu_0.tmp_0" in op.output_arg_names[0]: recv_result = True return send_result and recv_result @@ -206,6 +217,7 @@ def check_allgather(dist_main_program): class TestMLPReshard(unittest.TestCase): + def test_mlp_mppp(self): train_program = paddle.static.Program() startup_program = paddle.static.Program() @@ -230,20 +242,18 @@ class TestMLPReshard(unittest.TestCase): process_mesh = auto.ProcessMesh(mesh=[0, 3]) with static.program_guard(train_program, startup_program): x = paddle.static.data(name="x", shape=[4, 4], dtype='float32') - x = auto.shard_tensor( - x, - dist_attr={ - "process_mesh": process_mesh, - "dims_mapping": [0, -1] - }) + x = auto.shard_tensor(x, + dist_attr={ + "process_mesh": process_mesh, + "dims_mapping": [0, -1] + }) w = paddle.static.data(name="w", shape=[4, 4], dtype='float32') - w = auto.shard_tensor( - w, - dist_attr={ - "process_mesh": process_mesh, - "dims_mapping": [-1, -1] - }) + w = auto.shard_tensor(w, + dist_attr={ + "process_mesh": process_mesh, + "dims_mapping": [-1, -1] + }) # y = paddle.distributed.shard_op(paddle.matmul, process_mesh, { # x.name: [-1, -1], @@ -251,17 +261,16 @@ class TestMLPReshard(unittest.TestCase): # }, **{"x": x, # "y": w})[0] - y = paddle.distributed.shard_op( - paddle.matmul, - dist_attr={ - "process_mesh": process_mesh, - x: { - "dims_mapping": [-1, -1] - }, - w: { - "dims_mapping": [-1, -1] - } - })(x, w)[0] + y = paddle.distributed.shard_op(paddle.matmul, + dist_attr={ + "process_mesh": process_mesh, + x: { + "dims_mapping": [-1, -1] + }, + w: { + "dims_mapping": [-1, -1] + } + })(x, w)[0] rank_id = 0 dist_context = DistributedContext() diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py index ac6b06b9ca1..64ff030f5b1 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py @@ -38,6 +38,7 @@ _global_process_mesh = None class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -45,43 +46,43 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) def forward(self, input): if _global_parallel_strategy == "pp": - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": PP_MESH_0, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": PP_MESH_1, - "dims_mapping": [-1, -1] - }) + 
auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [-1, -1] + }) else: - auto.shard_tensor( - self.linear0.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - self.linear1.weight, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(self.linear0.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(self.linear1.weight, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) out = self.norm(input) out = self.linear0(out) @@ -97,43 +98,40 @@ def mlp_forward(train_program, start_program): batch_size = 4 hidden_size = 1024 sequence_len = 512 - input = static.data( - name="input", shape=[batch_size, hidden_size], dtype='float32') - label = static.data( - name="label", shape=[batch_size, 1], dtype='float32') + input = static.data(name="input", + shape=[batch_size, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, 1], + dtype='float32') if _global_parallel_strategy == "pp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": PP_MESH_0, - "dims_mapping": [-1, -1] - }) - auto.shard_tensor( - label, - dist_attr={ - "process_mesh": PP_MESH_1, - "dims_mapping": [-1, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": PP_MESH_0, + "dims_mapping": [-1, -1] + }) + auto.shard_tensor(label, + dist_attr={ + "process_mesh": PP_MESH_1, + "dims_mapping": [-1, -1] + }) elif _global_parallel_strategy == "dp": - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [0, -1] - }) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [0, -1] + }) else: - auto.shard_tensor( - input, - dist_attr={ - "process_mesh": _global_process_mesh, - "dims_mapping": [-1, -1] - }) - - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - initializer_range=0.02) + auto.shard_tensor(input, + dist_attr={ + "process_mesh": _global_process_mesh, + "dims_mapping": [-1, -1] + }) + + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + initializer_range=0.02) predict = mlp(input) error_cost = paddle.nn.functional.square_error_cost(predict, label) @@ -158,12 +156,11 @@ def get_dist_prog_with_parallelizer(train_program, startup_program, loss, train_program, startup_program = mlp_forward(train_program, startup_program) - optimizer = paddle.fluid.optimizer.AdamOptimizer( - learning_rate=0.00001, - beta1=0.9, - beta2=0.999, - epsilon=1e-08, - grad_clip=None) + optimizer = paddle.fluid.optimizer.AdamOptimizer(learning_rate=0.00001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + grad_clip=None) optimizer = fleet.distributed_optimizer(optimizer) _, _, distributed_startup_program, distributed_main_program = optimizer.minimize( @@ -187,14 +184,14 @@ def check_send_recv_result(dist_main_prog, rank_id): for idx, op in enumerate(ops): if op.type == "send_v2" and "gelu_0.tmp_0@GRAD" in op.input_arg_names: send_result = True - if op.type == "recv_v2" and "gelu_0.tmp_0" in op.output_arg_names[ - 0]: + if op.type == "recv_v2" and "gelu_0.tmp_0" in op.output_arg_names[0]: recv_result = True return send_result and recv_result class TestMLPReshard(unittest.TestCase): + def 
test_mlp_serial(self): global _global_parallel_strategy _global_parallel_strategy = None diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_save_load.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_save_load.py index b96b51e5567..7de26902011 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_save_load.py @@ -21,6 +21,7 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestAutoParallelSaveLoad(TestMultipleGpus): + def test_auto_parallel_save_load(self): self.run_mnist_2gpu('auto_parallel_save_load.py') diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py index 78ad64b1dd8..5d6119d23f3 100755 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py @@ -40,6 +40,7 @@ paddle.enable_static() class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -47,14 +48,18 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) def forward(self, input): @@ -73,15 +78,16 @@ def mlp_forward(train_program, start_program): batch_size = 4 hidden_size = 1024 sequence_len = 512 - input = static.data( - name="input", shape=[batch_size, hidden_size], dtype='float32') - label = static.data( - name="label", shape=[batch_size, 1], dtype='float32') + input = static.data(name="input", + shape=[batch_size, hidden_size], + dtype='float32') + label = static.data(name="label", + shape=[batch_size, 1], + dtype='float32') loss_func = paddle.nn.CrossEntropyLoss(reduction="none") - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - initializer_range=0.02) + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + initializer_range=0.02) predict = mlp(input) error_cost = loss_func(predict, label) @@ -100,8 +106,8 @@ def set_default_dist_attr(program, dist_context, process_mesh): tensor_dist_attr = TensorDistributedAttribute() tensor_dist_attr.process_mesh = process_mesh tensor_dist_attr.dims_mapping = [-1 for i in vars[var_name].shape] - dist_context.set_tensor_dist_attr_for_program(vars[var_name], - tensor_dist_attr) + dist_context.set_tensor_dist_attr_for_program( + vars[var_name], tensor_dist_attr) op_dist_attr.set_input_dims_mapping(var_name, tensor_dist_attr.dims_mapping) @@ -109,8 +115,8 @@ def set_default_dist_attr(program, dist_context, process_mesh): tensor_dist_attr = TensorDistributedAttribute() tensor_dist_attr.process_mesh = process_mesh tensor_dist_attr.dims_mapping = [-1 for i in vars[var_name].shape] - dist_context.set_tensor_dist_attr_for_program(vars[var_name], - tensor_dist_attr) + 
dist_context.set_tensor_dist_attr_for_program( + vars[var_name], tensor_dist_attr) op_dist_attr.set_output_dims_mapping(var_name, tensor_dist_attr.dims_mapping) dist_context.set_op_dist_attr_for_program(op, op_dist_attr) @@ -143,6 +149,7 @@ def check_nonpipeline_enumerater(program, process_mesh_topology): class TestMLPSearcher(unittest.TestCase): + def test_update(self): train_program = paddle.static.Program() startup_program = paddle.static.Program() @@ -170,8 +177,7 @@ class TestMLPSearcher(unittest.TestCase): dist_op.dist_attr.set_output_dims_mapping( op.output_arg_names[0], [0] + [ - -1 - for i in range( + -1 for i in range( 1, len(vars[op.output_arg_names[0]].shape)) ]) try: @@ -187,8 +193,7 @@ class TestMLPSearcher(unittest.TestCase): dist_op.dist_attr.set_output_dims_mapping( op.output_arg_names[0], [0] + [ - -1 - for i in range( + -1 for i in range( 1, len(vars[op.output_arg_names[0]].shape)) ]) try: diff --git a/python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py b/python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py index 8c5913c66a7..76bcf0f1948 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py +++ b/python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py @@ -30,11 +30,13 @@ from paddle.distributed.auto_parallel.operators.common import get_distributed_op from paddle.distributed.auto_parallel.dist_context import DistributedContext, DistributedOperatorContext from paddle.distributed.auto_parallel.dist_attribute import OperatorDistributedAttribute, TensorDistributedAttribute from paddle.distributed.auto_parallel.dist_op import DistributedOperator + paddle.enable_static() device = "gpu" if core.is_compiled_with_cuda() else "cpu" class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -42,14 +44,18 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) def forward(self, input): @@ -76,17 +82,16 @@ def mlp_forward(train_program, start_program): input = embedding(input) input = paddle.reshape(input, [hidden_size, batch_size]) input = paddle.transpose(input, perm=[1, 0]) - matmulinput = static.data( - name="matmulinput", - shape=[hidden_size, hidden_size], - dtype='float32') + matmulinput = static.data(name="matmulinput", + shape=[hidden_size, hidden_size], + dtype='float32') input = layers.matmul(x=input, y=matmulinput) - label = static.data( - name="label", shape=[batch_size, 1], dtype='float32') - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - initializer_range=0.02) + label = static.data(name="label", + shape=[batch_size, 1], + dtype='float32') + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + initializer_range=0.02) predict = mlp(input) error_cost = paddle.nn.functional.square_error_cost(predict, label) 
@@ -97,6 +102,7 @@ def mlp_forward(train_program, start_program): class TestCompatible(unittest.TestCase): + def test_matmulv2_matmul_2_compatible(self): valid_op_dist_attr_list = [] program = paddle.static.Program() @@ -105,16 +111,20 @@ class TestCompatible(unittest.TestCase): with static.program_guard(program, start_program), utils.unique_name.guard(): - matmulx3 = static.data( - name="matmulx3", shape=[6, 2, 6], dtype='float32') - matmuly3 = static.data( - name="matmuly3", shape=[6, 6], dtype='float32') + matmulx3 = static.data(name="matmulx3", + shape=[6, 2, 6], + dtype='float32') + matmuly3 = static.data(name="matmuly3", + shape=[6, 6], + dtype='float32') output1 = paddle.matmul(x=matmulx3, y=matmuly3) output_1 = layers.matmul(x=matmulx3, y=matmuly3) - matmulx4 = static.data( - name="matmulx4", shape=[6, 6, 2, 6], dtype='float32') - matmuly4 = static.data( - name="matmuly4", shape=[6, 6, 6, 6], dtype='float32') + matmulx4 = static.data(name="matmulx4", + shape=[6, 6, 2, 6], + dtype='float32') + matmuly4 = static.data(name="matmuly4", + shape=[6, 6, 6, 6], + dtype='float32') output2 = paddle.matmul(x=matmulx4, y=matmuly4) output_2 = layers.matmul(x=matmulx4, y=matmuly4) ops = program.global_block().ops @@ -202,16 +212,20 @@ class TestCompatible(unittest.TestCase): loss, program, start_program = mlp_forward(program, startup_program) with static.program_guard(program, start_program), utils.unique_name.guard(): - matmulx3 = static.data( - name="matmulx3", shape=[6, 2, 6], dtype='float32') - matmuly3 = static.data( - name="matmuly3", shape=[6, 6], dtype='float32') + matmulx3 = static.data(name="matmulx3", + shape=[6, 2, 6], + dtype='float32') + matmuly3 = static.data(name="matmuly3", + shape=[6, 6], + dtype='float32') output1 = paddle.matmul(x=matmulx3, y=matmuly3) output_1 = layers.matmul(x=matmulx3, y=matmuly3) - matmulx4 = static.data( - name="matmulx4", shape=[6, 6, 6, 6], dtype='float32') - matmuly4 = static.data( - name="matmuly4", shape=[6, 6, 6, 6], dtype='float32') + matmulx4 = static.data(name="matmulx4", + shape=[6, 6, 6, 6], + dtype='float32') + matmuly4 = static.data(name="matmuly4", + shape=[6, 6, 6, 6], + dtype='float32') output2 = paddle.matmul(x=matmulx4, y=matmuly4) output_2 = layers.matmul(x=matmulx4, y=matmuly4) ops = program.global_block().ops @@ -289,16 +303,20 @@ class TestCompatible(unittest.TestCase): loss, program, start_program = mlp_forward(program, startup_program) with static.program_guard(program, start_program), utils.unique_name.guard(): - matmulx3 = static.data( - name="matmulx3", shape=[6, 2, 6], dtype='float32') - matmuly3 = static.data( - name="matmuly3", shape=[6, 6], dtype='float32') + matmulx3 = static.data(name="matmulx3", + shape=[6, 2, 6], + dtype='float32') + matmuly3 = static.data(name="matmuly3", + shape=[6, 6], + dtype='float32') output1 = paddle.matmul(x=matmulx3, y=matmuly3) output_1 = layers.matmul(x=matmulx3, y=matmuly3) - matmulx4 = static.data( - name="matmulx4", shape=[6, 6, 2, 6], dtype='float32') - matmuly4 = static.data( - name="matmuly4", shape=[6, 6, 6, 6], dtype='float32') + matmulx4 = static.data(name="matmulx4", + shape=[6, 6, 2, 6], + dtype='float32') + matmuly4 = static.data(name="matmuly4", + shape=[6, 6, 6, 6], + dtype='float32') output2 = paddle.matmul(x=matmulx4, y=matmuly4) output_2 = layers.matmul(x=matmulx4, y=matmuly4) ops = program.global_block().ops diff --git a/python/paddle/fluid/tests/unittests/test_auto_search_dist_op.py b/python/paddle/fluid/tests/unittests/test_auto_search_dist_op.py index 
4cb58eac7cc..568856244c6 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_search_dist_op.py +++ b/python/paddle/fluid/tests/unittests/test_auto_search_dist_op.py @@ -30,11 +30,13 @@ from paddle.distributed.auto_parallel.operators.common import get_distributed_op from paddle.distributed.auto_parallel.dist_context import DistributedContext, DistributedOperatorContext from paddle.distributed.auto_parallel.dist_attribute import OperatorDistributedAttribute, TensorDistributedAttribute from paddle.distributed.auto_parallel.dist_op import DistributedOperator + paddle.enable_static() device = "gpu" if core.is_compiled_with_cuda() else "cpu" class MLPLayer(nn.Layer): + def __init__(self, hidden_size=1024, intermediate_size=4 * 1024, @@ -42,14 +44,18 @@ class MLPLayer(nn.Layer): super(MLPLayer, self).__init__() d_model = hidden_size dim_feedforward = intermediate_size - weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal( - mean=0.0, std=initializer_range)) + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)) bias_attr = None - self.linear0 = nn.Linear( - d_model, dim_feedforward, weight_attr, bias_attr=bias_attr) - self.linear1 = nn.Linear( - dim_feedforward, d_model, weight_attr, bias_attr=bias_attr) + self.linear0 = nn.Linear(d_model, + dim_feedforward, + weight_attr, + bias_attr=bias_attr) + self.linear1 = nn.Linear(dim_feedforward, + d_model, + weight_attr, + bias_attr=bias_attr) self.norm = nn.LayerNorm(d_model, epsilon=1e-5) def forward(self, input): @@ -76,17 +82,16 @@ def mlp_forward(train_program, start_program): input = embedding(input) input = paddle.reshape(input, [hidden_size, batch_size]) input = paddle.transpose(input, perm=[1, 0]) - matmulinput = static.data( - name="matmulinput", - shape=[hidden_size, hidden_size], - dtype='float32') + matmulinput = static.data(name="matmulinput", + shape=[hidden_size, hidden_size], + dtype='float32') input = layers.matmul(x=input, y=matmulinput) - label = static.data( - name="label", shape=[batch_size, 1], dtype='float32') - mlp = MLPLayer( - hidden_size=hidden_size, - intermediate_size=4 * hidden_size, - initializer_range=0.02) + label = static.data(name="label", + shape=[batch_size, 1], + dtype='float32') + mlp = MLPLayer(hidden_size=hidden_size, + intermediate_size=4 * hidden_size, + initializer_range=0.02) predict = mlp(input) error_cost = paddle.nn.functional.square_error_cost(predict, label) @@ -97,6 +102,7 @@ def mlp_forward(train_program, start_program): class TestCompatible(unittest.TestCase): + def test_reshape_remove_compatible(self): valid_op_dist_attr_list = [] program = paddle.static.Program() diff --git a/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py b/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py index 8572572f146..e49895ca77d 100644 --- a/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py +++ b/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py @@ -19,6 +19,7 @@ import paddle.fluid as fluid class TestAvoidTwiceInitialization(unittest.TestCase): + def test_avoid_twice_initialization(self): cur_program = fluid.Program() cur_block = cur_program.current_block() @@ -27,18 +28,18 @@ class TestAvoidTwiceInitialization(unittest.TestCase): shape=[2, 2], dtype='float32', name='var_a') - cur_block.append_op( - type="c_broadcast", - inputs={"X": [var]}, - outputs={"Out": [var]}, - attrs={'root': 0, - 'ring_id': 0, - 'use_calc_stream': False}) - cur_block.append_op( - 
type="c_sync_comm_stream", - inputs={'X': [var]}, - outputs={'Out': [var]}, - attrs={'ring_id': 0}) + cur_block.append_op(type="c_broadcast", + inputs={"X": [var]}, + outputs={"Out": [var]}, + attrs={ + 'root': 0, + 'ring_id': 0, + 'use_calc_stream': False + }) + cur_block.append_op(type="c_sync_comm_stream", + inputs={'X': [var]}, + outputs={'Out': [var]}, + attrs={'ring_id': 0}) var2 = cur_block.create_parameter( initializer=fluid.initializer.Constant(value=0.01), shape=[2, 2], diff --git a/python/paddle/fluid/tests/unittests/test_backward.py b/python/paddle/fluid/tests/unittests/test_backward.py index e0d6a606e25..a6c9caacc78 100644 --- a/python/paddle/fluid/tests/unittests/test_backward.py +++ b/python/paddle/fluid/tests/unittests/test_backward.py @@ -58,8 +58,8 @@ class TestBackward(unittest.TestCase): """ def _check_all(self, net): - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) main = fluid.Program() @@ -113,8 +113,8 @@ class TestBackward(unittest.TestCase): block_no_grad_set = None else: block_no_grad_set = set( - map(fluid.backward._strip_grad_suffix_, no_grad_dict[ - self.global_block_idx])) + map(fluid.backward._strip_grad_suffix_, + no_grad_dict[self.global_block_idx])) op_path = fluid.backward._find_op_path_(root_block, outputs, inputs, block_no_grad_set) op_types = [op.type for op in op_path] @@ -131,8 +131,8 @@ class TestBackward(unittest.TestCase): return no_grad_vars def _check_error_param_list(self, net, parameter_list): - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) main = fluid.Program() @@ -146,8 +146,8 @@ class TestBackward(unittest.TestCase): exe.run(feed=net.init_data()) def _check_error_no_grad_set(self, net, no_grad_set): - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) main = fluid.Program() @@ -162,6 +162,7 @@ class TestBackward(unittest.TestCase): class SimpleNet(BackwardNet): + def __init__(self): super(SimpleNet, self).__init__() self.stop_gradient_grad_vars = set([ @@ -201,20 +202,25 @@ class SimpleNet(BackwardNet): x = fluid.data(name='x_no_grad', shape=self.shape, dtype='int64') x2 = fluid.data(name='x2_no_grad', shape=self.shape, dtype='int64') x3 = fluid.data(name='x3_no_grad', shape=self.shape, dtype='int64') - label = fluid.data( - name='label_no_grad', shape=[self.shape[0], 1], dtype='float32') + label = fluid.data(name='label_no_grad', + shape=[self.shape[0], 1], + dtype='float32') # shared layer, the grad of 'w2v' will be summed and renamed. 
# To test _addup_repetitive_outputs_ - x_emb = fluid.embedding( - x, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v')) - x2_emb = fluid.embedding( - x2, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v')) - x3_emb = fluid.embedding( - x3, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v')) + x_emb = fluid.embedding(x, + size=[100, 64], + param_attr=fluid.ParamAttr(name='w2v')) + x2_emb = fluid.embedding(x2, + size=[100, 64], + param_attr=fluid.ParamAttr(name='w2v')) + x3_emb = fluid.embedding(x3, + size=[100, 64], + param_attr=fluid.ParamAttr(name='w2v')) # merge layers x_merge = fluid.layers.elementwise_add(x_emb, x2_emb, name='x_add_x2') - x2_merge = fluid.layers.elementwise_add( - x2_emb, x3_emb, name='x2_add_x3') + x2_merge = fluid.layers.elementwise_add(x2_emb, + x3_emb, + name='x2_add_x3') # shared fc_w predict = fluid.layers.fc(input=x_merge, size=1, @@ -235,6 +241,7 @@ class SimpleNet(BackwardNet): class TestSimpleNet(TestBackward): + def test_backward(self): """ Instantiate each NetClass to test backward. @@ -245,6 +252,7 @@ class TestSimpleNet(TestBackward): class TestGradientsError(unittest.TestCase): + def test_error(self): x = fluid.data(name='x', shape=[None, 2, 8, 8], dtype='float32') x.stop_gradient = False @@ -265,6 +273,7 @@ class TestGradientsError(unittest.TestCase): class TestSimpleNetWithErrorParamList(TestBackward): + def test_parameter_list_type_error(self): self.global_block_idx = 0 self.net = SimpleNet() @@ -278,6 +287,7 @@ class TestSimpleNetWithErrorParamList(TestBackward): class TestSimpleNetWithErrorNoGradSet(TestBackward): + def test_no_grad_set_type_error(self): self.global_block_idx = 0 self.net = SimpleNet() @@ -291,6 +301,7 @@ class TestSimpleNetWithErrorNoGradSet(TestBackward): class TestAppendBackwardWithError(unittest.TestCase): + def build_net(self): x = fluid.data(name='x', shape=[None, 13], dtype='int64') y = fluid.data(name='y', shape=[None, 1], dtype='float32') @@ -317,8 +328,8 @@ class TestAppendBackwardWithError(unittest.TestCase): def test_parameter_list_type_error(self): with self.assertRaises(TypeError): self.param_names[0] = np.random.random([10]) - fluid.backward.append_backward( - loss=self.avg_loss, parameter_list=self.param_names) + fluid.backward.append_backward(loss=self.avg_loss, + parameter_list=self.param_names) def test_callback_type_error(self): with self.assertRaises(TypeError): @@ -326,11 +337,12 @@ class TestAppendBackwardWithError(unittest.TestCase): def callback(block, context): return - fluid.backward.append_backward( - loss=self.avg_loss, callbacks=callback) + fluid.backward.append_backward(loss=self.avg_loss, + callbacks=callback) class TestGradientsWithOptimizer(unittest.TestCase): + def _check_grad_op_name(self, forward_list, optimiezed_list): backward_list = [op + "_grad" for op in reversed(forward_list)] idx = optimiezed_list.index(backward_list[0], len(backward_list)) @@ -361,6 +373,7 @@ class TestGradientsWithOptimizer(unittest.TestCase): # TODO(Aurelius84): add conditional network test class ConditionalNet(BackwardNet): + def __init__(self): super(ConditionalNet, self).__init__() diff --git a/python/paddle/fluid/tests/unittests/test_backward_infer_var_data_type_shape.py b/python/paddle/fluid/tests/unittests/test_backward_infer_var_data_type_shape.py index a0cd6fca573..0c0a2419cff 100644 --- a/python/paddle/fluid/tests/unittests/test_backward_infer_var_data_type_shape.py +++ b/python/paddle/fluid/tests/unittests/test_backward_infer_var_data_type_shape.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 
PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,11 +22,14 @@ import warnings class TestBackwardInferVarDataTypeShape(unittest.TestCase): + def test_backward_infer_var_data_type_shape(self): paddle.enable_static() program = fluid.default_main_program() - dy = program.global_block().create_var( - name="Tmp@GRAD", shape=[1, 1], dtype=np.float32, persistable=True) + dy = program.global_block().create_var(name="Tmp@GRAD", + shape=[1, 1], + dtype=np.float32, + persistable=True) # invoke warning fluid.backward._infer_var_data_type_shape_("Tmp@GRAD", program.global_block()) diff --git a/python/paddle/fluid/tests/unittests/test_base_layer.py b/python/paddle/fluid/tests/unittests/test_base_layer.py index 3bdd03b3212..bb5c691a6e0 100644 --- a/python/paddle/fluid/tests/unittests/test_base_layer.py +++ b/python/paddle/fluid/tests/unittests/test_base_layer.py @@ -24,20 +24,26 @@ from paddle.fluid.framework import _test_eager_guard, in_dygraph_mode class L1(fluid.Layer): + def __init__(self): super(L1, self).__init__() self._param_attr = fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1)) - self.w1 = self.create_parameter( - attr=self._param_attr, shape=[2, 2], dtype='float32', is_bias=False) - self.w2 = self.create_parameter( - attr=self._param_attr, shape=[2, 2], dtype='float32', is_bias=False) + self.w1 = self.create_parameter(attr=self._param_attr, + shape=[2, 2], + dtype='float32', + is_bias=False) + self.w2 = self.create_parameter(attr=self._param_attr, + shape=[2, 2], + dtype='float32', + is_bias=False) def forward(self): return self.w1 + self.w2 class L2(fluid.Layer): + def __init__(self): super(L2, self).__init__() self.layer1 = L1() @@ -48,6 +54,7 @@ class L2(fluid.Layer): class L3(fluid.Layer): + def __init__(self): super(L3, self).__init__() self.layer1 = L2() @@ -58,6 +65,7 @@ class L3(fluid.Layer): class TestBaseLayer(unittest.TestCase): + def func_test_one_level(self): with fluid.dygraph.guard(): l = L1() @@ -131,6 +139,7 @@ class TestBaseLayer(unittest.TestCase): class BufferLayer(fluid.Layer): + def __init__(self): super(BufferLayer, self).__init__() buffer_var = to_variable(np.zeros([2, 4]).astype('int32')) @@ -141,11 +150,13 @@ class BufferLayer(fluid.Layer): class BufferNet(fluid.Layer): + def __init__(self): super(BufferNet, self).__init__() self.buffer_layer = BufferLayer() - self.w1 = self.create_parameter( - shape=[2, 2], dtype='float32', is_bias=False) + self.w1 = self.create_parameter(shape=[2, 2], + dtype='float32', + is_bias=False) buffer_var = to_variable(np.ones([2, 4]).astype('int32')) self.register_buffer("net_buffer", buffer_var) @@ -156,7 +167,9 @@ class BufferNet(fluid.Layer): class TestBuffer(unittest.TestCase): + def func_test_buffers_and_named_buffers(self): + def names(named_buffers): return [name for name, _ in named_buffers] @@ -173,9 +186,8 @@ class TestBuffer(unittest.TestCase): ['net_buffer', 'new_buffer', 'buffer_layer.layer_buffer']) self.assertEqual(len(net.buffers(include_sublayers=False)), 2) - self.assertEqual( - names(net.named_buffers(include_sublayers=False)), - ['net_buffer', 'new_buffer']) 
+ self.assertEqual(names(net.named_buffers(include_sublayers=False)), + ['net_buffer', 'new_buffer']) def test_buffers_and_named_buffers(self): with _test_eager_guard(): @@ -363,6 +375,7 @@ class TestBuffer(unittest.TestCase): class BufferNetWithModification(paddle.nn.Layer): + def __init__(self, shape): super(BufferNetWithModification, self).__init__() @@ -380,6 +393,7 @@ class BufferNetWithModification(paddle.nn.Layer): class TestModifiedBuffer(unittest.TestCase): + def funcsetUp(self): paddle.disable_static() self.prog_trans = ProgramTranslator() @@ -410,6 +424,7 @@ class TestModifiedBuffer(unittest.TestCase): class TestLayerTo(unittest.TestCase): + def funcsetUp(self): paddle.disable_static() self.linear = paddle.nn.Linear(2, 2) @@ -454,8 +469,8 @@ class TestLayerTo(unittest.TestCase): self.assertEqual(self.linear.weight.place.gpu_device_id(), 0) self.assertTrue(self.linear.buf_name.place.is_gpu_place()) self.assertEqual(self.linear.buf_name.place.gpu_device_id(), 0) - self.assertTrue(self.linear.weight._grad_ivar().place.is_gpu_place( - )) + self.assertTrue( + self.linear.weight._grad_ivar().place.is_gpu_place()) self.assertEqual( self.linear.weight._grad_ivar().place.gpu_device_id(), 0) @@ -464,8 +479,8 @@ class TestLayerTo(unittest.TestCase): self.assertEqual(self.linear.weight.place.gpu_device_id(), 0) self.assertTrue(self.linear.buf_name.place.is_gpu_place()) self.assertEqual(self.linear.buf_name.place.gpu_device_id(), 0) - self.assertTrue(self.linear.weight._grad_ivar().place.is_gpu_place( - )) + self.assertTrue( + self.linear.weight._grad_ivar().place.is_gpu_place()) self.assertEqual( self.linear.weight._grad_ivar().place.gpu_device_id(), 0) for p in self.linear.parameters(): diff --git a/python/paddle/fluid/tests/unittests/test_basic_gru_api.py b/python/paddle/fluid/tests/unittests/test_basic_gru_api.py index ee8a1b7af24..2a06f192777 100644 --- a/python/paddle/fluid/tests/unittests/test_basic_gru_api.py +++ b/python/paddle/fluid/tests/unittests/test_basic_gru_api.py @@ -56,6 +56,7 @@ def gru_np(input, batch_first=False, is_bidirect=False, sequence_length=None): + def step(step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b): concat_1 = np.concatenate([step_in, pre_hidden], 1) @@ -66,8 +67,8 @@ def gru_np(input, r_hidden = r * pre_hidden - candidate = np.matmul( - np.concatenate([step_in, r_hidden], 1), candidate_w) + candidate = np.matmul(np.concatenate([step_in, r_hidden], 1), + candidate_w) candidate += candidate_b c = tanh(candidate) @@ -95,8 +96,8 @@ def gru_np(input, if is_bidirect: direc_num = 2 if init_h: - init_h = np.reshape( - init_h, shape=[num_layers, direc_num, -1, hidden_size]) + init_h = np.reshape(init_h, + shape=[num_layers, direc_num, -1, hidden_size]) else: init_h = np.zeros([num_layers, direc_num, batch_size, hidden_size]) @@ -141,8 +142,9 @@ def gru_np(input, return rnn_out, last_hidden_out - fw_rnn_out, fw_last_hidden = get_single_direction_output( - input, mask, direc_index=0) + fw_rnn_out, fw_last_hidden = get_single_direction_output(input, + mask, + direc_index=0) if is_bidirect: bw_input = input[::-1] @@ -150,8 +152,9 @@ def gru_np(input, if mask is not None: bw_mask = mask[::-1] - bw_rnn_out, bw_last_hidden = get_single_direction_output( - bw_input, bw_mask, direc_index=1) + bw_rnn_out, bw_last_hidden = get_single_direction_output(bw_input, + bw_mask, + direc_index=1) bw_rnn_out = bw_rnn_out[::-1] @@ -175,6 +178,7 @@ def gru_np(input, class TestBasicGRUApi(unittest.TestCase): + def setUp(self): self.hidden_size = 10 self.batch_size = 5 @@ 
-184,12 +188,12 @@ class TestBasicGRUApi(unittest.TestCase): self.batch_first = False def test_run(self): - x = layers.data( - name='x', - shape=[-1, self.batch_size, self.hidden_size], - dtype='float32') - sequence_length = layers.data( - name="sequence_length", shape=[-1], dtype='float32') + x = layers.data(name='x', + shape=[-1, self.batch_size, self.hidden_size], + dtype='float32') + sequence_length = layers.data(name="sequence_length", + shape=[-1], + dtype='float32') rnn_out, last_hidden = basic_gru( x, None, self.hidden_size, num_layers=self.num_layers, \ batch_first = self.batch_first, bidirectional=self.is_bidirect, sequence_length=sequence_length ) @@ -221,29 +225,29 @@ class TestBasicGRUApi(unittest.TestCase): candidate_b_name = "basic_gru_layers_" + str( i) + "/BasicGRUUnit_0.b_1" - gate_w = np.array(fluid.global_scope().find_var(gate_w_name) - .get_tensor()) - gate_w = np.random.uniform( - -0.1, 0.1, size=gate_w.shape).astype('float32') - fluid.global_scope().find_var(gate_w_name).get_tensor().set(gate_w, - place) - - gate_b = np.array(fluid.global_scope().find_var(gate_b_name) - .get_tensor()) - gate_b = np.random.uniform( - -0.1, 0.1, size=gate_b.shape).astype('float32') - fluid.global_scope().find_var(gate_b_name).get_tensor().set(gate_b, - place) - - candidate_w = np.array(fluid.global_scope().find_var( - candidate_w_name).get_tensor()) + gate_w = np.array( + fluid.global_scope().find_var(gate_w_name).get_tensor()) + gate_w = np.random.uniform(-0.1, 0.1, + size=gate_w.shape).astype('float32') + fluid.global_scope().find_var(gate_w_name).get_tensor().set( + gate_w, place) + + gate_b = np.array( + fluid.global_scope().find_var(gate_b_name).get_tensor()) + gate_b = np.random.uniform(-0.1, 0.1, + size=gate_b.shape).astype('float32') + fluid.global_scope().find_var(gate_b_name).get_tensor().set( + gate_b, place) + + candidate_w = np.array( + fluid.global_scope().find_var(candidate_w_name).get_tensor()) candidate_w = np.random.uniform( -0.1, 0.1, size=candidate_w.shape).astype('float32') fluid.global_scope().find_var(candidate_w_name).get_tensor().set( candidate_w, place) - candidate_b = np.array(fluid.global_scope().find_var( - candidate_b_name).get_tensor()) + candidate_b = np.array( + fluid.global_scope().find_var(candidate_b_name).get_tensor()) candidate_b = np.random.uniform( -0.1, 0.1, size=candidate_b.shape).astype('float32') fluid.global_scope().find_var(candidate_b_name).get_tensor().set( @@ -265,17 +269,17 @@ class TestBasicGRUApi(unittest.TestCase): candidate_b_name = "basic_gru_reverse_layers_" + str( i) + "/BasicGRUUnit_0.b_1" - gate_w = np.array(fluid.global_scope().find_var(gate_w_name) - .get_tensor()) - gate_w = np.random.uniform( - -0.1, 0.1, size=gate_w.shape).astype('float32') + gate_w = np.array( + fluid.global_scope().find_var(gate_w_name).get_tensor()) + gate_w = np.random.uniform(-0.1, 0.1, + size=gate_w.shape).astype('float32') fluid.global_scope().find_var(gate_w_name).get_tensor().set( gate_w, place) - gate_b = np.array(fluid.global_scope().find_var(gate_b_name) - .get_tensor()) - gate_b = np.random.uniform( - -0.1, 0.1, size=gate_b.shape).astype('float32') + gate_b = np.array( + fluid.global_scope().find_var(gate_b_name).get_tensor()) + gate_b = np.random.uniform(-0.1, 0.1, + size=gate_b.shape).astype('float32') fluid.global_scope().find_var(gate_b_name).get_tensor().set( gate_b, place) @@ -283,53 +287,53 @@ class TestBasicGRUApi(unittest.TestCase): candidate_w_name).get_tensor()) candidate_w = np.random.uniform( -0.1, 0.1, 
size=candidate_w.shape).astype('float32') - fluid.global_scope().find_var(candidate_w_name).get_tensor( - ).set(candidate_w, place) + fluid.global_scope().find_var( + candidate_w_name).get_tensor().set(candidate_w, place) candidate_b = np.array(fluid.global_scope().find_var( candidate_b_name).get_tensor()) candidate_b = np.random.uniform( -0.1, 0.1, size=candidate_b.shape).astype('float32') - fluid.global_scope().find_var(candidate_b_name).get_tensor( - ).set(candidate_b, place) + fluid.global_scope().find_var( + candidate_b_name).get_tensor().set(candidate_b, place) gate_weight.append(gate_w) gate_bias.append(gate_b) candidate_weight.append(candidate_w) candidate_bias.append(candidate_b) - step_input_np = np.random.uniform(-0.1, 0.1, ( - self.seq_len, self.batch_size, self.hidden_size)).astype('float32') + step_input_np = np.random.uniform( + -0.1, 0.1, + (self.seq_len, self.batch_size, self.hidden_size)).astype('float32') sequence_length_np = np.random.randint( self.seq_len // 2, self.seq_len, size=(self.batch_size)).astype('int64') - out = exe.run( - feed={'x': step_input_np, - 'sequence_length': sequence_length_np}, - fetch_list=[rnn_out, last_hidden]) + out = exe.run(feed={ + 'x': step_input_np, + 'sequence_length': sequence_length_np + }, + fetch_list=[rnn_out, last_hidden]) api_rnn_out = out[0] api_last_hidden = out[1] - np_out = gru_np( - step_input_np, - None, - self.hidden_size, - gate_weight, - gate_bias, - candidate_weight, - candidate_bias, - num_layers=self.num_layers, - batch_first=self.batch_first, - is_bidirect=self.is_bidirect, - sequence_length=sequence_length_np) + np_out = gru_np(step_input_np, + None, + self.hidden_size, + gate_weight, + gate_bias, + candidate_weight, + candidate_bias, + num_layers=self.num_layers, + batch_first=self.batch_first, + is_bidirect=self.is_bidirect, + sequence_length=sequence_length_np) self.assertTrue(np.allclose(api_rnn_out, np_out[0], rtol=1e-4, atol=0)) self.assertTrue( - np.allclose( - api_last_hidden, np_out[1], rtol=1e-4, atol=0)) + np.allclose(api_last_hidden, np_out[1], rtol=1e-4, atol=0)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py b/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py index 597d8306b01..7c4c8ff5aee 100644 --- a/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py +++ b/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py @@ -66,14 +66,16 @@ def step(step_in, pre_hidden, gate_w, gate_b, candidate_w, candidate_b): class TestBasicGRUUnit(unittest.TestCase): + def setUp(self): self.hidden_size = 5 self.batch_size = 5 def test_run(self): x = layers.data(name='x', shape=[-1, self.hidden_size], dtype='float32') - pre_hidden = layers.data( - name="pre_hidden", shape=[-1, self.hidden_size], dtype='float32') + pre_hidden = layers.data(name="pre_hidden", + shape=[-1, self.hidden_size], + dtype='float32') gru_unit = BasicGRUUnit("gru_unit", self.hidden_size) new_hidden = gru_unit(x, pre_hidden) @@ -97,41 +99,43 @@ class TestBasicGRUUnit(unittest.TestCase): candidate_w_name = "gru_unit/BasicGRUUnit_0.w_1" candidate_b_name = "gru_unit/BasicGRUUnit_0.b_1" - gate_w = np.array(fluid.global_scope().find_var(gate_w_name).get_tensor( - )) - gate_w = np.random.uniform( - -0.1, 0.1, size=gate_w.shape).astype('float32') - fluid.global_scope().find_var(gate_w_name).get_tensor().set(gate_w, - place) - - gate_b = np.array(fluid.global_scope().find_var(gate_b_name).get_tensor( - )) - gate_b = np.random.uniform( - -0.1, 0.1, 
size=gate_b.shape).astype('float32') - fluid.global_scope().find_var(gate_b_name).get_tensor().set(gate_b, - place) - - candidate_w = np.array(fluid.global_scope().find_var(candidate_w_name) - .get_tensor()) + gate_w = np.array( + fluid.global_scope().find_var(gate_w_name).get_tensor()) + gate_w = np.random.uniform(-0.1, 0.1, + size=gate_w.shape).astype('float32') + fluid.global_scope().find_var(gate_w_name).get_tensor().set( + gate_w, place) + + gate_b = np.array( + fluid.global_scope().find_var(gate_b_name).get_tensor()) + gate_b = np.random.uniform(-0.1, 0.1, + size=gate_b.shape).astype('float32') + fluid.global_scope().find_var(gate_b_name).get_tensor().set( + gate_b, place) + + candidate_w = np.array( + fluid.global_scope().find_var(candidate_w_name).get_tensor()) candidate_w = np.random.uniform( -0.1, 0.1, size=candidate_w.shape).astype('float32') fluid.global_scope().find_var(candidate_w_name).get_tensor().set( candidate_w, place) - candidate_b = np.array(fluid.global_scope().find_var(candidate_b_name) - .get_tensor()) + candidate_b = np.array( + fluid.global_scope().find_var(candidate_b_name).get_tensor()) candidate_b = np.random.uniform( -0.1, 0.1, size=candidate_b.shape).astype('float32') fluid.global_scope().find_var(candidate_b_name).get_tensor().set( candidate_b, place) - step_input_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.hidden_size)).astype('float32') - pre_hidden_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.hidden_size)).astype('float32') + step_input_np = np.random.uniform( + -0.1, 0.1, (self.batch_size, self.hidden_size)).astype('float32') + pre_hidden_np = np.random.uniform( + -0.1, 0.1, (self.batch_size, self.hidden_size)).astype('float32') - out = exe.run(feed={'x': step_input_np, - 'pre_hidden': pre_hidden_np}, + out = exe.run(feed={ + 'x': step_input_np, + 'pre_hidden': pre_hidden_np + }, fetch_list=[new_hidden]) api_out = out[0] diff --git a/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py b/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py index bedba672edf..abe0d6f8d56 100644 --- a/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py +++ b/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py @@ -56,6 +56,7 @@ def lstm_np(input, is_bidirect=False, sequence_length=None, forget_bias=1.0): + def step(step_in, pre_hidden, pre_cell, gate_w, gate_b): concat_1 = np.concatenate([step_in, pre_hidden], 1) @@ -187,6 +188,7 @@ def lstm_np(input, class TestBasicLSTMApi(unittest.TestCase): + def setUp(self): self.hidden_size = 10 self.batch_size = 5 @@ -197,12 +199,12 @@ class TestBasicLSTMApi(unittest.TestCase): self.forget_bias = 1.0 def test_run(self): - x = layers.data( - name='x', - shape=[-1, self.batch_size, self.hidden_size], - dtype='float32') - sequence_length = layers.data( - name="sequence_length", shape=[-1], dtype='float32') + x = layers.data(name='x', + shape=[-1, self.batch_size, self.hidden_size], + dtype='float32') + sequence_length = layers.data(name="sequence_length", + shape=[-1], + dtype='float32') rnn_out, last_hidden, last_cell = basic_lstm( x, None, None, self.hidden_size, num_layers=self.num_layers, \ batch_first = self.batch_first, bidirectional=self.is_bidirect, sequence_length=sequence_length, forget_bias = self.forget_bias ) @@ -227,19 +229,19 @@ class TestBasicLSTMApi(unittest.TestCase): gate_w_name = "basic_lstm_layers_" + str(i) + "/BasicLSTMUnit_0.w_0" gate_b_name = "basic_lstm_layers_" + str(i) + "/BasicLSTMUnit_0.b_0" - gate_w = 
np.array(fluid.global_scope().find_var(gate_w_name) - .get_tensor()) - gate_w = np.random.uniform( - -0.1, 0.1, size=gate_w.shape).astype('float32') - fluid.global_scope().find_var(gate_w_name).get_tensor().set(gate_w, - place) + gate_w = np.array( + fluid.global_scope().find_var(gate_w_name).get_tensor()) + gate_w = np.random.uniform(-0.1, 0.1, + size=gate_w.shape).astype('float32') + fluid.global_scope().find_var(gate_w_name).get_tensor().set( + gate_w, place) - gate_b = np.array(fluid.global_scope().find_var(gate_b_name) - .get_tensor()) - gate_b = np.random.uniform( - -0.1, 0.1, size=gate_b.shape).astype('float32') - fluid.global_scope().find_var(gate_b_name).get_tensor().set(gate_b, - place) + gate_b = np.array( + fluid.global_scope().find_var(gate_b_name).get_tensor()) + gate_b = np.random.uniform(-0.1, 0.1, + size=gate_b.shape).astype('float32') + fluid.global_scope().find_var(gate_b_name).get_tensor().set( + gate_b, place) gate_weight.append(gate_w) gate_bias.append(gate_b) @@ -251,57 +253,56 @@ class TestBasicLSTMApi(unittest.TestCase): gate_b_name = "basic_lstm_reverse_layers_" + str( i) + "/BasicLSTMUnit_0.b_0" - gate_w = np.array(fluid.global_scope().find_var(gate_w_name) - .get_tensor()) - gate_w = np.random.uniform( - -0.1, 0.1, size=gate_w.shape).astype('float32') + gate_w = np.array( + fluid.global_scope().find_var(gate_w_name).get_tensor()) + gate_w = np.random.uniform(-0.1, 0.1, + size=gate_w.shape).astype('float32') fluid.global_scope().find_var(gate_w_name).get_tensor().set( gate_w, place) - gate_b = np.array(fluid.global_scope().find_var(gate_b_name) - .get_tensor()) - gate_b = np.random.uniform( - -0.1, 0.1, size=gate_b.shape).astype('float32') + gate_b = np.array( + fluid.global_scope().find_var(gate_b_name).get_tensor()) + gate_b = np.random.uniform(-0.1, 0.1, + size=gate_b.shape).astype('float32') fluid.global_scope().find_var(gate_b_name).get_tensor().set( gate_b, place) gate_weight.append(gate_w) gate_bias.append(gate_b) - step_input_np = np.random.uniform(-0.1, 0.1, ( - self.seq_len, self.batch_size, self.hidden_size)).astype('float32') + step_input_np = np.random.uniform( + -0.1, 0.1, + (self.seq_len, self.batch_size, self.hidden_size)).astype('float32') sequence_length_np = np.random.randint( self.seq_len // 2, self.seq_len, size=(self.batch_size)).astype('int64') - out = exe.run( - feed={'x': step_input_np, - 'sequence_length': sequence_length_np}, - fetch_list=[rnn_out, last_hidden, last_cell]) + out = exe.run(feed={ + 'x': step_input_np, + 'sequence_length': sequence_length_np + }, + fetch_list=[rnn_out, last_hidden, last_cell]) api_rnn_out = out[0] api_last_hidden = out[1] api_last_cell = out[2] - np_out = lstm_np( - step_input_np, - None, - None, - self.hidden_size, - gate_weight, - gate_bias, - num_layers=self.num_layers, - batch_first=self.batch_first, - is_bidirect=self.is_bidirect, - sequence_length=sequence_length_np) + np_out = lstm_np(step_input_np, + None, + None, + self.hidden_size, + gate_weight, + gate_bias, + num_layers=self.num_layers, + batch_first=self.batch_first, + is_bidirect=self.is_bidirect, + sequence_length=sequence_length_np) self.assertTrue(np.allclose(api_rnn_out, np_out[0], rtol=1e-4, atol=0)) self.assertTrue( - np.allclose( - api_last_hidden, np_out[1], rtol=1e-4, atol=0)) - self.assertTrue( - np.allclose( - api_last_cell, np_out[2], rtol=1e-4, atol=0)) + np.allclose(api_last_hidden, np_out[1], rtol=1e-4, atol=0)) + self.assertTrue(np.allclose(api_last_cell, np_out[2], rtol=1e-4, + atol=0)) if __name__ == '__main__': diff --git 
a/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py b/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py index b94ac1db665..9f76d7d736f 100644 --- a/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py +++ b/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py @@ -59,16 +59,19 @@ def step(step_in, pre_hidden, pre_cell, gate_w, gate_b, forget_bias=1.0): class TestBasicGRUUnit(unittest.TestCase): + def setUp(self): self.hidden_size = 5 self.batch_size = 5 def test_run(self): x = layers.data(name='x', shape=[-1, self.hidden_size], dtype='float32') - pre_hidden = layers.data( - name="pre_hidden", shape=[-1, self.hidden_size], dtype='float32') - pre_cell = layers.data( - name="pre_cell", shape=[-1, self.hidden_size], dtype='float32') + pre_hidden = layers.data(name="pre_hidden", + shape=[-1, self.hidden_size], + dtype='float32') + pre_cell = layers.data(name="pre_cell", + shape=[-1, self.hidden_size], + dtype='float32') lstm_unit = BasicLSTMUnit("lstm_unit", self.hidden_size) @@ -92,26 +95,26 @@ class TestBasicGRUUnit(unittest.TestCase): gate_w_name = "lstm_unit/BasicLSTMUnit_0.w_0" gate_b_name = "lstm_unit/BasicLSTMUnit_0.b_0" - gate_w = np.array(fluid.global_scope().find_var(gate_w_name).get_tensor( - )) - gate_w = np.random.uniform( - -0.1, 0.1, size=gate_w.shape).astype('float32') - fluid.global_scope().find_var(gate_w_name).get_tensor().set(gate_w, - place) - - gate_b = np.array(fluid.global_scope().find_var(gate_b_name).get_tensor( - )) - gate_b = np.random.uniform( - -0.1, 0.1, size=gate_b.shape).astype('float32') - fluid.global_scope().find_var(gate_b_name).get_tensor().set(gate_b, - place) - - step_input_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.hidden_size)).astype('float32') - pre_hidden_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.hidden_size)).astype('float32') - pre_cell_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.hidden_size)).astype('float32') + gate_w = np.array( + fluid.global_scope().find_var(gate_w_name).get_tensor()) + gate_w = np.random.uniform(-0.1, 0.1, + size=gate_w.shape).astype('float32') + fluid.global_scope().find_var(gate_w_name).get_tensor().set( + gate_w, place) + + gate_b = np.array( + fluid.global_scope().find_var(gate_b_name).get_tensor()) + gate_b = np.random.uniform(-0.1, 0.1, + size=gate_b.shape).astype('float32') + fluid.global_scope().find_var(gate_b_name).get_tensor().set( + gate_b, place) + + step_input_np = np.random.uniform( + -0.1, 0.1, (self.batch_size, self.hidden_size)).astype('float32') + pre_hidden_np = np.random.uniform( + -0.1, 0.1, (self.batch_size, self.hidden_size)).astype('float32') + pre_cell_np = np.random.uniform( + -0.1, 0.1, (self.batch_size, self.hidden_size)).astype('float32') out = exe.run( feed={ 'x' : step_input_np, 'pre_hidden' : pre_hidden_np, \ 'pre_cell' : pre_cell_np }, @@ -124,11 +127,9 @@ class TestBasicGRUUnit(unittest.TestCase): pre_cell_np, gate_w, gate_b) self.assertTrue( - np.allclose( - api_hidden_out, np_hidden_out, rtol=1e-4, atol=0)) + np.allclose(api_hidden_out, np_hidden_out, rtol=1e-4, atol=0)) self.assertTrue( - np.allclose( - api_cell_out, np_cell_out, rtol=1e-4, atol=0)) + np.allclose(api_cell_out, np_cell_out, rtol=1e-4, atol=0)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_basic_rnn_name.py b/python/paddle/fluid/tests/unittests/test_basic_rnn_name.py index 303ff9c86a6..34ee0e1693d 100644 --- a/python/paddle/fluid/tests/unittests/test_basic_rnn_name.py +++ 
b/python/paddle/fluid/tests/unittests/test_basic_rnn_name.py @@ -27,6 +27,7 @@ import numpy as np class TestBasicGRUApiName(unittest.TestCase): + def setUp(self): self.name_set = set([ "test1_fw_w_0_gate", "test1_fw_w_0_candidate", "test1_fw_b_0_gate", @@ -45,14 +46,15 @@ class TestBasicGRUApiName(unittest.TestCase): batch_first = False with new_program_scope(): - input = layers.data( - name="input", - shape=[-1, batch_size, input_size], - dtype='float32') - pre_hidden = layers.data( - name="pre_hidden", shape=[-1, hidden_size], dtype='float32') - sequence_length = layers.data( - name="sequence_length", shape=[-1], dtype='int32') + input = layers.data(name="input", + shape=[-1, batch_size, input_size], + dtype='float32') + pre_hidden = layers.data(name="pre_hidden", + shape=[-1, hidden_size], + dtype='float32') + sequence_length = layers.data(name="sequence_length", + shape=[-1], + dtype='int32') rnn_out, last_hidden = basic_gru( input, pre_hidden, hidden_size, num_layers = num_layers, \ @@ -67,6 +69,7 @@ class TestBasicGRUApiName(unittest.TestCase): class TestBasicLSTMApiName(unittest.TestCase): + def setUp(self): self.name_set = set([ "test1_fw_w_0", "test1_fw_b_0", "test1_fw_w_1", "test1_fw_b_1", @@ -83,16 +86,18 @@ class TestBasicLSTMApiName(unittest.TestCase): batch_first = False with new_program_scope(): - input = layers.data( - name="input", - shape=[-1, batch_size, input_size], - dtype='float32') - pre_hidden = layers.data( - name="pre_hidden", shape=[-1, hidden_size], dtype='float32') - pre_cell = layers.data( - name="pre_cell", shape=[-1, hidden_size], dtype='float32') - sequence_length = layers.data( - name="sequence_length", shape=[-1], dtype='int32') + input = layers.data(name="input", + shape=[-1, batch_size, input_size], + dtype='float32') + pre_hidden = layers.data(name="pre_hidden", + shape=[-1, hidden_size], + dtype='float32') + pre_cell = layers.data(name="pre_cell", + shape=[-1, hidden_size], + dtype='float32') + sequence_length = layers.data(name="sequence_length", + shape=[-1], + dtype='int32') rnn_out, last_hidden, last_cell = basic_lstm( input, pre_hidden, pre_cell, \ hidden_size, num_layers = num_layers, \ diff --git a/python/paddle/fluid/tests/unittests/test_batch_fc_op.py b/python/paddle/fluid/tests/unittests/test_batch_fc_op.py index 56631d8d3b4..00c743eded5 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_fc_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_fc_op.py @@ -35,6 +35,7 @@ def np_cal_batchfc(input, w, bias): class TestBatchFCOp(OpTest): + def config(self): self.slot_pairs_num = 10 self.batch_size = 5 @@ -46,10 +47,10 @@ class TestBatchFCOp(OpTest): self.config() self.input = np.random.random((self.slot_pairs_num, self.batch_size, self.in_dim)).astype(self.dtype) - self.w = np.random.random((self.slot_pairs_num, self.in_dim, - self.out_dim)).astype(self.dtype) - self.bias = np.random.random((self.slot_pairs_num, - self.out_dim)).astype(self.dtype) + self.w = np.random.random( + (self.slot_pairs_num, self.in_dim, self.out_dim)).astype(self.dtype) + self.bias = np.random.random( + (self.slot_pairs_num, self.out_dim)).astype(self.dtype) self.op_type = "batch_fc" np_out = np_cal_batchfc(self.input, self.w, self.bias) np_out = np_out.astype(self.dtype) @@ -62,11 +63,12 @@ class TestBatchFCOp(OpTest): def test_check_grad_gpu(self): if core.is_compiled_with_cuda(): - self.check_grad_with_place( - core.CUDAPlace(0), ["Bias", "W", "Input"], "Out") + self.check_grad_with_place(core.CUDAPlace(0), + ["Bias", "W", "Input"], "Out") class 
TestBatchFCOp1(OpTest): + def config(self): self.slot_pairs_num = 10 self.batch_size = 5 @@ -78,10 +80,10 @@ class TestBatchFCOp1(OpTest): self.config() self.input = np.random.random((self.slot_pairs_num, self.batch_size, self.in_dim)).astype(self.dtype) - self.w = np.random.random((self.slot_pairs_num, self.in_dim, - self.out_dim)).astype(self.dtype) - self.bias = np.random.random((self.slot_pairs_num, - self.out_dim)).astype(self.dtype) + self.w = np.random.random( + (self.slot_pairs_num, self.in_dim, self.out_dim)).astype(self.dtype) + self.bias = np.random.random( + (self.slot_pairs_num, self.out_dim)).astype(self.dtype) self.op_type = "batch_fc" np_out = np_cal_batchfc(self.input, self.w, self.bias) np_out = np_out.astype(self.dtype) diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py index b02df024518..b312baea932 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py @@ -156,9 +156,9 @@ def _reference_grad(x, y_grad, scale, mean, var, epsilon, data_format): x = np.transpose(x, (0, 2, 3, 1)) y_grad = np.transpose(y_grad, (0, 2, 3, 1)) - x_grad = scale * (y_grad - np.mean( - y_grad, axis=(0, 1, 2)) - (x - mean) * np.mean( - y_grad * (x - mean), axis=(0, 1, 2)) / + x_grad = scale * (y_grad - np.mean(y_grad, axis=(0, 1, 2)) - + (x - mean) * np.mean(y_grad * + (x - mean), axis=(0, 1, 2)) / (var + epsilon)) / np.sqrt(var + epsilon) grad_scale = np.sum(y_grad * (x - mean) / np.sqrt(var + epsilon), axis=(0, 1, 2)) @@ -186,6 +186,7 @@ def create_or_get_tensor(scope, var_name, var, place): def set_output_grad(scope, outputs, place, feed_dict=None): + def __set_tensor__(name, data=None): out_tensor = scope.find_var(name).get_tensor() grad_tensor = scope.var(grad_var_name(name)).get_tensor() @@ -207,6 +208,7 @@ def set_output_grad(scope, outputs, place, feed_dict=None): class TestBatchNormOpInference(unittest.TestCase): + def setUp(self): self.dtype = np.float32 self.use_mkldnn = False @@ -252,8 +254,8 @@ class TestBatchNormOpInference(unittest.TestCase): OpTest.np_dtype_to_fluid_dtype(x_val), place) scale_tensor = create_or_get_tensor( - scope, "scale_val", - OpTest.np_dtype_to_fluid_dtype(scale_val), place) + scope, "scale_val", OpTest.np_dtype_to_fluid_dtype(scale_val), + place) bias_tensor = create_or_get_tensor( scope, "bias_val", OpTest.np_dtype_to_fluid_dtype(bias_val), place) mean_tensor = create_or_get_tensor(scope, "mean", @@ -300,7 +302,7 @@ class TestBatchNormOpInference(unittest.TestCase): # of memory descripting. So we need to convert NCHW # dims into NHWC. 
if data_layout == "NHWC" and self.use_mkldnn == True: - # Create executor to have MKL-DNN cache + # Create executor to have MKL-DNN cache # cleared after NHWC unit test place = core.CPUPlace() exe = fluid.Executor(place) @@ -310,13 +312,12 @@ class TestBatchNormOpInference(unittest.TestCase): y_tensor._set_dims(dims) # check inference result - self.__assert_close( - y_tensor, - y_out, - "inference output are different at " + str(place) + ", " + - data_layout + ", " + str(np.dtype(dtype)) + - str(np.array(y_tensor)) + str(y_out), - atol=1e-3) + self.__assert_close(y_tensor, + y_out, + "inference output are different at " + str(place) + + ", " + data_layout + ", " + str(np.dtype(dtype)) + + str(np.array(y_tensor)) + str(y_out), + atol=1e-3) def test_check_output(self): places = [core.CPUPlace()] @@ -334,6 +335,7 @@ class TestBatchNormOpInference(unittest.TestCase): class TestFP16BatchNormOpInference(TestBatchNormOpInference): + def setUp(self): self.dtype = np.float16 self.use_mkldnn = False @@ -355,6 +357,7 @@ class TestFP16BatchNormOpInference(TestBatchNormOpInference): class TestBatchNormOpTraining(unittest.TestCase): + def setUp(self): self.use_mkldnn = False self.fuse_with_relu = False @@ -385,8 +388,9 @@ class TestBatchNormOpTraining(unittest.TestCase): variance_out = var_ref * (1. - momentum) + momentum * variance saved_variance = 1. / np.sqrt(var_ref + epsilon) # run backward - x_grad, scale_grad, bias_grad = _reference_grad( - x, y_grad, scale, saved_mean, var_ref, epsilon, data_layout) + x_grad, scale_grad, bias_grad = _reference_grad(x, y_grad, scale, + saved_mean, var_ref, + epsilon, data_layout) return y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad @@ -402,6 +406,7 @@ class TestBatchNormOpTraining(unittest.TestCase): return mean, variance def test_forward_backward(self): + def test_with_place(place, data_layout, shape): # attr epsilon = self.epsilon @@ -440,10 +445,9 @@ class TestBatchNormOpTraining(unittest.TestCase): with fluid.program_guard(program): block = program.global_block() for name in ground_truth: - block.create_var( - name=name, - dtype='float32', - shape=ground_truth[name].shape) + block.create_var(name=name, + dtype='float32', + shape=ground_truth[name].shape) inputs = { "X": block.var('x'), "Scale": block.var('scale'), @@ -473,11 +477,10 @@ class TestBatchNormOpTraining(unittest.TestCase): } block.create_var(name="reserve_space", dtype='float32') outputs["ReserveSpace"] = block.var('reserve_space') - bn_op = block.append_op( - type="batch_norm", - inputs=inputs, - outputs=outputs, - attrs=attrs) + bn_op = block.append_op(type="batch_norm", + inputs=inputs, + outputs=outputs, + attrs=attrs) block.create_var(name='y@GRAD', dtype='float32', shape=y.shape) # generate backward op_desc @@ -509,8 +512,10 @@ class TestBatchNormOpTraining(unittest.TestCase): for id, name in enumerate(self.fetch_list): if name == 'variance': - self.__assert_close( - var_dict[name], out[id], name, atol=1e-3) + self.__assert_close(var_dict[name], + out[id], + name, + atol=1e-3) continue self.__assert_close(var_dict[name], out[id], name) print("op test forward passed: ", str(place), data_layout) @@ -529,6 +534,7 @@ class TestBatchNormOpTraining(unittest.TestCase): class TestBatchNormOpTrainingCase1(TestBatchNormOpTraining): + def init_test_case(self): self.use_global_stats = False self.no_grad_set = set(['scale@GRAD', 'bias@GRAD']) @@ -536,6 +542,7 @@ class TestBatchNormOpTrainingCase1(TestBatchNormOpTraining): class 
TestBatchNormOpTrainingCase2(TestBatchNormOpTraining): + def init_test_case(self): self.use_global_stats = False self.no_grad_set = set() @@ -547,6 +554,7 @@ class TestBatchNormOpTrainingCase2(TestBatchNormOpTraining): class TestBatchNormOpTrainingCase3(TestBatchNormOpTraining): + def init_test_case(self): self.use_global_stats = False self.no_grad_set = set(['x@GRAD']) @@ -554,6 +562,7 @@ class TestBatchNormOpTrainingCase3(TestBatchNormOpTraining): class TestBatchNormOpTrainingMomentumVariable(TestBatchNormOpTraining): + def init_test_case(self): self.use_momentum_variable = True self.use_global_stats = False @@ -565,6 +574,7 @@ class TestBatchNormOpTrainingMomentumVariable(TestBatchNormOpTraining): class TestBatchNormOpFreezeStatsTraining(TestBatchNormOpTraining): + def init_test_case(self): self.use_global_stats = True self.no_grad_set = set() @@ -619,6 +629,7 @@ class TestBatchNormOpFreezeStatsTraining(TestBatchNormOpTraining): class TestBatchNormOpFreezeStatsAndScaleBiasTraining( TestBatchNormOpFreezeStatsTraining): + def init_test_case(self): self.use_global_stats = True self.no_grad_set = set(['scale@GRAD', 'bias@GRAD']) @@ -626,11 +637,12 @@ class TestBatchNormOpFreezeStatsAndScaleBiasTraining( class TestBatchNormOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of batch_norm must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.batch_norm, x1) # the input dtype of batch_norm must be float16 or float32 or float64 @@ -640,12 +652,13 @@ class TestBatchNormOpError(unittest.TestCase): class TestDygraphBatchNormAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): batch_norm = fluid.dygraph.BatchNorm(10) # the input of BatchNorm must be Variable. 
- x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) self.assertRaises(TypeError, batch_norm, x1) # the input dtype of BatchNorm must be float16 or float32 or float64 @@ -655,6 +668,7 @@ class TestDygraphBatchNormAPIError(unittest.TestCase): class TestDygraphBatchNormTrainableStats(unittest.TestCase): + def test_dygraph(self): places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): @@ -703,6 +717,7 @@ class TestDygraphBatchNormTrainableStats(unittest.TestCase): class TestDygraphBatchNormOpenReserveSpace(unittest.TestCase): + def test_reservespace(self): with program_guard(Program(), Program()): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py index ac09d9f5fdf..9db95f094a7 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py @@ -26,6 +26,7 @@ import paddle class TestBatchNorm(unittest.TestCase): + def test_name(self): places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): @@ -159,8 +160,9 @@ class TestBatchNorm(unittest.TestCase): def compute_v4(x): with fluid.dygraph.guard(p): - bn = paddle.nn.BatchNorm2D( - shape[1], weight_attr=False, bias_attr=False) + bn = paddle.nn.BatchNorm2D(shape[1], + weight_attr=False, + bias_attr=False) y = bn(paddle.to_tensor(x)) return y.numpy() @@ -208,6 +210,7 @@ class TestBatchNorm(unittest.TestCase): class TestBatchNormChannelLast(unittest.TestCase): + def setUp(self): self.original_dtyep = paddle.get_default_dtype() # MIOPEN not support data type of double @@ -237,8 +240,7 @@ class TestBatchNormChannelLast(unittest.TestCase): if core.is_compiled_with_rocm(): # HIP will fail if no atol self.assertEqual( - np.allclose( - y1.numpy(), y2.numpy(), atol=1e-07), True) + np.allclose(y1.numpy(), y2.numpy(), atol=1e-07), True) else: self.assertEqual(np.allclose(y1.numpy(), y2.numpy()), True) @@ -257,8 +259,7 @@ class TestBatchNormChannelLast(unittest.TestCase): if core.is_compiled_with_rocm(): # HIP will fail if no atol self.assertEqual( - np.allclose( - y1.numpy(), y2.numpy(), atol=1e-07), True) + np.allclose(y1.numpy(), y2.numpy(), atol=1e-07), True) else: self.assertEqual(np.allclose(y1.numpy(), y2.numpy()), True) @@ -277,13 +278,13 @@ class TestBatchNormChannelLast(unittest.TestCase): if core.is_compiled_with_rocm(): # HIP will fail if no atol self.assertEqual( - np.allclose( - y1.numpy(), y2.numpy(), atol=1e-07), True) + np.allclose(y1.numpy(), y2.numpy(), atol=1e-07), True) else: self.assertEqual(np.allclose(y1.numpy(), y2.numpy()), True) class TestBatchNormUseGlobalStats(unittest.TestCase): + def setUp(self): self.places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_batch_sampler.py b/python/paddle/fluid/tests/unittests/test_batch_sampler.py index 4c5338314af..279176e0b57 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_sampler.py +++ b/python/paddle/fluid/tests/unittests/test_batch_sampler.py @@ -26,6 +26,7 @@ IMAGE_SIZE = 32 class RandomDataset(Dataset): + def __init__(self, sample_num, class_num): self.sample_num = sample_num self.class_num = class_num @@ -41,6 +42,7 @@ class RandomDataset(Dataset): class TestSampler(unittest.TestCase): + def test_main(self): dataset = RandomDataset(100, 10) sampler = Sampler(dataset) @@ -52,6 +54,7 @@ class 
TestSampler(unittest.TestCase): class TestSequenceSampler(unittest.TestCase): + def test_main(self): dataset = RandomDataset(100, 10) sampler = SequenceSampler(dataset) @@ -62,6 +65,7 @@ class TestSequenceSampler(unittest.TestCase): class TestRandomSampler(unittest.TestCase): + def test_main(self): dataset = RandomDataset(100, 10) sampler = RandomSampler(dataset) @@ -96,8 +100,10 @@ class TestRandomSampler(unittest.TestCase): def test_with_generator_num_samples(self): dataset = RandomDataset(100, 10) generator = iter(range(0, 60)) - sampler = RandomSampler( - dataset, generator=generator, num_samples=50, replacement=True) + sampler = RandomSampler(dataset, + generator=generator, + num_samples=50, + replacement=True) assert len(sampler) == 50 rets = [] @@ -107,6 +113,7 @@ class TestRandomSampler(unittest.TestCase): class TestBatchSampler(unittest.TestCase): + def setUp(self): self.num_samples = 1000 self.num_classes = 10 @@ -116,11 +123,10 @@ class TestBatchSampler(unittest.TestCase): def init_batch_sampler(self): dataset = RandomDataset(self.num_samples, self.num_classes) - bs = BatchSampler( - dataset=dataset, - batch_size=self.batch_size, - shuffle=self.shuffle, - drop_last=self.drop_last) + bs = BatchSampler(dataset=dataset, + batch_size=self.batch_size, + shuffle=self.shuffle, + drop_last=self.drop_last) return bs def test_main(self): @@ -140,6 +146,7 @@ class TestBatchSampler(unittest.TestCase): class TestBatchSamplerDropLast(TestBatchSampler): + def setUp(self): self.num_samples = 1000 self.num_classes = 10 @@ -149,6 +156,7 @@ class TestBatchSamplerDropLast(TestBatchSampler): class TestBatchSamplerShuffle(TestBatchSampler): + def setUp(self): self.num_samples = 1000 self.num_classes = 10 @@ -158,17 +166,18 @@ class TestBatchSamplerShuffle(TestBatchSampler): class TestBatchSamplerWithSampler(TestBatchSampler): + def init_batch_sampler(self): dataset = RandomDataset(1000, 10) sampler = SequenceSampler(dataset) - bs = BatchSampler( - sampler=sampler, - batch_size=self.batch_size, - drop_last=self.drop_last) + bs = BatchSampler(sampler=sampler, + batch_size=self.batch_size, + drop_last=self.drop_last) return bs class TestBatchSamplerWithSamplerDropLast(unittest.TestCase): + def setUp(self): self.num_samples = 1000 self.num_classes = 10 @@ -178,6 +187,7 @@ class TestBatchSamplerWithSamplerDropLast(unittest.TestCase): class TestBatchSamplerWithSamplerShuffle(unittest.TestCase): + def setUp(self): self.num_samples = 1000 self.num_classes = 10 @@ -189,17 +199,17 @@ class TestBatchSamplerWithSamplerShuffle(unittest.TestCase): try: dataset = RandomDataset(self.num_samples, self.num_classes) sampler = RandomSampler(dataset) - bs = BatchSampler( - sampler=sampler, - shuffle=self.shuffle, - batch_size=self.batch_size, - drop_last=self.drop_last) + bs = BatchSampler(sampler=sampler, + shuffle=self.shuffle, + batch_size=self.batch_size, + drop_last=self.drop_last) self.assertTrue(False) except AssertionError: pass class TestWeightedRandomSampler(unittest.TestCase): + def init_probs(self, total, pos): pos_probs = np.random.random((pos, )).astype('float32') probs = np.zeros((total, )).astype('float32') diff --git a/python/paddle/fluid/tests/unittests/test_bce_loss.py b/python/paddle/fluid/tests/unittests/test_bce_loss.py index 1051fa9c1ae..b1f16a4cecd 100644 --- a/python/paddle/fluid/tests/unittests/test_bce_loss.py +++ b/python/paddle/fluid/tests/unittests/test_bce_loss.py @@ -27,23 +27,27 @@ def test_static_layer(place, prog = paddle.static.Program() startup_prog = paddle.static.Program() with 
paddle.static.program_guard(prog, startup_prog): - input = paddle.fluid.data( - name='input', shape=input_np.shape, dtype='float64') - label = paddle.fluid.data( - name='label', shape=label_np.shape, dtype='float64') + input = paddle.fluid.data(name='input', + shape=input_np.shape, + dtype='float64') + label = paddle.fluid.data(name='label', + shape=label_np.shape, + dtype='float64') if weight_np is not None: - weight = paddle.fluid.data( - name='weight', shape=weight_np.shape, dtype='float64') - bce_loss = paddle.nn.loss.BCELoss( - weight=weight, reduction=reduction) + weight = paddle.fluid.data(name='weight', + shape=weight_np.shape, + dtype='float64') + bce_loss = paddle.nn.loss.BCELoss(weight=weight, + reduction=reduction) else: bce_loss = paddle.nn.loss.BCELoss(reduction=reduction) res = bce_loss(input, label) exe = paddle.static.Executor(place) static_result = exe.run(prog, - feed={"input": input_np, - "label": label_np} - if weight_np is None else { + feed={ + "input": input_np, + "label": label_np + } if weight_np is None else { "input": input_np, "label": label_np, "weight": weight_np @@ -60,23 +64,30 @@ def test_static_functional(place, prog = paddle.static.Program() startup_prog = paddle.static.Program() with paddle.static.program_guard(prog, startup_prog): - input = paddle.fluid.data( - name='input', shape=input_np.shape, dtype='float64') - label = paddle.fluid.data( - name='label', shape=label_np.shape, dtype='float64') + input = paddle.fluid.data(name='input', + shape=input_np.shape, + dtype='float64') + label = paddle.fluid.data(name='label', + shape=label_np.shape, + dtype='float64') if weight_np is not None: - weight = paddle.fluid.data( - name='weight', shape=weight_np.shape, dtype='float64') - res = paddle.nn.functional.binary_cross_entropy( - input, label, weight=weight, reduction=reduction) + weight = paddle.fluid.data(name='weight', + shape=weight_np.shape, + dtype='float64') + res = paddle.nn.functional.binary_cross_entropy(input, + label, + weight=weight, + reduction=reduction) else: - res = paddle.nn.functional.binary_cross_entropy( - input, label, reduction=reduction) + res = paddle.nn.functional.binary_cross_entropy(input, + label, + reduction=reduction) exe = paddle.static.Executor(place) static_result = exe.run(prog, - feed={"input": input_np, - "label": label_np} - if weight_np is None else { + feed={ + "input": input_np, + "label": label_np + } if weight_np is None else { "input": input_np, "label": label_np, "weight": weight_np @@ -113,11 +124,14 @@ def test_dygraph_functional(place, if weight_np is not None: weight = paddle.to_tensor(weight_np) - dy_res = paddle.nn.functional.binary_cross_entropy( - input, label, weight=weight, reduction=reduction) + dy_res = paddle.nn.functional.binary_cross_entropy(input, + label, + weight=weight, + reduction=reduction) else: - dy_res = paddle.nn.functional.binary_cross_entropy( - input, label, reduction=reduction) + dy_res = paddle.nn.functional.binary_cross_entropy(input, + label, + reduction=reduction) dy_result = dy_res.numpy() paddle.enable_static() return dy_result @@ -142,6 +156,7 @@ def calc_bceloss(input_np, label_np, reduction='mean', weight_np=None): class TestBCELoss(unittest.TestCase): + def test_BCELoss(self): input_np = np.random.uniform(0.1, 0.8, size=(20, 30)).astype(np.float64) label_np = np.random.randint(0, 2, size=(20, 30)).astype(np.float64) @@ -159,8 +174,8 @@ class TestBCELoss(unittest.TestCase): self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, 
dy_result)) self.assertTrue(np.allclose(dy_result, expected)) - static_functional = test_static_functional(place, input_np, - label_np, reduction) + static_functional = test_static_functional( + place, input_np, label_np, reduction) dy_functional = test_dygraph_functional(place, input_np, label_np, reduction) self.assertTrue(np.allclose(static_functional, expected)) @@ -168,43 +183,57 @@ class TestBCELoss(unittest.TestCase): self.assertTrue(np.allclose(dy_functional, expected)) def test_BCELoss_weight(self): - input_np = np.random.uniform( - 0.1, 0.8, size=(2, 3, 4, 10)).astype(np.float64) - label_np = np.random.randint( - 0, 2, size=(2, 3, 4, 10)).astype(np.float64) + input_np = np.random.uniform(0.1, 0.8, + size=(2, 3, 4, 10)).astype(np.float64) + label_np = np.random.randint(0, 2, + size=(2, 3, 4, 10)).astype(np.float64) weight_np = np.random.random(size=(3, 4, 10)).astype(np.float64) - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() for reduction in ['sum', 'mean', 'none']: - static_result = test_static_layer( - place, input_np, label_np, reduction, weight_np=weight_np) - dy_result = test_dygraph_layer( - place, input_np, label_np, reduction, weight_np=weight_np) - expected = calc_bceloss( - input_np, label_np, reduction, weight_np=weight_np) + static_result = test_static_layer(place, + input_np, + label_np, + reduction, + weight_np=weight_np) + dy_result = test_dygraph_layer(place, + input_np, + label_np, + reduction, + weight_np=weight_np) + expected = calc_bceloss(input_np, + label_np, + reduction, + weight_np=weight_np) self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) self.assertTrue(np.allclose(dy_result, expected)) - static_functional = test_static_functional( - place, input_np, label_np, reduction, weight_np=weight_np) - dy_functional = test_dygraph_functional( - place, input_np, label_np, reduction, weight_np=weight_np) + static_functional = test_static_functional(place, + input_np, + label_np, + reduction, + weight_np=weight_np) + dy_functional = test_dygraph_functional(place, + input_np, + label_np, + reduction, + weight_np=weight_np) self.assertTrue(np.allclose(static_functional, expected)) self.assertTrue(np.allclose(static_functional, dy_functional)) self.assertTrue(np.allclose(dy_functional, expected)) def test_BCELoss_error(self): paddle.disable_static() - self.assertRaises( - ValueError, paddle.nn.loss.BCELoss, reduction="unsupport reduction") + self.assertRaises(ValueError, + paddle.nn.loss.BCELoss, + reduction="unsupport reduction") input = paddle.to_tensor([[0.1, 0.3]], dtype='float32') label = paddle.to_tensor([[0.0, 1.0]], dtype='float32') - self.assertRaises( - ValueError, - paddle.nn.functional.binary_cross_entropy, - input=input, - label=label, - reduction="unsupport reduction") + self.assertRaises(ValueError, + paddle.nn.functional.binary_cross_entropy, + input=input, + label=label, + reduction="unsupport reduction") paddle.enable_static() @@ -213,6 +242,7 @@ def bce_loss(input, label): class TestBceLossOp(OpTest): + def setUp(self): self.init_test_case() self.op_type = "bce_loss" @@ -234,11 +264,13 @@ class TestBceLossOp(OpTest): class TestBceLossOpCase1(OpTest): + def init_test_cast(self): self.shape = [2, 3, 4, 5] class TestBceLossOpCase2(OpTest): + def init_test_cast(self): self.shape = [2, 3, 20] diff --git 
a/python/paddle/fluid/tests/unittests/test_bce_with_logits_loss.py b/python/paddle/fluid/tests/unittests/test_bce_with_logits_loss.py index ea6d82d15ce..de78c4edcf5 100644 --- a/python/paddle/fluid/tests/unittests/test_bce_with_logits_loss.py +++ b/python/paddle/fluid/tests/unittests/test_bce_with_logits_loss.py @@ -20,10 +20,14 @@ from op_test import OpTest from paddle.fluid.framework import _test_eager_guard -def call_bce_layer(logit, label, weight=None, reduction='mean', +def call_bce_layer(logit, + label, + weight=None, + reduction='mean', pos_weight=None): - bce_logit_loss = paddle.nn.loss.BCEWithLogitsLoss( - weight=weight, reduction=reduction, pos_weight=pos_weight) + bce_logit_loss = paddle.nn.loss.BCEWithLogitsLoss(weight=weight, + reduction=reduction, + pos_weight=pos_weight) res = bce_logit_loss(logit, label) return res @@ -49,21 +53,25 @@ def test_static(place, prog = paddle.static.Program() startup_prog = paddle.static.Program() with paddle.static.program_guard(prog, startup_prog): - logit = paddle.fluid.data( - name='logit', shape=logit_np.shape, dtype='float64') - label = paddle.fluid.data( - name='label', shape=label_np.shape, dtype='float64') + logit = paddle.fluid.data(name='logit', + shape=logit_np.shape, + dtype='float64') + label = paddle.fluid.data(name='label', + shape=label_np.shape, + dtype='float64') feed_dict = {"logit": logit_np, "label": label_np} pos_weight = None weight = None if pos_weight_np is not None: - pos_weight = paddle.fluid.data( - name='pos_weight', shape=pos_weight_np.shape, dtype='float64') + pos_weight = paddle.fluid.data(name='pos_weight', + shape=pos_weight_np.shape, + dtype='float64') feed_dict["pos_weight"] = pos_weight_np if weight_np is not None: - weight = paddle.fluid.data( - name='weight', shape=weight_np.shape, dtype='float64') + weight = paddle.fluid.data(name='weight', + shape=weight_np.shape, + dtype='float64') feed_dict["weight"] = weight_np if functional: res = call_bce_functional(logit, label, weight, reduction, @@ -124,6 +132,7 @@ def calc_bce_with_logits_loss(logit_np, class TestBCEWithLogitsLoss(unittest.TestCase): + def test_BCEWithLogitsLoss(self): logit_np = np.random.uniform(0.1, 0.8, size=(20, 30)).astype(np.float64) label_np = np.random.randint(0, 2, size=(20, 30)).astype(np.float64) @@ -133,35 +142,36 @@ class TestBCEWithLogitsLoss(unittest.TestCase): reductions = ['sum', 'mean', 'none'] for place in places: for reduction in reductions: - static_result = test_static( - place, logit_np, label_np, reduction=reduction) - dy_result = test_dygraph( - place, logit_np, label_np, reduction=reduction) + static_result = test_static(place, + logit_np, + label_np, + reduction=reduction) + dy_result = test_dygraph(place, + logit_np, + label_np, + reduction=reduction) expected = calc_bce_with_logits_loss(logit_np, label_np, reduction) self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) self.assertTrue(np.allclose(dy_result, expected)) - static_functional = test_static( - place, - logit_np, - label_np, - reduction=reduction, - functional=True) - dy_functional = test_dygraph( - place, - logit_np, - label_np, - reduction=reduction, - functional=True) + static_functional = test_static(place, + logit_np, + label_np, + reduction=reduction, + functional=True) + dy_functional = test_dygraph(place, + logit_np, + label_np, + reduction=reduction, + functional=True) with _test_eager_guard(): - eager_functional = test_dygraph( - place, - logit_np, - label_np, - reduction=reduction, - 
functional=True) + eager_functional = test_dygraph(place, + logit_np, + label_np, + reduction=reduction, + functional=True) self.assertTrue(np.allclose(static_functional, expected)) self.assertTrue(np.allclose(static_functional, dy_functional)) @@ -169,58 +179,56 @@ class TestBCEWithLogitsLoss(unittest.TestCase): self.assertTrue(np.allclose(eager_functional, expected)) def test_BCEWithLogitsLoss_weight(self): - logit_np = np.random.uniform( - 0.1, 0.8, size=(2, 3, 4, 10)).astype(np.float64) - label_np = np.random.randint( - 0, 2, size=(2, 3, 4, 10)).astype(np.float64) + logit_np = np.random.uniform(0.1, 0.8, + size=(2, 3, 4, 10)).astype(np.float64) + label_np = np.random.randint(0, 2, + size=(2, 3, 4, 10)).astype(np.float64) weight_np = np.random.random(size=(2, 3, 4, 10)).astype(np.float64) - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() for reduction in ['sum', 'mean', 'none']: - static_result = test_static( - place, - logit_np, - label_np, - weight_np=weight_np, - reduction=reduction) - dy_result = test_dygraph( - place, - logit_np, - label_np, - weight_np=weight_np, - reduction=reduction) - expected = calc_bce_with_logits_loss( - logit_np, label_np, reduction, weight_np=weight_np) + static_result = test_static(place, + logit_np, + label_np, + weight_np=weight_np, + reduction=reduction) + dy_result = test_dygraph(place, + logit_np, + label_np, + weight_np=weight_np, + reduction=reduction) + expected = calc_bce_with_logits_loss(logit_np, + label_np, + reduction, + weight_np=weight_np) self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) self.assertTrue(np.allclose(dy_result, expected)) - static_functional = test_static( - place, - logit_np, - label_np, - weight_np=weight_np, - reduction=reduction, - functional=True) - dy_functional = test_dygraph( - place, - logit_np, - label_np, - weight_np=weight_np, - reduction=reduction, - functional=True) + static_functional = test_static(place, + logit_np, + label_np, + weight_np=weight_np, + reduction=reduction, + functional=True) + dy_functional = test_dygraph(place, + logit_np, + label_np, + weight_np=weight_np, + reduction=reduction, + functional=True) self.assertTrue(np.allclose(static_functional, expected)) self.assertTrue(np.allclose(static_functional, dy_functional)) self.assertTrue(np.allclose(dy_functional, expected)) def test_BCEWithLogitsLoss_pos_weight(self): - logit_np = np.random.uniform( - 0.1, 0.8, size=(2, 3, 4, 10)).astype(np.float64) - label_np = np.random.randint( - 0, 2, size=(2, 3, 4, 10)).astype(np.float64) + logit_np = np.random.uniform(0.1, 0.8, + size=(2, 3, 4, 10)).astype(np.float64) + label_np = np.random.randint(0, 2, + size=(2, 3, 4, 10)).astype(np.float64) pos_weight_np = np.random.random(size=(3, 4, 10)).astype(np.float64) weight_np = np.random.random(size=(2, 3, 4, 10)).astype(np.float64) - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() reduction = "mean" static_result = test_static(place, logit_np, label_np, weight_np, reduction, pos_weight_np) @@ -231,40 +239,36 @@ class TestBCEWithLogitsLoss(unittest.TestCase): self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) self.assertTrue(np.allclose(dy_result, expected)) - static_functional = 
test_static( - place, - logit_np, - label_np, - weight_np, - reduction, - pos_weight_np, - functional=True) - dy_functional = test_dygraph( - place, - logit_np, - label_np, - weight_np, - reduction, - pos_weight_np, - functional=True) + static_functional = test_static(place, + logit_np, + label_np, + weight_np, + reduction, + pos_weight_np, + functional=True) + dy_functional = test_dygraph(place, + logit_np, + label_np, + weight_np, + reduction, + pos_weight_np, + functional=True) self.assertTrue(np.allclose(static_functional, expected)) self.assertTrue(np.allclose(static_functional, dy_functional)) self.assertTrue(np.allclose(dy_functional, expected)) def test_BCEWithLogitsLoss_error(self): paddle.disable_static() - self.assertRaises( - ValueError, - paddle.nn.BCEWithLogitsLoss, - reduction="unsupport reduction") + self.assertRaises(ValueError, + paddle.nn.BCEWithLogitsLoss, + reduction="unsupport reduction") logit = paddle.to_tensor([[0.1, 0.3]], dtype='float32') label = paddle.to_tensor([[0.0, 1.0]], dtype='float32') - self.assertRaises( - ValueError, - paddle.nn.functional.binary_cross_entropy_with_logits, - logit=logit, - label=label, - reduction="unsupport reduction") + self.assertRaises(ValueError, + paddle.nn.functional.binary_cross_entropy_with_logits, + logit=logit, + label=label, + reduction="unsupport reduction") paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py b/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py index cc3fab9056d..b3206e385f4 100644 --- a/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py +++ b/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py @@ -43,37 +43,28 @@ class TestBeamSearchDecodeOp(unittest.TestCase): # beam_size = 2, end_id = 1 # start with start_id [ - self.append_lod_tensor( - array, [[0, 1, 2], [0, 1, 2]], np.array( - [0, 0], dtype=dtype)) + self.append_lod_tensor(array, [[0, 1, 2], [0, 1, 2]], + np.array([0, 0], dtype=dtype)) for array, dtype in ((ids, "int64"), (scores, "float32")) ] [ - self.append_lod_tensor( - array, [[0, 1, 2], [0, 2, 4]], - np.array( - [2, 3, 4, 5], dtype=dtype)) + self.append_lod_tensor(array, [[0, 1, 2], [0, 2, 4]], + np.array([2, 3, 4, 5], dtype=dtype)) for array, dtype in ((ids, "int64"), (scores, "float32")) ] [ - self.append_lod_tensor( - array, [[0, 2, 4], [0, 2, 2, 4, 4]], - np.array( - [3, 1, 5, 4], dtype=dtype)) + self.append_lod_tensor(array, [[0, 2, 4], [0, 2, 2, 4, 4]], + np.array([3, 1, 5, 4], dtype=dtype)) for array, dtype in ((ids, "int64"), (scores, "float32")) ] [ - self.append_lod_tensor( - array, [[0, 2, 4], [0, 1, 2, 3, 4]], - np.array( - [1, 1, 3, 5], dtype=dtype)) + self.append_lod_tensor(array, [[0, 2, 4], [0, 1, 2, 3, 4]], + np.array([1, 1, 3, 5], dtype=dtype)) for array, dtype in ((ids, "int64"), (scores, "float32")) ] [ - self.append_lod_tensor( - array, [[0, 2, 4], [0, 0, 0, 2, 2]], - np.array( - [5, 1], dtype=dtype)) + self.append_lod_tensor(array, [[0, 2, 4], [0, 0, 0, 2, 2]], + np.array([5, 1], dtype=dtype)) for array, dtype in ((ids, "int64"), (scores, "float32")) ] @@ -89,7 +80,8 @@ class TestBeamSearchDecodeOp(unittest.TestCase): SentenceIds="sentence_ids", SentenceScores="sentence_scores", beam_size=2, - end_id=1, ) + end_id=1, + ) beam_search_decode_op.run(self.scope, self.place) @@ -100,19 +92,21 @@ class TestBeamSearchDecodeOp(unittest.TestCase): expected_data = np.array( [0, 2, 3, 1, 0, 2, 1, 0, 4, 5, 3, 5, 0, 4, 5, 3, 1], "int64") self.assertTrue(np.array_equal(np.array(sentence_ids), 
expected_data)) - self.assertTrue( - np.array_equal(np.array(sentence_scores), expected_data)) + self.assertTrue(np.array_equal(np.array(sentence_scores), + expected_data)) @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestBeamSearchDecodeOpGPU(TestBeamSearchDecodeOp): + def setUp(self): self.scope = core.Scope() self.place = core.CUDAPlace(0) class TestBeamSearchDecodeOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): @@ -120,8 +114,10 @@ class TestBeamSearchDecodeOpError(unittest.TestCase): # the input pre_ids must be Variable test_ids = np.random.randint(1, 5, [5, 1]).astype("int64") scores = fluid.layers.create_array(dtype='float32') - fluid.layers.beam_search_decode( - test_ids, scores, beam_size=5, end_id=0) + fluid.layers.beam_search_decode(test_ids, + scores, + beam_size=5, + end_id=0) self.assertRaises(TypeError, test_id_Variable) @@ -129,8 +125,10 @@ class TestBeamSearchDecodeOpError(unittest.TestCase): # the input pre_scores must be Variable ids = fluid.layers.create_array(dtype='int64') test_scores = np.random.uniform(1, 5, [5, 1]).astype("float32") - fluid.layers.beam_search_decode( - ids, test_scores, beam_size=5, end_id=0) + fluid.layers.beam_search_decode(ids, + test_scores, + beam_size=5, + end_id=0) self.assertRaises(TypeError, test_score_Variable) @@ -138,8 +136,10 @@ class TestBeamSearchDecodeOpError(unittest.TestCase): # the dtype of input pre_ids must be int64 type_ids = fluid.layers.create_array(dtype='float32') scores = fluid.layers.create_array(dtype='float32') - fluid.layers.beam_search_decode( - type_ids, scores, beam_size=5, end_id=0) + fluid.layers.beam_search_decode(type_ids, + scores, + beam_size=5, + end_id=0) self.assertRaises(TypeError, test_id_dtype) @@ -147,8 +147,10 @@ class TestBeamSearchDecodeOpError(unittest.TestCase): # the dtype of input pre_scores must be float32 ids = fluid.layers.create_array(dtype='int64') type_scores = fluid.layers.create_array(dtype='int64') - fluid.layers.beam_search_decode( - ids, type_scores, beam_size=5, end_id=0) + fluid.layers.beam_search_decode(ids, + type_scores, + beam_size=5, + end_id=0) self.assertRaises(TypeError, test_score_dtype) diff --git a/python/paddle/fluid/tests/unittests/test_beam_search_op.py b/python/paddle/fluid/tests/unittests/test_beam_search_op.py index 99ca5779a69..e4fe6580ea1 100644 --- a/python/paddle/fluid/tests/unittests/test_beam_search_op.py +++ b/python/paddle/fluid/tests/unittests/test_beam_search_op.py @@ -44,19 +44,18 @@ class BeamSearchOpTester(unittest.TestCase): self.scope.var('parent_idx').get_tensor() def test_run(self): - op = Operator( - 'beam_search', - pre_ids='pre_ids', - pre_scores='pre_scores', - ids='ids', - scores='scores', - selected_ids='selected_ids', - selected_scores='selected_scores', - parent_idx='parent_idx', - level=0, - beam_size=self.beam_size, - end_id=0, - is_accumulated=self.is_accumulated) + op = Operator('beam_search', + pre_ids='pre_ids', + pre_scores='pre_scores', + ids='ids', + scores='scores', + selected_ids='selected_ids', + selected_scores='selected_scores', + parent_idx='parent_idx', + level=0, + beam_size=self.beam_size, + end_id=0, + is_accumulated=self.is_accumulated) op.run(self.scope, core.CPUPlace()) selected_ids = self.scope.find_var("selected_ids").get_tensor() selected_scores = self.scope.find_var("selected_scores").get_tensor() @@ -78,20 +77,19 @@ class BeamSearchOpTester(unittest.TestCase): def _create_ids(self): self.lod = [[0, 2, 4], [0, 1, 2, 3, 4]] - 
np_data = np.array( - [[4, 2, 5], [2, 1, 3], [3, 5, 2], [8, 2, 1]], dtype='int64') + np_data = np.array([[4, 2, 5], [2, 1, 3], [3, 5, 2], [8, 2, 1]], + dtype='int64') tensor = create_tensor(self.scope, "ids", np_data) tensor.set_lod(self.lod) def _create_scores(self): - np_data = np.array( - [ - [0.5, 0.3, 0.2], - [0.6, 0.3, 0.1], - [0.9, 0.5, 0.1], - [0.7, 0.5, 0.1], - ], - dtype='float32') + np_data = np.array([ + [0.5, 0.3, 0.2], + [0.6, 0.3, 0.1], + [0.9, 0.5, 0.1], + [0.7, 0.5, 0.1], + ], + dtype='float32') tensor = create_tensor(self.scope, "scores", np_data) tensor.set_lod(self.lod) @@ -105,6 +103,7 @@ class BeamSearchOpTester(unittest.TestCase): class BeamSearchOpTester2(BeamSearchOpTester): + def _create_pre_ids(self): np_data = np.array([[1], [2], [3], [4]], dtype='int64') tensor = create_tensor(self.scope, 'pre_ids', np_data) @@ -120,13 +119,13 @@ class BeamSearchOpTester2(BeamSearchOpTester): tensor.set_lod(self.lod) def _create_scores(self): - np_data = np.array( - [ - [0.6, 0.9], - [0.5, 0.3], - [0.9, 0.5], - [0.1, 0.7], - ], dtype='float32') + np_data = np.array([ + [0.6, 0.9], + [0.5, 0.3], + [0.9, 0.5], + [0.1, 0.7], + ], + dtype='float32') tensor = create_tensor(self.scope, "scores", np_data) tensor.set_lod(self.lod) @@ -156,13 +155,13 @@ class BeamSearchOpTester3(BeamSearchOpTester): tensor.set_lod(self.lod) def _create_scores(self): - np_data = np.array( - [ - [0.6, 0.9], - [0.5, 0.3], - [0.9, 0.5], - [0.6, 0.7], - ], dtype='float32') + np_data = np.array([ + [0.6, 0.9], + [0.5, 0.3], + [0.9, 0.5], + [0.6, 0.7], + ], + dtype='float32') tensor = create_tensor(self.scope, "scores", np_data) tensor.set_lod(self.lod) @@ -192,13 +191,13 @@ class BeamSearchOpTester4(BeamSearchOpTester): tensor.set_lod(self.lod) def _create_scores(self): - np_data = np.array( - [ - [0.6, 0.9], - [0.5, 0.3], - [0.9, 0.5], - [0.6, 0.7], - ], dtype='float32') + np_data = np.array([ + [0.6, 0.9], + [0.5, 0.3], + [0.9, 0.5], + [0.6, 0.7], + ], + dtype='float32') tensor = create_tensor(self.scope, "scores", np_data) tensor.set_lod(self.lod) @@ -228,13 +227,13 @@ class BeamSearchOpTester5(BeamSearchOpTester): tensor.set_lod(self.lod) def _create_scores(self): - np_data = np.array( - [ - [0.6, 0.9], - [0.5, 0.3], - [0.9, 0.5], - [0.1, 0.7], - ], dtype='float32') + np_data = np.array([ + [0.6, 0.9], + [0.5, 0.3], + [0.9, 0.5], + [0.1, 0.7], + ], + dtype='float32') tensor = create_tensor(self.scope, "scores", np_data) tensor.set_lod(self.lod) @@ -242,8 +241,8 @@ class BeamSearchOpTester5(BeamSearchOpTester): self.beam_size = 2 self.is_accumulated = False self.output_ids = np.array([7, 3, 3, 1])[:, np.newaxis] - self.output_scores = np.array( - [1.50685, 0.996027, 0.194639, 0.043325])[:, np.newaxis] + self.output_scores = np.array([1.50685, 0.996027, 0.194639, + 0.043325])[:, np.newaxis] self.output_lod = [[0, 2, 4], [0, 0, 2, 3, 4]] self.output_parent_idx = np.array([1, 1, 2, 3]) @@ -265,13 +264,13 @@ class BeamSearchOpTester6(BeamSearchOpTester): tensor.set_lod(self.lod) def _create_scores(self): - np_data = np.array( - [ - [0.6, 0.9], - [0.5, 0.3], - [0.9, 0.5], - [0.1, 0.7], - ], dtype='float32') + np_data = np.array([ + [0.6, 0.9], + [0.5, 0.3], + [0.9, 0.5], + [0.1, 0.7], + ], + dtype='float32') tensor = create_tensor(self.scope, "scores", np_data) tensor.set_lod(self.lod) @@ -285,30 +284,33 @@ class BeamSearchOpTester6(BeamSearchOpTester): class TestBeamSearchOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - pre_ids = fluid.data( - name='pre_id', 
shape=[1], lod_level=2, dtype='int64') - pre_scores = fluid.data( - name='pre_scores', shape=[1], lod_level=2, dtype='float32') + pre_ids = fluid.data(name='pre_id', + shape=[1], + lod_level=2, + dtype='int64') + pre_scores = fluid.data(name='pre_scores', + shape=[1], + lod_level=2, + dtype='float32') probs = fluid.data(name='probs', shape=[10000], dtype='float32') topk_scores, topk_indices = fluid.layers.topk(probs, k=4) accu_scores = fluid.layers.elementwise_add( x=fluid.layers.log(x=topk_scores), - y=fluid.layers.reshape( - pre_scores, shape=[-1]), + y=fluid.layers.reshape(pre_scores, shape=[-1]), axis=0) def test_preids_Variable(): # the input pre_ids must be Variable preids_data = np.random.randint(1, 5, [5, 1]).astype("int64") - fluid.layers.beam_search( - pre_ids=preids_data, - pre_scores=pre_scores, - ids=topk_indices, - scores=accu_scores, - beam_size=4, - end_id=1) + fluid.layers.beam_search(pre_ids=preids_data, + pre_scores=pre_scores, + ids=topk_indices, + scores=accu_scores, + beam_size=4, + end_id=1) self.assertRaises(TypeError, test_preids_Variable) @@ -316,73 +318,66 @@ class TestBeamSearchOpError(unittest.TestCase): # the input pre_scores must be Variable prescores_data = np.random.uniform(1, 5, [5, 1]).astype("float32") - fluid.layers.beam_search( - pre_ids=pre_ids, - pre_scores=prescores_data, - ids=topk_indices, - scores=accu_scores, - beam_size=4, - end_id=1) + fluid.layers.beam_search(pre_ids=pre_ids, + pre_scores=prescores_data, + ids=topk_indices, + scores=accu_scores, + beam_size=4, + end_id=1) self.assertRaises(TypeError, test_prescores_Variable) def test_ids_Variable(): # the input ids must be Variable or None ids_data = np.random.randint(1, 5, [5, 1]).astype("int64") - fluid.layers.beam_search( - pre_ids=pre_ids, - pre_scores=pre_scores, - ids=ids_data, - scores=accu_scores, - beam_size=4, - end_id=1) + fluid.layers.beam_search(pre_ids=pre_ids, + pre_scores=pre_scores, + ids=ids_data, + scores=accu_scores, + beam_size=4, + end_id=1) self.assertRaises(TypeError, test_ids_Variable) def test_scores_Variable(): # the input scores must be Variable scores_data = np.random.uniform(1, 5, [5, 1]).astype("float32") - fluid.layers.beam_search( - pre_ids=pre_ids, - pre_scores=pre_scores, - ids=topk_indices, - scores=scores_data, - beam_size=4, - end_id=1) + fluid.layers.beam_search(pre_ids=pre_ids, + pre_scores=pre_scores, + ids=topk_indices, + scores=scores_data, + beam_size=4, + end_id=1) self.assertRaises(TypeError, test_scores_Variable) def test_preids_dtype(): # the dtype of input pre_ids must be int64 - preids_type_data = fluid.data( - name='preids_type_data', - shape=[1], - lod_level=2, - dtype='float32') - fluid.layers.beam_search( - pre_ids=preids_type_data, - pre_scores=pre_scores, - ids=topk_indices, - scores=accu_scores, - beam_size=4, - end_id=1) + preids_type_data = fluid.data(name='preids_type_data', + shape=[1], + lod_level=2, + dtype='float32') + fluid.layers.beam_search(pre_ids=preids_type_data, + pre_scores=pre_scores, + ids=topk_indices, + scores=accu_scores, + beam_size=4, + end_id=1) self.assertRaises(TypeError, test_preids_dtype) def test_prescores_dtype(): # the dtype of input pre_scores must be float32 - prescores_type_data = fluid.data( - name='prescores_type_data', - shape=[1], - lod_level=2, - dtype='int64') - fluid.layers.beam_search( - pre_ids=pre_ids, - pre_scores=prescores_type_data, - ids=topk_indices, - scores=accu_scores, - beam_size=4, - end_id=1) + prescores_type_data = fluid.data(name='prescores_type_data', + shape=[1], + lod_level=2, + 
dtype='int64') + fluid.layers.beam_search(pre_ids=pre_ids, + pre_scores=prescores_type_data, + ids=topk_indices, + scores=accu_scores, + beam_size=4, + end_id=1) self.assertRaises(TypeError, test_prescores_dtype) diff --git a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py index fc4ee13384b..4982ed451cd 100644 --- a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py +++ b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py @@ -30,6 +30,7 @@ def output_hist(out): class TestBernoulliOp(OpTest): + def setUp(self): self.op_type = "bernoulli" self.inputs = {"X": np.random.uniform(size=(1000, 784))} @@ -41,21 +42,20 @@ class TestBernoulliOp(OpTest): def verify_output(self, outs): hist, prob = output_hist(np.array(outs[0])) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestBernoulliApi(unittest.TestCase): + def test_dygraph(self): paddle.disable_static() x = paddle.rand([1024, 1024]) out = paddle.bernoulli(x) paddle.enable_static() hist, prob = output_hist(out.numpy()) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) def test_static(self): x = paddle.rand([1024, 1024]) @@ -64,12 +64,12 @@ class TestBernoulliApi(unittest.TestCase): out = exe.run(paddle.static.default_main_program(), fetch_list=[out.name]) hist, prob = output_hist(out[0]) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestRandomValue(unittest.TestCase): + def test_fixed_random_number(self): # Test GPU Fixed random number, which is generated by 'curandStatePhilox4_32_10_t' if not paddle.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_bfgs.py b/python/paddle/fluid/tests/unittests/test_bfgs.py index 8a9f9f72aa0..08ec4a23806 100644 --- a/python/paddle/fluid/tests/unittests/test_bfgs.py +++ b/python/paddle/fluid/tests/unittests/test_bfgs.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -45,10 +45,13 @@ def test_static_graph_H0(func, x0, H0, dtype='float32'): startup = paddle.static.Program() with paddle.static.program_guard(main, startup): X = paddle.static.data(name='x', shape=[x0.shape[0]], dtype=dtype) - H = paddle.static.data( - name='h', shape=[H0.shape[0], H0.shape[1]], dtype=dtype) - Y = minimize_bfgs( - func, X, initial_inverse_hessian_estimate=H, dtype=dtype) + H = paddle.static.data(name='h', + shape=[H0.shape[0], H0.shape[1]], + dtype=dtype) + Y = minimize_bfgs(func, + X, + initial_inverse_hessian_estimate=H, + dtype=dtype) exe = paddle.static.Executor() exe.run(startup) @@ -64,15 +67,15 @@ def test_dynamic_graph(func, x0 = paddle.to_tensor(x0) if H0 is not None: H0 = paddle.to_tensor(H0) - return minimize_bfgs( - func, - x0, - initial_inverse_hessian_estimate=H0, - line_search_fn=line_search_fn, - dtype=dtype) + return minimize_bfgs(func, + x0, + initial_inverse_hessian_estimate=H0, + line_search_fn=line_search_fn, + dtype=dtype) class TestBfgs(unittest.TestCase): + def test_quadratic_nd(self): for dimension in [1, 10]: minimum = np.random.random(size=[dimension]).astype('float32') @@ -106,10 +109,11 @@ class TestBfgs(unittest.TestCase): self.assertFalse(results[0][0]) def test_multi_minima(self): + def func(x): # df = 12(x + 1.1)(x - 0.2)(x - 0.8) # f = 3*x^4+0.4*x^3-5.46*x^2+2.112*x - # minimum = -1.1 or 0.8. + # minimum = -1.1 or 0.8. # All these minima may be reached from appropriate starting points. return 3 * x**4 + 0.4 * x**3 - 5.64 * x**2 + 2.112 * x @@ -143,6 +147,7 @@ class TestBfgs(unittest.TestCase): self.func_rosenbrock() def test_exception(self): + def func(x): return paddle.dot(x, x) @@ -159,12 +164,11 @@ class TestBfgs(unittest.TestCase): self.assertRaises(ValueError, test_dynamic_graph, func, x0, H0=H1) # test line_search_fn is bad - self.assertRaises( - NotImplementedError, - test_static_graph, - func, - x0, - line_search_fn='other') + self.assertRaises(NotImplementedError, + test_static_graph, + func, + x0, + line_search_fn='other') if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_bicubic_interp_op.py b/python/paddle/fluid/tests/unittests/test_bicubic_interp_op.py index 8d7dd0d8118..281d6811c62 100644 --- a/python/paddle/fluid/tests/unittests/test_bicubic_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_bicubic_interp_op.py @@ -110,17 +110,20 @@ def bicubic_interp_np(input, coefficients[ii] = cubic_interp1d( input[i, j, access_y, access_x_0], input[i, j, access_y, access_x_1], - input[i, j, access_y, access_x_2], - input[i, j, access_y, access_x_3], x_t) - out[i, j, k, l] = cubic_interp1d( - coefficients[0], coefficients[1], coefficients[2], - coefficients[3], y_t) + input[i, j, access_y, + access_x_2], input[i, j, access_y, + access_x_3], x_t) + out[i, j, k, + l] = cubic_interp1d(coefficients[0], coefficients[1], + coefficients[2], coefficients[3], + y_t) if data_layout == "NHWC": out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC return out.astype(input.dtype) class TestBicubicInterpOp(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -171,8 +174,10 @@ class TestBicubicInterpOp(OpTest): self.check_output(check_eager=self.check_eager) def test_check_grad(self): - self.check_grad( - ['X'], 'Out', in_place=True, check_eager=self.check_eager) + self.check_grad(['X'], + 'Out', + in_place=True, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'bicubic' @@ -185,6 +190,7 @@ class TestBicubicInterpOp(OpTest): class 
TestBicubicInterpCase1(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [4, 1, 7, 8] @@ -195,6 +201,7 @@ class TestBicubicInterpCase1(TestBicubicInterpOp): class TestBicubicInterpCase2(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [3, 3, 9, 6] @@ -205,6 +212,7 @@ class TestBicubicInterpCase2(TestBicubicInterpOp): class TestBicubicInterpCase3(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [1, 1, 32, 64] @@ -215,6 +223,7 @@ class TestBicubicInterpCase3(TestBicubicInterpOp): class TestBicubicInterpCase4(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [4, 1, 7, 8] @@ -226,6 +235,7 @@ class TestBicubicInterpCase4(TestBicubicInterpOp): class TestBicubicInterpCase5(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [3, 3, 9, 6] @@ -237,6 +247,7 @@ class TestBicubicInterpCase5(TestBicubicInterpOp): class TestBicubicInterpCase6(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [1, 1, 32, 64] @@ -248,6 +259,7 @@ class TestBicubicInterpCase6(TestBicubicInterpOp): class TestBicubicInterpSame(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [2, 3, 32, 64] @@ -258,6 +270,7 @@ class TestBicubicInterpSame(TestBicubicInterpOp): class TestBicubicInterpDataLayout(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [2, 5, 5, 3] @@ -270,6 +283,7 @@ class TestBicubicInterpDataLayout(TestBicubicInterpOp): class TestBicubicInterpOpAPI(unittest.TestCase): + def test_case(self): np.random.seed(200) x_data = np.random.random((2, 3, 6, 6)).astype("float32") @@ -280,34 +294,44 @@ class TestBicubicInterpOpAPI(unittest.TestCase): prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") dim = fluid.data(name="dim", shape=[1], dtype="int32") - shape_tensor = fluid.data( - name="shape_tensor", shape=[2], dtype="int32") - actual_size = fluid.data( - name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32") - - out1 = interpolate( - x, size=[12, 12], mode='bicubic', align_corners=False) - out2 = interpolate( - x, size=[12, dim], mode='bicubic', align_corners=False) - out3 = interpolate( - x, size=shape_tensor, mode='bicubic', align_corners=False) - out4 = interpolate( - x, size=[12, 12], mode='bicubic', align_corners=False) - out5 = interpolate( - x, - scale_factor=scale_tensor, - mode='bicubic', - align_corners=False) + shape_tensor = fluid.data(name="shape_tensor", + shape=[2], + dtype="int32") + actual_size = fluid.data(name="actual_size", + shape=[2], + dtype="int32") + scale_tensor = fluid.data(name="scale_tensor", + shape=[1], + dtype="float32") + + out1 = interpolate(x, + size=[12, 12], + mode='bicubic', + align_corners=False) + out2 = interpolate(x, + size=[12, dim], + mode='bicubic', + align_corners=False) + out3 = interpolate(x, + size=shape_tensor, + mode='bicubic', + align_corners=False) + out4 = interpolate(x, + size=[12, 12], + mode='bicubic', + 
align_corners=False) + out5 = interpolate(x, + scale_factor=scale_tensor, + mode='bicubic', + align_corners=False) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) @@ -322,40 +346,51 @@ class TestBicubicInterpOpAPI(unittest.TestCase): fetch_list=[out1, out2, out3, out4, out5], return_numpy=True) - expect_res = bicubic_interp_np( - x_data, out_h=12, out_w=12, align_corners=False) + expect_res = bicubic_interp_np(x_data, + out_h=12, + out_w=12, + align_corners=False) for res in results: self.assertTrue(np.allclose(res, expect_res)) with fluid.dygraph.guard(): x = fluid.dygraph.to_variable(x_data) - interp = interpolate( - x, size=[12, 12], mode='bicubic', align_corners=False) + interp = interpolate(x, + size=[12, 12], + mode='bicubic', + align_corners=False) dy_result = interp.numpy() - expect = bicubic_interp_np( - x_data, out_h=12, out_w=12, align_corners=False) + expect = bicubic_interp_np(x_data, + out_h=12, + out_w=12, + align_corners=False) self.assertTrue(np.allclose(dy_result, expect)) class TestBicubicOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of interpoalte must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) self.assertRaises(TypeError, interpolate, x1) def test_mode_type(): # mode must be "BILINEAR" "TRILINEAR" "NEAREST" "BICUBIC" x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, size=[12, 12], mode='UNKONWN', align_corners=False) + out = interpolate(x, + size=[12, 12], + mode='UNKONWN', + align_corners=False) def test_input_shape(): x = fluid.data(name="x", shape=[2], dtype="float32") - out = interpolate( - x, size=[12, 12], mode='BICUBIC', align_corners=False) + out = interpolate(x, + size=[12, 12], + mode='BICUBIC', + align_corners=False) def test_align_corcers(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") @@ -363,76 +398,77 @@ class TestBicubicOpError(unittest.TestCase): def test_out_shape(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, size=[12], mode='bicubic', align_corners=False) + out = interpolate(x, + size=[12], + mode='bicubic', + align_corners=False) def test_attr_data_format(): # for 5-D input, data_format only can be NCDHW or NDHWC - input = fluid.data( - name="input", shape=[2, 3, 6, 9, 4], dtype="float32") - out = interpolate( - input, - size=[4, 8, 4, 5], - mode='trilinear', - data_format='NHWC') + input = fluid.data(name="input", + shape=[2, 3, 6, 9, 4], + dtype="float32") + out = interpolate(input, + size=[4, 8, 4, 5], + mode='trilinear', + data_format='NHWC') def test_actual_shape(): # the actual_shape must be Variable. - x = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) - out = interpolate( - x, size=[12, 12], mode='BICUBIC', align_corners=False) + x = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) + out = interpolate(x, + size=[12, 12], + mode='BICUBIC', + align_corners=False) def test_scale_value(): # the scale must be greater than zero. 
x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, - size=None, - mode='BICUBIC', - align_corners=False, - scale_factor=-2.0) + out = interpolate(x, + size=None, + mode='BICUBIC', + align_corners=False, + scale_factor=-2.0) def test_attr_5D_input(): # for 5-D input, data_format only can be NCDHW or NDHWC - input = fluid.data( - name="input", shape=[2, 3, 6, 9, 4], dtype="float32") - out = interpolate( - input, - size=[4, 8, 4, 5], - mode='trilinear', - data_format='NDHWC') + input = fluid.data(name="input", + shape=[2, 3, 6, 9, 4], + dtype="float32") + out = interpolate(input, + size=[4, 8, 4, 5], + mode='trilinear', + data_format='NDHWC') def test_scale_type(): # the scale must be greater than zero. x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - scale = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) - out = interpolate( - x, - size=None, - mode='bicubic', - align_corners=False, - scale_factor=scale) + scale = fluid.create_lod_tensor(np.array([-1, 3, 5, + 5]), [[1, 1, 1, 1]], + fluid.CPUPlace()) + out = interpolate(x, + size=None, + mode='bicubic', + align_corners=False, + scale_factor=scale) def test_align_mode(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, - size=None, - mode='nearest', - align_corners=False, - align_mode=2, - scale_factor=1.0) + out = interpolate(x, + size=None, + mode='nearest', + align_corners=False, + align_mode=2, + scale_factor=1.0) def test_outshape_and_scale(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, - size=None, - mode='bicubic', - align_corners=False, - scale_factor=None) + out = interpolate(x, + size=None, + mode='bicubic', + align_corners=False, + scale_factor=None) self.assertRaises(ValueError, test_mode_type) self.assertRaises(ValueError, test_input_shape) diff --git a/python/paddle/fluid/tests/unittests/test_bicubic_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_bicubic_interp_v2_op.py index d5c3aee2f43..30a175d69d0 100644 --- a/python/paddle/fluid/tests/unittests/test_bicubic_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_bicubic_interp_v2_op.py @@ -118,17 +118,20 @@ def bicubic_interp_np(input, coefficients[ii] = cubic_interp1d( input[i, j, access_y, access_x_0], input[i, j, access_y, access_x_1], - input[i, j, access_y, access_x_2], - input[i, j, access_y, access_x_3], x_t) - out[i, j, k, l] = cubic_interp1d( - coefficients[0], coefficients[1], coefficients[2], - coefficients[3], y_t) + input[i, j, access_y, + access_x_2], input[i, j, access_y, + access_x_3], x_t) + out[i, j, k, + l] = cubic_interp1d(coefficients[0], coefficients[1], + coefficients[2], coefficients[3], + y_t) if data_layout == "NHWC": out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC return out.astype(input.dtype) class TestBicubicInterpOp(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -195,8 +198,10 @@ class TestBicubicInterpOp(OpTest): self.check_output(check_eager=self.check_eager) def test_check_grad(self): - self.check_grad( - ['X'], 'Out', in_place=True, check_eager=self.check_eager) + self.check_grad(['X'], + 'Out', + in_place=True, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'bicubic' @@ -209,6 +214,7 @@ class TestBicubicInterpOp(OpTest): class TestBicubicInterpCase1(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [4, 1, 7, 8] @@ -219,6 +225,7 @@ class 
TestBicubicInterpCase1(TestBicubicInterpOp): class TestBicubicInterpCase2(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [3, 3, 9, 6] @@ -229,6 +236,7 @@ class TestBicubicInterpCase2(TestBicubicInterpOp): class TestBicubicInterpCase3(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [1, 1, 32, 64] @@ -239,6 +247,7 @@ class TestBicubicInterpCase3(TestBicubicInterpOp): class TestBicubicInterpCase4(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [4, 1, 7, 8] @@ -250,6 +259,7 @@ class TestBicubicInterpCase4(TestBicubicInterpOp): class TestBicubicInterpCase5(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [3, 3, 9, 6] @@ -261,6 +271,7 @@ class TestBicubicInterpCase5(TestBicubicInterpOp): class TestBicubicInterpCase6(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [1, 1, 32, 64] @@ -272,6 +283,7 @@ class TestBicubicInterpCase6(TestBicubicInterpOp): class TestBicubicInterpSame(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [2, 3, 32, 64] @@ -282,6 +294,7 @@ class TestBicubicInterpSame(TestBicubicInterpOp): class TestBicubicInterpScale(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [2, 3, 32, 64] @@ -292,6 +305,7 @@ class TestBicubicInterpScale(TestBicubicInterpOp): class TestBicubicInterpDataLayout(TestBicubicInterpOp): + def init_test_case(self): self.interp_method = 'bicubic' self.input_shape = [2, 5, 5, 3] @@ -304,6 +318,7 @@ class TestBicubicInterpDataLayout(TestBicubicInterpOp): class TestBicubicInterpOpAPI(unittest.TestCase): + def test_case(self): np.random.seed(200) x_data = np.random.random((2, 3, 6, 6)).astype("float32") @@ -314,38 +329,52 @@ class TestBicubicInterpOpAPI(unittest.TestCase): prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") dim = fluid.data(name="dim", shape=[1], dtype="int32") - shape_tensor = fluid.data( - name="shape_tensor", shape=[2], dtype="int32") - actual_size = fluid.data( - name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32") - - out1 = interpolate( - x, size=[12, 12], mode='bicubic', align_corners=False) - out2 = interpolate( - x, size=[12, dim], mode='bicubic', align_corners=False) - out3 = interpolate( - x, size=shape_tensor, mode='bicubic', align_corners=False) - out4 = interpolate( - x, size=[12, 12], mode='bicubic', align_corners=False) - out5 = interpolate( - x, - scale_factor=scale_tensor, - mode='bicubic', - align_corners=False) - out6 = interpolate( - x, scale_factor=2.0, mode='bicubic', align_corners=False) - out7 = interpolate( - x, scale_factor=[2.0, 2.0], mode='bicubic', align_corners=False) + shape_tensor = fluid.data(name="shape_tensor", + shape=[2], + dtype="int32") + actual_size = fluid.data(name="actual_size", + shape=[2], + dtype="int32") + scale_tensor = fluid.data(name="scale_tensor", + shape=[1], + dtype="float32") + + out1 = interpolate(x, + size=[12, 12], + mode='bicubic', + align_corners=False) + out2 = 
interpolate(x, + size=[12, dim], + mode='bicubic', + align_corners=False) + out3 = interpolate(x, + size=shape_tensor, + mode='bicubic', + align_corners=False) + out4 = interpolate(x, + size=[12, 12], + mode='bicubic', + align_corners=False) + out5 = interpolate(x, + scale_factor=scale_tensor, + mode='bicubic', + align_corners=False) + out6 = interpolate(x, + scale_factor=2.0, + mode='bicubic', + align_corners=False) + out7 = interpolate(x, + scale_factor=[2.0, 2.0], + mode='bicubic', + align_corners=False) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) @@ -361,40 +390,51 @@ class TestBicubicInterpOpAPI(unittest.TestCase): fetch_list=[out1, out2, out3, out4, out5, out6, out7], return_numpy=True) - expect_res = bicubic_interp_np( - x_data, out_h=12, out_w=12, align_corners=False) + expect_res = bicubic_interp_np(x_data, + out_h=12, + out_w=12, + align_corners=False) for res in results: self.assertTrue(np.allclose(res, expect_res)) with fluid.dygraph.guard(): x = fluid.dygraph.to_variable(x_data) - interp = interpolate( - x, size=[12, 12], mode='bicubic', align_corners=False) + interp = interpolate(x, + size=[12, 12], + mode='bicubic', + align_corners=False) dy_result = interp.numpy() - expect = bicubic_interp_np( - x_data, out_h=12, out_w=12, align_corners=False) + expect = bicubic_interp_np(x_data, + out_h=12, + out_w=12, + align_corners=False) self.assertTrue(np.allclose(dy_result, expect)) class TestBicubicOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of interpoalte must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) self.assertRaises(TypeError, interpolate, x1) def test_mode_type(): # mode must be "BILINEAR" "TRILINEAR" "NEAREST" "BICUBIC" x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, size=[12, 12], mode='UNKONWN', align_corners=False) + out = interpolate(x, + size=[12, 12], + mode='UNKONWN', + align_corners=False) def test_input_shape(): x = fluid.data(name="x", shape=[2], dtype="float32") - out = interpolate( - x, size=[12, 12], mode='BICUBIC', align_corners=False) + out = interpolate(x, + size=[12, 12], + mode='BICUBIC', + align_corners=False) def test_align_corcers(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") @@ -402,132 +442,133 @@ class TestBicubicOpError(unittest.TestCase): def test_out_shape(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, size=[12], mode='bicubic', align_corners=False) + out = interpolate(x, + size=[12], + mode='bicubic', + align_corners=False) def test_attr_data_format(): # for 5-D input, data_format only can be NCDHW or NDHWC - input = fluid.data( - name="input", shape=[2, 3, 6, 9, 4], dtype="float32") - out = interpolate( - input, - size=[4, 8, 4, 5], - mode='trilinear', - data_format='NHWC') + input = fluid.data(name="input", + shape=[2, 3, 6, 9, 4], + dtype="float32") + out = interpolate(input, + size=[4, 8, 4, 5], + mode='trilinear', + data_format='NHWC') def test_actual_shape(): # the actual_shape must be Variable. 
- x = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) - out = interpolate( - x, size=[12, 12], mode='BICUBIC', align_corners=False) + x = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) + out = interpolate(x, + size=[12, 12], + mode='BICUBIC', + align_corners=False) def test_scale_value(): # the scale must be greater than zero. x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, - size=None, - mode='BICUBIC', - align_corners=False, - scale_factor=-2.0) + out = interpolate(x, + size=None, + mode='BICUBIC', + align_corners=False, + scale_factor=-2.0) def test_attr_5D_input(): # for 5-D input, data_format only can be NCDHW or NDHWC - input = fluid.data( - name="input", shape=[2, 3, 6, 9, 4], dtype="float32") - out = interpolate( - input, - size=[4, 8, 4, 5], - mode='trilinear', - data_format='NDHWC') + input = fluid.data(name="input", + shape=[2, 3, 6, 9, 4], + dtype="float32") + out = interpolate(input, + size=[4, 8, 4, 5], + mode='trilinear', + data_format='NDHWC') def test_scale_type(): # the scale must be greater than zero. x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - scale = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) - out = interpolate( - x, - size=None, - mode='bicubic', - align_corners=False, - scale_factor=scale) + scale = fluid.create_lod_tensor(np.array([-1, 3, 5, + 5]), [[1, 1, 1, 1]], + fluid.CPUPlace()) + out = interpolate(x, + size=None, + mode='bicubic', + align_corners=False, + scale_factor=scale) def test_align_mode(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, - size=None, - mode='nearest', - align_corners=False, - align_mode=2, - scale_factor=1.0) + out = interpolate(x, + size=None, + mode='nearest', + align_corners=False, + align_mode=2, + scale_factor=1.0) def test_outshape_and_scale(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, - size=None, - mode='bicubic', - align_corners=False, - scale_factor=None) + out = interpolate(x, + size=None, + mode='bicubic', + align_corners=False, + scale_factor=None) def test_align_corners_and_nearest(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, - size=None, - mode='nearest', - align_corners=True, - scale_factor=None) + out = interpolate(x, + size=None, + mode='nearest', + align_corners=True, + scale_factor=None) def test_scale_shape(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, - size=None, - mode='nearest', - align_corners=False, - scale_factor=[1, 2, 2]) + out = interpolate(x, + size=None, + mode='nearest', + align_corners=False, + scale_factor=[1, 2, 2]) def test_scale_value_1(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, - size=None, - mode='bicubic', - align_corners=False, - scale_factor=[1, 2, 2]) + out = interpolate(x, + size=None, + mode='bicubic', + align_corners=False, + scale_factor=[1, 2, 2]) def test_size_and_scale(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, - size=None, - mode='bicubic', - align_corners=False, - scale_factor=None) + out = interpolate(x, + size=None, + mode='bicubic', + align_corners=False, + scale_factor=None) def test_size_and_scale2(): - x = fluid.data( - name="input", shape=[2, 3, 6, 9, 4], dtype="float32") - out = interpolate( - x, - size=[2, 2, 2], - 
mode='trilinear', - align_corners=False, - scale_factor=2.0) + x = fluid.data(name="input", + shape=[2, 3, 6, 9, 4], + dtype="float32") + out = interpolate(x, + size=[2, 2, 2], + mode='trilinear', + align_corners=False, + scale_factor=2.0) def test_size_type(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - out = interpolate( - x, size={2, 2}, mode='bicubic', align_corners=False) + out = interpolate(x, + size={2, 2}, + mode='bicubic', + align_corners=False) def test_input_shape_1(): x = fluid.data(name="x", shape=[2, 1, 0, 0], dtype="float32") - out = interpolate( - x, size=[3, 3], mode="bicubic", align_corners=False) + out = interpolate(x, + size=[3, 3], + mode="bicubic", + align_corners=False) self.assertRaises(ValueError, test_mode_type) self.assertRaises(ValueError, test_input_shape) diff --git a/python/paddle/fluid/tests/unittests/test_bilateral_slice_op.py b/python/paddle/fluid/tests/unittests/test_bilateral_slice_op.py index c0d622d7ea1..976e7df60b8 100644 --- a/python/paddle/fluid/tests/unittests/test_bilateral_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilateral_slice_op.py @@ -20,6 +20,7 @@ import math class Gsz: + def __init__(self, h, w, gd, gh, gw, input_chans): self.h = h self.w = w @@ -98,7 +99,10 @@ def naive_bilateral_slice_forward(output, grid, guide, input, gsz, has_offset, wz = weight_z(zz + 0.5 - gz) c_ = coeff_stride * out_c + in_c - coeff_sample += grid[int(b), int(c_), int(z_), int(y_), + coeff_sample += grid[int(b), + int(c_), + int(z_), + int(y_), int(x_)] * wx * wy * wz if in_c < input_chans: @@ -136,6 +140,7 @@ def naive_bilateral_slice(x, guide, grid, has_offset): @unittest.skipIf(not paddle.fluid.is_compiled_with_cuda(), 'CPU testing is not supported') class TestBilateralSliceOp(OpTest): + def setUp(self): self.initTestCase() self.op_type = 'bilateral_slice' @@ -153,7 +158,9 @@ class TestBilateralSliceOp(OpTest): output_np = naive_bilateral_slice(x, guide, grid, self.has_offset) self.inputs = {'X': x, 'Grid': grid, 'Guide': guide} - self.attrs = {'has_offset': self.has_offset, } + self.attrs = { + 'has_offset': self.has_offset, + } self.outputs = {'Out': output_np} def test_check_output(self): @@ -173,19 +180,24 @@ class TestBilateralSliceOp(OpTest): @unittest.skipIf(not paddle.fluid.is_compiled_with_cuda(), 'CPU testing is not supported') class TestBilateralSliceOp1(TestBilateralSliceOp): + def initTestCase(self): self.has_offset = True self.data_type = 'float32' class TestBilateralSliceApi(unittest.TestCase): + def test_api(self): - x = paddle.fluid.data( - name='x', shape=[None, 3, 25, 15], dtype='float32') - guide = paddle.fluid.data( - name='guide', shape=[None, 25, 15], dtype='float32') - grid = paddle.fluid.data( - name='grid', shape=[None, None, 8, 5, 3], dtype='float32') + x = paddle.fluid.data(name='x', + shape=[None, 3, 25, 15], + dtype='float32') + guide = paddle.fluid.data(name='guide', + shape=[None, 25, 15], + dtype='float32') + grid = paddle.fluid.data(name='grid', + shape=[None, None, 8, 5, 3], + dtype='float32') paddle.fluid.contrib.layers.bilateral_slice(x, guide, grid, False) if not paddle.fluid.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_api.py b/python/paddle/fluid/tests/unittests/test_bilinear_api.py index 24eae4797de..01a5eb70522 100644 --- a/python/paddle/fluid/tests/unittests/test_bilinear_api.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_api.py @@ -24,6 +24,7 @@ import numpy as np class TestBilinearAPI(unittest.TestCase): + def test_api(self): 
with fluid.program_guard(fluid.default_startup_program(), fluid.default_main_program()): @@ -39,24 +40,29 @@ class TestBilinearAPI(unittest.TestCase): layer1 = np.random.random((5, 5)).astype('float32') layer2 = np.random.random((5, 4)).astype('float32') - bilinear = paddle.nn.Bilinear( - in1_features=5, in2_features=4, out_features=1000) + bilinear = paddle.nn.Bilinear(in1_features=5, + in2_features=4, + out_features=1000) ret = bilinear(data1, data2) exe.run(fluid.default_startup_program()) - ret_fetch = exe.run(feed={'X1': layer1, - 'X2': layer2}, + ret_fetch = exe.run(feed={ + 'X1': layer1, + 'X2': layer2 + }, fetch_list=[ret.name]) self.assertEqual(ret_fetch[0].shape, (5, 1000)) class TestBilinearAPIDygraph(unittest.TestCase): + def test_api(self): paddle.disable_static() layer1 = np.random.random((5, 5)).astype('float32') layer2 = np.random.random((5, 4)).astype('float32') - bilinear = paddle.nn.Bilinear( - in1_features=5, in2_features=4, out_features=1000) + bilinear = paddle.nn.Bilinear(in1_features=5, + in2_features=4, + out_features=1000) ret = bilinear(paddle.to_tensor(layer1), paddle.to_tensor(layer2)) self.assertEqual(ret.shape, [5, 1000]) diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py index 1817ef160c7..fa80b8ac0f8 100755 --- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py @@ -20,6 +20,7 @@ from op_test import OpTest import paddle.fluid.core as core import paddle.fluid as fluid import paddle + paddle.enable_static() @@ -96,6 +97,7 @@ def bilinear_interp_np(input, class TestBilinearInterpOp(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -147,8 +149,10 @@ class TestBilinearInterpOp(OpTest): self.check_output(check_eager=self.check_eager) def test_check_grad(self): - self.check_grad( - ['X'], 'Out', in_place=True, check_eager=self.check_eager) + self.check_grad(['X'], + 'Out', + in_place=True, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'bilinear' @@ -162,6 +166,7 @@ class TestBilinearInterpOp(OpTest): class TestBilinearInterpCase1(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [4, 1, 7, 8] @@ -173,6 +178,7 @@ class TestBilinearInterpCase1(TestBilinearInterpOp): class TestBilinearInterpCase2(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 3, 9, 6] @@ -184,6 +190,7 @@ class TestBilinearInterpCase2(TestBilinearInterpOp): class TestBilinearInterpCase3(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [1, 1, 32, 64] @@ -195,6 +202,7 @@ class TestBilinearInterpCase3(TestBilinearInterpOp): class TestBilinearInterpCase4(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [4, 1, 7, 8] @@ -207,6 +215,7 @@ class TestBilinearInterpCase4(TestBilinearInterpOp): class TestBilinearInterpCase5(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 3, 9, 6] @@ -219,6 +228,7 @@ class TestBilinearInterpCase5(TestBilinearInterpOp): class TestBilinearInterpCase6(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [1, 1, 32, 64] @@ -231,6 +241,7 @@ class TestBilinearInterpCase6(TestBilinearInterpOp): class 
TestBilinearInterpSame(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 32, 64] @@ -242,6 +253,7 @@ class TestBilinearInterpSame(TestBilinearInterpOp): class TestBilinearInterpActualShape(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 2, 32, 16] @@ -254,6 +266,7 @@ class TestBilinearInterpActualShape(TestBilinearInterpOp): class TestBilinearInterpDataLayout(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 5, 5, 3] @@ -267,14 +280,15 @@ class TestBilinearInterpDataLayout(TestBilinearInterpOp): class TestBilinearInterpOpUint8(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None self.init_test_case() self.op_type = "bilinear_interp" self.check_eager = True - input_np = np.random.randint( - low=0, high=256, size=self.input_shape).astype("uint8") + input_np = np.random.randint(low=0, high=256, + size=self.input_shape).astype("uint8") if self.scale > 0: out_h = int(self.input_shape[2] * self.scale) @@ -302,8 +316,9 @@ class TestBilinearInterpOpUint8(OpTest): self.outputs = {'Out': output_np} def test_check_output(self): - self.check_output_with_place( - place=core.CPUPlace(), atol=1, check_eager=self.check_eager) + self.check_output_with_place(place=core.CPUPlace(), + atol=1, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'bilinear' @@ -316,6 +331,7 @@ class TestBilinearInterpOpUint8(OpTest): class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 32, 64] @@ -327,6 +343,7 @@ class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8): class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [4, 1, 7, 8] @@ -339,24 +356,28 @@ class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8): class TestBilinearInterpOtherMethod1(TestBilinearInterpOp): + def set_align_mode(self): self.align_corners = False self.align_mode = 1 class TestBilinearInterpWithMethod2(TestBilinearInterpOp): + def set_align_mode(self): self.align_corners = False self.align_mode = 0 class TestBilinearInterpWithMethod3(TestBilinearInterpOp): + def set_align_mode(self): self.align_corners = True self.align_mode = 0 class TestBilinearInterpScale1(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 5, 7] @@ -368,6 +389,7 @@ class TestBilinearInterpScale1(TestBilinearInterpOp): class TestBilinearInterpScale2(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 5, 7] @@ -379,6 +401,7 @@ class TestBilinearInterpScale2(TestBilinearInterpOp): class TestBilinearInterpScale3(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 5, 7] @@ -390,6 +413,7 @@ class TestBilinearInterpScale3(TestBilinearInterpOp): class TestBilinearInterpZero(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 5, 7] @@ -401,6 +425,7 @@ class TestBilinearInterpZero(TestBilinearInterpOp): class TestBilinearInterpOp_attr_tensor(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -448,8 +473,10 @@ class TestBilinearInterpOp_attr_tensor(OpTest): self.check_output(check_eager=self.check_eager) def test_check_grad(self): - 
self.check_grad( - ['X'], 'Out', in_place=True, check_eager=self.check_eager) + self.check_grad(['X'], + 'Out', + in_place=True, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'bilinear' @@ -463,6 +490,7 @@ class TestBilinearInterpOp_attr_tensor(OpTest): # out_size is a 1-D tensor class TestBilinearInterp_attr_tensor_Case1(TestBilinearInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 3, 9, 6] @@ -475,6 +503,7 @@ class TestBilinearInterp_attr_tensor_Case1(TestBilinearInterpOp_attr_tensor): # scale is a 1-D tensor class TestBilinearInterp_attr_tensor_Case2(TestBilinearInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 2, 32, 16] @@ -488,6 +517,7 @@ class TestBilinearInterp_attr_tensor_Case2(TestBilinearInterpOp_attr_tensor): # scale is a 1-D tensor class TestBilinearInterp_attr_tensor_Case3(TestBilinearInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 2, 32, 16] @@ -500,20 +530,23 @@ class TestBilinearInterp_attr_tensor_Case3(TestBilinearInterpOp_attr_tensor): class TestBilinearInterpOpAPI(unittest.TestCase): + def test_case(self): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") dim = fluid.data(name="dim", shape=[1], dtype="int32") shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") actual_size = fluid.data(name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32") + scale_tensor = fluid.data(name="scale_tensor", + shape=[1], + dtype="float32") out1 = fluid.layers.resize_bilinear(x, out_shape=[12, 12]) out2 = fluid.layers.resize_bilinear(x, out_shape=[12, dim]) out3 = fluid.layers.resize_bilinear(x, out_shape=shape_tensor) - out4 = fluid.layers.resize_bilinear( - x, out_shape=[4, 4], actual_shape=actual_size) + out4 = fluid.layers.resize_bilinear(x, + out_shape=[4, 4], + actual_shape=actual_size) out5 = fluid.layers.resize_bilinear(x, scale=scale_tensor) x_data = np.random.random((2, 3, 6, 6)).astype("float32") @@ -539,8 +572,10 @@ class TestBilinearInterpOpAPI(unittest.TestCase): fetch_list=[out1, out2, out3, out4, out5], return_numpy=True) - expect_res = bilinear_interp_np( - x_data, out_h=12, out_w=12, align_corners=True) + expect_res = bilinear_interp_np(x_data, + out_h=12, + out_w=12, + align_corners=True) for res in results: self.assertTrue(np.allclose(res, expect_res)) diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py index 2ff32b2f95b..788bd0fc411 100755 --- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py @@ -104,6 +104,7 @@ def bilinear_interp_np(input, class TestBilinearInterpOp(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -135,9 +136,10 @@ class TestBilinearInterpOp(OpTest): out_h = self.out_h out_w = self.out_w - output_np = bilinear_interp_np( - input_np, out_h, out_w, 0, 0, self.out_size, self.actual_shape, - self.align_corners, self.align_mode, self.data_layout) + output_np = bilinear_interp_np(input_np, out_h, out_w, 0, 0, + self.out_size, self.actual_shape, + self.align_corners, self.align_mode, + self.data_layout) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size @@ -179,6 +181,7 @@ class 
TestBilinearInterpOp(OpTest): class TestBilinearInterpCase1(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [4, 1, 7, 8] @@ -190,6 +193,7 @@ class TestBilinearInterpCase1(TestBilinearInterpOp): class TestBilinearInterpCase2(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 3, 9, 6] @@ -201,6 +205,7 @@ class TestBilinearInterpCase2(TestBilinearInterpOp): class TestBilinearInterpCase3(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [1, 1, 32, 64] @@ -212,6 +217,7 @@ class TestBilinearInterpCase3(TestBilinearInterpOp): class TestBilinearInterpCase4(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [4, 1, 7, 8] @@ -224,6 +230,7 @@ class TestBilinearInterpCase4(TestBilinearInterpOp): class TestBilinearInterpCase5(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 3, 9, 6] @@ -236,6 +243,7 @@ class TestBilinearInterpCase5(TestBilinearInterpOp): class TestBilinearInterpCase6(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [1, 1, 32, 64] @@ -248,6 +256,7 @@ class TestBilinearInterpCase6(TestBilinearInterpOp): class TestBilinearInterpCase7(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [1, 1, 32, 64] @@ -259,6 +268,7 @@ class TestBilinearInterpCase7(TestBilinearInterpOp): class TestBilinearInterpSame(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 32, 64] @@ -270,6 +280,7 @@ class TestBilinearInterpSame(TestBilinearInterpOp): class TestBilinearInterpActualShape(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 2, 32, 16] @@ -282,6 +293,7 @@ class TestBilinearInterpActualShape(TestBilinearInterpOp): class TestBilinearInterpDataLayout(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 5, 5, 3] @@ -295,13 +307,14 @@ class TestBilinearInterpDataLayout(TestBilinearInterpOp): class TestBilinearInterpOpUint8(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None self.init_test_case() self.op_type = "bilinear_interp_v2" - input_np = np.random.randint( - low=0, high=256, size=self.input_shape).astype("uint8") + input_np = np.random.randint(low=0, high=256, + size=self.input_shape).astype("uint8") if self.scale: if isinstance(self.scale, float) or isinstance(self.scale, int): @@ -355,6 +368,7 @@ class TestBilinearInterpOpUint8(OpTest): class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 32, 64] @@ -366,6 +380,7 @@ class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8): class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [4, 1, 7, 8] @@ -378,24 +393,28 @@ class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8): class TestBilinearInterpOtherMethod1(TestBilinearInterpOp): + def set_align_mode(self): self.align_corners = False self.align_mode = 1 class TestBilinearInterpWithMethod2(TestBilinearInterpOp): + def set_align_mode(self): self.align_corners = False self.align_mode = 0 class TestBilinearInterpWithMethod3(TestBilinearInterpOp): + def 
set_align_mode(self): self.align_corners = True self.align_mode = 0 class TestBilinearInterpScale1(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 5, 7] @@ -407,6 +426,7 @@ class TestBilinearInterpScale1(TestBilinearInterpOp): class TestBilinearInterpScale2(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 5, 7] @@ -418,6 +438,7 @@ class TestBilinearInterpScale2(TestBilinearInterpOp): class TestBilinearInterpScale3(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 5, 7] @@ -429,6 +450,7 @@ class TestBilinearInterpScale3(TestBilinearInterpOp): class TestBilinearInterpScale4(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 5, 7] @@ -440,6 +462,7 @@ class TestBilinearInterpScale4(TestBilinearInterpOp): class TestBilinearInterpZero(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 5, 7] @@ -451,6 +474,7 @@ class TestBilinearInterpZero(TestBilinearInterpOp): class TestBilinearInterpOp_attr_tensor(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -524,6 +548,7 @@ class TestBilinearInterpOp_attr_tensor(OpTest): # out_size is a 1-D tensor class TestBilinearInterp_attr_tensor_Case1(TestBilinearInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 3, 9, 6] @@ -536,6 +561,7 @@ class TestBilinearInterp_attr_tensor_Case1(TestBilinearInterpOp_attr_tensor): # scale is a 1-D tensor class TestBilinearInterp_attr_tensor_Case2(TestBilinearInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 2, 32, 16] @@ -549,6 +575,7 @@ class TestBilinearInterp_attr_tensor_Case2(TestBilinearInterpOp_attr_tensor): # scale is a 1-D tensor class TestBilinearInterp_attr_tensor_Case3(TestBilinearInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 2, 32, 16] @@ -561,20 +588,23 @@ class TestBilinearInterp_attr_tensor_Case3(TestBilinearInterpOp_attr_tensor): class TestBilinearInterpOpAPI(unittest.TestCase): + def test_case(self): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") dim = fluid.data(name="dim", shape=[1], dtype="int32") shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") actual_size = fluid.data(name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32") + scale_tensor = fluid.data(name="scale_tensor", + shape=[1], + dtype="float32") out1 = fluid.layers.resize_bilinear(x, out_shape=[12, 12]) out2 = fluid.layers.resize_bilinear(x, out_shape=[12, dim]) out3 = fluid.layers.resize_bilinear(x, out_shape=shape_tensor) - out4 = fluid.layers.resize_bilinear( - x, out_shape=[4, 4], actual_shape=actual_size) + out4 = fluid.layers.resize_bilinear(x, + out_shape=[4, 4], + actual_shape=actual_size) out5 = fluid.layers.resize_bilinear(x, scale=scale_tensor) x_data = np.random.random((2, 3, 6, 6)).astype("float32") @@ -600,13 +630,16 @@ class TestBilinearInterpOpAPI(unittest.TestCase): fetch_list=[out1, out2, out3, out4, out5], return_numpy=True) - expect_res = bilinear_interp_np( - x_data, out_h=12, out_w=12, align_corners=True) + expect_res = bilinear_interp_np(x_data, + out_h=12, + out_w=12, + align_corners=True) for res in results: 
self.assertTrue(np.allclose(res, expect_res)) class TestBilinearInterpOpAPI_dy(unittest.TestCase): + def test_case(self): import paddle if core.is_compiled_with_cuda(): @@ -616,14 +649,19 @@ class TestBilinearInterpOpAPI_dy(unittest.TestCase): with fluid.dygraph.guard(place): input_data = np.random.random((2, 3, 6, 6)).astype("float32") input_x = paddle.to_tensor(input_data) - expect_res = bilinear_interp_np( - input_data, out_h=12, out_w=12, align_corners=False) - out = interpolate( - x=input_x, size=[12, 12], mode="bilinear", align_corners=False) + expect_res = bilinear_interp_np(input_data, + out_h=12, + out_w=12, + align_corners=False) + out = interpolate(x=input_x, + size=[12, 12], + mode="bilinear", + align_corners=False) self.assertTrue(np.allclose(out.numpy(), expect_res)) class TestBilinearInterpOpAPI_dy2(unittest.TestCase): + def test_case(self): import paddle if core.is_compiled_with_cuda(): @@ -635,14 +673,19 @@ class TestBilinearInterpOpAPI_dy2(unittest.TestCase): size_np = np.array([12, 12]).astype("int64") input_x = paddle.to_tensor(input_data) size = paddle.to_tensor(size_np) - expect_res = bilinear_interp_np( - input_data, out_h=12, out_w=12, align_corners=False) - out = interpolate( - x=input_x, size=size, mode="bilinear", align_corners=False) + expect_res = bilinear_interp_np(input_data, + out_h=12, + out_w=12, + align_corners=False) + out = interpolate(x=input_x, + size=size, + mode="bilinear", + align_corners=False) self.assertTrue(np.allclose(out.numpy(), expect_res)) class TestBilinearInterpOpAPI_dy3(unittest.TestCase): + def test_case(self): import paddle if core.is_compiled_with_cuda(): @@ -654,17 +697,19 @@ class TestBilinearInterpOpAPI_dy3(unittest.TestCase): size_1 = np.array([12]).astype("int64") input_x = paddle.to_tensor(input_data) size = paddle.to_tensor(size_1) - expect_res = bilinear_interp_np( - input_data, out_h=12, out_w=12, align_corners=False) - out = interpolate( - x=input_x, - size=[size, size], - mode="bilinear", - align_corners=False) + expect_res = bilinear_interp_np(input_data, + out_h=12, + out_w=12, + align_corners=False) + out = interpolate(x=input_x, + size=[size, size], + mode="bilinear", + align_corners=False) self.assertTrue(np.allclose(out.numpy(), expect_res)) class TestBilinearInterpOpAPI_dy4(unittest.TestCase): + def test_case(self): import paddle if core.is_compiled_with_cuda(): @@ -676,13 +721,14 @@ class TestBilinearInterpOpAPI_dy4(unittest.TestCase): scale_np = np.array([2, 2]).astype("int64") input_x = paddle.to_tensor(input_data) scale = paddle.to_tensor(scale_np) - expect_res = bilinear_interp_np( - input_data, out_h=12, out_w=12, align_corners=False) - out = interpolate( - x=input_x, - scale_factor=scale, - mode="bilinear", - align_corners=False) + expect_res = bilinear_interp_np(input_data, + out_h=12, + out_w=12, + align_corners=False) + out = interpolate(x=input_x, + scale_factor=scale, + mode="bilinear", + align_corners=False) self.assertTrue(np.allclose(out.numpy(), expect_res)) diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py index 60e9d0a26b3..53019249270 100644 --- a/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py @@ -21,13 +21,15 @@ from op_test import OpTest class TestDygraphBilinearTensorProductAPIError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - 
layer = fluid.dygraph.nn.BilinearTensorProduct( - input1_dim=5, input2_dim=4, output_dim=1000) + layer = fluid.dygraph.nn.BilinearTensorProduct(input1_dim=5, + input2_dim=4, + output_dim=1000) # the input must be Variable. - x0 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x0 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) self.assertRaises(TypeError, layer, x0) # the input dtype must be float32 or float64 x1 = fluid.data(name='x1', shape=[-1, 5], dtype="float16") @@ -36,6 +38,7 @@ class TestDygraphBilinearTensorProductAPIError(unittest.TestCase): class TestBilinearTensorProductOp(OpTest): + def setUp(self): self.op_type = "bilinear_tensor_product" batch_size = 6 diff --git a/python/paddle/fluid/tests/unittests/test_bincount_op.py b/python/paddle/fluid/tests/unittests/test_bincount_op.py index 17b04b954af..2b99c921911 100644 --- a/python/paddle/fluid/tests/unittests/test_bincount_op.py +++ b/python/paddle/fluid/tests/unittests/test_bincount_op.py @@ -43,14 +43,15 @@ class TestBincountOpAPI(unittest.TestCase): img = np.array([0, 1, 1, 3, 2, 1, 7]).astype(np.int64) w = np.array([0, 1, 1, 2, 2, 1, 0]).astype(np.int64) res = exe.run(train_program, - feed={'input': img, - 'weights': w}, + feed={ + 'input': img, + 'weights': w + }, fetch_list=[output]) actual = np.array(res[0]) expected = np.bincount(img, weights=w) - self.assertTrue( - (actual == expected).all(), - msg='bincount output is wrong, out =' + str(actual)) + self.assertTrue((actual == expected).all(), + msg='bincount output is wrong, out =' + str(actual)) def test_dygraph(self): with fluid.dygraph.guard(): @@ -153,12 +154,12 @@ class TestCase1(TestBincountOp): def init_test_case(self): self.minlength = 0 - self.np_weights = np.random.randint( - low=0, high=20, size=10).astype(np.float32) + self.np_weights = np.random.randint(low=0, high=20, + size=10).astype(np.float32) self.np_input = np.random.randint(low=0, high=20, size=10) - self.Out = np.bincount( - self.np_input, weights=self.np_weights, - minlength=self.minlength).astype(np.float32) + self.Out = np.bincount(self.np_input, + weights=self.np_weights, + minlength=self.minlength).astype(np.float32) class TestCase2(TestBincountOp): @@ -175,8 +176,9 @@ class TestCase2(TestBincountOp): self.minlength = 0 self.np_weights = np.random.randint(low=0, high=20, size=10) self.np_input = np.random.randint(low=0, high=20, size=10) - self.Out = np.bincount( - self.np_input, weights=self.np_weights, minlength=self.minlength) + self.Out = np.bincount(self.np_input, + weights=self.np_weights, + minlength=self.minlength) class TestCase3(TestBincountOp): @@ -191,8 +193,8 @@ class TestCase4(TestBincountOp): # with input(INT32) def init_test_case(self): self.minlength = 0 - self.np_input = np.random.randint( - low=0, high=20, size=10).astype(np.int32) + self.np_input = np.random.randint(low=0, high=20, + size=10).astype(np.int32) self.Out = np.bincount(self.np_input, minlength=self.minlength) diff --git a/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py b/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py index cc2b1165ec3..b99892c65e1 100644 --- a/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py +++ b/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py @@ -84,6 +84,7 @@ def batch_bipartite_match(distance, lod, match_type=None, dist_threshold=None): class TestBipartiteMatchOpWithLoD(OpTest): + def setUp(self): self.op_type = 'bipartite_match' lod = [[5, 6, 12]] @@ 
-101,6 +102,7 @@ class TestBipartiteMatchOpWithLoD(OpTest): class TestBipartiteMatchOpWithoutLoD(OpTest): + def setUp(self): self.op_type = 'bipartite_match' lod = [[8]] @@ -118,6 +120,7 @@ class TestBipartiteMatchOpWithoutLoD(OpTest): class TestBipartiteMatchOpWithoutLoDLargeScaleInput(OpTest): + def setUp(self): self.op_type = 'bipartite_match' lod = [[300]] @@ -135,12 +138,13 @@ class TestBipartiteMatchOpWithoutLoDLargeScaleInput(OpTest): class TestBipartiteMatchOpWithPerPredictionType(OpTest): + def setUp(self): self.op_type = 'bipartite_match' lod = [[5, 6, 12]] dist = np.random.random((23, 237)).astype('float32') - match_indices, match_dist = batch_bipartite_match(dist, lod[0], - 'per_prediction', 0.5) + match_indices, match_dist = batch_bipartite_match( + dist, lod[0], 'per_prediction', 0.5) self.inputs = {'DistMat': (dist, lod)} self.outputs = { @@ -157,6 +161,7 @@ class TestBipartiteMatchOpWithPerPredictionType(OpTest): class TestBipartiteMatchOpWithEmptyLoD(OpTest): + def setUp(self): self.op_type = 'bipartite_match' lod = [[5, 6, 0, 12]] diff --git a/python/paddle/fluid/tests/unittests/test_bitwise_op.py b/python/paddle/fluid/tests/unittests/test_bitwise_op.py index ead78d75c3d..c387555ccda 100644 --- a/python/paddle/fluid/tests/unittests/test_bitwise_op.py +++ b/python/paddle/fluid/tests/unittests/test_bitwise_op.py @@ -22,16 +22,21 @@ paddle.enable_static() ################## TEST OP: BitwiseAnd ################## class TestBitwiseAnd(OpTest): + def setUp(self): self.op_type = "bitwise_and" self.init_dtype() self.init_shape() self.init_bound() - x = np.random.randint( - self.low, self.high, self.x_shape, dtype=self.dtype) - y = np.random.randint( - self.low, self.high, self.y_shape, dtype=self.dtype) + x = np.random.randint(self.low, + self.high, + self.x_shape, + dtype=self.dtype) + y = np.random.randint(self.low, + self.high, + self.y_shape, + dtype=self.dtype) out = np.bitwise_and(x, y) self.inputs = {'X': x, 'Y': y} @@ -56,6 +61,7 @@ class TestBitwiseAnd(OpTest): class TestBitwiseAndUInt8(TestBitwiseAnd): + def init_dtype(self): self.dtype = np.uint8 @@ -65,6 +71,7 @@ class TestBitwiseAndUInt8(TestBitwiseAnd): class TestBitwiseAndInt8(TestBitwiseAnd): + def init_dtype(self): self.dtype = np.int8 @@ -74,6 +81,7 @@ class TestBitwiseAndInt8(TestBitwiseAnd): class TestBitwiseAndInt16(TestBitwiseAnd): + def init_dtype(self): self.dtype = np.int16 @@ -83,6 +91,7 @@ class TestBitwiseAndInt16(TestBitwiseAnd): class TestBitwiseAndInt64(TestBitwiseAnd): + def init_dtype(self): self.dtype = np.int64 @@ -92,6 +101,7 @@ class TestBitwiseAndInt64(TestBitwiseAnd): class TestBitwiseAndBool(TestBitwiseAnd): + def setUp(self): self.op_type = "bitwise_and" self.init_shape() @@ -106,16 +116,21 @@ class TestBitwiseAndBool(TestBitwiseAnd): ################## TEST OP: BitwiseOr ################## class TestBitwiseOr(OpTest): + def setUp(self): self.op_type = "bitwise_or" self.init_dtype() self.init_shape() self.init_bound() - x = np.random.randint( - self.low, self.high, self.x_shape, dtype=self.dtype) - y = np.random.randint( - self.low, self.high, self.y_shape, dtype=self.dtype) + x = np.random.randint(self.low, + self.high, + self.x_shape, + dtype=self.dtype) + y = np.random.randint(self.low, + self.high, + self.y_shape, + dtype=self.dtype) out = np.bitwise_or(x, y) self.inputs = {'X': x, 'Y': y} @@ -140,6 +155,7 @@ class TestBitwiseOr(OpTest): class TestBitwiseOrUInt8(TestBitwiseOr): + def init_dtype(self): self.dtype = np.uint8 @@ -149,6 +165,7 @@ class TestBitwiseOrUInt8(TestBitwiseOr): 
class TestBitwiseOrInt8(TestBitwiseOr): + def init_dtype(self): self.dtype = np.int8 @@ -158,6 +175,7 @@ class TestBitwiseOrInt8(TestBitwiseOr): class TestBitwiseOrInt16(TestBitwiseOr): + def init_dtype(self): self.dtype = np.int16 @@ -167,6 +185,7 @@ class TestBitwiseOrInt16(TestBitwiseOr): class TestBitwiseOrInt64(TestBitwiseOr): + def init_dtype(self): self.dtype = np.int64 @@ -176,6 +195,7 @@ class TestBitwiseOrInt64(TestBitwiseOr): class TestBitwiseOrBool(TestBitwiseOr): + def setUp(self): self.op_type = "bitwise_or" self.init_shape() @@ -190,16 +210,21 @@ class TestBitwiseOrBool(TestBitwiseOr): ################## TEST OP: BitwiseXor ################## class TestBitwiseXor(OpTest): + def setUp(self): self.op_type = "bitwise_xor" self.init_dtype() self.init_shape() self.init_bound() - x = np.random.randint( - self.low, self.high, self.x_shape, dtype=self.dtype) - y = np.random.randint( - self.low, self.high, self.y_shape, dtype=self.dtype) + x = np.random.randint(self.low, + self.high, + self.x_shape, + dtype=self.dtype) + y = np.random.randint(self.low, + self.high, + self.y_shape, + dtype=self.dtype) out = np.bitwise_xor(x, y) self.inputs = {'X': x, 'Y': y} @@ -224,6 +249,7 @@ class TestBitwiseXor(OpTest): class TestBitwiseXorUInt8(TestBitwiseXor): + def init_dtype(self): self.dtype = np.uint8 @@ -233,6 +259,7 @@ class TestBitwiseXorUInt8(TestBitwiseXor): class TestBitwiseXorInt8(TestBitwiseXor): + def init_dtype(self): self.dtype = np.int8 @@ -242,6 +269,7 @@ class TestBitwiseXorInt8(TestBitwiseXor): class TestBitwiseXorInt16(TestBitwiseXor): + def init_dtype(self): self.dtype = np.int16 @@ -251,6 +279,7 @@ class TestBitwiseXorInt16(TestBitwiseXor): class TestBitwiseXorInt64(TestBitwiseXor): + def init_dtype(self): self.dtype = np.int64 @@ -260,6 +289,7 @@ class TestBitwiseXorInt64(TestBitwiseXor): class TestBitwiseXorBool(TestBitwiseXor): + def setUp(self): self.op_type = "bitwise_xor" self.init_shape() @@ -274,14 +304,17 @@ class TestBitwiseXorBool(TestBitwiseXor): ################## TEST OP: BitwiseNot ################## class TestBitwiseNot(OpTest): + def setUp(self): self.op_type = "bitwise_not" self.init_dtype() self.init_shape() self.init_bound() - x = np.random.randint( - self.low, self.high, self.x_shape, dtype=self.dtype) + x = np.random.randint(self.low, + self.high, + self.x_shape, + dtype=self.dtype) out = np.bitwise_not(x) self.inputs = {'X': x} @@ -305,6 +338,7 @@ class TestBitwiseNot(OpTest): class TestBitwiseNotUInt8(TestBitwiseNot): + def init_dtype(self): self.dtype = np.uint8 @@ -314,6 +348,7 @@ class TestBitwiseNotUInt8(TestBitwiseNot): class TestBitwiseNotInt8(TestBitwiseNot): + def init_dtype(self): self.dtype = np.int8 @@ -322,6 +357,7 @@ class TestBitwiseNotInt8(TestBitwiseNot): class TestBitwiseNotInt16(TestBitwiseNot): + def init_dtype(self): self.dtype = np.int16 @@ -331,6 +367,7 @@ class TestBitwiseNotInt16(TestBitwiseNot): class TestBitwiseNotInt64(TestBitwiseNot): + def init_dtype(self): self.dtype = np.int64 @@ -339,6 +376,7 @@ class TestBitwiseNotInt64(TestBitwiseNot): class TestBitwiseNotBool(TestBitwiseNot): + def setUp(self): self.op_type = "bitwise_not" self.init_shape() diff --git a/python/paddle/fluid/tests/unittests/test_bmm_op.py b/python/paddle/fluid/tests/unittests/test_bmm_op.py index a1c82668420..b9a5853c492 100644 --- a/python/paddle/fluid/tests/unittests/test_bmm_op.py +++ b/python/paddle/fluid/tests/unittests/test_bmm_op.py @@ -24,6 +24,7 @@ from paddle.fluid import Program, program_guard class TestBmmOp(OpTest): + def setUp(self): 
self.op_type = "bmm" X = np.random.random((10, 3, 4)).astype("float64") @@ -40,25 +41,31 @@ class TestBmmOp(OpTest): class API_TestBmm(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.layers.data( - 'data1', shape=[-1, 3, 4], dtype='float64') - data2 = fluid.layers.data( - 'data2', shape=[-1, 4, 5], dtype='float64') + data1 = fluid.layers.data('data1', + shape=[-1, 3, 4], + dtype='float64') + data2 = fluid.layers.data('data2', + shape=[-1, 4, 5], + dtype='float64') result_bmm = paddle.bmm(data1, data2) place = fluid.CPUPlace() exe = fluid.Executor(place) input1 = np.random.random([10, 3, 4]).astype('float64') input2 = np.random.random([10, 4, 5]).astype('float64') - result, = exe.run(feed={"data1": input1, - "data2": input2}, + result, = exe.run(feed={ + "data1": input1, + "data2": input2 + }, fetch_list=[result_bmm]) expected_result = np.matmul(input1, input2) self.assertTrue(np.allclose(expected_result, result)) class API_TestDygraphBmm(unittest.TestCase): + def test_out(self): input1 = np.array([[[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]], [[3.0, 3.0, 3.0], [4.0, 4.0, 4.0]]]) @@ -74,6 +81,7 @@ class API_TestDygraphBmm(unittest.TestCase): class TestBmmAPIError(unittest.TestCase): + def test_api_error(self): x_data = np.arange(24, dtype='float32').reshape((2, 3, 4)) y_data = np.arange(16, dtype='float32').reshape((2, 4, 2)) diff --git a/python/paddle/fluid/tests/unittests/test_box_clip_op.py b/python/paddle/fluid/tests/unittests/test_box_clip_op.py index b2b0598f31d..1324f251ee9 100644 --- a/python/paddle/fluid/tests/unittests/test_box_clip_op.py +++ b/python/paddle/fluid/tests/unittests/test_box_clip_op.py @@ -25,14 +25,14 @@ import copy def box_clip(input_box, im_info, output_box): im_w = round(im_info[1] / im_info[2]) im_h = round(im_info[0] / im_info[2]) - output_box[:, :, 0] = np.maximum( - np.minimum(input_box[:, :, 0], im_w - 1), 0) - output_box[:, :, 1] = np.maximum( - np.minimum(input_box[:, :, 1], im_h - 1), 0) - output_box[:, :, 2] = np.maximum( - np.minimum(input_box[:, :, 2], im_w - 1), 0) - output_box[:, :, 3] = np.maximum( - np.minimum(input_box[:, :, 3], im_h - 1), 0) + output_box[:, :, 0] = np.maximum(np.minimum(input_box[:, :, 0], im_w - 1), + 0) + output_box[:, :, 1] = np.maximum(np.minimum(input_box[:, :, 1], im_h - 1), + 0) + output_box[:, :, 2] = np.maximum(np.minimum(input_box[:, :, 2], im_w - 1), + 0) + output_box[:, :, 3] = np.maximum(np.minimum(input_box[:, :, 3], im_h - 1), + 0) def batch_box_clip(input_boxes, im_info, lod): @@ -49,6 +49,7 @@ def batch_box_clip(input_boxes, im_info, lod): class TestBoxClipOp(OpTest): + def test_check_output(self): self.check_output() diff --git a/python/paddle/fluid/tests/unittests/test_box_coder_op.py b/python/paddle/fluid/tests/unittests/test_box_coder_op.py index 220bffebe83..63df37f9122 100644 --- a/python/paddle/fluid/tests/unittests/test_box_coder_op.py +++ b/python/paddle/fluid/tests/unittests/test_box_coder_op.py @@ -34,8 +34,9 @@ def box_decoder(t_box, p_box, pb_v, output_box, norm, axis=0): pb_y = pb_y.reshape(shape) if pb_v.ndim == 2: - var_shape = (1, pb_v.shape[0], pb_v.shape[1]) if axis == 0 else ( - pb_v.shape[0], 1, pb_v.shape[1]) + var_shape = (1, pb_v.shape[0], + pb_v.shape[1]) if axis == 0 else (pb_v.shape[0], 1, + pb_v.shape[1]) pb_v = pb_v.reshape(var_shape) if pb_v.ndim == 1: tb_x = pb_v[0] * t_box[:, :, 0] * pb_w + pb_x @@ -102,6 +103,7 @@ def batch_box_coder(p_box, pb_v, t_box, lod, code_type, norm, axis=0): class TestBoxCoderOp(OpTest): + def 
test_check_output(self): self.check_output() @@ -128,6 +130,7 @@ class TestBoxCoderOp(OpTest): class TestBoxCoderOpWithoutBoxVar(OpTest): + def test_check_output(self): self.check_output() @@ -154,6 +157,7 @@ class TestBoxCoderOpWithoutBoxVar(OpTest): class TestBoxCoderOpWithLoD(OpTest): + def test_check_output(self): self.check_output() @@ -178,6 +182,7 @@ class TestBoxCoderOpWithLoD(OpTest): class TestBoxCoderOpWithAxis(OpTest): + def test_check_output(self): self.check_output() @@ -207,6 +212,7 @@ class TestBoxCoderOpWithAxis(OpTest): class TestBoxCoderOpWithVariance(OpTest): + def test_check_output(self): self.check_output() diff --git a/python/paddle/fluid/tests/unittests/test_box_decoder_and_assign_op.py b/python/paddle/fluid/tests/unittests/test_box_decoder_and_assign_op.py index b0afc2a2e4a..00f84dc9496 100644 --- a/python/paddle/fluid/tests/unittests/test_box_decoder_and_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_box_decoder_and_assign_op.py @@ -64,6 +64,7 @@ def box_decoder_and_assign(deltas, weights, boxes, box_score, box_clip): class TestBoxDecoderAndAssignOpWithLoD(OpTest): + def test_check_output(self): self.check_output() diff --git a/python/paddle/fluid/tests/unittests/test_boxps.py b/python/paddle/fluid/tests/unittests/test_boxps.py index d1340bb1ce7..ea98b6daf28 100644 --- a/python/paddle/fluid/tests/unittests/test_boxps.py +++ b/python/paddle/fluid/tests/unittests/test_boxps.py @@ -38,42 +38,38 @@ class TestTranspile(unittest.TestCase): main_program = fluid.Program() startup_program = fluid.Program() t = self.get_transpile("single_process_multi_thread") - t.transpile( - trainer_id=0, - startup_program=startup_program, - trainers="127.0.0.1:6174", - program=main_program) + t.transpile(trainer_id=0, + startup_program=startup_program, + trainers="127.0.0.1:6174", + program=main_program) t = self.get_transpile("grad_allreduce") try: - t.transpile( - trainer_id=0, - startup_program=startup_program, - trainers="127.0.0.1:6174", - program=main_program) + t.transpile(trainer_id=0, + startup_program=startup_program, + trainers="127.0.0.1:6174", + program=main_program) except ValueError as e: print(e) def test_single_trainers(self): transpiler = collective.GradAllReduce(0) try: - transpiler.transpile( - startup_program=fluid.Program(), - main_program=fluid.Program(), - rank=1, - endpoints="127.0.0.1:6174", - current_endpoint="127.0.0.1:6174", - wait_port="6174") + transpiler.transpile(startup_program=fluid.Program(), + main_program=fluid.Program(), + rank=1, + endpoints="127.0.0.1:6174", + current_endpoint="127.0.0.1:6174", + wait_port="6174") except ValueError as e: print(e) transpiler = collective.LocalSGD(0) try: - transpiler.transpile( - startup_program=fluid.Program(), - main_program=fluid.Program(), - rank=1, - endpoints="127.0.0.1:6174", - current_endpoint="127.0.0.1:6174", - wait_port="6174") + transpiler.transpile(startup_program=fluid.Program(), + main_program=fluid.Program(), + rank=1, + endpoints="127.0.0.1:6174", + current_endpoint="127.0.0.1:6174", + wait_port="6174") except ValueError as e: print(e) @@ -95,10 +91,14 @@ class TestPullBoxSparseOP(unittest.TestCase): paddle.enable_static() program = fluid.Program() with fluid.program_guard(program): - x = fluid.layers.data( - name='x', shape=[1], dtype='int64', lod_level=0) - y = fluid.layers.data( - name='y', shape=[1], dtype='int64', lod_level=0) + x = fluid.layers.data(name='x', + shape=[1], + dtype='int64', + lod_level=0) + y = fluid.layers.data(name='y', + shape=[1], + dtype='int64', + 
lod_level=0) emb_x, emb_y = _pull_box_sparse([x, y], size=1) diff --git a/python/paddle/fluid/tests/unittests/test_broadcast.py b/python/paddle/fluid/tests/unittests/test_broadcast.py index 8b8cdb1235c..159e3be7ff1 100644 --- a/python/paddle/fluid/tests/unittests/test_broadcast.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCBroadcastOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_broadcast_error.py b/python/paddle/fluid/tests/unittests/test_broadcast_error.py index 517de67fd6d..bc1d0268158 100644 --- a/python/paddle/fluid/tests/unittests/test_broadcast_error.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast_error.py @@ -19,6 +19,7 @@ import paddle.fluid.core as core class TestBroadcastOpCpu(OpTest): + def setUp(self): self.op_type = "broadcast" input = np.random.random((100, 2)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_broadcast_shape.py b/python/paddle/fluid/tests/unittests/test_broadcast_shape.py index b4ac096a696..8046a02c9ab 100644 --- a/python/paddle/fluid/tests/unittests/test_broadcast_shape.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast_shape.py @@ -18,6 +18,7 @@ import paddle class TestBroadcastShape(unittest.TestCase): + def test_result(self): shape = paddle.broadcast_shape([2, 1, 3], [1, 3, 1]) self.assertEqual(shape, [2, 3, 3]) diff --git a/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py b/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py index f60e4067a09..20e0ead8b3f 100644 --- a/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py @@ -22,6 +22,7 @@ from op_test import OpTest from test_collective_base import TestDistBase import random + random.seed(2021) paddle.enable_static() @@ -82,6 +83,7 @@ def gen_mixed_tensors_test(dtype): class TestCPUBroadcastTensorsOp(OpTest): + def set_place(self): self.place = core.CPUPlace() @@ -105,22 +107,25 @@ class TestCPUBroadcastTensorsOp(OpTest): test_func(**args) def test_check_output(self): - self.run_test(self.check_output_with_place, - {"place": self.place, - "atol": 1e-1}) - - def test_check_grad_normal(self): - self.run_test(self.check_grad_with_place, { + self.run_test(self.check_output_with_place, { "place": self.place, - "inputs_to_check": ['x0', 'x1'], - "output_names": ['out0', 'out1'], - "max_relative_error": 0.05, + "atol": 1e-1 }) + def test_check_grad_normal(self): + self.run_test( + self.check_grad_with_place, { + "place": self.place, + "inputs_to_check": ['x0', 'x1'], + "output_names": ['out0', 'out1'], + "max_relative_error": 0.05, + }) + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDABroadcastTensorsOp(TestCPUBroadcastTensorsOp): + def set_place(self): self.place = core.CUDAPlace(0) @@ -131,13 +136,17 @@ class TestCUDABroadcastTensorsOp(TestCPUBroadcastTensorsOp): class TestBroadcastTensorsAPI(unittest.TestCase): + def test_api(self): + def test_static(): inputs = [ - paddle.fluid.layers.data( - shape=[4, 1, 4, 1], dtype='float32', name="x0"), - paddle.fluid.layers.data( - shape=[1, 4, 1, 4], dtype='float32', name="x1") + paddle.fluid.layers.data(shape=[4, 1, 4, 1], + dtype='float32', + name="x0"), + paddle.fluid.layers.data(shape=[1, 4, 1, 4], + dtype='float32', + name="x1") ] paddle.broadcast_tensors(inputs) @@ -159,31 +168,39 @@ class TestBroadcastTensorsAPI(unittest.TestCase): 
class TestRaiseBroadcastTensorsError(unittest.TestCase): + def test_errors(self): + def test_type(): inputs = [ - paddle.fluid.layers.data( - shape=[1, 1, 1, 1], dtype='float32', name="x4"), - paddle.fluid.layers.data( - shape=[1, 4, 1, 1], dtype='float64', name="x5") + paddle.fluid.layers.data(shape=[1, 1, 1, 1], + dtype='float32', + name="x4"), + paddle.fluid.layers.data(shape=[1, 4, 1, 1], + dtype='float64', + name="x5") ] paddle.broadcast_tensors(inputs) def test_dtype(): inputs = [ - paddle.fluid.layers.data( - shape=[1, 1, 1, 1], dtype='int8', name="x6"), - paddle.fluid.layers.data( - shape=[1, 4, 1, 1], dtype='int8', name="x7") + paddle.fluid.layers.data(shape=[1, 1, 1, 1], + dtype='int8', + name="x6"), + paddle.fluid.layers.data(shape=[1, 4, 1, 1], + dtype='int8', + name="x7") ] paddle.broadcast_tensors(inputs) def test_bcast_semantics(): inputs = [ - paddle.fluid.layers.data( - shape=[1, 3, 1, 1], dtype='float32', name="x9"), - paddle.fluid.layers.data( - shape=[1, 8, 1, 1], dtype='float32', name="x10") + paddle.fluid.layers.data(shape=[1, 3, 1, 1], + dtype='float32', + name="x9"), + paddle.fluid.layers.data(shape=[1, 8, 1, 1], + dtype='float32', + name="x10") ] paddle.broadcast_tensors(inputs) @@ -193,37 +210,33 @@ class TestRaiseBroadcastTensorsError(unittest.TestCase): class TestRaiseBroadcastTensorsErrorDyGraph(unittest.TestCase): + def test_errors(self): + def test_type(): inputs = [ paddle.to_tensor( - np.ones( - shape=[1, 1, 1, 1], dtype='float32', name="x4")), + np.ones(shape=[1, 1, 1, 1], dtype='float32', name="x4")), paddle.to_tensor( - np.ones( - shape=[1, 4, 1, 1], dtype='float64', name="x5")) + np.ones(shape=[1, 4, 1, 1], dtype='float64', name="x5")) ] paddle.broadcast_tensors(inputs) def test_dtype(): inputs = [ paddle.to_tensor( - np.ones( - shape=[1, 1, 1, 1], dtype='int8', name="x6")), + np.ones(shape=[1, 1, 1, 1], dtype='int8', name="x6")), paddle.to_tensor( - np.ones( - shape=[1, 4, 1, 1], dtype='int8', name="x7")) + np.ones(shape=[1, 4, 1, 1], dtype='int8', name="x7")) ] paddle.broadcast_tensors(inputs) def test_bcast_semantics(): inputs = [ paddle.to_tensor( - np.ones( - shape=[1, 3, 1, 1], dtype='float32', name="x9")), + np.ones(shape=[1, 3, 1, 1], dtype='float32', name="x9")), paddle.to_tensor( - np.ones( - shape=[1, 8, 1, 1], dtype='float32', name="x10")) + np.ones(shape=[1, 8, 1, 1], dtype='float32', name="x10")) ] paddle.broadcast_tensors(inputs) diff --git a/python/paddle/fluid/tests/unittests/test_broadcast_to_op.py b/python/paddle/fluid/tests/unittests/test_broadcast_to_op.py index 80f4c7a2698..3de96959eb4 100644 --- a/python/paddle/fluid/tests/unittests/test_broadcast_to_op.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast_to_op.py @@ -25,10 +25,11 @@ paddle.enable_static() class TestBroadcastToError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) shape = [2, 2] self.assertRaises(TypeError, paddle.tensor.broadcast_to, x1, shape) x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") @@ -40,17 +41,19 @@ class TestBroadcastToError(unittest.TestCase): # Test python API class TestBroadcastToAPI(unittest.TestCase): + def test_api(self): input = np.random.random([12, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32") + x = fluid.layers.data(name='x', + shape=[12, 14], + 
append_batch_size=False, + dtype="float32") positive_2 = fluid.layers.fill_constant([1], "int32", 12) - expand_shape = fluid.layers.data( - name="expand_shape", - shape=[2], - append_batch_size=False, - dtype="int32") + expand_shape = fluid.layers.data(name="expand_shape", + shape=[2], + append_batch_size=False, + dtype="int32") out_1 = paddle.broadcast_to(x, shape=[12, 14]) out_2 = paddle.broadcast_to(x, shape=[positive_2, 14]) @@ -61,7 +64,8 @@ class TestBroadcastToAPI(unittest.TestCase): exe = fluid.Executor(place=fluid.CPUPlace()) res_1, res_2, res_3 = exe.run(fluid.default_main_program(), feed={ - "x": input, + "x": + input, "expand_shape": np.array([12, 14]).astype("int32") }, diff --git a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py index eda7c3caaeb..ffc17318472 100644 --- a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py +++ b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py @@ -24,13 +24,16 @@ import os batch_size = 32 feed_dict = { - 'image': np.random.random([batch_size, 784]).astype('float32'), - 'label': np.random.random_integers( - low=0, high=9, size=[batch_size, 1]).astype('int64') + 'image': + np.random.random([batch_size, 784]).astype('float32'), + 'label': + np.random.random_integers(low=0, high=9, size=[batch_size, + 1]).astype('int64') } class InplaceTestBase(unittest.TestCase): + def initParameter(self): self.use_cuda = True self.fuse_all_optimizer_ops = False @@ -60,8 +63,8 @@ class InplaceTestBase(unittest.TestCase): with fluid.scope_guard(scope): exe = fluid.Executor( - fluid.CUDAPlace(0) - if self.use_cuda else fluid.CPUPlace()) + fluid.CUDAPlace(0) if self.use_cuda else fluid.CPUPlace( + )) exe.run(startup_program) return main_program, scope, exe, loss @@ -101,7 +104,7 @@ class InplaceTestBase(unittest.TestCase): all_vars_name = self.get_all_vars(prog1) repeated_var_names = all_vars_name * 2 - random.shuffle(repeated_var_names) # add some random + random.shuffle(repeated_var_names) # add some random for fetch_var in repeated_var_names: for _ in range(4): @@ -117,8 +120,8 @@ class InplaceTestBase(unittest.TestCase): fetch_list=[fetch_var]) self.assertTrue( np.array_equal(fetch_val1, fetch_val2), - "error var name: {}, fetch_val1: {}, fetch_val2: {}". 
- format( + "error var name: {}, fetch_val1: {}, fetch_val2: {}" + .format( fetch_var, fetch_val1[~np.equal(fetch_val1, fetch_val2)], fetch_val2[~np.equal(fetch_val1, fetch_val2)])) @@ -145,14 +148,13 @@ class InplaceTestBase(unittest.TestCase): build_strategy.enable_inplace = enable_inplace build_strategy.fuse_all_optimizer_ops = self.fuse_all_optimizer_ops compiled_program = fluid.CompiledProgram( - prog).with_data_parallel( - loss_name=loss.name, - build_strategy=build_strategy, - places=places) + prog).with_data_parallel(loss_name=loss.name, + build_strategy=build_strategy, + places=places) compiled_programs.append(compiled_program) repeated_var_names = self.get_all_vars(prog1) * 2 - random.shuffle(repeated_var_names) # add some random + random.shuffle(repeated_var_names) # add some random for fetch_var in repeated_var_names: for _ in range(4): @@ -175,6 +177,7 @@ class InplaceTestBase(unittest.TestCase): class CUDAInplaceTest(InplaceTestBase): + def initParameter(self): self.use_cuda = True self.fuse_all_optimizer_ops = False @@ -187,6 +190,7 @@ class CUDAInplaceTest(InplaceTestBase): class CPUInplaceTest(InplaceTestBase): + def initParameter(self): self.use_cuda = False self.fuse_all_optimizer_ops = False diff --git a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass.py b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass.py index e9e62bee006..6ce5e64b0ee 100644 --- a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass.py +++ b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass.py @@ -17,6 +17,7 @@ import unittest class CUDAInplaceTestWithFuseOptimizationOps(InplaceTestBase): + def initParameter(self): self.use_cuda = True self.fuse_all_optimizer_ops = True @@ -30,6 +31,7 @@ class CUDAInplaceTestWithFuseOptimizationOps(InplaceTestBase): class CPUInplaceTestWithFuseOptimizationOps(InplaceTestBase): + def initParameter(self): self.use_cuda = False self.fuse_all_optimizer_ops = True diff --git a/python/paddle/fluid/tests/unittests/test_build_strategy_fusion_group_pass.py b/python/paddle/fluid/tests/unittests/test_build_strategy_fusion_group_pass.py index 1405bf9d70b..a5daa691731 100644 --- a/python/paddle/fluid/tests/unittests/test_build_strategy_fusion_group_pass.py +++ b/python/paddle/fluid/tests/unittests/test_build_strategy_fusion_group_pass.py @@ -21,6 +21,7 @@ from test_eager_deletion_padding_rnn import RNNConfig, PaddingRNNTestBase class FusionGroupPaddingRNNTest(PaddingRNNTestBase): + def set_customed_config(self): self.build_strategy.enable_auto_fusion = True diff --git a/python/paddle/fluid/tests/unittests/test_c_comm_init_all_op.py b/python/paddle/fluid/tests/unittests/test_c_comm_init_all_op.py index a7f4a15381b..2082bc7ca4e 100644 --- a/python/paddle/fluid/tests/unittests/test_c_comm_init_all_op.py +++ b/python/paddle/fluid/tests/unittests/test_c_comm_init_all_op.py @@ -21,6 +21,7 @@ import paddle.fluid as fluid class TestCCommInitAllOp(unittest.TestCase): + def setUp(self): self.place = fluid.CUDAPlace(0) self.exe = fluid.Executor(self.place) @@ -41,9 +42,11 @@ class TestCCommInitAllOp(unittest.TestCase): def test_specifying_devices(self): program = fluid.Program() block = program.global_block() - block.append_op( - type='c_comm_init_all', attrs={'devices': [0], - 'ring_id': 1}) + block.append_op(type='c_comm_init_all', + attrs={ + 'devices': [0], + 'ring_id': 1 + }) 
self.exe.run(program) diff --git a/python/paddle/fluid/tests/unittests/test_c_concat.py b/python/paddle/fluid/tests/unittests/test_c_concat.py index 20f166af14c..17469367a85 100644 --- a/python/paddle/fluid/tests/unittests/test_c_concat.py +++ b/python/paddle/fluid/tests/unittests/test_c_concat.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestConcatOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_c_identity.py b/python/paddle/fluid/tests/unittests/test_c_identity.py index c780f800d1e..4697f7358c9 100644 --- a/python/paddle/fluid/tests/unittests/test_c_identity.py +++ b/python/paddle/fluid/tests/unittests/test_c_identity.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestIdentityOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_c_split.py b/python/paddle/fluid/tests/unittests/test_c_split.py index 0a5d91e0625..24ed6b57572 100644 --- a/python/paddle/fluid/tests/unittests/test_c_split.py +++ b/python/paddle/fluid/tests/unittests/test_c_split.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestSplitOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_calc_gradient.py b/python/paddle/fluid/tests/unittests/test_calc_gradient.py index 63ba16c57e0..53c578fc6c1 100644 --- a/python/paddle/fluid/tests/unittests/test_calc_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_calc_gradient.py @@ -23,6 +23,7 @@ from paddle.fluid.backward import calc_gradient class TestCalcGradient(unittest.TestCase): + def test_calc_gradient(self): main = fluid.Program() startup = fluid.Program() @@ -40,6 +41,7 @@ class TestCalcGradient(unittest.TestCase): class TestDoubleGrad(unittest.TestCase): + def test1(self): main = fluid.Program() startup = fluid.Program() @@ -83,6 +85,7 @@ class TestDoubleGrad(unittest.TestCase): class TestGradientWithPrune(unittest.TestCase): + def test_prune(self): with paddle.fluid.scope_guard(paddle.static.Scope()): x = fluid.data(name='x', shape=[3], dtype='float32') @@ -101,6 +104,7 @@ class TestGradientWithPrune(unittest.TestCase): class TestDoubleGradient(unittest.TestCase): + def build_program(self): start_prog = paddle.static.Program() main_prog = paddle.static.Program() @@ -135,6 +139,7 @@ class TestDoubleGradient(unittest.TestCase): class TestDoubleGradient2(unittest.TestCase): + def build_program(self): start_prog = paddle.static.Program() main_prog = paddle.static.Program() diff --git a/python/paddle/fluid/tests/unittests/test_case.py b/python/paddle/fluid/tests/unittests/test_case.py index 6391435cc80..ed633c758b5 100644 --- a/python/paddle/fluid/tests/unittests/test_case.py +++ b/python/paddle/fluid/tests/unittests/test_case.py @@ -26,7 +26,9 @@ import paddle.fluid.optimizer as optimizer class TestAPICase(unittest.TestCase): + def test_return_single_var(self): + def fn_1(): return layers.fill_constant(shape=[4, 2], dtype='int32', value=1) @@ -46,16 +48,16 @@ class TestAPICase(unittest.TestCase): pred_1 = layers.less_than(z, x) # true: 0.2 < 0.3 # call fn_1 - out_0 = layers.case( - pred_fn_pairs=[(pred_1, fn_1), (pred_1, fn_2)], default=fn_3) + out_0 = layers.case(pred_fn_pairs=[(pred_1, fn_1), (pred_1, fn_2)], + default=fn_3) # call fn_2 - out_1 = layers.case( - pred_fn_pairs=[(pred_2, fn_1), (pred_1, fn_2)], default=fn_3) + out_1 = layers.case(pred_fn_pairs=[(pred_2, fn_1), (pred_1, fn_2)], + default=fn_3) # call default fn_3 - out_2 = layers.case( - pred_fn_pairs=((pred_2, fn_1), (pred_2, fn_2)), 
default=fn_3) + out_2 = layers.case(pred_fn_pairs=((pred_2, fn_1), (pred_2, fn_2)), + default=fn_3) # no default, call fn_2 out_3 = layers.case(pred_fn_pairs=[(pred_1, fn_2)]) @@ -63,8 +65,8 @@ class TestAPICase(unittest.TestCase): # no default, call fn_2. but pred_2 is false out_4 = layers.case(pred_fn_pairs=[(pred_2, fn_2)]) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) res = exe.run(main_program, @@ -77,20 +79,27 @@ class TestAPICase(unittest.TestCase): self.assertTrue(np.allclose(res[4], 2)) def test_return_var_tuple(self): + def fn_1(): - return layers.fill_constant( - shape=[1, 2], dtype='int32', value=1), layers.fill_constant( - shape=[2, 3], dtype='float32', value=2) + return layers.fill_constant(shape=[1, 2], dtype='int32', + value=1), layers.fill_constant( + shape=[2, 3], + dtype='float32', + value=2) def fn_2(): - return layers.fill_constant( - shape=[3, 4], dtype='int32', value=3), layers.fill_constant( - shape=[4, 5], dtype='float32', value=4) + return layers.fill_constant(shape=[3, 4], dtype='int32', + value=3), layers.fill_constant( + shape=[4, 5], + dtype='float32', + value=4) def fn_3(): - return layers.fill_constant( - shape=[5], dtype='int32', value=5), layers.fill_constant( - shape=[5, 6], dtype='float32', value=6) + return layers.fill_constant(shape=[5], dtype='int32', + value=5), layers.fill_constant( + shape=[5, 6], + dtype='float32', + value=6) main_program = Program() startup_program = Program() @@ -104,46 +113,54 @@ class TestAPICase(unittest.TestCase): out = layers.case(((pred_1, fn_1), (pred_2, fn_2)), fn_3) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) ret = exe.run(main_program, fetch_list=out) self.assertTrue( np.allclose(np.asarray(ret[0]), np.full((1, 2), 1, np.int32))) self.assertTrue( - np.allclose( - np.asarray(ret[1]), np.full((2, 3), 2, np.float32))) + np.allclose(np.asarray(ret[1]), np.full((2, 3), 2, np.float32))) class TestAPICase_Nested(unittest.TestCase): + def test_nested_case(self): + def fn_1(x=1): var_5 = layers.fill_constant(shape=[1], dtype='int32', value=5) var_6 = layers.fill_constant(shape=[1], dtype='int32', value=6) - out = layers.case(pred_fn_pairs=[(var_5 < var_6, partial( - layers.fill_constant, shape=[1], dtype='int32', value=x)), - (var_5 == var_6, partial( - layers.fill_constant, - shape=[2], - dtype='int32', - value=x))]) + out = layers.case(pred_fn_pairs=[ + (var_5 < var_6, + partial( + layers.fill_constant, shape=[1], dtype='int32', value=x)), + (var_5 == var_6, + partial( + layers.fill_constant, shape=[2], dtype='int32', value=x)) + ]) return out def fn_2(x=2): var_5 = layers.fill_constant(shape=[1], dtype='int32', value=5) var_6 = layers.fill_constant(shape=[1], dtype='int32', value=6) - out = layers.case(pred_fn_pairs=[(var_5 < var_6, partial( - fn_1, x=x)), (var_5 == var_6, partial( - layers.fill_constant, shape=[2], dtype='int32', value=x))]) + out = layers.case(pred_fn_pairs=[ + (var_5 < var_6, partial(fn_1, x=x)), + (var_5 == var_6, + partial( + layers.fill_constant, shape=[2], dtype='int32', value=x)) + ]) return out def fn_3(): var_5 = layers.fill_constant(shape=[1], dtype='int32', value=5) var_6 = layers.fill_constant(shape=[1], dtype='int32', value=6) - out = layers.case(pred_fn_pairs=[(var_5 < var_6, partial( - 
fn_2, x=3)), (var_5 == var_6, partial( - layers.fill_constant, shape=[2], dtype='int32', value=7))]) + out = layers.case(pred_fn_pairs=[ + (var_5 < var_6, partial(fn_2, x=3)), + (var_5 == var_6, + partial( + layers.fill_constant, shape=[2], dtype='int32', value=7)) + ]) return out main_program = Program() @@ -155,17 +172,17 @@ class TestAPICase_Nested(unittest.TestCase): pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1 pred_1 = layers.less_than(z, x) # true: 0.2 < 0.3 - out_1 = layers.case( - pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], default=fn_3) + out_1 = layers.case(pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], + default=fn_3) - out_2 = layers.case( - pred_fn_pairs=[(pred_2, fn_1), (pred_1, fn_2)], default=fn_3) + out_2 = layers.case(pred_fn_pairs=[(pred_2, fn_1), (pred_1, fn_2)], + default=fn_3) - out_3 = layers.case( - pred_fn_pairs=[(x == y, fn_1), (x == z, fn_2)], default=fn_3) + out_3 = layers.case(pred_fn_pairs=[(x == y, fn_1), (x == z, fn_2)], + default=fn_3) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) res = exe.run(main_program, fetch_list=[out_1, out_2, out_3]) @@ -176,7 +193,9 @@ class TestAPICase_Nested(unittest.TestCase): class TestAPICase_Error(unittest.TestCase): + def test_error(self): + def fn_1(): return layers.fill_constant(shape=[4, 2], dtype='int32', value=1) @@ -226,15 +245,18 @@ class TestAPICase_Error(unittest.TestCase): # when optimizer in case class TestMutiTask(unittest.TestCase): + def test_optimizer_in_case(self): BATCH_SIZE = 1 INPUT_SIZE = 784 EPOCH_NUM = 2 - x = fluid.data( - name='x', shape=[BATCH_SIZE, INPUT_SIZE], dtype='float32') - y = fluid.data( - name='y', shape=[BATCH_SIZE, INPUT_SIZE], dtype='float32') + x = fluid.data(name='x', + shape=[BATCH_SIZE, INPUT_SIZE], + dtype='float32') + y = fluid.data(name='y', + shape=[BATCH_SIZE, INPUT_SIZE], + dtype='float32') switch_id = fluid.data(name='switch_id', shape=[1], dtype='int32') diff --git a/python/paddle/fluid/tests/unittests/test_cast_op.py b/python/paddle/fluid/tests/unittests/test_cast_op.py index a828eca4f4b..6e9c9bcd147 100644 --- a/python/paddle/fluid/tests/unittests/test_cast_op.py +++ b/python/paddle/fluid/tests/unittests/test_cast_op.py @@ -26,6 +26,7 @@ from paddle.fluid.framework import _test_eager_guard class TestCastOpFp32ToFp64(OpTest): + def setUp(self): ipt = np.random.random(size=[10, 10]) self.inputs = {'X': ipt.astype('float32')} @@ -44,6 +45,7 @@ class TestCastOpFp32ToFp64(OpTest): class TestCastOpFp16ToFp32(OpTest): + def setUp(self): ipt = np.random.random(size=[10, 10]) self.inputs = {'X': ipt.astype('float16')} @@ -60,6 +62,7 @@ class TestCastOpFp16ToFp32(OpTest): class TestCastOpFp32ToFp16(OpTest): + def setUp(self): ipt = np.random.random(size=[10, 10]) self.inputs = {'X': ipt.astype('float32')} @@ -76,6 +79,7 @@ class TestCastOpFp32ToFp16(OpTest): class TestCastOpBf16ToFp32(OpTest): + def setUp(self): ipt = np.array(np.random.randint(10, size=[10, 10])).astype('uint16') self.inputs = {'X': ipt} @@ -92,6 +96,7 @@ class TestCastOpBf16ToFp32(OpTest): class TestCastOpFp32ToBf16(OpTest): + def setUp(self): ipt = np.random.random(size=[10, 10]).astype('float32') self.inputs = {'X': ipt} @@ -108,15 +113,17 @@ class TestCastOpFp32ToBf16(OpTest): class TestCastOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of cast_op must be Variable. 
- x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.cast, x1, 'int32') class TestCastOpEager(unittest.TestCase): + def test_eager(self): with paddle.fluid.dygraph.base.guard(): with _test_eager_guard(): @@ -124,7 +131,8 @@ class TestCastOpEager(unittest.TestCase): x.stop_gradient = False out = paddle.cast(x, "float32") self.assertTrue( - np.array_equal(out, np.ones([2, 2]).astype("float32"))) + np.array_equal(out, + np.ones([2, 2]).astype("float32"))) out.backward() self.assertTrue(np.array_equal(x.gradient(), x.numpy())) self.assertTrue(x.gradient().dtype == np.float16) diff --git a/python/paddle/fluid/tests/unittests/test_center_loss.py b/python/paddle/fluid/tests/unittests/test_center_loss.py index 07175579fdd..fc64f37a26d 100644 --- a/python/paddle/fluid/tests/unittests/test_center_loss.py +++ b/python/paddle/fluid/tests/unittests/test_center_loss.py @@ -22,6 +22,7 @@ import paddle.fluid as fluid class TestCenterLossOp(OpTest): + def setUp(self): self.op_type = "center_loss" self.dtype = np.float64 @@ -88,18 +89,21 @@ class TestCenterLossOp(OpTest): class TestCenterLossOpNoUpdate(TestCenterLossOp): + def config(self): self.need_update = False class BadInputTestCenterLoss(unittest.TestCase): + def test_error(self): with fluid.program_guard(fluid.Program()): def test_bad_x(): data = [[1, 2, 3, 4], [5, 6, 7, 8]] - label = fluid.layers.data( - name='label', shape=[2, 1], dtype='int32') + label = fluid.layers.data(name='label', + shape=[2, 1], + dtype='int32') res = fluid.layers.center_loss( data, label, @@ -111,8 +115,9 @@ class BadInputTestCenterLoss(unittest.TestCase): self.assertRaises(TypeError, test_bad_x) def test_bad_y(): - data = fluid.layers.data( - name='data', shape=[2, 32], dtype='float32') + data = fluid.layers.data(name='data', + shape=[2, 32], + dtype='float32') label = [[2], [3]] res = fluid.layers.center_loss( data, @@ -125,21 +130,18 @@ class BadInputTestCenterLoss(unittest.TestCase): self.assertRaises(TypeError, test_bad_y) def test_bad_alpha(): - data = fluid.layers.data( - name='data2', - shape=[2, 32], - dtype='float32', - append_batch_size=False) - label = fluid.layers.data( - name='label2', - shape=[2, 1], - dtype='int32', - append_batch_size=False) - alpha = fluid.layers.data( - name='alpha', - shape=[1], - dtype='int64', - append_batch_size=False) + data = fluid.layers.data(name='data2', + shape=[2, 32], + dtype='float32', + append_batch_size=False) + label = fluid.layers.data(name='label2', + shape=[2, 1], + dtype='int32', + append_batch_size=False) + alpha = fluid.layers.data(name='alpha', + shape=[1], + dtype='int64', + append_batch_size=False) res = fluid.layers.center_loss( data, label, diff --git a/python/paddle/fluid/tests/unittests/test_channel_shuffle.py b/python/paddle/fluid/tests/unittests/test_channel_shuffle.py index b4a3fc38706..eaccb2193dc 100644 --- a/python/paddle/fluid/tests/unittests/test_channel_shuffle.py +++ b/python/paddle/fluid/tests/unittests/test_channel_shuffle.py @@ -42,6 +42,7 @@ def channel_shuffle_np(x, groups, data_format="NCHW"): class TestChannelShuffleOp(OpTest): + def setUp(self): self.op_type = "channel_shuffle" self.init_data_format() @@ -72,11 +73,13 @@ class TestChannelShuffleOp(OpTest): class TestChannelLast(TestChannelShuffleOp): + def init_data_format(self): self.format = "NHWC" class TestChannelShuffleAPI(unittest.TestCase): + def setUp(self): self.x_1_np = np.random.random([2, 9, 
4, 4]).astype("float64") self.x_2_np = np.random.random([2, 4, 4, 9]).astype("float64") @@ -89,10 +92,12 @@ class TestChannelShuffleAPI(unittest.TestCase): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.enable_static() - x_1 = paddle.fluid.data( - name="x", shape=[2, 9, 4, 4], dtype="float64") - x_2 = paddle.fluid.data( - name="x2", shape=[2, 4, 4, 9], dtype="float64") + x_1 = paddle.fluid.data(name="x", + shape=[2, 9, 4, 4], + dtype="float64") + x_2 = paddle.fluid.data(name="x2", + shape=[2, 4, 4, 9], + dtype="float64") out_1 = F.channel_shuffle(x_1, 3) out_2 = F.channel_shuffle(x_2, 3, "NHWC") @@ -117,10 +122,12 @@ class TestChannelShuffleAPI(unittest.TestCase): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.enable_static() - x_1 = paddle.fluid.data( - name="x", shape=[2, 9, 4, 4], dtype="float64") - x_2 = paddle.fluid.data( - name="x2", shape=[2, 4, 4, 9], dtype="float64") + x_1 = paddle.fluid.data(name="x", + shape=[2, 9, 4, 4], + dtype="float64") + x_2 = paddle.fluid.data(name="x2", + shape=[2, 4, 4, 9], + dtype="float64") # init instance ps_1 = paddle.nn.ChannelShuffle(3) ps_2 = paddle.nn.ChannelShuffle(3, "NHWC") @@ -162,14 +169,14 @@ class TestChannelShuffleAPI(unittest.TestCase): paddle.disable_static(place=place) - channel_shuffle = paddle.nn.ChannelShuffle( - groups, data_format=data_format) + channel_shuffle = paddle.nn.ChannelShuffle(groups, + data_format=data_format) result = channel_shuffle(paddle.to_tensor(x)) self.assertTrue(np.allclose(result.numpy(), npresult)) - result_functional = F.channel_shuffle( - paddle.to_tensor(x), 3, data_format) + result_functional = F.channel_shuffle(paddle.to_tensor(x), 3, + data_format) self.assertTrue(np.allclose(result_functional.numpy(), npresult)) channel_shuffle_str = 'groups={}'.format(groups) @@ -185,7 +192,9 @@ class TestChannelShuffleAPI(unittest.TestCase): class TestChannelShuffleError(unittest.TestCase): + def test_error_functional(self): + def error_input(): with paddle.fluid.dygraph.guard(): x = np.random.random([9, 4, 4]).astype("float64") @@ -210,12 +219,13 @@ class TestChannelShuffleError(unittest.TestCase): def error_data_format(): with paddle.fluid.dygraph.guard(): x = np.random.random([2, 9, 4, 4]).astype("float64") - channel_shuffle = F.channel_shuffle( - paddle.to_tensor(x), 3, "WOW") + channel_shuffle = F.channel_shuffle(paddle.to_tensor(x), 3, + "WOW") self.assertRaises(ValueError, error_data_format) def test_error_layer(self): + def error_input_layer(): with paddle.fluid.dygraph.guard(): x = np.random.random([9, 4, 4]).astype("float64") diff --git a/python/paddle/fluid/tests/unittests/test_check_import_scipy.py b/python/paddle/fluid/tests/unittests/test_check_import_scipy.py index 080d786cd62..c5e12e84c11 100644 --- a/python/paddle/fluid/tests/unittests/test_check_import_scipy.py +++ b/python/paddle/fluid/tests/unittests/test_check_import_scipy.py @@ -21,6 +21,7 @@ def my_import(name, globals=None, locals=None, fromlist=(), level=0): class importTest(unittest.TestCase): + def test_import(self): testOsName = 'nt' old_import = builtins.__import__ diff --git a/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py b/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py index 4c1b1e0f0bf..1f90270630b 100644 --- a/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py +++ b/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py @@ -26,6 +26,7 @@ from paddle.fluid.incubate.checkpoint.checkpoint_saver import CheckpointSaver class 
CheckpointerSaverTest(unittest.TestCase): + def test(self): fs = HDFSClient("/usr/local/hadoop-2.7.7", None) dir_path = "./checkpointsaver_test" diff --git a/python/paddle/fluid/tests/unittests/test_cholesky_op.py b/python/paddle/fluid/tests/unittests/test_cholesky_op.py index 633aa2cd613..ac6525e7a47 100644 --- a/python/paddle/fluid/tests/unittests/test_cholesky_op.py +++ b/python/paddle/fluid/tests/unittests/test_cholesky_op.py @@ -26,7 +26,8 @@ from decorator_helper import prog_scope @skip_check_grad_ci( - reason="The input of cholesky_op should always be symmetric positive-definite. " + reason= + "The input of cholesky_op should always be symmetric positive-definite. " "However, OpTest calculates the numeric gradient of each element in input " "via small finite difference, which makes the input no longer symmetric " "positive-definite thus can not compute the Cholesky decomposition. " @@ -34,13 +35,15 @@ from decorator_helper import prog_scope "check of cholesky_op, since it supports check gradient with a program " "and we can construct symmetric positive-definite matrices in the program") class TestCholeskyOp(OpTest): + def setUp(self): self.op_type = "cholesky" self._input_shape = (2, 32, 32) self._upper = True self.init_config() self.trans_dims = list(range(len(self._input_shape) - 2)) + [ - len(self._input_shape) - 1, len(self._input_shape) - 2 + len(self._input_shape) - 1, + len(self._input_shape) - 2 ] self.root_data = np.random.random(self._input_shape).astype("float64") # construct symmetric positive-definite matrice @@ -69,8 +72,8 @@ class TestCholeskyOp(OpTest): root_data = self.root_data[..., :3, :3] prog = fluid.Program() with fluid.program_guard(prog): - root = layers.create_parameter( - dtype=root_data.dtype, shape=root_data.shape) + root = layers.create_parameter(dtype=root_data.dtype, + shape=root_data.shape) root_t = layers.transpose(root, self.trans_dims) x = layers.matmul(x=root, y=root_t) + 1e-05 out = paddle.cholesky(x, upper=self.attrs["upper"]) @@ -81,16 +84,19 @@ class TestCholeskyOp(OpTest): class TestCholeskyOpLower(TestCholeskyOp): + def init_config(self): self._upper = False class TestCholeskyOp2D(TestCholeskyOp): + def init_config(self): self._input_shape = (64, 64) class TestDygraph(unittest.TestCase): + def test_dygraph(self): if core.is_compiled_with_rocm(): paddle.disable_static(place=fluid.CPUPlace()) @@ -104,6 +110,7 @@ class TestDygraph(unittest.TestCase): class TestCholeskySingularAPI(unittest.TestCase): + def setUp(self): self.places = [fluid.CPUPlace()] if core.is_compiled_with_cuda() and (not core.is_compiled_with_rocm()): diff --git a/python/paddle/fluid/tests/unittests/test_cholesky_solve_op.py b/python/paddle/fluid/tests/unittests/test_cholesky_solve_op.py index bb45a525662..d03cfed9697 100644 --- a/python/paddle/fluid/tests/unittests/test_cholesky_solve_op.py +++ b/python/paddle/fluid/tests/unittests/test_cholesky_solve_op.py @@ -20,6 +20,7 @@ import scipy import scipy.linalg import sys + sys.path.append("..") import paddle from op_test import OpTest @@ -40,18 +41,17 @@ def cholesky_solution(X, B, upper=True): L = A U = A.T return scipy.linalg.solve_triangular( - U, scipy.linalg.solve_triangular( - L, B, lower=True)) + U, scipy.linalg.solve_triangular(L, B, lower=True)) #cholesky_solve implement 2 def scipy_cholesky_solution(X, B, upper=True): if upper: umat = np.triu(X) - A = umat.T @umat + A = umat.T @ umat else: umat = np.tril(X) - A = umat @umat.T + A = umat @ umat.T K = scipy.linalg.cho_factor(A) return scipy.linalg.cho_solve(K, B) @@ 
-88,7 +88,7 @@ def scipy_cholesky_solution_batch(bumat, bB, upper=True): batch *= d bx = [] for b in range(batch): - # x = scipy_cholesky_solution(bumat[b], bB[b], upper) #large matrix result error + # x = scipy_cholesky_solution(bumat[b], bB[b], upper) #large matrix result error x = cholesky_solution(bumat[b], bB[b], upper) bx.append(x) return np.array(bx).reshape(bshape) @@ -111,8 +111,9 @@ class TestCholeskySolveOp(OpTest): #get scipy result def set_output(self): umat = self.inputs['Y'] - self.output = scipy_cholesky_solution_batch( - umat, self.inputs['X'], upper=self.upper) + self.output = scipy_cholesky_solution_batch(umat, + self.inputs['X'], + upper=self.upper) def setUp(self): self.op_type = "cholesky_solve" @@ -155,6 +156,7 @@ class TestCholeskySolveOp3(TestCholeskySolveOp): #API function test class TestCholeskySolveAPI(unittest.TestCase): + def setUp(self): np.random.seed(2021) self.place = [paddle.CPUPlace()] @@ -181,8 +183,10 @@ class TestCholeskySolveAPI(unittest.TestCase): exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), - feed={"x": x_np, - "y": umat}, + feed={ + "x": x_np, + "y": umat + }, fetch_list=[z]) self.assertTrue(np.allclose(fetches[0], z_np)) @@ -193,6 +197,7 @@ class TestCholeskySolveAPI(unittest.TestCase): #test in dynamic mode def test_dygraph(self): + def run(place): paddle.disable_static(place) x_np = np.random.random([20, 2]).astype(self.dtype) @@ -212,6 +217,7 @@ class TestCholeskySolveAPI(unittest.TestCase): #test input with broadcast def test_broadcast(self): + def run(place): paddle.disable_static() x_np = np.random.random([1, 30, 2]).astype(self.dtype) @@ -232,17 +238,18 @@ class TestCholeskySolveAPI(unittest.TestCase): #test condition out of bounds class TestCholeskySolveOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): # The input type of solve_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) - y1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) + y1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) self.assertRaises(TypeError, paddle.linalg.cholesky_solve, x1, y1) - # The data type of input must be float32 or float64. + # The data type of input must be float32 or float64. x2 = fluid.data(name="x2", shape=[30, 30], dtype="bool") y2 = fluid.data(name="y2", shape=[30, 10], dtype="bool") self.assertRaises(TypeError, paddle.linalg.cholesky_solve, x2, y2) diff --git a/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py b/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py index 2ebf6070c82..ec98b254e06 100644 --- a/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py +++ b/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py @@ -23,6 +23,7 @@ from paddle import fluid class Segment(object): + def __init__(self, chunk_type, start_idx, end_idx): self.chunk_type = chunk_type self.start_idx = start_idx @@ -49,15 +50,15 @@ class TestChunkEvalOp(OpTest): for chunk in chunks: if self.scheme == 'IOB': data[chunk.start_idx] = chunk.chunk_type * self.num_tag_types - data[chunk.start_idx + 1: - chunk.end_idx] = chunk.chunk_type * self.num_tag_types + ( + data[chunk.start_idx + 1:chunk. 
+ end_idx] = chunk.chunk_type * self.num_tag_types + ( self.num_tag_types - 1) data[chunk.end_idx] = chunk.chunk_type * self.num_tag_types + ( self.num_tag_types - 1 ) if chunk.start_idx < chunk.end_idx else data[chunk.start_idx] elif self.scheme == 'IOE': - data[chunk.start_idx: - chunk.end_idx] = chunk.chunk_type * self.num_tag_types + data[chunk.start_idx:chunk. + end_idx] = chunk.chunk_type * self.num_tag_types data[chunk.end_idx] = chunk.chunk_type * self.num_tag_types + ( self.num_tag_types - 1) @@ -67,15 +68,15 @@ class TestChunkEvalOp(OpTest): chunks = [] # generate chunk beginnings chunk_begins = sorted( - np.random.choice( - list(range(starts[-1])), num_chunks, replace=False)) + np.random.choice(list(range(starts[-1])), num_chunks, + replace=False)) seq_chunk_begins = [] begin_idx = 0 # divide chunks into sequences for i in range(len(starts) - 1): tmp_chunk_begins = [] - while begin_idx < len(chunk_begins) and chunk_begins[ - begin_idx] < starts[i + 1]: + while begin_idx < len( + chunk_begins) and chunk_begins[begin_idx] < starts[i + 1]: tmp_chunk_begins.append(chunk_begins[begin_idx]) begin_idx += 1 seq_chunk_begins.append(tmp_chunk_begins) @@ -84,8 +85,9 @@ class TestChunkEvalOp(OpTest): for i in range(len(seq_chunk_begins)): for j in range(len(seq_chunk_begins[i])): low = seq_chunk_begins[i][j] - high = seq_chunk_begins[i][j + 1] if j < len(seq_chunk_begins[ - i]) - 1 else starts[i + 1] + high = seq_chunk_begins[i][ + j + 1] if j < len(seq_chunk_begins[i]) - 1 else starts[i + + 1] chunk_ends.append(np.random.randint(low, high)) # generate chunks for chunk_pos in zip(chunk_begins, chunk_ends): @@ -94,11 +96,12 @@ class TestChunkEvalOp(OpTest): return chunks def gen_chunks(self, infer, label, starts): - chunks = self.rand_chunks(starts, - self.num_infer_chunks + self.num_label_chunks - - self.num_correct_chunks) - correct_chunks = np.random.choice( - list(range(len(chunks))), self.num_correct_chunks, replace=False) + chunks = self.rand_chunks( + starts, self.num_infer_chunks + self.num_label_chunks - + self.num_correct_chunks) + correct_chunks = np.random.choice(list(range(len(chunks))), + self.num_correct_chunks, + replace=False) infer_chunks = np.random.choice( [x for x in range(len(chunks)) if x not in correct_chunks], self.num_infer_chunks - self.num_correct_chunks, @@ -142,10 +145,9 @@ class TestChunkEvalOp(OpTest): infer = np.zeros((self.batch_size, )).astype('int64') infer.fill(self.num_chunk_types * self.num_tag_types) label = np.copy(infer) - starts = np.random.choice( - list(range(1, self.batch_size)), - self.num_sequences - 1, - replace=False).tolist() + starts = np.random.choice(list(range(1, self.batch_size)), + self.num_sequences - 1, + replace=False).tolist() starts.extend([0, self.batch_size]) starts = sorted(starts) self.num_correct_chunks, self.num_infer_chunks, self.num_label_chunks = self.gen_chunks( @@ -162,18 +164,15 @@ class TestChunkEvalOp(OpTest): f1 = float(2 * precision * recall) / ( precision + recall) if self.num_correct_chunks else 0 self.outputs = { - 'Precision': np.asarray( - [precision], dtype='float32'), - 'Recall': np.asarray( - [recall], dtype='float32'), - 'F1-Score': np.asarray( - [f1], dtype='float32'), - 'NumInferChunks': np.asarray( - [self.num_infer_chunks], dtype='int64'), - 'NumLabelChunks': np.asarray( - [self.num_label_chunks], dtype='int64'), - 'NumCorrectChunks': np.asarray( - [self.num_correct_chunks], dtype='int64') + 'Precision': np.asarray([precision], dtype='float32'), + 'Recall': np.asarray([recall], dtype='float32'), + 
'F1-Score': np.asarray([f1], dtype='float32'), + 'NumInferChunks': np.asarray([self.num_infer_chunks], + dtype='int64'), + 'NumLabelChunks': np.asarray([self.num_label_chunks], + dtype='int64'), + 'NumCorrectChunks': np.asarray([self.num_correct_chunks], + dtype='int64') } def set_input(self, infer, label, lod): @@ -189,6 +188,7 @@ class TestChunkEvalOp(OpTest): class TestChunkEvalOpWithExclude(TestChunkEvalOp): + def set_confs(self): # Use the IOE scheme and labels with 3 chunk types self.scheme = 'IOE' @@ -205,6 +205,7 @@ class TestChunkEvalOpWithExclude(TestChunkEvalOp): class TestChunkEvalOpWithTensorInput(TestChunkEvalOp): + def set_input(self, infer, label, lod): max_len = np.max(lod) pad_infer = [] @@ -233,41 +234,41 @@ class TestChunkEvalOpWithTensorInput(TestChunkEvalOp): class TestChunkEvalOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): def test_input(): input_data = np.random.random(1, 1).astype("int64") label_data = np.random.random(1).astype("int64") - fluid.layers.chunk_eval( - input=input_data, - label=label_data, - chunk_scheme="IOB", - num_chunk_types=3) + fluid.layers.chunk_eval(input=input_data, + label=label_data, + chunk_scheme="IOB", + num_chunk_types=3) self.assertRaises(TypeError, test_input) def test_label(): - input_ = fluid.data( - name="input", shape=[None, 1], dtype="int64") + input_ = fluid.data(name="input", + shape=[None, 1], + dtype="int64") label_data = np.random.random(1).astype("int64") - fluid.layers.chunk_eval( - input=input_, - label=label_data, - chunk_scheme="IOB", - num_chunk_types=3) + fluid.layers.chunk_eval(input=input_, + label=label_data, + chunk_scheme="IOB", + num_chunk_types=3) self.assertRaises(TypeError, test_label) def test_type(): - in_data = fluid.data( - name="input_", shape=[None, 1], dtype="int32") + in_data = fluid.data(name="input_", + shape=[None, 1], + dtype="int32") label = fluid.data(name="label_", shape=[1], dtype="int64") - fluid.layers.chunk_eval( - input=in_data, - label=label, - chunk_scheme="IOB", - num_chunk_types=3) + fluid.layers.chunk_eval(input=in_data, + label=label, + chunk_scheme="IOB", + num_chunk_types=3) self.assertRaises(TypeError, test_type) diff --git a/python/paddle/fluid/tests/unittests/test_chunk_op.py b/python/paddle/fluid/tests/unittests/test_chunk_op.py index 8488bfe773f..d7362430f1a 100644 --- a/python/paddle/fluid/tests/unittests/test_chunk_op.py +++ b/python/paddle/fluid/tests/unittests/test_chunk_op.py @@ -23,6 +23,7 @@ import paddle class TestChunkOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The type of axis in chunk_op should be int or Variable. 
@@ -55,6 +56,7 @@ class TestChunkOpError(unittest.TestCase): class API_TestChunk(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data1 = paddle.fluid.data('data1', shape=[4, 6, 6], dtype='float64') @@ -64,8 +66,10 @@ class API_TestChunk(unittest.TestCase): exe = paddle.static.Executor(place) input1 = np.random.random([4, 6, 6]).astype('float64') input2 = np.array([2]).astype('int32') - r0, r1, r2, = exe.run(feed={"data1": input1, - "data2": input2}, + r0, r1, r2, = exe.run(feed={ + "data1": input1, + "data2": input2 + }, fetch_list=[x0, x1, x2]) ex_x0, ex_x1, ex_x2 = np.array_split(input1, 3, axis=2) self.assertTrue(np.allclose(ex_x0, r0)) @@ -74,6 +78,7 @@ class API_TestChunk(unittest.TestCase): class API_TestChunk1(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data1 = paddle.fluid.data('data1', shape=[4, 6, 6], dtype='float64') @@ -90,6 +95,7 @@ class API_TestChunk1(unittest.TestCase): class API_TestDygraphChunk(unittest.TestCase): + def test_out1(self): with fluid.dygraph.guard(): input_1 = np.random.random([4, 6, 6]).astype("int32") diff --git a/python/paddle/fluid/tests/unittests/test_class_center_sample_op.py b/python/paddle/fluid/tests/unittests/test_class_center_sample_op.py index eb7d05df492..492dae47f2a 100644 --- a/python/paddle/fluid/tests/unittests/test_class_center_sample_op.py +++ b/python/paddle/fluid/tests/unittests/test_class_center_sample_op.py @@ -34,8 +34,8 @@ def class_center_sample_numpy(label, classes_list, num_samples): for i in range(nranks): index = np.logical_and(unique_label >= class_interval[i], unique_label < class_interval[i + 1]) - pos_class_center_per_device.append(unique_label[index] - class_interval[ - i]) + pos_class_center_per_device.append(unique_label[index] - + class_interval[i]) unique_label_per_device.append(unique_label[index]) num_samples_per_device = [] @@ -57,6 +57,7 @@ def class_center_sample_numpy(label, classes_list, num_samples): class TestClassCenterSampleOp(OpTest): + def initParams(self): self.op_type = "class_center_sample" self.batch_size = 20 @@ -74,8 +75,9 @@ class TestClassCenterSampleOp(OpTest): self.initParams() self.init_dtype() self.init_fix_seed() - label = np.random.randint( - 0, self.num_classes, (self.batch_size, ), dtype=self.dtype) + label = np.random.randint(0, + self.num_classes, (self.batch_size, ), + dtype=self.dtype) remapped_label, sampled_class_center = class_center_sample_numpy( label, [self.num_classes], self.num_samples) @@ -98,16 +100,19 @@ class TestClassCenterSampleOp(OpTest): class TestClassCenterSampleOpINT32(TestClassCenterSampleOp): + def init_dtype(self): self.dtype = np.int32 class TestClassCenterSampleOpFixSeed(TestClassCenterSampleOp): + def init_fix_seed(self): self.fix_seed = True class TestClassCenterSampleV2(unittest.TestCase): + def setUp(self): self.initParams() np.random.seed(self.seed) @@ -132,21 +137,23 @@ class TestClassCenterSampleV2(unittest.TestCase): def check_static_result(self, place): with program_guard(Program(), Program()): - label_np = np.random.randint( - 0, self.num_classes, (self.batch_size, ), dtype=self.dtype) + label_np = np.random.randint(0, + self.num_classes, (self.batch_size, ), + dtype=self.dtype) - label = paddle.static.data( - name='label', shape=[self.batch_size], dtype=self.dtype) + label = paddle.static.data(name='label', + shape=[self.batch_size], + dtype=self.dtype) remapped_label, sampled_class_index = paddle.nn.functional.class_center_sample( label, 
self.num_classes, self.num_samples) remapped_label_np, sampled_class_center_np = class_center_sample_numpy( label_np, [self.num_classes], self.num_samples) exe = paddle.fluid.Executor(place) - [remapped_label_res, sampled_class_index_res] = exe.run( - paddle.fluid.default_main_program(), - feed={'label': label_np}, - fetch_list=[remapped_label, sampled_class_index]) + [remapped_label_res, sampled_class_index_res + ] = exe.run(paddle.fluid.default_main_program(), + feed={'label': label_np}, + fetch_list=[remapped_label, sampled_class_index]) np.testing.assert_allclose(remapped_label_res, remapped_label_np) np.testing.assert_allclose( sampled_class_index_res[:len(sampled_class_center_np[0])], @@ -158,8 +165,9 @@ class TestClassCenterSampleV2(unittest.TestCase): def check_dynamic_result(self, place): with paddle.fluid.dygraph.guard(place): - label_np = np.random.randint( - 0, self.num_classes, (self.batch_size, ), dtype=self.dtype) + label_np = np.random.randint(0, + self.num_classes, (self.batch_size, ), + dtype=self.dtype) label = paddle.to_tensor(label_np, dtype=self.dtype) remapped_label, sampled_class_index = paddle.nn.functional.class_center_sample( @@ -177,11 +185,13 @@ class TestClassCenterSampleV2(unittest.TestCase): class TestClassCenterSampleV2INT32(TestClassCenterSampleV2): + def init_dtype(self): self.dtype = np.int32 class TestClassCenterSampleAPIError(unittest.TestCase): + def setUp(self): self.initParams() np.random.seed(self.seed) @@ -200,13 +210,14 @@ class TestClassCenterSampleAPIError(unittest.TestCase): self.dtype = np.int64 def test_dynamic_errors(self): + def test_num_samples(): for place in self.places: with paddle.fluid.dygraph.guard(place): - label_np = np.random.randint( - 0, - self.num_classes, (self.batch_size, ), - dtype=self.dtype) + label_np = np.random.randint(0, + self.num_classes, + (self.batch_size, ), + dtype=self.dtype) label = paddle.to_tensor(label_np) remapped_label, sampled_class_index = paddle.nn.functional.class_center_sample( @@ -216,6 +227,7 @@ class TestClassCenterSampleAPIError(unittest.TestCase): class TestClassCenterSampleAPIError1(unittest.TestCase): + def setUp(self): self.initParams() np.random.seed(self.seed) @@ -234,6 +246,7 @@ class TestClassCenterSampleAPIError1(unittest.TestCase): self.dtype = np.int64 def test_dynamic_errors(self): + def test_empty_label(): for place in self.places: with paddle.fluid.dygraph.guard(place): @@ -245,10 +258,10 @@ class TestClassCenterSampleAPIError1(unittest.TestCase): def test_group_value(): for place in self.places: with paddle.fluid.dygraph.guard(place): - label_np = np.random.randint( - 0, - self.num_classes, (self.batch_size, ), - dtype=self.dtype) + label_np = np.random.randint(0, + self.num_classes, + (self.batch_size, ), + dtype=self.dtype) label = paddle.to_tensor(label_np) remapped_label, sampled_class_index = paddle.nn.functional.class_center_sample( diff --git a/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py b/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py index 7f137cf1371..8eb4c7a8be9 100644 --- a/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py @@ -24,6 +24,7 @@ import paddle.fluid.core as core class TestClipByNormOp(OpTest): + def setUp(self): self.max_relative_error = 0.006 self.init_dtype() @@ -31,7 +32,9 @@ class TestClipByNormOp(OpTest): input = np.random.random(self.shape).astype(self.dtype) input[np.abs(input) < self.max_relative_error] = 0.5 self.op_type = "clip_by_norm" - 
self.inputs = {'X': input, } + self.inputs = { + 'X': input, + } self.attrs = {} self.attrs['max_norm'] = self.max_norm norm = np.sqrt(np.sum(np.square(input))) @@ -53,24 +56,28 @@ class TestClipByNormOp(OpTest): class TestCase1(TestClipByNormOp): + def initTestCase(self): self.shape = (100, ) self.max_norm = 1e20 class TestCase2(TestClipByNormOp): + def initTestCase(self): self.shape = (16, 16) self.max_norm = 0.1 class TestCase3(TestClipByNormOp): + def initTestCase(self): self.shape = (4, 8, 16) self.max_norm = 1.0 class TestClipByNormOpFp16(TestClipByNormOp): + def init_dtype(self): self.dtype = np.float16 @@ -82,24 +89,28 @@ class TestClipByNormOpFp16(TestClipByNormOp): class TestClipByNormOpFp16Case1(TestClipByNormOpFp16): + def initTestCase(self): self.shape = (100, ) self.max_norm = 1e20 class TestClipByNormOpFp16Case2(TestClipByNormOpFp16): + def initTestCase(self): self.shape = (16, 16) self.max_norm = 0.1 class TestClipByNormOpFp16Case3(TestClipByNormOpFp16): + def initTestCase(self): self.shape = (4, 8, 16) self.max_norm = 1.0 class TestClipByNormOpWithSelectedRows(unittest.TestCase): + def check_with_place(self, place): self.config_test_case() scope = core.Scope() @@ -116,8 +127,10 @@ class TestClipByNormOpWithSelectedRows(unittest.TestCase): out_selected_rows = scope.var('Out').get_selected_rows() # run clip_by_norm_op - clip_by_norm_op = fluid.op.Operator( - "clip_by_norm", max_norm=self.max_norm, X='X', Out='Out') + clip_by_norm_op = fluid.op.Operator("clip_by_norm", + max_norm=self.max_norm, + X='X', + Out='Out') clip_by_norm_op.run(scope, place) # check output @@ -133,8 +146,10 @@ class TestClipByNormOpWithSelectedRows(unittest.TestCase): else: output = y_np self.assertTrue( - np.allclose( - np.array(out_tensor), output, atol=1e-5, equal_nan=False)) + np.allclose(np.array(out_tensor), + output, + atol=1e-5, + equal_nan=False)) def test_clip_by_norm_with_selected_ros(self): places = [core.CPUPlace()] diff --git a/python/paddle/fluid/tests/unittests/test_clip_op.py b/python/paddle/fluid/tests/unittests/test_clip_op.py index 121b91d7415..61ff4a63bef 100644 --- a/python/paddle/fluid/tests/unittests/test_clip_op.py +++ b/python/paddle/fluid/tests/unittests/test_clip_op.py @@ -24,6 +24,7 @@ from paddle.fluid.framework import _test_eager_guard class TestClipOp(OpTest): + def setUp(self): self.max_relative_error = 0.006 self.python_api = paddle.clip @@ -71,6 +72,7 @@ class TestClipOp(OpTest): class TestCase1(TestClipOp): + def initTestCase(self): self.dtype = np.float32 self.shape = (8, 16, 8) @@ -79,6 +81,7 @@ class TestCase1(TestClipOp): class TestCase2(TestClipOp): + def initTestCase(self): self.dtype = np.float32 self.shape = (8, 16) @@ -87,6 +90,7 @@ class TestCase2(TestClipOp): class TestCase3(TestClipOp): + def initTestCase(self): self.dtype = np.float32 self.shape = (4, 8, 16) @@ -95,6 +99,7 @@ class TestCase3(TestClipOp): class TestCase4(TestClipOp): + def initTestCase(self): self.dtype = np.float32 self.shape = (4, 8, 8) @@ -105,6 +110,7 @@ class TestCase4(TestClipOp): class TestCase5(TestClipOp): + def initTestCase(self): self.dtype = np.float32 self.shape = (4, 8, 16) @@ -113,6 +119,7 @@ class TestCase5(TestClipOp): class TestCase6(TestClipOp): + def initTestCase(self): self.dtype == np.float16 self.shape = (4, 8, 8) @@ -123,6 +130,7 @@ class TestCase6(TestClipOp): class TestClipOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): @@ -142,6 +150,7 @@ class TestClipOpError(unittest.TestCase): class 
TestClipAPI(unittest.TestCase): + def _executed_api(self, x, min=None, max=None): return paddle.clip(x, min, max) @@ -153,8 +162,8 @@ class TestClipAPI(unittest.TestCase): min = fluid.data(name='min', shape=[1], dtype='float32') max = fluid.data(name='max', shape=[1], dtype='float32') - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) out_1 = self._executed_api(images, min=min, max=max) @@ -165,12 +174,15 @@ class TestClipAPI(unittest.TestCase): out_6 = self._executed_api(images, max=max) out_7 = self._executed_api(images, max=-1.) out_8 = self._executed_api(images) - out_9 = self._executed_api( - paddle.cast(images, 'float64'), min=0.2, max=0.9) - out_10 = self._executed_api( - paddle.cast(images * 10, 'int32'), min=2, max=8) - out_11 = self._executed_api( - paddle.cast(images * 10, 'int64'), min=2, max=8) + out_9 = self._executed_api(paddle.cast(images, 'float64'), + min=0.2, + max=0.9) + out_10 = self._executed_api(paddle.cast(images * 10, 'int32'), + min=2, + max=8) + out_11 = self._executed_api(paddle.cast(images * 10, 'int64'), + min=2, + max=8) res1, res2, res3, res4, res5, res6, res7, res8, res9, res10, res11 = exe.run( fluid.default_main_program(), @@ -193,7 +205,8 @@ class TestClipAPI(unittest.TestCase): self.assertTrue(np.allclose(res7, data.clip(max=-1))) self.assertTrue(np.allclose(res8, data)) self.assertTrue( - np.allclose(res9, data.astype(np.float64).clip(0.2, 0.9))) + np.allclose(res9, + data.astype(np.float64).clip(0.2, 0.9))) self.assertTrue( np.allclose(res10, (data * 10).astype(np.int32).clip(2, 8))) self.assertTrue( @@ -202,8 +215,8 @@ class TestClipAPI(unittest.TestCase): def func_clip_dygraph(self): paddle.disable_static() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() paddle.disable_static(place) data_shape = [1, 9, 9, 4] data = np.random.random(data_shape).astype('float32') @@ -217,10 +230,12 @@ class TestClipAPI(unittest.TestCase): images = paddle.to_tensor(data, dtype='float32') out_3 = self._executed_api(images, min=v_min, max=v_max) - out_4 = self._executed_api( - paddle.cast(images * 10, 'int32'), min=2, max=8) - out_5 = self._executed_api( - paddle.cast(images * 10, 'int64'), min=2, max=8) + out_4 = self._executed_api(paddle.cast(images * 10, 'int32'), + min=2, + max=8) + out_5 = self._executed_api(paddle.cast(images * 10, 'int64'), + min=2, + max=8) # test with numpy.generic out_6 = self._executed_api(images, min=np.abs(0.2), max=np.abs(0.8)) @@ -267,6 +282,7 @@ class TestClipAPI(unittest.TestCase): class TestInplaceClipAPI(TestClipAPI): + def _executed_api(self, x, min=None, max=None): return x.clip_(min, max) diff --git a/python/paddle/fluid/tests/unittests/test_coalesce_tensor_op.py b/python/paddle/fluid/tests/unittests/test_coalesce_tensor_op.py index 868a7233424..495d405c46b 100644 --- a/python/paddle/fluid/tests/unittests/test_coalesce_tensor_op.py +++ b/python/paddle/fluid/tests/unittests/test_coalesce_tensor_op.py @@ -25,6 +25,7 @@ alignment = 256 @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestAllocContinuousSpace(OpTest): + def setUp(self): self.op_type = "coalesce_tensor" self.dtype, self.fluid_dtype = self.init_dtype() @@ -80,13 +81,15 @@ class TestAllocContinuousSpace(OpTest): return outputs, 
coalesce_tensor_var def test_check_output(self): - self.check_output_with_place( - place=core.CUDAPlace(0), no_check_set=["FusedOutput"], atol=1e-5) + self.check_output_with_place(place=core.CUDAPlace(0), + no_check_set=["FusedOutput"], + atol=1e-5) @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestAllocContinuousSpace2(TestAllocContinuousSpace): + def init_attr(self): return { "copy_data": False, @@ -97,8 +100,9 @@ class TestAllocContinuousSpace2(TestAllocContinuousSpace): } def test_check_output(self): - self.check_output_with_place( - place=core.CUDAPlace(0), no_check_set=["FusedOutput"], atol=1e-5) + self.check_output_with_place(place=core.CUDAPlace(0), + no_check_set=["FusedOutput"], + atol=1e-5) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_collect_fpn_proposals_op.py b/python/paddle/fluid/tests/unittests/test_collect_fpn_proposals_op.py index a2f56c42801..5f854ccefcb 100644 --- a/python/paddle/fluid/tests/unittests/test_collect_fpn_proposals_op.py +++ b/python/paddle/fluid/tests/unittests/test_collect_fpn_proposals_op.py @@ -22,11 +22,12 @@ from op_test import OpTest class TestCollectFPNProposalstOp(OpTest): + def set_data(self): self.init_test_case() self.make_rois() - self.scores_input = [('y%d' % i, - (self.scores[i].reshape(-1, 1), self.rois_lod[i])) + self.scores_input = [('y%d' % i, (self.scores[i].reshape(-1, 1), + self.rois_lod[i])) for i in range(self.num_level)] self.rois, self.lod = self.calc_rois_collect() inputs_x = [('x%d' % i, (self.roi_inputs[i][:, 1:], self.rois_lod[i])) @@ -36,7 +37,9 @@ class TestCollectFPNProposalstOp(OpTest): "MultiLevelScores": self.scores_input, 'MultiLevelRoIsNum': [] } - self.attrs = {'post_nms_topN': self.post_nms_top_n, } + self.attrs = { + 'post_nms_topN': self.post_nms_top_n, + } self.outputs = { 'FpnRois': (self.rois, [self.lod]), 'RoisNum': np.array(self.lod).astype('int32') @@ -101,26 +104,28 @@ class TestCollectFPNProposalstOp(OpTest): class TestCollectFPNProposalstOpWithRoisNum(TestCollectFPNProposalstOp): + def set_data(self): self.init_test_case() self.make_rois() - self.scores_input = [('y%d' % i, - (self.scores[i].reshape(-1, 1), self.rois_lod[i])) + self.scores_input = [('y%d' % i, (self.scores[i].reshape(-1, 1), + self.rois_lod[i])) for i in range(self.num_level)] self.rois, self.lod = self.calc_rois_collect() inputs_x = [('x%d' % i, (self.roi_inputs[i][:, 1:], self.rois_lod[i])) for i in range(self.num_level)] - rois_num_per_level = [ - ('rois%d' % i, np.array(self.rois_lod[i][0]).astype('int32')) - for i in range(self.num_level) - ] + rois_num_per_level = [('rois%d' % i, + np.array(self.rois_lod[i][0]).astype('int32')) + for i in range(self.num_level)] self.inputs = { 'MultiLevelRois': inputs_x, "MultiLevelScores": self.scores_input, 'MultiLevelRoIsNum': rois_num_per_level } - self.attrs = {'post_nms_topN': self.post_nms_top_n, } + self.attrs = { + 'post_nms_topN': self.post_nms_top_n, + } self.outputs = { 'FpnRois': (self.rois, [self.lod]), 'RoisNum': np.array(self.lod).astype('int32') diff --git a/python/paddle/fluid/tests/unittests/test_collective_allgather_api.py b/python/paddle/fluid/tests/unittests/test_collective_allgather_api.py index dbf77fafcc4..ebc52ded8bc 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_allgather_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_allgather_api.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCollectiveAllgatherAPI(TestDistBase): + def _setup_config(self): pass diff 
--git a/python/paddle/fluid/tests/unittests/test_collective_allreduce_api.py b/python/paddle/fluid/tests/unittests/test_collective_allreduce_api.py index eed2388f36f..5ec08aa72e2 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_allreduce_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_allreduce_api.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCollectiveAllreduceAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_alltoall_api.py b/python/paddle/fluid/tests/unittests/test_collective_alltoall_api.py index bb6a8c29bc5..2fe1252846c 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_alltoall_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_alltoall_api.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCollectiveAllToAllAPI(TestDistBase): + def _setup_config(self): pass @@ -30,11 +31,10 @@ class TestCollectiveAllToAllAPI(TestDistBase): self.check_with_place("collective_alltoall_api.py", "alltoall", "nccl") def test_alltoall_nccl_dygraph(self): - self.check_with_place( - "collective_alltoall_api_dygraph.py", - "alltoall", - "nccl", - static_mode="0") + self.check_with_place("collective_alltoall_api_dygraph.py", + "alltoall", + "nccl", + static_mode="0") if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_collective_api_base.py b/python/paddle/fluid/tests/unittests/test_collective_api_base.py index a4e71db3d38..46cf0f4fcad 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_api_base.py +++ b/python/paddle/fluid/tests/unittests/test_collective_api_base.py @@ -31,6 +31,7 @@ from paddle.fluid import core class TestCollectiveAPIRunnerBase(object): + def get_model(self, train_prog, startup_prog, rank, indata=None): raise NotImplementedError( "get model should be implemented by child class.") @@ -90,6 +91,7 @@ from contextlib import closing class TestDistBase(unittest.TestCase): + def setUp(self): self._port_set = set() self._trainers = 2 @@ -98,6 +100,7 @@ class TestDistBase(unittest.TestCase): self._python_interp = sys.executable def _find_free_port(self): + def __free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: @@ -157,18 +160,16 @@ class TestDistBase(unittest.TestCase): tr1_cmd = tr_cmd % (self._python_interp, model_file) tr0_pipe = open("/tmp/tr0_err_%d.log" % os.getpid(), "w") tr1_pipe = open("/tmp/tr1_err_%d.log" % os.getpid(), "w") - #print(tr0_cmd) - tr0_proc = subprocess.Popen( - tr0_cmd.strip().split(), - stdout=subprocess.PIPE, - stderr=tr0_pipe, - env=env0) - - tr1_proc = subprocess.Popen( - tr0_cmd.strip().split(), - stdout=subprocess.PIPE, - stderr=tr1_pipe, - env=env1) + #print(tr0_cmd) + tr0_proc = subprocess.Popen(tr0_cmd.strip().split(), + stdout=subprocess.PIPE, + stderr=tr0_pipe, + env=env0) + + tr1_proc = subprocess.Popen(tr0_cmd.strip().split(), + stdout=subprocess.PIPE, + stderr=tr1_pipe, + env=env1) tr0_out, tr0_err = tr0_proc.communicate() tr1_out, tr1_err = tr1_proc.communicate() @@ -223,8 +224,8 @@ class TestDistBase(unittest.TestCase): else: required_envs["FLAGS_enable_eager_mode"] = "%d" % 0 - tr0_out, tr1_out, pid0, pid1 = self._run_cluster(model_file, - required_envs) + tr0_out, tr1_out, pid0, pid1 = self._run_cluster( + model_file, required_envs) np.random.seed(pid0) input1 = np.random.random((10, 1000)) np.random.seed(pid1) @@ -251,11 +252,9 @@ class TestDistBase(unittest.TestCase): elif col_type == "allreduce": need_result = input1 + input2 self.assertTrue( 
- np.allclose( - tr0_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr0_out, need_result, rtol=1e-05, atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out, need_result, rtol=1e-05, atol=1e-05)) elif col_type == "parallel_embedding": result_data = tr0_out[0] np.random.seed(2020) @@ -263,24 +262,23 @@ class TestDistBase(unittest.TestCase): for i in range(result_data.shape[0]): for j in range(result_data.shape[1]): data = result_data[i][j] - assert np.allclose( - tr0_out[1][i][j], need_result[data], atol=1e-08) + assert np.allclose(tr0_out[1][i][j], + need_result[data], + atol=1e-08) elif col_type == "row_parallel_linear": result_data = tr0_out[0] np.random.seed(2020) weight = np.random.rand(1000, 16) need_result = np.matmul(input1, weight) self.assertTrue( - np.allclose( - result_data, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(result_data, need_result, rtol=1e-05, atol=1e-05)) elif col_type == "column_parallel_linear": result_data = tr0_out[0] np.random.seed(2020) weight = np.random.rand(1000, 16) need_result = np.matmul(input1, weight) self.assertTrue( - np.allclose( - result_data, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(result_data, need_result, rtol=1e-05, atol=1e-05)) elif col_type == "alltoall": need_result1 = np.vstack((input1[0:input1.shape[0] // 2, :], input2[0:input2.shape[0] // 2, :])) @@ -289,16 +287,13 @@ class TestDistBase(unittest.TestCase): tr0_out = np.vstack(tr0_out) tr1_out = np.vstack(tr1_out) self.assertTrue( - np.allclose( - tr0_out, need_result1, rtol=1e-05, atol=1e-05)) + np.allclose(tr0_out, need_result1, rtol=1e-05, atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out, need_result2, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out, need_result2, rtol=1e-05, atol=1e-05)) elif col_type == "sendrecv": result_data = tr1_out[0] self.assertTrue( - np.allclose( - input1, result_data, rtol=1e-05, atol=1e-05)) + np.allclose(input1, result_data, rtol=1e-05, atol=1e-05)) elif col_type == "global_gather": in_feat = 2 n_expert = 2 @@ -375,15 +370,13 @@ class TestDistBase(unittest.TestCase): if result1 == []: output1 = np.array([]) else: - output1 = np.concatenate( - result1, axis=0).reshape( - sum(local_expert_count1), in_feat) + output1 = np.concatenate(result1, axis=0).reshape( + sum(local_expert_count1), in_feat) if result2 == []: output2 = np.array([]) else: - output2 = np.concatenate( - result2, axis=0).reshape( - sum(local_expert_count2), in_feat) + output2 = np.concatenate(result2, axis=0).reshape( + sum(local_expert_count2), in_feat) if tr0_out[0] is None or tr0_out[0].shape[0] == 0: tr0_out[0] = np.array([]) @@ -392,24 +385,20 @@ class TestDistBase(unittest.TestCase): tr1_out[0] = np.array([]) self.assertTrue( - np.allclose( - tr0_out[0], output1, rtol=1e-05, atol=1e-05)) + np.allclose(tr0_out[0], output1, rtol=1e-05, atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out[0], output2, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out[0], output2, rtol=1e-05, atol=1e-05)) if static_mode == 0: self.assertTrue( - np.allclose( - tr0_out[1], - 2 * local_input_buf1, - rtol=1e-05, - atol=1e-05)) + np.allclose(tr0_out[1], + 2 * local_input_buf1, + rtol=1e-05, + atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out[1], - 2 * local_input_buf2, - rtol=1e-05, - atol=1e-05)) + np.allclose(tr1_out[1], + 2 * local_input_buf2, + rtol=1e-05, + atol=1e-05)) elif col_type == "global_scatter": np.random.seed(pid0) @@ -463,23 +452,19 @@ class TestDistBase(unittest.TestCase): tr1_out[0] = np.array([]) 
self.assertTrue( - np.allclose( - tr0_out[0], output1, rtol=1e-05, atol=1e-05)) + np.allclose(tr0_out[0], output1, rtol=1e-05, atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out[0], output2, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out[0], output2, rtol=1e-05, atol=1e-05)) if static_mode == 0: self.assertTrue( - np.allclose( - tr0_out[1], - 2 * local_input_buf1, - rtol=1e-05, - atol=1e-05)) + np.allclose(tr0_out[1], + 2 * local_input_buf1, + rtol=1e-05, + atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out[1], - 2 * local_input_buf2, - rtol=1e-05, - atol=1e-05)) + np.allclose(tr1_out[1], + 2 * local_input_buf2, + rtol=1e-05, + atol=1e-05)) else: pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_barrier_api.py b/python/paddle/fluid/tests/unittests/test_collective_barrier_api.py index d0a67baa61e..873ae77f08e 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_barrier_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_barrier_api.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCollectiveBarrierAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_base.py b/python/paddle/fluid/tests/unittests/test_collective_base.py index 1b55395ede5..55a009b3691 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_base.py +++ b/python/paddle/fluid/tests/unittests/test_collective_base.py @@ -30,6 +30,7 @@ from paddle.fluid import core class TestCollectiveRunnerBase(object): + def get_model(self, train_prog, startup_prog): raise NotImplementedError( "get model should be implemented by child class.") @@ -40,9 +41,8 @@ class TestCollectiveRunnerBase(object): not_ready_endpoints = [] for ep in endpoints: ip_port = ep.split(":") - with closing( - socket.socket(socket.AF_INET, - socket.SOCK_STREAM)) as sock: + with closing(socket.socket(socket.AF_INET, + socket.SOCK_STREAM)) as sock: sock.settimeout(2) sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) if hasattr(socket, 'SO_REUSEPORT'): @@ -55,13 +55,14 @@ class TestCollectiveRunnerBase(object): not_ready_endpoints.append(ep) if not all_ok: sys.stderr.write("server not ready, wait 3 sec to retry...\n") - sys.stderr.write("not ready endpoints:" + str( - not_ready_endpoints) + "\n") + sys.stderr.write("not ready endpoints:" + + str(not_ready_endpoints) + "\n") sys.stderr.flush() time.sleep(3) else: break + #endpoints should be ["ip1:port1","ip2:port2"] def initCommunicator(self, program, rank, nranks, wait_port, @@ -71,30 +72,27 @@ class TestCollectiveRunnerBase(object): if rank == 0 and wait_port: self.wait_server_ready(other_endpoints) block = program.global_block() - nccl_id_var = block.create_var( - name=nameGen.generate('nccl_id'), - persistable=True, - type=core.VarDesc.VarType.RAW) + nccl_id_var = block.create_var(name=nameGen.generate('nccl_id'), + persistable=True, + type=core.VarDesc.VarType.RAW) - block.append_op( - type='c_gen_nccl_id', - inputs={}, - outputs={'Out': nccl_id_var}, - attrs={ - 'rank': rank, - 'endpoint': current_endpoint, - 'other_endpoints': other_endpoints - }) + block.append_op(type='c_gen_nccl_id', + inputs={}, + outputs={'Out': nccl_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints + }) - block.append_op( - type='c_comm_init', - inputs={'X': nccl_id_var}, - outputs={}, - attrs={ - 'nranks': nranks, - 'rank': rank, - 'ring_id': self.global_ring_id - }) + block.append_op(type='c_comm_init', + inputs={'X': nccl_id_var}, + outputs={}, + 
attrs={ + 'nranks': nranks, + 'rank': rank, + 'ring_id': self.global_ring_id + }) def run_trainer(self, args): train_prog = fluid.Program() @@ -138,6 +136,7 @@ from contextlib import closing class TestDistBase(unittest.TestCase): + def setUp(self): self._port_set = set() self._trainers = 2 @@ -146,6 +145,7 @@ class TestDistBase(unittest.TestCase): self._python_interp = sys.executable def _find_free_port(self): + def __free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: @@ -185,18 +185,16 @@ class TestDistBase(unittest.TestCase): tr1_cmd = tr_cmd % (self._python_interp, model_file) tr0_pipe = open("/tmp/tr0_err.log", "wb") tr1_pipe = open("/tmp/tr1_err.log", "wb") - #print(tr0_cmd) - tr0_proc = subprocess.Popen( - tr0_cmd.strip().split(), - stdout=subprocess.PIPE, - stderr=tr0_pipe, - env=env0) + #print(tr0_cmd) + tr0_proc = subprocess.Popen(tr0_cmd.strip().split(), + stdout=subprocess.PIPE, + stderr=tr0_pipe, + env=env0) - tr1_proc = subprocess.Popen( - tr0_cmd.strip().split(), - stdout=subprocess.PIPE, - stderr=tr1_pipe, - env=env1) + tr1_proc = subprocess.Popen(tr0_cmd.strip().split(), + stdout=subprocess.PIPE, + stderr=tr1_pipe, + env=env1) tr0_out, tr0_err = tr0_proc.communicate() tr1_out, tr1_err = tr1_proc.communicate() @@ -227,8 +225,8 @@ class TestDistBase(unittest.TestCase): if check_error_log: required_envs["GLOG_v"] = "3" required_envs["GLOG_logtostderr"] = "1" - tr0_out, tr1_out, pid0, pid1 = self._run_cluster(model_file, - required_envs) + tr0_out, tr1_out, pid0, pid1 = self._run_cluster( + model_file, required_envs) np.random.seed(pid0) input1 = np.random.random((10, 1000)) np.random.seed(pid1) @@ -253,26 +251,21 @@ class TestDistBase(unittest.TestCase): elif col_type == "allreduce": need_result = input1 + input2 self.assertTrue( - np.allclose( - tr0_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr0_out, need_result, rtol=1e-05, atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out, need_result, rtol=1e-05, atol=1e-05)) elif col_type == "reduce_scatter": tmp = input1 + input2 need_result1 = tmp[0:tmp.shape[0] // 2] need_result2 = tmp[tmp.shape[0] // 2:] self.assertTrue( - np.allclose( - tr0_out, need_result1, rtol=1e-05, atol=1e-05)) + np.allclose(tr0_out, need_result1, rtol=1e-05, atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out, need_result2, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out, need_result2, rtol=1e-05, atol=1e-05)) elif col_type == "sendrecv": need_result = input1 self.assertTrue( - np.allclose( - tr1_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out, need_result, rtol=1e-05, atol=1e-05)) elif col_type == "identity": need_result1 = input1 need_result2 = input2 @@ -291,28 +284,24 @@ class TestDistBase(unittest.TestCase): elif col_type == "concat": need_result = np.concatenate((input1, input2), axis=1) self.assertTrue( - np.allclose( - tr0_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr0_out, need_result, rtol=1e-05, atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out, need_result, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out, need_result, rtol=1e-05, atol=1e-05)) elif col_type == "split": need_result1 = np.split(input1, 2, axis=1)[0] need_result2 = np.split(input2, 2, axis=1)[1] self.assertTrue( - np.allclose( - tr0_out, need_result1, rtol=1e-05, atol=1e-05)) + np.allclose(tr0_out, need_result1, rtol=1e-05, atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out, need_result2, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out, 
need_result2, rtol=1e-05, atol=1e-05)) elif col_type == "sendrecv_array": need_result1 = np.array([[0, 1, 2]]) need_result2 = np.array([[3, 4, 5]]) self.assertTrue( - np.allclose( - tr1_out[0][0], need_result1, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out[0][0], need_result1, rtol=1e-05, + atol=1e-05)) self.assertTrue( - np.allclose( - tr1_out[0][1], need_result2, rtol=1e-05, atol=1e-05)) + np.allclose(tr1_out[0][1], need_result2, rtol=1e-05, + atol=1e-05)) else: pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_broadcast_api.py b/python/paddle/fluid/tests/unittests/test_collective_broadcast_api.py index 702e0431157..289cb7152ac 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_broadcast_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_broadcast_api.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCollectiveBroadcastAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_cpu_barrier_with_gloo.py b/python/paddle/fluid/tests/unittests/test_collective_cpu_barrier_with_gloo.py index 438e360f60e..bf503d804ca 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_cpu_barrier_with_gloo.py +++ b/python/paddle/fluid/tests/unittests/test_collective_cpu_barrier_with_gloo.py @@ -28,7 +28,9 @@ paddle.enable_static() class CollectiveCPUBarrierWithGlooTest(unittest.TestCase): + def find_free_port(self): + def _free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: @@ -97,9 +99,9 @@ class CollectiveCPUBarrierWithGlooTest(unittest.TestCase): procs_out_dict = manager.dict() jobs = [] for id in range(num_of_ranks): - p = multiprocessing.Process( - target=self.barrier_func, - args=(id, num_of_ranks, ep_str, procs_out_dict, sleep_time)) + p = multiprocessing.Process(target=self.barrier_func, + args=(id, num_of_ranks, ep_str, + procs_out_dict, sleep_time)) jobs.append(p) p.start() for proc in jobs: @@ -117,9 +119,9 @@ class CollectiveCPUBarrierWithGlooTest(unittest.TestCase): procs_out_dict = manager.dict() jobs = [] for id in range(num_of_ranks): - p = multiprocessing.Process( - target=self.barrier_op, - args=(id, num_of_ranks, ep_str, procs_out_dict, sleep_time)) + p = multiprocessing.Process(target=self.barrier_op, + args=(id, num_of_ranks, ep_str, + procs_out_dict, sleep_time)) jobs.append(p) p.start() for proc in jobs: diff --git a/python/paddle/fluid/tests/unittests/test_collective_global_gather.py b/python/paddle/fluid/tests/unittests/test_collective_global_gather.py index 6809f3970f6..949c4562ec9 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_global_gather.py +++ b/python/paddle/fluid/tests/unittests/test_collective_global_gather.py @@ -22,6 +22,7 @@ import os class TestCollectiveGlobalGatherAPI(TestDistBase): + def _setup_config(self): pass @@ -31,20 +32,18 @@ class TestCollectiveGlobalGatherAPI(TestDistBase): "nccl") def test_global_gather_nccl_dygraph(self): - self.check_with_place( - "collective_global_gather_dygraph.py", - "global_gather", - "nccl", - static_mode="0", - eager_mode=False) + self.check_with_place("collective_global_gather_dygraph.py", + "global_gather", + "nccl", + static_mode="0", + eager_mode=False) def test_global_gather_nccl_dygraph_eager(self): - self.check_with_place( - "collective_global_gather_dygraph.py", - "global_gather", - "nccl", - static_mode="0", - eager_mode=True) + self.check_with_place("collective_global_gather_dygraph.py", + "global_gather", + "nccl", + static_mode="0", + eager_mode=True) if 
__name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_collective_global_scatter.py b/python/paddle/fluid/tests/unittests/test_collective_global_scatter.py index 1485bafa387..9bd112d906f 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_global_scatter.py +++ b/python/paddle/fluid/tests/unittests/test_collective_global_scatter.py @@ -22,6 +22,7 @@ import os class TestCollectiveSelectScatterAPI(TestDistBase): + def _setup_config(self): pass @@ -31,20 +32,18 @@ class TestCollectiveSelectScatterAPI(TestDistBase): "nccl") def test_global_scatter_nccl_dygraph(self): - self.check_with_place( - "collective_global_scatter_dygraph.py", - "global_scatter", - "nccl", - static_mode="0", - eager_mode=False) + self.check_with_place("collective_global_scatter_dygraph.py", + "global_scatter", + "nccl", + static_mode="0", + eager_mode=False) def test_global_scatter_nccl_dygraph_eager(self): - self.check_with_place( - "collective_global_scatter_dygraph.py", - "global_scatter", - "nccl", - static_mode="0", - eager_mode=True) + self.check_with_place("collective_global_scatter_dygraph.py", + "global_scatter", + "nccl", + static_mode="0", + eager_mode=True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_collective_optimizer.py b/python/paddle/fluid/tests/unittests/test_collective_optimizer.py index c91586b4d50..182f2b5c32f 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_collective_optimizer.py @@ -32,6 +32,7 @@ from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, Distribu class CollectiveOptimizerTest(unittest.TestCase): + def test_ds_as_None(self): optimizer = fluid.optimizer.AdamOptimizer() dist_optimizer = CollectiveOptimizer(optimizer, strategy=None) diff --git a/python/paddle/fluid/tests/unittests/test_collective_process_group.py b/python/paddle/fluid/tests/unittests/test_collective_process_group.py index e00f90f4b0d..5355c58753e 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_process_group.py +++ b/python/paddle/fluid/tests/unittests/test_collective_process_group.py @@ -19,6 +19,7 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestProcessGroup(TestMultipleGpus): + def test_process_group_nccl(self): self.run_mnist_2gpu('process_group_nccl.py') diff --git a/python/paddle/fluid/tests/unittests/test_collective_reduce.py b/python/paddle/fluid/tests/unittests/test_collective_reduce.py index c0627467428..306fb7beb8a 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_reduce.py +++ b/python/paddle/fluid/tests/unittests/test_collective_reduce.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCReduceOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_reduce_api.py b/python/paddle/fluid/tests/unittests/test_collective_reduce_api.py index 721f446c9f0..2da70f5a94d 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_reduce_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_reduce_api.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCollectiveReduceAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_scatter.py b/python/paddle/fluid/tests/unittests/test_collective_scatter.py index ea34d1cab5a..aa6676cb941 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_scatter.py +++ 
b/python/paddle/fluid/tests/unittests/test_collective_scatter.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCScatterOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_scatter_api.py b/python/paddle/fluid/tests/unittests/test_collective_scatter_api.py index 3a37da52b8e..18c720c5628 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_scatter_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_scatter_api.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCollectiveScatterAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_sendrecv.py b/python/paddle/fluid/tests/unittests/test_collective_sendrecv.py index d3bcd0a7e69..4df303d1b3c 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_sendrecv.py +++ b/python/paddle/fluid/tests/unittests/test_collective_sendrecv.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestSendRecvOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_sendrecv_api.py b/python/paddle/fluid/tests/unittests/test_collective_sendrecv_api.py index f1d5ec1300e..c0a14f7e286 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_sendrecv_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_sendrecv_api.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCollectiveSendRecvAPI(TestDistBase): + def _setup_config(self): pass @@ -33,11 +34,10 @@ class TestCollectiveSendRecvAPI(TestDistBase): def test_sendrecv_nccl_dygraph(self): if paddle.fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "collective_sendrecv_api_dygraph.py", - "sendrecv", - "nccl", - static_mode='0') + self.check_with_place("collective_sendrecv_api_dygraph.py", + "sendrecv", + "nccl", + static_mode='0') if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_collective_split_col_linear.py b/python/paddle/fluid/tests/unittests/test_collective_split_col_linear.py index a88d3f11991..632c38cc1ce 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_split_col_linear.py +++ b/python/paddle/fluid/tests/unittests/test_collective_split_col_linear.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestColParallelLinearAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_split_embedding.py b/python/paddle/fluid/tests/unittests/test_collective_split_embedding.py index f13ef81f036..58424984f7a 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_split_embedding.py +++ b/python/paddle/fluid/tests/unittests/test_collective_split_embedding.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestParallelEmbeddingAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_split_embedding_none_divisible.py b/python/paddle/fluid/tests/unittests/test_collective_split_embedding_none_divisible.py index 955adf08c48..af10878ddac 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_split_embedding_none_divisible.py +++ b/python/paddle/fluid/tests/unittests/test_collective_split_embedding_none_divisible.py @@ -22,12 +22,15 @@ paddle.enable_static() class TestCollectiveSplitAssert(unittest.TestCase): + def network(self): fleet.init() - data = paddle.static.data( - name='tindata', shape=[10, 1000], dtype="float32") - emb_out = paddle.distributed.split( - data, (7, 8), operation="embedding", 
num_partitions=2) + data = paddle.static.data(name='tindata', + shape=[10, 1000], + dtype="float32") + emb_out = paddle.distributed.split(data, (7, 8), + operation="embedding", + num_partitions=2) def test_assert(self): with self.assertRaises(AssertionError): diff --git a/python/paddle/fluid/tests/unittests/test_collective_split_row_linear.py b/python/paddle/fluid/tests/unittests/test_collective_split_row_linear.py index 08aedb1feac..b8240e8d991 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_split_row_linear.py +++ b/python/paddle/fluid/tests/unittests/test_collective_split_row_linear.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestRowParallelLinearAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_collective_wait.py b/python/paddle/fluid/tests/unittests/test_collective_wait.py index b34ace80723..c15a2d56d24 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_wait.py +++ b/python/paddle/fluid/tests/unittests/test_collective_wait.py @@ -23,14 +23,14 @@ paddle.enable_static() class TestCWaitOp(TestDistBase): + def _setup_config(self): pass def test_allreduce_wait(self): - self.check_with_place( - "collective_allreduce_op_wait.py", - "allreduce", - check_error_log=True) + self.check_with_place("collective_allreduce_op_wait.py", + "allreduce", + check_error_log=True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_communicator_async.py b/python/paddle/fluid/tests/unittests/test_communicator_async.py index 5e67fe3e446..f6fd89dc37d 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_async.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_async.py @@ -21,6 +21,7 @@ import threading import numpy import paddle + paddle.enable_static() import paddle.fluid as fluid @@ -29,6 +30,7 @@ import paddle.distributed.fleet as fleet class TestCommunicator(unittest.TestCase): + def net(self): x = fluid.layers.data(name='x', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index d9c64064222..c3f2566d6f7 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -34,6 +34,7 @@ paddle.enable_static() class TestCommunicatorGeoEnd2End(unittest.TestCase): + def net(self): x = fluid.layers.data(name='x', shape=[13], dtype='float32') x1 = fluid.layers.data(name='x1', shape=[1], dtype='int64', lod_level=1) @@ -56,6 +57,7 @@ class TestCommunicatorGeoEnd2End(unittest.TestCase): return avg_cost, x, x1, y def fake_reader(self): + def reader(): for i in range(10000): x = numpy.random.random((1, 13)).astype('float32') @@ -168,10 +170,9 @@ half_run_server.run_ut() ps_cmd = "{} {}".format(_python, server_file) - ps_proc = subprocess.Popen( - ps_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + ps_proc = subprocess.Popen(ps_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) time.sleep(5) diff --git a/python/paddle/fluid/tests/unittests/test_communicator_half_async.py b/python/paddle/fluid/tests/unittests/test_communicator_half_async.py index 5a126bfa66a..c4a7edc21f9 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_half_async.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_half_async.py @@ -31,6 +31,7 @@ paddle.enable_static() class 
TestCommunicatorHalfAsyncEnd2End(unittest.TestCase): + def net(self): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) @@ -41,6 +42,7 @@ class TestCommunicatorHalfAsyncEnd2End(unittest.TestCase): return avg_cost, x, y def fake_reader(self): + def reader(): for i in range(10000): x = numpy.random.random((1, 13)).astype('float32') @@ -140,10 +142,9 @@ half_run_server.run_ut() _python = sys.executable ps_cmd = "{} {}".format(_python, server_file) - ps_proc = subprocess.Popen( - ps_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + ps_proc = subprocess.Popen(ps_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) os.environ["http_proxy"] = "" os.environ["https_proxy"] = "" diff --git a/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py b/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py index 7b0c28e64bc..f32cc4f5c93 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py @@ -21,6 +21,7 @@ import threading import numpy import paddle + paddle.enable_static() import paddle.fluid as fluid @@ -29,6 +30,7 @@ import paddle.distributed.fleet as fleet class TestCommunicator(unittest.TestCase): + def test_communicator_ps_gpu(self): with open("test_communicator_ps_gpu.txt", "w") as f: data = "1 0.6 1 0.7\n" @@ -70,8 +72,10 @@ class TestCommunicator(unittest.TestCase): optimizer.minimize(avg_cost) dataset = paddle.distributed.InMemoryDataset() - dataset.init( - batch_size=32, thread_num=1, pipe_command="cat", use_var=slots_vars) + dataset.init(batch_size=32, + thread_num=1, + pipe_command="cat", + use_var=slots_vars) dataset.set_filelist(["test_communicator_ps_gpu.txt"]) dataset.set_date("20211111") dataset.load_into_memory(is_shuffle=True) diff --git a/python/paddle/fluid/tests/unittests/test_communicator_sync.py b/python/paddle/fluid/tests/unittests/test_communicator_sync.py index 8f52414f8cb..f13cfd88576 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_sync.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_sync.py @@ -19,6 +19,7 @@ import time import os import paddle + paddle.enable_static() import paddle.fluid as fluid @@ -28,6 +29,7 @@ import paddle.distributed.fleet as fleet class TestCommunicator(unittest.TestCase): + def net(self): x = fluid.layers.data(name='x', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/test_compare_op.py b/python/paddle/fluid/tests/unittests/test_compare_op.py index bd9ec6b663f..06432e4b007 100755 --- a/python/paddle/fluid/tests/unittests/test_compare_op.py +++ b/python/paddle/fluid/tests/unittests/test_compare_op.py @@ -25,7 +25,9 @@ from paddle.fluid import Program, program_guard def create_test_class(op_type, typename, callback): + class Cls(op_test.OpTest): + def setUp(self): a = numpy.random.random(size=(10, 7)).astype(typename) b = numpy.random.random(size=(10, 7)).astype(typename) @@ -45,12 +47,11 @@ def create_test_class(op_type, typename, callback): y = fluid.layers.data(name='y', shape=[2], dtype='int32') a = fluid.layers.data(name='a', shape=[2], dtype='int16') if self.op_type == "less_than": - self.assertRaises( - TypeError, - fluid.layers.less_than, - x=x, - y=y, - force_cpu=1) + self.assertRaises(TypeError, + fluid.layers.less_than, + x=x, + y=y, + force_cpu=1) op = eval("fluid.layers.%s" % self.op_type) 
self.assertRaises(TypeError, op, x=x, y=y, cond=1) self.assertRaises(TypeError, op, x=x, y=a) @@ -74,7 +75,9 @@ for _type_name in {'float32', 'float64', 'int32', 'int64'}: def create_paddle_case(op_type, callback): + class PaddleCls(unittest.TestCase): + def setUp(self): self.op_type = op_type self.input_x = np.array([1, 2, 3, 4]).astype(np.int64) @@ -92,8 +95,10 @@ def create_paddle_case(op_type, callback): op = eval("paddle.%s" % (self.op_type)) out = op(x, y) exe = fluid.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "y": self.input_y}, + res, = exe.run(feed={ + "x": self.input_x, + "y": self.input_y + }, fetch_list=[out]) self.assertEqual((res == self.real_result).all(), True) @@ -106,8 +111,10 @@ def create_paddle_case(op_type, callback): op = eval("paddle.%s" % (self.op_type)) out = op(x, y) exe = fluid.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "y": 1.0}, + res, = exe.run(feed={ + "x": self.input_x, + "y": 1.0 + }, fetch_list=[out]) self.real_result = np.array([1, 0, 0, 0]).astype(np.int64) self.assertEqual((res == self.real_result).all(), True) @@ -144,10 +151,10 @@ def create_paddle_case(op_type, callback): def test_not_equal(self): if self.op_type == "not_equal": paddle.disable_static() - x = paddle.to_tensor( - np.array([1.2e-8, 2, 2, 1]), dtype="float32") - y = paddle.to_tensor( - np.array([1.1e-8, 2, 2, 1]), dtype="float32") + x = paddle.to_tensor(np.array([1.2e-8, 2, 2, 1]), + dtype="float32") + y = paddle.to_tensor(np.array([1.1e-8, 2, 2, 1]), + dtype="float32") op = eval("paddle.%s" % (self.op_type)) out = op(x, y) self.real_result = np.array([0, 0, 0, 0]).astype(np.int64) @@ -155,6 +162,7 @@ def create_paddle_case(op_type, callback): paddle.enable_static() def test_assert(self): + def test_dynamic_api_string(self): if self.op_type == "equal": paddle.disable_static() @@ -178,8 +186,9 @@ def create_paddle_case(op_type, callback): def test_broadcast_api_1(self): paddle.enable_static() with program_guard(Program(), Program()): - x = paddle.static.data( - name='x', shape=[1, 2, 1, 3], dtype='int32') + x = paddle.static.data(name='x', + shape=[1, 2, 1, 3], + dtype='int32') y = paddle.static.data(name='y', shape=[1, 2, 3], dtype='int32') op = eval("paddle.%s" % (self.op_type)) out = op(x, y) @@ -187,8 +196,10 @@ def create_paddle_case(op_type, callback): input_x = np.arange(1, 7).reshape((1, 2, 1, 3)).astype(np.int32) input_y = np.arange(0, 6).reshape((1, 2, 3)).astype(np.int32) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) @@ -196,16 +207,19 @@ def create_paddle_case(op_type, callback): paddle.enable_static() with program_guard(Program(), Program()): x = paddle.static.data(name='x', shape=[1, 2, 3], dtype='int32') - y = paddle.static.data( - name='y', shape=[1, 2, 1, 3], dtype='int32') + y = paddle.static.data(name='y', + shape=[1, 2, 1, 3], + dtype='int32') op = eval("paddle.%s" % (self.op_type)) out = op(x, y) exe = paddle.static.Executor(self.place) input_x = np.arange(0, 6).reshape((1, 2, 3)).astype(np.int32) input_y = np.arange(1, 7).reshape((1, 2, 1, 3)).astype(np.int32) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) @@ -220,8 +234,10 @@ def create_paddle_case(op_type, callback): 
input_x = np.arange(0, 5).reshape((5)).astype(np.int32) input_y = np.array([5, 3, 2]).reshape((3, 1)).astype(np.int32) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) @@ -236,8 +252,10 @@ def create_paddle_case(op_type, callback): input_x = np.array([True, False, True]).astype(np.bool) input_y = np.array([True, True, False]).astype(np.bool) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) @@ -252,8 +270,10 @@ def create_paddle_case(op_type, callback): input_x = np.array([True, False, True]).astype(np.bool) input_y = np.array([True]).astype(np.bool) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) @@ -280,17 +300,19 @@ create_paddle_case('not_equal', lambda _a, _b: _a != _b) class TestCompareOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): # The input x and y of compare_op must be Variable. x = fluid.layers.data(name='x', shape=[1], dtype="float32") - y = fluid.create_lod_tensor( - numpy.array([[-1]]), [[1]], fluid.CPUPlace()) + y = fluid.create_lod_tensor(numpy.array([[-1]]), [[1]], + fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.greater_equal, x, y) class API_TestElementwise_Equal(unittest.TestCase): + def test_api(self): paddle.enable_static() with fluid.program_guard(fluid.Program(), fluid.Program()): @@ -313,6 +335,7 @@ class API_TestElementwise_Equal(unittest.TestCase): class TestCompareOpPlace(unittest.TestCase): + def test_place_1(self): paddle.enable_static() place = paddle.CPUPlace() diff --git a/python/paddle/fluid/tests/unittests/test_compare_reduce_op.py b/python/paddle/fluid/tests/unittests/test_compare_reduce_op.py index 29e3436948e..5ee1ac07e8a 100644 --- a/python/paddle/fluid/tests/unittests/test_compare_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_compare_reduce_op.py @@ -23,7 +23,9 @@ from paddle.fluid import Program, program_guard def create_test_not_equal_class(op_type, typename, callback): + class Cls(op_test.OpTest): + def setUp(self): x = np.random.random(size=(10, 7)).astype(typename) y = np.random.random(size=(10, 7)).astype(typename) @@ -42,7 +44,9 @@ def create_test_not_equal_class(op_type, typename, callback): def create_test_not_shape_equal_class(op_type, typename, callback): + class Cls(op_test.OpTest): + def setUp(self): x = np.random.random(size=(10, 7)).astype(typename) y = np.random.random(size=(10)).astype(typename) @@ -61,7 +65,9 @@ def create_test_not_shape_equal_class(op_type, typename, callback): def create_test_equal_class(op_type, typename, callback): + class Cls(op_test.OpTest): + def setUp(self): x = y = np.random.random(size=(10, 7)).astype(typename) z = callback(x, y) @@ -79,7 +85,9 @@ def create_test_equal_class(op_type, typename, callback): def create_test_dim1_class(op_type, typename, callback): + class Cls(op_test.OpTest): + def setUp(self): x = y = np.random.random(size=(1)).astype(typename) x = np.array([True, False, True]).astype(typename) @@ -107,6 +115,7 @@ for _type_name in {'float32', 'float64', 'int32', 'int64', 'bool'}: 
class TestEqualReduceAPI(unittest.TestCase): + def test_name(self): x = fluid.layers.assign(np.array([3, 4], dtype="int32")) y = fluid.layers.assign(np.array([3, 4], dtype="int32")) diff --git a/python/paddle/fluid/tests/unittests/test_compat.py b/python/paddle/fluid/tests/unittests/test_compat.py index 7f26582889d..59dbb818898 100644 --- a/python/paddle/fluid/tests/unittests/test_compat.py +++ b/python/paddle/fluid/tests/unittests/test_compat.py @@ -19,6 +19,7 @@ import paddle.compat as cpt class TestCompatible(unittest.TestCase): + def test_type(self): self.assertEqual(cpt.int_type, int) self.assertEqual(cpt.long_type, int) diff --git a/python/paddle/fluid/tests/unittests/test_compiled_program.py b/python/paddle/fluid/tests/unittests/test_compiled_program.py index 79ee383f3f9..e16ac4881c7 100644 --- a/python/paddle/fluid/tests/unittests/test_compiled_program.py +++ b/python/paddle/fluid/tests/unittests/test_compiled_program.py @@ -24,24 +24,29 @@ from simple_nets import simple_fc_net class TestCompiledProgram(unittest.TestCase): + def setUp(self): self.seed = 100 self.img = np.random.random(size=(16, 784)).astype('float32') - self.label = np.random.randint( - low=0, high=10, size=[16, 1], dtype=np.int64) + self.label = np.random.randint(low=0, + high=10, + size=[16, 1], + dtype=np.int64) with new_program_scope(): paddle.seed(self.seed) paddle.framework.random._manual_program_seed(self.seed) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) loss = simple_fc_net() exe.run(fluid.default_startup_program()) loss_data, = exe.run(fluid.default_main_program(), - feed={"image": self.img, - "label": self.label}, + feed={ + "image": self.img, + "label": self.label + }, fetch_list=[loss.name]) self.loss = loss_data[0] @@ -49,8 +54,8 @@ class TestCompiledProgram(unittest.TestCase): with new_program_scope(): paddle.seed(self.seed) paddle.framework.random._manual_program_seed(self.seed) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) loss = simple_fc_net() @@ -58,8 +63,10 @@ class TestCompiledProgram(unittest.TestCase): compiled_prog = fluid.CompiledProgram(fluid.default_main_program()) loss_data, = exe.run(compiled_prog, - feed={"image": self.img, - "label": self.label}, + feed={ + "image": self.img, + "label": self.label + }, fetch_list=[loss.name]) self.assertTrue(np.array_equal(loss_data[0], self.loss)) @@ -67,30 +74,34 @@ class TestCompiledProgram(unittest.TestCase): with new_program_scope(): paddle.seed(self.seed) paddle.framework.random._manual_program_seed(self.seed) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) loss = simple_fc_net() exe.run(fluid.default_startup_program()) - compiled_prog = fluid.CompiledProgram(fluid.default_main_program( - )).with_data_parallel( - loss_name=loss.name, places=[place]) + compiled_prog = fluid.CompiledProgram( + fluid.default_main_program()).with_data_parallel( + loss_name=loss.name, places=[place]) loss_data, = exe.run(compiled_prog, - feed={"image": self.img, - "label": self.label}, + feed={ + "image": self.img, + "label": self.label + }, fetch_list=[loss.name]) self.assertTrue(np.array_equal(loss_data[0], 
self.loss)) class TestCompiledProgramError(unittest.TestCase): + def test_program_or_graph_error(self): self.assertRaises(TypeError, fluid.CompiledProgram, "program") def build_simple_model(self): - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') + img = fluid.layers.data(name='image', + shape=[1, 28, 28], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') prediction = fluid.layers.fc(input=img, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) diff --git a/python/paddle/fluid/tests/unittests/test_complex_abs.py b/python/paddle/fluid/tests/unittests/test_complex_abs.py index a29d9baadea..6c90e09d7ca 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_abs.py +++ b/python/paddle/fluid/tests/unittests/test_complex_abs.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,6 +24,7 @@ from paddle.fluid.framework import _test_eager_guard class TestComplexAbsOp(OpTest): + def setUp(self): paddle.enable_static() self.python_api = paddle.abs @@ -49,15 +50,15 @@ class TestComplexAbsOp(OpTest): self.check_output(check_eager=False) def test_check_grad(self): - self.check_grad( - ['X'], - 'Out', - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out], - check_eager=False) + self.check_grad(['X'], + 'Out', + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out], + check_eager=False) class TestComplexAbsOpZeroValues(OpTest): + def setUp(self): paddle.enable_static() self.op_type = "abs" @@ -71,8 +72,8 @@ class TestComplexAbsOpZeroValues(OpTest): self.outputs = {'Out': self.out} def init_input_output(self): - self.x = np.zeros(self.shape).astype(self.dtype) + 1J * np.zeros( - self.shape).astype(self.dtype) + self.x = np.zeros(self.shape).astype( + self.dtype) + 1J * np.zeros(self.shape).astype(self.dtype) self.out = np.abs(self.x) def init_grad_input_output(self): @@ -83,15 +84,15 @@ class TestComplexAbsOpZeroValues(OpTest): self.check_output(check_eager=False) def test_check_grad(self): - self.check_grad( - ['X'], - 'Out', - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out], - check_eager=False) + self.check_grad(['X'], + 'Out', + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out], + check_eager=False) class TestAbs(unittest.TestCase): + def setUp(self): self._dtypes = ["float32", "float64"] self._places = [paddle.CPUPlace()] @@ -112,6 +113,7 @@ class TestAbs(unittest.TestCase): class TestRealAbsOp(OpTest): + def setUp(self): paddle.enable_static() self.python_api = paddle.abs @@ -136,12 +138,11 @@ class TestRealAbsOp(OpTest): self.check_output(check_eager=False) def test_check_grad(self): - self.check_grad( - ['X'], - 'Out', - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out], - check_eager=False) + self.check_grad(['X'], + 'Out', + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out], + check_eager=False) if __name__ == '__main__': diff --git 
a/python/paddle/fluid/tests/unittests/test_complex_cast.py b/python/paddle/fluid/tests/unittests/test_complex_cast.py index 5da49ca62d9..21db0a78e72 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_cast.py +++ b/python/paddle/fluid/tests/unittests/test_complex_cast.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,6 +22,7 @@ from paddle.fluid.framework import _test_eager_guard class TestComplexCastOp(unittest.TestCase): + def test_complex_to_real(self): r = np.random.random(size=[10, 10]) * 10 i = np.random.random(size=[10, 10]) diff --git a/python/paddle/fluid/tests/unittests/test_complex_elementwise_layers.py b/python/paddle/fluid/tests/unittests/test_complex_elementwise_layers.py index d187d6d710b..c110339bf58 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_elementwise_layers.py +++ b/python/paddle/fluid/tests/unittests/test_complex_elementwise_layers.py @@ -30,6 +30,7 @@ paddle_apis = { class TestComplexElementwiseLayers(unittest.TestCase): + def setUp(self): self._dtypes = ["float32", "float64"] self._places = [paddle.CPUPlace()] @@ -51,14 +52,14 @@ class TestComplexElementwiseLayers(unittest.TestCase): def compare_by_basic_api(self, x, y): for place in self._places: - self.assert_check( - self.paddle_calc(x, y, "add", place), x + y, place) - self.assert_check( - self.paddle_calc(x, y, "sub", place), x - y, place) - self.assert_check( - self.paddle_calc(x, y, "mul", place), x * y, place) - self.assert_check( - self.paddle_calc(x, y, "div", place), x / y, place) + self.assert_check(self.paddle_calc(x, y, "add", place), x + y, + place) + self.assert_check(self.paddle_calc(x, y, "sub", place), x - y, + place) + self.assert_check(self.paddle_calc(x, y, "mul", place), x * y, + place) + self.assert_check(self.paddle_calc(x, y, "div", place), x / y, + place) def compare_op_by_basic_api(self, x, y): for place in self._places: @@ -72,18 +73,18 @@ class TestComplexElementwiseLayers(unittest.TestCase): def test_complex_xy(self): for dtype in self._dtypes: - x = rand([2, 3, 4, 5]).astype(dtype) + 1j * rand( - [2, 3, 4, 5]).astype(dtype) - y = rand([2, 3, 4, 5]).astype(dtype) + 1j * rand( - [2, 3, 4, 5]).astype(dtype) + x = rand([2, 3, 4, 5 + ]).astype(dtype) + 1j * rand([2, 3, 4, 5]).astype(dtype) + y = rand([2, 3, 4, 5 + ]).astype(dtype) + 1j * rand([2, 3, 4, 5]).astype(dtype) self.compare_by_basic_api(x, y) self.compare_op_by_basic_api(x, y) def test_complex_x_real_y(self): for dtype in self._dtypes: - x = rand([2, 3, 4, 5]).astype(dtype) + 1j * rand( - [2, 3, 4, 5]).astype(dtype) + x = rand([2, 3, 4, 5 + ]).astype(dtype) + 1j * rand([2, 3, 4, 5]).astype(dtype) y = rand([4, 5]).astype(dtype) # promote types cases diff --git a/python/paddle/fluid/tests/unittests/test_complex_getitem.py b/python/paddle/fluid/tests/unittests/test_complex_getitem.py index 5c181515f45..e399dea1ed9 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_getitem.py +++ b/python/paddle/fluid/tests/unittests/test_complex_getitem.py @@ -21,6 +21,7 @@ from paddle.fluid.framework import _test_eager_guard 
class TestComplexGetitemLayer(unittest.TestCase): + def setUp(self): self._places = [fluid.CPUPlace()] if fluid.core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_complex_grad_accumulated.py b/python/paddle/fluid/tests/unittests/test_complex_grad_accumulated.py index ab40d7c0062..63f98efcfa6 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_grad_accumulated.py +++ b/python/paddle/fluid/tests/unittests/test_complex_grad_accumulated.py @@ -24,24 +24,27 @@ from paddle.fluid.framework import _test_eager_guard class Optimization_ex1(paddle.nn.Layer): + def __init__(self, shape, dtype, - param_attr=paddle.nn.initializer.Uniform( - low=-5., high=5.)): + param_attr=paddle.nn.initializer.Uniform(low=-5., high=5.)): super(Optimization_ex1, self).__init__() - self.theta0 = self.create_parameter( - shape=shape, attr=param_attr, dtype=dtype, is_bias=False) - self.theta1 = self.create_parameter( - shape=shape, attr=param_attr, dtype=dtype, is_bias=False) + self.theta0 = self.create_parameter(shape=shape, + attr=param_attr, + dtype=dtype, + is_bias=False) + self.theta1 = self.create_parameter(shape=shape, + attr=param_attr, + dtype=dtype, + is_bias=False) self.A = paddle.to_tensor( - np.random.random((4, 4)).astype(dtype) + np.random.random((4, 4)) - .astype(dtype) * 1j) - self.B = paddle.to_tensor( - np.random.random((4, 4)).astype(dtype) + np.random.random( - (4, 4)).astype(dtype) * 1j, - stop_gradient=False) + np.random.random((4, 4)).astype(dtype) + + np.random.random((4, 4)).astype(dtype) * 1j) + self.B = paddle.to_tensor(np.random.random( + (4, 4)).astype(dtype) + np.random.random((4, 4)).astype(dtype) * 1j, + stop_gradient=False) def forward(self, mode=1): jj = paddle.to_tensor(np.array([1j]).astype(np.complex64)) @@ -58,14 +61,15 @@ class Optimization_ex1(paddle.nn.Layer): return loss.real() elif mode == 3: # run without param - loss = paddle.sum(self.A + self.B) * ( - paddle.sum(self.A + self.B).conj()) + loss = paddle.sum(self.A + self.B) * (paddle.sum(self.A + + self.B).conj()) return loss.real() else: raise NotImplementedError class TestComplexGradAccumulated(unittest.TestCase): + def setUp(self): self.devices = ['cpu'] if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_complex_kron.py b/python/paddle/fluid/tests/unittests/test_complex_kron.py index 24109357a46..4f15256a8c5 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_kron.py +++ b/python/paddle/fluid/tests/unittests/test_complex_kron.py @@ -21,6 +21,7 @@ from paddle.fluid.framework import _test_eager_guard class ComplexKronTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', x=None, y=None): super(ComplexKronTestCase, self).__init__(methodName) self.x = x @@ -53,27 +54,23 @@ def load_tests(loader, standard_tests, pattern): suite = unittest.TestSuite() for dtype in ["float32", "float64"]: suite.addTest( - ComplexKronTestCase( - x=np.random.randn(2, 2).astype(dtype) + 1j * np.random.randn( - 2, 2).astype(dtype), - y=np.random.randn(3, 3).astype(dtype) + 1j * np.random.randn( - 3, 3).astype(dtype))) + ComplexKronTestCase(x=np.random.randn(2, 2).astype(dtype) + + 1j * np.random.randn(2, 2).astype(dtype), + y=np.random.randn(3, 3).astype(dtype) + + 1j * np.random.randn(3, 3).astype(dtype))) suite.addTest( - ComplexKronTestCase( - x=np.random.randn(2, 2).astype(dtype), - y=np.random.randn(3, 3).astype(dtype) + 1j * np.random.randn( - 3, 3).astype(dtype))) + ComplexKronTestCase(x=np.random.randn(2, 2).astype(dtype), + 
y=np.random.randn(3, 3).astype(dtype) + + 1j * np.random.randn(3, 3).astype(dtype))) suite.addTest( - ComplexKronTestCase( - x=np.random.randn(2, 2).astype(dtype) + 1j * np.random.randn( - 2, 2).astype(dtype), - y=np.random.randn(3, 3).astype(dtype))) + ComplexKronTestCase(x=np.random.randn(2, 2).astype(dtype) + + 1j * np.random.randn(2, 2).astype(dtype), + y=np.random.randn(3, 3).astype(dtype))) suite.addTest( - ComplexKronTestCase( - x=np.random.randn(2, 2).astype(dtype) + 1j * np.random.randn( - 2, 2).astype(dtype), - y=np.random.randn(2, 2, 3).astype(dtype))) + ComplexKronTestCase(x=np.random.randn(2, 2).astype(dtype) + + 1j * np.random.randn(2, 2).astype(dtype), + y=np.random.randn(2, 2, 3).astype(dtype))) return suite diff --git a/python/paddle/fluid/tests/unittests/test_complex_matmul.py b/python/paddle/fluid/tests/unittests/test_complex_matmul.py index dac4e36ea67..9be7933e926 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_matmul.py +++ b/python/paddle/fluid/tests/unittests/test_complex_matmul.py @@ -21,6 +21,7 @@ from paddle.fluid.framework import _test_eager_guard class TestComplexMatMulLayer(unittest.TestCase): + def setUp(self): self._dtypes = ["float32", "float64"] self._places = [fluid.CPUPlace()] @@ -36,9 +37,9 @@ class TestComplexMatMulLayer(unittest.TestCase): pd_result = result.numpy() self.assertTrue( np.allclose(pd_result, np_result), - "\nplace: {}\npaddle diff result:\n {}\nnumpy diff result:\n {}\n". - format(place, pd_result[~np.isclose(pd_result, np_result)], - np_result[~np.isclose(pd_result, np_result)])) + "\nplace: {}\npaddle diff result:\n {}\nnumpy diff result:\n {}\n" + .format(place, pd_result[~np.isclose(pd_result, np_result)], + np_result[~np.isclose(pd_result, np_result)])) def compare_op_by_basic_api(self, x, y, np_result): for place in self._places: @@ -49,9 +50,9 @@ class TestComplexMatMulLayer(unittest.TestCase): pd_result = result.numpy() self.assertTrue( np.allclose(pd_result, np_result), - "\nplace: {}\npaddle diff result:\n {}\nnumpy diff result:\n {}\n". - format(place, pd_result[~np.isclose(pd_result, np_result)], - np_result[~np.isclose(pd_result, np_result)])) + "\nplace: {}\npaddle diff result:\n {}\nnumpy diff result:\n {}\n" + .format(place, pd_result[~np.isclose(pd_result, np_result)], + np_result[~np.isclose(pd_result, np_result)])) def test_complex_xy(self): for dtype in self._dtypes: diff --git a/python/paddle/fluid/tests/unittests/test_complex_op.py b/python/paddle/fluid/tests/unittests/test_complex_op.py index bd759f7a00f..1faef17a2ad 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_op.py +++ b/python/paddle/fluid/tests/unittests/test_complex_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -50,6 +50,7 @@ def ref_complex_grad(x, y, dout): class TestComplexOp(OpTest): + def init_spec(self): self.x_shape = [10, 10] self.y_shape = [10, 10] @@ -73,12 +74,11 @@ class TestComplexOp(OpTest): dout = self.out_grad dx, dy = ref_complex_grad(self.inputs['X'], self.inputs['Y'], self.out_grad) - self.check_grad( - ['X', 'Y'], - 'Out', - user_defined_grads=[dx, dy], - user_defined_grad_outputs=[dout], - check_eager=True) + self.check_grad(['X', 'Y'], + 'Out', + user_defined_grads=[dx, dy], + user_defined_grad_outputs=[dout], + check_eager=True) def test_check_grad_ignore_x(self): dout = self.out_grad @@ -86,28 +86,27 @@ class TestComplexOp(OpTest): self.out_grad) self.assertTupleEqual(dx.shape, tuple(self.x_shape)) self.assertTupleEqual(dy.shape, tuple(self.y_shape)) - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set('X'), - user_defined_grads=[dy], - user_defined_grad_outputs=[dout], - check_eager=True) + self.check_grad(['Y'], + 'Out', + no_grad_set=set('X'), + user_defined_grads=[dy], + user_defined_grad_outputs=[dout], + check_eager=True) def test_check_grad_ignore_y(self): dout = self.out_grad dx, dy = ref_complex_grad(self.inputs['X'], self.inputs['Y'], self.out_grad) - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - user_defined_grads=[dx], - user_defined_grad_outputs=[dout], - check_eager=True) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), + user_defined_grads=[dx], + user_defined_grad_outputs=[dout], + check_eager=True) class TestComplexOpBroadcast1(TestComplexOp): + def init_spec(self): self.x_shape = [10, 3, 1, 4] self.y_shape = [100, 1] @@ -115,6 +114,7 @@ class TestComplexOpBroadcast1(TestComplexOp): class TestComplexOpBroadcast2(TestComplexOp): + def init_spec(self): self.x_shape = [100, 1] self.y_shape = [10, 3, 1, 4] @@ -122,6 +122,7 @@ class TestComplexOpBroadcast2(TestComplexOp): class TestComplexOpBroadcast3(TestComplexOp): + def init_spec(self): self.x_shape = [1, 100] self.y_shape = [100] @@ -129,6 +130,7 @@ class TestComplexOpBroadcast3(TestComplexOp): class TestComplexAPI(unittest.TestCase): + def setUp(self): self.x = np.random.randn(10, 10) self.y = np.random.randn(10, 10) @@ -151,8 +153,10 @@ class TestComplexAPI(unittest.TestCase): exe = static.Executor() exe.run(sp) [out_np] = exe.run(mp, - feed={"x": self.x, - "y": self.y}, + feed={ + "x": self.x, + "y": self.y + }, fetch_list=[out]) self.assertTrue(np.allclose(self.out, out_np)) diff --git a/python/paddle/fluid/tests/unittests/test_complex_reshape.py b/python/paddle/fluid/tests/unittests/test_complex_reshape.py index dccfcf2e045..c80970b33a7 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_reshape.py +++ b/python/paddle/fluid/tests/unittests/test_complex_reshape.py @@ -21,6 +21,7 @@ from paddle.fluid.framework import _test_eager_guard class TestComplexReshape(unittest.TestCase): + def setUp(self): self._dtypes = ["float32", "float64"] self._places = [paddle.CPUPlace()] @@ -30,8 +31,8 @@ class TestComplexReshape(unittest.TestCase): def test_shape_norm_dims(self): for dtype in self._dtypes: x_np = np.random.randn( - 2, 3, 4).astype(dtype) + 1j * np.random.randn(2, 3, - 4).astype(dtype) + 2, 3, + 4).astype(dtype) + 1j * np.random.randn(2, 3, 4).astype(dtype) shape = (2, -1) for place in self._places: with dg.guard(place): @@ -43,8 +44,8 @@ class TestComplexReshape(unittest.TestCase): def test_shape_omit_dims(self): for dtype in self._dtypes: x_np = np.random.randn( - 2, 3, 4).astype(dtype) + 1j * np.random.randn(2, 3, - 4).astype(dtype) + 2, 3, + 4).astype(dtype) + 1j 
* np.random.randn(2, 3, 4).astype(dtype) shape = (0, -1) shape_ = (2, 12) for place in self._places: diff --git a/python/paddle/fluid/tests/unittests/test_complex_simplenet.py b/python/paddle/fluid/tests/unittests/test_complex_simplenet.py index 4191a0487c7..21bc886837a 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_simplenet.py +++ b/python/paddle/fluid/tests/unittests/test_complex_simplenet.py @@ -24,15 +24,17 @@ from paddle.fluid.framework import _test_eager_guard class Optimization_ex1(paddle.nn.Layer): + def __init__(self, shape, - param_attr=paddle.nn.initializer.Uniform( - low=-5., high=5.), + param_attr=paddle.nn.initializer.Uniform(low=-5., high=5.), dtype='float32'): super(Optimization_ex1, self).__init__() - self.theta = self.create_parameter( - shape=shape, attr=param_attr, dtype=dtype, is_bias=False) + self.theta = self.create_parameter(shape=shape, + attr=param_attr, + dtype=dtype, + is_bias=False) self.A = paddle.to_tensor( np.random.randn(4, 4) + np.random.randn(4, 4) * 1j) @@ -42,6 +44,7 @@ class Optimization_ex1(paddle.nn.Layer): class TestComplexSimpleNet(unittest.TestCase): + def setUp(self): self.devices = ['cpu'] if core.is_compiled_with_cuda(): @@ -54,8 +57,8 @@ class TestComplexSimpleNet(unittest.TestCase): paddle.set_device(device) myLayer = Optimization_ex1(self.theta_size) - optimizer = paddle.optimizer.Adam( - learning_rate=self.learning_rate, parameters=myLayer.parameters()) + optimizer = paddle.optimizer.Adam(learning_rate=self.learning_rate, + parameters=myLayer.parameters()) for itr in range(self.iter): loss = myLayer() diff --git a/python/paddle/fluid/tests/unittests/test_complex_sum_layer.py b/python/paddle/fluid/tests/unittests/test_complex_sum_layer.py index a2f6d42dcb7..3c43dbd4582 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_sum_layer.py +++ b/python/paddle/fluid/tests/unittests/test_complex_sum_layer.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard class TestComplexSumLayer(unittest.TestCase): + def setUp(self): self._dtypes = ["float32", "float64"] self._places = [paddle.CPUPlace()] @@ -31,8 +32,9 @@ class TestComplexSumLayer(unittest.TestCase): def test_complex_basic_api(self): for dtype in self._dtypes: - input = rand([2, 10, 10]).astype(dtype) + 1j * rand( - [2, 10, 10]).astype(dtype) + input = rand([ + 2, 10, 10 + ]).astype(dtype) + 1j * rand([2, 10, 10]).astype(dtype) for place in self._places: with dg.guard(place): var_x = dg.to_variable(input) diff --git a/python/paddle/fluid/tests/unittests/test_complex_trace_layer.py b/python/paddle/fluid/tests/unittests/test_complex_trace_layer.py index fcbab29b5d0..1618d20da2e 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_trace_layer.py +++ b/python/paddle/fluid/tests/unittests/test_complex_trace_layer.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard class TestComplexTraceLayer(unittest.TestCase): + def setUp(self): self._dtypes = ["float32", "float64"] self._places = [fluid.CPUPlace()] @@ -31,13 +32,14 @@ class TestComplexTraceLayer(unittest.TestCase): def test_basic_api(self): for dtype in self._dtypes: - input = rand([2, 20, 2, 3]).astype(dtype) + 1j * rand( - [2, 20, 2, 3]).astype(dtype) + input = rand([ + 2, 20, 2, 3 + ]).astype(dtype) + 1j * rand([2, 20, 2, 3]).astype(dtype) for place in self._places: with dg.guard(place): var_x = dg.to_variable(input) - result = tensor.trace( - var_x, offset=1, axis1=0, axis2=2).numpy() + result = tensor.trace(var_x, offset=1, axis1=0, + axis2=2).numpy() target = 
np.trace(input, offset=1, axis1=0, axis2=2) self.assertTrue(np.allclose(result, target)) diff --git a/python/paddle/fluid/tests/unittests/test_complex_transpose.py b/python/paddle/fluid/tests/unittests/test_complex_transpose.py index cc7c778a0ce..bcbeabf8714 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_complex_transpose.py @@ -21,6 +21,7 @@ from paddle.fluid.framework import _test_eager_guard class TestComplexTransposeLayer(unittest.TestCase): + def setUp(self): self._dtypes = ["float32", "float64"] self._places = [paddle.CPUPlace()] diff --git a/python/paddle/fluid/tests/unittests/test_complex_variable.py b/python/paddle/fluid/tests/unittests/test_complex_variable.py index a4e2da894d2..c9ebf27cc4d 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_variable.py +++ b/python/paddle/fluid/tests/unittests/test_complex_variable.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard class TestComplexVariable(unittest.TestCase): + def compare(self): a = np.array([[1.0 + 1.0j, 2.0 + 1.0j], [3.0 + 1.0j, 4.0 + 1.0j]]).astype(self._dtype) @@ -45,18 +46,16 @@ class TestComplexVariable(unittest.TestCase): self.compare() def test_convert_np_dtype_to_dtype(self): - self.assertEqual( - convert_np_dtype_to_dtype_(np.complex64), - core.VarDesc.VarType.COMPLEX64) - self.assertEqual( - convert_np_dtype_to_dtype_(np.complex64), - core.VarDesc.VarType.COMPLEX64) + self.assertEqual(convert_np_dtype_to_dtype_(np.complex64), + core.VarDesc.VarType.COMPLEX64) + self.assertEqual(convert_np_dtype_to_dtype_(np.complex64), + core.VarDesc.VarType.COMPLEX64) def test_convert_dtype(self): - self.assertEqual( - convert_dtype(core.VarDesc.VarType.COMPLEX64), "complex64") - self.assertEqual( - convert_dtype(core.VarDesc.VarType.COMPLEX128), "complex128") + self.assertEqual(convert_dtype(core.VarDesc.VarType.COMPLEX64), + "complex64") + self.assertEqual(convert_dtype(core.VarDesc.VarType.COMPLEX128), + "complex128") def test_eager(self): with _test_eager_guard(): diff --git a/python/paddle/fluid/tests/unittests/test_complex_view_op.py b/python/paddle/fluid/tests/unittests/test_complex_view_op.py index 11f43c02a82..6b224209edc 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_view_op.py +++ b/python/paddle/fluid/tests/unittests/test_complex_view_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,6 +22,7 @@ import paddle from paddle.fluid import dygraph from paddle import static from paddle.fluid.framework import _test_eager_guard + paddle.enable_static() @@ -35,13 +36,13 @@ def ref_view_as_real(x): class TestViewAsComplexOp(OpTest): + def setUp(self): self.op_type = "as_complex" x = np.random.randn(10, 10, 2).astype("float64") out_ref = ref_view_as_complex(x) self.out_grad = np.ones( - [10, 10], dtype="float64") + 1j * np.ones( - [10, 10], dtype="float64") + [10, 10], dtype="float64") + 1j * np.ones([10, 10], dtype="float64") self.inputs = {'X': x} self.outputs = {'Out': out_ref} @@ -49,15 +50,15 @@ class TestViewAsComplexOp(OpTest): self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad( - ['X'], - 'Out', - user_defined_grads=[ref_view_as_real(self.out_grad)], - user_defined_grad_outputs=[self.out_grad], - check_eager=True) + self.check_grad(['X'], + 'Out', + user_defined_grads=[ref_view_as_real(self.out_grad)], + user_defined_grad_outputs=[self.out_grad], + check_eager=True) class TestViewAsRealOp(OpTest): + def setUp(self): self.op_type = "as_real" real = np.random.randn(10, 10).astype("float64") @@ -72,15 +73,15 @@ class TestViewAsRealOp(OpTest): self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad( - ['X'], - 'Out', - user_defined_grads=[ref_view_as_complex(self.out_grad)], - user_defined_grad_outputs=[self.out_grad], - check_eager=True) + self.check_grad(['X'], + 'Out', + user_defined_grads=[ref_view_as_complex(self.out_grad)], + user_defined_grad_outputs=[self.out_grad], + check_eager=True) class TestViewAsComplexAPI(unittest.TestCase): + def setUp(self): self.x = np.random.randn(10, 10, 2) self.out = ref_view_as_complex(self.x) @@ -108,6 +109,7 @@ class TestViewAsComplexAPI(unittest.TestCase): class TestViewAsRealAPI(unittest.TestCase): + def setUp(self): self.x = np.random.randn(10, 10) + 1j * np.random.randn(10, 10) self.out = ref_view_as_real(self.x) diff --git a/python/paddle/fluid/tests/unittests/test_concat_op.py b/python/paddle/fluid/tests/unittests/test_concat_op.py index 629ddb31d7b..130a7e8833b 100644 --- a/python/paddle/fluid/tests/unittests/test_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_concat_op.py @@ -24,6 +24,7 @@ import paddle class TestConcatOp(OpTest): + def setUp(self): self.op_type = "concat" self.python_api = paddle.concat @@ -38,8 +39,8 @@ class TestConcatOp(OpTest): self.actual_axis = self.axis self.outputs = { - 'Out': np.concatenate( - (self.x0, self.x1, self.x2), axis=self.actual_axis) + 'Out': + np.concatenate((self.x0, self.x1, self.x2), axis=self.actual_axis) } def get_dtype(self): @@ -79,6 +80,7 @@ class TestConcatOp(OpTest): class TestConcatOp2(TestConcatOp): + def init_test_data(self): self.x0 = np.random.random((2, 3, 4, 5)).astype(self.dtype) self.x1 = np.random.random((2, 3, 4, 5)).astype(self.dtype) @@ -89,6 +91,7 @@ class TestConcatOp2(TestConcatOp): @skip_check_grad_ci( reason="The function 'check_grad' for large inputs is too slow.") class TestConcatOp3(TestConcatOp): + def init_test_data(self): self.x0 = np.random.random((1, 256, 170, 256)).astype(self.dtype) self.x1 = np.random.random((1, 128, 170, 256)).astype(self.dtype) @@ -100,9 +103,11 @@ class TestConcatOp3(TestConcatOp): @skip_check_grad_ci( - reason="This test will meet fetch error when there is a null grad. The detailed information is in PR#17015." + reason= + "This test will meet fetch error when there is a null grad. The detailed information is in PR#17015." 
) class TestConcatOp4(TestConcatOp): + def init_test_data(self): self.x0 = np.random.random((2, 3, 4, 5)).astype(self.dtype) self.x1 = np.random.random((2, 3, 4, 5)).astype(self.dtype) @@ -114,6 +119,7 @@ class TestConcatOp4(TestConcatOp): class TestConcatOp5(TestConcatOp): + def init_test_data(self): self.x0 = np.random.random((5, 1, 4, 5)).astype(self.dtype) self.x1 = np.random.random((5, 2, 4, 5)).astype(self.dtype) @@ -122,6 +128,7 @@ class TestConcatOp5(TestConcatOp): class TestConcatOp6(TestConcatOp): + def setUp(self): self.op_type = "concat" self.dtype = self.get_dtype() @@ -158,7 +165,9 @@ class TestConcatOp6(TestConcatOp): def create_test_AxisTensor(parent): + class TestConcatAxisTensor(parent): + def setUp(self): self.op_type = "concat" self.python_api = paddle.concat @@ -178,8 +187,9 @@ def create_test_AxisTensor(parent): self.actual_axis = self.axis self.outputs = { - 'Out': np.concatenate( - (self.x0, self.x1, self.x2), axis=self.actual_axis) + 'Out': + np.concatenate((self.x0, self.x1, self.x2), + axis=self.actual_axis) } cls_name = "{0}_{1}".format(parent.__name__, "AxisTensor") @@ -198,7 +208,9 @@ create_test_AxisTensor(TestConcatOp6) def create_test_fp16(parent): + class TestConcatFp16(parent): + def get_dtype(self): return np.float16 @@ -217,9 +229,11 @@ create_test_fp16(TestConcatOp6) #----------------Concat Bf16---------------- def create_test_bf16(parent): + @unittest.skipIf(not paddle.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestConcatBf16(parent): + def get_dtype(self): return np.uint16 @@ -232,16 +246,17 @@ create_test_bf16(TestConcatOp) class TestConcatOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of concat_op should be list. x1 = fluid.layers.data(shape=[4], dtype='int32', name='x1') fluid.layers.concat(x1) # The item in input must be Variable. - x2 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) - x3 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x2 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) + x3 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.concat, [x2]) # The input dtype of concat_op must be float16, float32, float64, int32, int64. 
x4 = fluid.layers.data(shape=[4], dtype='uint8', name='x4') @@ -265,6 +280,7 @@ class TestConcatOpError(unittest.TestCase): class TestConcatAPI(unittest.TestCase): + def test_fluid_api(self): paddle.enable_static() x_1 = fluid.data(shape=[None, 1, 4, 5], dtype='int32', name='x_1') @@ -281,20 +297,22 @@ class TestConcatAPI(unittest.TestCase): out_3 = fluid.layers.concat(input=[x_2, x_3], axis=positive_1_int64) exe = fluid.Executor(place=fluid.CPUPlace()) - [res_1, res_2, res_3] = exe.run( - fluid.default_main_program(), - feed={"x_1": input_2, - "x_2": input_2, - "x_3": input_3}, - fetch_list=[out_1, out_2, out_3]) + [res_1, res_2, res_3] = exe.run(fluid.default_main_program(), + feed={ + "x_1": input_2, + "x_2": input_2, + "x_3": input_3 + }, + fetch_list=[out_1, out_2, out_3]) assert np.array_equal(res_1, np.concatenate((input_2, input_3), axis=1)) assert np.array_equal(res_2, np.concatenate((input_2, input_3), axis=1)) assert np.array_equal(res_3, np.concatenate((input_2, input_3), axis=1)) def test_api(self): paddle.enable_static() - x_1 = paddle.fluid.data( - shape=[None, 1, 4, 5], dtype='int32', name='x_1') + x_1 = paddle.fluid.data(shape=[None, 1, 4, 5], + dtype='int32', + name='x_1') paddle.concat([x_1, x_1], 0) input_2 = np.random.random([2, 1, 4, 5]).astype("int32") @@ -310,12 +328,14 @@ class TestConcatAPI(unittest.TestCase): out_4 = paddle.concat(x=[x_2, x_3], axis=negative_int64) exe = paddle.static.Executor(place=paddle.CPUPlace()) - [res_1, res_2, res_3, res_4] = exe.run( - paddle.static.default_main_program(), - feed={"x_1": input_2, - "x_2": input_2, - "x_3": input_3}, - fetch_list=[out_1, out_2, out_3, out_4]) + [res_1, res_2, res_3, + res_4] = exe.run(paddle.static.default_main_program(), + feed={ + "x_1": input_2, + "x_2": input_2, + "x_3": input_3 + }, + fetch_list=[out_1, out_2, out_3, out_4]) assert np.array_equal(res_1, np.concatenate((input_2, input_3), axis=1)) assert np.array_equal(res_2, np.concatenate((input_2, input_3), axis=1)) assert np.array_equal(res_3, np.concatenate((input_2, input_3), axis=1)) @@ -346,10 +366,10 @@ class TestConcatAPI(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): # The item in input must be Variable. - x2 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) - x3 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x2 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) + x3 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) self.assertRaises(TypeError, paddle.concat, [x2]) # The input dtype of concat_op must be float16, float32, float64, int32, int64. 
x4 = paddle.fluid.data(shape=[4], dtype='uint8', name='x4') @@ -393,8 +413,9 @@ class TestConcatAPIWithLoDTensorArray(unittest.TestCase): with fluid.program_guard(self.program): input = fluid.layers.assign(self.x) tensor_array = fluid.layers.create_array(dtype='float32') - zero = fluid.layers.fill_constant( - shape=[1], value=0, dtype="int64") + zero = fluid.layers.fill_constant(shape=[1], + value=0, + dtype="int64") for i in range(self.iter_num): fluid.layers.array_write(input, zero + i, tensor_array) @@ -428,9 +449,8 @@ class TestConcatAPIWithLoDTensorArray(unittest.TestCase): res = exe.run(self.program, fetch_list=self.out_var) self.assertTrue( np.array_equal( - res[0], - np.concatenate( - [self.x] * self.iter_num, axis=self.axis))) + res[0], np.concatenate([self.x] * self.iter_num, + axis=self.axis))) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_cond.py b/python/paddle/fluid/tests/unittests/test_cond.py index d9cb0ccf482..16804613051 100644 --- a/python/paddle/fluid/tests/unittests/test_cond.py +++ b/python/paddle/fluid/tests/unittests/test_cond.py @@ -30,6 +30,7 @@ np.random.seed(123) class TestCondInputOutput(unittest.TestCase): + def test_return_single_var(self): """ pseudocode: @@ -55,8 +56,8 @@ class TestCondInputOutput(unittest.TestCase): out = layers.cond(pred, true_func, false_func) # out is one tensor - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) ret = exe.run(main_program, fetch_list=[out.name]) self.assertTrue( @@ -73,14 +74,18 @@ class TestCondInputOutput(unittest.TestCase): """ def true_func(): - return layers.fill_constant( - shape=[1, 2], dtype='int32', value=1), layers.fill_constant( - shape=[2, 3], dtype='bool', value=True) + return layers.fill_constant(shape=[1, 2], dtype='int32', + value=1), layers.fill_constant( + shape=[2, 3], + dtype='bool', + value=True) def false_func(): - return layers.fill_constant( - shape=[3, 4], dtype='float32', value=3), layers.fill_constant( - shape=[4, 5], dtype='int64', value=2) + return layers.fill_constant(shape=[3, 4], dtype='float32', + value=3), layers.fill_constant( + shape=[4, 5], + dtype='int64', + value=2) main_program = Program() startup_program = Program() @@ -89,8 +94,8 @@ class TestCondInputOutput(unittest.TestCase): out = layers.cond(pred, true_func, false_func) # out is a tuple containing 2 tensors - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) ret = exe.run(main_program, fetch_list=out) self.assertTrue( @@ -125,8 +130,8 @@ class TestCondInputOutput(unittest.TestCase): pred = ((i % 2) == 0) a = layers.cond(pred, lambda: true_func(a, i), lambda: false_func(a, i)) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) for feed_i in range(5): expected_a = 7 * (feed_i + 1) if feed_i % 2 == 0 else 8 - feed_i @@ -134,8 +139,8 @@ class TestCondInputOutput(unittest.TestCase): feed={'i': np.full((1), feed_i, np.int32)}, fetch_list=[a]) self.assertTrue( - np.allclose( - np.asarray(ret), np.full((3, 2, 1), expected_a, np.int32))) + np.allclose(np.asarray(ret), + np.full((3, 2, 1), expected_a, np.int32))) def test_return_none(self): """ @@ -161,8 +166,8 
@@ class TestCondInputOutput(unittest.TestCase): out1 = layers.cond(pred, true_func, false_func) out2 = layers.cond(pred, None, false_func) out3 = layers.cond(pred, true_func, None) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) for feed_i in range(5): # Test that output is None is runnable @@ -183,9 +188,11 @@ class TestCondInputOutput(unittest.TestCase): return layers.fill_constant(shape=[2, 7], dtype='int32', value=3) def func_return_two_tensors(): - return layers.fill_constant( - shape=[3, 1], dtype='int32', value=7), layers.fill_constant( - shape=[3, 1], dtype='int32', value=8) + return layers.fill_constant(shape=[3, 1], dtype='int32', + value=7), layers.fill_constant( + shape=[3, 1], + dtype='int32', + value=8) main_program = Program() startup_program = Program() @@ -223,17 +230,19 @@ class TestCondInputOutput(unittest.TestCase): main_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(main_program, startup_program): - a = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=1.23) + a = fluid.layers.fill_constant(shape=[1], + dtype='float32', + value=1.23) a.stop_gradient = False - b = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=1.25) + b = fluid.layers.fill_constant(shape=[1], + dtype='float32', + value=1.25) b.stop_gradient = False out = layers.cond(a - b < -1.0, lambda: a, lambda: b) append_backward(out) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) ret = exe.run(main_program, fetch_list=[out, b, a.grad_name, b.grad_name]) @@ -245,6 +254,7 @@ class TestCondInputOutput(unittest.TestCase): class TestCondNestedControlFlow(unittest.TestCase): + def test_cond_inside_cond(self): """ pseudocode: @@ -280,8 +290,8 @@ class TestCondNestedControlFlow(unittest.TestCase): mean = layers.mean(out) append_backward(mean) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) for feed_i in range(0, 10): expected_a = 2.0 * feed_i @@ -302,30 +312,34 @@ class TestCondNestedControlFlow(unittest.TestCase): startup_program = fluid.Program() with fluid.program_guard(main_program, startup_program): - a = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=1.23) + a = fluid.layers.fill_constant(shape=[1], + dtype='float32', + value=1.23) a.stop_gradient = False - b = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=1.24) + b = fluid.layers.fill_constant(shape=[1], + dtype='float32', + value=1.24) b.stop_gradient = False out = fluid.layers.cond( - a < b, - lambda: fluid.layers.cond(a - b < -1.0, lambda: fluid.layers.elementwise_add(a, b), lambda: fluid.layers.elementwise_mul(a, b)), - lambda: fluid.layers.cond(a == b, lambda: fluid.layers.elementwise_sub(a, b), lambda: fluid.layers.elementwise_pow(a, b)) - ) + a < b, lambda: fluid.layers.cond( + a - b < -1.0, lambda: fluid.layers.elementwise_add(a, b), + lambda: fluid.layers.elementwise_mul(a, b)), lambda: + fluid.layers.cond(a == b, lambda: fluid.layers.elementwise_sub( + a, b), lambda: fluid.layers.elementwise_pow(a, b))) append_backward(out) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else 
fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) ret = exe.run(main_program, fetch_list=[out, a.grad_name, b.grad_name]) - # Note: fill_constant has loss of precision, so we assertAlmostEqual. + # Note: fill_constant has loss of precision, so we assertAlmostEqual. self.assertAlmostEqual(ret[0][0], 1.5252) self.assertAlmostEqual(ret[1][0], 1.24) self.assertAlmostEqual(ret[2][0], 1.23) class TestCondBackward(unittest.TestCase): + def backward_value_helper(self, cond_func, use_cuda, use_parallel_exe): """ Helper function that compares calculated backward value is close to dy/dx @@ -348,25 +362,24 @@ class TestCondBackward(unittest.TestCase): num_devices = 1 if use_parallel_exe: os.environ['CPU_NUM'] = str(2) - exe = fluid.ParallelExecutor( - use_cuda=use_cuda, - main_program=main_program, - loss_name=loss.name) + exe = fluid.ParallelExecutor(use_cuda=use_cuda, + main_program=main_program, + loss_name=loss.name) num_devices = exe.device_count delta = 0.005 for feed_i in range(0, 10): feed_img = np.random.random(size=[1, 9]).astype(np.float32) - feed_label = np.random.randint( - low=0, high=10, size=[1, 1], dtype=np.int64) + feed_label = np.random.randint(low=0, + high=10, + size=[1, 1], + dtype=np.int64) if use_parallel_exe: img_grad, loss_value = exe.run( feed={ 'i': np.full((num_devices), feed_i, np.int32), - 'image': np.repeat( - feed_img, num_devices, axis=0), - 'label': np.repeat( - feed_label, num_devices, axis=0) + 'image': np.repeat(feed_img, num_devices, axis=0), + 'label': np.repeat(feed_label, num_devices, axis=0) }, fetch_list=[img.grad_name, loss.name]) else: @@ -385,15 +398,16 @@ class TestCondBackward(unittest.TestCase): feed_img_delta[0][j] = feed_img[0][j] + delta if use_parallel_exe: loss_delta = exe.run(feed={ - 'i': np.full((num_devices), feed_i, np.int32), - 'image': np.repeat( - feed_img_delta, num_devices, axis=0), - 'label': np.repeat( - feed_label, num_devices, axis=0) + 'i': + np.full((num_devices), feed_i, np.int32), + 'image': + np.repeat(feed_img_delta, num_devices, axis=0), + 'label': + np.repeat(feed_label, num_devices, axis=0) }, fetch_list=[loss.name]) - multi_device_grad = ( - loss_delta[0] - loss_value[0]) / delta / num_devices + multi_device_grad = (loss_delta[0] - + loss_value[0]) / delta / num_devices for d in range(num_devices): numerical_grad[d][j] = multi_device_grad[d] else: @@ -405,12 +419,12 @@ class TestCondBackward(unittest.TestCase): 'label': feed_label }, fetch_list=[loss.name]) - numerical_grad[0][j] = ( - loss_delta[0] - loss_value[0]) / delta + numerical_grad[0][j] = (loss_delta[0] - + loss_value[0]) / delta feed_img_delta[0][j] = feed_img[0][j] self.assertTrue( - np.isclose( - img_grad, numerical_grad, atol=0.05, rtol=0.05).all()) + np.isclose(img_grad, numerical_grad, atol=0.05, + rtol=0.05).all()) def add_optimizer_helper(self, cond_func, use_cuda, use_parallel_exe): """ @@ -431,23 +445,22 @@ class TestCondBackward(unittest.TestCase): exe.run(startup_program) if use_parallel_exe: os.environ['CPU_NUM'] = str(2) - exe = fluid.ParallelExecutor( - use_cuda=use_cuda, - main_program=main_program, - loss_name=loss.name) + exe = fluid.ParallelExecutor(use_cuda=use_cuda, + main_program=main_program, + loss_name=loss.name) num_devices = exe.device_count for feed_i in range(0, 10): feed_img = np.random.random(size=[16, 784]).astype(np.float32) - feed_label = np.random.randint( - low=0, high=10, size=[16, 1], dtype=np.int64) + feed_label = np.random.randint(low=0, + 
high=10, + size=[16, 1], + dtype=np.int64) if use_parallel_exe: exe.run(feed={ 'i': np.full((num_devices), feed_i, np.int32), - 'image': np.repeat( - feed_img, num_devices, axis=0), - 'label': np.repeat( - feed_label, num_devices, axis=0) + 'image': np.repeat(feed_img, num_devices, axis=0), + 'label': np.repeat(feed_label, num_devices, axis=0) }, fetch_list=[loss.name]) else: @@ -460,11 +473,13 @@ class TestCondBackward(unittest.TestCase): fetch_list=[loss]) def test_cond_backward(self): + def cond_func(i, img, label): predicate = ((i % 2) == 0) - return layers.cond(predicate, - lambda: simple_fc_net_with_inputs(img, label, class_num=10), - lambda: batchnorm_fc_with_inputs(img, label, class_num=10)) + return layers.cond( + predicate, + lambda: simple_fc_net_with_inputs(img, label, class_num=10), + lambda: batchnorm_fc_with_inputs(img, label, class_num=10)) for use_parallel_exe in [False, True]: if use_parallel_exe and os.name == "nt": @@ -473,17 +488,18 @@ class TestCondBackward(unittest.TestCase): ) continue - self.backward_value_helper(cond_func, - core.is_compiled_with_cuda(), + self.backward_value_helper(cond_func, core.is_compiled_with_cuda(), use_parallel_exe) - self.add_optimizer_helper(cond_func, - core.is_compiled_with_cuda(), + self.add_optimizer_helper(cond_func, core.is_compiled_with_cuda(), use_parallel_exe) def test_half_nested_cond_backward(self): + def branch(i, img, label): - return layers.cond((i % 2) == 0, lambda: simple_fc_net_with_inputs(img, label, class_num=10), - lambda: batchnorm_fc_with_inputs(img, label, class_num=10)) + return layers.cond( + (i % 2) == 0, + lambda: simple_fc_net_with_inputs(img, label, class_num=10), + lambda: batchnorm_fc_with_inputs(img, label, class_num=10)) def cond_func_simple_net_at_true(i, img, label): return layers.cond(i < 5, lambda: branch(i, img, label), @@ -514,13 +530,16 @@ class TestCondBackward(unittest.TestCase): use_parallel_exe) def test_nested_cond_backward(self): + def branch(i, img, label, mod_two): if mod_two: predicate = ((i % 2) == 0) else: predicate = ((i % 2) != 0) - return layers.cond(predicate, lambda: simple_fc_net_with_inputs(img, label, class_num=10), - lambda: batchnorm_fc_with_inputs(img, label, class_num=10)) + return layers.cond( + predicate, + lambda: simple_fc_net_with_inputs(img, label, class_num=10), + lambda: batchnorm_fc_with_inputs(img, label, class_num=10)) def cond_func(i, img, label): return layers.cond(i < 5, lambda: branch(i, img, label, True), @@ -532,15 +551,14 @@ class TestCondBackward(unittest.TestCase): "Skip use_parallel_exe=True in Windows because of flaky test when using PE under old Windows machine" ) continue - self.backward_value_helper(cond_func, - core.is_compiled_with_cuda(), + self.backward_value_helper(cond_func, core.is_compiled_with_cuda(), use_parallel_exe) - self.add_optimizer_helper(cond_func, - core.is_compiled_with_cuda(), + self.add_optimizer_helper(cond_func, core.is_compiled_with_cuda(), use_parallel_exe) class TestCondWithError(unittest.TestCase): + def test_input_type_error(self): main_program = framework.Program() startup_program = framework.Program() diff --git a/python/paddle/fluid/tests/unittests/test_conditional_block.py b/python/paddle/fluid/tests/unittests/test_conditional_block.py index 6a71d396b48..64980115d9e 100644 --- a/python/paddle/fluid/tests/unittests/test_conditional_block.py +++ b/python/paddle/fluid/tests/unittests/test_conditional_block.py @@ -25,6 +25,7 @@ from paddle.fluid.layers.control_flow import ConditionalBlock class 
ConditionalBlockTest(unittest.TestCase): + def test_forward(self): main_program = fluid.Program() startup_program = fluid.Program() @@ -55,6 +56,7 @@ class ConditionalBlockTest(unittest.TestCase): class TestConditionalBlockOpInferShape(unittest.TestCase): + def test_infer_shape(self): main_program = fluid.Program() startup_program = fluid.Program() @@ -64,19 +66,23 @@ class TestConditionalBlockOpInferShape(unittest.TestCase): main_program._rollback() step_scope = global_block.create_var( type=core.VarDesc.VarType.STEP_SCOPES) - cond_var = layers.fill_constant( - shape=[1], dtype='bool', value=False) + cond_var = layers.fill_constant(shape=[1], + dtype='bool', + value=False) - op = global_block.append_op( - type='conditional_block', - inputs={ - 'Cond': [cond_var], - 'Input': [], - }, - outputs={'Out': [], - 'Scope': [step_scope]}, - attrs={'sub_block': sub_block, - 'is_scalar_condition': True}) + op = global_block.append_op(type='conditional_block', + inputs={ + 'Cond': [cond_var], + 'Input': [], + }, + outputs={ + 'Out': [], + 'Scope': [step_scope] + }, + attrs={ + 'sub_block': sub_block, + 'is_scalar_condition': True + }) op.desc.infer_shape(global_block.desc) diff --git a/python/paddle/fluid/tests/unittests/test_conj_op.py b/python/paddle/fluid/tests/unittests/test_conj_op.py index fe9efc301fe..a3b3f243260 100644 --- a/python/paddle/fluid/tests/unittests/test_conj_op.py +++ b/python/paddle/fluid/tests/unittests/test_conj_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,6 +19,7 @@ import numpy as np import paddle import paddle.fluid.core as core import sys + sys.path.append("..") from op_test import OpTest from paddle.fluid import Program, program_guard @@ -30,6 +31,7 @@ paddle.enable_static() class TestConjOp(OpTest): + def setUp(self): self.op_type = "conj" self.python_api = paddle.tensor.conj @@ -57,15 +59,15 @@ class TestConjOp(OpTest): self.check_output(check_eager=True) def test_check_grad_normal(self): - self.check_grad( - ['X'], - 'Out', - user_defined_grads=[self.grad_in], - user_defined_grad_outputs=[self.grad_out], - check_eager=True) + self.check_grad(['X'], + 'Out', + user_defined_grads=[self.grad_in], + user_defined_grad_outputs=[self.grad_out], + check_eager=True) class TestComplexConjOp(unittest.TestCase): + def setUp(self): self._dtypes = ["float32", "float64"] self._places = [paddle.CPUPlace()] @@ -74,8 +76,9 @@ class TestComplexConjOp(unittest.TestCase): def test_conj_api(self): for dtype in self._dtypes: - input = rand([2, 20, 2, 3]).astype(dtype) + 1j * rand( - [2, 20, 2, 3]).astype(dtype) + input = rand([ + 2, 20, 2, 3 + ]).astype(dtype) + 1j * rand([2, 20, 2, 3]).astype(dtype) for place in self._places: with dg.guard(place): var_x = paddle.to_tensor(input) @@ -85,8 +88,9 @@ class TestComplexConjOp(unittest.TestCase): def test_conj_operator(self): for dtype in self._dtypes: - input = rand([2, 20, 2, 3]).astype(dtype) + 1j * rand( - [2, 20, 2, 3]).astype(dtype) + input = rand([ + 2, 20, 2, 3 + ]).astype(dtype) + 1j * rand([2, 20, 2, 3]).astype(dtype) for place in self._places: with dg.guard(place): var_x = paddle.to_tensor(input) @@ -95,9 +99,11 @@ class TestComplexConjOp(unittest.TestCase): self.assertTrue(np.array_equal(result, target)) def test_conj_static_mode(self): + def init_input_output(dtype): - input = rand([2, 20, 2, 3]).astype(dtype) + 1j * rand( - [2, 20, 2, 3]).astype(dtype) + input = rand([ + 2, 20, 2, 3 + ]).astype(dtype) + 1j * rand([2, 20, 2, 3]).astype(dtype) return {'x': input}, np.conj(input) for dtype in self._dtypes: @@ -105,8 +111,9 @@ class TestComplexConjOp(unittest.TestCase): for place in self._places: with static.program_guard(static.Program()): x_dtype = np.complex64 if dtype == "float32" else np.complex128 - x = static.data( - name="x", shape=[2, 20, 2, 3], dtype=x_dtype) + x = static.data(name="x", + shape=[2, 20, 2, 3], + dtype=x_dtype) out = paddle.conj(x) exe = static.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_const_value.py b/python/paddle/fluid/tests/unittests/test_const_value.py index 0b2431d7726..3ae68fb5042 100644 --- a/python/paddle/fluid/tests/unittests/test_const_value.py +++ b/python/paddle/fluid/tests/unittests/test_const_value.py @@ -19,6 +19,7 @@ import paddle.fluid.framework as framework class ConstantTest(unittest.TestCase): + def test_const_value(self): self.assertEqual(framework.GRAD_VAR_SUFFIX, "@GRAD") self.assertEqual(framework.TEMP_VAR_NAME, "@TEMP@") diff --git a/python/paddle/fluid/tests/unittests/test_context_manager.py b/python/paddle/fluid/tests/unittests/test_context_manager.py index bd5e1b2355c..93c1d7fa1a4 100644 --- a/python/paddle/fluid/tests/unittests/test_context_manager.py +++ b/python/paddle/fluid/tests/unittests/test_context_manager.py @@ -17,8 +17,9 @@ import unittest class TestContextManagerRaiseException(unittest.TestCase): - # When exception raised in 'with' context, we should safely exit the context + # When exception raised in 'with' context, we should safely exit the context def test_func1(self): + def foo(): with 
fluid.dygraph.guard(): print("raise error in context manager") @@ -27,7 +28,7 @@ class TestContextManagerRaiseException(unittest.TestCase): self.assertRaises(TypeError, foo) def test_func2(self): - # After test_func1 executed, if fluid.dygraph.guard() in test_func1 safely exited, + # After test_func1 executed, if fluid.dygraph.guard() in test_func1 safely exited, # fluid._non_static_mode() should be false. self.assertEqual(fluid._non_static_mode(), False) diff --git a/python/paddle/fluid/tests/unittests/test_conv1d_layer.py b/python/paddle/fluid/tests/unittests/test_conv1d_layer.py index ca77177125f..a7ee1141358 100644 --- a/python/paddle/fluid/tests/unittests/test_conv1d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv1d_layer.py @@ -22,6 +22,7 @@ import unittest class Conv1DTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', batch_size=4, @@ -67,8 +68,8 @@ class Conv1DTestCase(unittest.TestCase): filter_size = self.filter_size self.weight_shape = weight_shape = (self.num_filters, self.num_channels // self.groups) + tuple(filter_size) - self.weight = np.random.uniform( - -1, 1, size=weight_shape).astype(self.dtype) + self.weight = np.random.uniform(-1, 1, + size=weight_shape).astype(self.dtype) if not self.no_bias: self.bias = np.random.uniform( -1, 1, size=(self.num_filters, )).astype(self.dtype) @@ -84,19 +85,19 @@ class Conv1DTestCase(unittest.TestCase): -1) if not self.channel_last else ( -1, -1, self.num_channels) x_var = fluid.data("input", input_shape, dtype=self.dtype) - w_var = fluid.data( - "weight", self.weight_shape, dtype=self.dtype) - b_var = fluid.data( - "bias", (self.num_filters, ), dtype=self.dtype) - y_var = F.conv1d( - x_var, - w_var, - b_var if not self.no_bias else None, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + w_var = fluid.data("weight", + self.weight_shape, + dtype=self.dtype) + b_var = fluid.data("bias", (self.num_filters, ), + dtype=self.dtype) + y_var = F.conv1d(x_var, + w_var, + b_var if not self.no_bias else None, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) feed_dict = {"input": self.input, "weight": self.weight} if self.bias is not None: feed_dict["bias"] = self.bias @@ -107,16 +108,15 @@ class Conv1DTestCase(unittest.TestCase): def paddle_nn_layer(self): x_var = paddle.to_tensor(self.input) - conv = nn.Conv1D( - self.num_channels, - self.num_filters, - self.filter_size, - padding=self.padding, - padding_mode=self.padding_mode, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + conv = nn.Conv1D(self.num_channels, + self.num_filters, + self.filter_size, + padding=self.padding, + padding_mode=self.padding_mode, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) conv.weight.set_value(self.weight) if not self.no_bias: conv.bias.set_value(self.bias) @@ -140,6 +140,7 @@ class Conv1DTestCase(unittest.TestCase): class Conv1DErrorTestCase(Conv1DTestCase): + def runTest(self): place = fluid.CPUPlace() with dg.guard(place): @@ -148,6 +149,7 @@ class Conv1DErrorTestCase(Conv1DTestCase): class Conv1DTypeErrorTestCase(Conv1DTestCase): + def runTest(self): place = fluid.CPUPlace() with dg.guard(place): @@ -160,65 +162,58 @@ def add_cases(suite): suite.addTest(Conv1DTestCase(methodName='runTest', stride=[1], dilation=2)) suite.addTest(Conv1DTestCase(methodName='runTest', 
stride=2, dilation=(1))) suite.addTest( - Conv1DTestCase( - methodName='runTest', padding="same", no_bias=True)) + Conv1DTestCase(methodName='runTest', padding="same", no_bias=True)) suite.addTest( - Conv1DTestCase( - methodName='runTest', filter_size=3, padding='valid')) + Conv1DTestCase(methodName='runTest', filter_size=3, padding='valid')) suite.addTest( - Conv1DTestCase( - methodName='runTest', num_filters=512, padding='valid')) + Conv1DTestCase(methodName='runTest', num_filters=512, padding='valid')) suite.addTest( - Conv1DTestCase( - methodName='runTest', num_filters=512, padding=[1, 2])) + Conv1DTestCase(methodName='runTest', num_filters=512, padding=[1, 2])) suite.addTest( - Conv1DTestCase( - methodName='runTest', padding=2, data_format='NLC')) + Conv1DTestCase(methodName='runTest', padding=2, data_format='NLC')) suite.addTest(Conv1DTestCase(methodName='runTest', padding=[1])) suite.addTest(Conv1DTestCase(methodName='runTest', padding=[1, 2])) suite.addTest( - Conv1DTestCase( - methodName='runTest', padding=[1, 2], data_format='NLC')) + Conv1DTestCase(methodName='runTest', padding=[1, 2], data_format='NLC')) suite.addTest(Conv1DTestCase(methodName='runTest', padding=2)) suite.addTest(Conv1DTestCase(methodName='runTest')) suite.addTest( - Conv1DTestCase( - methodName='runTest', groups=2, padding="valid")) + Conv1DTestCase(methodName='runTest', groups=2, padding="valid")) suite.addTest( - Conv1DTestCase( - methodName='runTest', - num_filters=6, - num_channels=3, - groups=3, - padding="valid", - data_format='NLC')) + Conv1DTestCase(methodName='runTest', + num_filters=6, + num_channels=3, + groups=3, + padding="valid", + data_format='NLC')) def add_error_cases(suite): suite.addTest( - Conv1DTypeErrorTestCase( - methodName='runTest', padding_mode="reflect", padding="valid")) - suite.addTest( - Conv1DErrorTestCase( - methodName='runTest', data_format="VALID")) + Conv1DTypeErrorTestCase(methodName='runTest', + padding_mode="reflect", + padding="valid")) + suite.addTest(Conv1DErrorTestCase(methodName='runTest', + data_format="VALID")) suite.addTest( - Conv1DErrorTestCase( - methodName='runTest', padding_mode="VALID")) + Conv1DErrorTestCase(methodName='runTest', padding_mode="VALID")) suite.addTest( - Conv1DErrorTestCase( - methodName='runTest', num_channels=5, groups=2)) + Conv1DErrorTestCase(methodName='runTest', num_channels=5, groups=2)) suite.addTest( - Conv1DErrorTestCase( - methodName='runTest', num_filters=8, num_channels=15, groups=3)) + Conv1DErrorTestCase(methodName='runTest', + num_filters=8, + num_channels=15, + groups=3)) suite.addTest( - Conv1DErrorTestCase( - methodName='runTest', padding=[1, 2, 3, 4, 5])) + Conv1DErrorTestCase(methodName='runTest', padding=[1, 2, 3, 4, 5])) suite.addTest( - Conv1DErrorTestCase( - methodName='runTest', padding=[1, 2, 3, 4, 5], data_format='NLC')) + Conv1DErrorTestCase(methodName='runTest', + padding=[1, 2, 3, 4, 5], + data_format='NLC')) suite.addTest( - Conv1DErrorTestCase( - methodName='runTest', num_filters=512, padding=[1, 2, 3, 4, 5])) + Conv1DErrorTestCase(methodName='runTest', + num_filters=512, + padding=[1, 2, 3, 4, 5])) suite.addTest(Conv1DErrorTestCase(methodName='runTest', dilation=-10)) diff --git a/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py index 40b7074ed39..493cda0c924 100644 --- a/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py @@ -22,6 +22,7 
@@ import unittest class Conv1DTransposeTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', batch_size=4, @@ -62,7 +63,8 @@ class Conv1DTransposeTestCase(unittest.TestCase): self.spartial_shape) if not self.channel_last else ( self.batch_size, self.spartial_shape, - self.in_channels, ) + self.in_channels, + ) self.input = np.random.randn(*input_shape).astype(self.dtype) if isinstance(self.filter_size, int): @@ -71,8 +73,8 @@ class Conv1DTransposeTestCase(unittest.TestCase): filter_size = self.filter_size self.weight_shape = weight_shape = (self.in_channels, self.out_channels // self.groups) + tuple(filter_size) - self.weight = np.random.uniform( - -1, 1, size=weight_shape).astype(self.dtype) + self.weight = np.random.uniform(-1, 1, + size=weight_shape).astype(self.dtype) if not self.no_bias: self.bias = np.random.uniform( -1, 1, size=(self.out_channels, )).astype(self.dtype) @@ -88,21 +90,21 @@ class Conv1DTransposeTestCase(unittest.TestCase): -1) if not self.channel_last else ( -1, -1, self.in_channels) x_var = fluid.data("input", input_shape, dtype=self.dtype) - w_var = fluid.data( - "weight", self.weight_shape, dtype=self.dtype) - b_var = fluid.data( - "bias", (self.out_channels, ), dtype=self.dtype) - y_var = F.conv1d_transpose( - x_var, - w_var, - None if self.no_bias else b_var, - output_size=self.output_size, - padding=self.padding, - output_padding=self.output_padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + w_var = fluid.data("weight", + self.weight_shape, + dtype=self.dtype) + b_var = fluid.data("bias", (self.out_channels, ), + dtype=self.dtype) + y_var = F.conv1d_transpose(x_var, + w_var, + None if self.no_bias else b_var, + output_size=self.output_size, + padding=self.padding, + output_padding=self.output_padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) feed_dict = {"input": self.input, "weight": self.weight} if self.bias is not None: feed_dict["bias"] = self.bias @@ -113,16 +115,15 @@ class Conv1DTransposeTestCase(unittest.TestCase): def paddle_nn_layer(self): x_var = paddle.to_tensor(self.input) - conv = nn.Conv1DTranspose( - self.in_channels, - self.out_channels, - self.filter_size, - padding=self.padding, - output_padding=self.output_padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + conv = nn.Conv1DTranspose(self.in_channels, + self.out_channels, + self.filter_size, + padding=self.padding, + output_padding=self.output_padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) conv.weight.set_value(self.weight) if not self.no_bias: conv.bias.set_value(self.bias) @@ -146,6 +147,7 @@ class Conv1DTransposeTestCase(unittest.TestCase): class Conv1DTransposeErrorTestCase(Conv1DTransposeTestCase): + def runTest(self): place = fluid.CPUPlace() with dg.guard(place): @@ -156,67 +158,64 @@ class Conv1DTransposeErrorTestCase(Conv1DTransposeTestCase): def add_cases(suite): suite.addTest(Conv1DTransposeTestCase(methodName='runTest')) suite.addTest( - Conv1DTransposeTestCase( - methodName='runTest', stride=[2], no_bias=True, dilation=2)) - suite.addTest( - Conv1DTransposeTestCase( - methodName='runTest', - filter_size=(3), - output_size=[36], - stride=[2], - dilation=2)) - suite.addTest( - Conv1DTransposeTestCase( - methodName='runTest', stride=2, dilation=(2))) + Conv1DTransposeTestCase(methodName='runTest', + stride=[2], + 
no_bias=True, + dilation=2)) suite.addTest( - Conv1DTransposeTestCase( - methodName='runTest', padding="valid")) + Conv1DTransposeTestCase(methodName='runTest', + filter_size=(3), + output_size=[36], + stride=[2], + dilation=2)) suite.addTest( - Conv1DTransposeTestCase( - methodName='runTest', padding='valid')) + Conv1DTransposeTestCase(methodName='runTest', stride=2, dilation=(2))) + suite.addTest(Conv1DTransposeTestCase(methodName='runTest', + padding="valid")) + suite.addTest(Conv1DTransposeTestCase(methodName='runTest', + padding='valid')) suite.addTest( - Conv1DTransposeTestCase( - methodName='runTest', filter_size=1, padding=3)) + Conv1DTransposeTestCase(methodName='runTest', filter_size=1, padding=3)) suite.addTest(Conv1DTransposeTestCase(methodName='runTest', padding=[2])) suite.addTest( - Conv1DTransposeTestCase( - methodName='runTest', data_format="NLC")) + Conv1DTransposeTestCase(methodName='runTest', data_format="NLC")) suite.addTest( - Conv1DTransposeTestCase( - methodName='runTest', groups=2, padding="valid")) + Conv1DTransposeTestCase(methodName='runTest', groups=2, + padding="valid")) suite.addTest( - Conv1DTransposeTestCase( - methodName='runTest', - out_channels=6, - in_channels=3, - groups=3, - padding="valid")) + Conv1DTransposeTestCase(methodName='runTest', + out_channels=6, + in_channels=3, + groups=3, + padding="valid")) suite.addTest( - Conv1DTransposeTestCase( - methodName='runTest', - data_format="NLC", - spartial_shape=16, - output_size=18)) + Conv1DTransposeTestCase(methodName='runTest', + data_format="NLC", + spartial_shape=16, + output_size=18)) suite.addTest( - Conv1DTransposeTestCase( - methodName='runTest', data_format="NLC", stride=3, - output_padding=2)) + Conv1DTransposeTestCase(methodName='runTest', + data_format="NLC", + stride=3, + output_padding=2)) suite.addTest(Conv1DTransposeTestCase(methodName='runTest', padding=[1, 2])) def add_error_cases(suite): suite.addTest( - Conv1DTransposeErrorTestCase( - methodName='runTest', data_format="not_valid")) + Conv1DTransposeErrorTestCase(methodName='runTest', + data_format="not_valid")) suite.addTest( - Conv1DTransposeErrorTestCase( - methodName='runTest', in_channels=5, groups=2)) + Conv1DTransposeErrorTestCase(methodName='runTest', + in_channels=5, + groups=2)) suite.addTest( - Conv1DTransposeErrorTestCase( - methodName='runTest', stride=2, output_padding=3)) + Conv1DTransposeErrorTestCase(methodName='runTest', + stride=2, + output_padding=3)) suite.addTest( - Conv1DTransposeErrorTestCase( - methodName='runTest', output_size="not_valid")) + Conv1DTransposeErrorTestCase(methodName='runTest', + output_size="not_valid")) def load_tests(loader, standard_tests, pattern): diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_api.py b/python/paddle/fluid/tests/unittests/test_conv2d_api.py index cb7fd8fe1bc..5ea256efbb4 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_api.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_api.py @@ -18,6 +18,7 @@ import unittest import numpy as np import paddle + paddle.enable_static() import paddle.fluid.core as core import paddle.fluid as fluid @@ -26,272 +27,251 @@ from paddle.fluid import Program, program_guard class TestConv2DAPI(unittest.TestCase): + def test_api(self): - input_NHWC = fluid.layers.data( - name="input_NHWC", - shape=[2, 5, 5, 3], - append_batch_size=False, - dtype="float32") - - input_NCHW = fluid.layers.data( - name="input_NCHW", - shape=[2, 3, 5, 5], - append_batch_size=False, - dtype="float32") - - fluid.layers.conv2d( - 
input=input_NHWC, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding=0, - dilation=[1, 1], - groups=1, - data_format="NCHW") - - fluid.layers.conv2d( - input=input_NCHW, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 2, 1, 0], - dilation=[1, 1], - groups=1, - data_format="NCHW") - - fluid.layers.conv2d( - input=input_NCHW, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding=[[0, 0], [0, 0], [1, 1], [1, 1]], - dilation=[1, 1], - groups=1, - data_format="NCHW") - - fluid.layers.conv2d( - input=input_NHWC, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding=[[0, 0], [1, 1], [1, 1], [0, 0]], - dilation=[1, 1], - groups=1, - data_format="NHWC") - - fluid.layers.conv2d( - input=input_NCHW, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding="SAME", - dilation=[1, 1], - groups=1, - data_format="NCHW") - - fluid.layers.conv2d( - input=input_NCHW, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding="VALID", - dilation=[1, 1], - groups=1, - data_format="NCHW") + input_NHWC = fluid.layers.data(name="input_NHWC", + shape=[2, 5, 5, 3], + append_batch_size=False, + dtype="float32") + + input_NCHW = fluid.layers.data(name="input_NCHW", + shape=[2, 3, 5, 5], + append_batch_size=False, + dtype="float32") + + fluid.layers.conv2d(input=input_NHWC, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding=0, + dilation=[1, 1], + groups=1, + data_format="NCHW") + + fluid.layers.conv2d(input=input_NCHW, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 2, 1, 0], + dilation=[1, 1], + groups=1, + data_format="NCHW") + + fluid.layers.conv2d(input=input_NCHW, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding=[[0, 0], [0, 0], [1, 1], [1, 1]], + dilation=[1, 1], + groups=1, + data_format="NCHW") + + fluid.layers.conv2d(input=input_NHWC, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding=[[0, 0], [1, 1], [1, 1], [0, 0]], + dilation=[1, 1], + groups=1, + data_format="NHWC") + + fluid.layers.conv2d(input=input_NCHW, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding="SAME", + dilation=[1, 1], + groups=1, + data_format="NCHW") + + fluid.layers.conv2d(input=input_NCHW, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding="VALID", + dilation=[1, 1], + groups=1, + data_format="NCHW") def test_depthwise_conv2d(self): x_var = paddle.uniform((2, 8, 8, 4), dtype='float32', min=-1., max=1.) 
- conv = paddle.nn.Conv2D( - in_channels=4, - out_channels=4, - kernel_size=(3, 3), - groups=4, - data_format='NHWC') + conv = paddle.nn.Conv2D(in_channels=4, + out_channels=4, + kernel_size=(3, 3), + groups=4, + data_format='NHWC') y_var = conv(x_var) class TestConv2DAPI_Error(unittest.TestCase): + def test_api(self): - input = fluid.layers.data( - name="input", - shape=[2, 5, 5, 5], - append_batch_size=False, - dtype="float32") + input = fluid.layers.data(name="input", + shape=[2, 5, 5, 5], + append_batch_size=False, + dtype="float32") # ValueError: cudnn def run_1(): - fluid.layers.conv2d( - input=input, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding=0, - dilation=[1, 1], - groups=1, - use_cudnn=[0], - data_format="NCHW") + fluid.layers.conv2d(input=input, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding=0, + dilation=[1, 1], + groups=1, + use_cudnn=[0], + data_format="NCHW") self.assertRaises(ValueError, run_1) # ValueError: data_format def run_2(): - fluid.layers.conv2d( - input=input, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding=0, - dilation=[1, 1], - groups=1, - use_cudnn=False, - data_format="NCHWC") + fluid.layers.conv2d(input=input, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding=0, + dilation=[1, 1], + groups=1, + use_cudnn=False, + data_format="NCHWC") self.assertRaises(ValueError, run_2) # ValueError: padding def run_3(): - fluid.layers.conv2d( - input=input, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding="SAMEE", - dilation=[1, 1], - groups=1, - use_cudnn=False, - data_format="NCHW") + fluid.layers.conv2d(input=input, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding="SAMEE", + dilation=[1, 1], + groups=1, + use_cudnn=False, + data_format="NCHW") self.assertRaises(ValueError, run_3) def run_4(): - fluid.layers.conv2d( - input=input, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding=[[0, 1], [0, 1], [0, 1], [0, 1]], - dilation=[1, 1], - groups=1, - use_cudnn=False, - data_format="NCHW") + fluid.layers.conv2d(input=input, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding=[[0, 1], [0, 1], [0, 1], [0, 1]], + dilation=[1, 1], + groups=1, + use_cudnn=False, + data_format="NCHW") self.assertRaises(ValueError, run_4) def run_5(): - fluid.layers.conv2d( - input=input, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding=[[0, 1], [0, 1], [0, 1], [0, 1]], - dilation=[1, 1], - groups=1, - use_cudnn=False, - data_format="NHWC") + fluid.layers.conv2d(input=input, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding=[[0, 1], [0, 1], [0, 1], [0, 1]], + dilation=[1, 1], + groups=1, + use_cudnn=False, + data_format="NHWC") self.assertRaises(ValueError, run_5) # ValueError: channel dimmention - x = fluid.layers.data( - name="x", - shape=[2, 5, 5, -1], - append_batch_size=False, - dtype="float32") + x = fluid.layers.data(name="x", + shape=[2, 5, 5, -1], + append_batch_size=False, + dtype="float32") def run_6(): - fluid.layers.conv2d( - input=x, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding=0, - dilation=[1, 1], - groups=1, - use_cudnn=False, - data_format="NHWC") + fluid.layers.conv2d(input=x, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding=0, + dilation=[1, 1], + groups=1, + use_cudnn=False, + data_format="NHWC") self.assertRaises(ValueError, run_6) # ValueError: groups def run_7(): - fluid.layers.conv2d( - input=input, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding=0, - 
dilation=[1, 1], - groups=3, - use_cudnn=False, - data_format="NHWC") + fluid.layers.conv2d(input=input, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding=0, + dilation=[1, 1], + groups=3, + use_cudnn=False, + data_format="NHWC") self.assertRaises(ValueError, run_7) # ValueError: filter num def run_8(): - fluid.layers.conv2d( - input=input, - num_filters=0, - filter_size=0, - stride=0, - padding=0, - dilation=0, - groups=1, - use_cudnn=False, - data_format="NCHW") + fluid.layers.conv2d(input=input, + num_filters=0, + filter_size=0, + stride=0, + padding=0, + dilation=0, + groups=1, + use_cudnn=False, + data_format="NCHW") self.assertRaises(ValueError, run_8) # ValueError: groups def run_9(): - fluid.layers.conv2d( - input=input, - num_filters=0, - filter_size=0, - stride=0, - padding=0, - dilation=0, - groups=0, - use_cudnn=False, - data_format="NCHW") + fluid.layers.conv2d(input=input, + num_filters=0, + filter_size=0, + stride=0, + padding=0, + dilation=0, + groups=0, + use_cudnn=False, + data_format="NCHW") self.assertRaises(ValueError, run_9) - # ValueError: stride + # ValueError: stride def run_10(): - fluid.layers.conv2d( - input=input, - num_filters=1, - filter_size=1, - stride=0, - padding=0, - dilation=0, - groups=1, - use_cudnn=False, - data_format="NCHW") + fluid.layers.conv2d(input=input, + num_filters=1, + filter_size=1, + stride=0, + padding=0, + dilation=0, + groups=1, + use_cudnn=False, + data_format="NCHW") self.assertRaises(ValueError, run_10) def test_api_with_error_input(self): - input = fluid.layers.data( - name="error_input", - shape=[1], - append_batch_size=False, - dtype="float32") + input = fluid.layers.data(name="error_input", + shape=[1], + append_batch_size=False, + dtype="float32") # ValueError: cudnn def run_1(): - fluid.layers.conv2d( - input=input, - num_filters=0, - filter_size=0, - stride=0, - padding=0, - dilation=0, - groups=0, - use_cudnn=False, - data_format="NCHW") + fluid.layers.conv2d(input=input, + num_filters=0, + filter_size=0, + stride=0, + padding=0, + dilation=0, + groups=0, + use_cudnn=False, + data_format="NCHW") self.assertRaises(ValueError, run_1) @@ -301,22 +281,21 @@ class TestConv2DAPI_Error(unittest.TestCase): not (core.is_compiled_with_cuda() or core.is_compiled_with_rocm()), "core is not compiled with CUDA or ROCM") class TestConv2DEnviron(unittest.TestCase): + def run1(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - inputs = fluid.layers.data( - shape=[2, 3, 5, 5], - append_batch_size=False, - name="inputs", - dtype="float32") - result = fluid.layers.conv2d( - input=inputs, - num_filters=4, - filter_size=[3, 3], - stride=[1, 1], - padding=0, - dilation=[1, 1], - groups=1, - data_format="NCHW") + inputs = fluid.layers.data(shape=[2, 3, 5, 5], + append_batch_size=False, + name="inputs", + dtype="float32") + result = fluid.layers.conv2d(input=inputs, + num_filters=4, + filter_size=[3, 3], + stride=[1, 1], + padding=0, + dilation=[1, 1], + groups=1, + data_format="NCHW") exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) fetches = exe.run(fluid.default_main_program(), @@ -326,11 +305,10 @@ class TestConv2DEnviron(unittest.TestCase): def run2(self, place): with fluid.dygraph.guard(place): inputs = fluid.dygraph.to_variable(self.input_np) - conv = paddle.nn.Conv2D( - in_channels=3, - out_channels=4, - kernel_size=(3, 3), - data_format="NCHW") + conv = paddle.nn.Conv2D(in_channels=3, + out_channels=4, + kernel_size=(3, 3), + data_format="NCHW") result = conv(inputs) def 
run3(self, place): @@ -339,7 +317,8 @@ class TestConv2DEnviron(unittest.TestCase): conv = paddle.fluid.dygraph.nn.Conv2D( num_channels=3, num_filters=4, - filter_size=(3, 3), ) + filter_size=(3, 3), + ) result = conv(inputs) def run_all(self, place): diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py index 8ef2660cac2..2a3d509e2bf 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py @@ -24,7 +24,9 @@ from test_conv2d_op import conv2d_forward_naive def create_test_padding_SAME_class(parent): + class TestPaddingSAMECase(parent): + def init_paddings(self): self.pad = [0, 0] self.padding_algorithm = "SAME" @@ -35,7 +37,9 @@ def create_test_padding_SAME_class(parent): def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): self.pad = [1, 1] self.padding_algorithm = "VALID" @@ -46,6 +50,7 @@ def create_test_padding_VALID_class(parent): class TestConv2DFusionOp(OpTest): + def setUp(self): self.op_type = "conv2d_fusion" self.exhaustive_search = False @@ -75,9 +80,11 @@ class TestConv2DFusionOp(OpTest): filter = np.random.random(self.filter_size).astype(self.dtype) bias = np.random.random(self.filter_size[0]).astype(self.dtype) - self.output, _, _, _, _ = conv2d_forward_naive( - input, filter, self.groups, conv2d_param, self.padding_algorithm, - self.data_format) + self.output, _, _, _, _ = conv2d_forward_naive(input, filter, + self.groups, + conv2d_param, + self.padding_algorithm, + self.data_format) self.output = self.output.astype(self.dtype) @@ -158,27 +165,32 @@ class TestConv2DFusionOp(OpTest): class TestWithoutResidual(TestConv2DFusionOp): + def init_residual(self): self.add_residual_data = False class TestIdentityActivation(TestConv2DFusionOp): + def init_activation(self): self.activation = 'identity' class TestIdentityActivation1(TestConv2DFusionOp): + def init_activation(self): self.activation = 'identity' self.add_residual_data = False class TestWithGroup(TestConv2DFusionOp): + def init_group(self): self.groups = 3 class TestWithDilation(TestConv2DFusionOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -195,11 +207,13 @@ class TestWithDilation(TestConv2DFusionOp): class TestCUDNNExhaustiveSearch(TestConv2DFusionOp): + def set_search_method(self): self.exhaustive_search = True class TestMultipleOutputs(TestConv2DFusionOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -216,12 +230,14 @@ class TestMultipleOutputs(TestConv2DFusionOp): class TestAsyPadding(TestConv2DFusionOp): + def init_paddings(self): self.pad = [0, 0, 1, 2] self.padding_algorithm = "EXPLICIT" class TestWithPad_AsyPadding(TestConv2DFusionOp): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 3, 10, 10] # NCHW @@ -235,6 +251,7 @@ class TestWithPad_AsyPadding(TestConv2DFusionOp): class TestWithStride_AsyPadding(TestConv2DFusionOp): + def init_test_case(self): self.stride = [2, 2] self.input_size = [2, 3, 6, 6] # NCHW @@ -248,6 +265,7 @@ class TestWithStride_AsyPadding(TestConv2DFusionOp): class TestWith1x1_AsyPadding(TestConv2DFusionOp): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 3, 5, 5] # NCHW @@ -264,11 +282,13 @@ class TestWith1x1_AsyPadding(TestConv2DFusionOp): class TestWithGroup_AsyPadding(TestConv2DFusionOp): + def init_group(self): self.groups = 3 class 
TestWithDepthWise3x3_AsyPadding(TestConv2DFusionOp): + def init_test_case(self): self.stride = [1, 1] self.input_size = [3, 4, 10, 10] # NCHW @@ -288,6 +308,7 @@ class TestWithDepthWise3x3_AsyPadding(TestConv2DFusionOp): class TestWithDepthWise5x5_AsyPadding(TestConv2DFusionOp): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 4, 10, 10] # NCHW @@ -304,6 +325,7 @@ class TestWithDepthWise5x5_AsyPadding(TestConv2DFusionOp): class TestWithDepthWise7x7_AsyPadding(TestConv2DFusionOp): + def init_test_case(self): self.stride = [2, 2] self.input_size = [2, 8, 10, 10] # NCHW @@ -320,6 +342,7 @@ class TestWithDepthWise7x7_AsyPadding(TestConv2DFusionOp): class TestWithDilation_AsyPadding(TestConv2DFusionOp): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 3, 10, 10] # NCHW @@ -339,6 +362,7 @@ class TestWithDilation_AsyPadding(TestConv2DFusionOp): class TestWithInput1x1Filter1x1_AsyPadding(TestConv2DFusionOp): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 3, 1, 1] # NCHW diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_layer.py b/python/paddle/fluid/tests/unittests/test_conv2d_layer.py index 508bd7b1e64..b8c6f1dfa2f 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_layer.py @@ -27,6 +27,7 @@ def _reverse_repeat_list(t, n): class Conv2DTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', batch_size=4, @@ -51,8 +52,8 @@ class Conv2DTestCase(unittest.TestCase): self.padding = padding if padding_mode in {'reflect', 'replicate', 'circular'}: - _paired_padding = fluid.layers.utils.convert_to_list(padding, 2, - 'padding') + _paired_padding = fluid.layers.utils.convert_to_list( + padding, 2, 'padding') self._reversed_padding_repeated_twice = _reverse_repeat_list( _paired_padding, 2) self.padding_mode = padding_mode @@ -69,8 +70,8 @@ class Conv2DTestCase(unittest.TestCase): input_shape = (self.batch_size, ) + self.spartial_shape + ( self.num_channels, ) else: - input_shape = (self.batch_size, self.num_channels - ) + self.spartial_shape + input_shape = (self.batch_size, + self.num_channels) + self.spartial_shape self.input = np.random.randn(*input_shape).astype(self.dtype) if isinstance(self.filter_size, int): @@ -79,8 +80,8 @@ class Conv2DTestCase(unittest.TestCase): filter_size = self.filter_size self.weight_shape = weight_shape = (self.num_filters, self.num_channels // self.groups) + tuple(filter_size) - self.weight = np.random.uniform( - -1, 1, size=weight_shape).astype(self.dtype) + self.weight = np.random.uniform(-1, 1, + size=weight_shape).astype(self.dtype) if not self.no_bias: self.bias = np.random.uniform( -1, 1, size=(self.num_filters, )).astype(self.dtype) @@ -109,17 +110,16 @@ class Conv2DTestCase(unittest.TestCase): else: padding = self.padding - y_var = fluid.layers.conv2d( - x_var, - self.num_filters, - self.filter_size, - padding=padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - param_attr=weight_attr, - bias_attr=bias_attr, - data_format=self.data_format) + y_var = fluid.layers.conv2d(x_var, + self.num_filters, + self.filter_size, + padding=padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + param_attr=weight_attr, + bias_attr=bias_attr, + data_format=self.data_format) feed_dict = {"input": self.input} exe = fluid.Executor(place) @@ -135,10 +135,11 @@ class Conv2DTestCase(unittest.TestCase): input_shape = (-1, -1, -1,self.num_channels) \ if self.channel_last 
else (-1, self.num_channels, -1, -1) x_var = fluid.data("input", input_shape, dtype=self.dtype) - w_var = fluid.data( - "weight", self.weight_shape, dtype=self.dtype) - b_var = fluid.data( - "bias", (self.num_filters, ), dtype=self.dtype) + w_var = fluid.data("weight", + self.weight_shape, + dtype=self.dtype) + b_var = fluid.data("bias", (self.num_filters, ), + dtype=self.dtype) if self.padding_mode != 'zeros': x_var = F.pad(x_var, @@ -149,15 +150,14 @@ class Conv2DTestCase(unittest.TestCase): else: padding = self.padding - y_var = F.conv2d( - x_var, - w_var, - b_var if not self.no_bias else None, - padding=padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y_var = F.conv2d(x_var, + w_var, + b_var if not self.no_bias else None, + padding=padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) feed_dict = {"input": self.input, "weight": self.weight} if self.bias is not None: feed_dict["bias"] = self.bias @@ -169,16 +169,15 @@ class Conv2DTestCase(unittest.TestCase): def paddle_nn_layer(self): x_var = paddle.to_tensor(self.input) x_var.stop_gradient = False - conv = nn.Conv2D( - self.num_channels, - self.num_filters, - self.filter_size, - padding=self.padding, - padding_mode=self.padding_mode, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + conv = nn.Conv2D(self.num_channels, + self.num_filters, + self.filter_size, + padding=self.padding, + padding_mode=self.padding_mode, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) conv.weight.set_value(self.weight) if not self.no_bias: conv.bias.set_value(self.bias) @@ -211,6 +210,7 @@ class Conv2DTestCase(unittest.TestCase): class Conv2DErrorTestCase(Conv2DTestCase): + def runTest(self): place = fluid.CPUPlace() with dg.guard(place): @@ -221,68 +221,63 @@ class Conv2DErrorTestCase(Conv2DTestCase): def add_cases(suite): suite.addTest(Conv2DTestCase(methodName='runTest')) suite.addTest( - Conv2DTestCase( - methodName='runTest', stride=[1, 2], dilation=2)) + Conv2DTestCase(methodName='runTest', stride=[1, 2], dilation=2)) suite.addTest( - Conv2DTestCase( - methodName='runTest', stride=2, dilation=(2, 1))) + Conv2DTestCase(methodName='runTest', stride=2, dilation=(2, 1))) suite.addTest( - Conv2DTestCase( - methodName='runTest', padding="same", no_bias=True)) + Conv2DTestCase(methodName='runTest', padding="same", no_bias=True)) suite.addTest( - Conv2DTestCase( - methodName='runTest', filter_size=(3, 3), padding='valid')) + Conv2DTestCase(methodName='runTest', + filter_size=(3, 3), + padding='valid')) suite.addTest(Conv2DTestCase(methodName='runTest', padding=(2, 3))) suite.addTest(Conv2DTestCase(methodName='runTest', padding=[1, 2, 2, 1])) suite.addTest( - Conv2DTestCase( - methodName='runTest', padding=[[0, 0], [0, 0], [1, 2], [2, 1]])) + Conv2DTestCase(methodName='runTest', + padding=[[0, 0], [0, 0], [1, 2], [2, 1]])) suite.addTest(Conv2DTestCase(methodName='runTest', data_format="NHWC")) suite.addTest( - Conv2DTestCase( - methodName='runTest', - data_format="NHWC", - padding=[[0, 0], [1, 1], [2, 2], [0, 0]])) + Conv2DTestCase(methodName='runTest', + data_format="NHWC", + padding=[[0, 0], [1, 1], [2, 2], [0, 0]])) suite.addTest( - Conv2DTestCase( - methodName='runTest', groups=2, padding="valid")) + Conv2DTestCase(methodName='runTest', groups=2, padding="valid")) suite.addTest( - Conv2DTestCase( - methodName='runTest', - 
num_filters=6, - num_channels=3, - groups=3, - padding="valid")) + Conv2DTestCase(methodName='runTest', + num_filters=6, + num_channels=3, + groups=3, + padding="valid")) suite.addTest( - Conv2DTestCase( - methodName='runTest', - filter_size=(3, 3), - padding=1, - padding_mode='reflect')) + Conv2DTestCase(methodName='runTest', + filter_size=(3, 3), + padding=1, + padding_mode='reflect')) suite.addTest( - Conv2DTestCase( - methodName='runTest', - filter_size=(3, 3), - padding=1, - padding_mode='replicate')) + Conv2DTestCase(methodName='runTest', + filter_size=(3, 3), + padding=1, + padding_mode='replicate')) suite.addTest( - Conv2DTestCase( - methodName='runTest', - filter_size=(3, 3), - padding=1, - padding_mode='circular')) + Conv2DTestCase(methodName='runTest', + filter_size=(3, 3), + padding=1, + padding_mode='circular')) def add_error_cases(suite): suite.addTest( - Conv2DErrorTestCase( - methodName='runTest', num_channels=5, groups=2)) + Conv2DErrorTestCase(methodName='runTest', num_channels=5, groups=2)) suite.addTest( - Conv2DErrorTestCase( - methodName='runTest', num_channels=5, groups=2, stride=0)) + Conv2DErrorTestCase(methodName='runTest', + num_channels=5, + groups=2, + stride=0)) suite.addTest( - Conv2DErrorTestCase( - methodName='runTest', num_channels=5, groups=2, padding=[-1, -1])) + Conv2DErrorTestCase(methodName='runTest', + num_channels=5, + groups=2, + padding=[-1, -1])) def load_tests(loader, standard_tests, pattern): diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_op.py index fdb93e1f1af..0d38a1571e0 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_op.py @@ -21,8 +21,9 @@ import paddle import paddle import paddle.fluid.core as core import paddle.fluid as fluid -from paddle.fluid.tests.unittests.op_test import ( - OpTest, convert_float_to_uint16, get_numeric_gradient) +from paddle.fluid.tests.unittests.op_test import (OpTest, + convert_float_to_uint16, + get_numeric_gradient) from paddle.fluid.tests.unittests.testsuite import create_op from paddle.fluid import Program, program_guard @@ -64,8 +65,8 @@ def conv2d_forward_naive(input, for input_size, filter_size, stride_size in zip(input_shape, pool_size, pool_stride): out_size = int((input_size + stride_size - 1) / stride_size) - pad_sum = np.max(( - (out_size - 1) * stride_size + filter_size - input_size, 0)) + pad_sum = np.max( + ((out_size - 1) * stride_size + filter_size - input_size, 0)) pad_0 = int(pad_sum / 2) pad_1 = int(pad_sum - pad_0) padding.append(pad_0) @@ -94,14 +95,14 @@ def conv2d_forward_naive(input, d_bolck_h = (dilation[0] * (f_h - 1) + 1) d_bolck_w = (dilation[1] * (f_w - 1) + 1) - input_pad = np.pad(input, ((0, 0), (0, 0), (pad_h_0, pad_h_1), - (pad_w_0, pad_w_1)), + input_pad = np.pad(input, + ((0, 0), (0, 0), (pad_h_0, pad_h_1), (pad_w_0, pad_w_1)), mode='constant', constant_values=0) filter_dilation = np.zeros((f_n, f_c, d_bolck_h, d_bolck_w)) - filter_dilation[:, :, 0:d_bolck_h:dilation[0], 0:d_bolck_w:dilation[ - 1]] = filter + filter_dilation[:, :, 0:d_bolck_h:dilation[0], + 0:d_bolck_w:dilation[1]] = filter for i in range(out_h): for j in range(out_w): @@ -126,9 +127,11 @@ def conv2d_forward_naive(input, def create_test_cudnn_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNCase(parent): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float32 if 
core.is_compiled_with_rocm( @@ -140,9 +143,11 @@ def create_test_cudnn_class(parent): def create_test_cudnn_fp16_class(parent, grad_check=True): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestConv2DCUDNNFp16(parent): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float16 @@ -156,14 +161,16 @@ def create_test_cudnn_fp16_class(parent, grad_check=True): def test_check_grad_no_filter(self): place = core.CUDAPlace(0) if core.is_float16_supported(place) and grad_check: - self.check_grad_with_place( - place, ['Input'], 'Output', no_grad_set=set(['Filter'])) + self.check_grad_with_place(place, ['Input'], + 'Output', + no_grad_set=set(['Filter'])) def test_check_grad_no_input(self): place = core.CUDAPlace(0) if core.is_float16_supported(place) and grad_check: - self.check_grad_with_place( - place, ['Filter'], 'Output', no_grad_set=set(['Input'])) + self.check_grad_with_place(place, ['Filter'], + 'Output', + no_grad_set=set(['Input'])) cls_name = "{0}_{1}".format(parent.__name__, "CUDNNFp16") TestConv2DCUDNNFp16.__name__ = cls_name @@ -171,11 +178,13 @@ def create_test_cudnn_fp16_class(parent, grad_check=True): def create_test_cudnn_bf16_class(parent): + @unittest.skipIf( - not core.is_compiled_with_cuda() or - not core.is_bfloat16_supported(core.CUDAPlace(0)), + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), "core is not compiled with CUDA and do not support bfloat16") class TestConv2DCUDNNBF16(parent): + def get_numeric_grad(self, place, check_name): scope = core.Scope() self._check_grad_helper() @@ -196,20 +205,18 @@ def create_test_cudnn_bf16_class(parent): def test_check_grad_no_filter(self): place = core.CUDAPlace(0) numeric_grads = self.get_numeric_grad(place, 'Input') - self.check_grad_with_place( - place, ['Input'], - 'Output', - no_grad_set=set(['Filter']), - user_defined_grads=[numeric_grads]) + self.check_grad_with_place(place, ['Input'], + 'Output', + no_grad_set=set(['Filter']), + user_defined_grads=[numeric_grads]) def test_check_grad_no_input(self): place = core.CUDAPlace(0) numeric_grads = self.get_numeric_grad(place, 'Filter') - self.check_grad_with_place( - place, ['Filter'], - 'Output', - no_grad_set=set(['Input']), - user_defined_grads=[numeric_grads]) + self.check_grad_with_place(place, ['Filter'], + 'Output', + no_grad_set=set(['Input']), + user_defined_grads=[numeric_grads]) cls_name = "{0}_{1}".format(parent.__name__, "CUDNNBF16") TestConv2DCUDNNBF16.__name__ = cls_name @@ -217,7 +224,9 @@ def create_test_cudnn_bf16_class(parent): def create_test_channel_last_class(parent): + class TestChannelLastCase(parent): + def init_data_format(self): self.data_format = "NHWC" @@ -231,9 +240,11 @@ def create_test_channel_last_class(parent): def create_test_cudnn_channel_last_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCudnnChannelLastCase(parent): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float32 if core.is_compiled_with_rocm( @@ -252,9 +263,11 @@ def create_test_cudnn_channel_last_class(parent): def create_test_cudnn_channel_last_fp16_class(parent, grad_check=True): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCudnnChannelLastFp16(parent): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float16 @@ -268,14 +281,16 @@ def create_test_cudnn_channel_last_fp16_class(parent, grad_check=True): def 
test_check_grad_no_filter(self): place = core.CUDAPlace(0) if core.is_float16_supported(place) and grad_check: - self.check_grad_with_place( - place, ['Input'], 'Output', no_grad_set=set(['Filter'])) + self.check_grad_with_place(place, ['Input'], + 'Output', + no_grad_set=set(['Filter'])) def test_check_grad_no_input(self): place = core.CUDAPlace(0) if core.is_float16_supported(place) and grad_check: - self.check_grad_with_place( - place, ['Filter'], 'Output', no_grad_set=set(['Input'])) + self.check_grad_with_place(place, ['Filter'], + 'Output', + no_grad_set=set(['Input'])) def init_data_format(self): self.data_format = "NHWC" @@ -290,7 +305,9 @@ def create_test_cudnn_channel_last_fp16_class(parent, grad_check=True): def create_test_padding_SAME_class(parent): + class TestPaddingSMAECase(parent): + def init_paddings(self): self.pad = [0, 0] self.padding_algorithm = "SAME" @@ -301,7 +318,9 @@ def create_test_padding_SAME_class(parent): def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): self.pad = [1, 1] self.padding_algorithm = "VALID" @@ -312,9 +331,11 @@ def create_test_padding_VALID_class(parent): def create_test_cudnn_padding_SAME_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNPaddingSMAECase(parent): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float32 if core.is_compiled_with_rocm( @@ -330,9 +351,11 @@ def create_test_cudnn_padding_SAME_class(parent): def create_test_cudnn_padding_VALID_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNPaddingVALIDCase(parent): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float32 if core.is_compiled_with_rocm( @@ -348,6 +371,7 @@ def create_test_cudnn_padding_VALID_class(parent): class TestConv2DOp(OpTest): + def setUp(self): self.op_type = "conv2d" self.use_cudnn = False @@ -422,51 +446,49 @@ class TestConv2DOp(OpTest): self.outputs = {'Output': output} def has_cuda(self): - return core.is_compiled_with_cuda() and (self.use_cudnn or - self.use_cuda) + return core.is_compiled_with_cuda() and (self.use_cudnn + or self.use_cuda) def test_check_output(self): place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace() # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_output_with_place( - place, atol=1e-5, check_dygraph=(self.use_mkldnn == False)) + self.check_output_with_place(place, + atol=1e-5, + check_dygraph=(self.use_mkldnn == False)) def test_check_grad(self): - if self.dtype == np.float16 or (hasattr(self, "no_need_check_grad") and - self.no_need_check_grad == True): + if self.dtype == np.float16 or (hasattr(self, "no_need_check_grad") + and self.no_need_check_grad == True): return place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace() # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_grad_with_place( - place, {'Input', 'Filter'}, - 'Output', - max_relative_error=0.02, - check_dygraph=(self.use_mkldnn == False)) + self.check_grad_with_place(place, {'Input', 'Filter'}, + 'Output', + max_relative_error=0.02, + check_dygraph=(self.use_mkldnn == False)) def test_check_grad_no_filter(self): - if self.dtype == np.float16 or (hasattr(self, "no_need_check_grad") and - self.no_need_check_grad == True): + if self.dtype == np.float16 or (hasattr(self, "no_need_check_grad") + and self.no_need_check_grad == True): return place = core.CUDAPlace(0) if self.has_cuda() else 
core.CPUPlace() # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_grad_with_place( - place, ['Input'], - 'Output', - max_relative_error=0.02, - no_grad_set=set(['Filter']), - check_dygraph=(self.use_mkldnn == False)) + self.check_grad_with_place(place, ['Input'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Filter']), + check_dygraph=(self.use_mkldnn == False)) def test_check_grad_no_input(self): - if self.dtype == np.float16 or (hasattr(self, "no_need_check_grad") and - self.no_need_check_grad == True): + if self.dtype == np.float16 or (hasattr(self, "no_need_check_grad") + and self.no_need_check_grad == True): return place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace() # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_grad_with_place( - place, ['Filter'], - 'Output', - no_grad_set=set(['Input']), - check_dygraph=(self.use_mkldnn == False)) + self.check_grad_with_place(place, ['Filter'], + 'Output', + no_grad_set=set(['Input']), + check_dygraph=(self.use_mkldnn == False)) def init_test_case(self): self.pad = [0, 0] @@ -490,6 +512,7 @@ class TestConv2DOp(OpTest): class TestWithPad(TestConv2DOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -500,6 +523,7 @@ class TestWithPad(TestConv2DOp): class TestWithStride(TestConv2DOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -510,6 +534,7 @@ class TestWithStride(TestConv2DOp): class TestWithGroup(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -521,6 +546,7 @@ class TestWithGroup(TestConv2DOp): class TestWith1x1(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -534,6 +560,7 @@ class TestWith1x1(TestConv2DOp): class TestWithDepthWise3x3(TestConv2DOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -550,6 +577,7 @@ class TestWithDepthWise3x3(TestConv2DOp): class TestWithDepthWise5x5(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -563,6 +591,7 @@ class TestWithDepthWise5x5(TestConv2DOp): class TestWithDepthWise7x7(TestConv2DOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -576,6 +605,7 @@ class TestWithDepthWise7x7(TestConv2DOp): class TestWithDilation(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -592,6 +622,7 @@ class TestWithDilation(TestConv2DOp): class TestWithInput1x1Filter1x1(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -633,6 +664,7 @@ create_test_cudnn_bf16_class(TestWithInput1x1Filter1x1) class TestCUDNNExhaustiveSearch(TestConv2DOp): + def init_kernel_type(self): self.use_cudnn = True self.exhaustive_search = True @@ -640,13 +672,14 @@ class TestCUDNNExhaustiveSearch(TestConv2DOp): class TestConv2DOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): def test_Variable(): # the input of conv2d must be Variable. 
- x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) fluid.layers.conv2d(x1, 1, 1) self.assertRaises(TypeError, test_Variable) @@ -654,8 +687,9 @@ class TestConv2DOpError(unittest.TestCase): def test_dtype(): # the input dtype of conv2d must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data( - name='x2', shape=[3, 4, 5, 6], dtype="int32") + x2 = fluid.layers.data(name='x2', + shape=[3, 4, 5, 6], + dtype="int32") fluid.layers.conv2d(x2, 1, 1) self.assertRaises(TypeError, test_dtype) @@ -671,6 +705,7 @@ class TestConv2DOpError(unittest.TestCase): class TestConv2DOp_v2(OpTest): + def setUp(self): self.op_type = "conv2d" self.use_cudnn = False @@ -704,9 +739,10 @@ class TestConv2DOp_v2(OpTest): else: input2 = input filter = np.random.uniform(-1, 1, self.filter_size).astype(self.dtype) - output, _, _, _, _ = conv2d_forward_naive( - input2, filter, self.groups, conv2d_param, self.padding_algorithm, - self.data_format) + output, _, _, _, _ = conv2d_forward_naive(input2, filter, self.groups, + conv2d_param, + self.padding_algorithm, + self.data_format) output = output.astype(self.dtype) self.inputs = { @@ -729,48 +765,46 @@ class TestConv2DOp_v2(OpTest): self.outputs = {'Output': output} def has_cuda(self): - return core.is_compiled_with_cuda() and (self.use_cudnn or - self.use_cuda) + return core.is_compiled_with_cuda() and (self.use_cudnn + or self.use_cuda) def test_check_output(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace() - self.check_output_with_place( - place, atol=1e-5, check_dygraph=(self.use_mkldnn == False)) + self.check_output_with_place(place, + atol=1e-5, + check_dygraph=(self.use_mkldnn == False)) def test_check_grad(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode if self.dtype == np.float16: return place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace() - self.check_grad_with_place( - place, {'Input', 'Filter'}, - 'Output', - max_relative_error=0.02, - check_dygraph=(self.use_mkldnn == False)) + self.check_grad_with_place(place, {'Input', 'Filter'}, + 'Output', + max_relative_error=0.02, + check_dygraph=(self.use_mkldnn == False)) def test_check_grad_no_filter(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode if self.dtype == np.float16: return place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace() - self.check_grad_with_place( - place, ['Input'], - 'Output', - max_relative_error=0.02, - no_grad_set=set(['Filter']), - check_dygraph=(self.use_mkldnn == False)) + self.check_grad_with_place(place, ['Input'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Filter']), + check_dygraph=(self.use_mkldnn == False)) def test_check_grad_no_input(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode if self.dtype == np.float16: return place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace() - self.check_grad_with_place( - place, ['Filter'], - 'Output', - no_grad_set=set(['Input']), - check_dygraph=(self.use_mkldnn == False)) + self.check_grad_with_place(place, ['Filter'], + 'Output', + no_grad_set=set(['Input']), + check_dygraph=(self.use_mkldnn == False)) def init_test_case(self): self.pad = [0, 0] @@ -801,12 +835,14 @@ class TestConv2DOp_v2(OpTest): class TestConv2DOp_AsyPadding(TestConv2DOp_v2): + def init_paddings(self): self.pad = [0, 0, 1, 2] 
self.padding_algorithm = "EXPLICIT" class TestWithPad_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 3, 5, 5] # NCHW @@ -820,6 +856,7 @@ class TestWithPad_AsyPadding(TestConv2DOp_v2): class TestWithStride_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [2, 2] self.input_size = [2, 3, 6, 6] # NCHW @@ -833,6 +870,7 @@ class TestWithStride_AsyPadding(TestConv2DOp_v2): class TestWithGroup_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 2] @@ -844,6 +882,7 @@ class TestWithGroup_AsyPadding(TestConv2DOp_v2): class TestWith1x1_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 3, 5, 5] # NCHW @@ -860,6 +899,7 @@ class TestWith1x1_AsyPadding(TestConv2DOp_v2): class TestWithDepthWise3x3_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [3, 4, 10, 10] # NCHW @@ -879,6 +919,7 @@ class TestWithDepthWise3x3_AsyPadding(TestConv2DOp_v2): class TestWithDepthWise5x5_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 4, 10, 10] # NCHW @@ -895,6 +936,7 @@ class TestWithDepthWise5x5_AsyPadding(TestConv2DOp_v2): class TestWithDepthWise7x7_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [2, 2] self.input_size = [2, 8, 10, 10] # NCHW @@ -911,6 +953,7 @@ class TestWithDepthWise7x7_AsyPadding(TestConv2DOp_v2): class TestWithDilation_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 3, 10, 10] # NCHW @@ -930,6 +973,7 @@ class TestWithDilation_AsyPadding(TestConv2DOp_v2): class TestWithInput1x1Filter1x1_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [40, 3, 1, 1] # NCHW @@ -990,16 +1034,16 @@ create_test_cudnn_channel_last_class(TestWithStride_AsyPadding) create_test_cudnn_channel_last_class(TestWithGroup_AsyPadding) create_test_cudnn_channel_last_class(TestWithDilation_AsyPadding) -create_test_cudnn_channel_last_fp16_class( - TestConv2DOp_AsyPadding, grad_check=False) -create_test_cudnn_channel_last_fp16_class( - TestWithPad_AsyPadding, grad_check=False) -create_test_cudnn_channel_last_fp16_class( - TestWithStride_AsyPadding, grad_check=False) -create_test_cudnn_channel_last_fp16_class( - TestWithGroup_AsyPadding, grad_check=False) -create_test_cudnn_channel_last_fp16_class( - TestWithDilation_AsyPadding, grad_check=False) +create_test_cudnn_channel_last_fp16_class(TestConv2DOp_AsyPadding, + grad_check=False) +create_test_cudnn_channel_last_fp16_class(TestWithPad_AsyPadding, + grad_check=False) +create_test_cudnn_channel_last_fp16_class(TestWithStride_AsyPadding, + grad_check=False) +create_test_cudnn_channel_last_fp16_class(TestWithGroup_AsyPadding, + grad_check=False) +create_test_cudnn_channel_last_fp16_class(TestWithDilation_AsyPadding, + grad_check=False) if __name__ == '__main__': paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_op_depthwise_conv.py b/python/paddle/fluid/tests/unittests/test_conv2d_op_depthwise_conv.py index 1b680c5a06b..8e43e4d48de 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_op_depthwise_conv.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_op_depthwise_conv.py @@ -18,6 +18,7 @@ import unittest import numpy as np import paddle + paddle.enable_static() import paddle.fluid.core as core import paddle.fluid as fluid @@ -29,6 +30,7 @@ from test_conv2d_op 
import TestConv2DOp, TestConv2DOp_v2, create_test_padding_SA class TestDepthwiseConv(TestConv2DOp): + def init_test_case(self): self.use_cuda = True self.pad = [1, 1] @@ -42,6 +44,7 @@ class TestDepthwiseConv(TestConv2DOp): class TestDepthwiseConv2(TestConv2DOp): + def init_test_case(self): self.use_cuda = True self.pad = [1, 1] @@ -55,6 +58,7 @@ class TestDepthwiseConv2(TestConv2DOp): class TestDepthwiseConv3(TestConv2DOp): + def init_test_case(self): self.use_cuda = True self.pad = [1, 1] @@ -68,6 +72,7 @@ class TestDepthwiseConv3(TestConv2DOp): class TestDepthwiseConvWithDilation(TestConv2DOp): + def init_test_case(self): self.use_cuda = True self.pad = [1, 1] @@ -82,6 +87,7 @@ class TestDepthwiseConvWithDilation(TestConv2DOp): class TestDepthwiseConvWithDilation2(TestConv2DOp): + def init_test_case(self): self.use_cuda = True self.pad = [1, 1] @@ -96,6 +102,7 @@ class TestDepthwiseConvWithDilation2(TestConv2DOp): class TestDepthwiseConvandFuse(TestConv2DOp): + def init_test_case(self): self.fuse_relu_before_depthwise_conv = True self.use_cuda = True @@ -110,6 +117,7 @@ class TestDepthwiseConvandFuse(TestConv2DOp): class TestDepthwiseConv2andFuse(TestConv2DOp): + def init_test_case(self): self.fuse_relu_before_depthwise_conv = True self.use_cuda = True @@ -124,6 +132,7 @@ class TestDepthwiseConv2andFuse(TestConv2DOp): class TestDepthwiseConv3andFuse(TestConv2DOp): + def init_test_case(self): self.fuse_relu_before_depthwise_conv = True self.use_cuda = True @@ -138,6 +147,7 @@ class TestDepthwiseConv3andFuse(TestConv2DOp): class TestDepthwiseConvWithDilationandFuse(TestConv2DOp): + def init_test_case(self): self.fuse_relu_before_depthwise_conv = True self.use_cuda = True @@ -153,6 +163,7 @@ class TestDepthwiseConvWithDilationandFuse(TestConv2DOp): class TestDepthwiseConvWithDilation2andFuse(TestConv2DOp): + def init_test_case(self): self.fuse_relu_before_depthwise_conv = True self.use_cuda = True @@ -168,6 +179,7 @@ class TestDepthwiseConvWithDilation2andFuse(TestConv2DOp): class TestDepthwiseConv_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.use_cuda = True self.stride = [2, 2] @@ -184,6 +196,7 @@ class TestDepthwiseConv_AsyPadding(TestConv2DOp_v2): class TestDepthwiseConv2_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.use_cuda = True self.stride = [1, 1] @@ -200,6 +213,7 @@ class TestDepthwiseConv2_AsyPadding(TestConv2DOp_v2): class TestDepthwiseConv3_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.use_cuda = True self.stride = [1, 1] @@ -216,6 +230,7 @@ class TestDepthwiseConv3_AsyPadding(TestConv2DOp_v2): class TestDepthwiseConvWithDilation_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.use_cuda = True self.pad = [1, 1] @@ -234,6 +249,7 @@ class TestDepthwiseConvWithDilation_AsyPadding(TestConv2DOp_v2): class TestDepthwiseConvWithDilation2_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.use_cuda = True self.pad = [1, 1] @@ -252,6 +268,7 @@ class TestDepthwiseConvWithDilation2_AsyPadding(TestConv2DOp_v2): class TestDepthwiseConvandFuse_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.fuse_relu_before_depthwise_conv = True self.use_cuda = True @@ -270,6 +287,7 @@ class TestDepthwiseConvandFuse_AsyPadding(TestConv2DOp_v2): class TestDepthwiseConv2andFuse_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.fuse_relu_before_depthwise_conv = True self.use_cuda = True @@ -288,6 +306,7 @@ class TestDepthwiseConv2andFuse_AsyPadding(TestConv2DOp_v2): class 
TestDepthwiseConv3andFuse_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.fuse_relu_before_depthwise_conv = True self.use_cuda = True @@ -306,6 +325,7 @@ class TestDepthwiseConv3andFuse_AsyPadding(TestConv2DOp_v2): class TestDepthwiseConvWithDilationandFuse_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.fuse_relu_before_depthwise_conv = True self.use_cuda = True @@ -325,6 +345,7 @@ class TestDepthwiseConvWithDilationandFuse_AsyPadding(TestConv2DOp_v2): class TestDepthwiseConvWithDilation2andFuse_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.fuse_relu_before_depthwise_conv = True self.use_cuda = True diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py index 83d27343189..74d50c545c6 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py @@ -21,6 +21,7 @@ import unittest class Conv2DTransposeTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', batch_size=4, @@ -60,8 +61,8 @@ class Conv2DTransposeTestCase(unittest.TestCase): input_shape = (self.batch_size, ) + self.spartial_shape + ( self.num_channels, ) else: - input_shape = (self.batch_size, self.num_channels - ) + self.spartial_shape + input_shape = (self.batch_size, + self.num_channels) + self.spartial_shape self.input = np.random.randn(*input_shape).astype(self.dtype) if isinstance(self.filter_size, int): @@ -70,8 +71,8 @@ class Conv2DTransposeTestCase(unittest.TestCase): filter_size = self.filter_size self.weight_shape = weight_shape = (self.num_channels, self.num_filters // self.groups) + tuple(filter_size) - self.weight = np.random.uniform( - -1, 1, size=weight_shape).astype(self.dtype) + self.weight = np.random.uniform(-1, 1, + size=weight_shape).astype(self.dtype) if not self.no_bias: self.bias = np.random.uniform( -1, 1, size=(self.num_filters, )).astype(self.dtype) @@ -118,27 +119,27 @@ class Conv2DTransposeTestCase(unittest.TestCase): input_shape = (-1, -1, -1,self.num_channels) \ if self.channel_last else (-1, self.num_channels, -1, -1) x_var = fluid.data("input", input_shape, dtype=self.dtype) - w_var = fluid.data( - "weight", self.weight_shape, dtype=self.dtype) - b_var = fluid.data( - "bias", (self.num_filters, ), dtype=self.dtype) + w_var = fluid.data("weight", + self.weight_shape, + dtype=self.dtype) + b_var = fluid.data("bias", (self.num_filters, ), + dtype=self.dtype) if self.output_padding != 0: output_size = None else: output_size = self.output_size - y_var = F.conv2d_transpose( - x_var, - w_var, - None if self.no_bias else b_var, - output_size=output_size, - padding=self.padding, - output_padding=self.output_padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y_var = F.conv2d_transpose(x_var, + w_var, + None if self.no_bias else b_var, + output_size=output_size, + padding=self.padding, + output_padding=self.output_padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) feed_dict = {"input": self.input, "weight": self.weight} if self.bias is not None: feed_dict["bias"] = self.bias @@ -155,16 +156,15 @@ class Conv2DTransposeTestCase(unittest.TestCase): else: output_size = self.output_size - conv = nn.Conv2DTranspose( - self.num_channels, - self.num_filters, - self.filter_size, - padding=self.padding, - output_padding=self.output_padding, - 
stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + conv = nn.Conv2DTranspose(self.num_channels, + self.num_filters, + self.filter_size, + padding=self.padding, + output_padding=self.output_padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) conv.weight.set_value(self.weight) if not self.no_bias: conv.bias.set_value(self.bias) @@ -194,6 +194,7 @@ class Conv2DTransposeTestCase(unittest.TestCase): class Conv2DTransposeErrorTestCase(Conv2DTransposeTestCase): + def runTest(self): place = fluid.CPUPlace() with dg.guard(place): @@ -204,49 +205,46 @@ class Conv2DTransposeErrorTestCase(Conv2DTransposeTestCase): def add_cases(suite): suite.addTest(Conv2DTransposeTestCase(methodName='runTest')) suite.addTest( - Conv2DTransposeTestCase( - methodName='runTest', stride=[1, 2], no_bias=True, dilation=2)) - suite.addTest( - Conv2DTransposeTestCase( - methodName='runTest', - filter_size=(3, 3), - output_size=[20, 36], - stride=[1, 2], - dilation=2)) + Conv2DTransposeTestCase(methodName='runTest', + stride=[1, 2], + no_bias=True, + dilation=2)) suite.addTest( - Conv2DTransposeTestCase( - methodName='runTest', stride=2, dilation=(2, 1))) + Conv2DTransposeTestCase(methodName='runTest', + filter_size=(3, 3), + output_size=[20, 36], + stride=[1, 2], + dilation=2)) suite.addTest( - Conv2DTransposeTestCase( - methodName='runTest', padding="valid")) + Conv2DTransposeTestCase(methodName='runTest', stride=2, + dilation=(2, 1))) + suite.addTest(Conv2DTransposeTestCase(methodName='runTest', + padding="valid")) suite.addTest(Conv2DTransposeTestCase(methodName='runTest', padding="same")) suite.addTest( - Conv2DTransposeTestCase( - methodName='runTest', filter_size=1, padding=(2, 3))) + Conv2DTransposeTestCase(methodName='runTest', + filter_size=1, + padding=(2, 3))) suite.addTest( - Conv2DTransposeTestCase( - methodName='runTest', padding=[1, 2, 2, 1])) + Conv2DTransposeTestCase(methodName='runTest', padding=[1, 2, 2, 1])) suite.addTest( - Conv2DTransposeTestCase( - methodName='runTest', padding=[[0, 0], [0, 0], [1, 2], [2, 1]])) + Conv2DTransposeTestCase(methodName='runTest', + padding=[[0, 0], [0, 0], [1, 2], [2, 1]])) suite.addTest( - Conv2DTransposeTestCase( - methodName='runTest', data_format="NHWC")) + Conv2DTransposeTestCase(methodName='runTest', data_format="NHWC")) suite.addTest( - Conv2DTransposeTestCase( - methodName='runTest', - data_format="NHWC", - padding=[[0, 0], [1, 1], [2, 2], [0, 0]])) + Conv2DTransposeTestCase(methodName='runTest', + data_format="NHWC", + padding=[[0, 0], [1, 1], [2, 2], [0, 0]])) suite.addTest( - Conv2DTransposeTestCase( - methodName='runTest', groups=2, padding="valid")) + Conv2DTransposeTestCase(methodName='runTest', groups=2, + padding="valid")) suite.addTest( - Conv2DTransposeTestCase( - methodName='runTest', - num_filters=6, - num_channels=3, - groups=3, - padding="valid")) + Conv2DTransposeTestCase(methodName='runTest', + num_filters=6, + num_channels=3, + groups=3, + padding="valid")) suite.addTest( Conv2DTransposeTestCase( methodName='runTest', @@ -258,16 +256,18 @@ def add_cases(suite): padding=2, stride=2, output_size=[14, 14], - output_padding=[1, 1], )) + output_padding=[1, 1], + )) def add_error_cases(suite): suite.addTest( - Conv2DTransposeErrorTestCase( - methodName='runTest', num_channels=5, groups=2)) + Conv2DTransposeErrorTestCase(methodName='runTest', + num_channels=5, + groups=2)) suite.addTest( - Conv2DTransposeErrorTestCase( - methodName='runTest', 
output_size="not_valid")) + Conv2DTransposeErrorTestCase(methodName='runTest', + output_size="not_valid")) def load_tests(loader, standard_tests, pattern): diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py index 89125dc326d..c10d71baf32 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py @@ -19,6 +19,7 @@ import numpy as np import paddle import paddle.nn as nn + paddle.enable_static() import paddle.fluid.core as core import paddle.fluid as fluid @@ -47,11 +48,12 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs): # update pad and dilation def _get_padding_with_SAME(input_shape, kernel_size, kernel_stride): padding = [] - for input_size, filter_size, stride_size in zip( - input_shape, kernel_size, kernel_stride): + for input_size, filter_size, stride_size in zip(input_shape, + kernel_size, + kernel_stride): out_size = int((input_size + stride_size - 1) / stride_size) - pad_sum = np.max(( - (out_size - 1) * stride_size + filter_size - input_size, 0)) + pad_sum = np.max( + ((out_size - 1) * stride_size + filter_size - input_size, 0)) pad_0 = int(pad_sum / 2) pad_1 = int(pad_sum - pad_0) padding.append(pad_0) @@ -85,8 +87,8 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs): if 'output_padding' in attrs: out_pad_h = attrs['output_padding'][0] out_pad_w = attrs['output_padding'][1] - out = np.zeros( - (in_n, out_c, out_h + out_pad_h, out_w + out_pad_w), dtype=input_.dtype) + out = np.zeros((in_n, out_c, out_h + out_pad_h, out_w + out_pad_w), + dtype=input_.dtype) for n in range(in_n): for i in range(in_h): @@ -104,17 +106,18 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs): axis=0) i1, i2 = i * stride[0], i * stride[0] + d_bolck_h j1, j2 = j * stride[1], j * stride[1] + d_bolck_w - out[n, g * f_out_c + k, i1:i2:dilations[0], j1:j2: - dilations[1]] += tmp_out + out[n, g * f_out_c + k, i1:i2:dilations[0], + j1:j2:dilations[1]] += tmp_out - out = out[:, :, pad_h_0:out_h - pad_h_1 + out_pad_h, pad_w_0:out_w - pad_w_1 - + out_pad_w] + out = out[:, :, pad_h_0:out_h - pad_h_1 + out_pad_h, + pad_w_0:out_w - pad_w_1 + out_pad_w] if attrs['data_format'] == 'NHWC': out = np.transpose(out, [0, 2, 3, 1]) return out class TestConv2DTransposeOp(OpTest): + def setUp(self): # init as conv transpose self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64 @@ -169,37 +172,39 @@ class TestConv2DTransposeOp(OpTest): if self.need_check_grad: if self.use_cudnn: place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['Filter'], - 'Output', - max_relative_error=0.02, - no_grad_set=set(['Input'])) + self.check_grad_with_place(place, ['Filter'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Input'])) else: - self.check_grad( - ['Filter'], 'Output', no_grad_set=set(['Input'])) + self.check_grad(['Filter'], + 'Output', + no_grad_set=set(['Input'])) def test_check_grad_no_filter(self): if self.need_check_grad: if self.use_cudnn: place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['Input'], 'Output', no_grad_set=set(['Filter'])) + self.check_grad_with_place(place, ['Input'], + 'Output', + no_grad_set=set(['Filter'])) else: - self.check_grad( - ['Input'], 'Output', no_grad_set=set(['Filter'])) + self.check_grad(['Input'], + 'Output', + no_grad_set=set(['Filter'])) def test_check_grad(self): if self.need_check_grad: if self.use_cudnn: place = core.CUDAPlace(0) 
- self.check_grad_with_place( - place, - set(['Input', 'Filter']), - 'Output', - max_relative_error=0.02) + self.check_grad_with_place(place, + set(['Input', 'Filter']), + 'Output', + max_relative_error=0.02) else: - self.check_grad( - set(['Input', 'Filter']), 'Output', max_relative_error=0.02) + self.check_grad(set(['Input', 'Filter']), + 'Output', + max_relative_error=0.02) def init_test_case(self): self.pad = [0, 0] @@ -215,6 +220,7 @@ class TestConv2DTransposeOp(OpTest): class TestWithSymmetricPad(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -226,6 +232,7 @@ class TestWithSymmetricPad(TestConv2DTransposeOp): class TestWithAsymmetricPad(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 0, 1, 2] self.stride = [1, 1] @@ -237,6 +244,7 @@ class TestWithAsymmetricPad(TestConv2DTransposeOp): class TestWithSAMEPad(TestConv2DTransposeOp): + def init_test_case(self): self.stride = [2, 1] self.dilations = [1, 2] @@ -248,6 +256,7 @@ class TestWithSAMEPad(TestConv2DTransposeOp): class TestWithVALIDPad(TestConv2DTransposeOp): + def init_test_case(self): self.stride = [1, 1] self.dilations = [1, 1] @@ -259,6 +268,7 @@ class TestWithVALIDPad(TestConv2DTransposeOp): class TestWithGroups(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -270,6 +280,7 @@ class TestWithGroups(TestConv2DTransposeOp): class TestWithStride(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -281,6 +292,7 @@ class TestWithStride(TestConv2DTransposeOp): class TestWithDilation(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -292,6 +304,7 @@ class TestWithDilation(TestConv2DTransposeOp): class TestWithEvenUpsample(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [2, 2] self.stride = [2, 2] @@ -304,6 +317,7 @@ class TestWithEvenUpsample(TestConv2DTransposeOp): class TestWithEvenUpsampleOutputPadding(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [2, 2] self.stride = [2, 2] @@ -316,6 +330,7 @@ class TestWithEvenUpsampleOutputPadding(TestConv2DTransposeOp): class Test_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -328,6 +343,7 @@ class Test_NHWC(TestConv2DTransposeOp): class TestWithSymmetricPad_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -340,6 +356,7 @@ class TestWithSymmetricPad_NHWC(TestConv2DTransposeOp): class TestWithAsymmetricPad_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 0, 1, 2] self.stride = [1, 1] @@ -352,6 +369,7 @@ class TestWithAsymmetricPad_NHWC(TestConv2DTransposeOp): class TestWithGroups_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -364,6 +382,7 @@ class TestWithGroups_NHWC(TestConv2DTransposeOp): class TestWithStride_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -376,6 +395,7 @@ class TestWithStride_NHWC(TestConv2DTransposeOp): class TestWithDilation_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -388,6 +408,7 @@ class TestWithDilation_NHWC(TestConv2DTransposeOp): class TestWithEvenUpsample_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [2, 2] self.stride = [2, 2] @@ -401,6 +422,7 @@ class TestWithEvenUpsample_NHWC(TestConv2DTransposeOp): class 
TestWithEvenUpsample_NHWC_output_padding(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [2, 2] self.stride = [2, 2] @@ -417,6 +439,7 @@ class TestWithEvenUpsample_NHWC_output_padding(TestConv2DTransposeOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNN(TestConv2DTransposeOp): + def init_op_type(self): self.use_cudnn = True self.op_type = "conv2d_transpose" @@ -425,6 +448,7 @@ class TestCUDNN(TestConv2DTransposeOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithSymmetricPad(TestWithSymmetricPad): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -442,6 +466,7 @@ class TestCUDNNWithSymmetricPad(TestWithSymmetricPad): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithAsymmetricPad(TestWithAsymmetricPad): + def init_test_case(self): self.pad = [1, 0, 1, 2] self.stride = [1, 1] @@ -459,6 +484,7 @@ class TestCUDNNWithAsymmetricPad(TestWithAsymmetricPad): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithSAMEPad(TestWithSAMEPad): + def init_test_case(self): self.pad = [1, 0, 1, 2] self.stride = [1, 2] @@ -476,6 +502,7 @@ class TestCUDNNWithSAMEPad(TestWithSAMEPad): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithVALIDPad(TestWithVALIDPad): + def init_test_case(self): self.pad = [1, 0, 1, 2] self.stride = [1, 1] @@ -493,6 +520,7 @@ class TestCUDNNWithVALIDPad(TestWithVALIDPad): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithStride(TestWithStride): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -510,6 +538,7 @@ class TestCUDNNWithStride(TestWithStride): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithGroups(TestWithGroups): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -528,6 +557,7 @@ class TestCUDNNWithGroups(TestWithGroups): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithEvenUpsample(TestWithEvenUpsample): + def init_op_type(self): self.use_cudnn = True self.op_type = "conv2d_transpose" @@ -551,6 +581,7 @@ class TestCUDNNWithEvenUpsample(TestWithEvenUpsample): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNN_NHWC(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -569,6 +600,7 @@ class TestCUDNN_NHWC(TestConv2DTransposeOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithSymmetricPad_NHWC(TestWithSymmetricPad): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -587,6 +619,7 @@ class TestCUDNNWithSymmetricPad_NHWC(TestWithSymmetricPad): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithAsymmetricPad_NHWC(TestWithSymmetricPad): + def init_test_case(self): self.pad = [1, 0, 2, 3] self.stride = [2, 2] @@ -605,6 +638,7 @@ class TestCUDNNWithAsymmetricPad_NHWC(TestWithSymmetricPad): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithStride_NHWC(TestWithStride): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -623,6 +657,7 @@ class TestCUDNNWithStride_NHWC(TestWithStride): 
@unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithGroups_NHWC(TestWithGroups): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -641,6 +676,7 @@ class TestCUDNNWithGroups_NHWC(TestWithGroups): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithEvenUpsample_NHWC(TestWithEvenUpsample): + def init_test_case(self): self.pad = [2, 2] self.stride = [2, 2] @@ -660,6 +696,7 @@ class TestCUDNNWithEvenUpsample_NHWC(TestWithEvenUpsample): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNN_FP16(TestConv2DTransposeOp): + def init_test_case(self): self.dtype = np.float16 self.pad = [1, 1] @@ -687,6 +724,7 @@ class TestCUDNN_FP16(TestConv2DTransposeOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNN_NHWC_FP16(TestCUDNN_FP16): + def init_test_case(self): self.dtype = np.float16 self.pad = [0, 0] @@ -702,6 +740,7 @@ class TestCUDNN_NHWC_FP16(TestCUDNN_FP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithSymmetricPad_NHWC_FP16(TestCUDNN_FP16): + def init_test_case(self): self.dtype = np.float16 self.pad = [1, 1] @@ -717,6 +756,7 @@ class TestCUDNNWithSymmetricPad_NHWC_FP16(TestCUDNN_FP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithAsymmetricPad_NHWC_FP16(TestCUDNN_FP16): + def init_test_case(self): self.dtype = np.float16 self.pad = [1, 0, 2, 3] @@ -732,6 +772,7 @@ class TestCUDNNWithAsymmetricPad_NHWC_FP16(TestCUDNN_FP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithStride_NHWC_FP16(TestCUDNN_FP16): + def init_test_case(self): self.dtype = np.float16 self.pad = [1, 1] @@ -747,6 +788,7 @@ class TestCUDNNWithStride_NHWC_FP16(TestCUDNN_FP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithGroups_NHWC_FP16(TestCUDNN_FP16): + def init_test_case(self): self.dtype = np.float16 self.pad = [1, 1] @@ -762,6 +804,7 @@ class TestCUDNNWithGroups_NHWC_FP16(TestCUDNN_FP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithEvenUpsample_NHWC_FP16(TestCUDNN_FP16): + def init_test_case(self): self.dtype = np.float16 self.pad = [2, 2] @@ -776,58 +819,56 @@ class TestCUDNNWithEvenUpsample_NHWC_FP16(TestCUDNN_FP16): class TestConv2DTransposeAPI(unittest.TestCase): + def test_case1(self): - data1 = fluid.layers.data( - name='data1', shape=[3, 5, 5], dtype='float32') - data2 = fluid.layers.data( - name='data2', shape=[5, 5, 3], dtype='float32') - out1 = fluid.layers.conv2d_transpose( - input=data1, - groups=1, - num_filters=6, - filter_size=3, - data_format='NCHW') - out2 = fluid.layers.conv2d_transpose( - input=data2, - groups=1, - num_filters=6, - filter_size=3, - data_format='NHWC') - out3 = fluid.layers.conv2d_transpose( - input=data1, - groups=1, - num_filters=6, - filter_size=3, - padding=[[0, 0], [1, 1], [1, 1], [0, 0]], - data_format='NHWC') - out4 = fluid.layers.conv2d_transpose( - input=data1, - groups=3, - num_filters=6, - filter_size=3, - padding=[[0, 0], [0, 0], [2, 1], [0, 0]], - data_format='NCHW') - out5 = fluid.layers.conv2d_transpose( - input=data2, - groups=1, - num_filters=6, - filter_size=3, - padding='SAME', - data_format='NCHW') - out6 = fluid.layers.conv2d_transpose( - input=data1, - 
groups=1, - num_filters=6, - filter_size=3, - padding='VALID', - data_format='NHWC') - out7 = fluid.layers.conv2d_transpose( - input=data1, - groups=1, - num_filters=6, - output_size=[7, 7], - padding=[0, 0], - data_format='NHWC') + data1 = fluid.layers.data(name='data1', + shape=[3, 5, 5], + dtype='float32') + data2 = fluid.layers.data(name='data2', + shape=[5, 5, 3], + dtype='float32') + out1 = fluid.layers.conv2d_transpose(input=data1, + groups=1, + num_filters=6, + filter_size=3, + data_format='NCHW') + out2 = fluid.layers.conv2d_transpose(input=data2, + groups=1, + num_filters=6, + filter_size=3, + data_format='NHWC') + out3 = fluid.layers.conv2d_transpose(input=data1, + groups=1, + num_filters=6, + filter_size=3, + padding=[[0, 0], [1, 1], [1, 1], + [0, 0]], + data_format='NHWC') + out4 = fluid.layers.conv2d_transpose(input=data1, + groups=3, + num_filters=6, + filter_size=3, + padding=[[0, 0], [0, 0], [2, 1], + [0, 0]], + data_format='NCHW') + out5 = fluid.layers.conv2d_transpose(input=data2, + groups=1, + num_filters=6, + filter_size=3, + padding='SAME', + data_format='NCHW') + out6 = fluid.layers.conv2d_transpose(input=data1, + groups=1, + num_filters=6, + filter_size=3, + padding='VALID', + data_format='NHWC') + out7 = fluid.layers.conv2d_transpose(input=data1, + groups=1, + num_filters=6, + output_size=[7, 7], + padding=[0, 0], + data_format='NHWC') data1_np = np.random.random((2, 3, 5, 5)).astype("float32") data2_np = np.random.random((2, 5, 5, 3)).astype("float32") @@ -838,12 +879,13 @@ class TestConv2DTransposeAPI(unittest.TestCase): place = core.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - results = exe.run( - fluid.default_main_program(), - feed={"data1": data1_np, - "data2": data2_np}, - fetch_list=[out1, out2, out3, out4, out5, out6, out7], - return_numpy=True) + results = exe.run(fluid.default_main_program(), + feed={ + "data1": data1_np, + "data2": data2_np + }, + fetch_list=[out1, out2, out3, out4, out5, out6, out7], + return_numpy=True) self.assertIsNotNone(results[0]) self.assertIsNotNone(results[1]) self.assertIsNotNone(results[2]) @@ -854,71 +896,73 @@ class TestConv2DTransposeAPI(unittest.TestCase): class TestConv2DTransposeOpException(unittest.TestCase): + def test_exception(self): data = fluid.layers.data(name='data', shape=[3, 5, 5], dtype="float32") def attr_data_format(): - out = fluid.layers.conv2d_transpose( - input=data, - groups=1, - num_filters=6, - filter_size=3, - data_format="NCDHW") + out = fluid.layers.conv2d_transpose(input=data, + groups=1, + num_filters=6, + filter_size=3, + data_format="NCDHW") self.assertRaises(ValueError, attr_data_format) def attr_padding_str(): - out = fluid.layers.conv2d_transpose( - input=data, - groups=1, - num_filters=6, - filter_size=3, - padding='Vald') + out = fluid.layers.conv2d_transpose(input=data, + groups=1, + num_filters=6, + filter_size=3, + padding='Vald') self.assertRaises(ValueError, attr_padding_str) def attr_padding_list(): - out = fluid.layers.conv2d_transpose( - input=data, - groups=1, - num_filters=6, - filter_size=3, - padding=[[1, 1], [1, 1], [0, 0], [0, 0]]) + out = fluid.layers.conv2d_transpose(input=data, + groups=1, + num_filters=6, + filter_size=3, + padding=[[1, 1], [1, 1], [0, 0], + [0, 0]]) self.assertRaises(ValueError, attr_padding_list) def attr_padding_with_data_format(): - out = fluid.layers.conv2d_transpose( - input=data, - groups=1, - num_filters=6, - filter_size=3, - padding=[[1, 1], [0, 0], [0, 0], [1, 1]], - data_format='NHWC') + out = 
fluid.layers.conv2d_transpose(input=data, + groups=1, + num_filters=6, + filter_size=3, + padding=[[1, 1], [0, 0], [0, 0], + [1, 1]], + data_format='NHWC') self.assertRaises(ValueError, attr_padding_with_data_format) - error_input = fluid.layers.data( - name='error_data', shape=[1], dtype="float32") + error_input = fluid.layers.data(name='error_data', + shape=[1], + dtype="float32") def error_input_size(): - out = fluid.layers.conv2d_transpose( - input=error_input, groups=1, num_filters=6, filter_size=3) + out = fluid.layers.conv2d_transpose(input=error_input, + groups=1, + num_filters=6, + filter_size=3) self.assertRaises(ValueError, error_input_size) def error_groups(): - out = fluid.layers.conv2d_transpose( - input=data, - groups=0, - num_filters=6, - filter_size=3, - data_format='NHWC') + out = fluid.layers.conv2d_transpose(input=data, + groups=0, + num_filters=6, + filter_size=3, + data_format='NHWC') self.assertRaises(ValueError, error_groups) class TestConv2DTransposeRepr(unittest.TestCase): + def test_case(self): paddle.disable_static() x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1., max=1.) diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op_depthwise_conv.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op_depthwise_conv.py index 65c5d35fe53..665413ee4cf 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op_depthwise_conv.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op_depthwise_conv.py @@ -18,6 +18,7 @@ import unittest import numpy as np import paddle + paddle.enable_static() import paddle.fluid.core as core import paddle.fluid as fluid @@ -26,6 +27,7 @@ from test_conv2d_transpose_op import TestConv2DTransposeOp class TestDepthwiseConvTranspose(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -39,6 +41,7 @@ class TestDepthwiseConvTranspose(TestConv2DTransposeOp): class TestDepthwiseConvTransposeAsymmetricPad(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1, 1, 2] self.stride = [1, 1] @@ -53,6 +56,7 @@ class TestDepthwiseConvTransposeAsymmetricPad(TestConv2DTransposeOp): class TestDepthwiseConvTransposeSAMEPad(TestConv2DTransposeOp): + def init_test_case(self): self.stride = [1, 1] self.dilations = [1, 1] @@ -66,6 +70,7 @@ class TestDepthwiseConvTransposeSAMEPad(TestConv2DTransposeOp): class TestDepthwiseConvTransposeVALIDPad(TestConv2DTransposeOp): + def init_test_case(self): self.stride = [1, 1] self.dilations = [1, 1] @@ -79,6 +84,7 @@ class TestDepthwiseConvTransposeVALIDPad(TestConv2DTransposeOp): class TestDepthwiseConvTranspose_NHWC_3x3kernel(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py index dd6dcf6d5e9..42c23eb64fd 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py @@ -23,6 +23,7 @@ import unittest class Conv3DTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', batch_size=4, @@ -58,8 +59,8 @@ class Conv3DTestCase(unittest.TestCase): input_shape = (self.batch_size, ) + self.spartial_shape + ( self.num_channels, ) else: - input_shape = (self.batch_size, self.num_channels - ) + self.spartial_shape + input_shape = (self.batch_size, + self.num_channels) + self.spartial_shape self.input = np.random.randn(*input_shape).astype(self.dtype) if 
isinstance(self.filter_size, int): @@ -68,8 +69,8 @@ class Conv3DTestCase(unittest.TestCase): filter_size = self.filter_size self.weight_shape = weight_shape = (self.num_filters, self.num_channels // self.groups) + tuple(filter_size) - self.weight = np.random.uniform( - -1, 1, size=weight_shape).astype(self.dtype) + self.weight = np.random.uniform(-1, 1, + size=weight_shape).astype(self.dtype) if not self.no_bias: self.bias = np.random.uniform( -1, 1, size=(self.num_filters, )).astype(self.dtype) @@ -89,17 +90,16 @@ class Conv3DTestCase(unittest.TestCase): bias_attr = False else: bias_attr = I.NumpyArrayInitializer(self.bias) - y_var = fluid.layers.conv3d( - x_var, - self.num_filters, - self.filter_size, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - param_attr=weight_attr, - bias_attr=bias_attr, - data_format=self.data_format) + y_var = fluid.layers.conv3d(x_var, + self.num_filters, + self.filter_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + param_attr=weight_attr, + bias_attr=bias_attr, + data_format=self.data_format) feed_dict = {"input": self.input} exe = fluid.Executor(place) exe.run(start) @@ -114,19 +114,19 @@ class Conv3DTestCase(unittest.TestCase): input_shape = (-1, -1, -1, -1, self.num_channels) \ if self.channel_last else (-1, self.num_channels, -1, -1, -1) x_var = fluid.data("input", input_shape, dtype=self.dtype) - w_var = fluid.data( - "weight", self.weight_shape, dtype=self.dtype) - b_var = fluid.data( - "bias", (self.num_filters, ), dtype=self.dtype) - y_var = F.conv3d( - x_var, - w_var, - None if self.no_bias else b_var, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + w_var = fluid.data("weight", + self.weight_shape, + dtype=self.dtype) + b_var = fluid.data("bias", (self.num_filters, ), + dtype=self.dtype) + y_var = F.conv3d(x_var, + w_var, + None if self.no_bias else b_var, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) feed_dict = {"input": self.input, "weight": self.weight} if self.bias is not None: feed_dict["bias"] = self.bias @@ -138,15 +138,14 @@ class Conv3DTestCase(unittest.TestCase): def paddle_nn_layer(self): x_var = paddle.to_tensor(self.input) x_var.stop_gradient = False - conv = nn.Conv3D( - self.num_channels, - self.num_filters, - self.filter_size, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + conv = nn.Conv3D(self.num_channels, + self.num_filters, + self.filter_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) conv.weight.set_value(self.weight) if not self.no_bias: conv.bias.set_value(self.bias) @@ -179,6 +178,7 @@ class Conv3DTestCase(unittest.TestCase): class Conv3DErrorTestCase(Conv3DTestCase): + def runTest(self): place = fluid.CPUPlace() with dg.guard(place): @@ -189,50 +189,44 @@ class Conv3DErrorTestCase(Conv3DTestCase): def add_cases(suite): suite.addTest(Conv3DTestCase(methodName='runTest')) suite.addTest( - Conv3DTestCase( - methodName='runTest', stride=[1, 2, 1], dilation=2)) + Conv3DTestCase(methodName='runTest', stride=[1, 2, 1], dilation=2)) suite.addTest( - Conv3DTestCase( - methodName='runTest', stride=2, dilation=(2, 1, 2))) + Conv3DTestCase(methodName='runTest', stride=2, dilation=(2, 1, 2))) suite.addTest( - 
Conv3DTestCase( - methodName='runTest', padding="same", no_bias=True)) + Conv3DTestCase(methodName='runTest', padding="same", no_bias=True)) suite.addTest( - Conv3DTestCase( - methodName='runTest', filter_size=(3, 2, 3), padding='valid')) + Conv3DTestCase(methodName='runTest', + filter_size=(3, 2, 3), + padding='valid')) suite.addTest(Conv3DTestCase(methodName='runTest', padding=(2, 3, 1))) suite.addTest( - Conv3DTestCase( - methodName='runTest', padding=[1, 2, 2, 1, 2, 3])) + Conv3DTestCase(methodName='runTest', padding=[1, 2, 2, 1, 2, 3])) suite.addTest( - Conv3DTestCase( - methodName='runTest', - padding=[[0, 0], [0, 0], [1, 2], [2, 1], [2, 2]])) + Conv3DTestCase(methodName='runTest', + padding=[[0, 0], [0, 0], [1, 2], [2, 1], [2, 2]])) suite.addTest(Conv3DTestCase(methodName='runTest', data_format="NDHWC")) suite.addTest( - Conv3DTestCase( - methodName='runTest', - data_format="NDHWC", - padding=[[0, 0], [1, 1], [3, 3], [2, 2], [0, 0]])) + Conv3DTestCase(methodName='runTest', + data_format="NDHWC", + padding=[[0, 0], [1, 1], [3, 3], [2, 2], [0, 0]])) suite.addTest( - Conv3DTestCase( - methodName='runTest', groups=2, padding="valid")) + Conv3DTestCase(methodName='runTest', groups=2, padding="valid")) suite.addTest( - Conv3DTestCase( - methodName='runTest', - num_filters=6, - num_channels=3, - groups=3, - padding="valid")) + Conv3DTestCase(methodName='runTest', + num_filters=6, + num_channels=3, + groups=3, + padding="valid")) def add_error_cases(suite): suite.addTest( - Conv3DErrorTestCase( - methodName='runTest', num_channels=5, groups=2)) + Conv3DErrorTestCase(methodName='runTest', num_channels=5, groups=2)) suite.addTest( - Conv3DErrorTestCase( - methodName='runTest', num_channels=5, groups=2, padding=[-1, 1, 3])) + Conv3DErrorTestCase(methodName='runTest', + num_channels=5, + groups=2, + padding=[-1, 1, 3])) def load_tests(loader, standard_tests, pattern): diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_op.py index 8cf779ccfdd..370a4820dda 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_op.py @@ -63,8 +63,8 @@ def conv3d_forward_naive(input, for input_size, filter_size, stride_size in zip(input_shape, pool_size, pool_stride): out_size = int((input_size + stride_size - 1) / stride_size) - pad_sum = np.max(( - (out_size - 1) * stride_size + filter_size - input_size, 0)) + pad_sum = np.max( + ((out_size - 1) * stride_size + filter_size - input_size, 0)) pad_0 = int(pad_sum / 2) pad_1 = int(pad_sum - pad_0) padding.append(pad_0) @@ -106,8 +106,8 @@ def conv3d_forward_naive(input, constant_values=0) filter_dilation = np.zeros((f_n, f_c, d_bolck_d, d_bolck_h, d_bolck_w)) - filter_dilation[:, :, 0:d_bolck_d:dilation[0], 0:d_bolck_h:dilation[1], 0: - d_bolck_w:dilation[2]] = filter + filter_dilation[:, :, 0:d_bolck_d:dilation[0], 0:d_bolck_h:dilation[1], + 0:d_bolck_w:dilation[2]] = filter for d in range(out_d): for i in range(out_h): @@ -131,9 +131,11 @@ def conv3d_forward_naive(input, def create_test_cudnn_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNCase(parent): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float32 if core.is_compiled_with_rocm( @@ -145,7 +147,9 @@ def create_test_cudnn_class(parent): def create_test_padding_SAME_class(parent): + class TestPaddingSMAECase(parent): + def init_paddings(self): self.pad = [0, 0, 0] self.padding_algorithm = 
"SAME" @@ -156,7 +160,9 @@ def create_test_padding_SAME_class(parent): def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): self.pad = [1, 1, 1] self.padding_algorithm = "VALID" @@ -167,9 +173,11 @@ def create_test_padding_VALID_class(parent): def create_test_cudnn_padding_SAME_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNPaddingSMAECase(parent): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float32 if core.is_compiled_with_rocm( @@ -185,9 +193,11 @@ def create_test_cudnn_padding_SAME_class(parent): def create_test_cudnn_padding_VALID_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNPaddingVALIDCase(parent): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float32 if core.is_compiled_with_rocm( @@ -203,7 +213,9 @@ def create_test_cudnn_padding_VALID_class(parent): def create_test_channel_last_class(parent): + class TestChannelLastCase(parent): + def init_data_format(self): self.data_format = "NDHWC" @@ -217,9 +229,11 @@ def create_test_channel_last_class(parent): def create_test_cudnn_channel_last_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCudnnChannelLastCase(parent): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float32 if core.is_compiled_with_rocm( @@ -238,6 +252,7 @@ def create_test_cudnn_channel_last_class(parent): class TestConv3DOp(OpTest): + def setUp(self): self.op_type = "conv3d" self.use_cudnn = False @@ -261,7 +276,8 @@ class TestConv3DOp(OpTest): input, filter, self.groups, - conv3d_param, ).astype(self.dtype) + conv3d_param, + ).astype(self.dtype) self.inputs = { 'Input': OpTest.np_dtype_to_fluid_dtype(input), @@ -284,43 +300,41 @@ class TestConv3DOp(OpTest): def test_check_output(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() - self.check_output_with_place( - place, atol=1e-5, check_dygraph=(self.use_mkldnn == False)) + self.check_output_with_place(place, + atol=1e-5, + check_dygraph=(self.use_mkldnn == False)) def test_check_grad(self): if self.dtype == np.float16: return place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_grad_with_place( - place, {'Input', 'Filter'}, - 'Output', - max_relative_error=0.03, - check_dygraph=(self.use_mkldnn == False)) + self.check_grad_with_place(place, {'Input', 'Filter'}, + 'Output', + max_relative_error=0.03, + check_dygraph=(self.use_mkldnn == False)) def test_check_grad_no_filter(self): if self.dtype == np.float16: return place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_grad_with_place( - place, ['Input'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Filter']), - check_dygraph=(self.use_mkldnn == False)) + self.check_grad_with_place(place, ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter']), + check_dygraph=(self.use_mkldnn == False)) def test_check_grad_no_input(self): if self.dtype == np.float16: return place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_grad_with_place( - place, ['Filter'], - 'Output', - max_relative_error=0.03, - 
no_grad_set=set(['Input']), - check_dygraph=(self.use_mkldnn == False)) + self.check_grad_with_place(place, ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input']), + check_dygraph=(self.use_mkldnn == False)) def init_test_case(self): self.pad = [0, 0, 0] @@ -344,6 +358,7 @@ class TestConv3DOp(OpTest): class TestCase1(TestConv3DOp): + def init_test_case(self): self.pad = [1, 1, 1] self.stride = [1, 1, 1] @@ -354,16 +369,19 @@ class TestCase1(TestConv3DOp): class TestWithGroup1(TestConv3DOp): + def init_group(self): self.groups = 3 class TestWithGroup2(TestCase1): + def init_group(self): self.groups = 3 class TestWith1x1(TestConv3DOp): + def init_test_case(self): self.pad = [0, 0, 0] self.stride = [1, 1, 1] @@ -380,6 +398,7 @@ class TestWith1x1(TestConv3DOp): class TestWithInput1x1Filter1x1(TestConv3DOp): + def init_test_case(self): self.pad = [0, 0, 0] self.stride = [1, 1, 1] @@ -396,6 +415,7 @@ class TestWithInput1x1Filter1x1(TestConv3DOp): class TestWithDilation(TestConv3DOp): + def init_test_case(self): self.pad = [0, 0, 0] self.stride = [1, 1, 1] @@ -417,6 +437,7 @@ class TestWithDilation(TestConv3DOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNN(TestConv3DOp): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64 @@ -425,6 +446,7 @@ class TestCUDNN(TestConv3DOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16CUDNN(TestConv3DOp): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float16 @@ -439,6 +461,7 @@ class TestFP16CUDNN(TestConv3DOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestWithGroup1CUDNN(TestWithGroup1): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64 @@ -447,6 +470,7 @@ class TestWithGroup1CUDNN(TestWithGroup1): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16WithGroup1CUDNN(TestWithGroup1): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float16 @@ -461,6 +485,7 @@ class TestFP16WithGroup1CUDNN(TestWithGroup1): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestWithGroup2CUDNN(TestWithGroup2): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64 @@ -469,6 +494,7 @@ class TestWithGroup2CUDNN(TestWithGroup2): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16WithGroup2CUDNN(TestWithGroup2): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float16 @@ -483,6 +509,7 @@ class TestFP16WithGroup2CUDNN(TestWithGroup2): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestWith1x1CUDNN(TestWith1x1): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64 @@ -491,6 +518,7 @@ class TestWith1x1CUDNN(TestWith1x1): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16With1x1CUDNN(TestWith1x1): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float16 @@ -505,6 +533,7 @@ class TestFP16With1x1CUDNN(TestWith1x1): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class 
TestWithInput1x1Filter1x1CUDNN(TestWithInput1x1Filter1x1): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64 @@ -513,6 +542,7 @@ class TestWithInput1x1Filter1x1CUDNN(TestWithInput1x1Filter1x1): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16WithInput1x1Filter1x1CUDNN(TestWithInput1x1Filter1x1): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float16 @@ -525,6 +555,7 @@ class TestFP16WithInput1x1Filter1x1CUDNN(TestWithInput1x1Filter1x1): class TestCUDNNExhaustiveSearch(TestCUDNN): + def init_kernel_type(self): self.use_cudnn = True self.exhaustive_search = True @@ -535,6 +566,7 @@ class TestCUDNNExhaustiveSearch(TestCUDNN): class TestConv3DOp_2(OpTest): + def setUp(self): self.op_type = "conv3d" self.use_cudnn = False @@ -589,28 +621,27 @@ class TestConv3DOp_2(OpTest): if self.dtype == np.float16: return place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() - self.check_grad_with_place( - place, {'Input', 'Filter'}, 'Output', max_relative_error=0.03) + self.check_grad_with_place(place, {'Input', 'Filter'}, + 'Output', + max_relative_error=0.03) def test_check_grad_no_filter(self): if self.dtype == np.float16: return place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() - self.check_grad_with_place( - place, ['Input'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Filter'])) + self.check_grad_with_place(place, ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter'])) def test_check_grad_no_input(self): if self.dtype == np.float16: return place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace() - self.check_grad_with_place( - place, ['Filter'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Input'])) + self.check_grad_with_place(place, ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input'])) def init_test_case(self): self.stride = [1, 1, 1] @@ -640,6 +671,7 @@ class TestConv3DOp_2(OpTest): class TestConv3DOp_AsyPadding(TestConv3DOp_2): + def init_test_case(self): self.stride = [1, 1, 2] self.input_size = [2, 3, 4, 4, 4] # NCDHW @@ -653,6 +685,7 @@ class TestConv3DOp_AsyPadding(TestConv3DOp_2): class TestConv3DOp_DiffDataInDiffDim(TestConv3DOp_2): + def init_test_case(self): self.stride = [1, 1, 2] self.input_size = [2, 3, 4, 5, 5] # NCDHW @@ -671,6 +704,7 @@ create_test_channel_last_class(TestConv3DOp_DiffDataInDiffDim) class TestCase1_AsyPadding(TestConv3DOp_2): + def init_test_case(self): self.stride = [1, 1, 1] self.input_size = [2, 3, 4, 4, 4] # NCDHW @@ -684,6 +718,7 @@ class TestCase1_AsyPadding(TestConv3DOp_2): class TestWithGroup1_AsyPadding(TestConv3DOp_2): + def init_group(self): self.groups = 3 @@ -693,6 +728,7 @@ class TestWithGroup1_AsyPadding(TestConv3DOp_2): class TestWithGroup2_AsyPadding(TestConv3DOp_2): + def init_test_case(self): self.stride = [1, 1, 1] self.input_size = [2, 3, 4, 4, 4] # NCDHW @@ -709,6 +745,7 @@ class TestWithGroup2_AsyPadding(TestConv3DOp_2): class TestWith1x1_AsyPadding(TestConv3DOp_2): + def init_test_case(self): self.stride = [1, 1, 1] self.input_size = [2, 3, 4, 4, 4] @@ -728,6 +765,7 @@ class TestWith1x1_AsyPadding(TestConv3DOp_2): class TestWithDilation_AsyPadding(TestConv3DOp_2): + def init_test_case(self): self.stride = [1, 1, 1] self.input_size = [2, 3, 6, 6, 6] @@ -793,210 +831,196 @@ create_test_cudnn_channel_last_class(TestWith1x1_AsyPadding) # --------- test python API --------------- class 
TestConv3DAPI(unittest.TestCase): + def test_api(self): - input_NDHWC = fluid.layers.data( - name="input_NDHWC", - shape=[2, 5, 5, 5, 3], - append_batch_size=False, - dtype="float32") - - input_NCDHW = fluid.layers.data( - name="input_NCDHW", - shape=[2, 3, 5, 5, 3], - append_batch_size=False, - dtype="float32") - - fluid.layers.conv3d( - input=input_NDHWC, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding=0, - dilation=[1, 1, 1], - groups=1, - data_format="NCDHW") - - fluid.layers.conv3d( - input=input_NCDHW, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding=[1, 2, 1, 0, 1, 0], - dilation=[1, 1, 1], - groups=1, - data_format="NCDHW") - - fluid.layers.conv3d( - input=input_NCDHW, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding=[[0, 0], [0, 0], [1, 1], [1, 1], [1, 1]], - dilation=[1, 1, 1], - groups=1, - data_format="NCDHW") - - fluid.layers.conv3d( - input=input_NDHWC, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding=[[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]], - dilation=[1, 1, 1], - groups=1, - data_format="NDHWC") - - fluid.layers.conv3d( - input=input_NCDHW, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding="SAME", - dilation=[1, 1, 1], - groups=1, - data_format="NCDHW") - - fluid.layers.conv3d( - input=input_NCDHW, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding="VALID", - dilation=[1, 1, 1], - groups=1, - data_format="NCDHW") + input_NDHWC = fluid.layers.data(name="input_NDHWC", + shape=[2, 5, 5, 5, 3], + append_batch_size=False, + dtype="float32") + + input_NCDHW = fluid.layers.data(name="input_NCDHW", + shape=[2, 3, 5, 5, 3], + append_batch_size=False, + dtype="float32") + + fluid.layers.conv3d(input=input_NDHWC, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding=0, + dilation=[1, 1, 1], + groups=1, + data_format="NCDHW") + + fluid.layers.conv3d(input=input_NCDHW, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding=[1, 2, 1, 0, 1, 0], + dilation=[1, 1, 1], + groups=1, + data_format="NCDHW") + + fluid.layers.conv3d(input=input_NCDHW, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding=[[0, 0], [0, 0], [1, 1], [1, 1], [1, 1]], + dilation=[1, 1, 1], + groups=1, + data_format="NCDHW") + + fluid.layers.conv3d(input=input_NDHWC, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding=[[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]], + dilation=[1, 1, 1], + groups=1, + data_format="NDHWC") + + fluid.layers.conv3d(input=input_NCDHW, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding="SAME", + dilation=[1, 1, 1], + groups=1, + data_format="NCDHW") + + fluid.layers.conv3d(input=input_NCDHW, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding="VALID", + dilation=[1, 1, 1], + groups=1, + data_format="NCDHW") class TestConv3DAPI_Error(unittest.TestCase): + def test_api(self): - input = fluid.layers.data( - name="input", - shape=[2, 5, 5, 5, 4], - append_batch_size=False, - dtype="float32") + input = fluid.layers.data(name="input", + shape=[2, 5, 5, 5, 4], + append_batch_size=False, + dtype="float32") # ValueError: cudnn def run_1(): - fluid.layers.conv3d( - input=input, - num_filters=3, - filter_size=3, - stride=1, - padding=0, - dilation=1, - groups=1, - use_cudnn=[0], - data_format="NCDHW") + fluid.layers.conv3d(input=input, + num_filters=3, + filter_size=3, + stride=1, + padding=0, + dilation=1, + groups=1, + use_cudnn=[0], + data_format="NCDHW") 
self.assertRaises(ValueError, run_1) # ValueError: data_format def run_2(): - fluid.layers.conv3d( - input=input, - num_filters=3, - filter_size=[3, 3, 3], - stride=[1, 1, 1], - padding=0, - dilation=[1, 1, 1], - groups=1, - use_cudnn=False, - data_format="NCHWC") + fluid.layers.conv3d(input=input, + num_filters=3, + filter_size=[3, 3, 3], + stride=[1, 1, 1], + padding=0, + dilation=[1, 1, 1], + groups=1, + use_cudnn=False, + data_format="NCHWC") self.assertRaises(ValueError, run_2) # ValueError: padding def run_3(): - fluid.layers.conv3d( - input=input, - num_filters=3, - filter_size=3, - stride=1, - padding="SAMEE", - dilation=1, - groups=1, - use_cudnn=False, - data_format="NCDHW") + fluid.layers.conv3d(input=input, + num_filters=3, + filter_size=3, + stride=1, + padding="SAMEE", + dilation=1, + groups=1, + use_cudnn=False, + data_format="NCDHW") self.assertRaises(ValueError, run_3) def run_4(): - fluid.layers.conv3d( - input=input, - num_filters=3, - filter_size=3, - stride=1, - padding=[[0, 1], [0, 0], [0, 1], [0, 1], [0, 1]], - dilation=1, - groups=1, - use_cudnn=False, - data_format="NCDHW") + fluid.layers.conv3d(input=input, + num_filters=3, + filter_size=3, + stride=1, + padding=[[0, 1], [0, 0], [0, 1], [0, 1], [0, + 1]], + dilation=1, + groups=1, + use_cudnn=False, + data_format="NCDHW") self.assertRaises(ValueError, run_4) def run_5(): - fluid.layers.conv3d( - input=input, - num_filters=3, - filter_size=0, - stride=0, - padding=[[0, 1], [0, 1], [0, 1], [0, 1], [0, 1]], - dilation=1, - groups=1, - use_cudnn=False, - data_format="NDHWC") + fluid.layers.conv3d(input=input, + num_filters=3, + filter_size=0, + stride=0, + padding=[[0, 1], [0, 1], [0, 1], [0, 1], [0, + 1]], + dilation=1, + groups=1, + use_cudnn=False, + data_format="NDHWC") self.assertRaises(ValueError, run_5) # ValueError: channel dimmention - x = fluid.layers.data( - name="x", - shape=[2, 5, 5, 5, -1], - append_batch_size=False, - dtype="float32") + x = fluid.layers.data(name="x", + shape=[2, 5, 5, 5, -1], + append_batch_size=False, + dtype="float32") def run_6(): - fluid.layers.conv3d( - input=x, - num_filters=3, - filter_size=3, - stride=1, - padding=0, - dilation=1, - groups=1, - use_cudnn=False, - data_format="NDHWC") + fluid.layers.conv3d(input=x, + num_filters=3, + filter_size=3, + stride=1, + padding=0, + dilation=1, + groups=1, + use_cudnn=False, + data_format="NDHWC") self.assertRaises(ValueError, run_6) # ValueError: groups def run_7(): - fluid.layers.conv3d( - input=input, - num_filters=3, - filter_size=3, - stride=1, - padding=0, - dilation=1, - groups=3, - use_cudnn=False, - data_format="NDHWC") + fluid.layers.conv3d(input=input, + num_filters=3, + filter_size=3, + stride=1, + padding=0, + dilation=1, + groups=3, + use_cudnn=False, + data_format="NDHWC") self.assertRaises(ValueError, run_7) # ValueError: filter num def run_8(): - fluid.layers.conv3d( - input=input, - num_filters=0, - filter_size=0, - stride=0, - padding=0, - dilation=0, - groups=1, - use_cudnn=False, - data_format="NDHWC") + fluid.layers.conv3d(input=input, + num_filters=0, + filter_size=0, + stride=0, + padding=0, + dilation=0, + groups=1, + use_cudnn=False, + data_format="NDHWC") self.assertRaises(ValueError, run_8) diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py index 19249fcfeb3..9ad3eaaccfc 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py +++ 
b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py @@ -21,6 +21,7 @@ import unittest class Conv3DTransposeTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', batch_size=2, @@ -58,8 +59,8 @@ class Conv3DTransposeTestCase(unittest.TestCase): input_shape = (self.batch_size, ) + self.spartial_shape + ( self.num_channels, ) else: - input_shape = (self.batch_size, self.num_channels - ) + self.spartial_shape + input_shape = (self.batch_size, + self.num_channels) + self.spartial_shape self.input = np.random.randn(*input_shape).astype(self.dtype) if isinstance(self.filter_size, int): @@ -68,8 +69,8 @@ class Conv3DTransposeTestCase(unittest.TestCase): filter_size = self.filter_size self.weight_shape = weight_shape = (self.num_channels, self.num_filters // self.groups) + tuple(filter_size) - self.weight = np.random.uniform( - -1, 1, size=weight_shape).astype(self.dtype) + self.weight = np.random.uniform(-1, 1, + size=weight_shape).astype(self.dtype) if self.no_bias: self.bias = None else: @@ -115,20 +116,20 @@ class Conv3DTransposeTestCase(unittest.TestCase): input_shape = (-1, -1, -1, -1, self.num_channels) \ if self.channel_last else (-1, self.num_channels, -1, -1, -1) x_var = fluid.data("input", input_shape, dtype=self.dtype) - w_var = fluid.data( - "weight", self.weight_shape, dtype=self.dtype) - b_var = fluid.data( - "bias", (self.num_filters, ), dtype=self.dtype) - y_var = F.conv3d_transpose( - x_var, - w_var, - None if self.no_bias else b_var, - output_size=self.output_size, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + w_var = fluid.data("weight", + self.weight_shape, + dtype=self.dtype) + b_var = fluid.data("bias", (self.num_filters, ), + dtype=self.dtype) + y_var = F.conv3d_transpose(x_var, + w_var, + None if self.no_bias else b_var, + output_size=self.output_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) feed_dict = {"input": self.input, "weight": self.weight} if self.bias is not None: feed_dict["bias"] = self.bias @@ -139,15 +140,14 @@ class Conv3DTransposeTestCase(unittest.TestCase): def paddle_nn_layer(self): x_var = dg.to_variable(self.input) - conv = nn.Conv3DTranspose( - self.num_channels, - self.num_filters, - self.filter_size, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + conv = nn.Conv3DTranspose(self.num_channels, + self.num_filters, + self.filter_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) conv.weight.set_value(self.weight) if not self.no_bias: conv.bias.set_value(self.bias) @@ -174,6 +174,7 @@ class Conv3DTransposeTestCase(unittest.TestCase): class Conv3DTransposeErrorTestCase(Conv3DTransposeTestCase): + def runTest(self): place = fluid.CPUPlace() with dg.guard(place): @@ -184,63 +185,65 @@ class Conv3DTransposeErrorTestCase(Conv3DTransposeTestCase): def add_cases(suite): suite.addTest(Conv3DTransposeTestCase(methodName='runTest')) suite.addTest( - Conv3DTransposeTestCase( - methodName='runTest', stride=[1, 2, 1], dilation=2, no_bias=True)) - suite.addTest( - Conv3DTransposeTestCase( - methodName='runTest', - output_size=[12, 19, 12], - stride=[1, 2, 1], - dilation=2)) - suite.addTest( - Conv3DTransposeTestCase( - methodName='runTest', stride=2, dilation=(2, 1, 2))) + 
Conv3DTransposeTestCase(methodName='runTest', + stride=[1, 2, 1], + dilation=2, + no_bias=True)) suite.addTest( - Conv3DTransposeTestCase( - methodName='runTest', padding="valid")) + Conv3DTransposeTestCase(methodName='runTest', + output_size=[12, 19, 12], + stride=[1, 2, 1], + dilation=2)) suite.addTest( - Conv3DTransposeTestCase( - methodName='runTest', padding='valid')) + Conv3DTransposeTestCase(methodName='runTest', + stride=2, + dilation=(2, 1, 2))) + suite.addTest(Conv3DTransposeTestCase(methodName='runTest', + padding="valid")) + suite.addTest(Conv3DTransposeTestCase(methodName='runTest', + padding='valid')) suite.addTest( - Conv3DTransposeTestCase( - methodName='runTest', filter_size=1, padding=(2, 3, 1))) + Conv3DTransposeTestCase(methodName='runTest', + filter_size=1, + padding=(2, 3, 1))) suite.addTest( - Conv3DTransposeTestCase( - methodName='runTest', padding=[1, 2, 2, 3, 2, 1])) + Conv3DTransposeTestCase(methodName='runTest', + padding=[1, 2, 2, 3, 2, 1])) suite.addTest( - Conv3DTransposeTestCase( - methodName='runTest', - padding=[[0, 0], [0, 0], [2, 3], [1, 2], [2, 1]])) + Conv3DTransposeTestCase(methodName='runTest', + padding=[[0, 0], [0, 0], [2, 3], [1, 2], [2, + 1]])) suite.addTest( - Conv3DTransposeTestCase( - methodName='runTest', data_format="NDHWC")) + Conv3DTransposeTestCase(methodName='runTest', data_format="NDHWC")) suite.addTest( - Conv3DTransposeTestCase( - methodName='runTest', - data_format="NDHWC", - padding=[[0, 0], [1, 1], [2, 2], [3, 3], [0, 0]])) + Conv3DTransposeTestCase(methodName='runTest', + data_format="NDHWC", + padding=[[0, 0], [1, 1], [2, 2], [3, 3], [0, + 0]])) suite.addTest( - Conv3DTransposeTestCase( - methodName='runTest', groups=2, padding="valid")) + Conv3DTransposeTestCase(methodName='runTest', groups=2, + padding="valid")) suite.addTest( - Conv3DTransposeTestCase( - methodName='runTest', - num_filters=6, - num_channels=3, - groups=3, - padding="valid")) + Conv3DTransposeTestCase(methodName='runTest', + num_filters=6, + num_channels=3, + groups=3, + padding="valid")) def add_error_cases(suite): suite.addTest( - Conv3DTransposeErrorTestCase( - methodName='runTest', num_channels=5, groups=2)) + Conv3DTransposeErrorTestCase(methodName='runTest', + num_channels=5, + groups=2)) suite.addTest( - Conv3DTransposeErrorTestCase( - methodName='runTest', output_size="not_valid")) + Conv3DTransposeErrorTestCase(methodName='runTest', + output_size="not_valid")) suite.addTest( - Conv3DTransposeErrorTestCase( - methodName='runTest', num_channels=5, groups=2, padding=[-1, 1, 3])) + Conv3DTransposeErrorTestCase(methodName='runTest', + num_channels=5, + groups=2, + padding=[-1, 1, 3])) def load_tests(loader, standard_tests, pattern): diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py index 1e4d09c509e..0042585aef8 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py @@ -18,6 +18,7 @@ import unittest import numpy as np import paddle + paddle.enable_static() import paddle.fluid.core as core import paddle.fluid as fluid @@ -45,11 +46,12 @@ def conv3dtranspose_forward_naive(input_, filter_, attrs): def _get_padding_with_SAME(input_shape, kernel_size, kernel_stride): padding = [] - for input_size, filter_size, stride_size in zip( - input_shape, kernel_size, kernel_stride): + for input_size, filter_size, stride_size in zip(input_shape, + kernel_size, + kernel_stride): out_size = 
int((input_size + stride_size - 1) / stride_size) - pad_sum = np.max(( - (out_size - 1) * stride_size + filter_size - input_size, 0)) + pad_sum = np.max( + ((out_size - 1) * stride_size + filter_size - input_size, 0)) pad_0 = int(pad_sum / 2) pad_1 = int(pad_sum - pad_0) padding.append(pad_0) @@ -85,31 +87,34 @@ def conv3dtranspose_forward_naive(input_, filter_, attrs): for i in range(in_h): for j in range(in_w): for g in range(groups): - input_masked = input_[n, g * sub_in_c:(g + 1 - ) * sub_in_c, d, - i, j] # (c) + input_masked = input_[n, + g * sub_in_c:(g + 1) * sub_in_c, + d, i, j] # (c) input_masked = np.reshape(input_masked, (sub_in_c, 1, 1, 1)) input_masked = np.tile(input_masked, (1, f_d, f_h, f_w)) for k in range(f_out_c): - tmp_out = np.sum(input_masked * filter_[ - g * sub_in_c:(g + 1) * sub_in_c, k, :, :, :], + tmp_out = np.sum(input_masked * + filter_[g * sub_in_c:(g + 1) * + sub_in_c, k, :, :, :], axis=0) d1, d2 = d * stride[0], d * stride[0] + d_bolck_d i1, i2 = i * stride[1], i * stride[1] + d_bolck_h j1, j2 = j * stride[2], j * stride[2] + d_bolck_w - out[n, g * f_out_c + k, d1:d2:dilations[0], i1:i2: - dilations[1], j1:j2:dilations[2]] += tmp_out + out[n, g * f_out_c + k, d1:d2:dilations[0], + i1:i2:dilations[1], + j1:j2:dilations[2]] += tmp_out - out = out[:, :, pad_d_0:out_d - pad_d_1, pad_h_0:out_h - pad_h_1, pad_w_0: - out_w - pad_w_1] + out = out[:, :, pad_d_0:out_d - pad_d_1, pad_h_0:out_h - pad_h_1, + pad_w_0:out_w - pad_w_1] if attrs['data_format'] == 'NHWC': out = np.transpose(out, [0, 2, 3, 4, 1]) return out class TestConv3DTransposeOp(OpTest): + def setUp(self): # init as conv transpose self.use_cudnn = False @@ -150,44 +155,40 @@ class TestConv3DTransposeOp(OpTest): def test_check_grad(self): if self.use_cudnn: place = core.CUDAPlace(0) - self.check_grad_with_place( - place, - set(['Input', 'Filter']), - 'Output', - max_relative_error=0.03) + self.check_grad_with_place(place, + set(['Input', 'Filter']), + 'Output', + max_relative_error=0.03) else: - self.check_grad( - set(['Input', 'Filter']), 'Output', max_relative_error=0.03) + self.check_grad(set(['Input', 'Filter']), + 'Output', + max_relative_error=0.03) def test_check_grad_no_filter(self): if self.use_cudnn: place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['Input'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Filter'])) + self.check_grad_with_place(place, ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter'])) elif self.check_no_filter: - self.check_grad( - ['Input'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Filter'])) + self.check_grad(['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter'])) def test_check_grad_no_input(self): if self.use_cudnn: place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['Filter'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Input'])) + self.check_grad_with_place(place, ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input'])) elif self.check_no_input: - self.check_grad( - ['Filter'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Input'])) + self.check_grad(['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input'])) def init_test_case(self): self.pad = [0, 0, 0] @@ -203,6 +204,7 @@ class TestConv3DTransposeOp(OpTest): class TestWithSymmetricPad(TestConv3DTransposeOp): + def init_test_case(self): self.check_no_input = True self.pad = [1, 1, 1] @@ -215,6 +217,7 @@ class 
TestWithSymmetricPad(TestConv3DTransposeOp): class TestWithAsymmetricPad(TestConv3DTransposeOp): + def init_test_case(self): self.pad = [1, 0, 1, 0, 1, 2] self.stride = [1, 1, 1] @@ -226,6 +229,7 @@ class TestWithAsymmetricPad(TestConv3DTransposeOp): class TestWithSAMEPad(TestConv3DTransposeOp): + def init_test_case(self): self.stride = [1, 1, 2] self.dilations = [1, 2, 1] @@ -237,6 +241,7 @@ class TestWithSAMEPad(TestConv3DTransposeOp): class TestWithVALIDPad(TestConv3DTransposeOp): + def init_test_case(self): self.stride = [2, 1, 1] self.dilations = [1, 1, 1] @@ -248,6 +253,7 @@ class TestWithVALIDPad(TestConv3DTransposeOp): class TestWithStride(TestConv3DTransposeOp): + def init_test_case(self): self.check_no_filter = True self.pad = [1, 1, 1] @@ -260,6 +266,7 @@ class TestWithStride(TestConv3DTransposeOp): class TestWithGroups(TestConv3DTransposeOp): + def init_test_case(self): self.pad = [1, 1, 1] self.stride = [1, 1, 1] @@ -271,6 +278,7 @@ class TestWithGroups(TestConv3DTransposeOp): class TestWithDilation(TestConv3DTransposeOp): + def init_test_case(self): self.pad = [1, 1, 1] self.stride = [1, 1, 1] @@ -282,6 +290,7 @@ class TestWithDilation(TestConv3DTransposeOp): class Test_NHWC(TestConv3DTransposeOp): + def init_test_case(self): self.pad = [0, 0, 0] self.stride = [1, 1, 1] @@ -297,6 +306,7 @@ class Test_NHWC(TestConv3DTransposeOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNN(TestConv3DTransposeOp): + def init_op_type(self): self.use_cudnn = True self.op_type = "conv3d_transpose" @@ -305,6 +315,7 @@ class TestCUDNN(TestConv3DTransposeOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithSymmetricPad(TestWithSymmetricPad): + def init_test_case(self): self.pad = [1, 1, 1] self.stride = [1, 1, 1] @@ -322,6 +333,7 @@ class TestCUDNNWithSymmetricPad(TestWithSymmetricPad): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithAsymmetricPad(TestWithAsymmetricPad): + def init_test_case(self): self.pad = [1, 1, 1, 0, 0, 2] self.stride = [1, 1, 1] @@ -339,6 +351,7 @@ class TestCUDNNWithAsymmetricPad(TestWithAsymmetricPad): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithSAMEPad(TestWithSAMEPad): + def init_test_case(self): self.stride = [1, 1, 2] self.dilations = [1, 2, 1] @@ -356,6 +369,7 @@ class TestCUDNNWithSAMEPad(TestWithSAMEPad): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithVALIDPad(TestWithVALIDPad): + def init_test_case(self): self.stride = [1, 1, 1] self.dilations = [1, 1, 1] @@ -373,6 +387,7 @@ class TestCUDNNWithVALIDPad(TestWithVALIDPad): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithStride(TestWithStride): + def init_test_case(self): self.pad = [1, 1, 1] self.stride = [2, 2, 2] @@ -390,6 +405,7 @@ class TestCUDNNWithStride(TestWithStride): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithGroups(TestWithGroups): + def init_test_case(self): self.pad = [1, 1, 1] self.stride = [1, 1, 1] @@ -422,6 +438,7 @@ class TestCUDNNWithGroups(TestWithGroups): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNN_NHWC(TestConv3DTransposeOp): + def init_test_case(self): self.pad = [0, 0, 0] self.stride = [1, 1, 1] @@ -440,6 +457,7 @@ class 
TestCUDNN_NHWC(TestConv3DTransposeOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithSymmetricPad_NHWC(TestWithSymmetricPad): + def init_test_case(self): self.pad = [1, 1, 1] self.stride = [1, 1, 1] @@ -458,6 +476,7 @@ class TestCUDNNWithSymmetricPad_NHWC(TestWithSymmetricPad): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithAsymmetricPad_NHWC(TestWithAsymmetricPad): + def init_test_case(self): self.pad = [1, 0, 1, 0, 0, 2] self.stride = [1, 1, 1] @@ -476,6 +495,7 @@ class TestCUDNNWithAsymmetricPad_NHWC(TestWithAsymmetricPad): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithStride_NHWC(TestWithStride): + def init_test_case(self): self.pad = [1, 1, 1] self.stride = [2, 2, 2] @@ -494,6 +514,7 @@ class TestCUDNNWithStride_NHWC(TestWithStride): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNWithGroups_NHWC(TestWithGroups): + def init_test_case(self): self.pad = [1, 1, 1] self.stride = [1, 1, 1] diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py index d5970456419..74122abf77b 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py @@ -24,6 +24,7 @@ from test_conv3d_transpose_op import TestConv3DTransposeOp class TestWithSymmetricPad_NHWC(TestConv3DTransposeOp): + def init_test_case(self): self.pad = [1, 1, 1] self.stride = [1, 1, 1] @@ -36,6 +37,7 @@ class TestWithSymmetricPad_NHWC(TestConv3DTransposeOp): class TestWithAsymmetricPad_NHWC(TestConv3DTransposeOp): + def init_test_case(self): self.pad = [1, 0, 1, 0, 1, 2] self.stride = [1, 1, 1] @@ -48,6 +50,7 @@ class TestWithAsymmetricPad_NHWC(TestConv3DTransposeOp): class TestWithGroups_NHWC(TestConv3DTransposeOp): + def init_test_case(self): self.check_no_filter = True self.pad = [1, 1, 1] @@ -61,6 +64,7 @@ class TestWithGroups_NHWC(TestConv3DTransposeOp): class TestWithStride_NHWC(TestConv3DTransposeOp): + def init_test_case(self): self.pad = [1, 1, 1] self.stride = [2, 2, 2] @@ -73,6 +77,7 @@ class TestWithStride_NHWC(TestConv3DTransposeOp): class TestWithDilation_NHWC(TestConv3DTransposeOp): + def init_test_case(self): self.check_no_input = True self.pad = [1, 1, 1] @@ -86,59 +91,57 @@ class TestWithDilation_NHWC(TestConv3DTransposeOp): class TestConv3DTransposeAPI(unittest.TestCase): + def test_case1(self): - data1 = fluid.layers.data( - name='data1', shape=[3, 5, 5, 5], dtype='float32') - data2 = fluid.layers.data( - name='data2', shape=[5, 5, 5, 3], dtype='float32') - - out1 = fluid.layers.conv3d_transpose( - input=data1, - groups=1, - num_filters=6, - filter_size=3, - data_format='NCDHW') - out2 = fluid.layers.conv3d_transpose( - input=data2, - groups=1, - num_filters=6, - filter_size=3, - data_format='NDHWC') - out3 = fluid.layers.conv3d_transpose( - input=data1, - groups=1, - num_filters=6, - filter_size=3, - padding=[[0, 0], [0, 0], [1, 1], [0, 0], [1, 1]], - data_format='NCDHW') - out4 = fluid.layers.conv3d_transpose( - input=data2, - groups=3, - num_filters=6, - filter_size=3, - padding=[[0, 0], [0, 0], [1, 1], [1, 2], [0, 0]], - data_format='NDHWC') - out5 = fluid.layers.conv3d_transpose( - input=data2, - groups=1, - num_filters=6, - filter_size=3, - padding='SAME', - data_format='NCDHW') - out6 = 
fluid.layers.conv3d_transpose( - input=data2, - groups=1, - num_filters=6, - filter_size=3, - padding='VALID', - data_format='NDHWC') - out7 = fluid.layers.conv3d_transpose( - input=data2, - groups=1, - num_filters=6, - output_size=[7, 7, 7], - padding=[0, 0, 0], - data_format='NDHWC') + data1 = fluid.layers.data(name='data1', + shape=[3, 5, 5, 5], + dtype='float32') + data2 = fluid.layers.data(name='data2', + shape=[5, 5, 5, 3], + dtype='float32') + + out1 = fluid.layers.conv3d_transpose(input=data1, + groups=1, + num_filters=6, + filter_size=3, + data_format='NCDHW') + out2 = fluid.layers.conv3d_transpose(input=data2, + groups=1, + num_filters=6, + filter_size=3, + data_format='NDHWC') + out3 = fluid.layers.conv3d_transpose(input=data1, + groups=1, + num_filters=6, + filter_size=3, + padding=[[0, 0], [0, 0], [1, 1], + [0, 0], [1, 1]], + data_format='NCDHW') + out4 = fluid.layers.conv3d_transpose(input=data2, + groups=3, + num_filters=6, + filter_size=3, + padding=[[0, 0], [0, 0], [1, 1], + [1, 2], [0, 0]], + data_format='NDHWC') + out5 = fluid.layers.conv3d_transpose(input=data2, + groups=1, + num_filters=6, + filter_size=3, + padding='SAME', + data_format='NCDHW') + out6 = fluid.layers.conv3d_transpose(input=data2, + groups=1, + num_filters=6, + filter_size=3, + padding='VALID', + data_format='NDHWC') + out7 = fluid.layers.conv3d_transpose(input=data2, + groups=1, + num_filters=6, + output_size=[7, 7, 7], + padding=[0, 0, 0], + data_format='NDHWC') data1_np = np.random.random((2, 3, 5, 5, 5)).astype("float32") data2_np = np.random.random((2, 5, 5, 5, 3)).astype("float32") @@ -149,12 +152,13 @@ class TestConv3DTransposeAPI(unittest.TestCase): place = core.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - results = exe.run( - fluid.default_main_program(), - feed={"data1": data1_np, - "data2": data2_np}, - fetch_list=[out1, out2, out3, out4, out5, out6, out7], - return_numpy=True) + results = exe.run(fluid.default_main_program(), + feed={ + "data1": data1_np, + "data2": data2_np + }, + fetch_list=[out1, out2, out3, out4, out5, out6, out7], + return_numpy=True) self.assertIsNotNone(results[0]) self.assertIsNotNone(results[1]) self.assertIsNotNone(results[2]) @@ -165,48 +169,48 @@ class TestConv3DTransposeAPI(unittest.TestCase): class TestConv3DTransposeOpException(unittest.TestCase): + def test_exception(self): - data = fluid.layers.data( - name='data', shape=[3, 5, 5, 5], dtype="float32") + data = fluid.layers.data(name='data', + shape=[3, 5, 5, 5], + dtype="float32") def attr_data_format(): - out = fluid.layers.conv2d_transpose( - input=data, - groups=1, - num_filters=6, - filter_size=3, - data_format="NCDW") + out = fluid.layers.conv2d_transpose(input=data, + groups=1, + num_filters=6, + filter_size=3, + data_format="NCDW") self.assertRaises(ValueError, attr_data_format) def attr_padding_str(): - out = fluid.layers.conv2d_transpose( - input=data, - groups=1, - num_filters=6, - filter_size=3, - padding='Vald') + out = fluid.layers.conv2d_transpose(input=data, + groups=1, + num_filters=6, + filter_size=3, + padding='Vald') self.assertRaises(ValueError, attr_padding_str) def attr_padding_list(): - out = fluid.layers.conv2d_transpose( - input=data, - groups=1, - num_filters=6, - filter_size=3, - padding=[[1, 1], [1, 1], [0, 0], [0, 0], [1, 1]]) + out = fluid.layers.conv2d_transpose(input=data, + groups=1, + num_filters=6, + filter_size=3, + padding=[[1, 1], [1, 1], [0, 0], + [0, 0], [1, 1]]) self.assertRaises(ValueError, attr_padding_list) def 
attr_padding_with_data_format(): - out = fluid.layers.conv2d_transpose( - input=data, - groups=1, - num_filters=6, - filter_size=3, - padding=[[1, 1], [0, 0], [0, 0], [1, 0], [1, 1]], - data_format='NDHWC') + out = fluid.layers.conv2d_transpose(input=data, + groups=1, + num_filters=6, + filter_size=3, + padding=[[1, 1], [0, 0], [0, 0], + [1, 0], [1, 1]], + data_format='NDHWC') self.assertRaises(ValueError, attr_padding_with_data_format) diff --git a/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py b/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py index 5bff8b31421..2bee23cbdbd 100644 --- a/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py @@ -27,6 +27,7 @@ from decorator_helper import prog_scope class TestConvDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 4, 3, 3] @@ -40,8 +41,11 @@ class TestConvDoubleGradCheck(unittest.TestCase): w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -53,6 +57,7 @@ class TestConvDoubleGradCheck(unittest.TestCase): class TestConvDoubleGradCheckTest0(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 4, 3, 3] @@ -66,8 +71,11 @@ class TestConvDoubleGradCheckTest0(unittest.TestCase): w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -78,6 +86,7 @@ class TestConvDoubleGradCheckTest0(unittest.TestCase): class TestConvDoubleGradCheckTest1(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 3, 3, 3] @@ -91,8 +100,11 @@ class TestConvDoubleGradCheckTest1(unittest.TestCase): w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -103,6 +115,7 @@ class TestConvDoubleGradCheckTest1(unittest.TestCase): class TestConv3DDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 4, 3, 4, 2] @@ -116,8 +129,11 @@ class TestConv3DDoubleGradCheck(unittest.TestCase): w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): #places = [fluid.CPUPlace()] @@ -129,6 +145,7 @@ class TestConv3DDoubleGradCheck(unittest.TestCase): class TestConv3DDoubleGradCheckTest1(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 4, 5, 3, 2] @@ -142,8 +159,11 @@ class TestConv3DDoubleGradCheckTest1(unittest.TestCase): w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + 
gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -154,27 +174,30 @@ class TestConv3DDoubleGradCheckTest1(unittest.TestCase): class TestConv2DoubleGradCheck_AsyPadding(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 2, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv2d( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 0, 0, 1], - bias_attr=False, - use_cudnn=True) + y = layers.conv2d(input=x, + num_filters=2, + filter_size=1, + padding=[1, 0, 0, 1], + bias_attr=False, + use_cudnn=True) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -185,27 +208,30 @@ class TestConv2DoubleGradCheck_AsyPadding(unittest.TestCase): class TestConv2DoubleGradCheck_PaddingSAME(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 2, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv2d( - input=x, - num_filters=2, - filter_size=1, - padding="SAME", - bias_attr=False, - use_cudnn=True) + y = layers.conv2d(input=x, + num_filters=2, + filter_size=1, + padding="SAME", + bias_attr=False, + use_cudnn=True) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -216,27 +242,30 @@ class TestConv2DoubleGradCheck_PaddingSAME(unittest.TestCase): class TestConv2DoubleGradCheck_PaddingVALID(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 2, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv2d( - input=x, - num_filters=2, - filter_size=1, - padding="VALID", - bias_attr=False, - use_cudnn=True) + y = layers.conv2d(input=x, + num_filters=2, + filter_size=1, + padding="VALID", + bias_attr=False, + use_cudnn=True) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -247,29 +276,32 @@ class TestConv2DoubleGradCheck_PaddingVALID(unittest.TestCase): class TestConv2DoubleGradCheck_ChannelLast(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 2, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = 
layers.data('x', shape, False, dtype) - y = layers.conv2d( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 1], - bias_attr=False, - use_cudnn=True, - groups=1, - data_format="NHWC") + y = layers.conv2d(input=x, + num_filters=2, + filter_size=1, + padding=[1, 1], + bias_attr=False, + use_cudnn=True, + groups=1, + data_format="NHWC") x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -280,29 +312,32 @@ class TestConv2DoubleGradCheck_ChannelLast(unittest.TestCase): class TestConv2DoubleGradCheck_ChannelLast_AsyPadding(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 2, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv2d( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 0, 1, 0], - bias_attr=False, - use_cudnn=True, - groups=1, - data_format="NHWC") + y = layers.conv2d(input=x, + num_filters=2, + filter_size=1, + padding=[1, 0, 1, 0], + bias_attr=False, + use_cudnn=True, + groups=1, + data_format="NHWC") x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -313,27 +348,30 @@ class TestConv2DoubleGradCheck_ChannelLast_AsyPadding(unittest.TestCase): class TestConv3DDoubleGradCheck_AsyPadding(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 2, 2, 2, 2] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv3d( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 0, 0, 1, 1, 2], - bias_attr=False, - use_cudnn=True) + y = layers.conv3d(input=x, + num_filters=2, + filter_size=1, + padding=[1, 0, 0, 1, 1, 2], + bias_attr=False, + use_cudnn=True) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -344,28 +382,31 @@ class TestConv3DDoubleGradCheck_AsyPadding(unittest.TestCase): class TestConv3DoubleGradCheck_PaddingSAME(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 2, 2, 2, 2] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv3d( - input=x, - num_filters=2, - filter_size=1, - padding="SAME", - groups=1, - bias_attr=False, - use_cudnn=True) + y = layers.conv3d(input=x, + num_filters=2, + filter_size=1, + padding="SAME", + groups=1, + 
bias_attr=False, + use_cudnn=True) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -376,27 +417,30 @@ class TestConv3DoubleGradCheck_PaddingSAME(unittest.TestCase): class TestConv3DoubleGradCheck_PaddingVALID(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 2, 3, 3, 2] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv3d( - input=x, - num_filters=2, - filter_size=1, - padding="VALID", - bias_attr=False, - use_cudnn=True) + y = layers.conv3d(input=x, + num_filters=2, + filter_size=1, + padding="VALID", + bias_attr=False, + use_cudnn=True) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -407,29 +451,32 @@ class TestConv3DoubleGradCheck_PaddingVALID(unittest.TestCase): class TestConv3DDoubleGradCheck_ChannelLast(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 2, 2, 2, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv3d( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 1, 1], - bias_attr=False, - use_cudnn=True, - groups=1, - data_format="NDHWC") + y = layers.conv3d(input=x, + num_filters=2, + filter_size=1, + padding=[1, 1, 1], + bias_attr=False, + use_cudnn=True, + groups=1, + data_format="NDHWC") x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -440,29 +487,32 @@ class TestConv3DDoubleGradCheck_ChannelLast(unittest.TestCase): class TestConv3DDoubleGradCheck_ChannelLast_AsyPadding(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 2, 2, 2, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv3d( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 0, 1, 0, 1, 0], - bias_attr=False, - use_cudnn=True, - groups=1, - data_format="NDHWC") + y = layers.conv3d(input=x, + num_filters=2, + filter_size=1, + padding=[1, 0, 1, 0, 1, 0], + bias_attr=False, + use_cudnn=True, + groups=1, + data_format="NDHWC") x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - 
gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -473,6 +523,7 @@ class TestConv3DDoubleGradCheck_ChannelLast_AsyPadding(unittest.TestCase): class TestDepthWiseConvDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): shape = [2, 4, 3, 3] @@ -480,20 +531,27 @@ class TestDepthWiseConvDoubleGradCheck(unittest.TestCase): dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - # condition of depthwise conv: + # condition of depthwise conv: # use_cudnn == False # groups == filters # num_filters % num_channels == 0 - y = layers.conv2d( - x, shape[1], 1, groups=shape[1], bias_attr=False, use_cudnn=False) + y = layers.conv2d(x, + shape[1], + 1, + groups=shape[1], + bias_attr=False, + use_cudnn=False) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() w_arr = [] for p in w: w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) def test_grad(self): places = [] @@ -504,6 +562,7 @@ class TestDepthWiseConvDoubleGradCheck(unittest.TestCase): class TestDepthWiseConvDoubleGradCheckCase1(unittest.TestCase): + def depthwise_conv2d_wrapper(self, x): return paddle.nn.functional.conv2d(x[0], x[1], groups=4) @@ -516,7 +575,7 @@ class TestDepthWiseConvDoubleGradCheckCase1(unittest.TestCase): x = layers.data('x', x_shape, False, dtype) w = layers.data('w', w_shape, False, dtype) - # condition of depthwise conv: + # condition of depthwise conv: # use_cudnn == False # groups == filters # num_filters % num_channels == 0 @@ -525,8 +584,11 @@ class TestDepthWiseConvDoubleGradCheckCase1(unittest.TestCase): x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) w_arr = np.random.uniform(-1, 1, w_shape).astype(dtype) - gradient_checker.double_grad_check( - [x, w], y, x_init=[x_arr, w_arr], place=place, eps=eps) + gradient_checker.double_grad_check([x, w], + y, + x_init=[x_arr, w_arr], + place=place, + eps=eps) gradient_checker.double_grad_check_for_dygraph( self.depthwise_conv2d_wrapper, [x, w], y, @@ -542,6 +604,7 @@ class TestDepthWiseConvDoubleGradCheckCase1(unittest.TestCase): class TestConv3DDoubleGradCheck_NN(unittest.TestCase): + def conv3d_wrapper(self, x): return paddle.nn.functional.conv3d(x[0], x[1]) @@ -559,10 +622,16 @@ class TestConv3DDoubleGradCheck_NN(unittest.TestCase): x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) w_arr = np.random.uniform(-1, 1, w_shape).astype(dtype) - gradient_checker.double_grad_check( - [x, w], y, x_init=[x_arr, w_arr], place=place, eps=eps) - gradient_checker.double_grad_check_for_dygraph( - self.conv3d_wrapper, [x, w], y, x_init=[x_arr, w_arr], place=place) + gradient_checker.double_grad_check([x, w], + y, + x_init=[x_arr, w_arr], + place=place, + eps=eps) + gradient_checker.double_grad_check_for_dygraph(self.conv3d_wrapper, + [x, w], + y, + x_init=[x_arr, w_arr], + place=place) def test_grad(self): places = [] diff --git a/python/paddle/fluid/tests/unittests/test_conv_shift_op.py b/python/paddle/fluid/tests/unittests/test_conv_shift_op.py index 59241a40869..4718d94ba4f 100644 --- 
a/python/paddle/fluid/tests/unittests/test_conv_shift_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv_shift_op.py @@ -31,6 +31,7 @@ def conv_shift_forward(x, y): class TestConvShiftOp(OpTest): + def setUp(self): self.op_type = "conv_shift" diff --git a/python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py b/python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py index b9e9224b9e4..45b0a2b991e 100644 --- a/python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py @@ -27,6 +27,7 @@ from decorator_helper import prog_scope class TestConvTransposeDoubleGradCheck(unittest.TestCase): + def conv_transpose_wrapper(self, x): return paddle.nn.functional.conv2d_transpose(x[0], x[1], groups=1) @@ -38,8 +39,11 @@ class TestConvTransposeDoubleGradCheck(unittest.TestCase): if core.is_compiled_with_rocm(): dtype = np.float32 x = layers.data('x', shape, False, dtype) - y = layers.conv2d_transpose( - x, 2, filter_size=1, groups=1, bias_attr=False) + y = layers.conv2d_transpose(x, + 2, + filter_size=1, + groups=1, + bias_attr=False) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() @@ -48,16 +52,18 @@ class TestConvTransposeDoubleGradCheck(unittest.TestCase): w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) if core.is_compiled_with_rocm(): # HIP will sometimes fail if no atol - gradient_checker.double_grad_check( - [x] + w, - y, - x_init=[x_arr] + w_arr, - place=place, - eps=eps, - atol=1e-4) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps, + atol=1e-4) else: - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) gradient_checker.double_grad_check_for_dygraph( self.conv_transpose_wrapper, [x] + w, y, @@ -75,9 +81,12 @@ class TestConvTransposeDoubleGradCheck(unittest.TestCase): class TestConvTranspose2DoubleGradCheck_AsyPadding( TestConvTransposeDoubleGradCheck): + def conv_transpose_wrapper(self, x): - return paddle.nn.functional.conv2d_transpose( - x[0], x[1], groups=1, padding=[1, 0, 0, 1]) + return paddle.nn.functional.conv2d_transpose(x[0], + x[1], + groups=1, + padding=[1, 0, 0, 1]) @prog_scope() def func(self, place): @@ -87,13 +96,12 @@ class TestConvTranspose2DoubleGradCheck_AsyPadding( if core.is_compiled_with_rocm(): dtype = np.float32 x = layers.data('x', shape, False, dtype) - y = layers.conv2d_transpose( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 0, 0, 1], - bias_attr=False, - use_cudnn=True) + y = layers.conv2d_transpose(input=x, + num_filters=2, + filter_size=1, + padding=[1, 0, 0, 1], + bias_attr=False, + use_cudnn=True) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() @@ -102,16 +110,18 @@ class TestConvTranspose2DoubleGradCheck_AsyPadding( w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) if core.is_compiled_with_rocm(): # HIP will sometimes fail if no atol - gradient_checker.double_grad_check( - [x] + w, - y, - x_init=[x_arr] + w_arr, - place=place, - eps=eps, - atol=1e-4) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps, + atol=1e-4) else: - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + 
gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) gradient_checker.double_grad_check_for_dygraph( self.conv_transpose_wrapper, [x] + w, y, @@ -121,9 +131,12 @@ class TestConvTranspose2DoubleGradCheck_AsyPadding( class TestConvTranspose2DoubleGradCheck_PaddingSAME( TestConvTransposeDoubleGradCheck): + def conv_transpose_wrapper(self, x): - return paddle.nn.functional.conv2d_transpose( - x[0], x[1], groups=1, padding="SAME") + return paddle.nn.functional.conv2d_transpose(x[0], + x[1], + groups=1, + padding="SAME") @prog_scope() def func(self, place): @@ -133,13 +146,12 @@ class TestConvTranspose2DoubleGradCheck_PaddingSAME( if core.is_compiled_with_rocm(): dtype = np.float32 x = layers.data('x', shape, False, dtype) - y = layers.conv2d_transpose( - input=x, - num_filters=2, - filter_size=1, - padding="SAME", - bias_attr=False, - use_cudnn=True) + y = layers.conv2d_transpose(input=x, + num_filters=2, + filter_size=1, + padding="SAME", + bias_attr=False, + use_cudnn=True) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() @@ -148,16 +160,18 @@ class TestConvTranspose2DoubleGradCheck_PaddingSAME( w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) if core.is_compiled_with_rocm(): # HIP will sometimes fail if no atol - gradient_checker.double_grad_check( - [x] + w, - y, - x_init=[x_arr] + w_arr, - place=place, - eps=eps, - atol=1e-4) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps, + atol=1e-4) else: - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) gradient_checker.double_grad_check_for_dygraph( self.conv_transpose_wrapper, [x] + w, y, @@ -167,9 +181,12 @@ class TestConvTranspose2DoubleGradCheck_PaddingSAME( class TestConvTranspose2DoubleGradCheck_PaddingVALID( TestConvTransposeDoubleGradCheck): + def conv_transpose_wrapper(self, x): - return paddle.nn.functional.conv2d_transpose( - x[0], x[1], groups=1, padding="VALID") + return paddle.nn.functional.conv2d_transpose(x[0], + x[1], + groups=1, + padding="VALID") @prog_scope() def func(self, place): @@ -179,13 +196,12 @@ class TestConvTranspose2DoubleGradCheck_PaddingVALID( if core.is_compiled_with_rocm(): dtype = np.float32 x = layers.data('x', shape, False, dtype) - y = layers.conv2d_transpose( - input=x, - num_filters=2, - filter_size=1, - padding="VALID", - bias_attr=False, - use_cudnn=True) + y = layers.conv2d_transpose(input=x, + num_filters=2, + filter_size=1, + padding="VALID", + bias_attr=False, + use_cudnn=True) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() @@ -194,16 +210,18 @@ class TestConvTranspose2DoubleGradCheck_PaddingVALID( w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) if core.is_compiled_with_rocm(): # HIP will sometimes fail if no atol - gradient_checker.double_grad_check( - [x] + w, - y, - x_init=[x_arr] + w_arr, - place=place, - eps=eps, - atol=1e-4) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps, + atol=1e-4) else: - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) 
gradient_checker.double_grad_check_for_dygraph( self.conv_transpose_wrapper, [x] + w, y, @@ -213,9 +231,13 @@ class TestConvTranspose2DoubleGradCheck_PaddingVALID( class TestConvTranspose2DoubleGradCheck_ChannelLast( TestConvTransposeDoubleGradCheck): + def conv_transpose_wrapper(self, x): - return paddle.nn.functional.conv2d_transpose( - x[0], x[1], groups=1, padding=[1, 1], data_format="NHWC") + return paddle.nn.functional.conv2d_transpose(x[0], + x[1], + groups=1, + padding=[1, 1], + data_format="NHWC") @prog_scope() def func(self, place): @@ -225,15 +247,14 @@ class TestConvTranspose2DoubleGradCheck_ChannelLast( if core.is_compiled_with_rocm(): dtype = np.float32 x = layers.data('x', shape, False, dtype) - y = layers.conv2d_transpose( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 1], - bias_attr=False, - use_cudnn=True, - groups=1, - data_format="NHWC") + y = layers.conv2d_transpose(input=x, + num_filters=2, + filter_size=1, + padding=[1, 1], + bias_attr=False, + use_cudnn=True, + groups=1, + data_format="NHWC") x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() @@ -242,16 +263,18 @@ class TestConvTranspose2DoubleGradCheck_ChannelLast( w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) if core.is_compiled_with_rocm(): # HIP will sometimes fail if no atol - gradient_checker.double_grad_check( - [x] + w, - y, - x_init=[x_arr] + w_arr, - place=place, - eps=eps, - atol=1e-4) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps, + atol=1e-4) else: - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x] + w, + y, + x_init=[x_arr] + w_arr, + place=place, + eps=eps) gradient_checker.double_grad_check_for_dygraph( self.conv_transpose_wrapper, [x] + w, y, diff --git a/python/paddle/fluid/tests/unittests/test_corr.py b/python/paddle/fluid/tests/unittests/test_corr.py index 1e1dd3b3695..6a9d931e22d 100644 --- a/python/paddle/fluid/tests/unittests/test_corr.py +++ b/python/paddle/fluid/tests/unittests/test_corr.py @@ -31,6 +31,7 @@ def numpy_corr(np_arr, rowvar=True, dtype='float64'): class Corr_Test(unittest.TestCase): + def setUp(self): self.shape = [4, 5] @@ -52,8 +53,7 @@ class Corr_Test(unittest.TestCase): np_corr = numpy_corr(np_arr, rowvar=True, dtype=dtype) if dtype == 'float32': self.assertTrue( - np.allclose( - np_corr, corr.numpy(), atol=1.e-5)) + np.allclose(np_corr, corr.numpy(), atol=1.e-5)) else: self.assertTrue(np.allclose(np_corr, corr.numpy())) @@ -76,29 +76,32 @@ class Corr_Test(unittest.TestCase): np_corr = numpy_corr(np_arr, rowvar=False, dtype=dtype) if dtype == 'float32': self.assertTrue( - np.allclose( - np_corr, corr.numpy(), atol=1.e-5)) + np.allclose(np_corr, corr.numpy(), atol=1.e-5)) else: self.assertTrue(np.allclose(np_corr, corr.numpy())) # Input(x) only support N-D (1<=N<=2) tensor class Corr_Test2(Corr_Test): + def setUp(self): self.shape = [10] class Corr_Test3(Corr_Test): + def setUp(self): self.shape = [4, 5] # Input(x) only support N-D (1<=N<=2) tensor class Corr_Test4(unittest.TestCase): + def setUp(self): self.shape = [2, 5, 2] def test_errors(self): + def test_err(): np_arr = np.random.rand(*self.shape).astype('float64') tensor = paddle.to_tensor(np_arr) @@ -109,6 +112,7 @@ class Corr_Test4(unittest.TestCase): # test unsupported complex input class Corr_Comeplex_Test(unittest.TestCase): + def setUp(self): self.dtype = 'complex128' @@ 
-120,6 +124,7 @@ class Corr_Comeplex_Test(unittest.TestCase): class Corr_Test5(Corr_Comeplex_Test): + def setUp(self): self.dtype = 'complex64' diff --git a/python/paddle/fluid/tests/unittests/test_cos_sim_op.py b/python/paddle/fluid/tests/unittests/test_cos_sim_op.py index 23b0fcc691a..8e5d30d2743 100644 --- a/python/paddle/fluid/tests/unittests/test_cos_sim_op.py +++ b/python/paddle/fluid/tests/unittests/test_cos_sim_op.py @@ -22,6 +22,7 @@ from paddle.fluid import Program, program_guard class TestCosSimOp(OpTest): + def setUp(self): self.op_type = "cos_sim" self.inputs = { @@ -45,15 +46,20 @@ class TestCosSimOp(OpTest): self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.06) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], 'Out', max_relative_error=0.06, no_grad_set=set("X")) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.06, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], 'Out', max_relative_error=0.06, no_grad_set=set('Y')) + self.check_grad(['X'], + 'Out', + max_relative_error=0.06, + no_grad_set=set('Y')) class TestCosSimOp2(TestCosSimOp): + def setUp(self): self.op_type = "cos_sim" self.inputs = { @@ -72,6 +78,7 @@ class TestCosSimOp2(TestCosSimOp): class TestCosSimOp3(TestCosSimOp): + def setUp(self): self.op_type = "cos_sim" self.inputs = { @@ -90,6 +97,7 @@ class TestCosSimOp3(TestCosSimOp): class TestCosSimOp4(TestCosSimOp): + def setUp(self): self.op_type = "cos_sim" self.inputs = { @@ -108,13 +116,14 @@ class TestCosSimOp4(TestCosSimOp): class TestCosSimOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of batch_norm must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) - x2 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) + x2 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.cos_sim, x1, x2) # the input dtype of batch_norm must be float32 diff --git a/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py b/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py index 0b6e5b444ca..45000c3aef8 100644 --- a/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py +++ b/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py @@ -24,6 +24,7 @@ from paddle.fluid import Program, program_guard, Executor, default_main_program class TestCosineSimilarityAPI(unittest.TestCase): + def setUp(self): self.places = [paddle.CPUPlace()] if core.is_compiled_with_cuda(): @@ -53,8 +54,10 @@ class TestCosineSimilarityAPI(unittest.TestCase): result = F.cosine_similarity(x1, x2, axis=axis, eps=eps) exe = Executor(place) fetches = exe.run(default_main_program(), - feed={"x1": np_x1, - "x2": np_x2}, + feed={ + "x1": np_x1, + "x2": np_x2 + }, fetch_list=[result]) np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) diff --git a/python/paddle/fluid/tests/unittests/test_cost_model.py b/python/paddle/fluid/tests/unittests/test_cost_model.py index 79e2b787921..75570932858 100644 --- a/python/paddle/fluid/tests/unittests/test_cost_model.py +++ b/python/paddle/fluid/tests/unittests/test_cost_model.py @@ -26,6 +26,7 @@ device = "gpu" if core.is_compiled_with_cuda() else "cpu" class TestCostModel(unittest.TestCase): + def test_profiler_measure_empty_program(self): cost_model 
= core.CostModel() empty_program = paddle.static.Program() @@ -71,16 +72,16 @@ class TestCostModel(unittest.TestCase): print("conv2d_op_time:", conv2d_op_time) print("conv2d_op_config:", conv2d_op_config) - conv2d_backward_op_cost = cost_model.get_static_op_time( - "conv2d", forward=False) + conv2d_backward_op_cost = cost_model.get_static_op_time("conv2d", + forward=False) conv2d_backward_op_time = conv2d_backward_op_cost["op_time"] conv2d_backward_op_config = conv2d_backward_op_cost["config"] self.assertGreater(float(conv2d_backward_op_time), 0) print("conv2d_backward_op_time:", conv2d_backward_op_time) print("conv2d_backward_op_config:", conv2d_backward_op_config) - conv2d_fp16_op_cost = cost_model.get_static_op_time( - "conv2d", dtype="float16") + conv2d_fp16_op_cost = cost_model.get_static_op_time("conv2d", + dtype="float16") conv2d_fp16_op_time = conv2d_fp16_op_cost["op_time"] conv2d_fp16_op_config = conv2d_fp16_op_cost["config"] self.assertGreater(float(conv2d_fp16_op_time), 0) diff --git a/python/paddle/fluid/tests/unittests/test_cov.py b/python/paddle/fluid/tests/unittests/test_cov.py index 5c4b9cbab27..c67b2c2d357 100644 --- a/python/paddle/fluid/tests/unittests/test_cov.py +++ b/python/paddle/fluid/tests/unittests/test_cov.py @@ -29,6 +29,7 @@ def numpy_cov(np_arr, rowvar=True, ddof=1, fweights=None, aweights=None): class Cov_Test(unittest.TestCase): + def setUp(self): self.shape = [20, 10] self.weightshape = [10] @@ -53,8 +54,11 @@ class Cov_Test(unittest.TestCase): ddof=True, fweights=None, aweights=None) - np_cov = numpy_cov( - np_arr, rowvar=True, ddof=1, fweights=None, aweights=None) + np_cov = numpy_cov(np_arr, + rowvar=True, + ddof=1, + fweights=None, + aweights=None) self.assertTrue(np.allclose(np_cov, cov.numpy())) def test_tensor_cov_default(self): @@ -82,8 +86,11 @@ class Cov_Test(unittest.TestCase): ddof=True, fweights=None, aweights=None) - np_cov = numpy_cov( - np_arr, rowvar=False, ddof=1, fweights=None, aweights=None) + np_cov = numpy_cov(np_arr, + rowvar=False, + ddof=1, + fweights=None, + aweights=None) self.assertTrue(np.allclose(np_cov, cov.numpy())) def test_tensor_cov_rowvar(self): @@ -111,8 +118,11 @@ class Cov_Test(unittest.TestCase): ddof=False, fweights=None, aweights=None) - np_cov = numpy_cov( - np_arr, rowvar=True, ddof=0, fweights=None, aweights=None) + np_cov = numpy_cov(np_arr, + rowvar=True, + ddof=0, + fweights=None, + aweights=None) self.assertTrue(np.allclose(np_cov, cov.numpy())) def test_tensor_cov_ddof(self): @@ -134,8 +144,8 @@ class Cov_Test(unittest.TestCase): for dtype in typelist: np_arr = np.random.rand(*self.shape).astype(dtype) - np_fw = np.random.randint( - 10, size=self.weightshape).astype('int32') + np_fw = np.random.randint(10, + size=self.weightshape).astype('int32') tensor = paddle.to_tensor(np_arr, place=p) fweights = paddle.to_tensor(np_fw, place=p) cov = paddle.linalg.cov(tensor, @@ -143,8 +153,11 @@ class Cov_Test(unittest.TestCase): ddof=True, fweights=fweights, aweights=None) - np_cov = numpy_cov( - np_arr, rowvar=True, ddof=1, fweights=np_fw, aweights=None) + np_cov = numpy_cov(np_arr, + rowvar=True, + ddof=1, + fweights=np_fw, + aweights=None) self.assertTrue(np.allclose(np_cov, cov.numpy())) def test_tensor_cov_fweights(self): @@ -166,8 +179,8 @@ class Cov_Test(unittest.TestCase): for dtype in typelist: np_arr = np.random.rand(*self.shape).astype(dtype) - np_aw = np.random.randint( - 10, size=self.weightshape).astype('int32') + np_aw = np.random.randint(10, + size=self.weightshape).astype('int32') tensor = 
paddle.to_tensor(np_arr, place=p) aweights = paddle.to_tensor(np_aw, place=p) cov = paddle.linalg.cov(tensor, @@ -175,8 +188,11 @@ class Cov_Test(unittest.TestCase): ddof=True, fweights=None, aweights=aweights) - np_cov = numpy_cov( - np_arr, rowvar=True, ddof=1, fweights=None, aweights=np_aw) + np_cov = numpy_cov(np_arr, + rowvar=True, + ddof=1, + fweights=None, + aweights=np_aw) self.assertTrue(np.allclose(np_cov, cov.numpy())) def test_tensor_cov_aweights(self): @@ -198,8 +214,8 @@ class Cov_Test(unittest.TestCase): for dtype in typelist: np_arr = np.random.rand(*self.shape).astype(dtype) - np_fw = np.random.randint( - 10, size=self.weightshape).astype('int64') + np_fw = np.random.randint(10, + size=self.weightshape).astype('int64') np_aw = np.random.rand(*self.weightshape).astype('float64') tensor = paddle.to_tensor(np_arr, place=p) fweights = paddle.to_tensor(np_fw, place=p) @@ -209,8 +225,11 @@ class Cov_Test(unittest.TestCase): ddof=True, fweights=fweights, aweights=aweights) - np_cov = numpy_cov( - np_arr, rowvar=True, ddof=1, fweights=np_fw, aweights=np_aw) + np_cov = numpy_cov(np_arr, + rowvar=True, + ddof=1, + fweights=np_fw, + aweights=np_aw) self.assertTrue(np.allclose(np_cov, cov.numpy())) def test_tensor_cov_weights(self): @@ -220,6 +239,7 @@ class Cov_Test(unittest.TestCase): class Cov_Test2(Cov_Test): + def setUp(self): self.shape = [10] self.weightshape = [10] @@ -227,6 +247,7 @@ class Cov_Test2(Cov_Test): # Input(x) only support N-D (1<=N<=2) tensor class Cov_Test3(unittest.TestCase): + def setUp(self): self.shape = [2, 5, 10] self.fweightshape = [10] @@ -235,12 +256,13 @@ class Cov_Test3(unittest.TestCase): self.aw_s = 1. def func_test_errors(self): + def test_err(): np_arr = np.random.rand(*self.shape).astype('float64') - np_fw = self.fw_s * np.random.rand( - *self.fweightshape).astype('int32') - np_aw = self.aw_s * np.random.rand( - *self.aweightshape).astype('float64') + np_fw = self.fw_s * np.random.rand(*self.fweightshape).astype( + 'int32') + np_aw = self.aw_s * np.random.rand(*self.aweightshape).astype( + 'float64') tensor = paddle.to_tensor(np_arr) fweights = paddle.to_tensor(np_fw) aweights = paddle.to_tensor(np_aw) @@ -260,6 +282,7 @@ class Cov_Test3(unittest.TestCase): #Input(fweights) only support N-D (N<=1) tensor class Cov_Test4(Cov_Test3): + def setUp(self): self.shape = [5, 10] self.fweightshape = [2, 10] @@ -270,6 +293,7 @@ class Cov_Test4(Cov_Test3): #The number of Input(fweights) should equal to x's dim[1] class Cov_Test5(Cov_Test3): + def setUp(self): self.shape = [5, 10] self.fweightshape = [5] @@ -280,6 +304,7 @@ class Cov_Test5(Cov_Test3): #The value of Input(fweights) cannot be negtive class Cov_Test6(Cov_Test3): + def setUp(self): self.shape = [5, 10] self.fweightshape = [10] @@ -290,6 +315,7 @@ class Cov_Test6(Cov_Test3): #Input(aweights) only support N-D (N<=1) tensor class Cov_Test7(Cov_Test3): + def setUp(self): self.shape = [5, 10] self.fweightshape = [10] @@ -300,6 +326,7 @@ class Cov_Test7(Cov_Test3): #The number of Input(aweights) should equal to x's dim[1] class Cov_Test8(Cov_Test3): + def setUp(self): self.shape = [5, 10] self.fweightshape = [10] @@ -310,6 +337,7 @@ class Cov_Test8(Cov_Test3): #The value of Input(aweights) cannot be negtive class Cov_Test9(Cov_Test3): + def setUp(self): self.shape = [5, 10] self.fweightshape = [10] diff --git a/python/paddle/fluid/tests/unittests/test_cpuonly_spawn.py b/python/paddle/fluid/tests/unittests/test_cpuonly_spawn.py index 1def2ffd82a..4b351b9f199 100644 --- 
a/python/paddle/fluid/tests/unittests/test_cpuonly_spawn.py +++ b/python/paddle/fluid/tests/unittests/test_cpuonly_spawn.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,6 +22,7 @@ import paddle.distributed as dist class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() self._linear1 = nn.Linear(10, 10) @@ -58,6 +59,7 @@ def train(print_result=False): class TestSpawn(unittest.TestCase): + def test_spawn(self): dist.spawn(train, backend='gloo', nprocs=4) diff --git a/python/paddle/fluid/tests/unittests/test_create_global_var.py b/python/paddle/fluid/tests/unittests/test_create_global_var.py index 39fb0355190..1517ce64222 100644 --- a/python/paddle/fluid/tests/unittests/test_create_global_var.py +++ b/python/paddle/fluid/tests/unittests/test_create_global_var.py @@ -20,6 +20,7 @@ from paddle.fluid import Program, program_guard class TestCreateGlobalVarError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): @@ -33,8 +34,8 @@ class TestCreateGlobalVarError(unittest.TestCase): self.assertRaises(TypeError, test_shape_item) - # Since create_global_var support all dtype in convert_dtype(). - # Hence, assertRaises ValueError not TypeError. + # Since create_global_var support all dtype in convert_dtype(). + # Hence, assertRaises ValueError not TypeError. 
def test_dtype(): fluid.layers.create_global_var([1, 2, 3], 2.0, np.complex128) diff --git a/python/paddle/fluid/tests/unittests/test_create_op_doc_string.py b/python/paddle/fluid/tests/unittests/test_create_op_doc_string.py index fd34c8fc939..207bef9ed81 100644 --- a/python/paddle/fluid/tests/unittests/test_create_op_doc_string.py +++ b/python/paddle/fluid/tests/unittests/test_create_op_doc_string.py @@ -19,6 +19,7 @@ import paddle.fluid.layers as layers class TestDocString(unittest.TestCase): + def test_layer_doc_string(self): print(layers.dropout.__doc__) diff --git a/python/paddle/fluid/tests/unittests/test_create_parameter.py b/python/paddle/fluid/tests/unittests/test_create_parameter.py index fb4b5e4b6fa..85a3045881f 100644 --- a/python/paddle/fluid/tests/unittests/test_create_parameter.py +++ b/python/paddle/fluid/tests/unittests/test_create_parameter.py @@ -22,6 +22,7 @@ import paddle class TestCreateParameterError(unittest.TestCase): + def func_errors(self): paddle.enable_static() with program_guard(Program(), Program()): @@ -37,16 +38,18 @@ class TestCreateParameterError(unittest.TestCase): self.assertRaises(TypeError, test_shape_item) def test_attr(): - fluid.layers.create_parameter( - [1, 2, 3], np.float32, attr=np.array([i for i in range(6)])) + fluid.layers.create_parameter([1, 2, 3], + np.float32, + attr=np.array( + [i for i in range(6)])) self.assertRaises(TypeError, test_attr) def test_default_initializer(): - fluid.layers.create_parameter( - [1, 2, 3], - np.float32, - default_initializer=np.array([i for i in range(6)])) + fluid.layers.create_parameter([1, 2, 3], + np.float32, + default_initializer=np.array( + [i for i in range(6)])) self.assertRaises(TypeError, test_default_initializer) diff --git a/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py b/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py index 6f594d16074..6724c327b60 100644 --- a/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py +++ b/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py @@ -22,6 +22,7 @@ from op_test import OpTest class CRFDecoding(object): + def __init__(self, emission_weights, transition_weights, seq_start_positions): assert (emission_weights.shape[0] == sum(seq_start_positions)) @@ -35,10 +36,10 @@ class CRFDecoding(object): self.b = transition_weights[1, :] self.w = transition_weights[2:, :] - self.track = np.zeros( - (sum(seq_start_positions), self.tag_num), dtype="int64") - self.decoded_path = np.zeros( - (sum(seq_start_positions), 1), dtype="int64") + self.track = np.zeros((sum(seq_start_positions), self.tag_num), + dtype="int64") + self.decoded_path = np.zeros((sum(seq_start_positions), 1), + dtype="int64") def _decode_one_sequence(self, decoded_path, x): seq_len, tag_num = x.shape @@ -137,19 +138,19 @@ class TestCRFDecodingOp2(OpTest): self.init_lod() total_len = sum(self.lod[-1]) - transition = np.repeat( - np.arange( - TAG_NUM, dtype="float64").reshape(1, TAG_NUM), - TAG_NUM + 2, - axis=0) - emission = np.repeat( - np.arange( - TAG_NUM, dtype="float64").reshape(1, TAG_NUM), - total_len, - axis=0) - - labels = np.random.randint( - low=0, high=TAG_NUM, size=(total_len, 1), dtype="int64") + transition = np.repeat(np.arange(TAG_NUM, + dtype="float64").reshape(1, TAG_NUM), + TAG_NUM + 2, + axis=0) + emission = np.repeat(np.arange(TAG_NUM, + dtype="float64").reshape(1, TAG_NUM), + total_len, + axis=0) + + labels = np.random.randint(low=0, + high=TAG_NUM, + size=(total_len, 1), + dtype="int64") predicted_labels = np.ones( (total_len, 1), dtype="int64") 
* (TAG_NUM - 1) expected_output = (labels == predicted_labels).astype("int64") @@ -167,11 +168,13 @@ class TestCRFDecodingOp2(OpTest): class TestCRFDecodingOp3(TestCRFDecodingOp2): + def init_lod(self): self.lod = [[1, 0, 0, 4]] class TestCRFDecodingOp4(TestCRFDecodingOp2): + def init_lod(self): self.lod = [[0, 2, 3, 0]] @@ -228,6 +231,7 @@ class TestCRFDecodingOp5(OpTest): class TestCRFDecodingOp6(OpTest): + def init_lod(self): self.lod = [[1, 2, 3, 4]] @@ -237,19 +241,19 @@ class TestCRFDecodingOp6(OpTest): self.init_lod() total_len = sum(self.lod[-1]) - transition = np.repeat( - np.arange( - TAG_NUM, dtype="float64").reshape(1, TAG_NUM), - TAG_NUM + 2, - axis=0) - emission = np.repeat( - np.arange( - TAG_NUM, dtype="float64").reshape(1, TAG_NUM), - total_len, - axis=0) - - labels = np.random.randint( - low=0, high=TAG_NUM, size=(total_len, 1), dtype="int64") + transition = np.repeat(np.arange(TAG_NUM, + dtype="float64").reshape(1, TAG_NUM), + TAG_NUM + 2, + axis=0) + emission = np.repeat(np.arange(TAG_NUM, + dtype="float64").reshape(1, TAG_NUM), + total_len, + axis=0) + + labels = np.random.randint(low=0, + high=TAG_NUM, + size=(total_len, 1), + dtype="int64") predicted_labels = np.ones( (total_len, 1), dtype="int64") * (TAG_NUM - 1) expected_output = (labels == predicted_labels).astype("int64") diff --git a/python/paddle/fluid/tests/unittests/test_crop_op.py b/python/paddle/fluid/tests/unittests/test_crop_op.py index acb652ad6f9..29d0bdde6e9 100644 --- a/python/paddle/fluid/tests/unittests/test_crop_op.py +++ b/python/paddle/fluid/tests/unittests/test_crop_op.py @@ -20,6 +20,7 @@ from op_test import OpTest def crop(data, offsets, crop_shape): + def indexOf(shape, index): result = [] for dim in reversed(shape): @@ -41,6 +42,7 @@ def crop(data, offsets, crop_shape): class TestCropOp(OpTest): + def setUp(self): self.op_type = "crop" self.crop_by_input = False @@ -78,6 +80,7 @@ class TestCropOp(OpTest): class TestCase1(TestCropOp): + def initTestCase(self): self.x_shape = (16, 8, 32) self.crop_shape = [2, 2, 3] @@ -85,6 +88,7 @@ class TestCase1(TestCropOp): class TestCase2(TestCropOp): + def initTestCase(self): self.x_shape = (15, 8) self.crop_shape = [15, 8] @@ -92,6 +96,7 @@ class TestCase2(TestCropOp): class TestCase3(TestCropOp): + def initTestCase(self): self.x_shape = (4, 8, 16) self.crop_shape = [2, 2, 3] @@ -100,6 +105,7 @@ class TestCase3(TestCropOp): class TestCase4(TestCropOp): + def initTestCase(self): self.x_shape = (10, 10) self.crop_shape = [10, 10] @@ -108,6 +114,7 @@ class TestCase4(TestCropOp): class TestCase5(TestCropOp): + def initTestCase(self): self.x_shape = (3, 4, 10) self.crop_shape = [2, 2, 3] @@ -116,6 +123,7 @@ class TestCase5(TestCropOp): class TestCase6(TestCropOp): + def initTestCase(self): self.x_shape = (10, 9, 14) self.crop_shape = [3, 3, 5] diff --git a/python/paddle/fluid/tests/unittests/test_crop_tensor_op.py b/python/paddle/fluid/tests/unittests/test_crop_tensor_op.py index 04e47bd30ce..49805c578bf 100644 --- a/python/paddle/fluid/tests/unittests/test_crop_tensor_op.py +++ b/python/paddle/fluid/tests/unittests/test_crop_tensor_op.py @@ -22,6 +22,7 @@ import paddle.fluid as fluid def crop(data, offsets, crop_shape): + def indexOf(shape, index): result = [] for dim in reversed(shape): @@ -43,6 +44,7 @@ def crop(data, offsets, crop_shape): class TestCropTensorOp(OpTest): + def setUp(self): self.op_type = "crop_tensor" self.shape_by_input = False @@ -85,6 +87,7 @@ class TestCropTensorOp(OpTest): class TestCase1(TestCropTensorOp): + def initTestCase(self): 
self.x_shape = (100) self.crop_shape = [64] @@ -92,6 +95,7 @@ class TestCase1(TestCropTensorOp): class TestCase2(TestCropTensorOp): + def initTestCase(self): self.x_shape = (12, 24) self.crop_shape = [-1, 8] @@ -99,6 +103,7 @@ class TestCase2(TestCropTensorOp): class TestCase3(TestCropTensorOp): + def initTestCase(self): self.x_shape = (4, 8, 16) self.crop_shape = [2, 2, 3] @@ -107,6 +112,7 @@ class TestCase3(TestCropTensorOp): class TestCase4(TestCropTensorOp): + def initTestCase(self): self.x_shape = (8, 3, 6, 6) self.crop_shape = [-1, 3, -1, 4] @@ -115,6 +121,7 @@ class TestCase4(TestCropTensorOp): class TestCase5(TestCropTensorOp): + def initTestCase(self): self.x_shape = (2, 4, 5, 8, 8) self.crop_shape = [1, 1, 2, 4, 4] @@ -123,6 +130,7 @@ class TestCase5(TestCropTensorOp): class TestCase6(TestCropTensorOp): + def initTestCase(self): self.x_shape = (2, 2, 4, 4, 4, 2) self.crop_shape = [1, 1, 4, 2, 2, 2] @@ -132,6 +140,7 @@ class TestCase6(TestCropTensorOp): class TestCropTensorOpTensorAttr(OpTest): + def setUp(self): self.op_type = "crop_tensor" self.OffsetsTensor = False @@ -183,6 +192,7 @@ class TestCropTensorOpTensorAttr(OpTest): class TestCropTensorOpTensorAttrCase1(TestCropTensorOpTensorAttr): + def initTestCase(self): self.x_shape = (16, 8, 32) self.crop_shape = [-1, -1, 3] @@ -191,6 +201,7 @@ class TestCropTensorOpTensorAttrCase1(TestCropTensorOpTensorAttr): class TestCropTensorOpTensorAttrCase2(TestCropTensorOpTensorAttr): + def initTestCase(self): self.x_shape = (4, 8, 16, 8) self.crop_shape = [2, 2, 3, 4] @@ -199,6 +210,7 @@ class TestCropTensorOpTensorAttrCase2(TestCropTensorOpTensorAttr): class TestCropTensorOpTensorAttrCase3(TestCropTensorOpTensorAttr): + def initTestCase(self): self.x_shape = (16, 8, 32) self.crop_shape = [2, 2, 3] @@ -209,6 +221,7 @@ class TestCropTensorOpTensorAttrCase3(TestCropTensorOpTensorAttr): class TestCropTensorOpTensorAttrCase4(TestCropTensorOpTensorAttr): + def initTestCase(self): self.x_shape = (16, 8, 32) self.crop_shape = [2, 2, 3] @@ -219,6 +232,7 @@ class TestCropTensorOpTensorAttrCase4(TestCropTensorOpTensorAttr): class TestCropTensorException(unittest.TestCase): + def test_exception(self): input1 = fluid.data(name="input1", shape=[2, 3, 6, 6], dtype="float32") input2 = fluid.data(name="input2", shape=[2, 3, 6, 6], dtype="float16") @@ -241,12 +255,14 @@ class TestCropTensorException(unittest.TestCase): out = paddle.crop(input1, shape=[2, 2, 3, 3], offsets=0) def attr_offsets_dtype(): - out = paddle.crop( - input1, shape=[2, 2, 3, 3], offsets=[0, 1.0, 0, 0]) + out = paddle.crop(input1, + shape=[2, 2, 3, 3], + offsets=[0, 1.0, 0, 0]) def attr_offsets_value(): - out = paddle.crop( - input1, shape=[2, 2, 3, 3], offsets=[0, -1, offset, 0]) + out = paddle.crop(input1, + shape=[2, 2, 3, 3], + offsets=[0, -1, offset, 0]) def input_dtype(): out = paddle.crop(input2, shape=[2, 2, 3, 3]) diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py b/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py index e9f7e6ef050..f332800bdd4 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py @@ -19,6 +19,7 @@ import six class CrossEntropy2OpTestBase(OpTest): + def initParameters(self): return [32, 64], 'float64', -100, False @@ -39,10 +40,11 @@ class CrossEntropy2OpTestBase(OpTest): feature_size = int(self.shape[-1]) batch_size = int(np.prod(self.shape) / feature_size) logits = (np.random.random(size=self.shape) + 1).astype(self.dtype) - 
label_shape = self.shape[0:-1] if self.drop_last_dim else self.shape[ - 0:-1] + [1] - label = np.random.random_integers( - low=0, high=feature_size - 1, size=label_shape).astype('int64') + label_shape = self.shape[ + 0:-1] if self.drop_last_dim else self.shape[0:-1] + [1] + label = np.random.random_integers(low=0, + high=feature_size - 1, + size=label_shape).astype('int64') outputs, match_x = self.calc_output( np.reshape(logits, [batch_size, feature_size]), np.reshape(label, [batch_size, 1]), self.ignore_index) @@ -51,8 +53,7 @@ class CrossEntropy2OpTestBase(OpTest): self.outputs = { 'Y': np.reshape(outputs, out_shape), 'MatchX': np.reshape(match_x, self.shape[:-1] + [1]), - 'XShape': np.zeros( - shape=logits.shape, dtype=logits.dtype) + 'XShape': np.zeros(shape=logits.shape, dtype=logits.dtype) } self.attrs = {'ignore_index': self.ignore_index} @@ -60,33 +61,37 @@ class CrossEntropy2OpTestBase(OpTest): self.check_output(no_check_set=['XShape']) def test_check_grad(self): - self.check_grad( - inputs_to_check=['X'], - output_names=['Y'], - no_grad_set=['XShape', 'MatchX', 'Label']) + self.check_grad(inputs_to_check=['X'], + output_names=['Y'], + no_grad_set=['XShape', 'MatchX', 'Label']) class CrossEntropy2OpTest2(CrossEntropy2OpTestBase): + def initParameters(self): return [32, 64], 'float64', 3, False class CrossEntropy2OpTest2RemoveLastDim(CrossEntropy2OpTestBase): + def initParameters(self): return [32, 64], 'float64', 3, True class CrossEntropy2OpTest3(CrossEntropy2OpTestBase): + def initParameters(self): return [4, 8, 16, 32], 'float64', -100, False class CrossEntropy2OpTest3RemoveLastDim(CrossEntropy2OpTestBase): + def initParameters(self): return [4, 8, 16, 32], 'float64', -100, True class CrossEntropy2OpTest4(CrossEntropy2OpTestBase): + def initParameters(self): return [4, 8, 16, 32], 'float64', 3, False diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py index 4402d875a41..4982ae59d43 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py @@ -83,8 +83,8 @@ def cross_entropy_loss_2d(input, continue cur_weight = weight[cur_target] if weight is not None else 1 total_weight += cur_weight - out[i][h][w] = -log_softmax_out[i][h][w][ - cur_target] * cur_weight + out[i][h][ + w] = -log_softmax_out[i][h][w][cur_target] * cur_weight if reduction == 'sum': return np.sum(out), np.array([total_weight]).astype('float64') elif reduction == 'mean': @@ -187,6 +187,7 @@ def cross_entropy_soft_2d(softmax, class CrossEntropyLoss(unittest.TestCase): + def setUp(self): self.dtype = 'float32' if fluid.core.is_compiled_with_rocm( ) else 'float64' @@ -213,14 +214,13 @@ class CrossEntropyLoss(unittest.TestCase): self.labels = np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype) self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True) - expected = cross_entropy_soft( - softmax, - self.labels, - self.axis, - self.N, - weight=self.weight, - reduction=self.reduction, - ignore_index=self.ignore_index) + expected = cross_entropy_soft(softmax, + self.labels, + self.axis, + self.N, + weight=self.weight, + reduction=self.reduction, + ignore_index=self.ignore_index) paddle.set_device("cpu") @@ -266,14 +266,13 @@ class CrossEntropyLoss(unittest.TestCase): self.labels = np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype) self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True) - expected = cross_entropy_soft( - 
softmax, - self.labels, - self.axis, - self.N, - weight=self.weight, - reduction=self.reduction, - ignore_index=self.ignore_index) + expected = cross_entropy_soft(softmax, + self.labels, + self.axis, + self.N, + weight=self.weight, + reduction=self.reduction, + ignore_index=self.ignore_index) paddle.set_device("cpu") @@ -293,13 +292,15 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[self.N, self.C], dtype=self.dtype) - label = fluid.data( - name='label', shape=[self.N, self.C], dtype=self.dtype) + input = fluid.data(name='input', + shape=[self.N, self.C], + dtype=self.dtype) + label = fluid.data(name='label', + shape=[self.N, self.C], + dtype=self.dtype) cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction=self.reduction, soft_label=True) @@ -344,18 +345,19 @@ class CrossEntropyLoss(unittest.TestCase): else: axis_dim = self.shape[self.axis] self.shape[self.axis] = 1 - self.labels = np.random.randint( - 0, axis_dim, self.shape, dtype="int64") + self.labels = np.random.randint(0, + axis_dim, + self.shape, + dtype="int64") #1. numpy - expected = cross_entropy_soft( - softmax, - self.labels, - self.axis, - self.N, - weight=self.weight, - reduction=self.reduction, - ignore_index=self.ignore_index) + expected = cross_entropy_soft(softmax, + self.labels, + self.axis, + self.N, + weight=self.weight, + reduction=self.reduction, + ignore_index=self.ignore_index) paddle.set_device("cpu") @@ -374,13 +376,15 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[self.N, self.C], dtype=self.dtype) - label = fluid.data( - name='label', shape=[self.N, self.C], dtype=self.dtype) + input = fluid.data(name='input', + shape=[self.N, self.C], + dtype=self.dtype) + label = fluid.data(name='label', + shape=[self.N, self.C], + dtype=self.dtype) weight = fluid.data(name='weight', shape=[self.C], dtype=self.dtype) cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( @@ -424,14 +428,13 @@ class CrossEntropyLoss(unittest.TestCase): self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True) #1. 
numpy - expected = cross_entropy_soft( - softmax, - self.labels, - self.axis, - self.N, - weight=self.weight, - reduction=self.reduction, - ignore_index=self.ignore_index) + expected = cross_entropy_soft(softmax, + self.labels, + self.axis, + self.N, + weight=self.weight, + reduction=self.reduction, + ignore_index=self.ignore_index) paddle.set_device("cpu") @@ -450,24 +453,27 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[self.N, self.C], dtype=self.dtype) - label = fluid.data( - name='label', shape=[self.N, self.C], dtype=self.dtype) + input = fluid.data(name='input', + shape=[self.N, self.C], + dtype=self.dtype) + label = fluid.data(name='label', + shape=[self.N, self.C], + dtype=self.dtype) cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction=self.reduction, soft_label=True) ret = cross_entropy_loss(input, label) exe = fluid.Executor(place) - static_ret = exe.run( - prog, - feed={'input': self.logits, - 'label': self.labels}, - fetch_list=[ret]) + static_ret = exe.run(prog, + feed={ + 'input': self.logits, + 'label': self.labels + }, + fetch_list=[ret]) self.assertIsNotNone(static_ret) paddle.disable_static() @@ -497,14 +503,13 @@ class CrossEntropyLoss(unittest.TestCase): self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True) #1. numpy - expected = cross_entropy_soft( - softmax, - self.labels, - self.axis, - self.N, - weight=self.weight, - reduction=self.reduction, - ignore_index=self.ignore_index) + expected = cross_entropy_soft(softmax, + self.labels, + self.axis, + self.N, + weight=self.weight, + reduction=self.reduction, + ignore_index=self.ignore_index) paddle.set_device("cpu") paddle.disable_static() @@ -523,13 +528,15 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[self.N, self.C], dtype=self.dtype) - label = fluid.data( - name='label', shape=[self.N, self.C], dtype=self.dtype) + input = fluid.data(name='input', + shape=[self.N, self.C], + dtype=self.dtype) + label = fluid.data(name='label', + shape=[self.N, self.C], + dtype=self.dtype) weight = fluid.data(name='weight', shape=[self.C], dtype=self.dtype) cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( @@ -574,16 +581,15 @@ class CrossEntropyLoss(unittest.TestCase): self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True) #1. 
numpy - expected = cross_entropy_soft_2d( - softmax, - self.labels, - self.axis, - self.N, - self.H, - self.W, - weight=self.weight, - reduction=self.reduction, - ignore_index=self.ignore_index) + expected = cross_entropy_soft_2d(softmax, + self.labels, + self.axis, + self.N, + self.H, + self.W, + weight=self.weight, + reduction=self.reduction, + ignore_index=self.ignore_index) paddle.set_device("cpu") paddle.disable_static() @@ -603,17 +609,15 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', - shape=[self.N, self.H, self.W, self.C], - dtype=self.dtype) - label = fluid.data( - name='label', - shape=[self.N, self.H, self.W, self.C], - dtype=self.dtype) + input = fluid.data(name='input', + shape=[self.N, self.H, self.W, self.C], + dtype=self.dtype) + label = fluid.data(name='label', + shape=[self.N, self.H, self.W, self.C], + dtype=self.dtype) cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction=self.reduction, soft_label=True) @@ -657,16 +661,15 @@ class CrossEntropyLoss(unittest.TestCase): self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True) #1. numpy - expected = cross_entropy_soft_2d( - softmax, - self.labels, - self.axis, - self.N, - self.H, - self.W, - weight=self.weight, - reduction=self.reduction, - ignore_index=self.ignore_index) + expected = cross_entropy_soft_2d(softmax, + self.labels, + self.axis, + self.N, + self.H, + self.W, + weight=self.weight, + reduction=self.reduction, + ignore_index=self.ignore_index) paddle.set_device("cpu") paddle.disable_static() @@ -685,17 +688,15 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', - shape=[self.N, self.H, self.W, self.C], - dtype=self.dtype) - label = fluid.data( - name='label', - shape=[self.N, self.H, self.W, self.C], - dtype=self.dtype) + input = fluid.data(name='input', + shape=[self.N, self.H, self.W, self.C], + dtype=self.dtype) + label = fluid.data(name='label', + shape=[self.N, self.H, self.W, self.C], + dtype=self.dtype) weight = fluid.data(name='weight', shape=[self.C], dtype=self.dtype) cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( @@ -724,8 +725,8 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[2, 4], dtype=self.dtype) label = fluid.data(name='label', shape=[2], dtype='int64') @@ -743,11 +744,10 @@ class CrossEntropyLoss(unittest.TestCase): expected = cross_entropy_loss_1d(input_np, label_np)[0] with fluid.dygraph.guard(): - cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( - axis=1, ignore_index=0) - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - 
fluid.dygraph.to_variable(label_np)) + cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(axis=1, + ignore_index=0) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) expected = cross_entropy_loss_1d(input_np, label_np, ignore_index=0)[0] @@ -763,8 +763,8 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[N, C], dtype=self.dtype) label = fluid.data(name='label', shape=[N], dtype='int64') @@ -783,9 +783,8 @@ class CrossEntropyLoss(unittest.TestCase): with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( axis=1, ignore_index=-1) - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) expected = cross_entropy_loss_1d(input_np, label_np, ignore_index=-1)[0] @@ -803,16 +802,15 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[N, C], dtype=self.dtype) label = fluid.data(name='label', shape=[N], dtype='int64') - weight = fluid.data( - name='weight', shape=[C], - dtype=self.dtype) #weight for each class - cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( - weight=weight, ignore_index=0) + weight = fluid.data(name='weight', shape=[C], + dtype=self.dtype) #weight for each class + cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(weight=weight, + ignore_index=0) ret = cross_entropy_loss(input, label) exe = fluid.Executor(place) @@ -830,13 +828,14 @@ class CrossEntropyLoss(unittest.TestCase): weight=fluid.dygraph.to_variable(weight_np), axis=1, ignore_index=0) - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_1d( - input_np, label_np, weight=weight_np, ignore_index=0)[0] + expected = cross_entropy_loss_1d(input_np, + label_np, + weight=weight_np, + ignore_index=0)[0] self.assertTrue(np.allclose(static_ret, dy_ret_value)) self.assertTrue(np.allclose(static_ret, expected)) @@ -853,13 +852,14 @@ class CrossEntropyLoss(unittest.TestCase): with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( weight=fluid.dygraph.to_variable(weight_np), ignore_index=255) - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_1d( - input_np, 
label_np, weight=weight_np, ignore_index=255)[0] + expected = cross_entropy_loss_1d(input_np, + label_np, + weight=weight_np, + ignore_index=255)[0] self.assertTrue(np.allclose(dy_ret_value, expected)) @@ -870,14 +870,13 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[2, 4], dtype=self.dtype) label = fluid.data(name='label', shape=[2], dtype='int64') - weight = fluid.data( - name='weight', shape=[4], - dtype=self.dtype) #weight for each class + weight = fluid.data(name='weight', shape=[4], + dtype=self.dtype) #weight for each class cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(weight=weight) ret = cross_entropy_loss(input, label) @@ -890,19 +889,18 @@ class CrossEntropyLoss(unittest.TestCase): }, fetch_list=[ret]) self.assertIsNotNone(static_ret) - expected = cross_entropy_loss_1d( - input_np, label_np, weight=weight_np)[0] + expected = cross_entropy_loss_1d(input_np, label_np, + weight=weight_np)[0] with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( weight=fluid.dygraph.to_variable(weight_np), axis=1) - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_1d( - input_np, label_np, weight=weight_np)[0] + expected = cross_entropy_loss_1d(input_np, label_np, + weight=weight_np)[0] self.assertTrue(np.allclose(static_ret, dy_ret_value)) self.assertTrue(np.allclose(static_ret, expected)) self.assertTrue(np.allclose(dy_ret_value, expected)) @@ -914,8 +912,8 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype) label = fluid.data(name='label', shape=[100], dtype='int64') @@ -936,13 +934,14 @@ class CrossEntropyLoss(unittest.TestCase): with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( weight=fluid.dygraph.to_variable(weight_np), reduction='sum') - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_1d( - input_np, label_np, weight=weight_np, reduction='sum')[0] + expected = cross_entropy_loss_1d(input_np, + label_np, + weight=weight_np, + reduction='sum')[0] self.assertTrue(np.allclose(static_ret, dy_ret_value)) self.assertTrue(np.allclose(static_ret, expected)) self.assertTrue(np.allclose(dy_ret_value, expected)) @@ -955,8 +954,8 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else 
fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype) label = fluid.data(name='label', shape=[100], dtype='int64') @@ -979,14 +978,15 @@ class CrossEntropyLoss(unittest.TestCase): with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( weight=fluid.dygraph.to_variable(weight_np), reduction='none') - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() dy_ret_value = np.squeeze(dy_ret_value) self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_1d( - input_np, label_np, weight=weight_np, reduction='none') + expected = cross_entropy_loss_1d(input_np, + label_np, + weight=weight_np, + reduction='none') self.assertTrue(np.allclose(static_ret, dy_ret_value)) self.assertTrue(np.allclose(static_ret, expected)) self.assertTrue(np.allclose(dy_ret_value, expected)) @@ -998,14 +998,16 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype) label = fluid.data(name='label', shape=[100], dtype='int64') weight = fluid.data(name='weight', shape=[200], dtype=self.dtype) - ret = paddle.nn.functional.cross_entropy( - input, label, weight=weight, reduction='none') + ret = paddle.nn.functional.cross_entropy(input, + label, + weight=weight, + reduction='none') exe = fluid.Executor(place) static_ret = exe.run(prog, @@ -1026,8 +1028,10 @@ class CrossEntropyLoss(unittest.TestCase): dy_ret_value = dy_ret.numpy() dy_ret_value = np.squeeze(dy_ret_value) self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_1d( - input_np, label_np, weight=weight_np, reduction='none') + expected = cross_entropy_loss_1d(input_np, + label_np, + weight=weight_np, + reduction='none') self.assertTrue(np.allclose(static_ret, dy_ret_value)) self.assertTrue(np.allclose(static_ret, expected)) self.assertTrue(np.allclose(dy_ret_value, expected)) @@ -1038,8 +1042,8 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype) label = fluid.data(name='label', shape=[100], dtype='int64') @@ -1047,15 +1051,16 @@ class CrossEntropyLoss(unittest.TestCase): ret = cross_entropy_loss(input, label) exe = fluid.Executor(place) static_ret = exe.run(prog, - feed={'input': input_np, - 'label': label_np}, + feed={ + 'input': input_np, + 'label': label_np + }, fetch_list=[ret]) self.assertIsNotNone(static_ret) with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss() - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = 
cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) expected = cross_entropy_loss_1d(input_np, label_np)[0] @@ -1069,8 +1074,8 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype) label = fluid.data(name='label', shape=[100], dtype='int64') @@ -1079,16 +1084,17 @@ class CrossEntropyLoss(unittest.TestCase): ret = cross_entropy_loss(input, label) exe = fluid.Executor(place) static_ret = exe.run(prog, - feed={'input': input_np, - 'label': label_np}, + feed={ + 'input': input_np, + 'label': label_np + }, fetch_list=[ret]) self.assertIsNotNone(static_ret) with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction='sum') - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) expected = cross_entropy_loss_1d(input_np, label_np, reduction='sum')[0] @@ -1102,8 +1108,8 @@ class CrossEntropyLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype) label = fluid.data(name='label', shape=[100], dtype='int64') @@ -1112,17 +1118,18 @@ class CrossEntropyLoss(unittest.TestCase): ret = cross_entropy_loss(input, label) exe = fluid.Executor(place) static_ret = exe.run(prog, - feed={'input': input_np, - 'label': label_np}, + feed={ + 'input': input_np, + 'label': label_np + }, fetch_list=[ret]) static_ret = np.squeeze(static_ret) self.assertIsNotNone(static_ret) with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction='none') - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() dy_ret_value = np.squeeze(dy_ret_value) self.assertIsNotNone(dy_ret_value) @@ -1133,18 +1140,19 @@ class CrossEntropyLoss(unittest.TestCase): def test_cross_entropy_loss_2d_with_weight_none(self): input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype) #NHWC - label_np = np.random.randint( - 0, 3, size=(2, 2, 2)).astype(np.int64) #NHW1 + label_np = np.random.randint(0, 3, + size=(2, 2, 2)).astype(np.int64) #NHW1 weight_np = np.random.random(size=(3, )).astype(self.dtype) #C paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[2, 2, 2, 3], 
dtype=self.dtype) + input = fluid.data(name='input', + shape=[2, 2, 2, 3], + dtype=self.dtype) label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype=self.dtype) cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( @@ -1164,32 +1172,34 @@ class CrossEntropyLoss(unittest.TestCase): with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( weight=fluid.dygraph.to_variable(weight_np), reduction='none') - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() dy_ret_value = np.squeeze(dy_ret_value) self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_2d( - input_np, label_np, weight=weight_np, reduction='none') + expected = cross_entropy_loss_2d(input_np, + label_np, + weight=weight_np, + reduction='none') self.assertTrue(np.allclose(static_ret, dy_ret_value)) self.assertTrue(np.allclose(static_ret, expected)) self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_2d_with_weight_axis_change_mean(self): input_np = np.random.random(size=(2, 3, 2, 2)).astype(self.dtype) #NCHW - label_np = np.random.randint( - 0, 3, size=(2, 2, 2)).astype(np.int64) #NHW + label_np = np.random.randint(0, 3, + size=(2, 2, 2)).astype(np.int64) #NHW weight_np = np.random.random(size=(3, )).astype(self.dtype) #C paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[2, 3, 2, 2], dtype=self.dtype) + input = fluid.data(name='input', + shape=[2, 3, 2, 2], + dtype=self.dtype) label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype=self.dtype) cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( @@ -1212,16 +1222,14 @@ class CrossEntropyLoss(unittest.TestCase): weight=fluid.dygraph.to_variable(weight_np), reduction='mean', axis=1) - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_2d( - np.transpose(input_np, [0, 2, 3, 1]), - label_np, - weight=weight_np, - reduction='mean')[0] + expected = cross_entropy_loss_2d(np.transpose(input_np, [0, 2, 3, 1]), + label_np, + weight=weight_np, + reduction='mean')[0] self.assertTrue(np.allclose(static_ret, dy_ret_value)) self.assertTrue(np.allclose(static_ret, expected)) self.assertTrue(np.allclose(dy_ret_value, expected)) @@ -1238,28 +1246,30 @@ class CrossEntropyLoss(unittest.TestCase): with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( weight=fluid.dygraph.to_variable(weight_np), ignore_index=255) - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_2d( - input_np, label_np, 
weight=weight_np, ignore_index=255)[0] + expected = cross_entropy_loss_2d(input_np, + label_np, + weight=weight_np, + ignore_index=255)[0] self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_2d_with_weight_mean(self): input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype) #NHWC - label_np = np.random.randint( - 0, 3, size=(2, 2, 2)).astype(np.int64) #NHW + label_np = np.random.randint(0, 3, + size=(2, 2, 2)).astype(np.int64) #NHW weight_np = np.random.random(size=(3, )).astype(self.dtype) #C paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[2, 2, 2, 3], dtype=self.dtype) + input = fluid.data(name='input', + shape=[2, 2, 2, 3], + dtype=self.dtype) label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype=self.dtype) cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( @@ -1278,31 +1288,33 @@ class CrossEntropyLoss(unittest.TestCase): with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( weight=fluid.dygraph.to_variable(weight_np), reduction='mean') - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_2d( - input_np, label_np, weight=weight_np, reduction='mean')[0] + expected = cross_entropy_loss_2d(input_np, + label_np, + weight=weight_np, + reduction='mean')[0] self.assertTrue(np.allclose(static_ret, dy_ret_value)) self.assertTrue(np.allclose(static_ret, expected)) self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_2d_with_weight_sum(self): input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype) #NHWC - label_np = np.random.randint( - 0, 3, size=(2, 2, 2)).astype(np.int64) #NHW + label_np = np.random.randint(0, 3, + size=(2, 2, 2)).astype(np.int64) #NHW weight_np = np.random.random(size=(3, )).astype(self.dtype) #C paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[2, 2, 2, 3], dtype=self.dtype) + input = fluid.data(name='input', + shape=[2, 2, 2, 3], + dtype=self.dtype) label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype=self.dtype) cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( @@ -1321,29 +1333,31 @@ class CrossEntropyLoss(unittest.TestCase): with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( weight=fluid.dygraph.to_variable(weight_np), reduction='sum') - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_2d( - input_np, 
label_np, weight=weight_np, reduction='sum')[0] + expected = cross_entropy_loss_2d(input_np, + label_np, + weight=weight_np, + reduction='sum')[0] self.assertTrue(np.allclose(static_ret, dy_ret_value)) self.assertTrue(np.allclose(static_ret, expected)) self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_2d_none(self): input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype) #NHWC - label_np = np.random.randint( - 0, 3, size=(2, 2, 2)).astype(np.int64) #NHW + label_np = np.random.randint(0, 3, + size=(2, 2, 2)).astype(np.int64) #NHW paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[2, 2, 2, 3], dtype=self.dtype) + input = fluid.data(name='input', + shape=[2, 2, 2, 3], + dtype=self.dtype) label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64') cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction='none') @@ -1360,9 +1374,8 @@ class CrossEntropyLoss(unittest.TestCase): with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction='none') - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() dy_ret_value = np.squeeze(dy_ret_value) self.assertIsNotNone(dy_ret_value) @@ -1373,16 +1386,17 @@ class CrossEntropyLoss(unittest.TestCase): def test_cross_entropy_loss_2d_mean(self): input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype) #NHWC - label_np = np.random.randint( - 0, 3, size=(2, 2, 2)).astype(np.int64) #NHW + label_np = np.random.randint(0, 3, + size=(2, 2, 2)).astype(np.int64) #NHW paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[2, 2, 2, 3], dtype=self.dtype) + input = fluid.data(name='input', + shape=[2, 2, 2, 3], + dtype=self.dtype) label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64') cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction='mean') @@ -1399,29 +1413,29 @@ class CrossEntropyLoss(unittest.TestCase): with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction='mean') - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_2d( - input_np, label_np, reduction='mean')[0] + expected = cross_entropy_loss_2d(input_np, label_np, + reduction='mean')[0] self.assertTrue(np.allclose(static_ret, dy_ret_value)) self.assertTrue(np.allclose(static_ret, expected)) self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_2d_sum(self): input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype) #NHWC - label_np = np.random.randint( - 0, 3, size=(2, 2, 2)).astype(np.int64) #NHW + 
label_np = np.random.randint(0, 3, + size=(2, 2, 2)).astype(np.int64) #NHW paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[2, 2, 2, 3], dtype=self.dtype) + input = fluid.data(name='input', + shape=[2, 2, 2, 3], + dtype=self.dtype) label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64') cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction='sum') @@ -1438,9 +1452,8 @@ class CrossEntropyLoss(unittest.TestCase): with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction='sum') - dy_ret = cross_entropy_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = cross_entropy_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) expected = cross_entropy_loss_2d(input_np, label_np, reduction='sum')[0] @@ -1487,47 +1500,51 @@ class CrossEntropyLoss(unittest.TestCase): class TestCrossEntropyFAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): def test_WeightLength_NotEqual(): input_data = paddle.rand(shape=[20, 100]) - label_data = paddle.randint( - 0, 100, shape=[20, 1], dtype="int64") + label_data = paddle.randint(0, + 100, + shape=[20, 1], + dtype="int64") weight_data = paddle.rand([100 + 1]) - paddle.nn.functional.cross_entropy( - input=input_data, - label=label_data, - weight=weight_data, - ignore_index=-100) + paddle.nn.functional.cross_entropy(input=input_data, + label=label_data, + weight=weight_data, + ignore_index=-100) self.assertRaises(ValueError, test_WeightLength_NotEqual) def test_LabelValue_ExceedMax(): input_data = paddle.rand(shape=[20, 100]) - label_data = paddle.randint( - 0, 100, shape=[20, 1], dtype="int64") + label_data = paddle.randint(0, + 100, + shape=[20, 1], + dtype="int64") label_data[0] = 100 weight_data = paddle.rand([100]) - paddle.nn.functional.cross_entropy( - input=input_data, - label=label_data, - weight=weight_data, - ignore_index=-100) + paddle.nn.functional.cross_entropy(input=input_data, + label=label_data, + weight=weight_data, + ignore_index=-100) self.assertRaises(ValueError, test_LabelValue_ExceedMax) def test_LabelValue_ExceedMin(): input_data = paddle.rand(shape=[20, 100]) - label_data = paddle.randint( - 0, 100, shape=[20, 1], dtype="int64") + label_data = paddle.randint(0, + 100, + shape=[20, 1], + dtype="int64") label_data[0] = -1 weight_data = paddle.rand([100]) - paddle.nn.functional.cross_entropy( - input=input_data, - label=label_data, - weight=weight_data, - ignore_index=-100) + paddle.nn.functional.cross_entropy(input=input_data, + label=label_data, + weight=weight_data, + ignore_index=-100) self.assertRaises(ValueError, test_LabelValue_ExceedMin) @@ -1541,12 +1558,13 @@ class TestCrossEntropyFAPIError(unittest.TestCase): place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( ) else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[2, 4], dtype='float32') + input = fluid.data(name='input', + shape=[2, 4], + dtype='float32') label = fluid.data(name='label', shape=[2], dtype='int64') - weight = fluid.data( - name='weight', shape=[3], - dtype='float32') #weight 
for each class + weight = fluid.data(name='weight', + shape=[3], + dtype='float32') #weight for each class cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( weight=weight) ret = cross_entropy_loss(input, label) diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py index ba39b072303..35d73759be5 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py @@ -49,12 +49,14 @@ class TestCrossEntropyOp(OpTest): } def init_x(self): - self.x = randomize_probability( - self.batch_size, self.class_num, dtype=self.dtype) + self.x = randomize_probability(self.batch_size, + self.class_num, + dtype=self.dtype) def init_label(self): - self.label = np.random.randint( - 0, self.class_num, (self.batch_size, 1), dtype="int64") + self.label = np.random.randint(0, + self.class_num, (self.batch_size, 1), + dtype="int64") def get_cross_entropy(self): self.cross_entropy = np.asmatrix( @@ -83,15 +85,13 @@ class TestCrossEntropyOpRemoveLastDim(TestCrossEntropyOp): """ def init_label(self): - self.label = np.random.randint( - 0, self.class_num, (self.batch_size), dtype="int64") + self.label = np.random.randint(0, + self.class_num, (self.batch_size), + dtype="int64") def get_cross_entropy(self): self.cross_entropy = np.asmatrix( - [ - -np.log(self.x[i][self.label[i]]) - for i in range(self.x.shape[0]) - ], + [-np.log(self.x[i][self.label[i]]) for i in range(self.x.shape[0])], dtype="float64") @@ -119,8 +119,10 @@ class TestCrossEntropyOp2(TestCrossEntropyOp): self.class_num = 37 def test_check_grad(self): - self.check_grad( - ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001) + self.check_grad(["X"], + "Y", + max_relative_error=0.05, + numeric_grad_delta=0.001) class TestCrossEntropyOp3(TestCrossEntropyOp): @@ -149,8 +151,10 @@ class TestCrossEntropyOp3(TestCrossEntropyOp): self.class_num = 27 def test_check_grad(self): - self.check_grad( - ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001) + self.check_grad(["X"], + "Y", + max_relative_error=0.05, + numeric_grad_delta=0.001) class TestCrossEntropyOp4(TestCrossEntropyOp): @@ -165,8 +169,9 @@ class TestCrossEntropyOp4(TestCrossEntropyOp): self.x = self.X_2d.reshape(self.shape + [self.class_num]) def init_label(self): - self.label_2d = np.random.randint( - 0, self.class_num, (self.ins_num, 1), dtype="int64") + self.label_2d = np.random.randint(0, + self.class_num, (self.ins_num, 1), + dtype="int64") self.label = self.label_2d.reshape(self.shape + [1]) def get_cross_entropy(self): @@ -191,8 +196,9 @@ class TestCrossEntropyOp4RemoveLastDim(TestCrossEntropyOp4): """ def init_label(self): - self.label_2d = np.random.randint( - 0, self.class_num, (self.ins_num, 1), dtype="int64") + self.label_2d = np.random.randint(0, + self.class_num, (self.ins_num, 1), + dtype="int64") self.label = self.label_2d.reshape(self.shape) def get_cross_entropy(self): @@ -235,8 +241,10 @@ class TestCrossEntropyOp5(TestCrossEntropyOp): self.class_num = 37 def test_check_grad(self): - self.check_grad( - ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001) + self.check_grad(["X"], + "Y", + max_relative_error=0.05, + numeric_grad_delta=0.001) class TestCrossEntropyOp6(TestCrossEntropyOp): @@ -251,8 +259,9 @@ class TestCrossEntropyOp6(TestCrossEntropyOp): self.x = self.X_2d.reshape(self.shape + [self.class_num]) def init_label(self): - self.label_index_2d = np.random.randint( - 0, self.class_num, 
(self.ins_num), dtype="int64") + self.label_index_2d = np.random.randint(0, + self.class_num, (self.ins_num), + dtype="int64") label_2d = np.zeros(self.X_2d.shape) label_2d[np.arange(self.ins_num), self.label_index_2d] = 1 self.label = label_2d.reshape(self.shape + [self.class_num]).astype( @@ -262,8 +271,9 @@ class TestCrossEntropyOp6(TestCrossEntropyOp): cross_entropy_2d = np.asmatrix( [[-np.log(self.X_2d[i][self.label_index_2d[i]])] for i in range(self.X_2d.shape[0])]) - self.cross_entropy = np.array(cross_entropy_2d).reshape( - self.shape + [1]).astype(self.dtype) + self.cross_entropy = np.array(cross_entropy_2d).reshape(self.shape + + [1]).astype( + self.dtype) def init_attr_type(self): self.soft_label = True @@ -275,8 +285,10 @@ class TestCrossEntropyOp6(TestCrossEntropyOp): self.class_num = 17 def test_check_grad(self): - self.check_grad( - ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001) + self.check_grad(["X"], + "Y", + max_relative_error=0.05, + numeric_grad_delta=0.001) class TestCrossEntropyOp7(TestCrossEntropyOp): @@ -284,8 +296,9 @@ class TestCrossEntropyOp7(TestCrossEntropyOp): """ def init_label(self): - self.label = np.random.randint( - 0, self.class_num, (self.batch_size, 1), dtype="int64") + self.label = np.random.randint(0, + self.class_num, (self.batch_size, 1), + dtype="int64") def get_cross_entropy(self): self.cross_entropy = np.asmatrix( @@ -310,8 +323,9 @@ class TestCrossEntropyOp7RemoveLastDim(TestCrossEntropyOp7): """ def init_label(self): - self.label = np.random.randint( - 0, self.class_num, (self.batch_size), dtype="int64") + self.label = np.random.randint(0, + self.class_num, (self.batch_size), + dtype="int64") def get_cross_entropy(self): self.cross_entropy = np.asmatrix( @@ -324,9 +338,11 @@ class TestCrossEntropyOp7RemoveLastDim(TestCrossEntropyOp7): # Add Fp16 test def create_test_class(parent, cls_name): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCrossEntropyFP16Op(parent): + def init_dtype_type(self): return np.float16 @@ -338,8 +354,9 @@ def create_test_class(parent, cls_name): def test_check_grad(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_grad_with_place( - place, ['X'], 'Y', max_relative_error=0.9) + self.check_grad_with_place(place, ['X'], + 'Y', + max_relative_error=0.9) cls_name = "{0}".format(cls_name) TestCrossEntropyFP16Op.__name__ = cls_name @@ -360,15 +377,16 @@ create_test_class(TestCrossEntropyOp7RemoveLastDim, class TestCrossEntropyOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): def test_Variable(): # the input of cross_entropy must be Variable. 
- x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) - lab1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) + lab1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) fluid.layers.cross_entropy(x1, lab1) self.assertRaises(TypeError, test_Variable) @@ -376,10 +394,12 @@ class TestCrossEntropyOpError(unittest.TestCase): def test_dtype(): # the input dtype of cross_entropy must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data( - name='x2', shape=[3, 4, 5, 6], dtype="int32") - lab2 = fluid.layers.data( - name='lab2', shape=[3, 4, 5, 6], dtype="int32") + x2 = fluid.layers.data(name='x2', + shape=[3, 4, 5, 6], + dtype="int32") + lab2 = fluid.layers.data(name='lab2', + shape=[3, 4, 5, 6], + dtype="int32") fluid.layers.cross_entropy(x2, lab2) self.assertRaises(TypeError, test_dtype) diff --git a/python/paddle/fluid/tests/unittests/test_cross_op.py b/python/paddle/fluid/tests/unittests/test_cross_op.py index 8b884583646..b54883975a6 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_op.py +++ b/python/paddle/fluid/tests/unittests/test_cross_op.py @@ -24,6 +24,7 @@ from paddle.fluid import Program, program_guard class TestCrossOp(OpTest): + def setUp(self): self.op_type = "cross" self.python_api = paddle.cross @@ -55,6 +56,7 @@ class TestCrossOp(OpTest): class TestCrossOpCase1(TestCrossOp): + def initTestCase(self): self.shape = (2048, 3) self.dtype = np.float32 @@ -67,11 +69,12 @@ class TestCrossOpCase1(TestCrossOp): class TestCrossAPI(unittest.TestCase): + def input_data(self): - self.data_x = np.array( - [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]]) - self.data_y = np.array( - [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]) + self.data_x = np.array([[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], + [3.0, 3.0, 3.0]]) + self.data_y = np.array([[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], + [1.0, 1.0, 1.0]]) def test_cross_api(self): self.input_data() @@ -82,8 +85,10 @@ class TestCrossAPI(unittest.TestCase): y = fluid.layers.data(name='y', shape=[-1, 3]) z = paddle.cross(x, y, axis=1) exe = fluid.Executor(fluid.CPUPlace()) - res, = exe.run(feed={'x': self.data_x, - 'y': self.data_y}, + res, = exe.run(feed={ + 'x': self.data_x, + 'y': self.data_y + }, fetch_list=[z.name], return_numpy=False) expect_out = np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], @@ -96,8 +101,10 @@ class TestCrossAPI(unittest.TestCase): y = fluid.layers.data(name='y', shape=[-1, 3]) z = paddle.cross(x, y) exe = fluid.Executor(fluid.CPUPlace()) - res, = exe.run(feed={'x': self.data_x, - 'y': self.data_y}, + res, = exe.run(feed={ + 'x': self.data_x, + 'y': self.data_y + }, fetch_list=[z.name], return_numpy=False) expect_out = np.array([[-1.0, -1.0, -1.0], [2.0, 2.0, 2.0], diff --git a/python/paddle/fluid/tests/unittests/test_crypto.py b/python/paddle/fluid/tests/unittests/test_crypto.py index 2a9bed7acbb..54db1f8bfbb 100644 --- a/python/paddle/fluid/tests/unittests/test_crypto.py +++ b/python/paddle/fluid/tests/unittests/test_crypto.py @@ -20,6 +20,7 @@ import unittest class CipherUtilsTestCase(unittest.TestCase): + def test_gen_key(self): key1 = CipherUtils.gen_key(256) key2 = CipherUtils.gen_key_to_file(256, "paddle_aes_test.keyfile") @@ -31,6 +32,7 @@ class CipherUtilsTestCase(unittest.TestCase): class CipherTestCase(unittest.TestCase): + def test_aes_cipher(self): plaintext = "hello 
world" key = CipherUtils.gen_key(256) diff --git a/python/paddle/fluid/tests/unittests/test_ctc_align.py b/python/paddle/fluid/tests/unittests/test_ctc_align.py index ffc5bc184ef..ee22e227228 100644 --- a/python/paddle/fluid/tests/unittests/test_ctc_align.py +++ b/python/paddle/fluid/tests/unittests/test_ctc_align.py @@ -32,8 +32,8 @@ def CTCAlign(input, lod, blank, merge_repeated, padding=0, input_length=None): prev_token = -1 for j in range(cur_offset, cur_offset + lod0[i]): token = input[j][0] - if (token != blank) and not (merge_repeated and - token == prev_token): + if (token != blank) and not (merge_repeated + and token == prev_token): result.append(token) prev_token = token cur_offset += lod0[i] @@ -48,38 +48,41 @@ def CTCAlign(input, lod, blank, merge_repeated, padding=0, input_length=None): prev_token = -1 for j in range(input_length[i][0]): token = input[i][j] - if (token != blank) and not (merge_repeated and - token == prev_token): + if (token != blank) and not (merge_repeated + and token == prev_token): result[i].append(token) prev_token = token start = len(result[i]) output_length.append([start]) for j in range(start, len(input[i])): result[i].append(padding) - result = np.array(result).reshape( - [len(input), len(input[0])]).astype("int32") - output_length = np.array(output_length).reshape( - [len(input), 1]).astype("int32") + result = np.array(result).reshape([len(input), + len(input[0])]).astype("int32") + output_length = np.array(output_length).reshape([len(input), + 1]).astype("int32") return result, output_length class TestCTCAlignOp(OpTest): + def config(self): self.op_type = "ctc_align" self.input_lod = [[11, 7]] self.blank = 0 self.merge_repeated = False self.input = np.array( - [0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6, 6, 0, 0, 7, 7, 7, 0]).reshape( - [18, 1]).astype("int32") + [0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6, 6, 0, 0, 7, 7, 7, + 0]).reshape([18, 1]).astype("int32") def setUp(self): self.config() output = CTCAlign(self.input, self.input_lod, self.blank, self.merge_repeated) - self.inputs = {"Input": (self.input, self.input_lod), } + self.inputs = { + "Input": (self.input, self.input_lod), + } self.outputs = {"Output": output} self.attrs = { "blank": self.blank, @@ -92,17 +95,19 @@ class TestCTCAlignOp(OpTest): class TestCTCAlignOpCase1(TestCTCAlignOp): + def config(self): self.op_type = "ctc_align" self.input_lod = [[11, 8]] self.blank = 0 self.merge_repeated = True self.input = np.array( - [0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6, 6, 0, 0, 7, 7, 7, 0, 0]).reshape( - [19, 1]).astype("int32") + [0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6, 6, 0, 0, 7, 7, 7, 0, + 0]).reshape([19, 1]).astype("int32") class TestCTCAlignOpCase2(TestCTCAlignOp): + def config(self): self.op_type = "ctc_align" self.input_lod = [[4]] @@ -112,6 +117,7 @@ class TestCTCAlignOpCase2(TestCTCAlignOp): class TestCTCAlignPaddingOp(OpTest): + def config(self): self.op_type = "ctc_align" self.input_lod = [] @@ -119,8 +125,8 @@ class TestCTCAlignPaddingOp(OpTest): self.padding_value = 0 self.merge_repeated = True self.input = np.array([[0, 2, 4, 4, 0, 6, 3, 6, 6, 0, 0], - [1, 1, 3, 0, 0, 4, 5, 6, 0, 0, 0]]).reshape( - [2, 11]).astype("int32") + [1, 1, 3, 0, 0, 4, 5, 6, 0, 0, + 0]]).reshape([2, 11]).astype("int32") self.input_length = np.array([[9], [8]]).reshape([2, 1]).astype("int32") def setUp(self): @@ -144,6 +150,7 @@ class TestCTCAlignPaddingOp(OpTest): class TestCTCAlignOpCase3(TestCTCAlignPaddingOp): + def config(self): self.op_type = "ctc_align" self.blank = 0 @@ -151,8 +158,8 @@ class 
TestCTCAlignOpCase3(TestCTCAlignPaddingOp): self.merge_repeated = True self.padding_value = 0 self.input = np.array([[0, 1, 2, 2, 0, 4], [0, 4, 5, 0, 6, 0], - [0, 7, 7, 7, 0, 0]]).reshape( - [3, 6]).astype("int32") + [0, 7, 7, 7, 0, 0]]).reshape([3, + 6]).astype("int32") self.input_length = np.array([[6], [5], [4]]).reshape([3, 1]).astype("int32") @@ -169,13 +176,14 @@ class TestCTCAlignOpCase4(TestCTCAlignPaddingOp): self.merge_repeated = False self.padding_value = 0 self.input = np.array([[0, 1, 2, 2, 0, 4], [0, 4, 5, 0, 6, 0], - [0, 7, 7, 7, 0, 0]]).reshape( - [3, 6]).astype("int32") + [0, 7, 7, 7, 0, 0]]).reshape([3, + 6]).astype("int32") self.input_length = np.array([[6], [5], [4]]).reshape([3, 1]).astype("int32") class TestCTCAlignOpCase5(TestCTCAlignPaddingOp): + def config(self): self.op_type = "ctc_align" self.blank = 0 @@ -183,13 +191,14 @@ class TestCTCAlignOpCase5(TestCTCAlignPaddingOp): self.merge_repeated = False self.padding_value = 1 self.input = np.array([[0, 1, 2, 2, 0, 4], [0, 4, 5, 0, 6, 0], - [0, 7, 1, 7, 0, 0]]).reshape( - [3, 6]).astype("int32") + [0, 7, 1, 7, 0, 0]]).reshape([3, + 6]).astype("int32") self.input_length = np.array([[6], [5], [4]]).reshape([3, 1]).astype("int32") class TestCTCAlignOpApi(unittest.TestCase): + def test_api(self): x = fluid.layers.data('x', shape=[4], dtype='float32') y = fluid.layers.ctc_greedy_decoder(x, blank=0) @@ -219,6 +228,7 @@ class TestCTCAlignOpApi(unittest.TestCase): class BadInputTestCTCAlignr(unittest.TestCase): + def test_error(self): with fluid.program_guard(fluid.Program()): diff --git a/python/paddle/fluid/tests/unittests/test_cuda_cudnn_version.py b/python/paddle/fluid/tests/unittests/test_cuda_cudnn_version.py index d8229247a81..36637971f9e 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_cudnn_version.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_cudnn_version.py @@ -17,6 +17,7 @@ import paddle class TestCPUVersion(unittest.TestCase): + def test_cuda_cudnn_version_in_cpu_package(self): if not paddle.is_compiled_with_cuda(): self.assertEqual(paddle.version.cuda(), 'False') diff --git a/python/paddle/fluid/tests/unittests/test_cuda_device_count.py b/python/paddle/fluid/tests/unittests/test_cuda_device_count.py index f4114c9d451..482a3413caf 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_device_count.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_device_count.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,6 +17,7 @@ import unittest class TestDeviceCount(unittest.TestCase): + def test_device_count(self): s = paddle.device.cuda.device_count() self.assertIsNotNone(s) diff --git a/python/paddle/fluid/tests/unittests/test_cuda_device_name_capability.py b/python/paddle/fluid/tests/unittests/test_cuda_device_name_capability.py index 88f71f28412..0d749c5d177 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_device_name_capability.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_device_name_capability.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,6 +17,7 @@ import unittest class TestDeviceName(unittest.TestCase): + def test_device_name_default(self): if paddle.is_compiled_with_cuda(): name = paddle.device.cuda.get_device_name() @@ -34,6 +35,7 @@ class TestDeviceName(unittest.TestCase): class TestDeviceCapability(unittest.TestCase): + def test_device_capability_default(self): if paddle.is_compiled_with_cuda(): capability = paddle.device.cuda.get_device_capability() diff --git a/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py b/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py index 4aefb234bbf..0ec066eb7cd 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py @@ -17,6 +17,7 @@ import unittest class TestEmptyCache(unittest.TestCase): + def test_empty_cache(self): x = paddle.randn((2, 10, 12)).astype('float32') del x diff --git a/python/paddle/fluid/tests/unittests/test_cuda_graph.py b/python/paddle/fluid/tests/unittests/test_cuda_graph.py index 66228856eff..fda3fa79ef6 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_graph.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_graph.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -29,6 +29,7 @@ def can_use_cuda_graph(): class TestCUDAGraph(unittest.TestCase): + def setUp(self): if can_use_cuda_graph(): paddle.set_flags({ @@ -40,8 +41,7 @@ class TestCUDAGraph(unittest.TestCase): def random_tensor(self, shape): return paddle.to_tensor( - np.random.randint( - low=0, high=10, size=shape).astype("float32")) + np.random.randint(low=0, high=10, size=shape).astype("float32")) @switch_to_static_graph def test_cuda_graph_static_graph(self): @@ -49,8 +49,8 @@ class TestCUDAGraph(unittest.TestCase): return seed = 100 - loss_cuda_graph = self.cuda_graph_static_graph_main( - seed, use_cuda_graph=True) + loss_cuda_graph = self.cuda_graph_static_graph_main(seed, + use_cuda_graph=True) loss_no_cuda_graph = self.cuda_graph_static_graph_main( seed, use_cuda_graph=False) self.assertEqual(loss_cuda_graph, loss_no_cuda_graph) @@ -66,10 +66,12 @@ class TestCUDAGraph(unittest.TestCase): startup = paddle.static.Program() main = paddle.static.Program() with paddle.static.program_guard(main, startup): - image = paddle.static.data( - name="image", shape=image_shape, dtype='float32') - label = paddle.static.data( - name="label", shape=label_shape, dtype='int64') + image = paddle.static.data(name="image", + shape=image_shape, + dtype='float32') + label = paddle.static.data(name="label", + shape=label_shape, + dtype='int64') image.persistable = True label.persistable = True loss = simple_fc_net_with_inputs(image, label, class_num) @@ -88,10 +90,9 @@ class TestCUDAGraph(unittest.TestCase): build_strategy.fix_op_run_order = True build_strategy.fuse_all_optimizer_ops = True compiled_program = paddle.static.CompiledProgram( - main).with_data_parallel( - loss_name=loss.name, - build_strategy=build_strategy, - places=place) + main).with_data_parallel(loss_name=loss.name, + build_strategy=build_strategy, + places=place) image_t = scope.var(image.name).get_tensor() label_t = scope.var(label.name).get_tensor() loss_t = scope.var(loss.name).get_tensor() @@ -102,9 +103,11 @@ class TestCUDAGraph(unittest.TestCase): for batch_id in range(20): image_t.set( np.random.rand(*image_shape).astype('float32'), place) - label_t.set(np.random.randint( - low=0, high=class_num, size=label_shape, dtype='int64'), - place) + label_t.set( + np.random.randint(low=0, + high=class_num, + size=label_shape, + dtype='int64'), place) if batch_id == 1 and use_cuda_graph: cuda_graph = CUDAGraph(place, mode="global") @@ -193,6 +196,7 @@ class TestCUDAGraph(unittest.TestCase): return class AutoIncDataset(paddle.io.Dataset): + def __init__(self, n, dtype): self.n = n self.dtype = dtype @@ -206,8 +210,10 @@ class TestCUDAGraph(unittest.TestCase): n = 100 dtype = 'int64' dataset = AutoIncDataset(n, dtype) - data_loader = paddle.io.DataLoader( - dataset, batch_size=1, num_workers=2, use_buffer_reader=True) + data_loader = paddle.io.DataLoader(dataset, + batch_size=1, + num_workers=2, + use_buffer_reader=True) x = None y = None diff --git a/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph.py b/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph.py index 182a70af8a8..b0e6878e3fe 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,6 +20,7 @@ from paddle.device.cuda.graphs import wrap_cuda_graph, is_cuda_graph_supported class SimpleModel(nn.Layer): + def __init__(self, in_size, out_size): super(SimpleModel, self).__init__() self.linear = nn.Linear(in_size, out_size) @@ -38,6 +39,7 @@ class SimpleModel(nn.Layer): class TestSimpleModel(unittest.TestCase): + def setUp(self): paddle.set_flags({'FLAGS_eager_delete_tensor_gb': 0.0}) diff --git a/python/paddle/fluid/tests/unittests/test_cuda_max_memory_allocated.py b/python/paddle/fluid/tests/unittests/test_cuda_max_memory_allocated.py index ae8bdeed1ef..7b8c6e9d22e 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_max_memory_allocated.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_max_memory_allocated.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,6 +20,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class TestMaxMemoryAllocated(unittest.TestCase): + def func_test_max_memory_allocated(self, device=None): if core.is_compiled_with_cuda(): alloc_time = 100 @@ -58,7 +59,8 @@ class TestMaxMemoryAllocated(unittest.TestCase): def func_test_max_memory_allocated_exception(self): if core.is_compiled_with_cuda(): wrong_device = [ - core.CPUPlace(), device_count() + 1, -2, 0.5, "gpu1", "npu" + core.CPUPlace(), + device_count() + 1, -2, 0.5, "gpu1", "npu" ] for device in wrong_device: with self.assertRaises(BaseException): diff --git a/python/paddle/fluid/tests/unittests/test_cuda_max_memory_reserved.py b/python/paddle/fluid/tests/unittests/test_cuda_max_memory_reserved.py index e64e02bb7f0..936a084abb7 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_max_memory_reserved.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_max_memory_reserved.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,6 +19,7 @@ from paddle.device.cuda import device_count, memory_reserved, max_memory_reserve class TestMaxMemoryreserved(unittest.TestCase): + def test_max_memory_reserved(self, device=None): if core.is_compiled_with_cuda(): alloc_time = 100 @@ -47,7 +48,8 @@ class TestMaxMemoryreserved(unittest.TestCase): def test_max_memory_reserved_exception(self): if core.is_compiled_with_cuda(): wrong_device = [ - core.CPUPlace(), device_count() + 1, -2, 0.5, "gpu1", "npu" + core.CPUPlace(), + device_count() + 1, -2, 0.5, "gpu1", "npu" ] for device in wrong_device: with self.assertRaises(BaseException): diff --git a/python/paddle/fluid/tests/unittests/test_cuda_memory_allocated.py b/python/paddle/fluid/tests/unittests/test_cuda_memory_allocated.py index af45537b6d4..4922b8df1fc 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_memory_allocated.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_memory_allocated.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,6 +20,7 @@ from paddle.device.cuda import device_count, memory_allocated class TestMemoryAllocated(unittest.TestCase): + def test_memory_allocated(self, device=None): if core.is_compiled_with_cuda(): tensor = paddle.zeros(shape=[256]) @@ -39,7 +40,8 @@ class TestMemoryAllocated(unittest.TestCase): def test_memory_allocated_exception(self): if core.is_compiled_with_cuda(): wrong_device = [ - core.CPUPlace(), device_count() + 1, -2, 0.5, "gpu1", "npu" + core.CPUPlace(), + device_count() + 1, -2, 0.5, "gpu1", "npu" ] for device in wrong_device: with self.assertRaises(BaseException): diff --git a/python/paddle/fluid/tests/unittests/test_cuda_memory_reserved.py b/python/paddle/fluid/tests/unittests/test_cuda_memory_reserved.py index ca551ab4a3f..c4346913980 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_memory_reserved.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_memory_reserved.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -21,6 +21,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class TestMemoryreserved(unittest.TestCase): + def func_test_memory_reserved(self, device=None): if core.is_compiled_with_cuda(): tensor = paddle.zeros(shape=[256]) @@ -50,7 +51,8 @@ class TestMemoryreserved(unittest.TestCase): def func_test_memory_reserved_exception(self): if core.is_compiled_with_cuda(): wrong_device = [ - core.CPUPlace(), device_count() + 1, -2, 0.5, "gpu1", "npu" + core.CPUPlace(), + device_count() + 1, -2, 0.5, "gpu1", "npu" ] for device in wrong_device: with self.assertRaises(BaseException): diff --git a/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py index 14a91b0c2c5..ef886d2067a 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py @@ -40,12 +40,18 @@ class TestGeneratorSeed(unittest.TestCase): gen.manual_seed(111111111) st = paddle.get_cuda_rng_state() - x = fluid.layers.uniform_random( - [2, 10], dtype="float32", min=0.0, max=1.0) - x_again = fluid.layers.uniform_random( - [2, 10], dtype="float32", min=0.0, max=1.0) - x_third = fluid.layers.uniform_random( - [2, 10], dtype="float32", min=0.0, max=1.0) + x = fluid.layers.uniform_random([2, 10], + dtype="float32", + min=0.0, + max=1.0) + x_again = fluid.layers.uniform_random([2, 10], + dtype="float32", + min=0.0, + max=1.0) + x_third = fluid.layers.uniform_random([2, 10], + dtype="float32", + min=0.0, + max=1.0) print("x: {}".format(x.numpy())) print("x_again: {}".format(x_again.numpy())) x = x + x_again + x_third @@ -53,12 +59,18 @@ class TestGeneratorSeed(unittest.TestCase): paddle.set_cuda_rng_state(st) - x1 = fluid.layers.uniform_random( - [2, 10], dtype="float32", min=0.0, max=1.0) - x1_again = fluid.layers.uniform_random( - [2, 10], dtype="float32", min=0.0, max=1.0) - x1_third = fluid.layers.uniform_random( - [2, 10], dtype="float32", min=0.0, max=1.0) + x1 = fluid.layers.uniform_random([2, 10], + dtype="float32", + min=0.0, + max=1.0) + x1_again = fluid.layers.uniform_random([2, 10], + dtype="float32", + min=0.0, + max=1.0) + x1_third = fluid.layers.uniform_random([2, 10], + dtype="float32", + min=0.0, + max=1.0) x1 = x1 + x1_again + x1_third y1 = fluid.layers.dropout(x1, 0.5) y_np = y.numpy() @@ -125,13 +137,13 @@ class TestGeneratorSeed(unittest.TestCase): result_1 = fluid.layers.fc( input=x, size=10, - param_attr=fluid.initializer.TruncatedNormal( - loc=0.0, scale=2.0)) + param_attr=fluid.initializer.TruncatedNormal(loc=0.0, + scale=2.0)) result_2 = fluid.layers.fc( input=x, size=10, - param_attr=fluid.initializer.TruncatedNormal( - loc=0.0, scale=2.0)) + param_attr=fluid.initializer.TruncatedNormal(loc=0.0, + scale=2.0)) exe = fluid.Executor(fluid.CPUPlace()) exe.run(startup_program) diff --git a/python/paddle/fluid/tests/unittests/test_cuda_stream_event.py b/python/paddle/fluid/tests/unittests/test_cuda_stream_event.py index 30bc00c9d94..8063331fe39 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_stream_event.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_stream_event.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,6 +21,7 @@ import numpy as np class TestCurrentStream(unittest.TestCase): + def test_current_stream(self): if paddle.is_compiled_with_cuda(): s = cuda.current_stream() @@ -38,6 +39,7 @@ class TestCurrentStream(unittest.TestCase): class TestSynchronize(unittest.TestCase): + def test_synchronize(self): if paddle.is_compiled_with_cuda(): self.assertIsNone(cuda.synchronize()) @@ -48,6 +50,7 @@ class TestSynchronize(unittest.TestCase): class TestCUDAStream(unittest.TestCase): + def test_cuda_stream(self): if paddle.is_compiled_with_cuda(): s = paddle.device.cuda.Stream() @@ -85,6 +88,7 @@ class TestCUDAStream(unittest.TestCase): class TestCUDAEvent(unittest.TestCase): + def test_cuda_event(self): if paddle.is_compiled_with_cuda(): e = paddle.device.cuda.Event(True, False, False) @@ -158,6 +162,7 @@ class TestStreamGuard(unittest.TestCase): class TestRawStream(unittest.TestCase): + def test_cuda_stream(self): if paddle.is_compiled_with_cuda(): cuda_stream = paddle.device.cuda.current_stream().cuda_stream diff --git a/python/paddle/fluid/tests/unittests/test_cudnn_grucell.py b/python/paddle/fluid/tests/unittests/test_cudnn_grucell.py index 2335293b22e..3b7093db391 100644 --- a/python/paddle/fluid/tests/unittests/test_cudnn_grucell.py +++ b/python/paddle/fluid/tests/unittests/test_cudnn_grucell.py @@ -79,6 +79,7 @@ def non_cudnn_step(step_in, pre_hidden, gate_w, gate_b, candidate_w, class TestCudnnGRU(unittest.TestCase): + def setUp(self): self.input_size = 100 self.hidden_size = 200 @@ -115,8 +116,8 @@ class TestCudnnGRU(unittest.TestCase): named_param_list[weight_ih_name].set_value(weight_ih) bias_ih = param_list[bias_ih_name].numpy() - bias_ih = np.random.uniform( - -0.1, 0.1, size=bias_ih.shape).astype('float64') + bias_ih = np.random.uniform(-0.1, 0.1, + size=bias_ih.shape).astype('float64') param_list[bias_ih_name].set_value(bias_ih) named_param_list[bias_ih_name].set_value(bias_ih) @@ -127,15 +128,16 @@ class TestCudnnGRU(unittest.TestCase): named_param_list[weight_hh_name].set_value(weight_hh) bias_hh = param_list[bias_hh_name].numpy() - bias_hh = np.random.uniform( - -0.1, 0.1, size=bias_hh.shape).astype('float64') + bias_hh = np.random.uniform(-0.1, 0.1, + size=bias_hh.shape).astype('float64') param_list[bias_hh_name].set_value(bias_hh) named_param_list[bias_hh_name].set_value(bias_hh) - step_input_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.input_size)).astype('float64') - pre_hidden_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.hidden_size)).astype('float64') + step_input_np = np.random.uniform( + -0.1, 0.1, (self.batch_size, self.input_size)).astype('float64') + pre_hidden_np = np.random.uniform( + -0.1, 0.1, + (self.batch_size, self.hidden_size)).astype('float64') step_input_var = fluid.dygraph.to_variable(step_input_np) pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np) @@ -147,11 +149,11 @@ class TestCudnnGRU(unittest.TestCase): self.assertTrue(np.allclose(api_out.numpy(), np_out, rtol=1e-5, atol=0)) self.assertTrue( - np.allclose( - named_api_out.numpy(), np_out, rtol=1e-5, atol=0)) + np.allclose(named_api_out.numpy(), np_out, rtol=1e-5, atol=0)) class TestNonCudnnGRU(unittest.TestCase): + def setUp(self): self.input_size = 100 
self.hidden_size = 200 @@ -167,14 +169,14 @@ class TestNonCudnnGRU(unittest.TestCase): with fluid.dygraph.guard(place): param_attr = fluid.ParamAttr(name="param_attr") bias_attr = fluid.ParamAttr(name="bias_attr") - named_non_cudnn_gru = GRUCell( - self.hidden_size, - self.input_size, - param_attr, - bias_attr, - use_cudnn_impl=False) - non_cudnn_gru = GRUCell( - self.hidden_size, self.input_size, use_cudnn_impl=False) + named_non_cudnn_gru = GRUCell(self.hidden_size, + self.input_size, + param_attr, + bias_attr, + use_cudnn_impl=False) + non_cudnn_gru = GRUCell(self.hidden_size, + self.input_size, + use_cudnn_impl=False) param_list = non_cudnn_gru.state_dict() named_param_list = named_non_cudnn_gru.state_dict() @@ -187,14 +189,14 @@ class TestNonCudnnGRU(unittest.TestCase): candidate_b_name = "_candidate_bias" gate_w = param_list[gate_w_name].numpy() - gate_w = np.random.uniform( - -0.1, 0.1, size=gate_w.shape).astype('float64') + gate_w = np.random.uniform(-0.1, 0.1, + size=gate_w.shape).astype('float64') param_list[gate_w_name].set_value(gate_w) named_param_list[gate_w_name].set_value(gate_w) gate_b = param_list[gate_b_name].numpy() - gate_b = np.random.uniform( - -0.1, 0.1, size=gate_b.shape).astype('float64') + gate_b = np.random.uniform(-0.1, 0.1, + size=gate_b.shape).astype('float64') param_list[gate_b_name].set_value(gate_b) named_param_list[gate_b_name].set_value(gate_b) @@ -210,10 +212,11 @@ class TestNonCudnnGRU(unittest.TestCase): param_list[candidate_b_name].set_value(candidate_b) named_param_list[candidate_b_name].set_value(candidate_b) - step_input_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.input_size)).astype('float64') - pre_hidden_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.hidden_size)).astype('float64') + step_input_np = np.random.uniform( + -0.1, 0.1, (self.batch_size, self.input_size)).astype('float64') + pre_hidden_np = np.random.uniform( + -0.1, 0.1, + (self.batch_size, self.hidden_size)).astype('float64') step_input_var = fluid.dygraph.to_variable(step_input_np) pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np) @@ -225,8 +228,7 @@ class TestNonCudnnGRU(unittest.TestCase): self.assertTrue(np.allclose(api_out.numpy(), np_out, rtol=1e-5, atol=0)) self.assertTrue( - np.allclose( - named_api_out.numpy(), np_out, rtol=1e-5, atol=0)) + np.allclose(named_api_out.numpy(), np_out, rtol=1e-5, atol=0)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_cudnn_lstmcell.py b/python/paddle/fluid/tests/unittests/test_cudnn_lstmcell.py index ddba6bc69d2..36b563a97c7 100644 --- a/python/paddle/fluid/tests/unittests/test_cudnn_lstmcell.py +++ b/python/paddle/fluid/tests/unittests/test_cudnn_lstmcell.py @@ -80,6 +80,7 @@ def cudnn_step(step_input_np, pre_hidden_np, pre_cell_np, weight_ih, bias_ih, class TestCudnnLSTM(unittest.TestCase): + def setUp(self): self.input_size = 100 self.hidden_size = 200 @@ -114,8 +115,8 @@ class TestCudnnLSTM(unittest.TestCase): named_param_list[weight_ih_name].set_value(weight_ih) bias_ih = param_list[bias_ih_name].numpy() - bias_ih = np.random.uniform( - -0.1, 0.1, size=bias_ih.shape).astype('float64') + bias_ih = np.random.uniform(-0.1, 0.1, + size=bias_ih.shape).astype('float64') param_list[bias_ih_name].set_value(bias_ih) named_param_list[bias_ih_name].set_value(bias_ih) @@ -126,17 +127,19 @@ class TestCudnnLSTM(unittest.TestCase): named_param_list[weight_hh_name].set_value(weight_hh) bias_hh = param_list[bias_hh_name].numpy() - bias_hh = np.random.uniform( - -0.1, 0.1, 
size=bias_hh.shape).astype('float64') + bias_hh = np.random.uniform(-0.1, 0.1, + size=bias_hh.shape).astype('float64') param_list[bias_hh_name].set_value(bias_hh) named_param_list[bias_hh_name].set_value(bias_hh) - step_input_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.input_size)).astype('float64') - pre_hidden_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.hidden_size)).astype('float64') - pre_cell_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.hidden_size)).astype('float64') + step_input_np = np.random.uniform( + -0.1, 0.1, (self.batch_size, self.input_size)).astype('float64') + pre_hidden_np = np.random.uniform( + -0.1, 0.1, + (self.batch_size, self.hidden_size)).astype('float64') + pre_cell_np = np.random.uniform( + -0.1, 0.1, + (self.batch_size, self.hidden_size)).astype('float64') step_input_var = fluid.dygraph.to_variable(step_input_np) pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np) @@ -150,27 +153,34 @@ class TestCudnnLSTM(unittest.TestCase): named_api_hidden_out = named_api_out[0] named_api_cell_out = named_api_out[1] - np_hidden_out, np_cell_out = cudnn_step( - step_input_np, pre_hidden_np, pre_cell_np, weight_ih, bias_ih, - weight_hh, bias_hh) + np_hidden_out, np_cell_out = cudnn_step(step_input_np, + pre_hidden_np, pre_cell_np, + weight_ih, bias_ih, + weight_hh, bias_hh) self.assertTrue( - np.allclose( - api_hidden_out.numpy(), np_hidden_out, rtol=1e-5, atol=0)) + np.allclose(api_hidden_out.numpy(), + np_hidden_out, + rtol=1e-5, + atol=0)) self.assertTrue( - np.allclose( - api_cell_out.numpy(), np_cell_out, rtol=1e-5, atol=0)) + np.allclose(api_cell_out.numpy(), + np_cell_out, + rtol=1e-5, + atol=0)) self.assertTrue( - np.allclose( - named_api_hidden_out.numpy(), - np_hidden_out, - rtol=1e-5, - atol=0)) + np.allclose(named_api_hidden_out.numpy(), + np_hidden_out, + rtol=1e-5, + atol=0)) self.assertTrue( - np.allclose( - named_api_cell_out.numpy(), np_cell_out, rtol=1e-5, atol=0)) + np.allclose(named_api_cell_out.numpy(), + np_cell_out, + rtol=1e-5, + atol=0)) class TestNonCudnnLSTM(unittest.TestCase): + def setUp(self): self.input_size = 100 self.hidden_size = 200 @@ -185,14 +195,14 @@ class TestNonCudnnLSTM(unittest.TestCase): with fluid.dygraph.guard(place): param_attr = fluid.ParamAttr(name="param_attr") bias_attr = fluid.ParamAttr(name="bias_attr") - named_cudnn_lstm = LSTMCell( - self.hidden_size, - self.input_size, - param_attr, - bias_attr, - use_cudnn_impl=False) - cudnn_lstm = LSTMCell( - self.hidden_size, self.input_size, use_cudnn_impl=False) + named_cudnn_lstm = LSTMCell(self.hidden_size, + self.input_size, + param_attr, + bias_attr, + use_cudnn_impl=False) + cudnn_lstm = LSTMCell(self.hidden_size, + self.input_size, + use_cudnn_impl=False) param_list = cudnn_lstm.state_dict() named_param_list = named_cudnn_lstm.state_dict() @@ -203,23 +213,25 @@ class TestNonCudnnLSTM(unittest.TestCase): gate_b_name = "_bias" gate_w = param_list[gate_w_name].numpy() - gate_w = np.random.uniform( - -0.1, 0.1, size=gate_w.shape).astype('float64') + gate_w = np.random.uniform(-0.1, 0.1, + size=gate_w.shape).astype('float64') param_list[gate_w_name].set_value(gate_w) named_param_list[gate_w_name].set_value(gate_w) gate_b = param_list[gate_b_name].numpy() - gate_b = np.random.uniform( - -0.1, 0.1, size=gate_b.shape).astype('float64') + gate_b = np.random.uniform(-0.1, 0.1, + size=gate_b.shape).astype('float64') param_list[gate_b_name].set_value(gate_b) named_param_list[gate_b_name].set_value(gate_b) - step_input_np = 
np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.input_size)).astype('float64') - pre_hidden_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.hidden_size)).astype('float64') - pre_cell_np = np.random.uniform(-0.1, 0.1, ( - self.batch_size, self.hidden_size)).astype('float64') + step_input_np = np.random.uniform( + -0.1, 0.1, (self.batch_size, self.input_size)).astype('float64') + pre_hidden_np = np.random.uniform( + -0.1, 0.1, + (self.batch_size, self.hidden_size)).astype('float64') + pre_cell_np = np.random.uniform( + -0.1, 0.1, + (self.batch_size, self.hidden_size)).astype('float64') step_input_var = fluid.dygraph.to_variable(step_input_np) pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np) @@ -233,24 +245,31 @@ class TestNonCudnnLSTM(unittest.TestCase): named_api_hidden_out = named_api_out[0] named_api_cell_out = named_api_out[1] - np_hidden_out, np_cell_out = non_cudnn_step( - step_input_np, pre_hidden_np, pre_cell_np, gate_w, gate_b) + np_hidden_out, np_cell_out = non_cudnn_step(step_input_np, + pre_hidden_np, + pre_cell_np, gate_w, + gate_b) self.assertTrue( - np.allclose( - api_hidden_out.numpy(), np_hidden_out, rtol=1e-5, atol=0)) + np.allclose(api_hidden_out.numpy(), + np_hidden_out, + rtol=1e-5, + atol=0)) self.assertTrue( - np.allclose( - api_cell_out.numpy(), np_cell_out, rtol=1e-5, atol=0)) + np.allclose(api_cell_out.numpy(), + np_cell_out, + rtol=1e-5, + atol=0)) self.assertTrue( - np.allclose( - named_api_hidden_out.numpy(), - np_hidden_out, - rtol=1e-5, - atol=0)) + np.allclose(named_api_hidden_out.numpy(), + np_hidden_out, + rtol=1e-5, + atol=0)) self.assertTrue( - np.allclose( - named_api_cell_out.numpy(), np_cell_out, rtol=1e-5, atol=0)) + np.allclose(named_api_cell_out.numpy(), + np_cell_out, + rtol=1e-5, + atol=0)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_cumprod_op.py b/python/paddle/fluid/tests/unittests/test_cumprod_op.py index 681b8d6cc0b..66b4a601973 100644 --- a/python/paddle/fluid/tests/unittests/test_cumprod_op.py +++ b/python/paddle/fluid/tests/unittests/test_cumprod_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -61,6 +61,7 @@ def cumprod_grad(x, y, dy, dx, shape, dim): # test function. class TestCumprod(OpTest): + def init_params(self): self.shape = (2, 3, 4, 5) self.zero_nums = [0, 10, 20, 30, int(np.prod(self.shape))] @@ -122,34 +123,37 @@ class TestCumprod(OpTest): if self.dtype == np.float64: self.check_grad(['X'], 'Out', check_eager=True) else: - self.check_grad( - ['X'], - 'Out', - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out], - check_eager=True) + self.check_grad(['X'], + 'Out', + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out], + check_eager=True) # test float32 case. class TestCumprod_float32(TestCumprod): + def init_dtype(self): self.dtype = np.float32 # test complex64 case. class TestCumprod_complex64(TestCumprod): + def init_dtype(self): self.dtype = np.complex64 # test complex128 case. 
class TestCumprod_complex128(TestCumprod): + def init_dtype(self): self.dtype = np.complex128 # test api. class TestCumprodAPI(unittest.TestCase): + def init_dtype(self): self.dtype = 'float64' self.shape = [2, 3, 10, 10] @@ -182,6 +186,7 @@ class TestCumprodAPI(unittest.TestCase): # test dynamic graph api. def test_dygraph_api(self): + def run(place): paddle.disable_static(place) x = paddle.to_tensor(self.x) diff --git a/python/paddle/fluid/tests/unittests/test_cumsum_op.py b/python/paddle/fluid/tests/unittests/test_cumsum_op.py index 818e15bb319..7e11ad647d9 100644 --- a/python/paddle/fluid/tests/unittests/test_cumsum_op.py +++ b/python/paddle/fluid/tests/unittests/test_cumsum_op.py @@ -24,6 +24,7 @@ from paddle.fluid import compiler, Program, program_guard class TestCumsumOp(unittest.TestCase): + def run_cases(self): data_np = np.arange(12).reshape(3, 4) data = paddle.to_tensor(data_np) @@ -105,6 +106,7 @@ class TestCumsumOp(unittest.TestCase): class TestSumOp1(OpTest): + def setUp(self): self.op_type = "cumsum" self.attrs = {'axis': 2} @@ -119,14 +121,14 @@ class TestSumOp1(OpTest): class TestSumOp2(OpTest): + def setUp(self): self.op_type = "cumsum" self.attrs = {'axis': -1, 'reverse': True} self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")} self.outputs = { - 'Out': np.flip( - np.flip( - self.inputs['X'], axis=2).cumsum(axis=2), axis=2) + 'Out': np.flip(np.flip(self.inputs['X'], axis=2).cumsum(axis=2), + axis=2) } def test_check_output(self): @@ -137,6 +139,7 @@ class TestSumOp2(OpTest): class TestSumOp3(OpTest): + def setUp(self): self.op_type = "cumsum" self.attrs = {'axis': 1} @@ -151,6 +154,7 @@ class TestSumOp3(OpTest): class TestSumOp4(OpTest): + def setUp(self): self.op_type = "cumsum" self.attrs = {'axis': 0} @@ -165,6 +169,7 @@ class TestSumOp4(OpTest): class TestSumOp5(OpTest): + def setUp(self): self.op_type = "cumsum" self.inputs = {'X': np.random.random((5, 20)).astype("float64")} @@ -178,6 +183,7 @@ class TestSumOp5(OpTest): class TestSumOp7(OpTest): + def setUp(self): self.op_type = "cumsum" self.inputs = {'X': np.random.random((100)).astype("float64")} @@ -191,16 +197,17 @@ class TestSumOp7(OpTest): class TestSumOpExclusive1(OpTest): + def setUp(self): self.op_type = "cumsum" self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((4, 5, 65)).astype("float64") self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (4, 5, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + np.concatenate((np.zeros( + (4, 5, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)), + axis=2) } def test_check_output(self): @@ -208,16 +215,17 @@ class TestSumOpExclusive1(OpTest): class TestSumOpExclusive2(OpTest): + def setUp(self): self.op_type = "cumsum" self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((1, 1, 888)).astype("float64") self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (1, 1, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + np.concatenate((np.zeros( + (1, 1, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)), + axis=2) } def test_check_output(self): @@ -225,16 +233,17 @@ class TestSumOpExclusive2(OpTest): class TestSumOpExclusive3(OpTest): + def setUp(self): self.op_type = "cumsum" self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((4, 5, 888)).astype("float32") self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (4, 5, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + 
np.concatenate((np.zeros( + (4, 5, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)), + axis=2) } def test_check_output(self): @@ -242,16 +251,17 @@ class TestSumOpExclusive3(OpTest): class TestSumOpExclusive4(OpTest): + def setUp(self): self.op_type = "cumsum" self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((1, 1, 3049)).astype("float64") self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (1, 1, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + np.concatenate((np.zeros( + (1, 1, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)), + axis=2) } def test_check_output(self): @@ -259,16 +269,17 @@ class TestSumOpExclusive4(OpTest): class TestSumOpExclusive5(OpTest): + def setUp(self): self.op_type = "cumsum" self.attrs = {'axis': 2, "exclusive": True} a = np.random.random((4, 5, 3096)).astype("float64") self.inputs = {'X': a} self.outputs = { - 'Out': np.concatenate( - (np.zeros( - (4, 5, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)), - axis=2) + 'Out': + np.concatenate((np.zeros( + (4, 5, 1), dtype=np.float64), a[:, :, :-1].cumsum(axis=2)), + axis=2) } def test_check_output(self): @@ -276,6 +287,7 @@ class TestSumOpExclusive5(OpTest): class TestSumOpReverseExclusive(OpTest): + def setUp(self): self.op_type = "cumsum" self.attrs = {'axis': 2, 'reverse': True, "exclusive": True} @@ -283,10 +295,10 @@ class TestSumOpReverseExclusive(OpTest): self.inputs = {'X': a} a = np.flip(a, axis=2) self.outputs = { - 'Out': np.concatenate( - (np.flip( - a[:, :, :-1].cumsum(axis=2), axis=2), np.zeros( - (4, 5, 1), dtype=np.float64)), + 'Out': + np.concatenate( + (np.flip(a[:, :, :-1].cumsum(axis=2), + axis=2), np.zeros((4, 5, 1), dtype=np.float64)), axis=2) } @@ -295,6 +307,7 @@ class TestSumOpReverseExclusive(OpTest): class BadInputTest(unittest.TestCase): + def test_error(self): with fluid.program_guard(fluid.Program()): diff --git a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py index 83a25b71626..2d12243de52 100644 --- a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py +++ b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -24,6 +24,7 @@ from paddle.fluid.framework import _test_eager_guard class TestTensorBackward(unittest.TestCase): + def setUp(self): self._dtypes = ["float32", "float64"] self._places = [paddle.CPUPlace()] @@ -56,6 +57,7 @@ class TestTensorBackward(unittest.TestCase): class TestBackwardAPI(unittest.TestCase): + def setUp(self): self._dtypes = ["float32", "float64"] self._places = [paddle.CPUPlace()] @@ -138,9 +140,15 @@ class TestBackwardAPI(unittest.TestCase): def func_backward_accumulator_with_init_grad(self): for dtype in self._dtypes: - x = np.random.random([10, ]).astype(dtype) - y_grad = np.random.random([10, ]).astype(dtype) - z_grad = np.random.random([10, ]).astype(dtype) + x = np.random.random([ + 10, + ]).astype(dtype) + y_grad = np.random.random([ + 10, + ]).astype(dtype) + z_grad = np.random.random([ + 10, + ]).astype(dtype) self._places = [paddle.CPUPlace()] for place in self._places: with dg.guard(place): diff --git a/python/paddle/fluid/tests/unittests/test_cvm_op.py b/python/paddle/fluid/tests/unittests/test_cvm_op.py index 276d00bb2bf..a1db1a0c6b4 100644 --- a/python/paddle/fluid/tests/unittests/test_cvm_op.py +++ b/python/paddle/fluid/tests/unittests/test_cvm_op.py @@ -66,9 +66,10 @@ class TestCVMOpWithLodTensor(OpTest): lod = [[1]] self.inputs = { 'X': (np.random.uniform( - 0, 1, [self.batch_size, self.item_width]).astype("float32"), - lod), - 'CVM': np.array([[0.6, 0.4]]).astype("float32"), + 0, 1, + [self.batch_size, self.item_width]).astype("float32"), lod), + 'CVM': + np.array([[0.6, 0.4]]).astype("float32"), } self.attrs = {'use_cvm': False} out = [] @@ -85,8 +86,10 @@ class TestCVMOpWithLodTensor(OpTest): (self.batch_size, self.item_width)).astype("float32") user_grads[:, :2] = self.inputs['CVM'].reshape(self.batch_size, 2) user_grads = [user_grads] - self.check_grad( - ['X'], 'Y', user_defined_grads=user_grads, check_dygraph=False) + self.check_grad(['X'], + 'Y', + user_defined_grads=user_grads, + check_dygraph=False) class TestCVMOpWithOutLodTensor1(OpTest): @@ -120,8 +123,10 @@ class TestCVMOpWithOutLodTensor1(OpTest): (self.batch_size, self.item_width)).astype("float32") user_grads[:, :2] = self.inputs['CVM'].reshape(self.batch_size, 2) user_grads = [user_grads] - self.check_grad( - ['X'], 'Y', user_defined_grads=user_grads, check_dygraph=False) + self.check_grad(['X'], + 'Y', + user_defined_grads=user_grads, + check_dygraph=False) class TestCVMOpWithOutLodTensor2(OpTest): @@ -156,8 +161,10 @@ class TestCVMOpWithOutLodTensor2(OpTest): (self.batch_size, self.item_width)).astype("float32") user_grads[:, :2] = self.inputs['CVM'].reshape(self.batch_size, 2) user_grads = [user_grads] - self.check_grad( - ['X'], 'Y', user_defined_grads=user_grads, check_dygraph=False) + self.check_grad(['X'], + 'Y', + user_defined_grads=user_grads, + check_dygraph=False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_cyclic_cifar_dataset.py b/python/paddle/fluid/tests/unittests/test_cyclic_cifar_dataset.py index 01a588c4058..e014a25ab79 100644 --- a/python/paddle/fluid/tests/unittests/test_cyclic_cifar_dataset.py +++ b/python/paddle/fluid/tests/unittests/test_cyclic_cifar_dataset.py @@ -17,6 +17,7 @@ import unittest class TestCifar10(unittest.TestCase): + def test_main(self): reader = paddle.dataset.cifar.train10(cycle=False) sample_num = 0 diff --git a/python/paddle/fluid/tests/unittests/test_data.py b/python/paddle/fluid/tests/unittests/test_data.py index 98739f6e163..ebbcab6d9dc 100644 --- a/python/paddle/fluid/tests/unittests/test_data.py +++ 
b/python/paddle/fluid/tests/unittests/test_data.py @@ -24,6 +24,7 @@ import paddle.fluid.core as core class TestApiDataError(unittest.TestCase): + def test_fluid_data(self): with program_guard(Program(), Program()): @@ -56,6 +57,7 @@ class TestApiDataError(unittest.TestCase): class TestApiStaticDataError(unittest.TestCase): + def test_fluid_dtype(self): with program_guard(Program(), Program()): x1 = paddle.static.data(name="x1", shape=[2, 25]) @@ -100,14 +102,19 @@ class TestApiStaticDataError(unittest.TestCase): class TestApiErrorWithDynamicMode(unittest.TestCase): + def test_error(self): with program_guard(Program(), Program()): paddle.disable_static() self.assertRaises(AssertionError, fluid.data, 'a', [2, 25]) - self.assertRaises( - AssertionError, fluid.layers.data, 'b', shape=[2, 25]) - self.assertRaises( - AssertionError, paddle.static.data, 'c', shape=[2, 25]) + self.assertRaises(AssertionError, + fluid.layers.data, + 'b', + shape=[2, 25]) + self.assertRaises(AssertionError, + paddle.static.data, + 'c', + shape=[2, 25]) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_data_generator.py b/python/paddle/fluid/tests/unittests/test_data_generator.py index 69d8e01fd46..62e8f607367 100644 --- a/python/paddle/fluid/tests/unittests/test_data_generator.py +++ b/python/paddle/fluid/tests/unittests/test_data_generator.py @@ -19,7 +19,9 @@ import platform class MyMultiSlotDataGenerator(fleet.MultiSlotDataGenerator): + def generate_sample(self, line): + def data_iter(): for i in range(40): if i == 1: @@ -30,7 +32,9 @@ class MyMultiSlotDataGenerator(fleet.MultiSlotDataGenerator): class MyMultiSlotStringDataGenerator(fleet.MultiSlotStringDataGenerator): + def generate_sample(self, line): + def data_iter(): for i in range(40): if i == 1: @@ -41,7 +45,9 @@ class MyMultiSlotStringDataGenerator(fleet.MultiSlotStringDataGenerator): class MyMultiSlotDataGenerator_error(fleet.MultiSlotDataGenerator): + def generate_sample(self, line): + def data_iter(): for i in range(40): if i == 1: @@ -52,7 +58,9 @@ class MyMultiSlotDataGenerator_error(fleet.MultiSlotDataGenerator): class MyMultiSlotDataGenerator_error_2(fleet.MultiSlotStringDataGenerator): + def generate_sample(self, line): + def data_iter(): for i in range(40): if i == 1: @@ -63,7 +71,9 @@ class MyMultiSlotDataGenerator_error_2(fleet.MultiSlotStringDataGenerator): class MyMultiSlotDataGenerator_error_3(fleet.MultiSlotDataGenerator): + def generate_sample(self, line): + def data_iter(): for i in range(40): if i == 1: @@ -74,7 +84,9 @@ class MyMultiSlotDataGenerator_error_3(fleet.MultiSlotDataGenerator): class MyMultiSlotDataGenerator_error_4(fleet.MultiSlotDataGenerator): + def generate_sample(self, line): + def data_iter(): for i in range(40): if i == 1: @@ -85,7 +97,9 @@ class MyMultiSlotDataGenerator_error_4(fleet.MultiSlotDataGenerator): class MyMultiSlotDataGenerator_error_5(fleet.MultiSlotDataGenerator): + def generate_sample(self, line): + def data_iter(): for i in range(40): if i == 1: @@ -96,7 +110,9 @@ class MyMultiSlotDataGenerator_error_5(fleet.MultiSlotDataGenerator): class MyMultiSlotStringDataGenerator_zip(fleet.MultiSlotStringDataGenerator): + def generate_sample(self, line): + def data_iter(): for i in range(40): if i == 1: @@ -109,7 +125,9 @@ class MyMultiSlotStringDataGenerator_zip(fleet.MultiSlotStringDataGenerator): class MyMultiSlotDataGenerator_zip(fleet.MultiSlotDataGenerator): + def generate_sample(self, line): + def data_iter(): for i in range(40): if i == 1: @@ -122,6 +140,7 @@ class 
MyMultiSlotDataGenerator_zip(fleet.MultiSlotDataGenerator): class TestMultiSlotDataGenerator(unittest.TestCase): + def test_MultiSlotDataGenerator_basic(self): my_ms_dg = MyMultiSlotDataGenerator() my_ms_dg.set_batch(1) @@ -129,6 +148,7 @@ class TestMultiSlotDataGenerator(unittest.TestCase): class TestMultiSlotStringDataGenerator(unittest.TestCase): + def test_MyMultiSlotStringDataGenerator_basic(self): my_ms_dg = MyMultiSlotStringDataGenerator() my_ms_dg.set_batch(1) @@ -136,6 +156,7 @@ class TestMultiSlotStringDataGenerator(unittest.TestCase): class TestMultiSlotDataGenerator_error(unittest.TestCase): + def test_MultiSlotDataGenerator_error(self): with self.assertRaises(ValueError): my_ms_dg = MyMultiSlotDataGenerator_error() @@ -144,6 +165,7 @@ class TestMultiSlotDataGenerator_error(unittest.TestCase): class TestMultiSlotDataGenerator_error_2(unittest.TestCase): + def test_MultiSlotDataGenerator_error(self): with self.assertRaises(ValueError): my_ms_dg = MyMultiSlotDataGenerator_error_2() @@ -152,6 +174,7 @@ class TestMultiSlotDataGenerator_error_2(unittest.TestCase): class TestMultiSlotDataGenerator_error_3(unittest.TestCase): + def test_MultiSlotDataGenerator_error(self): with self.assertRaises(ValueError): my_ms_dg = MyMultiSlotDataGenerator_error_3() @@ -160,6 +183,7 @@ class TestMultiSlotDataGenerator_error_3(unittest.TestCase): class TestMultiSlotDataGenerator_error_4(unittest.TestCase): + def test_MultiSlotDataGenerator_error(self): with self.assertRaises(ValueError): my_ms_dg = MyMultiSlotDataGenerator_error_4() @@ -168,6 +192,7 @@ class TestMultiSlotDataGenerator_error_4(unittest.TestCase): class TestMultiSlotDataGenerator_error_5(unittest.TestCase): + def test_MultiSlotDataGenerator_error(self): with self.assertRaises(ValueError): my_ms_dg = MyMultiSlotDataGenerator_error_5() @@ -176,6 +201,7 @@ class TestMultiSlotDataGenerator_error_5(unittest.TestCase): class TestMultiSlotStringDataGeneratorZip(unittest.TestCase): + def test_MultiSlotStringDataGenerator_zip(self): my_ms_dg = MyMultiSlotStringDataGenerator_zip() my_ms_dg.set_batch(1) @@ -183,6 +209,7 @@ class TestMultiSlotStringDataGeneratorZip(unittest.TestCase): class TestMultiSlotDataGeneratorZip(unittest.TestCase): + def test_MultiSlotDataGenerator_zip(self): my_ms_dg = MyMultiSlotDataGenerator_zip() my_ms_dg.set_batch(1) diff --git a/python/paddle/fluid/tests/unittests/test_data_norm_op.py b/python/paddle/fluid/tests/unittests/test_data_norm_op.py index cefef9ff918..650ca5ca134 100644 --- a/python/paddle/fluid/tests/unittests/test_data_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_data_norm_op.py @@ -43,8 +43,8 @@ def _reference_testing(x, batch_size, batch_sum, batch_square_sum, slot_dim=-1): for j in range(0, x_shape[1], slot_dim): if x[i][j] <= -min_precision or x[i][j] >= min_precision: for k in range(0, slot_dim): - y[i][j + k] = ( - x[i][j + k] - means_arr[j + k]) * scales_arr[j + k] + y[i][j + k] = (x[i][j + k] - + means_arr[j + k]) * scales_arr[j + k] return y @@ -120,11 +120,11 @@ class TestDataNormOpInference(unittest.TestCase): OpTest.np_dtype_to_fluid_dtype(x_val), place) batch_size_tensor = create_or_get_tensor( - scope, "batch_size", - OpTest.np_dtype_to_fluid_dtype(batch_size), place) + scope, "batch_size", OpTest.np_dtype_to_fluid_dtype(batch_size), + place) batch_sum_tensor = create_or_get_tensor( - scope, "batch_sum", - OpTest.np_dtype_to_fluid_dtype(batch_sum), place) + scope, "batch_sum", OpTest.np_dtype_to_fluid_dtype(batch_sum), + place) batch_square_sum_tensor = create_or_get_tensor( 
scope, "batch_square_sum", OpTest.np_dtype_to_fluid_dtype(batch_square_sum), place) @@ -155,8 +155,8 @@ class TestDataNormOpInference(unittest.TestCase): scale_w = np.ones(scale_shape).astype(np.float32) bias = np.zeros(scale_shape).astype(np.float32) scale_w_tensor = create_or_get_tensor( - scope, "scale_w", - OpTest.np_dtype_to_fluid_dtype(scale_w), place) + scope, "scale_w", OpTest.np_dtype_to_fluid_dtype(scale_w), + place) bias_tensor = create_or_get_tensor( scope, "bias", OpTest.np_dtype_to_fluid_dtype(bias), place) data_norm_op = Operator( @@ -181,13 +181,12 @@ class TestDataNormOpInference(unittest.TestCase): data_norm_op.run(scope, place) # check inference result - self.__assert_close( - y_tensor, - y_out, - "inference output are different at " + str(place) + ", " + - data_layout + ", " + str(np.dtype(dtype)) + - str(np.array(y_tensor)) + str(y_out), - atol=1e-3) + self.__assert_close(y_tensor, + y_out, + "inference output are different at " + str(place) + + ", " + data_layout + ", " + str(np.dtype(dtype)) + + str(np.array(y_tensor)) + str(y_out), + atol=1e-3) def test_check_output(self): """ @@ -495,12 +494,14 @@ class TestDataNormOpWithSlotDim(OpTest): class TestDataNormOpErrorr(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): x2 = fluid.layers.data(name='x2', shape=[3, 4], dtype="int32") #self.assertRaises(TypeError, fluid.data_norm, x2) - fluid.layers.data_norm( - input=x2, param_attr={}, enable_scale_and_shift=True) + fluid.layers.data_norm(input=x2, + param_attr={}, + enable_scale_and_shift=True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_autotune.py b/python/paddle/fluid/tests/unittests/test_dataloader_autotune.py index 7348783bd67..a12ccf79dd2 100755 --- a/python/paddle/fluid/tests/unittests/test_dataloader_autotune.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_autotune.py @@ -26,6 +26,7 @@ import os class RandomDataset(Dataset): + def __init__(self, num_samples): self.num_samples = num_samples @@ -39,6 +40,7 @@ class RandomDataset(Dataset): class SimpleNet(nn.Layer): + def __init__(self): super(SimpleNet, self).__init__() self.fc = nn.Linear(10, 10) @@ -48,6 +50,7 @@ class SimpleNet(nn.Layer): class TestAutoTune(unittest.TestCase): + def setUp(self): self.batch_size = 1 self.dataset = RandomDataset(10) @@ -58,8 +61,9 @@ class TestAutoTune(unittest.TestCase): "enable": True, "tuning_steps": 1, }}) - loader = DataLoader( - self.dataset, batch_size=self.batch_size, num_workers=0) + loader = DataLoader(self.dataset, + batch_size=self.batch_size, + num_workers=0) def test_dataloader_disable_autotune(self): config = {"dataloader": {"enable": False, "tuning_steps": 1}} @@ -68,8 +72,9 @@ class TestAutoTune(unittest.TestCase): tfile.close() paddle.incubate.autotune.set_config(tfile.name) os.remove(tfile.name) - loader = DataLoader( - self.dataset, batch_size=self.batch_size, num_workers=2) + loader = DataLoader(self.dataset, + batch_size=self.batch_size, + num_workers=2) if (sys.platform == 'darwin' or sys.platform == 'win32'): self.assertEqual(loader.num_workers, 0) else: @@ -83,11 +88,13 @@ class TestAutoTune(unittest.TestCase): }}) batch_sampler = paddle.io.DistributedBatchSampler( self.dataset, batch_size=self.batch_size) - loader = DataLoader( - self.dataset, batch_sampler=batch_sampler, num_workers=2) + loader = DataLoader(self.dataset, + batch_sampler=batch_sampler, + num_workers=2) class TestAutoTuneAPI(unittest.TestCase): + def test_set_config_warnings(self): with 
warnings.catch_warnings(record=True) as w: config = {"kernel": {"enable": 1, "tuning_range": True}} diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_dataset.py b/python/paddle/fluid/tests/unittests/test_dataloader_dataset.py index 786d04272e3..81e52d5175d 100644 --- a/python/paddle/fluid/tests/unittests/test_dataloader_dataset.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_dataset.py @@ -26,6 +26,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class TestDatasetAbstract(unittest.TestCase): + def func_test_main(self): dataset = Dataset() try: @@ -47,15 +48,20 @@ class TestDatasetAbstract(unittest.TestCase): class TestDatasetWithDiffOutputPlace(unittest.TestCase): + def get_dataloader(self, num_workers): dataset = paddle.vision.datasets.MNIST( mode='test', transform=transforms.Compose([ - transforms.CenterCrop(20), transforms.RandomResizedCrop(14), - transforms.Normalize(), transforms.ToTensor() + transforms.CenterCrop(20), + transforms.RandomResizedCrop(14), + transforms.Normalize(), + transforms.ToTensor() ])) - loader = paddle.io.DataLoader( - dataset, batch_size=32, num_workers=num_workers, shuffle=True) + loader = paddle.io.DataLoader(dataset, + batch_size=32, + num_workers=num_workers, + shuffle=True) return loader def run_check_on_cpu(self): diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py b/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py index 02501d51c49..82f92bd633e 100644 --- a/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py @@ -26,6 +26,7 @@ def infinite_reader(): class TestDataLoaderEarlyReset(unittest.TestCase): + def setUp(self): self.stop_batch = 10 self.iterable = True @@ -45,8 +46,9 @@ class TestDataLoaderEarlyReset(unittest.TestCase): def create_data_loader(self): self.x = fluid.data(name='x', shape=[None, 32], dtype='float32') - return fluid.io.DataLoader.from_generator( - feed_list=[self.x], capacity=10, iterable=self.iterable) + return fluid.io.DataLoader.from_generator(feed_list=[self.x], + capacity=10, + iterable=self.iterable) def test_main(self): with fluid.program_guard(fluid.Program(), fluid.Program()): @@ -88,6 +90,7 @@ class TestDataLoaderEarlyReset(unittest.TestCase): class TestDataLoaderEarlyReset2(TestDataLoaderEarlyReset): + def setUp(self): self.stop_batch = 20 self.iterable = False diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py b/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py index 6e8ee5589db..6f18c876233 100644 --- a/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py @@ -20,6 +20,7 @@ import six def create_reader(shape, batch_number): + def __impl__(): idx = 0 for _ in six.moves.range(batch_number): @@ -30,6 +31,7 @@ def create_reader(shape, batch_number): class DataLoaderKeepOrderTestBase(unittest.TestCase): + def initParameters(self): self.iterable = False self.break_num = 100 @@ -42,15 +44,15 @@ class DataLoaderKeepOrderTestBase(unittest.TestCase): def build_network(self, places): input_data = fluid.data(shape=self.shape, dtype='float32', name="input") - loader = fluid.io.DataLoader.from_generator( - capacity=16, feed_list=[input_data], iterable=self.iterable) + loader = fluid.io.DataLoader.from_generator(capacity=16, + feed_list=[input_data], + iterable=self.iterable) fc = fluid.layers.fc(input_data, size=10) loss = 
fluid.layers.reduce_mean(fc) - loader.set_batch_generator( - create_reader(self.shape, self.batch_num), - places=places if loader.iterable else None) + loader.set_batch_generator(create_reader(self.shape, self.batch_num), + places=places if loader.iterable else None) return input_data, loss, loader @@ -64,9 +66,8 @@ class DataLoaderKeepOrderTestBase(unittest.TestCase): self.assertTrue((input_tensor == start_val).all()) start_val += 1 else: - self.assertEqual( - list(input_data.shape), - [self.shape[0] * dev_cnt] + self.shape[1:]) + self.assertEqual(list(input_data.shape), + [self.shape[0] * dev_cnt] + self.shape[1:]) start_val = dev_cnt * batch_id for idx in six.moves.range(dev_cnt): data_part = input_data[idx * self.shape[0]:(idx + 1) * @@ -81,7 +82,8 @@ class DataLoaderKeepOrderTestBase(unittest.TestCase): place_list.extend([fluid.cuda_places(0)]) else: place_list.extend( - [fluid.cuda_places(0), fluid.cuda_places([0, 1])]) + [fluid.cuda_places(0), + fluid.cuda_places([0, 1])]) return place_list def test_main(self): @@ -106,8 +108,8 @@ class DataLoaderKeepOrderTestBase(unittest.TestCase): main_program = fluid.default_main_program() if use_compiled_program: main_program = fluid.CompiledProgram( - main_program).with_data_parallel( - loss_name=loss.name, places=places) + main_program).with_data_parallel(loss_name=loss.name, + places=places) max_batch_num = min(self.break_num, int(self.batch_num / dev_cnt)) @@ -153,30 +155,35 @@ class DataLoaderKeepOrderTestBase(unittest.TestCase): class IterableDataLoaderKeepOrderTest2(DataLoaderKeepOrderTestBase): + def initParameters(self): self.iterable = True self.break_num = 100 class IterableDataLoaderKeepOrderTest3(DataLoaderKeepOrderTestBase): + def initParameters(self): self.iterable = False self.break_num = 2 class IterableDataLoaderKeepOrderTest4(DataLoaderKeepOrderTestBase): + def initParameters(self): self.iterable = True self.break_num = 2 class IterableDataLoaderKeepOrderTest5(DataLoaderKeepOrderTestBase): + def initParameters(self): self.iterable = False self.break_num = 0 class IterableDataLoaderKeepOrderTest6(DataLoaderKeepOrderTestBase): + def initParameters(self): self.iterable = True self.break_num = 0 diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py b/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py index f779d762fb3..94cc701b598 100644 --- a/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py @@ -23,6 +23,7 @@ keep_data_loader_order(False) def create_reader(shape, batch_number): + def __impl__(): idx = 0 for _ in six.moves.range(batch_number): @@ -33,6 +34,7 @@ def create_reader(shape, batch_number): class DataLoaderKeepOrderTestBase(unittest.TestCase): + def initParameters(self): self.iterable = False self.break_num = 10000 @@ -48,19 +50,22 @@ class DataLoaderKeepOrderTestBase(unittest.TestCase): def build_network(self, places): input_data = fluid.data(shape=self.shape, dtype='float32', name="input") - loader = fluid.io.DataLoader.from_generator( - capacity=16, feed_list=[input_data], iterable=self.iterable) + loader = fluid.io.DataLoader.from_generator(capacity=16, + feed_list=[input_data], + iterable=self.iterable) fc = fluid.layers.fc(input_data, size=10) loss = fluid.layers.reduce_mean(fc) - loader.set_batch_generator( - create_reader(self.shape, self.batch_num), - places=places if loader.iterable else None) + loader.set_batch_generator(create_reader(self.shape, self.batch_num), + 
places=places if loader.iterable else None) return input_data, loss, loader - def assertInputData(self, batch_id, input_data, dev_cnt, + def assertInputData(self, + batch_id, + input_data, + dev_cnt, check_visited=True): if isinstance(input_data, list): self.assertTrue(len(input_data), dev_cnt) @@ -78,9 +83,8 @@ class DataLoaderKeepOrderTestBase(unittest.TestCase): start_val += 1 else: - self.assertEqual( - list(input_data.shape), - [self.shape[0] * dev_cnt] + self.shape[1:]) + self.assertEqual(list(input_data.shape), + [self.shape[0] * dev_cnt] + self.shape[1:]) start_val = dev_cnt * batch_id for idx in six.moves.range(dev_cnt): data_part = input_data[idx * self.shape[0]:(idx + 1) * @@ -100,7 +104,8 @@ class DataLoaderKeepOrderTestBase(unittest.TestCase): place_list.extend([fluid.cuda_places(0)]) else: place_list.extend( - [fluid.cuda_places(0), fluid.cuda_places([0, 1])]) + [fluid.cuda_places(0), + fluid.cuda_places([0, 1])]) return place_list def test_main(self): @@ -125,8 +130,8 @@ class DataLoaderKeepOrderTestBase(unittest.TestCase): main_program = fluid.default_main_program() if use_compiled_program: main_program = fluid.CompiledProgram( - main_program).with_data_parallel( - loss_name=loss.name, places=places) + main_program).with_data_parallel(loss_name=loss.name, + places=places) max_batch_num = min(self.break_num, int(self.batch_num / dev_cnt)) @@ -141,8 +146,10 @@ class DataLoaderKeepOrderTestBase(unittest.TestCase): if batch_id >= self.break_num: early_break = True break - self.assertInputData( - batch_id, data, dev_cnt, check_visited=False) + self.assertInputData(batch_id, + data, + dev_cnt, + check_visited=False) fetch_val, = exe.run(program=main_program, feed=data, fetch_list=fetch_list) @@ -181,30 +188,35 @@ class DataLoaderKeepOrderTestBase(unittest.TestCase): class IterableDataLoaderKeepOrderTest2(DataLoaderKeepOrderTestBase): + def initParameters(self): self.iterable = True self.break_num = 10000 class IterableDataLoaderKeepOrderTest3(DataLoaderKeepOrderTestBase): + def initParameters(self): self.iterable = False self.break_num = 2 class IterableDataLoaderKeepOrderTest4(DataLoaderKeepOrderTestBase): + def initParameters(self): self.iterable = True self.break_num = 2 class IterableDataLoaderKeepOrderTest5(DataLoaderKeepOrderTestBase): + def initParameters(self): self.iterable = False self.break_num = 0 class IterableDataLoaderKeepOrderTest6(DataLoaderKeepOrderTestBase): + def initParameters(self): self.iterable = True self.break_num = 0 diff --git a/python/paddle/fluid/tests/unittests/test_dataset.py b/python/paddle/fluid/tests/unittests/test_dataset.py index 5ef5a1016cc..e31baf9fe2e 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset.py +++ b/python/paddle/fluid/tests/unittests/test_dataset.py @@ -103,19 +103,22 @@ class TestDataset(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1) + var = fluid.layers.data(name=slot, + shape=[1], + dtype="int64", + lod_level=1) slots_vars.append(var) dataset = paddle.distributed.InMemoryDataset() - dataset.init( - batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars) + dataset.init(batch_size=32, + thread_num=3, + pipe_command="cat", + use_var=slots_vars) dataset.update_settings(pipe_command="cat1") - dataset._init_distributed_settings( - parse_ins_id=True, - parse_content=True, - fea_eval=True, - candidate_size=10000) + dataset._init_distributed_settings(parse_ins_id=True, + 
parse_content=True, + fea_eval=True, + candidate_size=10000) dataset.set_filelist([dump_a_path, dump_b_path]) dataset.load_into_memory() dataset.local_shuffle() @@ -186,17 +189,18 @@ class TestDataset(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1) + var = fluid.layers.data(name=slot, + shape=[1], + dtype="int64", + lod_level=1) slots_vars.append(var) dataset = paddle.distributed.InMemoryDataset() - dataset.init( - batch_size=32, - thread_num=3, - pipe_command="cat", - download_cmd="cat", - use_var=slots_vars) + dataset.init(batch_size=32, + thread_num=3, + pipe_command="cat", + download_cmd="cat", + use_var=slots_vars) dataset.set_filelist([filename1, filename2]) dataset.load_into_memory() paddle.enable_static() @@ -207,9 +211,8 @@ class TestDataset(unittest.TestCase): exe = fluid.Executor(fluid.CPUPlace()) exe.run(startup_program) if self.use_data_loader: - data_loader = fluid.io.DataLoader.from_dataset(dataset, - fluid.cpu_places(), - self.drop_last) + data_loader = fluid.io.DataLoader.from_dataset( + dataset, fluid.cpu_places(), self.drop_last) for i in range(self.epoch_num): for data in data_loader(): exe.run(main_program, feed=data) @@ -242,13 +245,17 @@ class TestDataset(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1) + var = fluid.layers.data(name=slot, + shape=[1], + dtype="int64", + lod_level=1) slots_vars.append(var) dataset = paddle.distributed.InMemoryDataset() - dataset.init( - batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars) + dataset.init(batch_size=32, + thread_num=3, + pipe_command="cat", + use_var=slots_vars) dataset._init_distributed_settings(fea_eval=True, candidate_size=1) dataset.set_filelist([ "test_in_memory_dataset_run_a.txt", @@ -262,9 +269,8 @@ class TestDataset(unittest.TestCase): exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_startup_program()) if self.use_data_loader: - data_loader = fluid.io.DataLoader.from_dataset(dataset, - fluid.cpu_places(), - self.drop_last) + data_loader = fluid.io.DataLoader.from_dataset( + dataset, fluid.cpu_places(), self.drop_last) for i in range(self.epoch_num): for data in data_loader(): exe.run(fluid.default_main_program(), feed=data) @@ -307,17 +313,23 @@ class TestDataset(unittest.TestCase): startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): for slot in slots[:2]: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1) + var = fluid.layers.data(name=slot, + shape=[1], + dtype="int64", + lod_level=1) slots_vars.append(var) for slot in slots[2:]: - var = fluid.layers.data( - name=slot, shape=[1], dtype="float32", lod_level=1) + var = fluid.layers.data(name=slot, + shape=[1], + dtype="float32", + lod_level=1) slots_vars.append(var) dataset = paddle.distributed.InMemoryDataset() - dataset.init( - batch_size=32, thread_num=1, pipe_command="cat", use_var=slots_vars) + dataset.init(batch_size=32, + thread_num=1, + pipe_command="cat", + use_var=slots_vars) dataset._init_distributed_settings(parse_ins_id=True) dataset.set_filelist([ "test_in_memory_dataset_masterpatch_a.txt", @@ -370,19 +382,29 @@ class TestDataset(unittest.TestCase): train_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): - var1 = 
fluid.layers.data( - name="slot1", shape=[1], dtype="int64", lod_level=0) - var2 = fluid.layers.data( - name="slot2", shape=[1], dtype="int64", lod_level=0) - var3 = fluid.layers.data( - name="slot3", shape=[1], dtype="float32", lod_level=0) - var4 = fluid.layers.data( - name="slot4", shape=[1], dtype="float32", lod_level=0) + var1 = fluid.layers.data(name="slot1", + shape=[1], + dtype="int64", + lod_level=0) + var2 = fluid.layers.data(name="slot2", + shape=[1], + dtype="int64", + lod_level=0) + var3 = fluid.layers.data(name="slot3", + shape=[1], + dtype="float32", + lod_level=0) + var4 = fluid.layers.data(name="slot4", + shape=[1], + dtype="float32", + lod_level=0) slots_vars = [var1, var2, var3, var4] dataset = paddle.distributed.InMemoryDataset() - dataset.init( - batch_size=32, thread_num=1, pipe_command="cat", use_var=slots_vars) + dataset.init(batch_size=32, + thread_num=1, + pipe_command="cat", + use_var=slots_vars) dataset._init_distributed_settings(parse_ins_id=True) dataset.set_filelist([ "test_in_memory_dataset_masterpatch1_a.txt", @@ -429,13 +451,17 @@ class TestDataset(unittest.TestCase): slots = ["slot1_f", "slot2_f", "slot3_f", "slot4_f"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="float32", lod_level=1) + var = fluid.layers.data(name=slot, + shape=[1], + dtype="float32", + lod_level=1) slots_vars.append(var) dataset = paddle.distributed.InMemoryDataset() - dataset.init( - batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars) + dataset.init(batch_size=32, + thread_num=3, + pipe_command="cat", + use_var=slots_vars) dataset.set_filelist([ "test_in_memory_dataset_run_a.txt", "test_in_memory_dataset_run_b.txt" @@ -443,32 +469,36 @@ class TestDataset(unittest.TestCase): dataset.load_into_memory() dataset.local_shuffle() - exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0)) + exe = fluid.Executor(fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) exe.run(fluid.default_startup_program()) for i in range(2): try: exe.train_from_dataset(fluid.default_main_program(), dataset) - exe.train_from_dataset( - fluid.default_main_program(), dataset, thread=1) - exe.train_from_dataset( - fluid.default_main_program(), dataset, thread=2) - exe.train_from_dataset( - fluid.default_main_program(), dataset, thread=2) - exe.train_from_dataset( - fluid.default_main_program(), dataset, thread=3) - exe.train_from_dataset( - fluid.default_main_program(), dataset, thread=4) + exe.train_from_dataset(fluid.default_main_program(), + dataset, + thread=1) + exe.train_from_dataset(fluid.default_main_program(), + dataset, + thread=2) + exe.train_from_dataset(fluid.default_main_program(), + dataset, + thread=2) + exe.train_from_dataset(fluid.default_main_program(), + dataset, + thread=3) + exe.train_from_dataset(fluid.default_main_program(), + dataset, + thread=4) except ImportError as e: pass except Exception as e: self.assertTrue(False) if self.use_data_loader: - data_loader = fluid.io.DataLoader.from_dataset(dataset, - fluid.cpu_places(), - self.drop_last) + data_loader = fluid.io.DataLoader.from_dataset( + dataset, fluid.cpu_places(), self.drop_last) for i in range(self.epoch_num): for data in data_loader(): exe.run(fluid.default_main_program(), feed=data) @@ -492,21 +522,20 @@ class TestDataset(unittest.TestCase): dataset._set_parse_ins_id(False) dataset.load_into_memory() dataset.dataset.merge_by_lineid() - dataset.update_settings( - batch_size=1, - thread_num=2, - 
input_type=1, - pipe_command="cat", - use_var=[], - fs_name="", - fs_ugi="", - download_cmd="cat", - merge_size=-1, - parse_ins_id=False, - parse_content=False, - fleet_send_batch_size=2, - fleet_send_sleep_seconds=2, - fea_eval=True) + dataset.update_settings(batch_size=1, + thread_num=2, + input_type=1, + pipe_command="cat", + use_var=[], + fs_name="", + fs_ugi="", + download_cmd="cat", + merge_size=-1, + parse_ins_id=False, + parse_content=False, + fleet_send_batch_size=2, + fleet_send_sleep_seconds=2, + fea_eval=True) fleet_ptr = fluid.core.Fleet() fleet_ptr.set_client2client_config(1, 1, 1) fleet_ptr.get_cache_threshold(0) @@ -533,22 +562,25 @@ class TestDataset(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1) + var = fluid.layers.data(name=slot, + shape=[1], + dtype="int64", + lod_level=1) slots_vars.append(var) dataset = paddle.distributed.QueueDataset() - dataset.init( - batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars) + dataset.init(batch_size=32, + thread_num=3, + pipe_command="cat", + use_var=slots_vars) dataset.set_filelist( ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"]) exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_startup_program()) if self.use_data_loader: - data_loader = fluid.io.DataLoader.from_dataset(dataset, - fluid.cpu_places(), - self.drop_last) + data_loader = fluid.io.DataLoader.from_dataset( + dataset, fluid.cpu_places(), self.drop_last) for i in range(self.epoch_num): for data in data_loader(): exe.run(fluid.default_main_program(), feed=data) @@ -561,8 +593,10 @@ class TestDataset(unittest.TestCase): self.assertTrue(False) dataset2 = paddle.distributed.QueueDataset() - dataset2.init( - batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars) + dataset2.init(batch_size=32, + thread_num=3, + pipe_command="cat", + use_var=slots_vars) dataset.set_filelist([]) try: exe.train_from_dataset(fluid.default_main_program(), dataset2) @@ -597,23 +631,26 @@ class TestDataset(unittest.TestCase): slots = ["slot1_f", "slot2_f", "slot3_f", "slot4_f"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="float32", lod_level=1) + var = fluid.layers.data(name=slot, + shape=[1], + dtype="float32", + lod_level=1) slots_vars.append(var) dataset = paddle.distributed.QueueDataset() - dataset.init( - batch_size=32, thread_num=3, pipe_command="cat", use_var=slots_vars) + dataset.init(batch_size=32, + thread_num=3, + pipe_command="cat", + use_var=slots_vars) dataset.set_filelist( ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"]) - exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0)) + exe = fluid.Executor(fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) exe.run(fluid.default_startup_program()) if self.use_data_loader: - data_loader = fluid.io.DataLoader.from_dataset(dataset, - fluid.cpu_places(), - self.drop_last) + data_loader = fluid.io.DataLoader.from_dataset( + dataset, fluid.cpu_places(), self.drop_last) for i in range(self.epoch_num): for data in data_loader(): exe.run(fluid.default_main_program(), feed=data) @@ -652,28 +689,28 @@ class TestDataset(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] slots_vars = [] for slot in slots: - var = fluid.data( - name=slot, shape=[None, 1], dtype="int64", lod_level=1) + var = fluid.data(name=slot, + 
shape=[None, 1], + dtype="int64", + lod_level=1) slots_vars.append(var) dataset = paddle.distributed.InMemoryDataset() - dataset.init( - batch_size=1, - thread_num=2, - input_type=1, - pipe_command="cat", - use_var=slots_vars) + dataset.init(batch_size=1, + thread_num=2, + input_type=1, + pipe_command="cat", + use_var=slots_vars) dataset.set_filelist( ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"]) dataset.load_into_memory() - exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0)) + exe = fluid.Executor(fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) exe.run(fluid.default_startup_program()) if self.use_data_loader: - data_loader = fluid.io.DataLoader.from_dataset(dataset, - fluid.cpu_places(), - self.drop_last) + data_loader = fluid.io.DataLoader.from_dataset( + dataset, fluid.cpu_places(), self.drop_last) for i in range(self.epoch_num): for data in data_loader(): exe.run(fluid.default_main_program(), feed=data) @@ -718,8 +755,10 @@ class TestDatasetWithFetchHandler(unittest.TestCase): slots_vars = [] poolings = [] for slot in slots: - data = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1) + data = fluid.layers.data(name=slot, + shape=[1], + dtype="int64", + lod_level=1) var = fluid.layers.cast(x=data, dtype='float32') pool = fluid.layers.sequence_pool(input=var, pool_type='AVERAGE') @@ -739,8 +778,10 @@ class TestDatasetWithFetchHandler(unittest.TestCase): files(list): files of get_dataset """ dataset = paddle.distributed.QueueDataset() - dataset.init( - batch_size=32, thread_num=3, pipe_command="cat", use_var=inputs) + dataset.init(batch_size=32, + thread_num=3, + pipe_command="cat", + use_var=inputs) dataset.set_filelist(files) return dataset @@ -822,10 +863,9 @@ class TestDatasetWithFetchHandler(unittest.TestCase): fh.help() try: - exe.train_from_dataset( - program=fluid.default_main_program(), - dataset=dataset, - fetch_handler=fh) + exe.train_from_dataset(program=fluid.default_main_program(), + dataset=dataset, + fetch_handler=fh) except ImportError as e: print("warning: we skip trainer_desc_pb2 import problem in windows") except RuntimeError as e: @@ -895,11 +935,10 @@ class TestDataset2(unittest.TestCase): exe.run(startup_program) dataset = paddle.distributed.InMemoryDataset() - dataset.init( - batch_size=32, - thread_num=3, - pipe_command="cat", - use_var=slots_vars) + dataset.init(batch_size=32, + thread_num=3, + pipe_command="cat", + use_var=slots_vars) dataset.set_filelist([ "test_in_memory_dataset2_run_a.txt", "test_in_memory_dataset2_run_b.txt" @@ -950,14 +989,17 @@ class TestDataset2(unittest.TestCase): print("warning: no mpi4py") adam = fluid.optimizer.Adam(learning_rate=0.000005) try: - adam = fleet.distributed_optimizer( - adam, - strategy={ - "fs_uri": "fs_uri_xxx", - "fs_user": "fs_user_xxx", - "fs_passwd": "fs_passwd_xxx", - "fs_hadoop_bin": "fs_hadoop_bin_xxx" - }) + adam = fleet.distributed_optimizer(adam, + strategy={ + "fs_uri": + "fs_uri_xxx", + "fs_user": + "fs_user_xxx", + "fs_passwd": + "fs_passwd_xxx", + "fs_hadoop_bin": + "fs_hadoop_bin_xxx" + }) adam.minimize([fake_cost], [scope]) except AttributeError as e: print("warning: no mpi") @@ -965,11 +1007,10 @@ class TestDataset2(unittest.TestCase): print("warning: no mpi4py") exe.run(startup_program) dataset = paddle.distributed.InMemoryDataset() - dataset.init( - batch_size=32, - thread_num=3, - pipe_command="cat", - use_var=slots_vars) + dataset.init(batch_size=32, + thread_num=3, + 
pipe_command="cat", + use_var=slots_vars) dataset.set_filelist([ "test_in_memory_dataset2_run2_a.txt", "test_in_memory_dataset2_run2_b.txt" @@ -1074,14 +1115,17 @@ class TestDataset2(unittest.TestCase): print("warning: no mpi4py") adam = fluid.optimizer.Adam(learning_rate=0.000005) try: - adam = fleet.distributed_optimizer( - adam, - strategy={ - "fs_uri": "fs_uri_xxx", - "fs_user": "fs_user_xxx", - "fs_passwd": "fs_passwd_xxx", - "fs_hadoop_bin": "fs_hadoop_bin_xxx" - }) + adam = fleet.distributed_optimizer(adam, + strategy={ + "fs_uri": + "fs_uri_xxx", + "fs_user": + "fs_user_xxx", + "fs_passwd": + "fs_passwd_xxx", + "fs_hadoop_bin": + "fs_hadoop_bin_xxx" + }) adam.minimize([fake_cost], [scope]) except AttributeError as e: print("warning: no mpi") @@ -1089,11 +1133,10 @@ class TestDataset2(unittest.TestCase): print("warning: no mpi4py") exe.run(startup_program) dataset = paddle.distributed.fleet.BoxPSDataset() - dataset.init( - batch_size=32, - thread_num=3, - pipe_command="cat", - use_var=slots_vars) + dataset.init(batch_size=32, + thread_num=3, + pipe_command="cat", + use_var=slots_vars) dataset.set_filelist([ "test_in_memory_dataset2_run2_a.txt", "test_in_memory_dataset2_run2_b.txt" @@ -1106,15 +1149,14 @@ class TestDataset2(unittest.TestCase): fleet._opt_info = None fleet._fleet_ptr = None dataset = paddle.distributed.fleet.BoxPSDataset() - dataset.init( - rank_offset="", - pv_batch_size=1, - fs_name="", - fs_ugi="", - data_feed_type="MultiSlotInMemoryDataFeed", - parse_logkey=True, - merge_by_sid=True, - enable_pv_merge=True) + dataset.init(rank_offset="", + pv_batch_size=1, + fs_name="", + fs_ugi="", + data_feed_type="MultiSlotInMemoryDataFeed", + parse_logkey=True, + merge_by_sid=True, + enable_pv_merge=True) d = paddle.distributed.fleet.DatasetBase() try: dataset._set_feed_type("MultiSlotInMemoryDataFeed") diff --git a/python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py b/python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py index 911bee69e8b..ed4ff10758e 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py +++ b/python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py @@ -45,10 +45,12 @@ query_schema = [ class CTRDataset(dg.MultiSlotDataGenerator): + def __init__(self, mode): self.test = mode def generate_sample(self, line): + def reader(): ins = line.strip().split(';') label_pos_num = int(ins[1].split(' ')[0]) @@ -296,64 +298,74 @@ class TestDataset(unittest.TestCase): f.write(data) slot_data = [] - label = fluid.layers.data( - name="click", - shape=[-1, 1], - dtype="int64", - lod_level=0, - append_batch_size=False) + label = fluid.layers.data(name="click", + shape=[-1, 1], + dtype="int64", + lod_level=0, + append_batch_size=False) slot_data.append(label) # sprase_query_feat_names len_sparse_query = 19 for feat_name in range(1, len_sparse_query + 1): slot_data.append( - fluid.layers.data( - name=str(feat_name), shape=[1], dtype='int64', lod_level=1)) + fluid.layers.data(name=str(feat_name), + shape=[1], + dtype='int64', + lod_level=1)) - # sparse_url_feat_names + # sparse_url_feat_names for feat_name in range(len_sparse_query + 1, len_sparse_query + 5): slot_data.append( - fluid.layers.data( - name=str(feat_name), shape=[1], dtype='int64', lod_level=1)) + fluid.layers.data(name=str(feat_name), + shape=[1], + dtype='int64', + lod_level=1)) # dense_feat_names for feat_name in range(len_sparse_query + 5, len_sparse_query + 16): slot_data.append( - fluid.layers.data( - 
name=str(feat_name), shape=[1], dtype='float32')) + fluid.layers.data(name=str(feat_name), + shape=[1], + dtype='float32')) # context_feat_namess for feat_name in range(len_sparse_query + 16, len_sparse_query + 18): slot_data.append( - fluid.layers.data( - name=str(feat_name), shape=[1], dtype='float32')) + fluid.layers.data(name=str(feat_name), + shape=[1], + dtype='float32')) - # neg sparse_url_feat_names + # neg sparse_url_feat_names for feat_name in range(len_sparse_query + 18, len_sparse_query + 22): slot_data.append( - fluid.layers.data( - name=str(feat_name), shape=[1], dtype='int64', lod_level=1)) + fluid.layers.data(name=str(feat_name), + shape=[1], + dtype='int64', + lod_level=1)) # neg dense_feat_names for feat_name in range(len_sparse_query + 22, len_sparse_query + 33): slot_data.append( - fluid.layers.data( - name=str(feat_name), shape=[1], dtype='float32')) + fluid.layers.data(name=str(feat_name), + shape=[1], + dtype='float32')) # neg context_feat_namess for feat_name in range(len_sparse_query + 33, len_sparse_query + 35): slot_data.append( - fluid.layers.data( - name=str(feat_name), shape=[1], dtype='float32')) + fluid.layers.data(name=str(feat_name), + shape=[1], + dtype='float32')) dataset = paddle.distributed.InMemoryDataset() print("========================================") generator_class = CTRDataset(mode=0) try: - dataset._check_use_var_with_data_generator( - slot_data, generator_class, dump_a_path) + dataset._check_use_var_with_data_generator(slot_data, + generator_class, + dump_a_path) print("case 1: check passed!") except Exception as e: print("warning: catch expected error") @@ -364,8 +376,9 @@ class TestDataset(unittest.TestCase): print("========================================") generator_class = CTRDataset(mode=2) try: - dataset._check_use_var_with_data_generator( - slot_data, generator_class, dump_a_path) + dataset._check_use_var_with_data_generator(slot_data, + generator_class, + dump_a_path) except Exception as e: print("warning: case 2 catch expected error") print(e) @@ -375,8 +388,9 @@ class TestDataset(unittest.TestCase): print("========================================") generator_class = CTRDataset(mode=3) try: - dataset._check_use_var_with_data_generator( - slot_data, generator_class, dump_a_path) + dataset._check_use_var_with_data_generator(slot_data, + generator_class, + dump_a_path) except Exception as e: print("warning: case 3 catch expected error") print(e) @@ -386,8 +400,9 @@ class TestDataset(unittest.TestCase): print("========================================") generator_class = CTRDataset(mode=4) try: - dataset._check_use_var_with_data_generator( - slot_data, generator_class, dump_a_path) + dataset._check_use_var_with_data_generator(slot_data, + generator_class, + dump_a_path) except Exception as e: print("warning: case 4 catch expected error") print(e) @@ -397,8 +412,9 @@ class TestDataset(unittest.TestCase): print("========================================") generator_class = CTRDataset(mode=5) try: - dataset._check_use_var_with_data_generator( - slot_data, generator_class, dump_a_path) + dataset._check_use_var_with_data_generator(slot_data, + generator_class, + dump_a_path) except Exception as e: print("warning: case 5 catch expected error") print(e) diff --git a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py index 9195ac277b9..8d949bf51a7 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py +++ 
b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py @@ -52,7 +52,9 @@ def write_reader_data_to_file(filename, reader): with open(filename, 'w') as fid: for instance_list in reader(): for i, instance in enumerate(instance_list): - instance = np.reshape(instance, [instance.size, ]) + instance = np.reshape(instance, [ + instance.size, + ]) fid.write(str(instance.size) + ' ') fid.write(' '.join(map(str, instance))) fid.write(' ') @@ -61,19 +63,21 @@ def write_reader_data_to_file(filename, reader): def fake_reader(batch_size=BATCH_SIZE, batch_num=BATCH_NUM): + def __reader__(): iteration = BATCH_SIZE * BATCH_NUM iteration = int(iteration + BATCH_SIZE / 2) for _ in six.moves.range(iteration): image = np.random.random(size=IMAGE_SHAPE).astype('float32') - label = np.random.random_integers( - size=LABEL_SHAPE, low=0, high=9).astype('int64') + label = np.random.random_integers(size=LABEL_SHAPE, low=0, + high=9).astype('int64') yield image, label return __reader__ class DatasetLoaderTestBase(unittest.TestCase): + def setUp(self): self.dataset_name = "QueueDataset" self.drop_last = False @@ -86,10 +90,12 @@ class DatasetLoaderTestBase(unittest.TestCase): main_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): - image = fluid.layers.data( - name='image', shape=IMAGE_SHAPE, dtype='float32') - label = fluid.layers.data( - name='label', shape=LABEL_SHAPE, dtype='int64') + image = fluid.layers.data(name='image', + shape=IMAGE_SHAPE, + dtype='float32') + label = fluid.layers.data(name='label', + shape=LABEL_SHAPE, + dtype='int64') simple_fc_net_with_inputs(image, label) @@ -135,8 +141,9 @@ class DatasetLoaderTestBase(unittest.TestCase): if self.dataset_name == 'InMemoryDataset': dataset.load_into_memory() - dataloader = fluid.io.DataLoader.from_dataset( - dataset=dataset, places=places, drop_last=self.drop_last) + dataloader = fluid.io.DataLoader.from_dataset(dataset=dataset, + places=places, + drop_last=self.drop_last) prog = fluid.CompiledProgram(main_prog).with_data_parallel() exe = fluid.Executor(place) @@ -159,23 +166,22 @@ class DatasetLoaderTestBase(unittest.TestCase): batch_size = BATCH_SIZE self.assertEquals(image.shape()[1:], IMAGE_SHAPE) - self.assertTrue( - image._place()._equals(places[idx]), - msg=get_place_string(image._place()) + ' vs ' + - get_place_string(places[idx])) + self.assertTrue(image._place()._equals(places[idx]), + msg=get_place_string(image._place()) + + ' vs ' + get_place_string(places[idx])) if self.drop_last: self.assertEquals(image.shape()[0], BATCH_SIZE) else: - self.assertTrue(image.shape()[0] == BATCH_SIZE or - image.shape()[0] == BATCH_SIZE / 2) + self.assertTrue(image.shape()[0] == BATCH_SIZE + or image.shape()[0] == BATCH_SIZE / 2) self.assertEquals(label.shape()[1:], LABEL_SHAPE) self.assertTrue(label._place()._equals(places[idx])) if self.drop_last: self.assertEquals(label.shape()[0], BATCH_SIZE) else: - self.assertTrue(label.shape()[0] == BATCH_SIZE or - label.shape()[0] == BATCH_SIZE / 2) + self.assertTrue(label.shape()[0] == BATCH_SIZE + or label.shape()[0] == BATCH_SIZE / 2) self.assertEquals(image.shape()[0], label.shape()[0]) @@ -204,18 +210,21 @@ class DatasetLoaderTestBase(unittest.TestCase): class QueueDatasetTestWithoutDropLast(DatasetLoaderTestBase): + def setUp(self): self.dataset_name = "QueueDataset" self.drop_last = True class InMemoryDatasetTestWithoutDropLast(DatasetLoaderTestBase): + def setUp(self): self.dataset_name = "InMemoryDataset" self.drop_last = False class 
InMemoryDatasetTestWithDropLast(DatasetLoaderTestBase): + def setUp(self): self.dataset_name = "InMemoryDataset" self.drop_last = True diff --git a/python/paddle/fluid/tests/unittests/test_dataset_download.py b/python/paddle/fluid/tests/unittests/test_dataset_download.py index f1fba215b93..06f015edf95 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset_download.py +++ b/python/paddle/fluid/tests/unittests/test_dataset_download.py @@ -18,6 +18,7 @@ from paddle.dataset.common import download, DATA_HOME, md5file class TestDataSetDownload(unittest.TestCase): + def setUp(self): flower_path = DATA_HOME + "/flowers/imagelabels.mat" diff --git a/python/paddle/fluid/tests/unittests/test_debugger.py b/python/paddle/fluid/tests/unittests/test_debugger.py index f4c9466d63a..884b58a4acc 100644 --- a/python/paddle/fluid/tests/unittests/test_debugger.py +++ b/python/paddle/fluid/tests/unittests/test_debugger.py @@ -22,36 +22,42 @@ from paddle.fluid.framework import Program class TestDebugger(unittest.TestCase): + def test_debug_str(self): p = Program() b = p.current_block() #selected_rows - b.create_var( - name='selected_rows', - dtype="float32", - shape=[5, 10], - type=core.VarDesc.VarType.SELECTED_ROWS) + b.create_var(name='selected_rows', + dtype="float32", + shape=[5, 10], + type=core.VarDesc.VarType.SELECTED_ROWS) #tensor array - b.create_var( - name='tensor_array', - shape=[5, 10], - type=core.VarDesc.VarType.LOD_TENSOR_ARRAY) + b.create_var(name='tensor_array', + shape=[5, 10], + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY) #operator - mul_x = b.create_parameter( - dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") - mul_y = b.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = b.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - b.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) + mul_x = b.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x") + mul_y = b.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = b.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + b.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) print(debugger.pprint_program_codes(p)) diff --git a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py index 0be329ac959..75dc36f9bb9 100644 --- a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py +++ b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py @@ -41,14 +41,14 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer): with fluid.unique_name.guard(): with fluid.program_guard(main_prog, startup_prog): - image = fluid.layers.data( - name='image', shape=[784], dtype='float32') + image = fluid.layers.data(name='image', + shape=[784], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') - py_reader = fluid.io.PyReader( - feed_list=[image, label], - capacity=4, - iterable=not use_legacy_py_reader, - use_double_buffer=use_double_buffer) + py_reader = fluid.io.PyReader(feed_list=[image, label], + capacity=4, + iterable=not use_legacy_py_reader, + use_double_buffer=use_double_buffer) hidden = image for hidden_size in [10, 20, 30]: hidden = fluid.layers.fc( @@ -62,8 +62,7 @@ def simple_fc_net(places, use_legacy_py_reader, 
use_double_buffer): size=CLASS_NUM, act='softmax') loss = fluid.layers.mean( - fluid.layers.cross_entropy( - input=predict_label, label=label)) + fluid.layers.cross_entropy(input=predict_label, label=label)) optimizer = fluid.optimizer.Adam() optimizer.minimize(loss) @@ -71,6 +70,7 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer): class TestBase(unittest.TestCase): + def run_main(self, use_legacy_py_reader, with_data_parallel, places, use_double_buffer): scope = fluid.Scope() @@ -90,8 +90,8 @@ class TestBase(unittest.TestCase): prog = fluid.CompiledProgram(main_prog) if with_data_parallel: - prog = prog.with_data_parallel( - loss_name=loss.name, places=places) + prog = prog.with_data_parallel(loss_name=loss.name, + places=places) step = 0 step_list = [] diff --git a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py index b2cb3141aad..e2062238b11 100644 --- a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py +++ b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py @@ -20,6 +20,7 @@ import six class TestClass(unittest.TestCase): + def setUp(self): self.use_double_buffer = True self.use_py_reader = True @@ -48,10 +49,12 @@ class TestClass(unittest.TestCase): main_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): - img = fluid.layers.data( - shape=img_shape, dtype='float32', name='image') - label = fluid.layers.data( - shape=label_shape, dtype='int64', name='label') + img = fluid.layers.data(shape=img_shape, + dtype='float32', + name='image') + label = fluid.layers.data(shape=label_shape, + dtype='int64', + name='label') feeder = fluid.DataFeeder(feed_list=[img, label], place=p) @@ -66,16 +69,15 @@ class TestClass(unittest.TestCase): capacity=4, iterable=True, use_double_buffer=use_double_buffer) - py_reader.decorate_sample_list_generator( - batch_reader, places=p) + py_reader.decorate_sample_list_generator(batch_reader, + places=p) else: py_reader = fluid.io.DataLoader.from_generator( feed_list=[img, label], capacity=4, iterable=True, use_double_buffer=use_double_buffer - ).set_sample_list_generator( - batch_reader, places=p) + ).set_sample_list_generator(batch_reader, places=p) for break_beforehand in [True, False]: for epoch_id in six.moves.range(10): @@ -95,8 +97,8 @@ class TestClass(unittest.TestCase): self.assertTrue(np.array_equal(L1, L2)) batch_id += 1 - if break_beforehand and batch_id >= int(batch_num / - 2): + if break_beforehand and batch_id >= int( + batch_num / 2): break if break_beforehand: @@ -106,18 +108,21 @@ class TestClass(unittest.TestCase): class TestClass2(TestClass): + def setUp(self): self.use_double_buffer = False self.use_py_reader = True class TestClass3(TestClass): + def setUp(self): self.use_double_buffer = True self.use_py_reader = False class TestClass4(TestClass): + def setUp(self): self.use_double_buffer = False self.use_py_reader = False diff --git a/python/paddle/fluid/tests/unittests/test_default_dtype.py b/python/paddle/fluid/tests/unittests/test_default_dtype.py index 29ca9a93985..378f3eb7e8f 100644 --- a/python/paddle/fluid/tests/unittests/test_default_dtype.py +++ b/python/paddle/fluid/tests/unittests/test_default_dtype.py @@ -23,6 +23,7 @@ import paddle.fluid.core as core class TestDefaultType(unittest.TestCase): + def check_default(self): self.assertEqual("float32", get_default_dtype()) @@ -49,6 +50,7 @@ class 
TestDefaultType(unittest.TestCase): class TestRaiseError(unittest.TestCase): + def test_error(self): self.assertRaises(TypeError, set_default_dtype, "int32") self.assertRaises(TypeError, set_default_dtype, np.int32) diff --git a/python/paddle/fluid/tests/unittests/test_default_scope_funcs.py b/python/paddle/fluid/tests/unittests/test_default_scope_funcs.py index 01a7b682488..be52e033011 100644 --- a/python/paddle/fluid/tests/unittests/test_default_scope_funcs.py +++ b/python/paddle/fluid/tests/unittests/test_default_scope_funcs.py @@ -19,6 +19,7 @@ import unittest class TestDefaultScopeFuncs(unittest.TestCase): + def test_cur_scope(self): self.assertIsNotNone(get_cur_scope()) @@ -34,6 +35,7 @@ class TestDefaultScopeFuncs(unittest.TestCase): leave_local_scope() def test_var_get_int(self): + def __new_scope__(): i = var("var_i") self.assertFalse(i.is_int()) diff --git a/python/paddle/fluid/tests/unittests/test_deform_conv2d.py b/python/paddle/fluid/tests/unittests/test_deform_conv2d.py index f5f1479d07d..90b2feccd39 100644 --- a/python/paddle/fluid/tests/unittests/test_deform_conv2d.py +++ b/python/paddle/fluid/tests/unittests/test_deform_conv2d.py @@ -47,11 +47,11 @@ class TestDeformConv2D(TestCase): self.filter_shape = filter_shape self.weight = np.random.uniform( - -1, 1, (self.out_channels, self.in_channels // self.groups - ) + filter_shape).astype(self.dtype) + -1, 1, (self.out_channels, self.in_channels // self.groups) + + filter_shape).astype(self.dtype) if not self.no_bias: - self.bias = np.random.uniform(-1, 1, ( - self.out_channels, )).astype(self.dtype) + self.bias = np.random.uniform(-1, 1, (self.out_channels, )).astype( + self.dtype) def out_size(in_size, pad_size, dilation_size, kernel_size, stride_size): @@ -66,8 +66,8 @@ class TestDeformConv2D(TestCase): self.kernel_size[1], self.stride[1])) out_shape = (out_h, out_w) - self.input_shape = (self.batch_size, self.in_channels - ) + self.spatial_shape + self.input_shape = (self.batch_size, + self.in_channels) + self.spatial_shape self.offset_shape = (self.batch_size, self.deformable_groups * 2 * filter_shape[0] * filter_shape[1]) + out_shape @@ -88,8 +88,8 @@ class TestDeformConv2D(TestCase): start = paddle.static.Program() paddle.enable_static() with paddle.static.program_guard(main, start): - x = paddle.static.data( - "input", (-1, self.in_channels, -1, -1), dtype=self.dtype) + x = paddle.static.data("input", (-1, self.in_channels, -1, -1), + dtype=self.dtype) offset = paddle.static.data( "offset", (-1, self.deformable_groups * 2 * self.filter_shape[0] * self.filter_shape[1], -1, -1), @@ -215,11 +215,11 @@ class TestDeformConv2DFunctional(TestCase): self.filter_shape = filter_shape self.weight = np.random.uniform( - -1, 1, (self.out_channels, self.in_channels // self.groups - ) + filter_shape).astype(self.dtype) + -1, 1, (self.out_channels, self.in_channels // self.groups) + + filter_shape).astype(self.dtype) if not self.no_bias: - self.bias = np.random.uniform(-1, 1, ( - self.out_channels, )).astype(self.dtype) + self.bias = np.random.uniform(-1, 1, (self.out_channels, )).astype( + self.dtype) def out_size(in_size, pad_size, dilation_size, kernel_size, stride_size): @@ -234,8 +234,8 @@ class TestDeformConv2DFunctional(TestCase): self.kernel_size[1], self.stride[1])) out_shape = (out_h, out_w) - self.input_shape = (self.batch_size, self.in_channels - ) + self.spatial_shape + self.input_shape = (self.batch_size, + self.in_channels) + self.spatial_shape self.offset_shape = (self.batch_size, self.deformable_groups * 2 * 
filter_shape[0] * filter_shape[1]) + out_shape @@ -256,8 +256,8 @@ class TestDeformConv2DFunctional(TestCase): start = paddle.static.Program() paddle.enable_static() with paddle.static.program_guard(main, start): - x = paddle.static.data( - "input", (-1, self.in_channels, -1, -1), dtype=self.dtype) + x = paddle.static.data("input", (-1, self.in_channels, -1, -1), + dtype=self.dtype) offset = paddle.static.data( "offset", (-1, self.deformable_groups * 2 * self.filter_shape[0] * self.filter_shape[1], -1, -1), @@ -326,7 +326,8 @@ class TestDeformConv2DFunctional(TestCase): padding=self.padding, dilation=self.dilation, deformable_groups=self.deformable_groups, - groups=self.groups, ) + groups=self.groups, + ) y_v2 = paddle.vision.ops.deform_conv2d( x=x, @@ -338,7 +339,8 @@ class TestDeformConv2DFunctional(TestCase): padding=self.padding, dilation=self.dilation, deformable_groups=self.deformable_groups, - groups=self.groups, ) + groups=self.groups, + ) out_v1 = y_v1.numpy() out_v2 = y_v2.numpy() @@ -350,8 +352,8 @@ class TestDeformConv2DFunctional(TestCase): start = paddle.static.Program() paddle.enable_static() with paddle.static.program_guard(main, start): - x = paddle.static.data( - "input", (-1, self.in_channels, -1, -1), dtype=self.dtype) + x = paddle.static.data("input", (-1, self.in_channels, -1, -1), + dtype=self.dtype) offset = paddle.static.data( "offset", (-1, self.deformable_groups * 2 * self.filter_shape[0] * self.filter_shape[1], -1, -1), @@ -361,8 +363,9 @@ class TestDeformConv2DFunctional(TestCase): self.filter_shape[1], -1, -1), dtype=self.dtype) - weight = paddle.static.data( - "weight", list(self.weight.shape), dtype=self.dtype) + weight = paddle.static.data("weight", + list(self.weight.shape), + dtype=self.dtype) if not self.no_bias: bias = paddle.static.data("bias", [-1], dtype=self.dtype) @@ -376,7 +379,8 @@ class TestDeformConv2DFunctional(TestCase): padding=self.padding, dilation=self.dilation, deformable_groups=self.deformable_groups, - groups=self.groups, ) + groups=self.groups, + ) y_v2 = paddle.vision.ops.deform_conv2d( x=x, @@ -388,7 +392,8 @@ class TestDeformConv2DFunctional(TestCase): padding=self.padding, dilation=self.dilation, deformable_groups=self.deformable_groups, - groups=self.groups, ) + groups=self.groups, + ) exe = paddle.static.Executor(self.place) exe.run(start) @@ -430,6 +435,7 @@ class TestDeformConv2DFunctional(TestCase): # testcases for DeformConv2D class TestDeformConv2DWithPadding(TestDeformConv2D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -443,6 +449,7 @@ class TestDeformConv2DWithPadding(TestDeformConv2D): class TestDeformConv2DWithBias(TestDeformConv2D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -456,6 +463,7 @@ class TestDeformConv2DWithBias(TestDeformConv2D): class TestDeformConv2DWithAsynPadding(TestDeformConv2D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -469,6 +477,7 @@ class TestDeformConv2DWithAsynPadding(TestDeformConv2D): class TestDeformConv2DWithDilation(TestDeformConv2D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -482,6 +491,7 @@ class TestDeformConv2DWithDilation(TestDeformConv2D): class TestDeformConv2DWithStride(TestDeformConv2D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -495,6 +505,7 @@ class TestDeformConv2DWithStride(TestDeformConv2D): class TestDeformConv2DWithDeformable_Groups(TestDeformConv2D): + def setUp(self): self.in_channels = 5 self.out_channels = 5 @@ -508,6 +519,7 @@ class 
TestDeformConv2DWithDeformable_Groups(TestDeformConv2D): class TestDeformConv2DWithGroups(TestDeformConv2D): + def setUp(self): self.in_channels = 5 self.out_channels = 5 @@ -522,6 +534,7 @@ class TestDeformConv2DWithGroups(TestDeformConv2D): # testcases for deform_conv2d class TestDeformConv2DFunctionalWithPadding(TestDeformConv2DFunctional): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -535,6 +548,7 @@ class TestDeformConv2DFunctionalWithPadding(TestDeformConv2DFunctional): class TestDeformConv2DFunctionalWithBias(TestDeformConv2DFunctional): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -548,6 +562,7 @@ class TestDeformConv2DFunctionalWithBias(TestDeformConv2DFunctional): class TestDeformConv2DFunctionalWithAsynPadding(TestDeformConv2DFunctional): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -561,6 +576,7 @@ class TestDeformConv2DFunctionalWithAsynPadding(TestDeformConv2DFunctional): class TestDeformConv2DFunctionalWithDilation(TestDeformConv2DFunctional): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -574,6 +590,7 @@ class TestDeformConv2DFunctionalWithDilation(TestDeformConv2DFunctional): class TestDeformConv2DFunctionalWithStride(TestDeformConv2DFunctional): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -586,8 +603,9 @@ class TestDeformConv2DFunctionalWithStride(TestDeformConv2DFunctional): self.no_bias = False -class TestDeformConv2DFunctionalWithDeformable_Groups( - TestDeformConv2DFunctional): +class TestDeformConv2DFunctionalWithDeformable_Groups(TestDeformConv2DFunctional + ): + def setUp(self): self.in_channels = 5 self.out_channels = 5 @@ -601,6 +619,7 @@ class TestDeformConv2DFunctionalWithDeformable_Groups( class TestDeformConv2DFunctionalWithGroups(TestDeformConv2DFunctional): + def setUp(self): self.in_channels = 5 self.out_channels = 5 diff --git a/python/paddle/fluid/tests/unittests/test_deformable_conv_op.py b/python/paddle/fluid/tests/unittests/test_deformable_conv_op.py index 5fc849575b6..d653e7a99e4 100644 --- a/python/paddle/fluid/tests/unittests/test_deformable_conv_op.py +++ b/python/paddle/fluid/tests/unittests/test_deformable_conv_op.py @@ -96,8 +96,8 @@ def dconv_im2col_gemm(input, offset, mask, filter, group, conv_param): val = dmc_bilinear(input[n, c], in_h, in_w, im_h, im_w) val_out = val * mask_table[kh, kw] - col_buffer[n, c * f_h * f_w + kh * f_w + kw, h * - in_w + w] = val_out + col_buffer[n, c * f_h * f_w + kh * f_w + kw, + h * in_w + w] = val_out out = np.zeros((in_n, group, int(out_c // group), out_h * out_w)) weight = filter.reshape(group, int(out_c // group), f_c * f_h * f_w) @@ -126,6 +126,7 @@ def deform_conv2d_wrapper(x, class TestModulatedDeformableConvOp(OpTest): + def setUp(self): self.python_api = deform_conv2d_wrapper self.op_type = "deformable_conv" @@ -169,11 +170,10 @@ class TestModulatedDeformableConvOp(OpTest): self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad( - {'Input', 'Offset', 'Mask', 'Filter'}, - 'Output', - max_relative_error=0.05, - check_eager=True) + self.check_grad({'Input', 'Offset', 'Mask', 'Filter'}, + 'Output', + max_relative_error=0.05, + check_eager=True) def init_test_case(self): self.pad = [1, 1] @@ -207,6 +207,7 @@ class TestModulatedDeformableConvOp(OpTest): class TestWithStride(TestModulatedDeformableConvOp): + def init_test_case(self): self.pad = [3, 3] self.stride = [2, 2] @@ -229,6 +230,7 @@ class TestWithStride(TestModulatedDeformableConvOp): class 
TestWithDilation(TestModulatedDeformableConvOp): + def init_test_case(self): self.pad = [2, 2] self.stride = [1, 1] @@ -254,6 +256,7 @@ class TestWithDilation(TestModulatedDeformableConvOp): class TestWith3x3(TestModulatedDeformableConvOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -276,11 +279,13 @@ class TestWith3x3(TestModulatedDeformableConvOp): class TestWithGroup(TestModulatedDeformableConvOp): + def init_group(self): self.groups = 2 class TestWithDouble(TestModulatedDeformableConvOp): + def init_type(self): self.dtype = np.float64 @@ -307,42 +312,61 @@ class TestWithDouble(TestModulatedDeformableConvOp): class TestModulatedDeformableConvInvalidInput(unittest.TestCase): + def test_error(self): + def test_invalid_input(): paddle.enable_static() input = [1, 3, 32, 32] - offset = fluid.data( - name='offset', shape=[None, 3, 32, 32], dtype='float32') - mask = fluid.data( - name='mask', shape=[None, 3, 32, 32], dtype='float32') - loss = fluid.layers.deformable_conv( - input, offset, mask, num_filters=4, filter_size=1) + offset = fluid.data(name='offset', + shape=[None, 3, 32, 32], + dtype='float32') + mask = fluid.data(name='mask', + shape=[None, 3, 32, 32], + dtype='float32') + loss = fluid.layers.deformable_conv(input, + offset, + mask, + num_filters=4, + filter_size=1) self.assertRaises(TypeError, test_invalid_input) def test_invalid_offset(): paddle.enable_static() - input = fluid.data( - name='input', shape=[None, 3, 32, 32], dtype='int32') - offset = fluid.data( - name='offset', shape=[None, 3, 32, 32], dtype='float32') - mask = fluid.data( - name='mask', shape=[None, 3, 32, 32], dtype='float32') - loss = fluid.layers.deformable_conv( - input, offset, mask, num_filters=4, filter_size=1) + input = fluid.data(name='input', + shape=[None, 3, 32, 32], + dtype='int32') + offset = fluid.data(name='offset', + shape=[None, 3, 32, 32], + dtype='float32') + mask = fluid.data(name='mask', + shape=[None, 3, 32, 32], + dtype='float32') + loss = fluid.layers.deformable_conv(input, + offset, + mask, + num_filters=4, + filter_size=1) self.assertRaises(TypeError, test_invalid_offset) def test_invalid_filter(): paddle.enable_static() - input = fluid.data( - name='input_filter', shape=[None, 3, 32, 32], dtype='float32') - offset = fluid.data( - name='offset_filter', shape=[None, 3, 32, 32], dtype='float32') - mask = fluid.data( - name='mask_filter', shape=[None, 3, 32, 32], dtype='float32') - loss = fluid.layers.deformable_conv( - input, offset, mask, num_filters=4, filter_size=0) + input = fluid.data(name='input_filter', + shape=[None, 3, 32, 32], + dtype='float32') + offset = fluid.data(name='offset_filter', + shape=[None, 3, 32, 32], + dtype='float32') + mask = fluid.data(name='mask_filter', + shape=[None, 3, 32, 32], + dtype='float32') + loss = fluid.layers.deformable_conv(input, + offset, + mask, + num_filters=4, + filter_size=0) self.assertRaises(ValueError, test_invalid_filter) @@ -352,15 +376,22 @@ class TestModulatedDeformableConvInvalidInput(unittest.TestCase): class TestDeformConv2DAPI(unittest.TestCase): + def test_api(self): + def test_deform_conv2d_v1(): paddle.enable_static() - input = paddle.static.data( - name='input_v1', shape=[None, 3, 32, 32], dtype='float32') - offset = paddle.static.data( - name='offset_v1', shape=[None, 4, 32, 32], dtype='float32') - out = paddle.static.nn.deform_conv2d( - input, offset, None, num_filters=4, filter_size=1) + input = paddle.static.data(name='input_v1', + shape=[None, 3, 32, 32], + dtype='float32') + offset = 
paddle.static.data(name='offset_v1', + shape=[None, 4, 32, 32], + dtype='float32') + out = paddle.static.nn.deform_conv2d(input, + offset, + None, + num_filters=4, + filter_size=1) assert (out.shape == (-1, 4, 32, 32)) @@ -368,14 +399,20 @@ class TestDeformConv2DAPI(unittest.TestCase): def test_deform_conv2d_v2(): paddle.enable_static() - input = paddle.static.data( - name='input_v2', shape=[None, 3, 32, 32], dtype='float32') - offset = paddle.static.data( - name='offset_v2', shape=[None, 4, 32, 32], dtype='float32') - mask = paddle.static.data( - name='mask_v2', shape=[None, 2, 32, 32], dtype='float32') - out = paddle.static.nn.deform_conv2d( - input, offset, mask, num_filters=4, filter_size=1) + input = paddle.static.data(name='input_v2', + shape=[None, 3, 32, 32], + dtype='float32') + offset = paddle.static.data(name='offset_v2', + shape=[None, 4, 32, 32], + dtype='float32') + mask = paddle.static.data(name='mask_v2', + shape=[None, 2, 32, 32], + dtype='float32') + out = paddle.static.nn.deform_conv2d(input, + offset, + mask, + num_filters=4, + filter_size=1) assert (out.shape == (-1, 4, 32, 32)) diff --git a/python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py b/python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py index 304a151c4d3..a60881e8dde 100644 --- a/python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py +++ b/python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py @@ -92,8 +92,8 @@ def dconv_im2col_gemm(input, offset, filter, group, conv_param): im_h, im_w) val_out = val - col_buffer[n, c * f_h * f_w + kh * f_w + kw, h * - in_w + w] = val_out + col_buffer[n, c * f_h * f_w + kh * f_w + kw, + h * in_w + w] = val_out out = np.zeros((in_n, group, int(out_c // group), out_h * out_w)) weight = filter.reshape(group, int(out_c // group), f_c * f_h * f_w) @@ -122,6 +122,7 @@ def deform_conv2d_wrapper(x, class TestModulatedDeformableConvOp(OpTest): + def setUp(self): self.python_api = deform_conv2d_wrapper self.op_type = "deformable_conv_v1" @@ -162,19 +163,17 @@ class TestModulatedDeformableConvOp(OpTest): self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad( - ['Input', 'Offset', 'Filter'], - 'Output', - max_relative_error=0.05, - check_eager=True) + self.check_grad(['Input', 'Offset', 'Filter'], + 'Output', + max_relative_error=0.05, + check_eager=True) def test_check_grad_no_filter(self): - self.check_grad( - ['Input', 'Offset'], - 'Output', - max_relative_error=0.1, - no_grad_set=set(['Filter']), - check_eager=True) + self.check_grad(['Input', 'Offset'], + 'Output', + max_relative_error=0.1, + no_grad_set=set(['Filter']), + check_eager=True) def init_test_case(self): self.pad = [1, 1] @@ -203,6 +202,7 @@ class TestModulatedDeformableConvOp(OpTest): class TestWithStride(TestModulatedDeformableConvOp): + def init_test_case(self): self.pad = [3, 3] self.stride = [2, 2] @@ -220,6 +220,7 @@ class TestWithStride(TestModulatedDeformableConvOp): class TestWithDilation(TestModulatedDeformableConvOp): + def init_test_case(self): self.pad = [2, 2] self.stride = [1, 1] @@ -240,6 +241,7 @@ class TestWithDilation(TestModulatedDeformableConvOp): class TestWith1x1(TestModulatedDeformableConvOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -257,6 +259,7 @@ class TestWith1x1(TestModulatedDeformableConvOp): class TestWithGroup(TestModulatedDeformableConvOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -278,38 +281,42 @@ class TestWithGroup(TestModulatedDeformableConvOp): class 
TestWithDouble(TestModulatedDeformableConvOp): + def init_type(self): self.dtype = np.float64 class TestModulatedDeformableConvV1InvalidInput(unittest.TestCase): + def test_error(self): + def test_invalid_input(): input = [1, 3, 32, 32] - offset = fluid.data( - name='offset', shape=[None, 3, 32, 32], dtype='float32') - loss = fluid.layers.deformable_conv( - input, - offset, - mask=None, - num_filters=4, - filter_size=1, - modulated=False) + offset = fluid.data(name='offset', + shape=[None, 3, 32, 32], + dtype='float32') + loss = fluid.layers.deformable_conv(input, + offset, + mask=None, + num_filters=4, + filter_size=1, + modulated=False) self.assertRaises(TypeError, test_invalid_input) def test_invalid_offset(): - input = fluid.data( - name='input', shape=[None, 3, 32, 32], dtype='int32') - offset = fluid.data( - name='offset', shape=[None, 3, 32, 32], dtype='float32') - loss = fluid.layers.deformable_conv( - input, - offset, - mask=None, - num_filters=4, - filter_size=1, - modulated=False) + input = fluid.data(name='input', + shape=[None, 3, 32, 32], + dtype='int32') + offset = fluid.data(name='offset', + shape=[None, 3, 32, 32], + dtype='float32') + loss = fluid.layers.deformable_conv(input, + offset, + mask=None, + num_filters=4, + filter_size=1, + modulated=False) self.assertRaises(TypeError, test_invalid_offset) diff --git a/python/paddle/fluid/tests/unittests/test_deformable_psroi_pooling.py b/python/paddle/fluid/tests/unittests/test_deformable_psroi_pooling.py index 20d72f2d95f..f1cd04bd3f5 100644 --- a/python/paddle/fluid/tests/unittests/test_deformable_psroi_pooling.py +++ b/python/paddle/fluid/tests/unittests/test_deformable_psroi_pooling.py @@ -52,6 +52,7 @@ def set_outputs(output, top_count): class TestDeformablePSROIPoolOp(OpTest): + def set_data(self): self.start_test1() self.start_test2() @@ -369,130 +370,131 @@ class TestDeformablePSROIPoolOp(OpTest): class TestDeformablePSROIPoolOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - input1 = fluid.data( - name="input1", shape=[2, 192, 64, 64], dtype='float32') - rois1 = fluid.data( - name="rois1", shape=[-1, 4], dtype='float32', lod_level=1) - trans1 = fluid.data( - name="trans1", shape=[2, 384, 64, 64], dtype='float32') + input1 = fluid.data(name="input1", + shape=[2, 192, 64, 64], + dtype='float32') + rois1 = fluid.data(name="rois1", + shape=[-1, 4], + dtype='float32', + lod_level=1) + trans1 = fluid.data(name="trans1", + shape=[2, 384, 64, 64], + dtype='float32') # The `input` must be Variable and the data type of `input` Tensor must be one of float32 and float64. 
def test_input_type(): - fluid.layers.deformable_roi_pooling( - input=[3, 4], - rois=rois1, - trans=trans1, - pooled_height=8, - pooled_width=8, - part_size=(8, 8), - sample_per_part=4, - position_sensitive=True) + fluid.layers.deformable_roi_pooling(input=[3, 4], + rois=rois1, + trans=trans1, + pooled_height=8, + pooled_width=8, + part_size=(8, 8), + sample_per_part=4, + position_sensitive=True) self.assertRaises(TypeError, test_input_type) def test_input_tensor_dtype(): - input2 = fluid.data( - name="input2", shape=[2, 192, 64, 64], dtype='int32') - fluid.layers.deformable_roi_pooling( - input=input2, - rois=rois1, - trans=trans1, - pooled_height=8, - pooled_width=8, - part_size=(8, 8), - sample_per_part=4, - position_sensitive=True) + input2 = fluid.data(name="input2", + shape=[2, 192, 64, 64], + dtype='int32') + fluid.layers.deformable_roi_pooling(input=input2, + rois=rois1, + trans=trans1, + pooled_height=8, + pooled_width=8, + part_size=(8, 8), + sample_per_part=4, + position_sensitive=True) self.assertRaises(TypeError, test_input_tensor_dtype) # The `rois` must be Variable and the data type of `rois` Tensor must be one of float32 and float64. def test_rois_type(): - fluid.layers.deformable_roi_pooling( - input=input1, - rois=2, - trans=trans1, - pooled_height=8, - pooled_width=8, - part_size=(8, 8), - sample_per_part=4, - position_sensitive=True) + fluid.layers.deformable_roi_pooling(input=input1, + rois=2, + trans=trans1, + pooled_height=8, + pooled_width=8, + part_size=(8, 8), + sample_per_part=4, + position_sensitive=True) self.assertRaises(TypeError, test_rois_type) def test_rois_tensor_dtype(): - rois2 = fluid.data( - name="rois2", shape=[-1, 4], dtype='int32', lod_level=1) - fluid.layers.deformable_roi_pooling( - input=input1, - rois=rois2, - trans=trans1, - pooled_height=8, - pooled_width=8, - part_size=(8, 8), - sample_per_part=4, - position_sensitive=True) + rois2 = fluid.data(name="rois2", + shape=[-1, 4], + dtype='int32', + lod_level=1) + fluid.layers.deformable_roi_pooling(input=input1, + rois=rois2, + trans=trans1, + pooled_height=8, + pooled_width=8, + part_size=(8, 8), + sample_per_part=4, + position_sensitive=True) self.assertRaises(TypeError, test_rois_tensor_dtype) # The `trans` must be Variable and the data type of `trans` Tensor must be one of float32 and float64. def test_trans_type(): - fluid.layers.deformable_roi_pooling( - input=input1, - rois=rois1, - trans=[2], - pooled_height=8, - pooled_width=8, - part_size=(8, 8), - sample_per_part=4, - position_sensitive=True) + fluid.layers.deformable_roi_pooling(input=input1, + rois=rois1, + trans=[2], + pooled_height=8, + pooled_width=8, + part_size=(8, 8), + sample_per_part=4, + position_sensitive=True) self.assertRaises(TypeError, test_trans_type) def test_trans_tensor_dtype(): - trans2 = fluid.data( - name="trans2", shape=[2, 384, 64, 64], dtype='int32') - fluid.layers.deformable_roi_pooling( - input=input1, - rois=rois1, - trans=trans2, - pooled_height=8, - pooled_width=8, - part_size=(8, 8), - sample_per_part=4, - position_sensitive=True) + trans2 = fluid.data(name="trans2", + shape=[2, 384, 64, 64], + dtype='int32') + fluid.layers.deformable_roi_pooling(input=input1, + rois=rois1, + trans=trans2, + pooled_height=8, + pooled_width=8, + part_size=(8, 8), + sample_per_part=4, + position_sensitive=True) self.assertRaises(TypeError, test_trans_tensor_dtype) # The `group_size` must be one of list and tuple. # Each element must be int. 
def test_group_size_type(): - fluid.layers.deformable_roi_pooling( - input=input1, - rois=rois1, - trans=trans1, - group_size=1, - pooled_height=8, - pooled_width=8, - part_size=(8, 8), - sample_per_part=4, - position_sensitive=True) + fluid.layers.deformable_roi_pooling(input=input1, + rois=rois1, + trans=trans1, + group_size=1, + pooled_height=8, + pooled_width=8, + part_size=(8, 8), + sample_per_part=4, + position_sensitive=True) self.assertRaises(TypeError, test_group_size_type) # The `part_size` must be one of list, tuple and None. # Each element must be int. def test_part_size_type(): - fluid.layers.deformable_roi_pooling( - input=input1, - rois=rois1, - trans=trans1, - pooled_height=8, - pooled_width=8, - part_size=8, - sample_per_part=4, - position_sensitive=True) + fluid.layers.deformable_roi_pooling(input=input1, + rois=rois1, + trans=trans1, + pooled_height=8, + pooled_width=8, + part_size=8, + sample_per_part=4, + position_sensitive=True) self.assertRaises(TypeError, test_part_size_type) diff --git a/python/paddle/fluid/tests/unittests/test_deg2rad.py b/python/paddle/fluid/tests/unittests/test_deg2rad.py index 31219d5ab97..c3e77c0ac5d 100644 --- a/python/paddle/fluid/tests/unittests/test_deg2rad.py +++ b/python/paddle/fluid/tests/unittests/test_deg2rad.py @@ -26,10 +26,11 @@ paddle.enable_static() class TestDeg2radAPI(unittest.TestCase): + def setUp(self): self.x_dtype = 'float64' - self.x_np = np.array( - [180.0, -180.0, 360.0, -360.0, 90.0, -90.0]).astype(np.float64) + self.x_np = np.array([180.0, -180.0, 360.0, -360.0, 90.0, + -90.0]).astype(np.float64) self.x_shape = [6] self.out_np = np.deg2rad(self.x_np) @@ -40,8 +41,8 @@ class TestDeg2radAPI(unittest.TestCase): x = fluid.data(name='input', dtype=self.x_dtype, shape=self.x_shape) out = paddle.deg2rad(x) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) res = exe.run(fluid.default_main_program(), feed={'input': self.x_np}, diff --git a/python/paddle/fluid/tests/unittests/test_density_prior_box_op.py b/python/paddle/fluid/tests/unittests/test_density_prior_box_op.py index 4b0bc1dcf85..c00e7588294 100644 --- a/python/paddle/fluid/tests/unittests/test_density_prior_box_op.py +++ b/python/paddle/fluid/tests/unittests/test_density_prior_box_op.py @@ -22,6 +22,7 @@ from op_test import OpTest class TestDensityPriorBoxOp(OpTest): + def set_data(self): self.init_test_params() self.init_test_input() @@ -76,8 +77,8 @@ class TestDensityPriorBoxOp(OpTest): if len(self.fixed_sizes) > 0 and len(self.densities) > 0: for density in self.densities: if len(self.fixed_ratios) > 0: - self.num_priors += len(self.fixed_ratios) * (pow(density, - 2)) + self.num_priors += len(self.fixed_ratios) * (pow( + density, 2)) self.offset = 0.5 def init_test_input(self): @@ -135,6 +136,7 @@ class TestDensityPriorBoxOp(OpTest): class TestDensityPriorBox(TestDensityPriorBoxOp): + def set_density(self): self.densities = [3, 4] self.fixed_sizes = [1.0, 2.0] diff --git a/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py b/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py index 654397b6c20..dc9991c3836 100755 --- a/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py +++ b/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py @@ -24,6 +24,7 @@ import sys import warnings import paddle.utils.deprecated as deprecated from paddle import _C_ops + LOWEST_WARNING_POSTION 
= 3 ERROR_WARNING_POSTION = sys.maxsize @@ -81,7 +82,7 @@ class TestDeprecatedDocorator(unittest.TestCase): # expected expected = LOWEST_WARNING_POSTION - # captured + # captured captured = get_warning_index(fluid.data) paddle.disable_static() @@ -104,7 +105,7 @@ class TestDeprecatedDocorator(unittest.TestCase): # expected expected = LOWEST_WARNING_POSTION - # captured + # captured captured = get_warning_index(fluid.layers.elementwise_mul) # testting @@ -124,7 +125,7 @@ class TestDeprecatedDocorator(unittest.TestCase): # expected expected = LOWEST_WARNING_POSTION - # captured + # captured captured = get_warning_index(paddle.multiply) # testting @@ -145,7 +146,7 @@ class TestDeprecatedDocorator(unittest.TestCase): # expected expected = LOWEST_WARNING_POSTION - # captured + # captured captured = get_warning_index(fluid.layers.elementwise_mul) # testting @@ -175,8 +176,8 @@ class TestDeprecatedDocorator(unittest.TestCase): x = linear(data) with warnings.catch_warnings(record=True) as w: - out = paddle.nn.functional.softmax_with_cross_entropy( - logits=x, label=label) + out = paddle.nn.functional.softmax_with_cross_entropy(logits=x, + label=label) assert ( 'API "paddle.nn.functional.loss.softmax_with_cross_entropy" is ' 'deprecated since 2.0.0') in str(w[-1].message) diff --git a/python/paddle/fluid/tests/unittests/test_deprecated_memory_optimize_interfaces.py b/python/paddle/fluid/tests/unittests/test_deprecated_memory_optimize_interfaces.py index c3a21ba0bcb..bd91e14e34d 100644 --- a/python/paddle/fluid/tests/unittests/test_deprecated_memory_optimize_interfaces.py +++ b/python/paddle/fluid/tests/unittests/test_deprecated_memory_optimize_interfaces.py @@ -18,6 +18,7 @@ from simple_nets import simple_fc_net class DeprecatedMemoryOptimizationInterfaceTest(unittest.TestCase): + def setUp(self): self.method = fluid.memory_optimize @@ -60,6 +61,7 @@ class DeprecatedMemoryOptimizationInterfaceTest(unittest.TestCase): class ReleaseMemoryTest(DeprecatedMemoryOptimizationInterfaceTest): + def setUp(self): self.method = fluid.release_memory diff --git a/python/paddle/fluid/tests/unittests/test_dequantize_abs_max_op.py b/python/paddle/fluid/tests/unittests/test_dequantize_abs_max_op.py index 696a60787b7..7750a41701e 100644 --- a/python/paddle/fluid/tests/unittests/test_dequantize_abs_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_dequantize_abs_max_op.py @@ -32,6 +32,7 @@ def dequantize_max_abs(x, scale, max_range): class TestDequantizeMaxAbsOp(OpTest): + def set_args(self): self.num_bits = 8 self.max_range = math.pow(2, self.num_bits - 1) - 1 @@ -56,6 +57,7 @@ class TestDequantizeMaxAbsOp(OpTest): class TestDequantizeMaxAbsOp5Bits(TestDequantizeMaxAbsOp): + def set_args(self): self.num_bits = 5 self.max_range = math.pow(2, self.num_bits - 1) - 1 @@ -63,6 +65,7 @@ class TestDequantizeMaxAbsOp5Bits(TestDequantizeMaxAbsOp): class TestDequantizeMaxAbsOpInt16(TestDequantizeMaxAbsOp): + def set_args(self): self.num_bits = 16 self.max_range = math.pow(2, self.num_bits - 1) - 1 diff --git a/python/paddle/fluid/tests/unittests/test_dequantize_log_op.py b/python/paddle/fluid/tests/unittests/test_dequantize_log_op.py index 3ad1f05f92d..8b7b5df656a 100644 --- a/python/paddle/fluid/tests/unittests/test_dequantize_log_op.py +++ b/python/paddle/fluid/tests/unittests/test_dequantize_log_op.py @@ -33,6 +33,7 @@ def dequantize_log(x, dict_data): class TestDequantizeLogOp(OpTest): + def setUp(self): self.op_type = "dequantize_log" x = np.random.randint(low=-128, high=127, size=(20, 10)).astype('int8') diff 
--git a/python/paddle/fluid/tests/unittests/test_desc_clone.py b/python/paddle/fluid/tests/unittests/test_desc_clone.py index b63c4f55dbc..c82ba2bc8cb 100644 --- a/python/paddle/fluid/tests/unittests/test_desc_clone.py +++ b/python/paddle/fluid/tests/unittests/test_desc_clone.py @@ -39,20 +39,18 @@ paddle.dataset.mnist.fetch() # random seed must set before configuring the network. # fluid.default_startup_program().random_seed = SEED def cnn_model(data): - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=data, - filter_size=5, - num_filters=20, - pool_size=2, - pool_stride=2, - act="relu") - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_1 = fluid.nets.simple_img_conv_pool(input=data, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") # TODO(dzhwinter) : refine the initializer and random seed settting SIZE = 10 @@ -66,8 +64,8 @@ def cnn_model(data): size=SIZE, act="softmax", param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale))) + initializer=fluid.initializer.NormalInitializer(loc=0.0, + scale=scale))) return predict @@ -83,19 +81,21 @@ def get_model(batch_size): # Evaluator batch_size_tensor = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size_tensor) + batch_acc = fluid.layers.accuracy(input=predict, + label=label, + total=batch_size_tensor) inference_program = fluid.default_main_program().clone() # Optimization - opt = fluid.optimizer.AdamOptimizer( - learning_rate=0.001, beta1=0.9, beta2=0.999) + opt = fluid.optimizer.AdamOptimizer(learning_rate=0.001, + beta1=0.9, + beta2=0.999) # Reader - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=batch_size) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=batch_size) + test_reader = paddle.batch(paddle.dataset.mnist.test(), + batch_size=batch_size) opt.minimize(avg_cost) return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict @@ -156,8 +156,8 @@ def program_equal(a, b): elif k == 'blocks': for i in range(0, len(a.blocks)): if not block_equal(a.blocks[i], b.blocks[i]): - raise ValueError("In operator_equal not equal:{0}\n".format( - k)) + raise ValueError( + "In operator_equal not equal:{0}\n".format(k)) return False assert (len(a.blocks) == len(b.blocks)) elif k == '_auto_checkpoint_name': @@ -169,6 +169,7 @@ def program_equal(a, b): class TestCloneWithStopGradient(unittest.TestCase): + def test_clone_with_stop_gradient(self): train_program = fluid.Program() startup_program = fluid.Program() @@ -179,8 +180,7 @@ class TestCloneWithStopGradient(unittest.TestCase): hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.5) loss = fluid.layers.cross_entropy( input=fluid.layers.fc(hidden2, size=10, act='softmax'), - label=fluid.layers.data( - name='label', shape=[1], dtype='int64')) + label=fluid.layers.data(name='label', shape=[1], dtype='int64')) avg_loss = fluid.layers.mean(loss) test_program = train_program.clone(for_test=False) @@ -191,6 +191,7 @@ class TestCloneWithStopGradient(unittest.TestCase): class TestCloneWithStopGradientInSubBlock(unittest.TestCase): + 
def test_clone_with_stop_gradient(self): train_program = fluid.Program() startup_program = fluid.Program() @@ -215,8 +216,7 @@ class TestCloneWithStopGradientInSubBlock(unittest.TestCase): loss = fluid.layers.cross_entropy( input=fluid.layers.fc(hidden2, size=10, act='softmax'), - label=fluid.layers.data( - name='label', shape=[1], dtype='int64')) + label=fluid.layers.data(name='label', shape=[1], dtype='int64')) avg_loss = fluid.layers.mean(loss) test_program = train_program.clone(for_test=False) @@ -231,6 +231,7 @@ class TestCloneWithStopGradientInSubBlock(unittest.TestCase): class TestCloneWithRaise(unittest.TestCase): + def test_clone_with_stop_gradient(self): train_program = fluid.Program() startup_program = fluid.Program() @@ -254,8 +255,7 @@ class TestCloneWithRaise(unittest.TestCase): hidden2 = fluid.layers.cond(cond, true_fn, false_fn) loss = fluid.layers.cross_entropy( input=fluid.layers.fc(hidden2, size=10, act='softmax'), - label=fluid.layers.data( - name='label', shape=[1], dtype='int64')) + label=fluid.layers.data(name='label', shape=[1], dtype='int64')) avg_loss = fluid.layers.mean(loss) test_program = train_program.clone(for_test=False) diff --git a/python/paddle/fluid/tests/unittests/test_detach.py b/python/paddle/fluid/tests/unittests/test_detach.py index 8cf88027e37..9950aa65c01 100644 --- a/python/paddle/fluid/tests/unittests/test_detach.py +++ b/python/paddle/fluid/tests/unittests/test_detach.py @@ -25,9 +25,10 @@ import unittest class Test_Detach(unittest.TestCase): + def generate_Data(self): - data = np.array( - [[1, 8, 3, 9], [7, 20, 9, 6], [4, 6, 8, 10]]).astype('float32') + data = np.array([[1, 8, 3, 9], [7, 20, 9, 6], [4, 6, 8, + 10]]).astype('float32') return data def no_detach_multi(self): @@ -37,29 +38,26 @@ class Test_Detach(unittest.TestCase): initializer=fluid.initializer.Constant(5.0)) linear_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(6.0)) - linear = Linear( - 4, - 10, - param_attr=linear_w_param_attrs, - bias_attr=linear_b_param_attrs) + linear = Linear(4, + 10, + param_attr=linear_w_param_attrs, + bias_attr=linear_b_param_attrs) linear1_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(7.0)) linear1_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(8.0)) - linear1 = Linear( - 10, - 1, - param_attr=linear1_w_param_attrs, - bias_attr=linear1_b_param_attrs) + linear1 = Linear(10, + 1, + param_attr=linear1_w_param_attrs, + bias_attr=linear1_b_param_attrs) linear2_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(9.0)) linear2_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(10.0)) - linear2 = Linear( - 10, - 1, - param_attr=linear2_w_param_attrs, - bias_attr=linear2_b_param_attrs) + linear2 = Linear(10, + 1, + param_attr=linear2_w_param_attrs, + bias_attr=linear2_b_param_attrs) data = to_variable(data) x = linear(data) x1 = linear1(x) @@ -76,20 +74,18 @@ class Test_Detach(unittest.TestCase): initializer=fluid.initializer.Constant(5.0)) linear_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(6.0)) - linear = Linear( - 4, - 10, - param_attr=linear_w_param_attrs, - bias_attr=linear_b_param_attrs) + linear = Linear(4, + 10, + param_attr=linear_w_param_attrs, + bias_attr=linear_b_param_attrs) linear1_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(7.0)) linear1_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(8.0)) - linear1 = Linear( - 10, - 1, - 
param_attr=linear1_w_param_attrs, - bias_attr=linear1_b_param_attrs) + linear1 = Linear(10, + 1, + param_attr=linear1_w_param_attrs, + bias_attr=linear1_b_param_attrs) data = to_variable(data) x = linear(data) x1 = linear1(x) @@ -105,29 +101,26 @@ class Test_Detach(unittest.TestCase): initializer=fluid.initializer.Constant(5.0)) linear_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(6.0)) - linear = Linear( - 4, - 10, - param_attr=linear_w_param_attrs, - bias_attr=linear_b_param_attrs) + linear = Linear(4, + 10, + param_attr=linear_w_param_attrs, + bias_attr=linear_b_param_attrs) linear1_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(7.0)) linear1_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(8.0)) - linear1 = Linear( - 10, - 1, - param_attr=linear1_w_param_attrs, - bias_attr=linear1_b_param_attrs) + linear1 = Linear(10, + 1, + param_attr=linear1_w_param_attrs, + bias_attr=linear1_b_param_attrs) linear2_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(9.0)) linear2_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(10.0)) - linear2 = Linear( - 10, - 1, - param_attr=linear2_w_param_attrs, - bias_attr=linear2_b_param_attrs) + linear2 = Linear(10, + 1, + param_attr=linear2_w_param_attrs, + bias_attr=linear2_b_param_attrs) data = to_variable(data) x = linear(data) x_detach = x.detach() @@ -153,6 +146,7 @@ class Test_Detach(unittest.TestCase): class TestInplace(unittest.TestCase): + def test_forward_version(self): with paddle.fluid.dygraph.guard(): var = paddle.to_tensor(np.ones((4, 2, 3)).astype(np.float32)) diff --git a/python/paddle/fluid/tests/unittests/test_detection_map_op.py b/python/paddle/fluid/tests/unittests/test_detection_map_op.py index 93ab4a73906..c545484bff3 100644 --- a/python/paddle/fluid/tests/unittests/test_detection_map_op.py +++ b/python/paddle/fluid/tests/unittests/test_detection_map_op.py @@ -25,6 +25,7 @@ from op_test import OpTest class TestDetectionMAPOp(OpTest): + def set_data(self): self.class_num = 4 self.init_test_case() @@ -34,8 +35,8 @@ class TestDetectionMAPOp(OpTest): self.mAP = np.array(self.mAP).astype('float32') if len(self.class_pos_count) > 0: - self.class_pos_count = np.array(self.class_pos_count).astype( - 'int32') + self.class_pos_count = np.array( + self.class_pos_count).astype('int32') self.true_pos = np.array(self.true_pos).astype('float32') self.false_pos = np.array(self.false_pos).astype('float32') self.has_state = np.array([1]).astype('int32') @@ -61,8 +62,8 @@ class TestDetectionMAPOp(OpTest): 'class_num': self.class_num } - self.out_class_pos_count = np.array(self.out_class_pos_count).astype( - 'int') + self.out_class_pos_count = np.array( + self.out_class_pos_count).astype('int') self.out_true_pos = np.array(self.out_true_pos).astype('float32') self.out_false_pos = np.array(self.out_false_pos).astype('float32') @@ -85,12 +86,13 @@ class TestDetectionMAPOp(OpTest): # label score xmin ymin xmax ymax difficult self.detect_lod = [[3, 4]] - self.detect = [ - [1, 0.3, 0.1, 0.0, 0.4, 0.3], [1, 0.7, 0.0, 0.1, 0.2, 0.3], - [1, 0.9, 0.7, 0.6, 0.8, 0.8], [2, 0.8, 0.2, 0.1, 0.4, 0.4], - [2, 0.1, 0.4, 0.3, 0.7, 0.5], [1, 0.2, 0.8, 0.1, 1.0, 0.3], - [3, 0.2, 0.8, 0.1, 1.0, 0.3] - ] + self.detect = [[1, 0.3, 0.1, 0.0, 0.4, + 0.3], [1, 0.7, 0.0, 0.1, 0.2, 0.3], + [1, 0.9, 0.7, 0.6, 0.8, + 0.8], [2, 0.8, 0.2, 0.1, 0.4, 0.4], + [2, 0.1, 0.4, 0.3, 0.7, + 0.5], [1, 0.2, 0.8, 0.1, 1.0, 0.3], + [3, 0.2, 0.8, 0.1, 1.0, 0.3]] # label score true_pos 
false_pos self.tf_pos_lod = [[3, 4]] @@ -247,6 +249,7 @@ class TestDetectionMAPOp(OpTest): class TestDetectionMAPOpSkipDiff(TestDetectionMAPOp): + def init_test_case(self): super(TestDetectionMAPOpSkipDiff, self).init_test_case() @@ -259,6 +262,7 @@ class TestDetectionMAPOpSkipDiff(TestDetectionMAPOp): class TestDetectionMAPOpWithoutDiff(TestDetectionMAPOp): + def init_test_case(self): super(TestDetectionMAPOpWithoutDiff, self).init_test_case() @@ -268,6 +272,7 @@ class TestDetectionMAPOpWithoutDiff(TestDetectionMAPOp): class TestDetectionMAPOp11Point(TestDetectionMAPOp): + def init_test_case(self): super(TestDetectionMAPOp11Point, self).init_test_case() @@ -275,6 +280,7 @@ class TestDetectionMAPOp11Point(TestDetectionMAPOp): class TestDetectionMAPOpMultiBatch(TestDetectionMAPOp): + def init_test_case(self): super(TestDetectionMAPOpMultiBatch, self).init_test_case() self.class_pos_count = [0, 2, 1, 0] @@ -285,6 +291,7 @@ class TestDetectionMAPOpMultiBatch(TestDetectionMAPOp): class TestDetectionMAPOp11PointWithClassNoTP(TestDetectionMAPOp): + def init_test_case(self): self.overlap_threshold = 0.3 self.evaluate_difficult = True diff --git a/python/paddle/fluid/tests/unittests/test_determinant_op.py b/python/paddle/fluid/tests/unittests/test_determinant_op.py index d447d213f3c..7a799ad3776 100644 --- a/python/paddle/fluid/tests/unittests/test_determinant_op.py +++ b/python/paddle/fluid/tests/unittests/test_determinant_op.py @@ -28,6 +28,7 @@ paddle.enable_static() class TestDeterminantOp(OpTest): + def setUp(self): self.python_api = paddle.linalg.det self.init_data() @@ -48,6 +49,7 @@ class TestDeterminantOp(OpTest): class TestDeterminantOpCase1(TestDeterminantOp): + def init_data(self): np.random.seed(0) self.case = np.random.rand(10, 10).astype('float32') @@ -56,6 +58,7 @@ class TestDeterminantOpCase1(TestDeterminantOp): class TestDeterminantOpCase2(TestDeterminantOp): + def init_data(self): np.random.seed(0) # not invertible matrix @@ -65,6 +68,7 @@ class TestDeterminantOpCase2(TestDeterminantOp): class TestDeterminantAPI(unittest.TestCase): + def setUp(self): np.random.seed(0) self.shape = [3, 3, 5, 5] @@ -97,6 +101,7 @@ class TestDeterminantAPI(unittest.TestCase): class TestSlogDeterminantOp(OpTest): + def setUp(self): self.op_type = "slogdeterminant" self.init_data() @@ -117,6 +122,7 @@ class TestSlogDeterminantOp(OpTest): class TestSlogDeterminantOpCase1(TestSlogDeterminantOp): + def init_data(self): np.random.seed(0) self.case = np.random.rand(2, 2, 5, 5).astype(np.float32) @@ -125,6 +131,7 @@ class TestSlogDeterminantOpCase1(TestSlogDeterminantOp): class TestSlogDeterminantAPI(unittest.TestCase): + def setUp(self): np.random.seed(0) self.shape = [3, 3, 5, 5] diff --git a/python/paddle/fluid/tests/unittests/test_device.py b/python/paddle/fluid/tests/unittests/test_device.py index fc3734c7874..eff2bf490df 100644 --- a/python/paddle/fluid/tests/unittests/test_device.py +++ b/python/paddle/fluid/tests/unittests/test_device.py @@ -23,6 +23,7 @@ import paddle.fluid.framework as framework class TestStaticDeviceManage(unittest.TestCase): + def _test_device(self, device_name, device_class): paddle.set_device(device_name) @@ -55,6 +56,7 @@ class TestStaticDeviceManage(unittest.TestCase): class TestImperativeDeviceManage(unittest.TestCase): + def test_cpu(self): with fluid.dygraph.guard(): paddle.set_device('cpu') diff --git a/python/paddle/fluid/tests/unittests/test_device_guard.py b/python/paddle/fluid/tests/unittests/test_device_guard.py index e547c786feb..911c6c4a2d5 100644 --- 
a/python/paddle/fluid/tests/unittests/test_device_guard.py +++ b/python/paddle/fluid/tests/unittests/test_device_guard.py @@ -43,14 +43,17 @@ def get_vaild_warning_num(warning, w): class TestDeviceGuard(unittest.TestCase): + def test_device_guard(self): main_program = paddle.static.Program() startup_program = paddle.static.Program() with paddle.static.program_guard(main_program, startup_program): - data1 = paddle.full( - shape=[1, 3, 8, 8], fill_value=0.5, dtype='float32') - data2 = paddle.full( - shape=[1, 3, 5, 5], fill_value=0.5, dtype='float32') + data1 = paddle.full(shape=[1, 3, 8, 8], + fill_value=0.5, + dtype='float32') + data2 = paddle.full(shape=[1, 3, 5, 5], + fill_value=0.5, + dtype='float32') shape = paddle.shape(data2) with paddle.static.device_guard("cpu"): shape = paddle.slice(shape, axes=[0], starts=[0], ends=[4]) @@ -71,10 +74,12 @@ class TestDeviceGuard(unittest.TestCase): main_program = paddle.static.Program() startup_program = paddle.static.Program() with paddle.static.program_guard(main_program, startup_program): - data1 = paddle.full( - shape=[1, 3, 8, 8], fill_value=0.5, dtype='float32') - data2 = paddle.full( - shape=[1, 3, 5, 5], fill_value=0.5, dtype='float32') + data1 = paddle.full(shape=[1, 3, 8, 8], + fill_value=0.5, + dtype='float32') + data2 = paddle.full(shape=[1, 3, 5, 5], + fill_value=0.5, + dtype='float32') shape = paddle.shape(data2) with paddle.static.device_guard("cpu"): shape = paddle.slice(shape, axes=[0], starts=[0], ends=[4]) @@ -95,13 +100,16 @@ class TestDeviceGuard(unittest.TestCase): main_program = paddle.static.Program() startup_program = paddle.static.Program() with paddle.static.program_guard(main_program, startup_program): - x = paddle.full( - shape=[2, 255, 13, 13], fill_value=0.3, dtype='float32') - gt_box = paddle.full( - shape=[2, 6, 4], fill_value=0.5, dtype='float32') + x = paddle.full(shape=[2, 255, 13, 13], + fill_value=0.3, + dtype='float32') + gt_box = paddle.full(shape=[2, 6, 4], + fill_value=0.5, + dtype='float32') gt_label = paddle.full(shape=[2, 6], fill_value=1.0, dtype='int32') - gt_score = paddle.full( - shape=[2, 6], fill_value=0.5, dtype='float32') + gt_score = paddle.full(shape=[2, 6], + fill_value=0.5, + dtype='float32') anchors = [ 10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326 @@ -109,16 +117,15 @@ class TestDeviceGuard(unittest.TestCase): anchor_mask = [0, 1, 2] with paddle.static.device_guard("gpu"): # yolov3_loss only has cpu kernel, so its cpu kernel will be executed - loss = fluid.layers.yolov3_loss( - x=x, - gt_box=gt_box, - gt_label=gt_label, - gt_score=gt_score, - anchors=anchors, - anchor_mask=anchor_mask, - class_num=80, - ignore_thresh=0.7, - downsample_ratio=32) + loss = fluid.layers.yolov3_loss(x=x, + gt_box=gt_box, + gt_label=gt_label, + gt_score=gt_score, + anchors=anchors, + anchor_mask=anchor_mask, + class_num=80, + ignore_thresh=0.7, + downsample_ratio=32) execute(main_program, startup_program) @@ -151,6 +158,7 @@ class TestDeviceGuard(unittest.TestCase): execute(main_program, startup_program) def test_error(self): + def device_attr(): with paddle.static.device_guard("cpu1"): out = paddle.full(shape=[1], fill_value=0.2, dtype='float32') @@ -167,10 +175,12 @@ class TestDeviceGuard(unittest.TestCase): main_program = paddle.static.Program() startup_program = paddle.static.Program() with paddle.static.program_guard(main_program, startup_program): - data1 = paddle.static.data( - name="data_1", shape=[4, 2], dtype="float32") - label = paddle.static.data( - name="label", 
shape=[4, 1], dtype="int64") + data1 = paddle.static.data(name="data_1", + shape=[4, 2], + dtype="float32") + label = paddle.static.data(name="label", + shape=[4, 1], + dtype="int64") fc1 = paddle.static.nn.fc(x=data1, size=10) fc2 = paddle.static.nn.fc(x=fc1, size=10) with paddle.static.device_guard("gpu"): diff --git a/python/paddle/fluid/tests/unittests/test_dgc_momentum_op.py b/python/paddle/fluid/tests/unittests/test_dgc_momentum_op.py index 39558d95a6e..d827c500995 100644 --- a/python/paddle/fluid/tests/unittests/test_dgc_momentum_op.py +++ b/python/paddle/fluid/tests/unittests/test_dgc_momentum_op.py @@ -22,6 +22,7 @@ import paddle.fluid as fluid class TestDGCMomentumOp1(unittest.TestCase): + def get_tensor(self, name, value, place=None): tensor = self.scope.var(name).get_tensor() tensor.set(value, self.place if place is None else place) @@ -49,14 +50,14 @@ class TestDGCMomentumOp1(unittest.TestCase): # get tensor self.param_name, self.param_tensor = self.get_tensor('Param', param) self.grad_name, self.grad_tensor = self.get_tensor('Grad', grad) - self.velocity_name, self.velocity_tensor = self.get_tensor('Velocity', - velocity) + self.velocity_name, self.velocity_tensor = self.get_tensor( + 'Velocity', velocity) self.learning_rate_name, self.learning_rate_tensor = self.get_tensor( 'LearningRate', learning_rate) self.current_step_name, self.current_step_tensor = self.get_tensor( 'current_step', current_step, core.CPUPlace()) - self.nranks_name, self.nranks_tensor = self.get_tensor('nranks', nranks, - core.CPUPlace()) + self.nranks_name, self.nranks_tensor = self.get_tensor( + 'nranks', nranks, core.CPUPlace()) self.kwargs = { # inputs @@ -95,10 +96,9 @@ class TestDGCMomentumOp1(unittest.TestCase): def check(self, actual_t, expect_t, place, out_name, atol=1e-5): self.assertTrue( - np.allclose( - actual_t, expect_t, atol=atol), - "Output (" + out_name + ") has diff at " + str(place) + "\nExpect " - + str(expect_t) + "\n" + "But Got" + str(actual_t)) + np.allclose(actual_t, expect_t, atol=atol), + "Output (" + out_name + ") has diff at " + str(place) + + "\nExpect " + str(expect_t) + "\n" + "But Got" + str(actual_t)) def check_momentum_step(self, place): self.setup(place=place) @@ -106,13 +106,11 @@ class TestDGCMomentumOp1(unittest.TestCase): dgc_momentum_op = Operator(self.op_type, **self.kwargs) dgc_momentum_op.run(self.scope, self.place) - self.check( - np.array(self.param_tensor), self.outputs['ParamOut'], self.place, - self.param_name) + self.check(np.array(self.param_tensor), self.outputs['ParamOut'], + self.place, self.param_name) - self.check( - np.array(self.velocity_tensor), self.outputs['VelocityOut'], - self.place, self.velocity_name) + self.check(np.array(self.velocity_tensor), self.outputs['VelocityOut'], + self.place, self.velocity_name) def check_sgd_step(self, place): self.setup(place=place, step=15.0) @@ -120,9 +118,8 @@ class TestDGCMomentumOp1(unittest.TestCase): dgc_momentum_op = Operator(self.op_type, **self.kwargs) dgc_momentum_op.run(self.scope, self.place) - self.check( - np.array(self.param_tensor), self.outputs['SGDOut'], self.place, - self.param_name) + self.check(np.array(self.param_tensor), self.outputs['SGDOut'], + self.place, self.param_name) def test_cuda_place(self): if not core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_dgc_op.py b/python/paddle/fluid/tests/unittests/test_dgc_op.py index 634fd64bc72..0ab710b8cbb 100644 --- a/python/paddle/fluid/tests/unittests/test_dgc_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_dgc_op.py @@ -25,6 +25,7 @@ g_array_size = 102400 class TestDGCOp(unittest.TestCase): + def setup(self, place, array_size=g_array_size): size = array_size np.random.seed(5) # fix seed @@ -59,7 +60,7 @@ class TestDGCOp(unittest.TestCase): self.k = np.full((1), 0.0).astype("float32") self.gather_buff_name = "GatherBuff" - # scope data + # scope data self.u_tensor = self.scope.var(self.u_name).get_tensor() self.u_tensor.set(self.u, place) @@ -90,10 +91,9 @@ class TestDGCOp(unittest.TestCase): def check(self, actual_t, expect_t, place, out_name, atol=1e-5): self.assertTrue( - np.allclose( - actual_t, expect_t, atol=atol), - "Output (" + out_name + ") has diff at " + str(place) + "\nExpect " - + str(expect_t) + "\n" + "But Got" + str(actual_t)) + np.allclose(actual_t, expect_t, atol=atol), + "Output (" + out_name + ") has diff at " + str(place) + + "\nExpect " + str(expect_t) + "\n" + "But Got" + str(actual_t)) def test_run_and_check(self): self.setup(place=core.CUDAPlace(0)) diff --git a/python/paddle/fluid/tests/unittests/test_dgc_optimizer.py b/python/paddle/fluid/tests/unittests/test_dgc_optimizer.py index f3878dfa2bc..06488c8f59b 100644 --- a/python/paddle/fluid/tests/unittests/test_dgc_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_dgc_optimizer.py @@ -23,11 +23,14 @@ import paddle.fluid.regularizer as regularizer import paddle.fluid.clip as clip import paddle.compat as cpt from paddle.fluid.backward import append_backward + paddle.enable_static() class TestDGCMomentumOptimizer(unittest.TestCase): + class MockDGCMomentum(optimizer.DGCMomentumOptimizer): + def get_accumulators(self): return self._accumulators @@ -50,22 +53,21 @@ class TestDGCMomentumOptimizer(unittest.TestCase): optimize_attr={'learning_rate': 1.1}, regularizer=None if regularization is not None else regularizer.L2DecayRegularizer(2e-4)) - mul_y = block.create_var( - dtype="float32", - shape=[dims[1], dims[2]], - lod_level=0, - name="mul.y") - mul_out = block.create_var( - dtype="float32", - shape=[dims[0], dims[2]], - lod_level=0, - name="mul.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) + mul_y = block.create_var(dtype="float32", + shape=[dims[1], dims[2]], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[dims[0], dims[2]], + lod_level=0, + name="mul.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) learning_rate = 0.01 dgc_momentum_optimizer = self.MockDGCMomentum( @@ -83,10 +85,13 @@ class TestDGCMomentumOptimizer(unittest.TestCase): dgc_momentum_optimizer.get_accumulators = dgc_momentum_optimizer._optimizer.get_accumulators dgc_momentum_optimizer.get_velocity_str = dgc_momentum_optimizer._optimizer.get_velocity_str - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) # params_grads = append_backward(mean_out) params_grads = dgc_momentum_optimizer.backward( mean_out, startup_program=init_program) @@ -96,8 +101,8 @@ class TestDGCMomentumOptimizer(unittest.TestCase): accumulator_count = 1 if name == "momentum" else 2 self.assertEqual(len(params_grads), 1) - 
self.assertEqual( - len(dgc_momentum_optimizer.get_accumulators()), accumulator_count) + self.assertEqual(len(dgc_momentum_optimizer.get_accumulators()), + accumulator_count) self.assertEqual(len(opts), 2) sgd_op = opts[-1] @@ -152,8 +157,8 @@ class TestDGCMomentumOptimizer(unittest.TestCase): regularization=regularizer.L2Decay(1e-4)) # check param.regularizer in dgc - self.check_dgc_momentum_optimizer( - dims=[16, 1024, 8], name="dgc_momentum") + self.check_dgc_momentum_optimizer(dims=[16, 1024, 8], + name="dgc_momentum") def test_momentum_with_dgc_recompute(self): # 16 * 1024 = 16384, use dgc momentum diff --git a/python/paddle/fluid/tests/unittests/test_diag.py b/python/paddle/fluid/tests/unittests/test_diag.py index 29f5a90726d..507f98a613c 100644 --- a/python/paddle/fluid/tests/unittests/test_diag.py +++ b/python/paddle/fluid/tests/unittests/test_diag.py @@ -24,6 +24,7 @@ from paddle.fluid import Program, program_guard class TestDiagOp(OpTest): + def setUp(self): self.op_type = "diag" self.init_config() @@ -40,11 +41,13 @@ class TestDiagOp(OpTest): class TestDiagOpCase1(TestDiagOp): + def init_config(self): self.case = np.array([3], dtype='int32') class TestDiagError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): diff --git a/python/paddle/fluid/tests/unittests/test_diag_embed.py b/python/paddle/fluid/tests/unittests/test_diag_embed.py index 9df8fc7d575..c7f933d23ea 100644 --- a/python/paddle/fluid/tests/unittests/test_diag_embed.py +++ b/python/paddle/fluid/tests/unittests/test_diag_embed.py @@ -24,6 +24,7 @@ import paddle.fluid.core as core class TestDiagEmbedOp(OpTest): + def setUp(self): self.op_type = "diag_embed" self.init_config() @@ -40,6 +41,7 @@ class TestDiagEmbedOp(OpTest): class TestDiagEmbedOpCase1(TestDiagEmbedOp): + def init_config(self): self.case = np.random.randn(2, 3).astype('float32') self.inputs = {'Input': self.case} @@ -49,6 +51,7 @@ class TestDiagEmbedOpCase1(TestDiagEmbedOp): class TestDiagEmbedAPICase(unittest.TestCase): + def test_case1(self): diag_embed = np.random.randn(2, 3, 4).astype('float32') data1 = fluid.data(name='data1', shape=[2, 3, 4], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/test_diag_v2.py b/python/paddle/fluid/tests/unittests/test_diag_v2.py index 4047ccb8782..aaae8e65730 100644 --- a/python/paddle/fluid/tests/unittests/test_diag_v2.py +++ b/python/paddle/fluid/tests/unittests/test_diag_v2.py @@ -25,6 +25,7 @@ from paddle.fluid.framework import _test_eager_guard class TestDiagV2Op(OpTest): + def setUp(self): self.op_type = "diag_v2" self.python_api = paddle.diag @@ -54,24 +55,28 @@ class TestDiagV2Op(OpTest): class TestDiagV2OpCase1(TestDiagV2Op): + def init_config(self): self.offset = 1 self.out = np.diag(self.x, self.offset) class TestDiagV2OpCase2(TestDiagV2Op): + def init_config(self): self.offset = -1 self.out = np.diag(self.x, self.offset) class TestDiagV2OpCase3(TestDiagV2Op): + def init_config(self): self.x = np.random.randint(-10, 10, size=(10, 10)).astype("float64") self.out = np.diag(self.x, self.offset) class TestDiagV2OpCase4(TestDiagV2Op): + def init_config(self): self.x = np.random.rand(100) self.padding_value = 2 @@ -81,6 +86,7 @@ class TestDiagV2OpCase4(TestDiagV2Op): class TestDiagV2Error(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): @@ -101,6 +107,7 @@ class TestDiagV2Error(unittest.TestCase): class TestDiagV2API(unittest.TestCase): + def setUp(self): self.input_np = 
np.random.random(size=(10, 10)).astype(np.float32) self.expected0 = np.diag(self.input_np) @@ -190,11 +197,13 @@ class TestDiagV2API(unittest.TestCase): x = paddle.static.data(name='input', shape=[10, 10], dtype='float32') x2 = paddle.static.data(name='input2', shape=[100], dtype='float64') x3 = paddle.static.data(name='input3', shape=[100], dtype='int64') - x4 = paddle.static.data( - name='input4', shape=[2000, 2000], dtype='float32') + x4 = paddle.static.data(name='input4', + shape=[2000, 2000], + dtype='float32') x5 = paddle.static.data(name='input5', shape=[2000], dtype='float32') - x6 = paddle.static.data( - name='input6', shape=[2000, 1500], dtype='float32') + x6 = paddle.static.data(name='input6', + shape=[2000, 1500], + dtype='float32') result0 = paddle.diag(x) result1 = paddle.diag(x, offset=1) result2 = paddle.diag(x, offset=-1) diff --git a/python/paddle/fluid/tests/unittests/test_diagflat.py b/python/paddle/fluid/tests/unittests/test_diagflat.py index ec74855ba25..98f8c3d434f 100644 --- a/python/paddle/fluid/tests/unittests/test_diagflat.py +++ b/python/paddle/fluid/tests/unittests/test_diagflat.py @@ -21,6 +21,7 @@ from paddle.static import Program, program_guard class TestDiagFlatError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): @@ -36,6 +37,7 @@ class TestDiagFlatError(unittest.TestCase): class TestDiagFlatAPI(unittest.TestCase): + def setUp(self): self.input_np = np.random.random(size=(10, 10)).astype(np.float64) self.expected0 = np.diagflat(self.input_np) @@ -77,10 +79,11 @@ class TestDiagFlatAPI(unittest.TestCase): place = paddle.CUDAPlace(0) if use_gpu else paddle.CPUPlace() exe = paddle.static.Executor(place) exe.run(paddle.static.default_startup_program()) - res0, res3 = exe.run( - feed={"input": self.input_np, - 'input2': self.input_np2}, - fetch_list=[result0, result3]) + res0, res3 = exe.run(feed={ + "input": self.input_np, + 'input2': self.input_np2 + }, + fetch_list=[result0, result3]) self.assertTrue(np.allclose(res0, self.expected0)) self.assertTrue(np.allclose(res3, self.expected3)) diff --git a/python/paddle/fluid/tests/unittests/test_diagonal_op.py b/python/paddle/fluid/tests/unittests/test_diagonal_op.py index 7db5fcb9625..b5600f21b78 100644 --- a/python/paddle/fluid/tests/unittests/test_diagonal_op.py +++ b/python/paddle/fluid/tests/unittests/test_diagonal_op.py @@ -28,6 +28,7 @@ paddle.enable_static() class TestDiagonalOp(OpTest): + def setUp(self): self.op_type = "diagonal" self.python_api = paddle.diagonal @@ -44,63 +45,62 @@ class TestDiagonalOp(OpTest): self.case = np.random.randn(10, 5, 2).astype('float64') self.inputs = {'Input': self.case} self.attrs = {'offset': 0, 'axis1': 0, 'axis2': 1} - self.target = np.diagonal( - self.inputs['Input'], - offset=self.attrs['offset'], - axis1=self.attrs['axis1'], - axis2=self.attrs['axis2']) + self.target = np.diagonal(self.inputs['Input'], + offset=self.attrs['offset'], + axis1=self.attrs['axis1'], + axis2=self.attrs['axis2']) class TestDiagonalOpCase1(TestDiagonalOp): + def init_config(self): self.case = np.random.randn(4, 2, 4, 4).astype('float32') self.inputs = {'Input': self.case} self.attrs = {'offset': -2, 'axis1': 3, 'axis2': 0} - self.target = np.diagonal( - self.inputs['Input'], - offset=self.attrs['offset'], - axis1=self.attrs['axis1'], - axis2=self.attrs['axis2']) + self.target = np.diagonal(self.inputs['Input'], + offset=self.attrs['offset'], + axis1=self.attrs['axis1'], + axis2=self.attrs['axis2']) class 
TestDiagonalOpCase2(TestDiagonalOp): + def init_config(self): self.case = np.random.randn(100, 100).astype('int64') self.inputs = {'Input': self.case} self.attrs = {'offset': 0, 'axis1': 0, 'axis2': 1} - self.target = np.diagonal( - self.inputs['Input'], - offset=self.attrs['offset'], - axis1=self.attrs['axis1'], - axis2=self.attrs['axis2']) + self.target = np.diagonal(self.inputs['Input'], + offset=self.attrs['offset'], + axis1=self.attrs['axis1'], + axis2=self.attrs['axis2']) self.grad_x = np.eye(100).astype('int64') self.grad_out = np.ones(100).astype('int64') def test_check_grad(self): - self.check_grad( - ['Input'], - 'Out', - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out], - check_eager=True) + self.check_grad(['Input'], + 'Out', + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out], + check_eager=True) class TestDiagonalOpCase3(TestDiagonalOp): + def init_config(self): self.case = np.random.randint(0, 2, (4, 2, 4, 4)).astype('bool') self.inputs = {'Input': self.case} self.attrs = {'offset': -2, 'axis1': 3, 'axis2': 0} - self.target = np.diagonal( - self.inputs['Input'], - offset=self.attrs['offset'], - axis1=self.attrs['axis1'], - axis2=self.attrs['axis2']) + self.target = np.diagonal(self.inputs['Input'], + offset=self.attrs['offset'], + axis1=self.attrs['axis1'], + axis2=self.attrs['axis2']) def test_check_grad(self): pass class TestDiagonalAPI(unittest.TestCase): + def setUp(self): self.shape = [10, 3, 4] self.x = np.random.random((10, 3, 4)).astype(np.float32) diff --git a/python/paddle/fluid/tests/unittests/test_diff_op.py b/python/paddle/fluid/tests/unittests/test_diff_op.py index b4359754520..dad8bcd70c1 100644 --- a/python/paddle/fluid/tests/unittests/test_diff_op.py +++ b/python/paddle/fluid/tests/unittests/test_diff_op.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard class TestDiffOp(unittest.TestCase): + def set_args(self): self.input = np.array([1, 4, 5, 2]).astype('float32') self.n = 1 @@ -32,18 +33,21 @@ class TestDiffOp(unittest.TestCase): def get_output(self): if self.prepend is not None and self.append is not None: - self.output = np.diff( - self.input, - n=self.n, - axis=self.axis, - prepend=self.prepend, - append=self.append) + self.output = np.diff(self.input, + n=self.n, + axis=self.axis, + prepend=self.prepend, + append=self.append) elif self.prepend is not None: - self.output = np.diff( - self.input, n=self.n, axis=self.axis, prepend=self.prepend) + self.output = np.diff(self.input, + n=self.n, + axis=self.axis, + prepend=self.prepend) elif self.append is not None: - self.output = np.diff( - self.input, n=self.n, axis=self.axis, append=self.append) + self.output = np.diff(self.input, + n=self.n, + axis=self.axis, + append=self.append) else: self.output = np.diff(self.input, n=self.n, axis=self.axis) @@ -62,12 +66,11 @@ class TestDiffOp(unittest.TestCase): self.prepend = paddle.to_tensor(self.prepend, place=place) if self.append is not None: self.append = paddle.to_tensor(self.append, place=place) - out = paddle.diff( - x, - n=self.n, - axis=self.axis, - prepend=self.prepend, - append=self.append) + out = paddle.diff(x, + n=self.n, + axis=self.axis, + prepend=self.prepend, + append=self.append) self.assertTrue((out.numpy() == self.output).all(), True) def test_dygraph(self): @@ -84,29 +87,29 @@ class TestDiffOp(unittest.TestCase): places.append(fluid.CUDAPlace(0)) for place in places: with fluid.program_guard(fluid.Program(), fluid.Program()): - x = paddle.fluid.data( - 
name="input", - shape=self.input.shape, - dtype=self.input.dtype) + x = paddle.fluid.data(name="input", + shape=self.input.shape, + dtype=self.input.dtype) has_pend = False prepend = None append = None if self.prepend is not None: has_pend = True - prepend = paddle.fluid.data( - name="prepend", - shape=self.prepend.shape, - dtype=self.prepend.dtype) + prepend = paddle.fluid.data(name="prepend", + shape=self.prepend.shape, + dtype=self.prepend.dtype) if self.append is not None: has_pend = True - append = paddle.fluid.data( - name="append", - shape=self.append.shape, - dtype=self.append.dtype) + append = paddle.fluid.data(name="append", + shape=self.append.shape, + dtype=self.append.dtype) exe = fluid.Executor(place) - out = paddle.diff( - x, n=self.n, axis=self.axis, prepend=prepend, append=append) + out = paddle.diff(x, + n=self.n, + axis=self.axis, + prepend=prepend, + append=append) fetches = exe.run(fluid.default_main_program(), feed={ "input": self.input, @@ -123,12 +126,11 @@ class TestDiffOp(unittest.TestCase): self.prepend = paddle.to_tensor(self.prepend, place=place) if self.append is not None: self.append = paddle.to_tensor(self.append, place=place) - out = paddle.diff( - x, - n=self.n, - axis=self.axis, - prepend=self.prepend, - append=self.append) + out = paddle.diff(x, + n=self.n, + axis=self.axis, + prepend=self.prepend, + append=self.append) try: out.backward() x_grad = x.grad @@ -144,6 +146,7 @@ class TestDiffOp(unittest.TestCase): class TestDiffOpAxis(TestDiffOp): + def set_args(self): self.input = np.array([[1, 4, 5, 2], [1, 5, 4, 2]]).astype('float32') self.n = 1 @@ -153,6 +156,7 @@ class TestDiffOpAxis(TestDiffOp): class TestDiffOpNDim(TestDiffOp): + def set_args(self): self.input = np.random.rand(10, 10).astype('float32') self.n = 1 @@ -162,6 +166,7 @@ class TestDiffOpNDim(TestDiffOp): class TestDiffOpBool(TestDiffOp): + def set_args(self): self.input = np.array([0, 1, 1, 0, 1, 0]).astype('bool') self.n = 1 @@ -171,6 +176,7 @@ class TestDiffOpBool(TestDiffOp): class TestDiffOpPrepend(TestDiffOp): + def set_args(self): self.input = np.array([[1, 4, 5, 2], [1, 5, 4, 2]]).astype('float32') self.n = 1 @@ -180,16 +186,18 @@ class TestDiffOpPrepend(TestDiffOp): class TestDiffOpPrependAxis(TestDiffOp): + def set_args(self): self.input = np.array([[1, 4, 5, 2], [1, 5, 4, 2]]).astype('float32') self.n = 1 self.axis = 0 - self.prepend = np.array( - [[0, 2, 3, 4], [1, 3, 5, 7], [2, 5, 8, 0]]).astype('float32') + self.prepend = np.array([[0, 2, 3, 4], [1, 3, 5, 7], + [2, 5, 8, 0]]).astype('float32') self.append = None class TestDiffOpAppend(TestDiffOp): + def set_args(self): self.input = np.array([[1, 4, 5, 2], [1, 5, 4, 2]]).astype('float32') self.n = 1 @@ -199,6 +207,7 @@ class TestDiffOpAppend(TestDiffOp): class TestDiffOpAppendAxis(TestDiffOp): + def set_args(self): self.input = np.array([[1, 4, 5, 2], [1, 5, 4, 2]]).astype('float32') self.n = 1 @@ -208,6 +217,7 @@ class TestDiffOpAppendAxis(TestDiffOp): class TestDiffOpPreAppend(TestDiffOp): + def set_args(self): self.input = np.array([[1, 4, 5, 2], [1, 5, 4, 2]]).astype('float32') self.n = 1 @@ -217,6 +227,7 @@ class TestDiffOpPreAppend(TestDiffOp): class TestDiffOpPreAppendAxis(TestDiffOp): + def set_args(self): self.input = np.array([[1, 4, 5, 2], [1, 5, 4, 2]]).astype('float32') self.n = 1 diff --git a/python/paddle/fluid/tests/unittests/test_digamma_op.py b/python/paddle/fluid/tests/unittests/test_digamma_op.py index 4897becf611..27ba710a96d 100644 --- a/python/paddle/fluid/tests/unittests/test_digamma_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_digamma_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,6 +24,7 @@ from paddle.fluid.framework import _test_eager_guard class TestDigammaOp(OpTest): + def setUp(self): # switch to static paddle.enable_static() @@ -49,6 +50,7 @@ class TestDigammaOp(OpTest): class TestDigammaOpFp32(TestDigammaOp): + def init_dtype_type(self): self.dtype = np.float32 @@ -57,6 +59,7 @@ class TestDigammaOpFp32(TestDigammaOp): class TestDigammaAPI(unittest.TestCase): + def setUp(self): # switch to static paddle.enable_static() @@ -68,6 +71,7 @@ class TestDigammaAPI(unittest.TestCase): self._shape = [8, 3, 32, 32] def test_in_static_mode(self): + def init_input_output(dtype): input = np.random.random(self._shape).astype(dtype) return {'x': input}, psi(input) @@ -82,8 +86,7 @@ class TestDigammaAPI(unittest.TestCase): exe = static.Executor(place) out_value = exe.run(feed=input_dict, fetch_list=[out.name]) self.assertEqual( - np.allclose( - out_value[0], sc_res, rtol=1e-5), True) + np.allclose(out_value[0], sc_res, rtol=1e-5), True) def test_in_dynamic_mode(self): for dtype in self.dtypes: diff --git a/python/paddle/fluid/tests/unittests/test_directory_migration.py b/python/paddle/fluid/tests/unittests/test_directory_migration.py index 2ec16a9dcab..727fcb28cc2 100644 --- a/python/paddle/fluid/tests/unittests/test_directory_migration.py +++ b/python/paddle/fluid/tests/unittests/test_directory_migration.py @@ -24,6 +24,7 @@ import paddle class TestDirectory(unittest.TestCase): + def get_import_command(self, module): paths = module.split('.') if len(paths) == 1: @@ -86,10 +87,9 @@ class TestDirectory(unittest.TestCase): _python = sys.executable ps_cmd = "{} {}".format(_python, import_file) - ps_proc = subprocess.Popen( - ps_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + ps_proc = subprocess.Popen(ps_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) stdout, stderr = ps_proc.communicate() self.assertFalse("Error" in str(stderr), @@ -169,10 +169,9 @@ if count != {len_old_directory}: _python = sys.executable ps_cmd = "{} {}".format(_python, import_file) - ps_proc = subprocess.Popen( - ps_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + ps_proc = subprocess.Popen(ps_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) stdout, stderr = ps_proc.communicate() self.assertFalse("Error" in str(stdout), bytes.decode(stdout)) diff --git a/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py b/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py index dbe9dcb7f82..655c2fbfb79 100644 --- a/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py +++ b/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py @@ -27,15 +27,15 @@ SignalsToTest = { class TestSignOpError(unittest.TestCase): + def test_errors(self): try: for sig in SignalsToTest: - output = subprocess.check_output( - [ - "python", "-c", - f"import paddle; import signal,os; 
paddle.disable_signal_handler(); os.kill(os.getpid(), {sig})" - ], - stderr=subprocess.STDOUT) + output = subprocess.check_output([ + "python", "-c", + f"import paddle; import signal,os; paddle.disable_signal_handler(); os.kill(os.getpid(), {sig})" + ], + stderr=subprocess.STDOUT) except Exception as e: # If paddle signal handler is enabled # One would expect "paddle::framework::SignalHandle" in STDERR diff --git a/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py b/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py index 2adf6e41931..3c3a5b047f3 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py @@ -21,6 +21,7 @@ paddle.enable_static() class TestDistMnistNCCL2(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py index 4f21b3220a9..3e03634987a 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -59,6 +59,7 @@ def eprint(*args, **kwargs): class TestDistRunnerBase(object): + def get_model(self, batch_size=DEFAULT_BATCH_SIZE, lr=0.1, @@ -88,13 +89,12 @@ class TestDistRunnerBase(object): config.nccl_comm_num = nccl_comm_num # config.runtime_split_send_recv = True t = fluid.DistributeTranspiler(config=config) - t.transpile( - trainer_id=trainer_id, - program=main_program, - pservers=pserver_endpoints, - trainers=trainers, - sync_mode=sync_mode, - current_endpoint=current_endpoint) + t.transpile(trainer_id=trainer_id, + program=main_program, + pservers=pserver_endpoints, + trainers=trainers, + sync_mode=sync_mode, + current_endpoint=current_endpoint) return t @staticmethod @@ -111,14 +111,13 @@ class TestDistRunnerBase(object): self.get_model(batch_size=args.batch_size) # NOTE: pserver should not call memory optimize - t = self.get_transpiler( - trainer_id=args.trainer_id, - main_program=fluid.default_main_program(), - pserver_endpoints=args.endpoints, - trainers=args.trainers, - sync_mode=args.sync_mode, - dc_asgd=args.dc_asgd, - hogwild_mode=args.hogwild) + t = self.get_transpiler(trainer_id=args.trainer_id, + main_program=fluid.default_main_program(), + pserver_endpoints=args.endpoints, + trainers=args.trainers, + sync_mode=args.sync_mode, + dc_asgd=args.dc_asgd, + hogwild_mode=args.hogwild) pserver_prog = t.get_pserver_program(args.current_endpoint) startup_prog = t.get_startup_program(args.current_endpoint, pserver_prog) @@ -195,8 +194,8 @@ class TestDistRunnerBase(object): eprint(type(self).__name__, "run worker startup program done.") feed_var_list = [ - var - for var in fluid.default_main_program().global_block().vars.values() + var for var in + fluid.default_main_program().global_block().vars.values() if var.is_data ] @@ -366,14 +365,13 @@ class TestDistRunnerBase(object): print_to_err( type(self).__name__, "begin to run transpile on trainer with pserver mode") - t = self.get_transpiler( - trainer_id=args.trainer_id, - main_program=fluid.default_main_program(), - pserver_endpoints=args.endpoints, - trainers=args.trainers, - sync_mode=args.sync_mode, - dc_asgd=args.dc_asgd, - hogwild_mode=args.hogwild) + t = self.get_transpiler(trainer_id=args.trainer_id, + main_program=fluid.default_main_program(), + pserver_endpoints=args.endpoints, + trainers=args.trainers, + sync_mode=args.sync_mode, + dc_asgd=args.dc_asgd, + hogwild_mode=args.hogwild) 
trainer_prog = t.get_trainer_program() print_to_err( @@ -391,12 +389,11 @@ class TestDistRunnerBase(object): type(self).__name__, "begin to run transpile on trainer with nccl2 mode") nccl2_t = fluid.DistributeTranspiler(config=config) - nccl2_t.transpile( - args.trainer_id, - program=fluid.default_main_program(), - startup_program=fluid.default_startup_program(), - trainers=args.endpoints, - current_endpoint=args.current_endpoint) + nccl2_t.transpile(args.trainer_id, + program=fluid.default_main_program(), + startup_program=fluid.default_startup_program(), + trainers=args.endpoints, + current_endpoint=args.current_endpoint) print_to_err( type(self).__name__, "get trainer program done. with nccl2 mode") @@ -502,6 +499,7 @@ class TestDistRunnerBase(object): class TestParallelDyGraphRunnerBase(object): + def get_model(self): raise NotImplementedError( "get_model should be implemented by child classes.") @@ -517,9 +515,9 @@ class TestParallelDyGraphRunnerBase(object): elif args.update_method != "local": new_batch = [] - # NOTE(@xiongkun03) args.diff_batch means batch length is different: - # such as : batch = [2,3,4,5], then the first rank will get [2] and - # the second rank will get [3,4,5]. + # NOTE(@xiongkun03) args.diff_batch means batch length is different: + # such as : batch = [2,3,4,5], then the first rank will get [2] and + # the second rank will get [3,4,5]. # this function is for test sparse_embedding_differ_length if hasattr(args, "diff_batch") and args.diff_batch: assert len( @@ -700,17 +698,18 @@ class TestParallelDyGraphRunnerBase(object): def runtime_main(test_class): parser = argparse.ArgumentParser(description='Run dist test.') - parser.add_argument( - '--role', type=str, required=True, choices=['pserver', 'trainer']) + parser.add_argument('--role', + type=str, + required=True, + choices=['pserver', 'trainer']) parser.add_argument('--endpoints', type=str, required=False, default="") - parser.add_argument( - '--update_method', - type=str, - default="local", - choices=[ - "pserver", "nccl2", "bkcl", "local", "nccl2_reduce_layer", "gloo", - "hccl" - ]) + parser.add_argument('--update_method', + type=str, + default="local", + choices=[ + "pserver", "nccl2", "bkcl", "local", + "nccl2_reduce_layer", "gloo", "hccl" + ]) parser.add_argument('--trainer_id', type=int, required=False, default=0) parser.add_argument('--trainers', type=int, required=False, default=1) parser.add_argument('--nccl_comm_num', type=int, required=False, default=1) @@ -722,10 +721,14 @@ def runtime_main(test_class): parser.add_argument('--use_local_sgd', action='store_true') parser.add_argument('--diff_batch', action='store_true') parser.add_argument('--ut4grad_allreduce', action='store_true') - parser.add_argument( - '--hallreduce_inter_nranks', type=int, required=False, default=2) - parser.add_argument( - '--current_endpoint', type=str, required=False, default="") + parser.add_argument('--hallreduce_inter_nranks', + type=int, + required=False, + default=2) + parser.add_argument('--current_endpoint', + type=str, + required=False, + default="") parser.add_argument('--sync_mode', action='store_true') parser.add_argument('--use_cuda', action='store_true') parser.add_argument('--use_cpu', action='store_true') @@ -738,23 +741,24 @@ def runtime_main(test_class): parser.add_argument('--dc_asgd', action='store_true') parser.add_argument('--hogwild', action='store_true') parser.add_argument('--save_model', action='store_true') - parser.add_argument( - '--use_reader_alloc', action='store_true', required=False) + 
parser.add_argument('--use_reader_alloc', + action='store_true', + required=False) parser.add_argument('--batch_size', required=False, type=int, default=2) parser.add_argument('--lr', required=False, type=float, default=0.001) - parser.add_argument( - '--batch_merge_repeat', required=False, type=int, default=1) - parser.add_argument( - '--nccl2_reduce_layer_local_run', - required=False, - type=bool, - default=False) + parser.add_argument('--batch_merge_repeat', + required=False, + type=int, + default=1) + parser.add_argument('--nccl2_reduce_layer_local_run', + required=False, + type=bool, + default=False) parser.add_argument('--sync_batch_norm', action='store_true') - parser.add_argument( - '--fuse_all_reduce', - required=False, - type=ast.literal_eval, - default=None) + parser.add_argument('--fuse_all_reduce', + required=False, + type=ast.literal_eval, + default=None) args = parser.parse_args() @@ -780,6 +784,7 @@ from contextlib import closing class TestDistBase(unittest.TestCase): + def _setup_config(self): raise NotImplementedError("tests should have _setup_config implemented") @@ -868,6 +873,7 @@ class TestDistBase(unittest.TestCase): self._after_setup_config() def _find_free_port(self): + def __free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: @@ -913,17 +919,15 @@ class TestDistBase(unittest.TestCase): ps1_pipe = open(log_name + "_ps1_err.log", "wb") print_to_err(type(self).__name__, "going to start pserver process 0") - ps0_proc = subprocess.Popen( - ps0_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=ps0_pipe, - env=required_envs) + ps0_proc = subprocess.Popen(ps0_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=ps0_pipe, + env=required_envs) print_to_err(type(self).__name__, "going to start pserver process 1") - ps1_proc = subprocess.Popen( - ps1_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=ps1_pipe, - env=required_envs) + ps1_proc = subprocess.Popen(ps1_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=ps1_pipe, + env=required_envs) return ps0_proc, ps1_proc, ps0_pipe, ps1_pipe @@ -991,17 +995,15 @@ class TestDistBase(unittest.TestCase): if check_error_log: err_log = open(log_name + "_local.log", "wb") - local_proc = subprocess.Popen( - cmd.split(" "), - stdout=subprocess.PIPE, - stderr=err_log, - env=env_local) + local_proc = subprocess.Popen(cmd.split(" "), + stdout=subprocess.PIPE, + stderr=err_log, + env=env_local) else: - local_proc = subprocess.Popen( - cmd.split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=env_local) + local_proc = subprocess.Popen(cmd.split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env_local) local_out, local_err = local_proc.communicate() @@ -1030,8 +1032,10 @@ class TestDistBase(unittest.TestCase): def _run_cluster(self, model, envs, check_error_log, log_name): # Run dist train to compare with local results - ps0, ps1, ps0_pipe, ps1_pipe = self.start_pserver( - model, check_error_log, envs, log_name=log_name) + ps0, ps1, ps0_pipe, ps1_pipe = self.start_pserver(model, + check_error_log, + envs, + log_name=log_name) ps0_ep, ps1_ep = self._ps_endpoints.split(",") @@ -1080,17 +1084,15 @@ class TestDistBase(unittest.TestCase): tr1_pipe = open(log_name + "_tr1_err.log", "wb") print_to_err(type(self).__name__, "going to start trainer process 0") - tr0_proc = subprocess.Popen( - tr0_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=tr0_pipe, - env=env0) + tr0_proc = subprocess.Popen(tr0_cmd.strip().split(" "), + stdout=subprocess.PIPE, 
+ stderr=tr0_pipe, + env=env0) print_to_err(type(self).__name__, "going to start trainer process 1") - tr1_proc = subprocess.Popen( - tr1_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=tr1_pipe, - env=env1) + tr1_proc = subprocess.Popen(tr1_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=tr1_pipe, + env=env1) # Wait until trainer process terminate while True: @@ -1285,8 +1287,10 @@ class TestDistBase(unittest.TestCase): procs = [] pipes = [] for i in range(0, trainer_num): - tr_cmd, tr_env = self._get_gloo_trainer_cmd( - model, worker_endpoints[i], update_method, i, trainer_num) + tr_cmd, tr_env = self._get_gloo_trainer_cmd(model, + worker_endpoints[i], + update_method, i, + trainer_num) tr_env.update(envs) tr_env["GLOG_vmodule"] = 'gloo_context=4' tr_env["GLOG_v"] = '3' @@ -1298,11 +1302,10 @@ class TestDistBase(unittest.TestCase): print_to_err( type(self).__name__, "going to start process {} with nccl2".format(i)) - tr_proc = subprocess.Popen( - tr_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=tr_pipe, - env=tr_env) + tr_proc = subprocess.Popen(tr_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=tr_pipe, + env=tr_env) procs.append(tr_proc) pipes.append(tr_pipe) @@ -1360,11 +1363,10 @@ class TestDistBase(unittest.TestCase): print_to_err( type(self).__name__, "going to start process {} with nccl2".format(i)) - tr_proc = subprocess.Popen( - tr_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=tr_pipe, - env=tr_env) + tr_proc = subprocess.Popen(tr_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=tr_pipe, + env=tr_env) procs.append(tr_proc) pipes.append(tr_pipe) @@ -1406,11 +1408,10 @@ class TestDistBase(unittest.TestCase): print_to_err( type(self).__name__, "going to start process {} with nccl2".format(i)) - tr_proc = subprocess.Popen( - tr_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=tr_pipe, - env=tr_env) + tr_proc = subprocess.Popen(tr_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=tr_pipe, + env=tr_env) procs.append(tr_proc) pipes.append(tr_pipe) @@ -1463,26 +1464,23 @@ class TestDistBase(unittest.TestCase): if self._dygraph and (self._gloo_mode or self._nccl2_mode): need_envs.update({"FLAGS_enable_eager_mode": "1"}) with _test_eager_guard(): - self.check_with_place_func( - model_file=model_file, - delta=delta, - check_error_log=check_error_log, - need_envs=need_envs, - log_name=log_name) + self.check_with_place_func(model_file=model_file, + delta=delta, + check_error_log=check_error_log, + need_envs=need_envs, + log_name=log_name) need_envs.update({"FLAGS_enable_eager_mode": "0"}) - self.check_with_place_func( - model_file=model_file, - delta=delta, - check_error_log=check_error_log, - need_envs=need_envs, - log_name=log_name) + self.check_with_place_func(model_file=model_file, + delta=delta, + check_error_log=check_error_log, + need_envs=need_envs, + log_name=log_name) else: - self.check_with_place_func( - model_file=model_file, - delta=delta, - check_error_log=check_error_log, - need_envs=need_envs, - log_name=log_name) + self.check_with_place_func(model_file=model_file, + delta=delta, + check_error_log=check_error_log, + need_envs=need_envs, + log_name=log_name) def check_with_place_func(self, model_file, @@ -1540,11 +1538,15 @@ class TestDistBase(unittest.TestCase): log_name=log_name) elif self._pipeline_mode: - tr0_losses, tr1_losses = self._run_pipeline( - model_file, required_envs, check_error_log, log_name=log_name) + tr0_losses, tr1_losses = self._run_pipeline(model_file, + required_envs, 
+ check_error_log, + log_name=log_name) else: - tr0_losses, tr1_losses = self._run_cluster( - model_file, required_envs, check_error_log, log_name=log_name) + tr0_losses, tr1_losses = self._run_cluster(model_file, + required_envs, + check_error_log, + log_name=log_name) for step_id in range(RUN_STEP): local_loss = local_losses[step_id] @@ -1570,20 +1572,19 @@ class TestDistBase(unittest.TestCase): required_envs = self._get_required_envs(check_error_log, need_envs) if self._use_dgc: - multi_cards_losses = self._run_local( - model_file, - required_envs, - check_error_log, - log_name=log_name + "_dgc_2cards", - devices="0,1") + multi_cards_losses = self._run_local(model_file, + required_envs, + check_error_log, + log_name=log_name + + "_dgc_2cards", + devices="0,1") self._use_dgc = False - base_losses = self._run_local( - model_file, - required_envs, - check_error_log, - log_name=log_name + "_base_2cards", - devices="0,1") + base_losses = self._run_local(model_file, + required_envs, + check_error_log, + log_name=log_name + "_base_2cards", + devices="0,1") self._use_dgc = True diff --git a/python/paddle/fluid/tests/unittests/test_dist_dygraph_apis.py b/python/paddle/fluid/tests/unittests/test_dist_dygraph_apis.py index 8e6fb99ae93..d64c2acae7e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_dygraph_apis.py +++ b/python/paddle/fluid/tests/unittests/test_dist_dygraph_apis.py @@ -19,6 +19,7 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestDygraphFleetApi(TestMultipleGpus): + def test_dygraph_fleet_api(self): self.run_mnist_2gpu('dygraph_fleet_api.py') diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py index 92dbf9f2c8c..38fea7f2413 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_PSERVER_NUMS"] = "2" os.environ["PADDLE_TRAINERS_NUM"] = "2" diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py index 35577c27121..3e683b0d693 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py @@ -22,6 +22,7 @@ paddle.enable_static() class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_PSERVER_NUMS"] = "2" os.environ["PADDLE_TRAINERS_NUM"] = "2" @@ -43,15 +44,16 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): paddle.fluid.framework.switch_startup_program(startup_program) fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = 
paddle.fluid.layers.mean(x=cost) os.environ["FLAGS_LAUNCH_BARRIER"] = "0" diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py index 36ba8f38c99..d2ed6ad7ff1 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py @@ -13,6 +13,7 @@ # limitations under the License. import os + os.environ["WITH_DISTRIBUTE"] = "ON" import unittest import paddle @@ -23,6 +24,7 @@ paddle.enable_static() class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_PSERVER_NUMS"] = "2" os.environ["PADDLE_TRAINERS_NUM"] = "2" @@ -44,12 +46,11 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): paddle.fluid.framework.switch_startup_program(startup_program) fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( - name="x", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) + input_x = paddle.fluid.layers.data(name="x", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) x_embedding = paddle.fluid.layers.embedding( is_distributed=False, input=input_x, @@ -63,8 +64,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): fc_1 = paddle.fluid.layers.fc(input=x_embedding, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) os.environ["FLAGS_LAUNCH_BARRIER"] = "0" diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py index 60fd1c525c1..707f072060a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import os + os.environ["WITH_DISTRIBUTE"] = "ON" import unittest import paddle @@ -22,6 +23,7 @@ paddle.enable_static() class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_PSERVER_NUMS"] = "2" os.environ["PADDLE_TRAINERS_NUM"] = "2" @@ -47,14 +49,15 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): input_x = paddle.fluid.layers.data(name="x", shape=[1], dtype='int64') input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') - emb = paddle.fluid.layers.embedding( - input=input_x, size=[100, 10], is_sparse=True) + emb = paddle.fluid.layers.embedding(input=input_x, + size=[100, 10], + is_sparse=True) fc_1 = paddle.fluid.layers.fc(input=emb, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) os.environ["FLAGS_LAUNCH_BARRIER"] = "0" strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py index 6c8ce0a5acc..51eb9b81619 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import os + os.environ["WITH_DISTRIBUTE"] = "ON" import unittest import paddle @@ -22,6 +23,7 @@ paddle.enable_static() class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_PSERVER_NUMS"] = "2" os.environ["PADDLE_TRAINERS_NUM"] = "2" @@ -43,15 +45,16 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): paddle.fluid.framework.switch_startup_program(startup_program) fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() @@ -75,15 +78,16 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): paddle.fluid.framework.switch_startup_program(startup_program) fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = 
paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py index 4b1f0ee85d9..3d7aa1b3fee 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_PSERVER_NUMS"] = "2" os.environ["PADDLE_TRAINERS_NUM"] = "2" diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py index 72f8a117ea9..ac1bf486182 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py @@ -38,6 +38,7 @@ import tempfile import unittest import paddle + paddle.enable_static() __all__ = ['FleetDistRunnerBase', 'TestFleetBase', 'runtime_main'] @@ -104,9 +105,12 @@ class FleetDistRunnerBase(object): # TODO(update strategy to support dump params) if False: # debug: self.strategy.set_debug_opt({ - "dump_param": self.dump_param, - "dump_fields": self.dump_fields, - "dump_fields_path": self.dump_fields_path + "dump_param": + self.dump_param, + "dump_fields": + self.dump_fields, + "dump_fields_path": + self.dump_fields_path }) return self.strategy @@ -226,6 +230,7 @@ class TestFleetBase(unittest.TestCase): self._setup_config() def _find_free_port(self): + def __free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: @@ -258,17 +263,15 @@ class TestFleetBase(unittest.TestCase): ps0_out = open(ps0_out_log, "wb+") ps1_out = open(ps1_out_log, "wb+") - ps0_proc = subprocess.Popen( - ps0_cmd.strip().split(" "), - stdout=ps0_out, - stderr=ps0_err, - env=required_envs) + ps0_proc = subprocess.Popen(ps0_cmd.strip().split(" "), + stdout=ps0_out, + stderr=ps0_err, + env=required_envs) - ps1_proc = subprocess.Popen( - ps1_cmd.strip().split(" "), - stdout=ps1_out, - stderr=ps1_err, - env=required_envs) + ps1_proc = subprocess.Popen(ps1_cmd.strip().split(" "), + stdout=ps1_out, + stderr=ps1_err, + env=required_envs) return ((ps0_proc, ps0_out, ps0_err, ps0_out_log, ps0_err_log), (ps1_proc, ps1_out, ps1_err, ps1_out_log, ps1_err_log)) @@ -293,17 +296,15 @@ class TestFleetBase(unittest.TestCase): tr0_out = open(tr0_out_log, "wb+") tr1_out = open(tr1_out_log, "wb+") - tr0_proc = subprocess.Popen( - tr0_cmd.strip().split(" "), - stdout=tr0_out, - stderr=tr0_err, - env=required_envs) + tr0_proc = subprocess.Popen(tr0_cmd.strip().split(" "), + stdout=tr0_out, + stderr=tr0_err, + env=required_envs) - tr1_proc = subprocess.Popen( - tr1_cmd.strip().split(" "), - stdout=tr1_out, - stderr=tr1_err, - env=required_envs) + tr1_proc = subprocess.Popen(tr1_cmd.strip().split(" "), + stdout=tr1_out, + stderr=tr1_err, + env=required_envs) return ((tr0_proc, tr0_out, tr0_err, tr0_out_log, tr0_err_log), (tr1_proc, tr1_out, tr1_err, tr1_out_log, tr1_err_log)) @@ -397,10 +398,10 @@ class TestFleetBase(unittest.TestCase): print("find parameter server port bind failed, skip the error") tr0_ret, tr1_ret = 0, 0 else: - for out, err in [ - (ps0_out_log, ps0_err_log), (ps1_out_log, ps1_err_log), - (tr0_out_log, tr0_err_log), (tr1_out_log, tr1_err_log) - ]: + for out, 
err in [(ps0_out_log, ps0_err_log), + (ps1_out_log, ps1_err_log), + (tr0_out_log, tr0_err_log), + (tr1_out_log, tr1_err_log)]: catlog(out) catlog(err) @@ -441,17 +442,23 @@ class TestFleetBase(unittest.TestCase): def runtime_main(test_class): parser = argparse.ArgumentParser(description='Run Fleet test.') - parser.add_argument( - '--role', type=str, required=True, choices=['pserver', 'trainer']) + parser.add_argument('--role', + type=str, + required=True, + choices=['pserver', 'trainer']) parser.add_argument('--endpoints', type=str, required=False, default="") - parser.add_argument( - '--trainer_endpoints', type=str, required=False, default="") + parser.add_argument('--trainer_endpoints', + type=str, + required=False, + default="") parser.add_argument('--gloo_path', type=str, required=False, default="") parser.add_argument('--current_id', type=int, required=False, default=0) parser.add_argument('--trainers', type=int, required=False, default=1) parser.add_argument('--mode', type=str, required=False, default='geo') - parser.add_argument( - '--geo_sgd_need_push_nums', type=int, required=False, default=2) + parser.add_argument('--geo_sgd_need_push_nums', + type=int, + required=False, + default=2) parser.add_argument('--reader', type=str, required=False, default='dataset') parser.add_argument('--test', type=int, required=False, default=0) parser.add_argument('--model_dir', type=str, required=False, default="") @@ -464,11 +471,10 @@ def runtime_main(test_class): if args.test and args.model_dir != "": avg_cost = model.net(args, is_train=False) dist_infer = DistributedInfer() - dist_infer.init_distributed_infer_env( - exe=model.get_executor(), - loss=model.avg_cost, - role_maker=role, - dirname=args.model_dir) + dist_infer.init_distributed_infer_env(exe=model.get_executor(), + loss=model.avg_cost, + role_maker=role, + dirname=args.model_dir) if fleet.is_worker(): with paddle.static.program_guard( @@ -501,9 +507,8 @@ def runtime_main(test_class): startup_program=test_startup_program): with paddle.utils.unique_name.guard(): avg_cost = model.net(args, is_train=False) - dist_infer = DistributedInfer( - main_program=test_origin_program, - startup_program=test_startup_program) + dist_infer = DistributedInfer(main_program=test_origin_program, + startup_program=test_startup_program) with paddle.static.program_guard( main_program=dist_infer.get_dist_infer_program()): model.do_distributed_testing(fleet) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py index 09d64a318d6..59d6ce70ddc 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py @@ -21,6 +21,7 @@ from test_dist_fleet_base import TestFleetBase class TestDistMnistAsyncInMemoryDataset2x2(TestFleetBase): + def _setup_config(self): self._mode = "async" #self._reader = "pyreader" @@ -53,11 +54,13 @@ class TestDistMnistAsyncInMemoryDataset2x2(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=False) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=False) class TestDistMnistAsync2x2(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "pyreader" @@ -87,11 +90,13 @@ class TestDistMnistAsync2x2(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - 
self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=False) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=False) class TestDistCtrHalfAsync2x2(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "pyreader" @@ -124,8 +129,9 @@ class TestDistCtrHalfAsync2x2(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=False) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr2.py index e73eff2acc9..ecffd1ca76e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr2.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr2.py @@ -22,6 +22,7 @@ from test_dist_fleet_base import TestFleetBase @unittest.skip(reason="Skip unstable ut, need paddle sync mode fix") class TestDistMnistSync2x2(TestFleetBase): + def _setup_config(self): self._mode = "sync" self._reader = "pyreader" @@ -52,12 +53,14 @@ class TestDistMnistSync2x2(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=False) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=False) # @unittest.skip(reason="Skip unstable ut, reader need to be rewrite") class TestDistMnistAsyncDataset2x2(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "dataset" @@ -91,8 +94,9 @@ class TestDistMnistAsyncDataset2x2(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=False) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_decay.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_decay.py index f52cace4cf3..0fa3552a091 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_decay.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_decay.py @@ -19,6 +19,7 @@ import paddle.fluid as fluid import os import unittest import paddle + paddle.enable_static() # For Net @@ -33,14 +34,18 @@ batch_size = 4 class TestNoamDecay(unittest.TestCase): + def net(self): - input_data = paddle.static.data( - name="sparse_input", shape=[None, 1], dtype="int64") - input_label = paddle.static.data( - name="label", shape=[None, 1], dtype="int64") + input_data = paddle.static.data(name="sparse_input", + shape=[None, 1], + dtype="int64") + input_label = paddle.static.data(name="label", + shape=[None, 1], + dtype="int64") label = paddle.cast(input_label, dtype="float32") - embedding = paddle.static.nn.embedding( - input_data, is_sparse=True, size=[1000, 128]) + embedding = paddle.static.nn.embedding(input_data, + is_sparse=True, + size=[1000, 128]) fc1 = paddle.static.nn.fc(embedding, size=1024, activation="relu") fc2 = paddle.static.nn.fc(fc1, size=512, activation="relu") @@ -57,16 +62,16 @@ class TestNoamDecay(unittest.TestCase): "127.0.0.1:36007" ] - role = role_maker.UserDefinedRoleMaker( - current_id=0, - role=role_maker.Role.WORKER, - worker_num=2, - server_endpoints=endpoints) + role = 
role_maker.UserDefinedRoleMaker(current_id=0, + role=role_maker.Role.WORKER, + worker_num=2, + server_endpoints=endpoints) fleet.init(role) loss = self.net() - scheduler = paddle.optimizer.lr.NoamDecay( - d_model=0.01, warmup_steps=100, verbose=True) + scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, + warmup_steps=100, + verbose=True) optimizer = fluid.optimizer.Adam(scheduler) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py index 052dec6981e..164694de8d5 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py @@ -15,6 +15,7 @@ from __future__ import print_function import os + os.environ["WITH_DISTRIBUTE"] = "ON" import unittest import paddle @@ -24,10 +25,12 @@ import paddle.distributed.fleet.base.role_maker as role_maker from test_dist_fleet_base import TestFleetBase from dist_fleet_simnet_bow import train_network + paddle.enable_static() class TestDistGeoCtr_2x2(TestFleetBase): + def _setup_config(self): self._mode = "geo" self._reader = "pyreader" @@ -57,11 +60,13 @@ class TestDistGeoCtr_2x2(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=False) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=False) class TestGeoSgdTranspiler(unittest.TestCase): + def test_pserver(self): role = role_maker.UserDefinedRoleMaker( current_id=0, diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py index b4bc0d8dadc..c01314389e8 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py @@ -31,6 +31,7 @@ from test_dist_fleet_base import TestFleetBase class TestDistGloo_2x2(TestFleetBase): + def _setup_config(self): self._mode = "sync" self._reader = "pyreader" @@ -51,20 +52,18 @@ class TestDistGloo_2x2(TestFleetBase): required_envs["POD_IP"] = "127.0.0.1" required_envs["PADDLE_PSERVER_ID"] = "0" required_envs["PADDLE_PORT"] = "36011" - ps0_proc = subprocess.Popen( - ps0_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=ps0_pipe, - env=required_envs) + ps0_proc = subprocess.Popen(ps0_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=ps0_pipe, + env=required_envs) print("PADDLE_PSERVER_ID=0:") print(required_envs) required_envs["PADDLE_PSERVER_ID"] = "1" required_envs["PADDLE_PORT"] = "36012" - ps1_proc = subprocess.Popen( - ps1_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=ps1_pipe, - env=required_envs) + ps1_proc = subprocess.Popen(ps1_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=ps1_pipe, + env=required_envs) print("PADDLE_PSERVER_ID=1:") print(required_envs) return ps0_proc, ps1_proc, ps0_pipe, ps1_pipe @@ -78,19 +77,17 @@ class TestDistGloo_2x2(TestFleetBase): tr0_pipe = open(tempfile.gettempdir() + "/tr0_err.log", "wb+") tr1_pipe = open(tempfile.gettempdir() + "/tr1_err.log", "wb+") required_envs["PADDLE_TRAINER_ID"] = "0" - tr0_proc = subprocess.Popen( - tr0_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=tr0_pipe, - env=required_envs) + tr0_proc = subprocess.Popen(tr0_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=tr0_pipe, + env=required_envs) print("PADDLE_TRAINER_ID=0:") 
print(required_envs) required_envs["PADDLE_TRAINER_ID"] = "1" - tr1_proc = subprocess.Popen( - tr1_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=tr1_pipe, - env=required_envs) + tr1_proc = subprocess.Popen(tr1_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=tr1_pipe, + env=required_envs) print("PADDLE_TRAINER_ID=1:") print(required_envs) return tr0_proc, tr1_proc, tr0_pipe, tr1_pipe @@ -182,8 +179,9 @@ class TestDistGloo_2x2(TestFleetBase): def test_dist_train(self): print("path is not delete", os.path.exists("./tmp4")) - self.check_with_place( - "dist_fleet_debug_gloo.py", delta=1e-5, check_error_log=True) + self.check_with_place("dist_fleet_debug_gloo.py", + delta=1e-5, + check_error_log=True) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_grad_clip.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_grad_clip.py index 7807646dca3..f929bc2ae1e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_grad_clip.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_grad_clip.py @@ -20,6 +20,7 @@ from test_dist_fleet_base import TestFleetBase class TestDistGeoClipByGlobalNorm(TestFleetBase): + def _setup_config(self): self._mode = "geo" self._reader = "dataset" @@ -43,8 +44,9 @@ class TestDistGeoClipByGlobalNorm(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=True) def _setup_config1(self): self._sync_mode = False @@ -52,6 +54,7 @@ class TestDistGeoClipByGlobalNorm(TestFleetBase): class TestDistASyncClipByValue(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "dataset" @@ -74,11 +77,13 @@ class TestDistASyncClipByValue(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=True) class TestDistASyncClipByNorm(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "dataset" @@ -101,11 +106,13 @@ class TestDistASyncClipByNorm(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=True) class TestDistASyncClipByGlobalNorm(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "dataset" @@ -128,8 +135,9 @@ class TestDistASyncClipByGlobalNorm(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=True) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py index 6111d40c7d6..560cfb0b36d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py @@ -222,6 +222,7 @@ class TestFleetHeterBase(unittest.TestCase): self._setup_config() def _find_free_port(self): + def 
__free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: @@ -240,16 +241,14 @@ class TestFleetHeterBase(unittest.TestCase): ps0_pipe = open(tempfile.gettempdir() + "/ps0_err.log", "wb+") ps1_pipe = open(tempfile.gettempdir() + "/ps1_err.log", "wb+") - ps0_proc = subprocess.Popen( - ps0_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=ps0_pipe, - env=required_envs) - ps1_proc = subprocess.Popen( - ps1_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=ps1_pipe, - env=required_envs) + ps0_proc = subprocess.Popen(ps0_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=ps0_pipe, + env=required_envs) + ps1_proc = subprocess.Popen(ps1_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=ps1_pipe, + env=required_envs) return ps0_proc, ps1_proc, ps0_pipe, ps1_pipe def _start_trainer(self, cmd, required_envs): @@ -261,16 +260,14 @@ class TestFleetHeterBase(unittest.TestCase): tr0_out = open(tempfile.gettempdir() + "/tr0_out.log", "wb+") tr1_out = open(tempfile.gettempdir() + "/tr1_out.log", "wb+") - tr0_proc = subprocess.Popen( - tr0_cmd.strip().split(" "), - stdout=tr0_out, - stderr=tr0_pipe, - env=required_envs) - tr1_proc = subprocess.Popen( - tr1_cmd.strip().split(" "), - stdout=tr1_out, - stderr=tr1_pipe, - env=required_envs) + tr0_proc = subprocess.Popen(tr0_cmd.strip().split(" "), + stdout=tr0_out, + stderr=tr0_pipe, + env=required_envs) + tr1_proc = subprocess.Popen(tr1_cmd.strip().split(" "), + stdout=tr1_out, + stderr=tr1_pipe, + env=required_envs) return tr0_proc, tr1_proc, tr0_pipe, tr1_pipe @@ -287,26 +284,22 @@ class TestFleetHeterBase(unittest.TestCase): heter2_out = open(tempfile.gettempdir() + "/heter2_out.log", "wb+") heter3_out = open(tempfile.gettempdir() + "/heter3_out.log", "wb+") - heter0_proc = subprocess.Popen( - heter0_cmd.strip().split(" "), - stdout=heter0_out, - stderr=heter0_pipe, - env=required_envs) - heter1_proc = subprocess.Popen( - heter1_cmd.strip().split(" "), - stdout=heter1_out, - stderr=heter1_pipe, - env=required_envs) - heter2_proc = subprocess.Popen( - heter2_cmd.strip().split(" "), - stdout=heter2_out, - stderr=heter2_pipe, - env=required_envs) - heter3_proc = subprocess.Popen( - heter3_cmd.strip().split(" "), - stdout=heter3_out, - stderr=heter3_pipe, - env=required_envs) + heter0_proc = subprocess.Popen(heter0_cmd.strip().split(" "), + stdout=heter0_out, + stderr=heter0_pipe, + env=required_envs) + heter1_proc = subprocess.Popen(heter1_cmd.strip().split(" "), + stdout=heter1_out, + stderr=heter1_pipe, + env=required_envs) + heter2_proc = subprocess.Popen(heter2_cmd.strip().split(" "), + stdout=heter2_out, + stderr=heter2_pipe, + env=required_envs) + heter3_proc = subprocess.Popen(heter3_cmd.strip().split(" "), + stdout=heter3_out, + stderr=heter3_pipe, + env=required_envs) return heter0_proc, heter1_proc, heter2_proc, heter3_proc, heter0_pipe, heter1_pipe, heter2_pipe, heter3_pipe @@ -414,25 +407,32 @@ class TestFleetHeterBase(unittest.TestCase): def runtime_main(test_class): parser = argparse.ArgumentParser(description='Run Fleet test.') - parser.add_argument( - '--role', - type=str, - required=True, - choices=['pserver', 'trainer', 'heter_trainer']) + parser.add_argument('--role', + type=str, + required=True, + choices=['pserver', 'trainer', 'heter_trainer']) parser.add_argument('--endpoints', type=str, required=False, default="") - parser.add_argument( - '--trainer_endpoints', type=str, required=False, default="") - parser.add_argument( - '--heter_trainer_endpoints', type=str, required=False, 
default="") - parser.add_argument( - '--heter_trainer_device', type=str, required=False, default="gpu") + parser.add_argument('--trainer_endpoints', + type=str, + required=False, + default="") + parser.add_argument('--heter_trainer_endpoints', + type=str, + required=False, + default="") + parser.add_argument('--heter_trainer_device', + type=str, + required=False, + default="gpu") parser.add_argument('--gloo_path', type=str, required=False, default="") parser.add_argument('--current_id', type=int, required=False, default=0) parser.add_argument('--trainers', type=int, required=False, default=1) parser.add_argument('--stage_id', type=int, required=False, default=1) parser.add_argument('--mode', type=str, required=False, default='async') - parser.add_argument( - '--geo_sgd_need_push_nums', type=int, required=False, default=2) + parser.add_argument('--geo_sgd_need_push_nums', + type=int, + required=False, + default=2) parser.add_argument('--reader', type=str, required=False, default='dataset') args = parser.parse_args() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py index 2ed331c6284..eaae0eff55a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestDistHeterDatasetAsync2x2(TestFleetHeterBase): + def _setup_config(self): self._mode = "async" self._reader = "dataset" @@ -51,10 +52,9 @@ class TestDistHeterDatasetAsync2x2(TestFleetHeterBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_heter_pipeline_ctr.py", - delta=1e-5, - check_error_log=True) + self.check_with_place("dist_fleet_heter_pipeline_ctr.py", + delta=1e-5, + check_error_log=True) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py index 61f15e7dfff..db5f5bccdac 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py @@ -26,6 +26,7 @@ paddle.enable_static() class TestDistFleetHeterProgram(unittest.TestCase): + def build_role(self): environs = {} environs[ @@ -63,13 +64,15 @@ class TestDistFleetHeterProgram(unittest.TestCase): return self.strategy def build_input(self): - dense_input = fluid.layers.data( - name="dense_input", shape=[10], dtype="float32") + dense_input = fluid.layers.data(name="dense_input", + shape=[10], + dtype="float32") sparse_input_ids = [ - fluid.layers.data( - name="C" + str(i), shape=[1], lod_level=1, dtype="int64") - for i in range(1, 27) + fluid.layers.data(name="C" + str(i), + shape=[1], + lod_level=1, + dtype="int64") for i in range(1, 27) ] label = fluid.layers.data(name="label", shape=[1], dtype="float32") @@ -78,6 +81,7 @@ class TestDistFleetHeterProgram(unittest.TestCase): return inputs def build_net(self, inputs): + def embedding_layer(input): return fluid.layers.embedding( input=input, @@ -85,7 +89,8 @@ class TestDistFleetHeterProgram(unittest.TestCase): size=[100001, 10], param_attr=fluid.ParamAttr( name="SparseFeatFactors", - initializer=fluid.initializer.Uniform()), ) + initializer=fluid.initializer.Uniform()), + ) sparse_embed_seq = list(map(embedding_layer, inputs[1:-1])) @@ -101,22 +106,22 @@ class TestDistFleetHeterProgram(unittest.TestCase): 
name="fc1") with fluid.device_guard("cpu"): - fc2 = fluid.layers.fc(input=fc1, - size=400, - act="relu", - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal( - scale=1 / math.sqrt(fc1.shape[1]))), - name="fc2") + fc2 = fluid.layers.fc( + input=fc1, + size=400, + act="relu", + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal( + scale=1 / math.sqrt(fc1.shape[1]))), + name="fc2") with fluid.device_guard("gpu"): - fc3 = fluid.layers.fc(input=fc2, - size=400, - act="relu", - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal( - scale=1 / math.sqrt(fc2.shape[1]))), - name="fc3") + fc3 = fluid.layers.fc( + input=fc2, + size=400, + act="relu", + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal( + scale=1 / math.sqrt(fc2.shape[1]))), + name="fc3") with fluid.device_guard("cpu"): predict = fluid.layers.fc( @@ -124,7 +129,8 @@ class TestDistFleetHeterProgram(unittest.TestCase): size=2, act="softmax", param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal( - scale=1 / math.sqrt(fc3.shape[1]))), ) + scale=1 / math.sqrt(fc3.shape[1]))), + ) with fluid.device_guard("gpu"): labels = fluid.layers.cast(inputs[-1], dtype="int64") diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_infer.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_infer.py index 82a3d73da2c..6febcd9478b 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_infer.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_infer.py @@ -24,6 +24,7 @@ from paddle.dataset.common import download, DATA_HOME class TestDistCtrInfer(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "pyreader" @@ -57,22 +58,25 @@ class TestDistCtrInfer(TestFleetBase): def test_dist_infer(self): model_dirname = tempfile.mkdtemp() - self.check_with_place( - "dist_fleet_ctr.py", - delta=1e-5, - check_error_log=False, - need_envs={"SAVE_DIRNAME": model_dirname, }) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=False, + need_envs={ + "SAVE_DIRNAME": model_dirname, + }) self._need_test = 1 self._model_dir = model_dirname - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=False) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=False) shutil.rmtree(model_dirname) class TestDistCtrTrainInfer(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "pyreader" @@ -106,8 +110,9 @@ class TestDistCtrTrainInfer(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train_infer(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=False) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py index fbd58e015c1..7e3e5258aed 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py @@ -18,6 +18,7 @@ import os import unittest import paddle + paddle.enable_static() import paddle.fluid as fluid @@ -36,30 +37,34 @@ batch_size = 4 class TestPSPassWithBow(unittest.TestCase): + def net(self): + def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = fluid.layers.reduce_sum(cond) - acc = fluid.layers.elementwise_div( - cond_3, - 
fluid.layers.fill_constant( - shape=[1], value=batch_size * 1.0, dtype='float64'), - name="simnet_acc") + acc = fluid.layers.elementwise_div(cond_3, + fluid.layers.fill_constant( + shape=[1], + value=batch_size * 1.0, + dtype='float64'), + name="simnet_acc") return acc def get_loss(cos_q_pt, cos_q_nt): loss_op1 = fluid.layers.elementwise_sub( - fluid.layers.fill_constant_batch_size_like( - input=cos_q_pt, - shape=[-1, 1], - value=margin, - dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=cos_q_pt, + shape=[-1, 1], + value=margin, + dtype='float32'), cos_q_pt) loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) loss_op3 = fluid.layers.elementwise_max( - fluid.layers.fill_constant_batch_size_like( - input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=loss_op2, + shape=[-1, 1], + value=0.0, + dtype='float32'), loss_op2) avg_cost = fluid.layers.mean(loss_op3) return avg_cost @@ -68,8 +73,10 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1) + q = fluid.layers.data(name="query_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding q_emb = fluid.layers.embedding( input=q, @@ -95,8 +102,10 @@ class TestPSPassWithBow(unittest.TestCase): # label data label = fluid.layers.data(name="label", shape=[1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) + pt = fluid.layers.data(name="pos_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding pt_emb = fluid.layers.embedding( input=pt, @@ -121,8 +130,10 @@ class TestPSPassWithBow(unittest.TestCase): learning_rate=base_lr), bias_attr=fluid.ParamAttr(name="__fc_b__")) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1) + nt = fluid.layers.data(name="neg_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding nt_emb = fluid.layers.embedding( input=nt, @@ -160,11 +171,10 @@ class TestPSPassWithBow(unittest.TestCase): "127.0.0.1:36007" ] - role = fleet.UserDefinedRoleMaker( - current_id=0, - role=role_maker.Role.SERVER, - worker_num=2, - server_endpoints=endpoints) + role = fleet.UserDefinedRoleMaker(current_id=0, + role=role_maker.Role.SERVER, + worker_num=2, + server_endpoints=endpoints) fleet.init(role) loss, acc, _ = self.net() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps10.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps10.py index 3fa4cc1c1b6..65fc10031cc 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps10.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps10.py @@ -14,6 +14,7 @@ from __future__ import print_function import os + os.environ["WITH_DISTRIBUTE"] = "ON" import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker @@ -35,14 +36,18 @@ batch_size = 4 class TestExponentialDecay(unittest.TestCase): + def net(self): - input_data = paddle.static.data( - name="sparse_input", shape=[None, 1], dtype="int64") - input_label = paddle.static.data( - name="label", shape=[None, 1], dtype="int64") + input_data = paddle.static.data(name="sparse_input", + shape=[None, 1], + dtype="int64") + input_label = paddle.static.data(name="label", + shape=[None, 1], + dtype="int64") label = paddle.cast(input_label, dtype="float32") - embedding = paddle.static.nn.embedding( - input_data, is_sparse=True, size=[1000, 128]) + embedding = 
paddle.static.nn.embedding(input_data, + is_sparse=True, + size=[1000, 128]) fc1 = paddle.static.nn.fc(embedding, size=1024, activation="relu") fc2 = paddle.static.nn.fc(fc1, size=512, activation="relu") @@ -59,16 +64,16 @@ class TestExponentialDecay(unittest.TestCase): "127.0.0.1:36007" ] - role = role_maker.UserDefinedRoleMaker( - current_id=0, - role=role_maker.Role.SERVER, - worker_num=2, - server_endpoints=endpoints) + role = role_maker.UserDefinedRoleMaker(current_id=0, + role=role_maker.Role.SERVER, + worker_num=2, + server_endpoints=endpoints) fleet.init(role) loss = self.net() - scheduler = paddle.optimizer.lr.InverseTimeDecay( - learning_rate=base_lr, gamma=0.999, verbose=True) + scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=base_lr, + gamma=0.999, + verbose=True) optimizer = fluid.optimizer.Adam(scheduler) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py index cad7d067e90..171889ae917 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py @@ -38,30 +38,34 @@ batch_size = 4 class TestPSPassWithBow(unittest.TestCase): + def net(self): + def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = fluid.layers.reduce_sum(cond) - acc = fluid.layers.elementwise_div( - cond_3, - fluid.layers.fill_constant( - shape=[1], value=batch_size * 1.0, dtype='float64'), - name="simnet_acc") + acc = fluid.layers.elementwise_div(cond_3, + fluid.layers.fill_constant( + shape=[1], + value=batch_size * 1.0, + dtype='float64'), + name="simnet_acc") return acc def get_loss(cos_q_pt, cos_q_nt): loss_op1 = fluid.layers.elementwise_sub( - fluid.layers.fill_constant_batch_size_like( - input=cos_q_pt, - shape=[-1, 1], - value=margin, - dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=cos_q_pt, + shape=[-1, 1], + value=margin, + dtype='float32'), cos_q_pt) loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) loss_op3 = fluid.layers.elementwise_max( - fluid.layers.fill_constant_batch_size_like( - input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=loss_op2, + shape=[-1, 1], + value=0.0, + dtype='float32'), loss_op2) avg_cost = fluid.layers.mean(loss_op3) return avg_cost @@ -70,8 +74,10 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1) + q = fluid.layers.data(name="query_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding q_emb = fluid.contrib.layers.sparse_embedding( input=q, @@ -95,8 +101,10 @@ class TestPSPassWithBow(unittest.TestCase): # label data label = fluid.layers.data(name="label", shape=[1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) + pt = fluid.layers.data(name="pos_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding pt_emb = fluid.contrib.layers.sparse_embedding( input=pt, @@ -119,8 +127,10 @@ class TestPSPassWithBow(unittest.TestCase): learning_rate=base_lr), bias_attr=fluid.ParamAttr(name="__fc_b__")) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1) + nt = fluid.layers.data(name="neg_title_ids", + shape=[1], + dtype="int64", + lod_level=1) 
# embedding nt_emb = fluid.contrib.layers.sparse_embedding( input=nt, diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py index 14ed9dc0427..65e4381bc2a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py @@ -15,6 +15,7 @@ from __future__ import print_function import os + os.environ["WITH_DISTRIBUTE"] = "ON" import unittest @@ -40,30 +41,34 @@ batch_size = 4 class TestPSPassWithBow(unittest.TestCase): + def net(self): + def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = fluid.layers.reduce_sum(cond) - acc = fluid.layers.elementwise_div( - cond_3, - fluid.layers.fill_constant( - shape=[1], value=batch_size * 1.0, dtype='float64'), - name="simnet_acc") + acc = fluid.layers.elementwise_div(cond_3, + fluid.layers.fill_constant( + shape=[1], + value=batch_size * 1.0, + dtype='float64'), + name="simnet_acc") return acc def get_loss(cos_q_pt, cos_q_nt): loss_op1 = fluid.layers.elementwise_sub( - fluid.layers.fill_constant_batch_size_like( - input=cos_q_pt, - shape=[-1, 1], - value=margin, - dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=cos_q_pt, + shape=[-1, 1], + value=margin, + dtype='float32'), cos_q_pt) loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) loss_op3 = fluid.layers.elementwise_max( - fluid.layers.fill_constant_batch_size_like( - input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=loss_op2, + shape=[-1, 1], + value=0.0, + dtype='float32'), loss_op2) avg_cost = fluid.layers.mean(loss_op3) return avg_cost @@ -72,8 +77,10 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1) + q = fluid.layers.data(name="query_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding q_emb = fluid.contrib.layers.sparse_embedding( input=q, @@ -97,8 +104,10 @@ class TestPSPassWithBow(unittest.TestCase): # label data label = fluid.layers.data(name="label", shape=[1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) + pt = fluid.layers.data(name="pos_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding pt_emb = fluid.contrib.layers.sparse_embedding( input=pt, @@ -121,8 +130,10 @@ class TestPSPassWithBow(unittest.TestCase): learning_rate=base_lr), bias_attr=fluid.ParamAttr(name="__fc_b__")) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1) + nt = fluid.layers.data(name="neg_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding nt_emb = fluid.contrib.layers.sparse_embedding( input=nt, diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py index 858b1acb4fd..243023b4fe1 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py @@ -15,6 +15,7 @@ from __future__ import print_function import os + os.environ["WITH_DISTRIBUTE"] = "ON" import unittest @@ -40,30 +41,34 @@ batch_size = 4 class TestPSPassWithBow(unittest.TestCase): + def net(self): + def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = 
fluid.layers.cast(cond, dtype='float64') cond_3 = fluid.layers.reduce_sum(cond) - acc = fluid.layers.elementwise_div( - cond_3, - fluid.layers.fill_constant( - shape=[1], value=batch_size * 1.0, dtype='float64'), - name="simnet_acc") + acc = fluid.layers.elementwise_div(cond_3, + fluid.layers.fill_constant( + shape=[1], + value=batch_size * 1.0, + dtype='float64'), + name="simnet_acc") return acc def get_loss(cos_q_pt, cos_q_nt): loss_op1 = fluid.layers.elementwise_sub( - fluid.layers.fill_constant_batch_size_like( - input=cos_q_pt, - shape=[-1, 1], - value=margin, - dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=cos_q_pt, + shape=[-1, 1], + value=margin, + dtype='float32'), cos_q_pt) loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) loss_op3 = fluid.layers.elementwise_max( - fluid.layers.fill_constant_batch_size_like( - input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=loss_op2, + shape=[-1, 1], + value=0.0, + dtype='float32'), loss_op2) avg_cost = fluid.layers.mean(loss_op3) return avg_cost @@ -72,8 +77,10 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1) + q = fluid.layers.data(name="query_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding q_emb = fluid.contrib.layers.sparse_embedding( input=q, @@ -97,8 +104,10 @@ class TestPSPassWithBow(unittest.TestCase): # label data label = fluid.layers.data(name="label", shape=[1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) + pt = fluid.layers.data(name="pos_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding pt_emb = fluid.contrib.layers.sparse_embedding( input=pt, @@ -121,8 +130,10 @@ class TestPSPassWithBow(unittest.TestCase): learning_rate=base_lr), bias_attr=fluid.ParamAttr(name="__fc_b__")) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1) + nt = fluid.layers.data(name="neg_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding nt_emb = fluid.contrib.layers.sparse_embedding( input=nt, diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py index aa7975d2b8b..b8ff052c192 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py @@ -18,6 +18,7 @@ import os import unittest import paddle + paddle.enable_static() import paddle.fluid as fluid @@ -36,30 +37,34 @@ batch_size = 4 class TestPSPassWithBow(unittest.TestCase): + def net(self): + def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = fluid.layers.reduce_sum(cond) - acc = fluid.layers.elementwise_div( - cond_3, - fluid.layers.fill_constant( - shape=[1], value=batch_size * 1.0, dtype='float64'), - name="simnet_acc") + acc = fluid.layers.elementwise_div(cond_3, + fluid.layers.fill_constant( + shape=[1], + value=batch_size * 1.0, + dtype='float64'), + name="simnet_acc") return acc def get_loss(cos_q_pt, cos_q_nt): loss_op1 = fluid.layers.elementwise_sub( - fluid.layers.fill_constant_batch_size_like( - input=cos_q_pt, - shape=[-1, 1], - value=margin, - dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=cos_q_pt, + shape=[-1, 1], + value=margin, + dtype='float32'), cos_q_pt) loss_op2 
= fluid.layers.elementwise_add(loss_op1, cos_q_nt) loss_op3 = fluid.layers.elementwise_max( - fluid.layers.fill_constant_batch_size_like( - input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=loss_op2, + shape=[-1, 1], + value=0.0, + dtype='float32'), loss_op2) avg_cost = fluid.layers.mean(loss_op3) return avg_cost @@ -68,8 +73,10 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = False # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1) + q = fluid.layers.data(name="query_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding q_emb = fluid.layers.embedding( input=q, @@ -95,8 +102,10 @@ class TestPSPassWithBow(unittest.TestCase): # label data label = fluid.layers.data(name="label", shape=[1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) + pt = fluid.layers.data(name="pos_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding pt_emb = fluid.layers.embedding( input=pt, @@ -121,8 +130,10 @@ class TestPSPassWithBow(unittest.TestCase): learning_rate=base_lr), bias_attr=fluid.ParamAttr(name="__fc_b__")) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1) + nt = fluid.layers.data(name="neg_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding nt_emb = fluid.layers.embedding( input=nt, @@ -160,11 +171,10 @@ class TestPSPassWithBow(unittest.TestCase): "127.0.0.1:36007" ] - role = fleet.UserDefinedRoleMaker( - current_id=0, - role=role_maker.Role.WORKER, - worker_num=2, - server_endpoints=endpoints) + role = fleet.UserDefinedRoleMaker(current_id=0, + role=role_maker.Role.WORKER, + worker_num=2, + server_endpoints=endpoints) fleet.init(role) loss, acc, _ = self.net() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py index ca8f5261045..32af1959f25 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py @@ -18,6 +18,7 @@ import os import unittest import paddle + paddle.enable_static() import paddle.fluid as fluid @@ -36,30 +37,34 @@ batch_size = 4 class TestPSPassWithBow(unittest.TestCase): + def net(self): + def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = fluid.layers.reduce_sum(cond) - acc = fluid.layers.elementwise_div( - cond_3, - fluid.layers.fill_constant( - shape=[1], value=batch_size * 1.0, dtype='float64'), - name="simnet_acc") + acc = fluid.layers.elementwise_div(cond_3, + fluid.layers.fill_constant( + shape=[1], + value=batch_size * 1.0, + dtype='float64'), + name="simnet_acc") return acc def get_loss(cos_q_pt, cos_q_nt): loss_op1 = fluid.layers.elementwise_sub( - fluid.layers.fill_constant_batch_size_like( - input=cos_q_pt, - shape=[-1, 1], - value=margin, - dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=cos_q_pt, + shape=[-1, 1], + value=margin, + dtype='float32'), cos_q_pt) loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) loss_op3 = fluid.layers.elementwise_max( - fluid.layers.fill_constant_batch_size_like( - input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=loss_op2, + shape=[-1, 1], + value=0.0, + dtype='float32'), loss_op2) avg_cost = fluid.layers.mean(loss_op3) return avg_cost @@ 
-68,8 +73,10 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1) + q = fluid.layers.data(name="query_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding q_emb = fluid.contrib.layers.sparse_embedding( input=q, @@ -93,8 +100,10 @@ class TestPSPassWithBow(unittest.TestCase): # label data label = fluid.layers.data(name="label", shape=[1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) + pt = fluid.layers.data(name="pos_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding pt_emb = fluid.contrib.layers.sparse_embedding( input=pt, @@ -117,8 +126,10 @@ class TestPSPassWithBow(unittest.TestCase): learning_rate=base_lr), bias_attr=fluid.ParamAttr(name="__fc_b__")) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1) + nt = fluid.layers.data(name="neg_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding nt_emb = fluid.contrib.layers.sparse_embedding( input=nt, @@ -154,11 +165,10 @@ class TestPSPassWithBow(unittest.TestCase): "127.0.0.1:36007" ] - role = role_maker.UserDefinedRoleMaker( - current_id=0, - role=role_maker.Role.SERVER, - worker_num=2, - server_endpoints=endpoints) + role = role_maker.UserDefinedRoleMaker(current_id=0, + role=role_maker.Role.SERVER, + worker_num=2, + server_endpoints=endpoints) fleet.init(role) loss, acc, _ = self.net() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py index 2812cb4b3d6..63ea8f639aa 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py @@ -18,6 +18,7 @@ import os import unittest import paddle + paddle.enable_static() import paddle.fluid as fluid @@ -36,30 +37,34 @@ batch_size = 4 class TestPSPassWithBow(unittest.TestCase): + def net(self): + def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = fluid.layers.reduce_sum(cond) - acc = fluid.layers.elementwise_div( - cond_3, - fluid.layers.fill_constant( - shape=[1], value=batch_size * 1.0, dtype='float64'), - name="simnet_acc") + acc = fluid.layers.elementwise_div(cond_3, + fluid.layers.fill_constant( + shape=[1], + value=batch_size * 1.0, + dtype='float64'), + name="simnet_acc") return acc def get_loss(cos_q_pt, cos_q_nt): loss_op1 = fluid.layers.elementwise_sub( - fluid.layers.fill_constant_batch_size_like( - input=cos_q_pt, - shape=[-1, 1], - value=margin, - dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=cos_q_pt, + shape=[-1, 1], + value=margin, + dtype='float32'), cos_q_pt) loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) loss_op3 = fluid.layers.elementwise_max( - fluid.layers.fill_constant_batch_size_like( - input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=loss_op2, + shape=[-1, 1], + value=0.0, + dtype='float32'), loss_op2) avg_cost = fluid.layers.mean(loss_op3) return avg_cost @@ -68,8 +73,10 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1) + q = fluid.layers.data(name="query_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding q_emb = fluid.layers.embedding( input=q, @@ -95,8 +102,10 @@ 
class TestPSPassWithBow(unittest.TestCase): # label data label = fluid.layers.data(name="label", shape=[1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) + pt = fluid.layers.data(name="pos_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding pt_emb = fluid.layers.embedding( input=pt, @@ -121,8 +130,10 @@ class TestPSPassWithBow(unittest.TestCase): learning_rate=base_lr), bias_attr=fluid.ParamAttr(name="__fc_b__")) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1) + nt = fluid.layers.data(name="neg_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding nt_emb = fluid.layers.embedding( input=nt, @@ -160,21 +171,19 @@ class TestPSPassWithBow(unittest.TestCase): "127.0.0.1:36007" ] - role = role_maker.UserDefinedRoleMaker( - current_id=0, - role=role_maker.Role.SERVER, - worker_num=2, - server_endpoints=endpoints) + role = role_maker.UserDefinedRoleMaker(current_id=0, + role=role_maker.Role.SERVER, + worker_num=2, + server_endpoints=endpoints) fleet.init(role) loss, acc, _ = self.net() optimizer = fluid.optimizer.Adam( - learning_rate=fluid.layers.exponential_decay( - learning_rate=base_lr, - decay_steps=500, - decay_rate=0.969, - staircase=True)) + learning_rate=fluid.layers.exponential_decay(learning_rate=base_lr, + decay_steps=500, + decay_rate=0.969, + staircase=True)) strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = True diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py index 902870789e8..692f586a435 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py @@ -18,6 +18,7 @@ import os import unittest import paddle + paddle.enable_static() import paddle.fluid as fluid @@ -36,30 +37,34 @@ batch_size = 4 class TestPSPassWithBow(unittest.TestCase): + def net(self): + def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = fluid.layers.reduce_sum(cond) - acc = fluid.layers.elementwise_div( - cond_3, - fluid.layers.fill_constant( - shape=[1], value=batch_size * 1.0, dtype='float64'), - name="simnet_acc") + acc = fluid.layers.elementwise_div(cond_3, + fluid.layers.fill_constant( + shape=[1], + value=batch_size * 1.0, + dtype='float64'), + name="simnet_acc") return acc def get_loss(cos_q_pt, cos_q_nt): loss_op1 = fluid.layers.elementwise_sub( - fluid.layers.fill_constant_batch_size_like( - input=cos_q_pt, - shape=[-1, 1], - value=margin, - dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=cos_q_pt, + shape=[-1, 1], + value=margin, + dtype='float32'), cos_q_pt) loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) loss_op3 = fluid.layers.elementwise_max( - fluid.layers.fill_constant_batch_size_like( - input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32'), + fluid.layers.fill_constant_batch_size_like(input=loss_op2, + shape=[-1, 1], + value=0.0, + dtype='float32'), loss_op2) avg_cost = fluid.layers.mean(loss_op3) return avg_cost @@ -68,8 +73,10 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1) + q = fluid.layers.data(name="query_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding q_emb = fluid.contrib.layers.sparse_embedding( input=q, @@ 
-93,8 +100,10 @@ class TestPSPassWithBow(unittest.TestCase): # label data label = fluid.layers.data(name="label", shape=[1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) + pt = fluid.layers.data(name="pos_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding pt_emb = fluid.contrib.layers.sparse_embedding( input=pt, @@ -117,8 +126,10 @@ class TestPSPassWithBow(unittest.TestCase): learning_rate=base_lr), bias_attr=fluid.ParamAttr(name="__fc_b__")) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1) + nt = fluid.layers.data(name="neg_title_ids", + shape=[1], + dtype="int64", + lod_level=1) # embedding nt_emb = fluid.contrib.layers.sparse_embedding( input=nt, @@ -154,11 +165,10 @@ class TestPSPassWithBow(unittest.TestCase): "127.0.0.1:36007" ] - role = role_maker.UserDefinedRoleMaker( - current_id=0, - role=role_maker.Role.SERVER, - worker_num=2, - server_endpoints=endpoints) + role = role_maker.UserDefinedRoleMaker(current_id=0, + role=role_maker.Role.SERVER, + worker_num=2, + server_endpoints=endpoints) fleet.init(role) loss, acc, _ = self.net() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps7.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps7.py index b63301b87dc..466ceb5c6db 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps7.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps7.py @@ -14,6 +14,7 @@ from __future__ import print_function import os + os.environ["WITH_DISTRIBUTE"] = "ON" import paddle.distributed.fleet as fleet @@ -21,6 +22,7 @@ import paddle.distributed.fleet.base.role_maker as role_maker import paddle.fluid as fluid import unittest import paddle + paddle.enable_static() # For Net @@ -35,14 +37,18 @@ batch_size = 4 class TestNaturalExpDecay(unittest.TestCase): + def net(self): - input_data = paddle.static.data( - name="sparse_input", shape=[None, 1], dtype="int64") - input_label = paddle.static.data( - name="label", shape=[None, 1], dtype="int64") + input_data = paddle.static.data(name="sparse_input", + shape=[None, 1], + dtype="int64") + input_label = paddle.static.data(name="label", + shape=[None, 1], + dtype="int64") label = paddle.cast(input_label, dtype="float32") - embedding = paddle.static.nn.embedding( - input_data, is_sparse=True, size=[1000, 128]) + embedding = paddle.static.nn.embedding(input_data, + is_sparse=True, + size=[1000, 128]) fc1 = paddle.static.nn.fc(embedding, size=1024, activation="relu") fc2 = paddle.static.nn.fc(fc1, size=512, activation="relu") @@ -59,16 +65,16 @@ class TestNaturalExpDecay(unittest.TestCase): "127.0.0.1:36007" ] - role = role_maker.UserDefinedRoleMaker( - current_id=0, - role=role_maker.Role.SERVER, - worker_num=2, - server_endpoints=endpoints) + role = role_maker.UserDefinedRoleMaker(current_id=0, + role=role_maker.Role.SERVER, + worker_num=2, + server_endpoints=endpoints) fleet.init(role) loss = self.net() - scheduler = paddle.optimizer.lr.NaturalExpDecay( - learning_rate=base_lr, gamma=0.999, verbose=True) + scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=base_lr, + gamma=0.999, + verbose=True) optimizer = fluid.optimizer.Adam(scheduler) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps8.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps8.py index d213014da9a..834f7d1273f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps8.py +++ 
b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps8.py @@ -14,12 +14,14 @@ from __future__ import print_function import os + os.environ["WITH_DISTRIBUTE"] = "ON" import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker import paddle.fluid as fluid import unittest import paddle + paddle.enable_static() # For Net @@ -34,14 +36,18 @@ batch_size = 4 class TestNoamDecay(unittest.TestCase): + def net(self): - input_data = paddle.static.data( - name="sparse_input", shape=[None, 1], dtype="int64") - input_label = paddle.static.data( - name="label", shape=[None, 1], dtype="int64") + input_data = paddle.static.data(name="sparse_input", + shape=[None, 1], + dtype="int64") + input_label = paddle.static.data(name="label", + shape=[None, 1], + dtype="int64") label = paddle.cast(input_label, dtype="float32") - embedding = paddle.static.nn.embedding( - input_data, is_sparse=True, size=[1000, 128]) + embedding = paddle.static.nn.embedding(input_data, + is_sparse=True, + size=[1000, 128]) fc1 = paddle.static.nn.fc(embedding, size=1024, activation="relu") fc2 = paddle.static.nn.fc(fc1, size=512, activation="relu") @@ -58,16 +64,16 @@ class TestNoamDecay(unittest.TestCase): "127.0.0.1:36007" ] - role = role_maker.UserDefinedRoleMaker( - current_id=0, - role=role_maker.Role.SERVER, - worker_num=2, - server_endpoints=endpoints) + role = role_maker.UserDefinedRoleMaker(current_id=0, + role=role_maker.Role.SERVER, + worker_num=2, + server_endpoints=endpoints) fleet.init(role) loss = self.net() - scheduler = paddle.optimizer.lr.NoamDecay( - d_model=0.01, warmup_steps=100, verbose=True) + scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, + warmup_steps=100, + verbose=True) optimizer = fluid.optimizer.Adam(scheduler) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps9.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps9.py index 926789f4fba..a1bd087cbe8 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps9.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps9.py @@ -14,12 +14,14 @@ from __future__ import print_function import os + os.environ["WITH_DISTRIBUTE"] = "ON" import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker import paddle.fluid as fluid import unittest import paddle + paddle.enable_static() # For Net @@ -34,14 +36,18 @@ batch_size = 4 class TestExponentialDecay(unittest.TestCase): + def net(self): - input_data = paddle.static.data( - name="sparse_input", shape=[None, 1], dtype="int64") - input_label = paddle.static.data( - name="label", shape=[None, 1], dtype="int64") + input_data = paddle.static.data(name="sparse_input", + shape=[None, 1], + dtype="int64") + input_label = paddle.static.data(name="label", + shape=[None, 1], + dtype="int64") label = paddle.cast(input_label, dtype="float32") - embedding = paddle.static.nn.embedding( - input_data, is_sparse=True, size=[1000, 128]) + embedding = paddle.static.nn.embedding(input_data, + is_sparse=True, + size=[1000, 128]) fc1 = paddle.static.nn.fc(embedding, size=1024, activation="relu") fc2 = paddle.static.nn.fc(fc1, size=512, activation="relu") @@ -58,16 +64,16 @@ class TestExponentialDecay(unittest.TestCase): "127.0.0.1:36007" ] - role = role_maker.UserDefinedRoleMaker( - current_id=0, - role=role_maker.Role.SERVER, - worker_num=2, - server_endpoints=endpoints) + role = role_maker.UserDefinedRoleMaker(current_id=0, + 
role=role_maker.Role.SERVER, + worker_num=2, + server_endpoints=endpoints) fleet.init(role) loss = self.net() - scheduler = paddle.optimizer.lr.ExponentialDecay( - learning_rate=base_lr, gamma=0.999, verbose=True) + scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=base_lr, + gamma=0.999, + verbose=True) optimizer = fluid.optimizer.Adam(scheduler) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps_gpu_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps_gpu_ctr.py index 9308a3e4792..d29ea0daad6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps_gpu_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps_gpu_ctr.py @@ -21,6 +21,7 @@ from test_dist_fleet_base import TestFleetBase class TestPsGPUAsyncDataset2x2(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "dataset" @@ -50,8 +51,9 @@ class TestPsGPUAsyncDataset2x2(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) + self.check_with_place("dist_fleet_ctr.py", + delta=1e-5, + check_error_log=True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer.py index e729bfe0537..b49a2599b76 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer.py @@ -22,6 +22,7 @@ flag_name = os.path.splitext(__file__)[0] class TestFleetMetaOptimizerPrecision(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -34,11 +35,10 @@ class TestFleetMetaOptimizerPrecision(TestDistBase): def test_dist_train(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "dist_fleet_raw_program_optimizer.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_fleet_raw_program_optimizer.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer_fuse_allreduce.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer_fuse_allreduce.py index 21b921c52c8..be85ea71040 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer_fuse_allreduce.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer_fuse_allreduce.py @@ -22,6 +22,7 @@ flag_name = os.path.splitext(__file__)[0] class TestFleetMetaOptimizerAllReduceFusePrecision(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py index e0fa590db2a..6e45b142399 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestDistSimnetASync2x2(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "pyreader" @@ -51,8 +52,9 @@ class TestDistSimnetASync2x2(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def 
test_dist_train(self): - self.check_with_place( - "dist_fleet_simnet_bow.py", delta=1e-5, check_error_log=True) + self.check_with_place("dist_fleet_simnet_bow.py", + delta=1e-5, + check_error_log=True) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py index 4e0241c1e9c..17d50f988a6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py @@ -30,6 +30,7 @@ from dist_fleet_sparse_embedding_ctr import fake_ctr_reader @unittest.skip(reason="Skip unstable ut, need paddle sync mode fix") class TestDistMnistSync2x2(TestFleetBase): + def _setup_config(self): self._mode = "sync" self._reader = "pyreader" @@ -59,13 +60,13 @@ class TestDistMnistSync2x2(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_sparse_embedding_ctr.py", - delta=1e-5, - check_error_log=True) + self.check_with_place("dist_fleet_sparse_embedding_ctr.py", + delta=1e-5, + check_error_log=True) class TestDistMnistAsync2x2(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "pyreader" @@ -95,13 +96,13 @@ class TestDistMnistAsync2x2(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_sparse_embedding_ctr.py", - delta=1e-5, - check_error_log=True) + self.check_with_place("dist_fleet_sparse_embedding_ctr.py", + delta=1e-5, + check_error_log=True) class TestDistMnistAsync2x2WithDecay(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "pyreader" @@ -132,13 +133,13 @@ class TestDistMnistAsync2x2WithDecay(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_sparse_embedding_ctr.py", - delta=1e-5, - check_error_log=True) + self.check_with_place("dist_fleet_sparse_embedding_ctr.py", + delta=1e-5, + check_error_log=True) class TestDistMnistAsync2x2WithUnifrom(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "pyreader" @@ -169,19 +170,20 @@ class TestDistMnistAsync2x2WithUnifrom(TestFleetBase): tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) def test_dist_train(self): - self.check_with_place( - "dist_fleet_sparse_embedding_ctr.py", - delta=1e-5, - check_error_log=True) + self.check_with_place("dist_fleet_sparse_embedding_ctr.py", + delta=1e-5, + check_error_log=True) @unittest.skip(reason="Skip unstable ut, need tensor table to enhance") class TestDistMnistAsync2x2WithGauss(TestFleetBase): + def _setup_config(self): self._mode = "async" self._reader = "pyreader" def _run_local_infer(self, model_file): + def net(): """ network definition @@ -194,24 +196,21 @@ class TestDistMnistAsync2x2WithGauss(TestFleetBase): """ dnn_input_dim, lr_input_dim = 10, 10 - dnn_data = fluid.layers.data( - name="dnn_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) - lr_data = fluid.layers.data( - name="lr_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) - label = fluid.layers.data( - name="click", - shape=[-1, 1], - dtype="int64", - lod_level=0, - append_batch_size=False) + dnn_data = fluid.layers.data(name="dnn_data", + shape=[-1, 1], + 
dtype="int64", + lod_level=1, + append_batch_size=False) + lr_data = fluid.layers.data(name="lr_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + label = fluid.layers.data(name="click", + shape=[-1, 1], + dtype="int64", + lod_level=0, + append_batch_size=False) datas = [dnn_data, lr_data, label] @@ -223,10 +222,10 @@ class TestDistMnistAsync2x2WithGauss(TestFleetBase): input=dnn_data, size=[dnn_input_dim, dnn_layer_dims[0]], is_test=inference, - param_attr=fluid.ParamAttr( - name="deep_embedding", initializer=init)) - dnn_pool = fluid.layers.sequence_pool( - input=dnn_embedding, pool_type="sum") + param_attr=fluid.ParamAttr(name="deep_embedding", + initializer=init)) + dnn_pool = fluid.layers.sequence_pool(input=dnn_embedding, + pool_type="sum") dnn_out = dnn_pool for i, dim in enumerate(dnn_layer_dims[1:]): fc = fluid.layers.fc( @@ -247,8 +246,8 @@ class TestDistMnistAsync2x2WithGauss(TestFleetBase): name="wide_embedding", initializer=fluid.initializer.Constant(value=0.01))) - lr_pool = fluid.layers.sequence_pool( - input=lr_embbding, pool_type="sum") + lr_pool = fluid.layers.sequence_pool(input=lr_embbding, + pool_type="sum") merge_layer = fluid.layers.concat(input=[dnn_out, lr_pool], axis=1) predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax') return datas, predict @@ -296,10 +295,9 @@ class TestDistMnistAsync2x2WithGauss(TestFleetBase): shutil.rmtree(model_dir) def test_dist_train(self): - self.check_with_place( - "dist_fleet_sparse_embedding_ctr.py", - delta=1e-5, - check_error_log=True) + self.check_with_place("dist_fleet_sparse_embedding_ctr.py", + delta=1e-5, + check_error_log=True) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py index 574a6888fdb..d692528f5bb 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py @@ -15,6 +15,7 @@ import os import time import unittest + os.environ["WITH_DISTRIBUTE"] = "ON" import paddle import paddle.distributed.fleet.base.role_maker as role_maker @@ -24,6 +25,7 @@ paddle.enable_static() class TestDistStrategyTrainerDescConfig(unittest.TestCase): + def setUp(self): os.environ["PADDLE_PSERVER_NUMS"] = "2" os.environ["PADDLE_TRAINERS_NUM"] = "2" diff --git a/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py b/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py index 11ac301b72a..88d9e46370b 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py +++ b/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py @@ -21,11 +21,13 @@ import paddle.fluid as fluid import paddle.fluid.core as core import paddle + paddle.enable_static() @unittest.skip("do not need currently") class TestLookupTableFuseOp(unittest.TestCase): + def test_fuse(self): places = [core.CPUPlace()] # currently only support CPU @@ -73,25 +75,25 @@ class TestLookupTableFuseOp(unittest.TestCase): outputs=None, attrs={"large_scale_metas": metas}) - init_program.global_block().append_op( - type="lookup_sparse_table_read", - inputs={"Ids": ids}, - outputs={"Out": output}, - attrs={ - "tablename": "embedding_1.block0", - "init": True, - "value_names": ["Param"], - }) - - init_program.global_block().append_op( - type="lookup_sparse_table_read", - inputs={"Ids": ids}, - outputs={"Out": 
output}, - attrs={ - "tablename": "embedding_2.block0", - "init": True, - "value_names": ["Param"], - }) + init_program.global_block().append_op(type="lookup_sparse_table_read", + inputs={"Ids": ids}, + outputs={"Out": output}, + attrs={ + "tablename": + "embedding_1.block0", + "init": True, + "value_names": ["Param"], + }) + + init_program.global_block().append_op(type="lookup_sparse_table_read", + inputs={"Ids": ids}, + outputs={"Out": output}, + attrs={ + "tablename": + "embedding_2.block0", + "init": True, + "value_names": ["Param"], + }) executor = fluid.Executor(place) executor.run(init_program) @@ -150,8 +152,10 @@ class TestLookupTableFuseOp(unittest.TestCase): "Beta1Pow": beta1, "Beta2Pow": beta2, }, - outputs={"Beta1PowOut": beta1, - "Beta2PowOut": beta2}, + outputs={ + "Beta1PowOut": beta1, + "Beta2PowOut": beta2 + }, attrs={ "is_entry": False, "tablename": "embedding_1.block0", @@ -160,8 +164,10 @@ class TestLookupTableFuseOp(unittest.TestCase): training_program.global_block().append_op( type="lookup_sparse_table_fuse_sgd", - inputs={"Grad": grads, - "LearningRate": lr}, + inputs={ + "Grad": grads, + "LearningRate": lr + }, attrs={ "is_entry": False, "tablename": "embedding_2.block0", diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py index 23a2b8fd306..257f5f0db9c 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py @@ -21,6 +21,7 @@ paddle.enable_static() class TestDistMnistNCCL2BackWardDeps(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py index 4cf2cf5f367..483d4757760 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py @@ -24,6 +24,7 @@ flag_name = os.path.splitext(__file__)[0] class TestDistMnist2x2(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -52,20 +53,18 @@ class TestDistMnist2x2(TestDistBase): "fused_all_reduce_op_handle=10,all_reduce_op_handle=10,alloc_continuous_space_op=10,fuse_all_reduce_op_pass=10,alloc_continuous_space_for_grad_pass=10,fast_threaded_ssa_graph_executor=10" required_envs["GLOG_logtostderr"] = "1" - no_merge_losses = self._run_local( - model_file, - required_envs, - check_error_log=check_error_log, - batch_size=4, - log_name=flag_name) + no_merge_losses = self._run_local(model_file, + required_envs, + check_error_log=check_error_log, + batch_size=4, + log_name=flag_name) - batch_merge_losses = self._run_local( - model_file, - required_envs, - check_error_log=check_error_log, - batch_size=2, - batch_merge_repeat=2, - log_name=flag_name) + batch_merge_losses = self._run_local(model_file, + required_envs, + check_error_log=check_error_log, + batch_size=2, + batch_merge_repeat=2, + log_name=flag_name) # Ensure both result have values. 
self.assertGreater(len(no_merge_losses), 1) self.assertEqual(len(no_merge_losses), len(batch_merge_losses)) diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py index eae19afb2ef..f62ce85032b 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py @@ -38,6 +38,7 @@ def count_of_sparse_all_reduce_calls(file_name): class TestDistMnistNCCL2DGC(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -48,11 +49,10 @@ class TestDistMnistNCCL2DGC(TestDistBase): def test_dist_train(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "dist_mnist.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_mnist.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) def tearDown(self): import paddle.fluid as fluid @@ -67,6 +67,7 @@ class TestDistMnistNCCL2DGC(TestDistBase): class TestDistMnistNCCL2DGCMultiCards(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -77,11 +78,10 @@ class TestDistMnistNCCL2DGCMultiCards(TestDistBase): def test_dist_train(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place_multi_cards( - "dist_mnist.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place_multi_cards("dist_mnist.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) def tearDown(self): import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py index 1cecb996202..fc7d004672c 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestDistMnistFleetSave(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py index 3b15b06b5ef..265e59ff949 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py @@ -21,6 +21,7 @@ paddle.enable_static() class TestDistMnistNCCL2FleetApi(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -40,6 +41,7 @@ class TestDistMnistNCCL2FleetApi(TestDistBase): class FleetCollectiveTest(unittest.TestCase): + def test_open_sync_batch_norm(self): import paddle.fluid as fluid import paddle.fluid.incubate.fleet.base.role_maker as role_maker @@ -61,8 +63,8 @@ class FleetCollectiveTest(unittest.TestCase): dist_strategy = DistributedStrategy() dist_strategy.sync_batch_norm = True - dist_optimizer = fleet.distributed_optimizer( - optimizer, strategy=dist_strategy) + dist_optimizer = fleet.distributed_optimizer(optimizer, + strategy=dist_strategy) dist_optimizer.minimize(loss) self.assertEqual(dist_strategy.exec_strategy.num_threads, 1) diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_fp16_allreduce.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_fp16_allreduce.py index d74d08681c1..3a249c929e3 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_fp16_allreduce.py +++ 
b/python/paddle/fluid/tests/unittests/test_dist_mnist_fp16_allreduce.py @@ -18,6 +18,7 @@ from test_dist_base import TestDistBase class TestDistMnist2x2FP16AllReduce(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_gradient_merge.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_gradient_merge.py index 8056ab86333..7e4453ca4c4 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_gradient_merge.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_gradient_merge.py @@ -22,6 +22,7 @@ flag_name = os.path.splitext(__file__)[0] class TestDistMnistGradMerge(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -29,14 +30,14 @@ class TestDistMnistGradMerge(TestDistBase): def test_dist_train(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "dist_mnist_gradient_merge.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_mnist_gradient_merge.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestDistMnistGradMergeNoFuse(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -45,14 +46,14 @@ class TestDistMnistGradMergeNoFuse(TestDistBase): def test_dist_train(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "dist_mnist_gradient_merge.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name + "_no_fuse") + self.check_with_place("dist_mnist_gradient_merge.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name + "_no_fuse") class TestDistMnistGradMergeRawOptimizerBase(TestDistBase): + def _setup_config(self): self._use_reader_alloc = False self._nccl2_mode = True @@ -66,19 +67,19 @@ class TestDistMnistGradMergeRawOptimizerBase(TestDistBase): if fluid.core.is_compiled_with_cuda(): avg = str(self.enable_avg()) log_name = flag_name + "_raw_optimizer_gm_avg_" + avg - self.check_with_place( - "dist_mnist_gradient_merge_raw_optimizer.py", - delta=1e-5, - check_error_log=True, - log_name=log_name, - need_envs={ - 'FLAGS_apply_pass_to_program': '1', - 'enable_gm_avg': avg, - }) + self.check_with_place("dist_mnist_gradient_merge_raw_optimizer.py", + delta=1e-5, + check_error_log=True, + log_name=log_name, + need_envs={ + 'FLAGS_apply_pass_to_program': '1', + 'enable_gm_avg': avg, + }) class TestDistMnistGradMergeRawOptimizerAvg( TestDistMnistGradMergeRawOptimizerBase): + def enable_avg(self): return True diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py index e1fbbebe171..c615b753169 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py @@ -24,6 +24,7 @@ flag_name = os.path.splitext(__file__)[0] class TestDistMnistNCCL2HAllreduce(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -35,11 +36,10 @@ class TestDistMnistNCCL2HAllreduce(TestDistBase): def test_dist_train(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "dist_mnist.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_mnist.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_lars.py 
b/python/paddle/fluid/tests/unittests/test_dist_mnist_lars.py index 53c7527fdaf..f714a8ad00f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_lars.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_lars.py @@ -18,6 +18,7 @@ from test_dist_base import TestDistBase class TestDistMnist2x2Lars(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py index d9e6be8609d..b520f03153d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py @@ -24,6 +24,7 @@ flag_name = os.path.splitext(__file__)[0] class TestDistMnistNCCL2MultiNCCLComm(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -34,11 +35,10 @@ class TestDistMnistNCCL2MultiNCCLComm(TestDistBase): def test_dist_train(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "dist_mnist.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_mnist.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py index 28ef31875db..cbf5972e6db 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py @@ -21,6 +21,7 @@ paddle.enable_static() class TestDistMnistNCCL2(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -30,13 +31,12 @@ class TestDistMnistNCCL2(TestDistBase): def test_dist_train(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "dist_mnist.py", - delta=1, - need_envs={ - "FLAGS_enable_parallel_graph": "1", - "FLAGS_sync_nccl_allreduce": "1" - }) + self.check_with_place("dist_mnist.py", + delta=1, + need_envs={ + "FLAGS_enable_parallel_graph": "1", + "FLAGS_sync_nccl_allreduce": "1" + }) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py index 4436064dc28..30d651ed0d4 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py @@ -21,6 +21,7 @@ paddle.enable_static() class TestDistMnistNCCL2(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_train.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_train.py index a5bcada14d8..f8818471647 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_train.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_train.py @@ -17,59 +17,60 @@ import unittest from test_dist_base import TestDistBase import os + flag_name = os.path.splitext(__file__)[0] class TestDistMnist2x2(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False def test_dist_train(self): - self.check_with_place( - "dist_mnist.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_mnist.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class 
TestDistMnist2x2WithMemopt(TestDistBase): + def _setup_config(self): self._sync_mode = True self._mem_opt = True def test_dist_train(self): - self.check_with_place( - "dist_mnist.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_mnist.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestDistMnistAsync(TestDistBase): + def _setup_config(self): self._sync_mode = False self._use_reduce = False def test_dist_train(self): - self.check_with_place( - "dist_mnist.py", - delta=200, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_mnist.py", + delta=200, + check_error_log=True, + log_name=flag_name) class TestDistMnistDcAsgd(TestDistBase): + def _setup_config(self): self._sync_mode = False self._dc_asgd = True def test_se_resnext(self): - self.check_with_place( - "dist_mnist.py", - delta=200, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_mnist.py", + delta=200, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py index 0ee6740ac23..fd585644791 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py @@ -21,6 +21,7 @@ paddle.enable_static() class TestDistMnistLocalSGDFleetApi(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -36,6 +37,7 @@ class TestDistMnistLocalSGDFleetApi(TestDistBase): class TestDistMnistGradAllReduceFleetApi(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False diff --git a/python/paddle/fluid/tests/unittests/test_dist_oneps.py b/python/paddle/fluid/tests/unittests/test_dist_oneps.py index 2493c7aab55..7704a4c715e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_oneps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_oneps.py @@ -15,12 +15,14 @@ import unittest import paddle + paddle.enable_static() from paddle.distributed.fleet.runtime.the_one_ps import Table class TestTable(unittest.TestCase): + def test_table_tensor(self): table = Table() table.id = 1001 diff --git a/python/paddle/fluid/tests/unittests/test_dist_op.py b/python/paddle/fluid/tests/unittests/test_dist_op.py index ad999c3feae..255431544f9 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_op.py +++ b/python/paddle/fluid/tests/unittests/test_dist_op.py @@ -35,6 +35,7 @@ def dist(x, y, p): class TestDistOp(OpTest): + def setUp(self): self.op_type = 'dist' self.python_api = paddle.dist @@ -110,14 +111,14 @@ class TestDistOp(OpTest): self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad( - ["X", "Y"], - "Out", - user_defined_grads=self.gradient, - check_eager=True) + self.check_grad(["X", "Y"], + "Out", + user_defined_grads=self.gradient, + check_eager=True) class TestDistOpCase1(TestDistOp): + def init_case(self): self.x_shape = (3, 5, 5, 6) self.y_shape = (5, 5, 6) @@ -125,6 +126,7 @@ class TestDistOpCase1(TestDistOp): class TestDistOpCase2(TestDistOp): + def init_case(self): self.x_shape = (10, 10) self.y_shape = (4, 10, 10) @@ -132,6 +134,7 @@ class TestDistOpCase2(TestDistOp): class TestDistOpCase3(TestDistOp): + def init_case(self): self.x_shape = (15, 10) self.y_shape = (15, 10) @@ -139,6 +142,7 @@ class TestDistOpCase3(TestDistOp): class TestDistOpCase4(TestDistOp): + def init_case(self): 
self.x_shape = (2, 3, 4, 5, 8) self.y_shape = (3, 1, 5, 8) @@ -146,6 +150,7 @@ class TestDistOpCase4(TestDistOp): class TestDistOpCase5(TestDistOp): + def init_case(self): self.x_shape = (4, 1, 4, 8) self.y_shape = (2, 2, 1, 4, 4, 8) @@ -153,6 +158,7 @@ class TestDistOpCase5(TestDistOp): class TestDistAPI(unittest.TestCase): + def init_data_type(self): self.data_type = 'float32' if core.is_compiled_with_rocm( ) else 'float64' @@ -168,12 +174,14 @@ class TestDistAPI(unittest.TestCase): x_i = np.random.random((2, 3, 4, 5)).astype(self.data_type) y_i = np.random.random((3, 1, 5)).astype(self.data_type) result = paddle.dist(x, y, p) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) out = exe.run(fluid.default_main_program(), - feed={'x': x_i, - 'y': y_i}, + feed={ + 'x': x_i, + 'y': y_i + }, fetch_list=[result]) self.assertTrue(np.allclose(dist(x_i, y_i, p), out[0])) diff --git a/python/paddle/fluid/tests/unittests/test_dist_save_load.py b/python/paddle/fluid/tests/unittests/test_dist_save_load.py index ed71a389756..08e8adaa932 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_dist_save_load.py @@ -23,10 +23,12 @@ import numpy as np from test_dist_base import TestDistBase, RUN_STEP import os + flag_name = os.path.splitext(__file__)[0] class TestDistSaveLoadDense2x2(TestDistBase): + def _setup_config(self): self._sync_mode = True self._enforce_place = "CPU" @@ -64,8 +66,10 @@ class TestDistSaveLoadDense2x2(TestDistBase): cluster_env.update(required_envs) local_var = self._run_local(model_file, local_env, check_error_log) - tr0_var, tr1_var = self._run_cluster( - model_file, cluster_env, check_error_log, log_name=flag_name) + tr0_var, tr1_var = self._run_cluster(model_file, + cluster_env, + check_error_log, + log_name=flag_name) shutil.rmtree(model_dir) @@ -84,14 +88,14 @@ class TestDistSaveLoadDense2x2(TestDistBase): 'IS_SELF_CONTAINED_LR': '1', 'SAVE_MODE': 'LOCAL', } - self.check_with_place( - "dist_save_load.py", - delta=0, - check_error_log=False, - need_envs=need_envs) + self.check_with_place("dist_save_load.py", + delta=0, + check_error_log=False, + need_envs=need_envs) class TestDistSaveLoadWithPServerStateDense2x2(TestDistBase): + def _setup_config(self): self._sync_mode = True self._enforce_place = "CPU" @@ -124,15 +128,19 @@ class TestDistSaveLoadWithPServerStateDense2x2(TestDistBase): save_env["MODEL_DIR"] = model_dir save_env.update(required_envs) - tr0_var_1, tr1_var_1 = self._run_cluster( - model_file, save_env, check_error_log, log_name=flag_name) + tr0_var_1, tr1_var_1 = self._run_cluster(model_file, + save_env, + check_error_log, + log_name=flag_name) load_env = {} load_env["LOAD"] = "1" load_env["MODEL_DIR"] = model_dir load_env.update(required_envs) - tr0_var_2, tr1_var_2 = self._run_cluster( - model_file, load_env, check_error_log, log_name=flag_name) + tr0_var_2, tr1_var_2 = self._run_cluster(model_file, + load_env, + check_error_log, + log_name=flag_name) shutil.rmtree(model_dir) @@ -153,12 +161,11 @@ class TestDistSaveLoadWithPServerStateDense2x2(TestDistBase): 'OPTIMIZER': 'ADAM', 'SKIP_STEPS': str(np.random.randint(2, 6)) } - self.check_with_place( - "dist_save_load.py", - delta=0, - check_error_log=True, - need_envs=need_envs, - log_name=flag_name) + self.check_with_place("dist_save_load.py", + delta=0, + check_error_log=True, + need_envs=need_envs, + 
log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_dgc.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_dgc.py index b48ec89a2af..86101cf9fe4 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_dgc.py +++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_dgc.py @@ -18,10 +18,12 @@ from test_dist_base import TestDistBase import os import os + flag_name = os.path.splitext(__file__)[0] class TestDistSeResnetNCCL2DGC(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -33,11 +35,10 @@ class TestDistSeResnetNCCL2DGC(TestDistBase): def test_dist_train(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "dist_se_resnext.py", - delta=30, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_se_resnext.py", + delta=30, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py index 64217135be7..21d002ef318 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py +++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py @@ -25,6 +25,7 @@ flag_name = os.path.splitext(__file__)[0] class TestDistSeResneXtNCCL(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reader_alloc = False @@ -33,14 +34,14 @@ class TestDistSeResneXtNCCL(TestDistBase): def test_dist_train(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "dist_se_resnext.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_se_resnext.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestDistSeResneXtNCCLMP(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reader_alloc = False @@ -50,12 +51,11 @@ class TestDistSeResneXtNCCLMP(TestDistBase): def test_dist_train(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "dist_se_resnext.py", - delta=1e-5, - check_error_log=True, - need_envs={"NCCL_P2P_DISABLE": "1"}, - log_name=flag_name) + self.check_with_place("dist_se_resnext.py", + delta=1e-5, + check_error_log=True, + need_envs={"NCCL_P2P_DISABLE": "1"}, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_sync.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_sync.py index cb4d07b4ccb..3c1dc9d989f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_sync.py +++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_sync.py @@ -18,21 +18,22 @@ from test_dist_base import TestDistBase import os import os + flag_name = os.path.splitext(__file__)[0] class TestDistSeResneXt2x2(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reader_alloc = False @unittest.skip(reason="Skip unstable ci") def test_dist_train(self): - self.check_with_place( - "dist_se_resnext.py", - delta=1e-7, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_se_resnext.py", + delta=1e-7, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_sharding_save.py b/python/paddle/fluid/tests/unittests/test_dist_sharding_save.py index 051bb7724eb..9f78f229612 
100755 --- a/python/paddle/fluid/tests/unittests/test_dist_sharding_save.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sharding_save.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestDistMnistFleetSave(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py index 0044be23260..56bc41690e7 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py @@ -41,7 +41,8 @@ class SparseLoadOp(unittest.TestCase): param_attr=fluid.ParamAttr( name="embedding", initializer=fluid.initializer.NumpyArrayInitializer( - emb_array)), ) + emb_array)), + ) fc1 = fluid.layers.fc( input=emb, @@ -72,6 +73,7 @@ class SparseLoadOp(unittest.TestCase): @unittest.skip(reason="Skip unstable ut, need rewrite with new implement") class TestSparseLoadOpCase1(SparseLoadOp): + def test_2ps_0_load(self): # init No.0 server env env = {} @@ -110,8 +112,8 @@ class TestSparseLoadOpCase1(SparseLoadOp): fc_w = np.array(fluid.global_scope().find_var("fc").get_tensor()) - emb = np.array(fluid.global_scope().find_var("embedding.block0") - .get_tensor()) + emb = np.array( + fluid.global_scope().find_var("embedding.block0").get_tensor()) assert fc_w.all() == fc_array.all() assert emb.all() == emb_array[::2].all() diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps1.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps1.py index b06d718e598..6ae1afa7dc5 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps1.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps1.py @@ -29,6 +29,7 @@ from test_dist_sparse_load_ps0 import SparseLoadOp @unittest.skip(reason="Skip unstable ut, need rewrite with new implement") class TestSparseLoadOpCase2(SparseLoadOp): + def test_2ps_0_load(self): # init No.1 server env env = {} @@ -66,8 +67,8 @@ class TestSparseLoadOpCase2(SparseLoadOp): optimizer = fleet.distributed_optimizer(optimizer, strategy) optimizer.minimize(loss) fleet.init_server(model_path) - emb = np.array(fluid.global_scope().find_var("embedding.block1") - .get_tensor()) + emb = np.array( + fluid.global_scope().find_var("embedding.block1").get_tensor()) assert emb.all() == emb_array[1::2].all() shutil.rmtree(model_path) diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py index 17bff651c44..416a6290715 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py @@ -51,8 +51,9 @@ class TestSparseLoadProgram(unittest.TestCase): with fluid.program_guard(train_program, startup_program): with fluid.unique_name.guard(): inputs = fluid.data('input', shape=[None, 1], dtype="int64") - emb = fluid.layers.embedding( - inputs, is_sparse=True, size=[10000, 128]) + emb = fluid.layers.embedding(inputs, + is_sparse=True, + size=[10000, 128]) fc1 = fluid.layers.fc(input=emb, size=128, act="relu") fc2 = fluid.layers.fc(input=fc1, size=64, act="relu") loss = fluid.layers.reduce_mean(fc2) @@ -60,6 +61,7 @@ class TestSparseLoadProgram(unittest.TestCase): class TestSparseLoadProgramSGD(TestSparseLoadProgram): + def test_server_init(self): scope, train_program, startup_program, loss = self.net() with 
fluid.scope_guard(scope): diff --git a/python/paddle/fluid/tests/unittests/test_dist_text_classification.py b/python/paddle/fluid/tests/unittests/test_dist_text_classification.py index d49ea3372e5..78264228a0f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_text_classification.py +++ b/python/paddle/fluid/tests/unittests/test_dist_text_classification.py @@ -18,33 +18,34 @@ import unittest from test_dist_base import TestDistBase import os + flag_name = os.path.splitext(__file__)[0] class TestDistTextClassification2x2(TestDistBase): + def _setup_config(self): self._sync_mode = True self._enforce_place = "CPU" def test_text_classification(self): - self.check_with_place( - "dist_text_classification.py", - delta=1e-6, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_text_classification.py", + delta=1e-6, + check_error_log=True, + log_name=flag_name) class TestDistTextClassification2x2Async(TestDistBase): + def _setup_config(self): self._sync_mode = False self._enforce_place = "CPU" def test_se_resnext(self): - self.check_with_place( - "dist_text_classification.py", - delta=100, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_text_classification.py", + delta=100, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_train.py b/python/paddle/fluid/tests/unittests/test_dist_train.py index e9f39f10904..bdaee766543 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_train.py +++ b/python/paddle/fluid/tests/unittests/test_dist_train.py @@ -38,6 +38,7 @@ RPC_OP_ROLE_ATTR_VALUE = core.op_proto_and_checker_maker.OpRole.RPC class TestSendOp(unittest.TestCase): + def test_send(self): remove_ps_flag(os.getpid()) # Run init_serv in a thread @@ -80,16 +81,14 @@ class TestSendOp(unittest.TestCase): with fluid.program_guard(main): serv = ListenAndServ("127.0.0.1:0", ["X"], optimizer_mode=False) with serv.do(): - out_var = main.global_block().create_var( - name="scale_0.tmp_0", - psersistable=True, - dtype="float32", - shape=[32, 32]) - x = layers.data( - shape=[32, 32], - dtype='float32', - name="X", - append_batch_size=False) + out_var = main.global_block().create_var(name="scale_0.tmp_0", + psersistable=True, + dtype="float32", + shape=[32, 32]) + x = layers.data(shape=[32, 32], + dtype='float32', + name="X", + append_batch_size=False) fluid.initializer.Constant(value=1.0)(x, main.global_block()) ops._scale(x=x, scale=10.0, out=out_var) @@ -99,20 +98,20 @@ class TestSendOp(unittest.TestCase): def init_client(self, place, port): main = fluid.Program() with fluid.program_guard(main): - main.global_block().append_op( - type="fetch_barrier", - inputs={}, - outputs={"Out": []}, - attrs={ - "endpoints": ["127.0.0.1:{0}".format(port)], - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) - - x = layers.data( - shape=[32, 32], - dtype='float32', - name='X', - append_batch_size=False) + main.global_block().append_op(type="fetch_barrier", + inputs={}, + outputs={"Out": []}, + attrs={ + "endpoints": + ["127.0.0.1:{0}".format(port)], + RPC_OP_ROLE_ATTR_NAME: + RPC_OP_ROLE_ATTR_VALUE + }) + + x = layers.data(shape=[32, 32], + dtype='float32', + name='X', + append_batch_size=False) x.persistable = True fluid.initializer.Constant(value=2.3)(x, main.global_block()) @@ -123,13 +122,13 @@ class TestSendOp(unittest.TestCase): shape=[32, 32]) fluid.initializer.Constant(value=2.3)(get_var, main.global_block()) - # NOTE(zjl): `Send` is async send, which means that the sent - # 
variable would be needed even though `Send` op runs. + # NOTE(zjl): `Send` is async send, which means that the sent + # variable would be needed even though `Send` op runs. # Is it a right design? If I do not set `x.persistable = True`, - # this unittest would hang in rpc client after x is deleted. + # this unittest would hang in rpc client after x is deleted. # - # BTW, `Send` is not a public API to users. So I set - # `x.persistable = True` to be a hot fix of this unittest. + # BTW, `Send` is not a public API to users. So I set + # `x.persistable = True` to be a hot fix of this unittest. Send("127.0.0.1:%d" % port, [x]) o = Recv("127.0.0.1:%d" % port, [get_var]) @@ -139,11 +138,10 @@ class TestSendOp(unittest.TestCase): def run_local(self, place): main = fluid.Program() with fluid.program_guard(main): - x = layers.data( - shape=[32, 32], - dtype='float32', - name='X', - append_batch_size=False) + x = layers.data(shape=[32, 32], + dtype='float32', + name='X', + append_batch_size=False) fluid.initializer.Constant(value=2.3)(x, main.global_block()) o = layers.scale(x=x, scale=10.0) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_dist_transformer.py b/python/paddle/fluid/tests/unittests/test_dist_transformer.py index 3307caa8b2d..073cab807b6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transformer.py @@ -56,23 +56,27 @@ def download_files(): class TestDistTransformer2x2Sync(TestDistBase): + def _setup_config(self): self._sync_mode = True def test_dist_train(self): download_files() - self.check_with_place( - "dist_transformer.py", delta=1e-5, check_error_log=False) + self.check_with_place("dist_transformer.py", + delta=1e-5, + check_error_log=False) class TestDistTransformer2x2Async(TestDistBase): + def _setup_config(self): self._sync_mode = False def test_dist_train(self): download_files() - self.check_with_place( - "dist_transformer.py", delta=1.0, check_error_log=False) + self.check_with_place("dist_transformer.py", + delta=1.0, + check_error_log=False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index 13a36f4a81e..5905b682d89 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -30,6 +30,7 @@ import paddle.fluid as fluid class TranspilerTest(unittest.TestCase): + def setUp(self): self.trainer_id = 0 self.trainers = 2 @@ -85,12 +86,11 @@ class TranspilerTest(unittest.TestCase): if not self.transpiler: main = self.get_main_program() self.transpiler = fluid.DistributeTranspiler(config=config) - self.transpiler.transpile( - self.trainer_id, - program=main, - pservers=self.pserver_eps, - trainers=self.trainers, - sync_mode=sync_mode) + self.transpiler.transpile(self.trainer_id, + program=main, + pservers=self.pserver_eps, + trainers=self.trainers, + sync_mode=sync_mode) return self.transpiler @@ -112,6 +112,7 @@ class TranspilerTest(unittest.TestCase): class TestBasicModel(TranspilerTest): + def transpiler_test_impl(self): pserver, startup = self.get_pserver(self.pserver1_ep) pserver2, startup2 = self.get_pserver(self.pserver2_ep) @@ -174,6 +175,7 @@ class TestBasicModel(TranspilerTest): class TestBasicModelWithLargeBlockSize(TranspilerTest): + def transpiler_test_impl(self): config = fluid.DistributeTranspilerConfig() config.min_block_size = 1048576 @@ -225,6 +227,7 @@ class 
TestBasicModelWithLargeBlockSize(TranspilerTest): class TestNoSliceVar(TranspilerTest): + def setUp(self): super(TestNoSliceVar, self).setUp() @@ -244,6 +247,7 @@ class TestNoSliceVar(TranspilerTest): class TestLRDecay(TranspilerTest): + def net_conf(self): x = fluid.layers.data(name='x', shape=[1000], dtype='float32') y_predict = fluid.layers.fc(input=x, @@ -255,11 +259,10 @@ class TestLRDecay(TranspilerTest): cost = fluid.layers.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) sgd_optimizer = fluid.optimizer.SGD( - learning_rate=fluid.layers.exponential_decay( - learning_rate=1.0, - decay_steps=2100, - decay_rate=0.1, - staircase=True)) + learning_rate=fluid.layers.exponential_decay(learning_rate=1.0, + decay_steps=2100, + decay_rate=0.1, + staircase=True)) sgd_optimizer.minimize(avg_cost) def transpiler_test_impl(self): @@ -276,15 +279,22 @@ class TestLRDecay(TranspilerTest): class TestFakeInit(TranspilerTest): + def net_conf(self): dict_size, embedding_size, neg_num = 10000, 8, 5 - input_word = fluid.layers.data( - name="input_word", shape=[1], dtype='int64', lod_level=1) - true_word = fluid.layers.data( - name='true_label', shape=[1], dtype='int64', lod_level=1) - neg_word = fluid.layers.data( - name="neg_label", shape=[1], dtype='int64', lod_level=1) + input_word = fluid.layers.data(name="input_word", + shape=[1], + dtype='int64', + lod_level=1) + true_word = fluid.layers.data(name='true_label', + shape=[1], + dtype='int64', + lod_level=1) + neg_word = fluid.layers.data(name="neg_label", + shape=[1], + dtype='int64', + lod_level=1) inputs = [input_word, true_word, neg_word] init_width = 0.5 / embedding_size @@ -292,9 +302,9 @@ class TestFakeInit(TranspilerTest): input=inputs[0], is_sparse=True, size=[dict_size, embedding_size], - param_attr=fluid.ParamAttr( - name='emb', - initializer=fluid.initializer.Uniform(-init_width, init_width))) + param_attr=fluid.ParamAttr(name='emb', + initializer=fluid.initializer.Uniform( + -init_width, init_width))) true_emb_w = fluid.layers.embedding( input=inputs[1], @@ -315,62 +325,59 @@ class TestFakeInit(TranspilerTest): neg_word_reshape = fluid.layers.reshape(inputs[2], shape=[-1, 1]) neg_word_reshape.stop_gradient = True - neg_emb_w = fluid.layers.embedding( - input=neg_word_reshape, - is_sparse=True, - size=[dict_size, embedding_size], - param_attr=fluid.ParamAttr( - name='emb_w', learning_rate=1.0)) + neg_emb_w = fluid.layers.embedding(input=neg_word_reshape, + is_sparse=True, + size=[dict_size, embedding_size], + param_attr=fluid.ParamAttr( + name='emb_w', learning_rate=1.0)) - neg_emb_w_re = fluid.layers.reshape( - neg_emb_w, shape=[-1, neg_num, embedding_size]) + neg_emb_w_re = fluid.layers.reshape(neg_emb_w, + shape=[-1, neg_num, embedding_size]) - neg_emb_b = fluid.layers.embedding( - input=neg_word_reshape, - is_sparse=True, - size=[dict_size, 1], - param_attr=fluid.ParamAttr( - name='emb_b', learning_rate=1.0)) + neg_emb_b = fluid.layers.embedding(input=neg_word_reshape, + is_sparse=True, + size=[dict_size, 1], + param_attr=fluid.ParamAttr( + name='emb_b', learning_rate=1.0)) neg_emb_b_vec = fluid.layers.reshape(neg_emb_b, shape=[-1, neg_num]) true_logits = fluid.layers.elementwise_add( - fluid.layers.reduce_sum( - fluid.layers.elementwise_mul(input_emb, true_emb_w), - dim=1, - keep_dim=True), - true_emb_b) + fluid.layers.reduce_sum(fluid.layers.elementwise_mul( + input_emb, true_emb_w), + dim=1, + keep_dim=True), true_emb_b) - input_emb_re = fluid.layers.reshape( - input_emb, shape=[-1, 1, embedding_size]) + 
input_emb_re = fluid.layers.reshape(input_emb, + shape=[-1, 1, embedding_size]) - neg_matmul = fluid.layers.matmul( - input_emb_re, neg_emb_w_re, transpose_y=True) + neg_matmul = fluid.layers.matmul(input_emb_re, + neg_emb_w_re, + transpose_y=True) neg_matmul_re = fluid.layers.reshape(neg_matmul, shape=[-1, neg_num]) neg_logits = fluid.layers.elementwise_add(neg_matmul_re, neg_emb_b_vec) # nce loss - label_ones = fluid.layers.fill_constant_batch_size_like( - true_logits, shape=[-1, 1], value=1.0, dtype='float32') + label_ones = fluid.layers.fill_constant_batch_size_like(true_logits, + shape=[-1, 1], + value=1.0, + dtype='float32') label_zeros = fluid.layers.fill_constant_batch_size_like( true_logits, shape=[-1, neg_num], value=0.0, dtype='float32') - true_xent = fluid.layers.sigmoid_cross_entropy_with_logits(true_logits, - label_ones) - neg_xent = fluid.layers.sigmoid_cross_entropy_with_logits(neg_logits, - label_zeros) + true_xent = fluid.layers.sigmoid_cross_entropy_with_logits( + true_logits, label_ones) + neg_xent = fluid.layers.sigmoid_cross_entropy_with_logits( + neg_logits, label_zeros) cost = fluid.layers.elementwise_add( - fluid.layers.reduce_sum( - true_xent, dim=1), - fluid.layers.reduce_sum( - neg_xent, dim=1)) + fluid.layers.reduce_sum(true_xent, dim=1), + fluid.layers.reduce_sum(neg_xent, dim=1)) avg_cost = fluid.layers.reduce_mean(cost) sgd_optimizer = fluid.optimizer.SGD( - learning_rate=fluid.layers.exponential_decay( - learning_rate=1.0, - decay_steps=2100, - decay_rate=0.1, - staircase=True)) + learning_rate=fluid.layers.exponential_decay(learning_rate=1.0, + decay_steps=2100, + decay_rate=0.1, + staircase=True)) sgd_optimizer.minimize(avg_cost) def transpiler_test_impl(self): @@ -385,6 +392,7 @@ class TestFakeInit(TranspilerTest): class TestDecayedAdagrad(TranspilerTest): + def net_conf(self): x = fluid.layers.data(name='x', shape=[1000], dtype='float32') y_predict = fluid.layers.fc(input=x, @@ -404,6 +412,7 @@ class TestDecayedAdagrad(TranspilerTest): class TestFtrl(TranspilerTest): + def net_conf(self): x = fluid.layers.data(name='x', shape=[1000], dtype='float32') y_predict = fluid.layers.fc(input=x, @@ -423,6 +432,7 @@ class TestFtrl(TranspilerTest): class TestLRDecayConditional(TranspilerTest): + def net_conf(self): x = fluid.layers.data(name='x', shape=[1000], dtype='float32') y_predict = fluid.layers.fc(input=x, @@ -469,14 +479,15 @@ class TestLRDecayConditional(TranspilerTest): class TestL2Decay(TranspilerTest): + def net_conf(self): x = fluid.layers.data(name='x', shape=[1000], dtype='float32') y_predict = fluid.layers.fc( input=x, size=1000, act=None, - param_attr=fluid.ParamAttr( - name='fc_w', regularizer=fluid.regularizer.L2Decay()), + param_attr=fluid.ParamAttr(name='fc_w', + regularizer=fluid.regularizer.L2Decay()), bias_attr=fluid.ParamAttr(name='fc_b')) y = fluid.layers.data(name='y', shape=[1], dtype='float32') cost = fluid.layers.square_error_cost(input=y_predict, label=y) @@ -502,6 +513,7 @@ class TestL2Decay(TranspilerTest): class TestL2DecayWithPiecewise(TranspilerTest): + def net_conf(self): x = fluid.layers.data(name='x', shape=[1000], dtype='float32') y_predict = fluid.layers.fc(input=x, @@ -516,8 +528,8 @@ class TestL2DecayWithPiecewise(TranspilerTest): bd = [1, 10, 20, 30] lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)] sgd_optimizer = fluid.optimizer.Momentum( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr), + learning_rate=fluid.layers.piecewise_decay(boundaries=bd, + values=lr), momentum=0.9, 
regularization=fluid.regularizer.L2Decay(1e-4)) sgd_optimizer.minimize(avg_cost) @@ -545,6 +557,7 @@ class TestL2DecayWithPiecewise(TranspilerTest): class TestEmptyPserverOptimizeBlocks(TranspilerTest): + def net_conf(self): x = fluid.layers.data(name='x', shape=[1000], dtype='float32') # only one parameter @@ -570,33 +583,39 @@ class TestEmptyPserverOptimizeBlocks(TranspilerTest): class TestDistLookupTableBase(TranspilerTest): + def network_with_table(self, is_sparse, is_distributed): self.table_size = 1000 self.emb_size = 64 self.lookup_table_name = 'shared_w' def emb_pool(ids, table_name, is_distributed): - emb = fluid.layers.embedding( - input=ids, - size=[self.table_size, self.emb_size], - dtype='float32', - param_attr=table_name, - is_sparse=is_sparse, - is_distributed=is_distributed) + emb = fluid.layers.embedding(input=ids, + size=[self.table_size, self.emb_size], + dtype='float32', + param_attr=table_name, + is_sparse=is_sparse, + is_distributed=is_distributed) pool = fluid.layers.sequence_pool(input=emb, pool_type='average') return pool - title_ids = fluid.layers.data( - name='title_ids', shape=[1], dtype='int64', lod_level=1) - brand_ids = fluid.layers.data( - name='brand_ids', shape=[1], dtype='int64', lod_level=1) - profile_ids = fluid.layers.data( - name='brand_ids', shape=[1], dtype='int64', lod_level=1) + title_ids = fluid.layers.data(name='title_ids', + shape=[1], + dtype='int64', + lod_level=1) + brand_ids = fluid.layers.data(name='brand_ids', + shape=[1], + dtype='int64', + lod_level=1) + profile_ids = fluid.layers.data(name='brand_ids', + shape=[1], + dtype='int64', + lod_level=1) title_emb = emb_pool(title_ids, self.lookup_table_name, is_distributed) brand_emb = emb_pool(brand_ids, self.lookup_table_name, is_distributed) profile_emb = emb_pool(profile_ids, "profile_emb", False) - fc0 = fluid.layers.concat( - input=[title_emb, brand_emb, profile_emb], axis=1) + fc0 = fluid.layers.concat(input=[title_emb, brand_emb, profile_emb], + axis=1) predict = fluid.layers.fc(input=fc0, size=2, act=None, @@ -611,6 +630,7 @@ class TestDistLookupTableBase(TranspilerTest): class TestLocalLookupTable(TestDistLookupTableBase): + def net_conf(self): self.network_with_table(is_sparse=True, is_distributed=False) @@ -649,6 +669,7 @@ class TestLocalLookupTable(TestDistLookupTableBase): class TestDistLookupTable(TestDistLookupTableBase): + def net_conf(self): self.network_with_table(is_sparse=True, is_distributed=True) @@ -699,6 +720,7 @@ class TestDistLookupTable(TestDistLookupTableBase): class TestAsyncLocalLookupTable(TestDistLookupTableBase): + def net_conf(self): self.network_with_table(is_sparse=True, is_distributed=False) @@ -736,6 +758,7 @@ class TestAsyncLocalLookupTable(TestDistLookupTableBase): class TestAsyncDistLookupTable(TestDistLookupTableBase): + def net_conf(self): self.network_with_table(is_sparse=True, is_distributed=True) @@ -786,6 +809,7 @@ class TestAsyncDistLookupTable(TestDistLookupTableBase): class TestDistLookupTableSliceSize(TestDistLookupTableBase): + def net_conf(self): self.network_with_table(is_sparse=True, is_distributed=True) @@ -802,6 +826,7 @@ class TestDistLookupTableSliceSize(TestDistLookupTableBase): class TestDistArgsInProgram(TestDistLookupTableBase): + def net_conf(self): self.network_with_table(is_sparse=True, is_distributed=True) @@ -817,6 +842,7 @@ class TestDistArgsInProgram(TestDistLookupTableBase): class TestRMSPropOptimizer(TranspilerTest): + def net_conf(self): x = fluid.layers.data(name='x', shape=[1000], dtype='float32') y_predict = 
fluid.layers.fc(input=x, @@ -846,6 +872,7 @@ class TestRMSPropOptimizer(TranspilerTest): class TestLoadSliceVar(TranspilerTest): + def net_conf(self): x = fluid.layers.data(name='x', shape=[1000], dtype='float32') y_predict = fluid.layers.fc(input=x, @@ -900,6 +927,7 @@ class TestLoadSliceVar(TranspilerTest): class TestNCCL2Transpile(TranspilerTest): + def test_nccl2_transpile(self): if fluid.core.is_compiled_with_cuda(): # test nccl2 only with cuda main = fluid.Program() @@ -911,11 +939,10 @@ class TestNCCL2Transpile(TranspilerTest): config.mode = "nccl2" config.wait_port = False t = fluid.DistributeTranspiler(config=config) - t.transpile( - 0, - trainers="127.0.0.1:6174,127.0.0.1:6175", - current_endpoint="127.0.0.1:6174", - startup_program=startup) + t.transpile(0, + trainers="127.0.0.1:6174,127.0.0.1:6175", + current_endpoint="127.0.0.1:6174", + startup_program=startup) print([op.type for op in startup.global_block().ops]) self.assertEqual(startup.global_block().ops[-1].type, "gen_nccl_id") self.assertIsNotNone(startup.global_block().vars.get("NCCLID")) @@ -926,6 +953,7 @@ class TestNCCL2Transpile(TranspilerTest): # test for remote prefetch class TestRemoteLookupTable(TestDistLookupTableBase): + def net_conf(self): import os os.environ['PADDLE_ENABLE_REMOTE_PREFETCH'] = "1" @@ -967,6 +995,7 @@ class TestRemoteLookupTable(TestDistLookupTableBase): # test for remote prefetch class TestRemoteNce(TestDistLookupTableBase): + def network_with_table(self, is_sparse, is_distributed): num_total_classes = 20 @@ -1029,16 +1058,19 @@ class TestRemoteNce(TestDistLookupTableBase): # test for remote prefetch class TestRemoteHsigmoid(TestDistLookupTableBase): + def network_with_table(self, is_sparse, is_distributed): num_total_classes = 3 input = fluid.layers.data(name="input", shape=[1], dtype="float32") label = fluid.layers.data(name="label", shape=[1], dtype="int64") - path_table = fluid.layers.data( - name='path_table', shape=[3], dtype='int64') - path_code = fluid.layers.data( - name='path_code', shape=[3], dtype='int64') + path_table = fluid.layers.data(name='path_table', + shape=[3], + dtype='int64') + path_code = fluid.layers.data(name='path_code', + shape=[3], + dtype='int64') w_param = fluid.default_main_program().global_block().create_parameter( shape=[num_total_classes, 10], dtype='float32', @@ -1057,14 +1089,13 @@ class TestRemoteHsigmoid(TestDistLookupTableBase): param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal( scale=1 / math.sqrt(num_total_classes)))) - cost = fluid.layers.hsigmoid( - input=emb, - label=label, - num_classes=num_total_classes, - path_table=path_table, - path_code=path_code, - is_custom=True, - is_sparse=is_sparse) + cost = fluid.layers.hsigmoid(input=emb, + label=label, + num_classes=num_total_classes, + path_table=path_table, + path_code=path_code, + is_custom=True, + is_sparse=is_sparse) avg_cost = fluid.layers.mean(cost) # optimizer optimizer = fluid.optimizer.SGD(learning_rate=0.003) diff --git a/python/paddle/fluid/tests/unittests/test_dist_tree_index.py b/python/paddle/fluid/tests/unittests/test_dist_tree_index.py index 6ea15319204..18500d9b152 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_tree_index.py +++ b/python/paddle/fluid/tests/unittests/test_dist_tree_index.py @@ -18,26 +18,36 @@ from paddle.distributed.fleet.dataset import TreeIndex import paddle.fluid as fluid import paddle.fluid.core as core import paddle + paddle.enable_static() def create_feeds(): - user_input = fluid.layers.data( - name="item_id", shape=[1], dtype="int64", 
lod_level=1) - - item = fluid.layers.data( - name="unit_id", shape=[1], dtype="int64", lod_level=1) - - label = fluid.layers.data( - name="label", shape=[1], dtype="int64", lod_level=1) - labels = fluid.layers.data( - name="labels", shape=[1], dtype="int64", lod_level=1) + user_input = fluid.layers.data(name="item_id", + shape=[1], + dtype="int64", + lod_level=1) + + item = fluid.layers.data(name="unit_id", + shape=[1], + dtype="int64", + lod_level=1) + + label = fluid.layers.data(name="label", + shape=[1], + dtype="int64", + lod_level=1) + labels = fluid.layers.data(name="labels", + shape=[1], + dtype="int64", + lod_level=1) feed_list = [user_input, item, label, labels] return feed_list class TestTreeIndex(unittest.TestCase): + def test_tree_index(self): path = download( "https://paddlerec.bj.bcebos.com/tree-based/data/mini_tree.pb", @@ -102,6 +112,7 @@ class TestTreeIndex(unittest.TestCase): class TestIndexSampler(unittest.TestCase): + def test_layerwise_sampler(self): path = download( "https://paddlerec.bj.bcebos.com/tree-based/data/mini_tree.pb", @@ -123,11 +134,10 @@ class TestIndexSampler(unittest.TestCase): slots_vars.append(var) dataset = paddle.distributed.InMemoryDataset() - dataset.init( - batch_size=1, - pipe_command="cat", - download_cmd="cat", - use_var=slots_vars) + dataset.init(batch_size=1, + pipe_command="cat", + download_cmd="cat", + use_var=slots_vars) dataset.set_filelist([file_name]) #dataset.update_settings(pipe_command="cat") #dataset._init_distributed_settings( @@ -137,14 +147,13 @@ class TestIndexSampler(unittest.TestCase): # candidate_size=10000) dataset.load_into_memory() - dataset.tdm_sample( - 'demo', - tree_path=path, - tdm_layer_counts=tdm_layer_counts, - start_sample_layer=1, - with_hierachy=False, - seed=0, - id_slot=2) + dataset.tdm_sample('demo', + tree_path=path, + tdm_layer_counts=tdm_layer_counts, + start_sample_layer=1, + with_hierachy=False, + seed=0, + id_slot=2) self.assertTrue(dataset.get_shuffle_data_size() == 8) diff --git a/python/paddle/fluid/tests/unittests/test_dist_word2vec.py b/python/paddle/fluid/tests/unittests/test_dist_word2vec.py index 9385d42c559..ef4d3c8f169 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_word2vec.py +++ b/python/paddle/fluid/tests/unittests/test_dist_word2vec.py @@ -17,47 +17,48 @@ import unittest from test_dist_base import TestDistBase import os + flag_name = os.path.splitext(__file__)[0] class TestDistW2V2x2(TestDistBase): + def _setup_config(self): self._sync_mode = True self._enforce_place = "CPU" def test_dist_train(self): - self.check_with_place( - "dist_word2vec.py", - delta=1e-4, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_word2vec.py", + delta=1e-4, + check_error_log=True, + log_name=flag_name) class TestDistW2V2x2WithMemOpt(TestDistBase): + def _setup_config(self): self._sync_mode = True self._mem_opt = True self._enforce_place = "CPU" def test_dist_train(self): - self.check_with_place( - "dist_word2vec.py", - delta=1e-4, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_word2vec.py", + delta=1e-4, + check_error_log=True, + log_name=flag_name) class TestDistW2V2x2Async(TestDistBase): + def _setup_config(self): self._sync_mode = False self._enforce_place = "CPU" def test_dist_train(self): - self.check_with_place( - "dist_word2vec.py", - delta=100, - check_error_log=True, - log_name=flag_name) + self.check_with_place("dist_word2vec.py", + delta=100, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git 
a/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py b/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py index 2cd7889d6e3..06cdaed1988 100644 --- a/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py +++ b/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py @@ -22,6 +22,7 @@ from op_test import OpTest class TestDistributeFPNProposalsOp(OpTest): + def set_data(self): self.init_test_case() self.make_rois() @@ -84,8 +85,8 @@ class TestDistributeFPNProposalsOp(OpTest): sub_lod = self.get_sub_lod(rois[idx_lvl, 0]) rois_fpn.append((rois[idx_lvl, 1:], [sub_lod])) rois_idx_order = np.concatenate((rois_idx_order, idx_lvl)) - rois_idx_restore = np.argsort(rois_idx_order).astype( - np.int32, copy=False) + rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, + copy=False) return rois_fpn, rois_idx_restore def calc_rois_distribute(self): @@ -122,6 +123,7 @@ class TestDistributeFPNProposalsOp(OpTest): class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp): + def set_data(self): self.init_test_case() self.make_rois() @@ -139,10 +141,9 @@ class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp): } output = [('out%d' % i, self.rois_fpn[i]) for i in range(len(self.rois_fpn))] - rois_num_per_level = [ - ('rois_num%d' % i, np.array(self.rois_fpn[i][1][0]).astype('int32')) - for i in range(len(self.rois_fpn)) - ] + rois_num_per_level = [('rois_num%d' % i, + np.array(self.rois_fpn[i][1][0]).astype('int32')) + for i in range(len(self.rois_fpn))] self.outputs = { 'MultiFpnRois': output, @@ -153,6 +154,7 @@ class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp): class TestDistributeFPNProposalsOpNoOffset( TestDistributeFPNProposalsOpWithRoisNum): + def init_test_case(self): self.roi_max_level = 5 self.roi_min_level = 2 diff --git a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_clip.py b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_clip.py index 315580dd31a..324da95f37d 100644 --- a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_clip.py +++ b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_clip.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -72,6 +72,7 @@ def run_test(clip_after_allreduce=True, class TestDistributedFusedLambWithClip(unittest.TestCase): + def test_1(self): run_test(clip_after_allreduce=True, max_global_norm=0.01) diff --git a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_gradient_merge.py b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_gradient_merge.py index 1822b77d0d0..c2089b1d97d 100644 --- a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_gradient_merge.py +++ b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_gradient_merge.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,11 +17,11 @@ import unittest class TestDistributedFusedLambGradientMerge(unittest.TestCase): + def test_gm(self): - run_test( - clip_after_allreduce=True, - max_global_norm=-1.0, - gradient_merge_steps=2) + run_test(clip_after_allreduce=True, + max_global_norm=-1.0, + gradient_merge_steps=2) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_without_clip.py b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_without_clip.py index dbd2d72fd2f..8d4dfa84d2f 100644 --- a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_without_clip.py +++ b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_without_clip.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,6 +17,7 @@ import unittest class TestDistributedFusedLambWithoutClip(unittest.TestCase): + def test_1(self): run_test(clip_after_allreduce=True, max_global_norm=-1.0) diff --git a/python/paddle/fluid/tests/unittests/test_distributed_strategy.py b/python/paddle/fluid/tests/unittests/test_distributed_strategy.py index df32912b0c2..491555907ec 100644 --- a/python/paddle/fluid/tests/unittests/test_distributed_strategy.py +++ b/python/paddle/fluid/tests/unittests/test_distributed_strategy.py @@ -22,6 +22,7 @@ import os class TestStrategyFactor(unittest.TestCase): + def test_sync_strategy(self): os.environ['CPU_NUM'] = "2" strategy = StrategyFactory.create_sync_strategy() @@ -108,8 +109,9 @@ class TestStrategyFactor(unittest.TestCase): self.assertEqual(strategy._build_strategy.async_mode, True) trainer_runtime_config = strategy.get_trainer_runtime_config() - self.assertEqual(trainer_runtime_config.runtime_configs[ - 'communicator_send_queue_size'], '100') + self.assertEqual( + trainer_runtime_config. 
+ runtime_configs['communicator_send_queue_size'], '100') # test set_trainer_runtime_config using dict trainer_runtime_config_dict = dict() @@ -193,6 +195,7 @@ class TestStrategyFactor(unittest.TestCase): class TestCreateDefaultStrategy(unittest.TestCase): + def test_default_strategy(self): role = role_maker.UserDefinedRoleMaker( current_id=0, @@ -209,6 +212,7 @@ class TestCreateDefaultStrategy(unittest.TestCase): class TestHalfAsyncStrategy(unittest.TestCase): + def test_half_async_strategy(self): role = role_maker.UserDefinedRoleMaker( current_id=0, @@ -228,6 +232,7 @@ class TestHalfAsyncStrategy(unittest.TestCase): class TestDebugInfo(unittest.TestCase): + def test_debug_info(self): x = fluid.layers.data(name='x', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/test_distributions.py b/python/paddle/fluid/tests/unittests/test_distributions.py index 83874417183..daf2adacb41 100644 --- a/python/paddle/fluid/tests/unittests/test_distributions.py +++ b/python/paddle/fluid/tests/unittests/test_distributions.py @@ -43,6 +43,7 @@ class DistributionNumpy(): class UniformNumpy(DistributionNumpy): + def __init__(self, low, high): self.low = np.array(low).astype('float32') self.high = np.array(high).astype('float32') @@ -62,6 +63,7 @@ class UniformNumpy(DistributionNumpy): class NormalNumpy(DistributionNumpy): + def __init__(self, loc, scale): self.loc = np.array(loc).astype('float32') self.scale = np.array(scale).astype('float32') @@ -73,12 +75,13 @@ class NormalNumpy(DistributionNumpy): def log_prob(self, value): var = self.scale * self.scale log_scale = np.log(self.scale) - return -((value - self.loc) * (value - self.loc)) / ( - 2. * var) - log_scale - math.log(math.sqrt(2. * math.pi)) + return -((value - self.loc) * + (value - self.loc)) / (2. * var) - log_scale - math.log( + math.sqrt(2. * math.pi)) def entropy(self): - return 0.5 + 0.5 * np.log(np.array(2. * math.pi).astype( - 'float32')) + np.log(self.scale) + return 0.5 + 0.5 * np.log(np.array( + 2. 
* math.pi).astype('float32')) + np.log(self.scale) def kl_divergence(self, other): var_ratio = (self.scale / other.scale) @@ -89,6 +92,7 @@ class NormalNumpy(DistributionNumpy): class CategoricalNumpy(DistributionNumpy): + def __init__(self, logits): self.logits = np.array(logits).astype('float32') @@ -113,6 +117,7 @@ class CategoricalNumpy(DistributionNumpy): class MultivariateNormalDiagNumpy(DistributionNumpy): + def __init__(self, loc, scale): self.loc = np.array(loc).astype('float32') self.scale = np.array(scale).astype('float32') @@ -135,8 +140,8 @@ class MultivariateNormalDiagNumpy(DistributionNumpy): def entropy(self): return 0.5 * (self.scale.shape[0] * - (1.0 + np.log(np.array(2 * math.pi).astype('float32')) - ) + np.log(self._det(self.scale))) + (1.0 + np.log(np.array(2 * math.pi).astype('float32'))) + + np.log(self._det(self.scale))) def kl_divergence(self, other): tr_cov_matmul = np.sum(self._inv(other.scale) * self.scale) @@ -151,6 +156,7 @@ class MultivariateNormalDiagNumpy(DistributionNumpy): class DistributionTest(unittest.TestCase): + def setUp(self, use_gpu=False): self.use_gpu = use_gpu if not use_gpu: @@ -169,10 +175,12 @@ class DistributionTest(unittest.TestCase): loc = layers.data(name='loc', shape=[dims], dtype='float32') scale = layers.data(name='scale', shape=[dims], dtype='float32') - other_loc = layers.data( - name='other_loc', shape=[dims], dtype='float32') - other_scale = layers.data( - name='other_scale', shape=[dims], dtype='float32') + other_loc = layers.data(name='other_loc', + shape=[dims], + dtype='float32') + other_scale = layers.data(name='other_scale', + shape=[dims], + dtype='float32') values = layers.data(name='values', shape=[dims], dtype='float32') @@ -264,8 +272,8 @@ class DistributionTest(unittest.TestCase): np_normal_float = NormalNumpy(loc_float, scale_float) np_other_normal_float = NormalNumpy(other_loc_float, other_scale_float) np_normal_float_np_broadcast = NormalNumpy(loc_float, scale_np) - np_other_normal_float_np_broadcast = NormalNumpy(other_loc_float, - other_scale_np) + np_other_normal_float_np_broadcast = NormalNumpy( + other_loc_float, other_scale_np) np_normal = NormalNumpy(loc_np, scale_np) np_other_normal = NormalNumpy(other_loc_np, other_scale_np) @@ -295,60 +303,66 @@ class DistributionTest(unittest.TestCase): feed=feed_vars, fetch_list=fetch_list) - np.testing.assert_allclose( - output_sample_float.shape, - gt_sample_float.shape, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_sample_float_np_broadcast.shape, - gt_sample_float_np_broadcast.shape, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_sample_np.shape, - gt_sample_np.shape, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_sample_variable.shape, - gt_sample_np.shape, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_entropy_float, - gt_entropy_float, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_entropy_float_np_broadcast, - gt_entropy_float_np_broadcast, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_entropy_np, gt_entropy, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_entropy_variable, gt_entropy, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_lp_float_np_broadcast, - gt_lp_float_np_broadcast, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_lp_np, gt_lp, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - 
output_lp_variable, gt_lp, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_kl_float, gt_kl_float, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_kl_float_np_broadcast, - gt_kl_float_np_broadcast, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_kl_np, gt_kl, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_kl_variable, gt_kl, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose(output_sample_float.shape, + gt_sample_float.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_sample_float_np_broadcast.shape, + gt_sample_float_np_broadcast.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_sample_np.shape, + gt_sample_np.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_sample_variable.shape, + gt_sample_np.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_entropy_float, + gt_entropy_float, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_entropy_float_np_broadcast, + gt_entropy_float_np_broadcast, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_entropy_np, + gt_entropy, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_entropy_variable, + gt_entropy, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_lp_float_np_broadcast, + gt_lp_float_np_broadcast, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_lp_np, + gt_lp, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_lp_variable, + gt_lp, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_kl_float, + gt_kl_float, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_kl_float_np_broadcast, + gt_kl_float_np_broadcast, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_kl_np, + gt_kl, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_kl_variable, + gt_kl, + rtol=tolerance, + atol=tolerance) def build_uniform_program(self, test_program, batch_size, dims, low_float, high_float, high_np, low_np, values_np): @@ -428,49 +442,50 @@ class DistributionTest(unittest.TestCase): feed=feed_vars, fetch_list=fetch_list) - np.testing.assert_allclose( - output_sample_float.shape, - gt_sample_float.shape, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_sample_float_np_broadcast.shape, - gt_sample_float_np_broadcast.shape, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_sample_np.shape, - gt_sample_np.shape, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_sample_variable.shape, - gt_sample_np.shape, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_entropy_float, - gt_entropy_float, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_entropy_float_np_broadcast, - gt_entropy_float_np_broadcast, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_entropy_np, gt_entropy, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_entropy_variable, gt_entropy, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_lp_float_np_broadcast, - gt_lp_float_np_broadcast, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_lp_np, gt_lp, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_lp_variable, gt_lp, rtol=tolerance, 
atol=tolerance) + np.testing.assert_allclose(output_sample_float.shape, + gt_sample_float.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_sample_float_np_broadcast.shape, + gt_sample_float_np_broadcast.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_sample_np.shape, + gt_sample_np.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_sample_variable.shape, + gt_sample_np.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_entropy_float, + gt_entropy_float, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_entropy_float_np_broadcast, + gt_entropy_float_np_broadcast, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_entropy_np, + gt_entropy, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_entropy_variable, + gt_entropy, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_lp_float_np_broadcast, + gt_lp_float_np_broadcast, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_lp_np, + gt_lp, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_lp_variable, + gt_lp, + rtol=tolerance, + atol=tolerance) def test_categorical_distribution(self, batch_size=2, @@ -483,8 +498,9 @@ class DistributionTest(unittest.TestCase): with fluid.program_guard(test_program): logits = layers.data(name='logits', shape=[dims], dtype='float32') - other_logits = layers.data( - name='other_logits', shape=[dims], dtype='float32') + other_logits = layers.data(name='other_logits', + shape=[dims], + dtype='float32') categorical_np = Categorical(logits_np) other_categorical_np = Categorical(other_logits_np) @@ -504,10 +520,14 @@ class DistributionTest(unittest.TestCase): output_kl_np] = self.executor.run(program=test_program, feed={'logits': logits_np}, fetch_list=[entropy_np, kl_np]) - np.testing.assert_allclose( - output_entropy_np, gt_entropy_np, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_kl_np, gt_kl_np, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose(output_entropy_np, + gt_entropy_np, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_kl_np, + gt_kl_np, + rtol=tolerance, + atol=tolerance) def test_multivariateNormalDiag_distribution(self, batch_size=2, @@ -517,34 +537,34 @@ class DistributionTest(unittest.TestCase): loc_np = np.random.random(batch_size, ).astype('float32') scale_np = np.diag(np.random.random(batch_size, )).astype('float32') other_loc_np = np.random.random(batch_size, ).astype('float32') - other_scale_np = np.diag(np.random.random(batch_size, )).astype( - 'float32') + other_scale_np = np.diag(np.random.random( + batch_size, )).astype('float32') with fluid.program_guard(test_program): - loc = layers.data( - name='loc', - shape=[batch_size, ], - dtype='float32', - append_batch_size=False) - scale = layers.data( - name='scale', - shape=[batch_size, batch_size], - dtype='float32', - append_batch_size=False) - other_loc = layers.data( - name='other_loc', - shape=[batch_size, ], - dtype='float32', - append_batch_size=False) - other_scale = layers.data( - name='other_scale', - shape=[batch_size, batch_size], - dtype='float32', - append_batch_size=False) + loc = layers.data(name='loc', + shape=[ + batch_size, + ], + dtype='float32', + append_batch_size=False) + scale = layers.data(name='scale', + shape=[batch_size, batch_size], + dtype='float32', + append_batch_size=False) + other_loc = layers.data(name='other_loc', 
+ shape=[ + batch_size, + ], + dtype='float32', + append_batch_size=False) + other_scale = layers.data(name='other_scale', + shape=[batch_size, batch_size], + dtype='float32', + append_batch_size=False) multivariate_np = MultivariateNormalDiag(loc, scale) - other_multivariate_np = MultivariateNormalDiag(other_loc, - other_scale) + other_multivariate_np = MultivariateNormalDiag( + other_loc, other_scale) entropy_np = multivariate_np.entropy() other_entropy_np = other_multivariate_np.entropy() @@ -553,8 +573,8 @@ class DistributionTest(unittest.TestCase): self.executor.run(fluid.default_main_program()) np_multivariate = MultivariateNormalDiagNumpy(loc_np, scale_np) - np_other_multivariate = MultivariateNormalDiagNumpy(other_loc_np, - other_scale_np) + np_other_multivariate = MultivariateNormalDiagNumpy( + other_loc_np, other_scale_np) gt_entropy_np = np_multivariate.entropy() gt_kl_np = np_multivariate.kl_divergence(np_other_multivariate) @@ -568,13 +588,18 @@ class DistributionTest(unittest.TestCase): 'other_scale': other_scale_np }, fetch_list=[entropy_np, kl_np]) - np.testing.assert_allclose( - output_entropy_np, gt_entropy_np, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_kl_np, gt_kl_np, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose(output_entropy_np, + gt_entropy_np, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose(output_kl_np, + gt_kl_np, + rtol=tolerance, + atol=tolerance) class DistributionTestError(unittest.TestCase): + def test_normal_error(self): loc = int(1) scale = int(1) diff --git a/python/paddle/fluid/tests/unittests/test_dot_op.py b/python/paddle/fluid/tests/unittests/test_dot_op.py index a92104a5a6f..536f8fd8d8a 100644 --- a/python/paddle/fluid/tests/unittests/test_dot_op.py +++ b/python/paddle/fluid/tests/unittests/test_dot_op.py @@ -24,6 +24,7 @@ from paddle.fluid import compiler, Program, program_guard class DotOp(OpTest): + def setUp(self): self.op_type = "dot" self.init_dtype() @@ -50,21 +51,19 @@ class DotOp(OpTest): def test_check_grad_ingore_x(self): if core.is_compiled_with_rocm(): - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[self.inputs['X']]) + self.check_grad(['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[self.inputs['X']]) else: self.check_grad(['Y'], 'Out', no_grad_set=set("X")) def test_check_grad_ingore_y(self): if core.is_compiled_with_rocm(): - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - user_defined_grads=[self.inputs['Y']]) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), + user_defined_grads=[self.inputs['Y']]) else: self.check_grad(['X'], 'Out', no_grad_set=set('Y')) @@ -78,11 +77,12 @@ class DotOp(OpTest): class DotOpBatch(DotOp): + def init_input_output(self): - self.x = np.random.uniform(0.1, 1, [132]).astype(self.dtype).reshape( - [11, 12]) - self.y = np.random.uniform(1, 3, [132]).astype(self.dtype).reshape( - [11, 12]) + self.x = np.random.uniform(0.1, 1, + [132]).astype(self.dtype).reshape([11, 12]) + self.y = np.random.uniform(1, 3, + [132]).astype(self.dtype).reshape([11, 12]) self.out = np.sum(self.x * self.y, axis=1).reshape([11, 1]) def test_check_grad_normal(self): @@ -96,6 +96,7 @@ class DotOpBatch(DotOp): class TestDotOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): @@ -115,6 +116,7 @@ class TestDotOpError(unittest.TestCase): class TestDygraph(unittest.TestCase): + def test_dygraph(self): with fluid.dygraph.guard(): x1 = 
fluid.dygraph.to_variable(np.array([1, 3]).astype(np.float32)) @@ -132,6 +134,7 @@ class TestDygraph(unittest.TestCase): class TestComplexDotOp(OpTest): + def setUp(self): self.op_type = "dot" self.init_base_dtype() @@ -164,30 +167,28 @@ class TestComplexDotOp(OpTest): self.check_output() def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y'], - 'Out', - user_defined_grads=[self.grad_x, self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X', 'Y'], + 'Out', + user_defined_grads=[self.grad_x, self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out]) class TestComplexDotOp2D(OpTest): + def setUp(self): self.op_type = "dot" self.init_base_dtype() @@ -229,27 +230,24 @@ class TestComplexDotOp2D(OpTest): self.check_output() def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y'], - 'Out', - user_defined_grads=[self.grad_x, self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X', 'Y'], + 'Out', + user_defined_grads=[self.grad_x, self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out]) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_downpoursgd.py b/python/paddle/fluid/tests/unittests/test_downpoursgd.py index f22a956b687..030af8f809e 100644 --- a/python/paddle/fluid/tests/unittests/test_downpoursgd.py +++ b/python/paddle/fluid/tests/unittests/test_downpoursgd.py @@ -54,8 +54,9 @@ class TestListenAndServOp(unittest.TestCase): cache_path) os.system(cmd) x = fluid.layers.data(name='x', shape=[1], dtype='int64') - x_emb = fluid.layers.embedding( - input=x, size=[1, 2], is_distributed=True) + x_emb = fluid.layers.embedding(input=x, + size=[1, 2], + is_distributed=True) y_predict = fluid.layers.fc(input=x_emb, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') cost = fluid.layers.square_error_cost(input=y_predict, label=y) @@ -113,8 +114,9 @@ class TestListenAndServOp(unittest.TestCase): cache_path) os.system(cmd) x = fluid.layers.data(name='x', shape=[1], dtype='int64') - x_emb = fluid.layers.embedding( - input=x, size=[1, 2], is_distributed=True) + x_emb = fluid.layers.embedding(input=x, + size=[1, 2], + is_distributed=True) y_predict = fluid.layers.fc(input=x_emb, size=1, act=None) y = fluid.layers.data(name='y', 
shape=[1], dtype='float32') cost = fluid.layers.square_error_cost(input=y_predict, label=y) @@ -170,8 +172,9 @@ class TestListenAndServOp(unittest.TestCase): cache_path) os.system(cmd) x = fluid.layers.data(name='x', shape=[1], dtype='int64') - x_emb = fluid.layers.embedding( - input=x, size=[1, 2], is_distributed=True) + x_emb = fluid.layers.embedding(input=x, + size=[1, 2], + is_distributed=True) y_predict = fluid.layers.fc(input=x_emb, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') cost = fluid.layers.square_error_cost(input=y_predict, label=y) diff --git a/python/paddle/fluid/tests/unittests/test_dpsgd_op.py b/python/paddle/fluid/tests/unittests/test_dpsgd_op.py index 35a922b7820..bc0de9c8952 100644 --- a/python/paddle/fluid/tests/unittests/test_dpsgd_op.py +++ b/python/paddle/fluid/tests/unittests/test_dpsgd_op.py @@ -20,6 +20,7 @@ from op_test import OpTest class TestDpsgdOp(OpTest): + def setUp(self): '''Test Dpsgd Operator with supplied attributes ''' diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py index e8d4fc260b8..33992b1881e 100644 --- a/python/paddle/fluid/tests/unittests/test_dropout_op.py +++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py @@ -29,6 +29,7 @@ from paddle import _C_ops class TestDropoutOp(OpTest): + def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} @@ -46,6 +47,7 @@ class TestDropoutOp(OpTest): class TestDropoutOpInput1d(OpTest): + def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((2000, )).astype("float32")} @@ -63,6 +65,7 @@ class TestDropoutOpInput1d(OpTest): class TestDropoutOp2(TestDropoutOp): + def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} @@ -74,6 +77,7 @@ class TestDropoutOp2(TestDropoutOp): class TestDropoutOp3(TestDropoutOp): + def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64, 2)).astype("float32")} @@ -86,6 +90,7 @@ class TestDropoutOp3(TestDropoutOp): @skip_check_grad_ci(reason="For inference, check_grad is not required.") class TestDropoutOp4(OpTest): + def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} @@ -100,6 +105,7 @@ class TestDropoutOp4(OpTest): @skip_check_grad_ci(reason="For inference, check_grad is not required.") class TestDropoutOp5(OpTest): + def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64, 3)).astype("float32")} @@ -113,6 +119,7 @@ class TestDropoutOp5(OpTest): class TestDropoutOp6(TestDropoutOp): + def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} @@ -129,6 +136,7 @@ class TestDropoutOp6(TestDropoutOp): class TestDropoutOp7(TestDropoutOp): + def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64, 2)).astype("float32")} @@ -146,6 +154,7 @@ class TestDropoutOp7(TestDropoutOp): @skip_check_grad_ci(reason="For inference, check_grad is not required.") class TestDropoutOp8(OpTest): + def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} @@ -163,6 +172,7 @@ class TestDropoutOp8(OpTest): @skip_check_grad_ci(reason="For inference, check_grad is not required.") class TestDropoutOp9(OpTest): + def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64, 3)).astype("float32")} @@ 
-178,14 +188,16 @@ class TestDropoutOp9(OpTest): class TestDropoutOpWithSeed(OpTest): + def setUp(self): self.op_type = "dropout" self.inputs = { "X": np.random.random((32, 64)).astype("float32"), - "Seed": np.asarray( - [125], dtype="int32") + "Seed": np.asarray([125], dtype="int32") + } + self.attrs = { + 'dropout_prob': 0.0, } - self.attrs = {'dropout_prob': 0.0, } self.outputs = { 'Out': self.inputs['X'], 'Mask': np.ones((32, 64)).astype('uint8') @@ -198,11 +210,13 @@ class TestDropoutOpWithSeed(OpTest): self.check_grad(['X'], 'Out', max_relative_error=0.05) -@unittest.skipIf( - not core.is_compiled_with_cuda() or not core.op_support_gpu("dropout"), - "core is not compiled with CUDA or core is not support dropout") +@unittest.skipIf(not core.is_compiled_with_cuda() + or not core.op_support_gpu("dropout"), + "core is not compiled with CUDA or core is not support dropout" + ) @skip_check_grad_ci(reason="For inference, check_grad is not required.") class TestFP16DropoutOp(OpTest): + def setUp(self): self.op_type = "dropout" self.init_test_case() @@ -226,11 +240,13 @@ class TestFP16DropoutOp(OpTest): self.check_output_with_place(core.CUDAPlace(0), atol=1e-3) -@unittest.skipIf( - not core.is_compiled_with_cuda() or not core.op_support_gpu("dropout"), - "core is not compiled with CUDA or core is not support dropout") +@unittest.skipIf(not core.is_compiled_with_cuda() + or not core.op_support_gpu("dropout"), + "core is not compiled with CUDA or core is not support dropout" + ) @skip_check_grad_ci(reason="For inference, check_grad is not required.") class TestFP16DropoutOp2(TestFP16DropoutOp): + def init_test_case(self): self.input_size = [32, 64, 3] self.prob = 0.75 @@ -238,6 +254,7 @@ class TestFP16DropoutOp2(TestFP16DropoutOp): class TestBF16DropoutOp(OpTest): + def setUp(self): self.op_type = "dropout" self.dtype = np.uint16 @@ -259,6 +276,7 @@ class TestBF16DropoutOp(OpTest): class TestDropoutOpWithSeedOnCPUPlace(unittest.TestCase): + def test_seed_cpu_place(self): paddle.enable_static() main_program = Program() @@ -280,12 +298,11 @@ class TestDropoutOpWithSeedOnCPUPlace(unittest.TestCase): dtype='float32', persistable=False, stop_gradient=True) - x_var = main_program.global_block().create_var( - name=x_var_name, - shape=[40, 40], - dtype='float32', - persistable=False, - stop_gradient=True) + x_var = main_program.global_block().create_var(name=x_var_name, + shape=[40, 40], + dtype='float32', + persistable=False, + stop_gradient=True) mask_var = main_program.global_block().create_var( name=mask_var_name, shape=[1], @@ -293,28 +310,32 @@ class TestDropoutOpWithSeedOnCPUPlace(unittest.TestCase): persistable=False, stop_gradient=True) - main_program.global_block().append_op( - type="fill_constant", - outputs={"Out": x_var_name}, - attrs={ - "shape": [40, 40], - "dtype": x_var.dtype, - "value": 1.0, - "place_type": 0 - }) + main_program.global_block().append_op(type="fill_constant", + outputs={"Out": x_var_name}, + attrs={ + "shape": [40, 40], + "dtype": x_var.dtype, + "value": 1.0, + "place_type": 0 + }) main_program.global_block().append_op( type='seed', inputs={}, outputs={'Out': seed_input_var}, - attrs={'seed': 1, - 'force_cpu': True}) - main_program.global_block().append_op( - type='dropout', - inputs={'X': x_var, - 'Seed': seed_input_var}, - attrs={'dropout_prob': 0.}, - outputs={'Out': x_out_var, - 'Mask': mask_var}) + attrs={ + 'seed': 1, + 'force_cpu': True + }) + main_program.global_block().append_op(type='dropout', + inputs={ + 'X': x_var, + 'Seed': seed_input_var + }, + 
attrs={'dropout_prob': 0.}, + outputs={ + 'Out': x_out_var, + 'Mask': mask_var + }) place = fluid.CPUPlace() if core.is_compiled_with_cuda(): place = fluid.CUDAPlace(0) @@ -328,13 +349,14 @@ class TestDropoutOpWithSeedOnCPUPlace(unittest.TestCase): class TestDropoutOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): def test_Variable(): # the input of dropout must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) fluid.layers.dropout(x1, dropout_prob=0.5) self.assertRaises(TypeError, test_Variable) @@ -342,14 +364,16 @@ class TestDropoutOpError(unittest.TestCase): def test_dtype(): # the input dtype of dropout must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data( - name='x2', shape=[3, 4, 5, 6], dtype="int32") + x2 = fluid.layers.data(name='x2', + shape=[3, 4, 5, 6], + dtype="int32") fluid.layers.dropout(x2, dropout_prob=0.5) self.assertRaises(TypeError, test_dtype) class TestDropoutFAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -360,53 +384,59 @@ class TestDropoutFAPI(unittest.TestCase): with fluid.program_guard(fluid.Program(), fluid.Program()): input = fluid.data(name="input", shape=[-1, -1], dtype="float32") res1 = paddle.nn.functional.dropout(x=input, p=0., training=False) - res2 = paddle.nn.functional.dropout( - x=input, p=0., axis=0, training=True, mode='upscale_in_train') - res3 = paddle.nn.functional.dropout( - x=input, p=0., axis=0, training=True, mode='downscale_in_infer') - res4 = paddle.nn.functional.dropout( - x=input, p=0., axis=0, training=False, mode='upscale_in_train') - res5 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=0, - training=False, - mode='downscale_in_infer') - res6 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=[0, 1], - training=True, - mode='upscale_in_train') - res7 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=[0, 1], - training=True, - mode='downscale_in_infer') - res8 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=[0, 1], - training=False, - mode='upscale_in_train') - res9 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=[0, 1], - training=False, - mode='downscale_in_infer') + res2 = paddle.nn.functional.dropout(x=input, + p=0., + axis=0, + training=True, + mode='upscale_in_train') + res3 = paddle.nn.functional.dropout(x=input, + p=0., + axis=0, + training=True, + mode='downscale_in_infer') + res4 = paddle.nn.functional.dropout(x=input, + p=0., + axis=0, + training=False, + mode='upscale_in_train') + res5 = paddle.nn.functional.dropout(x=input, + p=0., + axis=0, + training=False, + mode='downscale_in_infer') + res6 = paddle.nn.functional.dropout(x=input, + p=0., + axis=[0, 1], + training=True, + mode='upscale_in_train') + res7 = paddle.nn.functional.dropout(x=input, + p=0., + axis=[0, 1], + training=True, + mode='downscale_in_infer') + res8 = paddle.nn.functional.dropout(x=input, + p=0., + axis=[0, 1], + training=False, + mode='upscale_in_train') + res9 = paddle.nn.functional.dropout(x=input, + p=0., + axis=[0, 1], + training=False, + mode='downscale_in_infer') res10 = paddle.nn.functional.dropout(x=input, p=1., training=True) res11 = paddle.fluid.layers.dropout(x=input, dropout_prob=0.) 
- res12 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=(0, 1), - training=False, - mode='upscale_in_train') - - res13 = paddle.nn.functional.dropout( - x=input, p=0.7, axis=1, training=True, mode='upscale_in_train') + res12 = paddle.nn.functional.dropout(x=input, + p=0., + axis=(0, 1), + training=False, + mode='upscale_in_train') + + res13 = paddle.nn.functional.dropout(x=input, + p=0.7, + axis=1, + training=True, + mode='upscale_in_train') in_np = np.ones([40, 40]).astype("float32") res_np = in_np @@ -442,72 +472,64 @@ class TestDropoutFAPI(unittest.TestCase): res_np2 = np.zeros_like(in_np) input = fluid.dygraph.to_variable(in_np) - res1 = paddle.nn.functional.dropout( - x=input, p=0., training=False) - res2 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=0, - training=True, - mode='upscale_in_train') - res3 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=0, - training=True, - mode='downscale_in_infer') - res4 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=0, - training=False, - mode='upscale_in_train') - res5 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=0, - training=False, - mode='downscale_in_infer') - res6 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=[0, 1], - training=True, - mode='upscale_in_train') - res7 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=[0, 1], - training=True, - mode='downscale_in_infer') - res8 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=[0, 1], - training=False, - mode='upscale_in_train') - res9 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=[0, 1], - training=False, - mode='downscale_in_infer') - res10 = paddle.nn.functional.dropout( - x=input, p=1., training=True) + res1 = paddle.nn.functional.dropout(x=input, + p=0., + training=False) + res2 = paddle.nn.functional.dropout(x=input, + p=0., + axis=0, + training=True, + mode='upscale_in_train') + res3 = paddle.nn.functional.dropout(x=input, + p=0., + axis=0, + training=True, + mode='downscale_in_infer') + res4 = paddle.nn.functional.dropout(x=input, + p=0., + axis=0, + training=False, + mode='upscale_in_train') + res5 = paddle.nn.functional.dropout(x=input, + p=0., + axis=0, + training=False, + mode='downscale_in_infer') + res6 = paddle.nn.functional.dropout(x=input, + p=0., + axis=[0, 1], + training=True, + mode='upscale_in_train') + res7 = paddle.nn.functional.dropout(x=input, + p=0., + axis=[0, 1], + training=True, + mode='downscale_in_infer') + res8 = paddle.nn.functional.dropout(x=input, + p=0., + axis=[0, 1], + training=False, + mode='upscale_in_train') + res9 = paddle.nn.functional.dropout(x=input, + p=0., + axis=[0, 1], + training=False, + mode='downscale_in_infer') + res10 = paddle.nn.functional.dropout(x=input, + p=1., + training=True) dropout = paddle.fluid.dygraph.Dropout(p=0, ) res11 = dropout(input) - res12 = paddle.nn.functional.dropout( - x=input, - p=0., - axis=(0, 1), - training=False, - mode='upscale_in_train') - res13 = paddle.nn.functional.dropout( - x=input, - p=0.5, - axis=1, - training=True, - mode='upscale_in_train') + res12 = paddle.nn.functional.dropout(x=input, + p=0., + axis=(0, 1), + training=False, + mode='upscale_in_train') + res13 = paddle.nn.functional.dropout(x=input, + p=0.5, + axis=1, + training=True, + mode='upscale_in_train') res_list = [ res1, res2, res3, res4, res5, res6, res7, res8, res9, res11, @@ -519,21 +541,22 @@ class TestDropoutFAPI(unittest.TestCase): class TestDropoutFAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): 
def test_Variable(): # the input of dropout must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) paddle.nn.functional.dropout(x1, p=0.5) self.assertRaises(TypeError, test_Variable) def test_Variable2(): # the input of dropout must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) paddle.nn.functional.dropout(x1, p=0.5, axis=0) self.assertRaises(TypeError, test_Variable2) @@ -597,6 +620,7 @@ class TestDropoutFAPIError(unittest.TestCase): class TestDropoutCAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -616,6 +640,7 @@ class TestDropoutCAPI(unittest.TestCase): class TestDropout2DFAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -624,12 +649,17 @@ class TestDropout2DFAPI(unittest.TestCase): def check_static_result(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - input = fluid.data( - name="input", shape=[2, 3, 4, 5], dtype="float32") - res1 = paddle.nn.functional.dropout2d( - x=input, p=0., training=False, data_format='NCHW') - res2 = paddle.nn.functional.dropout2d( - x=input, p=0., training=False, data_format='NHWC') + input = fluid.data(name="input", + shape=[2, 3, 4, 5], + dtype="float32") + res1 = paddle.nn.functional.dropout2d(x=input, + p=0., + training=False, + data_format='NCHW') + res2 = paddle.nn.functional.dropout2d(x=input, + p=0., + training=False, + data_format='NHWC') in_np = np.random.random([2, 3, 4, 5]).astype("float32") res_np = in_np @@ -653,10 +683,14 @@ class TestDropout2DFAPI(unittest.TestCase): res_np = in_np input = fluid.dygraph.to_variable(in_np) - res1 = paddle.nn.functional.dropout2d( - x=input, p=0., training=False, data_format='NCHW') - res2 = paddle.nn.functional.dropout2d( - x=input, p=0., training=False, data_format='NHWC') + res1 = paddle.nn.functional.dropout2d(x=input, + p=0., + training=False, + data_format='NCHW') + res2 = paddle.nn.functional.dropout2d(x=input, + p=0., + training=False, + data_format='NHWC') res_list = [res1, res2] for res in res_list: @@ -664,6 +698,7 @@ class TestDropout2DFAPI(unittest.TestCase): class TestDropout2DFAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): @@ -683,6 +718,7 @@ class TestDropout2DFAPIError(unittest.TestCase): class TestDropout2DCAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -702,6 +738,7 @@ class TestDropout2DCAPI(unittest.TestCase): class TestDropout3DFAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -710,12 +747,17 @@ class TestDropout3DFAPI(unittest.TestCase): def check_static_result(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - input = fluid.data( - name="input", shape=[2, 3, 4, 5, 6], dtype="float32") - res1 = paddle.nn.functional.dropout3d( - x=input, p=0., training=False, data_format='NCDHW') - res2 = paddle.nn.functional.dropout3d( - x=input, p=0., training=False, data_format='NDHWC') + input = fluid.data(name="input", + shape=[2, 3, 4, 5, 6], + dtype="float32") + res1 = paddle.nn.functional.dropout3d(x=input, + p=0., + training=False, + data_format='NCDHW') + res2 = paddle.nn.functional.dropout3d(x=input, + 
p=0., + training=False, + data_format='NDHWC') in_np = np.random.random([2, 3, 4, 5, 6]).astype("float32") res_np = in_np @@ -739,10 +781,14 @@ class TestDropout3DFAPI(unittest.TestCase): res_np = in_np input = fluid.dygraph.to_variable(in_np) - res1 = paddle.nn.functional.dropout3d( - x=input, p=0., training=False, data_format='NCDHW') - res2 = paddle.nn.functional.dropout3d( - x=input, p=0., training=False, data_format='NDHWC') + res1 = paddle.nn.functional.dropout3d(x=input, + p=0., + training=False, + data_format='NCDHW') + res2 = paddle.nn.functional.dropout3d(x=input, + p=0., + training=False, + data_format='NDHWC') res_list = [res1, res2] for res in res_list: @@ -750,6 +796,7 @@ class TestDropout3DFAPI(unittest.TestCase): class TestDropout3DFAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): @@ -769,6 +816,7 @@ class TestDropout3DFAPIError(unittest.TestCase): class TestDropout3DCAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -788,6 +836,7 @@ class TestDropout3DCAPI(unittest.TestCase): class TestAlphaDropoutFAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -798,8 +847,9 @@ class TestAlphaDropoutFAPI(unittest.TestCase): with fluid.program_guard(fluid.Program(), fluid.Program()): input = fluid.data(name="input", shape=[40, 40], dtype="float32") res1 = paddle.nn.functional.alpha_dropout(x=input, p=0.) - res2 = paddle.nn.functional.alpha_dropout( - x=input, p=0., training=False) + res2 = paddle.nn.functional.alpha_dropout(x=input, + p=0., + training=False) res3 = paddle.nn.functional.alpha_dropout(x=input, p=1.) in_np = np.random.random([40, 40]).astype("float32") @@ -831,8 +881,9 @@ class TestAlphaDropoutFAPI(unittest.TestCase): input = fluid.dygraph.to_variable(in_np) res1 = paddle.nn.functional.alpha_dropout(x=input, p=0.) - res2 = paddle.nn.functional.alpha_dropout( - x=input, p=0., training=False) + res2 = paddle.nn.functional.alpha_dropout(x=input, + p=0., + training=False) res3 = paddle.nn.functional.alpha_dropout(x=input, p=1.) res_list = [res1, res2] @@ -842,13 +893,14 @@ class TestAlphaDropoutFAPI(unittest.TestCase): class TestAlphaDropoutFAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): def test_Variable(): # the input of dropout must be Variable. 
- x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) paddle.nn.functional.alpha_dropout(x1, p=0.5) self.assertRaises(TypeError, test_Variable) @@ -876,6 +928,7 @@ class TestAlphaDropoutFAPIError(unittest.TestCase): class TestAlphaDropoutCAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -895,6 +948,7 @@ class TestAlphaDropoutCAPI(unittest.TestCase): class TestDropoutWithDeterminateSeedGenerator(unittest.TestCase): + def setUp(self): paddle.framework.random.set_random_seed_generator('seed0', 123) paddle.framework.random.set_random_seed_generator('seed1', 123) @@ -908,18 +962,16 @@ class TestDropoutWithDeterminateSeedGenerator(unittest.TestCase): from paddle.distributed.fleet.meta_parallel.parallel_layers.random import dropout with static.program_guard(static.Program(), static.Program()): input = static.data(name="input", shape=[40, 40], dtype="float32") - res1 = dropout( - input, - p=0.3, - training=True, - mode='upscale_in_train', - rng_name='seed0') - res2 = dropout( - input, - p=0.3, - training=True, - mode='upscale_in_train', - rng_name='seed1') + res1 = dropout(input, + p=0.3, + training=True, + mode='upscale_in_train', + rng_name='seed0') + res2 = dropout(input, + p=0.3, + training=True, + mode='upscale_in_train', + rng_name='seed1') res3 = dropout(input, p=0.3) in_np = np.random.random([40, 40]).astype("float32") @@ -938,6 +990,7 @@ class TestDropoutWithDeterminateSeedGenerator(unittest.TestCase): class TestDropoutBackward(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -961,8 +1014,9 @@ class TestDropoutBackward(unittest.TestCase): out.backward() self.assertTrue( - np.array_equal(input.gradient( - ), self.cal_grad_downscale_in_infer(mask.numpy()))) + np.array_equal( + input.gradient(), + self.cal_grad_downscale_in_infer(mask.numpy()))) def test_backward_downscale_in_infer_eager(self): for place in self.places: @@ -974,8 +1028,9 @@ class TestDropoutBackward(unittest.TestCase): input, None, 0.5, False, "downgrade_in_infer", 0, False) out.backward() self.assertTrue( - np.array_equal(input.gradient( - ), self.cal_grad_downscale_in_infer(mask.numpy()))) + np.array_equal( + input.gradient(), + self.cal_grad_downscale_in_infer(mask.numpy()))) def test_backward_upscale_train(self): _enable_legacy_dygraph() @@ -991,8 +1046,9 @@ class TestDropoutBackward(unittest.TestCase): out.backward() self.assertTrue( - np.allclose(input.gradient( - ), self.cal_grad_upscale_train(mask.numpy(), prob))) + np.allclose(input.gradient(), + self.cal_grad_upscale_train(mask.numpy(), + prob))) def test_backward_upscale_train_eager(self): for place in self.places: @@ -1006,8 +1062,9 @@ class TestDropoutBackward(unittest.TestCase): out.backward() self.assertTrue( - np.allclose(input.gradient( - ), self.cal_grad_upscale_train(mask.numpy(), prob))) + np.allclose( + input.gradient(), + self.cal_grad_upscale_train(mask.numpy(), prob))) def test_backward_upscale_train_2(self): _enable_legacy_dygraph() @@ -1023,8 +1080,9 @@ class TestDropoutBackward(unittest.TestCase): out.backward() self.assertTrue( - np.allclose(input.gradient( - ), self.cal_grad_upscale_train(mask.numpy(), prob))) + np.allclose(input.gradient(), + self.cal_grad_upscale_train(mask.numpy(), + prob))) def test_backward_upscale_train_2_eager(self): for place in self.places: @@ -1040,11 +1098,13 @@ class 
TestDropoutBackward(unittest.TestCase): out.backward() self.assertTrue( - np.allclose(input.gradient( - ), self.cal_grad_upscale_train(mask.numpy(), prob))) + np.allclose( + input.gradient(), + self.cal_grad_upscale_train(mask.numpy(), prob))) class TestRandomValue(unittest.TestCase): + def test_fixed_random_number(self): # Test GPU Fixed random number, which is generated by 'curandStatePhilox4_32_10_t' if not paddle.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py b/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py index 6c2516d6c11..f77f54a636e 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard class SimpleImgConvPool(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -43,27 +44,25 @@ class SimpleImgConvPool(fluid.dygraph.Layer): bias_attr=None): super(SimpleImgConvPool, self).__init__() - self._conv2d = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=conv_stride, - padding=conv_padding, - dilation=conv_dilation, - groups=conv_groups, - param_attr=param_attr, - bias_attr=bias_attr, - use_cudnn=use_cudnn, - dtype=dtype, - act=act) - - self._pool2d = Pool2D( - pool_size=pool_size, - pool_type=pool_type, - pool_stride=pool_stride, - pool_padding=pool_padding, - global_pooling=global_pooling, - use_cudnn=use_cudnn) + self._conv2d = Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=conv_stride, + padding=conv_padding, + dilation=conv_dilation, + groups=conv_groups, + param_attr=param_attr, + bias_attr=bias_attr, + use_cudnn=use_cudnn, + dtype=dtype, + act=act) + + self._pool2d = Pool2D(pool_size=pool_size, + pool_type=pool_type, + pool_stride=pool_stride, + pool_padding=pool_padding, + global_pooling=global_pooling, + use_cudnn=use_cudnn) def forward(self, inputs): x = self._conv2d(inputs) @@ -72,28 +71,27 @@ class SimpleImgConvPool(fluid.dygraph.Layer): class MNIST(fluid.dygraph.Layer): + def __init__(self, dtype="float32"): super(MNIST, self).__init__() - self._simple_img_conv_pool_1 = SimpleImgConvPool( - num_channels=3, - num_filters=20, - filter_size=5, - pool_size=2, - pool_stride=2, - act="relu", - dtype=dtype, - use_cudnn=True) - - self._simple_img_conv_pool_2 = SimpleImgConvPool( - num_channels=20, - num_filters=50, - filter_size=5, - pool_size=2, - pool_stride=2, - act="relu", - dtype=dtype, - use_cudnn=True) + self._simple_img_conv_pool_1 = SimpleImgConvPool(num_channels=3, + num_filters=20, + filter_size=5, + pool_size=2, + pool_stride=2, + act="relu", + dtype=dtype, + use_cudnn=True) + + self._simple_img_conv_pool_2 = SimpleImgConvPool(num_channels=20, + num_filters=50, + filter_size=5, + pool_size=2, + pool_stride=2, + act="relu", + dtype=dtype, + use_cudnn=True) self.pool_2_shape = 50 * 53 * 53 SIZE = 10 @@ -102,8 +100,8 @@ class MNIST(fluid.dygraph.Layer): self.pool_2_shape, 10, param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale)), + initializer=fluid.initializer.NormalInitializer(loc=0.0, + scale=scale)), act="softmax", dtype=dtype) @@ -118,6 +116,7 @@ class MNIST(fluid.dygraph.Layer): class TestMnist(unittest.TestCase): + def func_mnist_fp16(self): if not fluid.is_compiled_with_cuda(): return diff --git 
a/python/paddle/fluid/tests/unittests/test_dygraph_mode_of_unittest.py b/python/paddle/fluid/tests/unittests/test_dygraph_mode_of_unittest.py index 739a0fbbfd3..8e1761e9cd7 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_mode_of_unittest.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_mode_of_unittest.py @@ -19,10 +19,12 @@ import paddle class TestDygraphModeOfUnittest(unittest.TestCase): + def test_dygraph_mode(self): - self.assertTrue(paddle.in_dynamic_mode( - ), 'Default Mode of Unittest should be dygraph mode, but get static mode.' - ) + self.assertTrue( + paddle.in_dynamic_mode(), + 'Default Mode of Unittest should be dygraph mode, but get static mode.' + ) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py index a1165f33584..2487bc15660 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py @@ -31,6 +31,7 @@ SEED = 123123111 class SimpleImgConvPool(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -50,25 +51,23 @@ class SimpleImgConvPool(fluid.dygraph.Layer): bias_attr=None): super(SimpleImgConvPool, self).__init__() - self._conv2d = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=conv_stride, - padding=conv_padding, - dilation=conv_dilation, - groups=conv_groups, - param_attr=None, - bias_attr=None, - use_cudnn=use_cudnn) - - self._pool2d = Pool2D( - pool_size=pool_size, - pool_type=pool_type, - pool_stride=pool_stride, - pool_padding=pool_padding, - global_pooling=global_pooling, - use_cudnn=use_cudnn) + self._conv2d = Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=conv_stride, + padding=conv_padding, + dilation=conv_dilation, + groups=conv_groups, + param_attr=None, + bias_attr=None, + use_cudnn=use_cudnn) + + self._pool2d = Pool2D(pool_size=pool_size, + pool_type=pool_type, + pool_stride=pool_stride, + pool_padding=pool_padding, + global_pooling=global_pooling, + use_cudnn=use_cudnn) def forward(self, inputs): x = self._conv2d(inputs) @@ -77,25 +76,33 @@ class SimpleImgConvPool(fluid.dygraph.Layer): class MNIST(fluid.dygraph.Layer): + def __init__(self): super(MNIST, self).__init__() - self._simple_img_conv_pool_1 = SimpleImgConvPool( - 1, 20, 5, 2, 2, act="relu") + self._simple_img_conv_pool_1 = SimpleImgConvPool(1, + 20, + 5, + 2, + 2, + act="relu") - self._simple_img_conv_pool_2 = SimpleImgConvPool( - 20, 50, 5, 2, 2, act="relu") + self._simple_img_conv_pool_2 = SimpleImgConvPool(20, + 50, + 5, + 2, + 2, + act="relu") self.pool_2_shape = 50 * 4 * 4 SIZE = 100 #10 scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5 - self._fc = Linear( - self.pool_2_shape, - SIZE, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale)), - act="softmax") + self._fc = Linear(self.pool_2_shape, + SIZE, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.NormalInitializer( + loc=0.0, scale=scale)), + act="softmax") def forward(self, inputs): x = self._simple_img_conv_pool_1(inputs) @@ -106,6 +113,7 @@ class MNIST(fluid.dygraph.Layer): class TestDygraphMultiForward(unittest.TestCase): + def test_mnist_forward_float32(self): epoch_num = 1 @@ -113,20 +121,21 @@ class TestDygraphMultiForward(unittest.TestCase): paddle.seed(SEED) 
paddle.framework.random._manual_program_seed(SEED) mnist = MNIST() - sgd = SGDOptimizer( - learning_rate=1e-3, parameter_list=mnist.parameters()) - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=128, drop_last=True) + sgd = SGDOptimizer(learning_rate=1e-3, + parameter_list=mnist.parameters()) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=128, + drop_last=True) dy_param_init_value = {} mnist.eval() for epoch in range(epoch_num): for batch_id, data in enumerate(train_reader()): - dy_x_data = np.array( - [x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(128, 1) + dy_x_data = np.array([ + x[0].reshape(1, 28, 28) for x in data + ]).astype('float32') + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(128, 1) img = to_variable(dy_x_data) label = to_variable(y_data) @@ -150,11 +159,13 @@ class TestDygraphMultiForward(unittest.TestCase): mnist = MNIST() sgd = SGDOptimizer(learning_rate=1e-3) - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=128, drop_last=True) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=128, + drop_last=True) - img = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype='float32') + img = fluid.layers.data(name='pixel', + shape=[1, 28, 28], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') cost = mnist(img) loss = fluid.layers.cross_entropy(cost, label) @@ -174,18 +185,19 @@ class TestDygraphMultiForward(unittest.TestCase): for epoch in range(epoch_num): for batch_id, data in enumerate(train_reader()): - static_x_data = np.array( - [x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape([128, 1]) + static_x_data = np.array([ + x[0].reshape(1, 28, 28) for x in data + ]).astype('float32') + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape([128, 1]) fetch_list = [avg_loss.name] - out = exe.run( - fluid.default_main_program(), - feed={"pixel": static_x_data, - "label": y_data}, - fetch_list=fetch_list) + out = exe.run(fluid.default_main_program(), + feed={ + "pixel": static_x_data, + "label": y_data + }, + fetch_list=fetch_list) static_out = out[0] diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_recompute.py b/python/paddle/fluid/tests/unittests/test_dygraph_recompute.py index fa9ea5d086c..799555a7b03 100755 --- a/python/paddle/fluid/tests/unittests/test_dygraph_recompute.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_recompute.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -29,27 +29,28 @@ from paddle.fluid.framework import _test_eager_guard def get_fc_block(block_idx, input_size, is_last=False): block_name = "block_" + str(block_idx) block = paddle.nn.Sequential( - (block_name + "_fc_0", paddle.nn.Linear( - input_size, input_size, bias_attr=False)), + (block_name + "_fc_0", + paddle.nn.Linear(input_size, input_size, bias_attr=False)), (block_name + "_dropout", paddle.nn.Dropout(p=0.5)), (block_name + "_relu_1", paddle.nn.ReLU()), - (block_name + "_fc_1", paddle.nn.Linear( - input_size, input_size, bias_attr=False)), - (block_name + "_relu_2", paddle.nn.ReLU()), ) + (block_name + "_fc_1", + paddle.nn.Linear(input_size, input_size, bias_attr=False)), + (block_name + "_relu_2", paddle.nn.ReLU()), + ) if is_last: - block.add_sublayer( - block_name + "_fc_2", - paddle.nn.Linear( - input_size, 1, bias_attr=False)) # add sublayer + block.add_sublayer(block_name + "_fc_2", + paddle.nn.Linear(input_size, 1, + bias_attr=False)) # add sublayer else: - block.add_sublayer( - block_name + "_fc_2", - paddle.nn.Linear( - input_size, input_size, bias_attr=False)) # add sublayer + block.add_sublayer(block_name + "_fc_2", + paddle.nn.Linear(input_size, + input_size, + bias_attr=False)) # add sublayer return block class Naive_fc_net(paddle.nn.Layer): + def __init__(self, input_size=10, recompute_blocks=[1, 3], @@ -103,10 +104,9 @@ def run_model(recompute_block=[], random.seed(10) batch_size, input_size = 1, 10 - model = Naive_fc_net( - input_size, - recompute_blocks=recompute_block, - recompute_kwargs=recompute_kwargs) + model = Naive_fc_net(input_size, + recompute_blocks=recompute_block, + recompute_kwargs=recompute_kwargs) loss_fn = paddle.nn.MSELoss(reduction='mean') optimizer = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters()) @@ -142,7 +142,9 @@ def run_model(recompute_block=[], class TestPyLayer(unittest.TestCase): + def test_base_case(self, enable_autocast=False, pure_fp16=False): + def check_identical(loss_ref, param_ref, grad_ref, loss, param, grad): self.assertEqual(loss_ref, loss) self.assertEqual(param_ref, param) @@ -155,31 +157,27 @@ class TestPyLayer(unittest.TestCase): pure_fp16=pure_fp16) # recompute second block - loss, param, grad = run_model( - recompute_block=[1], - enable_autocast=enable_autocast, - pure_fp16=pure_fp16) + loss, param, grad = run_model(recompute_block=[1], + enable_autocast=enable_autocast, + pure_fp16=pure_fp16) check_identical(loss_ref, param_ref, grad_ref, loss, param, grad) # recompute fourth block - loss, param, grad = run_model( - recompute_block=[3], - enable_autocast=enable_autocast, - pure_fp16=pure_fp16) + loss, param, grad = run_model(recompute_block=[3], + enable_autocast=enable_autocast, + pure_fp16=pure_fp16) check_identical(loss_ref, param_ref, grad_ref, loss, param, grad) # recompute second to fourth block - loss, param, grad = run_model( - recompute_block=[1, 2, 3], - enable_autocast=enable_autocast, - pure_fp16=pure_fp16) + loss, param, grad = run_model(recompute_block=[1, 2, 3], + enable_autocast=enable_autocast, + pure_fp16=pure_fp16) check_identical(loss_ref, param_ref, grad_ref, loss, param, grad) # recompute second & fourth block - loss, param, grad = run_model( - recompute_block=[1, 3], - enable_autocast=enable_autocast, - pure_fp16=pure_fp16) + loss, param, grad = run_model(recompute_block=[1, 3], + enable_autocast=enable_autocast, + pure_fp16=pure_fp16) check_identical(loss_ref, param_ref, grad_ref, loss, param, grad) def test_fc_net_with_dropout(self): @@ -214,8 +212,8 @@ class 
TestPyLayer(unittest.TestCase): paddle.set_device("gpu") kwargs = {"is_test": False} with self.assertRaises(ValueError): - loss_ref, param_ref, grad_ref = run_model( - recompute_block=[2], recompute_kwargs=kwargs) + loss_ref, param_ref, grad_ref = run_model(recompute_block=[2], + recompute_kwargs=kwargs) def test_recompute_cpu_rng(self): with _test_eager_guard(): diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_sharding_optimizer_stage2.py b/python/paddle/fluid/tests/unittests/test_dygraph_sharding_optimizer_stage2.py index 50e19851386..e76b4ab3674 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_sharding_optimizer_stage2.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_sharding_optimizer_stage2.py @@ -24,8 +24,8 @@ class TestDygraphShardingOptimizerStage2(TestMultipleGpus): # check sharding logic as well as the accuracy with single mode def test_dygraph_sharding_optimizer_stage2(self): - self.run_mnist_2gpu( - 'dygraph_sharding_optimizer_stage2.py', eager_mode=False) + self.run_mnist_2gpu('dygraph_sharding_optimizer_stage2.py', + eager_mode=False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage2.py b/python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage2.py index 866577ea7aa..9d842d8719f 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage2.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage2.py @@ -30,8 +30,8 @@ class TestDygraphShardingStage2(TestMultipleGpus): def test_dygraph_sharding_stage2_offload(self): self.run_mnist_2gpu('dygraph_group_sharded_stage2_offload.py') - self.run_mnist_2gpu( - 'dygraph_sharding_stage2_offload.py', eager_mode=False) + self.run_mnist_2gpu('dygraph_sharding_stage2_offload.py', + eager_mode=False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage3.py b/python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage3.py index c1f5e06f42b..6175634e700 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage3.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage3.py @@ -30,8 +30,8 @@ class TestDygraphShardingStage3(TestMultipleGpus): def test_dygraph_sharding_stage3_offload(self): self.run_mnist_2gpu('dygraph_group_sharded_stage3_offload.py') - self.run_mnist_2gpu( - 'dygraph_sharding_stage3_offload.py', eager_mode=False) + self.run_mnist_2gpu('dygraph_sharding_stage3_offload.py', + eager_mode=False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_spectral_norm.py b/python/paddle/fluid/tests/unittests/test_dygraph_spectral_norm.py index ef220ba1016..9ca53d9a925 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_spectral_norm.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_spectral_norm.py @@ -23,6 +23,7 @@ from paddle.nn.utils import spectral_norm class TestDygraphSpectralNorm(unittest.TestCase): + def setUp(self): self.init_test_case() self.set_data() @@ -39,8 +40,8 @@ class TestDygraphSpectralNorm(unittest.TestCase): for desc in self.data_desc: data_name = desc[0] data_shape = desc[1] - data_value = np.random.random( - size=[self.batch_size] + data_shape).astype('float32') + data_value = np.random.random(size=[self.batch_size] + + data_shape).astype('float32') self.data[data_name] = data_value def spectral_normalize(self, weight, u, v, dim, power_iters, eps): @@ -77,11 +78,10 @@ class TestDygraphSpectralNorm(unittest.TestCase): else: self.dim = (self.dim + 
len(before_weight)) % len(before_weight) - sn = spectral_norm( - linear, - n_power_iterations=self.n_power_iterations, - eps=self.eps, - dim=self.dim) + sn = spectral_norm(linear, + n_power_iterations=self.n_power_iterations, + eps=self.eps, + dim=self.dim) u = sn.weight_u.numpy().copy() v = sn.weight_v.numpy().copy() outputs = [] @@ -90,16 +90,17 @@ class TestDygraphSpectralNorm(unittest.TestCase): outputs.append(output.numpy()) self.actual_outputs = linear.weight.numpy() - expect_output = self.spectral_normalize( - before_weight, u, v, self.dim, self.n_power_iterations, self.eps) + expect_output = self.spectral_normalize(before_weight, u, v, self.dim, + self.n_power_iterations, + self.eps) for expect, actual in zip(expect_output, self.actual_outputs): self.assertTrue( - np.allclose( - np.array(actual), np.array(expect), atol=0.001)) + np.allclose(np.array(actual), np.array(expect), atol=0.001)) class TestDygraphWeightNormCase(TestDygraphSpectralNorm): + def init_test_case(self): self.batch_size = 3 self.data_desc = (['x', [2, 3, 3]], ) @@ -109,6 +110,7 @@ class TestDygraphWeightNormCase(TestDygraphSpectralNorm): class TestDygraphWeightNormWithIterations(TestDygraphSpectralNorm): + def init_test_case(self): self.batch_size = 3 self.data_desc = (['x', [2, 3, 3]], ) @@ -118,6 +120,7 @@ class TestDygraphWeightNormWithIterations(TestDygraphSpectralNorm): class TestDygraphWeightNormWithDim(TestDygraphSpectralNorm): + def init_test_case(self): self.batch_size = 3 self.data_desc = (['x', [2, 3, 3]], ) @@ -127,6 +130,7 @@ class TestDygraphWeightNormWithDim(TestDygraphSpectralNorm): class TestDygraphWeightNormWithEps(TestDygraphSpectralNorm): + def init_test_case(self): self.batch_size = 3 self.data_desc = (['x', [2, 3, 3]], ) diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py b/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py index 27d82fcc890..6ca02794a8a 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py @@ -25,6 +25,7 @@ from paddle.nn.utils import weight_norm, remove_weight_norm class TestDygraphWeightNorm(unittest.TestCase): + def setUp(self): self.init_test_case() self.set_data() @@ -39,8 +40,8 @@ class TestDygraphWeightNorm(unittest.TestCase): for desc in self.data_desc: data_name = desc[0] data_shape = desc[1] - data_value = numpy.random.random( - size=[self.batch_size] + data_shape).astype('float32') + data_value = numpy.random.random(size=[self.batch_size] + + data_shape).astype('float32') self.data[data_name] = data_value def norm_except_dim(self, w, dim=None): @@ -95,11 +96,9 @@ class TestDygraphWeightNorm(unittest.TestCase): p_matrix = numpy.reshape( p_transposed, (p_transposed.shape[0], transposed_shape_numel // p_transposed.shape[0])) - v_norm = v / numpy.expand_dims( - numpy.expand_dims( - numpy.linalg.norm( - p_matrix, axis=1, keepdims=True), axis=0), - axis=(ndims - 1)) + v_norm = v / numpy.expand_dims(numpy.expand_dims( + numpy.linalg.norm(p_matrix, axis=1, keepdims=True), axis=0), + axis=(ndims - 1)) v_norm = numpy.reshape(v_norm, transposed_shape) v_norm = numpy.transpose(v_norm, perm) g = numpy.squeeze(g, axis=1) @@ -107,11 +106,10 @@ class TestDygraphWeightNorm(unittest.TestCase): eaxis = 2 elif dim == 2: eaxis = 1 - g_mul = numpy.expand_dims( - numpy.expand_dims( - numpy.expand_dims( - g, axis=0), axis=eaxis), - axis=(ndims - 1)) + g_mul = numpy.expand_dims(numpy.expand_dims(numpy.expand_dims( + g, axis=0), + axis=eaxis), + axis=(ndims - 
1)) w = g_mul * v_norm return g, v @@ -136,11 +134,11 @@ class TestDygraphWeightNorm(unittest.TestCase): for expect, actual in zip(expect_output, self.actual_outputs): self.assertTrue( - numpy.allclose( - numpy.array(actual), expect, atol=0.001)) + numpy.allclose(numpy.array(actual), expect, atol=0.001)) class TestDygraphWeightNormCase1(TestDygraphWeightNorm): + def init_test_case(self): self.batch_size = 3 self.data_desc = (['x', [2, 3, 3]], ) @@ -148,6 +146,7 @@ class TestDygraphWeightNormCase1(TestDygraphWeightNorm): class TestDygraphWeightNormCase2(TestDygraphWeightNorm): + def init_test_case(self): self.batch_size = 3 self.data_desc = (['x', [2, 3, 3]], ) @@ -155,6 +154,7 @@ class TestDygraphWeightNormCase2(TestDygraphWeightNorm): class TestDygraphWeightNormCase3(TestDygraphWeightNorm): + def init_test_case(self): self.batch_size = 3 self.data_desc = (['x', [2, 3, 3]], ) @@ -162,6 +162,7 @@ class TestDygraphWeightNormCase3(TestDygraphWeightNorm): class TestDygraphWeightNormCase4(TestDygraphWeightNorm): + def init_test_case(self): self.batch_size = 3 self.data_desc = (['x', [2, 3, 3]], ) @@ -169,6 +170,7 @@ class TestDygraphWeightNormCase4(TestDygraphWeightNorm): class TestDygraphRemoveWeightNorm(unittest.TestCase): + def setUp(self): self.init_test_case() @@ -185,8 +187,9 @@ class TestDygraphRemoveWeightNorm(unittest.TestCase): rwn = remove_weight_norm(linear) after_weight = linear.weight self.assertTrue( - numpy.allclose( - before_weight.numpy(), after_weight.numpy(), atol=0.001)) + numpy.allclose(before_weight.numpy(), + after_weight.numpy(), + atol=0.001)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_dyn_rnn.py b/python/paddle/fluid/tests/unittests/test_dyn_rnn.py index 1cf0c145f83..0698a8b40df 100644 --- a/python/paddle/fluid/tests/unittests/test_dyn_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_dyn_rnn.py @@ -31,6 +31,7 @@ numpy.random.seed(2020) class TestDynamicRNN(unittest.TestCase): + def setUp(self): self.word_dict_len = 5147 self.BATCH_SIZE = 2 @@ -82,10 +83,13 @@ class TestDynamicRNN(unittest.TestCase): startup_program = fluid.Program() with fluid.program_guard(main_program, startup_program): - sentence = fluid.layers.data( - name='word', shape=[1], dtype='int64', lod_level=1) - sent_emb = fluid.layers.embedding( - input=sentence, size=[self.word_dict_len, 32], dtype='float32') + sentence = fluid.layers.data(name='word', + shape=[1], + dtype='int64', + lod_level=1) + sent_emb = fluid.layers.embedding(input=sentence, + size=[self.word_dict_len, 32], + dtype='float32') rank_table = lod_rank_table(x=sent_emb) sent_emb_array = lod_tensor_to_array(x=sent_emb, table=rank_table) @@ -95,8 +99,7 @@ class TestDynamicRNN(unittest.TestCase): i.stop_gradient = False boot_mem = fluid.layers.fill_constant_batch_size_like( - input=fluid.layers.array_read( - array=sent_emb_array, i=i), + input=fluid.layers.array_read(array=sent_emb_array, i=i), value=0, shape=[-1, 100], dtype='float32') @@ -126,8 +129,8 @@ class TestDynamicRNN(unittest.TestCase): logits = fluid.layers.fc(input=last, size=1, act=None) label = fluid.layers.data(name='label', shape=[1], dtype='float32') - loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=logits, label=label) + loss = fluid.layers.sigmoid_cross_entropy_with_logits(x=logits, + label=label) loss = fluid.layers.mean(loss) sgd = fluid.optimizer.SGD(1e-4) sgd.minimize(loss=loss) @@ -135,13 +138,12 @@ class TestDynamicRNN(unittest.TestCase): # Check for lod_level set in compile-time. 
self.assertEqual(sent_emb.lod_level, result_all_timesteps.lod_level) - self._train( - main_program=main_program, - startup_program=startup_program, - feed_list=[sentence, label], - fetch_list=[sent_emb, result_all_timesteps, loss], - is_nested=False, - max_iters=1) + self._train(main_program=main_program, + startup_program=startup_program, + feed_list=[sentence, label], + fetch_list=[sent_emb, result_all_timesteps, loss], + is_nested=False, + max_iters=1) def test_train_dynamic_rnn(self): main_program = fluid.Program() @@ -149,10 +151,13 @@ class TestDynamicRNN(unittest.TestCase): main_program.random_seed = 10 startup_program.random_seed = 10 with fluid.program_guard(main_program, startup_program): - sentence = fluid.layers.data( - name='word', shape=[1], dtype='int64', lod_level=1) - sent_emb = fluid.layers.embedding( - input=sentence, size=[self.word_dict_len, 32], dtype='float32') + sentence = fluid.layers.data(name='word', + shape=[1], + dtype='int64', + lod_level=1) + sent_emb = fluid.layers.embedding(input=sentence, + size=[self.word_dict_len, 32], + dtype='float32') drnn = fluid.layers.DynamicRNN() with drnn.block(): @@ -167,8 +172,8 @@ class TestDynamicRNN(unittest.TestCase): logits = fluid.layers.fc(input=last, size=1, act=None) label = fluid.layers.data(name='label', shape=[1], dtype='float32') - loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=logits, label=label) + loss = fluid.layers.sigmoid_cross_entropy_with_logits(x=logits, + label=label) loss = fluid.layers.mean(loss) sgd = fluid.optimizer.Adam(1e-3) sgd.minimize(loss=loss) @@ -176,13 +181,12 @@ class TestDynamicRNN(unittest.TestCase): # Check for lod_level set in compile-time. self.assertEqual(sent_emb.lod_level, drnn_result.lod_level) - self._train( - main_program=main_program, - startup_program=startup_program, - feed_list=[sentence, label], - fetch_list=[sent_emb, drnn_result, loss], - is_nested=False, - max_iters=100) + self._train(main_program=main_program, + startup_program=startup_program, + feed_list=[sentence, label], + fetch_list=[sent_emb, drnn_result, loss], + is_nested=False, + max_iters=100) def _fake_reader(self): seq_len, label = [[2, 2]], [0, 1] @@ -203,17 +207,22 @@ class TestDynamicRNN(unittest.TestCase): main_program.random_seed = 10 startup_program.random_seed = 10 with fluid.program_guard(main_program, startup_program): - sentence = fluid.layers.data( - name='word', shape=[1], dtype='int64', lod_level=2) - label = fluid.layers.data( - name='label', shape=[1], dtype='float32', lod_level=1) + sentence = fluid.layers.data(name='word', + shape=[1], + dtype='int64', + lod_level=2) + label = fluid.layers.data(name='label', + shape=[1], + dtype='float32', + lod_level=1) drnn0 = fluid.layers.DynamicRNN() with drnn0.block(): in_0 = drnn0.step_input(sentence) assert in_0.lod_level == 1, "the lod level of in_ should be 1" - sentence_emb = fluid.layers.embedding( - input=in_0, size=[len(word_dict), 32], dtype='float32') + sentence_emb = fluid.layers.embedding(input=in_0, + size=[len(word_dict), 32], + dtype='float32') out_0 = fluid.layers.fc(input=sentence_emb, size=100, act='tanh') @@ -231,21 +240,20 @@ class TestDynamicRNN(unittest.TestCase): last = drnn0() logits = fluid.layers.fc(input=last, size=1, act=None) - loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=logits, label=label) + loss = fluid.layers.sigmoid_cross_entropy_with_logits(x=logits, + label=label) loss = fluid.layers.mean(loss) sgd = fluid.optimizer.SGD(1e-3) sgd.minimize(loss=loss) train_data_orig = self.train_data 
self.train_data = paddle.batch(self._fake_reader, batch_size=2) - self._train( - main_program=main_program, - startup_program=startup_program, - feed_list=[sentence, label], - fetch_list=[loss], - is_nested=True, - max_iters=100) + self._train(main_program=main_program, + startup_program=startup_program, + feed_list=[sentence, label], + fetch_list=[loss], + is_nested=True, + max_iters=100) self.train_data = train_data_orig # this unit test is just used to the two layer nested dyn_rnn. @@ -258,10 +266,14 @@ class TestDynamicRNN(unittest.TestCase): main_program.random_seed = 10 startup_program.random_seed = 10 with fluid.program_guard(main_program, startup_program): - sentence = fluid.layers.data( - name='word', shape=[1], dtype='int64', lod_level=2) - label = fluid.layers.data( - name='label', shape=[1], dtype='float32', lod_level=1) + sentence = fluid.layers.data(name='word', + shape=[1], + dtype='int64', + lod_level=2) + label = fluid.layers.data(name='label', + shape=[1], + dtype='float32', + lod_level=1) drnn0 = fluid.layers.DynamicRNN() with drnn0.block(): @@ -274,10 +286,9 @@ class TestDynamicRNN(unittest.TestCase): size=hidden_size * 4, act=None, bias_attr=False) - forward, _ = fluid.layers.dynamic_lstm( - input=input_forward_proj, - size=hidden_size * 4, - use_peepholes=False) + forward, _ = fluid.layers.dynamic_lstm(input=input_forward_proj, + size=hidden_size * 4, + use_peepholes=False) drnn1 = fluid.layers.DynamicRNN() with drnn1.block(): @@ -290,31 +301,33 @@ class TestDynamicRNN(unittest.TestCase): last = drnn0() logits = fluid.layers.fc(input=last, size=1, act=None) - loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=logits, label=label) + loss = fluid.layers.sigmoid_cross_entropy_with_logits(x=logits, + label=label) loss = fluid.layers.mean(loss) sgd = fluid.optimizer.SGD(1e-3) sgd.minimize(loss=loss) train_data_orig = self.train_data self.train_data = paddle.batch(self._fake_reader, batch_size=2) - self._train( - main_program=main_program, - startup_program=startup_program, - feed_list=[sentence, label], - fetch_list=[loss], - is_nested=True, - max_iters=100) + self._train(main_program=main_program, + startup_program=startup_program, + feed_list=[sentence, label], + fetch_list=[loss], + is_nested=True, + max_iters=100) self.train_data = train_data_orig class TestDynamicRNNErrors(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): init = fluid.layers.zeros(shape=[1], dtype='float32') shape = 'shape' - sentence = fluid.data( - name='sentence', shape=[None, 32], dtype='float32', lod_level=1) + sentence = fluid.data(name='sentence', + shape=[None, 32], + dtype='float32', + lod_level=1) # The type of Input(shape) in API(memory) must be list or tuple def input_shape_type_of_memory(): diff --git a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py index 243ad4c082a..167748c5a98 100644 --- a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py @@ -27,10 +27,14 @@ def build_and_run_program(place, batch_size, beam_size, stop_gradient=False): x = layers.assign( np.random.rand(batch_size, beam_size, 32).astype("float32")) indices = fluid.data(shape=[None, beam_size], dtype="int64", name="indices") - step_idx = layers.fill_constant( - shape=[1], dtype="int64", value=0, force_cpu=True) - max_len = layers.fill_constant( - shape=[1], dtype="int64", value=10, 
force_cpu=True) + step_idx = layers.fill_constant(shape=[1], + dtype="int64", + value=0, + force_cpu=True) + max_len = layers.fill_constant(shape=[1], + dtype="int64", + value=10, + force_cpu=True) cond = layers.less_than(x=step_idx, y=max_len) while_op = layers.While(cond) scores = layers.array_write(x, step_idx) @@ -40,9 +44,8 @@ def build_and_run_program(place, batch_size, beam_size, stop_gradient=False): bs = layers.cast(bs, 'int64') bs.stop_gradient = stop_gradient batch_pos = layers.expand( - layers.unsqueeze( - layers.range( - 0, bs, 1, dtype=bs.dtype), [1]), [1, beam_size]) + layers.unsqueeze(layers.range(0, bs, 1, dtype=bs.dtype), [1]), + [1, beam_size]) topk_coordinates = layers.stack([batch_pos, indices], axis=2) topk_coordinates.stop_gradient = stop_gradient score = layers.gather_nd(x, topk_coordinates) @@ -56,14 +59,17 @@ def build_and_run_program(place, batch_size, beam_size, stop_gradient=False): opt = fluid.optimizer.Adam(0.01) opt.minimize(loss) exe = fluid.Executor(place) - data = np.random.random_integers( - low=0, high=beam_size - 1, size=(batch_size, beam_size)).astype("int64") + data = np.random.random_integers(low=0, + high=beam_size - 1, + size=(batch_size, + beam_size)).astype("int64") loss_val, = exe.run(feed={"indices": data}, fetch_list=[loss]) return loss_val class TestDynRNNStopGradient(unittest.TestCase): + def setUp(self): self.batch_size = 20 self.beam_size = 64 diff --git a/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py b/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py index 5328f73b315..0d6fa635a8f 100644 --- a/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py +++ b/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py @@ -23,6 +23,7 @@ from decorator_helper import * class Memory(object): + def __init__(self, shape, dtype='float32'): self.ex = numpy.zeros(shape=shape, dtype=dtype) self.cur = None @@ -45,6 +46,7 @@ class Memory(object): class Output(object): + def __init__(self): self.outs = [] @@ -59,6 +61,7 @@ class Output(object): class BaseRNN(object): + def __init__(self, ins, mems, params, outs, num_seq=5, max_seq_len=15): self.num_seq = num_seq self.inputs = collections.defaultdict(list) @@ -211,6 +214,7 @@ class BaseRNN(object): class SeedFixedTestCase(unittest.TestCase): + @classmethod def setUpClass(cls): """Fix random seeds to remove randomness from tests""" @@ -235,17 +239,17 @@ class TestSimpleMul(SeedFixedTestCase): OUT_NAME = 'Out' class SimpleMul(BaseRNN): + def __init__(self): base = TestSimpleMul - super(base.SimpleMul, self).__init__({ - base.DATA_NAME: { - 'shape': [base.DATA_WIDTH] - } - }, {}, { - base.PARAM_NAME: { - 'shape': [base.DATA_WIDTH, base.HIDDEN_WIDTH] - } - }, [base.OUT_NAME]) + super(base.SimpleMul, + self).__init__({base.DATA_NAME: { + 'shape': [base.DATA_WIDTH] + }}, {}, { + base.PARAM_NAME: { + 'shape': [base.DATA_WIDTH, base.HIDDEN_WIDTH] + } + }, [base.OUT_NAME]) def step(self, X, W, Out): Out.out(numpy.matmul(X, W)) @@ -255,8 +259,9 @@ class TestSimpleMul(SeedFixedTestCase): @prog_scope() def test_forward_backward(self): py_rnn = TestSimpleMul.SimpleMul() - dat = fluid.layers.data( - name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1) + dat = fluid.layers.data(name=self.DATA_NAME, + shape=[self.DATA_WIDTH], + lod_level=1) dat.stop_gradient = False rnn = fluid.layers.DynamicRNN() @@ -277,11 +282,12 @@ class TestSimpleMul(SeedFixedTestCase): cpu = fluid.CPUPlace() exe = fluid.Executor(cpu) out, w_g, i_g = list( - map(numpy.array, + map( + 
numpy.array, exe.run(feed=py_rnn.to_feed(cpu), fetch_list=[ - out, self.PARAM_NAME + "@GRAD", self.DATA_NAME + - "@GRAD" + out, self.PARAM_NAME + "@GRAD", + self.DATA_NAME + "@GRAD" ], return_numpy=False))) out_by_python = py_rnn.exe()[self.OUT_NAME] @@ -301,21 +307,23 @@ class TestSimpleMulWithMemory(SeedFixedTestCase): PARAM_NAME = 'W' class SimpleMulWithMemory(BaseRNN): + def __init__(self): - super(TestSimpleMulWithMemory.SimpleMulWithMemory, self).__init__({ - TestSimpleMulWithMemory.DATA_NAME: { - 'shape': [TestSimpleMulWithMemory.DATA_WIDTH] - } - }, {'Mem': { - 'shape': [TestSimpleMulWithMemory.HIDDEN_WIDTH] - }}, { - TestSimpleMulWithMemory.PARAM_NAME: { - 'shape': [ - TestSimpleMulWithMemory.DATA_WIDTH, - TestSimpleMulWithMemory.HIDDEN_WIDTH - ] - } - }, ['Out']) + super(TestSimpleMulWithMemory.SimpleMulWithMemory, self).__init__( + { + TestSimpleMulWithMemory.DATA_NAME: { + 'shape': [TestSimpleMulWithMemory.DATA_WIDTH] + } + }, {'Mem': { + 'shape': [TestSimpleMulWithMemory.HIDDEN_WIDTH] + }}, { + TestSimpleMulWithMemory.PARAM_NAME: { + 'shape': [ + TestSimpleMulWithMemory.DATA_WIDTH, + TestSimpleMulWithMemory.HIDDEN_WIDTH + ] + } + }, ['Out']) def step(self, X, Mem, W, Out): o = numpy.matmul(X, W) @@ -330,8 +338,9 @@ class TestSimpleMulWithMemory(SeedFixedTestCase): @prog_scope() def test_forward_backward(self): py_rnn = TestSimpleMulWithMemory.SimpleMulWithMemory() - data = fluid.layers.data( - name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1) + data = fluid.layers.data(name=self.DATA_NAME, + shape=[self.DATA_WIDTH], + lod_level=1) data.stop_gradient = False rnn = fluid.layers.DynamicRNN() with rnn.block(): @@ -355,11 +364,12 @@ class TestSimpleMulWithMemory(SeedFixedTestCase): exe = fluid.Executor(cpu) feed = py_rnn.to_feed(cpu) last_np, w_g, i_g = list( - map(numpy.array, + map( + numpy.array, exe.run(feed=feed, fetch_list=[ - last, self.PARAM_NAME + "@GRAD", self.DATA_NAME + - "@GRAD" + last, self.PARAM_NAME + "@GRAD", + self.DATA_NAME + "@GRAD" ], return_numpy=False))) last_by_py, = list(py_rnn.exe().values()) diff --git a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py b/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py index 698f914f899..07f7fa818aa 100644 --- a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py +++ b/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py @@ -29,6 +29,7 @@ np.random.seed(1) class TestDyRnnStaticInput(unittest.TestCase): + def setUp(self): self._delta = 0.005 self._max_sequence_len = 3 @@ -60,8 +61,10 @@ class TestDyRnnStaticInput(unittest.TestCase): def fetch_value(self, var): fetch_outs = self.exe.run(feed={ - 'x_tensor': self.x_tensor, - 'static_input_tensor': self.static_input_tensor + 'x_tensor': + self.x_tensor, + 'static_input_tensor': + self.static_input_tensor }, fetch_list=[var], return_numpy=False) @@ -75,11 +78,10 @@ class TestDyRnnStaticInput(unittest.TestCase): return ndarray, lod_tensor.recursive_sequence_lengths() def build_graph(self, only_forward=False): - x_tensor = fluid.layers.data( - name='x_tensor', - shape=[self.x_tensor_dim], - dtype='float32', - lod_level=1) + x_tensor = fluid.layers.data(name='x_tensor', + shape=[self.x_tensor_dim], + dtype='float32', + lod_level=1) x_tensor.stop_gradient = False static_input_tensor = fluid.layers.data( @@ -101,20 +103,20 @@ class TestDyRnnStaticInput(unittest.TestCase): step_x = rnn.step_input(x_tensor) step_static_input = rnn.static_input(static_input_tensor) if only_forward: - fluid.layers.array_write( - 
x=step_static_input, - i=rnn.step_idx, - array=static_input_out_array) - last = fluid.layers.sequence_pool( - input=step_static_input, pool_type='last') + fluid.layers.array_write(x=step_static_input, + i=rnn.step_idx, + array=static_input_out_array) + last = fluid.layers.sequence_pool(input=step_static_input, + pool_type='last') projected = fluid.layers.fc(input=[step_x, last], size=self.output_dim) rnn.output(projected) if only_forward: static_input_step_outs = [] - step_idx = fluid.layers.fill_constant( - shape=[1], dtype='int64', value=0) + step_idx = fluid.layers.fill_constant(shape=[1], + dtype='int64', + value=0) step_idx.stop_gradient = True for i in range(self._max_sequence_len): @@ -144,8 +146,9 @@ class TestDyRnnStaticInput(unittest.TestCase): static_sliced = [] cur_offset = 0 for i in range(len(static_lod[0])): - static_sliced.append(self.static_input_data[cur_offset:( - cur_offset + static_lod[0][i])]) + static_sliced.append( + self.static_input_data[cur_offset:(cur_offset + + static_lod[0][i])]) cur_offset += static_lod[0][i] static_seq_len = static_lod[0] static_reordered = [] diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py index de85c763514..4bf8faf25ef 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py @@ -14,6 +14,7 @@ import os import numpy as np + os.environ['FLAGS_use_mkldnn'] = '0' os.environ['CPU_NUM'] = '4' @@ -24,6 +25,7 @@ import multiprocessing from functools import reduce import paddle + paddle.enable_static() fluid.core._set_eager_deletion_mode(0.0, 1.0, True) @@ -38,8 +40,8 @@ def simple_fc_net(): hidden, size=200, act='tanh', - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0))) + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=1.0))) prediction = fluid.layers.fc(hidden, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) loss = fluid.layers.mean(loss) @@ -64,6 +66,7 @@ def get_persistables_and_non_persistables(prog, fetch_list): class TestExecutor(unittest.TestCase): + def test_executor_main(self): places = [fluid.CPUPlace()] if fluid.core.is_compiled_with_cuda(): @@ -89,8 +92,8 @@ class TestExecutor(unittest.TestCase): label_shape = (batch_size, ) + tuple(label.shape[1:]) image_np = np.random.random(size=image_shape).astype('float32') - label_np = np.random.random_integers( - low=0, high=9, size=label_shape).astype('int64') + label_np = np.random.random_integers(low=0, high=9, + size=label_shape).astype('int64') return image_np, label_np @@ -111,10 +114,10 @@ class TestExecutor(unittest.TestCase): if t._is_initialized(): outline_np_vars.append(name) - print('Non-alive persistable vars {} in {}'.format(outline_p_vars, - persitables)) - print('Alive non-persistable vars {} in {}'.format(outline_np_vars, - non_persistables)) + print('Non-alive persistable vars {} in {}'.format( + outline_p_vars, persitables)) + print('Alive non-persistable vars {} in {}'.format( + outline_np_vars, non_persistables)) self.assertEqual(len(outline_p_vars), 0) self.assertEqual(len(outline_np_vars), 0) @@ -144,14 +147,14 @@ class TestExecutor(unittest.TestCase): for _ in six.moves.range(10): image_np, label_np = self.prepare_feed(image, label) - fluid.global_scope().var(image.name).get_tensor().set(image_np, - self.place) - 
fluid.global_scope().var(label.name).get_tensor().set(label_np, - self.place) + fluid.global_scope().var(image.name).get_tensor().set( + image_np, self.place) + fluid.global_scope().var(label.name).get_tensor().set( + label_np, self.place) # exe.run would not create local scope # so that we can detect whether gc clears temporary variables - exe.run(fluid.default_main_program().desc, - fluid.global_scope(), 0, False, True, [loss.name]) + exe.run(fluid.default_main_program().desc, fluid.global_scope(), 0, + False, True, [loss.name]) self.assertScopeVar(fluid.global_scope(), persistables, non_persistables) @@ -173,9 +176,9 @@ class TestExecutor(unittest.TestCase): build_strategy.memory_optimize = False build_strategy.enable_inplace = False - prog = fluid.CompiledProgram(fluid.default_main_program( - )).with_data_parallel( - loss_name=loss.name, exec_strategy=exec_strategy) + prog = fluid.CompiledProgram( + fluid.default_main_program()).with_data_parallel( + loss_name=loss.name, exec_strategy=exec_strategy) dev_cnt = fluid.core.get_cuda_device_count() if isinstance(self.place, fluid.CUDAPlace) \ else int(os.environ.get('CPU_NUM', multiprocessing.cpu_count())) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py index e4bde606ca6..2f67627a73e 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py @@ -13,6 +13,7 @@ # limitations under the License. import os + os.environ['CPU_NUM'] = '2' import six @@ -41,8 +42,10 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2): reader = fake_imdb_reader(word_dict_size, batch_size * 40) train_reader = paddle.batch(reader, batch_size=batch_size) - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") @@ -53,8 +56,8 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() feeder = fluid.DataFeeder(feed_list=[data, label], place=place) - reader = feeder.decorate_reader( - train_reader, multi_devices=use_parallel_executor) + reader = feeder.decorate_reader(train_reader, + multi_devices=use_parallel_executor) exe = fluid.Executor(place) fluid.default_startup_program().random_seed = 1 @@ -63,8 +66,9 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2): train_cp = fluid.default_main_program() if use_parallel_executor: - train_cp = compiler.CompiledProgram(fluid.default_main_program( - )).with_data_parallel(loss_name=cost.name) + train_cp = compiler.CompiledProgram( + fluid.default_main_program()).with_data_parallel( + loss_name=cost.name) fetch_list = [cost.name] else: fetch_list = [cost] @@ -81,6 +85,7 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2): class TestBase(unittest.TestCase): + def setUp(self): self.net = None diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py index 1023c18f410..39dc0caefd3 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py @@ -43,6 +43,7 @@ def gru_net(data, class 
GRUTest(TestBase): + def setUp(self): self.net = gru_net diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py index 6784edb9d7b..07f78d3b845 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py @@ -32,8 +32,9 @@ def lstm_net(data, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr(learning_rate=emb_lr)) fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4) - lstm_h, c = fluid.layers.dynamic_lstm( - input=fc0, size=hid_dim * 4, is_reverse=False) + lstm_h, c = fluid.layers.dynamic_lstm(input=fc0, + size=hid_dim * 4, + is_reverse=False) lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max') lstm_max_tanh = fluid.layers.tanh(lstm_max) fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh') @@ -44,6 +45,7 @@ def lstm_net(data, class LSTMTest(TestBase): + def setUp(self): self.net = lstm_net diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py index ecdf9efa451..d44a74ccb57 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py @@ -18,7 +18,7 @@ import paddle.fluid as fluid fluid.core._set_eager_deletion_mode(0.0, 1.0, True) -# FIXME(zjl): It seems that this unittest fails randomly +# FIXME(zjl): It seems that this unittest fails randomly # when comparing all reduce last loss and reduce last loss # e.g.: AssertionError: 1.0357145 != 1.0673475 within 0.01 delta # Disable it temporarily. diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index ff99a06e49e..180e1229514 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -33,6 +33,7 @@ os.environ["CPU_NUM"] = "1" class RNNConfig(object): + def __init__(self, model_type, rnn_model): self.model_type = model_type self.rnn_model = rnn_model @@ -99,14 +100,13 @@ class RNNConfig(object): # Fake data reader for test class Reader(object): + def get_data_iter(self, rnn_config): for i in range(rnn_config.max_epoch): - x = np.zeros( - shape=(rnn_config.batch_size, rnn_config.num_steps), - dtype='int64') - y = np.ones( - shape=(rnn_config.batch_size, rnn_config.num_steps), - dtype='int64') + x = np.zeros(shape=(rnn_config.batch_size, rnn_config.num_steps), + dtype='int64') + y = np.ones(shape=(rnn_config.batch_size, rnn_config.num_steps), + dtype='int64') yield (x, y) @@ -119,6 +119,7 @@ def lm_model(hidden_size, init_scale=0.1, dropout=None, rnn_model='static'): + def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): weight_1_arr = [] weight_2_arr = [] @@ -141,10 +142,14 @@ def lm_model(hidden_size, default_initializer=fluid.initializer.Constant(0.0)) bias_arr.append(bias_1) - pre_hidden = layers.slice( - init_hidden, axes=[0], starts=[i], ends=[i + 1]) - pre_cell = layers.slice( - init_cell, axes=[0], starts=[i], ends=[i + 1]) + pre_hidden = layers.slice(init_hidden, + axes=[0], + starts=[i], + ends=[i + 1]) + pre_cell = layers.slice(init_cell, + axes=[0], + starts=[i], + ends=[i + 1]) pre_hidden = layers.reshape(pre_hidden, shape=[-1, hidden_size]) pre_cell = layers.reshape(pre_cell, shape=[-1, hidden_size]) 
hidden_array.append(pre_hidden) @@ -165,23 +170,22 @@ def lm_model(hidden_size, gate_input = layers.matmul(x=nn, y=weight_1) gate_input = layers.elementwise_add(gate_input, bias) - i = layers.slice( - gate_input, axes=[1], starts=[0], ends=[hidden_size]) - j = layers.slice( - gate_input, - axes=[1], - starts=[hidden_size], - ends=[hidden_size * 2]) - f = layers.slice( - gate_input, - axes=[1], - starts=[hidden_size * 2], - ends=[hidden_size * 3]) - o = layers.slice( - gate_input, - axes=[1], - starts=[hidden_size * 3], - ends=[hidden_size * 4]) + i = layers.slice(gate_input, + axes=[1], + starts=[0], + ends=[hidden_size]) + j = layers.slice(gate_input, + axes=[1], + starts=[hidden_size], + ends=[hidden_size * 2]) + f = layers.slice(gate_input, + axes=[1], + starts=[hidden_size * 2], + ends=[hidden_size * 3]) + o = layers.slice(gate_input, + axes=[1], + starts=[hidden_size * 3], + ends=[hidden_size * 4]) c = pre_cell * layers.sigmoid(f) + layers.sigmoid( i) * layers.tanh(j) @@ -212,11 +216,15 @@ def lm_model(hidden_size, c = rnnout[i * 2 + 1] m.stop_gradient = True c.stop_gradient = True - last_h = layers.slice( - m, axes=[0], starts=[num_steps - 1], ends=[num_steps]) + last_h = layers.slice(m, + axes=[0], + starts=[num_steps - 1], + ends=[num_steps]) last_hidden_array.append(last_h) - last_c = layers.slice( - c, axes=[0], starts=[num_steps - 1], ends=[num_steps]) + last_c = layers.slice(c, + axes=[0], + starts=[num_steps - 1], + ends=[num_steps]) last_cell_array.append(last_c) real_res = layers.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = layers.concat(last_hidden_array, 0) @@ -224,7 +232,9 @@ def lm_model(hidden_size, return real_res, last_hidden, last_cell - def encoder_static(input_embedding, len=3, init_hidden=None, + def encoder_static(input_embedding, + len=3, + init_hidden=None, init_cell=None): weight_1_arr = [] @@ -248,20 +258,27 @@ def lm_model(hidden_size, default_initializer=fluid.initializer.Constant(0.0)) bias_arr.append(bias_1) - pre_hidden = layers.slice( - init_hidden, axes=[0], starts=[i], ends=[i + 1]) - pre_cell = layers.slice( - init_cell, axes=[0], starts=[i], ends=[i + 1]) - pre_hidden = layers.reshape( - pre_hidden, shape=[-1, hidden_size], inplace=True) - pre_cell = layers.reshape( - pre_cell, shape=[-1, hidden_size], inplace=True) + pre_hidden = layers.slice(init_hidden, + axes=[0], + starts=[i], + ends=[i + 1]) + pre_cell = layers.slice(init_cell, + axes=[0], + starts=[i], + ends=[i + 1]) + pre_hidden = layers.reshape(pre_hidden, + shape=[-1, hidden_size], + inplace=True) + pre_cell = layers.reshape(pre_cell, + shape=[-1, hidden_size], + inplace=True) hidden_array.append(pre_hidden) cell_array.append(pre_cell) res = [] - sliced_inputs = layers.split( - input_embedding, num_or_sections=len, dim=1) + sliced_inputs = layers.split(input_embedding, + num_or_sections=len, + dim=1) for index in range(len): input = sliced_inputs[index] @@ -295,52 +312,50 @@ def lm_model(hidden_size, res.append(input) last_hidden = layers.concat(hidden_array, 1) - last_hidden = layers.reshape( - last_hidden, shape=[-1, num_layers, hidden_size], inplace=True) + last_hidden = layers.reshape(last_hidden, + shape=[-1, num_layers, hidden_size], + inplace=True) last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = layers.concat(cell_array, 1) - last_cell = layers.reshape( - last_cell, shape=[-1, num_layers, hidden_size]) + last_cell = layers.reshape(last_cell, + shape=[-1, num_layers, hidden_size]) last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2]) real_res = 
layers.concat(res, 0) - real_res = layers.reshape( - real_res, shape=[len, -1, hidden_size], inplace=True) + real_res = layers.reshape(real_res, + shape=[len, -1, hidden_size], + inplace=True) real_res = layers.transpose(x=real_res, perm=[1, 0, 2]) return real_res, last_hidden, last_cell batch_size_each = batch_size - x = layers.data( - name="x", - shape=[batch_size_each, num_steps, 1], - dtype='int64', - append_batch_size=False) - y = layers.data( - name="y", - shape=[batch_size_each * num_steps, 1], - dtype='int64', - append_batch_size=False) - - init_hidden = layers.data( - name="init_hidden", - shape=[num_layers, batch_size_each, hidden_size], - dtype='float32', - append_batch_size=False) - init_cell = layers.data( - name="init_cell", - shape=[num_layers, batch_size_each, hidden_size], - dtype='float32', - append_batch_size=False) + x = layers.data(name="x", + shape=[batch_size_each, num_steps, 1], + dtype='int64', + append_batch_size=False) + y = layers.data(name="y", + shape=[batch_size_each * num_steps, 1], + dtype='int64', + append_batch_size=False) + + init_hidden = layers.data(name="init_hidden", + shape=[num_layers, batch_size_each, hidden_size], + dtype='float32', + append_batch_size=False) + init_cell = layers.data(name="init_cell", + shape=[num_layers, batch_size_each, hidden_size], + dtype='float32', + append_batch_size=False) init_cell.persistable = True init_hidden.persistable = True - init_hidden_reshape = layers.reshape( - init_hidden, shape=[num_layers, -1, hidden_size]) - init_cell_reshape = layers.reshape( - init_cell, shape=[num_layers, -1, hidden_size]) + init_hidden_reshape = layers.reshape(init_hidden, + shape=[num_layers, -1, hidden_size]) + init_cell_reshape = layers.reshape(init_cell, + shape=[num_layers, -1, hidden_size]) x_emb = layers.embedding( input=x, @@ -349,16 +364,16 @@ def lm_model(hidden_size, is_sparse=False, param_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( - low=-init_scale, high=init_scale))) + initializer=fluid.initializer.UniformInitializer(low=-init_scale, + high=init_scale))) - x_emb = layers.reshape( - x_emb, shape=[-1, num_steps, hidden_size], inplace=True) + x_emb = layers.reshape(x_emb, + shape=[-1, num_steps, hidden_size], + inplace=True) if dropout != None and dropout > 0.0: - x_emb = layers.dropout( - x_emb, - dropout_prob=dropout, - dropout_implementation='upscale_in_train') + x_emb = layers.dropout(x_emb, + dropout_prob=dropout, + dropout_implementation='upscale_in_train') if rnn_model == "padding": rnn_out, last_hidden, last_cell = padding_rnn( @@ -395,8 +410,9 @@ def lm_model(hidden_size, print("type not support") return - rnn_out = layers.reshape( - rnn_out, shape=[-1, num_steps, hidden_size], inplace=True) + rnn_out = layers.reshape(rnn_out, + shape=[-1, num_steps, hidden_size], + inplace=True) softmax_weight = layers.create_parameter( [hidden_size, vocab_size], @@ -413,11 +429,13 @@ def lm_model(hidden_size, projection = layers.matmul(rnn_out, softmax_weight) projection = layers.elementwise_add(projection, softmax_bias) - projection = layers.reshape( - projection, shape=[-1, vocab_size], inplace=True) + projection = layers.reshape(projection, + shape=[-1, vocab_size], + inplace=True) - loss = layers.softmax_with_cross_entropy( - logits=projection, label=y, soft_label=False) + loss = layers.softmax_with_cross_entropy(logits=projection, + label=y, + soft_label=False) loss = layers.reshape(loss, shape=[-1, num_steps], inplace=True) loss = layers.reduce_mean(loss, dim=[0]) @@ 
-439,6 +457,7 @@ def lm_model(hidden_size, class PaddingRNNTestBase(unittest.TestCase): + def setUp(self): self.reader = Reader() self.device_count = 1 @@ -471,15 +490,14 @@ class PaddingRNNTestBase(unittest.TestCase): self.startup_program = fluid.Program() with fluid.program_guard(self.main_program, self.startup_program): with fluid.unique_name.guard(): - res_vars = lm_model( - config.hidden_size, - config.vocab_size, - config.batch_size, - num_layers=config.num_layers, - num_steps=config.num_steps, - init_scale=config.init_scale, - dropout=config.dropout, - rnn_model=config.rnn_model) + res_vars = lm_model(config.hidden_size, + config.vocab_size, + config.batch_size, + num_layers=config.num_layers, + num_steps=config.num_steps, + init_scale=config.init_scale, + dropout=config.dropout, + rnn_model=config.rnn_model) self.loss, self.last_hidden, self.last_cell, self.feed_order = res_vars fluid.clip.set_gradient_clip( @@ -509,14 +527,12 @@ class PaddingRNNTestBase(unittest.TestCase): self.train_program = self.main_program def _generate_init_data(self): - init_hidden = np.zeros( - (self.config.num_layers, self.config.batch_size, - self.config.hidden_size), - dtype='float32') - init_cell = np.zeros( - (self.config.num_layers, self.config.batch_size, - self.config.hidden_size), - dtype='float32') + init_hidden = np.zeros((self.config.num_layers, self.config.batch_size, + self.config.hidden_size), + dtype='float32') + init_cell = np.zeros((self.config.num_layers, self.config.batch_size, + self.config.hidden_size), + dtype='float32') return init_hidden, init_cell def _generate_new_lr(self, epoch_id=0, device_count=1): @@ -596,7 +612,8 @@ class PaddingRNNTestBase(unittest.TestCase): ppl = np.append(ppl, train_ppl) return ppl - def compare_padding_static_mode(self, parallel=True, + def compare_padding_static_mode(self, + parallel=True, use_program_cache=True): ''' Test that train ppl of padding mode is same to that of static mode @@ -608,11 +625,11 @@ class PaddingRNNTestBase(unittest.TestCase): with fluid.scope_guard(fluid.Scope()): static_rnn_ppl = self.train(config, parallel, use_program_cache) self.assertTrue( - np.isclose( - padding_rnn_ppl, static_rnn_ppl, rtol=0.001).all()) + np.isclose(padding_rnn_ppl, static_rnn_ppl, rtol=0.001).all()) class EagerDeletionPaddingRNNTest(PaddingRNNTestBase): + def test_padding_mode_no_eager_deletion(self): ''' Test that train ppl of padding mode is same to that of static mode without eager deletion diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py index 01d8cbc5b7d..907e167b5f1 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py @@ -27,6 +27,7 @@ from paddle.fluid.framework import Program, grad_var_name from paddle.fluid.executor import Executor from paddle.fluid.backward import append_backward import paddle + paddle.enable_static() np.random.seed(123) @@ -35,6 +36,7 @@ fluid.core._set_eager_deletion_mode(0.0, 1.0, True) class PyRNNBase(object): + def __init__(self, input_shape, output_shape): self.x = np.ones(shape=input_shape).astype("float32") self.y = np.zeros(shape=output_shape).astype("float32") @@ -52,6 +54,7 @@ class PyRNNBase(object): class PySimpleRNN1(PyRNNBase): + def __init__(self, input_shape, output_shape): super(PySimpleRNN1, self).__init__(input_shape, output_shape) @@ -73,6 +76,7 @@ class PySimpleRNN1(PyRNNBase): class 
PySimpleRNN2(PyRNNBase): + def __init__(self, input_shape, output_shape): super(PySimpleRNN2, self).__init__(input_shape, output_shape) @@ -139,14 +143,14 @@ class EagerDeletionRecurrentOpTest1(unittest.TestCase): self.output = layers.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False) + x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], + dtype='float32', + name='x', + append_batch_size=False) x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype='float32', name='h_boot') + h_boot = layers.data(shape=[self.input_dim], + dtype='float32', + name='h_boot') h_boot.stop_gradient = False rnn = layers.StaticRNN() @@ -154,10 +158,8 @@ class EagerDeletionRecurrentOpTest1(unittest.TestCase): h_pre = rnn.memory(init=h_boot) x_t = rnn.step_input(x) - h = layers.scale( - x=layers.elementwise_add( - x=h_pre, y=x_t), - scale=self.py_rnn.scale) + h = layers.scale(x=layers.elementwise_add(x=h_pre, y=x_t), + scale=self.py_rnn.scale) rnn.update_memory(h_pre, h) rnn.output(h) @@ -211,8 +213,7 @@ class EagerDeletionRecurrentOpTest1(unittest.TestCase): for idx, name in enumerate(self.data_field): self.assertEqual(num_grad[idx].shape, ana_grad[idx].shape) self.assertTrue( - np.isclose( - num_grad[idx], ana_grad[idx], rtol=rtol).all(), + np.isclose(num_grad[idx], ana_grad[idx], rtol=rtol).all(), "num_grad (" + name + ") has diff at " + str(self.place) + "\nExpect " + str(num_grad[idx]) + "\n" + "But Got" + str(ana_grad[idx]) + " in class " + self.__class__.__name__) @@ -276,14 +277,14 @@ class EagerDeletionRecurrentOpTest2(EagerDeletionRecurrentOpTest1): self.output = layers.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False) + x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], + dtype='float32', + name='x', + append_batch_size=False) x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype='float32', name='h_boot') + h_boot = layers.data(shape=[self.input_dim], + dtype='float32', + name='h_boot') h_boot.stop_gradient = False rnn = layers.StaticRNN() @@ -333,6 +334,7 @@ class EagerDeletionRecurrentOpMultipleMemoryTest(EagerDeletionRecurrentOpTest1): ''' class PySimpleRNN3(PyRNNBase): + def __init__(self, input_shape, output_shape): super(EagerDeletionRecurrentOpMultipleMemoryTest.PySimpleRNN3, self).__init__(input_shape, output_shape) @@ -376,23 +378,20 @@ class EagerDeletionRecurrentOpMultipleMemoryTest(EagerDeletionRecurrentOpTest1): self.output = layers.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False) + x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], + dtype='float32', + name='x', + append_batch_size=False) x.stop_gradient = False - h_boot1 = layers.data( - shape=[self.batch_size, self.input_dim], - dtype='float32', - name='h_boot1', - append_batch_size=False) + h_boot1 = layers.data(shape=[self.batch_size, self.input_dim], + dtype='float32', + name='h_boot1', + append_batch_size=False) h_boot1.stop_gradient = False - h_boot2 = layers.data( - shape=[self.batch_size, self.input_dim], - dtype='float32', - name='h_boot2', - append_batch_size=False) + h_boot2 = layers.data(shape=[self.batch_size, 
self.input_dim], + dtype='float32', + name='h_boot2', + append_batch_size=False) h_boot2.stop_gradient = False rnn = layers.StaticRNN() @@ -427,6 +426,7 @@ class EagerDeletionRecurrentOpNoMemBootTest(EagerDeletionRecurrentOpTest1): ''' class PySimpleRNN4(PyRNNBase): + def __init__(self, input_shape, output_shape): super(EagerDeletionRecurrentOpNoMemBootTest.PySimpleRNN4, self).__init__(input_shape, output_shape) @@ -459,11 +459,10 @@ class EagerDeletionRecurrentOpNoMemBootTest(EagerDeletionRecurrentOpTest1): self.output = layers.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False) + x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], + dtype='float32', + name='x', + append_batch_size=False) x.stop_gradient = False rnn = layers.StaticRNN() @@ -497,6 +496,7 @@ class EagerDeletionTwoRecurrentOpsTest(EagerDeletionRecurrentOpTest1): ''' class PySimpleRNN5(PyRNNBase): + def __init__(self, input_shape, output_shape): super(EagerDeletionTwoRecurrentOpsTest.PySimpleRNN5, self).__init__(input_shape, output_shape) @@ -536,11 +536,10 @@ class EagerDeletionTwoRecurrentOpsTest(EagerDeletionRecurrentOpTest1): self.output = layers.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False) + x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], + dtype='float32', + name='x', + append_batch_size=False) x.stop_gradient = False rnn_0 = layers.StaticRNN() @@ -564,8 +563,8 @@ class EagerDeletionTwoRecurrentOpsTest(EagerDeletionRecurrentOpTest1): return rnn_1() -class EagerDeletionRecurrentOpParallelExecutorTest( - EagerDeletionRecurrentOpTest1): +class EagerDeletionRecurrentOpParallelExecutorTest(EagerDeletionRecurrentOpTest1 + ): ''' Test RNNOp with ParallelExecutor equation: @@ -587,11 +586,10 @@ class EagerDeletionRecurrentOpParallelExecutorTest( build_strategy = fluid.BuildStrategy() build_strategy.enable_inplace = True exec_strategy = fluid.ExecutionStrategy() - parallel_exe = fluid.ParallelExecutor( - use_cuda=False, - main_program=self.main_program, - build_strategy=build_strategy, - exec_strategy=exec_strategy) + parallel_exe = fluid.ParallelExecutor(use_cuda=False, + main_program=self.main_program, + build_strategy=build_strategy, + exec_strategy=exec_strategy) out = parallel_exe.run(feed=self.feed_map, fetch_list=[self.output]) return out[0] @@ -608,12 +606,11 @@ class EagerDeletionRecurrentOpParallelExecutorTest( build_strategy = fluid.BuildStrategy() build_strategy.enable_inplace = True exec_strategy = fluid.ExecutionStrategy() - parallel_exe = fluid.ParallelExecutor( - use_cuda=False, - loss_name=self.output.name, - main_program=self.main_program, - build_strategy=build_strategy, - exec_strategy=exec_strategy) + parallel_exe = fluid.ParallelExecutor(use_cuda=False, + loss_name=self.output.name, + main_program=self.main_program, + build_strategy=build_strategy, + exec_strategy=exec_strategy) return parallel_exe.run(feed=self.feed_map, fetch_list=fetch_list, return_numpy=False) @@ -640,8 +637,9 @@ class EagerDeletionFarwardOnlyRnnAndBackwardRnnTest( name='x', append_batch_size=False) x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype='float32', name='h_boot') + h_boot = layers.data(shape=[self.input_dim], + dtype='float32', + name='h_boot') h_boot.stop_gradient = False 
forward_only_rnn = layers.StaticRNN() @@ -649,10 +647,8 @@ class EagerDeletionFarwardOnlyRnnAndBackwardRnnTest( h_pre = forward_only_rnn.memory(init=h_boot) x_t = forward_only_rnn.step_input(x) - h = layers.scale( - x=layers.elementwise_add( - x=h_pre, y=x_t), - scale=self.py_rnn.scale) + h = layers.scale(x=layers.elementwise_add(x=h_pre, y=x_t), + scale=self.py_rnn.scale) forward_only_rnn.update_memory(h_pre, h) forward_only_rnn.output(h) @@ -665,10 +661,8 @@ class EagerDeletionFarwardOnlyRnnAndBackwardRnnTest( h_pre = rnn.memory(init=h_boot) x_t = rnn.step_input(x) - h = layers.scale( - x=layers.elementwise_add( - x=h_pre, y=x_t), - scale=self.py_rnn.scale) + h = layers.scale(x=layers.elementwise_add(x=h_pre, y=x_t), + scale=self.py_rnn.scale) rnn.update_memory(h_pre, h) rnn.output(h) @@ -693,8 +687,7 @@ class EagerDeletionFarwardOnlyRnnAndBackwardRnnTest( self.assertEqual(forward_only_output.shape, py_output.shape) self.assertEqual(pd_output.shape, py_output.shape) self.assertTrue( - np.isclose( - forward_only_output, py_output, rtol=0.01).all) + np.isclose(forward_only_output, py_output, rtol=0.01).all) self.assertTrue(np.isclose(pd_output, py_output, rtol=0.01).all()) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py index 936651d8324..41685fa4254 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py @@ -15,6 +15,7 @@ from __future__ import print_function import os + os.environ['CPU_NUM'] = '2' import unittest @@ -28,13 +29,17 @@ import numpy import multiprocessing import paddle + paddle.enable_static() fluid.core._set_eager_deletion_mode(0.0, 1.0, True) class TestEagerDeletionWhileOpBase(unittest.TestCase): + def test_main(self): - places = [core.CPUPlace(), ] + places = [ + core.CPUPlace(), + ] if core.is_compiled_with_cuda(): places.append(core.CUDAPlace(0)) @@ -48,8 +53,8 @@ class TestEagerDeletionWhileOpBase(unittest.TestCase): self.place = place self.with_data_parallel = with_data_parallel - if not core.is_compiled_with_cuda() and isinstance(self.place, - core.CUDAPlace): + if not core.is_compiled_with_cuda() and isinstance( + self.place, core.CUDAPlace): return if isinstance(self.place, core.CUDAPlace): @@ -57,15 +62,21 @@ class TestEagerDeletionWhileOpBase(unittest.TestCase): ) if self.with_data_parallel else 1 else: device_cnt = int( - os.environ.get('CPU_NUM', multiprocessing.cpu_count( - ))) if self.with_data_parallel else 1 + os.environ.get('CPU_NUM', multiprocessing.cpu_count()) + ) if self.with_data_parallel else 1 - d0 = layers.data( - "d0", shape=[10], append_batch_size=False, dtype='float32') - d1 = layers.data( - "d1", shape=[10], append_batch_size=False, dtype='float32') - d2 = layers.data( - "d2", shape=[10], append_batch_size=False, dtype='float32') + d0 = layers.data("d0", + shape=[10], + append_batch_size=False, + dtype='float32') + d1 = layers.data("d1", + shape=[10], + append_batch_size=False, + dtype='float32') + d2 = layers.data("d2", + shape=[10], + append_batch_size=False, + dtype='float32') i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = True @@ -136,8 +147,9 @@ class TestEagerDeletionWhileOpBase(unittest.TestCase): prog = fluid.default_main_program() if self.with_data_parallel: - prog = compiler.CompiledProgram(fluid.default_main_program( - )).with_data_parallel(loss_name=loss.name) + prog = compiler.CompiledProgram( + 
fluid.default_main_program()).with_data_parallel( + loss_name=loss.name) for _ in range(5): d = [] @@ -149,9 +161,11 @@ class TestEagerDeletionWhileOpBase(unittest.TestCase): d.append(numpy.array([tmp] * device_cnt)) outs = exe.run(program=prog, - feed={'d0': d[0], - 'd1': d[1], - 'd2': d[2]}, + feed={ + 'd0': d[0], + 'd1': d[1], + 'd2': d[2] + }, fetch_list=[sum_result]) self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01) diff --git a/python/paddle/fluid/tests/unittests/test_eager_dist_api.py b/python/paddle/fluid/tests/unittests/test_eager_dist_api.py index e00f90f4b0d..5355c58753e 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_dist_api.py +++ b/python/paddle/fluid/tests/unittests/test_eager_dist_api.py @@ -19,6 +19,7 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestProcessGroup(TestMultipleGpus): + def test_process_group_nccl(self): self.run_mnist_2gpu('process_group_nccl.py') diff --git a/python/paddle/fluid/tests/unittests/test_eager_run_program.py b/python/paddle/fluid/tests/unittests/test_eager_run_program.py index 620f72ccb30..8d3ebcfbac5 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_run_program.py +++ b/python/paddle/fluid/tests/unittests/test_eager_run_program.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -59,17 +59,16 @@ def _create_out(var): var_desc = var.desc varbase = None if _in_legacy_dygraph(): - var_base = core.VarBase(var_desc.dtype(), - var_desc.shape(), + var_base = core.VarBase(var_desc.dtype(), var_desc.shape(), var_desc.name(), var_desc.type(), False) else: - var_base = core.eager.Tensor(var_desc.dtype(), - var_desc.shape(), + var_base = core.eager.Tensor(var_desc.dtype(), var_desc.shape(), var_desc.name(), var_desc.type(), False) return var_base class TestRunProgram(unittest.TestCase): + def test_eager(self): paddle.set_device('cpu') paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_eager_trace_op.py b/python/paddle/fluid/tests/unittests/test_eager_trace_op.py index b67dbd0ba62..1266e1c9a6a 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_trace_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_trace_op.py @@ -26,21 +26,26 @@ from paddle.fluid.framework import _test_eager_guard class TestEagerTraceOp(unittest.TestCase): + def test_branches(self): with _test_eager_guard(): data = np.random.random([1, 1]).astype(np.float32) x = paddle.to_tensor(data) paddle.fluid.framework._dygraph_tracer().trace_op( - 'broadcast_tensors', {'X': [x, x], - 'Out': [x, x]}, {'Out': [x, x]}, {}) + 'broadcast_tensors', { + 'X': [x, x], + 'Out': [x, x] + }, {'Out': [x, x]}, {}) paddle.fluid.framework._dygraph_tracer().trace_op( 'scale', {'X': x}, {'Out': x}, {'scale': 0.5}) scale = paddle.to_tensor(np.random.random([1]).astype(np.float32)) paddle.fluid.framework._dygraph_tracer().trace_op( - 'instance_norm', {'Scale': [scale], - 'X': [x]}, {'Y': [x]}, {}) + 'instance_norm', { + 'Scale': [scale], + 'X': [x] + }, {'Y': [x]}, {}) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_edit_distance_op.py b/python/paddle/fluid/tests/unittests/test_edit_distance_op.py index ba48b143a8e..561a379b6fa 100644 --- a/python/paddle/fluid/tests/unittests/test_edit_distance_op.py +++ b/python/paddle/fluid/tests/unittests/test_edit_distance_op.py @@ -51,6 +51,7 @@ def Levenshtein(hyp, ref): class TestEditDistanceOp(OpTest): + def setUp(self): self.op_type = "edit_distance" normalized = False @@ -86,6 +87,7 @@ class TestEditDistanceOp(OpTest): class TestEditDistanceOpNormalizedCase0(OpTest): + def reset_config(self): pass @@ -134,18 +136,21 @@ class TestEditDistanceOpNormalizedCase0(OpTest): class TestEditDistanceOpNormalizedCase1(TestEditDistanceOpNormalizedCase0): + def reset_config(self): self.x1_lod = [0, 6, 0] self.x2_lod = [2, 1, 2] class TestEditDistanceOpNormalizedCase2(TestEditDistanceOpNormalizedCase0): + def reset_config(self): self.x1_lod = [0, 0, 6] self.x2_lod = [2, 2, 1] class TestEditDistanceOpNormalizedTensor(OpTest): + def reset_config(self): self.x1 = np.array([[10, 3, 0, 0], [6, 5, 8, 2]], dtype=np.int64) self.x2 = np.array([[10, 4, 0], [6, 7, 8]], dtype=np.int64) @@ -163,9 +168,8 @@ class TestEditDistanceOpNormalizedTensor(OpTest): sequence_num = np.array(num_strs).astype("int64") for i in range(0, num_strs): - distance[i] = Levenshtein( - hyp=self.x1[i][0:self.x1_lod[i]], - ref=self.x2[i][0:self.x2_lod[i]]) + distance[i] = Levenshtein(hyp=self.x1[i][0:self.x1_lod[i]], + ref=self.x2[i][0:self.x2_lod[i]]) if normalized is True: len_ref = self.x2_lod[i] distance[i] = distance[i] / len_ref diff --git a/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py b/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py index 45cb7e785bc..4afbe2d7155 100644 --- 
a/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,6 +20,7 @@ import unittest class EagerOpAPIGenerateTestCase(unittest.TestCase): + def test_elementwise_add(self): with _test_eager_guard(): paddle.set_device("cpu") @@ -35,8 +36,8 @@ class EagerOpAPIGenerateTestCase(unittest.TestCase): def test_sum(self): with _test_eager_guard(): - x_data = np.array( - [[0.2, 0.3, 0.5, 0.9], [0.1, 0.2, 0.6, 0.7]]).astype('float32') + x_data = np.array([[0.2, 0.3, 0.5, 0.9], [0.1, 0.2, 0.6, + 0.7]]).astype('float32') x = paddle.to_tensor(x_data, 'float32') out = paddle.sum(x, axis=0) out_arr = out.numpy() @@ -61,8 +62,8 @@ class EagerOpAPIGenerateTestCase(unittest.TestCase): out = paddle.nn.functional.sigmoid(x) out_arr = out.numpy() out_arr_expected = np.array( - [0.40131234, 0.450166, 0.52497919, 0.57444252]).astype( - 'float32') + [0.40131234, 0.450166, 0.52497919, + 0.57444252]).astype('float32') self.assertTrue(np.allclose(out_arr, out_arr_expected)) diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py index bb8c6346eb5..7fe755225f4 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,6 +23,7 @@ import paddle.compat as cpt class EagerScaleTestCase(unittest.TestCase): + def test_scale_base(self): with _test_eager_guard(): paddle.set_device("cpu") @@ -85,6 +86,7 @@ class EagerScaleTestCase(unittest.TestCase): class EagerDtypeTestCase(unittest.TestCase): + def check_to_tesnsor_and_numpy(self, dtype, proto_dtype): with _test_eager_guard(): arr = np.random.random([4, 16, 16, 32]).astype(dtype) @@ -110,6 +112,7 @@ class EagerDtypeTestCase(unittest.TestCase): class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): + def constructor(self, place): egr_tensor = core.eager.Tensor() self.assertEqual(egr_tensor.persistable, False) @@ -170,8 +173,8 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertTrue( egr_tensor4.place._equals( paddle.fluid.framework._current_expected_place())) - self.assertTrue( - np.array_equal(egr_tensor4.numpy(), egr_tensor3.numpy())) + self.assertTrue(np.array_equal(egr_tensor4.numpy(), + egr_tensor3.numpy())) arr4 = np.random.rand(4, 16, 16, 32).astype('float32') egr_tensor5 = core.eager.Tensor(arr4, place) @@ -190,8 +193,8 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor6.dtype, core.VarDesc.VarType.FP32) self.assertEqual(egr_tensor6.stop_gradient, True) self.assertEqual(egr_tensor6.place.is_cpu_place(), True) - self.assertTrue( - np.array_equal(egr_tensor6.numpy(), egr_tensor5.numpy())) + self.assertTrue(np.array_equal(egr_tensor6.numpy(), + egr_tensor5.numpy())) egr_tensor7 = core.eager.Tensor(arr4, place, True) self.assertEqual(egr_tensor7.persistable, True) @@ -209,8 +212,8 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor8.dtype, core.VarDesc.VarType.FP32) self.assertEqual(egr_tensor8.stop_gradient, True) self.assertTrue(egr_tensor8.place._equals(place)) - self.assertTrue( - np.array_equal(egr_tensor8.numpy(), egr_tensor5.numpy())) + self.assertTrue(np.array_equal(egr_tensor8.numpy(), + egr_tensor5.numpy())) egr_tensor9 = core.eager.Tensor(arr4, place, True, True) self.assertEqual(egr_tensor9.persistable, True) @@ -279,8 +282,9 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): "The type of trainable MUST be bool, but the type is /*"): eager_param.trainable = "False" - eager_param_2 = EagerParamBase( - shape=paddle.shape(paddle.to_tensor([1, 2, 3, 4])), dtype="float32") + eager_param_2 = EagerParamBase(shape=paddle.shape( + paddle.to_tensor([1, 2, 3, 4])), + dtype="float32") self.assertTrue(eager_param_2.trainable) eager_param_2.trainable = False self.assertFalse(eager_param_2.trainable) @@ -329,8 +333,9 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor2.dtype, core.VarDesc.VarType.FP32) self.assertEqual(egr_tensor2.stop_gradient, True) - egr_tensor3 = core.eager.Tensor( - arr, place=place, name="new_eager_tensor") + egr_tensor3 = core.eager.Tensor(arr, + place=place, + name="new_eager_tensor") self.assertEqual(egr_tensor3.persistable, False) self.assertTrue("new_eager_tensor" in egr_tensor3.name) self.assertEqual(egr_tensor3.shape, [4, 16, 16, 32]) @@ -338,8 +343,10 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor3.dtype, core.VarDesc.VarType.FP32) self.assertEqual(egr_tensor3.stop_gradient, True) - egr_tensor4 = core.eager.Tensor( - arr, place=place, persistable=True, name="new_eager_tensor") + egr_tensor4 = core.eager.Tensor(arr, + place=place, + persistable=True, + 
name="new_eager_tensor") self.assertEqual(egr_tensor4.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor4.name) self.assertEqual(egr_tensor4.shape, [4, 16, 16, 32]) @@ -347,12 +354,11 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor4.dtype, core.VarDesc.VarType.FP32) self.assertEqual(egr_tensor4.stop_gradient, True) - egr_tensor5 = core.eager.Tensor( - arr, - core.CPUPlace(), - persistable=True, - name="new_eager_tensor", - zero_copy=True) + egr_tensor5 = core.eager.Tensor(arr, + core.CPUPlace(), + persistable=True, + name="new_eager_tensor", + zero_copy=True) self.assertEqual(egr_tensor5.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor5.name) self.assertEqual(egr_tensor5.shape, [4, 16, 16, 32]) @@ -360,12 +366,11 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor5.dtype, core.VarDesc.VarType.FP32) self.assertEqual(egr_tensor5.stop_gradient, True) - egr_tensor6 = core.eager.Tensor( - arr, - place=core.CPUPlace(), - persistable=True, - name="new_eager_tensor", - zero_copy=True) + egr_tensor6 = core.eager.Tensor(arr, + place=core.CPUPlace(), + persistable=True, + name="new_eager_tensor", + zero_copy=True) self.assertEqual(egr_tensor6.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor6.name) self.assertEqual(egr_tensor6.shape, [4, 16, 16, 32]) @@ -373,12 +378,11 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor6.dtype, core.VarDesc.VarType.FP32) self.assertEqual(egr_tensor6.stop_gradient, True) - egr_tensor7 = core.eager.Tensor( - arr, - place=place, - persistable=True, - name="new_eager_tensor", - zero_copy=True) + egr_tensor7 = core.eager.Tensor(arr, + place=place, + persistable=True, + name="new_eager_tensor", + zero_copy=True) self.assertEqual(egr_tensor7.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor7.name) self.assertEqual(egr_tensor7.shape, [4, 16, 16, 32]) @@ -386,13 +390,12 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor7.dtype, core.VarDesc.VarType.FP32) self.assertEqual(egr_tensor7.stop_gradient, True) - egr_tensor8 = core.eager.Tensor( - arr, - place=place, - persistable=True, - name="new_eager_tensor", - zero_copy=True, - stop_gradient=False) + egr_tensor8 = core.eager.Tensor(arr, + place=place, + persistable=True, + name="new_eager_tensor", + zero_copy=True, + stop_gradient=False) self.assertEqual(egr_tensor8.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor8.name) self.assertEqual(egr_tensor8.shape, [4, 16, 16, 32]) @@ -400,8 +403,12 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor8.dtype, core.VarDesc.VarType.FP32) self.assertEqual(egr_tensor8.stop_gradient, False) - egr_tensor9 = core.eager.Tensor( - arr, place, True, True, "new_eager_tensor", stop_gradient=False) + egr_tensor9 = core.eager.Tensor(arr, + place, + True, + True, + "new_eager_tensor", + stop_gradient=False) self.assertEqual(egr_tensor9.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor9.name) self.assertEqual(egr_tensor9.shape, [4, 16, 16, 32]) @@ -409,13 +416,12 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor9.dtype, core.VarDesc.VarType.FP32) self.assertEqual(egr_tensor9.stop_gradient, False) - egr_tensor10 = core.eager.Tensor( - arr, - place, - True, - True, - name="new_eager_tensor", - stop_gradient=False) + 
egr_tensor10 = core.eager.Tensor(arr, + place, + True, + True, + name="new_eager_tensor", + stop_gradient=False) self.assertEqual(egr_tensor10.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor10.name) self.assertEqual(egr_tensor10.shape, [4, 16, 16, 32]) @@ -423,13 +429,12 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor10.dtype, core.VarDesc.VarType.FP32) self.assertEqual(egr_tensor10.stop_gradient, False) - egr_tensor11 = core.eager.Tensor( - arr, - place, - True, - zero_copy=True, - name="new_eager_tensor", - stop_gradient=False) + egr_tensor11 = core.eager.Tensor(arr, + place, + True, + zero_copy=True, + name="new_eager_tensor", + stop_gradient=False) self.assertEqual(egr_tensor11.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor11.name) self.assertEqual(egr_tensor11.shape, [4, 16, 16, 32]) @@ -437,13 +442,12 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor11.dtype, core.VarDesc.VarType.FP32) self.assertEqual(egr_tensor11.stop_gradient, False) - egr_tensor12 = core.eager.Tensor( - arr, - place, - persistable=True, - zero_copy=True, - name="new_eager_tensor", - stop_gradient=False) + egr_tensor12 = core.eager.Tensor(arr, + place, + persistable=True, + zero_copy=True, + name="new_eager_tensor", + stop_gradient=False) self.assertEqual(egr_tensor12.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor12.name) self.assertEqual(egr_tensor12.shape, [4, 16, 16, 32]) @@ -451,13 +455,12 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor12.dtype, core.VarDesc.VarType.FP32) self.assertEqual(egr_tensor12.stop_gradient, False) - egr_tensor13 = core.eager.Tensor( - value=arr, - place=place, - persistable=True, - zero_copy=True, - name="new_eager_tensor", - stop_gradient=False) + egr_tensor13 = core.eager.Tensor(value=arr, + place=place, + persistable=True, + zero_copy=True, + name="new_eager_tensor", + stop_gradient=False) self.assertEqual(egr_tensor13.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor13.name) self.assertEqual(egr_tensor13.shape, [4, 16, 16, 32]) @@ -466,12 +469,11 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertEqual(egr_tensor13.stop_gradient, False) # special case - egr_tensor14 = core.eager.Tensor( - dtype=core.VarDesc.VarType.FP32, - dims=[4, 16, 16, 32], - name="special_eager_tensor", - type=core.VarDesc.VarType.LOD_TENSOR, - persistable=True) + egr_tensor14 = core.eager.Tensor(dtype=core.VarDesc.VarType.FP32, + dims=[4, 16, 16, 32], + name="special_eager_tensor", + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=True) self.assertEqual(egr_tensor14.persistable, True) self.assertEqual(egr_tensor14.name, "special_eager_tensor") self.assertEqual(egr_tensor14.shape, [4, 16, 16, 32]) @@ -490,8 +492,8 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertTrue( np.array_equal(egr_tensor15.numpy(), egr_tensor4.numpy())) - egr_tensor16 = core.eager.Tensor( - value=egr_tensor4, name="new_eager_tensor") + egr_tensor16 = core.eager.Tensor(value=egr_tensor4, + name="new_eager_tensor") self.assertEqual(egr_tensor16.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor16.name) self.assertEqual(egr_tensor16.shape, egr_tensor4.shape) @@ -506,7 +508,8 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): egr_tensor17 = core.eager.Tensor( value=egr_tensor4, place=place, - 
name="new_eager_tensor", ) + name="new_eager_tensor", + ) self.assertEqual(egr_tensor17.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor17.name) self.assertEqual(egr_tensor17.shape, egr_tensor4.shape) @@ -519,7 +522,8 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): egr_tensor18 = core.eager.Tensor( egr_tensor4, place=place, - name="new_eager_tensor", ) + name="new_eager_tensor", + ) self.assertEqual(egr_tensor18.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor18.name) self.assertEqual(egr_tensor18.shape, egr_tensor4.shape) @@ -532,7 +536,8 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): egr_tensor19 = core.eager.Tensor( egr_tensor4, place, - name="new_eager_tensor", ) + name="new_eager_tensor", + ) self.assertEqual(egr_tensor19.persistable, True) self.assertTrue("new_eager_tensor" in egr_tensor19.name) self.assertEqual(egr_tensor19.shape, egr_tensor4.shape) @@ -584,8 +589,9 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertTrue(egr_tensor23.place._equals(place)) self.assertTrue(np.array_equal(egr_tensor23.numpy(), x)) - egr_tensor24 = core.eager.Tensor( - value=t, place=place, name="from_framework_tensor") + egr_tensor24 = core.eager.Tensor(value=t, + place=place, + name="from_framework_tensor") self.assertEqual(egr_tensor24.persistable, False) self.assertTrue("from_framework_tensor" in egr_tensor24.name) self.assertEqual(egr_tensor24.shape, [3, 3]) @@ -596,7 +602,7 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): # Bad usage # SyntaxError: positional argument follows keyword argument - # egr_tensor25 = core.eager.Tensor(value=t, place) + # egr_tensor25 = core.eager.Tensor(value=t, place) def test_constructor_with_kwargs(self): print("Test_constructor_with_kwargs") @@ -770,14 +776,14 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): paddle.set_device("gpu:0") with paddle.fluid.framework._dygraph_place_guard(core.CPUPlace()): self.assertTrue( - isinstance(_current_expected_place(), type(core.CPUPlace( - )))) + isinstance(_current_expected_place(), + type(core.CPUPlace()))) else: paddle.set_device("cpu") with paddle.fluid.framework._dygraph_place_guard(core.CPUPlace()): self.assertTrue( - isinstance(_current_expected_place(), type(core.CPUPlace( - )))) + isinstance(_current_expected_place(), + type(core.CPUPlace()))) def test_value(self): with _test_eager_guard(): @@ -819,8 +825,7 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): def test_sharding_related_api(self): with _test_eager_guard(): arr0 = np.random.rand(4, 16, 16, 32).astype('float32') - egr_tensor1 = core.eager.Tensor(arr0, - core.CPUPlace(), True, False, + egr_tensor1 = core.eager.Tensor(arr0, core.CPUPlace(), True, False, "numpy_tensor1", False) self.assertEqual(egr_tensor1._numel(), 32768) self.assertEqual(egr_tensor1._slice(0, 2)._numel(), 16384) @@ -846,6 +851,7 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): class EagerParamBaseUsageTestCase(unittest.TestCase): + def test_print(self): with _test_eager_guard(): linear = paddle.nn.Linear(3, 3, bias_attr=False) @@ -881,8 +887,10 @@ class EagerParamBaseUsageTestCase(unittest.TestCase): bias_attr=False, weight_attr=paddle.fluid.initializer.MSRAInitializer()) res = [ - linear1.weight.numpy(), linear2.weight.numpy(), - linear3.weight.numpy(), linear4.weight.numpy() + linear1.weight.numpy(), + linear2.weight.numpy(), + linear3.weight.numpy(), + linear4.weight.numpy() ] 
paddle.set_default_dtype("float32") return res @@ -900,8 +908,8 @@ class EagerParamBaseUsageTestCase(unittest.TestCase): self.assertTrue(np.array_equal(res1[i], res2[i])) def func_layer_helper_base(self, value): - base = paddle.fluid.layer_helper_base.LayerHelperBase("test_layer", - "test_layer") + base = paddle.fluid.layer_helper_base.LayerHelperBase( + "test_layer", "test_layer") return base.to_variable(value).numpy() def func_base_to_variable(self, value): @@ -950,6 +958,7 @@ class EagerParamBaseUsageTestCase(unittest.TestCase): class EagerGuardTestCase(unittest.TestCase): + def test__test_eager_guard(self): tracer = paddle.fluid.dygraph.tracer.Tracer() with _test_eager_guard(tracer): diff --git a/python/paddle/fluid/tests/unittests/test_egr_string_tensor_api.py b/python/paddle/fluid/tests/unittests/test_egr_string_tensor_api.py index def5f569b8f..3b5ec683bc7 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_string_tensor_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_string_tensor_api.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,6 +21,7 @@ import copy class EagerStringTensorTestCase(unittest.TestCase): + def setUp(self): self.str_arr = np.array([ ["15.4寸笔记本的键盘确实爽,基本跟台式机差不多了,蛮喜欢数字小键盘,输数字特方便,样子也很美观,做工也相当不错" @@ -40,9 +41,7 @@ class EagerStringTensorTestCase(unittest.TestCase): self.assertEqual(ST2.name, "ST2") self.assertEqual(ST2.shape, shape) self.assertTrue( - np.array_equal( - ST2.numpy(), np.empty( - shape, dtype=np.unicode_))) + np.array_equal(ST2.numpy(), np.empty(shape, dtype=np.unicode_))) ST3 = core.eager.StringTensor(self.str_arr, "ST3") # constructor 3 self.assertEqual(ST3.name, "ST3") @@ -71,17 +70,15 @@ class EagerStringTensorTestCase(unittest.TestCase): def test_constructor_with_kwargs(self): with _test_eager_guard(): shape = [2, 3] - ST1 = core.eager.StringTensor( - dims=shape, name="ST1") # constructor 2 + ST1 = core.eager.StringTensor(dims=shape, + name="ST1") # constructor 2 self.assertEqual(ST1.name, "ST1") self.assertEqual(ST1.shape, shape) self.assertTrue( - np.array_equal( - ST1.numpy(), np.empty( - shape, dtype=np.unicode_))) + np.array_equal(ST1.numpy(), np.empty(shape, dtype=np.unicode_))) - ST2 = core.eager.StringTensor( - self.str_arr, name="ST2") # constructor 3 + ST2 = core.eager.StringTensor(self.str_arr, + name="ST2") # constructor 3 self.assertEqual(ST2.name, "ST2") self.assertEqual(ST2.shape, list(self.str_arr.shape)) self.assertTrue(np.array_equal(ST2.numpy(), self.str_arr)) @@ -91,8 +88,8 @@ class EagerStringTensorTestCase(unittest.TestCase): self.assertEqual(ST3.shape, list(self.str_arr.shape)) self.assertTrue(np.array_equal(ST3.numpy(), self.str_arr)) - ST4 = core.eager.StringTensor( - value=ST2, name="ST4") # constructor 6 + ST4 = core.eager.StringTensor(value=ST2, + name="ST4") # constructor 6 self.assertEqual(ST4.name, "ST4") self.assertEqual(ST4.shape, list(self.str_arr.shape)) self.assertTrue(np.array_equal(ST4.numpy(), self.str_arr)) diff --git a/python/paddle/fluid/tests/unittests/test_eig_op.py 
b/python/paddle/fluid/tests/unittests/test_eig_op.py index bb83de7d0dd..b4044c9e799 100644 --- a/python/paddle/fluid/tests/unittests/test_eig_op.py +++ b/python/paddle/fluid/tests/unittests/test_eig_op.py @@ -59,6 +59,7 @@ def eig_backward(w, v, grad_w, grad_v): class TestEigOp(OpTest): + def setUp(self): paddle.enable_static() paddle.device.set_device("cpu") @@ -142,26 +143,28 @@ class TestEigOp(OpTest): self.grad_v) def test_check_output(self): - self.check_output_with_place_customized( - checker=self.checker, place=core.CPUPlace()) + self.check_output_with_place_customized(checker=self.checker, + place=core.CPUPlace()) def test_check_grad(self): self.init_grad() - self.check_grad( - ['X'], ['Eigenvalues', 'Eigenvectors'], - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_w, self.grad_v]) + self.check_grad(['X'], ['Eigenvalues', 'Eigenvectors'], + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_w, self.grad_v]) class TestComplex128(TestEigOp): + def set_dtype(self): self.dtype = np.complex128 @skip_check_grad_ci( - reason="For float dtype, numpy.linalg.eig forward outputs real or complex when input is real, therefore the grad computation may be not the same with paddle.linalg.eig" + reason= + "For float dtype, numpy.linalg.eig forward outputs real or complex when input is real, therefore the grad computation may be not the same with paddle.linalg.eig" ) class TestDouble(TestEigOp): + def set_dtype(self): self.dtype = np.float64 @@ -170,9 +173,11 @@ class TestDouble(TestEigOp): @skip_check_grad_ci( - reason="For float dtype, numpy.linalg.eig forward outputs real or complex when input is real, therefore the grad computation may be not the same with paddle.linalg.eig" + reason= + "For float dtype, numpy.linalg.eig forward outputs real or complex when input is real, therefore the grad computation may be not the same with paddle.linalg.eig" ) class TestEigBatchMarices(TestEigOp): + def set_dtype(self): self.dtype = np.float64 @@ -184,9 +189,11 @@ class TestEigBatchMarices(TestEigOp): @skip_check_grad_ci( - reason="For float dtype, numpy.linalg.eig forward outputs real or complex when input is real, therefore the grad computation may be not the same with paddle.linalg.eig" + reason= + "For float dtype, numpy.linalg.eig forward outputs real or complex when input is real, therefore the grad computation may be not the same with paddle.linalg.eig" ) class TestFloat(TestEigOp): + def set_dtype(self): self.dtype = np.float32 @@ -195,6 +202,7 @@ class TestFloat(TestEigOp): class TestEigStatic(TestEigOp): + def test_check_output_with_place(self): paddle.enable_static() place = core.CPUPlace() @@ -209,17 +217,18 @@ class TestEigStatic(TestEigOp): feed={"input": input_np}, fetch_list=[act_val, act_vec]) self.assertTrue( - np.allclose(expect_val, fetch_val, 1e-6, 1e-6), - "The eigen values have diff: \nExpected " + str(expect_val) + "\n" + - "But got: " + str(fetch_val)) + np.allclose(expect_val, fetch_val, 1e-6, + 1e-6), "The eigen values have diff: \nExpected " + + str(expect_val) + "\n" + "But got: " + str(fetch_val)) self.assertTrue( - np.allclose(np.abs(expect_vec), np.abs(fetch_vec), 1e-6, 1e-6), - "The eigen vectors have diff: \nExpected " + + np.allclose(np.abs(expect_vec), np.abs(fetch_vec), 1e-6, + 1e-6), "The eigen vectors have diff: \nExpected " + str(np.abs(expect_vec)) + "\n" + "But got: " + str(np.abs(fetch_vec))) class TestEigWrongDimsError(unittest.TestCase): + def test_error(self): paddle.device.set_device("cpu") paddle.disable_static() 
@@ -229,6 +238,7 @@ class TestEigWrongDimsError(unittest.TestCase): class TestEigNotSquareError(unittest.TestCase): + def test_error(self): paddle.device.set_device("cpu") paddle.disable_static() @@ -238,6 +248,7 @@ class TestEigNotSquareError(unittest.TestCase): class TestEigUnsupportedDtypeError(unittest.TestCase): + def test_error(self): paddle.device.set_device("cpu") paddle.disable_static() diff --git a/python/paddle/fluid/tests/unittests/test_eigh_op.py b/python/paddle/fluid/tests/unittests/test_eigh_op.py index 2abbcc98a6b..cc5fdcca6e1 100644 --- a/python/paddle/fluid/tests/unittests/test_eigh_op.py +++ b/python/paddle/fluid/tests/unittests/test_eigh_op.py @@ -56,7 +56,7 @@ def valid_single_eigh_result(A, eigh_value, eigh_vector, uplo): T = np.diag(eigh_value) # A = Q*T*Q' - residual = A - (eigh_vector @T @np.linalg.inv(eigh_vector)) + residual = A - (eigh_vector @ T @ np.linalg.inv(eigh_vector)) # ||A - Q*T*Q'|| / (N*||A||) < rtol np.testing.assert_array_less( @@ -64,11 +64,12 @@ def valid_single_eigh_result(A, eigh_value, eigh_vector, uplo): rtol) # ||I - Q*Q'|| / M < rtol - residual = np.eye(M) - eigh_vector @np.linalg.inv(eigh_vector) + residual = np.eye(M) - eigh_vector @ np.linalg.inv(eigh_vector) np.testing.assert_array_less(np.linalg.norm(residual, np.inf) / M, rtol) class TestEighOp(OpTest): + def setUp(self): paddle.enable_static() self.op_type = "eigh" @@ -96,11 +97,13 @@ class TestEighOp(OpTest): class TestEighUPLOCase(TestEighOp): + def init_config(self): self.UPLO = 'U' class TestEighGPUCase(unittest.TestCase): + def setUp(self): self.x_shape = [32, 32] self.dtype = "float32" @@ -113,11 +116,12 @@ class TestEighGPUCase(unittest.TestCase): paddle.disable_static(place=paddle.CUDAPlace(0)) input_real_data = paddle.to_tensor(self.x_np) actual_w, actual_v = paddle.linalg.eigh(input_real_data, self.UPLO) - valid_eigh_result(self.x_np, - actual_w.numpy(), actual_v.numpy(), self.UPLO) + valid_eigh_result(self.x_np, actual_w.numpy(), actual_v.numpy(), + self.UPLO) class TestEighAPI(unittest.TestCase): + def setUp(self): self.init_input_data() self.UPLO = 'L' @@ -147,8 +151,9 @@ class TestEighAPI(unittest.TestCase): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, startup_prog): - input_x = paddle.static.data( - 'input_x', shape=self.x_shape, dtype=self.dtype) + input_x = paddle.static.data('input_x', + shape=self.x_shape, + dtype=self.dtype) output_w, output_v = paddle.linalg.eigh(input_x) exe = paddle.static.Executor(self.place) actual_w, actual_v = exe.run(main_prog, @@ -161,8 +166,9 @@ class TestEighAPI(unittest.TestCase): startup_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, startup_prog): x_dtype = np.complex64 if self.dtype == "float32" else np.complex128 - input_x = paddle.static.data( - 'input_x', shape=self.x_shape, dtype=x_dtype) + input_x = paddle.static.data('input_x', + shape=self.x_shape, + dtype=x_dtype) output_w, output_v = paddle.linalg.eigh(input_x) exe = paddle.static.Executor(self.place) actual_w, actual_v = exe.run(main_prog, @@ -179,55 +185,61 @@ class TestEighAPI(unittest.TestCase): paddle.disable_static() input_real_data = paddle.to_tensor(self.real_data) actual_w, actual_v = paddle.linalg.eigh(input_real_data) - valid_eigh_result(self.real_data, - actual_w.numpy(), actual_v.numpy(), self.UPLO) + valid_eigh_result(self.real_data, actual_w.numpy(), actual_v.numpy(), + self.UPLO) input_complex_data = paddle.to_tensor(self.complex_symm) actual_w, actual_v = 
paddle.linalg.eigh(input_complex_data) - valid_eigh_result(self.complex_symm, - actual_w.numpy(), actual_v.numpy(), self.UPLO) + valid_eigh_result(self.complex_symm, actual_w.numpy(), actual_v.numpy(), + self.UPLO) def test_eigh_grad(self): paddle.disable_static() x = paddle.to_tensor(self.complex_symm, stop_gradient=False) w, v = paddle.linalg.eigh(x) (w.sum() + paddle.abs(v).sum()).backward() - np.testing.assert_allclose( - abs(x.grad.numpy()), - abs(x.grad.numpy().conj().transpose(self.trans_dims)), - rtol=self.rtol, - atol=self.atol) + np.testing.assert_allclose(abs(x.grad.numpy()), + abs(x.grad.numpy().conj().transpose( + self.trans_dims)), + rtol=self.rtol, + atol=self.atol) class TestEighBatchAPI(TestEighAPI): + def init_input_shape(self): self.x_shape = [2, 5, 5] class TestEighAPIError(unittest.TestCase): + def test_error(self): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, startup_prog): #input maxtrix must greater than 2 dimensions - input_x = paddle.static.data( - name='x_1', shape=[12], dtype='float32') + input_x = paddle.static.data(name='x_1', + shape=[12], + dtype='float32') self.assertRaises(ValueError, paddle.linalg.eigh, input_x) #input matrix must be square matrix - input_x = paddle.static.data( - name='x_2', shape=[12, 32], dtype='float32') + input_x = paddle.static.data(name='x_2', + shape=[12, 32], + dtype='float32') self.assertRaises(ValueError, paddle.linalg.eigh, input_x) #uplo must be in 'L' or 'U' - input_x = paddle.static.data( - name='x_3', shape=[4, 4], dtype="float32") + input_x = paddle.static.data(name='x_3', + shape=[4, 4], + dtype="float32") uplo = 'R' self.assertRaises(ValueError, paddle.linalg.eigh, input_x, uplo) #x_data cannot be integer - input_x = paddle.static.data( - name='x_4', shape=[4, 4], dtype="int32") + input_x = paddle.static.data(name='x_4', + shape=[4, 4], + dtype="int32") self.assertRaises(TypeError, paddle.linalg.eigh, input_x) diff --git a/python/paddle/fluid/tests/unittests/test_eigvals_op.py b/python/paddle/fluid/tests/unittests/test_eigvals_op.py index eff9d4ea6e8..6d52d7fa4d1 100644 --- a/python/paddle/fluid/tests/unittests/test_eigvals_op.py +++ b/python/paddle/fluid/tests/unittests/test_eigvals_op.py @@ -33,6 +33,7 @@ def np_eigvals(a): class TestEigvalsOp(OpTest): + def setUp(self): np.random.seed(0) paddle.enable_static() @@ -57,14 +58,14 @@ class TestEigvalsOp(OpTest): self.input_data = np.random.random(self.input_dims).astype( self.dtype) else: - self.input_data = ( - np.random.random(self.input_dims) + - np.random.random(self.input_dims) * 1j).astype(self.dtype) + self.input_data = (np.random.random(self.input_dims) + + np.random.random(self.input_dims) * 1j).astype( + self.dtype) def test_check_output(self): self.__class__.no_need_check_grad = True - self.check_output_with_place_customized( - checker=self.verify_output, place=core.CPUPlace()) + self.check_output_with_place_customized(checker=self.verify_output, + place=core.CPUPlace()) def verify_output(self, outs): actual_outs = np.sort(np.array(outs[0])) @@ -75,9 +76,8 @@ class TestEigvalsOp(OpTest): str(actual_outs.shape) + " in class " + self.__class__.__name__) n_dim = actual_outs.shape[-1] - for actual_row, expect_row in zip( - actual_outs.reshape((-1, n_dim)), - expect_outs.reshape((-1, n_dim))): + for actual_row, expect_row in zip(actual_outs.reshape((-1, n_dim)), + expect_outs.reshape((-1, n_dim))): is_mapped_index = np.zeros((n_dim, )) for i in range(n_dim): is_mapped = False @@ -98,56 +98,67 
@@ class TestEigvalsOp(OpTest): class TestEigvalsOpFloat64(TestEigvalsOp): + def set_dtype(self): self.dtype = np.float64 class TestEigvalsOpComplex64(TestEigvalsOp): + def set_dtype(self): self.dtype = np.complex64 class TestEigvalsOpComplex128(TestEigvalsOp): + def set_dtype(self): self.dtype = np.complex128 class TestEigvalsOpLargeScare(TestEigvalsOp): + def set_input_dims(self): self.input_dims = (128, 128) class TestEigvalsOpLargeScareFloat64(TestEigvalsOpLargeScare): + def set_dtype(self): self.dtype = np.float64 class TestEigvalsOpLargeScareComplex64(TestEigvalsOpLargeScare): + def set_dtype(self): self.dtype = np.complex64 class TestEigvalsOpLargeScareComplex128(TestEigvalsOpLargeScare): + def set_dtype(self): self.dtype = np.complex128 class TestEigvalsOpBatch1(TestEigvalsOp): + def set_input_dims(self): self.input_dims = (1, 2, 3, 4, 4) class TestEigvalsOpBatch2(TestEigvalsOp): + def set_input_dims(self): self.input_dims = (3, 1, 4, 5, 5) class TestEigvalsOpBatch3(TestEigvalsOp): + def set_input_dims(self): self.input_dims = (6, 2, 9, 6, 6) class TestEigvalsAPI(unittest.TestCase): + def setUp(self): np.random.seed(0) @@ -177,9 +188,9 @@ class TestEigvalsAPI(unittest.TestCase): self.input_data = np.random.random(self.input_dims).astype( self.dtype) else: - self.input_data = ( - np.random.random(self.input_dims) + - np.random.random(self.input_dims) * 1j).astype(self.dtype) + self.input_data = (np.random.random(self.input_dims) + + np.random.random(self.input_dims) * 1j).astype( + self.dtype) def verify_output(self, actural_outs, expect_outs): actual_outs = np.array(actural_outs) @@ -190,9 +201,8 @@ class TestEigvalsAPI(unittest.TestCase): str(actual_outs.shape) + " in class " + self.__class__.__name__) n_dim = actual_outs.shape[-1] - for actual_row, expect_row in zip( - actual_outs.reshape((-1, n_dim)), - expect_outs.reshape((-1, n_dim))): + for actual_row, expect_row in zip(actual_outs.reshape((-1, n_dim)), + expect_outs.reshape((-1, n_dim))): is_mapped_index = np.zeros((n_dim, )) for i in range(n_dim): is_mapped = False @@ -234,19 +244,22 @@ class TestEigvalsAPI(unittest.TestCase): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - small_input_tensor = paddle.static.data( - name='small_x', shape=self.small_dims, dtype=self.dtype) - large_input_tensor = paddle.static.data( - name='large_x', shape=self.large_dims, dtype=self.dtype) - batch_input_tensor = paddle.static.data( - name='batch_x', shape=self.batch_dims, dtype=self.dtype) - - small_outs = paddle.linalg.eigvals( - small_input_tensor, name='small_x') - large_outs = paddle.linalg.eigvals( - large_input_tensor, name='large_x') - batch_outs = paddle.linalg.eigvals( - batch_input_tensor, name='batch_x') + small_input_tensor = paddle.static.data(name='small_x', + shape=self.small_dims, + dtype=self.dtype) + large_input_tensor = paddle.static.data(name='large_x', + shape=self.large_dims, + dtype=self.dtype) + batch_input_tensor = paddle.static.data(name='batch_x', + shape=self.batch_dims, + dtype=self.dtype) + + small_outs = paddle.linalg.eigvals(small_input_tensor, + name='small_x') + large_outs = paddle.linalg.eigvals(large_input_tensor, + name='large_x') + batch_outs = paddle.linalg.eigvals(batch_input_tensor, + name='batch_x') exe = paddle.static.Executor(place) @@ -289,16 +302,19 @@ class TestEigvalsAPI(unittest.TestCase): class TestEigvalsAPIFloat64(TestEigvalsAPI): + def set_dtype(self): self.dtype = np.float64 class TestEigvalsAPIComplex64(TestEigvalsAPI): + def 
set_dtype(self): self.dtype = np.complex64 class TestEigvalsAPIComplex128(TestEigvalsAPI): + def set_dtype(self): self.dtype = np.complex128 diff --git a/python/paddle/fluid/tests/unittests/test_eigvalsh_op.py b/python/paddle/fluid/tests/unittests/test_eigvalsh_op.py index 8b7ca9189e1..e518491588d 100644 --- a/python/paddle/fluid/tests/unittests/test_eigvalsh_op.py +++ b/python/paddle/fluid/tests/unittests/test_eigvalsh_op.py @@ -47,6 +47,7 @@ def valid_eigenvalues(actual, expected): class TestEigvalshOp(OpTest): + def setUp(self): paddle.enable_static() self.op_type = "eigvalsh" @@ -75,11 +76,13 @@ class TestEigvalshOp(OpTest): class TestEigvalshUPLOCase(TestEigvalshOp): + def init_config(self): self.UPLO = 'U' class TestEigvalshGPUCase(unittest.TestCase): + def setUp(self): self.x_shape = [32, 32] self.dtype = "float32" @@ -96,6 +99,7 @@ class TestEigvalshGPUCase(unittest.TestCase): class TestEigvalshAPI(unittest.TestCase): + def setUp(self): self.dtype = "float32" self.UPLO = 'L' @@ -124,8 +128,9 @@ class TestEigvalshAPI(unittest.TestCase): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, startup_prog): - input_x = paddle.static.data( - 'input_x', shape=self.x_shape, dtype=self.dtype) + input_x = paddle.static.data('input_x', + shape=self.x_shape, + dtype=self.dtype) output_w = paddle.linalg.eigvalsh(input_x) exe = paddle.static.Executor(self.place) actual_w = exe.run(main_prog, @@ -140,8 +145,9 @@ class TestEigvalshAPI(unittest.TestCase): startup_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, startup_prog): x_dtype = np.complex64 if self.dtype == "float32" else np.complex128 - input_x = paddle.static.data( - 'input_x', shape=self.x_shape, dtype=x_dtype) + input_x = paddle.static.data('input_x', + shape=self.x_shape, + dtype=x_dtype) output_w = paddle.linalg.eigvalsh(input_x) exe = paddle.static.Executor(self.place) actual_w = exe.run(main_prog, @@ -172,42 +178,48 @@ class TestEigvalshAPI(unittest.TestCase): x = paddle.to_tensor(self.complex_symm, stop_gradient=False) w = paddle.linalg.eigvalsh(x) (w.sum()).backward() - np.testing.assert_allclose( - abs(x.grad.numpy()), - abs(x.grad.numpy().conj().transpose(self.trans_dims)), - rtol=self.rtol, - atol=self.atol) + np.testing.assert_allclose(abs(x.grad.numpy()), + abs(x.grad.numpy().conj().transpose( + self.trans_dims)), + rtol=self.rtol, + atol=self.atol) class TestEigvalshBatchAPI(TestEigvalshAPI): + def init_input_shape(self): self.x_shape = [2, 5, 5] class TestEigvalshAPIError(unittest.TestCase): + def test_error(self): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, startup_prog): #input maxtrix must greater than 2 dimensions - input_x = paddle.static.data( - name='x_1', shape=[12], dtype='float32') + input_x = paddle.static.data(name='x_1', + shape=[12], + dtype='float32') self.assertRaises(ValueError, paddle.linalg.eigvalsh, input_x) #input matrix must be square matrix - input_x = paddle.static.data( - name='x_2', shape=[12, 32], dtype='float32') + input_x = paddle.static.data(name='x_2', + shape=[12, 32], + dtype='float32') self.assertRaises(ValueError, paddle.linalg.eigvalsh, input_x) #uplo must be in 'L' or 'U' - input_x = paddle.static.data( - name='x_3', shape=[4, 4], dtype="float32") + input_x = paddle.static.data(name='x_3', + shape=[4, 4], + dtype="float32") uplo = 'R' self.assertRaises(ValueError, paddle.linalg.eigvalsh, input_x, uplo) #x_data cannot be 
integer - input_x = paddle.static.data( - name='x_4', shape=[4, 4], dtype="int32") + input_x = paddle.static.data(name='x_4', + shape=[4, 4], + dtype="int32") self.assertRaises(TypeError, paddle.linalg.eigvalsh, input_x) diff --git a/python/paddle/fluid/tests/unittests/test_einsum.py b/python/paddle/fluid/tests/unittests/test_einsum.py index 26aaf0f44f1..9ba4869786c 100644 --- a/python/paddle/fluid/tests/unittests/test_einsum.py +++ b/python/paddle/fluid/tests/unittests/test_einsum.py @@ -19,10 +19,12 @@ import paddle from paddle.fluid import core import os + os.environ['FLAGS_new_einsum'] = "0" class TestErrors(unittest.TestCase): + def setUp(self): pass @@ -45,50 +47,62 @@ class TestErrors(unittest.TestCase): with self.assertRaisesRegex(AssertionError, ('At least one operand is expected.')): paddle.einsum('ijk') - with self.assertRaisesRegex(AssertionError, ( - 'Invalid equation: multiple `->` were found.')): + with self.assertRaisesRegex( + AssertionError, + ('Invalid equation: multiple `->` were found.')): paddle.einsum('i -> j -> k', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: the number of operands is 2, " - "but found 3 segments in the label equation.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: the number of operands is 2, " + "but found 3 segments in the label equation.")): paddle.einsum('i,j,k', a, a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: the number of operands is 2, " - "but found 1 segments in the label equation.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: the number of operands is 2, " + "but found 1 segments in the label equation.")): paddle.einsum('ij -> k', a, a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: the number of operands is 1, " - "but found 2 segments in the label equation.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: the number of operands is 1, " + "but found 2 segments in the label equation.")): paddle.einsum('i, -> k', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: the label string '' misses dimensions.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: the label string '' misses dimensions.")): paddle.einsum('->', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: the label string 'i' misses dimensions.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: the label string 'i' misses dimensions.")): paddle.einsum('i', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: _ is not a valid label, " - "which should be letters.")): + with self.assertRaisesRegex( + AssertionError, ("Invalid equation: _ is not a valid label, " + "which should be letters.")): paddle.einsum('i_', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: `.` is found outside of an ellipsis.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: `.` is found outside of an ellipsis.")): paddle.einsum('i..j', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: `.` is found outside of an ellipsis.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: `.` is found outside of an ellipsis.")): paddle.einsum('...k...', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: missing ellipsis in output labels.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: missing ellipsis in output labels.")): 
paddle.einsum('i...->i', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: duplicate output labels are found.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: duplicate output labels are found.")): paddle.einsum('i...->i...i', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid operands: label i " - "corresponds to non-broadcastable dimensions.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid operands: label i " + "corresponds to non-broadcastable dimensions.")): paddle.einsum('ij...,ji...', a, a) class TestEinsum(unittest.TestCase): + @classmethod def setUpClass(cls): np.random.seed(12345) @@ -122,8 +136,7 @@ class TestEinsum(unittest.TestCase): def check_output_equal(self, actual, expect, rtol=1.e-5, atol=1.e-8): error_msg = 'Output has diff at place:{}. \nExpect: {} \nBut Got: {} in class {}' self.assertTrue( - np.allclose( - actual, expect, rtol=rtol, atol=atol), + np.allclose(actual, expect, rtol=rtol, atol=atol), error_msg.format(paddle.get_device(), expect, actual, self.__class__.__name__)) @@ -150,136 +163,163 @@ class TestEinsum(unittest.TestCase): class TestEinsumVectorDot(TestEinsum): + def setUp(self): self.sample = {"paradigm": "i,i->", "data": ["x", "x"]} class TestEinsumVectorMul(TestEinsum): + def setUp(self): self.sample = {"paradigm": "i,i->i", "data": ["x", "x"]} class TestEinsumVectorOuter(TestEinsum): + def setUp(self): self.sample = {"paradigm": "i,j->ij", "data": ["x", "y"]} class TestEinsumMatrixTranspose(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij->ji", "data": ["A"]} class TestEinsumMatrixRowSum(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij->j", "data": ["A"]} class TestEinsumMatrixColSum(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij->i", "data": ["A"]} class TestEinsumMatrixEleMul(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij,ij->ij", "data": ["A", "A"]} class TestEinsumDegenerateMatrixVecMul(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij,j", "data": ["a", "b"]} class TestEinsumMatrixVecMul(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij,j->i", "data": ["A", "x"]} class TestEinsumMatrixMul(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij,kj->ik", "data": ["A", "B"]} class TestEinsumMatrixOuter(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij,kl->ijkl", "data": ["A", "C"]} class TestEinsumTensorBMM(TestEinsum): + def setUp(self): self.sample = {"paradigm": "bij,bjk->bik", "data": ["D", "E"]} class TestEinsumTensorContract1(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijk,jk->i", "data": ["D", "A"]} class TestEinsumTensorContract2(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijk,lk->ijl", "data": ["D", "B"]} class TestEinsumTensorContract3(TestEinsum): + def setUp(self): self.sample = {"paradigm": "abcd,dfg->abcfg", "data": ["F", "D"]} class TestEinsumTensorContract4(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijk,jk->ik", "data": ["D", "A"]} class TestEinsumTensorContract5(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijk,jk->ij", "data": ["D", "A"]} class TestEinsumTensorContract6(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ik, ijk->j", "data": ["A", "G"]} class TestEinsumTensorContract7(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijk, ik->jk", "data": ["G", "A"]} class TestEinsumEllipsis1(TestEinsum): + def setUp(self): self.sample = {"paradigm": "i...->...", "data": 
["G"]} class TestEinsumEllipsis2(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij,...i->j...", "data": ["A", "H"]} class TestEinsumEllipsis3(TestEinsum): + def setUp(self): self.sample = {"paradigm": "k...,jk", "data": ["F", "I"]} class TestEinsumTestEinsumBilinear(TestEinsum): + def setUp(self): self.sample = {"paradigm": "bn,anm,bm->ba", "data": ["B", "E", "I"]} class TestEinsumTestEinsumOthers1(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijkl, lmn->kmn", "data": ["F", "H"]} class TestEinsumTestEinsumOthers2(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijkl, lmn->ijn", "data": ["F", "H"]} class TestEinsumBatch1(TestEinsum): + def setUp(self): self.sample = {"paradigm": "blq,bhlk->bhlqk", "data": ["J", "K"]} class TestNumpyTests(unittest.TestCase): + def setUp(self): pass @@ -294,8 +334,7 @@ class TestNumpyTests(unittest.TestCase): def check_output_equal(self, actual, expect, rtol=1.e-5, atol=1.e-8): error_msg = 'Output has diff at place:{}. \nExpect: {} \nBut Got: {} in class {}' self.assertTrue( - np.allclose( - actual, expect, rtol=rtol, atol=atol), + np.allclose(actual, expect, rtol=rtol, atol=atol), error_msg.format(paddle.get_device(), expect, actual, self.__class__.__name__)) @@ -409,16 +448,21 @@ class TestNumpyTests(unittest.TestCase): main = fluid.Program() startup = fluid.Program() with fluid.program_guard(main, startup): - a = paddle.static.data( - name='a', shape=[3, None, None, None], dtype='float') - b = paddle.static.data( - name='b', shape=[2, None, None, None], dtype='float') - c = paddle.static.data( - name='c', shape=[None, None, 2, None], dtype='float') - d = paddle.static.data( - name='d', shape=[None, None, 5], dtype='float') - e = paddle.static.data( - name='e', shape=[None, 2, None], dtype='float') + a = paddle.static.data(name='a', + shape=[3, None, None, None], + dtype='float') + b = paddle.static.data(name='b', + shape=[2, None, None, None], + dtype='float') + c = paddle.static.data(name='c', + shape=[None, None, 2, None], + dtype='float') + d = paddle.static.data(name='d', + shape=[None, None, 5], + dtype='float') + e = paddle.static.data(name='e', + shape=[None, 2, None], + dtype='float') outs = [] outs.append(paddle.einsum("ibnd,jbnd->bnij", a, b)) diff --git a/python/paddle/fluid/tests/unittests/test_einsum_op.py b/python/paddle/fluid/tests/unittests/test_einsum_op.py index 1a4ae54afef..c36950b6922 100644 --- a/python/paddle/fluid/tests/unittests/test_einsum_op.py +++ b/python/paddle/fluid/tests/unittests/test_einsum_op.py @@ -21,6 +21,7 @@ from op_test import OpTest class TestEinsumBinary(OpTest): + def setUp(self): paddle.enable_static() self.op_type = "einsum" @@ -35,7 +36,8 @@ class TestEinsumBinary(OpTest): self.inputs = {"Operands": self.operands} self.attrs = {"equation": self.equation} self.outputs = { - 'Out': out, + 'Out': + out, "InnerCache": [('cache_' + str(i), np.array([1.0])) for i in range(len(self.operands))] } @@ -61,6 +63,7 @@ class TestEinsumBinary(OpTest): class TestEinsum1(TestEinsumBinary): + def set_mandatory(self): self.shapes = [(20, 3, 3), (20, 3, 3)] self.types = [np.float64, np.float64] @@ -68,6 +71,7 @@ class TestEinsum1(TestEinsumBinary): class TestEinsum2(TestEinsumBinary): + def set_mandatory(self): self.shapes = [(20, 3, 3), (20, 3, 3)] self.types = [np.float64, np.float64] @@ -75,6 +79,7 @@ class TestEinsum2(TestEinsumBinary): class TestEinsum3(TestEinsumBinary): + def set_mandatory(self): self.shapes = [(10, 10), (10, 10)] self.types = [np.float64, np.float64] @@ -82,6 +87,7 @@ 
class TestEinsum3(TestEinsumBinary): class TestEinsumWithReduction(TestEinsumBinary): + def set_mandatory(self): self.shapes = [(10, 3, 5), (5, 30)] self.types = [np.float64, np.float64] @@ -89,6 +95,7 @@ class TestEinsumWithReduction(TestEinsumBinary): class TestEinsumWithReduction1(TestEinsumBinary): + def set_mandatory(self): self.shapes = [(10, 3, 3, 5), (10, 5, 10, 10)] self.types = [np.float64, np.float64] @@ -96,6 +103,7 @@ class TestEinsumWithReduction1(TestEinsumBinary): class TestEinsumWithUnary(TestEinsumBinary): + def set_mandatory(self): self.shapes = [(10, 10, 3, 5)] self.types = [np.float64] @@ -103,6 +111,7 @@ class TestEinsumWithUnary(TestEinsumBinary): class TestEinsumWithUnary1(TestEinsumBinary): + def set_mandatory(self): self.shapes = [(5, 10, 3, 3), (3, 6, 3, 10)] self.types = [np.float64, np.float64] @@ -110,6 +119,7 @@ class TestEinsumWithUnary1(TestEinsumBinary): class TestEinsumWithBroadcast1(TestEinsumBinary): + def set_mandatory(self): self.shapes = [(5, 10, 3, 3)] self.types = [np.float64] @@ -117,6 +127,7 @@ class TestEinsumWithBroadcast1(TestEinsumBinary): class TestEinsumWithBroadcast2(TestEinsumBinary): + def set_mandatory(self): self.shapes = [(10, 11), (3, 4, 5, 10)] self.types = [np.float64, np.float64] @@ -124,6 +135,7 @@ class TestEinsumWithBroadcast2(TestEinsumBinary): class TestEinsumWithBroadcast3(TestEinsumBinary): + def set_mandatory(self): self.shapes = [(10, 3, 2, 3, 4), (12, 10)] self.types = [np.float64, np.float64] @@ -131,6 +143,7 @@ class TestEinsumWithBroadcast3(TestEinsumBinary): class TestEinsumWithBroadcast4(TestEinsumBinary): + def set_mandatory(self): self.shapes = [(10, 3, 2, 3, 4), (12, 10)] self.types = [np.float64, np.float64] @@ -138,6 +151,7 @@ class TestEinsumWithBroadcast4(TestEinsumBinary): class TestEinsumWithBroadcast5(TestEinsumBinary): + def set_mandatory(self): self.shapes = [(3, 2, 2, 10), (10, 3, 2, 2)] self.types = [np.float64, np.float64] @@ -145,6 +159,7 @@ class TestEinsumWithBroadcast5(TestEinsumBinary): class TestEinsumWithBroadcast6(TestEinsumBinary): + def set_mandatory(self): self.shapes = [(100), (100)] self.types = [np.float64, np.float64] diff --git a/python/paddle/fluid/tests/unittests/test_einsum_v2.py b/python/paddle/fluid/tests/unittests/test_einsum_v2.py index b33a943c9f2..97f3eef51a5 100644 --- a/python/paddle/fluid/tests/unittests/test_einsum_v2.py +++ b/python/paddle/fluid/tests/unittests/test_einsum_v2.py @@ -19,6 +19,7 @@ import paddle from paddle.fluid import core import os + os.environ['FLAGS_new_einsum'] = "1" @@ -36,6 +37,7 @@ def error_trans(func, *args, **kargs): class TestErrors(unittest.TestCase): + def setUp(self): pass @@ -59,50 +61,62 @@ class TestErrors(unittest.TestCase): AssertionError, ("Required at least one operand in Einsum API, but received 0 ")): paddle.einsum('ijk') - with self.assertRaisesRegex(AssertionError, ( - 'Invalid equation: multiple `->` were found.')): + with self.assertRaisesRegex( + AssertionError, + ('Invalid equation: multiple `->` were found.')): paddle.einsum('i -> j -> k', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: the number of operands is 2, " - "but found 3 segments in the label equation.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: the number of operands is 2, " + "but found 3 segments in the label equation.")): paddle.einsum('i,j,k', a, a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: the number of operands is 2, " - "but found 1 segments in the label equation.")): + with 
self.assertRaisesRegex( + AssertionError, + ("Invalid equation: the number of operands is 2, " + "but found 1 segments in the label equation.")): paddle.einsum('ij -> k', a, a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: the number of operands is 1, " - "but found 2 segments in the label equation.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: the number of operands is 1, " + "but found 2 segments in the label equation.")): paddle.einsum('i, -> k', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: the label string '' misses dimensions.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: the label string '' misses dimensions.")): paddle.einsum('->', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: the label string 'i' misses dimensions.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: the label string 'i' misses dimensions.")): paddle.einsum('i', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: _ is not a valid label, " - "which should be letters.")): + with self.assertRaisesRegex( + AssertionError, ("Invalid equation: _ is not a valid label, " + "which should be letters.")): paddle.einsum('i_', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: `.` is found outside of an ellipsis.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: `.` is found outside of an ellipsis.")): paddle.einsum('i..j', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: `.` is found outside of an ellipsis.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: `.` is found outside of an ellipsis.")): paddle.einsum('...k...', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: missing ellipsis in output labels.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: missing ellipsis in output labels.")): paddle.einsum('i...->i', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid equation: duplicate output labels are found.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid equation: duplicate output labels are found.")): paddle.einsum('i...->i...i', a) - with self.assertRaisesRegex(AssertionError, ( - "Invalid operands: label i " - "corresponds to non-broadcastable dimensions.")): + with self.assertRaisesRegex( + AssertionError, + ("Invalid operands: label i " + "corresponds to non-broadcastable dimensions.")): error_trans(paddle.einsum, 'ij...,ji...', a, a) class TestEinsum(unittest.TestCase): + @classmethod def setUpClass(cls): np.random.seed(12345) @@ -136,8 +150,7 @@ class TestEinsum(unittest.TestCase): def check_output_equal(self, actual, expect, rtol=1.e-5, atol=1.e-8): error_msg = 'Output has diff at place:{}. 
\nExpect: {} \nBut Got: {} in class {}' self.assertTrue( - np.allclose( - actual, expect, rtol=rtol, atol=atol), + np.allclose(actual, expect, rtol=rtol, atol=atol), error_msg.format(paddle.get_device(), expect, actual, self.__class__.__name__)) @@ -164,136 +177,163 @@ class TestEinsum(unittest.TestCase): class TestEinsumVectorDot(TestEinsum): + def setUp(self): self.sample = {"paradigm": "i,i->", "data": ["x", "x"]} class TestEinsumVectorMul(TestEinsum): + def setUp(self): self.sample = {"paradigm": "i,i->i", "data": ["x", "x"]} class TestEinsumVectorOuter(TestEinsum): + def setUp(self): self.sample = {"paradigm": "i,j->ij", "data": ["x", "y"]} class TestEinsumMatrixTranspose(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij->ji", "data": ["A"]} class TestEinsumMatrixRowSum(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij->j", "data": ["A"]} class TestEinsumMatrixColSum(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij->i", "data": ["A"]} class TestEinsumMatrixEleMul(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij,ij->ij", "data": ["A", "A"]} class TestEinsumDegenerateMatrixVecMul(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij,j", "data": ["a", "b"]} class TestEinsumMatrixVecMul(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij,j->i", "data": ["A", "x"]} class TestEinsumMatrixMul(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij,kj->ik", "data": ["A", "B"]} class TestEinsumMatrixOuter(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij,kl->ijkl", "data": ["A", "C"]} class TestEinsumTensorBMM(TestEinsum): + def setUp(self): self.sample = {"paradigm": "bij,bjk->bik", "data": ["D", "E"]} class TestEinsumTensorContract1(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijk,jk->i", "data": ["D", "A"]} class TestEinsumTensorContract2(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijk,lk->ijl", "data": ["D", "B"]} class TestEinsumTensorContract3(TestEinsum): + def setUp(self): self.sample = {"paradigm": "abcd,dfg->abcfg", "data": ["F", "D"]} class TestEinsumTensorContract4(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijk,jk->ik", "data": ["D", "A"]} class TestEinsumTensorContract5(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijk,jk->ij", "data": ["D", "A"]} class TestEinsumTensorContract6(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ik, ijk->j", "data": ["A", "G"]} class TestEinsumTensorContract7(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijk, ik->jk", "data": ["G", "A"]} class TestEinsumEllipsis1(TestEinsum): + def setUp(self): self.sample = {"paradigm": "i...->...", "data": ["G"]} class TestEinsumEllipsis2(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ij,...i->j...", "data": ["A", "H"]} class TestEinsumEllipsis3(TestEinsum): + def setUp(self): self.sample = {"paradigm": "k...,jk", "data": ["F", "I"]} class TestEinsumTestEinsumBilinear(TestEinsum): + def setUp(self): self.sample = {"paradigm": "bn,anm,bm->ba", "data": ["B", "E", "I"]} class TestEinsumTestEinsumOthers1(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijkl, lmn->kmn", "data": ["F", "H"]} class TestEinsumTestEinsumOthers2(TestEinsum): + def setUp(self): self.sample = {"paradigm": "ijkl, lmn->ijn", "data": ["F", "H"]} class TestEinsumBatch1(TestEinsum): + def setUp(self): self.sample = {"paradigm": "blq,bhlk->bhlqk", "data": ["J", "K"]} class TestNumpyTests(unittest.TestCase): + def setUp(self): pass @@ -308,8 +348,7 
@@ class TestNumpyTests(unittest.TestCase): def check_output_equal(self, actual, expect, rtol=1.e-5, atol=1.e-8): error_msg = 'Output has diff at place:{}. \nExpect: {} \nBut Got: {} in class {}' self.assertTrue( - np.allclose( - actual, expect, rtol=rtol, atol=atol), + np.allclose(actual, expect, rtol=rtol, atol=atol), error_msg.format(paddle.get_device(), expect, actual, self.__class__.__name__)) @@ -428,16 +467,21 @@ class TestNumpyTests(unittest.TestCase): main = fluid.Program() startup = fluid.Program() with fluid.program_guard(main, startup): - a = paddle.static.data( - name='a', shape=[3, None, None, None], dtype='float') - b = paddle.static.data( - name='b', shape=[2, None, None, None], dtype='float') - c = paddle.static.data( - name='c', shape=[None, None, 2, None], dtype='float') - d = paddle.static.data( - name='d', shape=[None, None, 5], dtype='float') - e = paddle.static.data( - name='e', shape=[None, 2, None], dtype='float') + a = paddle.static.data(name='a', + shape=[3, None, None, None], + dtype='float') + b = paddle.static.data(name='b', + shape=[2, None, None, None], + dtype='float') + c = paddle.static.data(name='c', + shape=[None, None, 2, None], + dtype='float') + d = paddle.static.data(name='d', + shape=[None, None, 5], + dtype='float') + e = paddle.static.data(name='e', + shape=[None, 2, None], + dtype='float') outs = [] outs.append(paddle.einsum("ibnd,jbnd->bnij", a, b)) @@ -465,6 +509,7 @@ class TestNumpyTests(unittest.TestCase): class TestStaticGraphShape(unittest.TestCase): + def setUp(self): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py index 22787a23fea..714ef764a92 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py @@ -23,6 +23,7 @@ from paddle.fluid import compiler, Program, program_guard class TestElementwiseAddOp(OpTest): + def init_kernel_type(self): self.use_mkldnn = False @@ -46,41 +47,37 @@ class TestElementwiseAddOp(OpTest): def test_check_output(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_output( - check_dygraph=(self.use_mkldnn == False), - check_eager=self.check_eager()) + self.check_output(check_dygraph=(self.use_mkldnn == False), + check_eager=self.check_eager()) def test_check_grad_normal(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode if self.dtype == np.float16: return - self.check_grad( - ['X', 'Y'], - 'Out', - check_dygraph=(self.use_mkldnn == False), - check_eager=self.check_eager()) + self.check_grad(['X', 'Y'], + 'Out', + check_dygraph=(self.use_mkldnn == False), + check_eager=self.check_eager()) def test_check_grad_ingore_x(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode if self.dtype == np.float16: return - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - check_dygraph=(self.use_mkldnn == False), - check_eager=self.check_eager()) + self.check_grad(['Y'], + 'Out', + no_grad_set=set("X"), + check_dygraph=(self.use_mkldnn == False), + check_eager=self.check_eager()) def test_check_grad_ingore_y(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode if self.dtype == np.float16: return - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - check_dygraph=(self.use_mkldnn == False), - check_eager=self.check_eager()) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), + check_dygraph=(self.use_mkldnn == False), + check_eager=self.check_eager()) 
def init_input_output(self): self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) @@ -97,6 +94,7 @@ class TestElementwiseAddOp(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16ElementwiseAddOp(TestElementwiseAddOp): + def init_dtype(self): self.dtype = np.float16 @@ -113,6 +111,7 @@ class TestFP16ElementwiseAddOp(TestElementwiseAddOp): not core.is_compiled_with_cuda() or core.cudnn_version() < 8100, "core is not compiled with CUDA and cudnn version need larger than 8.1.0") class TestBF16ElementwiseAddOp(OpTest): + def setUp(self): self.op_type = "elementwise_add" self.dtype = np.uint16 @@ -126,8 +125,7 @@ class TestBF16ElementwiseAddOp(OpTest): self.inputs = { 'X': OpTest.np_dtype_to_fluid_dtype(convert_float_to_uint16(self.x)), - 'Y': - OpTest.np_dtype_to_fluid_dtype(convert_float_to_uint16(self.y)) + 'Y': OpTest.np_dtype_to_fluid_dtype(convert_float_to_uint16(self.y)) } self.attrs = {'axis': self.axis, 'use_mkldnn': False} self.outputs = {'Out': convert_float_to_uint16(self.out)} @@ -142,18 +140,23 @@ class TestBF16ElementwiseAddOp(OpTest): def test_check_grad_ingore_x(self): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['Y'], 'Out', no_grad_set=set("X"), check_eager=False) + self.check_grad_with_place(place, ['Y'], + 'Out', + no_grad_set=set("X"), + check_eager=False) def test_check_grad_ingore_y(self): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['X'], 'Out', no_grad_set=set('Y'), check_eager=False) + self.check_grad_with_place(place, ['X'], + 'Out', + no_grad_set=set('Y'), + check_eager=False) @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseAddOp_scalar(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -163,6 +166,7 @@ class TestElementwiseAddOp_scalar(TestElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestFP16ElementwiseAddOp_scalar(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -172,6 +176,7 @@ class TestFP16ElementwiseAddOp_scalar(TestFP16ElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1,1) to test broadcast.") class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1, 1).astype(self.dtype) @@ -181,6 +186,7 @@ class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1,1) to test broadcast.") class TestFP16ElementwiseAddOp_scalar2(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1, 1).astype(self.dtype) @@ -188,6 +194,7 @@ class TestFP16ElementwiseAddOp_scalar2(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_Vector(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.random((100, )).astype(self.dtype) self.y = np.random.random((100, )).astype(self.dtype) @@ -195,6 +202,7 @@ class TestElementwiseAddOp_Vector(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_Vector(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.random((100, )).astype(self.dtype) self.y = np.random.random((100, )).astype(self.dtype) @@ 
-202,6 +210,7 @@ class TestFP16ElementwiseAddOp_Vector(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -212,6 +221,7 @@ class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_0(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -222,6 +232,7 @@ class TestFP16ElementwiseAddOp_broadcast_0(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 100, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -232,6 +243,7 @@ class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_1(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 100, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -242,6 +254,7 @@ class TestFP16ElementwiseAddOp_broadcast_1(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -249,6 +262,7 @@ class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_2(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -256,6 +270,7 @@ class TestFP16ElementwiseAddOp_broadcast_2(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12, 1).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -266,6 +281,7 @@ class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_3(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -276,6 +292,7 @@ class TestFP16ElementwiseAddOp_broadcast_3(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 1, 2).astype(self.dtype) self.y = np.random.rand(100, 1).astype(self.dtype) @@ -286,6 +303,7 @@ class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_4(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 1, 2).astype(self.dtype) self.y = np.random.rand(100, 1).astype(self.dtype) @@ -296,6 +314,7 @@ class TestFP16ElementwiseAddOp_broadcast_4(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 3, 12).astype(self.dtype) self.y = np.random.rand(10, 1, 12).astype(self.dtype) @@ -303,6 +322,7 @@ class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_5(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 3, 12).astype(self.dtype) self.y = np.random.rand(10, 1, 12).astype(self.dtype) @@ -310,6 +330,7 @@ class TestFP16ElementwiseAddOp_broadcast_5(TestFP16ElementwiseAddOp): class 
TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype) self.y = np.random.rand(2, 12, 1, 5).astype(self.dtype) @@ -317,6 +338,7 @@ class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(1, 1, 20, 5).astype(self.dtype) self.y = np.random.rand(20, 5, 1, 1).astype(self.dtype) @@ -324,6 +346,7 @@ class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_broadcast_6(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype) self.y = np.random.rand(2, 12, 1, 5).astype(self.dtype) @@ -331,6 +354,7 @@ class TestFP16ElementwiseAddOp_broadcast_6(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -341,6 +365,7 @@ class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_rowwise_add_0(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -353,6 +378,7 @@ class TestFP16ElementwiseAddOp_rowwise_add_0(TestFP16ElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 1).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -365,6 +391,7 @@ class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestFP16ElementwiseAddOp_rowwise_add_1(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 1).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -375,6 +402,7 @@ class TestFP16ElementwiseAddOp_rowwise_add_1(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100, 1, 1).astype(self.dtype) @@ -385,6 +413,7 @@ class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): class TestFP16ElementwiseAddOp_channelwise_add(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100, 1, 1).astype(self.dtype) @@ -395,6 +424,7 @@ class TestFP16ElementwiseAddOp_channelwise_add(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(1, 1, 100).astype(self.dtype) @@ -405,6 +435,7 @@ class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): class TestElementwiseFP16AddOp_commonuse_add1(TestFP16ElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(1, 1, 100).astype(self.dtype) @@ -415,6 +446,7 @@ class TestElementwiseFP16AddOp_commonuse_add1(TestFP16ElementwiseAddOp): class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 3, 1, 4).astype(self.dtype) self.y = 
np.random.rand(10, 1, 12, 1).astype(self.dtype) @@ -425,6 +457,7 @@ class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 12).astype(self.dtype) self.y = np.random.rand(2, 2, 10, 12).astype(self.dtype) @@ -435,6 +468,7 @@ class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): class TestElementwiseAddOp_same_shape_ysize_large(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 1, 12).astype(self.dtype) self.y = np.random.rand(10, 2, 12).astype(self.dtype) @@ -445,13 +479,14 @@ class TestElementwiseAddOp_same_shape_ysize_large(TestElementwiseAddOp): class TestElementwiseAddOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of elementwise_add must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) - y1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) + y1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.elementwise_add, x1, y1) # the input dtype of elementwise_add must be float16 or float32 or float64 or int32 or int64 @@ -462,6 +497,7 @@ class TestElementwiseAddOpError(unittest.TestCase): class TestAddApi(unittest.TestCase): + def _executed_api(self, x, y, name=None): return paddle.add(x, y, name) @@ -505,11 +541,13 @@ class TestAddApi(unittest.TestCase): class TestAddInplaceApi(TestAddApi): + def _executed_api(self, x, y, name=None): return x.add_(y, name) class TestAddInplaceBroadcastSuccess(unittest.TestCase): + def init_data(self): self.x_numpy = np.random.rand(2, 3, 4).astype('float') self.y_numpy = np.random.rand(3, 4).astype('float') @@ -526,18 +564,21 @@ class TestAddInplaceBroadcastSuccess(unittest.TestCase): class TestAddInplaceBroadcastSuccess2(TestAddInplaceBroadcastSuccess): + def init_data(self): self.x_numpy = np.random.rand(1, 2, 3, 1).astype('float') self.y_numpy = np.random.rand(3, 1).astype('float') class TestAddInplaceBroadcastSuccess3(TestAddInplaceBroadcastSuccess): + def init_data(self): self.x_numpy = np.random.rand(2, 3, 1, 5).astype('float') self.y_numpy = np.random.rand(1, 3, 1, 5).astype('float') class TestAddInplaceBroadcastError(unittest.TestCase): + def init_data(self): self.x_numpy = np.random.rand(3, 4).astype('float') self.y_numpy = np.random.rand(2, 3, 4).astype('float') @@ -556,18 +597,21 @@ class TestAddInplaceBroadcastError(unittest.TestCase): class TestAddInplaceBroadcastError2(TestAddInplaceBroadcastError): + def init_data(self): self.x_numpy = np.random.rand(2, 1, 4).astype('float') self.y_numpy = np.random.rand(2, 3, 4).astype('float') class TestAddInplaceBroadcastError3(TestAddInplaceBroadcastError): + def init_data(self): self.x_numpy = np.random.rand(5, 2, 1, 4).astype('float') self.y_numpy = np.random.rand(2, 3, 4).astype('float') class TestComplexElementwiseAddOp(OpTest): + def setUp(self): self.op_type = "elementwise_add" self.dtype = np.float64 @@ -593,8 +637,8 @@ class TestComplexElementwiseAddOp(OpTest): self.out = self.x + self.y def init_grad_input_output(self): - self.grad_out = np.ones(self.shape, self.dtype) + 1J * np.ones( - self.shape, self.dtype) + self.grad_out = np.ones( + self.shape, self.dtype) + 1J * np.ones(self.shape, 
self.dtype) self.grad_x = self.grad_out self.grad_y = self.grad_out @@ -602,30 +646,28 @@ class TestComplexElementwiseAddOp(OpTest): self.check_output(check_eager=False) def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y'], - 'Out', - user_defined_grads=[self.grad_x, self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X', 'Y'], + 'Out', + user_defined_grads=[self.grad_x, self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out]) class TestRealComplexElementwiseAddOp(TestComplexElementwiseAddOp): + def init_input_output(self): self.x = np.random.random(self.shape).astype(self.dtype) self.y = np.random.random(self.shape).astype( @@ -633,13 +675,14 @@ class TestRealComplexElementwiseAddOp(TestComplexElementwiseAddOp): self.out = self.x + self.y def init_grad_input_output(self): - self.grad_out = np.ones(self.shape, self.dtype) + 1J * np.ones( - self.shape, self.dtype) + self.grad_out = np.ones( + self.shape, self.dtype) + 1J * np.ones(self.shape, self.dtype) self.grad_x = np.real(self.grad_out) self.grad_y = self.grad_out class TestBoolAddFloatElementwiseAddop(unittest.TestCase): + def test_static_add(self): paddle.enable_static() a = 1.5 diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py index 27dbd3752b5..d522a9d0cde 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py @@ -22,6 +22,7 @@ from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16 class ElementwiseDivOp(OpTest): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -49,21 +50,26 @@ class ElementwiseDivOp(OpTest): self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.05) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], 'Out', max_relative_error=0.05, no_grad_set=set("X")) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.05, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], 'Out', max_relative_error=0.05, no_grad_set=set('Y')) + self.check_grad(['X'], + 'Out', + max_relative_error=0.05, + no_grad_set=set('Y')) def init_dtype(self): pass -@unittest.skipIf(not core.is_compiled_with_cuda() or - not core.is_bfloat16_supported(core.CUDAPlace(0)), +@unittest.skipIf(not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), "core is not compiled with CUDA and not support the bfloat16") class TestElementwiseDivOpBF16(OpTest): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -100,6 +106,7 @@ class TestElementwiseDivOpBF16(OpTest): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseDivOp_scalar(ElementwiseDivOp): + def setUp(self): self.op_type = "elementwise_div" 
self.python_api = paddle.divide @@ -111,6 +118,7 @@ class TestElementwiseDivOp_scalar(ElementwiseDivOp): class TestElementwiseDivOp_Vector(ElementwiseDivOp): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -122,6 +130,7 @@ class TestElementwiseDivOp_Vector(ElementwiseDivOp): class TestElementwiseDivOp_broadcast_0(ElementwiseDivOp): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -132,12 +141,13 @@ class TestElementwiseDivOp_broadcast_0(ElementwiseDivOp): self.attrs = {'axis': 0} self.outputs = { - 'Out': - np.divide(self.inputs['X'], self.inputs['Y'].reshape(100, 1, 1)) + 'Out': np.divide(self.inputs['X'], + self.inputs['Y'].reshape(100, 1, 1)) } class TestElementwiseDivOp_broadcast_1(ElementwiseDivOp): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -148,12 +158,13 @@ class TestElementwiseDivOp_broadcast_1(ElementwiseDivOp): self.attrs = {'axis': 1} self.outputs = { - 'Out': - np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 100, 1)) + 'Out': np.divide(self.inputs['X'], + self.inputs['Y'].reshape(1, 100, 1)) } class TestElementwiseDivOp_broadcast_2(ElementwiseDivOp): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -163,12 +174,13 @@ class TestElementwiseDivOp_broadcast_2(ElementwiseDivOp): } self.outputs = { - 'Out': - np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 1, 100)) + 'Out': np.divide(self.inputs['X'], + self.inputs['Y'].reshape(1, 1, 100)) } class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -185,6 +197,7 @@ class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp): class TestElementwiseDivOp_broadcast_4(ElementwiseDivOp): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -196,6 +209,7 @@ class TestElementwiseDivOp_broadcast_4(ElementwiseDivOp): class TestElementwiseDivOp_broadcast_5(ElementwiseDivOp): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -207,6 +221,7 @@ class TestElementwiseDivOp_broadcast_5(ElementwiseDivOp): class TestElementwiseDivOp_commonuse_1(ElementwiseDivOp): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -218,6 +233,7 @@ class TestElementwiseDivOp_commonuse_1(ElementwiseDivOp): class TestElementwiseDivOp_commonuse_2(ElementwiseDivOp): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -229,6 +245,7 @@ class TestElementwiseDivOp_commonuse_2(ElementwiseDivOp): class TestElementwiseDivOp_xsize_lessthan_ysize(ElementwiseDivOp): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -243,16 +260,15 @@ class TestElementwiseDivOp_xsize_lessthan_ysize(ElementwiseDivOp): class TestElementwiseDivOp_INT(OpTest): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide self.dtype = np.int32 self.init_dtype() self.inputs = { - 'X': np.random.randint( - 1, 5, size=[13, 17]).astype(self.dtype), - 'Y': np.random.randint( - 1, 5, size=[13, 17]).astype(self.dtype) + 'X': np.random.randint(1, 5, size=[13, 17]).astype(self.dtype), + 'Y': np.random.randint(1, 5, size=[13, 17]).astype(self.dtype) } self.outputs = {'Out': self.inputs['X'] // self.inputs['Y']} @@ -266,6 +282,7 @@ class TestElementwiseDivOp_INT(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class 
TestElementwiseDivOpFp16(ElementwiseDivOp): + def init_dtype(self): self.dtype = np.float16 @@ -273,19 +290,25 @@ class TestElementwiseDivOpFp16(ElementwiseDivOp): self.check_grad(['X', 'Y'], 'Out', max_relative_error=1) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], 'Out', max_relative_error=1, no_grad_set=set("X")) + self.check_grad(['Y'], + 'Out', + max_relative_error=1, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], 'Out', max_relative_error=1, no_grad_set=set('Y')) + self.check_grad(['X'], + 'Out', + max_relative_error=1, + no_grad_set=set('Y')) class TestElementwiseDivBroadcast(unittest.TestCase): + def test_shape_with_batch_sizes(self): with fluid.program_guard(fluid.Program()): - x_var = fluid.data( - name='x', dtype='float32', shape=[None, 3, None, None]) + x_var = fluid.data(name='x', + dtype='float32', + shape=[None, 3, None, None]) one = 2. out = one / x_var exe = fluid.Executor(fluid.CPUPlace()) @@ -295,6 +318,7 @@ class TestElementwiseDivBroadcast(unittest.TestCase): class TestDivideOp(unittest.TestCase): + def test_name(self): with fluid.program_guard(fluid.Program()): x = fluid.data(name="x", shape=[2, 3], dtype="float32") @@ -316,6 +340,7 @@ class TestDivideOp(unittest.TestCase): class TestComplexElementwiseDivOp(OpTest): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -352,30 +377,28 @@ class TestComplexElementwiseDivOp(OpTest): self.check_output(check_eager=False) def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y'], - 'Out', - user_defined_grads=[self.grad_x, self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X', 'Y'], + 'Out', + user_defined_grads=[self.grad_x, self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out]) class TestRealComplexElementwiseDivOp(TestComplexElementwiseDivOp): + def init_input_output(self): self.x = np.random.random((2, 3, 4, 5)).astype(self.dtype) self.y = np.random.random( diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_floordiv_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_floordiv_op.py index 6ea24b4543f..6a74acd89b0 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_floordiv_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_floordiv_op.py @@ -24,6 +24,7 @@ import random class TestElementwiseModOp(OpTest): + def init_kernel_type(self): self.use_mkldnn = False @@ -60,6 +61,7 @@ class TestElementwiseModOp(OpTest): class TestElementwiseModOp_scalar(TestElementwiseModOp): + def init_input_output(self): scale_x = random.randint(0, 100000000) scale_y = random.randint(1, 100000000) @@ -69,6 +71,7 @@ class TestElementwiseModOp_scalar(TestElementwiseModOp): class TestElementwiseModOpInverse(TestElementwiseModOp): + def init_input_output(self): self.x = np.random.uniform(0, 10000, [10]).astype(self.dtype) self.y = 
np.random.uniform(0, 1000, [10, 10]).astype(self.dtype) @@ -76,6 +79,7 @@ class TestElementwiseModOpInverse(TestElementwiseModOp): class TestFloorDivideOp(unittest.TestCase): + def test_name(self): with fluid.program_guard(fluid.Program()): x = fluid.data(name="x", shape=[2, 3], dtype="int64") @@ -96,7 +100,7 @@ class TestFloorDivideOp(unittest.TestCase): self.assertEqual((np_z == z_expected).all(), True) with fluid.dygraph.guard(fluid.CPUPlace()): - # divide by zero + # divide by zero np_x = np.array([2, 3, 4]) np_y = np.array([0]) x = paddle.to_tensor(np_x) @@ -106,7 +110,7 @@ class TestFloorDivideOp(unittest.TestCase): except Exception as e: print("Error: Divide by zero encounter in floor_divide\n") - # divide by zero + # divide by zero np_x = np.array([2]) np_y = np.array([0, 0, 0]) x = paddle.to_tensor(np_x, dtype="int32") diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_gradient_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_gradient_op.py index 9f452ffde74..6c300ce24d3 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_gradient_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_gradient_op.py @@ -21,10 +21,12 @@ import paddle.fluid as fluid class TestElementWiseAddOp(unittest.TestCase): + def __assert_close(self, tensor, np_array, msg, atol=1e-4): self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg) def check_forward_backward(self): + def test_with_place(place): out_grad = np.random.random_sample(self.x.shape).astype(np.float32) x_grad = out_grad @@ -47,18 +49,21 @@ class TestElementWiseAddOp(unittest.TestCase): with fluid.program_guard(program): block = program.global_block() for name in ground_truth: - block.create_var( - name=name, - dtype='float32', - shape=ground_truth[name].shape) - elementwise_add_op = block.append_op( - type="elementwise_add", - inputs={ - "X": block.var('x'), - "Y": block.var('y'), - }, - outputs={"Out": block.var('out'), }, - attrs={"axis": self.axis, }) + block.create_var(name=name, + dtype='float32', + shape=ground_truth[name].shape) + elementwise_add_op = block.append_op(type="elementwise_add", + inputs={ + "X": block.var('x'), + "Y": block.var('y'), + }, + outputs={ + "Out": + block.var('out'), + }, + attrs={ + "axis": self.axis, + }) # generate backward op_desc grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_heaviside_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_heaviside_op.py index 8a8e74e28ec..73d110ce132 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_heaviside_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_heaviside_op.py @@ -19,6 +19,7 @@ import paddle class TestElementwiseOp(OpTest): + def setUp(self): self.op_type = "elementwise_heaviside" x = np.random.random((13, 17)).astype("float64") @@ -40,6 +41,7 @@ class TestElementwiseOp(OpTest): class TestHeavisideBroadcast(unittest.TestCase): + def setUp(self): self.input_1 = np.random.rand(2, 100, 13, 17).astype("float32") self.input_2 = np.random.rand(100, 13, 17).astype("float32") @@ -78,6 +80,7 @@ class TestHeavisideBroadcast(unittest.TestCase): class TestHeavisideAPI_float64(unittest.TestCase): + def setUp(self): self.x_np = np.random.random((13, 17)).astype("float64") self.y_np = np.random.random((13, 17)).astype("float64") @@ -92,10 +95,12 @@ class TestHeavisideAPI_float64(unittest.TestCase): paddle.enable_static() prog = paddle.static.Program() with paddle.static.program_guard(prog): - x 
= paddle.static.data( - name=f"x_{self.dtype}", shape=[13, 17], dtype=self.dtype) - y = paddle.static.data( - name=f"y_{self.dtype}", shape=[13, 17], dtype=self.dtype) + x = paddle.static.data(name=f"x_{self.dtype}", + shape=[13, 17], + dtype=self.dtype) + y = paddle.static.data(name=f"y_{self.dtype}", + shape=[13, 17], + dtype=self.dtype) out = paddle.heaviside(x, y) exe = paddle.static.Executor(place=place) @@ -114,13 +119,14 @@ class TestHeavisideAPI_float64(unittest.TestCase): if paddle.device.is_compiled_with_cuda() else [False]): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.disable_static(place=place) - result = paddle.heaviside( - paddle.to_tensor(self.x_np), paddle.to_tensor(self.y_np)) + result = paddle.heaviside(paddle.to_tensor(self.x_np), + paddle.to_tensor(self.y_np)) self.assertTrue(np.allclose(result.numpy(), self.out_np)) class TestHeavisideAPI_float32(TestHeavisideAPI_float64): + def setUp(self): self.x_np = np.random.random((13, 17)).astype("float32") self.y_np = np.random.random((13, 17)).astype("float32") @@ -129,6 +135,7 @@ class TestHeavisideAPI_float32(TestHeavisideAPI_float64): class TestHeavisideAPI_int64(TestHeavisideAPI_float64): + def setUp(self): self.x_np = np.random.random((13, 17)).astype("int64") self.y_np = np.random.random((13, 17)).astype("int64") @@ -137,6 +144,7 @@ class TestHeavisideAPI_int64(TestHeavisideAPI_float64): class TestHeavisideAPI_int32(TestHeavisideAPI_float64): + def setUp(self): self.x_np = np.random.random((13, 17)).astype("int32") self.y_np = np.random.random((13, 17)).astype("int32") @@ -145,6 +153,7 @@ class TestHeavisideAPI_int32(TestHeavisideAPI_float64): class TestHeavisideError(unittest.TestCase): + def test_input(self): paddle.disable_static() @@ -159,8 +168,8 @@ class TestHeavisideError(unittest.TestCase): self.assertRaises(ValueError, test_input_y) def test_input_xy(): - paddle.heaviside( - paddle.randn([100], 'float32'), paddle.randn([100], 'float64')) + paddle.heaviside(paddle.randn([100], 'float32'), + paddle.randn([100], 'float64')) self.assertRaises(ValueError, test_input_xy) diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_max_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_max_op.py index 21b0595b6dc..1ab1bf07b0d 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_max_op.py @@ -24,6 +24,7 @@ import paddle class TestElementwiseOp(OpTest): + def setUp(self): self.op_type = "elementwise_max" self.python_api = paddle.maximum @@ -49,18 +50,23 @@ class TestElementwiseOp(OpTest): self.check_grad(['X', 'Y'], 'Out', check_eager=True) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X")) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.005, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y')) + self.check_grad(['X'], + 'Out', + max_relative_error=0.005, + no_grad_set=set('Y')) @unittest.skipIf( core.is_compiled_with_cuda() and core.cudnn_version() < 8100, "run test when gpu is availble and the minimum cudnn version is 8.1.0.") class TestElementwiseBF16Op(OpTest): + def setUp(self): self.op_type = "elementwise_max" self.python_api = paddle.maximum @@ -99,6 +105,7 @@ class TestElementwiseBF16Op(OpTest): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class 
TestElementwiseMaxOp_scalar(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_max" self.python_api = paddle.maximum @@ -109,6 +116,7 @@ class TestElementwiseMaxOp_scalar(TestElementwiseOp): class TestElementwiseMaxOp_Vector(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_max" self.python_api = paddle.maximum @@ -120,6 +128,7 @@ class TestElementwiseMaxOp_Vector(TestElementwiseOp): class TestElementwiseMaxOp_broadcast_0(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_max" self.python_api = paddle.maximum @@ -131,12 +140,13 @@ class TestElementwiseMaxOp_broadcast_0(TestElementwiseOp): self.attrs = {'axis': 0} self.outputs = { - 'Out': - np.maximum(self.inputs['X'], self.inputs['Y'].reshape(100, 1, 1)) + 'Out': np.maximum(self.inputs['X'], + self.inputs['Y'].reshape(100, 1, 1)) } class TestElementwiseMaxOp_broadcast_1(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_max" self.python_api = paddle.maximum @@ -148,12 +158,13 @@ class TestElementwiseMaxOp_broadcast_1(TestElementwiseOp): self.attrs = {'axis': 1} self.outputs = { - 'Out': - np.maximum(self.inputs['X'], self.inputs['Y'].reshape(1, 100, 1)) + 'Out': np.maximum(self.inputs['X'], + self.inputs['Y'].reshape(1, 100, 1)) } class TestElementwiseMaxOp_broadcast_2(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_max" self.python_api = paddle.maximum @@ -164,12 +175,13 @@ class TestElementwiseMaxOp_broadcast_2(TestElementwiseOp): self.inputs = {'X': x, 'Y': y} self.outputs = { - 'Out': - np.maximum(self.inputs['X'], self.inputs['Y'].reshape(1, 1, 100)) + 'Out': np.maximum(self.inputs['X'], + self.inputs['Y'].reshape(1, 1, 100)) } class TestElementwiseMaxOp_broadcast_3(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_max" self.python_api = paddle.maximum @@ -187,6 +199,7 @@ class TestElementwiseMaxOp_broadcast_3(TestElementwiseOp): class TestElementwiseMaxOp_broadcast_4(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_max" self.python_api = paddle.maximum diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py index f8dc9602c35..e2366248391 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py @@ -25,6 +25,7 @@ paddle.enable_static() class TestElementwiseOp(OpTest): + def setUp(self): self.op_type = "elementwise_min" self.python_api = paddle.minimum @@ -50,17 +51,22 @@ class TestElementwiseOp(OpTest): self.check_grad(['X', 'Y'], 'Out', check_eager=True) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X")) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.005, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y')) + self.check_grad(['X'], + 'Out', + max_relative_error=0.005, + no_grad_set=set('Y')) @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseMinOp_scalar(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_min" self.python_api = paddle.minimum @@ -71,6 +77,7 @@ class TestElementwiseMinOp_scalar(TestElementwiseOp): class TestElementwiseMinOp_Vector(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_min" self.python_api = paddle.minimum @@ -82,6 +89,7 @@ class 
TestElementwiseMinOp_Vector(TestElementwiseOp): class TestElementwiseMinOp_broadcast_0(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_min" self.python_api = paddle.minimum @@ -93,12 +101,13 @@ class TestElementwiseMinOp_broadcast_0(TestElementwiseOp): self.attrs = {'axis': 0} self.outputs = { - 'Out': - np.minimum(self.inputs['X'], self.inputs['Y'].reshape(100, 1, 1)) + 'Out': np.minimum(self.inputs['X'], + self.inputs['Y'].reshape(100, 1, 1)) } class TestElementwiseMinOp_broadcast_1(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_min" self.python_api = paddle.minimum @@ -110,12 +119,13 @@ class TestElementwiseMinOp_broadcast_1(TestElementwiseOp): self.attrs = {'axis': 1} self.outputs = { - 'Out': - np.minimum(self.inputs['X'], self.inputs['Y'].reshape(1, 100, 1)) + 'Out': np.minimum(self.inputs['X'], + self.inputs['Y'].reshape(1, 100, 1)) } class TestElementwiseMinOp_broadcast_2(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_min" self.python_api = paddle.minimum @@ -126,12 +136,13 @@ class TestElementwiseMinOp_broadcast_2(TestElementwiseOp): self.inputs = {'X': x, 'Y': y} self.outputs = { - 'Out': - np.minimum(self.inputs['X'], self.inputs['Y'].reshape(1, 1, 100)) + 'Out': np.minimum(self.inputs['X'], + self.inputs['Y'].reshape(1, 1, 100)) } class TestElementwiseMinOp_broadcast_3(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_min" self.python_api = paddle.minimum @@ -149,6 +160,7 @@ class TestElementwiseMinOp_broadcast_3(TestElementwiseOp): class TestElementwiseMinOp_broadcast_4(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_min" self.python_api = paddle.minimum @@ -162,6 +174,7 @@ class TestElementwiseMinOp_broadcast_4(TestElementwiseOp): class TestElementwiseMinOpFP16(unittest.TestCase): + def get_out_and_grad(self, x_np, y_np, axis, place, use_fp32=False): assert x_np.dtype == np.float16 assert y_np.dtype == np.float16 @@ -194,10 +207,10 @@ class TestElementwiseMinOpFP16(unittest.TestCase): False) z_2, x_g_2, y_g_2 = self.get_out_and_grad(x_np, y_np, axis, place, True) self.assertTrue(np.array_equal(z_1, z_2), "{} vs {}".format(z_1, z_2)) - self.assertTrue( - np.array_equal(x_g_1, x_g_2), "{} vs {}".format(x_g_1, x_g_2)) - self.assertTrue( - np.array_equal(y_g_1, y_g_2), "{} vs {}".format(y_g_1, y_g_2)) + self.assertTrue(np.array_equal(x_g_1, x_g_2), + "{} vs {}".format(x_g_1, x_g_2)) + self.assertTrue(np.array_equal(y_g_1, y_g_2), + "{} vs {}".format(y_g_1, y_g_2)) def test_main(self): self.check_main((13, 17), (13, 17)) diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py index c6973255f26..436ce466be3 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py @@ -24,6 +24,7 @@ import random class TestElementwiseModOp(OpTest): + def init_kernel_type(self): self.use_mkldnn = False @@ -62,6 +63,7 @@ class TestElementwiseModOp(OpTest): class TestElementwiseModOp_scalar(TestElementwiseModOp): + def init_input_output(self): scale_x = random.randint(0, 100000000) scale_y = random.randint(1, 100000000) @@ -71,6 +73,7 @@ class TestElementwiseModOp_scalar(TestElementwiseModOp): class TestElementwiseModOpFloat(TestElementwiseModOp): + def init_dtype(self): self.dtype = np.float32 @@ -87,11 +90,13 @@ class TestElementwiseModOpFloat(TestElementwiseModOp): class TestElementwiseModOpDouble(TestElementwiseModOpFloat): + def 
init_dtype(self): self.dtype = np.float64 class TestRemainderOp(unittest.TestCase): + def test_name(self): with fluid.program_guard(fluid.Program()): x = fluid.data(name="x", shape=[2, 3], dtype="int64") diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py index b35b2840ed3..7035f3b1ca7 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py @@ -27,6 +27,7 @@ from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci, con class ElementwiseMulOp(OpTest): + def init_kernel_type(self): self.use_mkldnn = False @@ -52,24 +53,23 @@ class ElementwiseMulOp(OpTest): def test_check_grad_normal(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_grad( - ['X', 'Y'], 'Out', check_dygraph=(self.use_mkldnn == False)) + self.check_grad(['X', 'Y'], + 'Out', + check_dygraph=(self.use_mkldnn == False)) def test_check_grad_ingore_x(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - check_dygraph=(self.use_mkldnn == False)) + self.check_grad(['Y'], + 'Out', + no_grad_set=set("X"), + check_dygraph=(self.use_mkldnn == False)) def test_check_grad_ingore_y(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - check_dygraph=(self.use_mkldnn == False)) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), + check_dygraph=(self.use_mkldnn == False)) def init_input_output(self): self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) @@ -84,6 +84,7 @@ class ElementwiseMulOp(OpTest): class TestBF16ElementwiseMulOp(OpTest): + def setUp(self): self.op_type = "elementwise_mul" self.dtype = np.uint16 @@ -97,8 +98,7 @@ class TestBF16ElementwiseMulOp(OpTest): self.inputs = { 'X': OpTest.np_dtype_to_fluid_dtype(convert_float_to_uint16(self.x)), - 'Y': - OpTest.np_dtype_to_fluid_dtype(convert_float_to_uint16(self.y)) + 'Y': OpTest.np_dtype_to_fluid_dtype(convert_float_to_uint16(self.y)) } self.outputs = {'Out': convert_float_to_uint16(self.out)} self.attrs = {'axis': self.axis, 'use_mkldnn': False} @@ -119,6 +119,7 @@ class TestBF16ElementwiseMulOp(OpTest): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseMulOp_scalar(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -130,6 +131,7 @@ class TestElementwiseMulOp_scalar(ElementwiseMulOp): class TestElementwiseMulOp_Vector(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -141,6 +143,7 @@ class TestElementwiseMulOp_Vector(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -151,6 +154,7 @@ class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -166,6 +170,7 @@ class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -180,6 +185,7 @@ class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp): + def setUp(self): 
self.op_type = "elementwise_mul" self.inputs = { @@ -195,6 +201,7 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_4(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -206,6 +213,7 @@ class TestElementwiseMulOp_broadcast_4(ElementwiseMulOp): class TestElementwiseMulOp_broadcast_5(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -219,11 +227,13 @@ class TestElementwiseMulOp_broadcast_5(ElementwiseMulOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestElementwiseMulOpFp16(ElementwiseMulOp): + def init_dtype(self): self.dtype = np.float16 class TestElementwiseMulOp_commonuse_1(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -235,6 +245,7 @@ class TestElementwiseMulOp_commonuse_1(ElementwiseMulOp): class TestElementwiseMulOp_commonuse_2(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -246,6 +257,7 @@ class TestElementwiseMulOp_commonuse_2(ElementwiseMulOp): class TestElementwiseMulOp_xsize_lessthan_ysize(ElementwiseMulOp): + def setUp(self): self.op_type = "elementwise_mul" self.inputs = { @@ -262,13 +274,14 @@ class TestElementwiseMulOp_xsize_lessthan_ysize(ElementwiseMulOp): class TestElementwiseMulOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of elementwise_mul must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) - y1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) + y1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.elementwise_mul, x1, y1) # the input dtype of elementwise_mul must be float16 or float32 or float64 or int32 or int64 @@ -279,6 +292,7 @@ class TestElementwiseMulOpError(unittest.TestCase): class TestComplexElementwiseMulOp(OpTest): + def setUp(self): self.op_type = "elementwise_mul" self.init_base_dtype() @@ -314,30 +328,28 @@ class TestComplexElementwiseMulOp(OpTest): self.check_output() def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y'], - 'Out', - user_defined_grads=[self.grad_x, self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X', 'Y'], + 'Out', + user_defined_grads=[self.grad_x, self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out]) class TestRealComplexElementwiseMulOp(TestComplexElementwiseMulOp): + def init_input_output(self): self.x = np.random.random((2, 3, 4, 5)).astype(self.dtype) self.y = np.random.random( diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py 
b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py index 36e5d4d8e09..f06d90d27d4 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py @@ -27,6 +27,7 @@ from decorator_helper import prog_scope class TestElementwiseMulDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. @@ -42,8 +43,11 @@ class TestElementwiseMulDoubleGradCheck(unittest.TestCase): x_arr = np.random.uniform(-1, 1, shape).astype(dtype) y_arr = np.random.uniform(-1, 1, shape).astype(dtype) - gradient_checker.double_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.double_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): paddle.enable_static() @@ -55,6 +59,7 @@ class TestElementwiseMulDoubleGradCheck(unittest.TestCase): class TestElementwiseMulBroadcastDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. @@ -70,8 +75,11 @@ class TestElementwiseMulBroadcastDoubleGradCheck(unittest.TestCase): x_arr = np.random.uniform(-1, 1, shape).astype(dtype) y_arr = np.random.uniform(-1, 1, shape[:-1]).astype(dtype) - gradient_checker.double_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.double_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): paddle.enable_static() @@ -83,6 +91,7 @@ class TestElementwiseMulBroadcastDoubleGradCheck(unittest.TestCase): class TestElementwiseAddDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. @@ -98,8 +107,11 @@ class TestElementwiseAddDoubleGradCheck(unittest.TestCase): x_arr = np.random.uniform(-1, 1, shape).astype(dtype) y_arr = np.random.uniform(-1, 1, shape).astype(dtype) - gradient_checker.double_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.double_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): paddle.enable_static() @@ -111,6 +123,7 @@ class TestElementwiseAddDoubleGradCheck(unittest.TestCase): class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. 
@@ -126,8 +139,11 @@ class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase):
         x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
         y_arr = np.random.uniform(-1, 1, shape[:-1]).astype(dtype)
 
-        gradient_checker.double_grad_check(
-            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
+        gradient_checker.double_grad_check([x, y],
+                                           out,
+                                           x_init=[x_arr, y_arr],
+                                           place=place,
+                                           eps=eps)
 
     def test_grad(self):
         paddle.enable_static()
@@ -139,6 +155,7 @@ class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase):
 
 
 class TestElementwiseSubDoubleGradCheck(unittest.TestCase):
+
     def subtract_wrapper(self, x):
         return paddle.subtract(x[0], x[1])
 
@@ -157,13 +174,16 @@ class TestElementwiseSubDoubleGradCheck(unittest.TestCase):
         x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
         y_arr = np.random.uniform(-1, 1, shape).astype(dtype)
 
-        gradient_checker.double_grad_check(
-            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
-        gradient_checker.double_grad_check_for_dygraph(
-            self.subtract_wrapper, [x, y],
-            out,
-            x_init=[x_arr, y_arr],
-            place=place)
+        gradient_checker.double_grad_check([x, y],
+                                           out,
+                                           x_init=[x_arr, y_arr],
+                                           place=place,
+                                           eps=eps)
+        gradient_checker.double_grad_check_for_dygraph(self.subtract_wrapper,
+                                                       [x, y],
+                                                       out,
+                                                       x_init=[x_arr, y_arr],
+                                                       place=place)
 
     def test_grad(self):
         paddle.enable_static()
@@ -175,6 +195,7 @@ class TestElementwiseSubDoubleGradCheck(unittest.TestCase):
 
 
 class TestElementwiseSubBroadcastDoubleGradCheck(unittest.TestCase):
+
     @prog_scope()
     def func(self, place):
         # the shape of input variable should be clearly specified, not inlcude -1.
@@ -257,8 +287,12 @@ class TestElementwiseDivBroadcastDoubleGradCheck(unittest.TestCase):
         y_arr = np.random.uniform(-1, 1, shape[1:-1]).astype(dtype)
         y_arr[np.abs(y_arr) < 0.005] = 0.02
 
-        gradient_checker.double_grad_check(
-            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps, atol=1e-3)
+        gradient_checker.double_grad_check([x, y],
+                                           out,
+                                           x_init=[x_arr, y_arr],
+                                           place=place,
+                                           eps=eps,
+                                           atol=1e-3)
 
     def test_grad(self):
         paddle.enable_static()
@@ -270,6 +304,7 @@ class TestElementwiseDivBroadcastDoubleGradCheck(unittest.TestCase):
 
 
 class TestElementwiseAddTripleGradCheck(unittest.TestCase):
+
     @prog_scope()
     def func(self, place):
         # the shape of input variable should be clearly specified, not inlcude -1.
@@ -285,8 +320,11 @@ class TestElementwiseAddTripleGradCheck(unittest.TestCase):
         x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
         y_arr = np.random.uniform(-1, 1, shape).astype(dtype)
 
-        gradient_checker.triple_grad_check(
-            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
+        gradient_checker.triple_grad_check([x, y],
+                                           out,
+                                           x_init=[x_arr, y_arr],
+                                           place=place,
+                                           eps=eps)
 
     def test_grad(self):
         paddle.enable_static()
@@ -298,6 +336,7 @@ class TestElementwiseAddTripleGradCheck(unittest.TestCase):
 
 
 class TestElementwiseAddBroadcastTripleGradCheck(unittest.TestCase):
+
     @prog_scope()
     def func(self, place):
         # the shape of input variable should be clearly specified, not inlcude -1.
@@ -313,8 +352,11 @@ class TestElementwiseAddBroadcastTripleGradCheck(unittest.TestCase):
         x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
         y_arr = np.random.uniform(-1, 1, shape[:-1]).astype(dtype)
 
-        gradient_checker.triple_grad_check(
-            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
+        gradient_checker.triple_grad_check([x, y],
+                                           out,
+                                           x_init=[x_arr, y_arr],
+                                           place=place,
+                                           eps=eps)
 
     def test_grad(self):
         paddle.enable_static()
@@ -326,6 +368,7 @@ class TestElementwiseAddBroadcastTripleGradCheck(unittest.TestCase):
 
 
 class TestElementwiseMulTripleGradCheck(unittest.TestCase):
+
     def multiply_wrapper(self, x):
         return paddle.multiply(x[0], x[1])
 
@@ -364,6 +407,7 @@ class TestElementwiseMulTripleGradCheck(unittest.TestCase):
 
 
 class TestElementwiseMulBroadcastTripleGradCheck(unittest.TestCase):
+
     @prog_scope()
     def func(self, place):
         # the shape of input variable should be clearly specified, not inlcude -1.
@@ -379,8 +423,11 @@ class TestElementwiseMulBroadcastTripleGradCheck(unittest.TestCase): x_arr = np.random.uniform(-1, 1, shape).astype(dtype) y_arr = np.random.uniform(-1, 1, shape[:-1]).astype(dtype) - gradient_checker.triple_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.triple_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py index 79945a10c80..12f2a217360 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py @@ -21,6 +21,7 @@ import paddle class TestElementwisePowOp(OpTest): + def setUp(self): self.op_type = "elementwise_pow" self.python_api = paddle.pow @@ -44,6 +45,7 @@ class TestElementwisePowOp(OpTest): class TestElementwisePowOp_big_shape_1(TestElementwisePowOp): + def setUp(self): self.op_type = "elementwise_pow" self.python_api = paddle.pow @@ -55,6 +57,7 @@ class TestElementwisePowOp_big_shape_1(TestElementwisePowOp): class TestElementwisePowOp_big_shape_2(TestElementwisePowOp): + def setUp(self): self.op_type = "elementwise_pow" self.python_api = paddle.pow @@ -68,6 +71,7 @@ class TestElementwisePowOp_big_shape_2(TestElementwisePowOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwisePowOp_scalar(TestElementwisePowOp): + def setUp(self): self.op_type = "elementwise_pow" self.python_api = paddle.pow @@ -79,6 +83,7 @@ class TestElementwisePowOp_scalar(TestElementwisePowOp): class TestElementwisePowOp_tensor(TestElementwisePowOp): + def setUp(self): self.op_type = "elementwise_pow" self.python_api = paddle.pow @@ -90,6 +95,7 @@ class TestElementwisePowOp_tensor(TestElementwisePowOp): class TestElementwisePowOp_broadcast_0(TestElementwisePowOp): + def setUp(self): self.op_type = "elementwise_pow" self.python_api = paddle.pow @@ -101,6 +107,7 @@ class TestElementwisePowOp_broadcast_0(TestElementwisePowOp): class TestElementwisePowOp_broadcast_1(TestElementwisePowOp): + def setUp(self): self.op_type = "elementwise_pow" self.python_api = paddle.pow @@ -115,6 +122,7 @@ class TestElementwisePowOp_broadcast_1(TestElementwisePowOp): class TestElementwisePowOp_broadcast_2(TestElementwisePowOp): + def setUp(self): self.op_type = "elementwise_pow" self.python_api = paddle.pow @@ -124,12 +132,13 @@ class TestElementwisePowOp_broadcast_2(TestElementwisePowOp): } self.attrs = {'axis': 0} self.outputs = { - 'Out': - np.power(self.inputs['X'], self.inputs['Y'].reshape(100, 1, 1)) + 'Out': np.power(self.inputs['X'], + self.inputs['Y'].reshape(100, 1, 1)) } class TestElementwisePowOp_broadcast_3(TestElementwisePowOp): + def setUp(self): self.op_type = "elementwise_pow" self.python_api = paddle.pow @@ -139,12 +148,13 @@ class TestElementwisePowOp_broadcast_3(TestElementwisePowOp): } self.attrs = {'axis': 1} self.outputs = { - 'Out': np.power(self.inputs['X'], self.inputs['Y'].reshape(1, 20, 5, - 1)) + 'Out': np.power(self.inputs['X'], + self.inputs['Y'].reshape(1, 20, 5, 1)) } class TestElementwisePowOp_broadcast_4(TestElementwisePowOp): + def setUp(self): self.op_type = "elementwise_pow" self.python_api = paddle.pow @@ -156,6 +166,7 @@ class TestElementwisePowOp_broadcast_4(TestElementwisePowOp): class TestElementwisePowOpInt(OpTest): + def setUp(self): self.op_type = "elementwise_pow" 
self.python_api = paddle.pow @@ -170,6 +181,7 @@ class TestElementwisePowOpInt(OpTest): class TestElementwisePowGradOpInt(unittest.TestCase): + def setUp(self): self.x = np.asarray([1, 3, 6]) self.y = np.asarray([1, 1, 1]) diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py index 6801a4bc5f3..0c5fc983978 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py @@ -22,6 +22,7 @@ from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16 class TestElementwiseOp(OpTest): + def setUp(self): self.op_type = "elementwise_sub" self.inputs = { @@ -37,15 +38,20 @@ class TestElementwiseOp(OpTest): self.check_grad(['X', 'Y'], 'Out') def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X")) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.005, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y')) + self.check_grad(['X'], + 'Out', + max_relative_error=0.005, + no_grad_set=set('Y')) class TestBF16ElementwiseOp(OpTest): + def setUp(self): self.op_type = "elementwise_sub" self.dtype = np.uint16 @@ -75,6 +81,7 @@ class TestBF16ElementwiseOp(OpTest): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseSubOp_scalar(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_sub" self.inputs = { @@ -85,6 +92,7 @@ class TestElementwiseSubOp_scalar(TestElementwiseOp): class TestElementwiseSubOp_Vector(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_sub" self.inputs = { @@ -95,6 +103,7 @@ class TestElementwiseSubOp_Vector(TestElementwiseOp): class TestElementwiseSubOp_broadcast_0(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_sub" self.inputs = { @@ -109,6 +118,7 @@ class TestElementwiseSubOp_broadcast_0(TestElementwiseOp): class TestElementwiseSubOp_broadcast_1(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_sub" self.inputs = { @@ -123,6 +133,7 @@ class TestElementwiseSubOp_broadcast_1(TestElementwiseOp): class TestElementwiseSubOp_broadcast_2(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_sub" self.inputs = { @@ -136,6 +147,7 @@ class TestElementwiseSubOp_broadcast_2(TestElementwiseOp): class TestElementwiseSubOp_broadcast_3(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_sub" self.inputs = { @@ -150,6 +162,7 @@ class TestElementwiseSubOp_broadcast_3(TestElementwiseOp): class TestElementwiseSubOp_broadcast_4(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_sub" self.inputs = { @@ -160,6 +173,7 @@ class TestElementwiseSubOp_broadcast_4(TestElementwiseOp): class TestElementwiseSubOp_commonuse_1(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_sub" self.inputs = { @@ -170,6 +184,7 @@ class TestElementwiseSubOp_commonuse_1(TestElementwiseOp): class TestElementwiseSubOp_commonuse_2(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_sub" self.inputs = { @@ -180,6 +195,7 @@ class TestElementwiseSubOp_commonuse_2(TestElementwiseOp): class TestElementwiseSubOp_xsize_lessthan_ysize(TestElementwiseOp): + def setUp(self): self.op_type = "elementwise_sub" self.inputs = { @@ -195,6 +211,7 @@ class TestElementwiseSubOp_xsize_lessthan_ysize(TestElementwiseOp): class 
TestComplexElementwiseSubOp(OpTest): + def setUp(self): self.op_type = "elementwise_sub" self.dtype = np.float64 @@ -220,8 +237,8 @@ class TestComplexElementwiseSubOp(OpTest): self.out = self.x - self.y def init_grad_input_output(self): - self.grad_out = np.ones(self.shape, self.dtype) + 1J * np.ones( - self.shape, self.dtype) + self.grad_out = np.ones( + self.shape, self.dtype) + 1J * np.ones(self.shape, self.dtype) self.grad_x = self.grad_out self.grad_y = -self.grad_out @@ -229,30 +246,28 @@ class TestComplexElementwiseSubOp(OpTest): self.check_output() def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y'], - 'Out', - user_defined_grads=[self.grad_x, self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X', 'Y'], + 'Out', + user_defined_grads=[self.grad_x, self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out]) class TestRealComplexElementwiseSubOp(TestComplexElementwiseSubOp): + def init_input_output(self): self.x = np.random.random(self.shape).astype(self.dtype) self.y = np.random.random(self.shape).astype( @@ -260,13 +275,14 @@ class TestRealComplexElementwiseSubOp(TestComplexElementwiseSubOp): self.out = self.x - self.y def init_grad_input_output(self): - self.grad_out = np.ones(self.shape, self.dtype) + 1J * np.ones( - self.shape, self.dtype) + self.grad_out = np.ones( + self.shape, self.dtype) + 1J * np.ones(self.shape, self.dtype) self.grad_x = np.real(self.grad_out) self.grad_y = -self.grad_out class TestSubtractApi(unittest.TestCase): + def _executed_api(self, x, y, name=None): return paddle.subtract(x, y, name) @@ -309,11 +325,13 @@ class TestSubtractApi(unittest.TestCase): class TestSubtractInplaceApi(TestSubtractApi): + def _executed_api(self, x, y, name=None): return x.subtract_(y, name) class TestSubtractInplaceBroadcastSuccess(unittest.TestCase): + def init_data(self): self.x_numpy = np.random.rand(2, 3, 4).astype('float') self.y_numpy = np.random.rand(3, 4).astype('float') @@ -330,18 +348,21 @@ class TestSubtractInplaceBroadcastSuccess(unittest.TestCase): class TestSubtractInplaceBroadcastSuccess2(TestSubtractInplaceBroadcastSuccess): + def init_data(self): self.x_numpy = np.random.rand(1, 2, 3, 1).astype('float') self.y_numpy = np.random.rand(3, 1).astype('float') class TestSubtractInplaceBroadcastSuccess3(TestSubtractInplaceBroadcastSuccess): + def init_data(self): self.x_numpy = np.random.rand(2, 3, 1, 5).astype('float') self.y_numpy = np.random.rand(1, 3, 1, 5).astype('float') class TestSubtractInplaceBroadcastError(unittest.TestCase): + def init_data(self): self.x_numpy = np.random.rand(3, 4).astype('float') self.y_numpy = np.random.rand(2, 3, 4).astype('float') @@ -360,12 +381,14 @@ class TestSubtractInplaceBroadcastError(unittest.TestCase): class TestSubtractInplaceBroadcastError2(TestSubtractInplaceBroadcastError): + def init_data(self): self.x_numpy = np.random.rand(2, 1, 4).astype('float') 
self.y_numpy = np.random.rand(2, 3, 4).astype('float') class TestSubtractInplaceBroadcastError3(TestSubtractInplaceBroadcastError): + def init_data(self): self.x_numpy = np.random.rand(5, 2, 1, 4).astype('float') self.y_numpy = np.random.rand(2, 3, 4).astype('float') diff --git a/python/paddle/fluid/tests/unittests/test_ema.py b/python/paddle/fluid/tests/unittests/test_ema.py index ec992a8132a..ae0dff4edf9 100644 --- a/python/paddle/fluid/tests/unittests/test_ema.py +++ b/python/paddle/fluid/tests/unittests/test_ema.py @@ -20,6 +20,7 @@ import paddle.fluid as fluid class TestExponentialMovingAverage(unittest.TestCase): + def setUp(self): self._places = [fluid.CPUPlace()] if fluid.core.is_compiled_with_cuda(): @@ -63,8 +64,8 @@ class TestExponentialMovingAverage(unittest.TestCase): params.append(tmp_param) with self._ema.apply(exe): - final_ema = np.array(fluid.global_scope().find_var(self._param_name) - .get_tensor()) + final_ema = np.array(fluid.global_scope().find_var( + self._param_name).get_tensor()) data = np.random.random(size=(10, 5)).astype('float32') exe.run(program=self._test_program, feed={'x': data}) return params, final_ema @@ -75,8 +76,8 @@ class TestExponentialMovingAverage(unittest.TestCase): manu_ema = np.zeros_like(final_ema) if len(params) > 0: for param in params: - manu_ema = self._ema_decay * manu_ema + (1 - self._ema_decay - ) * param + manu_ema = self._ema_decay * manu_ema + ( + 1 - self._ema_decay) * param manu_ema = manu_ema / (1.0 - self._ema_decay**len(params)) self.assertTrue(np.allclose(manu_ema, final_ema)) diff --git a/python/paddle/fluid/tests/unittests/test_ema_fleet.py b/python/paddle/fluid/tests/unittests/test_ema_fleet.py index e0526deb59a..c08f811a178 100644 --- a/python/paddle/fluid/tests/unittests/test_ema_fleet.py +++ b/python/paddle/fluid/tests/unittests/test_ema_fleet.py @@ -26,6 +26,7 @@ def gen_data(): class TestFleetStaticEMA(unittest.TestCase): + def setUp(self): self._places = [paddle.CPUPlace()] if paddle.device.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py b/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py index 120880a5fc9..a1a4a263d93 100644 --- a/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py @@ -20,6 +20,7 @@ import unittest class TestEmbeddingIdStopGradientBase(unittest.TestCase): + def setUp(self): self.reshape_times = 1 self.iteration = 10 @@ -70,15 +71,17 @@ class TestEmbeddingIdStopGradientBase(unittest.TestCase): fetch_val = None for _ in six.moves.range(self.iteration): - fetch_val = exe.run( - feed={x_1.name: x1_data, - x_2.name: x2_data}, - fetch_list=[emb])[0] + fetch_val = exe.run(feed={ + x_1.name: x1_data, + x_2.name: x2_data + }, + fetch_list=[emb])[0] return fetch_val class TestEmbeddingIdStopGradient2(TestEmbeddingIdStopGradientBase): + def setUp(self): self.reshape_times = 100 self.iteration = 10 diff --git a/python/paddle/fluid/tests/unittests/test_empty_like_op.py b/python/paddle/fluid/tests/unittests/test_empty_like_op.py index 385a0c0b6e8..ea37f6a6d1a 100644 --- a/python/paddle/fluid/tests/unittests/test_empty_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_empty_like_op.py @@ -24,16 +24,17 @@ from paddle.static import program_guard, Program class TestEmptyLikeAPICommon(unittest.TestCase): + def __check_out__(self, out): data_type = convert_dtype(out.dtype) - self.assertEqual(data_type, self.dst_dtype, - 'dtype should be 
%s, but get %s' % - (self.dst_dtype, data_type)) + self.assertEqual( + data_type, self.dst_dtype, + 'dtype should be %s, but get %s' % (self.dst_dtype, data_type)) shape = out.shape - self.assertTupleEqual(shape, self.dst_shape, - 'shape should be %s, but get %s' % - (self.dst_shape, shape)) + self.assertTupleEqual( + shape, self.dst_shape, + 'shape should be %s, but get %s' % (self.dst_shape, shape)) if data_type in ['float32', 'float64', 'int32', 'int64']: max_value = np.nanmax(out) @@ -53,6 +54,7 @@ class TestEmptyLikeAPICommon(unittest.TestCase): class TestEmptyLikeAPI(TestEmptyLikeAPICommon): + def setUp(self): self.init_config() @@ -70,6 +72,7 @@ class TestEmptyLikeAPI(TestEmptyLikeAPICommon): class TestEmptyLikeAPI2(TestEmptyLikeAPI): + def init_config(self): self.x = np.random.random((200, 3)).astype("float64") self.dtype = self.x.dtype @@ -78,6 +81,7 @@ class TestEmptyLikeAPI2(TestEmptyLikeAPI): class TestEmptyLikeAPI3(TestEmptyLikeAPI): + def init_config(self): self.x = np.random.random((200, 3)).astype("int") self.dtype = self.x.dtype @@ -86,6 +90,7 @@ class TestEmptyLikeAPI3(TestEmptyLikeAPI): class TestEmptyLikeAPI4(TestEmptyLikeAPI): + def init_config(self): self.x = np.random.random((200, 3)).astype("int64") self.dtype = self.x.dtype @@ -94,6 +99,7 @@ class TestEmptyLikeAPI4(TestEmptyLikeAPI): class TestEmptyLikeAPI5(TestEmptyLikeAPI): + def init_config(self): self.x = np.random.random((200, 3)).astype("bool") self.dtype = self.x.dtype @@ -102,6 +108,7 @@ class TestEmptyLikeAPI5(TestEmptyLikeAPI): class TestEmptyLikeAPI6(TestEmptyLikeAPI): + def init_config(self): self.x = np.random.random((200, 3)).astype("float64") self.dtype = "float32" @@ -110,6 +117,7 @@ class TestEmptyLikeAPI6(TestEmptyLikeAPI): class TestEmptyLikeAPI7(TestEmptyLikeAPI): + def init_config(self): self.x = np.random.random((200, 3)).astype("int") self.dtype = "float32" @@ -118,6 +126,7 @@ class TestEmptyLikeAPI7(TestEmptyLikeAPI): class TestEmptyLikeAPI8(TestEmptyLikeAPI): + def init_config(self): self.x = np.random.random((200, 3)).astype("int64") self.dtype = "float32" @@ -126,6 +135,7 @@ class TestEmptyLikeAPI8(TestEmptyLikeAPI): class TestEmptyLikeAPI9(TestEmptyLikeAPI): + def init_config(self): self.x = np.random.random((200, 3)).astype("bool") self.dtype = "float32" @@ -134,6 +144,7 @@ class TestEmptyLikeAPI9(TestEmptyLikeAPI): class TestEmptyLikeAPI10(TestEmptyLikeAPI): + def init_config(self): self.x = np.random.random((200, 3)).astype("float32") self.dtype = "bool" @@ -142,6 +153,7 @@ class TestEmptyLikeAPI10(TestEmptyLikeAPI): class TestEmptyLikeAPI_Static(TestEmptyLikeAPICommon): + def setUp(self): self.init_config() @@ -155,13 +167,14 @@ class TestEmptyLikeAPI_Static(TestEmptyLikeAPICommon): with program_guard(train_program, startup_program): x = np.random.random(self.x_shape).astype(dtype) - data_x = paddle.static.data( - 'x', shape=self.data_x_shape, dtype=dtype) + data_x = paddle.static.data('x', + shape=self.data_x_shape, + dtype=dtype) out = paddle.empty_like(data_x) - place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() exe = paddle.static.Executor(place) res = exe.run(train_program, feed={'x': x}, fetch_list=[out]) @@ -177,13 +190,16 @@ class TestEmptyLikeAPI_Static(TestEmptyLikeAPICommon): class TestEmptyLikeAPI_Static2(TestEmptyLikeAPI_Static): + def init_config(self): self.x_shape = (3, 200, 3) self.data_x_shape = [-1, 200, 3] class 
TestEmptyError(unittest.TestCase): + def test_attr(self): + def test_dtype(): x = np.random.random((200, 3)).astype("float64") dtype = 'uint8' diff --git a/python/paddle/fluid/tests/unittests/test_empty_op.py b/python/paddle/fluid/tests/unittests/test_empty_op.py index 371c59a1b8c..50580cded90 100644 --- a/python/paddle/fluid/tests/unittests/test_empty_op.py +++ b/python/paddle/fluid/tests/unittests/test_empty_op.py @@ -25,6 +25,7 @@ from paddle.fluid.framework import convert_np_dtype_to_dtype_ # Situation 1: Attr(shape) is a list(without tensor) class TestEmptyOp(OpTest): + def setUp(self): self.op_type = "empty" self.init_config() @@ -61,6 +62,7 @@ class TestEmptyOp(OpTest): class TestEmptyOp2(TestEmptyOp): + def init_config(self): shape = [500, 3] dtype = 'float64' @@ -71,6 +73,7 @@ class TestEmptyOp2(TestEmptyOp): class TestEmptyOp3(TestEmptyOp): + def init_config(self): shape = [500, 3] dtype = 'int32' @@ -81,6 +84,7 @@ class TestEmptyOp3(TestEmptyOp): class TestEmptyOp4(TestEmptyOp): + def init_config(self): shape = [500, 3] dtype = 'int64' @@ -91,6 +95,7 @@ class TestEmptyOp4(TestEmptyOp): class TestEmptyOp5(TestEmptyOp): + def init_config(self): shape = [500, 3] dtype = 'bool' @@ -102,6 +107,7 @@ class TestEmptyOp5(TestEmptyOp): # Situation 2: shape is a tensor class TestEmptyOp_ShapeTensor(OpTest): + def setUp(self): self.op_type = "empty" self.init_config() @@ -139,6 +145,7 @@ class TestEmptyOp_ShapeTensor(OpTest): # Situation 3: Attr(shape) is a list(with tensor) class TestEmptyOp_ShapeTensorList(OpTest): + def setUp(self): self.op_type = "empty" self.init_config() @@ -183,6 +190,7 @@ class TestEmptyOp_ShapeTensorList(OpTest): class TestEmptyAPI(unittest.TestCase): + def __check_out__(self, out, dtype='float32'): max_value = np.nanmax(np.array(out)) min_value = np.nanmin(np.array(out)) @@ -228,12 +236,15 @@ class TestEmptyAPI(unittest.TestCase): positive_2_int32 = fluid.layers.fill_constant([1], "int32", 3) positive_2_int64 = fluid.layers.fill_constant([1], "int64", 3) - shape_tensor_int32 = fluid.data( - name="shape_tensor_int32", shape=[2], dtype="int32") - shape_tensor_int64 = fluid.data( - name="shape_tensor_int64", shape=[2], dtype="int64") - shape_tensor_unknown = fluid.data( - name="shape_tensor_unknown", shape=[-1], dtype="int64") + shape_tensor_int32 = fluid.data(name="shape_tensor_int32", + shape=[2], + dtype="int32") + shape_tensor_int64 = fluid.data(name="shape_tensor_int64", + shape=[2], + dtype="int64") + shape_tensor_unknown = fluid.data(name="shape_tensor_unknown", + shape=[-1], + dtype="int64") out_1 = paddle.empty(shape=[200, 3], dtype=dtype) out_2 = paddle.empty(shape=shape_tensor_int32, dtype=dtype) @@ -262,7 +273,9 @@ class TestEmptyAPI(unittest.TestCase): class TestEmptyError(unittest.TestCase): + def test_attr(self): + def test_dtype(): shape = [200, 3] dtype = 'uint8' diff --git a/python/paddle/fluid/tests/unittests/test_entry_attr.py b/python/paddle/fluid/tests/unittests/test_entry_attr.py index bdfe95560e5..e963fbd81bc 100644 --- a/python/paddle/fluid/tests/unittests/test_entry_attr.py +++ b/python/paddle/fluid/tests/unittests/test_entry_attr.py @@ -15,6 +15,7 @@ from __future__ import print_function import paddle + paddle.enable_static() import unittest @@ -23,6 +24,7 @@ from paddle.distributed import ProbabilityEntry, CountFilterEntry, ShowClickEntr class EntryAttrChecks(unittest.TestCase): + def base(self): with self.assertRaises(NotImplementedError): from paddle.distributed.entry_attr import EntryAttr @@ -62,12 +64,11 @@ class 
EntryAttrChecks(unittest.TestCase): with fluid.scope_guard(scope): with fluid.program_guard(prog): - input = fluid.layers.data( - name="dnn_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) + input = fluid.layers.data(name="dnn_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) prob = ProbabilityEntry(0.5) emb = paddle.static.nn.sparse_embedding( input=input, @@ -93,6 +94,7 @@ class EntryAttrChecks(unittest.TestCase): class TestEntryAttrs(EntryAttrChecks): + def test_base(self): self.base() diff --git a/python/paddle/fluid/tests/unittests/test_entry_attr2.py b/python/paddle/fluid/tests/unittests/test_entry_attr2.py index 96301c4a878..87d8bb70f38 100644 --- a/python/paddle/fluid/tests/unittests/test_entry_attr2.py +++ b/python/paddle/fluid/tests/unittests/test_entry_attr2.py @@ -15,6 +15,7 @@ from __future__ import print_function import paddle + paddle.enable_static() import unittest @@ -24,18 +25,18 @@ from paddle.fluid.entry_attr import ProbabilityEntry, CountFilterEntry class EntryAttrChecks(unittest.TestCase): + def embedding_layer(self): prog = fluid.Program() scope = fluid.core.Scope() with fluid.scope_guard(scope): with fluid.program_guard(prog): - input = fluid.layers.data( - name="dnn_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False) + input = fluid.layers.data(name="dnn_data", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) emb = fluid.layers.embedding( input=input, size=[100, 10], @@ -56,6 +57,7 @@ class EntryAttrChecks(unittest.TestCase): class TestEntryAttrs(EntryAttrChecks): + def test_embedding_layer(self): self.embedding_layer() diff --git a/python/paddle/fluid/tests/unittests/test_erf_op.py b/python/paddle/fluid/tests/unittests/test_erf_op.py index 964e704c6a2..c7d7b3abc9a 100644 --- a/python/paddle/fluid/tests/unittests/test_erf_op.py +++ b/python/paddle/fluid/tests/unittests/test_erf_op.py @@ -25,6 +25,7 @@ import paddle.fluid.dygraph as dg class TestErfOp(OpTest): + def setUp(self): self.op_type = "erf" self.dtype = self._init_dtype() @@ -45,6 +46,7 @@ class TestErfOp(OpTest): class TestErfLayer(unittest.TestCase): + def _test_case(self, place): x = np.random.uniform(-1, 1, size=(11, 17)).astype(np.float64) y_ref = erf(x) diff --git a/python/paddle/fluid/tests/unittests/test_erfinv_op.py b/python/paddle/fluid/tests/unittests/test_erfinv_op.py index 5b5a7c03843..4f10f1daaf7 100644 --- a/python/paddle/fluid/tests/unittests/test_erfinv_op.py +++ b/python/paddle/fluid/tests/unittests/test_erfinv_op.py @@ -26,6 +26,7 @@ np.random.seed(0) class TestErfinv(OpTest): + def setUp(self): self.op_type = "erfinv" self.python_api = paddle.erfinv @@ -46,19 +47,20 @@ class TestErfinv(OpTest): self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad( - ['X'], - 'Out', - user_defined_grads=[self.gradient], - user_defined_grad_outputs=self.grad_out) + self.check_grad(['X'], + 'Out', + user_defined_grads=[self.gradient], + user_defined_grad_outputs=self.grad_out) class TestErfinvFP32(TestErfinv): + def init_dtype(self): self.dtype = np.float32 class TestErfinvAPI(unittest.TestCase): + def init_dtype(self): self.dtype = 'float32' @@ -86,6 +88,7 @@ class TestErfinvAPI(unittest.TestCase): run(place) def test_dygraph_api(self): + def run(place): paddle.disable_static(place) x = paddle.to_tensor(self.x) @@ -97,6 +100,7 @@ class TestErfinvAPI(unittest.TestCase): run(place) def test_inplace_api(self): + def run(place): paddle.disable_static(place) x 
= paddle.to_tensor(self.x) diff --git a/python/paddle/fluid/tests/unittests/test_exception.py b/python/paddle/fluid/tests/unittests/test_exception.py index adc7386bdeb..6e826dacf7c 100644 --- a/python/paddle/fluid/tests/unittests/test_exception.py +++ b/python/paddle/fluid/tests/unittests/test_exception.py @@ -24,6 +24,7 @@ import paddle.fluid.core as core class TestException(unittest.TestCase): + def test_exception(self): exception = None try: @@ -37,6 +38,7 @@ class TestException(unittest.TestCase): class TestExceptionNoCStack(unittest.TestCase): + def setUp(self): paddle.enable_static() # test no C++ stack format @@ -60,8 +62,10 @@ class TestExceptionNoCStack(unittest.TestCase): with self.assertRaises(ValueError): exe.run(fluid.default_main_program(), - feed={'X': x, - 'Y': y}, + feed={ + 'X': x, + 'Y': y + }, fetch_list=[avg_loss.name]) def test_exception_in_dynamic_mode(self): diff --git a/python/paddle/fluid/tests/unittests/test_executor_and_mul.py b/python/paddle/fluid/tests/unittests/test_executor_and_mul.py index ebe820cb90a..1f3394b6019 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_and_mul.py +++ b/python/paddle/fluid/tests/unittests/test_executor_and_mul.py @@ -23,17 +23,17 @@ from paddle.fluid.layers import mul, data, zeros, array_write, increment class TestExecutor(unittest.TestCase): + def test_mul(self): i = zeros(shape=[1], dtype='int64') a = data(name='a', shape=[784], dtype='float32') array = array_write(x=a, i=i) i = increment(i) - b = data( - name='b', - shape=[784, 100], - dtype='float32', - append_batch_size=False) + b = data(name='b', + shape=[784, 100], + dtype='float32', + append_batch_size=False) array_write(x=b, i=i, array=array) i = increment(i) @@ -44,8 +44,10 @@ class TestExecutor(unittest.TestCase): b_np = numpy.random.random((784, 100)).astype('float32') exe = Executor() - res, res_array = exe.run(feed={'a': a_np, - 'b': b_np}, + res, res_array = exe.run(feed={ + 'a': a_np, + 'b': b_np + }, fetch_list=[out, array]) self.assertEqual((100, 100), res.shape) diff --git a/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py b/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py index 96d23174071..ad7a319f9c2 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py +++ b/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py @@ -23,16 +23,16 @@ from test_eager_deletion_padding_rnn import RNNConfig, PaddingRNNTestBase class TestExecutor(unittest.TestCase): + def test_mul(self): main_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(main_program, startup_program): a = fluid.layers.data(name='a', shape=[784], dtype='float32') - b = fluid.layers.data( - name='b', - shape=[784, 100], - dtype='float32', - append_batch_size=False) + b = fluid.layers.data(name='b', + shape=[784, 100], + dtype='float32', + append_batch_size=False) output = fluid.layers.mul(x=a, y=b) # Compute with numpy @@ -50,8 +50,10 @@ class TestExecutor(unittest.TestCase): for i in range(max_iters): begin = time.time() outs = exe.run(program=main_program, - feed={'a': a_np, - 'b': b_np}, + feed={ + 'a': a_np, + 'b': b_np + }, fetch_list=[output.name], use_program_cache=use_program_cache) end = time.time() @@ -62,24 +64,25 @@ class TestExecutor(unittest.TestCase): return run_time max_iters = 3 - run_time_with_cache = _train( - use_program_cache=True, max_iters=max_iters) + run_time_with_cache = _train(use_program_cache=True, + max_iters=max_iters) print("run 
time with program cache: %f" % run_time_with_cache) - run_time_without_cache = _train( - use_program_cache=False, max_iters=max_iters) + run_time_without_cache = _train(use_program_cache=False, + max_iters=max_iters) print("run time without program cache: %f" % run_time_without_cache) - run_time_with_cache = _train( - use_program_cache=True, max_iters=max_iters) + run_time_with_cache = _train(use_program_cache=True, + max_iters=max_iters) print("run time with program cache: %f" % run_time_with_cache) - run_time_with_cache = _train( - use_program_cache=True, max_iters=max_iters) + run_time_with_cache = _train(use_program_cache=True, + max_iters=max_iters) print("run time with program cache: %f" % run_time_with_cache) class ExecutorPaddingRNNTest(PaddingRNNTestBase): + def train_and_save_inference_program(self, rnn_model="static", parallel=True, @@ -98,8 +101,9 @@ class ExecutorPaddingRNNTest(PaddingRNNTestBase): def test_inference_output(self): for rnn_model in ["static", "padding"]: # Set parallel to False to use the default executor. - self.train_and_save_inference_program( - rnn_model=rnn_model, parallel=True, use_program_cache=True) + self.train_and_save_inference_program(rnn_model=rnn_model, + parallel=True, + use_program_cache=True) x_np = numpy.random.random( (self.config.batch_size, self.config.num_steps, @@ -134,14 +138,14 @@ class ExecutorPaddingRNNTest(PaddingRNNTestBase): results_with_cache = results else: results_without_cache = results - self.assertEqual( - len(results_with_cache), len(results_without_cache)) + self.assertEqual(len(results_with_cache), + len(results_without_cache)) for i in range(len(results_with_cache)): self.assertEqual(results_with_cache[i].shape, results_without_cache[i].shape) self.assertTrue( - numpy.allclose(results_with_cache[i], results_without_cache[ - i])) + numpy.allclose(results_with_cache[i], + results_without_cache[i])) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_executor_check_feed.py b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py index 6b1e3c5a28a..a35ebfbab17 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_check_feed.py +++ b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py @@ -22,6 +22,7 @@ import paddle.fluid as fluid class TestExecutor(unittest.TestCase): + def net(self): lr = fluid.data(name="lr", shape=[1], dtype='float32') x = fluid.data(name="x", shape=[None, 1], dtype='float32') @@ -50,8 +51,10 @@ class TestExecutor(unittest.TestCase): y_true = [[2.0], [4.0], [6.0], [8.0]] a = 0 with self.assertRaises(ValueError): - exe.run(feed={'x': train_data, - 'lr': a}, + exe.run(feed={ + 'x': train_data, + 'lr': a + }, fetch_list=[lr, cost], return_numpy=False, use_prune=True) @@ -73,8 +76,10 @@ class TestExecutor(unittest.TestCase): a = 0 with self.assertRaises(ValueError): exe.run(compiled_prog, - feed={'x': train_data, - 'lr': a}, + feed={ + 'x': train_data, + 'lr': a + }, fetch_list=[lr, cost], return_numpy=False, use_prune=True) diff --git a/python/paddle/fluid/tests/unittests/test_executor_check_fetch_list.py b/python/paddle/fluid/tests/unittests/test_executor_check_fetch_list.py index 1af2009f217..9d1c902fdc2 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_check_fetch_list.py +++ b/python/paddle/fluid/tests/unittests/test_executor_check_fetch_list.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,6 +22,7 @@ import unittest class TestCheckFetchList(unittest.TestCase): + def setUp(self): paddle.enable_static() self.feed = {"x": np.array([[0], [0], [1], [0]], dtype='float32')} @@ -33,8 +34,10 @@ class TestCheckFetchList(unittest.TestCase): main_program = paddle.static.Program() with paddle.static.program_guard(main_program): x = paddle.static.data(name='x', shape=[4, 1], dtype='float32') - output = paddle.unique_consecutive( - x, return_inverse=True, return_counts=True, axis=0) + output = paddle.unique_consecutive(x, + return_inverse=True, + return_counts=True, + axis=0) self.main_program = main_program self.fetch_list = output diff --git a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py index 23c4191f6cf..05676c34e6d 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py @@ -22,6 +22,7 @@ import paddle.fluid as fluid class TestExecutor(unittest.TestCase): + def net(self): lr = fluid.data(name="lr", shape=[1], dtype='float32') x = fluid.data(name="x", shape=[None, 1], dtype='float32') @@ -46,14 +47,16 @@ class TestExecutor(unittest.TestCase): exe = fluid.Executor(cpu) lr, cost = self.net() exe.run(startup_program) - train_data = numpy.array( - [[1.0], [2.0], [3.0], [4.0]]).astype('float32') - y_true = numpy.array( - [[2.0], [4.0], [6.0], [8.0]]).astype('float32') + train_data = numpy.array([[1.0], [2.0], [3.0], + [4.0]]).astype('float32') + y_true = numpy.array([[2.0], [4.0], [6.0], + [8.0]]).astype('float32') a = 0.01 - _lr, _ = exe.run(feed={'x': train_data, - 'y': y_true, - 'lr': a}, + _lr, _ = exe.run(feed={ + 'x': train_data, + 'y': y_true, + 'lr': a + }, fetch_list=[lr, cost], return_numpy=False) self.assertEqual(_lr._dtype(), lr.dtype) @@ -70,14 +73,16 @@ class TestExecutor(unittest.TestCase): exe = fluid.Executor(cpu) lr, cost = self.net() exe.run(startup_program) - train_data = numpy.array( - [[1.0], [2.0], [3.0], [4.0]]).astype('float32') - y_true = numpy.array( - [[2.0], [4.0], [6.0], [8.0]]).astype('float32') + train_data = numpy.array([[1.0], [2.0], [3.0], + [4.0]]).astype('float32') + y_true = numpy.array([[2.0], [4.0], [6.0], + [8.0]]).astype('float32') a = 0 - _lr, _ = exe.run(feed={'x': train_data, - 'y': y_true, - 'lr': a}, + _lr, _ = exe.run(feed={ + 'x': train_data, + 'y': y_true, + 'lr': a + }, fetch_list=[lr, cost], return_numpy=False) self.assertEqual(_lr._dtype(), lr.dtype) @@ -97,9 +102,11 @@ class TestExecutor(unittest.TestCase): train_data = [[1.0], [2.0], [3.0], [4.0]] y_true = [[2.0], [4.0], [6.0], [8.0]] a = 0 - _lr, _ = exe.run(feed={'x': train_data, - 'y': y_true, - 'lr': a}, + _lr, _ = exe.run(feed={ + 'x': train_data, + 'y': y_true, + 'lr': a + }, fetch_list=[lr, cost], return_numpy=False) self.assertEqual(_lr._dtype(), lr.dtype) @@ -118,15 +125,17 @@ class TestExecutor(unittest.TestCase): exe.run(startup_program) compiled_prog = fluid.CompiledProgram( main_program).with_data_parallel(loss_name=cost.name) - 
train_data = numpy.array( - [[1.0], [2.0], [3.0], [4.0]]).astype('float32') - y_true = numpy.array( - [[2.0], [4.0], [6.0], [8.0]]).astype('float32') + train_data = numpy.array([[1.0], [2.0], [3.0], + [4.0]]).astype('float32') + y_true = numpy.array([[2.0], [4.0], [6.0], + [8.0]]).astype('float32') a = 0.01 _lr, _ = exe.run(compiled_prog, - feed={'x': train_data, - 'y': y_true, - 'lr': a}, + feed={ + 'x': train_data, + 'y': y_true, + 'lr': a + }, fetch_list=[lr, cost], return_numpy=False) self.assertEqual(_lr._dtype(), lr.dtype) @@ -135,6 +144,7 @@ class TestExecutor(unittest.TestCase): class TestAsLodTensor(unittest.TestCase): + def test_as_lodtensor_int32(self): cpu = fluid.CPUPlace() tensor = fluid.executor._as_lodtensor(1.0, cpu, diff --git a/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py b/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py index a7ee6b31b09..81bc7021280 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py +++ b/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py @@ -22,6 +22,7 @@ from op_test import OpTest, skip_check_grad_ci @skip_check_grad_ci(reason="Not op test but call the method of class OpTest.") class TestExecutorReturnTensorNotOverwritingWithOptest(OpTest): + def setUp(self): pass @@ -68,6 +69,7 @@ class TestExecutorReturnTensorNotOverwritingWithOptest(OpTest): class TestExecutorReturnTensorNotOverOverwritingWithLayers(unittest.TestCase): + def setUp(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_expand_as_op.py b/python/paddle/fluid/tests/unittests/test_expand_as_op.py index 150aff78508..aa4f0b2f3ca 100755 --- a/python/paddle/fluid/tests/unittests/test_expand_as_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_as_op.py @@ -31,6 +31,7 @@ def bcast(x, target_tensor): class TestExpandAsOpRank1(OpTest): + def setUp(self): self.op_type = "expand_as" x = np.random.rand(100).astype("float64") @@ -49,6 +50,7 @@ class TestExpandAsOpRank1(OpTest): class TestExpandAsOpRank2(OpTest): + def setUp(self): self.op_type = "expand_as" x = np.random.rand(10, 12).astype("float64") @@ -67,6 +69,7 @@ class TestExpandAsOpRank2(OpTest): class TestExpandAsOpRank3(OpTest): + def setUp(self): self.op_type = "expand_as" x = np.random.rand(2, 3, 20).astype("float64") @@ -85,6 +88,7 @@ class TestExpandAsOpRank3(OpTest): class TestExpandAsOpRank4(OpTest): + def setUp(self): self.op_type = "expand_as" x = np.random.rand(1, 1, 7, 16).astype("float64") @@ -104,6 +108,7 @@ class TestExpandAsOpRank4(OpTest): # Test dygraph API class TestExpandAsDygraphAPI(unittest.TestCase): + def test_api(self): import paddle paddle.disable_static() @@ -119,24 +124,28 @@ class TestExpandAsDygraphAPI(unittest.TestCase): # Test python API class TestExpandAsAPI(unittest.TestCase): + def test_api(self): input1 = np.random.random([12, 14]).astype("float32") input2 = np.random.random([48, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32") + x = fluid.layers.data(name='x', + shape=[12, 14], + append_batch_size=False, + dtype="float32") - y = fluid.layers.data( - name='target_tensor', - shape=[48, 14], - append_batch_size=False, - dtype="float32") + y = fluid.layers.data(name='target_tensor', + shape=[48, 14], + append_batch_size=False, + dtype="float32") out_1 = fluid.layers.expand_as(x, target_tensor=y) exe = fluid.Executor(place=fluid.CPUPlace()) res_1 = 
exe.run(fluid.default_main_program(), - feed={"x": input1, - "target_tensor": input2}, + feed={ + "x": input1, + "target_tensor": input2 + }, fetch_list=[out_1]) assert np.array_equal(res_1[0], np.tile(input1, (4, 1))) diff --git a/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py b/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py index 3bf6868fed9..f107fec1c4e 100755 --- a/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py @@ -22,6 +22,7 @@ import paddle.fluid as fluid class TestExpandAsBasic(OpTest): + def setUp(self): self.op_type = "expand_as_v2" self.python_api = paddle.expand_as @@ -41,6 +42,7 @@ class TestExpandAsBasic(OpTest): class TestExpandAsOpRank2(TestExpandAsBasic): + def setUp(self): self.op_type = "expand_as_v2" self.python_api = paddle.expand_as @@ -54,6 +56,7 @@ class TestExpandAsOpRank2(TestExpandAsBasic): class TestExpandAsOpRank3(TestExpandAsBasic): + def setUp(self): self.op_type = "expand_as_v2" self.python_api = paddle.expand_as @@ -67,6 +70,7 @@ class TestExpandAsOpRank3(TestExpandAsBasic): class TestExpandAsOpRank4(TestExpandAsBasic): + def setUp(self): self.op_type = "expand_as_v2" self.python_api = paddle.expand_as @@ -80,6 +84,7 @@ class TestExpandAsOpRank4(TestExpandAsBasic): class TestExpandAsV2Error(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): x1 = fluid.layers.data(name='x1', shape=[4], dtype="uint8") @@ -92,24 +97,28 @@ class TestExpandAsV2Error(unittest.TestCase): # Test python API class TestExpandAsV2API(unittest.TestCase): + def test_api(self): input1 = np.random.random([12, 14]).astype("float32") input2 = np.random.random([2, 12, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32") + x = fluid.layers.data(name='x', + shape=[12, 14], + append_batch_size=False, + dtype="float32") - y = fluid.layers.data( - name='target_tensor', - shape=[2, 12, 14], - append_batch_size=False, - dtype="float32") + y = fluid.layers.data(name='target_tensor', + shape=[2, 12, 14], + append_batch_size=False, + dtype="float32") out_1 = paddle.expand_as(x, y=y) exe = fluid.Executor(place=fluid.CPUPlace()) res_1 = exe.run(fluid.default_main_program(), - feed={"x": input1, - "target_tensor": input2}, + feed={ + "x": input1, + "target_tensor": input2 + }, fetch_list=[out_1]) assert np.array_equal(res_1[0], np.tile(input1, (2, 1, 1))) diff --git a/python/paddle/fluid/tests/unittests/test_expand_op.py b/python/paddle/fluid/tests/unittests/test_expand_op.py index edda6da655d..d0d9a1f7e21 100644 --- a/python/paddle/fluid/tests/unittests/test_expand_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_op.py @@ -24,6 +24,7 @@ import paddle # Situation 1: expand_times is a list(without tensor) class TestExpandOpRank1(OpTest): + def setUp(self): self.op_type = "expand" self.init_data() @@ -47,30 +48,35 @@ class TestExpandOpRank1(OpTest): class TestExpandOpRank2_Corner(TestExpandOpRank1): + def init_data(self): self.ori_shape = [120] self.expand_times = [2] class TestExpandOpRank2(TestExpandOpRank1): + def init_data(self): self.ori_shape = [12, 14] self.expand_times = [2, 3] class TestExpandOpRank3_Corner(TestExpandOpRank1): + def init_data(self): self.ori_shape = (2, 10, 5) self.expand_times = (1, 1, 1) class TestExpandOpRank3(TestExpandOpRank1): + def init_data(self): self.ori_shape = (2, 4, 15) self.expand_times = (2, 1, 4) class 
TestExpandOpRank4(TestExpandOpRank1): + def init_data(self): self.ori_shape = (2, 4, 5, 7) self.expand_times = (3, 2, 1, 2) @@ -78,6 +84,7 @@ class TestExpandOpRank4(TestExpandOpRank1): # Situation 2: expand_times is a list(with tensor) class TestExpandOpRank1_tensor_attr(OpTest): + def setUp(self): self.op_type = "expand" self.init_data() @@ -110,6 +117,7 @@ class TestExpandOpRank1_tensor_attr(OpTest): class TestExpandOpRank2_Corner_tensor_attr(TestExpandOpRank1_tensor_attr): + def init_data(self): self.ori_shape = [12, 14] self.expand_times = [1, 1] @@ -117,6 +125,7 @@ class TestExpandOpRank2_Corner_tensor_attr(TestExpandOpRank1_tensor_attr): class TestExpandOpRank2_attr_tensor(TestExpandOpRank1_tensor_attr): + def init_data(self): self.ori_shape = [12, 14] self.expand_times = [2, 3] @@ -125,6 +134,7 @@ class TestExpandOpRank2_attr_tensor(TestExpandOpRank1_tensor_attr): # Situation 3: expand_times is a tensor class TestExpandOpRank1_tensor(OpTest): + def setUp(self): self.op_type = "expand" self.init_data() @@ -151,6 +161,7 @@ class TestExpandOpRank1_tensor(OpTest): class TestExpandOpRank2_tensor(TestExpandOpRank1_tensor): + def init_data(self): self.ori_shape = [12, 14] self.expand_times = [2, 3] @@ -158,11 +169,11 @@ class TestExpandOpRank2_tensor(TestExpandOpRank1_tensor): # Situation 4: input x is Integer class TestExpandOpInteger(OpTest): + def setUp(self): self.op_type = "expand" self.inputs = { - 'X': np.random.randint( - 10, size=(2, 4, 5)).astype("int32") + 'X': np.random.randint(10, size=(2, 4, 5)).astype("int32") } self.attrs = {'expand_times': [2, 1, 4]} output = np.tile(self.inputs['X'], (2, 1, 4)) @@ -174,6 +185,7 @@ class TestExpandOpInteger(OpTest): # Situation 5: input x is Bool class TestExpandOpBoolean(OpTest): + def setUp(self): self.op_type = "expand" self.inputs = {'X': np.random.randint(2, size=(2, 4, 5)).astype("bool")} @@ -187,11 +199,11 @@ class TestExpandOpBoolean(OpTest): # Situation 56: input x is Integer class TestExpandOpInt64_t(OpTest): + def setUp(self): self.op_type = "expand" self.inputs = { - 'X': np.random.randint( - 10, size=(2, 4, 5)).astype("int64") + 'X': np.random.randint(10, size=(2, 4, 5)).astype("int64") } self.attrs = {'expand_times': [2, 1, 4]} output = np.tile(self.inputs['X'], (2, 1, 4)) @@ -202,10 +214,11 @@ class TestExpandOpInt64_t(OpTest): class TestExpandError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) expand_times = [2, 2] self.assertRaises(TypeError, fluid.layers.expand, x1, expand_times) x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") @@ -217,14 +230,18 @@ class TestExpandError(unittest.TestCase): # Test python API class TestExpandAPI(unittest.TestCase): + def test_api(self): input = np.random.random([12, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32") + x = fluid.layers.data(name='x', + shape=[12, 14], + append_batch_size=False, + dtype="float32") positive_2 = fluid.layers.fill_constant([1], "int32", 2) - expand_times = fluid.layers.data( - name="expand_times", shape=[2], append_batch_size=False) + expand_times = fluid.layers.data(name="expand_times", + shape=[2], + append_batch_size=False) out_1 = fluid.layers.expand(x, expand_times=[2, 3]) out_2 = fluid.layers.expand(x, expand_times=[positive_2, 3]) @@ -235,7 +252,8 @@ class 
TestExpandAPI(unittest.TestCase): exe = fluid.Executor(place=fluid.CPUPlace()) res_1, res_2, res_3 = exe.run(fluid.default_main_program(), feed={ - "x": input, + "x": + input, "expand_times": np.array([1, 3]).astype("int32") }, @@ -246,13 +264,14 @@ class TestExpandAPI(unittest.TestCase): class TestExpandDygraphAPI(unittest.TestCase): + def test_expand_times_is_tensor(self): with paddle.fluid.dygraph.guard(): a = paddle.rand([2, 5]) b = paddle.fluid.layers.expand(a, expand_times=[2, 3]) - c = paddle.fluid.layers.expand( - a, expand_times=paddle.to_tensor( - [2, 3], dtype='int32')) + c = paddle.fluid.layers.expand(a, + expand_times=paddle.to_tensor( + [2, 3], dtype='int32')) self.assertTrue( np.array_equal(b.numpy(), np.tile(a.numpy(), [2, 3]))) self.assertTrue( diff --git a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py index 4932ea8a1b5..52b9234263d 100644 --- a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py @@ -25,6 +25,7 @@ from paddle.fluid.framework import _test_eager_guard # Situation 1: shape is a list(without tensor) class TestExpandV2OpRank1(OpTest): + def setUp(self): self.op_type = "expand_v2" self.init_data() @@ -48,6 +49,7 @@ class TestExpandV2OpRank1(OpTest): class TestExpandV2OpRank2_DimExpanding(TestExpandV2OpRank1): + def init_data(self): self.ori_shape = [120] self.shape = [2, 120] @@ -55,6 +57,7 @@ class TestExpandV2OpRank2_DimExpanding(TestExpandV2OpRank1): class TestExpandV2OpRank2(TestExpandV2OpRank1): + def init_data(self): self.ori_shape = [1, 140] self.shape = [12, 140] @@ -62,6 +65,7 @@ class TestExpandV2OpRank2(TestExpandV2OpRank1): class TestExpandV2OpRank3_Corner(TestExpandV2OpRank1): + def init_data(self): self.ori_shape = (2, 10, 5) self.shape = (2, 10, 5) @@ -69,6 +73,7 @@ class TestExpandV2OpRank3_Corner(TestExpandV2OpRank1): class TestExpandV2OpRank4(TestExpandV2OpRank1): + def init_data(self): self.ori_shape = (2, 4, 5, 7) self.shape = (-1, -1, -1, -1) @@ -77,6 +82,7 @@ class TestExpandV2OpRank4(TestExpandV2OpRank1): # Situation 2: shape is a list(with tensor) class TestExpandV2OpRank1_tensor_attr(OpTest): + def setUp(self): self.op_type = "expand_v2" self.init_data() @@ -107,6 +113,7 @@ class TestExpandV2OpRank1_tensor_attr(OpTest): class TestExpandV2OpRank2_Corner_tensor_attr(TestExpandV2OpRank1_tensor_attr): + def init_data(self): self.ori_shape = [12, 14] self.expand_times = [1, 1] @@ -116,6 +123,7 @@ class TestExpandV2OpRank2_Corner_tensor_attr(TestExpandV2OpRank1_tensor_attr): # Situation 3: shape is a tensor class TestExpandV2OpRank1_tensor(OpTest): + def setUp(self): self.op_type = "expand_v2" self.init_data() @@ -142,11 +150,11 @@ class TestExpandV2OpRank1_tensor(OpTest): # Situation 4: input x is Integer class TestExpandV2OpInteger(OpTest): + def setUp(self): self.op_type = "expand_v2" self.inputs = { - 'X': np.random.randint( - 10, size=(2, 4, 5)).astype("int32") + 'X': np.random.randint(10, size=(2, 4, 5)).astype("int32") } self.attrs = {'shape': [2, 4, 5]} output = np.tile(self.inputs['X'], (1, 1, 1)) @@ -158,6 +166,7 @@ class TestExpandV2OpInteger(OpTest): # Situation 5: input x is Bool class TestExpandV2OpBoolean(OpTest): + def setUp(self): self.op_type = "expand_v2" self.inputs = {'X': np.random.randint(2, size=(2, 4, 5)).astype("bool")} @@ -171,11 +180,11 @@ class TestExpandV2OpBoolean(OpTest): # Situation 56: input x is Integer class TestExpandV2OpInt64_t(OpTest): + def setUp(self): self.op_type = 
"expand_v2" self.inputs = { - 'X': np.random.randint( - 10, size=(2, 4, 5)).astype("int64") + 'X': np.random.randint(10, size=(2, 4, 5)).astype("int64") } self.attrs = {'shape': [2, 4, 5]} output = np.tile(self.inputs['X'], (1, 1, 1)) @@ -186,10 +195,11 @@ class TestExpandV2OpInt64_t(OpTest): class TestExpandV2Error(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) shape = [2, 2] self.assertRaises(TypeError, paddle.tensor.expand, x1, shape) x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") @@ -201,17 +211,19 @@ class TestExpandV2Error(unittest.TestCase): # Test python API class TestExpandV2API(unittest.TestCase): + def test_api(self): input = np.random.random([12, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32") + x = fluid.layers.data(name='x', + shape=[12, 14], + append_batch_size=False, + dtype="float32") positive_2 = fluid.layers.fill_constant([1], "int32", 12) - expand_shape = fluid.layers.data( - name="expand_shape", - shape=[2], - append_batch_size=False, - dtype="int32") + expand_shape = fluid.layers.data(name="expand_shape", + shape=[2], + append_batch_size=False, + dtype="int32") out_1 = paddle.expand(x, shape=[12, 14]) out_2 = paddle.expand(x, shape=[positive_2, 14]) @@ -222,7 +234,8 @@ class TestExpandV2API(unittest.TestCase): exe = fluid.Executor(place=fluid.CPUPlace()) res_1, res_2, res_3 = exe.run(fluid.default_main_program(), feed={ - "x": input, + "x": + input, "expand_shape": np.array([12, 14]).astype("int32") }, @@ -233,19 +246,22 @@ class TestExpandV2API(unittest.TestCase): class TestExpandInferShape(unittest.TestCase): + def test_shape_with_var(self): with program_guard(Program(), Program()): x = paddle.static.data(shape=[-1, 1, 3], name='x') fake_var = paddle.randn([2, 3]) target_shape = [ - -1, paddle.shape(fake_var)[0], paddle.shape(fake_var)[1] + -1, paddle.shape(fake_var)[0], + paddle.shape(fake_var)[1] ] out = paddle.expand(x, shape=target_shape) self.assertListEqual(list(out.shape), [-1, -1, -1]) -# Test python Dygraph API +# Test python Dygraph API class TestExpandV2DygraphAPI(unittest.TestCase): + def test_expand_times_is_tensor(self): with paddle.fluid.dygraph.guard(): with _test_eager_guard(): diff --git a/python/paddle/fluid/tests/unittests/test_exponential_op.py b/python/paddle/fluid/tests/unittests/test_exponential_op.py index c8f4101ea5d..57c4fb02d85 100644 --- a/python/paddle/fluid/tests/unittests/test_exponential_op.py +++ b/python/paddle/fluid/tests/unittests/test_exponential_op.py @@ -23,6 +23,7 @@ paddle.seed(100) class TestExponentialOp1(OpTest): + def setUp(self): self.op_type = "exponential" self.config() @@ -48,29 +49,28 @@ class TestExponentialOp1(OpTest): hist2 = hist2.astype("float32") hist2 = hist2 / float(data_np.size) - self.assertTrue( - np.allclose( - hist1, hist2, rtol=0.02), - "actual: {}, expected: {}".format(hist1, hist2)) + self.assertTrue(np.allclose(hist1, hist2, rtol=0.02), + "actual: {}, expected: {}".format(hist1, hist2)) def test_check_grad_normal(self): self.check_grad( ['X'], 'Out', - user_defined_grads=[np.zeros( - [1024, 1024], dtype=self.dtype)], + user_defined_grads=[np.zeros([1024, 1024], dtype=self.dtype)], user_defined_grad_outputs=[ np.random.rand(1024, 1024).astype(self.dtype) ]) class TestExponentialOp2(TestExponentialOp1): + def config(self): 
self.lam = 0.25 self.dtype = "float32" class TestExponentialAPI(unittest.TestCase): + def test_static(self): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): diff --git a/python/paddle/fluid/tests/unittests/test_eye_op.py b/python/paddle/fluid/tests/unittests/test_eye_op.py index 704762d8094..d74cabb4275 100644 --- a/python/paddle/fluid/tests/unittests/test_eye_op.py +++ b/python/paddle/fluid/tests/unittests/test_eye_op.py @@ -24,6 +24,7 @@ import paddle.fluid.framework as framework class TestEyeOp(OpTest): + def setUp(self): ''' Test eye op with specified shape @@ -44,6 +45,7 @@ class TestEyeOp(OpTest): class TestEyeOp1(OpTest): + def setUp(self): ''' Test eye op with default parameters @@ -60,6 +62,7 @@ class TestEyeOp1(OpTest): class TestEyeOp2(OpTest): + def setUp(self): ''' Test eye op with specified shape @@ -76,6 +79,7 @@ class TestEyeOp2(OpTest): class API_TestTensorEye(unittest.TestCase): + def test_out(self): with paddle.static.program_guard(paddle.static.Program()): data = paddle.eye(10) diff --git a/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py index 07f3eaa04ad..adfd15f2dd1 100644 --- a/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py +++ b/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py @@ -75,6 +75,7 @@ def channel_wise_dequantize_max_abs(x, class TestFakeChannelWiseDequantizeMaxAbsOpTwoScales(OpTest): + def set_args(self): self.quant_bits = [8, 8] self.activation_scale = 0.7861 @@ -92,10 +93,11 @@ class TestFakeChannelWiseDequantizeMaxAbsOpTwoScales(OpTest): self.activation_scale) self.inputs = { - 'X': yq, - 'Scales': [("scales0", np.array(scales).astype(self.dtype)), - ("scales1", - np.array([self.activation_scale]).astype(self.dtype))] + 'X': + yq, + 'Scales': + [("scales0", np.array(scales).astype(self.dtype)), + ("scales1", np.array([self.activation_scale]).astype(self.dtype))] } self.attrs = {'quant_bits': self.quant_bits} self.outputs = {'Out': ydq} @@ -106,6 +108,7 @@ class TestFakeChannelWiseDequantizeMaxAbsOpTwoScales(OpTest): class TestFakeChannelWiseDequantizeMaxAbsOpTwoScalesFloat16( TestFakeChannelWiseDequantizeMaxAbsOpTwoScales): + def set_dtype(self): self.dtype = np.float16 @@ -114,6 +117,7 @@ class TestFakeChannelWiseDequantizeMaxAbsOpTwoScalesFloat16( class TestFakeChannelWiseDequantizeMaxAbsOpOneScale(OpTest): + def set_args(self): self.quant_bits = [8] self.quant_axis = 0 @@ -147,6 +151,7 @@ class TestFakeChannelWiseDequantizeMaxAbsOpOneScale(OpTest): class TestFakeChannelWiseDequantizeMaxAbsOpOneScale1( TestFakeChannelWiseDequantizeMaxAbsOpOneScale): + def set_args(self): self.quant_bits = [8] self.quant_axis = 1 @@ -154,6 +159,7 @@ class TestFakeChannelWiseDequantizeMaxAbsOpOneScale1( class TestFakeChannelWiseDequantizeMaxAbsOpOneScaleFloat16( TestFakeChannelWiseDequantizeMaxAbsOpOneScale): + def set_dtype(self): self.dtype = np.float16 @@ -163,6 +169,7 @@ class TestFakeChannelWiseDequantizeMaxAbsOpOneScaleFloat16( class TestFakeChannelWiseDequantizeMaxAbsOpOneScale1Float16( TestFakeChannelWiseDequantizeMaxAbsOpOneScale1): + def set_dtype(self): self.dtype = np.float16 @@ -171,6 +178,7 @@ class TestFakeChannelWiseDequantizeMaxAbsOpOneScale1Float16( class TestFakeDequantizeMaxAbsOp(OpTest): + def set_args(self): self.num_bits = 8 self.max_range = math.pow(2, self.num_bits - 1) - 1 @@ -195,17 +203,20 @@ class TestFakeDequantizeMaxAbsOp(OpTest): class 
TestFakeDequantizeMaxAbsOpDouble(TestFakeDequantizeMaxAbsOp): + def set_dtype(self): self.dtype = np.float64 class TestFakeDequantizeMaxAbsOp5Bits(TestFakeDequantizeMaxAbsOp): + def set_args(self): self.num_bits = 5 self.max_range = math.pow(2, self.num_bits - 1) - 1 class TestFakeDequantizeMaxAbsOpFloat16(TestFakeDequantizeMaxAbsOp): + def set_dtype(self): self.dtype = np.float16 @@ -214,6 +225,7 @@ class TestFakeDequantizeMaxAbsOpFloat16(TestFakeDequantizeMaxAbsOp): class TestChannelWiseDequantizeOp(OpTest): + def set_args(self): self.bit_length = 8 self.data_type = "float32" @@ -242,6 +254,7 @@ class TestChannelWiseDequantizeOp(OpTest): class TestChannelWiseDequantizeOp1(TestChannelWiseDequantizeOp): + def set_args(self): self.bit_length = 8 self.data_type = "float32" @@ -249,6 +262,7 @@ class TestChannelWiseDequantizeOp1(TestChannelWiseDequantizeOp): class TestDequantizeOp(OpTest): + def set_args(self): self.bit_length = 8 self.quant_axis = -1 @@ -276,6 +290,7 @@ class TestDequantizeOp(OpTest): class TestDequantizeOpDouble(TestDequantizeOp): + def set_args(self): self.bit_length = 8 self.max_range = math.pow(2, self.bit_length - 1) - 1 @@ -284,6 +299,7 @@ class TestDequantizeOpDouble(TestDequantizeOp): class TestDequantizeOp5Bits(TestDequantizeOp): + def set_args(self): self.bit_length = 5 self.max_range = math.pow(2, self.bit_length - 1) - 1 diff --git a/python/paddle/fluid/tests/unittests/test_fake_init_op.py b/python/paddle/fluid/tests/unittests/test_fake_init_op.py index a62b7aed66b..e094b82c41a 100644 --- a/python/paddle/fluid/tests/unittests/test_fake_init_op.py +++ b/python/paddle/fluid/tests/unittests/test_fake_init_op.py @@ -21,13 +21,14 @@ from paddle.fluid.op import Operator class TestFakeInitOpSelectedRows(unittest.TestCase): + def check_with_place(self, place, is_selected_rows): scope = core.Scope() out_var_name = 'Out' if is_selected_rows: - out_tensor = scope.var(out_var_name).get_selected_rows().get_tensor( - ) + out_tensor = scope.var( + out_var_name).get_selected_rows().get_tensor() else: out_tensor = scope.var(out_var_name).get_tensor() diff --git a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py index 0c8e115d7ce..3693ba615d9 100644 --- a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py +++ b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py @@ -41,6 +41,7 @@ def get_compute_type(dtype): class TestFakeQuantizeAbsMaxOp(OpTest): + def setUp(self): self.op_type = 'fake_quantize_abs_max' self.attrs = {'bit_length': 8} @@ -72,6 +73,7 @@ class TestFakeQuantizeAbsMaxOp(OpTest): class TestFakeChannelWiseQuantizeAbsMaxOp(OpTest): + def setUp(self): self.op_type = 'fake_channel_wise_quantize_abs_max' self.attrs = {'bit_length': 8} @@ -82,8 +84,8 @@ class TestFakeChannelWiseQuantizeAbsMaxOp(OpTest): input_data = distribution(input_shape).astype(dtype) compute_type = get_compute_type(dtype) bnt = (1 << (self.attrs['bit_length'] - 1)) - 1 - compute_axis = tuple( - i for i in range(len(input_shape)) if i != quant_axis) + compute_axis = tuple(i for i in range(len(input_shape)) + if i != quant_axis) scale_broadcast = np.amax(input_data, axis=compute_axis, keepdims=True) output_data = round_c(bnt * input_data.astype(compute_type) / scale_broadcast) @@ -105,14 +107,15 @@ class TestFakeChannelWiseQuantizeAbsMaxOp(OpTest): for dtype, input_shape_quant_axis in itertools.product( dtype_options, input_shape_quant_axis_options): input_shape, quant_axis = input_shape_quant_axis - with 
self.subTest( - dtype=dtype, input_shape=input_shape, - quant_axis=quant_axis): + with self.subTest(dtype=dtype, + input_shape=input_shape, + quant_axis=quant_axis): self._fake_channel_wise_quantize_abs_max( dtype, input_shape, quant_axis, np.random.random) class TestFakeQuantizeRangeAbsMaxOp(OpTest): + def setUp(self): self.op_type = 'fake_quantize_range_abs_max' self.attrs = {'bit_length': 5, 'window_size': 1} @@ -162,6 +165,7 @@ class TestFakeQuantizeRangeAbsMaxOp(OpTest): class TestMovingAverageAbsMaxScaleOp(OpTest): + def setUp(self): self.op_type = 'moving_average_abs_max_scale' self.attrs = {'moving_rate': float(0.9), 'is_test': False} @@ -194,6 +198,7 @@ class TestMovingAverageAbsMaxScaleOp(OpTest): class TestFakeQuantizeMovingAverageAbsMaxOp(OpTest): + def setUp(self): self.op_type = 'fake_quantize_moving_average_abs_max' self.attrs = {'bit_length': 5, 'moving_rate': 0.9, 'is_test': False} @@ -252,14 +257,14 @@ class TestFakeQuantizeMovingAverageAbsMaxOp(OpTest): np.random.random) def test_fake_quantize_dequantize_moving_average_abs_max(self): - self._fake_quantize_moving_average_abs_max( - np.float32, (8, 16, 7, 7), - np.random.random, - dequantize=True, - with_gradient=True) + self._fake_quantize_moving_average_abs_max(np.float32, (8, 16, 7, 7), + np.random.random, + dequantize=True, + with_gradient=True) class TestFakeQuantizeDequantizeAbsMaxOp(OpTest): + def setUp(self): self.op_type = 'fake_quantize_dequantize_abs_max' self.attrs = {'bit_length': 8} @@ -286,22 +291,24 @@ class TestFakeQuantizeDequantizeAbsMaxOp(OpTest): class TestChannelWiseFakeQuantizeDequantizeAbsMaxOp(OpTest): + def setUp(self): self.op_type = 'fake_channel_wise_quantize_dequantize_abs_max' self.attrs = {'bit_length': 8} - def _fake_channel_wise_quantize_dequantize_abs_max( - self, dtype, input_shape, quant_axis, distribution): + def _fake_channel_wise_quantize_dequantize_abs_max(self, dtype, input_shape, + quant_axis, + distribution): assert quant_axis in [0, 1], 'quant_axis should be 0 or 1.' 
input_data = distribution(input_shape).astype(dtype) compute_type = get_compute_type(dtype) bnt = (1 << (self.attrs['bit_length'] - 1)) - 1 output_data = input_data.copy().astype(compute_type) - compute_axis = tuple( - i for i in range(len(input_shape)) if i != quant_axis) + compute_axis = tuple(i for i in range(len(input_shape)) + if i != quant_axis) scale_broadcast = np.amax(input_data, axis=compute_axis, keepdims=True) - output_data = round_c(bnt * output_data / - scale_broadcast) * scale_broadcast / bnt + output_data = round_c( + bnt * output_data / scale_broadcast) * scale_broadcast / bnt if quant_axis == 1: scale_broadcast = np.transpose(scale_broadcast, (1, ) + compute_axis) @@ -315,8 +322,9 @@ class TestChannelWiseFakeQuantizeDequantizeAbsMaxOp(OpTest): self.check_grad(['X'], 'Out', user_defined_grads=gradient) def test_channel_wise_fake_quant_dequant_abs_max(self): - input_shape_quant_axis_options = [[(3, 4, 64, 64), 0], [( - 15, 20, 5, 5), 1], [(30, 15), 0], [(30, 15), 1]] + input_shape_quant_axis_options = [[(3, 4, 64, 64), 0], + [(15, 20, 5, 5), 1], [(30, 15), 0], + [(30, 15), 1]] for input_shape, quant_axis in input_shape_quant_axis_options: with self.subTest(input_shape=input_shape, quant_axis=quant_axis): self._fake_channel_wise_quantize_dequantize_abs_max( @@ -348,6 +356,7 @@ def channel_wise_quantize_max_abs(x, quant_bit=8, quant_axis=0): class TestChannelWiseQuantizeOp(OpTest): + def set_args(self): self.bit_length = 8 self.data_type = "float32" @@ -374,6 +383,7 @@ class TestChannelWiseQuantizeOp(OpTest): class TestChannelWiseQuantizeOp1(TestChannelWiseQuantizeOp): + def set_args(self): self.bit_length = 8 self.data_type = "float32" @@ -381,6 +391,7 @@ class TestChannelWiseQuantizeOp1(TestChannelWiseQuantizeOp): class TestChannelWiseQuantizeOpTrain(OpTest): + def set_args(self): self.bit_length = 8 self.data_type = "float32" @@ -409,6 +420,7 @@ class TestChannelWiseQuantizeOpTrain(OpTest): class TestquantizeOp(OpTest): + def set_args(self): self.bit_length = 8 self.quant_axis = -1 @@ -435,6 +447,7 @@ class TestquantizeOp(OpTest): class TestquantizeOpTrain(TestquantizeOp): + def set_args(self): self.bit_length = 8 self.quant_axis = -1 diff --git a/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py b/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py index a460c5f2527..d6ccec25a43 100755 --- a/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py +++ b/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py @@ -27,6 +27,7 @@ from paddle.fluid.layer_helper import LayerHelper from paddle import _C_ops import sys + sys.path.append("./tokenizer") from tokenizer.bert_tokenizer import BertTokenizer @@ -63,6 +64,7 @@ def to_map_tensor(string_dict, name): class FasterTokenizer(nn.Layer): + def __init__(self, vocab_dict): super(FasterTokenizer, self).__init__() vocab_tensor = to_map_tensor(vocab_dict, "vocab") @@ -92,28 +94,33 @@ class FasterTokenizer(nn.Layer): input_ids = helper.create_variable_for_type_inference(dtype="int64") seg_ids = helper.create_variable_for_type_inference(dtype="int64") if text_pair is None: - helper.append_op( - type='faster_tokenizer', - inputs={'Vocab': self.vocab, - 'Text': text}, - outputs={'InputIds': input_ids, - 'SegmentIds': seg_ids}, - attrs=attrs) + helper.append_op(type='faster_tokenizer', + inputs={ + 'Vocab': self.vocab, + 'Text': text + }, + outputs={ + 'InputIds': input_ids, + 'SegmentIds': seg_ids + }, + attrs=attrs) else: - helper.append_op( - type='faster_tokenizer', - inputs={ - 'Vocab': 
self.vocab, - 'Text': text, - 'TextPair': text_pair - }, - outputs={'InputIds': input_ids, - 'SegmentIds': seg_ids}, - attrs=attrs) + helper.append_op(type='faster_tokenizer', + inputs={ + 'Vocab': self.vocab, + 'Text': text, + 'TextPair': text_pair + }, + outputs={ + 'InputIds': input_ids, + 'SegmentIds': seg_ids + }, + attrs=attrs) return input_ids, seg_ids class Predictor(object): + def __init__(self, model_dir): model_file = os.path.join(model_dir, "inference.pdmodel") params_file = os.path.join(model_dir, "inference.pdiparams") @@ -148,6 +155,7 @@ class Predictor(object): class TestBertTokenizerOp(unittest.TestCase): + def setUp(self): self.bert_tokenizer = BertTokenizer.from_pretrained("bert-base-chinese") self.save_path = os.path.join(DATA_HOME, "fast_tokenizer") @@ -199,12 +207,11 @@ class TestBertTokenizerOp(unittest.TestCase): pad_to_max_seq_len=self.pad_to_max_seq_len, is_split_into_words=self.is_split_into_words) py_input_ids = np.array(encoded_inputs[0]["input_ids"]).reshape([1, -1]) - py_token_type_ids = np.array(encoded_inputs[0][ - "token_type_ids"]).reshape([1, -1]) + py_token_type_ids = np.array( + encoded_inputs[0]["token_type_ids"]).reshape([1, -1]) self.assertTrue(np.allclose(input_ids, py_input_ids, rtol=0, atol=0.01)) self.assertTrue( - np.allclose( - token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) + np.allclose(token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) # case 2: only one text and one text_pair (batch_size = 1) input_ids, token_type_ids = self.faster_tokenizer( @@ -224,12 +231,11 @@ class TestBertTokenizerOp(unittest.TestCase): pad_to_max_seq_len=self.pad_to_max_seq_len, is_split_into_words=self.is_split_into_words) py_input_ids = np.array(encoded_inputs[0]["input_ids"]).reshape([1, -1]) - py_token_type_ids = np.array(encoded_inputs[0][ - "token_type_ids"]).reshape([1, -1]) + py_token_type_ids = np.array( + encoded_inputs[0]["token_type_ids"]).reshape([1, -1]) self.assertTrue(np.allclose(input_ids, py_input_ids, rtol=0, atol=0.01)) self.assertTrue( - np.allclose( - token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) + np.allclose(token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) # case 3: only texts (batch_size = 3) input_ids, token_type_ids = self.faster_tokenizer( @@ -252,8 +258,7 @@ class TestBertTokenizerOp(unittest.TestCase): py_token_type_ids = np.array(py_token_type_ids).reshape([3, -1]) self.assertTrue(np.allclose(input_ids, py_input_ids, rtol=0, atol=0.01)) self.assertTrue( - np.allclose( - token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) + np.allclose(token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) # case 4: texts and text pairs (batch_size = 3) input_ids, token_type_ids = self.faster_tokenizer( @@ -278,8 +283,7 @@ class TestBertTokenizerOp(unittest.TestCase): py_token_type_ids = np.array(py_token_type_ids).reshape([3, -1]) self.assertTrue(np.allclose(input_ids, py_input_ids, rtol=0, atol=0.01)) self.assertTrue( - np.allclose( - token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) + np.allclose(token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) def test_padding(self): with _test_eager_guard(): @@ -308,12 +312,11 @@ class TestBertTokenizerOp(unittest.TestCase): pad_to_max_seq_len=self.pad_to_max_seq_len, is_split_into_words=self.is_split_into_words) py_input_ids = np.array(encoded_inputs[0]["input_ids"]).reshape([1, -1]) - py_token_type_ids = np.array(encoded_inputs[0][ - "token_type_ids"]).reshape([1, -1]) + py_token_type_ids = np.array( + encoded_inputs[0]["token_type_ids"]).reshape([1, -1]) 
self.assertTrue(np.allclose(input_ids, py_input_ids, rtol=0, atol=0.01)) self.assertTrue( - np.allclose( - token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) + np.allclose(token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) # case 2: only one text and one text_pair (batch_size = 1) input_ids, token_type_ids = self.faster_tokenizer( @@ -333,12 +336,11 @@ class TestBertTokenizerOp(unittest.TestCase): pad_to_max_seq_len=self.pad_to_max_seq_len, is_split_into_words=self.is_split_into_words) py_input_ids = np.array(encoded_inputs[0]["input_ids"]).reshape([1, -1]) - py_token_type_ids = np.array(encoded_inputs[0][ - "token_type_ids"]).reshape([1, -1]) + py_token_type_ids = np.array( + encoded_inputs[0]["token_type_ids"]).reshape([1, -1]) self.assertTrue(np.allclose(input_ids, py_input_ids, rtol=0, atol=0.01)) self.assertTrue( - np.allclose( - token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) + np.allclose(token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) def test_no_padding(self): with _test_eager_guard(): @@ -362,8 +364,7 @@ class TestBertTokenizerOp(unittest.TestCase): [1, -1]) self.assertTrue(np.allclose(input_ids, py_input_ids, rtol=0, atol=0.01)) self.assertTrue( - np.allclose( - token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) + np.allclose(token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) def test_is_split_into_words(self): with _test_eager_guard(): @@ -391,18 +392,18 @@ class TestBertTokenizerOp(unittest.TestCase): encoded_inputs = self.bert_tokenizer(self.text) py_input_ids = np.array(encoded_inputs[0]["input_ids"]).reshape([1, -1]) - py_token_type_ids = np.array(encoded_inputs[0][ - "token_type_ids"]).reshape([1, -1]) + py_token_type_ids = np.array( + encoded_inputs[0]["token_type_ids"]).reshape([1, -1]) self.assertTrue(np.allclose(input_ids, py_input_ids, rtol=0, atol=0.01)) self.assertTrue( - np.allclose( - token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) + np.allclose(token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) def test_feed_string_var(self): self.init_data() paddle.enable_static() - x = paddle.static.data( - name="x", shape=[-1], dtype=core.VarDesc.VarType.STRINGS) + x = paddle.static.data(name="x", + shape=[-1], + dtype=core.VarDesc.VarType.STRINGS) exe = paddle.static.Executor(paddle.framework.CPUPlace()) exe.run(paddle.static.default_main_program(), feed={'x': self.text}) paddle.disable_static() diff --git a/python/paddle/fluid/tests/unittests/test_fc_op.py b/python/paddle/fluid/tests/unittests/test_fc_op.py index 22126ce41d0..439296e4d8f 100644 --- a/python/paddle/fluid/tests/unittests/test_fc_op.py +++ b/python/paddle/fluid/tests/unittests/test_fc_op.py @@ -43,6 +43,7 @@ def fc_refer(matrix, with_bias, with_relu=False): class MatrixGenerate: + def __init__(self, mb, ic, oc, h, w, bias_dims=2): self.input = np.random.random((mb, ic, h, w)).astype("float32") self.weights = np.random.random((ic * h * w, oc)).astype("float32") @@ -53,6 +54,7 @@ class MatrixGenerate: class TestFCOp(OpTest): + def config(self): self.with_bias = True self.with_relu = True @@ -86,6 +88,7 @@ class TestFCOp(OpTest): class TestFCOpNoBias1(TestFCOp): + def config(self): self.with_bias = False self.with_relu = False @@ -93,6 +96,7 @@ class TestFCOpNoBias1(TestFCOp): class TestFCOpNoBias2(TestFCOp): + def config(self): self.with_bias = False self.with_relu = False @@ -100,6 +104,7 @@ class TestFCOpNoBias2(TestFCOp): class TestFCOpNoBias4(TestFCOp): + def config(self): self.with_bias = False self.with_relu = False @@ -107,6 +112,7 @@ class 
TestFCOpNoBias4(TestFCOp): class TestFCOpWithBias1(TestFCOp): + def config(self): self.with_bias = True self.with_relu = False @@ -114,6 +120,7 @@ class TestFCOpWithBias1(TestFCOp): class TestFCOpWithBias2(TestFCOp): + def config(self): self.with_bias = True self.with_relu = True @@ -121,6 +128,7 @@ class TestFCOpWithBias2(TestFCOp): class TestFCOpWithBias3(TestFCOp): + def config(self): self.with_bias = True self.with_relu = True @@ -128,6 +136,7 @@ class TestFCOpWithBias3(TestFCOp): class TestFCOpWithPadding(TestFCOp): + def config(self): self.with_bias = True self.with_relu = True @@ -135,7 +144,9 @@ class TestFCOpWithPadding(TestFCOp): class TestFcOp_NumFlattenDims_NegOne(unittest.TestCase): + def test_api(self): + def run_program(num_flatten_dims): paddle.seed(SEED) np.random.seed(SEED) @@ -144,18 +155,17 @@ class TestFcOp_NumFlattenDims_NegOne(unittest.TestCase): with program_guard(main_program, startup_program): input = np.random.random([2, 2, 25]).astype("float32") - x = fluid.layers.data( - name="x", - shape=[2, 2, 25], - append_batch_size=False, - dtype="float32") + x = fluid.layers.data(name="x", + shape=[2, 2, 25], + append_batch_size=False, + dtype="float32") out = paddle.static.nn.fc(x=x, size=1, num_flatten_dims=num_flatten_dims) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) exe = fluid.Executor(place=place) exe.run(startup_program) out = exe.run(main_program, feed={"x": input}, fetch_list=[out]) @@ -167,6 +177,7 @@ class TestFcOp_NumFlattenDims_NegOne(unittest.TestCase): class TestFCOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): input_data = np.random.random((2, 4)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py index 3bbc4cc2904..f3fe43e3152 100644 --- a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py +++ b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py @@ -62,16 +62,15 @@ class TestFeedData(unittest.TestCase): predict_label = fluid.layers.fc(hidden, size=class_num, act='softmax') loss = fluid.layers.mean( - fluid.layers.cross_entropy( - input=predict_label, label=label)) + fluid.layers.cross_entropy(input=predict_label, label=label)) optimizer = fluid.optimizer.Adam() optimizer.minimize(loss) return in_data, label, loss def test(self): - for use_cuda in [True, - False] if core.is_compiled_with_cuda() else [False]: + for use_cuda in [True, False + ] if core.is_compiled_with_cuda() else [False]: for use_parallel_executor in [False, True]: print('Test Parameters:'), print({ @@ -85,7 +84,7 @@ class TestFeedData(unittest.TestCase): use_parallel_executor) self._test_feed_lod_tensor(use_cuda, use_parallel_executor) - # Test exception message when feeding with error + # Test exception message when feeding with error in_shape_tuple = (-1, 3, 4, 8) error_shape_list = [self.data_batch_size, 3, 4, 5] @@ -114,31 +113,34 @@ class TestFeedData(unittest.TestCase): feed_in_data = np.random.uniform( size=[feed_batch_size, 3, 4, 5]).astype(np.float32) label_size = [self.data_batch_size, 1] - feed_label = np.random.randint( - low=0, high=self.class_num, - size=[feed_batch_size, 1]).astype(np.float64) + feed_label = np.random.randint(low=0, + high=self.class_num, + size=[feed_batch_size, + 1]).astype(np.float64) 
self._feed_data_in_executor(in_size, label_size, feed_in_data, feed_label, use_cuda, use_parallel_executor) def _test_feed_data_shape_mismatch(self, use_cuda, use_parallel_executor): batch_size = self._get_feed_batch_size(use_cuda, use_parallel_executor) in_size = [None, 3, 4, 8] - feed_in_data = np.random.uniform( - size=[batch_size, 3, 4, 5]).astype(np.float32) + feed_in_data = np.random.uniform(size=[batch_size, 3, 4, 5]).astype( + np.float32) label_size = [-1, 1] - feed_label = np.random.randint( - low=0, high=self.class_num, size=[batch_size, 1]).astype(np.int64) + feed_label = np.random.randint(low=0, + high=self.class_num, + size=[batch_size, 1]).astype(np.int64) self._feed_data_in_executor(in_size, label_size, feed_in_data, feed_label, use_cuda, use_parallel_executor) def _test_feed_data_contains_neg_one(self, use_cuda, use_parallel_executor): batch_size = self._get_feed_batch_size(use_cuda, use_parallel_executor) in_size = [-1, 3, 4, 5] - feed_in_data = np.random.uniform( - size=[batch_size, 3, 4, 5]).astype(np.float32) + feed_in_data = np.random.uniform(size=[batch_size, 3, 4, 5]).astype( + np.float32) label_size = (None, 1) - feed_label = np.random.randint( - low=0, high=self.class_num, size=[batch_size, 1]).astype(np.int64) + feed_label = np.random.randint(low=0, + high=self.class_num, + size=[batch_size, 1]).astype(np.int64) self._feed_data_in_executor(in_size, label_size, feed_in_data, feed_label, use_cuda, use_parallel_executor) @@ -149,9 +151,10 @@ class TestFeedData(unittest.TestCase): feed_in_data = np.random.uniform( size=[feed_batch_size, 3, 4, 5]).astype(np.float32) label_size = [self.data_batch_size, 1] - feed_label = np.random.randint( - low=0, high=self.class_num, - size=[feed_batch_size, 1]).astype(np.int64) + feed_label = np.random.randint(low=0, + high=self.class_num, + size=[feed_batch_size, + 1]).astype(np.int64) self._feed_data_in_executor(in_size, label_size, feed_in_data, feed_label, use_cuda, use_parallel_executor) @@ -163,16 +166,17 @@ class TestFeedData(unittest.TestCase): # sum from 1 to device_count sum_length = int((device_count + 1) * device_count / 2) - feed_in_data = np.random.uniform( - size=[sum_length, 3, 4, 5]).astype(np.float32) + feed_in_data = np.random.uniform(size=[sum_length, 3, 4, 5]).astype( + np.float32) feed_data_tensor = fluid.LoDTensor() feed_data_tensor.set(feed_in_data, fluid.CPUPlace()) feed_data_tensor.set_recursive_sequence_lengths(sequence_lengths) label_size = [device_count, 1] feed_label_tensor = fluid.LoDTensor() - feed_label = np.random.randint( - low=0, high=self.class_num, size=[sum_length, 1]).astype(np.int64) + feed_label = np.random.randint(low=0, + high=self.class_num, + size=[sum_length, 1]).astype(np.int64) feed_label_tensor.set(feed_label, fluid.CPUPlace()) feed_label_tensor.set_recursive_sequence_lengths(sequence_lengths) @@ -187,8 +191,9 @@ class TestFeedData(unittest.TestCase): main_program = fluid.Program() with fluid.program_guard(main_program, startup_program): - in_data, label, loss = self._simple_fc_net( - in_size, label_size, self.class_num, self.hidden_sizes) + in_data, label, loss = self._simple_fc_net(in_size, label_size, + self.class_num, + self.hidden_sizes) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() @@ -201,11 +206,12 @@ class TestFeedData(unittest.TestCase): main_program).with_data_parallel(loss_name=loss.name) for i in range(self.iterations): - fetches = exe.run( - train_program, - feed={in_data.name: feed_in_data, - label.name: feed_label}, - fetch_list=[loss.name]) + fetches = 
exe.run(train_program, + feed={ + in_data.name: feed_in_data, + label.name: feed_label + }, + fetch_list=[loss.name]) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_feed_fetch_method.py b/python/paddle/fluid/tests/unittests/test_feed_fetch_method.py index d1842001379..282f9bbb6f8 100644 --- a/python/paddle/fluid/tests/unittests/test_feed_fetch_method.py +++ b/python/paddle/fluid/tests/unittests/test_feed_fetch_method.py @@ -20,6 +20,7 @@ import numpy as np class TestFeedFetch(unittest.TestCase): + def test_feed_fetch(self): scope = core.Scope() place = core.CPUPlace() diff --git a/python/paddle/fluid/tests/unittests/test_fetch_handler.py b/python/paddle/fluid/tests/unittests/test_fetch_handler.py index de9e456f68c..f5e1b3c687a 100644 --- a/python/paddle/fluid/tests/unittests/test_fetch_handler.py +++ b/python/paddle/fluid/tests/unittests/test_fetch_handler.py @@ -24,6 +24,7 @@ import paddle.fluid as fluid class TestFetchHandler(unittest.TestCase): + @unittest.skip(reason="Skip unstable ci") def test_fetch_handler(self): place = core.CPUPlace() @@ -37,6 +38,7 @@ class TestFetchHandler(unittest.TestCase): var_emb3 = block.create_var(name='emb3', type=core.VarDesc.VarType.FP32) class FH(fluid.executor.FetchHandler): + def handler(self, fetch_dict): assert len(fetch_dict) == 1 @@ -49,13 +51,14 @@ class TestFetchHandler(unittest.TestCase): time.sleep(3) fm.stop() - default_fh = fluid.executor.FetchHandler( - var_dict={'emb': var_emb, - 'emb2': None, - 'emb3': var_emb3}, - period_secs=1) - default_fm = fluid.trainer_factory.FetchHandlerMonitor(scope, - default_fh) + default_fh = fluid.executor.FetchHandler(var_dict={ + 'emb': var_emb, + 'emb2': None, + 'emb3': var_emb3 + }, + period_secs=1) + default_fm = fluid.trainer_factory.FetchHandlerMonitor( + scope, default_fh) default_fm.start() time.sleep(5) default_fm.stop() diff --git a/python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py b/python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py index 50ad2a4087a..ee168cc36c1 100644 --- a/python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py +++ b/python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py @@ -23,6 +23,7 @@ from simple_nets import simple_fc_net_with_inputs, simple_fc_net class TestFetchLoDTensorArray(unittest.TestCase): + def build_program(self, main_program, startup_program): with fluid.unique_name.guard(): with fluid.program_guard(main_program, startup_program): diff --git a/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py b/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py index 37d269e3369..2e48157f950 100644 --- a/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py +++ b/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py @@ -24,24 +24,23 @@ os.environ["CPU_NUM"] = "2" class TestFetchUnmerged(unittest.TestCase): + def conv_net(self, img, label): - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=8, - pool_size=2, - pool_stride=2, - pool_type='max', - act="relu") + conv_pool_1 = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=8, + pool_size=2, + pool_stride=2, + pool_type='max', + act="relu") conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=16, - pool_size=2, - pool_stride=2, - pool_type='avg', - act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + 
num_filters=16, + pool_size=2, + pool_stride=2, + pool_type='avg', + act="relu") hidden = fluid.layers.fc(input=conv_pool_2, size=32, act='relu') prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) @@ -51,10 +50,12 @@ class TestFetchUnmerged(unittest.TestCase): def build_program(self, main, startup, is_test): with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') + img = fluid.layers.data(name='image', + shape=[1, 28, 28], + dtype='float32') + label = fluid.layers.data(name='label', + shape=[1], + dtype='int64') loss, prediction = self.conv_net(img, label) if not is_test: opt = fluid.optimizer.Adam(learning_rate=0.001) @@ -77,10 +78,9 @@ class TestFetchUnmerged(unittest.TestCase): iters = 2 batch_size = 16 - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=500), - batch_size=batch_size) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=500), + batch_size=batch_size) feeder = fluid.DataFeeder(feed_list=feeds, place=place) device_num = fluid.core.get_cuda_device_count() if use_cuda else 2 diff --git a/python/paddle/fluid/tests/unittests/test_fetch_var.py b/python/paddle/fluid/tests/unittests/test_fetch_var.py index d78b27566eb..2a0d29be47d 100644 --- a/python/paddle/fluid/tests/unittests/test_fetch_var.py +++ b/python/paddle/fluid/tests/unittests/test_fetch_var.py @@ -22,6 +22,7 @@ import unittest class TestFetchVar(unittest.TestCase): + def set_input(self): self.val = numpy.array([1, 3, 5]).astype(numpy.int32) @@ -32,13 +33,13 @@ class TestFetchVar(unittest.TestCase): exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_main_program(), feed={}, fetch_list=[]) fetched_x = fluid.executor._fetch_var("x") - self.assertTrue( - numpy.array_equal(fetched_x, self.val), - "fetch_x=%s val=%s" % (fetched_x, self.val)) + self.assertTrue(numpy.array_equal(fetched_x, self.val), + "fetch_x=%s val=%s" % (fetched_x, self.val)) self.assertEqual(fetched_x.dtype, self.val.dtype) class TestFetchNullVar(TestFetchVar): + def set_input(self): self.val = numpy.array([]).astype(numpy.int32) diff --git a/python/paddle/fluid/tests/unittests/test_fill_any_like_op.py b/python/paddle/fluid/tests/unittests/test_fill_any_like_op.py index 95537d43327..1e7d0971349 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_any_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_any_like_op.py @@ -25,6 +25,7 @@ from op_test import OpTest, convert_float_to_uint16 class TestFillAnyLikeOp(OpTest): + def setUp(self): self.op_type = "fill_any_like" self.dtype = np.int32 @@ -42,6 +43,7 @@ class TestFillAnyLikeOp(OpTest): class TestFillAnyLikeOpFloat32(TestFillAnyLikeOp): + def init(self): self.dtype = np.float32 self.value = 0.0 @@ -50,6 +52,7 @@ class TestFillAnyLikeOpFloat32(TestFillAnyLikeOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFillAnyLikeOpBfloat16(OpTest): + def setUp(self): self.op_type = "fill_any_like" self.dtype = np.uint16 @@ -67,21 +70,25 @@ class TestFillAnyLikeOpBfloat16(OpTest): class TestFillAnyLikeOpValue1(TestFillAnyLikeOp): + def init(self): self.value = 1.0 class TestFillAnyLikeOpValue2(TestFillAnyLikeOp): + def init(self): self.value = 1e-10 class TestFillAnyLikeOpValue3(TestFillAnyLikeOp): + def 
init(self): self.value = 1e-100 class TestFillAnyLikeOpType(TestFillAnyLikeOp): + def setUp(self): self.op_type = "fill_any_like" self.dtype = np.int32 @@ -99,6 +106,7 @@ class TestFillAnyLikeOpType(TestFillAnyLikeOp): class TestFillAnyLikeOpFloat16(TestFillAnyLikeOp): + def init(self): self.dtype = np.float16 diff --git a/python/paddle/fluid/tests/unittests/test_fill_any_op.py b/python/paddle/fluid/tests/unittests/test_fill_any_op.py index 20660847536..1262c28edda 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_any_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_any_op.py @@ -22,6 +22,7 @@ from op_test import OpTest class TestFillAnyOp(OpTest): + def setUp(self): self.op_type = "fill_any" self.dtype = 'float64' @@ -48,23 +49,27 @@ class TestFillAnyOp(OpTest): class TestFillAnyOpFloat32(TestFillAnyOp): + def init(self): self.dtype = np.float32 self.value = 0.0 class TestFillAnyOpFloat16(TestFillAnyOp): + def init(self): self.dtype = np.float16 class TestFillAnyOpvalue1(TestFillAnyOp): + def init(self): self.dtype = np.float32 self.value = 111111555 class TestFillAnyOpvalue2(TestFillAnyOp): + def init(self): self.dtype = np.float32 self.value = 11111.1111 diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py index 15071b2b6aa..bd87181ebcc 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py @@ -28,6 +28,7 @@ from paddle.fluid import compiler, Program, program_guard # Situation 1: Attr(shape) is a list(without tensor) class TestFillConstantOp1(OpTest): + def setUp(self): '''Test fill_constant op with specified value ''' @@ -42,6 +43,7 @@ class TestFillConstantOp1(OpTest): class TestFillConstantOp2(OpTest): + def setUp(self): '''Test fill_constant op with default value ''' @@ -56,6 +58,7 @@ class TestFillConstantOp2(OpTest): class TestFillConstantOp3(OpTest): + def setUp(self): '''Test fill_constant op with specified int64 value ''' @@ -70,6 +73,7 @@ class TestFillConstantOp3(OpTest): class TestFillConstantOp4(OpTest): + def setUp(self): '''Test fill_constant op with specified int value ''' @@ -86,6 +90,7 @@ class TestFillConstantOp4(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFillConstantBF16Op(OpTest): + def setUp(self): '''Test fill_constant op with specified value ''' @@ -105,14 +110,17 @@ class TestFillConstantBF16Op(OpTest): class TestFillConstantOpWithSelectedRows(unittest.TestCase): + def check_with_place(self, place): scope = core.Scope() # create Out Variable out = scope.var('Out').get_selected_rows() # create and run fill_constant_op operator - fill_constant_op = Operator( - "fill_constant", shape=[123, 92], value=3.8, Out='Out') + fill_constant_op = Operator("fill_constant", + shape=[123, 92], + value=3.8, + Out='Out') fill_constant_op.run(scope, place) # get result from Out @@ -132,6 +140,7 @@ class TestFillConstantOpWithSelectedRows(unittest.TestCase): # Situation 2: Attr(shape) is a list(with tensor) class TestFillConstantOp1_ShapeTensorList(OpTest): + def setUp(self): '''Test fill_constant op with specified value ''' @@ -156,6 +165,7 @@ class TestFillConstantOp1_ShapeTensorList(OpTest): class TestFillConstantOp2_ShapeTensorList(OpTest): + def setUp(self): '''Test fill_constant op with default value ''' @@ -179,6 +189,7 @@ class TestFillConstantOp2_ShapeTensorList(OpTest): class 
TestFillConstantOp3_ShapeTensorList(TestFillConstantOp1_ShapeTensorList): + def init_data(self): self.shape = [123, 92] self.infer_shape = [123, -1] @@ -186,6 +197,7 @@ class TestFillConstantOp3_ShapeTensorList(TestFillConstantOp1_ShapeTensorList): class TestFillConstantOp4_ShapeTensorList(TestFillConstantOp1_ShapeTensorList): + def init_data(self): self.shape = [123, 92] self.infer_shape = [123, -1] @@ -194,6 +206,7 @@ class TestFillConstantOp4_ShapeTensorList(TestFillConstantOp1_ShapeTensorList): # Situation 3: shape is a tensor class TestFillConstantOp1_ShapeTensor(OpTest): + def setUp(self): '''Test fill_constant op with specified value ''' @@ -214,6 +227,7 @@ class TestFillConstantOp1_ShapeTensor(OpTest): # Situation 4: value is a tensor class TestFillConstantOp1_ValueTensor(OpTest): + def setUp(self): '''Test fill_constant op with specified value ''' @@ -238,6 +252,7 @@ class TestFillConstantOp1_ValueTensor(OpTest): # Situation 5: value is a tensor class TestFillConstantOp2_ValueTensor(OpTest): + def setUp(self): '''Test fill_constant op with specified value ''' @@ -262,43 +277,56 @@ class TestFillConstantOp2_ValueTensor(OpTest): # Test python API class TestFillConstantAPI(unittest.TestCase): + def test_api(self): positive_2_int32 = fluid.layers.fill_constant([1], "int32", 2) positive_2_int64 = fluid.layers.fill_constant([1], "int64", 2) - shape_tensor_int32 = fluid.data( - name="shape_tensor_int32", shape=[2], dtype="int32") - shape_tensor_int64 = fluid.data( - name="shape_tensor_int64", shape=[2], dtype="int64") - - out_1 = fluid.layers.fill_constant( - shape=[1, 2], dtype="float32", value=1.1) - - out_2 = fluid.layers.fill_constant( - shape=[1, positive_2_int32], dtype="float32", value=1.1) - - out_3 = fluid.layers.fill_constant( - shape=[1, positive_2_int64], dtype="float32", value=1.1) - - out_4 = fluid.layers.fill_constant( - shape=shape_tensor_int32, dtype="float32", value=1.1) - - out_5 = fluid.layers.fill_constant( - shape=shape_tensor_int64, dtype="float32", value=1.1) - - out_6 = fluid.layers.fill_constant( - shape=shape_tensor_int64, dtype=np.float32, value=1.1) - - val1 = fluid.layers.fill_constant( - shape=[1], dtype=np.float32, value=1.1) - val2 = fluid.layers.fill_constant( - shape=[1], dtype=np.float64, value=1.1) - out_7 = fluid.layers.fill_constant( - shape=shape_tensor_int64, dtype=np.float32, value=val1) - - out_8 = fluid.layers.fill_constant( - shape=shape_tensor_int64, dtype=np.float32, value=val2) + shape_tensor_int32 = fluid.data(name="shape_tensor_int32", + shape=[2], + dtype="int32") + shape_tensor_int64 = fluid.data(name="shape_tensor_int64", + shape=[2], + dtype="int64") + + out_1 = fluid.layers.fill_constant(shape=[1, 2], + dtype="float32", + value=1.1) + + out_2 = fluid.layers.fill_constant(shape=[1, positive_2_int32], + dtype="float32", + value=1.1) + + out_3 = fluid.layers.fill_constant(shape=[1, positive_2_int64], + dtype="float32", + value=1.1) + + out_4 = fluid.layers.fill_constant(shape=shape_tensor_int32, + dtype="float32", + value=1.1) + + out_5 = fluid.layers.fill_constant(shape=shape_tensor_int64, + dtype="float32", + value=1.1) + + out_6 = fluid.layers.fill_constant(shape=shape_tensor_int64, + dtype=np.float32, + value=1.1) + + val1 = fluid.layers.fill_constant(shape=[1], + dtype=np.float32, + value=1.1) + val2 = fluid.layers.fill_constant(shape=[1], + dtype=np.float64, + value=1.1) + out_7 = fluid.layers.fill_constant(shape=shape_tensor_int64, + dtype=np.float32, + value=val1) + + out_8 = fluid.layers.fill_constant(shape=shape_tensor_int64, 
+ dtype=np.float32, + value=val2) exe = fluid.Executor(place=fluid.CPUPlace()) res_1, res_2, res_3, res_4, res_5, res_6, res_7, res_8 = exe.run( @@ -307,9 +335,7 @@ class TestFillConstantAPI(unittest.TestCase): "shape_tensor_int32": np.array([1, 2]).astype("int32"), "shape_tensor_int64": np.array([1, 2]).astype("int64"), }, - fetch_list=[ - out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8 - ]) + fetch_list=[out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8]) assert np.array_equal(res_1, np.full([1, 2], 1.1, dtype="float32")) assert np.array_equal(res_2, np.full([1, 2], 1.1, dtype="float32")) @@ -322,6 +348,7 @@ class TestFillConstantAPI(unittest.TestCase): class TestFillConstantImperative(unittest.TestCase): + def test_api(self): with fluid.dygraph.guard(): data1 = np.array([1, 2]).astype('int32') @@ -330,26 +357,26 @@ class TestFillConstantImperative(unittest.TestCase): shape = fluid.dygraph.to_variable(data1) val = fluid.dygraph.to_variable(data2) value = fluid.dygraph.to_variable(data3) - res1 = fluid.layers.fill_constant( - shape=[1, 2], dtype='float32', value=1.1) - res2 = fluid.layers.fill_constant( - shape=shape, dtype='float32', value=1.1) - res3 = fluid.layers.fill_constant( - shape=shape, dtype='float32', value=val) - res4 = fluid.layers.fill_constant( - shape=shape, dtype='int32', value=value) - assert np.array_equal( - res1.numpy(), np.full( - [1, 2], 1.1, dtype="float32")) - assert np.array_equal( - res2.numpy(), np.full( - [1, 2], 1.1, dtype="float32")) - assert np.array_equal( - res3.numpy(), np.full( - [1, 2], 1.1, dtype="float32")) - assert np.array_equal( - res4.numpy(), np.full( - [1, 2], 88, dtype="int32")) + res1 = fluid.layers.fill_constant(shape=[1, 2], + dtype='float32', + value=1.1) + res2 = fluid.layers.fill_constant(shape=shape, + dtype='float32', + value=1.1) + res3 = fluid.layers.fill_constant(shape=shape, + dtype='float32', + value=val) + res4 = fluid.layers.fill_constant(shape=shape, + dtype='int32', + value=value) + assert np.array_equal(res1.numpy(), + np.full([1, 2], 1.1, dtype="float32")) + assert np.array_equal(res2.numpy(), + np.full([1, 2], 1.1, dtype="float32")) + assert np.array_equal(res3.numpy(), + np.full([1, 2], 1.1, dtype="float32")) + assert np.array_equal(res4.numpy(), + np.full([1, 2], 88, dtype="int32")) def test_nan(self): with fluid.dygraph.guard(): @@ -369,45 +396,42 @@ class TestFillConstantImperative(unittest.TestCase): class TestFillConstantOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): #for ci coverage x1 = fluid.layers.data(name='x1', shape=[1], dtype="int16") - self.assertRaises( - TypeError, - fluid.layers.fill_constant, - shape=[1], - value=5, - dtype='uint4') - - self.assertRaises( - TypeError, - fluid.layers.fill_constant, - shape=[1.1], - value=5, - dtype='float32', - out=x1) + self.assertRaises(TypeError, + fluid.layers.fill_constant, + shape=[1], + value=5, + dtype='uint4') + + self.assertRaises(TypeError, + fluid.layers.fill_constant, + shape=[1.1], + value=5, + dtype='float32', + out=x1) # The argument dtype of fill_constant_op must be one of bool, float16, #float32, float64, uint8, int16, int32 or int64 x2 = fluid.layers.data(name='x2', shape=[1], dtype="int32") - self.assertRaises( - TypeError, - fluid.layers.fill_constant, - shape=[1], - value=5, - dtype='float64', - out=x2) + self.assertRaises(TypeError, + fluid.layers.fill_constant, + shape=[1], + value=5, + dtype='float64', + out=x2) x3 = np.random.randn(100, 100).astype('int32') - self.assertRaises( - 
TypeError, - fluid.layers.fill_constant, - shape=[100, 100], - value=5, - dtype='float64', - out=x3) + self.assertRaises(TypeError, + fluid.layers.fill_constant, + shape=[100, 100], + value=5, + dtype='float64', + out=x3) # The argument shape's type of fill_constant_op must be list, tuple or Variable. def test_shape_type(): @@ -423,23 +447,28 @@ class TestFillConstantOpError(unittest.TestCase): # The shape dtype of fill_constant_op must be int32 or int64. def test_shape_tensor_dtype(): - shape = fluid.data( - name="shape_tensor", shape=[2], dtype="float32") - fluid.layers.fill_constant( - shape=shape, dtype="float32", value=1) + shape = fluid.data(name="shape_tensor", + shape=[2], + dtype="float32") + fluid.layers.fill_constant(shape=shape, + dtype="float32", + value=1) self.assertRaises(TypeError, test_shape_tensor_dtype) def test_shape_tensor_list_dtype(): - shape = fluid.data( - name="shape_tensor_list", shape=[1], dtype="bool") - fluid.layers.fill_constant( - shape=[shape, 2], dtype="float32", value=1) + shape = fluid.data(name="shape_tensor_list", + shape=[1], + dtype="bool") + fluid.layers.fill_constant(shape=[shape, 2], + dtype="float32", + value=1) self.assertRaises(TypeError, test_shape_tensor_list_dtype) class TestFillConstantOp_ValueTensorBf16(OpTest): + def setUp(self): '''Test fill_constant op with specified value ''' @@ -447,7 +476,8 @@ class TestFillConstantOp_ValueTensorBf16(OpTest): self.init_data() self.inputs = { - "ShapeTensor": np.array(self.shape).astype("int32"), + "ShapeTensor": + np.array(self.shape).astype("int32"), 'ValueTensor': convert_float_to_uint16(np.array([self.value]).astype("float32")) } diff --git a/python/paddle/fluid/tests/unittests/test_fill_diagonal_tensor_op.py b/python/paddle/fluid/tests/unittests/test_fill_diagonal_tensor_op.py index 8ac7a9586cb..c1a187d7bba 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_diagonal_tensor_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_diagonal_tensor_op.py @@ -83,6 +83,7 @@ def fill_gt(x, y, offset, dim1, dim2): class TensorFillDiagTensor_Test(OpTest): + def setUp(self): self.op_type = "fill_diagonal_tensor" self.init_kernel_type() @@ -108,6 +109,7 @@ class TensorFillDiagTensor_Test(OpTest): class TensorFillDiagTensor_Test2(TensorFillDiagTensor_Test): + def setUp(self): self.op_type = "fill_diagonal_tensor" self.init_kernel_type() @@ -127,6 +129,7 @@ class TensorFillDiagTensor_Test2(TensorFillDiagTensor_Test): class TensorFillDiagTensor_Test3(TensorFillDiagTensor_Test): + def setUp(self): self.op_type = "fill_diagonal_tensor" self.init_kernel_type() diff --git a/python/paddle/fluid/tests/unittests/test_fill_op.py b/python/paddle/fluid/tests/unittests/test_fill_op.py index 7c8587dc400..fdf4ec85627 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_op.py @@ -22,6 +22,7 @@ from paddle.fluid.op import Operator class TestFillOp1(OpTest): + def setUp(self): self.op_type = "fill" val = np.random.random(size=[100, 200]) @@ -39,6 +40,7 @@ class TestFillOp1(OpTest): class TestFillOp2(OpTest): + def setUp(self): self.op_type = "fill" val = np.random.random(size=[100, 200]) @@ -56,6 +58,7 @@ class TestFillOp2(OpTest): class TestFillOp3(unittest.TestCase): + def check_with_place(self, place, f_cpu): scope = core.Scope() # create Out Variable @@ -63,13 +66,12 @@ class TestFillOp3(unittest.TestCase): # create and run fill_op operator val = np.random.random(size=[300, 200]) - fill_op = Operator( - "fill", - value=val.flatten(), - shape=[300, 
200], - dtype=int(core.VarDesc.VarType.FP32), - force_cpu=f_cpu, - Out='Out') + fill_op = Operator("fill", + value=val.flatten(), + shape=[300, 200], + dtype=int(core.VarDesc.VarType.FP32), + force_cpu=f_cpu, + Out='Out') fill_op.run(scope, place) # get result from Out diff --git a/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py b/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py index 46590bf187a..1371f202cb6 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py @@ -22,6 +22,7 @@ from op_test import OpTest class TestFillZerosLike2Op(OpTest): + def setUp(self): self.op_type = "fill_zeros_like2" self.dtype = np.float32 @@ -38,17 +39,21 @@ class TestFillZerosLike2Op(OpTest): class TestFillZerosLike2OpFp16(TestFillZerosLike2Op): + def init_dtype(self): self.dtype = np.float16 class TestFillZerosLike2OpFp64(TestFillZerosLike2Op): + def init_dtype(self): self.dtype = np.float64 class TestZerosError(unittest.TestCase): + def test_errors(self): + def test_zeros_like_type_error(): with fluid.program_guard(fluid.Program(), fluid.Program()): fluid.layers.zeros_like([10], dtype="float") diff --git a/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py b/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py index 20f1a110c35..1b23078e7d4 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py @@ -20,6 +20,7 @@ from op_test import OpTest class TestFillZerosLikeOp(OpTest): + def setUp(self): self.op_type = "fill_zeros_like" self.dtype = np.float32 @@ -35,6 +36,7 @@ class TestFillZerosLikeOp(OpTest): class TestFillZerosLikeOpFp16(TestFillZerosLikeOp): + def init_dtype(self): self.dtype = np.float16 diff --git a/python/paddle/fluid/tests/unittests/test_filter_by_instag_op.py b/python/paddle/fluid/tests/unittests/test_filter_by_instag_op.py index ecd2e2cd6c3..32aa5c15997 100644 --- a/python/paddle/fluid/tests/unittests/test_filter_by_instag_op.py +++ b/python/paddle/fluid/tests/unittests/test_filter_by_instag_op.py @@ -28,6 +28,7 @@ from paddle.fluid.op import Operator class TestFilterByInstagOp(OpTest): + def setUp(self): self.op_type = 'filter_by_instag' x1 = np.zeros((36, 4), dtype=np.float64) @@ -55,8 +56,8 @@ class TestFilterByInstagOp(OpTest): out[ln, k] = cur ln += 1 - mmap = np.array( - [[0, 1, 2], [2, 6, 4], [6, 15, 6], [12, 28, 8]]).astype('int64') + mmap = np.array([[0, 1, 2], [2, 6, 4], [6, 15, 6], [12, 28, + 8]]).astype('int64') mmap_lod = [[1, 1, 1, 1]] loss_weight = np.array([[1], [1], [1], [1]]).astype('double') @@ -78,14 +79,16 @@ class TestFilterByInstagOp(OpTest): self.check_output() def test_check_grad(self): - self.check_grad( - ['Ins'], 'Out', no_grad_set=set(['Ins_tag', 'Filter_tag'])) + self.check_grad(['Ins'], + 'Out', + no_grad_set=set(['Ins_tag', 'Filter_tag'])) """This is Test Case 2""" class TestFilterByInstagOp2(OpTest): + def setUp(self): self.op_type = 'filter_by_instag' @@ -123,14 +126,16 @@ class TestFilterByInstagOp2(OpTest): self.check_output() def test_check_grad(self): - self.check_grad( - ['Ins'], 'Out', no_grad_set=set(['Ins_tag', 'Filter_tag'])) + self.check_grad(['Ins'], + 'Out', + no_grad_set=set(['Ins_tag', 'Filter_tag'])) """This is Test Case 3""" class TestFilterByInstagOp3(OpTest): + def setUp(self): self.op_type = 'filter_by_instag' @@ -165,14 +170,16 @@ class TestFilterByInstagOp3(OpTest): self.check_output() def 
test_check_grad(self): - self.check_grad( - ['Ins'], 'Out', no_grad_set=set(['Ins_tag', 'Filter_tag'])) + self.check_grad(['Ins'], + 'Out', + no_grad_set=set(['Ins_tag', 'Filter_tag'])) """This is Test Case 4""" class TestFilterByInstagOp4(OpTest): + def setUp(self): self.op_type = 'filter_by_instag' @@ -206,11 +213,13 @@ class TestFilterByInstagOp4(OpTest): self.check_output() def test_check_grad(self): - self.check_grad( - ['Ins'], 'Out', no_grad_set=set(['Ins_tag', 'Filter_tag'])) + self.check_grad(['Ins'], + 'Out', + no_grad_set=set(['Ins_tag', 'Filter_tag'])) class TestFilterByInstagOp6(OpTest): + def setUp(self): self.op_type = 'filter_by_instag' @@ -248,6 +257,7 @@ class TestFilterByInstagOp6(OpTest): class TestFilterByInstagOp7(OpTest): + def setUp(self): self.op_type = 'filter_by_instag' diff --git a/python/paddle/fluid/tests/unittests/test_flatten2_op.py b/python/paddle/fluid/tests/unittests/test_flatten2_op.py index 42b43cc46a6..b0e821c2693 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten2_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten2_op.py @@ -22,6 +22,7 @@ from op_test import OpTest class TestFlattenOp(OpTest): + def setUp(self): self.op_type = "flatten2" self.init_test_case() @@ -48,6 +49,7 @@ class TestFlattenOp(OpTest): class TestFlattenOp1(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.axis = 0 @@ -55,6 +57,7 @@ class TestFlattenOp1(TestFlattenOp): class TestFlattenOpWithDefaultAxis(TestFlattenOp): + def init_test_case(self): self.in_shape = (10, 2, 2, 3) self.new_shape = (10, 12) @@ -64,6 +67,7 @@ class TestFlattenOpWithDefaultAxis(TestFlattenOp): class TestFlattenOpSixDims(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) self.axis = 4 @@ -71,6 +75,7 @@ class TestFlattenOpSixDims(TestFlattenOp): class TestStaticFlattenInferShapePythonAPI(unittest.TestCase): + def execute_api(self, x, axis=1): return fluid.layers.flatten(x, axis=axis) @@ -78,13 +83,15 @@ class TestStaticFlattenInferShapePythonAPI(unittest.TestCase): paddle.enable_static() main_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, paddle.static.Program()): - x = paddle.static.data( - name="x", shape=[-1, 3, -1, -1], dtype='float32') + x = paddle.static.data(name="x", + shape=[-1, 3, -1, -1], + dtype='float32') out = self.execute_api(x, axis=2) self.assertTrue((-1, -1) == out.shape) class TestFlatten2OpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): input_data = np.random.random((3, 2, 4, 5)).astype("float64") @@ -97,8 +104,9 @@ class TestFlatten2OpError(unittest.TestCase): def test_type(): # dtype must be float32, float64, int8, int32, int64, uint8. 
- x2 = fluid.layers.data( - name='x2', shape=[3, 2, 4, 5], dtype='float16') + x2 = fluid.layers.data(name='x2', + shape=[3, 2, 4, 5], + dtype='float16') fluid.layers.flatten(x2, axis=1) self.assertRaises(TypeError, test_type) diff --git a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py index ac352fcdf87..bcb9a99e7d8 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py @@ -22,6 +22,7 @@ from op_test import OpTest class TestFlattenOp(OpTest): + def setUp(self): self.python_api = paddle.flatten self.python_out_sig = ["Out"] @@ -56,6 +57,7 @@ class TestFlattenOp(OpTest): class TestFlattenOp_1(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 1 @@ -70,6 +72,7 @@ class TestFlattenOp_1(TestFlattenOp): class TestFlattenOp_2(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -84,6 +87,7 @@ class TestFlattenOp_2(TestFlattenOp): class TestFlattenOp_3(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -98,6 +102,7 @@ class TestFlattenOp_3(TestFlattenOp): class TestFlattenOp_4(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = -2 @@ -112,6 +117,7 @@ class TestFlattenOp_4(TestFlattenOp): class TestFlattenOp_5(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 2 @@ -126,6 +132,7 @@ class TestFlattenOp_5(TestFlattenOp): class TestFlattenOpSixDims(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) self.start_axis = 3 @@ -140,6 +147,7 @@ class TestFlattenOpSixDims(TestFlattenOp): class TestFlatten2OpError(unittest.TestCase): + def test_errors(self): image_shape = (2, 3, 4, 4) x = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * @@ -147,22 +155,25 @@ class TestFlatten2OpError(unittest.TestCase): x = x.astype('float32') def test_ValueError1(): - x_var = paddle.static.data( - name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data(name="x", + shape=image_shape, + dtype='float32') out = paddle.flatten(x_var, start_axis=2, stop_axis=1) self.assertRaises(ValueError, test_ValueError1) def test_ValueError2(): - x_var = paddle.static.data( - name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data(name="x", + shape=image_shape, + dtype='float32') paddle.flatten(x_var, start_axis=10, stop_axis=1) self.assertRaises(ValueError, test_ValueError2) def test_ValueError3(): - x_var = paddle.static.data( - name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data(name="x", + shape=image_shape, + dtype='float32') paddle.flatten(x_var, start_axis=2, stop_axis=10) self.assertRaises(ValueError, test_ValueError3) @@ -172,8 +183,9 @@ class TestFlatten2OpError(unittest.TestCase): x2 = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * image_shape[3]).reshape(image_shape) / 100. 
x2 = x2.astype('float16') - x2_var = paddle.fluid.data( - name='x2', shape=[3, 2, 4, 5], dtype='float16') + x2_var = paddle.fluid.data(name='x2', + shape=[3, 2, 4, 5], + dtype='float16') paddle.flatten(x2_var) self.assertRaises(TypeError, test_type) @@ -185,6 +197,7 @@ class TestFlatten2OpError(unittest.TestCase): class TestStaticFlattenPythonAPI(unittest.TestCase): + def execute_api(self, x, start_axis=0, stop_axis=-1): return paddle.flatten(x, start_axis, stop_axis) @@ -194,8 +207,9 @@ class TestStaticFlattenPythonAPI(unittest.TestCase): main_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, paddle.static.Program()): - x = paddle.static.data( - name="x", shape=[2, 3, 4, 4], dtype='float32') + x = paddle.static.data(name="x", + shape=[2, 3, 4, 4], + dtype='float32') out = self.execute_api(x, start_axis=-2, stop_axis=-1) exe = paddle.static.Executor(place=paddle.CPUPlace()) @@ -204,6 +218,7 @@ class TestStaticFlattenPythonAPI(unittest.TestCase): class TestStaticFlattenInferShapePythonAPI(unittest.TestCase): + def execute_api(self, x, start_axis=0, stop_axis=-1): return paddle.flatten(x, start_axis, stop_axis) @@ -211,18 +226,21 @@ class TestStaticFlattenInferShapePythonAPI(unittest.TestCase): paddle.enable_static() main_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, paddle.static.Program()): - x = paddle.static.data( - name="x", shape=[-1, 3, -1, -1], dtype='float32') + x = paddle.static.data(name="x", + shape=[-1, 3, -1, -1], + dtype='float32') out = self.execute_api(x, start_axis=2, stop_axis=3) self.assertTrue((-1, 3, -1) == out.shape) class TestStaticInplaceFlattenPythonAPI(TestStaticFlattenPythonAPI): + def execute_api(self, x, start_axis=0, stop_axis=-1): return x.flatten_(start_axis, stop_axis) class TestFlattenPython(unittest.TestCase): + def test_python_api(self): image_shape = (2, 3, 4, 4) x = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * @@ -245,6 +263,7 @@ class TestFlattenPython(unittest.TestCase): class TestDygraphInplaceFlattenPython(unittest.TestCase): + def test_python_api(self): image_shape = (2, 3, 4, 4) x = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * diff --git a/python/paddle/fluid/tests/unittests/test_flatten_op.py b/python/paddle/fluid/tests/unittests/test_flatten_op.py index a5b24debaee..91e2ba89dc0 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_op.py @@ -21,6 +21,7 @@ from op_test import OpTest class TestFlattenOp(OpTest): + def setUp(self): self.op_type = "flatten" self.init_test_case() @@ -44,6 +45,7 @@ class TestFlattenOp(OpTest): class TestFlattenOp1(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 2, 10) self.axis = 0 @@ -51,6 +53,7 @@ class TestFlattenOp1(TestFlattenOp): class TestFlattenOpWithDefaultAxis(TestFlattenOp): + def init_test_case(self): self.in_shape = (10, 2, 2, 3) self.new_shape = (10, 12) @@ -60,6 +63,7 @@ class TestFlattenOpWithDefaultAxis(TestFlattenOp): class TestFlattenOpSixDims(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) self.axis = 4 diff --git a/python/paddle/fluid/tests/unittests/test_fleet_amp_init.py b/python/paddle/fluid/tests/unittests/test_fleet_amp_init.py index a9a6b9c0660..a7df64c1d92 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_amp_init.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_amp_init.py @@ -26,8 +26,7 @@ paddle.enable_static() def gen_data(): return { "x": np.random.random(size=(128, 
32)).astype('float32'), - "y": np.random.randint( - 2, size=(128, 1)).astype('int64') + "y": np.random.randint(2, size=(128, 1)).astype('int64') } @@ -43,6 +42,7 @@ def mlp(input_x, input_y, hid_dim=128, label_dim=2): class TestFleetAMPInit(unittest.TestCase): + def test_fleet_amp_init(self): if not fluid.core.is_compiled_with_cuda(): return @@ -54,10 +54,12 @@ class TestFleetAMPInit(unittest.TestCase): fleet.init(role) with paddle.static.program_guard(main_program, startup_program): - input_x = paddle.static.data( - name="x", shape=[None, 32], dtype='float32') - input_y = paddle.static.data( - name="y", shape=[None, 1], dtype='int64') + input_x = paddle.static.data(name="x", + shape=[None, 32], + dtype='float32') + input_y = paddle.static.data(name="y", + shape=[None, 1], + dtype='int64') cost = mlp(input_x, input_y) optimizer = paddle.optimizer.Momentum( @@ -95,10 +97,12 @@ class TestFleetAMPInit(unittest.TestCase): fleet.init(role) with paddle.static.program_guard(main_program, startup_program): - input_x = paddle.static.data( - name="x", shape=[None, 32], dtype='float32') - input_y = paddle.static.data( - name="y", shape=[None, 1], dtype='int64') + input_x = paddle.static.data(name="x", + shape=[None, 32], + dtype='float32') + input_y = paddle.static.data(name="y", + shape=[None, 1], + dtype='int64') cost = mlp(input_x, input_y) optimizer = paddle.optimizer.Momentum( diff --git a/python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py index 982ec4eb5c7..6b05e63482b 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py @@ -25,13 +25,14 @@ paddle.enable_static() class TestFleetAMPOptimizer(TestFleetMetaOptimizer): + def test_amp_optimizer_backward(self): """ test amp optimizer backward """ train_prog, startup_prog = fluid.Program(), fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - opt = fluid.optimizer.MomentumOptimizer( - learning_rate=0.001, momentum=0.9) + opt = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, + momentum=0.9) opt = AMPOptimizer(opt) self.set_strategy(strategy, 'amp') @@ -48,8 +49,8 @@ class TestFleetAMPOptimizer(TestFleetMetaOptimizer): train_prog, startup_prog = fluid.Program(), fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - opt = fluid.optimizer.MomentumOptimizer( - learning_rate=0.001, momentum=0.9) + opt = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, + momentum=0.9) opt = AMPOptimizer(opt) self.set_strategy(strategy, 'amp') @@ -68,8 +69,8 @@ class TestFleetAMPOptimizer(TestFleetMetaOptimizer): train_prog, startup_prog = fluid.Program(), fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - opt = fluid.optimizer.MomentumOptimizer( - learning_rate=0.001, momentum=0.9) + opt = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, + momentum=0.9) opt = AMPOptimizer(opt) self.set_strategy(strategy, 'amp') diff --git a/python/paddle/fluid/tests/unittests/test_fleet_api_input.py b/python/paddle/fluid/tests/unittests/test_fleet_api_input.py index 9ca2b7c567c..139ce121ad5 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_api_input.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_api_input.py @@ -28,6 +28,7 @@ from dist_simnet_bow import train_network class DistributeTranspilerConfigTest(unittest.TestCase): + def set_runtime_split_send_recv(self, config, value): 
config.runtime_split_send_recv = value @@ -48,6 +49,7 @@ class DistributeTranspilerConfigTest(unittest.TestCase): class FleetTest(unittest.TestCase): + def testInvalidInputs(self): self.assertRaises(Exception, fleet.split_files, "files") self.assertRaises(Exception, fleet.init, "pserver") @@ -60,43 +62,40 @@ class FleetTest(unittest.TestCase): place = fluid.CPUPlace() exe = fluid.Executor(place) pe = fluid.ParallelExecutor(use_cuda=False, loss_name=loss.name) - self.assertRaises( - Exception, - fleet.save_inference_model, - dirname='/tmp/', - feeded_var_names=['X'], - target_vars=[loss], - executor=pe) - self.assertRaises( - Exception, - fleet.save_inference_model, - dirname='/tmp/', - feeded_var_names=['X'], - target_vars=[loss], - executor="executor") + self.assertRaises(Exception, + fleet.save_inference_model, + dirname='/tmp/', + feeded_var_names=['X'], + target_vars=[loss], + executor=pe) + self.assertRaises(Exception, + fleet.save_inference_model, + dirname='/tmp/', + feeded_var_names=['X'], + target_vars=[loss], + executor="executor") compiled_prog = fluid.compiler.CompiledProgram( fluid.default_main_program()) - self.assertRaises( - Exception, - fleet.save_inference_model, - dirname='/tmp/', - feeded_var_names=['X'], - target_vars=[loss], - executor=exe, - main_program=compiled_prog) - self.assertRaises( - Exception, fleet.save_persistables, executor=pe, dirname='/tmp/') - self.assertRaises( - Exception, - fleet.save_persistables, - executor="executor", - dirname='/tmp/') - self.assertRaises( - Exception, - fleet.save_persistables, - executor=exe, - dirname='/tmp/', - main_program=compiled_prog) + self.assertRaises(Exception, + fleet.save_inference_model, + dirname='/tmp/', + feeded_var_names=['X'], + target_vars=[loss], + executor=exe, + main_program=compiled_prog) + self.assertRaises(Exception, + fleet.save_persistables, + executor=pe, + dirname='/tmp/') + self.assertRaises(Exception, + fleet.save_persistables, + executor="executor", + dirname='/tmp/') + self.assertRaises(Exception, + fleet.save_persistables, + executor=exe, + dirname='/tmp/', + main_program=compiled_prog) self.assertRaises(Exception, fleet._transpile, "config") def set_program(self, avg_cost, strategy): @@ -147,6 +146,7 @@ class FleetTest(unittest.TestCase): class TranspilerOptimizerTest(unittest.TestCase): + def testInvalidInputs(self): self.assertRaises(Exception, TranspilerOptimizer, "Adam", None) self.assertRaises(Exception, TranspilerOptimizer, @@ -157,11 +157,14 @@ class TranspilerOptimizerTest(unittest.TestCase): data = fluid.layers.data(name='X', shape=[1], dtype='float32') hidden = fluid.layers.fc(input=data, size=10) loss = fluid.layers.mean(hidden) - self.assertRaises( - Exception, transpiler.minimize, loss=loss.name, startup_program=[]) + self.assertRaises(Exception, + transpiler.minimize, + loss=loss.name, + startup_program=[]) class UserDefinedRoleMakerTest(unittest.TestCase): + def createRoleMaker(self, current_id=0, role=Role.WORKER, @@ -179,19 +182,19 @@ class UserDefinedRoleMakerTest(unittest.TestCase): self.assertRaises( Exception, self.createRoleMaker, server_endpoints=[]) # server_endpoints can't be empty - self.assertRaises( - Exception, self.createRoleMaker, server_endpoints=[ - 3, [] - ]) # element in server_endpoints must be as string - self.assertRaises( - Exception, - self.createRoleMaker, - server_endpoints=["127.0.0.1:8080", "127.0.0.1:8080"] - ) # element in server_endpoints can't be duplicate + self.assertRaises(Exception, + self.createRoleMaker, + server_endpoints=[ + 3, [] + ]) # 
element in server_endpoints must be as string + self.assertRaises(Exception, + self.createRoleMaker, + server_endpoints=[ + "127.0.0.1:8080", "127.0.0.1:8080" + ]) # element in server_endpoints can't be duplicate # test all invalid current_id - self.assertRaises( - Exception, self.createRoleMaker, - current_id="0") # current_id must be as int + self.assertRaises(Exception, self.createRoleMaker, + current_id="0") # current_id must be as int self.assertRaises( Exception, self.createRoleMaker, current_id=-1) # current_id must be greater than or equal to 0 @@ -203,12 +206,10 @@ class UserDefinedRoleMakerTest(unittest.TestCase): server_endpoints=["127.0.0.1:8080"] ) # if role is server, current_id must be less than len(server_endpoints) # test all invalid worker_num - self.assertRaises( - Exception, self.createRoleMaker, - worker_num="1") # worker_num must be as int - self.assertRaises( - Exception, self.createRoleMaker, - worker_num=0) # worker_num must be greater than 0 + self.assertRaises(Exception, self.createRoleMaker, + worker_num="1") # worker_num must be as int + self.assertRaises(Exception, self.createRoleMaker, + worker_num=0) # worker_num must be greater than 0 # test all invalid role self.assertRaises( Exception, self.createRoleMaker, @@ -216,7 +217,9 @@ class UserDefinedRoleMakerTest(unittest.TestCase): class UserDefinedCollectiveRoleMakerTest(unittest.TestCase): - def createRoleMaker(self, current_id=0, + + def createRoleMaker(self, + current_id=0, worker_endpoints=["127.0.0.1:8080"]): role = UserDefinedCollectiveRoleMaker(current_id, worker_endpoints) @@ -229,19 +232,19 @@ class UserDefinedCollectiveRoleMakerTest(unittest.TestCase): self.assertRaises( Exception, self.createRoleMaker, worker_endpoints=[]) # worker_endpoints can't be empty - self.assertRaises( - Exception, self.createRoleMaker, - worker_endpoints=[3, - []]) # element worker_endpoints must be as string - self.assertRaises( - Exception, - self.createRoleMaker, - worker_endpoints=["127.0.0.1:8080", "127.0.0.1:8080"] - ) # element in worker_endpoints can't be duplicate + self.assertRaises(Exception, + self.createRoleMaker, + worker_endpoints=[ + 3, [] + ]) # element worker_endpoints must be as string + self.assertRaises(Exception, + self.createRoleMaker, + worker_endpoints=[ + "127.0.0.1:8080", "127.0.0.1:8080" + ]) # element in worker_endpoints can't be duplicate # test all invalid current_id - self.assertRaises( - Exception, self.createRoleMaker, - current_id="0") # current_id must be as int + self.assertRaises(Exception, self.createRoleMaker, + current_id="0") # current_id must be as int self.assertRaises( Exception, self.createRoleMaker, current_id=-1) # current_id must be greater than or equal to 0 @@ -249,11 +252,13 @@ class UserDefinedCollectiveRoleMakerTest(unittest.TestCase): Exception, self.createRoleMaker, current_id=1, - worker_endpoints=["127.0.0.1:8080"] - ) # current_id must be less than len(worker_endpoints) + worker_endpoints=[ + "127.0.0.1:8080" + ]) # current_id must be less than len(worker_endpoints) class CollectiveOptimizerTest(unittest.TestCase): + def test_ds_as_None(self): optimizer = fluid.optimizer.AdamOptimizer() dist_optimizer = CollectiveOptimizer(optimizer, strategy=None) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_ascend_utils.py b/python/paddle/fluid/tests/unittests/test_fleet_ascend_utils.py index b9d88a8e115..d8140638705 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_ascend_utils.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_ascend_utils.py @@ 
-1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -25,11 +25,15 @@ import paddle.fluid as fluid import paddle.distributed.fleet.ascend_utils as ascend_utils RANK_TABLE_JSON = { - "status": "completed", - "version": "1.0", - "server_count": "1", + "status": + "completed", + "version": + "1.0", + "server_count": + "1", "server_list": [{ - "server_id": "127.0.0.1", + "server_id": + "127.0.0.1", "device": [{ "device_id": "0", "device_ip": "192.1.184.23", @@ -44,6 +48,7 @@ RANK_TABLE_JSON = { class TestAscendUtil(unittest.TestCase): + def test_get_cloud_cluster(self): cluster, pod = ascend_utils.get_cloud_cluster() self.assertTrue(cluster) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_auto.py b/python/paddle/fluid/tests/unittests/test_fleet_auto.py index 3e5b479fab5..460ef27f63c 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_auto.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_auto.py @@ -22,6 +22,7 @@ paddle.enable_static() class TestDistributedStrategyAuto(unittest.TestCase): + def setUp(self): os.environ["POD_IP"] = "127.0.0.1" os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" @@ -31,15 +32,16 @@ class TestDistributedStrategyAuto(unittest.TestCase): def test_distributed_strategy_auto(self): fleet.init(is_collective=True) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base.py b/python/paddle/fluid/tests/unittests/test_fleet_base.py index 99986043ec7..46263d1a10e 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base.py @@ -23,6 +23,7 @@ import numpy as np class TestFleetBase(unittest.TestCase): + def setUp(self): os.environ["POD_IP"] = "127.0.0.1" os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36000" @@ -59,8 +60,8 @@ class TestFleetBase(unittest.TestCase): def test_worker_endpoints(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - self.assertEqual( - "127.0.0.1:36000", fleet.worker_endpoints(to_string=True)) + self.assertEqual("127.0.0.1:36000", + fleet.worker_endpoints(to_string=True)) self.assertEqual(["127.0.0.1:36000"], fleet.worker_endpoints()) def test_server_num(self): @@ -90,9 +91,8 @@ class TestFleetBase(unittest.TestCase): role = role_maker.PaddleCloudRoleMaker() fleet.init(role) if fleet.is_server(): - self.assertEqual( - "127.0.0.1:36001,127.0.0.2:36002", - 
fleet.server_endpoints(to_string=True)) + self.assertEqual("127.0.0.1:36001,127.0.0.2:36002", + fleet.server_endpoints(to_string=True)) self.assertEqual(["127.0.0.1:36001", "127.0.0.2:36002"], fleet.server_endpoints()) @@ -144,6 +144,7 @@ class TestFleetBase(unittest.TestCase): class TestFleetDygraph(unittest.TestCase): + def setUp(self): os.environ[ "PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36213,127.0.0.1:36214" @@ -156,15 +157,15 @@ class TestFleetDygraph(unittest.TestCase): value = np.arange(26).reshape(2, 13).astype("float32") a = fluid.dygraph.to_variable(value) layer = paddle.nn.Linear(13, 5) - adam = paddle.optimizer.Adam( - learning_rate=0.01, parameters=layer.parameters()) + adam = paddle.optimizer.Adam(learning_rate=0.01, + parameters=layer.parameters()) # remove init cause this UT cannot launch distributed task adam = fleet.distributed_optimizer(adam) try: dp_layer = fleet.distributed_model(layer) except Exception as e: - # This is just for testing the interface, - # and will not actually be called. Therefore, + # This is just for testing the interface, + # and will not actually be called. Therefore, # use "try-except" to avoid errors. lr = 0.001 adam.set_lr(lr) @@ -177,20 +178,22 @@ class TestFleetDygraph(unittest.TestCase): class TestFleetBaseSingleError(unittest.TestCase): + def setUp(self): os.environ.pop("PADDLE_TRAINER_ENDPOINTS") def gen_data(self): return { "x": np.random.random(size=(128, 32)).astype('float32'), - "y": np.random.randint( - 2, size=(128, 1)).astype('int64') + "y": np.random.randint(2, size=(128, 1)).astype('int64') } def test_single_run_collective_minimize(self): + def test_single_error(): - input_x = paddle.static.data( - name="x", shape=[-1, 32], dtype='float32') + input_x = paddle.static.data(name="x", + shape=[-1, 32], + dtype='float32') input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh') diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_2.py b/python/paddle/fluid/tests/unittests/test_fleet_base_2.py index 9675a77d676..529e9995bd5 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base_2.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base_2.py @@ -14,6 +14,7 @@ import unittest import paddle + paddle.enable_static() import os @@ -21,6 +22,7 @@ import paddle.fluid as fluid class TestFleetBase(unittest.TestCase): + def setUp(self): os.environ["POD_IP"] = "127.0.0.1" os.environ["PADDLE_PORT"] = "36000" @@ -35,20 +37,23 @@ class TestFleetBase(unittest.TestCase): os.environ["TRAINING_ROLE"] = "TRAINER" os.environ["PADDLE_TRAINER_ID"] = "1" - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_slot = paddle.fluid.layers.data( - name="slot", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_slot = paddle.fluid.layers.data(name="slot", + shape=[1], + dtype='int64') input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') - emb = paddle.fluid.layers.embedding( - input=input_slot, size=[10, 9], is_sparse=True) + emb = paddle.fluid.layers.embedding(input=input_slot, + size=[10, 9], + is_sparse=True) input_x = paddle.concat(x=[input_x, emb], axis=1) fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = 
paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) role = fleet.PaddleCloudRoleMaker(is_collective=False) @@ -71,8 +76,9 @@ class TestFleetBase(unittest.TestCase): fleet.init_worker() fleet.fleet.save(dirname="/tmp", feed=['x', 'y'], fetch=[avg_cost]) - fleet.fleet.save( - dirname="/tmp", feed=[input_x, input_y], fetch=[avg_cost]) + fleet.fleet.save(dirname="/tmp", + feed=[input_x, input_y], + fetch=[avg_cost]) fleet.fleet.save(dirname="/tmp") fleet.load_model(path="/tmp", mode=0) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_3.py b/python/paddle/fluid/tests/unittests/test_fleet_base_3.py index 8dcacafabbb..5e6aabe308e 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base_3.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base_3.py @@ -18,10 +18,12 @@ import paddle import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker import paddle.fluid as fluid + paddle.enable_static() class TestFleetBase_1(unittest.TestCase): + def setUp(self): os.environ["POD_IP"] = "127.0.0.1" os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" @@ -30,15 +32,16 @@ class TestFleetBase_1(unittest.TestCase): "127.0.0.1:36001,127.0.0.2:36001" def test_collective_minimize(self): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) role = role_maker.PaddleCloudRoleMaker(is_collective=True) @@ -50,6 +53,7 @@ class TestFleetBase_1(unittest.TestCase): class TestFleetBase(unittest.TestCase): + def setUp(self): os.environ["POD_IP"] = "127.0.0.1" os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" @@ -58,15 +62,16 @@ class TestFleetBase(unittest.TestCase): "127.0.0.1:36001,127.0.0.2:36001" def test_fleet_get_applied_optimizer(self): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) fleet.init(is_collective=True) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_4.py b/python/paddle/fluid/tests/unittests/test_fleet_base_4.py index dba409ec920..fa154285f21 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base_4.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base_4.py @@ -22,6 +22,7 @@ paddle.enable_static() class TestFleetBase(unittest.TestCase): + def setUp(self): os.environ["POD_IP"] = "127.0.0.1" os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" diff --git 
a/python/paddle/fluid/tests/unittests/test_fleet_base_single.py b/python/paddle/fluid/tests/unittests/test_fleet_base_single.py index ff54035045b..a782bf3842d 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base_single.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base_single.py @@ -14,6 +14,7 @@ import numpy as np import os + cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES') if cuda_visible_devices is None or cuda_visible_devices == "": os.environ['CUDA_VISIBLE_DEVICES'] = '0' @@ -28,6 +29,7 @@ import paddle.nn as nn class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() self._linear1 = nn.Linear(10, 10) @@ -38,6 +40,7 @@ class LinearNet(nn.Layer): class TestFleetDygraphSingle(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36213" os.environ["PADDLE_CURRENT_ENDPOINTS"] = "127.0.0.1:36213" @@ -50,8 +53,8 @@ class TestFleetDygraphSingle(unittest.TestCase): layer = LinearNet() loss_fn = nn.MSELoss() - adam = paddle.optimizer.Adam( - learning_rate=0.001, parameters=layer.parameters()) + adam = paddle.optimizer.Adam(learning_rate=0.001, + parameters=layer.parameters()) adam = fleet.distributed_optimizer(adam) dp_layer = fleet.distributed_model(layer) @@ -66,14 +69,14 @@ class TestFleetDygraphSingle(unittest.TestCase): class TestFleetBaseSingleRunCollective(unittest.TestCase): + def setUp(self): pass def gen_data(self): return { "x": np.random.random(size=(128, 32)).astype('float32'), - "y": np.random.randint( - 2, size=(128, 1)).astype('int64') + "y": np.random.randint(2, size=(128, 1)).astype('int64') } def test_single_run_collective_minimize(self): @@ -91,8 +94,8 @@ class TestFleetBaseSingleRunCollective(unittest.TestCase): optimizer = fleet.distributed_optimizer(optimizer) optimizer.minimize(avg_cost) - place = fluid.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if paddle.fluid.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(paddle.static.default_startup_program()) @@ -103,14 +106,14 @@ class TestFleetBaseSingleRunCollective(unittest.TestCase): class TestFleetBaseSingleRunPS(unittest.TestCase): + def setUp(self): pass def gen_data(self): return { "x": np.random.random(size=(128, 32)).astype('float32'), - "y": np.random.randint( - 2, size=(128, 1)).astype('int64') + "y": np.random.randint(2, size=(128, 1)).astype('int64') } def test_single_run_ps_minimize(self): diff --git a/python/paddle/fluid/tests/unittests/test_fleet_checkpoint.py b/python/paddle/fluid/tests/unittests/test_fleet_checkpoint.py index fc57602b445..f48b166f970 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_checkpoint.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_checkpoint.py @@ -26,6 +26,7 @@ from paddle.fluid.incubate.checkpoint.checkpoint_saver import CheckpointSaver class FleetTest(unittest.TestCase): + def _test_checkpoint(self, fs, dir_path): file_name = "persistables" @@ -38,8 +39,8 @@ class FleetTest(unittest.TestCase): image = fluid.data(name='img', shape=[None, 28, 28], dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') - feeder = fluid.DataFeeder( - feed_list=[image, label], place=fluid.CPUPlace()) + feeder = fluid.DataFeeder(feed_list=[image, label], + place=fluid.CPUPlace()) predict = fluid.layers.fc(input=image, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=predict, label=label) avg_loss = fluid.layers.mean(loss) @@ -53,21 +54,26 @@ class 
FleetTest(unittest.TestCase): status = ExeTrainStatus() status.epoch_no = 2 - _, n1 = fleet.save_checkpoint( - exe, dir_path, trainer_id=0, train_status=status, fs=fs) + _, n1 = fleet.save_checkpoint(exe, + dir_path, + trainer_id=0, + train_status=status, + fs=fs) status2 = ExeTrainStatus() - fleet.load_checkpoint( - exe, dir_path, trainer_id=0, fs=fs, train_status=status2) + fleet.load_checkpoint(exe, + dir_path, + trainer_id=0, + fs=fs, + train_status=status2) self.assertEqual(status2, status) - _, n2 = fleet.save_checkpoint( - exe, - dir_path, - trainer_id=0, - train_status=status, - fs=fs, - remain_all_checkpoint=False) + _, n2 = fleet.save_checkpoint(exe, + dir_path, + trainer_id=0, + train_status=status, + fs=fs, + remain_all_checkpoint=False) self.assertEqual(n2, n1 + 1) c = CheckpointSaver(fs) @@ -75,40 +81,37 @@ class FleetTest(unittest.TestCase): assert len(cp_nos) == 1 # cleanup all others # unnormal - # test remain_all_checkpoint - fleet.save_checkpoint( - exe, - dir_path, - trainer_id=0, - train_status=status, - fs=fs, - remain_all_checkpoint=False) + # test remain_all_checkpoint + fleet.save_checkpoint(exe, + dir_path, + trainer_id=0, + train_status=status, + fs=fs, + remain_all_checkpoint=False) # can't save under a file fs = LocalFS() cache_path = "./.load_cache" fs.touch(cache_path) try: - fleet.save_checkpoint( - exe, - dir_path, - trainer_id=0, - train_status=status, - fs=fs, - cache_path=cache_path) + fleet.save_checkpoint(exe, + dir_path, + trainer_id=0, + train_status=status, + fs=fs, + cache_path=cache_path) self.assertFalse(True) except: pass # can't load under a file try: - fleet.load_checkpoint( - exe, - dir_path, - trainer_id=0, - train_status=status2, - fs=fs, - cache_path=cache_path) + fleet.load_checkpoint(exe, + dir_path, + trainer_id=0, + train_status=status2, + fs=fs, + cache_path=cache_path) self.assertFalse(True) except: pass diff --git a/python/paddle/fluid/tests/unittests/test_fleet_dgc_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_dgc_meta_optimizer.py index 3a64c1818cc..522b563bc56 100755 --- a/python/paddle/fluid/tests/unittests/test_fleet_dgc_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_dgc_meta_optimizer.py @@ -25,14 +25,15 @@ paddle.enable_static() class TestFleetDGCOptimizer(TestFleetMetaOptimizer): + def test_dgc_optimizer_backward(self): """ test dgc optimizer backward """ train_prog, startup_prog = fluid.Program(), fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) self.set_strategy(strategy, 'dgc') - opt = fluid.optimizer.MomentumOptimizer( - learning_rate=0.001, momentum=0.9) + opt = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, + momentum=0.9) dgc_opt = DGCOptimizer(opt) role = role_maker.PaddleCloudRoleMaker(is_collective=True) dgc_opt._set_basic_info(avg_cost, role, opt, strategy) @@ -47,8 +48,8 @@ class TestFleetDGCOptimizer(TestFleetMetaOptimizer): avg_cost, strategy = self.net(train_prog, startup_prog) self.set_strategy(strategy, 'dgc') - opt = fluid.optimizer.MomentumOptimizer( - learning_rate=0.001, momentum=0.9) + opt = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, + momentum=0.9) dgc_opt = DGCOptimizer(opt) role = role_maker.PaddleCloudRoleMaker(is_collective=True) dgc_opt._set_basic_info(avg_cost, role, opt, strategy) @@ -66,8 +67,8 @@ class TestFleetDGCOptimizer(TestFleetMetaOptimizer): avg_cost, strategy = self.net(train_prog, startup_prog) self.set_strategy(strategy, 'dgc') - opt = fluid.optimizer.MomentumOptimizer( - learning_rate=0.001, 
momentum=0.9) + opt = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, + momentum=0.9) dgc_opt = DGCOptimizer(opt) role = role_maker.PaddleCloudRoleMaker(is_collective=True) dgc_opt._set_basic_info(avg_cost, role, opt, strategy) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py b/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py index ffc3f2b21a4..455a7a30cfd 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py @@ -18,6 +18,7 @@ import os class TestStrategyConfig(unittest.TestCase): + def test_amp(self): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.amp = True @@ -266,10 +267,12 @@ class TestStrategyConfig(unittest.TestCase): [1, 2] } strategy.sparse_table_configs = configs - self.assertEqual(strategy.sparse_table_configs[0] - .accessor.embed_sgd_param.adagrad.learning_rate, 0.05) - self.assertEqual(strategy.sparse_table_configs[0] - .accessor.table_accessor_save_param[0].param, 1) + self.assertEqual( + strategy.sparse_table_configs[0].accessor.embed_sgd_param.adagrad. + learning_rate, 0.05) + self.assertEqual( + strategy.sparse_table_configs[0].accessor. + table_accessor_save_param[0].param, 1) strategy.adam_d2sum = True self.assertEqual(strategy.adam_d2sum, True) @@ -286,22 +289,25 @@ class TestStrategyConfig(unittest.TestCase): configs = {} configs['emb'] = {"sparse_optimizer": "adagrad"} strategy.fleet_desc_configs = configs - self.assertEqual(strategy.sparse_table_configs[0] - .accessor.embed_sgd_param.adagrad.learning_rate, 0.05) + self.assertEqual( + strategy.sparse_table_configs[0].accessor.embed_sgd_param.adagrad. + learning_rate, 0.05) strategy = paddle.distributed.fleet.DistributedStrategy() configs = {} configs['emb'] = {"sparse_optimizer": "naive"} strategy.fleet_desc_configs = configs - self.assertEqual(strategy.sparse_table_configs[0] - .accessor.embed_sgd_param.naive.learning_rate, 0.05) + self.assertEqual( + strategy.sparse_table_configs[0].accessor.embed_sgd_param.naive. + learning_rate, 0.05) strategy = paddle.distributed.fleet.DistributedStrategy() configs = {} configs['emb'] = {"sparse_optimizer": "adam"} strategy.fleet_desc_configs = configs - self.assertEqual(strategy.sparse_table_configs[0] - .accessor.embed_sgd_param.adam.beta1_decay_rate, 0.9) + self.assertEqual( + strategy.sparse_table_configs[0].accessor.embed_sgd_param.adam. + beta1_decay_rate, 0.9) strategy = paddle.distributed.fleet.DistributedStrategy() configs = {} @@ -310,10 +316,12 @@ class TestStrategyConfig(unittest.TestCase): "embed_sparse_optimizer": "std_adagrad" } strategy.fleet_desc_configs = configs - self.assertEqual(strategy.sparse_table_configs[0] - .accessor.ctr_accessor_param.show_scale, False) - self.assertEqual(strategy.sparse_table_configs[0] - .accessor.embed_sgd_param.adagrad.initial_range, 0) + self.assertEqual( + strategy.sparse_table_configs[0].accessor.ctr_accessor_param. + show_scale, False) + self.assertEqual( + strategy.sparse_table_configs[0].accessor.embed_sgd_param.adagrad. 
+ initial_range, 0) strategy = paddle.distributed.fleet.DistributedStrategy() configs = {} @@ -322,9 +330,9 @@ class TestStrategyConfig(unittest.TestCase): "embed_sparse_optimizer": "std_adagrad" } strategy.fleet_desc_configs = configs - self.assertEqual(strategy.sparse_table_configs[0] - .accessor.embed_sgd_param.adagrad.initial_range, - 0.0001) + self.assertEqual( + strategy.sparse_table_configs[0].accessor.embed_sgd_param.adagrad. + initial_range, 0.0001) def test_trainer_desc_configs(self): strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_elastic_collective.py b/python/paddle/fluid/tests/unittests/test_fleet_elastic_collective.py index 2d2f019c5ed..3bc5d886011 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_elastic_collective.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_elastic_collective.py @@ -32,6 +32,7 @@ print("test") class TestCollectiveLauncher(unittest.TestCase): + def setUp(self): file_dir = os.path.dirname(os.path.abspath(__file__)) @@ -40,6 +41,7 @@ class TestCollectiveLauncher(unittest.TestCase): f.write(fake_python_code) def test_launch(self): + class Argument: elastic_server = "127.0.0.1:2379" job_id = "test_job_id_123" @@ -78,6 +80,7 @@ class TestCollectiveLauncher(unittest.TestCase): pass def test_stop(self): + class Argument: elastic_server = "127.0.0.1:2379" job_id = "test_job_id_123" diff --git a/python/paddle/fluid/tests/unittests/test_fleet_elastic_init.py b/python/paddle/fluid/tests/unittests/test_fleet_elastic_init.py index 10028d2d98f..b7310ab4486 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_elastic_init.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_elastic_init.py @@ -25,7 +25,9 @@ from paddle.distributed.fleet.launch_utils import DistributeMode class TestElasticInit(unittest.TestCase): + def setUp(self): + class Argument: elastic_server = "127.0.0.1:2379" job_id = "test_job_id_123" diff --git a/python/paddle/fluid/tests/unittests/test_fleet_elastic_manager.py b/python/paddle/fluid/tests/unittests/test_fleet_elastic_manager.py index 6dc9f69d03f..61d84151b68 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_elastic_manager.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_elastic_manager.py @@ -26,11 +26,13 @@ from paddle.distributed.fleet.elastic.manager import ELASTIC_AUTO_PARALLEL_EXIT_ class MockLease(): + def refresh(self): pass class MockEtcdClient: + def __init__(self, lease=None): self._lease = lease @@ -69,10 +71,12 @@ class MockEtcdClient: class TestElasticManager(unittest.TestCase): + def setUp(self): self.etcd_client = MockEtcdClient() def test_elastic_manager_init(self): + class Argument: elastic_server = "127.0.0.1:2379" job_id = "test_job_id_123" @@ -89,6 +93,7 @@ class TestElasticManager(unittest.TestCase): args = Argument() class _MockLease(): + def refresh(self): raise ValueError("valid error, this only for unittest") @@ -96,6 +101,7 @@ class TestElasticManager(unittest.TestCase): elastic = ElasticManager(args, etcd_client=etcd_client) def test_match_faulttolerance(self): + class Argument: elastic_server = "127.0.0.1:2379" job_id = "test_job_id_123" @@ -126,6 +132,7 @@ class TestElasticManager(unittest.TestCase): self.assertEqual(elastic._match(hosts), False) def test_match_elastic(self): + class Argument: elastic_server = "127.0.0.1:2379" job_id = "test_job_id_123" @@ -177,6 +184,7 @@ class TestElasticManager(unittest.TestCase): #self.assertEqual(elastic._match(hosts), True) def 
test_update_hosts_for_faulttolerance(self): + class Argument: elastic_server = "127.0.0.1:2379" job_id = "test_job_id_123" @@ -253,8 +261,8 @@ class TestElasticManager(unittest.TestCase): elastic._update_hosts() #self.assertEqual(elastic.all_host_endpoints, # ["10.10.10.1:6001", "10.10.10.2:6001", "10.10.10.3:6001"]) - self.assertEqual( - os.getenv('PADDLE_TRAINERS'), "10.10.10.1,10.10.10.2,10.10.10.3") + self.assertEqual(os.getenv('PADDLE_TRAINERS'), + "10.10.10.1,10.10.10.2,10.10.10.3") ####################### # elastic, scale in # @@ -279,11 +287,10 @@ class TestElasticManager(unittest.TestCase): elastic._update_hosts() #self.assertEqual(elastic.all_host_endpoints, # ["10.10.10.3:6001", "10.10.10.1:6001", "10.10.10.2:6001"]) - self.assertEqual( - os.getenv('PADDLE_TRAINERS'), "10.10.10.3,10.10.10.1,10.10.10.2") - self.assertEqual( - os.getenv('DISTRIBUTED_TRAINER_ENDPOINTS'), - "10.10.10.3:6001,10.10.10.1:6001,10.10.10.2:6001") + self.assertEqual(os.getenv('PADDLE_TRAINERS'), + "10.10.10.3,10.10.10.1,10.10.10.2") + self.assertEqual(os.getenv('DISTRIBUTED_TRAINER_ENDPOINTS'), + "10.10.10.3:6001,10.10.10.1:6001,10.10.10.2:6001") ############ os.environ[ @@ -305,11 +312,11 @@ class TestElasticManager(unittest.TestCase): #self.assertEqual(elastic.all_host_endpoints, # ["10.10.10.1:6001", "10.10.10.1:6001"]) self.assertEqual(os.getenv('PADDLE_TRAINERS'), "10.10.10.1,10.10.10.1") - self.assertEqual( - os.getenv('DISTRIBUTED_TRAINER_ENDPOINTS'), - "10.10.10.1:6001,10.10.10.1:6003") + self.assertEqual(os.getenv('DISTRIBUTED_TRAINER_ENDPOINTS'), + "10.10.10.1:6001,10.10.10.1:6003") def test_exit(self): + class Argument: elastic_server = "127.0.0.1:2379" job_id = "test_job_id_123" @@ -328,6 +335,7 @@ class TestElasticManager(unittest.TestCase): elastic.exit() def test_pre_hook(self): + class Argument: elastic_server = "127.0.0.1:2379" job_id = "test_job_id_123" @@ -350,6 +358,7 @@ class TestElasticManager(unittest.TestCase): elastic.pre_hook() def test_watch(self): + class Argument: elastic_server = "127.0.0.1:2379" job_id = "test_job_id_123" @@ -365,6 +374,7 @@ class TestElasticManager(unittest.TestCase): elastic_pre_hook = None class ElasticLauncher: + def watch(self): return ELASTIC_AUTO_PARALLEL_EXIT_CODE @@ -378,11 +388,14 @@ class TestElasticManager(unittest.TestCase): elastic.watch() def test_launcher_interface_check_procs(self): + class Proc: + def poll(self): return ELASTIC_AUTO_PARALLEL_EXIT_CODE class ProcList: + def __init__(self): self.proc = Proc() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_run.py b/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_run.py index 544fe4dd43e..0c672f1ff1e 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_run.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_run.py @@ -22,6 +22,7 @@ paddle.enable_static() class TestDistModelRun(unittest.TestCase): + def test_dist_model_run(self): # step 0: declare folder to save the model and params folder = './dist_model_run_test/' @@ -39,8 +40,10 @@ class TestDistModelRun(unittest.TestCase): x_data = np.random.randn(28, 28).astype('float32') y_data = np.random.randint(0, 9, size=[28, 1]).astype('int64') exe.run(paddle.static.default_main_program(), - feed={'x': x_data, - 'y': y_data}, + feed={ + 'x': x_data, + 'y': y_data + }, fetch_list=[avg_loss]) paddle.static.save_inference_model(path_prefix, [x, y], [avg_loss], exe) print('save model to', path_prefix) @@ -63,11 +66,13 @@ class TestDistModelRun(unittest.TestCase): print("dist 
model rst:", dist_model_rst) # step 4: use framework's api to inference with fake data - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model(path_prefix, exe)) + [inference_program, feed_target_names, + fetch_targets] = (paddle.static.load_inference_model(path_prefix, exe)) results = exe.run(inference_program, - feed={'x': x_tensor, - 'y': y_tensor}, + feed={ + 'x': x_tensor, + 'y': y_tensor + }, fetch_list=fetch_targets) load_inference_model_rst = results[0] print("load inference model api rst:", load_inference_model_rst) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_tensor.py b/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_tensor.py index 2d4fe92f051..98affdfa540 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_tensor.py @@ -22,12 +22,13 @@ paddle.enable_static() class TestDistModelTensor(unittest.TestCase): + def test_dist_model_tensor(self): tensor_32 = np.random.randint(10, 20, size=[20, 2]).astype('int32') dist_tensor32 = DistModelTensor(tensor_32, '32_tensor') self.assertEqual(dist_tensor32.dtype, DistModelDataType.INT32) - self.assertEqual( - dist_tensor32.data.tolist('int32'), tensor_32.ravel().tolist()) + self.assertEqual(dist_tensor32.data.tolist('int32'), + tensor_32.ravel().tolist()) # the length is how many byte the data contains self.assertEqual(dist_tensor32.data.length(), 40 * 4) self.assertEqual(dist_tensor32.name, '32_tensor') @@ -38,8 +39,8 @@ class TestDistModelTensor(unittest.TestCase): tensor_64 = np.random.randint(10, 20, size=[20, 2]).astype('int64') dist_tensor64 = DistModelTensor(tensor_64, '64_tensor') self.assertEqual(dist_tensor64.dtype, DistModelDataType.INT64) - self.assertEqual( - dist_tensor64.data.tolist('int64'), tensor_64.ravel().tolist()) + self.assertEqual(dist_tensor64.data.tolist('int64'), + tensor_64.ravel().tolist()) self.assertEqual(dist_tensor64.data.length(), 40 * 8) self.assertEqual(dist_tensor64.name, '64_tensor') dist_tensor64.data.reset(tensor_64) @@ -49,9 +50,8 @@ class TestDistModelTensor(unittest.TestCase): tensor_float = np.random.randn(20, 2).astype('float32') dist_tensor_float = DistModelTensor(tensor_float, 'float_tensor') self.assertEqual(dist_tensor_float.dtype, DistModelDataType.FLOAT32) - self.assertEqual( - dist_tensor_float.data.tolist('float32'), - tensor_float.ravel().tolist()) + self.assertEqual(dist_tensor_float.data.tolist('float32'), + tensor_float.ravel().tolist()) self.assertEqual(dist_tensor_float.data.length(), 40 * 4) self.assertEqual(dist_tensor_float.name, 'float_tensor') dist_tensor_float.data.reset(tensor_float) @@ -62,9 +62,8 @@ class TestDistModelTensor(unittest.TestCase): dist_tensor_float_16 = DistModelTensor(tensor_float_16, 'float_tensor_16') self.assertEqual(dist_tensor_float_16.dtype, DistModelDataType.FLOAT16) - self.assertEqual( - dist_tensor_float_16.data.tolist('float16'), - tensor_float_16.ravel().tolist()) + self.assertEqual(dist_tensor_float_16.data.tolist('float16'), + tensor_float_16.ravel().tolist()) self.assertEqual(dist_tensor_float_16.data.length(), 40 * 2) self.assertEqual(dist_tensor_float_16.name, 'float_tensor_16') dist_tensor_float_16.data.reset(tensor_float_16) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor.py b/python/paddle/fluid/tests/unittests/test_fleet_executor.py index 8b73a714bbb..b824df45e3e 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor.py +++ 
b/python/paddle/fluid/tests/unittests/test_fleet_executor.py @@ -21,6 +21,7 @@ paddle.enable_static() class TestFleetExecutor(unittest.TestCase): + def fake_fleet_opt(self): # TODO: Fake for coverage will be removed in the future import paddle.distributed.fleet as fleet @@ -42,10 +43,12 @@ class TestFleetExecutor(unittest.TestCase): exe = paddle.static.Executor(place) empty_program = paddle.static.Program() with fluid.program_guard(empty_program, empty_program): - x = fluid.layers.data( - name='x', shape=x_data.shape, dtype=x_data.dtype) - y = fluid.layers.data( - name='y', shape=y_data.shape, dtype=y_data.dtype) + x = fluid.layers.data(name='x', + shape=x_data.shape, + dtype=x_data.dtype) + y = fluid.layers.data(name='y', + shape=y_data.shape, + dtype=y_data.dtype) z = x + y a = 2 * x + 3 * y loss = paddle.mean(a) @@ -54,8 +57,8 @@ class TestFleetExecutor(unittest.TestCase): steps_per_pass = 10 bd = [steps_per_pass * p for p in passes] lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)] - lr_val = paddle.optimizer.lr.PiecewiseDecay( - boundaries=bd, values=lr) + lr_val = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd, + values=lr) opt = paddle.optimizer.AdamW( learning_rate=lr_val, grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)) @@ -66,8 +69,10 @@ class TestFleetExecutor(unittest.TestCase): "section_program": empty_program } res = exe.run(empty_program, - feed={'x': x_data, - 'y': y_data}, + feed={ + 'x': x_data, + 'y': y_data + }, fetch_list=[z.name, a.name]) return res diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_multi_devices.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_multi_devices.py index fb82c71b2ff..c21549c3ce3 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_multi_devices.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_multi_devices.py @@ -22,6 +22,7 @@ paddle.enable_static() class TestFleetExecutor(unittest.TestCase): + def run_fleet_executor(self, place, fleet_opt=dict()): exe = paddle.static.Executor(place) empty_program = paddle.static.Program() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py index 4bbb3bff07f..295530d9c9d 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py @@ -21,6 +21,7 @@ paddle.enable_static() class TestFleetExecutor(unittest.TestCase): + def fake_fleet_opt(self): # TODO: Fake for coverage will be removed in the future import paddle.distributed.fleet as fleet @@ -42,10 +43,12 @@ class TestFleetExecutor(unittest.TestCase): exe = paddle.static.Executor(place) empty_program = paddle.static.Program() with fluid.program_guard(empty_program, empty_program): - x = fluid.layers.data( - name='x', shape=x_data.shape, dtype=x_data.dtype) - y = fluid.layers.data( - name='y', shape=y_data.shape, dtype=y_data.dtype) + x = fluid.layers.data(name='x', + shape=x_data.shape, + dtype=x_data.dtype) + y = fluid.layers.data(name='y', + shape=y_data.shape, + dtype=y_data.dtype) z = x + y a = 2 * x + 3 * y loss = paddle.mean(a) @@ -54,8 +57,8 @@ class TestFleetExecutor(unittest.TestCase): steps_per_pass = 10 bd = [steps_per_pass * p for p in passes] lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)] - lr_val = paddle.optimizer.lr.PiecewiseDecay( - boundaries=bd, values=lr) + lr_val = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd, + values=lr) 
opt = paddle.optimizer.AdamW( learning_rate=lr_val, grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)) @@ -66,8 +69,10 @@ class TestFleetExecutor(unittest.TestCase): "section_program": empty_program } res = exe.run(empty_program, - feed={'x': x_data, - 'y': y_data}, + feed={ + 'x': x_data, + 'y': y_data + }, fetch_list=[z.name, a.name]) return res diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_task_node.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_task_node.py index 3dae8a5bf6b..0830782c86d 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_task_node.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_task_node.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,6 +20,7 @@ paddle.enable_static() class TestFleetExecutorTaskNode(unittest.TestCase): + def test_task_node(self): program = paddle.static.Program() task_node_0 = core.TaskNode(program.desc, 0, 1, 1) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py index 61064175266..f531b85c3dd 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py @@ -22,14 +22,17 @@ paddle.enable_static() class TestFleetExecutor(unittest.TestCase): + def run_fleet_executor(self, place, x_data, y_data): exe = paddle.static.Executor(place) empty_program = paddle.static.Program() with fluid.program_guard(empty_program, empty_program): - x = fluid.layers.data( - name='x', shape=x_data.shape, dtype=x_data.dtype) - y = fluid.layers.data( - name='y', shape=y_data.shape, dtype=y_data.dtype) + x = fluid.layers.data(name='x', + shape=x_data.shape, + dtype=x_data.dtype) + y = fluid.layers.data(name='y', + shape=y_data.shape, + dtype=y_data.dtype) z = x + y a = 2 * x + 3 * y loss = paddle.mean(a) @@ -38,8 +41,8 @@ class TestFleetExecutor(unittest.TestCase): steps_per_pass = 10 bd = [steps_per_pass * p for p in passes] lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)] - lr_val = paddle.optimizer.lr.PiecewiseDecay( - boundaries=bd, values=lr) + lr_val = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd, + values=lr) opt = paddle.optimizer.AdamW( learning_rate=lr_val, grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)) @@ -61,8 +64,10 @@ class TestFleetExecutor(unittest.TestCase): "section_program": empty_program } res = exe.run(empty_program, - feed={'x': x_data, - 'y': y_data}, + feed={ + 'x': x_data, + 'y': y_data + }, fetch_list=[z.name, a.name]) return res diff --git a/python/paddle/fluid/tests/unittests/test_fleet_fp16_allreduce_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_fp16_allreduce_meta_optimizer.py index efffa9fa88f..d7de5ef3d40 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_fp16_allreduce_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_fp16_allreduce_meta_optimizer.py @@ -23,24 +23,27 @@ 
paddle.enable_static() class TestFleetFP16CompressOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "0" os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" def net(self, main_prog, startup_prog, dtype='float32'): with fluid.program_guard(main_prog, startup_prog): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype=dtype) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype=dtype) + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_gradient_merge_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_gradient_merge_meta_optimizer.py index efe62a32fc3..0f8b36e3f89 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_gradient_merge_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_gradient_merge_meta_optimizer.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestFleetGradientMergeMetaOptimizer(TestFleetMetaOptimizer): + def test_gradient_merge_optimizer(self): train_prog, startup_prog = paddle.fluid.Program(), paddle.fluid.Program( ) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_gradient_scale.py b/python/paddle/fluid/tests/unittests/test_fleet_gradient_scale.py index d64b534398d..7fd6211b33b 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_gradient_scale.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_gradient_scale.py @@ -24,6 +24,7 @@ import os class TestGradientScale(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "0" os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" @@ -34,16 +35,15 @@ class TestGradientScale(unittest.TestCase): prediction = paddle.static.nn.fc(x=[fc_2], size=label_dim, activation='softmax') - cost = paddle.nn.functional.cross_entropy( - input=prediction, label=input_y) + cost = paddle.nn.functional.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.mean(x=cost) return avg_cost def gen_data(self): return { "x": np.random.random(size=(128, 32)).astype('float32'), - "y": np.random.randint( - 2, size=(128, 1)).astype('int64') + "y": np.random.randint(2, size=(128, 1)).astype('int64') } def test_single_gpu(self): @@ -55,10 +55,12 @@ class TestGradientScale(unittest.TestCase): strategy.gradient_scale_configs = {'scale_strategy': 'sum'} with fluid.program_guard(main_program, startup_program): with fluid.unique_name.guard(): - input_x = paddle.static.data( - name="x", shape=[None, 32], dtype='float32') - input_y = paddle.static.data( - name="y", shape=[None, 1], dtype='int64') + input_x = paddle.static.data(name="x", + shape=[None, 32], + dtype='float32') + input_y = paddle.static.data(name="y", + shape=[None, 1], + dtype='int64') cost = self.mlp(input_x=input_x, input_y=input_y) output_name = cost.name optimizer = fleet.distributed_optimizer(fluid.optimizer.Adam(), diff --git a/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py 
b/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py index 393de688aa5..6ca078cdde7 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py @@ -21,6 +21,7 @@ paddle.enable_static() class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): + def setUp(self): try: self._dist_ut_port_0 = int(os.environ["PADDLE_DIST_UT_PORT"]) @@ -33,46 +34,58 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): port_a = self._dist_ut_port_0 port_b = self._dist_ut_port_1 node_a = { - "PADDLE_TRAINER_ID": "0", - "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:{}".format(port_a), - "PADDLE_TRAINERS_NUM": "2", + "PADDLE_TRAINER_ID": + "0", + "PADDLE_CURRENT_ENDPOINT": + "127.0.0.1:{}".format(port_a), + "PADDLE_TRAINERS_NUM": + "2", "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:{},127.0.0.1:{}".format(port_a, port_b), - "http_proxy": "", - "https_proxy": "" + "http_proxy": + "", + "https_proxy": + "" } node_b = { - "PADDLE_TRAINER_ID": "1", - "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:{}".format(port_b), - "PADDLE_TRAINERS_NUM": "2", + "PADDLE_TRAINER_ID": + "1", + "PADDLE_CURRENT_ENDPOINT": + "127.0.0.1:{}".format(port_b), + "PADDLE_TRAINERS_NUM": + "2", "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:{},127.0.0.1:{}".format(port_a, port_b), - "http_proxy": "", - "https_proxy": "" + "http_proxy": + "", + "https_proxy": + "" } def node_func(): import paddle.distributed.fleet as fleet fleet.init(is_collective=True) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) exe = paddle.fluid.Executor(place=paddle.fluid.CPUPlace()) @@ -89,48 +102,60 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): port_b = self._dist_ut_port_1 + 2 node_a = { - "PADDLE_TRAINER_ID": "0", - "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:{}".format(port_a), - "PADDLE_TRAINERS_NUM": "2", + "PADDLE_TRAINER_ID": + "0", + "PADDLE_CURRENT_ENDPOINT": + "127.0.0.1:{}".format(port_a), + "PADDLE_TRAINERS_NUM": + "2", "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:{},127.0.0.1:{}".format(port_a, port_b), - "http_proxy": "", - "https_proxy": "" + "http_proxy": + "", + "https_proxy": + "" } node_b = { - "PADDLE_TRAINER_ID": "1", - "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:{}".format(port_b), - "PADDLE_TRAINERS_NUM": "2", + "PADDLE_TRAINER_ID": + "1", + "PADDLE_CURRENT_ENDPOINT": + "127.0.0.1:{}".format(port_b), + "PADDLE_TRAINERS_NUM": + "2", "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:{},127.0.0.1:{}".format(port_a, port_b), - "http_proxy": "", - "https_proxy": "" + "http_proxy": + 
"", + "https_proxy": + "" } def node_func(): import paddle.distributed.fleet as fleet fleet.init(is_collective=True) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() strategy.nccl_comm_num = 2 strategy.sync_nccl_allreduce = True optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) exe = paddle.fluid.Executor(place=paddle.fluid.CPUPlace()) exe.run(paddle.fluid.default_startup_program()) @@ -140,8 +165,7 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): def gen_data(): return { "x": np.random.random(size=(128, 32)).astype('float32'), - "y": np.random.randint( - 2, size=(128, 1)).astype('int64') + "y": np.random.randint(2, size=(128, 1)).astype('int64') } for i in range(10): @@ -158,46 +182,58 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): port_a = self._dist_ut_port_0 + 4 port_b = self._dist_ut_port_1 + 4 node_a = { - "PADDLE_TRAINER_ID": "0", - "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:{}".format(port_a), - "PADDLE_TRAINERS_NUM": "2", + "PADDLE_TRAINER_ID": + "0", + "PADDLE_CURRENT_ENDPOINT": + "127.0.0.1:{}".format(port_a), + "PADDLE_TRAINERS_NUM": + "2", "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:{},127.0.0.1:{}".format(port_a, port_b), - "http_proxy": "", - "https_proxy": "" + "http_proxy": + "", + "https_proxy": + "" } node_b = { - "PADDLE_TRAINER_ID": "1", - "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:{}".format(port_b), - "PADDLE_TRAINERS_NUM": "2", + "PADDLE_TRAINER_ID": + "1", + "PADDLE_CURRENT_ENDPOINT": + "127.0.0.1:{}".format(port_b), + "PADDLE_TRAINERS_NUM": + "2", "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:{},127.0.0.1:{}".format(port_a, port_b), - "http_proxy": "", - "https_proxy": "" + "http_proxy": + "", + "https_proxy": + "" } def node_func(): import paddle.distributed.fleet as fleet fleet.init(is_collective=True) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) - optimizer = 
fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) exe = paddle.fluid.Executor(place=paddle.fluid.CPUPlace()) @@ -213,48 +249,60 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): port_a = self._dist_ut_port_0 + 6 port_b = self._dist_ut_port_1 + 6 node_a = { - "PADDLE_TRAINER_ID": "0", - "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:{}".format(port_a), - "PADDLE_TRAINERS_NUM": "2", + "PADDLE_TRAINER_ID": + "0", + "PADDLE_CURRENT_ENDPOINT": + "127.0.0.1:{}".format(port_a), + "PADDLE_TRAINERS_NUM": + "2", "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:{},127.0.0.1:{}".format(port_a, port_b), - "http_proxy": "", - "https_proxy": "" + "http_proxy": + "", + "https_proxy": + "" } node_b = { - "PADDLE_TRAINER_ID": "1", - "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:{}".format(port_b), - "PADDLE_TRAINERS_NUM": "2", + "PADDLE_TRAINER_ID": + "1", + "PADDLE_CURRENT_ENDPOINT": + "127.0.0.1:{}".format(port_b), + "PADDLE_TRAINERS_NUM": + "2", "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:{},127.0.0.1:{}".format(port_a, port_b), - "http_proxy": "", - "https_proxy": "" + "http_proxy": + "", + "https_proxy": + "" } def node_func(): import paddle.distributed.fleet as fleet fleet.init(is_collective=True) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() strategy.nccl_comm_num = 2 strategy.sync_nccl_allreduce = True optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) exe = paddle.fluid.Executor(place=paddle.fluid.CPUPlace()) exe.run(paddle.fluid.default_startup_program()) @@ -264,8 +312,7 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): def gen_data(): return { "x": np.random.random(size=(128, 32)).astype('float32'), - "y": np.random.randint( - 2, size=(128, 1)).astype('int64') + "y": np.random.randint(2, size=(128, 1)).astype('int64') } for i in range(10): diff --git a/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py b/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py index 628f1db80d2..2afe4af3645 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py @@ -21,6 +21,7 @@ from launch_function_helper import launch_func class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): + def test_graph_execution_optimizer(self): node_a = { "PADDLE_TRAINER_ID": "0", @@ -43,26 +44,28 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): def node_func(): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - input_x = paddle.fluid.layers.data( - name="x", 
shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() strategy.nccl_comm_num = 2 strategy.sync_nccl_allreduce = True optimizer = paddle.optimizer.SGD(learning_rate=0.01) - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) exe = paddle.fluid.Executor(place=paddle.fluid.CPUPlace()) exe.run(paddle.fluid.default_startup_program()) @@ -72,8 +75,7 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): def gen_data(): return { "x": np.random.random(size=(128, 32)).astype('float32'), - "y": np.random.randint( - 2, size=(128, 1)).astype('int64') + "y": np.random.randint(2, size=(128, 1)).astype('int64') } for i in range(5): diff --git a/python/paddle/fluid/tests/unittests/test_fleet_hybrid_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_hybrid_meta_optimizer.py index 35b74eac4b0..928ea06a611 100755 --- a/python/paddle/fluid/tests/unittests/test_fleet_hybrid_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_hybrid_meta_optimizer.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestFleetHybridOptimizer(TestFleetMetaOptimizer): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "3" os.environ["PADDLE_TRAINER_ENDPOINTS"] = \ @@ -193,8 +194,11 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): strategy.fuse_grad_size_in_MB = 32 clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0) - self.optimizer( - avg_cost, strategy, train_prog, startup_prog, grad_clip=clip) + self.optimizer(avg_cost, + strategy, + train_prog, + startup_prog, + grad_clip=clip) train_prog = train_prog._pipeline_opt['section_program'] startup_prog = startup_prog._pipeline_opt['startup_program'] self.debug_program(train_prog, startup_prog) @@ -267,8 +271,11 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): strategy.fuse_grad_merge = True clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0) - self.optimizer( - avg_cost, strategy, train_prog, startup_prog, grad_clip=clip) + self.optimizer(avg_cost, + strategy, + train_prog, + startup_prog, + grad_clip=clip) train_prog = train_prog._pipeline_opt['section_program'] startup_prog = startup_prog._pipeline_opt['startup_program'] self.debug_program(train_prog, startup_prog) @@ -325,7 +332,9 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): self.set_strategy(strategy, 'pipeline') self.set_strategy(strategy, 'amp') - strategy.amp_configs = {'custom_black_varnames': ['fc_6.b_0'], } + strategy.amp_configs = { + 'custom_black_varnames': ['fc_6.b_0'], + } strategy.recompute = True strategy.recompute_configs = { "checkpoints": @@ -397,6 +406,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): class TestFleetHybridOptimizerBoundary(TestFleetMetaOptimizer): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "3" 
os.environ["PADDLE_TRAINER_ENDPOINTS"] = \ @@ -430,8 +440,11 @@ class TestFleetHybridOptimizerBoundary(TestFleetMetaOptimizer): strategy.fuse_grad_size_in_MB = 32 clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0) - self.optimizer( - avg_cost, strategy, train_prog, startup_prog, grad_clip=clip) + self.optimizer(avg_cost, + strategy, + train_prog, + startup_prog, + grad_clip=clip) train_prog = train_prog._pipeline_opt['section_program'] startup_prog = startup_prog._pipeline_opt['startup_program'] self.debug_program(train_prog, startup_prog) @@ -491,8 +504,11 @@ class TestFleetHybridOptimizerBoundary(TestFleetMetaOptimizer): strategy.fuse_grad_size_in_MB = 32 clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0) - self.optimizer( - avg_cost, strategy, train_prog, startup_prog, grad_clip=clip) + self.optimizer(avg_cost, + strategy, + train_prog, + startup_prog, + grad_clip=clip) train_prog = train_prog._pipeline_opt['section_program'] startup_prog = startup_prog._pipeline_opt['startup_program'] self.debug_program(train_prog, startup_prog) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py index 022e0b99ce8..f6f3f50be0d 100755 --- a/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestFleetLambMetaOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "1" os.environ[ @@ -31,10 +32,12 @@ class TestFleetLambMetaOptimizer(unittest.TestCase): def net(self, main_prog, startup_prog): with fluid.program_guard(main_prog, startup_prog): with fluid.unique_name.guard(): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, @@ -43,8 +46,8 @@ class TestFleetLambMetaOptimizer(unittest.TestCase): prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() @@ -75,8 +78,8 @@ class TestFleetLambMetaOptimizer(unittest.TestCase): startup_prog = fluid.Program() train_prog = fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - optimizer = paddle.fluid.optimizer.Momentum( - learning_rate=0.1, momentum=0.9) + optimizer = paddle.fluid.optimizer.Momentum(learning_rate=0.1, + momentum=0.9) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) @@ -107,15 +110,16 @@ class TestFleetLambMetaOptimizer(unittest.TestCase): def test_lamb_apply_with_amp(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = 
paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py index bee6acf7324..b4f0c93d09c 100755 --- a/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestFleetLarsMetaOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "1" os.environ[ @@ -31,10 +32,12 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase): def net(self, main_prog, startup_prog): with fluid.program_guard(main_prog, startup_prog): with fluid.unique_name.guard(): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, @@ -43,8 +46,8 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase): prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() @@ -64,8 +67,8 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase): startup_prog = fluid.Program() train_prog = fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - optimizer = paddle.fluid.optimizer.Momentum( - learning_rate=0.01, momentum=0.9) + optimizer = paddle.fluid.optimizer.Momentum(learning_rate=0.01, + momentum=0.9) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) @@ -91,8 +94,8 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase): startup_prog = fluid.Program() train_prog = fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - optimizer = paddle.fluid.optimizer.Momentum( - learning_rate=0.01, momentum=0.9) + optimizer = paddle.fluid.optimizer.Momentum(learning_rate=0.01, + momentum=0.9) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) @@ -108,15 +111,16 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase): def test_lars_apply_with_amp(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = 
paddle.distributed.fleet.DistributedStrategy() @@ -139,8 +143,8 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase): "exclude_from_weight_decay": ["batch_norm", ".b"], } - optimizer = paddle.fluid.optimizer.Momentum( - learning_rate=0.01, momentum=0.9) + optimizer = paddle.fluid.optimizer.Momentum(learning_rate=0.01, + momentum=0.9) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_localsgd_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_localsgd_meta_optimizer.py index bafb2419123..ac7b203d5ee 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_localsgd_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_localsgd_meta_optimizer.py @@ -26,6 +26,7 @@ paddle.enable_static() class TestFleetLocalSGDMetaOptimizer(TestFleetMetaOptimizer): + def test_localsgd_optimizer(self): train_prog, startup_prog = fluid.Program(), fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) @@ -63,6 +64,7 @@ class TestFleetLocalSGDMetaOptimizer(TestFleetMetaOptimizer): class TestFleetAdaptiveLocalSGDMetaOptimizer(TestFleetMetaOptimizer): + def test_adaptive_localsgd_optimizer(self): train_prog, startup_prog = fluid.Program(), fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_meta_optimizer_base.py b/python/paddle/fluid/tests/unittests/test_fleet_meta_optimizer_base.py index dfea848aadf..f39f916dbbe 100755 --- a/python/paddle/fluid/tests/unittests/test_fleet_meta_optimizer_base.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_meta_optimizer_base.py @@ -22,15 +22,18 @@ from paddle.distributed.fleet.meta_optimizers.meta_optimizer_base import MetaOpt class TestFleetMetaOptimizerBase(unittest.TestCase): + def net(main_prog, startup_prog): with fluid.program_guard(main_prog, startup_prog): with fluid.unique_name.guard(): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, @@ -39,8 +42,8 @@ class TestFleetMetaOptimizerBase(unittest.TestCase): prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_metric.py b/python/paddle/fluid/tests/unittests/test_fleet_metric.py index 5dce59ac23d..ae231351888 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_metric.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_metric.py @@ -34,6 +34,7 @@ class TestFleetMetric(unittest.TestCase): """Set up, set envs.""" class FakeUtil(UtilBase): + def __init__(self, fake_fleet): super(FakeUtil, self).__init__() self.fleet = fake_fleet @@ -80,18 +81,16 @@ class TestFleetMetric(unittest.TestCase): train = fluid.Program() startup = fluid.Program() with fluid.program_guard(train, startup): - t = fluid.layers.create_global_var( - shape=[1, 1], - value=1, - 
dtype='int64', - persistable=True, - force_cpu=True) - t1 = fluid.layers.create_global_var( - shape=[1, 1], - value=1, - dtype='int64', - persistable=True, - force_cpu=True) + t = fluid.layers.create_global_var(shape=[1, 1], + value=1, + dtype='int64', + persistable=True, + force_cpu=True) + t1 = fluid.layers.create_global_var(shape=[1, 1], + value=1, + dtype='int64', + persistable=True, + force_cpu=True) place = fluid.CPUPlace() exe = fluid.Executor(place) scope = fluid.Scope() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py b/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py index e6138296a6c..2dccf8bca82 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py @@ -64,13 +64,13 @@ class TestFleet1(unittest.TestCase): cost = fluid.layers.log_loss(fc, label_cast) try: adam = fluid.optimizer.Adam(learning_rate=0.000005) - adam = fleet.distributed_optimizer( - adam, - strategy={ - "embedding": { - "sparse_accessor_class": "DownpourCtrAccessor" - } - }) + adam = fleet.distributed_optimizer(adam, + strategy={ + "embedding": { + "sparse_accessor_class": + "DownpourCtrAccessor" + } + }) adam.minimize([cost], [scope]) fleet.run_server() except: diff --git a/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer.py index 263c578a571..d9bc0c7a5f3 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestFleetMetaOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "1" os.environ[ @@ -31,12 +32,15 @@ class TestFleetMetaOptimizer(unittest.TestCase): def net(self): with static.device_guard("gpu:0"): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') - input_z = paddle.fluid.layers.data( - name="z", shape=[1], dtype="float32") + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') + input_z = paddle.fluid.layers.data(name="z", + shape=[1], + dtype="float32") with static.device_guard("gpu:all"): input_z = input_z * 1.0 input_z.stop_gradient = True @@ -51,8 +55,8 @@ class TestFleetMetaOptimizer(unittest.TestCase): prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) return avg_cost @@ -73,8 +77,8 @@ class TestFleetMetaOptimizer(unittest.TestCase): avg_cost = self.net() optimizer = paddle.fluid.optimizer.Adam(0.01) - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) def test_pipeline_amp_optimizer(self): @@ -96,8 +100,8 @@ class TestFleetMetaOptimizer(unittest.TestCase): avg_cost = self.net() optimizer = paddle.fluid.optimizer.Adam(0.01) - optimizer = fleet.distributed_optimizer( - optimizer, strategy=strategy) + optimizer = fleet.distributed_optimizer(optimizer, + strategy=strategy) optimizer.minimize(avg_cost) ops = train_prog._pipeline_opt['section_program'].global_block().ops 
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer_with_recompute.py b/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer_with_recompute.py index f67b26e0aef..5c086a5994f 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer_with_recompute.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer_with_recompute.py @@ -20,6 +20,7 @@ paddle.enable_static() class TestFleetMetaOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "1" os.environ[ @@ -31,10 +32,12 @@ class TestFleetMetaOptimizer(unittest.TestCase): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) with paddle.fluid.device_guard("gpu:0"): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') + input_y = paddle.fluid.layers.data(name="y", + shape=[1], + dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') fc_3 = paddle.fluid.layers.fc(input=fc_2, size=64, act='tanh') @@ -47,8 +50,8 @@ class TestFleetMetaOptimizer(unittest.TestCase): prediction = paddle.fluid.layers.fc(input=[fc_7], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_private_function.py b/python/paddle/fluid/tests/unittests/test_fleet_private_function.py index beec6d7f51c..063cda8aa9d 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_private_function.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_private_function.py @@ -20,7 +20,9 @@ import threading class TestFleetPrivateFunction(unittest.TestCase): + def test_wait_port(self): + def init_server(port): import time time.sleep(5) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_ps.py b/python/paddle/fluid/tests/unittests/test_fleet_ps.py index 04d1616399a..5ad87859560 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_ps.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_ps.py @@ -17,10 +17,12 @@ from __future__ import print_function import unittest from paddle.fluid.framework import default_main_program from paddle.fluid.incubate.fleet.parameter_server.ir.pserver_pass import _get_optimizer_input_shape + main_program = default_main_program() class TestFleetPS(unittest.TestCase): + def test_version(self): from paddle.fluid.incubate.fleet.parameter_server import version transpiler = version.is_transpiler() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_pyramid_hash.py b/python/paddle/fluid/tests/unittests/test_fleet_pyramid_hash.py index 91e9cddd2a8..d22fc3a1b8c 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_pyramid_hash.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_pyramid_hash.py @@ -20,6 +20,7 @@ from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distribu class TestPyramidHashOpApi(unittest.TestCase): + def test_dist_geo_server_transpiler(self): num_voc = 128 embed_dim = 64 @@ -40,10 +41,12 @@ class TestPyramidHashOpApi(unittest.TestCase): lr=0.002, param_attr=fluid.ParamAttr( name="PyramidHash_emb_0", - 
learning_rate=0, ), + learning_rate=0, + ), param_attr_wl=fluid.ParamAttr( name="Filter", - learning_rate=0, ), + learning_rate=0, + ), param_attr_bl=None, distribute_update_vars=["PyramidHash_emb_0"], name=None) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_raw_program_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_raw_program_meta_optimizer.py index 604109b262d..05c3391565e 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_raw_program_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_raw_program_meta_optimizer.py @@ -20,6 +20,7 @@ paddle.enable_static() class TestFleetMetaOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "1" os.environ[ @@ -30,15 +31,16 @@ class TestFleetMetaOptimizer(unittest.TestCase): import paddle.distributed.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') + input_x = paddle.fluid.layers.data(name="x", + shape=[32], + dtype='float32') input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) + cost = paddle.fluid.layers.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_recompute_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_recompute_meta_optimizer.py index 59a8fa48d94..230cad18361 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_recompute_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_recompute_meta_optimizer.py @@ -23,14 +23,15 @@ paddle.enable_static() class TestFleetRecomputeMetaOptimizer(TestFleetMetaOptimizer): + def test_recompute_optimizer_backward(self): """ test recompute optimizer backward """ train_prog, startup_prog = fluid.Program(), fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) self.set_strategy(strategy, 'recompute') - opt = fluid.optimizer.MomentumOptimizer( - learning_rate=0.001, momentum=0.9) + opt = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, + momentum=0.9) opt = RecomputeOptimizer(opt) opt.user_defined_strategy = strategy params_grads = opt.backward(avg_cost, startup_prog) @@ -46,8 +47,8 @@ class TestFleetRecomputeMetaOptimizer(TestFleetMetaOptimizer): avg_cost, strategy = self.net(train_prog, startup_prog) self.set_strategy(strategy, 'recompute') - opt = fluid.optimizer.MomentumOptimizer( - learning_rate=0.001, momentum=0.9) + opt = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, + momentum=0.9) opt = RecomputeOptimizer(opt) opt.user_defined_strategy = strategy params_grads = opt.backward(avg_cost, startup_prog) @@ -65,8 +66,8 @@ class TestFleetRecomputeMetaOptimizer(TestFleetMetaOptimizer): avg_cost, strategy = self.net(train_prog, startup_prog) self.set_strategy(strategy, 'recompute') - opt = fluid.optimizer.MomentumOptimizer( - learning_rate=0.001, momentum=0.9) + opt = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, + momentum=0.9) opt = RecomputeOptimizer(opt) opt.user_defined_strategy = strategy params_grads = opt.backward(avg_cost, startup_prog) diff 
--git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py index 396d705508b..9636efdbfcb 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py @@ -43,10 +43,9 @@ class TestCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002" os.environ["PADDLE_TRAINER_ID"] = "0" - role_maker = GeneralRoleMaker( - init_timeout_seconds=100, - run_timeout_seconds=100, - http_ip_port="127.0.0.1:36003") + role_maker = GeneralRoleMaker(init_timeout_seconds=100, + run_timeout_seconds=100, + http_ip_port="127.0.0.1:36003") #role_maker.generate_role() place = fluid.CPUPlace() exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py index 86ee0db30ef..7fc68ec1563 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py @@ -104,8 +104,8 @@ class TestCloudRoleMaker(unittest.TestCase): os.environ["POD_IP"] = "127.0.0.1" os.environ["PADDLE_PORT"] = "36001" - ro = role_maker.PaddleCloudRoleMaker( - is_collective=False, init_gloo=False) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False, + init_gloo=False) self.assertEqual(ro._server_index(), 0) self.assertFalse(ro._is_worker()) self.assertTrue(ro._is_server()) @@ -161,6 +161,7 @@ class TestUserDefinedRoleMaker(unittest.TestCase): class TestGlooWithCloudRoleMaker(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINERS_NUM"] = "1" os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" @@ -443,8 +444,8 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): x = paddle.fluid.layers.data(name='x', shape=[13], dtype='float32') y_predict = paddle.fluid.layers.fc(input=x, size=1, act=None) y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = paddle.fluid.layers.square_error_cost( - input=y_predict, label=y) + cost = paddle.fluid.layers.square_error_cost(input=y_predict, + label=y) avg_cost = paddle.fluid.layers.mean(cost) return avg_cost diff --git a/python/paddle/fluid/tests/unittests/test_fleet_runtime.py b/python/paddle/fluid/tests/unittests/test_fleet_runtime.py index 80109716a54..19c407bf57f 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_runtime.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_runtime.py @@ -18,6 +18,7 @@ import os class TestFleetRuntime(unittest.TestCase): + def test_fleet_runtime_base(self): import paddle.distributed.fleet.runtime base = paddle.distributed.fleet.runtime.runtime_base.RuntimeBase() @@ -44,19 +45,18 @@ class TestFleetRuntime(unittest.TestCase): ps_runtime = paddle.distributed.fleet.runtime.ParameterServerRuntime() self.assertRaises(Exception, ps_runtime._get_optimizer_status, "test_op", None) - reshaped_names, origin_names = ps_runtime._get_optimizer_status("adam", - "param") + reshaped_names, origin_names = ps_runtime._get_optimizer_status( + "adam", "param") self.assertTrue( - len(reshaped_names) == 2 and - reshaped_names[0] == 'param_moment1_0' and - reshaped_names[1] == 'param_moment2_0') + len(reshaped_names) == 2 and reshaped_names[0] == 'param_moment1_0' + and reshaped_names[1] == 'param_moment2_0') self.assertTrue( - len(origin_names) == 2 and - origin_names[0] == 'param_beta1_pow_acc_0' and - 
origin_names[1] == 'param_beta2_pow_acc_0') + len(origin_names) == 2 + and origin_names[0] == 'param_beta1_pow_acc_0' + and origin_names[1] == 'param_beta2_pow_acc_0') - reshaped_names, origin_names = ps_runtime._get_optimizer_status("sgd", - "param") + reshaped_names, origin_names = ps_runtime._get_optimizer_status( + "sgd", "param") self.assertTrue(len(reshaped_names) == 0 and len(origin_names) == 0) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py index 28e03fdfd70..20eace7cce3 100755 --- a/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py @@ -27,6 +27,7 @@ paddle.enable_static() class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): + def test_sharding_optimizer(self): train_prog, startup_prog = paddle.fluid.Program(), paddle.fluid.Program( ) @@ -244,12 +245,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): avg_cost, strategy = self.net(train_prog, startup_prog) self.set_strategy(strategy, 'sharding') regularization = paddle.fluid.regularizer.L2Decay(0.0001) - self.optimizer( - avg_cost, - strategy, - train_prog, - startup_prog, - regularization=regularization) + self.optimizer(avg_cost, + strategy, + train_prog, + startup_prog, + regularization=regularization) parameters = [ x.name for x in train_prog.list_vars() if x.persistable == True ] @@ -285,8 +285,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): avg_cost, strategy = self.net(train_prog, startup_prog) self.set_strategy(strategy, 'sharding') clip = paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0) - self.optimizer( - avg_cost, strategy, train_prog, startup_prog, grad_clip=clip) + self.optimizer(avg_cost, + strategy, + train_prog, + startup_prog, + grad_clip=clip) parameters = [ x.name for x in train_prog.list_vars() if x.persistable == True ] @@ -340,6 +343,7 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "3" os.environ[ @@ -382,8 +386,8 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of MP group sharding_group_waiting_port = None for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_0": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_0": sharding_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(sharding_group_waiting_ports, ['127.0.0.1:36003']) @@ -391,8 +395,8 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of sharding group sharding_group_waiting_port = None for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_1": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_1": dp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(dp_group_waiting_ports, ['127.0.0.1:36002']) @@ -429,8 +433,8 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of sharding group sharding_group_waiting_port = None for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_0": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_0": sharding_group_waiting_ports 
= op.desc.attr("other_endpoints") self.assertEqual(sharding_group_waiting_ports, ['127.0.0.1:36003']) @@ -438,8 +442,8 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of dp group sharding_group_waiting_port = None for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_1": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_1": dp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(dp_group_waiting_ports, ['127.0.0.1:36002']) @@ -501,8 +505,8 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of sharding group sharding_group_waiting_port = None for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_0": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_0": sharding_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(sharding_group_waiting_ports, ['127.0.0.1:36003']) @@ -510,8 +514,8 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of dp group sharding_group_waiting_port = None for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_1": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_1": dp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(dp_group_waiting_ports, ['127.0.0.1:36002']) @@ -618,8 +622,8 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of pp group sharding_group_waiting_port = None for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_0": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_0": sharding_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(sharding_group_waiting_ports, ['127.0.0.1:36003']) @@ -627,8 +631,8 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of sharding group sharding_group_waiting_port = None for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_1": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_1": dp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(dp_group_waiting_ports, ['127.0.0.1:36002']) @@ -682,8 +686,11 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): "accumulate_steps": 4, } clip = paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0) - self.optimizer( - avg_cost, strategy, train_prog, startup_prog, grad_clip=clip) + self.optimizer(avg_cost, + strategy, + train_prog, + startup_prog, + grad_clip=clip) train_prog = train_prog._pipeline_opt['section_program'] startup_prog = startup_prog._pipeline_opt['startup_program'] @@ -757,8 +764,8 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of pp group sharding_group_waiting_port = None for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_0": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_0": mp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(mp_group_waiting_ports, ['127.0.0.1:36003']) @@ -766,8 +773,8 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of sharding group 
sharding_group_waiting_port = None for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_1": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_1": pp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(pp_group_waiting_ports, ['127.0.0.1:36002']) @@ -791,13 +798,12 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): "accumulate_steps": 4, } clip = paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0) - self.optimizer( - avg_cost, - strategy, - train_prog, - startup_prog, - grad_clip=clip, - name="adamw") + self.optimizer(avg_cost, + strategy, + train_prog, + startup_prog, + grad_clip=clip, + name="adamw") train_prog = train_prog._pipeline_opt['section_program'] startup_prog = startup_prog._pipeline_opt['startup_program'] @@ -876,8 +882,8 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of pp group sharding_group_waiting_port = None for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_0": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_0": mp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(mp_group_waiting_ports, ['127.0.0.1:36003']) @@ -885,8 +891,8 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of sharding group sharding_group_waiting_port = None for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_1": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_1": pp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(pp_group_waiting_ports, ['127.0.0.1:36002']) @@ -896,7 +902,9 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ) avg_cost, strategy = self.pp_net(train_prog, startup_prog) strategy.amp = True - strategy.amp_configs = {'custom_black_varnames': ['fc_6.b_0'], } + strategy.amp_configs = { + 'custom_black_varnames': ['fc_6.b_0'], + } strategy.sharding = True strategy.sharding_configs = { "sharding_degree": 1, @@ -970,16 +978,16 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of pp group for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_0": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_0": pp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(pp_group_waiting_ports, ['127.0.0.1:36003']) # check correctness of dp group for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_3": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_3": dp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(dp_group_waiting_ports, ['127.0.0.1:36002']) @@ -989,7 +997,9 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ) avg_cost, strategy = self.pp_net(train_prog, startup_prog) strategy.amp = True - strategy.amp_configs = {'custom_black_varnames': ['fc_6.b_0'], } + strategy.amp_configs = { + 'custom_black_varnames': ['fc_6.b_0'], + } strategy.sharding = True strategy.sharding_configs = { "segment_broadcast_MB": 0.1, @@ -1043,16 +1053,16 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of sharding group for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and 
op.desc.output_arg_names()[ - 0] == "comm_id_0": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_0": sharding_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(sharding_group_waiting_ports, ['127.0.0.1:36003']) # check correctness of pp group for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_1": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_1": pp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(pp_group_waiting_ports, ['127.0.0.1:36002']) @@ -1062,7 +1072,9 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ) avg_cost, strategy = self.pp_net(train_prog, startup_prog) strategy.amp = True - strategy.amp_configs = {'custom_black_varnames': ['fc_6.b_0'], } + strategy.amp_configs = { + 'custom_black_varnames': ['fc_6.b_0'], + } strategy.sharding = True strategy.sharding_configs = { "sharding_degree": 1, @@ -1139,16 +1151,16 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of pp group for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_0": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_0": pp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(pp_group_waiting_ports, ['127.0.0.1:36003']) # check correctness of dp group for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_3": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_3": dp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(dp_group_waiting_ports, ['127.0.0.1:36002']) @@ -1158,7 +1170,9 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ) avg_cost, strategy = self.pp_net(train_prog, startup_prog) strategy.amp = True - strategy.amp_configs = {'custom_black_varnames': ['fc_6.b_0'], } + strategy.amp_configs = { + 'custom_black_varnames': ['fc_6.b_0'], + } strategy.sharding = True strategy.sharding_configs = { "sharding_degree": 1, @@ -1238,16 +1252,16 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of pp group for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_0": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_0": pp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(pp_group_waiting_ports, ['127.0.0.1:36003']) # check correctness of dp group for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_3": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_3": dp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(dp_group_waiting_ports, ['127.0.0.1:36002']) @@ -1258,7 +1272,9 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ) avg_cost, strategy = self.pp_net(train_prog, startup_prog) strategy.amp = True - strategy.amp_configs = {'custom_black_varnames': ['fc_6.b_0'], } + strategy.amp_configs = { + 'custom_black_varnames': ['fc_6.b_0'], + } strategy.sharding = True strategy.sharding_configs = { "sharding_degree": 1, @@ -1334,16 +1350,16 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of pp group for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == 
"comm_id_0": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_0": pp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(pp_group_waiting_ports, ['127.0.0.1:36003']) # check correctness of dp group for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_3": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_3": dp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(dp_group_waiting_ports, ['127.0.0.1:36002']) @@ -1353,7 +1369,9 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ) avg_cost, strategy = self.pp_net(train_prog, startup_prog) strategy.amp = True - strategy.amp_configs = {'custom_black_varnames': ['fc_6.b_0'], } + strategy.amp_configs = { + 'custom_black_varnames': ['fc_6.b_0'], + } strategy.sharding = True strategy.sharding_configs = { "sharding_degree": 1, @@ -1425,16 +1443,16 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): # check correctness of pp group for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_0": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_0": pp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(pp_group_waiting_ports, ['127.0.0.1:36003']) # check correctness of dp group for op in startup_prog_ops: - if op.type == "c_gen_nccl_id" and op.desc.output_arg_names()[ - 0] == "comm_id_3": + if op.type == "c_gen_nccl_id" and op.desc.output_arg_names( + )[0] == "comm_id_3": dp_group_waiting_ports = op.desc.attr("other_endpoints") self.assertEqual(dp_group_waiting_ports, ['127.0.0.1:36002']) @@ -1444,7 +1462,9 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ) avg_cost, strategy = self.pp_net(train_prog, startup_prog) strategy.amp = True - strategy.amp_configs = {'custom_black_varnames': ['fc_6.b_0'], } + strategy.amp_configs = { + 'custom_black_varnames': ['fc_6.b_0'], + } strategy.sharding = True strategy.sharding_configs = { "sharding_degree": 1, diff --git a/python/paddle/fluid/tests/unittests/test_fleet_static_mp_layers.py b/python/paddle/fluid/tests/unittests/test_fleet_static_mp_layers.py index ed64c7421d0..10d80bc434b 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_static_mp_layers.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_static_mp_layers.py @@ -30,6 +30,7 @@ paddle.enable_static() class ColumnLinearNet(fluid.dygraph.Layer): + def __init__(self, input_size, output_size): super(ColumnLinearNet, self).__init__() self.parallel_linear = fleet.meta_parallel.ColumnParallelLinear( @@ -46,6 +47,7 @@ class ColumnLinearNet(fluid.dygraph.Layer): class RowLinearNet(fluid.dygraph.Layer): + def __init__(self, input_size, output_size): super(RowLinearNet, self).__init__() self.parallel_linear = fleet.meta_parallel.RowParallelLinear( @@ -61,10 +63,11 @@ class RowLinearNet(fluid.dygraph.Layer): class EmbeddingNet(fluid.dygraph.Layer): + def __init__(self, vocab_size, hidden_size): super(EmbeddingNet, self).__init__() - self.embedding = fleet.meta_parallel.VocabParallelEmbedding(vocab_size, - hidden_size) + self.embedding = fleet.meta_parallel.VocabParallelEmbedding( + vocab_size, hidden_size) def forward(self, x): output = self.embedding(x) @@ -72,6 +75,7 @@ class EmbeddingNet(fluid.dygraph.Layer): class TestDistTraning(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "2" os.environ[ @@ -104,13 +108,13 @@ class 
TestDistTraning(unittest.TestCase): ops = main_program.global_block().ops ops = [op.type for op in ops] self.assertEqual( - ops, - ['c_identity', 'matmul_v2', 'elementwise_add', 'c_concat']) + ops, ['c_identity', 'matmul_v2', 'elementwise_add', 'c_concat']) weight = model_a.parallel_linear.weight bias = model_a.parallel_linear.bias - self.assertEqual(weight.shape, (input_size, output_size // - self.model_parallel_size)) + self.assertEqual( + weight.shape, + (input_size, output_size // self.model_parallel_size)) self.assertEqual(bias.shape, (output_size // self.model_parallel_size, )) @@ -132,8 +136,9 @@ class TestDistTraning(unittest.TestCase): weight = model_a.parallel_linear.weight bias = model_a.parallel_linear.bias - self.assertEqual(weight.shape, ( - input_size // self.model_parallel_size, output_size)) + self.assertEqual( + weight.shape, + (input_size // self.model_parallel_size, output_size)) self.assertEqual(bias.shape, (output_size, )) def test_parallel_embedding(self): @@ -145,8 +150,9 @@ class TestDistTraning(unittest.TestCase): # model_a model_a = EmbeddingNet(vocab_size, hidden_size) - x = paddle.static.data( - name='x', shape=[None, seq_len], dtype='int64') + x = paddle.static.data(name='x', + shape=[None, seq_len], + dtype='int64') y = model_a(x) #print(main_program) @@ -155,8 +161,9 @@ class TestDistTraning(unittest.TestCase): self.assertEqual(ops, ['c_embedding', 'c_allreduce_sum']) weight = model_a.embedding.weight - self.assertEqual(weight.shape, ( - vocab_size // self.model_parallel_size, hidden_size)) + self.assertEqual( + weight.shape, + (vocab_size // self.model_parallel_size, hidden_size)) def test_parallel_cross_entropy(self): main_program, startup_program = self.get_program() @@ -171,8 +178,9 @@ class TestDistTraning(unittest.TestCase): x = paddle.static.data( name='x', shape=[batch_size, seq_length, class_size_per_card]) - label = paddle.static.data( - name='label', shape=[batch_size, seq_length], dtype='int64') + label = paddle.static.data(name='label', + shape=[batch_size, seq_length], + dtype='int64') loss_a = model_a(x, label) #print(main_program) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_util.py b/python/paddle/fluid/tests/unittests/test_fleet_util.py index a3a526aaa61..91d9e062b9a 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_util.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_util.py @@ -64,6 +64,7 @@ class TestFleetUtil(unittest.TestCase): import paddle.distributed.fleet as fleet class UserDefinedUtil(fleet.UtilBase): + def __init__(self): super(UserDefinedUtil, self).__init__() @@ -166,8 +167,7 @@ class TestFleetUtil(unittest.TestCase): results = fleet.util._params_check(conf) self.assertTrue(len(results) == 1) np.testing.assert_array_almost_equal( - results[0], np.array( - [[3.0590223e-07]], dtype=np.float32)) + results[0], np.array([[3.0590223e-07]], dtype=np.float32)) # test feed_var's shape conf.dump_program_filename = "pruned_main_program.feed_var_shape_not_match" @@ -178,8 +178,7 @@ class TestFleetUtil(unittest.TestCase): results = fleet.util._params_check(conf) self.assertTrue(len(results) == 1) np.testing.assert_array_almost_equal( - results[0], np.array( - [[3.0590223e-07]], dtype=np.float32)) + results[0], np.array([[3.0590223e-07]], dtype=np.float32)) # test correct case without feed_vars_filelist conf.feed_config.feeded_vars_filelist = None @@ -215,8 +214,8 @@ class TestFleetUtil(unittest.TestCase): # test match conf.pruned_prog_path = os.path.join( - data_dir, - os.path.join(self.pruned_dir, 
"pruned_main_program.pbtxt")) + data_dir, os.path.join(self.pruned_dir, + "pruned_main_program.pbtxt")) if sys.platform == 'win32' or sys.platform == 'sys.platform': conf.draw = False else: @@ -232,8 +231,8 @@ class TestFleetUtil(unittest.TestCase): else: data_dir = self.download_files() program_path = os.path.join( - data_dir, - os.path.join(self.train_dir, "join_main_program.pbtxt")) + data_dir, os.path.join(self.train_dir, + "join_main_program.pbtxt")) is_text = True program = fleet.util._load_program(program_path, is_text) output_dir = os.path.join(data_dir, self.train_dir) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_utils.py b/python/paddle/fluid/tests/unittests/test_fleet_utils.py index 09de4867ef9..be3376a1d9a 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_utils.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_utils.py @@ -67,8 +67,8 @@ class TestFleetUtils(unittest.TestCase): def test_parse_program_proto(self): data_dir = self.download_files() parse_program_file_path = os.path.join( - data_dir, - os.path.join(self.pruned_dir, "pruned_main_program.pbtxt")) + data_dir, os.path.join(self.pruned_dir, + "pruned_main_program.pbtxt")) is_text_parse_program = True parse_output_dir = os.path.join(data_dir, self.pruned_dir) fleet_util = FleetUtil() @@ -119,8 +119,7 @@ class TestFleetUtils(unittest.TestCase): results = fleet_util.check_vars_and_dump(conf) self.assertTrue(len(results) == 1) np.testing.assert_array_almost_equal( - results[0], np.array( - [[3.0590223e-07]], dtype=np.float32)) + results[0], np.array([[3.0590223e-07]], dtype=np.float32)) # test feed_var's shape conf.dump_program_filename = "pruned_main_program.feed_var_shape_not_match" @@ -131,8 +130,7 @@ class TestFleetUtils(unittest.TestCase): results = fleet_util.check_vars_and_dump(conf) self.assertTrue(len(results) == 1) np.testing.assert_array_almost_equal( - results[0], np.array( - [[3.0590223e-07]], dtype=np.float32)) + results[0], np.array([[3.0590223e-07]], dtype=np.float32)) # test correct case without feed_vars_filelist conf.feed_config.feeded_vars_filelist = None @@ -168,8 +166,8 @@ class TestFleetUtils(unittest.TestCase): # test match conf.pruned_prog_path = os.path.join( - data_dir, - os.path.join(self.pruned_dir, "pruned_main_program.pbtxt")) + data_dir, os.path.join(self.pruned_dir, + "pruned_main_program.pbtxt")) if sys.platform == 'win32' or sys.platform == 'sys.platform': conf.draw = False else: @@ -184,8 +182,8 @@ class TestFleetUtils(unittest.TestCase): else: data_dir = self.download_files() program_path = os.path.join( - data_dir, - os.path.join(self.train_dir, "join_main_program.pbtxt")) + data_dir, os.path.join(self.train_dir, + "join_main_program.pbtxt")) is_text = True program = utils.load_program(program_path, is_text) output_dir = os.path.join(data_dir, self.train_dir) diff --git a/python/paddle/fluid/tests/unittests/test_flip.py b/python/paddle/fluid/tests/unittests/test_flip.py index 010d23bca51..a933595be87 100644 --- a/python/paddle/fluid/tests/unittests/test_flip.py +++ b/python/paddle/fluid/tests/unittests/test_flip.py @@ -46,9 +46,8 @@ class TestFlipOp_API(unittest.TestCase): fetch_list=[output]) out_np = np.array(res[0]) out_ref = np.array([[3, 2, 1], [6, 5, 4]]).astype(np.float32) - self.assertTrue( - (out_np == out_ref).all(), - msg='flip output is wrong, out =' + str(out_np)) + self.assertTrue((out_np == out_ref).all(), + msg='flip output is wrong, out =' + str(out_np)) def test_dygraph(self): img = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32) @@ 
-65,6 +64,7 @@ class TestFlipOp_API(unittest.TestCase): class TestFlipOp(OpTest): + def setUp(self): self.op_type = 'flip' self.python_api = paddle.tensor.flip @@ -96,36 +96,42 @@ class TestFlipOp(OpTest): class TestFlipOpAxis1(TestFlipOp): + def init_test_case(self): self.in_shape = (2, 4, 4) self.axis = [0] class TestFlipOpAxis2(TestFlipOp): + def init_test_case(self): self.in_shape = (4, 4, 6, 3) self.axis = [0, 2] class TestFlipOpAxis3(TestFlipOp): + def init_test_case(self): self.in_shape = (4, 3, 1) self.axis = [0, 1, 2] class TestFlipOpAxis4(TestFlipOp): + def init_test_case(self): self.in_shape = (6, 4, 2, 2) self.axis = [0, 1, 2, 3] class TestFlipOpEmptyAxis(TestFlipOp): + def init_test_case(self): self.in_shape = (6, 4, 2, 2) self.axis = [] class TestFlipOpNegAxis(TestFlipOp): + def init_test_case(self): self.in_shape = (6, 4, 2, 2) self.axis = [-1] diff --git a/python/paddle/fluid/tests/unittests/test_fmax_op.py b/python/paddle/fluid/tests/unittests/test_fmax_op.py index 608d97b68ac..359b98c4b49 100644 --- a/python/paddle/fluid/tests/unittests/test_fmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_fmax_op.py @@ -52,8 +52,10 @@ class ApiFMaxTest(unittest.TestCase): data_y = paddle.static.data("y", shape=[10, 15], dtype="float32") result_fmax = paddle.fmax(data_x, data_y) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "y": self.input_y}, + res, = exe.run(feed={ + "x": self.input_x, + "y": self.input_y + }, fetch_list=[result_fmax]) self.assertTrue(np.allclose(res, self.np_expected1)) @@ -63,8 +65,10 @@ class ApiFMaxTest(unittest.TestCase): data_z = paddle.static.data("z", shape=[15], dtype="float32") result_fmax = paddle.fmax(data_x, data_z) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "z": self.input_z}, + res, = exe.run(feed={ + "x": self.input_x, + "z": self.input_z + }, fetch_list=[result_fmax]) self.assertTrue(np.allclose(res, self.np_expected2)) @@ -74,8 +78,10 @@ class ApiFMaxTest(unittest.TestCase): data_c = paddle.static.data("c", shape=[3], dtype="int64") result_fmax = paddle.fmax(data_a, data_c) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"a": self.input_a, - "c": self.input_c}, + res, = exe.run(feed={ + "a": self.input_a, + "c": self.input_c + }, fetch_list=[result_fmax]) self.assertTrue(np.allclose(res, self.np_expected3)) @@ -85,8 +91,10 @@ class ApiFMaxTest(unittest.TestCase): data_c = paddle.static.data("c", shape=[3], dtype="int64") result_fmax = paddle.fmax(data_b, data_c) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"b": self.input_b, - "c": self.input_c}, + res, = exe.run(feed={ + "b": self.input_b, + "c": self.input_c + }, fetch_list=[result_fmax]) self.assertTrue(np.allclose(res, self.np_expected4)) @@ -145,21 +153,19 @@ class TestElementwiseFmaxOp(OpTest): def test_check_grad_ingore_x(self): """test_check_grad_ingore_x""" - self.check_grad( - ['Y'], - 'Out', - max_relative_error=0.005, - no_grad_set=set("X"), - check_eager=True) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.005, + no_grad_set=set("X"), + check_eager=True) def test_check_grad_ingore_y(self): """test_check_grad_ingore_y""" - self.check_grad( - ['X'], - 'Out', - max_relative_error=0.005, - no_grad_set=set('Y'), - check_eager=True) + self.check_grad(['X'], + 'Out', + max_relative_error=0.005, + no_grad_set=set('Y'), + check_eager=True) class TestElementwiseFmax2Op(OpTest): @@ -190,18 +196,16 @@ class TestElementwiseFmax2Op(OpTest): def 
test_check_grad_ingore_x(self): """test_check_grad_ingore_x""" - self.check_grad( - ['Y'], - 'Out', - max_relative_error=0.005, - no_grad_set=set("X"), - check_eager=True) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.005, + no_grad_set=set("X"), + check_eager=True) def test_check_grad_ingore_y(self): """test_check_grad_ingore_y""" - self.check_grad( - ['X'], - 'Out', - max_relative_error=0.005, - no_grad_set=set('Y'), - check_eager=True) + self.check_grad(['X'], + 'Out', + max_relative_error=0.005, + no_grad_set=set('Y'), + check_eager=True) diff --git a/python/paddle/fluid/tests/unittests/test_fmin_op.py b/python/paddle/fluid/tests/unittests/test_fmin_op.py index b9d26827988..88542ba9365 100644 --- a/python/paddle/fluid/tests/unittests/test_fmin_op.py +++ b/python/paddle/fluid/tests/unittests/test_fmin_op.py @@ -54,8 +54,10 @@ class ApiFMinTest(unittest.TestCase): data_y = paddle.static.data("y", shape=[10, 15], dtype="float32") result_fmin = paddle.fmin(data_x, data_y) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "y": self.input_y}, + res, = exe.run(feed={ + "x": self.input_x, + "y": self.input_y + }, fetch_list=[result_fmin]) self.assertTrue(np.allclose(res, self.np_expected1)) @@ -65,8 +67,10 @@ class ApiFMinTest(unittest.TestCase): data_z = paddle.static.data("z", shape=[15], dtype="float32") result_fmin = paddle.fmin(data_x, data_z) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "z": self.input_z}, + res, = exe.run(feed={ + "x": self.input_x, + "z": self.input_z + }, fetch_list=[result_fmin]) self.assertTrue(np.allclose(res, self.np_expected2)) @@ -76,8 +80,10 @@ class ApiFMinTest(unittest.TestCase): data_c = paddle.static.data("c", shape=[3], dtype="int64") result_fmin = paddle.fmin(data_a, data_c) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"a": self.input_a, - "c": self.input_c}, + res, = exe.run(feed={ + "a": self.input_a, + "c": self.input_c + }, fetch_list=[result_fmin]) self.assertTrue(np.allclose(res, self.np_expected3)) @@ -87,8 +93,10 @@ class ApiFMinTest(unittest.TestCase): data_c = paddle.static.data("c", shape=[3], dtype="int64") result_fmin = paddle.fmin(data_b, data_c) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"b": self.input_b, - "c": self.input_c}, + res, = exe.run(feed={ + "b": self.input_b, + "c": self.input_c + }, fetch_list=[result_fmin]) self.assertTrue(np.allclose(res, self.np_expected4)) @@ -147,21 +155,19 @@ class TestElementwiseFminOp(OpTest): def test_check_grad_ingore_x(self): """test_check_grad_ingore_x""" - self.check_grad( - ['Y'], - 'Out', - max_relative_error=0.005, - no_grad_set=set("X"), - check_eager=True) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.005, + no_grad_set=set("X"), + check_eager=True) def test_check_grad_ingore_y(self): """test_check_grad_ingore_y""" - self.check_grad( - ['X'], - 'Out', - max_relative_error=0.005, - no_grad_set=set('Y'), - check_eager=True) + self.check_grad(['X'], + 'Out', + max_relative_error=0.005, + no_grad_set=set('Y'), + check_eager=True) class TestElementwiseFmin2Op(OpTest): @@ -192,21 +198,19 @@ class TestElementwiseFmin2Op(OpTest): def test_check_grad_ingore_x(self): """test_check_grad_ingore_x""" - self.check_grad( - ['Y'], - 'Out', - max_relative_error=0.005, - no_grad_set=set("X"), - check_eager=True) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.005, + no_grad_set=set("X"), + check_eager=True) def test_check_grad_ingore_y(self): 
"""test_check_grad_ingore_y""" - self.check_grad( - ['X'], - 'Out', - max_relative_error=0.005, - no_grad_set=set('Y'), - check_eager=True) + self.check_grad(['X'], + 'Out', + max_relative_error=0.005, + no_grad_set=set('Y'), + check_eager=True) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_fold_op.py b/python/paddle/fluid/tests/unittests/test_fold_op.py index 44b94cd3b66..a919cac6b7d 100644 --- a/python/paddle/fluid/tests/unittests/test_fold_op.py +++ b/python/paddle/fluid/tests/unittests/test_fold_op.py @@ -71,8 +71,8 @@ class TestFoldOp(OpTest): w_offset * self.dilations[1]) if (h_out >= 0 and h_out < self.output_sizes[0]) and ( w_out >= 0 and w_out < self.output_sizes[1]): - output[b, c_out, h_out, w_out] += self.x[ - b, c, w + col_width * h] + output[b, c_out, h_out, + w_out] += self.x[b, c, w + col_width * h] self.outputs = output @@ -125,6 +125,7 @@ class TestFoldAPI(TestFoldOp): class TestFoldOpError(unittest.TestCase): + def test_errors(self): from paddle.nn.functional import fold from paddle.fluid.framework import Program, program_guard @@ -143,61 +144,59 @@ class TestFoldOpError(unittest.TestCase): def test_padding_shape(): # padding_size must be 2 or 4 x = paddle.randn(shape=[2, 6, 6], dtype="float32") - out = fold( - x, - output_sizes=[2, 3], - kernel_sizes=[2, 2], - paddings=[2, 2, 3]) + out = fold(x, + output_sizes=[2, 3], + kernel_sizes=[2, 2], + paddings=[2, 2, 3]) def test_dilations_shape(): - # dialtions_size must be 2 + # dialtions_size must be 2 x = paddle.randn(shape=[2, 6, 6], dtype="float32") - out = fold( - x, - output_sizes=[2, 3], - kernel_sizes=[2, 2], - dilations=[2, 2, 3]) + out = fold(x, + output_sizes=[2, 3], + kernel_sizes=[2, 2], + dilations=[2, 2, 3]) def test_strides_shape(): # strids_size must be 2 x = paddle.randn(shape=[2, 6, 6], dtype="float32") - out = fold( - x, - output_sizes=[2, 3], - kernel_sizes=[2, 2], - strides=[2, 2, 3]) + out = fold(x, + output_sizes=[2, 3], + kernel_sizes=[2, 2], + strides=[2, 2, 3]) def test_output_size(): # im_h * im_w must be L x = paddle.randn(shape=[2, 6, 6], dtype="float32") - out = fold( - x, output_sizes=[6, 6], kernel_sizes=[2, 2], - strides=[1, 1]) + out = fold(x, + output_sizes=[6, 6], + kernel_sizes=[2, 2], + strides=[1, 1]) def test_output_size_2(): # out_size must GT 1 x = paddle.randn(shape=[2, 6, 6], dtype="float32") - out = fold( - x, - output_sizes=[0.1, 0.2], - kernel_sizes=[2, 2], - strides=[1, 1]) + out = fold(x, + output_sizes=[0.1, 0.2], + kernel_sizes=[2, 2], + strides=[1, 1]) def test_block_h_w(): # test_block_h_w GT 0 x = paddle.randn(shape=[2, 1, 1], dtype="float32") - out = fold( - x, output_sizes=[1, 1], kernel_sizes=[2, 2], strides=1) + out = fold(x, + output_sizes=[1, 1], + kernel_sizes=[2, 2], + strides=1) def test_GT_0(): x = paddle.randn(shape=[2, 1, 1], dtype="float32") - out = fold( - x, - output_sizes=[0, 0], - kernel_sizes=[0, 0], - dilations=0, - paddings=[0, 0], - strides=0) + out = fold(x, + output_sizes=[0, 0], + kernel_sizes=[0, 0], + dilations=0, + paddings=[0, 0], + strides=0) self.assertRaises(AssertionError, test_input_shape) self.assertRaises(AssertionError, test_kernel_shape) diff --git a/python/paddle/fluid/tests/unittests/test_frame_op.py b/python/paddle/fluid/tests/unittests/test_frame_op.py index f26662dcf4f..528446f3eb4 100644 --- a/python/paddle/fluid/tests/unittests/test_frame_op.py +++ b/python/paddle/fluid/tests/unittests/test_frame_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -44,6 +44,7 @@ def frame_from_librosa(x, frame_length, hop_length, axis=-1): class TestFrameOp(OpTest): + def setUp(self): self.op_type = "frame" self.shape, self.type, self.attrs = self.initTestCase() @@ -51,8 +52,7 @@ class TestFrameOp(OpTest): 'X': np.random.random(size=self.shape).astype(self.type), } self.outputs = { - 'Out': frame_from_librosa( - x=self.inputs['X'], **self.attrs) + 'Out': frame_from_librosa(x=self.inputs['X'], **self.attrs) } def initTestCase(self): @@ -77,6 +77,7 @@ class TestFrameOp(OpTest): class TestCase1(TestFrameOp): + def initTestCase(self): input_shape = (150, ) input_type = 'float64' @@ -89,6 +90,7 @@ class TestCase1(TestFrameOp): class TestCase2(TestFrameOp): + def initTestCase(self): input_shape = (8, 150) input_type = 'float64' @@ -101,6 +103,7 @@ class TestCase2(TestFrameOp): class TestCase3(TestFrameOp): + def initTestCase(self): input_shape = (150, 8) input_type = 'float64' @@ -113,6 +116,7 @@ class TestCase3(TestFrameOp): class TestCase4(TestFrameOp): + def initTestCase(self): input_shape = (4, 2, 150) input_type = 'float64' @@ -125,6 +129,7 @@ class TestCase4(TestFrameOp): class TestCase5(TestFrameOp): + def initTestCase(self): input_shape = (150, 4, 2) input_type = 'float64' diff --git a/python/paddle/fluid/tests/unittests/test_framework_debug_str.py b/python/paddle/fluid/tests/unittests/test_framework_debug_str.py index 6511b56b5e8..420c7c55149 100644 --- a/python/paddle/fluid/tests/unittests/test_framework_debug_str.py +++ b/python/paddle/fluid/tests/unittests/test_framework_debug_str.py @@ -19,6 +19,7 @@ from paddle.fluid.framework import Program class TestDebugStringFramework(unittest.TestCase): + def test_debug_str(self): p = Program() p.current_block().create_var(name='t', shape=[0, 1]) diff --git a/python/paddle/fluid/tests/unittests/test_fs_interface.py b/python/paddle/fluid/tests/unittests/test_fs_interface.py index 581fa973811..56341fa4898 100644 --- a/python/paddle/fluid/tests/unittests/test_fs_interface.py +++ b/python/paddle/fluid/tests/unittests/test_fs_interface.py @@ -24,6 +24,7 @@ from paddle.distributed.fleet.utils.fs import LocalFS, FS, HDFSClient, FSTimeOut class FSTest(unittest.TestCase): + def _test_method(self, func): if sys.version_info[0] <= 2: args = inspect.getargspec(func).args diff --git a/python/paddle/fluid/tests/unittests/test_fsp_op.py b/python/paddle/fluid/tests/unittests/test_fsp_op.py index 7864f4efcdf..0f7eb4ad95d 100644 --- a/python/paddle/fluid/tests/unittests/test_fsp_op.py +++ b/python/paddle/fluid/tests/unittests/test_fsp_op.py @@ -29,15 +29,16 @@ def fsp_matrix(a, b): a_t = a.transpose([0, 2, 3, 1]) a_t = a_t.reshape([batch, h * w, a_channel]) b_t = b.transpose([0, 2, 3, 1]).reshape([batch, h * w, b_channel]) - a_r = a_t.repeat( - b_channel, axis=1).reshape( - [batch, h * w, b_channel, a_channel]).transpose([0, 1, 3, 2]) - b_r = b_t.repeat( - a_channel, axis=1).reshape([batch, h * w, a_channel, b_channel]) + a_r = a_t.repeat(b_channel, + axis=1).reshape([batch, h * w, b_channel, + a_channel]).transpose([0, 1, 3, 2]) + b_r = b_t.repeat(a_channel, + 
axis=1).reshape([batch, h * w, a_channel, b_channel]) return np.mean(a_r * b_r, axis=1) class TestFSPOp(OpTest): + def setUp(self): self.op_type = "fsp" self.initTestCase() @@ -60,22 +61,25 @@ class TestFSPOp(OpTest): class BadInputTest(unittest.TestCase): + def test_error(self): with fluid.program_guard(fluid.Program()): def test_bad_x(): data = fluid.layers.data(name='data', shape=[3, 32, 32]) feature_map_0 = [1, 2, 3] - feature_map_1 = fluid.layers.conv2d( - data, num_filters=2, filter_size=3) + feature_map_1 = fluid.layers.conv2d(data, + num_filters=2, + filter_size=3) loss = fluid.layers.fsp_matrix(feature_map_0, feature_map_1) self.assertRaises(TypeError, test_bad_x) def test_bad_y(): data = fluid.layers.data(name='data', shape=[3, 32, 32]) - feature_map_0 = fluid.layers.conv2d( - data, num_filters=2, filter_size=3) + feature_map_0 = fluid.layers.conv2d(data, + num_filters=2, + filter_size=3) feature_map_1 = [1, 2, 3] loss = fluid.layers.fsp_matrix(feature_map_0, feature_map_1) diff --git a/python/paddle/fluid/tests/unittests/test_ftrl_op.py b/python/paddle/fluid/tests/unittests/test_ftrl_op.py index 1826fdc3c06..d35f6dadac6 100644 --- a/python/paddle/fluid/tests/unittests/test_ftrl_op.py +++ b/python/paddle/fluid/tests/unittests/test_ftrl_op.py @@ -35,8 +35,8 @@ def ftrl_step(param, grad, rows, sq_accum, lin_accum, lr, l1, l2, lr_power): (np.sqrt(new_accum) - np.sqrt(sq_accum_hit)) / lr) * param_hit else: lin_accum_updated = lin_accum_hit + grad - ( - (np.power(new_accum, -lr_power) - np.power(sq_accum_hit, -lr_power) - ) / lr) * param_hit + (np.power(new_accum, -lr_power) - np.power(sq_accum_hit, -lr_power)) + / lr) * param_hit x = l1 * np.sign(lin_accum_updated) - lin_accum_updated if lr_power == -0.5: @@ -65,6 +65,7 @@ def ftrl_step(param, grad, rows, sq_accum, lin_accum, lr, l1, l2, lr_power): class TestFTRLOp(OpTest): + def setUp(self): self.op_type = "ftrl" rows = 102 @@ -105,6 +106,7 @@ class TestFTRLOp(OpTest): class TestSparseFTRLOp(unittest.TestCase): + def setUp(self): self.lr_power = -0.5 @@ -154,19 +156,18 @@ class TestSparseFTRLOp(unittest.TestCase): l1, l2, lr_power) # create and run operator - op = Operator( - "ftrl", - Param='Param', - Grad='Grad', - ParamOut='Param', - SquaredAccumulator='SquaredAccumulator', - SquaredAccumOut='SquaredAccumulator', - LinearAccumulator='LinearAccumulator', - LinearAccumOut='LinearAccumulator', - LearningRate='LearningRate', - l1=l1, - l2=l2, - lr_power=lr_power) + op = Operator("ftrl", + Param='Param', + Grad='Grad', + ParamOut='Param', + SquaredAccumulator='SquaredAccumulator', + SquaredAccumOut='SquaredAccumulator', + LinearAccumulator='LinearAccumulator', + LinearAccumOut='LinearAccumulator', + LearningRate='LearningRate', + l1=l1, + l2=l2, + lr_power=lr_power) op.run(scope, place) @@ -177,12 +178,15 @@ class TestSparseFTRLOp(unittest.TestCase): for i in range(height): for j in range(row_numel): - self.assertAlmostEqual( - param_out[i][j], param_array[i][j], places=4) - self.assertAlmostEqual( - sq_accum_out[i][j], sq_accum_array[i][j], places=4) - self.assertAlmostEqual( - lin_accum_out[i][j], lin_accum_array[i][j], places=4) + self.assertAlmostEqual(param_out[i][j], + param_array[i][j], + places=4) + self.assertAlmostEqual(sq_accum_out[i][j], + sq_accum_array[i][j], + places=4) + self.assertAlmostEqual(lin_accum_out[i][j], + lin_accum_array[i][j], + places=4) def init_kernel(self): pass @@ -196,6 +200,7 @@ class TestSparseFTRLOp(unittest.TestCase): class TestSparseFTRLOp2(TestSparseFTRLOp): + def init_kernel(self): 
self.lr_power = -0.6 diff --git a/python/paddle/fluid/tests/unittests/test_full_like_op.py b/python/paddle/fluid/tests/unittests/test_full_like_op.py index d3fea677a47..7a55125d1b4 100644 --- a/python/paddle/fluid/tests/unittests/test_full_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_full_like_op.py @@ -33,8 +33,9 @@ class TestFullOp(unittest.TestCase): train_program = Program() with program_guard(train_program, startup_program): fill_value = 2.0 - input = paddle.fluid.data( - name='input', dtype='float32', shape=[2, 3]) + input = paddle.fluid.data(name='input', + dtype='float32', + shape=[2, 3]) output = paddle.full_like(input, fill_value) output_dtype = paddle.full_like(input, fill_value, dtype='float32') @@ -51,9 +52,9 @@ class TestFullOp(unittest.TestCase): fetch_list=[output]) out_np = np.array(res[0]) - self.assertTrue( - not (out_np - np.full_like(img, fill_value)).any(), - msg="full_like output is wrong, out = " + str(out_np)) + self.assertTrue(not (out_np - np.full_like(img, fill_value)).any(), + msg="full_like output is wrong, out = " + + str(out_np)) def test_full_like_imperative(self): paddle.disable_static() @@ -75,23 +76,24 @@ class TestFullOp(unittest.TestCase): class TestFullOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): #for ci coverage - input_data = paddle.fluid.data( - name='input', dtype='float32', shape=[2, 3]) + input_data = paddle.fluid.data(name='input', + dtype='float32', + shape=[2, 3]) output = paddle.full_like(input_data, 2.0) def test_input_dtype(): paddle.full_like - self.assertRaises( - TypeError, - paddle.full_like, - x=input_data, - fill_value=2, - dtype='uint4') + self.assertRaises(TypeError, + paddle.full_like, + x=input_data, + fill_value=2, + dtype='uint4') class TestFullLikeOp1(OpTest): @@ -121,6 +123,7 @@ class TestFullLikeOp1(OpTest): class TestFullLikeOp2(TestFullLikeOp1): + def init_data(self): self.fill_value = 1000 self.shape = [1024, 1024] @@ -128,6 +131,7 @@ class TestFullLikeOp2(TestFullLikeOp1): class TestFullLikeOp3(TestFullLikeOp1): + def init_data(self): self.fill_value = 8888 self.shape = [5000, 5000] @@ -137,11 +141,12 @@ class TestFullLikeOp3(TestFullLikeOp1): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFullLikeOp4(unittest.TestCase): + def test_skip_data_transform(self): paddle.disable_static() with _test_eager_guard(): - x = paddle.to_tensor( - [1., 2., 3., 4.], place=paddle.CUDAPinnedPlace()) + x = paddle.to_tensor([1., 2., 3., 4.], + place=paddle.CUDAPinnedPlace()) out = paddle.full_like(x, 1.) 
self.assertTrue( (out.numpy() == np.ones([4]).astype(np.float32)).all(), True) diff --git a/python/paddle/fluid/tests/unittests/test_full_op.py b/python/paddle/fluid/tests/unittests/test_full_op.py index 723c4609bc9..c0aba3ff366 100644 --- a/python/paddle/fluid/tests/unittests/test_full_op.py +++ b/python/paddle/fluid/tests/unittests/test_full_op.py @@ -28,36 +28,45 @@ from paddle.fluid.framework import _test_eager_guard # Test python API class TestFullAPI(unittest.TestCase): + def test_api(self): positive_2_int32 = fluid.layers.fill_constant([1], "int32", 2) positive_2_int64 = fluid.layers.fill_constant([1], "int64", 2) - shape_tensor_int32 = fluid.data( - name="shape_tensor_int32", shape=[2], dtype="int32") + shape_tensor_int32 = fluid.data(name="shape_tensor_int32", + shape=[2], + dtype="int32") - shape_tensor_int64 = fluid.data( - name="shape_tensor_int64", shape=[2], dtype="int64") + shape_tensor_int64 = fluid.data(name="shape_tensor_int64", + shape=[2], + dtype="int64") out_1 = paddle.full(shape=[1, 2], dtype="float32", fill_value=1.1) - out_2 = paddle.full( - shape=[1, positive_2_int32], dtype="float32", fill_value=1.1) + out_2 = paddle.full(shape=[1, positive_2_int32], + dtype="float32", + fill_value=1.1) - out_3 = paddle.full( - shape=[1, positive_2_int64], dtype="float32", fill_value=1.1) + out_3 = paddle.full(shape=[1, positive_2_int64], + dtype="float32", + fill_value=1.1) - out_4 = paddle.full( - shape=shape_tensor_int32, dtype="float32", fill_value=1.2) + out_4 = paddle.full(shape=shape_tensor_int32, + dtype="float32", + fill_value=1.2) - out_5 = paddle.full( - shape=shape_tensor_int64, dtype="float32", fill_value=1.1) + out_5 = paddle.full(shape=shape_tensor_int64, + dtype="float32", + fill_value=1.1) - out_6 = paddle.full( - shape=shape_tensor_int64, dtype=np.float32, fill_value=1.1) + out_6 = paddle.full(shape=shape_tensor_int64, + dtype=np.float32, + fill_value=1.1) val = fluid.layers.fill_constant(shape=[1], dtype=np.float32, value=1.1) - out_7 = paddle.full( - shape=shape_tensor_int64, dtype=np.float32, fill_value=val) + out_7 = paddle.full(shape=shape_tensor_int64, + dtype=np.float32, + fill_value=val) exe = fluid.Executor(place=fluid.CPUPlace()) res_1, res_2, res_3, res_4, res_5, res_6, res_7 = exe.run( @@ -84,83 +93,83 @@ class TestFullAPI(unittest.TestCase): positive_4_int64 = fluid.layers.fill_constant([1], "int64", 4, True) - out_1 = paddle.full( - shape=[1, 2], dtype="float32", fill_value=1.1) + out_1 = paddle.full(shape=[1, 2], + dtype="float32", + fill_value=1.1) - out_2 = paddle.full( - shape=[1, positive_2_int32.item()], - dtype="float32", - fill_value=1.1) + out_2 = paddle.full(shape=[1, positive_2_int32.item()], + dtype="float32", + fill_value=1.1) - out_3 = paddle.full( - shape=[1, positive_2_int64.item()], - dtype="float32", - fill_value=1.1) + out_3 = paddle.full(shape=[1, positive_2_int64.item()], + dtype="float32", + fill_value=1.1) - out_4 = paddle.full( - shape=[1, 2], dtype="float32", fill_value=1.2) + out_4 = paddle.full(shape=[1, 2], + dtype="float32", + fill_value=1.2) - out_5 = paddle.full( - shape=[1, 2], dtype="float32", fill_value=1.1) + out_5 = paddle.full(shape=[1, 2], + dtype="float32", + fill_value=1.1) - out_6 = paddle.full( - shape=[1, 2], dtype=np.float32, fill_value=1.1) + out_6 = paddle.full(shape=[1, 2], + dtype=np.float32, + fill_value=1.1) - val = fluid.layers.fill_constant( - shape=[1], dtype=np.float32, value=1.1) - out_7 = paddle.full( - shape=[1, 2], dtype=np.float32, fill_value=val) + val = 
fluid.layers.fill_constant(shape=[1], + dtype=np.float32, + value=1.1) + out_7 = paddle.full(shape=[1, 2], + dtype=np.float32, + fill_value=val) - out_8 = paddle.full( - shape=positive_2_int32, dtype="float32", fill_value=1.1) + out_8 = paddle.full(shape=positive_2_int32, + dtype="float32", + fill_value=1.1) - out_9 = paddle.full( - shape=[ - positive_2_int32, positive_2_int64, positive_4_int64 - ], - dtype="float32", - fill_value=1.1) + out_9 = paddle.full(shape=[ + positive_2_int32, positive_2_int64, positive_4_int64 + ], + dtype="float32", + fill_value=1.1) # test for numpy.float64 as fill_value - out_10 = paddle.full_like( - out_7, dtype=np.float32, fill_value=np.abs(1.1)) - - assert np.array_equal( - out_1, np.full( - [1, 2], 1.1, dtype="float32")) - assert np.array_equal( - out_2, np.full( - [1, 2], 1.1, dtype="float32")) - assert np.array_equal( - out_3, np.full( - [1, 2], 1.1, dtype="float32")) - assert np.array_equal( - out_4, np.full( - [1, 2], 1.2, dtype="float32")) - assert np.array_equal( - out_5, np.full( - [1, 2], 1.1, dtype="float32")) - assert np.array_equal( - out_6, np.full( - [1, 2], 1.1, dtype="float32")) - assert np.array_equal( - out_7, np.full( - [1, 2], 1.1, dtype="float32")) + out_10 = paddle.full_like(out_7, + dtype=np.float32, + fill_value=np.abs(1.1)) + + assert np.array_equal(out_1, + np.full([1, 2], 1.1, dtype="float32")) + assert np.array_equal(out_2, + np.full([1, 2], 1.1, dtype="float32")) + assert np.array_equal(out_3, + np.full([1, 2], 1.1, dtype="float32")) + assert np.array_equal(out_4, + np.full([1, 2], 1.2, dtype="float32")) + assert np.array_equal(out_5, + np.full([1, 2], 1.1, dtype="float32")) + assert np.array_equal(out_6, + np.full([1, 2], 1.1, dtype="float32")) + assert np.array_equal(out_7, + np.full([1, 2], 1.1, dtype="float32")) assert np.array_equal(out_8, np.full([2], 1.1, dtype="float32")) - assert np.array_equal( - out_9, np.full( - [2, 2, 4], 1.1, dtype="float32")) - assert np.array_equal( - out_10, np.full( - [1, 2], 1.1, dtype="float32")) + assert np.array_equal(out_9, + np.full([2, 2, 4], 1.1, dtype="float32")) + assert np.array_equal(out_10, + np.full([1, 2], 1.1, dtype="float32")) class TestFullOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): #for ci coverage - self.assertRaises( - TypeError, paddle.full, shape=[1], fill_value=5, dtype='uint4') + self.assertRaises(TypeError, + paddle.full, + shape=[1], + fill_value=5, + dtype='uint4') # The argument dtype of full must be one of bool, float16, #float32, float64, uint8, int16, int32 or int64 @@ -179,15 +188,17 @@ class TestFullOpError(unittest.TestCase): # The shape dtype of full op must be int32 or int64. 
def test_shape_tensor_dtype(): - shape = fluid.data( - name="shape_tensor", shape=[2], dtype="float32") + shape = fluid.data(name="shape_tensor", + shape=[2], + dtype="float32") paddle.full(shape=shape, dtype="float32", fill_value=1) self.assertRaises(TypeError, test_shape_tensor_dtype) def test_shape_tensor_list_dtype(): - shape = fluid.data( - name="shape_tensor_list", shape=[1], dtype="bool") + shape = fluid.data(name="shape_tensor_list", + shape=[1], + dtype="bool") paddle.full(shape=[shape, 2], dtype="float32", fill_value=1) self.assertRaises(TypeError, test_shape_tensor_list_dtype) diff --git a/python/paddle/fluid/tests/unittests/test_function_hook.py b/python/paddle/fluid/tests/unittests/test_function_hook.py index 55981b01c40..8c88ee06c1e 100644 --- a/python/paddle/fluid/tests/unittests/test_function_hook.py +++ b/python/paddle/fluid/tests/unittests/test_function_hook.py @@ -24,6 +24,7 @@ from paddle.fluid.framework import _test_eager_guard class TestCapture: + def __init__(self): self.list = [] @@ -42,6 +43,7 @@ def grad_hook(grad): class TestBakcwardFunctionHookError(unittest.TestCase): + def func_hook(self): input_data = np.ones([4, 4]).astype('float32') diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv1d.py b/python/paddle/fluid/tests/unittests/test_functional_conv1d.py index b803835d107..88dd98f1f3a 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv1d.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv1d.py @@ -23,6 +23,7 @@ from unittest import TestCase class TestFunctionalConv1DError(TestCase): + def setUp(self): self.input = [] self.filter = [] @@ -39,15 +40,14 @@ class TestFunctionalConv1DError(TestCase): w = dg.to_variable(self.filter, dtype=paddle.float32) b = None if self.bias is None else dg.to_variable( self.bias, dtype=paddle.float32) - y = F.conv1d( - x, - w, - b, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv1d(x, + w, + b, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) def test_exception(self): with self.assertRaises(ValueError): @@ -55,6 +55,7 @@ class TestFunctionalConv1DError(TestCase): class TestFunctionalConv1DErrorCase1(TestFunctionalConv1DError): + def setUp(self): self.input = np.random.randn(1, 3, 3) self.filter = np.random.randn(3, 3, 1) diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv1d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv1d_transpose.py index 4284ab48827..3d719de3674 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv1d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv1d_transpose.py @@ -23,6 +23,7 @@ from unittest import TestCase class TestFunctionalConv1DError(TestCase): + def setUp(self): self.input = [] self.filter = [] @@ -39,15 +40,14 @@ class TestFunctionalConv1DError(TestCase): w = dg.to_variable(self.filter, dtype=paddle.float32) b = None if self.bias is None else dg.to_variable( self.bias, dtype=paddle.float32) - y = F.conv1d_transpose( - x, - w, - b, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv1d_transpose(x, + w, + b, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) def test_exception(self): with self.assertRaises(ValueError): @@ -55,6 +55,7 @@ class 
TestFunctionalConv1DError(TestCase): class TestFunctionalConv1DErrorCase1(TestFunctionalConv1DError): + def setUp(self): self.input = np.random.randn(1, 3, 3) self.filter = np.random.randn(3, 3, 1) diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py index 8e0a744ecdb..6c0f526f236 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv2d.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py @@ -46,19 +46,19 @@ class TestFunctionalConv2D(TestCase): filter_shape = tuple(self.filter_shape) self.weight = np.random.uniform( - -1, 1, (self.out_channels, self.in_channels // self.groups - ) + filter_shape).astype(self.dtype) + -1, 1, (self.out_channels, self.in_channels // self.groups) + + filter_shape).astype(self.dtype) if not self.no_bias: - self.bias = np.random.uniform(-1, 1, ( - self.out_channels, )).astype(self.dtype) + self.bias = np.random.uniform(-1, 1, (self.out_channels, )).astype( + self.dtype) self.channel_last = (self.data_format == "NHWC") if self.channel_last: self.input_shape = (self.batch_size, ) + self.spatial_shape + ( self.in_channels, ) else: - self.input_shape = (self.batch_size, self.in_channels - ) + self.spatial_shape + self.input_shape = (self.batch_size, + self.in_channels) + self.spatial_shape self.input = np.random.uniform(-1, 1, self.input_shape).astype(self.dtype) @@ -69,13 +69,11 @@ class TestFunctionalConv2D(TestCase): with fluid.unique_name.guard(): with fluid.program_guard(main, start): if self.channel_last: - x = fluid.data( - "input", (-1, -1, -1, self.in_channels), - dtype=self.dtype) + x = fluid.data("input", (-1, -1, -1, self.in_channels), + dtype=self.dtype) else: - x = fluid.data( - "input", (-1, self.in_channels, -1, -1), - dtype=self.dtype) + x = fluid.data("input", (-1, self.in_channels, -1, -1), + dtype=self.dtype) y = fluid.layers.conv2d( x, self.out_channels, @@ -100,26 +98,24 @@ class TestFunctionalConv2D(TestCase): with fluid.unique_name.guard(): with fluid.program_guard(main, start): if self.channel_last: - x = x = fluid.data( - "input", (-1, -1, -1, self.in_channels), - dtype=self.dtype) + x = x = fluid.data("input", (-1, -1, -1, self.in_channels), + dtype=self.dtype) else: - x = fluid.data( - "input", (-1, self.in_channels, -1, -1), - dtype=self.dtype) - weight = fluid.data( - "weight", self.weight.shape, dtype=self.dtype) + x = fluid.data("input", (-1, self.in_channels, -1, -1), + dtype=self.dtype) + weight = fluid.data("weight", + self.weight.shape, + dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias.shape, dtype=self.dtype) - y = F.conv2d( - x, - weight, - None if self.no_bias else bias, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv2d(x, + weight, + None if self.no_bias else bias, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) if self.act == 'sigmoid': y = F.sigmoid(y) @@ -137,15 +133,14 @@ class TestFunctionalConv2D(TestCase): x = dg.to_variable(self.input) weight = dg.to_variable(self.weight) bias = None if self.no_bias else dg.to_variable(self.bias) - y = F.conv2d( - x, - weight, - bias, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv2d(x, + weight, + bias, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + 
groups=self.groups, + data_format=self.data_format) if self.act == 'sigmoid': y = F.sigmoid(y) @@ -199,8 +194,8 @@ class TestFunctionalConv2DError(TestCase): filter_shape = (self.filter_shape, ) * 2 else: filter_shape = tuple(self.filter_shape) - self.weight_shape = (self.out_channels, self.in_channels // self.groups - ) + filter_shape + self.weight_shape = (self.out_channels, + self.in_channels // self.groups) + filter_shape self.bias_shape = (self.out_channels, ) def static_graph_case(self): @@ -210,29 +205,28 @@ class TestFunctionalConv2DError(TestCase): with fluid.program_guard(main, start): self.channel_last = self.data_format == "NHWC" if self.channel_last: - x = x = fluid.data( - "input", (-1, -1, -1, self.in_channels), - dtype=self.dtype) + x = x = fluid.data("input", (-1, -1, -1, self.in_channels), + dtype=self.dtype) else: - x = fluid.data( - "input", (-1, self.in_channels, -1, -1), - dtype=self.dtype) - weight = fluid.data( - "weight", self.weight_shape, dtype=self.dtype) + x = fluid.data("input", (-1, self.in_channels, -1, -1), + dtype=self.dtype) + weight = fluid.data("weight", + self.weight_shape, + dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias_shape, dtype=self.dtype) - y = F.conv2d( - x, - weight, - None if self.no_bias else bias, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv2d(x, + weight, + None if self.no_bias else bias, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) class TestFunctionalConv2DCase2(TestFunctionalConv2D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -248,6 +242,7 @@ class TestFunctionalConv2DCase2(TestFunctionalConv2D): class TestFunctionalConv2DCase3(TestFunctionalConv2D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -263,6 +258,7 @@ class TestFunctionalConv2DCase3(TestFunctionalConv2D): class TestFunctionalConv2DCase4(TestFunctionalConv2D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -278,6 +274,7 @@ class TestFunctionalConv2DCase4(TestFunctionalConv2D): class TestFunctionalConv2DCase5(TestFunctionalConv2D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -293,6 +290,7 @@ class TestFunctionalConv2DCase5(TestFunctionalConv2D): class TestFunctionalConv2DCase6(TestFunctionalConv2D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -308,6 +306,7 @@ class TestFunctionalConv2DCase6(TestFunctionalConv2D): class TestFunctionalConv2DCase7(TestFunctionalConv2D): + def setUp(self): self.in_channels = 6 self.out_channels = 8 @@ -323,6 +322,7 @@ class TestFunctionalConv2DCase7(TestFunctionalConv2D): class TestFunctionalConv2DCase8(TestFunctionalConv2D): + def setUp(self): self.in_channels = 6 self.out_channels = 12 @@ -338,6 +338,7 @@ class TestFunctionalConv2DCase8(TestFunctionalConv2D): class TestFunctionalConv2DErrorCase2(TestFunctionalConv2DError): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -353,6 +354,7 @@ class TestFunctionalConv2DErrorCase2(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase3(TestFunctionalConv2DError): + def setUp(self): self.in_channels = 3 self.out_channels = 4 @@ -368,6 +370,7 @@ class TestFunctionalConv2DErrorCase3(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase4(TestFunctionalConv2DError): + def setUp(self): self.in_channels = 4 self.out_channels = 3 @@ -383,6 +386,7 @@ class 
TestFunctionalConv2DErrorCase4(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase7(TestFunctionalConv2DError): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -398,6 +402,7 @@ class TestFunctionalConv2DErrorCase7(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase8(TestFunctionalConv2DError): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -413,6 +418,7 @@ class TestFunctionalConv2DErrorCase8(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase9(TestFunctionalConv2DError): + def setUp(self): self.in_channels = -5 self.out_channels = 5 @@ -428,6 +434,7 @@ class TestFunctionalConv2DErrorCase9(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase10(TestFunctionalConv2DError): + def setUp(self): self.in_channels = 3 self.out_channels = 4 @@ -443,6 +450,7 @@ class TestFunctionalConv2DErrorCase10(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase11(TestFunctionalConv2DError): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -458,6 +466,7 @@ class TestFunctionalConv2DErrorCase11(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase12(TestCase): + def setUp(self): self.input = np.array([]) self.filter = np.array([]) @@ -476,19 +485,19 @@ class TestFunctionalConv2DErrorCase12(TestCase): with fluid.unique_name.guard(): with fluid.program_guard(main, start): x = fluid.data("input", self.input.shape, dtype=paddle.float32) - y = fluid.layers.conv2d( - x, - self.num_filters, - self.filter_size, - stride=self.stride, - padding=self.padding, - dilation=self.dilation, - groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.filter), - bias_attr=False if self.bias is None else - I.NumpyArrayInitializer(self.bias), - act=None, - data_format=self.data_format) + y = fluid.layers.conv2d(x, + self.num_filters, + self.filter_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + param_attr=I.NumpyArrayInitializer( + self.filter), + bias_attr=False if self.bias is None + else I.NumpyArrayInitializer(self.bias), + act=None, + data_format=self.data_format) exe = fluid.Executor() exe.run(start) out, = exe.run(main, feed={"input": self.input}, fetch_list=[y]) @@ -500,15 +509,14 @@ class TestFunctionalConv2DErrorCase12(TestCase): w = dg.to_variable(self.filter, dtype=paddle.float32) b = None if self.bias is None else dg.to_variable( self.bias, dtype=paddle.float32) - y = F.conv2d( - x, - w, - b, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv2d(x, + w, + b, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) def test_dygraph_exception(self): with self.assertRaises(ValueError): @@ -520,6 +528,7 @@ class TestFunctionalConv2DErrorCase12(TestCase): class TestFunctionalConv2DErrorCase13(TestFunctionalConv2DErrorCase12): + def setUp(self): self.input = np.random.randn(1, 3, 3, 3) self.filter = np.random.randn(3, 3, 1, 1) diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py index 781169d70c1..d1b9c689257 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py @@ -47,19 +47,19 @@ class TestFunctionalConv2D(TestCase): filter_shape = tuple(self.filter_shape) self.weight = 
np.random.uniform( - -1, 1, (self.in_channels, self.out_channels // self.groups - ) + filter_shape).astype(self.dtype) + -1, 1, (self.in_channels, self.out_channels // self.groups) + + filter_shape).astype(self.dtype) if not self.no_bias: - self.bias = np.random.uniform(-1, 1, ( - self.out_channels, )).astype(self.dtype) + self.bias = np.random.uniform(-1, 1, (self.out_channels, )).astype( + self.dtype) self.channel_last = (self.data_format == "NHWC") if self.channel_last: self.input_shape = (self.batch_size, ) + self.spatial_shape + ( self.in_channels, ) else: - self.input_shape = (self.batch_size, self.in_channels - ) + self.spatial_shape + self.input_shape = (self.batch_size, + self.in_channels) + self.spatial_shape self.input = np.random.uniform(-1, 1, self.input_shape).astype(self.dtype) @@ -70,13 +70,11 @@ class TestFunctionalConv2D(TestCase): with fluid.unique_name.guard(): with fluid.program_guard(main, start): if self.channel_last: - x = fluid.data( - "input", (-1, -1, -1, self.in_channels), - dtype=self.dtype) + x = fluid.data("input", (-1, -1, -1, self.in_channels), + dtype=self.dtype) else: - x = fluid.data( - "input", (-1, self.in_channels, -1, -1), - dtype=self.dtype) + x = fluid.data("input", (-1, self.in_channels, -1, -1), + dtype=self.dtype) y = fluid.layers.conv2d_transpose( x, self.out_channels, @@ -101,27 +99,25 @@ class TestFunctionalConv2D(TestCase): with fluid.unique_name.guard(): with fluid.program_guard(main, start): if self.channel_last: - x = x = fluid.data( - "input", (-1, -1, -1, self.in_channels), - dtype=self.dtype) + x = x = fluid.data("input", (-1, -1, -1, self.in_channels), + dtype=self.dtype) else: - x = fluid.data( - "input", (-1, self.in_channels, -1, -1), - dtype=self.dtype) - weight = fluid.data( - "weight", self.weight.shape, dtype=self.dtype) + x = fluid.data("input", (-1, self.in_channels, -1, -1), + dtype=self.dtype) + weight = fluid.data("weight", + self.weight.shape, + dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias.shape, dtype=self.dtype) - y = F.conv2d_transpose( - x, - weight, - None if self.no_bias else bias, - output_size=self.output_size, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv2d_transpose(x, + weight, + None if self.no_bias else bias, + output_size=self.output_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) exe = fluid.Executor(self.place) exe.run(start) feed_dict = {"input": self.input, "weight": self.weight} @@ -135,16 +131,15 @@ class TestFunctionalConv2D(TestCase): x = dg.to_variable(self.input) weight = dg.to_variable(self.weight) bias = None if self.no_bias else dg.to_variable(self.bias) - y = F.conv2d_transpose( - x, - weight, - bias, - output_size=self.output_size, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv2d_transpose(x, + weight, + bias, + output_size=self.output_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) out = y.numpy() return out @@ -204,8 +199,8 @@ class TestFunctionalConv2DError(TestCase): filter_shape = (self.filter_shape, ) * 2 else: filter_shape = tuple(self.filter_shape) - self.weight_shape = (self.in_channels, self.out_channels // self.groups - ) + filter_shape + self.weight_shape = (self.in_channels, + self.out_channels // 
self.groups) + filter_shape self.bias_shape = (self.out_channels, ) def static_graph_case(self): @@ -215,30 +210,29 @@ class TestFunctionalConv2DError(TestCase): with fluid.program_guard(main, start): self.channel_last = self.data_format == "NHWC" if self.channel_last: - x = x = fluid.data( - "input", (-1, -1, -1, self.in_channels), - dtype=self.dtype) + x = x = fluid.data("input", (-1, -1, -1, self.in_channels), + dtype=self.dtype) else: - x = fluid.data( - "input", (-1, self.in_channels, -1, -1), - dtype=self.dtype) - weight = fluid.data( - "weight", self.weight_shape, dtype=self.dtype) + x = fluid.data("input", (-1, self.in_channels, -1, -1), + dtype=self.dtype) + weight = fluid.data("weight", + self.weight_shape, + dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias_shape, dtype=self.dtype) - y = F.conv2d_transpose( - x, - weight, - None if self.no_bias else bias, - output_size=self.output_size, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv2d_transpose(x, + weight, + None if self.no_bias else bias, + output_size=self.output_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) class TestFunctionalConv2DCase2(TestFunctionalConv2D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -252,6 +246,7 @@ class TestFunctionalConv2DCase2(TestFunctionalConv2D): class TestFunctionalConv2DCase3(TestFunctionalConv2D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -265,6 +260,7 @@ class TestFunctionalConv2DCase3(TestFunctionalConv2D): class TestFunctionalConv2DCase4(TestFunctionalConv2D): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -278,6 +274,7 @@ class TestFunctionalConv2DCase4(TestFunctionalConv2D): class TestFunctionalConv2DCase5(TestFunctionalConv2D): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -291,6 +288,7 @@ class TestFunctionalConv2DCase5(TestFunctionalConv2D): class TestFunctionalConv2DCase6(TestFunctionalConv2D): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -304,6 +302,7 @@ class TestFunctionalConv2DCase6(TestFunctionalConv2D): class TestFunctionalConv2DCase7(TestFunctionalConv2D): + def setUp(self): self.in_channels = 4 self.out_channels = 4 @@ -317,6 +316,7 @@ class TestFunctionalConv2DCase7(TestFunctionalConv2D): class TestFunctionalConv2DCase8(TestFunctionalConv2D): + def setUp(self): self.in_channels = 4 self.out_channels = 4 @@ -331,6 +331,7 @@ class TestFunctionalConv2DCase8(TestFunctionalConv2D): class TestFunctionalConv2DCase9(TestFunctionalConv2D): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -344,6 +345,7 @@ class TestFunctionalConv2DCase9(TestFunctionalConv2D): class TestFunctionalConv2DCase10(TestFunctionalConv2D): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -357,6 +359,7 @@ class TestFunctionalConv2DCase10(TestFunctionalConv2D): class TestFunctionalConv2DCase11(TestFunctionalConv2D): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -370,6 +373,7 @@ class TestFunctionalConv2DCase11(TestFunctionalConv2D): class TestFunctionalConv2DCase12(TestFunctionalConv2D): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -383,6 +387,7 @@ class TestFunctionalConv2DCase12(TestFunctionalConv2D): class TestFunctionalConv2DErrorCase2(TestFunctionalConv2DError): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -396,6 +401,7 
@@ class TestFunctionalConv2DErrorCase2(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase3(TestFunctionalConv2DError): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -409,6 +415,7 @@ class TestFunctionalConv2DErrorCase3(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase4(TestFunctionalConv2DError): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -422,6 +429,7 @@ class TestFunctionalConv2DErrorCase4(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase5(TestFunctionalConv2DError): + def setUp(self): self.in_channels = -2 self.out_channels = 5 @@ -435,6 +443,7 @@ class TestFunctionalConv2DErrorCase5(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase7(TestFunctionalConv2DError): + def setUp(self): self.in_channels = 4 self.out_channels = 5 @@ -449,6 +458,7 @@ class TestFunctionalConv2DErrorCase7(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase8(TestFunctionalConv2DError): + def setUp(self): self.in_channels = 4 self.out_channels = 5 @@ -462,6 +472,7 @@ class TestFunctionalConv2DErrorCase8(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase9(TestFunctionalConv2DError): + def setUp(self): self.in_channels = 3 self.out_channels = 4 @@ -475,6 +486,7 @@ class TestFunctionalConv2DErrorCase9(TestFunctionalConv2DError): class TestFunctionalConv2DErrorCase10(TestCase): + def setUp(self): self.input = np.array([]) self.filter = np.array([]) @@ -493,19 +505,19 @@ class TestFunctionalConv2DErrorCase10(TestCase): with fluid.unique_name.guard(): with fluid.program_guard(main, start): x = fluid.data("input", self.input.shape, dtype=paddle.float32) - y = fluid.layers.conv2d( - x, - self.num_filters, - self.filter_size, - stride=self.stride, - padding=self.padding, - dilation=self.dilation, - groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.filter), - bias_attr=False if self.bias is None else - I.NumpyArrayInitializer(self.bias), - act=None, - data_format=self.data_format) + y = fluid.layers.conv2d(x, + self.num_filters, + self.filter_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + param_attr=I.NumpyArrayInitializer( + self.filter), + bias_attr=False if self.bias is None + else I.NumpyArrayInitializer(self.bias), + act=None, + data_format=self.data_format) exe = fluid.Executor() exe.run(start) out, = exe.run(main, feed={"input": self.input}, fetch_list=[y]) @@ -517,15 +529,14 @@ class TestFunctionalConv2DErrorCase10(TestCase): w = dg.to_variable(self.filter, dtype=paddle.float32) b = None if self.bias is None else dg.to_variable( self.bias, dtype=paddle.float32) - y = F.conv2d_transpose( - x, - w, - b, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv2d_transpose(x, + w, + b, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) def test_dygraph_exception(self): with self.assertRaises(ValueError): @@ -541,6 +552,7 @@ class TestFunctionalConv2DErrorCase10(TestCase): class TestFunctionalConv2DErrorCase11(TestFunctionalConv2DErrorCase10): + def setUp(self): self.input = np.random.randn(1, 3, 3, 3) self.filter = np.random.randn(3, 3, 1, 1) diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py index 6c208160658..9ecbf2bf46c 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py 
+++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py @@ -46,19 +46,19 @@ class TestFunctionalConv3D(TestCase): filter_shape = tuple(self.filter_shape) self.weight = np.random.uniform( - -1, 1, (self.out_channels, self.in_channels // self.groups - ) + filter_shape).astype(self.dtype) + -1, 1, (self.out_channels, self.in_channels // self.groups) + + filter_shape).astype(self.dtype) if not self.no_bias: - self.bias = np.random.uniform(-1, 1, ( - self.out_channels, )).astype(self.dtype) + self.bias = np.random.uniform(-1, 1, (self.out_channels, )).astype( + self.dtype) self.channel_last = (self.data_format == "NDHWC") if self.channel_last: self.input_shape = (self.batch_size, ) + self.spatial_shape + ( self.in_channels, ) else: - self.input_shape = (self.batch_size, self.in_channels - ) + self.spatial_shape + self.input_shape = (self.batch_size, + self.in_channels) + self.spatial_shape self.input = np.random.uniform(-1, 1, self.input_shape).astype(self.dtype) @@ -69,13 +69,11 @@ class TestFunctionalConv3D(TestCase): with fluid.unique_name.guard(): with fluid.program_guard(main, start): if self.channel_last: - x = fluid.data( - "input", (-1, -1, -1, -1, self.in_channels), - dtype=self.dtype) + x = fluid.data("input", (-1, -1, -1, -1, self.in_channels), + dtype=self.dtype) else: - x = fluid.data( - "input", (-1, self.in_channels, -1, -1, -1), - dtype=self.dtype) + x = fluid.data("input", (-1, self.in_channels, -1, -1, -1), + dtype=self.dtype) y = fluid.layers.conv3d( x, self.out_channels, @@ -100,26 +98,25 @@ class TestFunctionalConv3D(TestCase): with fluid.unique_name.guard(): with fluid.program_guard(main, start): if self.channel_last: - x = x = fluid.data( - "input", (-1, -1, -1, -1, self.in_channels), - dtype=self.dtype) + x = x = fluid.data("input", + (-1, -1, -1, -1, self.in_channels), + dtype=self.dtype) else: - x = fluid.data( - "input", (-1, self.in_channels, -1, -1, -1), - dtype=self.dtype) - weight = fluid.data( - "weight", self.weight.shape, dtype=self.dtype) + x = fluid.data("input", (-1, self.in_channels, -1, -1, -1), + dtype=self.dtype) + weight = fluid.data("weight", + self.weight.shape, + dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias.shape, dtype=self.dtype) - y = F.conv3d( - x, - weight, - None if self.no_bias else bias, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv3d(x, + weight, + None if self.no_bias else bias, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) if self.act == 'sigmoid': y = F.sigmoid(y) @@ -137,15 +134,14 @@ class TestFunctionalConv3D(TestCase): x = dg.to_variable(self.input) weight = dg.to_variable(self.weight) bias = None if self.no_bias else dg.to_variable(self.bias) - y = F.conv3d( - x, - weight, - bias, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv3d(x, + weight, + bias, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) if self.act == 'sigmoid': y = F.sigmoid(y) @@ -199,8 +195,8 @@ class TestFunctionalConv3DError(TestCase): filter_shape = (self.filter_shape, ) * 3 else: filter_shape = tuple(self.filter_shape) - self.weight_shape = (self.out_channels, self.in_channels // self.groups - ) + filter_shape + self.weight_shape = (self.out_channels, + self.in_channels // self.groups) + 
filter_shape self.bias_shape = (self.out_channels, ) def static_graph_case(self): @@ -210,32 +206,32 @@ class TestFunctionalConv3DError(TestCase): with fluid.program_guard(main, start): self.channel_last = self.data_format == "NDHWC" if self.channel_last: - x = x = fluid.data( - "input", (-1, -1, -1, -1, self.in_channels), - dtype=self.dtype) + x = x = fluid.data("input", + (-1, -1, -1, -1, self.in_channels), + dtype=self.dtype) else: - x = fluid.data( - "input", (-1, self.in_channels, -1, -1, -1), - dtype=self.dtype) - weight = fluid.data( - "weight", self.weight_shape, dtype=self.dtype) + x = fluid.data("input", (-1, self.in_channels, -1, -1, -1), + dtype=self.dtype) + weight = fluid.data("weight", + self.weight_shape, + dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias_shape, dtype=self.dtype) - y = F.conv3d( - x, - weight, - None if self.no_bias else bias, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv3d(x, + weight, + None if self.no_bias else bias, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) if self.act == 'sigmoid': y = F.sigmoid(y) class TestFunctionalConv3DCase2(TestFunctionalConv3D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -250,6 +246,7 @@ class TestFunctionalConv3DCase2(TestFunctionalConv3D): class TestFunctionalConv3DCase3(TestFunctionalConv3D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -264,6 +261,7 @@ class TestFunctionalConv3DCase3(TestFunctionalConv3D): class TestFunctionalConv3DCase4(TestFunctionalConv3D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -278,6 +276,7 @@ class TestFunctionalConv3DCase4(TestFunctionalConv3D): class TestFunctionalConv3DCase5(TestFunctionalConv3D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -292,6 +291,7 @@ class TestFunctionalConv3DCase5(TestFunctionalConv3D): class TestFunctionalConv3DCase6(TestFunctionalConv3D): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -306,6 +306,7 @@ class TestFunctionalConv3DCase6(TestFunctionalConv3D): class TestFunctionalConv3DCase7(TestFunctionalConv3D): + def setUp(self): self.in_channels = 6 self.out_channels = 8 @@ -320,6 +321,7 @@ class TestFunctionalConv3DCase7(TestFunctionalConv3D): class TestFunctionalConv3DCase8(TestFunctionalConv3D): + def setUp(self): self.in_channels = 6 self.out_channels = 12 @@ -335,6 +337,7 @@ class TestFunctionalConv3DCase8(TestFunctionalConv3D): class TestFunctionalConv3DErrorCase2(TestFunctionalConv3DError): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -349,6 +352,7 @@ class TestFunctionalConv3DErrorCase2(TestFunctionalConv3DError): class TestFunctionalConv3DErrorCase3(TestFunctionalConv3DError): + def setUp(self): self.in_channels = 3 self.out_channels = 4 @@ -363,6 +367,7 @@ class TestFunctionalConv3DErrorCase3(TestFunctionalConv3DError): class TestFunctionalConv3DErrorCase4(TestFunctionalConv3DError): + def setUp(self): self.in_channels = 4 self.out_channels = 3 @@ -377,6 +382,7 @@ class TestFunctionalConv3DErrorCase4(TestFunctionalConv3DError): class TestFunctionalConv3DErrorCase7(TestFunctionalConv3DError): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -391,6 +397,7 @@ class TestFunctionalConv3DErrorCase7(TestFunctionalConv3DError): class TestFunctionalConv3DErrorCase8(TestFunctionalConv3DError): + def setUp(self): self.in_channels = 3 
self.out_channels = 5 @@ -405,6 +412,7 @@ class TestFunctionalConv3DErrorCase8(TestFunctionalConv3DError): class TestFunctionalConv3DErrorCase9(TestFunctionalConv3DError): + def setUp(self): self.in_channels = -5 self.out_channels = 5 @@ -419,6 +427,7 @@ class TestFunctionalConv3DErrorCase9(TestFunctionalConv3DError): class TestFunctionalConv3DErrorCase10(TestFunctionalConv3DError): + def setUp(self): self.in_channels = 3 self.out_channels = 4 @@ -433,6 +442,7 @@ class TestFunctionalConv3DErrorCase10(TestFunctionalConv3DError): class TestFunctionalConv3DErrorCase11(TestCase): + def setUp(self): self.input = np.array([]) self.filter = np.array([]) @@ -451,19 +461,19 @@ class TestFunctionalConv3DErrorCase11(TestCase): with fluid.unique_name.guard(): with fluid.program_guard(main, start): x = fluid.data("input", self.input.shape, dtype=paddle.float32) - y = fluid.layers.conv3d( - x, - self.num_filters, - self.filter_size, - stride=self.stride, - padding=self.padding, - dilation=self.dilation, - groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.filter), - bias_attr=False if self.bias is None else - I.NumpyArrayInitializer(self.bias), - act=None, - data_format=self.data_format) + y = fluid.layers.conv3d(x, + self.num_filters, + self.filter_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + param_attr=I.NumpyArrayInitializer( + self.filter), + bias_attr=False if self.bias is None + else I.NumpyArrayInitializer(self.bias), + act=None, + data_format=self.data_format) exe = fluid.Executor() exe.run(start) out, = exe.run(main, feed={"input": self.input}, fetch_list=[y]) @@ -475,15 +485,14 @@ class TestFunctionalConv3DErrorCase11(TestCase): w = dg.to_variable(self.filter, dtype=paddle.float32) b = None if self.bias is None else dg.to_variable( self.bias, dtype=paddle.float32) - y = F.conv3d( - x, - w, - b, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv3d(x, + w, + b, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) def test_dygraph_exception(self): with self.assertRaises(ValueError): @@ -495,6 +504,7 @@ class TestFunctionalConv3DErrorCase11(TestCase): class TestFunctionalConv3DErrorCase12(TestFunctionalConv3DErrorCase11): + def setUp(self): self.input = np.random.randn(1, 3, 3, 3, 3) self.filter = np.random.randn(3, 3, 1, 1, 1) diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py index 6f25d65aac2..0190779a021 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py @@ -48,19 +48,19 @@ class TestFunctionalConv3DTranspose(TestCase): filter_shape = tuple(self.filter_shape) self.weight = np.random.uniform( - -1, 1, (self.in_channels, self.out_channels // self.groups - ) + filter_shape).astype(self.dtype) + -1, 1, (self.in_channels, self.out_channels // self.groups) + + filter_shape).astype(self.dtype) if not self.no_bias: - self.bias = np.random.uniform(-1, 1, ( - self.out_channels, )).astype(self.dtype) + self.bias = np.random.uniform(-1, 1, (self.out_channels, )).astype( + self.dtype) self.channel_last = (self.data_format == "NDHWC") if self.channel_last: self.input_shape = (self.batch_size, ) + self.spatial_shape + ( self.in_channels, ) else: - self.input_shape = 
(self.batch_size, self.in_channels - ) + self.spatial_shape + self.input_shape = (self.batch_size, + self.in_channels) + self.spatial_shape self.input = np.random.uniform(-1, 1, self.input_shape).astype(self.dtype) @@ -71,13 +71,11 @@ class TestFunctionalConv3DTranspose(TestCase): with fluid.unique_name.guard(): with fluid.program_guard(main, start): if self.channel_last: - x = fluid.data( - "input", (-1, -1, -1, -1, self.in_channels), - dtype=self.dtype) + x = fluid.data("input", (-1, -1, -1, -1, self.in_channels), + dtype=self.dtype) else: - x = fluid.data( - "input", (-1, self.in_channels, -1, -1, -1), - dtype=self.dtype) + x = fluid.data("input", (-1, self.in_channels, -1, -1, -1), + dtype=self.dtype) y = fluid.layers.conv3d_transpose( x, self.out_channels, @@ -103,27 +101,26 @@ class TestFunctionalConv3DTranspose(TestCase): with fluid.unique_name.guard(): with fluid.program_guard(main, start): if self.channel_last: - x = x = fluid.data( - "input", (-1, -1, -1, -1, self.in_channels), - dtype=self.dtype) + x = x = fluid.data("input", + (-1, -1, -1, -1, self.in_channels), + dtype=self.dtype) else: - x = fluid.data( - "input", (-1, self.in_channels, -1, -1, -1), - dtype=self.dtype) - weight = fluid.data( - "weight", self.weight.shape, dtype=self.dtype) + x = fluid.data("input", (-1, self.in_channels, -1, -1, -1), + dtype=self.dtype) + weight = fluid.data("weight", + self.weight.shape, + dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias.shape, dtype=self.dtype) - y = F.conv3d_transpose( - x, - weight, - None if self.no_bias else bias, - output_size=self.output_size, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv3d_transpose(x, + weight, + None if self.no_bias else bias, + output_size=self.output_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) if self.act == 'sigmoid': y = F.sigmoid(y) exe = fluid.Executor(self.place) @@ -139,16 +136,15 @@ class TestFunctionalConv3DTranspose(TestCase): x = dg.to_variable(self.input) weight = dg.to_variable(self.weight) bias = None if self.no_bias else dg.to_variable(self.bias) - y = F.conv3d_transpose( - x, - weight, - bias, - output_size=self.output_size, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv3d_transpose(x, + weight, + bias, + output_size=self.output_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) if self.act == 'sigmoid': y = F.sigmoid(y) out = y.numpy() @@ -211,8 +207,8 @@ class TestFunctionalConv3DTransposeError(TestCase): filter_shape = (self.filter_shape, ) * 3 else: filter_shape = tuple(self.filter_shape) - self.weight_shape = (self.in_channels, self.out_channels // self.groups - ) + filter_shape + self.weight_shape = (self.in_channels, + self.out_channels // self.groups) + filter_shape self.bias_shape = (self.out_channels, ) def static_graph_case(self): @@ -222,32 +218,32 @@ class TestFunctionalConv3DTransposeError(TestCase): with fluid.program_guard(main, start): self.channel_last = self.data_format == "NDHWC" if self.channel_last: - x = x = fluid.data( - "input", (-1, -1, -1, -1, self.in_channels), - dtype=self.dtype) + x = x = fluid.data("input", + (-1, -1, -1, -1, self.in_channels), + dtype=self.dtype) else: - x = fluid.data( - "input", (-1, self.in_channels, -1, -1, 
-1), - dtype=self.dtype) - weight = fluid.data( - "weight", self.weight_shape, dtype=self.dtype) + x = fluid.data("input", (-1, self.in_channels, -1, -1, -1), + dtype=self.dtype) + weight = fluid.data("weight", + self.weight_shape, + dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias_shape, dtype=self.dtype) - y = F.conv3d_transpose( - x, - weight, - None if self.no_bias else bias, - output_size=self.output_size, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv3d_transpose(x, + weight, + None if self.no_bias else bias, + output_size=self.output_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) if self.act == 'sigmoid': y = F.sigmoid(y) class TestFunctionalConv3DTransposeCase2(TestFunctionalConv3DTranspose): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -262,6 +258,7 @@ class TestFunctionalConv3DTransposeCase2(TestFunctionalConv3DTranspose): class TestFunctionalConv3DTransposeCase3(TestFunctionalConv3DTranspose): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -276,6 +273,7 @@ class TestFunctionalConv3DTransposeCase3(TestFunctionalConv3DTranspose): class TestFunctionalConv3DTransposeCase4(TestFunctionalConv3DTranspose): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -290,6 +288,7 @@ class TestFunctionalConv3DTransposeCase4(TestFunctionalConv3DTranspose): class TestFunctionalConv3DTransposeCase5(TestFunctionalConv3DTranspose): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -304,6 +303,7 @@ class TestFunctionalConv3DTransposeCase5(TestFunctionalConv3DTranspose): class TestFunctionalConv3DTransposeCase6(TestFunctionalConv3DTranspose): + def setUp(self): self.in_channels = 4 self.out_channels = 4 @@ -318,6 +318,7 @@ class TestFunctionalConv3DTransposeCase6(TestFunctionalConv3DTranspose): class TestFunctionalConv3DTransposeCase7(TestFunctionalConv3DTranspose): + def setUp(self): self.in_channels = 4 self.out_channels = 4 @@ -333,6 +334,7 @@ class TestFunctionalConv3DTransposeCase7(TestFunctionalConv3DTranspose): class TestFunctionalConv3DTransposeCase8(TestFunctionalConv3DTranspose): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -347,6 +349,7 @@ class TestFunctionalConv3DTransposeCase8(TestFunctionalConv3DTranspose): class TestFunctionalConv3DTransposeCase9(TestFunctionalConv3DTranspose): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -361,6 +364,7 @@ class TestFunctionalConv3DTransposeCase9(TestFunctionalConv3DTranspose): class TestFunctionalConv3DTransposeCase10(TestFunctionalConv3DTranspose): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -375,6 +379,7 @@ class TestFunctionalConv3DTransposeCase10(TestFunctionalConv3DTranspose): class TestFunctionalConv3DTransposeCase11(TestFunctionalConv3DTranspose): + def setUp(self): self.in_channels = 4 self.out_channels = 6 @@ -388,8 +393,9 @@ class TestFunctionalConv3DTransposeCase11(TestFunctionalConv3DTranspose): self.data_format = "NCDHW" -class TestFunctionalConv3DTransposeErrorCase2( - TestFunctionalConv3DTransposeError): +class TestFunctionalConv3DTransposeErrorCase2(TestFunctionalConv3DTransposeError + ): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -403,8 +409,9 @@ class TestFunctionalConv3DTransposeErrorCase2( self.data_format = "NDHWC" -class TestFunctionalConv3DTransposeErrorCase3( - 
TestFunctionalConv3DTransposeError): +class TestFunctionalConv3DTransposeErrorCase3(TestFunctionalConv3DTransposeError + ): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -418,8 +425,9 @@ class TestFunctionalConv3DTransposeErrorCase3( self.data_format = "NDHWC" -class TestFunctionalConv3DTransposeErrorCase4( - TestFunctionalConv3DTransposeError): +class TestFunctionalConv3DTransposeErrorCase4(TestFunctionalConv3DTransposeError + ): + def setUp(self): self.in_channels = 3 self.out_channels = 5 @@ -433,8 +441,9 @@ class TestFunctionalConv3DTransposeErrorCase4( self.data_format = "NCDHW" -class TestFunctionalConv3DTransposeErrorCase5( - TestFunctionalConv3DTransposeError): +class TestFunctionalConv3DTransposeErrorCase5(TestFunctionalConv3DTransposeError + ): + def setUp(self): self.in_channels = -2 self.out_channels = 5 @@ -448,8 +457,9 @@ class TestFunctionalConv3DTransposeErrorCase5( self.data_format = "NCDHW" -class TestFunctionalConv3DTransposeErrorCase7( - TestFunctionalConv3DTransposeError): +class TestFunctionalConv3DTransposeErrorCase7(TestFunctionalConv3DTransposeError + ): + def setUp(self): self.in_channels = 4 self.out_channels = 5 @@ -464,8 +474,9 @@ class TestFunctionalConv3DTransposeErrorCase7( self.data_format = "NCDHW" -class TestFunctionalConv3DTransposeErrorCase8( - TestFunctionalConv3DTransposeError): +class TestFunctionalConv3DTransposeErrorCase8(TestFunctionalConv3DTransposeError + ): + def setUp(self): self.in_channels = 4 self.out_channels = 5 @@ -479,8 +490,9 @@ class TestFunctionalConv3DTransposeErrorCase8( self.data_format = "not_valid" -class TestFunctionalConv3DTransposeErrorCase9( - TestFunctionalConv3DTransposeError): +class TestFunctionalConv3DTransposeErrorCase9(TestFunctionalConv3DTransposeError + ): + def setUp(self): self.in_channels = 3 self.out_channels = 4 @@ -495,6 +507,7 @@ class TestFunctionalConv3DTransposeErrorCase9( class TestFunctionalConv3DTransposeErrorCase10(TestCase): + def setUp(self): self.input = np.array([]) self.filter = np.array([]) @@ -537,15 +550,14 @@ class TestFunctionalConv3DTransposeErrorCase10(TestCase): w = dg.to_variable(self.filter, dtype=paddle.float32) b = None if self.bias is None else dg.to_variable( self.bias, dtype=paddle.float32) - y = F.conv3d_transpose( - x, - w, - b, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format) + y = F.conv3d_transpose(x, + w, + b, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) def test_dygraph_exception(self): with self.assertRaises(ValueError): @@ -562,6 +574,7 @@ class TestFunctionalConv3DTransposeErrorCase10(TestCase): class TestFunctionalConv3DTransposeErrorCase11( TestFunctionalConv3DTransposeErrorCase10): + def setUp(self): self.input = np.random.randn(1, 3, 3, 3, 3) self.filter = np.random.randn(3, 3, 1, 1, 1) diff --git a/python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py index e3a25661337..c3e8a51397f 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py @@ -26,6 +26,7 @@ paddle.enable_static() class TestFuseAllReduceOpsBase(TestParallelExecutorBase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -77,13 +78,13 @@ class TestFuseAllReduceOpsBase(TestParallelExecutorBase): class 
TestFuseAllReduceOps(TestFuseAllReduceOpsBase): + def _decorate_compare_fused_all_reduce(self, model, use_device): - self.compare_fuse_all_reduce_ops( - model, - use_device, - init_feed_dict=init_data, - optimizer=self.optimizer, - fuse_all_optimizer_ops=True) + self.compare_fuse_all_reduce_ops(model, + use_device, + init_feed_dict=init_data, + optimizer=self.optimizer, + fuse_all_optimizer_ops=True) def test_simple_fc_with_fuse_all_reduce(self): self._decorate_compare_fused_all_reduce(simple_fc_net, DeviceType.CUDA) @@ -101,16 +102,17 @@ class TestFuseAllReduceOps(TestFuseAllReduceOpsBase): class TestFuseAllReduceOpsAndOptiOps(TestFuseAllReduceOps): + def _decorate_compare_fused_all_reduce(self, model, use_device): - self.compare_fuse_all_reduce_ops( - model, - use_device, - init_feed_dict=init_data, - optimizer=self.optimizer, - fuse_all_optimizer_ops=True) + self.compare_fuse_all_reduce_ops(model, + use_device, + init_feed_dict=init_data, + optimizer=self.optimizer, + fuse_all_optimizer_ops=True) class TestFuseAllReduceOpsWithSparseGrad(TestFuseAllReduceOpsBase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py index 6a1700e758e..c8106db1330 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py @@ -18,34 +18,34 @@ import unittest class TestFuseBatchNormActPass(unittest.TestCase): + def build_program(self, main_program, startup_program, use_cuda, seed=1): with fluid.program_guard(main_program, startup_program): x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') y = fluid.layers.data(name="y", shape=[1], dtype='int64') - hidden1 = fluid.layers.conv2d( - input=x, - filter_size=3, - num_filters=16, - stride=1, - padding=1, - act=None, - bias_attr=False, - data_format='NHWC') + hidden1 = fluid.layers.conv2d(input=x, + filter_size=3, + num_filters=16, + stride=1, + padding=1, + act=None, + bias_attr=False, + data_format='NHWC') param_attr = fluid.ParamAttr( name='batch_norm_w', initializer=fluid.initializer.Constant(value=1.0)) bias_attr = fluid.ParamAttr( name='batch_norm_b', initializer=fluid.initializer.Constant(value=0.0)) - hidden2 = fluid.layers.batch_norm( - input=hidden1, - param_attr=param_attr, - bias_attr=bias_attr, - act='relu', - data_layout='NHWC') + hidden2 = fluid.layers.batch_norm(input=hidden1, + param_attr=param_attr, + bias_attr=bias_attr, + act='relu', + data_layout='NHWC') hidden3 = fluid.layers.fc(input=hidden2, size=32, act='relu') - hidden4 = fluid.layers.batch_norm( - input=hidden3, act='relu', data_layout='NHWC') + hidden4 = fluid.layers.batch_norm(input=hidden3, + act='relu', + data_layout='NHWC') prediction = fluid.layers.fc(input=hidden4, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=y) loss = fluid.layers.mean(loss) @@ -72,8 +72,8 @@ class TestFuseBatchNormActPass(unittest.TestCase): build_strategy.fuse_bn_act_ops = False binary = fluid.CompiledProgram(main_program).with_data_parallel( loss_name=loss.name, build_strategy=build_strategy) - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=batch_size) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=batch_size) loss_vals = [] scope = fluid.Scope() with fluid.scope_guard(scope): @@ -90,8 +90,8 @@ class TestFuseBatchNormActPass(unittest.TestCase): 
build_strategy_fused.fuse_bn_act_ops = True binary_fused = fluid.CompiledProgram(main_program).with_data_parallel( loss_name=loss.name, build_strategy=build_strategy_fused) - train_reader_fused = paddle.batch( - paddle.dataset.mnist.train(), batch_size=batch_size) + train_reader_fused = paddle.batch(paddle.dataset.mnist.train(), + batch_size=batch_size) loss_vals_fused = [] scope_fused = fluid.Scope() with fluid.scope_guard(scope_fused): diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py index f4cb53b31c5..59b85530f10 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py @@ -27,6 +27,7 @@ paddle.enable_static() @unittest.skipIf(not core.is_compiled_with_cuda(), "Paddle core is not compiled with CUDA") class TestFusedBnAddActAPI(unittest.TestCase): + def setUp(self): self.conv_param_attr1 = fluid.ParamAttr( name='conv2d_1.weight', @@ -60,32 +61,29 @@ class TestFusedBnAddActAPI(unittest.TestCase): with fluid.program_guard(main_program, startup_program): x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') y = fluid.layers.data(name="y", shape=[1], dtype='int64') - conv1_1 = fluid.layers.conv2d( - input=x, - filter_size=3, - num_filters=32, - stride=1, - padding=1, - act=None, - param_attr=self.conv_param_attr1, - bias_attr=False, - data_format='NHWC') - conv1_2 = fluid.layers.conv2d( - input=x, - filter_size=3, - num_filters=32, - stride=1, - padding=1, - act=None, - param_attr=self.conv_param_attr2, - bias_attr=False, - data_format='NHWC') - bn = fluid.layers.batch_norm( - input=conv1_1, - param_attr=self.bn_param_attr1, - bias_attr=self.bn_bias_attr1, - act=None, - data_layout='NHWC') + conv1_1 = fluid.layers.conv2d(input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr1, + bias_attr=False, + data_format='NHWC') + conv1_2 = fluid.layers.conv2d(input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr2, + bias_attr=False, + data_format='NHWC') + bn = fluid.layers.batch_norm(input=conv1_1, + param_attr=self.bn_param_attr1, + bias_attr=self.bn_bias_attr1, + act=None, + data_layout='NHWC') fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act( conv1_2, bn, @@ -112,37 +110,33 @@ class TestFusedBnAddActAPI(unittest.TestCase): with fluid.program_guard(main_program, startup_program): x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') y = fluid.layers.data(name="y", shape=[1], dtype='int64') - conv1_1 = fluid.layers.conv2d( - input=x, - filter_size=3, - num_filters=32, - stride=1, - padding=1, - act=None, - param_attr=self.conv_param_attr1, - bias_attr=False, - data_format='NHWC') - bn1 = fluid.layers.batch_norm( - input=conv1_1, - param_attr=self.bn_param_attr1, - bias_attr=self.bn_bias_attr1, - act=None, - data_layout='NHWC') - conv1_2 = fluid.layers.conv2d( - input=conv1_1, - filter_size=1, - num_filters=32, - stride=1, - act=None, - param_attr=self.conv_param_attr2, - bias_attr=False, - data_format='NHWC') - bn2 = fluid.layers.batch_norm( - input=conv1_1, - param_attr=self.bn_param_attr2, - bias_attr=self.bn_bias_attr2, - act=None, - data_layout='NHWC') + conv1_1 = fluid.layers.conv2d(input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr1, + bias_attr=False, + data_format='NHWC') + bn1 = 
fluid.layers.batch_norm(input=conv1_1, + param_attr=self.bn_param_attr1, + bias_attr=self.bn_bias_attr1, + act=None, + data_layout='NHWC') + conv1_2 = fluid.layers.conv2d(input=conv1_1, + filter_size=1, + num_filters=32, + stride=1, + act=None, + param_attr=self.conv_param_attr2, + bias_attr=False, + data_format='NHWC') + bn2 = fluid.layers.batch_norm(input=conv1_1, + param_attr=self.bn_param_attr2, + bias_attr=self.bn_bias_attr2, + act=None, + data_layout='NHWC') out = bn1 + bn2 out = fluid.layers.relu(out) prediction = fluid.layers.fc(input=out, @@ -186,8 +180,10 @@ class TestFusedBnAddActAPI(unittest.TestCase): x_data.append(x) y_data.append(y) loss_v = exe.run(binary_fused, - feed={"x": x, - "y": y}, + feed={ + "x": x, + "y": y + }, fetch_list=[loss]) loss_vals_fused.append(loss_v[0][0]) @@ -202,8 +198,10 @@ class TestFusedBnAddActAPI(unittest.TestCase): exe.run(startup_program) for i in range(iters): loss_v = exe.run(binary, - feed={"x": x_data[i], - "y": y_data[i]}, + feed={ + "x": x_data[i], + "y": y_data[i] + }, fetch_list=[loss]) loss_vals.append(loss_v[0][0]) @@ -220,8 +218,9 @@ class TestFusedBnAddActAPI(unittest.TestCase): main_program = fluid.Program() startup_program = fluid.Program() place = fluid.CUDAPlace(0) - x, y, loss = self.build_fused_program( - main_program, startup_program, use_cuda=True) + x, y, loss = self.build_fused_program(main_program, + startup_program, + use_cuda=True) exe = fluid.Executor(place) scope = fluid.Scope() with fluid.scope_guard(scope): @@ -230,8 +229,10 @@ class TestFusedBnAddActAPI(unittest.TestCase): x = np.random.random((4, 1, 28, 28)).astype("float32") y = np.random.random((4, 1)).astype("int64") loss_v = exe.run(main_program, - feed={"x": x, - "y": y}, + feed={ + "x": x, + "y": y + }, fetch_list=[loss]) diff --git a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py index 6c3fa9e61d2..97fa40a89de 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py @@ -21,6 +21,7 @@ import os class TestMNIST(TestParallelExecutorBase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -43,8 +44,10 @@ class TestMNIST(TestParallelExecutorBase): # add enable_inplace=False here to force pass the unittest not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, fuse_elewise_add_act_ops=False, use_ir_memory_optimize=False, @@ -52,8 +55,10 @@ class TestMNIST(TestParallelExecutorBase): optimizer=_optimizer) fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, fuse_elewise_add_act_ops=True, use_ir_memory_optimize=False, diff --git a/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py index 00d91b1fab0..29bfca4dd78 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py @@ -47,6 +47,7 @@ def verify_node_count(graph, node_name, target_count): class MultiFCLayer(paddle.nn.Layer): + def __init__(self, hidden, Activation): super(MultiFCLayer, self).__init__() self.linear1 = 
paddle.nn.Linear(hidden, 4 * hidden) @@ -76,6 +77,7 @@ class MultiFCLayer(paddle.nn.Layer): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueFWDBase(unittest.TestCase): + def setUp(self): self.batch = 64 self.seqlen = 128 @@ -87,16 +89,17 @@ class TestFuseGemmEpilogueFWDBase(unittest.TestCase): self.startup_prog = paddle.static.Program() with paddle.static.program_guard(self.main_prog, self.startup_prog): - data = paddle.static.data( - name="_data", - shape=[-1, self.seqlen, self.hidden], - dtype='float32') - matmul_y = paddle.static.data( - name="_matmul_y", - shape=[1, self.hidden, self.hidden], - dtype='float32') - ele_y = paddle.static.data( - name="_ele_y", shape=[self.hidden, ], dtype='float32') + data = paddle.static.data(name="_data", + shape=[-1, self.seqlen, self.hidden], + dtype='float32') + matmul_y = paddle.static.data(name="_matmul_y", + shape=[1, self.hidden, self.hidden], + dtype='float32') + ele_y = paddle.static.data(name="_ele_y", + shape=[ + self.hidden, + ], + dtype='float32') multi_layer = MultiFCLayer(self.hidden, self._get_act_type()[0]) with paddle.static.amp.fp16_guard(): @@ -131,10 +134,9 @@ class TestFuseGemmEpilogueFWDBase(unittest.TestCase): build_strategy = paddle.static.BuildStrategy() build_strategy.fuse_gemm_epilogue = True program = paddle.static.CompiledProgram(self.main_prog) - program = program.with_data_parallel( - loss_name=self.loss.name, - build_strategy=build_strategy, - places=paddle.static.cuda_places()) + program = program.with_data_parallel(loss_name=self.loss.name, + build_strategy=build_strategy, + places=paddle.static.cuda_places()) result = self.exe.run(program, feed=self.feed, @@ -144,8 +146,8 @@ class TestFuseGemmEpilogueFWDBase(unittest.TestCase): "[{}] outputs are miss-matched.".format(type(self).__name__)) self.assertTrue( verify_node_count(program._graph, "fused_gemm_epilogue", 3), - "[{}] The number of fused_gemm_epilogue is miss-matched in the computing graph.". - format(type(self).__name__)) + "[{}] The number of fused_gemm_epilogue is miss-matched in the computing graph." 
+ .format(type(self).__name__)) act_fwd_name = self._get_act_type()[1] self.assertTrue( verify_node_count(program._graph, act_fwd_name, 1), @@ -163,6 +165,7 @@ class TestFuseGemmEpilogueFWDBase(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueReluFWDFP32(TestFuseGemmEpilogueFWDBase): + def _pre_test_hooks(self): self.atol = 1e-3 self.rtol = 1e-2 @@ -177,6 +180,7 @@ class TestFuseGemmEpilogueReluFWDFP32(TestFuseGemmEpilogueFWDBase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueReluFWDFP16(TestFuseGemmEpilogueReluFWDFP32): + def _pre_test_hooks(self): self.atol = 1e-3 self.rtol = 1e-2 @@ -193,6 +197,7 @@ class TestFuseGemmEpilogueReluFWDFP16(TestFuseGemmEpilogueReluFWDFP32): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueGeluFWDFP32(TestFuseGemmEpilogueFWDBase): + def _pre_test_hooks(self): self.atol = 1e-4 self.rtol = 1e-3 @@ -207,6 +212,7 @@ class TestFuseGemmEpilogueGeluFWDFP32(TestFuseGemmEpilogueFWDBase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueGeluFWDFP16(TestFuseGemmEpilogueGeluFWDFP32): + def _pre_test_hooks(self): self.atol = 1e-3 self.rtol = 1e-2 @@ -223,6 +229,7 @@ class TestFuseGemmEpilogueGeluFWDFP16(TestFuseGemmEpilogueGeluFWDFP32): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueBWDBase(unittest.TestCase): + def setUp(self): self.batch = 64 self.seqlen = 128 @@ -234,16 +241,17 @@ class TestFuseGemmEpilogueBWDBase(unittest.TestCase): self.startup_prog = paddle.static.Program() with paddle.static.program_guard(self.main_prog, self.startup_prog): - data = paddle.static.data( - name="_data", - shape=[-1, self.seqlen, self.hidden], - dtype='float32') - matmul_y = paddle.static.data( - name="_matmul_y", - shape=[1, self.hidden, self.hidden], - dtype='float32') - ele_y = paddle.static.data( - name="_ele_y", shape=[self.hidden, ], dtype='float32') + data = paddle.static.data(name="_data", + shape=[-1, self.seqlen, self.hidden], + dtype='float32') + matmul_y = paddle.static.data(name="_matmul_y", + shape=[1, self.hidden, self.hidden], + dtype='float32') + ele_y = paddle.static.data(name="_ele_y", + shape=[ + self.hidden, + ], + dtype='float32') multi_layer = MultiFCLayer(self.hidden, self._get_act_type()[0]) with paddle.static.amp.fp16_guard(): @@ -289,10 +297,9 @@ class TestFuseGemmEpilogueBWDBase(unittest.TestCase): build_strategy = paddle.static.BuildStrategy() build_strategy.fuse_gemm_epilogue = True program = paddle.static.CompiledProgram(self.main_prog) - program = program.with_data_parallel( - loss_name=self.loss.name, - build_strategy=build_strategy, - places=paddle.static.cuda_places()) + program = program.with_data_parallel(loss_name=self.loss.name, + build_strategy=build_strategy, + places=paddle.static.cuda_places()) outs_res = self.exe.run(program, feed=self.feed, fetch_list=self.fetch) @@ -303,12 +310,12 @@ class TestFuseGemmEpilogueBWDBase(unittest.TestCase): self.assertTrue( verify_node_count(program._graph, "fused_gemm_epilogue", 3), - "[{}] The number of fused_gemm_epilogue is miss-matched in the computing graph.". - format(type(self).__name__)) + "[{}] The number of fused_gemm_epilogue is miss-matched in the computing graph." 
+ .format(type(self).__name__)) self.assertTrue( verify_node_count(program._graph, "fused_gemm_epilogue_grad", 3), - "[{}] The number of fused_gemm_epilogue_grad is miss-matched in the computing graph.". - format(type(self).__name__)) + "[{}] The number of fused_gemm_epilogue_grad is miss-matched in the computing graph." + .format(type(self).__name__)) _, act_fwd_name, act_bwd_name = self._get_act_type() self.assertTrue( verify_node_count(program._graph, act_fwd_name, 1), @@ -330,6 +337,7 @@ class TestFuseGemmEpilogueBWDBase(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueReLUBWDFP32(TestFuseGemmEpilogueBWDBase): + def _pre_test_hooks(self): self.atol = 1e-4 self.rtol = 1e-3 @@ -344,6 +352,7 @@ class TestFuseGemmEpilogueReLUBWDFP32(TestFuseGemmEpilogueBWDBase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueReLUBWDFP16(TestFuseGemmEpilogueReLUBWDFP32): + def _pre_test_hooks(self): self.atol = 1e-3 self.rtol = 1e-2 @@ -360,6 +369,7 @@ class TestFuseGemmEpilogueReLUBWDFP16(TestFuseGemmEpilogueReLUBWDFP32): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueGeLUBWDFP32(TestFuseGemmEpilogueBWDBase): + def _pre_test_hooks(self): self.atol = 5e-4 self.rtol = 1e-3 @@ -374,6 +384,7 @@ class TestFuseGemmEpilogueGeLUBWDFP32(TestFuseGemmEpilogueBWDBase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueGeLUBWDFP16(TestFuseGemmEpilogueGeLUBWDFP32): + def _pre_test_hooks(self): self.atol = 1e-3 self.rtol = 1e-2 diff --git a/python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py index 51c06bb79d7..b1451e83f9c 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py @@ -24,6 +24,7 @@ import os class TestFuseOptimizationOps(TestParallelExecutorBase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -63,36 +64,41 @@ class TestFuseOptimizationOps(TestParallelExecutorBase): def _decorate_compare_fused_optimizer_ops(self, model, use_device, optimizer): - self._compare_fused_optimizer_ops( - model, - use_device, - feed_dict=self._get_feed_dict(), - optimizer=optimizer) + self._compare_fused_optimizer_ops(model, + use_device, + feed_dict=self._get_feed_dict(), + optimizer=optimizer) class TestFuseAdamOps(TestFuseOptimizationOps): + def optimizer(self, learning_rate=1e-4): return fluid.optimizer.Adam(learning_rate=learning_rate) def test_batchnorm_fc_with_fuse_op(self): - self._decorate_compare_fused_optimizer_ops( - fc_with_batchnorm, DeviceType.CUDA, optimizer=self.optimizer) - self._decorate_compare_fused_optimizer_ops( - fc_with_batchnorm, DeviceType.CPU, optimizer=self.optimizer) + self._decorate_compare_fused_optimizer_ops(fc_with_batchnorm, + DeviceType.CUDA, + optimizer=self.optimizer) + self._decorate_compare_fused_optimizer_ops(fc_with_batchnorm, + DeviceType.CPU, + optimizer=self.optimizer) class TestFuseSGDOps(TestFuseAdamOps): + def optimizer(self, learning_rate=1e-3): return fluid.optimizer.SGD(learning_rate=learning_rate) class TestFuseMomentumOps(TestFuseAdamOps): + def optimizer(self, learning_rate=1e-3): - return fluid.optimizer.Momentum( - learning_rate=learning_rate, momentum=0.1) + return fluid.optimizer.Momentum(learning_rate=learning_rate, + 
momentum=0.1) class TestSpareFuseAdamOps(TestFuseOptimizationOps): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -120,24 +126,29 @@ class TestSpareFuseAdamOps(TestFuseOptimizationOps): def test_simple_bow_net_with_fuse_op(self): model = partial(bow_net, dict_dim=self.word_dict_len, is_sparse=True) - self._decorate_compare_fused_optimizer_ops( - model, DeviceType.CUDA, optimizer=self.optimizer) - self._decorate_compare_fused_optimizer_ops( - model, DeviceType.CPU, optimizer=self.optimizer) + self._decorate_compare_fused_optimizer_ops(model, + DeviceType.CUDA, + optimizer=self.optimizer) + self._decorate_compare_fused_optimizer_ops(model, + DeviceType.CPU, + optimizer=self.optimizer) class TestSpareFuseSGDOps(TestSpareFuseAdamOps): + def optimizer(self, learning_rate=1e-3): return fluid.optimizer.SGD(learning_rate=learning_rate) class TestSpareFuseMomentumOps(TestSpareFuseAdamOps): + def optimizer(self, learning_rate=1e-3): - return fluid.optimizer.Momentum( - learning_rate=learning_rate, momentum=0.1) + return fluid.optimizer.Momentum(learning_rate=learning_rate, + momentum=0.1) class TestPassConflictBase(TestFuseAdamOps): + def _compare_fused_optimizer_ops(self, model, use_device, @@ -147,36 +158,40 @@ class TestPassConflictBase(TestFuseAdamOps): if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda(): return - self.check_pass_conflict( - model, - feed_dict=feed_dict, - get_data_from_feeder=get_data_from_feeder, - use_device=use_device, - fuse_all_optimizer_ops=True, - optimizer=optimizer, - enable_sequential_execution=True) + self.check_pass_conflict(model, + feed_dict=feed_dict, + get_data_from_feeder=get_data_from_feeder, + use_device=use_device, + fuse_all_optimizer_ops=True, + optimizer=optimizer, + enable_sequential_execution=True) class TestFuseAdamOpsPassConflict(TestPassConflictBase): + def optimizer(self, learning_rate=1e-4): return fluid.optimizer.Adam(learning_rate=learning_rate) def test_batchnorm_fc_with_fuse_op(self): - self._decorate_compare_fused_optimizer_ops( - fc_with_batchnorm, DeviceType.CPU, optimizer=self.optimizer) - self._decorate_compare_fused_optimizer_ops( - fc_with_batchnorm, DeviceType.CUDA, optimizer=self.optimizer) + self._decorate_compare_fused_optimizer_ops(fc_with_batchnorm, + DeviceType.CPU, + optimizer=self.optimizer) + self._decorate_compare_fused_optimizer_ops(fc_with_batchnorm, + DeviceType.CUDA, + optimizer=self.optimizer) class TestFuseSGDOpsPassConflict(TestFuseAdamOpsPassConflict): + def optimizer(self, learning_rate=1e-3): return fluid.optimizer.SGD(learning_rate=learning_rate) class TestFuseMomentumOpsPassConflict(TestFuseAdamOpsPassConflict): + def optimizer(self, learning_rate=1e-3): - return fluid.optimizer.Momentum( - learning_rate=learning_rate, momentum=0.1) + return fluid.optimizer.Momentum(learning_rate=learning_rate, + momentum=0.1) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py index d391b04aa47..a86ca3e31f6 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py @@ -28,21 +28,25 @@ def norm(*args, **kargs): def sep_conv(input, channel, stride, filter, dilation=1, act=None): # with scope('depthwise'): - input = fluid.layers.conv2d( - input, - input.shape[1], - filter, - stride, - groups=input.shape[1], - padding=(filter // 2) * dilation, - 
dilation=dilation, - use_cudnn=False, - bias_attr=False) + input = fluid.layers.conv2d(input, + input.shape[1], + filter, + stride, + groups=input.shape[1], + padding=(filter // 2) * dilation, + dilation=dilation, + use_cudnn=False, + bias_attr=False) input = norm(input) if act: input = act(input) # with scope('pointwise'): - input = fluid.layers.conv2d( - input, channel, 1, 1, groups=1, padding=0, bias_attr=False) + input = fluid.layers.conv2d(input, + channel, + 1, + 1, + groups=1, + padding=0, + bias_attr=False) input = norm(input) if act: input = act(input) return input @@ -63,6 +67,7 @@ def simple_depthwise_net(use_feed): class TestMNIST(TestParallelExecutorBase): + def _init_data(self, random=True): np.random.seed(5) if random: @@ -88,16 +93,20 @@ class TestMNIST(TestParallelExecutorBase): fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, fuse_relu_depthwise_conv=True, use_ir_memory_optimize=True, optimizer=_optimizer) not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, fuse_relu_depthwise_conv=False, optimizer=_optimizer) diff --git a/python/paddle/fluid/tests/unittests/test_fused_attention_op.py b/python/paddle/fluid/tests/unittests/test_fused_attention_op.py index 445620f9e1c..6507cc1ee32 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_attention_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_attention_op.py @@ -27,12 +27,14 @@ from paddle.fluid import layers import unittest from op_test import OpTest from paddle.fluid.framework import default_main_program, _enable_legacy_dygraph + _enable_legacy_dygraph() default_main_program().random_seed = 42 class TestFusedAttentionOp(OpTest): + def setUp(self): self.config() self.generate_input_data() @@ -52,26 +54,22 @@ class TestFusedAttentionOp(OpTest): self.__class__.op_type = "fused_attention" # use autograd to check grad in this unittest. 
self.__class__.no_need_check_grad = True - self.q_proj = Linear( - self.embed_dim, - self.embed_dim, - self.weight_attr, - bias_attr=self.bias_attr) - self.k_proj = Linear( - self.kdim, - self.embed_dim, - self.weight_attr, - bias_attr=self.bias_attr) - self.v_proj = Linear( - self.vdim, - self.embed_dim, - self.weight_attr, - bias_attr=self.bias_attr) - self.out_proj = Linear( - self.embed_dim, - self.embed_dim, - self.weight_attr, - bias_attr=self.bias_attr) + self.q_proj = Linear(self.embed_dim, + self.embed_dim, + self.weight_attr, + bias_attr=self.bias_attr) + self.k_proj = Linear(self.kdim, + self.embed_dim, + self.weight_attr, + bias_attr=self.bias_attr) + self.v_proj = Linear(self.vdim, + self.embed_dim, + self.weight_attr, + bias_attr=self.bias_attr) + self.out_proj = Linear(self.embed_dim, + self.embed_dim, + self.weight_attr, + bias_attr=self.bias_attr) paddle.set_default_dtype(np.float32) self.norm1 = LayerNorm(self.embed_dim) self.norm2 = LayerNorm(self.embed_dim) @@ -116,10 +114,9 @@ class TestFusedAttentionOp(OpTest): if self.has_attn_mask: # [B, n_head, seq_len, out_seq_len] - self.attn_mask = np.ones( - (self.batch_size, self.num_heads, self.query_length, - out_seq_len), - dtype=self.attn_mask_type) + self.attn_mask = np.ones((self.batch_size, self.num_heads, + self.query_length, out_seq_len), + dtype=self.attn_mask_type) if self.attn_mask_type == np.int64: self.attn_mask = np.tril(self.attn_mask) elif self.attn_mask_type == np.float64: @@ -174,8 +171,10 @@ class TestFusedAttentionOp(OpTest): # [B, n_head, seq_len, head_dim] * [B, n_head, out_seq_len, head_dim] # --> [B, n_head, seq_len, out_seq_len] - qk_out = layers.matmul( - x=q_out, y=k_out, transpose_y=True, alpha=self.head_dim**-0.5) + qk_out = layers.matmul(x=q_out, + y=k_out, + transpose_y=True, + alpha=self.head_dim**-0.5) if attn_mask is not None: attn_mask = _convert_attention_mask(attn_mask, qk_out.dtype) @@ -185,11 +184,10 @@ class TestFusedAttentionOp(OpTest): softmax_out = F.softmax(qk_out) if self.dropout_prob: - dropout_out = F.dropout( - softmax_out, - self.dropout_prob, - training=self.training, - mode="upscale_in_train") + dropout_out = F.dropout(softmax_out, + self.dropout_prob, + training=self.training, + mode="upscale_in_train") # [B, n_head, seq_len, out_seq_len] * [B, n_head, out_seq_len, head_dim] # --> [B, n_head, seq_len, head_dim] qktv_out = tensor.matmul(dropout_out, v_out) @@ -210,37 +208,37 @@ class TestFusedAttentionOp(OpTest): if self.has_cache_kv: return final_out - paddle.autograd.backward( - [final_out], [paddle.to_tensor(self.dout)], retain_graph=True) + paddle.autograd.backward([final_out], [paddle.to_tensor(self.dout)], + retain_graph=True) return final_out, tensor_query.grad def GetFusedAttentionOut(self): paddle.disable_static(place=paddle.CUDAPlace(0)) - q_proj_weight = paddle.to_tensor( - self.q_proj.weight, stop_gradient=False) - k_proj_weight = paddle.to_tensor( - self.k_proj.weight, stop_gradient=False) - v_proj_weight = paddle.to_tensor( - self.v_proj.weight, stop_gradient=False) - out_linear_weight = paddle.to_tensor( - self.out_proj.weight, stop_gradient=False) + q_proj_weight = paddle.to_tensor(self.q_proj.weight, + stop_gradient=False) + k_proj_weight = paddle.to_tensor(self.k_proj.weight, + stop_gradient=False) + v_proj_weight = paddle.to_tensor(self.v_proj.weight, + stop_gradient=False) + out_linear_weight = paddle.to_tensor(self.out_proj.weight, + stop_gradient=False) if self.bias_attr is False: qkv_bias_tensor = None out_linear_bias = None else: - q_proj_bias = 
paddle.to_tensor( - self.q_proj.bias, stop_gradient=False) - k_proj_bias = paddle.to_tensor( - self.k_proj.bias, stop_gradient=False) - v_proj_bias = paddle.to_tensor( - self.v_proj.bias, stop_gradient=False) + q_proj_bias = paddle.to_tensor(self.q_proj.bias, + stop_gradient=False) + k_proj_bias = paddle.to_tensor(self.k_proj.bias, + stop_gradient=False) + v_proj_bias = paddle.to_tensor(self.v_proj.bias, + stop_gradient=False) qkv_bias = np.concatenate( (q_proj_bias.numpy(), k_proj_bias.numpy(), v_proj_bias.numpy())) qkv_bias = qkv_bias.reshape((3, self.num_heads, self.head_dim)) qkv_bias_tensor = paddle.to_tensor(qkv_bias, stop_gradient=False) - out_linear_bias = paddle.to_tensor( - self.out_proj.bias, stop_gradient=False) + out_linear_bias = paddle.to_tensor(self.out_proj.bias, + stop_gradient=False) ln1_scale = paddle.to_tensor(self.norm1.weight, stop_gradient=False) ln1_bias = paddle.to_tensor(self.norm1.bias, stop_gradient=False) @@ -278,32 +276,39 @@ class TestFusedAttentionOp(OpTest): if self.has_cache_kv: return final_out[0], final_out[1] - paddle.autograd.backward( - [final_out], [paddle.to_tensor(self.dout)], retain_graph=True) + paddle.autograd.backward([final_out], [paddle.to_tensor(self.dout)], + retain_graph=True) return final_out, x.grad def test_fused_attention_op(self): final_out_ref, x_grad_ref = self.GetBaselineOut() final_out, x_grad = self.GetFusedAttentionOut() - np.testing.assert_allclose( - final_out_ref, final_out.numpy(), rtol=self.rtol, atol=self.atol) - np.testing.assert_allclose( - x_grad_ref, x_grad.numpy(), rtol=self.rtol, atol=self.atol) + np.testing.assert_allclose(final_out_ref, + final_out.numpy(), + rtol=self.rtol, + atol=self.atol) + np.testing.assert_allclose(x_grad_ref, + x_grad.numpy(), + rtol=self.rtol, + atol=self.atol) class TestFusedAttentionOpBiasIsNone(TestFusedAttentionOp): + def config(self): super().config() self.bias_attr = False class TestFusedAttentionOpPreLn(TestFusedAttentionOp): + def config(self): super().config() self.pre_layer_norm = True class TestFusedAttentionOpNoneAttnMask(TestFusedAttentionOp): + def config(self): super().config() self.pre_layer_norm = True @@ -311,6 +316,7 @@ class TestFusedAttentionOpNoneAttnMask(TestFusedAttentionOp): class TestFusedAttentionOpFp16(TestFusedAttentionOp): + def config(self): super().config() self.x_type = np.float16 @@ -318,13 +324,18 @@ class TestFusedAttentionOpFp16(TestFusedAttentionOp): def test_fused_attention_op(self): final_out_ref, x_grad_ref = self.GetBaselineOut() final_out, x_grad = self.GetFusedAttentionOut() - np.testing.assert_allclose( - final_out_ref, final_out.numpy(), rtol=self.rtol, atol=self.atol) - np.testing.assert_allclose( - x_grad_ref, x_grad.numpy(), rtol=self.rtol, atol=self.atol) + np.testing.assert_allclose(final_out_ref, + final_out.numpy(), + rtol=self.rtol, + atol=self.atol) + np.testing.assert_allclose(x_grad_ref, + x_grad.numpy(), + rtol=self.rtol, + atol=self.atol) class TestFusedAttentionOpCacheKV(TestFusedAttentionOp): + def config(self): super().config() self.has_cache_kv = True @@ -336,11 +347,10 @@ class TestFusedAttentionOpCacheKV(TestFusedAttentionOp): with paddle.no_grad(): final_out_ref = self.GetBaselineOut() final_out, cache_kv_out = self.GetFusedAttentionOut() - np.testing.assert_allclose( - final_out_ref, - final_out.numpy(), - rtol=self.rtol, - atol=self.atol) + np.testing.assert_allclose(final_out_ref, + final_out.numpy(), + rtol=self.rtol, + atol=self.atol) if __name__ == "__main__": diff --git 
a/python/paddle/fluid/tests/unittests/test_fused_attention_op_api.py b/python/paddle/fluid/tests/unittests/test_fused_attention_op_api.py index 74dc9351a25..89689942a02 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_attention_op_api.py +++ b/python/paddle/fluid/tests/unittests/test_fused_attention_op_api.py @@ -44,8 +44,8 @@ def softmax(x): def batch_matmul(x, y): assert x.shape[0] == y.shape[0] assert x.shape[1] == y.shape[1] - retval = np.zeros( - (x.shape[0], x.shape[1], x.shape[2], y.shape[3]), dtype=np.float64) + retval = np.zeros((x.shape[0], x.shape[1], x.shape[2], y.shape[3]), + dtype=np.float64) for i in range(x.shape[0]): for j in range(x.shape[1]): retval[i, j, :, :] = np.matmul(x[i, j, :, :], y[i, j, :, :]) @@ -90,8 +90,9 @@ def compute_reference(pre_layer_norm, query, attn_mask, ln_scale, ln_bias, head_dim = qkv_weight.shape[2] # embed_dim, 3, num_heads, self.head_dim qkv_weight = qkv_weight.transpose((3, 0, 1, 2)) - qkv_weight = qkv_weight.reshape(qkv_weight.shape[0], qkv_weight.shape[1] * - qkv_weight.shape[2] * qkv_weight.shape[3]) + qkv_weight = qkv_weight.reshape( + qkv_weight.shape[0], + qkv_weight.shape[1] * qkv_weight.shape[2] * qkv_weight.shape[3]) if qkv_bias is not None: qkv_bias = qkv_bias.reshape(qkv_bias.shape[0] * qkv_bias.shape[1] * @@ -165,6 +166,7 @@ def compute_reference(pre_layer_norm, query, attn_mask, ln_scale, ln_bias, class TestFusedAttentionAPI(unittest.TestCase): + def setUp(self): self.setXType() self.setPreLn() @@ -218,10 +220,9 @@ class TestFusedAttentionAPI(unittest.TestCase): self.query = np.random.rand(self.batch_size, self.query_length, self.embed_dim).astype(self.x_type) if self.has_attn_mask: - self.attn_mask = np.ones( - (self.batch_size, self.num_heads, self.query_length, - self.key_length), - dtype=self.attn_mask_type) + self.attn_mask = np.ones((self.batch_size, self.num_heads, + self.query_length, self.key_length), + dtype=self.attn_mask_type) if self.attn_mask_type == np.int64: self.attn_mask = np.tril(self.attn_mask) elif self.attn_mask_type == np.float64: @@ -238,18 +239,19 @@ class TestFusedAttentionAPI(unittest.TestCase): attn_mask_tensor = paddle.to_tensor(self.attn_mask) else: attn_mask_tensor = None - fused_attn = FusedMultiHeadAttention( - self.embed_dim, self.num_heads, self.dropout_prob, - self.attn_dropout_prob, self.kdim, self.vdim, self.pre_layer_norm, - self.need_weight, self.weight_attr, self.bias_attr) + fused_attn = FusedMultiHeadAttention(self.embed_dim, self.num_heads, + self.dropout_prob, + self.attn_dropout_prob, self.kdim, + self.vdim, self.pre_layer_norm, + self.need_weight, self.weight_attr, + self.bias_attr) if self.bias_attr is not False: - qkv_bias = np.random.random(fused_attn.qkv_bias.shape).astype( - 'float32') + qkv_bias = np.random.random( + fused_attn.qkv_bias.shape).astype('float32') fused_attn.qkv_bias.set_value(paddle.to_tensor(qkv_bias)) - out = fused_attn( - paddle.to_tensor(self.query), - paddle.to_tensor(self.query), - paddle.to_tensor(self.query), attn_mask_tensor) + out = fused_attn(paddle.to_tensor(self.query), + paddle.to_tensor(self.query), + paddle.to_tensor(self.query), attn_mask_tensor) fused_attn_qkv_bias = None fused_attn_linear_bias = None @@ -267,27 +269,31 @@ class TestFusedAttentionAPI(unittest.TestCase): fused_attn.ln_scale.numpy(), fused_attn_ln_bias, fused_attn.qkv_weight.numpy(), fused_attn_qkv_bias, fused_attn.linear_weight.numpy(), fused_attn_linear_bias) - np.testing.assert_allclose( - ref_out, out.numpy(), rtol=self.rtol, atol=self.atol) + 
np.testing.assert_allclose(ref_out, + out.numpy(), + rtol=self.rtol, + atol=self.atol) def run_static(self): - fused_attn = FusedMultiHeadAttention( - self.embed_dim, self.num_heads, self.dropout_prob, - self.attn_dropout_prob, self.kdim, self.vdim, self.pre_layer_norm, - self.need_weight, self.weight_attr, self.bias_attr) + fused_attn = FusedMultiHeadAttention(self.embed_dim, self.num_heads, + self.dropout_prob, + self.attn_dropout_prob, self.kdim, + self.vdim, self.pre_layer_norm, + self.need_weight, self.weight_attr, + self.bias_attr) x = paddle.static.data( name='X', shape=[self.batch_size, self.query_length, self.embed_dim], dtype=self.x_type) if self.has_attn_mask: - attn_mask = paddle.static.data( - name='SrcMask', - shape=[ - self.batch_size, self.num_heads, self.query_length, - self.key_length - ], - dtype=self.attn_mask_type) + attn_mask = paddle.static.data(name='SrcMask', + shape=[ + self.batch_size, self.num_heads, + self.query_length, + self.key_length + ], + dtype=self.attn_mask_type) final_out = fused_attn(x, x, x, attn_mask) else: final_out = fused_attn(x, x, x) @@ -304,8 +310,10 @@ class TestFusedAttentionAPI(unittest.TestCase): if self.bias_attr is False: out, qkv_weight, out_linear_weight, ln_scale, ln_2_scale = exe.run( paddle.static.default_main_program(), - feed={"X": self.query, - "SrcMask": self.attn_mask}, + feed={ + "X": self.query, + "SrcMask": self.attn_mask + }, fetch_list=[ final_out, fused_attn.qkv_weight, fused_attn.linear_weight, fused_attn.pre_ln_scale, @@ -314,8 +322,10 @@ class TestFusedAttentionAPI(unittest.TestCase): else: out, qkv_weight, qkv_bias, out_linear_weight, linear_bias, ln_scale, ln_bias, ln_2_scale, ln_2_bias = exe.run( paddle.static.default_main_program(), - feed={"X": self.query, - "SrcMask": self.attn_mask}, + feed={ + "X": self.query, + "SrcMask": self.attn_mask + }, fetch_list=[ final_out, fused_attn.qkv_weight, fused_attn.qkv_bias, fused_attn.linear_weight, fused_attn.linear_bias, @@ -326,7 +336,9 @@ class TestFusedAttentionAPI(unittest.TestCase): if self.bias_attr is False: out, qkv_weight, out_linear_weight, ln_scale, ln_2_scale = exe.run( paddle.static.default_main_program(), - feed={"X": self.query, }, + feed={ + "X": self.query, + }, fetch_list=[ final_out, fused_attn.qkv_weight, fused_attn.linear_weight, fused_attn.pre_ln_scale, @@ -335,7 +347,9 @@ class TestFusedAttentionAPI(unittest.TestCase): else: out, qkv_weight, qkv_bias, out_linear_weight, linear_bias, ln_scale, ln_bias, ln_2_scale, ln_2_bias = exe.run( paddle.static.default_main_program(), - feed={"X": self.query, }, + feed={ + "X": self.query, + }, fetch_list=[ final_out, fused_attn.qkv_weight, fused_attn.qkv_bias, fused_attn.linear_weight, fused_attn.linear_bias, @@ -361,6 +375,7 @@ class TestFusedAttentionAPI(unittest.TestCase): class TestFusedAttentionAPINoneAttnMask(TestFusedAttentionAPI): + def setAttnMask(self): self.has_attn_mask = False @@ -369,6 +384,7 @@ class TestFusedAttentionAPINoneAttnMask(TestFusedAttentionAPI): class TestFusedAttentionAPIBiasIsNone(TestFusedAttentionAPI): + def setBiasAttr(self): self.bias_attr = False diff --git a/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op.py b/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op.py index d47450837a4..92c815a246f 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op.py @@ -32,6 +32,7 @@ 
default_main_program().random_seed = 42 class TestFusedBiasDropoutResidualLayerNormOp(OpTest): + def setUp(self): self.config() self.generate_input_data() @@ -67,12 +68,12 @@ class TestFusedBiasDropoutResidualLayerNormOp(OpTest): if self.bias_attr is False: self.tensor_linear_bias = None else: - self.tensor_linear_bias = paddle.to_tensor( - self.linear_bias, stop_gradient=False) + self.tensor_linear_bias = paddle.to_tensor(self.linear_bias, + stop_gradient=False) self.tensor_x = paddle.to_tensor(self.x, stop_gradient=False) - self.tensor_residual = paddle.to_tensor( - self.residual, stop_gradient=False) + self.tensor_residual = paddle.to_tensor(self.residual, + stop_gradient=False) def GetBaselineOut(self): paddle.disable_static(place=paddle.CUDAPlace(0)) @@ -85,8 +86,8 @@ class TestFusedBiasDropoutResidualLayerNormOp(OpTest): residual_out = self.tensor_residual + self.dropout(out) final_out = self.norm1(residual_out) - paddle.autograd.backward( - [final_out], [paddle.to_tensor(self.dout)], retain_graph=True) + paddle.autograd.backward([final_out], [paddle.to_tensor(self.dout)], + retain_graph=True) if self.tensor_linear_bias is not None: tensor_linear_bias_grad = self.tensor_linear_bias.grad @@ -105,8 +106,8 @@ class TestFusedBiasDropoutResidualLayerNormOp(OpTest): self.tensor_x, self.tensor_residual, self.tensor_linear_bias, ln_scale, ln_bias, self.dropout_prob, epsilon) - paddle.autograd.backward( - [final_out], [paddle.to_tensor(self.dout)], retain_graph=True) + paddle.autograd.backward([final_out], [paddle.to_tensor(self.dout)], + retain_graph=True) if self.tensor_linear_bias is not None: tensor_linear_bias_grad = self.tensor_linear_bias.grad else: @@ -118,22 +119,28 @@ class TestFusedBiasDropoutResidualLayerNormOp(OpTest): ) out, x_grad, residual_grad, linear_bias_grad = self.GetFusedBiasDropoutResidualLayerNormOut( ) - np.testing.assert_allclose( - out_ref, out.numpy(), rtol=1e-5, atol=self.atol) - np.testing.assert_allclose( - x_grad_ref, x_grad.numpy(), rtol=1e-5, atol=self.atol) - np.testing.assert_allclose( - residual_grad_ref, residual_grad.numpy(), rtol=1e-5, atol=self.atol) + np.testing.assert_allclose(out_ref, + out.numpy(), + rtol=1e-5, + atol=self.atol) + np.testing.assert_allclose(x_grad_ref, + x_grad.numpy(), + rtol=1e-5, + atol=self.atol) + np.testing.assert_allclose(residual_grad_ref, + residual_grad.numpy(), + rtol=1e-5, + atol=self.atol) if linear_bias_grad_ref is not None: - np.testing.assert_allclose( - linear_bias_grad_ref, - linear_bias_grad.numpy(), - rtol=1e-5, - atol=self.atol) + np.testing.assert_allclose(linear_bias_grad_ref, + linear_bias_grad.numpy(), + rtol=1e-5, + atol=self.atol) class TestFusedBiasDropoutResidualLayerNormOpBiasIsNone( TestFusedBiasDropoutResidualLayerNormOp): + def config(self): super().config() self.bias_attr = False @@ -141,6 +148,7 @@ class TestFusedBiasDropoutResidualLayerNormOpBiasIsNone( class TestFusedBiasDropoutResidualLayerNormOpFp16( TestFusedBiasDropoutResidualLayerNormOp): + def config(self): super().config() self.x_type = np.float16 diff --git a/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op_api.py b/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op_api.py index 19fc3972e58..f0c6bd83d40 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op_api.py +++ b/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op_api.py @@ -66,6 +66,7 @@ def compute_reference(x, residual, ln_scale, ln_bias, 
linear_bias): class TestFusedBiasDropoutResidualLayerNormAPI(unittest.TestCase): + def setUp(self): self.setXType() self.setBiasAttr() @@ -99,12 +100,13 @@ class TestFusedBiasDropoutResidualLayerNormAPI(unittest.TestCase): linear_bias = None if self.bias_attr is not False: - linear_bias = np.random.random(fused_bias_dropout_residual_ln. - linear_bias.shape).astype('float32') + linear_bias = np.random.random( + fused_bias_dropout_residual_ln.linear_bias.shape).astype( + 'float32') fused_bias_dropout_residual_ln.linear_bias.set_value( paddle.to_tensor(linear_bias)) - out = fused_bias_dropout_residual_ln( - paddle.to_tensor(self.x), paddle.to_tensor(self.residual)) + out = fused_bias_dropout_residual_ln(paddle.to_tensor(self.x), + paddle.to_tensor(self.residual)) ln_bias = None if self.bias_attr is not False: @@ -112,12 +114,16 @@ class TestFusedBiasDropoutResidualLayerNormAPI(unittest.TestCase): ln_scale = fused_bias_dropout_residual_ln.ln_scale.numpy(), ref_out = compute_reference(self.x, self.residual, ln_scale, ln_bias, linear_bias) - np.testing.assert_allclose( - ref_out, out.numpy(), rtol=1e-5, atol=self.atol) + np.testing.assert_allclose(ref_out, + out.numpy(), + rtol=1e-5, + atol=self.atol) def run_static(self): - fused_op = FusedBiasDropoutResidualLayerNorm( - self.embed_dim, self.dropout_prob, self.weight_attr, self.bias_attr) + fused_op = FusedBiasDropoutResidualLayerNorm(self.embed_dim, + self.dropout_prob, + self.weight_attr, + self.bias_attr) x = paddle.static.data( name='X', @@ -136,16 +142,19 @@ class TestFusedBiasDropoutResidualLayerNormAPI(unittest.TestCase): linear_bias = None ln_bias = None if self.bias_attr is False: - out, ln_scale = exe.run( - paddle.static.default_main_program(), - feed={"X": self.x, - "Residual": self.residual}, - fetch_list=[final_out, fused_op.ln_scale]) + out, ln_scale = exe.run(paddle.static.default_main_program(), + feed={ + "X": self.x, + "Residual": self.residual + }, + fetch_list=[final_out, fused_op.ln_scale]) else: out, linear_bias, ln_scale, ln_bias = exe.run( paddle.static.default_main_program(), - feed={"X": self.x, - "Residual": self.residual}, + feed={ + "X": self.x, + "Residual": self.residual + }, fetch_list=[ final_out, fused_op.linear_bias, fused_op.ln_scale, fused_op.ln_bias @@ -167,6 +176,7 @@ class TestFusedBiasDropoutResidualLayerNormAPI(unittest.TestCase): class TestFusedBiasDropoutResidualLayerNormAPIBiasIsNone( TestFusedBiasDropoutResidualLayerNormAPI): + def setBiasAttr(self): self.bias_attr = False diff --git a/python/paddle/fluid/tests/unittests/test_fused_elemwise_activation_op.py b/python/paddle/fluid/tests/unittests/test_fused_elemwise_activation_op.py index ba9e05470e3..07a2e28f678 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_elemwise_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_elemwise_activation_op.py @@ -38,7 +38,9 @@ def create_test_class(test_case, attrs, dtype=np.float32, grad_chek=True): + class TestFusedElementwiseActivationOp_base(OpTest): + def setUp(self): self.op_type = "fused_elemwise_activation" self.dtype = dtype @@ -73,7 +75,9 @@ def create_test_class(test_case, callback(self.x, self.y, self.x, self.y) def init_attr(self): - self.attrs = {'axis': self.axis, } + self.attrs = { + 'axis': self.axis, + } for key in attrs.keys(): self.attrs[key] = attrs[key] @@ -98,50 +102,50 @@ def create_test_class(test_case, if not grad_chek: return if self.attrs["save_intermediate_out"]: - self.check_grad( - ['Y'], ['Out'], - max_relative_error=0.005, - no_grad_set=set("X")) + 
self.check_grad(['Y'], ['Out'], + max_relative_error=0.005, + no_grad_set=set("X")) else: - self.check_grad( - ['Y'], ['Out'], - max_relative_error=0.005, - no_grad_set=set("X")) + self.check_grad(['Y'], ['Out'], + max_relative_error=0.005, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): if not grad_chek: return if self.attrs["save_intermediate_out"]: - self.check_grad( - ['X'], ['Out'], - max_relative_error=0.005, - no_grad_set=set("Y")) + self.check_grad(['X'], ['Out'], + max_relative_error=0.005, + no_grad_set=set("Y")) else: - self.check_grad( - ['X'], ['Out'], - max_relative_error=0.005, - no_grad_set=set("Y")) + self.check_grad(['X'], ['Out'], + max_relative_error=0.005, + no_grad_set=set("Y")) class TestFusedElementwiseActivationOp_scalar( TestFusedElementwiseActivationOp_base): + def init_input(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) class TestFusedElementwiseActivationOp_scalar2( TestFusedElementwiseActivationOp_base): + def init_input(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1, 1).astype(self.dtype) class TestFusedElementwiseActivationOp_Vector( TestFusedElementwiseActivationOp_base): + def init_input(self): self.x = np.random.random((32, )).astype(self.dtype) self.y = np.random.random((32, )).astype(self.dtype) class TestFusedElementwiseActivationOp_broadcast_0( TestFusedElementwiseActivationOp_base): + def init_input(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(2).astype(self.dtype) @@ -153,6 +157,7 @@ def create_test_class(test_case, class TestFusedElementwiseActivationOp_broadcast_1( TestFusedElementwiseActivationOp_base): + def init_input(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(3).astype(self.dtype) @@ -164,6 +169,7 @@ def create_test_class(test_case, class TestFusedElementwiseActivationOp_broadcast_2( TestFusedElementwiseActivationOp_base): + def init_input(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(4).astype(self.dtype) @@ -174,6 +180,7 @@ def create_test_class(test_case, class TestFusedElementwiseActivationOp_broadcast_3( TestFusedElementwiseActivationOp_base): + def init_input(self): self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype) self.y = np.random.rand(3, 4).astype(self.dtype) @@ -185,6 +192,7 @@ def create_test_class(test_case, class TestFusedElementwiseActivationOp_broadcast_4( TestFusedElementwiseActivationOp_base): + def init_input(self): self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype) self.y = np.random.rand(2, 1).astype(self.dtype) @@ -196,6 +204,7 @@ def create_test_class(test_case, class TestFusedElementwiseActivationOp_rowwise_add_0( TestFusedElementwiseActivationOp_base): + def init_input(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(3, 4).astype(self.dtype) @@ -207,6 +216,7 @@ def create_test_class(test_case, class TestFusedElementwiseActivationOp_rowwise_add_1( TestFusedElementwiseActivationOp_base): + def init_input(self): self.x = np.random.rand(2, 1).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -218,6 +228,7 @@ def create_test_class(test_case, class TestFusedElementwiseActivationOp_channelwise_add( TestFusedElementwiseActivationOp_base): + def init_input(self): self.x = np.random.rand(3, 20, 20).astype(self.dtype) self.y = np.random.rand(3, 1, 1).astype(self.dtype) @@ -253,9 +264,9 @@ def create_test_class(test_case, "_rowwise_add_0"] = 
TestFusedElementwiseActivationOp_rowwise_add_0 globals()[test_case + "_rowwise_add_1"] = TestFusedElementwiseActivationOp_rowwise_add_1 - globals( - )[test_case + - "_channelwise_add"] = TestFusedElementwiseActivationOp_channelwise_add + globals()[ + test_case + + "_channelwise_add"] = TestFusedElementwiseActivationOp_channelwise_add def scale_add_func(x, y, x_bcast, y_bcast, scale, mode=0): @@ -330,33 +341,39 @@ for mode in {0, 1}: for save_intermediate_out in {True, False}: suffix = ("_save_intermediate_out" if save_intermediate_out else "") \ + ("_mode_"+ str(mode)) - create_test_class('scale_add' + suffix, scale_add_func, { - 'scale': scale, - 'functor_list': ["scale", "elementwise_add"], - 'save_intermediate_out': save_intermediate_out, - }) - create_test_class('add_scale' + suffix, add_scale_func, { - 'scale': scale, - 'functor_list': ["elementwise_add", "scale"], - 'save_intermediate_out': save_intermediate_out, - }) - create_test_class('add_relu' + suffix, add_relu_func, { - 'functor_list': ["elementwise_add", "relu"], - 'save_intermediate_out': save_intermediate_out, - }) - create_test_class('relu_add' + suffix, relu_add_func, { - 'functor_list': ["relu", "elementwise_add"], - 'save_intermediate_out': save_intermediate_out, - }) - create_test_class('mul_scale' + suffix, mul_scale_func, { - 'scale': scale, - 'functor_list': ["elementwise_mul", "scale"], - 'save_intermediate_out': save_intermediate_out, - }) - create_test_class('gelu_add' + suffix, gelu_add_func, { - 'functor_list': ["gelu", "elementwise_add"], - 'save_intermediate_out': save_intermediate_out, - }) + create_test_class( + 'scale_add' + suffix, scale_add_func, { + 'scale': scale, + 'functor_list': ["scale", "elementwise_add"], + 'save_intermediate_out': save_intermediate_out, + }) + create_test_class( + 'add_scale' + suffix, add_scale_func, { + 'scale': scale, + 'functor_list': ["elementwise_add", "scale"], + 'save_intermediate_out': save_intermediate_out, + }) + create_test_class( + 'add_relu' + suffix, add_relu_func, { + 'functor_list': ["elementwise_add", "relu"], + 'save_intermediate_out': save_intermediate_out, + }) + create_test_class( + 'relu_add' + suffix, relu_add_func, { + 'functor_list': ["relu", "elementwise_add"], + 'save_intermediate_out': save_intermediate_out, + }) + create_test_class( + 'mul_scale' + suffix, mul_scale_func, { + 'scale': scale, + 'functor_list': ["elementwise_mul", "scale"], + 'save_intermediate_out': save_intermediate_out, + }) + create_test_class( + 'gelu_add' + suffix, gelu_add_func, { + 'functor_list': ["gelu", "elementwise_add"], + 'save_intermediate_out': save_intermediate_out, + }) if core.is_compiled_with_cuda(): create_test_class( diff --git a/python/paddle/fluid/tests/unittests/test_fused_emb_seq_pool_op.py b/python/paddle/fluid/tests/unittests/test_fused_emb_seq_pool_op.py index d756394535a..7d06ae3e134 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_emb_seq_pool_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_emb_seq_pool_op.py @@ -28,6 +28,7 @@ import paddle.version as ver @skip_check_grad_ci(reason="check_grad is called when ver.mkl() == ON" "and 'Linux' in platform.platform().") class TestFusedEmbeddingSeqPoolOp(OpTest): + def setUp(self): self.op_type = "fused_embedding_seq_pool" self.emb_size = 6 @@ -39,7 +40,8 @@ class TestFusedEmbeddingSeqPoolOp(OpTest): self.attrs = {'is_sparse': True} self.inputs = {'W': self.table, 'Ids': (ids_expand, self.lod)} self.outputs = { - 'Out': np.reshape( + 'Out': + np.reshape( np.array([ self.table[[4, 3]] + 
self.table[[4, 3]] + self.table[[2, 1]], self.table[[16, 1]] @@ -54,11 +56,14 @@ class TestFusedEmbeddingSeqPoolOp(OpTest): # TODO(wangzhongpu): support lod in dygraph mode if ver.mkl() == "ON" and 'Linux' in platform.platform(): self.attrs = {'is_sparse': False} - self.check_grad( - ['W'], 'Out', no_grad_set=['Ids'], check_dygraph=False) + self.check_grad(['W'], + 'Out', + no_grad_set=['Ids'], + check_dygraph=False) class TestLookupTableOpWithPadding(TestFusedEmbeddingSeqPoolOp): + def test_check_output(self): if ver.mkl() == "ON" and 'Linux' in platform.platform(): ids = np.squeeze(self.ids, axis=2) @@ -75,8 +80,9 @@ class TestLookupTableOpWithPadding(TestFusedEmbeddingSeqPoolOp): output.append(np.sum(out, 0)) index += count self.outputs = { - 'Out': np.reshape( - np.array(output), [len(self.lod[0]), 2 * self.emb_size]) + 'Out': + np.reshape(np.array(output), + [len(self.lod[0]), 2 * self.emb_size]) } self.attrs = {'padding_idx': int(padding_idx)} # TODO(wangzhongpu): support lod in dygraph mode @@ -88,18 +94,23 @@ class TestLookupTableOpWithPadding(TestFusedEmbeddingSeqPoolOp): padding_idx = np.random.choice(ids.flatten(), 1)[0] self.attrs = {'padding_idx': int(padding_idx), 'is_sparse': False} # TODO(wangzhongpu): support lod in dygraph mode - self.check_grad( - ['W'], 'Out', no_grad_set=['Ids'], check_dygraph=False) + self.check_grad(['W'], + 'Out', + no_grad_set=['Ids'], + check_dygraph=False) class TestFusedEmbeddingSeqPoolApi(unittest.TestCase): + def test_api(self): if ver.mkl() == "ON" and 'Linux' in platform.platform(): import paddle.fluid as fluid dict_size = 20 - data_t = fluid.layers.data( - name='word', shape=[1], dtype='int64', lod_level=1) + data_t = fluid.layers.data(name='word', + shape=[1], + dtype='int64', + lod_level=1) padding_idx = np.random.randint(1, 10) out = fluid.contrib.fused_embedding_seq_pool( input=data_t, diff --git a/python/paddle/fluid/tests/unittests/test_fused_embedding_fc_lstm_op.py b/python/paddle/fluid/tests/unittests/test_fused_embedding_fc_lstm_op.py index 7988c66c172..9957b16a1b1 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_embedding_fc_lstm_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_embedding_fc_lstm_op.py @@ -43,12 +43,12 @@ def fused_embedded_fc_lstm( T = ids.shape[0] M = embeddings.shape[1] x = embeddings[ids].reshape([T, M]) - return lstm( - fc(x, wx, bx), lod, h0, c0, w_h, w_b, w_c, is_reverse, act_gate, - act_cell, act_cand) + return lstm(fc(x, wx, bx), lod, h0, c0, w_h, w_b, w_c, is_reverse, act_gate, + act_cell, act_cand) class TestFusionLSTMOp(OpTest): + def set_conf(self): pass @@ -56,7 +56,7 @@ class TestFusionLSTMOp(OpTest): self.op_type = 'fused_embedding_fc_lstm' self.lod = [[2, 3, 5, 4]] self.M = 8 # Embedding size - self.D = 16 # Hidden size + self.D = 16 # Hidden size self.dict_size = 18 self.has_initial_state = False self.use_peepholes = False @@ -82,8 +82,8 @@ class TestFusionLSTMOp(OpTest): w_c = b[:, 4 * self.D:] if self.use_peepholes else None # low is 0 , high is voc_size - 1 - ids = np.random.randint( - low=0, high=self.dict_size - 1, size=(T, 1)).astype("int64") + ids = np.random.randint(low=0, high=self.dict_size - 1, + size=(T, 1)).astype("int64") # embeddings as they were trained , so each entry is of M size embeddings = np.random.random( (self.dict_size, self.M)).astype("float32") @@ -109,10 +109,11 @@ class TestFusionLSTMOp(OpTest): wh = np.random.normal(size=(self.D, 4 * self.D)).astype('float32') - h, c = fused_embedded_fc_lstm( - ids, self.lod, embeddings, wx, bx, h0, c0, wh, w_b, 
w_c, - self.is_reverse, ACTIVATION[self.act_gate], - ACTIVATION[self.act_cell], ACTIVATION[self.act_cand]) + h, c = fused_embedded_fc_lstm(ids, self.lod, embeddings, wx, bx, h0, c0, + wh, w_b, w_c, self.is_reverse, + ACTIVATION[self.act_gate], + ACTIVATION[self.act_cell], + ACTIVATION[self.act_cand]) self.inputs = { 'Ids': (ids, self.lod), @@ -144,63 +145,74 @@ class TestFusionLSTMOp(OpTest): class TestFusionLSTMOpInit(TestFusionLSTMOp): + def set_conf(self): self.has_initial_state = True class TestFusionLSTMOpReverse(TestFusionLSTMOp): + def set_conf(self): self.is_reverse = True class TestFusionLSTMOpInitReverse(TestFusionLSTMOp): + def set_conf(self): self.has_initial_state = True self.is_reverse = True class TestFusionLSTMOpMD1(TestFusionLSTMOp): + def set_conf(self): self.M = 36 self.D = 8 class TestFusionLSTMOpMD2(TestFusionLSTMOp): + def set_conf(self): self.M = 8 self.D = 8 class TestFusionLSTMOpMD3(TestFusionLSTMOp): + def set_conf(self): self.M = 15 self.D = 3 class TestFusionLSTMOpBS1(TestFusionLSTMOp): + def set_conf(self): self.lod = [[3]] self.D = 16 class TestFusionLSTMOpPeepholes(TestFusionLSTMOp): + def set_conf(self): self.use_peepholes = True class TestFusionLSTMOpPeepholesInit(TestFusionLSTMOp): + def set_conf(self): self.use_peepholes = True self.has_initial_state = True class TestFusionLSTMOpPeepholesReverse(TestFusionLSTMOp): + def set_conf(self): self.use_peepholes = True self.is_reverse = True class TestFusionLSTMOpPeepholesInitReverse(TestFusionLSTMOp): + def set_conf(self): self.use_peepholes = True self.has_initial_state = True @@ -208,6 +220,7 @@ class TestFusionLSTMOpPeepholesInitReverse(TestFusionLSTMOp): class TestFusionLSTMOpPeepholesBS1(TestFusionLSTMOp): + def set_conf(self): self.use_peepholes = True self.lod = [[2]] diff --git a/python/paddle/fluid/tests/unittests/test_fused_fc_elementwise_layernorm_op.py b/python/paddle/fluid/tests/unittests/test_fused_fc_elementwise_layernorm_op.py index 9604201e04e..cd2f6b6e66c 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_fc_elementwise_layernorm_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_fc_elementwise_layernorm_op.py @@ -27,6 +27,7 @@ np.random.random(123) @unittest.skipIf(not core.is_compiled_with_cuda(), "Paddle core is not compiled with CUDA") class TestFusedFCElementwiseLayerNormOp(OpTest): + def config(self): self.matrix = MatrixGenerate(1, 10, 15, 3, 3, 2) self.y_shape = [1, 15] @@ -72,6 +73,7 @@ class TestFusedFCElementwiseLayerNormOp(OpTest): class TestFusedFCElementwiseLayerNormOp2(TestFusedFCElementwiseLayerNormOp): + def config(self): self.matrix = MatrixGenerate(4, 5, 6, 2, 2, 1) self.y_shape = [4, 6] diff --git a/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py b/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py index 25336efd6a7..43d39224287 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py @@ -24,10 +24,12 @@ from paddle.nn.layer.common import Linear, Dropout import unittest from op_test import OpTest from paddle.fluid.framework import default_main_program, _enable_legacy_dygraph + _enable_legacy_dygraph() class TestFusedFFNOp(OpTest): + def getDtype(self): self.dtype = "float32" self.layer_norm_dtype = "float32" @@ -71,16 +73,14 @@ class TestFusedFFNOp(OpTest): self.weight_attr, 2) self.bias_attrs = transformer._convert_param_attr_to_list( self.bias_attr, 2) - self.linear1 = Linear( - self.d_model, - self.dim_feedforward, - 
self.weight_attrs[1], - bias_attr=self.bias_attrs[1]) - self.linear2 = Linear( - self.dim_feedforward, - self.d_model, - self.weight_attrs[1], - bias_attr=self.bias_attrs[1]) + self.linear1 = Linear(self.d_model, + self.dim_feedforward, + self.weight_attrs[1], + bias_attr=self.bias_attrs[1]) + self.linear2 = Linear(self.dim_feedforward, + self.d_model, + self.weight_attrs[1], + bias_attr=self.bias_attrs[1]) paddle.set_default_dtype(self.layer_norm_dtype) self.norm1 = LayerNorm(self.d_model) @@ -118,31 +118,30 @@ class TestFusedFFNOp(OpTest): def FusedFFN(self): paddle.disable_static() - linear1_weight = paddle.to_tensor( - self.linear1.weight, stop_gradient=False) + linear1_weight = paddle.to_tensor(self.linear1.weight, + stop_gradient=False) linear1_bias = paddle.to_tensor(self.linear1.bias, stop_gradient=False) - linear2_weight = paddle.to_tensor( - self.linear2.weight, stop_gradient=False) + linear2_weight = paddle.to_tensor(self.linear2.weight, + stop_gradient=False) linear2_bias = paddle.to_tensor(self.linear2.bias, stop_gradient=False) ln1_scale = paddle.to_tensor(self.norm1.weight, stop_gradient=False) ln1_bias = paddle.to_tensor(self.norm1.bias, stop_gradient=False) ln2_scale = paddle.to_tensor(self.norm2.weight, stop_gradient=False) ln2_bias = paddle.to_tensor(self.norm2.bias, stop_gradient=False) x = paddle.to_tensor(self.src, stop_gradient=False) - out = incubate_f.fused_feedforward( - x, - linear1_weight, - linear2_weight, - linear1_bias, - linear2_bias, - ln1_scale, - ln1_bias, - ln2_scale, - ln2_bias, - 0.0, - 0.0, - activation=self.act_method, - pre_layer_norm=self.pre_layer_norm) + out = incubate_f.fused_feedforward(x, + linear1_weight, + linear2_weight, + linear1_bias, + linear2_bias, + ln1_scale, + ln1_bias, + ln2_scale, + ln2_bias, + 0.0, + 0.0, + activation=self.act_method, + pre_layer_norm=self.pre_layer_norm) paddle.autograd.backward([out], [paddle.to_tensor(self.dout)]) return out, x.grad @@ -150,16 +149,18 @@ class TestFusedFFNOp(OpTest): default_main_program().random_seed = 42 base_out, base_grad = self.Base() fused_out, fused_grad = self.FusedFFN() - np.testing.assert_allclose( - base_out.numpy(), fused_out.numpy(), rtol=self.rtol, atol=self.atol) - np.testing.assert_allclose( - base_grad.numpy(), - fused_grad.numpy(), - rtol=self.rtol, - atol=self.atol) + np.testing.assert_allclose(base_out.numpy(), + fused_out.numpy(), + rtol=self.rtol, + atol=self.atol) + np.testing.assert_allclose(base_grad.numpy(), + fused_grad.numpy(), + rtol=self.rtol, + atol=self.atol) class TestFusedFFNOpFp16(TestFusedFFNOp): + def getDtype(self): self.dtype = "float16" self.layer_norm_dtype = "float32" @@ -176,17 +177,20 @@ class TestFusedFFNOpFp16(TestFusedFFNOp): class TestFusedFFNOpFp64(TestFusedFFNOp): + def getDtype(self): self.dtype = "float64" self.layer_norm_dtype = "float64" class TestFusedFFNOpActivation(TestFusedFFNOp): + def getActivation(self): self.act_method = "relu" class TestFusedFFNOpNormalizeBefore(TestFusedFFNOp): + def getNormalizeBefore(self): self.pre_layer_norm = True @@ -198,6 +202,7 @@ class TestFusedFFNOpNormalizeBefore(TestFusedFFNOp): class APITestStaticFusedFFN(unittest.TestCase): + def test_static(self): paddle.enable_static() default_main_program().random_seed = 42 @@ -207,38 +212,36 @@ class APITestStaticFusedFFN(unittest.TestCase): d_model = 8 dim_feedforward = 8 - x = paddle.static.data( - name='x', shape=[batch_size, d_model, dim_feedforward], dtype=dtype) - linear1_weight = paddle.static.data( - name='linear1_weight', - shape=[d_model, 
dim_feedforward], - dtype=dtype) - linear1_bias = paddle.static.data( - name='linear1_bias', shape=[dim_feedforward]) - linear2_weight = paddle.static.data( - name='linear2_weight', - shape=[dim_feedforward, d_model], - dtype=dtype) + x = paddle.static.data(name='x', + shape=[batch_size, d_model, dim_feedforward], + dtype=dtype) + linear1_weight = paddle.static.data(name='linear1_weight', + shape=[d_model, dim_feedforward], + dtype=dtype) + linear1_bias = paddle.static.data(name='linear1_bias', + shape=[dim_feedforward]) + linear2_weight = paddle.static.data(name='linear2_weight', + shape=[dim_feedforward, d_model], + dtype=dtype) linear2_bias = paddle.static.data(name='linear2_bias', shape=[d_model]) ln1_scale = paddle.static.data(name='ln1_scale', shape=[d_model]) ln1_bias = paddle.static.data(name='ln1_scale', shape=[d_model]) ln2_scale = paddle.static.data(name='ln2_scale', shape=[d_model]) ln2_bias = paddle.static.data(name='ln2_scale', shape=[d_model]) - fused_out = incubate_f.fused_feedforward( - x, - linear1_weight, - linear2_weight, - linear1_bias, - linear2_bias, - ln1_scale, - ln1_bias, - ln2_scale, - ln2_bias, - 0.0, - 0.0, - activation="relu", - pre_layer_norm=False) + fused_out = incubate_f.fused_feedforward(x, + linear1_weight, + linear2_weight, + linear1_bias, + linear2_bias, + ln1_scale, + ln1_bias, + ln2_scale, + ln2_bias, + 0.0, + 0.0, + activation="relu", + pre_layer_norm=False) ######base ffn###### linear1_out = F.linear(x, linear1_weight, linear1_bias) @@ -246,11 +249,10 @@ class APITestStaticFusedFFN(unittest.TestCase): dropout1_out = F.dropout(x=act_out, p=0.0, training=False) linear2_out = F.linear(dropout1_out, linear2_weight, linear2_bias) dropout2_out = x + F.dropout(x=linear2_out, p=0.0, training=False) - ln_out = F.layer_norm( - dropout2_out, - normalized_shape=list([d_model]), - weight=ln2_scale, - bias=ln2_bias) + ln_out = F.layer_norm(dropout2_out, + normalized_shape=list([d_model]), + weight=ln2_scale, + bias=ln2_bias) ######base ffn###### exe = paddle.static.Executor(paddle.CUDAPlace(0)) @@ -286,62 +288,79 @@ class APITestStaticFusedFFN(unittest.TestCase): }, fetch_list=[res]) real_res.append(fetch) - self.assertTrue( - np.allclose( - real_res[0], real_res[1], atol=1e-3), - "two value is check diff") + self.assertTrue(np.allclose(real_res[0], real_res[1], atol=1e-3), + "two value is check diff") class TestFusedFFNOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): def test_dtype(): - x = paddle.static.data( - name='x', shape=[1, 10, 10], dtype="int32") - linear1_weight = paddle.static.data( - name='linear1_weight', shape=[1, 10, 10], dtype="float32") - linear2_weight = paddle.static.data( - name='linear2_weight', shape=[1, 10, 10], dtype="float32") + x = paddle.static.data(name='x', + shape=[1, 10, 10], + dtype="int32") + linear1_weight = paddle.static.data(name='linear1_weight', + shape=[1, 10, 10], + dtype="float32") + linear2_weight = paddle.static.data(name='linear2_weight', + shape=[1, 10, 10], + dtype="float32") incubate_f.fused_feedforward(x, linear1_weight, linear2_weight) self.assertRaises(TypeError, test_dtype) def test_dropout_rate_type(): - x = paddle.static.data( - name='x1', shape=[1, 10, 10], dtype="float32") - linear1_weight = paddle.static.data( - name='linear1_weight1', shape=[10, 10], dtype="float32") - linear2_weight = paddle.static.data( - name='linear2_weight1', shape=[10, 10], dtype="float32") - incubate_f.fused_feedforward( - 
x, linear1_weight, linear2_weight, dropout1_rate="a") + x = paddle.static.data(name='x1', + shape=[1, 10, 10], + dtype="float32") + linear1_weight = paddle.static.data(name='linear1_weight1', + shape=[10, 10], + dtype="float32") + linear2_weight = paddle.static.data(name='linear2_weight1', + shape=[10, 10], + dtype="float32") + incubate_f.fused_feedforward(x, + linear1_weight, + linear2_weight, + dropout1_rate="a") self.assertRaises(TypeError, test_dropout_rate_type) def test_dropout_rate_value(): - x = paddle.static.data( - name='x2', shape=[1, 10, 10], dtype="float32") - linear1_weight = paddle.static.data( - name='linear1_weight2', shape=[10, 10], dtype="float32") - linear2_weight = paddle.static.data( - name='linear2_weight2', shape=[10, 10], dtype="float32") - incubate_f.fused_feedforward( - x, linear1_weight, linear2_weight, dropout2_rate=-1) + x = paddle.static.data(name='x2', + shape=[1, 10, 10], + dtype="float32") + linear1_weight = paddle.static.data(name='linear1_weight2', + shape=[10, 10], + dtype="float32") + linear2_weight = paddle.static.data(name='linear2_weight2', + shape=[10, 10], + dtype="float32") + incubate_f.fused_feedforward(x, + linear1_weight, + linear2_weight, + dropout2_rate=-1) self.assertRaises(ValueError, test_dropout_rate_value) def test_dropout_mode(): - x = paddle.static.data( - name='x3', shape=[1, 10, 10], dtype="float32") - linear1_weight = paddle.static.data( - name='linear1_weight3', shape=[10, 10], dtype="float32") - linear2_weight = paddle.static.data( - name='linear2_weight3', shape=[10, 10], dtype="float32") - incubate_f.fused_feedforward( - x, linear1_weight, linear2_weight, mode='test') + x = paddle.static.data(name='x3', + shape=[1, 10, 10], + dtype="float32") + linear1_weight = paddle.static.data(name='linear1_weight3', + shape=[10, 10], + dtype="float32") + linear2_weight = paddle.static.data(name='linear2_weight3', + shape=[10, 10], + dtype="float32") + incubate_f.fused_feedforward(x, + linear1_weight, + linear2_weight, + mode='test') self.assertRaises(ValueError, test_dropout_mode) diff --git a/python/paddle/fluid/tests/unittests/test_fused_gate_attention_op.py b/python/paddle/fluid/tests/unittests/test_fused_gate_attention_op.py index 6f9ba5f5e4e..2d624395547 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_gate_attention_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_gate_attention_op.py @@ -28,6 +28,7 @@ from paddle.fluid import core @unittest.skipIf(not core.is_compiled_with_cuda(), "Paddle is not compiled with CUDA") class TestFusedGateAttentionOp(OpTest): + def setUp(self): self.__class__.op_type = "fused_gate_attention" # use autograd to check grad in this unittest. 
@@ -51,6 +52,7 @@ class TestFusedGateAttentionOp(OpTest): self.bias_attr = True def generate_input_data(self): + def _random(shape): if self.dtype == "bfloat16": data = np.random.random(shape).astype("float32") @@ -97,8 +99,8 @@ class TestFusedGateAttentionOp(OpTest): paddle.disable_static(place=paddle.CUDAPlace(0)) query = paddle.to_tensor(self.query, stop_gradient=False) - key = query if self.merge_qkv else paddle.to_tensor( - self.key, stop_gradient=False) + key = query if self.merge_qkv else paddle.to_tensor(self.key, + stop_gradient=False) q_weight = paddle.to_tensor(self.q_weight, stop_gradient=False) k_weight = paddle.to_tensor(self.k_weight, stop_gradient=False) v_weight = paddle.to_tensor(self.v_weight, stop_gradient=False) @@ -112,12 +114,12 @@ class TestFusedGateAttentionOp(OpTest): # [batch_size, msa_len, num_heads, m_size, key_dim] v = paddle.einsum('nbka,ahc->nbkhc', key, v_weight) - # [batch_size, msa_len, num_heads, res_len, m_size] + # [batch_size, msa_len, num_heads, res_len, m_size] logits = paddle.einsum('nbqhc,nbkhc->nbhqk', q, k) # qk_out logits = logits + src_mask if self.bias_attr: - nonbatched_bias = paddle.to_tensor( - self.nonbatched_bias, stop_gradient=False) + nonbatched_bias = paddle.to_tensor(self.nonbatched_bias, + stop_gradient=False) logits = logits + nonbatched_bias weights = nn.functional.softmax(logits) # softmax_out @@ -136,8 +138,8 @@ class TestFusedGateAttentionOp(OpTest): out = paddle.einsum('nbqhc,hco->nbqo', weighted_avg, output_w) + output_b - paddle.autograd.backward( - [out], [paddle.to_tensor(self.dout)], retain_graph=True) + paddle.autograd.backward([out], [paddle.to_tensor(self.dout)], + retain_graph=True) if self.merge_qkv: return out, query.grad, None else: @@ -163,8 +165,8 @@ class TestFusedGateAttentionOp(OpTest): src_mask = paddle.to_tensor(self.attn_mask, stop_gradient=True) if self.bias_attr: - nonbatched_bias = paddle.to_tensor( - self.nonbatched_bias, stop_gradient=False) + nonbatched_bias = paddle.to_tensor(self.nonbatched_bias, + stop_gradient=False) else: nonbatched_bias = None if self.has_gating: @@ -182,8 +184,8 @@ class TestFusedGateAttentionOp(OpTest): nonbatched_bias, src_mask, gating_w, gating_b, output_w, output_b, 'has_gating', self.has_gating, 'merge_qkv', self.merge_qkv) - paddle.autograd.backward( - [out], [paddle.to_tensor(self.dout)], retain_graph=True) + paddle.autograd.backward([out], [paddle.to_tensor(self.dout)], + retain_graph=True) if key is not None: return out, query.grad, key.grad else: @@ -193,17 +195,22 @@ class TestFusedGateAttentionOp(OpTest): out_ref, query_grad_ref, key_grad_ref = self.get_reference_out() out, query_grad, key_grad = self.get_fused_gate_attention_out() np.testing.assert_allclose(out_ref, out.numpy(), atol=atol, rtol=rtol) - np.testing.assert_allclose( - query_grad_ref, query_grad.numpy(), atol=atol, rtol=rtol) + np.testing.assert_allclose(query_grad_ref, + query_grad.numpy(), + atol=atol, + rtol=rtol) if key_grad_ref is not None and key_grad is not None: - np.testing.assert_allclose( - key_grad_ref, key_grad.numpy(), atol=atol, rtol=rtol) + np.testing.assert_allclose(key_grad_ref, + key_grad.numpy(), + atol=atol, + rtol=rtol) def test_output_and_grad(self): self.check_output_and_grad(atol=1e-5, rtol=1e-5) class TestSeparatedQKVCase(TestFusedGateAttentionOp): + def config(self): self.dtype = "float32" self.has_gating = False @@ -220,6 +227,7 @@ class TestSeparatedQKVCase(TestFusedGateAttentionOp): class TestMergeQKVNoBiasGatingCase(TestFusedGateAttentionOp): + def config(self): 
super().config() self.has_gating = False @@ -227,6 +235,7 @@ class TestMergeQKVNoBiasGatingCase(TestFusedGateAttentionOp): class TestMergeQKVFp16Case(TestFusedGateAttentionOp): + def config(self): super().config() self.dtype = "float16" @@ -240,6 +249,7 @@ class TestMergeQKVFp16Case(TestFusedGateAttentionOp): "core is not compiled with CUDA and cuda version need larger than or equal to 11.3" ) class TestMergeQKVBF16Case(TestFusedGateAttentionOp): + def config(self): super().config() self.dtype = "bfloat16" diff --git a/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_grad_op.py b/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_grad_op.py index 106ce5b4ef0..b536b0d7e66 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_grad_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_grad_op.py @@ -1,12 +1,12 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -34,6 +34,7 @@ def get_outputs(DOut, X, Y): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueGradOpDXYBiasFP16(OpTest): + def setUp(self): self.op_type = "fused_gemm_epilogue_grad" self.place = core.CUDAPlace(0) @@ -67,6 +68,7 @@ class TestFuseGemmEpilogueGradOpDXYBiasFP16(OpTest): "core is not compiled with CUDA") class TestFuseGemmEpilogueGradOpDXYBiasFP32( TestFuseGemmEpilogueGradOpDXYBiasFP16): + def init_dtype_type(self): self.dtype = np.single self.atol = 1e-6 @@ -77,6 +79,7 @@ class TestFuseGemmEpilogueGradOpDXYBiasFP32( "core is not compiled with CUDA") class TestFuseGemmEpilogueGradOpDXYBiasFP64( TestFuseGemmEpilogueGradOpDXYBiasFP16): + def init_dtype_type(self): self.dtype = np.double self.atol = 1e-6 @@ -86,6 +89,7 @@ class TestFuseGemmEpilogueGradOpDXYBiasFP64( @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueGradOpDYBiasFP16(OpTest): + def setUp(self): self.op_type = "fused_gemm_epilogue_grad" self.place = core.CUDAPlace(0) @@ -117,8 +121,9 @@ class TestFuseGemmEpilogueGradOpDYBiasFP16(OpTest): @skip_check_grad_ci(reason="no grap op") @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") -class TestFuseGemmEpilogueGradOpDYBiasFP32( - TestFuseGemmEpilogueGradOpDYBiasFP16): +class TestFuseGemmEpilogueGradOpDYBiasFP32(TestFuseGemmEpilogueGradOpDYBiasFP16 + ): + def init_dtype_type(self): self.dtype = np.single self.atol = 1e-6 @@ -127,8 +132,9 @@ class TestFuseGemmEpilogueGradOpDYBiasFP32( @skip_check_grad_ci(reason="no grap op") @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") -class TestFuseGemmEpilogueGradOpDYBiasFP64( - TestFuseGemmEpilogueGradOpDYBiasFP16): +class TestFuseGemmEpilogueGradOpDYBiasFP64(TestFuseGemmEpilogueGradOpDYBiasFP16 + ): + def init_dtype_type(self): self.dtype = np.double self.atol = 1e-6 @@ -138,6 +144,7 @@ class TestFuseGemmEpilogueGradOpDYBiasFP64( @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with 
CUDA") class TestFuseGemmEpilogueGradOpDYFP16(OpTest): + def setUp(self): self.op_type = "fused_gemm_epilogue_grad" self.place = core.CUDAPlace(0) @@ -170,6 +177,7 @@ class TestFuseGemmEpilogueGradOpDYFP16(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueGradOpDYFP32(TestFuseGemmEpilogueGradOpDYFP16): + def init_dtype_type(self): self.dtype = np.single self.atol = 1e-6 @@ -179,6 +187,7 @@ class TestFuseGemmEpilogueGradOpDYFP32(TestFuseGemmEpilogueGradOpDYFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueGradOpDYFP64(TestFuseGemmEpilogueGradOpDYFP16): + def init_dtype_type(self): self.dtype = np.double self.atol = 1e-6 @@ -188,6 +197,7 @@ class TestFuseGemmEpilogueGradOpDYFP64(TestFuseGemmEpilogueGradOpDYFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueGradOpDXYFP16(OpTest): + def setUp(self): self.op_type = "fused_gemm_epilogue_grad" self.place = core.CUDAPlace(0) @@ -220,6 +230,7 @@ class TestFuseGemmEpilogueGradOpDXYFP16(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueGradOpDXYFP32(TestFuseGemmEpilogueGradOpDXYFP16): + def init_dtype_type(self): self.dtype = np.single self.atol = 1e-6 @@ -229,6 +240,7 @@ class TestFuseGemmEpilogueGradOpDXYFP32(TestFuseGemmEpilogueGradOpDXYFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueGradOpDXYFP64(TestFuseGemmEpilogueGradOpDXYFP16): + def init_dtype_type(self): self.dtype = np.double self.atol = 1e-6 diff --git a/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_op.py b/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_op.py index 4256945a1e8..bd29ebbf12a 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_op.py @@ -1,12 +1,12 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -52,6 +52,7 @@ class TestFuseGemmBase(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMMFP16(TestFuseGemmBase): + def setUp(self): self.op_type = "fused_gemm_epilogue" self.place = core.CUDAPlace(0) @@ -63,8 +64,9 @@ class TestFuseGemmEpilogueOpReluMMFP16(TestFuseGemmBase): 'Bias': np.random.random((128, )).astype(self.dtype) - 0.5 } self.outputs = { - 'Out': get_output(self.inputs['X'], self.inputs['Y'], - self.inputs['Bias'], 'relu') + 'Out': + get_output(self.inputs['X'], self.inputs['Y'], self.inputs['Bias'], + 'relu') } self.attrs = {"activation": 'relu'} @@ -83,6 +85,7 @@ class TestFuseGemmEpilogueOpReluMMFP16(TestFuseGemmBase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMMFP32(TestFuseGemmEpilogueOpReluMMFP16): + def init_dtype_type(self): self.dtype = np.single self.atol = 1e-6 @@ -92,6 +95,7 @@ class TestFuseGemmEpilogueOpReluMMFP32(TestFuseGemmEpilogueOpReluMMFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMMFP64(TestFuseGemmEpilogueOpReluMMFP16): + def init_dtype_type(self): self.dtype = np.double self.atol = 1e-6 @@ -101,6 +105,7 @@ class TestFuseGemmEpilogueOpReluMMFP64(TestFuseGemmEpilogueOpReluMMFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMTMFP16(TestFuseGemmBase): + def setUp(self): self.op_type = "fused_gemm_epilogue" self.place = core.CUDAPlace(0) @@ -112,8 +117,9 @@ class TestFuseGemmEpilogueOpReluMTMFP16(TestFuseGemmBase): 'Bias': np.random.random((128, )).astype(self.dtype) - 0.5 } self.outputs = { - 'Out': get_output(self.inputs['X'].T, self.inputs['Y'], - self.inputs['Bias'], 'relu') + 'Out': + get_output(self.inputs['X'].T, self.inputs['Y'], + self.inputs['Bias'], 'relu') } self.attrs = {'trans_x': True, "activation": 'relu'} @@ -132,6 +138,7 @@ class TestFuseGemmEpilogueOpReluMTMFP16(TestFuseGemmBase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMTMFP32(TestFuseGemmEpilogueOpReluMTMFP16): + def init_dtype_type(self): self.dtype = np.single self.atol = 1e-6 @@ -141,6 +148,7 @@ class TestFuseGemmEpilogueOpReluMTMFP32(TestFuseGemmEpilogueOpReluMTMFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMTMFP64(TestFuseGemmEpilogueOpReluMTMFP16): + def init_dtype_type(self): self.dtype = np.double self.atol = 1e-6 @@ -150,6 +158,7 @@ class TestFuseGemmEpilogueOpReluMTMFP64(TestFuseGemmEpilogueOpReluMTMFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMMTFP16(TestFuseGemmBase): + def setUp(self): self.op_type = "fused_gemm_epilogue" self.place = core.CUDAPlace(0) @@ -161,8 +170,9 @@ class TestFuseGemmEpilogueOpReluMMTFP16(TestFuseGemmBase): 'Bias': np.random.random((128, )).astype(self.dtype) - 0.5 } self.outputs = { - 'Out': get_output(self.inputs['X'], self.inputs['Y'].T, - self.inputs['Bias'], 'relu') + 'Out': + get_output(self.inputs['X'], self.inputs['Y'].T, + self.inputs['Bias'], 'relu') } self.attrs = {'trans_y': True, "activation": 'relu'} @@ -181,6 +191,7 @@ class TestFuseGemmEpilogueOpReluMMTFP16(TestFuseGemmBase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class 
TestFuseGemmEpilogueOpReluMMTFP32(TestFuseGemmEpilogueOpReluMMTFP16): + def init_dtype_type(self): self.dtype = np.single self.atol = 1e-6 @@ -190,6 +201,7 @@ class TestFuseGemmEpilogueOpReluMMTFP32(TestFuseGemmEpilogueOpReluMMTFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMMTFP64(TestFuseGemmEpilogueOpReluMMTFP16): + def init_dtype_type(self): self.dtype = np.double self.atol = 1e-6 @@ -199,6 +211,7 @@ class TestFuseGemmEpilogueOpReluMMTFP64(TestFuseGemmEpilogueOpReluMMTFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMTMTFP16(TestFuseGemmBase): + def setUp(self): self.op_type = "fused_gemm_epilogue" self.place = core.CUDAPlace(0) @@ -210,8 +223,9 @@ class TestFuseGemmEpilogueOpReluMTMTFP16(TestFuseGemmBase): 'Bias': np.random.random((128, )).astype(self.dtype) - 0.5 } self.outputs = { - 'Out': get_output(self.inputs['X'].T, self.inputs['Y'].T, - self.inputs['Bias'], 'relu') + 'Out': + get_output(self.inputs['X'].T, self.inputs['Y'].T, + self.inputs['Bias'], 'relu') } self.attrs = {'trans_x': True, 'trans_y': True, "activation": 'relu'} @@ -230,6 +244,7 @@ class TestFuseGemmEpilogueOpReluMTMTFP16(TestFuseGemmBase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMTMTFP32(TestFuseGemmEpilogueOpReluMTMTFP16): + def init_dtype_type(self): self.dtype = np.single self.atol = 1e-6 @@ -239,6 +254,7 @@ class TestFuseGemmEpilogueOpReluMTMTFP32(TestFuseGemmEpilogueOpReluMTMTFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMTMTFP64(TestFuseGemmEpilogueOpReluMTMTFP16): + def init_dtype_type(self): self.dtype = np.double self.atol = 1e-6 @@ -248,6 +264,7 @@ class TestFuseGemmEpilogueOpReluMTMTFP64(TestFuseGemmEpilogueOpReluMTMTFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMMFP16MultiDimX(TestFuseGemmBase): + def setUp(self): self.op_type = "fused_gemm_epilogue" self.place = core.CUDAPlace(0) @@ -259,9 +276,9 @@ class TestFuseGemmEpilogueOpReluMMFP16MultiDimX(TestFuseGemmBase): 'Bias': np.random.random((128, )).astype(self.dtype) - 0.5 } self.outputs = { - 'Out': get_output(self.inputs['X'].reshape( - (-1, 4)), self.inputs['Y'], self.inputs['Bias'], - 'relu').reshape((2, 2, 8, 128)) + 'Out': + get_output(self.inputs['X'].reshape((-1, 4)), self.inputs['Y'], + self.inputs['Bias'], 'relu').reshape((2, 2, 8, 128)) } self.attrs = {"activation": 'relu'} @@ -281,6 +298,7 @@ class TestFuseGemmEpilogueOpReluMMFP16MultiDimX(TestFuseGemmBase): "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMMFP32MultiDimX( TestFuseGemmEpilogueOpReluMMFP16MultiDimX): + def init_dtype_type(self): self.dtype = np.single self.atol = 1e-6 @@ -291,6 +309,7 @@ class TestFuseGemmEpilogueOpReluMMFP32MultiDimX( "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMMFP64MultiDimX( TestFuseGemmEpilogueOpReluMMFP16MultiDimX): + def init_dtype_type(self): self.dtype = np.double self.atol = 1e-6 @@ -300,6 +319,7 @@ class TestFuseGemmEpilogueOpReluMMFP64MultiDimX( @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMTMFP16MultiDimX(TestFuseGemmBase): + def setUp(self): self.op_type = "fused_gemm_epilogue" self.place = core.CUDAPlace(0) @@ -311,9 +331,9 @@ class 
TestFuseGemmEpilogueOpReluMTMFP16MultiDimX(TestFuseGemmBase): 'Bias': np.random.random((128, )).astype(self.dtype) - 0.5 } self.outputs = { - 'Out': get_output(self.inputs['X'].reshape( - (4, -1)).T, self.inputs['Y'], self.inputs['Bias'], - 'relu').reshape((2, 2, 8, 128)) + 'Out': + get_output(self.inputs['X'].reshape((4, -1)).T, self.inputs['Y'], + self.inputs['Bias'], 'relu').reshape((2, 2, 8, 128)) } self.attrs = {'trans_x': True, "activation": 'relu'} @@ -333,6 +353,7 @@ class TestFuseGemmEpilogueOpReluMTMFP16MultiDimX(TestFuseGemmBase): "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMTMFP32MultiDimX( TestFuseGemmEpilogueOpReluMTMFP16MultiDimX): + def init_dtype_type(self): self.dtype = np.single self.atol = 1e-6 @@ -343,6 +364,7 @@ class TestFuseGemmEpilogueOpReluMTMFP32MultiDimX( "core is not compiled with CUDA") class TestFuseGemmEpilogueOpReluMTMFP64MultiDimX( TestFuseGemmEpilogueOpReluMTMFP16MultiDimX): + def init_dtype_type(self): self.dtype = np.double self.atol = 1e-6 @@ -352,6 +374,7 @@ class TestFuseGemmEpilogueOpReluMTMFP64MultiDimX( @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpGeluMMFP16(TestFuseGemmBase): + def setUp(self): self.op_type = "fused_gemm_epilogue" self.place = core.CUDAPlace(0) @@ -366,8 +389,9 @@ class TestFuseGemmEpilogueOpGeluMMFP16(TestFuseGemmBase): self.attrs = {"activation": 'gelu'} self.outputs = { - 'Out': get_output(self.inputs['X'], self.inputs['Y'], - self.inputs['Bias'], 'gelu') + 'Out': + get_output(self.inputs['X'], self.inputs['Y'], self.inputs['Bias'], + 'gelu') } def init_dtype_type(self): @@ -385,6 +409,7 @@ class TestFuseGemmEpilogueOpGeluMMFP16(TestFuseGemmBase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpGeluMMFP32(TestFuseGemmEpilogueOpGeluMMFP16): + def init_dtype_type(self): self.dtype = np.single self.atol = 1e-6 @@ -394,6 +419,7 @@ class TestFuseGemmEpilogueOpGeluMMFP32(TestFuseGemmEpilogueOpGeluMMFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpGeluMMFP64(TestFuseGemmEpilogueOpGeluMMFP16): + def init_dtype_type(self): self.dtype = np.double self.atol = 1e-6 @@ -403,6 +429,7 @@ class TestFuseGemmEpilogueOpGeluMMFP64(TestFuseGemmEpilogueOpGeluMMFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpNoneMMFP16(TestFuseGemmBase): + def setUp(self): self.op_type = "fused_gemm_epilogue" self.place = core.CUDAPlace(0) @@ -417,8 +444,9 @@ class TestFuseGemmEpilogueOpNoneMMFP16(TestFuseGemmBase): self.attrs = {"activation": 'none'} self.outputs = { - 'Out': get_output(self.inputs['X'], self.inputs['Y'], - self.inputs['Bias'], 'none') + 'Out': + get_output(self.inputs['X'], self.inputs['Y'], self.inputs['Bias'], + 'none') } def init_dtype_type(self): @@ -436,6 +464,7 @@ class TestFuseGemmEpilogueOpNoneMMFP16(TestFuseGemmBase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpNoneMMFP32(TestFuseGemmEpilogueOpNoneMMFP16): + def init_dtype_type(self): self.dtype = np.single self.atol = 1e-6 @@ -445,6 +474,7 @@ class TestFuseGemmEpilogueOpNoneMMFP32(TestFuseGemmEpilogueOpNoneMMFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFuseGemmEpilogueOpNoneMMFP64(TestFuseGemmEpilogueOpNoneMMFP16): + def init_dtype_type(self): self.dtype = np.double 
self.atol = 1e-6 diff --git a/python/paddle/fluid/tests/unittests/test_fused_matmul_bias.py b/python/paddle/fluid/tests/unittests/test_fused_matmul_bias.py index 98548c99965..f2f56e42543 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_matmul_bias.py +++ b/python/paddle/fluid/tests/unittests/test_fused_matmul_bias.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -66,6 +66,7 @@ def matmul_grad(x, y, bias, dz, trans_x, trans_y): not is_fused_matmul_bias_supported(), "fused_gemm_epilogue is only supported when CUDA version >= 11.6") class TestFusedMatmulBias(unittest.TestCase): + def setUp(self): paddle.set_device('gpu') @@ -131,6 +132,7 @@ class TestFusedMatmulBias(unittest.TestCase): not is_fused_matmul_bias_supported(), "fused_gemm_epilogue is only supported when CUDA version >= 11.6") class TestFusedLinear(unittest.TestCase): + def check_fused_linear(self, transpose): x = paddle.randn([30, 40]) linear = FusedLinear(40, 50, transpose_weight=transpose) @@ -149,6 +151,7 @@ class TestFusedLinear(unittest.TestCase): not is_fused_matmul_bias_supported(), "fused_gemm_epilogue is only supported when CUDA version >= 11.6") class TestStaticGraph(unittest.TestCase): + def test_static_graph(self): paddle.enable_static() x = paddle.static.data(name='x', dtype='float32', shape=[-1, 100]) diff --git a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py index 67f382a439d..ffe6fa8d41a 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py @@ -39,6 +39,7 @@ default_main_program().random_seed = 42 class TestFusedMultiTransformerOp(OpTest): + def setUp(self): self.config() self.generate_input_data() @@ -61,39 +62,33 @@ class TestFusedMultiTransformerOp(OpTest): bias_attr = paddle.fluid.ParamAttr( initializer=paddle.fluid.initializer.Constant(value=0.0005)) - self.q_proj = Linear( - self.embed_dim, - self.embed_dim, - self.weight_attr, - bias_attr=bias_attr) + self.q_proj = Linear(self.embed_dim, + self.embed_dim, + self.weight_attr, + bias_attr=bias_attr) #bias_attr=self.bias_attr) - self.k_proj = Linear( - self.kdim, - self.embed_dim, - self.weight_attr, - bias_attr=self.bias_attr) - self.v_proj = Linear( - self.vdim, - self.embed_dim, - self.weight_attr, - bias_attr=self.bias_attr) - self.out_proj = Linear( - self.embed_dim, - self.embed_dim, - self.weight_attr, - bias_attr=self.bias_attr) - - self.ffn1_proj = Linear( - self.embed_dim, - 4 * self.embed_dim, - self.weight_attr, - bias_attr=self.bias_attr) - self.ffn2_proj = Linear( - 4 * self.embed_dim, - self.embed_dim, - self.weight_attr, - bias_attr=self.bias_attr) + self.k_proj = Linear(self.kdim, + self.embed_dim, + self.weight_attr, + bias_attr=self.bias_attr) + self.v_proj = Linear(self.vdim, + self.embed_dim, + self.weight_attr, + bias_attr=self.bias_attr) + self.out_proj = Linear(self.embed_dim, + self.embed_dim, + self.weight_attr, + bias_attr=self.bias_attr) + 
+ self.ffn1_proj = Linear(self.embed_dim, + 4 * self.embed_dim, + self.weight_attr, + bias_attr=self.bias_attr) + self.ffn2_proj = Linear(4 * self.embed_dim, + self.embed_dim, + self.weight_attr, + bias_attr=self.bias_attr) paddle.set_default_dtype(np.float32) self.norm = LayerNorm(self.embed_dim) @@ -234,8 +229,10 @@ class TestFusedMultiTransformerOp(OpTest): # [B, n_head, seq_len, head_dim] * [B, n_head, out_seq_len, head_dim] # --> [B, n_head, seq_len, out_seq_len] - qk_out = layers.matmul( - x=q_out, y=k_out, transpose_y=True, alpha=self.head_dim**-0.5) + qk_out = layers.matmul(x=q_out, + y=k_out, + transpose_y=True, + alpha=self.head_dim**-0.5) if self.debug: print('qk out is') @@ -255,11 +252,10 @@ class TestFusedMultiTransformerOp(OpTest): print('softmax out is') print(softmax_out[0][0][0]) if self.dropout_prob: - dropout_out = F.dropout( - softmax_out, - self.dropout_prob, - training=self.training, - mode="upscale_in_train") + dropout_out = F.dropout(softmax_out, + self.dropout_prob, + training=self.training, + mode="upscale_in_train") # [B, n_head, seq_len, out_seq_len] * [B, n_head, out_seq_len, head_dim] # --> [B, n_head, seq_len, head_dim] qktv_out = tensor.matmul(dropout_out, v_out) @@ -271,8 +267,7 @@ class TestFusedMultiTransformerOp(OpTest): print('fmha out is') print(fmha_out[0][0][0]) out_linear_in = tensor.reshape( - x=fmha_out, - shape=[0, 0, fmha_out.shape[2] * fmha_out.shape[3]]) + x=fmha_out, shape=[0, 0, fmha_out.shape[2] * fmha_out.shape[3]]) out = self.out_proj(out_linear_in) residual_out = residual + self.dropout(out) @@ -302,44 +297,44 @@ class TestFusedMultiTransformerOp(OpTest): def GetFusedMultiTransformerOut(self): paddle.disable_static(place=paddle.CUDAPlace(0)) - q_proj_weight = paddle.to_tensor( - self.q_proj.weight, stop_gradient=False) - k_proj_weight = paddle.to_tensor( - self.k_proj.weight, stop_gradient=False) - v_proj_weight = paddle.to_tensor( - self.v_proj.weight, stop_gradient=False) - out_linear_weight = paddle.to_tensor( - self.out_proj.weight, stop_gradient=False) - ffn1_weight = paddle.to_tensor( - self.ffn1_proj.weight, stop_gradient=False) - ffn2_weight = paddle.to_tensor( - self.ffn2_proj.weight, stop_gradient=False) + q_proj_weight = paddle.to_tensor(self.q_proj.weight, + stop_gradient=False) + k_proj_weight = paddle.to_tensor(self.k_proj.weight, + stop_gradient=False) + v_proj_weight = paddle.to_tensor(self.v_proj.weight, + stop_gradient=False) + out_linear_weight = paddle.to_tensor(self.out_proj.weight, + stop_gradient=False) + ffn1_weight = paddle.to_tensor(self.ffn1_proj.weight, + stop_gradient=False) + ffn2_weight = paddle.to_tensor(self.ffn2_proj.weight, + stop_gradient=False) if self.bias_attr is False: qkv_bias_tensor = None out_linear_bias = None else: - q_proj_bias = paddle.to_tensor( - self.q_proj.bias, stop_gradient=False) - k_proj_bias = paddle.to_tensor( - self.k_proj.bias, stop_gradient=False) - v_proj_bias = paddle.to_tensor( - self.v_proj.bias, stop_gradient=False) + q_proj_bias = paddle.to_tensor(self.q_proj.bias, + stop_gradient=False) + k_proj_bias = paddle.to_tensor(self.k_proj.bias, + stop_gradient=False) + v_proj_bias = paddle.to_tensor(self.v_proj.bias, + stop_gradient=False) qkv_bias = np.concatenate( (q_proj_bias.numpy(), k_proj_bias.numpy(), v_proj_bias.numpy())) qkv_bias = qkv_bias.reshape((3, self.num_heads, self.head_dim)) qkv_bias_tensor = paddle.to_tensor(qkv_bias, stop_gradient=False) - out_linear_bias = paddle.to_tensor( - self.out_proj.bias, stop_gradient=False) - ffn1_bias = paddle.to_tensor( - 
self.ffn1_proj.bias, stop_gradient=False) - ffn2_bias = paddle.to_tensor( - self.ffn2_proj.bias, stop_gradient=False) + out_linear_bias = paddle.to_tensor(self.out_proj.bias, + stop_gradient=False) + ffn1_bias = paddle.to_tensor(self.ffn1_proj.bias, + stop_gradient=False) + ffn2_bias = paddle.to_tensor(self.ffn2_proj.bias, + stop_gradient=False) ln_scale = paddle.to_tensor(self.norm.weight, stop_gradient=False) ln_bias = paddle.to_tensor(self.norm.bias, stop_gradient=False) - ffn_ln_scale = paddle.to_tensor( - self.ffn_norm.weight, stop_gradient=False) + ffn_ln_scale = paddle.to_tensor(self.ffn_norm.weight, + stop_gradient=False) ffn_ln_bias = paddle.to_tensor(self.ffn_norm.bias, stop_gradient=False) q_proj_weight = q_proj_weight.numpy().transpose((1, 0)) @@ -357,12 +352,11 @@ class TestFusedMultiTransformerOp(OpTest): cache_kvs = [] max_seq_length = (self.cache_length + 128) // 128 * 128 - cache_kv = np.zeros( - [ - 2, self.batch_size, self.num_heads, max_seq_length, - self.head_dim - ], - dtype=self.x_type) + cache_kv = np.zeros([ + 2, self.batch_size, self.num_heads, max_seq_length, + self.head_dim + ], + dtype=self.x_type) elems = 4 if self.x_type is np.float16: @@ -390,8 +384,9 @@ class TestFusedMultiTransformerOp(OpTest): assert self.query_length == self.cache_length cache_kv[:] = 0 else: - time_step = paddle.to_tensor( - [self.cache_length], dtype='int32', place=paddle.CPUPlace()) + time_step = paddle.to_tensor([self.cache_length], + dtype='int32', + place=paddle.CPUPlace()) if self.has_attn_mask: attn_mask = paddle.to_tensor(self.attn_mask, stop_gradient=False) else: @@ -423,31 +418,29 @@ class TestFusedMultiTransformerOp(OpTest): ffn_ln_scales.append(ffn_ln_scale) ffn_ln_biases.append(ffn_ln_bias) if self.has_cache_kv: - cache_kvs.append( - paddle.to_tensor( - cache_kv, stop_gradient=False)) - - final_out = fused_multi_transformer( - x, - ln_scales, - ln_biases, - qkv_weights, - qkv_biases, - out_weights, - out_biases, - ffn_ln_scales, - ffn_ln_biases, - ffn1_weights, - ffn1_biases, - ffn2_weights, - ffn2_biases, - pre_layer_norm=self.pre_layer_norm, - epsilon=epsilon, - cache_kvs=cache_kvs, - time_step=time_step, - attn_mask=attn_mask, - dropout_rate=self.dropout_prob, - training=self.training) + cache_kvs.append(paddle.to_tensor(cache_kv, + stop_gradient=False)) + + final_out = fused_multi_transformer(x, + ln_scales, + ln_biases, + qkv_weights, + qkv_biases, + out_weights, + out_biases, + ffn_ln_scales, + ffn_ln_biases, + ffn1_weights, + ffn1_biases, + ffn2_weights, + ffn2_biases, + pre_layer_norm=self.pre_layer_norm, + epsilon=epsilon, + cache_kvs=cache_kvs, + time_step=time_step, + attn_mask=attn_mask, + dropout_rate=self.dropout_prob, + training=self.training) if self.has_cache_kv: return final_out[0], final_out[1] @@ -469,9 +462,9 @@ class TestFusedMultiTransformerOp(OpTest): if self.debug: print("cache_k out timestep=128") - print(cache_kv_out[0].reshape([ - 2, bsz, num_head, v_elems, max_seq_len, elems - ])[0, 0, 0, :, self.cache_length, :]) + print(cache_kv_out[0].reshape( + [2, bsz, num_head, v_elems, max_seq_len, + elems])[0, 0, 0, :, self.cache_length, :]) print("cache_v out timestep=128") print(cache_kv_out[0][1, 0, 0, self.cache_length, :]) @@ -492,18 +485,25 @@ class TestFusedMultiTransformerOp(OpTest): cache_v = cache_kv_out[i][1, :, :, :self.cache_length, :] - np.testing.assert_allclose( - cache_k_ref, cache_k, rtol=self.rtol, atol=self.atol) - np.testing.assert_allclose( - cache_v_ref, cache_v, rtol=self.rtol, atol=self.atol) + 
np.testing.assert_allclose(cache_k_ref, + cache_k, + rtol=self.rtol, + atol=self.atol) + np.testing.assert_allclose(cache_v_ref, + cache_v, + rtol=self.rtol, + atol=self.atol) if i == 0: break - np.testing.assert_allclose( - final_out_ref, final_out, rtol=self.rtol, atol=self.atol) + np.testing.assert_allclose(final_out_ref, + final_out, + rtol=self.rtol, + atol=self.atol) class TestFusedMultiTransformerOpFp16(TestFusedMultiTransformerOp): + def config(self): super().config() self.x_type = np.float16 @@ -511,6 +511,7 @@ class TestFusedMultiTransformerOpFp16(TestFusedMultiTransformerOp): class TestFusedMultiTransformerOpCacheKV(TestFusedMultiTransformerOp): + def config(self): super().config() self.has_cache_kv = True @@ -520,6 +521,7 @@ class TestFusedMultiTransformerOpCacheKV(TestFusedMultiTransformerOp): class TestFusedMultiTransformerOpCacheKVFp16(TestFusedMultiTransformerOp): + def config(self): super().config() self.has_cache_kv = True @@ -529,6 +531,7 @@ class TestFusedMultiTransformerOpCacheKVFp16(TestFusedMultiTransformerOp): class TestFusedMultiTransformerOpGenCacheKV(TestFusedMultiTransformerOp): + def config(self): super().config() self.has_cache_kv = True @@ -536,6 +539,7 @@ class TestFusedMultiTransformerOpGenCacheKV(TestFusedMultiTransformerOp): class TestFusedMultiTransformerOpGenCacheKVFp16(TestFusedMultiTransformerOp): + def config(self): super().config() self.has_cache_kv = True diff --git a/python/paddle/fluid/tests/unittests/test_fused_multihead_matmul_op.py b/python/paddle/fluid/tests/unittests/test_fused_multihead_matmul_op.py index d78e929fb60..0b05a660243 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_multihead_matmul_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_multihead_matmul_op.py @@ -35,6 +35,7 @@ def stable_softmax(x): @unittest.skipIf(not core.is_compiled_with_cuda(), "Paddle core is not compiled with CUDA") class TestFusedMultiheadMatmulOp(OpTest): + def config(self): self.seq_len = 128 self.size_per_head = 64 @@ -113,6 +114,7 @@ class TestFusedMultiheadMatmulOp(OpTest): class TestFusedMultiHeadMatmulOp2(TestFusedMultiheadMatmulOp): + def config(self): self.seq_len = 256 self.size_per_head = 32 diff --git a/python/paddle/fluid/tests/unittests/test_fused_transformer_encoder_layer.py b/python/paddle/fluid/tests/unittests/test_fused_transformer_encoder_layer.py index 843b495e85b..882258239d0 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_transformer_encoder_layer.py +++ b/python/paddle/fluid/tests/unittests/test_fused_transformer_encoder_layer.py @@ -21,6 +21,7 @@ import unittest class TestFusedTransformerEncoderLayer(unittest.TestCase): + def setActivation(self): self.activation = 'gelu' @@ -60,7 +61,8 @@ class TestFusedTransformerEncoderLayer(unittest.TestCase): def fused_weight(self, weight, num_head): a = paddle.transpose(weight, perm=[1, 0]) return paddle.reshape( - a, shape=[1, num_head, int(a.shape[0] / num_head), a.shape[1]]) + a, shape=[1, num_head, + int(a.shape[0] / num_head), a.shape[1]]) def fused_qkv(self, q, k, v, num_head): fq = self.fused_weight(q, num_head) @@ -80,10 +82,9 @@ class TestFusedTransformerEncoderLayer(unittest.TestCase): self.embed_dim).astype(self.dtype) if self.has_attn_mask: - attn_mask = np.ones( - (self.batch_size, self.num_heads, self.query_length, - self.key_length), - dtype=self.attn_mask_type) + attn_mask = np.ones((self.batch_size, self.num_heads, + self.query_length, self.key_length), + dtype=self.attn_mask_type) attn_mask_tensor = paddle.to_tensor(attn_mask) else: attn_mask = 
None @@ -91,9 +92,8 @@ class TestFusedTransformerEncoderLayer(unittest.TestCase): dout = np.random.random(src.shape).astype(self.dtype) - base_out = base_encoder( - paddle.to_tensor( - src, stop_gradient=False), attn_mask_tensor) + base_out = base_encoder(paddle.to_tensor(src, stop_gradient=False), + attn_mask_tensor) paddle.autograd.backward([base_out], [paddle.to_tensor(dout)], True) fused_encoder = FusedTransformerEncoderLayer( @@ -138,12 +138,12 @@ class TestFusedTransformerEncoderLayer(unittest.TestCase): tmp = paddle.concat(x=[q_bias, k_bias, v_bias], axis=0) qkv_bias = paddle.reshape( tmp, - shape=[3, self.num_heads, int(tmp.shape[0] / 3 / self.num_heads)]) + shape=[3, self.num_heads, + int(tmp.shape[0] / 3 / self.num_heads)]) fused_encoder.fused_attn.qkv_bias.set_value(qkv_bias) - fused_out = fused_encoder( - paddle.to_tensor( - src, stop_gradient=False), attn_mask_tensor) + fused_out = fused_encoder(paddle.to_tensor(src, stop_gradient=False), + attn_mask_tensor) paddle.autograd.backward([fused_out], [paddle.to_tensor(dout)], True) correct_ffn_str = 'd_model={}, dim_feedforward={}, dropout_rate={}, epsilon={}, activation={}, act_dropout_rate={}, normalize_before={}, dtype={}'.format( @@ -158,35 +158,40 @@ class TestFusedTransformerEncoderLayer(unittest.TestCase): self.pre_layer_norm, False, self.dtype) self.assertTrue(fused_encoder.fused_attn.extra_repr(), correct_attn_str) - np.testing.assert_allclose( - fused_out.numpy(), base_out.numpy(), rtol=self.rtol, atol=self.atol) + np.testing.assert_allclose(fused_out.numpy(), + base_out.numpy(), + rtol=self.rtol, + atol=self.atol) self.assertTrue( - np.allclose( - fused_out.grad.numpy(), - base_out.grad.numpy(), - rtol=self.rtol, - atol=self.atol)) + np.allclose(fused_out.grad.numpy(), + base_out.grad.numpy(), + rtol=self.rtol, + atol=self.atol)) class TestFusedTransformerEncoderLayerAct(TestFusedTransformerEncoderLayer): + def setActivation(self): self.activation = 'relu' class TestFusedTransformerEncoderLayerPreLayerNorm( TestFusedTransformerEncoderLayer): + def setPreLayerNorm(self): self.pre_layer_norm = True class TestFusedTransformerEncoderLayerAttnMaskIsNone( TestFusedTransformerEncoderLayer): + def setAttnMask(self): self.has_attn_mask = False class TestFusedTransformerEncoderLayerPreLnTrueAttnMaskIsNone( TestFusedTransformerEncoderLayer): + def setPreLayerNorm(self): self.pre_layer_norm = True diff --git a/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py b/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py index c241fc65d9b..dd4707c3fc3 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py @@ -37,8 +37,7 @@ def fusion_gru( lod, h0, wh, - np.zeros( - (1, wh.shape[1]), dtype='float32'), + np.zeros((1, wh.shape[1]), dtype='float32'), is_reverse, act_state, act_gate, @@ -46,6 +45,7 @@ def fusion_gru( class TestFusionGRUOp(OpTest): + def set_confs(self): pass @@ -76,9 +76,10 @@ class TestFusionGRUOp(OpTest): N, self.D).astype('float32') if self.with_h0 else np.zeros( (N, self.D), dtype='float32') - _, _, _, hidden = fusion_gru( - x, self.lod, h0, wx, wh, bias, self.is_reverse, self.origin_mode, - ACTIVATION[self.act_state], ACTIVATION[self.act_gate]) + _, _, _, hidden = fusion_gru(x, self.lod, h0, wx, wh, bias, + self.is_reverse, self.origin_mode, + ACTIVATION[self.act_state], + ACTIVATION[self.act_gate]) self.inputs = {'X': (x, self.lod), 'WeightX': wx, 'WeightH': wh} @@ -105,39 +106,46 @@ class TestFusionGRUOp(OpTest): class 
TestFusionGRUOpNoInitial(TestFusionGRUOp): + def set_confs(self): self.with_h0 = False class TestFusionGRUOpNoBias(TestFusionGRUOp): + def set_confs(self): self.with_bias = False class TestFusionGRUOpReverse(TestFusionGRUOp): + def set_confs(self): self.is_reverse = True class TestFusionGRUOpMD1(TestFusionGRUOp): + def set_confs(self): self.M = 36 self.D = 8 class TestFusionGRUOpMD2(TestFusionGRUOp): + def set_confs(self): self.M = 8 self.D = 8 class TestFusionGRUOpMD3(TestFusionGRUOp): + def set_confs(self): self.M = 17 self.D = 15 class TestFusionGRUOpBS1(TestFusionGRUOp): + def set_confs(self): self.lod = [[3]] self.D = 16 diff --git a/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py b/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py index 4899927a769..c7dfaa1d907 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py @@ -38,12 +38,12 @@ def fusion_lstm( act_gate=None, act_cell=None, act_cand=None): - return lstm( - fc(x, wx, bx), lod, h0, c0, w_h, w_b, w_c, is_reverse, act_gate, - act_cell, act_cand) + return lstm(fc(x, wx, bx), lod, h0, c0, w_h, w_b, w_c, is_reverse, act_gate, + act_cell, act_cand) class TestFusionLSTMOp(OpTest): + def set_conf(self): pass @@ -122,63 +122,74 @@ class TestFusionLSTMOp(OpTest): class TestFusionLSTMOpInit(TestFusionLSTMOp): + def set_conf(self): self.has_initial_state = True class TestFusionLSTMOpReverse(TestFusionLSTMOp): + def set_conf(self): self.is_reverse = True class TestFusionLSTMOpInitReverse(TestFusionLSTMOp): + def set_conf(self): self.has_initial_state = True self.is_reverse = True class TestFusionLSTMOpMD1(TestFusionLSTMOp): + def set_conf(self): self.M = 36 self.D = 8 class TestFusionLSTMOpMD2(TestFusionLSTMOp): + def set_conf(self): self.M = 8 self.D = 8 class TestFusionLSTMOpMD3(TestFusionLSTMOp): + def set_conf(self): self.M = 15 self.D = 3 class TestFusionLSTMOpBS1(TestFusionLSTMOp): + def set_conf(self): self.lod = [[3]] self.D = 16 class TestFusionLSTMOpPeepholes(TestFusionLSTMOp): + def set_conf(self): self.use_peepholes = True class TestFusionLSTMOpPeepholesInit(TestFusionLSTMOp): + def set_conf(self): self.use_peepholes = True self.has_initial_state = True class TestFusionLSTMOpPeepholesReverse(TestFusionLSTMOp): + def set_conf(self): self.use_peepholes = True self.is_reverse = True class TestFusionLSTMOpPeepholesInitReverse(TestFusionLSTMOp): + def set_conf(self): self.use_peepholes = True self.has_initial_state = True @@ -186,6 +197,7 @@ class TestFusionLSTMOpPeepholesInitReverse(TestFusionLSTMOp): class TestFusionLSTMOpPeepholesBS1(TestFusionLSTMOp): + def set_conf(self): self.use_peepholes = True self.lod = [[2]] diff --git a/python/paddle/fluid/tests/unittests/test_fusion_repeated_fc_relu_op.py b/python/paddle/fluid/tests/unittests/test_fusion_repeated_fc_relu_op.py index aa244080349..c32d1db5e5d 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_repeated_fc_relu_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_repeated_fc_relu_op.py @@ -21,6 +21,7 @@ from test_fc_op import fc_refer, MatrixGenerate class TestFusionRepeatedFCReluOp(OpTest): + def setUp(self): self.bs = 3 self.ic = 9 @@ -38,20 +39,21 @@ class TestFusionRepeatedFCReluOp(OpTest): i = 0 matrix = MatrixGenerate(self.bs, ics[i], self.oc[i], 1, 1) inp = np.reshape(matrix.input, [self.bs, ics[i]]) - weights.append(('W_{0}'.format(i), np.reshape(matrix.weights, - [ics[i], self.oc[i]]))) + weights.append( + ('W_{0}'.format(i), 
np.reshape(matrix.weights, + [ics[i], self.oc[i]]))) biases.append(('B_{0}'.format(i), matrix.bias)) outs.append( - np.reshape( - np.maximum(fc_refer(matrix, True), 0), [self.bs, self.oc[i]])) + np.reshape(np.maximum(fc_refer(matrix, True), 0), + [self.bs, self.oc[i]])) for i in range(sz - 1): matrix = MatrixGenerate(self.bs, ics[i + 1], self.oc[i + 1], 1, 1) matrix.input = np.reshape(outs[i], [self.bs, ics[i + 1], 1, 1]) out = fc_refer(matrix, True) - weights.append( - ('W_{0}'.format(i + 1), - np.reshape(matrix.weights, [ics[i + 1], self.oc[i + 1]]))) + weights.append(('W_{0}'.format(i + 1), + np.reshape(matrix.weights, + [ics[i + 1], self.oc[i + 1]]))) biases.append(('B_{0}'.format(i + 1), matrix.bias)) outs.append( np.reshape(np.maximum(out, 0), [self.bs, self.oc[i + 1]])) @@ -76,6 +78,7 @@ class TestFusionRepeatedFCReluOp(OpTest): class TestFusionRepeatedFCReluOpBS1(TestFusionRepeatedFCReluOp): + def set_conf(self): self.bs = 1 self.oc = [4, 2, 7, 5, 512, 1024] diff --git a/python/paddle/fluid/tests/unittests/test_fusion_seqconv_eltadd_relu_op.py b/python/paddle/fluid/tests/unittests/test_fusion_seqconv_eltadd_relu_op.py index b6d643c3571..fc40d6dc21d 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_seqconv_eltadd_relu_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_seqconv_eltadd_relu_op.py @@ -22,6 +22,7 @@ from sequence.test_sequence_conv import seqconv class TestSeqConvEltAddRelu(OpTest): + def set_conf(self): pass @@ -40,8 +41,8 @@ class TestSeqConvEltAddRelu(OpTest): T = sum(self.lod[0]) x = np.random.uniform(-1, 1, [T, self.in_fea_size]).astype('float32') w = np.random.uniform( - -1, 1, [self.in_fea_size * self.context_length, - self.out_fea_size]).astype('float32') + -1, 1, [self.in_fea_size * self.context_length, self.out_fea_size + ]).astype('float32') b = np.random.uniform(-2, 1, [1, self.out_fea_size]).astype('float32') out = seqconv(x, self.lod, w, self.context_length, self.context_start) out = np.maximum(out + b, 0) @@ -59,16 +60,19 @@ class TestSeqConvEltAddRelu(OpTest): class TestSeqConvEltAddReluBS1(TestSeqConvEltAddRelu): + def set_conf(self): self.lod = [[10]] class TestSeqConvEltAddReluBS1Case2(TestSeqConvEltAddRelu): + def set_conf(self): self.lod = [[2]] class TestSeqConvEltAddReluCase1(TestSeqConvEltAddRelu): + def set_conf(self): self.lod = [[3, 5, 1, 6]] self.context_length = 3 @@ -76,6 +80,7 @@ class TestSeqConvEltAddReluCase1(TestSeqConvEltAddRelu): class TestSeqConvEltAddReluCase2(TestSeqConvEltAddRelu): + def set_conf(self): self.lod = [[10, 1, 2, 4, 1, 5, 6]] self.in_fea_size = 2 @@ -84,6 +89,7 @@ class TestSeqConvEltAddReluCase2(TestSeqConvEltAddRelu): class TestSeqConvEltAddReluCase3(TestSeqConvEltAddRelu): + def set_conf(self): self.lod = [[10, 1, 2, 4, 1, 5, 6]] self.context_length = 5 diff --git a/python/paddle/fluid/tests/unittests/test_fusion_seqexpand_concat_fc_op.py b/python/paddle/fluid/tests/unittests/test_fusion_seqexpand_concat_fc_op.py index 702545d2ee4..d519d3eee2d 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_seqexpand_concat_fc_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_seqexpand_concat_fc_op.py @@ -47,6 +47,7 @@ def fusion_seqexpand_concat_fc(xs, lod, w, b, fc_act): class TestFusionSeqExpandConcatFCOp(OpTest): + def set_conf(self): pass @@ -73,8 +74,8 @@ class TestFusionSeqExpandConcatFCOp(OpTest): # fc weight and bias w = np.random.normal(size=(sum(self.inputs_M), self.D)).astype('float32') - b = np.random.normal(size=( - 1, self.D)).astype('float32') if self.with_bias else 
np.zeros( + b = np.random.normal( + size=(1, self.D)).astype('float32') if self.with_bias else np.zeros( (1, self.D)).astype('float32') out = fusion_seqexpand_concat_fc(xs, self.lod, w, b, @@ -96,40 +97,47 @@ class TestFusionSeqExpandConcatFCOp(OpTest): class TestFusionSECFCOpNonBias(TestFusionSeqExpandConcatFCOp): + def set_conf(self): self.with_bias = False class TestFusionSECFCOpNonAct(TestFusionSeqExpandConcatFCOp): + def set_conf(self): self.fc_act = 'identity' class TestFusionSECFCOpMD1(TestFusionSeqExpandConcatFCOp): + def set_conf(self): self.inputs_M = [3, 4, 2, 1, 5] self.D = 8 class TestFusionSECFCOpMD2(TestFusionSeqExpandConcatFCOp): + def set_conf(self): self.lod = [[5, 6]] self.inputs_M = [1, 1] class TestFusionSECFCOpBS1_1(TestFusionSeqExpandConcatFCOp): + def set_conf(self): self.lod = [[1]] self.inputs_M = [3, 4, 2] class TestFusionSECFCOpBS1_2(TestFusionSeqExpandConcatFCOp): + def set_conf(self): self.lod = [[1]] self.inputs_M = [3, 4] class TestFusionSECFCOpBS1_3(TestFusionSeqExpandConcatFCOp): + def set_conf(self): self.lod = [[5]] self.inputs_M = [6, 3] diff --git a/python/paddle/fluid/tests/unittests/test_fusion_seqpool_concat_op.py b/python/paddle/fluid/tests/unittests/test_fusion_seqpool_concat_op.py index fa42f5d09b8..34ce7beea22 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_seqpool_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_seqpool_concat_op.py @@ -22,6 +22,7 @@ from sequence.test_sequence_pool import compute_seqpool_sum, compute_seqpool_avg class TestFusionSeqPoolConcatOp(OpTest): + def setUp(self): self.w = 11 self.lods = [[[2, 3, 5]], [[1, 5, 2]]] @@ -69,22 +70,26 @@ class TestFusionSeqPoolConcatOp(OpTest): class TestFusionSeqPoolConcatOpCase1(TestFusionSeqPoolConcatOp): + def set_conf(self): self.lods = [[[1]]] class TestFusionSeqPoolConcatOpCase2(TestFusionSeqPoolConcatOp): + def set_conf(self): self.lods = [[[1]], [[1]], [[1]]] class TestFusionSeqPoolConcatOpCase3(TestFusionSeqPoolConcatOp): + def set_conf(self): self.lods = [[[1, 3, 4, 6]]] self.w = 10 class TestFusionSeqPoolConcatOpCase4(TestFusionSeqPoolConcatOp): + def set_conf(self): self.lods = [[[2, 13, 4]], [[1, 1, 1]], [[5, 3, 1]], [[9, 10, 3]]] self.w = 3 @@ -92,11 +97,14 @@ class TestFusionSeqPoolConcatOpCase4(TestFusionSeqPoolConcatOp): ## test avg pool and sqrt def create_test_avg_sqrt_class(parent): + class TestSeqPoolAvgCase(parent): + def set_pooltype(self): self.pooltype = "AVERAGE" class TestSeqPoolSqrtCase(parent): + def set_pooltype(self): self.pooltype = "SQRT" diff --git a/python/paddle/fluid/tests/unittests/test_fusion_seqpool_cvm_concat_op.py b/python/paddle/fluid/tests/unittests/test_fusion_seqpool_cvm_concat_op.py index eb681b1f167..8d3ac3e19ad 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_seqpool_cvm_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_seqpool_cvm_concat_op.py @@ -23,6 +23,7 @@ from test_cvm_op import cvm_compute class TestFusionSeqPoolCVMConcatOp(OpTest): + def setUp(self): self.w = 11 self.use_cvm = True @@ -76,22 +77,26 @@ class TestFusionSeqPoolCVMConcatOp(OpTest): class TestFusionSeqPoolCVMConcatOpCase1(TestFusionSeqPoolCVMConcatOp): + def set_conf(self): self.lods = [[[1]]] class TestFusionSeqPoolCVMConcatOpCase2(TestFusionSeqPoolCVMConcatOp): + def set_conf(self): self.lods = [[[1]], [[1]], [[1]]] class TestFusionSeqPoolCVMConcatOpCase3(TestFusionSeqPoolCVMConcatOp): + def set_conf(self): self.lods = [[[1, 3, 4, 6]]] self.w = 10 class 
TestFusionSeqPoolCVMConcatOpCase4(TestFusionSeqPoolCVMConcatOp): + def set_conf(self): self.lods = [[[2, 13, 4]], [[1, 1, 1]], [[5, 3, 1]], [[9, 10, 3]]] self.w = 3 @@ -99,11 +104,14 @@ class TestFusionSeqPoolCVMConcatOpCase4(TestFusionSeqPoolCVMConcatOp): ## test avg pool and sqrt def create_test_avg_sqrt_class(parent): + class TestSeqPoolAvgCase(parent): + def set_pooltype(self): self.pooltype = "AVERAGE" class TestSeqPoolSqrtCase(parent): + def set_pooltype(self): self.pooltype = "SQRT" diff --git a/python/paddle/fluid/tests/unittests/test_fusion_squared_mat_sub_op.py b/python/paddle/fluid/tests/unittests/test_fusion_squared_mat_sub_op.py index a097d3d9a20..6bf1e308585 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_squared_mat_sub_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_squared_mat_sub_op.py @@ -20,6 +20,7 @@ from op_test import OpTest class TestFusionSquaredMatSubOp(OpTest): + def setUp(self): self.op_type = 'fusion_squared_mat_sub' self.m = 11 @@ -35,7 +36,9 @@ class TestFusionSquaredMatSubOp(OpTest): 'Out': (np.dot(matx, maty)**2 - np.dot(matx**2, maty**2)) * self.scalar } - self.attrs = {'scalar': self.scalar, } + self.attrs = { + 'scalar': self.scalar, + } def set_conf(self): pass @@ -45,6 +48,7 @@ class TestFusionSquaredMatSubOp(OpTest): class TestFusionSquaredMatSubOpCase1(TestFusionSquaredMatSubOp): + def set_conf(self): self.scalar = -0.3 diff --git a/python/paddle/fluid/tests/unittests/test_fusion_transpose_flatten_concat_op.py b/python/paddle/fluid/tests/unittests/test_fusion_transpose_flatten_concat_op.py index 9fe1df39d3a..dc827f27ab4 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_transpose_flatten_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_transpose_flatten_concat_op.py @@ -23,6 +23,7 @@ import paddle.fluid.core as core @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFusionTransposeFlattenConcationOp(OpTest): + def setUp(self): self.init_test_case() self.op_type = "fusion_transpose_flatten_concat" @@ -63,6 +64,7 @@ class TestFusionTransposeFlattenConcationOp(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCase1(TestFusionTransposeFlattenConcationOp): + def init_test_case(self): self.shapes = [(3, 4, 18, 17), (3, 8, 18, 7), (6, 12, 9, 5)] self.trans_axis = (0, 2, 3, 1) @@ -73,6 +75,7 @@ class TestCase1(TestFusionTransposeFlattenConcationOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCase2(TestFusionTransposeFlattenConcationOp): + def init_test_case(self): self.shapes = [(3, 8, 20, 17), (3, 8, 19, 17), (3, 8, 40, 17)] self.trans_axis = (0, 2, 3, 1) @@ -83,6 +86,7 @@ class TestCase2(TestFusionTransposeFlattenConcationOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCase3(TestFusionTransposeFlattenConcationOp): + def init_test_case(self): self.shapes = [(3, 8, 20, 17), (3, 8, 19, 17), (3, 8, 40, 17)] self.trans_axis = (0, 3, 2, 1) @@ -93,6 +97,7 @@ class TestCase3(TestFusionTransposeFlattenConcationOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCase4(TestFusionTransposeFlattenConcationOp): + def init_test_case(self): self.shapes = [(3, 8, 9, 17), (8, 3, 9, 17), (4, 6, 9, 17)] self.trans_axis = (0, 2, 1, 3) @@ -103,6 +108,7 @@ class TestCase4(TestFusionTransposeFlattenConcationOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not 
compiled with CUDA") class TestCase5(TestFusionTransposeFlattenConcationOp): + def init_test_case(self): self.shapes = [(3, 8, 9, 17, 2), (3, 8, 2, 17, 9), (3, 17, 9, 8, 2)] self.trans_axis = (0, 2, 1, 4, 3) diff --git a/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py b/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py index 8404c563274..1d37558bcfa 100644 --- a/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py +++ b/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py @@ -22,6 +22,7 @@ import unittest class GastNodeTransformer(gast.NodeTransformer): + def __init__(self, root): self.root = root @@ -123,6 +124,7 @@ def code_ast(source): class TestPythonCompatibility(unittest.TestCase): + def _check_compatibility(self, source, target): source_dump = code_gast_ast(source) target_dump = code_ast(target) @@ -144,12 +146,12 @@ class TestPythonCompatibility(unittest.TestCase): self._check_compatibility(source, target) # The 0.3.3 version of gast has a bug in python3.8 that - # would cause the following tests to fail. But this - # problem doesn't affect the use of Paddle's related - # functions, therefore, the following tests would be + # would cause the following tests to fail. But this + # problem doesn't affect the use of Paddle's related + # functions, therefore, the following tests would be # disable in python3.8. # - # This problem had been fixed and updated to version + # This problem had been fixed and updated to version # 0.4.1 of gast. # # More information please refer to: diff --git a/python/paddle/fluid/tests/unittests/test_gather_nd_op.py b/python/paddle/fluid/tests/unittests/test_gather_nd_op.py index ac2d980f7fd..31cf8cdc3a7 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_nd_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_nd_op.py @@ -41,6 +41,7 @@ class TestGatherNdOpWithEmptyIndex(OpTest): class TestGatherNdOpWithIndex1(OpTest): + def setUp(self): self.op_type = "gather_nd" self.python_api = paddle.gather_nd @@ -157,9 +158,11 @@ class TestGatherNdOpWithHighRankDiff(OpTest): #Test Python API class TestGatherNdOpAPI(unittest.TestCase): + def test_case1(self): - x1 = fluid.layers.data( - name='x1', shape=[30, 40, 50, 60], dtype='float32') + x1 = fluid.layers.data(name='x1', + shape=[30, 40, 50, 60], + dtype='float32') index1 = fluid.layers.data(name='index1', shape=[2, 4], dtype='int32') output1 = fluid.layers.gather_nd(x1, index1) @@ -176,13 +179,17 @@ class TestGatherNdOpAPI(unittest.TestCase): #Test Raise Index Error class TestGatherNdOpRaise(unittest.TestCase): + def test_check_raise(self): + def check_raise_is_test(): try: - x = fluid.layers.data( - name='x', shape=[3, 4, 5], dtype='float32') - index = fluid.layers.data( - name='index', shape=[2, 10], dtype='int32') + x = fluid.layers.data(name='x', + shape=[3, 4, 5], + dtype='float32') + index = fluid.layers.data(name='index', + shape=[2, 10], + dtype='int32') output = fluid.layers.gather_nd(x, index) except Exception as e: t = \ @@ -194,6 +201,7 @@ class TestGatherNdOpRaise(unittest.TestCase): class TestGatherNdError(unittest.TestCase): + def test_error(self): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): @@ -201,8 +209,9 @@ class TestGatherNdError(unittest.TestCase): shape = [8, 9, 6] x = paddle.fluid.data(shape=shape, dtype='float32', name='x') index = paddle.fluid.data(shape=shape, dtype='bool', name='index') - index_float = paddle.fluid.data( - shape=shape, dtype='float32', 
name='index_float') + index_float = paddle.fluid.data(shape=shape, + dtype='float32', + name='index_float') np_x = np.random.random(shape).astype('float32') np_index = np.array(np.random.randint(2, size=shape, dtype=bool)) @@ -223,6 +232,7 @@ class TestGatherNdError(unittest.TestCase): class TestGatherNdAPI2(unittest.TestCase): + def test_static(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data1 = fluid.layers.data('data1', shape=[-1, 2], dtype='float64') @@ -232,8 +242,10 @@ class TestGatherNdAPI2(unittest.TestCase): exe = fluid.Executor(place) input = np.array([[1, 2], [3, 4], [5, 6]]) index_1 = np.array([[1]]) - result, = exe.run(feed={"data1": input, - "index": index_1}, + result, = exe.run(feed={ + "data1": input, + "index": index_1 + }, fetch_list=[out]) expected_output = np.array([[3, 4]]) self.assertTrue(np.allclose(result, expected_output)) diff --git a/python/paddle/fluid/tests/unittests/test_gather_op.py b/python/paddle/fluid/tests/unittests/test_gather_op.py index 3d7dc2da052..0c356f4bc38 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_op.py @@ -31,6 +31,7 @@ def gather_numpy(x, index, axis): class TestGatherOp(OpTest): + def setUp(self): self.op_type = "gather" self.python_api = paddle.gather @@ -59,6 +60,7 @@ class TestGatherOp(OpTest): class TestCase1(TestGatherOp): + def config(self): """ For one dimension input @@ -70,6 +72,7 @@ class TestCase1(TestGatherOp): class TestCase2(TestGatherOp): + def config(self): """ For int64_t index type @@ -81,6 +84,7 @@ class TestCase2(TestGatherOp): class TestCase3(TestGatherOp): + def config(self): """ For other input type @@ -92,6 +96,7 @@ class TestCase3(TestGatherOp): class TestCase4(TestGatherOp): + def config(self): self.x_shape = (10, 20) self.attrs = {'overwrite': False} @@ -101,6 +106,7 @@ class TestCase4(TestGatherOp): class TestCase5(TestGatherOp): + def config(self): self.x_shape = (10, 20) self.attrs = {'overwrite': False} @@ -110,6 +116,7 @@ class TestCase5(TestGatherOp): class TestCase6(TestGatherOp): + def config(self): self.x_shape = (10, 20) self.attrs = {'overwrite': True} @@ -119,6 +126,7 @@ class TestCase6(TestGatherOp): class TestGatherBF16Op(OpTest): + def setUp(self): self.op_type = "gather" self.python_api = paddle.gather @@ -153,6 +161,7 @@ class TestGatherBF16Op(OpTest): class TestGatherOp1(OpTest): + def setUp(self): self.op_type = "gather" self.python_api = paddle.gather @@ -183,6 +192,7 @@ class TestGatherOp1(OpTest): class TestGatherOp2(TestGatherOp1): + def config(self): """ For multi-dimension input @@ -196,6 +206,7 @@ class TestGatherOp2(TestGatherOp1): class TestGatherOp3(TestGatherOp1): + def config(self): """ For multi-dimension input @@ -209,6 +220,7 @@ class TestGatherOp3(TestGatherOp1): class TestGatherOp4(TestGatherOp1): + def config(self): """ For multi-dimension input @@ -223,6 +235,7 @@ class TestGatherOp4(TestGatherOp1): class API_TestGather(unittest.TestCase): + def test_out1(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data1 = fluid.layers.data('data1', shape=[-1, 2], dtype='float64') @@ -232,8 +245,10 @@ class API_TestGather(unittest.TestCase): exe = fluid.Executor(place) input = np.array([[1, 2], [3, 4], [5, 6]]) index_1 = np.array([1, 2]) - result, = exe.run(feed={"data1": input, - "index": index_1}, + result, = exe.run(feed={ + "data1": input, + "index": index_1 + }, fetch_list=[out]) expected_output = np.array([[3, 4], [5, 6]]) self.assertTrue(np.allclose(result, 
expected_output)) @@ -250,16 +265,18 @@ class API_TestGather(unittest.TestCase): x_np = np.array([[1, 2], [3, 4], [5, 6]]).astype('float64') index_np = np.array([1, 1]).astype('int32') axis_np = np.array([1]).astype('int32') - result, = exe.run( - feed={"x": x_np, - "index": index_np, - 'axis': axis_np}, - fetch_list=[out]) + result, = exe.run(feed={ + "x": x_np, + "index": index_np, + 'axis': axis_np + }, + fetch_list=[out]) expected_output = gather_numpy(x_np, index_np, axis_np[0]) self.assertTrue(np.allclose(result, expected_output)) class API_TestDygraphGather(unittest.TestCase): + def test_out1(self): paddle.disable_static() input_1 = np.array([[1, 2], [3, 4], [5, 6]]) @@ -304,8 +321,8 @@ class API_TestDygraphGather(unittest.TestCase): def test_dygraph(): with fluid.dygraph.guard(): - gpu_out = paddle.gather( - paddle.to_tensor(x), paddle.to_tensor(index)) + gpu_out = paddle.gather(paddle.to_tensor(x), + paddle.to_tensor(index)) return gpu_out.numpy() @switch_to_static_graph @@ -313,8 +330,9 @@ class API_TestDygraphGather(unittest.TestCase): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): x_t = paddle.static.data(name="x", dtype=x.dtype, shape=x.shape) - index_t = paddle.static.data( - name="index", dtype=index.dtype, shape=index.shape) + index_t = paddle.static.data(name="index", + dtype=index.dtype, + shape=index.shape) out_t = paddle.gather(x_t, index_t) feed = {x_t.name: x, index_t.name: index} fetch = [out_t] @@ -327,6 +345,7 @@ class API_TestDygraphGather(unittest.TestCase): class TestGathertError(unittest.TestCase): + def test_error1(self): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): @@ -335,8 +354,9 @@ class TestGathertError(unittest.TestCase): x = paddle.fluid.data(shape=shape, dtype='int8', name='x') axis = paddle.fluid.data(shape=[1], dtype='float32', name='axis') index = paddle.fluid.data(shape=shape, dtype='int32', name='index') - index_float = paddle.fluid.data( - shape=shape, dtype='float32', name='index_float') + index_float = paddle.fluid.data(shape=shape, + dtype='float32', + name='index_float') def test_x_type(): paddle.gather(x, index) @@ -364,8 +384,9 @@ class TestGathertError(unittest.TestCase): shape = [8, 9, 6] x = fluid.data(shape=shape, dtype='int8', name='x') index = fluid.data(shape=shape, dtype='int32', name='mask') - index_float = fluid.data( - shape=shape, dtype='float32', name='index_float') + index_float = fluid.data(shape=shape, + dtype='float32', + name='index_float') def test_x_type(): paddle.fluid.layers.gather(x, index) @@ -379,6 +400,7 @@ class TestGathertError(unittest.TestCase): class TestCheckOutType(unittest.TestCase): + def test_out_type(self): data = paddle.static.data(shape=[16, 10], dtype='int64', name='x') index = paddle.static.data(shape=[4], dtype='int64', name='index') diff --git a/python/paddle/fluid/tests/unittests/test_gather_tree_op.py b/python/paddle/fluid/tests/unittests/test_gather_tree_op.py index 6fe68c5d34f..f3a5acc0484 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_tree_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_tree_op.py @@ -23,14 +23,17 @@ from paddle.fluid.framework import program_guard, Program class TestGatherTreeOp(OpTest): + def setUp(self): self.op_type = "gather_tree" self.python_api = paddle.nn.functional.gather_tree max_length, batch_size, beam_size = 5, 2, 2 - ids = np.random.randint( - 0, high=10, size=(max_length, batch_size, beam_size)) - parents = np.random.randint( - 0, high=beam_size, 
size=(max_length, batch_size, beam_size)) + ids = np.random.randint(0, + high=10, + size=(max_length, batch_size, beam_size)) + parents = np.random.randint(0, + high=beam_size, + size=(max_length, batch_size, beam_size)) self.inputs = {"Ids": ids, "Parents": parents} self.outputs = {'Out': self.backtrace(ids, parents)} @@ -53,40 +56,41 @@ class TestGatherTreeOp(OpTest): class TestGatherTreeOpAPI(unittest.TestCase): + def test_case(self): paddle.enable_static() - ids = fluid.layers.data( - name='ids', shape=[5, 2, 2], dtype='int64', append_batch_size=False) - parents = fluid.layers.data( - name='parents', - shape=[5, 2, 2], - dtype='int64', - append_batch_size=False) + ids = fluid.layers.data(name='ids', + shape=[5, 2, 2], + dtype='int64', + append_batch_size=False) + parents = fluid.layers.data(name='parents', + shape=[5, 2, 2], + dtype='int64', + append_batch_size=False) final_sequences = fluid.layers.gather_tree(ids, parents) paddle.disable_static() def test_case2(self): - ids = paddle.to_tensor( - [[[2, 2], [6, 1]], [[3, 9], [6, 1]], [[0, 1], [9, 0]]]) - parents = paddle.to_tensor( - [[[0, 0], [1, 1]], [[1, 0], [1, 0]], [[0, 0], [0, 1]]]) + ids = paddle.to_tensor([[[2, 2], [6, 1]], [[3, 9], [6, 1]], + [[0, 1], [9, 0]]]) + parents = paddle.to_tensor([[[0, 0], [1, 1]], [[1, 0], [1, 0]], + [[0, 0], [0, 1]]]) final_sequences = paddle.nn.functional.gather_tree(ids, parents) class TestGatherTreeOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): - ids = fluid.layers.data( - name='ids', - shape=[5, 2, 2], - dtype='int64', - append_batch_size=False) - parents = fluid.layers.data( - name='parents', - shape=[5, 2, 2], - dtype='int64', - append_batch_size=False) + ids = fluid.layers.data(name='ids', + shape=[5, 2, 2], + dtype='int64', + append_batch_size=False) + parents = fluid.layers.data(name='parents', + shape=[5, 2, 2], + dtype='int64', + append_batch_size=False) def test_Variable_ids(): # the input type must be Variable @@ -104,22 +108,20 @@ class TestGatherTreeOpError(unittest.TestCase): def test_type_ids(): # dtype must be int32 or int64 - bad_ids = fluid.layers.data( - name='bad_ids', - shape=[5, 2, 2], - dtype='float32', - append_batch_size=False) + bad_ids = fluid.layers.data(name='bad_ids', + shape=[5, 2, 2], + dtype='float32', + append_batch_size=False) fluid.layers.gather_tree(bad_ids, parents) self.assertRaises(TypeError, test_type_ids) def test_type_parents(): # dtype must be int32 or int64 - bad_parents = fluid.layers.data( - name='bad_parents', - shape=[5, 2, 2], - dtype='float32', - append_batch_size=False) + bad_parents = fluid.layers.data(name='bad_parents', + shape=[5, 2, 2], + dtype='float32', + append_batch_size=False) fluid.layers.gather_tree(ids, bad_parents) self.assertRaises(TypeError, test_type_parents) diff --git a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py index 4140ce44648..43eaa7bf6a1 100644 --- a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py @@ -28,6 +28,7 @@ import paddle class TestGaussianRandomOp(OpTest): + def setUp(self): self.op_type = "gaussian_random" self.python_api = paddle.normal @@ -65,15 +66,14 @@ class TestGaussianRandomOp(OpTest): hist2, _ = np.histogram(data, range=(-3, 5)) hist2 = hist2.astype("float32") hist2 /= float(outs[0].size) - self.assertTrue( - np.allclose( - hist, hist2, rtol=0, atol=0.01), - "hist: " + 
str(hist) + " hist2: " + str(hist2)) + self.assertTrue(np.allclose(hist, hist2, rtol=0, atol=0.01), + "hist: " + str(hist) + " hist2: " + str(hist2)) @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestGaussianRandomBF16Op(OpTest): + def setUp(self): self.op_type = "gaussian_random" self.python_api = paddle.normal @@ -97,8 +97,8 @@ class TestGaussianRandomBF16Op(OpTest): self.std = 2. def test_check_output(self): - self.check_output_with_place_customized( - self.verify_output, place=core.CUDAPlace(0)) + self.check_output_with_place_customized(self.verify_output, + place=core.CUDAPlace(0)) def test_eager(self): with _test_eager_guard(): @@ -114,13 +114,12 @@ class TestGaussianRandomBF16Op(OpTest): hist2, _ = np.histogram(data, range=(-3, 5)) hist2 = hist2.astype("float32") hist2 /= float(outs[0].size) - self.assertTrue( - np.allclose( - hist, hist2, rtol=0, atol=0.05), - "hist: " + str(hist) + " hist2: " + str(hist2)) + self.assertTrue(np.allclose(hist, hist2, rtol=0, atol=0.05), + "hist: " + str(hist) + " hist2: " + str(hist2)) class TestMeanStdAreInt(TestGaussianRandomOp): + def set_attrs(self): self.mean = 1 self.std = 2 @@ -128,6 +127,7 @@ class TestMeanStdAreInt(TestGaussianRandomOp): # Situation 2: Attr(shape) is a list(with tensor) class TestGaussianRandomOp_ShapeTensorList(TestGaussianRandomOp): + def setUp(self): '''Test gaussian_random op with specified value ''' @@ -161,8 +161,9 @@ class TestGaussianRandomOp_ShapeTensorList(TestGaussianRandomOp): self.check_output_customized(self.verify_output) -class TestGaussianRandomOp2_ShapeTensorList( - TestGaussianRandomOp_ShapeTensorList): +class TestGaussianRandomOp2_ShapeTensorList(TestGaussianRandomOp_ShapeTensorList + ): + def init_data(self): self.shape = [123, 92] self.infer_shape = [-1, -1] @@ -172,8 +173,9 @@ class TestGaussianRandomOp2_ShapeTensorList( self.seed = 10 -class TestGaussianRandomOp3_ShapeTensorList( - TestGaussianRandomOp_ShapeTensorList): +class TestGaussianRandomOp3_ShapeTensorList(TestGaussianRandomOp_ShapeTensorList + ): + def init_data(self): self.shape = [123, 92] self.infer_shape = [123, -1] @@ -183,8 +185,9 @@ class TestGaussianRandomOp3_ShapeTensorList( self.seed = 10 -class TestGaussianRandomOp4_ShapeTensorList( - TestGaussianRandomOp_ShapeTensorList): +class TestGaussianRandomOp4_ShapeTensorList(TestGaussianRandomOp_ShapeTensorList + ): + def init_data(self): self.shape = [123, 92] self.infer_shape = [123, -1] @@ -196,6 +199,7 @@ class TestGaussianRandomOp4_ShapeTensorList( # Situation 3: shape is a tensor class TestGaussianRandomOp1_ShapeTensor(TestGaussianRandomOp): + def setUp(self): '''Test gaussian_random op with specified value ''' @@ -222,53 +226,54 @@ class TestGaussianRandomOp1_ShapeTensor(TestGaussianRandomOp): # Test python API class TestGaussianRandomAPI(unittest.TestCase): + def test_api(self): positive_2_int32 = fluid.layers.fill_constant([1], "int32", 2000) positive_2_int64 = fluid.layers.fill_constant([1], "int64", 500) - shape_tensor_int32 = fluid.data( - name="shape_tensor_int32", shape=[2], dtype="int32") - - shape_tensor_int64 = fluid.data( - name="shape_tensor_int64", shape=[2], dtype="int64") - - out_1 = fluid.layers.gaussian_random( - shape=[2000, 500], dtype="float32", mean=0.0, std=1.0, seed=10) - - out_2 = fluid.layers.gaussian_random( - shape=[2000, positive_2_int32], - dtype="float32", - mean=0., - std=1.0, - seed=10) - - out_3 = fluid.layers.gaussian_random( - shape=[2000, positive_2_int64], - dtype="float32", - mean=0., - std=1.0, - 
seed=10) - - out_4 = fluid.layers.gaussian_random( - shape=shape_tensor_int32, - dtype="float32", - mean=0., - std=1.0, - seed=10) - - out_5 = fluid.layers.gaussian_random( - shape=shape_tensor_int64, - dtype="float32", - mean=0., - std=1.0, - seed=10) - - out_6 = fluid.layers.gaussian_random( - shape=shape_tensor_int64, - dtype=np.float32, - mean=0., - std=1.0, - seed=10) + shape_tensor_int32 = fluid.data(name="shape_tensor_int32", + shape=[2], + dtype="int32") + + shape_tensor_int64 = fluid.data(name="shape_tensor_int64", + shape=[2], + dtype="int64") + + out_1 = fluid.layers.gaussian_random(shape=[2000, 500], + dtype="float32", + mean=0.0, + std=1.0, + seed=10) + + out_2 = fluid.layers.gaussian_random(shape=[2000, positive_2_int32], + dtype="float32", + mean=0., + std=1.0, + seed=10) + + out_3 = fluid.layers.gaussian_random(shape=[2000, positive_2_int64], + dtype="float32", + mean=0., + std=1.0, + seed=10) + + out_4 = fluid.layers.gaussian_random(shape=shape_tensor_int32, + dtype="float32", + mean=0., + std=1.0, + seed=10) + + out_5 = fluid.layers.gaussian_random(shape=shape_tensor_int64, + dtype="float32", + mean=0., + std=1.0, + seed=10) + + out_6 = fluid.layers.gaussian_random(shape=shape_tensor_int64, + dtype=np.float32, + mean=0., + std=1.0, + seed=10) exe = fluid.Executor(place=fluid.CPUPlace()) res_1, res_2, res_3, res_4, res_5, res_6 = exe.run( @@ -318,6 +323,7 @@ class TestGaussianRandomAPI(unittest.TestCase): class TestStandardNormalDtype(unittest.TestCase): + def test_default_dtype(self): paddle.disable_static() @@ -344,6 +350,7 @@ class TestStandardNormalDtype(unittest.TestCase): class TestRandomValue(unittest.TestCase): + def test_fixed_random_number(self): # Test GPU Fixed random number, which is generated by 'curandStatePhilox4_32_10_t' if not paddle.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_gcd.py b/python/paddle/fluid/tests/unittests/test_gcd.py index 820216dc56c..b3ada9cdaa6 100644 --- a/python/paddle/fluid/tests/unittests/test_gcd.py +++ b/python/paddle/fluid/tests/unittests/test_gcd.py @@ -26,6 +26,7 @@ paddle.enable_static() class TestGcdAPI(unittest.TestCase): + def setUp(self): self.x_np = 12 self.y_np = 20 @@ -40,15 +41,17 @@ class TestGcdAPI(unittest.TestCase): y = fluid.data(name='input2', dtype='int32', shape=self.y_shape) out = paddle.gcd(x, y) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) res = exe.run(fluid.default_main_program(), - feed={'input1': self.x_np, - 'input2': self.y_np}, + feed={ + 'input1': self.x_np, + 'input2': self.y_np + }, fetch_list=[out]) - self.assertTrue((np.array(res[0]) == np.gcd(self.x_np, self.y_np) - ).all()) + self.assertTrue((np.array(res[0]) == np.gcd(self.x_np, + self.y_np)).all()) def test_dygraph(self): paddle.disable_static() @@ -62,6 +65,7 @@ class TestGcdAPI(unittest.TestCase): class TestGcdAPI2(TestGcdAPI): + def setUp(self): self.x_np = np.arange(6).astype(np.int32) self.y_np = np.array([20]).astype(np.int32) @@ -70,6 +74,7 @@ class TestGcdAPI2(TestGcdAPI): class TestGcdAPI3(TestGcdAPI): + def setUp(self): self.x_np = 0 self.y_np = 20 @@ -78,6 +83,7 @@ class TestGcdAPI3(TestGcdAPI): class TestGcdAPI4(TestGcdAPI): + def setUp(self): self.x_np = 0 self.y_np = 0 @@ -86,6 +92,7 @@ class TestGcdAPI4(TestGcdAPI): class TestGcdAPI5(TestGcdAPI): + def setUp(self): self.x_np = 12 self.y_np = -20 diff --git 
a/python/paddle/fluid/tests/unittests/test_gelu_op.py b/python/paddle/fluid/tests/unittests/test_gelu_op.py index abfb65c27a9..f6fa4e2da59 100644 --- a/python/paddle/fluid/tests/unittests/test_gelu_op.py +++ b/python/paddle/fluid/tests/unittests/test_gelu_op.py @@ -26,14 +26,15 @@ from paddle.fluid.framework import _test_eager_guard def gelu(x, approximate): if approximate: - y_ref = 0.5 * x * (1.0 + np.tanh( - np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3)))) + y_ref = 0.5 * x * ( + 1.0 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3)))) else: y_ref = 0.5 * x * (1 + erf(x / np.sqrt(2))) return y_ref.astype(x.dtype) class TestGeluOp(unittest.TestCase): + def _test_case1_cpu(self, approximate): x = np.random.uniform(-1, 1, size=(11, 17)).astype(np.float32) y_ref = gelu(x, approximate) @@ -89,8 +90,7 @@ class TestGeluOp(unittest.TestCase): self.assertTrue(np.allclose(y_ref, y_fast_math, rtol=1e-5, atol=5e-4)) self.assertTrue( - np.allclose( - x_g_ref, x_g_fast_math, rtol=1e-5, atol=5e-4)) + np.allclose(x_g_ref, x_g_fast_math, rtol=1e-5, atol=5e-4)) def test_fast_math_eager(self): with _test_eager_guard(): diff --git a/python/paddle/fluid/tests/unittests/test_gen_nccl_id_op.py b/python/paddle/fluid/tests/unittests/test_gen_nccl_id_op.py index c5e48e27a75..8543912f04f 100644 --- a/python/paddle/fluid/tests/unittests/test_gen_nccl_id_op.py +++ b/python/paddle/fluid/tests/unittests/test_gen_nccl_id_op.py @@ -67,6 +67,7 @@ def run_gen_ncc_id(attr): class TestGenNcclIdOp(unittest.TestCase): + def setUp(self): try: self._dist_ut_port_0 = int(os.environ["PADDLE_DIST_UT_PORT"]) diff --git a/python/paddle/fluid/tests/unittests/test_generate_mask_labels_op.py b/python/paddle/fluid/tests/unittests/test_generate_mask_labels_op.py index 1d7ce33ea7c..8414cd941c2 100644 --- a/python/paddle/fluid/tests/unittests/test_generate_mask_labels_op.py +++ b/python/paddle/fluid/tests/unittests/test_generate_mask_labels_op.py @@ -285,6 +285,7 @@ def trans_lod(lod): class TestGenerateMaskLabels(OpTest): + def set_data(self): self.init_test_case() self.make_generate_proposal_labels_out() @@ -362,8 +363,9 @@ class TestGenerateMaskLabels(OpTest): lod1.append(poly_num) pts = [] for j in range(poly_num): - poly_size = np.random.randint( - min_poly_size, max_poly_size, size=1)[0] + poly_size = np.random.randint(min_poly_size, + max_poly_size, + size=1)[0] x = np.random.rand(poly_size, 1) * w y = np.random.rand(poly_size, 1) * h xy = np.concatenate((x, y), axis=1) diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py index 2e9a5229e2e..d1bf246b5a7 100644 --- a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py +++ b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py @@ -50,11 +50,12 @@ def generate_proposal_labels_in_python(rpn_rois, for im_i in range(len(im_info)): max_overlap = max_overlaps[im_i] if is_cascade_rcnn else None - frcn_blobs = _sample_rois( - rpn_rois[im_i], gt_classes[im_i], is_crowd[im_i], gt_boxes[im_i], - im_info[im_i], batch_size_per_im, fg_fraction, fg_thresh, - bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, - use_random, is_cls_agnostic, is_cascade_rcnn, max_overlap) + frcn_blobs = _sample_rois(rpn_rois[im_i], gt_classes[im_i], + is_crowd[im_i], gt_boxes[im_i], im_info[im_i], + batch_size_per_im, fg_fraction, fg_thresh, + bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, + class_nums, use_random, is_cls_agnostic, + is_cascade_rcnn, 
max_overlap) lod.append(frcn_blobs['rois'].shape[0]) rois.append(frcn_blobs['rois']) labels_int32.append(frcn_blobs['labels_int32']) @@ -100,11 +101,11 @@ def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, overlaps_max = proposal_to_gt_overlaps.max(axis=1) # Boxes which with non-zero overlap with gt boxes overlapped_boxes_ind = np.where(overlaps_max > 0)[0] - overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[ - overlapped_boxes_ind]] - gt_overlaps[overlapped_boxes_ind, - overlapped_boxes_gt_classes] = overlaps_max[ - overlapped_boxes_ind] + overlapped_boxes_gt_classes = gt_classes[ + overlaps_argmax[overlapped_boxes_ind]] + gt_overlaps[ + overlapped_boxes_ind, + overlapped_boxes_gt_classes] = overlaps_max[overlapped_boxes_ind] box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[ overlapped_boxes_ind] @@ -116,8 +117,8 @@ def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, if is_cascade_rcnn: # Cascade RCNN Decode Filter fg_inds = np.where(max_overlaps >= fg_thresh)[0] - bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >= - bg_thresh_lo))[0] + bg_inds = np.where((max_overlaps < bg_thresh_hi) + & (max_overlaps >= bg_thresh_lo))[0] fg_rois_per_this_image = fg_inds.shape[0] bg_rois_per_this_image = bg_inds.shape[0] else: @@ -126,19 +127,21 @@ def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0]) # Sample foreground if there are too many if (fg_inds.shape[0] > fg_rois_per_this_image) and use_random: - fg_inds = np.random.choice( - fg_inds, size=fg_rois_per_this_image, replace=False) + fg_inds = np.random.choice(fg_inds, + size=fg_rois_per_this_image, + replace=False) fg_inds = fg_inds[:fg_rois_per_this_image] # Background - bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >= - bg_thresh_lo))[0] + bg_inds = np.where((max_overlaps < bg_thresh_hi) + & (max_overlaps >= bg_thresh_lo))[0] bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.shape[0]) # Sample background if there are too many if (bg_inds.shape[0] > bg_rois_per_this_image) and use_random: - bg_inds = np.random.choice( - bg_inds, size=bg_rois_per_this_image, replace=False) + bg_inds = np.random.choice(bg_inds, + size=bg_rois_per_this_image, + replace=False) bg_inds = bg_inds[:bg_rois_per_this_image] keep_inds = np.append(fg_inds, bg_inds) @@ -152,19 +155,18 @@ def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, sampled_labels, bbox_reg_weights) bbox_targets, bbox_inside_weights = _expand_bbox_targets( bbox_label_targets, class_nums, is_cls_agnostic) - bbox_outside_weights = np.array( - bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype) + bbox_outside_weights = np.array(bbox_inside_weights > 0, + dtype=bbox_inside_weights.dtype) # Scale rois sampled_rois = sampled_boxes * im_scale # Faster RCNN blobs - frcn_blobs = dict( - rois=sampled_rois, - labels_int32=sampled_labels, - bbox_targets=bbox_targets, - bbox_inside_weights=bbox_inside_weights, - bbox_outside_weights=bbox_outside_weights, - max_overlap=sampled_max_overlap) + frcn_blobs = dict(rois=sampled_rois, + labels_int32=sampled_labels, + bbox_targets=bbox_targets, + bbox_inside_weights=bbox_inside_weights, + bbox_outside_weights=bbox_outside_weights, + max_overlap=sampled_max_overlap) return frcn_blobs @@ -198,11 +200,12 @@ def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights): targets = 
np.zeros(roi_boxes.shape) bbox_reg_weights = np.asarray(bbox_reg_weights) - targets = _box_to_delta( - ex_boxes=roi_boxes, gt_boxes=gt_boxes, weights=bbox_reg_weights) + targets = _box_to_delta(ex_boxes=roi_boxes, + gt_boxes=gt_boxes, + weights=bbox_reg_weights) - return np.hstack([labels[:, np.newaxis], targets]).astype( - np.float32, copy=False) + return np.hstack([labels[:, np.newaxis], targets]).astype(np.float32, + copy=False) def _box_to_delta(ex_boxes, gt_boxes, weights): @@ -232,8 +235,8 @@ def _expand_bbox_targets(bbox_targets_input, class_nums, is_cls_agnostic): # class_labels = [1 if ll > 0 else 0 for ll in class_labels] # class_labels = np.array(class_labels, dtype=np.int32) # class_nums = 2 - bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums - if not is_cls_agnostic else 4 * 2)) + bbox_targets = np.zeros((class_labels.shape[0], + 4 * class_nums if not is_cls_agnostic else 4 * 2)) bbox_inside_weights = np.zeros(bbox_targets.shape) for ind in fg_inds: class_label = int(class_labels[ind]) if not is_cls_agnostic else 1 @@ -245,6 +248,7 @@ def _expand_bbox_targets(bbox_targets_input, class_nums, is_cls_agnostic): class TestGenerateProposalLabelsOp(OpTest): + def set_data(self): #self.use_random = False self.init_use_random() @@ -320,8 +324,8 @@ class TestGenerateProposalLabelsOp(OpTest): self.im_info[i, 1] = images_shape[i][1] self.im_info[i, 2] = 0.8 #scale - self.rpn_rois, self.rpn_rois_lod = _generate_proposals(images_shape, - proposal_nums) + self.rpn_rois, self.rpn_rois_lod = _generate_proposals( + images_shape, proposal_nums) ground_truth, self.gts_lod = _generate_groundtruth( images_shape, self.class_nums, gt_nums) @@ -350,6 +354,7 @@ class TestGenerateProposalLabelsOp(OpTest): class TestCascade(TestGenerateProposalLabelsOp): + def init_test_cascade(self): self.is_cascade_rcnn = True roi_num = len(self.rpn_rois[0]) @@ -361,6 +366,7 @@ class TestCascade(TestGenerateProposalLabelsOp): class TestUseRandom(TestGenerateProposalLabelsOp): + def init_use_random(self): self.use_random = True self.is_cascade_rcnn = False @@ -383,6 +389,7 @@ class TestUseRandom(TestGenerateProposalLabelsOp): class TestClsAgnostic(TestCascade): + def init_test_params(self): self.batch_size_per_im = 512 self.fg_fraction = 0.25 @@ -395,6 +402,7 @@ class TestClsAgnostic(TestCascade): class TestOnlyGT(TestCascade): + def init_test_input(self): np.random.seed(0) gt_nums = 6 # Keep same with batch_size_per_im for unittest @@ -417,6 +425,7 @@ class TestOnlyGT(TestCascade): class TestOnlyGT2(TestCascade): + def init_test_cascade(self): self.is_cascade_rcnn = True roi_num = len(self.rpn_rois[0]) @@ -443,14 +452,13 @@ def _generate_groundtruth(images_shape, class_nums, gt_nums): num_gts = 0 for i, image_shape in enumerate(images_shape): # Avoid background - gt_classes = np.random.randint( - low=1, high=class_nums, size=gt_nums).astype(np.int32) + gt_classes = np.random.randint(low=1, high=class_nums, + size=gt_nums).astype(np.int32) gt_boxes = _generate_boxes(image_shape, gt_nums) is_crowd = np.zeros((gt_nums), dtype=np.int32) is_crowd[0] = 1 ground_truth.append( - dict( - gt_classes=gt_classes, boxes=gt_boxes, is_crowd=is_crowd)) + dict(gt_classes=gt_classes, boxes=gt_boxes, is_crowd=is_crowd)) num_gts += len(gt_classes) gts_lod.append(num_gts) return ground_truth, [gts_lod] diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py b/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py index 6b9eeaa0867..460f58d87b7 100644 --- 
a/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py +++ b/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py @@ -126,7 +126,7 @@ def box_coder(all_anchors, bbox_deltas, variances, pixel_offset=True): anchor_loc[:, 2] = all_anchors[:, 0] + 0.5 * anchor_loc[:, 0] anchor_loc[:, 3] = all_anchors[:, 1] + 0.5 * anchor_loc[:, 1] - #predicted bbox: bbox_center_x, bbox_center_y, bbox_width, bbox_height + #predicted bbox: bbox_center_x, bbox_center_y, bbox_width, bbox_height pred_bbox = np.zeros_like(bbox_deltas, dtype=np.float32) if variances is not None: for i in range(bbox_deltas.shape[0]): @@ -142,10 +142,12 @@ def box_coder(all_anchors, bbox_deltas, variances, pixel_offset=True): 1000 / 16.0))) * anchor_loc[i, 1] else: for i in range(bbox_deltas.shape[0]): - pred_bbox[i, 0] = bbox_deltas[i, 0] * anchor_loc[i, 0] + anchor_loc[ - i, 2] - pred_bbox[i, 1] = bbox_deltas[i, 1] * anchor_loc[i, 1] + anchor_loc[ - i, 3] + pred_bbox[i, + 0] = bbox_deltas[i, 0] * anchor_loc[i, 0] + anchor_loc[i, + 2] + pred_bbox[i, + 1] = bbox_deltas[i, 1] * anchor_loc[i, 1] + anchor_loc[i, + 3] pred_bbox[i, 2] = math.exp( min(bbox_deltas[i, 2], math.log(1000 / 16.0))) * anchor_loc[i, 0] @@ -169,17 +171,21 @@ def clip_tiled_boxes(boxes, im_shape, pixel_offset=True): ) offset = 1 if pixel_offset else 0 # x1 >= 0 - boxes[:, 0::4] = np.maximum( - np.minimum(boxes[:, 0::4], im_shape[1] - offset), 0) + boxes[:, + 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - offset), + 0) # y1 >= 0 - boxes[:, 1::4] = np.maximum( - np.minimum(boxes[:, 1::4], im_shape[0] - offset), 0) + boxes[:, + 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - offset), + 0) # x2 < im_shape[1] - boxes[:, 2::4] = np.maximum( - np.minimum(boxes[:, 2::4], im_shape[1] - offset), 0) + boxes[:, + 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - offset), + 0) # y2 < im_shape[0] - boxes[:, 3::4] = np.maximum( - np.minimum(boxes[:, 3::4], im_shape[0] - offset), 0) + boxes[:, + 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - offset), + 0) return boxes @@ -197,9 +203,9 @@ def filter_boxes(boxes, min_size, im_info, pixel_offset=True): hs_orig_scale = (boxes[:, 3] - boxes[:, 1]) / im_scale + 1 x_ctr = boxes[:, 0] + ws / 2. y_ctr = boxes[:, 1] + hs / 2. 
- keep = np.where((ws_orig_scale >= min_size) & ( - hs_orig_scale >= min_size) & (x_ctr < im_info[1]) & (y_ctr < - im_info[0]))[0] + keep = np.where((ws_orig_scale >= min_size) + & (hs_orig_scale >= min_size) & (x_ctr < im_info[1]) + & (y_ctr < im_info[0]))[0] else: keep = np.where((ws >= min_size) & (hs >= min_size))[0] return keep @@ -275,6 +281,7 @@ def nms(boxes, scores, nms_threshold, eta=1.0, pixel_offset=True): class TestGenerateProposalsOp(OpTest): + def set_data(self): self.init_test_params() self.init_test_input() @@ -343,6 +350,7 @@ class TestGenerateProposalsOp(OpTest): class TestGenerateProposalsOutLodOp(TestGenerateProposalsOp): + def set_data(self): self.init_test_params() self.init_test_input() @@ -367,12 +375,12 @@ class TestGenerateProposalsOutLodOp(TestGenerateProposalsOp): self.outputs = { 'RpnRois': (self.rpn_rois[0], [self.rois_num]), 'RpnRoiProbs': (self.rpn_roi_probs[0], [self.rois_num]), - 'RpnRoisNum': (np.asarray( - self.rois_num, dtype=np.int32)) + 'RpnRoisNum': (np.asarray(self.rois_num, dtype=np.int32)) } class TestGenerateProposalsOpNoBoxLeft(TestGenerateProposalsOp): + def init_test_params(self): self.pre_nms_topN = 12000 # train 12000, test 2000 self.post_nms_topN = 5000 # train 6000, test 1000 diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposals_v2_op.py b/python/paddle/fluid/tests/unittests/test_generate_proposals_v2_op.py index 0a670045187..32d7d308e53 100644 --- a/python/paddle/fluid/tests/unittests/test_generate_proposals_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_generate_proposals_v2_op.py @@ -124,14 +124,15 @@ def filter_boxes(boxes, min_size, im_shape, pixel_offset=True): if pixel_offset: x_ctr = boxes[:, 0] + ws / 2. y_ctr = boxes[:, 1] + hs / 2. - keep = np.where((ws >= min_size) & (hs >= min_size) & (x_ctr < im_shape[ - 1]) & (y_ctr < im_shape[0]))[0] + keep = np.where((ws >= min_size) & (hs >= min_size) + & (x_ctr < im_shape[1]) & (y_ctr < im_shape[0]))[0] else: keep = np.where((ws >= min_size) & (hs >= min_size))[0] return keep class TestGenerateProposalsV2Op(OpTest): + def set_data(self): self.init_test_params() self.init_test_input() @@ -202,6 +203,7 @@ class TestGenerateProposalsV2Op(OpTest): class TestGenerateProposalsV2OutLodOp(TestGenerateProposalsV2Op): + def set_data(self): self.init_test_params() self.init_test_input() @@ -226,12 +228,12 @@ class TestGenerateProposalsV2OutLodOp(TestGenerateProposalsV2Op): self.outputs = { 'RpnRois': (self.rpn_rois[0], [self.rois_num]), 'RpnRoiProbs': (self.rpn_roi_probs[0], [self.rois_num]), - 'RpnRoisNum': (np.asarray( - self.rois_num, dtype=np.int32)) + 'RpnRoisNum': (np.asarray(self.rois_num, dtype=np.int32)) } class TestGenerateProposalsV2OpNoBoxLeft(TestGenerateProposalsV2Op): + def init_test_params(self): self.pre_nms_topN = 12000 # train 12000, test 2000 self.post_nms_topN = 5000 # train 6000, test 1000 @@ -242,6 +244,7 @@ class TestGenerateProposalsV2OpNoBoxLeft(TestGenerateProposalsV2Op): class TestGenerateProposalsV2OpNoOffset(TestGenerateProposalsV2Op): + def init_test_params(self): self.pre_nms_topN = 12000 # train 12000, test 2000 self.post_nms_topN = 5000 # train 6000, test 1000 diff --git a/python/paddle/fluid/tests/unittests/test_generator.py b/python/paddle/fluid/tests/unittests/test_generator.py index ef9a305053e..7335718f0f5 100644 --- a/python/paddle/fluid/tests/unittests/test_generator.py +++ b/python/paddle/fluid/tests/unittests/test_generator.py @@ -36,8 +36,9 @@ class TestGenerator(unittest.TestCase): def test_basic_generator_error(self): if 
paddle.fluid.core.is_compiled_with_cuda(): - self.assertRaises( - ValueError, generator.Generator, place=paddle.CUDAPlace(0)) + self.assertRaises(ValueError, + generator.Generator, + place=paddle.CUDAPlace(0)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py index c36550fca8c..674c0b4d12f 100644 --- a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py +++ b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py @@ -42,8 +42,9 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer): with fluid.unique_name.guard(): with fluid.program_guard(main_prog, startup_prog): - image = fluid.layers.data( - name='image', shape=[784], dtype='float32') + image = fluid.layers.data(name='image', + shape=[784], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') py_reader = fluid.io.DataLoader.from_generator( feed_list=[image, label], @@ -63,8 +64,7 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer): size=CLASS_NUM, act='softmax') loss = fluid.layers.mean( - fluid.layers.cross_entropy( - input=predict_label, label=label)) + fluid.layers.cross_entropy(input=predict_label, label=label)) optimizer = fluid.optimizer.Adam() optimizer.minimize(loss) @@ -72,6 +72,7 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer): class TestBase(unittest.TestCase): + def run_main(self, use_legacy_py_reader, with_data_parallel, places, use_double_buffer): scope = fluid.Scope() @@ -91,8 +92,8 @@ class TestBase(unittest.TestCase): prog = fluid.CompiledProgram(main_prog) if with_data_parallel: - prog = prog.with_data_parallel( - loss_name=loss.name, places=places) + prog = prog.with_data_parallel(loss_name=loss.name, + places=places) step = 0 step_list = [] @@ -176,6 +177,7 @@ class TestBase(unittest.TestCase): class TestDataLoaderBaseAbstract(unittest.TestCase): + def test_main(self): loader = DataLoaderBase() try: diff --git a/python/paddle/fluid/tests/unittests/test_get_device_properties.py b/python/paddle/fluid/tests/unittests/test_get_device_properties.py index 4cfb91bfae9..750a257b0d9 100644 --- a/python/paddle/fluid/tests/unittests/test_get_device_properties.py +++ b/python/paddle/fluid/tests/unittests/test_get_device_properties.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,6 +19,7 @@ from paddle.device.cuda import device_count, get_device_properties class TestGetDeviceProperties(unittest.TestCase): + def test_get_device_properties_default(self): if core.is_compiled_with_cuda(): props = get_device_properties() @@ -44,6 +45,7 @@ class TestGetDeviceProperties(unittest.TestCase): class TestGetDevicePropertiesError(unittest.TestCase): + def test_error_api(self): if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_get_inputs_outputs_in_block.py b/python/paddle/fluid/tests/unittests/test_get_inputs_outputs_in_block.py index 9e820579594..1896f0a4bf9 100644 --- a/python/paddle/fluid/tests/unittests/test_get_inputs_outputs_in_block.py +++ b/python/paddle/fluid/tests/unittests/test_get_inputs_outputs_in_block.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestGetInputsOutputsInBlock(unittest.TestCase): + def test_ordered(self): # Program variable names may be different when test order is different # This helper makes the test ordered. @@ -68,7 +69,7 @@ class TestGetInputsOutputsInBlock(unittest.TestCase): sub_block = main_program.block(1) inner_inputs, inner_outputs = utils.get_inputs_outputs_in_block( sub_block) - #'fill_constant_1.tmp_0', 'tmp_3' are names of a, c + #'fill_constant_1.tmp_0', 'tmp_3' are names of a, c self.assertTrue(inner_inputs == {'fill_constant_1.tmp_0', 'tmp_3'}) #'_generated_var_1', is name of a + c self.assertTrue(inner_outputs == {'_generated_var_1'}) diff --git a/python/paddle/fluid/tests/unittests/test_get_places_op.py b/python/paddle/fluid/tests/unittests/test_get_places_op.py index a6deeab457c..1e0c99bac08 100644 --- a/python/paddle/fluid/tests/unittests/test_get_places_op.py +++ b/python/paddle/fluid/tests/unittests/test_get_places_op.py @@ -22,6 +22,7 @@ import unittest class TestGetPlaces(unittest.TestCase): + @prog_scope() def check_get_cpu_places(self): places = get_places() diff --git a/python/paddle/fluid/tests/unittests/test_get_set_flags.py b/python/paddle/fluid/tests/unittests/test_get_set_flags.py index e2761ff4358..80300eb7dfc 100644 --- a/python/paddle/fluid/tests/unittests/test_get_set_flags.py +++ b/python/paddle/fluid/tests/unittests/test_get_set_flags.py @@ -17,6 +17,7 @@ import unittest as unittest class TestGetAndSetFlags(unittest.TestCase): + def test_api(self): flags = { 'FLAGS_eager_delete_tensor_gb': 1.0, @@ -37,6 +38,7 @@ class TestGetAndSetFlags(unittest.TestCase): class TestGetAndSetFlagsErrors(unittest.TestCase): + def test_errors(self): flags_list = ['FLAGS_eager_delete_tensor_gb', 'FLAGS_check_nan_inf'] flag = 1 diff --git a/python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py b/python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py index 2f6c87aefaa..2540fa78d62 100644 --- a/python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py +++ b/python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py @@ -42,6 +42,7 @@ class TestGetTensorFromSelectedRowsError(unittest.TestCase): class TestGetTensorFromSelectedRows(unittest.TestCase): + def get_places(self): places = [core.CPUPlace()] if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_global_var_getter_setter.py b/python/paddle/fluid/tests/unittests/test_global_var_getter_setter.py index 3394a08de8b..90fe9988ac2 100644 --- a/python/paddle/fluid/tests/unittests/test_global_var_getter_setter.py +++ b/python/paddle/fluid/tests/unittests/test_global_var_getter_setter.py @@ -17,6 +17,7 @@ import unittest as 
unittest class VarInfo(object): + def __init__(self, var_name, var_type, writable): self.name = var_name self.type = var_type @@ -24,6 +25,7 @@ class VarInfo(object): class TestGlobalVarGetterSetter(unittest.TestCase): + def test_main(self): var_infos = [ VarInfo("FLAGS_free_idle_chunk", bool, False), diff --git a/python/paddle/fluid/tests/unittests/test_glu.py b/python/paddle/fluid/tests/unittests/test_glu.py index 25f1975db0c..c8f0098456c 100644 --- a/python/paddle/fluid/tests/unittests/test_glu.py +++ b/python/paddle/fluid/tests/unittests/test_glu.py @@ -32,6 +32,7 @@ def glu(x, dim=-1): class TestGLUCase(unittest.TestCase): + def setUp(self): self.x = np.random.randn(5, 20) self.dim = -1 @@ -52,6 +53,7 @@ class TestGLUCase(unittest.TestCase): class TestGLUV2(unittest.TestCase): + def setUp(self): self.x = np.random.randn(5, 20) self.dim = -1 diff --git a/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py b/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py index e528e742a27..d4dc21e7646 100644 --- a/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py +++ b/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py @@ -24,6 +24,7 @@ from paddle.fluid import core class TestGPUPackagePaddle(unittest.TestCase): + def test_import_paddle(self): if core.is_compiled_with_cuda(): if core.is_compiled_with_rocm(): @@ -43,11 +44,10 @@ assert x.place.is_gpu_place() is False, "There is no CUDA device, but Tensor's p _python = sys.executable ps_cmd = '{} {}'.format(_python, test_file) - ps_proc = subprocess.Popen( - ps_cmd.strip().split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=os.environ) + ps_proc = subprocess.Popen(ps_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=os.environ) stdout, stderr = ps_proc.communicate() assert 'CPU device will be used by default' in str( diff --git a/python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py b/python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py index 39a5b9391e0..15009ea8c58 100644 --- a/python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py +++ b/python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py @@ -29,6 +29,7 @@ from paddle.fluid.clip import GradientClipByValue, GradientClipByNorm, GradientC class TestGradClipByGlobalNorm(unittest.TestCase): + def init_value(self): self.max_global_norm = 5.0 self.init_scale = 1.0 @@ -103,6 +104,7 @@ class TestGradClipByGlobalNorm(unittest.TestCase): class TestGradClipByNorm(unittest.TestCase): + def init_value(self): self.max_norm = 5.0 self.init_scale = 1.0 @@ -173,6 +175,7 @@ class TestGradClipByNorm(unittest.TestCase): class TestGradClipByValue(unittest.TestCase): + def init_value(self): self.max_value = 0.8 self.min_value = -0.1 @@ -200,8 +203,8 @@ class TestGradClipByValue(unittest.TestCase): def get_dygrap_clip_result(self): with fluid.dygraph.guard(): - value_clip = GradientClipByValue( - max=self.max_value, min=self.min_value) + value_clip = GradientClipByValue(max=self.max_value, + min=self.min_value) p_g_var = [] for p, g in self.para_and_grad: new_p = to_variable(p) diff --git a/python/paddle/fluid/tests/unittests/test_gradient_clip.py b/python/paddle/fluid/tests/unittests/test_gradient_clip.py index 20a55af15c4..dfdb3c32dc2 100644 --- a/python/paddle/fluid/tests/unittests/test_gradient_clip.py +++ b/python/paddle/fluid/tests/unittests/test_gradient_clip.py @@ -38,8 +38,9 @@ def bow_net(data, This model is from 
https://github.com/PaddlePaddle/models: fluid/PaddleNLP/text_classification/nets.py """ - emb = fluid.layers.embedding( - input=data, is_sparse=True, size=[dict_dim, emb_dim]) + emb = fluid.layers.embedding(input=data, + is_sparse=True, + size=[dict_dim, emb_dim]) bow = fluid.layers.sequence_pool(input=emb, pool_type='sum') bow_tanh = fluid.layers.tanh(bow) fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh") @@ -52,6 +53,7 @@ def bow_net(data, class TestGradientClip(unittest.TestCase): + def setUp(self): self.word_dict_len = 5147 self.BATCH_SIZE = 2 @@ -75,8 +77,8 @@ class TestGradientClip(unittest.TestCase): def check_gradient_clip(self, place, dtype='float32'): prog = fluid.Program() startup_program = fluid.Program() - with fluid.program_guard( - main_program=prog, startup_program=startup_program): + with fluid.program_guard(main_program=prog, + startup_program=startup_program): image = fluid.data(name="a", shape=[-1, 784], dtype='float32') label = fluid.data(name="b", shape=[-1, 1], dtype='int64') if dtype != 'float32': @@ -97,8 +99,8 @@ class TestGradientClip(unittest.TestCase): p_g = sorted(p_g, key=lambda x: x[0].name) p_g_clip = sorted(p_g_clip, key=lambda x: x[0].name) - with fluid.program_guard( - main_program=prog_clip, startup_program=startup_program): + with fluid.program_guard(main_program=prog_clip, + startup_program=startup_program): p_g_clip = self.clip_gradient(p_g_clip) grad_list = [elem[1] for elem in p_g] @@ -119,10 +121,12 @@ class TestGradientClip(unittest.TestCase): def check_sparse_gradient_clip(self, place): prog = fluid.Program() startup_program = fluid.Program() - with fluid.program_guard( - main_program=prog, startup_program=startup_program): - data = fluid.data( - name="words", shape=[-1, 1], dtype="int64", lod_level=1) + with fluid.program_guard(main_program=prog, + startup_program=startup_program): + data = fluid.data(name="words", + shape=[-1, 1], + dtype="int64", + lod_level=1) label = fluid.data(name="label", shape=[-1, 1], dtype="int64") cost = bow_net(data, label, self.word_dict_len) @@ -142,6 +146,7 @@ class TestGradientClip(unittest.TestCase): class TestGradientClipByGlobalNorm(TestGradientClip): + def init(self): self.clip_norm = 0.2 @@ -157,13 +162,13 @@ class TestGradientClipByGlobalNorm(TestGradientClip): for u, v in zip(out, out_clip): self.assertTrue( - np.allclose( - a=u, b=v, rtol=1e-5, atol=1e-8), - "gradient clip by global norm has wrong results!, \nu={}\nv={}\ndiff={}". 
- format(u, v, u - v)) + np.allclose(a=u, b=v, rtol=1e-5, atol=1e-8), + "gradient clip by global norm has wrong results!, \nu={}\nv={}\ndiff={}" + .format(u, v, u - v)) # test whether the output is right when use 'set_gradient_clip' def test_old_gradient_clip(self): + def func(params_grads): clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=self.clip_norm) fluid.clip.set_gradient_clip(clip) @@ -174,6 +179,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip): # test whether the output is right when use grad_clip def test_new_gradient_clip(self): + def func(params_grads): clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=self.clip_norm) return clip(params_grads) @@ -183,6 +189,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip): # test whether the output is right when use grad_clip under float64 def test_new_gradient_clip_fp64(self): + def func(params_grads): clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=self.clip_norm) return clip(params_grads) @@ -192,6 +199,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip): # invoke 'set_gradient_clip' in a wrong order def test_wrong_API_order(self): + def backward_func(cost): clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0) fluid.clip.set_gradient_clip(clip) @@ -233,8 +241,8 @@ class TestGradientClipByGlobalNorm(TestGradientClip): def _test_none_grad_helper(self, dtype): prog = fluid.Program() startup_program = fluid.Program() - with fluid.program_guard( - main_program=prog, startup_program=startup_program): + with fluid.program_guard(main_program=prog, + startup_program=startup_program): clip = fluid.clip.GradientClipByGlobalNorm(self.clip_norm) x = fluid.default_main_program().global_block().create_parameter( name="x", shape=[2, 3], dtype=dtype) @@ -254,6 +262,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip): class TestGradientClipByNorm(TestGradientClip): + def init(self): self.clip_norm = 0.2 @@ -262,13 +271,12 @@ class TestGradientClipByNorm(TestGradientClip): norm = np.sqrt(np.sum(np.power(u, 2))) scale = self.clip_norm / np.maximum(self.clip_norm, norm) u = u * scale - self.assertTrue( - np.allclose( - a=u, b=v, rtol=1e-5, atol=1e-8), - "gradient clip by norm has wrong results!") + self.assertTrue(np.allclose(a=u, b=v, rtol=1e-5, atol=1e-8), + "gradient clip by norm has wrong results!") # test whether the output is right when use grad_clip def test_gradient_clip(self): + def func(params_grads): clip = fluid.clip.GradientClipByNorm(clip_norm=self.clip_norm) return clip(params_grads) @@ -297,6 +305,7 @@ class TestGradientClipByNorm(TestGradientClip): class TestGradientClipByValue(TestGradientClip): + def init(self): self.max = 0.2 self.min = 0.1 @@ -306,13 +315,12 @@ class TestGradientClipByValue(TestGradientClip): out[i] = np.clip(v, self.min, self.max) for u, v in zip(out, out_clip): u = np.clip(u, self.min, self.max) - self.assertTrue( - np.allclose( - a=u, b=v, rtol=1e-6, atol=1e-8), - "gradient clip by value has wrong results!") + self.assertTrue(np.allclose(a=u, b=v, rtol=1e-6, atol=1e-8), + "gradient clip by value has wrong results!") # test whether the output is right when use grad_clip def test_gradient_clip(self): + def func(params_grads): clip = fluid.clip.GradientClipByValue(max=self.max, min=self.min) return clip(params_grads) @@ -341,11 +349,12 @@ class TestGradientClipByValue(TestGradientClip): class TestDygraphGradientClip(unittest.TestCase): + def test_gradient_clip(self): with fluid.dygraph.guard(): linear = fluid.dygraph.Linear(5, 5) - inputs = fluid.layers.uniform_random( - [16, 
5], min=-10, max=10).astype('float32') + inputs = fluid.layers.uniform_random([16, 5], min=-10, + max=10).astype('float32') out = linear(fluid.dygraph.to_variable(inputs)) loss = fluid.layers.reduce_mean(out) loss.backward() @@ -360,6 +369,7 @@ class TestDygraphGradientClip(unittest.TestCase): class TestDygraphGradientClipByGlobalNorm(TestDygraphGradientClip): + def setUp(self): self.clip_norm = 0.8 self.clip1 = fluid.clip.GradientClipByGlobalNorm( @@ -369,10 +379,10 @@ class TestDygraphGradientClipByGlobalNorm(TestDygraphGradientClip): def check_clip_result(self, loss, optimizer): # if grad is None - x = fluid.dygraph.to_variable( - np.array([2, 3]).astype("float32"), name="x") - y = fluid.dygraph.to_variable( - np.array([3, 4]).astype("float32"), name="y") + x = fluid.dygraph.to_variable(np.array([2, 3]).astype("float32"), + name="x") + y = fluid.dygraph.to_variable(np.array([3, 4]).astype("float32"), + name="y") assert len(self.clip1([(x, x), (x, y), (x, None)])) == 2 # get params and grads from network opt, params_grads = optimizer.minimize(loss) @@ -395,13 +405,13 @@ class TestDygraphGradientClipByGlobalNorm(TestDygraphGradientClip): a = np.minimum(global_norm, self.clip_norm) b = global_norm_clip self.assertTrue( - np.isclose( - a=a, b=b, rtol=1e-6, atol=1e-8), + np.isclose(a=a, b=b, rtol=1e-6, atol=1e-8), "gradient clip by global norm has wrong results, expetcd:%f, but received:%f" % (a, b)) class TestDygraphGradientClipByNorm(TestDygraphGradientClip): + def setUp(self): self.clip_norm = 0.8 self.clip = fluid.clip.GradientClipByNorm(clip_norm=self.clip_norm) @@ -424,13 +434,13 @@ class TestDygraphGradientClipByNorm(TestDygraphGradientClip): a = np.minimum(a, self.clip_norm) b = np.sqrt(np.sum(np.power(v, 2))) self.assertTrue( - np.isclose( - a=a, b=b, rtol=1e-6, atol=1e-8), + np.isclose(a=a, b=b, rtol=1e-6, atol=1e-8), "gradient clip by norm has wrong results, expetcd:%f, but received:%f" % (a, b)) class TestDygraphGradientClipByValue(TestDygraphGradientClip): + def setUp(self): self.max = 0.2 self.min = 0.1 @@ -448,13 +458,12 @@ class TestDygraphGradientClipByValue(TestDygraphGradientClip): for u, v in zip(grads, grads_clip): u = np.clip(u.numpy(), self.min, self.max) v = v.numpy() - self.assertTrue( - np.allclose( - a=u, b=v, rtol=1e-6, atol=1e-8), - "gradient clip by value has wrong results!") + self.assertTrue(np.allclose(a=u, b=v, rtol=1e-6, atol=1e-8), + "gradient clip by value has wrong results!") class SimpleNet(paddle.nn.Layer): + def __init__(self): super(SimpleNet, self).__init__() self.linear = paddle.nn.Linear(5, 5) @@ -467,6 +476,7 @@ class SimpleNet(paddle.nn.Layer): class TestDygraphGradientClipFP16(unittest.TestCase): + def test_gradient_clip(self): if fluid.core.is_compiled_with_cuda(): with fluid.dygraph.guard(): @@ -477,8 +487,8 @@ class TestDygraphGradientClipFP16(unittest.TestCase): model, sgd_optimizer = paddle.amp.decorate( models=model, optimizers=sgd_optimizer, level='O2') scaler = paddle.amp.GradScaler(init_loss_scaling=1024) - inputs = fluid.layers.uniform_random( - [1, 5], min=-10, max=10).astype('float32') + inputs = fluid.layers.uniform_random([1, 5], min=-10, + max=10).astype('float32') with paddle.amp.auto_cast(level='O2'): out = model(fluid.dygraph.to_variable(inputs)) loss = fluid.layers.reduce_mean(out) @@ -497,7 +507,7 @@ class TestDygraphGradientClipFP16(unittest.TestCase): clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=0.8) params_grads = clip(params_grads) _, grads_clip = zip(*params_grads) - # param update + # param update 
scaler.step(sgd_optimizer) scaler.update() @@ -515,17 +525,17 @@ class TestDygraphGradientClipFP16(unittest.TestCase): a = np.minimum(global_norm, 0.8) b = global_norm_clip self.assertTrue( - np.isclose( - a=a, b=b, rtol=1e-3, atol=1e-8), + np.isclose(a=a, b=b, rtol=1e-3, atol=1e-8), "gradient clip by global norm has wrong results, expetcd:%f, but received:%f" % (a, b)) class TestDygraphGradientClipFP64(unittest.TestCase): + def test_gradient_clip(self): with fluid.dygraph.guard(): - inputs = fluid.layers.uniform_random( - [16, 5], min=-10, max=10).astype('float64') + inputs = fluid.layers.uniform_random([16, 5], min=-10, + max=10).astype('float64') linear = fluid.dygraph.Linear(5, 5, dtype="float64") out = linear(fluid.dygraph.to_variable(inputs)) loss = fluid.layers.reduce_mean(out) @@ -561,13 +571,13 @@ class TestDygraphGradientClipFP64(unittest.TestCase): b = global_norm_clip self.assertTrue( - np.isclose( - a=a, b=b, rtol=1e-6, atol=1e-8), + np.isclose(a=a, b=b, rtol=1e-6, atol=1e-8), "gradient clip by global norm has wrong results, expetcd:%f, but received:%f" % (a, b)) class TestPureFP16ClipGradByGlobalNorm(unittest.TestCase): + def check_main(self, expected_has_cast_op): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -578,10 +588,12 @@ class TestPureFP16ClipGradByGlobalNorm(unittest.TestCase): param_and_grads = [] main_block = main_prog.global_block() for name, shape in zip(names, shapes): - p = main_block.create_parameter( - name=name, shape=shape, dtype='float16') - g = main_block.create_parameter( - name=p.name + '@GRAD', shape=p.shape, dtype=p.dtype) + p = main_block.create_parameter(name=name, + shape=shape, + dtype='float16') + g = main_block.create_parameter(name=p.name + '@GRAD', + shape=p.shape, + dtype=p.dtype) param_and_grads.append((p, g)) clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0) diff --git a/python/paddle/fluid/tests/unittests/test_graph_khop_sampler.py b/python/paddle/fluid/tests/unittests/test_graph_khop_sampler.py index 6e6175d6695..57b8209d8d6 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_khop_sampler.py +++ b/python/paddle/fluid/tests/unittests/test_graph_khop_sampler.py @@ -19,6 +19,7 @@ import paddle.fluid as fluid class TestGraphKhopSampler(unittest.TestCase): + def setUp(self): num_nodes = 20 edges = np.random.randint(num_nodes, size=(100, 2)) @@ -41,8 +42,8 @@ class TestGraphKhopSampler(unittest.TestCase): self.row = sorted_edges[:, 0].astype("int64") self.colptr = colptr.astype("int64") self.sorted_eid = sorted_eid.astype("int64") - self.nodes = np.unique(np.random.randint( - num_nodes, size=5)).astype("int64") + self.nodes = np.unique(np.random.randint(num_nodes, + size=5)).astype("int64") self.sample_sizes = [5, 5] self.dst_src_dict = dst_src_dict @@ -73,8 +74,8 @@ class TestGraphKhopSampler(unittest.TestCase): self.assertTrue( edge_src_n.shape[0] == paddle.unique(edge_src_n).shape[0]) # Ensure the correct sample size. - self.assertTrue(edge_src_n.shape[0] == self.sample_sizes[0] or - edge_src_n.shape[0] == len(self.dst_src_dict[n])) + self.assertTrue(edge_src_n.shape[0] == self.sample_sizes[0] + or edge_src_n.shape[0] == len(self.dst_src_dict[n])) in_neighbors = np.isin(edge_src_n.numpy(), self.dst_src_dict[n]) # Ensure the correct sample neighbors. 
self.assertTrue(np.sum(in_neighbors) == in_neighbors.shape[0]) @@ -121,8 +122,8 @@ class TestGraphKhopSampler(unittest.TestCase): self.assertTrue( edge_src_n.shape[0] == paddle.unique(edge_src_n).shape[0]) self.assertTrue( - edge_src_n.shape[0] == self.sample_sizes[0] or - edge_src_n.shape[0] == len(self.dst_src_dict[n])) + edge_src_n.shape[0] == self.sample_sizes[0] + or edge_src_n.shape[0] == len(self.dst_src_dict[n])) in_neighbors = np.isin(edge_src_n.numpy(), self.dst_src_dict[n]) self.assertTrue(np.sum(in_neighbors) == in_neighbors.shape[0]) @@ -134,16 +135,18 @@ class TestGraphKhopSampler(unittest.TestCase): def test_sample_result_static_with_eids(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): - row = paddle.static.data( - name="row", shape=self.row.shape, dtype=self.row.dtype) - sorted_eids = paddle.static.data( - name="eids", - shape=self.sorted_eid.shape, - dtype=self.sorted_eid.dtype) - colptr = paddle.static.data( - name="colptr", shape=self.colptr.shape, dtype=self.colptr.dtype) - nodes = paddle.static.data( - name="nodes", shape=self.nodes.shape, dtype=self.nodes.dtype) + row = paddle.static.data(name="row", + shape=self.row.shape, + dtype=self.row.dtype) + sorted_eids = paddle.static.data(name="eids", + shape=self.sorted_eid.shape, + dtype=self.sorted_eid.dtype) + colptr = paddle.static.data(name="colptr", + shape=self.colptr.shape, + dtype=self.colptr.dtype) + nodes = paddle.static.data(name="nodes", + shape=self.nodes.shape, + dtype=self.nodes.dtype) edge_src, edge_dst, sample_index, reindex_nodes, edge_eids = \ paddle.incubate.graph_khop_sampler(row, colptr, @@ -174,20 +177,23 @@ class TestGraphKhopSampler(unittest.TestCase): self.assertTrue( edge_src_n.shape[0] == np.unique(edge_src_n).shape[0]) self.assertTrue( - edge_src_n.shape[0] == self.sample_sizes[0] or - edge_src_n.shape[0] == len(self.dst_src_dict[n])) + edge_src_n.shape[0] == self.sample_sizes[0] + or edge_src_n.shape[0] == len(self.dst_src_dict[n])) in_neighbors = np.isin(edge_src_n, self.dst_src_dict[n]) self.assertTrue(np.sum(in_neighbors) == in_neighbors.shape[0]) def test_sample_result_static_without_eids(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): - row = paddle.static.data( - name="row", shape=self.row.shape, dtype=self.row.dtype) - colptr = paddle.static.data( - name="colptr", shape=self.colptr.shape, dtype=self.colptr.dtype) - nodes = paddle.static.data( - name="nodes", shape=self.nodes.shape, dtype=self.nodes.dtype) + row = paddle.static.data(name="row", + shape=self.row.shape, + dtype=self.row.dtype) + colptr = paddle.static.data(name="colptr", + shape=self.colptr.shape, + dtype=self.colptr.dtype) + nodes = paddle.static.data(name="nodes", + shape=self.nodes.shape, + dtype=self.nodes.dtype) edge_src, edge_dst, sample_index, reindex_nodes = \ paddle.incubate.graph_khop_sampler(row, colptr, nodes, self.sample_sizes) @@ -214,8 +220,8 @@ class TestGraphKhopSampler(unittest.TestCase): self.assertTrue( edge_src_n.shape[0] == np.unique(edge_src_n).shape[0]) self.assertTrue( - edge_src_n.shape[0] == self.sample_sizes[0] or - edge_src_n.shape[0] == len(self.dst_src_dict[n])) + edge_src_n.shape[0] == self.sample_sizes[0] + or edge_src_n.shape[0] == len(self.dst_src_dict[n])) in_neighbors = np.isin(edge_src_n, self.dst_src_dict[n]) self.assertTrue(np.sum(in_neighbors) == in_neighbors.shape[0]) diff --git a/python/paddle/fluid/tests/unittests/test_graph_reindex.py b/python/paddle/fluid/tests/unittests/test_graph_reindex.py 
index 4a98beb0cce..1323aaeb02b 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_reindex.py +++ b/python/paddle/fluid/tests/unittests/test_graph_reindex.py @@ -19,6 +19,7 @@ import paddle.fluid as fluid class TestGraphReindex(unittest.TestCase): + def setUp(self): self.x = np.arange(5).astype("int64") self.neighbors = np.random.randint(100, size=20).astype("int64") @@ -73,17 +74,17 @@ class TestGraphReindex(unittest.TestCase): reindex_src, reindex_dst, out_nodes = \ paddle.incubate.graph_reindex(x, neighbors, count) self.assertTrue( - np.allclose(self.reindex_src, reindex_src[:self.neighbors.shape[ - 0]])) + np.allclose(self.reindex_src, + reindex_src[:self.neighbors.shape[0]])) self.assertTrue( - np.allclose(self.reindex_src, reindex_src[self.neighbors.shape[ - 0]:])) + np.allclose(self.reindex_src, + reindex_src[self.neighbors.shape[0]:])) self.assertTrue( - np.allclose(self.reindex_dst, reindex_dst[:self.neighbors.shape[ - 0]])) + np.allclose(self.reindex_dst, + reindex_dst[:self.neighbors.shape[0]])) self.assertTrue( - np.allclose(self.reindex_dst, reindex_dst[self.neighbors.shape[ - 0]:])) + np.allclose(self.reindex_dst, + reindex_dst[self.neighbors.shape[0]:])) self.assertTrue(np.allclose(self.out_nodes, out_nodes)) def test_heter_reindex_result_v2(self): @@ -122,18 +123,21 @@ class TestGraphReindex(unittest.TestCase): def test_reindex_result_static(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): - x = paddle.static.data( - name="x", shape=self.x.shape, dtype=self.x.dtype) - neighbors = paddle.static.data( - name="neighbors", - shape=self.neighbors.shape, - dtype=self.neighbors.dtype) - count = paddle.static.data( - name="count", shape=self.count.shape, dtype=self.count.dtype) - value_buffer = paddle.static.data( - name="value_buffer", shape=[self.num_nodes], dtype="int32") - index_buffer = paddle.static.data( - name="index_buffer", shape=[self.num_nodes], dtype="int32") + x = paddle.static.data(name="x", + shape=self.x.shape, + dtype=self.x.dtype) + neighbors = paddle.static.data(name="neighbors", + shape=self.neighbors.shape, + dtype=self.neighbors.dtype) + count = paddle.static.data(name="count", + shape=self.count.shape, + dtype=self.count.dtype) + value_buffer = paddle.static.data(name="value_buffer", + shape=[self.num_nodes], + dtype="int32") + index_buffer = paddle.static.data(name="index_buffer", + shape=[self.num_nodes], + dtype="int32") reindex_src_1, reindex_dst_1, out_nodes_1 = \ paddle.incubate.graph_reindex(x, neighbors, count) @@ -144,13 +148,16 @@ class TestGraphReindex(unittest.TestCase): exe = paddle.static.Executor(paddle.CPUPlace()) ret = exe.run(feed={ - 'x': self.x, - 'neighbors': self.neighbors, - 'count': self.count, - 'value_buffer': np.full( - [self.num_nodes], -1, dtype="int32"), - 'index_buffer': np.full( - [self.num_nodes], -1, dtype="int32") + 'x': + self.x, + 'neighbors': + self.neighbors, + 'count': + self.count, + 'value_buffer': + np.full([self.num_nodes], -1, dtype="int32"), + 'index_buffer': + np.full([self.num_nodes], -1, dtype="int32") }, fetch_list=[ reindex_src_1, reindex_dst_1, out_nodes_1, diff --git a/python/paddle/fluid/tests/unittests/test_graph_sample_neighbors.py b/python/paddle/fluid/tests/unittests/test_graph_sample_neighbors.py index 675a3429ab5..f84513506b3 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_sample_neighbors.py +++ b/python/paddle/fluid/tests/unittests/test_graph_sample_neighbors.py @@ -19,6 +19,7 @@ import paddle.fluid as fluid class 
TestGraphSampleNeighbors(unittest.TestCase): + def setUp(self): num_nodes = 20 edges = np.random.randint(num_nodes, size=(100, 2)) @@ -39,8 +40,8 @@ class TestGraphSampleNeighbors(unittest.TestCase): self.row = sorted_edges[:, 0].astype("int64") self.colptr = colptr.astype("int64") - self.nodes = np.unique(np.random.randint( - num_nodes, size=5)).astype("int64") + self.nodes = np.unique(np.random.randint(num_nodes, + size=5)).astype("int64") self.sample_size = 5 self.dst_src_dict = dst_src_dict @@ -57,12 +58,12 @@ class TestGraphSampleNeighbors(unittest.TestCase): if i == 0: neighbors = out_neighbors[0:out_count_cumsum[i]] else: - neighbors = out_neighbors[out_count_cumsum[i - 1]: - out_count_cumsum[i]] + neighbors = out_neighbors[ + out_count_cumsum[i - 1]:out_count_cumsum[i]] # Ensure the correct sample size. self.assertTrue( - out_count[i] == self.sample_size or - out_count[i] == len(self.dst_src_dict[self.nodes[i]])) + out_count[i] == self.sample_size + or out_count[i] == len(self.dst_src_dict[self.nodes[i]])) # Ensure no repetitive sample neighbors. self.assertTrue( neighbors.shape[0] == paddle.unique(neighbors).shape[0]) @@ -91,12 +92,12 @@ class TestGraphSampleNeighbors(unittest.TestCase): if i == 0: neighbors = out_neighbors[0:out_count_cumsum[i]] else: - neighbors = out_neighbors[out_count_cumsum[i - 1]: - out_count_cumsum[i]] + neighbors = out_neighbors[ + out_count_cumsum[i - 1]:out_count_cumsum[i]] # Ensure the correct sample size. self.assertTrue( - out_count[i] == self.sample_size or - out_count[i] == len(self.dst_src_dict[self.nodes[i]])) + out_count[i] == self.sample_size + or out_count[i] == len(self.dst_src_dict[self.nodes[i]])) # Ensure no repetitive sample neighbors. self.assertTrue( neighbors.shape[0] == paddle.unique(neighbors).shape[0]) @@ -108,12 +109,15 @@ class TestGraphSampleNeighbors(unittest.TestCase): def test_sample_result_static(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): - row = paddle.static.data( - name="row", shape=self.row.shape, dtype=self.row.dtype) - colptr = paddle.static.data( - name="colptr", shape=self.colptr.shape, dtype=self.colptr.dtype) - nodes = paddle.static.data( - name="nodes", shape=self.nodes.shape, dtype=self.nodes.dtype) + row = paddle.static.data(name="row", + shape=self.row.shape, + dtype=self.row.dtype) + colptr = paddle.static.data(name="colptr", + shape=self.colptr.shape, + dtype=self.colptr.dtype) + nodes = paddle.static.data(name="nodes", + shape=self.nodes.shape, + dtype=self.nodes.dtype) out_neighbors, out_count = paddle.incubate.graph_sample_neighbors( row, colptr, nodes, sample_size=self.sample_size) @@ -129,8 +133,8 @@ class TestGraphSampleNeighbors(unittest.TestCase): out_neighbors = np.split(out_neighbors, out_count_cumsum)[:-1] for neighbors, node, count in zip(out_neighbors, self.nodes, out_count): - self.assertTrue(count == self.sample_size or - count == len(self.dst_src_dict[node])) + self.assertTrue(count == self.sample_size + or count == len(self.dst_src_dict[node])) self.assertTrue( neighbors.shape[0] == np.unique(neighbors).shape[0]) in_neighbors = np.isin(neighbors, self.dst_src_dict[node]) @@ -143,20 +147,18 @@ class TestGraphSampleNeighbors(unittest.TestCase): nodes = paddle.to_tensor(self.nodes) def check_eid_error(): - paddle.incubate.graph_sample_neighbors( - row, - colptr, - nodes, - sample_size=self.sample_size, - return_eids=True) + paddle.incubate.graph_sample_neighbors(row, + colptr, + nodes, + sample_size=self.sample_size, + return_eids=True) def 
check_perm_buffer_error(): - paddle.incubate.graph_sample_neighbors( - row, - colptr, - nodes, - sample_size=self.sample_size, - flag_perm_buffer=True) + paddle.incubate.graph_sample_neighbors(row, + colptr, + nodes, + sample_size=self.sample_size, + flag_perm_buffer=True) self.assertRaises(ValueError, check_eid_error) self.assertRaises(ValueError, check_perm_buffer_error) @@ -189,14 +191,18 @@ class TestGraphSampleNeighbors(unittest.TestCase): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): - row = paddle.static.data( - name="row", shape=self.row.shape, dtype=self.row.dtype) - colptr = paddle.static.data( - name="colptr", shape=self.colptr.shape, dtype=self.colptr.dtype) - nodes = paddle.static.data( - name="nodes", shape=self.nodes.shape, dtype=self.nodes.dtype) - eids = paddle.static.data( - name="eids", shape=self.edges_id.shape, dtype=self.nodes.dtype) + row = paddle.static.data(name="row", + shape=self.row.shape, + dtype=self.row.dtype) + colptr = paddle.static.data(name="colptr", + shape=self.colptr.shape, + dtype=self.colptr.dtype) + nodes = paddle.static.data(name="nodes", + shape=self.nodes.shape, + dtype=self.nodes.dtype) + eids = paddle.static.data(name="eids", + shape=self.edges_id.shape, + dtype=self.nodes.dtype) out_neighbors, out_count, out_eids = paddle.incubate.graph_sample_neighbors( row, diff --git a/python/paddle/fluid/tests/unittests/test_graph_send_recv_op.py b/python/paddle/fluid/tests/unittests/test_graph_send_recv_op.py index c233606c053..c0fdb134f16 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_send_recv_op.py +++ b/python/paddle/fluid/tests/unittests/test_graph_send_recv_op.py @@ -33,6 +33,7 @@ def graph_send_recv_wrapper(x, class TestGraphSendRecvMaxOp(OpTest): + def setUp(self): paddle.enable_static() self.python_api = graph_send_recv_wrapper @@ -47,19 +48,22 @@ class TestGraphSendRecvMaxOp(OpTest): self.attrs = {'pool_type': 'MAX'} - out, self.gradient = compute_graph_send_recv_for_min_max(self.inputs, - self.attrs) + out, self.gradient = compute_graph_send_recv_for_min_max( + self.inputs, self.attrs) self.outputs = {'Out': out} def test_check_output(self): self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad( - ['X'], 'Out', user_defined_grads=[self.gradient], check_eager=True) + self.check_grad(['X'], + 'Out', + user_defined_grads=[self.gradient], + check_eager=True) class TestGraphSendRecvMinOp(OpTest): + def setUp(self): paddle.enable_static() self.python_api = graph_send_recv_wrapper @@ -74,8 +78,8 @@ class TestGraphSendRecvMinOp(OpTest): self.attrs = {'pool_type': 'MIN'} - out, self.gradient = compute_graph_send_recv_for_min_max(self.inputs, - self.attrs) + out, self.gradient = compute_graph_send_recv_for_min_max( + self.inputs, self.attrs) self.outputs = {'Out': out} @@ -83,11 +87,14 @@ class TestGraphSendRecvMinOp(OpTest): self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad( - ['X'], 'Out', user_defined_grads=[self.gradient], check_eager=True) + self.check_grad(['X'], + 'Out', + user_defined_grads=[self.gradient], + check_eager=True) class TestGraphSendRecvSumOp(OpTest): + def setUp(self): paddle.enable_static() self.python_api = graph_send_recv_wrapper @@ -114,6 +121,7 @@ class TestGraphSendRecvSumOp(OpTest): class TestGraphSendRecvMeanOp(OpTest): + def setUp(self): paddle.enable_static() self.python_api = graph_send_recv_wrapper @@ -128,8 +136,8 @@ class TestGraphSendRecvMeanOp(OpTest): self.attrs = {'pool_type': 'MEAN'} - out, dst_count = 
compute_graph_send_recv_for_sum_mean(self.inputs, - self.attrs) + out, dst_count = compute_graph_send_recv_for_sum_mean( + self.inputs, self.attrs) self.outputs = {'Out': out, 'Dst_count': dst_count} @@ -182,7 +190,7 @@ def compute_graph_send_recv_for_min_max(inputs, attributes): results = np.zeros(target_shape, dtype=x.dtype) gradient = np.zeros_like(x) - # Calculate forward output + # Calculate forward output if pool_type == "MAX": first_set = set() for index, s_id in enumerate(dst_index): @@ -209,13 +217,14 @@ def compute_graph_send_recv_for_min_max(inputs, attributes): for i in range(index_size): forward_src_idx = src_index[i] forward_dst_idx = dst_index[i] - gradient[forward_src_idx] += 1 * ( - x[forward_src_idx] == results[forward_dst_idx]) + gradient[forward_src_idx] += 1 * (x[forward_src_idx] + == results[forward_dst_idx]) return results, gradient / results.size class API_GraphSendRecvOpTest(unittest.TestCase): + def test_static(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): @@ -237,32 +246,32 @@ class API_GraphSendRecvOpTest(unittest.TestCase): data2 = np.array([0, 1, 2, 0], dtype="int32") data3 = np.array([1, 2, 1, 0], dtype="int32") - np_sum = np.array( - [[0, 2, 3], [2, 8, 10], [1, 4, 5]], dtype="float32") - np_mean = np.array( - [[0, 2, 3], [1, 4, 5], [1, 4, 5]], dtype="float32") - np_max = np.array( - [[0, 2, 3], [2, 6, 7], [1, 4, 5]], dtype="float32") - np_min = np.array( - [[0, 2, 3], [0, 2, 3], [1, 4, 5]], dtype="float32") - - ret = exe.run(feed={'x': data1, - 'src': data2, - 'dst': data3}, + np_sum = np.array([[0, 2, 3], [2, 8, 10], [1, 4, 5]], + dtype="float32") + np_mean = np.array([[0, 2, 3], [1, 4, 5], [1, 4, 5]], + dtype="float32") + np_max = np.array([[0, 2, 3], [2, 6, 7], [1, 4, 5]], + dtype="float32") + np_min = np.array([[0, 2, 3], [0, 2, 3], [1, 4, 5]], + dtype="float32") + + ret = exe.run(feed={ + 'x': data1, + 'src': data2, + 'dst': data3 + }, fetch_list=[res_sum, res_mean, res_max, res_min]) for np_res, ret_res in zip([np_sum, np_mean, np_max, np_min], ret): self.assertTrue( - np.allclose( - np_res, ret_res, atol=1e-6), - "two value is\ + np.allclose(np_res, ret_res, atol=1e-6), "two value is\ {}\n{}, check diff!".format(np_res, ret_res)) def test_dygraph(self): device = paddle.CPUPlace() with paddle.fluid.dygraph.guard(device): - x = paddle.to_tensor( - np.array([[0, 2, 3], [1, 4, 5], [2, 6, 7]]), dtype="float32") + x = paddle.to_tensor(np.array([[0, 2, 3], [1, 4, 5], [2, 6, 7]]), + dtype="float32") src_index = paddle.to_tensor(np.array([0, 1, 2, 0]), dtype="int32") dst_index = paddle.to_tensor(np.array([1, 2, 1, 0]), dtype="int32") res_sum = paddle.incubate.graph_send_recv(x, src_index, dst_index, @@ -274,33 +283,31 @@ class API_GraphSendRecvOpTest(unittest.TestCase): res_min = paddle.incubate.graph_send_recv(x, src_index, dst_index, "min") - np_sum = np.array( - [[0, 2, 3], [2, 8, 10], [1, 4, 5]], dtype="float32") - np_mean = np.array( - [[0, 2, 3], [1, 4, 5], [1, 4, 5]], dtype="float32") - np_max = np.array( - [[0, 2, 3], [2, 6, 7], [1, 4, 5]], dtype="float32") - np_min = np.array( - [[0, 2, 3], [0, 2, 3], [1, 4, 5]], dtype="float32") + np_sum = np.array([[0, 2, 3], [2, 8, 10], [1, 4, 5]], + dtype="float32") + np_mean = np.array([[0, 2, 3], [1, 4, 5], [1, 4, 5]], + dtype="float32") + np_max = np.array([[0, 2, 3], [2, 6, 7], [1, 4, 5]], + dtype="float32") + np_min = np.array([[0, 2, 3], [0, 2, 3], [1, 4, 5]], + dtype="float32") ret = [res_sum, res_mean, res_max, res_min] for np_res, ret_res in zip([np_sum, np_mean, 
np_max, np_min], ret): self.assertTrue( - np.allclose( - np_res, ret_res, atol=1e-6), - "two value is\ + np.allclose(np_res, ret_res, atol=1e-6), "two value is\ {}\n{}, check diff!".format(np_res, ret_res)) def test_int32_input(self): device = paddle.CPUPlace() with paddle.fluid.dygraph.guard(device): - x = paddle.to_tensor( - np.array([[0, 2, 3], [1, 4, 5], [2, 6, 6]]), dtype="int32") - src_index = paddle.to_tensor( - np.array([0, 1, 2, 0, 1]), dtype="int32") - dst_index = paddle.to_tensor( - np.array([1, 2, 1, 0, 1]), dtype="int32") + x = paddle.to_tensor(np.array([[0, 2, 3], [1, 4, 5], [2, 6, 6]]), + dtype="int32") + src_index = paddle.to_tensor(np.array([0, 1, 2, 0, 1]), + dtype="int32") + dst_index = paddle.to_tensor(np.array([1, 2, 1, 0, 1]), + dtype="int32") res_sum = paddle.incubate.graph_send_recv(x, src_index, dst_index, "sum") res_mean = paddle.incubate.graph_send_recv(x, src_index, dst_index, @@ -310,8 +317,8 @@ class API_GraphSendRecvOpTest(unittest.TestCase): res_min = paddle.incubate.graph_send_recv(x, src_index, dst_index, "min") - np_sum = np.array( - [[0, 2, 3], [3, 12, 14], [1, 4, 5]], dtype="int32") + np_sum = np.array([[0, 2, 3], [3, 12, 14], [1, 4, 5]], + dtype="int32") np_mean = np.array([[0, 2, 3], [1, 4, 4], [1, 4, 5]], dtype="int32") np_max = np.array([[0, 2, 3], [2, 6, 6], [1, 4, 5]], dtype="int32") np_min = np.array([[0, 2, 3], [0, 2, 3], [1, 4, 5]], dtype="int32") @@ -320,15 +327,13 @@ class API_GraphSendRecvOpTest(unittest.TestCase): for np_res, ret_res in zip([np_sum, np_mean, np_max, np_min], ret): self.assertTrue( - np.allclose( - np_res, ret_res, atol=1e-6), - "two value is\ + np.allclose(np_res, ret_res, atol=1e-6), "two value is\ {}\n{}, check diff!".format(np_res, ret_res)) def test_set_outsize_gpu(self): if paddle.fluid.core.is_compiled_with_cuda(): - x = paddle.to_tensor( - np.array([[0, 2, 3], [1, 4, 5], [2, 6, 6]]), dtype="float32") + x = paddle.to_tensor(np.array([[0, 2, 3], [1, 4, 5], [2, 6, 6]]), + dtype="float32") src_index = paddle.to_tensor(np.array([0, 0, 1]), dtype="int32") dst_index = paddle.to_tensor(np.array([0, 1, 1]), dtype="int32") res = paddle.incubate.graph_send_recv(x, src_index, dst_index, @@ -337,22 +342,19 @@ class API_GraphSendRecvOpTest(unittest.TestCase): res_set_outsize = paddle.incubate.graph_send_recv( x, src_index, dst_index, "sum", out_size) - np_res = np.array( - [[0, 2, 3], [1, 6, 8], [0, 0, 0]], dtype="float32") - np_res_set_outsize = np.array( - [[0, 2, 3], [1, 6, 8]], dtype="float32") + np_res = np.array([[0, 2, 3], [1, 6, 8], [0, 0, 0]], + dtype="float32") + np_res_set_outsize = np.array([[0, 2, 3], [1, 6, 8]], + dtype="float32") self.assertTrue( - np.allclose( - np_res, res, atol=1e-6), - "two value is\ + np.allclose(np_res, res, atol=1e-6), "two value is\ {}\n{}, check diff!".format(np_res, res)) self.assertTrue( - np.allclose( - np_res_set_outsize, res_set_outsize, atol=1e-6), + np.allclose(np_res_set_outsize, res_set_outsize, atol=1e-6), "two value is\ - {}\n{}, check diff!" 
- .format(np_res_set_outsize, res_set_outsize)) + {}\n{}, check diff!".format(np_res_set_outsize, + res_set_outsize)) def test_api_eager_dygraph(self): with _test_eager_guard(): diff --git a/python/paddle/fluid/tests/unittests/test_grid_sample_function.py b/python/paddle/fluid/tests/unittests/test_grid_sample_function.py index 9ad0309a70e..90e80e013ec 100644 --- a/python/paddle/fluid/tests/unittests/test_grid_sample_function.py +++ b/python/paddle/fluid/tests/unittests/test_grid_sample_function.py @@ -21,6 +21,7 @@ import unittest class GridSampleTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', x_shape=[2, 2, 3, 3], @@ -48,12 +49,11 @@ class GridSampleTestCase(unittest.TestCase): with fluid.program_guard(main, start): x = fluid.data("x", self.x_shape, dtype=self.dtype) grid = fluid.data("grid", self.grid_shape, dtype=self.dtype) - y_var = F.grid_sample( - x, - grid, - mode=self.mode, - padding_mode=self.padding_mode, - align_corners=self.align_corners) + y_var = F.grid_sample(x, + grid, + mode=self.mode, + padding_mode=self.padding_mode, + align_corners=self.align_corners) feed_dict = {"x": self.x, "grid": self.grid} exe = fluid.Executor(place) exe.run(start) @@ -63,12 +63,11 @@ class GridSampleTestCase(unittest.TestCase): def dynamic_functional(self): x_t = paddle.to_tensor(self.x) grid_t = paddle.to_tensor(self.grid) - y_t = F.grid_sample( - x_t, - grid_t, - mode=self.mode, - padding_mode=self.padding_mode, - align_corners=self.align_corners) + y_t = F.grid_sample(x_t, + grid_t, + mode=self.mode, + padding_mode=self.padding_mode, + align_corners=self.align_corners) y_np = y_t.numpy() return y_np @@ -88,6 +87,7 @@ class GridSampleTestCase(unittest.TestCase): class GridSampleErrorTestCase(GridSampleTestCase): + def runTest(self): place = fluid.CPUPlace() with self.assertRaises(ValueError): @@ -97,26 +97,22 @@ class GridSampleErrorTestCase(GridSampleTestCase): def add_cases(suite): suite.addTest(GridSampleTestCase(methodName='runTest')) suite.addTest( - GridSampleTestCase( - methodName='runTest', - mode='bilinear', - padding_mode='reflection', - align_corners=True)) + GridSampleTestCase(methodName='runTest', + mode='bilinear', + padding_mode='reflection', + align_corners=True)) suite.addTest( - GridSampleTestCase( - methodName='runTest', - mode='bilinear', - padding_mode='zeros', - align_corners=True)) + GridSampleTestCase(methodName='runTest', + mode='bilinear', + padding_mode='zeros', + align_corners=True)) def add_error_cases(suite): suite.addTest( - GridSampleErrorTestCase( - methodName='runTest', padding_mode="VALID")) + GridSampleErrorTestCase(methodName='runTest', padding_mode="VALID")) suite.addTest( - GridSampleErrorTestCase( - methodName='runTest', align_corners="VALID")) + GridSampleErrorTestCase(methodName='runTest', align_corners="VALID")) suite.addTest(GridSampleErrorTestCase(methodName='runTest', mode="VALID")) @@ -128,6 +124,7 @@ def load_tests(loader, standard_tests, pattern): class TestGridSampleAPI(unittest.TestCase): + def test_errors(self): with self.assertRaises(ValueError): x = paddle.randn([1, 1, 3, 3]) diff --git a/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py b/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py index 531aa1dcc3c..f32387d07a9 100644 --- a/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py +++ b/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py @@ -17,6 +17,7 @@ import unittest import numpy as np import paddle.fluid.core as core from op_test import OpTest, skip_check_grad_ci + 
paddle.enable_static() @@ -24,12 +25,12 @@ def AffineGrid(theta, grid_shape): n = grid_shape[0] h = grid_shape[1] w = grid_shape[2] - h_idx = np.repeat( - np.linspace(-1, 1, h)[np.newaxis, :], w, axis=0).T[:, :, np.newaxis] - w_idx = np.repeat( - np.linspace(-1, 1, w)[np.newaxis, :], h, axis=0)[:, :, np.newaxis] - grid = np.concatenate( - [w_idx, h_idx, np.ones([h, w, 1])], axis=2) # h * w * 3 + h_idx = np.repeat(np.linspace(-1, 1, h)[np.newaxis, :], w, + axis=0).T[:, :, np.newaxis] + w_idx = np.repeat(np.linspace(-1, 1, w)[np.newaxis, :], h, + axis=0)[:, :, np.newaxis] + grid = np.concatenate([w_idx, h_idx, np.ones([h, w, 1])], + axis=2) # h * w * 3 grid = np.repeat(grid[np.newaxis, :], n, axis=0) # n * h * w *3 ret = np.zeros([n, h * w, 2]) @@ -71,8 +72,8 @@ def unnormalizeAndClip(grid_slice, max_val, align_corners, padding_mode): if align_corners: grid_slice = 0.5 * ((grid_slice.astype('float64') + 1.0) * max_val) else: - grid_slice = 0.5 * ( - (grid_slice.astype('float64') + 1.0) * (max_val + 1)) - 0.5 + grid_slice = 0.5 * ((grid_slice.astype('float64') + 1.0) * + (max_val + 1)) - 0.5 if padding_mode == "border": grid_slice = clip(grid_slice, 0, max_val) @@ -82,8 +83,8 @@ def unnormalizeAndClip(grid_slice, max_val, align_corners, padding_mode): 0.5) extra = grid_abs - np.floor(grid_abs / double_range) * double_range grid_slice = np.minimum(extra, double_range - extra) - grid_slice = grid_slice if align_corners else clip(grid_slice - 0.5, 0, - max_val) + grid_slice = grid_slice if align_corners else clip( + grid_slice - 0.5, 0, max_val) return grid_slice @@ -138,6 +139,7 @@ def GridSampler(data, class TestGridSamplerOp(OpTest): + def setUp(self): self.use_cudnn = False self.numeric_grad_delta = 0.0001 @@ -163,19 +165,19 @@ class TestGridSamplerOp(OpTest): "mode": self.mode } self.outputs = { - 'Output': GridSampler(x, grid, self.align_corners, self.mode, - self.padding_mode) + 'Output': + GridSampler(x, grid, self.align_corners, self.mode, + self.padding_mode) } def test_check_output(self): self.check_output() def test_check_grad_normal(self): - self.check_grad( - ['X', 'Grid'], - 'Output', - max_relative_error=0.01, - numeric_grad_delta=self.numeric_grad_delta) + self.check_grad(['X', 'Grid'], + 'Output', + max_relative_error=0.01, + numeric_grad_delta=self.numeric_grad_delta) def initTestCase(self): self.x_shape = (2, 3, 8, 8) @@ -188,6 +190,7 @@ class TestGridSamplerOp(OpTest): class Case1(TestGridSamplerOp): + def initTestCase(self): self.x_shape = (2, 3, 5, 6) self.grid_shape = (2, 8, 9, 2) @@ -198,6 +201,7 @@ class Case1(TestGridSamplerOp): class Case1_(TestGridSamplerOp): + def initTestCase(self): self.x_shape = (2, 3, 5, 6) self.grid_shape = (2, 8, 9, 2) @@ -208,6 +212,7 @@ class Case1_(TestGridSamplerOp): class Case2(TestGridSamplerOp): + def initTestCase(self): self.x_shape = (2, 3, 5, 6) self.grid_shape = (2, 8, 9, 2) @@ -218,6 +223,7 @@ class Case2(TestGridSamplerOp): class Case3(TestGridSamplerOp): + def initTestCase(self): self.x_shape = (2, 3, 5, 6) self.grid_shape = (2, 8, 9, 2) @@ -228,6 +234,7 @@ class Case3(TestGridSamplerOp): class Case4(TestGridSamplerOp): + def initTestCase(self): self.x_shape = (2, 3, 5, 6) self.grid_shape = (2, 8, 9, 2) @@ -241,6 +248,7 @@ class Case4(TestGridSamplerOp): @skip_check_grad_ci(reason="'check_grad' on large inputs is too slow, " + "however it is desirable to cover the forward pass") class LargeInputCase(TestGridSamplerOp): + def get_places(self): places = [] if core.is_compiled_with_cuda(): @@ -263,6 +271,7 @@ class 
LargeInputCase(TestGridSamplerOp): @skip_check_grad_ci(reason="'check_grad' on large inputs is too slow, " + "however it is desirable to cover the forward pass") class Case5(LargeInputCase): + def initTestCase(self): self.no_need_check_grad = True self.x_shape = (2, 3, 128, 128) diff --git a/python/paddle/fluid/tests/unittests/test_group_norm_op.py b/python/paddle/fluid/tests/unittests/test_group_norm_op.py index 61a51d9b5dd..94793ad85cf 100644 --- a/python/paddle/fluid/tests/unittests/test_group_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_group_norm_op.py @@ -41,6 +41,7 @@ def group_norm_naive(x, scale, bias, epsilon, groups, data_layout): class TestGroupNormOpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): @@ -52,8 +53,9 @@ class TestGroupNormOpError(unittest.TestCase): self.assertRaises(TypeError, test_x_type) def test_x_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[2, 100, 3, 5], dtype='int32') + x2 = fluid.layers.data(name='x2', + shape=[2, 100, 3, 5], + dtype='int32') groups = 2 fluid.layers.group_norm(x2, groups) @@ -61,6 +63,7 @@ class TestGroupNormOpError(unittest.TestCase): class TestGroupNormOp(OpTest): + def setUp(self): self.op_type = "group_norm" self.data_format = "NCHW" @@ -75,9 +78,10 @@ class TestGroupNormOp(OpTest): input = np.transpose(input, (0, 2, 3, 1)) scale = np.random.random([self.shape[1]]).astype(self.dtype) bias = np.random.random([self.shape[1]]).astype(self.dtype) - output, mean, var = group_norm_naive( - input, scale, bias, self.attrs['epsilon'], self.attrs['groups'], - self.data_format) + output, mean, var = group_norm_naive(input, scale, bias, + self.attrs['epsilon'], + self.attrs['groups'], + self.data_format) self.inputs = { 'X': OpTest.np_dtype_to_fluid_dtype(input), @@ -97,15 +101,16 @@ class TestGroupNormOp(OpTest): if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) # group_norm uses AtomicAdd on CUDAPlace, which do not ensure - # computation order when multiple threads write the same address. So the + # computation order when multiple threads write the same address. So the # result of group_norm is non-deterministic when datatype is float. # When inplace_atol is not None, the inplace check uses numpy.allclose # to check inplace result instead of numpy.array_equal. # Set to inplace_atol to 0, which means the absolute error is 0, and the # relative error is 1e-05 in numpy.allclose by default. 
# Reference: https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html - self.check_output_with_place( - place, atol=atol, inplace_atol=inplace_atol) + self.check_output_with_place(place, + atol=atol, + inplace_atol=inplace_atol) def do_compare_between_place(self): if not core.is_compiled_with_cuda(): return @@ -138,44 +143,52 @@ class TestGroupNormOp(OpTest): self.check_grad_with_place( place, set(['X', 'Scale', 'Bias']), - 'Y', ) + 'Y', + ) def init_test_case(self): pass class TestGroupNormOp1(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 1 class TestGroupNormOp2(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 4 class TestGroupNormOpBigEps1(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 1 self.attrs['epsilon'] = 0.5 class TestGroupNormOpBigEps2(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 4 self.attrs['epsilon'] = 0.5 class TestGroupNormOpBigEps3(TestGroupNormOp): + def init_test_case(self): self.attrs['epsilon'] = 0.5 @skip_check_grad_ci( - reason='''This test case is used to ensure whether the gradient checking results between CPU and GPU + reason= + '''This test case is used to ensure whether the gradient checking results between CPU and GPU are consistent when using the same inputs, thus, it doesn't need to call check_grad.''' ) class TestGroupNormOpLargeData(TestGroupNormOp): + def init_test_case(self): self.shape = (2, 32, 64, 64) self.attrs['groups'] = 8 @@ -183,18 +196,21 @@ class TestGroupNormOpLargeData(TestGroupNormOp): class TestGroupNormOp1_With_NHWC(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 1 self.data_format = "NHWC" class TestGroupNormOp2_With_NHWC(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 4 self.data_format = "NHWC" class TestGroupNormOpBigEps1_With_NHWC(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 1 self.attrs['epsilon'] = 0.5 @@ -202,6 +218,7 @@ class TestGroupNormOpBigEps1_With_NHWC(TestGroupNormOp): class TestGroupNormOpBigEps2_With_NHWC(TestGroupNormOp): + def init_test_case(self): self.attrs['groups'] = 4 self.attrs['epsilon'] = 0.5 @@ -209,16 +226,19 @@ class TestGroupNormOpBigEps2_With_NHWC(TestGroupNormOp): class TestGroupNormOpBigEps3_With_NHWC(TestGroupNormOp): + def init_test_case(self): self.attrs['epsilon'] = 0.5 self.data_format = "NHWC" @skip_check_grad_ci( - reason='''This test case is used to ensure whether the gradient checking results between CPU and GPU + reason= + '''This test case is used to ensure whether the gradient checking results between CPU and GPU are consistent when using the same inputs, thus, it doesn't need to call check_grad.''' ) class TestGroupNormOpLargeData_With_NHWC(TestGroupNormOp): + def init_test_case(self): self.shape = (2, 64, 32, 32) # NCHW self.attrs['groups'] = 8 @@ -227,13 +247,16 @@ class TestGroupNormOpLargeData_With_NHWC(TestGroupNormOp): class TestGroupNormAPI_With_NHWC(unittest.TestCase): + def test_case1(self): data1 = fluid.data(name='data1', shape=[None, 3, 3, 4], dtype='float64') - out1 = fluid.layers.group_norm( - input=data1, groups=2, data_layout="NHWC") + out1 = fluid.layers.group_norm(input=data1, + groups=2, + data_layout="NHWC") data2 = fluid.data(name='data2', shape=[None, 4, 3, 3], dtype='float64') - out2 = fluid.layers.group_norm( - input=data2, groups=2, data_layout="NCHW") + out2 = fluid.layers.group_norm(input=data2, + groups=2, + data_layout="NCHW") data1_np = np.random.random((2, 3, 3, 4)).astype("float64") data2_np = 
np.random.random((2, 4, 3, 3)).astype("float64") @@ -243,14 +266,24 @@ class TestGroupNormAPI_With_NHWC(unittest.TestCase): place = core.CPUPlace() exe = fluid.Executor(place) results = exe.run(fluid.default_main_program(), - feed={"data1": data1_np, - "data2": data2_np}, + feed={ + "data1": data1_np, + "data2": data2_np + }, fetch_list=[out1, out2], return_numpy=True) - expect_res1 = group_norm_naive( - data1_np, scale, bias, epsilon=1e-5, groups=2, data_layout="NHWC") - expect_res2 = group_norm_naive( - data2_np, scale, bias, epsilon=1e-5, groups=2, data_layout="NCHW") + expect_res1 = group_norm_naive(data1_np, + scale, + bias, + epsilon=1e-5, + groups=2, + data_layout="NHWC") + expect_res2 = group_norm_naive(data2_np, + scale, + bias, + epsilon=1e-5, + groups=2, + data_layout="NCHW") self.assertTrue(np.allclose(results[0], expect_res1[0])) self.assertTrue(np.allclose(results[1], expect_res2[0])) @@ -261,8 +294,9 @@ class TestGroupNormException(unittest.TestCase): data = fluid.data(name='data', shape=[None, 3, 3, 4], dtype="float64") def attr_data_format(): - out = fluid.layers.group_norm( - input=data, groups=2, data_layout="NDHW") + out = fluid.layers.group_norm(input=data, + groups=2, + data_layout="NDHW") self.assertRaises(ValueError, attr_data_format) diff --git a/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py index 2f3adbe861a..c6bc44ebd2f 100644 --- a/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py @@ -41,6 +41,7 @@ def group_norm_naive_for_general_dimension(x, scale, bias, epsilon, groups): class TestDygraphGroupNormv2(unittest.TestCase): + def test_dygraph(self): places = [fluid.CPUPlace()] if core.is_compiled_with_cuda() and core.op_support_gpu("group_norm"): @@ -63,18 +64,18 @@ class TestDygraphGroupNormv2(unittest.TestCase): def test_weight_bias_false(): with fluid.dygraph.guard(p): - gn = paddle.nn.GroupNorm( - num_channels=2, - num_groups=2, - weight_attr=False, - bias_attr=False) + gn = paddle.nn.GroupNorm(num_channels=2, + num_groups=2, + weight_attr=False, + bias_attr=False) def test_nn_exception(): with fluid.dygraph.guard(p): def attr_data_format(): - out = paddle.nn.GroupNorm( - num_groups=2, num_channels=2, data_format="NHWC") + out = paddle.nn.GroupNorm(num_groups=2, + num_channels=2, + data_format="NHWC") self.assertRaises(ValueError, attr_data_format) @@ -125,10 +126,11 @@ class TestDygraphGroupNormv2(unittest.TestCase): class TestGroupNormAPIV2_With_General_Dimensions(unittest.TestCase): + def test_numerical_accuracy(self): paddle.disable_static() - shapes = [(2, 6), (2, 6, 4), (2, 6, 4, 4), (2, 6, 6, 6, 2), (2, 6, 6, 6, - 2, 3)] + shapes = [(2, 6), (2, 6, 4), (2, 6, 4, 4), (2, 6, 6, 6, 2), + (2, 6, 6, 6, 2, 3)] np.random.seed(10) places = [fluid.CPUPlace()] if core.is_compiled_with_cuda() and core.op_support_gpu("group_norm"): @@ -154,7 +156,9 @@ class TestGroupNormAPIV2_With_General_Dimensions(unittest.TestCase): class TestGroupNormDimException(unittest.TestCase): + def test_exception(self): + def test_empty_input_static_API(): x = paddle.to_tensor([], dtype='float32') paddle.static.nn.group_norm(x, 3) diff --git a/python/paddle/fluid/tests/unittests/test_gru_op.py b/python/paddle/fluid/tests/unittests/test_gru_op.py index 7740cc0b03b..1006a43b2e9 100644 --- a/python/paddle/fluid/tests/unittests/test_gru_op.py +++ b/python/paddle/fluid/tests/unittests/test_gru_op.py @@ -35,6 +35,7 @@ def gru( act_gate, 
dtype='float32', origin_mode=False): + def _seq_to_batch(lod, is_reverse): idx_in_seq_list = [] seq_lens = lod[0] @@ -50,8 +51,8 @@ def gru( for i in range(len(seq_lens)): if seq_lens[sorted_seqs[i]] <= batch_idx: break - idx = (seq_starts[sorted_seqs[i] + 1] - 1 - batch_idx - ) if is_reverse else ( + idx = (seq_starts[sorted_seqs[i] + 1] - 1 - + batch_idx) if is_reverse else ( seq_starts[sorted_seqs[i]] + batch_idx) idx_in_seq.append(idx) idx_in_seq_list.append(idx_in_seq) @@ -103,6 +104,7 @@ def gru( class TestGRUOp(OpTest): + def set_confs(self): pass @@ -127,11 +129,11 @@ class TestGRUOp(OpTest): N = len(self.lod[0]) input = np.random.rand(T, 3 * self.D).astype(self.dtype) weight = np.random.rand(self.D, 3 * self.D).astype(self.dtype) - bias = np.random.rand( - 1, 3 * self.D).astype(self.dtype) if self.with_bias else np.zeros( + bias = np.random.rand(1, 3 * self.D).astype( + self.dtype) if self.with_bias else np.zeros( (1, 3 * self.D), dtype=self.dtype) - h0 = np.random.rand( - N, self.D).astype(self.dtype) if self.with_h0 else np.zeros( + h0 = np.random.rand(N, self.D).astype( + self.dtype) if self.with_h0 else np.zeros( (N, self.D), dtype=self.dtype) batch_gate, batch_reset_hidden_prev, batch_hidden, hidden = gru( @@ -165,33 +167,38 @@ class TestGRUOp(OpTest): self.check_output(atol=1e-8, check_dygraph=False) def test_check_grad(self): - self.check_grad( - ['Input', 'H0', 'Weight', 'Bias'], ['Hidden'], check_dygraph=False) + self.check_grad(['Input', 'H0', 'Weight', 'Bias'], ['Hidden'], + check_dygraph=False) class TestGRUOriginMode(TestGRUOp): + def set_confs(self): self.origin_mode = True class TestGRUOp2(TestGRUOp): + def set_confs(self): self.dtype = 'float64' class TestGRUOp2Len0(TestGRUOp): + def set_confs(self): self.lod = [[2, 0, 4]] self.dtype = 'float64' class TestGRUOp2OriginMode(TestGRUOp): + def set_confs(self): self.dtype = 'float64' self.origin_mode = True class TestGRUOp2OriginModeLen0(TestGRUOp): + def set_confs(self): self.lod = [[0, 3, 4]] self.dtype = 'float64' @@ -199,6 +206,7 @@ class TestGRUOp2OriginModeLen0(TestGRUOp): class TestGRUOp2OriginModeLastLen0(TestGRUOp): + def set_confs(self): self.lod = [[0, 3, 0]] self.dtype = 'float64' @@ -206,35 +214,40 @@ class TestGRUOp2OriginModeLastLen0(TestGRUOp): class TestGRUOpNoInitial(TestGRUOp): + def set_confs(self): self.with_h0 = False def test_check_grad(self): - self.check_grad( - ['Input', 'Weight', 'Bias'], ['Hidden'], check_dygraph=False) + self.check_grad(['Input', 'Weight', 'Bias'], ['Hidden'], + check_dygraph=False) class TestGRUOpNoBias(TestGRUOp): + def set_confs(self): self.with_bias = False def test_check_grad(self): - self.check_grad( - ['Input', 'H0', 'Weight'], ['Hidden'], check_dygraph=False) + self.check_grad(['Input', 'H0', 'Weight'], ['Hidden'], + check_dygraph=False) class TestGRUOpReverse(TestGRUOp): + def set_confs(self): self.is_reverse = True class TestGRUOpReverseOriginMode(TestGRUOp): + def set_confs(self): self.is_reverse = True self.origin_mode = True class TestGRUOpInference(TestGRUOp): + def set_is_test(self): self.is_test = True @@ -250,6 +263,7 @@ class TestGRUOpInference(TestGRUOp): class TestGruOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): @@ -260,8 +274,9 @@ class TestGruOpError(unittest.TestCase): self.assertRaises(TypeError, test_Variable) def test_h_0(): - in_data = fluid.data( - name="input", shape=[None, 1536], dtype="float32") + in_data = fluid.data(name="input", + shape=[None, 1536], + dtype="float32") h = fluid.data(name="h", 
shape=[None, 512], dtype="int32") fluid.layers.dynamic_gru(input=in_data, size=512, h_0=h) diff --git a/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py b/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py index 77b88161d3a..abce0e11278 100644 --- a/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py @@ -23,15 +23,18 @@ import paddle.fluid as fluid import paddle.fluid.layers as layers import random import sys + sys.path.append("./rnn") from rnn_numpy import GRU from convert import get_params_for_net + random.seed(2) np.set_printoptions(threshold=np.inf) paddle.enable_static() class TestGRUOp(OpTest): + def get_weight_names(self): weight_names = [] for i in range(self.num_layers): @@ -46,8 +49,7 @@ class TestGRUOp(OpTest): self.op_type = "rnn" self.dtype = "float32" if core.is_compiled_with_rocm() else "float64" self.sequence_length = None if core.is_compiled_with_rocm( - ) else np.array( - [12, 11, 10, 9, 8, 7, 6, 5], dtype=np.int32) + ) else np.array([12, 11, 10, 9, 8, 7, 6, 5], dtype=np.int32) self.num_layers = 1 self.is_bidirec = False self.is_test = False @@ -62,9 +64,10 @@ class TestGRUOp(OpTest): self.direction_num = 2 if self.is_bidirec else 1 direction = "bidirectional" if self.is_bidirec else "forward" - input = np.random.uniform( - low=-0.1, high=0.1, - size=(seq_length, batch_size, input_size)).astype(self.dtype) + input = np.random.uniform(low=-0.1, + high=0.1, + size=(seq_length, batch_size, + input_size)).astype(self.dtype) if self.sequence_length is not None: input[3][1:][:] = 0 @@ -140,23 +143,27 @@ class TestGRUOp(OpTest): class TestGRUOp1(TestGRUOp): + def set_attrs(self): self.sequence_length = None class TestGRUOp2(TestGRUOp): + def set_attrs(self): self.sequence_length = None self.is_bidirec = True class TestGRUOp3(TestGRUOp): + def set_attrs(self): self.sequence_length = None self.is_test = True class TestGRUOp4(TestGRUOp): + def set_attrs(self): self.sequence_length = None self.is_bidirec = True @@ -164,6 +171,7 @@ class TestGRUOp4(TestGRUOp): class TestGRUOpAvx(TestGRUOp): + def set_attrs(self): self.dtype = "float32" self.hidden_size = 8 diff --git a/python/paddle/fluid/tests/unittests/test_gru_unit_op.py b/python/paddle/fluid/tests/unittests/test_gru_unit_op.py index 74afa7db289..ac70901c2eb 100644 --- a/python/paddle/fluid/tests/unittests/test_gru_unit_op.py +++ b/python/paddle/fluid/tests/unittests/test_gru_unit_op.py @@ -25,13 +25,14 @@ from paddle.fluid.framework import program_guard, Program class TestGRUUnitAPIError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): D = 5 layer = fluid.dygraph.nn.GRUUnit(size=D * 3) # the input must be Variable. 
- x0 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x0 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) self.assertRaises(TypeError, layer, x0) # the input dtype must be float32 or float64 x = fluid.data(name='x', shape=[-1, D * 3], dtype='float16') @@ -63,14 +64,17 @@ def relu(x): class TestGRUUnitOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): batch_size = 5 hidden_dim = 40 - input = fluid.data( - name='input', shape=[None, hidden_dim * 3], dtype='float32') - pre_hidden = fluid.data( - name='pre_hidden', shape=[None, hidden_dim], dtype='float32') + input = fluid.data(name='input', + shape=[None, hidden_dim * 3], + dtype='float32') + pre_hidden = fluid.data(name='pre_hidden', + shape=[None, hidden_dim], + dtype='float32') np_input = np.random.uniform( -0.1, 0.1, (batch_size, hidden_dim * 3)).astype('float64') np_pre_hidden = np.random.uniform( @@ -87,19 +91,17 @@ class TestGRUUnitOpError(unittest.TestCase): self.assertRaises(TypeError, test_pre_hidden_Variable) def test_input_type(): - error_input = fluid.data( - name='error_input', - shape=[None, hidden_dim * 3], - dtype='int32') + error_input = fluid.data(name='error_input', + shape=[None, hidden_dim * 3], + dtype='int32') gru_unit(error_input, pre_hidden, hidden_dim * 3) self.assertRaises(TypeError, test_input_type) def test_pre_hidden_type(): - error_pre_hidden = fluid.data( - name='error_pre_hidden', - shape=[None, hidden_dim], - dtype='int32') + error_pre_hidden = fluid.data(name='error_pre_hidden', + shape=[None, hidden_dim], + dtype='int32') gru_unit(input, error_pre_hidden, hidden_dim * 3) self.assertRaises(TypeError, test_pre_hidden_type) @@ -120,13 +122,16 @@ class TestGRUUnitOp(OpTest): frame_size = self.frame_size self.op_type = 'gru_unit' self.inputs = { - 'Input': np.random.uniform( - -0.1, 0.1, (batch_size, frame_size * 3)).astype(self.dtype), - 'HiddenPrev': np.random.uniform( - -0.1, 0.1, (batch_size, frame_size)).astype(self.dtype), - 'Weight': np.random.uniform( - -1. / math.sqrt(frame_size), 1. / math.sqrt(frame_size), - (frame_size, frame_size * 3)).astype(self.dtype), + 'Input': + np.random.uniform(-0.1, 0.1, + (batch_size, frame_size * 3)).astype(self.dtype), + 'HiddenPrev': + np.random.uniform(-0.1, 0.1, + (batch_size, frame_size)).astype(self.dtype), + 'Weight': + np.random.uniform(-1. / math.sqrt(frame_size), + 1. 
/ math.sqrt(frame_size), + (frame_size, frame_size * 3)).astype(self.dtype), } self.attrs = { 'activation': GRUActivationType.tanh, @@ -146,8 +151,9 @@ class TestGRUUnitOp(OpTest): g = x + np.tile(b, (batch_size, 1)) w_u_r = w.flatten()[:frame_size * frame_size * 2].reshape( (frame_size, frame_size * 2)) - u_r = self.activate[self.attrs['gate_activation']](np.dot( - h_p, w_u_r) + g[:, :frame_size * 2]) + u_r = self.activate[self.attrs['gate_activation']](np.dot(h_p, w_u_r) + + g[:, :frame_size * + 2]) u = u_r[:, :frame_size] r = u_r[:, frame_size:frame_size * 2] r_h_p = r * h_p @@ -180,6 +186,7 @@ class TestGRUUnitOp(OpTest): class TestGRUUnitOpOriginMode(TestGRUUnitOp): + def setUp(self): self.dtype = 'float32' if fluid.core.is_compiled_with_rocm( ) else 'float64' @@ -188,6 +195,7 @@ class TestGRUUnitOpOriginMode(TestGRUUnitOp): class TestGRUUnitOpWithBias(TestGRUUnitOp): + def set_inputs(self, origin_mode=False): batch_size = self.batch_size frame_size = self.frame_size @@ -204,12 +212,12 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp): self.check_grad(['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden']) def test_check_grad_ingore_input(self): - self.check_grad( - ['HiddenPrev', 'Weight', 'Bias'], ['Hidden'], - no_grad_set=set('Input')) + self.check_grad(['HiddenPrev', 'Weight', 'Bias'], ['Hidden'], + no_grad_set=set('Input')) class TestGRUUnitOpWithBiasOriginMode(TestGRUUnitOpWithBias): + def setUp(self): self.dtype = 'float32' if fluid.core.is_compiled_with_rocm( ) else 'float64' diff --git a/python/paddle/fluid/tests/unittests/test_gumbel_softmax_op.py b/python/paddle/fluid/tests/unittests/test_gumbel_softmax_op.py index 7c706eabd1d..650626883c7 100644 --- a/python/paddle/fluid/tests/unittests/test_gumbel_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_gumbel_softmax_op.py @@ -18,10 +18,12 @@ import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard from paddle.fluid.framework import _test_eager_guard + paddle.enable_static() class TestGumbelSoftmaxOp(OpTest): + def init_attrs(self): self.shape = [20, 10] self.attrs = {"hard": True, "axis": -1} @@ -51,6 +53,7 @@ class TestGumbelSoftmaxOp(OpTest): class TestGumbelSoftmaxOp2(TestGumbelSoftmaxOp): + def init_attrs(self): self.shape = [20, 10] self.attrs = {"hard": True, "axis": 0} @@ -59,6 +62,7 @@ class TestGumbelSoftmaxOp2(TestGumbelSoftmaxOp): class TestGumbelSoftmaxOp3(TestGumbelSoftmaxOp): + def init_attrs(self): self.shape = [100] self.attrs = {"hard": True, "axis": -1} @@ -67,6 +71,7 @@ class TestGumbelSoftmaxOp3(TestGumbelSoftmaxOp): class TestGumbelSoftmaxOp4(TestGumbelSoftmaxOp): + def init_attrs(self): self.shape = [20, 10, 5] self.attrs = {"hard": True, "axis": -1} @@ -75,6 +80,7 @@ class TestGumbelSoftmaxOp4(TestGumbelSoftmaxOp): class TestGumbelSoftmaxOp5(TestGumbelSoftmaxOp): + def init_attrs(self): self.shape = [20, 10, 5] self.attrs = {"hard": True, "axis": 1} @@ -83,6 +89,7 @@ class TestGumbelSoftmaxOp5(TestGumbelSoftmaxOp): class TestGumbelSoftmaxOpSampleDistribution(OpTest): + def softmax(self, x): x_row_max = x.max(axis=-1) x_row_max = x_row_max.reshape(list(x.shape)[:-1] + [1]) @@ -118,10 +125,10 @@ class TestGumbelSoftmaxOpSampleDistribution(OpTest): # Experiment should result in batch num . self.assertEqual(self.counts.sum(), self.shape[0]) - # Treat the probability from softmax as + # Treat the probability from softmax as # the probability of binomial distribution. # Samples from gumbel softmax meet this binomial distribution. 
- # Construct statistics z for samples and + # Construct statistics z for samples and # z is approximately N(0,1) for unbiased count expected = self.probs * self.shape[0] z = (self.counts - expected) / np.sqrt((expected * (1 - self.probs))) @@ -134,6 +141,7 @@ class TestGumbelSoftmaxOpSampleDistribution(OpTest): class TestGumbelSoftmaxOpGrad(unittest.TestCase): + def init_attrs(self): self.shape = [20, 10] self.dtype = "float64" @@ -153,12 +161,13 @@ class TestGumbelSoftmaxOpGrad(unittest.TestCase): out_hard.sum().backward() out_soft.sum().backward() - self.assertEqual( - np.allclose(x_hard.grad.numpy(), x_soft.grad.numpy()), True) + self.assertEqual(np.allclose(x_hard.grad.numpy(), x_soft.grad.numpy()), + True) paddle.enable_static() class TestGumbelSoftmaxAPI(unittest.TestCase): + def setUp(self): self.x_shape = [2, 3, 4, 5] self.x = np.random.uniform(-1., 1., self.x_shape).astype(np.float32) @@ -192,12 +201,13 @@ class TestGumbelSoftmaxAPI(unittest.TestCase): class TestGumbelSoftmaxOpError(unittest.TestCase): + def test_errors(self): paddle.disable_static() def test_Variable(): - x1 = fluid.create_lod_tensor( - np.zeros((100, 784)), [[10, 10, 10, 70]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.zeros((100, 784)), + [[10, 10, 10, 70]], fluid.CPUPlace()) paddle.nn.functional.gumbel_softmax(x1) self.assertRaises(ValueError, test_Variable) @@ -224,8 +234,9 @@ class TestGumbelSoftmaxOpError(unittest.TestCase): def test_dtype(): with paddle.static.program_guard(paddle.static.Program()): - x_int32 = paddle.fluid.data( - name='x_int32', shape=[2, 3], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[2, 3], + dtype='int32') paddle.nn.functional.gumbel_softmax(x_int32) self.assertRaises(TypeError, test_dtype) diff --git a/python/paddle/fluid/tests/unittests/test_hash_op.py b/python/paddle/fluid/tests/unittests/test_hash_op.py index 3fe8bca2f19..fe076290948 100644 --- a/python/paddle/fluid/tests/unittests/test_hash_op.py +++ b/python/paddle/fluid/tests/unittests/test_hash_op.py @@ -19,6 +19,7 @@ import paddle.fluid as fluid class TestHashOp(OpTest): + def setUp(self): self.op_type = "hash" self.init_test_case() @@ -40,6 +41,7 @@ class TestHashOp(OpTest): class TestHashNotLoDOp(TestHashOp): + def setUp(self): self.op_type = "hash" self.init_test_case() @@ -96,14 +98,15 @@ class TestHashOp3(TestHashOp): def init_test_case(self): self.in_seq = np.array([10, 5]).reshape((2, 1)).astype("int64") - self.out_seq = np.array( - [1204014882, 393011615, 3586283837, 2814821595]).reshape((2, 2, 1)) + self.out_seq = np.array([1204014882, 393011615, 3586283837, + 2814821595]).reshape((2, 2, 1)) def test_check_output(self): self.check_output() class TestHashOpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): input_data = np.random.randint(0, 10, (8, 1)).astype("int32") @@ -116,24 +119,30 @@ class TestHashOpError(unittest.TestCase): def test_type(): # dtype must be int32, int64. - x2 = fluid.layers.data( - name='x2', shape=[1], dtype="float32", lod_level=1) + x2 = fluid.layers.data(name='x2', + shape=[1], + dtype="float32", + lod_level=1) fluid.layers.hash(input=x2, hash_size=2**32) self.assertRaises(TypeError, test_type) def test_hash_size_type(): # hash_size dtype must be int32, int64. 
- x3 = fluid.layers.data( - name='x3', shape=[1], dtype="int32", lod_level=1) + x3 = fluid.layers.data(name='x3', + shape=[1], + dtype="int32", + lod_level=1) fluid.layers.hash(input=x3, hash_size=1024.5) self.assertRaises(TypeError, test_hash_size_type) def test_num_hash_type(): # num_hash dtype must be int32, int64. - x4 = fluid.layers.data( - name='x4', shape=[1], dtype="int32", lod_level=1) + x4 = fluid.layers.data(name='x4', + shape=[1], + dtype="int32", + lod_level=1) fluid.layers.hash(input=x4, hash_size=2**32, num_hash=2.5) self.assertRaises(TypeError, test_num_hash_type) diff --git a/python/paddle/fluid/tests/unittests/test_hdfs1.py b/python/paddle/fluid/tests/unittests/test_hdfs1.py index 65d12c31e39..2fa312fc20b 100644 --- a/python/paddle/fluid/tests/unittests/test_hdfs1.py +++ b/python/paddle/fluid/tests/unittests/test_hdfs1.py @@ -26,12 +26,12 @@ java_home = os.environ["JAVA_HOME"] class FSTest1(FSTestBase): + def test_timeout(self): - fs = HDFSClient( - "/usr/local/hadoop-2.7.7/", - None, - time_out=6 * 1000, - sleep_inter=100) + fs = HDFSClient("/usr/local/hadoop-2.7.7/", + None, + time_out=6 * 1000, + sleep_inter=100) src = "hdfs_test_timeout" dst = "new_hdfs_test_timeout" fs.delete(dst) @@ -42,8 +42,8 @@ class FSTest1(FSTestBase): cmd = "{} -mv {} {}".format(fs._base_cmd, src, dst) try: fs.mv(src, dst, test_exists=False) - self.assertFalse(1, "can't execute cmd:{} output:{}".format(cmd, - output)) + self.assertFalse( + 1, "can't execute cmd:{} output:{}".format(cmd, output)) except FSTimeOut as e: print("execute mv {} to {} timeout".format(src, dst)) @@ -52,11 +52,10 @@ class FSTest1(FSTestBase): print("second mv ret:{} output:{}".format(ret, output)) def test_is_dir(self): - fs = HDFSClient( - "/usr/local/hadoop-2.7.7/", - None, - time_out=6 * 1000, - sleep_inter=100) + fs = HDFSClient("/usr/local/hadoop-2.7.7/", + None, + time_out=6 * 1000, + sleep_inter=100) self.assertFalse(fs.is_dir("./test_hdfs.py")) s = """ java.io.IOException: Input/output error @@ -78,18 +77,16 @@ java.io.IOException: Input/output error def test_config(self): config = {"fs.default.name": "hdfs://xxx", "hadoop.job.ugi": "ugi"} - fs = HDFSClient( - "/usr/local/hadoop-2.7.7/", - config, - time_out=6 * 1000, - sleep_inter=100) + fs = HDFSClient("/usr/local/hadoop-2.7.7/", + config, + time_out=6 * 1000, + sleep_inter=100) def test_exists(self): - fs = HDFSClient( - "/usr/local/hadoop-2.7.7/", - None, - time_out=6 * 1000, - sleep_inter=100) + fs = HDFSClient("/usr/local/hadoop-2.7.7/", + None, + time_out=6 * 1000, + sleep_inter=100) self.assertFalse(fs.is_exist(os.path.abspath("./xxxx"))) self.assertFalse(fs.is_dir(os.path.abspath("./xxxx"))) self.assertTrue(fs.is_dir(os.path.abspath("./xxx/.."))) diff --git a/python/paddle/fluid/tests/unittests/test_hdfs2.py b/python/paddle/fluid/tests/unittests/test_hdfs2.py index a74fc558382..a77368d11a1 100644 --- a/python/paddle/fluid/tests/unittests/test_hdfs2.py +++ b/python/paddle/fluid/tests/unittests/test_hdfs2.py @@ -26,12 +26,12 @@ java_home = os.environ["JAVA_HOME"] class FSTest2(FSTestBase): + def test_hdfs(self): - fs = HDFSClient( - "/usr/local/hadoop-2.7.7/", - None, - time_out=5 * 1000, - sleep_inter=100) + fs = HDFSClient("/usr/local/hadoop-2.7.7/", + None, + time_out=5 * 1000, + sleep_inter=100) self._test_rm(fs) self._test_touch(fs) self._test_dirs(fs) diff --git a/python/paddle/fluid/tests/unittests/test_hdfs3.py b/python/paddle/fluid/tests/unittests/test_hdfs3.py index 57b0b1ba45f..450aceb5a74 100644 --- 
a/python/paddle/fluid/tests/unittests/test_hdfs3.py +++ b/python/paddle/fluid/tests/unittests/test_hdfs3.py @@ -26,12 +26,12 @@ java_home = os.environ["JAVA_HOME"] class FSTest3(FSTestBase): + def test_hdfs(self): - fs = HDFSClient( - "/usr/local/hadoop-2.7.7/", - None, - time_out=5 * 1000, - sleep_inter=100) + fs = HDFSClient("/usr/local/hadoop-2.7.7/", + None, + time_out=5 * 1000, + sleep_inter=100) self._test_mkdirs(fs) self._test_list_dir(fs) self._test_try_upload(fs) diff --git a/python/paddle/fluid/tests/unittests/test_hinge_embedding_loss.py b/python/paddle/fluid/tests/unittests/test_hinge_embedding_loss.py index 91c1b45cbca..9f281e6bf39 100644 --- a/python/paddle/fluid/tests/unittests/test_hinge_embedding_loss.py +++ b/python/paddle/fluid/tests/unittests/test_hinge_embedding_loss.py @@ -34,6 +34,7 @@ def calc_hinge_embedding_loss(input, label, margin=1.0, reduction='mean'): class TestFunctionalHingeEmbeddingLoss(unittest.TestCase): + def setUp(self): self.margin = 1.0 self.shape = (10, 10, 5) @@ -51,37 +52,45 @@ class TestFunctionalHingeEmbeddingLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_result.numpy(), expected)) self.assertTrue(dy_result.shape, [1]) - dy_result = paddle.nn.functional.hinge_embedding_loss( - input, label, reduction='sum') - expected = calc_hinge_embedding_loss( - self.input_np, self.label_np, reduction='sum') + dy_result = paddle.nn.functional.hinge_embedding_loss(input, + label, + reduction='sum') + expected = calc_hinge_embedding_loss(self.input_np, + self.label_np, + reduction='sum') self.assertTrue(np.allclose(dy_result.numpy(), expected)) self.assertTrue(dy_result.shape, [1]) - dy_result = paddle.nn.functional.hinge_embedding_loss( - input, label, reduction='none') - expected = calc_hinge_embedding_loss( - self.input_np, self.label_np, reduction='none') + dy_result = paddle.nn.functional.hinge_embedding_loss(input, + label, + reduction='none') + expected = calc_hinge_embedding_loss(self.input_np, + self.label_np, + reduction='none') self.assertTrue(np.allclose(dy_result.numpy(), expected)) self.assertTrue(dy_result.shape, self.shape) def run_static_check(self, place=paddle.CPUPlace): paddle.enable_static() for reduction in ['none', 'mean', 'sum']: - expected = calc_hinge_embedding_loss( - self.input_np, self.label_np, reduction=reduction) + expected = calc_hinge_embedding_loss(self.input_np, + self.label_np, + reduction=reduction) with program_guard(Program(), Program()): - input = paddle.static.data( - name="input", shape=self.shape, dtype=paddle.float64) - label = paddle.static.data( - name="label", shape=self.shape, dtype=paddle.float64) + input = paddle.static.data(name="input", + shape=self.shape, + dtype=paddle.float64) + label = paddle.static.data(name="label", + shape=self.shape, + dtype=paddle.float64) st_result = paddle.nn.functional.hinge_embedding_loss( input, label, reduction=reduction) exe = paddle.static.Executor(place) - result_numpy, = exe.run( - feed={"input": self.input_np, - "label": self.label_np}, - fetch_list=[st_result]) + result_numpy, = exe.run(feed={ + "input": self.input_np, + "label": self.label_np + }, + fetch_list=[st_result]) self.assertTrue(np.allclose(result_numpy, expected)) def test_cpu(self): @@ -96,6 +105,7 @@ class TestFunctionalHingeEmbeddingLoss(unittest.TestCase): # test case the raise message def test_reduce_errors(self): + def test_value_error(): loss = paddle.nn.functional.hinge_embedding_loss( self.input_np, self.label_np, reduction='reduce_mean') @@ -104,6 +114,7 @@ class 
TestFunctionalHingeEmbeddingLoss(unittest.TestCase): class TestClassHingeEmbeddingLoss(unittest.TestCase): + def setUp(self): self.margin = 1.0 self.shape = (10, 10, 5) @@ -124,37 +135,43 @@ class TestClassHingeEmbeddingLoss(unittest.TestCase): hinge_embedding_loss = paddle.nn.loss.HingeEmbeddingLoss( reduction='sum') dy_result = hinge_embedding_loss(input, label) - expected = calc_hinge_embedding_loss( - self.input_np, self.label_np, reduction='sum') + expected = calc_hinge_embedding_loss(self.input_np, + self.label_np, + reduction='sum') self.assertTrue(np.allclose(dy_result.numpy(), expected)) self.assertTrue(dy_result.shape, [1]) hinge_embedding_loss = paddle.nn.loss.HingeEmbeddingLoss( reduction='none') dy_result = hinge_embedding_loss(input, label) - expected = calc_hinge_embedding_loss( - self.input_np, self.label_np, reduction='none') + expected = calc_hinge_embedding_loss(self.input_np, + self.label_np, + reduction='none') self.assertTrue(np.allclose(dy_result.numpy(), expected)) self.assertTrue(dy_result.shape, self.shape) def run_static_check(self, place=paddle.CPUPlace): paddle.enable_static() for reduction in ['none', 'mean', 'sum']: - expected = calc_hinge_embedding_loss( - self.input_np, self.label_np, reduction=reduction) + expected = calc_hinge_embedding_loss(self.input_np, + self.label_np, + reduction=reduction) with program_guard(Program(), Program()): - input = paddle.static.data( - name="input", shape=self.shape, dtype=paddle.float64) - label = paddle.static.data( - name="label", shape=self.shape, dtype=paddle.float64) + input = paddle.static.data(name="input", + shape=self.shape, + dtype=paddle.float64) + label = paddle.static.data(name="label", + shape=self.shape, + dtype=paddle.float64) hinge_embedding_loss = paddle.nn.loss.HingeEmbeddingLoss( reduction=reduction) st_result = hinge_embedding_loss(input, label) exe = paddle.static.Executor(place) - result_numpy, = exe.run( - feed={"input": self.input_np, - "label": self.label_np}, - fetch_list=[st_result]) + result_numpy, = exe.run(feed={ + "input": self.input_np, + "label": self.label_np + }, + fetch_list=[st_result]) self.assertTrue(np.allclose(result_numpy, expected)) def test_cpu(self): @@ -169,6 +186,7 @@ class TestClassHingeEmbeddingLoss(unittest.TestCase): # test case the raise message def test_reduce_errors(self): + def test_value_error(): hinge_embedding_loss = paddle.nn.loss.HingeEmbeddingLoss( reduction='reduce_mean') diff --git a/python/paddle/fluid/tests/unittests/test_hinge_loss_op.py b/python/paddle/fluid/tests/unittests/test_hinge_loss_op.py index 2e2d0d2ea48..60ea132961e 100644 --- a/python/paddle/fluid/tests/unittests/test_hinge_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_hinge_loss_op.py @@ -20,6 +20,7 @@ from op_test import OpTest class TestHingeLossOp(OpTest): + def setUp(self): self.op_type = 'hinge_loss' samples_num = 100 diff --git a/python/paddle/fluid/tests/unittests/test_histogram_op.py b/python/paddle/fluid/tests/unittests/test_histogram_op.py index 819029c5fcd..17b7b95942f 100644 --- a/python/paddle/fluid/tests/unittests/test_histogram_op.py +++ b/python/paddle/fluid/tests/unittests/test_histogram_op.py @@ -62,10 +62,9 @@ class TestHistogramOpAPI(unittest.TestCase): inputs_np = np.array([[2, 4, 2], [2, 5, 4]]).astype(np.int64) inputs = paddle.to_tensor(inputs_np) actual = paddle.histogram(inputs, bins=5, min=1, max=5) - self.assertTrue( - (actual.numpy() == expected).all(), - msg='histogram output is wrong, out =' + - str(actual.numpy())) + self.assertTrue((actual.numpy() == 
expected).all(), + msg='histogram output is wrong, out =' + + str(actual.numpy())) class TestHistogramOpError(unittest.TestCase): @@ -83,8 +82,9 @@ class TestHistogramOpError(unittest.TestCase): """Test bins should be greater than or equal to 1.""" def net_func(): - input_value = paddle.fluid.layers.fill_constant( - shape=[3, 4], dtype='float32', value=3.0) + input_value = paddle.fluid.layers.fill_constant(shape=[3, 4], + dtype='float32', + value=3.0) paddle.histogram(input=input_value, bins=-1, min=1, max=5) with self.assertRaises(IndexError): @@ -94,8 +94,9 @@ class TestHistogramOpError(unittest.TestCase): """Test max must be larger or equal to min.""" def net_func(): - input_value = paddle.fluid.layers.fill_constant( - shape=[3, 4], dtype='float32', value=3.0) + input_value = paddle.fluid.layers.fill_constant(shape=[3, 4], + dtype='float32', + value=3.0) paddle.histogram(input=input_value, bins=1, min=5, max=1) with self.assertRaises(ValueError): @@ -105,8 +106,9 @@ class TestHistogramOpError(unittest.TestCase): """Test range of min, max is not finite""" def net_func(): - input_value = paddle.fluid.layers.fill_constant( - shape=[3, 4], dtype='float32', value=3.0) + input_value = paddle.fluid.layers.fill_constant(shape=[3, 4], + dtype='float32', + value=3.0) paddle.histogram(input=input_value, bins=1, min=-np.inf, max=5) with self.assertRaises(ValueError): @@ -115,15 +117,24 @@ class TestHistogramOpError(unittest.TestCase): def test_type_errors(self): with program_guard(Program()): # The input type must be Variable. - self.assertRaises( - TypeError, paddle.histogram, 1, bins=5, min=1, max=5) + self.assertRaises(TypeError, + paddle.histogram, + 1, + bins=5, + min=1, + max=5) # The input type must be 'int32', 'int64', 'float32', 'float64' x_bool = fluid.data(name='x_bool', shape=[4, 3], dtype='bool') - self.assertRaises( - TypeError, paddle.histogram, x_bool, bins=5, min=1, max=5) + self.assertRaises(TypeError, + paddle.histogram, + x_bool, + bins=5, + min=1, + max=5) class TestHistogramOp(OpTest): + def setUp(self): self.op_type = "histogram" self.init_test_case() @@ -131,8 +142,9 @@ class TestHistogramOp(OpTest): self.python_api = paddle.histogram self.inputs = {"X": np_input} self.init_attrs() - Out, _ = np.histogram( - np_input, bins=self.bins, range=(self.min, self.max)) + Out, _ = np.histogram(np_input, + bins=self.bins, + range=(self.min, self.max)) self.outputs = {"Out": Out.astype(np.int64)} def init_test_case(self): diff --git a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py index 51ff8ec943d..fc8b0d114d5 100644 --- a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py +++ b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py @@ -34,6 +34,7 @@ def find_latest_set(num): class CodeTable(object): + def __init__(self, num_classes, code): self.c = num_classes + code @@ -48,6 +49,7 @@ class CodeTable(object): class CodeTableWithCustomTree(object): + def __init__(self, path_table, path_code, index): self.ptable_ = path_table self.pcode_ = path_code @@ -171,6 +173,7 @@ def hsigmoidWithCustomTree(x, w, path_table, path_code, label, bias, class TestHSigmoidOp(OpTest): + def setUp(self): self.op_type = "hierarchical_sigmoid" num_classes = 101 @@ -193,14 +196,16 @@ class TestHSigmoidOp(OpTest): self.check_output() def test_check_grad(self): - self.check_grad( - ['X', 'W', 'Bias'], ['Out'], user_defined_grads=self.user_grads) + self.check_grad(['X', 'W', 'Bias'], ['Out'], + user_defined_grads=self.user_grads) 
@skip_check_grad_ci( - reason="For 'TestHSigmoidOpSparse', check_grad is separately calculated by 'TestHSigmoidOpWithSparseGrad'." + reason= + "For 'TestHSigmoidOpSparse', check_grad is separately calculated by 'TestHSigmoidOpWithSparseGrad'." ) class TestHSigmoidOpSparse(OpTest): + def setUp(self): self.op_type = "hierarchical_sigmoid" num_classes = 6 #using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample @@ -210,13 +215,13 @@ class TestHSigmoidOpSparse(OpTest): w = np.random.random((num_classes - 1, feature_size)) label = np.array([0, 1, 4, 5]).astype('int64') path_table = np.array([ - (0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1), (0, 2, -1, - -1, -1) + (0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1), + (0, 2, -1, -1, -1) ]).astype( 'int64') #np.array to store 1,2,5,6s' non-leaf path(root -> leaf) - path_code = np.array( - [(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), (1, 0, 0, -1, -1), - (0, 1, -1, -1, -1)]).astype('int64') #np.array to store + path_code = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), + (1, 0, 0, -1, -1), (0, 1, -1, -1, -1) + ]).astype('int64') #np.array to store bias = np.random.random((num_classes - 1, 1)) self.attrs = {'num_classes': num_classes, 'is_sparse': True} self.inputs = { @@ -236,12 +241,15 @@ class TestHSigmoidOpSparse(OpTest): class TestHSigmoidOpWithSparseGrad(unittest.TestCase): + def hs_net_conf(self, is_sparse): input_word = fluid.layers.data(name="x", shape=[1], dtype='int64') - path_table = fluid.layers.data( - name='path_table', shape=[3], dtype='int64') - path_code = fluid.layers.data( - name='path_code', shape=[3], dtype='int64') + path_table = fluid.layers.data(name='path_table', + shape=[3], + dtype='int64') + path_code = fluid.layers.data(name='path_code', + shape=[3], + dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64') data_list = [input_word, path_table, path_code, label] @@ -253,15 +261,14 @@ class TestHSigmoidOpWithSparseGrad(unittest.TestCase): param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal( scale=1 / math.sqrt(3)))) - cost = fluid.layers.hsigmoid( - input=emb, - label=label, - bias_attr=True, - num_classes=3, - path_table=path_table, - path_code=path_code, - is_custom=True, - is_sparse=is_sparse) + cost = fluid.layers.hsigmoid(input=emb, + label=label, + bias_attr=True, + num_classes=3, + path_table=path_table, + path_code=path_code, + is_custom=True, + is_sparse=is_sparse) avg_cost = fluid.layers.reduce_mean(cost) @@ -304,9 +311,11 @@ class TestHSigmoidOpWithSparseGrad(unittest.TestCase): @skip_check_grad_ci( - reason="[skip shape check] The huffman tree is structed separately. It will be complicated if use large shape." + reason= + "[skip shape check] The huffman tree is structed separately. It will be complicated if use large shape." 
) class TestHSigmoidOpWithCostumTree(OpTest): + def setUp(self): self.op_type = "hierarchical_sigmoid" num_classes = 6 #using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample @@ -316,13 +325,13 @@ class TestHSigmoidOpWithCostumTree(OpTest): w = np.random.uniform(-1, 1, (num_classes - 1, feature_size)) label = np.array([0, 1, 4, 5]).astype('int64') path_table = np.array([ - (0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1), (0, 2, -1, - -1, -1) + (0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1), + (0, 2, -1, -1, -1) ]).astype( 'int64') #np.array to store 1,2,5,6s' non-leaf path(root -> leaf) - path_code = np.array( - [(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), (1, 0, 0, -1, -1), - (0, 1, -1, -1, -1)]).astype('int64') #np.array to store + path_code = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), + (1, 0, 0, -1, -1), (0, 1, -1, -1, -1) + ]).astype('int64') #np.array to store bias = np.random.random((num_classes - 1, 1)) self.attrs = {'num_classes': num_classes, 'is_sparse': False} self.inputs = { @@ -345,9 +354,11 @@ class TestHSigmoidOpWithCostumTree(OpTest): @skip_check_grad_ci( - reason="[skip shape check] The huffman tree is structed separately. It will be complicated if use large shape." + reason= + "[skip shape check] The huffman tree is structed separately. It will be complicated if use large shape." ) class TestHSigmoidOpWithCostumTreeWithoutBias(OpTest): + def setUp(self): self.op_type = "hierarchical_sigmoid" num_classes = 6 #using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample @@ -357,13 +368,13 @@ class TestHSigmoidOpWithCostumTreeWithoutBias(OpTest): w = np.random.uniform(-1, 1, (num_classes - 1, feature_size)) label = np.array([0, 1, 4, 5]).astype('int64') path_table = np.array([ - (0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1), (0, 2, -1, - -1, -1) + (0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1), + (0, 2, -1, -1, -1) ]).astype( 'int64') #np.array to store 1,2,5,6s' non-leaf path(root -> leaf) - path_code = np.array( - [(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), (1, 0, 0, -1, -1), - (0, 1, -1, -1, -1)]).astype('int64') #np.array to store + path_code = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), + (1, 0, 0, -1, -1), (0, 1, -1, -1, -1) + ]).astype('int64') #np.array to store # bias = np.random.random((num_classes - 1, 1)).astype("float32") self.attrs = {'num_classes': num_classes, 'is_sparse': False} self.inputs = { @@ -373,14 +384,13 @@ class TestHSigmoidOpWithCostumTreeWithoutBias(OpTest): 'PathCode': path_code, 'Label': label, } - pre_output, out = hsigmoidWithCustomTree( - x=x, - w=w, - path_table=path_table, - path_code=path_code, - label=label, - bias=None, - num_classes=num_classes) + pre_output, out = hsigmoidWithCustomTree(x=x, + w=w, + path_table=path_table, + path_code=path_code, + label=label, + bias=None, + num_classes=num_classes) self.outputs = {'PreOut': pre_output, 'Out': out} def test_check_output(self): @@ -404,12 +414,13 @@ class TestHSigmoidLossAPI(unittest.TestCase): self.x_np = np.random.uniform( -1, 1, [self.batch_size, self.feature_size]).astype(self.dtype) - self.labels_np = np.random.randint( - self.num_classes, size=(self.batch_size, 1), dtype='int64') + self.labels_np = np.random.randint(self.num_classes, + size=(self.batch_size, 1), + dtype='int64') self.weight_np = np.random.uniform( -1, 1, [self.num_classes - 1, self.feature_size]).astype(self.dtype) - self.bias_np = np.random.uniform(-1, 1, ( - self.num_classes - 1, )).astype(self.dtype) + self.bias_np = np.random.uniform( + -1, 
1, (self.num_classes - 1, )).astype(self.dtype) self.path_table_np = None self.path_code_np = None _, self.out_np = hsigmoid(self.x_np, self.weight_np, self.labels_np, @@ -417,10 +428,12 @@ class TestHSigmoidLossAPI(unittest.TestCase): self.set_attrs() if self.is_custom: - _, self.out_np = hsigmoidWithCustomTree( - self.x_np, self.weight_np, self.path_table_np, - self.path_code_np, self.labels_np, - self.bias_np.reshape(-1, 1), self.num_classes) + _, self.out_np = hsigmoidWithCustomTree(self.x_np, self.weight_np, + self.path_table_np, + self.path_code_np, + self.labels_np, + self.bias_np.reshape(-1, 1), + self.num_classes) def set_attrs(self): pass @@ -456,7 +469,9 @@ class TestHSigmoidLossAPI(unittest.TestCase): x = paddle.static.data('x', [-1, self.feature_size]) labels = paddle.static.data('labels', [-1, 1], 'int64') weight = paddle.static.data('weight', [-1, self.feature_size]) - bias = paddle.static.data('bias', [-1, ]) + bias = paddle.static.data('bias', [ + -1, + ]) path_table = None path_code = None if self.is_custom: @@ -544,36 +559,33 @@ class TestHSigmoidLossAPI(unittest.TestCase): weight_int32) bias_int32 = paddle.static.data('bias_int32', [7], 'int32') - self.assertRaises( - TypeError, - F.hsigmoid_loss, - x, - label, - 8, - weight, - bias=bias_int32) + self.assertRaises(TypeError, + F.hsigmoid_loss, + x, + label, + 8, + weight, + bias=bias_int32) path_table_int32 = paddle.static.data('path_table_int32', [7], 'int32') - self.assertRaises( - TypeError, - F.hsigmoid_loss, - x, - label, - 8, - weight, - path_table=path_table_int32) + self.assertRaises(TypeError, + F.hsigmoid_loss, + x, + label, + 8, + weight, + path_table=path_table_int32) path_code_int32 = paddle.static.data('path_code_int32', [7], 'int32') - self.assertRaises( - TypeError, - F.hsigmoid_loss, - x, - label, - 8, - weight, - path_code=path_code_int32) + self.assertRaises(TypeError, + F.hsigmoid_loss, + x, + label, + 8, + weight, + path_code=path_code_int32) # test paddle.nn.HSigmoidLoss paddle.disable_static(self.place) @@ -611,12 +623,15 @@ class TestHSigmoidLossAPI(unittest.TestCase): class TestHSigmoidLossAPICustom(TestHSigmoidLossAPI): + def set_attrs(self): self.is_custom = True - self.path_table_np = np.array([(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), ( - 0, 1, 4, -1, -1), (0, 2, -1, -1, -1)]).astype(np.int64) - self.path_code_np = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), ( - 1, 0, 0, -1, -1), (0, 1, -1, -1, -1)]).astype(np.int64) + self.path_table_np = np.array([(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), + (0, 1, 4, -1, -1), + (0, 2, -1, -1, -1)]).astype(np.int64) + self.path_code_np = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), + (1, 0, 0, -1, -1), + (0, 1, -1, -1, -1)]).astype(np.int64) def test_errors(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_huber_loss_op.py b/python/paddle/fluid/tests/unittests/test_huber_loss_op.py index 9a0437ad2f5..13460af90ed 100644 --- a/python/paddle/fluid/tests/unittests/test_huber_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_huber_loss_op.py @@ -31,6 +31,7 @@ def huber_loss_forward(val, delta): class TestHuberLossOp(OpTest): + def setUp(self): self.op_type = 'huber_loss' self.python_api = paddle.fluid.layers.huber_loss @@ -61,30 +62,38 @@ class TestHuberLossOp(OpTest): self.check_grad(['X', 'Y'], 'Out', check_eager=True) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], 'Out', max_relative_error=0.008, no_grad_set=set("residual")) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.008, + no_grad_set=set("residual")) 
def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], 'Out', max_relative_error=0.008, no_grad_set=set('residual')) + self.check_grad(['X'], + 'Out', + max_relative_error=0.008, + no_grad_set=set('residual')) def TestHuberLossOp1(TestHuberLossOp): + def set_shape(self): return (64) def TestHuberLossOp2(TestHuberLossOp): + def set_shape(self): return (6, 6) def TestHuberLossOp3(TestHuberLossOp): + def set_shape(self): return (6, 6, 1) class TestHuberLossOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input and label must be Variable diff --git a/python/paddle/fluid/tests/unittests/test_hybrid_parallel_inference_helper.py b/python/paddle/fluid/tests/unittests/test_hybrid_parallel_inference_helper.py index c7c3c87fadc..d10673829f3 100644 --- a/python/paddle/fluid/tests/unittests/test_hybrid_parallel_inference_helper.py +++ b/python/paddle/fluid/tests/unittests/test_hybrid_parallel_inference_helper.py @@ -21,6 +21,7 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestHybridParallelInferenceHelper(TestMultipleGpus): + def test_hybrid_parallel_inference_helper(self): self.run_mnist_2gpu('hybrid_parallel_inference_helper.py') diff --git a/python/paddle/fluid/tests/unittests/test_hybrid_parallel_topology.py b/python/paddle/fluid/tests/unittests/test_hybrid_parallel_topology.py index e8300113ddc..e3d341bcc02 100644 --- a/python/paddle/fluid/tests/unittests/test_hybrid_parallel_topology.py +++ b/python/paddle/fluid/tests/unittests/test_hybrid_parallel_topology.py @@ -21,6 +21,7 @@ import numpy as np class TestCommunicateTopology(unittest.TestCase): + def test_topology(self): topo = fleet.CommunicateTopology(["dp", "mp", "pp"], [2, 2, 2]) @@ -151,20 +152,20 @@ class TestCommunicateTopology(unittest.TestCase): # test get_axis_list self.assertEqual(topo.get_axis_list("dp", 0), [0, 1, 2, 3, 4, 5, 6, 7]) - self.assertEqual( - topo.get_axis_list("dp", 1), [8, 9, 10, 11, 12, 13, 14, 15]) - self.assertEqual( - topo.get_axis_list("mp", 0), [0, 2, 4, 6, 8, 10, 12, 14]) - self.assertEqual( - topo.get_axis_list("mp", 1), [1, 3, 5, 7, 9, 11, 13, 15]) - self.assertEqual( - topo.get_axis_list("pp", 0), [0, 1, 2, 3, 8, 9, 10, 11]) - self.assertEqual( - topo.get_axis_list("pp", 1), [4, 5, 6, 7, 12, 13, 14, 15]) - self.assertEqual( - topo.get_axis_list("sharding", 0), [0, 1, 4, 5, 8, 9, 12, 13]) - self.assertEqual( - topo.get_axis_list("sharding", 1), [2, 3, 6, 7, 10, 11, 14, 15]) + self.assertEqual(topo.get_axis_list("dp", 1), + [8, 9, 10, 11, 12, 13, 14, 15]) + self.assertEqual(topo.get_axis_list("mp", 0), + [0, 2, 4, 6, 8, 10, 12, 14]) + self.assertEqual(topo.get_axis_list("mp", 1), + [1, 3, 5, 7, 9, 11, 13, 15]) + self.assertEqual(topo.get_axis_list("pp", 0), + [0, 1, 2, 3, 8, 9, 10, 11]) + self.assertEqual(topo.get_axis_list("pp", 1), + [4, 5, 6, 7, 12, 13, 14, 15]) + self.assertEqual(topo.get_axis_list("sharding", 0), + [0, 1, 4, 5, 8, 9, 12, 13]) + self.assertEqual(topo.get_axis_list("sharding", 1), + [2, 3, 6, 7, 10, 11, 14, 15]) # test get_dim_size self.assertEqual(topo.get_dim_size("dp"), 2) diff --git a/python/paddle/fluid/tests/unittests/test_identity_op.py b/python/paddle/fluid/tests/unittests/test_identity_op.py index 5c2ff2138ee..17174b0d8e9 100644 --- a/python/paddle/fluid/tests/unittests/test_identity_op.py +++ b/python/paddle/fluid/tests/unittests/test_identity_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,6 +20,7 @@ import paddle class TestIdentityAPI(unittest.TestCase): + def setUp(self): self.shape = [4, 4] self.x = np.random.random((4, 4)).astype(np.float32) diff --git a/python/paddle/fluid/tests/unittests/test_im2sequence_op.py b/python/paddle/fluid/tests/unittests/test_im2sequence_op.py index c540531e7cf..bdfcf739b8f 100644 --- a/python/paddle/fluid/tests/unittests/test_im2sequence_op.py +++ b/python/paddle/fluid/tests/unittests/test_im2sequence_op.py @@ -87,10 +87,9 @@ def im2col(attrs, im, col): im_col_offset = col_col_idx * stride_width \ + filter_col_idx - padding_width - if (im_row_offset < 0 or - im_row_offset >= input_height or - im_col_offset < 0 or - im_col_offset >= input_width): + if (im_row_offset < 0 or im_row_offset >= input_height + or im_col_offset < 0 + or im_col_offset >= input_width): col[col_row_idx][col_col_idx][channel][\ filter_row_idx][filter_col_idx] = 0.0 else: @@ -125,6 +124,7 @@ def Im2Sequence(inputs, img_real_size, attrs): class TestBlockExpandOp(OpTest): + def config(self): self.batch_size = 1 self.img_channels = 3 @@ -155,6 +155,7 @@ class TestBlockExpandOp(OpTest): class TestBlockExpandOpCase2(TestBlockExpandOp): + def config(self): self.batch_size = 2 self.img_channels = 3 @@ -168,6 +169,7 @@ class TestBlockExpandOpCase2(TestBlockExpandOp): class TestBlockExpandOpCase3(TestBlockExpandOp): + def config(self): self.batch_size = 6 self.img_channels = 1 @@ -181,6 +183,7 @@ class TestBlockExpandOpCase3(TestBlockExpandOp): class TestBlockExpandOpCase4(TestBlockExpandOp): + def config(self): self.batch_size = 6 self.img_channels = 2 @@ -194,9 +197,11 @@ class TestBlockExpandOpCase4(TestBlockExpandOp): @skip_check_grad_ci( - reason="Since 'real_size' is used just in forward computation, we don't test the gradient here." + reason= + "Since 'real_size' is used just in forward computation, we don't test the gradient here." 
) class TestBlockExpandOpCase5(OpTest): + def config(self): self.batch_size = 1 self.img_channels = 3 @@ -225,6 +230,7 @@ class TestBlockExpandOpCase5(OpTest): class TestBlockExpandOpCase6(TestBlockExpandOpCase5): + def config(self): self.batch_size = 3 self.img_channels = 1 @@ -240,6 +246,7 @@ class TestBlockExpandOpCase6(TestBlockExpandOpCase5): class TestBlockExpandOpCase7(TestBlockExpandOpCase6): + def config(self): self.batch_size = 2 self.img_channels = 2 diff --git a/python/paddle/fluid/tests/unittests/test_image_classification_layer.py b/python/paddle/fluid/tests/unittests/test_image_classification_layer.py index 405637969af..a4404e00939 100644 --- a/python/paddle/fluid/tests/unittests/test_image_classification_layer.py +++ b/python/paddle/fluid/tests/unittests/test_image_classification_layer.py @@ -22,25 +22,26 @@ from paddle.fluid.framework import Program def conv_block(input, num_filter, groups, dropouts): - return nets.img_conv_group( - input=input, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act='relu', - conv_with_batchnorm=True, - conv_batchnorm_drop_rate=dropouts, - pool_type='max') + return nets.img_conv_group(input=input, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act='relu', + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type='max') class TestLayer(unittest.TestCase): + def test_batch_norm_layer(self): main_program = Program() startup_program = Program() with fluid.program_guard(main_program, startup_program): - images = fluid.layers.data( - name='pixel', shape=[3, 48, 48], dtype='float32') + images = fluid.layers.data(name='pixel', + shape=[3, 48, 48], + dtype='float32') hidden1 = fluid.layers.batch_norm(input=images) hidden2 = fluid.layers.fc(input=hidden1, size=128, act='relu') fluid.layers.batch_norm(input=hidden2) @@ -51,8 +52,9 @@ class TestLayer(unittest.TestCase): main_program = Program() startup_program = Program() with fluid.program_guard(main_program, startup_program): - images = fluid.layers.data( - name='pixel', shape=[3, 48, 48], dtype='float32') + images = fluid.layers.data(name='pixel', + shape=[3, 48, 48], + dtype='float32') fluid.layers.dropout(x=images, dropout_prob=0.5) print(str(main_program)) @@ -62,8 +64,9 @@ class TestLayer(unittest.TestCase): startup_program = Program() with fluid.program_guard(main_program, startup_program): - images = fluid.layers.data( - name='pixel', shape=[3, 48, 48], dtype='float32') + images = fluid.layers.data(name='pixel', + shape=[3, 48, 48], + dtype='float32') conv1 = conv_block(images, 64, 2, [0.3, 0]) conv_block(conv1, 256, 3, [0.4, 0.4, 0]) @@ -73,10 +76,12 @@ class TestLayer(unittest.TestCase): main_program = Program() startup_program = Program() with fluid.program_guard(main_program, startup_program): - image1 = fluid.layers.data( - name='pixel1', shape=[3, 48, 48], dtype='float32') - image2 = fluid.layers.data( - name='pixel2', shape=[3, 48, 48], dtype='float32') + image1 = fluid.layers.data(name='pixel1', + shape=[3, 48, 48], + dtype='float32') + image2 = fluid.layers.data(name='pixel2', + shape=[3, 48, 48], + dtype='float32') fluid.layers.elementwise_add(x=image1, y=image2, act='relu') print(main_program) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py index d200b77eea8..f06bb96ae92 100644 --- 
a/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_mixed_precision.py @@ -28,6 +28,7 @@ if fluid.core.is_compiled_with_cuda(): class SimpleConv(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -36,22 +37,22 @@ class SimpleConv(fluid.dygraph.Layer): groups=1, act=None): super(SimpleConv, self).__init__() - self._conv = fluid.dygraph.Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - bias_attr=None, - use_cudnn=True) + self._conv = fluid.dygraph.Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + bias_attr=None, + use_cudnn=True) def forward(self, inputs): return self._conv(inputs) class TestAutoCast(unittest.TestCase): + def amp_guard_white_op(self): data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32') with fluid.dygraph.guard(): @@ -92,31 +93,30 @@ class TestAutoCast(unittest.TestCase): tracer = fluid.framework._dygraph_tracer() base_white_list = fluid.dygraph.amp.auto_cast.WHITE_LIST base_black_list = fluid.dygraph.amp.auto_cast.BLACK_LIST - with fluid.dygraph.amp_guard( - custom_white_list=["log"], custom_black_list=["conv2d"]): + with fluid.dygraph.amp_guard(custom_white_list=["log"], + custom_black_list=["conv2d"]): white_list, black_list = tracer._get_amp_op_list() self.assertTrue( - set(white_list) == - (set(base_white_list) | {"log"}) - {"conv2d"}) + set(white_list) == (set(base_white_list) | {"log"}) - + {"conv2d"}) self.assertTrue( - set(black_list) == - (set(base_black_list) - {"log"}) | {"conv2d"}) + set(black_list) == (set(base_black_list) - {"log"}) + | {"conv2d"}) base_white_list = fluid.dygraph.amp.auto_cast.PURE_FP16_WHITE_LIST base_black_list = fluid.dygraph.amp.auto_cast.PURE_FP16_BLACK_LIST - with fluid.dygraph.amp_guard( - custom_white_list=["log"], - custom_black_list=["conv2d"], - level='O2'): + with fluid.dygraph.amp_guard(custom_white_list=["log"], + custom_black_list=["conv2d"], + level='O2'): white_list, black_list = tracer._get_amp_op_list() self.assertTrue( - set(white_list) == - (set(base_white_list) | {"log"}) - {"conv2d"}) + set(white_list) == (set(base_white_list) | {"log"}) - + {"conv2d"}) self.assertTrue( - set(black_list) == - (set(base_black_list) - {"log"}) | {"conv2d"}) + set(black_list) == (set(base_black_list) - {"log"}) + | {"conv2d"}) def test_custom_op_list(self): with _test_eager_guard(): @@ -128,15 +128,13 @@ class TestAutoCast(unittest.TestCase): def func(): with fluid.dygraph.guard(): - model = SimpleConv( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - with fluid.dygraph.amp_guard( - custom_white_list=["conv2d"], - custom_black_list=["conv2d"]): + model = SimpleConv(num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu') + with fluid.dygraph.amp_guard(custom_white_list=["conv2d"], + custom_black_list=["conv2d"]): inp = fluid.dygraph.to_variable(inp_np) out = model(inp) @@ -177,11 +175,15 @@ class TestAutoCast(unittest.TestCase): self.amp_guard_upsupported_fp16_op() def mode_exception(self): + def func(): data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32') with fluid.dygraph.guard(): - conv2d = fluid.dygraph.Conv2D( - 3, 2, 3, bias_attr=False, act=None) + conv2d = fluid.dygraph.Conv2D(3, + 2, + 3, + 
bias_attr=False, + act=None) data = fluid.dygraph.to_variable(data) with fluid.dygraph.amp_guard(level='O'): out = conv2d(data) @@ -195,13 +197,15 @@ class TestAutoCast(unittest.TestCase): class TestAmpScaler(unittest.TestCase): + def scale(self): with fluid.dygraph.guard(): data = paddle.rand([10, 1024]) scaler = paddle.fluid.dygraph.AmpScaler(init_loss_scaling=1024) scaled_data = scaler.scale(data) self.assertEqual( - np.array_equal(scaled_data.numpy(), data.numpy() * 1024), True) + np.array_equal(scaled_data.numpy(), + data.numpy() * 1024), True) def test_scale(self): with _test_eager_guard(): @@ -215,12 +219,11 @@ class TestAmpScaler(unittest.TestCase): paddle.seed(10) paddle.framework.random._manual_program_seed(10) with fluid.dygraph.guard(): - model = SimpleConv( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act='relu') + model = SimpleConv(num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu') optimizer = fluid.optimizer.SGDOptimizer( learning_rate=0.01, parameter_list=model.parameters()) scaler = fluid.dygraph.AmpScaler(init_loss_scaling=1024) @@ -232,8 +235,8 @@ class TestAmpScaler(unittest.TestCase): print('use scaler') scaled_loss = scaler.scale(loss) scaled_loss.backward() - optimize_ops, params_grads = scaler.minimize(optimizer, - scaled_loss) + optimize_ops, params_grads = scaler.minimize( + optimizer, scaled_loss) else: print('use no scaler') loss.backward() @@ -269,12 +272,11 @@ class TestAmpScaler(unittest.TestCase): paddle.seed(10) paddle.framework.random._manual_program_seed(10) with fluid.dygraph.guard(): - model = SimpleConv( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act='relu') + model = SimpleConv(num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu') optimizer = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters()) scaler = paddle.amp.GradScaler(init_loss_scaling=1024) @@ -312,12 +314,11 @@ class TestAmpScaler(unittest.TestCase): inp_np = np.random.random(size=[1, 3, 128, 128]).astype(np.float32) inp_np[0][1][2][3] = np.nan with fluid.dygraph.guard(): - model = SimpleConv( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act='relu') + model = SimpleConv(num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu') params_init = {} for param in model.parameters(): params_init[param.name] = param.numpy() @@ -344,6 +345,7 @@ class TestAmpScaler(unittest.TestCase): self.nan_inf() def step_update_exception(self): + def func1(): model = paddle.nn.Conv2D(3, 2, 3, bias_attr=True) optimizer = paddle.optimizer.SGD(learning_rate=0.01, @@ -396,14 +398,13 @@ class TestAmpScaler(unittest.TestCase): def test_get_and_set(self): with fluid.dygraph.guard(): - scaler = paddle.amp.GradScaler( - enable=True, - init_loss_scaling=1024, - incr_ratio=2.0, - decr_ratio=0.5, - incr_every_n_steps=1000, - decr_every_n_nan_or_inf=2, - use_dynamic_loss_scaling=True) + scaler = paddle.amp.GradScaler(enable=True, + init_loss_scaling=1024, + incr_ratio=2.0, + decr_ratio=0.5, + incr_every_n_steps=1000, + decr_every_n_nan_or_inf=2, + use_dynamic_loss_scaling=True) self.assertEqual(scaler.is_enable() == True, True) self.assertEqual(scaler.get_init_loss_scaling() == 1024, True) self.assertEqual(scaler.get_incr_ratio() == 2.0, True) @@ -424,14 +425,13 @@ class TestAmpScaler(unittest.TestCase): def test_state_dict_and_load_state_dict(self): with fluid.dygraph.guard(): - scaler1 = paddle.amp.GradScaler( - enable=True, - init_loss_scaling=14, - incr_ratio=233.0, - 
decr_ratio=0.523, - incr_every_n_steps=1090, - decr_every_n_nan_or_inf=20, - use_dynamic_loss_scaling=True) + scaler1 = paddle.amp.GradScaler(enable=True, + init_loss_scaling=14, + incr_ratio=233.0, + decr_ratio=0.523, + incr_every_n_steps=1090, + decr_every_n_nan_or_inf=20, + use_dynamic_loss_scaling=True) scaler_state = scaler1.state_dict() scaler2 = paddle.amp.GradScaler(enable=True) scaler2.load_state_dict(scaler_state) @@ -446,6 +446,7 @@ class TestAmpScaler(unittest.TestCase): self.assertEqual(scaler3.is_enable() == False, True) def test_state_dict_and_load_state_dict_error(self): + def test_error(): state_empty = {} scaler = paddle.amp.GradScaler(enable=True) @@ -455,6 +456,7 @@ class TestAmpScaler(unittest.TestCase): def reader_decorator(reader): + def __reader__(): for item in reader(): img = np.array(item[0]).astype('float32').reshape(3, 224, 224) @@ -465,6 +467,7 @@ def reader_decorator(reader): class TestGradScalerStateDict(unittest.TestCase): + def train_resnet(self, enable_amp=True, use_data_loader=True, @@ -478,8 +481,8 @@ class TestGradScalerStateDict(unittest.TestCase): paddle.framework.random._manual_program_seed(seed) resnet = ResNet(use_cudnn=True) - optimizer = optimizer_setting( - train_parameters, parameter_list=resnet.parameters()) + optimizer = optimizer_setting(train_parameters, + parameter_list=resnet.parameters()) np.random.seed(seed) train_reader = paddle.batch( paddle.dataset.flowers.train(use_xmap=False), batch_size=batch_size) @@ -489,14 +492,14 @@ class TestGradScalerStateDict(unittest.TestCase): dy_param_init_value[param.name] = param.numpy() program = None - scaler = paddle.amp.GradScaler( - enable=enable_amp, init_loss_scaling=2.**10) + scaler = paddle.amp.GradScaler(enable=enable_amp, + init_loss_scaling=2.**10) if use_data_loader: - train_reader = paddle.batch( - reader_decorator(paddle.dataset.flowers.train(use_xmap=False)), - batch_size=batch_size, - drop_last=True) + train_reader = paddle.batch(reader_decorator( + paddle.dataset.flowers.train(use_xmap=False)), + batch_size=batch_size, + drop_last=True) train_loader = fluid.io.DataLoader.from_generator( capacity=4, use_double_buffer=True, @@ -516,8 +519,8 @@ class TestGradScalerStateDict(unittest.TestCase): if len(np.array([x[1] for x in data]).astype('int64')) != batch_size: continue - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(-1, 1) img = paddle.to_tensor(dy_x_data) label = paddle.to_tensor(y_data) @@ -540,8 +543,8 @@ class TestGradScalerStateDict(unittest.TestCase): for param in resnet.parameters(): if param.trainable: np_array = np.array(param._grad_ivar().value().get_tensor()) - dy_grad_value[param.name + fluid.core.grad_var_suffix( - )] = np_array + dy_grad_value[param.name + + fluid.core.grad_var_suffix()] = np_array resnet.clear_gradients() @@ -558,12 +561,15 @@ class TestGradScalerStateDict(unittest.TestCase): return dy_out, dy_param_value, dy_grad_value def test_with_state_dict(self): + def func_isinstance(): with fluid.dygraph.guard(): - out_use_state_dict = self.train_resnet( - enable_amp=True, use_data_loader=True, use_save_load=True) - out_no_state_dict = self.train_resnet( - enable_amp=True, use_data_loader=True, use_save_load=False) + out_use_state_dict = self.train_resnet(enable_amp=True, + use_data_loader=True, + use_save_load=True) + out_no_state_dict = self.train_resnet(enable_amp=True, + use_data_loader=True, + use_save_load=False) print('save_load:', out_use_state_dict[0], 
out_no_state_dict[0]) self.assertTrue( np.allclose(out_use_state_dict[0], out_no_state_dict[0])) @@ -574,19 +580,25 @@ class TestGradScalerStateDict(unittest.TestCase): class TestAmpDecorator(unittest.TestCase): + def test_mode_exception(self): + def func(): with fluid.dygraph.guard(): model = fluid.dygraph.Conv2D(3, 2, 3, bias_attr=False, act=None) opt = paddle.optimizer.SGD(parameters=model.parameters()) - model, opt = paddle.amp.decorate( - models=model, optimizers=opt, level='O') + model, opt = paddle.amp.decorate(models=model, + optimizers=opt, + level='O') self.assertRaises(ValueError, func) def test_input_type_exception(self): + def test_error_model(): + class MyModel(object): + def __init__(self): print("A fake Model") @@ -605,7 +617,9 @@ class TestAmpDecorator(unittest.TestCase): self.assertRaises(RuntimeError, test_error_distributed_model) def test_error_optimizer(): + class MyOptimizer(object): + def __init__(self): print("A fake Optimizer") @@ -618,51 +632,48 @@ class TestAmpDecorator(unittest.TestCase): def test_set_master_weight(self): model1 = fluid.dygraph.Conv2D(3, 2, 3, bias_attr=False, act=None) - opt1 = paddle.optimizer.Adam( - learning_rate=0.0001, - parameters=model1.parameters(), - multi_precision=True) + opt1 = paddle.optimizer.Adam(learning_rate=0.0001, + parameters=model1.parameters(), + multi_precision=True) model2 = fluid.dygraph.Conv2D(3, 2, 3, bias_attr=False, act=None) - opt2 = paddle.optimizer.Adam( - learning_rate=0.0001, - parameters=model2.parameters(), - multi_precision=False) - - model1, opt1 = paddle.amp.decorate( - models=model1, optimizers=opt1, level='O2', master_weight=None) + opt2 = paddle.optimizer.Adam(learning_rate=0.0001, + parameters=model2.parameters(), + multi_precision=False) + + model1, opt1 = paddle.amp.decorate(models=model1, + optimizers=opt1, + level='O2', + master_weight=None) self.assertEqual(opt1._multi_precision, True) - models, opt2 = paddle.amp.decorate( - models=[model1, model2], - optimizers=opt2, - level='O2', - master_weight=None) + models, opt2 = paddle.amp.decorate(models=[model1, model2], + optimizers=opt2, + level='O2', + master_weight=None) self.assertEqual(opt2._multi_precision, True) model3 = fluid.dygraph.Conv2D(3, 2, 3, bias_attr=False, act=None) - opt3 = paddle.optimizer.Adam( - learning_rate=0.0001, parameters=model3.parameters()) + opt3 = paddle.optimizer.Adam(learning_rate=0.0001, + parameters=model3.parameters()) model4 = fluid.dygraph.Conv2D(3, 2, 3, bias_attr=False, act=None) - opt4 = paddle.optimizer.Adam( - learning_rate=0.0001, parameters=model4.parameters()) - - model3, opts = paddle.amp.decorate( - models=model3, - optimizers=[opt3, opt4], - level='O2', - master_weight=True) + opt4 = paddle.optimizer.Adam(learning_rate=0.0001, + parameters=model4.parameters()) + + model3, opts = paddle.amp.decorate(models=model3, + optimizers=[opt3, opt4], + level='O2', + master_weight=True) self.assertEqual(opts[0]._multi_precision, True) self.assertEqual(opts[1]._multi_precision, True) models = [model3, model4] optimizers = [opt3, opt4] - models, optimizers = paddle.amp.decorate( - models=models, - optimizers=optimizers, - level='O2', - master_weight=False) + models, optimizers = paddle.amp.decorate(models=models, + optimizers=optimizers, + level='O2', + master_weight=False) self.assertEqual(optimizers[0]._multi_precision, False) self.assertEqual(optimizers[1]._multi_precision, False) @@ -694,13 +705,17 @@ class TestAmpDecorator(unittest.TestCase): class TestPureFp16SaveLoad(unittest.TestCase): + def 
test_save_dtype_exception(self): + def func(): paddle.disable_static() model = fluid.dygraph.Conv2D(3, 2, 3, bias_attr=False, act=None) opt = paddle.optimizer.SGD(parameters=model.parameters()) - paddle.amp.decorate( - models=model, optimizers=opt, level='O2', save_dtype='int') + paddle.amp.decorate(models=model, + optimizers=opt, + level='O2', + save_dtype='int') self.assertRaises(ValueError, func) @@ -717,8 +732,8 @@ class TestPureFp16SaveLoad(unittest.TestCase): paddle.framework.random._manual_program_seed(seed) resnet = ResNet(use_cudnn=True) - optimizer = optimizer_setting( - train_parameters, parameter_list=resnet.parameters()) + optimizer = optimizer_setting(train_parameters, + parameter_list=resnet.parameters()) np.random.seed(seed) train_reader = paddle.batch( paddle.dataset.flowers.train(use_xmap=False), batch_size=batch_size) @@ -728,14 +743,14 @@ class TestPureFp16SaveLoad(unittest.TestCase): dy_param_init_value[param.name] = param.numpy() program = None - scaler = paddle.amp.GradScaler( - enable=enable_amp, init_loss_scaling=2.**10) + scaler = paddle.amp.GradScaler(enable=enable_amp, + init_loss_scaling=2.**10) if use_data_loader: - train_reader = paddle.batch( - reader_decorator(paddle.dataset.flowers.train(use_xmap=False)), - batch_size=batch_size, - drop_last=True) + train_reader = paddle.batch(reader_decorator( + paddle.dataset.flowers.train(use_xmap=False)), + batch_size=batch_size, + drop_last=True) train_loader = fluid.io.DataLoader.from_generator( capacity=4, use_double_buffer=True, @@ -745,11 +760,10 @@ class TestPureFp16SaveLoad(unittest.TestCase): train_reader = train_loader if enable_amp: - resnet, optimizer = paddle.amp.decorate( - models=resnet, - optimizers=optimizer, - level='O2', - save_dtype='float32') + resnet, optimizer = paddle.amp.decorate(models=resnet, + optimizers=optimizer, + level='O2', + save_dtype='float32') for batch_id, data in enumerate(train_reader()): if batch_id >= batch_num: @@ -762,8 +776,8 @@ class TestPureFp16SaveLoad(unittest.TestCase): if len(np.array([x[1] for x in data]).astype('int64')) != batch_size: continue - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(-1, 1) img = paddle.to_tensor(dy_x_data) label = paddle.to_tensor(y_data) @@ -787,8 +801,8 @@ class TestPureFp16SaveLoad(unittest.TestCase): for param in resnet.parameters(): if param.trainable: np_array = np.array(param._grad_ivar().value().get_tensor()) - dy_grad_value[param.name + fluid.core.grad_var_suffix( - )] = np_array + dy_grad_value[param.name + + fluid.core.grad_var_suffix()] = np_array resnet.clear_gradients() @@ -813,23 +827,25 @@ class TestPureFp16SaveLoad(unittest.TestCase): resnet.set_state_dict(obj_load['model']) optimizer.set_state_dict(obj_load['opt']) scaler.load_state_dict(obj_load['scaler']) - resnet, optimizer = paddle.amp.decorate( - models=resnet, - optimizers=optimizer, - level='O2', - save_dtype='float32') + resnet, optimizer = paddle.amp.decorate(models=resnet, + optimizers=optimizer, + level='O2', + save_dtype='float32') if use_data_loader: train_reader._reset() return dy_out, dy_param_value, dy_grad_value def test_with_save_load(self): + def func_isinstance(): with fluid.dygraph.guard(): - out_use_save_load = self.train_resnet( - enable_amp=True, use_data_loader=True, use_save_load=True) - out_no_save_load = self.train_resnet( - enable_amp=True, use_data_loader=True, use_save_load=False) + out_use_save_load = self.train_resnet(enable_amp=True, + 
use_data_loader=True, + use_save_load=True) + out_no_save_load = self.train_resnet(enable_amp=True, + use_data_loader=True, + use_save_load=False) print('save_load:', out_use_save_load[0], out_no_save_load[0]) self.assertTrue( np.allclose(out_use_save_load[0], out_no_save_load[0])) @@ -840,6 +856,7 @@ class TestPureFp16SaveLoad(unittest.TestCase): class TestPureFp16InferenceSaveLoad(unittest.TestCase): + def inference_save_load(self): BATCH_SIZE = 16 BATCH_NUM = 4 @@ -849,6 +866,7 @@ class TestPureFp16InferenceSaveLoad(unittest.TestCase): # define a random dataset class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): self.num_samples = num_samples @@ -862,6 +880,7 @@ class TestPureFp16InferenceSaveLoad(unittest.TestCase): return self.num_samples class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) @@ -872,11 +891,10 @@ class TestPureFp16InferenceSaveLoad(unittest.TestCase): def train(layer, loader, loss_fn, opt): for epoch_id in range(EPOCH_NUM): for batch_id, (image, label) in enumerate(loader()): - with paddle.amp.auto_cast( - enable=True, - custom_white_list=None, - custom_black_list=None, - level='O2'): + with paddle.amp.auto_cast(enable=True, + custom_white_list=None, + custom_black_list=None, + level='O2'): out = layer(image) loss = loss_fn(out, label) loss.backward() @@ -885,28 +903,27 @@ class TestPureFp16InferenceSaveLoad(unittest.TestCase): # train layer = LinearNet() - adam = paddle.optimizer.Adam( - learning_rate=0.001, - parameters=layer.parameters(), - multi_precision=True) + adam = paddle.optimizer.Adam(learning_rate=0.001, + parameters=layer.parameters(), + multi_precision=True) loss_fn = nn.CrossEntropyLoss() - layer, adam = paddle.amp.decorate( - models=layer, optimizers=adam, save_dtype='float32') + layer, adam = paddle.amp.decorate(models=layer, + optimizers=adam, + save_dtype='float32') dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) - loader = paddle.io.DataLoader( - dataset, - batch_size=BATCH_SIZE, - shuffle=True, - drop_last=True, - num_workers=2) + loader = paddle.io.DataLoader(dataset, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) train(layer, loader, loss_fn, adam) - # save + # save path = "example_model/linear" - paddle.jit.save( - layer, path, input_spec=[InputSpec( - shape=[IMAGE_SIZE], name='x')]) + paddle.jit.save(layer, + path, + input_spec=[InputSpec(shape=[IMAGE_SIZE], name='x')]) # jit.load loaded_layer = paddle.jit.load(path) @@ -920,8 +937,8 @@ class TestPureFp16InferenceSaveLoad(unittest.TestCase): # load_inference_model paddle.enable_static() exe = paddle.static.Executor() - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.load_inference_model(path, exe)) + [inference_program, feed_target_names, + fetch_targets] = (paddle.static.load_inference_model(path, exe)) tensor_img = x results = exe.run(inference_program, feed={feed_target_names[0]: tensor_img}, @@ -970,15 +987,18 @@ class TestResnet2(unittest.TestCase): # NOTE(zhiqiu): The Membership test operations(in / not in) calls "is" and "equal", # see details: https://docs.python.org/3/reference/expressions.html#membership-test-operations. 
# So do not use other_params = [p for p in resnet.parameters() if p not in conv_params] - optimizer = paddle.optimizer.Momentum( - parameters=[{ - 'params': conv_params, - 'learning_rate': 0.01 - }, { - 'params': other_params, - 'learning_rate': 0.001 - }], - multi_precision=True) + optimizer = paddle.optimizer.Momentum(parameters=[{ + 'params': + conv_params, + 'learning_rate': + 0.01 + }, { + 'params': + other_params, + 'learning_rate': + 0.001 + }], + multi_precision=True) else: optimizer = paddle.optimizer.SGD(parameters=resnet.parameters()) @@ -991,14 +1011,14 @@ class TestResnet2(unittest.TestCase): dy_param_init_value[param.name] = param.numpy() program = None - scaler = paddle.amp.GradScaler( - enable=enable_amp, init_loss_scaling=2.**10) + scaler = paddle.amp.GradScaler(enable=enable_amp, + init_loss_scaling=2.**10) if use_data_loader: - train_reader = paddle.batch( - reader_decorator(paddle.dataset.flowers.train(use_xmap=False)), - batch_size=batch_size, - drop_last=True) + train_reader = paddle.batch(reader_decorator( + paddle.dataset.flowers.train(use_xmap=False)), + batch_size=batch_size, + drop_last=True) train_loader = fluid.io.DataLoader.from_generator( capacity=4, use_double_buffer=True, @@ -1016,13 +1036,13 @@ class TestResnet2(unittest.TestCase): if use_data_loader: img, label = data else: - dy_x_data = np.array( - [x[0].reshape(3, 224, 224) for x in data]).astype('float32') + dy_x_data = np.array([x[0].reshape(3, 224, 224) + for x in data]).astype('float32') if len(np.array([x[1] for x in data]).astype('int64')) != batch_size: continue - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - -1, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(-1, 1) img = paddle.to_tensor(dy_x_data) label = paddle.to_tensor(y_data) @@ -1047,8 +1067,8 @@ class TestResnet2(unittest.TestCase): for param in resnet.parameters(): if param.trainable: np_array = np.array(param._grad_ivar().value().get_tensor()) - dy_grad_value[param.name + fluid.core.grad_var_suffix( - )] = np_array + dy_grad_value[param.name + + fluid.core.grad_var_suffix()] = np_array resnet.clear_gradients() @@ -1060,6 +1080,7 @@ class TestResnet2(unittest.TestCase): return dy_out, dy_param_value, dy_grad_value def test_resnet(self): + def func_isinstance(): with fluid.dygraph.guard(): out_fp32 = self.train_resnet(enable_amp=False) @@ -1068,51 +1089,50 @@ class TestResnet2(unittest.TestCase): print(out_fp32[0], out_amp[0], out_pure_fp16[0]) self.assertTrue(np.allclose(out_fp32[0], out_amp[0], atol=1.e-5)) self.assertTrue( - np.allclose( - out_fp32[0], out_pure_fp16[0], atol=1.e-2)) + np.allclose(out_fp32[0], out_pure_fp16[0], atol=1.e-2)) with _test_eager_guard(): func_isinstance() func_isinstance() def test_with_data_loader(self): + def func_isinstance(): with fluid.dygraph.guard(): - out_fp32 = self.train_resnet( - enable_amp=False, use_data_loader=True) - out_amp = self.train_resnet( - enable_amp=True, use_data_loader=True) - out_pure_fp16 = self.train_resnet( - enable_amp=True, use_data_loader=True, level='O2') + out_fp32 = self.train_resnet(enable_amp=False, + use_data_loader=True) + out_amp = self.train_resnet(enable_amp=True, + use_data_loader=True) + out_pure_fp16 = self.train_resnet(enable_amp=True, + use_data_loader=True, + level='O2') print(out_fp32[0], out_amp[0], out_pure_fp16[0]) self.assertTrue(np.allclose(out_fp32[0], out_amp[0], atol=1.e-5)) self.assertTrue( - np.allclose( - out_fp32[0], out_pure_fp16[0], atol=1.e-2)) + np.allclose(out_fp32[0], out_pure_fp16[0], 
atol=1.e-2)) with _test_eager_guard(): func_isinstance() func_isinstance() def test_param_group(self): + def func_isinstance(): with fluid.dygraph.guard(): - out_fp32 = self.train_resnet( - enable_amp=False, - use_data_loader=True, - use_param_group=True) - out_amp = self.train_resnet( - enable_amp=True, use_data_loader=True, use_param_group=True) - out_pure_fp16 = self.train_resnet( - enable_amp=True, - use_data_loader=True, - use_param_group=True, - level='O2') + out_fp32 = self.train_resnet(enable_amp=False, + use_data_loader=True, + use_param_group=True) + out_amp = self.train_resnet(enable_amp=True, + use_data_loader=True, + use_param_group=True) + out_pure_fp16 = self.train_resnet(enable_amp=True, + use_data_loader=True, + use_param_group=True, + level='O2') print(out_fp32[0], out_amp[0], out_pure_fp16[0]) self.assertTrue(np.allclose(out_fp32[0], out_amp[0], atol=1.e-5)) self.assertTrue( - np.allclose( - out_fp32[0], out_pure_fp16[0], atol=1.e-2)) + np.allclose(out_fp32[0], out_pure_fp16[0], atol=1.e-2)) with _test_eager_guard(): func_isinstance() @@ -1135,8 +1155,8 @@ class TestResnet(unittest.TestCase): paddle.framework.random._manual_program_seed(seed) resnet = ResNet(use_cudnn=True) - optimizer = optimizer_setting( - train_parameters, parameter_list=resnet.parameters()) + optimizer = optimizer_setting(train_parameters, + parameter_list=resnet.parameters()) optimizer = paddle.optimizer.Momentum( parameters=resnet.parameters(), multi_precision=True) np.random.seed(seed) @@ -1149,8 +1169,8 @@ class TestResnet(unittest.TestCase): dy_param_init_value[param.name] = param.numpy() program = None - scaler = paddle.fluid.dygraph.AmpScaler( - enable=enable_amp, init_loss_scaling=2.**10) + scaler = paddle.fluid.dygraph.AmpScaler(enable=enable_amp, + init_loss_scaling=2.**10) if enable_amp and (level == 'O2'): resnet, optimizer = paddle.fluid.dygraph.amp_decorate( @@ -1159,18 +1179,18 @@ class TestResnet(unittest.TestCase): for batch_id, data in enumerate(train_reader()): if batch_id >= batch_num: break - dy_x_data = np.array( - [x[0].reshape(3, 224, 224) for x in data]).astype('float32') + dy_x_data = np.array([x[0].reshape(3, 224, 224) + for x in data]).astype('float32') if len(np.array([x[1] for x in data]).astype('int64')) != batch_size: continue - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - -1, 1) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(-1, 1) img = fluid.dygraph.to_variable(dy_x_data) label = fluid.dygraph.to_variable(y_data) label.stop_gradient = True - with paddle.fluid.dygraph.amp_guard( - enable=enable_amp, level=level): + with paddle.fluid.dygraph.amp_guard(enable=enable_amp, + level=level): out = resnet(img) loss = fluid.layers.cross_entropy(input=out, label=label) @@ -1186,10 +1206,10 @@ class TestResnet(unittest.TestCase): dy_grad_value = {} for param in resnet.parameters(): if param.trainable: - np_array = np.array(param._grad_ivar().value() - .get_tensor()) - dy_grad_value[param.name + fluid.core.grad_var_suffix( - )] = np_array + np_array = np.array( + param._grad_ivar().value().get_tensor()) + dy_grad_value[param.name + + fluid.core.grad_var_suffix()] = np_array resnet.clear_gradients() @@ -1200,6 +1220,7 @@ class TestResnet(unittest.TestCase): return dy_out, dy_param_value, dy_grad_value def test_resnet(self): + def func_isinstance(): out_fp32 = self.train_resnet(enable_amp=False) out_amp = self.train_resnet(enable_amp=True) @@ -1207,8 +1228,7 @@ class TestResnet(unittest.TestCase): print(out_fp32[0], out_amp[0], 
out_pure_fp16[0]) self.assertTrue(np.allclose(out_fp32[0], out_amp[0], atol=1.e-2)) self.assertTrue( - np.allclose( - out_fp32[0], out_pure_fp16[0], atol=1.e-1)) + np.allclose(out_fp32[0], out_pure_fp16[0], atol=1.e-1)) with _test_eager_guard(): func_isinstance() @@ -1221,6 +1241,7 @@ class TestLayerNormFp16(unittest.TestCase): ''' def test_layer_norm_fp16(self): + def func_isinstance(): if fluid.is_compiled_with_cuda(): with fluid.dygraph.guard(fluid.CUDAPlace(0)): @@ -1246,13 +1267,15 @@ class TestBf16(unittest.TestCase): paddle.seed(100) input = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1., max=1.) conv = paddle.nn.Conv2D(4, 6, (3, 3)) - with paddle.amp.auto_cast( - enable=enable_amp, level=amp_level, dtype='bfloat16'): + with paddle.amp.auto_cast(enable=enable_amp, + level=amp_level, + dtype='bfloat16'): output = conv(input) output = output.cast('float32') return output.numpy() def test_bf16(self): + def func_isinstance(): if fluid.core.is_compiled_with_cuda( ) and fluid.core.is_bfloat16_supported(paddle.CUDAPlace(0)): @@ -1260,11 +1283,9 @@ class TestBf16(unittest.TestCase): out_bf16_O1 = self.train(enable_amp=True, amp_level='O1') out_bf16_O2 = self.train(enable_amp=True, amp_level='O2') self.assertTrue( - np.allclose( - out_fp32, out_bf16_O1, rtol=1.e-3, atol=1.e-1)) + np.allclose(out_fp32, out_bf16_O1, rtol=1.e-3, atol=1.e-1)) self.assertTrue( - np.allclose( - out_fp32, out_bf16_O2, rtol=1.e-3, atol=1.e-1)) + np.allclose(out_fp32, out_bf16_O2, rtol=1.e-3, atol=1.e-1)) with _test_eager_guard(): func_isinstance() @@ -1272,8 +1293,11 @@ class TestBf16(unittest.TestCase): class TestAmpWithPyLyer(unittest.TestCase): + def test_pylayer(self): + class MyMM(PyLayer): + @staticmethod def forward(ctx, a, b): ctx.save_for_backward(a, b) @@ -1298,7 +1322,9 @@ class TestAmpWithPyLyer(unittest.TestCase): class TestAmpWithHook(unittest.TestCase): + def test_hook_change_dtype(self): + def func_isinstance(): with paddle.fluid.dygraph.guard(): v = paddle.rand([3, 3]) @@ -1321,6 +1347,7 @@ class TestAmpWithHook(unittest.TestCase): func_isinstance() def test_hook_change_place(self): + def func_isinstance(): with paddle.fluid.dygraph.guard(): v = paddle.rand([3, 3]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py index ffc5baf5cd2..4dee7cf9633 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py @@ -19,6 +19,7 @@ from paddle.fluid.framework import _test_eager_guard class AutoPruneLayer0(fluid.Layer): + def __init__(self, input_size): super(AutoPruneLayer0, self).__init__() self.linear1 = fluid.dygraph.Linear( @@ -41,6 +42,7 @@ class AutoPruneLayer0(fluid.Layer): class AutoPruneLayer1(fluid.Layer): + def __init__(self, input_size): super(AutoPruneLayer1, self).__init__() self.linear1 = fluid.dygraph.Linear( @@ -64,6 +66,7 @@ class AutoPruneLayer1(fluid.Layer): class AutoPruneLayer2(fluid.Layer): + def __init__(self, input_size): super(AutoPruneLayer2, self).__init__() self.linear = fluid.dygraph.Linear(input_size, 10, act=None) @@ -81,14 +84,16 @@ class AutoPruneLayer2(fluid.Layer): class AutoPruneLayer3(fluid.Layer): + def __init__(self, input_size): super(AutoPruneLayer3, self).__init__() self.linear = fluid.dygraph.Linear(input_size, 20, act=None) def forward(self, x, label, test_num): feature = self.linear(x) - part1, part2 = fluid.layers.split( - feature, num_or_sections=[10, 10], dim=1) + part1, 
part2 = fluid.layers.split(feature, + num_or_sections=[10, 10], + dim=1) # Note that: part2 is not used. loss = fluid.layers.cross_entropy(input=part1, label=label) loss = fluid.layers.mean(loss) @@ -99,6 +104,7 @@ class AutoPruneLayer3(fluid.Layer): class MyLayer(fluid.Layer): + def __init__(self, input_size, vocab_size, size, dtype="float32"): super(MyLayer, self).__init__(dtype=dtype) self.embed0 = fluid.Embedding(size=(vocab_size, size)) @@ -121,6 +127,7 @@ class MyLayer(fluid.Layer): class MyLayer2(fluid.Layer): + def __init__(self, input_size, vocab_size, size, dtype="float32"): super(MyLayer2, self).__init__(dtype=dtype) self.embed0 = fluid.Embedding(size=(vocab_size, size)) @@ -132,8 +139,8 @@ class MyLayer2(fluid.Layer): # mind the difference with MyLayer # In this example, the forward method involes all params loss = fluid.layers.reduce_mean( - self.linear_0(self.embed0(indices)) + self.linear_1( - self.embed1(indices))) + self.linear_0(self.embed0(indices)) + + self.linear_1(self.embed1(indices))) return loss def linear0(self, x): @@ -146,6 +153,7 @@ class MyLayer2(fluid.Layer): class TestImperativeAutoPrune(unittest.TestCase): + def func_auto_prune(self): with fluid.dygraph.guard(): case1 = AutoPruneLayer0(input_size=5) @@ -337,8 +345,8 @@ class TestImperativeAutoPrune(unittest.TestCase): optimizer.minimize(out2) self.assertTrue( np.array_equal(linear2_origin, linear2.weight.numpy())) - self.assertTrue( - np.array_equal(linear_origin, linear.weight.numpy())) + self.assertTrue(np.array_equal(linear_origin, + linear.weight.numpy())) try: linear2.weight.gradient() except ValueError as e: @@ -379,8 +387,8 @@ class TestImperativeAutoPrune(unittest.TestCase): size = 20 batch_size = 16 - indices = np.random.randint( - low=0, high=100, size=(batch_size, 1)).astype("int64") + indices = np.random.randint(low=0, high=100, + size=(batch_size, 1)).astype("int64") embed = np.random.randn(batch_size, size).astype("float32") place = fluid.CPUPlace() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index c129f0756cc..e67bae46a53 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -28,6 +28,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph, _non_s class MyLayer(fluid.Layer): + def __init__(self): super(MyLayer, self).__init__() @@ -40,22 +41,23 @@ class MyLayer(fluid.Layer): class MLP(fluid.Layer): + def __init__(self, input_size): super(MLP, self).__init__() self._linear1 = Linear( input_size, 3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1))) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.1)), + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.1))) self._linear2 = Linear( 3, 4, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1))) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.1)), + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.1))) def forward(self, inputs): x = self._linear1(inputs) @@ -65,6 +67,7 @@ class MLP(fluid.Layer): class SimpleRNNCell(fluid.Layer): + def __init__(self, step_input_size, hidden_size, output_size, param_attr): 
super(SimpleRNNCell, self).__init__() self.step_input_size = step_input_size @@ -77,21 +80,18 @@ class SimpleRNNCell(fluid.Layer): h2h_param_shape = [self.hidden_size, self.hidden_size] h2o_param_shape = [self.output_size, self.hidden_size] self._i2h_w = None - self._i2h_w = self.create_parameter( - attr=self.param_attr, - shape=i2h_param_shape, - dtype=self._dtype, - is_bias=False) - self._h2h_w = self.create_parameter( - attr=self.param_attr, - shape=h2h_param_shape, - dtype=self._dtype, - is_bias=False) - self._h2o_w = self.create_parameter( - attr=self.param_attr, - shape=h2o_param_shape, - dtype=self._dtype, - is_bias=False) + self._i2h_w = self.create_parameter(attr=self.param_attr, + shape=i2h_param_shape, + dtype=self._dtype, + is_bias=False) + self._h2h_w = self.create_parameter(attr=self.param_attr, + shape=h2h_param_shape, + dtype=self._dtype, + is_bias=False) + self._h2o_w = self.create_parameter(attr=self.param_attr, + shape=h2o_param_shape, + dtype=self._dtype, + is_bias=False) def forward(self, input, pre_hidden): tmp_i2h = paddle.fluid.layers.nn.mul(input, self._i2h_w) @@ -105,29 +105,29 @@ class SimpleRNNCell(fluid.Layer): class SimpleRNN(fluid.Layer): + def __init__(self): super(SimpleRNN, self).__init__() self.seq_len = 4 self._cell = SimpleRNNCell( - 3, - 3, - 3, + 3, 3, 3, fluid.ParamAttr(initializer=fluid.initializer.Constant(value=0.1))) def forward(self, inputs): outs = list() pre_hiddens = list() - init_hidden = self.create_parameter( - attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1)), - shape=[1, 3], - dtype='float32', - is_bias=False) + init_hidden = self.create_parameter(attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.1)), + shape=[1, 3], + dtype='float32', + is_bias=False) pre_hidden = init_hidden for i in range(self.seq_len): - input = fluid.layers.slice( - inputs, axes=[1], starts=[i], ends=[i + 1]) + input = fluid.layers.slice(inputs, + axes=[1], + starts=[i], + ends=[i + 1]) input = fluid.layers.reshape(input, shape=[1, 3]) out_softmax, pre_hidden = self._cell(input, pre_hidden) outs.append(out_softmax) @@ -136,6 +136,7 @@ class SimpleRNN(fluid.Layer): class TestImperative(unittest.TestCase): + def functional_dygraph_context(self): self.assertFalse(fluid.dygraph.enabled()) fluid.enable_dygraph() @@ -220,8 +221,8 @@ class TestImperative(unittest.TestCase): t = fluid.Tensor() t.set(x, fluid.CPUPlace()) if not _in_legacy_dygraph(): - egr_tmp = fluid.core.eager.Tensor( - value=x, place=fluid.core.CPUPlace()) + egr_tmp = fluid.core.eager.Tensor(value=x, + place=fluid.core.CPUPlace()) egr_tmp2 = fluid.core.eager.Tensor(y, fluid.core.CPUPlace()) egr_tmp3 = paddle.to_tensor(x) egr_tmp4 = fluid.core.eager.Tensor(y) @@ -359,11 +360,13 @@ class TestImperative(unittest.TestCase): cur_block = cur_program.current_block() # Normally, we don't allow tensor with -1 shape being created in dygraph mode, this test is not good. 
if _in_legacy_dygraph(): - new_variable = cur_block.create_var( - name="X", shape=[-1, 23, 48], dtype='float32') + new_variable = cur_block.create_var(name="X", + shape=[-1, 23, 48], + dtype='float32') else: - new_variable = cur_block.create_var( - name="X", shape=[1, 23, 48], dtype='float32') + new_variable = cur_block.create_var(name="X", + shape=[1, 23, 48], + dtype='float32') try: new_variable.numpy() except Exception as e: @@ -398,11 +401,13 @@ class TestImperative(unittest.TestCase): cur_block = cur_program.current_block() # Normally, we don't allow tensor with -1 shape being created in dygraph mode, this test is not good. if _in_legacy_dygraph(): - new_variable = cur_block.create_var( - name="X", shape=[-1, 23, 48], dtype='float32') + new_variable = cur_block.create_var(name="X", + shape=[-1, 23, 48], + dtype='float32') else: - new_variable = cur_block.create_var( - name="X", shape=[1, 23, 48], dtype='float32') + new_variable = cur_block.create_var(name="X", + shape=[1, 23, 48], + dtype='float32') try: new_variable.gradient() except Exception as e: @@ -460,8 +465,9 @@ class TestImperative(unittest.TestCase): dy_grad2 = l2._x_for_debug.gradient() with new_program_scope(): - inp = fluid.layers.data( - name="inp", shape=[3], append_batch_size=False) + inp = fluid.layers.data(name="inp", + shape=[3], + append_batch_size=False) l = MyLayer() x = l(inp)[0] param_grads = fluid.backward.append_backward( @@ -505,8 +511,9 @@ class TestImperative(unittest.TestCase): dy_grad2 = mlp2._linear1.weight.gradient() with new_program_scope(): - inp = fluid.layers.data( - name="inp", shape=[2, 2], append_batch_size=False) + inp = fluid.layers.data(name="inp", + shape=[2, 2], + append_batch_size=False) mlp = MLP(input_size=2) out = mlp(inp) param_grads = fluid.backward.append_backward( @@ -542,6 +549,7 @@ class TestImperative(unittest.TestCase): self.func_mlp() def test_gradient_accumulation(self): + def test_single_api(sort_sum_gradient): fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient}) x = paddle.to_tensor(5., stop_gradient=False) @@ -583,7 +591,7 @@ class TestImperative(unittest.TestCase): loss = fun(x, y, z) loss.backward(retain_graph=True) - # x.grad = 2*x*y + z + 2*y = 27 + # x.grad = 2*x*y + z + 2*y = 27 self.assertTrue(np.array_equal(x.grad.numpy(), [27])) loss.backward(retain_graph=True) @@ -621,14 +629,14 @@ class TestImperative(unittest.TestCase): detach_x = x.detach() clear_loss = mlp2(detach_x) clear_loss.backward() - expected_weight1_grad = ( - expected_weight1_grad + mlp2._linear1.weight.grad.numpy()) - expected_bias1_grad = ( - expected_bias1_grad + mlp2._linear1.bias.grad.numpy()) - expected_weight2_grad = ( - expected_weight2_grad + mlp2._linear2.weight.grad.numpy()) - expected_bias2_grad = ( - expected_bias2_grad + mlp2._linear2.bias.grad.numpy()) + expected_weight1_grad = (expected_weight1_grad + + mlp2._linear1.weight.grad.numpy()) + expected_bias1_grad = (expected_bias1_grad + + mlp2._linear1.bias.grad.numpy()) + expected_weight2_grad = (expected_weight2_grad + + mlp2._linear2.weight.grad.numpy()) + expected_bias2_grad = (expected_bias2_grad + + mlp2._linear2.bias.grad.numpy()) loss = mlp1(x) loss.backward() @@ -689,17 +697,19 @@ class TestImperative(unittest.TestCase): # static graph with new_program_scope(): - inp_data1 = fluid.layers.data( - name='inp1', shape=[3, 3], dtype=np.float32) - inp_data2 = fluid.layers.data( - name='inp2', shape=[3, 3], dtype=np.float32) + inp_data1 = fluid.layers.data(name='inp1', + shape=[3, 3], + dtype=np.float32) + inp_data2 = 
fluid.layers.data(name='inp2', + shape=[3, 3], + dtype=np.float32) a = fluid.layers.expand( - fluid.layers.reshape( - fluid.layers.reduce_sum(inp_data1), [1, 1]), [4, 1]) + fluid.layers.reshape(fluid.layers.reduce_sum(inp_data1), + [1, 1]), [4, 1]) b = fluid.layers.expand( - fluid.layers.reshape( - fluid.layers.reduce_sum(inp_data2), [1, 1]), [4, 1]) + fluid.layers.reshape(fluid.layers.reduce_sum(inp_data2), + [1, 1]), [4, 1]) cond = fluid.layers.less_than(x=a, y=b) ie = fluid.layers.IfElse(cond) @@ -719,8 +729,10 @@ class TestImperative(unittest.TestCase): exe = fluid.Executor(fluid.CPUPlace( ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) static_result = exe.run(fluid.default_main_program(), - feed={'inp1': np_inp1, - 'inp2': np_inp2}, + feed={ + 'inp1': np_inp1, + 'inp2': np_inp2 + }, fetch_list=out)[0] self.assertTrue(np.allclose(dygraph_result, static_result)) @@ -758,8 +770,9 @@ class TestImperative(unittest.TestCase): dy_grad_i2h2 = simple_rnn2._cell._i2h_w.gradient() with new_program_scope(): - inp = fluid.layers.data( - name="inp", shape=[1, 4, 3], append_batch_size=False) + inp = fluid.layers.data(name="inp", + shape=[1, 4, 3], + append_batch_size=False) simple_rnn = SimpleRNN() outs, pre_hiddens = simple_rnn(inp) param_grads = fluid.backward.append_backward(outs[3]) @@ -813,6 +826,7 @@ class TestImperative(unittest.TestCase): class TestDygraphUtils(unittest.TestCase): + def func_append_activation_in_dygraph_exception(self): with new_program_scope(): np_inp = np.random.random(size=(10, 20, 30)).astype(np.float32) @@ -870,8 +884,9 @@ class TestDygraphUtils(unittest.TestCase): def func_append_activation_in_dygraph_use_mkldnn(self): a_np = np.random.uniform(-2, 2, (10, 20, 30)).astype(np.float32) - helper = LayerHelper( - fluid.unique_name.generate("test"), act="relu", use_mkldnn=True) + helper = LayerHelper(fluid.unique_name.generate("test"), + act="relu", + use_mkldnn=True) func = helper.append_activation with fluid.dygraph.guard(): a = paddle.to_tensor(a_np) @@ -931,6 +946,7 @@ class TestDygraphUtils(unittest.TestCase): class TestDygraphGuardWithError(unittest.TestCase): + def func_without_guard(self): with fluid.dygraph.guard(): x = paddle.to_tensor(np.zeros([10, 10])) @@ -945,6 +961,7 @@ class TestDygraphGuardWithError(unittest.TestCase): class TestMetaclass(unittest.TestCase): + def func_metaclass(self): self.assertEqual(type(MyLayer).__name__, 'type') self.assertNotEqual(type(MyLayer).__name__, 'pybind11_type') diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_layerdict.py b/python/paddle/fluid/tests/unittests/test_imperative_container_layerdict.py index d624495f71d..fd6c5f33119 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_layerdict.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_layerdict.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,6 +22,7 @@ from paddle.fluid.framework import _test_eager_guard class TestLayerDict(unittest.TestCase): + def func_layer_dict(self): layers = OrderedDict([ ('conv1d', paddle.nn.Conv1D(3, 2, 3)), diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_layerlist.py b/python/paddle/fluid/tests/unittests/test_imperative_container_layerlist.py index cf7fc9ba96b..18e0bff411c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_layerlist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_layerlist.py @@ -22,6 +22,7 @@ from paddle.fluid.framework import _test_eager_guard class MyLayer(fluid.Layer): + def __init__(self, layerlist): super(MyLayer, self).__init__() self.layerlist = layerlist @@ -33,6 +34,7 @@ class MyLayer(fluid.Layer): class TestImperativeContainer(unittest.TestCase): + def fluid_dygraph_list(self): return fluid.dygraph.LayerList( [fluid.dygraph.Linear(2**i, 2**(i + 1)) for i in range(6)]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py b/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py index 349f18fe799..97101c619ce 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard class MyLayer(fluid.Layer): + def __init__(self, num_stacked_param, use_fluid_api): super(MyLayer, self).__init__() # create ParameterList with iterable Parameters @@ -33,13 +34,13 @@ class MyLayer(fluid.Layer): def fluid_dygraph_ParameterList(self, num_stacked_param): return fluid.dygraph.ParameterList( - [fluid.layers.create_parameter( - shape=[2, 2], dtype='float32')] * num_stacked_param) + [fluid.layers.create_parameter(shape=[2, 2], dtype='float32')] * + num_stacked_param) def paddle_imperative_ParameterList(self, num_stacked_param): return paddle.nn.ParameterList( - [fluid.layers.create_parameter( - shape=[2, 2], dtype='float32')] * num_stacked_param) + [fluid.layers.create_parameter(shape=[2, 2], dtype='float32')] * + num_stacked_param) def forward(self, x): for i, p in enumerate(self.params): @@ -48,6 +49,7 @@ class MyLayer(fluid.Layer): class TestImperativeContainerParameterList(unittest.TestCase): + def paramter_list(self, use_fluid_api): data_np = np.random.uniform(-1, 1, [5, 2]).astype('float32') with fluid.dygraph.guard(): @@ -65,8 +67,7 @@ class TestImperativeContainerParameterList(unittest.TestCase): res = model(x) self.assertListEqual(res.shape, [5, 3]) model.params.append( - fluid.layers.create_parameter( - shape=[3, 4], dtype='float32')) + fluid.layers.create_parameter(shape=[3, 4], dtype='float32')) self.assertEqual(len(model.params), num_stacked_param + 1) res = model(x) self.assertListEqual(res.shape, [5, 4]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py index dcf4e8de5e4..dc0ce696447 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py @@ -21,12 +21,13 @@ from paddle.fluid.framework import _test_eager_guard class TestImperativeContainerSequential(unittest.TestCase): + def func_sequential(self): data = np.random.uniform(-1, 1, [5, 10]).astype('float32') with fluid.dygraph.guard(): data = fluid.dygraph.to_variable(data) - model1 = 
fluid.dygraph.Sequential( - fluid.Linear(10, 1), fluid.Linear(1, 2)) + model1 = fluid.dygraph.Sequential(fluid.Linear(10, 1), + fluid.Linear(1, 2)) res1 = model1(data) self.assertListEqual(res1.shape, [5, 2]) model1[1] = fluid.Linear(1, 3) @@ -65,8 +66,8 @@ class TestImperativeContainerSequential(unittest.TestCase): data = np.random.uniform(-1, 1, [5, 10]).astype('float32') with fluid.dygraph.guard(): data = fluid.dygraph.to_variable(data) - model1 = fluid.dygraph.Sequential( - fluid.Linear(10, 1), fluid.Linear(1, 2)) + model1 = fluid.dygraph.Sequential(fluid.Linear(10, 1), + fluid.Linear(1, 2)) res1 = model1(data) self.assertListEqual(res1.shape, [5, 2]) model1[1] = fluid.Linear(1, 3) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_base.py b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_base.py index 6f0876dcfc3..4ef5f423d4d 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_base.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_base.py @@ -28,6 +28,7 @@ def get_random_images_and_labels(image_shape, label_shape): def sample_generator_creator(batch_size, batch_num): + def __reader__(): for _ in range(batch_num * batch_size): image, label = get_random_images_and_labels([784], [1]) @@ -37,6 +38,7 @@ def sample_generator_creator(batch_size, batch_num): class TestDygraphDataLoader(unittest.TestCase): + def setUp(self): self.batch_size = 8 self.batch_num = 4 @@ -53,12 +55,13 @@ class TestDygraphDataLoader(unittest.TestCase): def func_test_single_process_loader(self): with fluid.dygraph.guard(): - loader = fluid.io.DataLoader.from_generator( - capacity=self.capacity, iterable=False, use_multiprocess=False) - loader.set_sample_generator( - sample_generator_creator(self.batch_size, self.batch_num), - batch_size=self.batch_size, - places=fluid.CPUPlace()) + loader = fluid.io.DataLoader.from_generator(capacity=self.capacity, + iterable=False, + use_multiprocess=False) + loader.set_sample_generator(sample_generator_creator( + self.batch_size, self.batch_num), + batch_size=self.batch_size, + places=fluid.CPUPlace()) self.iter_loader_data(loader) def test_single_process_loader(self): @@ -68,12 +71,12 @@ class TestDygraphDataLoader(unittest.TestCase): def func_test_multi_process_loader(self): with fluid.dygraph.guard(): - loader = fluid.io.DataLoader.from_generator( - capacity=self.capacity, use_multiprocess=True) - loader.set_sample_generator( - sample_generator_creator(self.batch_size, self.batch_num), - batch_size=self.batch_size, - places=fluid.CPUPlace()) + loader = fluid.io.DataLoader.from_generator(capacity=self.capacity, + use_multiprocess=True) + loader.set_sample_generator(sample_generator_creator( + self.batch_size, self.batch_num), + batch_size=self.batch_size, + places=fluid.CPUPlace()) self.iter_loader_data(loader) def test_multi_process_loader(self): @@ -84,9 +87,9 @@ class TestDygraphDataLoader(unittest.TestCase): def func_test_generator_no_places(self): with fluid.dygraph.guard(): loader = fluid.io.DataLoader.from_generator(capacity=self.capacity) - loader.set_sample_generator( - sample_generator_creator(self.batch_size, self.batch_num), - batch_size=self.batch_size) + loader.set_sample_generator(sample_generator_creator( + self.batch_size, self.batch_num), + batch_size=self.batch_size) self.iter_loader_data(loader) def test_generator_no_places(self): @@ -97,12 +100,13 @@ class TestDygraphDataLoader(unittest.TestCase): def func_test_set_pin_memory(self): with fluid.dygraph.guard(): 
use_pinned_memory(False) - loader = fluid.io.DataLoader.from_generator( - capacity=self.capacity, iterable=False, use_multiprocess=False) - loader.set_sample_generator( - sample_generator_creator(self.batch_size, self.batch_num), - batch_size=self.batch_size, - places=fluid.CPUPlace()) + loader = fluid.io.DataLoader.from_generator(capacity=self.capacity, + iterable=False, + use_multiprocess=False) + loader.set_sample_generator(sample_generator_creator( + self.batch_size, self.batch_num), + batch_size=self.batch_size, + places=fluid.CPUPlace()) self.iter_loader_data(loader) use_pinned_memory(True) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exception.py b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exception.py index 4ab58919fdb..034d38c4e8a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exception.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exception.py @@ -29,6 +29,7 @@ def get_random_images_and_labels(image_shape, label_shape): class TestDygraphDataLoaderWithException(unittest.TestCase): + def setUp(self): self.batch_size = 8 self.batch_num = 4 @@ -47,7 +48,9 @@ class TestDygraphDataLoaderWithException(unittest.TestCase): self.func_test_not_capacity() def func_test_single_process_with_thread_expection(self): + def error_sample_genarator(batch_num): + def __reader__(): for _ in range(batch_num): yield [[[1, 2], [1]]] @@ -55,10 +58,11 @@ class TestDygraphDataLoaderWithException(unittest.TestCase): return __reader__ with fluid.dygraph.guard(): - loader = fluid.io.DataLoader.from_generator( - capacity=self.capacity, iterable=False, use_multiprocess=False) - loader.set_batch_generator( - error_sample_genarator(self.batch_num), places=fluid.CPUPlace()) + loader = fluid.io.DataLoader.from_generator(capacity=self.capacity, + iterable=False, + use_multiprocess=False) + loader.set_batch_generator(error_sample_genarator(self.batch_num), + places=fluid.CPUPlace()) exception = None try: for _ in loader(): @@ -75,7 +79,9 @@ class TestDygraphDataLoaderWithException(unittest.TestCase): self.func_test_single_process_with_thread_expection() def func_test_multi_process_with_process_expection(self): + def error_sample_genarator(batch_num): + def __reader__(): for _ in range(batch_num): yield [[[1, 2], [1]]] @@ -83,10 +89,10 @@ class TestDygraphDataLoaderWithException(unittest.TestCase): return __reader__ with fluid.dygraph.guard(): - loader = fluid.io.DataLoader.from_generator( - capacity=self.capacity, use_multiprocess=True) - loader.set_batch_generator( - error_sample_genarator(self.batch_num), places=fluid.CPUPlace()) + loader = fluid.io.DataLoader.from_generator(capacity=self.capacity, + use_multiprocess=True) + loader.set_batch_generator(error_sample_genarator(self.batch_num), + places=fluid.CPUPlace()) exception = None try: for _ in loader(): @@ -101,7 +107,9 @@ class TestDygraphDataLoaderWithException(unittest.TestCase): self.func_test_multi_process_with_process_expection() def func_test_multi_process_with_get_timeout(self): + def slow_batch_generator_creator(batch_size, batch_num): + def __reader__(): for _ in range(batch_num): time.sleep(80) @@ -112,11 +120,11 @@ class TestDygraphDataLoaderWithException(unittest.TestCase): return __reader__ with fluid.dygraph.guard(): - loader = fluid.io.DataLoader.from_generator( - capacity=self.capacity, use_multiprocess=True) - loader.set_batch_generator( - slow_batch_generator_creator(self.batch_size, self.batch_num), - places=fluid.CPUPlace()) + 
loader = fluid.io.DataLoader.from_generator(capacity=self.capacity, + use_multiprocess=True) + loader.set_batch_generator(slow_batch_generator_creator( + self.batch_size, self.batch_num), + places=fluid.CPUPlace()) exception = None try: for _ in range(self.epoch_num): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exit_func.py b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exit_func.py index e83d6210f84..021637ec3a6 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exit_func.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exit_func.py @@ -33,6 +33,7 @@ from paddle.fluid.reader import multiprocess_queue_set, _cleanup, CleanupFuncReg class TestDygraphDataLoaderCleanUpFunc(unittest.TestCase): + def setUp(self): self.capacity = 10 @@ -70,8 +71,8 @@ class TestRegisterExitFunc(unittest.TestCase): self.func_test_not_callable_func() def func_test_old_handler_for_sigint(self): - CleanupFuncRegistrar.register( - function=self.none_func, signals=[signal.SIGINT]) + CleanupFuncRegistrar.register(function=self.none_func, + signals=[signal.SIGINT]) def test_old_handler_for_sigint(self): with _test_eager_guard(): @@ -83,8 +84,8 @@ class TestRegisterExitFunc(unittest.TestCase): def __test_process__(): pass - CleanupFuncRegistrar.register( - function=self.none_func, signals=[signal.SIGCHLD]) + CleanupFuncRegistrar.register(function=self.none_func, + signals=[signal.SIGCHLD]) exception = None try: diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_fds_clear.py b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_fds_clear.py index 0ef2e19c44b..bb7bb89d781 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_fds_clear.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_fds_clear.py @@ -28,6 +28,7 @@ def get_random_images_and_labels(image_shape, label_shape): def batch_generator_creator(batch_size, batch_num): + def __reader__(): for _ in range(batch_num): batch_image, batch_label = get_random_images_and_labels( @@ -38,6 +39,7 @@ def batch_generator_creator(batch_size, batch_num): class RandomDataset(Dataset): + def __init__(self, sample_num): self.sample_num = sample_num @@ -52,6 +54,7 @@ class RandomDataset(Dataset): class TestDygraphDataLoaderMmapFdsClear(unittest.TestCase): + def setUp(self): self.batch_size = 8 self.batch_num = 100 @@ -59,11 +62,11 @@ class TestDygraphDataLoaderMmapFdsClear(unittest.TestCase): self.capacity = 50 def prepare_data_loader(self): - loader = fluid.io.DataLoader.from_generator( - capacity=self.capacity, use_multiprocess=True) - loader.set_batch_generator( - batch_generator_creator(self.batch_size, self.batch_num), - places=fluid.CPUPlace()) + loader = fluid.io.DataLoader.from_generator(capacity=self.capacity, + use_multiprocess=True) + loader.set_batch_generator(batch_generator_creator( + self.batch_size, self.batch_num), + places=fluid.CPUPlace()) return loader def run_one_epoch_with_break(self, loader): @@ -101,16 +104,16 @@ class TestDygraphDataLoaderMmapFdsClear(unittest.TestCase): class TestMultiProcessDataLoaderMmapFdsClear(TestDygraphDataLoaderMmapFdsClear): + def prepare_data_loader(self): place = fluid.CPUPlace() with fluid.dygraph.guard(place): dataset = RandomDataset(self.batch_size * self.batch_num) - loader = DataLoader( - dataset, - places=place, - batch_size=self.batch_size, - drop_last=True, - num_workers=2) + loader = DataLoader(dataset, + places=place, + 
batch_size=self.batch_size, + drop_last=True, + num_workers=2) return loader diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_process.py b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_process.py index 0eb5aa55eb3..c860e5ed097 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_process.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_process.py @@ -34,6 +34,7 @@ def get_random_images_and_labels(image_shape, label_shape): def batch_generator_creator(batch_size, batch_num): + def __reader__(): for _ in range(batch_num): batch_image, batch_label = get_random_images_and_labels( @@ -46,6 +47,7 @@ def batch_generator_creator(batch_size, batch_num): # NOTE: coverage CI can't cover child process code, so need these test. # Here test child process loop function in main process class TestDygraphDataLoaderProcess(unittest.TestCase): + def setUp(self): self.batch_size = 8 self.batch_num = 4 @@ -64,9 +66,9 @@ class TestDygraphDataLoaderProcess(unittest.TestCase): with fluid.dygraph.guard(): loader = fluid.io.DataLoader.from_generator( capacity=self.batch_num + 1, use_multiprocess=True) - loader.set_batch_generator( - batch_generator_creator(self.batch_size, self.batch_num), - places=fluid.CPUPlace()) + loader.set_batch_generator(batch_generator_creator( + self.batch_size, self.batch_num), + places=fluid.CPUPlace()) loader._data_queue = queue.Queue(self.batch_num + 1) _reader_process_loop(loader._batch_reader, loader._data_queue) # For clean memory mapped files @@ -76,8 +78,8 @@ class TestDygraphDataLoaderProcess(unittest.TestCase): util_queue.put(data) # Clean up memory mapped files - clear_process = multiprocessing.Process( - target=__clear_process__, args=(util_queue, )) + clear_process = multiprocessing.Process(target=__clear_process__, + args=(util_queue, )) clear_process.start() def test_reader_process_loop(self): @@ -86,7 +88,9 @@ class TestDygraphDataLoaderProcess(unittest.TestCase): self.func_test_reader_process_loop() def func_test_reader_process_loop_simple_none(self): + def none_sample_genarator(batch_num): + def __reader__(): for _ in range(batch_num): yield None @@ -96,8 +100,8 @@ class TestDygraphDataLoaderProcess(unittest.TestCase): with fluid.dygraph.guard(): loader = fluid.io.DataLoader.from_generator( capacity=self.batch_num + 1, use_multiprocess=True) - loader.set_batch_generator( - none_sample_genarator(self.batch_num), places=fluid.CPUPlace()) + loader.set_batch_generator(none_sample_genarator(self.batch_num), + places=fluid.CPUPlace()) loader._data_queue = queue.Queue(self.batch_num + 1) exception = None try: diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py index d645a0a5ceb..8e9c3c280f4 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py @@ -28,6 +28,7 @@ import paddle.fluid.core as core class MLP(fluid.Layer): + def __init__(self, param_attr=None, bias_attr=None): super(MLP, self).__init__() @@ -41,6 +42,7 @@ class MLP(fluid.Layer): class TestDataParallelStateDict(unittest.TestCase): + def test_data_parallel_state_dict(self): with fluid.dygraph.guard(): strategy = dygraph.parallel.prepare_context() @@ -51,8 +53,8 @@ class TestDataParallelStateDict(unittest.TestCase): parallel_state = parallel_mlp.state_dict() base_para = {} - place = fluid.CPUPlace() if not 
core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) for k, v in single_state.items(): self.assertTrue(k in parallel_state) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_decorator.py b/python/paddle/fluid/tests/unittests/test_imperative_decorator.py index 38c7de24b77..f783b18b2b2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_decorator.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_decorator.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard class TestTracerMode(unittest.TestCase): + def setUp(self): self.init_mode = True @@ -59,8 +60,8 @@ class TestTracerMode(unittest.TestCase): decorated_func = fluid.dygraph.no_grad(need_no_grad_func) self.assertTrue( - str(inspect.getfullargspec(decorated_func)) == - str(inspect.getfullargspec(need_no_grad_func))) + str(inspect.getfullargspec(decorated_func)) == str( + inspect.getfullargspec(need_no_grad_func))) self.assertEqual(self.tracer._train_mode, self.init_mode) @@ -78,11 +79,13 @@ class TestTracerMode(unittest.TestCase): class TestTracerMode2(TestTracerMode): + def setUp(self): self.init_mode = False class TestNoGradClass(unittest.TestCase): + @paddle.no_grad() def no_grad_func(self, a): self.assertEqual(self.tracer._train_mode, True) @@ -102,9 +105,8 @@ class TestNoGradClass(unittest.TestCase): return a + b decorated_func = paddle.no_grad()(need_no_grad_func) - self.assertEqual( - str(inspect.getfullargspec(decorated_func)), - str(inspect.getfullargspec(need_no_grad_func))) + self.assertEqual(str(inspect.getfullargspec(decorated_func)), + str(inspect.getfullargspec(need_no_grad_func))) def test_gen(): for i in range(3): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index 3e222e3c658..822a0fcc449 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -35,6 +35,7 @@ NUM_EPOCHES = int(os.environ.get('NUM_EPOCHES', 1)) class DMF(fluid.Layer): + def __init__(self): super(DMF, self).__init__() self._user_latent = Linear(1000, 256) @@ -47,17 +48,15 @@ class DMF(fluid.Layer): self._user_layers.append( self.add_sublayer( 'user_layer_%d' % i, - Linear( - 256 if i == 0 else self._hid_sizes[i - 1], - self._hid_sizes[i], - act='relu'))) + Linear(256 if i == 0 else self._hid_sizes[i - 1], + self._hid_sizes[i], + act='relu'))) self._item_layers.append( self.add_sublayer( 'item_layer_%d' % i, - Linear( - 256 if i == 0 else self._hid_sizes[i - 1], - self._hid_sizes[i], - act='relu'))) + Linear(256 if i == 0 else self._hid_sizes[i - 1], + self._hid_sizes[i], + act='relu'))) def forward(self, users, items): users = self._user_latent(users) @@ -70,6 +69,7 @@ class DMF(fluid.Layer): class MLP(fluid.Layer): + def __init__(self): super(MLP, self).__init__() self._user_latent = Linear(1000, 256) @@ -80,22 +80,22 @@ class MLP(fluid.Layer): self._match_layers.append( self.add_sublayer( 'match_layer_%d' % i, - Linear( - 256 * 2 if i == 0 else self._hid_sizes[i - 1], - self._hid_sizes[i], - act='relu'))) + Linear(256 * 2 if i == 0 else self._hid_sizes[i - 1], + self._hid_sizes[i], + act='relu'))) def forward(self, users, items): users = self._user_latent(users) items = self._item_latent(items) - match_vec = fluid.layers.concat( - [users, items], axis=len(users.shape) - 1) + match_vec = fluid.layers.concat([users, items], + 
axis=len(users.shape) - 1) for l in self._match_layers: match_vec = l(match_vec) return match_vec class DeepCF(fluid.Layer): + def __init__(self, num_users, num_items, matrix): super(DeepCF, self).__init__() self._num_users = num_users @@ -123,9 +123,8 @@ class DeepCF(fluid.Layer): mlp_predictive = self._mlp(users_emb, items_emb) dmf_predictive = self._dmf(users_emb, items_emb) - predictive = fluid.layers.concat( - [mlp_predictive, dmf_predictive], - axis=len(mlp_predictive.shape) - 1) + predictive = fluid.layers.concat([mlp_predictive, dmf_predictive], + axis=len(mlp_predictive.shape) - 1) prediction = self._match_fc(predictive) return prediction @@ -199,6 +198,7 @@ def load_data(DATA_PATH): class TestDygraphDeepCF(unittest.TestCase): + def test_deefcf(self): seed = 90 if DATA_PATH: @@ -259,9 +259,9 @@ class TestDygraphDeepCF(unittest.TestCase): to_variable(users_np[slice:slice + BATCH_SIZE]), to_variable(items_np[slice:slice + BATCH_SIZE])) loss = fluid.layers.reduce_sum( - fluid.layers.log_loss(prediction, - to_variable(labels_np[ - slice:slice + BATCH_SIZE]))) + fluid.layers.log_loss( + prediction, + to_variable(labels_np[slice:slice + BATCH_SIZE]))) loss.backward() adam.minimize(loss) deepcf.clear_gradients() @@ -285,9 +285,9 @@ class TestDygraphDeepCF(unittest.TestCase): to_variable(users_np[slice:slice + BATCH_SIZE]), to_variable(items_np[slice:slice + BATCH_SIZE])) loss2 = fluid.layers.reduce_sum( - fluid.layers.log_loss(prediction2, - to_variable(labels_np[ - slice:slice + BATCH_SIZE]))) + fluid.layers.log_loss( + prediction2, + to_variable(labels_np[slice:slice + BATCH_SIZE]))) loss2.backward() adam2.minimize(loss2) deepcf2.clear_gradients() @@ -315,10 +315,10 @@ class TestDygraphDeepCF(unittest.TestCase): to_variable(users_np[slice:slice + BATCH_SIZE]), to_variable(items_np[slice:slice + BATCH_SIZE])) loss = fluid.layers.reduce_sum( - fluid.layers.log_loss(prediction, - to_variable( - labels_np[slice:slice + - BATCH_SIZE]))) + fluid.layers.log_loss( + prediction, + to_variable(labels_np[slice:slice + + BATCH_SIZE]))) loss.backward() adam.minimize(loss) deepcf.clear_gradients() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py index 00b192b2a05..5e9374bac05 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py @@ -25,6 +25,7 @@ import paddle.fluid.core as core def _dygraph_guard_(func): + def __impl__(*args, **kwargs): if fluid._non_static_mode(): return func(*args, **kwargs) @@ -44,6 +45,7 @@ def random_var(size, low=-1, high=1, dtype='float32'): class TestEagerGrad(TestCase): + def func_simple_example_eager_grad(self): np.random.seed(2021) paddle.set_device('cpu') @@ -166,6 +168,7 @@ class TestEagerGrad(TestCase): class TestDygraphDoubleGrad(TestCase): + def setUp(self): self.sort_sum_gradient = False self.shape = [5, 10] @@ -179,14 +182,13 @@ class TestDygraphDoubleGrad(TestCase): create_graph=False, allow_unused=False): fluid.set_flags({'FLAGS_sort_sum_gradient': self.sort_sum_gradient}) - return fluid.dygraph.grad( - outputs=outputs, - inputs=inputs, - grad_outputs=grad_outputs, - no_grad_vars=no_grad_vars, - retain_graph=retain_graph, - create_graph=create_graph, - allow_unused=allow_unused) + return fluid.dygraph.grad(outputs=outputs, + inputs=inputs, + grad_outputs=grad_outputs, + no_grad_vars=no_grad_vars, + retain_graph=retain_graph, + create_graph=create_graph, + 
allow_unused=allow_unused) @dygraph_guard def func_exception(self): @@ -212,8 +214,8 @@ class TestDygraphDoubleGrad(TestCase): [random_var(shape)], [random_var(shape)]) with self.assertRaises(AssertionError): - self.grad( - [random_var(shape)], [random_var(shape)], no_grad_vars=[1]) + self.grad([random_var(shape)], [random_var(shape)], + no_grad_vars=[1]) with self.assertRaises(AssertionError): self.grad([random_var(shape)], [random_var(shape)], no_grad_vars=1) @@ -230,24 +232,27 @@ class TestDygraphDoubleGrad(TestCase): y = x + 1 for create_graph in [False, True]: - dx, = self.grad( - [x], [x], create_graph=create_graph, retain_graph=True) + dx, = self.grad([x], [x], + create_graph=create_graph, + retain_graph=True) self.assertEqual(dx.shape, x.shape) self.assertTrue(np.all(dx.numpy() == 1)) self.assertNotEqual(dx.stop_gradient, create_graph) - dx_mul_2, = self.grad( - [y, x], [x], create_graph=create_graph, retain_graph=True) + dx_mul_2, = self.grad([y, x], [x], + create_graph=create_graph, + retain_graph=True) self.assertEqual(dx_mul_2.shape, x.shape) self.assertTrue(np.all(dx_mul_2.numpy() == 2)) self.assertNotEqual(dx_mul_2.stop_gradient, create_graph) - none_grad, = self.grad( - [x], [y], create_graph=create_graph, allow_unused=True) + none_grad, = self.grad([x], [y], + create_graph=create_graph, + allow_unused=True) self.assertTrue(none_grad is None) - grad_with_none_and_not_none, = self.grad( - [x, y], [y], create_graph=create_graph) + grad_with_none_and_not_none, = self.grad([x, y], [y], + create_graph=create_graph) self.assertTrue(grad_with_none_and_not_none.shape, x.shape) self.assertTrue(np.all(grad_with_none_and_not_none.numpy() == 1)) self.assertNotEqual(grad_with_none_and_not_none.stop_gradient, @@ -273,8 +278,9 @@ class TestDygraphDoubleGrad(TestCase): w_mean = fluid.layers.reduce_mean(w) del y1, z, w - dx_actual, = self.grad( - [w_mean], [x], create_graph=True, no_grad_vars=[y2]) + dx_actual, = self.grad([w_mean], [x], + create_graph=True, + no_grad_vars=[y2]) self.assertFalse(y2.stop_gradient) self.assertFalse(dx_actual.stop_gradient) @@ -297,10 +303,11 @@ class TestDygraphDoubleGrad(TestCase): half_numel = int(numel / 2) half_x_positive = np.random.uniform(low=1, high=2, size=[half_numel]) - half_x_negative = np.random.uniform( - low=-2, high=-1, size=[numel - half_numel]) - x_np = np.array(list(half_x_positive) + list(half_x_negative)).astype( - 'float32') + half_x_negative = np.random.uniform(low=-2, + high=-1, + size=[numel - half_numel]) + x_np = np.array(list(half_x_positive) + + list(half_x_negative)).astype('float32') np.random.shuffle(x_np) x = fluid.dygraph.to_variable(x_np) @@ -329,12 +336,11 @@ class TestDygraphDoubleGrad(TestCase): for grad_y in [random_grad_y]: for grad_z in [random_grad_z]: for create_graph in [False, True]: - dx_actual, = self.grad( - outputs=[y, z], - inputs=[x], - grad_outputs=[grad_y, grad_z], - create_graph=create_graph, - retain_graph=True) + dx_actual, = self.grad(outputs=[y, z], + inputs=[x], + grad_outputs=[grad_y, grad_z], + create_graph=create_graph, + retain_graph=True) grad_y_np = ones_grad_y if grad_y is None else grad_y.numpy( ) @@ -397,9 +403,10 @@ class TestDygraphDoubleGrad(TestCase): for i in range(5): loss.backward(retain_graph=True) x_grad_actual = x.gradient() - x_grad_expected = (i + 2) * (2.0 / float(numel) * ( - x_np + dx_expected * - (x_np > 0) * 2 / float(numel))).astype('float32') + x_grad_expected = ( + i + 2) * (2.0 / float(numel) * + (x_np + dx_expected * + (x_np > 0) * 2 / float(numel))).astype('float32') 
self.assertTrue(np.allclose(x_grad_actual, x_grad_expected)) def test_example_with_gradient_accumulation_and_create_graph(self): @@ -422,11 +429,10 @@ class TestDygraphDoubleGrad(TestCase): w_mean = fluid.layers.reduce_mean(w) del y1, z, w - dx_actual, = self.grad( - [w_mean], [x], - retain_graph=True, - create_graph=True, - no_grad_vars=[y2]) + dx_actual, = self.grad([w_mean], [x], + retain_graph=True, + create_graph=True, + no_grad_vars=[y2]) self.assertFalse(y2.stop_gradient) self.assertFalse(dx_actual.stop_gradient) @@ -487,12 +493,14 @@ class TestDygraphDoubleGrad(TestCase): class TestDygraphDoubleGradSortGradient(TestDygraphDoubleGrad): + def setUp(self): self.sort_sum_gradient = True self.shape = [5, 10] class TestDygraphDoubleGradVisitedUniq(TestCase): + def func_compare(self): value = np.random.uniform(-0.5, 0.5, 100).reshape(10, 2, 5).astype("float32") @@ -516,12 +524,11 @@ class TestDygraphDoubleGradVisitedUniq(TestCase): out = model_f(a) - dx = fluid.dygraph.grad( - outputs=[out], - inputs=[a], - create_graph=False, - only_inputs=True, - allow_unused=False) + dx = fluid.dygraph.grad(outputs=[out], + inputs=[a], + create_graph=False, + only_inputs=True, + allow_unused=False) grad_1 = dx[0].numpy() @@ -545,15 +552,17 @@ class TestDygraphDoubleGradVisitedUniq(TestCase): class TestRaiseNoDoubleGradOp(TestCase): + def raise_no_grad_op(self): with fluid.dygraph.guard(): x = fluid.layers.ones(shape=[2, 3, 2, 2], dtype='float32') x.stop_gradient = False y = paddle.fluid.layers.group_norm(x, groups=1) - dx = fluid.dygraph.grad( - outputs=[y], inputs=[x], create_graph=True, - retain_graph=True)[0] + dx = fluid.dygraph.grad(outputs=[y], + inputs=[x], + create_graph=True, + retain_graph=True)[0] loss = fluid.layers.reduce_mean(dx) loss.backward() @@ -563,6 +572,7 @@ class TestRaiseNoDoubleGradOp(TestCase): class TestDoubleGradResNet(TestCase): + def setUp(self): paddle.seed(123) paddle.framework.random._manual_program_seed(123) @@ -589,8 +599,8 @@ class TestDoubleGradResNet(TestCase): data.stop_gradient = False out = model(data) preds = paddle.argmax(out, axis=1) - label_onehot = paddle.nn.functional.one_hot( - paddle.to_tensor(preds), num_classes=out.shape[1]) + label_onehot = paddle.nn.functional.one_hot(paddle.to_tensor(preds), + num_classes=out.shape[1]) target = paddle.sum(out * label_onehot, axis=1) g = paddle.grad(outputs=target, inputs=out)[0] @@ -621,8 +631,8 @@ class TestDoubleGradResNet(TestCase): data.stop_gradient = False out = model(data) preds = paddle.argmax(out, axis=1) - label_onehot = paddle.nn.functional.one_hot( - paddle.to_tensor(preds), num_classes=out.shape[1]) + label_onehot = paddle.nn.functional.one_hot(paddle.to_tensor(preds), + num_classes=out.shape[1]) target = paddle.sum(out * label_onehot, axis=1) g = paddle.grad(outputs=target, inputs=out)[0] @@ -634,19 +644,24 @@ class TestDoubleGradResNet(TestCase): class TestDoubleGradBasics(TestCase): + def test_matmul(self): input_numpy = np.ones([3, 3]) * 2 with _test_eager_guard(): - x = paddle.to_tensor( - input_numpy, stop_gradient=False, dtype='float32') - y = paddle.to_tensor( - input_numpy, stop_gradient=False, dtype='float32') - grad_out = paddle.to_tensor( - np.ones([3, 3]), stop_gradient=False, dtype='float32') + x = paddle.to_tensor(input_numpy, + stop_gradient=False, + dtype='float32') + y = paddle.to_tensor(input_numpy, + stop_gradient=False, + dtype='float32') + grad_out = paddle.to_tensor(np.ones([3, 3]), + stop_gradient=False, + dtype='float32') out = paddle.matmul(x, y, False, False) - new_x_g, new_y_g 
= paddle.grad( - [out], [x, y], [grad_out], retain_graph=True, create_graph=True) + new_x_g, new_y_g = paddle.grad([out], [x, y], [grad_out], + retain_graph=True, + create_graph=True) new_x_g.backward() out_ref = np.ones([3, 3]) * 12.0 diff --git a/python/paddle/fluid/tests/unittests/test_imperative_framework.py b/python/paddle/fluid/tests/unittests/test_imperative_framework.py index 2d900d65976..5e1c59dba57 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_framework.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_framework.py @@ -22,22 +22,23 @@ from paddle.fluid.framework import _test_eager_guard class MLP(fluid.Layer): + def __init__(self, input_size): super(MLP, self).__init__() self._linear1 = fluid.dygraph.Linear( input_size, 3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1))) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.1)), + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.1))) self._linear2 = fluid.dygraph.Linear( 3, 4, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1))) + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.1)), + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.1))) def forward(self, inputs): x = self._linear1(inputs) @@ -47,11 +48,14 @@ class MLP(fluid.Layer): class TestDygraphFramework(unittest.TestCase): + def func_test_dygraph_backward(self): with new_program_scope(): mlp = MLP(input_size=2) - var_inp = fluid.layers.data( - "input", shape=[2, 2], dtype="float32", append_batch_size=False) + var_inp = fluid.layers.data("input", + shape=[2, 2], + dtype="float32", + append_batch_size=False) out = mlp(var_inp) try: out.backward() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gan.py b/python/paddle/fluid/tests/unittests/test_imperative_gan.py index 39b7f941c4b..e724421d1db 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gan.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py @@ -29,6 +29,7 @@ from paddle.fluid.framework import _test_eager_guard class Discriminator(fluid.Layer): + def __init__(self): super(Discriminator, self).__init__() self._fc1 = Linear(1, 32, act='elu') @@ -41,6 +42,7 @@ class Discriminator(fluid.Layer): class Generator(fluid.Layer): + def __init__(self): super(Generator, self).__init__() self._fc1 = Linear(2, 64, act='elu') @@ -55,6 +57,7 @@ class Generator(fluid.Layer): class TestDygraphGAN(unittest.TestCase): + def func_test_gan_float32(self): seed = 90 paddle.seed(1) @@ -64,29 +67,34 @@ class TestDygraphGAN(unittest.TestCase): generate_p = fluid.Program() scope = fluid.core.Scope() - with new_program_scope( - main=discriminate_p, startup=startup, scope=scope): + with new_program_scope(main=discriminate_p, + startup=startup, + scope=scope): discriminator = Discriminator() generator = Generator() - img = fluid.layers.data( - name="img", shape=[2, 1], append_batch_size=False) - noise = fluid.layers.data( - name="noise", shape=[2, 2], append_batch_size=False) + img = fluid.layers.data(name="img", + shape=[2, 1], + append_batch_size=False) + noise = fluid.layers.data(name="noise", + shape=[2, 2], + append_batch_size=False) d_real = discriminator(img) d_loss_real = fluid.layers.reduce_mean( 
fluid.layers.sigmoid_cross_entropy_with_logits( x=d_real, - label=fluid.layers.fill_constant( - shape=[2, 1], dtype='float32', value=1.0))) + label=fluid.layers.fill_constant(shape=[2, 1], + dtype='float32', + value=1.0))) d_fake = discriminator(generator(noise)) d_loss_fake = fluid.layers.reduce_mean( fluid.layers.sigmoid_cross_entropy_with_logits( x=d_fake, - label=fluid.layers.fill_constant( - shape=[2, 1], dtype='float32', value=0.0))) + label=fluid.layers.fill_constant(shape=[2, 1], + dtype='float32', + value=0.0))) d_loss = d_loss_real + d_loss_fake @@ -97,29 +105,33 @@ class TestDygraphGAN(unittest.TestCase): discriminator = Discriminator() generator = Generator() - noise = fluid.layers.data( - name="noise", shape=[2, 2], append_batch_size=False) + noise = fluid.layers.data(name="noise", + shape=[2, 2], + append_batch_size=False) d_fake = discriminator(generator(noise)) g_loss = fluid.layers.reduce_mean( fluid.layers.sigmoid_cross_entropy_with_logits( x=d_fake, - label=fluid.layers.fill_constant( - shape=[2, 1], dtype='float32', value=1.0))) + label=fluid.layers.fill_constant(shape=[2, 1], + dtype='float32', + value=1.0))) sgd = SGDOptimizer(learning_rate=1e-3) sgd.minimize(g_loss) - exe = fluid.Executor(fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0)) + exe = fluid.Executor(fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) static_params = dict() with fluid.scope_guard(scope): img = np.ones([2, 1], np.float32) noise = np.ones([2, 2], np.float32) exe.run(startup) static_d_loss = exe.run(discriminate_p, - feed={'img': img, - 'noise': noise}, + feed={ + 'img': img, + 'noise': noise + }, fetch_list=[d_loss])[0] static_g_loss = exe.run(generate_p, feed={'noise': noise}, @@ -137,10 +149,9 @@ class TestDygraphGAN(unittest.TestCase): discriminator = Discriminator() generator = Generator() - sgd = SGDOptimizer( - learning_rate=1e-3, - parameter_list=( - discriminator.parameters() + generator.parameters())) + sgd = SGDOptimizer(learning_rate=1e-3, + parameter_list=(discriminator.parameters() + + generator.parameters())) d_real = discriminator(to_variable(np.ones([2, 1], np.float32))) d_loss_real = fluid.layers.reduce_mean( @@ -181,10 +192,9 @@ class TestDygraphGAN(unittest.TestCase): paddle.framework.random._manual_program_seed(1) discriminator2 = Discriminator() generator2 = Generator() - sgd2 = SGDOptimizer( - learning_rate=1e-3, - parameter_list=( - discriminator2.parameters() + generator2.parameters())) + sgd2 = SGDOptimizer(learning_rate=1e-3, + parameter_list=(discriminator2.parameters() + + generator2.parameters())) d_real2 = discriminator2(to_variable(np.ones([2, 1], np.float32))) d_loss_real2 = fluid.layers.reduce_mean( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py index a5a90461551..6acab36221f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py @@ -31,6 +31,7 @@ def gen_data(): class GraphConv(fluid.Layer): + def __init__(self, name_scope, in_features, out_features): super(GraphConv, self).__init__(name_scope) @@ -40,8 +41,9 @@ class GraphConv(fluid.Layer): attr=None, dtype='float32', shape=[self._in_features, self._out_features]) - self.bias = self.create_parameter( - attr=None, dtype='float32', shape=[self._out_features]) + self.bias = self.create_parameter(attr=None, + dtype='float32', + shape=[self._out_features]) def forward(self, features, adj): 
support = fluid.layers.matmul(features, self.weight) @@ -50,6 +52,7 @@ class GraphConv(fluid.Layer): class GCN(fluid.Layer): + def __init__(self, name_scope, num_hidden): super(GCN, self).__init__(name_scope) self.gc = GraphConv(self.full_name(), num_hidden, 32) @@ -61,6 +64,7 @@ class GCN(fluid.Layer): class TestDygraphGNN(unittest.TestCase): + def func_gnn_float32(self): paddle.seed(90) paddle.framework.random._manual_program_seed(90) @@ -69,22 +73,19 @@ class TestDygraphGNN(unittest.TestCase): scope = fluid.core.Scope() with new_program_scope(main=main, startup=startup, scope=scope): - features = fluid.layers.data( - name='features', - shape=[1, 100, 50], - dtype='float32', - append_batch_size=False) + features = fluid.layers.data(name='features', + shape=[1, 100, 50], + dtype='float32', + append_batch_size=False) # Use selected rows when it's supported. - adj = fluid.layers.data( - name='adj', - shape=[1, 100, 100], - dtype='float32', - append_batch_size=False) - labels = fluid.layers.data( - name='labels', - shape=[100, 1], - dtype='int64', - append_batch_size=False) + adj = fluid.layers.data(name='adj', + shape=[1, 100, 100], + dtype='float32', + append_batch_size=False) + labels = fluid.layers.data(name='labels', + shape=[100, 1], + dtype='int64', + append_batch_size=False) model = GCN('test_gcn', 50) logits = model(features, adj) @@ -100,12 +101,12 @@ class TestDygraphGNN(unittest.TestCase): ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) exe.run(startup) static_loss = exe.run(feed={ - 'features': np.ones( - [1, 100, 50], dtype=np.float32), - 'adj': np.ones( - [1, 100, 100], dtype=np.float32), - 'labels': np.ones( - [100, 1], dtype=np.int64) + 'features': + np.ones([1, 100, 50], dtype=np.float32), + 'adj': + np.ones([1, 100, 100], dtype=np.float32), + 'labels': + np.ones([100, 1], dtype=np.int64) }, fetch_list=[loss])[0] @@ -126,12 +127,12 @@ class TestDygraphGNN(unittest.TestCase): logits = fluid.layers.reshape(logits, logits.shape[1:]) # In other example, it's nll with log_softmax. However, paddle's # log_loss only supports binary classification now. 
- loss = fluid.layers.softmax_with_cross_entropy(logits, - to_variable(labels)) + loss = fluid.layers.softmax_with_cross_entropy( + logits, to_variable(labels)) loss = fluid.layers.reduce_sum(loss) loss.backward() - adam = AdamOptimizer( - learning_rate=1e-3, parameter_list=model.parameters()) + adam = AdamOptimizer(learning_rate=1e-3, + parameter_list=model.parameters()) adam.minimize(loss) model.clear_gradients() @@ -156,8 +157,8 @@ class TestDygraphGNN(unittest.TestCase): logits2, to_variable(labels2)) loss2 = fluid.layers.reduce_sum(loss2) loss2.backward() - adam2 = AdamOptimizer( - learning_rate=1e-3, parameter_list=model2.parameters()) + adam2 = AdamOptimizer(learning_rate=1e-3, + parameter_list=model2.parameters()) adam2.minimize(loss2) model2.clear_gradients() loss2_value = loss2.numpy() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_group.py b/python/paddle/fluid/tests/unittests/test_imperative_group.py index 994ae27a290..ca7af2d6d49 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_group.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_group.py @@ -30,6 +30,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph, in_dyg class TestDataParallelGroup(unittest.TestCase): + def create_varbase(self, dtype, shape): return paddle.rand(shape=shape, dtype=dtype) @@ -118,7 +119,8 @@ class TestDataParallelGroup(unittest.TestCase): var_list = [] var_list.append(self.create_varbase( "float32", - [1, 50], )) + [1, 50], + )) var_list.append(self.create_varbase("float64", [1, 25])) var_list.append(self.create_varbase("float32", [1, 50])) var_list.append(self.create_varbase("float64", [1, 25])) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_hook_for_layer.py b/python/paddle/fluid/tests/unittests/test_imperative_hook_for_layer.py index 4c457e9345c..87d0d8e81b0 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_hook_for_layer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_hook_for_layer.py @@ -80,13 +80,12 @@ class Test_Forward_Hook(unittest.TestCase): input1 = base.to_variable(input_word1) y = base.to_variable(y_data) - simplenet = SimpleNet( - hidden_size=20, - vocab_size=32, - num_steps=3, - init_scale=0.1, - is_sparse=False, - dtype="float32") + simplenet = SimpleNet(hidden_size=20, + vocab_size=32, + num_steps=3, + init_scale=0.1, + is_sparse=False, + dtype="float32") # origin, don't register any hook outs_origin = simplenet(input, y) @@ -149,13 +148,12 @@ class Test_Forward_Hook(unittest.TestCase): input = base.to_variable(input_word) y = base.to_variable(y_data) - simplenet = SimpleNet( - hidden_size=20, - vocab_size=32, - num_steps=3, - init_scale=0.1, - is_sparse=False, - dtype="float32") + simplenet = SimpleNet(hidden_size=20, + vocab_size=32, + num_steps=3, + init_scale=0.1, + is_sparse=False, + dtype="float32") # origin, don't register any hook outs_origin = simplenet(input, y) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py index 0bc56294876..127aed8cabc 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py @@ -25,24 +25,21 @@ from paddle.fluid.framework import _test_eager_guard class LeNetDygraph(fluid.dygraph.Layer): + def __init__(self, num_classes=10, classifier_activation='softmax'): super(LeNetDygraph, self).__init__() self.num_classes = num_classes - self.features = 
nn.Sequential( - nn.Conv2D( - 1, 6, 3, stride=1, padding=1), - nn.ReLU(), - paddle.fluid.dygraph.Pool2D(2, 'max', 2), - nn.Conv2D( - 6, 16, 5, stride=1, padding=0), - nn.ReLU(), - paddle.fluid.dygraph.Pool2D(2, 'max', 2)) + self.features = nn.Sequential(nn.Conv2D(1, 6, 3, stride=1, padding=1), + nn.ReLU(), + paddle.fluid.dygraph.Pool2D(2, 'max', 2), + nn.Conv2D(6, 16, 5, stride=1, padding=0), + nn.ReLU(), + paddle.fluid.dygraph.Pool2D(2, 'max', 2)) if num_classes > 0: - self.fc = nn.Sequential( - nn.Linear(400, 120), - nn.Linear(120, 84), nn.Linear(84, 10), - nn.Softmax()) #Todo: accept any activation + self.fc = nn.Sequential(nn.Linear(400, 120), nn.Linear(120, 84), + nn.Linear(84, 10), + nn.Softmax()) #Todo: accept any activation def forward(self, inputs): x = self.features(inputs) @@ -55,22 +52,27 @@ class LeNetDygraph(fluid.dygraph.Layer): def init_weights(layer): if type(layer) == nn.Linear: - new_weight = paddle.fluid.layers.fill_constant( - layer.weight.shape, layer.weight.dtype, value=0.9) + new_weight = paddle.fluid.layers.fill_constant(layer.weight.shape, + layer.weight.dtype, + value=0.9) layer.weight.set_value(new_weight) - new_bias = paddle.fluid.layers.fill_constant( - layer.bias.shape, layer.bias.dtype, value=-0.1) + new_bias = paddle.fluid.layers.fill_constant(layer.bias.shape, + layer.bias.dtype, + value=-0.1) layer.bias.set_value(new_bias) elif type(layer) == nn.Conv2D: - new_weight = paddle.fluid.layers.fill_constant( - layer.weight.shape, layer.weight.dtype, value=0.7) + new_weight = paddle.fluid.layers.fill_constant(layer.weight.shape, + layer.weight.dtype, + value=0.7) layer.weight.set_value(new_weight) - new_bias = paddle.fluid.layers.fill_constant( - layer.bias.shape, layer.bias.dtype, value=-0.2) + new_bias = paddle.fluid.layers.fill_constant(layer.bias.shape, + layer.bias.dtype, + value=-0.2) layer.bias.set_value(new_bias) class TestLayerApply(unittest.TestCase): + def func_apply_init_weight(self): with fluid.dygraph.guard(): net = LeNetDygraph() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py index 0cce1efd1f8..7d9c6e1dc4e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py @@ -25,17 +25,15 @@ from paddle.fluid.framework import _test_eager_guard class LeNetDygraph(fluid.dygraph.Layer): + def __init__(self): super(LeNetDygraph, self).__init__() - self.features = nn.Sequential( - nn.Conv2D( - 1, 6, 3, stride=1, padding=1), - nn.ReLU(), - paddle.fluid.dygraph.Pool2D(2, 'max', 2), - nn.Conv2D( - 6, 16, 5, stride=1, padding=0), - nn.ReLU(), - paddle.fluid.dygraph.Pool2D(2, 'max', 2)) + self.features = nn.Sequential(nn.Conv2D(1, 6, 3, stride=1, padding=1), + nn.ReLU(), + paddle.fluid.dygraph.Pool2D(2, 'max', 2), + nn.Conv2D(6, 16, 5, stride=1, padding=0), + nn.ReLU(), + paddle.fluid.dygraph.Pool2D(2, 'max', 2)) def forward(self, inputs): x = self.features(inputs) @@ -43,6 +41,7 @@ class LeNetDygraph(fluid.dygraph.Layer): class TestLayerChildren(unittest.TestCase): + def func_apply_init_weight(self): with fluid.dygraph.guard(): net = LeNetDygraph() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py index b0dcfd653fb..c359d99c819 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py +++ 
b/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py @@ -21,6 +21,7 @@ from paddle.fluid.framework import _test_eager_guard class TestImperativeLayerTrainable(unittest.TestCase): + def func_set_trainable(self): with fluid.dygraph.guard(): label = np.random.uniform(-1, 1, [10, 10]).astype(np.float32) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layers.py b/python/paddle/fluid/tests/unittests/test_imperative_layers.py index 15dada8c823..4f7e8c1ac16 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layers.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_layers.py @@ -19,6 +19,7 @@ from paddle.fluid.framework import _test_eager_guard, _non_static_mode class TestLayerPrint(unittest.TestCase): + def func_test_layer_str(self): module = nn.ELU(0.2) self.assertEqual(str(module), 'ELU(alpha=0.2)') @@ -39,8 +40,8 @@ class TestLayerPrint(unittest.TestCase): self.assertEqual(str(module), 'Tanh(name=Tanh)') module = nn.Hardtanh(name="Hardtanh") - self.assertEqual( - str(module), 'Hardtanh(min=-1.0, max=1.0, name=Hardtanh)') + self.assertEqual(str(module), + 'Hardtanh(min=-1.0, max=1.0, name=Hardtanh)') module = nn.PReLU(1, 0.25, name="PReLU", data_format="NCHW") self.assertEqual( @@ -125,8 +126,8 @@ class TestLayerPrint(unittest.TestCase): ) module = nn.Dropout(p=0.5) - self.assertEqual( - str(module), 'Dropout(p=0.5, axis=None, mode=upscale_in_train)') + self.assertEqual(str(module), + 'Dropout(p=0.5, axis=None, mode=upscale_in_train)') module = nn.Dropout2D(p=0.5) self.assertEqual(str(module), 'Dropout2D(p=0.5, data_format=NCHW)') @@ -149,8 +150,8 @@ class TestLayerPrint(unittest.TestCase): ) module = nn.ZeroPad2D(padding=[1, 0, 1, 2]) - self.assertEqual( - str(module), 'ZeroPad2D(padding=[1, 0, 1, 2], data_format=NCHW)') + self.assertEqual(str(module), + 'ZeroPad2D(padding=[1, 0, 1, 2], data_format=NCHW)') module = nn.Pad3D(padding=[1, 0, 1, 2, 0, 0], mode='constant') self.assertEqual( @@ -165,8 +166,8 @@ class TestLayerPrint(unittest.TestCase): self.assertEqual(str(module), 'Embedding(10, 3, sparse=True)') module = nn.Conv1D(3, 2, 3) - self.assertEqual( - str(module), 'Conv1D(3, 2, kernel_size=[3], data_format=NCL)') + self.assertEqual(str(module), + 'Conv1D(3, 2, kernel_size=[3], data_format=NCL)') module = nn.Conv1DTranspose(2, 1, 2) self.assertEqual( @@ -174,8 +175,8 @@ class TestLayerPrint(unittest.TestCase): 'Conv1DTranspose(2, 1, kernel_size=[2], data_format=NCL)') module = nn.Conv2D(4, 6, (3, 3)) - self.assertEqual( - str(module), 'Conv2D(4, 6, kernel_size=[3, 3], data_format=NCHW)') + self.assertEqual(str(module), + 'Conv2D(4, 6, kernel_size=[3, 3], data_format=NCHW)') module = nn.Conv2DTranspose(4, 6, (3, 3)) self.assertEqual( @@ -196,16 +197,16 @@ class TestLayerPrint(unittest.TestCase): self.assertEqual(str(module), 'PairwiseDistance(p=2.0)') module = nn.InstanceNorm1D(2) - self.assertEqual( - str(module), 'InstanceNorm1D(num_features=2, epsilon=1e-05)') + self.assertEqual(str(module), + 'InstanceNorm1D(num_features=2, epsilon=1e-05)') module = nn.InstanceNorm2D(2) - self.assertEqual( - str(module), 'InstanceNorm2D(num_features=2, epsilon=1e-05)') + self.assertEqual(str(module), + 'InstanceNorm2D(num_features=2, epsilon=1e-05)') module = nn.InstanceNorm3D(2) - self.assertEqual( - str(module), 'InstanceNorm3D(num_features=2, epsilon=1e-05)') + self.assertEqual(str(module), + 'InstanceNorm3D(num_features=2, epsilon=1e-05)') module = nn.GroupNorm(num_channels=6, num_groups=6) self.assertEqual( @@ -244,28 +245,28 @@ class 
TestLayerPrint(unittest.TestCase): 'LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=1.0)') module = nn.AvgPool1D(kernel_size=2, stride=2, padding=0) - self.assertEqual( - str(module), 'AvgPool1D(kernel_size=2, stride=2, padding=0)') + self.assertEqual(str(module), + 'AvgPool1D(kernel_size=2, stride=2, padding=0)') module = nn.AvgPool2D(kernel_size=2, stride=2, padding=0) - self.assertEqual( - str(module), 'AvgPool2D(kernel_size=2, stride=2, padding=0)') + self.assertEqual(str(module), + 'AvgPool2D(kernel_size=2, stride=2, padding=0)') module = nn.AvgPool3D(kernel_size=2, stride=2, padding=0) - self.assertEqual( - str(module), 'AvgPool3D(kernel_size=2, stride=2, padding=0)') + self.assertEqual(str(module), + 'AvgPool3D(kernel_size=2, stride=2, padding=0)') module = nn.MaxPool1D(kernel_size=2, stride=2, padding=0) - self.assertEqual( - str(module), 'MaxPool1D(kernel_size=2, stride=2, padding=0)') + self.assertEqual(str(module), + 'MaxPool1D(kernel_size=2, stride=2, padding=0)') module = nn.MaxPool2D(kernel_size=2, stride=2, padding=0) - self.assertEqual( - str(module), 'MaxPool2D(kernel_size=2, stride=2, padding=0)') + self.assertEqual(str(module), + 'MaxPool2D(kernel_size=2, stride=2, padding=0)') module = nn.MaxPool3D(kernel_size=2, stride=2, padding=0) - self.assertEqual( - str(module), 'MaxPool3D(kernel_size=2, stride=2, padding=0)') + self.assertEqual(str(module), + 'MaxPool3D(kernel_size=2, stride=2, padding=0)') module = nn.AdaptiveAvgPool1D(output_size=16) self.assertEqual(str(module), 'AdaptiveAvgPool1D(output_size=16)') @@ -277,16 +278,16 @@ class TestLayerPrint(unittest.TestCase): self.assertEqual(str(module), 'AdaptiveAvgPool3D(output_size=3)') module = nn.AdaptiveMaxPool1D(output_size=16, return_mask=True) - self.assertEqual( - str(module), 'AdaptiveMaxPool1D(output_size=16, return_mask=True)') + self.assertEqual(str(module), + 'AdaptiveMaxPool1D(output_size=16, return_mask=True)') module = nn.AdaptiveMaxPool2D(output_size=3, return_mask=True) - self.assertEqual( - str(module), 'AdaptiveMaxPool2D(output_size=3, return_mask=True)') + self.assertEqual(str(module), + 'AdaptiveMaxPool2D(output_size=3, return_mask=True)') module = nn.AdaptiveMaxPool3D(output_size=3, return_mask=True) - self.assertEqual( - str(module), 'AdaptiveMaxPool3D(output_size=3, return_mask=True)') + self.assertEqual(str(module), + 'AdaptiveMaxPool3D(output_size=3, return_mask=True)') module = nn.SimpleRNNCell(16, 32) self.assertEqual(str(module), 'SimpleRNNCell(16, 32)') @@ -332,14 +333,9 @@ class TestLayerPrint(unittest.TestCase): module2 = nn.Sequential( nn.Conv3DTranspose(4, 6, (3, 3, 3)), - nn.AvgPool3D( - kernel_size=2, stride=2, padding=0), - nn.Tanh(name="Tanh"), - module1, - nn.Conv3D(4, 6, (3, 3, 3)), - nn.MaxPool3D( - kernel_size=2, stride=2, padding=0), - nn.GELU(True)) + nn.AvgPool3D(kernel_size=2, stride=2, padding=0), + nn.Tanh(name="Tanh"), module1, nn.Conv3D(4, 6, (3, 3, 3)), + nn.MaxPool3D(kernel_size=2, stride=2, padding=0), nn.GELU(True)) self.assertEqual( str(module2), 'Sequential(\n '\ diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index e4faa7e259a..2c860a0a624 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -20,6 +20,7 @@ import numpy as np class TestDygraphLoadStatic(unittest.TestCase): + def testLoadStaticModel(self): # static mode a = 
fluid.data(name="a", shape=[10, 10]) @@ -28,20 +29,30 @@ class TestDygraphLoadStatic(unittest.TestCase): fc_out1 = fluid.layers.fc(a, 10) fc_out2 = fluid.layers.fc(a, 20) - conv_out_1 = fluid.layers.conv2d( - conv_in, num_filters=10, filter_size=5, act="relu") - conv_out_2 = fluid.layers.conv2d( - conv_in, num_filters=10, filter_size=5, act="relu") - - conv3d_in = fluid.data( - name='conv3d_in', shape=[None, 3, 12, 32, 32], dtype='float32') - conv3d_out_1 = fluid.layers.conv3d( - input=conv3d_in, num_filters=2, filter_size=3, act="relu") - conv3d_out_2 = fluid.layers.conv3d( - input=conv3d_in, num_filters=2, filter_size=3, act="relu") - - batchnorm_in = fluid.data( - name="batchnorm_in", shape=[None, 10], dtype='float32') + conv_out_1 = fluid.layers.conv2d(conv_in, + num_filters=10, + filter_size=5, + act="relu") + conv_out_2 = fluid.layers.conv2d(conv_in, + num_filters=10, + filter_size=5, + act="relu") + + conv3d_in = fluid.data(name='conv3d_in', + shape=[None, 3, 12, 32, 32], + dtype='float32') + conv3d_out_1 = fluid.layers.conv3d(input=conv3d_in, + num_filters=2, + filter_size=3, + act="relu") + conv3d_out_2 = fluid.layers.conv3d(input=conv3d_in, + num_filters=2, + filter_size=3, + act="relu") + + batchnorm_in = fluid.data(name="batchnorm_in", + shape=[None, 10], + dtype='float32') batchnorm_out_1 = fluid.layers.batch_norm(batchnorm_in) batchnorm_out_2 = fluid.layers.batch_norm(batchnorm_in) @@ -54,45 +65,53 @@ class TestDygraphLoadStatic(unittest.TestCase): layernorm_2 = fluid.layers.layer_norm(layernorm) nce_in = fluid.data(name="nce_in", shape=[None, 100], dtype='float32') - nce_label = fluid.data( - name="nce_label", shape=[None, 10], dtype='int64') + nce_label = fluid.data(name="nce_label", + shape=[None, 10], + dtype='int64') nce_out_1 = fluid.layers.nce(nce_in, nce_label, 10000) nce_out_2 = fluid.layers.nce(nce_in, nce_label, 10000) - prelu_in = fluid.data( - name="prelu_in", shape=[None, 5, 10, 10], dtype='float32') + prelu_in = fluid.data(name="prelu_in", + shape=[None, 5, 10, 10], + dtype='float32') prelu_out_1 = fluid.layers.prelu(prelu_in, "channel") prelu_out_2 = fluid.layers.prelu(prelu_in, "channel") - bilinear_tensor_pro_x = fluid.data( - "t1", shape=[None, 5], dtype="float32") - bilinear_tensor_pro_y = fluid.data( - "t2", shape=[None, 4], dtype="float32") + bilinear_tensor_pro_x = fluid.data("t1", + shape=[None, 5], + dtype="float32") + bilinear_tensor_pro_y = fluid.data("t2", + shape=[None, 4], + dtype="float32") bilinear_tensor_pro_out_1 = fluid.layers.bilinear_tensor_product( x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000) bilinear_tensor_pro_out_2 = fluid.layers.bilinear_tensor_product( x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000) - conv2d_trans_in = fluid.data( - name="conv2d_trans_in", shape=[None, 10, 10, 10]) - - conv2d_trans_out_1 = fluid.layers.conv2d_transpose( - conv2d_trans_in, num_filters=10, filter_size=5, act="relu") - conv2d_trans_out_2 = fluid.layers.conv2d_transpose( - conv2d_trans_in, num_filters=10, filter_size=5, act="relu") - - conv3d_trans_in = fluid.data( - name='conv3d_trans_in', - shape=[None, 3, 12, 32, 32], - dtype='float32') + conv2d_trans_in = fluid.data(name="conv2d_trans_in", + shape=[None, 10, 10, 10]) + + conv2d_trans_out_1 = fluid.layers.conv2d_transpose(conv2d_trans_in, + num_filters=10, + filter_size=5, + act="relu") + conv2d_trans_out_2 = fluid.layers.conv2d_transpose(conv2d_trans_in, + num_filters=10, + filter_size=5, + act="relu") + + conv3d_trans_in = fluid.data(name='conv3d_trans_in', + 
shape=[None, 3, 12, 32, 32], + dtype='float32') conv3d_trans_out_1 = fluid.layers.conv3d_transpose( input=conv3d_trans_in, num_filters=2, filter_size=3, act="relu") conv3d_trans_out_2 = fluid.layers.conv3d_transpose( input=conv3d_trans_in, num_filters=2, filter_size=3, act="relu") - groupnorm_in = fluid.data( - name='groupnorm_in', shape=[None, 8, 32, 32], dtype='float32') + groupnorm_in = fluid.data(name='groupnorm_in', + shape=[None, 8, 32, 32], + dtype='float32') groupnorm_out1 = fluid.layers.group_norm(input=groupnorm_in, groups=4) groupnorm_out2 = fluid.layers.group_norm(input=groupnorm_in, groups=4) ''' @@ -101,19 +120,23 @@ class TestDygraphLoadStatic(unittest.TestCase): spe_norm_out_2 = fluid.layers.spectral_norm(weight=spec_norm, dim=1, power_iters=2) ''' - nodes_vector = fluid.data( - name='vectors', shape=[None, 10, 5], dtype='float32') - edge_set = fluid.data( - name='edge_set', shape=[None, 10, 2], dtype='float32') + nodes_vector = fluid.data(name='vectors', + shape=[None, 10, 5], + dtype='float32') + edge_set = fluid.data(name='edge_set', + shape=[None, 10, 2], + dtype='float32') tree_conv_out1 = fluid.contrib.layers.tree_conv(nodes_vector, edge_set, 6, 1, 2) tree_conv_out2 = fluid.contrib.layers.tree_conv(nodes_vector, edge_set, 6, 1, 2) - para1 = fluid.layers.create_parameter( - [100, 100], 'float32', name="weight_test_1") - para2 = fluid.layers.create_parameter( - [20, 200], 'float32', name="weight_test_2") + para1 = fluid.layers.create_parameter([100, 100], + 'float32', + name="weight_test_1") + para2 = fluid.layers.create_parameter([20, 200], + 'float32', + name="weight_test_2") para_list = fluid.default_main_program().list_vars() @@ -137,33 +160,30 @@ class TestDygraphLoadStatic(unittest.TestCase): with fluid.dygraph.guard(): class MyTest(fluid.dygraph.Layer): + def __init__(self): super(MyTest, self).__init__() self.linear1 = Linear(10, 10) self.lienar2 = Linear(10, 20) - self.conv2d_1 = Conv2D( - num_channels=10, - num_filters=10, - filter_size=5, - act="relu") - self.conv2d_2 = Conv2D( - num_channels=10, - num_filters=10, - filter_size=5, - act="relu") - - self.conv3d_1 = Conv3D( - num_channels=3, - num_filters=2, - filter_size=3, - act="relu") - self.conv3d_2 = Conv3D( - num_channels=3, - num_filters=2, - filter_size=3, - act="relu") + self.conv2d_1 = Conv2D(num_channels=10, + num_filters=10, + filter_size=5, + act="relu") + self.conv2d_2 = Conv2D(num_channels=10, + num_filters=10, + filter_size=5, + act="relu") + + self.conv3d_1 = Conv3D(num_channels=3, + num_filters=2, + filter_size=3, + act="relu") + self.conv3d_2 = Conv3D(num_channels=3, + num_filters=2, + filter_size=3, + act="relu") self.batch_norm_1 = BatchNorm(10) self.batch_norm_2 = BatchNorm(10) @@ -183,10 +203,12 @@ class TestDygraphLoadStatic(unittest.TestCase): self.group_norm1 = GroupNorm(8, 4) self.gourp_norm2 = GroupNorm(8, 4) - self.w_1 = self.create_parameter( - [100, 100], dtype='float32', attr="weight_test_1") - self.w_2 = self.create_parameter( - [20, 200], dtype='float32', attr="weight_test_2") + self.w_1 = self.create_parameter([100, 100], + dtype='float32', + attr="weight_test_1") + self.w_2 = self.create_parameter([20, 200], + dtype='float32', + attr="weight_test_2") my_test = MyTest() my_test.set_dict(new_dict, use_structured_name=False) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py index 110bb961bbe..f9306d0cfeb 100644 --- 
a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py @@ -30,6 +30,7 @@ from paddle.fluid.framework import _test_eager_guard class SimpleNet(fluid.Layer): + def __init__(self, hidden_size, vocab_size, @@ -60,13 +61,13 @@ class SimpleNet(fluid.Layer): def forward(self, input, label): x_emb = self.embedding(input) projection = fluid.layers.matmul( - x_emb, fluid.layers.transpose( - self.embedding.weight, perm=[1, 0])) + x_emb, fluid.layers.transpose(self.embedding.weight, perm=[1, 0])) projection = fluid.layers.elementwise_add(projection, self.softmax_bias) - projection = fluid.layers.reshape( - projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( - logits=projection, label=label, soft_label=False) + projection = fluid.layers.reshape(projection, + shape=[-1, self.vocab_size]) + loss = fluid.layers.softmax_with_cross_entropy(logits=projection, + label=label, + soft_label=False) loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps]) loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_sum(loss) @@ -75,6 +76,7 @@ class SimpleNet(fluid.Layer): class TestDygraphSimpleNet(unittest.TestCase): + def func_simple_net(self): for is_sparse in [True, False]: dtype_list = ["float32"] @@ -107,25 +109,22 @@ class TestDygraphSimpleNet(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) - simple_net = SimpleNet( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_steps=num_steps, - init_scale=init_scale, - is_sparse=is_sparse, - dtype=dtype) - - sgd = SGDOptimizer( - learning_rate=1e-3, - parameter_list=simple_net.parameters()) + simple_net = SimpleNet(hidden_size=hidden_size, + vocab_size=vocab_size, + num_steps=num_steps, + init_scale=init_scale, + is_sparse=is_sparse, + dtype=dtype) + + sgd = SGDOptimizer(learning_rate=1e-3, + parameter_list=simple_net.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None helper = DyGraphProgramDescTracerTestHelper(self) - fluid.set_flags({ - 'FLAGS_sort_sum_gradient': is_sort_sum_gradient - }) + fluid.set_flags( + {'FLAGS_sort_sum_gradient': is_sort_sum_gradient}) for i in range(batch_num): x_data = np.arange(12).reshape(4, 3).astype('int64') @@ -152,17 +151,17 @@ class TestDygraphSimpleNet(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) - simple_net = SimpleNet( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_steps=num_steps, - is_sparse=is_sparse, - dtype=dtype) + simple_net = SimpleNet(hidden_size=hidden_size, + vocab_size=vocab_size, + num_steps=num_steps, + is_sparse=is_sparse, + dtype=dtype) exe = fluid.Executor(place) sgd = SGDOptimizer(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype=dtype) static_loss = simple_net(x, y) @@ -186,8 +185,10 @@ class TestDygraphSimpleNet(unittest.TestCase): fetch_list = [static_loss] fetch_list.extend(static_param_name_list) out = exe.run(fluid.default_main_program(), - feed={"x": x_data, - "y": y_data}, + feed={ + "x": x_data, + "y": y_data + }, fetch_list=fetch_list) static_loss_value = out[0] @@ -197,13 +198,12 @@ class TestDygraphSimpleNet(unittest.TestCase): k - 1]] = out[k] self.assertTrue( - np.allclose( - static_loss_value, dy_loss_value, rtol=1e-3)) + 
np.allclose(static_loss_value, dy_loss_value, rtol=1e-3)) for key, value in six.iteritems(static_param_init): self.assertTrue(np.array_equal(value, dy_param_init[key])) for key, value in six.iteritems(static_param_updated): - self.assertTrue( - np.array_equal(value, dy_param_updated[key])) + self.assertTrue(np.array_equal(value, + dy_param_updated[key])) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py index f9bd5e45971..aeead6ff747 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py @@ -31,6 +31,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class SimpleImgConvPool(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -50,25 +51,23 @@ class SimpleImgConvPool(fluid.dygraph.Layer): bias_attr=None): super(SimpleImgConvPool, self).__init__() - self._conv2d = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=conv_stride, - padding=conv_padding, - dilation=conv_dilation, - groups=conv_groups, - param_attr=None, - bias_attr=None, - use_cudnn=use_cudnn) - - self._pool2d = Pool2D( - pool_size=pool_size, - pool_type=pool_type, - pool_stride=pool_stride, - pool_padding=pool_padding, - global_pooling=global_pooling, - use_cudnn=use_cudnn) + self._conv2d = Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=conv_stride, + padding=conv_padding, + dilation=conv_dilation, + groups=conv_groups, + param_attr=None, + bias_attr=None, + use_cudnn=use_cudnn) + + self._pool2d = Pool2D(pool_size=pool_size, + pool_type=pool_type, + pool_stride=pool_stride, + pool_padding=pool_padding, + global_pooling=global_pooling, + use_cudnn=use_cudnn) def forward(self, inputs): x = self._conv2d(inputs) @@ -77,25 +76,33 @@ class SimpleImgConvPool(fluid.dygraph.Layer): class MNIST(fluid.dygraph.Layer): + def __init__(self): super(MNIST, self).__init__() - self._simple_img_conv_pool_1 = SimpleImgConvPool( - 1, 20, 5, 2, 2, act="relu") + self._simple_img_conv_pool_1 = SimpleImgConvPool(1, + 20, + 5, + 2, + 2, + act="relu") - self._simple_img_conv_pool_2 = SimpleImgConvPool( - 20, 50, 5, 2, 2, act="relu") + self._simple_img_conv_pool_2 = SimpleImgConvPool(20, + 50, + 5, + 2, + 2, + act="relu") self.pool_2_shape = 50 * 4 * 4 SIZE = 10 scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5 - self._fc = Linear( - self.pool_2_shape, - 10, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale)), - act="softmax") + self._fc = Linear(self.pool_2_shape, + 10, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.NormalInitializer( + loc=0.0, scale=scale)), + act="softmax") def forward(self, inputs): x = self._simple_img_conv_pool_1(inputs) @@ -106,7 +113,9 @@ class MNIST(fluid.dygraph.Layer): class TestImperativeMnist(unittest.TestCase): + def reader_decorator(self, reader): + def _reader_imple(): for item in reader(): image = np.array(item[0]).reshape(1, 28, 28) @@ -128,15 +137,15 @@ class TestImperativeMnist(unittest.TestCase): fluid.default_main_program().random_seed = seed mnist = MNIST() - sgd = SGDOptimizer( - learning_rate=1e-3, parameter_list=mnist.parameters()) + sgd = SGDOptimizer(learning_rate=1e-3, + parameter_list=mnist.parameters()) batch_py_reader = fluid.io.PyReader(capacity=1) 
batch_py_reader.decorate_sample_list_generator( - paddle.batch( - self.reader_decorator(paddle.dataset.mnist.train()), - batch_size=batch_size, - drop_last=True), + paddle.batch(self.reader_decorator( + paddle.dataset.mnist.train()), + batch_size=batch_size, + drop_last=True), places=fluid.CPUPlace()) mnist.train() @@ -194,13 +203,13 @@ class TestImperativeMnist(unittest.TestCase): mnist = MNIST() sgd = SGDOptimizer(learning_rate=1e-3) - train_reader = paddle.batch( - paddle.dataset.mnist.train(), - batch_size=batch_size, - drop_last=True) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=batch_size, + drop_last=True) - img = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype='float32') + img = fluid.layers.data(name='pixel', + shape=[1, 28, 28], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') cost = mnist(img) loss = fluid.layers.cross_entropy(cost, label) @@ -223,12 +232,12 @@ class TestImperativeMnist(unittest.TestCase): for batch_id, data in enumerate(train_reader()): if batch_id >= batch_num: break - static_x_data = np.array( - [x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape( - [batch_size, 1]) + static_x_data = np.array([ + x[0].reshape(1, 28, 28) for x in data + ]).astype('float32') + y_data = np.array([x[1] + for x in data]).astype('int64').reshape( + [batch_size, 1]) fetch_list = [avg_loss.name] fetch_list.extend(static_param_name_list) @@ -236,17 +245,18 @@ class TestImperativeMnist(unittest.TestCase): if traced_layer is not None: traced_layer([static_x_data]) - out = exe.run( - fluid.default_main_program(), - feed={"pixel": static_x_data, - "label": y_data}, - fetch_list=fetch_list) + out = exe.run(fluid.default_main_program(), + feed={ + "pixel": static_x_data, + "label": y_data + }, + fetch_list=fetch_list) static_param_value = {} static_out = out[0] for i in range(1, len(out)): - static_param_value[static_param_name_list[i - 1]] = out[ - i] + static_param_value[static_param_name_list[i - + 1]] = out[i] self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all())) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist_sorted_gradient.py index 8e3cbaf9488..23af23a4286 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_mnist_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist_sorted_gradient.py @@ -30,6 +30,7 @@ from paddle.fluid.framework import _test_eager_guard class TestImperativeMnistSortGradient(unittest.TestCase): + def func_test_mnist_sort_gradient_float32(self): seed = 90 epoch_num = 1 @@ -40,20 +41,21 @@ class TestImperativeMnistSortGradient(unittest.TestCase): fluid.set_flags({'FLAGS_sort_sum_gradient': True}) mnist2 = MNIST() - sgd2 = SGDOptimizer( - learning_rate=1e-3, parameter_list=mnist2.parameters()) - train_reader2 = paddle.batch( - paddle.dataset.mnist.train(), batch_size=128, drop_last=True) + sgd2 = SGDOptimizer(learning_rate=1e-3, + parameter_list=mnist2.parameters()) + train_reader2 = paddle.batch(paddle.dataset.mnist.train(), + batch_size=128, + drop_last=True) mnist2.train() dy_param_init_value2 = {} for epoch in range(epoch_num): for batch_id, data in enumerate(train_reader2()): - dy_x_data2 = np.array( - [x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data2 = np.array( - [x[1] for x in data]).astype('int64').reshape(128, 1) + dy_x_data2 = 
np.array([ + x[0].reshape(1, 28, 28) for x in data + ]).astype('float32') + y_data2 = np.array([x[1] for x in data + ]).astype('int64').reshape(128, 1) img2 = to_variable(dy_x_data2) label2 = to_variable(y_data2) @@ -88,11 +90,13 @@ class TestImperativeMnistSortGradient(unittest.TestCase): mnist = MNIST() sgd = SGDOptimizer(learning_rate=1e-3) - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=128, drop_last=True) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=128, + drop_last=True) - img = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype='float32') + img = fluid.layers.data(name='pixel', + shape=[1, 28, 28], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') cost = mnist(img) loss = fluid.layers.cross_entropy(cost, label) @@ -113,25 +117,26 @@ class TestImperativeMnistSortGradient(unittest.TestCase): for epoch in range(epoch_num): for batch_id, data in enumerate(train_reader()): - static_x_data = np.array( - [x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape([128, 1]) + static_x_data = np.array([ + x[0].reshape(1, 28, 28) for x in data + ]).astype('float32') + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape([128, 1]) fetch_list = [avg_loss.name] fetch_list.extend(static_param_name_list) - out = exe.run( - fluid.default_main_program(), - feed={"pixel": static_x_data, - "label": y_data}, - fetch_list=fetch_list) + out = exe.run(fluid.default_main_program(), + feed={ + "pixel": static_x_data, + "label": y_data + }, + fetch_list=fetch_list) static_param_value = {} static_out = out[0] for i in range(1, len(out)): - static_param_value[static_param_name_list[i - 1]] = out[ - i] + static_param_value[static_param_name_list[i - + 1]] = out[i] if batch_id == 20: break diff --git a/python/paddle/fluid/tests/unittests/test_imperative_named_members.py b/python/paddle/fluid/tests/unittests/test_imperative_named_members.py index 223ccd3a3d5..c3b052edeac 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_named_members.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_named_members.py @@ -20,6 +20,7 @@ from paddle.fluid.framework import _test_eager_guard, _non_static_mode class MyLayer(fluid.Layer): + def __init__(self, num_channel, dim, num_filter=5): super(MyLayer, self).__init__() self.fc = fluid.dygraph.Linear(dim, dim) @@ -32,6 +33,7 @@ class MyLayer(fluid.Layer): class TestImperativeNamedSubLayers(unittest.TestCase): + def func_test_named_sublayers(self): with fluid.dygraph.guard(): fc1 = fluid.Linear(10, 3) @@ -43,14 +45,16 @@ class TestImperativeNamedSubLayers(unittest.TestCase): expected_sublayers = [fc1, fc2, custom, custom.fc, custom.conv] self.assertEqual(len(list_named_sublayers), len(expected_sublayers)) - for (name, sublayer), expected_sublayer in zip(list_named_sublayers, - expected_sublayers): + for (name, + sublayer), expected_sublayer in zip(list_named_sublayers, + expected_sublayers): self.assertEqual(sublayer, expected_sublayer) list_sublayers = list(model.sublayers()) self.assertEqual(len(list_named_sublayers), len(list_sublayers)) - for (name, sublayer), expected_sublayer in zip(list_named_sublayers, - list_sublayers): + for (name, + sublayer), expected_sublayer in zip(list_named_sublayers, + list_sublayers): self.assertEqual(sublayer, expected_sublayer) self.assertListEqual( @@ -64,6 +68,7 @@ class TestImperativeNamedSubLayers(unittest.TestCase): class 
TestImperativeNamedParameters(unittest.TestCase): + def func_test_named_parameters(self): with fluid.dygraph.guard(): fc1 = fluid.Linear(10, 3) @@ -90,6 +95,7 @@ class TestImperativeNamedParameters(unittest.TestCase): with fluid.dygraph.guard(): class Mymodel(fluid.dygraph.Layer): + def __init__(self): super(Mymodel, self).__init__() self.linear1 = fluid.dygraph.Linear(10, 10) @@ -98,11 +104,10 @@ class TestImperativeNamedParameters(unittest.TestCase): self.embedding = fluid.dygraph.Embedding(size=[128, 16]) self.h_0 = fluid.dygraph.to_variable( np.zeros([10, 10]).astype('float32')) - self.weight = self.create_parameter( - shape=[2, 3], - attr=fluid.ParamAttr(), - dtype="float32", - is_bias=False) + self.weight = self.create_parameter(shape=[2, 3], + attr=fluid.ParamAttr(), + dtype="float32", + is_bias=False) model = Mymodel() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py b/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py index 158f71cc300..c0287668a31 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py @@ -20,6 +20,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class TestImperativeNumpyBridge(unittest.TestCase): + def func_tensor_from_numpy(self): data_np = np.array([[2, 3, 1]]).astype('float32') with fluid.dygraph.guard(fluid.CPUPlace()): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index fd53b42450d..064f0948cad 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -60,6 +60,7 @@ class Config(object): class ConvBNPool(fluid.dygraph.Layer): + def __init__(self, group, out_ch, @@ -81,34 +82,31 @@ class ConvBNPool(fluid.dygraph.Layer): conv_param_1 = fluid.ParamAttr( initializer=fluid.initializer.Normal(0.0, conv_std_1)) - self.conv_0_layer = Conv2D( - channels[0], - out_ch[0], - 3, - padding=1, - param_attr=conv_param_0, - bias_attr=False, - act=None, - use_cudnn=use_cudnn) + self.conv_0_layer = Conv2D(channels[0], + out_ch[0], + 3, + padding=1, + param_attr=conv_param_0, + bias_attr=False, + act=None, + use_cudnn=use_cudnn) self.bn_0_layer = BatchNorm(out_ch[0], act=act, is_test=is_test) - self.conv_1_layer = Conv2D( - out_ch[0], - num_filters=out_ch[1], - filter_size=3, - padding=1, - param_attr=conv_param_1, - bias_attr=False, - act=None, - use_cudnn=use_cudnn) + self.conv_1_layer = Conv2D(out_ch[0], + num_filters=out_ch[1], + filter_size=3, + padding=1, + param_attr=conv_param_1, + bias_attr=False, + act=None, + use_cudnn=use_cudnn) self.bn_1_layer = BatchNorm(out_ch[1], act=act, is_test=is_test) if self.pool: - self.pool_layer = Pool2D( - pool_size=2, - pool_type='max', - pool_stride=2, - use_cudnn=use_cudnn, - ceil_mode=True) + self.pool_layer = Pool2D(pool_size=2, + pool_type='max', + pool_stride=2, + use_cudnn=use_cudnn, + ceil_mode=True) def forward(self, inputs): conv_0 = self.conv_0_layer(inputs) @@ -122,19 +120,22 @@ class ConvBNPool(fluid.dygraph.Layer): class OCRConv(fluid.dygraph.Layer): + def __init__(self, is_test=False, use_cudnn=True): super(OCRConv, self).__init__() - self.conv_bn_pool_1 = ConvBNPool( - 2, [8, 8], [1, 8], is_test=is_test, use_cudnn=use_cudnn) - self.conv_bn_pool_2 = ConvBNPool( - 2, [8, 8], [8, 8], is_test=is_test, 
use_cudnn=use_cudnn) - self.conv_bn_pool_3 = ConvBNPool( - 2, [8, 8], [8, 8], is_test=is_test, use_cudnn=use_cudnn) - self.conv_bn_pool_4 = ConvBNPool( - 2, [16, 16], [8, 16], - is_test=is_test, - pool=False, - use_cudnn=use_cudnn) + self.conv_bn_pool_1 = ConvBNPool(2, [8, 8], [1, 8], + is_test=is_test, + use_cudnn=use_cudnn) + self.conv_bn_pool_2 = ConvBNPool(2, [8, 8], [8, 8], + is_test=is_test, + use_cudnn=use_cudnn) + self.conv_bn_pool_3 = ConvBNPool(2, [8, 8], [8, 8], + is_test=is_test, + use_cudnn=use_cudnn) + self.conv_bn_pool_4 = ConvBNPool(2, [16, 16], [8, 16], + is_test=is_test, + pool=False, + use_cudnn=use_cudnn) def forward(self, inputs): inputs_1 = self.conv_bn_pool_1(inputs) @@ -146,6 +147,7 @@ class OCRConv(fluid.dygraph.Layer): class DynamicGRU(fluid.dygraph.Layer): + def __init__(self, size, param_attr=None, @@ -157,13 +159,12 @@ class DynamicGRU(fluid.dygraph.Layer): origin_mode=False): super(DynamicGRU, self).__init__() - self.gru_unit = GRUUnit( - size * 3, - param_attr=param_attr, - bias_attr=bias_attr, - activation=candidate_activation, - gate_activation=gate_activation, - origin_mode=origin_mode) + self.gru_unit = GRUUnit(size * 3, + param_attr=param_attr, + bias_attr=bias_attr, + activation=candidate_activation, + gate_activation=gate_activation, + origin_mode=origin_mode) self.h_0 = h_0 self.is_reverse = is_reverse @@ -175,13 +176,15 @@ class DynamicGRU(fluid.dygraph.Layer): for i in range(inputs.shape[1]): if self.is_reverse: i = inputs.shape[1] - 1 - i - input_ = fluid.layers.slice( - inputs, axes=[1], starts=[i], ends=[i + 1]) - input_ = fluid.layers.reshape( - input_, [-1, input_.shape[2]], inplace=False) + input_ = fluid.layers.slice(inputs, + axes=[1], + starts=[i], + ends=[i + 1]) + input_ = fluid.layers.reshape(input_, [-1, input_.shape[2]], + inplace=False) hidden, reset, gate = self.gru_unit(input_, hidden) - hidden_ = fluid.layers.reshape( - hidden, [-1, 1, hidden.shape[1]], inplace=False) + hidden_ = fluid.layers.reshape(hidden, [-1, 1, hidden.shape[1]], + inplace=False) if self.is_reverse: res = [hidden_] + res else: @@ -191,19 +194,21 @@ class DynamicGRU(fluid.dygraph.Layer): class EncoderNet(fluid.dygraph.Layer): + def __init__(self, rnn_hidden_size=Config.encoder_size, is_test=False, use_cudnn=True): super(EncoderNet, self).__init__() self.rnn_hidden_size = rnn_hidden_size - para_attr = fluid.ParamAttr(initializer=fluid.initializer.Normal(0.0, - 0.02)) - bias_attr = fluid.ParamAttr( - initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0) + para_attr = fluid.ParamAttr( + initializer=fluid.initializer.Normal(0.0, 0.02)) + bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Normal( + 0.0, 0.02), + learning_rate=2.0) if fluid.framework._non_static_mode(): - h_0 = np.zeros( - (Config.batch_size, rnn_hidden_size), dtype="float32") + h_0 = np.zeros((Config.batch_size, rnn_hidden_size), + dtype="float32") h_0 = to_variable(h_0) else: h_0 = fluid.layers.fill_constant( @@ -212,26 +217,29 @@ class EncoderNet(fluid.dygraph.Layer): value=0) self.ocr_convs = OCRConv(is_test=is_test, use_cudnn=use_cudnn) - self.fc_1_layer = Linear( - 32, rnn_hidden_size * 3, param_attr=para_attr, bias_attr=False) - self.fc_2_layer = Linear( - 32, rnn_hidden_size * 3, param_attr=para_attr, bias_attr=False) - self.gru_forward_layer = DynamicGRU( - size=rnn_hidden_size, - h_0=h_0, - param_attr=para_attr, - bias_attr=bias_attr, - candidate_activation='relu') - self.gru_backward_layer = DynamicGRU( - size=rnn_hidden_size, - h_0=h_0, - param_attr=para_attr, - 
bias_attr=bias_attr, - candidate_activation='relu', - is_reverse=True) - - self.encoded_proj_fc = Linear( - rnn_hidden_size * 2, Config.decoder_size, bias_attr=False) + self.fc_1_layer = Linear(32, + rnn_hidden_size * 3, + param_attr=para_attr, + bias_attr=False) + self.fc_2_layer = Linear(32, + rnn_hidden_size * 3, + param_attr=para_attr, + bias_attr=False) + self.gru_forward_layer = DynamicGRU(size=rnn_hidden_size, + h_0=h_0, + param_attr=para_attr, + bias_attr=bias_attr, + candidate_activation='relu') + self.gru_backward_layer = DynamicGRU(size=rnn_hidden_size, + h_0=h_0, + param_attr=para_attr, + bias_attr=bias_attr, + candidate_activation='relu', + is_reverse=True) + + self.encoded_proj_fc = Linear(rnn_hidden_size * 2, + Config.decoder_size, + bias_attr=False) def forward(self, inputs): conv_features = self.ocr_convs(inputs) @@ -240,22 +248,21 @@ class EncoderNet(fluid.dygraph.Layer): # stride=[1, 1], # filter_size=[conv_features.shape[2], 1]) - transpose_conv_features = fluid.layers.transpose( - conv_features, perm=[0, 3, 1, 2]) - sliced_feature = fluid.layers.reshape( - transpose_conv_features, [ - -1, 8, transpose_conv_features.shape[2] * - transpose_conv_features.shape[3] - ], - inplace=False) + transpose_conv_features = fluid.layers.transpose(conv_features, + perm=[0, 3, 1, 2]) + sliced_feature = fluid.layers.reshape(transpose_conv_features, [ + -1, 8, + transpose_conv_features.shape[2] * transpose_conv_features.shape[3] + ], + inplace=False) fc_1 = self.fc_1_layer(sliced_feature) fc_2 = self.fc_2_layer(sliced_feature) gru_forward = self.gru_forward_layer(fc_1) gru_backward = self.gru_backward_layer(fc_2) - encoded_vector = fluid.layers.concat( - input=[gru_forward, gru_backward], axis=2) + encoded_vector = fluid.layers.concat(input=[gru_forward, gru_backward], + axis=2) encoded_proj = self.encoded_proj_fc(encoded_vector) @@ -263,11 +270,14 @@ class EncoderNet(fluid.dygraph.Layer): class SimpleAttention(fluid.dygraph.Layer): + def __init__(self, decoder_size): super(SimpleAttention, self).__init__() - self.fc_1 = Linear( - decoder_size, decoder_size, act=None, bias_attr=False) + self.fc_1 = Linear(decoder_size, + decoder_size, + act=None, + bias_attr=False) self.fc_2 = Linear(decoder_size, 1, act=None, bias_attr=False) def forward(self, encoder_vec, encoder_proj, decoder_state): @@ -288,26 +298,33 @@ class SimpleAttention(fluid.dygraph.Layer): inplace=False) weights_reshape = fluid.layers.softmax(weights_reshape) - scaled = fluid.layers.elementwise_mul( - x=encoder_vec, y=weights_reshape, axis=0) + scaled = fluid.layers.elementwise_mul(x=encoder_vec, + y=weights_reshape, + axis=0) context = fluid.layers.reduce_sum(scaled, dim=1) return context class GRUDecoderWithAttention(fluid.dygraph.Layer): + def __init__(self, decoder_size, num_classes): super(GRUDecoderWithAttention, self).__init__() self.simple_attention = SimpleAttention(decoder_size) - self.fc_1_layer = Linear( - Config.encoder_size * 2, decoder_size * 3, bias_attr=False) - self.fc_2_layer = Linear( - decoder_size, decoder_size * 3, bias_attr=False) - self.gru_unit = GRUUnit( - size=decoder_size * 3, param_attr=None, bias_attr=None) - self.out_layer = Linear( - decoder_size, num_classes + 2, bias_attr=None, act='softmax') + self.fc_1_layer = Linear(Config.encoder_size * 2, + decoder_size * 3, + bias_attr=False) + self.fc_2_layer = Linear(decoder_size, + decoder_size * 3, + bias_attr=False) + self.gru_unit = GRUUnit(size=decoder_size * 3, + param_attr=None, + bias_attr=None) + self.out_layer = Linear(decoder_size, + 
num_classes + 2, + bias_attr=None, + act='softmax') self.decoder_size = decoder_size @@ -316,10 +333,13 @@ class GRUDecoderWithAttention(fluid.dygraph.Layer): res = [] hidden_mem = decoder_boot for i in range(target_embedding.shape[1]): - current_word = fluid.layers.slice( - target_embedding, axes=[1], starts=[i], ends=[i + 1]) - current_word = fluid.layers.reshape( - current_word, [-1, current_word.shape[2]], inplace=False) + current_word = fluid.layers.slice(target_embedding, + axes=[1], + starts=[i], + ends=[i + 1]) + current_word = fluid.layers.reshape(current_word, + [-1, current_word.shape[2]], + inplace=False) context = self.simple_attention(encoder_vec, encoder_proj, hidden_mem) @@ -338,14 +358,14 @@ class GRUDecoderWithAttention(fluid.dygraph.Layer): class OCRAttention(fluid.dygraph.Layer): + def __init__(self): super(OCRAttention, self).__init__() self.encoder_net = EncoderNet() - self.fc = Linear( - Config.encoder_size, - Config.decoder_size, - bias_attr=False, - act='relu') + self.fc = Linear(Config.encoder_size, + Config.decoder_size, + bias_attr=False, + act='relu') self.embedding = Embedding( [Config.num_classes + 2, Config.word_vector_dim], dtype='float32') self.gru_decoder_with_attention = GRUDecoderWithAttention( @@ -353,10 +373,13 @@ class OCRAttention(fluid.dygraph.Layer): def forward(self, inputs, label_in): gru_backward, encoded_vector, encoded_proj = self.encoder_net(inputs) - backward_first = fluid.layers.slice( - gru_backward, axes=[1], starts=[0], ends=[1]) - backward_first = fluid.layers.reshape( - backward_first, [-1, backward_first.shape[2]], inplace=False) + backward_first = fluid.layers.slice(gru_backward, + axes=[1], + starts=[0], + ends=[1]) + backward_first = fluid.layers.reshape(backward_first, + [-1, backward_first.shape[2]], + inplace=False) decoder_boot = self.fc(backward_first) label_in = fluid.layers.reshape(label_in, [-1], inplace=False) trg_embedding = self.embedding(label_in) @@ -365,13 +388,15 @@ class OCRAttention(fluid.dygraph.Layer): trg_embedding, [-1, Config.max_length, trg_embedding.shape[1]], inplace=False) - prediction = self.gru_decoder_with_attention( - trg_embedding, encoded_vector, encoded_proj, decoder_boot) + prediction = self.gru_decoder_with_attention(trg_embedding, + encoded_vector, + encoded_proj, decoder_boot) return prediction class TestDygraphOCRAttention(unittest.TestCase): + def test_ocr_test(self): seed = 90 epoch_num = 1 @@ -383,23 +408,23 @@ class TestDygraphOCRAttention(unittest.TestCase): image_np = np.random.randn(Config.batch_size, Config.DATA_SHAPE[0], Config.DATA_SHAPE[1], Config.DATA_SHAPE[2]).astype('float32') - label_in_np = np.arange( - 0, Config.max_length, - dtype='int64').reshape([1, Config.max_length]) + label_in_np = np.arange(0, Config.max_length, + dtype='int64').reshape([1, Config.max_length]) for i in range(2, Config.batch_size + 1): - label_in_np = np.vstack((label_in_np, np.arange( - (i - 1) * Config.max_length, - i * Config.max_length, - dtype='int64').reshape([1, Config.max_length]))) - - label_out_np = np.arange( - 0, Config.max_length, - dtype='int64').reshape([1, Config.max_length]) + label_in_np = np.vstack( + (label_in_np, + np.arange((i - 1) * Config.max_length, + i * Config.max_length, + dtype='int64').reshape([1, Config.max_length]))) + + label_out_np = np.arange(0, Config.max_length, + dtype='int64').reshape([1, Config.max_length]) for i in range(2, Config.batch_size + 1): - label_out_np = np.vstack((label_out_np, np.arange( - (i - 1) * Config.max_length, - i * Config.max_length, - 
dtype='int64').reshape([1, Config.max_length]))) + label_out_np = np.vstack( + (label_out_np, + np.arange((i - 1) * Config.max_length, + i * Config.max_length, + dtype='int64').reshape([1, Config.max_length]))) def run_dygraph(): fluid.set_flags({'FLAGS_sort_sum_gradient': True}) @@ -424,12 +449,12 @@ class TestDygraphOCRAttention(unittest.TestCase): label_out.stop_gradient = True img = to_variable(image_np) dy_prediction = ocr_attention(img, label_in) - label_out = fluid.layers.reshape( - label_out, [-1, 1], inplace=False) + label_out = fluid.layers.reshape(label_out, [-1, 1], + inplace=False) dy_prediction = fluid.layers.reshape( dy_prediction, [label_out.shape[0], -1], inplace=False) - loss = fluid.layers.cross_entropy( - input=dy_prediction, label=label_out) + loss = fluid.layers.cross_entropy(input=dy_prediction, + label=label_out) avg_loss = fluid.layers.reduce_sum(loss) dy_out = avg_loss.numpy() @@ -442,10 +467,10 @@ class TestDygraphOCRAttention(unittest.TestCase): dy_grad_value = {} for param in ocr_attention.parameters(): if param.trainable: - np_array = np.array(param._grad_ivar().value() - .get_tensor()) - dy_grad_value[param.name + core.grad_var_suffix( - )] = np_array + np_array = np.array( + param._grad_ivar().value().get_tensor()) + dy_grad_value[param.name + + core.grad_var_suffix()] = np_array optimizer.minimize(avg_loss) ocr_attention.clear_gradients() @@ -478,12 +503,17 @@ class TestDygraphOCRAttention(unittest.TestCase): optimizer = fluid.optimizer.SGD(learning_rate=0.001) - images = fluid.layers.data( - name='pixel', shape=Config.DATA_SHAPE, dtype='float32') - static_label_in = fluid.layers.data( - name='label_in', shape=[1], dtype='int64', lod_level=0) - static_label_out = fluid.layers.data( - name='label_out', shape=[1], dtype='int64', lod_level=0) + images = fluid.layers.data(name='pixel', + shape=Config.DATA_SHAPE, + dtype='float32') + static_label_in = fluid.layers.data(name='label_in', + shape=[1], + dtype='int64', + lod_level=0) + static_label_out = fluid.layers.data(name='label_out', + shape=[1], + dtype='int64', + lod_level=0) static_label_out.stop_gradient = True static_label_out.trainable = False @@ -492,8 +522,8 @@ class TestDygraphOCRAttention(unittest.TestCase): static_prediction = fluid.layers.reshape( static_prediction, shape=[-1, Config.num_classes + 2]) - cost = fluid.layers.cross_entropy( - input=static_prediction, label=static_label_out) + cost = fluid.layers.cross_entropy(input=static_prediction, + label=static_label_out) static_avg_loss = fluid.layers.reduce_sum(cost) # param_grad_list = fluid.backward.append_backward(static_avg_loss) optimizer.minimize(static_avg_loss) @@ -532,8 +562,8 @@ class TestDygraphOCRAttention(unittest.TestCase): static_grad_value = {} static_out = out[0] for i in range(1, len(static_param_name_list) + 1): - static_param_value[static_param_name_list[i - 1]] = out[ - i] + static_param_value[static_param_name_list[i - + 1]] = out[i] grad_start_pos = len(static_param_name_list) + 1 for i in range(grad_start_pos, len(static_grad_name_list) + grad_start_pos): @@ -556,8 +586,7 @@ class TestDygraphOCRAttention(unittest.TestCase): for key, value in six.iteritems(static_param_value): self.assertTrue( - np.allclose( - value, eager_param_value[key], rtol=1e-05)) + np.allclose(value, eager_param_value[key], rtol=1e-05)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py index 95041699687..d7b55215ae7 100644 --- 
a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py @@ -35,6 +35,7 @@ from paddle.fluid.framework import _test_eager_guard class MLP(fluid.Layer): + def __init__(self, param_attr=None, bias_attr=None): super(MLP, self).__init__() @@ -48,6 +49,7 @@ class MLP(fluid.Layer): class TestImperativeOptimizerBase(unittest.TestCase): + def setUp(self): self.batch_num = 20 @@ -58,6 +60,7 @@ class TestImperativeOptimizerBase(unittest.TestCase): raise NotImplementedError() def reader_decorator(self, reader): + def _reader_imple(): for item in reader(): image = np.array(item[0]).reshape(1, 784) @@ -70,8 +73,8 @@ class TestImperativeOptimizerBase(unittest.TestCase): seed = 90 batch_size = 128 if place == None: - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.dygraph.guard(place): try: @@ -88,8 +91,8 @@ class TestImperativeOptimizerBase(unittest.TestCase): batch_size = 128 if place == None: - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) with fluid.dygraph.guard(place): paddle.seed(seed) @@ -101,10 +104,10 @@ class TestImperativeOptimizerBase(unittest.TestCase): batch_py_reader = fluid.io.PyReader(capacity=1) batch_py_reader.decorate_sample_list_generator( - paddle.batch( - self.reader_decorator(paddle.dataset.mnist.train()), - batch_size=batch_size, - drop_last=True), + paddle.batch(self.reader_decorator( + paddle.dataset.mnist.train()), + batch_size=batch_size, + drop_last=True), places=fluid.CPUPlace()) dy_param_init_value = {} @@ -137,18 +140,20 @@ class TestImperativeOptimizerBase(unittest.TestCase): paddle.framework.random._manual_program_seed(seed) if place == None: - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) exe = fluid.Executor(place) mlp = MLP() optimizer = self.get_optimizer() - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=128, drop_last=True) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=128, + drop_last=True) - img = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype='float32') + img = fluid.layers.data(name='pixel', + shape=[1, 28, 28], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') img = fluid.layers.reshape(img, shape=[batch_size, 784]) cost = mlp(img) @@ -173,14 +178,16 @@ class TestImperativeOptimizerBase(unittest.TestCase): static_x_data = np.array( [x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - [128, 1]) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape([128, 1]) fetch_list = [avg_loss.name] fetch_list.extend(static_param_name_list) out = exe.run(fluid.default_main_program(), - feed={"pixel": static_x_data, - "label": y_data}, + feed={ + "pixel": static_x_data, + "label": y_data + }, fetch_list=fetch_list) static_param_value = {} @@ -199,20 +206,18 @@ class TestImperativeOptimizerBase(unittest.TestCase): for key, value in six.iteritems(static_param_value): if core.is_compiled_with_rocm(): self.assertTrue( - np.allclose( - value, dy_param_value[key], atol=1e-3)) + np.allclose(value, dy_param_value[key], 
atol=1e-3)) else: self.assertTrue(np.allclose(value, dy_param_value[key])) class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): bd = [3, 6, 9] - optimizer = SGDOptimizer( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, - values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]), - parameter_list=parameter_list) + optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]), + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -231,22 +236,22 @@ class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase): - def get_optimizer_dygraph(self, parameter_list): - optimizer = SGDOptimizer( - learning_rate=fluid.layers.natural_exp_decay( - learning_rate=0.1, - decay_steps=10000, - decay_rate=0.5, - staircase=True), - parameter_list=parameter_list) - return optimizer - def get_optimizer(self): + def get_optimizer_dygraph(self, parameter_list): optimizer = SGDOptimizer(learning_rate=fluid.layers.natural_exp_decay( learning_rate=0.1, decay_steps=10000, decay_rate=0.5, - staircase=True)) + staircase=True), + parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = SGDOptimizer( + learning_rate=fluid.layers.natural_exp_decay(learning_rate=0.1, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) return optimizer def func_test_sgd(self): @@ -259,22 +264,22 @@ class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase): - def get_optimizer_dygraph(self, parameter_list): - optimizer = SGDOptimizer( - learning_rate=fluid.layers.exponential_decay( - learning_rate=0.1, - decay_steps=10000, - decay_rate=0.5, - staircase=True), - parameter_list=parameter_list) - return optimizer - def get_optimizer(self): + def get_optimizer_dygraph(self, parameter_list): optimizer = SGDOptimizer(learning_rate=fluid.layers.exponential_decay( learning_rate=0.1, decay_steps=10000, decay_rate=0.5, - staircase=True)) + staircase=True), + parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = SGDOptimizer( + learning_rate=fluid.layers.exponential_decay(learning_rate=0.1, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) return optimizer def func_test_sgd(self): @@ -287,22 +292,22 @@ class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase): - def get_optimizer_dygraph(self, parameter_list): - optimizer = Adam( - learning_rate=fluid.layers.inverse_time_decay( - learning_rate=0.1, - decay_steps=10000, - decay_rate=0.5, - staircase=True), - parameter_list=parameter_list) - return optimizer - def get_optimizer(self): + def get_optimizer_dygraph(self, parameter_list): optimizer = Adam(learning_rate=fluid.layers.inverse_time_decay( learning_rate=0.1, decay_steps=10000, decay_rate=0.5, - staircase=True)) + staircase=True), + parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = Adam( + learning_rate=fluid.layers.inverse_time_decay(learning_rate=0.1, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) return optimizer def func_test_adam(self): @@ -315,11 +320,11 @@ class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase): class 
TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = SGDOptimizer( - learning_rate=fluid.layers.polynomial_decay( - learning_rate=0.1, decay_steps=5, cycle=self.cycle), - parameter_list=parameter_list) + optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay( + learning_rate=0.1, decay_steps=5, cycle=self.cycle), + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -347,11 +352,11 @@ class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = SGDOptimizer( - learning_rate=fluid.layers.cosine_decay( - learning_rate=0.1, step_each_epoch=10000, epochs=120), - parameter_list=parameter_list) + optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay( + learning_rate=0.1, step_each_epoch=10000, epochs=120), + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -369,11 +374,11 @@ class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = SGDOptimizer( - learning_rate=fluid.layers.noam_decay( - d_model=512, warmup_steps=8000), - parameter_list=parameter_list) + optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay( + d_model=512, warmup_steps=8000), + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -391,6 +396,7 @@ class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase): class TestOptimizerLearningRate(unittest.TestCase): + def func_test_constant_lr(self): with fluid.dygraph.guard(): a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") @@ -403,12 +409,12 @@ class TestOptimizerLearningRate(unittest.TestCase): loss = fluid.layers.reduce_mean(b) - adam = fluid.optimizer.Adam( - 0.001, parameter_list=linear.parameters()) + adam = fluid.optimizer.Adam(0.001, + parameter_list=linear.parameters()) self.assertTrue( - np.allclose( - adam.current_step_lr(), 0.001, rtol=1e-06, atol=0.0)) + np.allclose(adam.current_step_lr(), 0.001, rtol=1e-06, + atol=0.0)) for i in range(10): adam.minimize(loss) @@ -436,13 +442,12 @@ class TestOptimizerLearningRate(unittest.TestCase): bd = [2, 4, 6, 8] value = [0.2, 0.4, 0.6, 0.8, 1.0] - adam = fluid.optimizer.Adam( - fluid.dygraph.PiecewiseDecay(bd, value, 0), - parameter_list=linear.parameters()) + adam = fluid.optimizer.Adam(fluid.dygraph.PiecewiseDecay( + bd, value, 0), + parameter_list=linear.parameters()) self.assertTrue( - np.allclose( - adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0)) + np.allclose(adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0)) ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0] for i in range(12): @@ -469,17 +474,15 @@ class TestOptimizerLearningRate(unittest.TestCase): loss = fluid.layers.reduce_mean(b) base_lr = 1.0 - adam = fluid.optimizer.Adam( - fluid.dygraph.NaturalExpDecay( - learning_rate=base_lr, - decay_steps=3, - decay_rate=0.5, - staircase=True), - parameter_list=linear.parameters()) + adam = fluid.optimizer.Adam(fluid.dygraph.NaturalExpDecay( + learning_rate=base_lr, + decay_steps=3, + decay_rate=0.5, + staircase=True), + parameter_list=linear.parameters()) self.assertTrue( - np.allclose( - adam.current_step_lr(), 1.0, rtol=1e-06, atol=0.0)) + np.allclose(adam.current_step_lr(), 1.0, rtol=1e-06, atol=0.0)) ret = [1.0, 
1.0, 1.0, np.exp(-0.5), np.exp(-0.5)] for i in range(5): @@ -513,24 +516,23 @@ class TestOptimizerLearningRate(unittest.TestCase): adam.minimize(loss) lr = adam.current_step_lr() self.assertTrue( - np.allclose( - lr, lr_list[i], rtol=1e-06, atol=0.0)) + np.allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)) - lr_var = fluid.layers.create_global_var( - shape=[1], value=0.7, dtype='float32') + lr_var = fluid.layers.create_global_var(shape=[1], + value=0.7, + dtype='float32') adam.set_lr(lr_var) adam.minimize(loss) lr = adam.current_step_lr() self.assertTrue(np.allclose(lr, 0.7, rtol=1e-06, atol=0.0)) with self.assertRaises(RuntimeError): - adam = fluid.optimizer.Adam( - fluid.dygraph.NaturalExpDecay( - learning_rate=0.1, - decay_steps=3, - decay_rate=0.5, - staircase=True), - parameter_list=linear.parameters()) + adam = fluid.optimizer.Adam(fluid.dygraph.NaturalExpDecay( + learning_rate=0.1, + decay_steps=3, + decay_rate=0.5, + staircase=True), + parameter_list=linear.parameters()) adam.set_lr(0.01) def test_set_lr(self): @@ -540,9 +542,11 @@ class TestOptimizerLearningRate(unittest.TestCase): class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = MomentumOptimizer( - learning_rate=0.001, momentum=0.9, parameter_list=parameter_list) + optimizer = MomentumOptimizer(learning_rate=0.001, + momentum=0.9, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -559,9 +563,11 @@ class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase): class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = LarsMomentumOptimizer( - learning_rate=0.001, momentum=0.9, parameter_list=parameter_list) + optimizer = LarsMomentumOptimizer(learning_rate=0.001, + momentum=0.9, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -578,9 +584,10 @@ class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase): class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = AdagradOptimizer( - learning_rate=0.2, parameter_list=parameter_list) + optimizer = AdagradOptimizer(learning_rate=0.2, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -597,9 +604,10 @@ class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase): class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = AdamaxOptimizer( - learning_rate=0.2, parameter_list=parameter_list) + optimizer = AdamaxOptimizer(learning_rate=0.2, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -616,19 +624,21 @@ class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase): class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = DpsgdOptimizer( - learning_rate=0.01, - clip=10.0, - batch_size=16.0, - sigma=1.0, - parameter_list=parameter_list) + optimizer = DpsgdOptimizer(learning_rate=0.01, + clip=10.0, + batch_size=16.0, + sigma=1.0, + parameter_list=parameter_list) optimizer._seed = 100 return optimizer def get_optimizer(self): - optimizer = DpsgdOptimizer( - learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0) + optimizer = DpsgdOptimizer(learning_rate=0.01, + clip=10.0, + batch_size=16.0, + sigma=1.0) optimizer._seed = 100 return optimizer @@ -642,9 +652,10 @@ class 
TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase): class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = DecayedAdagradOptimizer( - learning_rate=0.2, parameter_list=parameter_list) + optimizer = DecayedAdagradOptimizer(learning_rate=0.2, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -661,17 +672,18 @@ class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase): class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = AdadeltaOptimizer( - learning_rate=0.0003, - epsilon=1.0e-6, - rho=0.95, - parameter_list=parameter_list) + optimizer = AdadeltaOptimizer(learning_rate=0.0003, + epsilon=1.0e-6, + rho=0.95, + parameter_list=parameter_list) return optimizer def get_optimizer(self): - optimizer = AdadeltaOptimizer( - learning_rate=0.0003, epsilon=1.0e-6, rho=0.95) + optimizer = AdadeltaOptimizer(learning_rate=0.0003, + epsilon=1.0e-6, + rho=0.95) return optimizer def func_test_adadelta(self): @@ -684,9 +696,10 @@ class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase): class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = RMSPropOptimizer( - learning_rate=0.1, parameter_list=parameter_list) + optimizer = RMSPropOptimizer(learning_rate=0.1, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -703,9 +716,10 @@ class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase): class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = FtrlOptimizer( - learning_rate=0.1, parameter_list=parameter_list) + optimizer = FtrlOptimizer(learning_rate=0.1, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -726,16 +740,16 @@ def exclude_fn(param): class TestImperativeLambOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = LambOptimizer( - learning_rate=0.002, - exclude_from_weight_decay_fn=exclude_fn, - parameter_list=parameter_list) + optimizer = LambOptimizer(learning_rate=0.002, + exclude_from_weight_decay_fn=exclude_fn, + parameter_list=parameter_list) return optimizer def get_optimizer(self): - optimizer = LambOptimizer( - learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn) + optimizer = LambOptimizer(learning_rate=0.002, + exclude_from_weight_decay_fn=exclude_fn) return optimizer # should fix: may fail in CI-windows @@ -744,9 +758,11 @@ class TestImperativeLambOptimizer(TestImperativeOptimizerBase): class TestImperativeModelAverage(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = ModelAverage( - 0.15, min_average_window=10000, max_average_window=12500) + optimizer = ModelAverage(0.15, + min_average_window=10000, + max_average_window=12500) return optimizer def func_test_modelaverage(self): @@ -760,13 +776,13 @@ class TestImperativeModelAverage(TestImperativeOptimizerBase): class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = DGCMomentumOptimizer( - learning_rate=0.0001, - momentum=0.9, - rampup_step=1000, - rampup_begin_step=1252, - sparsity=[0.999, 0.999]) + optimizer = DGCMomentumOptimizer(learning_rate=0.0001, + momentum=0.9, + rampup_step=1000, + rampup_begin_step=1252, + sparsity=[0.999, 
0.999]) return optimizer def func_test_dgcmomentum(self): @@ -780,6 +796,7 @@ class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase): class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = ExponentialMovingAverage(0.999) return optimizer @@ -795,6 +812,7 @@ class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase): class TestImperativePipelineOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = fluid.optimizer.SGD(learning_rate=0.5, parameter_list=parameter_list) @@ -812,6 +830,7 @@ class TestImperativePipelineOptimizer(TestImperativeOptimizerBase): class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = fluid.optimizer.SGD(learning_rate=0.5, parameter_list=parameter_list) @@ -829,6 +848,7 @@ class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase): class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = fluid.optimizer.SGD(learning_rate=0.5, parameter_list=parameter_list) @@ -846,15 +866,16 @@ class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase): class TestImperativeOptimizerList(unittest.TestCase): + def func_test_parameter_list(self): with fluid.dygraph.guard(): linear_1 = Linear(10, 10) linear_2 = Linear(10, 10) - sgd = SGDOptimizer( - 1.0, - parameter_list=itertools.chain(linear_1.parameters(), - linear_2.parameters())) + sgd = SGDOptimizer(1.0, + parameter_list=itertools.chain( + linear_1.parameters(), + linear_2.parameters())) in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") in_data = fluid.dygraph.to_variable(in_np) @@ -866,8 +887,8 @@ class TestImperativeOptimizerList(unittest.TestCase): sgd.minimize(loss) self.assertTrue( - len(sgd._parameter_list) == - len(linear_1.parameters() + linear_2.parameters())) + len(sgd._parameter_list) == len(linear_1.parameters() + + linear_2.parameters())) def test_parameter_list(self): with _test_eager_guard(): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py index b27ce6bb01f..2bcf0b97bf8 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py @@ -35,6 +35,7 @@ from paddle.fluid.framework import _test_eager_guard class MLP(fluid.Layer): + def __init__(self, param_attr=None, bias_attr=None): super(MLP, self).__init__() @@ -48,6 +49,7 @@ class MLP(fluid.Layer): class TestImperativeOptimizerBase(unittest.TestCase): + def setUp(self): self.batch_num = 20 @@ -58,6 +60,7 @@ class TestImperativeOptimizerBase(unittest.TestCase): raise NotImplementedError() def reader_decorator(self, reader): + def _reader_imple(): for item in reader(): image = np.array(item[0]).reshape(1, 784) @@ -70,8 +73,8 @@ class TestImperativeOptimizerBase(unittest.TestCase): seed = 90 batch_size = 128 if place == None: - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() try: paddle.disable_static() @@ -90,8 +93,8 @@ class TestImperativeOptimizerBase(unittest.TestCase): batch_size = 128 if place == None: - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace( 
+ ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) paddle.disable_static(place) paddle.seed(seed) @@ -101,12 +104,11 @@ class TestImperativeOptimizerBase(unittest.TestCase): optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters()) batch_py_reader = fluid.io.PyReader(capacity=1) - batch_py_reader.decorate_sample_list_generator( - paddle.batch( - self.reader_decorator(paddle.dataset.mnist.train()), - batch_size=batch_size, - drop_last=True), - places=fluid.CPUPlace()) + batch_py_reader.decorate_sample_list_generator(paddle.batch( + self.reader_decorator(paddle.dataset.mnist.train()), + batch_size=batch_size, + drop_last=True), + places=fluid.CPUPlace()) dy_param_init_value = {} for batch_id, data in enumerate(batch_py_reader()): @@ -147,18 +149,20 @@ class TestImperativeOptimizerBase(unittest.TestCase): paddle.framework.random._manual_program_seed(seed) if place == None: - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) exe = fluid.Executor(place) mlp = MLP() optimizer = self.get_optimizer() - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=128, drop_last=True) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=128, + drop_last=True) - img = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype='float32') + img = fluid.layers.data(name='pixel', + shape=[1, 28, 28], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') img = fluid.layers.reshape(img, shape=[batch_size, 784]) cost = mlp(img) @@ -183,14 +187,16 @@ class TestImperativeOptimizerBase(unittest.TestCase): static_x_data = np.array( [x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - [128, 1]) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape([128, 1]) fetch_list = [avg_loss.name] fetch_list.extend(static_param_name_list) out = exe.run(fluid.default_main_program(), - feed={"pixel": static_x_data, - "label": y_data}, + feed={ + "pixel": static_x_data, + "label": y_data + }, fetch_list=fetch_list) if isinstance(optimizer._learning_rate, paddle.optimizer.lr.LRScheduler): @@ -216,13 +222,13 @@ class TestImperativeOptimizerBase(unittest.TestCase): for key, value in six.iteritems(static_param_value): if core.is_compiled_with_rocm(): self.assertTrue( - np.allclose( - value, dy_param_value[key], atol=1e-3)) + np.allclose(value, dy_param_value[key], atol=1e-3)) else: self.assertTrue(np.allclose(value, dy_param_value[key])) class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): bd = [3, 6, 9] optimizer = paddle.optimizer.SGD( @@ -250,17 +256,18 @@ class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.SGD( - learning_rate=paddle.optimizer.lr.NaturalExpDecay( - learning_rate=0.5, gamma=0.9), + learning_rate=paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, + gamma=0.9), parameters=parameter_list) return optimizer def get_optimizer(self): optimizer = paddle.optimizer.SGD( - learning_rate=paddle.optimizer.lr.NaturalExpDecay( - learning_rate=0.5, gamma=0.9)) + learning_rate=paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, + gamma=0.9)) return optimizer def 
func_test_sgd(self): @@ -273,6 +280,7 @@ class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.SGD( learning_rate=paddle.optimizer.lr.ExponentialDecay( @@ -296,6 +304,7 @@ class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.Adam( learning_rate=paddle.optimizer.lr.InverseTimeDecay( @@ -319,10 +328,12 @@ class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.SGD( - learning_rate=paddle.optimizer.lr.PolynomialDecay( - learning_rate=0.5, decay_steps=5, cycle=self.cycle), + learning_rate=paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, + decay_steps=5, + cycle=self.cycle), parameters=parameter_list) return optimizer @@ -352,6 +363,7 @@ class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.SGD( learning_rate=paddle.optimizer.lr.CosineAnnealingDecay( @@ -375,17 +387,19 @@ class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.SGD( - learning_rate=paddle.optimizer.lr.NoamDecay( - d_model=0.01, warmup_steps=100, verbose=True), + learning_rate=paddle.optimizer.lr.NoamDecay(d_model=0.01, + warmup_steps=100, + verbose=True), parameters=parameter_list) return optimizer def get_optimizer(self): optimizer = paddle.optimizer.SGD( - learning_rate=paddle.optimizer.lr.NoamDecay( - d_model=0.01, warmup_steps=100)) + learning_rate=paddle.optimizer.lr.NoamDecay(d_model=0.01, + warmup_steps=100)) return optimizer def func_test_sgd(self): @@ -398,6 +412,7 @@ class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.SGD( learning_rate=paddle.optimizer.lr.LambdaDecay( @@ -421,21 +436,23 @@ class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.SGD( - learning_rate=paddle.optimizer.lr.LinearWarmup( - learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5), + learning_rate=paddle.optimizer.lr.LinearWarmup(learning_rate=0.5, + warmup_steps=20, + start_lr=0, + end_lr=0.5), parameters=parameter_list) return optimizer def get_optimizer(self): optimizer = paddle.optimizer.SGD( - learning_rate=paddle.optimizer.lr.LinearWarmup( - learning_rate=0.5, - warmup_steps=20, - start_lr=0, - end_lr=0.5, - verbose=True)) + learning_rate=paddle.optimizer.lr.LinearWarmup(learning_rate=0.5, + warmup_steps=20, + start_lr=0, + end_lr=0.5, + verbose=True)) return optimizer def func_test_sgd(self): @@ -448,6 +465,7 @@ class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase): class 
TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.SGD( learning_rate=paddle.optimizer.lr.MultiStepDecay( @@ -471,10 +489,12 @@ class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.SGD( - learning_rate=paddle.optimizer.lr.StepDecay( - learning_rate=0.5, step_size=5, gamma=0.8), + learning_rate=paddle.optimizer.lr.StepDecay(learning_rate=0.5, + step_size=5, + gamma=0.8), parameters=parameter_list) return optimizer @@ -494,6 +514,7 @@ class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase): class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.SGD( learning_rate=paddle.optimizer.lr.ReduceOnPlateau( @@ -517,6 +538,7 @@ class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase): class TestOptimizerLearningRate(unittest.TestCase): + def func_test_constant_lr(self): with fluid.dygraph.guard(): a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") @@ -532,8 +554,7 @@ class TestOptimizerLearningRate(unittest.TestCase): adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters()) self.assertTrue( - np.allclose( - adam.get_lr(), 0.001, rtol=1e-06, atol=0.0)) + np.allclose(adam.get_lr(), 0.001, rtol=1e-06, atol=0.0)) for i in range(10): adam.minimize(loss) @@ -562,12 +583,11 @@ class TestOptimizerLearningRate(unittest.TestCase): value = [0.2, 0.4, 0.6, 0.8, 1.0] scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value) - adam = paddle.optimizer.Adam( - scheduler, parameters=linear.parameters()) + adam = paddle.optimizer.Adam(scheduler, + parameters=linear.parameters()) self.assertTrue( - np.allclose( - adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)) + np.allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)) ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0] for i in range(12): @@ -593,12 +613,11 @@ class TestOptimizerLearningRate(unittest.TestCase): base_lr = 1.0 scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5) - adam = paddle.optimizer.Adam( - scheduler, parameters=linear.parameters()) + adam = paddle.optimizer.Adam(scheduler, + parameters=linear.parameters()) self.assertTrue( - np.allclose( - adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)) + np.allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)) ret = [1.0, np.exp(-0.5), np.exp(-1)] for i in range(3): @@ -632,18 +651,18 @@ class TestOptimizerLearningRate(unittest.TestCase): adam.minimize(loss) lr = adam.get_lr() self.assertTrue( - np.allclose( - lr, lr_list[i], rtol=1e-06, atol=0.0)) + np.allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)) with self.assertRaises(TypeError): - lr_var = fluid.layers.create_global_var( - shape=[1], value=0.7, dtype='float32') + lr_var = fluid.layers.create_global_var(shape=[1], + value=0.7, + dtype='float32') adam.set_lr(lr_var) with self.assertRaises(RuntimeError): adam = paddle.optimizer.Adam( - paddle.optimizer.lr.NaturalExpDecay( - learning_rate=0.1, gamma=0.5), + paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.1, + gamma=0.5), parameters=linear.parameters()) adam.set_lr(0.01) @@ -654,9 +673,11 @@ class TestOptimizerLearningRate(unittest.TestCase): class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = MomentumOptimizer( - 
learning_rate=0.001, momentum=0.9, parameter_list=parameter_list) + optimizer = MomentumOptimizer(learning_rate=0.001, + momentum=0.9, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -673,9 +694,11 @@ class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase): class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = LarsMomentumOptimizer( - learning_rate=0.001, momentum=0.9, parameter_list=parameter_list) + optimizer = LarsMomentumOptimizer(learning_rate=0.001, + momentum=0.9, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -692,9 +715,10 @@ class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase): class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = AdagradOptimizer( - learning_rate=0.2, parameter_list=parameter_list) + optimizer = AdagradOptimizer(learning_rate=0.2, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -711,9 +735,10 @@ class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase): class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = AdamaxOptimizer( - learning_rate=0.2, parameter_list=parameter_list) + optimizer = AdamaxOptimizer(learning_rate=0.2, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -730,19 +755,21 @@ class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase): class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = DpsgdOptimizer( - learning_rate=0.01, - clip=10.0, - batch_size=16.0, - sigma=1.0, - parameter_list=parameter_list) + optimizer = DpsgdOptimizer(learning_rate=0.01, + clip=10.0, + batch_size=16.0, + sigma=1.0, + parameter_list=parameter_list) optimizer._seed = 100 return optimizer def get_optimizer(self): - optimizer = DpsgdOptimizer( - learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0) + optimizer = DpsgdOptimizer(learning_rate=0.01, + clip=10.0, + batch_size=16.0, + sigma=1.0) optimizer._seed = 100 return optimizer @@ -756,9 +783,10 @@ class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase): class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = DecayedAdagradOptimizer( - learning_rate=0.2, parameter_list=parameter_list) + optimizer = DecayedAdagradOptimizer(learning_rate=0.2, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -775,17 +803,18 @@ class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase): class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = AdadeltaOptimizer( - learning_rate=0.0003, - epsilon=1.0e-6, - rho=0.95, - parameter_list=parameter_list) + optimizer = AdadeltaOptimizer(learning_rate=0.0003, + epsilon=1.0e-6, + rho=0.95, + parameter_list=parameter_list) return optimizer def get_optimizer(self): - optimizer = AdadeltaOptimizer( - learning_rate=0.0003, epsilon=1.0e-6, rho=0.95) + optimizer = AdadeltaOptimizer(learning_rate=0.0003, + epsilon=1.0e-6, + rho=0.95) return optimizer def func_test_adadelta(self): @@ -798,9 +827,10 @@ class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase): class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase): 
+ def get_optimizer_dygraph(self, parameter_list): - optimizer = RMSPropOptimizer( - learning_rate=0.1, parameter_list=parameter_list) + optimizer = RMSPropOptimizer(learning_rate=0.1, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -817,9 +847,10 @@ class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase): class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = FtrlOptimizer( - learning_rate=0.1, parameter_list=parameter_list) + optimizer = FtrlOptimizer(learning_rate=0.1, + parameter_list=parameter_list) return optimizer def get_optimizer(self): @@ -840,6 +871,7 @@ def exclude_fn(param): class TestImperativeLambOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.Lamb( learning_rate=0.002, @@ -858,9 +890,11 @@ class TestImperativeLambOptimizer(TestImperativeOptimizerBase): class TestImperativeModelAverage(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = ModelAverage( - 0.15, min_average_window=10000, max_average_window=12500) + optimizer = ModelAverage(0.15, + min_average_window=10000, + max_average_window=12500) return optimizer def func_test_modelaverage(self): @@ -874,13 +908,13 @@ class TestImperativeModelAverage(TestImperativeOptimizerBase): class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): - optimizer = DGCMomentumOptimizer( - learning_rate=0.0001, - momentum=0.9, - rampup_step=1000, - rampup_begin_step=1252, - sparsity=[0.999, 0.999]) + optimizer = DGCMomentumOptimizer(learning_rate=0.0001, + momentum=0.9, + rampup_step=1000, + rampup_begin_step=1252, + sparsity=[0.999, 0.999]) return optimizer def func_test_dgcmomentum(self): @@ -894,6 +928,7 @@ class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase): class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = ExponentialMovingAverage(0.999) return optimizer @@ -909,6 +944,7 @@ class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase): class TestImperativePipelineOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.SGD(learning_rate=0.5, parameters=parameter_list) @@ -926,6 +962,7 @@ class TestImperativePipelineOptimizer(TestImperativeOptimizerBase): class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.SGD(learning_rate=0.5, parameters=parameter_list) @@ -943,6 +980,7 @@ class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase): class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): optimizer = paddle.optimizer.SGD(learning_rate=0.5, parameters=parameter_list) @@ -960,6 +998,7 @@ class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase): class TestImperativeOptimizerList(unittest.TestCase): + def func_test_parameter_list(self): with fluid.dygraph.guard(): linear_1 = Linear(10, 10) @@ -980,8 +1019,8 @@ class TestImperativeOptimizerList(unittest.TestCase): sgd.minimize(loss) self.assertTrue( - len(sgd._parameter_list) == - len(linear_1.parameters() + linear_2.parameters())) + len(sgd._parameter_list) == len(linear_1.parameters() + + linear_2.parameters())) def test_parameter_list(self): with 
_test_eager_guard(): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_parallel_coalesce_split.py b/python/paddle/fluid/tests/unittests/test_imperative_parallel_coalesce_split.py index 480df7482e3..54da2becfde 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_parallel_coalesce_split.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_parallel_coalesce_split.py @@ -26,6 +26,7 @@ from paddle.fluid.dygraph.parallel import _coalesce_tensors, _split_tensors, _re class MyLayer(fluid.Layer): + def __init__(self, name_scope): super(MyLayer, self).__init__(name_scope) @@ -37,6 +38,7 @@ class MyLayer(fluid.Layer): class TestImperativeParallelCoalesceSplit(unittest.TestCase): + def test_coalesce_split(self): with fluid.dygraph.guard(): test_layer = MyLayer("test_layer") @@ -47,8 +49,8 @@ class TestImperativeParallelCoalesceSplit(unittest.TestCase): vars = [] vars.append(to_variable(np.random.random([2, 3]).astype("float32"))) vars.append(to_variable(np.random.random([4, 9]).astype("float32"))) - vars.append( - to_variable(np.random.random([10, 1]).astype("float32"))) + vars.append(to_variable( + np.random.random([10, 1]).astype("float32"))) var_groups = OrderedDict() var_groups.setdefault(0, vars) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py index cd31b13083d..b20dcffa8a0 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py @@ -21,6 +21,7 @@ from paddle.fluid.framework import _test_eager_guard class TestImperativePartitialBackward(unittest.TestCase): + def func_partitial_backward(self): with fluid.dygraph.guard(): x = np.random.randn(2, 4, 5).astype("float32") @@ -39,13 +40,12 @@ class TestImperativePartitialBackward(unittest.TestCase): for param in linear2.parameters(): self.assertIsNone(param._grad_ivar()) - optimizer = fluid.optimizer.AdamOptimizer(parameter_list=( - linear1.parameters() + linear2.parameters())) + optimizer = fluid.optimizer.AdamOptimizer( + parameter_list=(linear1.parameters() + linear2.parameters())) _, params_grads = optimizer.minimize(loss) - self.assertListEqual( - sorted([p.name for p in linear1.parameters()]), - sorted([p_g[0].name for p_g in params_grads])) + self.assertListEqual(sorted([p.name for p in linear1.parameters()]), + sorted([p_g[0].name for p_g in params_grads])) linear1.clear_gradients() linear2.clear_gradients() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index 234e327935a..e5e26111381 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -31,6 +31,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class SimpleLSTMRNN(fluid.Layer): + def __init__(self, hidden_size, num_steps, @@ -78,23 +79,29 @@ class SimpleLSTMRNN(fluid.Layer): self.hidden_array = [] for i in range(self._num_layers): - pre_hidden = fluid.layers.slice( - init_hidden, axes=[0], starts=[i], ends=[i + 1]) - pre_cell = fluid.layers.slice( - init_cell, axes=[0], starts=[i], ends=[i + 1]) - pre_hidden = fluid.layers.reshape( - pre_hidden, shape=[-1, self._hidden_size]) - pre_cell = fluid.layers.reshape( - pre_cell, shape=[-1, self._hidden_size]) + pre_hidden = fluid.layers.slice(init_hidden, + axes=[0], + starts=[i], + 
ends=[i + 1]) + pre_cell = fluid.layers.slice(init_cell, + axes=[0], + starts=[i], + ends=[i + 1]) + pre_hidden = fluid.layers.reshape(pre_hidden, + shape=[-1, self._hidden_size]) + pre_cell = fluid.layers.reshape(pre_cell, + shape=[-1, self._hidden_size]) self.hidden_array.append(pre_hidden) self.cell_array.append(pre_cell) res = [] for index in range(self._num_steps): - self._input = fluid.layers.slice( - input_embedding, axes=[1], starts=[index], ends=[index + 1]) - self._input = fluid.layers.reshape( - self._input, shape=[-1, self._hidden_size]) + self._input = fluid.layers.slice(input_embedding, + axes=[1], + starts=[index], + ends=[index + 1]) + self._input = fluid.layers.reshape(self._input, + shape=[-1, self._hidden_size]) for k in range(self._num_layers): pre_hidden = self.hidden_array[k] pre_cell = self.cell_array[k] @@ -105,8 +112,9 @@ class SimpleLSTMRNN(fluid.Layer): gate_input = fluid.layers.matmul(x=nn, y=weight_1) gate_input = fluid.layers.elementwise_add(gate_input, bias) - i, j, f, o = fluid.layers.split( - gate_input, num_or_sections=4, dim=-1) + i, j, f, o = fluid.layers.split(gate_input, + num_or_sections=4, + dim=-1) c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid( i) * fluid.layers.tanh(j) m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o) @@ -120,8 +128,8 @@ class SimpleLSTMRNN(fluid.Layer): dropout_prob=self._dropout, dropout_implementation='upscale_in_train') res.append( - fluid.layers.reshape( - self._input, shape=[1, -1, self._hidden_size])) + fluid.layers.reshape(self._input, + shape=[1, -1, self._hidden_size])) real_res = fluid.layers.concat(res, 0) real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = fluid.layers.concat(self.hidden_array, 1) @@ -136,6 +144,7 @@ class SimpleLSTMRNN(fluid.Layer): class PtbModel(fluid.Layer): + def __init__(self, hidden_size, vocab_size, @@ -151,12 +160,11 @@ class PtbModel(fluid.Layer): self.num_layers = num_layers self.num_steps = num_steps self.dropout = dropout - self.simple_lstm_rnn = SimpleLSTMRNN( - hidden_size, - num_steps, - num_layers=num_layers, - init_scale=init_scale, - dropout=dropout) + self.simple_lstm_rnn = SimpleLSTMRNN(hidden_size, + num_steps, + num_layers=num_layers, + init_scale=init_scale, + dropout=dropout) self.embedding = Embedding( size=[vocab_size, hidden_size], dtype='float32', @@ -193,16 +201,17 @@ class PtbModel(fluid.Layer): x_emb, dropout_prob=self.drop_out, dropout_implementation='upscale_in_train') - rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, - init_c) + rnn_out, last_hidden, last_cell = self.simple_lstm_rnn( + x_emb, init_h, init_c) rnn_out = fluid.layers.reshape( rnn_out, shape=[-1, self.num_steps, self.hidden_size]) projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = fluid.layers.elementwise_add(projection, self.softmax_bias) - projection = fluid.layers.reshape( - projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( - logits=projection, label=label, soft_label=False) + projection = fluid.layers.reshape(projection, + shape=[-1, self.vocab_size]) + loss = fluid.layers.softmax_with_cross_entropy(logits=projection, + label=label, + soft_label=False) loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps]) loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_sum(loss) @@ -211,6 +220,7 @@ class PtbModel(fluid.Layer): class TestDygraphPtbRnn(unittest.TestCase): + def func_test_ptb_rnn(self): for is_sparse in [True, False]: 
self.ptb_rnn_cpu_float32(is_sparse) @@ -235,16 +245,15 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale, - is_sparse=is_sparse) - - sgd = SGDOptimizer( - learning_rate=1e-3, parameter_list=ptb_model.parameters()) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale, + is_sparse=is_sparse) + + sgd = SGDOptimizer(learning_rate=1e-3, + parameter_list=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -260,8 +269,8 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) @@ -302,24 +311,26 @@ class TestDygraphPtbRnn(unittest.TestCase): with new_program_scope(): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale, - is_sparse=is_sparse) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale, + is_sparse=is_sparse) exe = fluid.Executor(fluid.CPUPlace( ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) sgd = SGDOptimizer(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') + init_hidden = fluid.layers.data(name="init_hidden", + shape=[1], + dtype='float32') + init_cell = fluid.layers.data(name="init_cell", + shape=[1], + dtype='float32') static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell) @@ -344,8 +355,8 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') fetch_list = [static_loss, static_last_hidden, static_last_cell] fetch_list.extend(static_param_name_list) out = exe.run(fluid.default_main_program(), diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py index f659d834354..06bca877c87 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py @@ -30,6 +30,7 @@ from paddle.fluid.framework import _test_eager_guard class TestDygraphPtbRnnSortGradient(unittest.TestCase): + def 
func_ptb_rnn_sort_gradient(self): for is_sparse in [True, False]: self.ptb_rnn_sort_gradient_cpu_float32(is_sparse) @@ -50,16 +51,15 @@ class TestDygraphPtbRnnSortGradient(unittest.TestCase): paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale, - is_sparse=is_sparse) - - sgd = SGDOptimizer( - learning_rate=1e-3, parameter_list=ptb_model.parameters()) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale, + is_sparse=is_sparse) + + sgd = SGDOptimizer(learning_rate=1e-3, + parameter_list=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -73,14 +73,14 @@ class TestDygraphPtbRnnSortGradient(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) if i == 0: for param in ptb_model.parameters(): dy_param_init[param.name] = param.numpy() @@ -99,24 +99,26 @@ class TestDygraphPtbRnnSortGradient(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale, - is_sparse=is_sparse) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale, + is_sparse=is_sparse) exe = fluid.Executor(fluid.CPUPlace( ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) sgd = SGDOptimizer(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps, 1], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps, 1], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') + init_hidden = fluid.layers.data(name="init_hidden", + shape=[1], + dtype='float32') + init_cell = fluid.layers.data(name="init_cell", + shape=[1], + dtype='float32') static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell) @@ -141,8 +143,8 @@ class TestDygraphPtbRnnSortGradient(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') fetch_list = [static_loss, static_last_hidden, static_last_cell] fetch_list.extend(static_param_name_list) out = exe.run(fluid.default_main_program(), diff --git a/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py b/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py index 
0579d727dd2..f59256f25f8 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py @@ -29,6 +29,7 @@ import six class RecurrentTest(fluid.Layer): + def __init__(self, name_scope): super(RecurrentTest, self).__init__(name_scope) @@ -39,6 +40,7 @@ class RecurrentTest(fluid.Layer): class TestRecurrentFeed(unittest.TestCase): + def test_recurrent_feed(self): seed = 90 @@ -88,10 +90,12 @@ class TestRecurrentFeed(unittest.TestCase): with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - in1 = fluid.layers.data( - name="inp1", shape=[2, 2], append_batch_size=False) - in2 = fluid.layers.data( - name="inp2", shape=[2, 2], append_batch_size=False) + in1 = fluid.layers.data(name="inp1", + shape=[2, 2], + append_batch_size=False) + in2 = fluid.layers.data(name="inp2", + shape=[2, 2], + append_batch_size=False) rt1 = RecurrentTest("RecurrentTest") static_sum_out, static_out = rt1(in1, in2) fluid.backward.append_backward(static_sum_out) @@ -102,11 +106,12 @@ class TestRecurrentFeed(unittest.TestCase): 0)._find_var_recursive(static_out.name + "@GRAD") fetch_list = [static_sum_out, static_out, static_dout] for i in range(3): - out = exe.run( - fluid.default_main_program(), - feed={"inp1": original_np1, - "inp2": original_np2}, - fetch_list=fetch_list) + out = exe.run(fluid.default_main_program(), + feed={ + "inp1": original_np1, + "inp2": original_np2 + }, + fetch_list=fetch_list) static_out_value = out[1] static_sum_out = out[0] static_dout = out[2] diff --git a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py index 08320d04d99..ac41f84be34 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py @@ -31,6 +31,7 @@ from paddle.fluid.framework import _test_eager_guard class Policy(fluid.dygraph.Layer): + def __init__(self, input_size): super(Policy, self).__init__() @@ -51,6 +52,7 @@ class Policy(fluid.dygraph.Layer): class TestImperativeMnist(unittest.TestCase): + def test_mnist_float32(self): seed = 90 epoch_num = 1 @@ -87,8 +89,8 @@ class TestImperativeMnist(unittest.TestCase): loss_probs = fluid.layers.elementwise_mul(dy_reward, loss_probs) loss = fluid.layers.reduce_sum(loss_probs) - sgd = SGDOptimizer( - learning_rate=1e-3, parameter_list=policy.parameters()) + sgd = SGDOptimizer(learning_rate=1e-3, + parameter_list=policy.parameters()) dy_param_init_value = {} @@ -126,12 +128,15 @@ class TestImperativeMnist(unittest.TestCase): st_sgd = SGDOptimizer(learning_rate=1e-3) - st_state = fluid.layers.data( - name='st_state', shape=[4], dtype='float32') - st_reward = fluid.layers.data( - name='st_reward', shape=[1], dtype='float32') - st_mask = fluid.layers.data( - name='st_mask', shape=[2], dtype='float32') + st_state = fluid.layers.data(name='st_state', + shape=[4], + dtype='float32') + st_reward = fluid.layers.data(name='st_reward', + shape=[1], + dtype='float32') + st_mask = fluid.layers.data(name='st_mask', + shape=[2], + dtype='float32') st_loss_probs = policy(st_state) @@ -139,8 +144,8 @@ class TestImperativeMnist(unittest.TestCase): st_loss_probs = fluid.layers.elementwise_mul(st_loss_probs, st_mask) st_loss_probs = fluid.layers.reduce_sum(st_loss_probs, dim=-1) - st_loss_probs = fluid.layers.elementwise_mul(st_reward, - st_loss_probs) + 
st_loss_probs = fluid.layers.elementwise_mul( + st_reward, st_loss_probs) st_loss = fluid.layers.reduce_sum(st_loss_probs) st_sgd.minimize(st_loss) @@ -160,12 +165,13 @@ class TestImperativeMnist(unittest.TestCase): fetch_list = [st_loss.name] fetch_list.extend(static_param_name_list) - out = exe.run( - fluid.default_main_program(), - feed={"st_state": state, - "st_reward": reward, - "st_mask": mask}, - fetch_list=fetch_list) + out = exe.run(fluid.default_main_program(), + feed={ + "st_state": state, + "st_reward": reward, + "st_mask": mask + }, + fetch_list=fetch_list) static_param_value = {} static_out = out[0] diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index e48e75c661f..69ebf875b3d 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -78,6 +78,7 @@ def optimizer_setting(params, parameter_list=None): class ConvBNLayer(fluid.Layer): + def __init__(self, num_channels, num_filters, @@ -88,16 +89,15 @@ class ConvBNLayer(fluid.Layer): use_cudnn=False): super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - bias_attr=False, - use_cudnn=use_cudnn) + self._conv = Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + bias_attr=False, + use_cudnn=use_cudnn) self._batch_norm = BatchNorm(num_filters, act=act) @@ -109,6 +109,7 @@ class ConvBNLayer(fluid.Layer): class BottleneckBlock(fluid.Layer): + def __init__(self, num_channels, num_filters, @@ -117,33 +118,29 @@ class BottleneckBlock(fluid.Layer): use_cudnn=False): super(BottleneckBlock, self).__init__() - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - use_cudnn=use_cudnn) - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - use_cudnn=use_cudnn) - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - use_cudnn=use_cudnn) + self.conv0 = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu', + use_cudnn=use_cudnn) + self.conv1 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + use_cudnn=use_cudnn) + self.conv2 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None, + use_cudnn=use_cudnn) if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=stride, - use_cudnn=use_cudnn) + self.short = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride, + use_cudnn=use_cudnn) self.shortcut = shortcut @@ -164,6 +161,7 @@ class BottleneckBlock(fluid.Layer): class ResNet(fluid.Layer): + def __init__(self, layers=50, class_dim=102, use_cudnn=True): super(ResNet, self).__init__() @@ -181,15 +179,16 @@ class ResNet(fluid.Layer): num_channels = [64, 256, 512, 1024] num_filters = [64, 128, 256, 512] - self.conv = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act='relu', - use_cudnn=use_cudnn) 
- self.pool2d_max = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.conv = ConvBNLayer(num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu', + use_cudnn=use_cudnn) + self.pool2d_max = Pool2D(pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') self.bottleneck_block_list = [] for block in range(len(depth)): @@ -197,18 +196,18 @@ class ResNet(fluid.Layer): for i in range(depth[block]): bottleneck_block = self.add_sublayer( 'bb_%d_%d' % (block, i), - BottleneckBlock( - num_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - use_cudnn=use_cudnn)) + BottleneckBlock(num_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + use_cudnn=use_cudnn)) self.bottleneck_block_list.append(bottleneck_block) shortcut = True - self.pool2d_avg = Pool2D( - pool_size=7, pool_type='avg', global_pooling=True) + self.pool2d_avg = Pool2D(pool_size=7, + pool_type='avg', + global_pooling=True) self.pool2d_avg_output = num_filters[-1] * 4 * 1 * 1 @@ -234,7 +233,9 @@ class ResNet(fluid.Layer): class TestDygraphResnet(unittest.TestCase): + def reader_decorator(self, reader): + def _reader_imple(): for item in reader(): doc = np.array(item[0]).reshape(3, 224, 224) @@ -256,8 +257,8 @@ class TestDygraphResnet(unittest.TestCase): paddle.framework.random._manual_program_seed(seed) resnet = ResNet() - optimizer = optimizer_setting( - train_parameters, parameter_list=resnet.parameters()) + optimizer = optimizer_setting(train_parameters, + parameter_list=resnet.parameters()) np.random.seed(seed) train_reader = paddle.batch( @@ -275,10 +276,10 @@ class TestDygraphResnet(unittest.TestCase): if batch_id >= batch_num: break - dy_x_data = np.array( - [x[0].reshape(3, 224, 224) for x in data]).astype('float32') - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - batch_size, 1) + dy_x_data = np.array([x[0].reshape(3, 224, 224) + for x in data]).astype('float32') + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(batch_size, 1) img = to_variable(dy_x_data) label = to_variable(y_data) @@ -322,10 +323,10 @@ class TestDygraphResnet(unittest.TestCase): dy_grad_value = {} for param in resnet.parameters(): if param.trainable: - np_array = np.array(param._grad_ivar().value() - .get_tensor()) - dy_grad_value[param.name + core.grad_var_suffix( - )] = np_array + np_array = np.array( + param._grad_ivar().value().get_tensor()) + dy_grad_value[param.name + + core.grad_var_suffix()] = np_array optimizer.minimize(avg_loss) resnet.clear_gradients() @@ -349,8 +350,9 @@ class TestDygraphResnet(unittest.TestCase): paddle.dataset.flowers.train(use_xmap=False), batch_size=batch_size) - img = fluid.layers.data( - name='pixel', shape=[3, 224, 224], dtype='float32') + img = fluid.layers.data(name='pixel', + shape=[3, 224, 224], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') out = resnet(img) loss = fluid.layers.cross_entropy(input=out, label=label) @@ -380,8 +382,8 @@ class TestDygraphResnet(unittest.TestCase): static_x_data = np.array( [x[0].reshape(3, 224, 224) for x in data]).astype('float32') - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - [batch_size, 1]) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape([batch_size, 1]) if traced_layer is not 
None: traced_layer([static_x_data]) @@ -390,8 +392,10 @@ class TestDygraphResnet(unittest.TestCase): fetch_list.extend(static_param_name_list) fetch_list.extend(static_grad_name_list) out = exe.run(fluid.default_main_program(), - feed={"pixel": static_x_data, - "label": y_data}, + feed={ + "pixel": static_x_data, + "label": y_data + }, fetch_list=fetch_list) static_param_value = {} diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet_sorted_gradient.py index 93a2b96df52..0a1d1c0cfb3 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet_sorted_gradient.py @@ -72,6 +72,7 @@ def optimizer_setting(params, parameter_list=None): class TestDygraphResnetSortGradient(unittest.TestCase): + def func_test_resnet_sort_gradient_float32(self): seed = 90 @@ -83,8 +84,8 @@ class TestDygraphResnetSortGradient(unittest.TestCase): paddle.framework.random._manual_program_seed(seed) resnet = ResNet() - optimizer = optimizer_setting( - train_parameters, parameter_list=resnet.parameters()) + optimizer = optimizer_setting(train_parameters, + parameter_list=resnet.parameters()) np.random.seed(seed) import random random.seed = seed @@ -100,10 +101,10 @@ class TestDygraphResnetSortGradient(unittest.TestCase): if batch_id >= batch_num: break - dy_x_data = np.array( - [x[0].reshape(3, 224, 224) for x in data]).astype('float32') - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - batch_size, 1) + dy_x_data = np.array([x[0].reshape(3, 224, 224) + for x in data]).astype('float32') + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape(batch_size, 1) img = to_variable(dy_x_data) label = to_variable(y_data) @@ -125,10 +126,10 @@ class TestDygraphResnetSortGradient(unittest.TestCase): dy_grad_value = {} for param in resnet.parameters(): if param.trainable: - np_array = np.array(param._grad_ivar().value() - .get_tensor()) - dy_grad_value[param.name + core.grad_var_suffix( - )] = np_array + np_array = np.array( + param._grad_ivar().value().get_tensor()) + dy_grad_value[param.name + + core.grad_var_suffix()] = np_array optimizer.minimize(avg_loss) resnet.clear_gradients() @@ -154,8 +155,9 @@ class TestDygraphResnetSortGradient(unittest.TestCase): paddle.dataset.flowers.train(use_xmap=False), batch_size=batch_size) - img = fluid.layers.data( - name='pixel', shape=[3, 224, 224], dtype='float32') + img = fluid.layers.data(name='pixel', + shape=[3, 224, 224], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') out = resnet(img) loss = fluid.layers.cross_entropy(input=out, label=label) @@ -185,15 +187,17 @@ class TestDygraphResnetSortGradient(unittest.TestCase): static_x_data = np.array( [x[0].reshape(3, 224, 224) for x in data]).astype('float32') - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - [batch_size, 1]) + y_data = np.array([x[1] for x in data + ]).astype('int64').reshape([batch_size, 1]) fetch_list = [avg_loss.name] fetch_list.extend(static_param_name_list) fetch_list.extend(static_grad_name_list) out = exe.run(fluid.default_main_program(), - feed={"pixel": static_x_data, - "label": y_data}, + feed={ + "pixel": static_x_data, + "label": y_data + }, fetch_list=fetch_list) static_param_value = {} diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 
160c94a549c..593c0462122 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -31,6 +31,7 @@ from paddle.fluid.framework import _test_eager_guard class SimpleLSTMRNN(fluid.Layer): + def __init__(self, hidden_size, num_steps, @@ -75,23 +76,29 @@ class SimpleLSTMRNN(fluid.Layer): self.hidden_array = [] for i in range(self._num_layers): - pre_hidden = fluid.layers.slice( - init_hidden, axes=[0], starts=[i], ends=[i + 1]) - pre_cell = fluid.layers.slice( - init_cell, axes=[0], starts=[i], ends=[i + 1]) - pre_hidden = fluid.layers.reshape( - pre_hidden, shape=[-1, self._hidden_size]) - pre_cell = fluid.layers.reshape( - pre_cell, shape=[-1, self._hidden_size]) + pre_hidden = fluid.layers.slice(init_hidden, + axes=[0], + starts=[i], + ends=[i + 1]) + pre_cell = fluid.layers.slice(init_cell, + axes=[0], + starts=[i], + ends=[i + 1]) + pre_hidden = fluid.layers.reshape(pre_hidden, + shape=[-1, self._hidden_size]) + pre_cell = fluid.layers.reshape(pre_cell, + shape=[-1, self._hidden_size]) self.hidden_array.append(pre_hidden) self.cell_array.append(pre_cell) res = [] for index in range(self._num_steps): - self._input = fluid.layers.slice( - input_embedding, axes=[1], starts=[index], ends=[index + 1]) - self._input = fluid.layers.reshape( - self._input, shape=[-1, self._hidden_size]) + self._input = fluid.layers.slice(input_embedding, + axes=[1], + starts=[index], + ends=[index + 1]) + self._input = fluid.layers.reshape(self._input, + shape=[-1, self._hidden_size]) for k in range(self._num_layers): pre_hidden = self.hidden_array[k] pre_cell = self.cell_array[k] @@ -102,8 +109,9 @@ class SimpleLSTMRNN(fluid.Layer): gate_input = fluid.layers.matmul(x=nn, y=weight_1) gate_input = fluid.layers.elementwise_add(gate_input, bias) - i, j, f, o = fluid.layers.split( - gate_input, num_or_sections=4, dim=-1) + i, j, f, o = fluid.layers.split(gate_input, + num_or_sections=4, + dim=-1) c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid( i) * fluid.layers.tanh(j) m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o) @@ -117,8 +125,8 @@ class SimpleLSTMRNN(fluid.Layer): dropout_prob=self._dropout, dropout_implementation='upscale_in_train') res.append( - fluid.layers.reshape( - self._input, shape=[1, -1, self._hidden_size])) + fluid.layers.reshape(self._input, + shape=[1, -1, self._hidden_size])) real_res = fluid.layers.concat(res, 0) real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = fluid.layers.concat(self.hidden_array, 1) @@ -133,6 +141,7 @@ class SimpleLSTMRNN(fluid.Layer): class PtbModel(fluid.Layer): + def __init__(self, hidden_size, vocab_size, @@ -147,12 +156,11 @@ class PtbModel(fluid.Layer): self.num_layers = num_layers self.num_steps = num_steps self.dropout = dropout - self.simple_lstm_rnn = SimpleLSTMRNN( - hidden_size, - num_steps, - num_layers=num_layers, - init_scale=init_scale, - dropout=dropout) + self.simple_lstm_rnn = SimpleLSTMRNN(hidden_size, + num_steps, + num_layers=num_layers, + init_scale=init_scale, + dropout=dropout) self.embedding = Embedding( size=[vocab_size, hidden_size], dtype='float32', @@ -190,17 +198,18 @@ class PtbModel(fluid.Layer): x_emb, dropout_prob=self.drop_out, dropout_implementation='upscale_in_train') - rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, - init_c) + rnn_out, last_hidden, last_cell = self.simple_lstm_rnn( + x_emb, init_h, init_c) rnn_out = fluid.layers.reshape( rnn_out, shape=[-1, self.num_steps, 
self.hidden_size]) projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = fluid.layers.elementwise_add(projection, self.softmax_bias) - projection = fluid.layers.reshape( - projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( - logits=projection, label=label, soft_label=False) + projection = fluid.layers.reshape(projection, + shape=[-1, self.vocab_size]) + loss = fluid.layers.softmax_with_cross_entropy(logits=projection, + label=label, + soft_label=False) loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps]) loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_sum(loss) @@ -209,6 +218,7 @@ class PtbModel(fluid.Layer): class TestDygraphPtbRnn(unittest.TestCase): + def func_setUp(self): seed = 90 hidden_size = 10 @@ -223,12 +233,11 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) bd = [] lr_arr = [1.0] @@ -238,12 +247,11 @@ class TestDygraphPtbRnn(unittest.TestCase): new_lr = 1.0 lr_arr.append(new_lr) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - adam = Adam( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr_arr), - parameter_list=ptb_model.parameters()) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + adam = Adam(learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr), + parameter_list=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -256,14 +264,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) if i == 0: for param in ptb_model.parameters(): dy_param_init[param.name] = param.numpy() @@ -309,12 +317,11 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) bd = [] lr_arr = [1.0] @@ -324,12 +331,11 @@ class TestDygraphPtbRnn(unittest.TestCase): new_lr = 1.0 lr_arr.append(new_lr) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - adam = Adam( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr_arr), - parameter_list=ptb_model.parameters()) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + 
adam = Adam(learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr), + parameter_list=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -342,14 +348,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) if i == 0: for param in ptb_model.parameters(): dy_param_init[param.name] = param.numpy() @@ -419,12 +425,11 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) bd = [] lr_arr = [1.0] @@ -434,12 +439,11 @@ class TestDygraphPtbRnn(unittest.TestCase): new_lr = 1.0 lr_arr.append(new_lr) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - adam = Adam( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr_arr), - parameter_list=ptb_model.parameters()) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + adam = Adam(learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr), + parameter_list=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -452,14 +456,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) if i == 0: for param in ptb_model.parameters(): dy_param_init[param.name] = param.numpy() @@ -526,12 +530,11 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) bd = [] lr_arr = [1.0] @@ -541,12 +544,11 @@ class TestDygraphPtbRnn(unittest.TestCase): new_lr = 1.0 lr_arr.append(new_lr) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - adam = Adam( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr_arr), - 
parameter_list=ptb_model.parameters()) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + adam = Adam(learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr), + parameter_list=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -559,14 +561,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) if i == 0: for param in ptb_model.parameters(): dy_param_init[param.name] = param.numpy() @@ -637,20 +639,18 @@ class TestDygraphPtbRnn(unittest.TestCase): with fluid.dygraph.guard(): # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) - - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - adam = Adam( - learning_rate=0.0, - beta1=0.8, - beta2=0.6, - parameter_list=ptb_model.parameters()) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + adam = Adam(learning_rate=0.0, + beta1=0.8, + beta2=0.6, + parameter_list=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -666,14 +666,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) dy_loss.backward() adam.minimize(dy_loss) @@ -687,12 +687,12 @@ class TestDygraphPtbRnn(unittest.TestCase): if k.find("beta1_pow_acc_0") > 0: self.assertTrue( - np.array_equal(v.numpy(), self.base_opti[v.name] * - adam._beta1)) + np.array_equal(v.numpy(), + self.base_opti[v.name] * adam._beta1)) if k.find("beta2_pow_acc_0") > 0: self.assertTrue( - np.array_equal(v.numpy(), self.base_opti[v.name] * - adam._beta2)) + np.array_equal(v.numpy(), + self.base_opti[v.name] * adam._beta2)) state_dict = ptb_model.state_dict() @@ -716,12 +716,11 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + 
init_scale=init_scale) bd = [] lr_arr = [0.0] @@ -732,13 +731,12 @@ class TestDygraphPtbRnn(unittest.TestCase): new_lr = 0.0 lr_arr.append(new_lr) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - adam = Adam( - learning_rate=0.0, - beta1=0.8, - beta2=0.6, - parameter_list=ptb_model.parameters()) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + adam = Adam(learning_rate=0.0, + beta1=0.8, + beta2=0.6, + parameter_list=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -755,14 +753,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) dy_loss.backward() adam.minimize(dy_loss) @@ -776,12 +774,12 @@ class TestDygraphPtbRnn(unittest.TestCase): if k.find("beta1_pow_acc_0") > 0: self.assertTrue( - np.array_equal(v.numpy(), self.base_opti[v.name] * - adam._beta1)) + np.array_equal(v.numpy(), + self.base_opti[v.name] * adam._beta1)) if k.find("beta2_pow_acc_0") > 0: self.assertTrue( - np.array_equal(v.numpy(), self.base_opti[v.name] * - adam._beta2)) + np.array_equal(v.numpy(), + self.base_opti[v.name] * adam._beta2)) # check parameter @@ -808,12 +806,11 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) bd = [] lr_arr = [0.0] @@ -824,14 +821,13 @@ class TestDygraphPtbRnn(unittest.TestCase): new_lr = 0.0 lr_arr.append(new_lr) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - adam = Adam( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr_arr), - beta1=0.8, - beta2=0.6, - parameter_list=ptb_model.parameters()) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + adam = Adam(learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr), + beta1=0.8, + beta2=0.6, + parameter_list=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -858,14 +854,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) dy_loss.backward() 
adam.minimize(dy_loss) @@ -879,12 +875,12 @@ class TestDygraphPtbRnn(unittest.TestCase): if k.find("beta1_pow_acc_0") > 0: self.assertTrue( - np.array_equal(v.numpy(), self.base_opti[v.name] * - adam._beta1)) + np.array_equal(v.numpy(), + self.base_opti[v.name] * adam._beta1)) if k.find("beta2_pow_acc_0") > 0: self.assertTrue( - np.array_equal(v.numpy(), self.base_opti[v.name] * - adam._beta2)) + np.array_equal(v.numpy(), + self.base_opti[v.name] * adam._beta2)) # check parameter diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index 7e7b2e2fd52..91bb1b7e94f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -31,6 +31,7 @@ from paddle.fluid.framework import _test_eager_guard class SimpleLSTMRNN(fluid.Layer): + def __init__(self, hidden_size, num_steps, @@ -75,23 +76,29 @@ class SimpleLSTMRNN(fluid.Layer): self.hidden_array = [] for i in range(self._num_layers): - pre_hidden = fluid.layers.slice( - init_hidden, axes=[0], starts=[i], ends=[i + 1]) - pre_cell = fluid.layers.slice( - init_cell, axes=[0], starts=[i], ends=[i + 1]) - pre_hidden = fluid.layers.reshape( - pre_hidden, shape=[-1, self._hidden_size]) - pre_cell = fluid.layers.reshape( - pre_cell, shape=[-1, self._hidden_size]) + pre_hidden = fluid.layers.slice(init_hidden, + axes=[0], + starts=[i], + ends=[i + 1]) + pre_cell = fluid.layers.slice(init_cell, + axes=[0], + starts=[i], + ends=[i + 1]) + pre_hidden = fluid.layers.reshape(pre_hidden, + shape=[-1, self._hidden_size]) + pre_cell = fluid.layers.reshape(pre_cell, + shape=[-1, self._hidden_size]) self.hidden_array.append(pre_hidden) self.cell_array.append(pre_cell) res = [] for index in range(self._num_steps): - self._input = fluid.layers.slice( - input_embedding, axes=[1], starts=[index], ends=[index + 1]) - self._input = fluid.layers.reshape( - self._input, shape=[-1, self._hidden_size]) + self._input = fluid.layers.slice(input_embedding, + axes=[1], + starts=[index], + ends=[index + 1]) + self._input = fluid.layers.reshape(self._input, + shape=[-1, self._hidden_size]) for k in range(self._num_layers): pre_hidden = self.hidden_array[k] pre_cell = self.cell_array[k] @@ -102,8 +109,9 @@ class SimpleLSTMRNN(fluid.Layer): gate_input = fluid.layers.matmul(x=nn, y=weight_1) gate_input = fluid.layers.elementwise_add(gate_input, bias) - i, j, f, o = fluid.layers.split( - gate_input, num_or_sections=4, dim=-1) + i, j, f, o = fluid.layers.split(gate_input, + num_or_sections=4, + dim=-1) c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid( i) * fluid.layers.tanh(j) m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o) @@ -117,8 +125,8 @@ class SimpleLSTMRNN(fluid.Layer): dropout_prob=self._dropout, dropout_implementation='upscale_in_train') res.append( - fluid.layers.reshape( - self._input, shape=[1, -1, self._hidden_size])) + fluid.layers.reshape(self._input, + shape=[1, -1, self._hidden_size])) real_res = fluid.layers.concat(res, 0) real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = fluid.layers.concat(self.hidden_array, 1) @@ -133,6 +141,7 @@ class SimpleLSTMRNN(fluid.Layer): class PtbModel(fluid.Layer): + def __init__(self, hidden_size, vocab_size, @@ -147,12 +156,11 @@ class PtbModel(fluid.Layer): self.num_layers = num_layers self.num_steps = num_steps self.dropout = dropout - self.simple_lstm_rnn = SimpleLSTMRNN( - hidden_size, - num_steps, - 
num_layers=num_layers, - init_scale=init_scale, - dropout=dropout) + self.simple_lstm_rnn = SimpleLSTMRNN(hidden_size, + num_steps, + num_layers=num_layers, + init_scale=init_scale, + dropout=dropout) self.embedding = Embedding( size=[vocab_size, hidden_size], dtype='float32', @@ -190,17 +198,18 @@ class PtbModel(fluid.Layer): x_emb, dropout_prob=self.drop_out, dropout_implementation='upscale_in_train') - rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, - init_c) + rnn_out, last_hidden, last_cell = self.simple_lstm_rnn( + x_emb, init_h, init_c) rnn_out = fluid.layers.reshape( rnn_out, shape=[-1, self.num_steps, self.hidden_size]) projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = fluid.layers.elementwise_add(projection, self.softmax_bias) - projection = fluid.layers.reshape( - projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( - logits=projection, label=label, soft_label=False) + projection = fluid.layers.reshape(projection, + shape=[-1, self.vocab_size]) + loss = fluid.layers.softmax_with_cross_entropy(logits=projection, + label=label, + soft_label=False) loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps]) loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_sum(loss) @@ -209,6 +218,7 @@ class PtbModel(fluid.Layer): class TestDygraphPtbRnn(unittest.TestCase): + def func_setUp(self): seed = 90 hidden_size = 10 @@ -223,12 +233,11 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) bd = [] lr_arr = [1.0] @@ -238,12 +247,12 @@ class TestDygraphPtbRnn(unittest.TestCase): new_lr = 1.0 lr_arr.append(new_lr) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - scheduler = paddle.optimizer.lr.PiecewiseDecay( - boundaries=bd, values=lr_arr) - adam = Adam( - learning_rate=scheduler, parameters=ptb_model.parameters()) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd, + values=lr_arr) + adam = Adam(learning_rate=scheduler, + parameters=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -256,14 +265,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) if i == 0: for param in ptb_model.parameters(): dy_param_init[param.name] = param.numpy() @@ -312,12 +321,11 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - 
hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) bd = [] lr_arr = [1.0] @@ -327,12 +335,12 @@ class TestDygraphPtbRnn(unittest.TestCase): new_lr = 1.0 lr_arr.append(new_lr) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - scheduler = paddle.optimizer.lr.PiecewiseDecay( - boundaries=bd, values=lr_arr) - adam = Adam( - learning_rate=scheduler, parameters=ptb_model.parameters()) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd, + values=lr_arr) + adam = Adam(learning_rate=scheduler, + parameters=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -345,14 +353,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) if i == 0: for param in ptb_model.parameters(): dy_param_init[param.name] = param.numpy() @@ -420,12 +428,11 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) bd = [] lr_arr = [1.0] @@ -435,12 +442,12 @@ class TestDygraphPtbRnn(unittest.TestCase): new_lr = 1.0 lr_arr.append(new_lr) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - scheduler = paddle.optimizer.lr.PiecewiseDecay( - boundaries=bd, values=lr_arr) - adam = Adam( - learning_rate=scheduler, parameters=ptb_model.parameters()) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd, + values=lr_arr) + adam = Adam(learning_rate=scheduler, + parameters=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -453,14 +460,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) if i == 0: for param in ptb_model.parameters(): 
dy_param_init[param.name] = param.numpy() @@ -528,12 +535,11 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) bd = [] lr_arr = [1.0] @@ -543,12 +549,12 @@ class TestDygraphPtbRnn(unittest.TestCase): new_lr = 1.0 lr_arr.append(new_lr) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - scheduler = paddle.optimizer.lr.PiecewiseDecay( - boundaries=bd, values=lr_arr) - adam = Adam( - learning_rate=scheduler, parameters=ptb_model.parameters()) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd, + values=lr_arr) + adam = Adam(learning_rate=scheduler, + parameters=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -561,14 +567,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) if i == 0: for param in ptb_model.parameters(): dy_param_init[param.name] = param.numpy() @@ -642,20 +648,18 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) - - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - adam = Adam( - learning_rate=0.0, - beta1=0.8, - beta2=0.6, - parameters=ptb_model.parameters()) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + adam = Adam(learning_rate=0.0, + beta1=0.8, + beta2=0.6, + parameters=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -671,14 +675,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) dy_loss.backward() adam.minimize(dy_loss) @@ -692,12 +696,12 @@ class 
TestDygraphPtbRnn(unittest.TestCase): if k.find("beta1_pow_acc_0") > 0: self.assertTrue( - np.array_equal(v.numpy(), self.base_opti[v.name] * - adam._beta1)) + np.array_equal(v.numpy(), + self.base_opti[v.name] * adam._beta1)) if k.find("beta2_pow_acc_0") > 0: self.assertTrue( - np.array_equal(v.numpy(), self.base_opti[v.name] * - adam._beta2)) + np.array_equal(v.numpy(), + self.base_opti[v.name] * adam._beta2)) state_dict = ptb_model.state_dict() @@ -721,12 +725,11 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) bd = [] lr_arr = [0.0] @@ -737,13 +740,12 @@ class TestDygraphPtbRnn(unittest.TestCase): new_lr = 0.0 lr_arr.append(new_lr) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - adam = Adam( - learning_rate=0.0, - beta1=0.8, - beta2=0.6, - parameters=ptb_model.parameters()) + place = fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + adam = Adam(learning_rate=0.0, + beta1=0.8, + beta2=0.6, + parameters=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -760,14 +762,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) dy_loss.backward() adam.minimize(dy_loss) @@ -781,12 +783,12 @@ class TestDygraphPtbRnn(unittest.TestCase): if k.find("beta1_pow_acc_0") > 0: self.assertTrue( - np.array_equal(v.numpy(), self.base_opti[v.name] * - adam._beta1)) + np.array_equal(v.numpy(), + self.base_opti[v.name] * adam._beta1)) if k.find("beta2_pow_acc_0") > 0: self.assertTrue( - np.array_equal(v.numpy(), self.base_opti[v.name] * - adam._beta2)) + np.array_equal(v.numpy(), + self.base_opti[v.name] * adam._beta2)) # check parameter @@ -812,12 +814,11 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel(hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) bd = [] lr_arr = [0.0] @@ -828,15 +829,14 @@ class TestDygraphPtbRnn(unittest.TestCase): new_lr = 0.0 lr_arr.append(new_lr) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) - scheduler = paddle.optimizer.lr.PiecewiseDecay( - boundaries=bd, values=lr_arr) - adam = Adam( - learning_rate=scheduler, - beta1=0.8, - beta2=0.6, - parameters=ptb_model.parameters()) + place = fluid.CPUPlace( + ) if not 
core.is_compiled_with_cuda() else fluid.CUDAPlace(0) + scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd, + values=lr_arr) + adam = Adam(learning_rate=scheduler, + beta1=0.8, + beta2=0.6, + parameters=ptb_model.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None @@ -863,14 +863,14 @@ class TestDygraphPtbRnn(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') x = to_variable(x_data) y = to_variable(y_data) init_hidden = to_variable(init_hidden_data) init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, - init_cell) + dy_loss, last_hidden, last_cell = ptb_model( + x, y, init_hidden, init_cell) dy_loss.backward() scheduler.step() @@ -881,17 +881,17 @@ class TestDygraphPtbRnn(unittest.TestCase): for k, v in opti_dict.items(): if k == "LR_Scheduler": self.assertTrue( - np.array_equal(v['last_epoch'], self.base_opti[k][ - 'last_epoch'] + 1)) + np.array_equal(v['last_epoch'], + self.base_opti[k]['last_epoch'] + 1)) if k.find("beta1_pow_acc_0") > 0: self.assertTrue( - np.array_equal(v.numpy(), self.base_opti[v.name] * - adam._beta1)) + np.array_equal(v.numpy(), + self.base_opti[v.name] * adam._beta1)) if k.find("beta2_pow_acc_0") > 0: self.assertTrue( - np.array_equal(v.numpy(), self.base_opti[v.name] * - adam._beta2)) + np.array_equal(v.numpy(), + self.base_opti[v.name] * adam._beta2)) # check parameter @@ -930,8 +930,9 @@ class TestDygraphPtbRnn(unittest.TestCase): state_dict = emb.state_dict() paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy.pdparams')) - para_state_dict = paddle.load( - os.path.join('saved_dy', 'emb_dy.pdparams'), return_numpy=True) + para_state_dict = paddle.load(os.path.join('saved_dy', + 'emb_dy.pdparams'), + return_numpy=True) para_state_dict['weight'] = np.expand_dims( para_state_dict['weight'], axis=-1) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index 9890dfa43a4..245982c71cc 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -70,6 +70,7 @@ def optimizer_setting(params, parameter_list=None): class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -79,15 +80,14 @@ class ConvBNLayer(fluid.dygraph.Layer): act=None): super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - bias_attr=None) + self._conv = Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + bias_attr=None) self._batch_norm = BatchNorm(num_filters, act=act) @@ -99,6 +99,7 @@ class ConvBNLayer(fluid.dygraph.Layer): class SqueezeExcitation(fluid.dygraph.Layer): + def __init__(self, num_channels, reduction_ratio): super(SqueezeExcitation, self).__init__() @@ -107,14 +108,14 @@ class SqueezeExcitation(fluid.dygraph.Layer): self._squeeze = Linear( num_channels, num_channels // reduction_ratio, - param_attr=fluid.ParamAttr( - 
initializer=fluid.initializer.Constant(value=0.05)), + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.05)), act='relu') self._excitation = Linear( num_channels // reduction_ratio, num_channels, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05)), + param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=0.05)), act='sigmoid') def forward(self, input): @@ -127,6 +128,7 @@ class SqueezeExcitation(fluid.dygraph.Layer): class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, @@ -136,29 +138,27 @@ class BottleneckBlock(fluid.dygraph.Layer): shortcut=True): super(BottleneckBlock, self).__init__() - self.conv0 = ConvBNLayer( - num_channels=num_channels, num_filters=num_filters, filter_size=1) - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - groups=cardinality) - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act='relu') - - self.scale = SqueezeExcitation( - num_channels=num_filters * 4, reduction_ratio=reduction_ratio) + self.conv0 = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters, + filter_size=1) + self.conv1 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + groups=cardinality) + self.conv2 = ConvBNLayer(num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act='relu') + + self.scale = SqueezeExcitation(num_channels=num_filters * 4, + reduction_ratio=reduction_ratio) if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=stride) + self.short = ConvBNLayer(num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride) self.shortcut = shortcut @@ -183,6 +183,7 @@ class BottleneckBlock(fluid.dygraph.Layer): class SeResNeXt(fluid.dygraph.Layer): + def __init__(self, layers=50, class_dim=102): super(SeResNeXt, self).__init__() @@ -196,52 +197,53 @@ class SeResNeXt(fluid.dygraph.Layer): reduction_ratio = 16 depth = [3, 4, 6, 3] num_filters = [128, 256, 512, 1024] - self.conv0 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.conv0 = ConvBNLayer(num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu') + self.pool = Pool2D(pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') elif layers == 101: cardinality = 32 reduction_ratio = 16 depth = [3, 4, 23, 3] num_filters = [128, 256, 512, 1024] - self.conv0 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.conv0 = ConvBNLayer(num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu') + self.pool = Pool2D(pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') elif layers == 152: cardinality = 64 reduction_ratio = 16 depth = [3, 8, 36, 3] num_filters = [128, 256, 512, 1024] - self.conv0 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=3, - stride=2, - act='relu') - self.conv1 = ConvBNLayer( - num_channels=64, - num_filters=64, - filter_size=3, - stride=2, - act='relu') - self.conv2 = ConvBNLayer( - num_channels=64, - num_filters=128, - filter_size=3, - stride=1, - act='relu') - 
self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.conv0 = ConvBNLayer(num_channels=3, + num_filters=64, + filter_size=3, + stride=2, + act='relu') + self.conv1 = ConvBNLayer(num_channels=64, + num_filters=64, + filter_size=3, + stride=2, + act='relu') + self.conv2 = ConvBNLayer(num_channels=64, + num_filters=128, + filter_size=3, + stride=1, + act='relu') + self.pool = Pool2D(pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') self.bottleneck_block_list = [] num_channels = 64 @@ -252,19 +254,19 @@ class SeResNeXt(fluid.dygraph.Layer): for i in range(depth[block]): bottleneck_block = self.add_sublayer( 'bb_%d_%d' % (block, i), - BottleneckBlock( - num_channels=num_channels, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=cardinality, - reduction_ratio=reduction_ratio, - shortcut=shortcut)) + BottleneckBlock(num_channels=num_channels, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=cardinality, + reduction_ratio=reduction_ratio, + shortcut=shortcut)) num_channels = bottleneck_block._num_channels_out self.bottleneck_block_list.append(bottleneck_block) shortcut = True - self.pool2d_avg = Pool2D( - pool_size=7, pool_type='avg', global_pooling=True) + self.pool2d_avg = Pool2D(pool_size=7, + pool_type='avg', + global_pooling=True) import math stdv = 1.0 / math.sqrt(2048 * 1.0) @@ -296,7 +298,9 @@ class SeResNeXt(fluid.dygraph.Layer): class TestImperativeResneXt(unittest.TestCase): + def reader_decorator(self, reader): + def _reader_imple(): for item in reader(): doc = np.array(item[0]).reshape(3, 224, 224) @@ -323,11 +327,10 @@ class TestImperativeResneXt(unittest.TestCase): batch_py_reader = fluid.io.PyReader(capacity=1) batch_py_reader.decorate_sample_list_generator( - paddle.batch( - self.reader_decorator( - paddle.dataset.flowers.train(use_xmap=False)), - batch_size=batch_size, - drop_last=True), + paddle.batch(self.reader_decorator( + paddle.dataset.flowers.train(use_xmap=False)), + batch_size=batch_size, + drop_last=True), places=fluid.CPUPlace()) dy_param_init_value = {} @@ -346,8 +349,8 @@ class TestImperativeResneXt(unittest.TestCase): out = se_resnext(img) softmax_out = fluid.layers.softmax(out, use_cudnn=False) - loss = fluid.layers.cross_entropy( - input=softmax_out, label=label) + loss = fluid.layers.cross_entropy(input=softmax_out, + label=label) avg_loss = fluid.layers.mean(x=loss) dy_out = avg_loss.numpy() @@ -361,10 +364,10 @@ class TestImperativeResneXt(unittest.TestCase): dy_grad_value = {} for param in se_resnext.parameters(): if param.trainable: - np_array = np.array(param._grad_ivar().value() - .get_tensor()) - dy_grad_value[param.name + core.grad_var_suffix( - )] = np_array + np_array = np.array( + param._grad_ivar().value().get_tensor()) + dy_grad_value[param.name + + core.grad_var_suffix()] = np_array optimizer.minimize(avg_loss) se_resnext.clear_gradients() @@ -400,8 +403,9 @@ class TestImperativeResneXt(unittest.TestCase): batch_size=batch_size, drop_last=True) - img = fluid.layers.data( - name='pixel', shape=[3, 224, 224], dtype='float32') + img = fluid.layers.data(name='pixel', + shape=[3, 224, 224], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') out = se_resnext(img) softmax_out = fluid.layers.softmax(out, use_cudnn=False) @@ -430,21 +434,22 @@ class TestImperativeResneXt(unittest.TestCase): if batch_id >= batch_num and batch_num != -1: break - static_x_data = np.array( - [x[0].reshape(3, 
224, 224) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape( - [batch_size, 1]) + static_x_data = np.array([ + x[0].reshape(3, 224, 224) for x in data + ]).astype('float32') + y_data = np.array([x[1] + for x in data]).astype('int64').reshape( + [batch_size, 1]) fetch_list = [avg_loss.name] fetch_list.extend(static_param_name_list) fetch_list.extend(static_grad_name_list) - out = exe.run( - fluid.default_main_program(), - feed={"pixel": static_x_data, - "label": y_data}, - fetch_list=fetch_list) + out = exe.run(fluid.default_main_program(), + feed={ + "pixel": static_x_data, + "label": y_data + }, + fetch_list=fetch_list) static_param_value = {} static_grad_value = {} @@ -497,8 +502,8 @@ class TestImperativeResneXt(unittest.TestCase): np.allclose(static_out, eager_out), "\nstatic_out: {}\neager_out: {}".format(static_out, eager_out)) - self.assertEqual( - len(eager_param_init_value), len(static_param_init_value)) + self.assertEqual(len(eager_param_init_value), + len(static_param_init_value)) for key, value in six.iteritems(static_param_init_value): self.assertTrue(np.allclose(value, eager_param_init_value[key])) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py index 05f00cba0c2..962b6e2b0af 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py @@ -26,13 +26,13 @@ from paddle.fluid.framework import _test_eager_guard class SimpleNet(paddle.nn.Layer): + def __init__(self, vocab_size, hidden_size, dtype): super(SimpleNet, self).__init__() - self.emb = fluid.dygraph.Embedding( - size=[vocab_size, hidden_size], - dtype=dtype, - param_attr='emb.w', - is_sparse=True) + self.emb = fluid.dygraph.Embedding(size=[vocab_size, hidden_size], + dtype=dtype, + param_attr='emb.w', + is_sparse=True) def forward(self, input): input_emb = self.emb(input) @@ -40,6 +40,7 @@ class SimpleNet(paddle.nn.Layer): class TestSimpleNet(unittest.TestCase): + def func_selectedrows_gradient1(self): places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): @@ -49,19 +50,17 @@ class TestSimpleNet(unittest.TestCase): for dtype in ["float32", "float64"]: for sort_sum_gradient in [True, False]: paddle.disable_static(place) - fluid.set_flags({ - 'FLAGS_sort_sum_gradient': sort_sum_gradient - }) + fluid.set_flags( + {'FLAGS_sort_sum_gradient': sort_sum_gradient}) # grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0) input_word = np.array([[1, 2], [2, 1]]).astype('int64') input = paddle.to_tensor(input_word) simplenet = SimpleNet(20, 32, dtype) - adam = SGDOptimizer( - learning_rate=0.001, - parameter_list=simplenet.parameters( - )) # grad_clip=grad_clip + adam = SGDOptimizer(learning_rate=0.001, + parameter_list=simplenet.parameters() + ) # grad_clip=grad_clip input_emb, emb = simplenet(input) self.assertTrue(emb.weight.gradient() is None) @@ -93,19 +92,17 @@ class TestSimpleNet(unittest.TestCase): for place in places: for sort_sum_gradient in [True, False]: with fluid.dygraph.guard(place): - fluid.set_flags({ - 'FLAGS_sort_sum_gradient': sort_sum_gradient - }) + fluid.set_flags( + {'FLAGS_sort_sum_gradient': sort_sum_gradient}) grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0) input_word = np.array([[1, 2], [2, 1]]).astype('int64') input = to_variable(input_word) simplenet = SimpleNet(20, 32, "float32") - adam = SGDOptimizer( - learning_rate=0.001, - 
parameter_list=simplenet.parameters(), - grad_clip=grad_clip) + adam = SGDOptimizer(learning_rate=0.001, + parameter_list=simplenet.parameters(), + grad_clip=grad_clip) input_emb, emb = simplenet(input) self.assertTrue(emb.weight.gradient() is None) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py index eaf63436ae0..9f013157205 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py @@ -29,6 +29,7 @@ from paddle.fluid.framework import _test_eager_guard class SimpleNet(fluid.Layer): + def __init__(self, hidden_size, vocab_size, @@ -67,12 +68,12 @@ class SimpleNet(fluid.Layer): fc = fluid.layers.matmul(x_emb, self.softmax_weight) fc = fluid.layers.elementwise_add(fc, self.softmax_bias) projection = fluid.layers.matmul( - fc, fluid.layers.transpose( - self.embedding.weight, perm=[1, 0])) - projection = fluid.layers.reshape( - projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( - logits=projection, label=label, soft_label=False) + fc, fluid.layers.transpose(self.embedding.weight, perm=[1, 0])) + projection = fluid.layers.reshape(projection, + shape=[-1, self.vocab_size]) + loss = fluid.layers.softmax_with_cross_entropy(logits=projection, + label=label, + soft_label=False) loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps]) loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_sum(loss) @@ -81,6 +82,7 @@ class SimpleNet(fluid.Layer): class TestDygraphSimpleNet(unittest.TestCase): + def func_simple_net(self): for is_sparse in [True, False]: dtype_list = ["float32"] @@ -114,24 +116,21 @@ class TestDygraphSimpleNet(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) - simple_net = SimpleNet( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_steps=num_steps, - init_scale=init_scale, - is_sparse=is_sparse, - dtype=dtype) - - sgd = SGDOptimizer( - learning_rate=1e-3, - parameter_list=simple_net.parameters()) + simple_net = SimpleNet(hidden_size=hidden_size, + vocab_size=vocab_size, + num_steps=num_steps, + init_scale=init_scale, + is_sparse=is_sparse, + dtype=dtype) + + sgd = SGDOptimizer(learning_rate=1e-3, + parameter_list=simple_net.parameters()) dy_param_updated = dict() dy_param_init = dict() dy_loss = None - fluid.set_flags({ - 'FLAGS_sort_sum_gradient': is_sort_sum_gradient - }) + fluid.set_flags( + {'FLAGS_sort_sum_gradient': is_sort_sum_gradient}) for i in range(batch_num): x_data = np.arange(12).reshape(4, 3).astype('int64') @@ -158,17 +157,17 @@ class TestDygraphSimpleNet(unittest.TestCase): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) - simple_net = SimpleNet( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_steps=num_steps, - is_sparse=is_sparse, - dtype=dtype) + simple_net = SimpleNet(hidden_size=hidden_size, + vocab_size=vocab_size, + num_steps=num_steps, + is_sparse=is_sparse, + dtype=dtype) exe = fluid.Executor(place) sgd = SGDOptimizer(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype=dtype) static_loss = simple_net(x, y) @@ -192,8 +191,10 @@ class TestDygraphSimpleNet(unittest.TestCase): fetch_list = 
[static_loss] fetch_list.extend(static_param_name_list) out = exe.run(fluid.default_main_program(), - feed={"x": x_data, - "y": y_data}, + feed={ + "x": x_data, + "y": y_data + }, fetch_list=fetch_list) static_loss_value = out[0] @@ -202,13 +203,13 @@ class TestDygraphSimpleNet(unittest.TestCase): static_param_updated[static_param_name_list[ k - 1]] = out[k] - self.assertTrue( - np.array_equal(static_loss_value, dy_loss_value)) + self.assertTrue(np.array_equal(static_loss_value, + dy_loss_value)) for key, value in six.iteritems(static_param_init): self.assertTrue(np.array_equal(value, dy_param_init[key])) for key, value in six.iteritems(static_param_updated): - self.assertTrue( - np.array_equal(value, dy_param_updated[key])) + self.assertTrue(np.array_equal(value, + dy_param_updated[key])) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py b/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py index 8aadb155b0c..3f2a897b6b3 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py @@ -39,7 +39,9 @@ def set_child_signal_handler(self, child_pid): class DygraphDataLoaderSingalHandler(unittest.TestCase): + def func_child_process_exit_with_error(self): + def __test_process__(): core._set_process_signal_handler() sys.exit(1) @@ -72,6 +74,7 @@ class DygraphDataLoaderSingalHandler(unittest.TestCase): self.func_child_process_exit_with_error() def func_child_process_killed_by_sigsegv(self): + def __test_process__(): core._set_process_signal_handler() os.kill(os.getpid(), signal.SIGSEGV) @@ -105,6 +108,7 @@ class DygraphDataLoaderSingalHandler(unittest.TestCase): self.func_child_process_killed_by_sigsegv() def func_child_process_killed_by_sigbus(self): + def __test_process__(): core._set_process_signal_handler() os.kill(os.getpid(), signal.SIGBUS) @@ -137,6 +141,7 @@ class DygraphDataLoaderSingalHandler(unittest.TestCase): self.func_child_process_killed_by_sigbus() def func_child_process_killed_by_sigterm(self): + def __test_process__(): core._set_process_signal_handler() time.sleep(10) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py index 4e542fb13cd..092478bbf2a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py @@ -24,6 +24,7 @@ if fluid.is_compiled_with_cuda(): class Config(object): + def __init__(self, place, sort_sum_gradient=True): self.place = place @@ -59,6 +60,7 @@ class Config(object): def create_mnist_dataset(cfg): + def create_target_label(label): return label # return (label + 1) % cfg.c_dim # fake label target @@ -105,6 +107,7 @@ def create_mnist_dataset(cfg): class InstanceNorm(fluid.dygraph.Layer): + def __init__(self, num_channels, epsilon=1e-5): super(InstanceNorm, self).__init__() self.epsilon = epsilon @@ -126,6 +129,7 @@ class InstanceNorm(fluid.dygraph.Layer): class Conv2DLayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters=64, @@ -136,13 +140,12 @@ class Conv2DLayer(fluid.dygraph.Layer): use_bias=False, relufactor=None): super(Conv2DLayer, self).__init__() - self._conv = fluid.dygraph.Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - 
padding=padding, - bias_attr=None if use_bias else False) + self._conv = fluid.dygraph.Conv2D(num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + bias_attr=None if use_bias else False) if norm is not None: self._norm = InstanceNorm(num_filters) @@ -164,6 +167,7 @@ class Conv2DLayer(fluid.dygraph.Layer): class Deconv2DLayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters=64, @@ -203,25 +207,24 @@ class Deconv2DLayer(fluid.dygraph.Layer): class ResidualBlock(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters): super(ResidualBlock, self).__init__() - self._conv0 = Conv2DLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=1, - padding=1, - norm=True, - relufactor=0) - - self._conv1 = Conv2DLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=1, - padding=1, - norm=True, - relufactor=None) + self._conv0 = Conv2DLayer(num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=1, + padding=1, + norm=True, + relufactor=0) + + self._conv1 = Conv2DLayer(num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=1, + padding=1, + norm=True, + relufactor=None) def forward(self, input): conv0 = self._conv0(input) @@ -230,28 +233,27 @@ class ResidualBlock(fluid.dygraph.Layer): class Generator(fluid.dygraph.Layer): + def __init__(self, cfg, num_channels=3): super(Generator, self).__init__() - conv_base = Conv2DLayer( - num_channels=cfg.c_dim + num_channels, - num_filters=cfg.g_base_dims, - filter_size=7, - stride=1, - padding=3, - norm=True, - relufactor=0) + conv_base = Conv2DLayer(num_channels=cfg.c_dim + num_channels, + num_filters=cfg.g_base_dims, + filter_size=7, + stride=1, + padding=3, + norm=True, + relufactor=0) sub_layers = [conv_base] cur_channels = cfg.g_base_dims for i in range(2): - sub_layer = Conv2DLayer( - num_channels=cur_channels, - num_filters=cur_channels * 2, - filter_size=4, - stride=2, - padding=1, - norm=True, - relufactor=0) + sub_layer = Conv2DLayer(num_channels=cur_channels, + num_filters=cur_channels * 2, + filter_size=4, + stride=2, + padding=1, + norm=True, + relufactor=0) cur_channels *= 2 sub_layers.append(sub_layer) @@ -261,8 +263,8 @@ class Generator(fluid.dygraph.Layer): repeat_num = cfg.g_repeat_num sub_layers = [] for i in range(repeat_num): - res_block = ResidualBlock( - num_channels=cur_channels, num_filters=cfg.g_base_dims * 4) + res_block = ResidualBlock(num_channels=cur_channels, + num_filters=cfg.g_base_dims * 4) sub_layers.append(res_block) self._res_block = fluid.dygraph.Sequential(*sub_layers) @@ -271,26 +273,24 @@ class Generator(fluid.dygraph.Layer): sub_layers = [] for i in range(2): rate = 2**(1 - i) - deconv = Deconv2DLayer( - num_channels=cur_channels, - num_filters=cfg.g_base_dims * rate, - filter_size=4, - stride=2, - padding=1, - relufactor=0, - norm=True) + deconv = Deconv2DLayer(num_channels=cur_channels, + num_filters=cfg.g_base_dims * rate, + filter_size=4, + stride=2, + padding=1, + relufactor=0, + norm=True) cur_channels = cfg.g_base_dims * rate sub_layers.append(deconv) self._deconv = fluid.dygraph.Sequential(*sub_layers) - self._conv1 = Conv2DLayer( - num_channels=cur_channels, - num_filters=3, - filter_size=7, - stride=1, - padding=3, - relufactor=None) + self._conv1 = Conv2DLayer(num_channels=cur_channels, + num_filters=3, + filter_size=7, + stride=1, + padding=3, + relufactor=None) def forward(self, input, label_trg): shape 
= input.shape @@ -310,29 +310,28 @@ class Generator(fluid.dygraph.Layer): class Discriminator(fluid.dygraph.Layer): + def __init__(self, cfg, num_channels=3): super(Discriminator, self).__init__() cur_dim = cfg.d_base_dims - conv_base = Conv2DLayer( - num_channels=num_channels, - num_filters=cur_dim, - filter_size=4, - stride=2, - padding=1, - relufactor=0.2) + conv_base = Conv2DLayer(num_channels=num_channels, + num_filters=cur_dim, + filter_size=4, + stride=2, + padding=1, + relufactor=0.2) repeat_num = cfg.d_repeat_num sub_layers = [conv_base] for i in range(1, repeat_num): - sub_layer = Conv2DLayer( - num_channels=cur_dim, - num_filters=cur_dim * 2, - filter_size=4, - stride=2, - padding=1, - relufactor=0.2) + sub_layer = Conv2DLayer(num_channels=cur_dim, + num_filters=cur_dim * 2, + filter_size=4, + stride=2, + padding=1, + relufactor=0.2) cur_dim *= 2 sub_layers.append(sub_layer) @@ -340,17 +339,15 @@ class Discriminator(fluid.dygraph.Layer): kernel_size = int(cfg.image_size / np.power(2, repeat_num)) - self._conv1 = Conv2DLayer( - num_channels=cur_dim, - num_filters=1, - filter_size=3, - stride=1, - padding=1) + self._conv1 = Conv2DLayer(num_channels=cur_dim, + num_filters=1, + filter_size=3, + stride=1, + padding=1) - self._conv2 = Conv2DLayer( - num_channels=cur_dim, - num_filters=cfg.c_dim, - filter_size=kernel_size) + self._conv2 = Conv2DLayer(num_channels=cur_dim, + num_filters=cfg.c_dim, + filter_size=kernel_size) def forward(self, input): conv = self._conv0(input) @@ -361,8 +358,8 @@ class Discriminator(fluid.dygraph.Layer): def loss_cls(cls, label, cfg): cls_shape = cls.shape - cls = fluid.layers.reshape( - cls, [-1, cls_shape[1] * cls_shape[2] * cls_shape[3]]) + cls = fluid.layers.reshape(cls, + [-1, cls_shape[1] * cls_shape[2] * cls_shape[3]]) return fluid.layers.reduce_sum( fluid.layers.sigmoid_cross_entropy_with_logits(cls, label)) / cfg.batch_size @@ -370,21 +367,25 @@ def loss_cls(cls, label, cfg): def calc_gradients(outputs, inputs, no_grad_set): if fluid._non_static_mode(): - return fluid.dygraph.grad( - outputs=outputs, - inputs=inputs, - no_grad_vars=no_grad_set, - create_graph=True) + return fluid.dygraph.grad(outputs=outputs, + inputs=inputs, + no_grad_vars=no_grad_set, + create_graph=True) else: - return fluid.gradients( - targets=outputs, inputs=inputs, no_grad_set=no_grad_set) + return fluid.gradients(targets=outputs, + inputs=inputs, + no_grad_set=no_grad_set) def gradient_penalty(f, real, fake, no_grad_set, cfg): + def _interpolate(a, b): shape = [a.shape[0]] - alpha = fluid.layers.uniform_random_batch_size_like( - input=a, shape=shape, min=0.1, max=1.0, seed=cfg.seed) + alpha = fluid.layers.uniform_random_batch_size_like(input=a, + shape=shape, + min=0.1, + max=1.0, + seed=cfg.seed) inner = fluid.layers.elementwise_mul( b, 1.0 - alpha, axis=0) + fluid.layers.elementwise_mul( @@ -396,8 +397,9 @@ def gradient_penalty(f, real, fake, no_grad_set, cfg): if isinstance(pred, tuple): pred = pred[0] - gradient = calc_gradients( - outputs=[pred], inputs=[x], no_grad_set=no_grad_set) + gradient = calc_gradients(outputs=[pred], + inputs=[x], + no_grad_set=no_grad_set) if gradient is None: return None @@ -410,8 +412,7 @@ def gradient_penalty(f, real, fake, no_grad_set, cfg): epsilon = 1e-16 norm = fluid.layers.sqrt( - fluid.layers.reduce_sum( - fluid.layers.square(gradient), dim=1) + epsilon) + fluid.layers.reduce_sum(fluid.layers.square(gradient), dim=1) + epsilon) gp = fluid.layers.reduce_mean(fluid.layers.square(norm - 1.0)) return gp @@ -455,20 +456,21 @@ def 
build_optimizer(layer, cfg, loss=None): beta1 = 0.5 beta2 = 0.999 if fluid._non_static_mode(): - return fluid.optimizer.Adam( - learning_rate=learning_rate, - beta1=beta1, - beta2=beta2, - parameter_list=layer.parameters()) + return fluid.optimizer.Adam(learning_rate=learning_rate, + beta1=beta1, + beta2=beta2, + parameter_list=layer.parameters()) else: - optimizer = fluid.optimizer.Adam( - learning_rate=learning_rate, beta1=beta1, beta2=beta2) + optimizer = fluid.optimizer.Adam(learning_rate=learning_rate, + beta1=beta1, + beta2=beta2) optimizer.minimize(loss, parameter_list=layer.parameters()) return optimizer class DyGraphTrainModel(object): + def __init__(self, cfg): paddle.seed(1) paddle.framework.random._manual_program_seed(1) @@ -517,6 +519,7 @@ class DyGraphTrainModel(object): class StaticGraphTrainModel(object): + def __init__(self, cfg): self.cfg = cfg @@ -525,10 +528,12 @@ class StaticGraphTrainModel(object): shape=[None, 3, cfg.image_size, cfg.image_size], dtype='float32', name='image_real') - label_org = fluid.data( - shape=[None, cfg.c_dim], dtype='float32', name='label_org') - label_trg = fluid.data( - shape=[None, cfg.c_dim], dtype='float32', name='label_trg') + label_org = fluid.data(shape=[None, cfg.c_dim], + dtype='float32', + name='label_org') + label_trg = fluid.data(shape=[None, cfg.c_dim], + dtype='float32', + name='label_trg') return image_real, label_org, label_trg paddle.seed(cfg.seed) @@ -584,6 +589,7 @@ class StaticGraphTrainModel(object): class TestStarGANWithGradientPenalty(unittest.TestCase): + def func_main(self): self.place_test(fluid.CPUPlace()) @@ -614,8 +620,9 @@ class TestStarGANWithGradientPenalty(unittest.TestCase): label_trg) eager_dygraph_loss.append(loss) - for (g_loss_f, d_loss_f), (g_loss_e, d_loss_e) in zip( - fluid_dygraph_loss, eager_dygraph_loss): + for (g_loss_f, d_loss_f), (g_loss_e, + d_loss_e) in zip(fluid_dygraph_loss, + eager_dygraph_loss): self.assertEqual(g_loss_f, g_loss_e) self.assertEqual(d_loss_f, d_loss_e) @@ -624,6 +631,7 @@ class TestStarGANWithGradientPenalty(unittest.TestCase): class TestStarGANWithGradientPenaltyLegacy(unittest.TestCase): + def func_main(self): self.place_test(fluid.CPUPlace()) @@ -651,8 +659,8 @@ class TestStarGANWithGradientPenaltyLegacy(unittest.TestCase): loss = dygraph_model.run(image_real, label_org, label_trg) dygraph_loss.append(loss) - for (g_loss_s, d_loss_s), (g_loss_d, d_loss_d) in zip(static_loss, - dygraph_loss): + for (g_loss_s, d_loss_s), (g_loss_d, + d_loss_d) in zip(static_loss, dygraph_loss): self.assertEqual(g_loss_s, g_loss_d) self.assertEqual(d_loss_s, d_loss_d) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py index 8a0d92fa415..619e1ba37d6 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py @@ -31,21 +31,19 @@ LOADED_VAR_SUFFIX = ".load_0" def convolutional_neural_network(img): - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_1 = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - pool_size=2, - pool_stride=2, - act="relu") + 
conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') return prediction @@ -63,6 +61,7 @@ def static_train_net(img, label): class TestImperativeStaticModelRunnerMnist(unittest.TestCase): + def setUp(self): self.seed = 90 self.epoch_num = 1 @@ -70,6 +69,7 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase): self.batch_num = 50 def reader_decorator(self, reader): + def _reader_impl(): for item in reader(): image = np.array(item[0]).reshape(1, 28, 28) @@ -83,24 +83,24 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase): startup_program = fluid.default_startup_program() main_program = fluid.default_main_program() - img = fluid.data( - name='img', shape=[None, 1, 28, 28], dtype='float32') + img = fluid.data(name='img', + shape=[None, 1, 28, 28], + dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') prediction, avg_loss = static_train_net(img, label) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) feeder = fluid.DataFeeder(feed_list=[img, label], place=place) exe.run(startup_program) - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=100), - batch_size=self.batch_size) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=100), + batch_size=self.batch_size) for _ in range(0, self.epoch_num): for batch_id, data in enumerate(train_reader()): @@ -111,16 +111,16 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase): if batch_id > self.batch_num: break - fluid.io.save_inference_model( - self.save_dirname, ["img"], [prediction], - exe, - model_filename=self.model_filename, - params_filename=self.params_filename, - clip_extra=False) + fluid.io.save_inference_model(self.save_dirname, ["img"], + [prediction], + exe, + model_filename=self.model_filename, + params_filename=self.params_filename, + clip_extra=False) def load_and_train_dygraph(self): - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.dygraph.guard(place): fluid.default_startup_program().random_seed = self.seed fluid.default_main_program().random_seed = self.seed @@ -141,10 +141,10 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase): sgd = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=mnist.parameters()) - train_reader = paddle.batch( - self.reader_decorator(paddle.dataset.mnist.train()), - batch_size=self.batch_size, - drop_last=True) + train_reader = paddle.batch(self.reader_decorator( + paddle.dataset.mnist.train()), + batch_size=self.batch_size, + drop_last=True) train_loader = fluid.io.DataLoader.from_generator(capacity=10) train_loader.set_sample_list_generator(train_reader, places=place) @@ -182,23 +182,23 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase): fluid.default_startup_program().random_seed = self.seed fluid.default_main_program().random_seed = self.seed - img = fluid.data( - name='img', shape=[None, 1, 28, 28], dtype='float32') + img = fluid.data(name='img', + shape=[None, 1, 28, 28], + dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') prediction, avg_loss = 
static_train_net(img, label) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - fluid.io.load_params( - exe, - self.save_dirname, - main_program=fluid.default_main_program(), - filename=self.params_filename) + fluid.io.load_params(exe, + self.save_dirname, + main_program=fluid.default_main_program(), + filename=self.params_filename) static_param_init_value = {} static_param_name_list = [] @@ -207,23 +207,25 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase): static_param_init_value[param.name] = fluid.executor._fetch_var( param.name) - train_reader = paddle.batch( - self.reader_decorator(paddle.dataset.mnist.train()), - batch_size=self.batch_size, - drop_last=True) + train_reader = paddle.batch(self.reader_decorator( + paddle.dataset.mnist.train()), + batch_size=self.batch_size, + drop_last=True) for epoch in range(self.epoch_num): for batch_id, data in enumerate(train_reader()): static_x_data = np.array([x[0] for x in data]) - y_data = np.array([x[1] for x in data]).reshape( - [self.batch_size, 1]) + y_data = np.array([x[1] for x in data + ]).reshape([self.batch_size, 1]) fetch_list = [avg_loss.name] fetch_list.extend(static_param_name_list) out = exe.run(fluid.default_main_program(), - feed={"img": static_x_data, - "label": y_data}, + feed={ + "img": static_x_data, + "label": y_data + }, fetch_list=fetch_list) if batch_id >= self.batch_num: @@ -237,18 +239,18 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase): return static_x_data, static_out, static_param_init_value, static_param_value def load_and_infer_dygraph(self): - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.dygraph.guard(place): fluid.default_main_program().random_seed = self.seed mnist = fluid.dygraph.static_runner.StaticModelRunner( model_dir=self.save_dirname, model_filename=self.model_filename) - train_reader = paddle.batch( - self.reader_decorator(paddle.dataset.mnist.test()), - batch_size=self.batch_size, - drop_last=True) + train_reader = paddle.batch(self.reader_decorator( + paddle.dataset.mnist.test()), + batch_size=self.batch_size, + drop_last=True) train_loader = fluid.io.DataLoader.from_generator(capacity=10) train_loader.set_sample_list_generator(train_reader, places=place) @@ -268,19 +270,18 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase): def load_and_infer_static(self): with new_program_scope(): - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) - [infer_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model(self.save_dirname, - exe) + [infer_program, feed_target_names, fetch_targets + ] = fluid.io.load_inference_model(self.save_dirname, exe) infer_program.random_seed = self.seed - train_reader = paddle.batch( - self.reader_decorator(paddle.dataset.mnist.test()), - batch_size=self.batch_size, - drop_last=True) + train_reader = paddle.batch(self.reader_decorator( + paddle.dataset.mnist.test()), + batch_size=self.batch_size, + drop_last=True) for batch_id, data in enumerate(train_reader()): static_x_data = np.array([x[0] for x in data]) @@ -332,7 +333,7 @@ class 
TestImperativeStaticModelRunnerMnist(unittest.TestCase): # Phase 1. run and save static model self.train_and_save_model() - # Phase 2. load model & train dygraph + # Phase 2. load model & train dygraph dy_x_data, dy_out, dy_param_init_value, dy_param_value, dict_old_new_init= \ self.load_and_train_dygraph() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py index 13ed7a4d334..6c90b834871 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py @@ -33,6 +33,7 @@ LOADED_VAR_SUFFIX = ".load_0" def while_softmax_regression(img): + def cond(i, times, pred): return i < times @@ -44,12 +45,14 @@ def while_softmax_regression(img): i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0) times = fluid.layers.fill_constant(shape=[1], dtype='int64', value=5) pred = fluid.layers.fc(input=img, size=10, act='softmax') - i, times, pred = fluid.layers.while_loop( - cond=cond, body=body, loop_vars=[i, times, pred]) + i, times, pred = fluid.layers.while_loop(cond=cond, + body=body, + loop_vars=[i, times, pred]) return pred class TestImperativeStaticModelRunnerWhile(unittest.TestCase): + def setUp(self): self.seed = 90 self.batch_size = 32 @@ -59,6 +62,7 @@ class TestImperativeStaticModelRunnerWhile(unittest.TestCase): self.params_filename = None def _random_batch_reader(self): + def _get_random_images_and_labels(image_shape, label_shape): image = np.random.random(size=image_shape).astype('float32') label = np.random.random(size=label_shape).astype('int64') @@ -87,29 +91,29 @@ class TestImperativeStaticModelRunnerWhile(unittest.TestCase): optimizer = fluid.optimizer.SGD(learning_rate=0.001) optimizer.minimize(avg_loss) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(startup_program) - loader = fluid.io.DataLoader.from_generator( - feed_list=[img, label], capacity=5, iterable=True) + loader = fluid.io.DataLoader.from_generator(feed_list=[img, label], + capacity=5, + iterable=True) loader.set_batch_generator(self._random_batch_reader(), places=place) for data in loader(): exe.run(main_program, feed=data, fetch_list=[avg_loss]) - fluid.io.save_inference_model( - self.save_dirname, ["img"], [pred], - exe, - model_filename=self.model_filename, - params_filename=self.params_filename, - clip_extra=False) + fluid.io.save_inference_model(self.save_dirname, ["img"], [pred], + exe, + model_filename=self.model_filename, + params_filename=self.params_filename, + clip_extra=False) def load_and_train_dygraph(self): - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.dygraph.guard(place): fluid.default_startup_program().random_seed = self.seed fluid.default_main_program().random_seed = self.seed @@ -127,8 +131,8 @@ class TestImperativeStaticModelRunnerWhile(unittest.TestCase): parameter_list=while_net.parameters()) train_loader = fluid.io.DataLoader.from_generator(capacity=10) - train_loader.set_batch_generator( - self._random_batch_reader(), places=place) + train_loader.set_batch_generator(self._random_batch_reader(), + places=place) while_net.train() @@ -170,17 +174,16 @@ class 
TestImperativeStaticModelRunnerWhile(unittest.TestCase): optimizer = fluid.optimizer.SGD(learning_rate=0.001) optimizer.minimize(avg_loss) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - fluid.io.load_params( - exe, - self.save_dirname, - main_program=fluid.default_main_program(), - filename=self.params_filename) + fluid.io.load_params(exe, + self.save_dirname, + main_program=fluid.default_main_program(), + filename=self.params_filename) static_param_init_value = {} static_param_name_list = [] @@ -189,10 +192,11 @@ class TestImperativeStaticModelRunnerWhile(unittest.TestCase): static_param_init_value[param.name] = fluid.executor._fetch_var( param.name) - loader = fluid.io.DataLoader.from_generator( - feed_list=[img, label], capacity=5, iterable=True) - loader.set_batch_generator( - self._random_batch_reader(), places=place) + loader = fluid.io.DataLoader.from_generator(feed_list=[img, label], + capacity=5, + iterable=True) + loader.set_batch_generator(self._random_batch_reader(), + places=place) for data in loader(): fetch_list = [avg_loss.name] diff --git a/python/paddle/fluid/tests/unittests/test_imperative_tensor_clear_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_tensor_clear_gradient.py index 7f34bf43688..55879293734 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_tensor_clear_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_tensor_clear_gradient.py @@ -21,6 +21,7 @@ import numpy as np def _dygraph_guard_(func): + def __impl__(*args, **kwargs): if fluid._non_static_mode(): return func(*args, **kwargs) @@ -35,6 +36,7 @@ dygraph_guard = wrap_decorator(_dygraph_guard_) class TestDygraphClearGradient(TestCase): + def setUp(self): self.input_shape = [10, 2] @@ -63,7 +65,7 @@ class TestDygraphClearGradient(TestCase): # default arg set_to_zero is true # so, False means real clear gradient linear.weight.clear_gradient(False) - # before ._gradient_set_empty(False), + # before ._gradient_set_empty(False), # the return of ._is_gradient_set_empty() should be True if not fluid.framework.in_dygraph_mode(): self.assertTrue(linear.weight._is_gradient_set_empty()) @@ -73,7 +75,7 @@ class TestDygraphClearGradient(TestCase): # reset, because ClearGradient will call SetIsEmpty(True), but this is not our expectation. if not fluid.framework.in_dygraph_mode(): linear.weight._gradient_set_empty(False) - # after ._gradient_set_empty(False), + # after ._gradient_set_empty(False), # the return of ._is_gradient_set_empty() should be False self.assertFalse(linear.weight._is_gradient_set_empty()) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_thread_local_has_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_thread_local_has_grad.py index b9b3158515a..5e8a54ca390 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_thread_local_has_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_thread_local_has_grad.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,6 +22,7 @@ from paddle.fluid.framework import _test_eager_guard class SimpleNet(nn.Layer): + def __init__(self, in_dim, out_dim): super(SimpleNet, self).__init__() self.fc = nn.Linear(in_dim, out_dim) @@ -31,6 +32,7 @@ class SimpleNet(nn.Layer): class TestCases(unittest.TestCase): + @paddle.no_grad() def thread_1_main(self): time.sleep(8) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py b/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py index a621105f508..8a7fa967897 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py @@ -20,6 +20,7 @@ import os class SimpleFCLayer(fluid.dygraph.Layer): + def __init__(self, feature_size, batch_size, fc_size): super(SimpleFCLayer, self).__init__() self._linear = fluid.dygraph.Linear(feature_size, fc_size) @@ -32,6 +33,7 @@ class SimpleFCLayer(fluid.dygraph.Layer): class TestTracedLayerRecordNonPersistableInput(unittest.TestCase): + def test_main(self): if fluid.framework.in_dygraph_mode(): return @@ -51,8 +53,8 @@ class TestTracedLayerRecordNonPersistableInput(unittest.TestCase): for _ in six.moves.range(10): in_x = fluid.dygraph.to_variable( - np.random.random((batch_size, feature_size)).astype( - 'float32')) + np.random.random( + (batch_size, feature_size)).astype('float32')) if traced_layer is None: dygraph_out, traced_layer = fluid.dygraph.TracedLayer.trace( layer, [in_x]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py index 531c89fb19e..7f60d6c64ac 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -25,6 +25,7 @@ from paddle.fluid.framework import _test_eager_guard, in_dygraph_mode, _in_legac from paddle.fluid import core import numpy as np import six + np.set_printoptions(suppress=True) from utils import DyGraphProgramDescTracerTestHelper, is_equal_program @@ -153,10 +154,10 @@ def position_encoding_init(n_position, d_pos_vec): num_timescales = channels // 2 log_timescale_increment = (np.log(float(1e4) / float(1)) / (num_timescales - 1)) - inv_timescales = np.exp(np.arange( - num_timescales)) * -log_timescale_increment - scaled_time = np.expand_dims(position, 1) * np.expand_dims(inv_timescales, - 0) + inv_timescales = np.exp( + np.arange(num_timescales)) * -log_timescale_increment + scaled_time = np.expand_dims(position, 1) * np.expand_dims( + inv_timescales, 0) signal = np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1) signal = np.pad(signal, [[0, 0], [0, np.mod(channels, 2)]], 'constant') position_enc = signal @@ -172,18 +173,15 @@ def create_data(is_static=False): ] else: enc_inputs = [ - to_variable( - src_word_np, name='src_word'), to_variable( - src_pos_np, name='src_pos'), to_variable( - src_slf_attn_bias_np, name='src_slf_attn_bias') + to_variable(src_word_np, name='src_word'), + to_variable(src_pos_np, name='src_pos'), + 
to_variable(src_slf_attn_bias_np, name='src_slf_attn_bias') ] dec_inputs = [ - to_variable( - trg_word_np, name='trg_word'), to_variable( - trg_pos_np, name='trg_pos'), to_variable( - trg_slf_attn_bias_np, name='trg_slf_attn_bias'), - to_variable( - trg_src_attn_bias_np, name='trg_src_attn_bias') + to_variable(trg_word_np, name='trg_word'), + to_variable(trg_pos_np, name='trg_pos'), + to_variable(trg_slf_attn_bias_np, name='trg_slf_attn_bias'), + to_variable(trg_src_attn_bias_np, name='trg_src_attn_bias') ] label = to_variable(lbl_word_np, name='lbl_word') weight = to_variable(lbl_weight_np, name='lbl_weight') @@ -209,13 +207,12 @@ def make_all_inputs(input_fields): """ inputs = [] for input_field in input_fields: - input_var = fluid.layers.data( - name=input_field, - shape=input_descs[input_field][0], - dtype=input_descs[input_field][1], - lod_level=input_descs[input_field][2] - if len(input_descs[input_field]) == 3 else 0, - append_batch_size=False) + input_var = fluid.layers.data(name=input_field, + shape=input_descs[input_field][0], + dtype=input_descs[input_field][1], + lod_level=input_descs[input_field][2] if + len(input_descs[input_field]) == 3 else 0, + append_batch_size=False) inputs.append(input_var) return inputs @@ -240,8 +237,8 @@ input_descs = { # encoder. # The actual data shape of src_slf_attn_bias is: # [batch_size, n_head, max_src_len_in_batch, max_src_len_in_batch] - "src_slf_attn_bias": [(batch_size, ModelHyperParams.n_head, seq_len, - seq_len), "float32"], + "src_slf_attn_bias": + [(batch_size, ModelHyperParams.n_head, seq_len, seq_len), "float32"], # The actual data shape of trg_word is: # [batch_size, max_trg_len_in_batch] "trg_word": [(batch_size, seq_len), "int64", @@ -253,14 +250,14 @@ input_descs = { # subsequent words in the decoder. # The actual data shape of trg_slf_attn_bias is: # [batch_size, n_head, max_trg_len_in_batch, max_trg_len_in_batch] - "trg_slf_attn_bias": [(batch_size, ModelHyperParams.n_head, seq_len, - seq_len), "float32"], + "trg_slf_attn_bias": + [(batch_size, ModelHyperParams.n_head, seq_len, seq_len), "float32"], # This input is used to remove attention weights on paddings of the source # input in the encoder-decoder attention. # The actual data shape of trg_src_attn_bias is: # [batch_size, n_head, max_trg_len_in_batch, max_src_len_in_batch] - "trg_src_attn_bias": [(batch_size, ModelHyperParams.n_head, seq_len, - seq_len), "float32"], + "trg_src_attn_bias": + [(batch_size, ModelHyperParams.n_head, seq_len, seq_len), "float32"], # This input is used in independent decoder program for inference. # The actual data shape of enc_output is: # [batch_size, max_src_len_in_batch, d_model] @@ -282,32 +279,38 @@ input_descs = { # Names of word embedding table which might be reused for weight sharing. word_emb_param_names = ( "src_word_emb_table", - "trg_word_emb_table", ) + "trg_word_emb_table", +) # Names of position encoding table which will be initialized externally. pos_enc_param_names = ( "src_pos_enc_table", - "trg_pos_enc_table", ) + "trg_pos_enc_table", +) # separated inputs for different usages. encoder_data_input_fields = ( "src_word", "src_pos", - "src_slf_attn_bias", ) + "src_slf_attn_bias", +) decoder_data_input_fields = ( "trg_word", "trg_pos", "trg_slf_attn_bias", "trg_src_attn_bias", - "enc_output", ) + "enc_output", +) label_data_input_fields = ( "lbl_word", - "lbl_weight", ) + "lbl_weight", +) # In fast decoder, trg_pos (only containing the current time step) is generated # by ops and trg_slf_attn_bias is not needed. 
fast_decoder_data_input_fields = ( "trg_word", "init_score", "init_idx", - "trg_src_attn_bias", ) + "trg_src_attn_bias", +) # if we use py_reader use_py_reader = False @@ -320,16 +323,20 @@ batch_num = 5 np.random.seed(90) src_word_np = np.arange(1, TrainTaskConfig.batch_size * seq_len + 1).reshape( [TrainTaskConfig.batch_size, seq_len]).astype('int64') -src_pos_np = np.random.randint( - 1, seq_len, size=(TrainTaskConfig.batch_size, seq_len), dtype='int64') +src_pos_np = np.random.randint(1, + seq_len, + size=(TrainTaskConfig.batch_size, seq_len), + dtype='int64') src_slf_attn_bias_np = np.random.randn(TrainTaskConfig.batch_size, ModelHyperParams.n_head, seq_len, seq_len).astype('float32') trg_word_np = np.arange(1, TrainTaskConfig.batch_size * seq_len + 1).reshape( [TrainTaskConfig.batch_size, seq_len]).astype('int64') -trg_pos_np = np.random.randint( - 1, seq_len, size=(TrainTaskConfig.batch_size, seq_len), dtype='int64') +trg_pos_np = np.random.randint(1, + seq_len, + size=(TrainTaskConfig.batch_size, seq_len), + dtype='int64') trg_slf_attn_bias_np = np.random.randn(TrainTaskConfig.batch_size, ModelHyperParams.n_head, seq_len, seq_len).astype('float32') @@ -337,11 +344,10 @@ trg_src_attn_bias_np = np.random.randn(TrainTaskConfig.batch_size, ModelHyperParams.n_head, seq_len, seq_len).astype('float32') -lbl_word_np = np.random.randint( - 1, - ModelHyperParams.src_vocab_size - 1, - size=(TrainTaskConfig.batch_size * seq_len, 1), - dtype='int64') +lbl_word_np = np.random.randint(1, + ModelHyperParams.src_vocab_size - 1, + size=(TrainTaskConfig.batch_size * seq_len, 1), + dtype='int64') lbl_weight_np = np.random.randn(TrainTaskConfig.batch_size * seq_len, 1).astype('float32') @@ -352,6 +358,7 @@ pos_inp2 = position_encoding_init(ModelHyperParams.max_length, class PrePostProcessLayer(Layer): + def __init__(self, d_model, process_cmd, shape_len=None): super(PrePostProcessLayer, self).__init__() for cmd in process_cmd: @@ -380,6 +387,7 @@ class PrePostProcessLayer(Layer): class PositionwiseFeedForwardLayer(Layer): + def __init__(self, d_inner_hid, d_hid, dropout_rate): super(PositionwiseFeedForwardLayer, self).__init__() self._i2h = Linear(d_hid, d_inner_hid, act="relu") @@ -389,16 +397,16 @@ class PositionwiseFeedForwardLayer(Layer): def forward(self, x): hidden = self._i2h(x) if self._dropout_rate: - hidden = fluid.layers.dropout( - hidden, - dropout_prob=self._dropout_rate, - seed=ModelHyperParams.dropout_seed, - is_test=False) + hidden = fluid.layers.dropout(hidden, + dropout_prob=self._dropout_rate, + seed=ModelHyperParams.dropout_seed, + is_test=False) out = self._h2o(hidden) return out class MultiHeadAttentionLayer(Layer): + def __init__(self, d_key, d_value, @@ -440,11 +448,10 @@ class MultiHeadAttentionLayer(Layer): transpose_v = fluid.layers.transpose(x=reshaped_v, perm=[0, 2, 1, 3]) # scale dot product attention - product = fluid.layers.matmul( - x=transpose_q, - y=transpose_k, - transpose_y=True, - alpha=self._d_model**-0.5) + product = fluid.layers.matmul(x=transpose_q, + y=transpose_k, + transpose_y=True, + alpha=self._d_model**-0.5) if attn_bias is not None: product += attn_bias weights = fluid.layers.softmax(product) @@ -473,6 +480,7 @@ class MultiHeadAttentionLayer(Layer): class EncoderSubLayer(Layer): + def __init__(self, n_head, d_key, @@ -494,14 +502,16 @@ class EncoderSubLayer(Layer): self._preprocess_cmd, 3) self._multihead_attention_layer = MultiHeadAttentionLayer( d_key, d_value, d_model, n_head, attention_dropout) - self._postprocess_layer = PrePostProcessLayer( - 
d_model, self._postprocess_cmd, None) + self._postprocess_layer = PrePostProcessLayer(d_model, + self._postprocess_cmd, + None) self._preprocess_layer2 = PrePostProcessLayer(d_model, self._preprocess_cmd, 3) self._positionwise_feed_forward = PositionwiseFeedForwardLayer( d_inner_hid, d_model, relu_dropout) - self._postprocess_layer2 = PrePostProcessLayer( - d_model, self._postprocess_cmd, None) + self._postprocess_layer2 = PrePostProcessLayer(d_model, + self._postprocess_cmd, + None) def forward(self, enc_input, attn_bias): pre_process_multihead = self._preprocess_layer( @@ -521,6 +531,7 @@ class EncoderSubLayer(Layer): class EncoderLayer(Layer): + def __init__(self, n_layer, n_head, @@ -560,6 +571,7 @@ class EncoderLayer(Layer): class PrepareEncoderDecoderLayer(Layer): + def __init__(self, src_vocab_size, src_emb_dim, @@ -573,13 +585,13 @@ class PrepareEncoderDecoderLayer(Layer): self._src_emb_dim = src_emb_dim self._src_vocab_size = src_vocab_size self._dropout_rate = dropout_rate - self._input_emb = Embedding( - size=[src_vocab_size, src_emb_dim], - is_sparse=is_sparse, - padding_idx=0, - param_attr=fluid.ParamAttr( - name=word_emb_param_name, - initializer=fluid.initializer.Normal(0., src_emb_dim**-0.5))) + self._input_emb = Embedding(size=[src_vocab_size, src_emb_dim], + is_sparse=is_sparse, + padding_idx=0, + param_attr=fluid.ParamAttr( + name=word_emb_param_name, + initializer=fluid.initializer.Normal( + 0., src_emb_dim**-0.5))) if pos_enc_param_name is pos_enc_param_names[0]: pos_inp = pos_inp1 @@ -599,8 +611,8 @@ class PrepareEncoderDecoderLayer(Layer): def forward(self, src_word, src_pos): src_word_emb = self._input_emb(src_word) - src_word_emb = fluid.layers.scale( - x=src_word_emb, scale=self._src_emb_dim**0.5) + src_word_emb = fluid.layers.scale(x=src_word_emb, + scale=self._src_emb_dim**0.5) # # TODO change this to fit dynamic length input src_pos_emb = self._pos_emb(src_pos) src_pos_emb.stop_gradient = True @@ -613,6 +625,7 @@ class PrepareEncoderDecoderLayer(Layer): class WrapEncoderLayer(Layer): + def __init__(self, src_vocab_size, max_length, @@ -655,6 +668,7 @@ class WrapEncoderLayer(Layer): class DecoderSubLayer(Layer): + def __init__(self, n_head, d_key, @@ -705,10 +719,11 @@ class DecoderSubLayer(Layer): postprocess_cmd, None) def forward(self, dec_input, enc_output, slf_attn_bias, dec_enc_attn_bias): - pre_process_rlt = self._pre_process_layer( - None, dec_input, self._preprocess_cmd, self._prepostprcess_dropout) - slf_attn_output = self._multihead_attention_layer(pre_process_rlt, None, - None, slf_attn_bias) + pre_process_rlt = self._pre_process_layer(None, dec_input, + self._preprocess_cmd, + self._prepostprcess_dropout) + slf_attn_output = self._multihead_attention_layer( + pre_process_rlt, None, None, slf_attn_bias) slf_attn_output_pp = self._post_process_layer( dec_input, slf_attn_output, self._postprocess_cmd, self._prepostprcess_dropout) @@ -717,9 +732,10 @@ class DecoderSubLayer(Layer): self._prepostprcess_dropout) enc_attn_output_pp = self._multihead_attention_layer2( pre_process_rlt2, enc_output, enc_output, dec_enc_attn_bias) - enc_attn_output = self._post_process_layer2( - slf_attn_output_pp, enc_attn_output_pp, self._postprocess_cmd, - self._prepostprcess_dropout) + enc_attn_output = self._post_process_layer2(slf_attn_output_pp, + enc_attn_output_pp, + self._postprocess_cmd, + self._prepostprcess_dropout) pre_process_rlt3 = self._pre_process_layer3(None, enc_attn_output, self._preprocess_cmd, self._prepostprcess_dropout) @@ -731,6 +747,7 @@ class 
DecoderSubLayer(Layer): class DecoderLayer(Layer): + def __init__(self, n_layer, n_head, @@ -756,25 +773,25 @@ class DecoderLayer(Layer): self._decoder_sub_layers.append( self.add_sublayer( 'dsl_%d' % i, - DecoderSubLayer( - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd, - postprocess_cmd, - cache=None if caches is None else caches[i], - gather_idx=gather_idx))) + DecoderSubLayer(n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + preprocess_cmd, + postprocess_cmd, + cache=None if caches is None else caches[i], + gather_idx=gather_idx))) def forward(self, dec_input, enc_output, dec_slf_attn_bias, dec_enc_attn_bias): for i in range(self._n_layer): - tmp_dec_output = self._decoder_sub_layers[i]( - dec_input, enc_output, dec_slf_attn_bias, dec_enc_attn_bias) + tmp_dec_output = self._decoder_sub_layers[i](dec_input, enc_output, + dec_slf_attn_bias, + dec_enc_attn_bias) dec_input = tmp_dec_output dec_output = self._pre_process_layer(None, tmp_dec_output, @@ -784,6 +801,7 @@ class DecoderLayer(Layer): class WrapDecoderLayer(Layer): + def __init__(self, trg_vocab_size, max_length, @@ -815,20 +833,19 @@ class WrapDecoderLayer(Layer): is_sparse=is_sparse, word_emb_param_name=word_emb_param_names[1], pos_enc_param_name=pos_enc_param_names[1]) - self._decoder_layer = DecoderLayer( - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd, - postprocess_cmd, - caches=caches, - gather_idx=gather_idx) + self._decoder_layer = DecoderLayer(n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + preprocess_cmd, + postprocess_cmd, + caches=caches, + gather_idx=gather_idx) self._weight_sharing = weight_sharing if not weight_sharing: self._fc = Linear(d_model, trg_vocab_size, bias_attr=False) @@ -858,6 +875,7 @@ class WrapDecoderLayer(Layer): class TransFormer(Layer): + def __init__(self, src_vocab_size, trg_vocab_size, @@ -885,38 +903,36 @@ class TransFormer(Layer): assert src_vocab_size == trg_vocab_size, ( "Vocabularies in source and target should be same for weight sharing." 
) - self._wrap_encoder_layer = WrapEncoderLayer( - src_vocab_size, - max_length, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd, - postprocess_cmd, - weight_sharing, - is_sparse=is_sparse) - self._wrap_decoder_layer = WrapDecoderLayer( - trg_vocab_size, - max_length, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - preprocess_cmd, - postprocess_cmd, - weight_sharing, - is_sparse=is_sparse) + self._wrap_encoder_layer = WrapEncoderLayer(src_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + preprocess_cmd, + postprocess_cmd, + weight_sharing, + is_sparse=is_sparse) + self._wrap_decoder_layer = WrapDecoderLayer(trg_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + preprocess_cmd, + postprocess_cmd, + weight_sharing, + is_sparse=is_sparse) if weight_sharing: self._wrap_decoder_layer._prepare_decoder_layer._input_emb.weight = self._wrap_encoder_layer._prepare_encoder_layer._input_emb.weight @@ -926,8 +942,8 @@ class TransFormer(Layer): predict = self._wrap_decoder_layer(dec_inputs, enc_output) if self._label_smooth_eps: label_out = fluid.layers.label_smooth( - label=fluid.layers.one_hot( - input=label, depth=self._trg_vocab_size), + label=fluid.layers.one_hot(input=label, + depth=self._trg_vocab_size), epsilon=self._label_smooth_eps) cost = fluid.layers.softmax_with_cross_entropy( @@ -943,6 +959,7 @@ class TransFormer(Layer): class TestDygraphTransformerSortGradient(unittest.TestCase): + def test_transformer_sort_gradient(self): for is_sparse in [True, False]: self.transformer_sort_gradient_float32(is_sparse) @@ -955,26 +972,25 @@ class TestDygraphTransformerSortGradient(unittest.TestCase): fluid.set_flags({'FLAGS_new_executor_use_inplace': False}) paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) - transformer = TransFormer( - ModelHyperParams.src_vocab_size, - ModelHyperParams.trg_vocab_size, - ModelHyperParams.max_length + 1, - ModelHyperParams.n_layer, - ModelHyperParams.n_head, - ModelHyperParams.d_key, - ModelHyperParams.d_value, - ModelHyperParams.d_model, - ModelHyperParams.d_inner_hid, - ModelHyperParams.prepostprocess_dropout, - ModelHyperParams.attention_dropout, - ModelHyperParams.relu_dropout, - ModelHyperParams.preprocess_cmd, - ModelHyperParams.postprocess_cmd, - ModelHyperParams.weight_sharing, - TrainTaskConfig.label_smooth_eps, - use_py_reader=use_py_reader, - is_test=False, - is_sparse=is_sparse) + transformer = TransFormer(ModelHyperParams.src_vocab_size, + ModelHyperParams.trg_vocab_size, + ModelHyperParams.max_length + 1, + ModelHyperParams.n_layer, + ModelHyperParams.n_head, + ModelHyperParams.d_key, + ModelHyperParams.d_value, + ModelHyperParams.d_model, + ModelHyperParams.d_inner_hid, + ModelHyperParams.prepostprocess_dropout, + ModelHyperParams.attention_dropout, + ModelHyperParams.relu_dropout, + ModelHyperParams.preprocess_cmd, + ModelHyperParams.postprocess_cmd, + ModelHyperParams.weight_sharing, + TrainTaskConfig.label_smooth_eps, + use_py_reader=use_py_reader, + is_test=False, + is_sparse=is_sparse) if sync: lr_decay = fluid.layers.learning_rate_scheduler.noam_decay( ModelHyperParams.d_model, TrainTaskConfig.warmup_steps) @@ -1048,26 +1064,25 @@ class 
TestDygraphTransformerSortGradient(unittest.TestCase): with new_program_scope(): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) - transformer = TransFormer( - ModelHyperParams.src_vocab_size, - ModelHyperParams.trg_vocab_size, - ModelHyperParams.max_length + 1, - ModelHyperParams.n_layer, - ModelHyperParams.n_head, - ModelHyperParams.d_key, - ModelHyperParams.d_value, - ModelHyperParams.d_model, - ModelHyperParams.d_inner_hid, - ModelHyperParams.prepostprocess_dropout, - ModelHyperParams.attention_dropout, - ModelHyperParams.relu_dropout, - ModelHyperParams.preprocess_cmd, - ModelHyperParams.postprocess_cmd, - ModelHyperParams.weight_sharing, - TrainTaskConfig.label_smooth_eps, - use_py_reader=use_py_reader, - is_test=False, - is_sparse=is_sparse) + transformer = TransFormer(ModelHyperParams.src_vocab_size, + ModelHyperParams.trg_vocab_size, + ModelHyperParams.max_length + 1, + ModelHyperParams.n_layer, + ModelHyperParams.n_head, + ModelHyperParams.d_key, + ModelHyperParams.d_value, + ModelHyperParams.d_model, + ModelHyperParams.d_inner_hid, + ModelHyperParams.prepostprocess_dropout, + ModelHyperParams.attention_dropout, + ModelHyperParams.relu_dropout, + ModelHyperParams.preprocess_cmd, + ModelHyperParams.postprocess_cmd, + ModelHyperParams.weight_sharing, + TrainTaskConfig.label_smooth_eps, + use_py_reader=use_py_reader, + is_test=False, + is_sparse=is_sparse) exe = fluid.Executor(fluid.CPUPlace( ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) optimizer = fluid.optimizer.SGD(learning_rate=0.003) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py index 33ec0df46cd..b814ca87dcd 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph, _in_ea def _dygraph_guard_(func): + def __impl__(*args, **kwargs): if fluid._non_static_mode(): return func(*args, **kwargs) @@ -43,30 +44,36 @@ def random_var(size, low=-1, high=1, dtype='float32'): class TestDygraphTripleGradMatmul(TestCase): + def test_matmul_triple_grad(self): input_numpy = np.ones([3, 3]) * 2 with _test_eager_guard(): - x = paddle.to_tensor( - input_numpy, stop_gradient=False, dtype='float32') - y = paddle.to_tensor( - input_numpy, stop_gradient=False, dtype='float32') + x = paddle.to_tensor(input_numpy, + stop_gradient=False, + dtype='float32') + y = paddle.to_tensor(input_numpy, + stop_gradient=False, + dtype='float32') out = paddle.matmul(x, y, False, False) - new_out_g = paddle.to_tensor( - np.ones([3, 3]), stop_gradient=False, dtype='float32') - new_x_g, new_y_g = paddle.grad( - [out], [x, y], [new_out_g], - retain_graph=True, - create_graph=True) - - new_x_g_g = paddle.to_tensor( - np.ones([3, 3]), stop_gradient=False, dtype='float32') - new_y_g_g = paddle.to_tensor( - np.ones([3, 3]), stop_gradient=False, dtype='float32') - new_a, new_b, new_c = paddle.grad( - [new_x_g, new_y_g], [x, y, new_out_g], [new_x_g_g, new_y_g_g], - retain_graph=True, - create_graph=True) + new_out_g = paddle.to_tensor(np.ones([3, 3]), + stop_gradient=False, + dtype='float32') + new_x_g, new_y_g = paddle.grad([out], [x, y], [new_out_g], + retain_graph=True, + create_graph=True) + + new_x_g_g = paddle.to_tensor(np.ones([3, 3]), + stop_gradient=False, + dtype='float32') + new_y_g_g = paddle.to_tensor(np.ones([3, 3]), + stop_gradient=False, + 
dtype='float32') + new_a, new_b, new_c = paddle.grad([new_x_g, new_y_g], + [x, y, new_out_g], + [new_x_g_g, new_y_g_g], + retain_graph=True, + create_graph=True) new_a.backward() @@ -105,6 +112,7 @@ class TestDygraphTripleGradMatmul(TestCase): class TestDygraphTripleGrad(TestCase): + def setUp(self): self.sort_sum_gradient = False self.shape = [5, 5] @@ -118,14 +126,13 @@ class TestDygraphTripleGrad(TestCase): create_graph=False, allow_unused=False): fluid.set_flags({'FLAGS_sort_sum_gradient': self.sort_sum_gradient}) - return fluid.dygraph.grad( - outputs=outputs, - inputs=inputs, - grad_outputs=grad_outputs, - no_grad_vars=no_grad_vars, - retain_graph=retain_graph, - create_graph=create_graph, - allow_unused=allow_unused) + return fluid.dygraph.grad(outputs=outputs, + inputs=inputs, + grad_outputs=grad_outputs, + no_grad_vars=no_grad_vars, + retain_graph=retain_graph, + create_graph=create_graph, + allow_unused=allow_unused) @dygraph_guard def func_exception(self): @@ -151,8 +158,8 @@ class TestDygraphTripleGrad(TestCase): [random_var(shape)], [random_var(shape)]) with self.assertRaises(AssertionError): - self.grad( - [random_var(shape)], [random_var(shape)], no_grad_vars=[1]) + self.grad([random_var(shape)], [random_var(shape)], + no_grad_vars=[1]) with self.assertRaises(AssertionError): self.grad([random_var(shape)], [random_var(shape)], no_grad_vars=1) @@ -219,6 +226,7 @@ class TestDygraphTripleGrad(TestCase): class TestDygraphTripleGradBradcastCase(TestCase): + def setUp(self): self.sort_sum_gradient = False self.x_shape = [3, 2, 2] @@ -234,14 +242,13 @@ class TestDygraphTripleGradBradcastCase(TestCase): create_graph=False, allow_unused=False): fluid.set_flags({'FLAGS_sort_sum_gradient': self.sort_sum_gradient}) - return fluid.dygraph.grad( - outputs=outputs, - inputs=inputs, - grad_outputs=grad_outputs, - no_grad_vars=no_grad_vars, - retain_graph=retain_graph, - create_graph=create_graph, - allow_unused=allow_unused) + return fluid.dygraph.grad(outputs=outputs, + inputs=inputs, + grad_outputs=grad_outputs, + no_grad_vars=no_grad_vars, + retain_graph=retain_graph, + create_graph=create_graph, + allow_unused=allow_unused) @dygraph_guard def func_example_with_gradient_and_create_graph(self): @@ -264,24 +271,21 @@ class TestDygraphTripleGradBradcastCase(TestCase): dx_actual, = self.grad([out], [x], create_graph=True) # Theoritical result based on math calculation dout = np.ones(self.x_shape).astype('float32') - dx_expected = np.matmul( - dout * out_np * (1 - out_np), np.transpose( - y_np, axes=(0, 2, 1))) + dx_expected = np.matmul(dout * out_np * (1 - out_np), + np.transpose(y_np, axes=(0, 2, 1))) self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected)) ddx_actual, = self.grad([dx_actual], [x], create_graph=True) # Theoritical result based on math calculation DDY = np.zeros(self.y_shape).astype('float32') DDX = np.ones(self.x_shape).astype('float32') - double_grad_tmp1 = np.matmul( - dout * out_np * (1 - out_np), np.transpose( - DDY, axes=(0, 2, 1))) + double_grad_tmp1 = np.matmul(dout * out_np * (1 - out_np), + np.transpose(DDY, axes=(0, 2, 1))) double_grad_tmp2 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY) double_grad_tmp3 = ( 1 - 2 * out_np) * dout * double_grad_tmp2 * out_np * (1 - out_np) ddx_expected = double_grad_tmp1 + np.matmul( - double_grad_tmp3, np.transpose( - y_np, axes=(0, 2, 1))) + double_grad_tmp3, np.transpose(y_np, axes=(0, 2, 1))) self.assertTrue(np.allclose(ddx_actual.numpy(), ddx_expected)) # Theoritical result based on math calculation @@ -290,10 +294,8 @@ 
class TestDygraphTripleGradBradcastCase(TestCase): tmp1 = (1 - 2 * out_np) * ((1 - 2 * out_np) * dout * tmp0 * tmp0) tmp2 = tmp0 * (1 - 2 * out_np) * d_ddout - 2 * dout * ( 1 - out_np) * out_np * tmp0 * tmp0 - dddx_expected = np.matmul( - ((tmp1 + tmp2) * out_np * (1 - out_np)), - np.transpose( - y_np, axes=(0, 2, 1))) + dddx_expected = np.matmul(((tmp1 + tmp2) * out_np * (1 - out_np)), + np.transpose(y_np, axes=(0, 2, 1))) ddx_actual.backward() dddx_grad_actual = x.gradient() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py b/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py index 46a89efcec4..84180fa299b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py @@ -21,6 +21,7 @@ from paddle.fluid.framework import _test_eager_guard class TestImperativeUsingNonZeroGpu(unittest.TestCase): + def run_main(self, np_arr, place): with guard(place): var = to_variable(np_arr) diff --git a/python/paddle/fluid/tests/unittests/test_increment.py b/python/paddle/fluid/tests/unittests/test_increment.py index 38f6a546071..d7b0f063295 100755 --- a/python/paddle/fluid/tests/unittests/test_increment.py +++ b/python/paddle/fluid/tests/unittests/test_increment.py @@ -22,10 +22,12 @@ import paddle.fluid as fluid class TestIncrement(unittest.TestCase): + def test_api(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - input = fluid.layers.fill_constant( - shape=[1], dtype='int64', value=5) + input = fluid.layers.fill_constant(shape=[1], + dtype='int64', + value=5) expected_result = np.array([8], dtype='int64') output = paddle.tensor.math.increment(input, value=3) @@ -41,6 +43,7 @@ class TestIncrement(unittest.TestCase): class TestInplaceApiWithDataTransform(unittest.TestCase): + def test_increment(self): if fluid.core.is_compiled_with_cuda(): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_index_sample_op.py b/python/paddle/fluid/tests/unittests/test_index_sample_op.py index 4da03c9643f..e6a76a3a025 100644 --- a/python/paddle/fluid/tests/unittests/test_index_sample_op.py +++ b/python/paddle/fluid/tests/unittests/test_index_sample_op.py @@ -22,14 +22,16 @@ from op_test import OpTest class TestIndexSampleOp(OpTest): + def setUp(self): self.op_type = "index_sample" self.python_api = paddle.index_sample self.config() xnp = np.random.random(self.x_shape).astype(self.x_type) - indexnp = np.random.randint( - low=0, high=self.x_shape[1], - size=self.index_shape).astype(self.index_type) + indexnp = np.random.randint(low=0, + high=self.x_shape[1], + size=self.index_shape).astype( + self.index_type) self.inputs = {'X': xnp, 'Index': indexnp} index_array = [] for i in range(self.index_shape[0]): @@ -56,6 +58,7 @@ class TestIndexSampleOp(OpTest): class TestCase1(TestIndexSampleOp): + def config(self): """ For one dimension input @@ -67,6 +70,7 @@ class TestCase1(TestIndexSampleOp): class TestCase2(TestIndexSampleOp): + def config(self): """ For int64_t index type @@ -78,6 +82,7 @@ class TestCase2(TestIndexSampleOp): class TestCase3(TestIndexSampleOp): + def config(self): """ For int index type @@ -89,6 +94,7 @@ class TestCase3(TestIndexSampleOp): class TestCase4(TestIndexSampleOp): + def config(self): """ For int64 index type @@ -100,6 +106,7 @@ class TestCase4(TestIndexSampleOp): class TestIndexSampleShape(unittest.TestCase): + def test_shape(self): paddle.enable_static() # create x value @@ -110,8 +117,8 @@ 
class TestIndexSampleShape(unittest.TestCase): # create index value index_shape = (2, 3) index_type = "int32" - index_np = np.random.randint( - low=0, high=x_shape[1], size=index_shape).astype(index_type) + index_np = np.random.randint(low=0, high=x_shape[1], + size=index_shape).astype(index_type) x = fluid.data(name='x', shape=[-1, 5], dtype='float64') index = fluid.data(name='index', shape=[-1, 3], dtype='int32') @@ -126,18 +133,18 @@ class TestIndexSampleShape(unittest.TestCase): class TestIndexSampleDynamic(unittest.TestCase): + def test_result(self): with fluid.dygraph.guard(): - x = paddle.to_tensor( - [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], - [9.0, 10.0, 11.0, 12.0]], - dtype='float32') - index = paddle.to_tensor( - [[0, 1, 2], [1, 2, 3], [0, 0, 0]], dtype='int32') + x = paddle.to_tensor([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], + [9.0, 10.0, 11.0, 12.0]], + dtype='float32') + index = paddle.to_tensor([[0, 1, 2], [1, 2, 3], [0, 0, 0]], + dtype='int32') out_z1 = paddle.index_sample(x, index) - except_output = np.array( - [[1.0, 2.0, 3.0], [6.0, 7.0, 8.0], [9.0, 9.0, 9.0]]) + except_output = np.array([[1.0, 2.0, 3.0], [6.0, 7.0, 8.0], + [9.0, 9.0, 9.0]]) assert out_z1.numpy().all() == except_output.all() diff --git a/python/paddle/fluid/tests/unittests/test_index_select_op.py b/python/paddle/fluid/tests/unittests/test_index_select_op.py index 0c0e946fdde..c8bb7890964 100644 --- a/python/paddle/fluid/tests/unittests/test_index_select_op.py +++ b/python/paddle/fluid/tests/unittests/test_index_select_op.py @@ -24,12 +24,14 @@ from paddle.fluid import Program, program_guard class TestIndexSelectOp(OpTest): + def setUp(self): self.python_api = paddle.index_select self.op_type = "index_select" self.init_dtype_type() - index_np = np.random.randint( - low=0, high=self.x_shape[self.dim], size=self.index_size) + index_np = np.random.randint(low=0, + high=self.x_shape[self.dim], + size=self.index_size) x_np = np.random.random(self.x_shape).astype(self.x_type) self.inputs = {'X': x_np, 'Index': index_np} self.attrs = {'dim': self.dim} @@ -62,6 +64,7 @@ class TestIndexSelectOp(OpTest): class TestIndexSelectOpCase2(TestIndexSelectOp): + def init_dtype_type(self): self.x_type = np.float32 self.index_type = np.int32 @@ -71,6 +74,7 @@ class TestIndexSelectOpCase2(TestIndexSelectOp): class TestIndexSelectOpCaseSingleThread(TestIndexSelectOp): + def init_dtype_type(self): if fluid.is_compiled_with_cuda(): fluid.set_flags({'FLAGS_cudnn_deterministic': True}) @@ -82,6 +86,7 @@ class TestIndexSelectOpCaseSingleThread(TestIndexSelectOp): class TestIndexSelectAPI(unittest.TestCase): + def input_data(self): self.data_x = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [9.0, 10.0, 11.0, 12.0]]) @@ -93,12 +98,16 @@ class TestIndexSelectAPI(unittest.TestCase): # case 1: with program_guard(Program(), Program()): x = fluid.layers.data(name='x', shape=[-1, 4]) - index = fluid.layers.data( - name='index', shape=[3], dtype='int32', append_batch_size=False) + index = fluid.layers.data(name='index', + shape=[3], + dtype='int32', + append_batch_size=False) z = paddle.index_select(x, index, axis=1) exe = fluid.Executor(fluid.CPUPlace()) - res, = exe.run(feed={'x': self.data_x, - 'index': self.data_index}, + res, = exe.run(feed={ + 'x': self.data_x, + 'index': self.data_index + }, fetch_list=[z.name], return_numpy=False) expect_out = np.array([[1.0, 2.0, 2.0], [5.0, 6.0, 6.0], @@ -108,16 +117,20 @@ class TestIndexSelectAPI(unittest.TestCase): # case 2: with program_guard(Program(), Program()): x = 
fluid.layers.data(name='x', shape=[-1, 4]) - index = fluid.layers.data( - name='index', shape=[3], dtype='int32', append_batch_size=False) + index = fluid.layers.data(name='index', + shape=[3], + dtype='int32', + append_batch_size=False) z = paddle.index_select(x, index) exe = fluid.Executor(fluid.CPUPlace()) - res, = exe.run(feed={'x': self.data_x, - 'index': self.data_index}, + res, = exe.run(feed={ + 'x': self.data_x, + 'index': self.data_index + }, fetch_list=[z.name], return_numpy=False) - expect_out = np.array( - [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [5.0, 6.0, 7.0, 8.0]]) + expect_out = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], + [5.0, 6.0, 7.0, 8.0]]) self.assertTrue(np.allclose(expect_out, np.array(res))) def test_dygraph_api(self): @@ -128,8 +141,8 @@ class TestIndexSelectAPI(unittest.TestCase): index = fluid.dygraph.to_variable(self.data_index) z = paddle.index_select(x, index) np_z = z.numpy() - expect_out = np.array( - [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [5.0, 6.0, 7.0, 8.0]]) + expect_out = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], + [5.0, 6.0, 7.0, 8.0]]) self.assertTrue(np.allclose(expect_out, np_z)) # case 2: diff --git a/python/paddle/fluid/tests/unittests/test_infer_no_need_buffer_slots.py b/python/paddle/fluid/tests/unittests/test_infer_no_need_buffer_slots.py index 3656cdfd5a0..a1e0da500e4 100644 --- a/python/paddle/fluid/tests/unittests/test_infer_no_need_buffer_slots.py +++ b/python/paddle/fluid/tests/unittests/test_infer_no_need_buffer_slots.py @@ -23,6 +23,7 @@ import paddle.fluid.core as core class TestInferNoNeedBufferSlots(unittest.TestCase): + def net(self): x1 = fluid.default_main_program().global_block().create_var( dtype="float32", shape=[1], lod_level=0, name="x1") diff --git a/python/paddle/fluid/tests/unittests/test_infer_shape.py b/python/paddle/fluid/tests/unittests/test_infer_shape.py index 553ebaec7f1..c3e58ddaac1 100644 --- a/python/paddle/fluid/tests/unittests/test_infer_shape.py +++ b/python/paddle/fluid/tests/unittests/test_infer_shape.py @@ -21,6 +21,7 @@ import paddle.fluid.core as core class TestInferShape(unittest.TestCase): + def test_sum_op(self): prog = core.ProgramDesc() self.assertIsNotNone(prog) diff --git a/python/paddle/fluid/tests/unittests/test_inference_api.py b/python/paddle/fluid/tests/unittests/test_inference_api.py index 7ed908eb33b..a590dcecbfe 100644 --- a/python/paddle/fluid/tests/unittests/test_inference_api.py +++ b/python/paddle/fluid/tests/unittests/test_inference_api.py @@ -15,6 +15,7 @@ import os, shutil import unittest import paddle + paddle.enable_static() import numpy as np import paddle.fluid as fluid @@ -25,13 +26,14 @@ from paddle.inference import get_trt_compile_version, get_trt_runtime_version class TestInferenceApi(unittest.TestCase): + def test_inference_api(self): tensor32 = np.random.randint(10, 20, size=[20, 2]).astype('int32') paddletensor32 = PaddleTensor(tensor32) dtype32 = paddletensor32.dtype self.assertEqual(dtype32, PaddleDType.INT32) - self.assertEqual( - paddletensor32.data.tolist('int32'), tensor32.ravel().tolist()) + self.assertEqual(paddletensor32.data.tolist('int32'), + tensor32.ravel().tolist()) paddletensor32.data.reset(tensor32) self.assertEqual(paddletensor32.as_ndarray().ravel().tolist(), tensor32.ravel().tolist()) @@ -40,8 +42,8 @@ class TestInferenceApi(unittest.TestCase): paddletensor64 = PaddleTensor(tensor64) dtype64 = paddletensor64.dtype self.assertEqual(dtype64, PaddleDType.INT64) - self.assertEqual( - paddletensor64.data.tolist('int64'), 
tensor64.ravel().tolist()) + self.assertEqual(paddletensor64.data.tolist('int64'), + tensor64.ravel().tolist()) paddletensor64.data.reset(tensor64) self.assertEqual(paddletensor64.as_ndarray().ravel().tolist(), tensor64.ravel().tolist()) @@ -50,9 +52,8 @@ class TestInferenceApi(unittest.TestCase): paddletensor_float = PaddleTensor(tensor_float) dtype_float = paddletensor_float.dtype self.assertEqual(dtype_float, PaddleDType.FLOAT32) - self.assertEqual( - paddletensor_float.data.tolist('float32'), - tensor_float.ravel().tolist()) + self.assertEqual(paddletensor_float.data.tolist('float32'), + tensor_float.ravel().tolist()) paddletensor_float.data.reset(tensor_float) self.assertEqual(paddletensor_float.as_ndarray().ravel().tolist(), tensor_float.ravel().tolist()) @@ -66,23 +67,24 @@ def get_sample_model(): startup_program = fluid.Program() with fluid.program_guard(main_program, startup_program): data = fluid.data(name="data", shape=[-1, 6, 64, 64], dtype="float32") - conv_out = fluid.layers.conv2d( - input=data, - num_filters=3, - filter_size=3, - groups=1, - padding=0, - bias_attr=False, - act=None) + conv_out = fluid.layers.conv2d(input=data, + num_filters=3, + filter_size=3, + groups=1, + padding=0, + bias_attr=False, + act=None) exe.run(startup_program) - serialized_program = paddle.static.serialize_program( - data, conv_out, program=main_program) + serialized_program = paddle.static.serialize_program(data, + conv_out, + program=main_program) serialized_params = paddle.static.serialize_persistables( data, conv_out, executor=exe, program=main_program) return serialized_program, serialized_params class TestInferenceBaseAPI(unittest.TestCase): + def get_config(self, model, params): config = Config() config.set_model_buffer(model, len(model), params, len(params)) diff --git a/python/paddle/fluid/tests/unittests/test_inference_model_io.py b/python/paddle/fluid/tests/unittests/test_inference_model_io.py index 9abcf2a7676..c19c2c65e6e 100644 --- a/python/paddle/fluid/tests/unittests/test_inference_model_io.py +++ b/python/paddle/fluid/tests/unittests/test_inference_model_io.py @@ -31,10 +31,12 @@ from paddle.fluid.compiler import CompiledProgram from paddle.fluid.framework import Program, program_guard from paddle.fluid.io import save_inference_model, load_inference_model, save_persistables from paddle.fluid.transpiler import memory_optimize + paddle.enable_static() class InferModel(object): + def __init__(self, list): self.program = list[0] self.feed_var_names = list[1] @@ -42,6 +44,7 @@ class InferModel(object): class TestBook(unittest.TestCase): + def test_fit_line_inference_model(self): MODEL_DIR = "./tmp/inference_model" UNI_MODEL_DIR = "./tmp/inference_model1" @@ -67,13 +70,15 @@ class TestBook(unittest.TestCase): exe.run(init_program, feed={}, fetch_list=[]) for i in six.moves.xrange(100): - tensor_x = np.array( - [[1, 1], [1, 2], [3, 4], [5, 2]]).astype("float32") + tensor_x = np.array([[1, 1], [1, 2], [3, 4], [5, + 2]]).astype("float32") tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32") exe.run(program, - feed={'x': tensor_x, - 'y': tensor_y}, + feed={ + 'x': tensor_x, + 'y': tensor_y + }, fetch_list=[avg_cost]) # Separated model and unified model @@ -85,8 +90,10 @@ class TestBook(unittest.TestCase): params_str = save_persistables(exe, None, main_program, None) expected = exe.run(program, - feed={'x': tensor_x, - 'y': tensor_y}, + feed={ + 'x': tensor_x, + 'y': tensor_y + }, fetch_list=[avg_cost])[0] six.moves.reload_module(executor) # reload to build a new scope @@ 
-116,6 +123,7 @@ class TestBook(unittest.TestCase): class TestSaveInferenceModel(unittest.TestCase): + def test_save_inference_model(self): MODEL_DIR = "./tmp/inference_model2" init_program = Program() @@ -166,6 +174,7 @@ class TestSaveInferenceModel(unittest.TestCase): class TestInstance(unittest.TestCase): + def test_save_inference_model(self): MODEL_DIR = "./tmp/inference_model3" init_program = Program() @@ -196,6 +205,7 @@ class TestInstance(unittest.TestCase): class TestSaveInferenceModelNew(unittest.TestCase): + def test_save_and_load_inference_model(self): MODEL_DIR = "./tmp/inference_model5" init_program = fluid.default_startup_program() @@ -222,8 +232,10 @@ class TestSaveInferenceModelNew(unittest.TestCase): tensor_y = np.array([[-2], [-3], [-7]]).astype("float32") for i in six.moves.xrange(3): exe.run(program, - feed={'x': tensor_x, - 'y': tensor_y}, + feed={ + 'x': tensor_x, + 'y': tensor_y + }, fetch_list=[avg_cost]) self.assertRaises(ValueError, paddle.static.save_inference_model, None, @@ -258,8 +270,10 @@ class TestSaveInferenceModelNew(unittest.TestCase): self.assertTrue(os.path.exists(MODEL_DIR + ".pdiparams")) expected = exe.run(program, - feed={'x': tensor_x, - 'y': tensor_y}, + feed={ + 'x': tensor_x, + 'y': tensor_y + }, fetch_list=[avg_cost])[0] six.moves.reload_module(executor) # reload to build a new scope @@ -270,28 +284,25 @@ class TestSaveInferenceModelNew(unittest.TestCase): MODEL_DIR + "/", exe) self.assertRaises(ValueError, paddle.static.load_inference_model, [MODEL_DIR], exe) - self.assertRaises( - ValueError, - paddle.static.load_inference_model, - MODEL_DIR, - exe, - pserver_endpoints=None) - self.assertRaises( - ValueError, - paddle.static.load_inference_model, - MODEL_DIR, - exe, - unsupported_param=None) - self.assertRaises( - (TypeError, ValueError), - paddle.static.load_inference_model, - None, - exe, - model_filename="illegal", - params_filename="illegal") - - model = InferModel( - paddle.static.io.load_inference_model(MODEL_DIR, exe)) + self.assertRaises(ValueError, + paddle.static.load_inference_model, + MODEL_DIR, + exe, + pserver_endpoints=None) + self.assertRaises(ValueError, + paddle.static.load_inference_model, + MODEL_DIR, + exe, + unsupported_param=None) + self.assertRaises((TypeError, ValueError), + paddle.static.load_inference_model, + None, + exe, + model_filename="illegal", + params_filename="illegal") + + model = InferModel(paddle.static.io.load_inference_model( + MODEL_DIR, exe)) outs = exe.run(model.program, feed={ @@ -340,8 +351,10 @@ class TestSaveInferenceModelNew(unittest.TestCase): tensor_y = np.array([[-2], [-3], [-7]]).astype("float32") for i in six.moves.xrange(3): exe.run(program, - feed={'x': tensor_x, - 'y': tensor_y}, + feed={ + 'x': tensor_x, + 'y': tensor_y + }, fetch_list=[avg_cost]) # test if return type of serialize_program is bytes @@ -381,8 +394,10 @@ class TestSaveInferenceModelNew(unittest.TestCase): tensor_y = np.array([[-2], [-3], [-7]]).astype("float32") for i in six.moves.xrange(3): exe.run(program, - feed={'x': tensor_x, - 'y': tensor_y}, + feed={ + 'x': tensor_x, + 'y': tensor_y + }, fetch_list=[avg_cost]) # test if return type of serialize_program is bytes @@ -400,6 +415,7 @@ class TestSaveInferenceModelNew(unittest.TestCase): class TestLoadInferenceModelError(unittest.TestCase): + def test_load_model_not_exist(self): place = core.CPUPlace() exe = executor.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_initializer.py b/python/paddle/fluid/tests/unittests/test_initializer.py index 
52137b22a79..7138c2393ff 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer.py +++ b/python/paddle/fluid/tests/unittests/test_initializer.py @@ -42,15 +42,16 @@ def output_hist(out): class TestConstantInitializer(unittest.TestCase): + def test_calculate_gain(self): self.assertEqual(paddle.nn.initializer.calculate_gain('sigmoid'), 1) self.assertEqual(paddle.nn.initializer.calculate_gain('linear'), 1) self.assertEqual(paddle.nn.initializer.calculate_gain('conv2d'), 1) self.assertEqual(paddle.nn.initializer.calculate_gain('tanh'), 5.0 / 3) - self.assertEqual( - paddle.nn.initializer.calculate_gain('relu'), math.sqrt(2.0)) - self.assertEqual( - paddle.nn.initializer.calculate_gain('leaky_relu', 1), 1) + self.assertEqual(paddle.nn.initializer.calculate_gain('relu'), + math.sqrt(2.0)) + self.assertEqual(paddle.nn.initializer.calculate_gain('leaky_relu', 1), + 1) self.assertEqual(paddle.nn.initializer.calculate_gain('selu'), 3.0 / 4) def test_constant_initializer_default_value(self, dtype="float32"): @@ -106,18 +107,18 @@ class TestConstantInitializer(unittest.TestCase): class TestUniformInitializer(unittest.TestCase): + def test_uniform_initializer_default_value(self, dtype="float32"): """Test the uniform initializer with default value """ program = framework.Program() block = program.global_block() for _ in range(2): - block.create_parameter( - dtype=dtype, - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.UniformInitializer()) + block.create_parameter(dtype=dtype, + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.UniformInitializer()) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] @@ -134,12 +135,11 @@ class TestUniformInitializer(unittest.TestCase): program.random_seed = 123 block = program.global_block() for _ in range(2): - block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="param1", - initializer=initializer.UniformInitializer()) + block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="param1", + initializer=initializer.UniformInitializer()) block.create_parameter( dtype="float32", shape=[5, 10], @@ -157,12 +157,12 @@ class TestUniformInitializer(unittest.TestCase): program = framework.Program() block = program.global_block() for _ in range(2): - block.create_parameter( - dtype=dtype, - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.UniformInitializer(-4.2, 3.1, 123)) + block.create_parameter(dtype=dtype, + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.UniformInitializer( + -4.2, 3.1, 123)) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] @@ -178,12 +178,12 @@ class TestUniformInitializer(unittest.TestCase): program = framework.Program() block = program.global_block() for i in range(2): - block.create_parameter( - dtype=dtype, - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.UniformInitializer(-4.2, float(i), 123)) + block.create_parameter(dtype=dtype, + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.UniformInitializer( + -4.2, float(i), 123)) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op0 = block.ops[0] @@ -213,18 +213,18 @@ class TestUniformInitializer(unittest.TestCase): class TestNormalInitializer(unittest.TestCase): + def test_normal_initializer_default_value(self): """Test the normal initializer with default 
value """ program = framework.Program() block = program.global_block() for _ in range(2): - block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.NormalInitializer()) + block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.NormalInitializer()) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] self.assertEqual(init_op.type, 'gaussian_random') @@ -238,12 +238,12 @@ class TestNormalInitializer(unittest.TestCase): program = framework.Program() block = program.global_block() for _ in range(2): - block.create_parameter( - dtype=dtype, - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.NormalInitializer(2.3, 1.9, 123)) + block.create_parameter(dtype=dtype, + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.NormalInitializer( + 2.3, 1.9, 123)) num_ops = 2 if (dtype == "float16" or dtype == "uint16") else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] @@ -265,6 +265,7 @@ class TestNormalInitializer(unittest.TestCase): class TestXavierInitializer(unittest.TestCase): + def test_uniform_xavier_initializer(self): """Test Xavier initializer with uniform distribution on for matrix multiply. @@ -303,8 +304,8 @@ class TestXavierInitializer(unittest.TestCase): init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') receptive_field_size = float(15 * 20) - limit = np.sqrt(6.0 / ( - (param.shape[0] + param.shape[1]) * receptive_field_size)) + limit = np.sqrt( + 6.0 / ((param.shape[0] + param.shape[1]) * receptive_field_size)) self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) self.assertEqual(init_op.attr('seed'), 0) @@ -347,8 +348,8 @@ class TestXavierInitializer(unittest.TestCase): init_op = block.ops[0] self.assertEqual(init_op.type, 'gaussian_random') receptive_field_size = float(15 * 20) - std = np.sqrt(2.0 / ( - (param.shape[0] + param.shape[1]) * receptive_field_size)) + std = np.sqrt( + 2.0 / ((param.shape[0] + param.shape[1]) * receptive_field_size)) self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA) self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA) self.assertEqual(init_op.attr('seed'), 0) @@ -361,15 +362,17 @@ class TestXavierInitializer(unittest.TestCase): program = framework.Program() block = program.global_block() for _ in range(2): - block.create_parameter( - dtype=dtype, - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.XavierInitializer( - uniform=uniform, fan_in=12, fan_out=23, seed=134)) - num_ops = 2 if (dtype == "float16" or (dtype == "uint16" and - not uniform)) else 1 + block.create_parameter(dtype=dtype, + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.XavierInitializer( + uniform=uniform, + fan_in=12, + fan_out=23, + seed=134)) + num_ops = 2 if (dtype == "float16" or + (dtype == "uint16" and not uniform)) else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] if uniform: @@ -400,6 +403,7 @@ class TestXavierInitializer(unittest.TestCase): class TestMSRAInitializer(unittest.TestCase): + def test_uniform_msra_initializer(self): """Test MSRA initializer with uniform distribution on for matrix multiply. 
@@ -492,13 +496,12 @@ class TestMSRAInitializer(unittest.TestCase): program = framework.Program() block = program.global_block() for _ in range(2): - block.create_parameter( - dtype=dtype, - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.MSRAInitializer( - fan_in=12, seed=134)) + block.create_parameter(dtype=dtype, + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer( + fan_in=12, seed=134)) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] @@ -522,6 +525,7 @@ class TestMSRAInitializer(unittest.TestCase): class TestBilinearInitializer(unittest.TestCase): + def test_bilinear_initializer(self, dtype="float32"): """Test the bilinear initializer with supplied arguments """ @@ -560,6 +564,7 @@ class TestBilinearInitializer(unittest.TestCase): class TestNumpyArrayInitializer(unittest.TestCase): + def test_numpy_array_initializer(self, dtype="float32"): """Test the numpy array initializer with supplied arguments """ @@ -595,6 +600,7 @@ class TestNumpyArrayInitializer(unittest.TestCase): class TestSetGlobalInitializer(unittest.TestCase): + def test_set_global_weight_initilizer(self): """Test Set Global Param initilizer with UniformInitializer """ @@ -626,11 +632,9 @@ class TestSetGlobalInitializer(unittest.TestCase): """ main_prog = framework.Program() startup_prog = framework.Program() - fluid.set_global_initializer( - initializer.Uniform( - low=-0.5, high=0.5), - bias_init=initializer.Normal( - loc=0.0, scale=2.0)) + fluid.set_global_initializer(initializer.Uniform(low=-0.5, high=0.5), + bias_init=initializer.Normal(loc=0.0, + scale=2.0)) with fluid.program_guard(main_prog, startup_prog): x = fluid.data(name="x", shape=[1, 3, 32, 32]) # default initilizer of bias in layers.conv2d is ConstantInitializer @@ -655,6 +659,7 @@ class TestSetGlobalInitializer(unittest.TestCase): class TestUniformInitializerDygraph(unittest.TestCase): + def func_uniform_initializer(self, dtype="float32"): """ In dygraph mode, we can use initializer directly to initialize a tensor. @@ -673,9 +678,8 @@ class TestUniformInitializerDygraph(unittest.TestCase): hist, prob = output_hist(tensor.numpy()) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=1e-3), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=1e-3), + "hist: " + str(hist)) paddle.enable_static() @@ -686,6 +690,7 @@ class TestUniformInitializerDygraph(unittest.TestCase): class TestXavierInitializerDygraph(unittest.TestCase): + def func_xvarier_initializer(self, dtype="float32"): """ In dygraph mode, we can use initializer directly to initialize a tensor. 
@@ -695,8 +700,9 @@ class TestXavierInitializerDygraph(unittest.TestCase): tensor = paddle.zeros([1024, 1024, 16]) tensor.stop_gradient = False - xavier_ = paddle.fluid.initializer.XavierInitializer( - uniform=False, fan_in=3, fan_out=5) + xavier_ = paddle.fluid.initializer.XavierInitializer(uniform=False, + fan_in=3, + fan_out=5) xavier_(tensor) hist, _ = output_hist(tensor.numpy()) @@ -704,10 +710,8 @@ class TestXavierInitializerDygraph(unittest.TestCase): hist2, _ = output_hist( np.random.normal(0, np.sqrt(2.0 / (3 + 5)), [1024, 1024, 16])) - self.assertTrue( - np.allclose( - hist, hist2, rtol=0, atol=0.01), - "hist: " + str(hist) + " hist2: " + str(hist2)) + self.assertTrue(np.allclose(hist, hist2, rtol=0, atol=0.01), + "hist: " + str(hist) + " hist2: " + str(hist2)) paddle.enable_static() def test_xavier_initializer(self, dtype="float32"): @@ -717,6 +721,7 @@ class TestXavierInitializerDygraph(unittest.TestCase): class TestMSRAInitializerDygraph(unittest.TestCase): + def func_msra_initializer(self, dtype="float32"): """ In dygraph mode, we can use initializer directly to initialize a tensor. @@ -726,8 +731,8 @@ class TestMSRAInitializerDygraph(unittest.TestCase): tensor = paddle.zeros([1024, 1024, 16]) tensor.stop_gradient = False - msra_ = paddle.fluid.initializer.MSRAInitializer( - uniform=False, fan_in=4) + msra_ = paddle.fluid.initializer.MSRAInitializer(uniform=False, + fan_in=4) msra_(tensor) hist, _ = output_hist(tensor.numpy()) @@ -735,10 +740,8 @@ class TestMSRAInitializerDygraph(unittest.TestCase): hist2, _ = output_hist( np.random.normal(0, np.sqrt(2.0 / (4)), [1024, 1024, 16])) - self.assertTrue( - np.allclose( - hist, hist2, rtol=0, atol=0.01), - "hist: " + str(hist) + " hist2: " + str(hist2)) + self.assertTrue(np.allclose(hist, hist2, rtol=0, atol=0.01), + "hist: " + str(hist) + " hist2: " + str(hist2)) paddle.enable_static() def test_msra_initializer(self, dtype="float32"): @@ -748,6 +751,7 @@ class TestMSRAInitializerDygraph(unittest.TestCase): class TesetconsistencyOfDynamicAndStaticGraph(unittest.TestCase): + def func_order(self): paddle.set_device('cpu') SEED = 123 @@ -756,21 +760,23 @@ class TesetconsistencyOfDynamicAndStaticGraph(unittest.TestCase): learning_rate=1.0, trainable=False, regularizer=None, - initializer=paddle.nn.initializer.TruncatedNormal( - mean=0.0, std=2.0)) + initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, + std=2.0)) bias_attr = paddle.framework.ParamAttr( name="linear_bias", learning_rate=1.0, trainable=False, regularizer=None, - initializer=paddle.nn.initializer.TruncatedNormal( - mean=0.0, std=2.0)) + initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, + std=2.0)) def run_dynamic_graph(): paddle.disable_static() paddle.seed(SEED) - linear = paddle.nn.Linear( - 1, 1, weight_attr=weight_attr, bias_attr=bias_attr) + linear = paddle.nn.Linear(1, + 1, + weight_attr=weight_attr, + bias_attr=bias_attr) return linear.weight.numpy(), linear.bias.numpy() paddle.enable_static() @@ -778,8 +784,10 @@ class TesetconsistencyOfDynamicAndStaticGraph(unittest.TestCase): paddle.enable_static() exe = paddle.static.Executor(paddle.CPUPlace()) paddle.seed(SEED) - linear = paddle.nn.Linear( - 1, 1, weight_attr=weight_attr, bias_attr=bias_attr) + linear = paddle.nn.Linear(1, + 1, + weight_attr=weight_attr, + bias_attr=bias_attr) res = exe.run(paddle.static.default_startup_program(), fetch_list=['linear_weight', 'linear_bias']) return res[0], res[1] @@ -820,8 +828,9 @@ class TestOrthogonalInitializer1(unittest.TestCase): paddle.disable_static() 
paddle.seed(2021) - linear = paddle.nn.Linear( - self.in_features, self.out_features, weight_attr=self.weight_attr) + linear = paddle.nn.Linear(self.in_features, + self.out_features, + weight_attr=self.weight_attr) res_dygraph = linear.weight.numpy() paddle.enable_static() @@ -829,10 +838,9 @@ class TestOrthogonalInitializer1(unittest.TestCase): start_prog = paddle.static.Program() main_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, start_prog): - linear = paddle.nn.Linear( - self.in_features, - self.out_features, - weight_attr=self.weight_attr) + linear = paddle.nn.Linear(self.in_features, + self.out_features, + weight_attr=self.weight_attr) block = start_prog.global_block() self.assertEqual(len(block.ops), self.num_ops) @@ -924,11 +932,10 @@ class TestOrthogonalInitializer4(unittest.TestCase): paddle.disable_static() paddle.seed(2021) - conv2d = paddle.nn.Conv2D( - self.in_features, - self.out_features, - self.kernel_size, - weight_attr=self.weight_attr) + conv2d = paddle.nn.Conv2D(self.in_features, + self.out_features, + self.kernel_size, + weight_attr=self.weight_attr) res_dygraph = conv2d.weight.numpy() paddle.enable_static() @@ -936,11 +943,10 @@ class TestOrthogonalInitializer4(unittest.TestCase): start_prog = paddle.static.Program() main_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, start_prog): - conv2d = paddle.nn.Conv2D( - self.in_features, - self.out_features, - self.kernel_size, - weight_attr=self.weight_attr) + conv2d = paddle.nn.Conv2D(self.in_features, + self.out_features, + self.kernel_size, + weight_attr=self.weight_attr) exe = paddle.static.Executor() res_static = exe.run(paddle.static.default_startup_program(), fetch_list=[conv2d.weight])[0] @@ -995,6 +1001,7 @@ class TestOrthogonalInitializer6(TestOrthogonalInitializer4): # initialize Conv1D weight class TestDiracInitializer1(unittest.TestCase): + def config(self): self.weight_attr = paddle.ParamAttr( initializer=paddle.nn.initializer.Dirac()) @@ -1015,11 +1022,10 @@ class TestDiracInitializer1(unittest.TestCase): paddle.set_default_dtype(self.dtype) paddle.disable_static() - conv = self.conv_layer( - self.in_channels, - self.out_channels, - self.kernel_size, - weight_attr=self.weight_attr) + conv = self.conv_layer(self.in_channels, + self.out_channels, + self.kernel_size, + weight_attr=self.weight_attr) weight_dygraph = conv.weight.numpy() paddle.enable_static() @@ -1027,11 +1033,10 @@ class TestDiracInitializer1(unittest.TestCase): main_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, start_prog): inp = paddle.rand(self.input_shape) - conv = self.conv_layer( - self.in_channels, - self.out_channels, - self.kernel_size, - weight_attr=self.weight_attr) + conv = self.conv_layer(self.in_channels, + self.out_channels, + self.kernel_size, + weight_attr=self.weight_attr) output = conv(inp) block = start_prog.global_block() @@ -1061,6 +1066,7 @@ class TestDiracInitializer1(unittest.TestCase): # initialize Conv2D weight class TestDiracInitializer2(TestDiracInitializer1): + def config(self): self.weight_attr = paddle.ParamAttr( initializer=paddle.nn.initializer.Dirac(groups=1)) @@ -1082,6 +1088,7 @@ class TestDiracInitializer2(TestDiracInitializer1): # initialize Conv3D weight class TestDiracInitializer3(TestDiracInitializer1): + def config(self): self.weight_attr = paddle.ParamAttr( initializer=paddle.nn.initializer.Dirac(groups=2)) @@ -1096,8 +1103,8 @@ class TestDiracInitializer3(TestDiracInitializer1): def check_result(self, w_dygraph, 
w_static, conv_in, conv_out): self.assertTrue(np.array_equal(w_dygraph, w_static)) self.assertTrue( - np.array_equal(conv_out[:, 0:5, :, :, :], conv_in[:, :, 1:9, 1:9, 1: - 9])) + np.array_equal(conv_out[:, 0:5, :, :, :], conv_in[:, :, 1:9, 1:9, + 1:9])) self.assertTrue( np.array_equal(conv_out[:, 5:10, :, :, :], conv_in[:, :, 1:9, 1:9, 1:9])) diff --git a/python/paddle/fluid/tests/unittests/test_initializer_nn.py b/python/paddle/fluid/tests/unittests/test_initializer_nn.py index 9953681e0f5..0f4a2e7a67c 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer_nn.py +++ b/python/paddle/fluid/tests/unittests/test_initializer_nn.py @@ -40,6 +40,7 @@ def check_cast_op(op): class TestConstantInitializer(unittest.TestCase): + def static_test_constant_initializer_common(self, init_inst, dtype="float32", @@ -48,12 +49,11 @@ class TestConstantInitializer(unittest.TestCase): program = framework.Program() block = program.global_block() for _ in range(2): - block.create_parameter( - dtype=dtype, - shape=[5, 10], - lod_level=0, - name="param", - initializer=init_inst) + block.create_parameter(dtype=dtype, + shape=[5, 10], + lod_level=0, + name="param", + initializer=init_inst) num_ops = 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] @@ -91,8 +91,9 @@ class TestConstantInitializer(unittest.TestCase): """Test constant initializer with supplied value in dygraph """ with fluid.dygraph.guard(): - linear = nn.Linear( - 2, 4, weight_attr=nn.initializer.Constant(value=2.0)) + linear = nn.Linear(2, + 4, + weight_attr=nn.initializer.Constant(value=2.0)) mat_target = np.ones((2, 4), dtype=dtype) * 2.0 mat_linear = linear.weight.numpy() mismatch = np.sum( @@ -116,6 +117,7 @@ class TestConstantInitializer(unittest.TestCase): class TestKaimingInitializer(unittest.TestCase): + def static_test_kaiming_initializer_common(self, init_inst, dtype="float32", @@ -126,12 +128,11 @@ class TestKaimingInitializer(unittest.TestCase): block = program.global_block() shape_mat = [5, 10, 15, 20] if is_conv else [5, 10] for _ in range(2): - param = block.create_parameter( - dtype="float32", - shape=shape_mat, - lod_level=0, - name="param", - initializer=init_inst) + param = block.create_parameter(dtype="float32", + shape=shape_mat, + lod_level=0, + name="param", + initializer=init_inst) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] if uniform: @@ -208,6 +209,7 @@ class TestKaimingInitializer(unittest.TestCase): class TestUniform(unittest.TestCase): + def test_uniform_common(self, dtype="float32", seed=0): """Test the uniform initializer with default value """ @@ -217,12 +219,11 @@ class TestUniform(unittest.TestCase): program.random_seed = seed block = program.global_block() for _ in range(2): - block.create_parameter( - dtype=dtype, - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.Uniform()) + block.create_parameter(dtype=dtype, + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.Uniform()) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] @@ -248,12 +249,11 @@ class TestUniform(unittest.TestCase): program.random_seed = seed block = program.global_block() for _ in range(2): - block.create_parameter( - dtype=dtype, - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.Uniform()) + block.create_parameter(dtype=dtype, + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.Uniform()) num_ops = 2 if dtype == "float16" else 1 
self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] @@ -279,12 +279,12 @@ class TestUniform(unittest.TestCase): program.random_seed = seed block = program.global_block() for _ in range(2): - block.create_parameter( - dtype=dtype, - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.Uniform(min_value, max_vlaue)) + block.create_parameter(dtype=dtype, + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.Uniform( + min_value, max_vlaue)) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] @@ -309,12 +309,12 @@ class TestUniform(unittest.TestCase): program.random_seed = seed block = program.global_block() for i in range(2): - block.create_parameter( - dtype=dtype, - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.Uniform(min_value, float(i))) + block.create_parameter(dtype=dtype, + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.Uniform( + min_value, float(i))) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) init_op0 = block.ops[0] @@ -351,8 +351,7 @@ class TestUniform(unittest.TestCase): weight_attr = paddle.framework.ParamAttr( name="linear_weight", - initializer=paddle.nn.initializer.Uniform( - low=-0.5, high=0.5)) + initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5)) linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr) min_value, max_value = get_uniform_min_and_max(linear.weight.numpy()) @@ -363,6 +362,7 @@ class TestUniform(unittest.TestCase): class TestNormal(unittest.TestCase): + def test_normal_initializer_default_value(self): """Test the normal initializer with default value """ @@ -371,12 +371,11 @@ class TestNormal(unittest.TestCase): program = framework.Program() block = program.global_block() for _ in range(2): - block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.Normal()) + block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.Normal()) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] self.assertEqual(init_op.type, 'gaussian_random') @@ -394,12 +393,11 @@ class TestNormal(unittest.TestCase): program = framework.Program() block = program.global_block() for _ in range(2): - block.create_parameter( - dtype=dtype, - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.Normal(2.3, 1.9)) + block.create_parameter(dtype=dtype, + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.Normal(2.3, 1.9)) num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] @@ -428,12 +426,12 @@ class TestNormal(unittest.TestCase): weight_attr = paddle.framework.ParamAttr( name="linear_weight", - initializer=paddle.nn.initializer.Normal( - mean=0.0, std=2.0)) + initializer=paddle.nn.initializer.Normal(mean=0.0, std=2.0)) linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr) class TestTruncatedNormal(unittest.TestCase): + def test_truncated_normal_initializer_default_value(self): """Test the truncated normal initializer with default value """ @@ -442,12 +440,11 @@ class TestTruncatedNormal(unittest.TestCase): program = framework.Program() block = program.global_block() for _ in range(2): - block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.TruncatedNormal()) + block.create_parameter(dtype="float32", + 
shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.TruncatedNormal()) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] self.assertEqual(init_op.type, 'truncated_gaussian_random') @@ -465,12 +462,12 @@ class TestTruncatedNormal(unittest.TestCase): program = framework.Program() block = program.global_block() for _ in range(2): - block.create_parameter( - dtype=dtype, - shape=[5, 10], - lod_level=0, - name="param", - initializer=initializer.TruncatedNormal(2.3, 1.9)) + block.create_parameter(dtype=dtype, + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.TruncatedNormal( + 2.3, 1.9)) num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] @@ -505,12 +502,13 @@ class TestTruncatedNormal(unittest.TestCase): weight_attr = paddle.framework.ParamAttr( name="linear_weight", - initializer=paddle.nn.initializer.TruncatedNormal( - mean=0.0, std=2.0)) + initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, + std=2.0)) linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr) class TestXavierUniform(unittest.TestCase): + def test_xavier_uniform_initializer(self): """Test Xavier initializer with uniform distribution on for matrix multiply. @@ -555,8 +553,8 @@ class TestXavierUniform(unittest.TestCase): init_op = block.ops[0] self.assertEqual(init_op.type, 'uniform_random') receptive_field_size = float(15 * 20) - limit = np.sqrt(6.0 / ( - (param.shape[0] + param.shape[1]) * receptive_field_size)) + limit = np.sqrt( + 6.0 / ((param.shape[0] + param.shape[1]) * receptive_field_size)) self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) self.assertEqual(init_op.attr('seed'), 0) @@ -573,6 +571,7 @@ class TestXavierUniform(unittest.TestCase): class TestXavierNormal(unittest.TestCase): + def test_xavier_normal_initializer(self): """Test Xavier initializer with normal distribution on for matrix multiply. 
@@ -617,8 +616,8 @@ class TestXavierNormal(unittest.TestCase): init_op = block.ops[0] self.assertEqual(init_op.type, 'gaussian_random') receptive_field_size = float(15 * 20) - std = np.sqrt(2.0 / ( - (param.shape[0] + param.shape[1]) * receptive_field_size)) + std = np.sqrt( + 2.0 / ((param.shape[0] + param.shape[1]) * receptive_field_size)) self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA) self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA) self.assertEqual(init_op.attr('seed'), 0) @@ -637,6 +636,7 @@ class TestXavierNormal(unittest.TestCase): class TestAssign(unittest.TestCase): + def test_assign_initializer(self, dtype="float32"): """Test the numpy array initializer with supplied arguments """ @@ -647,12 +647,11 @@ class TestAssign(unittest.TestCase): block = program.global_block() np_array = numpy.random.random((10000)).astype(dtype) for _ in range(2): - block.create_parameter( - dtype=np_array.dtype, - shape=np_array.shape, - lod_level=0, - name="param", - initializer=initializer.Assign(np_array)) + block.create_parameter(dtype=np_array.dtype, + shape=np_array.shape, + lod_level=0, + name="param", + initializer=initializer.Assign(np_array)) num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) init_op = block.ops[0] diff --git a/python/paddle/fluid/tests/unittests/test_inner.py b/python/paddle/fluid/tests/unittests/test_inner.py index 2174c20c9a0..8a412d8138f 100644 --- a/python/paddle/fluid/tests/unittests/test_inner.py +++ b/python/paddle/fluid/tests/unittests/test_inner.py @@ -23,21 +23,26 @@ from paddle.fluid.framework import _test_eager_guard, in_dygraph_mode class TestMultiplyApi(unittest.TestCase): + def _run_static_graph_case(self, x_data, y_data): with program_guard(Program(), Program()): paddle.enable_static() - x = paddle.static.data( - name='x', shape=x_data.shape, dtype=x_data.dtype) - y = paddle.static.data( - name='y', shape=y_data.shape, dtype=y_data.dtype) + x = paddle.static.data(name='x', + shape=x_data.shape, + dtype=x_data.dtype) + y = paddle.static.data(name='y', + shape=y_data.shape, + dtype=y_data.dtype) res = paddle.inner(x, y) - place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace() exe = paddle.static.Executor(place) outs = exe.run(paddle.static.default_main_program(), - feed={'x': x_data, - 'y': y_data}, + feed={ + 'x': x_data, + 'y': y_data + }, fetch_list=[res]) res = outs[0] return res @@ -89,20 +94,18 @@ class TestMultiplyApi(unittest.TestCase): self.assertTrue(np.allclose(res, np.inner(x_data, y_data))) # test dynamic computation graph: 2-d array Complex - x_data = np.random.rand(20, - 50).astype(np.float64) + 1J * np.random.rand( - 20, 50).astype(np.float64) - y_data = np.random.rand(50).astype(np.float64) + 1J * np.random.rand( - 50).astype(np.float64) + x_data = np.random.rand(20, 50).astype( + np.float64) + 1J * np.random.rand(20, 50).astype(np.float64) + y_data = np.random.rand(50).astype( + np.float64) + 1J * np.random.rand(50).astype(np.float64) res = self._run_dynamic_graph_case(x_data, y_data) self.assertTrue(np.allclose(res, np.inner(x_data, y_data))) # test dynamic computation graph: 3-d array Complex - x_data = np.random.rand(5, 10, - 10).astype(np.float64) + 1J * np.random.rand( - 5, 10, 10).astype(np.float64) - y_data = np.random.rand(2, 10).astype(np.float64) + 1J * np.random.rand( - 2, 10).astype(np.float64) + x_data = np.random.rand(5, 10, 10).astype( 
+ np.float64) + 1J * np.random.rand(5, 10, 10).astype(np.float64) + y_data = np.random.rand(2, 10).astype( + np.float64) + 1J * np.random.rand(2, 10).astype(np.float64) res = self._run_dynamic_graph_case(x_data, y_data) self.assertTrue(np.allclose(res, np.inner(x_data, y_data))) @@ -113,6 +116,7 @@ class TestMultiplyApi(unittest.TestCase): class TestMultiplyError(unittest.TestCase): + def func_test_errors(self): # test static computation graph: dtype can not be int8 paddle.enable_static() @@ -121,7 +125,7 @@ class TestMultiplyError(unittest.TestCase): y = paddle.static.data(name='y', shape=[100], dtype=np.int8) self.assertRaises(TypeError, paddle.inner, x, y) - # test static computation graph: inputs must be broadcastable + # test static computation graph: inputs must be broadcastable with program_guard(Program(), Program()): x = paddle.static.data(name='x', shape=[20, 50], dtype=np.float64) y = paddle.static.data(name='y', shape=[20], dtype=np.float64) @@ -143,7 +147,7 @@ class TestMultiplyError(unittest.TestCase): y = paddle.to_tensor(y_data) self.assertRaises(ValueError, paddle.inner, x, y) - # test dynamic computation graph: dtype must be same + # test dynamic computation graph: dtype must be same x_data = np.random.randn(200).astype(np.float32) y_data = np.random.randn(200).astype(np.float64) x = paddle.to_tensor(x_data) diff --git a/python/paddle/fluid/tests/unittests/test_inplace.py b/python/paddle/fluid/tests/unittests/test_inplace.py index 99873eaa988..b81fcd90746 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace.py +++ b/python/paddle/fluid/tests/unittests/test_inplace.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard, in_dygraph_mode class TestInplace(unittest.TestCase): + def func_test_forward_version(self): with paddle.fluid.dygraph.guard(): var = paddle.to_tensor(np.ones((4, 2, 3)).astype(np.float32)) @@ -117,6 +118,7 @@ class TestInplace(unittest.TestCase): class TestDygraphInplace(unittest.TestCase): + def setUp(self): self.init_data() self.set_np_compare_func() @@ -283,6 +285,7 @@ class TestDygraphInplace(unittest.TestCase): class TestDygraphInplaceUnsqueeze(TestDygraphInplace): + def non_inplace_api_processing(self, var): return paddle.unsqueeze(var, -1) @@ -291,6 +294,7 @@ class TestDygraphInplaceUnsqueeze(TestDygraphInplace): class TestDygraphInplaceReshape(TestDygraphInplace): + def non_inplace_api_processing(self, var): return paddle.reshape(var, [-1]) @@ -299,6 +303,7 @@ class TestDygraphInplaceReshape(TestDygraphInplace): class TestDygraphInplaceFlatten(TestDygraphInplace): + def non_inplace_api_processing(self, var): return var.flatten() @@ -307,26 +312,28 @@ class TestDygraphInplaceFlatten(TestDygraphInplace): class TestDygraphInplaceScatter(TestDygraphInplace): + def init_data(self): self.input_var_numpy = np.array([[1, 1], [2, 2], [3, 3]]) self.dtype = "float32" def non_inplace_api_processing(self, var): index = paddle.to_tensor([2, 1, 0, 1], dtype='int64') - updates = paddle.to_tensor( - [[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32') + updates = paddle.to_tensor([[1, 1], [2, 2], [3, 3], [4, 4]], + dtype='float32') return paddle.scatter(var, index, updates, overwrite=False) def inplace_api_processing(self, var): index = paddle.to_tensor([2, 1, 0, 1], dtype='int64') - updates = paddle.to_tensor( - [[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32') + updates = paddle.to_tensor([[1, 1], [2, 2], [3, 3], [4, 4]], + dtype='float32') return paddle.scatter_(var, index, updates, overwrite=False) class 
TestDygraphInplaceElu(TestDygraphInplace): + def non_inplace_api_processing(self, var): return paddle.nn.functional.elu(var) @@ -335,6 +342,7 @@ class TestDygraphInplaceElu(TestDygraphInplace): class TestDygraphInplaceRelu(TestDygraphInplace): + def non_inplace_api_processing(self, var): return paddle.nn.functional.relu(var) @@ -343,6 +351,7 @@ class TestDygraphInplaceRelu(TestDygraphInplace): class TestDygraphInplaceSoftmax(TestDygraphInplace): + def non_inplace_api_processing(self, var): return paddle.nn.functional.softmax(var) @@ -351,6 +360,7 @@ class TestDygraphInplaceSoftmax(TestDygraphInplace): class TestDygraphInplaceTanh(TestDygraphInplace): + def non_inplace_api_processing(self, var): return paddle.tanh(var) @@ -359,6 +369,7 @@ class TestDygraphInplaceTanh(TestDygraphInplace): class TestDygraphInplaceCeil(TestDygraphInplace): + def non_inplace_api_processing(self, var): return var.ceil() @@ -367,6 +378,7 @@ class TestDygraphInplaceCeil(TestDygraphInplace): class TestDygraphInplaceFloor(TestDygraphInplace): + def non_inplace_api_processing(self, var): return var.floor() @@ -375,6 +387,7 @@ class TestDygraphInplaceFloor(TestDygraphInplace): class TestDygraphInplaceExp(TestDygraphInplace): + def set_np_compare_func(self): self.np_compare = np.allclose @@ -386,6 +399,7 @@ class TestDygraphInplaceExp(TestDygraphInplace): class TestDygraphInplaceReciprocal(TestDygraphInplace): + def non_inplace_api_processing(self, var): return var.reciprocal() @@ -394,6 +408,7 @@ class TestDygraphInplaceReciprocal(TestDygraphInplace): class TestDygraphInplaceRound(TestDygraphInplace): + def non_inplace_api_processing(self, var): return var.round() @@ -402,6 +417,7 @@ class TestDygraphInplaceRound(TestDygraphInplace): class TestDygraphInplaceSqrt(TestDygraphInplace): + def init_data(self): self.input_var_numpy = np.random.uniform(0, 5, [10, 20, 1]) self.dtype = "float32" @@ -414,6 +430,7 @@ class TestDygraphInplaceSqrt(TestDygraphInplace): class TestDygraphInplaceRsqrt(TestDygraphInplaceSqrt): + def non_inplace_api_processing(self, var): return var.rsqrt() @@ -422,6 +439,7 @@ class TestDygraphInplaceRsqrt(TestDygraphInplaceSqrt): class TestDygraphInplaceClip(TestDygraphInplace): + def non_inplace_api_processing(self, var): return var.clip(0.6, 1.5) @@ -430,6 +448,7 @@ class TestDygraphInplaceClip(TestDygraphInplace): class TestDygraphInplaceScale(TestDygraphInplace): + def non_inplace_api_processing(self, var): return var.scale(scale=2.0, bias=3.0) @@ -438,6 +457,7 @@ class TestDygraphInplaceScale(TestDygraphInplace): class TestDygraphInplaceAdd(TestDygraphInplace): + def init_data(self): self.input_var_numpy = np.random.rand(2, 3, 4) self.dtype = "float32" @@ -453,6 +473,7 @@ class TestDygraphInplaceAdd(TestDygraphInplace): class TestDygraphInplaceSubtract(TestDygraphInplaceAdd): + def non_inplace_api_processing(self, var): input_var_2 = paddle.to_tensor(self.input_var_numpy_2) return var.subtract(input_var_2) @@ -463,6 +484,7 @@ class TestDygraphInplaceSubtract(TestDygraphInplaceAdd): class TestLossIsInplaceVar(unittest.TestCase): + def func_test_loss_is_inplace_var(self): with paddle.fluid.dygraph.guard(): var_a = paddle.ones((2, 2)) @@ -493,6 +515,7 @@ class TestLossIsInplaceVar(unittest.TestCase): class TestContinuouslyInplace(unittest.TestCase): + def func_test_continuously_inplace(self): a = paddle.rand([2, 3]) a.stop_gradient = False @@ -511,6 +534,7 @@ class TestContinuouslyInplace(unittest.TestCase): class TestGetitemBeforeInplace(unittest.TestCase): + def test_getitem_before_inplace(self): 
with _test_eager_guard(): a = paddle.ones(shape=[4, 2, 3], dtype="float32") diff --git a/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py b/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py index 67f6b910214..dc0b45206d9 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py @@ -27,6 +27,7 @@ import paddle class TestInplaceANBOpTraining(unittest.TestCase): + def setUp(self): self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64 self.N = 4 @@ -50,12 +51,11 @@ class TestInplaceANBOpTraining(unittest.TestCase): startup.random_seed = seed with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - data = fluid.layers.data( - name='input', - shape=self.dshape, - dtype=self.dtype, - append_batch_size=False, - stop_gradient=False) + data = fluid.layers.data(name='input', + shape=self.dshape, + dtype=self.dtype, + append_batch_size=False, + stop_gradient=False) if inplace: bn = fluid.layers.inplace_abn( data, @@ -83,7 +83,7 @@ class TestInplaceANBOpTraining(unittest.TestCase): bn = fluid.layers.elu(bn, alpha) # NOTE: in inplace mode input and output of bn - # may have same name, multiply 1. to generate + # may have same name, multiply 1. to generate # a new Variable for fetch bn = bn * 1. @@ -102,14 +102,13 @@ class TestInplaceANBOpTraining(unittest.TestCase): fetch_outs = [] fetch_names = [] for inplace in [False, True]: - main, startup, outs = self.build_program( - place, - layout, - seed, - only_forward, - activation, - alpha, - inplace=inplace) + main, startup, outs = self.build_program(place, + layout, + seed, + only_forward, + activation, + alpha, + inplace=inplace) exe = fluid.Executor(place) exe.run(startup) @@ -145,12 +144,11 @@ class TestInplaceANBOpTraining(unittest.TestCase): fetch_outs.append(bn_fetches) fetch_names.append(fetch_name) - for bn_val, inplace_abn_val, name1, name2 in zip(*( - fetch_outs + fetch_names)): + for bn_val, inplace_abn_val, name1, name2 in zip(*(fetch_outs + + fetch_names)): self.assertTrue( - np.allclose( - bn_val, inplace_abn_val, atol=1e-2), - "Output (" + name1 + ":" + name2 + + np.allclose(bn_val, inplace_abn_val, + atol=1e-2), "Output (" + name1 + ":" + name2 + ") has diff on {} with {} layout and {} activation. 
\n".format( place, layout, activation) + "\nBN " + str(bn_val) + "\n" + "Inplace ABN " + str(inplace_abn_val)) diff --git a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py index b9089448d53..39e493b1b34 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py @@ -24,6 +24,7 @@ import numpy as np class ConvBNLayer(fluid.Layer): + def __init__(self, num_channels, num_filters, @@ -33,18 +34,17 @@ class ConvBNLayer(fluid.Layer): data_format="NCHW"): super(ConvBNLayer, self).__init__() - self._conv = paddle.nn.Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - bias_attr=False, - data_format=data_format) + self._conv = paddle.nn.Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + bias_attr=False, + data_format=data_format) - self._batch_norm = paddle.nn.BatchNorm( - num_filters, data_layout=data_format) + self._batch_norm = paddle.nn.BatchNorm(num_filters, + data_layout=data_format) def forward(self, inputs): y = self._conv(inputs) @@ -61,11 +61,10 @@ def create_program(data_format="NCHW"): if data_format == "NHWC": x = paddle.transpose(x, [0, 2, 3, 1]) x = fluid.layers.prelu(x, mode="channel") - conv = ConvBNLayer( - num_channels=3, - num_filters=3, - filter_size=1, - data_format=data_format) + conv = ConvBNLayer(num_channels=3, + num_filters=3, + filter_size=1, + data_format=data_format) y = conv(x) + x loss = fluid.layers.reduce_sum(y) @@ -77,7 +76,9 @@ def create_program(data_format="NCHW"): class TestInplaceAddto(unittest.TestCase): + def check_result(self, data_format="NCHW"): + def run_program(enable_addto): np.random.seed(10) paddle.seed(10) @@ -86,8 +87,8 @@ class TestInplaceAddto(unittest.TestCase): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) fluid.set_flags({"FLAGS_max_inplace_grad_add": 2}) loss, main, startup, w = create_program(data_format=data_format) - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) strategy = fluid.BuildStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_inplace_and_clear_gradient.py b/python/paddle/fluid/tests/unittests/test_inplace_and_clear_gradient.py index b685900eadf..7ec04ed90b0 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_and_clear_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_and_clear_gradient.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,6 +22,7 @@ paddle.disable_static() def clear_grad(w, a): + @paddle.no_grad() def warp(*_): assert w.grad is not None @@ -32,6 +33,7 @@ def clear_grad(w, a): class TestInplaceAndClearGradient(unittest.TestCase): + def test(self): paddle.set_device('cpu') diff --git a/python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py b/python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py index abc8849b614..581ce0d5d02 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py @@ -25,6 +25,7 @@ from paddle.static import Program, program_guard # In static mode, inplace strategy will not be used in Inplace APIs. class TestStaticAutoGeneratedAPI(unittest.TestCase): + def setUp(self): paddle.enable_static() self.init_data() @@ -61,11 +62,13 @@ class TestStaticAutoGeneratedAPI(unittest.TestCase): class TestStaticInplaceAutoGeneratedAPI(TestStaticAutoGeneratedAPI): + def executed_paddle_api(self, x): return x.ceil_() class TestStaticFloorAPI(TestStaticAutoGeneratedAPI): + def executed_paddle_api(self, x): return x.floor() @@ -74,11 +77,13 @@ class TestStaticFloorAPI(TestStaticAutoGeneratedAPI): class TestStaticInplaceFloorAPI(TestStaticFloorAPI): + def executed_paddle_api(self, x): return x.floor_() class TestStaticExpAPI(TestStaticAutoGeneratedAPI): + def set_np_compare_func(self): self.np_compare = np.allclose @@ -90,11 +95,13 @@ class TestStaticExpAPI(TestStaticAutoGeneratedAPI): class TestStaticInplaceExpAPI(TestStaticExpAPI): + def executed_paddle_api(self, x): return x.exp_() class TestStaticReciprocalAPI(TestStaticAutoGeneratedAPI): + def executed_paddle_api(self, x): return x.reciprocal() @@ -103,11 +110,13 @@ class TestStaticReciprocalAPI(TestStaticAutoGeneratedAPI): class TestStaticInplaceReciprocalAPI(TestStaticReciprocalAPI): + def executed_paddle_api(self, x): return x.reciprocal_() class TestStaticRoundAPI(TestStaticAutoGeneratedAPI): + def executed_paddle_api(self, x): return x.round() @@ -116,11 +125,13 @@ class TestStaticRoundAPI(TestStaticAutoGeneratedAPI): class TestStaticInplaceRoundAPI(TestStaticRoundAPI): + def executed_paddle_api(self, x): return x.round_() class TestStaticSqrtAPI(TestStaticAutoGeneratedAPI): + def init_data(self): self.dtype = 'float32' self.shape = [10, 20] @@ -137,11 +148,13 @@ class TestStaticSqrtAPI(TestStaticAutoGeneratedAPI): class TestStaticInplaceSqrtAPI(TestStaticSqrtAPI): + def executed_paddle_api(self, x): return x.sqrt_() class TestStaticRsqrtAPI(TestStaticSqrtAPI): + def executed_paddle_api(self, x): return x.rsqrt() @@ -150,12 +163,14 @@ class TestStaticRsqrtAPI(TestStaticSqrtAPI): class TestStaticInplaceRsqrtAPI(TestStaticRsqrtAPI): + def executed_paddle_api(self, x): return x.rsqrt_() # In dygraph mode, inplace strategy will be used in Inplace APIs. 
class TestDygraphAutoGeneratedAPI(unittest.TestCase): + def setUp(self): paddle.disable_static() self.init_data() @@ -184,11 +199,13 @@ class TestDygraphAutoGeneratedAPI(unittest.TestCase): class TestDygraphInplaceAutoGeneratedAPI(TestDygraphAutoGeneratedAPI): + def executed_paddle_api(self, x): return x.ceil_() class TestDygraphFloorAPI(TestDygraphAutoGeneratedAPI): + def executed_paddle_api(self, x): return x.floor() @@ -197,11 +214,13 @@ class TestDygraphFloorAPI(TestDygraphAutoGeneratedAPI): class TestDygraphInplaceFloorAPI(TestDygraphFloorAPI): + def executed_paddle_api(self, x): return x.floor_() class TestDygraphExpAPI(TestDygraphAutoGeneratedAPI): + def executed_paddle_api(self, x): return x.exp() @@ -213,11 +232,13 @@ class TestDygraphExpAPI(TestDygraphAutoGeneratedAPI): class TestDygraphInplaceExpAPI(TestDygraphExpAPI): + def executed_paddle_api(self, x): return x.exp_() class TestDygraphReciprocalAPI(TestDygraphAutoGeneratedAPI): + def executed_paddle_api(self, x): return x.reciprocal() @@ -226,11 +247,13 @@ class TestDygraphReciprocalAPI(TestDygraphAutoGeneratedAPI): class TestDygraphInplaceReciprocalAPI(TestDygraphReciprocalAPI): + def executed_paddle_api(self, x): return x.reciprocal_() class TestDygraphRoundAPI(TestDygraphAutoGeneratedAPI): + def executed_paddle_api(self, x): return x.round() @@ -239,11 +262,13 @@ class TestDygraphRoundAPI(TestDygraphAutoGeneratedAPI): class TestDygraphInplaceRoundAPI(TestDygraphRoundAPI): + def executed_paddle_api(self, x): return x.round_() class TestDygraphSqrtAPI(TestDygraphAutoGeneratedAPI): + def init_data(self): self.dtype = 'float32' self.shape = [10, 20] @@ -260,11 +285,13 @@ class TestDygraphSqrtAPI(TestDygraphAutoGeneratedAPI): class TestDygraphInplaceSqrtAPI(TestDygraphSqrtAPI): + def executed_paddle_api(self, x): return x.sqrt_() class TestDygraphRsqrtAPI(TestDygraphSqrtAPI): + def executed_paddle_api(self, x): return x.rsqrt() @@ -273,6 +300,7 @@ class TestDygraphRsqrtAPI(TestDygraphSqrtAPI): class TestDygraphInplaceRsqrtAPI(TestDygraphRsqrtAPI): + def executed_paddle_api(self, x): return x.rsqrt_() diff --git a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py index 90666d4ebb6..643ff14b878 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py @@ -20,10 +20,11 @@ import unittest class TestSoftmaxWithXe(unittest.TestCase): + def setUp(self): self.initParameter() - self.m, self.n = np.random.random_integers( - low=100, high=2000, size=[2]).astype('int64') + self.m, self.n = np.random.random_integers(low=100, high=2000, + size=[2]).astype('int64') def initParameter(self): self.dtype = 'float32' @@ -38,11 +39,10 @@ class TestSoftmaxWithXe(unittest.TestCase): m, n = x.shape with fluid.program_guard(fluid.Program(), fluid.Program()): with fluid.scope_guard(fluid.Scope()): - x_d = fluid.layers.data( - name='x', - shape=[m, n], - dtype=self.dtype, - append_batch_size=False) + x_d = fluid.layers.data(name='x', + shape=[m, n], + dtype=self.dtype, + append_batch_size=False) y_d = fluid.layers.data( name='y', shape=[m, 1] if not self.soft_label else [m, n], @@ -61,17 +61,19 @@ class TestSoftmaxWithXe(unittest.TestCase): build_strategy = fluid.BuildStrategy() build_strategy.enable_inplace = inplace - prog = fluid.CompiledProgram(fluid.default_main_program( - )).with_data_parallel( - 
build_strategy=build_strategy, places=place) + prog = fluid.CompiledProgram( + fluid.default_main_program()).with_data_parallel( + build_strategy=build_strategy, places=place) fetch_list = [z_d.name, s_d.name] print('Inplace is {}'.format("ON" if inplace else "OFF")) z, s = exe.run(prog, - feed={x_d.name: x, - y_d.name: y}, + feed={ + x_d.name: x, + y_d.name: y + }, fetch_list=fetch_list) return z, s @@ -82,27 +84,39 @@ class TestSoftmaxWithXe(unittest.TestCase): for a, b in x_range: x = ((b - a) * x + a).astype(self.dtype) if not self.soft_label: - y = np.random.random_integers( - size=[self.m, 1], low=0, high=self.n - 1).astype('int64') + y = np.random.random_integers(size=[self.m, 1], + low=0, + high=self.n - 1).astype('int64') else: y = np.random.random(size=[self.m, self.n]).astype(self.dtype) - norm_y = np.broadcast_to( - np.reshape( - np.sum(y, axis=1), [-1, 1]), y.shape) + norm_y = np.broadcast_to(np.reshape(np.sum(y, axis=1), [-1, 1]), + y.shape) y = y / norm_y - z1, s1 = self.softmax_with_xe( - x, y, place, inplace=False, numeric_stable_mode=False) - z2, s2 = self.softmax_with_xe( - x, y, place, inplace=True, numeric_stable_mode=False) + z1, s1 = self.softmax_with_xe(x, + y, + place, + inplace=False, + numeric_stable_mode=False) + z2, s2 = self.softmax_with_xe(x, + y, + place, + inplace=True, + numeric_stable_mode=False) self.assertTrue((z1 == z2).all()) self.assertTrue((s1 == s2).all()) - z1, s1 = self.softmax_with_xe( - x, y, place, inplace=False, numeric_stable_mode=True) - z2, s2 = self.softmax_with_xe( - x, y, place, inplace=True, numeric_stable_mode=True) + z1, s1 = self.softmax_with_xe(x, + y, + place, + inplace=False, + numeric_stable_mode=True) + z2, s2 = self.softmax_with_xe(x, + y, + place, + inplace=True, + numeric_stable_mode=True) self.assertTrue((z1 == z2).all()) self.assertTrue((s1 == s2).all()) @@ -113,18 +127,21 @@ class TestSoftmaxWithXe(unittest.TestCase): class TestSoftmaxWithXe1(TestSoftmaxWithXe): + def initParameter(self): self.dtype = 'float32' self.soft_label = True class TestSoftmaxWithXe2(TestSoftmaxWithXe): + def initParameter(self): self.dtype = 'float64' self.soft_label = False class TestSoftmaxWithXe3(TestSoftmaxWithXe): + def initParameter(self): self.dtype = 'float64' self.soft_label = True diff --git a/python/paddle/fluid/tests/unittests/test_input_spec.py b/python/paddle/fluid/tests/unittests/test_input_spec.py index 4e0aa4a9bca..f8f04229a4d 100644 --- a/python/paddle/fluid/tests/unittests/test_input_spec.py +++ b/python/paddle/fluid/tests/unittests/test_input_spec.py @@ -22,6 +22,7 @@ from paddle.fluid.dygraph.dygraph_to_static.utils import _compatible_non_tensor_ class TestInputSpec(unittest.TestCase): + def test_default(self): tensor_spec = InputSpec([3, 4]) self.assertEqual(tensor_spec.dtype, @@ -112,6 +113,7 @@ class TestInputSpec(unittest.TestCase): class NetWithNonTensorSpec(paddle.nn.Layer): + def __init__(self, in_num, out_num): super(NetWithNonTensorSpec, self).__init__() self.linear_1 = paddle.nn.Linear(in_num, out_num) @@ -152,6 +154,7 @@ class NetWithNonTensorSpec(paddle.nn.Layer): class TestNetWithNonTensorSpec(unittest.TestCase): + def setUp(self): self.in_num = 16 self.out_num = 16 @@ -233,6 +236,7 @@ class TestNetWithNonTensorSpec(unittest.TestCase): class NetWithNonTensorSpecPrune(paddle.nn.Layer): + def __init__(self, in_num, out_num): super(NetWithNonTensorSpecPrune, self).__init__() self.linear_1 = paddle.nn.Linear(in_num, out_num) @@ -252,6 +256,7 @@ class NetWithNonTensorSpecPrune(paddle.nn.Layer): class 
TestNetWithNonTensorSpecWithPrune(unittest.TestCase): + def setUp(self): self.in_num = 16 self.out_num = 16 @@ -298,6 +303,7 @@ class TestNetWithNonTensorSpecWithPrune(unittest.TestCase): class UnHashableObject: + def __init__(self, val): self.val = val @@ -306,6 +312,7 @@ class UnHashableObject: class TestCompatibleNonTensorSpec(unittest.TestCase): + def test_case(self): self.assertTrue(_compatible_non_tensor_spec([1, 2, 3], [1, 2, 3])) self.assertFalse(_compatible_non_tensor_spec([1, 2, 3], [1, 2])) @@ -313,8 +320,8 @@ class TestCompatibleNonTensorSpec(unittest.TestCase): # not supported unhashable object. self.assertTrue( - _compatible_non_tensor_spec( - UnHashableObject(1), UnHashableObject(1))) + _compatible_non_tensor_spec(UnHashableObject(1), + UnHashableObject(1))) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_install_check.py b/python/paddle/fluid/tests/unittests/test_install_check.py index 15f2b5f3b7e..e51079278db 100644 --- a/python/paddle/fluid/tests/unittests/test_install_check.py +++ b/python/paddle/fluid/tests/unittests/test_install_check.py @@ -18,6 +18,7 @@ import os class TestInstallCheck(unittest.TestCase): + def test_paddle_fluid(self): paddle.fluid.install_check.run_check() diff --git a/python/paddle/fluid/tests/unittests/test_instance_norm_op.py b/python/paddle/fluid/tests/unittests/test_instance_norm_op.py index 23c51433476..f932df9dd33 100644 --- a/python/paddle/fluid/tests/unittests/test_instance_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_instance_norm_op.py @@ -65,9 +65,10 @@ def _reference_instance_norm_grad(x, d_y, scale, mean, var, epsilon): scale_tile = np.reshape(scale, (1, c, 1, 1)) scale_tile = np.tile(scale_tile, (n, 1, h, w)) - d_x = scale_tile * var_inv * (d_y - np.mean( - d_y, axis=(2, 3), keepdims=True) - (x - mean_tile) * var_inv * np.mean( - d_y * (x - mean_tile) * var_inv, axis=(2, 3), keepdims=True)) + d_x = scale_tile * var_inv * ( + d_y - np.mean(d_y, axis=(2, 3), keepdims=True) - + (x - mean_tile) * var_inv * + np.mean(d_y * (x - mean_tile) * var_inv, axis=(2, 3), keepdims=True)) return d_x, d_scale, d_bias @@ -78,6 +79,7 @@ def _cal_mean_variance(x, epsilon, mean_shape): class TestInstanceNormOpTraining(unittest.TestCase): + def setUp(self): self.epsilon = 1e-5 self.init_test_case() @@ -98,6 +100,7 @@ class TestInstanceNormOpTraining(unittest.TestCase): return mean, variance def test_forward_backward(self): + def test_with_place(place, shape): epsilon = self.epsilon n, c, h, w = shape[0], shape[1], shape[2], shape[3] @@ -134,23 +137,26 @@ class TestInstanceNormOpTraining(unittest.TestCase): with fluid.program_guard(program): block = program.global_block() for name in ground_truth: - block.create_var( - name=name, - dtype='float32', - shape=ground_truth[name].shape) - in_op = block.append_op( - type="instance_norm", - inputs={ - "X": block.var("x"), - "Scale": block.var("scale"), - "Bias": block.var("bias"), - }, - outputs={ - "Y": block.var("y"), - "SavedMean": block.var("saved_mean"), - "SavedVariance": block.var("saved_variance") - }, - attrs={"epsilon": epsilon, }) + block.create_var(name=name, + dtype='float32', + shape=ground_truth[name].shape) + in_op = block.append_op(type="instance_norm", + inputs={ + "X": block.var("x"), + "Scale": block.var("scale"), + "Bias": block.var("bias"), + }, + outputs={ + "Y": + block.var("y"), + "SavedMean": + block.var("saved_mean"), + "SavedVariance": + block.var("saved_variance") + }, + attrs={ + "epsilon": epsilon, + }) block.create_var(name="y@GRAD", 
dtype='float32', shape=y.shape) @@ -191,6 +197,7 @@ class TestInstanceNormOpTraining(unittest.TestCase): class TestInstanceNormOpTrainingCase1(TestInstanceNormOpTraining): + def init_test_case(self): self.shape = [2, 3, 4, 5] self.no_grad_set = set(['scale@GRAD', 'bias@GRAD']) @@ -198,6 +205,7 @@ class TestInstanceNormOpTrainingCase1(TestInstanceNormOpTraining): class TestInstanceNormOpTrainingCase2(TestInstanceNormOpTraining): + def init_test_case(self): self.shape = [20, 50, 4, 5] self.no_grad_set = set(['scale@GRAD', 'bias@GRAD']) @@ -205,11 +213,12 @@ class TestInstanceNormOpTrainingCase2(TestInstanceNormOpTraining): class TestInstanceNormOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of instance_norm must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.instance_norm, x1) # the input dtype of instance_norm must be float32 or float64 @@ -218,15 +227,19 @@ class TestInstanceNormOpError(unittest.TestCase): class TestInstanceNormOpErrorCase1(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - # the first dimension of input for instance_norm must between [2d, 5d] - x = fluid.layers.data( - name='x', shape=[3], dtype="float32", append_batch_size=False) + # the first dimension of input for instance_norm must between [2d, 5d] + x = fluid.layers.data(name='x', + shape=[3], + dtype="float32", + append_batch_size=False) self.assertRaises(ValueError, paddle.static.nn.instance_norm, x) class TestElasticNormOp(unittest.TestCase): + def init_test_case(self): self.epsilon = 1e-5 self.places = [core.CPUPlace()] @@ -244,13 +257,15 @@ class TestElasticNormOp(unittest.TestCase): scale = np.ones(scale_shape).astype(np.float32) bias = np.zeros(scale_shape).astype(np.float32) mean, variance = _cal_mean_variance(inputs, self.epsilon, mean_shape) - out_np, _, _ = _reference_instance_norm_naive( - inputs, scale, bias, self.epsilon, mean, variance) + out_np, _, _ = _reference_instance_norm_naive(inputs, scale, bias, + self.epsilon, mean, + variance) for place in self.places: with fluid.dygraph.guard(place): - instance_norm = fluid.dygraph.InstanceNorm( - 5, param_attr=False, bias_attr=False) + instance_norm = fluid.dygraph.InstanceNorm(5, + param_attr=False, + bias_attr=False) outputs = instance_norm(to_variable(inputs)) self.assertTrue(np.allclose(outputs.numpy(), out_np, atol=1e-6)) @@ -260,6 +275,7 @@ class TestElasticNormOp(unittest.TestCase): class TestElasticNormOpCase2(unittest.TestCase): + def init_test_case(self): self.epsilon = 1e-5 self.places = [core.CPUPlace()] @@ -277,13 +293,15 @@ class TestElasticNormOpCase2(unittest.TestCase): scale = np.ones(scale_shape).astype(np.float32) bias = np.zeros(scale_shape).astype(np.float32) mean, variance = _cal_mean_variance(inputs, self.epsilon, mean_shape) - out_np, _, _ = _reference_instance_norm_naive( - inputs, scale, bias, self.epsilon, mean, variance) + out_np, _, _ = _reference_instance_norm_naive(inputs, scale, bias, + self.epsilon, mean, + variance) for place in self.places: with fluid.dygraph.guard(place): - instance_norm = fluid.dygraph.InstanceNorm( - 3, param_attr=True, bias_attr=True) + instance_norm = fluid.dygraph.InstanceNorm(3, + param_attr=True, + bias_attr=True) outputs = instance_norm(to_variable(inputs)) self.assertTrue(np.allclose(outputs.numpy(), out_np, 
atol=1e-6)) diff --git a/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py index 1656bc11869..62677ed2621 100644 --- a/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py @@ -27,6 +27,7 @@ import paddle class TestInstanceNorm(unittest.TestCase): + def test_error(self): places = [fluid.CPUPlace()] if core.is_compiled_with_cuda() and core.op_support_gpu( @@ -51,8 +52,9 @@ class TestInstanceNorm(unittest.TestCase): def weight_bias_false(): x_data_4 = np.random.random(size=(2, 1, 3, 3)).astype('float32') - instance_norm3d = paddle.nn.InstanceNorm3D( - 1, weight_attr=False, bias_attr=False) + instance_norm3d = paddle.nn.InstanceNorm3D(1, + weight_attr=False, + bias_attr=False) with fluid.dygraph.guard(p): weight_bias_false() diff --git a/python/paddle/fluid/tests/unittests/test_inverse_op.py b/python/paddle/fluid/tests/unittests/test_inverse_op.py index 85c4c6000a6..b868fef15ec 100644 --- a/python/paddle/fluid/tests/unittests/test_inverse_op.py +++ b/python/paddle/fluid/tests/unittests/test_inverse_op.py @@ -21,6 +21,7 @@ from op_test import OpTest class TestInverseOp(OpTest): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -44,12 +45,14 @@ class TestInverseOp(OpTest): class TestInverseOpBatched(TestInverseOp): + def config(self): self.matrix_shape = [8, 4, 4] self.dtype = "float64" class TestInverseOpLarge(TestInverseOp): + def config(self): self.matrix_shape = [32, 32] self.dtype = "float64" @@ -59,6 +62,7 @@ class TestInverseOpLarge(TestInverseOp): class TestInverseOpFP32(TestInverseOp): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float32" @@ -68,18 +72,21 @@ class TestInverseOpFP32(TestInverseOp): class TestInverseOpBatchedFP32(TestInverseOpFP32): + def config(self): self.matrix_shape = [8, 4, 4] self.dtype = "float32" class TestInverseOpLargeFP32(TestInverseOpFP32): + def config(self): self.matrix_shape = [32, 32] self.dtype = "float32" class TestInverseAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -114,6 +121,7 @@ class TestInverseAPI(unittest.TestCase): class TestInverseAPIError(unittest.TestCase): + def test_errors(self): input_np = np.random.random([4, 4]).astype("float64") @@ -136,6 +144,7 @@ class TestInverseAPIError(unittest.TestCase): class TestInverseSingularAPI(unittest.TestCase): + def setUp(self): self.places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_io_save_load.py b/python/paddle/fluid/tests/unittests/test_io_save_load.py index a9a223f8f99..0d5573ae702 100644 --- a/python/paddle/fluid/tests/unittests/test_io_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_io_save_load.py @@ -22,6 +22,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class TestSaveLoadAPIError(unittest.TestCase): + def func_test_get_valid_program_error(self): # case 1: CompiledProgram no program graph = core.Graph(core.ProgramDesc()) @@ -43,16 +44,16 @@ class TestSaveLoadAPIError(unittest.TestCase): exe = fluid.Executor(place) # case 1: main_program type error when vars None with self.assertRaises(TypeError): - fluid.io.load_vars( - executor=exe, dirname="./fake_dir", main_program="program") + fluid.io.load_vars(executor=exe, + dirname="./fake_dir", + main_program="program") # case 2: main_program type error when vars not None with 
self.assertRaises(TypeError): - fluid.io.load_vars( - executor=exe, - dirname="./fake_dir", - main_program="program", - vars="vars") + fluid.io.load_vars(executor=exe, + dirname="./fake_dir", + main_program="program", + vars="vars") def test_load_vars_error(self): with _test_eager_guard(): @@ -61,6 +62,7 @@ class TestSaveLoadAPIError(unittest.TestCase): class TestSaveInferenceModelAPIError(unittest.TestCase): + def func_test_useless_feeded_var_names(self): start_prog = fluid.Program() main_prog = fluid.Program() @@ -73,12 +75,11 @@ class TestSaveInferenceModelAPIError(unittest.TestCase): exe.run(start_prog) with self.assertRaisesRegexp( ValueError, "not involved in the target_vars calculation"): - fluid.io.save_inference_model( - dirname='./model', - feeded_var_names=['x', 'y'], - target_vars=[z], - executor=exe, - main_program=main_prog) + fluid.io.save_inference_model(dirname='./model', + feeded_var_names=['x', 'y'], + target_vars=[z], + executor=exe, + main_program=main_prog) def test_useless_feeded_var_names(self): with _test_eager_guard(): @@ -87,6 +88,7 @@ class TestSaveInferenceModelAPIError(unittest.TestCase): class TestWhenTrainWithNoGrad(unittest.TestCase): + def func_test_when_train_with_no_grad(self): paddle.disable_static() net = paddle.nn.Linear(1024, 1) diff --git a/python/paddle/fluid/tests/unittests/test_iou_similarity_op.py b/python/paddle/fluid/tests/unittests/test_iou_similarity_op.py index 6d7ffecc38f..672498ab56e 100644 --- a/python/paddle/fluid/tests/unittests/test_iou_similarity_op.py +++ b/python/paddle/fluid/tests/unittests/test_iou_similarity_op.py @@ -23,6 +23,7 @@ from op_test import OpTest class TestIOUSimilarityOp(OpTest): + def test_check_output(self): self.check_output(check_dygraph=False) @@ -32,7 +33,7 @@ class TestIOUSimilarityOp(OpTest): self.boxes2 = random.rand(3, 4).astype('float32') self.output = random.rand(2, 3).astype('float32') self.box_normalized = False - # run python iou computation + # run python iou computation self._compute_iou() self.inputs = {'X': self.boxes1, 'Y': self.boxes2} self.attrs = {"box_normalized": self.box_normalized} @@ -68,6 +69,7 @@ class TestIOUSimilarityOp(OpTest): class TestIOUSimilarityOpWithLoD(TestIOUSimilarityOp): + def test_check_output(self): self.check_output(check_dygraph=False) @@ -76,7 +78,7 @@ class TestIOUSimilarityOpWithLoD(TestIOUSimilarityOp): self.boxes1_lod = [[1, 1]] self.output_lod = [[1, 1]] self.box_normalized = False - # run python iou computation + # run python iou computation self._compute_iou() self.inputs = {'X': (self.boxes1, self.boxes1_lod), 'Y': self.boxes2} self.attrs = {"box_normalized": self.box_normalized} @@ -84,6 +86,7 @@ class TestIOUSimilarityOpWithLoD(TestIOUSimilarityOp): class TestIOUSimilarityOpWithBoxNormalized(TestIOUSimilarityOp): + def test_check_output(self): self.check_output(check_dygraph=False) @@ -92,7 +95,7 @@ class TestIOUSimilarityOpWithBoxNormalized(TestIOUSimilarityOp): self.boxes1_lod = [[1, 1]] self.output_lod = [[1, 1]] self.box_normalized = True - # run python iou computation + # run python iou computation self._compute_iou() self.inputs = {'X': (self.boxes1, self.boxes1_lod), 'Y': self.boxes2} self.attrs = {"box_normalized": self.box_normalized} diff --git a/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py b/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py index e2094c76b7d..f45ada0a529 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py +++ b/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py @@ -32,8 +32,8 
@@ def fc_with_batchnorm(use_feed): hidden, size=200, act='tanh', - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0))) + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=1.0))) hidden = fluid.layers.batch_norm(input=hidden) prediction = fluid.layers.fc(hidden, size=10, act='softmax') @@ -43,6 +43,7 @@ def fc_with_batchnorm(use_feed): class TestIrInplace(TestParallelExecutorBase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -56,8 +57,10 @@ class TestIrInplace(TestParallelExecutorBase): label = np.ones(shape=[32, 1], dtype='int64') self.check_network_convergence( fc_with_batchnorm, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=DeviceType.CUDA, use_ir_memory_optimize=ir_memory_optimize, enable_inplace=enable_inplace) diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py index dba92a68cd6..cd34e907021 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py @@ -33,6 +33,7 @@ from ir_memory_optimize_net_base import TestIrMemOptBase class TestIrMemoryOptimizeIfElseOp(unittest.TestCase): + def check_network_convergence(self, use_cuda=True, use_mem_opt=False, @@ -68,8 +69,8 @@ class TestIrMemoryOptimizeIfElseOp(unittest.TestCase): optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer.minimize(avg_loss, startup_prog) - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=200) + train_reader = paddle.batch(paddle.dataset.mnist.train(), + batch_size=200) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = Executor(place) @@ -98,8 +99,10 @@ class TestIrMemoryOptimizeIfElseOp(unittest.TestCase): y_data = y_data.reshape((y_data.shape[0], 1)) outs = exe.run(train_cp, - feed={'x': x_data, - 'y': y_data}, + feed={ + 'x': x_data, + 'y': y_data + }, fetch_list=[avg_loss]) loop += 1 diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py index 30b6d6106cd..360457000be 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py @@ -35,8 +35,9 @@ def lstm_net(data, param_attr=fluid.ParamAttr(learning_rate=emb_lr)) fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4) - lstm_h, c = fluid.layers.dynamic_lstm( - input=fc0, size=hid_dim * 4, is_reverse=False) + lstm_h, c = fluid.layers.dynamic_lstm(input=fc0, + size=hid_dim * 4, + is_reverse=False) lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max') lstm_max_tanh = fluid.layers.tanh(lstm_max) fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh') @@ -47,6 +48,7 @@ def lstm_net(data, class TestIrMemOptRNN(TestIrMemOptBase): + def setUp(self): self.network = lstm_net diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py index f4ec63a8b91..24ac4630111 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py @@ -54,6 +54,7 @@ def fc_with_inplace_net(use_feed): class TestMNIST(TestParallelExecutorBase): + def _dummy_data(self): np.random.seed(5) img = 
np.random.random(size=[32, 784]).astype(np.float32) @@ -67,14 +68,18 @@ class TestMNIST(TestParallelExecutorBase): img, label = self._dummy_data() first_loss0, last_loss0 = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_ir_memory_optimize=False) first_loss1, last_loss1 = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_ir_memory_optimize=True) for loss in zip(first_loss0, first_loss1): diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py index aa495c7533c..38d419530d7 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py @@ -30,6 +30,7 @@ from test_parallel_executor_transformer import get_feed_data_reader, transformer # NOTE(dzhwinter): test diferent strategy colisions. # open the eager delete tensor strategy by default. class TestTransformerWithIR(TestParallelExecutorBase): + def test_main(self): if core.is_compiled_with_cuda(): # check python transpiler diff --git a/python/paddle/fluid/tests/unittests/test_ir_pass_pipeline.py b/python/paddle/fluid/tests/unittests/test_ir_pass_pipeline.py index 7d11c03a1f1..1b445f8f987 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_pass_pipeline.py +++ b/python/paddle/fluid/tests/unittests/test_ir_pass_pipeline.py @@ -17,6 +17,7 @@ import test_pipeline class TestPipelineWithIRPass(test_pipeline.TestPipeline): + def need_envs(self): return {'FLAGS_apply_pass_to_program': '1'} diff --git a/python/paddle/fluid/tests/unittests/test_is_complex.py b/python/paddle/fluid/tests/unittests/test_is_complex.py index 988c55ea61a..a441bd86296 100644 --- a/python/paddle/fluid/tests/unittests/test_is_complex.py +++ b/python/paddle/fluid/tests/unittests/test_is_complex.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
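The hunks above illustrate the reformatting pattern applied throughout these test files: long calls are re-wrapped so keyword arguments align under the opening parenthesis, and dict literals passed inline (for example the feed_dict arguments) are expanded to one key per line with the closing brace on its own line. Below is a minimal, self-contained sketch of that resulting style; the helper and variable names are hypothetical stand-ins so the snippet runs on its own, not code taken from the patch.

def check_convergence(model, feed_dict, use_ir_memory_optimize=False):
    # Hypothetical stand-in for a convergence-check helper, used only so the
    # call below (written in the wrapped style shown in the hunks above) is
    # runnable without the test framework.
    return model, sorted(feed_dict), use_ir_memory_optimize


img = [[0.0] * 784]
label = [[1]]

# Wrapped-call style from the hunks above: arguments align under the opening
# parenthesis and the inline dict is expanded one key per line.
result = check_convergence("fc_with_batchnorm",
                           feed_dict={
                               "image": img,
                               "label": label
                           },
                           use_ir_memory_optimize=True)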
@@ -18,6 +18,7 @@ import unittest class TestIsComplex(unittest.TestCase): + def test_for_integer(self): x = paddle.arange(10) self.assertFalse(paddle.is_complex(x)) diff --git a/python/paddle/fluid/tests/unittests/test_is_empty_op.py b/python/paddle/fluid/tests/unittests/test_is_empty_op.py index 520e55e9f98..b017341b6c1 100644 --- a/python/paddle/fluid/tests/unittests/test_is_empty_op.py +++ b/python/paddle/fluid/tests/unittests/test_is_empty_op.py @@ -21,6 +21,7 @@ import paddle class TestEmpty(OpTest): + def setUp(self): self.op_type = "is_empty" self.inputs = {'X': np.array([1, 2, 3])} @@ -31,6 +32,7 @@ class TestEmpty(OpTest): class TestNotEmpty(TestEmpty): + def setUp(self): self.op_type = "is_empty" self.inputs = {'X': np.array([])} @@ -38,6 +40,7 @@ class TestNotEmpty(TestEmpty): class TestIsEmptyOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), @@ -52,22 +55,25 @@ class TestIsEmptyOpError(unittest.TestCase): def test_type(): # dtype must be float32, float64, int32, int64 - x3 = paddle.static.data( - name="x3", shape=[4, 32, 32], dtype="bool") + x3 = paddle.static.data(name="x3", + shape=[4, 32, 32], + dtype="bool") res = paddle.is_empty(x=x3) self.assertRaises(TypeError, test_type) def test_name_type(): # name type must be string. - x4 = paddle.static.data( - name="x4", shape=[3, 2], dtype="float32") + x4 = paddle.static.data(name="x4", + shape=[3, 2], + dtype="float32") res = paddle.is_empty(x=x4, name=1) self.assertRaises(TypeError, test_name_type) class TestIsEmptyOpDygraph(unittest.TestCase): + def test_dygraph(self): paddle.disable_static() input = paddle.rand(shape=[4, 32, 32], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/test_is_integer.py b/python/paddle/fluid/tests/unittests/test_is_integer.py index 1c33065e10b..a933e9fac66 100644 --- a/python/paddle/fluid/tests/unittests/test_is_integer.py +++ b/python/paddle/fluid/tests/unittests/test_is_integer.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -18,6 +18,7 @@ import unittest class TestIsInteger(unittest.TestCase): + def test_for_integer(self): x = paddle.arange(10) self.assertTrue(paddle.is_integer(x)) diff --git a/python/paddle/fluid/tests/unittests/test_is_tensor.py b/python/paddle/fluid/tests/unittests/test_is_tensor.py index 616aaa019ba..59ac179bdaf 100644 --- a/python/paddle/fluid/tests/unittests/test_is_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_is_tensor.py @@ -21,6 +21,7 @@ DELTA = 0.00001 class TestIsTensorApi(unittest.TestCase): + def test_is_tensor_real(self, dtype="float32"): """Test is_tensor api with a real tensor """ diff --git a/python/paddle/fluid/tests/unittests/test_isclose_op.py b/python/paddle/fluid/tests/unittests/test_isclose_op.py index 245520e5ab6..04b7fbe54e7 100644 --- a/python/paddle/fluid/tests/unittests/test_isclose_op.py +++ b/python/paddle/fluid/tests/unittests/test_isclose_op.py @@ -19,6 +19,7 @@ import paddle class TestIscloseOp(OpTest): + def set_args(self): self.input = np.array([10000., 1e-07]).astype("float32") self.other = np.array([10000.1, 1e-08]).astype("float32") @@ -39,13 +40,13 @@ class TestIscloseOp(OpTest): } self.attrs = {'equal_nan': self.equal_nan} self.outputs = { - 'Out': np.array([ - np.isclose( - self.inputs['Input'], - self.inputs['Other'], - rtol=self.rtol, - atol=self.atol, - equal_nan=self.equal_nan) + 'Out': + np.array([ + np.isclose(self.inputs['Input'], + self.inputs['Other'], + rtol=self.rtol, + atol=self.atol, + equal_nan=self.equal_nan) ]) } @@ -54,7 +55,9 @@ class TestIscloseOp(OpTest): class TestIscloseOpException(TestIscloseOp): + def test_check_output(self): + def test_rtol_num(): self.inputs['Rtol'] = np.array([1e-05, 1e-05]).astype("float64") self.inputs['Atol'] = np.array([1e-08]).astype("float64") @@ -85,6 +88,7 @@ class TestIscloseOpException(TestIscloseOp): class TestIscloseOpSmallNum(TestIscloseOp): + def set_args(self): self.input = np.array([10000., 1e-08]).astype("float32") self.other = np.array([10000.1, 1e-09]).astype("float32") @@ -94,6 +98,7 @@ class TestIscloseOpSmallNum(TestIscloseOp): class TestIscloseOpNanFalse(TestIscloseOp): + def set_args(self): self.input = np.array([1.0, float('nan')]).astype("float32") self.other = np.array([1.0, float('nan')]).astype("float32") @@ -103,6 +108,7 @@ class TestIscloseOpNanFalse(TestIscloseOp): class TestIscloseOpNanTrue(TestIscloseOp): + def set_args(self): self.input = np.array([1.0, float('nan')]).astype("float32") self.other = np.array([1.0, float('nan')]).astype("float32") @@ -112,6 +118,7 @@ class TestIscloseOpNanTrue(TestIscloseOp): class TestIscloseStatic(unittest.TestCase): + def test_api_case(self): paddle.enable_static() x_data = np.random.rand(10, 10) @@ -127,14 +134,17 @@ class TestIscloseStatic(unittest.TestCase): result = paddle.isclose(x, y) exe = paddle.fluid.Executor(place) fetches = exe.run(paddle.fluid.default_main_program(), - feed={"x": x_data, - "y": y_data}, + feed={ + "x": x_data, + "y": y_data + }, fetch_list=[result]) expected_out = np.isclose(x_data, y_data) self.assertTrue((fetches[0] == expected_out).all(), True) class TestIscloseDygraph(unittest.TestCase): + def test_api_case(self): places = [paddle.CPUPlace()] if paddle.fluid.core.is_compiled_with_cuda(): @@ -152,6 +162,7 @@ class TestIscloseDygraph(unittest.TestCase): class TestIscloseError(unittest.TestCase): + def test_input_dtype(self): paddle.enable_static() @@ -195,6 +206,7 @@ class TestIscloseError(unittest.TestCase): class TestIscloseOpFloat32(TestIscloseOp): + def set_args(self): self.input = 
np.array([10.1]).astype("float32") self.other = np.array([10]).astype("float32") @@ -204,6 +216,7 @@ class TestIscloseOpFloat32(TestIscloseOp): class TestIscloseOpFloat64(TestIscloseOp): + def set_args(self): self.input = np.array([10.1]).astype("float64") self.other = np.array([10]).astype("float64") @@ -216,6 +229,7 @@ class TestIscloseOpFloat64(TestIscloseOp): class TestIscloseOpLargeDimInput(TestIscloseOp): + def set_args(self): self.input = np.array(np.zeros([2048, 1024])).astype("float64") self.other = np.array(np.zeros([2048, 1024])).astype("float64") diff --git a/python/paddle/fluid/tests/unittests/test_isfinite_op.py b/python/paddle/fluid/tests/unittests/test_isfinite_op.py index 83d86aff7ac..65b197067d9 100644 --- a/python/paddle/fluid/tests/unittests/test_isfinite_op.py +++ b/python/paddle/fluid/tests/unittests/test_isfinite_op.py @@ -22,6 +22,7 @@ from paddle.fluid import compiler, Program, program_guard class TestInf(OpTest): + def setUp(self): self.op_type = "isinf" self.dtype = np.float32 @@ -42,7 +43,9 @@ class TestInf(OpTest): class TestRaiseError(unittest.TestCase): + def test_errors(self): + def test_type(): fluid.layers.isfinite([10]) @@ -58,11 +61,13 @@ class TestRaiseError(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16Inf(TestInf): + def init_dtype(self): self.dtype = np.float16 class TestNAN(OpTest): + def setUp(self): self.op_type = "isnan" self.dtype = np.float32 @@ -85,11 +90,13 @@ class TestNAN(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16NAN(TestNAN): + def init_dtype(self): self.dtype = np.float16 class TestIsfinite(OpTest): + def setUp(self): self.op_type = "isfinite" self.dtype = np.float32 @@ -113,11 +120,13 @@ class TestIsfinite(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16Isfinite(TestIsfinite): + def init_dtype(self): self.dtype = np.float16 class BadInputTest(unittest.TestCase): + def test_error(self): with fluid.program_guard(fluid.Program()): diff --git a/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py b/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py index c861f912803..252e43b3423 100644 --- a/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py @@ -127,6 +127,7 @@ def test(test_case, op_str, use_gpu=False): class TestCPUNormal(unittest.TestCase): + def test_inf(self): test(self, 'isinf') @@ -138,6 +139,7 @@ class TestCPUNormal(unittest.TestCase): class TestCUDANormal(unittest.TestCase): + def test_inf(self): test(self, 'isinf', True) @@ -149,6 +151,7 @@ class TestCUDANormal(unittest.TestCase): class TestError(unittest.TestCase): + def test_bad_input(self): paddle.enable_static() with fluid.program_guard(fluid.Program()): diff --git a/python/paddle/fluid/tests/unittests/test_jit_pre_save_hooks.py b/python/paddle/fluid/tests/unittests/test_jit_pre_save_hooks.py index a938024e3c9..5722d36ca5e 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_pre_save_hooks.py +++ b/python/paddle/fluid/tests/unittests/test_jit_pre_save_hooks.py @@ -1,12 +1,12 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,7 +24,9 @@ _counter = 0 class TestPreSaveHooks(unittest.TestCase): + def test_pre_save_hook_functions(self): + def fake_func(*args, **kwgs): global _counter _counter += 1 diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index 5dabf854734..6c1bbdac689 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -34,6 +34,7 @@ SEED = 10 def random_batch_reader(input_size, label_size): + def _get_random_inputs_and_labels(input_size, label_size): np.random.seed(SEED) input = np.random.random(size=input_size).astype('float32') @@ -50,6 +51,7 @@ def random_batch_reader(input_size, label_size): class LinearNet(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): super(LinearNet, self).__init__() self._linear = Linear(in_size, out_size) @@ -60,6 +62,7 @@ class LinearNet(fluid.dygraph.Layer): class LinearNetWithInputSpec(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): super(LinearNetWithInputSpec, self).__init__() self._linear = Linear(in_size, out_size) @@ -70,6 +73,7 @@ class LinearNetWithInputSpec(fluid.dygraph.Layer): class LinearNetNotDeclarative(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): super(LinearNetNotDeclarative, self).__init__() self._linear = Linear(in_size, out_size) @@ -79,14 +83,14 @@ class LinearNetNotDeclarative(fluid.dygraph.Layer): class LinerNetWithLabel(paddle.nn.Layer): + def __init__(self, in_size, out_size): super(LinerNetWithLabel, self).__init__() self._linear = Linear(in_size, out_size) @declarative(input_spec=[ - InputSpec( - shape=[None, 784], dtype='float32', name="image"), InputSpec( - shape=[None, 1], dtype='int64', name="label") + InputSpec(shape=[None, 784], dtype='float32', name="image"), + InputSpec(shape=[None, 1], dtype='int64', name="label") ]) def forward(self, x, label): out = self._linear(x) @@ -96,14 +100,14 @@ class LinerNetWithLabel(paddle.nn.Layer): class LinerNetWithPruneInput(paddle.nn.Layer): + def __init__(self, in_size, out_size): super(LinerNetWithPruneInput, self).__init__() self._linear = Linear(in_size, out_size) @declarative(input_spec=[ - InputSpec( - shape=[None, 784], dtype='float32', name="image"), InputSpec( - shape=[None, 1], dtype='int64', name="label") + InputSpec(shape=[None, 784], dtype='float32', name="image"), + InputSpec(shape=[None, 1], dtype='int64', name="label") ]) def forward(self, x, label): out = self._linear(x) @@ -113,14 +117,14 @@ class LinerNetWithPruneInput(paddle.nn.Layer): class LinerNetWithUselessInput(paddle.nn.Layer): + def __init__(self, in_size, out_size): super(LinerNetWithUselessInput, self).__init__() self._linear = Linear(in_size, out_size) @declarative(input_spec=[ - InputSpec( - shape=[None, 784], dtype='float32', name="image"), InputSpec( - shape=[None, 1], dtype='int64', name="label") + InputSpec(shape=[None, 784], dtype='float32', name="image"), + InputSpec(shape=[None, 1], dtype='int64', name="label") ]) def forward(self, x, label): out = self._linear(x) @@ -128,6 +132,7 @@ class LinerNetWithUselessInput(paddle.nn.Layer): class LinearNetReturnLoss(fluid.dygraph.Layer): + def 
__init__(self, in_size, out_size): super(LinearNetReturnLoss, self).__init__() self._linear = Linear(in_size, out_size) @@ -141,15 +146,15 @@ class LinearNetReturnLoss(fluid.dygraph.Layer): class LinearNetMultiInput(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): super(LinearNetMultiInput, self).__init__() self._linear1 = Linear(in_size, out_size) self._linear2 = Linear(in_size, out_size) @declarative(input_spec=[ - InputSpec( - [None, 8], dtype='float32'), InputSpec( - [None, 8], dtype='float32') + InputSpec([None, 8], dtype='float32'), + InputSpec([None, 8], dtype='float32') ]) def forward(self, x, y): x_out = self._linear1(x) @@ -159,14 +164,14 @@ class LinearNetMultiInput(fluid.dygraph.Layer): class LinearNetMultiInput1(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): super(LinearNetMultiInput1, self).__init__() self._linear1 = Linear(in_size, out_size) self._linear2 = Linear(in_size, out_size) - @declarative(input_spec=(InputSpec( - [None, 8], dtype='float32'), InputSpec( - [None, 8], dtype='float32'))) + @declarative(input_spec=(InputSpec([None, 8], dtype='float32'), + InputSpec([None, 8], dtype='float32'))) def forward(self, x, y): x_out = self._linear1(x) y_out = self._linear2(y) @@ -175,6 +180,7 @@ class LinearNetMultiInput1(fluid.dygraph.Layer): class MultiLoadingLinearNet(fluid.dygraph.Layer): + def __init__(self, size, model_path): super(MultiLoadingLinearNet, self).__init__() self._linear = Linear(size, size) @@ -191,6 +197,7 @@ class MultiLoadingLinearNet(fluid.dygraph.Layer): class LinearNetReturnHidden(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): super(LinearNetReturnHidden, self).__init__() self._linear_1 = Linear(in_size, out_size) @@ -205,6 +212,7 @@ class LinearNetReturnHidden(fluid.dygraph.Layer): class LinearNetWithNestOut(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): super(LinearNetWithNestOut, self).__init__() self._linear_1 = Linear(in_size, out_size) @@ -220,16 +228,17 @@ class LinearNetWithNestOut(fluid.dygraph.Layer): class LinearNetWithDictInput(paddle.nn.Layer): + def __init__(self, in_size, out_size): super(LinearNetWithDictInput, self).__init__() self._linear = Linear(in_size, out_size) @paddle.jit.to_static(input_spec=[{ - 'img': InputSpec( - shape=[None, 8], dtype='float32', name='img') + 'img': + InputSpec(shape=[None, 8], dtype='float32', name='img') }, { - 'label': InputSpec( - shape=[None, 1], dtype='int64', name='label') + 'label': + InputSpec(shape=[None, 1], dtype='int64', name='label') }]) def forward(self, img, label): out = self._linear(img['img']) @@ -239,6 +248,7 @@ class LinearNetWithDictInput(paddle.nn.Layer): class LinearNetWithDictInputNoPrune(paddle.nn.Layer): + def __init__(self, in_size, out_size): super(LinearNetWithDictInputNoPrune, self).__init__() self._linear = Linear(in_size, out_size) @@ -249,6 +259,7 @@ class LinearNetWithDictInputNoPrune(paddle.nn.Layer): class EmptyLayer(paddle.nn.Layer): + def __init__(self): super(EmptyLayer, self).__init__() @@ -258,6 +269,7 @@ class EmptyLayer(paddle.nn.Layer): class NoParamLayer(paddle.nn.Layer): + def __init__(self): super(NoParamLayer, self).__init__() @@ -267,6 +279,7 @@ class NoParamLayer(paddle.nn.Layer): class LinearNetWithMultiStaticFunc(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): super(LinearNetWithMultiStaticFunc, self).__init__() self._linear_0 = Linear(in_size, out_size) @@ -288,12 +301,12 @@ class LinearNetWithMultiStaticFunc(fluid.dygraph.Layer): def train(layer, input_size=784, label_size=1): # 
create optimizer - sgd = fluid.optimizer.SGDOptimizer( - learning_rate=0.01, parameter_list=layer.parameters()) + sgd = fluid.optimizer.SGDOptimizer(learning_rate=0.01, + parameter_list=layer.parameters()) # create data loader train_loader = fluid.io.DataLoader.from_generator(capacity=5) - train_loader.set_batch_generator( - random_batch_reader(input_size, label_size)) + train_loader.set_batch_generator(random_batch_reader( + input_size, label_size)) # train for data in train_loader(): img, label = data @@ -312,12 +325,12 @@ def train(layer, input_size=784, label_size=1): def train_with_label(layer, input_size=784, label_size=1): # create optimizer - sgd = fluid.optimizer.SGDOptimizer( - learning_rate=0.01, parameter_list=layer.parameters()) + sgd = fluid.optimizer.SGDOptimizer(learning_rate=0.01, + parameter_list=layer.parameters()) # create data loader train_loader = fluid.io.DataLoader.from_generator(capacity=5) - train_loader.set_batch_generator( - random_batch_reader(input_size, label_size)) + train_loader.set_batch_generator(random_batch_reader( + input_size, label_size)) # train for data in train_loader(): img, label = data @@ -332,6 +345,7 @@ def train_with_label(layer, input_size=784, label_size=1): class TestJitSaveLoad(unittest.TestCase): + def setUp(self): self.model_path = "test_jit_save_load/model" # enable dygraph mode @@ -345,8 +359,9 @@ class TestJitSaveLoad(unittest.TestCase): example_inputs, layer, _ = train(layer) final_model_path = model_path if model_path else self.model_path orig_input_types = [type(x) for x in example_inputs] - paddle.jit.save( - layer=layer, path=final_model_path, input_spec=example_inputs) + paddle.jit.save(layer=layer, + path=final_model_path, + input_spec=example_inputs) new_input_types = [type(x) for x in example_inputs] self.assertEqual(orig_input_types, new_input_types) return layer @@ -367,7 +382,8 @@ class TestJitSaveLoad(unittest.TestCase): x = fluid.dygraph.to_variable( np.random.random((1, 784)).astype('float32')) self.assertTrue( - np.array_equal(train_layer(x).numpy(), infer_layer(x).numpy())) + np.array_equal(train_layer(x).numpy(), + infer_layer(x).numpy())) def load_and_finetune(self, train_layer, load_train_layer): train_layer.train() @@ -392,7 +408,8 @@ class TestJitSaveLoad(unittest.TestCase): x = fluid.dygraph.to_variable( np.random.random((1, 784)).astype('float32')) self.assertTrue( - np.array_equal(train_layer(x).numpy(), new_layer(x).numpy())) + np.array_equal(train_layer(x).numpy(), + new_layer(x).numpy())) def test_load_dygraph_no_path(self): model_path = "test_jit_save_load.no_path/model_path" @@ -406,6 +423,7 @@ class TestJitSaveLoad(unittest.TestCase): class TestSaveLoadWithNestOut(unittest.TestCase): + def setUp(self): # enable dygraph mode fluid.enable_dygraph() @@ -430,26 +448,28 @@ class TestSaveLoadWithNestOut(unittest.TestCase): class TestSaveLoadWithDictInput(unittest.TestCase): + def test_dict_input(self): - # NOTE: This net cannot be executed, it is just + # NOTE: This net cannot be executed, it is just # a special case for exporting models in model validation # We DO NOT recommend this writing way of Layer net = LinearNetWithDictInput(8, 8) - # net.forward.concrete_program.inputs: - # (<__main__.LinearNetWithDictInput object at 0x7f2655298a98>, - # {'img': var img : fluid.VarType.LOD_TENSOR.shape(-1, 8).astype(VarType.FP32)}, + # net.forward.concrete_program.inputs: + # (<__main__.LinearNetWithDictInput object at 0x7f2655298a98>, + # {'img': var img : fluid.VarType.LOD_TENSOR.shape(-1, 
8).astype(VarType.FP32)}, # {'label': var label : fluid.VarType.LOD_TENSOR.shape(-1, 1).astype(VarType.INT64)}) self.assertEqual(len(net.forward.concrete_program.inputs), 3) path = "test_jit_save_load_with_dict_input/model" # prune inputs - paddle.jit.save( - layer=net, - path=path, - input_spec=[{ - 'img': InputSpec( - shape=[None, 8], dtype='float32', name='img') - }]) + paddle.jit.save(layer=net, + path=path, + input_spec=[{ + 'img': + InputSpec(shape=[None, 8], + dtype='float32', + name='img') + }]) img = paddle.randn(shape=[4, 8], dtype='float32') loaded_net = paddle.jit.load(path) @@ -461,20 +481,24 @@ class TestSaveLoadWithDictInput(unittest.TestCase): class TestSaveLoadWithDictInputNoPrune(unittest.TestCase): + def test_dict_input(self): net = LinearNetWithDictInputNoPrune(8, 8) path = "test_jit_save_load_with_dict_input_no_prune/model" # prune inputs - paddle.jit.save( - layer=net, - path=path, - input_spec=[{ - 'img': InputSpec( - shape=[None, 8], dtype='float32', name='img'), - 'img2': InputSpec( - shape=[None, 8], dtype='float32', name='img2') - }]) + paddle.jit.save(layer=net, + path=path, + input_spec=[{ + 'img': + InputSpec(shape=[None, 8], + dtype='float32', + name='img'), + 'img2': + InputSpec(shape=[None, 8], + dtype='float32', + name='img2') + }]) img = paddle.randn(shape=[4, 8], dtype='float32') img2 = paddle.randn(shape=[4, 8], dtype='float32') @@ -485,6 +509,7 @@ class TestSaveLoadWithDictInputNoPrune(unittest.TestCase): class TestSaveLoadWithInputSpec(unittest.TestCase): + def setUp(self): # enable dygraph mode fluid.enable_dygraph() @@ -492,9 +517,8 @@ class TestSaveLoadWithInputSpec(unittest.TestCase): def test_with_input_spec(self): net = LinearNetReturnLoss(8, 8) # set x.shape = [None, 8] - net.forward = declarative( - net.forward, input_spec=[InputSpec( - [None, 8], name='x')]) + net.forward = declarative(net.forward, + input_spec=[InputSpec([None, 8], name='x')]) model_path = "input_spec.output_spec/model" # check inputs and outputs @@ -587,6 +611,7 @@ class TestSaveLoadWithInputSpec(unittest.TestCase): class TestJitSaveLoadConfig(unittest.TestCase): + def setUp(self): # enable dygraph mode fluid.enable_dygraph() @@ -608,18 +633,18 @@ class TestJitSaveLoadConfig(unittest.TestCase): model_path = "save_load_config.output_spec" output_spec = [out] - paddle.jit.save( - layer=train_layer, - path=model_path, - input_spec=[x], - output_spec=output_spec) + paddle.jit.save(layer=train_layer, + path=model_path, + input_spec=[x], + output_spec=output_spec) train_layer.eval() infer_layer = paddle.jit.load(model_path) x = fluid.dygraph.to_variable( np.random.random((4, 8)).astype('float32')) self.assertTrue( - np.array_equal(train_layer(x)[0].numpy(), infer_layer(x).numpy())) + np.array_equal(train_layer(x)[0].numpy(), + infer_layer(x).numpy())) def test_save_no_support_config_error(self): layer = LinearNet(784, 1) @@ -644,6 +669,7 @@ class TestJitSaveLoadConfig(unittest.TestCase): class TestJitMultipleLoading(unittest.TestCase): + def setUp(self): self.linear_size = 4 self.model_path = "jit_multi_load/model" @@ -658,8 +684,9 @@ class TestJitMultipleLoading(unittest.TestCase): def train_and_save_orig_model(self): layer = LinearNet(self.linear_size, self.linear_size) example_inputs, layer, _ = train(layer, self.linear_size, 1) - paddle.jit.save( - layer=layer, path=self.model_path, input_spec=example_inputs) + paddle.jit.save(layer=layer, + path=self.model_path, + input_spec=example_inputs) def test_load_model_retransform_inference(self): multi_loaded_layer = 
MultiLoadingLinearNet(self.linear_size, @@ -672,6 +699,7 @@ class TestJitMultipleLoading(unittest.TestCase): class TestJitPruneModelAndLoad(unittest.TestCase): + def setUp(self): self.linear_size = 4 self.model_path = "jit_prune_model_and_load/model" @@ -694,11 +722,10 @@ class TestJitPruneModelAndLoad(unittest.TestCase): train_layer.clear_gradients() output_spec = [hidden] - paddle.jit.save( - layer=train_layer, - path=self.model_path, - input_spec=[x], - output_spec=output_spec) + paddle.jit.save(layer=train_layer, + path=self.model_path, + input_spec=[x], + output_spec=output_spec) return train_layer @@ -711,7 +738,8 @@ class TestJitPruneModelAndLoad(unittest.TestCase): x = fluid.dygraph.to_variable( np.random.random((4, 8)).astype('float32')) self.assertTrue( - np.array_equal(train_layer(x)[0].numpy(), infer_layer(x).numpy())) + np.array_equal(train_layer(x)[0].numpy(), + infer_layer(x).numpy())) def test_load_var_not_in_extra_var_info(self): self.train_and_save() @@ -729,6 +757,7 @@ class TestJitPruneModelAndLoad(unittest.TestCase): class TestJitSaveMultiCases(unittest.TestCase): + def setUp(self): # enable dygraph mode fluid.enable_dygraph() @@ -789,8 +818,7 @@ class TestJitSaveMultiCases(unittest.TestCase): paddle.jit.save( layer, model_path, - input_spec=[InputSpec( - shape=[None, 784], dtype='float32')]) + input_spec=[InputSpec(shape=[None, 784], dtype='float32')]) self.verify_inference_correctness(layer, model_path) @@ -811,8 +839,7 @@ class TestJitSaveMultiCases(unittest.TestCase): paddle.jit.save( layer, model_path, - input_spec=[InputSpec( - shape=[None, 784], dtype='float32')]) + input_spec=[InputSpec(shape=[None, 784], dtype='float32')]) self.verify_inference_correctness(layer, model_path) @@ -822,17 +849,18 @@ class TestJitSaveMultiCases(unittest.TestCase): out = train_with_label(layer) model_path = "test_prune_to_static_after_train/model" - paddle.jit.save( - layer, - model_path, - input_spec=[ - InputSpec( - shape=[None, 784], dtype='float32', name="image") - ], - output_spec=[out]) - - self.verify_inference_correctness( - layer, model_path, with_label_and_loss=True) + paddle.jit.save(layer, + model_path, + input_spec=[ + InputSpec(shape=[None, 784], + dtype='float32', + name="image") + ], + output_spec=[out]) + + self.verify_inference_correctness(layer, + model_path, + with_label_and_loss=True) def test_prune_to_static_no_train(self): layer = LinerNetWithLabel(784, 1) @@ -841,29 +869,30 @@ class TestJitSaveMultiCases(unittest.TestCase): # TODO: no train, cannot get output_spec var here # now only can use index output_spec = layer.forward.outputs[:1] - paddle.jit.save( - layer, - model_path, - input_spec=[ - InputSpec( - shape=[None, 784], dtype='float32', name="image") - ], - output_spec=output_spec) - - self.verify_inference_correctness( - layer, model_path, with_label_and_loss=True) + paddle.jit.save(layer, + model_path, + input_spec=[ + InputSpec(shape=[None, 784], + dtype='float32', + name="image") + ], + output_spec=output_spec) + + self.verify_inference_correctness(layer, + model_path, + with_label_and_loss=True) def test_prune_input_to_static_no_train(self): layer = LinerNetWithPruneInput(784, 1) model_path = "test_prune_input_to_static_no_train/model" - paddle.jit.save( - layer, - model_path, - input_spec=[ - InputSpec( - shape=[None, 784], dtype='float32', name="image") - ]) + paddle.jit.save(layer, + model_path, + input_spec=[ + InputSpec(shape=[None, 784], + dtype='float32', + name="image") + ]) self.verify_inference_correctness(layer, model_path, 
with_label=True) @@ -871,13 +900,13 @@ class TestJitSaveMultiCases(unittest.TestCase): layer = LinerNetWithUselessInput(784, 1) model_path = "test_prune_useless_input_to_static_no_train/model" - paddle.jit.save( - layer, - model_path, - input_spec=[ - InputSpec( - shape=[None, 784], dtype='float32', name="image") - ]) + paddle.jit.save(layer, + model_path, + input_spec=[ + InputSpec(shape=[None, 784], + dtype='float32', + name="image") + ]) self.verify_inference_correctness(layer, model_path, with_label=True) @@ -890,15 +919,14 @@ class TestJitSaveMultiCases(unittest.TestCase): paddle.jit.save( layer, model_path, - input_spec=[InputSpec( - shape=[None, 784], dtype='float32')]) - paddle.jit.save( - layer, - model_path, - input_spec=[ - InputSpec( - shape=[None, 784], dtype='float32', name='feed_input') - ]) + input_spec=[InputSpec(shape=[None, 784], dtype='float32')]) + paddle.jit.save(layer, + model_path, + input_spec=[ + InputSpec(shape=[None, 784], + dtype='float32', + name='feed_input') + ]) self.verify_inference_correctness(layer, model_path) @@ -921,16 +949,15 @@ class TestJitSaveMultiCases(unittest.TestCase): paddle.jit.save( layer, model_path, - input_spec=[InputSpec( - shape=[None, 784], dtype='float32')]) + input_spec=[InputSpec(shape=[None, 784], dtype='float32')]) with self.assertRaises(ValueError): - paddle.jit.save( - layer, - model_path, - input_spec=[ - InputSpec( - shape=[None, 784], dtype='float32', name='feed_input') - ]) + paddle.jit.save(layer, + model_path, + input_spec=[ + InputSpec(shape=[None, 784], + dtype='float32', + name='feed_input') + ]) def test_prune_output_spec_name_error(self): layer = LinerNetWithLabel(784, 1) @@ -940,17 +967,18 @@ class TestJitSaveMultiCases(unittest.TestCase): model_path = "test_prune_to_static_after_train/model" out = paddle.to_tensor(np.random.random((1, 1)).astype('float')) with self.assertRaises(ValueError): - paddle.jit.save( - layer, - model_path, - input_spec=[ - InputSpec( - shape=[None, 784], dtype='float32', name="image") - ], - output_spec=[out]) + paddle.jit.save(layer, + model_path, + input_spec=[ + InputSpec(shape=[None, 784], + dtype='float32', + name="image") + ], + output_spec=[out]) class TestJitSaveLoadEmptyLayer(unittest.TestCase): + def setUp(self): self.model_path = "jit_save_load_empty_layer/model" # enable dygraph mode @@ -967,6 +995,7 @@ class TestJitSaveLoadEmptyLayer(unittest.TestCase): class TestJitSaveLoadNoParamLayer(unittest.TestCase): + def setUp(self): self.model_path = "jit_save_load_no_param_layer/model" # enable dygraph mode @@ -984,6 +1013,7 @@ class TestJitSaveLoadNoParamLayer(unittest.TestCase): class TestJitSaveLoadMultiMethods(unittest.TestCase): + def setUp(self): # enable dygraph mode paddle.disable_static() @@ -1001,15 +1031,16 @@ class TestJitSaveLoadMultiMethods(unittest.TestCase): load_net = paddle.jit.load(model_path_inference) for func, result in result_origin.items(): self.assertTrue( - float((result - getattr(load_net, func, None)(inps)).abs().max( - )) < 1e-5) + float((result - + getattr(load_net, func, None)(inps)).abs().max()) < 1e-5) def test_jit_save_load_multi_methods_inputspec(self): model_path = 'jit_save_load_multi_methods/model' layer = LinearNetWithMultiStaticFunc(784, 1) with self.assertRaises(ValueError): - paddle.jit.save( - layer, model_path, input_spec=[InputSpec(shape=[None, 784])]) + paddle.jit.save(layer, + model_path, + input_spec=[InputSpec(shape=[None, 784])]) def test_parse_name(self): model_path_inference = "jit_save_load_parse_name/model" @@ -1025,6 +1056,7 @@ 
class TestJitSaveLoadMultiMethods(unittest.TestCase): class LayerSaved(paddle.nn.Layer): + def __init__(self, in_size, out_size): super(LayerSaved, self).__init__() self.hidden = 100 @@ -1046,6 +1078,7 @@ class LayerSaved(paddle.nn.Layer): class LayerLoadFinetune(paddle.nn.Layer): + def __init__(self, in_size, out_size, load_path): super(LayerLoadFinetune, self).__init__() # Test duplicate name @@ -1079,6 +1112,7 @@ class LayerLoadFinetune(paddle.nn.Layer): class TestJitSaveLoadSaveWithoutRunning(unittest.TestCase): + def setUp(self): # enable dygraph mode paddle.disable_static() @@ -1092,25 +1126,23 @@ class TestJitSaveLoadSaveWithoutRunning(unittest.TestCase): with unique_name.guard(): layer_save = LayerSaved(IMAGE_SIZE, IMAGE_SIZE) #save - paddle.jit.save( - layer_save, - model_path, - input_spec=[ - paddle.static.InputSpec( - shape=[None, IMAGE_SIZE], dtype='float32') - ]) + paddle.jit.save(layer_save, + model_path, + input_spec=[ + paddle.static.InputSpec(shape=[None, IMAGE_SIZE], + dtype='float32') + ]) result_00 = layer_save(inps0) result_01 = layer_save(inps1) #load and save without running with unique_name.guard(): layer_load = paddle.jit.load(model_path) - paddle.jit.save( - layer_load, - model_path, - input_spec=[ - paddle.static.InputSpec( - shape=[None, IMAGE_SIZE], dtype='float32') - ]) + paddle.jit.save(layer_load, + model_path, + input_spec=[ + paddle.static.InputSpec( + shape=[None, IMAGE_SIZE], dtype='float32') + ]) #reload layer_reload = paddle.jit.load(model_path) result_10 = layer_reload(inps0) @@ -1121,6 +1153,7 @@ class TestJitSaveLoadSaveWithoutRunning(unittest.TestCase): class TestJitSaveLoadFinetuneLoad(unittest.TestCase): + def setUp(self): # enable dygraph mode paddle.disable_static() @@ -1154,16 +1187,18 @@ class TestJitSaveLoadFinetuneLoad(unittest.TestCase): self.assertTrue(float(((result_01 - result_11)).abs().max()) < 1e-5) -# NOTE(weixin): When there are multiple test functions in an -# `unittest.TestCase`, functions will affect each other, -# and there is a risk of random failure. -# So divided into three TestCase: TestJitSaveLoadFunctionCase1, +# NOTE(weixin): When there are multiple test functions in an +# `unittest.TestCase`, functions will affect each other, +# and there is a risk of random failure. +# So divided into three TestCase: TestJitSaveLoadFunctionCase1, # TestJitSaveLoadFunctionCase2, TestJitSaveLoadFunctionCase3. 
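The NOTE above gives the reasoning for the class layout that follows: when several test methods share one unittest.TestCase they can affect each other and fail non-deterministically, so the jit save/load function tests are split into three separate TestCase classes. The NOTE does not spell out which state is shared; in these cases it is plausibly Paddle's global dygraph/static mode and the model files written to disk. A minimal, runnable sketch of the isolation pattern is below; the class and method names are hypothetical and the shared state is reduced to a plain list so the example does not depend on Paddle.

import unittest


class SaveFunctionCase(unittest.TestCase):

    def setUp(self):
        # Each TestCase gets its own fresh state, mirroring how the real
        # cases each call paddle.disable_static() in their own setUp().
        self.state = []

    def test_save(self):
        self.state.append("save")
        self.assertEqual(self.state, ["save"])


class LoadFunctionCase(unittest.TestCase):

    def setUp(self):
        self.state = []  # not shared with SaveFunctionCase

    def test_load(self):
        self.state.append("load")
        self.assertEqual(self.state, ["load"])


if __name__ == '__main__':
    unittest.main()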
class TestJitSaveLoadFunctionCase1(unittest.TestCase): + def setUp(self): paddle.disable_static() def test_jit_save_load_static_function(self): + @paddle.jit.to_static def fun(inputs): return paddle.tanh(inputs) @@ -1180,13 +1215,14 @@ class TestJitSaveLoadFunctionCase1(unittest.TestCase): class TestJitSaveLoadFunctionCase2(unittest.TestCase): + def setUp(self): paddle.disable_static() def test_jit_save_load_function_input_spec(self): + @paddle.jit.to_static(input_spec=[ - InputSpec( - shape=[None, 6], dtype='float32', name='x'), + InputSpec(shape=[None, 6], dtype='float32', name='x'), ]) def fun(inputs): return paddle.nn.functional.relu(inputs) @@ -1202,10 +1238,12 @@ class TestJitSaveLoadFunctionCase2(unittest.TestCase): class TestJitSaveLoadFunctionCase3(unittest.TestCase): + def setUp(self): paddle.disable_static() def test_jit_save_load_function_function(self): + def fun(inputs): return paddle.tanh(inputs) @@ -1213,13 +1251,13 @@ class TestJitSaveLoadFunctionCase3(unittest.TestCase): inps = paddle.rand([3, 6]) origin = fun(inps) - paddle.jit.save( - fun, - path, - input_spec=[ - InputSpec( - shape=[None, 6], dtype='float32', name='x'), - ]) + paddle.jit.save(fun, + path, + input_spec=[ + InputSpec(shape=[None, 6], + dtype='float32', + name='x'), + ]) load_func = paddle.jit.load(path) load_result = load_func(inps) @@ -1227,11 +1265,14 @@ class TestJitSaveLoadFunctionCase3(unittest.TestCase): class TestJitSaveLoadFunctionWithParamCase1(unittest.TestCase): + def setUp(self): paddle.disable_static() def test_jit_save_load_function(self): + class LinearNet(paddle.nn.Layer): + def __init__(self): super(LinearNet, self).__init__() self._linear = paddle.nn.Linear(5, 6) @@ -1247,8 +1288,8 @@ class TestJitSaveLoadFunctionWithParamCase1(unittest.TestCase): inps = paddle.rand([3, 5]) origin = layer.anothor_forward(inps) - func = paddle.jit.to_static( - layer.anothor_forward, [paddle.static.InputSpec(shape=[-1, 5])]) + func = paddle.jit.to_static(layer.anothor_forward, + [paddle.static.InputSpec(shape=[-1, 5])]) path = 'test_jit_save_load_function_with_params_case1/func' paddle.jit.save(func, path) load_func = paddle.jit.load(path) @@ -1258,11 +1299,14 @@ class TestJitSaveLoadFunctionWithParamCase1(unittest.TestCase): class TestJitSaveLoadFunctionWithParamCase2(unittest.TestCase): + def setUp(self): paddle.disable_static() def test_jit_save_load_function(self): + class LinearNet(paddle.nn.Layer): + def __init__(self): super(LinearNet, self).__init__() self._linear = paddle.nn.Linear(5, 6) @@ -1290,11 +1334,14 @@ class TestJitSaveLoadFunctionWithParamCase2(unittest.TestCase): class TestJitSaveLoadFunctionWithParamCase3(unittest.TestCase): + def setUp(self): paddle.disable_static() def test_jit_save_load_function(self): + class LinearNet(paddle.nn.Layer): + def __init__(self): super(LinearNet, self).__init__() self._linear = paddle.nn.Linear(5, 6) @@ -1320,6 +1367,7 @@ class TestJitSaveLoadFunctionWithParamCase3(unittest.TestCase): class TestJitSaveLoadDataParallel(unittest.TestCase): + def verify_inference_correctness(self, layer, path): layer.eval() loaded_layer = paddle.jit.load(path) @@ -1338,8 +1386,9 @@ class TestJitSaveLoadDataParallel(unittest.TestCase): layer = paddle.DataParallel(layer) path = "jit_save_data_parallel_with_inputspec/model" - paddle.jit.save( - layer=layer, path=path, input_spec=[InputSpec(shape=[None, 784])]) + paddle.jit.save(layer=layer, + path=path, + input_spec=[InputSpec(shape=[None, 784])]) self.verify_inference_correctness(layer, path) @@ -1359,15 +1408,15 @@ class 
InputSepcLayer(paddle.nn.Layer): ''' @paddle.jit.to_static(input_spec=[ - InputSpec( - shape=[None, 8], dtype='float32', name='x'), InputSpec( - shape=[None, 1], dtype='float64', name='y') + InputSpec(shape=[None, 8], dtype='float32', name='x'), + InputSpec(shape=[None, 1], dtype='float64', name='y') ]) def forward(self, x, y): return x, y class TestInputSpecCompatibility(unittest.TestCase): + def _assert_input_spec_layer_return(self, expect_layer, test_layer): input_x = paddle.uniform([8, 8], dtype='float32') input_y = paddle.uniform([8, 1], dtype='float64') @@ -1388,26 +1437,26 @@ class TestInputSpecCompatibility(unittest.TestCase): self._assert_input_spec_layer_return(layer, no_input_spec_layer) shutil.rmtree(save_dir) - paddle.jit.save( - layer=layer, - path=path, - input_spec=[ - InputSpec( - shape=[None, 8], dtype='float32', name='x'), InputSpec( - shape=[None, 1], dtype='float64', name='y') - ]) + paddle.jit.save(layer=layer, + path=path, + input_spec=[ + InputSpec(shape=[None, 8], + dtype='float32', + name='x'), + InputSpec(shape=[None, 1], + dtype='float64', + name='y') + ]) same_input_spec_layer = paddle.jit.load(path) self._assert_input_spec_layer_return(layer, same_input_spec_layer) shutil.rmtree(save_dir) - paddle.jit.save( - layer=layer, - path=path, - input_spec=[ - InputSpec( - shape=[8, 8], dtype='float32'), InputSpec( - shape=[8, -1], dtype='float64') - ]) + paddle.jit.save(layer=layer, + path=path, + input_spec=[ + InputSpec(shape=[8, 8], dtype='float32'), + InputSpec(shape=[8, -1], dtype='float64') + ]) compatible_input_spec_layer = paddle.jit.load(path) self._assert_input_spec_layer_return(layer, compatible_input_spec_layer) shutil.rmtree(save_dir) @@ -1419,36 +1468,30 @@ class TestInputSpecCompatibility(unittest.TestCase): with self.assertRaises(ValueError): # type mismatch - paddle.jit.save( - layer=layer, - path=path, - input_spec=[ - InputSpec( - shape=[None, 8], dtype='float64'), InputSpec( - shape=[None, 1], dtype='float64') - ]) + paddle.jit.save(layer=layer, + path=path, + input_spec=[ + InputSpec(shape=[None, 8], dtype='float64'), + InputSpec(shape=[None, 1], dtype='float64') + ]) with self.assertRaises(ValueError): # shape len mismatch - paddle.jit.save( - layer=layer, - path=path, - input_spec=[ - InputSpec( - shape=[None, 8, 1], dtype='float32'), InputSpec( - shape=[None, 1], dtype='float64') - ]) + paddle.jit.save(layer=layer, + path=path, + input_spec=[ + InputSpec(shape=[None, 8, 1], dtype='float32'), + InputSpec(shape=[None, 1], dtype='float64') + ]) with self.assertRaises(ValueError): # shape mismatch - paddle.jit.save( - layer=layer, - path=path, - input_spec=[ - InputSpec( - shape=[None, 8], dtype='float32'), InputSpec( - shape=[None, 2], dtype='float64') - ]) + paddle.jit.save(layer=layer, + path=path, + input_spec=[ + InputSpec(shape=[None, 8], dtype='float32'), + InputSpec(shape=[None, 2], dtype='float64') + ]) if os.path.exists(save_dir): shutil.rmtree(save_dir) diff --git a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py index aa94cf2d35c..930d8666ba1 100644 --- a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py @@ -38,6 +38,7 @@ def kldiv_loss(x, target, reduction): class TestKLDivLossOp(OpTest): + def setUp(self): self.initTestCase() self.op_type = 'kldiv_loss' @@ -58,8 +59,10 @@ class TestKLDivLossOp(OpTest): self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad( - ['X'], 
'Loss', no_grad_set=set(["Target"]), check_eager=True) + self.check_grad(['X'], + 'Loss', + no_grad_set=set(["Target"]), + check_eager=True) def initTestCase(self): self.x_shape = (4, 5, 5) @@ -67,24 +70,28 @@ class TestKLDivLossOp(OpTest): class TestKLDivLossOp2(TestKLDivLossOp): + def initTestCase(self): self.x_shape = (3, 2, 7, 7) self.reduction = 'none' class TestKLDivLossOp3(TestKLDivLossOp): + def initTestCase(self): self.x_shape = (2, 3, 5, 7, 9) self.reduction = 'mean' class TestKLDivLossOp4(TestKLDivLossOp): + def initTestCase(self): self.x_shape = (5, 20) self.reduction = 'sum' class TestKLDivLossDygraph(unittest.TestCase): + def run_kl_loss(self, reduction, shape=(5, 20)): x = np.random.uniform(-10, 10, shape).astype('float64') target = np.random.uniform(-10, 10, shape).astype('float64') @@ -92,8 +99,8 @@ class TestKLDivLossDygraph(unittest.TestCase): with paddle.fluid.dygraph.guard(): kldiv_criterion = paddle.nn.KLDivLoss(reduction) - pred_loss = kldiv_criterion( - paddle.to_tensor(x), paddle.to_tensor(target)) + pred_loss = kldiv_criterion(paddle.to_tensor(x), + paddle.to_tensor(target)) self.assertTrue(np.allclose(pred_loss.numpy(), gt_loss)) def test_kl_loss_batchmean(self): @@ -121,6 +128,7 @@ class TestKLDivLossDygraph(unittest.TestCase): class TestKLDivLossTypePromotion(unittest.TestCase): + def test_kl_div_promotion(self): with paddle.fluid.dygraph.guard(): diff --git a/python/paddle/fluid/tests/unittests/test_kron_op.py b/python/paddle/fluid/tests/unittests/test_kron_op.py index f4d013b7c6a..61b5b92c007 100644 --- a/python/paddle/fluid/tests/unittests/test_kron_op.py +++ b/python/paddle/fluid/tests/unittests/test_kron_op.py @@ -25,6 +25,7 @@ from paddle.fluid.framework import _test_eager_guard class TestKronOp(OpTest): + def setUp(self): self.op_type = "kron" self.python_api = paddle.kron @@ -52,6 +53,7 @@ class TestKronOp(OpTest): class TestKronOp2(TestKronOp): + def setUp(self): self.op_type = "kron" self.python_api = paddle.kron @@ -64,6 +66,7 @@ class TestKronOp2(TestKronOp): class TestKronOp3(TestKronOp): + def setUp(self): self.op_type = "kron" self.python_api = paddle.kron @@ -76,6 +79,7 @@ class TestKronOp3(TestKronOp): class TestKronLayer(unittest.TestCase): + def test_case(self): a = np.random.randn(10, 10).astype(np.float64) b = np.random.randn(10, 10).astype(np.float64) @@ -112,6 +116,7 @@ class TestKronLayer(unittest.TestCase): class TestComplexKronOp(OpTest): + def setUp(self): self.op_type = "kron" self.python_api = paddle.kron @@ -173,33 +178,31 @@ class TestComplexKronOp(OpTest): self.check_output(check_eager=True) def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y'], - 'Out', - user_defined_grads=[self.grad_x, self.grad_y], - user_defined_grad_outputs=[self.grad_out], - check_eager=True) + self.check_grad(['X', 'Y'], + 'Out', + user_defined_grads=[self.grad_x, self.grad_y], + user_defined_grad_outputs=[self.grad_out], + check_eager=True) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[self.grad_y], - user_defined_grad_outputs=[self.grad_out], - check_eager=True) + self.check_grad(['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[self.grad_y], + user_defined_grad_outputs=[self.grad_out], + check_eager=True) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out], - check_eager=True) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), 
+ user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out], + check_eager=True) class TestKronOpTypePromotion(TestComplexKronOp): + def init_input_output(self): self.x = np.random.random(self.x_shape).astype(self.dtype) self.y = np.random.random(self.y_shape).astype( diff --git a/python/paddle/fluid/tests/unittests/test_kthvalue_op.py b/python/paddle/fluid/tests/unittests/test_kthvalue_op.py index e1b14225809..66eb8ab4f31 100644 --- a/python/paddle/fluid/tests/unittests/test_kthvalue_op.py +++ b/python/paddle/fluid/tests/unittests/test_kthvalue_op.py @@ -35,6 +35,7 @@ def cal_kthvalue(x, k, axis, keepdim=False): class TestKthvalueOp(OpTest): + def init_args(self): self.k = 5 self.axis = -1 @@ -47,8 +48,9 @@ class TestKthvalueOp(OpTest): self.init_args() self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis} - output, indices = cal_kthvalue( - self.input_data, k=self.k, axis=self.axis) + output, indices = cal_kthvalue(self.input_data, + k=self.k, + axis=self.axis) self.outputs = {'Out': output, 'Indices': indices} def test_check_output(self): @@ -61,6 +63,7 @@ class TestKthvalueOp(OpTest): class TestKthvalueOpWithKeepdim(OpTest): + def init_args(self): self.k = 2 self.axis = 1 @@ -73,8 +76,10 @@ class TestKthvalueOpWithKeepdim(OpTest): self.input_data = np.random.random((1, 3, 2, 4, 10)) self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis, 'keepdim': True} - output, indices = cal_kthvalue( - self.input_data, k=self.k, axis=self.axis, keepdim=True) + output, indices = cal_kthvalue(self.input_data, + k=self.k, + axis=self.axis, + keepdim=True) self.outputs = {'Out': output, 'Indices': indices} def test_check_output(self): @@ -87,6 +92,7 @@ class TestKthvalueOpWithKeepdim(OpTest): class TestKthvalueOpKernels(unittest.TestCase): + def setUp(self): self.axises = [2, -1] @@ -123,11 +129,13 @@ class TestKthvalueOpKernels(unittest.TestCase): class TestKthvalueOpWithNaN(unittest.TestCase): + def setUp(self): paddle.disable_static() self.x = paddle.uniform([2, 200, 10], dtype='float32') def test_errors(self): + def test_nan_in_cpu_kernel(): paddle.set_device('cpu') nan_position = 100 @@ -150,6 +158,7 @@ class TestKthvalueOpWithNaN(unittest.TestCase): class TestKthvalueOpErrors(unittest.TestCase): + def setUp(self): self.x = paddle.uniform([2, 10, 20, 25], dtype='float32') @@ -173,6 +182,7 @@ class TestKthvalueOpErrors(unittest.TestCase): class TestModeOpInStatic(unittest.TestCase): + def setUp(self): np.random.seed(666) self.input_data = np.random.random((2, 20, 1, 2, 80)).astype(np.float64) @@ -182,8 +192,9 @@ class TestModeOpInStatic(unittest.TestCase): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - input_tensor = paddle.static.data( - name="x", shape=[2, 20, 1, 2, 80], dtype="float64") + input_tensor = paddle.static.data(name="x", + shape=[2, 20, 1, 2, 80], + dtype="float64") result = paddle.kthvalue(input_tensor, self.k, axis=1) expect_value = cal_kthvalue(self.input_data, self.k, axis=1)[0] exe = paddle.static.Executor(paddle.CPUPlace()) diff --git a/python/paddle/fluid/tests/unittests/test_l1_loss.py b/python/paddle/fluid/tests/unittests/test_l1_loss.py index c35188623b4..01d9dba7b42 100644 --- a/python/paddle/fluid/tests/unittests/test_l1_loss.py +++ b/python/paddle/fluid/tests/unittests/test_l1_loss.py @@ -21,6 +21,7 @@ import unittest class TestFunctionalL1Loss(unittest.TestCase): + def setUp(self): self.input_np = np.random.random(size=(10, 10, 
5)).astype(np.float32) self.label_np = np.random.random(size=(10, 10, 5)).astype(np.float32) @@ -44,10 +45,12 @@ class TestFunctionalL1Loss(unittest.TestCase): self.assertTrue(dy_result.shape, [10, 10, 5]) def run_static(self, use_gpu=False): - input = paddle.fluid.data( - name='input', shape=[10, 10, 5], dtype='float32') - label = paddle.fluid.data( - name='label', shape=[10, 10, 5], dtype='float32') + input = paddle.fluid.data(name='input', + shape=[10, 10, 5], + dtype='float32') + label = paddle.fluid.data(name='label', + shape=[10, 10, 5], + dtype='float32') result0 = paddle.nn.functional.l1_loss(input, label) result1 = paddle.nn.functional.l1_loss(input, label, reduction='sum') result2 = paddle.nn.functional.l1_loss(input, label, reduction='none') @@ -56,10 +59,11 @@ class TestFunctionalL1Loss(unittest.TestCase): place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - static_result = exe.run( - feed={"input": self.input_np, - "label": self.label_np}, - fetch_list=[result0, result1, result2]) + static_result = exe.run(feed={ + "input": self.input_np, + "label": self.label_np + }, + fetch_list=[result0, result1, result2]) expected = np.mean(np.abs(self.input_np - self.label_np)) self.assertTrue(np.allclose(static_result[0], expected)) @@ -91,18 +95,23 @@ class TestFunctionalL1Loss(unittest.TestCase): # test case the raise message def test_errors(self): + def test_value_error(): - input = paddle.fluid.data( - name='input', shape=[10, 10, 5], dtype='float32') - label = paddle.fluid.data( - name='label', shape=[10, 10, 5], dtype='float32') - loss = paddle.nn.functional.l1_loss( - input, label, reduction='reduce_mean') + input = paddle.fluid.data(name='input', + shape=[10, 10, 5], + dtype='float32') + label = paddle.fluid.data(name='label', + shape=[10, 10, 5], + dtype='float32') + loss = paddle.nn.functional.l1_loss(input, + label, + reduction='reduce_mean') self.assertRaises(ValueError, test_value_error) class TestClassL1Loss(unittest.TestCase): + def setUp(self): self.input_np = np.random.random(size=(10, 10, 5)).astype(np.float32) self.label_np = np.random.random(size=(10, 10, 5)).astype(np.float32) @@ -129,10 +138,12 @@ class TestClassL1Loss(unittest.TestCase): self.assertTrue(dy_result.shape, [10, 10, 5]) def run_static(self, use_gpu=False): - input = paddle.fluid.data( - name='input', shape=[10, 10, 5], dtype='float32') - label = paddle.fluid.data( - name='label', shape=[10, 10, 5], dtype='float32') + input = paddle.fluid.data(name='input', + shape=[10, 10, 5], + dtype='float32') + label = paddle.fluid.data(name='label', + shape=[10, 10, 5], + dtype='float32') l1_loss = paddle.nn.loss.L1Loss() result0 = l1_loss(input, label) l1_loss = paddle.nn.loss.L1Loss(reduction='sum') @@ -145,10 +156,11 @@ class TestClassL1Loss(unittest.TestCase): place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - static_result = exe.run( - feed={"input": self.input_np, - "label": self.label_np}, - fetch_list=[result0, result1, result2]) + static_result = exe.run(feed={ + "input": self.input_np, + "label": self.label_np + }, + fetch_list=[result0, result1, result2]) expected = np.mean(np.abs(self.input_np - self.label_np)) self.assertTrue(np.allclose(static_result[0], expected)) @@ -179,6 +191,7 @@ class TestClassL1Loss(unittest.TestCase): # test case the raise message def test_errors(self): + def test_value_error(): loss = 
paddle.nn.loss.L1Loss(reduction="reduce_mean") diff --git a/python/paddle/fluid/tests/unittests/test_label_smooth_functional.py b/python/paddle/fluid/tests/unittests/test_label_smooth_functional.py index 54f5e64fda4..2fef18fd6b5 100644 --- a/python/paddle/fluid/tests/unittests/test_label_smooth_functional.py +++ b/python/paddle/fluid/tests/unittests/test_label_smooth_functional.py @@ -22,6 +22,7 @@ import unittest class LabelSmoothTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', label_shape=(20, 1), @@ -44,13 +45,13 @@ class LabelSmoothTestCase(unittest.TestCase): start = fluid.Program() with fluid.unique_name.guard(): with fluid.program_guard(main, start): - label_var = fluid.data( - "input", self.label_shape, dtype=self.dtype) - y_var = fluid.layers.label_smooth( - label_var, - prior_dist=self.prior_dist, - epsilon=self.epsilon, - dtype=self.dtype) + label_var = fluid.data("input", + self.label_shape, + dtype=self.dtype) + y_var = fluid.layers.label_smooth(label_var, + prior_dist=self.prior_dist, + epsilon=self.epsilon, + dtype=self.dtype) feed_dict = {"input": self.label} exe = fluid.Executor(place) exe.run(start) @@ -63,10 +64,12 @@ class LabelSmoothTestCase(unittest.TestCase): start = fluid.Program() with fluid.unique_name.guard(): with fluid.program_guard(main, start): - label_var = fluid.data( - "input", self.label_shape, dtype=self.dtype) - y_var = F.label_smooth( - label_var, prior_dist=self.prior_dist, epsilon=self.epsilon) + label_var = fluid.data("input", + self.label_shape, + dtype=self.dtype) + y_var = F.label_smooth(label_var, + prior_dist=self.prior_dist, + epsilon=self.epsilon) feed_dict = {"input": self.label} exe = fluid.Executor(place) exe.run(start) @@ -76,8 +79,9 @@ class LabelSmoothTestCase(unittest.TestCase): def paddle_dygraph_layer(self): paddle.disable_static() label_var = dg.to_variable(self.label) - y_var = F.label_smooth( - label_var, prior_dist=self.prior_dist, epsilon=self.epsilon) + y_var = F.label_smooth(label_var, + prior_dist=self.prior_dist, + epsilon=self.epsilon) y_np = y_var.numpy() return y_np @@ -98,6 +102,7 @@ class LabelSmoothTestCase(unittest.TestCase): class LabelSmoothErrorTestCase(LabelSmoothTestCase): + def runTest(self): place = fluid.CPUPlace() with dg.guard(place): @@ -108,8 +113,7 @@ class LabelSmoothErrorTestCase(LabelSmoothTestCase): def add_cases(suite): suite.addTest(LabelSmoothTestCase(methodName='runTest')) suite.addTest( - LabelSmoothTestCase( - methodName='runTest', label_shape=[2, 3, 1])) + LabelSmoothTestCase(methodName='runTest', label_shape=[2, 3, 1])) def add_error_cases(suite): diff --git a/python/paddle/fluid/tests/unittests/test_label_smooth_op.py b/python/paddle/fluid/tests/unittests/test_label_smooth_op.py index b1d49f8604e..926f86abeea 100644 --- a/python/paddle/fluid/tests/unittests/test_label_smooth_op.py +++ b/python/paddle/fluid/tests/unittests/test_label_smooth_op.py @@ -21,6 +21,7 @@ import paddle class TestLabelSmoothOp(OpTest): + def config(self): self.op_type = "label_smooth" self.python_api = paddle.nn.functional.label_smooth @@ -32,8 +33,8 @@ class TestLabelSmoothOp(OpTest): def setUp(self): self.config() - smoothed_label = (1 - self.epsilon - ) * self.label + self.epsilon / self.label_dim + smoothed_label = ( + 1 - self.epsilon) * self.label + self.epsilon / self.label_dim self.inputs = {'X': self.label} self.attrs = {'epsilon': self.epsilon} self.outputs = {'Out': smoothed_label} @@ -46,6 +47,7 @@ class TestLabelSmoothOp(OpTest): class 
TestLabelSmoothOpWithPriorDist(TestLabelSmoothOp): + def setUp(self): self.config() dist = np.random.random((1, self.label_dim)) @@ -56,21 +58,23 @@ class TestLabelSmoothOpWithPriorDist(TestLabelSmoothOp): class TestLabelSmoothOp3D(TestLabelSmoothOp): + def setUp(self): super(TestLabelSmoothOp3D, self).setUp() self.inputs['X'] = self.inputs['X'].reshape( [2, -1, self.inputs['X'].shape[-1]]) - self.outputs['Out'] = self.outputs['Out'].reshape(self.inputs['X'] - .shape) + self.outputs['Out'] = self.outputs['Out'].reshape( + self.inputs['X'].shape) class TestLabelSmoothOpWithPriorDist3D(TestLabelSmoothOpWithPriorDist): + def setUp(self): super(TestLabelSmoothOpWithPriorDist3D, self).setUp() self.inputs['X'] = self.inputs['X'].reshape( [2, -1, self.inputs['X'].shape[-1]]) - self.outputs['Out'] = self.outputs['Out'].reshape(self.inputs['X'] - .shape) + self.outputs['Out'] = self.outputs['Out'].reshape( + self.inputs['X'].shape) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_lamb_op.py b/python/paddle/fluid/tests/unittests/test_lamb_op.py index 26a8064dd90..e244e54e312 100644 --- a/python/paddle/fluid/tests/unittests/test_lamb_op.py +++ b/python/paddle/fluid/tests/unittests/test_lamb_op.py @@ -26,6 +26,7 @@ paddle.enable_static() class TestLambOp1(OpTest): + def set_attrs(self): self.attrs = { 'epsilon': 1e-4, @@ -75,6 +76,7 @@ class TestLambOp1(OpTest): class TestLambOp2(TestLambOp1): + def set_attrs(self): self.attrs = { 'epsilon': 1e-8, @@ -85,6 +87,7 @@ class TestLambOp2(TestLambOp1): class TestLambOpMultipleSteps(TestLambOp1): + def set_attrs(self): self.attrs = { 'epsilon': 1e-8, @@ -152,12 +155,14 @@ def lamb_step(inputs, attributes): moment2_unbiased = moment2_out / (1 - beta2_pow) r_1 = np.linalg.norm(param) - r_2 = np.linalg.norm(moment1_unbiased / (np.sqrt(moment2_unbiased) + epsilon - ) + weight_decay * param) + r_2 = np.linalg.norm(moment1_unbiased / + (np.sqrt(moment2_unbiased) + epsilon) + + weight_decay * param) lr_t = lr * r_1 / r_2 - param_out = param - lr_t * (moment1_unbiased / ( - np.sqrt(moment2_unbiased) + epsilon) + weight_decay * param) + param_out = param - lr_t * (moment1_unbiased / + (np.sqrt(moment2_unbiased) + epsilon) + + weight_decay * param) beta1_pow_out = beta1_pow * beta1 beta2_pow_out = beta2_pow * beta2 @@ -193,13 +198,13 @@ def lamb_step_sparse(inputs, attributes, height, rows, row_numel, np_grad): moment2_unbiased = np.zeros(shape=[height, row_numel]) def update_mom(row_id, update_value): - moment1_out[row_id] = beta1 * moment1[row_id] + (1 - beta1 - ) * update_value + moment1_out[row_id] = beta1 * moment1[row_id] + (1 - + beta1) * update_value moment2_out[row_id] = beta2 * moment2[row_id] + ( 1 - beta2) * np.square(update_value) - moment1_out[row_id] = beta1 * moment1[row_id] + (1 - beta1 - ) * update_value + moment1_out[row_id] = beta1 * moment1[row_id] + (1 - + beta1) * update_value moment2_out[row_id] = beta2 * moment2[row_id] + ( 1 - beta2) * np.square(update_value) @@ -209,8 +214,9 @@ def lamb_step_sparse(inputs, attributes, height, rows, row_numel, np_grad): weight_decay * param) lr_t = lr * r_1 / r_2 - param_out = param - lr_t * (moment1_out / ( - np.sqrt(moment2_out) + epsilon) + weight_decay * param) + param_out = param - lr_t * (moment1_out / + (np.sqrt(moment2_out) + epsilon) + + weight_decay * param) for row_id in range(param_out.shape[0]): update_value = np.zeros(np_grad[0].shape).astype("float32") @@ -226,6 +232,7 @@ def lamb_step_sparse(inputs, attributes, height, rows, row_numel, np_grad): class 
TestSparseLambOp(unittest.TestCase): + def setup(self, scope, place): beta1 = 0.78 beta2 = 0.836 diff --git a/python/paddle/fluid/tests/unittests/test_lambv2_op.py b/python/paddle/fluid/tests/unittests/test_lambv2_op.py index 674cd9a3e9c..cde23216c10 100644 --- a/python/paddle/fluid/tests/unittests/test_lambv2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lambv2_op.py @@ -26,6 +26,7 @@ import paddle.fluid.layers as layers class LAMBOptimizer(paddle.optimizer.Lamb): + def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, fluid.framework.Block) block.program._use_lamb = True @@ -38,18 +39,24 @@ class LAMBOptimizer(paddle.optimizer.Lamb): beta_2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str, param_and_grad[0]) - beta_1 = layers.fill_constant( - dtype='float32', shape=[1], value=self._beta1, name='lamb_beta_1') - beta_2 = layers.fill_constant( - dtype='float32', shape=[1], value=self._beta2, name='lamb_beta_2') - epsilon = layers.fill_constant( - dtype='float32', shape=[1], value=self._epsilon, name='epsilon') + beta_1 = layers.fill_constant(dtype='float32', + shape=[1], + value=self._beta1, + name='lamb_beta_1') + beta_2 = layers.fill_constant(dtype='float32', + shape=[1], + value=self._beta2, + name='lamb_beta_2') + epsilon = layers.fill_constant(dtype='float32', + shape=[1], + value=self._epsilon, + name='epsilon') one = paddle.ones(shape=[1]).astype('float32') zero = paddle.zeros(shape=[1]).astype('float32') - next_m = paddle.multiply(m, beta_1) + paddle.multiply(param_and_grad[1], - one - beta_1) + next_m = paddle.multiply(m, beta_1) + paddle.multiply( + param_and_grad[1], one - beta_1) next_v = paddle.multiply(v, beta_2) + paddle.multiply( paddle.pow(param_and_grad[1], 2), one - beta_2) @@ -73,8 +80,8 @@ class LAMBOptimizer(paddle.optimizer.Lamb): ratio = paddle.where( paddle.greater_than(w_norm, zero), - paddle.where( - paddle.greater_than(g_norm, zero), (w_norm / g_norm), one), one) + paddle.where(paddle.greater_than(g_norm, zero), (w_norm / g_norm), + one), one) update_with_lr = ratio * learning_rate * update next_param = param_and_grad[0] - update_with_lr @@ -89,14 +96,16 @@ class LAMBOptimizer(paddle.optimizer.Lamb): class TestLambOpV2(unittest.TestCase): + def test_lamb_op(self): shape = [2, 4, 8, 8] data = paddle.to_tensor(np.random.random(size=shape).astype("float32")) conv = paddle.nn.Conv2D(4, 6, (3, 3)) data = conv(data) loss = paddle.mean(data) - opt = paddle.optimizer.Lamb( - learning_rate=1e-5, epsilon=1e-8, parameters=conv.parameters()) + opt = paddle.optimizer.Lamb(learning_rate=1e-5, + epsilon=1e-8, + parameters=conv.parameters()) loss.backward() opt.minimize(loss) @@ -104,6 +113,7 @@ class TestLambOpV2(unittest.TestCase): class TestLambOpWithCombinedOp(unittest.TestCase): + def test_lamb_op_with_multi_steps(self): paddle.enable_static() @@ -135,8 +145,10 @@ class TestLambOpWithCombinedOp(unittest.TestCase): executor = fluid.Executor(place) executor.run(startup_program) output = executor.run(program=main_program, - feed={'X': feed_x, - 'Y': feed_y}, + feed={ + 'X': feed_x, + 'Y': feed_y + }, fetch_list=[avg_loss.name]) main = fluid.Program() @@ -149,14 +161,17 @@ class TestLambOpWithCombinedOp(unittest.TestCase): exe = fluid.Executor(place) exe.run(startup) out = exe.run(program=main, - feed={'X': feed_x, - 'Y': feed_y}, + feed={ + 'X': feed_x, + 'Y': feed_y + }, fetch_list=[loss.name]) self.assertTrue(np.allclose(out, output)) class TestLambOpV2Group(TestLambOpV2): + def test_lamb_op(self): paddle.disable_static() value = 
np.arange(26).reshape(2, 13).astype("float32") @@ -164,17 +179,16 @@ class TestLambOpV2Group(TestLambOpV2): linear_1 = paddle.nn.Linear(13, 5) linear_2 = paddle.nn.Linear(5, 3) # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Lamb( - learning_rate=0.01, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'lamb_weight_decay': 0.001, - 'beta1': 0.9, - 'beta2': 0.99 - }], - lamb_weight_decay=0.01) + adam = paddle.optimizer.Lamb(learning_rate=0.01, + parameters=[{ + 'params': linear_1.parameters() + }, { + 'params': linear_2.parameters(), + 'lamb_weight_decay': 0.001, + 'beta1': 0.9, + 'beta2': 0.99 + }], + lamb_weight_decay=0.01) out = linear_1(a) out = linear_2(out) out.backward() @@ -183,14 +197,16 @@ class TestLambOpV2Group(TestLambOpV2): class TestLambOpMultiPrecision(unittest.TestCase): + def check_main(self, x_np, place, multi_precision=False, seed=10, n=10): main_prog = paddle.static.Program() startup_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, startup_prog): paddle.seed(seed) with paddle.static.amp.fp16_guard(): - x = paddle.static.data( - name='x', shape=[None, 10], dtype='float32') + x = paddle.static.data(name='x', + shape=[None, 10], + dtype='float32') linear = paddle.nn.Linear(10, 2) hidden = linear(x) loss = paddle.mean(hidden) @@ -198,8 +214,9 @@ class TestLambOpMultiPrecision(unittest.TestCase): original_optimizer = paddle.optimizer.Lamb(learning_rate=1e-3) original_optimizer._multi_precision = multi_precision if multi_precision: - optimizer = paddle.static.amp.decorate( - original_optimizer, use_pure_fp16=True, use_fp16_guard=True) + optimizer = paddle.static.amp.decorate(original_optimizer, + use_pure_fp16=True, + use_fp16_guard=True) else: optimizer = original_optimizer optimizer.minimize(loss) @@ -242,8 +259,8 @@ class TestLambOpMultiPrecision(unittest.TestCase): fetch_list=[weight, bias]) weight_np = weight_np.astype('float32') bias_np = bias_np.astype('float32') - self.assertTrue( - np.array_equal(weight_np, get_parameter(weight))) + self.assertTrue(np.array_equal(weight_np, + get_parameter(weight))) self.assertTrue(np.array_equal(bias_np, get_parameter(bias))) return weight_np, bias_np diff --git a/python/paddle/fluid/tests/unittests/test_launch_coverage.py b/python/paddle/fluid/tests/unittests/test_launch_coverage.py index 9fbf27e3c1d..e4c35a63471 100644 --- a/python/paddle/fluid/tests/unittests/test_launch_coverage.py +++ b/python/paddle/fluid/tests/unittests/test_launch_coverage.py @@ -1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -42,69 +42,70 @@ PADDLE_TRAINER_ENDPOINTS POD_IP (current node ip address, not needed for local training) ''') - #Optional arguments for the launch helper + #Optional arguments for the launch helper parser.add_argument( "--cluster_node_ips", type=str, default="127.0.0.1", help="Paddle cluster nodes ips, such as 192.168.0.16,192.168.0.17..") - parser.add_argument( - "--node_ip", - type=str, - default="127.0.0.1", - help="The current node ip. 
") + parser.add_argument("--node_ip", + type=str, + default="127.0.0.1", + help="The current node ip. ") parser.add_argument( "--use_paddlecloud", action='store_true', - help="wheter to use paddlecloud platform to run your multi-process job. If false, no need to set this argument." + help= + "wheter to use paddlecloud platform to run your multi-process job. If false, no need to set this argument." ) - parser.add_argument( - "--started_port", - type=int, - default=None, - help="The trainer's started port on a single node") + parser.add_argument("--started_port", + type=int, + default=None, + help="The trainer's started port on a single node") - parser.add_argument( - "--print_config", - type=bool, - default=True, - help="Print the config or not") + parser.add_argument("--print_config", + type=bool, + default=True, + help="Print the config or not") parser.add_argument( "--selected_gpus", type=str, default=None, - help="It's for gpu training and the training process will run on the selected_gpus," + help= + "It's for gpu training and the training process will run on the selected_gpus," "each process is bound to a single GPU. And if it's not set, this module will use all the gpu cards for training." ) parser.add_argument( "--log_level", type=int, - default=20, # logging.INFO, details are here:https://docs.python.org/3/library/logging.html#levels + default= + 20, # logging.INFO, details are here:https://docs.python.org/3/library/logging.html#levels help="Logging level, default is logging.INFO") parser.add_argument( "--log_dir", type=str, - help="The path for each process's log.If it's not set, the log will printed to default pipe." + help= + "The path for each process's log.If it's not set, the log will printed to default pipe." ) - #positional - parser.add_argument( - "training_script", - type=str, - help="The full path to the single GPU training " - "program/script to be launched in parallel, " - "followed by all the arguments for the " - "training script") + #positional + parser.add_argument("training_script", + type=str, + help="The full path to the single GPU training " + "program/script to be launched in parallel, " + "followed by all the arguments for the " + "training script") - #rest from the training program + #rest from the training program parser.add_argument('training_script_args', nargs=REMAINDER) return parser.parse_args() class TestCoverage(unittest.TestCase): + def test_gpus(self): args = _parse_args() diff --git a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py index b75dc2c964c..1cc2906731b 100644 --- a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py @@ -83,27 +83,33 @@ def _reference_layer_norm_grad(x, # dx if scale is not None: dx_end = scale * np.sqrt(1.0 / var) * grad_y - d_mean_0 = np.sum(-np.sqrt(1.0 / var) * grad_y * scale, axis=1).reshape( - [N, 1]) # the second part equals to zero. + d_mean_0 = np.sum(-np.sqrt(1.0 / var) * grad_y * scale, + axis=1).reshape([N, 1 + ]) # the second part equals to zero. 
d_mean = 1.0 / D * d_mean_0 - d_std = np.sum(-(1.0 / var) * (x - mean) * grad_y * scale, - axis=1).reshape([N, 1]) * ( + d_std = np.sum(-(1.0 / var) * + (x - mean) * grad_y * scale, axis=1).reshape([N, 1]) * ( 1.0 / D * np.sqrt(1.0 / var).reshape([N, 1]) * (x - mean)) else: dx_end = 1.0 * np.sqrt(1.0 / var) * grad_y - d_mean_0 = np.sum(-np.sqrt(1.0 / var) * grad_y * 1.0, axis=1).reshape( - [N, 1]) # the second part equals to zero. + d_mean_0 = np.sum(-np.sqrt(1.0 / var) * grad_y * 1.0, + axis=1).reshape([N, 1 + ]) # the second part equals to zero. d_mean = 1.0 / D * d_mean_0 - d_std = np.sum(-(1.0 / var) * (x - mean) * grad_y * 1.0, - axis=1).reshape([N, 1]) * ( + d_std = np.sum(-(1.0 / var) * + (x - mean) * grad_y * 1.0, axis=1).reshape([N, 1]) * ( 1.0 / D * np.sqrt(1.0 / var).reshape([N, 1]) * (x - mean)) grad_x = dx_end + d_mean + d_std grad_x.shape, x.shape, grad_y.shape = x_shape, x_shape, x_shape - var.shape, mean.shape = [N, ], [N, ] + var.shape, mean.shape = [ + N, + ], [ + N, + ] if scale is not None: scale.shape = scale_shape @@ -111,6 +117,7 @@ def _reference_layer_norm_grad(x, class TestLayerNormOp(unittest.TestCase): + def setUp(self): self.use_cudnn = True @@ -124,6 +131,7 @@ class TestLayerNormOp(unittest.TestCase): has_bias=True, y_grad_scale=1.0, use_mkldnn=False): + def test_with_place(place, shape, begin_norm_axis, @@ -140,8 +148,8 @@ class TestLayerNormOp(unittest.TestCase): np.float32) if has_scale else None bias = np.random.random_sample(scale_shape).astype( np.float32) if has_bias else None - y_grad = (np.random.random_sample(x_shape) * - y_grad_scale).astype(np.float32) + y_grad = (np.random.random_sample(x_shape) * y_grad_scale).astype( + np.float32) # reference forward & backward y, mean, variance = _reference_layer_norm_naive( @@ -162,10 +170,9 @@ class TestLayerNormOp(unittest.TestCase): with fluid.program_guard(program): block = program.global_block() for name in ground_truth: - block.create_var( - name=name, - dtype='float32', - shape=ground_truth[name].shape) + block.create_var(name=name, + dtype='float32', + shape=ground_truth[name].shape) inputs = {"X": block.var('x')} fetch_list = [ 'y', @@ -242,83 +249,79 @@ class TestLayerNormOp(unittest.TestCase): self.check_forward_backward(shape=[1, 3, 4, 5], begin_norm_axis=1) self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=1) - self.check_forward_backward( - shape=[2, 3, 4, 5], - begin_norm_axis=1, - has_scale=False, - has_bias=True) - self.check_forward_backward( - shape=[2, 3, 4, 5], - begin_norm_axis=1, - has_scale=True, - has_bias=False) - self.check_forward_backward( - shape=[2, 3, 4, 5], - begin_norm_axis=1, - has_scale=False, - has_bias=False) + self.check_forward_backward(shape=[2, 3, 4, 5], + begin_norm_axis=1, + has_scale=False, + has_bias=True) + self.check_forward_backward(shape=[2, 3, 4, 5], + begin_norm_axis=1, + has_scale=True, + has_bias=False) + self.check_forward_backward(shape=[2, 3, 4, 5], + begin_norm_axis=1, + has_scale=False, + has_bias=False) self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3) - self.check_forward_backward( - shape=[92, 513, 129], begin_norm_axis=2, y_grad_scale=0.1) + self.check_forward_backward(shape=[92, 513, 129], + begin_norm_axis=2, + y_grad_scale=0.1) self.check_forward_backward(shape=[3, 34, 1134], begin_norm_axis=2) - self.check_forward_backward( - shape=[92, 513, 1134], begin_norm_axis=2, y_grad_scale=0.1) - self.check_forward_backward( - shape=[92, 513, 1134], - begin_norm_axis=2, - has_scale=False, - has_bias=True, - 
y_grad_scale=0.1) - self.check_forward_backward( - shape=[92, 513, 1134], - begin_norm_axis=2, - has_scale=True, - has_bias=False, - y_grad_scale=0.1) - self.check_forward_backward( - shape=[92, 513, 1134], - begin_norm_axis=2, - has_scale=False, - has_bias=False, - y_grad_scale=0.1) - self.check_forward_backward( - shape=[512, 1024], begin_norm_axis=1, has_scale=True, has_bias=True) + self.check_forward_backward(shape=[92, 513, 1134], + begin_norm_axis=2, + y_grad_scale=0.1) + self.check_forward_backward(shape=[92, 513, 1134], + begin_norm_axis=2, + has_scale=False, + has_bias=True, + y_grad_scale=0.1) + self.check_forward_backward(shape=[92, 513, 1134], + begin_norm_axis=2, + has_scale=True, + has_bias=False, + y_grad_scale=0.1) + self.check_forward_backward(shape=[92, 513, 1134], + begin_norm_axis=2, + has_scale=False, + has_bias=False, + y_grad_scale=0.1) + self.check_forward_backward(shape=[512, 1024], + begin_norm_axis=1, + has_scale=True, + has_bias=True) class TestLayerNormAPI(unittest.TestCase): + def test_case(self): - x = fluid.layers.data( - name='x', - shape=[64, 32, 256], - dtype='float32', - append_batch_size=False) - x = fluid.layers.layer_norm( - x, - scale=True, - shift=True, - begin_norm_axis=1, - epsilon=1e-05, - param_attr=None, - bias_attr=None) - x = fluid.layers.layer_norm( - x, - scale=False, - shift=False, - begin_norm_axis=1, - epsilon=1e-05, - param_attr=None, - bias_attr=None) - x = fluid.layers.layer_norm( - x, - scale=False, - shift=False, - begin_norm_axis=1, - epsilon=1e-05, - param_attr="scale", - bias_attr="shift") + x = fluid.layers.data(name='x', + shape=[64, 32, 256], + dtype='float32', + append_batch_size=False) + x = fluid.layers.layer_norm(x, + scale=True, + shift=True, + begin_norm_axis=1, + epsilon=1e-05, + param_attr=None, + bias_attr=None) + x = fluid.layers.layer_norm(x, + scale=False, + shift=False, + begin_norm_axis=1, + epsilon=1e-05, + param_attr=None, + bias_attr=None) + x = fluid.layers.layer_norm(x, + scale=False, + shift=False, + begin_norm_axis=1, + epsilon=1e-05, + param_attr="scale", + bias_attr="shift") class TestDygraphLayerNormAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): paddle.enable_static() @@ -335,6 +338,7 @@ class TestDygraphLayerNormAPIError(unittest.TestCase): class TestFP16ScaleBiasLayerNorm(unittest.TestCase): + def check_main(self, x_np, weight_np, bias_np, dtype): paddle.disable_static() @@ -379,6 +383,7 @@ class TestFP16ScaleBiasLayerNorm(unittest.TestCase): class TestBF16ScaleBiasLayerNorm(unittest.TestCase): + def check_main(self, x_np, weight_np, bias_np, dtype): paddle.disable_static() @@ -426,6 +431,7 @@ class TestBF16ScaleBiasLayerNorm(unittest.TestCase): class TestGetSetKeepLayerNormScaleBiasFP32Flag(unittest.TestCase): + def test_main(self): self.assertTrue(_keep_layer_norm_scale_bias_to_fp32()) _keep_layer_norm_scale_bias_to_fp32(False) diff --git a/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py index 85c6694324d..0242df213f2 100644 --- a/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py @@ -26,6 +26,7 @@ import paddle class TestDygraphLayerNormv2(unittest.TestCase): + def test_dygraph(self): places = [fluid.CPUPlace()] if core.is_compiled_with_cuda() and core.op_support_gpu("layer_norm"): @@ -116,6 +117,7 @@ class TestDygraphLayerNormv2(unittest.TestCase): class TestLayerNormFunction(unittest.TestCase): + 
def test_dygraph(self): places = [fluid.CPUPlace()] if core.is_compiled_with_cuda() and core.op_support_gpu("layer_norm"): @@ -163,11 +165,10 @@ class TestLayerNormFunction(unittest.TestCase): y4 = compute_v4(x) self.assertTrue(np.allclose(y3, y4)) - self.assertRaises( - ValueError, - paddle.nn.functional.layer_norm, - x=x, - normalized_shape=1.0) + self.assertRaises(ValueError, + paddle.nn.functional.layer_norm, + x=x, + normalized_shape=1.0) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 676f35838ad..aead014e7ab 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -38,6 +38,7 @@ from paddle.fluid.framework import _test_eager_guard class LayerTest(unittest.TestCase): + @classmethod def setUpClass(cls): cls.seed = 111 @@ -84,12 +85,16 @@ class LayerTest(unittest.TestCase): class TestLayer(LayerTest): + def test_custom_layer_with_kwargs(self): + class CustomLayer(fluid.Layer): + def __init__(self, input_size, linear1_size=4): super(CustomLayer, self).__init__() - self.linear1 = nn.Linear( - input_size, linear1_size, bias_attr=False) + self.linear1 = nn.Linear(input_size, + linear1_size, + bias_attr=False) self.linear2 = nn.Linear(linear1_size, 1, bias_attr=False) def forward(self, x, do_linear2=False): @@ -118,15 +123,16 @@ class TestLayer(LayerTest): def test_dropout(self): inp = np.ones([3, 32, 32], dtype='float32') with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 32, 32], - dtype='float32', - append_batch_size=False) + t = layers.data(name='data', + shape=[3, 32, 32], + dtype='float32', + append_batch_size=False) dropout = nn.Dropout(p=0.35, seed=1, is_test=False) ret = dropout(t) - ret2 = fluid.layers.dropout( - t, dropout_prob=0.35, seed=1, is_test=False) + ret2 = fluid.layers.dropout(t, + dropout_prob=0.35, + seed=1, + is_test=False) static_ret, static_ret2 = self.get_static_graph_result( feed={'data': inp}, fetch_list=[ret, ret2]) with self.dynamic_graph(): @@ -134,16 +140,20 @@ class TestLayer(LayerTest): t = base.to_variable(inp) dropout = nn.Dropout(p=0.35, seed=1, is_test=False) dy_eager_ret = dropout(t) - dy_eager_ret2 = fluid.layers.dropout( - t, dropout_prob=0.35, seed=1, is_test=False) + dy_eager_ret2 = fluid.layers.dropout(t, + dropout_prob=0.35, + seed=1, + is_test=False) dy_eager_ret_value = dy_eager_ret.numpy() dy_eager_ret2_value = dy_eager_ret2.numpy() t = base.to_variable(inp) dropout = nn.Dropout(p=0.35, seed=1, is_test=False) dy_ret = dropout(t) - dy_ret2 = fluid.layers.dropout( - t, dropout_prob=0.35, seed=1, is_test=False) + dy_ret2 = fluid.layers.dropout(t, + dropout_prob=0.35, + seed=1, + is_test=False) dy_ret_value = dy_ret.numpy() dy_ret2_value = dy_ret2.numpy() @@ -157,16 +167,15 @@ class TestLayer(LayerTest): def test_linear(self): inp = np.ones([3, 32, 32], dtype='float32') with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 32, 32], - dtype='float32', - append_batch_size=False) + t = layers.data(name='data', + shape=[3, 32, 32], + dtype='float32', + append_batch_size=False) linear = nn.Linear( 32, 4, bias_attr=fluid.initializer.ConstantInitializer(value=1)) ret = linear(t) - static_ret = self.get_static_graph_result( - feed={'data': inp}, fetch_list=[ret])[0] + static_ret = self.get_static_graph_result(feed={'data': inp}, + fetch_list=[ret])[0] with self.dynamic_graph(): with _test_eager_guard(): t = base.to_variable(inp) @@ -214,15 +223,14 @@ 
class TestLayer(LayerTest): def test_Flatten(self): inp = np.ones([3, 4, 4, 5], dtype='float32') with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 4, 4, 5], - dtype='float32', - append_batch_size=False) + t = layers.data(name='data', + shape=[3, 4, 4, 5], + dtype='float32', + append_batch_size=False) flatten = nn.Flatten() ret = flatten(t) - static_ret = self.get_static_graph_result( - feed={'data': inp}, fetch_list=[ret])[0] + static_ret = self.get_static_graph_result(feed={'data': inp}, + fetch_list=[ret])[0] with self.dynamic_graph(): with _test_eager_guard(): t = base.to_variable(inp) @@ -266,30 +274,28 @@ class TestLayer(LayerTest): def test_layer_norm(self): inp = np.ones([3, 32, 32], dtype='float32') with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 32, 32], - dtype='float32', - append_batch_size=False) + t = layers.data(name='data', + shape=[3, 32, 32], + dtype='float32', + append_batch_size=False) ret = layers.layer_norm( t, bias_attr=fluid.initializer.ConstantInitializer(value=1), act='sigmoid') - static_ret = self.get_static_graph_result( - feed={'data': inp}, fetch_list=[ret])[0] + static_ret = self.get_static_graph_result(feed={'data': inp}, + fetch_list=[ret])[0] with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 32, 32], - dtype='float32', - append_batch_size=False) + t = layers.data(name='data', + shape=[3, 32, 32], + dtype='float32', + append_batch_size=False) lm = nn.LayerNorm( normalized_shape=[32, 32], bias_attr=fluid.initializer.ConstantInitializer(value=1), act='sigmoid') ret = lm(t) - static_ret2 = self.get_static_graph_result( - feed={'data': inp}, fetch_list=[ret])[0] + static_ret2 = self.get_static_graph_result(feed={'data': inp}, + fetch_list=[ret])[0] with self.dynamic_graph(): with _test_eager_guard(): lm = nn.LayerNorm( @@ -359,8 +365,7 @@ class TestLayer(LayerTest): my_sync_bn = paddle.nn.SyncBatchNorm(3) ret = my_sync_bn(t) static_ret = self.get_static_graph_result( - feed={'t': np.ones( - [3, 3, 5, 5], dtype='float32')}, + feed={'t': np.ones([3, 3, 5, 5], dtype='float32')}, fetch_list=[ret])[0] with self.dynamic_graph(): @@ -382,8 +387,8 @@ class TestLayer(LayerTest): t = layers.data(name='t', shape=[3, 3], dtype='float32') ret = layers.relu(t) static_ret = self.get_static_graph_result( - feed={'t': np.ones( - [3, 3], dtype='float32')}, fetch_list=[ret])[0] + feed={'t': np.ones([3, 3], + dtype='float32')}, fetch_list=[ret])[0] with self.dynamic_graph(): with _test_eager_guard(): @@ -403,21 +408,20 @@ class TestLayer(LayerTest): t = layers.data(name='t', shape=[3, 3], dtype='float32') t2 = layers.data(name='t2', shape=[3, 3], dtype='float32') ret = layers.matmul(t, t2) - static_ret = self.get_static_graph_result( - feed={ - 't': np.ones( - [3, 3], dtype='float32'), - 't2': np.ones( - [3, 3], dtype='float32') - }, - fetch_list=[ret])[0] + static_ret = self.get_static_graph_result(feed={ + 't': + np.ones([3, 3], dtype='float32'), + 't2': + np.ones([3, 3], dtype='float32') + }, + fetch_list=[ret])[0] with self.dynamic_graph(): with _test_eager_guard(): t = np.ones([3, 3], dtype='float32') t2 = np.ones([3, 3], dtype='float32') - dy_eager_ret = layers.matmul( - base.to_variable(t), base.to_variable(t2)) + dy_eager_ret = layers.matmul(base.to_variable(t), + base.to_variable(t2)) dy_eager_ret_value = dy_eager_ret.numpy() t = np.ones([3, 3], dtype='float32') @@ -433,51 +437,50 @@ class TestLayer(LayerTest): images = layers.data(name='pixel', shape=[3, 5, 5], dtype='float32') ret = 
layers.conv2d(input=images, num_filters=3, filter_size=[2, 2]) static_ret = self.get_static_graph_result( - feed={'pixel': np.ones( - [2, 3, 5, 5], dtype='float32')}, + feed={'pixel': np.ones([2, 3, 5, 5], dtype='float32')}, fetch_list=[ret])[0] with self.static_graph(): images = layers.data(name='pixel', shape=[3, 5, 5], dtype='float32') - conv2d = nn.Conv2D( - num_channels=3, num_filters=3, filter_size=[2, 2]) + conv2d = nn.Conv2D(num_channels=3, + num_filters=3, + filter_size=[2, 2]) ret = conv2d(images) static_ret2 = self.get_static_graph_result( - feed={'pixel': np.ones( - [2, 3, 5, 5], dtype='float32')}, + feed={'pixel': np.ones([2, 3, 5, 5], dtype='float32')}, fetch_list=[ret])[0] with self.dynamic_graph(): with _test_eager_guard(): images = np.ones([2, 3, 5, 5], dtype='float32') - conv2d = nn.Conv2D( - num_channels=3, num_filters=3, filter_size=[2, 2]) + conv2d = nn.Conv2D(num_channels=3, + num_filters=3, + filter_size=[2, 2]) dy_eager_ret = conv2d(base.to_variable(images)) dy_eager_ret_value = dy_eager_ret.numpy() images = np.ones([2, 3, 5, 5], dtype='float32') - conv2d = nn.Conv2D( - num_channels=3, num_filters=3, filter_size=[2, 2]) + conv2d = nn.Conv2D(num_channels=3, + num_filters=3, + filter_size=[2, 2]) dy_ret = conv2d(base.to_variable(images)) dy_ret_value = dy_ret.numpy() with self.dynamic_graph(): with _test_eager_guard(): images = np.ones([2, 3, 5, 5], dtype='float32') - conv2d = nn.Conv2D( - num_channels=3, - num_filters=3, - filter_size=[2, 2], - bias_attr=False) + conv2d = nn.Conv2D(num_channels=3, + num_filters=3, + filter_size=[2, 2], + bias_attr=False) dy_ret = conv2d(base.to_variable(images)) self.assertTrue(conv2d.bias is None) images = np.ones([2, 3, 5, 5], dtype='float32') - conv2d = nn.Conv2D( - num_channels=3, - num_filters=3, - filter_size=[2, 2], - bias_attr=False) + conv2d = nn.Conv2D(num_channels=3, + num_filters=3, + filter_size=[2, 2], + bias_attr=False) dy_ret = conv2d(base.to_variable(images)) self.assertTrue(conv2d.bias is None) @@ -485,8 +488,9 @@ class TestLayer(LayerTest): # the input of Conv2D must be Variable. 
def test_Variable(): images = np.ones([2, 3, 5, 5], dtype='float32') - conv2d = nn.Conv2D( - num_channels=3, num_filters=3, filter_size=[2, 2]) + conv2d = nn.Conv2D(num_channels=3, + num_filters=3, + filter_size=[2, 2]) conv2d_ret1 = conv2d(images) self.assertRaises(TypeError, test_Variable) @@ -494,10 +498,12 @@ class TestLayer(LayerTest): # the input dtype of Conv2D must be float16 or float32 or float64 # float16 only can be set on GPU place def test_type(): - images = layers.data( - name='pixel', shape=[3, 5, 5], dtype='int32') - conv2d = nn.Conv2D( - num_channels=3, num_filters=3, filter_size=[2, 2]) + images = layers.data(name='pixel', + shape=[3, 5, 5], + dtype='int32') + conv2d = nn.Conv2D(num_channels=3, + num_filters=3, + filter_size=[2, 2]) conv2d_ret2 = conv2d(images) self.assertRaises(TypeError, test_type) @@ -513,13 +519,13 @@ class TestLayer(LayerTest): weight_attr = fluid.ParamAttr( initializer=fluid.initializer.NumpyArrayInitializer( custom_weight)) - conv2d1 = nn.Conv2D( - num_channels=3, num_filters=3, filter_size=[2, 2]) - conv2d2 = nn.Conv2D( - num_channels=3, - num_filters=3, - filter_size=[2, 2], - param_attr=weight_attr) + conv2d1 = nn.Conv2D(num_channels=3, + num_filters=3, + filter_size=[2, 2]) + conv2d2 = nn.Conv2D(num_channels=3, + num_filters=3, + filter_size=[2, 2], + param_attr=weight_attr) dy_ret1 = conv2d1(base.to_variable(images)) dy_ret2 = conv2d2(base.to_variable(images)) self.assertFalse( @@ -535,8 +541,8 @@ class TestLayer(LayerTest): conv2d2.bias.set_value(conv2d1_bias) dy_ret1 = conv2d1(base.to_variable(images)) dy_ret2 = conv2d2(base.to_variable(images)) - self.assertTrue( - np.array_equal(dy_ret1.numpy(), dy_ret2.numpy())) + self.assertTrue(np.array_equal(dy_ret1.numpy(), + dy_ret2.numpy())) conv2d2.weight = conv2d1.weight conv2d2.bias = conv2d1.bias @@ -548,16 +554,15 @@ class TestLayer(LayerTest): images = np.ones([2, 3, 5, 5], dtype='float32') custom_weight = np.random.randn(3, 3, 2, 2).astype("float32") - weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight)) - conv2d1 = nn.Conv2D( - num_channels=3, num_filters=3, filter_size=[2, 2]) - conv2d2 = nn.Conv2D( - num_channels=3, - num_filters=3, - filter_size=[2, 2], - param_attr=weight_attr) + weight_attr = fluid.ParamAttr(initializer=fluid.initializer. 
+ NumpyArrayInitializer(custom_weight)) + conv2d1 = nn.Conv2D(num_channels=3, + num_filters=3, + filter_size=[2, 2]) + conv2d2 = nn.Conv2D(num_channels=3, + num_filters=3, + filter_size=[2, 2], + param_attr=weight_attr) dy_ret1 = conv2d1(base.to_variable(images)) dy_ret2 = conv2d2(base.to_variable(images)) self.assertFalse(np.array_equal(dy_ret1.numpy(), dy_ret2.numpy())) @@ -596,8 +601,10 @@ class TestLayer(LayerTest): updated_hidden, reset_hidden_pre, gate = layers.gru_unit( input=x, hidden=hidden, size=D * 3) static_ret = self.get_static_graph_result( - feed={'x': input, - 'hidden': hidden_input}, + feed={ + 'x': input, + 'hidden': hidden_input + }, fetch_list=[updated_hidden, reset_hidden_pre, gate]) with self.static_graph(): @@ -609,22 +616,24 @@ class TestLayer(LayerTest): updated_hidden, reset_hidden_pre, gate = gru(x, hidden) static_ret2 = self.get_static_graph_result( - feed={'x': input, - 'hidden': hidden_input}, + feed={ + 'x': input, + 'hidden': hidden_input + }, fetch_list=[updated_hidden, reset_hidden_pre, gate]) with self.dynamic_graph(): with _test_eager_guard(): gru = nn.GRUUnit(size=D * 3) - dy_eager_ret = gru( - base.to_variable(input), base.to_variable(hidden_input)) + dy_eager_ret = gru(base.to_variable(input), + base.to_variable(hidden_input)) dy_eager_ret_value = [] for i in range(len(static_ret)): dy_eager_ret_value.append(dy_eager_ret[i].numpy()) gru = nn.GRUUnit(size=D * 3) - dy_ret = gru( - base.to_variable(input), base.to_variable(hidden_input)) + dy_ret = gru(base.to_variable(input), + base.to_variable(hidden_input)) dy_ret_value = [] for i in range(len(static_ret)): dy_ret_value.append(dy_ret[i].numpy()) @@ -642,20 +651,20 @@ class TestLayer(LayerTest): custom_weight)) gru1 = nn.GRUUnit(size=D * 3) gru2 = nn.GRUUnit(size=D * 3, param_attr=weight_attr) - dy_ret1 = gru1( - base.to_variable(input), base.to_variable(hidden_input)) - dy_ret2 = gru2( - base.to_variable(input), base.to_variable(hidden_input)) + dy_ret1 = gru1(base.to_variable(input), + base.to_variable(hidden_input)) + dy_ret2 = gru2(base.to_variable(input), + base.to_variable(hidden_input)) self.assertFalse( np.array_equal(gru1.weight.numpy(), gru2.weight.numpy())) for o1, o2 in zip(dy_ret1, dy_ret2): self.assertFalse(np.array_equal(o1.numpy(), o2.numpy())) gru2.weight.set_value(gru1.weight.numpy()) gru2.bias.set_value(gru1.bias) - dy_ret1 = gru1( - base.to_variable(input), base.to_variable(hidden_input)) - dy_ret2 = gru2( - base.to_variable(input), base.to_variable(hidden_input)) + dy_ret1 = gru1(base.to_variable(input), + base.to_variable(hidden_input)) + dy_ret2 = gru2(base.to_variable(input), + base.to_variable(hidden_input)) for o1, o2 in zip(dy_ret1, dy_ret2): self.assertTrue(np.array_equal(o1.numpy(), o2.numpy())) @@ -667,25 +676,24 @@ class TestLayer(LayerTest): np.array_equal(gru1.bias.numpy(), gru2.bias.numpy())) custom_weight = np.random.randn(D, D * 3).astype("float32") - weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight)) + weight_attr = fluid.ParamAttr(initializer=fluid.initializer. 
+ NumpyArrayInitializer(custom_weight)) gru1 = nn.GRUUnit(size=D * 3) gru2 = nn.GRUUnit(size=D * 3, param_attr=weight_attr) - dy_ret1 = gru1( - base.to_variable(input), base.to_variable(hidden_input)) - dy_ret2 = gru2( - base.to_variable(input), base.to_variable(hidden_input)) + dy_ret1 = gru1(base.to_variable(input), + base.to_variable(hidden_input)) + dy_ret2 = gru2(base.to_variable(input), + base.to_variable(hidden_input)) self.assertFalse( np.array_equal(gru1.weight.numpy(), gru2.weight.numpy())) for o1, o2 in zip(dy_ret1, dy_ret2): self.assertFalse(np.array_equal(o1.numpy(), o2.numpy())) gru2.weight.set_value(gru1.weight.numpy()) gru2.bias.set_value(gru1.bias) - dy_ret1 = gru1( - base.to_variable(input), base.to_variable(hidden_input)) - dy_ret2 = gru2( - base.to_variable(input), base.to_variable(hidden_input)) + dy_ret1 = gru1(base.to_variable(input), + base.to_variable(hidden_input)) + dy_ret2 = gru2(base.to_variable(input), + base.to_variable(hidden_input)) for o1, o2 in zip(dy_ret1, dy_ret2): self.assertTrue(np.array_equal(o1.numpy(), o2.numpy())) @@ -693,8 +701,8 @@ class TestLayer(LayerTest): gru2.bias = gru1.bias self.assertTrue( np.array_equal(gru1.weight.numpy(), gru2.weight.numpy())) - self.assertTrue( - np.array_equal(gru1.bias.numpy(), gru2.bias.numpy())) + self.assertTrue(np.array_equal(gru1.bias.numpy(), + gru2.bias.numpy())) def test_elementwise_math(self): n = np.ones([3, 3], dtype='float32') @@ -718,16 +726,15 @@ class TestLayer(LayerTest): ret = layers.elementwise_sub(ret, t5) ret = layers.elementwise_mul(ret, t6) - static_ret = self.get_static_graph_result( - feed={ - 't': n, - 't2': n2, - 't3': n3, - 't4': n4, - 't5': n5, - 't6': n6 - }, - fetch_list=[ret])[0] + static_ret = self.get_static_graph_result(feed={ + 't': n, + 't2': n2, + 't3': n3, + 't4': n4, + 't5': n5, + 't6': n6 + }, + fetch_list=[ret])[0] with self.dynamic_graph(): with _test_eager_guard(): @@ -754,10 +761,10 @@ class TestLayer(LayerTest): with self.dynamic_graph(): with _test_eager_guard(): - min_eager_ret = layers.elementwise_min( - to_variable(n), to_variable(n2)) - max_eager_ret = layers.elementwise_max( - to_variable(n), to_variable(n2)) + min_eager_ret = layers.elementwise_min(to_variable(n), + to_variable(n2)) + max_eager_ret = layers.elementwise_max(to_variable(n), + to_variable(n2)) min_eager_ret_value = min_eager_ret.numpy() max_eager_ret_value = max_eager_ret.numpy() @@ -778,41 +785,37 @@ class TestLayer(LayerTest): else: place = core.CPUPlace() with self.static_graph(): - seq = layers.data( - name='seq_in', - shape=[3, 4], - dtype='float32', - lod_level=1, - append_batch_size=False) + seq = layers.data(name='seq_in', + shape=[3, 4], + dtype='float32', + lod_level=1, + append_batch_size=False) out = layers.sequence_conv(seq, 2, act='sigmoid') - static_rlt = self.get_static_graph_result( - feed={ - "seq_in": fluid.create_lod_tensor( - data=inp_np, - recursive_seq_lens=[[1, 1, 1]], - place=place) - }, - fetch_list=[out], - with_lod=True)[0] + static_rlt = self.get_static_graph_result(feed={ + "seq_in": + fluid.create_lod_tensor(data=inp_np, + recursive_seq_lens=[[1, 1, 1]], + place=place) + }, + fetch_list=[out], + with_lod=True)[0] with self.static_graph(): - seq = layers.data( - name='seq_in', - shape=[3, 4], - dtype='float32', - lod_level=1, - append_batch_size=False) + seq = layers.data(name='seq_in', + shape=[3, 4], + dtype='float32', + lod_level=1, + append_batch_size=False) seq_conv = nn.SequenceConv('seq_conv', num_filters=2, act='sigmoid') out = seq_conv(seq) - static_rlt2 = 
self.get_static_graph_result( - feed={ - "seq_in": fluid.create_lod_tensor( - data=inp_np, - recursive_seq_lens=[[1, 1, 1]], - place=place) - }, - fetch_list=[out], - with_lod=True)[0] + static_rlt2 = self.get_static_graph_result(feed={ + "seq_in": + fluid.create_lod_tensor(data=inp_np, + recursive_seq_lens=[[1, 1, 1]], + place=place) + }, + fetch_list=[out], + with_lod=True)[0] self.assertTrue( np.array_equal(np.array(static_rlt), np.array(static_rlt2))) @@ -826,8 +829,8 @@ class TestLayer(LayerTest): filter_size=27, act='sigmoid', bias_attr=fluid.initializer.ConstantInitializer(value=1)) - static_rlt = self.get_static_graph_result( - feed={'pixel': inp_np}, fetch_list=[out])[0] + static_rlt = self.get_static_graph_result(feed={'pixel': inp_np}, + fetch_list=[out])[0] with self.static_graph(): img = layers.data(name='pixel', shape=[3, 2, 2], dtype='float32') conv2d_transpose = nn.Conv2DTranspose( @@ -837,8 +840,8 @@ class TestLayer(LayerTest): act='sigmoid', bias_attr=fluid.initializer.ConstantInitializer(value=1)) out = conv2d_transpose(img) - static_rlt2 = self.get_static_graph_result( - feed={'pixel': inp_np}, fetch_list=[out])[0] + static_rlt2 = self.get_static_graph_result(feed={'pixel': inp_np}, + fetch_list=[out])[0] with self.dynamic_graph(): with _test_eager_guard(): conv2d_transpose = nn.Conv2DTranspose( @@ -869,13 +872,13 @@ class TestLayer(LayerTest): weight_attr = fluid.ParamAttr( initializer=fluid.initializer.NumpyArrayInitializer( custom_weight)) - conv2d1 = nn.Conv2DTranspose( - num_channels=3, num_filters=3, filter_size=[2, 2]) - conv2d2 = nn.Conv2DTranspose( - num_channels=3, - num_filters=3, - filter_size=[2, 2], - param_attr=weight_attr) + conv2d1 = nn.Conv2DTranspose(num_channels=3, + num_filters=3, + filter_size=[2, 2]) + conv2d2 = nn.Conv2DTranspose(num_channels=3, + num_filters=3, + filter_size=[2, 2], + param_attr=weight_attr) dy_ret1 = conv2d1(base.to_variable(images)) dy_ret2 = conv2d2(base.to_variable(images)) self.assertFalse( @@ -891,8 +894,8 @@ class TestLayer(LayerTest): conv2d2.bias.set_value(conv2d1_bias) dy_ret1 = conv2d1(base.to_variable(images)) dy_ret2 = conv2d2(base.to_variable(images)) - self.assertTrue( - np.array_equal(dy_ret1.numpy(), dy_ret2.numpy())) + self.assertTrue(np.array_equal(dy_ret1.numpy(), + dy_ret2.numpy())) conv2d2.weight = conv2d1.weight conv2d2.bias = conv2d1.bias @@ -904,16 +907,15 @@ class TestLayer(LayerTest): images = np.ones([2, 3, 5, 5], dtype='float32') custom_weight = np.random.randn(3, 3, 2, 2).astype("float32") - weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight)) - conv2d1 = nn.Conv2DTranspose( - num_channels=3, num_filters=3, filter_size=[2, 2]) - conv2d2 = nn.Conv2DTranspose( - num_channels=3, - num_filters=3, - filter_size=[2, 2], - param_attr=weight_attr) + weight_attr = fluid.ParamAttr(initializer=fluid.initializer. + NumpyArrayInitializer(custom_weight)) + conv2d1 = nn.Conv2DTranspose(num_channels=3, + num_filters=3, + filter_size=[2, 2]) + conv2d2 = nn.Conv2DTranspose(num_channels=3, + num_filters=3, + filter_size=[2, 2], + param_attr=weight_attr) dy_ret1 = conv2d1(base.to_variable(images)) dy_ret2 = conv2d2(base.to_variable(images)) self.assertFalse(np.array_equal(dy_ret1.numpy(), dy_ret2.numpy())) @@ -942,8 +944,9 @@ class TestLayer(LayerTest): # the input of Conv2DTranspose must be Variable. 
def test_Variable(): images = np.ones([2, 3, 5, 5], dtype='float32') - conv2d = nn.Conv2DTranspose( - num_channels=3, num_filters=3, filter_size=[2, 2]) + conv2d = nn.Conv2DTranspose(num_channels=3, + num_filters=3, + filter_size=[2, 2]) conv2d_ret1 = conv2d(images) self.assertRaises(TypeError, test_Variable) @@ -951,10 +954,12 @@ class TestLayer(LayerTest): # the input dtype of Conv2DTranspose must be float16 or float32 or float64 # float16 only can be set on GPU place def test_type(): - images = layers.data( - name='pixel', shape=[3, 5, 5], dtype='int32') - conv2d = nn.Conv2DTranspose( - num_channels=3, num_filters=3, filter_size=[2, 2]) + images = layers.data(name='pixel', + shape=[3, 5, 5], + dtype='int32') + conv2d = nn.Conv2DTranspose(num_channels=3, + num_filters=3, + filter_size=[2, 2]) conv2d_ret2 = conv2d(images) self.assertRaises(TypeError, test_type) @@ -964,16 +969,14 @@ class TestLayer(LayerTest): inp_np_y = np.array([[4, 5, 6]]).astype('float32') with self.static_graph(): - data_x = layers.data( - name='x', - shape=[1, 3], - dtype="float32", - append_batch_size=False) - data_y = layers.data( - name='y', - shape=[1, 3], - dtype="float32", - append_batch_size=False) + data_x = layers.data(name='x', + shape=[1, 3], + dtype="float32", + append_batch_size=False) + data_y = layers.data(name='y', + shape=[1, 3], + dtype="float32", + append_batch_size=False) out = layers.bilinear_tensor_product( data_x, data_y, @@ -981,21 +984,21 @@ class TestLayer(LayerTest): bias_attr=fluid.initializer.ConstantInitializer(value=1), act='sigmoid') - static_rlt = self.get_static_graph_result( - feed={'x': inp_np_x, - 'y': inp_np_y}, fetch_list=[out])[0] + static_rlt = self.get_static_graph_result(feed={ + 'x': inp_np_x, + 'y': inp_np_y + }, + fetch_list=[out])[0] with self.static_graph(): - data_x = layers.data( - name='x', - shape=[1, 3], - dtype="float32", - append_batch_size=False) - data_y = layers.data( - name='y', - shape=[1, 3], - dtype="float32", - append_batch_size=False) + data_x = layers.data(name='x', + shape=[1, 3], + dtype="float32", + append_batch_size=False) + data_y = layers.data(name='y', + shape=[1, 3], + dtype="float32", + append_batch_size=False) btp = nn.BilinearTensorProduct( 3, 3, @@ -1003,9 +1006,11 @@ class TestLayer(LayerTest): bias_attr=fluid.initializer.ConstantInitializer(value=1), act='sigmoid') out = btp(data_x, data_y) - static_rlt2 = self.get_static_graph_result( - feed={'x': inp_np_x, - 'y': inp_np_y}, fetch_list=[out])[0] + static_rlt2 = self.get_static_graph_result(feed={ + 'x': inp_np_x, + 'y': inp_np_y + }, + fetch_list=[out])[0] with self.dynamic_graph(): with _test_eager_guard(): btp = nn.BilinearTensorProduct( @@ -1014,8 +1019,8 @@ class TestLayer(LayerTest): 6, bias_attr=fluid.initializer.ConstantInitializer(value=1), act='sigmoid') - dy_eager_rlt = btp( - base.to_variable(inp_np_x), base.to_variable(inp_np_y)) + dy_eager_rlt = btp(base.to_variable(inp_np_x), + base.to_variable(inp_np_y)) dy_eager_rlt_value = dy_eager_rlt.numpy() btp = nn.BilinearTensorProduct( @@ -1030,32 +1035,34 @@ class TestLayer(LayerTest): with self.dynamic_graph(): with _test_eager_guard(): btp2 = nn.BilinearTensorProduct(3, 3, 6, act='sigmoid') - dy_eager_rlt2 = btp2( - base.to_variable(inp_np_x), base.to_variable(inp_np_y)) + dy_eager_rlt2 = btp2(base.to_variable(inp_np_x), + base.to_variable(inp_np_y)) dy_eager_rlt2_value = dy_eager_rlt2.numpy() btp2 = nn.BilinearTensorProduct(3, 3, 6, act='sigmoid') - dy_rlt2 = btp2( - base.to_variable(inp_np_x), base.to_variable(inp_np_y)) + 
dy_rlt2 = btp2(base.to_variable(inp_np_x), + base.to_variable(inp_np_y)) dy_rlt2_value = dy_rlt2.numpy() with self.static_graph(): - data_x2 = layers.data( - name='x', - shape=[1, 3], - dtype="float32", - append_batch_size=False) - data_y2 = layers.data( - name='y', - shape=[1, 3], - dtype="float32", - append_batch_size=False) - out2 = layers.bilinear_tensor_product( - data_x2, data_y2, 6, act='sigmoid') - - static_rlt3 = self.get_static_graph_result( - feed={'x': inp_np_x, - 'y': inp_np_y}, fetch_list=[out2])[0] + data_x2 = layers.data(name='x', + shape=[1, 3], + dtype="float32", + append_batch_size=False) + data_y2 = layers.data(name='y', + shape=[1, 3], + dtype="float32", + append_batch_size=False) + out2 = layers.bilinear_tensor_product(data_x2, + data_y2, + 6, + act='sigmoid') + + static_rlt3 = self.get_static_graph_result(feed={ + 'x': inp_np_x, + 'y': inp_np_y + }, + fetch_list=[out2])[0] self.assertTrue(np.array_equal(dy_rlt2_value, static_rlt3)) self.assertTrue(np.array_equal(dy_eager_rlt2_value, static_rlt3)) @@ -1070,22 +1077,25 @@ class TestLayer(LayerTest): initializer=fluid.initializer.NumpyArrayInitializer( custom_weight)) btp1 = nn.BilinearTensorProduct(3, 3, 6, act='sigmoid') - btp2 = nn.BilinearTensorProduct( - 3, 3, 6, act='sigmoid', param_attr=weight_attr) - dy_rlt1 = btp1( - base.to_variable(inp_np_x), base.to_variable(inp_np_y)) - dy_rlt2 = btp2( - base.to_variable(inp_np_x), base.to_variable(inp_np_y)) + btp2 = nn.BilinearTensorProduct(3, + 3, + 6, + act='sigmoid', + param_attr=weight_attr) + dy_rlt1 = btp1(base.to_variable(inp_np_x), + base.to_variable(inp_np_y)) + dy_rlt2 = btp2(base.to_variable(inp_np_x), + base.to_variable(inp_np_y)) self.assertFalse( np.array_equal(dy_rlt1.numpy(), dy_rlt2.numpy())) btp2.weight.set_value(btp1.weight.numpy()) btp2.bias.set_value(btp1.bias) - dy_rlt1 = btp1( - base.to_variable(inp_np_x), base.to_variable(inp_np_y)) - dy_rlt2 = btp2( - base.to_variable(inp_np_x), base.to_variable(inp_np_y)) - self.assertTrue( - np.array_equal(dy_rlt1.numpy(), dy_rlt2.numpy())) + dy_rlt1 = btp1(base.to_variable(inp_np_x), + base.to_variable(inp_np_y)) + dy_rlt2 = btp2(base.to_variable(inp_np_x), + base.to_variable(inp_np_y)) + self.assertTrue(np.array_equal(dy_rlt1.numpy(), + dy_rlt2.numpy())) btp2.weight = btp1.weight btp2.bias = btp1.bias @@ -1095,59 +1105,59 @@ class TestLayer(LayerTest): np.array_equal(btp1.bias.numpy(), btp2.bias.numpy())) custom_weight = np.random.randn(6, 3, 3).astype("float32") - weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight)) + weight_attr = fluid.ParamAttr(initializer=fluid.initializer. 
+ NumpyArrayInitializer(custom_weight)) btp1 = nn.BilinearTensorProduct(3, 3, 6, act='sigmoid') - btp2 = nn.BilinearTensorProduct( - 3, 3, 6, act='sigmoid', param_attr=weight_attr) - dy_rlt1 = btp1( - base.to_variable(inp_np_x), base.to_variable(inp_np_y)) - dy_rlt2 = btp2( - base.to_variable(inp_np_x), base.to_variable(inp_np_y)) + btp2 = nn.BilinearTensorProduct(3, + 3, + 6, + act='sigmoid', + param_attr=weight_attr) + dy_rlt1 = btp1(base.to_variable(inp_np_x), + base.to_variable(inp_np_y)) + dy_rlt2 = btp2(base.to_variable(inp_np_x), + base.to_variable(inp_np_y)) self.assertFalse(np.array_equal(dy_rlt1.numpy(), dy_rlt2.numpy())) btp2.weight.set_value(btp1.weight.numpy()) btp2.bias.set_value(btp1.bias) - dy_rlt1 = btp1( - base.to_variable(inp_np_x), base.to_variable(inp_np_y)) - dy_rlt2 = btp2( - base.to_variable(inp_np_x), base.to_variable(inp_np_y)) + dy_rlt1 = btp1(base.to_variable(inp_np_x), + base.to_variable(inp_np_y)) + dy_rlt2 = btp2(base.to_variable(inp_np_x), + base.to_variable(inp_np_y)) self.assertTrue(np.array_equal(dy_rlt1.numpy(), dy_rlt2.numpy())) btp2.weight = btp1.weight btp2.bias = btp1.bias self.assertTrue( np.array_equal(btp1.weight.numpy(), btp2.weight.numpy())) - self.assertTrue( - np.array_equal(btp1.bias.numpy(), btp2.bias.numpy())) + self.assertTrue(np.array_equal(btp1.bias.numpy(), + btp2.bias.numpy())) def prelu_test(self, mode): inp_np = np.ones([5, 200, 100, 100]).astype('float32') with self.static_graph(): - data_t = layers.data( - name="input", - shape=[5, 200, 100, 100], - dtype="float32", - append_batch_size=False) - out = layers.prelu( - data_t, mode, param_attr=ParamAttr(initializer=Constant(1.0))) - static_rlt = self.get_static_graph_result( - feed={"input": inp_np}, fetch_list=[out])[0] + data_t = layers.data(name="input", + shape=[5, 200, 100, 100], + dtype="float32", + append_batch_size=False) + out = layers.prelu(data_t, + mode, + param_attr=ParamAttr(initializer=Constant(1.0))) + static_rlt = self.get_static_graph_result(feed={"input": inp_np}, + fetch_list=[out])[0] with self.static_graph(): - data_t = layers.data( - name="input", - shape=[5, 200, 100, 100], - dtype="float32", - append_batch_size=False) - prelu = nn.PRelu( - mode=mode, - channel=inp_np.shape[1], - input_shape=data_t.shape, - param_attr=ParamAttr(initializer=Constant(1.0))) + data_t = layers.data(name="input", + shape=[5, 200, 100, 100], + dtype="float32", + append_batch_size=False) + prelu = nn.PRelu(mode=mode, + channel=inp_np.shape[1], + input_shape=data_t.shape, + param_attr=ParamAttr(initializer=Constant(1.0))) out = prelu(data_t) - static_rlt2 = self.get_static_graph_result( - feed={"input": inp_np}, fetch_list=[out])[0] + static_rlt2 = self.get_static_graph_result(feed={"input": inp_np}, + fetch_list=[out])[0] with self.dynamic_graph(): with _test_eager_guard(): @@ -1159,11 +1169,10 @@ class TestLayer(LayerTest): dy_eager_rlt = prelu(base.to_variable(inp_np)) dy_eager_rlt_value = dy_eager_rlt.numpy() - prelu = nn.PRelu( - mode=mode, - channel=inp_np.shape[1], - input_shape=inp_np.shape, - param_attr=ParamAttr(initializer=Constant(1.0))) + prelu = nn.PRelu(mode=mode, + channel=inp_np.shape[1], + input_shape=inp_np.shape, + param_attr=ParamAttr(initializer=Constant(1.0))) dy_rlt = prelu(base.to_variable(inp_np)) dy_rlt_value = dy_rlt.numpy() @@ -1188,33 +1197,31 @@ class TestLayer(LayerTest): dy_rlt1 = prelu1(inp) dy_rlt2 = prelu2(inp) self.assertFalse( - np.array_equal(prelu1.weight.numpy(), prelu2.weight.numpy( - ))) + np.array_equal(prelu1.weight.numpy(), + 
prelu2.weight.numpy())) self.assertFalse( np.array_equal(dy_rlt1.numpy(), dy_rlt2.numpy())) prelu2.weight.set_value(prelu1.weight.numpy()) dy_rlt1 = prelu1(inp) dy_rlt2 = prelu2(inp) - self.assertTrue( - np.array_equal(dy_rlt1.numpy(), dy_rlt2.numpy())) + self.assertTrue(np.array_equal(dy_rlt1.numpy(), + dy_rlt2.numpy())) prelu2.weight = prelu1.weight self.assertTrue( - np.array_equal(prelu1.weight.numpy(), prelu2.weight.numpy( - ))) + np.array_equal(prelu1.weight.numpy(), + prelu2.weight.numpy())) inp_np = np.random.randn(5, 200, 100, 100).astype("float32") inp = base.to_variable(inp_np) - prelu1 = nn.PRelu( - mode=mode, - channel=inp_np.shape[1], - input_shape=inp_np.shape, - param_attr=ParamAttr(initializer=Constant(2.0))) - prelu2 = nn.PRelu( - mode=mode, - channel=inp_np.shape[1], - input_shape=inp_np.shape, - param_attr=ParamAttr(initializer=Constant(1.0))) + prelu1 = nn.PRelu(mode=mode, + channel=inp_np.shape[1], + input_shape=inp_np.shape, + param_attr=ParamAttr(initializer=Constant(2.0))) + prelu2 = nn.PRelu(mode=mode, + channel=inp_np.shape[1], + input_shape=inp_np.shape, + param_attr=ParamAttr(initializer=Constant(1.0))) dy_rlt1 = prelu1(inp) dy_rlt2 = prelu2(inp) self.assertFalse( @@ -1239,31 +1246,31 @@ class TestLayer(LayerTest): dict_size = 20 with self.static_graph(): data_t = layers.data(name='word', shape=[1], dtype='int64') - emb = layers.embedding( - input=data_t, - size=[dict_size, 32], - param_attr='emb.w', - is_sparse=False) - static_rlt = self.get_static_graph_result( - feed={'word': inp_word}, fetch_list=[emb])[0] + emb = layers.embedding(input=data_t, + size=[dict_size, 32], + param_attr='emb.w', + is_sparse=False) + static_rlt = self.get_static_graph_result(feed={'word': inp_word}, + fetch_list=[emb])[0] with self.static_graph(): data_t = layers.data(name='word', shape=[1], dtype='int64') - emb2 = nn.Embedding( - size=[dict_size, 32], param_attr='emb.w', is_sparse=False) + emb2 = nn.Embedding(size=[dict_size, 32], + param_attr='emb.w', + is_sparse=False) emb_rlt = emb2(data_t) - static_rlt2 = self.get_static_graph_result( - feed={'word': inp_word}, fetch_list=[emb_rlt])[0] + static_rlt2 = self.get_static_graph_result(feed={'word': inp_word}, + fetch_list=[emb_rlt])[0] with self.dynamic_graph(): with _test_eager_guard(): - emb2 = nn.Embedding( - size=[dict_size, 32], - param_attr='eager_emb.w', - is_sparse=False) + emb2 = nn.Embedding(size=[dict_size, 32], + param_attr='eager_emb.w', + is_sparse=False) dy_eager_rlt = emb2(base.to_variable(inp_word)) dy_eager_rlt_value = dy_eager_rlt.numpy() - emb2 = nn.Embedding( - size=[dict_size, 32], param_attr='emb.w', is_sparse=False) + emb2 = nn.Embedding(size=[dict_size, 32], + param_attr='emb.w', + is_sparse=False) dy_rlt = emb2(base.to_variable(inp_word)) dy_rlt_value = dy_rlt.numpy() @@ -1278,10 +1285,9 @@ class TestLayer(LayerTest): initializer=fluid.initializer.NumpyArrayInitializer( custom_weight)) emb1 = nn.Embedding(size=[dict_size, 32], is_sparse=False) - emb2 = nn.Embedding( - size=[dict_size, 32], - param_attr=weight_attr, - is_sparse=False) + emb2 = nn.Embedding(size=[dict_size, 32], + param_attr=weight_attr, + is_sparse=False) rep1 = emb1(base.to_variable(inp_word)) rep2 = emb2(base.to_variable(inp_word)) self.assertFalse( @@ -1298,12 +1304,12 @@ class TestLayer(LayerTest): np.array_equal(emb1.weight.numpy(), emb2.weight.numpy())) custom_weight = np.random.randn(dict_size, 32).astype("float32") - weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight)) + 
weight_attr = fluid.ParamAttr(initializer=fluid.initializer. + NumpyArrayInitializer(custom_weight)) emb1 = nn.Embedding(size=[dict_size, 32], is_sparse=False) - emb2 = nn.Embedding( - size=[dict_size, 32], param_attr=weight_attr, is_sparse=False) + emb2 = nn.Embedding(size=[dict_size, 32], + param_attr=weight_attr, + is_sparse=False) rep1 = emb1(base.to_variable(inp_word)) rep2 = emb2(base.to_variable(inp_word)) self.assertFalse(np.array_equal(emb1.weight.numpy(), custom_weight)) @@ -1328,20 +1334,21 @@ class TestLayer(LayerTest): words = [] for i in range(window_size): words.append( - layers.data( - name='word_{0}'.format(i), shape=[None], dtype='int64')) - sample_weights = layers.fill_constant( - shape=[5, 1], dtype='float32', value=1) + layers.data(name='word_{0}'.format(i), + shape=[None], + dtype='int64')) + sample_weights = layers.fill_constant(shape=[5, 1], + dtype='float32', + value=1) embs = [] for i in range(window_size): if i == label_word: continue - emb = fluid.embedding( - input=words[i], - size=[dict_size, 32], - param_attr='emb.w', - is_sparse=False) + emb = fluid.embedding(input=words[i], + size=[dict_size, 32], + param_attr='emb.w', + is_sparse=False) embs.append(emb) embs = layers.concat(input=embs, axis=1) @@ -1359,19 +1366,22 @@ class TestLayer(LayerTest): feed_dict = dict() for i in range(window_size): feed_dict['word_{0}'.format(i)] = inp_word[i] - static_rlt = self.get_static_graph_result( - feed=feed_dict, fetch_list=[nce_loss])[0] + static_rlt = self.get_static_graph_result(feed=feed_dict, + fetch_list=[nce_loss])[0] with self.static_graph(): words = [] for i in range(window_size): words.append( - layers.data( - name='word_{0}'.format(i), shape=[None], dtype='int64')) - sample_weights = layers.fill_constant( - shape=[5, 1], dtype='float32', value=1) - emb = nn.Embedding( - size=[dict_size, 32], param_attr='emb.w', is_sparse=False) + layers.data(name='word_{0}'.format(i), + shape=[None], + dtype='int64')) + sample_weights = layers.fill_constant(shape=[5, 1], + dtype='float32', + value=1) + emb = nn.Embedding(size=[dict_size, 32], + param_attr='emb.w', + is_sparse=False) embs2 = [] for i in range(window_size): @@ -1398,20 +1408,21 @@ class TestLayer(LayerTest): for i in range(len(words)): feed_dict['word_{0}'.format(i)] = inp_word[i] - static_rlt2 = self.get_static_graph_result( - feed=feed_dict, fetch_list=[nce_loss2])[0] + static_rlt2 = self.get_static_graph_result(feed=feed_dict, + fetch_list=[nce_loss2 + ])[0] with self.dynamic_graph(): with _test_eager_guard(): words = [] for i in range(window_size): words.append(base.to_variable(inp_word[i])) - sample_weights = layers.fill_constant( - shape=[5, 1], dtype='float32', value=1) - emb = nn.Embedding( - size=[dict_size, 32], - param_attr='eager_emb.w', - is_sparse=False) + sample_weights = layers.fill_constant(shape=[5, 1], + dtype='float32', + value=1) + emb = nn.Embedding(size=[dict_size, 32], + param_attr='eager_emb.w', + is_sparse=False) embs3 = [] for i in range(window_size): @@ -1421,8 +1432,9 @@ class TestLayer(LayerTest): emb_rlt = emb(words[i]) embs3.append(emb_rlt) - embs3 = layers.concat( - input=embs3, axis=fluid.dygraph.to_variable(np.array([1]))) + embs3 = layers.concat(input=embs3, + axis=fluid.dygraph.to_variable( + np.array([1]))) nce = nn.NCE(num_total_classes=dict_size, dim=embs3.shape[1], num_neg_samples=2, @@ -1440,10 +1452,12 @@ class TestLayer(LayerTest): words = [] for i in range(window_size): words.append(base.to_variable(inp_word[i])) - sample_weights = layers.fill_constant( - shape=[5, 
1], dtype='float32', value=1) - emb = nn.Embedding( - size=[dict_size, 32], param_attr='emb.w', is_sparse=False) + sample_weights = layers.fill_constant(shape=[5, 1], + dtype='float32', + value=1) + emb = nn.Embedding(size=[dict_size, 32], + param_attr='emb.w', + is_sparse=False) embs3 = [] for i in range(window_size): @@ -1453,8 +1467,8 @@ class TestLayer(LayerTest): emb_rlt = emb(words[i]) embs3.append(emb_rlt) - embs3 = layers.concat( - input=embs3, axis=fluid.dygraph.to_variable(np.array([1]))) + embs3 = layers.concat(input=embs3, + axis=fluid.dygraph.to_variable(np.array([1]))) nce = nn.NCE(num_total_classes=dict_size, dim=embs3.shape[1], num_neg_samples=2, @@ -1487,10 +1501,9 @@ class TestLayer(LayerTest): shape=fluid.dygraph.to_variable(np.array([5, 1])), dtype='float32', value=1) - emb = nn.Embedding( - size=[dict_size, 32], - param_attr='eager_emb.w', - is_sparse=False) + emb = nn.Embedding(size=[dict_size, 32], + param_attr='eager_emb.w', + is_sparse=False) embs3 = [] for i in range(window_size): @@ -1541,9 +1554,8 @@ class TestLayer(LayerTest): np.array_equal(nce1.bias.numpy(), nce2.bias.numpy())) custom_weight = np.random.randn(dict_size, 128).astype("float32") - weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight)) + weight_attr = fluid.ParamAttr(initializer=fluid.initializer. + NumpyArrayInitializer(custom_weight)) words = [] for i in range(window_size): words.append(base.to_variable(inp_word[i])) @@ -1551,8 +1563,9 @@ class TestLayer(LayerTest): shape=fluid.dygraph.to_variable(np.array([5, 1])), dtype='float32', value=1) - emb = nn.Embedding( - size=[dict_size, 32], param_attr='emb.w', is_sparse=False) + emb = nn.Embedding(size=[dict_size, 32], + param_attr='emb.w', + is_sparse=False) embs3 = [] for i in range(window_size): @@ -1592,25 +1605,24 @@ class TestLayer(LayerTest): nce2.bias.set_value(nce1.bias) nce1_loss = nce1(embs3, wl) nce2_loss = nce2(embs3, wl) - self.assertTrue( - np.array_equal(nce1_loss.numpy(), nce2_loss.numpy())) + self.assertTrue(np.array_equal(nce1_loss.numpy(), + nce2_loss.numpy())) nce2.weight = nce1.weight nce2.bias = nce1.bias self.assertTrue( np.array_equal(nce1.weight.numpy(), nce2.weight.numpy())) - self.assertTrue( - np.array_equal(nce1.bias.numpy(), nce2.bias.numpy())) + self.assertTrue(np.array_equal(nce1.bias.numpy(), + nce2.bias.numpy())) def test_one_hot(self): with self.dynamic_graph(): with _test_eager_guard(): - label = fluid.dygraph.to_variable( - np.array([[1], [1], [3], [0]])) + label = fluid.dygraph.to_variable(np.array([[1], [1], [3], + [0]])) one_hot_label1 = fluid.layers.one_hot(input=label, depth=4) one_hot_label2 = fluid.layers.one_hot( - input=label, - depth=fluid.dygraph.to_variable(np.array([4]))) + input=label, depth=fluid.dygraph.to_variable(np.array([4]))) self.assertTrue( np.array_equal(one_hot_label1.numpy(), one_hot_label2.numpy())) @@ -1627,19 +1639,19 @@ class TestLayer(LayerTest): with _test_eager_guard(): input = fluid.dygraph.to_variable(np.random.random((3, 8, 5))) x0, x1 = fluid.layers.split(input, num_or_sections=2, dim=1) - x00, x11 = fluid.layers.split( - input, - num_or_sections=2, - dim=fluid.dygraph.to_variable(np.array([1]))) + x00, x11 = fluid.layers.split(input, + num_or_sections=2, + dim=fluid.dygraph.to_variable( + np.array([1]))) self.assertTrue(np.array_equal(x0.numpy(), x00.numpy())) self.assertTrue(np.array_equal(x1.numpy(), x11.numpy())) input = fluid.dygraph.to_variable(np.random.random((3, 8, 5))) x0, x1 = fluid.layers.split(input, 
num_or_sections=2, dim=1) - x00, x11 = fluid.layers.split( - input, - num_or_sections=2, - dim=fluid.dygraph.to_variable(np.array([1]))) + x00, x11 = fluid.layers.split(input, + num_or_sections=2, + dim=fluid.dygraph.to_variable( + np.array([1]))) self.assertTrue(np.array_equal(x0.numpy(), x00.numpy())) self.assertTrue(np.array_equal(x1.numpy(), x11.numpy())) @@ -1653,8 +1665,8 @@ class TestLayer(LayerTest): self.assertTrue( np.array_equal(top5_values1.numpy(), top5_values2.numpy())) self.assertTrue( - np.array_equal(top5_indices1.numpy(), top5_indices2.numpy( - ))) + np.array_equal(top5_indices1.numpy(), + top5_indices2.numpy())) input = fluid.dygraph.to_variable(np.random.random((13, 11))) top5_values1, top5_indices1 = layers.topk(input, k=5) @@ -1667,22 +1679,22 @@ class TestLayer(LayerTest): def test_conv3d(self): with self.static_graph(): - images = layers.data( - name='pixel', shape=[3, 6, 6, 6], dtype='float32') + images = layers.data(name='pixel', + shape=[3, 6, 6, 6], + dtype='float32') ret = layers.conv3d(input=images, num_filters=3, filter_size=2) static_ret = self.get_static_graph_result( - feed={'pixel': np.ones( - [2, 3, 6, 6, 6], dtype='float32')}, + feed={'pixel': np.ones([2, 3, 6, 6, 6], dtype='float32')}, fetch_list=[ret])[0] with self.static_graph(): - images = layers.data( - name='pixel', shape=[3, 6, 6, 6], dtype='float32') + images = layers.data(name='pixel', + shape=[3, 6, 6, 6], + dtype='float32') conv3d = nn.Conv3D(num_channels=3, num_filters=3, filter_size=2) ret = conv3d(images) static_ret2 = self.get_static_graph_result( - feed={'pixel': np.ones( - [2, 3, 6, 6, 6], dtype='float32')}, + feed={'pixel': np.ones([2, 3, 6, 6, 6], dtype='float32')}, fetch_list=[ret])[0] with self.dynamic_graph(): @@ -1708,13 +1720,13 @@ class TestLayer(LayerTest): weight_attr = fluid.ParamAttr( initializer=fluid.initializer.NumpyArrayInitializer( custom_weight)) - conv3d1 = nn.Conv3D( - num_channels=3, num_filters=3, filter_size=2) - conv3d2 = nn.Conv3D( - num_channels=3, - num_filters=3, - filter_size=2, - param_attr=weight_attr) + conv3d1 = nn.Conv3D(num_channels=3, + num_filters=3, + filter_size=2) + conv3d2 = nn.Conv3D(num_channels=3, + num_filters=3, + filter_size=2, + param_attr=weight_attr) dy_ret1 = conv3d1(base.to_variable(images)) dy_ret2 = conv3d2(base.to_variable(images)) self.assertFalse( @@ -1730,8 +1742,8 @@ class TestLayer(LayerTest): conv3d1.bias.set_value(conv3d1_bias) dy_ret1 = conv3d1(base.to_variable(images)) dy_ret2 = conv3d2(base.to_variable(images)) - self.assertTrue( - np.array_equal(dy_ret1.numpy(), dy_ret2.numpy())) + self.assertTrue(np.array_equal(dy_ret1.numpy(), + dy_ret2.numpy())) conv3d2.weight = conv3d1.weight conv3d2.bias = conv3d1.bias @@ -1743,15 +1755,13 @@ class TestLayer(LayerTest): images = np.ones([2, 3, 6, 6, 6], dtype='float32') custom_weight = np.random.randn(3, 3, 2, 2, 2).astype("float32") - weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight)) + weight_attr = fluid.ParamAttr(initializer=fluid.initializer. 
+ NumpyArrayInitializer(custom_weight)) conv3d1 = nn.Conv3D(num_channels=3, num_filters=3, filter_size=2) - conv3d2 = nn.Conv3D( - num_channels=3, - num_filters=3, - filter_size=2, - param_attr=weight_attr) + conv3d2 = nn.Conv3D(num_channels=3, + num_filters=3, + filter_size=2, + param_attr=weight_attr) dy_ret1 = conv3d1(base.to_variable(images)) dy_ret2 = conv3d2(base.to_variable(images)) self.assertFalse(np.array_equal(dy_ret1.numpy(), dy_ret2.numpy())) @@ -1783,37 +1793,37 @@ class TestLayer(LayerTest): place = core.CPUPlace() with self.static_graph(): - x = layers.data( - name='X', - shape=[3, 5], - dtype='float32', - lod_level=1, - append_batch_size=False) + x = layers.data(name='X', + shape=[3, 5], + dtype='float32', + lod_level=1, + append_batch_size=False) ret = layers.row_conv(input=x, future_context_size=2) - static_ret = self.get_static_graph_result( - feed={ - 'X': fluid.create_lod_tensor( - data=input, recursive_seq_lens=[[1, 1, 1]], place=place) - }, - fetch_list=[ret], - with_lod=True)[0] + static_ret = self.get_static_graph_result(feed={ + 'X': + fluid.create_lod_tensor(data=input, + recursive_seq_lens=[[1, 1, 1]], + place=place) + }, + fetch_list=[ret], + with_lod=True)[0] with self.static_graph(): - x = layers.data( - name='X', - shape=[3, 5], - dtype='float32', - lod_level=1, - append_batch_size=False) + x = layers.data(name='X', + shape=[3, 5], + dtype='float32', + lod_level=1, + append_batch_size=False) rowConv = nn.RowConv('RowConv', future_context_size=2) ret = rowConv(x) - static_ret2 = self.get_static_graph_result( - feed={ - 'X': fluid.create_lod_tensor( - data=input, recursive_seq_lens=[[1, 1, 1]], place=place) - }, - fetch_list=[ret], - with_lod=True)[0] + static_ret2 = self.get_static_graph_result(feed={ + 'X': + fluid.create_lod_tensor(data=input, + recursive_seq_lens=[[1, 1, 1]], + place=place) + }, + fetch_list=[ret], + with_lod=True)[0] # TODO: dygraph can't support LODTensor @@ -1830,54 +1840,51 @@ class TestLayer(LayerTest): input = np.random.random(shape).astype('float32') with self.static_graph(): - X = fluid.layers.data( - name='X', - shape=shape, - dtype='float32', - lod_level=1, - append_batch_size=False) + X = fluid.layers.data(name='X', + shape=shape, + dtype='float32', + lod_level=1, + append_batch_size=False) ret = layers.group_norm( input=X, groups=2, - param_attr=fluid.initializer.Uniform( - low=-0.5, high=0.5), + param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5), bias_attr=fluid.initializer.ConstantInitializer(value=1)) - static_ret = self.get_static_graph_result( - feed={ - 'X': fluid.create_lod_tensor( - data=input, recursive_seq_lens=[[1, 1]], place=place) - }, - fetch_list=[ret], - with_lod=True)[0] + static_ret = self.get_static_graph_result(feed={ + 'X': + fluid.create_lod_tensor(data=input, + recursive_seq_lens=[[1, 1]], + place=place) + }, + fetch_list=[ret], + with_lod=True)[0] with self.static_graph(): - X = fluid.layers.data( - name='X', - shape=shape, - dtype='float32', - lod_level=1, - append_batch_size=False) + X = fluid.layers.data(name='X', + shape=shape, + dtype='float32', + lod_level=1, + append_batch_size=False) groupNorm = nn.GroupNorm( channels=shape[1], groups=2, - param_attr=fluid.initializer.Uniform( - low=-0.5, high=0.5), + param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5), bias_attr=fluid.initializer.ConstantInitializer(value=1)) ret = groupNorm(X) - static_ret2 = self.get_static_graph_result( - feed={ - 'X': fluid.create_lod_tensor( - data=input, recursive_seq_lens=[[1, 1]], place=place) - }, - 
fetch_list=[ret], - with_lod=True)[0] + static_ret2 = self.get_static_graph_result(feed={ + 'X': + fluid.create_lod_tensor(data=input, + recursive_seq_lens=[[1, 1]], + place=place) + }, + fetch_list=[ret], + with_lod=True)[0] with self.dynamic_graph(): groupNorm = nn.GroupNorm( channels=shape[1], groups=2, - param_attr=fluid.initializer.Uniform( - low=-0.5, high=0.5), + param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5), bias_attr=fluid.initializer.ConstantInitializer(value=1)) dy_ret = groupNorm(base.to_variable(input)) dy_rlt_value = dy_ret.numpy() @@ -1901,19 +1908,23 @@ class TestLayer(LayerTest): input = np.random.random(shape).astype('float32') with self.static_graph(): - X = fluid.layers.data( - name='X', shape=shape, dtype='float32', append_batch_size=False) + X = fluid.layers.data(name='X', + shape=shape, + dtype='float32', + append_batch_size=False) ret = layers.instance_norm(input=X) - static_ret = self.get_static_graph_result( - feed={'X': input}, fetch_list=[ret])[0] + static_ret = self.get_static_graph_result(feed={'X': input}, + fetch_list=[ret])[0] with self.static_graph(): - X = fluid.layers.data( - name='X', shape=shape, dtype='float32', append_batch_size=False) + X = fluid.layers.data(name='X', + shape=shape, + dtype='float32', + append_batch_size=False) instanceNorm = nn.InstanceNorm(num_channels=shape[1]) ret = instanceNorm(X) - static_ret2 = self.get_static_graph_result( - feed={'X': input}, fetch_list=[ret])[0] + static_ret2 = self.get_static_graph_result(feed={'X': input}, + fetch_list=[ret])[0] with self.dynamic_graph(): with _test_eager_guard(): @@ -1968,37 +1979,37 @@ class TestLayer(LayerTest): input = np.random.random(shape).astype('float32') with self.static_graph(): - Weight = fluid.layers.data( - name='Weight', - shape=shape, - dtype='float32', - lod_level=1, - append_batch_size=False) + Weight = fluid.layers.data(name='Weight', + shape=shape, + dtype='float32', + lod_level=1, + append_batch_size=False) ret = layers.spectral_norm(weight=Weight, dim=1, power_iters=2) - static_ret = self.get_static_graph_result( - feed={ - 'Weight': fluid.create_lod_tensor( - data=input, recursive_seq_lens=[[1, 1]], place=place), - }, - fetch_list=[ret], - with_lod=True)[0] + static_ret = self.get_static_graph_result(feed={ + 'Weight': + fluid.create_lod_tensor(data=input, + recursive_seq_lens=[[1, 1]], + place=place), + }, + fetch_list=[ret], + with_lod=True)[0] with self.static_graph(): - Weight = fluid.layers.data( - name='Weight', - shape=shape, - dtype='float32', - lod_level=1, - append_batch_size=False) + Weight = fluid.layers.data(name='Weight', + shape=shape, + dtype='float32', + lod_level=1, + append_batch_size=False) spectralNorm = nn.SpectralNorm(shape, dim=1, power_iters=2) ret = spectralNorm(Weight) - static_ret2 = self.get_static_graph_result( - feed={ - 'Weight': fluid.create_lod_tensor( - data=input, recursive_seq_lens=[[1, 1]], place=place) - }, - fetch_list=[ret], - with_lod=True)[0] + static_ret2 = self.get_static_graph_result(feed={ + 'Weight': + fluid.create_lod_tensor(data=input, + recursive_seq_lens=[[1, 1]], + place=place) + }, + fetch_list=[ret], + with_lod=True)[0] with self.dynamic_graph(): with _test_eager_guard(): @@ -2024,70 +2035,77 @@ class TestLayer(LayerTest): adj = np.tile(adj, (1, 1, 1)) vectors = np.random.random((1, 10, 5)).astype('float32') with self.static_graph(): - NodesVector = fluid.layers.data( - name='NodesVector', - shape=(1, 10, 5), - dtype='float32', - lod_level=1, - append_batch_size=False) - EdgeSet = fluid.layers.data( 
- name='EdgeSet', - shape=(1, 9, 2), - dtype='int32', - lod_level=1, - append_batch_size=False) - ret = fluid.contrib.layers.tree_conv( - nodes_vector=NodesVector, - edge_set=EdgeSet, - output_size=6, - num_filters=1, - max_depth=2) - static_ret = self.get_static_graph_result( - feed={ - 'NodesVector': fluid.create_lod_tensor( - data=vectors, recursive_seq_lens=[[1]], place=place), - 'EdgeSet': fluid.create_lod_tensor( - data=adj, recursive_seq_lens=[[1]], place=place) - }, - fetch_list=[ret], - with_lod=False)[0] + NodesVector = fluid.layers.data(name='NodesVector', + shape=(1, 10, 5), + dtype='float32', + lod_level=1, + append_batch_size=False) + EdgeSet = fluid.layers.data(name='EdgeSet', + shape=(1, 9, 2), + dtype='int32', + lod_level=1, + append_batch_size=False) + ret = fluid.contrib.layers.tree_conv(nodes_vector=NodesVector, + edge_set=EdgeSet, + output_size=6, + num_filters=1, + max_depth=2) + static_ret = self.get_static_graph_result(feed={ + 'NodesVector': + fluid.create_lod_tensor(data=vectors, + recursive_seq_lens=[[1]], + place=place), + 'EdgeSet': + fluid.create_lod_tensor(data=adj, + recursive_seq_lens=[[1]], + place=place) + }, + fetch_list=[ret], + with_lod=False)[0] with self.static_graph(): - NodesVector = fluid.layers.data( - name='NodesVector', - shape=(1, 10, 5), - dtype='float32', - lod_level=1, - append_batch_size=False) - EdgeSet = fluid.layers.data( - name='EdgeSet', - shape=(1, 9, 2), - dtype='int32', - lod_level=1, - append_batch_size=False) - treeConv = nn.TreeConv( - feature_size=5, output_size=6, num_filters=1, max_depth=2) + NodesVector = fluid.layers.data(name='NodesVector', + shape=(1, 10, 5), + dtype='float32', + lod_level=1, + append_batch_size=False) + EdgeSet = fluid.layers.data(name='EdgeSet', + shape=(1, 9, 2), + dtype='int32', + lod_level=1, + append_batch_size=False) + treeConv = nn.TreeConv(feature_size=5, + output_size=6, + num_filters=1, + max_depth=2) ret = treeConv(NodesVector, EdgeSet) - static_ret2 = self.get_static_graph_result( - feed={ - 'NodesVector': fluid.create_lod_tensor( - data=vectors, recursive_seq_lens=[[1]], place=place), - 'EdgeSet': fluid.create_lod_tensor( - data=adj, recursive_seq_lens=[[1]], place=place) - }, - fetch_list=[ret], - with_lod=False)[0] + static_ret2 = self.get_static_graph_result(feed={ + 'NodesVector': + fluid.create_lod_tensor(data=vectors, + recursive_seq_lens=[[1]], + place=place), + 'EdgeSet': + fluid.create_lod_tensor(data=adj, + recursive_seq_lens=[[1]], + place=place) + }, + fetch_list=[ret], + with_lod=False)[0] with self.dynamic_graph(): with _test_eager_guard(): - treeConv = nn.TreeConv( - feature_size=5, output_size=6, num_filters=1, max_depth=2) - dy_eager_ret = treeConv( - base.to_variable(vectors), base.to_variable(adj)) + treeConv = nn.TreeConv(feature_size=5, + output_size=6, + num_filters=1, + max_depth=2) + dy_eager_ret = treeConv(base.to_variable(vectors), + base.to_variable(adj)) dy_eager_rlt_value = dy_eager_ret.numpy() - treeConv = nn.TreeConv( - feature_size=5, output_size=6, num_filters=1, max_depth=2) + treeConv = nn.TreeConv(feature_size=5, + output_size=6, + num_filters=1, + max_depth=2) dy_ret = treeConv(base.to_variable(vectors), base.to_variable(adj)) dy_rlt_value = dy_ret.numpy() @@ -2101,33 +2119,31 @@ class TestLayer(LayerTest): weight_attr = fluid.ParamAttr( initializer=fluid.initializer.NumpyArrayInitializer( custom_weight)) - treeConv1 = nn.TreeConv( - feature_size=5, - output_size=6, - num_filters=1, - max_depth=2, - bias_attr='eager_tc1_b') - treeConv2 = nn.TreeConv( - 
feature_size=5, - output_size=6, - num_filters=1, - max_depth=2, - param_attr=weight_attr, - bias_attr='eager_tc2_b') - dy_ret1 = treeConv1( - base.to_variable(vectors), base.to_variable(adj)) - dy_ret2 = treeConv2( - base.to_variable(vectors), base.to_variable(adj)) + treeConv1 = nn.TreeConv(feature_size=5, + output_size=6, + num_filters=1, + max_depth=2, + bias_attr='eager_tc1_b') + treeConv2 = nn.TreeConv(feature_size=5, + output_size=6, + num_filters=1, + max_depth=2, + param_attr=weight_attr, + bias_attr='eager_tc2_b') + dy_ret1 = treeConv1(base.to_variable(vectors), + base.to_variable(adj)) + dy_ret2 = treeConv2(base.to_variable(vectors), + base.to_variable(adj)) self.assertFalse( np.array_equal(dy_ret1.numpy(), dy_ret2.numpy())) treeConv2.weight.set_value(treeConv1.weight.numpy()) treeConv2.bias.set_value(treeConv1.bias) - dy_ret1 = treeConv1( - base.to_variable(vectors), base.to_variable(adj)) - dy_ret2 = treeConv2( - base.to_variable(vectors), base.to_variable(adj)) - self.assertTrue( - np.array_equal(dy_ret1.numpy(), dy_ret2.numpy())) + dy_ret1 = treeConv1(base.to_variable(vectors), + base.to_variable(adj)) + dy_ret2 = treeConv2(base.to_variable(vectors), + base.to_variable(adj)) + self.assertTrue(np.array_equal(dy_ret1.numpy(), + dy_ret2.numpy())) treeConv2.weight = treeConv1.weight treeConv2.bias = treeConv1.bias @@ -2139,33 +2155,30 @@ class TestLayer(LayerTest): treeConv2.bias.numpy())) custom_weight = np.random.randn(5, 3, 6, 1).astype("float32") - weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight)) - treeConv1 = nn.TreeConv( - feature_size=5, - output_size=6, - num_filters=1, - max_depth=2, - bias_attr='tc1_b') - treeConv2 = nn.TreeConv( - feature_size=5, - output_size=6, - num_filters=1, - max_depth=2, - param_attr=weight_attr, - bias_attr='tc2_b') - dy_ret1 = treeConv1( - base.to_variable(vectors), base.to_variable(adj)) - dy_ret2 = treeConv2( - base.to_variable(vectors), base.to_variable(adj)) + weight_attr = fluid.ParamAttr(initializer=fluid.initializer. 
+ NumpyArrayInitializer(custom_weight)) + treeConv1 = nn.TreeConv(feature_size=5, + output_size=6, + num_filters=1, + max_depth=2, + bias_attr='tc1_b') + treeConv2 = nn.TreeConv(feature_size=5, + output_size=6, + num_filters=1, + max_depth=2, + param_attr=weight_attr, + bias_attr='tc2_b') + dy_ret1 = treeConv1(base.to_variable(vectors), + base.to_variable(adj)) + dy_ret2 = treeConv2(base.to_variable(vectors), + base.to_variable(adj)) self.assertFalse(np.array_equal(dy_ret1.numpy(), dy_ret2.numpy())) treeConv2.weight.set_value(treeConv1.weight.numpy()) treeConv2.bias.set_value(treeConv1.bias) - dy_ret1 = treeConv1( - base.to_variable(vectors), base.to_variable(adj)) - dy_ret2 = treeConv2( - base.to_variable(vectors), base.to_variable(adj)) + dy_ret1 = treeConv1(base.to_variable(vectors), + base.to_variable(adj)) + dy_ret2 = treeConv2(base.to_variable(vectors), + base.to_variable(adj)) self.assertTrue(np.array_equal(dy_ret1.numpy(), dy_ret2.numpy())) treeConv2.weight = treeConv1.weight @@ -2177,34 +2190,39 @@ class TestLayer(LayerTest): np.array_equal(treeConv1.bias.numpy(), treeConv2.bias.numpy())) def test_conv3d_transpose(self): - input_array = np.arange(0, 48).reshape( - [2, 3, 2, 2, 2]).astype('float32') + input_array = np.arange(0, 48).reshape([2, 3, 2, 2, + 2]).astype('float32') with self.static_graph(): img = layers.data(name='pixel', shape=[3, 2, 2, 2], dtype='float32') - out = layers.conv3d_transpose( - input=img, num_filters=12, filter_size=12, use_cudnn=False) + out = layers.conv3d_transpose(input=img, + num_filters=12, + filter_size=12, + use_cudnn=False) static_rlt = self.get_static_graph_result( feed={'pixel': input_array}, fetch_list=[out])[0] with self.static_graph(): img = layers.data(name='pixel', shape=[3, 2, 2, 2], dtype='float32') - conv3d_transpose = nn.Conv3DTranspose( - num_channels=3, num_filters=12, filter_size=12, use_cudnn=False) + conv3d_transpose = nn.Conv3DTranspose(num_channels=3, + num_filters=12, + filter_size=12, + use_cudnn=False) out = conv3d_transpose(img) static_rlt2 = self.get_static_graph_result( feed={'pixel': input_array}, fetch_list=[out])[0] with self.dynamic_graph(): with _test_eager_guard(): - conv3d_transpose = nn.Conv3DTranspose( - num_channels=3, - num_filters=12, - filter_size=12, - use_cudnn=False) + conv3d_transpose = nn.Conv3DTranspose(num_channels=3, + num_filters=12, + filter_size=12, + use_cudnn=False) dy_eager_rlt = conv3d_transpose(base.to_variable(input_array)) dy_eager_rlt_value = dy_eager_rlt.numpy() - conv3d_transpose = nn.Conv3DTranspose( - num_channels=3, num_filters=12, filter_size=12, use_cudnn=False) + conv3d_transpose = nn.Conv3DTranspose(num_channels=3, + num_filters=12, + filter_size=12, + use_cudnn=False) dy_rlt = conv3d_transpose(base.to_variable(input_array)) dy_rlt_value = dy_rlt.numpy() self.assertTrue(np.allclose(static_rlt2, static_rlt)) @@ -2218,19 +2236,17 @@ class TestLayer(LayerTest): weight_attr = fluid.ParamAttr( initializer=fluid.initializer.NumpyArrayInitializer( custom_weight)) - conv3d1 = nn.Conv3DTranspose( - num_channels=3, - num_filters=3, - filter_size=2, - bias_attr='eager_conv3d1_b', - use_cudnn=False) - conv3d2 = nn.Conv3DTranspose( - num_channels=3, - num_filters=3, - filter_size=2, - param_attr=weight_attr, - bias_attr='eager_conv3d2_b', - use_cudnn=False) + conv3d1 = nn.Conv3DTranspose(num_channels=3, + num_filters=3, + filter_size=2, + bias_attr='eager_conv3d1_b', + use_cudnn=False) + conv3d2 = nn.Conv3DTranspose(num_channels=3, + num_filters=3, + filter_size=2, + param_attr=weight_attr, + 
bias_attr='eager_conv3d2_b', + use_cudnn=False) dy_ret1 = conv3d1(base.to_variable(images)) dy_ret2 = conv3d2(base.to_variable(images)) self.assertFalse( @@ -2246,8 +2262,8 @@ class TestLayer(LayerTest): conv3d1.bias.set_value(conv3d1_bias) dy_ret1 = conv3d1(base.to_variable(images)) dy_ret2 = conv3d2(base.to_variable(images)) - self.assertTrue( - np.array_equal(dy_ret1.numpy(), dy_ret2.numpy())) + self.assertTrue(np.array_equal(dy_ret1.numpy(), + dy_ret2.numpy())) conv3d2.weight = conv3d1.weight conv3d2.bias = conv3d1.bias @@ -2259,22 +2275,19 @@ class TestLayer(LayerTest): images = np.ones([2, 3, 6, 6, 6], dtype='float32') custom_weight = np.random.randn(3, 3, 2, 2, 2).astype("float32") - weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight)) - conv3d1 = nn.Conv3DTranspose( - num_channels=3, - num_filters=3, - filter_size=2, - bias_attr='conv3d1_b', - use_cudnn=False) - conv3d2 = nn.Conv3DTranspose( - num_channels=3, - num_filters=3, - filter_size=2, - param_attr=weight_attr, - bias_attr='conv3d2_b', - use_cudnn=False) + weight_attr = fluid.ParamAttr(initializer=fluid.initializer. + NumpyArrayInitializer(custom_weight)) + conv3d1 = nn.Conv3DTranspose(num_channels=3, + num_filters=3, + filter_size=2, + bias_attr='conv3d1_b', + use_cudnn=False) + conv3d2 = nn.Conv3DTranspose(num_channels=3, + num_filters=3, + filter_size=2, + param_attr=weight_attr, + bias_attr='conv3d2_b', + use_cudnn=False) dy_ret1 = conv3d1(base.to_variable(images)) dy_ret2 = conv3d2(base.to_variable(images)) self.assertFalse(np.array_equal(dy_ret1.numpy(), dy_ret2.numpy())) @@ -2400,9 +2413,11 @@ class TestLayer(LayerTest): a = layers.data(name='a', shape=[1], dtype='int64') b = layers.data(name='b', shape=[1], dtype='int64') cond = layers.less_than(x=a, y=b) - static_ret = self.get_static_graph_result( - feed={"a": value_a, - "b": value_b}, fetch_list=[cond])[0] + static_ret = self.get_static_graph_result(feed={ + "a": value_a, + "b": value_b + }, + fetch_list=[cond])[0] with self.dynamic_graph(): with _test_eager_guard(): da = base.to_variable(value_a) @@ -2424,9 +2439,11 @@ class TestLayer(LayerTest): a1 = layers.data(name='a1', shape=[1], dtype='int64') b1 = layers.data(name='b1', shape=[1], dtype='int64') cond1 = layers.less_equal(x=a1, y=b1) - static_ret1 = self.get_static_graph_result( - feed={"a1": value_a, - "b1": value_b}, fetch_list=[cond1])[0] + static_ret1 = self.get_static_graph_result(feed={ + "a1": value_a, + "b1": value_b + }, + fetch_list=[cond1])[0] with self.dynamic_graph(): with _test_eager_guard(): da1 = base.to_variable(value_a) @@ -2448,9 +2465,11 @@ class TestLayer(LayerTest): a2 = layers.data(name='a2', shape=[1], dtype='int64') b2 = layers.data(name='b2', shape=[1], dtype='int64') cond2 = layers.greater_than(x=a2, y=b2) - static_ret2 = self.get_static_graph_result( - feed={"a2": value_a, - "b2": value_b}, fetch_list=[cond2])[0] + static_ret2 = self.get_static_graph_result(feed={ + "a2": value_a, + "b2": value_b + }, + fetch_list=[cond2])[0] with self.dynamic_graph(): with _test_eager_guard(): da2 = base.to_variable(value_a) @@ -2472,9 +2491,11 @@ class TestLayer(LayerTest): a3 = layers.data(name='a3', shape=[1], dtype='int64') b3 = layers.data(name='b3', shape=[1], dtype='int64') cond3 = layers.greater_equal(x=a3, y=b3) - static_ret3 = self.get_static_graph_result( - feed={"a3": value_a, - "b3": value_b}, fetch_list=[cond3])[0] + static_ret3 = self.get_static_graph_result(feed={ + "a3": value_a, + "b3": value_b + }, + fetch_list=[cond3])[0] 
with self.dynamic_graph(): with _test_eager_guard(): da3 = base.to_variable(value_a) @@ -2496,9 +2517,11 @@ class TestLayer(LayerTest): a4 = layers.data(name='a4', shape=[1], dtype='int64') b4 = layers.data(name='b4', shape=[1], dtype='int64') cond4 = layers.equal(x=a4, y=b4) - static_ret4 = self.get_static_graph_result( - feed={"a4": value_a, - "b4": value_b}, fetch_list=[cond4])[0] + static_ret4 = self.get_static_graph_result(feed={ + "a4": value_a, + "b4": value_b + }, + fetch_list=[cond4])[0] with self.dynamic_graph(): with _test_eager_guard(): da4 = base.to_variable(value_a) @@ -2520,9 +2543,11 @@ class TestLayer(LayerTest): a5 = layers.data(name='a5', shape=[1], dtype='int64') b5 = layers.data(name='b5', shape=[1], dtype='int64') cond5 = layers.equal(x=a5, y=b5) - static_ret5 = self.get_static_graph_result( - feed={"a5": value_a, - "b5": value_b}, fetch_list=[cond5])[0] + static_ret5 = self.get_static_graph_result(feed={ + "a5": value_a, + "b5": value_b + }, + fetch_list=[cond5])[0] with self.dynamic_graph(): with _test_eager_guard(): da5 = base.to_variable(value_a) @@ -2540,6 +2565,7 @@ class TestLayer(LayerTest): self.assertTrue(dcond5.numpy()[i] == static_ret5[i]) def test_cond(self): + def less_than_branch(a, b): return fluid.layers.elementwise_add(a, b) @@ -2547,14 +2573,16 @@ class TestLayer(LayerTest): return fluid.layers.elementwise_sub(a, b) with self.static_graph(): - a = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=0.1) - b = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=0.23) + a = fluid.layers.fill_constant(shape=[1], + dtype='float32', + value=0.1) + b = fluid.layers.fill_constant(shape=[1], + dtype='float32', + value=0.23) out = fluid.layers.cond(a >= b, lambda: greater_equal_branch(a, b), lambda: less_than_branch(a, b)) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) ret = exe.run(fetch_list=[out]) static_res = ret[0] @@ -2595,6 +2623,7 @@ class TestLayer(LayerTest): self.assertTrue(np.array_equal(static_res, eager_dynamic_res)) def test_case(self): + def fn_1(): return layers.fill_constant(shape=[1, 2], dtype='float32', value=1) @@ -2613,12 +2642,12 @@ class TestLayer(LayerTest): pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1 pred_3 = layers.equal(x, y) # false: 0.3 == 0.1 - out_1 = layers.case( - pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], default=fn_3) + out_1 = layers.case(pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], + default=fn_3) out_2 = layers.case(pred_fn_pairs=[(pred_2, fn_2), (pred_3, fn_3)]) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) static_res1, static_res2 = exe.run(fetch_list=[out_1, out_2]) @@ -2632,11 +2661,11 @@ class TestLayer(LayerTest): pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1 pred_3 = layers.equal(x, y) # false: 0.3 == 0.1 - out_1 = layers.case( - pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], - default=fn_3) - out_2 = layers.case( - pred_fn_pairs=[(pred_2, fn_2), (pred_3, fn_3)]) + out_1 = layers.case(pred_fn_pairs=[(pred_1, fn_1), + (pred_2, fn_2)], + default=fn_3) + out_2 = layers.case(pred_fn_pairs=[(pred_2, fn_2), (pred_3, + fn_3)]) eager_dynamic_res1 = out_1.numpy() eager_dynamic_res2 = out_2.numpy() @@ -2648,8 +2677,8 @@ class TestLayer(LayerTest): pred_2 = layers.less_than(x, 
y) # false: 0.3 < 0.1 pred_3 = layers.equal(x, y) # false: 0.3 == 0.1 - out_1 = layers.case( - pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], default=fn_3) + out_1 = layers.case(pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], + default=fn_3) out_2 = layers.case(pred_fn_pairs=[(pred_2, fn_2), (pred_3, fn_3)]) dynamic_res1 = out_1.numpy() dynamic_res2 = out_2.numpy() @@ -2660,6 +2689,7 @@ class TestLayer(LayerTest): self.assertTrue(np.array_equal(static_res2, eager_dynamic_res2)) def test_switch_case(self): + def fn_1(): return layers.fill_constant(shape=[1, 2], dtype='float32', value=1) @@ -2673,44 +2703,46 @@ class TestLayer(LayerTest): index_1 = layers.fill_constant(shape=[1], dtype='int32', value=1) index_2 = layers.fill_constant(shape=[1], dtype='int32', value=2) - out_1 = layers.switch_case( - branch_index=index_1, - branch_fns={1: fn_1, - 2: fn_2}, - default=fn_3) - out_2 = layers.switch_case( - branch_index=index_2, - branch_fns=[(1, fn_1), (2, fn_2)], - default=fn_3) - out_3 = layers.switch_case( - branch_index=index_2, - branch_fns=[(0, fn_1), (4, fn_2), (7, fn_3)]) - - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + out_1 = layers.switch_case(branch_index=index_1, + branch_fns={ + 1: fn_1, + 2: fn_2 + }, + default=fn_3) + out_2 = layers.switch_case(branch_index=index_2, + branch_fns=[(1, fn_1), (2, fn_2)], + default=fn_3) + out_3 = layers.switch_case(branch_index=index_2, + branch_fns=[(0, fn_1), (4, fn_2), + (7, fn_3)]) + + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) static_res1, static_res2, static_res3 = exe.run( fetch_list=[out_1, out_2, out_3]) with self.dynamic_graph(): with _test_eager_guard(): - index_1 = layers.fill_constant( - shape=[1], dtype='int32', value=1) - index_2 = layers.fill_constant( - shape=[1], dtype='int32', value=2) - - out_1 = layers.switch_case( - branch_index=index_1, - branch_fns={1: fn_1, - 2: fn_2}, - default=fn_3) - out_2 = layers.switch_case( - branch_index=index_2, - branch_fns=[(1, fn_1), (2, fn_2)], - default=fn_3) - out_3 = layers.switch_case( - branch_index=index_2, - branch_fns=[(0, fn_1), (4, fn_2), (7, fn_3)]) + index_1 = layers.fill_constant(shape=[1], + dtype='int32', + value=1) + index_2 = layers.fill_constant(shape=[1], + dtype='int32', + value=2) + + out_1 = layers.switch_case(branch_index=index_1, + branch_fns={ + 1: fn_1, + 2: fn_2 + }, + default=fn_3) + out_2 = layers.switch_case(branch_index=index_2, + branch_fns=[(1, fn_1), (2, fn_2)], + default=fn_3) + out_3 = layers.switch_case(branch_index=index_2, + branch_fns=[(0, fn_1), (4, fn_2), + (7, fn_3)]) eager_dynamic_res1 = out_1.numpy() eager_dynamic_res2 = out_2.numpy() @@ -2719,18 +2751,18 @@ class TestLayer(LayerTest): index_1 = layers.fill_constant(shape=[1], dtype='int32', value=1) index_2 = layers.fill_constant(shape=[1], dtype='int32', value=2) - out_1 = layers.switch_case( - branch_index=index_1, - branch_fns={1: fn_1, - 2: fn_2}, - default=fn_3) - out_2 = layers.switch_case( - branch_index=index_2, - branch_fns=[(1, fn_1), (2, fn_2)], - default=fn_3) - out_3 = layers.switch_case( - branch_index=index_2, - branch_fns=[(0, fn_1), (4, fn_2), (7, fn_3)]) + out_1 = layers.switch_case(branch_index=index_1, + branch_fns={ + 1: fn_1, + 2: fn_2 + }, + default=fn_3) + out_2 = layers.switch_case(branch_index=index_2, + branch_fns=[(1, fn_1), (2, fn_2)], + default=fn_3) + out_3 = layers.switch_case(branch_index=index_2, + branch_fns=[(0, fn_1), (4, fn_2), + (7, fn_3)]) 
dynamic_res1 = out_1.numpy() dynamic_res2 = out_2.numpy() @@ -2747,25 +2779,32 @@ class TestLayer(LayerTest): with self.static_graph(): x = fluid.layers.data(name="x1", shape=[6, 5, 8]) - dim1 = fluid.layers.data( - name="dim1", shape=[1], append_batch_size=False) - dim2 = fluid.layers.data( - name="dim2", shape=[1], append_batch_size=False) + dim1 = fluid.layers.data(name="dim1", + shape=[1], + append_batch_size=False) + dim2 = fluid.layers.data(name="dim2", + shape=[1], + append_batch_size=False) crop_shape1 = (1, 2, 4, 4) - crop_shape2 = fluid.layers.data( - name="crop_shape", shape=[4], append_batch_size=False) + crop_shape2 = fluid.layers.data(name="crop_shape", + shape=[4], + append_batch_size=False) crop_shape3 = [-1, dim1, dim2, 4] crop_offsets1 = [0, 0, 1, 0] - crop_offsets2 = fluid.layers.data( - name="crop_offset", shape=[4], append_batch_size=False) + crop_offsets2 = fluid.layers.data(name="crop_offset", + shape=[4], + append_batch_size=False) crop_offsets3 = [0, dim1, dim2, 0] - out1 = fluid.layers.crop_tensor( - x, shape=crop_shape1, offsets=crop_offsets1) - out2 = fluid.layers.crop_tensor( - x, shape=crop_shape2, offsets=crop_offsets2) - out3 = fluid.layers.crop_tensor( - x, shape=crop_shape3, offsets=crop_offsets3) + out1 = fluid.layers.crop_tensor(x, + shape=crop_shape1, + offsets=crop_offsets1) + out2 = fluid.layers.crop_tensor(x, + shape=crop_shape2, + offsets=crop_offsets2) + out3 = fluid.layers.crop_tensor(x, + shape=crop_shape3, + offsets=crop_offsets3) self.assertIsNotNone(out1) self.assertIsNotNone(out2) @@ -2774,8 +2813,10 @@ class TestLayer(LayerTest): def test_shard_index(self): with self.static_graph(): x = fluid.layers.data(name="label", shape=[4, 1], dtype='int64') - shard_label = fluid.layers.shard_index( - input=x, index_num=20, nshards=2, shard_id=0) + shard_label = fluid.layers.shard_index(input=x, + index_num=20, + nshards=2, + shard_id=0) self.assertIsNotNone(shard_label) @@ -2794,8 +2835,10 @@ class TestLayer(LayerTest): exe.run(fluid.default_startup_program()) # x = np.random.rand(3, 32, 32).astype("float32") # y = np.array([[1], [0], [1]]) - static_out = exe.run(feed={"input": x, - "label": y}, + static_out = exe.run(feed={ + "input": x, + "label": y + }, fetch_list=result[0]) with self.dynamic_graph(force_to_use_cpu=True): @@ -2809,6 +2852,7 @@ class TestLayer(LayerTest): class TestBook(LayerTest): + def setUp(self): self.only_static_set = set({"make_word_embedding"}) self.not_compare_static_dygraph_set = set({ @@ -2856,8 +2900,10 @@ class TestBook(LayerTest): if method.__name__ in self.all_close_compare: self.assertTrue( - np.allclose( - static_result[0], dy_result_value, atol=0, rtol=1e-05), + np.allclose(static_result[0], + dy_result_value, + atol=0, + rtol=1e-05), "Result of function [{}] compare failed".format( method.__name__)) continue @@ -2894,19 +2940,18 @@ class TestBook(LayerTest): set_feed_dict=True, append_batch_size=True): if base.enabled(): - return base.to_variable( - value=self._get_np_data(shape, dtype, append_batch_size), - name=name, - zero_copy=False) + return base.to_variable(value=self._get_np_data( + shape, dtype, append_batch_size), + name=name, + zero_copy=False) else: if set_feed_dict: - self._feed_dict[name] = self._get_np_data(shape, dtype, - append_batch_size) - return layers.data( - name=name, - shape=shape, - dtype=dtype, - append_batch_size=append_batch_size) + self._feed_dict[name] = self._get_np_data( + shape, dtype, append_batch_size) + return layers.data(name=name, + shape=shape, + dtype=dtype, + 
append_batch_size=append_batch_size) def make_sampled_softmax_with_cross_entropy(self): with program_guard(fluid.default_main_program(), @@ -2914,14 +2959,13 @@ class TestBook(LayerTest): logits = self._get_data(name='Logits', shape=[256], dtype='float32') label = self._get_data(name='Label', shape=[1], dtype='int64') num_samples = 25 - output = layers.sampled_softmax_with_cross_entropy(logits, label, - num_samples) + output = layers.sampled_softmax_with_cross_entropy( + logits, label, num_samples) return (output) def make_fit_a_line(self): - with program_guard( - fluid.default_main_program(), - startup_program=fluid.default_startup_program()): + with program_guard(fluid.default_main_program(), + startup_program=fluid.default_startup_program()): x = self._get_data(name='x', shape=[13], dtype='float32') y_predict = layers.fc(input=x, size=1, act=None) y = self._get_data(name='y', shape=[1], dtype='float32') @@ -2949,29 +2993,29 @@ class TestBook(LayerTest): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): img = self._get_data(name='pixel', shape=[3, 2, 2], dtype='float32') - return layers.conv2d_transpose( - input=img, num_filters=10, output_size=28) + return layers.conv2d_transpose(input=img, + num_filters=10, + output_size=28) def make_recognize_digits_conv(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - images = self._get_data( - name='pixel', shape=[1, 28, 28], dtype='float32') + images = self._get_data(name='pixel', + shape=[1, 28, 28], + dtype='float32') label = self._get_data(name='label', shape=[1], dtype='int64') - conv_pool_1 = nets.simple_img_conv_pool( - input=images, - filter_size=5, - num_filters=2, - pool_size=2, - pool_stride=2, - act="relu") - conv_pool_2 = nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=4, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_1 = nets.simple_img_conv_pool(input=images, + filter_size=5, + num_filters=2, + pool_size=2, + pool_stride=2, + act="relu") + conv_pool_2 = nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=4, + pool_size=2, + pool_stride=2, + act="relu") predict = layers.fc(input=conv_pool_2, size=10, act="softmax") cost = layers.cross_entropy(input=predict, label=label) @@ -2984,33 +3028,30 @@ class TestBook(LayerTest): dict_size = 10000 embed_size = 32 first_word = self._get_data(name='firstw', shape=[1], dtype='int64') - second_word = self._get_data( - name='secondw', shape=[1], dtype='int64') + second_word = self._get_data(name='secondw', + shape=[1], + dtype='int64') third_word = self._get_data(name='thirdw', shape=[1], dtype='int64') forth_word = self._get_data(name='forthw', shape=[1], dtype='int64') next_word = self._get_data(name='nextw', shape=[1], dtype='int64') - embed_first = layers.embedding( - input=first_word, - size=[dict_size, embed_size], - dtype='float32', - param_attr='shared_w') - embed_second = layers.embedding( - input=second_word, - size=[dict_size, embed_size], - dtype='float32', - param_attr='shared_w') - - embed_third = layers.embedding( - input=third_word, - size=[dict_size, embed_size], - dtype='float32', - param_attr='shared_w') - embed_forth = layers.embedding( - input=forth_word, - size=[dict_size, embed_size], - dtype='float32', - param_attr='shared_w') + embed_first = layers.embedding(input=first_word, + size=[dict_size, embed_size], + dtype='float32', + param_attr='shared_w') + embed_second = layers.embedding(input=second_word, + size=[dict_size, embed_size], + 
dtype='float32', + param_attr='shared_w') + + embed_third = layers.embedding(input=third_word, + size=[dict_size, embed_size], + dtype='float32', + param_attr='shared_w') + embed_forth = layers.embedding(input=forth_word, + size=[dict_size, embed_size], + dtype='float32', + param_attr='shared_w') concat_embed = layers.concat( input=[embed_first, embed_second, embed_third, embed_forth], @@ -3045,43 +3086,48 @@ class TestBook(LayerTest): with program_guard(program2): x2 = self._get_data(name='x2', shape=[4, 8], dtype='float32') y2 = self._get_data(name='y2', shape=[4], dtype='int64') - path_table = self._get_data( - name='path_table', shape=[4, 6], dtype='int64') - path_code = self._get_data( - name='path_code', shape=[4, 6], dtype='int64') - return (layers.hsigmoid( - input=x2, - label=y2, - num_classes=6, - path_table=path_table, - path_code=path_code, - is_custom=True)) + path_table = self._get_data(name='path_table', + shape=[4, 6], + dtype='int64') + path_code = self._get_data(name='path_code', + shape=[4, 6], + dtype='int64') + return (layers.hsigmoid(input=x2, + label=y2, + num_classes=6, + path_table=path_table, + path_code=path_code, + is_custom=True)) def make_pool2d(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): x = self._get_data(name='x', shape=[3, 224, 224], dtype='float32') - return (layers.pool2d( - x, pool_size=[5, 3], pool_stride=[1, 2], pool_padding=(2, 1))) + return (layers.pool2d(x, + pool_size=[5, 3], + pool_stride=[1, 2], + pool_padding=(2, 1))) def make_pool2d_infershape(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): theta = self._get_data("theta", shape=[2, 3], dtype='float32') x = fluid.layers.affine_grid(theta, out_shape=[2, 3, 244, 244]) - return (layers.pool2d( - x, pool_size=[5, 3], pool_stride=[1, 2], pool_padding=(2, 1))) + return (layers.pool2d(x, + pool_size=[5, 3], + pool_stride=[1, 2], + pool_padding=(2, 1))) def make_pool3d(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - x = self._get_data( - name='x', shape=[3, 244, 244, 244], dtype='float32') - return (layers.pool3d( - x, - pool_size=[5, 3, 2], - pool_stride=[1, 2, 3], - pool_padding=(2, 1, 1))) + x = self._get_data(name='x', + shape=[3, 244, 244, 244], + dtype='float32') + return (layers.pool3d(x, + pool_size=[5, 3, 2], + pool_stride=[1, 2, 3], + pool_padding=(2, 1, 1))) def make_adaptive_pool2d(self): with program_guard(fluid.default_main_program(), @@ -3099,11 +3145,12 @@ class TestBook(LayerTest): def make_adaptive_pool3d(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - x = self._get_data( - name='x', shape=[3, 244, 224, 224], dtype='float32') + x = self._get_data(name='x', + shape=[3, 244, 224, 224], + dtype='float32') return (layers.adaptive_pool3d(x, [3, 3, 3], pool_type='avg')) - pool, mask = layers.adaptive_pool3d( - x, [3, 3, 3], require_index=True) + pool, mask = layers.adaptive_pool3d(x, [3, 3, 3], + require_index=True) return (pool) return (mask) return (layers.adaptive_pool3d(x, 3, pool_type='avg')) @@ -3114,17 +3161,21 @@ class TestBook(LayerTest): def make_lstm_unit(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - x_t_data = self._get_data( - name='x_t_data', shape=[10, 10], dtype='float32') + x_t_data = self._get_data(name='x_t_data', + shape=[10, 10], + dtype='float32') x_t = layers.fc(input=x_t_data, size=10) - prev_hidden_data = self._get_data( - name='prev_hidden_data', 
shape=[10, 30], dtype='float32') + prev_hidden_data = self._get_data(name='prev_hidden_data', + shape=[10, 30], + dtype='float32') prev_hidden = layers.fc(input=prev_hidden_data, size=30) - prev_cell_data = self._get_data( - name='prev_cell', shape=[10, 30], dtype='float32') + prev_cell_data = self._get_data(name='prev_cell', + shape=[10, 30], + dtype='float32') prev_cell = layers.fc(input=prev_cell_data, size=30) - return (layers.lstm_unit( - x_t=x_t, hidden_t_prev=prev_hidden, cell_t_prev=prev_cell)) + return (layers.lstm_unit(x_t=x_t, + hidden_t_prev=prev_hidden, + cell_t_prev=prev_cell)) def make_softmax(self): with program_guard(fluid.default_main_program(), @@ -3136,11 +3187,10 @@ class TestBook(LayerTest): def make_space_to_depth(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - data = self._get_data( - name='data', - shape=[32, 9, 6, 6], - append_batch_size=False, - dtype='float32') + data = self._get_data(name='data', + shape=[32, 9, 6, 6], + append_batch_size=False, + dtype='float32') return (layers.space_to_depth(data, 3)) def make_lrn(self): @@ -3160,8 +3210,9 @@ class TestBook(LayerTest): words = [] for i in range(window_size): words.append( - self._get_data( - name='word_{0}'.format(i), shape=[1], dtype='int64')) + self._get_data(name='word_{0}'.format(i), + shape=[1], + dtype='int64')) dict_size = 10000 label_word = int(window_size // 2) + 1 @@ -3171,11 +3222,10 @@ class TestBook(LayerTest): if i == label_word: continue - emb = layers.embedding( - input=words[i], - size=[dict_size, 32], - param_attr='emb.w', - is_sparse=True) + emb = layers.embedding(input=words[i], + size=[dict_size, 32], + param_attr='emb.w', + is_sparse=True) embs.append(emb) @@ -3235,18 +3285,18 @@ class TestBook(LayerTest): def make_scatter(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - x = self._get_data( - name='x', - shape=[3, 3], - append_batch_size=False, - dtype='float32') - idx = self._get_data( - name='idx', shape=[2], append_batch_size=False, dtype='int32') - updates = self._get_data( - name='updates', - shape=[2, 3], - append_batch_size=False, - dtype='float32') + x = self._get_data(name='x', + shape=[3, 3], + append_batch_size=False, + dtype='float32') + idx = self._get_data(name='idx', + shape=[2], + append_batch_size=False, + dtype='int32') + updates = self._get_data(name='updates', + shape=[2, 3], + append_batch_size=False, + dtype='float32') out = layers.scatter(input=x, index=idx, updates=updates) return (out) @@ -3262,8 +3312,9 @@ class TestBook(LayerTest): with fluid.framework._dygraph_place_guard(place=fluid.CPUPlace()): label = self._get_data(name="label", shape=[1], dtype="int32") one_hot_label = layers.one_hot(input=label, depth=10) - smooth_label = layers.label_smooth( - label=one_hot_label, epsilon=0.1, dtype="int32") + smooth_label = layers.label_smooth(label=one_hot_label, + epsilon=0.1, + dtype="int32") return (smooth_label) def make_topk(self): @@ -3300,8 +3351,9 @@ class TestBook(LayerTest): try: with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - x = self._get_data( - name='x2', shape=[3, 9, 6, 7], dtype="float32") + x = self._get_data(name='x2', + shape=[3, 9, 6, 7], + dtype="float32") output = layers.resize_nearest(x, out_shape=[12, 12, 12]) except ValueError: pass @@ -3331,8 +3383,9 @@ class TestBook(LayerTest): try: with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - x = self._get_data( - name='x', shape=[3, 9, 6, 7], 
dtype="float32") + x = self._get_data(name='x', + shape=[3, 9, 6, 7], + dtype="float32") output = layers.resize_trilinear(x, out_shape=[12, 12]) except ValueError: pass @@ -3390,64 +3443,61 @@ class TestBook(LayerTest): def make_rank_loss(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - label = self._get_data( - name='label', - append_batch_size=False, - shape=[16, 1], - dtype="float32") - left = self._get_data( - name='left', - append_batch_size=False, - shape=[16, 1], - dtype="float32") - right = self._get_data( - name='right', - append_batch_size=False, - shape=[16, 1], - dtype="float32") + label = self._get_data(name='label', + append_batch_size=False, + shape=[16, 1], + dtype="float32") + left = self._get_data(name='left', + append_batch_size=False, + shape=[16, 1], + dtype="float32") + right = self._get_data(name='right', + append_batch_size=False, + shape=[16, 1], + dtype="float32") out = layers.rank_loss(label, left, right, name="rank_loss") return (out) def make_shape(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - input = self._get_data( - name="input", shape=[3, 100, 100], dtype="float32") + input = self._get_data(name="input", + shape=[3, 100, 100], + dtype="float32") out = layers.shape(input) return (out) def make_pad2d(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - input = self._get_data( - name="input", shape=[3, 100, 100], dtype="float32") + input = self._get_data(name="input", + shape=[3, 100, 100], + dtype="float32") paddings = layers.fill_constant(shape=[4], dtype='int32', value=1) - out = layers.pad2d( - input, - paddings=[1, 2, 3, 4], - mode='reflect', - data_format='NCHW', - name="shape") - out_1 = layers.pad2d( - input, - paddings=paddings, - mode='reflect', - data_format='NCHW', - name="shape") + out = layers.pad2d(input, + paddings=[1, 2, 3, 4], + mode='reflect', + data_format='NCHW', + name="shape") + out_1 = layers.pad2d(input, + paddings=paddings, + mode='reflect', + data_format='NCHW', + name="shape") return (out) return (out_1) def make_prelu(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - input = self._get_data( - name="input", shape=[5, 200, 100, 100], dtype="float32") + input = self._get_data(name="input", + shape=[5, 200, 100, 100], + dtype="float32") mode = 'channel' - out = layers.prelu( - input, - mode, - param_attr=ParamAttr(initializer=Constant(1.0)), - name='prelu') + out = layers.prelu(input, + mode, + param_attr=ParamAttr(initializer=Constant(1.0)), + name='prelu') return (out) def make_soft_relu(self): @@ -3596,8 +3646,9 @@ class TestBook(LayerTest): def make_uniform_random_batch_size_like(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - input = self._get_data( - name="input", shape=[13, 11], dtype='float32') + input = self._get_data(name="input", + shape=[13, 11], + dtype='float32') out = layers.uniform_random_batch_size_like(input, [-1, 11]) return (out) @@ -3610,11 +3661,10 @@ class TestBook(LayerTest): def make_sampling_id(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - x = self._get_data( - name="X", - shape=[13, 11], - dtype='float32', - append_batch_size=False) + x = self._get_data(name="X", + shape=[13, 11], + dtype='float32', + append_batch_size=False) out = layers.sampling_id(x) return (out) @@ -3622,18 +3672,22 @@ class TestBook(LayerTest): def 
make_gaussian_random_batch_size_like(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - input = self._get_data( - name="input", shape=[13, 11], dtype='float32') - - out = layers.gaussian_random_batch_size_like( - input, shape=[-1, 11], mean=1.0, std=2.0) + input = self._get_data(name="input", + shape=[13, 11], + dtype='float32') + + out = layers.gaussian_random_batch_size_like(input, + shape=[-1, 11], + mean=1.0, + std=2.0) return (out) def make_sum(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - input = self._get_data( - name="input", shape=[13, 11], dtype='float32') + input = self._get_data(name="input", + shape=[13, 11], + dtype='float32') out = layers.sum(input) return (out) @@ -3645,8 +3699,9 @@ class TestBook(LayerTest): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - input = self._get_data( - name="input", shape=[3, 4, 5, 6], dtype='float32') + input = self._get_data(name="input", + shape=[3, 4, 5, 6], + dtype='float32') out = layers.slice(input, axes=axes, starts=starts, ends=ends) return out @@ -3654,13 +3709,13 @@ class TestBook(LayerTest): def make_scale_variable(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - input = self._get_data( - name="input", shape=[3, 4, 5, 6], dtype='float32') - scale_var = self._get_data( - name="scale", - shape=[1], - dtype='float32', - append_batch_size=False) + input = self._get_data(name="input", + shape=[3, 4, 5, 6], + dtype='float32') + scale_var = self._get_data(name="scale", + shape=[1], + dtype='float32', + append_batch_size=False) out = layers.scale(input, scale=scale_var) return out @@ -3699,44 +3754,48 @@ class TestBook(LayerTest): def make_batch_norm(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - data = self._get_data( - name='data', shape=[32, 128, 128], dtype="float32") + data = self._get_data(name='data', + shape=[32, 128, 128], + dtype="float32") out = layers.batch_norm(data) return (out) def make_batch_norm_momentum_variable(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - data = self._get_data( - name='data', shape=[32, 128, 128], dtype="float32") - momentum = self._get_data( - name='momentum', - shape=[1], - dtype='float32', - append_batch_size=False) + data = self._get_data(name='data', + shape=[32, 128, 128], + dtype="float32") + momentum = self._get_data(name='momentum', + shape=[1], + dtype='float32', + append_batch_size=False) out = layers.batch_norm(data, momentum=momentum) return (out) def make_inplace_abn(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - data = self._get_data( - name='data', shape=[32, 128, 128], dtype="float32") + data = self._get_data(name='data', + shape=[32, 128, 128], + dtype="float32") out = layers.inplace_abn(data, act='leaky_relu', act_alpha=0.2) return (out) def make_inplace_abn_momentum_variable(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - data = self._get_data( - name='data', shape=[32, 128, 128], dtype="float32") - momentum = self._get_data( - name='momentum', - shape=[1], - dtype='float32', - append_batch_size=False) - out = layers.inplace_abn( - data, momentum=momentum, act='elu', act_alpha=2.0) + data = self._get_data(name='data', + shape=[32, 128, 128], + dtype="float32") + momentum = self._get_data(name='momentum', + shape=[1], + dtype='float32', + 
append_batch_size=False) + out = layers.inplace_abn(data, + momentum=momentum, + act='elu', + act_alpha=2.0) return (out) def make_range(self): @@ -3754,27 +3813,24 @@ class TestBook(LayerTest): def make_spectral_norm(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - weight = self._get_data( - name='weight', - shape=[2, 3, 32, 32], - dtype="float32", - append_batch_size=False) + weight = self._get_data(name='weight', + shape=[2, 3, 32, 32], + dtype="float32", + append_batch_size=False) out = layers.spectral_norm(weight, dim=1, power_iters=1) return (out) def make_kldiv_loss(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - x = self._get_data( - name='x', - shape=[32, 128, 128], - dtype="float32", - append_batch_size=False) - target = self._get_data( - name='target', - shape=[32, 128, 128], - dtype="float32", - append_batch_size=False) + x = self._get_data(name='x', + shape=[32, 128, 128], + dtype="float32", + append_batch_size=False) + target = self._get_data(name='target', + shape=[32, 128, 128], + dtype="float32", + append_batch_size=False) loss = layers.kldiv_loss(x=x, target=target, reduction='batchmean') return (loss) @@ -3827,12 +3883,15 @@ class TestBook(LayerTest): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): hidden_dim, proj_dim = 16, 8 - seq_data = layers.data( - name='seq_data', shape=[10, 10], dtype='float32', lod_level=1) + seq_data = layers.data(name='seq_data', + shape=[10, 10], + dtype='float32', + lod_level=1) fc_out = layers.fc(input=seq_data, size=4 * hidden_dim) self.assertIsNotNone( - layers.dynamic_lstmp( - input=fc_out, size=4 * hidden_dim, proj_size=proj_dim)) + layers.dynamic_lstmp(input=fc_out, + size=4 * hidden_dim, + proj_size=proj_dim)) def test_linear_chain_crf(self): with self.static_graph(): @@ -3840,55 +3899,52 @@ class TestBook(LayerTest): feature = layers.data(name='feature', shape=[784], dtype='float32') label = layers.data(name='label', shape=[1], dtype='int64') emission = layers.fc(input=feature, size=10) - crf = layers.linear_chain_crf( - input=emission, label=label, param_attr=ParamAttr(name="crfw")) - crf_decode = layers.crf_decoding( - input=emission, param_attr=ParamAttr(name="crfw")) + crf = layers.linear_chain_crf(input=emission, + label=label, + param_attr=ParamAttr(name="crfw")) + crf_decode = layers.crf_decoding(input=emission, + param_attr=ParamAttr(name="crfw")) self.assertFalse(crf is None) self.assertFalse(crf_decode is None) - return layers.chunk_eval( - input=crf_decode, - label=label, - chunk_scheme="IOB", - num_chunk_types=(label_dict_len - 1) // 2) + return layers.chunk_eval(input=crf_decode, + label=label, + chunk_scheme="IOB", + num_chunk_types=(label_dict_len - 1) // 2) def test_linear_chain_crf_padding(self): with self.static_graph(): label_dict_len, max_len = 10, 20 - feature = layers.data( - name='feature', shape=[max_len, 784], dtype='float32') + feature = layers.data(name='feature', + shape=[max_len, 784], + dtype='float32') label = layers.data(name='label', shape=[max_len], dtype='int64') length = layers.data(name='length', shape=[1], dtype='int64') emission = layers.fc(input=feature, size=10, num_flatten_dims=2) - crf = layers.linear_chain_crf( - input=emission, - label=label, - length=length, - param_attr=ParamAttr(name="crfw")) - crf_decode = layers.crf_decoding( - input=emission, - length=length, - param_attr=ParamAttr(name="crfw")) + crf = layers.linear_chain_crf(input=emission, + label=label, + 
length=length, + param_attr=ParamAttr(name="crfw")) + crf_decode = layers.crf_decoding(input=emission, + length=length, + param_attr=ParamAttr(name="crfw")) self.assertFalse(crf is None) self.assertFalse(crf_decode is None) - return layers.chunk_eval( - input=crf_decode, - label=label, - seq_length=length, - chunk_scheme="IOB", - num_chunk_types=(label_dict_len - 1) // 2) + return layers.chunk_eval(input=crf_decode, + label=label, + seq_length=length, + chunk_scheme="IOB", + num_chunk_types=(label_dict_len - 1) // 2) def test_im2sequence(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): x = layers.data(name='x', shape=[3, 128, 128], dtype='float32') y = layers.data(name='y', shape=[], dtype='float32') - output = layers.im2sequence( - input=x, - input_image_size=y, - stride=[1, 1], - filter_size=[2, 2], - out_stride=[1, 1]) + output = layers.im2sequence(input=x, + input_image_size=y, + stride=[1, 1], + filter_size=[2, 2], + out_stride=[1, 1]) return (output) def test_lod_reset(self): @@ -3896,8 +3952,10 @@ class TestBook(LayerTest): with self.static_graph(): # case 1 x = layers.data(name='x', shape=[10], dtype='float32') - y = layers.data( - name='y', shape=[10, 20], dtype='float32', lod_level=2) + y = layers.data(name='y', + shape=[10, 20], + dtype='float32', + lod_level=2) z = layers.lod_reset(x=x, y=y) self.assertTrue(z.lod_level == 2) # case 2 @@ -3929,24 +3987,32 @@ class TestBook(LayerTest): strides = [1, 1, 1] with self.static_graph(): x = layers.data(name="x", shape=[245, 30, 30], dtype="float32") - out = layers.strided_slice( - x, axes=axes, starts=starts, ends=ends, strides=strides) + out = layers.strided_slice(x, + axes=axes, + starts=starts, + ends=ends, + strides=strides) return out def test_fill_constant_batch_size_like(self): with self.static_graph(): - like = fluid.layers.fill_constant( - shape=[1, 200], value=10, dtype='int64') - out = layers.fill_constant_batch_size_like( - input=like, shape=[2, 3300], value=1315454564656, dtype='int64') + like = fluid.layers.fill_constant(shape=[1, 200], + value=10, + dtype='int64') + out = layers.fill_constant_batch_size_like(input=like, + shape=[2, 3300], + value=1315454564656, + dtype='int64') return out def test_psroi_pool(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): x = layers.data(name="x", shape=[245, 30, 30], dtype="float32") - rois = layers.data( - name="rois", shape=[4], dtype="float32", lod_level=1) + rois = layers.data(name="rois", + shape=[4], + dtype="float32", + lod_level=1) output = layers.psroi_pool(x, rois, 5, 0.25, 7, 7) return (output) @@ -3954,8 +4020,10 @@ class TestBook(LayerTest): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): x = layers.data(name='x', shape=[10], dtype='float32') - y = layers.data( - name='y', shape=[10, 20], dtype='float32', lod_level=2) + y = layers.data(name='y', + shape=[10, 20], + dtype='float32', + lod_level=2) return (layers.sequence_expand(x=x, y=y, ref_level=1)) def test_sequence_reshape(self): @@ -3975,8 +4043,10 @@ class TestBook(LayerTest): def test_sequence_softmax(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - seq_data = layers.data( - name='seq_data', shape=[10, 10], dtype='float32', lod_level=1) + seq_data = layers.data(name='seq_data', + shape=[10, 10], + dtype='float32', + lod_level=1) seq = layers.fc(input=seq_data, size=20) return (layers.sequence_softmax(seq)) @@ -3990,23 +4060,20 @@ class TestBook(LayerTest): def 
test_sequence_scatter(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - x = layers.data( - name='x', - shape=[3, 6], - append_batch_size=False, - dtype='float32') - idx = layers.data( - name='idx', - shape=[12, 1], - append_batch_size=False, - dtype='int32', - lod_level=1) - updates = layers.data( - name='updates', - shape=[12, 1], - append_batch_size=False, - dtype='float32', - lod_level=1) + x = layers.data(name='x', + shape=[3, 6], + append_batch_size=False, + dtype='float32') + idx = layers.data(name='idx', + shape=[12, 1], + append_batch_size=False, + dtype='int32', + lod_level=1) + updates = layers.data(name='updates', + shape=[12, 1], + append_batch_size=False, + dtype='float32', + lod_level=1) out = layers.sequence_scatter(input=x, index=idx, updates=updates) return (out) @@ -4014,39 +4081,44 @@ class TestBook(LayerTest): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): import numpy as np - seqs = layers.data( - name='x', shape=[10, 5], dtype='float32', lod_level=1) + seqs = layers.data(name='x', + shape=[10, 5], + dtype='float32', + lod_level=1) offset = layers.assign(input=np.array([[0, 1]]).astype('int32')) length = layers.assign(input=np.array([[2, 1]]).astype('int32')) - out = layers.sequence_slice( - input=seqs, offset=offset, length=length) + out = layers.sequence_slice(input=seqs, + offset=offset, + length=length) return (out) def test_filter_by_instag(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - x1 = layers.data( - name='Ins', shape=[32, 1], dtype='float32', lod_level=0) - x2 = layers.data( - name='Ins_tag', - shape=[32, 1], - dtype='int64', - lod_level=0, - stop_gradient=True) - x3 = layers.create_global_var( - shape=[1, 1], - value=20, - dtype='int64', - persistable=True, - force_cpu=True, - name='Filter_tag') + x1 = layers.data(name='Ins', + shape=[32, 1], + dtype='float32', + lod_level=0) + x2 = layers.data(name='Ins_tag', + shape=[32, 1], + dtype='int64', + lod_level=0, + stop_gradient=True) + x3 = layers.create_global_var(shape=[1, 1], + value=20, + dtype='int64', + persistable=True, + force_cpu=True, + name='Filter_tag') out1, out2 = layers.filter_by_instag(x1, x2, x3, is_lod=True) def test_shuffle_batch(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - x = layers.data( - name='X', shape=[4, 50], dtype='float32', lod_level=0) + x = layers.data(name='X', + shape=[4, 50], + dtype='float32', + lod_level=0) out1 = fluid.contrib.layers.shuffle_batch(x) default_main_program().random_seed = 1000 out2 = fluid.contrib.layers.shuffle_batch(x) @@ -4058,8 +4130,9 @@ class TestBook(LayerTest): with self.static_graph(): x = fluid.data(name="x", shape=[None, 3], dtype="float32") y = fluid.data(name="y", shape=[None, 3], dtype="float32") - sum = fluid.contrib.layers.partial_sum( - [x, y], start_index=0, length=2) + sum = fluid.contrib.layers.partial_sum([x, y], + start_index=0, + length=2) return (sum) def test_batch_fc(self): @@ -4083,8 +4156,9 @@ class TestBook(LayerTest): def test_rank_attention(self): with self.static_graph(): input = fluid.data(name="input", shape=[None, 2], dtype="float32") - rank_offset = fluid.data( - name="rank_offset", shape=[None, 7], dtype="int32") + rank_offset = fluid.data(name="rank_offset", + shape=[None, 7], + dtype="int32") out = fluid.contrib.layers.rank_attention( input=input, rank_offset=rank_offset, @@ -4106,26 +4180,35 @@ class TestBook(LayerTest): rois = layers.data(name="rois", shape=[4], dtype="float32") 
rois_num = fluid.data(name="rois_num", shape=[None], dtype="int32") output = layers.roi_pool(x, rois, 4, 4, 0.5, rois_num=rois_num) - static_res = self.get_static_graph_result( - feed={'x': x_np, - 'rois': rois_np, - 'rois_num': rois_num_np}, - fetch_list=[output])[0] + static_res = self.get_static_graph_result(feed={ + 'x': x_np, + 'rois': rois_np, + 'rois_num': rois_num_np + }, + fetch_list=[output])[0] with self.dynamic_graph(): with _test_eager_guard(): x_dy = base.to_variable(x_np) rois_dy = base.to_variable(rois_np) rois_num_dy = base.to_variable(rois_num_np) - dy_eager_res = layers.roi_pool( - x_dy, rois_dy, 4, 4, 0.5, rois_num=rois_num_dy) + dy_eager_res = layers.roi_pool(x_dy, + rois_dy, + 4, + 4, + 0.5, + rois_num=rois_num_dy) dy_eager_res_value = dy_eager_res[0].numpy() x_dy = base.to_variable(x_np) rois_dy = base.to_variable(rois_np) rois_num_dy = base.to_variable(rois_num_np) - dy_res = layers.roi_pool( - x_dy, rois_dy, 4, 4, 0.5, rois_num=rois_num_dy) + dy_res = layers.roi_pool(x_dy, + rois_dy, + 4, + 4, + 0.5, + rois_num=rois_num_dy) dy_res_value = dy_res[0].numpy() self.assertTrue(np.array_equal(static_res, dy_res_value)) self.assertTrue(np.array_equal(static_res, dy_eager_res_value)) @@ -4146,26 +4229,37 @@ class TestBook(LayerTest): rois = layers.data(name="rois", shape=[4], dtype="float32") rois_num = fluid.data(name="rois_num", shape=[None], dtype="int32") output = layers.roi_align(x, rois, 4, 4, 0.5, 2, rois_num=rois_num) - static_res = self.get_static_graph_result( - feed={'x': x_np, - 'rois': rois_np, - 'rois_num': rois_num_np}, - fetch_list=[output])[0] + static_res = self.get_static_graph_result(feed={ + 'x': x_np, + 'rois': rois_np, + 'rois_num': rois_num_np + }, + fetch_list=[output])[0] with self.dynamic_graph(): with _test_eager_guard(): x_dy = base.to_variable(x_np) rois_dy = base.to_variable(rois_np) rois_num_dy = base.to_variable(rois_num_np) - dy_eager_res = layers.roi_align( - x_dy, rois_dy, 4, 4, 0.5, 2, rois_num=rois_num_dy) + dy_eager_res = layers.roi_align(x_dy, + rois_dy, + 4, + 4, + 0.5, + 2, + rois_num=rois_num_dy) dy_eager_res_value = dy_eager_res.numpy() x_dy = base.to_variable(x_np) rois_dy = base.to_variable(rois_np) rois_num_dy = base.to_variable(rois_num_np) - dy_res = layers.roi_align( - x_dy, rois_dy, 4, 4, 0.5, 2, rois_num=rois_num_dy) + dy_res = layers.roi_align(x_dy, + rois_dy, + 4, + 4, + 0.5, + 2, + rois_num=rois_num_dy) dy_res_value = dy_res.numpy() self.assertTrue(np.array_equal(static_res, dy_eager_res_value)) self.assertTrue(np.array_equal(static_res, dy_res_value)) @@ -4177,15 +4271,18 @@ class TestBook(LayerTest): label_np = np.random.randint(0, num_classes, [2, 3, 1], dtype=np.int64) with self.static_graph(): - input_ = layers.data( - name="input", shape=[None, 3, num_classes], dtype="float32") - label_ = layers.data( - name="label", shape=[None, 3, 1], dtype="int64") + input_ = layers.data(name="input", + shape=[None, 3, num_classes], + dtype="float32") + label_ = layers.data(name="label", + shape=[None, 3, 1], + dtype="int64") output = layers.dice_loss(input_, label_, eps) - static_res = self.get_static_graph_result( - feed={'input': input_np, - 'label': label_np}, - fetch_list=[output])[0] + static_res = self.get_static_graph_result(feed={ + 'input': input_np, + 'label': label_np + }, + fetch_list=[output])[0] with self.dynamic_graph(): with _test_eager_guard(): @@ -4205,8 +4302,10 @@ class TestBook(LayerTest): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): x = layers.data(name="x", shape=[256, 30, 
30], dtype="float32") - rois = layers.data( - name="rois", shape=[8], dtype="float32", lod_level=1) + rois = layers.data(name="rois", + shape=[8], + dtype="float32", + lod_level=1) output = layers.roi_perspective_transform(x, rois, 7, 7, 0.6) return (output) @@ -4220,10 +4319,12 @@ class TestBook(LayerTest): def test_simple_conv2d(self): # TODO(minqiyang): dygraph do not support layers with param now with self.static_graph(): - images = layers.data( - name='pixel', shape=[3, 48, 48], dtype='float32') - return layers.conv2d( - input=images, num_filters=3, filter_size=[4, 4]) + images = layers.data(name='pixel', + shape=[3, 48, 48], + dtype='float32') + return layers.conv2d(input=images, + num_filters=3, + filter_size=[4, 4]) def test_squeeze(self): # TODO(minqiyang): dygraph do not support layers with param now @@ -4235,11 +4336,10 @@ class TestBook(LayerTest): def test_flatten(self): # TODO(minqiyang): dygraph do not support op without kernel now with self.static_graph(): - x = layers.data( - name='x', - append_batch_size=False, - shape=[4, 4, 3], - dtype="float32") + x = layers.data(name='x', + append_batch_size=False, + shape=[4, 4, 3], + dtype="float32") out = layers.flatten(x, axis=1, name="flatten") return (out) @@ -4252,45 +4352,43 @@ class TestBook(LayerTest): def test_deformable_conv(self): with self.static_graph(): - input = layers.data( - name='input', - append_batch_size=False, - shape=[2, 3, 32, 32], - dtype="float32") - offset = layers.data( - name='offset', - append_batch_size=False, - shape=[2, 18, 32, 32], - dtype="float32") - mask = layers.data( - name='mask', - append_batch_size=False, - shape=[2, 9, 32, 32], - dtype="float32") - out = layers.deformable_conv( - input=input, - offset=offset, - mask=mask, - num_filters=2, - filter_size=3, - padding=1) + input = layers.data(name='input', + append_batch_size=False, + shape=[2, 3, 32, 32], + dtype="float32") + offset = layers.data(name='offset', + append_batch_size=False, + shape=[2, 18, 32, 32], + dtype="float32") + mask = layers.data(name='mask', + append_batch_size=False, + shape=[2, 9, 32, 32], + dtype="float32") + out = layers.deformable_conv(input=input, + offset=offset, + mask=mask, + num_filters=2, + filter_size=3, + padding=1) return (out) def test_deformable_conv2(self): with self.static_graph(): - input = fluid.data( - name='input', shape=[None, 3, None, None], dtype="float32") - offset = fluid.data( - name='offset', shape=[None, 18, None, None], dtype="float32") - mask = fluid.data( - name='mask', shape=[None, 9, None, None], dtype="float32") - out = layers.deformable_conv( - input=input, - offset=offset, - mask=mask, - num_filters=2, - filter_size=3, - padding=1) + input = fluid.data(name='input', + shape=[None, 3, None, None], + dtype="float32") + offset = fluid.data(name='offset', + shape=[None, 18, None, None], + dtype="float32") + mask = fluid.data(name='mask', + shape=[None, 9, None, None], + dtype="float32") + out = layers.deformable_conv(input=input, + offset=offset, + mask=mask, + num_filters=2, + filter_size=3, + padding=1) return (out) def test_unfold(self): @@ -4303,151 +4401,139 @@ class TestBook(LayerTest): with self.static_graph(): x = fluid.data(name="x", shape=[None, 3], dtype="float32") y = fluid.data(name="y", shape=[None, 3], dtype="float32") - concat1 = fluid.contrib.layers.partial_concat( - [x, y], start_index=0, length=2) - concat2 = fluid.contrib.layers.partial_concat( - x, start_index=0, length=-1) + concat1 = fluid.contrib.layers.partial_concat([x, y], + start_index=0, + length=2) + 
concat2 = fluid.contrib.layers.partial_concat(x, + start_index=0, + length=-1) return concat1, concat2 def test_deform_roi_pooling(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - input = layers.data( - name='input', - shape=[2, 3, 32, 32], - dtype='float32', - append_batch_size=False) - rois = layers.data( - name="rois", shape=[4], dtype='float32', lod_level=1) - trans = layers.data( - name="trans", - shape=[2, 3, 32, 32], - dtype='float32', - append_batch_size=False) - out = layers.deformable_roi_pooling( - input=input, - rois=rois, - trans=trans, - no_trans=False, - spatial_scale=1.0, - group_size=(1, 1), - pooled_height=8, - pooled_width=8, - part_size=(8, 8), - sample_per_part=4, - trans_std=0.1) + input = layers.data(name='input', + shape=[2, 3, 32, 32], + dtype='float32', + append_batch_size=False) + rois = layers.data(name="rois", + shape=[4], + dtype='float32', + lod_level=1) + trans = layers.data(name="trans", + shape=[2, 3, 32, 32], + dtype='float32', + append_batch_size=False) + out = layers.deformable_roi_pooling(input=input, + rois=rois, + trans=trans, + no_trans=False, + spatial_scale=1.0, + group_size=(1, 1), + pooled_height=8, + pooled_width=8, + part_size=(8, 8), + sample_per_part=4, + trans_std=0.1) return (out) def test_deformable_conv_v1(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - input = layers.data( - name='input', - append_batch_size=False, - shape=[2, 3, 32, 32], - dtype="float32") - offset = layers.data( - name='offset', - append_batch_size=False, - shape=[2, 18, 32, 32], - dtype="float32") - out = layers.deformable_conv( - input=input, - offset=offset, - mask=None, - num_filters=2, - filter_size=3, - padding=1, - modulated=False) + input = layers.data(name='input', + append_batch_size=False, + shape=[2, 3, 32, 32], + dtype="float32") + offset = layers.data(name='offset', + append_batch_size=False, + shape=[2, 18, 32, 32], + dtype="float32") + out = layers.deformable_conv(input=input, + offset=offset, + mask=None, + num_filters=2, + filter_size=3, + padding=1, + modulated=False) return (out) def test_retinanet_target_assign(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - bbox_pred = layers.data( - name='bbox_pred', - shape=[1, 100, 4], - append_batch_size=False, - dtype='float32') - cls_logits = layers.data( - name='cls_logits', - shape=[1, 100, 10], - append_batch_size=False, - dtype='float32') - anchor_box = layers.data( - name='anchor_box', - shape=[100, 4], - append_batch_size=False, - dtype='float32') - anchor_var = layers.data( - name='anchor_var', - shape=[100, 4], - append_batch_size=False, - dtype='float32') - gt_boxes = layers.data( - name='gt_boxes', - shape=[10, 4], - append_batch_size=False, - dtype='float32') - gt_labels = layers.data( - name='gt_labels', - shape=[10, 1], - append_batch_size=False, - dtype='int32') - is_crowd = layers.data( - name='is_crowd', - shape=[1], - append_batch_size=False, - dtype='int32') - im_info = layers.data( - name='im_info', - shape=[1, 3], - append_batch_size=False, - dtype='float32') - return (layers.retinanet_target_assign( - bbox_pred, cls_logits, anchor_box, anchor_var, gt_boxes, - gt_labels, is_crowd, im_info, 10)) + bbox_pred = layers.data(name='bbox_pred', + shape=[1, 100, 4], + append_batch_size=False, + dtype='float32') + cls_logits = layers.data(name='cls_logits', + shape=[1, 100, 10], + append_batch_size=False, + dtype='float32') + anchor_box = 
layers.data(name='anchor_box', + shape=[100, 4], + append_batch_size=False, + dtype='float32') + anchor_var = layers.data(name='anchor_var', + shape=[100, 4], + append_batch_size=False, + dtype='float32') + gt_boxes = layers.data(name='gt_boxes', + shape=[10, 4], + append_batch_size=False, + dtype='float32') + gt_labels = layers.data(name='gt_labels', + shape=[10, 1], + append_batch_size=False, + dtype='int32') + is_crowd = layers.data(name='is_crowd', + shape=[1], + append_batch_size=False, + dtype='int32') + im_info = layers.data(name='im_info', + shape=[1, 3], + append_batch_size=False, + dtype='float32') + return (layers.retinanet_target_assign(bbox_pred, cls_logits, + anchor_box, anchor_var, + gt_boxes, gt_labels, + is_crowd, im_info, 10)) def test_sigmoid_focal_loss(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - input = layers.data( - name='data', - shape=[10, 80], - append_batch_size=False, - dtype='float32') - label = layers.data( - name='label', - shape=[10, 1], - append_batch_size=False, - dtype='int32') - fg_num = layers.data( - name='fg_num', - shape=[1], - append_batch_size=False, - dtype='int32') - out = fluid.layers.sigmoid_focal_loss( - x=input, label=label, fg_num=fg_num, gamma=2., alpha=0.25) + input = layers.data(name='data', + shape=[10, 80], + append_batch_size=False, + dtype='float32') + label = layers.data(name='label', + shape=[10, 1], + append_batch_size=False, + dtype='int32') + fg_num = layers.data(name='fg_num', + shape=[1], + append_batch_size=False, + dtype='int32') + out = fluid.layers.sigmoid_focal_loss(x=input, + label=label, + fg_num=fg_num, + gamma=2., + alpha=0.25) return (out) def test_addmm(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - input = layers.data( - name='input_data', - shape=[3, 3], - append_batch_size=False, - dtype='float32') - x = layers.data( - name='x', - shape=[3, 2], - append_batch_size=False, - dtype='float32') - y = layers.data( - name='y', - shape=[2, 3], - append_batch_size=False, - dtype='float32') + input = layers.data(name='input_data', + shape=[3, 3], + append_batch_size=False, + dtype='float32') + x = layers.data(name='x', + shape=[3, 2], + append_batch_size=False, + dtype='float32') + y = layers.data(name='y', + shape=[2, 3], + append_batch_size=False, + dtype='float32') out = paddle.addmm(input=input, x=x, y=y) return (out) @@ -4455,26 +4541,22 @@ class TestBook(LayerTest): def test_retinanet_detection_output(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): - bboxes = layers.data( - name='bboxes', - shape=[1, 21, 4], - append_batch_size=False, - dtype='float32') - scores = layers.data( - name='scores', - shape=[1, 21, 10], - append_batch_size=False, - dtype='float32') - anchors = layers.data( - name='anchors', - shape=[21, 4], - append_batch_size=False, - dtype='float32') - im_info = layers.data( - name="im_info", - shape=[1, 3], - append_batch_size=False, - dtype='float32') + bboxes = layers.data(name='bboxes', + shape=[1, 21, 4], + append_batch_size=False, + dtype='float32') + scores = layers.data(name='scores', + shape=[1, 21, 10], + append_batch_size=False, + dtype='float32') + anchors = layers.data(name='anchors', + shape=[21, 4], + append_batch_size=False, + dtype='float32') + im_info = layers.data(name="im_info", + shape=[1, 3], + append_batch_size=False, + dtype='float32') nmsed_outs = layers.retinanet_detection_output( bboxes=[bboxes, bboxes], scores=[scores, scores], @@ -4490,26 
+4572,32 @@ class TestBook(LayerTest): def test_warpctc_with_padding(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - input_length = layers.data( - name='logits_length', shape=[11], dtype='int64') - label_length = layers.data( - name='labels_length', shape=[12], dtype='int64') + input_length = layers.data(name='logits_length', + shape=[11], + dtype='int64') + label_length = layers.data(name='labels_length', + shape=[12], + dtype='int64') label = layers.data(name='label', shape=[12, 1], dtype='int32') - predict = layers.data( - name='predict', shape=[4, 4, 8], dtype='float32') - output = layers.warpctc( - input=predict, - label=label, - input_length=input_length, - label_length=label_length) + predict = layers.data(name='predict', + shape=[4, 4, 8], + dtype='float32') + output = layers.warpctc(input=predict, + label=label, + input_length=input_length, + label_length=label_length) return (output) def test_edit_distance(self): with self.static_graph(): - predict = layers.data( - name='predict', shape=[-1, 1], dtype='int64', lod_level=1) - label = layers.data( - name='label', shape=[-1, 1], dtype='int64', lod_level=1) + predict = layers.data(name='predict', + shape=[-1, 1], + dtype='int64', + lod_level=1) + label = layers.data(name='label', + shape=[-1, 1], + dtype='int64', + lod_level=1) evaluator = fluid.evaluator.EditDistance(predict, label) return evaluator.metrics @@ -4517,12 +4605,15 @@ class TestBook(LayerTest): input_size = 128 hidden_size = 256 with self.static_graph(): - input = fluid.data( - name="input", shape=[None, None, input_size], dtype='float32') - pre_hidden = fluid.data( - name="pre_hidden", shape=[None, hidden_size], dtype='float32') - sequence_length = fluid.data( - name="sequence_length", shape=[None], dtype='int32') + input = fluid.data(name="input", + shape=[None, None, input_size], + dtype='float32') + pre_hidden = fluid.data(name="pre_hidden", + shape=[None, hidden_size], + dtype='float32') + sequence_length = fluid.data(name="sequence_length", + shape=[None], + dtype='int32') for bidirectional in [True, False]: for batch_first in [True, False]: @@ -4538,26 +4629,26 @@ class TestBook(LayerTest): class TestMetricsDetectionMap(unittest.TestCase): + def test_detection_map(self): program = fluid.Program() with program_guard(program): - detect_res = fluid.layers.data( - name='detect_res', - shape=[10, 6], - append_batch_size=False, - dtype='float32') - label = fluid.layers.data( - name='label', - shape=[10, 1], - append_batch_size=False, - dtype='float32') - box = fluid.layers.data( - name='bbox', - shape=[10, 4], - append_batch_size=False, - dtype='float32') - map_eval = fluid.metrics.DetectionMAP( - detect_res, label, box, class_num=21) + detect_res = fluid.layers.data(name='detect_res', + shape=[10, 6], + append_batch_size=False, + dtype='float32') + label = fluid.layers.data(name='label', + shape=[10, 1], + append_batch_size=False, + dtype='float32') + box = fluid.layers.data(name='bbox', + shape=[10, 4], + append_batch_size=False, + dtype='float32') + map_eval = fluid.metrics.DetectionMAP(detect_res, + label, + box, + class_num=21) cur_map, accm_map = map_eval.get_map_var() self.assertIsNotNone(cur_map) self.assertIsNotNone(accm_map) @@ -4565,6 +4656,7 @@ class TestMetricsDetectionMap(unittest.TestCase): class ExampleNet(paddle.nn.Layer): + def __init__(self): super(ExampleNet, self).__init__() self.weight = self.create_parameter( @@ -4576,6 +4668,7 @@ class ExampleNet(paddle.nn.Layer): class 
TestLayerParameterTrainableSet(unittest.TestCase): + def test_layer_parameter_set(self): with fluid.dygraph.guard(): net = ExampleNet() @@ -4583,6 +4676,7 @@ class TestLayerParameterTrainableSet(unittest.TestCase): class TestLayerTrainingAttribute(unittest.TestCase): + def test_set_train_eval_in_dynamic_mode(self): with fluid.dygraph.guard(): net = paddle.nn.Dropout() @@ -4600,6 +4694,7 @@ class TestLayerTrainingAttribute(unittest.TestCase): class MyLayer(paddle.nn.Layer): + def __init__(self): super(MyLayer, self).__init__() self._linear = paddle.nn.Linear(1, 1) @@ -4612,6 +4707,7 @@ class MyLayer(paddle.nn.Layer): class MySuperLayer(paddle.nn.Layer): + def __init__(self): super(MySuperLayer, self).__init__() self._mylayer = MyLayer() @@ -4622,6 +4718,7 @@ class MySuperLayer(paddle.nn.Layer): class TestSubLayerCount(unittest.TestCase): + def test_sublayer(self): with fluid.dygraph.guard(): mySuperlayer = MySuperLayer() diff --git a/python/paddle/fluid/tests/unittests/test_layout_autotune.py b/python/paddle/fluid/tests/unittests/test_layout_autotune.py index a1440f8587a..bd73d9526c0 100644 --- a/python/paddle/fluid/tests/unittests/test_layout_autotune.py +++ b/python/paddle/fluid/tests/unittests/test_layout_autotune.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,6 +23,7 @@ import os class SimpleNet(paddle.nn.Layer): + def __init__(self, data_format="NCHW", class_num=2): super(SimpleNet, self).__init__() self.conv = paddle.nn.Conv2D(3, 8, (3, 3)) @@ -43,6 +44,7 @@ class SimpleNet(paddle.nn.Layer): class LayoutAutoTune(unittest.TestCase): + def use_autoune(self): if paddle.is_compiled_with_cuda(): paddle.incubate.autotune.set_config( @@ -101,7 +103,7 @@ class LayoutAutoTune(unittest.TestCase): with paddle.amp.auto_cast(level="O2"): conv_out = conv(data) # conv_out.shape = [1, 14, 12, 8] with NHWC - # layout tuner will transpose conv_out to + # layout tuner will transpose conv_out to # [1, 8, 14, 12] with NCHW before the following transpose op. out = paddle.transpose(conv_out, perm=[0, 3, 1, 2]) loss = out.mean() @@ -131,6 +133,7 @@ class LayoutAutoTune(unittest.TestCase): class TestAutoTuneAPI(unittest.TestCase): + def test_set_config_warnings(self): with warnings.catch_warnings(record=True) as w: config = {"layout": {"enable": 1}} diff --git a/python/paddle/fluid/tests/unittests/test_lbfgs.py b/python/paddle/fluid/tests/unittests/test_lbfgs.py index bb381874760..d4875bce503 100644 --- a/python/paddle/fluid/tests/unittests/test_lbfgs.py +++ b/python/paddle/fluid/tests/unittests/test_lbfgs.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -44,10 +44,13 @@ def test_static_graph_H0(func, x0, H0, dtype='float32'): startup = paddle.static.Program() with paddle.static.program_guard(main, startup): X = paddle.static.data(name='x', shape=[x0.shape[0]], dtype=dtype) - H = paddle.static.data( - name='h', shape=[H0.shape[0], H0.shape[1]], dtype=dtype) - Y = minimize_lbfgs( - func, X, initial_inverse_hessian_estimate=H, dtype=dtype) + H = paddle.static.data(name='h', + shape=[H0.shape[0], H0.shape[1]], + dtype=dtype) + Y = minimize_lbfgs(func, + X, + initial_inverse_hessian_estimate=H, + dtype=dtype) exe = paddle.static.Executor() exe.run(startup) @@ -63,15 +66,15 @@ def test_dynamic_graph(func, x0 = paddle.to_tensor(x0) if H0 is not None: H0 = paddle.to_tensor(H0) - return minimize_lbfgs( - func, - x0, - initial_inverse_hessian_estimate=H0, - line_search_fn=line_search_fn, - dtype=dtype) + return minimize_lbfgs(func, + x0, + initial_inverse_hessian_estimate=H0, + line_search_fn=line_search_fn, + dtype=dtype) class TestLbfgs(unittest.TestCase): + def test_quadratic_nd(self): for dimension in [1, 10]: minimum = np.random.random(size=[dimension]).astype('float32') @@ -105,10 +108,11 @@ class TestLbfgs(unittest.TestCase): self.assertFalse(results[0][0]) def test_multi_minima(self): + def func(x): # df = 12(x + 1.1)(x - 0.2)(x - 0.8) # f = 3*x^4+0.4*x^3-5.46*x^2+2.112*x - # minimum = -1.1 or 0.8. + # minimum = -1.1 or 0.8. # All these minima may be reached from appropriate starting points. 
return 3 * x**4 + 0.4 * x**3 - 5.64 * x**2 + 2.112 * x @@ -137,6 +141,7 @@ class TestLbfgs(unittest.TestCase): self.assertTrue(np.allclose(minimum, results[2])) def test_exception(self): + def func(x): return paddle.dot(x, x) @@ -145,8 +150,11 @@ class TestLbfgs(unittest.TestCase): # test dtype is not float32 or float64 x1 = np.random.random(size=[2]).astype('int32') - self.assertRaises( - ValueError, test_static_graph, func, x1, dtype='int32') + self.assertRaises(ValueError, + test_static_graph, + func, + x1, + dtype='int32') # test initial_inverse_hessian_estimate is good results = test_static_graph_H0(func, x0, H0, dtype='float32') @@ -156,8 +164,12 @@ class TestLbfgs(unittest.TestCase): # test initial_inverse_hessian_estimate is bad and float64 x2 = np.random.random(size=[2]).astype('float64') H1 = np.array([[1.0, 2.0], [3.0, 1.0]]).astype('float64') - self.assertRaises( - ValueError, test_static_graph_H0, func, x2, H0=H1, dtype='float64') + self.assertRaises(ValueError, + test_static_graph_H0, + func, + x2, + H0=H1, + dtype='float64') if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_lcm.py b/python/paddle/fluid/tests/unittests/test_lcm.py index 123c3e3d444..ca78e239da4 100644 --- a/python/paddle/fluid/tests/unittests/test_lcm.py +++ b/python/paddle/fluid/tests/unittests/test_lcm.py @@ -26,6 +26,7 @@ paddle.enable_static() class TestLcmAPI(unittest.TestCase): + def setUp(self): self.x_np = 12 self.y_np = 20 @@ -40,15 +41,17 @@ class TestLcmAPI(unittest.TestCase): x2 = fluid.data(name='input2', dtype='int32', shape=self.y_shape) out = paddle.lcm(x1, x2) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) res = exe.run(fluid.default_main_program(), - feed={'input1': self.x_np, - 'input2': self.y_np}, + feed={ + 'input1': self.x_np, + 'input2': self.y_np + }, fetch_list=[out]) - self.assertTrue((np.array(res[0]) == np.lcm(self.x_np, self.y_np) - ).all()) + self.assertTrue((np.array(res[0]) == np.lcm(self.x_np, + self.y_np)).all()) def test_dygraph(self): paddle.disable_static() @@ -62,6 +65,7 @@ class TestLcmAPI(unittest.TestCase): class TestLcmAPI2(TestLcmAPI): + def setUp(self): self.x_np = np.arange(6).astype(np.int32) self.y_np = np.array([20]).astype(np.int32) @@ -70,6 +74,7 @@ class TestLcmAPI2(TestLcmAPI): class TestLcmAPI3(TestLcmAPI): + def setUp(self): self.x_np = 0 self.y_np = 20 @@ -78,6 +83,7 @@ class TestLcmAPI3(TestLcmAPI): class TestLcmAPI4(TestLcmAPI): + def setUp(self): self.x_np = 0 self.y_np = 0 @@ -86,6 +92,7 @@ class TestLcmAPI4(TestLcmAPI): class TestLcmAPI5(TestLcmAPI): + def setUp(self): self.x_np = 12 self.y_np = -20 diff --git a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py index 36368a83893..b70acce3235 100644 --- a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py @@ -86,8 +86,8 @@ def piecewise_decay(global_step, boundaries, values): def cosine_decay(global_step, learning_rate, step_each_epoch, epochs): cur_epoch = math.floor(global_step / step_each_epoch) - decayed_lr = learning_rate * 0.5 * ( - math.cos(cur_epoch * math.pi / epochs) + 1) + decayed_lr = learning_rate * 0.5 * (math.cos(cur_epoch * math.pi / epochs) + + 1) return decayed_lr @@ -122,6 +122,7 @@ def lambda_decay(global_step, 
learning_rate, lr_lambda): class TestLearningRateDecayDygraph(unittest.TestCase): + def test_LR_state_dict(self): with fluid.dygraph.guard(): x = np.random.uniform(-1, 1, [3, 10]).astype("float32") @@ -137,15 +138,12 @@ class TestLearningRateDecayDygraph(unittest.TestCase): Reducelr_scheduler = fluid.dygraph.ReduceLROnPlateau( learning_rate=1.0, decay_rate=0.5, patience=5, cooldown=3) - adam1 = fluid.optimizer.Adam( - learning_rate=Exponential_scheduler, - parameter_list=linear.parameters()) - adam2 = fluid.optimizer.Adam( - learning_rate=Step_scheduler, - parameter_list=linear.parameters()) - adam3 = fluid.optimizer.Adam( - learning_rate=Reducelr_scheduler, - parameter_list=linear.parameters()) + adam1 = fluid.optimizer.Adam(learning_rate=Exponential_scheduler, + parameter_list=linear.parameters()) + adam2 = fluid.optimizer.Adam(learning_rate=Step_scheduler, + parameter_list=linear.parameters()) + adam3 = fluid.optimizer.Adam(learning_rate=Reducelr_scheduler, + parameter_list=linear.parameters()) print(adam3.state_dict()) for epoch in range(10): @@ -177,22 +175,22 @@ class TestLearningRateDecayDygraph(unittest.TestCase): learning_rate=Exponential_scheduler_test, parameter_list=linear.parameters()) adam_test.set_dict(opt_state) - self.assertEqual(adam_test._learning_rate.step_num, - adam1._learning_rate.step_num, - "epoch_num is different before and after set_dict") + self.assertEqual( + adam_test._learning_rate.step_num, + adam1._learning_rate.step_num, + "epoch_num is different before and after set_dict") fluid.dygraph.save_dygraph(adam2.state_dict(), "save_path") _, opt_state = fluid.dygraph.load_dygraph("save_path") - adam_test = fluid.optimizer.Adam( - learning_rate=Step_scheduler_test, - parameter_list=linear.parameters()) + adam_test = fluid.optimizer.Adam(learning_rate=Step_scheduler_test, + parameter_list=linear.parameters()) adam_test.set_dict(opt_state) - self.assertEqual(adam_test._learning_rate.epoch_num, - adam2._learning_rate.epoch_num, - "epoch_num is different before and after set_dict") self.assertEqual( - adam_test._learning_rate(), - adam2._learning_rate(), + adam_test._learning_rate.epoch_num, + adam2._learning_rate.epoch_num, + "epoch_num is different before and after set_dict") + self.assertEqual( + adam_test._learning_rate(), adam2._learning_rate(), "current learning rate is different before and after set_dict") fluid.dygraph.save_dygraph(adam3.state_dict(), "save_path") @@ -201,9 +199,10 @@ class TestLearningRateDecayDygraph(unittest.TestCase): learning_rate=Reducelr_scheduler_test, parameter_list=linear.parameters()) adam_test.set_dict(opt_state) - self.assertEqual(adam_test._learning_rate.best_loss, - adam3._learning_rate.best_loss.numpy()[0], - "best_loss is different before and after set_dict") + self.assertEqual( + adam_test._learning_rate.best_loss, + adam3._learning_rate.best_loss.numpy()[0], + "best_loss is different before and after set_dict") self.assertEqual( adam_test._learning_rate.cooldown_counter, adam3._learning_rate.cooldown_counter, @@ -216,8 +215,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase): adam3._learning_rate.epoch_num, "epoch is different before and after set_dict") self.assertEqual( - adam_test._learning_rate(), - adam3._learning_rate(), + adam_test._learning_rate(), adam3._learning_rate(), "current learning rate is different before and after set_dict") def test_NoamDecay(self): @@ -235,18 +233,20 @@ class TestLearningRateDecayDygraph(unittest.TestCase): self.assertAlmostEqual( right_result, fluid_result[0], - msg='Failed lr 
scheduler in step {0}, Python result is {1}, Fluid result is {2}'. - format(step, right_result, fluid_result[0])) + msg= + 'Failed lr scheduler in step {0}, Python result is {1}, Fluid result is {2}' + .format(step, right_result, fluid_result[0])) def test_LinearLrWarmup(self): with fluid.dygraph.guard(): - lr = fluid.layers.polynomial_decay( - learning_rate=1.0, - decay_steps=10, - end_learning_rate=0.0, - power=1.0) - lr = fluid.layers.linear_lr_warmup( - learning_rate=lr, warmup_steps=2, start_lr=0.0, end_lr=1.0) + lr = fluid.layers.polynomial_decay(learning_rate=1.0, + decay_steps=10, + end_learning_rate=0.0, + power=1.0) + lr = fluid.layers.linear_lr_warmup(learning_rate=lr, + warmup_steps=2, + start_lr=0.0, + end_lr=1.0) right_result = [0.5, 0.9, 0.8, 0.7, 0.6] for i in range(5): @@ -257,11 +257,10 @@ class TestLearningRateDecayDygraph(unittest.TestCase): np.allclose((t.numpy())[0].item(), right_result[i])) with self.assertRaises(TypeError): - lr = fluid.layers.linear_lr_warmup( - learning_rate="fake_lr", - warmup_steps=2, - start_lr=0.0, - end_lr=1.0) + lr = fluid.layers.linear_lr_warmup(learning_rate="fake_lr", + warmup_steps=2, + start_lr=0.0, + end_lr=1.0) def test_MultiStepDecay(self): with fluid.dygraph.guard(): @@ -283,8 +282,9 @@ class TestLearningRateDecayDygraph(unittest.TestCase): self.assertAlmostEqual( right_result, fluid_result, - msg='Failed lr scheduler in epoch {0}, Python result is {1}, Fluid result is {2}'. - format(epoch, right_result, fluid_result)) + msg= + 'Failed lr scheduler in epoch {0}, Python result is {1}, Fluid result is {2}' + .format(epoch, right_result, fluid_result)) with self.assertRaises(ValueError): lr = fluid.dygraph.MultiStepDecay(learning_rate, [30, 50, 20], @@ -315,8 +315,9 @@ class TestLearningRateDecayDygraph(unittest.TestCase): self.assertAlmostEqual( right_result, fluid_result, - msg='Failed lr scheduler in epoch {0}, Python result is {1}, Fluid result is {2}'. - format(epoch, right_result, fluid_result)) + msg= + 'Failed lr scheduler in epoch {0}, Python result is {1}, Fluid result is {2}' + .format(epoch, right_result, fluid_result)) with self.assertRaises(TypeError): lr = fluid.dygraph.StepDecay(learning_rate, "test", 0.1) @@ -331,8 +332,8 @@ class TestLearningRateDecayDygraph(unittest.TestCase): scheduler = fluid.dygraph.LambdaDecay(learning_rate, lr_lambda) linear = fluid.dygraph.nn.Linear(10, 10) - adam = fluid.optimizer.Adam( - scheduler, parameter_list=linear.parameters()) + adam = fluid.optimizer.Adam(scheduler, + parameter_list=linear.parameters()) for epoch in range(30): right_result = lambda_decay(epoch, learning_rate, lr_lambda) @@ -341,14 +342,16 @@ class TestLearningRateDecayDygraph(unittest.TestCase): self.assertAlmostEqual( right_result, fluid_result, - msg='Failed lr scheduler in epoch {0}, Python result is {1}, Fluid result is {2}'. 
- format(epoch, right_result, fluid_result)) + msg= + 'Failed lr scheduler in epoch {0}, Python result is {1}, Fluid result is {2}' + .format(epoch, right_result, fluid_result)) with self.assertRaises(TypeError): lr = fluid.dygraph.LambdaDecay(learning_rate, "test") class TestLearningRateDecay(unittest.TestCase): + def check_decay(self, python_decay_fn, fluid_decay_fn, kwargs): places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): @@ -375,14 +378,15 @@ class TestLearningRateDecay(unittest.TestCase): if python_decay_fn.__name__ == 'noam_decay': step += 1 lr_val, = exe.run(main_prog, feed={}, fetch_list=[decayed_lr]) - python_decayed_lr = python_decay_fn( - global_step=float(step), **kwargs) + python_decayed_lr = python_decay_fn(global_step=float(step), + **kwargs) self.assertAlmostEqual( python_decayed_lr, lr_val[0], - msg='Failed lr scheduler is {0}, step {1}, Python result is {2}, Fluid result is {3}'. - format(python_decay_fn.__name__, - str(step), str(python_decayed_lr), str(lr_val[0]))) + msg= + 'Failed lr scheduler is {0}, step {1}, Python result is {2}, Fluid result is {3}' + .format(python_decay_fn.__name__, str(step), + str(python_decayed_lr), str(lr_val[0]))) def test_decay(self): common_kwargs_true = { @@ -401,26 +405,31 @@ class TestLearningRateDecay(unittest.TestCase): (natural_exp_decay, layers.natural_exp_decay, common_kwargs_false), (inverse_time_decay, layers.inverse_time_decay, common_kwargs_true), (inverse_time_decay, layers.inverse_time_decay, - common_kwargs_false), (polynomial_decay, layers.polynomial_decay, { - "learning_rate": 1.0, - "decay_steps": 5, - "cycle": True - }), (polynomial_decay, layers.polynomial_decay, { - "learning_rate": 1.0, - "decay_steps": 5, - "cycle": False - }), (piecewise_decay, layers.piecewise_decay, { - "boundaries": [3, 6, 9], - "values": [0.1, 0.2, 0.3, 0.4] - }), (cosine_decay, layers.cosine_decay, { - "learning_rate": 0.1, - "step_each_epoch": 100, - "epochs": 120 - }), (noam_decay, layers.noam_decay, { - "d_model": 0.01, - "warmup_steps": 200, - "learning_rate": 2.0 - }) + common_kwargs_false), + (polynomial_decay, layers.polynomial_decay, { + "learning_rate": 1.0, + "decay_steps": 5, + "cycle": True + }), + (polynomial_decay, layers.polynomial_decay, { + "learning_rate": 1.0, + "decay_steps": 5, + "cycle": False + }), + (piecewise_decay, layers.piecewise_decay, { + "boundaries": [3, 6, 9], + "values": [0.1, 0.2, 0.3, 0.4] + }), + (cosine_decay, layers.cosine_decay, { + "learning_rate": 0.1, + "step_each_epoch": 100, + "epochs": 120 + }), + (noam_decay, layers.noam_decay, { + "d_model": 0.01, + "warmup_steps": 200, + "learning_rate": 2.0 + }) ] for py_decay_fn, fluid_decay_fn, kwargs in decay_fns: @@ -433,6 +442,7 @@ class TestLearningRateDecay(unittest.TestCase): class TestLinearWamrupLearningRateDecay(unittest.TestCase): + def check_decay_with_place(self, place, python_decay_fn, fluid_decay_fn, kwargs): main_prog = fluid.Program() @@ -443,8 +453,8 @@ class TestLinearWamrupLearningRateDecay(unittest.TestCase): end_lr = 0.1 with fluid.program_guard(main_prog, startup_prog): - decayed_lr = layers.linear_lr_warmup( - fluid_decay_fn(**kwargs), warmup_steps, start_lr, end_lr) + decayed_lr = layers.linear_lr_warmup(fluid_decay_fn(**kwargs), + warmup_steps, start_lr, end_lr) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -456,20 +466,22 @@ class TestLinearWamrupLearningRateDecay(unittest.TestCase): step += 1 lr_val, = exe.run(main_prog, feed={}, fetch_list=[decayed_lr]) if step < warmup_steps: - python_decayed_lr = 
linear_lr_warmup( - float(step), warmup_steps, start_lr, end_lr) + python_decayed_lr = linear_lr_warmup(float(step), warmup_steps, + start_lr, end_lr) else: - python_decayed_lr = python_decay_fn( - global_step=float(step), **kwargs) + python_decayed_lr = python_decay_fn(global_step=float(step), + **kwargs) self.assertAlmostEqual( python_decayed_lr, lr_val[0], - msg='Test {0} Failed, step {1}, Python result is {2}, Fluid result is {3}'. - format(python_decay_fn.__name__, - str(step), str(python_decayed_lr), str(lr_val[0]))) + msg= + 'Test {0} Failed, step {1}, Python result is {2}, Fluid result is {3}' + .format(python_decay_fn.__name__, str(step), + str(python_decayed_lr), str(lr_val[0]))) class TestLinearWamrupLearningRateDecayWithScalarInput(unittest.TestCase): + def run_scalar_lr(self, place, lr, start_lr, end_lr): main_prog = fluid.Program() startup_prog = fluid.Program() @@ -486,8 +498,8 @@ class TestLinearWamrupLearningRateDecayWithScalarInput(unittest.TestCase): for step in range(20): lr_val, = exe.run(main_prog, feed={}, fetch_list=[decayed_lr]) if step < warmup_steps: - expected_lr = linear_lr_warmup( - float(step), warmup_steps, start_lr, end_lr) + expected_lr = linear_lr_warmup(float(step), warmup_steps, + start_lr, end_lr) else: expected_lr = lr self.assertAlmostEqual( @@ -497,6 +509,7 @@ class TestLinearWamrupLearningRateDecayWithScalarInput(unittest.TestCase): step, expected_lr, lr_val[0])) def test_scalar_lr(self): + def run_places(lr, start_lr, end_lr): places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_lerp_op.py b/python/paddle/fluid/tests/unittests/test_lerp_op.py index 10ab2610a26..0af6e46c73d 100644 --- a/python/paddle/fluid/tests/unittests/test_lerp_op.py +++ b/python/paddle/fluid/tests/unittests/test_lerp_op.py @@ -25,6 +25,7 @@ np.random.seed(0) class TestLerp(OpTest): + def setUp(self): self.op_type = "lerp" self.python_api = paddle.lerp @@ -50,31 +51,37 @@ class TestLerp(OpTest): class TestLerpWithDim2(TestLerp): + def init_shape(self): self.shape = [2, 50] class TestLerpWithDim3(TestLerp): + def init_shape(self): self.shape = [2, 2, 25] class TestLerpWithDim4(TestLerp): + def init_shape(self): self.shape = [2, 2, 5, 5] class TestLerpWithDim5(TestLerp): + def init_shape(self): self.shape = [2, 1, 2, 5, 5] class TestLerpWithDim6(TestLerp): + def init_shape(self): self.shape = [2, 1, 2, 5, 1, 5] class TestLerpAPI(unittest.TestCase): + def init_dtype(self): self.dtype = 'float32' @@ -108,6 +115,7 @@ class TestLerpAPI(unittest.TestCase): run(place) def test_dygraph_api(self): + def run(place): paddle.disable_static(place) x = paddle.to_tensor(self.x) @@ -121,6 +129,7 @@ class TestLerpAPI(unittest.TestCase): run(place) def test_inplace_api(self): + def run(place): paddle.disable_static(place) x = paddle.to_tensor(self.x) @@ -133,6 +142,7 @@ class TestLerpAPI(unittest.TestCase): run(place) def test_inplace_api_exception(self): + def run(place): paddle.disable_static(place) x = paddle.to_tensor(self.x) @@ -159,8 +169,8 @@ class TestLerpAPI(unittest.TestCase): x = np.arange(11., 21.).astype(self.dtype).reshape([2, 5]) y = np.full(20, 7.5).astype(self.dtype).reshape([2, 2, 5]) w = np.full(40, 0.225).astype(self.dtype).reshape([2, 2, 2, 5]) - out = paddle.lerp( - paddle.to_tensor(x), paddle.to_tensor(y), paddle.to_tensor(w)) + out = paddle.lerp(paddle.to_tensor(x), paddle.to_tensor(y), + paddle.to_tensor(w)) res_ref = x + w * (y - x) self.assertEqual(np.allclose(res_ref, out.numpy()), True) 
paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_lgamma_op.py b/python/paddle/fluid/tests/unittests/test_lgamma_op.py index 8e9edab55ba..cb6b031eb98 100644 --- a/python/paddle/fluid/tests/unittests/test_lgamma_op.py +++ b/python/paddle/fluid/tests/unittests/test_lgamma_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,6 +22,7 @@ paddle.enable_static() class TestLgammaOp(OpTest): + def setUp(self): self.op_type = 'lgamma' self.python_api = paddle.lgamma @@ -46,12 +47,15 @@ class TestLgammaOp(OpTest): class TestLgammaOpFp32(TestLgammaOp): + def init_dtype_type(self): self.dtype = np.float32 def test_check_grad_normal(self): - self.check_grad( - ['X'], 'Out', numeric_grad_delta=0.005, check_eager=True) + self.check_grad(['X'], + 'Out', + numeric_grad_delta=0.005, + check_eager=True) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_limit_by_capacity_op.py b/python/paddle/fluid/tests/unittests/test_limit_by_capacity_op.py index d273185ad18..3e604d25657 100644 --- a/python/paddle/fluid/tests/unittests/test_limit_by_capacity_op.py +++ b/python/paddle/fluid/tests/unittests/test_limit_by_capacity_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -46,9 +46,12 @@ def all_close(exp, out, n_worker): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestLimitByCapacityInt64API(unittest.TestCase): + def init_test_case(self): - self.expert_count = np.random.randint( - 0, 1000, size=(len(self.capacity) * self.n_worker)) + self.expert_count = np.random.randint(0, + 1000, + size=(len(self.capacity) * + self.n_worker)) self.out = limit_by_capacity(self.expert_count, self.capacity, self.n_worker) self.expert_count = self.expert_count.astype("int64") @@ -63,8 +66,9 @@ class TestLimitByCapacityInt64API(unittest.TestCase): def test_static_api(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): - capacity = paddle.static.data( - 'capacity', shape=self.capacity.shape, dtype="int64") + capacity = paddle.static.data('capacity', + shape=self.capacity.shape, + dtype="int64") expert_count_tensor = paddle.static.data( 'ExpertCount', shape=self.expert_count.shape, dtype="int64") out = utils._limit_by_capacity(expert_count_tensor, capacity, @@ -95,6 +99,7 @@ class TestLimitByCapacityInt64API(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestLimitByCapacityInt64API_SmallWorker(TestLimitByCapacityInt64API): + def setUp(self): self.capacity = np.array([100, 12000, 1200, 0, 4700, 1000, 57, 200]) self.n_worker = 1 diff --git a/python/paddle/fluid/tests/unittests/test_linalg_cond.py b/python/paddle/fluid/tests/unittests/test_linalg_cond.py index 42fb2fbc578..74b50c11ce4 100644 --- a/python/paddle/fluid/tests/unittests/test_linalg_cond.py +++ b/python/paddle/fluid/tests/unittests/test_linalg_cond.py @@ -33,8 +33,9 @@ def test_static_assert_true(self, x_list, p_list): exe = static.Executor() result = exe.run(feed={"X": x}, fetch_list=[output]) expected_output = np.linalg.cond(x, p) - np.testing.assert_allclose( - result[0], expected_output, rtol=5e-5) + np.testing.assert_allclose(result[0], + expected_output, + rtol=5e-5) def test_dygraph_assert_true(self, x_list, p_list): @@ -43,8 +44,9 @@ def test_dygraph_assert_true(self, x_list, p_list): input_tensor = paddle.to_tensor(x) output = paddle.linalg.cond(input_tensor, p) expected_output = np.linalg.cond(x, p) - np.testing.assert_allclose( - output.numpy(), expected_output, rtol=5e-5) + np.testing.assert_allclose(output.numpy(), + expected_output, + rtol=5e-5) def gen_input(): @@ -81,6 +83,7 @@ def gen_empty_input(): class API_TestStaticCond(unittest.TestCase): + def test_out(self): paddle.enable_static() # test calling results of 'cond' in static mode @@ -90,6 +93,7 @@ class API_TestStaticCond(unittest.TestCase): class API_TestDygraphCond(unittest.TestCase): + def func_out(self): paddle.disable_static() # test calling results of 'cond' in dynamic mode @@ -104,6 +108,7 @@ class API_TestDygraphCond(unittest.TestCase): class TestCondAPIError(unittest.TestCase): + def func_dygraph_api_error(self): paddle.disable_static() # test raising errors when 'cond' is called in dygraph mode @@ -160,6 +165,7 @@ class TestCondAPIError(unittest.TestCase): class TestCondEmptyTensorInput(unittest.TestCase): + def func_dygraph_empty_tensor_input(self): paddle.disable_static() # test calling results of 'cond' when input is an empty tensor in dynamic mode diff --git a/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py b/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py index 59ac2e28087..07729ae4e79 100644 --- a/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py @@ -22,6 +22,7 @@ import paddle.fluid.core as core class LinalgLstsqTestCase(unittest.TestCase): + def setUp(self): self.devices = ["cpu"] self.init_config() @@ -45,8 +46,9 @@ class LinalgLstsqTestCase(unittest.TestCase): def generate_output(self): if len(self._input_shape_1) == 2: - out = np.linalg.lstsq( - self._input_data_1, self._input_data_2, rcond=self.rcond) + out = np.linalg.lstsq(self._input_data_1, + self._input_data_2, + rcond=self.rcond) self._output_solution = out[0] self._output_residuals = out[1] self._output_rank = out[2] @@ -57,10 +59,9 @@ class LinalgLstsqTestCase(unittest.TestCase): self._output_rank = [] self._output_sg_values = [] for i in range(self._input_shape_1[0]): - out = np.linalg.lstsq( - self._input_data_1[i], - self._input_data_2[i], - rcond=self.rcond) + out = np.linalg.lstsq(self._input_data_1[i], + self._input_data_2[i], + rcond=self.rcond) self._output_solution.append(out[0]) self._output_residuals.append(out[1]) self._output_rank.append(out[2]) @@ -71,12 +72,16 @@ class LinalgLstsqTestCase(unittest.TestCase): for dev in self.devices: paddle.set_device(dev) place = paddle.CPUPlace() if dev == "cpu" else paddle.CUDAPlace(0) - x = paddle.to_tensor( - self._input_data_1, place=place, dtype=self.dtype) - y = paddle.to_tensor( - self._input_data_2, place=place, dtype=self.dtype) - results = paddle.linalg.lstsq( - x, y, rcond=self.rcond, driver=self.driver) + x = paddle.to_tensor(self._input_data_1, + place=place, + dtype=self.dtype) + y = paddle.to_tensor(self._input_data_2, + place=place, + dtype=self.dtype) + results = paddle.linalg.lstsq(x, + y, + rcond=self.rcond, + driver=self.driver) self._result_solution = results[0].numpy() self._result_residuals = results[1].numpy() self._result_rank = results[2].numpy() @@ -89,22 +94,23 @@ class LinalgLstsqTestCase(unittest.TestCase): paddle.set_device(dev) place = fluid.CPUPlace() if dev == "cpu" else fluid.CUDAPlace(0) with fluid.program_guard(fluid.Program(), fluid.Program()): - x = paddle.fluid.data( - name="x", - shape=self._input_shape_1, - dtype=self._input_data_1.dtype) - y = paddle.fluid.data( - name="y", - shape=self._input_shape_2, - dtype=self._input_data_2.dtype) - results = paddle.linalg.lstsq( - x, y, rcond=self.rcond, driver=self.driver) + x = paddle.fluid.data(name="x", + shape=self._input_shape_1, + dtype=self._input_data_1.dtype) + y = paddle.fluid.data(name="y", + shape=self._input_shape_2, + dtype=self._input_data_2.dtype) + results = paddle.linalg.lstsq(x, + y, + rcond=self.rcond, + driver=self.driver) exe = fluid.Executor(place) - fetches = exe.run( - fluid.default_main_program(), - feed={"x": self._input_data_1, - "y": self._input_data_2}, - fetch_list=[results]) + fetches = exe.run(fluid.default_main_program(), + feed={ + "x": self._input_data_1, + "y": self._input_data_2 + }, + fetch_list=[results]) self._result_solution = fetches[0] self._result_residuals = fetches[1] self._result_rank = fetches[2] @@ -113,41 +119,44 @@ class LinalgLstsqTestCase(unittest.TestCase): def assert_np_close(self): if len(self._input_shape_1) == 2: - np.testing.assert_allclose( - self._result_solution, self._output_solution, rtol=1e-3) + np.testing.assert_allclose(self._result_solution, + self._output_solution, + rtol=1e-3) if self._input_shape_1[-2] > self._input_shape_1[ -1] and self._output_rank == self._input_shape_1[-1]: - np.testing.assert_allclose( - self._result_residuals, self._output_residuals, rtol=1e-5) + 
np.testing.assert_allclose(self._result_residuals, + self._output_residuals, + rtol=1e-5) if self.driver in ("gelsy", "gelsd", "gelss"): - np.testing.assert_allclose( - self._result_rank, self._output_rank, rtol=1e-5) + np.testing.assert_allclose(self._result_rank, + self._output_rank, + rtol=1e-5) if self.driver in ("gelsd", "gelss"): - np.testing.assert_allclose( - self._result_sg_values, self._output_sg_values, rtol=1e-5) + np.testing.assert_allclose(self._result_sg_values, + self._output_sg_values, + rtol=1e-5) else: for i in range(len(self._output_solution)): - np.testing.assert_allclose( - self._result_solution[i], - self._output_solution[i], - rtol=1e-3) + np.testing.assert_allclose(self._result_solution[i], + self._output_solution[i], + rtol=1e-3) if self._input_shape_1[-2] > self._input_shape_1[ -1] and self._output_rank[i] == self._input_shape_1[-1]: - np.testing.assert_allclose( - self._result_residuals[i], - self._output_residuals[i], - rtol=1e-5) + np.testing.assert_allclose(self._result_residuals[i], + self._output_residuals[i], + rtol=1e-5) if self.driver in ("gelsy", "gelsd", "gelss"): - np.testing.assert_allclose( - self._result_rank[i], self._output_rank[i], rtol=1e-5) + np.testing.assert_allclose(self._result_rank[i], + self._output_rank[i], + rtol=1e-5) if self.driver in ("gelsd", "gelss"): - np.testing.assert_allclose( - self._result_sg_values[i], - self._output_sg_values[i], - rtol=1e-5) + np.testing.assert_allclose(self._result_sg_values[i], + self._output_sg_values[i], + rtol=1e-5) class LinalgLstsqTestCase1(LinalgLstsqTestCase): + def init_config(self): self.dtype = 'float32' self.rcond = 1e-15 @@ -157,6 +166,7 @@ class LinalgLstsqTestCase1(LinalgLstsqTestCase): class LinalgLstsqTestCase2(LinalgLstsqTestCase): + def init_config(self): self.dtype = 'float64' self.rcond = 1e-15 @@ -166,6 +176,7 @@ class LinalgLstsqTestCase2(LinalgLstsqTestCase): class LinalgLstsqTestCaseRcond(LinalgLstsqTestCase): + def init_config(self): self.dtype = 'float64' self.rcond = 1e-7 @@ -175,6 +186,7 @@ class LinalgLstsqTestCaseRcond(LinalgLstsqTestCase): class LinalgLstsqTestCaseGelsFloat32(LinalgLstsqTestCase): + def init_config(self): self.dtype = 'float32' self.rcond = None @@ -184,6 +196,7 @@ class LinalgLstsqTestCaseGelsFloat32(LinalgLstsqTestCase): class LinalgLstsqTestCaseGelssFloat64(LinalgLstsqTestCase): + def init_config(self): self.dtype = 'float64' self.rcond = None @@ -193,6 +206,7 @@ class LinalgLstsqTestCaseGelssFloat64(LinalgLstsqTestCase): class LinalgLstsqTestCaseGelsyFloat32(LinalgLstsqTestCase): + def init_config(self): self.dtype = 'float32' self.rcond = 1e-15 @@ -202,6 +216,7 @@ class LinalgLstsqTestCaseGelsyFloat32(LinalgLstsqTestCase): class LinalgLstsqTestCaseBatch1(LinalgLstsqTestCase): + def init_config(self): self.dtype = 'float32' self.rcond = 1e-15 @@ -211,6 +226,7 @@ class LinalgLstsqTestCaseBatch1(LinalgLstsqTestCase): class LinalgLstsqTestCaseBatch2(LinalgLstsqTestCase): + def init_config(self): self.dtype = 'float64' self.rcond = 1e-15 @@ -220,6 +236,7 @@ class LinalgLstsqTestCaseBatch2(LinalgLstsqTestCase): class LinalgLstsqTestCaseLarge1(LinalgLstsqTestCase): + def init_config(self): self.dtype = 'float64' self.rcond = 1e-15 @@ -229,6 +246,7 @@ class LinalgLstsqTestCaseLarge1(LinalgLstsqTestCase): class LinalgLstsqTestCaseLarge2(LinalgLstsqTestCase): + def init_config(self): self.dtype = 'float64' self.rcond = 1e-15 diff --git a/python/paddle/fluid/tests/unittests/test_linalg_pinv_op.py b/python/paddle/fluid/tests/unittests/test_linalg_pinv_op.py index 
8d0a34009d6..5d3c1ff9641 100644 --- a/python/paddle/fluid/tests/unittests/test_linalg_pinv_op.py +++ b/python/paddle/fluid/tests/unittests/test_linalg_pinv_op.py @@ -26,6 +26,7 @@ from decorator_helper import prog_scope class LinalgPinvTestCase(unittest.TestCase): + def setUp(self): self.init_config() self.generate_input() @@ -53,8 +54,9 @@ class LinalgPinvTestCase(unittest.TestCase): for place in self.places: paddle.disable_static(place) x = paddle.to_tensor(self._input_data, place=place) - out = paddle.linalg.pinv( - x, rcond=self.rcond, hermitian=self.hermitian).numpy() + out = paddle.linalg.pinv(x, + rcond=self.rcond, + hermitian=self.hermitian).numpy() if (np.abs(out - self._output_data) < 1e-6).any(): pass else: @@ -69,12 +71,12 @@ class LinalgPinvTestCase(unittest.TestCase): places.append(fluid.CUDAPlace(0)) for place in places: with fluid.program_guard(fluid.Program(), fluid.Program()): - x = paddle.fluid.data( - name="input", - shape=self._input_shape, - dtype=self._input_data.dtype) - out = paddle.linalg.pinv( - x, rcond=self.rcond, hermitian=self.hermitian) + x = paddle.fluid.data(name="input", + shape=self._input_shape, + dtype=self._input_data.dtype) + out = paddle.linalg.pinv(x, + rcond=self.rcond, + hermitian=self.hermitian) exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), feed={"input": self._input_data}, @@ -88,10 +90,12 @@ class LinalgPinvTestCase(unittest.TestCase): def test_grad(self): for place in self.places: - x = paddle.to_tensor( - self._input_data, place=place, stop_gradient=False) - out = paddle.linalg.pinv( - x, rcond=self.rcond, hermitian=self.hermitian) + x = paddle.to_tensor(self._input_data, + place=place, + stop_gradient=False) + out = paddle.linalg.pinv(x, + rcond=self.rcond, + hermitian=self.hermitian) try: out.backward() x_grad = x.grad @@ -101,6 +105,7 @@ class LinalgPinvTestCase(unittest.TestCase): class LinalgPinvTestCase1(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (4, 5) np.random.seed(123) @@ -109,6 +114,7 @@ class LinalgPinvTestCase1(LinalgPinvTestCase): class LinalgPinvTestCase2(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (5, 4) np.random.seed(123) @@ -117,6 +123,7 @@ class LinalgPinvTestCase2(LinalgPinvTestCase): class LinalgPinvTestCaseBatch1(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (3, 5, 5) np.random.seed(123) @@ -125,6 +132,7 @@ class LinalgPinvTestCaseBatch1(LinalgPinvTestCase): class LinalgPinvTestCaseBatch2(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (3, 4, 5) np.random.seed(123) @@ -133,6 +141,7 @@ class LinalgPinvTestCaseBatch2(LinalgPinvTestCase): class LinalgPinvTestCaseBatch3(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (3, 5, 4) np.random.seed(123) @@ -141,6 +150,7 @@ class LinalgPinvTestCaseBatch3(LinalgPinvTestCase): class LinalgPinvTestCaseBatch4(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (3, 6, 5, 4) np.random.seed(123) @@ -149,6 +159,7 @@ class LinalgPinvTestCaseBatch4(LinalgPinvTestCase): class LinalgPinvTestCaseBatchBig(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (2, 200, 300) np.random.seed(123) @@ -157,6 +168,7 @@ class LinalgPinvTestCaseBatchBig(LinalgPinvTestCase): class LinalgPinvTestCaseFP32(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (3, 5, 5) np.random.seed(123) @@ -170,6 +182,7 @@ class LinalgPinvTestCaseFP32(LinalgPinvTestCase): class LinalgPinvTestCaseRcond(LinalgPinvTestCase): + def 
generate_input(self): self._input_shape = (3, 5, 5) np.random.seed(123) @@ -183,6 +196,7 @@ class LinalgPinvTestCaseRcond(LinalgPinvTestCase): class LinalgPinvTestCaseHermitian1(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (5, 5) np.random.seed(123) @@ -197,6 +211,7 @@ class LinalgPinvTestCaseHermitian1(LinalgPinvTestCase): class LinalgPinvTestCaseHermitian2(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (3, 5, 5) np.random.seed(123) @@ -211,6 +226,7 @@ class LinalgPinvTestCaseHermitian2(LinalgPinvTestCase): class LinalgPinvTestCaseHermitian3(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (3, 5, 5) np.random.seed(123) @@ -225,6 +241,7 @@ class LinalgPinvTestCaseHermitian3(LinalgPinvTestCase): class LinalgPinvTestCaseHermitian4(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (5, 5) np.random.seed(123) @@ -238,6 +255,7 @@ class LinalgPinvTestCaseHermitian4(LinalgPinvTestCase): class LinalgPinvTestCaseHermitian5(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (3, 5, 5) np.random.seed(123) @@ -251,6 +269,7 @@ class LinalgPinvTestCaseHermitian5(LinalgPinvTestCase): class LinalgPinvTestCaseHermitianFP32(LinalgPinvTestCase): + def generate_input(self): self._input_shape = (3, 5, 5) np.random.seed(123) diff --git a/python/paddle/fluid/tests/unittests/test_linear.py b/python/paddle/fluid/tests/unittests/test_linear.py index 6b00a86e3e9..b03b8866eaf 100644 --- a/python/paddle/fluid/tests/unittests/test_linear.py +++ b/python/paddle/fluid/tests/unittests/test_linear.py @@ -26,13 +26,14 @@ import paddle.fluid.initializer as I class LinearTestCase(unittest.TestCase): + def setUp(self): self.dtype = 'float32' self.input = np.ones((3, 1, 2)).astype(self.dtype) self.weight = np.ones((2, 2)).astype(self.dtype) self.bias = np.ones((2)).astype(self.dtype) - self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + self.place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() def functional(self, place): paddle.disable_static(place) @@ -57,8 +58,10 @@ class LinearTestCase(unittest.TestCase): trainable=False, regularizer=None, initializer=paddle.fluid.initializer.ConstantInitializer(value=1.0)) - linear = paddle.nn.Linear( - 2, 2, weight_attr=weight_attr, bias_attr=bias_attr) + linear = paddle.nn.Linear(2, + 2, + weight_attr=weight_attr, + bias_attr=bias_attr) y = linear(input) return y.numpy() @@ -77,8 +80,10 @@ class LinearTestCase(unittest.TestCase): if not paddle.is_compiled_with_cuda(): return paddle.seed(100) - linear = paddle.nn.Linear( - 2, 3, weight_attr=paddle.nn.initializer.Normal(0, 1.)) + linear = paddle.nn.Linear(2, + 3, + weight_attr=paddle.nn.initializer.Normal( + 0, 1.)) paddle.nn.utils._stride_column(linear.weight) expect = [[1.4349908, -0.8099171, -2.64788], [-1.4981681, -1.1784115, -0.023253186]] diff --git a/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py b/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py index 8a9204c73fc..45ae358f886 100755 --- a/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py +++ b/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py @@ -22,6 +22,7 @@ from op_test import OpTest class LinearChainCrfForward(object): + def __init__(self, seq_start_positions, emission_weights, emission_row_max, emission_exps, transition_weights, transition_exps, labels): self.tag_num = emission_weights.shape[1] @@ -47,8 +48,8 @@ class 
LinearChainCrfForward(object): # The output of linear chain crf operator. # alpha is a memo table in dynamic programming to calculate # nomalization factor. - self.alpha = np.zeros( - (seq_start_positions[-1], self.tag_num), dtype="float64") + self.alpha = np.zeros((seq_start_positions[-1], self.tag_num), + dtype="float64") self.log_likelihood = np.zeros((self.seq_num, 1)) def _l1_norm(self, x): @@ -78,8 +79,8 @@ class LinearChainCrfForward(object): log_likelihood -= np.log(s) # calculate the nominator part. - log_likelihood += ( - self.a[label[0]] + x[0, label[0]] + self.b[label[-1]]) + log_likelihood += (self.a[label[0]] + x[0, label[0]] + + self.b[label[-1]]) for k in range(1, seq_len): log_likelihood += (x[k, label[k]] + self.w[label[k - 1], label[k]]) @@ -99,6 +100,7 @@ class LinearChainCrfForward(object): class TestLinearChainCrfOp(OpTest): + def set_test_data(self): # TODO(caoying) Fix the unittest by: add the boundary cases when # sequence lengths are 1, 2, and 3. @@ -122,8 +124,10 @@ class TestLinearChainCrfOp(OpTest): [TAG_NUM + 2, TAG_NUM]).astype("float64") transition_exps = np.exp(transition) - labels = np.random.randint( - low=0, high=TAG_NUM, size=(seq_start_pos[-1], 1), dtype="int64") + labels = np.random.randint(low=0, + high=TAG_NUM, + size=(seq_start_pos[-1], 1), + dtype="int64") self.inputs = { "Emission": (emission, lod), @@ -153,11 +157,13 @@ class TestLinearChainCrfOp(OpTest): self.check_grad(["Emission", "Transition"], "LogLikelihood") def test_check_grad_ignore_transition(self): - self.check_grad( - ["Emission"], "LogLikelihood", no_grad_set=set("Transition")) + self.check_grad(["Emission"], + "LogLikelihood", + no_grad_set=set("Transition")) class TestLinearChainCrfPaddingTensor(OpTest): + def seq_pad(self, data, length): max_len = np.max(length) shape = [len(length), max_len] + list(data.shape[1:]) @@ -180,7 +186,7 @@ class TestLinearChainCrfPaddingTensor(OpTest): return padded def set_test_data_1(self): - # Fix the unittest by: add padding tensor in inputs + # Fix the unittest by: add padding tensor in inputs SEQ_NUM = 3 TAG_NUM = 17 MAX_SEQ_LEN = 5 @@ -199,8 +205,10 @@ class TestLinearChainCrfPaddingTensor(OpTest): [TAG_NUM + 2, TAG_NUM]).astype("float64") transition_exps = np.exp(transition) - labels = np.random.randint( - low=0, high=TAG_NUM, size=(seq_start_pos[-1], 1), dtype="int64") + labels = np.random.randint(low=0, + high=TAG_NUM, + size=(seq_start_pos[-1], 1), + dtype="int64") self.inputs = { "Emission": self.seq_pad(emission, lod[0]), "Transition": transition, @@ -229,8 +237,9 @@ class TestLinearChainCrfPaddingTensor(OpTest): self.check_grad(["Emission", "Transition"], "LogLikelihood") def test_check_grad_ignore_transition(self): - self.check_grad( - ["Emission"], "LogLikelihood", no_grad_set=set("Transition")) + self.check_grad(["Emission"], + "LogLikelihood", + no_grad_set=set("Transition")) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_linear_interp_op.py b/python/paddle/fluid/tests/unittests/test_linear_interp_op.py index c9948edad00..dd44e70d92e 100755 --- a/python/paddle/fluid/tests/unittests/test_linear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_linear_interp_op.py @@ -63,8 +63,8 @@ def linear_interp_np(input, w1lambda = ratio_w * j - w w2lambda = 1.0 - w1lambda - out[:, :, j] = w2lambda * input[:, :, w] + w1lambda * input[:, :, w + - wid] + out[:, :, + j] = w2lambda * input[:, :, w] + w1lambda * input[:, :, w + wid] if data_layout == "NHWC": out = np.transpose(out, (0, 2, 1)) # NCHW => NHWC 
@@ -73,6 +73,7 @@ def linear_interp_np(input, class TestLinearInterpOp(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -124,46 +125,58 @@ class TestLinearInterpOp(OpTest): self.input_shape = [1, 3, 100] self.out_w = 50 self.scale = 0. - self.out_size = np.array([50, ]).astype("int32") + self.out_size = np.array([ + 50, + ]).astype("int32") self.align_corners = False self.align_mode = 1 class TestLinearInterpOpDataLayout(TestLinearInterpOp): + def init_test_case(self): self.interp_method = 'linear' self.input_shape = [1, 3, 100] self.out_w = 50 self.scale = 0. - self.out_size = np.array([50, ]).astype("int32") + self.out_size = np.array([ + 50, + ]).astype("int32") self.align_corners = False self.align_mode = 1 self.data_layout = 'NHWC' class TestLinearInterpOpAlignMode(TestLinearInterpOp): + def init_test_case(self): self.interp_method = 'linear' self.input_shape = [1, 3, 100] self.out_w = 50 self.scale = 0. - self.out_size = np.array([50, ]).astype("int32") + self.out_size = np.array([ + 50, + ]).astype("int32") self.align_corners = False self.align_mode = 0 class TestLinearInterpOpScale(TestLinearInterpOp): + def init_test_case(self): self.interp_method = 'linear' self.input_shape = [1, 3, 100] self.out_w = 50 self.scale = 0.5 - self.out_size = np.array([50, ]).astype("int32") + self.out_size = np.array([ + 50, + ]).astype("int32") self.align_corners = False self.align_mode = 0 class TestLinearInterpOpSizeTensor(TestLinearInterpOp): + def setUp(self): self.out_size = None self.actual_shape = None @@ -212,56 +225,72 @@ class TestLinearInterpOpSizeTensor(TestLinearInterpOp): class TestResizeLinearAPI(unittest.TestCase): + def test_case(self): x = fluid.data(name="x", shape=[1, 3, 64], dtype="float32") dim = fluid.data(name="dim", shape=[1], dtype="int32") shape_tensor = fluid.data(name="shape_tensor", shape=[1], dtype="int32") actual_size = fluid.data(name="actual_size", shape=[1], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32") - - out1 = fluid.layers.resize_linear( - x, out_shape=[128, ], align_mode=1, align_corners=False) - out2 = fluid.layers.resize_linear( - x, out_shape=[128], align_mode=1, align_corners=False) - out3 = fluid.layers.resize_linear( - x, out_shape=shape_tensor, align_mode=1, align_corners=False) - out4 = fluid.layers.resize_linear( - x, - out_shape=[128, ], - actual_shape=actual_size, - align_mode=1, - align_corners=False) - out5 = fluid.layers.resize_linear( - x, scale=scale_tensor, align_mode=1, align_corners=False) - - out6 = interpolate( - x, - scale_factor=scale_tensor, - mode='linear', - align_mode=1, - align_corners=False, - data_format='NCW') - out7 = interpolate( - x, - size=[128, ], - mode='linear', - align_mode=1, - align_corners=False, - data_format='NCW') - out8 = interpolate( - x, - size=shape_tensor, - mode='linear', - align_mode=1, - align_corners=False, - data_format='NCW') + scale_tensor = fluid.data(name="scale_tensor", + shape=[1], + dtype="float32") + + out1 = fluid.layers.resize_linear(x, + out_shape=[ + 128, + ], + align_mode=1, + align_corners=False) + out2 = fluid.layers.resize_linear(x, + out_shape=[128], + align_mode=1, + align_corners=False) + out3 = fluid.layers.resize_linear(x, + out_shape=shape_tensor, + align_mode=1, + align_corners=False) + out4 = fluid.layers.resize_linear(x, + out_shape=[ + 128, + ], + actual_shape=actual_size, + align_mode=1, + align_corners=False) + out5 = fluid.layers.resize_linear(x, + scale=scale_tensor, + align_mode=1, + 
align_corners=False) + + out6 = interpolate(x, + scale_factor=scale_tensor, + mode='linear', + align_mode=1, + align_corners=False, + data_format='NCW') + out7 = interpolate(x, + size=[ + 128, + ], + mode='linear', + align_mode=1, + align_corners=False, + data_format='NCW') + out8 = interpolate(x, + size=shape_tensor, + mode='linear', + align_mode=1, + align_corners=False, + data_format='NCW') x_data = np.random.random((1, 3, 64)).astype("float32") dim_data = np.array([128]).astype("int32") - shape_data = np.array([128, ]).astype("int32") - actual_size_data = np.array([128, ]).astype("int32") + shape_data = np.array([ + 128, + ]).astype("int32") + actual_size_data = np.array([ + 128, + ]).astype("int32") scale_data = np.array([2.0]).astype("float32") if core.is_compiled_with_cuda(): @@ -282,34 +311,41 @@ class TestResizeLinearAPI(unittest.TestCase): fetch_list=[out1, out2, out3, out4, out5, out6, out7, out8], return_numpy=True) - expect_res = linear_interp_np( - x_data, out_w=128, align_mode=1, align_corners=False) + expect_res = linear_interp_np(x_data, + out_w=128, + align_mode=1, + align_corners=False) for res in results: self.assertTrue(np.allclose(res, expect_res)) class TestLinearInterpOpAPI2_0(unittest.TestCase): + def test_case(self): - # dygraph + # dygraph x_data = np.random.random((1, 3, 128)).astype("float32") - us_1 = paddle.nn.Upsample( - size=[64, ], - mode='linear', - align_mode=1, - align_corners=False, - data_format='NCW') + us_1 = paddle.nn.Upsample(size=[ + 64, + ], + mode='linear', + align_mode=1, + align_corners=False, + data_format='NCW') with fluid.dygraph.guard(): x = fluid.dygraph.to_variable(x_data) interp = us_1(x) - expect = linear_interp_np( - x_data, out_w=64, align_mode=1, align_corners=False) + expect = linear_interp_np(x_data, + out_w=64, + align_mode=1, + align_corners=False) self.assertTrue(np.allclose(interp.numpy(), expect)) class TestResizeLinearOpUint8(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -349,30 +385,41 @@ class TestResizeLinearOpUint8(OpTest): self.input_shape = [2, 3, 100] self.out_w = 50 self.scale = 0. 
- self.out_size = np.array([50, ]).astype("int32") + self.out_size = np.array([ + 50, + ]).astype("int32") self.align_corners = True self.align_mode = 1 class TestLinearInterpOpException(unittest.TestCase): + def test_exception(self): + def input_shape_error(): x1 = fluid.data(name="x1", shape=[1], dtype="float32") - out = fluid.layers.resize_linear( - x1, out_shape=[256, ], data_format='NCW') + out = fluid.layers.resize_linear(x1, + out_shape=[ + 256, + ], + data_format='NCW') def data_format_error(): x2 = fluid.data(name="x2", shape=[1, 3, 128], dtype="float32") - out = fluid.layers.resize_linear( - x2, out_shape=[256, ], data_format='NHWCD') + out = fluid.layers.resize_linear(x2, + out_shape=[ + 256, + ], + data_format='NHWCD') def out_shape_error(): x3 = fluid.data(name="x3", shape=[1, 3, 128], dtype="float32") - out = fluid.layers.resize_linear( - x3, out_shape=[ - 256, - 256, - ], data_format='NHWC') + out = fluid.layers.resize_linear(x3, + out_shape=[ + 256, + 256, + ], + data_format='NHWC') self.assertRaises(ValueError, input_shape_error) self.assertRaises(ValueError, data_format_error) @@ -380,28 +427,36 @@ class TestLinearInterpOpException(unittest.TestCase): class TestLinearInterpOpError(unittest.TestCase): + def test_error(self): with program_guard(Program(), Program()): def input_shape_error(): x1 = fluid.data(name="x1", shape=[1], dtype="float32") - out1 = paddle.nn.Upsample( - size=[256, ], data_format='NCW', mode='linear') + out1 = paddle.nn.Upsample(size=[ + 256, + ], + data_format='NCW', + mode='linear') out1_res = out1(x1) def data_format_error(): x2 = fluid.data(name="x2", shape=[1, 3, 128], dtype="float32") - out2 = paddle.nn.Upsample( - size=[256, ], data_format='NHWCD', mode='linear') + out2 = paddle.nn.Upsample(size=[ + 256, + ], + data_format='NHWCD', + mode='linear') out2_res = out2(x2) def out_shape_error(): x3 = fluid.data(name="x3", shape=[1, 3, 128], dtype="float32") - out3 = paddle.nn.Upsample( - size=[ - 256, - 256, - ], data_format='NHWC', mode='linear') + out3 = paddle.nn.Upsample(size=[ + 256, + 256, + ], + data_format='NHWC', + mode='linear') out3_res = out3(x3) self.assertRaises(ValueError, input_shape_error) diff --git a/python/paddle/fluid/tests/unittests/test_linear_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_linear_interp_v2_op.py index b34989f5f5c..69d652299be 100755 --- a/python/paddle/fluid/tests/unittests/test_linear_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_linear_interp_v2_op.py @@ -67,8 +67,8 @@ def linear_interp_np(input, w1lambda = ratio_w * j - w w2lambda = 1.0 - w1lambda - out[:, :, j] = w2lambda * input[:, :, w] + w1lambda * input[:, :, w + - wid] + out[:, :, + j] = w2lambda * input[:, :, w] + w1lambda * input[:, :, w + wid] if data_layout == "NHWC": out = np.transpose(out, (0, 2, 1)) # NCHW => NHWC @@ -77,6 +77,7 @@ def linear_interp_np(input, class TestLinearInterpOp(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -136,46 +137,58 @@ class TestLinearInterpOp(OpTest): self.input_shape = [1, 3, 100] self.out_w = 50 self.scale = 0. - self.out_size = np.array([50, ]).astype("int32") + self.out_size = np.array([ + 50, + ]).astype("int32") self.align_corners = False self.align_mode = 1 class TestLinearInterpOpDataLayout(TestLinearInterpOp): + def init_test_case(self): self.interp_method = 'linear' self.input_shape = [1, 3, 100] self.out_w = 50 self.scale = 0. 
- self.out_size = np.array([50, ]).astype("int32") + self.out_size = np.array([ + 50, + ]).astype("int32") self.align_corners = False self.align_mode = 1 self.data_layout = 'NHWC' class TestLinearInterpOpAlignMode(TestLinearInterpOp): + def init_test_case(self): self.interp_method = 'linear' self.input_shape = [1, 3, 100] self.out_w = 50 self.scale = 0. - self.out_size = np.array([50, ]).astype("int32") + self.out_size = np.array([ + 50, + ]).astype("int32") self.align_corners = False self.align_mode = 0 class TestLinearInterpOpScale(TestLinearInterpOp): + def init_test_case(self): self.interp_method = 'linear' self.input_shape = [1, 3, 100] self.out_w = 50 self.scale = 0.5 - self.out_size = np.array([50, ]).astype("int32") + self.out_size = np.array([ + 50, + ]).astype("int32") self.align_corners = False self.align_mode = 0 class TestLinearInterpOpSizeTensor(TestLinearInterpOp): + def setUp(self): self.out_size = None self.actual_shape = None @@ -233,56 +246,72 @@ class TestLinearInterpOpSizeTensor(TestLinearInterpOp): class TestResizeLinearAPI(unittest.TestCase): + def test_case(self): x = fluid.data(name="x", shape=[1, 3, 64], dtype="float32") dim = fluid.data(name="dim", shape=[1], dtype="int32") shape_tensor = fluid.data(name="shape_tensor", shape=[1], dtype="int32") actual_size = fluid.data(name="actual_size", shape=[1], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32") - - out1 = fluid.layers.resize_linear( - x, out_shape=[128, ], align_mode=1, align_corners=False) - out2 = fluid.layers.resize_linear( - x, out_shape=[128], align_mode=1, align_corners=False) - out3 = fluid.layers.resize_linear( - x, out_shape=shape_tensor, align_mode=1, align_corners=False) - out4 = fluid.layers.resize_linear( - x, - out_shape=[128, ], - actual_shape=actual_size, - align_mode=1, - align_corners=False) - out5 = fluid.layers.resize_linear( - x, scale=scale_tensor, align_mode=1, align_corners=False) - - out6 = interpolate( - x, - scale_factor=scale_tensor, - mode='linear', - align_mode=1, - align_corners=False, - data_format='NCW') - out7 = interpolate( - x, - size=[128, ], - mode='linear', - align_mode=1, - align_corners=False, - data_format='NCW') - out8 = interpolate( - x, - size=shape_tensor, - mode='linear', - align_mode=1, - align_corners=False, - data_format='NCW') + scale_tensor = fluid.data(name="scale_tensor", + shape=[1], + dtype="float32") + + out1 = fluid.layers.resize_linear(x, + out_shape=[ + 128, + ], + align_mode=1, + align_corners=False) + out2 = fluid.layers.resize_linear(x, + out_shape=[128], + align_mode=1, + align_corners=False) + out3 = fluid.layers.resize_linear(x, + out_shape=shape_tensor, + align_mode=1, + align_corners=False) + out4 = fluid.layers.resize_linear(x, + out_shape=[ + 128, + ], + actual_shape=actual_size, + align_mode=1, + align_corners=False) + out5 = fluid.layers.resize_linear(x, + scale=scale_tensor, + align_mode=1, + align_corners=False) + + out6 = interpolate(x, + scale_factor=scale_tensor, + mode='linear', + align_mode=1, + align_corners=False, + data_format='NCW') + out7 = interpolate(x, + size=[ + 128, + ], + mode='linear', + align_mode=1, + align_corners=False, + data_format='NCW') + out8 = interpolate(x, + size=shape_tensor, + mode='linear', + align_mode=1, + align_corners=False, + data_format='NCW') x_data = np.random.random((1, 3, 64)).astype("float32") dim_data = np.array([128]).astype("int32") - shape_data = np.array([128, ]).astype("int32") - actual_size_data = np.array([128, ]).astype("int32") + 
shape_data = np.array([ + 128, + ]).astype("int32") + actual_size_data = np.array([ + 128, + ]).astype("int32") scale_data = np.array([2.0]).astype("float32") if core.is_compiled_with_cuda(): @@ -303,34 +332,41 @@ class TestResizeLinearAPI(unittest.TestCase): fetch_list=[out1, out2, out3, out4, out5, out6, out7, out8], return_numpy=True) - expect_res = linear_interp_np( - x_data, out_w=128, align_mode=1, align_corners=False) + expect_res = linear_interp_np(x_data, + out_w=128, + align_mode=1, + align_corners=False) for res in results: self.assertTrue(np.allclose(res, expect_res)) class TestLinearInterpOpAPI2_0(unittest.TestCase): + def test_case(self): - # dygraph + # dygraph x_data = np.random.random((1, 3, 128)).astype("float32") - us_1 = paddle.nn.Upsample( - size=[64, ], - mode='linear', - align_mode=1, - align_corners=False, - data_format='NCW') + us_1 = paddle.nn.Upsample(size=[ + 64, + ], + mode='linear', + align_mode=1, + align_corners=False, + data_format='NCW') with fluid.dygraph.guard(): x = fluid.dygraph.to_variable(x_data) interp = us_1(x) - expect = linear_interp_np( - x_data, out_w=64, align_mode=1, align_corners=False) + expect = linear_interp_np(x_data, + out_w=64, + align_mode=1, + align_corners=False) self.assertTrue(np.allclose(interp.numpy(), expect)) class TestResizeLinearOpUint8(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -379,30 +415,41 @@ class TestResizeLinearOpUint8(OpTest): self.input_shape = [2, 3, 100] self.out_w = 50 self.scale = 0. - self.out_size = np.array([50, ]).astype("int32") + self.out_size = np.array([ + 50, + ]).astype("int32") self.align_corners = True self.align_mode = 1 class TestLinearInterpOpException(unittest.TestCase): + def test_exception(self): + def input_shape_error(): x1 = fluid.data(name="x1", shape=[1], dtype="float32") - out = fluid.layers.resize_linear( - x1, out_shape=[256, ], data_format='NCW') + out = fluid.layers.resize_linear(x1, + out_shape=[ + 256, + ], + data_format='NCW') def data_format_error(): x2 = fluid.data(name="x2", shape=[1, 3, 128], dtype="float32") - out = fluid.layers.resize_linear( - x2, out_shape=[256, ], data_format='NHWCD') + out = fluid.layers.resize_linear(x2, + out_shape=[ + 256, + ], + data_format='NHWCD') def out_shape_error(): x3 = fluid.data(name="x3", shape=[1, 3, 128], dtype="float32") - out = fluid.layers.resize_linear( - x3, out_shape=[ - 256, - 256, - ], data_format='NHWC') + out = fluid.layers.resize_linear(x3, + out_shape=[ + 256, + 256, + ], + data_format='NHWC') self.assertRaises(ValueError, input_shape_error) self.assertRaises(ValueError, data_format_error) @@ -410,28 +457,36 @@ class TestLinearInterpOpException(unittest.TestCase): class TestLinearInterpOpError(unittest.TestCase): + def test_error(self): with program_guard(Program(), Program()): def input_shape_error(): x1 = fluid.data(name="x1", shape=[1], dtype="float32") - out1 = paddle.nn.Upsample( - size=[256, ], data_format='NCW', mode='linear') + out1 = paddle.nn.Upsample(size=[ + 256, + ], + data_format='NCW', + mode='linear') out1_res = out1(x1) def data_format_error(): x2 = fluid.data(name="x2", shape=[1, 3, 128], dtype="float32") - out2 = paddle.nn.Upsample( - size=[256, ], data_format='NHWCD', mode='linear') + out2 = paddle.nn.Upsample(size=[ + 256, + ], + data_format='NHWCD', + mode='linear') out2_res = out2(x2) def out_shape_error(): x3 = fluid.data(name="x3", shape=[1, 3, 128], dtype="float32") - out3 = paddle.nn.Upsample( - size=[ - 256, - 256, - ], data_format='NHWC', mode='linear') + out3 = 
paddle.nn.Upsample(size=[ + 256, + 256, + ], + data_format='NHWC', + mode='linear') out3_res = out3(x3) self.assertRaises(ValueError, input_shape_error) diff --git a/python/paddle/fluid/tests/unittests/test_linspace.py b/python/paddle/fluid/tests/unittests/test_linspace.py index 65a6c21fb07..e22fb537f45 100644 --- a/python/paddle/fluid/tests/unittests/test_linspace.py +++ b/python/paddle/fluid/tests/unittests/test_linspace.py @@ -25,6 +25,7 @@ from paddle.fluid.framework import _test_eager_guard class TestLinspaceOpCommonCase(OpTest): + def setUp(self): self.op_type = "linspace" self.python_api = paddle.linspace @@ -43,6 +44,7 @@ class TestLinspaceOpCommonCase(OpTest): class TestLinspaceOpReverseCase(OpTest): + def setUp(self): self.op_type = "linspace" self.python_api = paddle.linspace @@ -61,6 +63,7 @@ class TestLinspaceOpReverseCase(OpTest): class TestLinspaceOpNumOneCase(OpTest): + def setUp(self): self.op_type = "linspace" self.python_api = paddle.linspace @@ -79,6 +82,7 @@ class TestLinspaceOpNumOneCase(OpTest): class TestLinspaceAPI(unittest.TestCase): + def test_variable_input1(self): start = paddle.full(shape=[1], fill_value=0, dtype='float32') stop = paddle.full(shape=[1], fill_value=10, dtype='float32') @@ -110,8 +114,11 @@ class TestLinspaceAPI(unittest.TestCase): def test_name(self): with paddle.static.program_guard(paddle.static.Program()): - out = paddle.linspace( - 0, 10, 5, dtype='float32', name='linspace_res') + out = paddle.linspace(0, + 10, + 5, + dtype='float32', + name='linspace_res') assert 'linspace_res' in out.name def test_imperative(self): @@ -134,6 +141,7 @@ class TestLinspaceAPI(unittest.TestCase): class TestLinspaceOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): diff --git a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py index 23c4bc7b978..0d328034ab7 100644 --- a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py +++ b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py @@ -54,11 +54,10 @@ def run_pserver(use_cuda, sync_mode, ip, port, trainers, trainer_id): config = fluid.DistributeTranspilerConfig() config.sync_mode = sync_mode t = fluid.DistributeTranspiler(config=config) - t.transpile( - trainer_id, - pservers=pserver_endpoints, - trainers=trainers, - sync_mode=sync_mode) + t.transpile(trainer_id, + pservers=pserver_endpoints, + trainers=trainers, + sync_mode=sync_mode) pserver_prog = t.get_pserver_program(current_endpoint) pserver_startup = t.get_startup_program(current_endpoint, pserver_prog) exe.run(pserver_startup) @@ -92,11 +91,10 @@ def run_pserver_with_empty_block(use_cuda, sync_mode, ip, port, trainers, config.slice_var_up = False t = fluid.DistributeTranspiler(config=config) - t.transpile( - trainer_id, - pservers=pserver_endpoints, - trainers=trainers, - sync_mode=sync_mode) + t.transpile(trainer_id, + pservers=pserver_endpoints, + trainers=trainers, + sync_mode=sync_mode) pserver_prog = t.get_pserver_program(ps2) # pserver2 have no parameter @@ -114,6 +112,7 @@ def gen_complete_file_flag(flag_file): class TestListenAndServOp(unittest.TestCase): + def setUp(self): self.ps_timeout = 200 self.ip = "127.0.0.1" @@ -122,10 +121,9 @@ class TestListenAndServOp(unittest.TestCase): self.trainer_id = 0 def _start_pserver(self, use_cuda, sync_mode, pserver_func): - p = Process( - target=pserver_func, - args=(use_cuda, sync_mode, self.ip, self.port, self.trainers, - self.trainer_id)) + p = 
Process(target=pserver_func, + args=(use_cuda, sync_mode, self.ip, self.port, + self.trainers, self.trainer_id)) p.daemon = True p.start() return p diff --git a/python/paddle/fluid/tests/unittests/test_load_op.py b/python/paddle/fluid/tests/unittests/test_load_op.py index 885c26e2be0..2896ff218c7 100644 --- a/python/paddle/fluid/tests/unittests/test_load_op.py +++ b/python/paddle/fluid/tests/unittests/test_load_op.py @@ -40,8 +40,9 @@ class TestLoadOp(unittest.TestCase): self.ones))) exe = fluid.Executor(fluid.CPUPlace()) exe.run(start_prog) - fluid.io.save_persistables( - exe, dirname="./model", main_program=main_prog) + fluid.io.save_persistables(exe, + dirname="./model", + main_program=main_prog) def test_load(self): main_prog = fluid.Program() diff --git a/python/paddle/fluid/tests/unittests/test_load_op_xpu.py b/python/paddle/fluid/tests/unittests/test_load_op_xpu.py index a5af6871be4..f3c8024a21a 100644 --- a/python/paddle/fluid/tests/unittests/test_load_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/test_load_op_xpu.py @@ -43,8 +43,9 @@ class TestLoadOpXpu(unittest.TestCase): self.ones))) exe = fluid.Executor(fluid.XPUPlace(0)) exe.run(start_prog) - fluid.io.save_persistables( - exe, dirname="./model", main_program=main_prog) + fluid.io.save_persistables(exe, + dirname="./model", + main_program=main_prog) def test_load_xpu(self): main_prog = fluid.Program() diff --git a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py index 35ad6fdb30e..ac88b796054 100644 --- a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py +++ b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py @@ -26,21 +26,19 @@ from test_imperative_base import new_program_scope def convolutional_neural_network(img): - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_1 = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') return prediction @@ -58,6 +56,7 @@ def static_train_net(img, label): class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): + def setUp(self): self.seed = 90 self.epoch_num = 1 @@ -71,24 +70,24 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): startup_program = fluid.default_startup_program() main_program = fluid.default_main_program() - img = fluid.data( - name='img', shape=[None, 1, 28, 28], dtype='float32') + img = fluid.data(name='img', + shape=[None, 1, 28, 28], + dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') prediction, avg_loss = static_train_net(img, label) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) feeder = fluid.DataFeeder(feed_list=[img, label], place=place) exe.run(startup_program) - train_reader = paddle.batch( - 
paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=100), - batch_size=self.batch_size) + train_reader = paddle.batch(paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=100), + batch_size=self.batch_size) for _ in range(0, self.epoch_num): for batch_id, data in enumerate(train_reader()): @@ -105,8 +104,9 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): param.name) if only_params: - fluid.io.save_params( - exe, self.save_dirname, filename=self.params_filename) + fluid.io.save_params(exe, + self.save_dirname, + filename=self.params_filename) else: fluid.io.save_inference_model( self.save_dirname, ["img"], [prediction], @@ -142,8 +142,8 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): self.save_dirname, model_filename=self.model_filename) self.check_load_state_dict(orig_param_dict, load_param_dict) - new_load_param_dict = paddle.load( - self.save_dirname, model_filename=self.model_filename) + new_load_param_dict = paddle.load(self.save_dirname, + model_filename=self.model_filename) self.check_load_state_dict(orig_param_dict, new_load_param_dict) def test_load_with_param_filename(self): @@ -156,8 +156,8 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): self.save_dirname, params_filename=self.params_filename) self.check_load_state_dict(orig_param_dict, load_param_dict) - new_load_param_dict = paddle.load( - self.save_dirname, params_filename=self.params_filename) + new_load_param_dict = paddle.load(self.save_dirname, + params_filename=self.params_filename) self.check_load_state_dict(orig_param_dict, new_load_param_dict) def test_load_with_model_and_param_filename(self): @@ -172,10 +172,9 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): model_filename=self.model_filename) self.check_load_state_dict(orig_param_dict, load_param_dict) - new_load_param_dict = paddle.load( - self.save_dirname, - params_filename=self.params_filename, - model_filename=self.model_filename) + new_load_param_dict = paddle.load(self.save_dirname, + params_filename=self.params_filename, + model_filename=self.model_filename) self.check_load_state_dict(orig_param_dict, new_load_param_dict) def test_load_state_dict_from_save_params(self): diff --git a/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py b/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py index 3e2e778d40e..566c4929bf7 100644 --- a/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py +++ b/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py @@ -22,6 +22,7 @@ from paddle.fluid.executor import Executor class TestLoadVarsShapeCheck(unittest.TestCase): + def setUp(self): self.model_path = "./model_temp/" diff --git a/python/paddle/fluid/tests/unittests/test_locality_aware_nms_op.py b/python/paddle/fluid/tests/unittests/test_locality_aware_nms_op.py index 71e2e6fe592..54678b2f45c 100644 --- a/python/paddle/fluid/tests/unittests/test_locality_aware_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_locality_aware_nms_op.py @@ -122,8 +122,9 @@ def multiclass_nms(boxes, scores, background, score_threshold, nms_threshold, else: score_index.append((scores[idx][c], c, idx)) - sorted_score_index = sorted( - score_index, key=lambda tup: tup[0], reverse=True) + sorted_score_index = sorted(score_index, + key=lambda tup: tup[0], + reverse=True) sorted_score_index = sorted_score_index[:keep_top_k] selected_indices = {} @@ -153,16 +154,15 @@ def batched_multiclass_nms(boxes, lod = [] for n in range(batch_size): - 
nmsed_outs, nmsed_num = multiclass_nms( - boxes[n], - scores[n], - background, - score_threshold, - nms_threshold, - nms_top_k, - keep_top_k, - normalized, - shared=True) + nmsed_outs, nmsed_num = multiclass_nms(boxes[n], + scores[n], + background, + score_threshold, + nms_threshold, + nms_top_k, + keep_top_k, + normalized, + shared=True) lod.append(nmsed_num) if nmsed_num == 0: @@ -175,13 +175,15 @@ def batched_multiclass_nms(boxes, c, scores[n][c][idx], xmin, ymin, xmax, ymax, idx + n * num_boxes ]) - sorted_det_out = sorted( - tmp_det_out, key=lambda tup: tup[0], reverse=False) + sorted_det_out = sorted(tmp_det_out, + key=lambda tup: tup[0], + reverse=False) det_outs.extend(sorted_det_out) return det_outs, lod class TestLocalAwareNMSOp(OpTest): + def set_argument(self): self.score_threshold = 0.01 @@ -216,9 +218,10 @@ class TestLocalAwareNMSOp(OpTest): boxes_copy = copy.deepcopy(boxes) scores_copy = copy.deepcopy(scores) - det_outs, lod = batched_multiclass_nms( - boxes_copy, scores_copy, background, score_threshold, nms_threshold, - nms_top_k, keep_top_k) + det_outs, lod = batched_multiclass_nms(boxes_copy, scores_copy, + background, score_threshold, + nms_threshold, nms_top_k, + keep_top_k) lod = [1] if not det_outs else lod det_outs = [[-1, 0]] if not det_outs else det_outs @@ -243,11 +246,13 @@ class TestLocalAwareNMSOp(OpTest): class TestLocalAwareNMSOpNoBoxes(TestLocalAwareNMSOp): + def set_argument(self): self.score_threshold = 2.0 class TestLocalAwareNMSOp4Points(OpTest): + def set_argument(self): self.score_threshold = 0.01 @@ -267,24 +272,29 @@ class TestLocalAwareNMSOp4Points(OpTest): boxes = np.array([[[ 0.42078365, 0.58117018, 2.92776169, 3.28557757, 4.24344318, 0.92196165, 2.72370856, -1.66141214 - ], [ - 0.13856006, 1.86871034, 2.81287224, 3.61381734, 4.5505249, - 0.51766346, 2.75630304, -1.91459389 - ]], [[ - 1.57533883, 1.3217477, 3.07904942, 3.89512545, 4.78680923, - 1.96914586, 3.539482, -1.59739244 - ], [ - 0.55084125, 1.71596215, 2.52476074, 3.18940435, 5.09035159, - 0.91959482, 3.71442385, -0.57299128 - ]]]) + ], + [ + 0.13856006, 1.86871034, 2.81287224, 3.61381734, + 4.5505249, 0.51766346, 2.75630304, -1.91459389 + ]], + [[ + 1.57533883, 1.3217477, 3.07904942, 3.89512545, + 4.78680923, 1.96914586, 3.539482, -1.59739244 + ], + [ + 0.55084125, 1.71596215, 2.52476074, 3.18940435, + 5.09035159, 0.91959482, 3.71442385, -0.57299128 + ]]]) det_outs = np.array([[ 0., 1.5008917, 0.28206837, 1.2140071, 2.8712926, 3.4469104, 4.3943763, 0.7232457, 2.7397292, -1.7858533 - ], [ - 0., 1.1446586, 1.1640508, 1.4800063, 2.856528, 3.6118112, 4.908667, - 1.5478, 3.609713, -1.1861432 - ]]) + ], + [ + 0., 1.1446586, 1.1640508, 1.4800063, 2.856528, + 3.6118112, 4.908667, 1.5478, 3.609713, + -1.1861432 + ]]) lod = [1, 1] nmsed_outs = det_outs.astype('float32') @@ -308,28 +318,31 @@ class TestLocalAwareNMSOp4Points(OpTest): class TestLocalityAwareNMSAPI(unittest.TestCase): + def test_api(self): boxes = fluid.data(name='bboxes', shape=[None, 81, 8], dtype='float32') scores = fluid.data(name='scores', shape=[None, 1, 81], dtype='float32') - fluid.layers.locality_aware_nms( - bboxes=boxes, - scores=scores, - score_threshold=0.5, - nms_top_k=400, - nms_threshold=0.3, - keep_top_k=200, - normalized=False) + fluid.layers.locality_aware_nms(bboxes=boxes, + scores=scores, + score_threshold=0.5, + nms_top_k=400, + nms_threshold=0.3, + keep_top_k=200, + normalized=False) class TestLocalityAwareNMSError(unittest.TestCase): + def test_error(self): boxes = fluid.data(name='bboxes', shape=[None, 81, 
8], dtype='float32') scores = fluid.data(name='scores', shape=[None, 1, 81], dtype='float32') - boxes_int = fluid.data( - name='bboxes_int', shape=[None, 81, 8], dtype='int32') - scores_int = fluid.data( - name='scores_int', shape=[None, 1, 81], dtype='int32') + boxes_int = fluid.data(name='bboxes_int', + shape=[None, 81, 8], + dtype='int32') + scores_int = fluid.data(name='scores_int', + shape=[None, 1, 81], + dtype='int32') boxes_tmp = [1, 2] scores_tmp = [1, 2] @@ -361,7 +374,7 @@ class TestLocalityAwareNMSError(unittest.TestCase): scores, 0.5, 400, keep_top_k) nms_threshold = int(0) - # type of nms_threshold must be int + # type of nms_threshold must be int self.assertRaises(TypeError, fluid.layers.locality_aware_nms, boxes, scores, 0.5, 400, 200, nms_threshold) @@ -371,7 +384,7 @@ class TestLocalityAwareNMSError(unittest.TestCase): scores, 0.5, 400, 200, 0.5, nms_eta) bg_label = 1.5 - # type of background_label must be int + # type of background_label must be int self.assertRaises(TypeError, fluid.layers.locality_aware_nms, boxes, scores, 0.5, 400, 200, 0.5, 1.0, bg_label) diff --git a/python/paddle/fluid/tests/unittests/test_lod_append_op.py b/python/paddle/fluid/tests/unittests/test_lod_append_op.py index 806880bdce0..f5102b46586 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_append_op.py +++ b/python/paddle/fluid/tests/unittests/test_lod_append_op.py @@ -25,12 +25,15 @@ from paddle.fluid.backward import append_backward class TestLoDAppendAPI(unittest.TestCase): + def test_api(self, use_cuda=False): main_program = Program() with fluid.program_guard(main_program): x = fluid.layers.data(name='x', shape=[6], dtype='float32') - level = fluid.layers.data( - name='level', shape=[3], dtype='int32', lod_level=0) + level = fluid.layers.data(name='level', + shape=[3], + dtype='int32', + lod_level=0) result = fluid.layers.lod_append(x, level) x_i = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0]).astype("float32") @@ -42,14 +45,17 @@ class TestLoDAppendAPI(unittest.TestCase): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) [out] = exe.run(fluid.default_main_program(), - feed={'x': x_i, - 'level': level_i}, + feed={ + 'x': x_i, + 'level': level_i + }, fetch_list=[result], return_numpy=False) self.assertEqual(out.recursive_sequence_lengths(), [[2, 4]]) class TestLodAppendOpError(unittest.TestCase): + def test_error(self): # The input(x) must be Variable. 
x1 = np.array([0.9383, 0.1983, 3.2, 1.2]).astype("float64") @@ -63,8 +69,10 @@ class TestLodAppendOpError(unittest.TestCase): # Input(x) dtype must be float32 or float64 or int32 or int64 for dtype in ["bool", "float16"]: x3 = fluid.layers.data(name='x3_' + dtype, shape=[4], dtype=dtype) - level3 = fluid.layers.data( - name='level3' + dtype, shape=[4], dtype='int32', lod_level=2) + level3 = fluid.layers.data(name='level3' + dtype, + shape=[4], + dtype='int32', + lod_level=2) self.assertRaises(TypeError, fluid.layers.lod_append, x3, level3) diff --git a/python/paddle/fluid/tests/unittests/test_lod_array_length_op.py b/python/paddle/fluid/tests/unittests/test_lod_array_length_op.py index 353cdc5ab8b..438c6c82f38 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_array_length_op.py +++ b/python/paddle/fluid/tests/unittests/test_lod_array_length_op.py @@ -26,6 +26,7 @@ import numpy class TestLoDArrayLength(unittest.TestCase): + def test_array_length(self): tmp = layers.zeros(shape=[10], dtype='int32') i = layers.fill_constant(shape=[1], dtype='int64', value=10) @@ -38,6 +39,7 @@ class TestLoDArrayLength(unittest.TestCase): class TestLoDArrayLengthOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): #for ci coverage @@ -47,6 +49,7 @@ class TestLoDArrayLengthOpError(unittest.TestCase): class TestArrayLengthApi(unittest.TestCase): + def test_api(self): paddle.disable_static() diff --git a/python/paddle/fluid/tests/unittests/test_lod_rank_table.py b/python/paddle/fluid/tests/unittests/test_lod_rank_table.py index d8b5c2eef37..ae1d61f3d44 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_rank_table.py +++ b/python/paddle/fluid/tests/unittests/test_lod_rank_table.py @@ -23,6 +23,7 @@ import unittest class TestLoDRankTable(unittest.TestCase): + def test_lod_rank_table(self): x = data(name='x', shape=[100]) cpu = core.CPUPlace() @@ -33,8 +34,8 @@ class TestLoDRankTable(unittest.TestCase): tensor = core.LoDTensor() tensor.set(numpy.random.random(size=(17, 100)), cpu) - tensor.set_recursive_sequence_lengths( - [[1, 2], [5, 1, 1], [3, 1, 5, 1, 3, 3, 1]]) + tensor.set_recursive_sequence_lengths([[1, 2], [5, 1, 1], + [3, 1, 5, 1, 3, 3, 1]]) exe.run(scope=scope, feed={'x': tensor}) var = scope.find_var(rank_table.name) table = var.get_lod_rank_table() @@ -42,6 +43,7 @@ class TestLoDRankTable(unittest.TestCase): class TestLoDRankTableError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): x = numpy.random.random((2, 4)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_lod_reset_op.py b/python/paddle/fluid/tests/unittests/test_lod_reset_op.py index 1fa172cf031..1565e0c563c 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_reset_op.py +++ b/python/paddle/fluid/tests/unittests/test_lod_reset_op.py @@ -22,6 +22,7 @@ from paddle.fluid import Program, program_guard class TestLodResetOpByAttr(OpTest): + def setUp(self): self.op_type = "lod_reset" x = np.random.random((10, 20)).astype("float64") @@ -45,6 +46,7 @@ class TestLodResetOpByAttr(OpTest): class TestLodResetOpByInput(OpTest): + def setUp(self): self.op_type = "lod_reset" x = np.random.random((10, 20)).astype("float64") @@ -69,6 +71,7 @@ class TestLodResetOpByInput(OpTest): class TestLodResetOpBoth(OpTest): + def setUp(self): self.op_type = "lod_reset" x = np.random.random((10, 20)).astype("float64") @@ -93,6 +96,7 @@ class TestLodResetOpBoth(OpTest): class TestLodResetOpYIsLoDTensor(OpTest): + def setUp(self): self.op_type = 
"lod_reset" x = np.random.random((10, 20)).astype("float64") @@ -112,6 +116,7 @@ class TestLodResetOpYIsLoDTensor(OpTest): class TestLodAppendOpByAttr(OpTest): + def setUp(self): self.op_type = "lod_reset" x = np.random.random((10, 20)).astype("float64") @@ -135,6 +140,7 @@ class TestLodAppendOpByAttr(OpTest): class TestLodResetOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input must be Variable. @@ -144,10 +150,13 @@ class TestLodResetOpError(unittest.TestCase): # Input(x) dtype must be float32 or float64 or int32 or int64 for dtype in ["bool", "float16"]: - x2 = fluid.layers.data( - name='x2' + dtype, shape=[4], dtype=dtype) - y2 = fluid.layers.data( - name='y2' + dtype, shape=[4], dtype='int32', lod_level=2) + x2 = fluid.layers.data(name='x2' + dtype, + shape=[4], + dtype=dtype) + y2 = fluid.layers.data(name='y2' + dtype, + shape=[4], + dtype='int32', + lod_level=2) self.assertRaises(TypeError, fluid.layers.lod_reset, x2, y2) diff --git a/python/paddle/fluid/tests/unittests/test_lod_tensor_array.py b/python/paddle/fluid/tests/unittests/test_lod_tensor_array.py index 678e9e21197..793d0e9bf5a 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_tensor_array.py +++ b/python/paddle/fluid/tests/unittests/test_lod_tensor_array.py @@ -21,6 +21,7 @@ import numpy class TestLoDTensorArray(unittest.TestCase): + def test_get_set(self): scope = core.Scope() arr = scope.var('tmp_lod_tensor_array') @@ -45,13 +46,13 @@ class TestLoDTensorArray(unittest.TestCase): t.set_recursive_sequence_lengths([[1]]) tensor_array[i] = t t = tensor_array[i] - self.assertEqual( - numpy.array(t), numpy.array( - [i + 10], dtype='float32')) + self.assertEqual(numpy.array(t), + numpy.array([i + 10], dtype='float32')) self.assertEqual([[1]], t.recursive_sequence_lengths()) class TestCreateArray(unittest.TestCase): + def setUp(self): self.place = paddle.CPUPlace() self.shapes = [[10, 4], [8, 12], [1]] diff --git a/python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py b/python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py index 0148e15b079..2911e7a6b71 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py +++ b/python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py @@ -29,6 +29,7 @@ from paddle.fluid.layers.control_flow import array_to_lod_tensor class TestCPULoDTensorArrayOps(unittest.TestCase): + def place(self): return core.CPUPlace() @@ -41,11 +42,10 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): numpy.array(x).astype('int32') for x in [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]] ] - self.main( - tensor=tensor, - expect_array=expect, - expect_lod=[] * 6, - expect_max_len=6) + self.main(tensor=tensor, + expect_array=expect, + expect_lod=[] * 6, + expect_max_len=6) def test_lod_tensor_to_array_level_0_empty_seq(self): tensor = core.LoDTensor() @@ -56,11 +56,10 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): numpy.array(x).astype('int32') for x in [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]] ] - self.main( - tensor=tensor, - expect_array=expect, - expect_lod=[] * 6, - expect_max_len=6) + self.main(tensor=tensor, + expect_array=expect, + expect_lod=[] * 6, + expect_max_len=6) def test_lod_tensor_to_array_level_1(self): tensor = core.LoDTensor() @@ -69,19 +68,17 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): tensor.set_recursive_sequence_lengths([[2, 3], [3, 6, 2, 6, 3]]) expect = [ - numpy.array( - [9, 10, 0, 1, 2], dtype='int32'), numpy.array( - [11, 12, 13, 14, 15, 16, 3, 4, 5, 6, 7, 8], 
dtype='int32'), - numpy.array( - [17, 18, 19], dtype='int32') + numpy.array([9, 10, 0, 1, 2], dtype='int32'), + numpy.array([11, 12, 13, 14, 15, 16, 3, 4, 5, 6, 7, 8], + dtype='int32'), + numpy.array([17, 18, 19], dtype='int32') ] lod = [[[2, 3]], [[6, 6]], [[3]]] - self.main( - tensor=tensor, - expect_array=expect, - expect_lod=lod, - expect_max_len=3) + self.main(tensor=tensor, + expect_array=expect, + expect_lod=lod, + expect_max_len=3) def test_lod_tensor_to_array_level_1_empty_seq(self): tensor = core.LoDTensor() @@ -92,19 +89,16 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): [[3, 2, 4, 2], [3, 4, 4, 0, 1, 5, 2, 2, 2, 7, 1]]) expect = [ - numpy.array( - item, dtype='int32') - for item in [[ + numpy.array(item, dtype='int32') for item in [[ 12, 13, 14, 15, 16, 0, 1, 2, 23, 24, 25, 26, 27, 28, 29 ], [17, 18, 3, 4, 5, 6, 11, 30], [19, 20, 7, 8, 9, 10], [21, 22]] ] lod = [[[5, 3, 0, 7]], [[2, 4, 1, 1]], [[2, 4]], [[2]]] - self.main( - tensor=tensor, - expect_array=expect, - expect_lod=lod, - expect_max_len=4) + self.main(tensor=tensor, + expect_array=expect, + expect_lod=lod, + expect_max_len=4) def test_lod_tensor_to_array_level_2(self): tensor = core.LoDTensor() @@ -115,18 +109,17 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): [3, 4, 4, 6, 4, 1, 1, 4, 4, 8, 6, 1, 4]]) expect = [ - numpy.array( - item, dtype='int32') - for item in [[21, 0, 1, 2, 3, 4, 5, 6, 46, 47, 48, 49], list( - range(22, 39)) + list(range(7, 21)), list(range(39, 46))] + numpy.array(item, dtype='int32') + for item in [[21, 0, 1, 2, 3, 4, 5, 6, 46, 47, 48, 49], + list(range(22, 39)) + list(range(7, 21)), + list(range(39, 46))] ] lod = [[[1, 2, 1], [1, 3, 4, 4]], [[4, 3], [1, 4, 4, 8, 4, 6, 4]], [[2], [6, 1]]] - self.main( - tensor=tensor, - expect_array=expect, - expect_lod=lod, - expect_max_len=3) + self.main(tensor=tensor, + expect_array=expect, + expect_lod=lod, + expect_max_len=3) def test_lod_tensor_to_array_level_2_skip_level(self): tensor = core.LoDTensor() @@ -135,12 +128,11 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): tensor.set_recursive_sequence_lengths( [[2, 3, 1], [2, 3, 1, 4, 2, 1], [3, 4, 4, 6, 4, 1, 1, 4, 4, 8, 6, 1, 4]]) - self.main( - tensor=tensor, - expect_array=None, - expect_lod=None, - expect_max_len=4, - level=1) + self.main(tensor=tensor, + expect_array=None, + expect_lod=None, + expect_max_len=4, + level=1) def main(self, tensor, expect_array, expect_lod, expect_max_len, level=0): place = self.place() @@ -178,20 +170,23 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): self.assertEqual(exp_lod, array[i].recursive_sequence_lengths()) def check_tensor_same(self, actual, expect): - self.assertTrue( - numpy.allclose(numpy.array(actual), numpy.array(expect))) + self.assertTrue(numpy.allclose(numpy.array(actual), + numpy.array(expect))) self.assertEqual(actual.recursive_sequence_lengths(), expect.recursive_sequence_lengths()) class TestCPULoDTensorArrayOpGrad(unittest.TestCase): + def test_grad(self): place = core.CPUPlace() program = Program() with program_guard(program): - x = layers.data( - name='x', shape=[1], dtype='float32', stop_gradient=False) + x = layers.data(name='x', + shape=[1], + dtype='float32', + stop_gradient=False) table = lod_rank_table(x, level=0) array = lod_tensor_to_array(x, table) result = array_to_lod_tensor(array, table) @@ -208,11 +203,10 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase): exe = Executor(place) g_out = [ - numpy.array(item).sum() - for item in exe.run(program, - feed={'x': tensor}, - fetch_list=[g_vars], - 
return_numpy=False) + numpy.array(item).sum() for item in exe.run(program, + feed={'x': tensor}, + fetch_list=[g_vars], + return_numpy=False) ] g_out_sum = numpy.array(g_out).sum() @@ -220,6 +214,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase): class TestLoDTensorArrayError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): x = numpy.random.random((10)).astype("float32") @@ -252,6 +247,7 @@ class TestLoDTensorArrayError(unittest.TestCase): class TestArrayLoDTensorError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): x = numpy.random.random((10)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_log_loss_op.py b/python/paddle/fluid/tests/unittests/test_log_loss_op.py index 0c57c0addf2..5f7f3019bd1 100644 --- a/python/paddle/fluid/tests/unittests/test_log_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_log_loss_op.py @@ -25,6 +25,7 @@ def sigmoid_array(x): class TestLogLossOp(OpTest): + def setUp(self): self.op_type = 'log_loss' samples_num = 100 @@ -51,6 +52,7 @@ class TestLogLossOp(OpTest): class TestLogLossOpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program()): diff --git a/python/paddle/fluid/tests/unittests/test_log_softmax.py b/python/paddle/fluid/tests/unittests/test_log_softmax.py index b3b164725fc..b1b21e0666f 100644 --- a/python/paddle/fluid/tests/unittests/test_log_softmax.py +++ b/python/paddle/fluid/tests/unittests/test_log_softmax.py @@ -40,6 +40,7 @@ def ref_log_softmax_grad(x, axis): class TestLogSoftmaxOp(OpTest): + def setUp(self): self.op_type = 'log_softmax' self.python_api = F.log_softmax @@ -63,16 +64,19 @@ class TestLogSoftmaxOp(OpTest): self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad( - ['X'], ['Out'], user_defined_grads=[self.x_grad], check_eager=True) + self.check_grad(['X'], ['Out'], + user_defined_grads=[self.x_grad], + check_eager=True) class TestLogSoftmaxShape(TestLogSoftmaxOp): + def set_attrs(self): self.shape = [12, 10] class TestLogSoftmaxAxis(TestLogSoftmaxOp): + def set_attrs(self): self.axis = 1 @@ -80,6 +84,7 @@ class TestLogSoftmaxAxis(TestLogSoftmaxOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestLogSoftmaxBF16Op(OpTest): + def setUp(self): self.op_type = 'log_softmax' self.python_api = F.log_softmax @@ -101,13 +106,13 @@ class TestLogSoftmaxBF16Op(OpTest): def test_check_grad(self): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['X'], ['Out'], - user_defined_grads=[self.x_grad], - check_eager=True) + self.check_grad_with_place(place, ['X'], ['Out'], + user_defined_grads=[self.x_grad], + check_eager=True) class TestNNLogSoftmaxAPI(unittest.TestCase): + def setUp(self): self.x_shape = [2, 3, 4, 5] self.x = np.random.uniform(-1., 1., self.x_shape).astype(np.float32) @@ -140,6 +145,7 @@ class TestNNLogSoftmaxAPI(unittest.TestCase): class TestNNFunctionalLogSoftmaxAPI(unittest.TestCase): + def setUp(self): self.x_shape = [2, 3, 4, 5] self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32) diff --git a/python/paddle/fluid/tests/unittests/test_logical_op.py b/python/paddle/fluid/tests/unittests/test_logical_op.py index e2c7cf3a5bb..80f65401176 100755 --- a/python/paddle/fluid/tests/unittests/test_logical_op.py +++ b/python/paddle/fluid/tests/unittests/test_logical_op.py @@ -178,10 +178,10 @@ def test(unit_test, use_gpu=False, test_error=False): META_DATA = dict(TEST_META_WRONG_SHAPE_DATA) 
for shape_data in META_DATA.values(): for data_type in SUPPORTED_DTYPES: - meta_data['x_np'] = np_data_generator( - shape_data['x_shape'], dtype=data_type) - meta_data['y_np'] = np_data_generator( - shape_data['y_shape'], dtype=data_type) + meta_data['x_np'] = np_data_generator(shape_data['x_shape'], + dtype=data_type) + meta_data['y_np'] = np_data_generator(shape_data['y_shape'], + dtype=data_type) if meta_data['binary_op'] and test_error: # catch C++ Exception unit_test.assertRaises(BaseException, run_static, @@ -197,12 +197,13 @@ def test(unit_test, use_gpu=False, test_error=False): else: np_result = np_op(meta_data['x_np']) unit_test.assertTrue((static_result == np_result).all()) - unit_test.assertTrue((dygraph_result.numpy() == np_result).all( - )) + unit_test.assertTrue( + (dygraph_result.numpy() == np_result).all()) unit_test.assertTrue((eager_result.numpy() == np_result).all()) def test_type_error(unit_test, use_gpu, type_str_map): + def check_type(op_str, x, y, binary_op): op = getattr(paddle, op_str) error_type = ValueError @@ -237,10 +238,12 @@ def test_type_error(unit_test, use_gpu, type_str_map): startup_program = paddle.static.Program() main_program = paddle.static.Program() with paddle.static.program_guard(main_program, startup_program): - x = paddle.static.data( - name='x', shape=[10], dtype=type_str_map['x']) - y = paddle.static.data( - name='y', shape=[10], dtype=type_str_map['y']) + x = paddle.static.data(name='x', + shape=[10], + dtype=type_str_map['x']) + y = paddle.static.data(name='y', + shape=[10], + dtype=type_str_map['y']) check_type(meta_data['op_str'], x, y, binary_op) @@ -252,6 +255,7 @@ def type_map_factory(): class TestCPU(unittest.TestCase): + def test(self): test(self) @@ -265,6 +269,7 @@ class TestCPU(unittest.TestCase): class TestCUDA(unittest.TestCase): + def test(self): test(self, True) diff --git a/python/paddle/fluid/tests/unittests/test_logit_op.py b/python/paddle/fluid/tests/unittests/test_logit_op.py index 9b46039da13..44865936af9 100644 --- a/python/paddle/fluid/tests/unittests/test_logit_op.py +++ b/python/paddle/fluid/tests/unittests/test_logit_op.py @@ -17,6 +17,7 @@ import numpy as np from op_test import OpTest import paddle from paddle.fluid.framework import _test_eager_guard + np.random.seed(10) @@ -36,6 +37,7 @@ def logit_grad(x, eps=1e-8): class TestLogitOp(OpTest): + def setUp(self): self.op_type = 'logit' self.python_api = paddle.logit @@ -57,21 +59,25 @@ class TestLogitOp(OpTest): self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad( - ['X'], ['Out'], user_defined_grads=[self.x_grad], check_eager=True) + self.check_grad(['X'], ['Out'], + user_defined_grads=[self.x_grad], + check_eager=True) class TestLogitShape(TestLogitOp): + def set_attrs(self): self.shape = [2, 60] class TestLogitEps(TestLogitOp): + def set_attrs(self): self.eps = 1e-8 class TestLogitAPI(unittest.TestCase): + def setUp(self): self.x_shape = [120] self.x = np.random.uniform(0., 1., self.x_shape).astype(np.float32) diff --git a/python/paddle/fluid/tests/unittests/test_logspace.py b/python/paddle/fluid/tests/unittests/test_logspace.py index ffa9885e767..11da5d4accb 100644 --- a/python/paddle/fluid/tests/unittests/test_logspace.py +++ b/python/paddle/fluid/tests/unittests/test_logspace.py @@ -21,6 +21,7 @@ import paddle class TestLogspaceOpCommonCase(OpTest): + def setUp(self): self.op_type = "logspace" dtype = 'float32' @@ -39,6 +40,7 @@ class TestLogspaceOpCommonCase(OpTest): class TestLogspaceOpReverseCase(OpTest): + def setUp(self): 
self.op_type = "logspace" dtype = 'float32' @@ -57,6 +59,7 @@ class TestLogspaceOpReverseCase(OpTest): class TestLogspaceOpNumOneCase(OpTest): + def setUp(self): self.op_type = "logspace" dtype = 'float32' @@ -75,6 +78,7 @@ class TestLogspaceOpNumOneCase(OpTest): class TestLogspaceOpMinusBaseCase(OpTest): + def setUp(self): self.op_type = "logspace" dtype = 'float32' @@ -93,6 +97,7 @@ class TestLogspaceOpMinusBaseCase(OpTest): class TestLogspaceOpZeroBaseCase(OpTest): + def setUp(self): self.op_type = "logspace" dtype = 'float32' @@ -111,6 +116,7 @@ class TestLogspaceOpZeroBaseCase(OpTest): class TestLogspaceAPI(unittest.TestCase): + def test_variable_input1(self): paddle.enable_static() prog = paddle.static.Program() @@ -152,8 +158,12 @@ class TestLogspaceAPI(unittest.TestCase): def test_name(self): with paddle.static.program_guard(paddle.static.Program()): - out = paddle.logspace( - 0, 10, 5, 2, dtype='float32', name='logspace_res') + out = paddle.logspace(0, + 10, + 5, + 2, + dtype='float32', + name='logspace_res') assert 'logspace_res' in out.name def test_imperative(self): @@ -171,6 +181,7 @@ class TestLogspaceAPI(unittest.TestCase): class TestLogspaceOpError(unittest.TestCase): + def test_errors(self): with paddle.static.program_guard(paddle.static.Program()): @@ -200,8 +211,9 @@ class TestLogspaceOpError(unittest.TestCase): self.assertRaises(TypeError, test_num_type) def test_start_dtype(): - start = paddle.static.data( - shape=[1], dtype="float64", name="start") + start = paddle.static.data(shape=[1], + dtype="float64", + name="start") paddle.logspace(start, 10, 1, 2, dtype="float32") self.assertRaises(ValueError, test_start_dtype) @@ -213,15 +225,17 @@ class TestLogspaceOpError(unittest.TestCase): self.assertRaises(ValueError, test_end_dtype) def test_num_dtype(): - num = paddle.static.data( - shape=[1], dtype="float32", name="step") + num = paddle.static.data(shape=[1], + dtype="float32", + name="step") paddle.logspace(0, 10, num, 2, dtype="float32") self.assertRaises(TypeError, test_num_dtype) def test_base_dtype(): - base = paddle.static.data( - shape=[1], dtype="float64", name="end") + base = paddle.static.data(shape=[1], + dtype="float64", + name="end") paddle.logspace(0, 10, 1, base, dtype="float32") self.assertRaises(ValueError, test_base_dtype) diff --git a/python/paddle/fluid/tests/unittests/test_logsumexp.py b/python/paddle/fluid/tests/unittests/test_logsumexp.py index 91eb65ef284..3e54147b180 100644 --- a/python/paddle/fluid/tests/unittests/test_logsumexp.py +++ b/python/paddle/fluid/tests/unittests/test_logsumexp.py @@ -36,6 +36,7 @@ def logsumexp_wrapper(x, axis=None, keepdim=False, allreduce=False): class TestLogsumexp(OpTest): + def setUp(self): self.op_type = 'logsumexp' self.python_api = logsumexp_wrapper @@ -85,16 +86,19 @@ class TestLogsumexp(OpTest): class TestLogsumexp_shape(TestLogsumexp): + def set_attrs(self): self.shape = [4, 5, 6] class TestLogsumexp_axis(TestLogsumexp): + def set_attrs(self): self.axis = [0, -1] class TestLogsumexp_axis_all(TestLogsumexp): + def set_attrs(self): self.axis = [0, 1, 2, 3] @@ -105,11 +109,13 @@ class TestLogsumexp_axis_all(TestLogsumexp): class TestLogsumexp_keepdim(TestLogsumexp): + def set_attrs(self): self.keepdim = True class TestLogsumexp_reduce_all(TestLogsumexp): + def set_attrs(self): self.reduce_all = True @@ -120,6 +126,7 @@ class TestLogsumexp_reduce_all(TestLogsumexp): class TestLogsumexpError(unittest.TestCase): + def test_errors(self): with paddle.static.program_guard(paddle.static.Program()): 
self.assertRaises(TypeError, paddle.logsumexp, 1) @@ -128,6 +135,7 @@ class TestLogsumexpError(unittest.TestCase): class TestLogsumexpAPI(unittest.TestCase): + def setUp(self): self.shape = [2, 3, 4, 5] self.x = np.random.uniform(-1, 1, self.shape).astype(np.float32) diff --git a/python/paddle/fluid/tests/unittests/test_lookahead.py b/python/paddle/fluid/tests/unittests/test_lookahead.py index 263310043a5..efbc28cfa6c 100644 --- a/python/paddle/fluid/tests/unittests/test_lookahead.py +++ b/python/paddle/fluid/tests/unittests/test_lookahead.py @@ -1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -30,6 +30,7 @@ SGD_LR = 1.0 class TestLookAhead(unittest.TestCase): + def test_lookahead_static(self): paddle.enable_static() place = fluid.CPUPlace() @@ -65,8 +66,9 @@ class TestLookAhead(unittest.TestCase): if i == 0: slow_param = latest_b if (i + 1) % LOOKAHEAD_K == 0: - self.assertAlmostEqual( - slow_param.all(), latest_b.all(), delta=5e-3) + self.assertAlmostEqual(slow_param.all(), + latest_b.all(), + delta=5e-3) fast_param = latest_b - SGD_LR * b_grad def func_test_look_ahead_dygraph(self): @@ -79,6 +81,7 @@ class TestLookAhead(unittest.TestCase): # define a random dataset class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): self.num_samples = num_samples @@ -92,6 +95,7 @@ class TestLookAhead(unittest.TestCase): return self.num_samples class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) @@ -111,35 +115,34 @@ class TestLookAhead(unittest.TestCase): out = layer(image) loss = loss_fn(out, label) loss.backward() - fast_param = ( - layer.bias.numpy() - SGD_LR * layer.bias.grad.numpy()) + fast_param = (layer.bias.numpy() - + SGD_LR * layer.bias.grad.numpy()) opt.step() if idx == 1: slow_param = fast_param if idx % LOOKAHEAD_K == 0: slow_param = slow_param + LOOKAHEAD_ALPHA * ( fast_param - slow_param) - self.assertAlmostEqual( - np.mean(slow_param), - np.mean(layer.bias.numpy()), - delta=5e-3) + self.assertAlmostEqual(np.mean(slow_param), + np.mean(layer.bias.numpy()), + delta=5e-3) opt.clear_grad() layer = LinearNet() loss_fn = nn.CrossEntropyLoss() optimizer = paddle.optimizer.SGD(learning_rate=SGD_LR, parameters=layer.parameters()) - lookahead = paddle.incubate.optimizer.LookAhead( - optimizer, alpha=LOOKAHEAD_ALPHA, k=LOOKAHEAD_K) + lookahead = paddle.incubate.optimizer.LookAhead(optimizer, + alpha=LOOKAHEAD_ALPHA, + k=LOOKAHEAD_K) # create data loader dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) - loader = paddle.io.DataLoader( - dataset, - batch_size=BATCH_SIZE, - shuffle=True, - drop_last=True, - num_workers=2) + loader = paddle.io.DataLoader(dataset, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) train(layer, loader, loss_fn, lookahead) diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py index 0a247b4dbe0..9dc7c1aa636 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py 
+++ b/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py @@ -16,9 +16,10 @@ from __future__ import print_function import unittest import numpy as np -from paddle.fluid.tests.unittests.op_test import ( - OpTest, convert_float_to_uint16, convert_uint16_to_float, - skip_check_grad_ci) +from paddle.fluid.tests.unittests.op_test import (OpTest, + convert_float_to_uint16, + convert_uint16_to_float, + skip_check_grad_ci) import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.op import Operator @@ -49,6 +50,7 @@ def _get_grad(weights, ids, flat_ids, op_version="lookup_table"): @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestLookupTableBF16Op(OpTest): + def init_test(self): self.op_type = "lookup_table" self.ids_shape = (4, 1) @@ -75,19 +77,19 @@ class TestLookupTableBF16Op(OpTest): self.check_output_with_place(core.CPUPlace(), check_dygraph=False) def test_check_grad(self): - self.check_grad_with_place( - core.CPUPlace(), ['W'], - 'Out', - no_grad_set=set('Ids'), - check_dygraph=False, - max_relative_error=1.5e-2, - user_defined_grads=[self.w_grad_fp32], - user_defined_grad_outputs=[self.out_bf16]) + self.check_grad_with_place(core.CPUPlace(), ['W'], + 'Out', + no_grad_set=set('Ids'), + check_dygraph=False, + max_relative_error=1.5e-2, + user_defined_grads=[self.w_grad_fp32], + user_defined_grad_outputs=[self.out_bf16]) @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestLookupTableBF16OpIds4D(TestLookupTableBF16Op): + def init_test(self): self.op_type = "lookup_table" self.ids_shape = (2, 4, 5, 1) @@ -96,14 +98,15 @@ class TestLookupTableBF16OpIds4D(TestLookupTableBF16Op): @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestLookupTableBF16OpWIsSelectedRows(unittest.TestCase): + def init_test(self): self.op_type = "lookup_table" self.ids_shape = (10, 1) def setUp(self): self.init_test() - self.ids = np.random.randint( - low=0, high=15, size=self.ids_shape).astype("int64") + self.ids = np.random.randint(low=0, high=15, + size=self.ids_shape).astype("int64") self.flat_ids = self.ids.flatten() self.w_fp32 = np.random.random((15, 32)).astype("float32") self.w_bf16 = convert_float_to_uint16(self.w_fp32) @@ -147,6 +150,7 @@ class TestLookupTableBF16OpWIsSelectedRows(unittest.TestCase): "place does not support BF16 evaluation") class TestLookupTableBF16OpWIsSelectedRows4DIds( TestLookupTableBF16OpWIsSelectedRows): + def init_test(self): self.op_type = "lookup_table" self.ids_shape = (3, 4, 5, 1) @@ -163,6 +167,7 @@ class TestLookupTableBF16OpWIsSelectedRows4DIds( @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestLookupTableBF16OpWithPadding(TestLookupTableBF16Op): + def test_check_output(self): ids = np.squeeze(self.inputs['Ids']) padding_idx = np.random.choice(ids, 1)[0] @@ -178,6 +183,7 @@ class TestLookupTableBF16OpWithPadding(TestLookupTableBF16Op): @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestLookupTableBF16OpIds4DPadding(TestLookupTableBF16OpIds4D): + def test_check_output(self): ids = self.inputs['Ids'] flatten_idx = ids.flatten() @@ -198,8 +204,8 @@ class TestEmbeddingLayerBF16ConstantInitializer(unittest.TestCase): def setUp(self): self.ids_shape = [4, 1] self.w_shape = [10, 64] - self.ids = np.random.randint( - low=0, high=9, size=self.ids_shape).astype("int64") + self.ids = np.random.randint(low=0, high=9, + 
size=self.ids_shape).astype("int64") self.flat_ids = self.ids.flatten() self.value = 3.0 self.w_fp32 = np.full(self.w_shape, self.value) @@ -210,13 +216,13 @@ class TestEmbeddingLayerBF16ConstantInitializer(unittest.TestCase): with fluid.program_guard(self.prog, self.startup_prog): x = fluid.layers.data(name='x', shape=self.ids_shape, dtype='int64') - self.emb = fluid.layers.embedding( - input=x, - size=self.w_shape, - param_attr=fluid.ParamAttr( - name="emb_weight", initializer=self.initializer), - is_sparse=False, - dtype="uint16") # bfloat16 + self.emb = fluid.layers.embedding(input=x, + size=self.w_shape, + param_attr=fluid.ParamAttr( + name="emb_weight", + initializer=self.initializer), + is_sparse=False, + dtype="uint16") # bfloat16 exe = fluid.Executor(self.place) exe.run(self.startup_prog) self.result = exe.run(self.prog, diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_dequant_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_dequant_op.py index 689b9992a6d..934504e02bf 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_dequant_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_dequant_op.py @@ -26,6 +26,7 @@ import struct class TestLookupTableDequantOp(OpTest): + def setUp(self): self.op_type = "lookup_table_dequant" table = np.random.random((17, 32)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_op.py index f3546a7c50d..9d9fa9e385d 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_op.py @@ -26,6 +26,7 @@ import paddle.nn.functional as F class TestLookupTableOp(OpTest): + def setUp(self): self.op_type = "lookup_table" table = np.random.random((17, 31)).astype("float64") @@ -42,11 +43,12 @@ class TestLookupTableOp(OpTest): class TestLookupTableOpWithTensorIds(OpTest): + def setUp(self): self.op_type = "lookup_table" table = np.random.random((17, 31)).astype("float64") - ids = np.random.randint( - low=0, high=17, size=(2, 4, 5, 1)).astype("int64") + ids = np.random.randint(low=0, high=17, + size=(2, 4, 5, 1)).astype("int64") self.inputs = {'W': table, 'Ids': ids} self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} @@ -62,6 +64,7 @@ class TestLookupTableOpWithTensorIds(OpTest): "the gradient of paddings makes no sense and we don't " "test the gradient here.") class TestLookupTableOpWithPadding(TestLookupTableOp): + def test_check_output(self): ids = np.squeeze(self.inputs['Ids']) padding_idx = np.random.choice(ids, 1)[0] @@ -75,6 +78,7 @@ class TestLookupTableOpWithPadding(TestLookupTableOp): "the gradient of paddings makes no sense and we don't " "test the gradient here.") class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds): + def test_check_output(self): ids = self.inputs['Ids'] flatten_idx = ids.flatten() @@ -85,6 +89,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds): class TestLookupTableWIsSelectedRows(unittest.TestCase): + def prepare_ids(self, scope, place): ids_tensor = scope.var('Ids').get_tensor() ids_array = np.array([[0], [4], [3], [5]]).astype("int64") @@ -137,12 +142,13 @@ class TestLookupTableWIsSelectedRows(unittest.TestCase): self.check_with_place(place) -class TestLookupTableWithTensorIdsWIsSelectedRows( - TestLookupTableWIsSelectedRows): +class TestLookupTableWithTensorIdsWIsSelectedRows(TestLookupTableWIsSelectedRows + ): + def prepare_ids(self, 
scope, place): ids_tensor = scope.var('Ids').get_tensor() - ids_array = np.random.randint( - low=0, high=6, size=(2, 4, 3, 1)).astype("int64") + ids_array = np.random.randint(low=0, high=6, + size=(2, 4, 3, 1)).astype("int64") ids_tensor.set(ids_array, place) return ids_array @@ -152,6 +158,7 @@ class TestLookupTableWithTensorIdsWIsSelectedRows( class TestEmbedOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): input_data = np.random.randint(0, 10, (4, 1)).astype("int64") @@ -172,8 +179,9 @@ class TestEmbedOpError(unittest.TestCase): def test_param_dtype(): # dtype must be float32 or float64 input2 = fluid.data(name='x2', shape=[4, 1], dtype='int64') - fluid.layers.embedding( - input=input2, size=(10, 64), dtype='int64') + fluid.layers.embedding(input=input2, + size=(10, 64), + dtype='int64') self.assertRaises(TypeError, test_param_dtype) @@ -182,10 +190,11 @@ class TestEmbedOpError(unittest.TestCase): class TestLookupTableOpInt8(OpTest): + def setUp(self): self.op_type = "lookup_table" - table = np.random.randint( - low=-128, high=127, size=(17, 31)).astype("int8") + table = np.random.randint(low=-128, high=127, + size=(17, 31)).astype("int8") ids = np.random.randint(0, 17, 4).astype("int64") ids_expand = np.expand_dims(ids, axis=1) self.inputs = {'W': table, 'Ids': ids_expand} @@ -195,18 +204,19 @@ class TestLookupTableOpInt8(OpTest): self.check_output() def test_check_grad(self): - # since int8 type only be used in test and inference, there is + # since int8 type only be used in test and inference, there is # no gradient implement, so we don't need to test it pass class TestLookupTableOpWithTensorIdsInt8(OpTest): + def setUp(self): self.op_type = "lookup_table" - table = np.random.randint( - low=-128, high=127, size=(17, 31)).astype("int8") - ids = np.random.randint( - low=0, high=17, size=(2, 4, 5, 1)).astype("int64") + table = np.random.randint(low=-128, high=127, + size=(17, 31)).astype("int8") + ids = np.random.randint(low=0, high=17, + size=(2, 4, 5, 1)).astype("int64") self.inputs = {'W': table, 'Ids': ids} self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} @@ -214,12 +224,13 @@ class TestLookupTableOpWithTensorIdsInt8(OpTest): self.check_output() def test_check_grad(self): - # since int8 type only be used in test and inference, there is + # since int8 type only be used in test and inference, there is # no gradient implement, so we don't need to test it pass class TestLookupTableOpWithPaddingInt8(TestLookupTableOpInt8): + def test_check_output(self): ids = np.squeeze(self.inputs['Ids']) padding_idx = np.random.choice(ids, 1)[0] @@ -235,6 +246,7 @@ class TestLookupTableOpWithPaddingInt8(TestLookupTableOpInt8): class TestLookupTableOpWithTensorIdsAndPaddingInt8( TestLookupTableOpWithTensorIdsInt8): + def test_check_output(self): ids = self.inputs['Ids'] flatten_idx = ids.flatten() @@ -250,6 +262,7 @@ class TestLookupTableOpWithTensorIdsAndPaddingInt8( class TestLookupTableWIsSelectedRowsInt8(unittest.TestCase): + def prepare_ids(self, scope, place): ids_tensor = scope.var('Ids').get_tensor() ids_array = np.array([[0], [4], [3], [5]]).astype("int64") @@ -304,10 +317,11 @@ class TestLookupTableWIsSelectedRowsInt8(unittest.TestCase): class TestLookupTableWithTensorIdsWIsSelectedRowsInt8( TestLookupTableWIsSelectedRowsInt8): + def prepare_ids(self, scope, place): ids_tensor = scope.var('Ids').get_tensor() - ids_array = np.random.randint( - low=0, high=6, size=(2, 4, 3, 1)).astype("int64") + ids_array = np.random.randint(low=0, 
high=6, + size=(2, 4, 3, 1)).astype("int64") ids_tensor.set(ids_array, place) return ids_array @@ -318,10 +332,11 @@ class TestLookupTableWithTensorIdsWIsSelectedRowsInt8( @skip_check_grad_ci(reason="Int16 type only be used in test and inference.") class TestLookupTableOpInt16(OpTest): + def setUp(self): self.op_type = "lookup_table" - table = np.random.randint( - low=-128, high=127, size=(17, 31)).astype("int16") + table = np.random.randint(low=-128, high=127, + size=(17, 31)).astype("int16") ids = np.random.randint(0, 17, 4).astype("int64") ids_expand = np.expand_dims(ids, axis=1) self.inputs = {'W': table, 'Ids': ids_expand} @@ -333,12 +348,13 @@ class TestLookupTableOpInt16(OpTest): @skip_check_grad_ci(reason="Int16 type only be used in test and inference.") class TestLookupTableOpWithTensorIdsInt16(OpTest): + def setUp(self): self.op_type = "lookup_table" - table = np.random.randint( - low=-128, high=127, size=(17, 31)).astype("int16") - ids = np.random.randint( - low=0, high=17, size=(2, 4, 5, 1)).astype("int64") + table = np.random.randint(low=-128, high=127, + size=(17, 31)).astype("int16") + ids = np.random.randint(low=0, high=17, + size=(2, 4, 5, 1)).astype("int64") self.inputs = {'W': table, 'Ids': ids} self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} @@ -348,6 +364,7 @@ class TestLookupTableOpWithTensorIdsInt16(OpTest): @skip_check_grad_ci(reason="Int16 type only be used in test and inference.") class TestLookupTableOpWithPaddingInt16(TestLookupTableOpInt16): + def test_check_output(self): ids = np.squeeze(self.inputs['Ids']) padding_idx = np.random.choice(ids, 1)[0] @@ -359,6 +376,7 @@ class TestLookupTableOpWithPaddingInt16(TestLookupTableOpInt16): @skip_check_grad_ci(reason="Int16 type only be used in test and inference.") class TestLookupTableOpWithTensorIdsAndPaddingInt16( TestLookupTableOpWithTensorIdsInt16): + def test_check_output(self): ids = self.inputs['Ids'] flatten_idx = ids.flatten() @@ -369,6 +387,7 @@ class TestLookupTableOpWithTensorIdsAndPaddingInt16( class TestLookupTableWIsSelectedRowsInt16(unittest.TestCase): + def prepare_ids(self, scope, place): ids_tensor = scope.var('Ids').get_tensor() ids_array = np.array([[0], [4], [3], [5]]).astype("int64") @@ -422,10 +441,11 @@ class TestLookupTableWIsSelectedRowsInt16(unittest.TestCase): class TestLookupTableWithTensorIdsWIsSelectedRowsInt16( TestLookupTableWIsSelectedRowsInt16): + def prepare_ids(self, scope, place): ids_tensor = scope.var('Ids').get_tensor() - ids_array = np.random.randint( - low=0, high=6, size=(2, 4, 3, 1)).astype("int64") + ids_array = np.random.randint(low=0, high=6, + size=(2, 4, 3, 1)).astype("int64") ids_tensor.set(ids_array, place) return ids_array @@ -435,13 +455,13 @@ class TestLookupTableWithTensorIdsWIsSelectedRowsInt16( class TestOutDtype(unittest.TestCase): + def test_dtype(self): api_fn = F.embedding - check_out_dtype( - api_fn, - in_specs=[([10, 16], 'int64'), ([100, 64], )], - expect_dtypes=['float32', 'float64'], - target_index=1) + check_out_dtype(api_fn, + in_specs=[([10, 16], 'int64'), ([100, 64], )], + expect_dtypes=['float32', 'float64'], + target_index=1) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py index 0776ae852d1..06b232443a8 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py @@ -28,6 +28,7 @@ import paddle.fluid.core 
as core class TestLookupTableV2BF16Op(TestLookupTableBF16Op): + def init_test(self): self.op_type = "lookup_table_v2" self.ids_shape = (4) @@ -35,6 +36,7 @@ class TestLookupTableV2BF16Op(TestLookupTableBF16Op): class TestLookupTableV2BF16OpIds4D(TestLookupTableBF16OpIds4D): + def init_test(self): self.op_type = "lookup_table_v2" self.ids_shape = (2, 4, 5) @@ -43,6 +45,7 @@ class TestLookupTableV2BF16OpIds4D(TestLookupTableBF16OpIds4D): class TestLookupTableV2BF16OpWIsSelectedRows( TestLookupTableBF16OpWIsSelectedRows): + def init_test(self): self.op_type = "lookup_table_v2" self.ids_shape = (10) @@ -50,12 +53,14 @@ class TestLookupTableV2BF16OpWIsSelectedRows( class TestLookupTableV2BF16OpWIsSelectedRows4DIds( TestLookupTableBF16OpWIsSelectedRows4DIds): + def init_test(self): self.op_type = "lookup_table_v2" self.ids_shape = (3, 4, 5) class TestLookupTableBF16OpWithPadding(TestLookupTableV2BF16Op): + def test_check_output(self): ids = np.squeeze(self.inputs['Ids']) padding_idx = np.random.choice(ids, 1)[0] @@ -65,6 +70,7 @@ class TestLookupTableBF16OpWithPadding(TestLookupTableV2BF16Op): class TestLookupTableBF16OpIds4DPadding(TestLookupTableV2BF16OpIds4D): + def test_check_output(self): ids = self.inputs['Ids'] flatten_idx = ids.flatten() @@ -86,8 +92,8 @@ class TestEmbeddingLayerBF16ConstantInitializer(unittest.TestCase): self.op_type = "lookup_table_v2" self.ids_shape = [4] self.w_shape = [10, 64] - self.ids = np.random.randint( - low=0, high=9, size=self.ids_shape).astype("int64") + self.ids = np.random.randint(low=0, high=9, + size=self.ids_shape).astype("int64") self.flat_ids = self.ids.flatten() self.value = 3.0 self.w_fp32 = np.full(self.w_shape, self.value) @@ -98,13 +104,13 @@ class TestEmbeddingLayerBF16ConstantInitializer(unittest.TestCase): with fluid.program_guard(self.prog, self.startup_prog): x = fluid.layers.data(name='x', shape=self.ids_shape, dtype='int64') - self.emb = fluid.input.embedding( - input=x, - size=self.w_shape, - param_attr=fluid.ParamAttr( - name="emb_weight", initializer=self.initializer), - is_sparse=False, - dtype="uint16") # bfloat16 + self.emb = fluid.input.embedding(input=x, + size=self.w_shape, + param_attr=fluid.ParamAttr( + name="emb_weight", + initializer=self.initializer), + is_sparse=False, + dtype="uint16") # bfloat16 exe = fluid.Executor(self.place) exe.run(self.startup_prog) self.result = exe.run(self.prog, diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py index 21844c9e402..eed0530e761 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py @@ -27,6 +27,7 @@ from paddle.fluid import Program, program_guard class TestStaticGraphSupportMultipleInt(unittest.TestCase): + def test_main(self): dtypes = ['uint8', 'int8', 'int16', 'int32', 'int64'] if paddle.in_dynamic_mode(): @@ -46,6 +47,7 @@ class TestStaticGraphSupportMultipleInt(unittest.TestCase): class TestLookupTableOp(OpTest): + def setUp(self): self.op_type = "lookup_table_v2" self.python_api = paddle.nn.functional.embedding @@ -65,21 +67,25 @@ class TestLookupTableOp(OpTest): class TestLookupTableOpInt16(OpTest): + def id_dtype(self): return "int16" class TestLookupTableOpInt8(OpTest): + def id_dtype(self): return "int8" class TestLookupTableOpUInt8(OpTest): + def id_dtype(self): return "uint8" class TestLookupTableOpWithTensorIds(OpTest): + def setUp(self): self.op_type = "lookup_table_v2" table = 
np.random.random((17, 31)).astype("float64") @@ -99,6 +105,7 @@ class TestLookupTableOpWithTensorIds(OpTest): "the gradient of paddings makes no sense and we don't " "test the gradient here.") class TestLookupTableOpWithPadding(TestLookupTableOp): + def test_check_output(self): ids = np.squeeze(self.inputs['Ids']) padding_idx = np.random.choice(ids, 1)[0] @@ -112,6 +119,7 @@ class TestLookupTableOpWithPadding(TestLookupTableOp): "the gradient of paddings makes no sense and we don't " "test the gradient here.") class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds): + def test_check_output(self): ids = self.inputs['Ids'] flatten_idx = ids.flatten() @@ -122,6 +130,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds): class TestLookupTableWIsSelectedRows(unittest.TestCase): + def prepare_ids(self, scope, place): ids_tensor = scope.var('Ids').get_tensor() ids_array = np.array([0, 4, 3, 5]).astype("int32") @@ -174,12 +183,13 @@ class TestLookupTableWIsSelectedRows(unittest.TestCase): self.check_with_place(place) -class TestLookupTableWithTensorIdsWIsSelectedRows( - TestLookupTableWIsSelectedRows): +class TestLookupTableWithTensorIdsWIsSelectedRows(TestLookupTableWIsSelectedRows + ): + def prepare_ids(self, scope, place): ids_tensor = scope.var('Ids').get_tensor() - ids_array = np.random.randint( - low=0, high=6, size=(2, 4, 3)).astype("int64") + ids_array = np.random.randint(low=0, high=6, + size=(2, 4, 3)).astype("int64") ids_tensor.set(ids_array, place) return ids_array @@ -189,6 +199,7 @@ class TestLookupTableWithTensorIdsWIsSelectedRows( class TestLookupTableIsSparse(unittest.TestCase): + def init_data(self): self.x_data = np.array([[1, 3, 0, 4, 7]]).astype("int64") self.y_data = np.array([[0.1, 0.3, 0, 0.4, 0.7]]).astype("float32") @@ -219,8 +230,10 @@ class TestLookupTableIsSparse(unittest.TestCase): place = fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - ret = exe.run(feed={'x': self.x_data, - 'y_': self.y_data}, + ret = exe.run(feed={ + 'x': self.x_data, + 'y_': self.y_data + }, fetch_list=['emb_weight'], return_numpy=False) return np.array(ret[0]) @@ -232,11 +245,14 @@ class TestLookupTableIsSparse(unittest.TestCase): self.check_grad(w_grad, w_grad_with_sparse) def check_grad(self, w_grad1, w_grad2, tolerance=1e-6): - np.testing.assert_allclose( - w_grad1, w_grad2, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose(w_grad1, + w_grad2, + rtol=tolerance, + atol=tolerance) class TestLookupTableApi(unittest.TestCase): + def test_api(self): x = fluid.layers.data(name='x', shape=[20], dtype='int64') emb = fluid.embedding(input=x, size=[128, 64]) @@ -246,12 +262,15 @@ class TestLookupTableApi(unittest.TestCase): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - ret = exe.run(feed={'x': x_data, }, + ret = exe.run(feed={ + 'x': x_data, + }, fetch_list=[emb], return_numpy=False) class TestEmbedOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): input_data = np.random.randint(0, 10, (4, 6)).astype("int64") diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py index 96a818549e7..fb2038819fc 100644 --- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py @@ -28,6 +28,7 @@ import paddle.fluid.core as core def reduce_lr_on_plateau(decay_rate, threshold, cooldown, patience, m, n, loss, 
var_list): + def is_better(current, best, m, n): if m == 'min' and n == 'rel': return current < best - best * threshold @@ -57,6 +58,7 @@ def reduce_lr_on_plateau(decay_rate, threshold, cooldown, patience, m, n, loss, class TestReduceOnPlateauDecay(object): + def test_ReduceLR(self): # the decay rate must be less than 1.0 with self.assertRaises(ValueError): @@ -66,8 +68,8 @@ class TestReduceOnPlateauDecay(object): paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, mode="test") # the threshold_mode must be "rel" or "abs" with self.assertRaises(ValueError): - paddle.optimizer.lr.ReduceOnPlateau( - learning_rate=1.0, threshold_mode="test") + paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, + threshold_mode="test") with self.assertRaises(TypeError): paddle.optimizer.lr.ReduceOnPlateau(learning_rate="test") with self.assertRaises(TypeError): @@ -110,8 +112,10 @@ class TestReduceOnPlateauDecay(object): main_prog = paddle.static.Program() start_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, start_prog): - x = fluid.layers.create_global_var( - [1], 1, 'float32', persistable=True) + x = fluid.layers.create_global_var([1], + 1, + 'float32', + persistable=True) paddle.increment(x) loss = paddle.sin(x) scheduler = paddle.optimizer.lr.ReduceOnPlateau(**kwargs) @@ -159,8 +163,8 @@ class TestReduceOnPlateauDecay(object): linear = paddle.nn.Linear(10, 10) scheduler = paddle.optimizer.lr.ReduceOnPlateau(**kwargs) - adam = paddle.optimizer.Adam( - learning_rate=scheduler, parameters=linear.parameters()) + adam = paddle.optimizer.Adam(learning_rate=scheduler, + parameters=linear.parameters()) for epoch in range(20): for batch_id in range(1): @@ -181,8 +185,8 @@ class TestReduceOnPlateauDecay(object): self.assertEqual(current_lr, expected_lr) state_dict = adam.state_dict() scheduler1 = paddle.optimizer.lr.ReduceOnPlateau(**kwargs) - adam1 = paddle.optimizer.Adam( - learning_rate=scheduler1, parameters=linear.parameters()) + adam1 = paddle.optimizer.Adam(learning_rate=scheduler1, + parameters=linear.parameters()) adam1.set_state_dict(state_dict) self.assertEqual(scheduler.cooldown_counter, scheduler1.cooldown_counter) @@ -254,8 +258,8 @@ def polynomial_lr(epoch_num, if self.last_epoch == 0: return self.base_lr elif (self.last_epoch - 1 - self.T_max) % (2 * self.T_max) == 0: - return self.last_lr + (self.base_lr - self.eta_min) * (1 - math.cos( - math.pi / self.T_max)) / 2 + return self.last_lr + (self.base_lr - self.eta_min) * ( + 1 - math.cos(math.pi / self.T_max)) / 2 return (1 + math.cos(math.pi * self.last_epoch / self.T_max)) / ( 1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max)) * ( @@ -275,13 +279,13 @@ def cosine_annealing_lr(epoch_num, cosine_annealing_lr_current = learning_rate elif (epoch_num - 1 - T_max) % (2 * T_max) == 0: cosine_annealing_lr_current = cosine_annealing_lr_current + ( - learning_rate - eta_min) * (1 - math.cos(math.pi / float(T_max)) - ) / 2 + learning_rate - eta_min) * (1 - + math.cos(math.pi / float(T_max))) / 2 else: - cosine_annealing_lr_current = (1 + math.cos( - math.pi * epoch_num / float(T_max))) / (1 + math.cos(math.pi * ( - epoch_num - 1) / float(T_max))) * (cosine_annealing_lr_current - - eta_min) + eta_min + cosine_annealing_lr_current = ( + 1 + math.cos(math.pi * epoch_num / float(T_max))) / ( + 1 + math.cos(math.pi * (epoch_num - 1) / float(T_max))) * ( + cosine_annealing_lr_current - eta_min) + eta_min return cosine_annealing_lr_current @@ -386,6 +390,7 @@ def one_cycle_lr(epoch_num, class 
TestLRScheduler(unittest.TestCase): + def _test_static(self, python_func, paddle_api, kwarg, place): scheduler = paddle_api(**kwarg) adam = paddle.optimizer.Adam(learning_rate=scheduler) @@ -426,17 +431,15 @@ class TestLRScheduler(unittest.TestCase): if isinstance(place, paddle.CPUPlace): compiled_train_prog = paddle.static.CompiledProgram( - main_prog).with_data_parallel( - loss_name=loss.name, places=fluid.cpu_places(4)) + main_prog).with_data_parallel(loss_name=loss.name, + places=fluid.cpu_places(4)) for epoch in range(5): python_result = python_func(num, **kwarg) for batch_id in range(2): - _ = exe.run(compiled_train_prog, - feed={ - 'x': - np.random.randn(12, 4, 5).astype('float32') - }, - fetch_list=lr_var.name) + _ = exe.run( + compiled_train_prog, + feed={'x': np.random.randn(12, 4, 5).astype('float32')}, + fetch_list=lr_var.name) scopes = compiled_train_prog._executor.local_scopes() out = np.array(scopes[0].var(lr_var.name).get_tensor()) self.assertEqual(out, np.array(python_result)) @@ -457,12 +460,10 @@ class TestLRScheduler(unittest.TestCase): for epoch in range(5): python_result = python_func(num, **kwarg) for batch_id in range(2): - _ = exe.run(compiled_test_prog, - feed={ - 'x': - np.random.randn(12, 4, 5).astype('float32') - }, - fetch_list=lr_var.name) + _ = exe.run( + compiled_test_prog, + feed={'x': np.random.randn(12, 4, 5).astype('float32')}, + fetch_list=lr_var.name) scopes = compiled_test_prog._executor.local_scopes() out = np.array(scopes[0].var(lr_var.name).get_tensor()) self.assertEqual(out, np.array(python_result)) @@ -483,8 +484,8 @@ class TestLRScheduler(unittest.TestCase): kwarg['learning_rate'] = paddle.optimizer.lr.PiecewiseDecay( [3, 6], [0.5, 0.2, 0.1]) scheduler = paddle_api(**kwarg) - adam = paddle.optimizer.Adam( - learning_rate=scheduler, parameters=linear.parameters()) + adam = paddle.optimizer.Adam(learning_rate=scheduler, + parameters=linear.parameters()) for epoch in range(20): for batch_id in range(2): x = paddle.to_tensor(x) @@ -502,8 +503,8 @@ class TestLRScheduler(unittest.TestCase): self.assertAlmostEqual(current_lr, expected_lr) state_dict = adam.state_dict() scheduler1 = paddle.optimizer.lr.LinearWarmup(**kwarg) - adam1 = paddle.optimizer.Adam( - learning_rate=scheduler1, parameters=linear.parameters()) + adam1 = paddle.optimizer.Adam(learning_rate=scheduler1, + parameters=linear.parameters()) adam1.set_state_dict(state_dict) self.assertEqual(scheduler.last_epoch, scheduler1.last_epoch) self.assertEqual(scheduler.last_lr, scheduler1.last_lr) @@ -520,137 +521,158 @@ class TestLRScheduler(unittest.TestCase): with self.assertRaises(NotImplementedError): paddle.optimizer.lr.LRScheduler().step() with self.assertRaises(TypeError): - paddle.optimizer.lr.MultiStepDecay( - learning_rate="test", milestones=[1, 2, 3]) + paddle.optimizer.lr.MultiStepDecay(learning_rate="test", + milestones=[1, 2, 3]) with self.assertRaises(TypeError): - paddle.optimizer.lr.MultiStepDecay( - learning_rate=0.5, milestones='test') + paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, + milestones='test') with self.assertRaises(ValueError): - paddle.optimizer.lr.MultiStepDecay( - learning_rate=0.5, milestones=[3, 2, 1]) + paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, + milestones=[3, 2, 1]) with self.assertRaises(ValueError): - paddle.optimizer.lr.MultiStepDecay( - learning_rate=0.5, milestones=[1, 2, 3], gamma=2) + paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, + milestones=[1, 2, 3], + gamma=2) with self.assertRaises(TypeError): - 
paddle.optimizer.lr.OneCycleLR( - max_learning_rate='test', total_steps=20) + paddle.optimizer.lr.OneCycleLR(max_learning_rate='test', + total_steps=20) with self.assertRaises(ValueError): - paddle.optimizer.lr.OneCycleLR( - max_learning_rate=-1.5, total_steps=20) + paddle.optimizer.lr.OneCycleLR(max_learning_rate=-1.5, + total_steps=20) with self.assertRaises(TypeError): - paddle.optimizer.lr.OneCycleLR( - max_learning_rate=0.1, total_steps=20, end_learning_rate='test') + paddle.optimizer.lr.OneCycleLR(max_learning_rate=0.1, + total_steps=20, + end_learning_rate='test') with self.assertRaises(ValueError): - paddle.optimizer.lr.OneCycleLR( - max_learning_rate=0.1, total_steps=20, end_learning_rate=-1) + paddle.optimizer.lr.OneCycleLR(max_learning_rate=0.1, + total_steps=20, + end_learning_rate=-1) with self.assertRaises(TypeError): - paddle.optimizer.lr.OneCycleLR( - max_learning_rate=0.1, total_steps='test') + paddle.optimizer.lr.OneCycleLR(max_learning_rate=0.1, + total_steps='test') with self.assertRaises(ValueError): - paddle.optimizer.lr.OneCycleLR( - max_learning_rate=0.1, total_steps=-10) + paddle.optimizer.lr.OneCycleLR(max_learning_rate=0.1, + total_steps=-10) with self.assertRaises(ValueError): - paddle.optimizer.lr.OneCycleLR( - max_learning_rate=0.1, total_steps=20, anneal_strategy='test') + paddle.optimizer.lr.OneCycleLR(max_learning_rate=0.1, + total_steps=20, + anneal_strategy='test') with self.assertRaises(ValueError): - paddle.optimizer.lr.OneCycleLR( - max_learning_rate=0.1, - total_steps=20, - phase_pct=0.6, - three_phase=True) - - func_api_kwargs = [(noam_lr, paddle.optimizer.lr.NoamDecay, { - "d_model": 0.01, - "warmup_steps": 100, - "verbose": False - }), (piecewise_lr, paddle.optimizer.lr.PiecewiseDecay, { - "boundaries": [3, 6, 9, 15, 20], - "values": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6], - "verbose": False - }), (natural_exp_lr, paddle.optimizer.lr.NaturalExpDecay, { - "learning_rate": 0.5, - "gamma": 0.1, - "verbose": True - }), (inverse_time_lr, paddle.optimizer.lr.InverseTimeDecay, { - "learning_rate": 0.5, - "gamma": 0.1, - "verbose": False - }), (polynomial_lr, paddle.optimizer.lr.PolynomialDecay, { - "learning_rate": 0.5, - "decay_steps": 20, - "end_lr": 0, - "power": 1.0, - "cycle": False - }), (polynomial_lr, paddle.optimizer.lr.PolynomialDecay, { - "learning_rate": 0.5, - "decay_steps": 20, - "end_lr": 0, - "power": 1.0, - "cycle": True, - "verbose": False - }), (linear_warmup_lr, paddle.optimizer.lr.LinearWarmup, { - 'learning_rate': 0.5, - 'warmup_steps': 10, - 'start_lr': 0, - 'end_lr': 0.5 - }), (exponential_lr, paddle.optimizer.lr.ExponentialDecay, { - "learning_rate": 0.5, - "gamma": 0.9, - "verbose": False - }), (multi_step_lr, paddle.optimizer.lr.MultiStepDecay, { - "learning_rate": 0.5, - "milestones": [3, 6, 9, 15, 20], - "gamma": 0.8 - }), (step_lr, paddle.optimizer.lr.StepDecay, { - "learning_rate": 0.5, - "step_size": 2, - "gamma": 0.8, - "verbose": False - }), (lambda_lr, paddle.optimizer.lr.LambdaDecay, { - "learning_rate": 0.5, - "lr_lambda": lambda x: 0.95**x, - "verbose": True - }), (multiplicative_lr, paddle.optimizer.lr.MultiplicativeDecay, { - "learning_rate": 0.5, - "lr_lambda": lambda x: 0.95, - "verbose": True - }), (cosine_annealing_lr, paddle.optimizer.lr.CosineAnnealingDecay, { - "learning_rate": 0.5, - "T_max": 10, - "verbose": False - }), (one_cycle_lr, paddle.optimizer.lr.OneCycleLR, { - "max_learning_rate": 0.1, - "total_steps": 20, - "divide_factor": 5, - "end_learning_rate": 0.0001, - "anneal_strategy": 'cos', - "phase_pct": 0.3, 
- "three_phase": False, - }), (one_cycle_lr, paddle.optimizer.lr.OneCycleLR, { - "max_learning_rate": 0.5, - "total_steps": 20, - "divide_factor": 10, - "end_learning_rate": 0.001, - "anneal_strategy": 'linear', - "phase_pct": 0.4, - "three_phase": False, - }), (one_cycle_lr, paddle.optimizer.lr.OneCycleLR, { - "max_learning_rate": 1.0, - "total_steps": 20, - "divide_factor": 9, - "end_learning_rate": 0.0001, - "anneal_strategy": 'cos', - "phase_pct": 0.3, - "three_phase": True, - }), (one_cycle_lr, paddle.optimizer.lr.OneCycleLR, { - "max_learning_rate": 0.3, - "total_steps": 20, - "divide_factor": 25, - "end_learning_rate": 0.0005, - "anneal_strategy": 'linear', - "phase_pct": 0.2, - "three_phase": True, - })] + paddle.optimizer.lr.OneCycleLR(max_learning_rate=0.1, + total_steps=20, + phase_pct=0.6, + three_phase=True) + + func_api_kwargs = [ + (noam_lr, paddle.optimizer.lr.NoamDecay, { + "d_model": 0.01, + "warmup_steps": 100, + "verbose": False + }), + (piecewise_lr, paddle.optimizer.lr.PiecewiseDecay, { + "boundaries": [3, 6, 9, 15, 20], + "values": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6], + "verbose": False + }), + (natural_exp_lr, paddle.optimizer.lr.NaturalExpDecay, { + "learning_rate": 0.5, + "gamma": 0.1, + "verbose": True + }), + (inverse_time_lr, paddle.optimizer.lr.InverseTimeDecay, { + "learning_rate": 0.5, + "gamma": 0.1, + "verbose": False + }), + (polynomial_lr, paddle.optimizer.lr.PolynomialDecay, { + "learning_rate": 0.5, + "decay_steps": 20, + "end_lr": 0, + "power": 1.0, + "cycle": False + }), + (polynomial_lr, paddle.optimizer.lr.PolynomialDecay, { + "learning_rate": 0.5, + "decay_steps": 20, + "end_lr": 0, + "power": 1.0, + "cycle": True, + "verbose": False + }), + (linear_warmup_lr, paddle.optimizer.lr.LinearWarmup, { + 'learning_rate': 0.5, + 'warmup_steps': 10, + 'start_lr': 0, + 'end_lr': 0.5 + }), + (exponential_lr, paddle.optimizer.lr.ExponentialDecay, { + "learning_rate": 0.5, + "gamma": 0.9, + "verbose": False + }), + (multi_step_lr, paddle.optimizer.lr.MultiStepDecay, { + "learning_rate": 0.5, + "milestones": [3, 6, 9, 15, 20], + "gamma": 0.8 + }), + (step_lr, paddle.optimizer.lr.StepDecay, { + "learning_rate": 0.5, + "step_size": 2, + "gamma": 0.8, + "verbose": False + }), + (lambda_lr, paddle.optimizer.lr.LambdaDecay, { + "learning_rate": 0.5, + "lr_lambda": lambda x: 0.95**x, + "verbose": True + }), + (multiplicative_lr, paddle.optimizer.lr.MultiplicativeDecay, { + "learning_rate": 0.5, + "lr_lambda": lambda x: 0.95, + "verbose": True + }), + (cosine_annealing_lr, paddle.optimizer.lr.CosineAnnealingDecay, { + "learning_rate": 0.5, + "T_max": 10, + "verbose": False + }), + (one_cycle_lr, paddle.optimizer.lr.OneCycleLR, { + "max_learning_rate": 0.1, + "total_steps": 20, + "divide_factor": 5, + "end_learning_rate": 0.0001, + "anneal_strategy": 'cos', + "phase_pct": 0.3, + "three_phase": False, + }), + (one_cycle_lr, paddle.optimizer.lr.OneCycleLR, { + "max_learning_rate": 0.5, + "total_steps": 20, + "divide_factor": 10, + "end_learning_rate": 0.001, + "anneal_strategy": 'linear', + "phase_pct": 0.4, + "three_phase": False, + }), + (one_cycle_lr, paddle.optimizer.lr.OneCycleLR, { + "max_learning_rate": 1.0, + "total_steps": 20, + "divide_factor": 9, + "end_learning_rate": 0.0001, + "anneal_strategy": 'cos', + "phase_pct": 0.3, + "three_phase": True, + }), + (one_cycle_lr, paddle.optimizer.lr.OneCycleLR, { + "max_learning_rate": 0.3, + "total_steps": 20, + "divide_factor": 25, + "end_learning_rate": 0.0005, + "anneal_strategy": 'linear', + "phase_pct": 0.2, + 
"three_phase": True, + }) + ] for python_func, paddle_api, kwarg in func_api_kwargs: places = [paddle.CPUPlace()] @@ -665,8 +687,8 @@ class TestLRScheduler(unittest.TestCase): paddle.enable_static() def test_linear_warmp(self): - natural_lr = paddle.optimizer.lr.NaturalExpDecay( - learning_rate=0.5, gamma=0.1) + natural_lr = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, + gamma=0.1) natural_lr_warmup = paddle.optimizer.lr.LinearWarmup( learning_rate=natural_lr, warmup_steps=10, start_lr=0.0, end_lr=0.1) for idx in range(30): diff --git a/python/paddle/fluid/tests/unittests/test_lrn_op.py b/python/paddle/fluid/tests/unittests/test_lrn_op.py index 4589f84deb3..1f8f9c62f0b 100644 --- a/python/paddle/fluid/tests/unittests/test_lrn_op.py +++ b/python/paddle/fluid/tests/unittests/test_lrn_op.py @@ -24,6 +24,7 @@ from paddle.fluid import compiler, Program, program_guard class TestLRNOp(OpTest): + def get_input(self): r''' TODO(gongweibao): why it's grad diff is so large? x = np.ndarray( @@ -104,11 +105,13 @@ class TestLRNOp(OpTest): class TestLRNOpAttrDataFormat(TestLRNOp): + def init_test_case(self): self.data_format = 'NHWC' class TestLRNAPI(unittest.TestCase): + def test_case(self): data1 = fluid.data(name='data1', shape=[2, 4, 5, 5], dtype='float32') data2 = fluid.data(name='data2', shape=[2, 5, 5, 4], dtype='float32') @@ -124,8 +127,10 @@ class TestLRNAPI(unittest.TestCase): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) results = exe.run(fluid.default_main_program(), - feed={"data1": data1_np, - "data2": data2_np}, + feed={ + "data1": data1_np, + "data2": data2_np + }, fetch_list=[out1, out2], return_numpy=True) @@ -134,8 +139,9 @@ class TestLRNAPI(unittest.TestCase): def test_exception(self): input1 = fluid.data(name="input1", shape=[2, 4, 5, 5], dtype="float32") - input2 = fluid.data( - name="input2", shape=[2, 4, 5, 5, 5], dtype="float32") + input2 = fluid.data(name="input2", + shape=[2, 4, 5, 5, 5], + dtype="float32") def _attr_data_fromat(): out = fluid.layers.lrn(input1, data_format='NDHW') @@ -148,6 +154,7 @@ class TestLRNAPI(unittest.TestCase): class TestLRNOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input must be float32 @@ -156,6 +163,7 @@ class TestLRNOpError(unittest.TestCase): class TestLocalResponseNormFAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -167,18 +175,24 @@ class TestLocalResponseNormFAPI(unittest.TestCase): in_np1 = np.random.random([3, 40, 40]).astype("float32") in_np2 = np.transpose(in_np1, (0, 2, 1)) - input1 = fluid.data( - name="input1", shape=[3, 40, 40], dtype="float32") - input2 = fluid.data( - name="input2", shape=[3, 40, 40], dtype="float32") - res1 = paddle.nn.functional.local_response_norm( - x=input1, size=5, data_format='NCL') - res2 = paddle.nn.functional.local_response_norm( - x=input2, size=5, data_format='NLC') + input1 = fluid.data(name="input1", + shape=[3, 40, 40], + dtype="float32") + input2 = fluid.data(name="input2", + shape=[3, 40, 40], + dtype="float32") + res1 = paddle.nn.functional.local_response_norm(x=input1, + size=5, + data_format='NCL') + res2 = paddle.nn.functional.local_response_norm(x=input2, + size=5, + data_format='NLC') exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), - feed={"input1": in_np1, - "input2": in_np2}, + feed={ + "input1": in_np1, + "input2": in_np2 + }, fetch_list=[res1, res2]) fetches1_tran = np.transpose(fetches[1], (0, 2, 1)) @@ -186,23 +200,29 @@ 
class TestLocalResponseNormFAPI(unittest.TestCase): def check_static_4d_input(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - input1 = fluid.data( - name="input1", shape=[3, 3, 40, 40], dtype="float32") - input2 = fluid.data( - name="input2", shape=[3, 40, 40, 3], dtype="float32") - - res1 = paddle.nn.functional.local_response_norm( - x=input1, size=5, data_format='NCHW') - res2 = paddle.nn.functional.local_response_norm( - x=input2, size=5, data_format='NHWC') + input1 = fluid.data(name="input1", + shape=[3, 3, 40, 40], + dtype="float32") + input2 = fluid.data(name="input2", + shape=[3, 40, 40, 3], + dtype="float32") + + res1 = paddle.nn.functional.local_response_norm(x=input1, + size=5, + data_format='NCHW') + res2 = paddle.nn.functional.local_response_norm(x=input2, + size=5, + data_format='NHWC') in_np1 = np.random.random([3, 3, 40, 40]).astype("float32") in_np2 = np.transpose(in_np1, (0, 2, 3, 1)) exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), - feed={"input1": in_np1, - "input2": in_np2}, + feed={ + "input1": in_np1, + "input2": in_np2 + }, fetch_list=[res1, res2]) fetches1_tran = np.transpose(fetches[1], (0, 3, 1, 2)) @@ -210,22 +230,28 @@ class TestLocalResponseNormFAPI(unittest.TestCase): def check_static_5d_input(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - input1 = fluid.data( - name="input1", shape=[3, 3, 3, 40, 40], dtype="float32") - input2 = fluid.data( - name="input2", shape=[3, 3, 40, 40, 3], dtype="float32") - res1 = paddle.nn.functional.local_response_norm( - x=input1, size=5, data_format='NCDHW') - res2 = paddle.nn.functional.local_response_norm( - x=input2, size=5, data_format='NDHWC') + input1 = fluid.data(name="input1", + shape=[3, 3, 3, 40, 40], + dtype="float32") + input2 = fluid.data(name="input2", + shape=[3, 3, 40, 40, 3], + dtype="float32") + res1 = paddle.nn.functional.local_response_norm(x=input1, + size=5, + data_format='NCDHW') + res2 = paddle.nn.functional.local_response_norm(x=input2, + size=5, + data_format='NDHWC') in_np1 = np.random.random([3, 3, 3, 40, 40]).astype("float32") in_np2 = np.transpose(in_np1, (0, 2, 3, 4, 1)) exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), - feed={"input1": in_np1, - "input2": in_np2}, + feed={ + "input1": in_np1, + "input2": in_np2 + }, fetch_list=[res1, res2]) fetches1_tran = np.transpose(fetches[1], (0, 4, 1, 2, 3)) @@ -245,10 +271,12 @@ class TestLocalResponseNormFAPI(unittest.TestCase): in1 = paddle.to_tensor(in_np1) in2 = paddle.to_tensor(in_np2) - res1 = paddle.nn.functional.local_response_norm( - x=in1, size=5, data_format='NCL') - res2 = paddle.nn.functional.local_response_norm( - x=in2, size=5, data_format='NLC') + res1 = paddle.nn.functional.local_response_norm(x=in1, + size=5, + data_format='NCL') + res2 = paddle.nn.functional.local_response_norm(x=in2, + size=5, + data_format='NLC') res2_tran = np.transpose(res2.numpy(), (0, 2, 1)) self.assertTrue(np.allclose(res1.numpy(), res2_tran)) @@ -261,10 +289,12 @@ class TestLocalResponseNormFAPI(unittest.TestCase): in1 = paddle.to_tensor(in_np1) in2 = paddle.to_tensor(in_np2) - res1 = paddle.nn.functional.local_response_norm( - x=in1, size=5, data_format='NCHW') - res2 = paddle.nn.functional.local_response_norm( - x=in2, size=5, data_format='NHWC') + res1 = paddle.nn.functional.local_response_norm(x=in1, + size=5, + data_format='NCHW') + res2 = paddle.nn.functional.local_response_norm(x=in2, + size=5, + data_format='NHWC') res2_tran = 
np.transpose(res2.numpy(), (0, 3, 1, 2)) self.assertTrue(np.allclose(res1.numpy(), res2_tran)) @@ -277,10 +307,12 @@ class TestLocalResponseNormFAPI(unittest.TestCase): in1 = paddle.to_tensor(in_np1) in2 = paddle.to_tensor(in_np2) - res1 = paddle.nn.functional.local_response_norm( - x=in1, size=5, data_format='NCDHW') - res2 = paddle.nn.functional.local_response_norm( - x=in2, size=5, data_format='NDHWC') + res1 = paddle.nn.functional.local_response_norm(x=in1, + size=5, + data_format='NCDHW') + res2 = paddle.nn.functional.local_response_norm(x=in2, + size=5, + data_format='NDHWC') res2_tran = np.transpose(res2.numpy(), (0, 4, 1, 2, 3)) self.assertTrue(np.allclose(res1.numpy(), res2_tran)) @@ -293,13 +325,14 @@ class TestLocalResponseNormFAPI(unittest.TestCase): class TestLocalResponseNormFAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): def test_Variable(): # the input of lrn must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CPUPlace()) paddle.nn.functional.local_response_norm(x1, size=5) self.assertRaises(TypeError, test_Variable) @@ -312,8 +345,9 @@ class TestLocalResponseNormFAPIError(unittest.TestCase): def test_dataformat(): x = fluid.data(name='x', shape=[3, 4, 5, 6], dtype="float32") - paddle.nn.functional.local_response_norm( - x, size=5, data_format="NCTHW") + paddle.nn.functional.local_response_norm(x, + size=5, + data_format="NCTHW") self.assertRaises(ValueError, test_dataformat) @@ -331,6 +365,7 @@ class TestLocalResponseNormFAPIError(unittest.TestCase): class TestLocalResponseNormCAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] diff --git a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py index 372b8d0d4d2..cdde705475e 100644 --- a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py @@ -24,6 +24,7 @@ import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers import random + random.seed(2) np.set_printoptions(threshold=np.inf) paddle.enable_static() @@ -34,6 +35,7 @@ EXP_MAX_INPUT = 40.0 class RandomWeight: + def __init__(self): pass @@ -43,27 +45,34 @@ class RandomWeight: self.input_size = input_size self.dtype = dtype - self.weight_ih = np.random.uniform( - low=-std, high=std, size=(4 * self.hidden_size, - self.input_size)).astype(dtype) + self.weight_ih = np.random.uniform(low=-std, + high=std, + size=(4 * self.hidden_size, + self.input_size)).astype(dtype) self.weight_hh = np.random.uniform( - low=-std, high=std, size=(4 * self.hidden_size, - self.hidden_size)).astype(dtype) - self.bias_ih = np.random.uniform( - low=-std, high=std, size=(4 * self.hidden_size)).astype(dtype) - self.bias_hh = np.random.uniform( - low=-std, high=std, size=(4 * self.hidden_size)).astype(dtype) + low=-std, high=std, + size=(4 * self.hidden_size, self.hidden_size)).astype(dtype) + self.bias_ih = np.random.uniform(low=-std, + high=std, + size=(4 * + self.hidden_size)).astype(dtype) + self.bias_hh = np.random.uniform(low=-std, + high=std, + size=(4 * + self.hidden_size)).astype(dtype) weight = RandomWeight() class LayerMixin(object): + def __call__(self, *args, **kwargs): return self.forward(*args, **kwargs) class LayerListMixin(LayerMixin): + def __init__(self, layers=None): self._layers = list(layers) if 
layers else [] @@ -75,6 +84,7 @@ class LayerListMixin(LayerMixin): class LSTMCell(LayerMixin): + def __init__(self, input_size, hidden_size, bias=True): self.input_size = input_size self.hidden_size = hidden_size @@ -263,6 +273,7 @@ def concat_states(states, bidirectional=False, state_components=1): class RNN(LayerMixin): + def __init__(self, cell, is_reverse=False, time_major=False): super(RNN, self).__init__() self.cell = cell @@ -283,6 +294,7 @@ class RNN(LayerMixin): class BiRNN(LayerMixin): + def __init__(self, cell_fw, cell_bw, time_major=False): super(BiRNN, self).__init__() self.cell_fw = cell_fw @@ -307,6 +319,7 @@ class BiRNN(LayerMixin): class RNNMixin(LayerListMixin): + def forward(self, inputs, initial_states=None, sequence_length=None): batch_index = 1 if self.time_major else 0 batch_size = inputs.shape[batch_index] @@ -339,6 +352,7 @@ class RNNMixin(LayerListMixin): class LSTM(RNNMixin): + def __init__(self, input_size, hidden_size, @@ -380,6 +394,7 @@ class LSTM(RNNMixin): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNLstmOp(OpTest): + def get_weight_names(self): weight_names = [] for i in range(2 * self.num_layers): @@ -392,8 +407,7 @@ class TestCUDNNLstmOp(OpTest): self.op_type = "cudnn_lstm" self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64 self.sequence_length = None if core.is_compiled_with_rocm( - ) else np.array( - [12, 11, 10, 9, 8], dtype=np.int32) + ) else np.array([12, 11, 10, 9, 8], dtype=np.int32) self.num_layers = 1 self.set_attrs() @@ -402,24 +416,24 @@ class TestCUDNNLstmOp(OpTest): input_size = 21 hidden_size = 21 - input = np.random.uniform( - low=-0.1, high=0.1, - size=(seq_length, batch_size, input_size)).astype(self.dtype) + input = np.random.uniform(low=-0.1, + high=0.1, + size=(seq_length, batch_size, + input_size)).astype(self.dtype) input[11][1:][:] = 0 input[10][2:][:] = 0 input[9][3:][:] = 0 input[8][4:][:] = 0 weight.updata_weight(hidden_size, input_size, self.dtype) - rnn1 = LSTM( - input_size, - hidden_size, - num_layers=self.num_layers, - time_major=True, - direction="forward") + rnn1 = LSTM(input_size, + hidden_size, + num_layers=self.num_layers, + time_major=True, + direction="forward") - output, (last_hidden, last_cell) = rnn1( - input, sequence_length=self.sequence_length) + output, (last_hidden, + last_cell) = rnn1(input, sequence_length=self.sequence_length) flat_w = [] num = 0 @@ -443,10 +457,10 @@ class TestCUDNNLstmOp(OpTest): bias_hh = weight.bias_hh flat_w.append(("bias" + str(num), bias_hh)) num += 1 - init_h = np.zeros((self.num_layers, batch_size, - hidden_size)).astype(self.dtype) - init_c = np.zeros((self.num_layers, batch_size, - hidden_size)).astype(self.dtype) + init_h = np.zeros( + (self.num_layers, batch_size, hidden_size)).astype(self.dtype) + init_c = np.zeros( + (self.num_layers, batch_size, hidden_size)).astype(self.dtype) state_out = np.ndarray((300)).astype("uint8") if core.is_compiled_with_rocm(): @@ -491,25 +505,26 @@ class TestCUDNNLstmOp(OpTest): def test_output_with_place(self): place = core.CUDAPlace(0) if core.is_compiled_with_rocm(): - self.check_output_with_place( - place, atol=1e-5, no_check_set=['Reserve', 'StateOut']) + self.check_output_with_place(place, + atol=1e-5, + no_check_set=['Reserve', 'StateOut']) else: - self.check_output_with_place( - place, no_check_set=['Reserve', 'StateOut']) + self.check_output_with_place(place, + no_check_set=['Reserve', 'StateOut']) def test_grad_with_place(self): place = core.CUDAPlace(0) 
var_name_list = self.get_weight_names() for var_name in var_name_list: self.check_grad_with_place( - place, - set(['Input', var_name, 'InitH', 'InitC']), + place, set(['Input', var_name, 'InitH', 'InitC']), ['Out', 'LastH', 'LastC']) @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNlstmAPI(unittest.TestCase): + def test_lstm(self): seq_len = 20 batch_size = 5 @@ -517,8 +532,9 @@ class TestCUDNNlstmAPI(unittest.TestCase): dropout_prob = 0.0 num_layers = 1 dtype = 'float32' if core.is_compiled_with_rocm() else 'float64' - input = fluid.data( - name='input', shape=[seq_len, batch_size, hidden_size], dtype=dtype) + input = fluid.data(name='input', + shape=[seq_len, batch_size, hidden_size], + dtype=dtype) init_h = layers.fill_constant([num_layers, batch_size, hidden_size], dtype, 0.0) init_c = layers.fill_constant([num_layers, batch_size, hidden_size], @@ -528,9 +544,10 @@ class TestCUDNNlstmAPI(unittest.TestCase): dropout_prob, False) exe = fluid.Executor(fluid.CUDAPlace(0)) exe.run(fluid.default_startup_program()) - input_i = np.random.uniform( - low=-0.1, high=0.1, size=(seq_len, batch_size, - hidden_size)).astype("float64") + input_i = np.random.uniform(low=-0.1, + high=0.1, + size=(seq_len, batch_size, + hidden_size)).astype("float64") out = exe.run(fluid.default_main_program(), feed={'input': input_i}, fetch_list=[rnn_out, last_h, last_c, 'cudnn_lstm_0.w_0']) @@ -539,6 +556,7 @@ class TestCUDNNlstmAPI(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNlstmAPI(unittest.TestCase): + def test_lstm(self): seq_len = 20 batch_size = 5 @@ -546,8 +564,9 @@ class TestCUDNNlstmAPI(unittest.TestCase): dropout_prob = 0.0 num_layers = 2 dtype = 'float32' if core.is_compiled_with_rocm() else 'float64' - input = fluid.data( - name='input', shape=[seq_len, batch_size, hidden_size], dtype=dtype) + input = fluid.data(name='input', + shape=[seq_len, batch_size, hidden_size], + dtype=dtype) init_h = layers.fill_constant([num_layers, batch_size, hidden_size], dtype, 0.0) init_c = layers.fill_constant([num_layers, batch_size, hidden_size], @@ -557,9 +576,10 @@ class TestCUDNNlstmAPI(unittest.TestCase): dropout_prob, False, True) exe = fluid.Executor(fluid.CUDAPlace(0)) exe.run(fluid.default_startup_program()) - input_i = np.random.uniform( - low=-0.1, high=0.1, size=(seq_len, batch_size, - hidden_size)).astype(dtype) + input_i = np.random.uniform(low=-0.1, + high=0.1, + size=(seq_len, batch_size, + hidden_size)).astype(dtype) out = exe.run(fluid.default_main_program(), feed={'input': input_i}, fetch_list=[rnn_out, last_h, last_c, 'cudnn_lstm_0.w_0']) diff --git a/python/paddle/fluid/tests/unittests/test_lstm_op.py b/python/paddle/fluid/tests/unittests/test_lstm_op.py index fff5fef2922..c4ddab74aae 100644 --- a/python/paddle/fluid/tests/unittests/test_lstm_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstm_op.py @@ -68,6 +68,7 @@ def lstm( act_gate=None, act_cell=None, act_cand=None): + def _step(x, w_h, w_c, h_pre, c_pre, act_gate, act_cell, act_cand): g = np.dot(h_pre, w_h) # 1 x 4D g = g + x @@ -131,6 +132,7 @@ def lstm( class LstmUnitTestError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): batch_size = 20 @@ -138,10 +140,9 @@ class LstmUnitTestError(unittest.TestCase): dropout_prob = 0.2 hidden_size = 150 num_layers = 1 - input = fluid.data( - name='input', - shape=[batch_size, seq_len, hidden_size], - dtype='float32') + input = 
fluid.data(name='input', + shape=[batch_size, seq_len, hidden_size], + dtype='float32') pre_hidden = fill_constant([num_layers, batch_size, hidden_size], 'float32', 0.0) pre_cell = fill_constant([num_layers, batch_size, hidden_size], @@ -178,10 +179,9 @@ class LstmUnitTestError(unittest.TestCase): self.assertRaises(TypeError, test_pre_cell_Variable) def test_input_type(): - error_input = fluid.data( - name='error_input', - shape=[None, hidden_size * 3], - dtype='int32') + error_input = fluid.data(name='error_input', + shape=[None, hidden_size * 3], + dtype='int32') LSTM(error_input, pre_hidden, pre_cell, \ seq_len, hidden_size, num_layers, \ dropout_prob=dropout_prob) @@ -189,10 +189,9 @@ class LstmUnitTestError(unittest.TestCase): self.assertRaises(TypeError, test_input_type) def test_pre_hidden_type(): - error_pre_hidden = fluid.data( - name='error_pre_hidden', - shape=[None, hidden_size], - dtype='int32') + error_pre_hidden = fluid.data(name='error_pre_hidden', + shape=[None, hidden_size], + dtype='int32') LSTM(input, error_pre_hidden, pre_cell, \ seq_len, hidden_size, num_layers, \ dropout_prob=dropout_prob) @@ -200,10 +199,9 @@ class LstmUnitTestError(unittest.TestCase): self.assertRaises(TypeError, test_pre_hidden_type) def test_pre_cell_type(): - error_pre_cell = fluid.data( - name='error_pre_cell', - shape=[None, hidden_size], - dtype='int32') + error_pre_cell = fluid.data(name='error_pre_cell', + shape=[None, hidden_size], + dtype='int32') LSTM(input, pre_hidden, error_pre_cell, \ seq_len, hidden_size, num_layers, \ dropout_prob=dropout_prob) @@ -212,6 +210,7 @@ class LstmUnitTestError(unittest.TestCase): class TestLstmOp(OpTest): + def set_is_test(self): self.is_test = False @@ -286,28 +285,31 @@ class TestLstmOp(OpTest): self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( (N, self.D)).astype('float64') - self.check_grad( - ['Input', 'Weight', 'Bias'], ['Hidden'], - max_relative_error=5e-4, - check_dygraph=False) + self.check_grad(['Input', 'Weight', 'Bias'], ['Hidden'], + max_relative_error=5e-4, + check_dygraph=False) class TestLstmOpCase1(TestLstmOp): + def set_lod(self): self.lod = [[0, 3, 2]] class TestLstmOpCase2(TestLstmOp): + def set_lod(self): self.lod = [[0, 3, 0]] class TestLstmOpCase3(TestLstmOp): + def set_lod(self): self.lod = [[2, 0, 4]] class TestLstmOpInference(TestLstmOp): + def set_is_test(self): self.is_test = True @@ -317,37 +319,43 @@ class TestLstmOpInference(TestLstmOp): class TestLstmOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): def test_Variable(): input_data = np.random.random((1, 2048)).astype("float32") - fluid.layers.dynamic_lstm( - input=input_data, size=2048, use_peepholes=False) + fluid.layers.dynamic_lstm(input=input_data, + size=2048, + use_peepholes=False) self.assertRaises(TypeError, test_Variable) def test_h_0(): - in_data = fluid.data( - name="input", shape=[None, 2048], dtype="float32") + in_data = fluid.data(name="input", + shape=[None, 2048], + dtype="float32") h = fluid.data(name="h", shape=[None, 512], dtype="int32") c = fluid.data(name="c", shape=[None, 512], dtype="float32") - fluid.layers.dynamic_lstm( - input=in_data, size=2048, use_peepholes=False, h_0=h, c_0=c) + fluid.layers.dynamic_lstm(input=in_data, + size=2048, + use_peepholes=False, + h_0=h, + c_0=c) self.assertRaises(TypeError, test_h_0) def test_c_0(): - in_data_ = fluid.data( - name="input_", shape=[None, 2048], dtype="float32") + in_data_ = 
fluid.data(name="input_", + shape=[None, 2048], + dtype="float32") h_ = fluid.data(name="h_", shape=[None, 512], dtype="float32") c_ = fluid.data(name="c_", shape=[None, 512], dtype="int32") - fluid.layers.dynamic_lstm( - input=in_data_, - size=2048, - use_peepholes=False, - h_0=h_, - c_0=c_) + fluid.layers.dynamic_lstm(input=in_data_, + size=2048, + use_peepholes=False, + h_0=h_, + c_0=c_) self.assertRaises(TypeError, test_c_0) diff --git a/python/paddle/fluid/tests/unittests/test_lstm_unit_op.py b/python/paddle/fluid/tests/unittests/test_lstm_unit_op.py index c0875462e33..89a60e26561 100644 --- a/python/paddle/fluid/tests/unittests/test_lstm_unit_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstm_unit_op.py @@ -31,20 +31,20 @@ def tanh_np(x): class LstmUnitTestError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): batch_size, dict_dim, emb_dim, hidden_dim = 32, 128, 64, 512 - data = fluid.data( - name='step_data', shape=[batch_size], dtype='int64') + data = fluid.data(name='step_data', + shape=[batch_size], + dtype='int64') inputs = fluid.embedding(input=data, size=[dict_dim, emb_dim]) - pre_hidden = fluid.data( - name='pre_hidden', - shape=[batch_size, hidden_dim], - dtype='float32') - pre_cell = fluid.data( - name='pre_cell', - shape=[batch_size, hidden_dim], - dtype='float32') + pre_hidden = fluid.data(name='pre_hidden', + shape=[batch_size, hidden_dim], + dtype='float32') + pre_cell = fluid.data(name='pre_cell', + shape=[batch_size, hidden_dim], + dtype='float32') np_input = np.random.uniform( -0.1, 0.1, (batch_size, emb_dim)).astype('float64') @@ -69,34 +69,32 @@ class LstmUnitTestError(unittest.TestCase): self.assertRaises(TypeError, test_pre_cell_Variable) def test_input_type(): - error_input = fluid.data( - name='error_input', - shape=[batch_size, emb_dim], - dtype='int32') + error_input = fluid.data(name='error_input', + shape=[batch_size, emb_dim], + dtype='int32') lstm_unit(error_input, pre_hidden, pre_cell) self.assertRaises(TypeError, test_input_type) def test_pre_hidden_type(): - error_pre_hidden = fluid.data( - name='error_pre_hidden', - shape=[batch_size, hidden_dim], - dtype='int32') + error_pre_hidden = fluid.data(name='error_pre_hidden', + shape=[batch_size, hidden_dim], + dtype='int32') lstm_unit(inputs, error_pre_hidden, pre_cell) self.assertRaises(TypeError, test_pre_hidden_type) def test_pre_cell_type(): - error_pre_cell = fluid.data( - name='error_pre_cell', - shape=[batch_size, hidden_dim], - dtype='int32') + error_pre_cell = fluid.data(name='error_pre_cell', + shape=[batch_size, hidden_dim], + dtype='int32') lstm_unit(inputs, pre_hidden, error_pre_cell) self.assertRaises(TypeError, test_pre_cell_type) class LstmUnitTest(OpTest): + def setUp(self): self.op_type = "lstm_unit" x_np = np.random.normal(size=(15, 160)).astype("float64") diff --git a/python/paddle/fluid/tests/unittests/test_lstmp_op.py b/python/paddle/fluid/tests/unittests/test_lstmp_op.py index 186504af087..abd67007941 100644 --- a/python/paddle/fluid/tests/unittests/test_lstmp_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstmp_op.py @@ -44,6 +44,7 @@ def lstmp( act_cell=None, act_cand=None, act_proj=None): + def _step(x, w_r, w_rh, w_c, r_pre, c_pre, proj_clip, cell_clip, act_gate, act_cell, act_cand, act_proj): g = np.dot(r_pre, w_r) # 1 x 4D @@ -126,6 +127,7 @@ def lstmp( class TestLstmpOp(LstmTest.TestLstmOp): + def reset_argument(self): pass @@ -196,13 +198,14 @@ class TestLstmpOp(LstmTest.TestLstmOp): self.outputs['BatchHidden'] = np.zeros((N, 
self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( (N, self.D)).astype('float64') - self.check_grad( - ['Input', 'Weight', 'ProjWeight', 'Bias'], ['Projection'], - numeric_grad_delta=0.0000005, - check_dygraph=False) + self.check_grad(['Input', 'Weight', 'ProjWeight', 'Bias'], + ['Projection'], + numeric_grad_delta=0.0000005, + check_dygraph=False) class TestLstmpOpHasInitial(TestLstmpOp): + def reset_argument(self): self.has_initial_state = True @@ -213,11 +216,10 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( (N, self.D)).astype('float64') - self.check_grad( - ['Input', 'Weight', 'ProjWeight', 'Bias', 'H0', 'C0'], - ['Projection'], - numeric_grad_delta=0.0000005, - check_dygraph=False) + self.check_grad(['Input', 'Weight', 'ProjWeight', 'Bias', 'H0', 'C0'], + ['Projection'], + numeric_grad_delta=0.0000005, + check_dygraph=False) def test_check_grad_ingore_bias(self): N = len(self.lod[0]) @@ -225,11 +227,10 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( (N, self.D)).astype('float64') - self.check_grad( - ['Input', 'ProjWeight', 'Weight'], ['Projection'], - numeric_grad_delta=0.0000005, - no_grad_set=set('Bias'), - check_dygraph=False) + self.check_grad(['Input', 'ProjWeight', 'Weight'], ['Projection'], + numeric_grad_delta=0.0000005, + no_grad_set=set('Bias'), + check_dygraph=False) def test_check_grad_ingore_weight(self): N = len(self.lod[0]) @@ -237,11 +238,10 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( (N, self.D)).astype('float64') - self.check_grad( - ['Input', 'ProjWeight', 'Bias'], ['Projection'], - numeric_grad_delta=0.0000005, - no_grad_set=set('Weight'), - check_dygraph=False) + self.check_grad(['Input', 'ProjWeight', 'Bias'], ['Projection'], + numeric_grad_delta=0.0000005, + no_grad_set=set('Weight'), + check_dygraph=False) def test_check_grad_ingore_proj_weight(self): N = len(self.lod[0]) @@ -249,11 +249,10 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( (N, self.D)).astype('float64') - self.check_grad( - ['Input', 'Weight', 'Bias'], ['Projection'], - numeric_grad_delta=0.0000005, - no_grad_set=set('ProjWeight'), - check_dygraph=False) + self.check_grad(['Input', 'Weight', 'Bias'], ['Projection'], + numeric_grad_delta=0.0000005, + no_grad_set=set('ProjWeight'), + check_dygraph=False) def test_check_grad_ingore_input(self): N = len(self.lod[0]) @@ -261,11 +260,10 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( (N, self.D)).astype('float64') - self.check_grad( - ['Weight', 'ProjWeight', 'Bias'], ['Projection'], - numeric_grad_delta=0.0000005, - no_grad_set=set('Input'), - check_dygraph=False) + self.check_grad(['Weight', 'ProjWeight', 'Bias'], ['Projection'], + numeric_grad_delta=0.0000005, + no_grad_set=set('Input'), + check_dygraph=False) def test_check_grad_ingore_h0(self): N = len(self.lod[0]) @@ -273,11 +271,11 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( (N, self.D)).astype('float64') - 
self.check_grad( - ['Input', 'Weight', 'ProjWeight', 'Bias', 'C0'], ['Projection'], - numeric_grad_delta=0.0000005, - no_grad_set=set('H0'), - check_dygraph=False) + self.check_grad(['Input', 'Weight', 'ProjWeight', 'Bias', 'C0'], + ['Projection'], + numeric_grad_delta=0.0000005, + no_grad_set=set('H0'), + check_dygraph=False) def test_check_grad_ingore_c0(self): N = len(self.lod[0]) @@ -285,88 +283,93 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( (N, self.D)).astype('float64') - self.check_grad( - ['Input', 'Weight', 'ProjWeight', 'Bias', 'H0'], ['Projection'], - numeric_grad_delta=0.0000005, - no_grad_set=set('C0'), - check_dygraph=False) + self.check_grad(['Input', 'Weight', 'ProjWeight', 'Bias', 'H0'], + ['Projection'], + numeric_grad_delta=0.0000005, + no_grad_set=set('C0'), + check_dygraph=False) class TestLstmpOpRerverse(TestLstmpOp): + def reset_argument(self): self.is_reverse = True class TestLstmpOpNotUsePeepholes(TestLstmpOp): + def reset_argument(self): self.use_peepholes = False class TestLstmpOpLinearProjection(TestLstmpOp): + def reset_argument(self): self.act_proj = 'identity' class TestLstmpOpLen0Case1(TestLstmpOp): + def reset_argument(self): self.lod = [[0, 4, 0]] class TestLstmpOpLen0Case2(TestLstmpOp): + def reset_argument(self): self.lod = [[2, 0, 3]] class TestLstmpOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): def test_Variable(): input_data = np.random.random((1, 2048)).astype("float32") - fluid.layers.dynamic_lstmp( - input=input_data, - size=2048, - proj_size=256, - use_peepholes=False, - is_reverse=True, - cell_activation="tanh", - proj_activation="tanh") + fluid.layers.dynamic_lstmp(input=input_data, + size=2048, + proj_size=256, + use_peepholes=False, + is_reverse=True, + cell_activation="tanh", + proj_activation="tanh") self.assertRaises(TypeError, test_Variable) def test_h_0(): - in_data = fluid.data( - name="input", shape=[None, 2048], dtype="float32") + in_data = fluid.data(name="input", + shape=[None, 2048], + dtype="float32") h = fluid.data(name="h", shape=[None, 512], dtype="int32") c = fluid.data(name="c", shape=[None, 512], dtype="float32") - fluid.layers.dynamic_lstmp( - input=in_data, - size=2048, - proj_size=256, - use_peepholes=False, - is_reverse=True, - cell_activation="tanh", - proj_activation="tanh", - h_0=h, - c_0=c) + fluid.layers.dynamic_lstmp(input=in_data, + size=2048, + proj_size=256, + use_peepholes=False, + is_reverse=True, + cell_activation="tanh", + proj_activation="tanh", + h_0=h, + c_0=c) self.assertRaises(TypeError, test_h_0) def test_c_0(): - in_data_ = fluid.data( - name="input_", shape=[None, 2048], dtype="float32") + in_data_ = fluid.data(name="input_", + shape=[None, 2048], + dtype="float32") h_ = fluid.data(name="h_", shape=[None, 512], dtype="float32") c_ = fluid.data(name="c_", shape=[None, 512], dtype="int32") - fluid.layers.dynamic_lstmp( - input=in_data_, - size=2048, - proj_size=256, - use_peepholes=False, - is_reverse=True, - cell_activation="tanh", - proj_activation="tanh", - h_0=h_, - c_0=c_) + fluid.layers.dynamic_lstmp(input=in_data_, + size=2048, + proj_size=256, + use_peepholes=False, + is_reverse=True, + cell_activation="tanh", + proj_activation="tanh", + h_0=h_, + c_0=c_) self.assertRaises(TypeError, test_c_0) diff --git a/python/paddle/fluid/tests/unittests/test_lu_op.py b/python/paddle/fluid/tests/unittests/test_lu_op.py index 
1f1e3d1a2fb..2989a030740 100644 --- a/python/paddle/fluid/tests/unittests/test_lu_op.py +++ b/python/paddle/fluid/tests/unittests/test_lu_op.py @@ -68,7 +68,9 @@ def Pmat_to_perm(Pmat_org, cut): sP[idx, :] = tmp permmat.append(permlst) - Pivot = np.array(permmat).reshape(list(shape[:-2]) + [rows, ]) + 1 + Pivot = np.array(permmat).reshape(list(shape[:-2]) + [ + rows, + ]) + 1 return Pivot[..., :cut] @@ -111,18 +113,18 @@ class TestLUOp(OpTest): lshape = np.array(sL.shape) ushape = np.array(sU.shape) - lpad = (len(sL.shape) - 2) * [(0, 0)] + list(( - (0, (ashape - lshape)[-2]), (0, (ashape - lshape)[-1]))) - upad = (len(sU.shape) - 2) * [(0, 0)] + list(( - (0, (ashape - ushape)[-2]), (0, (ashape - ushape)[-1]))) + lpad = (len(sL.shape) - 2) * [(0, 0)] + list( + ((0, (ashape - lshape)[-2]), (0, (ashape - lshape)[-1]))) + upad = (len(sU.shape) - 2) * [(0, 0)] + list( + ((0, (ashape - ushape)[-2]), (0, (ashape - ushape)[-1]))) NsL = np.pad(sL, lpad) NsU = np.pad(sU, upad) NLU = NsL + NsU self.output = NLU self.Pivots = Pmat_to_perm(sP, min(ashape[-2], ashape[-1])) - self.Infos = np.zeros(self.x_shape[:-2]) if len( - X.shape) > 2 else np.array([0]) + self.Infos = np.zeros( + self.x_shape[:-2]) if len(X.shape) > 2 else np.array([0]) def setUp(self): self.op_type = "lu" @@ -171,7 +173,9 @@ class TestLUOp3(TestLUOp): class TestLUAPI(unittest.TestCase): + def test_dygraph(self): + def run_lu_dygraph(shape, dtype): if dtype == "float32": np_dtype = np.float32 @@ -246,17 +250,20 @@ class TestLUAPI(unittest.TestCase): lshape = np.array(sL.shape) ushape = np.array(sU.shape) - lpad = (len(sL.shape) - 2) * [(0, 0)] + list(( - (0, (ashape - lshape)[-2]), (0, (ashape - lshape)[-1]))) - upad = (len(sU.shape) - 2) * [(0, 0)] + list(( - (0, (ashape - ushape)[-2]), (0, (ashape - ushape)[-1]))) + lpad = (len(sL.shape) - 2) * [(0, 0)] + list( + ((0, (ashape - lshape)[-2]), (0, + (ashape - lshape)[-1]))) + upad = (len(sU.shape) - 2) * [(0, 0)] + list( + ((0, (ashape - ushape)[-2]), (0, + (ashape - ushape)[-1]))) NsL = np.pad(sL, lpad) NsU = np.pad(sU, upad) NLU = NsL + NsU - x = paddle.fluid.data( - name="input", shape=shape, dtype=dtype) + x = paddle.fluid.data(name="input", + shape=shape, + dtype=dtype) lu, p = paddle.linalg.lu(x, pivot=pivot) exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), diff --git a/python/paddle/fluid/tests/unittests/test_lu_unpack_op.py b/python/paddle/fluid/tests/unittests/test_lu_unpack_op.py index 0aff38cb785..1757adef8e3 100644 --- a/python/paddle/fluid/tests/unittests/test_lu_unpack_op.py +++ b/python/paddle/fluid/tests/unittests/test_lu_unpack_op.py @@ -74,7 +74,9 @@ def Pmat_to_perm(Pmat_org, cut): sP[idx, :] = tmp permmat.append(permlst) - Pivot = np.array(permmat).reshape(list(shape[:-2]) + [rows, ]) + 1 + Pivot = np.array(permmat).reshape(list(shape[:-2]) + [ + rows, + ]) + 1 return Pivot[..., :cut] @@ -130,8 +132,9 @@ class TestLU_UnpackOp(OpTest): place = fluid.CPUPlace() if core.is_compiled_with_cuda(): place = fluid.CUDAPlace(0) - xv = paddle.fluid.data( - name="input", shape=self.x_shape, dtype=self.dtype) + xv = paddle.fluid.data(name="input", + shape=self.x_shape, + dtype=self.dtype) lu, p = paddle.linalg.lu(xv) exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), @@ -186,7 +189,9 @@ class TestLU_UnpackOp3(TestLU_UnpackOp): class TestLU_UnpackAPI(unittest.TestCase): + def test_dygraph(self): + def run_lu_unpack_dygraph(shape, dtype): if dtype == "float32": np_dtype = np.float32 @@ -247,8 +252,9 @@ class 
TestLU_UnpackAPI(unittest.TestCase): with fluid.program_guard(fluid.Program(), fluid.Program()): sP, sL, sU = scipy_lu_unpack(a) - x = paddle.fluid.data( - name="input", shape=shape, dtype=dtype) + x = paddle.fluid.data(name="input", + shape=shape, + dtype=dtype) lu, p = paddle.linalg.lu(x) pP, pL, pU = paddle.linalg.lu_unpack(lu, p) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_manual_seed.py b/python/paddle/fluid/tests/unittests/test_manual_seed.py index 75753dcd1e8..e42487df79a 100644 --- a/python/paddle/fluid/tests/unittests/test_manual_seed.py +++ b/python/paddle/fluid/tests/unittests/test_manual_seed.py @@ -23,6 +23,7 @@ import numpy as np class TestManualSeed(unittest.TestCase): + def test_seed(self): fluid.enable_dygraph() diff --git a/python/paddle/fluid/tests/unittests/test_margin_cross_entropy_op.py b/python/paddle/fluid/tests/unittests/test_margin_cross_entropy_op.py index 2b511b9eb44..c337736c881 100644 --- a/python/paddle/fluid/tests/unittests/test_margin_cross_entropy_op.py +++ b/python/paddle/fluid/tests/unittests/test_margin_cross_entropy_op.py @@ -69,6 +69,7 @@ def margin_cross_entropy(logits, @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestMarginCrossEntropyOp(OpTest): + def initParams(self): self.op_type = "margin_cross_entropy" self.axis = -1 @@ -99,8 +100,9 @@ class TestMarginCrossEntropyOp(OpTest): np.sum(np.square(weights), axis=0, keepdims=True)) logits = np.matmul(datas, weights) - labels = np.random.randint( - 0, self.num_class, (self.batch_dim, ), dtype="int64") + labels = np.random.randint(0, + self.num_class, (self.batch_dim, ), + dtype="int64") loss, softmax = margin_cross_entropy(logits, labels, self.axis, self.margin1, self.margin2, @@ -128,20 +130,21 @@ class TestMarginCrossEntropyOp(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestMarginCrossEntropyOpFP32(TestMarginCrossEntropyOp): + def init_dtype(self): self.dtype = np.float32 def test_check_grad(self): - self.check_grad_with_place( - core.CUDAPlace(0), ["Logits"], - "Loss", - numeric_grad_delta=5e-2, - max_relative_error=5e-2) + self.check_grad_with_place(core.CUDAPlace(0), ["Logits"], + "Loss", + numeric_grad_delta=5e-2, + max_relative_error=5e-2) @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestMarginCrossEntropyOpFP16(TestMarginCrossEntropyOp): + def init_dtype(self): self.dtype = np.float16 @@ -149,16 +152,16 @@ class TestMarginCrossEntropyOpFP16(TestMarginCrossEntropyOp): self.check_output_with_place(core.CUDAPlace(0), atol=5e-2) def test_check_grad(self): - self.check_grad_with_place( - core.CUDAPlace(0), ["Logits"], - "Loss", - numeric_grad_delta=6e-1, - max_relative_error=6e-1) + self.check_grad_with_place(core.CUDAPlace(0), ["Logits"], + "Loss", + numeric_grad_delta=6e-1, + max_relative_error=6e-1) @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestMarginCrossEntropyOpCosFace(TestMarginCrossEntropyOp): + def init_loss_params(self): self.margin1 = 1.0 self.margin2 = 0.0 @@ -169,6 +172,7 @@ class TestMarginCrossEntropyOpCosFace(TestMarginCrossEntropyOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestMarginCrossEntropyOpSphereFace(TestMarginCrossEntropyOp): + def init_loss_params(self): self.margin1 = 1.35 self.margin2 = 0.0 @@ -177,6 +181,7 @@ class TestMarginCrossEntropyOpSphereFace(TestMarginCrossEntropyOp): class 
TestMarginCrossEntropyOpCPU(TestMarginCrossEntropyOp): + def test_check_output(self): try: self.check_output_with_place(core.CPUPlace(), atol=1e-5) @@ -193,6 +198,7 @@ class TestMarginCrossEntropyOpCPU(TestMarginCrossEntropyOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestMarginCrossEntropyOpV2(unittest.TestCase): + def setUp(self): self.initParams() np.random.seed(self.seed) @@ -239,19 +245,22 @@ class TestMarginCrossEntropyOpV2(unittest.TestCase): np.sum(np.square(weights), axis=0, keepdims=True)) logits_np = np.matmul(datas, weights) - labels_np = np.random.randint( - 0, self.num_class, (self.batch_dim, ), dtype="int64") - - loss_np, softmax_np = margin_cross_entropy( - logits_np, labels_np, self.axis, self.margin1, self.margin2, - self.margin3, self.scale, self.reduction) - - logits = paddle.static.data( - name='logits', - shape=[self.batch_dim, self.num_class], - dtype=self.dtype) - label = paddle.static.data( - name='label', shape=[self.batch_dim], dtype="int64") + labels_np = np.random.randint(0, + self.num_class, (self.batch_dim, ), + dtype="int64") + + loss_np, softmax_np = margin_cross_entropy(logits_np, labels_np, + self.axis, self.margin1, + self.margin2, + self.margin3, self.scale, + self.reduction) + + logits = paddle.static.data(name='logits', + shape=[self.batch_dim, self.num_class], + dtype=self.dtype) + label = paddle.static.data(name='label', + shape=[self.batch_dim], + dtype="int64") loss, softmax = paddle.nn.functional.margin_cross_entropy( logits, label, @@ -263,11 +272,13 @@ class TestMarginCrossEntropyOpV2(unittest.TestCase): reduction=self.reduction) exe = paddle.fluid.Executor(place) - [loss_res, softmax_res] = exe.run( - paddle.fluid.default_main_program(), - feed={'logits': logits_np, - 'label': labels_np}, - fetch_list=[loss, softmax]) + [loss_res, + softmax_res] = exe.run(paddle.fluid.default_main_program(), + feed={ + 'logits': logits_np, + 'label': labels_np + }, + fetch_list=[loss, softmax]) np.testing.assert_allclose(loss_res, loss_np) np.testing.assert_allclose(softmax_res, softmax_np) @@ -287,12 +298,15 @@ class TestMarginCrossEntropyOpV2(unittest.TestCase): np.sum(np.square(weights), axis=0, keepdims=True)) logits_np = np.matmul(datas, weights) - labels_np = np.random.randint( - 0, self.num_class, (self.batch_dim, ), dtype="int64") + labels_np = np.random.randint(0, + self.num_class, (self.batch_dim, ), + dtype="int64") - loss_np, softmax_np = margin_cross_entropy( - logits_np, labels_np, self.axis, self.margin1, self.margin2, - self.margin3, self.scale, self.reduction) + loss_np, softmax_np = margin_cross_entropy(logits_np, labels_np, + self.axis, self.margin1, + self.margin2, + self.margin3, self.scale, + self.reduction) logits = paddle.to_tensor(logits_np, dtype=self.dtype) labels = paddle.to_tensor(labels_np, dtype="int64") @@ -316,6 +330,7 @@ class TestMarginCrossEntropyOpV2(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestMarginCrossEntropyOpV3(TestMarginCrossEntropyOpV2): + def init_reduction(self): self.reduction = 'mean' @@ -323,6 +338,7 @@ class TestMarginCrossEntropyOpV3(TestMarginCrossEntropyOpV2): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestMarginCrossEntropyOpV4(TestMarginCrossEntropyOpV2): + def init_reduction(self): self.reduction = 'sum' @@ -330,6 +346,7 @@ class TestMarginCrossEntropyOpV4(TestMarginCrossEntropyOpV2): @unittest.skipIf(not core.is_compiled_with_cuda(), 
"core is not compiled with CUDA") class TestMarginCrossEntropyOpAPIError(unittest.TestCase): + def setUp(self): self.initParams() np.random.seed(self.seed) @@ -357,11 +374,14 @@ class TestMarginCrossEntropyOpAPIError(unittest.TestCase): self.dtype = np.float64 def test_dynamic_errors(self): + def test_dim(): for place in self.places: with paddle.fluid.dygraph.guard(place): - labels_np = np.random.randint( - 0, self.num_class, (self.batch_dim, 2), dtype="int64") + labels_np = np.random.randint(0, + self.num_class, + (self.batch_dim, 2), + dtype="int64") logits_np = np.random.uniform( -0.99, 0.99, [self.batch_dim, self.num_class]).astype(self.dtype) @@ -381,9 +401,9 @@ class TestMarginCrossEntropyOpAPIError(unittest.TestCase): def test_label_type(): for place in self.places: with paddle.fluid.dygraph.guard(place): - labels_np = np.random.uniform( - 0, self.num_class, - (self.batch_dim, 1)).astype(self.dtype) + labels_np = np.random.uniform(0, self.num_class, + (self.batch_dim, 1)).astype( + self.dtype) logits_np = np.random.uniform( -0.99, 0.99, [self.batch_dim, self.num_class]).astype(self.dtype) @@ -403,8 +423,10 @@ class TestMarginCrossEntropyOpAPIError(unittest.TestCase): def test_group_value(): for place in self.places: with paddle.fluid.dygraph.guard(place): - labels_np = np.random.randint( - 0, self.num_class, (self.batch_dim, ), dtype="int64") + labels_np = np.random.randint(0, + self.num_class, + (self.batch_dim, ), + dtype="int64") logits_np = np.random.uniform( -0.99, 0.99, [self.batch_dim, self.num_class]).astype(self.dtype) diff --git a/python/paddle/fluid/tests/unittests/test_margin_rank_loss_op.py b/python/paddle/fluid/tests/unittests/test_margin_rank_loss_op.py index 86fda635baa..e1ae71a9d7a 100644 --- a/python/paddle/fluid/tests/unittests/test_margin_rank_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_margin_rank_loss_op.py @@ -21,6 +21,7 @@ from paddle import fluid class TestMarginRankLossOp(OpTest): + def setUp(self): self.op_type = "margin_rank_loss" batch_size = 5 @@ -53,6 +54,7 @@ class TestMarginRankLossOp(OpTest): class TestMarginRankLossLayer(unittest.TestCase): + def setUp(self): self.batch_size = 5 self.margin = 0.5 @@ -86,12 +88,13 @@ class TestMarginRankLossLayer(unittest.TestCase): exe = fluid.Executor(place) exe.run(start) - out_np, = exe.run( - main, - feed={"label": self.label, - "x1": self.x1, - "x2": self.x2}, - fetch_list=[out]) + out_np, = exe.run(main, + feed={ + "label": self.label, + "x1": self.x1, + "x2": self.x2 + }, + fetch_list=[out]) np.testing.assert_allclose(out_np, self.loss) diff --git a/python/paddle/fluid/tests/unittests/test_marker_op.py b/python/paddle/fluid/tests/unittests/test_marker_op.py index 3f9f8c7d6bc..cdf132b72b1 100644 --- a/python/paddle/fluid/tests/unittests/test_marker_op.py +++ b/python/paddle/fluid/tests/unittests/test_marker_op.py @@ -18,6 +18,7 @@ from paddle.distributed.fleet.meta_optimizers.common import OpRole class TestMarkerOp(OpTest): + def setUp(self): self.op_type = "marker" self.inputs = {} diff --git a/python/paddle/fluid/tests/unittests/test_masked_select_op.py b/python/paddle/fluid/tests/unittests/test_masked_select_op.py index 764f4806ba4..2bd2a8f4549 100644 --- a/python/paddle/fluid/tests/unittests/test_masked_select_op.py +++ b/python/paddle/fluid/tests/unittests/test_masked_select_op.py @@ -30,6 +30,7 @@ def np_masked_select(x, mask): class TestMaskedSelectOp(OpTest): + def setUp(self): self.init() self.op_type = "masked_select" @@ -51,16 +52,19 @@ class TestMaskedSelectOp(OpTest): class 
TestMaskedSelectOp1(TestMaskedSelectOp): + def init(self): self.shape = (6, 8, 9, 18) class TestMaskedSelectOp2(TestMaskedSelectOp): + def init(self): self.shape = (168, ) class TestMaskedSelectAPI(unittest.TestCase): + def test_imperative_mode(self): paddle.disable_static() shape = (88, 6, 8) @@ -86,13 +90,16 @@ class TestMaskedSelectAPI(unittest.TestCase): exe = paddle.static.Executor(place=paddle.CPUPlace()) res = exe.run(paddle.static.default_main_program(), - feed={"x": np_x, - "mask": np_mask}, + feed={ + "x": np_x, + "mask": np_mask + }, fetch_list=[out]) self.assertEqual(np.allclose(res, np_out), True) class TestMaskedSelectError(unittest.TestCase): + def test_error(self): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): @@ -100,8 +107,9 @@ class TestMaskedSelectError(unittest.TestCase): shape = [8, 9, 6] x = paddle.fluid.data(shape=shape, dtype='float32', name='x') mask = paddle.fluid.data(shape=shape, dtype='bool', name='mask') - mask_float = paddle.fluid.data( - shape=shape, dtype='float32', name='mask_float') + mask_float = paddle.fluid.data(shape=shape, + dtype='float32', + name='mask_float') np_x = np.random.random(shape).astype('float32') np_mask = np.array(np.random.randint(2, size=shape, dtype=bool)) diff --git a/python/paddle/fluid/tests/unittests/test_match_matrix_tensor_op.py b/python/paddle/fluid/tests/unittests/test_match_matrix_tensor_op.py index 5784d3b5d74..3c41ad0f93d 100644 --- a/python/paddle/fluid/tests/unittests/test_match_matrix_tensor_op.py +++ b/python/paddle/fluid/tests/unittests/test_match_matrix_tensor_op.py @@ -21,6 +21,7 @@ import paddle.fluid as fluid class TestMatchMatrixTensorOp(OpTest): + def setUp(self): self.init_op_type() self.set_data() @@ -78,6 +79,7 @@ class TestMatchMatrixTensorOp(OpTest): class TestMatchMatrixTensorOpCase1(TestMatchMatrixTensorOp): + def set_data(self): ix, iy, h, dim_t = [5, 8, 25, 4] x_lod = [[5]] @@ -86,6 +88,7 @@ class TestMatchMatrixTensorOpCase1(TestMatchMatrixTensorOp): class TestMatchMatrixTensorOpCase2(TestMatchMatrixTensorOp): + def set_data(self): ix, iy, h, dim_t = [105, 120, 1, 4] x_lod = [[30, 45, 30]] @@ -94,6 +97,7 @@ class TestMatchMatrixTensorOpCase2(TestMatchMatrixTensorOp): class TestMatchMatrixTensorOpCase3(TestMatchMatrixTensorOp): + def set_data(self): ix, iy, h, dim_t = [5, 9, 32, 1] x_lod = [[1, 2, 2]] @@ -102,6 +106,7 @@ class TestMatchMatrixTensorOpCase3(TestMatchMatrixTensorOp): class TestMatchMatrixTensorOpCase4(TestMatchMatrixTensorOp): + def set_data(self): ix, iy, h, dim_t = [8, 12, 16, 5] x_lod = [[1, 2, 3, 1, 1]] @@ -111,8 +116,9 @@ class TestMatchMatrixTensorOpCase4(TestMatchMatrixTensorOp): def test_api(self): x_lod_tensor = fluid.layers.data(name='x', shape=[10], lod_level=1) y_lod_tensor = fluid.layers.data(name='y', shape=[10], lod_level=1) - out, out_tmp = fluid.contrib.match_matrix_tensor( - x=x_lod_tensor, y=y_lod_tensor, channel_num=3) + out, out_tmp = fluid.contrib.match_matrix_tensor(x=x_lod_tensor, + y=y_lod_tensor, + channel_num=3) place = fluid.CPUPlace() x_data = np.random.rand(7, 10).astype('float32') @@ -122,8 +128,10 @@ class TestMatchMatrixTensorOpCase4(TestMatchMatrixTensorOp): exe = fluid.Executor(place=place) exe.run(fluid.default_startup_program()) - ret = exe.run(feed={'x': x, - 'y': y}, + ret = exe.run(feed={ + 'x': x, + 'y': y + }, fetch_list=[out], return_numpy=False) diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch.py b/python/paddle/fluid/tests/unittests/test_math_op_patch.py index 258543631f9..9dd47647a1a 
100644 --- a/python/paddle/fluid/tests/unittests/test_math_op_patch.py +++ b/python/paddle/fluid/tests/unittests/test_math_op_patch.py @@ -23,6 +23,7 @@ import numpy as np class TestMathOpPatches(unittest.TestCase): + def setUp(self): paddle.enable_static() @@ -141,8 +142,10 @@ class TestMathOpPatches(unittest.TestCase): a_np = numpy.random.random(size=[10, 1]).astype('float32') b_np = numpy.random.random(size=[10, 1]).astype('float32') + 1e-2 c_np = exe.run(fluid.default_main_program(), - feed={"a": a_np, - 'b': b_np}, + feed={ + "a": a_np, + 'b': b_np + }, fetch_list=[c]) self.assertTrue(numpy.allclose(a_np / b_np, c_np)) @@ -156,8 +159,10 @@ class TestMathOpPatches(unittest.TestCase): a_np = numpy.random.random(size=[10, 1]).astype('float32') b_np = numpy.random.random(size=[10, 1]).astype('float32') c_np = exe.run(fluid.default_main_program(), - feed={"a": a_np, - 'b': b_np}, + feed={ + "a": a_np, + 'b': b_np + }, fetch_list=[c]) self.assertTrue(numpy.allclose(a_np * b_np, c_np)) @@ -171,8 +176,10 @@ class TestMathOpPatches(unittest.TestCase): a_np = numpy.random.random(size=[10, 1]).astype('float32') b_np = numpy.random.random(size=[10, 1]).astype('float32') c_np = exe.run(fluid.default_main_program(), - feed={"a": a_np, - 'b': b_np}, + feed={ + "a": a_np, + 'b': b_np + }, fetch_list=[c]) self.assertTrue(numpy.allclose(a_np + b_np, c_np)) @@ -186,8 +193,10 @@ class TestMathOpPatches(unittest.TestCase): a_np = numpy.random.random(size=[10, 1]).astype('float32') b_np = numpy.random.random(size=[10, 1]).astype('float32') c_np = exe.run(fluid.default_main_program(), - feed={"a": a_np, - 'b': b_np}, + feed={ + "a": a_np, + 'b': b_np + }, fetch_list=[c]) self.assertTrue(numpy.allclose(a_np - b_np, c_np)) @@ -217,8 +226,10 @@ class TestMathOpPatches(unittest.TestCase): b_np = numpy.array([3, 4, 11, 15, 8, 18]).astype('float32') c_np, = exe.run(fluid.default_main_program(), - feed={"a": a_np, - "b": b_np}, + feed={ + "a": a_np, + "b": b_np + }, fetch_list=[c]) self.assertTrue(numpy.array_equal(c_np, a_np == b_np)) @@ -239,8 +250,10 @@ class TestMathOpPatches(unittest.TestCase): a_np = numpy.array([3, 4, 10, 14, 9, 18]).astype('float') b_np = numpy.array([3, 4, 11, 15, 8, 18]).astype('float') c_np, = exe.run(fluid.default_main_program(), - feed={"a": a_np, - "b": b_np}, + feed={ + "a": a_np, + "b": b_np + }, fetch_list=[c]) self.assertTrue(numpy.array_equal(c_np, a_np - b_np)) @@ -282,8 +295,10 @@ class TestMathOpPatches(unittest.TestCase): exe = fluid.Executor() out = exe.run(fluid.default_main_program(), - feed={"x": x_np, - "y": y_np}, + feed={ + "x": x_np, + "y": y_np + }, fetch_list=[z]) self.assertTrue(np.array_equal(out[0], out_np)) @@ -299,8 +314,10 @@ class TestMathOpPatches(unittest.TestCase): exe = fluid.Executor() out = exe.run(fluid.default_main_program(), - feed={"x": x_np, - "y": y_np}, + feed={ + "x": x_np, + "y": y_np + }, fetch_list=[z]) self.assertTrue(np.array_equal(out[0], out_np)) @@ -316,8 +333,10 @@ class TestMathOpPatches(unittest.TestCase): exe = fluid.Executor() out = exe.run(fluid.default_main_program(), - feed={"x": x_np, - "y": y_np}, + feed={ + "x": x_np, + "y": y_np + }, fetch_list=[z]) self.assertTrue(np.array_equal(out[0], out_np)) @@ -360,16 +379,18 @@ class TestMathOpPatches(unittest.TestCase): def test_matmul(self): a = paddle.static.data(name='a', shape=[2, 3], dtype='float32') b = paddle.static.data(name='b', shape=[3, 5], dtype='float32') - c = a @b # __matmul__ + c = a @ b # __matmul__ a_np = numpy.random.uniform(-1, 1, size=[2, 3]).astype('float32') 
b_np = numpy.random.uniform(-1, 1, size=[3, 5]).astype('float32') place = paddle.CPUPlace() exe = paddle.static.Executor(place) c_np = exe.run(paddle.static.default_main_program(), - feed={"a": a_np, - "b": b_np}, + feed={ + "a": a_np, + "b": b_np + }, fetch_list=[c]) - self.assertTrue(numpy.allclose(a_np @b_np, c_np)) + self.assertTrue(numpy.allclose(a_np @ b_np, c_np)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py index 48aa530ff87..92fa9049dab 100644 --- a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class TestMathOpPatchesVarBase(unittest.TestCase): + def setUp(self): self.shape = [10, 1024] self.dtype = np.float32 @@ -388,12 +389,10 @@ class TestMathOpPatchesVarBase(unittest.TestCase): y = t * x self.assertTrue( - np.allclose( - y.numpy(), - t * np.ones( - (2, 2), dtype="float32"), - rtol=1e-05, - atol=0.0)) + np.allclose(y.numpy(), + t * np.ones((2, 2), dtype="float32"), + rtol=1e-05, + atol=0.0)) def test_np_left_mul(self): with _test_eager_guard(): @@ -482,50 +481,57 @@ class TestMathOpPatchesVarBase(unittest.TestCase): self.assertEqual(x.size, 6) self.assertEqual(x.numel(), 6) self.assertTrue(np.array_equal(x.exp().numpy(), paddle.exp(x).numpy())) - self.assertTrue( - np.array_equal(x.tanh().numpy(), paddle.tanh(x).numpy())) - self.assertTrue( - np.array_equal(x.atan().numpy(), paddle.atan(x).numpy())) + self.assertTrue(np.array_equal(x.tanh().numpy(), + paddle.tanh(x).numpy())) + self.assertTrue(np.array_equal(x.atan().numpy(), + paddle.atan(x).numpy())) self.assertTrue(np.array_equal(x.abs().numpy(), paddle.abs(x).numpy())) m = x.abs() + self.assertTrue(np.array_equal(m.sqrt().numpy(), + paddle.sqrt(m).numpy())) self.assertTrue( - np.array_equal(m.sqrt().numpy(), paddle.sqrt(m).numpy())) - self.assertTrue( - np.array_equal(m.rsqrt().numpy(), paddle.rsqrt(m).numpy())) + np.array_equal(m.rsqrt().numpy(), + paddle.rsqrt(m).numpy())) + self.assertTrue(np.array_equal(x.ceil().numpy(), + paddle.ceil(x).numpy())) self.assertTrue( - np.array_equal(x.ceil().numpy(), paddle.ceil(x).numpy())) - self.assertTrue( - np.array_equal(x.floor().numpy(), paddle.floor(x).numpy())) + np.array_equal(x.floor().numpy(), + paddle.floor(x).numpy())) self.assertTrue(np.array_equal(x.cos().numpy(), paddle.cos(x).numpy())) - self.assertTrue( - np.array_equal(x.acos().numpy(), paddle.acos(x).numpy())) - self.assertTrue( - np.array_equal(x.asin().numpy(), paddle.asin(x).numpy())) + self.assertTrue(np.array_equal(x.acos().numpy(), + paddle.acos(x).numpy())) + self.assertTrue(np.array_equal(x.asin().numpy(), + paddle.asin(x).numpy())) self.assertTrue(np.array_equal(x.sin().numpy(), paddle.sin(x).numpy())) - self.assertTrue( - np.array_equal(x.sinh().numpy(), paddle.sinh(x).numpy())) - self.assertTrue( - np.array_equal(x.cosh().numpy(), paddle.cosh(x).numpy())) - self.assertTrue( - np.array_equal(x.round().numpy(), paddle.round(x).numpy())) - self.assertTrue( - np.array_equal(x.reciprocal().numpy(), paddle.reciprocal(x).numpy( - ))) - self.assertTrue( - np.array_equal(x.square().numpy(), paddle.square(x).numpy())) - self.assertTrue( - np.array_equal(x.rank().numpy(), paddle.rank(x).numpy())) - self.assertTrue( - np.array_equal(x[0].t().numpy(), paddle.t(x[0]).numpy())) - self.assertTrue( - 
np.array_equal(x.asinh().numpy(), paddle.asinh(x).numpy())) + self.assertTrue(np.array_equal(x.sinh().numpy(), + paddle.sinh(x).numpy())) + self.assertTrue(np.array_equal(x.cosh().numpy(), + paddle.cosh(x).numpy())) + self.assertTrue( + np.array_equal(x.round().numpy(), + paddle.round(x).numpy())) + self.assertTrue( + np.array_equal(x.reciprocal().numpy(), + paddle.reciprocal(x).numpy())) + self.assertTrue( + np.array_equal(x.square().numpy(), + paddle.square(x).numpy())) + self.assertTrue(np.array_equal(x.rank().numpy(), + paddle.rank(x).numpy())) + self.assertTrue(np.array_equal(x[0].t().numpy(), + paddle.t(x[0]).numpy())) + self.assertTrue( + np.array_equal(x.asinh().numpy(), + paddle.asinh(x).numpy())) ### acosh(x) = nan, need to change input t_np = np.random.uniform(1, 2, [2, 3]).astype(self.dtype) t = paddle.to_tensor(t_np) self.assertTrue( - np.array_equal(t.acosh().numpy(), paddle.acosh(t).numpy())) + np.array_equal(t.acosh().numpy(), + paddle.acosh(t).numpy())) self.assertTrue( - np.array_equal(x.atanh().numpy(), paddle.atanh(x).numpy())) + np.array_equal(x.atanh().numpy(), + paddle.atanh(x).numpy())) d = paddle.to_tensor([[1.2285208, 1.3491015, 1.4899898], [1.30058, 1.0688717, 1.4928783], [1.0958099, 1.3724753, 1.8926544]]) @@ -533,62 +539,74 @@ class TestMathOpPatchesVarBase(unittest.TestCase): # ROCM not support cholesky if not fluid.core.is_compiled_with_rocm(): self.assertTrue( - np.array_equal(d.cholesky().numpy(), paddle.cholesky(d).numpy( - ))) + np.array_equal(d.cholesky().numpy(), + paddle.cholesky(d).numpy())) self.assertTrue( - np.array_equal(x.is_empty().numpy(), paddle.is_empty(x).numpy())) + np.array_equal(x.is_empty().numpy(), + paddle.is_empty(x).numpy())) self.assertTrue( - np.array_equal(x.isfinite().numpy(), paddle.isfinite(x).numpy())) + np.array_equal(x.isfinite().numpy(), + paddle.isfinite(x).numpy())) self.assertTrue( np.array_equal( - x.cast('int32').numpy(), paddle.cast(x, 'int32').numpy())) + x.cast('int32').numpy(), + paddle.cast(x, 'int32').numpy())) self.assertTrue( np.array_equal( x.expand([3, 2, 3]).numpy(), paddle.expand(x, [3, 2, 3]).numpy())) self.assertTrue( np.array_equal( - x.tile([2, 2]).numpy(), paddle.tile(x, [2, 2]).numpy())) + x.tile([2, 2]).numpy(), + paddle.tile(x, [2, 2]).numpy())) self.assertTrue( - np.array_equal(x.flatten().numpy(), paddle.flatten(x).numpy())) + np.array_equal(x.flatten().numpy(), + paddle.flatten(x).numpy())) index = paddle.to_tensor([0, 1]) self.assertTrue( np.array_equal( - x.gather(index).numpy(), paddle.gather(x, index).numpy())) + x.gather(index).numpy(), + paddle.gather(x, index).numpy())) index = paddle.to_tensor([[0, 1], [1, 2]]) self.assertTrue( np.array_equal( - x.gather_nd(index).numpy(), paddle.gather_nd(x, index).numpy())) + x.gather_nd(index).numpy(), + paddle.gather_nd(x, index).numpy())) self.assertTrue( np.array_equal( - x.reverse([0, 1]).numpy(), paddle.reverse(x, [0, 1]).numpy())) + x.reverse([0, 1]).numpy(), + paddle.reverse(x, [0, 1]).numpy())) self.assertTrue( np.array_equal( - a.reshape([3, 2]).numpy(), paddle.reshape(a, [3, 2]).numpy())) + a.reshape([3, 2]).numpy(), + paddle.reshape(a, [3, 2]).numpy())) self.assertTrue( np.array_equal( x.slice([0, 1], [0, 0], [1, 2]).numpy(), paddle.slice(x, [0, 1], [0, 0], [1, 2]).numpy())) self.assertTrue( np.array_equal( - x.split(2)[0].numpy(), paddle.split(x, 2)[0].numpy())) + x.split(2)[0].numpy(), + paddle.split(x, 2)[0].numpy())) m = paddle.to_tensor( np.random.uniform(-1, 1, [1, 6, 1, 1]).astype(self.dtype)) self.assertTrue( np.array_equal( - 
m.squeeze([]).numpy(), paddle.squeeze(m, []).numpy())) + m.squeeze([]).numpy(), + paddle.squeeze(m, []).numpy())) self.assertTrue( np.array_equal( - m.squeeze([1, 2]).numpy(), paddle.squeeze(m, [1, 2]).numpy())) + m.squeeze([1, 2]).numpy(), + paddle.squeeze(m, [1, 2]).numpy())) m = paddle.to_tensor([2, 3, 3, 1, 5, 3], 'float32') self.assertTrue( - np.array_equal(m.unique()[0].numpy(), paddle.unique(m)[0].numpy())) + np.array_equal(m.unique()[0].numpy(), + paddle.unique(m)[0].numpy())) self.assertTrue( np.array_equal( m.unique(return_counts=True)[1], - paddle.unique( - m, return_counts=True)[1])) + paddle.unique(m, return_counts=True)[1])) self.assertTrue(np.array_equal(x.flip([0]), paddle.flip(x, [0]))) self.assertTrue(np.array_equal(x.unbind(0), paddle.unbind(x, 0))) self.assertTrue(np.array_equal(x.roll(1), paddle.roll(x, 1))) @@ -602,56 +620,67 @@ class TestMathOpPatchesVarBase(unittest.TestCase): # 2. Binary operation self.assertTrue( - np.array_equal(x.divide(y).numpy(), paddle.divide(x, y).numpy())) + np.array_equal(x.divide(y).numpy(), + paddle.divide(x, y).numpy())) self.assertTrue( np.array_equal( x.matmul(y, True, False).numpy(), paddle.matmul(x, y, True, False).numpy())) self.assertTrue( np.array_equal( - x.norm( - p='fro', axis=[0, 1]).numpy(), - paddle.norm( - x, p='fro', axis=[0, 1]).numpy())) + x.norm(p='fro', axis=[0, 1]).numpy(), + paddle.norm(x, p='fro', axis=[0, 1]).numpy())) self.assertTrue( - np.array_equal(x.dist(y).numpy(), paddle.dist(x, y).numpy())) + np.array_equal(x.dist(y).numpy(), + paddle.dist(x, y).numpy())) self.assertTrue( - np.array_equal(x.cross(y).numpy(), paddle.cross(x, y).numpy())) + np.array_equal(x.cross(y).numpy(), + paddle.cross(x, y).numpy())) m = x.expand([2, 2, 3]) n = y.expand([2, 2, 3]).transpose([0, 2, 1]) self.assertTrue( - np.array_equal(m.bmm(n).numpy(), paddle.bmm(m, n).numpy())) + np.array_equal(m.bmm(n).numpy(), + paddle.bmm(m, n).numpy())) self.assertTrue( np.array_equal( x.histogram(5, -1, 1).numpy(), paddle.histogram(x, 5, -1, 1).numpy())) self.assertTrue( - np.array_equal(x.equal(y).numpy(), paddle.equal(x, y).numpy())) + np.array_equal(x.equal(y).numpy(), + paddle.equal(x, y).numpy())) self.assertTrue( np.array_equal( - x.greater_equal(y).numpy(), paddle.greater_equal(x, y).numpy())) + x.greater_equal(y).numpy(), + paddle.greater_equal(x, y).numpy())) self.assertTrue( np.array_equal( - x.greater_than(y).numpy(), paddle.greater_than(x, y).numpy())) + x.greater_than(y).numpy(), + paddle.greater_than(x, y).numpy())) self.assertTrue( np.array_equal( - x.less_equal(y).numpy(), paddle.less_equal(x, y).numpy())) + x.less_equal(y).numpy(), + paddle.less_equal(x, y).numpy())) self.assertTrue( np.array_equal( - x.less_than(y).numpy(), paddle.less_than(x, y).numpy())) + x.less_than(y).numpy(), + paddle.less_than(x, y).numpy())) self.assertTrue( np.array_equal( - x.not_equal(y).numpy(), paddle.not_equal(x, y).numpy())) + x.not_equal(y).numpy(), + paddle.not_equal(x, y).numpy())) self.assertTrue( np.array_equal( - x.equal_all(y).numpy(), paddle.equal_all(x, y).numpy())) + x.equal_all(y).numpy(), + paddle.equal_all(x, y).numpy())) self.assertTrue( np.array_equal( - x.allclose(y).numpy(), paddle.allclose(x, y).numpy())) + x.allclose(y).numpy(), + paddle.allclose(x, y).numpy())) m = x.expand([2, 2, 3]) self.assertTrue( np.array_equal( - x.expand_as(m).numpy(), paddle.expand_as(x, m).numpy())) + x.expand_as(m).numpy(), + paddle.expand_as(x, m).numpy())) index = paddle.to_tensor([2, 1, 0]) self.assertTrue( np.array_equal( @@ -663,24 +692,30 @@ 
class TestMathOpPatchesVarBase(unittest.TestCase): y = paddle.to_tensor([[False, False], [False, True]]) self.assertTrue( np.array_equal( - x.logical_and(y).numpy(), paddle.logical_and(x, y).numpy())) + x.logical_and(y).numpy(), + paddle.logical_and(x, y).numpy())) self.assertTrue( np.array_equal( - x.logical_not(y).numpy(), paddle.logical_not(x, y).numpy())) + x.logical_not(y).numpy(), + paddle.logical_not(x, y).numpy())) self.assertTrue( np.array_equal( - x.logical_or(y).numpy(), paddle.logical_or(x, y).numpy())) + x.logical_or(y).numpy(), + paddle.logical_or(x, y).numpy())) self.assertTrue( np.array_equal( - x.logical_xor(y).numpy(), paddle.logical_xor(x, y).numpy())) + x.logical_xor(y).numpy(), + paddle.logical_xor(x, y).numpy())) self.assertTrue( np.array_equal( - x.logical_and(y).numpy(), paddle.logical_and(x, y).numpy())) + x.logical_and(y).numpy(), + paddle.logical_and(x, y).numpy())) a = paddle.to_tensor([[1, 2], [3, 4]]) b = paddle.to_tensor([[4, 3], [2, 1]]) self.assertTrue( np.array_equal( - x.where(a, b).numpy(), paddle.where(x, a, b).numpy())) + x.where(a, b).numpy(), + paddle.where(x, a, b).numpy())) x_np = np.random.randn(3, 6, 9, 7) x = paddle.to_tensor(x_np) diff --git a/python/paddle/fluid/tests/unittests/test_matmul_op.py b/python/paddle/fluid/tests/unittests/test_matmul_op.py index aa67d923370..4b4a4c7e15f 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_op.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_op.py @@ -89,6 +89,7 @@ def reference_matmul(X, Y, transpose_X=False, transpose_Y=False): class Generator(object): + def setUp(self): self.op_type = "matmul" X = np.random.random(self.shape_X).astype("float32") @@ -108,26 +109,33 @@ class Generator(object): self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-3) def test_check_grad_ignore_x(self): - self.check_grad( - ['Y'], 'Out', max_relative_error=1e-3, no_grad_set=set("X")) + self.check_grad(['Y'], + 'Out', + max_relative_error=1e-3, + no_grad_set=set("X")) def test_check_grad_ignore_y(self): - self.check_grad( - ['X'], 'Out', max_relative_error=1e-3, no_grad_set=set('Y')) + self.check_grad(['X'], + 'Out', + max_relative_error=1e-3, + no_grad_set=set('Y')) class TestMatmulOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The inputs type of matmul_op must be Variable. input1 = 12 self.assertRaises(TypeError, fluid.layers.matmul, input1, input1) # The inputs dtype of matmul_op must be float32, float64. 
- input2 = fluid.layers.data( - name='input2', shape=[10, 10], dtype="int32") + input2 = fluid.layers.data(name='input2', + shape=[10, 10], + dtype="int32") self.assertRaises(TypeError, fluid.layers.matmul, input2, input2) - input3 = fluid.layers.data( - name='input3', shape=[2, 2], dtype="float16") + input3 = fluid.layers.data(name='input3', + shape=[2, 2], + dtype="float16") fluid.layers.matmul(input3, input3) @@ -163,8 +171,10 @@ def test_negative_dims_program(obj): obj.assertEqual(Ref.shape[idx], output.shape[idx]) exe = fluid.Executor(fluid.CPUPlace()) res, = exe.run(fluid.default_main_program(), - feed={'x': X, - 'y': Y}, + feed={ + 'x': X, + 'y': Y + }, fetch_list=[output]) np.allclose(res, Ref, atol=1e-5) @@ -175,13 +185,14 @@ def api_test(dim_x, dim_y, trans_x, trans_y): dim_x, dim_y, trans_x, trans_y)) shape_x, shape_y = generate_compatible_shapes(dim_x, dim_y, trans_x, trans_y) - globals()[test_name] = type(test_name, (unittest.TestCase, ), { - 'shape_X': shape_x, - 'shape_Y': shape_y, - 'transpose_X': trans_x, - 'transpose_Y': trans_y, - 'test_propram': test_negative_dims_program, - }) + globals()[test_name] = type( + test_name, (unittest.TestCase, ), { + 'shape_X': shape_x, + 'shape_Y': shape_y, + 'transpose_X': trans_x, + 'transpose_Y': trans_y, + 'test_propram': test_negative_dims_program, + }) # Generate operators cases for all possibilities @@ -190,12 +201,13 @@ def inject_test(dim_x, dim_y, trans_x, trans_y): dim_x, dim_y, trans_x, trans_y)) shape_x, shape_y = generate_compatible_shapes(dim_x, dim_y, trans_x, trans_y) - globals()[test_name] = type(test_name, (Generator, OpTest), { - 'shape_X': shape_x, - 'shape_Y': shape_y, - 'transpose_X': trans_x, - 'transpose_Y': trans_y, - }) + globals()[test_name] = type( + test_name, (Generator, OpTest), { + 'shape_X': shape_x, + 'shape_Y': shape_y, + 'transpose_X': trans_x, + 'transpose_Y': trans_y, + }) for dim_X in (1, 2, 3): @@ -270,17 +282,19 @@ for dim in [4]: test_name = ( 'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format( dim, dim, transpose_X, transpose_Y)) - shape_X, shape_Y = generate_compatible_shapes_ndim(dim, transpose_X, - transpose_Y) - globals()[test_name] = type(test_name, (Generator, OpTest), { - 'shape_X': shape_X, - 'shape_Y': shape_Y, - 'transpose_X': transpose_X, - 'transpose_Y': transpose_Y, - }) + shape_X, shape_Y = generate_compatible_shapes_ndim( + dim, transpose_X, transpose_Y) + globals()[test_name] = type( + test_name, (Generator, OpTest), { + 'shape_X': shape_X, + 'shape_Y': shape_Y, + 'transpose_X': transpose_X, + 'transpose_Y': transpose_Y, + }) class API_TestMm(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program()): x = fluid.data(name="x", shape=[2], dtype="float64") @@ -291,13 +305,11 @@ class API_TestMm(unittest.TestCase): data1 = np.random.rand(2) data2 = np.random.rand(2) np_res = exe.run(feed={'x': data1, 'y': data2}, fetch_list=[result]) - expected_result = np.matmul( - data1.reshape(1, 2), data2.reshape(2, 1)) + expected_result = np.matmul(data1.reshape(1, 2), + data2.reshape(2, 1)) self.assertTrue( - np.allclose( - np_res, expected_result, atol=1e-5), - "two value is\ + np.allclose(np_res, expected_result, atol=1e-5), "two value is\ {}\n{}, check diff!".format(np_res, expected_result)) def test_dygraph_without_out(self): @@ -313,6 +325,7 @@ class API_TestMm(unittest.TestCase): class Test_API_Matmul(unittest.TestCase): + def test_dygraph_without_out(self): device = fluid.CPUPlace() with fluid.dygraph.guard(device): @@ -326,7 +339,9 @@ class 
Test_API_Matmul(unittest.TestCase): class API_TestMmError(unittest.TestCase): + def test_errors(self): + def test_error1(): with fluid.program_guard(fluid.Program(), fluid.Program()): data1 = fluid.data(name="data1", shape=[10, 2], dtype="float32") @@ -337,20 +352,24 @@ class API_TestMmError(unittest.TestCase): def test_error2(): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.data( - name="data1", shape=[-1, 10, 2], dtype="float32") - data2 = fluid.data( - name="data2", shape=[-1, 2, 10], dtype="float32") + data1 = fluid.data(name="data1", + shape=[-1, 10, 2], + dtype="float32") + data2 = fluid.data(name="data2", + shape=[-1, 2, 10], + dtype="float32") paddle.mm(data1, data2) test_error2() def test_error3(): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.data( - name="data1", shape=[10, 10, 2], dtype="float32") - data2 = fluid.data( - name="data2", shape=[3, 2, 10], dtype="float32") + data1 = fluid.data(name="data1", + shape=[10, 10, 2], + dtype="float32") + data2 = fluid.data(name="data2", + shape=[3, 2, 10], + dtype="float32") paddle.mm(data1, data2) self.assertRaises(ValueError, test_error3) diff --git a/python/paddle/fluid/tests/unittests/test_matmul_op_with_head.py b/python/paddle/fluid/tests/unittests/test_matmul_op_with_head.py index e180faf3806..58cfc004092 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_op_with_head.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_op_with_head.py @@ -114,6 +114,7 @@ def reference_matmul_mul_head(X, # Generator for multiple head class GeneratorMulHead(object): + def setUp(self): self.op_type = "matmul" X = np.random.random(self.shape_X).astype("float32") @@ -137,15 +138,16 @@ def inject_test_multiple_head(dim_x, dim_y, trans_x, trans_y, head_number): test_name = ( 'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}_head_{}'.format( dim_x, dim_y, trans_x, trans_y, head_number)) - shape_x, shape_y = generate_compatible_shapes_mul_head(dim_x, dim_y, - trans_x, trans_y) - globals()[test_name] = type(test_name, (GeneratorMulHead, OpTest), { - 'shape_X': shape_x, - 'shape_Y': shape_y, - 'transpose_X': trans_x, - 'transpose_Y': trans_y, - 'head_number': head_number - }) + shape_x, shape_y = generate_compatible_shapes_mul_head( + dim_x, dim_y, trans_x, trans_y) + globals()[test_name] = type( + test_name, (GeneratorMulHead, OpTest), { + 'shape_X': shape_x, + 'shape_Y': shape_y, + 'transpose_X': trans_x, + 'transpose_Y': trans_y, + 'head_number': head_number + }) def matmul_head2(X, Y, head_number=1): @@ -227,26 +229,29 @@ def generate_compatible_shapes_mul_head2(dim_X, dim_Y, transpose_X, # Generator for multiple head, case 2 when width of X is not same as height of Y class GeneratorMulHead2(object): + def setUp(self): self.op_type = "matmul" X = np.zeros(self.shape_X) Y = np.zeros(self.shape_Y) if len(self.shape_X) == 2: - X = np.arange( - 0, self.shape_X[-1] * self.shape_X[-2], - dtype=np.float32).reshape(self.shape_X) - Y = np.arange( - 0, self.shape_Y[-1] * self.shape_Y[-2], - dtype=np.float32).reshape(self.shape_Y) + X = np.arange(0, + self.shape_X[-1] * self.shape_X[-2], + dtype=np.float32).reshape(self.shape_X) + Y = np.arange(0, + self.shape_Y[-1] * self.shape_Y[-2], + dtype=np.float32).reshape(self.shape_Y) else: for i in range(0, len(self.shape_X) - 1): - X[i, :, :] = np.arange( - 0, self.shape_X[-1] * self.shape_X[-2], - dtype=np.float32).reshape(list(self.shape_X)[-2:]) - Y[i, :, :] = np.arange( - 0, self.shape_Y[-1] * self.shape_Y[-2], - 
dtype=np.float32).reshape(list(self.shape_Y)[-2:]) + X[i, :, :] = np.arange(0, + self.shape_X[-1] * self.shape_X[-2], + dtype=np.float32).reshape( + list(self.shape_X)[-2:]) + Y[i, :, :] = np.arange(0, + self.shape_Y[-1] * self.shape_Y[-2], + dtype=np.float32).reshape( + list(self.shape_Y)[-2:]) Out = reference_matmul_mul_head2(X, Y, 4, self.transpose_X, self.transpose_Y) @@ -267,15 +272,16 @@ def inject_test_multiple_head2(dim_x, dim_y, trans_x, trans_y, head_number): test_name = ( 'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}_head2_{}'.format( dim_x, dim_y, trans_x, trans_y, head_number)) - shape_x, shape_y = generate_compatible_shapes_mul_head2(dim_x, dim_y, - trans_x, trans_y) - globals()[test_name] = type(test_name, (GeneratorMulHead2, OpTest), { - 'shape_X': shape_x, - 'shape_Y': shape_y, - 'transpose_X': trans_x, - 'transpose_Y': trans_y, - 'head_number': head_number - }) + shape_x, shape_y = generate_compatible_shapes_mul_head2( + dim_x, dim_y, trans_x, trans_y) + globals()[test_name] = type( + test_name, (GeneratorMulHead2, OpTest), { + 'shape_X': shape_x, + 'shape_Y': shape_y, + 'transpose_X': trans_x, + 'transpose_Y': trans_y, + 'head_number': head_number + }) #test case for multiple head diff --git a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py index f6f62045b19..e6481e12f1e 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py @@ -109,8 +109,10 @@ class TestMatMulV2Op(OpTest): def test_check_grad(self): if core.is_compiled_with_rocm(): - self.check_grad( - ['X', 'Y'], 'Out', max_relative_error=1e-2, check_eager=False) + self.check_grad(['X', 'Y'], + 'Out', + max_relative_error=1e-2, + check_eager=False) else: self.check_grad(['X', 'Y'], 'Out', check_eager=False) @@ -335,9 +337,11 @@ class TestMatMulOpBroadcast2(TestMatMulV2Op): def create_test_fp16_class(parent, atol=0.001, max_relative_error=1.0): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestMatMulOpFp16Case(parent): + def init_kernel_type(self): self.dtype = np.float16 @@ -345,8 +349,9 @@ def create_test_fp16_class(parent, atol=0.001, max_relative_error=1.0): if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_output_with_place( - place, atol=atol, check_eager=False) + self.check_output_with_place(place, + atol=atol, + check_eager=False) def test_check_grad(self): place = core.CUDAPlace(0) @@ -384,11 +389,13 @@ create_test_fp16_class(TestMatMulOp17) def create_test_bf16_class(parent, atol=0.01): + @unittest.skipIf( - not core.is_compiled_with_cuda() or - not core.is_bfloat16_supported(core.CUDAPlace(0)), + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), "core is not compiled with CUDA and not support the bfloat16") class TestMatMulOpBf16Case(parent): + def get_numeric_grad(self, place, check_name): scope = core.Scope() self._check_grad_helper() @@ -407,20 +414,18 @@ def create_test_bf16_class(parent, atol=0.01): def test_check_grad_x(self): place = core.CUDAPlace(0) numeric_grads = self.get_numeric_grad(place, 'X') - self.check_grad_with_place( - place, ['X'], - 'Out', - no_grad_set=set(['Y']), - user_defined_grads=[numeric_grads]) + self.check_grad_with_place(place, ['X'], + 'Out', + no_grad_set=set(['Y']), + user_defined_grads=[numeric_grads]) def test_check_grad_y(self): place = core.CUDAPlace(0) numeric_grads = 
self.get_numeric_grad(place, 'Y') - self.check_grad_with_place( - place, ['Y'], - 'Out', - no_grad_set=set(['X']), - user_defined_grads=[numeric_grads]) + self.check_grad_with_place(place, ['Y'], + 'Out', + no_grad_set=set(['X']), + user_defined_grads=[numeric_grads]) def test_check_grad(self): pass @@ -450,6 +455,7 @@ create_test_bf16_class(TestMatMulOp17) class TestMatMulV2API(unittest.TestCase): + def setUp(self): self.places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): @@ -467,8 +473,10 @@ class TestMatMulV2API(unittest.TestCase): exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), - feed={"input_x": x_np, - "input_y": y_np}, + feed={ + "input_x": x_np, + "input_y": y_np + }, fetch_list=[result]) def test_static(self): @@ -500,9 +508,8 @@ class TestMatMulV2API(unittest.TestCase): place = core.CUDAPlace(0) if core.is_float16_supported(place): with fluid.dygraph.guard(place): - paddle.set_flags({ - 'FLAGS_gemm_use_half_precision_compute_type': False - }) + paddle.set_flags( + {'FLAGS_gemm_use_half_precision_compute_type': False}) input_x = np.random.random([2, 8, 16]).astype("float16") input_y = np.random.random([2, 16, 8]).astype("float16") for i in range(0, 16, 2): @@ -517,18 +524,16 @@ class TestMatMulV2API(unittest.TestCase): self.assertTrue(paddle.isfinite(result)[0, 0, 0]) self.assertTrue(np.isfinite(result_np)[0, 0, 0]) self.assertTrue(np.array_equal(result_np, result.numpy())) - paddle.set_flags({ - 'FLAGS_gemm_use_half_precision_compute_type': True - }) + paddle.set_flags( + {'FLAGS_gemm_use_half_precision_compute_type': True}) def test_compute_type_fp16_nan(self): if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) if core.is_float16_supported(place): with fluid.dygraph.guard(place): - paddle.set_flags({ - 'FLAGS_gemm_use_half_precision_compute_type': True - }) + paddle.set_flags( + {'FLAGS_gemm_use_half_precision_compute_type': True}) input_x = np.random.random([2, 8, 16]).astype("float16") input_y = np.random.random([2, 16, 8]).astype("float16") for i in range(0, 16, 2): @@ -543,9 +548,8 @@ class TestMatMulV2API(unittest.TestCase): self.assertFalse( paddle.isfinite(result)[0, 0, 0]) # contains nan/inf self.assertTrue(np.isfinite(result_np)[0, 0, 0]) - paddle.set_flags({ - 'FLAGS_gemm_use_half_precision_compute_type': False - }) + paddle.set_flags( + {'FLAGS_gemm_use_half_precision_compute_type': False}) def test_api_eager_dygraph(self): with _test_eager_guard(): @@ -554,6 +558,7 @@ class TestMatMulV2API(unittest.TestCase): class TestComplexMatMulOp(OpTest): + def setUp(self): self.op_type = "matmul_v2" self.init_base_dtype() @@ -589,33 +594,31 @@ class TestComplexMatMulOp(OpTest): self.check_output(check_eager=False) def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y'], - 'Out', - user_defined_grads=[self.grad_x, self.grad_y], - user_defined_grad_outputs=[self.grad_out], - check_eager=False) + self.check_grad(['X', 'Y'], + 'Out', + user_defined_grads=[self.grad_x, self.grad_y], + user_defined_grad_outputs=[self.grad_out], + check_eager=False) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[self.grad_y], - user_defined_grad_outputs=[self.grad_out], - check_eager=False) + self.check_grad(['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[self.grad_y], + user_defined_grad_outputs=[self.grad_out], + check_eager=False) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - 
user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out], - check_eager=False) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out], + check_eager=False) class TestComplexMatMulOpBroadcast(OpTest): + def setUp(self): self.op_type = "matmul_v2" self.init_base_dtype() @@ -653,33 +656,31 @@ class TestComplexMatMulOpBroadcast(OpTest): self.check_output(check_eager=False) def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y'], - 'Out', - user_defined_grads=[self.grad_x, self.grad_y], - user_defined_grad_outputs=[self.grad_out], - check_eager=False) + self.check_grad(['X', 'Y'], + 'Out', + user_defined_grads=[self.grad_x, self.grad_y], + user_defined_grad_outputs=[self.grad_out], + check_eager=False) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[self.grad_y], - user_defined_grad_outputs=[self.grad_out], - check_eager=False) + self.check_grad(['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[self.grad_y], + user_defined_grad_outputs=[self.grad_out], + check_eager=False) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out], - check_eager=False) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out], + check_eager=False) class TestMatMulTypePromotion(TestComplexMatMulOp): + def init_input_output(self): self.x = np.random.random((10, 10)).astype(self.dtype) self.y = np.random.random( diff --git a/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py b/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py index 2bbacc316f6..2e73e4d782d 100644 --- a/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py @@ -141,9 +141,10 @@ def batched_multiclass_nms(boxes, index_outs = [] lod = [] for n in range(batch_size): - nmsed_outs, indices = multiclass_nms( - boxes[n], scores[n], background, score_threshold, post_threshold, - nms_top_k, keep_top_k, normalized, use_gaussian, gaussian_sigma) + nmsed_outs, indices = multiclass_nms(boxes[n], scores[n], background, + score_threshold, post_threshold, + nms_top_k, keep_top_k, normalized, + use_gaussian, gaussian_sigma) nmsed_num = len(nmsed_outs) lod.append(nmsed_num) if nmsed_num == 0: @@ -158,6 +159,7 @@ def batched_multiclass_nms(boxes, class TestMatrixNMSOp(OpTest): + def set_argument(self): self.post_threshold = 0. self.use_gaussian = False @@ -220,17 +222,20 @@ class TestMatrixNMSOp(OpTest): class TestMatrixNMSOpNoOutput(TestMatrixNMSOp): + def set_argument(self): self.post_threshold = 2.0 class TestMatrixNMSOpGaussian(TestMatrixNMSOp): + def set_argument(self): self.post_threshold = 0. 
self.use_gaussian = True class TestMatrixNMSError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): M = 1200 @@ -248,54 +253,52 @@ class TestMatrixNMSError(unittest.TestCase): scores = np.reshape(scores, (N, M, C)) scores_np = np.transpose(scores, (0, 2, 1)) - boxes_data = fluid.data( - name='bboxes', shape=[M, C, BOX_SIZE], dtype='float32') - scores_data = fluid.data( - name='scores', shape=[N, C, M], dtype='float32') + boxes_data = fluid.data(name='bboxes', + shape=[M, C, BOX_SIZE], + dtype='float32') + scores_data = fluid.data(name='scores', + shape=[N, C, M], + dtype='float32') def test_bboxes_Variable(): # the bboxes type must be Variable - fluid.layers.matrix_nms( - bboxes=boxes_np, - scores=scores_data, - nms_top_k=nms_top_k, - keep_top_k=keep_top_k, - score_threshold=score_threshold, - post_threshold=post_threshold) + fluid.layers.matrix_nms(bboxes=boxes_np, + scores=scores_data, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + score_threshold=score_threshold, + post_threshold=post_threshold) def test_scores_Variable(): # the scores type must be Variable - fluid.layers.matrix_nms( - bboxes=boxes_data, - scores=scores_np, - nms_top_k=nms_top_k, - keep_top_k=keep_top_k, - score_threshold=score_threshold, - post_threshold=post_threshold) + fluid.layers.matrix_nms(bboxes=boxes_data, + scores=scores_np, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + score_threshold=score_threshold, + post_threshold=post_threshold) def test_empty(): # when all score are lower than threshold try: - fluid.layers.matrix_nms( - bboxes=boxes_data, - scores=scores_data, - nms_top_k=nms_top_k, - keep_top_k=keep_top_k, - score_threshold=10., - post_threshold=post_threshold) + fluid.layers.matrix_nms(bboxes=boxes_data, + scores=scores_data, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + score_threshold=10., + post_threshold=post_threshold) except Exception as e: self.fail(e) def test_coverage(): # cover correct workflow try: - fluid.layers.matrix_nms( - bboxes=boxes_data, - scores=scores_data, - nms_top_k=nms_top_k, - keep_top_k=keep_top_k, - score_threshold=score_threshold, - post_threshold=post_threshold) + fluid.layers.matrix_nms(bboxes=boxes_data, + scores=scores_data, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + score_threshold=score_threshold, + post_threshold=post_threshold) except Exception as e: self.fail(e) diff --git a/python/paddle/fluid/tests/unittests/test_matrix_power_op.py b/python/paddle/fluid/tests/unittests/test_matrix_power_op.py index 96823f49d2f..1eb1f42671b 100644 --- a/python/paddle/fluid/tests/unittests/test_matrix_power_op.py +++ b/python/paddle/fluid/tests/unittests/test_matrix_power_op.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestMatrixPowerOp(OpTest): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -44,11 +45,14 @@ class TestMatrixPowerOp(OpTest): self.check_output() def test_grad(self): - self.check_grad( - ["X"], "Out", numeric_grad_delta=1e-5, max_relative_error=1e-7) + self.check_grad(["X"], + "Out", + numeric_grad_delta=1e-5, + max_relative_error=1e-7) class TestMatrixPowerOpN1(TestMatrixPowerOp): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -56,6 +60,7 @@ class TestMatrixPowerOpN1(TestMatrixPowerOp): class TestMatrixPowerOpN2(TestMatrixPowerOp): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -63,6 +68,7 @@ class TestMatrixPowerOpN2(TestMatrixPowerOp): class TestMatrixPowerOpN3(TestMatrixPowerOp): + def config(self): self.matrix_shape = 
[10, 10] self.dtype = "float64" @@ -70,6 +76,7 @@ class TestMatrixPowerOpN3(TestMatrixPowerOp): class TestMatrixPowerOpN4(TestMatrixPowerOp): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -77,6 +84,7 @@ class TestMatrixPowerOpN4(TestMatrixPowerOp): class TestMatrixPowerOpN5(TestMatrixPowerOp): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -84,6 +92,7 @@ class TestMatrixPowerOpN5(TestMatrixPowerOp): class TestMatrixPowerOpN6(TestMatrixPowerOp): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -91,6 +100,7 @@ class TestMatrixPowerOpN6(TestMatrixPowerOp): class TestMatrixPowerOpN10(TestMatrixPowerOp): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -98,17 +108,21 @@ class TestMatrixPowerOpN10(TestMatrixPowerOp): class TestMatrixPowerOpNMinus(TestMatrixPowerOp): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" self.n = -1 def test_grad(self): - self.check_grad( - ["X"], "Out", numeric_grad_delta=1e-5, max_relative_error=1e-6) + self.check_grad(["X"], + "Out", + numeric_grad_delta=1e-5, + max_relative_error=1e-6) class TestMatrixPowerOpNMinus2(TestMatrixPowerOpNMinus): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -116,6 +130,7 @@ class TestMatrixPowerOpNMinus2(TestMatrixPowerOpNMinus): class TestMatrixPowerOpNMinus3(TestMatrixPowerOpNMinus): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -123,6 +138,7 @@ class TestMatrixPowerOpNMinus3(TestMatrixPowerOpNMinus): class TestMatrixPowerOpNMinus4(TestMatrixPowerOpNMinus): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -130,6 +146,7 @@ class TestMatrixPowerOpNMinus4(TestMatrixPowerOpNMinus): class TestMatrixPowerOpNMinus5(TestMatrixPowerOpNMinus): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -137,6 +154,7 @@ class TestMatrixPowerOpNMinus5(TestMatrixPowerOpNMinus): class TestMatrixPowerOpNMinus6(TestMatrixPowerOpNMinus): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -144,17 +162,21 @@ class TestMatrixPowerOpNMinus6(TestMatrixPowerOpNMinus): class TestMatrixPowerOpNMinus10(TestMatrixPowerOp): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" self.n = -10 def test_grad(self): - self.check_grad( - ["X"], "Out", numeric_grad_delta=1e-5, max_relative_error=1e-6) + self.check_grad(["X"], + "Out", + numeric_grad_delta=1e-5, + max_relative_error=1e-6) class TestMatrixPowerOpBatched1(TestMatrixPowerOp): + def config(self): self.matrix_shape = [8, 4, 4] self.dtype = "float64" @@ -162,6 +184,7 @@ class TestMatrixPowerOpBatched1(TestMatrixPowerOp): class TestMatrixPowerOpBatched2(TestMatrixPowerOp): + def config(self): self.matrix_shape = [2, 6, 4, 4] self.dtype = "float64" @@ -169,6 +192,7 @@ class TestMatrixPowerOpBatched2(TestMatrixPowerOp): class TestMatrixPowerOpBatched3(TestMatrixPowerOp): + def config(self): self.matrix_shape = [2, 6, 4, 4] self.dtype = "float64" @@ -176,6 +200,7 @@ class TestMatrixPowerOpBatched3(TestMatrixPowerOp): class TestMatrixPowerOpBatchedLong(TestMatrixPowerOp): + def config(self): self.matrix_shape = [1, 2, 3, 4, 4, 3, 3] self.dtype = "float64" @@ -183,6 +208,7 @@ class TestMatrixPowerOpBatchedLong(TestMatrixPowerOp): class TestMatrixPowerOpLarge1(TestMatrixPowerOp): + def config(self): self.matrix_shape = [32, 32] self.dtype = "float64" @@ -190,6 +216,7 @@ class TestMatrixPowerOpLarge1(TestMatrixPowerOp): class 
TestMatrixPowerOpLarge2(TestMatrixPowerOp): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float64" @@ -197,6 +224,7 @@ class TestMatrixPowerOpLarge2(TestMatrixPowerOp): class TestMatrixPowerOpFP32(TestMatrixPowerOp): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float32" @@ -207,6 +235,7 @@ class TestMatrixPowerOpFP32(TestMatrixPowerOp): class TestMatrixPowerOpBatchedFP32(TestMatrixPowerOpFP32): + def config(self): self.matrix_shape = [2, 8, 4, 4] self.dtype = "float32" @@ -214,6 +243,7 @@ class TestMatrixPowerOpBatchedFP32(TestMatrixPowerOpFP32): class TestMatrixPowerOpLarge1FP32(TestMatrixPowerOpFP32): + def config(self): self.matrix_shape = [32, 32] self.dtype = "float32" @@ -221,6 +251,7 @@ class TestMatrixPowerOpLarge1FP32(TestMatrixPowerOpFP32): class TestMatrixPowerOpLarge2FP32(TestMatrixPowerOpFP32): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float32" @@ -228,6 +259,7 @@ class TestMatrixPowerOpLarge2FP32(TestMatrixPowerOpFP32): class TestMatrixPowerOpFP32Minus(TestMatrixPowerOpFP32): + def config(self): self.matrix_shape = [10, 10] self.dtype = "float32" @@ -235,6 +267,7 @@ class TestMatrixPowerOpFP32Minus(TestMatrixPowerOpFP32): class TestMatrixPowerAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -271,6 +304,7 @@ class TestMatrixPowerAPI(unittest.TestCase): class TestMatrixPowerAPIError(unittest.TestCase): + def test_errors(self): input_np = np.random.random([4, 4]).astype("float64") @@ -279,11 +313,12 @@ class TestMatrixPowerAPIError(unittest.TestCase): # n must be int for n in [2.0, '2', -2.0]: - input = fluid.data( - name="input_float32", shape=[4, 4], dtype='float32') + input = fluid.data(name="input_float32", + shape=[4, 4], + dtype='float32') self.assertRaises(TypeError, paddle.linalg.matrix_power, input, n) - # The data type of input must be float32 or float64. + # The data type of input must be float32 or float64. for dtype in ["bool", "int32", "int64", "float16"]: input = fluid.data(name="input_" + dtype, shape=[4, 4], dtype=dtype) self.assertRaises(TypeError, paddle.linalg.matrix_power, input, 2) @@ -303,6 +338,7 @@ class TestMatrixPowerAPIError(unittest.TestCase): class TestMatrixPowerSingularAPI(unittest.TestCase): + def setUp(self): self.places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_matrix_rank_op.py b/python/paddle/fluid/tests/unittests/test_matrix_rank_op.py index b13b3462617..b0b04a3cc10 100644 --- a/python/paddle/fluid/tests/unittests/test_matrix_rank_op.py +++ b/python/paddle/fluid/tests/unittests/test_matrix_rank_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -35,6 +35,7 @@ def matrix_rank_wraper(x, tol=None, use_default_tol=True, hermitian=False): class TestMatrixRankOP(OpTest): + def setUp(self): self.python_api = matrix_rank_wraper self.op_type = "matrix_rank" @@ -61,6 +62,7 @@ class TestMatrixRankOP(OpTest): class TestMatrixRankOP1(TestMatrixRankOP): + def init_data(self): self.x = np.eye(3, k=1, dtype=np.float64) self.tol_tensor = None @@ -72,6 +74,7 @@ class TestMatrixRankOP1(TestMatrixRankOP): class TestMatrixRankOP2(TestMatrixRankOP): + def init_data(self): self.x = np.random.rand(3, 4, 5, 6).astype(np.float32) self.tol_tensor = np.random.random([3, 4]).astype(self.x.dtype) @@ -83,6 +86,7 @@ class TestMatrixRankOP2(TestMatrixRankOP): class TestMatrixRankOP3(TestMatrixRankOP): + def init_data(self): self.x = np.eye(200, dtype=np.float64) self.tol_tensor = None @@ -94,6 +98,7 @@ class TestMatrixRankOP3(TestMatrixRankOP): class TestMatrixRankOP4(TestMatrixRankOP): + def init_data(self): self.x = np.random.rand(1, 10).astype(np.float32) self.tol_tensor = None @@ -105,6 +110,7 @@ class TestMatrixRankOP4(TestMatrixRankOP): class TestMatrixRankOP5(TestMatrixRankOP): + def init_data(self): self.x = np.random.rand(5, 1).astype(np.float64) self.tol_tensor = np.random.random([1, 4]).astype(self.x.dtype) @@ -116,6 +122,7 @@ class TestMatrixRankOP5(TestMatrixRankOP): class TestMatrixRankOP6(TestMatrixRankOP): + def init_data(self): self.x = np.random.rand(3, 4, 5, 6).astype(np.float32) self.tol_tensor = None @@ -127,6 +134,7 @@ class TestMatrixRankOP6(TestMatrixRankOP): class TestMatrixRankOP7(TestMatrixRankOP): + def init_data(self): self.x = np.eye(200, dtype=np.float64) self.tol_tensor = np.random.random([200, 200]).astype(self.x.dtype) @@ -138,6 +146,7 @@ class TestMatrixRankOP7(TestMatrixRankOP): class TestMatrixRankAPI(unittest.TestCase): + def test_dygraph(self): paddle.disable_static() @@ -172,25 +181,31 @@ class TestMatrixRankAPI(unittest.TestCase): with fluid.program_guard(fluid.Program(), fluid.Program()): x_np = np.random.rand(3, 4, 7, 7).astype(np.float64) tol_np = np.random.random([3, 4]).astype(np.float32) - x_pd = paddle.fluid.data( - name="X", shape=[3, 4, 7, 7], dtype='float64') - tol_pd = paddle.fluid.data( - name="TolTensor", shape=[3, 4], dtype='float32') + x_pd = paddle.fluid.data(name="X", + shape=[3, 4, 7, 7], + dtype='float64') + tol_pd = paddle.fluid.data(name="TolTensor", + shape=[3, 4], + dtype='float32') rank_np = np.linalg.matrix_rank(x_np, tol_np, hermitian=False) - rank_pd = paddle.linalg.matrix_rank( - x_pd, tol_pd, hermitian=False) + rank_pd = paddle.linalg.matrix_rank(x_pd, + tol_pd, + hermitian=False) exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), - feed={"X": x_np, - "TolTensor": tol_np}, + feed={ + "X": x_np, + "TolTensor": tol_np + }, fetch_list=[rank_pd]) self.assertTrue(np.allclose(fetches[0], rank_np)) for place in places: with fluid.program_guard(fluid.Program(), fluid.Program()): x_np = np.random.rand(3, 4, 7, 7).astype(np.float64) - x_pd = paddle.fluid.data( - name="X", shape=[3, 4, 7, 7], dtype='float64') + x_pd = paddle.fluid.data(name="X", + shape=[3, 4, 7, 7], + dtype='float64') rank_np = np.linalg.matrix_rank(x_np, hermitian=True) rank_pd = paddle.linalg.matrix_rank(x_pd, hermitian=True) exe = fluid.Executor(place) @@ -202,8 +217,9 @@ class TestMatrixRankAPI(unittest.TestCase): for place in places: with fluid.program_guard(fluid.Program(), fluid.Program()): x_np = np.random.rand(3, 4, 7, 7).astype(np.float64) - x_pd = paddle.fluid.data( - name="X", shape=[3, 4, 7, 7], 
dtype='float64') + x_pd = paddle.fluid.data(name="X", + shape=[3, 4, 7, 7], + dtype='float64') rank_np = np.linalg.matrix_rank(x_np, 0.1, hermitian=False) rank_pd = paddle.linalg.matrix_rank(x_pd, 0.1, hermitian=False) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_max_min_amax_amin_op.py b/python/paddle/fluid/tests/unittests/test_max_min_amax_amin_op.py index fe00a825ba1..cadbca93ad3 100644 --- a/python/paddle/fluid/tests/unittests/test_max_min_amax_amin_op.py +++ b/python/paddle/fluid/tests/unittests/test_max_min_amax_amin_op.py @@ -26,11 +26,12 @@ paddle.enable_static() class TestMaxMinAmaxAminAPI(unittest.TestCase): + def setUp(self): self.init_case() self.cal_np_out_and_gradient() - self.place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() def init_case(self): self.x_np = np.array([[0.2, 0.3, 0.5, 0.9], [0.1, 0.2, 0.6, 0.7]]) @@ -40,9 +41,10 @@ class TestMaxMinAmaxAminAPI(unittest.TestCase): self.keepdim = False # If there are multiple minimum or maximum elements, max/min/amax/amin is non-derivable, - # its gradient check is not supported by unittest framework, + # its gradient check is not supported by unittest framework, # thus we calculate the gradient by numpy function. def cal_np_out_and_gradient(self): + def _cal_np_out_and_gradient(func): if func is 'amax': out = np.amax(self.x_np, axis=self.axis, keepdims=self.keepdim) @@ -88,6 +90,7 @@ class TestMaxMinAmaxAminAPI(unittest.TestCase): # We check the output between paddle API and numpy in static graph. def test_static_graph(self): + def _test_static_graph(func): startup_program = fluid.Program() train_program = fluid.Program() @@ -107,13 +110,15 @@ class TestMaxMinAmaxAminAPI(unittest.TestCase): _test_static_graph('max') _test_static_graph('min') - # As dygraph is easy to compute gradient, we check the gradient between + # As dygraph is easy to compute gradient, we check the gradient between # paddle API and numpy in dygraph. 
def test_dygraph(self): + def _test_dygraph(func): paddle.disable_static() - x = paddle.to_tensor( - self.x_np, dtype=self.dtype, stop_gradient=False) + x = paddle.to_tensor(self.x_np, + dtype=self.dtype, + stop_gradient=False) out = self._choose_paddle_func(func, x) grad_tensor = paddle.ones_like(x) paddle.autograd.backward([out], [grad_tensor], True) @@ -130,6 +135,7 @@ class TestMaxMinAmaxAminAPI(unittest.TestCase): # test two minimum or maximum elements class TestMaxMinAmaxAminAPI2(TestMaxMinAmaxAminAPI): + def init_case(self): self.x_np = np.array([[0.2, 0.3, 0.9, 0.9], [0.1, 0.1, 0.6, 0.7]]) self.shape = [2, 4] @@ -140,6 +146,7 @@ class TestMaxMinAmaxAminAPI2(TestMaxMinAmaxAminAPI): # test different axis class TestMaxMinAmaxAminAPI3(TestMaxMinAmaxAminAPI): + def init_case(self): self.x_np = np.array([[0.2, 0.3, 0.9, 0.9], [0.1, 0.1, 0.6, 0.7]]) self.shape = [2, 4] @@ -150,6 +157,7 @@ class TestMaxMinAmaxAminAPI3(TestMaxMinAmaxAminAPI): # test keepdim = True class TestMaxMinAmaxAminAPI4(TestMaxMinAmaxAminAPI): + def init_case(self): self.x_np = np.array([[0.2, 0.3, 0.9, 0.9], [0.1, 0.1, 0.6, 0.7]]) self.shape = [2, 4] @@ -160,9 +168,10 @@ class TestMaxMinAmaxAminAPI4(TestMaxMinAmaxAminAPI): # test axis is tuple class TestMaxMinAmaxAminAPI5(TestMaxMinAmaxAminAPI): + def init_case(self): - self.x_np = np.array( - [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]).astype(np.int32) + self.x_np = np.array([[[1, 2], [3, 4]], [[5, 6], [7, + 8]]]).astype(np.int32) self.shape = [2, 2, 2] self.dtype = 'int32' self.axis = (0, 1) @@ -171,6 +180,7 @@ class TestMaxMinAmaxAminAPI5(TestMaxMinAmaxAminAPI): # test multiple minimum or maximum elements class TestMaxMinAmaxAminAPI6(TestMaxMinAmaxAminAPI): + def init_case(self): self.x_np = np.array([[0.2, 0.9, 0.9, 0.9], [0.9, 0.9, 0.2, 0.2]]) self.shape = [2, 4] diff --git a/python/paddle/fluid/tests/unittests/test_max_op.py b/python/paddle/fluid/tests/unittests/test_max_op.py index d5b884dfcc9..dc11d78699e 100644 --- a/python/paddle/fluid/tests/unittests/test_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_max_op.py @@ -23,6 +23,7 @@ import paddle.fluid.core as core class ApiMaxTest(unittest.TestCase): + def setUp(self): if core.is_compiled_with_cuda(): self.place = core.CUDAPlace(0) @@ -114,13 +115,13 @@ class ApiMaxTest(unittest.TestCase): class TestOutDtype(unittest.TestCase): + def test_max(self): api_fn = paddle.max shape = [10, 16] - check_out_dtype( - api_fn, - in_specs=[(shape, )], - expect_dtypes=['float32', 'float64', 'int32', 'int64']) + check_out_dtype(api_fn, + in_specs=[(shape, )], + expect_dtypes=['float32', 'float64', 'int32', 'int64']) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_maximum_op.py b/python/paddle/fluid/tests/unittests/test_maximum_op.py index 72db3df044e..9568a145ed4 100644 --- a/python/paddle/fluid/tests/unittests/test_maximum_op.py +++ b/python/paddle/fluid/tests/unittests/test_maximum_op.py @@ -21,6 +21,7 @@ import paddle.fluid.core as core class ApiMaximumTest(unittest.TestCase): + def setUp(self): if core.is_compiled_with_cuda(): self.place = core.CUDAPlace(0) @@ -47,8 +48,10 @@ class ApiMaximumTest(unittest.TestCase): data_y = paddle.static.data("y", shape=[10, 15], dtype="float32") result_max = paddle.maximum(data_x, data_y) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "y": self.input_y}, + res, = exe.run(feed={ + "x": self.input_x, + "y": self.input_y + }, fetch_list=[result_max]) self.assertTrue(np.allclose(res, self.np_expected1)) @@ -58,8 
+61,10 @@ class ApiMaximumTest(unittest.TestCase): data_z = paddle.static.data("z", shape=[15], dtype="float32") result_max = paddle.maximum(data_x, data_z) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "z": self.input_z}, + res, = exe.run(feed={ + "x": self.input_x, + "z": self.input_z + }, fetch_list=[result_max]) self.assertTrue(np.allclose(res, self.np_expected2)) @@ -69,8 +74,10 @@ class ApiMaximumTest(unittest.TestCase): data_c = paddle.static.data("c", shape=[3], dtype="int64") result_max = paddle.maximum(data_a, data_c) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"a": self.input_a, - "c": self.input_c}, + res, = exe.run(feed={ + "a": self.input_a, + "c": self.input_c + }, fetch_list=[result_max]) self.assertTrue(np.allclose(res, self.np_expected3)) @@ -80,8 +87,10 @@ class ApiMaximumTest(unittest.TestCase): data_c = paddle.static.data("c", shape=[3], dtype="int64") result_max = paddle.maximum(data_b, data_c) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"b": self.input_b, - "c": self.input_c}, + res, = exe.run(feed={ + "b": self.input_b, + "c": self.input_c + }, fetch_list=[result_max]) self.assertTrue(np.allclose(res, self.np_expected4)) diff --git a/python/paddle/fluid/tests/unittests/test_maxout_op.py b/python/paddle/fluid/tests/unittests/test_maxout_op.py index 4bc7b09c71e..64803bf39fe 100644 --- a/python/paddle/fluid/tests/unittests/test_maxout_op.py +++ b/python/paddle/fluid/tests/unittests/test_maxout_op.py @@ -37,6 +37,7 @@ def maxout_forward_naive(x, groups, channel_axis): class TestMaxOutOp(OpTest): + def setUp(self): self.op_type = "maxout" self.python_api = paddle.nn.functional.maxout @@ -64,21 +65,25 @@ class TestMaxOutOp(OpTest): class TestMaxOutOpAxis0(TestMaxOutOp): + def set_attrs(self): self.axis = -1 class TestMaxOutOpAxis1(TestMaxOutOp): + def set_attrs(self): self.axis = 3 class TestMaxOutOpFP32(TestMaxOutOp): + def set_attrs(self): self.dtype = 'float32' class TestMaxOutOpGroups(TestMaxOutOp): + def set_attrs(self): self.groups = 3 @@ -139,8 +144,9 @@ class TestMaxoutAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.maxout, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = paddle.fluid.data( - name='x_int32', shape=[2, 4, 6, 8], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[2, 4, 6, 8], + dtype='int32') self.assertRaises(TypeError, F.maxout, x_int32) x_float32 = paddle.fluid.data(name='x_float32', shape=[2, 4, 6, 8]) diff --git a/python/paddle/fluid/tests/unittests/test_mean_iou.py b/python/paddle/fluid/tests/unittests/test_mean_iou.py index b392a328494..a6b1ca4dab4 100644 --- a/python/paddle/fluid/tests/unittests/test_mean_iou.py +++ b/python/paddle/fluid/tests/unittests/test_mean_iou.py @@ -55,6 +55,7 @@ def compute_mean_iou(predictions, labels, num_classes, in_wrongs, in_corrects, class TestMeanIOUOp(OpTest): + def setUp(self): self.config() self.op_type = "mean_iou" @@ -65,18 +66,21 @@ class TestMeanIOUOp(OpTest): in_wrongs = [] for i in range(self.in_wrong_num): - in_wrongs.append(("in_wrong_%d" % i, np.random.randint( - 0, 10, [self.num_classes]).astype("int32"))) + in_wrongs.append( + ("in_wrong_%d" % i, + np.random.randint(0, 10, [self.num_classes]).astype("int32"))) in_corrects = [] for i in range(self.in_correct_num): - in_corrects.append(("in_correct_%d" % i, np.random.randint( - 0, 10, [self.num_classes]).astype("int32"))) + in_corrects.append( + ("in_correct_%d" % i, + np.random.randint(0, 10, [self.num_classes]).astype("int32"))) in_mean_ious = [] for i in range(self.in_mean_iou_num): - in_mean_ious.append(("in_mean_iou_%d" % i, np.random.uniform( - 0, 1, [1]).astype("float32"))) + in_mean_ious.append(("in_mean_iou_%d" % i, + np.random.uniform(0, 1, + [1]).astype("float32"))) self.inputs = { 'Predictions': predictions, @@ -107,6 +111,7 @@ class TestMeanIOUOp(OpTest): class TestCase1(TestMeanIOUOp): + def config(self): self.num_classes = 5 self.image_size = [100, 128] @@ -121,13 +126,14 @@ class TestCase1(TestMeanIOUOp): class TestMeanIOUOpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): # The input type of accuracy_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) - y1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) + y1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.mean_iou, x1, y1) # The input dtype of accuracy_op must be float32 or float64. x2 = fluid.layers.data(name='x2', shape=[4], dtype="float32") diff --git a/python/paddle/fluid/tests/unittests/test_mean_op.py b/python/paddle/fluid/tests/unittests/test_mean_op.py index c5ee5c91e1c..af15f271b4a 100644 --- a/python/paddle/fluid/tests/unittests/test_mean_op.py +++ b/python/paddle/fluid/tests/unittests/test_mean_op.py @@ -22,6 +22,7 @@ import paddle.fluid.core as core import paddle.fluid as fluid from paddle.fluid import Program, program_guard from paddle.fluid.framework import _test_eager_guard + np.random.seed(10) @@ -38,6 +39,7 @@ def reduce_mean_wrapper(x, axis=0, keepdim=False, reduce_all=False): class TestMeanOp(OpTest): + def setUp(self): self.op_type = "mean" self.python_api = fluid.layers.mean @@ -57,23 +59,27 @@ class TestMeanOp(OpTest): class TestMeanOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of mean_op must be Variable. input1 = 12 self.assertRaises(TypeError, fluid.layers.mean, input1) # The input dtype of mean_op must be float16, float32, float64. 
- input2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32") + input2 = fluid.layers.data(name='input2', + shape=[12, 10], + dtype="int32") self.assertRaises(TypeError, fluid.layers.mean, input2) - input3 = fluid.layers.data( - name='input3', shape=[4], dtype="float16") + input3 = fluid.layers.data(name='input3', + shape=[4], + dtype="float16") fluid.layers.softmax(input3) @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16MeanOp(TestMeanOp): + def init_dtype_type(self): self.dtype = np.float16 self.__class__.no_need_check_grad = True @@ -99,6 +105,7 @@ class TestFP16MeanOp(TestMeanOp): @OpTestTool.skip_if_not_cpu_bf16() class TestBF16MeanOp(TestMeanOp): + def init_dtype_type(self): self.dtype = np.uint16 @@ -128,6 +135,7 @@ def ref_reduce_mean_grad(x, axis, dtype): class TestReduceMeanOp(OpTest): + def setUp(self): self.op_type = 'reduce_mean' self.python_api = reduce_mean_wrapper @@ -178,15 +186,18 @@ class TestReduceMeanOp(OpTest): return with fluid.dygraph.guard(place=place): x = paddle.tensor(self.inputs['X']) - y = paddle.mean( - x, axis=self.attrs['dim'], keepdim=self.attrs['keep_dim']) + y = paddle.mean(x, + axis=self.attrs['dim'], + keepdim=self.attrs['keep_dim']) dx = paddle.grad(y, x)[0].numpy() - dx_expected = ref_reduce_mean_grad( - self.inputs['X'], self.attrs['dim'], self.dtype) + dx_expected = ref_reduce_mean_grad(self.inputs['X'], + self.attrs['dim'], + self.dtype) self.assertTrue(np.array_equal(dx, dx_expected)) class TestReduceMeanOpDefaultAttrs(TestReduceMeanOp): + def setUp(self): self.op_type = 'reduce_mean' self.python_api = reduce_mean_wrapper @@ -200,88 +211,104 @@ class TestReduceMeanOpDefaultAttrs(TestReduceMeanOp): class TestReduceMeanOpFloat32(TestReduceMeanOp): + def set_attrs(self): self.dtype = 'float32' class TestReduceMeanOpFloat16(TestReduceMeanOp): + def set_attrs(self): self.dtype = 'float16' class TestReduceMeanOpShape1D(TestReduceMeanOp): + def set_attrs(self): self.shape = [100] class TestReduceMeanOpShape1DFP16(TestReduceMeanOp): + def set_attrs(self): self.shape = [100] self.dtype = 'float16' class TestReduceMeanOpShape6D(TestReduceMeanOp): + def set_attrs(self): self.shape = [2, 3, 4, 5, 6, 7] class TestReduceMeanOpShape6DFP16(TestReduceMeanOp): + def set_attrs(self): self.shape = [2, 3, 4, 5, 6, 7] self.dtype = 'float16' class TestReduceMeanOpAxisAll(TestReduceMeanOp): + def set_attrs(self): self.axis = [0, 1, 2, 3] class TestReduceMeanOpAxisAllFP16(TestReduceMeanOp): + def set_attrs(self): self.axis = [0, 1, 2, 3] self.dtype = 'float16' class TestReduceMeanOpAxisTuple(TestReduceMeanOp): + def set_attrs(self): self.axis = (0, 1, 2) class TestReduceMeanOpAxisTupleFP16(TestReduceMeanOp): + def set_attrs(self): self.axis = (0, 1, 2) self.dtype = 'float16' class TestReduceMeanOpAxisNegative(TestReduceMeanOp): + def set_attrs(self): self.axis = [-2, -1] class TestReduceMeanOpAxisNegativeFP16(TestReduceMeanOp): + def set_attrs(self): self.axis = [-2, -1] self.dtype = 'float16' class TestReduceMeanOpKeepdimTrue1(TestReduceMeanOp): + def set_attrs(self): self.keepdim = True class TestReduceMeanOpKeepdimTrue1FP16(TestReduceMeanOp): + def set_attrs(self): self.keepdim = True self.dtype = 'float16' class TestReduceMeanOpKeepdimTrue2(TestReduceMeanOp): + def set_attrs(self): self.axis = [0, 1, 2, 3] self.keepdim = True class TestReduceMeanOpKeepdimTrue2FP16(TestReduceMeanOp): + def set_attrs(self): self.axis = [0, 1, 2, 3] self.keepdim = True @@ -289,11 +316,13 @@ class 
TestReduceMeanOpKeepdimTrue2FP16(TestReduceMeanOp): class TestReduceMeanOpReduceAllTrue(TestReduceMeanOp): + def set_attrs(self): self.reduce_all = True class TestReduceMeanOpReduceAllTrueFP16(TestReduceMeanOp): + def set_attrs(self): self.reduce_all = True self.dtype = 'float16' @@ -337,9 +366,8 @@ class TestMeanAPI(unittest.TestCase): if len(axis) == 0: axis = None out_ref = np.mean(x, axis, keepdims=keepdim) - self.assertEqual( - np.allclose( - out.numpy(), out_ref, rtol=1e-04), True) + self.assertEqual(np.allclose(out.numpy(), out_ref, rtol=1e-04), + True) test_case(self.x) test_case(self.x, []) diff --git a/python/paddle/fluid/tests/unittests/test_median.py b/python/paddle/fluid/tests/unittests/test_median.py index be2206d0267..2f568022449 100644 --- a/python/paddle/fluid/tests/unittests/test_median.py +++ b/python/paddle/fluid/tests/unittests/test_median.py @@ -23,6 +23,7 @@ DELTA = 1e-6 class TestMedian(unittest.TestCase): + def check_numpy_res(self, np1, np2): self.assertEqual(np1.shape, np2.shape) mismatch = np.sum((np1 - np2) * (np1 - np2)) @@ -57,8 +58,7 @@ class TestMedian(unittest.TestCase): w = 4 l = 2 x = np.arange(h * w * l).reshape([h, w, l]) - lis_tests = [[x, axis, keepdims] - for axis in [-1, 0, 1, 2, None] + lis_tests = [[x, axis, keepdims] for axis in [-1, 0, 1, 2, None] for keepdims in [False, True]] for lis_test in lis_tests: self.static_single_test_median(lis_test) @@ -69,8 +69,7 @@ class TestMedian(unittest.TestCase): w = 4 l = 2 x = np.arange(h * w * l).reshape([h, w, l]) - lis_tests = [[x, axis, keepdims] - for axis in [-1, 0, 1, 2, None] + lis_tests = [[x, axis, keepdims] for axis in [-1, 0, 1, 2, None] for keepdims in [False, True]] for lis_test in lis_tests: self.dygraph_single_test_median(lis_test) diff --git a/python/paddle/fluid/tests/unittests/test_memcpy_op.py b/python/paddle/fluid/tests/unittests/test_memcpy_op.py index 623c43f5b75..a1469ca558b 100755 --- a/python/paddle/fluid/tests/unittests/test_memcpy_op.py +++ b/python/paddle/fluid/tests/unittests/test_memcpy_op.py @@ -26,6 +26,7 @@ from paddle.fluid.backward import append_backward class TestMemcpy_FillConstant(unittest.TestCase): + def get_prog(self): paddle.enable_static() main_program = Program() @@ -38,21 +39,19 @@ class TestMemcpy_FillConstant(unittest.TestCase): dtype='float32', persistable=False, stop_gradient=True) - gpu_var = main_program.global_block().create_var( - name=gpu_var_name, - shape=[10, 10], - dtype='float32', - persistable=False, - stop_gradient=True) - main_program.global_block().append_op( - type="fill_constant", - outputs={"Out": gpu_var_name}, - attrs={ - "shape": [10, 10], - "dtype": gpu_var.dtype, - "value": 1.0, - "place_type": 1 - }) + gpu_var = main_program.global_block().create_var(name=gpu_var_name, + shape=[10, 10], + dtype='float32', + persistable=False, + stop_gradient=True) + main_program.global_block().append_op(type="fill_constant", + outputs={"Out": gpu_var_name}, + attrs={ + "shape": [10, 10], + "dtype": gpu_var.dtype, + "value": 1.0, + "place_type": 1 + }) main_program.global_block().append_op( type="fill_constant", outputs={"Out": pinned_var_name}, @@ -66,11 +65,10 @@ class TestMemcpy_FillConstant(unittest.TestCase): def test_gpu_copy_to_pinned(self): main_program, gpu_var, pinned_var = self.get_prog() - main_program.global_block().append_op( - type='memcpy', - inputs={'X': gpu_var}, - outputs={'Out': pinned_var}, - attrs={'dst_place_type': 2}) + main_program.global_block().append_op(type='memcpy', + inputs={'X': gpu_var}, + outputs={'Out': pinned_var}, + 
attrs={'dst_place_type': 2}) place = fluid.CUDAPlace(0) exe = fluid.Executor(place) gpu_, pinned_ = exe.run(main_program, @@ -81,11 +79,10 @@ class TestMemcpy_FillConstant(unittest.TestCase): def test_pinned_copy_gpu(self): main_program, gpu_var, pinned_var = self.get_prog() - main_program.global_block().append_op( - type='memcpy', - inputs={'X': pinned_var}, - outputs={'Out': gpu_var}, - attrs={'dst_place_type': 1}) + main_program.global_block().append_op(type='memcpy', + inputs={'X': pinned_var}, + outputs={'Out': gpu_var}, + attrs={'dst_place_type': 1}) place = fluid.CUDAPlace(0) exe = fluid.Executor(place) gpu_, pinned_ = exe.run(main_program, @@ -132,11 +129,10 @@ class TestMemcpy_FillConstant(unittest.TestCase): "place_type": 2 }) - main_program.global_block().append_op( - type='memcpy', - inputs={'X': pinned_var}, - outputs={'Out': gpu_var}, - attrs={'dst_place_type': 1}) + main_program.global_block().append_op(type='memcpy', + inputs={'X': pinned_var}, + outputs={'Out': gpu_var}, + attrs={'dst_place_type': 1}) place = fluid.CUDAPlace(0) exe = fluid.Executor(place) gpu_, pinned_ = exe.run(main_program, @@ -149,6 +145,7 @@ class TestMemcpy_FillConstant(unittest.TestCase): class TestMemcpyOPError(unittest.TestCase): + def get_prog(self): paddle.enable_static() main_program = Program() @@ -175,20 +172,20 @@ class TestMemcpyOPError(unittest.TestCase): selected_row_var = main_program.global_block().create_var( \ name="selected_row_0", dtype="float32", persistable=False, \ type=fluid.core.VarDesc.VarType.SELECTED_ROWS, stop_gradient=True) - main_program.global_block().append_op( - type="fill_constant", - outputs={"Out": selected_row_var}, - attrs={ - "shape": selected_row_var.shape, - "dtype": selected_row_var.dtype, - "value": 1.0, - "place_type": 1 - }) - main_program.global_block().append_op( - type='memcpy', - inputs={'X': selected_row_var}, - outputs={'Out': pinned_var}, - attrs={'dst_place_type': 2}) + main_program.global_block().append_op(type="fill_constant", + outputs={"Out": selected_row_var}, + attrs={ + "shape": + selected_row_var.shape, + "dtype": + selected_row_var.dtype, + "value": 1.0, + "place_type": 1 + }) + main_program.global_block().append_op(type='memcpy', + inputs={'X': selected_row_var}, + outputs={'Out': pinned_var}, + attrs={'dst_place_type': 2}) with self.assertRaises(NotImplementedError): place = fluid.CUDAPlace(0) exe = fluid.Executor(place) @@ -199,6 +196,7 @@ class TestMemcpyOPError(unittest.TestCase): class TestMemcpyApi(unittest.TestCase): + def test_api(self): a = paddle.ones([1024, 1024]) b = paddle.tensor.creation._memcpy(a, paddle.CUDAPinnedPlace()) diff --git a/python/paddle/fluid/tests/unittests/test_memory_analysis.py b/python/paddle/fluid/tests/unittests/test_memory_analysis.py index 9388e07dbf8..1672e7371cb 100644 --- a/python/paddle/fluid/tests/unittests/test_memory_analysis.py +++ b/python/paddle/fluid/tests/unittests/test_memory_analysis.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,6 +19,7 @@ from simple_nets import simple_fc_net class TestMemoryAnalysis(unittest.TestCase): + def setUp(self): paddle.enable_static() @@ -27,17 +28,18 @@ class TestMemoryAnalysis(unittest.TestCase): optimizer = paddle.optimizer.Adam(learning_rate=1e-3) optimizer.minimize(loss) main_prog = paddle.static.default_main_program() - max_tmp_mem_1, max_persitable_mem_1 = get_max_memory_info( - main_prog, batch_size=32) + max_tmp_mem_1, max_persitable_mem_1 = get_max_memory_info(main_prog, + batch_size=32) self.assertGreater(max_tmp_mem_1, 0) self.assertGreater(max_persitable_mem_1, 0) - max_tmp_mem_2, max_persitable_mem_2 = get_max_memory_info( - main_prog, batch_size=64) + max_tmp_mem_2, max_persitable_mem_2 = get_max_memory_info(main_prog, + batch_size=64) self.assertEqual(max_persitable_mem_1, max_persitable_mem_2) self.assertLess(max_tmp_mem_1, max_tmp_mem_2) class TestPreAllocateMemory(unittest.TestCase): + def setUp(self): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py b/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py index a1b7380fdd9..98550ac5018 100644 --- a/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py +++ b/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py @@ -18,13 +18,15 @@ import unittest class TestMemoryReuseExcludeFeedVar(unittest.TestCase): + def setUp(self): self.image_shape = [28, 28] self.iteration = 10 def main_impl(self, place): - image = fluid.layers.data( - name='image', shape=self.image_shape, dtype='float32') + image = fluid.layers.data(name='image', + shape=self.image_shape, + dtype='float32') relu_image = fluid.layers.relu(image) loss = fluid.layers.reduce_mean(relu_image) @@ -35,13 +37,13 @@ class TestMemoryReuseExcludeFeedVar(unittest.TestCase): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - compiled_prog = fluid.CompiledProgram(fluid.default_main_program( - )).with_data_parallel( - loss_name=loss.name, build_strategy=build_strategy) + compiled_prog = fluid.CompiledProgram( + fluid.default_main_program()).with_data_parallel( + loss_name=loss.name, build_strategy=build_strategy) image_tensor = fluid.LoDTensor() - np_image = np.random.uniform( - low=-10, high=10, size=self.image_shape).astype('float32') + np_image = np.random.uniform(low=-10, high=10, + size=self.image_shape).astype('float32') image_tensor.set(np_image, place) feed_dict = [{image.name: image_tensor}] diff --git a/python/paddle/fluid/tests/unittests/test_memory_usage.py b/python/paddle/fluid/tests/unittests/test_memory_usage.py index 4cdb5b5d9f7..adc3cd0a844 100644 --- a/python/paddle/fluid/tests/unittests/test_memory_usage.py +++ b/python/paddle/fluid/tests/unittests/test_memory_usage.py @@ -43,6 +43,7 @@ def train_simulator(test_batch_size=10): class TestMemoryUsage(unittest.TestCase): + def test_with_unit_B(self): with self.program_scope_guard(): train_simulator() diff --git a/python/paddle/fluid/tests/unittests/test_merge_selectedrows_op.py b/python/paddle/fluid/tests/unittests/test_merge_selectedrows_op.py index d2fa344b67a..ef9e948d982 100644 --- a/python/paddle/fluid/tests/unittests/test_merge_selectedrows_op.py +++ b/python/paddle/fluid/tests/unittests/test_merge_selectedrows_op.py @@ -21,6 +21,7 @@ from paddle.fluid.op import Operator class TestMergeSelectedRows(unittest.TestCase): + def get_places(self): places = [core.CPUPlace()] if core.is_compiled_with_cuda(): diff --git 
a/python/paddle/fluid/tests/unittests/test_merged_adam_op.py b/python/paddle/fluid/tests/unittests/test_merged_adam_op.py index f515a9f95b1..02cadf02300 100644 --- a/python/paddle/fluid/tests/unittests/test_merged_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_merged_adam_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -83,6 +83,7 @@ def run_adam_op(params, class TestMergedAdam(unittest.TestCase): + def setUp(self): paddle.disable_static() self.shapes = [[3, 4], [2, 7], [5, 6], [7, 8]] @@ -110,21 +111,20 @@ class TestMergedAdam(unittest.TestCase): self.shapes, multi_precision, self.seed, place) def run_op(use_merged): - return run_adam_op( - params=params, - grads=grads, - lrs=lrs, - moment1s=moment1s, - moment2s=moment2s, - beta1_pows=beta1_pows, - beta2_pows=beta2_pows, - master_params=master_params, - epsilon=0.9, - beta1=0.9, - beta2=0.99, - place=place, - multi_precision=multi_precision, - use_merged=use_merged) + return run_adam_op(params=params, + grads=grads, + lrs=lrs, + moment1s=moment1s, + moment2s=moment2s, + beta1_pows=beta1_pows, + beta2_pows=beta2_pows, + master_params=master_params, + epsilon=0.9, + beta1=0.9, + beta2=0.99, + place=place, + multi_precision=multi_precision, + use_merged=use_merged) outs1 = run_op(True) outs2 = run_op(False) @@ -137,9 +137,8 @@ class TestMergedAdam(unittest.TestCase): if place == 'gpu': self.assertTrue(np.array_equal(value1[i], value2[i])) else: - self.assertTrue( - np.allclose( - value1[i], value2[i], atol=1e-7)) + self.assertTrue(np.allclose(value1[i], value2[i], + atol=1e-7)) def get_places(self): places = ['cpu'] diff --git a/python/paddle/fluid/tests/unittests/test_merged_momentum_op.py b/python/paddle/fluid/tests/unittests/test_merged_momentum_op.py index c38dea8bc39..4afdc267de5 100644 --- a/python/paddle/fluid/tests/unittests/test_merged_momentum_op.py +++ b/python/paddle/fluid/tests/unittests/test_merged_momentum_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -45,22 +45,21 @@ def run_momentum_op(params, } param_vars = [ - helper.create_variable( - persistable=True, shape=p.shape, dtype=p.dtype) for p in params + helper.create_variable(persistable=True, + shape=p.shape, + dtype=p.dtype) for p in params ] grad_vars = [ - helper.create_variable( - shape=g.shape, dtype=g.dtype) for g in grads + helper.create_variable(shape=g.shape, dtype=g.dtype) for g in grads ] velocity_vars = [ - helper.create_variable( - persistable=True, shape=v.shape, dtype=v.dtype) - for v in velocitys + helper.create_variable(persistable=True, + shape=v.shape, + dtype=v.dtype) for v in velocitys ] - lr_var = helper.create_variable( - persistable=True, - shape=learning_rate.shape, - dtype=learning_rate.dtype) + lr_var = helper.create_variable(persistable=True, + shape=learning_rate.shape, + dtype=learning_rate.dtype) feed_dict = OrderedDict() @@ -79,14 +78,15 @@ def run_momentum_op(params, if multi_precision: master_param_vars = [ - helper.create_variable( - persistable=True, shape=p.shape, dtype=p.dtype) - for p in master_params + helper.create_variable(persistable=True, + shape=p.shape, + dtype=p.dtype) for p in master_params ] feed_dict.update( - OrderedDict([(mp_var.name, mp_val) - for mp_var, mp_val in zip(master_param_vars, - master_params)])) + OrderedDict([ + (mp_var.name, mp_val) + for mp_var, mp_val in zip(master_param_vars, master_params) + ])) # CPUPlace does not use MasterParam if isinstance(place, paddle.CUDAPlace): fetch_list = fetch_list + [ @@ -108,8 +108,10 @@ def run_momentum_op(params, if multi_precision: inputs['MasterParam'] = master_param_vars[i] outputs['MasterParamOut'] = master_param_vars[i] - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) else: inputs = { 'Param': param_vars, @@ -121,8 +123,10 @@ def run_momentum_op(params, if multi_precision: inputs['MasterParam'] = master_param_vars outputs['MasterParamOut'] = master_param_vars - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) exe = paddle.static.Executor(place) with paddle.static.scope_guard(paddle.static.Scope()): @@ -152,22 +156,21 @@ def run_momentum_op2(params, helper = LayerHelper(op_type, **locals()) param_vars = [ - helper.create_variable( - persistable=True, shape=p.shape, dtype=p.dtype) for p in params + helper.create_variable(persistable=True, + shape=p.shape, + dtype=p.dtype) for p in params ] grad_vars = [ - helper.create_variable( - shape=g.shape, dtype=g.dtype) for g in grads + helper.create_variable(shape=g.shape, dtype=g.dtype) for g in grads ] velocity_vars = [ - helper.create_variable( - persistable=True, shape=v.shape, dtype=v.dtype) - for v in velocitys + helper.create_variable(persistable=True, + shape=v.shape, + dtype=v.dtype) for v in velocitys ] - lr_var = helper.create_variable( - persistable=True, - shape=learning_rate.shape, - dtype=learning_rate.dtype) + lr_var = helper.create_variable(persistable=True, + shape=learning_rate.shape, + dtype=learning_rate.dtype) feed_dict = OrderedDict() @@ -186,14 +189,15 @@ def run_momentum_op2(params, if multi_precision: master_param_vars = [ - helper.create_variable( - persistable=True, shape=p.shape, dtype=p.dtype) - for p in master_params + helper.create_variable(persistable=True, + shape=p.shape, + dtype=p.dtype) for p in master_params ] feed_dict.update( - OrderedDict([(mp_var.name, mp_val) - for 
mp_var, mp_val in zip(master_param_vars, - master_params)])) + OrderedDict([ + (mp_var.name, mp_val) + for mp_var, mp_val in zip(master_param_vars, master_params) + ])) # CPUPlace does not use MasterParam if isinstance(place, paddle.CUDAPlace): fetch_list = fetch_list + [ @@ -223,8 +227,10 @@ def run_momentum_op2(params, 'regularization_method': 'l2_decay', 'regularization_coeff': 2.0, } - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) else: inputs = { 'Param': param_vars, @@ -237,16 +243,22 @@ def run_momentum_op2(params, inputs['MasterParam'] = master_param_vars outputs['MasterParamOut'] = master_param_vars attrs = { - 'mu': mu, - 'multi_precision': multi_precision, - 'rescale_grad': rescale_grad, - 'use_nesterov': use_nesterov, + 'mu': + mu, + 'multi_precision': + multi_precision, + 'rescale_grad': + rescale_grad, + 'use_nesterov': + use_nesterov, 'regularization_method': ['l2_decay' for i in range(len(param_vars))], 'regularization_coeff': [2.0 for i in range(len(param_vars))], } - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) exe = paddle.static.Executor(place) with paddle.static.scope_guard(paddle.static.Scope()): @@ -255,6 +267,7 @@ def run_momentum_op2(params, class TestMergedMomentum(unittest.TestCase): + def setUp(self): paddle.enable_static() self.shapes = [[3, 4], [2, 7], [5, 6], [7, 8]] @@ -284,18 +297,17 @@ class TestMergedMomentum(unittest.TestCase): self.shapes, multi_precision, self.seed, place) def run_op(use_merged): - # FIXME(zengjinle): CPU Momentum Op does not support rescale_grad + # FIXME(zengjinle): CPU Momentum Op does not support rescale_grad rescale_grad = 1.0 if isinstance(place, paddle.CPUPlace) else 0.01 - return run_momentum_op( - params, - grads, - velocitys, - master_params, - learning_rate, - place, - multi_precision, - rescale_grad=rescale_grad, - use_merged=use_merged) + return run_momentum_op(params, + grads, + velocitys, + master_params, + learning_rate, + place, + multi_precision, + rescale_grad=rescale_grad, + use_merged=use_merged) outs1 = run_op(True) outs2 = run_op(False) @@ -319,6 +331,7 @@ class TestMergedMomentum(unittest.TestCase): class TestMergedMomentum2(unittest.TestCase): + def setUp(self): paddle.enable_static() self.shapes = [[3, 4], [2, 7], [5, 6], [7, 8]] @@ -347,19 +360,18 @@ class TestMergedMomentum2(unittest.TestCase): self.shapes, multi_precision, self.seed, place) def run_op(use_nesterov, use_merged): - # FIXME(zengjinle): CPU Momentum Op does not support rescale_grad + # FIXME(zengjinle): CPU Momentum Op does not support rescale_grad rescale_grad = 1.0 if isinstance(place, paddle.CPUPlace) else 0.01 - return run_momentum_op2( - params, - grads, - velocitys, - master_params, - learning_rate, - place, - multi_precision, - rescale_grad=rescale_grad, - use_merged=use_merged, - use_nesterov=use_nesterov) + return run_momentum_op2(params, + grads, + velocitys, + master_params, + learning_rate, + place, + multi_precision, + rescale_grad=rescale_grad, + use_merged=use_merged, + use_nesterov=use_nesterov) outs1 = run_op(use_nesterov=True, use_merged=True) outs2 = run_op(use_nesterov=True, use_merged=False) diff --git a/python/paddle/fluid/tests/unittests/test_meshgrid_op.py b/python/paddle/fluid/tests/unittests/test_meshgrid_op.py index 95acdbe4a06..8e76859c880 100644 --- 
a/python/paddle/fluid/tests/unittests/test_meshgrid_op.py +++ b/python/paddle/fluid/tests/unittests/test_meshgrid_op.py @@ -24,6 +24,7 @@ from paddle.fluid.framework import _test_eager_guard class TestMeshgridOp(OpTest): + def setUp(self): self.op_type = "meshgrid" self.dtype = self.get_dtype() @@ -62,17 +63,23 @@ class TestMeshgridOp(OpTest): class TestMeshgridOp2(TestMeshgridOp): + def get_x_shape(self): return [100, 300] class TestMeshgridOp3(unittest.TestCase): + def test_api(self): x = fluid.data(shape=[100], dtype='int32', name='x') y = fluid.data(shape=[200], dtype='int32', name='y') - input_1 = np.random.randint(0, 100, [100, ]).astype('int32') - input_2 = np.random.randint(0, 100, [200, ]).astype('int32') + input_1 = np.random.randint(0, 100, [ + 100, + ]).astype('int32') + input_2 = np.random.randint(0, 100, [ + 200, + ]).astype('int32') out_1 = np.reshape(input_1, [100, 1]) out_1 = np.broadcast_to(out_1, [100, 200]) @@ -82,20 +89,27 @@ class TestMeshgridOp3(unittest.TestCase): exe = fluid.Executor(place=fluid.CPUPlace()) grid_x, grid_y = paddle.tensor.meshgrid(x, y) res_1, res_2 = exe.run(fluid.default_main_program(), - feed={'x': input_1, - 'y': input_2}, + feed={ + 'x': input_1, + 'y': input_2 + }, fetch_list=[grid_x, grid_y]) assert np.array_equal(res_1, out_1) assert np.array_equal(res_2, out_2) class TestMeshgridOp4(unittest.TestCase): + def test_list_input(self): x = fluid.data(shape=[100], dtype='int32', name='x') y = fluid.data(shape=[200], dtype='int32', name='y') - input_1 = np.random.randint(0, 100, [100, ]).astype('int32') - input_2 = np.random.randint(0, 100, [200, ]).astype('int32') + input_1 = np.random.randint(0, 100, [ + 100, + ]).astype('int32') + input_2 = np.random.randint(0, 100, [ + 200, + ]).astype('int32') out_1 = np.reshape(input_1, [100, 1]) out_1 = np.broadcast_to(out_1, [100, 200]) @@ -105,8 +119,10 @@ class TestMeshgridOp4(unittest.TestCase): exe = fluid.Executor(place=fluid.CPUPlace()) grid_x, grid_y = paddle.tensor.meshgrid([x, y]) res_1, res_2 = exe.run(fluid.default_main_program(), - feed={'x': input_1, - 'y': input_2}, + feed={ + 'x': input_1, + 'y': input_2 + }, fetch_list=[grid_x, grid_y]) assert np.array_equal(res_1, out_1) @@ -114,12 +130,17 @@ class TestMeshgridOp4(unittest.TestCase): class TestMeshgridOp5(unittest.TestCase): + def test_tuple_input(self): x = fluid.data(shape=[100], dtype='int32', name='x') y = fluid.data(shape=[200], dtype='int32', name='y') - input_1 = np.random.randint(0, 100, [100, ]).astype('int32') - input_2 = np.random.randint(0, 100, [200, ]).astype('int32') + input_1 = np.random.randint(0, 100, [ + 100, + ]).astype('int32') + input_2 = np.random.randint(0, 100, [ + 200, + ]).astype('int32') out_1 = np.reshape(input_1, [100, 1]) out_1 = np.broadcast_to(out_1, [100, 200]) @@ -129,8 +150,10 @@ class TestMeshgridOp5(unittest.TestCase): exe = fluid.Executor(place=fluid.CPUPlace()) grid_x, grid_y = paddle.tensor.meshgrid((x, y)) res_1, res_2 = exe.run(fluid.default_main_program(), - feed={'x': input_1, - 'y': input_2}, + feed={ + 'x': input_1, + 'y': input_2 + }, fetch_list=[grid_x, grid_y]) assert np.array_equal(res_1, out_1) @@ -138,9 +161,14 @@ class TestMeshgridOp5(unittest.TestCase): class TestMeshgridOp6(unittest.TestCase): + def test_api_with_dygraph(self): - input_3 = np.random.randint(0, 100, [100, ]).astype('int32') - input_4 = np.random.randint(0, 100, [200, ]).astype('int32') + input_3 = np.random.randint(0, 100, [ + 100, + ]).astype('int32') + input_4 = np.random.randint(0, 100, [ + 200, + ]).astype('int32') 
with fluid.dygraph.guard(): tensor_3 = fluid.dygraph.to_variable(input_3) @@ -156,9 +184,14 @@ class TestMeshgridOp6(unittest.TestCase): class TestMeshgridOp7(unittest.TestCase): + def test_api_with_dygraph_list_input(self): - input_3 = np.random.randint(0, 100, [100, ]).astype('int32') - input_4 = np.random.randint(0, 100, [200, ]).astype('int32') + input_3 = np.random.randint(0, 100, [ + 100, + ]).astype('int32') + input_4 = np.random.randint(0, 100, [ + 200, + ]).astype('int32') with fluid.dygraph.guard(): tensor_3 = fluid.dygraph.to_variable(input_3) @@ -174,9 +207,14 @@ class TestMeshgridOp7(unittest.TestCase): class TestMeshgridOp8(unittest.TestCase): + def test_api_with_dygraph_tuple_input(self): - input_3 = np.random.randint(0, 100, [100, ]).astype('int32') - input_4 = np.random.randint(0, 100, [200, ]).astype('int32') + input_3 = np.random.randint(0, 100, [ + 100, + ]).astype('int32') + input_4 = np.random.randint(0, 100, [ + 200, + ]).astype('int32') with fluid.dygraph.guard(): tensor_3 = fluid.dygraph.to_variable(input_3) @@ -192,9 +230,14 @@ class TestMeshgridOp8(unittest.TestCase): class TestMeshgridEager(unittest.TestCase): + def test_dygraph_final_state_api(self): - input_1 = np.random.randint(0, 100, [100, ]).astype('int32') - input_2 = np.random.randint(0, 100, [200, ]).astype('int32') + input_1 = np.random.randint(0, 100, [ + 100, + ]).astype('int32') + input_2 = np.random.randint(0, 100, [ + 200, + ]).astype('int32') with fluid.dygraph.guard(): tensor_1 = fluid.dygraph.to_variable(input_1) diff --git a/python/paddle/fluid/tests/unittests/test_min_op.py b/python/paddle/fluid/tests/unittests/test_min_op.py index 13f82fb9bd7..6e5f9d13215 100644 --- a/python/paddle/fluid/tests/unittests/test_min_op.py +++ b/python/paddle/fluid/tests/unittests/test_min_op.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard class ApiMinTest(unittest.TestCase): + def setUp(self): if core.is_compiled_with_cuda(): self.place = core.CUDAPlace(0) @@ -93,13 +94,13 @@ class ApiMinTest(unittest.TestCase): class TestOutDtype(unittest.TestCase): + def test_min(self): api_fn = paddle.min shape = [10, 16] - check_out_dtype( - api_fn, - in_specs=[(shape, )], - expect_dtypes=['float32', 'float64', 'int32', 'int64']) + check_out_dtype(api_fn, + in_specs=[(shape, )], + expect_dtypes=['float32', 'float64', 'int32', 'int64']) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_mine_hard_examples_op.py b/python/paddle/fluid/tests/unittests/test_mine_hard_examples_op.py index 6730d5de2c2..6b0f6075e81 100644 --- a/python/paddle/fluid/tests/unittests/test_mine_hard_examples_op.py +++ b/python/paddle/fluid/tests/unittests/test_mine_hard_examples_op.py @@ -22,6 +22,7 @@ from op_test import OpTest class TestMineHardExamplesOp(OpTest): + def set_data(self): self.init_test_data() self.inputs = { @@ -58,17 +59,17 @@ class TestMineHardExamplesOp(OpTest): self.neg_overlap = 0.5 self.sample_size = 0 self.mining_type = "max_negative" - self.cls_loss = np.array([[0.1, 0.1, 0.3], - [0.3, 0.1, 0.1]]).astype('float64') + self.cls_loss = np.array([[0.1, 0.1, 0.3], [0.3, 0.1, + 0.1]]).astype('float64') - self.loc_loss = np.array([[0.1, 0.2, 0.3], - [0.3, 0.4, 0.1]]).astype('float64') + self.loc_loss = np.array([[0.1, 0.2, 0.3], [0.3, 0.4, + 0.1]]).astype('float64') - self.match_dis = np.array([[0.2, 0.4, 0.8], - [0.1, 0.9, 0.3]]).astype('float64') + self.match_dis = np.array([[0.2, 0.4, 0.8], [0.1, 0.9, + 0.3]]).astype('float64') - self.match_indices = np.array([[0, -1, -1], - 
[-1, 0, -1]]).astype('int32') + self.match_indices = np.array([[0, -1, -1], [-1, 0, + -1]]).astype('int32') self.updated_match_indices = self.match_indices @@ -77,19 +78,20 @@ class TestMineHardExamplesOp(OpTest): class TestMineHardExamplesOpHardExample(TestMineHardExamplesOp): + def init_test_data(self): super(TestMineHardExamplesOpHardExample, self).init_test_data() self.mining_type = "hard_example" self.sample_size = 2 - self.cls_loss = np.array([[0.5, 0.1, 0.3], - [0.3, 0.1, 0.1]]).astype('float64') + self.cls_loss = np.array([[0.5, 0.1, 0.3], [0.3, 0.1, + 0.1]]).astype('float64') - self.loc_loss = np.array([[0.2, 0.2, 0.3], - [0.3, 0.1, 0.2]]).astype('float64') + self.loc_loss = np.array([[0.2, 0.2, 0.3], [0.3, 0.1, + 0.2]]).astype('float64') - self.match_indices = np.array([[0, -1, -1], - [-1, 0, -1]]).astype('int32') + self.match_indices = np.array([[0, -1, -1], [-1, 0, + -1]]).astype('int32') self.updated_match_indices = np.array([[0, -1, -1], [-1, -1, -1]]).astype('int32') diff --git a/python/paddle/fluid/tests/unittests/test_minimum_op.py b/python/paddle/fluid/tests/unittests/test_minimum_op.py index ce7b9f72b66..2da5df85826 100644 --- a/python/paddle/fluid/tests/unittests/test_minimum_op.py +++ b/python/paddle/fluid/tests/unittests/test_minimum_op.py @@ -21,6 +21,7 @@ import paddle.fluid.core as core class ApiMinimumTest(unittest.TestCase): + def setUp(self): if core.is_compiled_with_cuda(): self.place = core.CUDAPlace(0) @@ -47,8 +48,10 @@ class ApiMinimumTest(unittest.TestCase): data_y = paddle.static.data("y", shape=[10, 15], dtype="float32") result_max = paddle.minimum(data_x, data_y) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "y": self.input_y}, + res, = exe.run(feed={ + "x": self.input_x, + "y": self.input_y + }, fetch_list=[result_max]) self.assertTrue(np.allclose(res, self.np_expected1)) @@ -58,8 +61,10 @@ class ApiMinimumTest(unittest.TestCase): data_z = paddle.static.data("z", shape=[15], dtype="float32") result_max = paddle.minimum(data_x, data_z) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "z": self.input_z}, + res, = exe.run(feed={ + "x": self.input_x, + "z": self.input_z + }, fetch_list=[result_max]) self.assertTrue(np.allclose(res, self.np_expected2)) @@ -69,8 +74,10 @@ class ApiMinimumTest(unittest.TestCase): data_c = paddle.static.data("c", shape=[3], dtype="int64") result_max = paddle.minimum(data_a, data_c) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"a": self.input_a, - "c": self.input_c}, + res, = exe.run(feed={ + "a": self.input_a, + "c": self.input_c + }, fetch_list=[result_max]) self.assertTrue(np.allclose(res, self.np_expected3)) @@ -80,8 +87,10 @@ class ApiMinimumTest(unittest.TestCase): data_c = paddle.static.data("c", shape=[3], dtype="int64") result_max = paddle.minimum(data_b, data_c) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"b": self.input_b, - "c": self.input_c}, + res, = exe.run(feed={ + "b": self.input_b, + "c": self.input_c + }, fetch_list=[result_max]) self.assertTrue(np.allclose(res, self.np_expected4)) diff --git a/python/paddle/fluid/tests/unittests/test_minus_op.py b/python/paddle/fluid/tests/unittests/test_minus_op.py index 461ff6a9273..9a63947b877 100644 --- a/python/paddle/fluid/tests/unittests/test_minus_op.py +++ b/python/paddle/fluid/tests/unittests/test_minus_op.py @@ -21,6 +21,7 @@ import paddle class TestMinusOp(OpTest): + def setUp(self): self.op_type = "minus" self.inputs = { diff --git 
a/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py b/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py index 33393bc2fcd..650b6a9a247 100644 --- a/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py +++ b/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py @@ -41,23 +41,21 @@ def conv_net(use_feed): img = fluid.layers.data(name='image', shape=img_shape, dtype='float16') label = fluid.layers.data(name='label', shape=[1], dtype='int64') - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_1 = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) conv_pool_1 = fluid.layers.cast(conv_pool_1, np.float32) - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - pool_size=2, - pool_stride=2, - act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") hidden = fluid.layers.cast(conv_pool_2, np.float32) return loss_net(hidden, label) @@ -68,9 +66,11 @@ def _optimizer(learning_rate=1e-6): class TestResnet(TestParallelExecutorBase): + def check_model(self, use_device): - img, label = init_data( - batch_size=batch_size, img_shape=img_shape, label_range=9) + img, label = init_data(batch_size=batch_size, + img_shape=img_shape, + label_range=9) img = np.float16(img) feed_dict = {"image": img, "label": label} diff --git a/python/paddle/fluid/tests/unittests/test_mixed_precision.py b/python/paddle/fluid/tests/unittests/test_mixed_precision.py index 57ea7ad1aa2..68dfb88ccd0 100644 --- a/python/paddle/fluid/tests/unittests/test_mixed_precision.py +++ b/python/paddle/fluid/tests/unittests/test_mixed_precision.py @@ -25,6 +25,7 @@ paddle.enable_static() class SimpleNet(nn.Layer): + def __init__(self, input_size, output_size): super(SimpleNet, self).__init__() self.linear1 = nn.Linear(input_size, output_size) @@ -38,7 +39,7 @@ class SimpleNet(nn.Layer): x = self.linear1(x) # currently, paddle's relu may hide nan/inf, relu(nan) = 0, relu(inf)= inf # so, do not use it here. 
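The comment above records why relu is bypassed in this net: a comparison-based relu silently maps NaN to zero, so an injected NaN would be swallowed before it can trip the loss-scaling check, while inf still passes through. A minimal NumPy sketch of that masking effect (illustrative only, not Paddle's actual relu kernel):

import numpy as np

def relu_by_compare(x):
    # NaN > 0 evaluates to False, so NaN is replaced by 0 and the
    # overflow signal is lost; inf > 0 is True, so inf passes through.
    return np.where(x > 0, x, 0.0)

x = np.array([1.5, -2.0, np.nan, np.inf])
print(relu_by_compare(x))
# [1.5  0.  0.  inf]  (the NaN has vanished)
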
- #x = self.relu1(x) + #x = self.relu1(x) x = self.linear2(x) #x = self.relu2(x) x = self.linear3(x) @@ -47,6 +48,7 @@ class SimpleNet(nn.Layer): class AMPTest(unittest.TestCase): + def setUp(self): self.place = paddle.CUDAPlace(0) @@ -63,8 +65,9 @@ class AMPTest(unittest.TestCase): opt = paddle.fluid.optimizer.Adam( learning_rate=0.0001, parameter_list=model.parameters()) # 定义优化器 - opt = paddle.static.amp.decorate( - opt, init_loss_scaling=128.0, use_dynamic_loss_scaling=True) + opt = paddle.static.amp.decorate(opt, + init_loss_scaling=128.0, + use_dynamic_loss_scaling=True) opt.minimize(loss) return model, loss, opt @@ -105,8 +108,10 @@ class AMPTest(unittest.TestCase): train_data[i][10] = np.inf loss_, weight_, moment1_, beta_pow1_, found_inf = exe.run( main_prog, - feed={"X": train_data[i], - "Y": labels[i]}, + feed={ + "X": train_data[i], + "Y": labels[i] + }, fetch_list=fetch_list) print(loss_, weight_[0][0], moment1_[0][0], beta_pow1_, found_inf) diff --git a/python/paddle/fluid/tests/unittests/test_mode_op.py b/python/paddle/fluid/tests/unittests/test_mode_op.py index 471904b0c94..ebb95dfea39 100644 --- a/python/paddle/fluid/tests/unittests/test_mode_op.py +++ b/python/paddle/fluid/tests/unittests/test_mode_op.py @@ -57,6 +57,7 @@ def cal_mode(a, axis, keepdim=False): class TestModeOp(OpTest): + def init_args(self): self.axis = 1 @@ -82,6 +83,7 @@ class TestModeOp(OpTest): class TestModeOpLastdim(OpTest): + def init_args(self): self.axis = -1 @@ -107,12 +109,14 @@ class TestModeOpLastdim(OpTest): class TestModeOpKernels(unittest.TestCase): + def setUp(self): self.axises = [-1, 1] np.random.seed(666) self.inputs = np.ceil(np.random.rand(2, 10, 10) * 1000) def test_mode_op(self): + def test_cpu_kernel(): paddle.set_device('cpu') tensor = paddle.to_tensor(self.inputs) @@ -121,8 +125,9 @@ class TestModeOpKernels(unittest.TestCase): v, inds = paddle.mode(tensor, axis) self.assertTrue(np.allclose(v.numpy(), value_expect)) - value_expect, indice_expect = cal_mode( - self.inputs, axis, keepdim=True) + value_expect, indice_expect = cal_mode(self.inputs, + axis, + keepdim=True) v, inds = paddle.mode(tensor, axis, keepdim=True) self.assertTrue(np.allclose(v.numpy(), value_expect)) @@ -134,8 +139,9 @@ class TestModeOpKernels(unittest.TestCase): v, inds = paddle.mode(tensor, axis) self.assertTrue(np.allclose(v.numpy(), value_expect)) - value_expect, indice_expect = cal_mode( - self.inputs, axis, keepdim=True) + value_expect, indice_expect = cal_mode(self.inputs, + axis, + keepdim=True) v, inds = paddle.mode(tensor, axis, keepdim=True) self.assertTrue(np.allclose(v.numpy(), value_expect)) @@ -146,6 +152,7 @@ class TestModeOpKernels(unittest.TestCase): class TestModeOpErrors(unittest.TestCase): + def setUp(self): self.x = paddle.uniform([2, 10, 20, 25], dtype='float32') @@ -156,17 +163,19 @@ class TestModeOpErrors(unittest.TestCase): class TestModeOpInStatic(unittest.TestCase): + def setUp(self): np.random.seed(666) - self.input_data = np.ceil( - np.random.random((2, 10, 10)) * 1000, dtype=np.float64) + self.input_data = np.ceil(np.random.random((2, 10, 10)) * 1000, + dtype=np.float64) def test_run_static(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - input_tensor = paddle.static.data( - name="x", shape=[2, 10, 10], dtype="float64") + input_tensor = paddle.static.data(name="x", + shape=[2, 10, 10], + dtype="float64") result = paddle.mode(input_tensor, axis=1) expect_value = cal_mode(self.input_data, axis=1)[0] diff --git 
a/python/paddle/fluid/tests/unittests/test_modelaverage.py b/python/paddle/fluid/tests/unittests/test_modelaverage.py index 8dab35f7f54..7bb1e7d2e7a 100644 --- a/python/paddle/fluid/tests/unittests/test_modelaverage.py +++ b/python/paddle/fluid/tests/unittests/test_modelaverage.py @@ -1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -25,6 +25,7 @@ import paddle.nn as nn class TestModelAverage(unittest.TestCase): + def test_model_average_static(self): paddle.enable_static() place = fluid.CPUPlace() @@ -39,8 +40,8 @@ class TestModelAverage(unittest.TestCase): hidden = fluid.layers.fc(input=data, size=10) loss = fluid.layers.mean(hidden) test_program = train_program.clone() - optimizer = paddle.optimizer.Momentum( - learning_rate=0.2, momentum=0.1) + optimizer = paddle.optimizer.Momentum(learning_rate=0.2, + momentum=0.1) optimizer.minimize(loss) # build ModelAverage optimizer @@ -59,28 +60,18 @@ class TestModelAverage(unittest.TestCase): 'fc_0.b_0_old_num_accumulates_0', 'fc_0.b_0_num_updates_0' ]) self.assertTrue( - np.equal( - sum_1, np.zeros( - shape=[10], dtype='float32')).all()) + np.equal(sum_1, np.zeros(shape=[10], dtype='float32')).all()) self.assertTrue( - np.equal( - sum_2, np.zeros( - shape=[10], dtype='float32')).all()) + np.equal(sum_2, np.zeros(shape=[10], dtype='float32')).all()) self.assertTrue( - np.equal( - num_accumulates, np.array( - [0], dtype='int64')).all()) + np.equal(num_accumulates, np.array([0], dtype='int64')).all()) self.assertTrue( - np.equal( - old_num_accumulates, np.array( - [2], dtype='int64')).all()) + np.equal(old_num_accumulates, np.array([2], dtype='int64')).all()) self.assertTrue( - np.equal( - num_updates, np.array( - [10], dtype='int64')).all()) + np.equal(num_updates, np.array([10], dtype='int64')).all()) - average_b = (sum_1 + sum_2 + sum_3) / ( - num_accumulates + old_num_accumulates) + average_b = (sum_1 + sum_2 + sum_3) / (num_accumulates + + old_num_accumulates) # apply ModelAverage with model_average.apply(exe): x = np.random.random(size=(10, 1)).astype('float32') @@ -105,6 +96,7 @@ class TestModelAverage(unittest.TestCase): # define a random dataset class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): self.num_samples = num_samples @@ -118,6 +110,7 @@ class TestModelAverage(unittest.TestCase): return self.num_samples class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) @@ -142,12 +135,12 @@ class TestModelAverage(unittest.TestCase): sum_1 = model_average._get_accumulator('sum_1', layer.bias) sum_2 = model_average._get_accumulator('sum_2', layer.bias) sum_3 = model_average._get_accumulator('sum_3', layer.bias) - num_accumulates = model_average._get_accumulator('num_accumulates', - layer.bias) + num_accumulates = model_average._get_accumulator( + 'num_accumulates', layer.bias) old_num_accumulates = model_average._get_accumulator( 'old_num_accumulates', layer.bias) - num_updates = model_average._get_accumulator('num_updates', - layer.bias) + num_updates = 
model_average._get_accumulator( + 'num_updates', layer.bias) return ((sum_1 + sum_2 + sum_3) / (num_accumulates + old_num_accumulates)).numpy() @@ -157,10 +150,9 @@ class TestModelAverage(unittest.TestCase): out = layer(image) loss = loss_fn(out, label) loss.backward() - self.assertAlmostEqual( - np.mean(layer.bias.numpy()), - np.mean(check_param), - delta=5e-3) + self.assertAlmostEqual(np.mean(layer.bias.numpy()), + np.mean(check_param), + delta=5e-3) # print("Evaluate batch {}: loss = {}, bias = {}".format( # batch_id, np.mean(loss.numpy()), layer.bias.numpy())) @@ -168,8 +160,9 @@ class TestModelAverage(unittest.TestCase): layer = LinearNet() loss_fn = nn.CrossEntropyLoss() - optimizer = paddle.optimizer.Momentum( - learning_rate=0.2, momentum=0.1, parameters=layer.parameters()) + optimizer = paddle.optimizer.Momentum(learning_rate=0.2, + momentum=0.1, + parameters=layer.parameters()) # build ModelAverage optimizer model_average = paddle.incubate.optimizer.ModelAverage( 0.15, @@ -179,18 +172,16 @@ class TestModelAverage(unittest.TestCase): # create data loader dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) - loader = paddle.io.DataLoader( - dataset, - batch_size=BATCH_SIZE, - shuffle=True, - drop_last=True, - num_workers=2) - eval_loader = paddle.io.DataLoader( - dataset, - batch_size=BATCH_SIZE, - shuffle=True, - drop_last=True, - num_workers=1) + loader = paddle.io.DataLoader(dataset, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) + eval_loader = paddle.io.DataLoader(dataset, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=1) # train check_param = train(layer, loader, loss_fn, optimizer, model_average) # print(check_param) diff --git a/python/paddle/fluid/tests/unittests/test_modified_huber_loss_op.py b/python/paddle/fluid/tests/unittests/test_modified_huber_loss_op.py index f7b6ad9bfee..2258f32dafd 100644 --- a/python/paddle/fluid/tests/unittests/test_modified_huber_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_modified_huber_loss_op.py @@ -29,6 +29,7 @@ def modified_huber_loss_forward(val): class TestModifiedHuberLossOp(OpTest): + def setUp(self): self.op_type = 'modified_huber_loss' samples_num = 100 diff --git a/python/paddle/fluid/tests/unittests/test_momentum_op.py b/python/paddle/fluid/tests/unittests/test_momentum_op.py index a4f38e37731..0b6bd99e659 100644 --- a/python/paddle/fluid/tests/unittests/test_momentum_op.py +++ b/python/paddle/fluid/tests/unittests/test_momentum_op.py @@ -53,6 +53,7 @@ def calculate_momentum_by_numpy(param, class TestMomentumOp1(OpTest): + def setUp(self): self.op_type = "momentum" self.dtype = np.float32 @@ -92,6 +93,7 @@ class TestMomentumOp1(OpTest): class TestMomentumOpFp16(TestMomentumOp1): + def init_dtype(self): self.dtype = np.float16 @@ -139,6 +141,7 @@ class TestMomentumOp2(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestLarsMomentumOpWithMP(OpTest): + def setUp(self): self.config() self.op_type = "lars_momentum" @@ -218,6 +221,7 @@ class TestLarsMomentumOpWithMP(OpTest): class TestLarsMomentumOp(OpTest): + def setUp(self): self.config() self.op_type = "lars_momentum" @@ -240,8 +244,8 @@ class TestLarsMomentumOp(OpTest): gnorm = np.sqrt(np.square(grad).sum()) local_lr = learning_rate * lars_coeff * pnorm / ( gnorm + lars_weight_decay * param) - velocity_out = mu * velocity + local_lr * (grad + lars_weight_decay - * param) + velocity_out = mu * velocity + local_lr * ( + grad + lars_weight_decay * param) param_out = 
param - velocity_out params.append(("SubParam_" + str(i), param)) @@ -274,6 +278,7 @@ class TestLarsMomentumOp(OpTest): class TestSparseMomentumOp(unittest.TestCase): + def setUp(self): self.use_nesterov = False self.regularization_method = "" @@ -322,18 +327,17 @@ class TestSparseMomentumOp(unittest.TestCase): lr.set(lr_array, place) # create and run operator - op = Operator( - "momentum", - Param='Param', - Grad='Grad', - Velocity='Velocity', - ParamOut='ParamOut', - VelocityOut='VelocityOut', - LearningRate='LearningRate', - mu=mu, - use_nesterov=use_nesterov, - regularization_method=regularization_method, - regularization_coeff=regularization_coeff) + op = Operator("momentum", + Param='Param', + Grad='Grad', + Velocity='Velocity', + ParamOut='ParamOut', + VelocityOut='VelocityOut', + LearningRate='LearningRate', + mu=mu, + use_nesterov=use_nesterov, + regularization_method=regularization_method, + regularization_coeff=regularization_coeff) op.run(scope, place) # get and compare result @@ -373,11 +377,13 @@ class TestSparseMomentumOp(unittest.TestCase): class TestSparseMomentumOp2(TestSparseMomentumOp): + def init_kernel(self): self.use_nesterov = True class TestSparseMomentumOpWithMultiPrecision(unittest.TestCase): + def setUp(self): self.init_args() self.regularization_method = "" @@ -431,22 +437,21 @@ class TestSparseMomentumOpWithMultiPrecision(unittest.TestCase): lr.set(lr_array, place) # create and run operator - op = Operator( - "momentum", - Param='Param', - Grad='Grad', - Velocity='Velocity', - MasterParam='MasterParam', - ParamOut='ParamOut', - VelocityOut='VelocityOut', - MasterParamOut='MasterParamOut', - LearningRate='LearningRate', - mu=mu, - use_nesterov=use_nesterov, - regularization_method=regularization_method, - regularization_coeff=regularization_coeff, - multi_precision=True, - rescale_grad=1.0) + op = Operator("momentum", + Param='Param', + Grad='Grad', + Velocity='Velocity', + MasterParam='MasterParam', + ParamOut='ParamOut', + VelocityOut='VelocityOut', + MasterParamOut='MasterParamOut', + LearningRate='LearningRate', + mu=mu, + use_nesterov=use_nesterov, + regularization_method=regularization_method, + regularization_coeff=regularization_coeff, + multi_precision=True, + rescale_grad=1.0) op.run(scope, place) # get and compare result @@ -482,19 +487,22 @@ class TestSparseMomentumOpWithMultiPrecision(unittest.TestCase): class TestSparseMomentumOpWithMultiPrecision2( TestSparseMomentumOpWithMultiPrecision): + def init_args(self): self.use_nesterov = True class TestMomentumV2(unittest.TestCase): + def test_momentum_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") a = paddle.to_tensor(value) linear = paddle.nn.Linear(13, 5) # This can be any optimizer supported by dygraph. 
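These momentum tests compare the operator output against calculate_momentum_by_numpy. As a point of reference, the plain update they encode (assuming no weight decay, rescale_grad=1.0 and use_nesterov=False) reduces to the sketch below; this is a simplified illustration, not the operator's full logic.

import numpy as np

def momentum_step(param, grad, velocity, lr, mu):
    # the velocity accumulates gradients; the parameter moves against it
    velocity_out = mu * velocity + grad
    param_out = param - lr * velocity_out
    return param_out, velocity_out

param, velocity = np.zeros(3), np.zeros(3)
param, velocity = momentum_step(param, np.ones(3), velocity, lr=0.01, mu=0.9)
print(param, velocity)  # [-0.01 -0.01 -0.01] [1. 1. 1.]
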
- adam = paddle.optimizer.Momentum( - learning_rate=0.01, momentum=0.9, parameters=linear.parameters()) + adam = paddle.optimizer.Momentum(learning_rate=0.01, + momentum=0.9, + parameters=linear.parameters()) out = linear(a) out.backward() adam.step() @@ -511,13 +519,13 @@ class TestMomentumV2(unittest.TestCase): cost = fluid.layers.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) - rms_optimizer = paddle.optimizer.Momentum( - learning_rate=0.1, momentum=0.9) + rms_optimizer = paddle.optimizer.Momentum(learning_rate=0.1, + momentum=0.9) rms_optimizer.minimize(avg_cost) fetch_list = [avg_cost] - train_reader = paddle.batch( - paddle.dataset.uci_housing.train(), batch_size=1) + train_reader = paddle.batch(paddle.dataset.uci_housing.train(), + batch_size=1) feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) @@ -525,8 +533,9 @@ class TestMomentumV2(unittest.TestCase): exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list) def test_raise_error(self): - self.assertRaises( - ValueError, paddle.optimizer.Momentum, learning_rate=None) + self.assertRaises(ValueError, + paddle.optimizer.Momentum, + learning_rate=None) self.assertRaises(ValueError, paddle.optimizer.Momentum, momentum=None) def test_api_eager_dygraph(self): @@ -536,6 +545,7 @@ class TestMomentumV2(unittest.TestCase): class TestMomentumOpWithDecay(OpTest): + def setUp(self): self.op_type = "momentum" self.dtype = np.float32 @@ -588,6 +598,7 @@ class TestMomentumOpWithDecay(OpTest): class TestMomentumOpWithDecayFP16(TestMomentumOpWithDecay): + def init_config(self): self.dtype = np.float16 @@ -597,11 +608,13 @@ class TestMomentumOpWithDecayFP16(TestMomentumOpWithDecay): class TestMomentumOpWithDecay2(TestMomentumOpWithDecay): + def init_config(self): self.use_nesterov = False class TestSparseMomentumOpWithDecay(TestSparseMomentumOp): + def setUp(self): self.use_nesterov = False self.regularization_method = 'l2_decay' @@ -609,11 +622,13 @@ class TestSparseMomentumOpWithDecay(TestSparseMomentumOp): class TestSparseMomentumOpWithDecay2(TestSparseMomentumOpWithDecay): + def init_kernel(self): self.use_nesterov = True class TestMomentumOpWithDecayAPI(unittest.TestCase): + def _test_momentum_dygraph_common(self, regularization): paddle.disable_static() inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") @@ -650,8 +665,8 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase): momentum_optimizer.minimize(avg_cost) fetch_list = [avg_cost] - train_reader = paddle.batch( - paddle.dataset.uci_housing.train(), batch_size=1) + train_reader = paddle.batch(paddle.dataset.uci_housing.train(), + batch_size=1) feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) @@ -660,14 +675,17 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase): class TestFusedMomentumWithDecayAPI(unittest.TestCase): + def get_program(self, weight_attr, bias_attr=False): main_program = paddle.static.Program() startup_program = paddle.static.Program() - with paddle.static.program_guard( - main_program=main_program, startup_program=startup_program): + with paddle.static.program_guard(main_program=main_program, + startup_program=startup_program): x = paddle.static.data(name='x', shape=[10, 10]) - linear = paddle.nn.Linear( - 10, 10, weight_attr=weight_attr, bias_attr=bias_attr) + linear = paddle.nn.Linear(10, + 10, + weight_attr=weight_attr, + bias_attr=bias_attr) out = linear(x) loss = 
paddle.mean(out) optimizer = paddle.optimizer.Momentum( @@ -731,10 +749,11 @@ class TestFusedMomentumWithDecayAPI(unittest.TestCase): class TestMomentumOpVsMomentumOpWithDecayAPI(unittest.TestCase): + def __update_params(self, momentum, linear): for i in range(10): - inp = paddle.full( - shape=[2, 2], fill_value=i, dtype='float32').astype("float32") + inp = paddle.full(shape=[2, 2], fill_value=i, + dtype='float32').astype("float32") inp = paddle.to_tensor(inp) out = linear(inp) loss = paddle.mean(out) @@ -786,6 +805,7 @@ class TestMomentumOpVsMomentumOpWithDecayAPI(unittest.TestCase): class TestMomentumV2Group(TestMomentumV2): + def test_momentum_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") @@ -793,18 +813,22 @@ class TestMomentumV2Group(TestMomentumV2): linear_1 = paddle.nn.Linear(13, 5) linear_2 = paddle.nn.Linear(5, 3) # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Momentum( - learning_rate=0.01, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001, - 'learning_rate': 0.1, - 'momentum': 0.99 - }], - weight_decay=0.1, - momentum=0.9) + adam = paddle.optimizer.Momentum(learning_rate=0.01, + parameters=[{ + 'params': + linear_1.parameters() + }, { + 'params': + linear_2.parameters(), + 'weight_decay': + 0.001, + 'learning_rate': + 0.1, + 'momentum': + 0.99 + }], + weight_decay=0.1, + momentum=0.9) out = linear_1(a) out = linear_2(out) out.backward() @@ -813,6 +837,7 @@ class TestMomentumV2Group(TestMomentumV2): class TestMultiTensorMomentumDygraph(unittest.TestCase): + def _momentum_optimize_dygraph(self, place, use_param_attr=False, @@ -882,8 +907,7 @@ class TestMultiTensorMomentumDygraph(unittest.TestCase): self.assertEqual(np.allclose(output1, output2, rtol=1e-05), True) for idx in range(len(params1)): self.assertEqual( - np.allclose( - params1[idx], params2[idx], rtol=1e-05), True) + np.allclose(params1[idx], params2[idx], rtol=1e-05), True) def _check_with_param_arrt(self, place, use_amp): output1, params1 = self._momentum_optimize_dygraph( @@ -899,8 +923,7 @@ class TestMultiTensorMomentumDygraph(unittest.TestCase): self.assertEqual(np.allclose(output1, output2, rtol=1e-05), True) for idx in range(len(params1)): self.assertEqual( - np.allclose( - params1[idx], params2[idx], rtol=1e-05), True) + np.allclose(params1[idx], params2[idx], rtol=1e-05), True) def _check_with_param_group(self, place, use_amp): output1, params1 = self._momentum_optimize_dygraph( @@ -916,8 +939,7 @@ class TestMultiTensorMomentumDygraph(unittest.TestCase): self.assertEqual(np.allclose(output1, output2, rtol=1e-05), True) for idx in range(len(params1)): self.assertEqual( - np.allclose( - params1[idx], params2[idx], rtol=1e-05), True) + np.allclose(params1[idx], params2[idx], rtol=1e-05), True) def test_main(self): for place in self._get_places(): @@ -933,6 +955,7 @@ class TestMultiTensorMomentumDygraph(unittest.TestCase): class TestMultiTensorMomentumStatic(unittest.TestCase): + def _momentum_optimize_static(self, place, use_amp=False, @@ -945,8 +968,8 @@ class TestMultiTensorMomentumStatic(unittest.TestCase): exe = paddle.static.Executor(place=place) train_program = paddle.static.Program() startup_program = paddle.static.Program() - optimizer = paddle.optimizer.Momentum( - multi_precision=use_amp, use_multi_tensor=use_multi_tensor) + optimizer = paddle.optimizer.Momentum(multi_precision=use_amp, + use_multi_tensor=use_multi_tensor) if use_amp: optimizer = 
paddle.static.amp.decorate( optimizer, @@ -956,11 +979,13 @@ class TestMultiTensorMomentumStatic(unittest.TestCase): use_fp16_guard=False) with paddle.static.program_guard(train_program, startup_program): if use_amp: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float16') + data = paddle.static.data(shape=[2, 2], + name='X', + dtype='float16') else: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float32') + data = paddle.static.data(shape=[2, 2], + name='X', + dtype='float32') hidden = paddle.static.nn.fc(x=data, size=10) loss = paddle.fluid.layers.mean(hidden) optimizer.minimize(loss) @@ -985,14 +1010,15 @@ class TestMultiTensorMomentumStatic(unittest.TestCase): return places def _check_with_place_amp(self, place, use_amp): - output1 = self._momentum_optimize_static( - place=place, use_amp=use_amp, use_multi_tensor=True) - output2 = self._momentum_optimize_static( - place=place, use_amp=use_amp, use_multi_tensor=False) + output1 = self._momentum_optimize_static(place=place, + use_amp=use_amp, + use_multi_tensor=True) + output2 = self._momentum_optimize_static(place=place, + use_amp=use_amp, + use_multi_tensor=False) for idx in range(len(output1)): self.assertEqual( - np.allclose( - output1[idx], output2[idx], rtol=1e-05), True) + np.allclose(output1[idx], output2[idx], rtol=1e-05), True) def test_main(self): for place in self._get_places(): diff --git a/python/paddle/fluid/tests/unittests/test_monitor.py b/python/paddle/fluid/tests/unittests/test_monitor.py index bea2f6c8b38..107d0ba6f4c 100644 --- a/python/paddle/fluid/tests/unittests/test_monitor.py +++ b/python/paddle/fluid/tests/unittests/test_monitor.py @@ -17,6 +17,7 @@ TestCases for Monitor from __future__ import print_function import paddle + paddle.enable_static() import paddle.fluid as fluid @@ -50,8 +51,10 @@ class TestDatasetWithStat(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1) + var = fluid.layers.data(name=slot, + shape=[1], + dtype="int64", + lod_level=1) slots_vars.append(var) embs = [] @@ -75,9 +78,8 @@ class TestDatasetWithStat(unittest.TestCase): exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_startup_program()) if self.use_data_loader: - data_loader = fluid.io.DataLoader.from_dataset(dataset, - fluid.cpu_places(), - self.drop_last) + data_loader = fluid.io.DataLoader.from_dataset( + dataset, fluid.cpu_places(), self.drop_last) for i in range(self.epoch_num): for data in data_loader(): exe.run(fluid.default_main_program(), feed=data) @@ -85,12 +87,11 @@ class TestDatasetWithStat(unittest.TestCase): else: for i in range(self.epoch_num): try: - exe.train_from_dataset( - fluid.default_main_program(), - dataset, - fetch_list=[embs[0], embs[1]], - fetch_info=["emb0", "emb1"], - print_period=1) + exe.train_from_dataset(fluid.default_main_program(), + dataset, + fetch_list=[embs[0], embs[1]], + fetch_info=["emb0", "emb1"], + print_period=1) except Exception as e: self.assertTrue(False) diff --git a/python/paddle/fluid/tests/unittests/test_mse_loss.py b/python/paddle/fluid/tests/unittests/test_mse_loss.py index 89eef6ca242..b32833916e2 100644 --- a/python/paddle/fluid/tests/unittests/test_mse_loss.py +++ b/python/paddle/fluid/tests/unittests/test_mse_loss.py @@ -24,6 +24,7 @@ from paddle.fluid.executor import Executor class TestMseLoss(unittest.TestCase): + def test_mse_loss(self): input_val = np.random.uniform(0.1, 0.5, (2, 3)).astype("float32") 
label_val = np.random.uniform(0.1, 0.5, (2, 3)).astype("float32") @@ -40,15 +41,19 @@ class TestMseLoss(unittest.TestCase): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = Executor(place) result = exe.run(fluid.default_main_program(), - feed={"input": input_val, - "label": label_val}, + feed={ + "input": input_val, + "label": label_val + }, fetch_list=[output]) self.assertTrue(np.isclose(np_result, result).all()) class TestMseInvalidInput(unittest.TestCase): + def test_error(self): + def test_invalid_input(): input = [256, 3] label = fluid.data(name='label', shape=[None, 3], dtype='float32') @@ -65,6 +70,7 @@ class TestMseInvalidInput(unittest.TestCase): class TestNNMseLoss(unittest.TestCase): + def test_NNMseLoss_mean(self): for dim in [[10, 10], [2, 10, 10], [3, 3, 10, 10]]: input_np = np.random.uniform(0.1, 0.5, dim).astype("float32") @@ -72,28 +78,30 @@ class TestNNMseLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.layers.data( - name='input', shape=dim, dtype='float32') - label = fluid.layers.data( - name='label', shape=dim, dtype='float32') + input = fluid.layers.data(name='input', + shape=dim, + dtype='float32') + label = fluid.layers.data(name='label', + shape=dim, + dtype='float32') mse_loss = paddle.nn.loss.MSELoss() ret = mse_loss(input, label) exe = fluid.Executor(place) - static_result = exe.run( - prog, - feed={"input": input_np, - "label": label_np}, - fetch_list=[ret]) + static_result = exe.run(prog, + feed={ + "input": input_np, + "label": label_np + }, + fetch_list=[ret]) with fluid.dygraph.guard(): mse_loss = paddle.nn.loss.MSELoss() - dy_ret = mse_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = mse_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_result = dy_ret.numpy() sub = input_np - label_np @@ -110,28 +118,30 @@ class TestNNMseLoss(unittest.TestCase): paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.layers.data( - name='input', shape=dim, dtype='float32') - label = fluid.layers.data( - name='label', shape=dim, dtype='float32') + input = fluid.layers.data(name='input', + shape=dim, + dtype='float32') + label = fluid.layers.data(name='label', + shape=dim, + dtype='float32') mse_loss = paddle.nn.loss.MSELoss(reduction='sum') ret = mse_loss(input, label) exe = fluid.Executor(place) - static_result = exe.run( - prog, - feed={"input": input_np, - "label": label_np}, - fetch_list=[ret]) + static_result = exe.run(prog, + feed={ + "input": input_np, + "label": label_np + }, + fetch_list=[ret]) with fluid.dygraph.guard(): mse_loss = paddle.nn.loss.MSELoss(reduction='sum') - dy_ret = mse_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = mse_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_result = dy_ret.numpy() sub = input_np - label_np @@ -148,28 +158,30 @@ class TestNNMseLoss(unittest.TestCase): paddle.enable_static() prog = 
fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.layers.data( - name='input', shape=dim, dtype='float32') - label = fluid.layers.data( - name='label', shape=dim, dtype='float32') + input = fluid.layers.data(name='input', + shape=dim, + dtype='float32') + label = fluid.layers.data(name='label', + shape=dim, + dtype='float32') mse_loss = paddle.nn.loss.MSELoss(reduction='none') ret = mse_loss(input, label) exe = fluid.Executor(place) - static_result = exe.run( - prog, - feed={"input": input_np, - "label": label_np}, - fetch_list=[ret]) + static_result = exe.run(prog, + feed={ + "input": input_np, + "label": label_np + }, + fetch_list=[ret]) with fluid.dygraph.guard(): mse_loss = paddle.nn.loss.MSELoss(reduction='none') - dy_ret = mse_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = mse_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_result = dy_ret.numpy() sub = input_np - label_np @@ -181,6 +193,7 @@ class TestNNMseLoss(unittest.TestCase): class TestNNFunctionalMseLoss(unittest.TestCase): + def test_NNFunctionalMseLoss_mean(self): for dim in [[10, 10], [2, 10, 10], [3, 3, 10, 10]]: input_np = np.random.uniform(0.1, 0.5, dim).astype("float32") @@ -188,26 +201,30 @@ class TestNNFunctionalMseLoss(unittest.TestCase): paddle.enable_static() prog = paddle.static.Program() startup_prog = paddle.static.Program() - place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() with paddle.static.program_guard(prog, startup_prog): - input = paddle.fluid.data( - name='input', shape=dim, dtype='float32') - target = paddle.fluid.data( - name='target', shape=dim, dtype='float32') + input = paddle.fluid.data(name='input', + shape=dim, + dtype='float32') + target = paddle.fluid.data(name='target', + shape=dim, + dtype='float32') mse_loss = paddle.nn.functional.mse_loss(input, target, 'mean') exe = paddle.static.Executor(place) exe.run(startup_prog) - static_result = exe.run( - prog, - feed={"input": input_np, - "target": target_np}, - fetch_list=[mse_loss]) + static_result = exe.run(prog, + feed={ + "input": input_np, + "target": target_np + }, + fetch_list=[mse_loss]) paddle.disable_static() - dy_ret = paddle.nn.functional.mse_loss( - paddle.to_tensor(input_np), paddle.to_tensor(target_np), 'mean') + dy_ret = paddle.nn.functional.mse_loss(paddle.to_tensor(input_np), + paddle.to_tensor(target_np), + 'mean') dy_result = dy_ret.numpy() sub = input_np - target_np @@ -224,26 +241,30 @@ class TestNNFunctionalMseLoss(unittest.TestCase): paddle.enable_static() prog = paddle.static.Program() startup_prog = paddle.static.Program() - place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() with paddle.static.program_guard(prog, startup_prog): - input = paddle.fluid.data( - name='input', shape=dim, dtype='float32') - target = paddle.fluid.data( - name='target', shape=dim, dtype='float32') + input = paddle.fluid.data(name='input', + shape=dim, + dtype='float32') + target = paddle.fluid.data(name='target', + shape=dim, + dtype='float32') mse_loss = 
paddle.nn.functional.mse_loss(input, target, 'sum') exe = paddle.static.Executor(place) exe.run(startup_prog) - static_result = exe.run( - prog, - feed={"input": input_np, - "target": target_np}, - fetch_list=[mse_loss]) + static_result = exe.run(prog, + feed={ + "input": input_np, + "target": target_np + }, + fetch_list=[mse_loss]) paddle.disable_static() - dy_ret = paddle.nn.functional.mse_loss( - paddle.to_tensor(input_np), paddle.to_tensor(target_np), 'sum') + dy_ret = paddle.nn.functional.mse_loss(paddle.to_tensor(input_np), + paddle.to_tensor(target_np), + 'sum') dy_result = dy_ret.numpy() sub = input_np - target_np @@ -260,26 +281,30 @@ class TestNNFunctionalMseLoss(unittest.TestCase): paddle.enable_static() prog = paddle.static.Program() startup_prog = paddle.static.Program() - place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() with paddle.static.program_guard(prog, startup_prog): - input = paddle.fluid.data( - name='input', shape=dim, dtype='float32') - target = paddle.fluid.data( - name='target', shape=dim, dtype='float32') + input = paddle.fluid.data(name='input', + shape=dim, + dtype='float32') + target = paddle.fluid.data(name='target', + shape=dim, + dtype='float32') mse_loss = paddle.nn.functional.mse_loss(input, target, 'none') exe = paddle.static.Executor(place) exe.run(startup_prog) - static_result = exe.run( - prog, - feed={"input": input_np, - "target": target_np}, - fetch_list=[mse_loss]) + static_result = exe.run(prog, + feed={ + "input": input_np, + "target": target_np + }, + fetch_list=[mse_loss]) paddle.disable_static() - dy_ret = paddle.nn.functional.mse_loss( - paddle.to_tensor(input_np), paddle.to_tensor(target_np), 'none') + dy_ret = paddle.nn.functional.mse_loss(paddle.to_tensor(input_np), + paddle.to_tensor(target_np), + 'none') dy_result = dy_ret.numpy() sub = input_np - target_np diff --git a/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py b/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py index c862c555c89..cc4ed645c7d 100644 --- a/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py @@ -23,10 +23,12 @@ import paddle.fluid.layers as layers import paddle.fluid.core as core import gradient_checker from decorator_helper import prog_scope + paddle.enable_static() class TestMulGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): prog = fluid.Program() @@ -45,6 +47,7 @@ class TestMulGradCheck(unittest.TestCase): class TestMulDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. 
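The double_grad_check calls in this file verify higher-order gradients numerically. The core trick is the same finite-difference comparison sketched below for a first-order check of out = x @ y; numeric_grad is a hypothetical helper used only to illustrate the principle, not gradient_checker's actual API.

import numpy as np

def numeric_grad(f, x, eps=5e-3):
    # central finite differences of a scalar-valued f with respect to x
    g = np.zeros_like(x)
    for i in np.ndindex(x.shape):
        x_p, x_m = x.copy(), x.copy()
        x_p[i] += eps
        x_m[i] -= eps
        g[i] = (f(x_p) - f(x_m)) / (2 * eps)
    return g

x = np.random.uniform(-1, 1, (2, 3))
y = np.random.uniform(-1, 1, (3, 4))
loss = lambda x_: (x_ @ y).sum()
analytic = np.ones((2, 4)) @ y.T  # d(sum(x @ y)) / dx
assert np.allclose(numeric_grad(loss, x), analytic, atol=1e-4)
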
@@ -61,8 +64,11 @@ class TestMulDoubleGradCheck(unittest.TestCase): x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, y_shape).astype(dtype) - gradient_checker.double_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.double_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -73,6 +79,7 @@ class TestMulDoubleGradCheck(unittest.TestCase): class TestMatmulDoubleGradCheck(unittest.TestCase): + def setUp(self): self.init_test() @@ -87,17 +94,25 @@ class TestMatmulDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = layers.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = layers.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = layers.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = layers.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = layers.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = layers.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.double_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.double_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -108,6 +123,7 @@ class TestMatmulDoubleGradCheck(unittest.TestCase): def TestMatmulDoubleGradCheckCase1(TestMatmulDoubleGradCheck): + def init_test(self): self.x_shape = [2, 3] self.y_shape = [3, 2] @@ -116,6 +132,7 @@ def TestMatmulDoubleGradCheckCase1(TestMatmulDoubleGradCheck): def TestMatmulDoubleGradCheckCase2(TestMatmulDoubleGradCheck): + def init_test(self): self.x_shape = [2, 4, 3] self.y_shape = [2, 4, 5] @@ -124,6 +141,7 @@ def TestMatmulDoubleGradCheckCase2(TestMatmulDoubleGradCheck): def TestMatmulDoubleGradCheckCase3(TestMatmulDoubleGradCheck): + def init_test(self): self.x_shape = [2, 3, 4, 5] self.y_shape = [2, 3, 3, 5] @@ -132,6 +150,7 @@ def TestMatmulDoubleGradCheckCase3(TestMatmulDoubleGradCheck): def TestMatmulDoubleGradCheckCase4(TestMatmulDoubleGradCheck): + def init_test(self): self.x_shape = [2, 3, 4] self.y_shape = [4, 3] diff --git a/python/paddle/fluid/tests/unittests/test_mul_op.py b/python/paddle/fluid/tests/unittests/test_mul_op.py index 927383c1223..23904f9fa4f 100644 --- a/python/paddle/fluid/tests/unittests/test_mul_op.py +++ b/python/paddle/fluid/tests/unittests/test_mul_op.py @@ -19,6 +19,7 @@ import numpy as np import paddle import paddle.fluid.core as core import sys + sys.path.append("..") from op_test import OpTest import paddle.fluid as fluid @@ -26,6 +27,7 @@ from paddle.fluid import Program, program_guard class TestMulOp(OpTest): + def setUp(self): self.op_type = "mul" self.dtype = np.float64 @@ -46,22 +48,27 @@ class TestMulOp(OpTest): self.check_grad(['X', 'Y'], 'Out') def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X")) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.5, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y')) + self.check_grad(['X'], + 'Out', + max_relative_error=0.5, + no_grad_set=set('Y')) class TestMulOpError(unittest.TestCase): + def 
test_errors(self): with program_guard(Program(), Program()): # The input type of mul_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) - x2 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) + x2 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.mul, x1, x2) # The input dtype of mul_op must be float32 or float64. x3 = fluid.layers.data(name='x3', shape=[4], dtype="int32") @@ -70,6 +77,7 @@ class TestMulOpError(unittest.TestCase): class TestMulOp2(OpTest): + def setUp(self): self.op_type = "mul" self.dtype = np.float64 @@ -97,17 +105,22 @@ class TestMulOp2(OpTest): self.check_grad(['X', 'Y'], 'Out') def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set('X')) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.5, + no_grad_set=set('X')) def test_check_grad_ignore_y(self): - self.check_grad( - ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y')) + self.check_grad(['X'], + 'Out', + max_relative_error=0.5, + no_grad_set=set('Y')) @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16MulOp1(TestMulOp): + def init_dtype_type(self): self.dtype = np.float16 @@ -119,31 +132,31 @@ class TestFP16MulOp1(TestMulOp): def test_check_grad_normal(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_grad_with_place( - place, ['X', 'Y'], 'Out', max_relative_error=0.5) + self.check_grad_with_place(place, ['X', 'Y'], + 'Out', + max_relative_error=0.5) def test_check_grad_ingore_x(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_grad_with_place( - place, ['Y'], - 'Out', - max_relative_error=0.5, - no_grad_set=set("X")) + self.check_grad_with_place(place, ['Y'], + 'Out', + max_relative_error=0.5, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_grad_with_place( - place, ['X'], - 'Out', - max_relative_error=0.5, - no_grad_set=set('Y')) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=0.5, + no_grad_set=set('Y')) @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16MulOp2(TestMulOp2): + def init_dtype_type(self): self.dtype = np.float16 @@ -155,26 +168,25 @@ class TestFP16MulOp2(TestMulOp2): def test_check_grad_normal(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_grad_with_place( - place, ['X', 'Y'], 'Out', max_relative_error=0.9) + self.check_grad_with_place(place, ['X', 'Y'], + 'Out', + max_relative_error=0.9) def test_check_grad_ingore_x(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_grad_with_place( - place, ['Y'], - 'Out', - max_relative_error=0.5, - no_grad_set=set("X")) + self.check_grad_with_place(place, ['Y'], + 'Out', + max_relative_error=0.5, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_grad_with_place( - place, ['X'], - 'Out', - max_relative_error=0.9, - no_grad_set=set('Y')) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=0.9, + no_grad_set=set('Y')) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_multi_dot_op.py 
b/python/paddle/fluid/tests/unittests/test_multi_dot_op.py index 11c04363170..d4d9fcd8b6a 100644 --- a/python/paddle/fluid/tests/unittests/test_multi_dot_op.py +++ b/python/paddle/fluid/tests/unittests/test_multi_dot_op.py @@ -26,6 +26,7 @@ paddle.enable_static() #the unittest of multi_dot #compare the result of paddle multi_dot and numpy multi_dot class TestMultiDotOp(OpTest): + def setUp(self): self.op_type = "multi_dot" self.python_api = paddle.linalg.multi_dot @@ -51,6 +52,7 @@ class TestMultiDotOp(OpTest): #(A*B)*C class TestMultiDotOp3Mat(TestMultiDotOp): + def get_inputs_and_outputs(self): self.A = np.random.random((2, 10)).astype(self.dtype) self.B = np.random.random((10, 4)).astype(self.dtype) @@ -66,6 +68,7 @@ class TestMultiDotOp3Mat(TestMultiDotOp): #A*(B*C) class TestMultiDotOp3Mat2(TestMultiDotOp): + def get_inputs_and_outputs(self): self.A = np.random.random((3, 4)).astype(self.dtype) self.B = np.random.random((4, 8)).astype(self.dtype) @@ -80,14 +83,15 @@ class TestMultiDotOp3Mat2(TestMultiDotOp): class TestMultiDotOp4Mat(TestMultiDotOp): + def get_inputs_and_outputs(self): self.A = np.random.random((8, 6)).astype(self.dtype) self.B = np.random.random((6, 3)).astype(self.dtype) self.C = np.random.random((3, 4)).astype(self.dtype) self.D = np.random.random((4, 5)).astype(self.dtype) self.inputs = { - 'X': - [('x0', self.A), ('x1', self.B), ('x2', self.C), ('x3', self.D)] + 'X': [('x0', self.A), ('x1', self.B), ('x2', self.C), + ('x3', self.D)] } self.outputs = {'Out': multi_dot([self.A, self.B, self.C, self.D])} @@ -99,6 +103,7 @@ class TestMultiDotOp4Mat(TestMultiDotOp): class TestMultiDotOpFirst1D(TestMultiDotOp): + def get_inputs_and_outputs(self): self.A = np.random.random((4)).astype(self.dtype) self.B = np.random.random((4, 3)).astype(self.dtype) @@ -107,6 +112,7 @@ class TestMultiDotOpFirst1D(TestMultiDotOp): class TestMultiDotOp3MatFirst1D(TestMultiDotOp3Mat): + def get_inputs_and_outputs(self): self.A = np.random.random((4)).astype(self.dtype) self.B = np.random.random((4, 3)).astype(self.dtype) @@ -116,19 +122,21 @@ class TestMultiDotOp3MatFirst1D(TestMultiDotOp3Mat): class TestMultiDotOp4MatFirst1D(TestMultiDotOp4Mat): + def get_inputs_and_outputs(self): self.A = np.random.random((4)).astype(self.dtype) self.B = np.random.random((4, 3)).astype(self.dtype) self.C = np.random.random((3, 4)).astype(self.dtype) self.D = np.random.random((4, 5)).astype(self.dtype) self.inputs = { - 'X': - [('x0', self.A), ('x1', self.B), ('x2', self.C), ('x3', self.D)] + 'X': [('x0', self.A), ('x1', self.B), ('x2', self.C), + ('x3', self.D)] } self.outputs = {'Out': multi_dot([self.A, self.B, self.C, self.D])} class TestMultiDotOpLast1D(TestMultiDotOp): + def get_inputs_and_outputs(self): self.A = np.random.random((3, 6)).astype(self.dtype) self.B = np.random.random((6)).astype(self.dtype) @@ -137,6 +145,7 @@ class TestMultiDotOpLast1D(TestMultiDotOp): class TestMultiDotOp3MatLast1D(TestMultiDotOp3Mat): + def get_inputs_and_outputs(self): self.A = np.random.random((2, 4)).astype(self.dtype) self.B = np.random.random((4, 3)).astype(self.dtype) @@ -151,19 +160,21 @@ class TestMultiDotOp3MatLast1D(TestMultiDotOp3Mat): class TestMultiDotOp4MatLast1D(TestMultiDotOp4Mat): + def get_inputs_and_outputs(self): self.A = np.random.random((2, 3)).astype(self.dtype) self.B = np.random.random((3, 2)).astype(self.dtype) self.C = np.random.random((2, 3)).astype(self.dtype) self.D = np.random.random((3)).astype(self.dtype) self.inputs = { - 'X': - [('x0', self.A), ('x1', self.B), ('x2', self.C), ('x3', 
self.D)] + 'X': [('x0', self.A), ('x1', self.B), ('x2', self.C), + ('x3', self.D)] } self.outputs = {'Out': multi_dot([self.A, self.B, self.C, self.D])} class TestMultiDotOpFirstAndLast1D(TestMultiDotOp): + def get_inputs_and_outputs(self): self.A = np.random.random((4, )).astype(self.dtype) self.B = np.random.random((4)).astype(self.dtype) @@ -172,6 +183,7 @@ class TestMultiDotOpFirstAndLast1D(TestMultiDotOp): class TestMultiDotOp3MatFirstAndLast1D(TestMultiDotOp3Mat): + def get_inputs_and_outputs(self): self.A = np.random.random((6, )).astype(self.dtype) self.B = np.random.random((6, 4)).astype(self.dtype) @@ -181,20 +193,22 @@ class TestMultiDotOp3MatFirstAndLast1D(TestMultiDotOp3Mat): class TestMultiDotOp4MatFirstAndLast1D(TestMultiDotOp4Mat): + def get_inputs_and_outputs(self): self.A = np.random.random((3, )).astype(self.dtype) self.B = np.random.random((3, 4)).astype(self.dtype) self.C = np.random.random((4, 2)).astype(self.dtype) self.D = np.random.random((2)).astype(self.dtype) self.inputs = { - 'X': - [('x0', self.A), ('x1', self.B), ('x2', self.C), ('x3', self.D)] + 'X': [('x0', self.A), ('x1', self.B), ('x2', self.C), + ('x3', self.D)] } self.outputs = {'Out': multi_dot([self.A, self.B, self.C, self.D])} #####python API test####### class TestMultiDotOpError(unittest.TestCase): + def test_errors(self): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): @@ -204,8 +218,9 @@ class TestMultiDotOpError(unittest.TestCase): [input1, input1]) # The inputs dtype of multi_dot must be float64, float64 or float16. - input2 = paddle.static.data( - name='input2', shape=[10, 10], dtype="int32") + input2 = paddle.static.data(name='input2', + shape=[10, 10], + dtype="int32") self.assertRaises(TypeError, paddle.linalg.multi_dot, [input2, input2]) @@ -231,6 +246,7 @@ class TestMultiDotOpError(unittest.TestCase): class APITestMultiDot(unittest.TestCase): + def test_out(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): @@ -240,15 +256,15 @@ class APITestMultiDot(unittest.TestCase): exe = paddle.static.Executor(paddle.CPUPlace()) data1 = np.random.rand(3, 2).astype("float64") data2 = np.random.rand(2, 3).astype("float64") - np_res = exe.run(feed={'x0': data1, - 'x1': data2}, + np_res = exe.run(feed={ + 'x0': data1, + 'x1': data2 + }, fetch_list=[result]) expected_result = np.linalg.multi_dot([data1, data2]) self.assertTrue( - np.allclose( - np_res, expected_result, atol=1e-5), - "two value is\ + np.allclose(np_res, expected_result, atol=1e-5), "two value is\ {}\n{}, check diff!".format(np_res, expected_result)) def test_dygraph_without_out(self): diff --git a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py index 3158d78db63..67650158bef 100644 --- a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py @@ -55,8 +55,8 @@ def iou(box_a, box_b, norm): xb = min(xmax_a, xmax_b) yb = min(ymax_a, ymax_b) - inter_area = max(xb - xa + (norm == False), - 0.0) * max(yb - ya + (norm == False), 0.0) + inter_area = max(xb - xa + + (norm == False), 0.0) * max(yb - ya + (norm == False), 0.0) iou_ratio = inter_area / (area_a + area_b - inter_area) @@ -147,8 +147,9 @@ def multiclass_nms(boxes, scores, background, score_threshold, nms_threshold, else: score_index.append((scores[idx][c], c, idx)) - sorted_score_index = sorted( - score_index, key=lambda tup: tup[0], reverse=True) + 
sorted_score_index = sorted(score_index, + key=lambda tup: tup[0], + reverse=True) sorted_score_index = sorted_score_index[:keep_top_k] selected_indices = {} @@ -179,16 +180,15 @@ def lod_multiclass_nms(boxes, scores, background, score_threshold, score = scores[head:head + box_lod[0][n]] offset = head head = head + box_lod[0][n] - nmsed_outs, nmsed_num = multiclass_nms( - box, - score, - background, - score_threshold, - nms_threshold, - nms_top_k, - keep_top_k, - normalized, - shared=False) + nmsed_outs, nmsed_num = multiclass_nms(box, + score, + background, + score_threshold, + nms_threshold, + nms_top_k, + keep_top_k, + normalized, + shared=False) lod.append(nmsed_num) if nmsed_num == 0: @@ -201,8 +201,9 @@ def lod_multiclass_nms(boxes, scores, background, score_threshold, c, score[idx][c], xmin, ymin, xmax, ymax, offset * num_class + idx * num_class + c ]) - sorted_det_out = sorted( - tmp_det_out, key=lambda tup: tup[0], reverse=False) + sorted_det_out = sorted(tmp_det_out, + key=lambda tup: tup[0], + reverse=False) det_outs.extend(sorted_det_out) return det_outs, lod @@ -222,16 +223,15 @@ def batched_multiclass_nms(boxes, index_outs = [] lod = [] for n in range(batch_size): - nmsed_outs, nmsed_num = multiclass_nms( - boxes[n], - scores[n], - background, - score_threshold, - nms_threshold, - nms_top_k, - keep_top_k, - normalized, - shared=True) + nmsed_outs, nmsed_num = multiclass_nms(boxes[n], + scores[n], + background, + score_threshold, + nms_threshold, + nms_top_k, + keep_top_k, + normalized, + shared=True) lod.append(nmsed_num) if nmsed_num == 0: @@ -244,13 +244,15 @@ def batched_multiclass_nms(boxes, c, scores[n][c][idx], xmin, ymin, xmax, ymax, idx + n * num_boxes ]) - sorted_det_out = sorted( - tmp_det_out, key=lambda tup: tup[0], reverse=False) + sorted_det_out = sorted(tmp_det_out, + key=lambda tup: tup[0], + reverse=False) det_outs.extend(sorted_det_out) return det_outs, lod class TestMulticlassNMSOp(OpTest): + def set_argument(self): self.score_threshold = 0.01 @@ -302,6 +304,7 @@ class TestMulticlassNMSOp(OpTest): class TestMulticlassNMSOpNoOutput(TestMulticlassNMSOp): + def set_argument(self): # Here set 2.0 to test the case there is no outputs. 
# In practical use, 0.0 < score_threshold < 1.0 @@ -309,6 +312,7 @@ class TestMulticlassNMSOpNoOutput(TestMulticlassNMSOp): class TestMulticlassNMSLoDInput(OpTest): + def set_argument(self): self.score_threshold = 0.01 @@ -335,9 +339,10 @@ class TestMulticlassNMSLoDInput(OpTest): boxes[:, :, 2] = boxes[:, :, 2] * 10 + 10 boxes[:, :, 3] = boxes[:, :, 3] * 10 + 10 - det_outs, lod = lod_multiclass_nms( - boxes, scores, background, score_threshold, nms_threshold, - nms_top_k, keep_top_k, box_lod, normalized) + det_outs, lod = lod_multiclass_nms(boxes, scores, background, + score_threshold, nms_threshold, + nms_top_k, keep_top_k, box_lod, + normalized) det_outs = np.array(det_outs).astype('float32') nmsed_outs = det_outs[:, :-1].astype('float32') if len( det_outs) else det_outs @@ -362,6 +367,7 @@ class TestMulticlassNMSLoDInput(OpTest): class TestMulticlassNMSNoBox(TestMulticlassNMSLoDInput): + def setUp(self): self.set_argument() M = 1200 @@ -385,9 +391,10 @@ class TestMulticlassNMSNoBox(TestMulticlassNMSLoDInput): boxes[:, :, 2] = boxes[:, :, 2] * 10 + 10 boxes[:, :, 3] = boxes[:, :, 3] * 10 + 10 - det_outs, lod = lod_multiclass_nms( - boxes, scores, background, score_threshold, nms_threshold, - nms_top_k, keep_top_k, box_lod, normalized) + det_outs, lod = lod_multiclass_nms(boxes, scores, background, + score_threshold, nms_threshold, + nms_top_k, keep_top_k, box_lod, + normalized) det_outs = np.array(det_outs).astype('float32') nmsed_outs = det_outs[:, :-1].astype('float32') if len( det_outs) else det_outs @@ -409,6 +416,7 @@ class TestMulticlassNMSNoBox(TestMulticlassNMSLoDInput): class TestIOU(unittest.TestCase): + def test_iou(self): box1 = np.array([4.0, 3.0, 7.0, 5.0]).astype('float32') box2 = np.array([3.0, 4.0, 6.0, 8.0]).astype('float32') @@ -419,6 +427,7 @@ class TestIOU(unittest.TestCase): class TestMulticlassNMS2Op(TestMulticlassNMSOp): + def setUp(self): self.set_argument() N = 7 @@ -448,8 +457,8 @@ class TestMulticlassNMS2Op(TestMulticlassNMSOp): nmsed_outs = det_outs[:, :-1].astype('float32') if len( det_outs) else det_outs - index_outs = det_outs[:, -1:].astype('int') if len( - det_outs) else det_outs + index_outs = det_outs[:, + -1:].astype('int') if len(det_outs) else det_outs self.op_type = 'multiclass_nms2' self.inputs = {'BBoxes': boxes, 'Scores': scores} self.outputs = { @@ -471,6 +480,7 @@ class TestMulticlassNMS2Op(TestMulticlassNMSOp): class TestMulticlassNMS2OpNoOutput(TestMulticlassNMS2Op): + def set_argument(self): # Here set 2.0 to test the case there is no outputs. 
# In practical use, 0.0 < score_threshold < 1.0 @@ -478,6 +488,7 @@ class TestMulticlassNMS2OpNoOutput(TestMulticlassNMS2Op): class TestMulticlassNMS2LoDInput(TestMulticlassNMSLoDInput): + def setUp(self): self.set_argument() M = 1200 @@ -501,15 +512,16 @@ class TestMulticlassNMS2LoDInput(TestMulticlassNMSLoDInput): boxes[:, :, 2] = boxes[:, :, 2] * 10 + 10 boxes[:, :, 3] = boxes[:, :, 3] * 10 + 10 - det_outs, lod = lod_multiclass_nms( - boxes, scores, background, score_threshold, nms_threshold, - nms_top_k, keep_top_k, box_lod, normalized) + det_outs, lod = lod_multiclass_nms(boxes, scores, background, + score_threshold, nms_threshold, + nms_top_k, keep_top_k, box_lod, + normalized) det_outs = np.array(det_outs) nmsed_outs = det_outs[:, :-1].astype('float32') if len( det_outs) else det_outs - index_outs = det_outs[:, -1:].astype('int') if len( - det_outs) else det_outs + index_outs = det_outs[:, + -1:].astype('int') if len(det_outs) else det_outs self.op_type = 'multiclass_nms2' self.inputs = { 'BBoxes': (boxes, box_lod), @@ -534,6 +546,7 @@ class TestMulticlassNMS2LoDInput(TestMulticlassNMSLoDInput): class TestMulticlassNMS2LoDNoOutput(TestMulticlassNMS2LoDInput): + def set_argument(self): # Here set 2.0 to test the case there is no outputs. # In practical use, 0.0 < score_threshold < 1.0 @@ -541,6 +554,7 @@ class TestMulticlassNMS2LoDNoOutput(TestMulticlassNMS2LoDInput): class TestMulticlassNMSError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): M = 1200 @@ -554,10 +568,12 @@ class TestMulticlassNMSError(unittest.TestCase): scores = np.reshape(scores, (N, M, C)) scores_np = np.transpose(scores, (0, 2, 1)) - boxes_data = fluid.data( - name='bboxes', shape=[M, C, BOX_SIZE], dtype='float32') - scores_data = fluid.data( - name='scores', shape=[N, C, M], dtype='float32') + boxes_data = fluid.data(name='bboxes', + shape=[M, C, BOX_SIZE], + dtype='float32') + scores_data = fluid.data(name='scores', + shape=[N, C, M], + dtype='float32') def test_bboxes_Variable(): # the bboxes type must be Variable @@ -572,6 +588,7 @@ class TestMulticlassNMSError(unittest.TestCase): class TestMulticlassNMS3Op(TestMulticlassNMS2Op): + def setUp(self): self.set_argument() N = 7 @@ -601,8 +618,8 @@ class TestMulticlassNMS3Op(TestMulticlassNMS2Op): nmsed_outs = det_outs[:, :-1].astype('float32') if len( det_outs) else det_outs - index_outs = det_outs[:, -1:].astype('int') if len( - det_outs) else det_outs + index_outs = det_outs[:, + -1:].astype('int') if len(det_outs) else det_outs self.op_type = 'multiclass_nms3' self.inputs = {'BBoxes': boxes, 'Scores': scores} self.outputs = { @@ -625,6 +642,7 @@ class TestMulticlassNMS3Op(TestMulticlassNMS2Op): class TestMulticlassNMS3OpNoOutput(TestMulticlassNMS3Op): + def set_argument(self): # Here set 2.0 to test the case there is no outputs. 
# In practical use, 0.0 < score_threshold < 1.0 @@ -632,6 +650,7 @@ class TestMulticlassNMS3OpNoOutput(TestMulticlassNMS3Op): class TestMulticlassNMS3LoDInput(TestMulticlassNMS2LoDInput): + def setUp(self): self.set_argument() M = 1200 @@ -655,9 +674,10 @@ class TestMulticlassNMS3LoDInput(TestMulticlassNMS2LoDInput): boxes[:, :, 2] = boxes[:, :, 2] * 10 + 10 boxes[:, :, 3] = boxes[:, :, 3] * 10 + 10 - det_outs, lod = lod_multiclass_nms( - boxes, scores, background, score_threshold, nms_threshold, - nms_top_k, keep_top_k, box_lod, normalized) + det_outs, lod = lod_multiclass_nms(boxes, scores, background, + score_threshold, nms_threshold, + nms_top_k, keep_top_k, box_lod, + normalized) det_outs = np.array(det_outs) nmsed_outs = det_outs[:, :-1].astype('float32') if len( @@ -687,6 +707,7 @@ class TestMulticlassNMS3LoDInput(TestMulticlassNMS2LoDInput): class TestMulticlassNMS3LoDNoOutput(TestMulticlassNMS3LoDInput): + def set_argument(self): # Here set 2.0 to test the case there is no outputs. # In practical use, 0.0 < score_threshold < 1.0 diff --git a/python/paddle/fluid/tests/unittests/test_multihead_attention.py b/python/paddle/fluid/tests/unittests/test_multihead_attention.py index f60da862ac0..9a0e3f1b2a9 100644 --- a/python/paddle/fluid/tests/unittests/test_multihead_attention.py +++ b/python/paddle/fluid/tests/unittests/test_multihead_attention.py @@ -21,6 +21,7 @@ import numpy as np class TestMultiheadAttention(unittest.TestCase): + def gen_random_input(self): """Generate random input data. """ @@ -32,25 +33,22 @@ class TestMultiheadAttention(unittest.TestCase): def set_program(self): """Build the test program. """ - queries = fluid.layers.data( - name="queries", - shape=self.input_shape, - dtype="float32", - append_batch_size=False) + queries = fluid.layers.data(name="queries", + shape=self.input_shape, + dtype="float32", + append_batch_size=False) queries.stop_gradient = False - keys = fluid.layers.data( - name="keys", - shape=self.input_shape, - dtype="float32", - append_batch_size=False) + keys = fluid.layers.data(name="keys", + shape=self.input_shape, + dtype="float32", + append_batch_size=False) keys.stop_gradient = False - contexts = fluid.nets.scaled_dot_product_attention( - queries=queries, - keys=keys, - values=keys, - num_heads=8, - dropout_rate=0.) + contexts = fluid.nets.scaled_dot_product_attention(queries=queries, + keys=keys, + values=keys, + num_heads=8, + dropout_rate=0.) 
out = fluid.layers.reduce_sum(contexts, dim=None) fluid.backward.append_backward(loss=out) diff --git a/python/paddle/fluid/tests/unittests/test_multinomial_op.py b/python/paddle/fluid/tests/unittests/test_multinomial_op.py index 4dfc881d772..b60a46d66ad 100644 --- a/python/paddle/fluid/tests/unittests/test_multinomial_op.py +++ b/python/paddle/fluid/tests/unittests/test_multinomial_op.py @@ -44,6 +44,7 @@ def sample_output_two_dimension(out, shape): class TestMultinomialOp(OpTest): + def setUp(self): paddle.enable_static() self.op_type = "multinomial" @@ -67,12 +68,12 @@ class TestMultinomialOp(OpTest): prob = self.input_np / self.input_np.sum(axis=-1, keepdims=True) sample_prob = self.sample_output(np.array(outs[0])) self.assertTrue( - np.allclose( - sample_prob, prob, rtol=0, atol=0.01), + np.allclose(sample_prob, prob, rtol=0, atol=0.01), "sample_prob: " + str(sample_prob) + "\nprob: " + str(prob)) class TestMultinomialOp2(TestMultinomialOp): + def init_data(self): # input probability is a matrix self.input_np = np.random.rand(3, 4) @@ -84,6 +85,7 @@ class TestMultinomialOp2(TestMultinomialOp): class TestMultinomialOp3(TestMultinomialOp): + def init_data(self): # replacement is False. number of samples must be less than number of categories. self.input_np = np.random.rand(1000) @@ -99,6 +101,7 @@ class TestMultinomialOp3(TestMultinomialOp): class TestMultinomialApi(unittest.TestCase): + def test_dygraph(self): # input probability is a vector, and replacement is True paddle.disable_static() @@ -110,8 +113,7 @@ class TestMultinomialApi(unittest.TestCase): sample_prob = sample_output_one_dimension(out.numpy(), 4) prob = x_numpy / x_numpy.sum(axis=-1, keepdims=True) self.assertTrue( - np.allclose( - sample_prob, prob, rtol=0, atol=0.01), + np.allclose(sample_prob, prob, rtol=0, atol=0.01), "sample_prob: " + str(sample_prob) + "\nprob: " + str(prob)) def test_dygraph2(self): @@ -124,8 +126,7 @@ class TestMultinomialApi(unittest.TestCase): sample_prob = sample_output_two_dimension(out.numpy(), [3, 4]) prob = x_numpy / x_numpy.sum(axis=-1, keepdims=True) self.assertTrue( - np.allclose( - sample_prob, prob, rtol=0, atol=0.01), + np.allclose(sample_prob, prob, rtol=0, atol=0.01), "sample_prob: " + str(sample_prob) + "\nprob: " + str(prob)) paddle.enable_static() @@ -170,12 +171,12 @@ class TestMultinomialApi(unittest.TestCase): sample_prob = sample_output_one_dimension(out, 4) prob = x_np / x_np.sum(axis=-1, keepdims=True) self.assertTrue( - np.allclose( - sample_prob, prob, rtol=0, atol=0.01), + np.allclose(sample_prob, prob, rtol=0, atol=0.01), "sample_prob: " + str(sample_prob) + "\nprob: " + str(prob)) class TestMultinomialAlias(unittest.TestCase): + def test_alias(self): paddle.disable_static() x = paddle.rand([4]) @@ -185,10 +186,12 @@ class TestMultinomialAlias(unittest.TestCase): class TestMultinomialError(unittest.TestCase): + def setUp(self): paddle.disable_static() def test_num_sample(self): + def test_num_sample_less_than_0(): x = paddle.rand([4]) paddle.multinomial(x, num_samples=-2) @@ -196,6 +199,7 @@ class TestMultinomialError(unittest.TestCase): self.assertRaises(ValueError, test_num_sample_less_than_0) def test_replacement_False(self): + def test_samples_larger_than_categories(): x = paddle.rand([4]) paddle.multinomial(x, num_samples=5, replacement=False) @@ -203,6 +207,7 @@ class TestMultinomialError(unittest.TestCase): self.assertRaises(ValueError, test_samples_larger_than_categories) def test_input_probs_dim(self): + def test_dim_larger_than_2(): x = paddle.rand([2, 3, 3]) 
paddle.multinomial(x) @@ -226,6 +231,7 @@ class TestMultinomialError(unittest.TestCase): class TestRandomValue(unittest.TestCase): + def test_fixed_random_number(self): # Test GPU Fixed random number, which is generated by 'curandStatePhilox4_32_10_t' if not paddle.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_multiplex_op.py b/python/paddle/fluid/tests/unittests/test_multiplex_op.py index 093ee86aeea..29a11ab68d0 100644 --- a/python/paddle/fluid/tests/unittests/test_multiplex_op.py +++ b/python/paddle/fluid/tests/unittests/test_multiplex_op.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard class TestMultiplexOp(OpTest): + def setUp(self): self.op_type = "multiplex" rows = 4 @@ -61,6 +62,7 @@ class TestMultiplexOp(OpTest): class TestMultiplexOpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): x1 = fluid.data(name='x1', shape=[None, 2], dtype='int64') @@ -86,14 +88,16 @@ class TestMultiplexOpError(unittest.TestCase): self.assertRaises(TypeError, test_type) def test_type2(): - index2 = fluid.data( - name='index2', shape=[None, 1], dtype='int16') + index2 = fluid.data(name='index2', + shape=[None, 1], + dtype='int16') paddle.multiplex(inputs=[x1, x2], index=index2) self.assertRaises(TypeError, test_type2) class TestMultiplexODygrap(unittest.TestCase): + def test_multiplex_dygraph(self): paddle.disable_static() img1 = np.array([[1, 2], [3, 4]]).astype(np.float32) @@ -122,10 +126,12 @@ class TestMultiplexODygrap(unittest.TestCase): res_eager = paddle.multiplex(inputs_eager, index_eager) res_eager.backward() self.assertEqual((res.numpy() == res_eager.numpy()).all(), True) - self.assertEqual((inputs[0].grad.numpy() == - inputs_eager[0].grad.numpy()).all(), True) - self.assertEqual((inputs[1].grad.numpy() == - inputs_eager[1].grad.numpy()).all(), True) + self.assertEqual( + (inputs[0].grad.numpy() == inputs_eager[0].grad.numpy() + ).all(), True) + self.assertEqual( + (inputs[1].grad.numpy() == inputs_eager[1].grad.numpy() + ).all(), True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_multiply.py b/python/paddle/fluid/tests/unittests/test_multiply.py index e8463ed8ad2..cfc56d5a959 100755 --- a/python/paddle/fluid/tests/unittests/test_multiply.py +++ b/python/paddle/fluid/tests/unittests/test_multiply.py @@ -24,21 +24,26 @@ from paddle.fluid.framework import _test_eager_guard, in_dygraph_mode class TestMultiplyApi(unittest.TestCase): + def _run_static_graph_case(self, x_data, y_data): with program_guard(Program(), Program()): paddle.enable_static() - x = paddle.static.data( - name='x', shape=x_data.shape, dtype=x_data.dtype) - y = paddle.static.data( - name='y', shape=y_data.shape, dtype=y_data.dtype) + x = paddle.static.data(name='x', + shape=x_data.shape, + dtype=x_data.dtype) + y = paddle.static.data(name='y', + shape=y_data.shape, + dtype=y_data.dtype) res = tensor.multiply(x, y) - place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace() exe = paddle.static.Executor(place) outs = exe.run(paddle.static.default_main_program(), - feed={'x': x_data, - 'y': y_data}, + feed={ + 'x': x_data, + 'y': y_data + }, fetch_list=[res]) res = outs[0] return res @@ -108,6 +113,7 @@ class TestMultiplyApi(unittest.TestCase): class TestMultiplyError(unittest.TestCase): + def func_test_errors(self): # test static computation graph: dtype can not 
be int8 paddle.enable_static() @@ -116,7 +122,7 @@ class TestMultiplyError(unittest.TestCase): y = paddle.static.data(name='y', shape=[100], dtype=np.int8) self.assertRaises(TypeError, tensor.multiply, x, y) - # test static computation graph: inputs must be broadcastable + # test static computation graph: inputs must be broadcastable with program_guard(Program(), Program()): x = paddle.static.data(name='x', shape=[20, 50], dtype=np.float64) y = paddle.static.data(name='y', shape=[20], dtype=np.float64) @@ -145,7 +151,7 @@ class TestMultiplyError(unittest.TestCase): y = paddle.to_tensor(y_data) self.assertRaises(ValueError, paddle.multiply, x, y) - # test dynamic computation graph: dtype must be same + # test dynamic computation graph: dtype must be same x_data = np.random.randn(200).astype(np.int64) y_data = np.random.randn(200).astype(np.float64) x = paddle.to_tensor(x_data) diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py index e23905005df..d409648c716 100755 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py @@ -27,6 +27,7 @@ IMAGE_SIZE = 32 class RandomDataset(Dataset): + def __init__(self, sample_num): self.sample_num = sample_num @@ -41,6 +42,7 @@ class RandomDataset(Dataset): class RandomIterableDataset(IterableDataset): + def __init__(self, sample_num): self.sample_num = sample_num @@ -53,6 +55,7 @@ class RandomIterableDataset(IterableDataset): class TestTensorDataset(unittest.TestCase): + def run_main(self, num_workers, places): paddle.static.default_startup_program().random_seed = 1 paddle.static.default_main_program().random_seed = 1 @@ -65,12 +68,11 @@ class TestTensorDataset(unittest.TestCase): dataset = TensorDataset([input, label]) assert len(dataset) == 16 - dataloader = DataLoader( - dataset, - places=place, - num_workers=num_workers, - batch_size=1, - drop_last=True) + dataloader = DataLoader(dataset, + places=place, + num_workers=num_workers, + batch_size=1, + drop_last=True) for i, (input, label) in enumerate(dataloader()): assert len(input) == 1 @@ -98,6 +100,7 @@ class TestTensorDataset(unittest.TestCase): class TestComposeDataset(unittest.TestCase): + def func_test_main(self): paddle.static.default_startup_program().random_seed = 1 paddle.static.default_main_program().random_seed = 1 @@ -123,6 +126,7 @@ class TestComposeDataset(unittest.TestCase): class TestRandomSplitApi(unittest.TestCase): + def func_test_main(self): paddle.static.default_startup_program().random_seed = 1 paddle.static.default_main_program().random_seed = 1 @@ -149,6 +153,7 @@ class TestRandomSplitApi(unittest.TestCase): class TestRandomSplitError(unittest.TestCase): + def func_test_errors(self): paddle.static.default_startup_program().random_seed = 1 paddle.static.default_main_program().random_seed = 1 @@ -164,6 +169,7 @@ class TestRandomSplitError(unittest.TestCase): class TestSubsetDataset(unittest.TestCase): + def run_main(self, num_workers, places): paddle.static.default_startup_program().random_seed = 1 paddle.static.default_main_program().random_seed = 1 @@ -180,12 +186,11 @@ class TestSubsetDataset(unittest.TestCase): assert len(dataset) == 5 def prepare_dataloader(dataset): - return DataLoader( - dataset, - places=places, - num_workers=num_workers, - batch_size=1, - drop_last=True) + return DataLoader(dataset, + places=places, + num_workers=num_workers, + 
batch_size=1, + drop_last=True) dataloader = prepare_dataloader(dataset) dataloader_even = prepare_dataloader(even_subset) @@ -234,6 +239,7 @@ class TestSubsetDataset(unittest.TestCase): class TestChainDataset(unittest.TestCase): + def run_main(self, num_workers, places): paddle.static.default_startup_program().random_seed = 1 paddle.static.default_main_program().random_seed = 1 @@ -271,6 +277,7 @@ class TestChainDataset(unittest.TestCase): class NumpyMixTensorDataset(Dataset): + def __init__(self, sample_num): self.sample_num = sample_num @@ -285,6 +292,7 @@ class NumpyMixTensorDataset(Dataset): class TestNumpyMixTensorDataset(TestTensorDataset): + def run_main(self, num_workers, places): paddle.static.default_startup_program().random_seed = 1 paddle.static.default_main_program().random_seed = 1 @@ -292,12 +300,11 @@ class TestNumpyMixTensorDataset(TestTensorDataset): with fluid.dygraph.guard(place): dataset = NumpyMixTensorDataset(16) assert len(dataset) == 16 - dataloader = DataLoader( - dataset, - places=place, - num_workers=num_workers, - batch_size=1, - drop_last=True) + dataloader = DataLoader(dataset, + places=place, + num_workers=num_workers, + batch_size=1, + drop_last=True) for i, (input, label) in enumerate(dataloader()): assert len(input) == 1 @@ -311,6 +318,7 @@ class TestNumpyMixTensorDataset(TestTensorDataset): class ComplextDataset(Dataset): + def __init__(self, sample_num): self.sample_num = sample_num @@ -318,9 +326,10 @@ class ComplextDataset(Dataset): return self.sample_num def __getitem__(self, idx): - return (3.1, 'abc', paddle.to_tensor( - np.random.random([IMAGE_SIZE]).astype('float32'), - place=paddle.CPUPlace()), + return (3.1, 'abc', + paddle.to_tensor(np.random.random([IMAGE_SIZE + ]).astype('float32'), + place=paddle.CPUPlace()), [1, np.random.random([2]).astype('float32')], { 'a': 2.0, 'b': np.random.random([2]).astype('float32') @@ -328,6 +337,7 @@ class ComplextDataset(Dataset): class TestComplextDataset(unittest.TestCase): + def run_main(self, num_workers): paddle.static.default_startup_program().random_seed = 1 paddle.static.default_main_program().random_seed = 1 @@ -335,12 +345,11 @@ class TestComplextDataset(unittest.TestCase): with fluid.dygraph.guard(place): dataset = ComplextDataset(16) assert len(dataset) == 16 - dataloader = DataLoader( - dataset, - places=place, - num_workers=num_workers, - batch_size=2, - drop_last=True) + dataloader = DataLoader(dataset, + places=place, + num_workers=num_workers, + batch_size=2, + drop_last=True) for i, data in enumerate(dataloader()): assert len(data) == 5 @@ -373,6 +382,7 @@ class TestComplextDataset(unittest.TestCase): class SingleFieldDataset(Dataset): + def __init__(self, sample_num): self.sample_num = sample_num @@ -384,6 +394,7 @@ class SingleFieldDataset(Dataset): class TestSingleFieldDataset(unittest.TestCase): + def init_dataset(self): self.sample_num = 16 self.dataset = SingleFieldDataset(self.sample_num) @@ -394,12 +405,11 @@ class TestSingleFieldDataset(unittest.TestCase): place = paddle.CPUPlace() with fluid.dygraph.guard(place): self.init_dataset() - dataloader = DataLoader( - self.dataset, - places=place, - num_workers=num_workers, - batch_size=2, - drop_last=True) + dataloader = DataLoader(self.dataset, + places=place, + num_workers=num_workers, + batch_size=2, + drop_last=True) for i, data in enumerate(dataloader()): assert isinstance(data, @@ -417,6 +427,7 @@ class TestSingleFieldDataset(unittest.TestCase): class SingleFieldIterableDataset(IterableDataset): + def __init__(self, sample_num): 
self.sample_num = sample_num @@ -426,12 +437,14 @@ class SingleFieldIterableDataset(IterableDataset): class TestSingleFieldIterableDataset(TestSingleFieldDataset): + def init_dataset(self): self.sample_num = 16 self.dataset = SingleFieldIterableDataset(self.sample_num) class TestDataLoaderGenerateStates(unittest.TestCase): + def setUp(self): self.inputs = [(0, 1), (0, 2), (1, 3)] self.outputs = [[1835504127, 1731038949, 1320224556, 2330041505], @@ -451,16 +464,16 @@ class TestDataLoaderGenerateStates(unittest.TestCase): class TestDatasetWithDropLast(unittest.TestCase): + def run_main(self, dataset, num_samples, batch_size): for num_workers in [0, 1]: for drop_last in [True, False]: steps = (num_samples + (1 - int(drop_last)) * \ (batch_size - 1)) // batch_size - dataloader = DataLoader( - dataset, - batch_size=batch_size, - drop_last=drop_last, - num_workers=num_workers) + dataloader = DataLoader(dataset, + batch_size=batch_size, + drop_last=drop_last, + num_workers=num_workers) datas = [] for data in dataloader: datas.append(data) diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py index fcc7c17ce06..c3eda1b3fdf 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py @@ -32,6 +32,7 @@ from test_multiprocess_dataloader_static import EPOCH_NUM, BATCH_SIZE, IMAGE_SIZ class SimpleFCNet(fluid.dygraph.Layer): + def __init__(self): super(SimpleFCNet, self).__init__() @@ -43,20 +44,18 @@ class SimpleFCNet(fluid.dygraph.Layer): in_channel = IMAGE_SIZE for hidden_size in [10, 20, 30]: self._fcs.append( - Linear( - in_channel, - hidden_size, - act='tanh', - param_attr=param_attr, - bias_attr=bias_attr)) + Linear(in_channel, + hidden_size, + act='tanh', + param_attr=param_attr, + bias_attr=bias_attr)) in_channel = hidden_size self._fcs.append( - Linear( - in_channel, - CLASS_NUM, - act='softmax', - param_attr=param_attr, - bias_attr=bias_attr)) + Linear(in_channel, + CLASS_NUM, + act='softmax', + param_attr=param_attr, + bias_attr=bias_attr)) def forward(self, image): out = image @@ -66,6 +65,7 @@ class SimpleFCNet(fluid.dygraph.Layer): class TestDygraphDataLoader(unittest.TestCase): + def run_main(self, num_workers, places, persistent_workers): fluid.default_startup_program().random_seed = 1 fluid.default_main_program().random_seed = 1 @@ -74,12 +74,11 @@ class TestDygraphDataLoader(unittest.TestCase): optimizer = fluid.optimizer.Adam(parameter_list=fc_net.parameters()) dataset = RandomDataset(SAMPLE_NUM, CLASS_NUM) - dataloader = DataLoader( - dataset, - num_workers=num_workers, - batch_size=BATCH_SIZE, - drop_last=True, - persistent_workers=persistent_workers) + dataloader = DataLoader(dataset, + num_workers=num_workers, + batch_size=BATCH_SIZE, + drop_last=True, + persistent_workers=persistent_workers) assert len(dataloader) == int(SAMPLE_NUM / BATCH_SIZE) step_list = [] @@ -117,10 +116,9 @@ class TestDygraphDataLoader(unittest.TestCase): print(self.__class__.__name__, p, num_workers, persistent_workers) sys.stdout.flush() - ret = self.run_main( - num_workers=num_workers, - places=p, - persistent_workers=persistent_workers) + ret = self.run_main(num_workers=num_workers, + places=p, + persistent_workers=persistent_workers) results.append(ret) diff = np.max( np.abs(results[0]['loss'] - results[1]['loss']) / @@ -129,6 +127,7 @@ class 
TestDygraphDataLoader(unittest.TestCase): class TestDygraphDataLoaderWithBatchedDataset(TestDygraphDataLoader): + def run_main(self, num_workers, places, persistent_workers): fluid.default_startup_program().random_seed = 1 fluid.default_main_program().random_seed = 1 @@ -137,12 +136,11 @@ class TestDygraphDataLoaderWithBatchedDataset(TestDygraphDataLoader): optimizer = fluid.optimizer.Adam(parameter_list=fc_net.parameters()) dataset = RandomBatchedDataset(SAMPLE_NUM, CLASS_NUM) - dataloader = DataLoader( - dataset, - num_workers=num_workers, - batch_size=None, - drop_last=True, - persistent_workers=persistent_workers) + dataloader = DataLoader(dataset, + num_workers=num_workers, + batch_size=None, + drop_last=True, + persistent_workers=persistent_workers) assert len(dataloader) == int(SAMPLE_NUM / BATCH_SIZE) step_list = [] diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py index 52f4c256773..2d6cdac4854 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py @@ -31,6 +31,7 @@ from paddle.fluid.dataloader.dataloader_iter import _worker_loop class RandomDataset(Dataset): + def __init__(self, sample_num): self.sample_num = sample_num @@ -45,6 +46,7 @@ class RandomDataset(Dataset): class TestDataLoaderAssert(unittest.TestCase): + def test_main(self): place = fluid.cpu_places()[0] with fluid.dygraph.guard(place): @@ -67,8 +69,9 @@ class TestDataLoaderAssert(unittest.TestCase): # num_workers < 0 try: - loader = DataLoader( - dataset=dataset, places=place, num_workers=-1) + loader = DataLoader(dataset=dataset, + places=place, + num_workers=-1) self.assertTrue(False) except AssertionError: pass @@ -82,26 +85,27 @@ class TestDataLoaderAssert(unittest.TestCase): # set batch_sampler and shuffle/batch_size/drop_last try: - loader = DataLoader( - dataset=dataset, - places=place, - batch_sampler=batch_sampler, - shuffle=True, - drop_last=True) + loader = DataLoader(dataset=dataset, + places=place, + batch_sampler=batch_sampler, + shuffle=True, + drop_last=True) self.assertTrue(False) except AssertionError: pass # set batch_sampler correctly try: - loader = DataLoader( - dataset=dataset, places=place, batch_sampler=batch_sampler) + loader = DataLoader(dataset=dataset, + places=place, + batch_sampler=batch_sampler) self.assertTrue(True) except AssertionError: self.assertTrue(False) class TestDatasetRuntimeError(unittest.TestCase): + def test_main(self): dataset = Dataset() @@ -148,6 +152,7 @@ class TestDatasetRuntimeError(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestDataLoaderWorkerLoop(unittest.TestCase): + def run_without_worker_done(self, use_shared_memory=True): try: place = fluid.cpu_places()[0] @@ -161,15 +166,13 @@ class TestDataLoaderWorkerLoop(unittest.TestCase): # test collate_fn def _collate_fn(sample_list): return [ - np.stack( - s, axis=0) for s in list(zip(*sample_list)) + np.stack(s, axis=0) for s in list(zip(*sample_list)) ] - loader = DataLoader( - dataset, - num_workers=1, - places=place, - use_shared_memory=use_shared_memory) + loader = DataLoader(dataset, + num_workers=1, + places=place, + use_shared_memory=use_shared_memory) assert loader.num_workers > 0, \ "go to AssertionError and pass in Mac and Windows" loader = iter(loader) @@ -204,15 +207,13 @@ class 
TestDataLoaderWorkerLoop(unittest.TestCase): # test collate_fn def _collate_fn(sample_list): return [ - np.stack( - s, axis=0) for s in list(zip(*sample_list)) + np.stack(s, axis=0) for s in list(zip(*sample_list)) ] - loader = DataLoader( - dataset, - num_workers=1, - places=place, - use_shared_memory=use_shared_memory) + loader = DataLoader(dataset, + num_workers=1, + places=place, + use_shared_memory=use_shared_memory) assert loader.num_workers > 0, \ "go to AssertionError and pass in Mac and Windows" loader = iter(loader) diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py index 490e95a0f0b..7ebcf4b8efa 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py @@ -32,6 +32,7 @@ from test_multiprocess_dataloader_iterable_dataset_static import EPOCH_NUM, BATC class SimpleFCNet(fluid.dygraph.Layer): + def __init__(self): super(SimpleFCNet, self).__init__() @@ -43,20 +44,18 @@ class SimpleFCNet(fluid.dygraph.Layer): in_channel = IMAGE_SIZE for hidden_size in [10, 20, 30]: self._fcs.append( - Linear( - in_channel, - hidden_size, - act='tanh', - param_attr=param_attr, - bias_attr=bias_attr)) + Linear(in_channel, + hidden_size, + act='tanh', + param_attr=param_attr, + bias_attr=bias_attr)) in_channel = hidden_size self._fcs.append( - Linear( - in_channel, - CLASS_NUM, - act='softmax', - param_attr=param_attr, - bias_attr=bias_attr)) + Linear(in_channel, + CLASS_NUM, + act='softmax', + param_attr=param_attr, + bias_attr=bias_attr)) def forward(self, image): out = image @@ -66,6 +65,7 @@ class SimpleFCNet(fluid.dygraph.Layer): class TestDygraphDataLoader(unittest.TestCase): + def run_main(self, num_workers, places, persistent_workers): fluid.default_startup_program().random_seed = 1 fluid.default_main_program().random_seed = 1 @@ -74,12 +74,11 @@ class TestDygraphDataLoader(unittest.TestCase): optimizer = fluid.optimizer.Adam(parameter_list=fc_net.parameters()) dataset = RandomDataset(SAMPLE_NUM, CLASS_NUM) - dataloader = DataLoader( - dataset, - num_workers=num_workers, - batch_size=BATCH_SIZE, - drop_last=True, - persistent_workers=persistent_workers) + dataloader = DataLoader(dataset, + num_workers=num_workers, + batch_size=BATCH_SIZE, + drop_last=True, + persistent_workers=persistent_workers) step_list = [] loss_list = [] @@ -116,16 +115,16 @@ class TestDygraphDataLoader(unittest.TestCase): print(self.__class__.__name__, p, num_workers, persistent_workers) sys.stdout.flush() - ret = self.run_main( - num_workers=num_workers, - places=p, - persistent_workers=persistent_workers) + ret = self.run_main(num_workers=num_workers, + places=p, + persistent_workers=persistent_workers) results.append(ret) assert results[0]['loss'].shape[0] * 2 == results[1][ 'loss'].shape[0] class TestDygraphDataLoaderWithBatchedDataset(TestDygraphDataLoader): + def run_main(self, num_workers, places, persistent_workers): fluid.default_startup_program().random_seed = 1 fluid.default_main_program().random_seed = 1 @@ -134,12 +133,11 @@ class TestDygraphDataLoaderWithBatchedDataset(TestDygraphDataLoader): optimizer = fluid.optimizer.Adam(parameter_list=fc_net.parameters()) dataset = RandomBatchedDataset(SAMPLE_NUM, CLASS_NUM) - dataloader = DataLoader( - dataset, - num_workers=num_workers, - batch_size=None, - drop_last=True, - 
persistent_workers=persistent_workers) + dataloader = DataLoader(dataset, + num_workers=num_workers, + batch_size=None, + drop_last=True, + persistent_workers=persistent_workers) step_list = [] loss_list = [] diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_split.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_split.py index d2b7971a85d..066585edff2 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_split.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_split.py @@ -23,6 +23,7 @@ from paddle.io import IterableDataset, BatchSampler, DataLoader, get_worker_info class RangeIterableDatasetSplit(IterableDataset): + def __init__(self, start, end): self.start = start self.end = end @@ -34,8 +35,8 @@ class RangeIterableDatasetSplit(IterableDataset): iter_end = self.end else: per_worker = int( - math.ceil((self.end - self.start) / float( - worker_info.num_workers))) + math.ceil( + (self.end - self.start) / float(worker_info.num_workers))) worker_id = worker_info.id iter_start = self.start + worker_id * per_worker iter_end = min(iter_start + per_worker, self.end) @@ -45,16 +46,16 @@ class RangeIterableDatasetSplit(IterableDataset): class TestDynamicDataLoaderIterSplit(unittest.TestCase): + def test_main(self): place = fluid.CPUPlace() with fluid.dygraph.guard(place): dataset = RangeIterableDatasetSplit(0, 10) - dataloader = DataLoader( - dataset, - places=place, - num_workers=2, - batch_size=1, - drop_last=True) + dataloader = DataLoader(dataset, + places=place, + num_workers=2, + batch_size=1, + drop_last=True) rets = [] for d in dataloader: @@ -64,6 +65,7 @@ class TestDynamicDataLoaderIterSplit(unittest.TestCase): class RangeIterableDataset(IterableDataset): + def __init__(self, start, end): self.start = start self.end = end @@ -74,6 +76,7 @@ class RangeIterableDataset(IterableDataset): class TestDynamicDataLoaderIterInitFuncSplit(unittest.TestCase): + def test_main(self): place = fluid.CPUPlace() with fluid.dygraph.guard(place): @@ -92,13 +95,12 @@ class TestDynamicDataLoaderIterInitFuncSplit(unittest.TestCase): dataset.start = start + worker_id * num_per_worker dataset.end = min(dataset.start + num_per_worker, end) - dataloader = DataLoader( - dataset, - places=place, - num_workers=1, - batch_size=1, - drop_last=True, - worker_init_fn=worker_spliter) + dataloader = DataLoader(dataset, + places=place, + num_workers=1, + batch_size=1, + drop_last=True, + worker_init_fn=worker_spliter) rets = [] for d in dataloader: diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py index 9e09c5e3a1d..2ef623c2189 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py @@ -33,6 +33,7 @@ CLASS_NUM = 10 class RandomDataset(IterableDataset): + def __init__(self, sample_num, class_num): self.sample_num = sample_num self.class_num = class_num @@ -54,8 +55,9 @@ def simple_fc_net_static(): with fluid.unique_name.guard(): with fluid.program_guard(main_prog, startup_prog): - image = fluid.data( - name='image', shape=[None, IMAGE_SIZE], dtype='float32') + image = fluid.data(name='image', + shape=[None, IMAGE_SIZE], + dtype='float32') label = fluid.data(name='label', 
shape=[None, 1], dtype='int64') hidden = image param_attr = fluid.ParamAttr(initializer=fluid.initializer.Constant( @@ -75,8 +77,7 @@ def simple_fc_net_static(): param_attr=param_attr, bias_attr=bias_attr) loss = fluid.layers.reduce_mean( - fluid.layers.cross_entropy( - input=predict_label, label=label)) + fluid.layers.cross_entropy(input=predict_label, label=label)) optimizer = fluid.optimizer.Adam() optimizer.minimize(loss) @@ -100,21 +101,21 @@ def prepare_places(with_data_parallel, with_cpu=False, with_gpu=True): class TestStaticDataLoader(unittest.TestCase): + def run_main(self, num_workers, places, persistent_workers): scope = fluid.Scope() with fluid.scope_guard(scope): startup_prog, main_prog, image, label, loss = simple_fc_net_static() dataset = RandomDataset(SAMPLE_NUM, CLASS_NUM) - dataloader = DataLoader( - dataset, - feed_list=[image, label], - places=places, - num_workers=num_workers, - batch_size=BATCH_SIZE, - return_list=False, - drop_last=True, - persistent_workers=persistent_workers) + dataloader = DataLoader(dataset, + feed_list=[image, label], + places=places, + num_workers=num_workers, + batch_size=BATCH_SIZE, + return_list=False, + drop_last=True, + persistent_workers=persistent_workers) # assert len(dataloader) == int(SAMPLE_NUM / BATCH_SIZE) exe = fluid.Executor(place=places[0]) @@ -122,8 +123,8 @@ class TestStaticDataLoader(unittest.TestCase): prog = fluid.CompiledProgram(main_prog) if len(places) > 1: - prog = prog.with_data_parallel( - loss_name=loss.name, places=places) + prog = prog.with_data_parallel(loss_name=loss.name, + places=places) step_list = [] loss_list = [] @@ -165,16 +166,16 @@ class TestStaticDataLoader(unittest.TestCase): print(self.__class__.__name__, p, num_workers, persistent_workers) sys.stdout.flush() - ret = self.run_main( - num_workers=num_workers, - places=p, - persistent_workers=persistent_workers) + ret = self.run_main(num_workers=num_workers, + places=p, + persistent_workers=persistent_workers) results.append(ret) assert results[0]['loss'].shape[0] * 2 == results[1][ 'loss'].shape[0] class RandomBatchedDataset(IterableDataset): + def __init__(self, sample_num, class_num): self.sample_num = sample_num // BATCH_SIZE self.class_num = class_num @@ -194,29 +195,29 @@ class RandomBatchedDataset(IterableDataset): class TestStaticDataLoaderWithBatchedDataset(TestStaticDataLoader): + def run_main(self, num_workers, places, persistent_workers): scope = fluid.Scope() with fluid.scope_guard(scope): startup_prog, main_prog, image, label, loss = simple_fc_net_static() dataset = RandomBatchedDataset(SAMPLE_NUM, CLASS_NUM) - dataloader = DataLoader( - dataset, - feed_list=[image, label], - places=places, - num_workers=num_workers, - batch_size=None, - return_list=False, - drop_last=True, - persistent_workers=persistent_workers) + dataloader = DataLoader(dataset, + feed_list=[image, label], + places=places, + num_workers=num_workers, + batch_size=None, + return_list=False, + drop_last=True, + persistent_workers=persistent_workers) exe = fluid.Executor(place=places[0]) exe.run(startup_prog) prog = fluid.CompiledProgram(main_prog) if len(places) > 1: - prog = prog.with_data_parallel( - loss_name=loss.name, places=places) + prog = prog.with_data_parallel(loss_name=loss.name, + places=places) step_list = [] loss_list = [] diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py index 9f73ee041e0..4da22817be2 100644 --- 
a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py @@ -33,6 +33,7 @@ CLASS_NUM = 10 class RandomDataset(Dataset): + def __init__(self, sample_num, class_num): self.sample_num = sample_num self.class_num = class_num @@ -55,8 +56,9 @@ def simple_fc_net_static(): with fluid.unique_name.guard(): with fluid.program_guard(main_prog, startup_prog): - image = fluid.data( - name='image', shape=[None, IMAGE_SIZE], dtype='float32') + image = fluid.data(name='image', + shape=[None, IMAGE_SIZE], + dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') hidden = image param_attr = fluid.ParamAttr(initializer=fluid.initializer.Constant( @@ -76,8 +78,7 @@ def simple_fc_net_static(): param_attr=param_attr, bias_attr=bias_attr) loss = fluid.layers.reduce_mean( - fluid.layers.cross_entropy( - input=predict_label, label=label)) + fluid.layers.cross_entropy(input=predict_label, label=label)) optimizer = fluid.optimizer.Adam() optimizer.minimize(loss) @@ -101,21 +102,21 @@ def prepare_places(with_data_parallel, with_cpu=False, with_gpu=True): class TestStaticDataLoader(unittest.TestCase): + def run_main(self, num_workers, places, persistent_workers, use_pe=True): scope = fluid.Scope() with fluid.scope_guard(scope): startup_prog, main_prog, image, label, loss = simple_fc_net_static() dataset = RandomDataset(SAMPLE_NUM, CLASS_NUM) - dataloader = DataLoader( - dataset, - feed_list=[image, label], - places=places, - num_workers=num_workers, - batch_size=BATCH_SIZE, - return_list=False, - drop_last=True, - persistent_workers=persistent_workers) + dataloader = DataLoader(dataset, + feed_list=[image, label], + places=places, + num_workers=num_workers, + batch_size=BATCH_SIZE, + return_list=False, + drop_last=True, + persistent_workers=persistent_workers) assert len(dataloader) == int(SAMPLE_NUM / BATCH_SIZE) exe = fluid.Executor(place=places[0]) @@ -124,8 +125,8 @@ class TestStaticDataLoader(unittest.TestCase): if use_pe: prog = fluid.CompiledProgram(main_prog) if len(places) > 1: - prog = prog.with_data_parallel( - loss_name=loss.name, places=places) + prog = prog.with_data_parallel(loss_name=loss.name, + places=places) else: prog = main_prog @@ -169,10 +170,9 @@ class TestStaticDataLoader(unittest.TestCase): print(self.__class__.__name__, p, num_workers, persistent_workers) sys.stdout.flush() - ret = self.run_main( - num_workers=num_workers, - places=p, - persistent_workers=persistent_workers) + ret = self.run_main(num_workers=num_workers, + places=p, + persistent_workers=persistent_workers) results.append(ret) diff = np.max( np.abs(results[0]['loss'] - results[1]['loss']) / @@ -181,20 +181,21 @@ class TestStaticDataLoader(unittest.TestCase): class TestStaticDataLoaderReturnList(unittest.TestCase): + def test_single_place(self): scope = fluid.Scope() - image = fluid.data( - name='image', shape=[None, IMAGE_SIZE], dtype='float32') + image = fluid.data(name='image', + shape=[None, IMAGE_SIZE], + dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') with fluid.scope_guard(scope): dataset = RandomDataset(SAMPLE_NUM, CLASS_NUM) - dataloader = DataLoader( - dataset, - feed_list=[image, label], - num_workers=0, - batch_size=BATCH_SIZE, - drop_last=True, - return_list=True) + dataloader = DataLoader(dataset, + feed_list=[image, label], + num_workers=0, + batch_size=BATCH_SIZE, + drop_last=True, + return_list=True) for d in dataloader: assert isinstance(d, list) @@ 
-204,19 +205,19 @@ class TestStaticDataLoaderReturnList(unittest.TestCase): def test_multi_place(self): scope = fluid.Scope() - image = fluid.data( - name='image', shape=[None, IMAGE_SIZE], dtype='float32') + image = fluid.data(name='image', + shape=[None, IMAGE_SIZE], + dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') with fluid.scope_guard(scope): dataset = RandomDataset(SAMPLE_NUM, CLASS_NUM) - dataloader = DataLoader( - dataset, - feed_list=[image, label], - num_workers=0, - batch_size=BATCH_SIZE, - places=[fluid.CPUPlace()] * 2, - drop_last=True, - return_list=True) + dataloader = DataLoader(dataset, + feed_list=[image, label], + num_workers=0, + batch_size=BATCH_SIZE, + places=[fluid.CPUPlace()] * 2, + drop_last=True, + return_list=True) for d in dataloader: assert isinstance(d, list) @@ -226,6 +227,7 @@ class TestStaticDataLoaderReturnList(unittest.TestCase): class RandomBatchedDataset(Dataset): + def __init__(self, sample_num, class_num): self.sample_num = int(sample_num / BATCH_SIZE) self.class_num = class_num @@ -247,21 +249,21 @@ class RandomBatchedDataset(Dataset): class TestStaticDataLoaderWithBatchedDataset(TestStaticDataLoader): + def run_main(self, num_workers, places, persistent_workers): scope = fluid.Scope() with fluid.scope_guard(scope): startup_prog, main_prog, image, label, loss = simple_fc_net_static() dataset = RandomBatchedDataset(SAMPLE_NUM, CLASS_NUM) - dataloader = DataLoader( - dataset, - feed_list=[image, label], - places=places, - num_workers=num_workers, - batch_size=None, - return_list=False, - drop_last=True, - persistent_workers=persistent_workers) + dataloader = DataLoader(dataset, + feed_list=[image, label], + places=places, + num_workers=num_workers, + batch_size=None, + return_list=False, + drop_last=True, + persistent_workers=persistent_workers) assert len(dataloader) == int(SAMPLE_NUM / BATCH_SIZE) exe = fluid.Executor(place=places[0]) @@ -269,8 +271,8 @@ class TestStaticDataLoaderWithBatchedDataset(TestStaticDataLoader): prog = fluid.CompiledProgram(main_prog) if len(places) > 1: - prog = prog.with_data_parallel( - loss_name=loss.name, places=places) + prog = prog.with_data_parallel(loss_name=loss.name, + places=places) step_list = [] loss_list = [] diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py b/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py index c3b53e81a66..825a6b8fa49 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py @@ -26,6 +26,7 @@ class ReaderException(Exception): class TestMultiprocessReaderExceptionWithQueueSuccess(unittest.TestCase): + def setUp(self): self.use_pipe = False self.raise_exception = False @@ -41,12 +42,12 @@ class TestMultiprocessReaderExceptionWithQueueSuccess(unittest.TestCase): batch_size = 4 def fake_reader(): + def __impl__(): for _ in range(sample_num): if not self.raise_exception: - yield list( - np.random.uniform( - low=-1, high=1, size=[10])), + yield list(np.random.uniform(low=-1, high=1, + size=[10])), else: raise ValueError() @@ -54,8 +55,9 @@ class TestMultiprocessReaderExceptionWithQueueSuccess(unittest.TestCase): with fluid.program_guard(fluid.Program(), fluid.Program()): image = fluid.data(name='image', dtype='float32', shape=[None, 10]) - reader = fluid.io.DataLoader.from_generator( - feed_list=[image], capacity=2, iterable=iterable) + reader = 
fluid.io.DataLoader.from_generator(feed_list=[image], + capacity=2, + iterable=iterable) image_p_1 = image + 1 @@ -63,15 +65,13 @@ class TestMultiprocessReaderExceptionWithQueueSuccess(unittest.TestCase): [fake_reader(), fake_reader()], use_pipe=self.use_pipe) if isinstance(place, fluid.CUDAPlace): - reader.set_sample_generator( - decorated_reader, - batch_size=batch_size, - places=fluid.cuda_places(0)) + reader.set_sample_generator(decorated_reader, + batch_size=batch_size, + places=fluid.cuda_places(0)) else: - reader.set_sample_generator( - decorated_reader, - batch_size=batch_size, - places=fluid.cpu_places(1)) + reader.set_sample_generator(decorated_reader, + batch_size=batch_size, + places=fluid.cpu_places(1)) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) @@ -120,6 +120,7 @@ class TestMultiprocessReaderExceptionWithQueueSuccess(unittest.TestCase): class TestMultiprocessReaderExceptionWithQueueFailed( TestMultiprocessReaderExceptionWithQueueSuccess): + def setUp(self): self.use_pipe = False self.raise_exception = True @@ -127,6 +128,7 @@ class TestMultiprocessReaderExceptionWithQueueFailed( class TestMultiprocessReaderExceptionWithPipeSuccess( TestMultiprocessReaderExceptionWithQueueSuccess): + def setUp(self): self.use_pipe = True self.raise_exception = False @@ -134,6 +136,7 @@ class TestMultiprocessReaderExceptionWithPipeSuccess( class TestMultiprocessReaderExceptionWithPipeFailed( TestMultiprocessReaderExceptionWithQueueSuccess): + def setUp(self): self.use_pipe = True self.raise_exception = True diff --git a/python/paddle/fluid/tests/unittests/test_mv_op.py b/python/paddle/fluid/tests/unittests/test_mv_op.py index 09ec702671b..086ed5e693b 100644 --- a/python/paddle/fluid/tests/unittests/test_mv_op.py +++ b/python/paddle/fluid/tests/unittests/test_mv_op.py @@ -25,6 +25,7 @@ from op_test import OpTest class TestMVOp(OpTest): + def setUp(self): self.op_type = "mv" self.python_api = paddle.mv @@ -44,6 +45,7 @@ class TestMVOp(OpTest): class TestMVAPI(unittest.TestCase): + def test_dygraph_api_out(self): paddle.disable_static() @@ -71,10 +73,12 @@ class TestMVAPI(unittest.TestCase): self.input_vec = np.random.rand(100).astype("float64") with program_guard(train_program, startup_program): - data_x = paddle.static.data( - "x", shape=[5, 100], dtype="float64") - data_vec = paddle.static.data( - "vec", shape=[100], dtype="float64") + data_x = paddle.static.data("x", + shape=[5, 100], + dtype="float64") + data_vec = paddle.static.data("vec", + shape=[100], + dtype="float64") data_x.stop_gradient = x_stop_gradient data_vec.stop_gradient = vec_stop_gradient @@ -83,16 +87,19 @@ class TestMVAPI(unittest.TestCase): self.place = paddle.CPUPlace() exe = paddle.static.Executor(self.place) - res, = exe.run( - feed={"x": self.input_x, - "vec": self.input_vec}, - fetch_list=[result_vec]) + res, = exe.run(feed={ + "x": self.input_x, + "vec": self.input_vec + }, + fetch_list=[result_vec]) z_expected = np.array(np.dot(self.input_x, self.input_vec)) self.assertTrue(np.allclose(res, z_expected)) class TestMVError(unittest.TestCase): + def test_input(self): + def test_shape(): paddle.enable_static() @@ -100,8 +107,9 @@ class TestMVError(unittest.TestCase): self.input_vec = np.random.rand(100).astype("float64") data_x = paddle.static.data("x", shape=[5, 100], dtype="float64") - data_vec = paddle.static.data( - "vec", shape=[100, 2], dtype="float64") + data_vec = paddle.static.data("vec", + shape=[100, 2], + dtype="float64") result_vec = paddle.mv(data_x, data_vec) 
self.assertRaises(ValueError, test_shape) diff --git a/python/paddle/fluid/tests/unittests/test_naive_best_fit_gpu_memory_limit.py b/python/paddle/fluid/tests/unittests/test_naive_best_fit_gpu_memory_limit.py index d8d10816bf9..6994bf30523 100644 --- a/python/paddle/fluid/tests/unittests/test_naive_best_fit_gpu_memory_limit.py +++ b/python/paddle/fluid/tests/unittests/test_naive_best_fit_gpu_memory_limit.py @@ -23,6 +23,7 @@ if fluid.is_compiled_with_cuda(): class TestBase(unittest.TestCase): + def setUp(self): if fluid.is_compiled_with_cuda(): self._limit = fluid.core.globals()['FLAGS_gpu_memory_limit_mb'] @@ -35,8 +36,7 @@ class TestBase(unittest.TestCase): place = fluid.CUDAPlace(0) t = fluid.LoDTensor() - t.set(np.ndarray( - [int(self._limit / 2), other_dim], dtype='float32'), + t.set(np.ndarray([int(self._limit / 2), other_dim], dtype='float32'), place) del t diff --git a/python/paddle/fluid/tests/unittests/test_name_scope.py b/python/paddle/fluid/tests/unittests/test_name_scope.py index a1f0d56d0ff..92d3f04fd2c 100644 --- a/python/paddle/fluid/tests/unittests/test_name_scope.py +++ b/python/paddle/fluid/tests/unittests/test_name_scope.py @@ -19,6 +19,7 @@ import paddle.fluid as fluid class TestNameScope(unittest.TestCase): + def test_name_scope(self): with fluid.name_scope("s1"): a = fluid.layers.data(name='data', shape=[1], dtype='int32') diff --git a/python/paddle/fluid/tests/unittests/test_nan_inf.py b/python/paddle/fluid/tests/unittests/test_nan_inf.py index 84559048a2b..6eb951b8ad1 100644 --- a/python/paddle/fluid/tests/unittests/test_nan_inf.py +++ b/python/paddle/fluid/tests/unittests/test_nan_inf.py @@ -25,6 +25,7 @@ paddle.enable_static() class TestNanInf(unittest.TestCase): + def setUp(self): self._python_interp = sys.executable if os.getenv('WITH_COVERAGE', 'OFF') == 'ON': @@ -35,11 +36,10 @@ class TestNanInf(unittest.TestCase): def check_nan_inf(self): cmd = self._python_interp - proc = subprocess.Popen( - cmd.split(" "), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=self.env) + proc = subprocess.Popen(cmd.split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self.env) out, err = proc.communicate() returncode = proc.returncode @@ -51,8 +51,8 @@ class TestNanInf(unittest.TestCase): if paddle.fluid.core.is_compiled_with_cuda(): assert (out + err).find('find nan or inf==='.encode()) != -1 else: - assert (out + err - ).find('There are `nan` or `inf` in tensor'.encode()) != -1 + assert (out + err).find( + 'There are `nan` or `inf` in tensor'.encode()) != -1 def test_nan_inf_in_static_mode(self): self._python_interp += " check_nan_inf_base.py" @@ -64,6 +64,7 @@ class TestNanInf(unittest.TestCase): class TestNanInfEnv(TestNanInf): + def setUp(self): super(TestNanInfEnv, self).setUp() # windows python have some bug with env, so need use str to pass ci diff --git a/python/paddle/fluid/tests/unittests/test_nanmean_api.py b/python/paddle/fluid/tests/unittests/test_nanmean_api.py index 90a9a130899..7f6306f0ae5 100644 --- a/python/paddle/fluid/tests/unittests/test_nanmean_api.py +++ b/python/paddle/fluid/tests/unittests/test_nanmean_api.py @@ -31,8 +31,8 @@ class TestNanmeanAPI(unittest.TestCase): self.x_shape = [2, 3, 4, 5] self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32) self.x[0, :, :, :] = np.nan - self.x_grad = np.array([[np.nan, np.nan, 3.], - [0., np.nan, 2.]]).astype(np.float32) + self.x_grad = np.array([[np.nan, np.nan, 3.], [0., np.nan, + 2.]]).astype(np.float32) self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ 
else paddle.CPUPlace() @@ -72,9 +72,8 @@ class TestNanmeanAPI(unittest.TestCase): out_np[nan_mask] = 0 self.assertEqual(np.allclose(out_np, out_ref, rtol=1e-04), True) else: - self.assertEqual( - np.allclose( - out.numpy(), out_ref, rtol=1e-04), True) + self.assertEqual(np.allclose(out.numpy(), out_ref, rtol=1e-04), + True) test_case(self.x) test_case(self.x, []) diff --git a/python/paddle/fluid/tests/unittests/test_nanmedian.py b/python/paddle/fluid/tests/unittests/test_nanmedian.py index 2e1f13a8c7d..74c0c635dd3 100644 --- a/python/paddle/fluid/tests/unittests/test_nanmedian.py +++ b/python/paddle/fluid/tests/unittests/test_nanmedian.py @@ -23,6 +23,7 @@ np.random.seed(102) class TestNanmedian(unittest.TestCase): + def setUp(self): single_axis_shape = (120) multi_axis_shape = (2, 3, 4, 5) @@ -32,8 +33,10 @@ class TestNanmedian(unittest.TestCase): np.random.uniform(-1, 1, single_axis_shape).astype(np.float32), "multi_axis_normal": np.random.uniform(-1, 1, multi_axis_shape).astype(np.float32), - "single_axis_all_nan": np.full(single_axis_shape, np.nan), - "multi_axis_all_nan": np.full(multi_axis_shape, np.nan), + "single_axis_all_nan": + np.full(single_axis_shape, np.nan), + "multi_axis_all_nan": + np.full(multi_axis_shape, np.nan), } single_partial_nan = self.fake_data["single_axis_normal"].copy() @@ -108,15 +111,15 @@ class TestNanmedian(unittest.TestCase): continue np_res = np.nanmedian(data, keepdims=keep_dim) - pd_res = paddle.nanmedian( - paddle.to_tensor(data), keepdim=keep_dim) + pd_res = paddle.nanmedian(paddle.to_tensor(data), + keepdim=keep_dim) self.assertTrue( - np.allclose( - np_res, pd_res.numpy(), equal_nan=True)) + np.allclose(np_res, pd_res.numpy(), equal_nan=True)) def test_axis_case(data, axis): - pd_res = paddle.nanmedian( - paddle.to_tensor(data), axis=axis, keepdim=False) + pd_res = paddle.nanmedian(paddle.to_tensor(data), + axis=axis, + keepdim=False) axis = clean_axis_numpy(axis, len(data.shape)) np_res = np.nanmedian(data, axis=axis, keepdims=False) self.assertTrue(np.allclose(np_res, pd_res.numpy(), equal_nan=True)) diff --git a/python/paddle/fluid/tests/unittests/test_nansum_api.py b/python/paddle/fluid/tests/unittests/test_nansum_api.py index a9fc285d2d9..34c34e9d8a6 100644 --- a/python/paddle/fluid/tests/unittests/test_nansum_api.py +++ b/python/paddle/fluid/tests/unittests/test_nansum_api.py @@ -23,6 +23,7 @@ from paddle.fluid import Program, program_guard class API_Test_Nansum(unittest.TestCase): + def test_static_graph(self): paddle.enable_static() startup_program = fluid.Program() @@ -39,8 +40,9 @@ class API_Test_Nansum(unittest.TestCase): exe = fluid.Executor(place) exe.run(startup_program) - x = np.array([[float('nan'), 3, 5, 9], - [1, 2, float('-nan'), 7]]).astype(np.float32) + x = np.array([[float('nan'), 3, 5, 9], [1, 2, + float('-nan'), + 7]]).astype(np.float32) res = exe.run(train_program, feed={'input': x}, fetch_list=[out1, out2, out3, out4]) @@ -54,18 +56,14 @@ class API_Test_Nansum(unittest.TestCase): out3_ref = np.array([17, 10]).astype(np.float32) out4_ref = np.array([[17], [10]]).astype(np.float32) - self.assertTrue( - (out1_np == out1_ref).all(), - msg='nansum output is wrong, out =' + str(out1_np)) - self.assertTrue( - (out2_np == out2_ref).all(), - msg='nansum output is wrong, out =' + str(out2_np)) - self.assertTrue( - (out3_np == out3_ref).all(), - msg='nansum output is wrong, out =' + str(out3_np)) - self.assertTrue( - (out4_np == out4_ref).all(), - msg='nansum output is wrong, out =' + str(out4_np)) + self.assertTrue((out1_np == 
out1_ref).all(), + msg='nansum output is wrong, out =' + str(out1_np)) + self.assertTrue((out2_np == out2_ref).all(), + msg='nansum output is wrong, out =' + str(out2_np)) + self.assertTrue((out3_np == out3_ref).all(), + msg='nansum output is wrong, out =' + str(out3_np)) + self.assertTrue((out4_np == out4_ref).all(), + msg='nansum output is wrong, out =' + str(out4_np)) def test_error_api(self): paddle.enable_static() @@ -85,8 +83,8 @@ class API_Test_Nansum(unittest.TestCase): self.assertRaises(TypeError, run2) def test_dygraph(self): - x = np.array([[float('nan'), 3, 5, 9], - [1, 2, float('-nan'), 7]]).astype(np.float32) + x = np.array([[float('nan'), 3, 5, 9], [1, 2, float('-nan'), + 7]]).astype(np.float32) with fluid.dygraph.guard(): inputs = fluid.dygraph.to_variable(x) out = paddle.nansum(inputs) diff --git a/python/paddle/fluid/tests/unittests/test_nce.py b/python/paddle/fluid/tests/unittests/test_nce.py index 6c2fc4d8429..bbeec5ce621 100644 --- a/python/paddle/fluid/tests/unittests/test_nce.py +++ b/python/paddle/fluid/tests/unittests/test_nce.py @@ -56,13 +56,15 @@ def nce(input, weight, bias, sample_weight, labels, num_classes, o = sample_out[i] cost = -np.log(o / (o + b)) if samples[i][2] else -np.log(b / (o + b)) out[samples[i][0]] += cost * samples[i][3] - return (out[:, np.newaxis], np.array(sample_out).reshape( - batch_size, num_sample_class + num_true_class), + return (out[:, np.newaxis], + np.array(sample_out).reshape(batch_size, + num_sample_class + num_true_class), np.array(sample_labels).reshape(batch_size, num_sample_class + num_true_class)) class TestNCE(OpTest): + def generate_data(self, dim, batch_size, num_classes, num_true_class, num_neg_samples, is_sparse): input = np.random.randn(batch_size, dim).astype(np.float32) @@ -118,11 +120,13 @@ class TestNCE(OpTest): self.check_output() def test_check_grad(self): - self.check_grad( - ["Input", "Weight", "Bias"], "Cost", max_relative_error=0.02) + self.check_grad(["Input", "Weight", "Bias"], + "Cost", + max_relative_error=0.02) class TestNCECase1Tensor(TestNCE): + def set_data(self): self.generate_data(10, 20, 100, 2, 5, False) @@ -137,6 +141,7 @@ class TestNCETensorIsTest(TestNCE): class TestNCECase1SelectedRows(unittest.TestCase): + def setUp(self): self.base_lr = 0.0001 self.batch_size = 8 @@ -238,73 +243,89 @@ class TestNCECase1SelectedRows(unittest.TestCase): class TestNCE_OpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - input1 = fluid.create_lod_tensor( - np.array([0.0, 3.0, 2.0, 4.0]), [[1, 1, 2]], fluid.CPUPlace()) - label1 = fluid.layers.data( - name='label1', shape=[-1, 4], dtype="int64") + input1 = fluid.create_lod_tensor(np.array([0.0, 3.0, 2.0, 4.0]), + [[1, 1, 2]], fluid.CPUPlace()) + label1 = fluid.layers.data(name='label1', + shape=[-1, 4], + dtype="int64") # the input(input) of nce layer must be Variable. self.assertRaises(TypeError, fluid.layers.nce, input1, label1, 5) - input2 = fluid.layers.data( - name='input2', shape=[-1, 4], dtype="float32") - label2 = fluid.create_lod_tensor( - np.array([0.0, 3.0, 2.0, 4.0]), [[1, 1, 2]], fluid.CPUPlace()) + input2 = fluid.layers.data(name='input2', + shape=[-1, 4], + dtype="float32") + label2 = fluid.create_lod_tensor(np.array([0.0, 3.0, 2.0, 4.0]), + [[1, 1, 2]], fluid.CPUPlace()) # the input(label) of nce layer must be Variable. 
self.assertRaises(TypeError, fluid.layers.nce, input2, label2, 5) - input3 = fluid.layers.data( - name='input3', shape=[-1, 4], dtype="float16") - label3 = fluid.layers.data( - name='label3', shape=[-1, 1], dtype="int64") + input3 = fluid.layers.data(name='input3', + shape=[-1, 4], + dtype="float16") + label3 = fluid.layers.data(name='label3', + shape=[-1, 1], + dtype="int64") # the data type of input(input) must be float32 or float64. self.assertRaises(TypeError, fluid.layers.nce, input3, label3, 5) - input4 = fluid.layers.data( - name='input4', shape=[-1, 4], dtype="float32") - label4 = fluid.layers.data( - name='label4', shape=[-1, 1], dtype="int32") + input4 = fluid.layers.data(name='input4', + shape=[-1, 4], + dtype="float32") + label4 = fluid.layers.data(name='label4', + shape=[-1, 1], + dtype="int32") # the data type of input(label) must be int64. self.assertRaises(TypeError, fluid.layers.nce, input4, label4, 5) class TestDygraphNCE_OpError(unittest.TestCase): + def test_NCE_errors(self): with program_guard(Program(), Program()): nce = fluid.NCE(20, 5) - input1 = fluid.create_lod_tensor( - np.array([0.0, 3.0, 2.0, 4.0]), [[1, 1, 2]], fluid.CPUPlace()) - label1 = fluid.layers.data( - name='label1', shape=[-1, 4], dtype="int64") + input1 = fluid.create_lod_tensor(np.array([0.0, 3.0, 2.0, 4.0]), + [[1, 1, 2]], fluid.CPUPlace()) + label1 = fluid.layers.data(name='label1', + shape=[-1, 4], + dtype="int64") # the input(input) of NCE layer must be Variable. self.assertRaises(TypeError, nce, input1, label1) - input2 = fluid.layers.data( - name='input2', shape=[-1, 4], dtype="float32") - label2 = fluid.create_lod_tensor( - np.array([0.0, 3.0, 2.0, 4.0]), [[1, 1, 2]], fluid.CPUPlace()) + input2 = fluid.layers.data(name='input2', + shape=[-1, 4], + dtype="float32") + label2 = fluid.create_lod_tensor(np.array([0.0, 3.0, 2.0, 4.0]), + [[1, 1, 2]], fluid.CPUPlace()) # the input(label) of NCE layer must be Variable. self.assertRaises(TypeError, nce, input2, label2) - input3 = fluid.layers.data( - name='input3', shape=[-1, 4], dtype="float16") - label3 = fluid.layers.data( - name='label3', shape=[-1, 1], dtype="int64") + input3 = fluid.layers.data(name='input3', + shape=[-1, 4], + dtype="float16") + label3 = fluid.layers.data(name='label3', + shape=[-1, 1], + dtype="int64") # the data type of input(input) must be float32 or float64. self.assertRaises(TypeError, nce, input3, label3) - input4 = fluid.layers.data( - name='input4', shape=[-1, 4], dtype="float32") - label4 = fluid.layers.data( - name='label4', shape=[-1, 1], dtype="int32") + input4 = fluid.layers.data(name='input4', + shape=[-1, 4], + dtype="float32") + label4 = fluid.layers.data(name='label4', + shape=[-1, 1], + dtype="int32") # the data type of input(label) must be int64. self.assertRaises(TypeError, nce, input4, label4) - input5 = fluid.layers.data( - name='input5', shape=[-1, 4], dtype="float32") - label5 = fluid.layers.data( - name='label5', shape=[-1, 1], dtype="int64") + input5 = fluid.layers.data(name='input5', + shape=[-1, 4], + dtype="float32") + label5 = fluid.layers.data(name='label5', + shape=[-1, 1], + dtype="int64") sample_weight = fluid.create_lod_tensor( np.array([0.0, 3.0, 2.0, 4.0]), [[1, 1, 2]], fluid.CPUPlace()) # the sample_weight of nce must be Variable or None. 
diff --git a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py index 5df085d4feb..3bcafe53cb8 100755 --- a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py @@ -73,6 +73,7 @@ def nearest_neighbor_interp_np(X, class TestNearestInterpOp(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -96,9 +97,10 @@ class TestNearestInterpOp(OpTest): out_h = self.out_h out_w = self.out_w - output_np = nearest_neighbor_interp_np( - input_np, out_h, out_w, self.out_size, self.actual_shape, - self.align_corners, self.data_layout) + output_np = nearest_neighbor_interp_np(input_np, out_h, out_w, + self.out_size, self.actual_shape, + self.align_corners, + self.data_layout) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size @@ -120,8 +122,10 @@ class TestNearestInterpOp(OpTest): self.check_output(check_eager=self.check_eager) def test_check_grad(self): - self.check_grad( - ['X'], 'Out', in_place=True, check_eager=self.check_eager) + self.check_grad(['X'], + 'Out', + in_place=True, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'nearest' @@ -134,6 +138,7 @@ class TestNearestInterpOp(OpTest): class TestNearestNeighborInterpCase1(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [4, 1, 7, 8] @@ -144,6 +149,7 @@ class TestNearestNeighborInterpCase1(TestNearestInterpOp): class TestNearestNeighborInterpCase2(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 3, 9, 6] @@ -154,6 +160,7 @@ class TestNearestNeighborInterpCase2(TestNearestInterpOp): class TestNearestNeighborInterpCase3(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [1, 1, 32, 64] @@ -164,6 +171,7 @@ class TestNearestNeighborInterpCase3(TestNearestInterpOp): class TestNearestNeighborInterpCase4(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [4, 1, 7, 8] @@ -175,6 +183,7 @@ class TestNearestNeighborInterpCase4(TestNearestInterpOp): class TestNearestNeighborInterpCase5(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 3, 9, 6] @@ -186,6 +195,7 @@ class TestNearestNeighborInterpCase5(TestNearestInterpOp): class TestNearestNeighborInterpCase6(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [1, 1, 32, 64] @@ -197,6 +207,7 @@ class TestNearestNeighborInterpCase6(TestNearestInterpOp): class TestNearestNeighborInterpSame(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [2, 3, 32, 64] @@ -207,6 +218,7 @@ class TestNearestNeighborInterpSame(TestNearestInterpOp): class TestNearestNeighborInterpActualShape(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 32, 16] @@ -218,6 +230,7 @@ class TestNearestNeighborInterpActualShape(TestNearestInterpOp): class TestNearestNeighborInterpDataLayout(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [2, 4, 4, 5] @@ -230,14 +243,15 @@ class TestNearestNeighborInterpDataLayout(TestNearestInterpOp): class TestNearestInterpOpUint8(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None 
self.init_test_case() self.op_type = "nearest_interp" self.check_eager = True - input_np = np.random.randint( - low=0, high=256, size=self.input_shape).astype("uint8") + input_np = np.random.randint(low=0, high=256, + size=self.input_shape).astype("uint8") if self.scale > 0: out_h = int(self.input_shape[2] * self.scale) @@ -263,8 +277,9 @@ class TestNearestInterpOpUint8(OpTest): self.outputs = {'Out': output_np} def test_check_output(self): - self.check_output_with_place( - place=core.CPUPlace(), atol=1, check_eager=self.check_eager) + self.check_output_with_place(place=core.CPUPlace(), + atol=1, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'nearest' @@ -276,6 +291,7 @@ class TestNearestInterpOpUint8(OpTest): class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [2, 3, 32, 64] @@ -286,6 +302,7 @@ class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8): class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [4, 1, 7, 8] @@ -297,11 +314,13 @@ class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8): class TestNearestInterpWithoutCorners(TestNearestInterpOp): + def set_align_corners(self): self.align_corners = False class TestNearestNeighborInterpScale1(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 7, 5] @@ -313,6 +332,7 @@ class TestNearestNeighborInterpScale1(TestNearestInterpOp): class TestNearestNeighborInterpScale2(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 5, 7] @@ -324,6 +344,7 @@ class TestNearestNeighborInterpScale2(TestNearestInterpOp): class TestNearestNeighborInterpScale3(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 7, 5] @@ -335,6 +356,7 @@ class TestNearestNeighborInterpScale3(TestNearestInterpOp): class TestNearestInterpOp_attr_tensor(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -385,8 +407,10 @@ class TestNearestInterpOp_attr_tensor(OpTest): self.check_output(check_eager=self.check_eager) def test_check_grad(self): - self.check_grad( - ['X'], 'Out', in_place=True, check_eager=self.check_eager) + self.check_grad(['X'], + 'Out', + in_place=True, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'nearest' @@ -400,6 +424,7 @@ class TestNearestInterpOp_attr_tensor(OpTest): # out_size is a tensor list class TestNearestInterp_attr_tensor_Case1(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 3, 9, 6] @@ -412,6 +437,7 @@ class TestNearestInterp_attr_tensor_Case1(TestNearestInterpOp_attr_tensor): # out_size is a 1-D tensor class TestNearestInterp_attr_tensor_Case2(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 32, 16] @@ -425,6 +451,7 @@ class TestNearestInterp_attr_tensor_Case2(TestNearestInterpOp_attr_tensor): # scale is a 1-D tensor class TestNearestInterp_attr_tensor_Case3(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 32, 16] @@ -437,6 +464,7 @@ class TestNearestInterp_attr_tensor_Case3(TestNearestInterpOp_attr_tensor): class TestNearestAPI(unittest.TestCase): + def test_case(self): x = 
fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") y = fluid.data(name="y", shape=[2, 6, 6, 3], dtype="float32") @@ -444,15 +472,18 @@ class TestNearestAPI(unittest.TestCase): dim = fluid.data(name="dim", shape=[1], dtype="int32") shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") actual_size = fluid.data(name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32") + scale_tensor = fluid.data(name="scale_tensor", + shape=[1], + dtype="float32") - out1 = fluid.layers.resize_nearest( - y, out_shape=[12, 12], data_format='NHWC') + out1 = fluid.layers.resize_nearest(y, + out_shape=[12, 12], + data_format='NHWC') out2 = fluid.layers.resize_nearest(x, out_shape=[12, dim]) out3 = fluid.layers.resize_nearest(x, out_shape=shape_tensor) - out4 = fluid.layers.resize_nearest( - x, out_shape=[4, 4], actual_shape=actual_size) + out4 = fluid.layers.resize_nearest(x, + out_shape=[4, 4], + actual_shape=actual_size) out5 = fluid.layers.resize_nearest(x, scale=scale_tensor) x_data = np.random.random((2, 3, 6, 6)).astype("float32") @@ -479,8 +510,10 @@ class TestNearestAPI(unittest.TestCase): fetch_list=[out1, out2, out3, out4, out5], return_numpy=True) - expect_res = nearest_neighbor_interp_np( - x_data, out_h=12, out_w=12, align_corners=True) + expect_res = nearest_neighbor_interp_np(x_data, + out_h=12, + out_w=12, + align_corners=True) self.assertTrue( np.allclose(results[0], np.transpose(expect_res, (0, 2, 3, 1)))) for i in range(len(results) - 1): @@ -488,13 +521,15 @@ class TestNearestAPI(unittest.TestCase): class TestNearestInterpException(unittest.TestCase): + def test_exception(self): input = fluid.data(name="input", shape=[1, 3, 6, 6], dtype="float32") def attr_data_format(): # for 4-D input, data_format can only be NCHW or NHWC - out = fluid.layers.resize_nearest( - input, out_shape=[4, 8], data_format='NDHWC') + out = fluid.layers.resize_nearest(input, + out_shape=[4, 8], + data_format='NDHWC') def attr_scale_type(): out = fluid.layers.resize_nearest(input, scale='scale') diff --git a/python/paddle/fluid/tests/unittests/test_nearest_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_nearest_interp_v2_op.py index e2ac98f7c9f..322db889d23 100755 --- a/python/paddle/fluid/tests/unittests/test_nearest_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_nearest_interp_v2_op.py @@ -158,6 +158,7 @@ def nearest_neighbor_interp3d_np(X, class TestNearestInterpOp(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -215,10 +216,12 @@ class TestNearestInterpOp(OpTest): input_np, out_h, out_w, scale_h, scale_w, self.out_size, self.actual_shape, self.align_corners, self.data_layout) elif len(self.input_shape) == 5: - output_np = nearest_neighbor_interp3d_np( - input_np, out_d, out_h, out_w, scale_d, scale_h, scale_w, - self.out_size, self.actual_shape, self.align_corners, - self.data_layout) + output_np = nearest_neighbor_interp3d_np(input_np, out_d, out_h, + out_w, scale_d, scale_h, + scale_w, self.out_size, + self.actual_shape, + self.align_corners, + self.data_layout) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size @@ -267,6 +270,7 @@ class TestNearestInterpOp(OpTest): class TestNearestNeighborInterpCase1(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [4, 1, 1, 7, 8] @@ -278,6 +282,7 @@ class TestNearestNeighborInterpCase1(TestNearestInterpOp): class 
TestNearestNeighborInterpCase2(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 3, 9, 6] @@ -288,6 +293,7 @@ class TestNearestNeighborInterpCase2(TestNearestInterpOp): class TestNearestNeighborInterpCase3(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [1, 1, 32, 64] @@ -298,6 +304,7 @@ class TestNearestNeighborInterpCase3(TestNearestInterpOp): class TestNearestNeighborInterpCase4(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [4, 1, 7, 8] @@ -309,6 +316,7 @@ class TestNearestNeighborInterpCase4(TestNearestInterpOp): class TestNearestNeighborInterpCase5(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 3, 9, 6] @@ -320,6 +328,7 @@ class TestNearestNeighborInterpCase5(TestNearestInterpOp): class TestNearestNeighborInterpCase6(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [1, 1, 32, 64] @@ -331,6 +340,7 @@ class TestNearestNeighborInterpCase6(TestNearestInterpOp): class TestNearestNeighborInterpSame(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [2, 3, 32, 64] @@ -341,6 +351,7 @@ class TestNearestNeighborInterpSame(TestNearestInterpOp): class TestNearestNeighborInterpActualShape(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 32, 16] @@ -352,6 +363,7 @@ class TestNearestNeighborInterpActualShape(TestNearestInterpOp): class TestNearestNeighborInterpDataLayout(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [2, 4, 4, 5] @@ -364,13 +376,14 @@ class TestNearestNeighborInterpDataLayout(TestNearestInterpOp): class TestNearestInterpOpUint8(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None self.init_test_case() self.op_type = "nearest_interp_v2" - input_np = np.random.randint( - low=0, high=256, size=self.input_shape).astype("uint8") + input_np = np.random.randint(low=0, high=256, + size=self.input_shape).astype("uint8") if self.scale: if isinstance(self.scale, float) or isinstance(self.scale, int): @@ -421,6 +434,7 @@ class TestNearestInterpOpUint8(OpTest): class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [2, 3, 32, 64] @@ -431,6 +445,7 @@ class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8): class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [4, 1, 7, 8] @@ -442,11 +457,13 @@ class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8): class TestNearestInterpWithoutCorners(TestNearestInterpOp): + def set_align_corners(self): self.align_corners = False class TestNearestNeighborInterpScale1(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 7, 5] @@ -458,6 +475,7 @@ class TestNearestNeighborInterpScale1(TestNearestInterpOp): class TestNearestNeighborInterpScale2(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 5, 7] @@ -469,6 +487,7 @@ class TestNearestNeighborInterpScale2(TestNearestInterpOp): class TestNearestNeighborInterpScale3(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape 
= [3, 2, 7, 5] @@ -480,6 +499,7 @@ class TestNearestNeighborInterpScale3(TestNearestInterpOp): class TestNearestNeighbor3DInterp(TestNearestInterpOp): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 4, 7, 5] @@ -492,6 +512,7 @@ class TestNearestNeighbor3DInterp(TestNearestInterpOp): class TestNearestInterpOp_attr_tensor(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -565,6 +586,7 @@ class TestNearestInterpOp_attr_tensor(OpTest): # out_size is a tensor list class TestNearestInterp_attr_tensor_Case1(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 3, 9, 6] @@ -577,6 +599,7 @@ class TestNearestInterp_attr_tensor_Case1(TestNearestInterpOp_attr_tensor): # out_size is a 1-D tensor class TestNearestInterp_attr_tensor_Case2(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 32, 16] @@ -590,6 +613,7 @@ class TestNearestInterp_attr_tensor_Case2(TestNearestInterpOp_attr_tensor): # scale is a 1-D tensor class TestNearestInterp_attr_tensor_Case3(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'nearest' self.input_shape = [3, 2, 32, 16] @@ -602,6 +626,7 @@ class TestNearestInterp_attr_tensor_Case3(TestNearestInterpOp_attr_tensor): class TestNearestAPI(unittest.TestCase): + def test_case(self): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") y = fluid.data(name="y", shape=[2, 6, 6, 3], dtype="float32") @@ -609,15 +634,18 @@ class TestNearestAPI(unittest.TestCase): dim = fluid.data(name="dim", shape=[1], dtype="int32") shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") actual_size = fluid.data(name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32") + scale_tensor = fluid.data(name="scale_tensor", + shape=[1], + dtype="float32") - out1 = fluid.layers.resize_nearest( - y, out_shape=[12, 12], data_format='NHWC') + out1 = fluid.layers.resize_nearest(y, + out_shape=[12, 12], + data_format='NHWC') out2 = fluid.layers.resize_nearest(x, out_shape=[12, dim]) out3 = fluid.layers.resize_nearest(x, out_shape=shape_tensor) - out4 = fluid.layers.resize_nearest( - x, out_shape=[4, 4], actual_shape=actual_size) + out4 = fluid.layers.resize_nearest(x, + out_shape=[4, 4], + actual_shape=actual_size) out5 = fluid.layers.resize_nearest(x, scale=scale_tensor) x_data = np.random.random((2, 3, 6, 6)).astype("float32") @@ -644,8 +672,10 @@ class TestNearestAPI(unittest.TestCase): fetch_list=[out1, out2, out3, out4, out5], return_numpy=True) - expect_res = nearest_neighbor_interp_np( - x_data, out_h=12, out_w=12, align_corners=True) + expect_res = nearest_neighbor_interp_np(x_data, + out_h=12, + out_w=12, + align_corners=True) self.assertTrue( np.allclose(results[0], np.transpose(expect_res, (0, 2, 3, 1)))) for i in range(len(results) - 1): @@ -653,6 +683,7 @@ class TestNearestAPI(unittest.TestCase): class TestNearestInterpOpAPI_dy(unittest.TestCase): + def test_case(self): import paddle if core.is_compiled_with_cuda(): @@ -664,17 +695,19 @@ class TestNearestInterpOpAPI_dy(unittest.TestCase): scale_np = np.array([2, 2]).astype("int64") input_x = paddle.to_tensor(input_data) scale = paddle.to_tensor(scale_np) - expect_res = nearest_neighbor_interp_np( - input_data, out_h=12, out_w=12, align_corners=False) - out = interpolate( - x=input_x, - scale_factor=scale, - mode="nearest", - 
align_corners=False) + expect_res = nearest_neighbor_interp_np(input_data, + out_h=12, + out_w=12, + align_corners=False) + out = interpolate(x=input_x, + scale_factor=scale, + mode="nearest", + align_corners=False) self.assertTrue(np.allclose(out.numpy(), expect_res)) class TestNearestInterp3DOpAPI_dy(unittest.TestCase): + def test_case(self): import paddle if core.is_compiled_with_cuda(): @@ -686,26 +719,30 @@ class TestNearestInterp3DOpAPI_dy(unittest.TestCase): scale_np = np.array([2, 2, 2]).astype("int64") input_x = paddle.to_tensor(input_data) scale = paddle.to_tensor(scale_np) - expect_res = nearest_neighbor_interp3d_np( - input_data, out_d=12, out_h=12, out_w=12, align_corners=False) - out = interpolate( - x=input_x, - scale_factor=scale, - mode="nearest", - align_corners=False, - data_format="NCDHW") + expect_res = nearest_neighbor_interp3d_np(input_data, + out_d=12, + out_h=12, + out_w=12, + align_corners=False) + out = interpolate(x=input_x, + scale_factor=scale, + mode="nearest", + align_corners=False, + data_format="NCDHW") self.assertTrue(np.allclose(out.numpy(), expect_res)) class TestNearestInterpException(unittest.TestCase): + def test_exception(self): import paddle input = fluid.data(name="input", shape=[1, 3, 6, 6], dtype="float32") def attr_data_format(): # for 4-D input, data_format can only be NCHW or NHWC - out = fluid.layers.resize_nearest( - input, out_shape=[4, 8], data_format='NDHWC') + out = fluid.layers.resize_nearest(input, + out_shape=[4, 8], + data_format='NDHWC') def attr_scale_type(): out = fluid.layers.resize_nearest(input, scale='scale') @@ -719,8 +756,9 @@ class TestNearestInterpException(unittest.TestCase): def mode_error(): x = paddle.randn([1, 3]) - out = paddle.nn.functional.interpolate( - x, scale_factor='scale', mode="BILINEAR") + out = paddle.nn.functional.interpolate(x, + scale_factor='scale', + mode="BILINEAR") self.assertRaises(ValueError, attr_data_format) self.assertRaises(TypeError, attr_scale_type) diff --git a/python/paddle/fluid/tests/unittests/test_neg_op.py b/python/paddle/fluid/tests/unittests/test_neg_op.py index e7b16bde023..473d2b77bae 100644 --- a/python/paddle/fluid/tests/unittests/test_neg_op.py +++ b/python/paddle/fluid/tests/unittests/test_neg_op.py @@ -18,6 +18,7 @@ import paddle class TestNegOp(unittest.TestCase): + def setUp(self): self.init_dtype_type() self.input = (np.random.random((32, 8)) * 100).astype(self.dtype) @@ -63,26 +64,31 @@ class TestNegOp(unittest.TestCase): class TestNegOpFp32(TestNegOp): + def init_dtype_type(self): self.dtype = np.float32 class TestNegOpInt64(TestNegOp): + def init_dtype_type(self): self.dtype = np.int64 class TestNegOpInt32(TestNegOp): + def init_dtype_type(self): self.dtype = np.int32 class TestNegOpInt16(TestNegOp): + def init_dtype_type(self): self.dtype = np.int16 class TestNegOpInt8(TestNegOp): + def init_dtype_type(self): self.dtype = np.int8 diff --git a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py index 60dcf195daf..7f230164d60 100644 --- a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py +++ b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py @@ -26,6 +26,7 @@ BATCH_SIZE = 20 class TestNetWithDtype(unittest.TestCase): + def setUp(self): self.dtype = "float64" self.init_dtype() @@ -43,8 +44,8 @@ class TestNetWithDtype(unittest.TestCase): sgd_optimizer.minimize(avg_cost) fetch_list = [avg_cost] - train_reader = paddle.batch( - paddle.dataset.uci_housing.train(), 
batch_size=BATCH_SIZE) + train_reader = paddle.batch(paddle.dataset.uci_housing.train(), + batch_size=BATCH_SIZE) feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) exe = fluid.Executor(place) exe.run(startup) diff --git a/python/paddle/fluid/tests/unittests/test_new_group_api.py b/python/paddle/fluid/tests/unittests/test_new_group_api.py index b9b80d3b431..af8df48ff23 100644 --- a/python/paddle/fluid/tests/unittests/test_new_group_api.py +++ b/python/paddle/fluid/tests/unittests/test_new_group_api.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestCollectiveAllreduceAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_newprofiler.py b/python/paddle/fluid/tests/unittests/test_newprofiler.py index 53ade0dfb79..0143bdb5324 100755 --- a/python/paddle/fluid/tests/unittests/test_newprofiler.py +++ b/python/paddle/fluid/tests/unittests/test_newprofiler.py @@ -27,15 +27,18 @@ from paddle.io import Dataset, DataLoader class TestProfiler(unittest.TestCase): + def test_profiler(self): + def my_trace_back(prof): profiler.export_chrome_tracing('./test_profiler_chrometracing/')( prof) profiler.export_protobuf('./test_profiler_pb/')(prof) x_value = np.random.randn(2, 3, 3) - x = paddle.to_tensor( - x_value, stop_gradient=False, place=paddle.CPUPlace()) + x = paddle.to_tensor(x_value, + stop_gradient=False, + place=paddle.CPUPlace()) y = x / 2.0 ones_like_y = paddle.ones_like(y) with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU], ) as prof: @@ -45,26 +48,27 @@ class TestProfiler(unittest.TestCase): with profiler.RecordEvent(name='test'): y = x / 2.0 - with profiler.Profiler( - targets=[profiler.ProfilerTarget.CPU], - scheduler=(1, 2)) as prof: + with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU], + scheduler=(1, 2)) as prof: self.assertEqual(utils._is_profiler_used, True) with profiler.RecordEvent(name='test'): y = x / 2.0 prof = None - with profiler.Profiler( - targets=[profiler.ProfilerTarget.CPU], - scheduler=profiler.make_scheduler( - closed=0, ready=1, record=1, repeat=1), - on_trace_ready=my_trace_back) as prof: + with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU], + scheduler=profiler.make_scheduler(closed=0, + ready=1, + record=1, + repeat=1), + on_trace_ready=my_trace_back) as prof: y = x / 2.0 prof = None - with profiler.Profiler( - targets=[profiler.ProfilerTarget.CPU], - scheduler=profiler.make_scheduler( - closed=0, ready=0, record=2, repeat=1), - on_trace_ready=my_trace_back) as prof: + with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU], + scheduler=profiler.make_scheduler(closed=0, + ready=0, + record=2, + repeat=1), + on_trace_ready=my_trace_back) as prof: for i in range(3): y = x / 2.0 prof.step() @@ -106,26 +110,26 @@ class TestProfiler(unittest.TestCase): y = x / 2.0 prof.step() prof = None - with profiler.Profiler( - targets=[profiler.ProfilerTarget.CPU], - scheduler=my_sheduler, - on_trace_ready=my_trace_back) as prof: + with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU], + scheduler=my_sheduler, + on_trace_ready=my_trace_back) as prof: for i in range(5): y = x / 2.0 prof.step() prof = None - with profiler.Profiler( - targets=[profiler.ProfilerTarget.CPU], - scheduler=my_sheduler1) as prof: + with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU], + scheduler=my_sheduler1) as prof: for i in range(5): y = x / 2.0 prof.step() prof = None - with profiler.Profiler( - targets=[profiler.ProfilerTarget.CPU], - scheduler=profiler.make_scheduler( - closed=1, ready=1, record=2, 
repeat=1, skip_first=1), - on_trace_ready=my_trace_back) as prof: + with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU], + scheduler=profiler.make_scheduler(closed=1, + ready=1, + record=2, + repeat=1, + skip_first=1), + on_trace_ready=my_trace_back) as prof: for i in range(5): y = x / 2.0 paddle.grad(outputs=y, inputs=[x], grad_outputs=ones_like_y) @@ -139,8 +143,11 @@ class TestProfiler(unittest.TestCase): simple_net = SimpleNet() opt = paddle.optimizer.SGD(learning_rate=1e-3, parameters=simple_net.parameters()) - loader = DataLoader( - dataset, batch_size=4, shuffle=True, drop_last=True, num_workers=2) + loader = DataLoader(dataset, + batch_size=4, + shuffle=True, + drop_last=True, + num_workers=2) prof = profiler.Profiler(on_trace_ready=lambda prof: None) prof.start() for i, (image, label) in enumerate(loader()): @@ -157,8 +164,8 @@ class TestProfiler(unittest.TestCase): dataset = RandomDataset(10 * 4) simple_net = SimpleNet() loader = DataLoader(dataset, batch_size=4, shuffle=True, drop_last=True) - opt = paddle.optimizer.Adam( - learning_rate=1e-3, parameters=simple_net.parameters()) + opt = paddle.optimizer.Adam(learning_rate=1e-3, + parameters=simple_net.parameters()) prof = profiler.Profiler(on_trace_ready=lambda prof: None) prof.start() for i, (image, label) in enumerate(loader()): @@ -173,16 +180,19 @@ class TestProfiler(unittest.TestCase): class TestNvprof(unittest.TestCase): + def test_nvprof(self): for i in range(10): paddle.fluid.profiler._nvprof_range(i, 10, 20) x_value = np.random.randn(2, 3, 3) - x = paddle.to_tensor( - x_value, stop_gradient=False, place=paddle.CPUPlace()) + x = paddle.to_tensor(x_value, + stop_gradient=False, + place=paddle.CPUPlace()) y = x / 2.0 class TestGetProfiler(unittest.TestCase): + def test_getprofiler(self): config_content = ''' { @@ -207,8 +217,9 @@ class TestGetProfiler(unittest.TestCase): import paddle.profiler.profiler as profiler profiler = profiler.get_profiler(filehandle.name) x_value = np.random.randn(2, 3, 3) - x = paddle.to_tensor( - x_value, stop_gradient=False, place=paddle.CPUPlace()) + x = paddle.to_tensor(x_value, + stop_gradient=False, + place=paddle.CPUPlace()) with profiler: for i in range(5): y = x / 2.0 @@ -249,7 +260,7 @@ class TestGetProfiler(unittest.TestCase): except: pass - # test scheduler + # test scheduler config_content = ''' { "targets": ["Cpu", "Gpu"], @@ -323,6 +334,7 @@ class TestGetProfiler(unittest.TestCase): class RandomDataset(Dataset): + def __init__(self, num_samples): self.num_samples = num_samples @@ -336,6 +348,7 @@ class RandomDataset(Dataset): class SimpleNet(nn.Layer): + def __init__(self): super(SimpleNet, self).__init__() self.fc = nn.Linear(100, 10) @@ -345,18 +358,19 @@ class SimpleNet(nn.Layer): class TestTimerOnly(unittest.TestCase): + def test_with_dataloader(self): + def train(step_num_samples=None): dataset = RandomDataset(20 * 4) simple_net = SimpleNet() opt = paddle.optimizer.SGD(learning_rate=1e-3, parameters=simple_net.parameters()) - loader = DataLoader( - dataset, - batch_size=4, - shuffle=True, - drop_last=True, - num_workers=2) + loader = DataLoader(dataset, + batch_size=4, + shuffle=True, + drop_last=True, + num_workers=2) step_info = '' p = profiler.Profiler(timer_only=True) p.start() diff --git a/python/paddle/fluid/tests/unittests/test_newprofiler_helper.py b/python/paddle/fluid/tests/unittests/test_newprofiler_helper.py index 05e79200354..d7a7a25d7ae 100755 --- a/python/paddle/fluid/tests/unittests/test_newprofiler_helper.py +++ 
b/python/paddle/fluid/tests/unittests/test_newprofiler_helper.py @@ -20,6 +20,7 @@ import paddle.profiler.statistic_helper as statistic_helper class TestStatisticHelper(unittest.TestCase): + def test_sum_ranges_case1(self): src = [(1, 3), (4, 10), (11, 15)] self.assertEqual(statistic_helper.sum_ranges(src), 12) diff --git a/python/paddle/fluid/tests/unittests/test_nll_loss.py b/python/paddle/fluid/tests/unittests/test_nll_loss.py index c53fdffe1cf..eb027951c52 100644 --- a/python/paddle/fluid/tests/unittests/test_nll_loss.py +++ b/python/paddle/fluid/tests/unittests/test_nll_loss.py @@ -20,7 +20,10 @@ from op_test import OpTest from paddle.fluid.framework import _test_eager_guard -def nll_loss_1d(logs, targets, weight=None, reduction='mean', +def nll_loss_1d(logs, + targets, + weight=None, + reduction='mean', ignore_index=-100): input_shape = logs.shape N = input_shape[0] @@ -38,13 +41,16 @@ def nll_loss_1d(logs, targets, weight=None, reduction='mean', if reduction == 'sum': return np.sum(out), np.array([total_weight]).astype('float64') elif reduction == 'mean': - return out.sum() / total_weight, np.array( - [total_weight]).astype('float64') + return out.sum() / total_weight, np.array([total_weight + ]).astype('float64') elif reduction == 'none': return out -def nll_loss_2d(logs, targets, weight=None, reduction='mean', +def nll_loss_2d(logs, + targets, + weight=None, + reduction='mean', ignore_index=-100): input_shape = logs.shape N = input_shape[0] @@ -65,13 +71,14 @@ def nll_loss_2d(logs, targets, weight=None, reduction='mean', if reduction == 'sum': return np.sum(out), np.array([total_weight]).astype('float64') elif reduction == 'mean': - return out.sum() / total_weight, np.array( - [total_weight]).astype('float64') + return out.sum() / total_weight, np.array([total_weight + ]).astype('float64') elif reduction == 'none': return out class TestNLLLoss(unittest.TestCase): + def test_NLLLoss_1D_mean(self): np.random.seed(200) input_np = np.random.random(size=(10, 10)).astype(np.float64) @@ -79,8 +86,8 @@ class TestNLLLoss(unittest.TestCase): label_np = np.random.randint(0, 10, size=(10, )).astype(np.int64) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() #place = fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[10, 10], dtype='float64') @@ -89,23 +96,24 @@ class TestNLLLoss(unittest.TestCase): res = nll_loss(input, label) exe = fluid.Executor(place) - static_result = exe.run( - prog, - feed={"input": input_np, - "label": label_np}, - fetch_list=[res]) + static_result = exe.run(prog, + feed={ + "input": input_np, + "label": label_np + }, + fetch_list=[res]) with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss() - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() with fluid.dygraph.guard(): with _test_eager_guard(): nll_loss = paddle.nn.loss.NLLLoss() - eager_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + eager_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) eager_result = eager_res.numpy() expected = nll_loss_1d(input_np, label_np)[0] @@ -121,8 +129,8 @@ class TestNLLLoss(unittest.TestCase): label_np = np.random.randint(0, 10, size=(10, )).astype(np.int64) prog 
= fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() #place = fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[10, 10], dtype='float64') @@ -131,16 +139,17 @@ class TestNLLLoss(unittest.TestCase): res = nll_loss(input, label) exe = fluid.Executor(place) - static_result = exe.run( - prog, - feed={"input": input_np, - "label": label_np}, - fetch_list=[res]) + static_result = exe.run(prog, + feed={ + "input": input_np, + "label": label_np + }, + fetch_list=[res]) with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss(reduction='sum') - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() with _test_eager_guard(): @@ -167,8 +176,8 @@ class TestNLLLoss(unittest.TestCase): weight_np = np.random.random(size=(10, )).astype(np.float64) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() # place = fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[10, 10], dtype='float64') @@ -189,15 +198,15 @@ class TestNLLLoss(unittest.TestCase): with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss( weight=paddle.to_tensor(weight_np)) - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() with _test_eager_guard(): nll_loss = paddle.nn.loss.NLLLoss( weight=paddle.to_tensor(weight_np)) - eager_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + eager_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) loss = eager_res.sum() loss.backward() eager_result = eager_res.numpy() @@ -217,8 +226,8 @@ class TestNLLLoss(unittest.TestCase): weight_np = np.random.random(size=(10, )).astype(np.float64) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() # place = fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[10, 10], dtype='float64') @@ -239,11 +248,13 @@ class TestNLLLoss(unittest.TestCase): with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss( weight=paddle.to_tensor(weight_np), reduction='sum') - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() - expected = nll_loss_1d( - input_np, label_np, weight=weight_np, reduction='sum')[0] + expected = nll_loss_1d(input_np, + label_np, + weight=weight_np, + reduction='sum')[0] self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) @@ -277,8 +288,8 @@ class TestNLLLoss(unittest.TestCase): with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss( weight=paddle.to_tensor(weight_np)) - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = 
nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() expected = nll_loss_1d(input_np, label_np, weight=weight_np)[0] @@ -314,11 +325,13 @@ class TestNLLLoss(unittest.TestCase): with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss( weight=paddle.to_tensor(weight_np), reduction='none') - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() - expected = nll_loss_1d( - input_np, label_np, weight=weight_np, reduction='none') + expected = nll_loss_1d(input_np, + label_np, + weight=weight_np, + reduction='none') self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) @@ -331,27 +344,29 @@ class TestNLLLoss(unittest.TestCase): label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() #place = fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') + input = fluid.data(name='input', + shape=[5, 3, 5, 5], + dtype='float64') label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') nll_loss = paddle.nn.loss.NLLLoss() res = nll_loss(input, label) exe = fluid.Executor(place) - static_result = exe.run( - prog, - feed={"input": input_np, - "label": label_np}, - fetch_list=[res]) + static_result = exe.run(prog, + feed={ + "input": input_np, + "label": label_np + }, + fetch_list=[res]) with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss() - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() expected = nll_loss_2d(input_np, label_np)[0] @@ -367,27 +382,29 @@ class TestNLLLoss(unittest.TestCase): label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() #place = fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') + input = fluid.data(name='input', + shape=[5, 3, 5, 5], + dtype='float64') label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') nll_loss = paddle.nn.loss.NLLLoss(reduction='sum') res = nll_loss(input, label) exe = fluid.Executor(place) - static_result = exe.run( - prog, - feed={"input": input_np, - "label": label_np}, - fetch_list=[res]) + static_result = exe.run(prog, + feed={ + "input": input_np, + "label": label_np + }, + fetch_list=[res]) with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss(reduction='sum') - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() expected = nll_loss_2d(input_np, label_np, reduction='sum')[0] @@ -404,12 +421,13 @@ class TestNLLLoss(unittest.TestCase): weight_np = np.random.random(size=(3, )).astype(np.float64) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) 
if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() #place = fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') + input = fluid.data(name='input', + shape=[5, 3, 5, 5], + dtype='float64') label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype='float64') @@ -428,8 +446,8 @@ class TestNLLLoss(unittest.TestCase): with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss( weight=paddle.to_tensor(weight_np)) - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() expected = nll_loss_2d(input_np, label_np, weight=weight_np)[0] @@ -448,8 +466,9 @@ class TestNLLLoss(unittest.TestCase): startup_prog = fluid.Program() place = fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') + input = fluid.data(name='input', + shape=[5, 3, 5, 5], + dtype='float64') label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype='float64') @@ -468,8 +487,8 @@ class TestNLLLoss(unittest.TestCase): with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss( weight=paddle.to_tensor(weight_np)) - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() expected = nll_loss_2d(input_np, label_np, weight=weight_np)[0] @@ -486,11 +505,12 @@ class TestNLLLoss(unittest.TestCase): weight_np = np.random.random(size=(3, )).astype(np.float64) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') + input = fluid.data(name='input', + shape=[5, 3, 5, 5], + dtype='float64') label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype='float64') @@ -509,12 +529,14 @@ class TestNLLLoss(unittest.TestCase): with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss( weight=paddle.to_tensor(weight_np), reduction='sum') - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() - expected = nll_loss_2d( - input_np, label_np, weight=weight_np, reduction='sum')[0] + expected = nll_loss_2d(input_np, + label_np, + weight=weight_np, + reduction='sum')[0] self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) @@ -527,27 +549,29 @@ class TestNLLLoss(unittest.TestCase): label_np = np.random.randint(0, 3, size=(5, 5, 5, 5)).astype(np.int64) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() #place = fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - 
name='input', shape=[5, 3, 5, 5, 5], dtype='float64') + input = fluid.data(name='input', + shape=[5, 3, 5, 5, 5], + dtype='float64') label = fluid.data(name='label', shape=[5, 5, 5, 5], dtype='int64') nll_loss = paddle.nn.loss.NLLLoss() res = nll_loss(input, label) exe = fluid.Executor(place) - static_result = exe.run( - prog, - feed={"input": input_np, - "label": label_np}, - fetch_list=[res]) + static_result = exe.run(prog, + feed={ + "input": input_np, + "label": label_np + }, + fetch_list=[res]) with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss() - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() input_shape = input_np.shape @@ -569,12 +593,13 @@ class TestNLLLoss(unittest.TestCase): weight_np = np.random.random(size=(3, )).astype(np.float64) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() #place = fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5, 5], dtype='float64') + input = fluid.data(name='input', + shape=[5, 3, 5, 5, 5], + dtype='float64') label = fluid.data(name='label', shape=[5, 5, 5, 5], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype='float64') nll_loss = paddle.nn.loss.NLLLoss(weight=weight) @@ -592,8 +617,8 @@ class TestNLLLoss(unittest.TestCase): with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss( weight=paddle.to_tensor(weight_np)) - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() input_shape = input_np.shape @@ -601,8 +626,9 @@ class TestNLLLoss(unittest.TestCase): input_np_reshape = np.reshape(input_np, (input_shape[0], input_shape[1], 1, -1)) label_np_reshape = np.reshape(label_np, (label_shape[0], 1, -1)) - expected = nll_loss_2d( - input_np_reshape, label_np_reshape, weight=weight_np)[0] + expected = nll_loss_2d(input_np_reshape, + label_np_reshape, + weight=weight_np)[0] self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) @@ -616,12 +642,13 @@ class TestNLLLoss(unittest.TestCase): weight_np = np.random.random(size=(3, )).astype(np.float64) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() place = fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5, 5], dtype='float64') + input = fluid.data(name='input', + shape=[5, 3, 5, 5, 5], + dtype='float64') label = fluid.data(name='label', shape=[5, 5, 5, 5], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype='float64') nll_loss = paddle.nn.loss.NLLLoss(weight=weight, reduction='sum') @@ -639,8 +666,8 @@ class TestNLLLoss(unittest.TestCase): with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss( weight=paddle.to_tensor(weight_np), reduction='sum') - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() 
input_shape = input_np.shape @@ -648,11 +675,10 @@ class TestNLLLoss(unittest.TestCase): input_np_reshape = np.reshape(input_np, (input_shape[0], input_shape[1], 1, -1)) label_np_reshape = np.reshape(label_np, (label_shape[0], 1, -1)) - expected = nll_loss_2d( - input_np_reshape, - label_np_reshape, - weight=weight_np, - reduction='sum')[0] + expected = nll_loss_2d(input_np_reshape, + label_np_reshape, + weight=weight_np, + reduction='sum')[0] self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) @@ -666,12 +692,13 @@ class TestNLLLoss(unittest.TestCase): weight_np = np.random.random(size=(3, )).astype(np.float64) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() #place = fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5, 5], dtype='float64') + input = fluid.data(name='input', + shape=[5, 3, 5, 5, 5], + dtype='float64') label = fluid.data(name='label', shape=[5, 5, 5, 5], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype='float64') nll_loss = paddle.nn.loss.NLLLoss(weight=weight, reduction='none') @@ -689,8 +716,8 @@ class TestNLLLoss(unittest.TestCase): with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss( weight=paddle.to_tensor(weight_np), reduction='none') - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() input_shape = input_np.shape @@ -699,11 +726,10 @@ class TestNLLLoss(unittest.TestCase): input_np_reshape = np.reshape(input_np, (input_shape[0], input_shape[1], 1, -1)) label_np_reshape = np.reshape(label_np, (label_shape[0], 1, -1)) - expected = nll_loss_2d( - input_np_reshape, - label_np_reshape, - weight=weight_np, - reduction='none') + expected = nll_loss_2d(input_np_reshape, + label_np_reshape, + weight=weight_np, + reduction='none') expected = np.reshape(expected, out_shape) self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) @@ -719,8 +745,9 @@ class TestNLLLoss(unittest.TestCase): startup_prog = fluid.Program() place = fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5, 5], dtype='float64') + input = fluid.data(name='input', + shape=[5, 3, 5, 5, 5], + dtype='float64') label = fluid.data(name='label', shape=[5, 5, 5, 5], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype='float64') nll_loss = paddle.nn.loss.NLLLoss(weight=weight, reduction='none') @@ -738,8 +765,8 @@ class TestNLLLoss(unittest.TestCase): with fluid.dygraph.guard(): nll_loss = paddle.nn.loss.NLLLoss( weight=paddle.to_tensor(weight_np), reduction='none') - dy_res = nll_loss( - paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_res = nll_loss(paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) dy_result = dy_res.numpy() input_shape = input_np.shape @@ -748,11 +775,10 @@ class TestNLLLoss(unittest.TestCase): input_np_reshape = np.reshape(input_np, (input_shape[0], input_shape[1], 1, -1)) label_np_reshape = np.reshape(label_np, (label_shape[0], 1, -1)) - expected = nll_loss_2d( - input_np_reshape, - label_np_reshape, - weight=weight_np, - reduction='none') + expected = nll_loss_2d(input_np_reshape, 
+ label_np_reshape, + weight=weight_np, + reduction='none') expected = np.reshape(expected, out_shape) self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) @@ -760,6 +786,7 @@ class TestNLLLoss(unittest.TestCase): class TestNLLLossOp1DWithReduce(OpTest): + def setUp(self): self.init_test_case() self.op_type = "nll_loss" @@ -780,8 +807,9 @@ class TestNLLLossOp1DWithReduce(OpTest): np.random.seed(200) weight_np = np.random.uniform(0.1, 0.8, self.input_shape[1]).astype("float64") - output_np, total_weight_np = nll_loss_1d( - input_np, label_np, weight=weight_np) + output_np, total_weight_np = nll_loss_1d(input_np, + label_np, + weight=weight_np) self.inputs['Weight'] = weight_np self.outputs = {'Out': output_np, 'Total_weight': total_weight_np} @@ -808,6 +836,7 @@ class TestNLLLossOp1DWithReduce(OpTest): class TestNLLLossOp1DNoReduce(OpTest): + def setUp(self): self.init_test_case() self.op_type = "nll_loss" @@ -827,8 +856,10 @@ class TestNLLLossOp1DNoReduce(OpTest): np.random.seed(200) weight_np = np.random.uniform(0.1, 0.8, self.input_shape[1]).astype("float64") - output_np, total_weight_np = nll_loss_1d( - input_np, label_np, weight=weight_np, reduction='none') + output_np, total_weight_np = nll_loss_1d(input_np, + label_np, + weight=weight_np, + reduction='none') self.inputs['Weight'] = weight_np self.outputs = {'Out': output_np, 'Total_weight': total_weight_np} @@ -855,6 +886,7 @@ class TestNLLLossOp1DNoReduce(OpTest): class TestNLLLossOp2DWithReduce(OpTest): + def setUp(self): self.init_test_case() self.op_type = "nll_loss" @@ -873,8 +905,9 @@ class TestNLLLossOp2DWithReduce(OpTest): np.random.seed(200) weight_np = np.random.uniform(0.1, 0.8, self.input_shape[1]).astype("float64") - output_np, total_weight_np = nll_loss_2d( - input_np, label_np, weight=weight_np) + output_np, total_weight_np = nll_loss_2d(input_np, + label_np, + weight=weight_np) self.inputs['Weight'] = weight_np self.outputs = {'Out': output_np, 'Total_weight': total_weight_np} @@ -901,6 +934,7 @@ class TestNLLLossOp2DWithReduce(OpTest): class TestNLLLossOp2DNoReduce(OpTest): + def setUp(self): self.init_test_case() self.op_type = "nll_loss" @@ -920,8 +954,10 @@ class TestNLLLossOp2DNoReduce(OpTest): np.random.seed(200) weight_np = np.random.uniform(0.1, 0.8, self.input_shape[1]).astype("float64") - output_np, total_weight_np = nll_loss_2d( - input_np, label_np, weight=weight_np, reduction='none') + output_np, total_weight_np = nll_loss_2d(input_np, + label_np, + weight=weight_np, + reduction='none') self.inputs['Weight'] = weight_np self.outputs = {'Out': output_np, 'Total_weight': total_weight_np} @@ -948,6 +984,7 @@ class TestNLLLossOp2DNoReduce(OpTest): class TestNLLLossName(unittest.TestCase): + def test_name(self): prog = paddle.static.Program() startup_prog = paddle.static.Program() @@ -961,15 +998,22 @@ class TestNLLLossName(unittest.TestCase): class TestNLLLossInvalidArgs(unittest.TestCase): + def test_x_dim_value_error(self): + def test_x_dim_lt_2(): prog = paddle.static.Program() startup_prog = paddle.static.Program() place = paddle.CPUPlace() with paddle.static.program_guard(prog, startup_prog): - x = paddle.fluid.data(name='x', shape=[10, ], dtype='float64') - label = paddle.fluid.data( - name='label', shape=[10, ], dtype='float64') + x = paddle.fluid.data(name='x', shape=[ + 10, + ], dtype='float64') + label = paddle.fluid.data(name='label', + shape=[ + 10, + ], + dtype='float64') nll_loss = paddle.nn.loss.NLLLoss() res = nll_loss(x, label) @@ 
-987,14 +1031,16 @@ class TestNLLLossInvalidArgs(unittest.TestCase): self.assertRaises(ValueError, test_x_dim_imperative_lt_2) def test_reduction_value_error(self): + def test_NLLLoss_reduction_not_sum_mean_none(): prog = paddle.static.Program() startup_prog = paddle.static.Program() place = paddle.CPUPlace() with paddle.static.program_guard(prog, startup_prog): x = paddle.fluid.data(name='x', shape=[10, 10], dtype='float64') - label = paddle.fluid.data( - name='label', shape=[10], dtype='int64') + label = paddle.fluid.data(name='label', + shape=[10], + dtype='int64') nll_loss = paddle.nn.loss.NLLLoss(reduction='') res = nll_loss(x, label) @@ -1018,8 +1064,9 @@ class TestNLLLossInvalidArgs(unittest.TestCase): place = paddle.CPUPlace() with paddle.static.program_guard(prog, startup_prog): x = paddle.fluid.data(name='x', shape=[10, 10], dtype='float64') - label = paddle.fluid.data( - name='label', shape=[10], dtype='int64') + label = paddle.fluid.data(name='label', + shape=[10], + dtype='int64') res = paddle.nn.functional.nll_loss(x, label, reduction='') self.assertRaises(ValueError, diff --git a/python/paddle/fluid/tests/unittests/test_nms_op.py b/python/paddle/fluid/tests/unittests/test_nms_op.py index 1b5ac1f1337..f3c253d45c0 100644 --- a/python/paddle/fluid/tests/unittests/test_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_nms_op.py @@ -68,6 +68,7 @@ def nms(boxes, nms_threshold): class TestNMSOp(OpTest): + def setUp(self): self.op_type = 'nms' self.dtype = np.float64 diff --git a/python/paddle/fluid/tests/unittests/test_nn_dice_loss.py b/python/paddle/fluid/tests/unittests/test_nn_dice_loss.py index 31606376777..8dfaca25e27 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_dice_loss.py +++ b/python/paddle/fluid/tests/unittests/test_nn_dice_loss.py @@ -24,6 +24,7 @@ eps = 1e-6 class TestDiceLossValue(unittest.TestCase): + def test_dice_loss(self): input_ = paddle.rand([2, 3, num_classes]) label_ = paddle.randint(0, num_classes, [2, 3, 1], dtype=paddle.int64) @@ -41,19 +42,23 @@ class TestDiceLossValue(unittest.TestCase): class TestDiceLossInvalidInput(unittest.TestCase): + def test_error(self): + def test_invalid_dtype(): input_ = paddle.rand([2, 3, num_classes], dtype=paddle.float32) - label_ = paddle.randint( - 0, num_classes, [2, 3, 1], dtype=paddle.int64) + label_ = paddle.randint(0, + num_classes, [2, 3, 1], + dtype=paddle.int64) nn.dice_loss(input_, label_.astype(paddle.float32)) self.assertRaises(AssertionError, test_invalid_dtype) def test_zero_shape_input(): input_ = paddle.rand([0, 3, num_classes], dtype=paddle.float32) - label_ = paddle.randint( - 0, num_classes, [0, 3, 1], dtype=paddle.int64) + label_ = paddle.randint(0, + num_classes, [0, 3, 1], + dtype=paddle.int64) nn.dice_loss(input_, label_) self.assertRaises(AssertionError, test_zero_shape_input) diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py index 0b5493e2170..98e323c0d9e 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py @@ -25,6 +25,7 @@ paddle.disable_static() class EmbeddingDygraph(unittest.TestCase): + def func_1(self): x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64) paddle.disable_static(paddle.CPUPlace()) @@ -35,8 +36,8 @@ class EmbeddingDygraph(unittest.TestCase): w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32) 
embedding.weight.set_value(w0) - adam = paddle.optimizer.Adam( - parameters=[embedding.weight], learning_rate=0.01) + adam = paddle.optimizer.Adam(parameters=[embedding.weight], + learning_rate=0.01) adam.clear_grad() out = embedding(x) diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py index 4af0cce12b7..62267bdf6f4 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py @@ -21,6 +21,7 @@ import paddle.nn.functional as functional class EmbeddingStatic(unittest.TestCase): + def test_1(self): prog = fluid.Program() with fluid.program_guard(prog): @@ -29,23 +30,24 @@ class EmbeddingStatic(unittest.TestCase): initializer = fluid.initializer.NumpyArrayInitializer( np.random.random(size=(128, 100))) - param_attr = fluid.ParamAttr( - name="emb_weight", - learning_rate=0.5, - initializer=initializer, - trainable=True) + param_attr = fluid.ParamAttr(name="emb_weight", + learning_rate=0.5, + initializer=initializer, + trainable=True) - weight = prog.global_block().create_parameter( - (128, 100), attr=param_attr, dtype="float32") + weight = prog.global_block().create_parameter((128, 100), + attr=param_attr, + dtype="float32") - label = fluid.layers.data( - name="label", - shape=[4], - append_batch_size=False, - dtype="int64") + label = fluid.layers.data(name="label", + shape=[4], + append_batch_size=False, + dtype="int64") - emb = functional.embedding( - x=label, weight=weight, sparse=True, name="embedding") + emb = functional.embedding(x=label, + weight=weight, + sparse=True, + name="embedding") test_bad_x() @@ -57,27 +59,25 @@ class EmbeddingStatic(unittest.TestCase): initializer = fluid.initializer.NumpyArrayInitializer( np.random.random(size=(128, 100))) - param_attr = fluid.ParamAttr( - name="emb_weight", - learning_rate=0.5, - initializer=initializer, - trainable=True) - - weight = prog.global_block().create_parameter( - (128, 100), attr=param_attr, dtype="float32") - - label = fluid.layers.data( - name="label", - shape=[4], - append_batch_size=False, - dtype="int32") - - emb = functional.embedding( - x=label, - weight=weight, - padding_idx=129, - sparse=True, - name="embedding") + param_attr = fluid.ParamAttr(name="emb_weight", + learning_rate=0.5, + initializer=initializer, + trainable=True) + + weight = prog.global_block().create_parameter((128, 100), + attr=param_attr, + dtype="float32") + + label = fluid.layers.data(name="label", + shape=[4], + append_batch_size=False, + dtype="int32") + + emb = functional.embedding(x=label, + weight=weight, + padding_idx=129, + sparse=True, + name="embedding") with self.assertRaises(ValueError): test_bad_x() diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py b/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py index 9b7ba5c4b05..f0a1bdc76d8 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py @@ -27,6 +27,7 @@ from paddle.fluid.framework import Program, program_guard class TestOneHotOp(OpTest): + def setUp(self): self.op_type = 'one_hot_v2' depth = 10 @@ -50,6 +51,7 @@ class TestOneHotOp(OpTest): class TestOneHotOp_attr(OpTest): + def setUp(self): self.op_type = 'one_hot_v2' depth = 10 @@ -73,6 +75,7 @@ class TestOneHotOp_attr(OpTest): class TestOneHotOp_default_dtype(OpTest): + def setUp(self): 
self.op_type = 'one_hot_v2' depth = 10 @@ -96,6 +99,7 @@ class TestOneHotOp_default_dtype(OpTest): class TestOneHotOp_default_dtype_attr(OpTest): + def setUp(self): self.op_type = 'one_hot_v2' depth = 10 @@ -119,6 +123,7 @@ class TestOneHotOp_default_dtype_attr(OpTest): class TestOneHotOp_exception(unittest.TestCase): + def setUp(self): self.op_type = 'one_hot_v2' self.depth = 10 @@ -134,18 +139,18 @@ class TestOneHotOp_exception(unittest.TestCase): def test_check_output(self): program = Program() with program_guard(program): - x = fluid.layers.data( - name='x', shape=[self.dimension], dtype='float32', lod_level=1) + x = fluid.layers.data(name='x', + shape=[self.dimension], + dtype='float32', + lod_level=1) block = program.current_block() - one_hot_out = block.create_var( - name="one_hot_out", - type=core.VarDesc.VarType.LOD_TENSOR, - dtype='float32') - block.append_op( - type='one_hot', - inputs={'X': x}, - attrs={'depth': self.depth}, - outputs={'Out': one_hot_out}) + one_hot_out = block.create_var(name="one_hot_out", + type=core.VarDesc.VarType.LOD_TENSOR, + dtype='float32') + block.append_op(type='one_hot', + inputs={'X': x}, + attrs={'depth': self.depth}, + outputs={'Out': one_hot_out}) exe = fluid.Executor(self.place) def run(): @@ -157,6 +162,7 @@ class TestOneHotOp_exception(unittest.TestCase): class TestOneHotOpApi(unittest.TestCase): + def test_api(self): num_classes = 10 self._run(num_classes) @@ -167,9 +173,9 @@ class TestOneHotOpApi(unittest.TestCase): def test_api_with_dygraph(self): num_classes = 10 - label = np.array( - [np.random.randint(0, num_classes - 1) - for i in range(6)]).reshape([6, 1]) + label = np.array([ + np.random.randint(0, num_classes - 1) for i in range(6) + ]).reshape([6, 1]) with fluid.dygraph.guard(): one_hot_label = functional.one_hot( x=fluid.dygraph.to_variable(label), num_classes=num_classes) @@ -184,21 +190,23 @@ class TestOneHotOpApi(unittest.TestCase): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - ret = exe.run(feed={'label': label_data, }, + ret = exe.run(feed={ + 'label': label_data, + }, fetch_list=[one_hot_label], return_numpy=False) class BadInputTestOnehotV2(unittest.TestCase): + def test_error(self): with fluid.program_guard(fluid.Program()): def test_bad_x(): - label = fluid.layers.data( - name="label", - shape=[4], - append_batch_size=False, - dtype="float32") + label = fluid.layers.data(name="label", + shape=[4], + append_batch_size=False, + dtype="float32") one_hot_label = functional.one_hot(x=label, num_classes=4) self.assertRaises(TypeError, test_bad_x) diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py index 4685b00b394..c168b827e84 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py @@ -23,26 +23,33 @@ import paddle.fluid.layers as layers import paddle.fluid.core as core import gradient_checker from decorator_helper import prog_scope + paddle.enable_static() class TestSliceOpDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): self.config() - out = fluid.layers.slice( - self.inputs, axes=self.axes, starts=self.starts, ends=self.ends) - gradient_checker.double_grad_check( - [self.inputs], out, x_init=self.x_arr, place=place) + out = fluid.layers.slice(self.inputs, + axes=self.axes, + starts=self.starts, + ends=self.ends) + gradient_checker.double_grad_check([self.inputs], + out, + x_init=self.x_arr, + place=place) def config(self): self.starts = [1, 
0, -1] self.ends = [3, 3, 6] self.axes = [0, 1, 2] self.x_arr = np.random.random([3, 4, 5, 2]).astype("float64") - self.inputs = layers.create_parameter( - dtype="float64", shape=[3, 4, 5, 2], name='x') + self.inputs = layers.create_parameter(dtype="float64", + shape=[3, 4, 5, 2], + name='x') def test_grad(self): places = [fluid.CPUPlace()] @@ -53,16 +60,19 @@ class TestSliceOpDoubleGradCheck(unittest.TestCase): class TestSliceOpDoubleGradCheckCase3(TestSliceOpDoubleGradCheck): + def config(self): self.starts = [1, -1, 1] self.ends = [3, 3, 3] self.axes = [0, 1, 2] self.x_arr = np.random.random([3, 3, 3]).astype("float64") - self.inputs = layers.create_parameter( - dtype="float64", shape=[3, 3, 3], name='x3') + self.inputs = layers.create_parameter(dtype="float64", + shape=[3, 3, 3], + name='x3') class TestReduceMeanWithDimDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): shape = [7, 11] @@ -74,8 +84,11 @@ class TestReduceMeanWithDimDoubleGradCheck(unittest.TestCase): y = layers.reduce_mean(x, dim=0) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - gradient_checker.double_grad_check( - [x], y, x_init=x_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x], + y, + x_init=x_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -86,6 +99,7 @@ class TestReduceMeanWithDimDoubleGradCheck(unittest.TestCase): class TestReduceSumWithDimDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): shape = [7, 11] @@ -97,8 +111,11 @@ class TestReduceSumWithDimDoubleGradCheck(unittest.TestCase): y = layers.reduce_sum(x, dim=0) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - gradient_checker.double_grad_check( - [x], y, x_init=x_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x], + y, + x_init=x_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -109,6 +126,7 @@ class TestReduceSumWithDimDoubleGradCheck(unittest.TestCase): class TestReshapeDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): x_shape = [3, 12] @@ -121,8 +139,11 @@ class TestReshapeDoubleGradCheck(unittest.TestCase): out = layers.expand(x, expand_times) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) - gradient_checker.double_grad_check( - [x], out, x_init=x_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x], + out, + x_init=x_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -133,6 +154,7 @@ class TestReshapeDoubleGradCheck(unittest.TestCase): class TestExpandDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): x_shape = [3, 12] @@ -145,8 +167,11 @@ class TestExpandDoubleGradCheck(unittest.TestCase): out = layers.reshape(x, new_shape) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) - gradient_checker.double_grad_check( - [x], out, x_init=x_arr, place=place, eps=eps) + gradient_checker.double_grad_check([x], + out, + x_init=x_arr, + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -157,6 +182,7 @@ class TestExpandDoubleGradCheck(unittest.TestCase): class TestTileDoubleGradCheck(unittest.TestCase): + def tile_wrapper(self, x): return paddle.tile(x[0], [4, 9]) @@ -172,10 +198,15 @@ class TestTileDoubleGradCheck(unittest.TestCase): out = paddle.tile(x, repeat_times) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) - gradient_checker.double_grad_check( - [x], out, x_init=x_arr, place=place, eps=eps) - gradient_checker.double_grad_check_for_dygraph( - 
self.tile_wrapper, [x], out, x_init=x_arr, place=place) + gradient_checker.double_grad_check([x], + out, + x_init=x_arr, + place=place, + eps=eps) + gradient_checker.double_grad_check_for_dygraph(self.tile_wrapper, [x], + out, + x_init=x_arr, + place=place) def test_grad(self): places = [fluid.CPUPlace()] @@ -186,6 +217,7 @@ class TestTileDoubleGradCheck(unittest.TestCase): class TestExpandV2DoubleGradCheck(unittest.TestCase): + def expand_wrapper(self, x): return paddle.expand(x[0], [4, 12]) @@ -201,10 +233,15 @@ class TestExpandV2DoubleGradCheck(unittest.TestCase): out = paddle.expand(x, new_shape) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) - gradient_checker.double_grad_check( - [x], out, x_init=x_arr, place=place, eps=eps) - gradient_checker.double_grad_check_for_dygraph( - self.expand_wrapper, [x], out, x_init=x_arr, place=place) + gradient_checker.double_grad_check([x], + out, + x_init=x_arr, + place=place, + eps=eps) + gradient_checker.double_grad_check_for_dygraph(self.expand_wrapper, [x], + out, + x_init=x_arr, + place=place) def test_grad(self): places = [fluid.CPUPlace()] @@ -215,6 +252,7 @@ class TestExpandV2DoubleGradCheck(unittest.TestCase): class TestSqueezeDoubleGradCheck(unittest.TestCase): + def squeeze_warpper(self, x): axes = [0, 2] return paddle.squeeze(x[0], axes) @@ -231,10 +269,16 @@ class TestSqueezeDoubleGradCheck(unittest.TestCase): out = paddle.squeeze(x, axes) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) - gradient_checker.double_grad_check( - [x], out, x_init=x_arr, place=place, eps=eps) - gradient_checker.double_grad_check_for_dygraph( - self.squeeze_warpper, [x], out, x_init=x_arr, place=place) + gradient_checker.double_grad_check([x], + out, + x_init=x_arr, + place=place, + eps=eps) + gradient_checker.double_grad_check_for_dygraph(self.squeeze_warpper, + [x], + out, + x_init=x_arr, + place=place) def test_grad(self): places = [fluid.CPUPlace()] @@ -245,6 +289,7 @@ class TestSqueezeDoubleGradCheck(unittest.TestCase): class TestUnsqueezeDoubleGradCheck(unittest.TestCase): + def unsqueeze_wrapper(self, x): axes = [1, 2] return paddle.unsqueeze(x[0], axes) @@ -261,10 +306,16 @@ class TestUnsqueezeDoubleGradCheck(unittest.TestCase): out = paddle.unsqueeze(x, axes) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) - gradient_checker.double_grad_check( - [x], out, x_init=x_arr, place=place, eps=eps) - gradient_checker.double_grad_check_for_dygraph( - self.unsqueeze_wrapper, [x], out, x_init=x_arr, place=place) + gradient_checker.double_grad_check([x], + out, + x_init=x_arr, + place=place, + eps=eps) + gradient_checker.double_grad_check_for_dygraph(self.unsqueeze_wrapper, + [x], + out, + x_init=x_arr, + place=place) def test_grad(self): places = [fluid.CPUPlace()] @@ -275,6 +326,7 @@ class TestUnsqueezeDoubleGradCheck(unittest.TestCase): class TestClipDoubleGradCheck(unittest.TestCase): + def clip_wrapper(self, x): return paddle.clip(x[0], min=-1., max=1.) 
@@ -289,8 +341,10 @@ class TestClipDoubleGradCheck(unittest.TestCase): x_arr = np.random.uniform(-5., 5., x_shape).astype(dtype) gradient_checker.double_grad_check([x], out, x_init=x_arr, place=place) - gradient_checker.double_grad_check_for_dygraph( - self.clip_wrapper, [x], out, x_init=x_arr, place=place) + gradient_checker.double_grad_check_for_dygraph(self.clip_wrapper, [x], + out, + x_init=x_arr, + place=place) def test_grad(self): places = [fluid.CPUPlace()] @@ -301,6 +355,7 @@ class TestClipDoubleGradCheck(unittest.TestCase): class TestTransposeDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): x_shape = [3, 40] @@ -323,6 +378,7 @@ class TestTransposeDoubleGradCheck(unittest.TestCase): class TestTransposeDoubleGradCheckCase1(unittest.TestCase): + @prog_scope() def func(self, place): x_shape = [2, 3, 4, 5] @@ -345,6 +401,7 @@ class TestTransposeDoubleGradCheckCase1(unittest.TestCase): class TestConstantPadDoubleGradCheck(unittest.TestCase): + def pad_wrapper(self, x): pad = [1, 1, 1, 1] return paddle.nn.functional.pad(x[0], pad) @@ -361,10 +418,15 @@ class TestConstantPadDoubleGradCheck(unittest.TestCase): out = paddle.nn.functional.pad(x, pad) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) - gradient_checker.double_grad_check( - [x], out, x_init=x_arr, place=place, eps=eps) - gradient_checker.double_grad_check_for_dygraph( - self.pad_wrapper, [x], out, x_init=x_arr, place=place) + gradient_checker.double_grad_check([x], + out, + x_init=x_arr, + place=place, + eps=eps) + gradient_checker.double_grad_check_for_dygraph(self.pad_wrapper, [x], + out, + x_init=x_arr, + place=place) def test_grad(self): places = [fluid.CPUPlace()] @@ -375,6 +437,7 @@ class TestConstantPadDoubleGradCheck(unittest.TestCase): class TestConstantPadDoubleGradCheckCase1(TestConstantPadDoubleGradCheck): + @prog_scope() def func(self, place): x_shape = [2, 3, 4, 5] @@ -390,6 +453,7 @@ class TestConstantPadDoubleGradCheckCase1(TestConstantPadDoubleGradCheck): class TestConcatDoubleGradCheck(unittest.TestCase): + def concat_wrapper(self, x): return paddle.concat(x, axis=0) @@ -407,13 +471,15 @@ class TestConcatDoubleGradCheck(unittest.TestCase): x2_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) x1_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) - gradient_checker.double_grad_check( - [x1, x2], out, x_init=[x1_arr, x2_arr], place=place) - gradient_checker.double_grad_check_for_dygraph( - self.concat_wrapper, [x1, x2], - out, - x_init=[x1_arr, x2_arr], - place=place) + gradient_checker.double_grad_check([x1, x2], + out, + x_init=[x1_arr, x2_arr], + place=place) + gradient_checker.double_grad_check_for_dygraph(self.concat_wrapper, + [x1, x2], + out, + x_init=[x1_arr, x2_arr], + place=place) def test_grad(self): places = [fluid.CPUPlace()] @@ -424,20 +490,23 @@ class TestConcatDoubleGradCheck(unittest.TestCase): class TestAvgPool2DDoubleGradCheckCase1(unittest.TestCase): + @prog_scope() def func(self, place): - input_NCHW = fluid.layers.data( - name="input_NCHW", - shape=[2, 3, 5, 5], - append_batch_size=False, - dtype="float32") + input_NCHW = fluid.layers.data(name="input_NCHW", + shape=[2, 3, 5, 5], + append_batch_size=False, + dtype="float32") input_NCHW.persistable = True y = layers.pool2d(input_NCHW, pool_size=2, pool_type="avg") x_arr = np.random.uniform(-1, 1, [2, 3, 5, 5]).astype(np.float32) - gradient_checker.double_grad_check( - [input_NCHW], y, x_init=x_arr, place=place, eps=0.05) + gradient_checker.double_grad_check([input_NCHW], + y, + x_init=x_arr, + 
place=place, + eps=0.05) def test_grad(self): places = [fluid.CPUPlace()] @@ -448,28 +517,36 @@ class TestAvgPool2DDoubleGradCheckCase1(unittest.TestCase): class TestAvgPool2DDoubleGradCheckCase2(unittest.TestCase): + def pool2d_wrapper(self, x): - return paddle.nn.functional.avg_pool2d( - x[0], kernel_size=2, data_format="NHWC") + return paddle.nn.functional.avg_pool2d(x[0], + kernel_size=2, + data_format="NHWC") @prog_scope() def func(self, place): - input_NHWC = fluid.layers.data( - name="input_NHWC", - shape=[2, 5, 5, 3], - append_batch_size=False, - dtype="float32") + input_NHWC = fluid.layers.data(name="input_NHWC", + shape=[2, 5, 5, 3], + append_batch_size=False, + dtype="float32") input_NHWC.persistable = True - y = paddle.nn.functional.avg_pool2d( - input_NHWC, kernel_size=2, data_format="NHWC") + y = paddle.nn.functional.avg_pool2d(input_NHWC, + kernel_size=2, + data_format="NHWC") x_arr = np.random.uniform(-1, 1, [2, 5, 5, 3]).astype(np.float32) - gradient_checker.double_grad_check( - [input_NHWC], y, x_init=x_arr, place=place, eps=0.05) + gradient_checker.double_grad_check([input_NHWC], + y, + x_init=x_arr, + place=place, + eps=0.05) - gradient_checker.double_grad_check_for_dygraph( - self.pool2d_wrapper, [input_NHWC], y, x_init=x_arr, place=place) + gradient_checker.double_grad_check_for_dygraph(self.pool2d_wrapper, + [input_NHWC], + y, + x_init=x_arr, + place=place) def test_grad(self): places = [fluid.CPUPlace()] @@ -480,27 +557,35 @@ class TestAvgPool2DDoubleGradCheckCase2(unittest.TestCase): class TestAvgPool2DDoubleGradCheckCase3(unittest.TestCase): + def pool2d_wrapper(self, x): - return paddle.nn.functional.avg_pool2d( - x[0], kernel_size=2, padding=[1, 1]) + return paddle.nn.functional.avg_pool2d(x[0], + kernel_size=2, + padding=[1, 1]) @prog_scope() def func(self, place): - input_NCHW = fluid.layers.data( - name="input_NCHW", - shape=[2, 3, 5, 5], - append_batch_size=False, - dtype="float32") + input_NCHW = fluid.layers.data(name="input_NCHW", + shape=[2, 3, 5, 5], + append_batch_size=False, + dtype="float32") input_NCHW.persistable = True - y = paddle.nn.functional.avg_pool2d( - input_NCHW, kernel_size=2, padding=[1, 1]) + y = paddle.nn.functional.avg_pool2d(input_NCHW, + kernel_size=2, + padding=[1, 1]) x_arr = np.random.uniform(-1, 1, [2, 3, 5, 5]).astype(np.float32) - gradient_checker.double_grad_check( - [input_NCHW], y, x_init=x_arr, place=place, eps=0.05) - gradient_checker.double_grad_check_for_dygraph( - self.pool2d_wrapper, [input_NCHW], y, x_init=x_arr, place=place) + gradient_checker.double_grad_check([input_NCHW], + y, + x_init=x_arr, + place=place, + eps=0.05) + gradient_checker.double_grad_check_for_dygraph(self.pool2d_wrapper, + [input_NCHW], + y, + x_init=x_arr, + place=place) def test_grad(self): places = [fluid.CPUPlace()] @@ -511,26 +596,32 @@ class TestAvgPool2DDoubleGradCheckCase3(unittest.TestCase): class TestAvgPool2DDoubleGradCheckCase4(unittest.TestCase): + def pool2d_wrapper(self, x): return paddle.nn.functional.avg_pool2d(x[0], kernel_size=[4, 4]) @prog_scope() def func(self, place): - input_NCHW = fluid.layers.data( - name="input_NCHW", - shape=[2, 3, 5, 5], - append_batch_size=False, - dtype="float32") + input_NCHW = fluid.layers.data(name="input_NCHW", + shape=[2, 3, 5, 5], + append_batch_size=False, + dtype="float32") input_NCHW.persistable = True y = layers.pool2d(input_NCHW, pool_size=[4, 4], pool_type="avg") y = paddle.nn.functional.avg_pool2d(input_NCHW, kernel_size=[4, 4]) x_arr = np.random.uniform(-1, 1, [2, 3, 5, 
5]).astype(np.float32) - gradient_checker.double_grad_check( - [input_NCHW], y, x_init=x_arr, place=place, eps=0.05) - gradient_checker.double_grad_check_for_dygraph( - self.pool2d_wrapper, [input_NCHW], y, x_init=x_arr, place=place) + gradient_checker.double_grad_check([input_NCHW], + y, + x_init=x_arr, + place=place, + eps=0.05) + gradient_checker.double_grad_check_for_dygraph(self.pool2d_wrapper, + [input_NCHW], + y, + x_init=x_arr, + place=place) def test_grad(self): places = [fluid.CPUPlace()] diff --git a/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py b/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py index 8ee3b2ac203..2fb1c92330b 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py +++ b/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py @@ -34,12 +34,14 @@ def calc_margin_rank_loss(x, y, label, margin=0.0, reduction='none'): def create_test_case(margin, reduction): + class MarginRankingLossCls(unittest.TestCase): + def setUp(self): self.x_data = np.random.rand(10, 10).astype("float64") self.y_data = np.random.rand(10, 10).astype("float64") - self.label_data = np.random.choice( - [-1, 1], size=[10, 10]).astype("float64") + self.label_data = np.random.choice([-1, 1], + size=[10, 10]).astype("float64") self.places = [] self.places.append(fluid.CPUPlace()) if core.is_compiled_with_cuda(): @@ -47,19 +49,21 @@ def create_test_case(margin, reduction): def run_static_functional_api(self, place): paddle.enable_static() - expected = calc_margin_rank_loss( - self.x_data, - self.y_data, - self.label_data, - margin=margin, - reduction=reduction) + expected = calc_margin_rank_loss(self.x_data, + self.y_data, + self.label_data, + margin=margin, + reduction=reduction) with program_guard(Program(), Program()): - x = paddle.static.data( - name="x", shape=[10, 10], dtype="float64") - y = paddle.static.data( - name="y", shape=[10, 10], dtype="float64") - label = paddle.static.data( - name="label", shape=[10, 10], dtype="float64") + x = paddle.static.data(name="x", + shape=[10, 10], + dtype="float64") + y = paddle.static.data(name="y", + shape=[10, 10], + dtype="float64") + label = paddle.static.data(name="label", + shape=[10, 10], + dtype="float64") result = paddle.nn.functional.margin_ranking_loss( x, y, label, margin, reduction) exe = paddle.static.Executor(place) @@ -73,19 +77,21 @@ def create_test_case(margin, reduction): def run_static_api(self, place): paddle.enable_static() - expected = calc_margin_rank_loss( - self.x_data, - self.y_data, - self.label_data, - margin=margin, - reduction=reduction) + expected = calc_margin_rank_loss(self.x_data, + self.y_data, + self.label_data, + margin=margin, + reduction=reduction) with program_guard(Program(), Program()): - x = paddle.static.data( - name="x", shape=[10, 10], dtype="float64") - y = paddle.static.data( - name="y", shape=[10, 10], dtype="float64") - label = paddle.static.data( - name="label", shape=[10, 10], dtype="float64") + x = paddle.static.data(name="x", + shape=[10, 10], + dtype="float64") + y = paddle.static.data(name="y", + shape=[10, 10], + dtype="float64") + label = paddle.static.data(name="label", + shape=[10, 10], + dtype="float64") margin_rank_loss = paddle.nn.loss.MarginRankingLoss( margin=margin, reduction=reduction) result = margin_rank_loss(x, y, label) @@ -105,14 +111,13 @@ def create_test_case(margin, reduction): y = paddle.to_tensor(self.y_data) label = paddle.to_tensor(self.label_data) - result = paddle.nn.functional.margin_ranking_loss(x, y, label, - 
margin, reduction) - expected = calc_margin_rank_loss( - self.x_data, - self.y_data, - self.label_data, - margin=margin, - reduction=reduction) + result = paddle.nn.functional.margin_ranking_loss( + x, y, label, margin, reduction) + expected = calc_margin_rank_loss(self.x_data, + self.y_data, + self.label_data, + margin=margin, + reduction=reduction) self.assertTrue(np.allclose(result.numpy(), expected)) def run_dynamic_api(self, place): @@ -123,12 +128,11 @@ def create_test_case(margin, reduction): margin_rank_loss = paddle.nn.loss.MarginRankingLoss( margin=margin, reduction=reduction) result = margin_rank_loss(x, y, label) - expected = calc_margin_rank_loss( - self.x_data, - self.y_data, - self.label_data, - margin=margin, - reduction=reduction) + expected = calc_margin_rank_loss(self.x_data, + self.y_data, + self.label_data, + margin=margin, + reduction=reduction) self.assertTrue(np.allclose(result.numpy(), expected)) def run_dynamic_broadcast_api(self, place): @@ -140,12 +144,11 @@ def create_test_case(margin, reduction): margin_rank_loss = paddle.nn.loss.MarginRankingLoss( margin=margin, reduction=reduction) result = margin_rank_loss(x, y, label) - expected = calc_margin_rank_loss( - self.x_data, - self.y_data, - label_data, - margin=margin, - reduction=reduction) + expected = calc_margin_rank_loss(self.x_data, + self.y_data, + label_data, + margin=margin, + reduction=reduction) self.assertTrue(np.allclose(result.numpy(), expected)) def test_case(self): @@ -171,6 +174,7 @@ class MarginRakingLossError(unittest.TestCase): paddle.enable_static() def test_errors(self): + def test_margin_value_error(): margin_rank_loss = paddle.nn.loss.MarginRankingLoss( margin=0.1, reduction="reduce_mean") @@ -180,8 +184,9 @@ class MarginRakingLossError(unittest.TestCase): def test_functional_margin_value_error(): x = paddle.static.data(name="x", shape=[10, 10], dtype="float64") y = paddle.static.data(name="y", shape=[10, 10], dtype="float64") - label = paddle.static.data( - name="label", shape=[10, 10], dtype="float64") + label = paddle.static.data(name="label", + shape=[10, 10], + dtype="float64") result = paddle.nn.functional.margin_ranking_loss( x, y, label, margin=0.1, reduction="reduction_mean") diff --git a/python/paddle/fluid/tests/unittests/test_nn_matmul_v2_grad.py b/python/paddle/fluid/tests/unittests/test_nn_matmul_v2_grad.py index 6dbabda1f4c..a83cf124945 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_matmul_v2_grad.py +++ b/python/paddle/fluid/tests/unittests/test_nn_matmul_v2_grad.py @@ -23,10 +23,12 @@ import paddle.fluid.layers as layers import paddle.fluid.core as core import gradient_checker from decorator_helper import prog_scope + paddle.enable_static() class TestMatmulDoubleGradCheck(unittest.TestCase): + def setUp(self): self.init_test() @@ -41,17 +43,25 @@ class TestMatmulDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, 
self.y_shape).astype(dtype) - gradient_checker.double_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.double_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -62,6 +72,7 @@ class TestMatmulDoubleGradCheck(unittest.TestCase): class TestMatmulDoubleGradCheckCase1(TestMatmulDoubleGradCheck): + def init_test(self): self.x_shape = [2, 3] self.y_shape = [3, 2] @@ -77,6 +88,7 @@ class TestMatmulDoubleGradCheckCase1(TestMatmulDoubleGradCheck): class TestMatmulDoubleGradCheck2(unittest.TestCase): + def setUp(self): self.init_test() @@ -91,17 +103,25 @@ class TestMatmulDoubleGradCheck2(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.double_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.double_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -112,6 +132,7 @@ class TestMatmulDoubleGradCheck2(unittest.TestCase): class TestMatmulDoubleGradCheckCase3(unittest.TestCase): + def setUp(self): self.init_test() @@ -126,17 +147,25 @@ class TestMatmulDoubleGradCheckCase3(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.double_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.double_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -147,6 +176,7 @@ class TestMatmulDoubleGradCheckCase3(unittest.TestCase): class TestMatmulTripleGradCheckDotCase(unittest.TestCase): + def setUp(self): self.init_test() @@ -163,16 +193,21 @@ def func(self, place): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') out = paddle.matmul(x, y, self.transpose_x, self.transpose_y, name='out') np.random.seed(2021) x_arr = 
np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.triple_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.triple_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): @@ -184,6 +219,7 @@ def test_grad(self): class TestMatmulTripleGradCheckNormalCase1(unittest.TestCase): + def setUp(self): self.init_test() @@ -198,17 +234,25 @@ class TestMatmulTripleGradCheckNormalCase1(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') np.random.seed(2021) x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.triple_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.triple_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -219,6 +263,7 @@ class TestMatmulTripleGradCheckNormalCase1(unittest.TestCase): class TestMatmulTripleGradCheckNormalCase2(unittest.TestCase): + def setUp(self): self.init_test() @@ -233,17 +278,25 @@ class TestMatmulTripleGradCheckNormalCase2(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') np.random.seed(2021) x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.triple_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.triple_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -254,6 +307,7 @@ class TestMatmulTripleGradCheckNormalCase2(unittest.TestCase): class TestMatmulTripleGradCheckNormalCase3(unittest.TestCase): + def setUp(self): self.init_test() @@ -268,17 +322,25 @@ class TestMatmulTripleGradCheckNormalCase3(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + 
name='out') np.random.seed(2021) x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.triple_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.triple_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -289,6 +351,7 @@ class TestMatmulTripleGradCheckNormalCase3(unittest.TestCase): class TestMatmulTripleGradCheckNormalCase4(unittest.TestCase): + def setUp(self): self.init_test() @@ -303,17 +366,25 @@ class TestMatmulTripleGradCheckNormalCase4(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') np.random.seed(2021) x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.triple_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.triple_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -324,6 +395,7 @@ class TestMatmulTripleGradCheckNormalCase4(unittest.TestCase): class TestMatmulTripleGradCheckBroadcastCase1(unittest.TestCase): + def setUp(self): self.init_test() @@ -338,17 +410,25 @@ class TestMatmulTripleGradCheckBroadcastCase1(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') np.random.seed(2021) x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.triple_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.triple_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -359,6 +439,7 @@ class TestMatmulTripleGradCheckBroadcastCase1(unittest.TestCase): class TestMatmulTripleGradCheckBroadcastCase2(unittest.TestCase): + def setUp(self): self.init_test() @@ -373,17 +454,25 @@ class TestMatmulTripleGradCheckBroadcastCase2(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = 
paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') np.random.seed(2021) x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.triple_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.triple_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -394,6 +483,7 @@ class TestMatmulTripleGradCheckBroadcastCase2(unittest.TestCase): class TestMatmulTripleGradCheckBroadcastCase3(unittest.TestCase): + def setUp(self): self.init_test() @@ -408,17 +498,25 @@ class TestMatmulTripleGradCheckBroadcastCase3(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') np.random.seed(2021) x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.triple_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.triple_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -429,6 +527,7 @@ class TestMatmulTripleGradCheckBroadcastCase3(unittest.TestCase): class TestMatmulTripleGradCheckBroadcastCase4(unittest.TestCase): + def setUp(self): self.init_test() @@ -443,17 +542,25 @@ class TestMatmulTripleGradCheckBroadcastCase4(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') np.random.seed(2021) x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.triple_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.triple_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -464,6 +571,7 @@ class TestMatmulTripleGradCheckBroadcastCase4(unittest.TestCase): class TestMatmulTripleGradCheckBroadcastCase5(unittest.TestCase): + def setUp(self): self.init_test() @@ -478,17 +586,25 @@ class TestMatmulTripleGradCheckBroadcastCase5(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, 
self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') np.random.seed(2021) x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.triple_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.triple_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -499,6 +615,7 @@ class TestMatmulTripleGradCheckBroadcastCase5(unittest.TestCase): class TestMatmulTripleGradCheckSpecialCase1(unittest.TestCase): + def setUp(self): self.init_test() @@ -513,17 +630,25 @@ class TestMatmulTripleGradCheckSpecialCase1(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') np.random.seed(2021) x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.triple_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.triple_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] @@ -534,6 +659,7 @@ class TestMatmulTripleGradCheckSpecialCase1(unittest.TestCase): class TestMatmulTripleGradCheckSpecialCase2(unittest.TestCase): + def setUp(self): self.init_test() @@ -548,17 +674,25 @@ class TestMatmulTripleGradCheckSpecialCase2(unittest.TestCase): eps = 0.005 dtype = np.float64 typename = "float64" - x = paddle.static.create_parameter( - dtype=typename, shape=self.x_shape, name='x') - y = paddle.static.create_parameter( - dtype=typename, shape=self.y_shape, name='y') - out = paddle.matmul( - x, y, self.transpose_x, self.transpose_y, name='out') + x = paddle.static.create_parameter(dtype=typename, + shape=self.x_shape, + name='x') + y = paddle.static.create_parameter(dtype=typename, + shape=self.y_shape, + name='y') + out = paddle.matmul(x, + y, + self.transpose_x, + self.transpose_y, + name='out') np.random.seed(2021) x_arr = np.random.uniform(-1, 1, self.x_shape).astype(dtype) y_arr = np.random.uniform(-1, 1, self.y_shape).astype(dtype) - gradient_checker.triple_grad_check( - [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps) + gradient_checker.triple_grad_check([x, y], + out, + x_init=[x_arr, y_arr], + place=place, + eps=eps) def test_grad(self): places = [fluid.CPUPlace()] diff --git a/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py b/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py index 90132a0923d..170b916941d 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py +++ b/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py @@ -26,6 +26,7 @@ import paddle.nn.functional as functional class TestNNSigmoidAPI(unittest.TestCase): + def 
setUp(self): self.init_data() @@ -71,6 +72,7 @@ class TestNNSigmoidAPI(unittest.TestCase): class TestNNFunctionalSigmoidAPI(unittest.TestCase): + def setUp(self): self.init_data() diff --git a/python/paddle/fluid/tests/unittests/test_nonzero_api.py b/python/paddle/fluid/tests/unittests/test_nonzero_api.py index 8569be82db0..b107823277e 100644 --- a/python/paddle/fluid/tests/unittests/test_nonzero_api.py +++ b/python/paddle/fluid/tests/unittests/test_nonzero_api.py @@ -23,6 +23,7 @@ from paddle.fluid import Program, program_guard class TestNonZeroAPI(unittest.TestCase): + def test_nonzero_api_as_tuple(self): data = np.array([[True, False], [False, True]]) with program_guard(Program(), Program()): diff --git a/python/paddle/fluid/tests/unittests/test_norm_all.py b/python/paddle/fluid/tests/unittests/test_norm_all.py index 5b0a9599bf8..c65bff3a7bb 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_all.py +++ b/python/paddle/fluid/tests/unittests/test_norm_all.py @@ -35,9 +35,9 @@ def p_norm_python_api(x, return _C_ops.final_state_p_norm(x, p, axis, epsilon, keepdim, as_vector) if _in_legacy_dygraph(): - return _C_ops.p_norm(x, 'axis', axis, 'porder', - float(p), 'keepdim', keepdim, 'epsilon', epsilon, - 'as_vector', as_vector) + return _C_ops.p_norm(x, 'axis', axis, 'porder', float(p), 'keepdim', + keepdim, 'epsilon', epsilon, 'as_vector', + as_vector) def p_norm(x, axis, porder, keepdims=False, reduce_all=False): @@ -81,8 +81,8 @@ def p_norm(x, axis, porder, keepdims=False, reduce_all=False): def frobenius_norm(x, axis=None, keepdims=False): if isinstance(axis, list): axis = tuple(axis) if axis is None: x = x.reshape(1, x.size) - r = np.linalg.norm( - x, ord='fro', axis=axis, keepdims=keepdims).astype(x.dtype) + r = np.linalg.norm(x, ord='fro', axis=axis, + keepdims=keepdims).astype(x.dtype) return r @@ -91,6 +91,7 @@ def final_state_frobenius_norm(x, dim, keep_dim, reduce_all): class TestFrobeniusNormOp(OpTest): + def setUp(self): self.python_api = final_state_frobenius_norm self.op_type = "frobenius_norm" @@ -120,6 +121,7 @@ class TestFrobeniusNormOp(OpTest): class TestFrobeniusNormOp2(TestFrobeniusNormOp): + def init_test_case(self): self.shape = [5, 5, 5] self.axis = (0, 1) @@ -131,6 +133,7 @@ class TestFrobeniusNormOp2(TestFrobeniusNormOp): class TestPnormOp(OpTest): + def setUp(self): self.op_type = "p_norm" self.python_api = p_norm_python_api @@ -180,14 +183,20 @@ class TestPnormOp(OpTest): if porder == 0: grad = np.zeros(x.shape).astype(x.dtype) elif porder in [float("inf"), float("-inf")]: - norm = p_norm( - x, axis=axis, porder=porder, keepdims=True, reduce_all=asvector) + norm = p_norm(x, + axis=axis, + porder=porder, + keepdims=True, + reduce_all=asvector) x_abs = np.abs(x) grad = np.sign(x) grad[x_abs != norm] = 0.0 else: - norm = p_norm( - x, axis=axis, porder=porder, keepdims=True, reduce_all=asvector) + norm = p_norm(x, + axis=axis, + porder=porder, + keepdims=True, + reduce_all=asvector) grad = np.power(norm, 1 - porder) * np.power( np.abs(x), porder - 1) * np.sign(x) @@ -200,6 +209,7 @@ class TestPnormOp(OpTest): class TestPnormOp2(TestPnormOp): + def init_test_case(self): self.shape = [3, 20, 3] self.axis = 2 @@ -214,6 +224,7 @@ class TestPnormOp2(TestPnormOp): class TestPnormOp3(TestPnormOp): + def init_test_case(self): self.shape = [3, 20, 3] self.axis = 2 @@ -228,6 +239,7 @@ class TestPnormOp3(TestPnormOp): class TestPnormOp4(TestPnormOp): + def init_test_case(self): self.shape = [3, 20, 3] self.axis = 2 @@ -242,6 +254,7 @@ class TestPnormOp4(TestPnormOp): class 
TestPnormOp5(TestPnormOp): + def init_test_case(self): self.shape = [3, 20, 3] self.axis = 2 @@ -256,6 +269,7 @@ class TestPnormOp5(TestPnormOp): class TestPnormOp6(TestPnormOp): + def init_test_case(self): self.shape = [3, 20, 3] self.axis = -1 @@ -272,6 +286,7 @@ class TestPnormOp6(TestPnormOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestPnormOpFP16(TestPnormOp): + def init_test_case(self): self.shape = [2, 3, 4, 5] self.axis = 1 @@ -289,13 +304,15 @@ class TestPnormOpFP16(TestPnormOp): def test_check_grad(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_grad_with_place( - place, ['X'], 'Out', user_defined_grads=self.gradient) + self.check_grad_with_place(place, ['X'], + 'Out', + user_defined_grads=self.gradient) @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestPnormOpFP161(TestPnormOpFP16): + def init_test_case(self): self.shape = [2, 3, 4, 5] self.axis = -1 @@ -309,6 +326,7 @@ class TestPnormOpFP161(TestPnormOpFP16): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestPnormBF16Op(OpTest): + def setUp(self): self.op_type = "p_norm" self.python_api = p_norm_python_api @@ -333,11 +351,10 @@ class TestPnormBF16Op(OpTest): def test_check_grad(self): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['X'], - 'Out', - user_defined_grads=self.gradient, - check_eager=True) + self.check_grad_with_place(place, ['X'], + 'Out', + user_defined_grads=self.gradient, + check_eager=True) def init_test_case(self): self.shape = [2, 3, 4, 5] @@ -365,14 +382,20 @@ class TestPnormBF16Op(OpTest): if porder == 0: grad = np.zeros(x.shape).astype(x.dtype) elif porder in [float("inf"), float("-inf")]: - norm = p_norm( - x, axis=axis, porder=porder, keepdims=True, reduce_all=asvector) + norm = p_norm(x, + axis=axis, + porder=porder, + keepdims=True, + reduce_all=asvector) x_abs = np.abs(x) grad = np.sign(x) grad[x_abs != norm] = 0.0 else: - norm = p_norm( - x, axis=axis, porder=porder, keepdims=True, reduce_all=asvector) + norm = p_norm(x, + axis=axis, + porder=porder, + keepdims=True, + reduce_all=asvector) grad = np.power(norm, 1 - porder) * np.power( np.abs(x), porder - 1) * np.sign(x) @@ -407,8 +430,10 @@ def run_pnorm(self, p, axis, shape_x, dtype, keep_dim, check_dim=False): place = fluid.CPUPlace() exe = fluid.Executor(place) np_input = (np.random.rand(*shape_x) + 1.0).astype(dtype) - expected_result = p_norm( - np_input, porder=p, axis=axis, keepdims=keep_dim).astype(dtype) + expected_result = p_norm(np_input, + porder=p, + axis=axis, + keepdims=keep_dim).astype(dtype) result, = exe.run(feed={"X": np_input}, fetch_list=[out]) self.assertEqual((np.abs(result - expected_result) < 1e-6).all(), True) if keep_dim and check_dim: @@ -450,134 +475,119 @@ def run_graph(self, p, axis, shape_x, dtype): class API_NormTest(unittest.TestCase): + def test_basic(self): keep_dims = {False, True} for keep in keep_dims: - run_fro( - self, - p='fro', - axis=None, - shape_x=[2, 3, 4], - dtype="float32", - keep_dim=keep) - run_fro( - self, - p='fro', - axis=[0, 1], - shape_x=[2, 3, 4], - dtype="float64", - keep_dim=keep, - check_dim=True) - run_pnorm( - self, - p=2, - axis=None, - shape_x=[3, 4], - dtype="float32", - keep_dim=keep) - run_pnorm( - self, - p=2, - axis=1, - shape_x=[3, 4], - dtype="float64", - keep_dim=keep, - check_dim=True) - run_pnorm( - self, - p=np.inf, - axis=0, - shape_x=[2, 3, 4], - dtype="float32", - keep_dim=keep, - 
check_dim=True) - run_pnorm( - self, - p=np.inf, - axis=None, - shape_x=[2, 3, 4], - dtype="float32", - keep_dim=keep) - run_pnorm( - self, - p=-np.inf, - axis=0, - shape_x=[2, 3, 4], - dtype="float64", - keep_dim=keep, - check_dim=True) - run_pnorm( - self, - p=-np.inf, - axis=None, - shape_x=[2, 3, 4], - dtype="float64", - keep_dim=keep) - run_pnorm( - self, - p=0, - axis=1, - shape_x=[3, 4], - dtype="float64", - keep_dim=keep, - check_dim=True) - - run_pnorm( - self, - p=1, - axis=1, - shape_x=[3, 4], - dtype="float64", - keep_dim=keep, - check_dim=True) - run_pnorm( - self, - p=0, - axis=None, - shape_x=[3, 4], - dtype="float64", - keep_dim=keep, - check_dim=True) - run_pnorm( - self, - p=2, - axis=[0, 1], - shape_x=[2, 3, 4], - dtype="float64", - keep_dim=keep, - check_dim=True) - run_pnorm( - self, - p=2, - axis=-1, - shape_x=[2, 3, 4], - dtype="float64", - keep_dim=keep, - check_dim=True) - run_pnorm( - self, - p=1, - axis=[0, 1], - shape_x=[2, 3, 4], - dtype="float64", - keep_dim=keep, - check_dim=True) - run_pnorm( - self, - p=np.inf, - axis=[0, 1], - shape_x=[2, 3, 4], - dtype="float64", - keep_dim=keep, - check_dim=True) - run_pnorm( - self, - p=-np.inf, - axis=[0, 1], - shape_x=[2, 3, 4], - dtype="float64", - keep_dim=keep, - check_dim=True) + run_fro(self, + p='fro', + axis=None, + shape_x=[2, 3, 4], + dtype="float32", + keep_dim=keep) + run_fro(self, + p='fro', + axis=[0, 1], + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm(self, + p=2, + axis=None, + shape_x=[3, 4], + dtype="float32", + keep_dim=keep) + run_pnorm(self, + p=2, + axis=1, + shape_x=[3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm(self, + p=np.inf, + axis=0, + shape_x=[2, 3, 4], + dtype="float32", + keep_dim=keep, + check_dim=True) + run_pnorm(self, + p=np.inf, + axis=None, + shape_x=[2, 3, 4], + dtype="float32", + keep_dim=keep) + run_pnorm(self, + p=-np.inf, + axis=0, + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm(self, + p=-np.inf, + axis=None, + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep) + run_pnorm(self, + p=0, + axis=1, + shape_x=[3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + + run_pnorm(self, + p=1, + axis=1, + shape_x=[3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm(self, + p=0, + axis=None, + shape_x=[3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm(self, + p=2, + axis=[0, 1], + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm(self, + p=2, + axis=-1, + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm(self, + p=1, + axis=[0, 1], + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm(self, + p=np.inf, + axis=[0, 1], + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm(self, + p=-np.inf, + axis=[0, 1], + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) def test_dygraph(self): run_graph(self, p='fro', axis=None, shape_x=[2, 3, 4], dtype="float32") @@ -611,8 +621,11 @@ class API_NormTest(unittest.TestCase): self.assertRaises(ValueError, paddle.norm, data, p=[1], axis=-1) self.assertRaises(ValueError, paddle.norm, 0, [1, 0], "float64") data = fluid.data(name="data_3d", shape=[2, 2, 2], dtype="float64") - self.assertRaises( - ValueError, paddle.norm, data, p='unspport', axis=[-3, -2, -1]) + self.assertRaises(ValueError, + paddle.norm, + data, + p='unspport', + axis=[-3, -2, 
-1]) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py index 13c2edbf37c..46fbc00eacf 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py @@ -27,6 +27,7 @@ from decorator_helper import prog_scope class TestInstanceNormDoubleGradCheck(unittest.TestCase): + @prog_scope() def func(self, place): prog = fluid.Program() @@ -39,8 +40,12 @@ class TestInstanceNormDoubleGradCheck(unittest.TestCase): x = layers.create_parameter(dtype=dtype, shape=shape, name='x') z = fluid.layers.instance_norm(input=x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - gradient_checker.double_grad_check( - [x], z, x_init=x_arr, atol=atol, place=place, eps=eps) + gradient_checker.double_grad_check([x], + z, + x_init=x_arr, + atol=atol, + place=place, + eps=eps) def test_grad(self): paddle.enable_static() @@ -53,6 +58,7 @@ class TestInstanceNormDoubleGradCheck(unittest.TestCase): class TestInstanceNormDoubleGradCheckWithoutParamBias( TestInstanceNormDoubleGradCheck): + @prog_scope() def func(self, place): prog = fluid.Program() @@ -63,14 +69,20 @@ class TestInstanceNormDoubleGradCheckWithoutParamBias( eps = 0.005 atol = 1e-4 x = layers.create_parameter(dtype=dtype, shape=shape, name='x') - z = fluid.layers.instance_norm( - input=x, param_attr=False, bias_attr=False) + z = fluid.layers.instance_norm(input=x, + param_attr=False, + bias_attr=False) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - gradient_checker.double_grad_check( - [x], z, x_init=x_arr, atol=atol, place=place, eps=eps) + gradient_checker.double_grad_check([x], + z, + x_init=x_arr, + atol=atol, + place=place, + eps=eps) class TestInstanceNormDoubleGradEagerCheck(unittest.TestCase): + def instance_norm_wrapper(self, x): return paddle.nn.functional.instance_norm(x[0]) @@ -87,8 +99,12 @@ class TestInstanceNormDoubleGradEagerCheck(unittest.TestCase): z = paddle.nn.functional.instance_norm(x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) # check for static mode - gradient_checker.double_grad_check( - [x], z, x_init=x_arr, atol=atol, place=place, eps=eps) + gradient_checker.double_grad_check([x], + z, + x_init=x_arr, + atol=atol, + place=place, + eps=eps) # check for eager mode gradient_checker.double_grad_check_for_dygraph( self.instance_norm_wrapper, [x], @@ -108,6 +124,7 @@ class TestInstanceNormDoubleGradEagerCheck(unittest.TestCase): class TestInstanceNormDoubleGradEagerCheckWithParams( TestInstanceNormDoubleGradEagerCheck): + def instance_norm_wrapper(self, x): instance_norm = paddle.nn.InstanceNorm2D(3) return instance_norm(x[0]) @@ -125,8 +142,12 @@ class TestInstanceNormDoubleGradEagerCheckWithParams( z = paddle.nn.InstanceNorm2D(3)(x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) # check for static mode - gradient_checker.double_grad_check( - [x], z, x_init=x_arr, atol=atol, place=place, eps=eps) + gradient_checker.double_grad_check([x], + z, + x_init=x_arr, + atol=atol, + place=place, + eps=eps) # check for eager mode gradient_checker.double_grad_check_for_dygraph( self.instance_norm_wrapper, [x], @@ -137,6 +158,7 @@ class TestInstanceNormDoubleGradEagerCheckWithParams( class TestBatchNormDoubleGradCheck(unittest.TestCase): + def setUp(self): self.init_test() @@ -162,13 +184,16 @@ class TestBatchNormDoubleGradCheck(unittest.TestCase): eps = 0.005 atol = 1e-4 x = layers.create_parameter(dtype=dtype, shape=self.shape, name='x') - z = 
fluid.layers.batch_norm( - input=x, - data_layout=self.data_layout, - use_global_stats=self.use_global_stats) + z = fluid.layers.batch_norm(input=x, + data_layout=self.data_layout, + use_global_stats=self.use_global_stats) x_arr = np.random.uniform(-1, 1, self.shape).astype(dtype) - gradient_checker.double_grad_check( - [x], z, x_init=x_arr, atol=atol, place=place, eps=eps) + gradient_checker.double_grad_check([x], + z, + x_init=x_arr, + atol=atol, + place=place, + eps=eps) gradient_checker.double_grad_check_for_dygraph( self.batch_norm_wrapper, [x], z, @@ -186,6 +211,7 @@ class TestBatchNormDoubleGradCheck(unittest.TestCase): class TestBatchNormDoubleGradCheckCase1(TestBatchNormDoubleGradCheck): + def init_test(self): self.data_layout = 'NHWC' self.use_global_stats = False @@ -194,6 +220,7 @@ class TestBatchNormDoubleGradCheckCase1(TestBatchNormDoubleGradCheck): class TestBatchNormDoubleGradCheckCase2(TestBatchNormDoubleGradCheck): + def init_test(self): self.data_layout = 'NCHW' self.use_global_stats = True @@ -202,6 +229,7 @@ class TestBatchNormDoubleGradCheckCase2(TestBatchNormDoubleGradCheck): class TestBatchNormDoubleGradCheckCase3(TestBatchNormDoubleGradCheck): + def init_test(self): self.data_layout = 'NHWC' self.use_global_stats = True @@ -210,6 +238,7 @@ class TestBatchNormDoubleGradCheckCase3(TestBatchNormDoubleGradCheck): class TestBatchNormDoubleGradCheckCase4(TestBatchNormDoubleGradCheck): + def init_test(self): self.data_layout = 'NCHW' self.use_global_stats = False @@ -225,6 +254,7 @@ class TestBatchNormDoubleGradCheckCase4(TestBatchNormDoubleGradCheck): class TestBatchNormDoubleGradCheckCase5(TestBatchNormDoubleGradCheck): + @prog_scope() def func(self, place): prog = fluid.Program() @@ -233,27 +263,25 @@ class TestBatchNormDoubleGradCheckCase5(TestBatchNormDoubleGradCheck): dtype = "float32" eps = 0.005 atol = 2e-4 - chn = self.shape[1] if self.data_layout == 'NCHW' else self.shape[ - -1] + chn = self.shape[1] if self.data_layout == 'NCHW' else self.shape[-1] x = layers.create_parameter(dtype=dtype, shape=self.shape, name='x') - z = fluid.layers.batch_norm( - input=x, - data_layout=self.data_layout, - use_global_stats=self.use_global_stats) + z = fluid.layers.batch_norm(input=x, + data_layout=self.data_layout, + use_global_stats=self.use_global_stats) x_arr = np.random.uniform(-1, 1, self.shape).astype(dtype) w, b = prog.global_block().all_parameters()[1:3] w_arr = np.ones(chn).astype(dtype) b_arr = np.zeros(chn).astype(dtype) - gradient_checker.double_grad_check( - [x, w, b], - z, - x_init=[x_arr, w_arr, b_arr], - atol=atol, - place=place, - eps=eps) + gradient_checker.double_grad_check([x, w, b], + z, + x_init=[x_arr, w_arr, b_arr], + atol=atol, + place=place, + eps=eps) class TestBatchNormDoubleGradCheckCase6(TestBatchNormDoubleGradCheckCase5): + def init_test(self): self.data_layout = 'NCHW' self.use_global_stats = True diff --git a/python/paddle/fluid/tests/unittests/test_norm_op.py b/python/paddle/fluid/tests/unittests/test_norm_op.py index 49e1f253349..7cc598a7387 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_norm_op.py @@ -30,6 +30,7 @@ def l2_norm(x, axis, epsilon): class TestNormOp(OpTest): + def setUp(self): self.op_type = "norm" self.python_api = paddle.fluid.layers.l2_normalize @@ -57,6 +58,7 @@ class TestNormOp(OpTest): class TestNormOp2(TestNormOp): + def init_test_case(self): self.shape = [5, 3, 9, 7] self.axis = 0 @@ -64,6 +66,7 @@ class TestNormOp2(TestNormOp): class 
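
Every hunk in this file, and throughout the patch, is the same mechanical rewrite: yapf breaks long calls so that continuation arguments line up under the opening parenthesis instead of using a four-space hanging indent, with no behavior change. A tiny self-contained illustration of the two layouts (the function and its arguments are hypothetical, chosen only to show the indentation rule, not taken from the patch):

    def scale(value, factor=1.0, offset=0.0, clamp=None):
        out = value * factor + offset
        return out if clamp is None else min(out, clamp)

    # Hanging-indent layout that the patch removes:
    y_before = scale(
        3.0, factor=2.0, offset=1.0, clamp=10.0)

    # Align-with-open-parenthesis layout that yapf produces here:
    y_after = scale(3.0,
                    factor=2.0,
                    offset=1.0,
                    clamp=10.0)

    assert y_before == y_after == 7.0

The double_grad_check, batch_norm and l2_normalize hunks in this section are all instances of this one rewrite.
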
TestNormOp3(TestNormOp): + def init_test_case(self): self.shape = [5, 3, 2, 7] self.axis = -1 @@ -73,6 +76,7 @@ class TestNormOp3(TestNormOp): @skip_check_grad_ci(reason="'check_grad' on large inputs is too slow, " + "however it is desirable to cover the forward pass") class TestNormOp4(TestNormOp): + def init_test_case(self): self.shape = [128, 1024, 14, 14] self.axis = 2 @@ -85,6 +89,7 @@ class TestNormOp4(TestNormOp): @skip_check_grad_ci(reason="'check_grad' on large inputs is too slow, " + "however it is desirable to cover the forward pass") class TestNormOp5(TestNormOp): + def init_test_case(self): self.shape = [2048, 2048] self.axis = 1 @@ -95,6 +100,7 @@ class TestNormOp5(TestNormOp): class TestNormOp6(TestNormOp): + def init_dtype(self): self.dtype = "float32" @@ -105,6 +111,7 @@ class TestNormOp6(TestNormOp): @unittest.skipIf(not fluid.core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestNormOp7(TestNormOp): + def init_dtype(self): self.dtype = "float16" @@ -112,12 +119,14 @@ class TestNormOp7(TestNormOp): self.check_output_with_place(fluid.core.CUDAPlace(0), atol=5e-2) def test_check_grad(self): - self.check_grad_with_place( - fluid.core.CUDAPlace(0), ['X'], 'Out', max_relative_error=0.05) + self.check_grad_with_place(fluid.core.CUDAPlace(0), ['X'], + 'Out', + max_relative_error=0.05) @skip_check_grad_ci(reason="skip check grad for test mode.") class TestNormTestOp(OpTest): + def setUp(self): self.op_type = "norm" self.init_test_case() @@ -144,6 +153,7 @@ class TestNormTestOp(OpTest): class API_NormTest(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program()): diff --git a/python/paddle/fluid/tests/unittests/test_normal.py b/python/paddle/fluid/tests/unittests/test_normal.py index 79632817662..b4cfed68149 100644 --- a/python/paddle/fluid/tests/unittests/test_normal.py +++ b/python/paddle/fluid/tests/unittests/test_normal.py @@ -22,6 +22,7 @@ paddle.seed(10) class TestNormalAPI(unittest.TestCase): + def setUp(self): self.mean = 1.0 self.std = 0.0 @@ -142,28 +143,33 @@ class TestNormalAPI(unittest.TestCase): class TestNormalAPI_mean_is_tensor(TestNormalAPI): + def set_attrs(self): self.mean = np.random.uniform(-2, -1, [2, 3, 4, 5]).astype('float64') class TestNormalAPI_std_is_tensor(TestNormalAPI): + def set_attrs(self): self.std = np.random.uniform(0.7, 1, [2, 3, 17]).astype('float64') class TestNormalAPI_mean_std_are_tensor(TestNormalAPI): + def set_attrs(self): self.mean = np.random.uniform(1, 2, [1, 100]).astype('float64') self.std = np.random.uniform(0.5, 1, [1, 100]).astype('float64') class TestNormalAPI_mean_std_are_tensor_with_different_dtype(TestNormalAPI): + def set_attrs(self): self.mean = np.random.uniform(1, 2, [100]).astype('float64') self.std = np.random.uniform(1, 2, [100]).astype('float32') class TestNormalAlias(unittest.TestCase): + def test_alias(self): paddle.disable_static() shape = [1, 2, 3] @@ -174,6 +180,7 @@ class TestNormalAlias(unittest.TestCase): class TestNormalErrors(unittest.TestCase): + def test_errors(self): with paddle.static.program_guard(paddle.static.Program()): mean = [1, 2, 3] diff --git a/python/paddle/fluid/tests/unittests/test_normalization_wrapper.py b/python/paddle/fluid/tests/unittests/test_normalization_wrapper.py index 24fdcf8c884..c717831b247 100644 --- a/python/paddle/fluid/tests/unittests/test_normalization_wrapper.py +++ b/python/paddle/fluid/tests/unittests/test_normalization_wrapper.py @@ -32,11 +32,10 @@ class TestNormalization(unittest.TestCase): def set_program(self, axis, 
epsilon): """Build the test program. """ - data = fluid.layers.data( - name=self.data_desc["name"], - shape=self.data_desc["shape"], - dtype="float32", - append_batch_size=False) + data = fluid.layers.data(name=self.data_desc["name"], + shape=self.data_desc["shape"], + dtype="float32", + append_batch_size=False) data.stop_gradient = False l2_norm = fluid.layers.l2_normalize(x=data, axis=axis, epsilon=epsilon) out = fluid.layers.reduce_sum(l2_norm, dim=None) diff --git a/python/paddle/fluid/tests/unittests/test_normalize.py b/python/paddle/fluid/tests/unittests/test_normalize.py index 2f52ae391c7..ebcaf26955e 100644 --- a/python/paddle/fluid/tests/unittests/test_normalize.py +++ b/python/paddle/fluid/tests/unittests/test_normalize.py @@ -31,6 +31,7 @@ def p_normalize(x, axis=1, p=2, epsilon=1e-12, keepdims=True): class TestNNFunctionalNormalize(unittest.TestCase): + def setUp(self): self.input_np = np.random.random(size=(10, 10)).astype(np.float32) self.input_np2 = np.array([0.0, 0.0]).astype(np.float32) @@ -68,10 +69,11 @@ class TestNNFunctionalNormalize(unittest.TestCase): place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - static_result = exe.run( - feed={"input": self.input_np, - "input2": self.input_np2}, - fetch_list=[result0, result1, result2, result4]) + static_result = exe.run(feed={ + "input": self.input_np, + "input2": self.input_np2 + }, + fetch_list=[result0, result1, result2, result4]) self.assertTrue(np.allclose(static_result[0], self.expected0)) self.assertTrue(np.allclose(static_result[1], self.expected1)) diff --git a/python/paddle/fluid/tests/unittests/test_npair_loss_op.py b/python/paddle/fluid/tests/unittests/test_npair_loss_op.py index c6d7e0300d0..470dfff788b 100644 --- a/python/paddle/fluid/tests/unittests/test_npair_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_npair_loss_op.py @@ -22,12 +22,13 @@ from paddle.fluid import Program, program_guard def npairloss(anchor, positive, labels, l2_reg=0.002): + def softmax_cross_entropy_with_logits(logits, labels): logits = np.exp(logits) logits = logits / np.sum(logits, axis=1).reshape(-1, 1) - return np.mean( - -np.sum(labels * np.log(logits), axis=1), dtype=np.float32) + return np.mean(-np.sum(labels * np.log(logits), axis=1), + dtype=np.float32) batch_size = labels.shape[0] @@ -47,6 +48,7 @@ def npairloss(anchor, positive, labels, l2_reg=0.002): class TestNpairLossOp(unittest.TestCase): + def setUp(self): self.dtype = np.float32 @@ -65,32 +67,30 @@ class TestNpairLossOp(unittest.TestCase): feat_dim).astype(np.float32) embeddings_positive = np.random.rand(num_data, feat_dim).astype(np.float32) - row_labels = np.random.randint( - 0, num_classes, size=(num_data)).astype(np.float32) - out_loss = npairloss( - embeddings_anchor, - embeddings_positive, - row_labels, - l2_reg=reg_lambda) - - anc = fluid.layers.data( - dtype='float32', - name='anc', - shape=embeddings_anchor.shape, - append_batch_size=False) - pos = fluid.layers.data( - dtype='float32', - name='pos', - shape=embeddings_positive.shape, - append_batch_size=False) - lab = fluid.layers.data( - dtype='float32', - name='lab', - shape=row_labels.shape, - append_batch_size=False) - - npair_loss_op = fluid.layers.npair_loss( - anchor=anc, positive=pos, labels=lab, l2_reg=reg_lambda) + row_labels = np.random.randint(0, num_classes, + size=(num_data)).astype(np.float32) + out_loss = npairloss(embeddings_anchor, + embeddings_positive, + row_labels, + l2_reg=reg_lambda) + + anc = 
fluid.layers.data(dtype='float32', + name='anc', + shape=embeddings_anchor.shape, + append_batch_size=False) + pos = fluid.layers.data(dtype='float32', + name='pos', + shape=embeddings_positive.shape, + append_batch_size=False) + lab = fluid.layers.data(dtype='float32', + name='lab', + shape=row_labels.shape, + append_batch_size=False) + + npair_loss_op = fluid.layers.npair_loss(anchor=anc, + positive=pos, + labels=lab, + l2_reg=reg_lambda) out_tensor = exe.run(feed={ 'anc': embeddings_anchor, 'pos': embeddings_positive, @@ -98,47 +98,46 @@ class TestNpairLossOp(unittest.TestCase): }, fetch_list=[npair_loss_op.name]) - self.__assert_close( - out_tensor, - out_loss, - "inference output are different at " + str(place) + ", " + - str(np.dtype('float32')) + str(np.array(out_tensor)) + - str(out_loss), - atol=1e-3) + self.__assert_close(out_tensor, + out_loss, + "inference output are different at " + str(place) + + ", " + str(np.dtype('float32')) + + str(np.array(out_tensor)) + str(out_loss), + atol=1e-3) class TestNpairLossOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): anchor_np = np.random.random((2, 4)).astype("float32") positive_np = np.random.random((2, 4)).astype("float32") labels_np = np.random.random((2)).astype("float32") - anchor_data = fluid.data( - name='anchor', shape=[2, 4], dtype='float32') - positive_data = fluid.data( - name='positive', shape=[2, 4], dtype='float32') + anchor_data = fluid.data(name='anchor', + shape=[2, 4], + dtype='float32') + positive_data = fluid.data(name='positive', + shape=[2, 4], + dtype='float32') labels_data = fluid.data(name='labels', shape=[2], dtype='float32') def test_anchor_Variable(): # the anchor type must be Variable - fluid.layers.npair_loss( - anchor=anchor_np, - positive=positive_data, - labels=labels_data) + fluid.layers.npair_loss(anchor=anchor_np, + positive=positive_data, + labels=labels_data) def test_positive_Variable(): # the positive type must be Variable - fluid.layers.npair_loss( - anchor=anchor_data, - positive=positive_np, - labels=labels_data) + fluid.layers.npair_loss(anchor=anchor_data, + positive=positive_np, + labels=labels_data) def test_labels_Variable(): # the labels type must be Variable - fluid.layers.npair_loss( - anchor=anchor_data, - positive=positive_data, - labels=labels_np) + fluid.layers.npair_loss(anchor=anchor_data, + positive=positive_data, + labels=labels_np) self.assertRaises(TypeError, test_anchor_Variable) self.assertRaises(TypeError, test_positive_Variable) @@ -146,30 +145,30 @@ class TestNpairLossOpError(unittest.TestCase): def test_anchor_type(): # dtype must be float32 or float64 - anchor_data1 = fluid.data( - name='anchor1', shape=[2, 4], dtype='int32') - fluid.layers.npair_loss( - anchor=anchor_data, - positive=positive_data, - labels=labels_np) + anchor_data1 = fluid.data(name='anchor1', + shape=[2, 4], + dtype='int32') + fluid.layers.npair_loss(anchor=anchor_data, + positive=positive_data, + labels=labels_np) def test_positive_type(): # dtype must be float32 or float64 - positive_data1 = fluid.data( - name='positive1', shape=[2, 4], dtype='int32') - fluid.layers.npair_loss( - anchor=anchor_data, - positive=positive_data1, - labels=labels_np) + positive_data1 = fluid.data(name='positive1', + shape=[2, 4], + dtype='int32') + fluid.layers.npair_loss(anchor=anchor_data, + positive=positive_data1, + labels=labels_np) def test_labels_type(): # dtype must be float32 or float64 - labels_data1 = fluid.data( - name='labels1', shape=[2], dtype='int32') - 
fluid.layers.npair_loss( - anchor=anchor_data, - positive=positive_data, - labels=labels_data1) + labels_data1 = fluid.data(name='labels1', + shape=[2], + dtype='int32') + fluid.layers.npair_loss(anchor=anchor_data, + positive=positive_data, + labels=labels_data1) self.assertRaises(TypeError, test_anchor_type) self.assertRaises(TypeError, test_positive_type) diff --git a/python/paddle/fluid/tests/unittests/test_number_count_op.py b/python/paddle/fluid/tests/unittests/test_number_count_op.py index bb09b8c6512..50ddde7a4dd 100644 --- a/python/paddle/fluid/tests/unittests/test_number_count_op.py +++ b/python/paddle/fluid/tests/unittests/test_number_count_op.py @@ -38,6 +38,7 @@ def count(x, upper_num): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestNumberCountOpInt64(op_test.OpTest): + def setUp(self): upper_num = 16 self.op_type = "number_count" @@ -53,10 +54,11 @@ class TestNumberCountOpInt64(op_test.OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestNumberCountAPI(unittest.TestCase): + def setUp(self): self.upper_num = 320 - self.x = np.random.randint( - -1, self.upper_num, size=(6000, 200)).astype('int64') + self.x = np.random.randint(-1, self.upper_num, + size=(6000, 200)).astype('int64') self.out = count(self.x, self.upper_num) self.place = paddle.CUDAPlace(0) diff --git a/python/paddle/fluid/tests/unittests/test_numel_op.py b/python/paddle/fluid/tests/unittests/test_numel_op.py index d3b95097957..9ab24dea565 100644 --- a/python/paddle/fluid/tests/unittests/test_numel_op.py +++ b/python/paddle/fluid/tests/unittests/test_numel_op.py @@ -24,11 +24,14 @@ import paddle class TestNumelOp(OpTest): + def setUp(self): self.op_type = "size" self.init() x = np.random.random((self.shape)).astype("float64") - self.inputs = {'Input': x, } + self.inputs = { + 'Input': x, + } self.outputs = {'Out': np.array([np.size(x)])} def test_check_output(self): @@ -39,16 +42,19 @@ class TestNumelOp(OpTest): class TestNumelOp1(TestNumelOp): + def init(self): self.shape = (11, 66) class TestNumelOp2(TestNumelOp): + def init(self): self.shape = (0, ) class TestNumelAPI(unittest.TestCase): + def test_numel_static(self): main_program = fluid.Program() startup_program = fluid.Program() @@ -67,10 +73,12 @@ class TestNumelAPI(unittest.TestCase): "x_2": input_2, }, fetch_list=[out_1, out_2]) - assert (np.array_equal( - res_1, np.array([np.size(input_1)]).astype("int64"))) - assert (np.array_equal( - res_2, np.array([np.size(input_2)]).astype("int64"))) + assert (np.array_equal(res_1, + np.array([np.size(input_1) + ]).astype("int64"))) + assert (np.array_equal(res_2, + np.array([np.size(input_2) + ]).astype("int64"))) def test_numel_imperative(self): paddle.disable_static(paddle.CPUPlace()) diff --git a/python/paddle/fluid/tests/unittests/test_one_hot_op.py b/python/paddle/fluid/tests/unittests/test_one_hot_op.py index e1da94c1219..5caede6295b 100644 --- a/python/paddle/fluid/tests/unittests/test_one_hot_op.py +++ b/python/paddle/fluid/tests/unittests/test_one_hot_op.py @@ -26,6 +26,7 @@ from paddle.fluid.framework import Program, program_guard class TestOneHotOp(OpTest): + def setUp(self): self.op_type = 'one_hot' depth = 10 @@ -50,6 +51,7 @@ class TestOneHotOp(OpTest): class TestOneHotOp_attr(OpTest): + def setUp(self): self.op_type = 'one_hot' depth = 10 @@ -73,6 +75,7 @@ class TestOneHotOp_attr(OpTest): class TestOneHotOp_default_dtype(OpTest): + def setUp(self): self.op_type = 'one_hot' depth = 10 @@ -97,6 +100,7 @@ 
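
Since the one_hot hunks that follow only reflow argument lists, it may help to recall what the operator itself computes; a minimal NumPy sketch of one-hot encoding, with an illustrative depth and label vector that are not taken from the tests:

    import numpy as np

    labels = np.array([1, 0, 3])      # integer class ids
    depth = 4                         # number of classes
    one_hot = np.zeros((labels.size, depth), dtype=np.float32)
    one_hot[np.arange(labels.size), labels] = 1.0
    # one_hot is now:
    # [[0. 1. 0. 0.]
    #  [1. 0. 0. 0.]
    #  [0. 0. 0. 1.]]

The TestOneHotOp variants below all use depth = 10; judging by their names, they differ mainly in how depth and the output dtype are supplied.
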
class TestOneHotOp_default_dtype(OpTest): class TestOneHotOp_default_dtype_attr(OpTest): + def setUp(self): self.op_type = 'one_hot' depth = 10 @@ -120,6 +124,7 @@ class TestOneHotOp_default_dtype_attr(OpTest): class TestOneHotOp_out_of_range(OpTest): + def setUp(self): self.op_type = 'one_hot' depth = 10 @@ -139,6 +144,7 @@ class TestOneHotOp_out_of_range(OpTest): class TestOneHotOp_exception(unittest.TestCase): + def setUp(self): self.op_type = 'one_hot' self.depth = 10 @@ -154,18 +160,18 @@ class TestOneHotOp_exception(unittest.TestCase): def test_check_output(self): program = Program() with program_guard(program): - x = fluid.layers.data( - name='x', shape=[self.dimension], dtype='float32', lod_level=1) + x = fluid.layers.data(name='x', + shape=[self.dimension], + dtype='float32', + lod_level=1) block = program.current_block() - one_hot_out = block.create_var( - name="one_hot_out", - type=core.VarDesc.VarType.LOD_TENSOR, - dtype='float32') - block.append_op( - type='one_hot', - inputs={'X': x}, - attrs={'depth': self.depth}, - outputs={'Out': one_hot_out}) + one_hot_out = block.create_var(name="one_hot_out", + type=core.VarDesc.VarType.LOD_TENSOR, + dtype='float32') + block.append_op(type='one_hot', + inputs={'X': x}, + attrs={'depth': self.depth}, + outputs={'Out': one_hot_out}) exe = fluid.Executor(self.place) def run(): @@ -177,24 +183,23 @@ class TestOneHotOp_exception(unittest.TestCase): class TestOneHotOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input must be Variable in_w = np.random.random((4, 1)).astype("int32") self.assertRaises(TypeError, fluid.layers.one_hot, in_w) # the input must be int32 or int 64 - in_w2 = fluid.layers.data( - name="in_w2", - shape=[4, 1], - append_batch_size=False, - dtype="float32") + in_w2 = fluid.layers.data(name="in_w2", + shape=[4, 1], + append_batch_size=False, + dtype="float32") self.assertRaises(TypeError, fluid.layers.one_hot, in_w2) # the depth must be int, long or Variable - in_r = fluid.layers.data( - name="in_r", - shape=[4, 1], - append_batch_size=False, - dtype="int32") + in_r = fluid.layers.data(name="in_r", + shape=[4, 1], + append_batch_size=False, + dtype="int32") depth_w = np.array([4]) self.assertRaises(TypeError, fluid.layers.one_hot, in_r, 4.1) self.assertRaises(TypeError, fluid.layers.one_hot, in_r, depth_w) diff --git a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py index b16c4b5ce69..14ea523fb29 100644 --- a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py @@ -26,6 +26,7 @@ from paddle.fluid.framework import Program, program_guard, _test_eager_guard class TestOneHotOp(OpTest): + def setUp(self): self.op_type = 'one_hot_v2' depth = 10 @@ -49,6 +50,7 @@ class TestOneHotOp(OpTest): class TestOneHotOp_attr(OpTest): + def setUp(self): self.op_type = 'one_hot_v2' depth = 10 @@ -72,6 +74,7 @@ class TestOneHotOp_attr(OpTest): class TestOneHotOp_default_dtype(OpTest): + def setUp(self): self.op_type = 'one_hot_v2' depth = 10 @@ -95,6 +98,7 @@ class TestOneHotOp_default_dtype(OpTest): class TestOneHotOp_default_dtype_attr(OpTest): + def setUp(self): self.op_type = 'one_hot_v2' depth = 10 @@ -118,6 +122,7 @@ class TestOneHotOp_default_dtype_attr(OpTest): class TestOneHotOp_exception(unittest.TestCase): + def setUp(self): self.op_type = 'one_hot_v2' self.depth = 10 @@ -133,18 +138,18 @@ class TestOneHotOp_exception(unittest.TestCase): def 
test_check_output(self): program = Program() with program_guard(program): - x = fluid.layers.data( - name='x', shape=[self.dimension], dtype='float32', lod_level=1) + x = fluid.layers.data(name='x', + shape=[self.dimension], + dtype='float32', + lod_level=1) block = program.current_block() - one_hot_out = block.create_var( - name="one_hot_out", - type=core.VarDesc.VarType.LOD_TENSOR, - dtype='float32') - block.append_op( - type='one_hot', - inputs={'X': x}, - attrs={'depth': self.depth}, - outputs={'Out': one_hot_out}) + one_hot_out = block.create_var(name="one_hot_out", + type=core.VarDesc.VarType.LOD_TENSOR, + dtype='float32') + block.append_op(type='one_hot', + inputs={'X': x}, + attrs={'depth': self.depth}, + outputs={'Out': one_hot_out}) exe = fluid.Executor(self.place) def run(): @@ -156,6 +161,7 @@ class TestOneHotOp_exception(unittest.TestCase): class TestOneHotOpApi(unittest.TestCase): + def test_api(self): depth = 10 self._run(depth) @@ -188,21 +194,23 @@ class TestOneHotOpApi(unittest.TestCase): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - ret = exe.run(feed={'label': label_data, }, + ret = exe.run(feed={ + 'label': label_data, + }, fetch_list=[one_hot_label], return_numpy=False) class BadInputTestOnehotV2(unittest.TestCase): + def test_error(self): with fluid.program_guard(fluid.Program()): def test_bad_x(): - label = fluid.layers.data( - name="label", - shape=[4], - append_batch_size=False, - dtype="float32") + label = fluid.layers.data(name="label", + shape=[4], + append_batch_size=False, + dtype="float32") one_hot_label = fluid.one_hot(input=label, depth=4) self.assertRaises(TypeError, test_bad_x) diff --git a/python/paddle/fluid/tests/unittests/test_ones_like.py b/python/paddle/fluid/tests/unittests/test_ones_like.py index db7fc9d2b2e..0c6e2476be3 100644 --- a/python/paddle/fluid/tests/unittests/test_ones_like.py +++ b/python/paddle/fluid/tests/unittests/test_ones_like.py @@ -23,6 +23,7 @@ from paddle.fluid import core, Program, program_guard class TestOnesLikeAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): x = paddle.fluid.data('x', [3, 4]) @@ -30,6 +31,7 @@ class TestOnesLikeAPIError(unittest.TestCase): class TestOnesLikeAPI(unittest.TestCase): + def test_api(self): shape = [3, 4] startup_program = Program() @@ -44,8 +46,8 @@ class TestOnesLikeAPI(unittest.TestCase): out4 = ones_like(x, 'int32') out5 = ones_like(x, 'int64') - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) outs = exe.run(train_program, feed={'X': np.ones(shape).astype('float32')}, @@ -58,10 +60,11 @@ class TestOnesLikeAPI(unittest.TestCase): class TestOnesLikeImpeartive(unittest.TestCase): + def test_out(self): shape = [3, 4] - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() paddle.disable_static(place) x = paddle.to_tensor(np.ones(shape)) for dtype in [np.bool, np.float32, np.float64, np.int32, np.int64]: diff --git a/python/paddle/fluid/tests/unittests/test_ones_op.py b/python/paddle/fluid/tests/unittests/test_ones_op.py index 47ce3796432..93cda220496 100644 --- a/python/paddle/fluid/tests/unittests/test_ones_op.py +++ b/python/paddle/fluid/tests/unittests/test_ones_op.py @@ -26,6 +26,7 @@ import numpy as np class ApiOnesTest(unittest.TestCase): + def 
test_paddle_ones(self): with paddle.static.program_guard(paddle.static.Program()): ones = paddle.ones(shape=[10]) @@ -62,7 +63,9 @@ class ApiOnesTest(unittest.TestCase): class ApiOnesZerosError(unittest.TestCase): + def test_errors(self): + def test_error1(): with paddle.static.program_guard(paddle.static.Program()): ones = paddle.ones(shape=10, dtype="int64") diff --git a/python/paddle/fluid/tests/unittests/test_onnx_export.py b/python/paddle/fluid/tests/unittests/test_onnx_export.py index 07016d42901..06375125232 100644 --- a/python/paddle/fluid/tests/unittests/test_onnx_export.py +++ b/python/paddle/fluid/tests/unittests/test_onnx_export.py @@ -1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -25,6 +25,7 @@ from paddle.fluid.framework import in_dygraph_mode, _test_eager_guard class LinearNet(paddle.nn.Layer): + def __init__(self): super(LinearNet, self).__init__() self._linear = paddle.nn.Linear(128, 10) @@ -34,6 +35,7 @@ class LinearNet(paddle.nn.Layer): class Logic(paddle.nn.Layer): + def __init__(self): super(Logic, self).__init__() @@ -45,9 +47,10 @@ class Logic(paddle.nn.Layer): class TestExportWithTensor(unittest.TestCase): + def func_with_tensor(self): - self.x_spec = paddle.static.InputSpec( - shape=[None, 128], dtype='float32') + self.x_spec = paddle.static.InputSpec(shape=[None, 128], + dtype='float32') model = LinearNet() paddle.onnx.export(model, 'linear_net', input_spec=[self.x_spec]) @@ -58,6 +61,7 @@ class TestExportWithTensor(unittest.TestCase): class TestExportWithTensor1(unittest.TestCase): + def func_with_tensor(self): self.x = paddle.to_tensor(np.random.random((1, 128))) model = LinearNet() @@ -70,14 +74,17 @@ class TestExportWithTensor1(unittest.TestCase): class TestExportPrunedGraph(unittest.TestCase): + def func_prune_graph(self): model = Logic() self.x = paddle.to_tensor(np.array([1])) self.y = paddle.to_tensor(np.array([-1])) paddle.jit.to_static(model) out = model(self.x, self.y, z=True) - paddle.onnx.export( - model, 'pruned', input_spec=[self.x], output_spec=[out]) + paddle.onnx.export(model, + 'pruned', + input_spec=[self.x], + output_spec=[out]) def test_prune_graph(self): # test eager diff --git a/python/paddle/fluid/tests/unittests/test_op_function_generator.py b/python/paddle/fluid/tests/unittests/test_op_function_generator.py index eeaf1a012ad..e97895cf8bb 100644 --- a/python/paddle/fluid/tests/unittests/test_op_function_generator.py +++ b/python/paddle/fluid/tests/unittests/test_op_function_generator.py @@ -25,6 +25,7 @@ from paddle import _C_ops class TestTracedLayer(fluid.dygraph.Layer): + def __init__(self, name_scope): super(TestTracedLayer, self).__init__(name_scope) @@ -33,6 +34,7 @@ class TestTracedLayer(fluid.dygraph.Layer): class TestVariable(unittest.TestCase): + def setUp(self): self.shape = [512, 768] self.dtype = np.float32 diff --git a/python/paddle/fluid/tests/unittests/test_op_name_conflict.py b/python/paddle/fluid/tests/unittests/test_op_name_conflict.py index ee8f0c2cd29..86f82f0a9e2 100644 --- a/python/paddle/fluid/tests/unittests/test_op_name_conflict.py +++ 
b/python/paddle/fluid/tests/unittests/test_op_name_conflict.py @@ -18,6 +18,7 @@ import unittest class TestOpNameConflict(unittest.TestCase): + def test_conflict(self): main = fluid.Program() startup = fluid.Program() @@ -53,12 +54,16 @@ class TestOpNameConflict(unittest.TestCase): ) else fluid.CPUPlace() exe = fluid.Executor(place) - data = fluid.data( - name='data', shape=[None, 1, 2, 2], dtype='float32') - tensor = fluid.data( - name='tensor', shape=[None, 32, 64], dtype='float32') - x = fluid.data( - name='x', shape=[None, 1], dtype='float32', lod_level=1) + data = fluid.data(name='data', + shape=[None, 1, 2, 2], + dtype='float32') + tensor = fluid.data(name='tensor', + shape=[None, 32, 64], + dtype='float32') + x = fluid.data(name='x', + shape=[None, 1], + dtype='float32', + lod_level=1) input_scale = fluid.layers.create_parameter( shape=[1], @@ -68,10 +73,12 @@ class TestOpNameConflict(unittest.TestCase): shape=[1], dtype="float32", default_initializer=fluid.initializer.Constant(0.5)) - out_affine = fluid.layers.affine_channel( - data, scale=input_scale, bias=input_bias) - out_similarity = fluid.layers.similarity_focus( - input=data, axis=1, indexes=[0]) + out_affine = fluid.layers.affine_channel(data, + scale=input_scale, + bias=input_bias) + out_similarity = fluid.layers.similarity_focus(input=data, + axis=1, + indexes=[0]) position_tensor = fluid.layers.add_position_encoding( input=tensor, alpha=1.0, beta=1.0) x_reversed = fluid.layers.sequence_reverse(x) @@ -82,17 +89,20 @@ class TestOpNameConflict(unittest.TestCase): x_d = fluid.create_lod_tensor( np.array([[1.1], [2.2], [3.3], [4.4]]).astype('float32'), [[1, 3]], place) - outs = exe.run( - test_program, - fetch_list=[ - out_affine, out_similarity, position_tensor, x_reversed - ], - feed={ - data.name: np.ones([1, 1, 2, 2]).astype('float32'), - tensor.name: np.ones([1, 32, 64]).astype('float32'), - x.name: x_d - }, - return_numpy=False) + outs = exe.run(test_program, + fetch_list=[ + out_affine, out_similarity, position_tensor, + x_reversed + ], + feed={ + data.name: + np.ones([1, 1, 2, 2]).astype('float32'), + tensor.name: + np.ones([1, 32, 64]).astype('float32'), + x.name: + x_d + }, + return_numpy=False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_op_support_gpu.py b/python/paddle/fluid/tests/unittests/test_op_support_gpu.py index e203fccd03f..dba0577a101 100644 --- a/python/paddle/fluid/tests/unittests/test_op_support_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_op_support_gpu.py @@ -19,6 +19,7 @@ import paddle.fluid.core as core class TestOpSupportGPU(unittest.TestCase): + def test_case(self): self.assertEqual(core.is_compiled_with_cuda(), core.op_support_gpu("sum")) diff --git a/python/paddle/fluid/tests/unittests/test_op_version.py b/python/paddle/fluid/tests/unittests/test_op_version.py index 1d7167955ac..87f12a0bc6e 100644 --- a/python/paddle/fluid/tests/unittests/test_op_version.py +++ b/python/paddle/fluid/tests/unittests/test_op_version.py @@ -21,6 +21,7 @@ import paddle.fluid as fluid class OpLastCheckpointCheckerTest(unittest.TestCase): + def __init__(self, methodName='runTest'): super(OpLastCheckpointCheckerTest, self).__init__(methodName) self.checker = utils.OpLastCheckpointChecker() @@ -51,6 +52,7 @@ class OpLastCheckpointCheckerTest(unittest.TestCase): class OpVersionTest(unittest.TestCase): + def __init__(self, methodName='runTest'): super(OpVersionTest, self).__init__(methodName) self.vmap = fluid.core.get_op_version_map() diff --git 
a/python/paddle/fluid/tests/unittests/test_operator.py b/python/paddle/fluid/tests/unittests/test_operator.py index 544fca8cecd..a3ca52f78c9 100644 --- a/python/paddle/fluid/tests/unittests/test_operator.py +++ b/python/paddle/fluid/tests/unittests/test_operator.py @@ -21,6 +21,7 @@ import paddle.fluid.proto.framework_pb2 as framework_pb2 class TestGetAllProtos(unittest.TestCase): + def test_all(self): all_protos = op.get_all_op_protos() self.assertNotEqual(0, len(all_protos)) @@ -30,6 +31,7 @@ class TestGetAllProtos(unittest.TestCase): class TestOpDescCreationMethod(unittest.TestCase): + def test_plain_input_output(self): op_proto = framework_pb2.OpProto() op_proto.type = "test" @@ -110,8 +112,10 @@ class TestOpDescCreationMethod(unittest.TestCase): expected1.type = 'fc' self.assertEqual(expected1, generated1) - generated2 = method( - X=['x1', 'x2', 'x3'], b='b', W=['w1', 'w2', 'w3'], Y='y') + generated2 = method(X=['x1', 'x2', 'x3'], + b='b', + W=['w1', 'w2', 'w3'], + Y='y') expected2 = framework_pb2.OpDesc() tmp = expected2.inputs.add() @@ -158,14 +162,13 @@ class TestOpDescCreationMethod(unittest.TestCase): method = op.OpDescCreationMethod(op_proto) - generated = method( - X="a", - int_attr=10, - float_attr=3.2, - string_attr="test_str", - ints_attr=[0, 1, 2, 3, 4], - floats_attr=[0.2, 3.2, 4.5], - strings_attr=["a", "b", "c"]) + generated = method(X="a", + int_attr=10, + float_attr=3.2, + string_attr="test_str", + ints_attr=[0, 1, 2, 3, 4], + floats_attr=[0.2, 3.2, 4.5], + strings_attr=["a", "b", "c"]) expected = framework_pb2.OpDesc() expected.type = "test" @@ -208,6 +211,7 @@ class TestOpDescCreationMethod(unittest.TestCase): class TestOpCreations(unittest.TestCase): + def test_all(self): add_op = op.Operator("sum", X=["a", "b"], Out="z") self.assertIsNotNone(add_op) diff --git a/python/paddle/fluid/tests/unittests/test_operator_desc.py b/python/paddle/fluid/tests/unittests/test_operator_desc.py index 17eeedc5244..3c0871cfc82 100644 --- a/python/paddle/fluid/tests/unittests/test_operator_desc.py +++ b/python/paddle/fluid/tests/unittests/test_operator_desc.py @@ -25,6 +25,7 @@ main_program = default_startup_program() class TestOperator(unittest.TestCase): + def test_error_type(self): block = main_program._create_block() try: @@ -45,18 +46,25 @@ class TestOperator(unittest.TestCase): def test_op_desc_creation(self): program = Program() block = program.current_block() - mul_x = block.create_var( - dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - mul_op = block.append_op( - type="mul", - inputs={"X": [mul_x], - "Y": mul_y}, - outputs={"Out": [mul_out]}, - attrs={"x_num_col_dims": 1}) + mul_x = block.create_var(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x") + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + mul_op = block.append_op(type="mul", + inputs={ + "X": [mul_x], + "Y": mul_y + }, + outputs={"Out": [mul_out]}, + attrs={"x_num_col_dims": 1}) self.assertNotEqual(str(mul_op), "") self.assertEqual(mul_op.type, "mul") @@ -87,18 +95,25 @@ class TestOperator(unittest.TestCase): def test_mult_input(self): program = Program() block = program.current_block() - sum_x1 = block.create_var( - dtype="int", shape=[3, 4], lod_level=0, name="sum.x1") - 
sum_x2 = block.create_var( - dtype="int", shape=[3, 4], lod_level=0, name="sum.x2") - sum_x3 = block.create_var( - dtype="int", shape=[3, 4], lod_level=0, name="sum.x3") - sum_out = block.create_var( - dtype="int", shape=[3, 4], lod_level=0, name="sum.out") - sum_op = block.append_op( - type="sum", - inputs={"X": [sum_x1, sum_x2, sum_x3]}, - outputs={"Out": sum_out}) + sum_x1 = block.create_var(dtype="int", + shape=[3, 4], + lod_level=0, + name="sum.x1") + sum_x2 = block.create_var(dtype="int", + shape=[3, 4], + lod_level=0, + name="sum.x2") + sum_x3 = block.create_var(dtype="int", + shape=[3, 4], + lod_level=0, + name="sum.x3") + sum_out = block.create_var(dtype="int", + shape=[3, 4], + lod_level=0, + name="sum.out") + sum_op = block.append_op(type="sum", + inputs={"X": [sum_x1, sum_x2, sum_x3]}, + outputs={"Out": sum_out}) self.assertEqual(sum_op.type, "sum") self.assertEqual(sum_op.input_names, ["X"]) self.assertEqual(sum_op.input("X"), ["sum.x1", "sum.x2", "sum.x3"]) diff --git a/python/paddle/fluid/tests/unittests/test_ops_nms.py b/python/paddle/fluid/tests/unittests/test_ops_nms.py index c0bbe82d358..54ea804cdbd 100644 --- a/python/paddle/fluid/tests/unittests/test_ops_nms.py +++ b/python/paddle/fluid/tests/unittests/test_ops_nms.py @@ -70,6 +70,7 @@ def gen_args(num_boxes, dtype): class TestOpsNMS(unittest.TestCase): + def setUp(self): self.num_boxes = 64 self.threshold = 0.5 @@ -85,11 +86,11 @@ class TestOpsNMS(unittest.TestCase): boxes, scores, category_idxs, categories = gen_args( self.num_boxes, dtype) paddle.set_device(device) - out = paddle.vision.ops.nms( - paddle.to_tensor(boxes), self.threshold, - paddle.to_tensor(scores)) - out = paddle.vision.ops.nms( - paddle.to_tensor(boxes), self.threshold) + out = paddle.vision.ops.nms(paddle.to_tensor(boxes), + self.threshold, + paddle.to_tensor(scores)) + out = paddle.vision.ops.nms(paddle.to_tensor(boxes), + self.threshold) out_py = nms(boxes, self.threshold) self.assertTrue( @@ -102,10 +103,11 @@ class TestOpsNMS(unittest.TestCase): boxes, scores, category_idxs, categories = gen_args( self.num_boxes, dtype) paddle.set_device(device) - out = paddle.vision.ops.nms( - paddle.to_tensor(boxes), self.threshold, - paddle.to_tensor(scores), - paddle.to_tensor(category_idxs), categories, self.topk) + out = paddle.vision.ops.nms(paddle.to_tensor(boxes), + self.threshold, + paddle.to_tensor(scores), + paddle.to_tensor(category_idxs), + categories, self.topk) out_py = multiclass_nms(boxes, scores, category_idxs, self.threshold, self.topk) @@ -119,10 +121,12 @@ class TestOpsNMS(unittest.TestCase): paddle.enable_static() boxes, scores, category_idxs, categories = gen_args( self.num_boxes, dtype) - boxes_static = paddle.static.data( - shape=boxes.shape, dtype=boxes.dtype, name="boxes") - scores_static = paddle.static.data( - shape=scores.shape, dtype=scores.dtype, name="scores") + boxes_static = paddle.static.data(shape=boxes.shape, + dtype=boxes.dtype, + name="boxes") + scores_static = paddle.static.data(shape=scores.shape, + dtype=scores.dtype, + name="scores") category_idxs_static = paddle.static.data( shape=category_idxs.shape, dtype=category_idxs.dtype, @@ -175,15 +179,17 @@ class TestOpsNMS(unittest.TestCase): fun, path, input_spec=[ - paddle.static.InputSpec( - shape=[None, 4], dtype='float32', name='x') - ], ) + paddle.static.InputSpec(shape=[None, 4], + dtype='float32', + name='x') + ], + ) load_func = paddle.jit.load(path) res = load_func(paddle.to_tensor(boxes)) self.assertTrue( np.array_equal(origin, res), - "origin out: {}\n 
inference model out: {}\n".format(origin, - res)) + "origin out: {}\n inference model out: {}\n".format( + origin, res)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index a0c5ce77f1d..b70b69ca97c 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -28,35 +28,45 @@ from paddle.fluid.framework import _test_eager_guard import paddle from paddle.io import Dataset import numpy + paddle.enable_static() class TestOptimizer(unittest.TestCase): + def test_sgd_optimizer(self): + def check_sgd_optimizer(optimizer_attr): init_program = framework.Program() program = framework.Program() block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="mul.x", - optimize_attr=optimizer_attr) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + mul_x = block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr=optimizer_attr) + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01) opts, _ = sgd_optimizer.minimize(mean_out, init_program) return opts @@ -71,31 +81,40 @@ class TestOptimizer(unittest.TestCase): class TestOptimizerBackwardApplygrad(unittest.TestCase): + def test_sgd_optimizer(self): + def check_sgd_optimizer(optimizer_attr): init_program = framework.Program() program = framework.Program() block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="mul.x", - optimize_attr=optimizer_attr) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + mul_x = block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr=optimizer_attr) + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y 
+ }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01) with framework.program_guard(program, init_program): p_g = sgd_optimizer.backward(mean_out) @@ -112,7 +131,9 @@ class TestOptimizerBackwardApplygrad(unittest.TestCase): class TestMomentumOptimizer(unittest.TestCase): + class MockMomentum(optimizer.MomentumOptimizer): + def get_accumulators(self): return self._accumulators @@ -123,29 +144,36 @@ class TestMomentumOptimizer(unittest.TestCase): init_program = framework.Program() program = framework.Program() block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="mul.x", - optimize_attr={'learning_rate': 1.1}) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) + mul_x = block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr={'learning_rate': 1.1}) + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) learning_rate = 0.01 - momentum_optimizer = self.MockMomentum( - learning_rate=learning_rate, momentum=0.2) - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + momentum_optimizer = self.MockMomentum(learning_rate=learning_rate, + momentum=0.2) + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) @@ -176,29 +204,37 @@ class TestMomentumOptimizer(unittest.TestCase): init_program = framework.Program() program = framework.Program() block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="mul.x", - optimize_attr={'learning_rate': 1.1}) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + mul_x = block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr={'learning_rate': 1.1}) + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + 
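
The momentum-optimizer hunks around here only re-indent the program construction; as background, the velocity accumulator these tests count corresponds to the usual SGD-with-momentum update. A short sketch of that textbook rule (the generic formulation, not a claim about the exact arithmetic of Paddle's momentum kernel):

    def momentum_step(param, grad, velocity, lr=0.01, mu=0.2):
        # classic (non-Nesterov) momentum: accumulate a velocity,
        # then move the parameter along it
        velocity = mu * velocity + grad
        param = param - lr * velocity
        return param, velocity

    p, v = 1.0, 0.0
    for g in (0.5, 0.5, 0.5):          # three identical gradients
        p, v = momentum_step(p, v, g)
    print(p, v)                        # velocity builds up across steps

lr=0.01 and mu=0.2 mirror the values used in the test above; everything else in the sketch is illustrative.
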
mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) learning_rate = 0.01 - momentum_optimizer = self.MockMomentum( - learning_rate=learning_rate, momentum=0.2, use_nesterov=True) + momentum_optimizer = self.MockMomentum(learning_rate=learning_rate, + momentum=0.2, + use_nesterov=True) params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) @@ -227,7 +263,9 @@ class TestMomentumOptimizer(unittest.TestCase): class TestAdagradOptimizer(unittest.TestCase): + class MockAdagrad(optimizer.AdagradOptimizer): + def get_accumulators(self): return self._accumulators @@ -238,29 +276,36 @@ class TestAdagradOptimizer(unittest.TestCase): init_program = framework.Program() program = framework.Program() block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="mul.x", - optimize_attr={'learning_rate': 1.1}) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + mul_x = block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr={'learning_rate': 1.1}) + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) learning_rate = 0.01 - adagrad_optimizer = self.MockAdagrad( - learning_rate=learning_rate, epsilon=1.0e-6) + adagrad_optimizer = self.MockAdagrad(learning_rate=learning_rate, + epsilon=1.0e-6) params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0) @@ -287,7 +332,9 @@ class TestAdagradOptimizer(unittest.TestCase): class TestAdamOptimizer(unittest.TestCase): + class MockAdam(optimizer.AdamOptimizer): + def get_accumulators(self): return self._accumulators @@ -301,29 +348,37 @@ class TestAdamOptimizer(unittest.TestCase): init_program = framework.Program() program = framework.Program() block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="mul.x", - optimize_attr={'learning_rate': 1.1}) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + mul_x = 
block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr={'learning_rate': 1.1}) + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) learning_rate = 0.01 - adam_optimizer = self.MockAdam( - learning_rate=learning_rate, beta1=0.9, beta2=0.999) + adam_optimizer = self.MockAdam(learning_rate=learning_rate, + beta1=0.9, + beta2=0.999) params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adam_optimizer.get_accumulators()), 0) @@ -352,7 +407,9 @@ class TestAdamOptimizer(unittest.TestCase): class TestAdamaxOptimizer(unittest.TestCase): + class MockAdamax(optimizer.AdamaxOptimizer): + def get_accumulators(self): return self._accumulators @@ -366,29 +423,37 @@ class TestAdamaxOptimizer(unittest.TestCase): init_program = framework.Program() program = framework.Program() block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="mul.x", - optimize_attr={'learning_rate': 1.1}) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + mul_x = block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr={'learning_rate': 1.1}) + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) learning_rate = 0.01 - adamax_optimizer = self.MockAdamax( - learning_rate=learning_rate, beta1=0.9, beta2=0.999) + adamax_optimizer = self.MockAdamax(learning_rate=learning_rate, + beta1=0.9, + beta2=0.999) params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adamax_optimizer.get_accumulators()), 0) @@ -417,33 +482,44 @@ class TestAdamaxOptimizer(unittest.TestCase): class TestDpsgdOptimizer(unittest.TestCase): + def test_dpsgd_optimizer(self): + def check_dpsgd_optimizer(optimizer_attr): init_program = framework.Program() program = framework.Program() block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="mul.x", - optimize_attr=optimizer_attr) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, 
name="mul.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) - dpsgd_optimizer = optimizer.DpsgdOptimizer( - learning_rate=0.01, clip=100.0, batch_size=16.0, sigma=0.0) + mul_x = block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr=optimizer_attr) + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) + dpsgd_optimizer = optimizer.DpsgdOptimizer(learning_rate=0.01, + clip=100.0, + batch_size=16.0, + sigma=0.0) opts, _ = dpsgd_optimizer.minimize(mean_out, init_program) return opts @@ -458,7 +534,9 @@ class TestDpsgdOptimizer(unittest.TestCase): class TestDecayedAdagradOptimizer(unittest.TestCase): + class MockDecayedAdagrad(optimizer.DecayedAdagradOptimizer): + def get_accumulators(self): return self._accumulators @@ -469,26 +547,33 @@ class TestDecayedAdagradOptimizer(unittest.TestCase): init_program = framework.Program() program = framework.Program() block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="mul.x", - optimize_attr={'learning_rate': 1.1}) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + mul_x = block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr={'learning_rate': 1.1}) + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) learning_rate = 0.01 decayed_adagrad_optimizer = self.MockDecayedAdagrad( learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6) @@ -519,7 +604,9 @@ class TestDecayedAdagradOptimizer(unittest.TestCase): class TestFtrlOptimizer(unittest.TestCase): + class MockFtrl(optimizer.FtrlOptimizer): + def get_accumulators(self): return self._accumulators @@ -533,29 +620,38 @@ class TestFtrlOptimizer(unittest.TestCase): init_program = framework.Program() program = framework.Program() block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="mul.x", - optimize_attr={'learning_rate': 
1.1}) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + mul_x = block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr={'learning_rate': 1.1}) + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) learning_rate = 0.01 - ftrl_optimizer = self.MockFtrl( - learning_rate=learning_rate, l1=0.0, l2=0.0, lr_power=-0.5) + ftrl_optimizer = self.MockFtrl(learning_rate=learning_rate, + l1=0.0, + l2=0.0, + lr_power=-0.5) params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(ftrl_optimizer.get_accumulators()), 0) @@ -584,34 +680,44 @@ class TestFtrlOptimizer(unittest.TestCase): class TestLookaheadOptimizer(unittest.TestCase): + def test_lookahead_optimizer(self): init_program = framework.Program() program = framework.Program() block = program.global_block() init_block = init_program.global_block() - mul_x = block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="mul.x", - optimize_attr={'learning_rate': 1.1}) - init_mul_x = init_block.create_parameter( - dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + mul_x = block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr={'learning_rate': 1.1}) + init_mul_x = init_block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x") + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) sgd = optimizer.SGD(learning_rate=0.01) lookahead = optimizer.LookaheadOptimizer(sgd, alpha=0.5, k=5) @@ -622,83 +728,109 @@ class TestLookaheadOptimizer(unittest.TestCase): class TestRecomputeOptimizer(unittest.TestCase): + def net(self, return_input=False, with_dropout=False, with_seed=False): 
program = framework.Program() block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") + mul_x = block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x") + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") if with_dropout is True: - mul_out_drop = block.create_var( - dtype="float32", - shape=[5, 8], - lod_level=0, - name="mul.out.dropout") - mul_out_mask = block.create_var( - dtype="uint8", shape=[5, 8], lod_level=0, name="mul.out.mask") + mul_out_drop = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out.dropout") + mul_out_mask = block.create_var(dtype="uint8", + shape=[5, 8], + lod_level=0, + name="mul.out.mask") if with_seed is True: - seed_out = block.create_var( - dtype="int32", shape=[1], name="seed.out") - - b1 = block.create_parameter( - dtype="float32", shape=[5, 8], lod_level=0, name="b1") - b1_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="b1_out") - b2 = block.create_parameter( - dtype="float32", shape=[5, 8], lod_level=0, name="b2") - b2_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="b2_out") - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) + seed_out = block.create_var(dtype="int32", + shape=[1], + name="seed.out") + + b1 = block.create_parameter(dtype="float32", + shape=[5, 8], + lod_level=0, + name="b1") + b1_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="b1_out") + b2 = block.create_parameter(dtype="float32", + shape=[5, 8], + lod_level=0, + name="b2") + b2_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="b2_out") + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) if with_dropout is True: dropout_inputs = {'X': [mul_out]} if with_seed is True: - block.append_op( - type='seed', - outputs={'Out': seed_out}, - attrs={ - 'deterministic': True, - 'rng_name': 'rng0', - 'force_cpu': True - }) + block.append_op(type='seed', + outputs={'Out': seed_out}, + attrs={ + 'deterministic': True, + 'rng_name': 'rng0', + 'force_cpu': True + }) dropout_inputs = {'X': [mul_out], 'Seed': [seed_out]} - block.append_op( - type='dropout', - inputs=dropout_inputs, - outputs={'Out': [mul_out_drop], - 'Mask': [mul_out_mask]}, - attrs={'dropout_prob': 0.5, }) - block.append_op( - type="elementwise_add", - inputs={"X": mul_out_drop, - "Y": b1}, - outputs={"Out": b1_out}) + block.append_op(type='dropout', + inputs=dropout_inputs, + outputs={ + 'Out': [mul_out_drop], + 'Mask': [mul_out_mask] + }, + attrs={ + 'dropout_prob': 0.5, + }) + block.append_op(type="elementwise_add", + inputs={ + "X": mul_out_drop, + "Y": b1 + }, + outputs={"Out": b1_out}) else: - block.append_op( - type="elementwise_add", - inputs={"X": mul_out, - "Y": b1}, - outputs={"Out": b1_out}) - - block.append_op( - type="elementwise_add", - 
inputs={"X": b1_out, - "Y": b2}, - outputs={"Out": b2_out}) - block.append_op( - type="mean", inputs={"X": b2_out}, outputs={"Out": mean_out}) + block.append_op(type="elementwise_add", + inputs={ + "X": mul_out, + "Y": b1 + }, + outputs={"Out": b1_out}) + + block.append_op(type="elementwise_add", + inputs={ + "X": b1_out, + "Y": b2 + }, + outputs={"Out": b2_out}) + block.append_op(type="mean", + inputs={"X": b2_out}, + outputs={"Out": mean_out}) if return_input == True: return mul_x, mul_out, b1_out, b2_out, mean_out @@ -832,11 +964,10 @@ class TestRecomputeOptimizer(unittest.TestCase): recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer) recompute_optimizer._set_checkpoints([b1_out]) # apply backward - params_grads = recompute_optimizer.backward( - mean_out, - startup_program=None, - parameter_list=None, - no_grad_set=None) + params_grads = recompute_optimizer.backward(mean_out, + startup_program=None, + parameter_list=None, + no_grad_set=None) # apply gradient program = mean_out.block.program @@ -916,13 +1047,13 @@ class TestRecomputeOptimizer(unittest.TestCase): def gen_data(): return { "x": np.random.random(size=(100, 3)).astype('float32'), - "y": np.random.randint( - 2, size=(100, 1)).astype('int64') + "y": np.random.randint(2, size=(100, 1)).astype('int64') } def mlp(input_x, input_y): - drop_res = fluid.layers.dropout( - input_x, dropout_prob=0.5, name="dropout_with_seed_cpu") + drop_res = fluid.layers.dropout(input_x, + dropout_prob=0.5, + name="dropout_with_seed_cpu") prediction = fluid.layers.fc(input=[drop_res], size=2, act='softmax') @@ -935,8 +1066,9 @@ class TestRecomputeOptimizer(unittest.TestCase): scope = fluid.Scope() with fluid.scope_guard(scope): with program_guard(main_program, startup_program): - input_x = fluid.layers.data( - name="x", shape=[3], dtype='float32') + input_x = fluid.layers.data(name="x", + shape=[3], + dtype='float32') input_y = fluid.layers.data(name="y", shape=[1], dtype='int64') drop_res, prediction, cost = mlp(input_x, input_y) sgd = fluid.optimizer.Adam(learning_rate=0.01) @@ -960,6 +1092,7 @@ class TestRecomputeOptimizer(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestRecomputeOptimizerCUDA(unittest.TestCase): + def test_dropout_with_seed(self): """ when we recompute a dropout op, make sure that the recomputed one @@ -969,13 +1102,13 @@ class TestRecomputeOptimizerCUDA(unittest.TestCase): def gen_data(): return { "x": np.random.random(size=(100, 3)).astype('float32'), - "y": np.random.randint( - 2, size=(100, 1)).astype('int64') + "y": np.random.randint(2, size=(100, 1)).astype('int64') } def mlp(input_x, input_y): - drop_res = fluid.layers.dropout( - input_x, dropout_prob=0.5, name="dropout_with_seed_gpu") + drop_res = fluid.layers.dropout(input_x, + dropout_prob=0.5, + name="dropout_with_seed_gpu") prediction = fluid.layers.fc(input=[drop_res], size=2, act='softmax') @@ -988,8 +1121,9 @@ class TestRecomputeOptimizerCUDA(unittest.TestCase): scope = fluid.Scope() with fluid.scope_guard(scope): with program_guard(main_program, startup_program): - input_x = fluid.layers.data( - name="x", shape=[3], dtype='float32') + input_x = fluid.layers.data(name="x", + shape=[3], + dtype='float32') input_y = fluid.layers.data(name="y", shape=[1], dtype='int64') drop_res, prediction, cost = mlp(input_x, input_y) sgd = fluid.optimizer.Adam(learning_rate=0.01) @@ -1011,34 +1145,50 @@ class TestRecomputeOptimizerCUDA(unittest.TestCase): class 
TestGradientMergeOptimizer(unittest.TestCase): + def net(self): program = framework.Program() block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - b1 = block.create_parameter( - dtype="float32", shape=[5, 8], lod_level=0, name="b1") - b1_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="b1_out") - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - block.append_op( - type="elementwise_add", - inputs={"X": mul_out, - "Y": b1}, - outputs={"Out": b1_out}) - block.append_op( - type="mean", inputs={"X": b1_out}, outputs={"Out": mean_out}) + mul_x = block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x") + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + b1 = block.create_parameter(dtype="float32", + shape=[5, 8], + lod_level=0, + name="b1") + b1_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="b1_out") + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + block.append_op(type="elementwise_add", + inputs={ + "X": mul_out, + "Y": b1 + }, + outputs={"Out": b1_out}) + block.append_op(type="mean", + inputs={"X": b1_out}, + outputs={"Out": mean_out}) return mean_out def test_program_desc(self, ): @@ -1079,9 +1229,9 @@ class TestGradientMergeOptimizer(unittest.TestCase): # optimize block self.assertEqual(len(main_program.block(1).ops), 6) - self.assertEqual([op.type for op in main_program.block(1).ops], [ - 'scale', 'scale', 'sgd', 'sgd', 'fill_constant', 'fill_constant' - ]) + self.assertEqual( + [op.type for op in main_program.block(1).ops], + ['scale', 'scale', 'sgd', 'sgd', 'fill_constant', 'fill_constant']) class TestOptimizerDtype(unittest.TestCase): @@ -1091,7 +1241,9 @@ class TestOptimizerDtype(unittest.TestCase): ''' def check_with_dtype(self, dtype): + class MyLayer(paddle.nn.Layer): + def __init__(self, dtype): super(MyLayer, self).__init__() self._w = self.create_parameter([2, 3], dtype=dtype) @@ -1132,6 +1284,7 @@ class TestMasterWeightSaveForFP16(unittest.TestCase): numpy.random.seed(100) class SimpleNet(paddle.nn.Layer): + def __init__(self, input_size, output_size): super(SimpleNet, self).__init__() self.linears = paddle.nn.LayerList([ @@ -1149,6 +1302,7 @@ class TestMasterWeightSaveForFP16(unittest.TestCase): nums_batch = 10 class RandomDataset(Dataset): + def __init__(self, num_samples): self.num_samples = num_samples @@ -1161,19 +1315,17 @@ class TestMasterWeightSaveForFP16(unittest.TestCase): return self.num_samples dataset = RandomDataset(nums_batch * batch_size) - loader = paddle.io.DataLoader( - dataset, - batch_size=batch_size, - shuffle=False, - drop_last=True, - num_workers=0) + loader = paddle.io.DataLoader(dataset, + batch_size=batch_size, + shuffle=False, + drop_last=True, + num_workers=0) mse = paddle.nn.MSELoss() model = SimpleNet(input_size, output_size) # 定义模型 
- optimizer = paddle.optimizer.Momentum( - learning_rate=0.0001, - parameters=model.parameters(), - multi_precision=True) # 定义优化器 + optimizer = paddle.optimizer.Momentum(learning_rate=0.0001, + parameters=model.parameters(), + multi_precision=True) # 定义优化器 scaler = paddle.amp.GradScaler(init_loss_scaling=1024) model = paddle.amp.decorate(models=model, level='O2') diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_for_varbase.py b/python/paddle/fluid/tests/unittests/test_optimizer_for_varbase.py index b2b133a6b42..31bbaefd165 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_for_varbase.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_for_varbase.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class TestOptimizerForVarBase(unittest.TestCase): + def setUp(self): self.lr = 0.01 @@ -33,8 +34,9 @@ class TestOptimizerForVarBase(unittest.TestCase): z = x + y - opt = optimizer( - learning_rate=self.lr, parameters=[x], weight_decay=0.01) + opt = optimizer(learning_rate=self.lr, + parameters=[x], + weight_decay=0.01) z.backward() opt.step() diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py index 7caae211b7b..30cfa9f17eb 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py @@ -23,6 +23,7 @@ import paddle.fluid.optimizer as optimizer from paddle.fluid.backward import _append_grad_suffix_ import paddle + paddle.enable_static() np.random.seed(10) @@ -81,21 +82,18 @@ class SimpleNetWithCond(object): param_x = fluid.layers.create_parameter( dtype="float32", shape=self.shape, - attr=fluid.ParamAttr( - learning_rate=self.param_lr, name="param_x"), + attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_x"), default_initializer=fluid.initializer.NumpyArrayInitializer(self.x)) param_y = fluid.layers.create_parameter( dtype="float32", shape=self.shape, - attr=fluid.ParamAttr( - learning_rate=self.param_lr, name="param_y"), + attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_y"), default_initializer=fluid.initializer.NumpyArrayInitializer(self.y)) param_z = fluid.layers.create_parameter( dtype="float32", shape=self.shape, - attr=fluid.ParamAttr( - learning_rate=self.param_lr, name="param_z"), + attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_z"), default_initializer=fluid.initializer.NumpyArrayInitializer(self.z)) sum_xy = fluid.layers.elementwise_add(param_x, param_y, name='sum_xy') @@ -103,18 +101,21 @@ class SimpleNetWithCond(object): useless = fluid.layers.fc(param_x, size=1, name='fc_useless') def cond_true(): - cond_yz = fluid.layers.elementwise_add( - param_y, param_z, name='sum_cond_yz') + cond_yz = fluid.layers.elementwise_add(param_y, + param_z, + name='sum_cond_yz') # param_y will not be updated param_y.stop_gradient = self.y_no_grad - cond_res = fluid.layers.elementwise_add( - cond_yz, param_z, name='sum_cond_true') + cond_res = fluid.layers.elementwise_add(cond_yz, + param_z, + name='sum_cond_true') cond_useless = fluid.layers.elementwise_mul(param_x, param_y) return cond_res def cond_false(): - cond_res = fluid.layers.elementwise_add( - param_y, param_z, name='sum_cond_false') + cond_res = fluid.layers.elementwise_add(param_y, + param_z, + name='sum_cond_false') cond_useless = fluid.layers.elementwise_mul(param_z, param_z) return cond_res @@ -229,17 +230,18 @@ class TestOptimizer(unittest.TestCase): res = 
exe.run(main_program, fetch_list=fetch_list) gt_grads = test_net._calc_gradient(cond_i) - gt_params = self._apply_optimize(test_net, - gt_grads) + gt_params = self._apply_optimize( + test_net, gt_grads) param_grads = gt_params + gt_grads for i in range(len(res)): - np.testing.assert_allclose(res[i], - param_grads[i]) + np.testing.assert_allclose( + res[i], param_grads[i]) @unittest.skipIf(not fluid.core.supports_bfloat16(), "place does not support BF16 evaluation") class TestSGDOptimizer(TestOptimizer): + def test_optimizer_multiblock_except(self): with self.assertRaisesRegexp(ValueError, "var param_y not in this block"): @@ -256,8 +258,10 @@ class TestAdamOptimizer(TestOptimizer): def setUp(self): self._init_config() beta1, beta2, epsilon = 0.9, 0.999, 1e-8 - self.optimizer = optimizer.AdamOptimizer( - learning_rate=0.01, beta1=beta1, beta2=beta2, epsilon=epsilon) + self.optimizer = optimizer.AdamOptimizer(learning_rate=0.01, + beta1=beta1, + beta2=beta2, + epsilon=epsilon) self.attr = { "beta1": beta1, "beta2": beta2, @@ -282,8 +286,9 @@ class TestAdamOptimizer(TestOptimizer): moment2_out = beta2 * moment2 + (1. - beta2) * np.square(grad) lr = attr['lr'] * np.sqrt(1. - beta2_pow) / (1. - beta1_pow) - param_out = param - lr * (moment1_out / (np.sqrt(moment2_out) + epsilon - * np.sqrt(1 - beta2_pow))) + param_out = param - lr * ( + moment1_out / + (np.sqrt(moment2_out) + epsilon * np.sqrt(1 - beta2_pow))) # update hyper-parameter of optimizer self.param_attr[name]['beta1_pow'] = beta1_pow * beta1 diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py index 2cb6d0be430..40afe9248bf 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py @@ -78,8 +78,8 @@ def static(train_data, def fn_2(opt, avg_loss=None, pred=None, label=None): if avg_loss is None: - loss = layers.softmax_with_cross_entropy( - logits=pred, label=label) + loss = layers.softmax_with_cross_entropy(logits=pred, + label=label) avg_loss = layers.mean(loss, name='mean_softmax_loss') opt.minimize(avg_loss) return avg_loss @@ -96,14 +96,14 @@ def static(train_data, mod_two = layers.elementwise_mod(id, two) == 0 if loss_in_switch: - avg_loss = layers.case([( - mod_two, lambda: fn_1(adam, None, prediction, label))], - lambda: fn_2(sgd, None, prediction, label)) + avg_loss = layers.case( + [(mod_two, lambda: fn_1(adam, None, prediction, label))], + lambda: fn_2(sgd, None, prediction, label)) else: loss_1 = layers.cross_entropy(input=prediction, label=label) avg_loss_1 = layers.mean(loss_1) - loss_2 = layers.softmax_with_cross_entropy( - logits=prediction, label=label) + loss_2 = layers.softmax_with_cross_entropy(logits=prediction, + label=label) avg_loss_2 = layers.mean(loss_2) avg_loss = layers.case([(mod_two, lambda: fn_1(adam, avg_loss_1))], lambda: fn_2(sgd, avg_loss_2)) @@ -127,6 +127,7 @@ def static(train_data, class DygraphLayer(fluid.dygraph.Layer): + def __init__(self): super(DygraphLayer, self).__init__() self.fc_1 = fluid.dygraph.nn.Linear( @@ -136,7 +137,8 @@ class DygraphLayer(fluid.dygraph.Layer): param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( value=0.99)), bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( - value=0.5)), ) + value=0.5)), + ) self.fc_2 = fluid.dygraph.nn.Linear( FC_SIZE, @@ -159,8 +161,8 @@ def dynamic(train_data, use_cuda=False, use_parallel_exe=False): 
fluid.default_startup_program().random_seed = SEED fluid.default_main_program().random_seed = SEED dy_layer = DygraphLayer() - adam = fluid.optimizer.Adam( - learning_rate=LR, parameter_list=dy_layer.parameters()) + adam = fluid.optimizer.Adam(learning_rate=LR, + parameter_list=dy_layer.parameters()) sgd = fluid.optimizer.SGD(learning_rate=LR, parameter_list=dy_layer.parameters()) @@ -176,8 +178,8 @@ def dynamic(train_data, use_cuda=False, use_parallel_exe=False): loss.backward() adam.minimize(loss) else: - softmax_loss = layers.softmax_with_cross_entropy(prediction, - var_label) + softmax_loss = layers.softmax_with_cross_entropy( + prediction, var_label) loss = layers.mean(softmax_loss) loss.backward() sgd.minimize(loss) @@ -199,8 +201,9 @@ class TestMultiTask(unittest.TestCase): np.random.seed(seed) image_np = np.random.random(size=image_shape).astype('float32') np.random.seed(seed) - label_np = np.random.randint( - low=0, high=CLASS_NUM - 1, size=label_shape).astype('int64') + label_np = np.random.randint(low=0, + high=CLASS_NUM - 1, + size=label_shape).astype('int64') return image_np, label_np def init_train_data(self): @@ -223,13 +226,13 @@ class TestMultiTask(unittest.TestCase): np.allclose(pre_1, pre_2), msg='static prediction is {}\ndynamic prediction is {}'.format( pre_1, pre_2)) - self.assertTrue( - np.allclose(loss_1, loss_2), - msg='static loss is {}\ndynamic loss is {}'.format(loss_1, - loss_2)) + self.assertTrue(np.allclose(loss_1, loss_2), + msg='static loss is {}\ndynamic loss is {}'.format( + loss_1, loss_2)) class TestMultiOptimizersMultiCardsError(unittest.TestCase): + def test_error(self): startup_program = Program() main_program = Program() @@ -270,15 +273,15 @@ class TestMultiOptimizersMultiCardsError(unittest.TestCase): # to use multi cards ** only on CPU ** not GPU to reduce CI time. 
os.environ['CPU_NUM'] = str(2) - pe_exe = fluid.ParallelExecutor( - use_cuda=use_cuda, - main_program=main_program, - loss_name=avg_loss.name) + pe_exe = fluid.ParallelExecutor(use_cuda=use_cuda, + main_program=main_program, + loss_name=avg_loss.name) num_devices = pe_exe.device_count def not_implemented_error(): pe_exe.run(feed={ - 'X': np.random.random(size=[64, 10]).astype('float32'), + 'X': + np.random.random(size=[64, 10]).astype('float32'), }, fetch_list=[avg_loss.name]) diff --git a/python/paddle/fluid/tests/unittests/test_outer.py b/python/paddle/fluid/tests/unittests/test_outer.py index 2c4d64344cf..cffe8a895c7 100644 --- a/python/paddle/fluid/tests/unittests/test_outer.py +++ b/python/paddle/fluid/tests/unittests/test_outer.py @@ -23,21 +23,26 @@ from paddle.fluid.framework import _test_eager_guard, in_dygraph_mode class TestMultiplyApi(unittest.TestCase): + def _run_static_graph_case(self, x_data, y_data): with program_guard(Program(), Program()): paddle.enable_static() - x = paddle.static.data( - name='x', shape=x_data.shape, dtype=x_data.dtype) - y = paddle.static.data( - name='y', shape=y_data.shape, dtype=y_data.dtype) + x = paddle.static.data(name='x', + shape=x_data.shape, + dtype=x_data.dtype) + y = paddle.static.data(name='y', + shape=y_data.shape, + dtype=y_data.dtype) res = paddle.outer(x, y) - place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace() exe = paddle.static.Executor(place) outs = exe.run(paddle.static.default_main_program(), - feed={'x': x_data, - 'y': y_data}, + feed={ + 'x': x_data, + 'y': y_data + }, fetch_list=[res]) res = outs[0] return res @@ -89,20 +94,18 @@ class TestMultiplyApi(unittest.TestCase): self.assertTrue(np.allclose(res, np.outer(x_data, y_data), rtol=1e4)) # test dynamic computation graph: 2-d array Complex - x_data = np.random.rand(20, - 50).astype(np.float64) + 1J * np.random.rand( - 20, 50).astype(np.float64) - y_data = np.random.rand(50).astype(np.float64) + 1J * np.random.rand( - 50).astype(np.float64) + x_data = np.random.rand(20, 50).astype( + np.float64) + 1J * np.random.rand(20, 50).astype(np.float64) + y_data = np.random.rand(50).astype( + np.float64) + 1J * np.random.rand(50).astype(np.float64) res = self._run_dynamic_graph_case(x_data, y_data) self.assertTrue(np.allclose(res, np.outer(x_data, y_data))) # test dynamic computation graph: 3-d array Complex - x_data = np.random.rand(5, 10, - 10).astype(np.float64) + 1J * np.random.rand( - 5, 10, 10).astype(np.float64) - y_data = np.random.rand(2, 10).astype(np.float64) + 1J * np.random.rand( - 2, 10).astype(np.float64) + x_data = np.random.rand(5, 10, 10).astype( + np.float64) + 1J * np.random.rand(5, 10, 10).astype(np.float64) + y_data = np.random.rand(2, 10).astype( + np.float64) + 1J * np.random.rand(2, 10).astype(np.float64) res = self._run_dynamic_graph_case(x_data, y_data) self.assertTrue(np.allclose(res, np.outer(x_data, y_data))) @@ -113,6 +116,7 @@ class TestMultiplyApi(unittest.TestCase): class TestMultiplyError(unittest.TestCase): + def func_test_errors(self): # test static computation graph: dtype can not be int8 paddle.enable_static() @@ -130,7 +134,7 @@ class TestMultiplyError(unittest.TestCase): y = paddle.to_tensor(y_data) self.assertRaises(RuntimeError, paddle.outer, x, y) - # test dynamic computation graph: dtype must be same + # test dynamic computation graph: dtype must be same x_data = np.random.randn(200).astype(np.float32) y_data = 
np.random.randn(200).astype(np.float64) x = paddle.to_tensor(x_data) diff --git a/python/paddle/fluid/tests/unittests/test_overlap_add_op.py b/python/paddle/fluid/tests/unittests/test_overlap_add_op.py index 7af67d01b57..e04db251de6 100644 --- a/python/paddle/fluid/tests/unittests/test_overlap_add_op.py +++ b/python/paddle/fluid/tests/unittests/test_overlap_add_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -70,6 +70,7 @@ def overlap_add(x, hop_length, axis=-1): class TestOverlapAddOp(OpTest): + def setUp(self): self.op_type = "overlap_add" self.shape, self.type, self.attrs = self.initTestCase() @@ -99,6 +100,7 @@ class TestOverlapAddOp(OpTest): class TestCase1(TestOverlapAddOp): + def initTestCase(self): input_shape = (3, 50) input_type = 'float64' @@ -110,6 +112,7 @@ class TestCase1(TestOverlapAddOp): class TestCase2(TestOverlapAddOp): + def initTestCase(self): input_shape = (2, 40, 5) input_type = 'float64' @@ -121,6 +124,7 @@ class TestCase2(TestOverlapAddOp): class TestCase3(TestOverlapAddOp): + def initTestCase(self): input_shape = (5, 40, 2) input_type = 'float64' @@ -132,6 +136,7 @@ class TestCase3(TestOverlapAddOp): class TestCase4(TestOverlapAddOp): + def initTestCase(self): input_shape = (3, 5, 12, 8) input_type = 'float64' @@ -143,6 +148,7 @@ class TestCase4(TestOverlapAddOp): class TestCase5(TestOverlapAddOp): + def initTestCase(self): input_shape = (8, 12, 5, 3) input_type = 'float64' diff --git a/python/paddle/fluid/tests/unittests/test_pad2d_op.py b/python/paddle/fluid/tests/unittests/test_pad2d_op.py index 1da94ee4fca..0f43ddbd8fc 100644 --- a/python/paddle/fluid/tests/unittests/test_pad2d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pad2d_op.py @@ -20,6 +20,7 @@ from paddle.fluid import Program, program_guard class TestPad2dOp(OpTest): + def setUp(self): self.pad_value = 0.0 self.variable_paddings = False @@ -29,11 +30,11 @@ class TestPad2dOp(OpTest): self.attrs = {} if self.variable_paddings: self.attrs['paddings'] = [] - self.inputs['Paddings'] = np.array(self.paddings).flatten().astype( - "int32") + self.inputs['Paddings'] = np.array( + self.paddings).flatten().astype("int32") else: - self.attrs['paddings'] = np.array(self.paddings).flatten().astype( - "int32") + self.attrs['paddings'] = np.array( + self.paddings).flatten().astype("int32") self.attrs['pad_value'] = self.pad_value self.attrs['mode'] = self.mode self.attrs['data_format'] = self.data_format @@ -67,6 +68,7 @@ class TestPad2dOp(OpTest): class TestCase1(TestPad2dOp): + def initTestCase(self): self.shape = (2, 3, 4, 5) self.paddings = [0, 1, 2, 3] @@ -75,6 +77,7 @@ class TestCase1(TestPad2dOp): class TestCase2(TestPad2dOp): + def initTestCase(self): self.shape = (2, 3, 4, 5) self.paddings = [0, 1, 2, 3] @@ -83,6 +86,7 @@ class TestCase2(TestPad2dOp): class TestCase3(TestPad2dOp): + def initTestCase(self): self.shape = (2, 4, 4, 4) self.paddings = [0, 1, 2, 3] @@ -91,6 +95,7 @@ class TestCase3(TestPad2dOp): class TestCase4(TestPad2dOp): + def initTestCase(self): self.shape = (2, 4, 4, 4) self.paddings = [0, 1, 2, 3] 
@@ -99,6 +104,7 @@ class TestCase4(TestPad2dOp): class TestCase5(TestPad2dOp): + def initTestCase(self): self.shape = (2, 4, 4, 4) self.paddings = [0, 1, 2, 3] @@ -108,6 +114,7 @@ class TestCase5(TestPad2dOp): class TestCase6(TestPad2dOp): + def initTestCase(self): self.shape = (2, 4, 4, 4) self.paddings = [0, 1, 2, 3] @@ -118,6 +125,7 @@ class TestCase6(TestPad2dOp): class TestCase7(TestPad2dOp): + def initTestCase(self): self.shape = (2, 3, 4, 5) self.paddings = [0, 1, 2, 3] @@ -127,6 +135,7 @@ class TestCase7(TestPad2dOp): class TestPad2dOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): input_data = np.random.random((2, 2, 2, 2)).astype("float32") @@ -136,8 +145,9 @@ class TestPad2dOpError(unittest.TestCase): self.assertRaises(TypeError, test_Variable) - data = fluid.data( - name='data', shape=[None, 3, 20, 20], dtype='float16') + data = fluid.data(name='data', + shape=[None, 3, 20, 20], + dtype='float16') fluid.layers.pad2d(input=data, paddings=[1, 1, 1, 1]) diff --git a/python/paddle/fluid/tests/unittests/test_pad3d_op.py b/python/paddle/fluid/tests/unittests/test_pad3d_op.py index eabff5f0021..b277ebbb75d 100644 --- a/python/paddle/fluid/tests/unittests/test_pad3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pad3d_op.py @@ -24,6 +24,7 @@ from paddle.fluid import Program, program_guard, Executor, default_main_program class TestPad3dOp(OpTest): + def setUp(self): paddle.enable_static() self.value = 0.0 @@ -34,11 +35,11 @@ class TestPad3dOp(OpTest): self.attrs = {} if self.variable_paddings: self.attrs['paddings'] = [] - self.inputs['Paddings'] = np.array(self.paddings).flatten().astype( - "int32") + self.inputs['Paddings'] = np.array( + self.paddings).flatten().astype("int32") else: - self.attrs['paddings'] = np.array(self.paddings).flatten().astype( - "int32") + self.attrs['paddings'] = np.array( + self.paddings).flatten().astype("int32") self.attrs['value'] = self.value self.attrs['mode'] = self.mode self.attrs['data_format'] = self.data_format @@ -87,6 +88,7 @@ class TestPad3dOp(OpTest): class TestCase1(TestPad3dOp): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6) self.paddings = [0, 1, 2, 3, 4, 5] @@ -97,6 +99,7 @@ class TestCase1(TestPad3dOp): class TestCase2(TestPad3dOp): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6) self.paddings = [1, 1, 1, 1, 1, 1] @@ -107,6 +110,7 @@ class TestCase2(TestPad3dOp): class TestCase3(TestPad3dOp): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6) self.paddings = [0, 1, 1, 0, 2, 3] @@ -116,6 +120,7 @@ class TestCase3(TestPad3dOp): class TestCase4(TestPad3dOp): + def initTestCase(self): self.shape = (4, 4, 4, 4, 4) self.paddings = [0, 1, 2, 1, 2, 3] @@ -125,6 +130,7 @@ class TestCase4(TestPad3dOp): class TestCase5(TestPad3dOp): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6) self.paddings = [0, 1, 2, 3, 2, 1] @@ -134,6 +140,7 @@ class TestCase5(TestPad3dOp): class TestCase6(TestPad3dOp): + def initTestCase(self): self.shape = (4, 4, 4, 4, 4) self.paddings = [5, 4, 2, 1, 2, 3] @@ -143,6 +150,7 @@ class TestCase6(TestPad3dOp): class TestCase7(TestPad3dOp): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6) self.paddings = [0, 1, 2, 3, 2, 1] @@ -152,6 +160,7 @@ class TestCase7(TestPad3dOp): class TestCase8(TestPad3dOp): + def initTestCase(self): self.shape = (4, 4, 4, 4, 4) self.paddings = [0, 1, 2, 1, 2, 3] @@ -161,6 +170,7 @@ class TestCase8(TestPad3dOp): class TestCase9(TestPad3dOp): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6) self.paddings = [0, 1, 2, 3, 
4, 5] @@ -171,6 +181,7 @@ class TestCase9(TestPad3dOp): class TestCase10(TestPad3dOp): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6) self.paddings = [0, 1, 2, 3, 4, 5] @@ -181,6 +192,7 @@ class TestCase10(TestPad3dOp): class TestPadAPI(unittest.TestCase): + def setUp(self): self.places = [paddle.CPUPlace()] if core.is_compiled_with_cuda(): @@ -223,10 +235,14 @@ class TestPadAPI(unittest.TestCase): feed={"x": input_data}, fetch_list=[result1, result2]) - np_out1 = self._get_numpy_out( - input_data, pad, mode, data_format="NCDHW") - np_out2 = self._get_numpy_out( - input_data, pad, mode, data_format="NDHWC") + np_out1 = self._get_numpy_out(input_data, + pad, + mode, + data_format="NCDHW") + np_out2 = self._get_numpy_out(input_data, + pad, + mode, + data_format="NDHWC") self.assertTrue(np.allclose(fetches[0], np_out1)) self.assertTrue(np.allclose(fetches[1], np_out2)) @@ -245,10 +261,14 @@ class TestPadAPI(unittest.TestCase): feed={"x": input_data}, fetch_list=[result1, result2]) - np_out1 = self._get_numpy_out( - input_data, pad, mode, data_format="NCDHW") - np_out2 = self._get_numpy_out( - input_data, pad, mode, data_format="NDHWC") + np_out1 = self._get_numpy_out(input_data, + pad, + mode, + data_format="NCDHW") + np_out2 = self._get_numpy_out(input_data, + pad, + mode, + data_format="NDHWC") self.assertTrue(np.allclose(fetches[0], np_out1)) self.assertTrue(np.allclose(fetches[1], np_out2)) @@ -267,10 +287,14 @@ class TestPadAPI(unittest.TestCase): feed={"x": input_data}, fetch_list=[result1, result2]) - np_out1 = self._get_numpy_out( - input_data, pad, mode, data_format="NCDHW") - np_out2 = self._get_numpy_out( - input_data, pad, mode, data_format="NDHWC") + np_out1 = self._get_numpy_out(input_data, + pad, + mode, + data_format="NCDHW") + np_out2 = self._get_numpy_out(input_data, + pad, + mode, + data_format="NDHWC") self.assertTrue(np.allclose(fetches[0], np_out1)) self.assertTrue(np.allclose(fetches[1], np_out2)) @@ -351,12 +375,21 @@ class TestPadAPI(unittest.TestCase): mode = "constant" value = 100 input_data = np.random.rand(*input_shape).astype(np.float32) - np_out1 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NCDHW") - np_out2 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NDHWC") - np_out3 = self._get_numpy_out( - input_data, pad_3, mode, value, data_format="NCDHW") + np_out1 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NCDHW") + np_out2 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NDHWC") + np_out3 = self._get_numpy_out(input_data, + pad_3, + mode, + value, + data_format="NCDHW") tensor_data = paddle.to_tensor(input_data) y1 = F.pad(tensor_data, @@ -387,12 +420,21 @@ class TestPadAPI(unittest.TestCase): mode = "constant" value = 100 input_data = np.random.rand(*input_shape).astype(np.float32) - np_out1 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NCHW") - np_out2 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NHWC") - np_out3 = self._get_numpy_out( - input_data, pad_3, mode, value, data_format="NCHW") + np_out1 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NCHW") + np_out2 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NHWC") + np_out3 = self._get_numpy_out(input_data, + pad_3, + mode, + value, + data_format="NCHW") tensor_data = paddle.to_tensor(input_data) tensor_pad = paddle.to_tensor(pad, dtype="int32") @@ -425,12 +467,21 @@ class TestPadAPI(unittest.TestCase): mode = 
"constant" value = 100 input_data = np.random.rand(*input_shape).astype(np.float32) - np_out1 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NCL") - np_out2 = self._get_numpy_out( - input_data, pad, mode, value, data_format="NLC") - np_out3 = self._get_numpy_out( - input_data, pad_3, mode, value, data_format="NCL") + np_out1 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NCL") + np_out2 = self._get_numpy_out(input_data, + pad, + mode, + value, + data_format="NLC") + np_out3 = self._get_numpy_out(input_data, + pad_3, + mode, + value, + data_format="NCL") tensor_data = paddle.to_tensor(input_data) tensor_pad = paddle.to_tensor(pad, dtype="int32") @@ -456,6 +507,7 @@ class TestPadAPI(unittest.TestCase): class TestPad1dAPI(unittest.TestCase): + def _get_numpy_out(self, input_data, pad, @@ -503,42 +555,53 @@ class TestPad1dAPI(unittest.TestCase): pad_reflection = nn.Pad1D(padding=pad, mode="reflect") pad_replication = nn.Pad1D(padding=pad, mode="replicate") pad_constant = nn.Pad1D(padding=pad, mode="constant", value=value) - pad_constant_int = nn.Pad1D( - padding=pad_int, mode="constant", value=value) + pad_constant_int = nn.Pad1D(padding=pad_int, + mode="constant", + value=value) pad_circular = nn.Pad1D(padding=pad, mode="circular") data = paddle.to_tensor(input_data) output = pad_reflection(data) - np_out = self._get_numpy_out( - input_data, pad, "reflect", data_format="NCL") + np_out = self._get_numpy_out(input_data, + pad, + "reflect", + data_format="NCL") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_replication(data) - np_out = self._get_numpy_out( - input_data, pad, "replicate", data_format="NCL") + np_out = self._get_numpy_out(input_data, + pad, + "replicate", + data_format="NCL") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_constant(data) - np_out = self._get_numpy_out( - input_data, pad, "constant", value=value, data_format="NCL") + np_out = self._get_numpy_out(input_data, + pad, + "constant", + value=value, + data_format="NCL") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_constant_int(data) - np_out = self._get_numpy_out( - input_data, [pad_int] * 2, - "constant", - value=value, - data_format="NCL") + np_out = self._get_numpy_out(input_data, [pad_int] * 2, + "constant", + value=value, + data_format="NCL") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_circular(data) - np_out = self._get_numpy_out( - input_data, pad, "circular", value=value, data_format="NCL") + np_out = self._get_numpy_out(input_data, + pad, + "circular", + value=value, + data_format="NCL") self.assertTrue(np.allclose(output.numpy(), np_out)) class TestPad2dAPI(unittest.TestCase): + def _get_numpy_out(self, input_data, pad, @@ -588,42 +651,52 @@ class TestPad2dAPI(unittest.TestCase): pad_reflection = nn.Pad2D(padding=pad, mode="reflect") pad_replication = nn.Pad2D(padding=pad, mode="replicate") pad_constant = nn.Pad2D(padding=pad, mode="constant", value=value) - pad_constant_int = nn.Pad2D( - padding=pad_int, mode="constant", value=value) + pad_constant_int = nn.Pad2D(padding=pad_int, + mode="constant", + value=value) pad_circular = nn.Pad2D(padding=pad, mode="circular") data = paddle.to_tensor(input_data) output = pad_reflection(data) - np_out = self._get_numpy_out( - input_data, pad, "reflect", data_format="NCHW") + np_out = self._get_numpy_out(input_data, + pad, + "reflect", + data_format="NCHW") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_replication(data) - np_out 
= self._get_numpy_out( - input_data, pad, "replicate", data_format="NCHW") + np_out = self._get_numpy_out(input_data, + pad, + "replicate", + data_format="NCHW") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_constant(data) - np_out = self._get_numpy_out( - input_data, pad, "constant", value=value, data_format="NCHW") + np_out = self._get_numpy_out(input_data, + pad, + "constant", + value=value, + data_format="NCHW") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_constant_int(data) - np_out = self._get_numpy_out( - input_data, [pad_int] * 4, - "constant", - value=value, - data_format="NCHW") + np_out = self._get_numpy_out(input_data, [pad_int] * 4, + "constant", + value=value, + data_format="NCHW") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_circular(data) - np_out = self._get_numpy_out( - input_data, pad, "circular", data_format="NCHW") + np_out = self._get_numpy_out(input_data, + pad, + "circular", + data_format="NCHW") self.assertTrue(np.allclose(output.numpy(), np_out)) class TestPad3dAPI(unittest.TestCase): + def _get_numpy_out(self, input_data, pad, @@ -675,38 +748,47 @@ class TestPad3dAPI(unittest.TestCase): pad_reflection = nn.Pad3D(padding=pad, mode="reflect") pad_replication = nn.Pad3D(padding=pad, mode="replicate") pad_constant = nn.Pad3D(padding=pad, mode="constant", value=value) - pad_constant_int = nn.Pad3D( - padding=pad_int, mode="constant", value=value) + pad_constant_int = nn.Pad3D(padding=pad_int, + mode="constant", + value=value) pad_circular = nn.Pad3D(padding=pad, mode="circular") data = paddle.to_tensor(input_data) output = pad_reflection(data) - np_out = self._get_numpy_out( - input_data, pad, "reflect", data_format="NCDHW") + np_out = self._get_numpy_out(input_data, + pad, + "reflect", + data_format="NCDHW") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_replication(data) - np_out = self._get_numpy_out( - input_data, pad, "replicate", data_format="NCDHW") + np_out = self._get_numpy_out(input_data, + pad, + "replicate", + data_format="NCDHW") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_constant(data) - np_out = self._get_numpy_out( - input_data, pad, "constant", value=value, data_format="NCDHW") + np_out = self._get_numpy_out(input_data, + pad, + "constant", + value=value, + data_format="NCDHW") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_constant_int(data) - np_out = self._get_numpy_out( - input_data, [pad_int] * 6, - "constant", - value=value, - data_format="NCDHW") + np_out = self._get_numpy_out(input_data, [pad_int] * 6, + "constant", + value=value, + data_format="NCDHW") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_circular(data) - np_out = self._get_numpy_out( - input_data, pad, "circular", data_format="NCDHW") + np_out = self._get_numpy_out(input_data, + pad, + "circular", + data_format="NCDHW") self.assertTrue(np.allclose(output.numpy(), np_out)) def test_pad_tensor(self): @@ -717,30 +799,38 @@ class TestPad3dAPI(unittest.TestCase): pad_tensor = paddle.to_tensor(pad) input_data = np.random.rand(*input_shape).astype(np.float32) - pad_reflection_ncdhw = nn.Pad3D( - padding=pad_tensor, mode="reflect", data_format="NCDHW") - pad_reflection_ndhwc = nn.Pad3D( - padding=pad_tensor, mode="reflect", data_format="NDHWC") + pad_reflection_ncdhw = nn.Pad3D(padding=pad_tensor, + mode="reflect", + data_format="NCDHW") + pad_reflection_ndhwc = nn.Pad3D(padding=pad_tensor, + mode="reflect", + data_format="NDHWC") data = 
paddle.to_tensor(input_data) output = pad_reflection_ncdhw(data) - np_out = self._get_numpy_out( - input_data, pad, "reflect", data_format="NCDHW") + np_out = self._get_numpy_out(input_data, + pad, + "reflect", + data_format="NCDHW") self.assertTrue(np.allclose(output.numpy(), np_out)) output = pad_reflection_ndhwc(data) - np_out = self._get_numpy_out( - input_data, pad, "reflect", data_format="NDHWC") + np_out = self._get_numpy_out(input_data, + pad, + "reflect", + data_format="NDHWC") self.assertTrue(np.allclose(output.numpy(), np_out)) class TestPad3dOpError(unittest.TestCase): + def setUp(self): self.places = [paddle.CPUPlace()] if core.is_compiled_with_cuda(): self.places.append(paddle.CUDAPlace(0)) def test_errors(self): + def test_variable(): input_shape = (1, 2, 3, 4, 5) data = np.random.rand(*input_shape).astype(np.float32) @@ -806,12 +896,14 @@ class TestPad3dOpError(unittest.TestCase): class TestPadDataformatError(unittest.TestCase): + def test_errors(self): + def test_ncl(): input_shape = (1, 2, 3, 4) pad = paddle.to_tensor(np.array([2, 1, 2, 1]).astype('int32')) - data = np.arange( - np.prod(input_shape), dtype=np.float64).reshape(input_shape) + 1 + data = np.arange(np.prod(input_shape), + dtype=np.float64).reshape(input_shape) + 1 my_pad = nn.Pad1D(padding=pad, mode="replicate", data_format="NCL") data = paddle.to_tensor(data) result = my_pad(data) @@ -819,8 +911,8 @@ class TestPadDataformatError(unittest.TestCase): def test_nchw(): input_shape = (1, 2, 4) pad = paddle.to_tensor(np.array([2, 1, 2, 1]).astype('int32')) - data = np.arange( - np.prod(input_shape), dtype=np.float64).reshape(input_shape) + 1 + data = np.arange(np.prod(input_shape), + dtype=np.float64).reshape(input_shape) + 1 my_pad = nn.Pad1D(padding=pad, mode="replicate", data_format="NCHW") data = paddle.to_tensor(data) result = my_pad(data) @@ -828,10 +920,11 @@ class TestPadDataformatError(unittest.TestCase): def test_ncdhw(): input_shape = (1, 2, 3, 4) pad = paddle.to_tensor(np.array([2, 1, 2, 1]).astype('int32')) - data = np.arange( - np.prod(input_shape), dtype=np.float64).reshape(input_shape) + 1 - my_pad = nn.Pad1D( - padding=pad, mode="replicate", data_format="NCDHW") + data = np.arange(np.prod(input_shape), + dtype=np.float64).reshape(input_shape) + 1 + my_pad = nn.Pad1D(padding=pad, + mode="replicate", + data_format="NCDHW") data = paddle.to_tensor(data) result = my_pad(data) diff --git a/python/paddle/fluid/tests/unittests/test_pad_constant_like.py b/python/paddle/fluid/tests/unittests/test_pad_constant_like.py index 41257895a73..882b3c3b42a 100644 --- a/python/paddle/fluid/tests/unittests/test_pad_constant_like.py +++ b/python/paddle/fluid/tests/unittests/test_pad_constant_like.py @@ -22,6 +22,7 @@ from paddle.fluid import Program, program_guard class TestPadConstantLikeOp(OpTest): + def setUp(self): self.initTestCase() self.op_type = "pad_constant_like" @@ -32,10 +33,11 @@ class TestPadConstantLikeOp(OpTest): self.attrs = {} self.attrs['pad_value'] = self.pad_value self.outputs = { - 'Out': np.pad(self.inputs['Y'], - self.paddings, - mode='constant', - constant_values=self.pad_value) + 'Out': + np.pad(self.inputs['Y'], + self.paddings, + mode='constant', + constant_values=self.pad_value) } def test_check_output(self): @@ -52,6 +54,7 @@ class TestPadConstantLikeOp(OpTest): class TestCase1(TestPadConstantLikeOp): + def initTestCase(self): self.x_shape = (4, 3, 4, 5) self.y_shape = (2, 3, 4, 5) @@ -60,6 +63,7 @@ class TestCase1(TestPadConstantLikeOp): class TestCase2(TestPadConstantLikeOp): + def 
initTestCase(self): self.x_shape = (4, 3, 4, 10) self.y_shape = (2, 3, 2, 10) @@ -68,35 +72,38 @@ class TestCase2(TestPadConstantLikeOp): class TestPadConstantLikeOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): x_data = np.random.random((2, 2, 2, 2)).astype("float32") y_data = np.random.random((2, 2, 2, 2)).astype("float32") def test_Variable_x(): - var_y = fluid.data( - name="data_y", shape=[2, 2, 2, 2], dtype="float32") + var_y = fluid.data(name="data_y", + shape=[2, 2, 2, 2], + dtype="float32") fluid.layers.pad_constant_like(x=x_data, y=var_y) self.assertRaises(TypeError, test_Variable_x) def test_Variable_y(): - var_x = fluid.data( - name="data_x", shape=[2, 2, 2, 2], dtype="float32") + var_x = fluid.data(name="data_x", + shape=[2, 2, 2, 2], + dtype="float32") fluid.layers.pad_constant_like(x=var_x, y=y_data) self.assertRaises(TypeError, test_Variable_y) class TestOutDtype(unittest.TestCase): + def test_dtype(self): api_fn = fluid.layers.pad_constant_like - check_out_dtype( - api_fn, - in_specs=[([2, 3, 2, 3], 'float64'), ([1, 3, 1, 3], )], - expect_dtypes=['float32', 'float64', 'int32', 'int64'], - target_index=1, - pad_value=0.) + check_out_dtype(api_fn, + in_specs=[([2, 3, 2, 3], 'float64'), ([1, 3, 1, 3], )], + expect_dtypes=['float32', 'float64', 'int32', 'int64'], + target_index=1, + pad_value=0.) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_pad_op.py b/python/paddle/fluid/tests/unittests/test_pad_op.py index a62d19d1c0c..30044fec755 100644 --- a/python/paddle/fluid/tests/unittests/test_pad_op.py +++ b/python/paddle/fluid/tests/unittests/test_pad_op.py @@ -23,19 +23,23 @@ from paddle.fluid import Program, program_guard class TestPadOp(OpTest): + def setUp(self): self.initTestCase() self.dtype = self.get_dtype() self.op_type = "pad" - self.inputs = {'X': np.random.random(self.shape).astype(self.dtype), } + self.inputs = { + 'X': np.random.random(self.shape).astype(self.dtype), + } self.attrs = {} self.attrs['paddings'] = np.array(self.paddings).flatten() self.attrs['pad_value'] = self.pad_value self.outputs = { - 'Out': np.pad(self.inputs['X'], - self.paddings, - mode='constant', - constant_values=self.pad_value) + 'Out': + np.pad(self.inputs['X'], + self.paddings, + mode='constant', + constant_values=self.pad_value) } def get_dtype(self): @@ -54,6 +58,7 @@ class TestPadOp(OpTest): class TestCase1(TestPadOp): + def initTestCase(self): self.shape = (2, 3, 4, 5) self.paddings = [(0, 1), (2, 3), (2, 1), (1, 1)] @@ -61,6 +66,7 @@ class TestCase1(TestPadOp): class TestCase2(TestPadOp): + def initTestCase(self): self.shape = (5, 5, 5) self.paddings = [(0, 0), (0, 0), (1, 2)] @@ -68,6 +74,7 @@ class TestCase2(TestPadOp): class TestCase3(TestPadOp): + def initTestCase(self): self.shape = (100) self.paddings = [(0, 1)] @@ -78,9 +85,11 @@ class TestCase3(TestPadOp): def create_test_fp16(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestPadFp16(parent): + def get_dtype(self): return np.float16 @@ -99,6 +108,7 @@ create_test_fp16(TestCase3) class TestPadOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): input_data = np.random.random((2, 2)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py index 23f6b3d646b..43d42769053 100644 --- 
a/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py @@ -23,6 +23,7 @@ import paddle.fluid.core as core def _dygraph_guard_(func): + def __impl__(*args, **kwargs): if paddle.in_dynamic_mode(): return func(*args, **kwargs) @@ -42,6 +43,7 @@ def random_var(size, low=-1, high=1, dtype='float32'): class TestDygraphDoubleGrad(TestCase): + def setUp(self): self.sort_sum_gradient = False self.shape = [5, 10] @@ -54,14 +56,13 @@ class TestDygraphDoubleGrad(TestCase): retain_graph=None, create_graph=False, allow_unused=False): - return paddle.grad( - outputs=outputs, - inputs=inputs, - grad_outputs=grad_outputs, - no_grad_vars=no_grad_vars, - retain_graph=retain_graph, - create_graph=create_graph, - allow_unused=allow_unused) + return paddle.grad(outputs=outputs, + inputs=inputs, + grad_outputs=grad_outputs, + no_grad_vars=no_grad_vars, + retain_graph=retain_graph, + create_graph=create_graph, + allow_unused=allow_unused) @dygraph_guard def func_exception(self): @@ -87,8 +88,8 @@ class TestDygraphDoubleGrad(TestCase): [random_var(shape)], [random_var(shape)]) with self.assertRaises(AssertionError): - self.grad( - [random_var(shape)], [random_var(shape)], no_grad_vars=[1]) + self.grad([random_var(shape)], [random_var(shape)], + no_grad_vars=[1]) with self.assertRaises(AssertionError): self.grad([random_var(shape)], [random_var(shape)], no_grad_vars=1) @@ -105,24 +106,27 @@ class TestDygraphDoubleGrad(TestCase): y = x + 1 for create_graph in [False, True]: - dx, = self.grad( - [x], [x], create_graph=create_graph, retain_graph=True) + dx, = self.grad([x], [x], + create_graph=create_graph, + retain_graph=True) self.assertEqual(dx.shape, x.shape) self.assertTrue(np.all(dx.numpy() == 1)) self.assertNotEqual(dx.stop_gradient, create_graph) - dx_mul_2, = self.grad( - [y, x], [x], create_graph=create_graph, retain_graph=True) + dx_mul_2, = self.grad([y, x], [x], + create_graph=create_graph, + retain_graph=True) self.assertEqual(dx_mul_2.shape, x.shape) self.assertTrue(np.all(dx_mul_2.numpy() == 2)) self.assertNotEqual(dx_mul_2.stop_gradient, create_graph) - none_grad, = self.grad( - [x], [y], create_graph=create_graph, allow_unused=True) + none_grad, = self.grad([x], [y], + create_graph=create_graph, + allow_unused=True) self.assertTrue(none_grad is None) - grad_with_none_and_not_none, = self.grad( - [x, y], [y], create_graph=create_graph) + grad_with_none_and_not_none, = self.grad([x, y], [y], + create_graph=create_graph) self.assertTrue(grad_with_none_and_not_none.shape, x.shape) self.assertTrue(np.all(grad_with_none_and_not_none.numpy() == 1)) self.assertNotEqual(grad_with_none_and_not_none.stop_gradient, @@ -141,10 +145,11 @@ class TestDygraphDoubleGrad(TestCase): half_numel = int(numel / 2) half_x_positive = np.random.uniform(low=1, high=2, size=[half_numel]) - half_x_negative = np.random.uniform( - low=-2, high=-1, size=[numel - half_numel]) - x_np = np.array(list(half_x_positive) + list(half_x_negative)).astype( - 'float32') + half_x_negative = np.random.uniform(low=-2, + high=-1, + size=[numel - half_numel]) + x_np = np.array(list(half_x_positive) + + list(half_x_negative)).astype('float32') np.random.shuffle(x_np) x = fluid.dygraph.to_variable(x_np) @@ -173,12 +178,11 @@ class TestDygraphDoubleGrad(TestCase): for grad_y in [random_grad_y]: for grad_z in [random_grad_z]: for create_graph in [False, True]: - dx_actual, = self.grad( - outputs=[y, z], - inputs=[x], - grad_outputs=[grad_y, grad_z], - 
create_graph=create_graph, - retain_graph=True) + dx_actual, = self.grad(outputs=[y, z], + inputs=[x], + grad_outputs=[grad_y, grad_z], + create_graph=create_graph, + retain_graph=True) grad_y_np = ones_grad_y if grad_y is None else grad_y.numpy( ) @@ -236,9 +240,10 @@ class TestDygraphDoubleGrad(TestCase): loss.backward() x_grad_actual = x.gradient() - x_grad_expected = (2.0 / float(numel) * ( - x_np + dx_expected * - (x_np > 0) * 2 / float(numel))).astype('float32') + x_grad_expected = ( + 2.0 / float(numel) * + (x_np + dx_expected * + (x_np > 0) * 2 / float(numel))).astype('float32') self.assertTrue(np.allclose(x_grad_actual, x_grad_expected)) def test_example_with_gradient_accumulation_and_create_graph(self): @@ -261,8 +266,9 @@ class TestDygraphDoubleGrad(TestCase): w_mean = fluid.layers.reduce_mean(w) del y1, z, w - dx_actual, = self.grad( - [w_mean], [x], create_graph=True, no_grad_vars=[y2]) + dx_actual, = self.grad([w_mean], [x], + create_graph=True, + no_grad_vars=[y2]) self.assertFalse(y2.stop_gradient) self.assertFalse(dx_actual.stop_gradient) @@ -278,9 +284,10 @@ class TestDygraphDoubleGrad(TestCase): loss.backward() x_grad_actual = x.gradient() - x_grad_expected = (2.0 / float(numel) * ( - x_np + dx_expected * - (x_np > 0) * 4 / float(numel))).astype('float32') + x_grad_expected = ( + 2.0 / float(numel) * + (x_np + dx_expected * + (x_np > 0) * 4 / float(numel))).astype('float32') self.assertTrue(np.allclose(x_grad_actual, x_grad_expected)) def test_example_with_gradient_accumulation_and_no_grad_vars(self): @@ -329,6 +336,7 @@ class TestDygraphDoubleGrad(TestCase): class TestDygraphDoubleGradSortGradient(TestDygraphDoubleGrad): + def setUp(self): self.sort_sum_gradient = True self.shape = [5, 10] diff --git a/python/paddle/fluid/tests/unittests/test_paddle_multiprocessing.py b/python/paddle/fluid/tests/unittests/test_paddle_multiprocessing.py index 3fc06e3c8df..f13e20f4e91 100644 --- a/python/paddle/fluid/tests/unittests/test_paddle_multiprocessing.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_multiprocessing.py @@ -55,6 +55,7 @@ def send_parambase(queue, event, device, dtype): class leak_checker(object): + def __init__(self, test_case): self.checked_pids = [os.getpid()] self.test_case = test_case @@ -98,6 +99,7 @@ class leak_checker(object): class TestMultiprocessingBase(unittest.TestCase): + def get_tensor(self, device="cpu"): self.device = device.lower() place = None @@ -123,6 +125,7 @@ class TestMultiprocessingBase(unittest.TestCase): dtype="float32", repeat=1, param=False): + def test_fill(): if param: x = self.get_parameter() @@ -178,6 +181,7 @@ class TestMultiprocessingBase(unittest.TestCase): class TestMultiprocessingCpu(TestMultiprocessingBase): + def func_test_pass_tensor(self): if in_dygraph_mode(): return @@ -213,6 +217,7 @@ class TestMultiprocessingCpu(TestMultiprocessingBase): class TestMultiprocessingGpu(TestMultiprocessingBase): + @unittest.skipIf(not paddle.fluid.core.is_compiled_with_cuda(), "core is not compiled with CUDA") def func_test_pass_tensor(self): diff --git a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py index 8945d35c131..3cf35550c58 100644 --- a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py @@ -41,12 +41,14 @@ LARGE_PARAM = 2**26 def random_batch_reader(): + def _get_random_inputs_and_labels(): np.random.seed(SEED) image = np.random.random([BATCH_SIZE, 
IMAGE_SIZE]).astype('float32') label = np.random.randint(0, CLASS_NUM - 1, ( BATCH_SIZE, - 1, )).astype('int64') + 1, + )).astype('int64') return image, label def __reader__(): @@ -60,6 +62,7 @@ def random_batch_reader(): class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) @@ -69,6 +72,7 @@ class LinearNet(nn.Layer): class LayerWithLargeParameters(paddle.nn.Layer): + def __init__(self): super(LayerWithLargeParameters, self).__init__() self._l = paddle.nn.Linear(10, LARGE_PARAM) @@ -89,6 +93,7 @@ def train(layer, loader, loss_fn, opt): class TestSaveLoadLargeParameters(unittest.TestCase): + def setUp(self): pass @@ -111,6 +116,7 @@ class TestSaveLoadLargeParameters(unittest.TestCase): class TestSaveLoadPickle(unittest.TestCase): + def test_pickle_protocol(self): # enable dygraph mode paddle.disable_static() @@ -130,7 +136,9 @@ class TestSaveLoadPickle(unittest.TestCase): with self.assertRaises(ValueError): paddle.save(save_dict, path, 5) - protocols = [2, ] + protocols = [ + 2, + ] if sys.version_info.major >= 3 and sys.version_info.minor >= 4: protocols += [3, 4] for protocol in protocols: @@ -143,6 +151,7 @@ class TestSaveLoadPickle(unittest.TestCase): class TestSaveLoadAny(unittest.TestCase): + def set_zero(self, prog, place, scope=None): if scope is None: scope = fluid.global_scope() @@ -184,8 +193,9 @@ class TestSaveLoadAny(unittest.TestCase): def test_replace_static_save_load(self): paddle.enable_static() with new_program_scope(): - x = paddle.static.data( - name="static_x", shape=[None, IMAGE_SIZE], dtype='float32') + x = paddle.static.data(name="static_x", + shape=[None, IMAGE_SIZE], + dtype='float32') z = paddle.static.nn.fc(x, 10) z = paddle.static.nn.fc(z, 10, bias_attr=False) loss = fluid.layers.reduce_mean(z) @@ -200,8 +210,8 @@ class TestSaveLoadAny(unittest.TestCase): base_map = {} for var in prog.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_map[var.name] = t path = os.path.join("test_replace_static_save_load", "model") # paddle.save, legacy paddle.fluid.load @@ -210,18 +220,18 @@ class TestSaveLoadAny(unittest.TestCase): paddle.fluid.io.load(prog, path) for var in prog.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, np.array(base_t))) - # legacy paddle.fluid.save, paddle.load + # legacy paddle.fluid.save, paddle.load paddle.fluid.io.save(prog, path) self.set_zero(prog, place) self.replace_static_load(prog, path) for var in prog.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) # test for return tensor @@ -247,11 +257,11 @@ class TestSaveLoadAny(unittest.TestCase): self.set_zero(prog, place) for var in prog.list_vars(): if var.persistable: - tensor = paddle.load( - os.path.join(path_vars, var.name), return_numpy=False) + tensor = paddle.load(os.path.join(path_vars, var.name), + return_numpy=False) var.set_value(tensor) - new_t = 
np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) @@ -259,6 +269,7 @@ class TestSaveLoadAny(unittest.TestCase): paddle.disable_static() class StepDecay(LRScheduler): + def __init__(self, learning_rate, step_size, @@ -276,8 +287,8 @@ class TestSaveLoadAny(unittest.TestCase): layer = LinearNet() inps = paddle.randn([2, IMAGE_SIZE]) - adam = opt.Adam( - learning_rate=StepDecay(0.1, 1), parameters=layer.parameters()) + adam = opt.Adam(learning_rate=StepDecay(0.1, 1), + parameters=layer.parameters()) y = layer(inps) y.mean().backward() adam.step() @@ -316,8 +327,9 @@ class TestSaveLoadAny(unittest.TestCase): t_dygraph = paddle.load(path) np_dygraph = paddle.load(path, return_numpy=True) self.assertTrue( - isinstance(t_dygraph, (paddle.fluid.core.VarBase, - paddle.fluid.core.eager.Tensor))) + isinstance( + t_dygraph, + (paddle.fluid.core.VarBase, paddle.fluid.core.eager.Tensor))) self.assertTrue(np.array_equal(tensor.numpy(), np_dygraph)) self.assertTrue(np.array_equal(tensor.numpy(), t_dygraph.numpy())) paddle.enable_static() @@ -332,8 +344,9 @@ class TestSaveLoadAny(unittest.TestCase): paddle.enable_static() with new_program_scope(): # create network - x = paddle.static.data( - name="x", shape=[None, IMAGE_SIZE], dtype='float32') + x = paddle.static.data(name="x", + shape=[None, IMAGE_SIZE], + dtype='float32') z = paddle.static.nn.fc(x, 128) loss = fluid.layers.reduce_mean(z) place = fluid.CPUPlace( @@ -379,8 +392,9 @@ class TestSaveLoadAny(unittest.TestCase): paddle.enable_static() with new_program_scope(): layer = LinearNet() - data = paddle.static.data( - name='x_static_save', shape=(None, IMAGE_SIZE), dtype='float32') + data = paddle.static.data(name='x_static_save', + shape=(None, IMAGE_SIZE), + dtype='float32') y_static = layer(data) program = paddle.static.default_main_program() place = fluid.CPUPlace( @@ -401,17 +415,16 @@ class TestSaveLoadAny(unittest.TestCase): layer = paddle.nn.Linear(3, 4) state_dict = layer.state_dict() obj1 = [ - paddle.randn( - [3, 4], dtype='float32'), np.random.randn(5, 6), - ('fake_weight', np.ones( - [7, 8], dtype='float32')) + paddle.randn([3, 4], dtype='float32'), + np.random.randn(5, 6), + ('fake_weight', np.ones([7, 8], dtype='float32')) ] obj2 = {'k1': obj1, 'k2': state_dict, 'epoch': 123} - obj3 = (paddle.randn( - [5, 4], dtype='float32'), np.random.randn(3, 4).astype("float32"), { - "state_dict": state_dict, - "opt": state_dict - }) + obj3 = (paddle.randn([5, 4], dtype='float32'), + np.random.randn(3, 4).astype("float32"), { + "state_dict": state_dict, + "opt": state_dict + }) obj4 = (np.random.randn(5, 6), (123, )) path1 = "test_save_load_any_complex_object_dygraph/obj1" @@ -428,8 +441,8 @@ class TestSaveLoadAny(unittest.TestCase): load_tensor3 = paddle.load(path3, return_numpy=False) load_tensor4 = paddle.load(path4, return_numpy=False) - self.assertTrue( - np.array_equal(load_tensor1[0].numpy(), obj1[0].numpy())) + self.assertTrue(np.array_equal(load_tensor1[0].numpy(), + obj1[0].numpy())) self.assertTrue(np.array_equal(load_tensor1[1], obj1[1])) self.assertTrue(np.array_equal(load_tensor1[2].numpy(), obj1[2][1])) for i in range(len(load_tensor1)): @@ -440,8 +453,8 @@ class TestSaveLoadAny(unittest.TestCase): np.array_equal(v.numpy(), load_tensor2['k2'][k].numpy())) self.assertTrue(load_tensor2['epoch'] == 123) - self.assertTrue( - np.array_equal(load_tensor3[0].numpy(), 
obj3[0].numpy())) + self.assertTrue(np.array_equal(load_tensor3[0].numpy(), + obj3[0].numpy())) self.assertTrue(np.array_equal(np.array(load_tensor3[1]), obj3[1])) for k, v in state_dict.items(): @@ -502,8 +515,8 @@ class TestSaveLoadAny(unittest.TestCase): np.array_equal(v.numpy(), np.array(load_tensor2['k2'][k]))) self.assertTrue(load_tensor2['epoch'] == 123) - self.assertTrue( - isinstance(load_tensor3[0], paddle.fluid.core.LoDTensor)) + self.assertTrue(isinstance(load_tensor3[0], + paddle.fluid.core.LoDTensor)) self.assertTrue( np.array_equal(np.array(load_tensor3[0]), obj3[0].numpy())) self.assertTrue(np.array_equal(np.array(load_tensor3[1]), obj3[1])) @@ -513,8 +526,8 @@ class TestSaveLoadAny(unittest.TestCase): isinstance(load_tensor3[2]["state_dict"][k], paddle.fluid.core.LoDTensor)) self.assertTrue( - np.array_equal( - np.array(load_tensor3[2]["state_dict"][k]), v.numpy())) + np.array_equal(np.array(load_tensor3[2]["state_dict"][k]), + v.numpy())) for k, v in state_dict.items(): self.assertTrue( @@ -557,8 +570,9 @@ class TestSaveLoadAny(unittest.TestCase): paddle.enable_static() with new_program_scope(): # create network - x = paddle.static.data( - name="x", shape=[None, IMAGE_SIZE], dtype='float32') + x = paddle.static.data(name="x", + shape=[None, IMAGE_SIZE], + dtype='float32') z = paddle.static.nn.fc(x, 10, bias_attr=False) z = paddle.static.nn.fc(z, 128, bias_attr=False) loss = fluid.layers.reduce_mean(z) @@ -572,16 +586,15 @@ class TestSaveLoadAny(unittest.TestCase): state_dict = prog.state_dict() keys = list(state_dict.keys()) obj1 = [ - state_dict[keys[0]], np.random.randn(5, 6), - ('fake_weight', np.ones( - [7, 8], dtype='float32')) + state_dict[keys[0]], + np.random.randn(5, 6), + ('fake_weight', np.ones([7, 8], dtype='float32')) ] obj2 = {'k1': obj1, 'k2': state_dict, 'epoch': 123} - obj3 = (state_dict[keys[0]], np.ndarray( - [3, 4], dtype="float32"), { - "state_dict": state_dict, - "opt": state_dict - }) + obj3 = (state_dict[keys[0]], np.ndarray([3, 4], dtype="float32"), { + "state_dict": state_dict, + "opt": state_dict + }) obj4 = (np.ndarray([3, 4], dtype="float32"), ) path1 = "test_save_load_any_complex_object_static/obj1" @@ -608,8 +621,8 @@ class TestSaveLoadAny(unittest.TestCase): type(load_tensor1[i]) == type(load_tensor2['k1'][i])) for k, v in state_dict.items(): self.assertTrue( - np.array_equal( - np.array(v), np.array(load_tensor2['k2'][k]))) + np.array_equal(np.array(v), + np.array(load_tensor2['k2'][k]))) self.assertTrue(load_tensor2['epoch'] == 123) self.assertTrue(isinstance(load_tensor3[0], fluid.core.LoDTensor)) @@ -622,16 +635,15 @@ class TestSaveLoadAny(unittest.TestCase): isinstance(load_tensor3[2]["state_dict"][k], fluid.core.LoDTensor)) self.assertTrue( - np.array_equal( - np.array(load_tensor3[2]["state_dict"][k]), np.array( - v))) + np.array_equal(np.array(load_tensor3[2]["state_dict"][k]), + np.array(v))) for k, v in state_dict.items(): self.assertTrue( isinstance(load_tensor3[2]["opt"][k], fluid.core.LoDTensor)) self.assertTrue( - np.array_equal( - np.array(load_tensor3[2]["opt"][k]), np.array(v))) + np.array_equal(np.array(load_tensor3[2]["opt"][k]), + np.array(v))) self.assertTrue(isinstance(load_tensor4[0], fluid.core.LoDTensor)) self.assertTrue(np.array_equal(np.array(load_tensor4[0]), obj4[0])) @@ -657,8 +669,8 @@ class TestSaveLoadAny(unittest.TestCase): for k, v in state_dict.items(): self.assertTrue( - np.array_equal(load_array3[2]["state_dict"][k], np.array( - v))) + np.array_equal(load_array3[2]["state_dict"][k], + np.array(v))) for 
k, v in state_dict.items(): self.assertTrue( @@ -683,38 +695,38 @@ class TestSaveLoadAny(unittest.TestCase): type(load_tensor1[i]) == type(load_tensor2['k1'][i])) for k, v in state_dict.items(): self.assertTrue( - np.array_equal( - np.array(v), np.array(load_tensor2['k2'][k]))) + np.array_equal(np.array(v), + np.array(load_tensor2['k2'][k]))) self.assertTrue(load_tensor2['epoch'] == 123) self.assertTrue( - isinstance(load_tensor3[0], (fluid.core.VarBase, - fluid.core.eager.Tensor))) + isinstance(load_tensor3[0], + (fluid.core.VarBase, fluid.core.eager.Tensor))) self.assertTrue(np.array_equal(load_tensor3[0].numpy(), obj3[0])) self.assertTrue( - isinstance(load_tensor3[1], (fluid.core.VarBase, - fluid.core.eager.Tensor))) + isinstance(load_tensor3[1], + (fluid.core.VarBase, fluid.core.eager.Tensor))) self.assertTrue(np.array_equal(load_tensor3[1].numpy(), obj3[1])) for k, v in state_dict.items(): self.assertTrue( - isinstance(load_tensor3[2]["state_dict"][k], ( - fluid.core.VarBase, fluid.core.eager.Tensor))) + isinstance(load_tensor3[2]["state_dict"][k], + (fluid.core.VarBase, fluid.core.eager.Tensor))) self.assertTrue( np.array_equal(load_tensor3[2]["state_dict"][k].numpy(), np.array(v))) for k, v in state_dict.items(): self.assertTrue( - isinstance(load_tensor3[2]["opt"][k], ( - fluid.core.VarBase, fluid.core.eager.Tensor))) + isinstance(load_tensor3[2]["opt"][k], + (fluid.core.VarBase, fluid.core.eager.Tensor))) self.assertTrue( np.array_equal(load_tensor3[2]["opt"][k].numpy(), np.array(v))) self.assertTrue( - isinstance(load_tensor4[0], (fluid.core.VarBase, - fluid.core.eager.Tensor))) + isinstance(load_tensor4[0], + (fluid.core.VarBase, fluid.core.eager.Tensor))) self.assertTrue(np.array_equal(load_tensor4[0].numpy(), obj4[0])) load_array1 = paddle.load(path1, return_numpy=True) @@ -738,8 +750,8 @@ class TestSaveLoadAny(unittest.TestCase): for k, v in state_dict.items(): self.assertTrue( - np.array_equal(load_array3[2]["state_dict"][k], np.array( - v))) + np.array_equal(load_array3[2]["state_dict"][k], + np.array(v))) for k, v in state_dict.items(): self.assertTrue( @@ -764,6 +776,7 @@ class TestSaveLoadAny(unittest.TestCase): class TestSaveLoadToMemory(unittest.TestCase): + def test_dygraph_save_to_memory(self): paddle.disable_static() linear = LinearNet() @@ -792,8 +805,9 @@ class TestSaveLoadToMemory(unittest.TestCase): paddle.enable_static() with new_program_scope(): # create network - x = paddle.static.data( - name="x", shape=[None, IMAGE_SIZE], dtype='float32') + x = paddle.static.data(name="x", + shape=[None, IMAGE_SIZE], + dtype='float32') z = paddle.static.nn.fc(x, 10, bias_attr=False) z = paddle.static.nn.fc(z, 128, bias_attr=False) loss = fluid.layers.reduce_mean(z) @@ -829,6 +843,7 @@ class TestSaveLoadToMemory(unittest.TestCase): class TestSaveLoad(unittest.TestCase): + def setUp(self): # enable dygraph mode paddle.disable_static() @@ -903,12 +918,14 @@ class TestSaveLoad(unittest.TestCase): class TestSaveLoadProgram(unittest.TestCase): + def test_save_load_program(self): paddle.enable_static() with new_program_scope(): layer = LinearNet() - data = paddle.static.data( - name='x_static_save', shape=(None, IMAGE_SIZE), dtype='float32') + data = paddle.static.data(name='x_static_save', + shape=(None, IMAGE_SIZE), + dtype='float32') y_static = layer(data) main_program = paddle.static.default_main_program() startup_program = paddle.static.default_startup_program() @@ -927,6 +944,7 @@ class TestSaveLoadProgram(unittest.TestCase): class TestSaveLoadLayer(unittest.TestCase): + 
def test_save_load_layer(self): paddle.disable_static() inps = paddle.randn([1, IMAGE_SIZE], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py b/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py index 0b9e038f7cd..bba65e469ab 100644 --- a/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py @@ -34,6 +34,7 @@ IMAGE_SIZE = 784 class TestSaveLoadBinaryFormat(unittest.TestCase): + def setUp(self): # enable static graph mode paddle.enable_static() @@ -50,17 +51,18 @@ class TestSaveLoadBinaryFormat(unittest.TestCase): self.assertTrue(np.sum(np.abs(new_t)) == 0) def replace_save_vars(self, program, dirname): + def predicate(var): return var.persistable vars = filter(predicate, program.list_vars()) for var in vars: - paddle.save( - var.get_value(), - os.path.join(dirname, var.name), - use_binary_format=True) + paddle.save(var.get_value(), + os.path.join(dirname, var.name), + use_binary_format=True) def replace_load_vars(self, program, dirname): + def predicate(var): return var.persistable @@ -74,8 +76,9 @@ class TestSaveLoadBinaryFormat(unittest.TestCase): paddle.enable_static() with new_program_scope(): # create network - x = paddle.static.data( - name="x", shape=[None, IMAGE_SIZE], dtype='float32') + x = paddle.static.data(name="x", + shape=[None, IMAGE_SIZE], + dtype='float32') z = paddle.static.nn.fc(x, 10, bias_attr=False) z = paddle.static.nn.fc(z, 128, bias_attr=False) loss = fluid.layers.reduce_mean(z) @@ -88,8 +91,8 @@ class TestSaveLoadBinaryFormat(unittest.TestCase): base_map = {} for var in prog.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t @@ -100,26 +103,30 @@ class TestSaveLoadBinaryFormat(unittest.TestCase): self.set_zero(prog, place) var_list = list( filter(lambda var: var.persistable, prog.list_vars())) - fluid.io.load_vars( - exe, path_vars1, main_program=prog, vars=var_list) + fluid.io.load_vars(exe, + path_vars1, + main_program=prog, + vars=var_list) for var in prog.list_vars(): if var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) # test for io.save_vars/replace_load_vars path_vars2 = 'test_replace_save_load_vars_binary2/model/' - fluid.io.save_vars( - exe, path_vars2, main_program=prog, vars=var_list) + fluid.io.save_vars(exe, + path_vars2, + main_program=prog, + vars=var_list) self.set_zero(prog, place) self.replace_load_vars(prog, path_vars2) for var in prog.list_vars(): if var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) @@ -132,7 +139,8 @@ class TestSaveLoadBinaryFormat(unittest.TestCase): y = fluid.layers.fc( x, OUTPUT_NUM, - name='fc_vars', ) + name='fc_vars', + ) prog = fluid.default_main_program() place = fluid.CPUPlace( ) if not paddle.fluid.core.is_compiled_with_cuda( @@ -146,8 +154,9 @@ class TestSaveLoadBinaryFormat(unittest.TestCase): 
if var.persistable and list( var.shape) == [IMAGE_SIZE, OUTPUT_NUM]: tensor = var.get_value() - paddle.save( - tensor, dirname + 'fc_vars.w_0', use_binary_format=True) + paddle.save(tensor, + dirname + 'fc_vars.w_0', + use_binary_format=True) break origin = np.array(var.get_value()) @@ -221,8 +230,8 @@ class TestSaveLoadBinaryFormat(unittest.TestCase): self.assertTrue(isinstance(load_sr, fluid.core.SelectedRows)) self.assertTrue(list(load_sr.rows()) == rows) self.assertTrue(load_sr.height() == height) - self.assertTrue( - np.array_equal(np.array(load_sr.get_tensor()), np_array)) + self.assertTrue(np.array_equal(np.array(load_sr.get_tensor()), + np_array)) with self.assertRaises(RuntimeError): fluid.core.save_selected_rows( diff --git a/python/paddle/fluid/tests/unittests/test_pairwise_distance.py b/python/paddle/fluid/tests/unittests/test_pairwise_distance.py index c91616b06ee..651d9b5ea68 100644 --- a/python/paddle/fluid/tests/unittests/test_pairwise_distance.py +++ b/python/paddle/fluid/tests/unittests/test_pairwise_distance.py @@ -28,19 +28,22 @@ def test_static(x_np, y_np, p=2.0, epsilon=1e-6, keepdim=False): prog = paddle.static.Program() startup_prog = paddle.static.Program() - place = fluid.CUDAPlace(0) if paddle.fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if paddle.fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with paddle.static.program_guard(prog, startup_prog): x = paddle.fluid.data(name='x', shape=x_np.shape, dtype=x_np.dtype) y = paddle.fluid.data(name='y', shape=y_np.shape, dtype=x_np.dtype) - dist = paddle.nn.layer.distance.PairwiseDistance( - p=p, epsilon=epsilon, keepdim=keepdim) + dist = paddle.nn.layer.distance.PairwiseDistance(p=p, + epsilon=epsilon, + keepdim=keepdim) distance = dist(x, y) exe = paddle.static.Executor(place) static_ret = exe.run(prog, - feed={'x': x_np, - 'y': y_np}, + feed={ + 'x': x_np, + 'y': y_np + }, fetch_list=[distance]) static_ret = static_ret[0] return static_ret @@ -50,8 +53,9 @@ def test_dygraph(x_np, y_np, p=2.0, epsilon=1e-6, keepdim=False): paddle.disable_static() x = paddle.to_tensor(x_np) y = paddle.to_tensor(y_np) - dist = paddle.nn.layer.distance.PairwiseDistance( - p=p, epsilon=epsilon, keepdim=keepdim) + dist = paddle.nn.layer.distance.PairwiseDistance(p=p, + epsilon=epsilon, + keepdim=keepdim) distance = dist(x, y) dygraph_ret = distance.numpy() paddle.enable_static() @@ -59,6 +63,7 @@ def test_dygraph(x_np, y_np, p=2.0, epsilon=1e-6, keepdim=False): class TestPairwiseDistance(unittest.TestCase): + def test_pairwise_distance(self): all_shape = [[100, 100], [4, 5, 6, 7]] dtypes = ['float32', 'float64'] @@ -71,8 +76,9 @@ class TestPairwiseDistance(unittest.TestCase): static_ret = test_static(x_np, y_np, keepdim=keepdim) dygraph_ret = test_dygraph(x_np, y_np, keepdim=keepdim) - excepted_value = pairwise_distance( - x_np, y_np, keepdim=keepdim) + excepted_value = pairwise_distance(x_np, + y_np, + keepdim=keepdim) self.assertTrue(np.allclose(static_ret, dygraph_ret)) self.assertTrue(np.allclose(static_ret, excepted_value)) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_class_center_sample.py b/python/paddle/fluid/tests/unittests/test_parallel_class_center_sample.py index e2a526110f1..5dfe41d7bfe 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_class_center_sample.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_class_center_sample.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard class 
TestParallelClassCenterSample(TestMultipleGpus): + def test_parallel_class_center_sample(self): self.run_mnist_2gpu('parallel_class_center_sample.py') self.run_mnist_2gpu('parallel_class_center_sample.py', eager_mode=False) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_control_flow.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_control_flow.py index 3c45b2c7950..41ab6ebf29a 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_control_flow.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_control_flow.py @@ -26,6 +26,7 @@ flag_name = os.path.splitext(__file__)[0] class TestDygraphControlFlowSame(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -34,14 +35,14 @@ class TestDygraphControlFlowSame(TestDistBase): def test_net(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_control_flow_same.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_control_flow_same.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestFleetDygraphControlFlowSame(TestDygraphControlFlowSame): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -51,6 +52,7 @@ class TestFleetDygraphControlFlowSame(TestDygraphControlFlowSame): class TestFleetDygraphControlFlowSameAccGrad(TestDygraphControlFlowSame): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -60,6 +62,7 @@ class TestFleetDygraphControlFlowSameAccGrad(TestDygraphControlFlowSame): class TestDygraphControlFlowDiff(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -68,14 +71,14 @@ class TestDygraphControlFlowDiff(TestDistBase): def test_net(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_control_flow_different.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_control_flow_different.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestFleetDygraphControlFlowDiff(TestDygraphControlFlowDiff): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -85,6 +88,7 @@ class TestFleetDygraphControlFlowDiff(TestDygraphControlFlowDiff): class TestFleetDygraphControlFlowDiffAccGrad(TestDygraphControlFlowDiff): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py index 8145e880a65..930bf5345fc 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py @@ -150,6 +150,7 @@ def start_local_trainers(cluster, class TestMultipleGpus(unittest.TestCase): + def run_mnist_2gpu(self, target_file_name, eager_mode=True): if not fluid.core.is_compiled_with_cuda( ) or fluid.core.get_cuda_device_count() == 0: @@ -161,12 +162,11 @@ class TestMultipleGpus(unittest.TestCase): cluster, pod = get_cluster_from_args(selected_gpus) - procs = start_local_trainers( - cluster, - pod, - eager_mode=eager_mode, - training_script=target_file_name, - training_script_args=[]) + procs = start_local_trainers(cluster, + pod, + eager_mode=eager_mode, + training_script=target_file_name, + training_script_args=[]) while True: alive = watch_local_trainers(procs, 
cluster.trainers_endpoints()) @@ -178,15 +178,15 @@ class TestMultipleGpus(unittest.TestCase): class TestMultipleWithGloo(unittest.TestCase): + def run_mnist_2cpu(self, target_file_name): cluster, pod = get_cluster_from_args( [0, 1]) #tmp use. for getting trainer_nranks() - procs = start_local_trainers_cpu( - cluster.trainers_endpoints(), - training_script=target_file_name, - training_script_args=[]) + procs = start_local_trainers_cpu(cluster.trainers_endpoints(), + training_script=target_file_name, + training_script_args=[]) while True: alive = watch_local_trainers(procs, cluster.trainers_nranks()) @@ -198,18 +198,21 @@ class TestMultipleWithGloo(unittest.TestCase): class TestDataParallelGradientCheck(TestMultipleGpus): + def test_multiple_gpus_dynamic(self): self.run_mnist_2gpu('parallel_dygraph_gradient_check.py') class TestDataParallelWithPyLayer(TestMultipleGpus): + def test_parallel_dygraph_dataparallel_with_pylayer(self): self.run_mnist_2gpu('parallel_dygraph_dataparallel_with_pylayer.py') - self.run_mnist_2gpu( - 'parallel_dygraph_dataparallel_with_pylayer.py', eager_mode=False) + self.run_mnist_2gpu('parallel_dygraph_dataparallel_with_pylayer.py', + eager_mode=False) class TestGradientCheckInEagerMode(TestMultipleGpus): + def test_multiple_gpus_dynamic(self): self.run_mnist_2gpu('parallel_dygraph_gradient_check_in_eager_mode.py') diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py index ce67a2ce4d2..725d5249f59 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py @@ -104,6 +104,7 @@ def start_local_trainers(cluster, class TestMultipleGpus(unittest.TestCase): + def run_mnist_2gpu(self, target_file_name): #if not fluid.core.is_compiled_with_cuda( #) or fluid.core.get_cuda_device_count() == 0: @@ -114,11 +115,10 @@ class TestMultipleGpus(unittest.TestCase): pod = None cluster, pod = get_cluster_from_args(selected_gpus) - procs = start_local_trainers( - cluster, - pod, - training_script=target_file_name, - training_script_args=[]) + procs = start_local_trainers(cluster, + pod, + training_script=target_file_name, + training_script_args=[]) while True: alive = watch_local_trainers(procs, cluster.trainers_nranks()) @@ -130,11 +130,13 @@ class TestMultipleGpus(unittest.TestCase): class TestDataParallelGradientCheck(TestMultipleGpus): + def test_multiple_gpus_dynamic(self): self.run_mnist_2gpu('parallel_dygraph_gradient_check.py') class TestDataParallelGradientCheckInEagerMode(TestMultipleGpus): + def test_multiple_gpus_dynamic(self): self.run_mnist_2gpu('parallel_dygraph_gradient_check_in_eager_mode.py') diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_mnist.py index 0c55e135721..e25a74863e4 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_mnist.py @@ -27,6 +27,7 @@ flag_name = os.path.splitext(__file__)[0] class TestParallelDygraphMnist(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -35,17 +36,17 @@ class TestParallelDygraphMnist(TestDistBase): def test_mnist(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_mnist.py", - delta=1e-5, - check_error_log=True, - 
log_name=flag_name) + self.check_with_place("parallel_dygraph_mnist.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) #TODO(liuyuhui): Multi-Card Baidu Kunlun XPU training exist accuracy problems -#it is difficult to find out immediately where the problem is, -#and we will work with frameworkers' help to fix it. +#it is difficult to find out immediately where the problem is, +#and we will work with frameworkers' help to fix it. class TestParallelDygraphMnistXPU(TestDistBase): + def _setup_config(self): self._sync_mode = False self._bkcl_mode = True @@ -54,20 +55,21 @@ class TestParallelDygraphMnistXPU(TestDistBase): def test_mnist_xpu(self): if fluid.core.is_compiled_with_xpu(): - self.check_with_place( - "parallel_dygraph_mnist.py", - delta=1e-4, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_mnist.py", + delta=1e-4, + check_error_log=True, + log_name=flag_name) class TestParallelDygraphMnistSpawn(TestDistSpawnRunner): + def test_mnist_with_spawn(self): if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): self.check_dist_result_with_spawn(test_class=TestMnist, delta=1e-5) class TestParallelDygraphMnistAccGrad(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -78,14 +80,14 @@ class TestParallelDygraphMnistAccGrad(TestDistBase): def test_mnist(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_mnist.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_mnist.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestFleetDygraphMnistXPU(TestDistBase): + def _setup_config(self): self._sync_mode = False self._bkcl_mode = True @@ -95,11 +97,10 @@ class TestFleetDygraphMnistXPU(TestDistBase): def test_mnist(self): if fluid.core.is_compiled_with_xpu(): - self.check_with_place( - "parallel_dygraph_mnist.py", - delta=1e-4, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_mnist.py", + delta=1e-4, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_mp_layers.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_mp_layers.py index da8df19a1e6..8e26452389b 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_mp_layers.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_mp_layers.py @@ -22,6 +22,7 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestModelParallelLayer(TestMultipleGpus): + def test_hybrid_parallel_mp_layer(self): self.run_mnist_2gpu('hybrid_parallel_mp_layers.py') self.run_mnist_2gpu('hybrid_parallel_mp_layers.py', eager_mode=False) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_no_sync.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_no_sync.py index 2e364e5d4d9..a3c4a90746f 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_no_sync.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_no_sync.py @@ -29,6 +29,7 @@ flag_name = os.path.splitext(__file__)[0] class TestParallelDygraphNoSync(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -37,14 +38,14 @@ class TestParallelDygraphNoSync(TestDistBase): def test_no_sync(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_no_sync.py", - delta=1e-5, - check_error_log=True, - 
log_name=flag_name) + self.check_with_place("parallel_dygraph_no_sync.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestParallelDygraphNoSyncUnusedParam(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -53,14 +54,14 @@ class TestParallelDygraphNoSyncUnusedParam(TestDistBase): def test_no_sync_ununsed_param(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_no_sync_unused_params.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_no_sync_unused_params.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestParallelDygraphNoSyncControlFlow(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -69,37 +70,39 @@ class TestParallelDygraphNoSyncControlFlow(TestDistBase): def test_no_sync_control_flow(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_no_sync_control_flow.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_no_sync_control_flow.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestParallelDygraphNoSyncSpawn(TestDistSpawnRunner): + def test_no_sync_with_spawn(self): if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): self.check_dist_result_with_spawn(test_class=TestNoSync, delta=1e-5) class TestParallelDygraphNoSyncUnusedParamSpawn(TestDistSpawnRunner): + def _args_config(self, args): args.find_unused_parameters = True def test_no_sync_with_spawn(self): if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): - self.check_dist_result_with_spawn( - test_class=TestNoSyncUnusedParam, delta=1e-5) + self.check_dist_result_with_spawn(test_class=TestNoSyncUnusedParam, + delta=1e-5) class TestParallelDygraphNoSyncControlFlowSpawn(TestDistSpawnRunner): + def _args_config(self, args): args.find_unused_parameters = True def test_no_sync_with_spawn(self): if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): - self.check_dist_result_with_spawn( - test_class=TestNoSyncControlFlow, delta=1e-5) + self.check_dist_result_with_spawn(test_class=TestNoSyncControlFlow, + delta=1e-5) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_no_sync_gradient_check.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_no_sync_gradient_check.py index d5eebf01adb..fad9e902cc9 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_no_sync_gradient_check.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_no_sync_gradient_check.py @@ -21,6 +21,7 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestDataParallelLayer(TestMultipleGpus): + def test_parallel_dygraph_dataparallel_no_sync(self): self.run_mnist_2gpu('parallel_dygraph_no_sync_gradient_check.py') diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_pipeline_parallel.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_pipeline_parallel.py index 7f7db930d4c..5357a6a132a 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_pipeline_parallel.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_pipeline_parallel.py @@ -22,6 +22,7 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestHybridPipeParallel(TestMultipleGpus): + def test_hybrid_parallel_pp_layer(self): 
self.run_mnist_2gpu('hybrid_parallel_pp_layer.py') self.run_mnist_2gpu('hybrid_parallel_pp_layer.py', eager_mode=False) @@ -32,8 +33,8 @@ class TestHybridPipeParallel(TestMultipleGpus): def test_hybrid_parallel_shared_weight(self): self.run_mnist_2gpu('hybrid_parallel_shared_weight.py') - self.run_mnist_2gpu( - 'hybrid_parallel_shared_weight.py', eager_mode=False) + self.run_mnist_2gpu('hybrid_parallel_shared_weight.py', + eager_mode=False) def test_pipeline_parallel_amp(self): self.run_mnist_2gpu('hybrid_parallel_pp_amp.py') @@ -45,8 +46,8 @@ class TestHybridPipeParallel(TestMultipleGpus): def test_hybrid_parallel_transformer(self): self.run_mnist_2gpu('hybrid_parallel_pp_transformer.py') - self.run_mnist_2gpu( - 'hybrid_parallel_pp_transformer.py', eager_mode=False) + self.run_mnist_2gpu('hybrid_parallel_pp_transformer.py', + eager_mode=False) def test_hybrid_parallel_save_load(self): self.run_mnist_2gpu('hybrid_parallel_pp_save_load.py') diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_se_resnext.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_se_resnext.py index cf89dc484c4..9127f3bfdb8 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_se_resnext.py @@ -27,6 +27,7 @@ flag_name = os.path.splitext(__file__)[0] class TestParallelDygraphSeResNeXt(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -34,18 +35,18 @@ class TestParallelDygraphSeResNeXt(TestDistBase): def test_se_resnext(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_se_resnext.py", - delta=0.01, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_se_resnext.py", + delta=0.01, + check_error_log=True, + log_name=flag_name) class TestParallelDygraphSeResNeXtSpawn(TestDistSpawnRunner): + def test_se_resnext_with_spawn(self): if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): - self.check_dist_result_with_spawn( - test_class=TestSeResNeXt, delta=0.01) + self.check_dist_result_with_spawn(test_class=TestSeResNeXt, + delta=0.01) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sharding_parallel.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sharding_parallel.py index 503bd9d0f97..920ef969317 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sharding_parallel.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sharding_parallel.py @@ -26,8 +26,8 @@ class TestHybridParallel(TestMultipleGpus): # check sharding logic as well as the accuracy with single mode def test_hybrid_parallel_sharding_logic(self): self.run_mnist_2gpu('hybrid_parallel_sharding_model.py') - self.run_mnist_2gpu( - 'hybrid_parallel_sharding_model.py', eager_mode=False) + self.run_mnist_2gpu('hybrid_parallel_sharding_model.py', + eager_mode=False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding.py index 43907da6098..ae65b545a95 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding.py @@ -28,6 +28,7 @@ flag_name = os.path.splitext(__file__)[0] class TestParallelDygraphSparseEmdedding(TestDistBase): + def _setup_config(self): self._sync_mode = 
False self._nccl2_mode = True @@ -35,14 +36,14 @@ class TestParallelDygraphSparseEmdedding(TestDistBase): def test_sparse_embedding(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_sparse_embedding.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_sparse_embedding.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestParallelDygraphSparseEmdeddingFP64(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -50,18 +51,18 @@ class TestParallelDygraphSparseEmdeddingFP64(TestDistBase): def test_sparse_embedding_fp64(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_sparse_embedding_fp64.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_sparse_embedding_fp64.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestParallelDygraphSparseEmdeddingSpawn(TestDistSpawnRunner): + def test_sparse_embedding_with_spawn(self): if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): - self.check_dist_result_with_spawn( - test_class=TestSparseEmbedding, delta=1e-5) + self.check_dist_result_with_spawn(test_class=TestSparseEmbedding, + delta=1e-5) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_diff_length_gloo.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_diff_length_gloo.py index 1c425a40a9b..2abd9a1f854 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_diff_length_gloo.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_diff_length_gloo.py @@ -28,6 +28,7 @@ flag_name = os.path.splitext(__file__)[0] class TestParallelDygraphSparseEmdedding_GLOO(TestDistBase): + def _setup_config(self): self._sync_mode = False self._gloo_mode = True @@ -35,11 +36,10 @@ class TestParallelDygraphSparseEmdedding_GLOO(TestDistBase): self._diff_batch = True def test_sparse_embedding(self): - self.check_with_place( - "parallel_dygraph_sparse_embedding.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_sparse_embedding.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_gloo.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_gloo.py index 56fcf806c47..5d42d54a28c 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_gloo.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_gloo.py @@ -28,31 +28,31 @@ flag_name = os.path.splitext(__file__)[0] class TestParallelDygraphSparseEmdedding_GLOO(TestDistBase): + def _setup_config(self): self._sync_mode = False self._gloo_mode = True self._dygraph = True def test_sparse_embedding(self): - self.check_with_place( - "parallel_dygraph_sparse_embedding.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_sparse_embedding.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestParallelDygraphSparseEmdeddingFP64_GLOO(TestDistBase): + def _setup_config(self): self._sync_mode = False self._gloo_mode = True self._dygraph = True def test_sparse_embedding_fp64(self): - self.check_with_place( - 
"parallel_dygraph_sparse_embedding_fp64.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_sparse_embedding_fp64.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height.py index 9aca448f161..7b1cd0efcdf 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height.py @@ -27,6 +27,7 @@ flag_name = os.path.splitext(__file__)[0] class TestParallelDygraphSparseEmdeddingOverHeight(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -42,6 +43,7 @@ class TestParallelDygraphSparseEmdeddingOverHeight(TestDistBase): class TestParallelDygraphSparseEmdeddingOverHeightSpawn(TestDistSpawnRunner): + def test_sparse_embedding_with_spawn(self): if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): self.check_dist_result_with_spawn( diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height_gloo.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height_gloo.py index ba43e26e23a..d48eb401e7a 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height_gloo.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height_gloo.py @@ -27,6 +27,7 @@ flag_name = os.path.splitext(__file__)[0] class TestParallelDygraphSparseEmdeddingOverHeight_GLOO(TestDistBase): + def _setup_config(self): self._sync_mode = False self._gloo_mode = True diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sync_batch_norm.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sync_batch_norm.py index 7cf1e9711b7..d4b73ab7b42 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sync_batch_norm.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sync_batch_norm.py @@ -18,10 +18,12 @@ from test_dist_base import TestDistBase import paddle.fluid as fluid import os + flag_name = os.path.splitext(__file__)[0] class TestParallelDygraphMnist(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -29,11 +31,10 @@ class TestParallelDygraphMnist(TestDistBase): def test_mnist(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_sync_batch_norm.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_sync_batch_norm.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_tensor_parallel.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_tensor_parallel.py index 14a29162784..971c545f0f5 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_tensor_parallel.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_tensor_parallel.py @@ -22,6 +22,7 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestHybridParallel(TestMultipleGpus): + def test_hybrid_parallel_mp_random(self): self.run_mnist_2gpu('hybrid_parallel_mp_random.py') self.run_mnist_2gpu('hybrid_parallel_mp_random.py', eager_mode=False) 
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer.py index 71a8c7347e1..03c3235b503 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer.py @@ -27,6 +27,7 @@ flag_name = os.path.splitext(__file__)[0] class TestParallelDygraphTransformer(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -34,14 +35,14 @@ class TestParallelDygraphTransformer(TestDistBase): def test_transformer(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_transformer.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_transformer.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestParallelDygraphTransformerAccGrad(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -51,11 +52,10 @@ class TestParallelDygraphTransformerAccGrad(TestDistBase): def test_transformer(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_transformer.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_transformer.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py index d3619cc1b9a..bfd9158e9ec 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py @@ -27,20 +27,21 @@ flag_name = os.path.splitext(__file__)[0] class TestParallelDygraphTransformer_GLOO(TestDistBase): + def _setup_config(self): self._sync_mode = False self._gloo_mode = True self._dygraph = True def test_transformer(self): - self.check_with_place( - "parallel_dygraph_transformer.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_transformer.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestParallelDygraphTransformerAccGrad_GLOO(TestDistBase): + def _setup_config(self): self._sync_mode = False self._gloo_mode = True @@ -50,11 +51,10 @@ class TestParallelDygraphTransformerAccGrad_GLOO(TestDistBase): def test_transformer(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_transformer.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_transformer.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_unused_variables.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_unused_variables.py index 75fa6f7c71d..1f71514cc73 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_unused_variables.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_unused_variables.py @@ -27,6 +27,7 @@ flag_name = os.path.splitext(__file__)[0] class TestParallelDygraphUnusedVar(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -34,14 +35,14 @@ class TestParallelDygraphUnusedVar(TestDistBase): def test_net(self): if 
fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_unused_variables.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_unused_variables.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestFleetDygraphUnusedVar(TestParallelDygraphUnusedVar): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -50,6 +51,7 @@ class TestFleetDygraphUnusedVar(TestParallelDygraphUnusedVar): class TestSparseEmbeddingUnusedVarsSpawn(TestDistSpawnRunner): + def test_mnist_with_spawn(self): if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): self.check_dist_result_with_spawn( @@ -57,6 +59,7 @@ class TestSparseEmbeddingUnusedVarsSpawn(TestDistSpawnRunner): class TestParallelDygraphNoVar(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -64,14 +67,14 @@ class TestParallelDygraphNoVar(TestDistBase): def test_net(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_none_var.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_none_var.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestParallelDygraphSharedUnusedVariables(TestDistBase): + def _setup_config(self): self._sync_mode = False self._nccl2_mode = True @@ -79,11 +82,10 @@ class TestParallelDygraphSharedUnusedVariables(TestDistBase): def test_mnist(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "parallel_dygraph_shared_unused_var.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_shared_unused_var.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_unused_variables_gloo.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_unused_variables_gloo.py index 89373fcb6ee..f605ae8fe28 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_unused_variables_gloo.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_unused_variables_gloo.py @@ -27,45 +27,45 @@ flag_name = os.path.splitext(__file__)[0] class TestParallelDygraphUnusedVar_GLOO(TestDistBase): + def _setup_config(self): self._sync_mode = False self._gloo_mode = True self._dygraph = True def test_net(self): - self.check_with_place( - "parallel_dygraph_unused_variables.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_unused_variables.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestParallelDygraphNoVar_GLOO(TestDistBase): + def _setup_config(self): self._sync_mode = False self._gloo_mode = True self._dygraph = True def test_net(self): - self.check_with_place( - "parallel_dygraph_none_var.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_none_var.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) class TestParallelDygraphSharedUnusedVariables_GLOO(TestDistBase): + def _setup_config(self): self._sync_mode = False self._gloo_mode = True self._dygraph = True def test_mnist(self): - self.check_with_place( - "parallel_dygraph_shared_unused_var.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("parallel_dygraph_shared_unused_var.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) if 
__name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py index 47d286fb6ab..c81a3801995 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py @@ -39,27 +39,25 @@ embedding_name = 'emb' def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, is_sparse, **ignored): # 8 features - predicate_embedding = fluid.layers.embedding( - input=predicate, - is_sparse=is_sparse, - size=[pred_dict_len, word_dim], - dtype='float32', - param_attr='vemb') - - mark_embedding = fluid.layers.embedding( - input=mark, - is_sparse=is_sparse, - size=[mark_dict_len, mark_dim], - dtype='float32') + predicate_embedding = fluid.layers.embedding(input=predicate, + is_sparse=is_sparse, + size=[pred_dict_len, word_dim], + dtype='float32', + param_attr='vemb') + + mark_embedding = fluid.layers.embedding(input=mark, + is_sparse=is_sparse, + size=[mark_dict_len, mark_dim], + dtype='float32') word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] emb_layers = [ - fluid.layers.embedding( - size=[word_dict_len, word_dim], - is_sparse=is_sparse, - input=x, - param_attr=fluid.ParamAttr( - name=embedding_name, trainable=False)) for x in word_input + fluid.layers.embedding(size=[word_dict_len, word_dim], + is_sparse=is_sparse, + input=x, + param_attr=fluid.ParamAttr(name=embedding_name, + trainable=False)) + for x in word_input ] # TODO(zcd): if the parameter is not trainable, the # parameter's gradient should not generated. @@ -76,12 +74,11 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, hidden_0 = fluid.layers.sums(input=hidden_0_layers) - lstm_0 = fluid.layers.dynamic_lstm( - input=hidden_0, - size=hidden_dim, - candidate_activation='relu', - gate_activation='sigmoid', - cell_activation='sigmoid') + lstm_0 = fluid.layers.dynamic_lstm(input=hidden_0, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid') # stack L-LSTM and R-LSTM with direct edges input_tmp = [hidden_0, lstm_0] @@ -92,13 +89,12 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, fluid.layers.fc(input=input_tmp[1], size=hidden_dim, act='tanh') ]) - lstm = fluid.layers.dynamic_lstm( - input=mix_hidden, - size=hidden_dim, - candidate_activation='relu', - gate_activation='sigmoid', - cell_activation='sigmoid', - is_reverse=((i % 2) == 1)) + lstm = fluid.layers.dynamic_lstm(input=mix_hidden, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid', + is_reverse=((i % 2) == 1)) input_tmp = [mix_hidden, lstm] @@ -111,6 +107,7 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, class TestCRFModel(unittest.TestCase): + def check_network_convergence(self, is_sparse, build_strategy=None, @@ -121,31 +118,48 @@ class TestCRFModel(unittest.TestCase): scope = fluid.Scope() with fluid.scope_guard(scope): with fluid.program_guard(main, startup): - word = fluid.layers.data( - name='word_data', shape=[1], dtype='int64', lod_level=1) - predicate = fluid.layers.data( - name='verb_data', shape=[1], dtype='int64', lod_level=1) - ctx_n2 = fluid.layers.data( - name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1) - ctx_n1 = fluid.layers.data( - name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1) - ctx_0 = fluid.layers.data( - name='ctx_0_data', shape=[1], 
dtype='int64', lod_level=1) - ctx_p1 = fluid.layers.data( - name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1) - ctx_p2 = fluid.layers.data( - name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1) - mark = fluid.layers.data( - name='mark_data', shape=[1], dtype='int64', lod_level=1) + word = fluid.layers.data(name='word_data', + shape=[1], + dtype='int64', + lod_level=1) + predicate = fluid.layers.data(name='verb_data', + shape=[1], + dtype='int64', + lod_level=1) + ctx_n2 = fluid.layers.data(name='ctx_n2_data', + shape=[1], + dtype='int64', + lod_level=1) + ctx_n1 = fluid.layers.data(name='ctx_n1_data', + shape=[1], + dtype='int64', + lod_level=1) + ctx_0 = fluid.layers.data(name='ctx_0_data', + shape=[1], + dtype='int64', + lod_level=1) + ctx_p1 = fluid.layers.data(name='ctx_p1_data', + shape=[1], + dtype='int64', + lod_level=1) + ctx_p2 = fluid.layers.data(name='ctx_p2_data', + shape=[1], + dtype='int64', + lod_level=1) + mark = fluid.layers.data(name='mark_data', + shape=[1], + dtype='int64', + lod_level=1) feature_out = db_lstm(**locals()) - target = fluid.layers.data( - name='target', shape=[1], dtype='int64', lod_level=1) + target = fluid.layers.data(name='target', + shape=[1], + dtype='int64', + lod_level=1) crf_cost = fluid.layers.linear_chain_crf( input=feature_out, label=target, - param_attr=fluid.ParamAttr( - name='crfw', learning_rate=1e-1)) + param_attr=fluid.ParamAttr(name='crfw', learning_rate=1e-1)) avg_cost = fluid.layers.mean(crf_cost) sgd_optimizer = fluid.optimizer.SGD( @@ -156,10 +170,9 @@ class TestCRFModel(unittest.TestCase): staircase=True)) sgd_optimizer.minimize(avg_cost) - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.conll05.test(), buf_size=8192), - batch_size=8) + train_data = paddle.batch(paddle.reader.shuffle( + paddle.dataset.conll05.test(), buf_size=8192), + batch_size=8) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) @@ -168,19 +181,19 @@ class TestCRFModel(unittest.TestCase): train_cp = compiler.CompiledProgram(main).with_data_parallel( loss_name=avg_cost.name, build_strategy=build_strategy) - feeder = fluid.DataFeeder( - feed_list=[ - word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, - mark, target - ], - place=fluid.CPUPlace()) + feeder = fluid.DataFeeder(feed_list=[ + word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, + mark, target + ], + place=fluid.CPUPlace()) data = train_data() for i in range(4): cur_batch = next(data) - print(exe.run(train_cp, - feed=feeder.feed(cur_batch), - fetch_list=[avg_cost.name])[0]) + print( + exe.run(train_cp, + feed=feeder.feed(cur_batch), + fetch_list=[avg_cost.name])[0]) def _new_build_strategy(self, use_reduce=False): build_strategy = fluid.BuildStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py index e0bae089829..7618371036b 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py @@ -21,6 +21,7 @@ import os class TestParallelExecutorDropExeScope(unittest.TestCase): + def check_drop_scope(self, use_cuda=True): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() @@ -42,16 +43,14 @@ class TestParallelExecutorDropExeScope(unittest.TestCase): exec_strateg = fluid.ExecutionStrategy() exec_strateg.num_iteration_per_drop_scope = 10 - train_exe = fluid.ParallelExecutor( - use_cuda=use_cuda, - 
main_program=train_program, - loss_name=loss.name, - exec_strategy=exec_strateg) - test_exe = fluid.ParallelExecutor( - use_cuda=use_cuda, - main_program=test_program, - share_vars_from=train_exe, - exec_strategy=exec_strateg) + train_exe = fluid.ParallelExecutor(use_cuda=use_cuda, + main_program=train_program, + loss_name=loss.name, + exec_strategy=exec_strateg) + test_exe = fluid.ParallelExecutor(use_cuda=use_cuda, + main_program=test_program, + share_vars_from=train_exe, + exec_strategy=exec_strateg) x = numpy.random.random(size=(10, 1)).astype('float32') train_exe.run(feed={"X": x}, fetch_list=[loss.name]) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py index 328b3a4813e..aefa635508d 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py @@ -18,10 +18,12 @@ import unittest import logging import six import os + os.environ['CPU_NUM'] = str(4) class TestBase(unittest.TestCase): + def main(self, network_func, iter=10, @@ -47,21 +49,21 @@ class TestBase(unittest.TestCase): exe_strategy._dry_run = True exe_strategy.use_experimental_executor = use_experimental_executor train_cp = compiler.CompiledProgram( - main_prog).with_data_parallel( - loss_name=loss.name, exec_strategy=exe_strategy) + main_prog).with_data_parallel(loss_name=loss.name, + exec_strategy=exe_strategy) for _ in six.moves.xrange(iter): for _ in six.moves.xrange(iter_per_pe): exe.run(train_cp) class TestMNISTDryRun(TestBase): + def test_mnist_dry_run(self): for use_gpu in (False, True): for use_experimental_executor in (False, True): - self.main( - network_func=TestMNISTDryRun.network_func, - use_gpu=use_gpu, - use_experimental_executor=use_experimental_executor) + self.main(network_func=TestMNISTDryRun.network_func, + use_gpu=use_gpu, + use_experimental_executor=use_experimental_executor) @staticmethod def network_func(): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_feed_persistable_var.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_feed_persistable_var.py index 2597df7faff..2c903f7c997 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_feed_persistable_var.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_feed_persistable_var.py @@ -24,12 +24,14 @@ import os class TestFeedPersistableVar(unittest.TestCase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) batch_size = 4 - cls.img, cls.label = init_data( - batch_size, img_shape=[784], label_range=9) + cls.img, cls.label = init_data(batch_size, + img_shape=[784], + label_range=9) cls.feed_dict = { 'image': cls.img, 'label': cls.label, @@ -37,12 +39,11 @@ class TestFeedPersistableVar(unittest.TestCase): } def optimizer(self): - learning_rate = fluid.layers.create_global_var( - name="learning_rate", - shape=[1], - value=1.0, - dtype='float32', - persistable=True) + learning_rate = fluid.layers.create_global_var(name="learning_rate", + shape=[1], + value=1.0, + dtype='float32', + persistable=True) optimizer = fluid.optimizer.SGD(learning_rate=learning_rate) return optimizer @@ -70,12 +71,12 @@ class TestFeedPersistableVar(unittest.TestCase): self.check_feed_persistable_var(self.feed_dict) self.check_feed_persistable_var(self.feed_dict, use_cuda=True) - self.feed_dict['learning_rate'] = numpy.array( - [1.0, 1.0]).astype("float32") + self.feed_dict['learning_rate'] = 
numpy.array([1.0, + 1.0]).astype("float32") self.check_feed_persistable_var(self.feed_dict, use_cuda=True) - self.feed_dict['learning_rate'] = numpy.array( - [1.0, 1.0]).astype("float32") + self.feed_dict['learning_rate'] = numpy.array([1.0, + 1.0]).astype("float32") run = partial(self.check_feed_persistable_var, self.feed_dict) self.assertRaises(RuntimeError, run) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py index 052edac0ea7..0c3c293f7b9 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py @@ -38,6 +38,7 @@ def Lenet(data, class_dim): class TestFetchAndFeed(unittest.TestCase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -52,8 +53,9 @@ class TestFetchAndFeed(unittest.TestCase): startup = fluid.Program() startup.random_seed = seed with fluid.program_guard(main_program, startup): - data = fluid.layers.data( - name='image', shape=[3, 224, 224], dtype='float32') + data = fluid.layers.data(name='image', + shape=[3, 224, 224], + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') out = Lenet(data, class_dim=102) loss = fluid.layers.cross_entropy(input=out, label=label) @@ -84,13 +86,14 @@ class TestFetchAndFeed(unittest.TestCase): def run_parallel_exe_with_fetch(self, compiled_program, exe, use_cuda, data, label, loss): + def get_data(batch_size=8): np.random.seed(5) while True: - img = np.random.random( - size=[batch_size, 3, 224, 224]).astype(np.float32) - l = (np.random.random(size=[batch_size, 1]) * - 10).astype(np.int64) + img = np.random.random(size=[batch_size, 3, 224, 224]).astype( + np.float32) + l = (np.random.random(size=[batch_size, 1]) * 10).astype( + np.int64) yield img, l fetch_list = [] @@ -117,15 +120,16 @@ class TestFetchAndFeed(unittest.TestCase): def run_parallel_exe_with_feed(self, compiled_program, exe, use_cuda, data, label, loss): + def get_data(batch_size=8): np.random.seed(5) while True: train_data = [] for _ in range(batch_size): - img = np.random.random( - size=[1, 3, 224, 224]).astype(np.float32) - label = (np.random.random(size=[1, 1]) * - 10).astype(np.int64) + img = np.random.random(size=[1, 3, 224, 224]).astype( + np.float32) + label = (np.random.random(size=[1, 1]) * 10).astype( + np.int64) train_data.append([img, label]) yield train_data @@ -143,30 +147,28 @@ class TestFetchAndFeed(unittest.TestCase): def check_executor(self, use_faster_executor=False, num_threads=4): if core.is_compiled_with_cuda(): - self.parallel_exe( - use_cuda=True, - run_parallel_exe=self.run_parallel_exe_with_fetch, - use_faster_executor=use_faster_executor, - num_threads=num_threads) - self.parallel_exe( - use_cuda=False, - run_parallel_exe=self.run_parallel_exe_with_fetch, - use_faster_executor=use_faster_executor, - num_threads=num_threads) + self.parallel_exe(use_cuda=True, + run_parallel_exe=self.run_parallel_exe_with_fetch, + use_faster_executor=use_faster_executor, + num_threads=num_threads) + self.parallel_exe(use_cuda=False, + run_parallel_exe=self.run_parallel_exe_with_fetch, + use_faster_executor=use_faster_executor, + num_threads=num_threads) def test_fetch(self): for use_faster_executor in {True, False}: - self.check_executor( - use_faster_executor=use_faster_executor, num_threads=4) - self.check_executor( - use_faster_executor=use_faster_executor, num_threads=1) + 
self.check_executor(use_faster_executor=use_faster_executor, + num_threads=4) + self.check_executor(use_faster_executor=use_faster_executor, + num_threads=1) def test_feed(self): if core.is_compiled_with_cuda(): - self.parallel_exe( - use_cuda=True, run_parallel_exe=self.run_parallel_exe_with_feed) - self.parallel_exe( - use_cuda=False, run_parallel_exe=self.run_parallel_exe_with_feed) + self.parallel_exe(use_cuda=True, + run_parallel_exe=self.run_parallel_exe_with_feed) + self.parallel_exe(use_cuda=False, + run_parallel_exe=self.run_parallel_exe_with_feed) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py index a34982ef3dd..1a015369ec6 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py @@ -26,6 +26,7 @@ def enable_parallel_ssa_executor(enabled=True): class TestParallelExecutorFetchIsolatedVarBase(unittest.TestCase): + def build_network(self, is_training): x = fluid.data(name='x', shape=[-1, 10], dtype='float32') y = fluid.data(name='y', shape=[-1, 10], dtype='float32') @@ -54,7 +55,10 @@ class TestParallelExecutorFetchIsolatedVarBase(unittest.TestCase): for is_training in [False, True]: for use_experimental_executor in [False, True]: for use_parallel_ssa_executor in [False, True]: - func = lambda: self.run_impl(use_gpu, dev_cnt, is_training, use_experimental_executor, use_parallel_ssa_executor) + func = lambda: self.run_impl( + use_gpu, dev_cnt, is_training, + use_experimental_executor, + use_parallel_ssa_executor) self.run_func_with_guard(func) def run_impl(self, use_gpu, dev_cnt, is_training, use_experimental_executor, @@ -63,8 +67,8 @@ class TestParallelExecutorFetchIsolatedVarBase(unittest.TestCase): enable_parallel_ssa_executor(use_parallel_ssa_executor) if fluid.is_compiled_with_cuda(): - if fluid.core.globals()[ - 'FLAGS_enable_parallel_graph'] and not use_gpu: + if fluid.core.globals( + )['FLAGS_enable_parallel_graph'] and not use_gpu: return # windows has only 1 GPU if use_gpu and dev_cnt > 1 and os.name == "nt": @@ -81,11 +85,11 @@ class TestParallelExecutorFetchIsolatedVarBase(unittest.TestCase): exe.run(fluid.default_startup_program()) - prog = fluid.CompiledProgram(fluid.default_main_program( - )).with_data_parallel( - loss_name=loss_name, - exec_strategy=self.exec_strategy(use_experimental_executor), - places=places) + prog = fluid.CompiledProgram( + fluid.default_main_program()).with_data_parallel( + loss_name=loss_name, + exec_strategy=self.exec_strategy(use_experimental_executor), + places=places) BATCH_SIZE = 8 * dev_cnt for _ in six.moves.range(10): @@ -93,8 +97,10 @@ class TestParallelExecutorFetchIsolatedVarBase(unittest.TestCase): y_np = np.random.random(size=[BATCH_SIZE, 10]).astype('float32') _, y_np_fetch = exe.run(prog, - feed={'x': x_np, - 'y': y_np}, + feed={ + 'x': x_np, + 'y': y_np + }, fetch_list=[loss, isolated_var]) self.assertTrue(np.array_equal(y_np, y_np_fetch)) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fix_op_run_order.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fix_op_run_order.py index 24aa080e68c..80da6b5ac61 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fix_op_run_order.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fix_op_run_order.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 
PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,6 +21,7 @@ from paddle.nn import CrossEntropyLoss class TestFixOpRunOrder(unittest.TestCase): + def setUp(self): paddle.enable_static() paddle.seed(1) @@ -29,8 +30,8 @@ class TestFixOpRunOrder(unittest.TestCase): fluid.set_flags({'FLAGS_cudnn_deterministic': 1}) def get_place(self): - return paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda( - ) else paddle.CPUPlace() + return paddle.CUDAPlace( + 0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace() def get_feed(self): batch_size = 4 @@ -43,10 +44,12 @@ class TestFixOpRunOrder(unittest.TestCase): startup_prog = paddle.static.Program() scope = paddle.static.Scope() with paddle.static.program_guard(main_prog, startup_prog): - image = paddle.static.data( - name="image", shape=[None, 3, 224, 224], dtype="float32") - label = paddle.static.data( - name="label", shape=[None, 1], dtype="int64") + image = paddle.static.data(name="image", + shape=[None, 3, 224, 224], + dtype="float32") + label = paddle.static.data(name="label", + shape=[None, 1], + dtype="int64") model = resnet18() pred = model(image) loss_fn = CrossEntropyLoss() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py index 124e1dc0c16..a3a26f481f3 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py @@ -19,6 +19,7 @@ import six class TestInferencePartialFeed(unittest.TestCase): + def setUp(self): self.iterations = 10 self.size = 10 @@ -46,8 +47,10 @@ class TestInferencePartialFeed(unittest.TestCase): prog = fluid.CompiledProgram(main_prog).with_data_parallel( places=places) - gen_random = lambda shape:np.random.uniform(low=-1.0, high=1.0, size=shape).astype('float32') - assert_result = lambda feed, result: self.assertTrue(np.array_equal(np.maximum(0, feed), result)) + gen_random = lambda shape: np.random.uniform( + low=-1.0, high=1.0, size=shape).astype('float32') + assert_result = lambda feed, result: self.assertTrue( + np.array_equal(np.maximum(0, feed), result)) def assert_merged_unmerged(merged, unmerged): unmerged = np.concatenate(unmerged, axis=0) @@ -142,17 +145,20 @@ class TestInferencePartialFeed(unittest.TestCase): for p in places: for has_persistable in [False, True]: for use_split in [False, True]: - self.run_network( - p, use_split=use_split, has_persistable=has_persistable) + self.run_network(p, + use_split=use_split, + has_persistable=has_persistable) class TestInferencePartialFeedUsingDataLoader(unittest.TestCase): + def setUp(self): self.epoch_num = 3 self.batch_num = 101 # a prime number self.batch_size = 32 def create_reader(self): + def __impl__(): for _ in six.moves.range(self.batch_num): yield np.random.random([self.batch_size, 1]).astype('float32'), @@ -162,20 +168,22 @@ class TestInferencePartialFeedUsingDataLoader(unittest.TestCase): def run_network(self, iterable, use_cuda, 
drop_last): x = fluid.data(shape=[None, 1], name='x', dtype='float32') places = fluid.cuda_places() if use_cuda else fluid.cpu_places(4) - loader = fluid.io.DataLoader.from_generator( - feed_list=[x], capacity=16, iterable=iterable, drop_last=drop_last) + loader = fluid.io.DataLoader.from_generator(feed_list=[x], + capacity=16, + iterable=iterable, + drop_last=drop_last) y = fluid.layers.fc(x, size=10) loss = fluid.layers.reduce_mean(y) exe = fluid.Executor(places[0]) exe.run(fluid.default_startup_program()) - prog = fluid.CompiledProgram(fluid.default_main_program( - )).with_data_parallel( - places=places, loss_name=loss.name) + prog = fluid.CompiledProgram( + fluid.default_main_program()).with_data_parallel( + places=places, loss_name=loss.name) - loader.set_batch_generator( - self.create_reader(), places=places if iterable else None) + loader.set_batch_generator(self.create_reader(), + places=places if iterable else None) for _ in six.moves.range(self.epoch_num): actual_batch_num = 0 @@ -203,8 +211,8 @@ class TestInferencePartialFeedUsingDataLoader(unittest.TestCase): self.assertGreater(self.batch_num, actual_batch_num) def test_main(self): - use_cuda_list = [False, True] if fluid.is_compiled_with_cuda( - ) else [False] + use_cuda_list = [False, True + ] if fluid.is_compiled_with_cuda() else [False] iterable_list = [False, True] drop_last_list = [False, True] for iterable in iterable_list: diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py index 61d643f24c1..81625a29e22 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py @@ -34,8 +34,8 @@ def simple_fc_net(use_feed): hidden, size=200, act='tanh', - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0))) + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=1.0))) prediction = fluid.layers.fc(hidden, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) loss = fluid.layers.mean(loss) @@ -73,6 +73,7 @@ def init_data(): class TestMNIST(TestParallelExecutorBase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -92,15 +93,19 @@ class TestMNIST(TestParallelExecutorBase): all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_reduce=False) reduce_first_loss, reduce_last_loss = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_reduce=True) @@ -119,12 +124,13 @@ class TestMNIST(TestParallelExecutorBase): img, label = init_data() - self.check_network_convergence( - simple_fc_net, - feed_dict={"image": img, - "label": label}, - use_device=use_device, - use_reduce=use_reduce) + self.check_network_convergence(simple_fc_net, + feed_dict={ + "image": img, + "label": label + }, + use_device=use_device, + use_reduce=use_reduce) def test_simple_fc(self): # use_device @@ -149,23 +155,29 @@ class TestMNIST(TestParallelExecutorBase): single_first_loss, single_last_loss = self.check_network_convergence( method=simple_fc_net, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_parallel_executor=False) parallel_first_loss, 
parallel_last_loss = self.check_network_convergence( method=simple_fc_net, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_parallel_executor=True) self.assertAlmostEquals( np.mean(parallel_first_loss), single_first_loss, - delta=1e-6, ) - self.assertAlmostEquals( - np.mean(parallel_last_loss), single_last_loss, delta=1e-6) + delta=1e-6, + ) + self.assertAlmostEquals(np.mean(parallel_last_loss), + single_last_loss, + delta=1e-6) def test_simple_fc_parallel_accuracy(self): self.check_simple_fc_parallel_accuracy(DeviceType.CUDA) @@ -178,12 +190,13 @@ class TestMNIST(TestParallelExecutorBase): return img, label = init_data() - self.check_network_convergence( - fc_with_batchnorm, - feed_dict={"image": img, - "label": label}, - use_device=use_device, - use_fast_executor=use_fast_executor) + self.check_network_convergence(fc_with_batchnorm, + feed_dict={ + "image": img, + "label": label + }, + use_device=use_device, + use_fast_executor=use_fast_executor) def test_batchnorm_fc(self): for use_device in (DeviceType.CPU, DeviceType.CUDA): @@ -201,6 +214,7 @@ class TestMNIST(TestParallelExecutorBase): class TestMNISTNoReduce(unittest.TestCase): + def run_program(self, device_type): if device_type == DeviceType.CUDA: if not paddle.is_compiled_with_cuda(): @@ -225,18 +239,16 @@ class TestMNISTNoReduce(unittest.TestCase): build_strategy = paddle.static.BuildStrategy() build_strategy.reduce_strategy = no_reduce main_multi_place = paddle.static.CompiledProgram( - main).with_data_parallel( - loss_name=loss.name, - build_strategy=build_strategy, - places=places) + main).with_data_parallel(loss_name=loss.name, + build_strategy=build_strategy, + places=places) build_strategy = paddle.static.BuildStrategy() build_strategy.reduce_strategy = no_reduce - main_single_place = paddle.static.CompiledProgram(main.clone( - )).with_data_parallel( - loss_name=loss.name, - build_strategy=build_strategy, - places=places[0]) + main_single_place = paddle.static.CompiledProgram( + main.clone()).with_data_parallel(loss_name=loss.name, + build_strategy=build_strategy, + places=places[0]) image, label = init_data() feed = {'image': image, 'label': label} @@ -256,13 +268,13 @@ class TestMNISTNoReduce(unittest.TestCase): grads_single_place[i].append(g) for i in range(len(grads)): - grads_single_place[i] = np.concatenate( - grads_single_place[i], axis=0) / len(places) + grads_single_place[i] = np.concatenate(grads_single_place[i], + axis=0) / len(places) self.assertEqual(len(grads_multi_place), len(grads_single_place)) for g1, g2 in zip(grads_multi_place, grads_single_place): - self.assertTrue( - np.allclose(g1, g2), 'g1 = {}\ng2 = {}\n'.format(g1, g2)) + self.assertTrue(np.allclose(g1, g2), + 'g1 = {}\ng2 = {}\n'.format(g1, g2)) def split_feed(self, feed, n): image = feed['image'] diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py index e07b89f7aae..36299da25a6 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py @@ -18,6 +18,7 @@ import unittest import numpy as np import os + os.environ['FLAGS_enable_parallel_graph'] = str(1) import paddle.fluid.core as core import os @@ -26,6 +27,7 @@ from simple_nets import simple_fc_net, init_data class TestMNIST(TestParallelExecutorBase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -36,12 +38,13 @@ class 
TestMNIST(TestParallelExecutorBase): return img, label = init_data() - self.check_network_convergence( - simple_fc_net, - feed_dict={"image": img, - "label": label}, - use_device=use_device, - use_reduce=use_reduce) + self.check_network_convergence(simple_fc_net, + feed_dict={ + "image": img, + "label": label + }, + use_device=use_device, + use_reduce=use_reduce) def test_simple_fc(self): # use_device @@ -54,23 +57,29 @@ class TestMNIST(TestParallelExecutorBase): img, label = init_data() single_first_loss, single_last_loss = self.check_network_convergence( method=simple_fc_net, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_parallel_executor=False) parallel_first_loss, parallel_last_loss = self.check_network_convergence( method=simple_fc_net, - feed_dict={"image": img, - "label": label}, + feed_dict={ + "image": img, + "label": label + }, use_device=use_device, use_parallel_executor=True) self.assertAlmostEquals( np.mean(parallel_first_loss), single_first_loss, - delta=1e-6, ) - self.assertAlmostEquals( - np.mean(parallel_last_loss), single_last_loss, delta=1e-6) + delta=1e-6, + ) + self.assertAlmostEquals(np.mean(parallel_last_loss), + single_last_loss, + delta=1e-6) def test_simple_fc_parallel_accuracy(self): self.check_simple_fc_parallel_accuracy(DeviceType.CUDA) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py index 0fac0610fd2..68f51540764 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py @@ -24,12 +24,13 @@ import os # NCCL 2.7 decides to use shared memory while NCCL 2.6 didn't, hence causing the error. # include/shm.h:28 NCCL WARN Call to posix_fallocate failed: No space left on device # -# Set environment variables NCCL_SHM_DISABLE=1 to disables the Shared Memory (SHM) transports +# Set environment variables NCCL_SHM_DISABLE=1 to disables the Shared Memory (SHM) transports # and force to use P2P which is the default transports way of NCCL2.6. 
os.environ['NCCL_SHM_DISABLE'] = str(1) class TestPEProfiler(TestProfiler): + def test_cpu_profiler(self): exe = fluid.Executor(fluid.CPUPlace()) self.net_profiler(exe, 'CPU', "Default", use_parallel_executor=True) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py index d9ae3cf5e75..3b275a75b7d 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py @@ -24,8 +24,8 @@ import unittest paddle.enable_static() -logging.basicConfig( - format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO) +logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', + level=logging.INFO) logger = logging.getLogger("paddle_with_cinn") @@ -85,8 +85,8 @@ def train(dot_save_dir, prefix, seed=1234): main_program = paddle.static.Program() img, label, loss = build_program(main_program, startup_program) - place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace() exe = paddle.static.Executor(place) exe.run(startup_program) @@ -109,6 +109,7 @@ def train(dot_save_dir, prefix, seed=1234): @unittest.skipIf(not set_cinn_flag(True), "Paddle is not compiled with CINN.") class TestParallelExecutorRunCinn(unittest.TestCase): + def setUp(self): self.tmpdir = tempfile.mkdtemp(prefix="dots_") diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_load_infer_program.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_load_infer_program.py index fc76f5d152d..964fce25a6f 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_load_infer_program.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_load_infer_program.py @@ -21,6 +21,7 @@ from simple_nets import simple_fc_net, init_data class TestMNIST(unittest.TestCase): + @classmethod def setUpClass(cls): cls.save_dirname = "./" @@ -37,12 +38,13 @@ class TestMNIST(unittest.TestCase): exe_loss = self.run_with_executor() [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - self.save_dirname, self.exe, self.model_filename, - self.params_filename) + fetch_targets] = fluid.io.load_inference_model(self.save_dirname, + self.exe, + self.model_filename, + self.params_filename) - train_exe = fluid.ParallelExecutor( - use_cuda=False, main_program=inference_program) + train_exe = fluid.ParallelExecutor(use_cuda=False, + main_program=inference_program) feed_vars = [ inference_program.global_block().var(var_name) for var_name in ["image", "label"] @@ -71,12 +73,12 @@ class TestMNIST(unittest.TestCase): feed=feeder.feed(self.batch_data), fetch_list=[loss.name]) - fluid.io.save_inference_model( - self.save_dirname, ["image", "label"], [loss], - self.exe, - model_filename=self.model_filename, - params_filename=self.params_filename, - main_program=main) + fluid.io.save_inference_model(self.save_dirname, ["image", "label"], + [loss], + self.exe, + model_filename=self.model_filename, + params_filename=self.params_filename, + main_program=main) return loss_data diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_cpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_cpu.py index 9b48a87bff7..15a26fc0c06 100644 --- 
a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_cpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_cpu.py @@ -20,20 +20,19 @@ from functools import partial class TestResnetCPU(TestResnetBase): + def test_seresnext_with_learning_rate_decay(self): # NOTE(zcd): This test is compare the result of use parallel_executor # and executor, and the result of drop_out op and batch_norm op in # this two executor have diff, so the two ops should be removed # from the model. - check_func = partial( - self.check_network_convergence, - optimizer=seresnext_net.optimizer, - use_parallel_executor=False) - self._compare_result_with_origin_model( - check_func, - use_device=DeviceType.CPU, - compare_separately=False, - delta2=1e-3) + check_func = partial(self.check_network_convergence, + optimizer=seresnext_net.optimizer, + use_parallel_executor=False) + self._compare_result_with_origin_model(check_func, + use_device=DeviceType.CPU, + compare_separately=False, + delta2=1e-3) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_gpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_gpu.py index ff529ce94bd..ee7736a7303 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_gpu.py @@ -20,17 +20,18 @@ from functools import partial class TestResnetGPU(TestResnetBase): + def test_seresnext_with_learning_rate_decay(self): # NOTE(zcd): This test is compare the result of use parallel_executor # and executor, and the result of drop_out op and batch_norm op in # this two executor have diff, so the two ops should be removed # from the model. - check_func = partial( - self.check_network_convergence, - optimizer=seresnext_net.optimizer, - use_parallel_executor=False) - self._compare_result_with_origin_model( - check_func, use_device=DeviceType.CUDA, compare_separately=False) + check_func = partial(self.check_network_convergence, + optimizer=seresnext_net.optimizer, + use_parallel_executor=False) + self._compare_result_with_origin_model(check_func, + use_device=DeviceType.CUDA, + compare_separately=False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_cpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_cpu.py index 0f1a86a83db..1c355c32ed7 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_cpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_cpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import paddle.fluid as fluid + fluid.core._set_fuse_parameter_group_size(3) fluid.core._set_fuse_parameter_memory_size(131072) @@ -24,15 +25,15 @@ from functools import partial class TestResnetWithFuseAllReduceCPU(TestResnetBase): + def test_seresnext_with_fused_all_reduce(self): # NOTE(zcd): In order to make the program faster, # this unit test remove drop_out and batch_norm. 
- check_func = partial( - self.check_network_convergence, - optimizer=seresnext_net.optimizer, - fuse_all_reduce_ops=True) - self._compare_result_with_origin_model( - check_func, use_device=DeviceType.CPU) + check_func = partial(self.check_network_convergence, + optimizer=seresnext_net.optimizer, + fuse_all_reduce_ops=True) + self._compare_result_with_origin_model(check_func, + use_device=DeviceType.CPU) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_gpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_gpu.py index c747591c816..566e3d4248d 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_gpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import paddle.fluid as fluid + fluid.core._set_fuse_parameter_group_size(3) fluid.core._set_fuse_parameter_memory_size(131072) @@ -24,15 +25,16 @@ from functools import partial class TestResnetWithFuseAllReduceGPU(TestResnetBase): + def test_seresnext_with_fused_all_reduce(self): # NOTE(zcd): In order to make the program faster, # this unit test remove drop_out and batch_norm. - check_func = partial( - self.check_network_convergence, - optimizer=seresnext_net.optimizer, - fuse_all_reduce_ops=True) - self._compare_result_with_origin_model( - check_func, use_device=DeviceType.CUDA, delta2=1e-2) + check_func = partial(self.check_network_convergence, + optimizer=seresnext_net.optimizer, + fuse_all_reduce_ops=True) + self._compare_result_with_origin_model(check_func, + use_device=DeviceType.CUDA, + delta2=1e-2) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py index e67934d87f9..d4cc297d689 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py @@ -20,6 +20,7 @@ import paddle.fluid.core as core class TestResnetWithReduceBase(TestParallelExecutorBase): + def _compare_reduce_and_allreduce(self, use_device, delta2=1e-5): if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda(): return @@ -86,9 +87,10 @@ class TestResnetWithReduceBase(TestParallelExecutorBase): class TestResnetWithReduceCPU(TestResnetWithReduceBase): + def test_seresnext_with_reduce(self): - self._compare_reduce_and_allreduce( - use_device=DeviceType.CPU, delta2=1e-3) + self._compare_reduce_and_allreduce(use_device=DeviceType.CPU, + delta2=1e-3) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_gpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_gpu.py index 4de1a6092dc..13591f8d87d 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_gpu.py @@ -18,9 +18,10 @@ from test_parallel_executor_seresnext_with_reduce_cpu import TestResnetWithReduc class TestResnetWithReduceGPU(TestResnetWithReduceBase): + def test_seresnext_with_reduce(self): - self._compare_reduce_and_allreduce( - use_device=DeviceType.CUDA, delta2=1e-2) + 
self._compare_reduce_and_allreduce(use_device=DeviceType.CUDA, + delta2=1e-2) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py index fd47dc37e76..e9f4e679d5a 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py @@ -25,6 +25,7 @@ import math class ParallelExecutorTestingDuringTraining(unittest.TestCase): + def check_network_convergence(self, use_cuda, build_strategy=None): os.environ['CPU_NUM'] = str(4) main = fluid.Program() @@ -70,32 +71,31 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase): sys.exit("got NaN loss, training failed.") self.assertTrue( - np.allclose( - train_loss, test_loss, atol=1e-2), - "Train loss: " + str(train_loss) + "\n Test loss:" + - str(test_loss)) + np.allclose(train_loss, test_loss, + atol=1e-2), "Train loss: " + str(train_loss) + + "\n Test loss:" + str(test_loss)) def test_parallel_testing(self): build_strategy = fluid.BuildStrategy() build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce if core.is_compiled_with_cuda(): - self.check_network_convergence( - use_cuda=True, build_strategy=build_strategy) - self.check_network_convergence( - use_cuda=False, build_strategy=build_strategy) + self.check_network_convergence(use_cuda=True, + build_strategy=build_strategy) + self.check_network_convergence(use_cuda=False, + build_strategy=build_strategy) def test_parallel_testing_with_new_strategy_gpu(self): build_strategy = fluid.BuildStrategy() build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce if core.is_compiled_with_cuda(): - self.check_network_convergence( - use_cuda=True, build_strategy=build_strategy) + self.check_network_convergence(use_cuda=True, + build_strategy=build_strategy) def test_parallel_testing_with_new_strategy_cpu(self): build_strategy = fluid.BuildStrategy() build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce - self.check_network_convergence( - use_cuda=False, build_strategy=build_strategy) + self.check_network_convergence(use_cuda=False, + build_strategy=build_strategy) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py index b87e8d4e3c2..cc90fdb07f4 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py @@ -106,8 +106,8 @@ def prepare_batch_input(insts, src_pad_idx, trg_pad_idx, n_head): if is_target: # This is used to avoid attention on paddings and subsequent # words. 
- slf_attn_bias_data = np.ones((inst_data.shape[0], max_len, - max_len)) + slf_attn_bias_data = np.ones( + (inst_data.shape[0], max_len, max_len)) slf_attn_bias_data = np.triu(slf_attn_bias_data, 1).reshape( [-1, 1, max_len, max_len]) slf_attn_bias_data = np.tile(slf_attn_bias_data, @@ -161,10 +161,9 @@ def get_feed_data_reader(): if feed_data_reader is not None: return feed_data_reader - reader = paddle.batch( - wmt16.train(ModelHyperParams.src_vocab_size, - ModelHyperParams.trg_vocab_size), - batch_size=transformer_model.batch_size) + reader = paddle.batch(wmt16.train(ModelHyperParams.src_vocab_size, + ModelHyperParams.trg_vocab_size), + batch_size=transformer_model.batch_size) all_batch_tensors = [] for batch in reader(): tensors = [] @@ -178,15 +177,15 @@ def get_feed_data_reader(): for t in all_batch_tensors: yield t - feed_data_reader = FeedDataReader( - feed_list=transformer_model.build_inputs( - ModelHyperParams.max_length + 1, ModelHyperParams.n_head), - reader=__reader__) + feed_data_reader = FeedDataReader(feed_list=transformer_model.build_inputs( + ModelHyperParams.max_length + 1, ModelHyperParams.n_head), + reader=__reader__) return feed_data_reader class TestTransformer(TestParallelExecutorBase): + def test_main(self): if core.is_compiled_with_cuda(): self.check_network_convergence( @@ -198,11 +197,10 @@ class TestTransformer(TestParallelExecutorBase): use_device=DeviceType.CUDA, enable_sequential_execution=True, feed_data_reader=get_feed_data_reader()) - self.check_network_convergence( - transformer, - use_device=DeviceType.CPU, - iter=2, - feed_data_reader=get_feed_data_reader()) + self.check_network_convergence(transformer, + use_device=DeviceType.CPU, + iter=2, + feed_data_reader=get_feed_data_reader()) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_parallel_margin_cross_entropy.py b/python/paddle/fluid/tests/unittests/test_parallel_margin_cross_entropy.py index e6402ee78f1..bacf97e0c68 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_margin_cross_entropy.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_margin_cross_entropy.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,11 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestParallelMarginSoftmaxWithCrossEntropy(TestMultipleGpus): + def test_parallel_margin_cross_entropy(self): self.run_mnist_2gpu('parallel_margin_cross_entropy.py') - self.run_mnist_2gpu( - 'parallel_margin_cross_entropy.py', eager_mode=False) + self.run_mnist_2gpu('parallel_margin_cross_entropy.py', + eager_mode=False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_parameter.py b/python/paddle/fluid/tests/unittests/test_parameter.py index 61d75fca274..d75a6c0dd90 100644 --- a/python/paddle/fluid/tests/unittests/test_parameter.py +++ b/python/paddle/fluid/tests/unittests/test_parameter.py @@ -30,15 +30,15 @@ main_program = default_main_program() class ParameterChecks(unittest.TestCase): + def test_parameter(self): shape = [784, 100] val = 1.0625 b = main_program.global_block() - param = b.create_parameter( - name='fc.w', - shape=shape, - dtype='float32', - initializer=ConstantInitializer(val)) + param = b.create_parameter(name='fc.w', + shape=shape, + dtype='float32', + initializer=ConstantInitializer(val)) self.assertIsNotNone(param) self.assertEqual('fc.w', param.name) self.assertEqual((784, 100), param.shape) @@ -80,17 +80,25 @@ class ParameterChecks(unittest.TestCase): def func_exception(self): b = main_program.global_block() with self.assertRaises(ValueError): - b.create_parameter( - name='test', shape=None, dtype='float32', initializer=None) + b.create_parameter(name='test', + shape=None, + dtype='float32', + initializer=None) with self.assertRaises(ValueError): - b.create_parameter( - name='test', shape=[1], dtype=None, initializer=None) + b.create_parameter(name='test', + shape=[1], + dtype=None, + initializer=None) with self.assertRaises(ValueError): - b.create_parameter( - name='test', shape=[], dtype='float32', initializer=None) + b.create_parameter(name='test', + shape=[], + dtype='float32', + initializer=None) with self.assertRaises(ValueError): - b.create_parameter( - name='test', shape=[-1], dtype='float32', initializer=None) + b.create_parameter(name='test', + shape=[-1], + dtype='float32', + initializer=None) def func_parambase_to_vector(self): with guard(): diff --git a/python/paddle/fluid/tests/unittests/test_partial_concat_op.py b/python/paddle/fluid/tests/unittests/test_partial_concat_op.py index a83ca3f81a8..842b1e725d4 100644 --- a/python/paddle/fluid/tests/unittests/test_partial_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_partial_concat_op.py @@ -41,6 +41,7 @@ def np_partial_concat(inputs, start, length): class TestPartialConcatOp(OpTest): + def setUp(self): self.op_type = "partial_concat" self.init_kernel_type() @@ -74,6 +75,7 @@ class TestPartialConcatOp(OpTest): class TestPartialConcatOp2(TestPartialConcatOp): + def init_para(self): self.batch_size = random.randint(1, 10) self.column = random.randint(101, 200) @@ -83,6 +85,7 @@ class TestPartialConcatOp2(TestPartialConcatOp): class TestPartialConcatOp3(TestPartialConcatOp): + def init_para(self): self.batch_size = random.randint(1, 10) self.column = random.randint(101, 200) @@ -92,6 +95,7 @@ class TestPartialConcatOp3(TestPartialConcatOp): class TestPartialConcatOp4(TestPartialConcatOp): + def init_para(self): self.batch_size = random.randint(1, 10) self.column = random.randint(101, 200) diff --git a/python/paddle/fluid/tests/unittests/test_partial_sum_op.py b/python/paddle/fluid/tests/unittests/test_partial_sum_op.py index eb516643010..7f016d3d1b5 100644 --- 
a/python/paddle/fluid/tests/unittests/test_partial_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_partial_sum_op.py @@ -26,6 +26,7 @@ import six class TestPartialSumOp(OpTest): + def setUp(self): self.op_type = "partial_sum" self.init_kernel_type() @@ -66,6 +67,7 @@ class TestPartialSumOp(OpTest): class TestPartialSumOp2(TestPartialSumOp): + def init_para(self): self.batch_size = random.randint(1, 10) self.column = random.randint(101, 200) @@ -75,6 +77,7 @@ class TestPartialSumOp2(TestPartialSumOp): class TestPartialSumOp3(TestPartialSumOp): + def init_para(self): self.batch_size = random.randint(1, 10) self.column = random.randint(101, 200) @@ -84,6 +87,7 @@ class TestPartialSumOp3(TestPartialSumOp): class TestPartialSumOp4(TestPartialSumOp): + def init_para(self): self.batch_size = random.randint(1, 10) self.column = random.randint(101, 200) diff --git a/python/paddle/fluid/tests/unittests/test_pass_builder.py b/python/paddle/fluid/tests/unittests/test_pass_builder.py index 023ceeaa73a..01d65941068 100644 --- a/python/paddle/fluid/tests/unittests/test_pass_builder.py +++ b/python/paddle/fluid/tests/unittests/test_pass_builder.py @@ -27,6 +27,7 @@ import tempfile class TestPassBuilder(unittest.TestCase): + def check_network_convergence(self, use_cuda, build_strategy=None): os.environ['CPU_NUM'] = str(4) main = fluid.Program() @@ -72,10 +73,9 @@ class TestPassBuilder(unittest.TestCase): sys.exit("got NaN loss, training failed.") self.assertTrue( - np.allclose( - train_loss, test_loss, atol=1e-8), - "Train loss: " + str(train_loss) + "\n Test loss:" + - str(test_loss)) + np.allclose(train_loss, test_loss, + atol=1e-8), "Train loss: " + str(train_loss) + + "\n Test loss:" + str(test_loss)) def test_parallel_testing_with_new_strategy(self): build_strategy = fluid.BuildStrategy() @@ -93,8 +93,8 @@ class TestPassBuilder(unittest.TestCase): viz_pass = pass_builder.append_pass("graph_viz_pass") self.assertEqual(origin_len + 1, len(pass_builder.all_passes())) - pass_builder.insert_pass( - len(pass_builder.all_passes()), "graph_viz_pass") + pass_builder.insert_pass(len(pass_builder.all_passes()), + "graph_viz_pass") self.assertEqual(origin_len + 2, len(pass_builder.all_passes())) pass_builder.remove_pass(len(pass_builder.all_passes()) - 1) diff --git a/python/paddle/fluid/tests/unittests/test_pipeline.py b/python/paddle/fluid/tests/unittests/test_pipeline.py index 04772a2da28..7075b9a0d43 100644 --- a/python/paddle/fluid/tests/unittests/test_pipeline.py +++ b/python/paddle/fluid/tests/unittests/test_pipeline.py @@ -25,6 +25,7 @@ flag_name = os.path.splitext(__file__)[0] class TestPipeline(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -41,21 +42,19 @@ class TestPipeline(TestDistBase): # Now pipeline only gets the loss value of the last # microbatch, so it is not consistable with the # non-pipeline one. 
- self.check_with_place( - "pipeline_mnist.py", - delta=1e0, - check_error_log=True, - log_name=flag_name, - need_envs=self.need_envs()) + self.check_with_place("pipeline_mnist.py", + delta=1e0, + check_error_log=True, + log_name=flag_name, + need_envs=self.need_envs()) def test_dist_train_multi_device(self): if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "pipeline_mnist_multi_device.py", - check_error_log=True, - delta=1e0, - log_name=flag_name, - need_envs=self.need_envs()) + self.check_with_place("pipeline_mnist_multi_device.py", + check_error_log=True, + delta=1e0, + log_name=flag_name, + need_envs=self.need_envs()) def test_dist_train_one_device(self): if fluid.core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_pipeline_parallel.py b/python/paddle/fluid/tests/unittests/test_pipeline_parallel.py index f62e160673f..8773e8d47ed 100644 --- a/python/paddle/fluid/tests/unittests/test_pipeline_parallel.py +++ b/python/paddle/fluid/tests/unittests/test_pipeline_parallel.py @@ -21,6 +21,7 @@ from test_parallel_dygraph_dataparallel import TestMultipleGpus class TestPipelineParallel(TestMultipleGpus): + def test_pipeline_parallel(self): self.run_mnist_2gpu('hybrid_parallel_pp_alexnet.py') diff --git a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py index 06d975fe2b8..05b158624dd 100644 --- a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py +++ b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py @@ -50,6 +50,7 @@ def pixel_shuffle_np(x, up_factor, data_format="NCHW"): class TestPixelShuffleOp(OpTest): + def setUp(self): self.op_type = "pixel_shuffle" self.python_api = paddle.nn.functional.pixel_shuffle @@ -81,11 +82,13 @@ class TestPixelShuffleOp(OpTest): class TestChannelLast(TestPixelShuffleOp): + def init_data_format(self): self.format = "NHWC" class TestPixelShuffleAPI(unittest.TestCase): + def setUp(self): self.x_1_np = np.random.random([2, 9, 4, 4]).astype("float64") self.x_2_np = np.random.random([2, 4, 4, 9]).astype("float64") @@ -98,10 +101,12 @@ class TestPixelShuffleAPI(unittest.TestCase): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.enable_static() - x_1 = paddle.fluid.data( - name="x", shape=[2, 9, 4, 4], dtype="float64") - x_2 = paddle.fluid.data( - name="x2", shape=[2, 4, 4, 9], dtype="float64") + x_1 = paddle.fluid.data(name="x", + shape=[2, 9, 4, 4], + dtype="float64") + x_2 = paddle.fluid.data(name="x2", + shape=[2, 4, 4, 9], + dtype="float64") out_1 = F.pixel_shuffle(x_1, 3) out_2 = F.pixel_shuffle(x_2, 3, "NHWC") @@ -126,10 +131,12 @@ class TestPixelShuffleAPI(unittest.TestCase): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.enable_static() - x_1 = paddle.fluid.data( - name="x", shape=[2, 9, 4, 4], dtype="float64") - x_2 = paddle.fluid.data( - name="x2", shape=[2, 4, 4, 9], dtype="float64") + x_1 = paddle.fluid.data(name="x", + shape=[2, 9, 4, 4], + dtype="float64") + x_2 = paddle.fluid.data(name="x2", + shape=[2, 4, 4, 9], + dtype="float64") # init instance ps_1 = paddle.nn.PixelShuffle(3) ps_2 = paddle.nn.PixelShuffle(3, "NHWC") @@ -171,14 +178,14 @@ class TestPixelShuffleAPI(unittest.TestCase): paddle.disable_static(place=place) - pixel_shuffle = paddle.nn.PixelShuffle( - up_factor, data_format=data_format) + pixel_shuffle = paddle.nn.PixelShuffle(up_factor, + data_format=data_format) result = pixel_shuffle(paddle.to_tensor(x)) self.assertTrue(np.allclose(result.numpy(), npresult)) - 
result_functional = F.pixel_shuffle( - paddle.to_tensor(x), 3, data_format) + result_functional = F.pixel_shuffle(paddle.to_tensor(x), 3, + data_format) self.assertTrue(np.allclose(result_functional.numpy(), npresult)) def test_dygraph1(self): @@ -189,7 +196,9 @@ class TestPixelShuffleAPI(unittest.TestCase): class TestPixelShuffleError(unittest.TestCase): + def test_error_functional(self): + def error_upscale_factor(): with paddle.fluid.dygraph.guard(): x = np.random.random([2, 9, 4, 4]).astype("float64") @@ -205,6 +214,7 @@ class TestPixelShuffleError(unittest.TestCase): self.assertRaises(ValueError, error_data_format) def test_error_layer(self): + def error_upscale_factor_layer(): with paddle.fluid.dygraph.guard(): x = np.random.random([2, 9, 4, 4]).astype("float64") diff --git a/python/paddle/fluid/tests/unittests/test_pixel_unshuffle.py b/python/paddle/fluid/tests/unittests/test_pixel_unshuffle.py index 768a9e307c9..1ae2c016e25 100644 --- a/python/paddle/fluid/tests/unittests/test_pixel_unshuffle.py +++ b/python/paddle/fluid/tests/unittests/test_pixel_unshuffle.py @@ -120,10 +120,12 @@ class TestPixelUnshuffleAPI(unittest.TestCase): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.enable_static() - x_1 = paddle.fluid.data( - name="x", shape=[2, 1, 12, 12], dtype="float64") - x_2 = paddle.fluid.data( - name="x2", shape=[2, 12, 12, 1], dtype="float64") + x_1 = paddle.fluid.data(name="x", + shape=[2, 1, 12, 12], + dtype="float64") + x_2 = paddle.fluid.data(name="x2", + shape=[2, 12, 12, 1], + dtype="float64") out_1 = F.pixel_unshuffle(x_1, 3) out_2 = F.pixel_unshuffle(x_2, 3, "NHWC") @@ -150,10 +152,12 @@ class TestPixelUnshuffleAPI(unittest.TestCase): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.enable_static() - x_1 = paddle.fluid.data( - name="x", shape=[2, 1, 12, 12], dtype="float64") - x_2 = paddle.fluid.data( - name="x2", shape=[2, 12, 12, 1], dtype="float64") + x_1 = paddle.fluid.data(name="x", + shape=[2, 1, 12, 12], + dtype="float64") + x_2 = paddle.fluid.data(name="x2", + shape=[2, 12, 12, 1], + dtype="float64") # init instance ps_1 = paddle.nn.PixelUnshuffle(3) ps_2 = paddle.nn.PixelUnshuffle(3, "NHWC") @@ -196,14 +200,14 @@ class TestPixelUnshuffleAPI(unittest.TestCase): paddle.disable_static(place=place) - pixel_unshuffle = paddle.nn.PixelUnshuffle( - down_factor, data_format=data_format) + pixel_unshuffle = paddle.nn.PixelUnshuffle(down_factor, + data_format=data_format) result = pixel_unshuffle(paddle.to_tensor(x)) self.assertTrue(np.allclose(result.numpy(), npresult)) - result_functional = F.pixel_unshuffle( - paddle.to_tensor(x), 3, data_format) + result_functional = F.pixel_unshuffle(paddle.to_tensor(x), 3, + data_format) self.assertTrue(np.allclose(result_functional.numpy(), npresult)) pixel_unshuffle_str = 'downscale_factor={}'.format(down_factor) @@ -252,8 +256,8 @@ class TestPixelUnshuffleError(unittest.TestCase): def error_data_format(): with paddle.fluid.dygraph.guard(): x = np.random.random([2, 1, 12, 12]).astype("float64") - pixel_unshuffle = F.pixel_unshuffle( - paddle.to_tensor(x), 3, "WOW") + pixel_unshuffle = F.pixel_unshuffle(paddle.to_tensor(x), 3, + "WOW") self.assertRaises(ValueError, error_data_format) diff --git a/python/paddle/fluid/tests/unittests/test_poisson_op.py b/python/paddle/fluid/tests/unittests/test_poisson_op.py index 7dd3841fe4b..57adcd26959 100644 --- a/python/paddle/fluid/tests/unittests/test_poisson_op.py +++ b/python/paddle/fluid/tests/unittests/test_poisson_op.py @@ -39,6 +39,7 @@ def 
output_hist(out, lam, a, b): class TestPoissonOp1(OpTest): + def setUp(self): self.op_type = "poisson" self.config() @@ -55,10 +56,8 @@ class TestPoissonOp1(OpTest): def verify_output(self, outs): hist, prob = output_hist(np.array(outs[0]), self.lam, self.a, self.b) - self.assertTrue( - np.allclose( - hist, prob, rtol=0.01), - "actual: {}, expected: {}".format(hist, prob)) + self.assertTrue(np.allclose(hist, prob, rtol=0.01), + "actual: {}, expected: {}".format(hist, prob)) def test_check_output(self): self.check_output_customized(self.verify_output) @@ -67,14 +66,14 @@ class TestPoissonOp1(OpTest): self.check_grad( ['X'], 'Out', - user_defined_grads=[np.zeros( - [1024, 1024], dtype=self.dtype)], + user_defined_grads=[np.zeros([1024, 1024], dtype=self.dtype)], user_defined_grad_outputs=[ np.random.rand(1024, 1024).astype(self.dtype) ]) class TestPoissonOp2(TestPoissonOp1): + def config(self): self.lam = 5 self.a = 1 @@ -83,6 +82,7 @@ class TestPoissonOp2(TestPoissonOp1): class TestPoissonAPI(unittest.TestCase): + def test_static(self): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): diff --git a/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py b/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py index 578b01b02d6..ed4bcf13b71 100644 --- a/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py +++ b/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py @@ -35,13 +35,14 @@ def PolygonBoxRestore(input): indexes = indexes.repeat( [geo_channels / 2], axis=0)[np.newaxis, :] # [1, geo_channels/2, 2, h, w] - indexes = indexes.repeat( - [batch_size], axis=0) # [batch_size, geo_channels/2, 2, h, w] + indexes = indexes.repeat([batch_size], + axis=0) # [batch_size, geo_channels/2, 2, h, w] return indexes.reshape( input.shape) * 4 - input # [batch_size, geo_channels, h, w] class TestPolygonBoxRestoreOp(OpTest): + def config(self): self.input_shape = (1, 8, 2, 2) @@ -58,20 +59,25 @@ class TestPolygonBoxRestoreOp(OpTest): class TestCase1(TestPolygonBoxRestoreOp): + def config(self): self.input_shape = (2, 10, 3, 2) class TestCase2(TestPolygonBoxRestoreOp): + def config(self): self.input_shape = (3, 12, 4, 5) class TestPolygonBoxInvalidInput(unittest.TestCase): + def test_error(self): + def test_invalid_input(): - input = fluid.data( - name='input', shape=[None, 3, 32, 32], dtype='int64') + input = fluid.data(name='input', + shape=[None, 3, 32, 32], + dtype='int64') out = fluid.layers.polygon_box_transform(input) self.assertRaises(TypeError, test_invalid_input) diff --git a/python/paddle/fluid/tests/unittests/test_pool1d_api.py b/python/paddle/fluid/tests/unittests/test_pool1d_api.py index e1cfcc3f066..2dd26bef9d1 100644 --- a/python/paddle/fluid/tests/unittests/test_pool1d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool1d_api.py @@ -46,8 +46,8 @@ def max_pool1D_forward_naive(x, if adaptive: L_out = ksize[0] else: - L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1 - ) // strides[0] + 1 if ceil_mode else ( + L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - + 1) // strides[0] + 1 if ceil_mode else ( L - ksize[0] + 2 * paddings[0]) // strides[0] + 1 out = np.zeros((N, C, L_out)) @@ -79,8 +79,8 @@ def avg_pool1D_forward_naive(x, if adaptive: L_out = ksize[0] else: - L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1 - ) // strides[0] + 1 if ceil_mode else ( + L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - + 1) // strides[0] + 1 if ceil_mode else ( L - ksize[0] + 2 * paddings[0]) // 
strides[0] + 1 out = np.zeros((N, C, L_out)) @@ -96,15 +96,16 @@ def avg_pool1D_forward_naive(x, field_size = (r_end - r_start) \ if (exclusive or adaptive) else (ksize[0]) if data_type == np.int8 or data_type == np.uint8: - out[:, :, i] = (np.rint( - np.sum(x_masked, axis=(2, 3)) / field_size)).astype(data_type) + out[:, :, i] = (np.rint(np.sum(x_masked, axis=(2, 3)) / + field_size)).astype(data_type) else: - out[:, :, i] = (np.sum(x_masked, axis=(2)) / - field_size).astype(data_type) + out[:, :, + i] = (np.sum(x_masked, axis=(2)) / field_size).astype(data_type) return out class TestPool1D_API(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -117,8 +118,11 @@ class TestPool1D_API(unittest.TestCase): result = F.avg_pool1d(input, kernel_size=2, stride=2, padding=0) input_np = np.random.random([2, 3, 32]).astype("float32") - result_np = avg_pool1D_forward_naive( - input_np, ksize=[2], strides=[2], paddings=[0], ceil_mode=False) + result_np = avg_pool1D_forward_naive(input_np, + ksize=[2], + strides=[2], + paddings=[0], + ceil_mode=False) exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), @@ -132,13 +136,16 @@ class TestPool1D_API(unittest.TestCase): input = fluid.dygraph.to_variable(input_np) result = F.avg_pool1d(input, kernel_size=2, stride=2, padding=[0]) - result_np = avg_pool1D_forward_naive( - input_np, ksize=[2], strides=[2], paddings=[0]) + result_np = avg_pool1D_forward_naive(input_np, + ksize=[2], + strides=[2], + paddings=[0]) self.assertTrue(np.allclose(result.numpy(), result_np)) - avg_pool1d_dg = paddle.nn.layer.AvgPool1D( - kernel_size=2, stride=None, padding=0) + avg_pool1d_dg = paddle.nn.layer.AvgPool1D(kernel_size=2, + stride=None, + padding=0) result = avg_pool1d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -146,16 +153,24 @@ class TestPool1D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result = F.avg_pool1d( - input, kernel_size=2, stride=2, padding=[1], exclusive=True) - - result_np = avg_pool1D_forward_naive( - input_np, ksize=[2], strides=[2], paddings=[1], exclusive=False) + result = F.avg_pool1d(input, + kernel_size=2, + stride=2, + padding=[1], + exclusive=True) + + result_np = avg_pool1D_forward_naive(input_np, + ksize=[2], + strides=[2], + paddings=[1], + exclusive=False) self.assertTrue(np.allclose(result.numpy(), result_np)) - avg_pool1d_dg = paddle.nn.AvgPool1D( - kernel_size=2, stride=None, padding=1, exclusive=True) + avg_pool1d_dg = paddle.nn.AvgPool1D(kernel_size=2, + stride=None, + padding=1, + exclusive=True) result = avg_pool1d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -166,8 +181,10 @@ class TestPool1D_API(unittest.TestCase): result = F.max_pool1d(input, kernel_size=2, stride=2, padding=[0]) input_np = np.random.random([2, 3, 32]).astype("float32") - result_np = max_pool1D_forward_naive( - input_np, ksize=[2], strides=[2], paddings=[0]) + result_np = max_pool1D_forward_naive(input_np, + ksize=[2], + strides=[2], + paddings=[0]) exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), @@ -181,13 +198,16 @@ class TestPool1D_API(unittest.TestCase): input = fluid.dygraph.to_variable(input_np) result = F.max_pool1d(input, kernel_size=2, stride=2, padding=0) - result_np = max_pool1D_forward_naive( - input_np, ksize=[2], strides=[2], paddings=[0]) + result_np = max_pool1D_forward_naive(input_np, + ksize=[2], 
+ strides=[2], + paddings=[0]) self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool1d_dg = paddle.nn.layer.MaxPool1D( - kernel_size=2, stride=None, padding=0) + max_pool1d_dg = paddle.nn.layer.MaxPool1D(kernel_size=2, + stride=None, + padding=0) result = max_pool1d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -195,16 +215,22 @@ class TestPool1D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result, index = F.max_pool1d( - input, kernel_size=2, stride=2, padding=0, return_mask=True) + result, index = F.max_pool1d(input, + kernel_size=2, + stride=2, + padding=0, + return_mask=True) - result_np = max_pool1D_forward_naive( - input_np, ksize=[2], strides=[2], paddings=[0]) + result_np = max_pool1D_forward_naive(input_np, + ksize=[2], + strides=[2], + paddings=[0]) self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool1d_dg = paddle.nn.layer.MaxPool1D( - kernel_size=2, stride=None, padding=0) + max_pool1d_dg = paddle.nn.layer.MaxPool1D(kernel_size=2, + stride=None, + padding=0) result = max_pool1d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -212,11 +238,15 @@ class TestPool1D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result = F.max_pool1d( - input, kernel_size=2, stride=2, padding="SAME") + result = F.max_pool1d(input, + kernel_size=2, + stride=2, + padding="SAME") - result_np = max_pool1D_forward_naive( - input_np, ksize=[2], strides=[2], paddings=[0]) + result_np = max_pool1D_forward_naive(input_np, + ksize=[2], + strides=[2], + paddings=[0]) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -224,11 +254,15 @@ class TestPool1D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result = F.avg_pool1d( - input, kernel_size=2, stride=2, padding="SAME") + result = F.avg_pool1d(input, + kernel_size=2, + stride=2, + padding="SAME") - result_np = avg_pool1D_forward_naive( - input_np, ksize=[2], strides=[2], paddings=[0]) + result_np = avg_pool1D_forward_naive(input_np, + ksize=[2], + strides=[2], + paddings=[0]) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -249,15 +283,19 @@ class TestPool1D_API(unittest.TestCase): class TestPool2DError_API(unittest.TestCase): + def test_error_api(self): + def run1(): with fluid.dygraph.guard(): input_np = np.random.uniform(-1, 1, [2, 3, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = [[2]] - res_pd = F.max_pool1d( - input_pd, kernel_size=2, stride=2, padding=padding) + res_pd = F.max_pool1d(input_pd, + kernel_size=2, + stride=2, + padding=padding) self.assertRaises(ValueError, run1) @@ -267,8 +305,10 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = [[2]] - res_pd = F.max_pool1d( - input_pd, kernel_size=2, stride=2, padding=padding) + res_pd = F.max_pool1d(input_pd, + kernel_size=2, + stride=2, + padding=padding) self.assertRaises(ValueError, run2) @@ -278,8 +318,10 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = "padding" - res_pd = F.max_pool1d( - input_pd, kernel_size=2, stride=2, padding=padding) + res_pd = 
F.max_pool1d(input_pd, + kernel_size=2, + stride=2, + padding=padding) self.assertRaises(ValueError, run3) @@ -289,12 +331,11 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = "VALID" - res_pd = F.max_pool1d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - ceil_mode=True) + res_pd = F.max_pool1d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True) self.assertRaises(ValueError, run4) @@ -304,12 +345,11 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = "VALID" - res_pd = F.max_pool1d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - ceil_mode=True) + res_pd = F.max_pool1d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True) self.assertRaises(ValueError, run5) @@ -319,12 +359,11 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = "VALID" - res_pd = F.avg_pool1d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - ceil_mode=True) + res_pd = F.avg_pool1d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True) self.assertRaises(ValueError, run6) @@ -334,12 +373,11 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = "paddle" - res_pd = F.avg_pool1d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - ceil_mode=True) + res_pd = F.avg_pool1d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True) self.assertRaises(ValueError, run7) @@ -349,12 +387,11 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = 0 - res_pd = F.avg_pool1d( - input_pd, - kernel_size=-1, - stride=2, - padding=padding, - ceil_mode=True) + res_pd = F.avg_pool1d(input_pd, + kernel_size=-1, + stride=2, + padding=padding, + ceil_mode=True) self.assertRaises(ValueError, run_kernel_out_of_range) @@ -364,12 +401,11 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = 0 - res_pd = F.avg_pool1d( - input_pd, - kernel_size=2, - stride=0, - padding=padding, - ceil_mode=True) + res_pd = F.avg_pool1d(input_pd, + kernel_size=2, + stride=0, + padding=padding, + ceil_mode=True) self.assertRaises(ValueError, run_stride_out_of_range) diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_api.py b/python/paddle/fluid/tests/unittests/test_pool2d_api.py index e86fa0ec483..b17c0ea0391 100644 --- a/python/paddle/fluid/tests/unittests/test_pool2d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool2d_api.py @@ -24,6 +24,7 @@ from test_pool2d_op import adaptive_start_index, adaptive_end_index, pool2D_forw class TestPool2D_API(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -32,17 +33,17 @@ class TestPool2D_API(unittest.TestCase): def check_avg_static_results(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - input = fluid.data( - name="input", shape=[2, 3, 32, 32], dtype="float32") + input = fluid.data(name="input", + shape=[2, 3, 32, 32], + dtype="float32") result = avg_pool2d(input, kernel_size=2, stride=2, padding=0) input_np = np.random.random([2, 3, 32, 32]).astype("float32") - result_np = pool2D_forward_naive( - input_np, - 
ksize=[2, 2], - strides=[2, 2], - paddings=[0, 0], - pool_type='avg') + result_np = pool2D_forward_naive(input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='avg') exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), @@ -56,16 +57,16 @@ class TestPool2D_API(unittest.TestCase): input = fluid.dygraph.to_variable(input_np) result = avg_pool2d(input, kernel_size=2, stride=2, padding=0) - result_np = pool2D_forward_naive( - input_np, - ksize=[2, 2], - strides=[2, 2], - paddings=[0, 0], - pool_type='avg') + result_np = pool2D_forward_naive(input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='avg') self.assertTrue(np.allclose(result.numpy(), result_np)) - avg_pool2d_dg = paddle.nn.layer.AvgPool2D( - kernel_size=2, stride=2, padding=0) + avg_pool2d_dg = paddle.nn.layer.AvgPool2D(kernel_size=2, + stride=2, + padding=0) result = avg_pool2d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -73,20 +74,24 @@ class TestPool2D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result = avg_pool2d( - input, kernel_size=2, stride=2, padding=1, ceil_mode=False) - - result_np = avg_pool2D_forward_naive( - input_np, - ksize=[2, 2], - strides=[2, 2], - paddings=[1, 1], - ceil_mode=False, - exclusive=False) + result = avg_pool2d(input, + kernel_size=2, + stride=2, + padding=1, + ceil_mode=False) + + result_np = avg_pool2D_forward_naive(input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[1, 1], + ceil_mode=False, + exclusive=False) self.assertTrue(np.allclose(result.numpy(), result_np)) - avg_pool2d_dg = paddle.nn.layer.AvgPool2D( - kernel_size=2, stride=2, padding=1, ceil_mode=False) + avg_pool2d_dg = paddle.nn.layer.AvgPool2D(kernel_size=2, + stride=2, + padding=1, + ceil_mode=False) result = avg_pool2d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -94,35 +99,39 @@ class TestPool2D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result = avg_pool2d( - input, kernel_size=2, stride=2, padding=0, ceil_mode=True) - - result_np = avg_pool2D_forward_naive( - input_np, - ksize=[2, 2], - strides=[2, 2], - paddings=[0, 0], - ceil_mode=True) + result = avg_pool2d(input, + kernel_size=2, + stride=2, + padding=0, + ceil_mode=True) + + result_np = avg_pool2D_forward_naive(input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + ceil_mode=True) self.assertTrue(np.allclose(result.numpy(), result_np)) - avg_pool2d_dg = paddle.nn.layer.AvgPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) + avg_pool2d_dg = paddle.nn.layer.AvgPool2D(kernel_size=2, + stride=2, + padding=0, + ceil_mode=True) result = avg_pool2d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) def check_max_static_results(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - input = fluid.data( - name="input", shape=[2, 3, 32, 32], dtype="float32") + input = fluid.data(name="input", + shape=[2, 3, 32, 32], + dtype="float32") result = max_pool2d(input, kernel_size=2, stride=2, padding=0) input_np = np.random.random([2, 3, 32, 32]).astype("float32") - result_np = pool2D_forward_naive( - input_np, - ksize=[2, 2], - strides=[2, 2], - paddings=[0, 0], - pool_type='max') + result_np = pool2D_forward_naive(input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + 
pool_type='max') exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), @@ -134,19 +143,22 @@ class TestPool2D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result = max_pool2d( - input, kernel_size=2, stride=2, padding=0, return_mask=False) - - result_np = pool2D_forward_naive( - input_np, - ksize=[2, 2], - strides=[2, 2], - paddings=[0, 0], - pool_type='max') + result = max_pool2d(input, + kernel_size=2, + stride=2, + padding=0, + return_mask=False) + + result_np = pool2D_forward_naive(input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='max') self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool2d_dg = paddle.nn.layer.MaxPool2D( - kernel_size=2, stride=2, padding=0) + max_pool2d_dg = paddle.nn.layer.MaxPool2D(kernel_size=2, + stride=2, + padding=0) result = max_pool2d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -155,42 +167,44 @@ class TestPool2D_API(unittest.TestCase): input_np = np.random.random([2, 3, 32, 32]).astype("float32") input = fluid.dygraph.to_variable( np.transpose(input_np, [0, 2, 3, 1])) - result = max_pool2d( - input, - kernel_size=2, - stride=2, - padding=0, - return_mask=False, - data_format="NHWC") - - result_np = pool2D_forward_naive( - input_np, - ksize=[2, 2], - strides=[2, 2], - paddings=[0, 0], - pool_type='max') + result = max_pool2d(input, + kernel_size=2, + stride=2, + padding=0, + return_mask=False, + data_format="NHWC") + + result_np = pool2D_forward_naive(input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='max') self.assertTrue( - np.allclose( - np.transpose(result.numpy(), [0, 3, 1, 2]), result_np)) + np.allclose(np.transpose(result.numpy(), [0, 3, 1, 2]), + result_np)) def check_max_dygraph_padding_results(self, place): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result = max_pool2d( - input, kernel_size=2, stride=2, padding=1, ceil_mode=False) - - result_np = max_pool2D_forward_naive( - input_np, - ksize=[2, 2], - strides=[2, 2], - paddings=[1, 1], - ceil_mode=False, - exclusive=False) + result = max_pool2d(input, + kernel_size=2, + stride=2, + padding=1, + ceil_mode=False) + + result_np = max_pool2D_forward_naive(input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[1, 1], + ceil_mode=False, + exclusive=False) self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool2d_dg = paddle.nn.layer.MaxPool2D( - kernel_size=2, stride=2, padding=1, ceil_mode=False) + max_pool2d_dg = paddle.nn.layer.MaxPool2D(kernel_size=2, + stride=2, + padding=1, + ceil_mode=False) result = max_pool2d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -198,19 +212,23 @@ class TestPool2D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result = max_pool2d( - input, kernel_size=2, stride=2, padding=0, ceil_mode=True) - - result_np = max_pool2D_forward_naive( - input_np, - ksize=[2, 2], - strides=[2, 2], - paddings=[0, 0], - ceil_mode=True) + result = max_pool2d(input, + kernel_size=2, + stride=2, + padding=0, + ceil_mode=True) + + result_np = max_pool2D_forward_naive(input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + ceil_mode=True) self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool2d_dg = 
paddle.nn.layer.MaxPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) + max_pool2d_dg = paddle.nn.layer.MaxPool2D(kernel_size=2, + stride=2, + padding=0, + ceil_mode=True) result = max_pool2d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -218,24 +236,23 @@ class TestPool2D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result, indices = max_pool2d( - input, - kernel_size=2, - stride=None, - padding="SAME", - return_mask=True) - - result_np = pool2D_forward_naive( - input_np, - ksize=[2, 2], - strides=[2, 2], - paddings=[0, 0], - pool_type='max', - padding_algorithm="SAME") + result, indices = max_pool2d(input, + kernel_size=2, + stride=None, + padding="SAME", + return_mask=True) + + result_np = pool2D_forward_naive(input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='max', + padding_algorithm="SAME") self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool2d_dg = paddle.nn.layer.MaxPool2D( - kernel_size=2, stride=2, padding=0) + max_pool2d_dg = paddle.nn.layer.MaxPool2D(kernel_size=2, + stride=2, + padding=0) result = max_pool2d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -243,20 +260,22 @@ class TestPool2D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result = avg_pool2d( - input, kernel_size=2, stride=None, padding="SAME") - - result_np = pool2D_forward_naive( - input_np, - ksize=[2, 2], - strides=[2, 2], - paddings=[0, 0], - pool_type='avg', - padding_algorithm="SAME") + result = avg_pool2d(input, + kernel_size=2, + stride=None, + padding="SAME") + + result_np = pool2D_forward_naive(input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='avg', + padding_algorithm="SAME") self.assertTrue(np.allclose(result.numpy(), result_np)) - avg_pool2d_dg = paddle.nn.layer.AvgPool2D( - kernel_size=2, stride=2, padding=0) + avg_pool2d_dg = paddle.nn.layer.AvgPool2D(kernel_size=2, + stride=2, + padding=0) result = avg_pool2d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -265,23 +284,22 @@ class TestPool2D_API(unittest.TestCase): input_np = np.random.random([2, 3, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) padding = [[0, 0], [0, 0], [0, 0], [0, 0]] - result = max_pool2d( - input, - kernel_size=2, - stride=2, - padding=padding, - return_mask=False) - - result_np = pool2D_forward_naive( - input_np, - ksize=[2, 2], - strides=[2, 2], - paddings=[0, 0], - pool_type='max') + result = max_pool2d(input, + kernel_size=2, + stride=2, + padding=padding, + return_mask=False) + + result_np = pool2D_forward_naive(input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='max') self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool2d_dg = paddle.nn.layer.MaxPool2D( - kernel_size=2, stride=2, padding=0) + max_pool2d_dg = paddle.nn.layer.MaxPool2D(kernel_size=2, + stride=2, + padding=0) result = max_pool2d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -290,23 +308,22 @@ class TestPool2D_API(unittest.TestCase): input_np = np.random.random([2, 3, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) padding = [[0, 0], [0, 0], [0, 0], [0, 0]] - result = avg_pool2d( - input, - kernel_size=2, - stride=2, - padding=padding, - divisor_override=4) - - result_np = 
pool2D_forward_naive( - input_np, - ksize=[2, 2], - strides=[2, 2], - paddings=[0, 0], - pool_type='avg') + result = avg_pool2d(input, + kernel_size=2, + stride=2, + padding=padding, + divisor_override=4) + + result_np = pool2D_forward_naive(input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='avg') self.assertTrue(np.allclose(result.numpy(), result_np)) - avg_pool2d_dg = paddle.nn.layer.AvgPool2D( - kernel_size=2, stride=2, padding=0) + avg_pool2d_dg = paddle.nn.layer.AvgPool2D(kernel_size=2, + stride=2, + padding=0) result = avg_pool2d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -331,15 +348,19 @@ class TestPool2D_API(unittest.TestCase): class TestPool2DError_API(unittest.TestCase): + def test_error_api(self): + def run1(): with fluid.dygraph.guard(): input_np = np.random.uniform(-1, 1, [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = [[0, 1], [0, 0], [0, 0], [0, 0]] - res_pd = max_pool2d( - input_pd, kernel_size=2, stride=2, padding=padding) + res_pd = max_pool2d(input_pd, + kernel_size=2, + stride=2, + padding=padding) self.assertRaises(ValueError, run1) @@ -349,12 +370,11 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = [[0, 1], [0, 0], [0, 0], [0, 0]] - res_pd = max_pool2d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - data_format='NHWC') + res_pd = max_pool2d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + data_format='NHWC') self.assertRaises(ValueError, run2) @@ -364,12 +384,11 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = "padding" - res_pd = max_pool2d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - data_format='NHWC') + res_pd = max_pool2d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + data_format='NHWC') self.assertRaises(ValueError, run3) @@ -379,12 +398,11 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = "padding" - res_pd = avg_pool2d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - data_format='NHWC') + res_pd = avg_pool2d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + data_format='NHWC') self.assertRaises(ValueError, run3_avg) @@ -394,13 +412,12 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = "VALID" - res_pd = max_pool2d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - ceil_mode=True, - data_format='NHWC') + res_pd = max_pool2d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True, + data_format='NHWC') self.assertRaises(ValueError, run4) @@ -410,13 +427,12 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = "VALID" - res_pd = avg_pool2d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - ceil_mode=True, - data_format='NHWC') + res_pd = avg_pool2d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True, + data_format='NHWC') self.assertRaises(ValueError, run4_avg) @@ -426,12 +442,11 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = "padding" - res_pd = avg_pool2d( - input_pd, - kernel_size=2, - 
stride=2, - padding=padding, - data_format='NHWC') + res_pd = avg_pool2d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + data_format='NHWC') self.assertRaises(ValueError, run5) @@ -441,13 +456,12 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = "VALID" - res_pd = avg_pool2d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - ceil_mode=True, - data_format='NHWC') + res_pd = avg_pool2d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True, + data_format='NHWC') self.assertRaises(ValueError, run6) @@ -457,13 +471,12 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = "VALID" - res_pd = avg_pool2d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - ceil_mode=False, - data_format='NNNN') + res_pd = avg_pool2d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=False, + data_format='NNNN') self.assertRaises(ValueError, run7) @@ -473,13 +486,12 @@ class TestPool2DError_API(unittest.TestCase): [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = "VALID" - res_pd = max_pool2d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - ceil_mode=False, - data_format='NNNN') + res_pd = max_pool2d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=False, + data_format='NNNN') self.assertRaises(ValueError, run8) @@ -488,14 +500,13 @@ class TestPool2DError_API(unittest.TestCase): input_np = np.random.uniform(-1, 1, [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) - res_pd = max_pool2d( - input_pd, - kernel_size=2, - stride=2, - padding=0, - ceil_mode=False, - data_format='NHWC', - return_mask=True) + res_pd = max_pool2d(input_pd, + kernel_size=2, + stride=2, + padding=0, + ceil_mode=False, + data_format='NHWC', + return_mask=True) self.assertRaises(ValueError, run9) @@ -504,13 +515,12 @@ class TestPool2DError_API(unittest.TestCase): input_np = np.random.uniform(-1, 1, [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) - res_pd = avg_pool2d( - input_pd, - kernel_size=[-1, 2], - stride=2, - padding=0, - ceil_mode=False, - data_format='NHWC') + res_pd = avg_pool2d(input_pd, + kernel_size=[-1, 2], + stride=2, + padding=0, + ceil_mode=False, + data_format='NHWC') self.assertRaises(ValueError, run_kernel_out_of_range) @@ -519,13 +529,12 @@ class TestPool2DError_API(unittest.TestCase): input_np = np.random.uniform(-1, 1, [2, 3, 32, 32]).astype(np.float32) input_pd = fluid.dygraph.to_variable(input_np) - res_pd = avg_pool2d( - input_pd, - kernel_size=3, - stride=[0, 2], - padding=0, - ceil_mode=False, - data_format='NHWC') + res_pd = avg_pool2d(input_pd, + kernel_size=3, + stride=[0, 2], + padding=0, + ceil_mode=False, + data_format='NHWC') self.assertRaises(ValueError, run_stride_out_of_range) diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_op.py b/python/paddle/fluid/tests/unittests/test_pool2d_op.py index 582ec950106..81fa00986d7 100644 --- a/python/paddle/fluid/tests/unittests/test_pool2d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pool2d_op.py @@ -49,11 +49,11 @@ def max_pool2D_forward_naive(x, if adaptive: H_out, W_out = ksize else: - H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - 1 - ) // strides[0] + 1 if ceil_mode else ( + H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - + 1) // strides[0] + 1 
if ceil_mode else ( H - ksize[0] + 2 * paddings[0]) // strides[0] + 1 - W_out = (W - ksize[1] + 2 * paddings[1] + strides[1] - 1 - ) // strides[1] + 1 if ceil_mode else ( + W_out = (W - ksize[1] + 2 * paddings[1] + strides[1] - + 1) // strides[1] + 1 if ceil_mode else ( W - ksize[1] + 2 * paddings[1]) // strides[1] + 1 out = np.zeros((N, C, H_out, W_out)) for i in range(H_out): @@ -91,11 +91,11 @@ def avg_pool2D_forward_naive(x, if adaptive: H_out, W_out = ksize else: - H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - 1 - ) // strides[0] + 1 if ceil_mode else ( + H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - + 1) // strides[0] + 1 if ceil_mode else ( H - ksize[0] + 2 * paddings[0]) // strides[0] + 1 - W_out = (W - ksize[1] + 2 * paddings[1] + strides[1] - 1 - ) // strides[1] + 1 if ceil_mode else ( + W_out = (W - ksize[1] + 2 * paddings[1] + strides[1] - + 1) // strides[1] + 1 if ceil_mode else ( W - ksize[1] + 2 * paddings[1]) // strides[1] + 1 out = np.zeros((N, C, H_out, W_out)) for i in range(H_out): @@ -122,9 +122,9 @@ def avg_pool2D_forward_naive(x, field_size = (r_end - r_start) * (c_end - c_start) if data_type == np.int8 or data_type == np.uint8: - out[:, :, i, j] = (np.rint( - np.sum(x_masked, axis=(2, 3)) / - field_size)).astype(data_type) + out[:, :, i, + j] = (np.rint(np.sum(x_masked, axis=(2, 3)) / + field_size)).astype(data_type) else: out[:, :, i, j] = (np.sum(x_masked, axis=(2, 3)) / field_size).astype(data_type) @@ -149,8 +149,8 @@ def pool2D_forward_naive(x, for input_size, filter_size, stride_size in zip(input_shape, pool_size, pool_stride): out_size = int((input_size + stride_size - 1) / stride_size) - pad_sum = np.max(( - (out_size - 1) * stride_size + filter_size - input_size, 0)) + pad_sum = np.max( + ((out_size - 1) * stride_size + filter_size - input_size, 0)) pad_0 = int(pad_sum / 2) pad_1 = int(pad_sum - pad_0) padding.append(pad_0) @@ -233,8 +233,9 @@ def pool2D_forward_naive(x, x_masked = x[:, :, in_h_start:in_h_end, in_w_start:in_w_end] if pool_type == 'avg': if (exclusive or adaptive): - field_size = (in_h_end - in_h_start) * ( - in_w_end - in_w_start) + field_size = (in_h_end - in_h_start) * (in_w_end - + in_w_start) + # if (exclusive or adaptive) else (ksize[0] * ksize[1]) out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size @@ -244,8 +245,8 @@ def pool2D_forward_naive(x, x_masked = x[:, in_h_start:in_h_end, in_w_start:in_w_end, :] if pool_type == 'avg': if (exclusive or adaptive): - field_size = (in_h_end - in_h_start) * ( - in_w_end - in_w_start) + field_size = (in_h_end - in_h_start) * (in_w_end - + in_w_start) out[:, i, j, :] = np.sum(x_masked, axis=(1, 2)) / field_size elif pool_type == 'max': out[:, i, j, :] = np.max(x_masked, axis=(1, 2)) @@ -253,6 +254,7 @@ def pool2D_forward_naive(x, class TestPool2D_Op_Mixin(object): + def setUp(self): self.op_type = "pool2d" self.use_cudnn = False @@ -272,10 +274,12 @@ class TestPool2D_Op_Mixin(object): self.init_shape() input = np.random.random(self.shape).astype(self.dtype) - output = pool2D_forward_naive( - input, self.ksize, self.strides, self.paddings, self.global_pool, - self.ceil_mode, self.exclusive, self.adaptive, self.data_format, - self.pool_type, self.padding_algorithm).astype(self.dtype) + output = pool2D_forward_naive(input, self.ksize, self.strides, + self.paddings, self.global_pool, + self.ceil_mode, self.exclusive, + self.adaptive, self.data_format, + self.pool_type, + self.padding_algorithm).astype(self.dtype) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)} 
self.attrs = { @@ -313,18 +317,16 @@ class TestPool2D_Op_Mixin(object): # TODO(wangzhongpu): support mkldnn op in dygraph mode if self.has_cudnn() and self.pool_type != "max": place = core.CUDAPlace(0) - self.check_grad_with_place( - place, - set(['X']), - 'Out', - max_relative_error=0.07, - check_dygraph=(self.use_mkldnn == False)) + self.check_grad_with_place(place, + set(['X']), + 'Out', + max_relative_error=0.07, + check_dygraph=(self.use_mkldnn == False)) elif self.pool_type != "max": - self.check_grad( - set(['X']), - 'Out', - max_relative_error=0.07, - check_dygraph=(self.use_mkldnn == False)) + self.check_grad(set(['X']), + 'Out', + max_relative_error=0.07, + check_dygraph=(self.use_mkldnn == False)) def init_data_format(self): self.data_format = "NCHW" @@ -368,6 +370,7 @@ class TestPool2D_Op(TestPool2D_Op_Mixin, OpTest): class TestCase1(TestPool2D_Op): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -387,6 +390,7 @@ class TestCase1(TestPool2D_Op): class TestCase2(TestPool2D_Op): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -406,18 +410,21 @@ class TestCase2(TestPool2D_Op): class TestCase3(TestPool2D_Op): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase4(TestCase1): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase5(TestCase2): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive @@ -427,9 +434,11 @@ class TestCase5(TestCase2): def create_test_cudnn_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNCase(parent): + def init_kernel_type(self): self.use_cudnn = True @@ -449,9 +458,11 @@ create_test_cudnn_class(TestCase5) def create_test_cudnn_fp16_class(parent, check_grad=True): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNFp16Case(parent): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float16 @@ -484,9 +495,11 @@ def create_test_cudnn_fp16_class(parent, check_grad=True): def create_test_fp16_class(parent, check_grad=True): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFp16Case(parent): + def init_kernel_type(self): self.use_cudnn = False self.dtype = np.float16 @@ -536,9 +549,11 @@ create_test_fp16_class(TestCase5) def create_test_cudnn_use_ceil_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestPool2DUseCeilCase(parent): + def init_kernel_type(self): self.use_cudnn = True @@ -555,7 +570,9 @@ create_test_cudnn_use_ceil_class(TestCase1) def create_test_use_ceil_class(parent): + class TestPool2DUseCeilCase(parent): + def init_ceil_mode(self): self.ceil_mode = True @@ -569,11 +586,13 @@ create_test_use_ceil_class(TestCase2) class TestAvgInclude(TestCase2): + def init_exclusive(self): self.exclusive = False class TestCUDNNAvgInclude(TestCase2): + def init_kernel_type(self): self.use_cudnn = True @@ -582,11 +601,13 @@ class TestCUDNNAvgInclude(TestCase2): class TestAvgPoolAdaptive(TestCase1): + def init_adaptive(self): self.adaptive = True class TestAvgPoolAdaptiveAsyOutSize(TestCase1): + def init_adaptive(self): self.adaptive = True @@ -603,6 +624,7 @@ class TestAvgPoolAdaptiveAsyOutSize(TestCase1): class TestPool2D_AsyPadding(TestPool2D_Op): + def init_test_case(self): self.ksize = [3, 3] 
self.strides = [1, 1] @@ -613,6 +635,7 @@ class TestPool2D_AsyPadding(TestPool2D_Op): class TestCase1_AsyPadding(TestCase1): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -623,6 +646,7 @@ class TestCase1_AsyPadding(TestCase1): class TestCase2_AsyPadding(TestCase2): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -633,6 +657,7 @@ class TestCase2_AsyPadding(TestCase2): class TestCase3_AsyPadding(TestCase3): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -643,6 +668,7 @@ class TestCase3_AsyPadding(TestCase3): class TestCase4_AsyPadding(TestCase4): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -653,6 +679,7 @@ class TestCase4_AsyPadding(TestCase4): class TestCase5_AsyPadding((TestCase5)): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -684,6 +711,7 @@ create_test_use_ceil_class(TestCase2_AsyPadding) class TestAvgInclude_AsyPadding(TestCase2): + def init_exclusive(self): self.exclusive = False @@ -697,6 +725,7 @@ class TestAvgInclude_AsyPadding(TestCase2): class TestCUDNNAvgInclude_AsyPadding(TestCase2): + def init_kernel_type(self): self.use_cudnn = True @@ -713,6 +742,7 @@ class TestCUDNNAvgInclude_AsyPadding(TestCase2): class TestAvgPoolAdaptive_AsyPadding(TestCase1): + def init_adaptive(self): self.adaptive = True @@ -727,6 +757,7 @@ class TestAvgPoolAdaptive_AsyPadding(TestCase1): #----------- test channel_last -------------- class TestPool2D_channel_last(TestPool2D_Op): + def init_data_format(self): self.data_format = "NHWC" @@ -735,6 +766,7 @@ class TestPool2D_channel_last(TestPool2D_Op): class TestCase1_channel_last(TestCase1): + def init_data_format(self): self.data_format = "NHWC" @@ -743,6 +775,7 @@ class TestCase1_channel_last(TestCase1): class TestCase2_channel_last(TestCase2): + def init_data_format(self): self.data_format = "NHWC" @@ -751,6 +784,7 @@ class TestCase2_channel_last(TestCase2): class TestCase3_channel_last(TestCase3): + def init_data_format(self): self.data_format = "NHWC" @@ -759,6 +793,7 @@ class TestCase3_channel_last(TestCase3): class TestCase4_channel_last(TestCase4): + def init_data_format(self): self.data_format = "NHWC" @@ -767,6 +802,7 @@ class TestCase4_channel_last(TestCase4): class TestCase5_channel_last(TestCase5): + def init_data_format(self): self.data_format = "NHWC" @@ -796,6 +832,7 @@ create_test_use_ceil_class(TestCase2_channel_last) class TestCase5_Max(TestCase2): + def init_pool_type(self): self.pool_type = "max" @@ -804,13 +841,16 @@ class TestCase5_Max(TestCase2): return if self.has_cudnn() and self.pool_type == "max": place = core.CUDAPlace(0) - self.check_grad_with_place( - place, set(['X']), 'Out', max_relative_error=1.00) + self.check_grad_with_place(place, + set(['X']), + 'Out', + max_relative_error=1.00) elif self.pool_type == "max": self.check_grad(set(['X']), 'Out', max_relative_error=1.00) class TestCase5_channel_last_Max(TestCase5_Max): + def init_data_format(self): self.data_format = "NHWC" @@ -823,11 +863,13 @@ create_test_cudnn_class(TestCase5_channel_last_Max) class TestAvgInclude_channel_last(TestCase2_channel_last): + def init_exclusive(self): self.exclusive = False class TestCUDNNAvgInclude_channel_last(TestCase2_channel_last): + def init_kernel_type(self): self.use_cudnn = True @@ -836,11 +878,13 @@ class TestCUDNNAvgInclude_channel_last(TestCase2_channel_last): class TestAvgPoolAdaptive_channel_last(TestCase1_channel_last): + def init_adaptive(self): self.adaptive = True class 
TestPool2D_AsyPadding_channel_last(TestPool2D_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -849,6 +893,7 @@ class TestPool2D_AsyPadding_channel_last(TestPool2D_AsyPadding): class TestCase1_AsyPadding_channel_last(TestCase1_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -857,6 +902,7 @@ class TestCase1_AsyPadding_channel_last(TestCase1_AsyPadding): class TestCase2_AsyPadding_channel_last(TestCase2_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -865,6 +911,7 @@ class TestCase2_AsyPadding_channel_last(TestCase2_AsyPadding): class TestCase3_AsyPadding_channel_last(TestCase3_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -873,6 +920,7 @@ class TestCase3_AsyPadding_channel_last(TestCase3_AsyPadding): class TestCase4_AsyPadding_channel_last(TestCase4_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -881,6 +929,7 @@ class TestCase4_AsyPadding_channel_last(TestCase4_AsyPadding): class TestCase5_AsyPadding_channel_last(TestCase5_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -896,8 +945,8 @@ create_test_cudnn_class(TestCase4_AsyPadding_channel_last) create_test_cudnn_class(TestCase5_AsyPadding_channel_last) create_test_cudnn_fp16_class(TestPool2D_AsyPadding_channel_last) -create_test_cudnn_fp16_class( - TestCase1_AsyPadding_channel_last, check_grad=False) +create_test_cudnn_fp16_class(TestCase1_AsyPadding_channel_last, + check_grad=False) create_test_cudnn_fp16_class(TestCase2_AsyPadding_channel_last) create_test_cudnn_fp16_class(TestCase3_AsyPadding_channel_last) create_test_cudnn_fp16_class(TestCase4_AsyPadding_channel_last) @@ -911,6 +960,7 @@ create_test_use_ceil_class(TestCase2_AsyPadding_channel_last) class TestAvgInclude_AsyPadding_channel_last(TestAvgInclude_AsyPadding): + def init_data_format(self): self.data_format = "NHWC" @@ -918,8 +968,9 @@ class TestAvgInclude_AsyPadding_channel_last(TestAvgInclude_AsyPadding): self.shape = [2, 7, 7, 3] -class TestCUDNNAvgInclude_AsyPadding_channel_last( - TestCUDNNAvgInclude_AsyPadding): +class TestCUDNNAvgInclude_AsyPadding_channel_last(TestCUDNNAvgInclude_AsyPadding + ): + def init_data_format(self): self.data_format = "NHWC" @@ -927,8 +978,9 @@ class TestCUDNNAvgInclude_AsyPadding_channel_last( self.shape = [2, 7, 7, 3] -class TestAvgPoolAdaptive_AsyPadding_channel_last( - TestAvgPoolAdaptive_AsyPadding): +class TestAvgPoolAdaptive_AsyPadding_channel_last(TestAvgPoolAdaptive_AsyPadding + ): + def init_data_format(self): self.data_format = "NHWC" @@ -940,7 +992,9 @@ class TestAvgPoolAdaptive_AsyPadding_channel_last( def create_test_padding_SAME_class(parent): + class TestPaddingSMAECase(parent): + def init_paddings(self): self.paddings = [0, 0] self.padding_algorithm = "SAME" @@ -966,9 +1020,11 @@ create_test_padding_SAME_class(TestCase5_channel_last) def create_test_cudnn_padding_SAME_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNPaddingSMAECase(parent): + def init_kernel_type(self): self.use_cudnn = True @@ -997,7 +1053,9 @@ create_test_cudnn_padding_SAME_class(TestCase5_channel_last) def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): self.paddings = [1, 1] self.padding_algorithm = "VALID" @@ -1023,9 +1081,11 @@ create_test_padding_VALID_class(TestCase5_channel_last) def create_test_cudnn_padding_VALID_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), 
"core is not compiled with CUDA") class TestCUDNNPaddingVALIDCase(parent): + def init_kernel_type(self): self.use_cudnn = True @@ -1054,6 +1114,7 @@ create_test_cudnn_padding_VALID_class(TestCase5_channel_last) class TestCase1_strides(TestCase1): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 2] @@ -1069,117 +1130,106 @@ create_test_cudnn_padding_SAME_class(TestCase1_strides) # ----- test API class TestPool2DAPI(unittest.TestCase): + def test_api(self): x_NHWC = np.random.random([2, 5, 5, 3]).astype("float32") x_NCHW = np.random.random([2, 3, 5, 5]).astype("float32") - input_NHWC = fluid.layers.data( - name="input_NHWC", - shape=[2, 5, 5, 3], - append_batch_size=False, - dtype="float32") - - input_NCHW = fluid.layers.data( - name="input_NCHW", - shape=[2, 3, 5, 5], - append_batch_size=False, - dtype="float32") - - input_NHWC_negetive = fluid.layers.data( - name="input_NHWC_negetive", - shape=[2, -1, 5, 3], - append_batch_size=False, - dtype="float32") - - input_NCHW_negetive = fluid.layers.data( - name="input_NCHW_negetive", - shape=[2, 3, -1, -1], - append_batch_size=False, - dtype="float32") + input_NHWC = fluid.layers.data(name="input_NHWC", + shape=[2, 5, 5, 3], + append_batch_size=False, + dtype="float32") + + input_NCHW = fluid.layers.data(name="input_NCHW", + shape=[2, 3, 5, 5], + append_batch_size=False, + dtype="float32") + + input_NHWC_negetive = fluid.layers.data(name="input_NHWC_negetive", + shape=[2, -1, 5, 3], + append_batch_size=False, + dtype="float32") + + input_NCHW_negetive = fluid.layers.data(name="input_NCHW_negetive", + shape=[2, 3, -1, -1], + append_batch_size=False, + dtype="float32") ksize = [3, 3] - out_1 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[1, 1], - use_cudnn=False, - data_format="NHWC") - - out_2 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="avg", - pool_padding=[[0, 0], [1, 1], [1, 1], [0, 0]], - use_cudnn=False, - data_format="NHWC") - - out_3 = fluid.layers.pool2d( - input=input_NCHW, - pool_size=ksize, - pool_type="avg", - pool_padding=[[0, 0], [0, 0], [1, 1], [1, 1]], - use_cudnn=False, - data_format="NCHW") - - out_4 = fluid.layers.pool2d( - input=input_NCHW, - pool_size=ksize, - pool_type="avg", - pool_padding=[1, 2, 1, 0], - use_cudnn=False, - data_format="NCHW") + out_1 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[1, 1], + use_cudnn=False, + data_format="NHWC") + + out_2 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="avg", + pool_padding=[[0, 0], [1, 1], [1, 1], + [0, 0]], + use_cudnn=False, + data_format="NHWC") + + out_3 = fluid.layers.pool2d(input=input_NCHW, + pool_size=ksize, + pool_type="avg", + pool_padding=[[0, 0], [0, 0], [1, 1], + [1, 1]], + use_cudnn=False, + data_format="NCHW") + + out_4 = fluid.layers.pool2d(input=input_NCHW, + pool_size=ksize, + pool_type="avg", + pool_padding=[1, 2, 1, 0], + use_cudnn=False, + data_format="NCHW") # test VALID - out_5 = fluid.layers.pool2d( - input=input_NCHW, - pool_size=ksize, - pool_type="avg", - pool_padding="VALID", - use_cudnn=False, - data_format="NCHW") - - out_6 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="max", - pool_padding="VALID", - use_cudnn=False, - data_format="NHWC") + out_5 = fluid.layers.pool2d(input=input_NCHW, + pool_size=ksize, + pool_type="avg", + pool_padding="VALID", + use_cudnn=False, + data_format="NCHW") + + out_6 = fluid.layers.pool2d(input=input_NHWC, + 
pool_size=ksize, + pool_type="max", + pool_padding="VALID", + use_cudnn=False, + data_format="NHWC") # test SAME - out_7 = fluid.layers.pool2d( - input=input_NCHW, - pool_size=[4, 4], - pool_type="avg", - pool_padding="SAME", - use_cudnn=False, - data_format="NCHW") - - out_8 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=[4, 4], - pool_type="max", - pool_padding="SAME", - use_cudnn=False, - data_format="NHWC") + out_7 = fluid.layers.pool2d(input=input_NCHW, + pool_size=[4, 4], + pool_type="avg", + pool_padding="SAME", + use_cudnn=False, + data_format="NCHW") + + out_8 = fluid.layers.pool2d(input=input_NHWC, + pool_size=[4, 4], + pool_type="max", + pool_padding="SAME", + use_cudnn=False, + data_format="NHWC") # test negetive - out_9 = fluid.layers.pool2d( - input=input_NHWC_negetive, - pool_size=ksize, - pool_type="avg", - pool_padding=[0, 0], - use_cudnn=False, - data_format="NHWC") + out_9 = fluid.layers.pool2d(input=input_NHWC_negetive, + pool_size=ksize, + pool_type="avg", + pool_padding=[0, 0], + use_cudnn=False, + data_format="NHWC") assert out_9.shape == (2, -1, 3, 3) - out_10 = fluid.layers.pool2d( - input=input_NCHW_negetive, - pool_size=ksize, - pool_type="avg", - pool_padding=[0, 0], - use_cudnn=False, - data_format="NCHW") + out_10 = fluid.layers.pool2d(input=input_NCHW_negetive, + pool_size=ksize, + pool_type="avg", + pool_padding=[0, 0], + use_cudnn=False, + data_format="NCHW") assert out_10.shape == (2, 3, -1, -1) exe = fluid.Executor(place=fluid.CPUPlace()) @@ -1191,52 +1241,44 @@ class TestPool2DAPI(unittest.TestCase): "input_NHWC_negetive": x_NHWC, "input_NCHW_negetive": x_NCHW }, - fetch_list=[ - out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8 - ]) + fetch_list=[out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8]) assert np.allclose( res_1, - pool2D_forward_naive( - x=x_NHWC, - ksize=ksize, - pool_type="max", - strides=[1, 1], - paddings=[1, 1], - data_format="NHWC")) + pool2D_forward_naive(x=x_NHWC, + ksize=ksize, + pool_type="max", + strides=[1, 1], + paddings=[1, 1], + data_format="NHWC")) assert np.allclose( res_2, - pool2D_forward_naive( - x=x_NHWC, - ksize=ksize, - pool_type="avg", - strides=[1, 1], - paddings=[1, 1, 1, 1], - data_format="NHWC")) - assert np.allclose( - res_3, - pool2D_forward_naive( - x=x_NCHW, - ksize=ksize, - pool_type="avg", - strides=[1, 1], - paddings=[1, 1, 1, 1], - data_format="NCHW"), - rtol=0.07, - atol=1e-05) - - assert np.allclose( - res_4, - pool2D_forward_naive( - x=x_NCHW, - ksize=ksize, - pool_type="avg", - strides=[1, 1], - paddings=[1, 2, 1, 0], - data_format="NCHW"), - rtol=0.07, - atol=1e-05) + pool2D_forward_naive(x=x_NHWC, + ksize=ksize, + pool_type="avg", + strides=[1, 1], + paddings=[1, 1, 1, 1], + data_format="NHWC")) + assert np.allclose(res_3, + pool2D_forward_naive(x=x_NCHW, + ksize=ksize, + pool_type="avg", + strides=[1, 1], + paddings=[1, 1, 1, 1], + data_format="NCHW"), + rtol=0.07, + atol=1e-05) + + assert np.allclose(res_4, + pool2D_forward_naive(x=x_NCHW, + ksize=ksize, + pool_type="avg", + strides=[1, 1], + paddings=[1, 2, 1, 0], + data_format="NCHW"), + rtol=0.07, + atol=1e-05) # VALID assert np.allclose( @@ -1253,200 +1295,187 @@ class TestPool2DAPI(unittest.TestCase): atol=1e-05) assert np.allclose( res_6, - pool2D_forward_naive( - x=x_NHWC, - ksize=ksize, - pool_type="max", - strides=[1, 1], - paddings=[10, 20], - padding_algorithm="VALID", - data_format="NHWC")) + pool2D_forward_naive(x=x_NHWC, + ksize=ksize, + pool_type="max", + strides=[1, 1], + paddings=[10, 20], + 
padding_algorithm="VALID", + data_format="NHWC")) # SAME - assert np.allclose( - res_7, - pool2D_forward_naive( - x=x_NCHW, - ksize=[4, 4], - pool_type="avg", - strides=[1, 1], - paddings=[10, 20], - padding_algorithm="SAME", - data_format="NCHW"), - rtol=0.07, - atol=1e-05) + assert np.allclose(res_7, + pool2D_forward_naive(x=x_NCHW, + ksize=[4, 4], + pool_type="avg", + strides=[1, 1], + paddings=[10, 20], + padding_algorithm="SAME", + data_format="NCHW"), + rtol=0.07, + atol=1e-05) assert np.allclose( res_8, - pool2D_forward_naive( - x=x_NHWC, - ksize=[4, 4], - pool_type="max", - strides=[1, 1], - paddings=[10, 20], - padding_algorithm="SAME", - data_format="NHWC")) + pool2D_forward_naive(x=x_NHWC, + ksize=[4, 4], + pool_type="max", + strides=[1, 1], + paddings=[10, 20], + padding_algorithm="SAME", + data_format="NHWC")) class TestPool2DAPI_Error(unittest.TestCase): + def test_api(self): - input_NHWC = fluid.layers.data( - name="input_NHWC", - shape=[2, 5, 5, 3], - append_batch_size=False, - dtype="float32") + input_NHWC = fluid.layers.data(name="input_NHWC", + shape=[2, 5, 5, 3], + append_batch_size=False, + dtype="float32") ksize = [3, 3] # cudnn type error def run_1(): - out_1 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[1, 1], - use_cudnn=[0], - data_format="NHWC") + out_1 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[1, 1], + use_cudnn=[0], + data_format="NHWC") self.assertRaises(TypeError, run_1) # data_format value error def run_2(): - out_2 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[1, 1], - use_cudnn=False, - data_format="NHWCC") + out_2 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[1, 1], + use_cudnn=False, + data_format="NHWCC") self.assertRaises(ValueError, run_2) # padding str value error def run_3(): - out_3 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="max", - pool_padding="VALIDSAME", - use_cudnn=False, - data_format="NHWC") + out_3 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding="VALIDSAME", + use_cudnn=False, + data_format="NHWC") self.assertRaises(ValueError, run_3) # padding str valid and ceil_mode value error def run_4(): - out_4 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="max", - pool_padding="VALID", - use_cudnn=False, - ceil_mode=True, - data_format="NHWC") + out_4 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding="VALID", + use_cudnn=False, + ceil_mode=True, + data_format="NHWC") self.assertRaises(ValueError, run_4) # padding with 8 ele. value error def run_5(): - out_5 = fluid.layers.pool2d( - input=input_NHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[[1, 1], [0, 0], [0, 0], [1, 1]], - use_cudnn=False, - data_format="NHWC") + out_5 = fluid.layers.pool2d(input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[[1, 1], [0, 0], [0, 0], + [1, 1]], + use_cudnn=False, + data_format="NHWC") self.assertRaises(ValueError, run_5) class TestDygraphPool2DAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of Pool2D must be Variable. 
data1 = np.random.random((3, 32, 32, 5)).astype('float32') - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='max', - pool_stride=1, - global_pooling=False) + pool2d = fluid.dygraph.Pool2D(pool_size=2, + pool_type='max', + pool_stride=1, + global_pooling=False) self.assertRaises(TypeError, pool2d, data1) # the input dtype of Pool2D must be uint8 or int8 or float16 or float32 or float64 # uint8 and int8 only can be set on mkldnn # float16 only can be set on GPU place - data2 = fluid.layers.data( - name='x1', shape=[3, 32, 32, 5], dtype="int32") + data2 = fluid.layers.data(name='x1', + shape=[3, 32, 32, 5], + dtype="int32") self.assertRaises(TypeError, pool2d, data2) def test_data_format_error(self): with program_guard(Program(), Program()): # the data_format must be 'NCHW' or 'NHWC' data1 = np.random.random((3, 32, 32, 5)).astype('float32') - self.assertRaises( - ValueError, - fluid.dygraph.Pool2D, - pool_size=2, - pool_type='max', - pool_stride=1, - global_pooling=False, - data_format='NWHC') + self.assertRaises(ValueError, + fluid.dygraph.Pool2D, + pool_size=2, + pool_type='max', + pool_stride=1, + global_pooling=False, + data_format='NWHC') class TestDygraphPool2DAPI(unittest.TestCase): + def test_nhwc(self): with fluid.dygraph.guard(): data = np.random.random((3, 32, 32, 5)).astype('float32') x = fluid.dygraph.to_variable(data) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='max', - pool_stride=1, - pool_padding=[0, 0], - global_pooling=False, - data_format='NHWC') + pool2d = fluid.dygraph.Pool2D(pool_size=2, + pool_type='max', + pool_stride=1, + pool_padding=[0, 0], + global_pooling=False, + data_format='NHWC') out1 = pool2d(x) - out2 = pool2D_forward_naive( - data, [2, 2], [1, 1], - paddings=[0, 0], - pool_type='max', - data_format='NHWC') + out2 = pool2D_forward_naive(data, [2, 2], [1, 1], + paddings=[0, 0], + pool_type='max', + data_format='NHWC') self.assertTrue(np.allclose(out1.numpy(), out2)) def test_lower_case(self): with fluid.dygraph.guard(): data = np.random.random((3, 32, 32, 5)).astype('float32') x = fluid.dygraph.to_variable(data) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='max', - pool_stride=1, - pool_padding=[0, 0], - global_pooling=False, - data_format='nhwc') + pool2d = fluid.dygraph.Pool2D(pool_size=2, + pool_type='max', + pool_stride=1, + pool_padding=[0, 0], + global_pooling=False, + data_format='nhwc') out1 = pool2d(x) - out2 = pool2D_forward_naive( - data, [2, 2], [1, 1], - paddings=[0, 0], - pool_type='max', - data_format='NHWC') + out2 = pool2D_forward_naive(data, [2, 2], [1, 1], + paddings=[0, 0], + pool_type='max', + data_format='NHWC') self.assertTrue(np.allclose(out1.numpy(), out2)) def test_upper_case(self): with fluid.dygraph.guard(): data = np.random.random((3, 32, 32, 5)).astype('float32') x = fluid.dygraph.to_variable(data) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='MAX', - pool_stride=1, - pool_padding=[0, 0], - global_pooling=False, - data_format='nhwc') + pool2d = fluid.dygraph.Pool2D(pool_size=2, + pool_type='MAX', + pool_stride=1, + pool_padding=[0, 0], + global_pooling=False, + data_format='nhwc') out1 = pool2d(x) - out2 = pool2D_forward_naive( - data, [2, 2], [1, 1], - paddings=[0, 0], - pool_type='max', - data_format='NHWC') + out2 = pool2D_forward_naive(data, [2, 2], [1, 1], + paddings=[0, 0], + pool_type='max', + data_format='NHWC') self.assertTrue(np.allclose(out1.numpy(), out2)) diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_api.py 
b/python/paddle/fluid/tests/unittests/test_pool3d_api.py index f20d2aad49f..3ecfb06bb58 100644 --- a/python/paddle/fluid/tests/unittests/test_pool3d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool3d_api.py @@ -28,6 +28,7 @@ from test_pool3d_op import adaptive_start_index, adaptive_end_index, pool3D_forw class TestPool3D_API(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -36,17 +37,17 @@ class TestPool3D_API(unittest.TestCase): def check_avg_static_results(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - input = fluid.data( - name="input", shape=[2, 3, 32, 32, 32], dtype="float32") + input = fluid.data(name="input", + shape=[2, 3, 32, 32, 32], + dtype="float32") result = avg_pool3d(input, kernel_size=2, stride=2, padding=0) input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") - result_np = pool3D_forward_naive( - input_np, - ksize=[2, 2, 2], - strides=[2, 2, 2], - paddings=[0, 0, 0], - pool_type='avg') + result_np = pool3D_forward_naive(input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='avg') exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), @@ -60,18 +61,18 @@ class TestPool3D_API(unittest.TestCase): input = fluid.dygraph.to_variable(input_np) result = avg_pool3d(input, kernel_size=2, stride=2, padding="SAME") - result_np = pool3D_forward_naive( - input_np, - ksize=[2, 2, 2], - strides=[2, 2, 2], - paddings=[0, 0, 0], - pool_type='avg', - padding_algorithm="SAME") + result_np = pool3D_forward_naive(input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='avg', + padding_algorithm="SAME") self.assertTrue(np.allclose(result.numpy(), result_np)) - avg_pool3d_dg = paddle.nn.layer.AvgPool3D( - kernel_size=2, stride=None, padding="SAME") + avg_pool3d_dg = paddle.nn.layer.AvgPool3D(kernel_size=2, + stride=None, + padding="SAME") result = avg_pool3d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -79,30 +80,27 @@ class TestPool3D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result = avg_pool3d( - input, - kernel_size=2, - stride=2, - padding=1, - ceil_mode=False, - exclusive=True) - - result_np = avg_pool3D_forward_naive( - input_np, - ksize=[2, 2, 2], - strides=[2, 2, 2], - paddings=[1, 1, 1], - ceil_mode=False, - exclusive=False) + result = avg_pool3d(input, + kernel_size=2, + stride=2, + padding=1, + ceil_mode=False, + exclusive=True) + + result_np = avg_pool3D_forward_naive(input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[1, 1, 1], + ceil_mode=False, + exclusive=False) self.assertTrue(np.allclose(result.numpy(), result_np)) - avg_pool3d_dg = paddle.nn.layer.AvgPool3D( - kernel_size=2, - stride=None, - padding=1, - ceil_mode=False, - exclusive=True) + avg_pool3d_dg = paddle.nn.layer.AvgPool3D(kernel_size=2, + stride=None, + padding=1, + ceil_mode=False, + exclusive=True) result = avg_pool3d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -110,36 +108,40 @@ class TestPool3D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result = avg_pool3d( - input, kernel_size=2, stride=2, padding=0, ceil_mode=True) - - result_np = avg_pool3D_forward_naive( - input_np, - ksize=[2, 2, 2], - strides=[2, 2, 2], - paddings=[0, 0, 0], - 
ceil_mode=True) + result = avg_pool3d(input, + kernel_size=2, + stride=2, + padding=0, + ceil_mode=True) + + result_np = avg_pool3D_forward_naive(input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + ceil_mode=True) self.assertTrue(np.allclose(result.numpy(), result_np)) - avg_pool3d_dg = paddle.nn.layer.AvgPool3D( - kernel_size=2, stride=None, padding=0, ceil_mode=True) + avg_pool3d_dg = paddle.nn.layer.AvgPool3D(kernel_size=2, + stride=None, + padding=0, + ceil_mode=True) result = avg_pool3d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) def check_max_static_results(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - input = fluid.data( - name="input", shape=[2, 3, 32, 32, 32], dtype="float32") + input = fluid.data(name="input", + shape=[2, 3, 32, 32, 32], + dtype="float32") result = max_pool3d(input, kernel_size=2, stride=2, padding=0) input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") - result_np = pool3D_forward_naive( - input_np, - ksize=[2, 2, 2], - strides=[2, 2, 2], - paddings=[0, 0, 0], - pool_type='max') + result_np = pool3D_forward_naive(input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='max') exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), @@ -153,16 +155,16 @@ class TestPool3D_API(unittest.TestCase): input = fluid.dygraph.to_variable(input_np) result = max_pool3d(input, kernel_size=2, stride=2, padding=0) - result_np = pool3D_forward_naive( - input_np, - ksize=[2, 2, 2], - strides=[2, 2, 2], - paddings=[0, 0, 0], - pool_type='max') + result_np = pool3D_forward_naive(input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='max') self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool3d_dg = paddle.nn.layer.MaxPool3D( - kernel_size=2, stride=None, padding=0) + max_pool3d_dg = paddle.nn.layer.MaxPool3D(kernel_size=2, + stride=None, + padding=0) result = max_pool3d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -171,43 +173,45 @@ class TestPool3D_API(unittest.TestCase): input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") input = fluid.dygraph.to_variable( np.transpose(input_np, [0, 2, 3, 4, 1])) - result = max_pool3d( - input, - kernel_size=2, - stride=2, - padding=0, - data_format="NDHWC", - return_mask=False) - - result_np = pool3D_forward_naive( - input_np, - ksize=[2, 2, 2], - strides=[2, 2, 2], - paddings=[0, 0, 0], - pool_type='max') + result = max_pool3d(input, + kernel_size=2, + stride=2, + padding=0, + data_format="NDHWC", + return_mask=False) + + result_np = pool3D_forward_naive(input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='max') self.assertTrue( - np.allclose( - np.transpose(result.numpy(), [0, 4, 1, 2, 3]), result_np)) + np.allclose(np.transpose(result.numpy(), [0, 4, 1, 2, 3]), + result_np)) def check_max_dygraph_ceilmode_results(self, place): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result = max_pool3d( - input, kernel_size=2, stride=2, padding=0, ceil_mode=True) - - result_np = max_pool3D_forward_naive( - input_np, - ksize=[2, 2, 2], - strides=[2, 2, 2], - paddings=[0, 0, 0], - ceil_mode=True) + result = max_pool3d(input, + kernel_size=2, + stride=2, + padding=0, + ceil_mode=True) + + result_np = max_pool3D_forward_naive(input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + ceil_mode=True) 
self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool3d_dg = paddle.nn.layer.MaxPool3D( - kernel_size=2, stride=None, padding=0, ceil_mode=True) + max_pool3d_dg = paddle.nn.layer.MaxPool3D(kernel_size=2, + stride=None, + padding=0, + ceil_mode=True) result = max_pool3d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -215,20 +219,24 @@ class TestPool3D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result = max_pool3d( - input, kernel_size=2, stride=2, padding=1, ceil_mode=False) - - result_np = max_pool3D_forward_naive( - input_np, - ksize=[2, 2, 2], - strides=[2, 2, 2], - paddings=[1, 1, 1], - ceil_mode=False) + result = max_pool3d(input, + kernel_size=2, + stride=2, + padding=1, + ceil_mode=False) + + result_np = max_pool3D_forward_naive(input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[1, 1, 1], + ceil_mode=False) self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool3d_dg = paddle.nn.layer.MaxPool3D( - kernel_size=2, stride=None, padding=1, ceil_mode=False) + max_pool3d_dg = paddle.nn.layer.MaxPool3D(kernel_size=2, + stride=None, + padding=1, + ceil_mode=False) result = max_pool3d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -236,24 +244,23 @@ class TestPool3D_API(unittest.TestCase): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) - result, indices = max_pool3d( - input, - kernel_size=2, - stride=None, - padding="SAME", - return_mask=True) - - result_np = pool3D_forward_naive( - input_np, - ksize=[2, 2, 2], - strides=[2, 2, 2], - paddings=[0, 0, 0], - pool_type='max', - padding_algorithm="SAME") + result, indices = max_pool3d(input, + kernel_size=2, + stride=None, + padding="SAME", + return_mask=True) + + result_np = pool3D_forward_naive(input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='max', + padding_algorithm="SAME") self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool3d_dg = paddle.nn.layer.MaxPool3D( - kernel_size=2, stride=2, padding=0) + max_pool3d_dg = paddle.nn.layer.MaxPool3D(kernel_size=2, + stride=2, + padding=0) result = max_pool3d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -264,16 +271,16 @@ class TestPool3D_API(unittest.TestCase): padding = [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]] result = max_pool3d(input, kernel_size=2, stride=2, padding=padding) - result_np = pool3D_forward_naive( - input_np, - ksize=[2, 2, 2], - strides=[2, 2, 2], - paddings=[0, 0, 0], - pool_type='max') + result_np = pool3D_forward_naive(input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='max') self.assertTrue(np.allclose(result.numpy(), result_np)) - max_pool3d_dg = paddle.nn.layer.MaxPool3D( - kernel_size=2, stride=2, padding=0) + max_pool3d_dg = paddle.nn.layer.MaxPool3D(kernel_size=2, + stride=2, + padding=0) result = max_pool3d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) @@ -286,33 +293,31 @@ class TestPool3D_API(unittest.TestCase): input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) padding = 0 - result = avg_pool3d( - input, - kernel_size=2, - stride=2, - padding=padding, - divisor_override=8) - - result_np = pool3D_forward_naive( - input_np, - ksize=[2, 2, 2], - strides=[2, 2, 2], - paddings=[0, 0, 0], - 
pool_type='avg') + result = avg_pool3d(input, + kernel_size=2, + stride=2, + padding=padding, + divisor_override=8) + + result_np = pool3D_forward_naive(input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='avg') self.assertTrue(np.allclose(result.numpy(), result_np)) - avg_pool3d_dg = paddle.nn.layer.AvgPool3D( - kernel_size=2, stride=2, padding=0) + avg_pool3d_dg = paddle.nn.layer.AvgPool3D(kernel_size=2, + stride=2, + padding=0) result = avg_pool3d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) padding = [0, 0, 0, 0, 0, 0] - result = avg_pool3d( - input, - kernel_size=2, - stride=2, - padding=padding, - divisor_override=8) + result = avg_pool3d(input, + kernel_size=2, + stride=2, + padding=padding, + divisor_override=8) self.assertTrue(np.allclose(result.numpy(), result_np)) def test_pool3d(self): @@ -334,174 +339,167 @@ class TestPool3D_API(unittest.TestCase): class TestPool3DError_API(unittest.TestCase): + def test_error_api(self): + def run1(): with fluid.dygraph.guard(): - input_np = np.random.uniform( - -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_np = np.random.uniform(-1, 1, [2, 3, 32, 32, 32]).astype( + np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = [[0, 1], [0, 0], [0, 0], [0, 0], [0, 0]] - res_pd = avg_pool3d( - input_pd, kernel_size=2, stride=2, padding=padding) + res_pd = avg_pool3d(input_pd, + kernel_size=2, + stride=2, + padding=padding) self.assertRaises(ValueError, run1) def run2(): with fluid.dygraph.guard(): - input_np = np.random.uniform( - -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_np = np.random.uniform(-1, 1, [2, 3, 32, 32, 32]).astype( + np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = [[0, 1], [0, 0], [0, 0], [0, 0], [0, 0]] - res_pd = avg_pool3d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - data_format='NCDHW') + res_pd = avg_pool3d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + data_format='NCDHW') self.assertRaises(ValueError, run2) def run3(): with fluid.dygraph.guard(): - input_np = np.random.uniform( - -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_np = np.random.uniform(-1, 1, [2, 3, 32, 32, 32]).astype( + np.float32) input_pd = fluid.dygraph.to_variable(input_np) padding = [[0, 1], [0, 0], [0, 0], [0, 0], [0, 0]] - res_pd = avg_pool3d( - input_pd, - kernel_size=2, - stride=2, - padding=padding, - data_format='NDHWC') + res_pd = avg_pool3d(input_pd, + kernel_size=2, + stride=2, + padding=padding, + data_format='NDHWC') self.assertRaises(ValueError, run3) def run4(): with fluid.dygraph.guard(): - input_np = np.random.uniform( - -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_np = np.random.uniform(-1, 1, [2, 3, 32, 32, 32]).astype( + np.float32) input_pd = fluid.dygraph.to_variable(input_np) - res_pd = avg_pool3d( - input_pd, - kernel_size=2, - stride=2, - padding=0, - data_format='NNNN') + res_pd = avg_pool3d(input_pd, + kernel_size=2, + stride=2, + padding=0, + data_format='NNNN') self.assertRaises(ValueError, run4) def run5(): with fluid.dygraph.guard(): - input_np = np.random.uniform( - -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_np = np.random.uniform(-1, 1, [2, 3, 32, 32, 32]).astype( + np.float32) input_pd = fluid.dygraph.to_variable(input_np) - res_pd = max_pool3d( - input_pd, - kernel_size=2, - stride=2, - padding=0, - data_format='NNNN') + res_pd = max_pool3d(input_pd, + kernel_size=2, + stride=2, + padding=0, + data_format='NNNN') self.assertRaises(ValueError, run5) def run6(): with 
fluid.dygraph.guard(): - input_np = np.random.uniform( - -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_np = np.random.uniform(-1, 1, [2, 3, 32, 32, 32]).astype( + np.float32) input_pd = fluid.dygraph.to_variable(input_np) - res_pd = avg_pool3d( - input_pd, - kernel_size=2, - stride=2, - padding="padding", - data_format='NNNN') + res_pd = avg_pool3d(input_pd, + kernel_size=2, + stride=2, + padding="padding", + data_format='NNNN') self.assertRaises(ValueError, run6) def run7(): with fluid.dygraph.guard(): - input_np = np.random.uniform( - -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_np = np.random.uniform(-1, 1, [2, 3, 32, 32, 32]).astype( + np.float32) input_pd = fluid.dygraph.to_variable(input_np) - res_pd = max_pool3d( - input_pd, - kernel_size=2, - stride=2, - padding="padding", - data_format='NNNN') + res_pd = max_pool3d(input_pd, + kernel_size=2, + stride=2, + padding="padding", + data_format='NNNN') self.assertRaises(ValueError, run7) def run8(): with fluid.dygraph.guard(): - input_np = np.random.uniform( - -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_np = np.random.uniform(-1, 1, [2, 3, 32, 32, 32]).astype( + np.float32) input_pd = fluid.dygraph.to_variable(input_np) - res_pd = avg_pool3d( - input_pd, - kernel_size=2, - stride=2, - padding="VALID", - ceil_mode=True, - data_format='NNNN') + res_pd = avg_pool3d(input_pd, + kernel_size=2, + stride=2, + padding="VALID", + ceil_mode=True, + data_format='NNNN') self.assertRaises(ValueError, run8) def run9(): with fluid.dygraph.guard(): - input_np = np.random.uniform( - -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_np = np.random.uniform(-1, 1, [2, 3, 32, 32, 32]).astype( + np.float32) input_pd = fluid.dygraph.to_variable(input_np) - res_pd = max_pool3d( - input_pd, - kernel_size=2, - stride=2, - padding="VALID", - ceil_mode=True, - data_format='NNNN') + res_pd = max_pool3d(input_pd, + kernel_size=2, + stride=2, + padding="VALID", + ceil_mode=True, + data_format='NNNN') self.assertRaises(ValueError, run9) def run10(): with fluid.dygraph.guard(): - input_np = np.random.uniform( - -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_np = np.random.uniform(-1, 1, [2, 3, 32, 32, 32]).astype( + np.float32) input_pd = fluid.dygraph.to_variable(input_np) - res_pd = max_pool3d( - input_pd, - kernel_size=2, - stride=2, - padding=0, - data_format='NDHWC', - return_mask=True) + res_pd = max_pool3d(input_pd, + kernel_size=2, + stride=2, + padding=0, + data_format='NDHWC', + return_mask=True) self.assertRaises(ValueError, run10) def run_kernel_out_of_range(): with fluid.dygraph.guard(): - input_np = np.random.uniform( - -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_np = np.random.uniform(-1, 1, [2, 3, 32, 32, 32]).astype( + np.float32) input_pd = fluid.dygraph.to_variable(input_np) - res_pd = avg_pool3d( - input_pd, - kernel_size=-1, - stride=2, - padding="VALID", - ceil_mode=True) + res_pd = avg_pool3d(input_pd, + kernel_size=-1, + stride=2, + padding="VALID", + ceil_mode=True) self.assertRaises(ValueError, run_kernel_out_of_range) def run_size_out_of_range(): with fluid.dygraph.guard(): - input_np = np.random.uniform( - -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_np = np.random.uniform(-1, 1, [2, 3, 32, 32, 32]).astype( + np.float32) input_pd = fluid.dygraph.to_variable(input_np) - res_pd = avg_pool3d( - input_pd, - kernel_size=2, - stride=0, - padding="VALID", - ceil_mode=True) + res_pd = avg_pool3d(input_pd, + kernel_size=2, + stride=0, + padding="VALID", + ceil_mode=True) 
self.assertRaises(ValueError, run_size_out_of_range) diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_op.py b/python/paddle/fluid/tests/unittests/test_pool3d_op.py index 40b9be9ee4f..2045f6bdd7a 100644 --- a/python/paddle/fluid/tests/unittests/test_pool3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pool3d_op.py @@ -49,8 +49,8 @@ def pool3D_forward_naive(x, for input_size, filter_size, stride_size in zip(input_shape, pool_size, pool_stride): out_size = int((input_size + stride_size - 1) / stride_size) - pad_sum = np.max(( - (out_size - 1) * stride_size + filter_size - input_size, 0)) + pad_sum = np.max( + ((out_size - 1) * stride_size + filter_size - input_size, 0)) pad_0 = int(pad_sum / 2) pad_1 = int(pad_sum - pad_0) padding.append(pad_0) @@ -148,21 +148,21 @@ def pool3D_forward_naive(x, d_end = np.min((d_end, D)) h_end = np.min((h_end, H)) if data_format == 'NCDHW': - x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start: - w_end] + x_masked = x[:, :, d_start:d_end, h_start:h_end, + w_start:w_end] if pool_type == 'avg': if (exclusive or adaptive): field_size = (d_end - d_start) * ( h_end - h_start) * (w_end - w_start) - out[:, :, k, i, j] = np.sum(x_masked, - axis=(2, 3, 4)) / field_size + out[:, :, k, i, + j] = np.sum(x_masked, axis=(2, 3, 4)) / field_size elif pool_type == 'max': out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4)) elif data_format == 'NDHWC': - x_masked = x[:, d_start:d_end, h_start:h_end, w_start: - w_end, :] + x_masked = x[:, d_start:d_end, h_start:h_end, + w_start:w_end, :] if pool_type == 'avg': if (exclusive or adaptive): field_size = (d_end - d_start) * ( @@ -184,17 +184,16 @@ def max_pool3D_forward_naive(x, ceil_mode=False, exclusive=True, adaptive=False): - out = pool3D_forward_naive( - x=x, - ksize=ksize, - strides=strides, - paddings=paddings, - global_pool=global_pool, - ceil_mode=ceil_mode, - exclusive=exclusive, - adaptive=adaptive, - data_format='NCDHW', - pool_type="max") + out = pool3D_forward_naive(x=x, + ksize=ksize, + strides=strides, + paddings=paddings, + global_pool=global_pool, + ceil_mode=ceil_mode, + exclusive=exclusive, + adaptive=adaptive, + data_format='NCDHW', + pool_type="max") return out @@ -206,21 +205,21 @@ def avg_pool3D_forward_naive(x, ceil_mode=False, exclusive=True, adaptive=False): - out = pool3D_forward_naive( - x=x, - ksize=ksize, - strides=strides, - paddings=paddings, - global_pool=global_pool, - ceil_mode=ceil_mode, - exclusive=exclusive, - adaptive=adaptive, - data_format='NCDHW', - pool_type="avg") + out = pool3D_forward_naive(x=x, + ksize=ksize, + strides=strides, + paddings=paddings, + global_pool=global_pool, + ceil_mode=ceil_mode, + exclusive=exclusive, + adaptive=adaptive, + data_format='NCDHW', + pool_type="avg") return out class TestPool3D_Op(OpTest): + def setUp(self): self.op_type = "pool3d" self.init_kernel_type() @@ -239,10 +238,12 @@ class TestPool3D_Op(OpTest): paddle.enable_static() input = np.random.random(self.shape).astype(self.dtype) - output = pool3D_forward_naive( - input, self.ksize, self.strides, self.paddings, self.global_pool, - self.ceil_mode, self.exclusive, self.adaptive, self.data_format, - self.pool_type, self.padding_algorithm).astype(self.dtype) + output = pool3D_forward_naive(input, self.ksize, self.strides, + self.paddings, self.global_pool, + self.ceil_mode, self.exclusive, + self.adaptive, self.data_format, + self.pool_type, + self.padding_algorithm).astype(self.dtype) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)} @@ -278,8 +279,10 @@ class 
TestPool3D_Op(OpTest): if self.has_cudnn() and self.pool_type != "max": place = core.CUDAPlace(0) if core.is_compiled_with_rocm(): - self.check_grad_with_place( - place, set(['X']), 'Out', max_relative_error=1e-2) + self.check_grad_with_place(place, + set(['X']), + 'Out', + max_relative_error=1e-2) else: self.check_grad_with_place(place, set(['X']), 'Out') elif self.pool_type != "max": @@ -322,6 +325,7 @@ class TestPool3D_Op(OpTest): class TestCase1(TestPool3D_Op): + def init_shape(self): self.shape = [1, 3, 7, 7, 7] @@ -340,6 +344,7 @@ class TestCase1(TestPool3D_Op): class TestCase2(TestPool3D_Op): + def init_shape(self): self.shape = [1, 3, 6, 7, 7] @@ -358,16 +363,19 @@ class TestCase2(TestPool3D_Op): class TestCase3(TestPool3D_Op): + def init_pool_type(self): self.pool_type = "max" class TestCase4(TestCase1): + def init_pool_type(self): self.pool_type = "max" class TestCase5(TestCase2): + def init_pool_type(self): self.pool_type = "max" @@ -376,9 +384,11 @@ class TestCase5(TestCase2): def create_test_cudnn_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNCase(parent): + def init_kernel_type(self): self.use_cudnn = True @@ -396,9 +406,11 @@ create_test_cudnn_class(TestCase5) def create_test_cudnn_fp16_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNFp16Case(parent): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float16 @@ -418,9 +430,11 @@ def create_test_cudnn_fp16_class(parent): def create_test_fp16_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFp16Case(parent): + def init_kernel_type(self): self.use_cudnn = False self.dtype = np.float16 @@ -453,9 +467,11 @@ create_test_fp16_class(TestCase5) # ---- test ceil mode ------ def create_test_cudnn_use_ceil_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestPool3DUseCeilCase(parent): + def init_kernel_type(self): self.use_cudnn = True @@ -472,7 +488,9 @@ create_test_cudnn_use_ceil_class(TestCase1) def create_test_use_ceil_class(parent): + class TestPool3DUseCeilCase(parent): + def init_ceil_mode(self): self.ceil_mode = True @@ -486,6 +504,7 @@ create_test_use_ceil_class(TestCase2) class TestAvgInclude(TestCase2): + def init_exclusive(self): self.exclusive = False @@ -493,6 +512,7 @@ class TestAvgInclude(TestCase2): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNAvgInclude(TestCase2): + def init_kernel_type(self): self.use_cudnn = True @@ -501,11 +521,13 @@ class TestCUDNNAvgInclude(TestCase2): class TestAvgPoolAdaptive(TestCase1): + def init_adaptive(self): self.adaptive = True class TestAvgPoolAdaptiveAsyOutSize(TestCase1): + def init_adaptive(self): self.adaptive = True @@ -519,6 +541,7 @@ class TestAvgPoolAdaptiveAsyOutSize(TestCase1): #-------test pool3d with asymmetric padding------ class TestPool3D_Op_AsyPadding(TestPool3D_Op): + def init_test_case(self): self.ksize = [3, 4, 3] self.strides = [1, 1, 2] @@ -531,6 +554,7 @@ class TestPool3D_Op_AsyPadding(TestPool3D_Op): class TestCase1_AsyPadding(TestCase1): + def init_test_case(self): self.ksize = [3, 3, 4] self.strides = [1, 1, 2] @@ -543,6 +567,7 @@ class TestCase1_AsyPadding(TestCase1): class TestCase2_AsyPadding(TestCase2): + def init_test_case(self): self.ksize = [3, 3, 3] self.strides = [1, 1, 1] @@ -555,6 +580,7 @@ class 
TestCase2_AsyPadding(TestCase2): class TestCase3_AsyPadding(TestCase3): + def init_test_case(self): self.ksize = [3, 3, 3] self.strides = [1, 1, 1] @@ -567,6 +593,7 @@ class TestCase3_AsyPadding(TestCase3): class TestCase4_AsyPadding(TestCase4): + def init_test_case(self): self.ksize = [3, 3, 3] self.strides = [1, 1, 1] @@ -579,6 +606,7 @@ class TestCase4_AsyPadding(TestCase4): class TestCase5_AsyPadding(TestCase5): + def init_test_case(self): self.ksize = [3, 3, 3] self.strides = [1, 1, 1] @@ -612,6 +640,7 @@ create_test_use_ceil_class(TestCase2_AsyPadding) class TestAvgInclude_AsyPadding(TestCase2): + def init_exclusive(self): self.exclusive = False @@ -622,6 +651,7 @@ class TestAvgInclude_AsyPadding(TestCase2): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNAvgInclude_AsyPadding(TestCase2): + def init_kernel_type(self): self.use_cudnn = True @@ -636,6 +666,7 @@ class TestCUDNNAvgInclude_AsyPadding(TestCase2): class TestAvgPoolAdaptive_AsyPadding(TestCase1): + def init_adaptive(self): self.adaptive = True @@ -645,6 +676,7 @@ class TestAvgPoolAdaptive_AsyPadding(TestCase1): # ------------ test channel_last -------------- class TestPool3D_channel_last(TestPool3D_Op): + def init_data_format(self): self.data_format = "NDHWC" @@ -653,6 +685,7 @@ class TestPool3D_channel_last(TestPool3D_Op): class TestCase1_channel_last(TestCase1): + def init_data_format(self): self.data_format = "NDHWC" @@ -661,6 +694,7 @@ class TestCase1_channel_last(TestCase1): class TestCase2_channel_last(TestCase2): + def init_data_format(self): self.data_format = "NDHWC" @@ -669,6 +703,7 @@ class TestCase2_channel_last(TestCase2): class TestCase3_channel_last(TestCase3): + def init_data_format(self): self.data_format = "NDHWC" @@ -677,6 +712,7 @@ class TestCase3_channel_last(TestCase3): class TestCase4_channel_last(TestCase4): + def init_data_format(self): self.data_format = "NDHWC" @@ -685,6 +721,7 @@ class TestCase4_channel_last(TestCase4): class TestCase5_channel_last(TestCase5): + def init_data_format(self): self.data_format = "NDHWC" @@ -707,6 +744,7 @@ create_test_use_ceil_class(TestCase2_channel_last) class TestCase5_Max(TestCase2): + def init_pool_type(self): self.pool_type = "max" @@ -715,13 +753,16 @@ class TestCase5_Max(TestCase2): return if self.has_cudnn() and self.pool_type == "max": place = core.CUDAPlace(0) - self.check_grad_with_place( - place, set(['X']), 'Out', max_relative_error=1.00) + self.check_grad_with_place(place, + set(['X']), + 'Out', + max_relative_error=1.00) elif self.pool_type == "max": self.check_grad(set(['X']), 'Out', max_relative_error=1.00) class TestCase5_channel_last_Max(TestCase5_Max): + def init_data_format(self): self.data_format = "NDHWC" @@ -734,6 +775,7 @@ create_test_cudnn_class(TestCase5_channel_last_Max) class TestAvgInclude_channel_last(TestCase2_channel_last): + def init_exclusive(self): self.exclusive = False @@ -741,6 +783,7 @@ class TestAvgInclude_channel_last(TestCase2_channel_last): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNAvgInclude_channel_last(TestCase2_channel_last): + def init_kernel_type(self): self.use_cudnn = True @@ -749,12 +792,14 @@ class TestCUDNNAvgInclude_channel_last(TestCase2_channel_last): class TestAvgPoolAdaptive_channel_last(TestCase1_channel_last): + def init_adaptive(self): self.adaptive = True # --- asy padding class TestPool3D_Op_AsyPadding_channel_last(TestPool3D_Op_AsyPadding): + def init_data_format(self): self.data_format = 
"NDHWC" @@ -763,6 +808,7 @@ class TestPool3D_Op_AsyPadding_channel_last(TestPool3D_Op_AsyPadding): class TestCase1_AsyPadding_channel_last(TestCase1_AsyPadding): + def init_data_format(self): self.data_format = "NDHWC" @@ -771,6 +817,7 @@ class TestCase1_AsyPadding_channel_last(TestCase1_AsyPadding): class TestCase2_AsyPadding_channel_last(TestCase2_AsyPadding): + def init_data_format(self): self.data_format = "NDHWC" @@ -779,6 +826,7 @@ class TestCase2_AsyPadding_channel_last(TestCase2_AsyPadding): class TestCase3_AsyPadding_channel_last(TestCase3_AsyPadding): + def init_data_format(self): self.data_format = "NDHWC" @@ -787,6 +835,7 @@ class TestCase3_AsyPadding_channel_last(TestCase3_AsyPadding): class TestCase4_AsyPadding_channel_last(TestCase4_AsyPadding): + def init_data_format(self): self.data_format = "NDHWC" @@ -795,6 +844,7 @@ class TestCase4_AsyPadding_channel_last(TestCase4_AsyPadding): class TestCase5_AsyPadding_channel_last(TestCase5_AsyPadding): + def init_data_format(self): self.data_format = "NDHWC" @@ -817,20 +867,23 @@ create_test_use_ceil_class(TestCase2_AsyPadding_channel_last) class TestAvgInclude_AsyPadding_channel_last(TestAvgInclude_AsyPadding): + def init_data_format(self): self.data_format = "NDHWC" @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") -class TestCUDNNAvgInclude_AsyPadding_channel_last( - TestCUDNNAvgInclude_AsyPadding): +class TestCUDNNAvgInclude_AsyPadding_channel_last(TestCUDNNAvgInclude_AsyPadding + ): + def init_data_format(self): self.data_format = "NDHWC" -class TestAvgPoolAdaptive_AsyPadding_channel_last( - TestAvgPoolAdaptive_AsyPadding): +class TestAvgPoolAdaptive_AsyPadding_channel_last(TestAvgPoolAdaptive_AsyPadding + ): + def init_data_format(self): self.data_format = "NDHWC" @@ -840,7 +893,9 @@ class TestAvgPoolAdaptive_AsyPadding_channel_last( #test padding = SAME VALID def create_test_padding_SAME_class(parent): + class TestPaddingSMAECase(parent): + def init_paddings(self): self.paddings = [0, 0, 0] self.padding_algorithm = "SAME" @@ -866,9 +921,11 @@ create_test_padding_SAME_class(TestCase5_channel_last) def create_test_cudnn_padding_SAME_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNPaddingSMAECase(parent): + def init_kernel_type(self): self.use_cudnn = True @@ -897,7 +954,9 @@ create_test_cudnn_padding_SAME_class(TestCase5_channel_last) def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): self.paddings = [1, 1, 1] self.padding_algorithm = "VALID" @@ -923,9 +982,11 @@ create_test_padding_VALID_class(TestCase5_channel_last) def create_test_cudnn_padding_VALID_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNPaddingVALIDCase(parent): + def init_kernel_type(self): self.use_cudnn = True @@ -955,257 +1016,234 @@ create_test_cudnn_padding_VALID_class(TestCase5_channel_last) #test API class TestPool3DAPI(unittest.TestCase): + def test_api(self): x_NDHWC = np.random.random([2, 5, 5, 5, 3]).astype("float32") x_NCDHW = np.random.random([2, 3, 5, 5, 5]).astype("float32") - input_NDHWC = fluid.layers.data( - name="input_NDHWC", - shape=[2, 5, 5, 5, 3], - append_batch_size=False, - dtype="float32") + input_NDHWC = fluid.layers.data(name="input_NDHWC", + shape=[2, 5, 5, 5, 3], + append_batch_size=False, + dtype="float32") - input_NCDHW = fluid.layers.data( - name="input_NCDHW", - shape=[2, 3, 5, 5, 5], - 
append_batch_size=False, - dtype="float32") + input_NCDHW = fluid.layers.data(name="input_NCDHW", + shape=[2, 3, 5, 5, 5], + append_batch_size=False, + dtype="float32") ksize = [3, 3, 3] - out_1 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[1, 1, 1], - use_cudnn=False, - data_format="NDHWC") - - out_2 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="avg", - pool_padding=[[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]], - use_cudnn=False, - data_format="NDHWC") - - out_3 = fluid.layers.pool3d( - input=input_NCDHW, - pool_size=ksize, - pool_type="avg", - pool_padding=[[0, 0], [0, 0], [1, 1], [1, 1], [1, 1]], - use_cudnn=False, - data_format="NCDHW") - - out_4 = fluid.layers.pool3d( - input=input_NCDHW, - pool_size=ksize, - pool_type="avg", - pool_padding=[1, 2, 1, 0, 0, 1], - use_cudnn=False, - data_format="NCDHW") + out_1 = fluid.layers.pool3d(input=input_NDHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[1, 1, 1], + use_cudnn=False, + data_format="NDHWC") + + out_2 = fluid.layers.pool3d(input=input_NDHWC, + pool_size=ksize, + pool_type="avg", + pool_padding=[[0, 0], [1, 1], [1, 1], + [1, 1], [0, 0]], + use_cudnn=False, + data_format="NDHWC") + + out_3 = fluid.layers.pool3d(input=input_NCDHW, + pool_size=ksize, + pool_type="avg", + pool_padding=[[0, 0], [0, 0], [1, 1], + [1, 1], [1, 1]], + use_cudnn=False, + data_format="NCDHW") + + out_4 = fluid.layers.pool3d(input=input_NCDHW, + pool_size=ksize, + pool_type="avg", + pool_padding=[1, 2, 1, 0, 0, 1], + use_cudnn=False, + data_format="NCDHW") # test VALID - out_5 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="avg", - pool_padding="VALID", - use_cudnn=False, - data_format="NDHWC") - - out_6 = fluid.layers.pool3d( - input=input_NCDHW, - pool_size=ksize, - pool_type="avg", - pool_padding="VALID", - use_cudnn=False, - data_format="NCDHW") + out_5 = fluid.layers.pool3d(input=input_NDHWC, + pool_size=ksize, + pool_type="avg", + pool_padding="VALID", + use_cudnn=False, + data_format="NDHWC") + + out_6 = fluid.layers.pool3d(input=input_NCDHW, + pool_size=ksize, + pool_type="avg", + pool_padding="VALID", + use_cudnn=False, + data_format="NCDHW") # test SAME - out_7 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_stride=[1, 1, 2], - pool_type="avg", - pool_padding="SAME", - use_cudnn=False, - data_format="NDHWC") - - out_8 = fluid.layers.pool3d( - input=input_NCDHW, - pool_size=[4, 4, 4], - pool_type="avg", - pool_padding="SAME", - use_cudnn=False, - data_format="NCDHW") + out_7 = fluid.layers.pool3d(input=input_NDHWC, + pool_size=ksize, + pool_stride=[1, 1, 2], + pool_type="avg", + pool_padding="SAME", + use_cudnn=False, + data_format="NDHWC") + + out_8 = fluid.layers.pool3d(input=input_NCDHW, + pool_size=[4, 4, 4], + pool_type="avg", + pool_padding="SAME", + use_cudnn=False, + data_format="NCDHW") exe = fluid.Executor(place=fluid.CPUPlace()) [res_1, res_2, res_3, res_4, res_5, res_6, res_7, res_8] = exe.run( fluid.default_main_program(), - feed={"input_NDHWC": x_NDHWC, - "input_NCDHW": x_NCDHW}, - fetch_list=[ - out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8 - ]) + feed={ + "input_NDHWC": x_NDHWC, + "input_NCDHW": x_NCDHW + }, + fetch_list=[out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8]) assert np.allclose( res_1, - pool3D_forward_naive( - x=x_NDHWC, - ksize=ksize, - pool_type="max", - strides=[1, 1, 1], - paddings=[1, 1, 1], - data_format="NDHWC")) + pool3D_forward_naive(x=x_NDHWC, + ksize=ksize, + 
pool_type="max", + strides=[1, 1, 1], + paddings=[1, 1, 1], + data_format="NDHWC")) assert np.allclose( res_2, - pool3D_forward_naive( - x=x_NDHWC, - ksize=ksize, - pool_type="avg", - strides=[1, 1, 1], - paddings=[1, 1, 1, 1, 1, 1], - data_format="NDHWC")) - assert np.allclose( - res_3, - pool3D_forward_naive( - x=x_NCDHW, - ksize=ksize, - pool_type="avg", - strides=[1, 1, 1], - paddings=[1, 1, 1, 1, 1, 1], - data_format="NCDHW"), - rtol=0.07, - atol=1e-05) - - assert np.allclose( - res_4, - pool3D_forward_naive( - x=x_NCDHW, - ksize=ksize, - pool_type="avg", - strides=[1, 1, 1], - paddings=[1, 2, 1, 0, 0, 1], - data_format="NCDHW"), - rtol=0.07, - atol=1e-05) + pool3D_forward_naive(x=x_NDHWC, + ksize=ksize, + pool_type="avg", + strides=[1, 1, 1], + paddings=[1, 1, 1, 1, 1, 1], + data_format="NDHWC")) + assert np.allclose(res_3, + pool3D_forward_naive(x=x_NCDHW, + ksize=ksize, + pool_type="avg", + strides=[1, 1, 1], + paddings=[1, 1, 1, 1, 1, 1], + data_format="NCDHW"), + rtol=0.07, + atol=1e-05) + + assert np.allclose(res_4, + pool3D_forward_naive(x=x_NCDHW, + ksize=ksize, + pool_type="avg", + strides=[1, 1, 1], + paddings=[1, 2, 1, 0, 0, 1], + data_format="NCDHW"), + rtol=0.07, + atol=1e-05) # VALID assert np.allclose( res_5, - pool3D_forward_naive( - x=x_NDHWC, - ksize=ksize, - pool_type="avg", - strides=[1, 1, 1], - paddings=[10, 20], - padding_algorithm="VALID", - data_format="NDHWC")) - - assert np.allclose( - res_6, - pool3D_forward_naive( - x=x_NCDHW, - ksize=ksize, - pool_type="avg", - strides=[1, 1, 1], - paddings=[10, 20], - padding_algorithm="VALID", - data_format="NCDHW"), - rtol=0.07, - atol=1e-05) + pool3D_forward_naive(x=x_NDHWC, + ksize=ksize, + pool_type="avg", + strides=[1, 1, 1], + paddings=[10, 20], + padding_algorithm="VALID", + data_format="NDHWC")) + + assert np.allclose(res_6, + pool3D_forward_naive(x=x_NCDHW, + ksize=ksize, + pool_type="avg", + strides=[1, 1, 1], + paddings=[10, 20], + padding_algorithm="VALID", + data_format="NCDHW"), + rtol=0.07, + atol=1e-05) # SAME assert np.allclose( res_7, - pool3D_forward_naive( - x=x_NDHWC, - ksize=ksize, - pool_type="avg", - strides=[1, 1, 2], - paddings=[10, 20], - padding_algorithm="SAME", - data_format="NDHWC")) - - assert np.allclose( - res_8, - pool3D_forward_naive( - x=x_NCDHW, - ksize=[4, 4, 4], - pool_type="avg", - strides=[1, 1, 1], - paddings=[10, 20], - padding_algorithm="SAME", - data_format="NCDHW"), - rtol=0.07, - atol=1e-05) + pool3D_forward_naive(x=x_NDHWC, + ksize=ksize, + pool_type="avg", + strides=[1, 1, 2], + paddings=[10, 20], + padding_algorithm="SAME", + data_format="NDHWC")) + + assert np.allclose(res_8, + pool3D_forward_naive(x=x_NCDHW, + ksize=[4, 4, 4], + pool_type="avg", + strides=[1, 1, 1], + paddings=[10, 20], + padding_algorithm="SAME", + data_format="NCDHW"), + rtol=0.07, + atol=1e-05) class TestPool3DAPI_Error(unittest.TestCase): + def test_api(self): - input_NDHWC = fluid.layers.data( - name="input_NDHWC", - shape=[2, 5, 5, 5, 3], - append_batch_size=False, - dtype="float32") + input_NDHWC = fluid.layers.data(name="input_NDHWC", + shape=[2, 5, 5, 5, 3], + append_batch_size=False, + dtype="float32") ksize = [3, 3, 3] # cudnn type error def run_1(): - out_1 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[1, 1, 1], - use_cudnn=[0], - data_format="NDHWC") + out_1 = fluid.layers.pool3d(input=input_NDHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[1, 1, 1], + use_cudnn=[0], + data_format="NDHWC") self.assertRaises(TypeError, run_1) # 
data_format value error def run_2(): - out_2 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[1, 1, 1], - use_cudnn=False, - data_format="NDHWCC") + out_2 = fluid.layers.pool3d(input=input_NDHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[1, 1, 1], + use_cudnn=False, + data_format="NDHWCC") self.assertRaises(ValueError, run_2) # padding str value error def run_3(): - out_3 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="max", - pool_padding="VALIDSAME", - use_cudnn=False, - data_format="NDHWC") + out_3 = fluid.layers.pool3d(input=input_NDHWC, + pool_size=ksize, + pool_type="max", + pool_padding="VALIDSAME", + use_cudnn=False, + data_format="NDHWC") self.assertRaises(ValueError, run_3) # padding str valid and ceil_mode value error def run_4(): - out_4 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="max", - pool_padding="VALID", - use_cudnn=False, - ceil_mode=True, - data_format="NDHWC") + out_4 = fluid.layers.pool3d(input=input_NDHWC, + pool_size=ksize, + pool_type="max", + pool_padding="VALID", + use_cudnn=False, + ceil_mode=True, + data_format="NDHWC") self.assertRaises(ValueError, run_4) # padding with 8 ele. value error def run_5(): - out_5 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[[1, 1], [0, 0], [0, 0], [1, 1], [1, 1]], - use_cudnn=False, - data_format="NDHWC") + out_5 = fluid.layers.pool3d(input=input_NDHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[[1, 1], [0, 0], [0, 0], + [1, 1], [1, 1]], + use_cudnn=False, + data_format="NDHWC") self.assertRaises(ValueError, run_5) diff --git a/python/paddle/fluid/tests/unittests/test_pool_max_op.py b/python/paddle/fluid/tests/unittests/test_pool_max_op.py index 4b3c777ccf3..ea373757591 100644 --- a/python/paddle/fluid/tests/unittests/test_pool_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_pool_max_op.py @@ -135,6 +135,7 @@ def max_pool2D_forward_naive(x, class TestMaxPoolWithIndex_Op(OpTest): + def setUp(self): self.init_test_case() self.init_global() @@ -181,11 +182,13 @@ class TestMaxPoolWithIndex_Op(OpTest): class TestCase1(TestMaxPoolWithIndex_Op): + def init_global(self): self.global_pool = True class TestCase2(TestMaxPoolWithIndex_Op): + def init_test_case(self): self.op_type = "max_pool3d_with_index" self.pool_forward_naive = max_pool3D_forward_naive @@ -199,12 +202,14 @@ class TestCase2(TestMaxPoolWithIndex_Op): class TestCase3(TestCase2): + def init_global(self): self.global_pool = False #----------------max_pool2d_with_index---------------- class TestCase4(TestMaxPoolWithIndex_Op): + def init_test_case(self): self.op_type = "max_pool2d_with_index" self.pool_forward_naive = max_pool2D_forward_naive @@ -218,11 +223,13 @@ class TestCase4(TestMaxPoolWithIndex_Op): class TestCase5(TestCase4): + def init_global(self): self.global_pool = False class TestCase6(TestMaxPoolWithIndex_Op): + def init_test_case(self): self.op_type = "max_pool2d_with_index" self.pool_forward_naive = max_pool2D_forward_naive @@ -236,16 +243,19 @@ class TestCase6(TestMaxPoolWithIndex_Op): class TestCase7(TestCase6): + def init_global(self): self.global_pool = False class TestCastAdaptive2d(TestCase6): + def init_adaptive(self): self.adaptive = True class TestCastAdaptive3d(TestMaxPoolWithIndex_Op): + def init_adaptive(self): self.adaptive = True diff --git a/python/paddle/fluid/tests/unittests/test_positive_negative_pair_op.py 
b/python/paddle/fluid/tests/unittests/test_positive_negative_pair_op.py index afe8d212d6e..157123d82c3 100644 --- a/python/paddle/fluid/tests/unittests/test_positive_negative_pair_op.py +++ b/python/paddle/fluid/tests/unittests/test_positive_negative_pair_op.py @@ -53,6 +53,7 @@ def py_pnpair_op(score, label, query, column=-1, weight=None): class TestPositiveNegativePairOp(OpTest): + def setUp(self): self.op_type = 'positive_negative_pair' batch_size = 20 @@ -77,6 +78,7 @@ class TestPositiveNegativePairOp(OpTest): class TestPositiveNegativePairOpAccumulateWeight(OpTest): + def setUp(self): self.op_type = 'positive_negative_pair' batch_size = 20 @@ -89,16 +91,19 @@ class TestPositiveNegativePairOpAccumulateWeight(OpTest): query = np.array( [np.random.randint(max_query_id) for i in range(batch_size)]) query = np.reshape(query, newshape=(batch_size, 1)).astype('int64') - acc_pos = np.reshape( - np.random.randint(max_random_num), newshape=(1)).astype('float32') - acc_neg = np.reshape( - np.random.randint(max_random_num), newshape=(1)).astype('float32') - acc_neu = np.reshape( - np.random.randint(max_random_num), newshape=(1)).astype('float32') + acc_pos = np.reshape(np.random.randint(max_random_num), + newshape=(1)).astype('float32') + acc_neg = np.reshape(np.random.randint(max_random_num), + newshape=(1)).astype('float32') + acc_neu = np.reshape(np.random.randint(max_random_num), + newshape=(1)).astype('float32') column = np.random.randint(score_dim) - pos, neg, neu = py_pnpair_op( - score, label, query, column=column, weight=weight) + pos, neg, neu = py_pnpair_op(score, + label, + query, + column=column, + weight=weight) self.inputs = { 'Score': score, 'Label': label, diff --git a/python/paddle/fluid/tests/unittests/test_pow2_decay_with_linear_warmup_op.py b/python/paddle/fluid/tests/unittests/test_pow2_decay_with_linear_warmup_op.py index 056db5b8590..43f98cada42 100644 --- a/python/paddle/fluid/tests/unittests/test_pow2_decay_with_linear_warmup_op.py +++ b/python/paddle/fluid/tests/unittests/test_pow2_decay_with_linear_warmup_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -34,19 +34,18 @@ def gen_pow2_warmup_op_lr(warmup_steps, total_steps, base_lr, end_lr, place): class Pow2Warmup(LinearWarmup): + def __init__(self, warmup_steps, total_steps, base_lr, end_lr): assert total_steps > warmup_steps - lr_sch = PolynomialDecay( - learning_rate=base_lr, - decay_steps=total_steps - warmup_steps, - end_lr=end_lr, - power=2) + lr_sch = PolynomialDecay(learning_rate=base_lr, + decay_steps=total_steps - warmup_steps, + end_lr=end_lr, + power=2) - super(Pow2Warmup, self).__init__( - learning_rate=lr_sch, - warmup_steps=warmup_steps, - start_lr=0.0, - end_lr=base_lr) + super(Pow2Warmup, self).__init__(learning_rate=lr_sch, + warmup_steps=warmup_steps, + start_lr=0.0, + end_lr=base_lr) def gen_pow2_warmup_py_lr(warmup_steps, total_steps, base_lr, end_lr, place): @@ -58,6 +57,7 @@ def gen_pow2_warmup_py_lr(warmup_steps, total_steps, base_lr, end_lr, place): class TestPow2WarmupLRScheduler(unittest.TestCase): + def setUp(self): paddle.enable_static() self.params = { diff --git a/python/paddle/fluid/tests/unittests/test_precision_recall_op.py b/python/paddle/fluid/tests/unittests/test_precision_recall_op.py index 64563762595..ee88d76ac67 100644 --- a/python/paddle/fluid/tests/unittests/test_precision_recall_op.py +++ b/python/paddle/fluid/tests/unittests/test_precision_recall_op.py @@ -87,6 +87,7 @@ def compute_metrics(states, cls_num): class TestPrecisionRecallOp_0(OpTest): + def setUp(self): self.op_type = "precision_recall" ins_num = 64 @@ -114,6 +115,7 @@ class TestPrecisionRecallOp_0(OpTest): class TestPrecisionRecallOp_1(OpTest): + def setUp(self): self.op_type = "precision_recall" ins_num = 64 @@ -148,6 +150,7 @@ class TestPrecisionRecallOp_1(OpTest): class TestPrecisionRecallOp_2(OpTest): + def setUp(self): self.op_type = "precision_recall" ins_num = 64 diff --git a/python/paddle/fluid/tests/unittests/test_prelu_op.py b/python/paddle/fluid/tests/unittests/test_prelu_op.py index 73c423a23e6..1de0c434952 100644 --- a/python/paddle/fluid/tests/unittests/test_prelu_op.py +++ b/python/paddle/fluid/tests/unittests/test_prelu_op.py @@ -41,9 +41,10 @@ def ref_prelu_nn(x, num_parameters, init): class TestFunctionalPReluAPI(unittest.TestCase): + def setUp(self): - self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + self.place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() self.x_np = np.random.uniform(-1., 1., [1, 2, 3, 4]).astype('float32') self.weight_np_0 = np.random.randn(1).astype('float32') self.weight_np_1 = np.random.randn(self.x_np.shape[1]).astype('float32') @@ -54,8 +55,10 @@ class TestFunctionalPReluAPI(unittest.TestCase): weight = paddle.fluid.data('Alpha', weight_np.shape, 'float32') out = F.prelu(x, weight) exe = paddle.static.Executor(self.place) - res = exe.run(feed={'X': self.x_np, - 'Alpha': weight_np}, + res = exe.run(feed={ + 'X': self.x_np, + 'Alpha': weight_np + }, fetch_list=[out]) out_ref = ref_prelu(self.x_np, weight_np) self.assertEqual(np.allclose(out_ref, res[0]), True) @@ -83,32 +86,37 @@ class TestFunctionalPReluAPI(unittest.TestCase): def test_error(self): with paddle.static.program_guard(paddle.static.Program()): - weight_fp32 = paddle.fluid.data( - name='weight_fp32', shape=[1], dtype='float32') + weight_fp32 = paddle.fluid.data(name='weight_fp32', + shape=[1], + dtype='float32') # The input type must be Variable. self.assertRaises(TypeError, F.prelu, x=1, weight=weight_fp32) # The input dtype must be float16, float32, float64. 
- x_int32 = paddle.fluid.data( - name='x_int32', shape=[2, 3], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[2, 3], + dtype='int32') self.assertRaises(TypeError, F.prelu, x=x_int32, weight=weight_fp32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[2, 3], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[2, 3], + dtype='float16') F.prelu(x=x_fp16, weight=weight_fp32) class TestNNPReluAPI(unittest.TestCase): + def setUp(self): - self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + self.place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() self.x_np = np.ones([1, 2, 3, 4]).astype('float32') def test_static_api(self): startup_program = paddle.static.Program() train_program = paddle.static.Program() with paddle.static.program_guard(train_program, startup_program): - x = paddle.fluid.data( - name='X', shape=self.x_np.shape, dtype='float32') + x = paddle.fluid.data(name='X', + shape=self.x_np.shape, + dtype='float32') m = paddle.nn.PReLU() out = m(x) exe = paddle.static.Executor(self.place) @@ -162,6 +170,7 @@ def prelu_api_wrapper(x, weight, data_format="NCHW"): class PReluTest(OpTest): + def setUp(self): self.init_dtype() self.init_input_shape() @@ -196,16 +205,16 @@ class PReluTest(OpTest): self.inputs = {'X': x_np, 'Alpha': alpha_np} # NOTE(zhiqu): reshape inputs['Alpha'] from [1, 100, 1, 1] to [1, 100] + [1]*len(x.shape[2:]) - # since np operands could not be broadcast together with shapes (1,100,2,2,2,3) (1,100,1,1) + # since np operands could not be broadcast together with shapes (1,100,2,2,2,3) (1,100,1,1) reshaped_alpha = self.inputs['Alpha'] if self.attrs == {'mode': "channel", "data_format": "NCHW"}: - reshaped_alpha = np.reshape( - self.inputs['Alpha'], - [1, self.x_shape[1]] + [1] * len(self.x_shape[2:])) + reshaped_alpha = np.reshape(self.inputs['Alpha'], + [1, self.x_shape[1]] + + [1] * len(self.x_shape[2:])) elif self.attrs == {'mode': "channel", "data_format": "NHWC"}: - reshaped_alpha = np.reshape( - self.inputs['Alpha'], - [1] + [1] * len(self.x_shape[1:-1]) + [self.x_shape[-1]]) + reshaped_alpha = np.reshape(self.inputs['Alpha'], + [1] + [1] * len(self.x_shape[1:-1]) + + [self.x_shape[-1]]) out_np = np.maximum(self.inputs['X'], 0.) out_np = out_np + np.minimum(self.inputs['X'], 0.) 
* reshaped_alpha assert out_np is not self.inputs['X'] @@ -228,9 +237,11 @@ class PReluTest(OpTest): @skip_check_grad_ci( - reason="[skip shape check] Input(Alpha) must be 1-D and only has one data in 'all' mode" + reason= + "[skip shape check] Input(Alpha) must be 1-D and only has one data in 'all' mode" ) class TestModeAll(PReluTest): + def init_input_shape(self): self.x_shape = [2, 3, 4, 5] @@ -239,9 +250,11 @@ class TestModeAll(PReluTest): @skip_check_grad_ci( - reason="[skip shape check] Input(Alpha) must be 1-D and only has one data in 'all' mode" + reason= + "[skip shape check] Input(Alpha) must be 1-D and only has one data in 'all' mode" ) class TestModeAllNHWC(PReluTest): + def init_input_shape(self): self.x_shape = [2, 3, 4, 50] @@ -250,6 +263,7 @@ class TestModeAllNHWC(PReluTest): class TestModeElt(PReluTest): + def init_input_shape(self): self.x_shape = [3, 2, 5, 10] @@ -258,6 +272,7 @@ class TestModeElt(PReluTest): class TestModeEltNHWC(PReluTest): + def init_input_shape(self): self.x_shape = [3, 2, 5, 10] @@ -266,9 +281,11 @@ class TestModeEltNHWC(PReluTest): @skip_check_grad_ci( - reason="[skip shape check] Input(Alpha) must be 1-D and only has one data in 'all' mode" + reason= + "[skip shape check] Input(Alpha) must be 1-D and only has one data in 'all' mode" ) class TestModeAllRank3(PReluTest): + def init_input_shape(self): self.x_shape = [1, 200, 3] @@ -277,9 +294,11 @@ class TestModeAllRank3(PReluTest): @skip_check_grad_ci( - reason="[skip shape check] Input(Alpha) must be 1-D and only has one data in 'all' mode" + reason= + "[skip shape check] Input(Alpha) must be 1-D and only has one data in 'all' mode" ) class TestModeAllRank3NHWC(PReluTest): + def init_input_shape(self): self.x_shape = [1, 200, 3] @@ -288,9 +307,11 @@ class TestModeAllRank3NHWC(PReluTest): @skip_check_grad_ci( - reason="[skip shape check] Input(Alpha) must be 1-D and only has one data in 'all' mode" + reason= + "[skip shape check] Input(Alpha) must be 1-D and only has one data in 'all' mode" ) class TestModeAllRank6(PReluTest): + def init_input_shape(self): self.x_shape = [1, 2, 3, 4, 5, 6] @@ -299,9 +320,11 @@ class TestModeAllRank6(PReluTest): @skip_check_grad_ci( - reason="[skip shape check] Input(Alpha) must be 1-D and only has one data in 'all' mode" + reason= + "[skip shape check] Input(Alpha) must be 1-D and only has one data in 'all' mode" ) class TestModeAllRank6NHWC(PReluTest): + def init_input_shape(self): self.x_shape = [1, 2, 3, 4, 5, 6] @@ -310,6 +333,7 @@ class TestModeAllRank6NHWC(PReluTest): class TestModeChannelRank3(PReluTest): + def init_input_shape(self): self.x_shape = [1, 200, 3] @@ -318,6 +342,7 @@ class TestModeChannelRank3(PReluTest): class TestModeChannelRank3NHWC(PReluTest): + def init_input_shape(self): self.x_shape = [1, 3, 100] @@ -326,6 +351,7 @@ class TestModeChannelRank3NHWC(PReluTest): class TestModeChannelRank6(PReluTest): + def init_input_shape(self): self.x_shape = [1, 100, 2, 2, 2, 2] @@ -334,6 +360,7 @@ class TestModeChannelRank6(PReluTest): class TestModeChannelRank6NHWC(PReluTest): + def init_input_shape(self): self.x_shape = [1, 2, 2, 2, 2, 100] @@ -342,6 +369,7 @@ class TestModeChannelRank6NHWC(PReluTest): class TestModeElementRank3(PReluTest): + def init_input_shape(self): self.x_shape = [3, 10, 10] @@ -350,6 +378,7 @@ class TestModeElementRank3(PReluTest): class TestModeElementRank3NHWC(PReluTest): + def init_input_shape(self): self.x_shape = [3, 10, 10] @@ -358,6 +387,7 @@ class TestModeElementRank3NHWC(PReluTest): class TestModeElementRank6(PReluTest): 
+ def init_input_shape(self): self.x_shape = [3, 2, 2, 4, 5, 2] @@ -366,6 +396,7 @@ class TestModeElementRank6(PReluTest): class TestModeElementRank6NHWC(PReluTest): + def init_input_shape(self): self.x_shape = [3, 2, 2, 4, 5, 2] @@ -377,9 +408,11 @@ def create_test_fp16_class(parent, check_grad=True, atol=1e-3, max_relative_error=0.05): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestPReluFp16Case(parent): + def init_dtype(self): self.dtype = np.float16 @@ -387,8 +420,9 @@ def create_test_fp16_class(parent, if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_output_with_place( - place, atol=atol, check_eager=self.eager_mode) + self.check_output_with_place(place, + atol=atol, + check_eager=self.eager_mode) def test_check_grad(self): place = core.CUDAPlace(0) @@ -431,21 +465,25 @@ def prelu_t(x, mode, param_attr=None, name=None, data_format='NCHW'): is_bias=False, default_initializer=fluid.initializer.ConstantInitializer(0.25)) out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="prelu", - inputs={"X": x, - 'Alpha': alpha}, - attrs={"mode": mode, - 'data_format': data_format}, - outputs={"Out": out}) + helper.append_op(type="prelu", + inputs={ + "X": x, + 'Alpha': alpha + }, + attrs={ + "mode": mode, + 'data_format': data_format + }, + outputs={"Out": out}) return out # error message test if mode is not one of 'all', 'channel', 'element' class TestModeError(unittest.TestCase): + def setUp(self): - self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + self.place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() self.x_np = np.ones([1, 2, 3, 4]).astype('float32') def test_mode_error(self): diff --git a/python/paddle/fluid/tests/unittests/test_print_op.py b/python/paddle/fluid/tests/unittests/test_print_op.py index cc06a3cf7fa..46073273065 100755 --- a/python/paddle/fluid/tests/unittests/test_print_op.py +++ b/python/paddle/fluid/tests/unittests/test_print_op.py @@ -30,6 +30,7 @@ paddle.enable_static() class TestPrintOpCPU(unittest.TestCase): + def setUp(self): self.place = paddle.CPUPlace() self.x_tensor = fluid.core.LoDTensor() @@ -74,7 +75,8 @@ class TestPrintOpCPU(unittest.TestCase): print_tensor_name=print_tensor_name, print_tensor_type=print_tensor_type, print_tensor_shape=print_tensor_shape, - print_tensor_lod=print_tensor_lod, ) + print_tensor_lod=print_tensor_lod, + ) loss = paddle.mean(x) paddle.static.append_backward(loss=loss) exe = paddle.static.Executor(self.place) @@ -92,11 +94,12 @@ class TestPrintOpCPU(unittest.TestCase): class TestPrintOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of Print_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], paddle.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + paddle.CPUPlace()) self.assertRaises(TypeError, paddle.static.Print, x1) # The input dtype of Print_op must be float32, float64, int32_t, int64_t or bool. 
x2 = paddle.static.data(name='x2', shape=[4], dtype="float16") @@ -106,6 +109,7 @@ class TestPrintOpError(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestPrintOpGPU(TestPrintOpCPU): + def setUp(self): self.place = paddle.CUDAPlace(0) self.x_tensor = fluid.core.LoDTensor() @@ -115,6 +119,7 @@ class TestPrintOpGPU(TestPrintOpCPU): class TestPrintOpBackward(unittest.TestCase): + def check_backward(self, use_cuda): main = paddle.static.Program() startup = paddle.static.Program() diff --git a/python/paddle/fluid/tests/unittests/test_prior_box_op.py b/python/paddle/fluid/tests/unittests/test_prior_box_op.py index 7381b74af71..2e18f8b748e 100644 --- a/python/paddle/fluid/tests/unittests/test_prior_box_op.py +++ b/python/paddle/fluid/tests/unittests/test_prior_box_op.py @@ -22,6 +22,7 @@ from op_test import OpTest class TestPriorBoxOp(OpTest): + def set_data(self): self.init_test_params() self.init_test_input() @@ -79,8 +80,8 @@ class TestPriorBoxOp(OpTest): self.flip = True self.set_min_max_aspect_ratios_order() self.real_aspect_ratios = [1, 2.0, 1.0 / 2.0, 3.0, 1.0 / 3.0] - self.aspect_ratios = np.array( - self.aspect_ratios, dtype=np.float).flatten() + self.aspect_ratios = np.array(self.aspect_ratios, + dtype=np.float).flatten() self.variances = [0.1, 0.1, 0.2, 0.2] self.variances = np.array(self.variances, dtype=np.float).flatten() @@ -118,22 +119,22 @@ class TestPriorBoxOp(OpTest): ar = self.real_aspect_ratios[r] c_w = min_size * math.sqrt(ar) / 2 c_h = (min_size / math.sqrt(ar)) / 2 - out_boxes[h, w, idx, :] = [ - (c_x - c_w) / self.image_w, (c_y - c_h) / - self.image_h, (c_x + c_w) / self.image_w, - (c_y + c_h) / self.image_h - ] + out_boxes[h, w, + idx, :] = [(c_x - c_w) / self.image_w, + (c_y - c_h) / self.image_h, + (c_x + c_w) / self.image_w, + (c_y + c_h) / self.image_h] idx += 1 if len(self.max_sizes) > 0: max_size = self.max_sizes[s] # second prior: aspect_ratio = 1, c_w = c_h = math.sqrt(min_size * max_size) / 2 - out_boxes[h, w, idx, :] = [ - (c_x - c_w) / self.image_w, (c_y - c_h) / - self.image_h, (c_x + c_w) / self.image_w, - (c_y + c_h) / self.image_h - ] + out_boxes[h, w, + idx, :] = [(c_x - c_w) / self.image_w, + (c_y - c_h) / self.image_h, + (c_x + c_w) / self.image_w, + (c_y + c_h) / self.image_h] idx += 1 else: c_w = c_h = min_size / 2. @@ -146,11 +147,11 @@ class TestPriorBoxOp(OpTest): max_size = self.max_sizes[s] # second prior: aspect_ratio = 1, c_w = c_h = math.sqrt(min_size * max_size) / 2 - out_boxes[h, w, idx, :] = [ - (c_x - c_w) / self.image_w, (c_y - c_h) / - self.image_h, (c_x + c_w) / self.image_w, - (c_y + c_h) / self.image_h - ] + out_boxes[h, w, + idx, :] = [(c_x - c_w) / self.image_w, + (c_y - c_h) / self.image_h, + (c_x + c_w) / self.image_w, + (c_y + c_h) / self.image_h] idx += 1 # rest of priors @@ -160,29 +161,31 @@ class TestPriorBoxOp(OpTest): continue c_w = min_size * math.sqrt(ar) / 2 c_h = (min_size / math.sqrt(ar)) / 2 - out_boxes[h, w, idx, :] = [ - (c_x - c_w) / self.image_w, (c_y - c_h) / - self.image_h, (c_x + c_w) / self.image_w, - (c_y + c_h) / self.image_h - ] + out_boxes[h, w, + idx, :] = [(c_x - c_w) / self.image_w, + (c_y - c_h) / self.image_h, + (c_x + c_w) / self.image_w, + (c_y + c_h) / self.image_h] idx += 1 # clip the prior's coordidate such that it is within[0, 1] if self.clip: out_boxes = np.clip(out_boxes, 0.0, 1.0) # set the variance. 
- out_var = np.tile(self.variances, (self.layer_h, self.layer_w, - self.num_priors, 1)) + out_var = np.tile(self.variances, + (self.layer_h, self.layer_w, self.num_priors, 1)) self.out_boxes = out_boxes.astype('float32') self.out_var = out_var.astype('float32') class TestPriorBoxOpWithoutMaxSize(TestPriorBoxOp): + def set_max_sizes(self): self.max_sizes = [] class TestPriorBoxOpWithSpecifiedOutOrder(TestPriorBoxOp): + def set_min_max_aspect_ratios_order(self): self.min_max_aspect_ratios_order = True diff --git a/python/paddle/fluid/tests/unittests/test_prod_op.py b/python/paddle/fluid/tests/unittests/test_prod_op.py index cdfcbb4e4e7..656601e05d1 100644 --- a/python/paddle/fluid/tests/unittests/test_prod_op.py +++ b/python/paddle/fluid/tests/unittests/test_prod_op.py @@ -20,6 +20,7 @@ import numpy as np class TestProdOp(unittest.TestCase): + def setUp(self): self.input = np.random.random(size=(10, 10, 5)).astype(np.float32) @@ -50,13 +51,16 @@ class TestProdOp(unittest.TestCase): self.assertTrue(np.allclose(dy_result.numpy(), expected_result)) dy_result = paddle.prod(input, axis=1, keepdim=True, dtype='int64') - expected_result = np.prod( - self.input, axis=1, keepdims=True, dtype=np.int64) + expected_result = np.prod(self.input, + axis=1, + keepdims=True, + dtype=np.int64) self.assertTrue(np.allclose(dy_result.numpy(), expected_result)) def run_static(self, use_gpu=False): - input = paddle.fluid.data( - name='input', shape=[10, 10, 5], dtype='float32') + input = paddle.fluid.data(name='input', + shape=[10, 10, 5], + dtype='float32') result0 = paddle.prod(input) result1 = paddle.prod(input, axis=1) result2 = paddle.prod(input, axis=-1) @@ -86,8 +90,10 @@ class TestProdOp(unittest.TestCase): self.assertTrue(np.allclose(static_result[4], expected_result)) expected_result = np.prod(self.input, axis=1, dtype=np.int64) self.assertTrue(np.allclose(static_result[5], expected_result)) - expected_result = np.prod( - self.input, axis=1, keepdims=True, dtype=np.int64) + expected_result = np.prod(self.input, + axis=1, + keepdims=True, + dtype=np.int64) self.assertTrue(np.allclose(static_result[6], expected_result)) def test_cpu(self): @@ -111,12 +117,14 @@ class TestProdOp(unittest.TestCase): class TestProdOpError(unittest.TestCase): + def test_error(self): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): x = paddle.fluid.data(name='x', shape=[2, 2, 4], dtype='float32') - bool_x = paddle.fluid.data( - name='bool_x', shape=[2, 2, 4], dtype='bool') + bool_x = paddle.fluid.data(name='bool_x', + shape=[2, 2, 4], + dtype='bool') # The argument x shoule be a Tensor self.assertRaises(TypeError, paddle.prod, [1]) diff --git a/python/paddle/fluid/tests/unittests/test_profiler.py b/python/paddle/fluid/tests/unittests/test_profiler.py index 1b8852810f2..0eec7633a2e 100644 --- a/python/paddle/fluid/tests/unittests/test_profiler.py +++ b/python/paddle/fluid/tests/unittests/test_profiler.py @@ -28,6 +28,7 @@ import paddle.fluid.proto.profiler.profiler_pb2 as profiler_pb2 class TestProfiler(unittest.TestCase): + @classmethod def setUpClass(cls): os.environ['CPU_NUM'] = str(4) @@ -39,8 +40,9 @@ class TestProfiler(unittest.TestCase): image = fluid.layers.data(name='x', shape=[784], dtype='float32') hidden1 = fluid.layers.fc(input=image, size=64, act='relu') i = layers.zeros(shape=[1], dtype='int64') - counter = fluid.layers.zeros( - shape=[1], dtype='int64', force_cpu=True) + counter = fluid.layers.zeros(shape=[1], + dtype='int64', + force_cpu=True) until = layers.fill_constant([1], 
dtype='int64', value=10) data_arr = layers.array_write(hidden1, i) cond = fluid.layers.less_than(x=counter, y=until) @@ -58,8 +60,9 @@ class TestProfiler(unittest.TestCase): cost = fluid.layers.cross_entropy(input=predict, label=label) avg_cost = fluid.layers.mean(cost) batch_size = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size) + batch_acc = fluid.layers.accuracy(input=predict, + label=label, + total=batch_size) optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9) opts = optimizer.minimize(avg_cost, startup_program=startup_program) @@ -70,8 +73,8 @@ class TestProfiler(unittest.TestCase): exec_strategy = fluid.ExecutionStrategy() exec_strategy.num_threads = 1 train_program = fluid.compiler.CompiledProgram( - main_program).with_data_parallel( - loss_name=avg_cost.name, exec_strategy=exec_strategy) + main_program).with_data_parallel(loss_name=avg_cost.name, + exec_strategy=exec_strategy) else: train_program = main_program return train_program, startup_program, avg_cost, batch_size, batch_acc @@ -95,16 +98,18 @@ class TestProfiler(unittest.TestCase): "Kernel %s missing event. Has this kernel been recorded by RecordEvent?" % event.name) elif event.type == profiler_pb2.Event.CPU and ( - event.name.startswith("Driver API") or - event.name.startswith("Runtime API")): + event.name.startswith("Driver API") + or event.name.startswith("Runtime API")): print("Warning: unregister", event.name) def run_iter(self, exe, main_program, fetch_list): x = np.random.random((32, 784)).astype("float32") y = np.random.randint(0, 10, (32, 1)).astype("int64") outs = exe.run(main_program, - feed={'x': x, - 'y': y}, + feed={ + 'x': x, + 'y': y + }, fetch_list=fetch_list) def net_profiler(self, @@ -127,13 +132,15 @@ class TestProfiler(unittest.TestCase): self.run_iter(exe, main_program, [avg_cost, batch_acc, batch_size]) else: - options = utils.ProfilerOptions(options={ - 'state': state, - 'sorted_key': 'total', - 'tracer_level': tracer_option, - 'batch_range': [0, 10] if batch_range is None else batch_range, - 'profile_path': profile_path - }) + options = utils.ProfilerOptions( + options={ + 'state': state, + 'sorted_key': 'total', + 'tracer_level': tracer_option, + 'batch_range': + [0, 10] if batch_range is None else batch_range, + 'profile_path': profile_path + }) with utils.Profiler(enabled=True, options=options) as prof: for iter in range(10): self.run_iter(exe, main_program, @@ -148,39 +155,37 @@ class TestProfiler(unittest.TestCase): def test_cpu_profiler(self): exe = fluid.Executor(fluid.CPUPlace()) for use_new_api in [False, True]: - self.net_profiler( - exe, - 'CPU', - "Default", - batch_range=[5, 10], - use_new_api=use_new_api) + self.net_profiler(exe, + 'CPU', + "Default", + batch_range=[5, 10], + use_new_api=use_new_api) @unittest.skipIf(not core.is_compiled_with_cuda(), "profiler is enabled only with GPU") def test_cuda_profiler(self): exe = fluid.Executor(fluid.CUDAPlace(0)) for use_new_api in [False, True]: - self.net_profiler( - exe, - 'GPU', - "OpDetail", - batch_range=[0, 10], - use_new_api=use_new_api) + self.net_profiler(exe, + 'GPU', + "OpDetail", + batch_range=[0, 10], + use_new_api=use_new_api) @unittest.skipIf(not core.is_compiled_with_cuda(), "profiler is enabled only with GPU") def test_all_profiler(self): exe = fluid.Executor(fluid.CUDAPlace(0)) for use_new_api in [False, True]: - self.net_profiler( - exe, - 'All', - "AllOpDetail", - batch_range=None, - use_new_api=use_new_api) + 
self.net_profiler(exe, + 'All', + "AllOpDetail", + batch_range=None, + use_new_api=use_new_api) class TestProfilerAPIError(unittest.TestCase): + def test_errors(self): options = utils.ProfilerOptions() self.assertTrue(options['profile_path'] is None) diff --git a/python/paddle/fluid/tests/unittests/test_profiler_statistic.py b/python/paddle/fluid/tests/unittests/test_profiler_statistic.py index 7079d9678b2..e5463b1a90d 100644 --- a/python/paddle/fluid/tests/unittests/test_profiler_statistic.py +++ b/python/paddle/fluid/tests/unittests/test_profiler_statistic.py @@ -19,6 +19,7 @@ import paddle.profiler as profiler class HostPythonNode: + def __init__(self, name, type, start_ns, end_ns, process_id, thread_id): self.name = name self.type = type @@ -32,6 +33,7 @@ class HostPythonNode: class DevicePythonNode: + def __init__(self, name, type, start_ns, end_ns, device_id, context_id, stream_id): self.name = name @@ -44,6 +46,7 @@ class DevicePythonNode: class TestProfilerStatistic(unittest.TestCase): + def test_statistic_case1(self): root_node = HostPythonNode('Root Node', profiler.TracerEventType.UserDefined, 0, @@ -54,10 +57,12 @@ class TestProfilerStatistic(unittest.TestCase): dataloader_node = HostPythonNode('Dataloader', profiler.TracerEventType.Dataloader, 5, 15, 1000, 1001) - mobilenet_node = HostPythonNode( - 'MobileNet', profiler.TracerEventType.Forward, 20, 50, 1000, 1001) - yolonet_node = HostPythonNode( - 'Yolov3Net', profiler.TracerEventType.Forward, 50, 110, 1000, 1001) + mobilenet_node = HostPythonNode('MobileNet', + profiler.TracerEventType.Forward, 20, + 50, 1000, 1001) + yolonet_node = HostPythonNode('Yolov3Net', + profiler.TracerEventType.Forward, 50, 110, + 1000, 1001) userdefined_node = HostPythonNode('Communication Time', profiler.TracerEventType.UserDefined, @@ -72,8 +77,9 @@ class TestProfilerStatistic(unittest.TestCase): optimization_node = HostPythonNode( 'Optimization', profiler.TracerEventType.Optimization, 220, 300, 1000, 1001) - conv2d_node = HostPythonNode( - 'conv2d', profiler.TracerEventType.Operator, 25, 40, 1000, 1001) + conv2d_node = HostPythonNode('conv2d', + profiler.TracerEventType.Operator, 25, 40, + 1000, 1001) sync_batch_norm_node = HostPythonNode('sync_batch_norm', profiler.TracerEventType.Operator, 60, 100, 1000, 1001) @@ -92,10 +98,12 @@ class TestProfilerStatistic(unittest.TestCase): conv2d_cudaMemCpy = HostPythonNode('cudaMemcpy', profiler.TracerEventType.CudaRuntime, 35, 40, 1000, 1001) - conv2d_kernel = DevicePythonNode( - 'conv2d_kernel', profiler.TracerEventType.Kernel, 35, 50, 0, 0, 0) - conv2d_memcpy = DevicePythonNode( - 'conv2d_memcpy', profiler.TracerEventType.Memcpy, 50, 60, 0, 0, 0) + conv2d_kernel = DevicePythonNode('conv2d_kernel', + profiler.TracerEventType.Kernel, 35, + 50, 0, 0, 0) + conv2d_memcpy = DevicePythonNode('conv2d_memcpy', + profiler.TracerEventType.Memcpy, 50, + 60, 0, 0, 0) sync_batch_norm_infer_shape = HostPythonNode( 'sync_batch_norm::infer_shape', profiler.TracerEventType.OperatorInner, 60, 70, 1000, 1001) @@ -146,8 +154,8 @@ class TestProfilerStatistic(unittest.TestCase): 'Process Cpu Utilization': '1.02', 'System Cpu Utilization': '0.68' } - statistic_data = profiler.profiler_statistic.StatisticData(thread_tree, - extra_info) + statistic_data = profiler.profiler_statistic.StatisticData( + thread_tree, extra_info) time_range_summary = statistic_data.time_range_summary event_summary = statistic_data.event_summary @@ -200,8 +208,9 @@ class TestProfilerStatistic(unittest.TestCase): 0) self.assertEqual( 
event_summary.memory_manipulation_items['AsyncMemcpy'].cpu_time, 15) - self.assertEqual(event_summary.memory_manipulation_items['AsyncMemcpy'] - .general_gpu_time, 60) + self.assertEqual( + event_summary.memory_manipulation_items['AsyncMemcpy']. + general_gpu_time, 60) print( profiler.profiler_statistic._build_table( statistic_data, @@ -222,10 +231,12 @@ class TestProfilerStatistic(unittest.TestCase): profiler.TracerEventType.Dataloader, 5, 15, 1000, 1001) - mobilenet_node = HostPythonNode( - 'MobileNet', profiler.TracerEventType.Forward, 20, 50, 1000, 1001) - yolonet_node = HostPythonNode( - 'Yolov3Net', profiler.TracerEventType.Forward, 50, 110, 1000, 1001) + mobilenet_node = HostPythonNode('MobileNet', + profiler.TracerEventType.Forward, 20, + 50, 1000, 1001) + yolonet_node = HostPythonNode('Yolov3Net', + profiler.TracerEventType.Forward, 50, 110, + 1000, 1001) userdefined_node = HostPythonNode('Communication Time', profiler.TracerEventType.UserDefined, @@ -263,8 +274,9 @@ class TestProfilerStatistic(unittest.TestCase): optimization_node = HostPythonNode( 'Optimization', profiler.TracerEventType.Optimization, 220, 300, 1000, 1001) - conv2d_node = HostPythonNode( - 'conv2d', profiler.TracerEventType.Operator, 25, 40, 1000, 1001) + conv2d_node = HostPythonNode('conv2d', + profiler.TracerEventType.Operator, 25, 40, + 1000, 1001) sync_batch_norm_node = HostPythonNode('sync_batch_norm', profiler.TracerEventType.Operator, 60, 100, 1000, 1001) @@ -283,10 +295,12 @@ class TestProfilerStatistic(unittest.TestCase): conv2d_cudaMemCpy = HostPythonNode('cudaMemcpy', profiler.TracerEventType.CudaRuntime, 35, 40, 1000, 1001) - conv2d_kernel = DevicePythonNode( - 'conv2d_kernel', profiler.TracerEventType.Kernel, 35, 50, 0, 0, 0) - conv2d_memcpy = DevicePythonNode( - 'conv2d_memcpy', profiler.TracerEventType.Memcpy, 50, 60, 0, 0, 0) + conv2d_kernel = DevicePythonNode('conv2d_kernel', + profiler.TracerEventType.Kernel, 35, + 50, 0, 0, 0) + conv2d_memcpy = DevicePythonNode('conv2d_memcpy', + profiler.TracerEventType.Memcpy, 50, + 60, 0, 0, 0) sync_batch_norm_infer_shape = HostPythonNode( 'sync_batch_norm::infer_shape', profiler.TracerEventType.OperatorInner, 60, 70, 1000, 1001) @@ -363,8 +377,8 @@ class TestProfilerStatistic(unittest.TestCase): 'Process Cpu Utilization': '1.02', 'System Cpu Utilization': '0.68' } - statistic_data = profiler.profiler_statistic.StatisticData(thread_tree, - extra_info) + statistic_data = profiler.profiler_statistic.StatisticData( + thread_tree, extra_info) time_range_summary = statistic_data.time_range_summary event_summary = statistic_data.event_summary distributed_summary = statistic_data.distributed_summary @@ -433,8 +447,9 @@ class TestProfilerStatistic(unittest.TestCase): 0) self.assertEqual( event_summary.memory_manipulation_items['AsyncMemcpy'].cpu_time, 15) - self.assertEqual(event_summary.memory_manipulation_items['AsyncMemcpy'] - .general_gpu_time, 60) + self.assertEqual( + event_summary.memory_manipulation_items['AsyncMemcpy']. 
+ general_gpu_time, 60) print( profiler.profiler_statistic._build_table( statistic_data, @@ -454,8 +469,9 @@ class TestProfilerStatistic(unittest.TestCase): dataloader_node = HostPythonNode('Dataloader', profiler.TracerEventType.Dataloader, 5, 15, 1000, 1001) - mobilenet_node = HostPythonNode( - 'MobileNet', profiler.TracerEventType.Forward, 20, 50, 1000, 1001) + mobilenet_node = HostPythonNode('MobileNet', + profiler.TracerEventType.Forward, 20, + 50, 1000, 1001) backward_node = HostPythonNode('Gradient Backward', profiler.TracerEventType.Backward, 120, @@ -467,8 +483,9 @@ class TestProfilerStatistic(unittest.TestCase): profiler.TracerEventType.UserDefined, 60, 70, 1000, 1001) - conv2d_node = HostPythonNode( - 'conv2d', profiler.TracerEventType.Operator, 25, 25, 1000, 1001) + conv2d_node = HostPythonNode('conv2d', + profiler.TracerEventType.Operator, 25, 25, + 1000, 1001) conv2d_infer_shape = HostPythonNode( 'conv2d::infer_shape', profiler.TracerEventType.OperatorInner, 25, @@ -480,8 +497,9 @@ class TestProfilerStatistic(unittest.TestCase): 'cudalaunchkernel', profiler.TracerEventType.CudaRuntime, 25, 25, 1000, 1001) - conv2d_kernel = DevicePythonNode( - 'conv2d_kernel', profiler.TracerEventType.Kernel, 35, 35, 0, 0, 0) + conv2d_kernel = DevicePythonNode('conv2d_kernel', + profiler.TracerEventType.Kernel, 35, + 35, 0, 0, 0) another_kernel = DevicePythonNode( 'void phi::funcs::VectorizedBroadcastKernel, phi::funcs::AddFunctor>()', profiler.TracerEventType.Kernel, 35, 35, 0, 0, 0) @@ -500,15 +518,16 @@ class TestProfilerStatistic(unittest.TestCase): 'Process Cpu Utilization': '1.02', 'System Cpu Utilization': '0.68' } - statistic_data = profiler.profiler_statistic.StatisticData(thread_tree, - extra_info) + statistic_data = profiler.profiler_statistic.StatisticData( + thread_tree, extra_info) time_range_summary = statistic_data.time_range_summary event_summary = statistic_data.event_summary self.assertEqual(event_summary.items['conv2d'].cpu_time, 0) self.assertEqual(event_summary.items['conv2d'].general_gpu_time, 0) - self.assertEqual(event_summary.userdefined_items['Communication Time'] - .general_gpu_time, 0) + self.assertEqual( + event_summary.userdefined_items['Communication Time']. 
+ general_gpu_time, 0) for sort_key in [ profiler.SortedKeys.CPUTotal, profiler.SortedKeys.CPUMax, profiler.SortedKeys.CPUMin, profiler.SortedKeys.CPUAvg, @@ -516,12 +535,11 @@ class TestProfilerStatistic(unittest.TestCase): profiler.SortedKeys.GPUMin, profiler.SortedKeys.GPUAvg ]: print( - profiler.profiler_statistic._build_table( - statistic_data, - sorted_by=sort_key, - op_detail=True, - thread_sep=False, - time_unit='ms')) + profiler.profiler_statistic._build_table(statistic_data, + sorted_by=sort_key, + op_detail=True, + thread_sep=False, + time_unit='ms')) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_program.py b/python/paddle/fluid/tests/unittests/test_program.py index d73ed872888..b768aa73051 100644 --- a/python/paddle/fluid/tests/unittests/test_program.py +++ b/python/paddle/fluid/tests/unittests/test_program.py @@ -23,6 +23,7 @@ main_program = default_main_program() class TestProgram(unittest.TestCase): + def test_program(self): b = main_program.current_block() self.assertEqual(-1, b.parent_idx) @@ -54,15 +55,20 @@ class TestProgram(unittest.TestCase): def test_program_clone(self): prog = Program() - x = prog.global_block().create_var( - name='X', shape=[1000, 784], dtype='float32') + x = prog.global_block().create_var(name='X', + shape=[1000, 784], + dtype='float32') - y = prog.global_block().create_var( - name='Y', shape=[784, 100], dtype='float32') + y = prog.global_block().create_var(name='Y', + shape=[784, 100], + dtype='float32') out = prog.global_block().create_var(name='Out', dtype='float32') - prog.global_block().append_op( - type="mul", inputs={'X': [x], - 'Y': [y]}, outputs={'Out': [out]}) + prog.global_block().append_op(type="mul", + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [out]}) # FIXME(yuyang18): We manual compare the output string, since the order # of variable could be changed. 
@@ -72,15 +78,20 @@ class TestProgram(unittest.TestCase): def test_parse_program_from_string(self): prog = Program() - x = prog.global_block().create_var( - name='X', shape=[1000, 784], dtype='float32') + x = prog.global_block().create_var(name='X', + shape=[1000, 784], + dtype='float32') - y = prog.global_block().create_var( - name='Y', shape=[784, 100], dtype='float32') + y = prog.global_block().create_var(name='Y', + shape=[784, 100], + dtype='float32') out = prog.global_block().create_var(name='Out', dtype='float32') - prog.global_block().append_op( - type="mul", inputs={'X': [x], - 'Y': [y]}, outputs={'Out': [out]}) + prog.global_block().append_op(type="mul", + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [out]}) binary_str = prog.desc.serialize_to_string() prog_restored = Program.parse_from_string(binary_str) @@ -100,18 +111,17 @@ class TestProgram(unittest.TestCase): self.assertNotEqual(0, len(new_program.blocks[0].all_parameters())) def test_program_inference_optimize(self): + def net(): - reader = fluid.layers.py_reader( - capacity=10, - shapes=[[-1, 10], [-1, 1]], - lod_levels=[0, 0], - dtypes=['float32', 'int64'], - use_double_buffer=True) + reader = fluid.layers.py_reader(capacity=10, + shapes=[[-1, 10], [-1, 1]], + lod_levels=[0, 0], + dtypes=['float32', 'int64'], + use_double_buffer=True) in_data, label = fluid.layers.read_file(reader) predict_label = fluid.layers.fc(in_data, size=2, act='softmax') loss = fluid.layers.mean( - fluid.layers.cross_entropy( - input=predict_label, label=label)) + fluid.layers.cross_entropy(input=predict_label, label=label)) optimizer = fluid.optimizer.Adam() optimizer.minimize(loss) @@ -163,18 +173,17 @@ class TestProgram(unittest.TestCase): "program") def test_remove_training_info(self): + def net(): - reader = fluid.layers.py_reader( - capacity=10, - shapes=[[-1, 10], [-1, 1]], - lod_levels=[0, 0], - dtypes=['float32', 'int64'], - use_double_buffer=True) + reader = fluid.layers.py_reader(capacity=10, + shapes=[[-1, 10], [-1, 1]], + lod_levels=[0, 0], + dtypes=['float32', 'int64'], + use_double_buffer=True) in_data, label = fluid.layers.read_file(reader) predict_label = fluid.layers.fc(in_data, size=2, act='softmax') loss = fluid.layers.mean( - fluid.layers.cross_entropy( - input=predict_label, label=label)) + fluid.layers.cross_entropy(input=predict_label, label=label)) optimizer = fluid.optimizer.Adam() optimizer.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_program_code.py b/python/paddle/fluid/tests/unittests/test_program_code.py index e82447519bf..390644cec54 100644 --- a/python/paddle/fluid/tests/unittests/test_program_code.py +++ b/python/paddle/fluid/tests/unittests/test_program_code.py @@ -21,19 +21,25 @@ import paddle.fluid.layers as layers class TestProgramToReadableCode(unittest.TestCase): + def setUp(self): self.program = fluid.Program() self.block = self.program.current_block() - self.var = self.block.create_var( - name="X", shape=[-1, 23, 48], dtype='float32') - self.param = self.block.create_parameter( - name="W", shape=[23, 48], dtype='float32', trainable=True) - self.op = self.block.append_op( - type="abs", inputs={"X": [self.var]}, outputs={"Out": [self.var]}) + self.var = self.block.create_var(name="X", + shape=[-1, 23, 48], + dtype='float32') + self.param = self.block.create_parameter(name="W", + shape=[23, 48], + dtype='float32', + trainable=True) + self.op = self.block.append_op(type="abs", + inputs={"X": [self.var]}, + outputs={"Out": [self.var]}) # add control flow op and sub block 
self.append_cond_op(self.program) def append_cond_op(self, program): + def true_func(): return layers.fill_constant(shape=[2, 3], dtype='int32', value=2) diff --git a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py index a1a3b31a976..c602cfb4ad0 100755 --- a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py +++ b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py @@ -34,16 +34,19 @@ def lstm_net(use_feed): hid_dim2 = 96 class_dim = 2 emb_lr = 30.0 - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") emb = fluid.layers.embedding( input=data, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr(learning_rate=emb_lr)) fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4) - lstm_h, c = fluid.layers.dynamic_lstm( - input=fc0, size=hid_dim * 4, is_reverse=False) + lstm_h, c = fluid.layers.dynamic_lstm(input=fc0, + size=hid_dim * 4, + is_reverse=False) lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max') lstm_max_tanh = fluid.layers.tanh(lstm_max) fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh') @@ -63,8 +66,8 @@ def simple_fc_net_with_accuracy(use_feed): hidden, size=200, act='relu', - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0))) + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=1.0))) prediction = fluid.layers.fc(hidden, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) loss = fluid.layers.mean(loss) @@ -125,13 +128,14 @@ def optimization_in_cond_net(with_optimize=False): class TestProgramPruneBackward(unittest.TestCase): + def program_compare(self, program_a, program_b): assert isinstance( - program_a, fluid.framework. - Program), "The first argument should be fluid.framework.Program." + program_a, fluid.framework.Program + ), "The first argument should be fluid.framework.Program." assert isinstance( - program_b, fluid.framework. - Program), "The second argument should be fluid.framework Program." + program_b, fluid.framework.Program + ), "The second argument should be fluid.framework Program." 
self.assertEqual(len(program_a.blocks), len(program_b.blocks)) for idx in range(len(program_a.blocks)): @@ -172,6 +176,7 @@ class TestProgramPruneBackward(unittest.TestCase): self.assertEqual(loss_data_orig, loss_data_prune) def test_simple_fc_net(self): + def optimizer(): optimizer = fluid.optimizer.SGD( learning_rate=0.001, @@ -180,13 +185,15 @@ class TestProgramPruneBackward(unittest.TestCase): with self.program_scope_guard(): img, label = init_data() - self.check_prune_correctness( - method=simple_fc_net, - feed_dict={"image": img, - "label": label}, - optimizer=optimizer) + self.check_prune_correctness(method=simple_fc_net, + feed_dict={ + "image": img, + "label": label + }, + optimizer=optimizer) def test_simple_fc_net_with_accuracy(self): + def optimizer(): optimizer = fluid.optimizer.SGD( learning_rate=0.001, @@ -195,13 +202,15 @@ class TestProgramPruneBackward(unittest.TestCase): with self.program_scope_guard(): img, label = init_data() - self.check_prune_correctness( - method=simple_fc_net_with_accuracy, - feed_dict={"image": img, - "label": label}, - optimizer=optimizer) + self.check_prune_correctness(method=simple_fc_net_with_accuracy, + feed_dict={ + "image": img, + "label": label + }, + optimizer=optimizer) def test_batchnorm_fc(self): + def optimizer(): optimizer = fluid.optimizer.SGD( learning_rate=0.001, @@ -210,11 +219,12 @@ class TestProgramPruneBackward(unittest.TestCase): with self.program_scope_guard(): img, label = init_data() - self.check_prune_correctness( - method=fc_with_batchnorm, - feed_dict={"image": img, - "label": label}, - optimizer=optimizer) + self.check_prune_correctness(method=fc_with_batchnorm, + feed_dict={ + "image": img, + "label": label + }, + optimizer=optimizer) def test_seresnet(self): with self.program_scope_guard(): @@ -224,6 +234,7 @@ class TestProgramPruneBackward(unittest.TestCase): optimizer=seresnext_net.optimizer) def test_transformer(self): + def optimizer(): optimizer = fluid.optimizer.Adam( learning_rate=0.001, @@ -234,10 +245,12 @@ class TestProgramPruneBackward(unittest.TestCase): # the program argument is used to distinguish Program and CompiledProgram feed_dict = get_feed_data_reader().get_next( fluid.Executor(core.CPUPlace()), fluid.default_main_program()) - self.check_prune_correctness( - method=transformer, feed_dict=feed_dict, optimizer=optimizer) + self.check_prune_correctness(method=transformer, + feed_dict=feed_dict, + optimizer=optimizer) def test_lstm(self): + def optimizer(): optimizer = fluid.optimizer.Adagrad( learning_rate=0.001, @@ -247,16 +260,20 @@ class TestProgramPruneBackward(unittest.TestCase): with self.program_scope_guard(): word_dict_size = 5147 reader = fake_imdb_reader(word_dict_size, 1) - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") - feeder = fluid.DataFeeder( - feed_list=[data, label], place=core.CPUPlace()) + feeder = fluid.DataFeeder(feed_list=[data, label], + place=core.CPUPlace()) feed_data = feeder.feed(reader()) - self.check_prune_correctness( - method=lstm_net, feed_dict=feed_data, optimizer=optimizer) + self.check_prune_correctness(method=lstm_net, + feed_dict=feed_data, + optimizer=optimizer) def test_cond(self): + def optimizer(): optimizer = fluid.optimizer.SGD(learning_rate=0.01) return optimizer @@ -265,8 +282,9 @@ class TestProgramPruneBackward(unittest.TestCase): x_in = np.random.random(size=(10, 
4)).astype('float32') label_in = np.random.randint(1, size=(10, 1)).astype('int64') feed_dict = {'x': x_in, 'label': label_in} - self.check_prune_correctness( - method=cond_net, feed_dict=feed_dict, optimizer=optimizer) + self.check_prune_correctness(method=cond_net, + feed_dict=feed_dict, + optimizer=optimizer) def test_optimization_in_cond(self): x_in = np.random.random(size=(10, 4)).astype('float32') diff --git a/python/paddle/fluid/tests/unittests/test_program_to_string.py b/python/paddle/fluid/tests/unittests/test_program_to_string.py index 22ba43bde2a..23965e36a35 100644 --- a/python/paddle/fluid/tests/unittests/test_program_to_string.py +++ b/python/paddle/fluid/tests/unittests/test_program_to_string.py @@ -18,14 +18,17 @@ import unittest class TestProgram(unittest.TestCase): + def test_program_to_string(self): prog = fluid.default_main_program() - a = fluid.layers.data( - name="X", shape=[2, 3], dtype="float32", append_batch_size=False) + a = fluid.layers.data(name="X", + shape=[2, 3], + dtype="float32", + append_batch_size=False) c = fluid.layers.fc(a, size=3) prog_string = prog.to_string(throw_on_error=True, with_details=False) - prog_string_with_details = prog.to_string( - throw_on_error=False, with_details=True) + prog_string_with_details = prog.to_string(throw_on_error=False, + with_details=True) assert prog_string is not None assert len(prog_string_with_details) > len(prog_string) diff --git a/python/paddle/fluid/tests/unittests/test_protobuf.py b/python/paddle/fluid/tests/unittests/test_protobuf.py index 7b80927c48d..4648d9f90ab 100644 --- a/python/paddle/fluid/tests/unittests/test_protobuf.py +++ b/python/paddle/fluid/tests/unittests/test_protobuf.py @@ -19,6 +19,7 @@ import unittest class TestFrameworkProto(unittest.TestCase): + def test_all(self): op_proto = framework_pb2.OpProto() ipt0 = op_proto.inputs.add() diff --git a/python/paddle/fluid/tests/unittests/test_protobuf_descs.py b/python/paddle/fluid/tests/unittests/test_protobuf_descs.py index 7fb2171f611..1cbf2ccd7b7 100644 --- a/python/paddle/fluid/tests/unittests/test_protobuf_descs.py +++ b/python/paddle/fluid/tests/unittests/test_protobuf_descs.py @@ -21,6 +21,7 @@ from paddle.fluid.framework import Program class TestOpDesc(unittest.TestCase): + def test_op_desc(self): program_desc = core.ProgramDesc() self.assertIsNotNone(program_desc) @@ -81,6 +82,7 @@ class TestOpDesc(unittest.TestCase): class TestProgramDesc(unittest.TestCase): + def test_instance(self): program_desc = core.ProgramDesc() self.assertIsNotNone(program_desc) @@ -108,6 +110,7 @@ class TestProgramDesc(unittest.TestCase): class TestVarDesc(unittest.TestCase): + def test_shape(self): program_desc = core.ProgramDesc() block = program_desc.block(0) @@ -164,6 +167,7 @@ class TestVarDesc(unittest.TestCase): class TestBlockDesc(unittest.TestCase): + def test_add_var(self): program_desc = core.ProgramDesc() self.assertIsNotNone(program_desc) diff --git a/python/paddle/fluid/tests/unittests/test_proximal_adagrad_op.py b/python/paddle/fluid/tests/unittests/test_proximal_adagrad_op.py index 57e96f1fa34..ecccd7ba6c3 100644 --- a/python/paddle/fluid/tests/unittests/test_proximal_adagrad_op.py +++ b/python/paddle/fluid/tests/unittests/test_proximal_adagrad_op.py @@ -20,6 +20,7 @@ from op_test import OpTest class TestProximalAdagradOp(OpTest): + def setUp(self): self.op_type = "proximal_adagrad" w = np.random.random((102, 105)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_proximal_gd_op.py 
b/python/paddle/fluid/tests/unittests/test_proximal_gd_op.py index 067502baecc..b130a195971 100644 --- a/python/paddle/fluid/tests/unittests/test_proximal_gd_op.py +++ b/python/paddle/fluid/tests/unittests/test_proximal_gd_op.py @@ -20,6 +20,7 @@ from op_test import OpTest class TestProximalGDOp(OpTest): + def setUp(self): self.op_type = "proximal_gd" w = np.random.random((102, 105)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_prroi_pool_op.py b/python/paddle/fluid/tests/unittests/test_prroi_pool_op.py index 8e5ba7c3363..71b07155f40 100644 --- a/python/paddle/fluid/tests/unittests/test_prroi_pool_op.py +++ b/python/paddle/fluid/tests/unittests/test_prroi_pool_op.py @@ -24,13 +24,16 @@ from paddle.fluid import compiler, Program, program_guard class TestPRROIPoolOp(OpTest): + def set_data(self): self.init_test_case() self.make_rois() self.prRoIPool = PyPrRoIPool() - self.outs = self.prRoIPool.compute( - self.x, self.rois, self.output_channels, self.spatial_scale, - self.pooled_height, self.pooled_width).astype('float32') + self.outs = self.prRoIPool.compute(self.x, self.rois, + self.output_channels, + self.spatial_scale, + self.pooled_height, + self.pooled_width).astype('float32') self.inputs = {'X': self.x, 'ROIs': (self.rois[:, 1:5], self.rois_lod)} self.attrs = { 'output_channels': self.output_channels, @@ -95,8 +98,10 @@ class TestPRROIPoolOp(OpTest): name="X", shape=[self.channels, self.height, self.width], dtype="float32") - rois = fluid.layers.data( - name="ROIs", shape=[4], dtype="float32", lod_level=1) + rois = fluid.layers.data(name="ROIs", + shape=[4], + dtype="float32", + lod_level=1) output = fluid.layers.prroi_pool(x, rois, 0.25, 2, 2) loss = fluid.layers.mean(output) optimizer = fluid.optimizer.SGD(learning_rate=1e-3) @@ -106,9 +111,10 @@ class TestPRROIPoolOp(OpTest): self.rois_lod, place) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - exe.run(fluid.default_main_program(), - {'X': input_x, - "ROIs": input_rois}) + exe.run(fluid.default_main_program(), { + 'X': input_x, + "ROIs": input_rois + }) def test_net(self): places = [fluid.CPUPlace()] @@ -119,10 +125,13 @@ class TestPRROIPoolOp(OpTest): def test_errors(self): with program_guard(Program(), Program()): - x = fluid.layers.data( - name="x", shape=[245, 30, 30], dtype="float32") - rois = fluid.layers.data( - name="rois", shape=[4], dtype="float32", lod_level=1) + x = fluid.layers.data(name="x", + shape=[245, 30, 30], + dtype="float32") + rois = fluid.layers.data(name="rois", + shape=[4], + dtype="float32", + lod_level=1) # spatial_scale must be float type self.assertRaises(TypeError, fluid.layers.prroi_pool, x, rois, 2, 7, 7) @@ -135,13 +144,16 @@ class TestPRROIPoolOp(OpTest): class TestPRROIPoolOpTensorRoIs(OpTest): + def set_data(self): self.init_test_case() self.make_rois() self.prRoIPool = PyPrRoIPool() - self.outs = self.prRoIPool.compute( - self.x, self.rois, self.output_channels, self.spatial_scale, - self.pooled_height, self.pooled_width).astype('float32') + self.outs = self.prRoIPool.compute(self.x, self.rois, + self.output_channels, + self.spatial_scale, + self.pooled_height, + self.pooled_width).astype('float32') self.rois_index = np.array(self.rois_lod).reshape([-1]).astype(np.int64) self.inputs = { @@ -213,10 +225,15 @@ class TestPRROIPoolOpTensorRoIs(OpTest): shape=[self.channels, self.height, self.width], dtype="float32") rois = fluid.layers.data(name="ROIs", shape=[4], dtype="float32") - rois_index = fluid.layers.data( - name='rois_idx', shape=[], 
dtype="int64") - output = fluid.layers.prroi_pool( - x, rois, 0.25, 2, 2, batch_roi_nums=rois_index) + rois_index = fluid.layers.data(name='rois_idx', + shape=[], + dtype="int64") + output = fluid.layers.prroi_pool(x, + rois, + 0.25, + 2, + 2, + batch_roi_nums=rois_index) loss = fluid.layers.mean(output) optimizer = fluid.optimizer.SGD(learning_rate=1e-3) optimizer.minimize(loss) @@ -237,10 +254,13 @@ class TestPRROIPoolOpTensorRoIs(OpTest): def test_errors(self): with program_guard(Program(), Program()): - x = fluid.layers.data( - name="x", shape=[245, 30, 30], dtype="float32") - rois = fluid.layers.data( - name="rois", shape=[4], dtype="float32", lod_level=1) + x = fluid.layers.data(name="x", + shape=[245, 30, 30], + dtype="float32") + rois = fluid.layers.data(name="rois", + shape=[4], + dtype="float32", + lod_level=1) # spatial_scale must be float type self.assertRaises(TypeError, fluid.layers.prroi_pool, x, rois, 2, 7, 7) @@ -252,27 +272,24 @@ class TestPRROIPoolOpTensorRoIs(OpTest): 7, 0.7) def test_bad_x(): - x = fluid.layers.data( - name='data1', - shape=[2, 3, 16, 16], - dtype='int64', - append_batch_size=False) - label = fluid.layers.data( - name='label1', - shape=[2, 4], - dtype='float32', - lod_level=1, - append_batch_size=False) + x = fluid.layers.data(name='data1', + shape=[2, 3, 16, 16], + dtype='int64', + append_batch_size=False) + label = fluid.layers.data(name='label1', + shape=[2, 4], + dtype='float32', + lod_level=1, + append_batch_size=False) output = fluid.layers.prroi_pool(x, label, 0.25, 2, 2) self.assertRaises(TypeError, test_bad_x) def test_bad_y(): - x = fluid.layers.data( - name='data2', - shape=[2, 3, 16, 16], - dtype='float32', - append_batch_size=False) + x = fluid.layers.data(name='data2', + shape=[2, 3, 16, 16], + dtype='float32', + append_batch_size=False) label = [[1, 2, 3, 4], [2, 3, 4, 5]] output = fluid.layers.prroi_pool(x, label, 0.25, 2, 2) diff --git a/python/paddle/fluid/tests/unittests/test_prune.py b/python/paddle/fluid/tests/unittests/test_prune.py index 3755d92858a..c320e3fbf58 100644 --- a/python/paddle/fluid/tests/unittests/test_prune.py +++ b/python/paddle/fluid/tests/unittests/test_prune.py @@ -25,6 +25,7 @@ import contextlib class TestPrune(unittest.TestCase): + def net(self): x = fluid.layers.data(name='x', shape=[2], dtype='float32') label = fluid.layers.data(name="label", shape=[1], dtype="int64") @@ -40,9 +41,9 @@ class TestPrune(unittest.TestCase): with fluid.program_guard(program, startup_program): (x, y, label, loss) = self.net() self.assertEqual(len(block.ops), 5) - self.assertEqual([op.type for op in block.ops], [ - "mul", "elementwise_add", "softmax", "cross_entropy2", "mean" - ]) + self.assertEqual( + [op.type for op in block.ops], + ["mul", "elementwise_add", "softmax", "cross_entropy2", "mean"]) pruned_program = program._prune_with_input( feeded_var_names=[y.name, label.name], targets=[loss]) self.assertEqual(len(pruned_program.global_block().ops), 2) @@ -56,9 +57,9 @@ class TestPrune(unittest.TestCase): with fluid.program_guard(program, startup_program): (x, y, label, loss) = self.net() self.assertEqual(len(block.ops), 5) - self.assertEqual([op.type for op in block.ops], [ - "mul", "elementwise_add", "softmax", "cross_entropy2", "mean" - ]) + self.assertEqual( + [op.type for op in block.ops], + ["mul", "elementwise_add", "softmax", "cross_entropy2", "mean"]) pruned_program = program._prune(targets=[loss]) self.assertEqual(len(pruned_program.global_block().ops), 5) self.assertEqual( @@ -72,9 +73,9 @@ class 
TestPrune(unittest.TestCase): with fluid.program_guard(program, startup_program): (x, y, label, loss) = self.net() self.assertEqual(len(block.ops), 5) - self.assertEqual([op.type for op in block.ops], [ - "mul", "elementwise_add", "softmax", "cross_entropy2", "mean" - ]) + self.assertEqual( + [op.type for op in block.ops], + ["mul", "elementwise_add", "softmax", "cross_entropy2", "mean"]) pruned_program = program._prune(targets=loss) self.assertEqual(len(pruned_program.global_block().ops), 5) self.assertEqual( @@ -88,9 +89,9 @@ class TestPrune(unittest.TestCase): with fluid.program_guard(program, startup_program): (x, y, label, loss) = self.net() self.assertEqual(len(block.ops), 5) - self.assertEqual([op.type for op in block.ops], [ - "mul", "elementwise_add", "softmax", "cross_entropy2", "mean" - ]) + self.assertEqual( + [op.type for op in block.ops], + ["mul", "elementwise_add", "softmax", "cross_entropy2", "mean"]) try: pruned_program = program._prune(targets=None) except ValueError as e: @@ -113,6 +114,7 @@ def _mock_guard(mock): class TestExecutorRunAutoPrune(unittest.TestCase): + def net1(self): x = fluid.layers.data(name='x', shape=[2], dtype='float32') label = fluid.layers.data(name="label", shape=[1], dtype="int64") @@ -176,8 +178,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): x_np = np.random.random(size=(10, 2)).astype('float32') label_np = np.random.randint(1, size=(10, 1)).astype('int64') res = exe.run(program, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=False) self.assertIsNotNone(scope.find_var(loss1.name)) @@ -200,8 +204,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): x_np = np.random.random(size=(10, 2)).astype('float32') label_np = np.random.randint(1, size=(10, 1)).astype('int64') res = exe.run(program, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=True) self.assertIsNotNone(scope.find_var(loss1.name)) @@ -231,8 +237,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): x_np = np.random.random(size=(10, 2)).astype('float32') label_np = np.random.randint(1, size=(10, 1)).astype('int64') res = exe.run(program, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=True) self.assertIsNotNone(scope.find_var(loss1.name)) @@ -254,15 +262,17 @@ class TestExecutorRunAutoPrune(unittest.TestCase): exe = fluid.Executor(fluid.CPUPlace()) exe.run(startup_program) compiled_prog = fluid.CompiledProgram( - program).with_data_parallel( - loss_name=loss1.name, places=fluid.CPUPlace()) + program).with_data_parallel(loss_name=loss1.name, + places=fluid.CPUPlace()) weight_init = np.array( scope.find_var(w_param_attrs.name).get_tensor()) x_np = np.random.random(size=(10, 2)).astype('float32') label_np = np.random.randint(1, size=(10, 1)).astype('int64') res = exe.run(compiled_prog, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=True) self.assertIsNotNone(scope.find_var(loss1.name)) @@ -286,8 +296,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): x_np = np.random.random(size=(10, 2)).astype('float32') label_np = np.random.randint(1, size=(10, 1)).astype('int64') res = exe.run(program, - feed={y.name: x_np, - 'label': label_np}, + feed={ + y.name: x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=True) self.assertIsNotNone(scope.find_var(loss1.name)) @@ 
-310,14 +322,15 @@ class TestExecutorRunAutoPrune(unittest.TestCase): exe.run(startup_program) x_np = np.random.random(size=(10, 2)).astype('float32') label_np = np.random.randint(1, size=(10, 1)).astype('int64') - self.assertRaises( - Exception, - exe.run, - program, - feed={y.name: x_np, - 'label': label_np}, - fetch_list=[loss1.name], - use_prune=True) + self.assertRaises(Exception, + exe.run, + program, + feed={ + y.name: x_np, + 'label': label_np + }, + fetch_list=[loss1.name], + use_prune=True) self.assertIsNotNone(scope.find_var(loss1.name)) self.assertIsNone(scope.find_var(loss2.name)) @@ -343,12 +356,14 @@ class TestExecutorRunAutoPrune(unittest.TestCase): sgd_optimizer.minimize(loss1) exe.run(startup_program) x_np = np.random.random(size=(10, 2)).astype('float32') - label_np = np.random.randint( - 1, size=(10, 1)).astype('int64') + label_np = np.random.randint(1, + size=(10, 1)).astype('int64') for i in range(10): res = exe.run(program, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=True) if i == 0: @@ -380,8 +395,8 @@ class TestExecutorRunAutoPrune(unittest.TestCase): train2 = adam_optimizer2.minimize(loss2) exe.run(startup_program) x_np = np.random.random(size=(10, 2)).astype('float32') - label_np = np.random.randint( - 1, size=(10, 1)).astype('int64') + label_np = np.random.randint(1, + size=(10, 1)).astype('int64') for i in range(10): if i % 2: @@ -431,15 +446,17 @@ class TestExecutorRunAutoPrune(unittest.TestCase): sgd_optimizer.minimize(loss1) exe.run(startup_program) x_np = np.random.random(size=(10, 2)).astype('float32') - label_np = np.random.randint( - 1, size=(10, 1)).astype('int64') + label_np = np.random.randint(1, + size=(10, 1)).astype('int64') compiled_prog = fluid.CompiledProgram( - program).with_data_parallel( - loss_name=loss1.name, places=fluid.CPUPlace()) + program).with_data_parallel(loss_name=loss1.name, + places=fluid.CPUPlace()) for i in range(10): res = exe.run(compiled_prog, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=True) if i == 0: @@ -468,8 +485,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): x_np = np.random.random(size=(10, 2)).astype('float32') label_np = np.random.randint(1, size=(10, 1)).astype('int64') res = exe.run(program, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=False) weight_without_prune = np.array( @@ -480,8 +499,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): with fluid.scope_guard(scope): exe.run(startup_program) res = exe.run(program, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name, train1], use_prune=True) weight_with_prune = np.array( @@ -492,8 +513,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): with fluid.scope_guard(scope): exe.run(startup_program) exe.run(cloned_program, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=False) weight_expected = np.array( @@ -528,11 +551,11 @@ class TestExecutorRunAutoPrune(unittest.TestCase): x_np = np.random.random(size=(10, 2)).astype('float32') label_np = np.random.randint(1, size=(10, 1)).astype('int64') compiled_prog1 = fluid.CompiledProgram( - program).with_data_parallel( - loss_name=loss1.name, places=[fluid.CPUPlace()] * 2) + program).with_data_parallel(loss_name=loss1.name, + 
places=[fluid.CPUPlace()] * 2) compiled_prog2 = fluid.CompiledProgram( - program).with_data_parallel( - loss_name=loss2.name, places=[fluid.CPUPlace()] * 2) + program).with_data_parallel(loss_name=loss2.name, + places=[fluid.CPUPlace()] * 2) for i in range(10): if i % 2 == 1: res = exe.run(compiled_prog1, @@ -547,8 +570,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): use_prune=True) else: res = exe.run(compiled_prog2, - feed={'x2': x_np, - 'label': label_np}, + feed={ + 'x2': x_np, + 'label': label_np + }, fetch_list=[loss2.name, train2], use_prune=True) weight1 = np.array( @@ -560,9 +585,11 @@ class TestExecutorRunAutoPrune(unittest.TestCase): for i in range(10): if i % 2 == 1: exe.run(cloned_program, - feed={'x1': x_np, - 'x2': x_np, - 'label': label_np}, + feed={ + 'x1': x_np, + 'x2': x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=False) weight2 = np.array(scope.find_var(w1_param_attrs.name).get_tensor()) @@ -591,8 +618,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): label_np = np.random.randint(1, size=(10, 1)).astype('int64') res = exe.run(program, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=False) @@ -604,8 +633,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): with fluid.scope_guard(scope): exe.run(startup_program) res = exe.run(program, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name, train1], use_prune=True) weight_with_prune = np.array( @@ -616,8 +647,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): with fluid.scope_guard(scope): exe.run(startup_program) exe.run(cloned_program, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=False) weight_expected = np.array( @@ -654,8 +687,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): label_np = np.random.randint(1, size=(10, 1)).astype('int64') res = exe.run(program, - feed={'x1': x_np, - 'label': label_np}, + feed={ + 'x1': x_np, + 'label': label_np + }, fetch_list=[loss1.name, train1], use_prune=True) self.assertIsNotNone(scope.find_var(w1_param_attrs.name)) @@ -691,8 +726,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): x_np = np.random.random(size=(10, 2)).astype('float32') label_np = np.random.randint(1, size=(10, 1)).astype('int64') res = exe.run(program, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=False) @@ -704,8 +741,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): with fluid.scope_guard(scope): exe.run(startup_program) res = exe.run(program, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name, train1]) weight_with_prune = np.array( scope.find_var(w_param_attrs.name).get_tensor()) @@ -715,8 +754,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): with fluid.scope_guard(scope): exe.run(startup_program) exe.run(cloned_program, - feed={'x': x_np, - 'label': label_np}, + feed={ + 'x': x_np, + 'label': label_np + }, fetch_list=[loss1.name], use_prune=False) weight_expected = np.array( @@ -740,8 +781,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): x_np = np.random.random(size=(10, 2)).astype('float32') label_np = np.random.randint(1, size=(10, 1)).astype('int64') res = exe.run(program, - feed={y.name: x_np, - 'label': label_np}, + feed={ + y.name: x_np, + 'label': label_np + }, fetch_list=[y.name, 
loss1.name], use_prune=True) self.assertIsNotNone(scope.find_var(loss1.name)) @@ -767,8 +810,10 @@ class TestExecutorRunAutoPrune(unittest.TestCase): x_np = np.random.random(size=(10, 2)).astype('float32') label_np = np.random.randint(1, size=(10, 1)).astype('int64') res = exe.run(program, - feed={x.name: x_np, - 'label': label_np}, + feed={ + x.name: x_np, + 'label': label_np + }, fetch_list=[x.name, loss1.name], use_prune=True) self.assertIsNotNone(scope.find_var(loss1.name)) diff --git a/python/paddle/fluid/tests/unittests/test_prune_gate_by_capacity_op.py b/python/paddle/fluid/tests/unittests/test_prune_gate_by_capacity_op.py index 8a641a6b4fa..4b73e05f502 100644 --- a/python/paddle/fluid/tests/unittests/test_prune_gate_by_capacity_op.py +++ b/python/paddle/fluid/tests/unittests/test_prune_gate_by_capacity_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -67,9 +67,10 @@ def assert_allclose(output, expected, n_expert): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestPruneGateByCapacityAPI1(unittest.TestCase): + def init_test_case(self): - self.gate_idx = np.random.randint( - 0, self.n_expert, size=(200, )).astype(self.dtype) + self.gate_idx = np.random.randint(0, self.n_expert, + size=(200, )).astype(self.dtype) expert_count = count(self.gate_idx, self.n_expert * self.n_worker) capacity = np.random.randint(10, 200, size=(self.n_expert, )) self.expert_count = limit_by_capacity(expert_count, capacity, @@ -88,8 +89,9 @@ class TestPruneGateByCapacityAPI1(unittest.TestCase): def test_static_api(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): - gate_idx_tensor = paddle.static.data( - 'GateIdx', shape=self.gate_idx.shape, dtype="int64") + gate_idx_tensor = paddle.static.data('GateIdx', + shape=self.gate_idx.shape, + dtype="int64") expert_count_tensor = paddle.static.data( 'ExpertCount', shape=self.expert_count.shape, dtype="int64") out = utils._prune_gate_by_capacity(gate_idx_tensor, @@ -107,8 +109,9 @@ class TestPruneGateByCapacityAPI1(unittest.TestCase): paddle.disable_static(self.place) gate_idx_tensor = paddle.to_tensor(self.gate_idx) expert_count_tensor = paddle.to_tensor(self.expert_count) - out = utils._prune_gate_by_capacity( - gate_idx_tensor, expert_count_tensor, self.n_expert, self.n_worker) + out = utils._prune_gate_by_capacity(gate_idx_tensor, + expert_count_tensor, self.n_expert, + self.n_worker) assert_allclose(out.numpy(), self.out, self.n_expert) def test_dygraph_api(self): @@ -120,6 +123,7 @@ class TestPruneGateByCapacityAPI1(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestPruneGateByCapacityAPI2(TestPruneGateByCapacityAPI1): + def setUp(self): self.n_expert = 12 self.n_worker = 1 diff --git a/python/paddle/fluid/tests/unittests/test_ps_dispatcher.py b/python/paddle/fluid/tests/unittests/test_ps_dispatcher.py index 16abb8a7da4..d9de42ae562 100644 --- a/python/paddle/fluid/tests/unittests/test_ps_dispatcher.py +++ 
b/python/paddle/fluid/tests/unittests/test_ps_dispatcher.py @@ -19,6 +19,7 @@ from paddle.fluid.incubate.fleet.parameter_server.ir.ps_dispatcher import RoundR class TestPsDispatcher(unittest.TestCase): + def setUp(self): self.points = [ "127.0.0.1:1001", "127.0.0.1:1002", "127.0.0.1:1003", @@ -34,7 +35,9 @@ class TestPsDispatcher(unittest.TestCase): base.dispatch([]) def test_hash(self): + class Var: + def __init__(self, index): self._name = "var_{}".format(index) @@ -52,7 +55,9 @@ class TestPsDispatcher(unittest.TestCase): self.assertEqual(len(eplist), 4) def test_round_rodin(self): + class Var: + def __init__(self, index): self._name = "var_{}".format(index) diff --git a/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py b/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py index 39dec982b66..3e3529b2240 100644 --- a/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py +++ b/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py @@ -79,12 +79,14 @@ def calc_psroi_pool(x, rois, rois_num_per_img, output_channels, spatial_scale, for iw in range(wstart, wend): out_sum += x_i[c_in, ih, iw] bin_area = (hend - hstart) * (wend - wstart) - out_data[i, c, ph, pw] = 0. if is_empty else ( - out_sum / float(bin_area)) + out_data[i, c, ph, + pw] = 0. if is_empty else (out_sum / + float(bin_area)) return out_data class TestPSROIPoolOp(OpTest): + def set_data(self): paddle.enable_static() self.init_test_case() @@ -141,12 +143,13 @@ class TestPSROIPoolOp(OpTest): self.rois_num = len(rois) self.rois_with_batch_id = np.array(rois).astype('float64') self.boxes = self.rois_with_batch_id[:, 1:] - self.boxes_num = np.array( - [bno + 1 for bno in range(self.batch_size)]).astype('int32') + self.boxes_num = np.array([bno + 1 for bno in range(self.batch_size) + ]).astype('int32') def setUp(self): self.op_type = 'psroi_pool' - self.python_api = lambda x, boxes, boxes_num, pooled_height, pooled_width, output_channels, spatial_scale: paddle.vision.ops.psroi_pool(x, boxes, boxes_num, (pooled_height, pooled_width), spatial_scale) + self.python_api = lambda x, boxes, boxes_num, pooled_height, pooled_width, output_channels, spatial_scale: paddle.vision.ops.psroi_pool( + x, boxes, boxes_num, (pooled_height, pooled_width), spatial_scale) self.set_data() def test_check_output(self): @@ -157,29 +160,31 @@ class TestPSROIPoolOp(OpTest): class TestPSROIPoolDynamicFunctionAPI(unittest.TestCase): + def setUp(self): self.x = np.random.random([2, 490, 28, 28]).astype(np.float32) - self.boxes = np.array( - [[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]]).astype(np.float32) + self.boxes = np.array([[1, 5, 8, 10], [4, 2, 6, 7], + [12, 12, 19, 21]]).astype(np.float32) self.boxes_num = np.array([1, 2]).astype(np.int32) def test_output_size(self): + def test_output_size_is_int(): output_size = 7 - out = paddle.vision.ops.psroi_pool( - paddle.to_tensor(self.x), - paddle.to_tensor(self.boxes), - paddle.to_tensor(self.boxes_num), output_size).numpy() + out = paddle.vision.ops.psroi_pool(paddle.to_tensor(self.x), + paddle.to_tensor(self.boxes), + paddle.to_tensor(self.boxes_num), + output_size).numpy() expect_out = calc_psroi_pool(self.x, self.boxes, self.boxes_num, 10, 1.0, 7, 7) self.assertTrue(np.allclose(out, expect_out)) def test_output_size_is_tuple(): output_size = (7, 7) - out = paddle.vision.ops.psroi_pool( - paddle.to_tensor(self.x), - paddle.to_tensor(self.boxes), - paddle.to_tensor(self.boxes_num), output_size).numpy() + out = paddle.vision.ops.psroi_pool(paddle.to_tensor(self.x), + 
paddle.to_tensor(self.boxes), + paddle.to_tensor(self.boxes_num), + output_size).numpy() expect_out = calc_psroi_pool(self.x, self.boxes, self.boxes_num, 10, 1.0, 7, 7) self.assertTrue(np.allclose(out, expect_out)) @@ -205,6 +210,7 @@ class TestPSROIPoolDynamicFunctionAPI(unittest.TestCase): class TestPSROIPoolDynamicClassAPI(unittest.TestCase): + def setUp(self): self.x = np.random.random([2, 128, 32, 32]).astype(np.float32) self.boxes = np.array([[3, 5, 6, 13], [7, 4, 22, 18], [4, 5, 7, 10], @@ -212,32 +218,31 @@ class TestPSROIPoolDynamicClassAPI(unittest.TestCase): self.boxes_num = np.array([2, 2]).astype(np.int32) def test_output_size(self): + def test_output_size_is_int(): psroi_module = paddle.vision.ops.PSRoIPool(8, 1.1) - out = psroi_module( - paddle.to_tensor(self.x), - paddle.to_tensor(self.boxes), - paddle.to_tensor(self.boxes_num)).numpy() + out = psroi_module(paddle.to_tensor(self.x), + paddle.to_tensor(self.boxes), + paddle.to_tensor(self.boxes_num)).numpy() expect_out = calc_psroi_pool(self.x, self.boxes, self.boxes_num, 2, 1.1, 8, 8) self.assertTrue(np.allclose(out, expect_out)) def test_output_size_is_tuple(): psroi_pool_module = paddle.vision.ops.PSRoIPool(8, 1.1) - out = psroi_pool_module( - paddle.to_tensor(self.x), - paddle.to_tensor(self.boxes), - paddle.to_tensor(self.boxes_num)).numpy() + out = psroi_pool_module(paddle.to_tensor(self.x), + paddle.to_tensor(self.boxes), + paddle.to_tensor(self.boxes_num)).numpy() expect_out = calc_psroi_pool(self.x, self.boxes, self.boxes_num, 2, 1.1, 8, 8) self.assertTrue(np.allclose(out, expect_out)) def test_dytype_is_float64(): psroi_pool_module = paddle.vision.ops.PSRoIPool(8, 1.1) - out = psroi_pool_module( - paddle.to_tensor(self.x, 'float64'), - paddle.to_tensor(self.boxes, 'float64'), - paddle.to_tensor(self.boxes_num, 'int32')).numpy() + out = psroi_pool_module(paddle.to_tensor(self.x, 'float64'), + paddle.to_tensor(self.boxes, 'float64'), + paddle.to_tensor(self.boxes_num, + 'int32')).numpy() expect_out = calc_psroi_pool(self.x, self.boxes, self.boxes_num, 2, 1.1, 8, 8) self.assertTrue(np.allclose(out, expect_out)) @@ -254,6 +259,7 @@ class TestPSROIPoolDynamicClassAPI(unittest.TestCase): class TestPSROIPoolBoxesNumError(unittest.TestCase): + def setUp(self): paddle.disable_static() self.x = paddle.uniform([2, 490, 28, 28], dtype='float32') @@ -261,22 +267,28 @@ class TestPSROIPoolBoxesNumError(unittest.TestCase): [[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]], 'float32') def test_errors(self): + def test_boxes_num_nums_error(): boxes_num = paddle.to_tensor([1, 5], 'int32') - out = paddle.vision.ops.psroi_pool( - self.x, self.boxes, boxes_num, output_size=7) + out = paddle.vision.ops.psroi_pool(self.x, + self.boxes, + boxes_num, + output_size=7) self.assertRaises(ValueError, test_boxes_num_nums_error) def test_boxes_num_length_error(): boxes_num = paddle.to_tensor([1, 1, 1], 'int32') - out = paddle.vision.ops.psroi_pool( - self.x, self.boxes, boxes_num, output_size=7) + out = paddle.vision.ops.psroi_pool(self.x, + self.boxes, + boxes_num, + output_size=7) self.assertRaises(ValueError, test_boxes_num_length_error) class TestPSROIPoolChannelError(unittest.TestCase): + def setUp(self): paddle.disable_static() self.x = paddle.uniform([2, 490, 28, 28], dtype='float32') @@ -285,6 +297,7 @@ class TestPSROIPoolChannelError(unittest.TestCase): self.output_size = 4 def test_errors(self): + def test_channel_error(): boxes_num = paddle.to_tensor([2, 1], 'int32') out = paddle.vision.ops.psroi_pool(self.x, self.boxes, boxes_num, @@ 
-294,15 +307,17 @@ class TestPSROIPoolChannelError(unittest.TestCase): class TestPSROIPoolStaticAPI(unittest.TestCase): + def setUp(self): paddle.enable_static() - self.x_placeholder = paddle.static.data( - name='x', shape=[2, 490, 28, 28]) + self.x_placeholder = paddle.static.data(name='x', + shape=[2, 490, 28, 28]) self.x = np.random.random([2, 490, 28, 28]).astype(np.float32) - self.boxes_placeholder = paddle.static.data( - name='boxes', shape=[3, 4], lod_level=1) - self.boxes = np.array( - [[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]]).astype(np.float32) + self.boxes_placeholder = paddle.static.data(name='boxes', + shape=[3, 4], + lod_level=1) + self.boxes = np.array([[1, 5, 8, 10], [4, 2, 6, 7], + [12, 12, 19, 21]]).astype(np.float32) self.boxes_num = np.array([1, 2]).astype(np.int32) def test_function_in_static(self): @@ -317,11 +332,13 @@ class TestPSROIPoolStaticAPI(unittest.TestCase): places.append(paddle.CUDAPlace(0)) for place in places: exe = paddle.static.Executor(place) - boxes_lod_data = paddle.fluid.create_lod_tensor(self.boxes, - [[1, 2]], place) + boxes_lod_data = paddle.fluid.create_lod_tensor( + self.boxes, [[1, 2]], place) out_res = exe.run(paddle.static.default_main_program(), - feed={'x': self.x, - 'boxes': boxes_lod_data}, + feed={ + 'x': self.x, + 'boxes': boxes_lod_data + }, fetch_list=[out.name]) self.assertTrue(np.allclose(out_res, expect_out)) diff --git a/python/paddle/fluid/tests/unittests/test_pull_gpups_sparse_op.py b/python/paddle/fluid/tests/unittests/test_pull_gpups_sparse_op.py index 07354f1b7b2..b15edb44d57 100644 --- a/python/paddle/fluid/tests/unittests/test_pull_gpups_sparse_op.py +++ b/python/paddle/fluid/tests/unittests/test_pull_gpups_sparse_op.py @@ -34,11 +34,15 @@ class TestPullGpupsSparse(unittest.TestCase): slots = [] with fluid.program_guard(train_program, startup_program): - l = fluid.layers.data( - name='input', shape=[1], dtype="int64", lod_level=1) + l = fluid.layers.data(name='input', + shape=[1], + dtype="int64", + lod_level=1) slots.append(l) - output = _pull_gpups_sparse( - slots, size=[11], is_distributed=True, is_sparse=True) + output = _pull_gpups_sparse(slots, + size=[11], + is_distributed=True, + is_sparse=True) cost = paddle.fluid.layers.mean(output) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) sgd_optimizer.minimize(cost, train_program) diff --git a/python/paddle/fluid/tests/unittests/test_put_along_axis_op.py b/python/paddle/fluid/tests/unittests/test_put_along_axis_op.py index 2662cd5250f..bbc383eaf61 100644 --- a/python/paddle/fluid/tests/unittests/test_put_along_axis_op.py +++ b/python/paddle/fluid/tests/unittests/test_put_along_axis_op.py @@ -27,6 +27,7 @@ paddle.enable_static() class TestPutAlongAxisOp(OpTest): + def setUp(self): self.init_data() self.reduce_op = "assign" @@ -68,6 +69,7 @@ class TestPutAlongAxisOp(OpTest): class TestPutAlongAxisAPI(unittest.TestCase): + def setUp(self): np.random.seed(0) self.shape = [1, 3] @@ -111,6 +113,7 @@ class TestPutAlongAxisAPI(unittest.TestCase): run(place) def test_api_dygraph(self): + def run(place): paddle.disable_static(place) x_tensor = paddle.to_tensor(self.x_np) @@ -122,9 +125,8 @@ class TestPutAlongAxisAPI(unittest.TestCase): np.put_along_axis(self.x_np, self.index_np, self.value_np, self.axis)) out_ref = self.x_np - self.assertEqual( - np.allclose( - out.numpy(), out_ref, rtol=1e-03), True) + self.assertEqual(np.allclose(out.numpy(), out_ref, rtol=1e-03), + True) # for ci coverage, numpy put_along_axis did not support argument of 'reduce' 
paddle.put_along_axis(x_tensor, index_tensor, value_tensor, @@ -138,6 +140,7 @@ class TestPutAlongAxisAPI(unittest.TestCase): run(place) def test_inplace_dygraph(self): + def run(place): paddle.disable_static(place) x_tensor = paddle.to_tensor(self.x_np) @@ -151,9 +154,8 @@ class TestPutAlongAxisAPI(unittest.TestCase): self.axis)) out_ref = self.x_np - self.assertEqual( - np.allclose( - x_tensor.numpy(), out_ref, rtol=1e-03), True) + self.assertEqual(np.allclose(x_tensor.numpy(), out_ref, rtol=1e-03), + True) paddle.enable_static() for place in self.place: @@ -161,6 +163,7 @@ class TestPutAlongAxisAPI(unittest.TestCase): class TestPutAlongAxisAPICase2(TestPutAlongAxisAPI): + def setUp(self): np.random.seed(0) self.shape = [2, 2] @@ -177,12 +180,13 @@ class TestPutAlongAxisAPICase2(TestPutAlongAxisAPI): class TestPutAlongAxisAPICase3(TestPutAlongAxisAPI): + def setUp(self): np.random.seed(0) self.shape = [2, 2] self.index_shape = [4, 2] - self.index_np = np.array( - [[0, 0], [1, 0], [0, 0], [1, 0]]).astype('int64') + self.index_np = np.array([[0, 0], [1, 0], [0, 0], [1, + 0]]).astype('int64') self.x_np = np.random.random(self.shape).astype(np.float32) self.place = [paddle.CPUPlace()] self.axis = 0 diff --git a/python/paddle/fluid/tests/unittests/test_py_func_op.py b/python/paddle/fluid/tests/unittests/test_py_func_op.py index 14b0eec9cbc..f0f791d62a7 100644 --- a/python/paddle/fluid/tests/unittests/test_py_func_op.py +++ b/python/paddle/fluid/tests/unittests/test_py_func_op.py @@ -65,8 +65,8 @@ def cross_entropy_grad(logits, labels, bwd_dout): N = logits.shape[1] dlogits = np.zeros([M, N]).astype(logits.dtype) for idx in six.moves.range(M): - dlogits[idx][labels[idx][0]] = -bwd_dout[idx] / logits[idx][labels[idx][ - 0]] + dlogits[idx][labels[idx] + [0]] = -bwd_dout[idx] / logits[idx][labels[idx][0]] return dlogits, None @@ -76,22 +76,20 @@ def simple_fc_net(img, label, use_py_func_op): hidden = fluid.layers.fc( hidden, size=200, - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0))) + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=1.0))) if not use_py_func_op: hidden = fluid.layers.tanh(hidden) else: new_hidden = fluid.default_main_program().current_block( - ).create_var( - name='hidden_{}'.format(idx), - dtype='float32', - shape=hidden.shape) - hidden = fluid.layers.py_func( - func=tanh, - x=hidden, - out=new_hidden, - backward_func=tanh_grad, - skip_vars_in_backward_input=hidden) + ).create_var(name='hidden_{}'.format(idx), + dtype='float32', + shape=hidden.shape) + hidden = fluid.layers.py_func(func=tanh, + x=hidden, + out=new_hidden, + backward_func=tanh_grad, + skip_vars_in_backward_input=hidden) prediction = fluid.layers.fc(hidden, size=10, act='softmax') if not use_py_func_op: @@ -99,17 +97,17 @@ def simple_fc_net(img, label, use_py_func_op): else: loss = fluid.default_main_program().current_block().create_var( name='loss', dtype='float32', shape=[-1, 1]) - loss = fluid.layers.py_func( - func=cross_entropy, - x=[prediction, label], - out=loss, - backward_func=cross_entropy_grad, - skip_vars_in_backward_input=loss) + loss = fluid.layers.py_func(func=cross_entropy, + x=[prediction, label], + out=loss, + backward_func=cross_entropy_grad, + skip_vars_in_backward_input=loss) dummy_var = fluid.default_main_program().current_block().create_var( name='test_tmp_var', dtype='float32', shape=[1]) - fluid.layers.py_func( - func=dummy_func_with_no_input, x=None, out=dummy_var) + fluid.layers.py_func(func=dummy_func_with_no_input, + x=None, + 
out=dummy_var) loss += dummy_var fluid.layers.py_func(func=dummy_func_with_no_output, x=loss, out=None) @@ -117,17 +115,15 @@ def simple_fc_net(img, label, use_py_func_op): dtype='float32', shape=[-1, 1]) dummy_var_out = fluid.default_main_program().current_block().create_var( dtype='float32', shape=[1]) - fluid.layers.py_func( - func=dummy_func_with_multi_input_output, - x=(loss, dummy_var), - out=(loss_out, dummy_var_out)) + fluid.layers.py_func(func=dummy_func_with_multi_input_output, + x=(loss, dummy_var), + out=(loss_out, dummy_var_out)) assert loss == loss_out and dummy_var == dummy_var_out, \ "py_func failed with multi input and output" - fluid.layers.py_func( - func=dummy_func_with_multi_input_output, - x=[loss, dummy_var], - out=[loss_out, dummy_var_out]) + fluid.layers.py_func(func=dummy_func_with_multi_input_output, + x=[loss, dummy_var], + out=[loss_out, dummy_var_out]) assert loss == loss_out and dummy_var == dummy_var_out, \ "py_func failed with multi input and output" @@ -137,8 +133,9 @@ def simple_fc_net(img, label, use_py_func_op): def reader(): for _ in six.moves.range(dev_cnt * 100): - yield np.random.random([784]), np.random.random_integers( - size=[1], low=0, high=9) + yield np.random.random([784]), np.random.random_integers(size=[1], + low=0, + high=9) def test_main(use_cuda, use_py_func_op, use_parallel_executor): @@ -165,8 +162,8 @@ def test_main(use_cuda, use_py_func_op, use_parallel_executor): train_cp = fluid.default_main_program() if use_parallel_executor: - train_cp = compiler.CompiledProgram(fluid.default_main_program( - )) + train_cp = compiler.CompiledProgram( + fluid.default_main_program()) train_cp = train_cp.with_data_parallel(loss_name=loss.name) fetch_list = [loss.name] else: @@ -183,6 +180,7 @@ def test_main(use_cuda, use_py_func_op, use_parallel_executor): class TestPyFuncOpUseExecutor(unittest.TestCase): + def setUp(self): self.use_parallel_executor = False @@ -201,6 +199,7 @@ class TestPyFuncOpUseExecutor(unittest.TestCase): class TestPyFuncOpUseParallelExecutor(TestPyFuncOpUseExecutor): + def setUp(self): self.use_parallel_executor = True diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_combination.py b/python/paddle/fluid/tests/unittests/test_py_reader_combination.py index 624927d809f..d3bc50bffe7 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_combination.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_combination.py @@ -19,18 +19,23 @@ import numpy as np class TestPyReaderCombination(unittest.TestCase): + def setUp(self): self.n1 = 10 self.n2 = 20 self.batch_size = 2 def create_reader(self, batch_num): + def __impl__(): for _ in range(batch_num): - image = np.random.uniform( - low=-1, high=1, size=[batch_num, 784]).astype('float32') - label = np.random.random_integers( - low=0, high=9, size=[batch_num, 1]).astype('int64') + image = np.random.uniform(low=-1, high=1, + size=[batch_num, + 784]).astype('float32') + label = np.random.random_integers(low=0, + high=9, + size=[batch_num, + 1]).astype('int64') yield image, label return __impl__ @@ -52,14 +57,17 @@ class TestPyReaderCombination(unittest.TestCase): def main_impl(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - image = fluid.layers.data( - name='image', dtype='float32', shape=[784]) + image = fluid.layers.data(name='image', + dtype='float32', + shape=[784]) label = fluid.layers.data(name='label', dtype='int64', shape=[1]) - py_reader1 = fluid.io.PyReader( - feed_list=[image, label], capacity=16, iterable=True) - 
py_reader2 = fluid.io.PyReader( - feed_list=[image, label], capacity=16, iterable=True) + py_reader1 = fluid.io.PyReader(feed_list=[image, label], + capacity=16, + iterable=True) + py_reader2 = fluid.io.PyReader(feed_list=[image, label], + capacity=16, + iterable=True) reader1 = paddle.reader.cache(self.create_reader(self.n1)) reader2 = paddle.reader.cache(self.create_reader(self.n2)) @@ -92,6 +100,7 @@ class TestPyReaderCombination(unittest.TestCase): class TestPyReaderCombination2(TestPyReaderCombination): + def setUp(self): self.n1 = 20 self.n2 = 10 @@ -99,6 +108,7 @@ class TestPyReaderCombination2(TestPyReaderCombination): class TestPyReaderCombination3(TestPyReaderCombination): + def setUp(self): self.n1 = 10 self.n2 = 10 diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py b/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py index f4fa419b91d..337cafbb124 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py @@ -19,34 +19,32 @@ import paddle class TestPyReaderErrorMsg(unittest.TestCase): + def test_check_input_array(self): fluid.reader.GeneratorLoader._check_input_array([ - np.random.randint( - 100, size=[2]), np.random.randint( - 100, size=[2]), np.random.randint( - 100, size=[2]) + np.random.randint(100, size=[2]), + np.random.randint(100, size=[2]), + np.random.randint(100, size=[2]) ]) - self.assertRaises( - TypeError, - fluid.reader.GeneratorLoader._check_input_array, [ - np.random.randint( - 100, size=[2]), np.random.randint( - 100, size=[1]), np.random.randint( - 100, size=[3]) - ]) + self.assertRaises(TypeError, + fluid.reader.GeneratorLoader._check_input_array, [ + np.random.randint(100, size=[2]), + np.random.randint(100, size=[1]), + np.random.randint(100, size=[3]) + ]) class TestDoubleBufferAPI(unittest.TestCase): + def test_double_buffer(self): paddle.enable_static() if fluid.core.is_compiled_with_cuda(): - reader = fluid.layers.py_reader( - capacity=64, - shapes=[(-1, 1, 28, 28), (-1, 1)], - dtypes=['float32', 'int64'], - use_double_buffer=False) - reader = fluid.layers.double_buffer( - reader, place=fluid.core.CUDAPlace(0)) + reader = fluid.layers.py_reader(capacity=64, + shapes=[(-1, 1, 28, 28), (-1, 1)], + dtypes=['float32', 'int64'], + use_double_buffer=False) + reader = fluid.layers.double_buffer(reader, + place=fluid.core.CUDAPlace(0)) image, label = fluid.layers.read_file(reader) diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_lod_level_share.py b/python/paddle/fluid/tests/unittests/test_py_reader_lod_level_share.py index 55dc3a7aa34..4b5e2b9711e 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_lod_level_share.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_lod_level_share.py @@ -17,6 +17,7 @@ import unittest class TestLoDLevelShare(unittest.TestCase): + def setUp(self): self.use_double_buffer = False @@ -35,6 +36,7 @@ class TestLoDLevelShare(unittest.TestCase): class TestLoDLevelShare2(TestLoDLevelShare): + def setUp(self): self.use_double_buffer = True diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_pin_memory.py b/python/paddle/fluid/tests/unittests/test_py_reader_pin_memory.py index 3aa359c0e0d..f03782cf665 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_pin_memory.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_pin_memory.py @@ -23,6 +23,7 @@ from threading import Thread def user_reader(inputs): + def _reader(): for d in inputs: yield d @@ 
-31,6 +32,7 @@ def user_reader(inputs): def batch_feeder(batch_reader, pin_memory=False, img_dtype="float32"): + def _feeder(): for batch_data in batch_reader(): sample_batch = [] @@ -49,6 +51,7 @@ def batch_feeder(batch_reader, pin_memory=False, img_dtype="float32"): class TestPyReader(unittest.TestCase): + def setUp(self): self.capacity = 10 self.shapes = [(-1, 3, 2, 1), (-1, 1)] @@ -57,29 +60,27 @@ class TestPyReader(unittest.TestCase): def test_pin_memory_pyreader(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() executor = fluid.Executor(place) - data_file = fluid.layers.py_reader( - capacity=self.capacity, - dtypes=self.dtypes, - lod_levels=self.lod_levels, - shapes=self.shapes) + data_file = fluid.layers.py_reader(capacity=self.capacity, + dtypes=self.dtypes, + lod_levels=self.lod_levels, + shapes=self.shapes) # feed_queue = data_file.queue read_out_data = fluid.layers.read_file(data_file) self.inputs = [] for _ in range(10): - sample = np.random.uniform( - low=0, high=1, size=[3, 2, 1]).astype("float32") + sample = np.random.uniform(low=0, high=1, + size=[3, 2, 1]).astype("float32") label = np.random.randint(low=0, high=10, dtype="int64") self.inputs.append((sample, label)) self.input_tensors = [] for d, l in batch_feeder( - paddle.batch( - user_reader(self.inputs), batch_size=2), + paddle.batch(user_reader(self.inputs), batch_size=2), pin_memory=True if fluid.core.is_compiled_with_cuda() else False)(): ta = fluid.LoDTensorArray() @@ -97,11 +98,10 @@ class TestPyReader(unittest.TestCase): self.batched_inputs.append([feed_d, feed_l]) data_file.decorate_tensor_provider( - batch_feeder( - paddle.batch( - user_reader(self.inputs), batch_size=2), - pin_memory=True - if fluid.core.is_compiled_with_cuda() else False)) + batch_feeder(paddle.batch(user_reader(self.inputs), + batch_size=2), + pin_memory=True + if fluid.core.is_compiled_with_cuda() else False)) executor.run(fluid.default_startup_program()) self.outputs = [] diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py b/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py index 3efe5aac884..f0757d2885d 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py @@ -26,6 +26,7 @@ def feed_data(feed_queue, inputs): class TestPyReader(unittest.TestCase): + def setUp(self): self.capacity = 10 self.batch_size_min = 10 @@ -43,15 +44,14 @@ class TestPyReader(unittest.TestCase): def main(self, use_thread=False): with fluid.program_guard(fluid.Program(), fluid.Program()): - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() executor = fluid.Executor(place) - data_file = fluid.layers.py_reader( - capacity=self.capacity, - dtypes=self.dtypes, - lod_levels=self.lod_levels, - shapes=self.shapes) + data_file = fluid.layers.py_reader(capacity=self.capacity, + dtypes=self.dtypes, + lod_levels=self.lod_levels, + shapes=self.shapes) feed_queue = data_file.queue read_out_data = fluid.layers.read_file(data_file) self.inputs = [] @@ -61,9 +61,10 @@ class TestPyReader(unittest.TestCase): batch_size = np.random.random_integers(self.batch_size_min, self.batch_size_max) for shape, dtype in zip(self.shapes, 
self.dtypes): - next_data = np.random.uniform( - low=0, high=1000, - size=(batch_size, ) + shape[1:]).astype(dtype) + next_data = np.random.uniform(low=0, + high=1000, + size=(batch_size, ) + + shape[1:]).astype(dtype) in_data.append( fluid.executor._as_lodtensor(next_data, place)) @@ -72,8 +73,8 @@ class TestPyReader(unittest.TestCase): executor.run(fluid.default_startup_program()) self.outputs = [] if use_thread: - thread = Thread( - target=feed_data, args=(feed_queue, self.inputs)) + thread = Thread(target=feed_data, + args=(feed_queue, self.inputs)) thread.start() for in_data in self.inputs: self.outputs.append( diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_return_list.py b/python/paddle/fluid/tests/unittests/test_py_reader_return_list.py index c6e18565078..c6e951997ea 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_return_list.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_return_list.py @@ -19,34 +19,38 @@ import numpy as np class TestPyReader(unittest.TestCase): + def setUp(self): self.batch_size = 32 self.epoch_num = 2 self.sample_num = 10 def test_returnlist(self): + def reader_creator_random_image(height, width): + def reader(): for i in range(self.sample_num): - yield np.random.uniform( - low=0, high=255, size=[height, width]), + yield np.random.uniform(low=0, + high=255, + size=[height, width]), return reader for return_list in [True, False]: with fluid.program_guard(fluid.Program(), fluid.Program()): - image = fluid.layers.data( - name='image', shape=[784, 784], dtype='float32') - reader = fluid.io.PyReader( - feed_list=[image], - capacity=4, - iterable=True, - return_list=return_list) + image = fluid.layers.data(name='image', + shape=[784, 784], + dtype='float32') + reader = fluid.io.PyReader(feed_list=[image], + capacity=4, + iterable=True, + return_list=return_list) user_defined_reader = reader_creator_random_image(784, 784) reader.decorate_sample_list_generator( - paddle.batch( - user_defined_reader, batch_size=self.batch_size), + paddle.batch(user_defined_reader, + batch_size=self.batch_size), fluid.core.CPUPlace()) # definition of network is omitted executor = fluid.Executor(fluid.core.CPUPlace()) diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_sample_generator.py b/python/paddle/fluid/tests/unittests/test_py_reader_sample_generator.py index 4efca5e2aaf..7f0cf633ed2 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_sample_generator.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_sample_generator.py @@ -23,6 +23,7 @@ os.environ['CPU_NUM'] = '1' def random_reader(sample_num): + def __impl__(): for _ in range(sample_num): yield np.random.random( @@ -33,6 +34,7 @@ def random_reader(sample_num): class TestCaseBase(unittest.TestCase): + def setUp(self): self.batch_size = 32 self.epoch_num = 2 @@ -52,21 +54,22 @@ class TestCaseBase(unittest.TestCase): def run_main(self, reader, use_sample_generator, iterable, drop_last): image = fluid.layers.data(name='image', dtype='float32', shape=[784]) label = fluid.layers.data(name='label', dtype='int64', shape=[1]) - py_reader = fluid.io.PyReader( - feed_list=[image, label], - capacity=16, - iterable=iterable, - use_double_buffer=False) + py_reader = fluid.io.PyReader(feed_list=[image, label], + capacity=16, + iterable=iterable, + use_double_buffer=False) batch_reader = paddle.batch(reader, self.batch_size, drop_last) all_datas = self.generate_all_data(batch_reader) if not use_sample_generator: - py_reader.decorate_sample_list_generator( - 
batch_reader, places=fluid.cpu_places()) + py_reader.decorate_sample_list_generator(batch_reader, + places=fluid.cpu_places()) else: - py_reader.decorate_sample_generator( - reader, self.batch_size, drop_last, places=fluid.cpu_places()) + py_reader.decorate_sample_generator(reader, + self.batch_size, + drop_last, + places=fluid.cpu_places()) if drop_last: batch_num = int(self.sample_num / self.batch_size) @@ -113,6 +116,7 @@ class TestCaseBase(unittest.TestCase): class TestCase1(TestCaseBase): + def setUp(self): self.batch_size = 32 self.epoch_num = 10 @@ -120,6 +124,7 @@ class TestCase1(TestCaseBase): class TestCase2(TestCaseBase): + def setUp(self): self.batch_size = 32 self.epoch_num = 2 @@ -127,6 +132,7 @@ class TestCase2(TestCaseBase): class TestCase3(TestCaseBase): + def setUp(self): self.batch_size = 32 self.epoch_num = 2 diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py index b5684de4b90..4be5a4ae948 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py @@ -24,6 +24,7 @@ import numpy as np import threading import multiprocessing import os + os.environ['CPU_NUM'] = str(4) @@ -114,13 +115,12 @@ def simple_fc_net(in_size, hidden, size=hidden_size, act='tanh', - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0))) + bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant( + value=1.0))) predict_label = fluid.layers.fc(hidden, size=class_num, act='softmax') loss = fluid.layers.mean( - fluid.layers.cross_entropy( - input=predict_label, label=label)) + fluid.layers.cross_entropy(input=predict_label, label=label)) optimizer = fluid.optimizer.Adam() optimizer.minimize(loss) @@ -128,6 +128,7 @@ def simple_fc_net(in_size, class TestPyReaderUsingExecutor(unittest.TestCase): + def setUp(self): self.in_size = 1000 self.hidden_sizes = [50, 30, 20] @@ -145,10 +146,14 @@ class TestPyReaderUsingExecutor(unittest.TestCase): for use_decorate_paddle_reader in [False, True]: print('Test Parameters:'), print({ - 'use_cuda': use_cuda, - 'use_parallel_executor': use_parallel_executor, - 'use_double_buffer': use_double_buffer, - 'use_feed_list': use_feed_list, + 'use_cuda': + use_cuda, + 'use_parallel_executor': + use_parallel_executor, + 'use_double_buffer': + use_double_buffer, + 'use_feed_list': + use_feed_list, 'use_decorate_paddle_reader': use_decorate_paddle_reader }) @@ -157,13 +162,15 @@ class TestPyReaderUsingExecutor(unittest.TestCase): use_decorate_paddle_reader) def tensor_reader(self, use_decorate_paddle_reader): + def reader(): for sample_id in range(self.batch_size * self.iterations * self.batch_size_times): in_data = np.random.uniform( low=0, high=1, size=(self.in_size, )).astype('float32') - label = np.random.random_integers( - low=0, high=self.class_num - 1, size=(1, )).astype('int64') + label = np.random.random_integers(low=0, + high=self.class_num - 1, + size=(1, )).astype('int64') reshaped_in_data = np.reshape(in_data, [1, -1]) reshaped_label = np.reshape(label, [1, -1]) @@ -239,8 +246,8 @@ class TestPyReaderUsingExecutor(unittest.TestCase): py_reader.decorate_sample_list_generator(batch_reader) py_reader.start() else: - thread = threading.Thread( - target=feed_data, args=(feed_queue, batch_reader)) + thread = threading.Thread(target=feed_data, + args=(feed_queue, batch_reader)) thread.daemon = True thread.start() diff --git 
a/python/paddle/fluid/tests/unittests/test_pylayer_op.py b/python/paddle/fluid/tests/unittests/test_pylayer_op.py index aadfb4d3944..d55e427f286 100644 --- a/python/paddle/fluid/tests/unittests/test_pylayer_op.py +++ b/python/paddle/fluid/tests/unittests/test_pylayer_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,13 +23,17 @@ from paddle.fluid.framework import _test_eager_guard, in_dygraph_mode class FakeTensor(paddle.fluid.core.VarBase): + def __init__(self): pass class TestPyLayer(unittest.TestCase): + def func_test_simple_pylayer_multiple_output(self): + class tanh(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x1, x2, func1, func2=paddle.square): ctx.func = func2 @@ -65,7 +69,9 @@ class TestPyLayer(unittest.TestCase): self.func_test_simple_pylayer_multiple_output() def func_test_simple_pylayer_return_none_with_no_grad(self): + class tanh(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x1, x2, func1, func2=paddle.square): ctx.func = func2 @@ -105,7 +111,9 @@ class TestPyLayer(unittest.TestCase): self.func_test_simple_pylayer_return_none_with_no_grad() def func_test_simple_pylayer_single_output(self): + class tanh(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x1, func1, func2=paddle.square): ctx.func = func2 @@ -137,12 +145,15 @@ class TestPyLayer(unittest.TestCase): self.func_test_simple_pylayer_single_output() def func_test_pylayer_num_output_match(self): + class tanh(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward( - ctx, - x1, - x2, ): + ctx, + x1, + x2, + ): return x1 + x2 @staticmethod @@ -163,7 +174,9 @@ class TestPyLayer(unittest.TestCase): self.func_test_pylayer_num_output_match() def func_test_pylayer_dtype(self): + class tanh(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x, dtype): y = paddle.cast(x, dtype) @@ -192,7 +205,9 @@ class TestPyLayer(unittest.TestCase): self.func_test_pylayer_dtype() def func_test_pylayer_Exception_forward(self): + class Layer_None1(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, *args): return None @@ -206,6 +221,7 @@ class TestPyLayer(unittest.TestCase): z = Layer_None1.apply(input1) class Layer_None2(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, *args): return [None, args[0]] @@ -219,6 +235,7 @@ class TestPyLayer(unittest.TestCase): z = Layer_None2.apply(input1) class Layer_one1(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, *args): return 1 @@ -233,6 +250,7 @@ class TestPyLayer(unittest.TestCase): z = Layer_one1.apply(input1) class Layer_one2(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, *args): return [1, 2, args[0]] @@ -242,10 +260,11 @@ class TestPyLayer(unittest.TestCase): return args input1 = paddle.randn([2, 3]).astype("float64") - # return int + # return int z = Layer_one2.apply(input1) class Layer_no_fw(EagerPyLayer if in_dygraph_mode() else 
PyLayer): + @staticmethod def backward(ctx, *args): return args @@ -260,7 +279,9 @@ class TestPyLayer(unittest.TestCase): self.func_test_pylayer_Exception_forward() def func_test_pylayer_nograd(self): + class tanh(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x1, func1, func2=paddle.square, xx=None): ctx.func = func2 @@ -283,7 +304,9 @@ class TestPyLayer(unittest.TestCase): self.func_test_pylayer_nograd() def func_test_pylayer_Exception_bk(self): + class Layer_bk_none1(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x): return x * 2 @@ -300,6 +323,7 @@ class TestPyLayer(unittest.TestCase): z.sum().backward() class Layer_bk_none2(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x1, x2): return x1 + x2 @@ -316,6 +340,7 @@ class TestPyLayer(unittest.TestCase): z.mean().backward() class Layer_bk_one1(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x): return x + x @@ -332,6 +357,7 @@ class TestPyLayer(unittest.TestCase): z.mean().backward() class Layer_bk_one2(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x1, x2): return x1 * 2, x2 * 5 @@ -349,6 +375,7 @@ class TestPyLayer(unittest.TestCase): z.mean().backward() class Layer_no_bk(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x): return x * 2, x * 5 @@ -362,6 +389,7 @@ class TestPyLayer(unittest.TestCase): z.mean().backward() class Layer_bk_match(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x): return x * 2, x * 5 @@ -383,7 +411,9 @@ class TestPyLayer(unittest.TestCase): self.func_test_pylayer_Exception_bk() def func_test_pylayer_bk_return_none(self): + class Layer_bk_none1(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x1, x2): return x1 + x2 @@ -402,6 +432,7 @@ class TestPyLayer(unittest.TestCase): z.mean().backward() class Layer_bk_none2(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x1, x2): return x1 * 2, x2 * 5 @@ -425,7 +456,9 @@ class TestPyLayer(unittest.TestCase): self.func_test_pylayer_bk_return_none() def func_test_pylayer_inplace(self): + class cus_tanh(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x): return x @@ -435,6 +468,7 @@ class TestPyLayer(unittest.TestCase): return dy class Layer(paddle.nn.Layer): + def __init__(self): super(Layer, self).__init__() @@ -461,6 +495,7 @@ class TestPyLayer(unittest.TestCase): with _test_eager_guard(): class cus_tanh(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x): ctx.mark_dirty(x) @@ -471,6 +506,7 @@ class TestPyLayer(unittest.TestCase): return dy class Layer(paddle.nn.Layer): + def __init__(self): super(Layer, self).__init__() @@ -495,6 +531,7 @@ class TestPyLayer(unittest.TestCase): with _test_eager_guard(): class cus_tanh(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x): ctx.mark_dirty(x) @@ -505,6 +542,7 @@ class TestPyLayer(unittest.TestCase): return dy class Layer(paddle.nn.Layer): + def __init__(self): super(Layer, self).__init__() @@ -527,6 +565,7 @@ class TestPyLayer(unittest.TestCase): with _test_eager_guard(): class cus_tanh(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x): ctx.mark_dirty(x) @@ -537,6 +576,7 @@ class TestPyLayer(unittest.TestCase): return dy class Layer(paddle.nn.Layer): + def __init__(self): super(Layer, 
self).__init__() @@ -556,7 +596,9 @@ class TestPyLayer(unittest.TestCase): self.assertTrue(data.grad is not None) def func_test_pylayer_inplace_and_leaf_exception(self): + class cus_pylayer_op(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x): if in_dygraph_mode(): @@ -568,6 +610,7 @@ class TestPyLayer(unittest.TestCase): return dy class Layer(paddle.nn.Layer): + def __init__(self): super(Layer, self).__init__() @@ -589,7 +632,9 @@ class TestPyLayer(unittest.TestCase): self.func_test_pylayer_inplace_and_leaf_exception() def func_test_backward_in_backward(self): + class cus_tanh(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x): temp = x.detach() @@ -619,7 +664,9 @@ class TestPyLayer(unittest.TestCase): self.func_test_backward_in_backward() def func_test_return_to_tensor(self): + class Tanh(EagerPyLayer if in_dygraph_mode() else PyLayer): + @staticmethod def forward(ctx, x1): y1 = paddle.tanh(x1) @@ -649,6 +696,7 @@ class TestPyLayer(unittest.TestCase): with _test_eager_guard(): class Tanh(EagerPyLayer): + @staticmethod def forward(ctx, x): return x, x + x @@ -666,6 +714,7 @@ class TestPyLayer(unittest.TestCase): with _test_eager_guard(): class Tanh(EagerPyLayer): + @staticmethod def forward(ctx, x): ctx.set_materialize_grads(False) @@ -684,6 +733,7 @@ class TestPyLayer(unittest.TestCase): with _test_eager_guard(): class Tanh(EagerPyLayer): + @staticmethod def forward(ctx, x): a = x + x @@ -704,6 +754,7 @@ class TestPyLayer(unittest.TestCase): with _test_eager_guard(): class Tanh(EagerPyLayer): + @staticmethod def forward(ctx, x): a = x + x @@ -725,8 +776,11 @@ class TestPyLayer(unittest.TestCase): class TestPyLayerReturnType(unittest.TestCase): + def test_forward_args_fake_tensor(self): + class Tanh(PyLayer): + @staticmethod def forward(ctx, x1): y1 = FakeTensor() @@ -742,7 +796,9 @@ class TestPyLayerReturnType(unittest.TestCase): y1, y2 = Tanh.apply(input1) def test_forward_kwargs_fake_tensor(self): + class Tanh(PyLayer): + @staticmethod def forward(ctx, x1): @@ -758,7 +814,9 @@ class TestPyLayerReturnType(unittest.TestCase): y = Tanh.apply(x1=input1) def test_forward_return_fake_tensor(self): + class Tanh(PyLayer): + @staticmethod def forward(ctx, x1): @@ -774,7 +832,9 @@ class TestPyLayerReturnType(unittest.TestCase): y = Tanh.apply(x1=input1) def test_forward_return_fake_tensor_tuple(self): + class Tanh(PyLayer): + @staticmethod def forward(ctx, x1): @@ -790,7 +850,9 @@ class TestPyLayerReturnType(unittest.TestCase): y = Tanh.apply(x1=input1) def test_backward_return_fake_tensor_tuple(self): + class Tanh(PyLayer): + @staticmethod def forward(ctx, x1, x2): return x1 + 1, x1 + 2 @@ -808,7 +870,9 @@ class TestPyLayerReturnType(unittest.TestCase): y.mean().backward() def test_backward_return_fake_tensor(self): + class Tanh(PyLayer): + @staticmethod def forward(ctx, x1): return x1 + 1, x1 + 2 diff --git a/python/paddle/fluid/tests/unittests/test_pyramid_hash_op.py b/python/paddle/fluid/tests/unittests/test_pyramid_hash_op.py index 9ffea2c565c..6f3f94253c8 100644 --- a/python/paddle/fluid/tests/unittests/test_pyramid_hash_op.py +++ b/python/paddle/fluid/tests/unittests/test_pyramid_hash_op.py @@ -18,6 +18,7 @@ import paddle.fluid as fluid class TestPyramidHashOpApi(unittest.TestCase): + def test_api(self): num_voc = 128 embed_dim = 64 @@ -38,13 +39,16 @@ class TestPyramidHashOpApi(unittest.TestCase): lr=0.002, param_attr=fluid.ParamAttr( name="PyramidHash_emb_0", - learning_rate=0, ), + learning_rate=0, + ), 
param_attr_wl=fluid.ParamAttr( name="Filter", - learning_rate=0, ), + learning_rate=0, + ), param_attr_bl=None, distribute_update_vars=["PyramidHash_emb_0"], - name=None, ) + name=None, + ) place = fluid.CPUPlace() x_tensor = fluid.create_lod_tensor( diff --git a/python/paddle/fluid/tests/unittests/test_python_bf16_numpy_datatype.py b/python/paddle/fluid/tests/unittests/test_python_bf16_numpy_datatype.py index a58d7d35807..ac5ca3d9a1b 100644 --- a/python/paddle/fluid/tests/unittests/test_python_bf16_numpy_datatype.py +++ b/python/paddle/fluid/tests/unittests/test_python_bf16_numpy_datatype.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,6 +18,7 @@ import unittest class TestBF16DataType(unittest.TestCase): + def test_matmul(self): a_bf16 = np.random.random((6, 7)).astype(bfloat16) b_bf16 = np.random.random((7, 8)).astype(bfloat16) diff --git a/python/paddle/fluid/tests/unittests/test_qr_op.py b/python/paddle/fluid/tests/unittests/test_qr_op.py index ecf65d16d34..338b08d1aa5 100644 --- a/python/paddle/fluid/tests/unittests/test_qr_op.py +++ b/python/paddle/fluid/tests/unittests/test_qr_op.py @@ -25,6 +25,7 @@ from op_test import OpTest class TestQrOp(OpTest): + def setUp(self): paddle.enable_static() np.random.seed(7) @@ -74,31 +75,37 @@ class TestQrOp(OpTest): self.check_output() def test_check_grad_normal(self): - self.check_grad( - ['X'], ['Q', 'R'], numeric_grad_delta=1e-5, max_relative_error=1e-6) + self.check_grad(['X'], ['Q', 'R'], + numeric_grad_delta=1e-5, + max_relative_error=1e-6) class TestQrOpCase1(TestQrOp): + def get_shape(self): return (10, 12) class TestQrOpCase2(TestQrOp): + def get_shape(self): return (16, 15) class TestQrOpCase3(TestQrOp): + def get_shape(self): return (2, 12, 16) class TestQrOpCase4(TestQrOp): + def get_shape(self): return (3, 16, 15) class TestQrOpCase5(TestQrOp): + def get_mode(self): return "complete" @@ -107,6 +114,7 @@ class TestQrOpCase5(TestQrOp): class TestQrOpCase6(TestQrOp): + def get_mode(self): return "complete" @@ -115,6 +123,7 @@ class TestQrOpCase6(TestQrOp): class TestQrAPI(unittest.TestCase): + def test_dygraph(self): paddle.disable_static() np.random.seed(7) @@ -176,8 +185,8 @@ class TestQrAPI(unittest.TestCase): ] modes = ["reduced", "complete", "r"] dtypes = ["float32", "float64"] - for tensor_shape, mode, dtype in itertools.product(tensor_shapes, modes, - dtypes): + for tensor_shape, mode, dtype in itertools.product( + tensor_shapes, modes, dtypes): run_qr_dygraph(tensor_shape, mode, dtype) def test_static(self): @@ -219,29 +228,27 @@ class TestQrAPI(unittest.TestCase): tmp_q, tmp_r = np.linalg.qr(a[coord], mode=mode) np_q[coord] = tmp_q np_r[coord] = tmp_r - x = paddle.fluid.data( - name="input", shape=shape, dtype=dtype) + x = paddle.fluid.data(name="input", + shape=shape, + dtype=dtype) if mode == "r": r = paddle.linalg.qr(x, mode=mode) exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), feed={"input": a}, fetch_list=[r]) - self.assertTrue( - np.allclose( - fetches[0], np_r, atol=1e-5)) + 
self.assertTrue(np.allclose(fetches[0], np_r, + atol=1e-5)) else: q, r = paddle.linalg.qr(x, mode=mode) exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), feed={"input": a}, fetch_list=[q, r]) - self.assertTrue( - np.allclose( - fetches[0], np_q, atol=1e-5)) - self.assertTrue( - np.allclose( - fetches[1], np_r, atol=1e-5)) + self.assertTrue(np.allclose(fetches[0], np_q, + atol=1e-5)) + self.assertTrue(np.allclose(fetches[1], np_r, + atol=1e-5)) tensor_shapes = [ (3, 5), @@ -256,8 +263,8 @@ class TestQrAPI(unittest.TestCase): ] modes = ["reduced", "complete", "r"] dtypes = ["float32", "float64"] - for tensor_shape, mode, dtype in itertools.product(tensor_shapes, modes, - dtypes): + for tensor_shape, mode, dtype in itertools.product( + tensor_shapes, modes, dtypes): run_qr_static(tensor_shape, mode, dtype) diff --git a/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py b/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py index f0368cd2bc3..3831abd1673 100644 --- a/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py +++ b/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py @@ -135,10 +135,14 @@ class TestMuitlpleQ(unittest.TestCase): def test_quantile_multiple_axis_keepdim(self): x = paddle.to_tensor(self.input_data) - paddle_res = paddle.quantile( - x, q=[0.1, 0.2, 0.3], axis=[1, 2], keepdim=True) - np_res = np.quantile( - self.input_data, q=[0.1, 0.2, 0.3], axis=[1, 2], keepdims=True) + paddle_res = paddle.quantile(x, + q=[0.1, 0.2, 0.3], + axis=[1, 2], + keepdim=True) + np_res = np.quantile(self.input_data, + q=[0.1, 0.2, 0.3], + axis=[1, 2], + keepdims=True) self.assertTrue(np.allclose(paddle_res.numpy(), np_res)) @@ -239,12 +243,12 @@ class TestQuantileRuntime(unittest.TestCase): paddle.enable_static() for (func, res_func) in API_list: for device in self.devices: - x = paddle.static.data( - name="x", shape=self.input_data.shape, dtype=paddle.float32) - x_fp64 = paddle.static.data( - name="x_fp64", - shape=self.input_data.shape, - dtype=paddle.float64) + x = paddle.static.data(name="x", + shape=self.input_data.shape, + dtype=paddle.float32) + x_fp64 = paddle.static.data(name="x_fp64", + shape=self.input_data.shape, + dtype=paddle.float64) results = func(x, q=0.5, axis=1) np_input_data = self.input_data.astype('float32') @@ -254,14 +258,16 @@ class TestQuantileRuntime(unittest.TestCase): exe = paddle.static.Executor(device) paddle_res, paddle_res_fp64 = exe.run( paddle.static.default_main_program(), - feed={"x": np_input_data, - "x_fp64": np_input_data_fp64}, + feed={ + "x": np_input_data, + "x_fp64": np_input_data_fp64 + }, fetch_list=[results, results_fp64]) np_res = res_func(np_input_data, q=0.5, axis=1) np_res_fp64 = res_func(np_input_data_fp64, q=0.5, axis=1) self.assertTrue( - np.allclose(paddle_res, np_res) and - np.allclose(paddle_res_fp64, np_res_fp64)) + np.allclose(paddle_res, np_res) + and np.allclose(paddle_res_fp64, np_res_fp64)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_query_op.py b/python/paddle/fluid/tests/unittests/test_query_op.py index fc8ce5ad5f6..ced08a0fc53 100644 --- a/python/paddle/fluid/tests/unittests/test_query_op.py +++ b/python/paddle/fluid/tests/unittests/test_query_op.py @@ -20,6 +20,7 @@ from paddle.fluid import core class TestCudnnVersion(unittest.TestCase): + def test_no_cudnn(self): cudnn_version = paddle.get_cudnn_version() if not core.is_compiled_with_cuda(): diff --git 
a/python/paddle/fluid/tests/unittests/test_queue.py b/python/paddle/fluid/tests/unittests/test_queue.py index cfb843d75eb..a4b01870d32 100644 --- a/python/paddle/fluid/tests/unittests/test_queue.py +++ b/python/paddle/fluid/tests/unittests/test_queue.py @@ -24,6 +24,7 @@ import paddle.fluid.core as core class TestQueue(unittest.TestCase): + def test_eq(self): """ test queue_generator op, enqueue op and dequeue op. @@ -33,36 +34,33 @@ class TestQueue(unittest.TestCase): startup_program = fluid.Program() value = np.random.rand(1) with fluid.program_guard(main_program, startup_program): - data_in = layers.create_global_var( - shape=[2, 3], - value=value, - dtype="float32", - persistable=True, - name='var_in') - data_out = layers.create_global_var( - shape=[2, 3], - value=value - 1.0, - dtype="float32", - persistable=True, - name='var_out') + data_in = layers.create_global_var(shape=[2, 3], + value=value, + dtype="float32", + persistable=True, + name='var_in') + data_out = layers.create_global_var(shape=[2, 3], + value=value - 1.0, + dtype="float32", + persistable=True, + name='var_out') startup_block = startup_program.block(0) queue_name = 'blocking_queue' - startup_block.create_var( - name=queue_name, persistable=True, type=core.VarDesc.VarType.RAW) - startup_block.append_op( - type="queue_generator", attrs={'names': [queue_name]}) + startup_block.create_var(name=queue_name, + persistable=True, + type=core.VarDesc.VarType.RAW) + startup_block.append_op(type="queue_generator", + attrs={'names': [queue_name]}) block = main_program.block(0) - block.append_op( - type='enqueue', - inputs={'X': data_in}, - attrs={'queue_name': queue_name}) - block.append_op( - type='dequeue', - outputs={'Out': [data_out]}, - attrs={'queue_name': queue_name}) + block.append_op(type='enqueue', + inputs={'X': data_in}, + attrs={'queue_name': queue_name}) + block.append_op(type='dequeue', + outputs={'Out': [data_out]}, + attrs={'queue_name': queue_name}) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(startup_program) ret = exe.run(main_program, fetch_list=[data_out.name]) diff --git a/python/paddle/fluid/tests/unittests/test_rad2deg.py b/python/paddle/fluid/tests/unittests/test_rad2deg.py index 9f117cbab9a..0299884a8bb 100644 --- a/python/paddle/fluid/tests/unittests/test_rad2deg.py +++ b/python/paddle/fluid/tests/unittests/test_rad2deg.py @@ -26,10 +26,11 @@ paddle.enable_static() class TestRad2degAPI(unittest.TestCase): + def setUp(self): self.x_dtype = 'float64' - self.x_np = np.array( - [3.142, -3.142, 6.283, -6.283, 1.570, -1.570]).astype(np.float64) + self.x_np = np.array([3.142, -3.142, 6.283, -6.283, 1.570, + -1.570]).astype(np.float64) self.x_shape = [6] self.out_np = np.rad2deg(self.x_np) @@ -40,8 +41,8 @@ class TestRad2degAPI(unittest.TestCase): x = fluid.data(name='input', dtype=self.x_dtype, shape=self.x_shape) out = paddle.rad2deg(x) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) res = exe.run(fluid.default_main_program(), feed={'input': self.x_np}, @@ -58,6 +59,7 @@ class TestRad2degAPI(unittest.TestCase): class TestRad2degAPI2(TestRad2degAPI): + def setUp(self): self.x_np = np.pi / 2 self.x_shape = [1] diff --git a/python/paddle/fluid/tests/unittests/test_rand_op.py 
b/python/paddle/fluid/tests/unittests/test_rand_op.py index 4b8fe8c7e47..d8b4de6036e 100644 --- a/python/paddle/fluid/tests/unittests/test_rand_op.py +++ b/python/paddle/fluid/tests/unittests/test_rand_op.py @@ -35,8 +35,8 @@ class TestRandOpError(unittest.TestCase): with program_guard(main_prog, start_prog): def test_Variable(): - x1 = fluid.create_lod_tensor( - np.zeros((4, 784)), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.zeros((4, 784)), [[1, 1, 1, 1]], + fluid.CPUPlace()) rand(x1) self.assertRaises(TypeError, test_Variable) @@ -71,8 +71,9 @@ class TestRandOp(unittest.TestCase): var_shape = fluid.data(name='var_shape', shape=[2], dtype="int64") result_3 = rand(var_shape) - var_shape_int32 = fluid.data( - name='var_shape_int32', shape=[2], dtype="int32") + var_shape_int32 = fluid.data(name='var_shape_int32', + shape=[2], + dtype="int32") result_4 = rand(var_shape_int32) exe.run(startup_program) @@ -81,8 +82,10 @@ class TestRandOp(unittest.TestCase): x2 = np.array([4, 3]).astype('int32') ret = exe.run( train_program, - feed={"var_shape": x1, - "var_shape_int32": x2}, + feed={ + "var_shape": x1, + "var_shape_int32": x2 + }, fetch_list=[result_1, result_1, result_2, result_3, result_4]) def test_run(self): @@ -117,6 +120,7 @@ class TestRandOpForDygraph(unittest.TestCase): class TestRandDtype(unittest.TestCase): + def test_default_dtype(self): paddle.disable_static() diff --git a/python/paddle/fluid/tests/unittests/test_randint_like.py b/python/paddle/fluid/tests/unittests/test_randint_like.py index c716fd54924..181a7f9763e 100644 --- a/python/paddle/fluid/tests/unittests/test_randint_like.py +++ b/python/paddle/fluid/tests/unittests/test_randint_like.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,6 +22,7 @@ from paddle.static import program_guard, Program # Test python API class TestRandintLikeAPI(unittest.TestCase): + def setUp(self): self.x_bool = np.zeros((10, 12)).astype("bool") self.x_int32 = np.zeros((10, 12)).astype("int32") @@ -38,25 +39,30 @@ class TestRandintLikeAPI(unittest.TestCase): paddle.enable_static() with program_guard(Program(), Program()): # results are from [-100, 100). 
- x_bool = paddle.fluid.data( - name="x_bool", shape=[10, 12], dtype="bool") - x_int32 = paddle.fluid.data( - name="x_int32", shape=[10, 12], dtype="int32") - x_int64 = paddle.fluid.data( - name="x_int64", shape=[10, 12], dtype="int64") - x_float16 = paddle.fluid.data( - name="x_float16", shape=[10, 12], dtype="float16") - x_float32 = paddle.fluid.data( - name="x_float32", shape=[10, 12], dtype="float32") - x_float64 = paddle.fluid.data( - name="x_float64", shape=[10, 12], dtype="float64") + x_bool = paddle.fluid.data(name="x_bool", + shape=[10, 12], + dtype="bool") + x_int32 = paddle.fluid.data(name="x_int32", + shape=[10, 12], + dtype="int32") + x_int64 = paddle.fluid.data(name="x_int64", + shape=[10, 12], + dtype="int64") + x_float16 = paddle.fluid.data(name="x_float16", + shape=[10, 12], + dtype="float16") + x_float32 = paddle.fluid.data(name="x_float32", + shape=[10, 12], + dtype="float32") + x_float64 = paddle.fluid.data(name="x_float64", + shape=[10, 12], + dtype="float64") exe = paddle.static.Executor(self.place) # x dtype is bool output dtype in ["bool", "int32", "int64", "float16", "float32", "float64"] outlist1 = [ - paddle.randint_like( - x_bool, low=-10, high=10, dtype=dtype) + paddle.randint_like(x_bool, low=-10, high=10, dtype=dtype) for dtype in self.dtype ] outs1 = exe.run(feed={'x_bool': self.x_bool}, fetch_list=outlist1) @@ -66,8 +72,7 @@ class TestRandintLikeAPI(unittest.TestCase): # x dtype is int32 output dtype in ["bool", "int32", "int64", "float16", "float32", "float64"] outlist2 = [ - paddle.randint_like( - x_int32, low=-5, high=10, dtype=dtype) + paddle.randint_like(x_int32, low=-5, high=10, dtype=dtype) for dtype in self.dtype ] outs2 = exe.run(feed={'x_int32': self.x_int32}, fetch_list=outlist2) @@ -77,8 +82,7 @@ class TestRandintLikeAPI(unittest.TestCase): # x dtype is int64 output dtype in ["bool", "int32", "int64", "float16", "float32", "float64"] outlist3 = [ - paddle.randint_like( - x_int64, low=-100, high=100, dtype=dtype) + paddle.randint_like(x_int64, low=-100, high=100, dtype=dtype) for dtype in self.dtype ] outs3 = exe.run(feed={'x_int64': self.x_int64}, fetch_list=outlist3) @@ -88,8 +92,7 @@ class TestRandintLikeAPI(unittest.TestCase): # x dtype is float16 output dtype in ["bool", "int32", "int64", "float16", "float32", "float64"] outlist4 = [ - paddle.randint_like( - x_float16, low=-3, high=25, dtype=dtype) + paddle.randint_like(x_float16, low=-3, high=25, dtype=dtype) for dtype in self.dtype ] outs4 = exe.run(feed={'x_float16': self.x_float16}, @@ -100,8 +103,7 @@ class TestRandintLikeAPI(unittest.TestCase): # x dtype is float32 output dtype in ["bool", "int32", "int64", "float16", "float32", "float64"] outlist5 = [ - paddle.randint_like( - x_float32, low=-25, high=25, dtype=dtype) + paddle.randint_like(x_float32, low=-25, high=25, dtype=dtype) for dtype in self.dtype ] outs5 = exe.run(feed={'x_float32': self.x_float32}, @@ -112,8 +114,7 @@ class TestRandintLikeAPI(unittest.TestCase): # x dtype is float64 output dtype in ["bool", "int32", "int64", "float16", "float32", "float64"] outlist6 = [ - paddle.randint_like( - x_float64, low=-16, high=16, dtype=dtype) + paddle.randint_like(x_float64, low=-16, high=16, dtype=dtype) for dtype in self.dtype ] outs6 = exe.run(feed={'x_float64': self.x_float64}, @@ -132,34 +133,45 @@ class TestRandintLikeAPI(unittest.TestCase): x_inputs = paddle.to_tensor(x) # self.dtype ["bool", "int32", "int64", "float16", "float32", "float64"] for dtype in self.dtype: - out = paddle.randint_like( - x_inputs, low=-100, high=100, 
dtype=dtype) + out = paddle.randint_like(x_inputs, + low=-100, + high=100, + dtype=dtype) self.assertTrue(out.numpy().dtype, np.dtype(dtype)) - self.assertTrue(((out.numpy() >= -100) & - (out.numpy() <= 100)).all(), True) + self.assertTrue( + ((out.numpy() >= -100) & (out.numpy() <= 100)).all(), True) paddle.enable_static() def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): - x_bool = paddle.fluid.data( - name="x_bool", shape=[10, 12], dtype="bool") - x_int32 = paddle.fluid.data( - name="x_int32", shape=[10, 12], dtype="int32") - x_int64 = paddle.fluid.data( - name="x_int64", shape=[10, 12], dtype="int64") - x_float16 = paddle.fluid.data( - name="x_float16", shape=[10, 12], dtype="float16") - x_float32 = paddle.fluid.data( - name="x_float32", shape=[10, 12], dtype="float32") - x_float64 = paddle.fluid.data( - name="x_float64", shape=[10, 12], dtype="float64") + x_bool = paddle.fluid.data(name="x_bool", + shape=[10, 12], + dtype="bool") + x_int32 = paddle.fluid.data(name="x_int32", + shape=[10, 12], + dtype="int32") + x_int64 = paddle.fluid.data(name="x_int64", + shape=[10, 12], + dtype="int64") + x_float16 = paddle.fluid.data(name="x_float16", + shape=[10, 12], + dtype="float16") + x_float32 = paddle.fluid.data(name="x_float32", + shape=[10, 12], + dtype="float32") + x_float64 = paddle.fluid.data(name="x_float64", + shape=[10, 12], + dtype="float64") # x dtype is bool # low is 5 and high is 5, low must less then high - self.assertRaises( - ValueError, paddle.randint_like, x_bool, low=5, high=5) + self.assertRaises(ValueError, + paddle.randint_like, + x_bool, + low=5, + high=5) # low(default value) is 0 and high is -5, low must less then high self.assertRaises(ValueError, paddle.randint_like, x_bool, high=-5) # if high is None, low must be greater than 0 @@ -167,8 +179,11 @@ class TestRandintLikeAPI(unittest.TestCase): # x dtype is int32 # low is 5 and high is 5, low must less then high - self.assertRaises( - ValueError, paddle.randint_like, x_int32, low=5, high=5) + self.assertRaises(ValueError, + paddle.randint_like, + x_int32, + low=5, + high=5) # low(default value) is 0 and high is -5, low must less then high self.assertRaises(ValueError, paddle.randint_like, x_int32, high=-5) # if high is None, low must be greater than 0 @@ -176,8 +191,11 @@ class TestRandintLikeAPI(unittest.TestCase): # x dtype is int64 # low is 5 and high is 5, low must less then high - self.assertRaises( - ValueError, paddle.randint_like, x_int64, low=5, high=5) + self.assertRaises(ValueError, + paddle.randint_like, + x_int64, + low=5, + high=5) # low(default value) is 0 and high is -5, low must less then high self.assertRaises(ValueError, paddle.randint_like, x_int64, high=-5) # if high is None, low must be greater than 0 @@ -185,36 +203,57 @@ class TestRandintLikeAPI(unittest.TestCase): # x dtype is float16 # low is 5 and high is 5, low must less then high - self.assertRaises( - ValueError, paddle.randint_like, x_float16, low=5, high=5) + self.assertRaises(ValueError, + paddle.randint_like, + x_float16, + low=5, + high=5) # low(default value) is 0 and high is -5, low must less then high - self.assertRaises( - ValueError, paddle.randint_like, x_float16, high=-5) + self.assertRaises(ValueError, + paddle.randint_like, + x_float16, + high=-5) # if high is None, low must be greater than 0 - self.assertRaises( - ValueError, paddle.randint_like, x_float16, low=-5) + self.assertRaises(ValueError, + paddle.randint_like, + x_float16, + low=-5) # x dtype is float32 # low is 5 and high is 
5, low must less then high - self.assertRaises( - ValueError, paddle.randint_like, x_float32, low=5, high=5) + self.assertRaises(ValueError, + paddle.randint_like, + x_float32, + low=5, + high=5) # low(default value) is 0 and high is -5, low must less then high - self.assertRaises( - ValueError, paddle.randint_like, x_float32, high=-5) + self.assertRaises(ValueError, + paddle.randint_like, + x_float32, + high=-5) # if high is None, low must be greater than 0 - self.assertRaises( - ValueError, paddle.randint_like, x_float32, low=-5) + self.assertRaises(ValueError, + paddle.randint_like, + x_float32, + low=-5) # x dtype is float64 # low is 5 and high is 5, low must less then high - self.assertRaises( - ValueError, paddle.randint_like, x_float64, low=5, high=5) + self.assertRaises(ValueError, + paddle.randint_like, + x_float64, + low=5, + high=5) # low(default value) is 0 and high is -5, low must less then high - self.assertRaises( - ValueError, paddle.randint_like, x_float64, high=-5) + self.assertRaises(ValueError, + paddle.randint_like, + x_float64, + high=-5) # if high is None, low must be greater than 0 - self.assertRaises( - ValueError, paddle.randint_like, x_float64, low=-5) + self.assertRaises(ValueError, + paddle.randint_like, + x_float64, + low=-5) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_randint_op.py b/python/paddle/fluid/tests/unittests/test_randint_op.py index 361f4d280f7..f5d18a9268f 100644 --- a/python/paddle/fluid/tests/unittests/test_randint_op.py +++ b/python/paddle/fluid/tests/unittests/test_randint_op.py @@ -35,6 +35,7 @@ def output_hist(out): class TestRandintOp(OpTest): + def setUp(self): self.op_type = "randint" self.inputs = {} @@ -50,9 +51,8 @@ class TestRandintOp(OpTest): def verify_output(self, outs): hist, prob = self.output_hist(np.array(outs[0])) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.001), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.001), + "hist: " + str(hist)) def test_check_output_eager(self): with _test_eager_guard(): @@ -60,6 +60,7 @@ class TestRandintOp(OpTest): class TestRandintOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): self.assertRaises(TypeError, paddle.randint, 5, shape=np.array([2])) @@ -69,8 +70,10 @@ class TestRandintOpError(unittest.TestCase): self.assertRaises(TypeError, paddle.randint, 5, shape=['2']) shape_tensor = paddle.static.data('X', [1]) self.assertRaises(TypeError, paddle.randint, 5, shape=shape_tensor) - self.assertRaises( - TypeError, paddle.randint, 5, shape=[shape_tensor]) + self.assertRaises(TypeError, + paddle.randint, + 5, + shape=[shape_tensor]) def test_errors_eager(self): with _test_eager_guard(): @@ -78,6 +81,7 @@ class TestRandintOpError(unittest.TestCase): class TestRandintOp_attr_tensorlist(OpTest): + def setUp(self): self.op_type = "randint" self.new_shape = (10000, 784) @@ -98,9 +102,8 @@ class TestRandintOp_attr_tensorlist(OpTest): def verify_output(self, outs): hist, prob = self.output_hist(np.array(outs[0])) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.001), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.001), + "hist: " + str(hist)) def test_check_output_eager(self): with _test_eager_guard(): @@ -108,6 +111,7 @@ class TestRandintOp_attr_tensorlist(OpTest): class TestRandint_attr_tensor(OpTest): + def setUp(self): self.op_type = "randint" self.inputs = {"ShapeTensor": np.array([10000, 784]).astype("int64")} @@ -123,9 
+127,8 @@ class TestRandint_attr_tensor(OpTest): def verify_output(self, outs): hist, prob = self.output_hist(np.array(outs[0])) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.001), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.001), + "hist: " + str(hist)) def test_check_output_eager(self): with _test_eager_guard(): @@ -134,29 +137,39 @@ class TestRandint_attr_tensor(OpTest): # Test python API class TestRandintAPI(unittest.TestCase): + def test_api(self): with program_guard(Program(), Program()): # results are from [0, 5). out1 = paddle.randint(5) # shape is a list and dtype is 'int32' - out2 = paddle.randint( - low=-100, high=100, shape=[64, 64], dtype='int32') + out2 = paddle.randint(low=-100, + high=100, + shape=[64, 64], + dtype='int32') # shape is a tuple and dtype is 'int64' - out3 = paddle.randint( - low=-100, high=100, shape=(32, 32, 3), dtype='int64') + out3 = paddle.randint(low=-100, + high=100, + shape=(32, 32, 3), + dtype='int64') # shape is a tensorlist and dtype is 'float32' dim_1 = paddle.fluid.layers.fill_constant([1], "int64", 32) dim_2 = paddle.fluid.layers.fill_constant([1], "int32", 50) - out4 = paddle.randint( - low=-100, high=100, shape=[dim_1, 5, dim_2], dtype='int32') + out4 = paddle.randint(low=-100, + high=100, + shape=[dim_1, 5, dim_2], + dtype='int32') # shape is a tensor and dtype is 'float64' - var_shape = paddle.static.data( - name='var_shape', shape=[2], dtype="int64") - out5 = paddle.randint( - low=1, high=1000, shape=var_shape, dtype='int64') - - place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + var_shape = paddle.static.data(name='var_shape', + shape=[2], + dtype="int64") + out5 = paddle.randint(low=1, + high=1000, + shape=var_shape, + dtype='int64') + + place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() exe = paddle.static.Executor(place) outs = exe.run( feed={'var_shape': np.array([100, 100]).astype('int64')}, @@ -168,6 +181,7 @@ class TestRandintAPI(unittest.TestCase): class TestRandintImperative(unittest.TestCase): + def test_api(self): paddle.disable_static() @@ -189,6 +203,7 @@ class TestRandintImperative(unittest.TestCase): class TestRandomValue(unittest.TestCase): + def test_fixed_random_number(self): # Test GPU Fixed random number, which is generated by 'curandStatePhilox4_32_10_t' if not paddle.is_compiled_with_cuda(): @@ -212,8 +227,8 @@ class TestRandomValue(unittest.TestCase): paddle.set_device('gpu') paddle.seed(100) - x = paddle.randint( - -10000, 10000, [32, 3, 1024, 1024], dtype='int32').numpy() + x = paddle.randint(-10000, 10000, [32, 3, 1024, 1024], + dtype='int32').numpy() self.assertTrue(x.mean(), -0.7517569760481516) self.assertTrue(x.std(), 5773.696619107639) expect = [2535, 2109, 5916, -5011, -261] @@ -223,8 +238,8 @@ class TestRandomValue(unittest.TestCase): expect = [881, 1560, 1100, 9664, 1669] self.assertTrue(np.array_equal(x[30, 2, 1000, 1000:1005], expect)) - x = paddle.randint( - -10000, 10000, [32, 3, 1024, 1024], dtype='int64').numpy() + x = paddle.randint(-10000, 10000, [32, 3, 1024, 1024], + dtype='int64').numpy() self.assertTrue(x.mean(), -1.461287518342336) self.assertTrue(x.std(), 5773.023477548159) expect = [7213, -9597, 754, 8129, -1158] diff --git a/python/paddle/fluid/tests/unittests/test_randn_op.py b/python/paddle/fluid/tests/unittests/test_randn_op.py index 6d33b468ee1..8347411192e 100644 --- a/python/paddle/fluid/tests/unittests/test_randn_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_randn_op.py @@ -22,6 +22,7 @@ from paddle.static import program_guard, Program class TestRandnOp(unittest.TestCase): + def test_api(self): shape = [1000, 784] train_program = Program() @@ -37,12 +38,11 @@ class TestRandnOp(unittest.TestCase): var_shape = paddle.static.data('X', [2], 'int32') x4 = paddle.randn(var_shape) - place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() exe = paddle.static.Executor(place) res = exe.run(train_program, - feed={'X': np.array( - shape, dtype='int32')}, + feed={'X': np.array(shape, dtype='int32')}, fetch_list=[x1, x2, x3, x4]) for out in res: @@ -51,10 +51,11 @@ class TestRandnOp(unittest.TestCase): class TestRandnOpForDygraph(unittest.TestCase): + def test_api(self): shape = [1000, 784] - place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() paddle.disable_static(place) x1 = paddle.randn(shape, 'float32') x2 = paddle.randn(shape, 'float64') @@ -73,6 +74,7 @@ class TestRandnOpForDygraph(unittest.TestCase): class TestRandnOpError(unittest.TestCase): + def test_error(self): with program_guard(Program(), Program()): # The argument shape's size of randn_op should not be 0. diff --git a/python/paddle/fluid/tests/unittests/test_random_crop_op.py b/python/paddle/fluid/tests/unittests/test_random_crop_op.py index 98e060f69d2..7b15899bab2 100644 --- a/python/paddle/fluid/tests/unittests/test_random_crop_op.py +++ b/python/paddle/fluid/tests/unittests/test_random_crop_op.py @@ -22,6 +22,7 @@ import paddle.fluid as fluid class TestRandomCropOp(OpTest): + def setUp(self): to_crop = np.array([[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]] * 5).astype(np.int32) @@ -47,6 +48,7 @@ class TestRandomCropOp(OpTest): class TestRandomCropOpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program()): @@ -57,15 +59,17 @@ class TestRandomCropOpError(unittest.TestCase): self.assertRaises(TypeError, test_x_type) def test_x_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[None, 3, 256, 256], dtype='float16') + x2 = fluid.layers.data(name='x2', + shape=[None, 3, 256, 256], + dtype='float16') fluid.layers.random_crop(x2) self.assertRaises(TypeError, test_x_dtype) def test_shape_type(): - x3 = fluid.layers.data( - name='x3', shape=[None, 3, 256, 256], dtype='float32') + x3 = fluid.layers.data(name='x3', + shape=[None, 3, 256, 256], + dtype='float32') fluid.layers.random_crop(x3, shape=1) self.assertRaises(TypeError, test_shape_type) diff --git a/python/paddle/fluid/tests/unittests/test_random_routing_op.py b/python/paddle/fluid/tests/unittests/test_random_routing_op.py index e4bb7c5ca5f..d4eadd268cc 100644 --- a/python/paddle/fluid/tests/unittests/test_random_routing_op.py +++ b/python/paddle/fluid/tests/unittests/test_random_routing_op.py @@ -42,14 +42,15 @@ def random_routing(topk_idx, topk_value, prob, topk=2): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestNumberCountAPIFp32(unittest.TestCase): + def setUp(self): self.dtype = "float32" self.init() def init(self): self.upper_range = 8 - self.x = np.random.randint( - -1, self.upper_range, size=(200, 2)).astype('int64') + self.x = np.random.randint(-1, self.upper_range, + size=(200, 2)).astype('int64') self.prob = np.random.random((self.x.shape[0], )).astype(self.dtype) 
self.topk_value = np.random.random(self.x.shape).astype(self.dtype) self.out = random_routing(self.x, self.topk_value, @@ -73,6 +74,7 @@ class TestNumberCountAPIFp32(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestNumberCountAPIFp16(TestNumberCountAPIFp32): + def setUp(self): self.dtype = "float16" self.init() diff --git a/python/paddle/fluid/tests/unittests/test_random_seed.py b/python/paddle/fluid/tests/unittests/test_random_seed.py index 617c0e61da8..f4d16a0a81e 100644 --- a/python/paddle/fluid/tests/unittests/test_random_seed.py +++ b/python/paddle/fluid/tests/unittests/test_random_seed.py @@ -39,17 +39,23 @@ class TestGeneratorSeed(unittest.TestCase): x = fluid.layers.uniform_random([10], dtype="float32", min=0.0, max=1.0) st1 = gen.get_state() - x1 = fluid.layers.uniform_random( - [10], dtype="float32", min=0.0, max=1.0) + x1 = fluid.layers.uniform_random([10], + dtype="float32", + min=0.0, + max=1.0) gen.set_state(st1) print(gen.get_state()) - x2 = fluid.layers.uniform_random( - [10], dtype="float32", min=0.0, max=1.0) + x2 = fluid.layers.uniform_random([10], + dtype="float32", + min=0.0, + max=1.0) paddle.seed(12312321111) - x3 = fluid.layers.uniform_random( - [10], dtype="float32", min=0.0, max=1.0) + x3 = fluid.layers.uniform_random([10], + dtype="float32", + min=0.0, + max=1.0) x_np = x.numpy() x1_np = x1.numpy() @@ -100,13 +106,17 @@ class TestGeneratorSeed(unittest.TestCase): gen = paddle.seed(111111111) st = gen.get_state() # x = np.arange(1,101).reshape(2,50).astype("float32") - x = fluid.layers.uniform_random( - [2, 10], dtype="float32", min=0.0, max=1.0) + x = fluid.layers.uniform_random([2, 10], + dtype="float32", + min=0.0, + max=1.0) y = fluid.layers.dropout(x, 0.5) gen.manual_seed(111111111) #gen.set_state(st) - x1 = fluid.layers.uniform_random( - [2, 10], dtype="float32", min=0.0, max=1.0) + x1 = fluid.layers.uniform_random([2, 10], + dtype="float32", + min=0.0, + max=1.0) y1 = fluid.layers.dropout(x1, 0.5) y_np = y.numpy() y1_np = y1.numpy() @@ -376,23 +386,31 @@ class TestGeneratorSeed(unittest.TestCase): fluid.enable_dygraph() gen.manual_seed(12312321111) - x = fluid.layers.uniform_random( - [10, 10], dtype="float32", min=0.0, max=1.0) + x = fluid.layers.uniform_random([10, 10], + dtype="float32", + min=0.0, + max=1.0) y = fluid.layers.sampling_id(x) st1 = gen.get_state() - x1 = fluid.layers.uniform_random( - [10, 10], dtype="float32", min=0.0, max=1.0) + x1 = fluid.layers.uniform_random([10, 10], + dtype="float32", + min=0.0, + max=1.0) y1 = fluid.layers.sampling_id(x) gen.set_state(st1) - x2 = fluid.layers.uniform_random( - [10, 10], dtype="float32", min=0.0, max=1.0) + x2 = fluid.layers.uniform_random([10, 10], + dtype="float32", + min=0.0, + max=1.0) y2 = fluid.layers.sampling_id(x) gen.manual_seed(12312321111) - x3 = fluid.layers.uniform_random( - [10, 10], dtype="float32", min=0.0, max=1.0) + x3 = fluid.layers.uniform_random([10, 10], + dtype="float32", + min=0.0, + max=1.0) y3 = fluid.layers.sampling_id(x) x_np = y.numpy() @@ -457,13 +475,13 @@ class TestGeneratorSeed(unittest.TestCase): result_1 = fluid.layers.fc( input=x, size=10, - param_attr=fluid.initializer.TruncatedNormal( - loc=0.0, scale=2.0)) + param_attr=fluid.initializer.TruncatedNormal(loc=0.0, + scale=2.0)) result_2 = fluid.layers.fc( input=x, size=10, - param_attr=fluid.initializer.TruncatedNormal( - loc=0.0, scale=2.0)) + param_attr=fluid.initializer.TruncatedNormal(loc=0.0, + scale=2.0)) exe = fluid.Executor(fluid.CPUPlace()) 
exe.run(startup_program) diff --git a/python/paddle/fluid/tests/unittests/test_randperm_op.py b/python/paddle/fluid/tests/unittests/test_randperm_op.py index deb0a9a0821..5a75e839397 100644 --- a/python/paddle/fluid/tests/unittests/test_randperm_op.py +++ b/python/paddle/fluid/tests/unittests/test_randperm_op.py @@ -71,8 +71,8 @@ class TestRandpermOp(OpTest): def verify_output(self, outs): out_np = np.array(outs[0]) - self.assertTrue( - check_randperm_out(self.n, out_np), msg=error_msg(out_np)) + self.assertTrue(check_randperm_out(self.n, out_np), + msg=error_msg(out_np)) def test_eager(self): with _test_eager_guard(): @@ -80,26 +80,31 @@ class TestRandpermOp(OpTest): class TestRandpermOpN(TestRandpermOp): + def init_attrs(self): self.n = 10000 class TestRandpermOpInt32(TestRandpermOp): + def init_attrs(self): self.dtype = "int32" class TestRandpermOpFloat32(TestRandpermOp): + def init_attrs(self): self.dtype = "float32" class TestRandpermOpFloat64(TestRandpermOp): + def init_attrs(self): self.dtype = "float64" class TestRandpermOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): self.assertRaises(ValueError, paddle.randperm, -3) @@ -107,10 +112,11 @@ class TestRandpermOpError(unittest.TestCase): class TestRandpermAPI(unittest.TestCase): + def test_out(self): n = 10 - place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() with program_guard(Program(), Program()): x1 = paddle.randperm(n) x2 = paddle.randperm(n, 'float32') @@ -125,18 +131,20 @@ class TestRandpermAPI(unittest.TestCase): class TestRandpermImperative(unittest.TestCase): + def test_out(self): paddle.disable_static() n = 10 for dtype in ['int32', np.int64, 'float32', 'float64']: data_p = paddle.randperm(n, dtype) data_np = data_p.numpy() - self.assertTrue( - check_randperm_out(n, data_np), msg=error_msg(data_np)) + self.assertTrue(check_randperm_out(n, data_np), + msg=error_msg(data_np)) paddle.enable_static() class TestRandpermEager(unittest.TestCase): + def test_out(self): paddle.disable_static() n = 10 @@ -144,12 +152,13 @@ class TestRandpermEager(unittest.TestCase): for dtype in ['int32', np.int64, 'float32', 'float64']: data_p = paddle.randperm(n, dtype) data_np = data_p.numpy() - self.assertTrue( - check_randperm_out(n, data_np), msg=error_msg(data_np)) + self.assertTrue(check_randperm_out(n, data_np), + msg=error_msg(data_np)) paddle.enable_static() class TestRandomValue(unittest.TestCase): + def test_fixed_random_number(self): # Test GPU Fixed random number, which is generated by 'curandStatePhilox4_32_10_t' if not paddle.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_range.py b/python/paddle/fluid/tests/unittests/test_range.py index e19c1b227f5..3df893b0b59 100644 --- a/python/paddle/fluid/tests/unittests/test_range.py +++ b/python/paddle/fluid/tests/unittests/test_range.py @@ -26,6 +26,7 @@ def arange_wrapper(start, end, step, dtype=None): class TestRangeOp(OpTest): + def setUp(self): self.op_type = "range" self.init_config() @@ -36,8 +37,9 @@ class TestRangeOp(OpTest): } self.outputs = { - 'Out': np.arange(self.case[0], self.case[1], - self.case[2]).astype(self.dtype) + 'Out': + np.arange(self.case[0], self.case[1], + self.case[2]).astype(self.dtype) } def init_config(self): @@ -50,6 +52,7 @@ class TestRangeOp(OpTest): class TestFloatRangeOpCase0(TestRangeOp): + def init_config(self): self.dtype = np.float32 self.python_api = 
partial(arange_wrapper, dtype=self.dtype) @@ -57,6 +60,7 @@ class TestFloatRangeOpCase0(TestRangeOp): class TestInt32RangeOpCase0(TestRangeOp): + def init_config(self): self.dtype = np.int32 self.python_api = partial(arange_wrapper, dtype=self.dtype) @@ -64,6 +68,7 @@ class TestInt32RangeOpCase0(TestRangeOp): class TestInt32RangeOpCase1(TestRangeOp): + def init_config(self): self.dtype = np.int32 self.python_api = partial(arange_wrapper, dtype=self.dtype) @@ -71,6 +76,7 @@ class TestInt32RangeOpCase1(TestRangeOp): class TestInt32RangeOpCase2(TestRangeOp): + def init_config(self): self.dtype = np.int32 self.python_api = partial(arange_wrapper, dtype=self.dtype) diff --git a/python/paddle/fluid/tests/unittests/test_rank_attention_op.py b/python/paddle/fluid/tests/unittests/test_rank_attention_op.py index 64d564c223f..1cca1378232 100644 --- a/python/paddle/fluid/tests/unittests/test_rank_attention_op.py +++ b/python/paddle/fluid/tests/unittests/test_rank_attention_op.py @@ -144,6 +144,7 @@ def gen_rank_offset(pv_nums, max_rank): class TestRankAttentionOpComplex(OpTest): + def config(self): self.pv_num = 100 self.x_feat = 10 @@ -161,8 +162,8 @@ class TestRankAttentionOpComplex(OpTest): ] rank_para = np.random.random(rank_para_shape).astype(self.dtype) np_out, np_input_help, np_param_help, np_ins_rank = np_rank_attention( - input, - np.array(rank_offset), rank_para, self.max_rank, self.pv_num * 7) + input, np.array(rank_offset), rank_para, self.max_rank, + self.pv_num * 7) self.inputs = { "X": input, "RankOffset": np.array(rank_offset).astype("int32"), @@ -185,6 +186,7 @@ class TestRankAttentionOpComplex(OpTest): class TestRankAttentionOpCpu(OpTest): + def config(self): self.pv_num = 100 self.x_feat = 10 @@ -202,8 +204,8 @@ class TestRankAttentionOpCpu(OpTest): ] rank_para = np.random.random(rank_para_shape).astype(self.dtype) np_out, np_input_help, np_param_help, np_ins_rank = np_rank_attention( - input, - np.array(rank_offset), rank_para, self.max_rank, self.pv_num * 7) + input, np.array(rank_offset), rank_para, self.max_rank, + self.pv_num * 7) self.inputs = { "X": input, "RankOffset": np.array(rank_offset).astype("int32"), diff --git a/python/paddle/fluid/tests/unittests/test_rank_loss_op.py b/python/paddle/fluid/tests/unittests/test_rank_loss_op.py index c4851bc274b..eb29c68daf7 100644 --- a/python/paddle/fluid/tests/unittests/test_rank_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_rank_loss_op.py @@ -22,6 +22,7 @@ from paddle.fluid import Program, program_guard class TestRankLossOp(OpTest): + def setUp(self): self.op_type = "rank_loss" shape = (100, 1) @@ -57,36 +58,42 @@ class TestRankLossOp(OpTest): class TestRankLossOp1(TestRankLossOp): + def set_shape(self): batch_size = 100 return (batch_size), (batch_size, 1), (batch_size, 1) class TestRankLossOp2(TestRankLossOp): + def set_shape(self): batch_size = 100 return (batch_size, 1), (batch_size), (batch_size, 1) class TestRankLossOp3(TestRankLossOp): + def set_shape(self): batch_size = 100 return (batch_size, 1), (batch_size, 1), (batch_size) class TestRankLossOp4(TestRankLossOp): + def set_shape(self): batch_size = 100 return (batch_size), (batch_size), (batch_size, 1) class TestRankLossOp5(TestRankLossOp): + def set_shape(self): batch_size = 100 return (batch_size), (batch_size), (batch_size) class TestRankLossOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): label = fluid.data(name="label", shape=[16, 1], dtype="float32") diff --git 
a/python/paddle/fluid/tests/unittests/test_raw_program_optimizer.py b/python/paddle/fluid/tests/unittests/test_raw_program_optimizer.py index 34930e3577b..43108fb4ab4 100644 --- a/python/paddle/fluid/tests/unittests/test_raw_program_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_raw_program_optimizer.py @@ -25,6 +25,7 @@ import os class TestRawProgramOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "0" os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" @@ -35,16 +36,15 @@ class TestRawProgramOptimizer(unittest.TestCase): prediction = paddle.static.nn.fc(x=[fc_2], size=label_dim, activation='softmax') - cost = paddle.nn.functional.cross_entropy( - input=prediction, label=input_y) + cost = paddle.nn.functional.cross_entropy(input=prediction, + label=input_y) avg_cost = paddle.mean(x=cost) return avg_cost def gen_data(self): return { "x": np.random.random(size=(128, 32)).astype('float32'), - "y": np.random.randint( - 2, size=(128, 1)).astype('int64') + "y": np.random.randint(2, size=(128, 1)).astype('int64') } def test_single_gpu(self): @@ -56,10 +56,12 @@ class TestRawProgramOptimizer(unittest.TestCase): strategy.without_graph_optimization = True with fluid.program_guard(sharding_program, sharding_startup_program): with fluid.unique_name.guard(): - input_x = paddle.static.data( - name="x", shape=[None, 32], dtype='float32') - input_y = paddle.static.data( - name="y", shape=[None, 1], dtype='int64') + input_x = paddle.static.data(name="x", + shape=[None, 32], + dtype='float32') + input_y = paddle.static.data(name="y", + shape=[None, 1], + dtype='int64') cost = self.mlp(input_x=input_x, input_y=input_y) output_name = cost.name optimizer = fleet.distributed_optimizer(fluid.optimizer.Adam(), diff --git a/python/paddle/fluid/tests/unittests/test_reader_reset.py b/python/paddle/fluid/tests/unittests/test_reader_reset.py index 2cef896aa75..bb69083e785 100644 --- a/python/paddle/fluid/tests/unittests/test_reader_reset.py +++ b/python/paddle/fluid/tests/unittests/test_reader_reset.py @@ -14,6 +14,7 @@ from __future__ import print_function import os + os.environ['CPU_NUM'] = str(1) import paddle.fluid as fluid from paddle.fluid import compiler @@ -23,7 +24,9 @@ import unittest class TestReaderReset(unittest.TestCase): + def prepare_data(self): + def fake_data_generator(): for n in range(self.total_ins_num): yield np.ones(self.ins_shape) * n, n @@ -44,8 +47,9 @@ class TestReaderReset(unittest.TestCase): startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): - image = fluid.layers.data( - name='image', shape=self.ins_shape, dtype='float32') + image = fluid.layers.data(name='image', + shape=self.ins_shape, + dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') data_reader_handle = fluid.io.PyReader( feed_list=[image, label], @@ -59,8 +63,7 @@ class TestReaderReset(unittest.TestCase): exe.run(startup_prog) data_reader_handle.decorate_sample_list_generator( - paddle.batch( - self.prepare_data(), batch_size=self.batch_size)) + paddle.batch(self.prepare_data(), batch_size=self.batch_size)) train_cp = compiler.CompiledProgram(main_prog).with_data_parallel( places=[place]) @@ -75,8 +78,9 @@ class TestReaderReset(unittest.TestCase): fetch_list=fetch_list, return_numpy=True) ins_num = data_val.shape[0] - broadcasted_label = np.ones((ins_num, ) + tuple( - self.ins_shape)) * label_val.reshape((ins_num, 1)) + broadcasted_label = np.ones(( + ins_num, ) + tuple(self.ins_shape)) * label_val.reshape( + 
(ins_num, 1)) self.assertEqual(data_val.all(), broadcasted_label.all()) batch_id += 1 except fluid.core.EOFException: diff --git a/python/paddle/fluid/tests/unittests/test_real_imag_op.py b/python/paddle/fluid/tests/unittests/test_real_imag_op.py index 523f48374ea..1402585c037 100644 --- a/python/paddle/fluid/tests/unittests/test_real_imag_op.py +++ b/python/paddle/fluid/tests/unittests/test_real_imag_op.py @@ -34,6 +34,7 @@ paddle_apis = { class TestRealOp(OpTest): + def setUp(self): # switch to static paddle.enable_static() @@ -47,7 +48,8 @@ class TestRealOp(OpTest): def init_input_output(self): self.inputs = { - 'X': np.random.random( + 'X': + np.random.random( (20, 5)).astype(self.dtype) + 1j * np.random.random( (20, 5)).astype(self.dtype) } @@ -55,22 +57,22 @@ class TestRealOp(OpTest): def init_grad_input_output(self): self.grad_out = np.ones((20, 5), self.dtype) - self.grad_x = np.real(self.grad_out) + 1j * np.zeros( - self.grad_out.shape) + self.grad_x = np.real( + self.grad_out) + 1j * np.zeros(self.grad_out.shape) def test_check_output(self): self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad( - ['X'], - 'Out', - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out], - check_eager=True) + self.check_grad(['X'], + 'Out', + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out], + check_eager=True) class TestImagOp(TestRealOp): + def setUp(self): # switch to static paddle.enable_static() @@ -84,11 +86,12 @@ class TestImagOp(TestRealOp): def init_grad_input_output(self): self.grad_out = np.ones((20, 5), self.dtype) - self.grad_x = np.zeros(self.grad_out.shape) + 1j * np.real( - self.grad_out) + self.grad_x = np.zeros( + self.grad_out.shape) + 1j * np.real(self.grad_out) class TestRealAPI(unittest.TestCase): + def setUp(self): # switch to static paddle.enable_static() @@ -101,6 +104,7 @@ class TestRealAPI(unittest.TestCase): self._shape = [2, 20, 2, 3] def test_in_static_mode(self): + def init_input_output(dtype): input = np.random.random(self._shape).astype( dtype) + 1j * np.random.random(self._shape).astype(dtype) @@ -154,6 +158,7 @@ class TestRealAPI(unittest.TestCase): class TestImagAPI(TestRealAPI): + def setUp(self): # switch to static paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_recurrent_op.py index a8adee742c6..568d57c0935 100644 --- a/python/paddle/fluid/tests/unittests/test_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_recurrent_op.py @@ -29,6 +29,7 @@ np.random.seed(123) class PyRNNBase(object): + def __init__(self, input_shape, output_shape): self.x = np.ones(shape=input_shape).astype("float32") self.y = np.zeros(shape=output_shape).astype("float32") @@ -46,6 +47,7 @@ class PyRNNBase(object): class PySimpleRNN1(PyRNNBase): + def __init__(self, input_shape, output_shape): super(PySimpleRNN1, self).__init__(input_shape, output_shape) @@ -67,6 +69,7 @@ class PySimpleRNN1(PyRNNBase): class PySimpleRNN2(PyRNNBase): + def __init__(self, input_shape, output_shape): super(PySimpleRNN2, self).__init__(input_shape, output_shape) @@ -134,14 +137,14 @@ class RecurrentOpTest1(unittest.TestCase): self.output = layers.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False) + x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], + dtype='float32', + name='x', 
+ append_batch_size=False) x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype='float32', name='h_boot') + h_boot = layers.data(shape=[self.input_dim], + dtype='float32', + name='h_boot') h_boot.stop_gradient = False rnn = layers.StaticRNN() @@ -149,10 +152,8 @@ class RecurrentOpTest1(unittest.TestCase): h_pre = rnn.memory(init=h_boot) x_t = rnn.step_input(x) - h = layers.scale( - x=layers.elementwise_add( - x=h_pre, y=x_t), - scale=self.py_rnn.scale) + h = layers.scale(x=layers.elementwise_add(x=h_pre, y=x_t), + scale=self.py_rnn.scale) rnn.update_memory(h_pre, h) rnn.output(h) @@ -199,8 +200,7 @@ class RecurrentOpTest1(unittest.TestCase): for idx, name in enumerate(self.grad_data_field): self.assertEqual(num_grad[idx].shape, ana_grad[idx].shape) self.assertTrue( - np.isclose( - num_grad[idx], ana_grad[idx], rtol=rtol).all(), + np.isclose(num_grad[idx], ana_grad[idx], rtol=rtol).all(), "num_grad (" + name + ") has diff at " + str(self.place) + "\nExpect " + str(num_grad[idx]) + "\n" + "But Got" + str(ana_grad[idx]) + " in class " + self.__class__.__name__) @@ -265,14 +265,14 @@ class RecurrentOpTest2(RecurrentOpTest1): self.output = layers.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False) + x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], + dtype='float32', + name='x', + append_batch_size=False) x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype='float32', name='h_boot') + h_boot = layers.data(shape=[self.input_dim], + dtype='float32', + name='h_boot') h_boot.stop_gradient = False rnn = layers.StaticRNN() @@ -322,9 +322,10 @@ class RecurrentOpMultipleMemoryTest(RecurrentOpTest1): ''' class PySimpleRNN3(PyRNNBase): + def __init__(self, input_shape, output_shape): - super(RecurrentOpMultipleMemoryTest.PySimpleRNN3, self).__init__( - input_shape, output_shape) + super(RecurrentOpMultipleMemoryTest.PySimpleRNN3, + self).__init__(input_shape, output_shape) seq_len, batch_size, input_dim = input_shape self.h_boot1 = np.random.normal(size=(batch_size, @@ -366,23 +367,20 @@ class RecurrentOpMultipleMemoryTest(RecurrentOpTest1): self.output = layers.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False) + x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], + dtype='float32', + name='x', + append_batch_size=False) x.stop_gradient = False - h_boot1 = layers.data( - shape=[self.batch_size, self.input_dim], - dtype='float32', - name='h_boot1', - append_batch_size=False) + h_boot1 = layers.data(shape=[self.batch_size, self.input_dim], + dtype='float32', + name='h_boot1', + append_batch_size=False) h_boot1.stop_gradient = False - h_boot2 = layers.data( - shape=[self.batch_size, self.input_dim], - dtype='float32', - name='h_boot2', - append_batch_size=False) + h_boot2 = layers.data(shape=[self.batch_size, self.input_dim], + dtype='float32', + name='h_boot2', + append_batch_size=False) h_boot2.stop_gradient = False rnn = layers.StaticRNN() @@ -417,9 +415,10 @@ class RecurrentOpNoMemBootTest(RecurrentOpTest1): ''' class PySimpleRNN4(PyRNNBase): + def __init__(self, input_shape, output_shape): - super(RecurrentOpNoMemBootTest.PySimpleRNN4, self).__init__( - input_shape, output_shape) + super(RecurrentOpNoMemBootTest.PySimpleRNN4, + 
self).__init__(input_shape, output_shape) men_dim = input_shape self.mems = np.zeros(shape=men_dim).astype("float32") @@ -443,18 +442,17 @@ class RecurrentOpNoMemBootTest(RecurrentOpTest1): self.input_shape = (self.sent_len, self.batch_size, self.input_dim) self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = RecurrentOpNoMemBootTest.PySimpleRNN4(self.input_shape, - self.output_shape) + self.py_rnn = RecurrentOpNoMemBootTest.PySimpleRNN4( + self.input_shape, self.output_shape) with fluid.program_guard(self.main_program, self.startup_program): self.output = layers.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False) + x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], + dtype='float32', + name='x', + append_batch_size=False) x.stop_gradient = False rnn = layers.StaticRNN() @@ -490,25 +488,31 @@ class RecurrentOpSubBlockTest(RecurrentOpTest1): ''' class PySimpleRNN5(PyRNNBase): + def __init__(self, input_shape, output_shape): - super(RecurrentOpSubBlockTest.PySimpleRNN5, self).__init__( - input_shape, output_shape) + super(RecurrentOpSubBlockTest.PySimpleRNN5, + self).__init__(input_shape, output_shape) seq_len, batch_size, input_dim = input_shape - self.w1 = np.random.uniform( - -0.1, 0.1, size=(input_dim, input_dim)).astype("float32") - self.w2 = np.random.uniform( - -0.1, 0.1, size=(input_dim * 2, input_dim)).astype("float32") - - self.emb = np.random.uniform( - -0.1, 0.1, size=(seq_len, batch_size, - input_dim)).astype("float32") + self.w1 = np.random.uniform(-0.1, 0.1, + size=(input_dim, + input_dim)).astype("float32") + self.w2 = np.random.uniform(-0.1, + 0.1, + size=(input_dim * 2, + input_dim)).astype("float32") + + self.emb = np.random.uniform(-0.1, + 0.1, + size=(seq_len, batch_size, + input_dim)).astype("float32") men_dim = (seq_len, batch_size, input_dim) self.mems = np.zeros(shape=men_dim).astype("float32") self.oy = np.matmul(self.emb, self.w1) def step(self, step_id, x): + def dot_attention(query, memory): attn = np.matmul(query, memory.transpose((0, 2, 1))) weight = softmax(attn) @@ -544,19 +548,18 @@ class RecurrentOpSubBlockTest(RecurrentOpTest1): self.input_shape = (self.sent_len, self.batch_size, self.input_dim) self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = RecurrentOpSubBlockTest.PySimpleRNN5(self.input_shape, - self.output_shape) + self.py_rnn = RecurrentOpSubBlockTest.PySimpleRNN5( + self.input_shape, self.output_shape) with fluid.program_guard(self.main_program, self.startup_program): rnn_out = self.create_rnn_op() self.output = layers.mean(rnn_out) def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False) + x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], + dtype='float32', + name='x', + append_batch_size=False) x.stop_gradient = False emb = layers.data( @@ -566,17 +569,15 @@ class RecurrentOpSubBlockTest(RecurrentOpTest1): append_batch_size=False) emb.stop_gradient = False - w1 = layers.data( - shape=[self.input_dim, self.input_dim], - dtype='float32', - name='w1', - append_batch_size=False) + w1 = layers.data(shape=[self.input_dim, self.input_dim], + dtype='float32', + name='w1', + append_batch_size=False) w1.stop_gradient = False - w2 = layers.data( - shape=[self.input_dim * 2, self.input_dim], - dtype='float32', - 
name='w2', - append_batch_size=False) + w2 = layers.data(shape=[self.input_dim * 2, self.input_dim], + dtype='float32', + name='w2', + append_batch_size=False) w2.stop_gradient = False rnn = layers.StaticRNN() @@ -590,10 +591,9 @@ class RecurrentOpSubBlockTest(RecurrentOpTest1): y = layers.matmul(emb, w1) with rnn.step(): - pre_h = rnn.memory( - shape=(self.sent_len, self.input_dim), - batch_ref=x, - init_value=0.0) + pre_h = rnn.memory(shape=(self.sent_len, self.input_dim), + batch_ref=x, + init_value=0.0) step_in = rnn.step_input(x) concat_in = layers.concat([step_in, pre_h], 1) new_h = layers.matmul(concat_in, w2) @@ -640,14 +640,14 @@ class RecurrentOpStopGradientTest(RecurrentOpTest1): self.output = layers.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype="float32", - name="x", - append_batch_size=False) + x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], + dtype="float32", + name="x", + append_batch_size=False) x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype="float32", name="h_boot") + h_boot = layers.data(shape=[self.input_dim], + dtype="float32", + name="h_boot") h_boot.stop_gradient = True rnn = layers.StaticRNN() diff --git a/python/paddle/fluid/tests/unittests/test_recv_save_op.py b/python/paddle/fluid/tests/unittests/test_recv_save_op.py index 233cbf129f1..7e875ee84b8 100644 --- a/python/paddle/fluid/tests/unittests/test_recv_save_op.py +++ b/python/paddle/fluid/tests/unittests/test_recv_save_op.py @@ -49,17 +49,20 @@ def run_pserver(pserver_id): param.set(param_array, place) optimize_block = program._create_block(program.global_block().idx) - program.global_block().append_op( - type="listen_and_serv", - inputs={'X': []}, - outputs={}, - attrs={ - "optimize_blocks": [optimize_block], - "endpoint": '127.0.0.1:0', - "Fanin": 1, - "distributed_mode": DistributedMode.SYNC, - "grad_to_block_id": [] - }) + program.global_block().append_op(type="listen_and_serv", + inputs={'X': []}, + outputs={}, + attrs={ + "optimize_blocks": + [optimize_block], + "endpoint": + '127.0.0.1:0', + "Fanin": + 1, + "distributed_mode": + DistributedMode.SYNC, + "grad_to_block_id": [] + }) exe = fluid.Executor(place) exe.run(program) @@ -67,6 +70,7 @@ def run_pserver(pserver_id): @unittest.skip("do not need currently") class TestListenAndServOp(unittest.TestCase): + def setUp(self): self.ps_timeout = 5 @@ -103,16 +107,15 @@ class TestListenAndServOp(unittest.TestCase): emaps = ['127.0.0.1:' + str(port0), '127.0.0.1:' + str(port1)] # create and run recv and save operator - remote_recv_op = Operator( - "recv_save", - trainer_id=0, - shape=[10, 8], - slice_shapes=["5,8", "5,8"], - slice_varnames=["table", "table"], - remote_varnames=['table', 'table'], - is_sparse=False, - endpoints=emaps, - file_path=model_file) + remote_recv_op = Operator("recv_save", + trainer_id=0, + shape=[10, 8], + slice_shapes=["5,8", "5,8"], + slice_varnames=["table", "table"], + remote_varnames=['table', 'table'], + is_sparse=False, + endpoints=emaps, + file_path=model_file) remote_recv_op.run(scope, place) @@ -141,31 +144,28 @@ class TestListenAndServOp(unittest.TestCase): dtype="float32", persistable=True) - load_block.append_op( - type='load', - inputs={}, - outputs={'Out': [origin]}, - attrs={'file_path': model_file}) - - load_block.append_op( - type='load', - inputs={}, - outputs={'Out': [slice0]}, - attrs={ - 'file_path': model_file, - 'seek': 2 * 8, - 'shape': slice0.shape - }) - - 
load_block.append_op( - type='load', - inputs={}, - outputs={'Out': [slice1]}, - attrs={ - 'file_path': model_file, - 'seek': 5 * 8, - 'shape': slice1.shape - }) + load_block.append_op(type='load', + inputs={}, + outputs={'Out': [origin]}, + attrs={'file_path': model_file}) + + load_block.append_op(type='load', + inputs={}, + outputs={'Out': [slice0]}, + attrs={ + 'file_path': model_file, + 'seek': 2 * 8, + 'shape': slice0.shape + }) + + load_block.append_op(type='load', + inputs={}, + outputs={'Out': [slice1]}, + attrs={ + 'file_path': model_file, + 'seek': 5 * 8, + 'shape': slice1.shape + }) exe = fluid.Executor(place=fluid.CPUPlace()) exe.run(load_prog) diff --git a/python/paddle/fluid/tests/unittests/test_reduce_op.py b/python/paddle/fluid/tests/unittests/test_reduce_op.py index 01d386724d1..d6fabb44b4f 100644 --- a/python/paddle/fluid/tests/unittests/test_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_reduce_op.py @@ -25,6 +25,7 @@ from paddle.fluid.framework import convert_np_dtype_to_dtype_ class TestSumOp(OpTest): + def setUp(self): self.python_api = paddle.sum self.op_type = "reduce_sum" @@ -40,6 +41,7 @@ class TestSumOp(OpTest): class TestSumOp_fp16(OpTest): + def setUp(self): self.python_api = paddle.sum self.op_type = "reduce_sum" @@ -61,13 +63,16 @@ class TestSumOp_fp16(OpTest): return grad, def test_check_grad(self): - self.check_grad( - ['X'], 'Out', user_defined_grads=self.gradient, check_eager=True) + self.check_grad(['X'], + 'Out', + user_defined_grads=self.gradient, + check_eager=True) @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSumOp_bf16(OpTest): + def setUp(self): np.random.seed(100) self.python_api = paddle.sum @@ -88,11 +93,10 @@ class TestSumOp_bf16(OpTest): def test_check_grad(self): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['X'], - 'Out', - user_defined_grads=self.gradient, - check_eager=True) + self.check_grad_with_place(place, ['X'], + 'Out', + user_defined_grads=self.gradient, + check_eager=True) def calc_gradient(self): x = self.x @@ -101,6 +105,7 @@ class TestSumOp_bf16(OpTest): class TestSumOp_fp16_withInt(OpTest): + def setUp(self): self.python_api = paddle.sum self.op_type = "reduce_sum" @@ -124,11 +129,14 @@ class TestSumOp_fp16_withInt(OpTest): return grad, def test_check_grad(self): - self.check_grad( - ['X'], 'Out', user_defined_grads=self.gradient, check_eager=True) + self.check_grad(['X'], + 'Out', + user_defined_grads=self.gradient, + check_eager=True) class TestSumOp5D(OpTest): + def setUp(self): self.python_api = paddle.sum self.op_type = "reduce_sum" @@ -146,6 +154,7 @@ class TestSumOp5D(OpTest): class TestSumOp6D(OpTest): + def setUp(self): self.python_api = paddle.sum self.op_type = "reduce_sum" @@ -163,6 +172,7 @@ class TestSumOp6D(OpTest): class TestSumOp8D(OpTest): + def setUp(self): self.python_api = paddle.sum self.op_type = "reduce_sum" @@ -258,6 +268,7 @@ def raw_reduce_prod(x, dim=[0], keep_dim=False): class TestProdOp(OpTest): + def setUp(self): self.op_type = "reduce_prod" self.python_api = raw_reduce_prod @@ -277,6 +288,7 @@ class TestProdOp(OpTest): class TestProd6DOp(OpTest): + def setUp(self): self.op_type = "reduce_prod" self.python_api = raw_reduce_prod @@ -301,6 +313,7 @@ class TestProd6DOp(OpTest): class TestProd8DOp(OpTest): + def setUp(self): self.op_type = "reduce_prod" self.python_api = raw_reduce_prod @@ -326,6 +339,7 @@ class TestProd8DOp(OpTest): class TestAllOp(OpTest): + def setUp(self): self.op_type = "reduce_all" self.python_api 
= paddle.all @@ -338,6 +352,7 @@ class TestAllOp(OpTest): class TestAll8DOp(OpTest): + def setUp(self): self.op_type = "reduce_all" self.python_api = paddle.all @@ -353,6 +368,7 @@ class TestAll8DOp(OpTest): class TestAllOpWithDim(OpTest): + def setUp(self): self.op_type = "reduce_all" self.python_api = paddle.all @@ -365,6 +381,7 @@ class TestAllOpWithDim(OpTest): class TestAll8DOpWithDim(OpTest): + def setUp(self): self.op_type = "reduce_all" self.python_api = paddle.all @@ -380,14 +397,14 @@ class TestAll8DOpWithDim(OpTest): class TestAllOpWithKeepDim(OpTest): + def setUp(self): self.op_type = "reduce_all" self.python_api = paddle.all self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")} self.attrs = {'dim': [1], 'keep_dim': True} self.outputs = { - 'Out': np.expand_dims( - self.inputs['X'].all(axis=1), axis=1) + 'Out': np.expand_dims(self.inputs['X'].all(axis=1), axis=1) } def test_check_output(self): @@ -395,6 +412,7 @@ class TestAllOpWithKeepDim(OpTest): class TestAll8DOpWithKeepDim(OpTest): + def setUp(self): self.op_type = "reduce_all" self.python_api = paddle.all @@ -404,8 +422,8 @@ class TestAll8DOpWithKeepDim(OpTest): } self.attrs = {'dim': (5, ), 'keep_dim': True} self.outputs = { - 'Out': np.expand_dims( - self.inputs['X'].all(axis=self.attrs['dim']), axis=5) + 'Out': + np.expand_dims(self.inputs['X'].all(axis=self.attrs['dim']), axis=5) } def test_check_output(self): @@ -413,18 +431,21 @@ class TestAll8DOpWithKeepDim(OpTest): class TestAllOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of reduce_all_op must be Variable. input1 = 12 self.assertRaises(TypeError, fluid.layers.reduce_all, input1) # The input dtype of reduce_all_op must be bool. - input2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32") + input2 = fluid.layers.data(name='input2', + shape=[12, 10], + dtype="int32") self.assertRaises(TypeError, fluid.layers.reduce_all, input2) class TestAnyOp(OpTest): + def setUp(self): self.op_type = "reduce_any" self.python_api = paddle.any @@ -437,6 +458,7 @@ class TestAnyOp(OpTest): class TestAny8DOp(OpTest): + def setUp(self): self.op_type = "reduce_any" self.python_api = paddle.any @@ -452,6 +474,7 @@ class TestAny8DOp(OpTest): class TestAnyOpWithDim(OpTest): + def setUp(self): self.op_type = "reduce_any" self.python_api = paddle.any @@ -464,6 +487,7 @@ class TestAnyOpWithDim(OpTest): class TestAny8DOpWithDim(OpTest): + def setUp(self): self.op_type = "reduce_any" self.python_api = paddle.any @@ -479,14 +503,15 @@ class TestAny8DOpWithDim(OpTest): class TestAnyOpWithKeepDim(OpTest): + def setUp(self): self.op_type = "reduce_any" self.python_api = paddle.any self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")} self.attrs = {'dim': (1, ), 'keep_dim': True} self.outputs = { - 'Out': np.expand_dims( - self.inputs['X'].any(axis=self.attrs['dim']), axis=1) + 'Out': + np.expand_dims(self.inputs['X'].any(axis=self.attrs['dim']), axis=1) } def test_check_output(self): @@ -494,6 +519,7 @@ class TestAnyOpWithKeepDim(OpTest): class TestAny8DOpWithKeepDim(OpTest): + def setUp(self): self.op_type = "reduce_any" self.python_api = paddle.any @@ -503,8 +529,8 @@ class TestAny8DOpWithKeepDim(OpTest): } self.attrs = {'dim': (1, ), 'keep_dim': True} self.outputs = { - 'Out': np.expand_dims( - self.inputs['X'].any(axis=self.attrs['dim']), axis=1) + 'Out': + np.expand_dims(self.inputs['X'].any(axis=self.attrs['dim']), axis=1) } def test_check_output(self): @@ -512,18 +538,21 @@ 
class TestAny8DOpWithKeepDim(OpTest): class TestAnyOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of reduce_any_op must be Variable. input1 = 12 self.assertRaises(TypeError, fluid.layers.reduce_any, input1) # The input dtype of reduce_any_op must be bool. - input2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32") + input2 = fluid.layers.data(name='input2', + shape=[12, 10], + dtype="int32") self.assertRaises(TypeError, fluid.layers.reduce_any, input2) class Test1DReduce(OpTest): + def setUp(self): self.op_type = "reduce_sum" self.inputs = {'X': np.random.random(120).astype("float64")} @@ -537,6 +566,7 @@ class Test1DReduce(OpTest): class Test2DReduce0(Test1DReduce): + def setUp(self): self.op_type = "reduce_sum" self.attrs = {'dim': [0]} @@ -545,6 +575,7 @@ class Test2DReduce0(Test1DReduce): class Test2DReduce1(Test1DReduce): + def setUp(self): self.op_type = "reduce_sum" self.attrs = {'dim': [1]} @@ -555,6 +586,7 @@ class Test2DReduce1(Test1DReduce): class Test3DReduce0(Test1DReduce): + def setUp(self): self.op_type = "reduce_sum" self.attrs = {'dim': [1]} @@ -565,6 +597,7 @@ class Test3DReduce0(Test1DReduce): class Test3DReduce1(Test1DReduce): + def setUp(self): self.op_type = "reduce_sum" self.attrs = {'dim': [2]} @@ -575,6 +608,7 @@ class Test3DReduce1(Test1DReduce): class Test3DReduce2(Test1DReduce): + def setUp(self): self.op_type = "reduce_sum" self.attrs = {'dim': [-2]} @@ -585,6 +619,7 @@ class Test3DReduce2(Test1DReduce): class Test3DReduce3(Test1DReduce): + def setUp(self): self.op_type = "reduce_sum" self.attrs = {'dim': [1, 2]} @@ -595,6 +630,7 @@ class Test3DReduce3(Test1DReduce): class Test8DReduce0(Test1DReduce): + def setUp(self): self.op_type = "reduce_sum" self.attrs = {'dim': (4, 2, 3)} @@ -607,17 +643,20 @@ class Test8DReduce0(Test1DReduce): class TestKeepDimReduce(Test1DReduce): + def setUp(self): self.op_type = "reduce_sum" self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")} self.attrs = {'dim': [1], 'keep_dim': True} self.outputs = { - 'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']), - keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].sum(axis=tuple(self.attrs['dim']), + keepdims=self.attrs['keep_dim']) } class TestKeepDim8DReduce(Test1DReduce): + def setUp(self): self.op_type = "reduce_sum" self.inputs = { @@ -625,8 +664,9 @@ class TestKeepDim8DReduce(Test1DReduce): } self.attrs = {'dim': (3, 4, 5), 'keep_dim': True} self.outputs = { - 'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']), - keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].sum(axis=tuple(self.attrs['dim']), + keepdims=self.attrs['keep_dim']) } @@ -669,6 +709,7 @@ class TestReduceMinOpMultiAxises(OpTest): class TestKeepDimReduceSumMultiAxises(OpTest): + def setUp(self): self.op_type = "reduce_sum" self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")} @@ -686,13 +727,14 @@ class TestKeepDimReduceSumMultiAxises(OpTest): class TestReduceSumWithDimOne(OpTest): + def setUp(self): self.op_type = "reduce_sum" self.inputs = {'X': np.random.random((100, 1, 1)).astype("float64")} self.attrs = {'dim': [1, 2], 'keep_dim': True} self.outputs = { - 'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']), - keepdims=True) + 'Out': + self.inputs['X'].sum(axis=tuple(self.attrs['dim']), keepdims=True) } def test_check_output(self): @@ -703,13 +745,14 @@ class TestReduceSumWithDimOne(OpTest): class TestReduceSumWithNumelOne(OpTest): + def setUp(self): self.op_type = 
"reduce_sum" self.inputs = {'X': np.random.random((100, 1)).astype("float64")} self.attrs = {'dim': [1], 'keep_dim': False} self.outputs = { - 'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']), - keepdims=False) + 'Out': + self.inputs['X'].sum(axis=tuple(self.attrs['dim']), keepdims=False) } def test_check_output(self): @@ -720,6 +763,7 @@ class TestReduceSumWithNumelOne(OpTest): class TestReduceAll(OpTest): + def setUp(self): self.op_type = "reduce_sum" self.inputs = {'X': np.random.random((100, 1, 1)).astype("float64")} @@ -734,6 +778,7 @@ class TestReduceAll(OpTest): class Test1DReduceWithAxes1(OpTest): + def setUp(self): self.op_type = "reduce_sum" self.inputs = {'X': np.random.random(100).astype("float64")} @@ -748,14 +793,17 @@ class Test1DReduceWithAxes1(OpTest): class TestReduceWithDtype(OpTest): + def setUp(self): self.op_type = "reduce_sum" self.inputs = {'X': np.random.random((6, 2, 10)).astype("float64")} self.outputs = {'Out': self.inputs['X'].sum().astype('float64')} self.attrs = {'reduce_all': True} self.attrs.update({ - 'in_dtype': int(convert_np_dtype_to_dtype_(np.float32)), - 'out_dtype': int(convert_np_dtype_to_dtype_(np.float64)) + 'in_dtype': + int(convert_np_dtype_to_dtype_(np.float32)), + 'out_dtype': + int(convert_np_dtype_to_dtype_(np.float64)) }) def test_check_output(self): @@ -766,35 +814,42 @@ class TestReduceWithDtype(OpTest): class TestReduceWithDtype1(TestReduceWithDtype): + def setUp(self): self.op_type = "reduce_sum" self.inputs = {'X': np.random.random((6, 2, 10)).astype("float64")} self.outputs = {'Out': self.inputs['X'].sum(axis=1)} self.attrs = {'dim': [1]} self.attrs.update({ - 'in_dtype': int(convert_np_dtype_to_dtype_(np.float32)), - 'out_dtype': int(convert_np_dtype_to_dtype_(np.float64)) + 'in_dtype': + int(convert_np_dtype_to_dtype_(np.float32)), + 'out_dtype': + int(convert_np_dtype_to_dtype_(np.float64)) }) class TestReduceWithDtype2(TestReduceWithDtype): + def setUp(self): self.op_type = "reduce_sum" self.inputs = {'X': np.random.random((6, 2, 10)).astype("float64")} self.outputs = {'Out': self.inputs['X'].sum(axis=1, keepdims=True)} self.attrs = {'dim': [1], 'keep_dim': True} self.attrs.update({ - 'in_dtype': int(convert_np_dtype_to_dtype_(np.float32)), - 'out_dtype': int(convert_np_dtype_to_dtype_(np.float64)) + 'in_dtype': + int(convert_np_dtype_to_dtype_(np.float32)), + 'out_dtype': + int(convert_np_dtype_to_dtype_(np.float64)) }) class TestReduceSumOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of reduce_sum_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.reduce_sum, x1) # The input dtype of reduce_sum_op must be float32 or float64 or int32 or int64. 
x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") @@ -802,6 +857,7 @@ class TestReduceSumOpError(unittest.TestCase): class API_TestSumOp(unittest.TestCase): + def run_static(self, shape, x_dtype, @@ -827,8 +883,9 @@ class API_TestSumOp(unittest.TestCase): fetch_list=[result_sum]) self.assertTrue( - np.allclose( - res, np.sum(input_data.astype(attr_dtype), axis=np_axis))) + np.allclose(res, + np.sum(input_data.astype(attr_dtype), + axis=np_axis))) def test_static(self): shape = [10, 10] @@ -859,8 +916,10 @@ class API_TestSumOp(unittest.TestCase): shape = [5, 5, 5] self.run_static(shape, "int32", (0, 1), attr_dtype="int32") - self.run_static( - shape, "int32", (), attr_dtype="int32", np_axis=(0, 1, 2)) + self.run_static(shape, + "int32", (), + attr_dtype="int32", + np_axis=(0, 1, 2)) def test_dygraph(self): np_x = np.random.random([2, 3, 4]).astype('int32') @@ -878,6 +937,7 @@ class API_TestSumOp(unittest.TestCase): class TestAllAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) paddle.enable_static() @@ -933,6 +993,7 @@ class TestAllAPI(unittest.TestCase): class TestAnyAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_reducescatter.py b/python/paddle/fluid/tests/unittests/test_reducescatter.py index 7c355d46285..c340157c132 100644 --- a/python/paddle/fluid/tests/unittests/test_reducescatter.py +++ b/python/paddle/fluid/tests/unittests/test_reducescatter.py @@ -23,6 +23,7 @@ paddle.enable_static() class TestReduceScatterOp(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_reducescatter_api.py b/python/paddle/fluid/tests/unittests/test_reducescatter_api.py index 5a494b5529e..b84943a0223 100644 --- a/python/paddle/fluid/tests/unittests/test_reducescatter_api.py +++ b/python/paddle/fluid/tests/unittests/test_reducescatter_api.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestReduceScatterAPI(TestDistBase): + def _setup_config(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_registry.py b/python/paddle/fluid/tests/unittests/test_registry.py index 39cf64465ab..e9f847185fc 100644 --- a/python/paddle/fluid/tests/unittests/test_registry.py +++ b/python/paddle/fluid/tests/unittests/test_registry.py @@ -21,6 +21,7 @@ from decorator_helper import prog_scope class TestRegistry(unittest.TestCase): + @prog_scope() def test_registry_layer(self): x = fluid.layers.data(name='X', shape=[10, 10], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/test_regularizer.py b/python/paddle/fluid/tests/unittests/test_regularizer.py index 08a70fe1852..304e47da9a6 100644 --- a/python/paddle/fluid/tests/unittests/test_regularizer.py +++ b/python/paddle/fluid/tests/unittests/test_regularizer.py @@ -29,6 +29,7 @@ from paddle.fluid.backward import append_backward class TestL2DecayRegularizer(unittest.TestCase): + def test_l2decay_regularizer(self): paddle.enable_static() program = framework.Program() @@ -42,20 +43,28 @@ class TestL2DecayRegularizer(unittest.TestCase): self.assertTrue(mul_x.regularizer is not None) self.assertTrue( isinstance(mul_x.regularizer, regularizer.L2DecayRegularizer)) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - mean_out = block.create_var( - 
dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) count_ops = len(block.ops) @@ -68,6 +77,7 @@ class TestL2DecayRegularizer(unittest.TestCase): class TestL1DecayRegularizer(unittest.TestCase): + def test_l2decay_regularizer(self): paddle.enable_static() program = framework.Program() @@ -81,20 +91,28 @@ class TestL1DecayRegularizer(unittest.TestCase): self.assertTrue(mul_x.regularizer is not None) self.assertTrue( isinstance(mul_x.regularizer, regularizer.L1DecayRegularizer)) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) count_ops = len(block.ops) @@ -120,8 +138,9 @@ def bow_net(data, This model is from https://github.com/PaddlePaddle/models: fluid/PaddleNLP/text_classification/nets.py """ - emb = fluid.layers.embedding( - input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim]) + emb = fluid.layers.embedding(input=data, + is_sparse=is_sparse, + size=[dict_dim, emb_dim]) bow = fluid.layers.sequence_pool(input=emb, pool_type='sum') bow_tanh = fluid.layers.tanh(bow) fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh") @@ -133,6 +152,7 @@ def bow_net(data, class TestRegularizer(unittest.TestCase): + def setUp(self): self.word_len = 1500 self.train_data = [[(random.sample(range(1000), 10), [0])] @@ -176,10 +196,12 @@ class TestRegularizer(unittest.TestCase): paddle.framework.random._manual_program_seed(1) main_prog = fluid.framework.Program() startup_prog = fluid.framework.Program() - with self.scope_prog_guard( - main_prog=main_prog, startup_prog=startup_prog): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + with self.scope_prog_guard(main_prog=main_prog, + startup_prog=startup_prog): + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") avg_cost = model(data, label, self.word_len) @@ -197,10 
+219,12 @@ class TestRegularizer(unittest.TestCase): main_prog = fluid.framework.Program() startup_prog = fluid.framework.Program() - with self.scope_prog_guard( - main_prog=main_prog, startup_prog=startup_prog): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + with self.scope_prog_guard(main_prog=main_prog, + startup_prog=startup_prog): + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") avg_cost_l2 = model(data, label, self.word_len) @@ -231,10 +255,9 @@ class TestRegularizer(unittest.TestCase): assert len(dense_sparse_p_sum[0]) == len(dense_sparse_p_sum[1]) for i in range(len(dense_sparse_p_sum[0])): - assert np.isclose( - a=dense_sparse_p_sum[0][i], - b=dense_sparse_p_sum[1][i], - rtol=5e-5) + assert np.isclose(a=dense_sparse_p_sum[0][i], + b=dense_sparse_p_sum[1][i], + rtol=5e-5) def test_repeated_regularization(self): l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1) @@ -252,10 +275,14 @@ class TestRegularizer(unittest.TestCase): paddle.seed(1) paddle.framework.random._manual_program_seed(1) - linear1 = fluid.dygraph.Linear( - 2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr) - linear2 = fluid.dygraph.Linear( - 2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr) + linear1 = fluid.dygraph.Linear(2, + 2, + param_attr=fc_param_attr, + bias_attr=fc_param_attr) + linear2 = fluid.dygraph.Linear(2, + 2, + param_attr=fc_param_attr, + bias_attr=fc_param_attr) loss1 = linear1(input) loss1.backward() diff --git a/python/paddle/fluid/tests/unittests/test_regularizer_api.py b/python/paddle/fluid/tests/unittests/test_regularizer_api.py index afa2441aac2..da2643cc647 100644 --- a/python/paddle/fluid/tests/unittests/test_regularizer_api.py +++ b/python/paddle/fluid/tests/unittests/test_regularizer_api.py @@ -41,8 +41,9 @@ def bow_net(data, This model is from https://github.com/PaddlePaddle/models: fluid/PaddleNLP/text_classification/nets.py """ - emb = fluid.layers.embedding( - input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim]) + emb = fluid.layers.embedding(input=data, + is_sparse=is_sparse, + size=[dict_dim, emb_dim]) bow = fluid.layers.sequence_pool(input=emb, pool_type='sum') bow_tanh = fluid.layers.tanh(bow) fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh") @@ -55,6 +56,7 @@ def bow_net(data, class TestRegularizer(unittest.TestCase): + def setUp(self): self.word_len = 1500 self.train_data = [[(random.sample(range(1000), 10), [0])] @@ -98,10 +100,12 @@ class TestRegularizer(unittest.TestCase): paddle.framework.random._manual_program_seed(1) main_prog = fluid.framework.Program() startup_prog = fluid.framework.Program() - with self.scope_prog_guard( - main_prog=main_prog, startup_prog=startup_prog): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + with self.scope_prog_guard(main_prog=main_prog, + startup_prog=startup_prog): + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") avg_cost = model(data, label, self.word_len) @@ -119,10 +123,12 @@ class TestRegularizer(unittest.TestCase): main_prog = fluid.framework.Program() startup_prog = fluid.framework.Program() - with self.scope_prog_guard( - main_prog=main_prog, startup_prog=startup_prog): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + with self.scope_prog_guard(main_prog=main_prog, + 
startup_prog=startup_prog): + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") avg_cost_l2 = model(data, label, self.word_len) @@ -154,10 +160,9 @@ class TestRegularizer(unittest.TestCase): assert len(dense_sparse_p_sum[0]) == len(dense_sparse_p_sum[1]) for i in range(len(dense_sparse_p_sum[0])): - assert np.isclose( - a=dense_sparse_p_sum[0][i], - b=dense_sparse_p_sum[1][i], - rtol=5e-5) + assert np.isclose(a=dense_sparse_p_sum[0][i], + b=dense_sparse_p_sum[1][i], + rtol=5e-5) def test_repeated_regularization(self): paddle.enable_static() @@ -176,10 +181,14 @@ class TestRegularizer(unittest.TestCase): paddle.seed(1) paddle.framework.random._manual_program_seed(1) - linear1 = fluid.dygraph.Linear( - 2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr) - linear2 = fluid.dygraph.Linear( - 2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr) + linear1 = fluid.dygraph.Linear(2, + 2, + param_attr=fc_param_attr, + bias_attr=fc_param_attr) + linear2 = fluid.dygraph.Linear(2, + 2, + param_attr=fc_param_attr, + bias_attr=fc_param_attr) loss1 = linear1(input) loss1.backward() diff --git a/python/paddle/fluid/tests/unittests/test_renorm_op.py b/python/paddle/fluid/tests/unittests/test_renorm_op.py index e00a892cf71..e266800319d 100644 --- a/python/paddle/fluid/tests/unittests/test_renorm_op.py +++ b/python/paddle/fluid/tests/unittests/test_renorm_op.py @@ -25,9 +25,10 @@ paddle.set_device('cpu') class TestRenormAPI(unittest.TestCase): + def input_data(self): - self.data_x = np.array( - [[[2.0, 2, -2], [3, 0.3, 3]], [[2, -8, 2], [3.1, 3.7, 3]]]) + self.data_x = np.array([[[2.0, 2, -2], [3, 0.3, 3]], + [[2, -8, 2], [3.1, 3.7, 3]]]) self.p = 1.0 self.dim = 2 self.max_norm = 2.05 @@ -65,9 +66,10 @@ class TestRenormAPI(unittest.TestCase): self.assertTrue(np.allclose(expected, np.array(y))) z = paddle.mean(y) z.backward(retain_graph=True) - expected_grad = np.array( - [[[0, 0.01394558, 0.02733333], [0, 0.01394558, 0.00683333]], - [[0, 0.01045918, 0.00683333], [0, 0.01394558, 0.00683333]]]) + expected_grad = np.array([[[0, 0.01394558, 0.02733333], + [0, 0.01394558, 0.00683333]], + [[0, 0.01045918, 0.00683333], + [0, 0.01394558, 0.00683333]]]) self.assertTrue(np.allclose(expected_grad, np.array(x.grad))) #test exception: with fluid.dygraph.guard(): diff --git a/python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py index d67b8088265..1f6fb37e1e0 100644 --- a/python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py @@ -42,14 +42,14 @@ class TestReorderLoDTensor(unittest.TestCase): @classmethod def set_program(cls): - dat = fluid.layers.data( - name=cls.data_desc[0][0], shape=cls.data_desc[0][1]) + dat = fluid.layers.data(name=cls.data_desc[0][0], + shape=cls.data_desc[0][1]) dat.stop_gradient = False - rank_dat = fluid.layers.data( - name=cls.data_desc[1][0], shape=cls.data_desc[1][1]) + rank_dat = fluid.layers.data(name=cls.data_desc[1][0], + shape=cls.data_desc[1][1]) table = lod_rank_table(rank_dat) - new_dat = fluid.layers.reorder_lod_tensor_by_rank( - x=dat, rank_table=table) + new_dat = fluid.layers.reorder_lod_tensor_by_rank(x=dat, + rank_table=table) loss = fluid.layers.reduce_sum(new_dat) fluid.backward.append_backward(loss=loss) cls.fetch_list = [new_dat, cls.data_desc[0][0] + '@GRAD'] @@ -86,8 +86,8 @@ class 
TestReorderLoDTensor(unittest.TestCase): size=self.num_seq if i == 0 else sum(lod_level_i)).tolist() data_lod.append(lod_level_i) data_value = numpy.random.random( - size=[sum(data_lod[-1]) if data_lod else self.num_seq - ] + data_shape).astype('float32') + size=[sum(data_lod[-1]) if data_lod else self.num_seq] + + data_shape).astype('float32') self.data[data_name] = (data_value, data_lod) def set_inputs(self, place): @@ -106,8 +106,8 @@ class TestReorderLoDTensor(unittest.TestCase): rank_table = [] # list of (index, length) for i in range(len(ref_lod)): rank_table.append((i, ref_lod[i])) - rank_table = sorted( - rank_table, key=functools.cmp_to_key(lambda x, y: y[1] - x[1])) + rank_table = sorted(rank_table, + key=functools.cmp_to_key(lambda x, y: y[1] - x[1])) # compute the input sequence info according to input_lod input_value, input_lod = self.data[self.data_desc[0][0]] @@ -122,8 +122,8 @@ class TestReorderLoDTensor(unittest.TestCase): for lod_level_i in offset_lod[level:]: sub_lod_i = [] for idx in range(start_idx, end_idx): - sub_lod_i.append(lod_level_i[idx + 1] - lod_level_i[ - idx]) + sub_lod_i.append(lod_level_i[idx + 1] - + lod_level_i[idx]) sub_lod.append(sub_lod_i) start_idx = lod_level_i[start_idx] end_idx = lod_level_i[end_idx] @@ -158,8 +158,9 @@ class TestReorderLoDTensor(unittest.TestCase): expect_output, expect_output_lod = self.reorder() for actual_output in self.actual_outputs: self.assertTrue( - numpy.allclose( - numpy.array(actual_output), expect_output, atol=0.001)) + numpy.allclose(numpy.array(actual_output), + expect_output, + atol=0.001)) self.assertEqual(expect_output_lod, actual_output.recursive_sequence_lengths()) # check gradient @@ -167,8 +168,9 @@ class TestReorderLoDTensor(unittest.TestCase): expect_grad_lod = self.data[self.data_desc[0][0]][1] for actual_grad in self.actual_grads: self.assertTrue( - numpy.allclose( - numpy.array(actual_grad), expect_grad, atol=0.001)) + numpy.allclose(numpy.array(actual_grad), + expect_grad, + atol=0.001)) self.assertEqual(expect_grad_lod, actual_grad.recursive_sequence_lengths()) @@ -180,8 +182,9 @@ class TestReorderLoDTensor(unittest.TestCase): expect_output, expect_output_lod = self.reorder() for actual_output in self.actual_outputs: self.assertTrue( - numpy.allclose( - numpy.array(actual_output), expect_output, atol=0.001)) + numpy.allclose(numpy.array(actual_output), + expect_output, + atol=0.001)) self.assertEqual(expect_output_lod, actual_output.recursive_sequence_lengths()) # check gradient @@ -189,8 +192,9 @@ class TestReorderLoDTensor(unittest.TestCase): expect_grad_lod = self.data[self.data_desc[0][0]][1] for actual_grad in self.actual_grads: self.assertTrue( - numpy.allclose( - numpy.array(actual_grad), expect_grad, atol=0.001)) + numpy.allclose(numpy.array(actual_grad), + expect_grad, + atol=0.001)) self.assertEqual(expect_grad_lod, actual_grad.recursive_sequence_lengths()) @@ -206,19 +210,21 @@ class TestReorderLoDTensor(unittest.TestCase): self.run_program() for actual_output in self.actual_outputs: self.assertTrue( - numpy.allclose( - numpy.array(actual_output), expect_output, atol=0.001)) + numpy.allclose(numpy.array(actual_output), + expect_output, + atol=0.001)) class TestReorderLoDTensorError(unittest.TestCase): + def test_errors(self): with program_guard(Program()): def test_Variable(): # The input must be Variable. 
x1 = numpy.array([0.9383, 0.1983, 3.2, 1.2]).astype("float64") - table1 = numpy.array( - [0.9383, 0.1983, 3.2, 1.2]).astype("float64") + table1 = numpy.array([0.9383, 0.1983, 3.2, + 1.2]).astype("float64") new_dat = fluid.layers.reorder_lod_tensor_by_rank( x=x1, rank_table=table1) @@ -226,8 +232,9 @@ class TestReorderLoDTensorError(unittest.TestCase): def test_type(): x2 = fluid.layers.data(name='x1', shape=[4], dtype='float32') - table2 = fluid.layers.data( - name='table2', shape=[4], dtype='int32') + table2 = fluid.layers.data(name='table2', + shape=[4], + dtype='int32') new_dat2 = fluid.layers.reorder_lod_tensor_by_rank( x=x2, rank_table=table2) diff --git a/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py b/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py index b047b0c53d8..7abc758617c 100644 --- a/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py +++ b/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py @@ -24,6 +24,7 @@ from paddle.fluid import Program, program_guard class TestRepeatInterleaveOp(OpTest): + def setUp(self): self.op_type = "repeat_interleave" self.init_dtype_type() @@ -63,6 +64,7 @@ class TestRepeatInterleaveOp(OpTest): class TestRepeatInterleaveOp2(OpTest): + def setUp(self): self.op_type = "repeat_interleave" self.init_dtype_type() @@ -100,6 +102,7 @@ class TestRepeatInterleaveOp2(OpTest): class TestIndexSelectAPI(unittest.TestCase): + def input_data(self): self.data_x = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [9.0, 10.0, 11.0, 12.0]]) @@ -112,15 +115,16 @@ class TestIndexSelectAPI(unittest.TestCase): # case 1: with program_guard(Program(), Program()): x = fluid.layers.data(name='x', shape=[-1, 4]) - index = fluid.layers.data( - name='repeats', - shape=[4], - dtype='int32', - append_batch_size=False) + index = fluid.layers.data(name='repeats', + shape=[4], + dtype='int32', + append_batch_size=False) z = paddle.repeat_interleave(x, index, axis=1) exe = fluid.Executor(fluid.CPUPlace()) - res, = exe.run(feed={'x': self.data_x, - 'repeats': self.data_index}, + res, = exe.run(feed={ + 'x': self.data_x, + 'repeats': self.data_index + }, fetch_list=[z.name], return_numpy=False) expect_out = np.repeat(self.data_x, self.data_index, axis=1) @@ -130,11 +134,10 @@ class TestIndexSelectAPI(unittest.TestCase): repeats = np.array([1, 2, 1]).astype('int32') with program_guard(Program(), Program()): x = fluid.layers.data(name='x', shape=[-1, 4]) - index = fluid.layers.data( - name='repeats', - shape=[3], - dtype='int32', - append_batch_size=False) + index = fluid.layers.data(name='repeats', + shape=[3], + dtype='int32', + append_batch_size=False) z = paddle.repeat_interleave(x, index, axis=0) exe = fluid.Executor(fluid.CPUPlace()) res, = exe.run(feed={ diff --git a/python/paddle/fluid/tests/unittests/test_require_version.py b/python/paddle/fluid/tests/unittests/test_require_version.py index d1cb0aa4d81..8e9ea51c733 100644 --- a/python/paddle/fluid/tests/unittests/test_require_version.py +++ b/python/paddle/fluid/tests/unittests/test_require_version.py @@ -22,6 +22,7 @@ import paddle class VersionTest(unittest.TestCase): + def test_check_output(self): warnings.warn( "paddle.__version__: %s, fluid_version.full_version: %s, fluid_version.major: %s, fluid_version.minor: %s, fluid_version.patch: %s, fluid_version.rc: %s." @@ -67,6 +68,7 @@ class VersionTest(unittest.TestCase): # Test Errors class TestErrors(unittest.TestCase): + def test_errors(self): # The type of params must be str. 
def test_input_type(): diff --git a/python/paddle/fluid/tests/unittests/test_reset_grad_inplace_version.py b/python/paddle/fluid/tests/unittests/test_reset_grad_inplace_version.py index 84e22024f76..839b0e331a8 100644 --- a/python/paddle/fluid/tests/unittests/test_reset_grad_inplace_version.py +++ b/python/paddle/fluid/tests/unittests/test_reset_grad_inplace_version.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,11 +18,13 @@ from paddle import _C_ops from paddle.fluid import framework from paddle.fluid.framework import _test_eager_guard import unittest + paddle.set_device('cpu') # Test 1 def clear_grad_test_0(w, a): + @paddle.no_grad() def warp(*_): assert w.grad is not None @@ -33,6 +35,7 @@ def clear_grad_test_0(w, a): class TestInplaceAndClearGradient(unittest.TestCase): + def func_test(self): input_data = np.ones([1, 1]) w = paddle.to_tensor(input_data, 'float32', stop_gradient=False) @@ -54,12 +57,14 @@ class TestInplaceAndClearGradient(unittest.TestCase): # Test 2 class Counter: + def __init__(self): self.num_calls = 0 self.step = 0 def clear_grad_test_1(w, c): + @paddle.no_grad() def warp(*_): assert w.grad is not None @@ -73,6 +78,7 @@ def clear_grad_test_1(w, c): class TestInplaceClearGradAccumulation(unittest.TestCase): + def func_test(self): input_data = np.ones([1, 1]) w = paddle.to_tensor(input_data, 'float32', stop_gradient=False) @@ -100,6 +106,7 @@ class TestInplaceClearGradAccumulation(unittest.TestCase): class TestInplaceClearGradAccumulationAlt(unittest.TestCase): + def func_test(self): input_data = np.ones([1, 1]) w = paddle.to_tensor(input_data, 'float32', stop_gradient=False) diff --git a/python/paddle/fluid/tests/unittests/test_reshape_op.py b/python/paddle/fluid/tests/unittests/test_reshape_op.py index 40481b09782..d4d89177653 100755 --- a/python/paddle/fluid/tests/unittests/test_reshape_op.py +++ b/python/paddle/fluid/tests/unittests/test_reshape_op.py @@ -27,6 +27,7 @@ import paddle.fluid.core as core # situation 1: have shape( list, no tensor), no actual shape(Tensor) class TestReshapeOp(OpTest): + def setUp(self): self.init_data() self.op_type = "reshape2" @@ -50,6 +51,7 @@ class TestReshapeOp(OpTest): class TestReshapeBF16Op(OpTest): + def setUp(self): self.init_data() self.op_type = "reshape2" @@ -59,8 +61,10 @@ class TestReshapeBF16Op(OpTest): self.inputs = {"X": convert_float_to_uint16(x)} self.attrs = {"shape": self.new_shape} self.outputs = { - "Out": convert_float_to_uint16(out), - 'XShape': convert_float_to_uint16( + "Out": + convert_float_to_uint16(out), + 'XShape': + convert_float_to_uint16( np.random.random(self.ori_shape).astype("float32")) } @@ -77,6 +81,7 @@ class TestReshapeBF16Op(OpTest): class TestReshapeOpDimInfer1(TestReshapeOp): + def init_data(self): self.ori_shape = (5, 25) self.new_shape = (5, -1, 5) @@ -84,6 +89,7 @@ class TestReshapeOpDimInfer1(TestReshapeOp): class TestReshapeOpDimInfer2(TestReshapeOp): + def init_data(self): self.ori_shape = (10, 2, 6) self.new_shape = (10, 0, 3, -1) @@ -92,14 +98,14 @@ class TestReshapeOpDimInfer2(TestReshapeOp): # 
situation 2: have shape(list, no tensor), have actual shape(Tensor) class TestReshapeOpWithInputShape(OpTest): + def setUp(self): self.init_data() self.op_type = "reshape2" self.inputs = { "X": np.random.random(self.ori_shape).astype("float32"), - "Shape": np.array( - self.actual_shape, dtype="int32") + "Shape": np.array(self.actual_shape, dtype="int32") } self.attrs = {"shape": self.new_shape} self.outputs = { @@ -121,6 +127,7 @@ class TestReshapeOpWithInputShape(OpTest): # Situation 3: have shape(list, have tensor), no actual shape(Tensor) class TestReshapeOp_attr_ShapeTensor(OpTest): + def setUp(self): self.init_data() self.op_type = "reshape2" @@ -154,6 +161,7 @@ class TestReshapeOp_attr_ShapeTensor(OpTest): class TestReshapeOpDimInfer1_attr_ShapeTensor(TestReshapeOp_attr_ShapeTensor): + def init_data(self): self.ori_shape = (5, 20) self.new_shape = (5, -1, 20) @@ -162,6 +170,7 @@ class TestReshapeOpDimInfer1_attr_ShapeTensor(TestReshapeOp_attr_ShapeTensor): class TestReshapeOpDimInfer2_attr_ShapeTensor(TestReshapeOp_attr_ShapeTensor): + def init_data(self): self.ori_shape = (10, 2, 6) self.new_shape = (10, 0, 3, -1) @@ -171,14 +180,14 @@ class TestReshapeOpDimInfer2_attr_ShapeTensor(TestReshapeOp_attr_ShapeTensor): # Situation 4: have shape(Tensor), no actual shape(Tensor) class TestReshapeOp_attr_OnlyShape(OpTest): + def setUp(self): self.init_data() self.op_type = "reshape2" self.inputs = { "X": np.random.random(self.ori_shape).astype("float32"), - "Shape": np.array( - self.new_shape, dtype="int32") + "Shape": np.array(self.new_shape, dtype="int32") } self.attrs = {} self.outputs = { @@ -199,6 +208,7 @@ class TestReshapeOp_attr_OnlyShape(OpTest): class TestReshapeOpDimInfer1_attr_OnlyShape(TestReshapeOp_attr_OnlyShape): + def init_data(self): self.ori_shape = (5, 20) self.new_shape = (5, -1, 10) @@ -207,6 +217,7 @@ class TestReshapeOpDimInfer1_attr_OnlyShape(TestReshapeOp_attr_OnlyShape): class TestReshapeOpDimInfer2_attr_OnlyShape(TestReshapeOp_attr_OnlyShape): + def init_data(self): self.ori_shape = (10, 2, 6) self.new_shape = (10, 0, 3, -1) @@ -216,6 +227,7 @@ class TestReshapeOpDimInfer2_attr_OnlyShape(TestReshapeOp_attr_OnlyShape): # test int8 data type on CPU class TestReshapeInt8Op(OpTest): + def setUp(self): self.init_dtype() self.init_data() @@ -242,8 +254,9 @@ class TestReshapeInt8Op(OpTest): self.infered_shape = (10, 2, 3, -1) def test_check_output(self): - self.check_output_with_place( - fluid.core.CPUPlace(), atol=1e-5, no_check_set=['XShape']) + self.check_output_with_place(fluid.core.CPUPlace(), + atol=1e-5, + no_check_set=['XShape']) def test_check_grad(self): pass @@ -251,17 +264,18 @@ class TestReshapeInt8Op(OpTest): # test unt8 data type on CPU class TestReshapeUint8Op(TestReshapeInt8Op): + def init_dtype(self): self.dtype = np.uint8 class TestReshapeOpBool(TestReshapeOp): + def setUp(self): self.init_data() self.op_type = "reshape2" self.inputs = { - "X": np.random.choice( - [True, False], size=self.ori_shape) + "X": np.random.choice([True, False], size=self.ori_shape) } self.attrs = {"shape": self.new_shape} self.outputs = { @@ -275,6 +289,7 @@ class TestReshapeOpBool(TestReshapeOp): # Test python API class TestReshapeAPI(unittest.TestCase): + def _set_paddle_api(self): self.fill_constant = paddle.fluid.layers.fill_constant self.data = paddle.static.data @@ -304,8 +319,9 @@ class TestReshapeAPI(unittest.TestCase): out_1 = self.reshape(x, shape) # situation 2: have shape(list, no tensor), have actual shape(Tensor) - out_2 = fluid.layers.reshape( - x, shape=shape, 
actual_shape=actual_shape) + out_2 = fluid.layers.reshape(x, + shape=shape, + actual_shape=actual_shape) # Situation 3: have shape(list, have tensor), no actual shape(Tensor) out_3 = self.reshape(x, shape=[positive_five, 10]) @@ -316,8 +332,10 @@ class TestReshapeAPI(unittest.TestCase): exe = paddle.static.Executor(place=paddle.CPUPlace()) res_1, res_2, res_3, res_4 = exe.run( main_prog, - feed={"x": input, - "shape": np.array([2, 5, 5]).astype("int32")}, + feed={ + "x": input, + "shape": np.array([2, 5, 5]).astype("int32") + }, fetch_list=[out_1, out_2, out_3, out_4]) assert np.array_equal(res_1, input.reshape(shape)) @@ -354,6 +372,7 @@ class TestReshapeAPI(unittest.TestCase): class TestStaticReshape_(TestReshapeAPI): + def _executed_api(self): self.reshape = paddle.reshape_ @@ -379,6 +398,7 @@ class TestStaticReshape_(TestReshapeAPI): # Test Input Error class TestReshapeOpError(unittest.TestCase): + def _set_paddle_api(self): self.data = paddle.static.data self.reshape = paddle.reshape @@ -391,8 +411,8 @@ class TestReshapeOpError(unittest.TestCase): with program_guard(Program(), Program()): # The x type of reshape_op must be Variable. def test_x_type(): - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], paddle.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + paddle.CPUPlace()) self.reshape(x1, shape=[1]) self.assertRaises(TypeError, test_x_type) @@ -405,8 +425,9 @@ class TestReshapeOpError(unittest.TestCase): self.assertRaises(TypeError, test_x_dtype) def test_x_dtype_float16(): - x_float16 = self.data( - name="x_float16", shape=[2, 25], dtype="float16") + x_float16 = self.data(name="x_float16", + shape=[2, 25], + dtype="float16") self.reshape(x_float16, shape=[2, 5, 5]) test_x_dtype_float16() @@ -453,6 +474,7 @@ class TestReshapeOpError(unittest.TestCase): class TestDygraphReshapeAPI(unittest.TestCase): + def setUp(self): self.executed_api() @@ -488,14 +510,16 @@ class TestDygraphReshapeAPI(unittest.TestCase): class TestDygraphReshapeInplaceAPI(TestDygraphReshapeAPI): + def executed_api(self): self.reshape = paddle.reshape_ class TestReshapeZeroTensor(unittest.TestCase): + def test_reshape_zero_tensor_success(self): zero_tensor = paddle.zeros([0, 2, 3]) - # since we use "0" as the dimension copy semantically in reshape, + # since we use "0" as the dimension copy semantically in reshape, # we need to copy the 0 dim in the src tensor in order to make a successful zero tensor reshape zero_tensor = zero_tensor.reshape([0, 6]) self.assertTrue(list(zero_tensor.shape) == [0, 6]) diff --git a/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py b/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py index 2f6ca1dfa0c..829960250d0 100644 --- a/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py +++ b/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py @@ -21,8 +21,8 @@ import unittest paddle.enable_static() -logging.basicConfig( - format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO) +logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', + level=logging.INFO) logger = logging.getLogger(__name__) @@ -38,6 +38,7 @@ def set_cinn_flag(val): @unittest.skipIf(not set_cinn_flag(True), "Paddle is not compiled with CINN.") class TestResnet50Accuracy(unittest.TestCase): + def reader(self, limit): for _ in range(limit): yield {'image': np.random.randint(0, 256, size=[32, 3, 224, 224]).astype('float32'), \ @@ -52,8 +53,9 @@ class TestResnet50Accuracy(unittest.TestCase): def build_program(self, 
main_program, startup_program): with paddle.static.program_guard(main_program, startup_program): - image = paddle.static.data( - name='image', shape=[32, 3, 224, 224], dtype='float32') + image = paddle.static.data(name='image', + shape=[32, 3, 224, 224], + dtype='float32') label = paddle.static.data(name='label', shape=[32], dtype='int64') # TODO: stop_gradient slower training speed, need fix @@ -62,8 +64,8 @@ class TestResnet50Accuracy(unittest.TestCase): model = paddle.vision.models.resnet50() prediction = model(image) - loss = paddle.nn.functional.cross_entropy( - input=prediction, label=label) + loss = paddle.nn.functional.cross_entropy(input=prediction, + label=label) loss = paddle.mean(loss) adam = paddle.optimizer.Adam(learning_rate=0.001) adam.minimize(loss) @@ -98,8 +100,8 @@ class TestResnet50Accuracy(unittest.TestCase): return loss_vals def test_check_resnet50_accuracy(self): - place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda( - ) else paddle.CPUPlace() + place = paddle.CUDAPlace( + 0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace() loop_num = 10 feed = self.generate_random_data(loop_num) diff --git a/python/paddle/fluid/tests/unittests/test_retain_graph.py b/python/paddle/fluid/tests/unittests/test_retain_graph.py index 0259b898a48..71998f57e5f 100644 --- a/python/paddle/fluid/tests/unittests/test_retain_graph.py +++ b/python/paddle/fluid/tests/unittests/test_retain_graph.py @@ -24,6 +24,7 @@ paddle.seed(SEED) class Generator(fluid.dygraph.Layer): + def __init__(self): super(Generator, self).__init__() self.conv1 = paddle.nn.Conv2D(3, 3, 3, padding=1) @@ -35,6 +36,7 @@ class Generator(fluid.dygraph.Layer): class Discriminator(fluid.dygraph.Layer): + def __init__(self): super(Discriminator, self).__init__() self.convd = paddle.nn.Conv2D(6, 3, 1) @@ -45,6 +47,7 @@ class Discriminator(fluid.dygraph.Layer): class TestRetainGraph(unittest.TestCase): + def cal_gradient_penalty(self, netD, real_data, @@ -73,21 +76,21 @@ class TestRetainGraph(unittest.TestCase): fake_AB = paddle.concat((real_data.detach(), interpolatesv), 1) disc_interpolates = netD(fake_AB) - outs = paddle.fluid.layers.fill_constant( - disc_interpolates.shape, disc_interpolates.dtype, 1.0) - gradients = paddle.grad( - outputs=disc_interpolates, - inputs=fake_AB, - grad_outputs=outs, - create_graph=True, - retain_graph=True, - only_inputs=True) + outs = paddle.fluid.layers.fill_constant(disc_interpolates.shape, + disc_interpolates.dtype, + 1.0) + gradients = paddle.grad(outputs=disc_interpolates, + inputs=fake_AB, + grad_outputs=outs, + create_graph=True, + retain_graph=True, + only_inputs=True) gradients = paddle.reshape(gradients[0], [real_data.shape[0], -1]) - gradient_penalty = paddle.mean((paddle.norm(gradients + 1e-16, 2, 1) - - constant)** - 2) * lambda_gp # added eps + gradient_penalty = paddle.mean( + (paddle.norm(gradients + 1e-16, 2, 1) - constant)** + 2) * lambda_gp # added eps return gradient_penalty, gradients else: return 0.0, None @@ -113,11 +116,13 @@ class TestRetainGraph(unittest.TestCase): fake_AB = paddle.concat((realA, fakeB), 1) G_pred_fake = d(fake_AB.detach()) - false_target = paddle.fluid.layers.fill_constant(G_pred_fake.shape, - 'float32', 0.0) + false_target = paddle.fluid.layers.fill_constant( + G_pred_fake.shape, 'float32', 0.0) - G_gradient_penalty, _ = self.cal_gradient_penalty( - d, realA, fakeB, lambda_gp=10.0) + G_gradient_penalty, _ = self.cal_gradient_penalty(d, + realA, + fakeB, + lambda_gp=10.0) loss_d = gan_criterion(G_pred_fake, false_target) + 
G_gradient_penalty loss_d.backward(retain_graph=need_retain) @@ -128,8 +133,8 @@ class TestRetainGraph(unittest.TestCase): G_pred_fake = d(fake_AB) true_target = paddle.fluid.layers.fill_constant(G_pred_fake.shape, 'float32', 1.0) - loss_g = l1_criterion(fakeB, realB) + gan_criterion(G_pred_fake, - true_target) + loss_g = l1_criterion(fakeB, realB) + gan_criterion( + G_pred_fake, true_target) loss_g.backward() optim_g.minimize(loss_g) diff --git a/python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py b/python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py index 1bfc1b00aa8..4353c27278f 100644 --- a/python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py +++ b/python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py @@ -45,8 +45,9 @@ def multiclass_nms(prediction, class_num, keep_top_k, nms_threshold): for idx in indices: score_index.append((prediction[c][idx][4], c, idx)) - sorted_score_index = sorted( - score_index, key=lambda tup: tup[0], reverse=True) + sorted_score_index = sorted(score_index, + key=lambda tup: tup[0], + reverse=True) if keep_top_k > -1 and num_det > keep_top_k: sorted_score_index = sorted_score_index[:keep_top_k] num_det = keep_top_k @@ -103,10 +104,10 @@ def retinanet_detection_out(boxes_list, scores_list, anchors_list, im_info, box_offset] * anchor_box_width + anchor_box_center_x target_box_center_y = bboxes_per_level[ box_offset + 1] * anchor_box_height + anchor_box_center_y - target_box_width = math.exp(bboxes_per_level[box_offset + - 2]) * anchor_box_width - target_box_height = math.exp(bboxes_per_level[ - box_offset + 3]) * anchor_box_height + target_box_width = math.exp( + bboxes_per_level[box_offset + 2]) * anchor_box_width + target_box_height = math.exp( + bboxes_per_level[box_offset + 3]) * anchor_box_height pred_box_xmin = target_box_center_x - target_box_width / 2 pred_box_ymin = target_box_center_y - target_box_height / 2 @@ -119,13 +120,17 @@ def retinanet_detection_out(boxes_list, scores_list, anchors_list, im_info, pred_box_ymax = pred_box_ymax / im_scale pred_box_xmin = max( - min(pred_box_xmin, np.round(im_width / im_scale) - 1), 0.) + min(pred_box_xmin, + np.round(im_width / im_scale) - 1), 0.) pred_box_ymin = max( - min(pred_box_ymin, np.round(im_height / im_scale) - 1), 0.) + min(pred_box_ymin, + np.round(im_height / im_scale) - 1), 0.) pred_box_xmax = max( - min(pred_box_xmax, np.round(im_width / im_scale) - 1), 0.) + min(pred_box_xmax, + np.round(im_width / im_scale) - 1), 0.) pred_box_ymax = max( - min(pred_box_ymax, np.round(im_height / im_scale) - 1), 0.) + min(pred_box_ymax, + np.round(im_height / im_scale) - 1), 0.) 
if c not in prediction.keys(): prediction[c] = [] @@ -167,6 +172,7 @@ def batched_retinanet_detection_out(boxes, scores, anchors, im_info, class TestRetinanetDetectionOutOp1(OpTest): + def set_argument(self): self.score_threshold = 0.05 self.min_level = 3 @@ -251,11 +257,14 @@ class TestRetinanetDetectionOutOp1(OpTest): 'Scores': [('s0', self.scores_list[0]), ('s1', self.scores_list[1]), ('s2', self.scores_list[2]), ('s3', self.scores_list[3]), ('s4', self.scores_list[4])], - 'Anchors': - [('a0', self.anchors_list[0]), ('a1', self.anchors_list[1]), - ('a2', self.anchors_list[2]), ('a3', self.anchors_list[3]), - ('a4', self.anchors_list[4])], - 'ImInfo': (self.im_info, [[1, ]]) + 'Anchors': [('a0', self.anchors_list[0]), + ('a1', self.anchors_list[1]), + ('a2', self.anchors_list[2]), + ('a3', self.anchors_list[3]), + ('a4', self.anchors_list[4])], + 'ImInfo': (self.im_info, [[ + 1, + ]]) } self.outputs = {'Out': (nmsed_outs, [lod])} self.attrs = { @@ -271,6 +280,7 @@ class TestRetinanetDetectionOutOp1(OpTest): class TestRetinanetDetectionOutOp2(OpTest): + def set_argument(self): self.score_threshold = 0.05 self.min_level = 3 @@ -295,6 +305,7 @@ class TestRetinanetDetectionOutOp2(OpTest): class TestRetinanetDetectionOutOpNo3(TestRetinanetDetectionOutOp1): + def set_argument(self): # Here set 2.0 to test the case there is no outputs. # In practical use, 0.0 < score_threshold < 1.0 @@ -324,6 +335,7 @@ class TestRetinanetDetectionOutOpNo3(TestRetinanetDetectionOutOp1): class TestRetinanetDetectionOutOpNo4(TestRetinanetDetectionOutOp1): + def set_argument(self): self.score_threshold = 0.05 self.min_level = 2 @@ -360,16 +372,19 @@ class TestRetinanetDetectionOutOpNo4(TestRetinanetDetectionOutOp1): nmsed_outs = np.array(nmsed_outs).astype('float32') self.op_type = 'retinanet_detection_output' self.inputs = { - 'BBoxes': - [('b0', self.bboxes_list[0]), ('b1', self.bboxes_list[1]), - ('b2', self.bboxes_list[2]), ('b3', self.bboxes_list[3])], + 'BBoxes': [('b0', self.bboxes_list[0]), ('b1', self.bboxes_list[1]), + ('b2', self.bboxes_list[2]), + ('b3', self.bboxes_list[3])], 'Scores': [('s0', self.scores_list[0]), ('s1', self.scores_list[1]), ('s2', self.scores_list[2]), ('s3', self.scores_list[3])], - 'Anchors': - [('a0', self.anchors_list[0]), ('a1', self.anchors_list[1]), - ('a2', self.anchors_list[2]), ('a3', self.anchors_list[3])], - 'ImInfo': (self.im_info, [[1, ]]) + 'Anchors': [('a0', self.anchors_list[0]), + ('a1', self.anchors_list[1]), + ('a2', self.anchors_list[2]), + ('a3', self.anchors_list[3])], + 'ImInfo': (self.im_info, [[ + 1, + ]]) } self.outputs = {'Out': (nmsed_outs, [lod])} self.attrs = { @@ -385,6 +400,7 @@ class TestRetinanetDetectionOutOpNo4(TestRetinanetDetectionOutOp1): class TestRetinanetDetectionOutOpNo5(TestRetinanetDetectionOutOp1): + def set_argument(self): self.score_threshold = 0.05 self.min_level = 3 @@ -412,24 +428,32 @@ class TestRetinanetDetectionOutOpNo5(TestRetinanetDetectionOutOp1): class TestRetinanetDetectionOutOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - bboxes_low1 = fluid.data( - name='bboxes_low1', shape=[1, 44, 4], dtype='float32') - bboxes_high1 = fluid.data( - name='bboxes_high1', shape=[1, 11, 4], dtype='float32') - scores_low1 = fluid.data( - name='scores_low1', shape=[1, 44, 10], dtype='float32') - scores_high1 = fluid.data( - name='scores_high1', shape=[1, 11, 10], dtype='float32') - anchors_low1 = fluid.data( - name='anchors_low1', shape=[44, 4], dtype='float32') - anchors_high1 = fluid.data( - 
name='anchors_high1', shape=[11, 4], dtype='float32') - im_info1 = fluid.data( - name="im_info1", shape=[1, 3], dtype='float32') - - # The `bboxes` must be list, each element must be Variable and + bboxes_low1 = fluid.data(name='bboxes_low1', + shape=[1, 44, 4], + dtype='float32') + bboxes_high1 = fluid.data(name='bboxes_high1', + shape=[1, 11, 4], + dtype='float32') + scores_low1 = fluid.data(name='scores_low1', + shape=[1, 44, 10], + dtype='float32') + scores_high1 = fluid.data(name='scores_high1', + shape=[1, 11, 10], + dtype='float32') + anchors_low1 = fluid.data(name='anchors_low1', + shape=[44, 4], + dtype='float32') + anchors_high1 = fluid.data(name='anchors_high1', + shape=[11, 4], + dtype='float32') + im_info1 = fluid.data(name="im_info1", + shape=[1, 3], + dtype='float32') + + # The `bboxes` must be list, each element must be Variable and # its Tensor data type must be one of float32 and float64. def test_bboxes_type(): fluid.layers.retinanet_detection_output( @@ -441,8 +465,9 @@ class TestRetinanetDetectionOutOpError(unittest.TestCase): self.assertRaises(TypeError, test_bboxes_type) def test_bboxes_tensor_dtype(): - bboxes_high2 = fluid.data( - name='bboxes_high2', shape=[1, 11, 4], dtype='int32') + bboxes_high2 = fluid.data(name='bboxes_high2', + shape=[1, 11, 4], + dtype='int32') fluid.layers.retinanet_detection_output( bboxes=[bboxes_high2, 5], scores=[scores_low1, scores_high1], @@ -463,8 +488,9 @@ class TestRetinanetDetectionOutOpError(unittest.TestCase): self.assertRaises(TypeError, test_scores_type) def test_scores_tensor_dtype(): - scores_high2 = fluid.data( - name='scores_high2', shape=[1, 11, 10], dtype='int32') + scores_high2 = fluid.data(name='scores_high2', + shape=[1, 11, 10], + dtype='int32') fluid.layers.retinanet_detection_output( bboxes=[bboxes_low1, bboxes_high1], scores=[scores_high2, 5], @@ -485,8 +511,9 @@ class TestRetinanetDetectionOutOpError(unittest.TestCase): self.assertRaises(TypeError, test_anchors_type) def test_anchors_tensor_dtype(): - anchors_high2 = fluid.data( - name='anchors_high2', shape=[11, 4], dtype='int32') + anchors_high2 = fluid.data(name='anchors_high2', + shape=[11, 4], + dtype='int32') fluid.layers.retinanet_detection_output( bboxes=[bboxes_low1, bboxes_high1], scores=[scores_low1, scores_high1], @@ -507,8 +534,9 @@ class TestRetinanetDetectionOutOpError(unittest.TestCase): self.assertRaises(TypeError, test_iminfo_type) def test_iminfo_tensor_dtype(): - im_info2 = fluid.data( - name='im_info2', shape=[1, 3], dtype='int32') + im_info2 = fluid.data(name='im_info2', + shape=[1, 3], + dtype='int32') fluid.layers.retinanet_detection_output( bboxes=[bboxes_low1, bboxes_high1], scores=[scores_low1, scores_high1], diff --git a/python/paddle/fluid/tests/unittests/test_reverse_op.py b/python/paddle/fluid/tests/unittests/test_reverse_op.py index 9b739eff97c..263fecc619e 100644 --- a/python/paddle/fluid/tests/unittests/test_reverse_op.py +++ b/python/paddle/fluid/tests/unittests/test_reverse_op.py @@ -23,6 +23,7 @@ from paddle.fluid import core class TestReverseOp(OpTest): + def initTestCase(self): self.x = np.random.random((3, 40)).astype('float64') self.axis = [0] @@ -45,54 +46,63 @@ class TestReverseOp(OpTest): class TestCase0(TestReverseOp): + def initTestCase(self): self.x = np.random.random((3, 40)).astype('float64') self.axis = [1] class TestCase0_neg(TestReverseOp): + def initTestCase(self): self.x = np.random.random((3, 40)).astype('float64') self.axis = [-1] class TestCase1(TestReverseOp): + def initTestCase(self): self.x = 
np.random.random((3, 40)).astype('float64') self.axis = [0, 1] class TestCase1_neg(TestReverseOp): + def initTestCase(self): self.x = np.random.random((3, 40)).astype('float64') self.axis = [0, -1] class TestCase2(TestReverseOp): + def initTestCase(self): self.x = np.random.random((3, 4, 10)).astype('float64') self.axis = [0, 2] class TestCase2_neg(TestReverseOp): + def initTestCase(self): self.x = np.random.random((3, 4, 10)).astype('float64') self.axis = [0, -2] class TestCase3(TestReverseOp): + def initTestCase(self): self.x = np.random.random((3, 4, 10)).astype('float64') self.axis = [1, 2] class TestCase3_neg(TestReverseOp): + def initTestCase(self): self.x = np.random.random((3, 4, 10)).astype('float64') self.axis = [-1, -2] class TestCase4(unittest.TestCase): + def test_error(self): place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -100,8 +110,9 @@ class TestCase4(unittest.TestCase): train_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): - label = fluid.layers.data( - name="label", shape=[1, 1, 1, 1, 1, 1, 1, 1], dtype="int64") + label = fluid.layers.data(name="label", + shape=[1, 1, 1, 1, 1, 1, 1, 1], + dtype="int64") rev = fluid.layers.reverse(label, axis=[-1, -2]) def _run_program(): @@ -112,10 +123,11 @@ class TestCase4(unittest.TestCase): class TestReverseLoDTensorArray(unittest.TestCase): + def setUp(self): self.shapes = [[5, 25], [5, 20], [5, 5]] - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.exe = fluid.Executor(self.place) def run_program(self, arr_len, axis=0): diff --git a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py index 62839d3a960..42f32f2e75b 100644 --- a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py +++ b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py @@ -28,8 +28,9 @@ def create_selected_rows_and_tensor(scope, place, height, row_num, sr = scope.var("@selected_rows@").get_selected_rows() tensor = scope.var("grad").get_tensor() - rows = np.random.random_integers( - low=0, high=height - 1, size=[row_num, ]).astype('int64') + rows = np.random.random_integers(low=0, high=height - 1, size=[ + row_num, + ]).astype('int64') sr_val = np.random.random(size=[row_num, embedding_size]).astype('float32') sr.set_height(height) @@ -46,6 +47,7 @@ def create_selected_rows_and_tensor(scope, place, height, row_num, class TestBase(unittest.TestCase): + def setup(self, place, is_sparse, @@ -62,8 +64,8 @@ class TestBase(unittest.TestCase): self.param = np.random.random(size).astype("float32") self.mean_square_name = "mean_square" - self.mean_square = np.random.uniform( - low=1, high=2, size=size).astype("float32") + self.mean_square = np.random.uniform(low=1, high=2, + size=size).astype("float32") self.mean_grad_name = "mean_grad" self.mean_grad = np.random.random(size).astype("float32") @@ -84,19 +86,19 @@ class TestBase(unittest.TestCase): grad_tensor.set(self.grad, place) self.moment_name = "moment" - self.moment = np.random.uniform( - low=0, high=1, size=size).astype("float32") + self.moment = np.random.uniform(low=0, high=1, + size=size).astype("float32") self.epsilon = epsilon self.decay = 0.9 self.momentum = 0.1 self.centered = centered - self.ms_out = self.decay * self.mean_square + (1 - self.decay - ) * self.grad * self.grad + self.ms_out = self.decay * self.mean_square + ( + 1 - 
self.decay) * self.grad * self.grad if centered: - self.mg_out = self.decay * self.mean_grad + (1 - self.decay - ) * self.grad + self.mg_out = self.decay * self.mean_grad + (1 - + self.decay) * self.grad self.moment_out = self.momentum * self.moment + \ self.learning_rate * self.grad / np.sqrt(self.ms_out - np.square(self.mg_out) + self.epsilon) else: @@ -126,13 +128,13 @@ class TestBase(unittest.TestCase): def check(self, actual_t, expect_t, place, out_name, atol=1e-5): self.assertTrue( - np.allclose( - actual_t, expect_t, atol=atol), - "Output (" + out_name + ") has diff at " + str(place) + "\nExpect " - + str(expect_t) + "\n" + "But Got" + str(actual_t)) + np.allclose(actual_t, expect_t, atol=atol), + "Output (" + out_name + ") has diff at " + str(place) + + "\nExpect " + str(expect_t) + "\n" + "But Got" + str(actual_t)) class TestRmspropOp(TestBase): + def check_with_place(self, place, is_sparse, @@ -170,29 +172,25 @@ class TestRmspropOp(TestBase): rmsprop_op.run(self.scope, self.place) - self.check( - np.array(self.mean_square_tensor), - self.ms_out, - self.place, - self.mean_square_name, - atol=atol) - self.check( - np.array(self.moment_tensor), - self.moment_out, - self.place, - self.moment_name, - atol=atol) - self.check( - np.array(self.param_tensor), - self.param_out, - self.place, - self.param_name, - atol=atol) + self.check(np.array(self.mean_square_tensor), + self.ms_out, + self.place, + self.mean_square_name, + atol=atol) + self.check(np.array(self.moment_tensor), + self.moment_out, + self.place, + self.moment_name, + atol=atol) + self.check(np.array(self.param_tensor), + self.param_out, + self.place, + self.param_name, + atol=atol) if self.centered: - self.check( - np.array(self.mean_grad_tensor), self.mg_out, self.place, - self.mean_grad_name) + self.check(np.array(self.mean_grad_tensor), self.mg_out, self.place, + self.mean_grad_name) def test_rmsprop(self): places = [core.CPUPlace()] @@ -203,37 +201,37 @@ class TestRmspropOp(TestBase): for place in places: for centered in [False, True]: with fluid.scope_guard(core.Scope()): - self.check_with_place( - place, is_sparse=False, centered=centered, size=size) + self.check_with_place(place, + is_sparse=False, + centered=centered, + size=size) with fluid.scope_guard(core.Scope()): - self.check_with_place( - place, - is_sparse=True, - centered=centered, - row_num=512, - size=size) + self.check_with_place(place, + is_sparse=True, + centered=centered, + row_num=512, + size=size) with fluid.scope_guard(core.Scope()): - self.check_with_place( - place, - is_sparse=True, - centered=centered, - row_num=60, - size=size) + self.check_with_place(place, + is_sparse=True, + centered=centered, + row_num=60, + size=size) class TestRMSPropV2(unittest.TestCase): + def test_rmsprop_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") a = paddle.to_tensor(value) linear = paddle.nn.Linear(13, 5) # This can be any optimizer supported by dygraph. 
- adam = paddle.optimizer.RMSProp( - learning_rate=0.01, - parameters=linear.parameters(), - weight_decay=0.01) + adam = paddle.optimizer.RMSProp(learning_rate=0.01, + parameters=linear.parameters(), + weight_decay=0.01) out = linear(a) out.backward() adam.step() @@ -254,8 +252,8 @@ class TestRMSPropV2(unittest.TestCase): rms_optimizer.minimize(avg_cost) fetch_list = [avg_cost] - train_reader = paddle.batch( - paddle.dataset.uci_housing.train(), batch_size=1) + train_reader = paddle.batch(paddle.dataset.uci_housing.train(), + batch_size=1) feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) @@ -264,34 +262,38 @@ class TestRMSPropV2(unittest.TestCase): def test_raise_error(self): self.assertRaises(ValueError, paddle.optimizer.RMSProp, None) - self.assertRaises( - ValueError, paddle.optimizer.RMSProp, learning_rate=0.1, rho=None) - self.assertRaises( - ValueError, - paddle.optimizer.RMSProp, - learning_rate=0.1, - epsilon=None) - self.assertRaises( - ValueError, - paddle.optimizer.RMSProp, - learning_rate=0.1, - momentum=None) + self.assertRaises(ValueError, + paddle.optimizer.RMSProp, + learning_rate=0.1, + rho=None) + self.assertRaises(ValueError, + paddle.optimizer.RMSProp, + learning_rate=0.1, + epsilon=None) + self.assertRaises(ValueError, + paddle.optimizer.RMSProp, + learning_rate=0.1, + momentum=None) def test_rmsprop_op_invalid_input(self): paddle.disable_static() linear = paddle.nn.Linear(10, 10) with self.assertRaises(ValueError): - adam = paddle.optimizer.RMSProp( - 0.1, epsilon=-1, parameters=linear.parameters()) + adam = paddle.optimizer.RMSProp(0.1, + epsilon=-1, + parameters=linear.parameters()) with self.assertRaises(ValueError): - adam = paddle.optimizer.RMSProp( - 0.1, momentum=-1, parameters=linear.parameters()) + adam = paddle.optimizer.RMSProp(0.1, + momentum=-1, + parameters=linear.parameters()) with self.assertRaises(ValueError): - adam = paddle.optimizer.RMSProp( - 0.1, rho=-1, parameters=linear.parameters()) + adam = paddle.optimizer.RMSProp(0.1, + rho=-1, + parameters=linear.parameters()) class TestRMSPropV2Group(TestRMSPropV2): + def test_rmsprop_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") @@ -299,15 +301,17 @@ class TestRMSPropV2Group(TestRMSPropV2): linear_1 = paddle.nn.Linear(13, 5) linear_2 = paddle.nn.Linear(5, 3) # This can be any optimizer supported by dygraph. 
- adam = paddle.optimizer.RMSProp( - learning_rate=0.01, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001 - }], - weight_decay=0.01) + adam = paddle.optimizer.RMSProp(learning_rate=0.01, + parameters=[{ + 'params': + linear_1.parameters() + }, { + 'params': + linear_2.parameters(), + 'weight_decay': + 0.001 + }], + weight_decay=0.01) out = linear_1(a) out = linear_2(out) out.backward() diff --git a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py index 6b9438eecea..527b6c5e2d8 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py @@ -35,15 +35,19 @@ import numpy as np class TestLSTMCellError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): batch_size, input_size, hidden_size = 4, 16, 16 - inputs = fluid.data( - name='inputs', shape=[None, input_size], dtype='float32') - pre_hidden = fluid.data( - name='pre_hidden', shape=[None, hidden_size], dtype='float32') - pre_cell = fluid.data( - name='pre_cell', shape=[None, hidden_size], dtype='float32') + inputs = fluid.data(name='inputs', + shape=[None, input_size], + dtype='float32') + pre_hidden = fluid.data(name='pre_hidden', + shape=[None, hidden_size], + dtype='float32') + pre_cell = fluid.data(name='pre_cell', + shape=[None, hidden_size], + dtype='float32') cell = LSTMCell(hidden_size) def test_input_Variable(): @@ -68,28 +72,25 @@ class TestLSTMCellError(unittest.TestCase): self.assertRaises(TypeError, test_pre_cell_Variable) def test_input_type(): - error_inputs = fluid.data( - name='error_inputs', - shape=[None, input_size], - dtype='int32') + error_inputs = fluid.data(name='error_inputs', + shape=[None, input_size], + dtype='int32') cell(error_inputs, [pre_hidden, pre_cell]) self.assertRaises(TypeError, test_input_type) def test_pre_hidden_type(): - error_pre_hidden = fluid.data( - name='error_pre_hidden', - shape=[None, hidden_size], - dtype='int32') + error_pre_hidden = fluid.data(name='error_pre_hidden', + shape=[None, hidden_size], + dtype='int32') cell(inputs, [error_pre_hidden, pre_cell]) self.assertRaises(TypeError, test_pre_hidden_type) def test_pre_cell_type(): - error_pre_cell = fluid.data( - name='error_pre_cell', - shape=[None, hidden_size], - dtype='int32') + error_pre_cell = fluid.data(name='error_pre_cell', + shape=[None, hidden_size], + dtype='int32') cell(inputs, [pre_hidden, error_pre_cell]) self.assertRaises(TypeError, test_pre_cell_type) @@ -102,18 +103,22 @@ class TestLSTMCellError(unittest.TestCase): class TestLSTMCell(unittest.TestCase): + def setUp(self): self.batch_size = 4 self.input_size = 16 self.hidden_size = 16 def test_run(self): - inputs = fluid.data( - name='inputs', shape=[None, self.input_size], dtype='float32') - pre_hidden = fluid.data( - name='pre_hidden', shape=[None, self.hidden_size], dtype='float32') - pre_cell = fluid.data( - name='pre_cell', shape=[None, self.hidden_size], dtype='float32') + inputs = fluid.data(name='inputs', + shape=[None, self.input_size], + dtype='float32') + pre_hidden = fluid.data(name='pre_hidden', + shape=[None, self.hidden_size], + dtype='float32') + pre_cell = fluid.data(name='pre_cell', + shape=[None, self.hidden_size], + dtype='float32') cell = LSTMCell(self.hidden_size) lstm_hidden_new, lstm_states_new = cell(inputs, [pre_hidden, pre_cell]) @@ -142,14 +147,14 @@ class TestLSTMCell(unittest.TestCase): ], 
["LSTMCell/BasicLSTMUnit_0.b_0", "basicLSTM/BasicLSTMUnit_0.b_0"]] for names in param_names: - param = np.array(fluid.global_scope().find_var(names[0]).get_tensor( - )) - param = np.random.uniform( - -0.1, 0.1, size=param.shape).astype('float32') - fluid.global_scope().find_var(names[0]).get_tensor().set(param, - place) - fluid.global_scope().find_var(names[1]).get_tensor().set(param, - place) + param = np.array(fluid.global_scope().find_var( + names[0]).get_tensor()) + param = np.random.uniform(-0.1, 0.1, + size=param.shape).astype('float32') + fluid.global_scope().find_var(names[0]).get_tensor().set( + param, place) + fluid.global_scope().find_var(names[1]).get_tensor().set( + param, place) out = exe.run(feed={ 'inputs': inputs_np, @@ -162,16 +167,17 @@ class TestLSTMCell(unittest.TestCase): class TestGRUCellError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): batch_size, input_size, hidden_size = 4, 16, 16 - inputs = fluid.data( - name='inputs', shape=[None, input_size], dtype='float32') - pre_hidden = layers.data( - name='pre_hidden', - shape=[None, hidden_size], - append_batch_size=False, - dtype='float32') + inputs = fluid.data(name='inputs', + shape=[None, input_size], + dtype='float32') + pre_hidden = layers.data(name='pre_hidden', + shape=[None, hidden_size], + append_batch_size=False, + dtype='float32') cell = GRUCell(hidden_size) def test_input_Variable(): @@ -189,19 +195,17 @@ class TestGRUCellError(unittest.TestCase): self.assertRaises(TypeError, test_pre_hidden_Variable) def test_input_type(): - error_inputs = fluid.data( - name='error_inputs', - shape=[None, input_size], - dtype='int32') + error_inputs = fluid.data(name='error_inputs', + shape=[None, input_size], + dtype='int32') cell(error_inputs, pre_hidden) self.assertRaises(TypeError, test_input_type) def test_pre_hidden_type(): - error_pre_hidden = fluid.data( - name='error_pre_hidden', - shape=[None, hidden_size], - dtype='int32') + error_pre_hidden = fluid.data(name='error_pre_hidden', + shape=[None, hidden_size], + dtype='int32') cell(inputs, error_pre_hidden) self.assertRaises(TypeError, test_pre_hidden_type) @@ -214,25 +218,28 @@ class TestGRUCellError(unittest.TestCase): class TestGRUCell(unittest.TestCase): + def setUp(self): self.batch_size = 4 self.input_size = 16 self.hidden_size = 16 def test_run(self): - inputs = fluid.data( - name='inputs', shape=[None, self.input_size], dtype='float32') - pre_hidden = layers.data( - name='pre_hidden', - shape=[None, self.hidden_size], - append_batch_size=False, - dtype='float32') + inputs = fluid.data(name='inputs', + shape=[None, self.input_size], + dtype='float32') + pre_hidden = layers.data(name='pre_hidden', + shape=[None, self.hidden_size], + append_batch_size=False, + dtype='float32') cell = GRUCell(self.hidden_size) gru_hidden_new, _ = cell(inputs, pre_hidden) - gru_unit = contrib.layers.rnn_impl.BasicGRUUnit( - "basicGRU", self.hidden_size, None, None, None, None, "float32") + gru_unit = contrib.layers.rnn_impl.BasicGRUUnit("basicGRU", + self.hidden_size, None, + None, None, None, + "float32") gru_hidden = gru_unit(inputs, pre_hidden) if core.is_compiled_with_cuda(): @@ -255,64 +262,65 @@ class TestGRUCell(unittest.TestCase): ] for names in param_names: - param = np.array(fluid.global_scope().find_var(names[0]).get_tensor( - )) - param = np.random.uniform( - -0.1, 0.1, size=param.shape).astype('float32') - fluid.global_scope().find_var(names[0]).get_tensor().set(param, - place) - 
fluid.global_scope().find_var(names[1]).get_tensor().set(param, - place) - - out = exe.run(feed={'inputs': inputs_np, - 'pre_hidden': pre_hidden_np}, + param = np.array(fluid.global_scope().find_var( + names[0]).get_tensor()) + param = np.random.uniform(-0.1, 0.1, + size=param.shape).astype('float32') + fluid.global_scope().find_var(names[0]).get_tensor().set( + param, place) + fluid.global_scope().find_var(names[1]).get_tensor().set( + param, place) + + out = exe.run(feed={ + 'inputs': inputs_np, + 'pre_hidden': pre_hidden_np + }, fetch_list=[gru_hidden_new, gru_hidden]) self.assertTrue(np.allclose(out[0], out[1], rtol=1e-4, atol=0)) class TestRnnError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): batch_size = 4 input_size = 16 hidden_size = 16 seq_len = 4 - inputs = fluid.data( - name='inputs', shape=[None, input_size], dtype='float32') - pre_hidden = layers.data( - name='pre_hidden', - shape=[None, hidden_size], - append_batch_size=False, - dtype='float32') - inputs_basic_lstm = fluid.data( - name='inputs_basic_lstm', - shape=[None, None, input_size], - dtype='float32') - sequence_length = fluid.data( - name="sequence_length", shape=[None], dtype='int64') - - inputs_dynamic_rnn = layers.transpose( - inputs_basic_lstm, perm=[1, 0, 2]) + inputs = fluid.data(name='inputs', + shape=[None, input_size], + dtype='float32') + pre_hidden = layers.data(name='pre_hidden', + shape=[None, hidden_size], + append_batch_size=False, + dtype='float32') + inputs_basic_lstm = fluid.data(name='inputs_basic_lstm', + shape=[None, None, input_size], + dtype='float32') + sequence_length = fluid.data(name="sequence_length", + shape=[None], + dtype='int64') + + inputs_dynamic_rnn = layers.transpose(inputs_basic_lstm, + perm=[1, 0, 2]) cell = LSTMCell(hidden_size, name="LSTMCell_for_rnn") np_inputs_dynamic_rnn = np.random.random( (seq_len, batch_size, input_size)).astype("float32") def test_input_Variable(): - dynamic_rnn( - cell=cell, - inputs=np_inputs_dynamic_rnn, - sequence_length=sequence_length, - is_reverse=False) + dynamic_rnn(cell=cell, + inputs=np_inputs_dynamic_rnn, + sequence_length=sequence_length, + is_reverse=False) self.assertRaises(TypeError, test_input_Variable) def test_input_list(): - dynamic_rnn( - cell=cell, - inputs=[np_inputs_dynamic_rnn], - sequence_length=sequence_length, - is_reverse=False) + dynamic_rnn(cell=cell, + inputs=[np_inputs_dynamic_rnn], + sequence_length=sequence_length, + is_reverse=False) self.assertRaises(TypeError, test_input_list) @@ -320,12 +328,11 @@ class TestRnnError(unittest.TestCase): cell = GRUCell(hidden_size, name="GRUCell_for_rnn") error_initial_states = np.random.random( (batch_size, hidden_size)).astype("float32") - dynamic_rnn( - cell=cell, - inputs=inputs_dynamic_rnn, - initial_states=error_initial_states, - sequence_length=sequence_length, - is_reverse=False) + dynamic_rnn(cell=cell, + inputs=inputs_dynamic_rnn, + initial_states=error_initial_states, + sequence_length=sequence_length, + is_reverse=False) self.assertRaises(TypeError, test_initial_states_type) @@ -336,28 +343,27 @@ class TestRnnError(unittest.TestCase): np.random.random( (batch_size, hidden_size)).astype("float32") ] - dynamic_rnn( - cell=cell, - inputs=inputs_dynamic_rnn, - initial_states=error_initial_states, - sequence_length=sequence_length, - is_reverse=False) + dynamic_rnn(cell=cell, + inputs=inputs_dynamic_rnn, + initial_states=error_initial_states, + sequence_length=sequence_length, + is_reverse=False) self.assertRaises(TypeError, 
test_initial_states_type) def test_sequence_length_type(): np_sequence_length = np.random.random( (batch_size)).astype("float32") - dynamic_rnn( - cell=cell, - inputs=inputs_dynamic_rnn, - sequence_length=np_sequence_length, - is_reverse=False) + dynamic_rnn(cell=cell, + inputs=inputs_dynamic_rnn, + sequence_length=np_sequence_length, + is_reverse=False) self.assertRaises(TypeError, test_sequence_length_type) class TestRnn(unittest.TestCase): + def setUp(self): self.batch_size = 4 self.input_size = 16 @@ -365,20 +371,19 @@ class TestRnn(unittest.TestCase): self.seq_len = 4 def test_run(self): - inputs_basic_lstm = fluid.data( - name='inputs_basic_lstm', - shape=[None, None, self.input_size], - dtype='float32') - sequence_length = fluid.data( - name="sequence_length", shape=[None], dtype='int64') + inputs_basic_lstm = fluid.data(name='inputs_basic_lstm', + shape=[None, None, self.input_size], + dtype='float32') + sequence_length = fluid.data(name="sequence_length", + shape=[None], + dtype='int64') inputs_dynamic_rnn = layers.transpose(inputs_basic_lstm, perm=[1, 0, 2]) cell = LSTMCell(self.hidden_size, name="LSTMCell_for_rnn") - output, final_state = dynamic_rnn( - cell=cell, - inputs=inputs_dynamic_rnn, - sequence_length=sequence_length, - is_reverse=False) + output, final_state = dynamic_rnn(cell=cell, + inputs=inputs_dynamic_rnn, + sequence_length=sequence_length, + is_reverse=False) output_new = layers.transpose(output, perm=[1, 0, 2]) rnn_out, last_hidden, last_cell = basic_lstm(inputs_basic_lstm, None, None, self.hidden_size, num_layers=1, \ @@ -394,8 +399,8 @@ class TestRnn(unittest.TestCase): inputs_basic_lstm_np = np.random.uniform( -0.1, 0.1, (self.seq_len, self.batch_size, self.input_size)).astype('float32') - sequence_length_np = np.ones( - self.batch_size, dtype='int64') * self.seq_len + sequence_length_np = np.ones(self.batch_size, + dtype='int64') * self.seq_len inputs_np = np.random.uniform( -0.1, 0.1, (self.batch_size, self.input_size)).astype('float32') @@ -407,20 +412,21 @@ class TestRnn(unittest.TestCase): param_names = [[ "LSTMCell_for_rnn/BasicLSTMUnit_0.w_0", "basic_lstm_layers_0/BasicLSTMUnit_0.w_0" - ], [ - "LSTMCell_for_rnn/BasicLSTMUnit_0.b_0", - "basic_lstm_layers_0/BasicLSTMUnit_0.b_0" - ]] + ], + [ + "LSTMCell_for_rnn/BasicLSTMUnit_0.b_0", + "basic_lstm_layers_0/BasicLSTMUnit_0.b_0" + ]] for names in param_names: - param = np.array(fluid.global_scope().find_var(names[0]).get_tensor( - )) - param = np.random.uniform( - -0.1, 0.1, size=param.shape).astype('float32') - fluid.global_scope().find_var(names[0]).get_tensor().set(param, - place) - fluid.global_scope().find_var(names[1]).get_tensor().set(param, - place) + param = np.array(fluid.global_scope().find_var( + names[0]).get_tensor()) + param = np.random.uniform(-0.1, 0.1, + size=param.shape).astype('float32') + fluid.global_scope().find_var(names[0]).get_tensor().set( + param, place) + fluid.global_scope().find_var(names[1]).get_tensor().set( + param, place) out = exe.run(feed={ 'inputs_basic_lstm': inputs_basic_lstm_np, @@ -455,11 +461,12 @@ class EncoderCell(RNNCell): """Encoder Cell""" def __init__( - self, - num_layers, - hidden_size, - dropout_prob=0., - init_scale=0.1, ): + self, + num_layers, + hidden_size, + dropout_prob=0., + init_scale=0.1, + ): self.num_layers = num_layers self.hidden_size = hidden_size self.dropout_prob = dropout_prob @@ -474,7 +481,8 @@ class EncoderCell(RNNCell): out, new_state = self.lstm_cells[i](step_input, states[i]) step_input = layers.dropout( out, - self.dropout_prob, ) 
if self.dropout_prob else out + self.dropout_prob, + ) if self.dropout_prob else out new_states.append(new_state) return step_input, new_states @@ -500,7 +508,8 @@ class DecoderCell(RNNCell): out, new_lstm_state = self.lstm_cells[i](step_input, states[i]) step_input = layers.dropout( out, - self.dropout_prob, ) if self.dropout_prob else out + self.dropout_prob, + ) if self.dropout_prob else out new_lstm_states.append(new_lstm_state) return step_input, new_lstm_states @@ -510,11 +519,13 @@ def def_seq2seq_model(num_layers, hidden_size, dropout_prob, src_vocab_size, "vanilla seq2seq model" # data source = fluid.data(name="src", shape=[None, None], dtype="int64") - source_length = fluid.data( - name="src_sequence_length", shape=[None], dtype="int64") + source_length = fluid.data(name="src_sequence_length", + shape=[None], + dtype="int64") target = fluid.data(name="trg", shape=[None, None], dtype="int64") - target_length = fluid.data( - name="trg_sequence_length", shape=[None], dtype="int64") + target_length = fluid.data(name="trg_sequence_length", + shape=[None], + dtype="int64") label = fluid.data(name="label", shape=[None, None, 1], dtype="int64") # embedding @@ -523,25 +534,29 @@ def def_seq2seq_model(num_layers, hidden_size, dropout_prob, src_vocab_size, # encoder enc_cell = EncoderCell(num_layers, hidden_size, dropout_prob) - enc_output, enc_final_state = dynamic_rnn( - cell=enc_cell, inputs=src_emb, sequence_length=source_length) + enc_output, enc_final_state = dynamic_rnn(cell=enc_cell, + inputs=src_emb, + sequence_length=source_length) # decoder dec_cell = DecoderCell(num_layers, hidden_size, dropout_prob) - dec_output, dec_final_state = dynamic_rnn( - cell=dec_cell, inputs=tar_emb, initial_states=enc_final_state) + dec_output, dec_final_state = dynamic_rnn(cell=dec_cell, + inputs=tar_emb, + initial_states=enc_final_state) logits = layers.fc(dec_output, size=trg_vocab_size, num_flatten_dims=len(dec_output.shape) - 1, bias_attr=False) # loss - loss = layers.softmax_with_cross_entropy( - logits=logits, label=label, soft_label=False) + loss = layers.softmax_with_cross_entropy(logits=logits, + label=label, + soft_label=False) loss = layers.unsqueeze(loss, axes=[2]) max_tar_seq_len = layers.shape(target)[1] - tar_mask = layers.sequence_mask( - target_length, maxlen=max_tar_seq_len, dtype="float32") + tar_mask = layers.sequence_mask(target_length, + maxlen=max_tar_seq_len, + dtype="float32") loss = loss * tar_mask loss = layers.reduce_mean(loss, dim=[0]) loss = layers.reduce_sum(loss) @@ -572,23 +587,28 @@ class TestSeq2SeqModel(unittest.TestCase): src_seq_len = 10 trg_seq_len = 12 self.data = { - "src": np.random.randint( + "src": + np.random.randint( 2, self.model_hparams["src_vocab_size"], (iter_num * batch_size, src_seq_len)).astype("int64"), - "src_sequence_length": np.random.randint( - 1, src_seq_len, (iter_num * batch_size, )).astype("int64"), - "trg": np.random.randint( + "src_sequence_length": + np.random.randint(1, src_seq_len, + (iter_num * batch_size, )).astype("int64"), + "trg": + np.random.randint( 2, self.model_hparams["src_vocab_size"], (iter_num * batch_size, trg_seq_len)).astype("int64"), - "trg_sequence_length": np.random.randint( - 1, trg_seq_len, (iter_num * batch_size, )).astype("int64"), - "label": np.random.randint( + "trg_sequence_length": + np.random.randint(1, trg_seq_len, + (iter_num * batch_size, )).astype("int64"), + "label": + np.random.randint( 2, self.model_hparams["src_vocab_size"], (iter_num * batch_size, trg_seq_len, 1)).astype("int64"), } - place = 
core.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else core.CPUPlace() + place = core.CUDAPlace( + 0) if core.is_compiled_with_cuda() else core.CPUPlace() self.exe = Executor(place) def test_seq2seq_model(self): @@ -599,18 +619,26 @@ class TestSeq2SeqModel(unittest.TestCase): self.exe.run(startup_program) for iter_idx in range(self.iter_num): cost_val = self.exe.run(feed={ - "src": self.data["src"][iter_idx * self.batch_size:( - iter_idx + 1) * self.batch_size, :], - "src_sequence_length": self.data["src_sequence_length"] - [iter_idx * self.batch_size:(iter_idx + 1) * - self.batch_size], - "trg": self.data["trg"][iter_idx * self.batch_size:( - iter_idx + 1) * self.batch_size, :], - "trg_sequence_length": self.data["trg_sequence_length"][ - iter_idx * self.batch_size:(iter_idx + 1 - ) * self.batch_size], - "label": self.data["label"][iter_idx * self.batch_size:( - iter_idx + 1) * self.batch_size] + "src": + self.data["src"][iter_idx * self.batch_size:(iter_idx + 1) * + self.batch_size, :], + "src_sequence_length": + self.data["src_sequence_length"][iter_idx * + self.batch_size:(iter_idx + + 1) * + self.batch_size], + "trg": + self.data["trg"][iter_idx * self.batch_size:(iter_idx + 1) * + self.batch_size, :], + "trg_sequence_length": + self.data["trg_sequence_length"][iter_idx * + self.batch_size:(iter_idx + + 1) * + self.batch_size], + "label": + self.data["label"][iter_idx * + self.batch_size:(iter_idx + 1) * + self.batch_size] }, fetch_list=[cost])[0] print("iter_idx: %d, cost: %f" % (iter_idx, cost_val)) diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py index 3621fd1b9d4..f53df455239 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py @@ -32,10 +32,12 @@ import paddle.fluid.core as core from paddle.fluid.executor import Executor from paddle.fluid import framework from paddle.fluid.framework import _test_eager_guard + paddle.enable_static() class EncoderCell(layers.RNNCell): + def __init__(self, num_layers, hidden_size, dropout_prob=0.): self.num_layers = num_layers self.hidden_size = hidden_size @@ -59,6 +61,7 @@ class EncoderCell(layers.RNNCell): class DecoderCell(layers.RNNCell): + def __init__(self, num_layers, hidden_size, dropout_prob=0.): self.num_layers = num_layers self.hidden_size = hidden_size @@ -71,14 +74,15 @@ class DecoderCell(layers.RNNCell): query = layers.fc(hidden, size=encoder_output.shape[-1], bias_attr=False) - attn_scores = layers.matmul( - layers.unsqueeze(query, [1]), encoder_output, transpose_y=True) + attn_scores = layers.matmul(layers.unsqueeze(query, [1]), + encoder_output, + transpose_y=True) if encoder_padding_mask is not None: attn_scores = layers.elementwise_add(attn_scores, encoder_padding_mask) attn_scores = layers.softmax(attn_scores) - attn_out = layers.squeeze( - layers.matmul(attn_scores, encoder_output), [1]) + attn_out = layers.squeeze(layers.matmul(attn_scores, encoder_output), + [1]) attn_out = layers.concat([attn_out, hidden], 1) attn_out = layers.fc(attn_out, size=self.hidden_size, bias_attr=False) return attn_out @@ -101,6 +105,7 @@ class DecoderCell(layers.RNNCell): class Encoder(object): + def __init__(self, num_layers, hidden_size, dropout_prob=0.): self.encoder_cell = EncoderCell(num_layers, hidden_size, dropout_prob) @@ -114,6 +119,7 @@ class Encoder(object): class Decoder(object): + def __init__(self, num_layers, hidden_size, @@ -142,11 +148,13 @@ class Decoder(object): 
encoder_output, beam_size) encoder_padding_mask = layers.BeamSearchDecoder.tile_beam_merge_with_batch( encoder_padding_mask, beam_size) - decoder = layers.BeamSearchDecoder( - cell=self.decoder_cell, output_fn=output_layer, **kwargs) + decoder = layers.BeamSearchDecoder(cell=self.decoder_cell, + output_fn=output_layer, + **kwargs) else: - decoder = layers.BasicDecoder( - self.decoder_cell, helper, output_fn=output_layer) + decoder = layers.BasicDecoder(self.decoder_cell, + helper, + output_fn=output_layer) (decoder_output, decoder_final_state, dec_seq_lengths) = layers.dynamic_decode( @@ -189,12 +197,12 @@ class Seq2SeqModel(object): self.encoder = Encoder(num_layers, hidden_size, dropout_prob) self.decoder = Decoder(num_layers, hidden_size, dropout_prob, decoding_strategy, max_decoding_length) - self.output_layer = lambda x: layers.fc( - x, - size=trg_vocab_size, - num_flatten_dims=len(x.shape) - 1, - param_attr=fluid.ParamAttr(), - bias_attr=False) + self.output_layer = lambda x: layers.fc(x, + size=trg_vocab_size, + num_flatten_dims=len(x.shape) - + 1, + param_attr=fluid.ParamAttr(), + bias_attr=False) def __call__(self, src, src_length, trg=None, trg_length=None): # encoder @@ -202,11 +210,13 @@ class Seq2SeqModel(object): self.src_embeder(src), src_length) decoder_initial_states = [ - encoder_final_state, self.decoder.decoder_cell.get_initial_states( + encoder_final_state, + self.decoder.decoder_cell.get_initial_states( batch_ref=encoder_output, shape=[encoder_output.shape[-1]]) ] - src_mask = layers.sequence_mask( - src_length, maxlen=layers.shape(src)[1], dtype="float32") + src_mask = layers.sequence_mask(src_length, + maxlen=layers.shape(src)[1], + dtype="float32") encoder_padding_mask = (src_mask - 1.0) * 1e9 encoder_padding_mask = layers.unsqueeze(encoder_padding_mask, [1]) @@ -214,20 +224,23 @@ class Seq2SeqModel(object): decoder_kwargs = { "inputs": self.trg_embeder(trg), "sequence_length": trg_length, - } if self.decoder.decoding_strategy == "train_greedy" else ({ - "embedding_fn": self.trg_embeder, - "beam_size": self.beam_size, - "start_token": self.start_token, - "end_token": self.end_token - } if self.decoder.decoding_strategy == "beam_search" else { - "embedding_fn": self.trg_embeder, - "start_tokens": layers.fill_constant_batch_size_like( - input=encoder_output, - shape=[-1], - dtype=src.dtype, - value=self.start_token), - "end_token": self.end_token - }) + } if self.decoder.decoding_strategy == "train_greedy" else ( + { + "embedding_fn": self.trg_embeder, + "beam_size": self.beam_size, + "start_token": self.start_token, + "end_token": self.end_token + } if self.decoder.decoding_strategy == "beam_search" else { + "embedding_fn": + self.trg_embeder, + "start_tokens": + layers.fill_constant_batch_size_like(input=encoder_output, + shape=[-1], + dtype=src.dtype, + value=self.start_token), + "end_token": + self.end_token + }) decoder_kwargs["output_layer"] = self.output_layer (decoder_output, decoder_final_state, @@ -252,8 +265,9 @@ class PolicyGradient(object): """ update policy model self.model with policy gradient algorithm """ - self.reward = fluid.layers.py_func( - func=reward_func, x=[action, length], out=reward) + self.reward = fluid.layers.py_func(func=reward_func, + x=[action, length], + out=reward) neg_log_prob = layers.cross_entropy(act_prob, action) cost = neg_log_prob * reward cost = (layers.reduce_sum(cost) / layers.reduce_sum(length) @@ -282,8 +296,8 @@ def reward_func(samples, sample_length): dmat = np.ones([batch_size, max_seq_length], dtype=dtype) else: 
steps = np.tile(np.arange(max_seq_length), [batch_size, 1]) - mask = np.asarray( - steps < (sequence_length - 1)[:, None], dtype=dtype) + mask = np.asarray(steps < (sequence_length - 1)[:, None], + dtype=dtype) # Make each row = [discount, ..., discount, 1, ..., 1] dmat = mask * discount + (1 - mask) dmat = np.cumprod(dmat[:, ::-1], axis=1)[:, ::-1] @@ -342,6 +356,7 @@ class MLE(object): class SeqPGAgent(object): + def __init__(self, model_cls, alg_cls=PolicyGradient, @@ -364,14 +379,17 @@ class SeqPGAgent(object): def build_program(self, model_cls, alg_cls, model_hparams, alg_hparams): with fluid.program_guard(self.main_program, self.startup_program): source = fluid.data(name="src", shape=[None, None], dtype="int64") - source_length = fluid.data( - name="src_sequence_length", shape=[None], dtype="int64") + source_length = fluid.data(name="src_sequence_length", + shape=[None], + dtype="int64") # only for teacher-forcing MLE training target = fluid.data(name="trg", shape=[None, None], dtype="int64") - target_length = fluid.data( - name="trg_sequence_length", shape=[None], dtype="int64") - label = fluid.data( - name="label", shape=[None, None, 1], dtype="int64") + target_length = fluid.data(name="trg_sequence_length", + shape=[None], + dtype="int64") + label = fluid.data(name="label", + shape=[None, None, 1], + dtype="int64") self.model = model_cls(**model_hparams) self.alg = alg_cls(**alg_hparams) self.probs, self.samples, self.sample_length = self.model( @@ -405,6 +423,7 @@ class SeqPGAgent(object): class TestDynamicDecode(unittest.TestCase): + def setUp(self): np.random.seed(123) self.model_hparams = { @@ -424,53 +443,63 @@ class TestDynamicDecode(unittest.TestCase): src_seq_len = 10 trg_seq_len = 12 self.data = { - "src": np.random.randint( + "src": + np.random.randint( 2, self.model_hparams["src_vocab_size"], (iter_num * batch_size, src_seq_len)).astype("int64"), - "src_sequence_length": np.random.randint( - 1, src_seq_len, (iter_num * batch_size, )).astype("int64"), - "trg": np.random.randint( + "src_sequence_length": + np.random.randint(1, src_seq_len, + (iter_num * batch_size, )).astype("int64"), + "trg": + np.random.randint( 2, self.model_hparams["src_vocab_size"], (iter_num * batch_size, trg_seq_len)).astype("int64"), - "trg_sequence_length": np.random.randint( - 1, trg_seq_len, (iter_num * batch_size, )).astype("int64"), - "label": np.random.randint( + "trg_sequence_length": + np.random.randint(1, trg_seq_len, + (iter_num * batch_size, )).astype("int64"), + "label": + np.random.randint( 2, self.model_hparams["src_vocab_size"], (iter_num * batch_size, trg_seq_len, 1)).astype("int64"), } - place = core.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else core.CPUPlace() + place = core.CUDAPlace( + 0) if core.is_compiled_with_cuda() else core.CPUPlace() self.exe = Executor(place) def test_mle_train(self): paddle.enable_static() self.model_hparams["decoding_strategy"] = "train_greedy" - agent = SeqPGAgent( - model_cls=Seq2SeqModel, - alg_cls=MLE, - model_hparams=self.model_hparams, - alg_hparams={"lr": 0.001}, - executor=self.exe, - main_program=fluid.Program(), - startup_program=fluid.Program(), - seed=123) + agent = SeqPGAgent(model_cls=Seq2SeqModel, + alg_cls=MLE, + model_hparams=self.model_hparams, + alg_hparams={"lr": 0.001}, + executor=self.exe, + main_program=fluid.Program(), + startup_program=fluid.Program(), + seed=123) self.exe.run(agent.startup_program) for iter_idx in range(self.iter_num): reward, cost = agent.learn( { - "src": self.data["src"][iter_idx * self.batch_size:( 
- iter_idx + 1) * self.batch_size, :], - "src_sequence_length": self.data["src_sequence_length"][ - iter_idx * self.batch_size:(iter_idx + 1 - ) * self.batch_size], - "trg": self.data["trg"][iter_idx * self.batch_size:( - iter_idx + 1) * self.batch_size, :], - "trg_sequence_length": self.data["trg_sequence_length"] - [iter_idx * self.batch_size:(iter_idx + 1) * - self.batch_size], - "label": self.data["label"][iter_idx * self.batch_size:( - iter_idx + 1) * self.batch_size] + "src": + self.data["src"][iter_idx * self.batch_size:(iter_idx + 1) * + self.batch_size, :], + "src_sequence_length": + self.data["src_sequence_length"][iter_idx * self.batch_size: + (iter_idx + 1) * + self.batch_size], + "trg": + self.data["trg"][iter_idx * self.batch_size:(iter_idx + 1) * + self.batch_size, :], + "trg_sequence_length": + self.data["trg_sequence_length"][iter_idx * self.batch_size: + (iter_idx + 1) * + self.batch_size], + "label": + self.data["label"][iter_idx * + self.batch_size:(iter_idx + 1) * + self.batch_size] }, fetch_list=[agent.cost, agent.cost]) print("iter_idx: %d, reward: %f, cost: %f" % @@ -479,24 +508,25 @@ class TestDynamicDecode(unittest.TestCase): def test_greedy_train(self): paddle.enable_static() self.model_hparams["decoding_strategy"] = "infer_greedy" - agent = SeqPGAgent( - model_cls=Seq2SeqModel, - alg_cls=PolicyGradient, - model_hparams=self.model_hparams, - alg_hparams={"lr": 0.001}, - executor=self.exe, - main_program=fluid.Program(), - startup_program=fluid.Program(), - seed=123) + agent = SeqPGAgent(model_cls=Seq2SeqModel, + alg_cls=PolicyGradient, + model_hparams=self.model_hparams, + alg_hparams={"lr": 0.001}, + executor=self.exe, + main_program=fluid.Program(), + startup_program=fluid.Program(), + seed=123) self.exe.run(agent.startup_program) for iter_idx in range(self.iter_num): reward, cost = agent.learn( { - "src": self.data["src"][iter_idx * self.batch_size:( - iter_idx + 1) * self.batch_size, :], - "src_sequence_length": self.data["src_sequence_length"] - [iter_idx * self.batch_size:(iter_idx + 1) * - self.batch_size] + "src": + self.data["src"][iter_idx * self.batch_size:(iter_idx + 1) * + self.batch_size, :], + "src_sequence_length": + self.data["src_sequence_length"][iter_idx * self.batch_size: + (iter_idx + 1) * + self.batch_size] }, fetch_list=[agent.reward, agent.cost]) print("iter_idx: %d, reward: %f, cost: %f" % @@ -505,24 +535,25 @@ class TestDynamicDecode(unittest.TestCase): def test_sample_train(self): paddle.enable_static() self.model_hparams["decoding_strategy"] = "infer_sample" - agent = SeqPGAgent( - model_cls=Seq2SeqModel, - alg_cls=PolicyGradient, - model_hparams=self.model_hparams, - alg_hparams={"lr": 0.001}, - executor=self.exe, - main_program=fluid.Program(), - startup_program=fluid.Program(), - seed=123) + agent = SeqPGAgent(model_cls=Seq2SeqModel, + alg_cls=PolicyGradient, + model_hparams=self.model_hparams, + alg_hparams={"lr": 0.001}, + executor=self.exe, + main_program=fluid.Program(), + startup_program=fluid.Program(), + seed=123) self.exe.run(agent.startup_program) for iter_idx in range(self.iter_num): reward, cost = agent.learn( { - "src": self.data["src"][iter_idx * self.batch_size:( - iter_idx + 1) * self.batch_size, :], - "src_sequence_length": self.data["src_sequence_length"] - [iter_idx * self.batch_size:(iter_idx + 1) * - self.batch_size] + "src": + self.data["src"][iter_idx * self.batch_size:(iter_idx + 1) * + self.batch_size, :], + "src_sequence_length": + self.data["src_sequence_length"][iter_idx * self.batch_size: + (iter_idx + 
1) * + self.batch_size] }, fetch_list=[agent.reward, agent.cost]) print("iter_idx: %d, reward: %f, cost: %f" % @@ -536,8 +567,9 @@ class TestDynamicDecode(unittest.TestCase): startup_program = fluid.Program() with fluid.program_guard(main_program, startup_program): source = fluid.data(name="src", shape=[None, None], dtype="int64") - source_length = fluid.data( - name="src_sequence_length", shape=[None], dtype="int64") + source_length = fluid.data(name="src_sequence_length", + shape=[None], + dtype="int64") model = Seq2SeqModel(**self.model_hparams) output = model(source, source_length) @@ -546,11 +578,14 @@ class TestDynamicDecode(unittest.TestCase): trans_ids = self.exe.run( program=main_program, feed={ - "src": self.data["src"][iter_idx * self.batch_size:( - iter_idx + 1) * self.batch_size, :], - "src_sequence_length": self.data["src_sequence_length"] - [iter_idx * self.batch_size:(iter_idx + 1) * - self.batch_size] + "src": + self.data["src"][iter_idx * self.batch_size:(iter_idx + 1) * + self.batch_size, :], + "src_sequence_length": + self.data["src_sequence_length"][iter_idx * + self.batch_size:(iter_idx + + 1) * + self.batch_size] }, fetch_list=[output])[0] @@ -569,6 +604,7 @@ class TestDynamicDecode(unittest.TestCase): class ModuleApiTest(unittest.TestCase): + @classmethod def setUpClass(cls): cls._np_rand_state = np.random.get_state() @@ -577,10 +613,11 @@ class ModuleApiTest(unittest.TestCase): np.random.seed(cls._random_seed) random.seed(cls._random_seed) - cls.model_cls = type(cls.__name__ + "Model", (Layer, ), { - "__init__": cls.model_init_wrapper(cls.model_init), - "forward": cls.model_forward - }) + cls.model_cls = type( + cls.__name__ + "Model", (Layer, ), { + "__init__": cls.model_init_wrapper(cls.model_init), + "forward": cls.model_forward + }) @classmethod def tearDownClass(cls): @@ -589,6 +626,7 @@ class ModuleApiTest(unittest.TestCase): @staticmethod def model_init_wrapper(func): + def __impl__(self, *args, **kwargs): Layer.__init__(self) func(self, *args, **kwargs) @@ -649,8 +687,7 @@ class ModuleApiTest(unittest.TestCase): if expect_output: for actual_t, expect_t in zip(dygraph_output, expect_output): self.assertTrue( - np.allclose( - actual_t, expect_t, rtol=1e-5, atol=0)) + np.allclose(actual_t, expect_t, rtol=1e-5, atol=0)) def check_output(self): devices = ["CPU", "GPU"] if fluid.is_compiled_with_cuda() else ["CPU"] @@ -660,6 +697,7 @@ class ModuleApiTest(unittest.TestCase): class TestBeamSearch(ModuleApiTest): + def setUp(self): paddle.set_default_dtype("float64") shape = (8, 32) @@ -684,26 +722,25 @@ class TestBeamSearch(ModuleApiTest): eos_id=1, beam_size=4, max_step_num=20): - embedder = paddle.fluid.dygraph.Embedding( - size=[vocab_size, embed_dim], dtype="float64") + embedder = paddle.fluid.dygraph.Embedding(size=[vocab_size, embed_dim], + dtype="float64") output_layer = nn.Linear(hidden_size, vocab_size) cell = nn.LSTMCell(embed_dim, hidden_size) self.max_step_num = max_step_num - self.beam_search_decoder = BeamSearchDecoder( - cell, - start_token=bos_id, - end_token=eos_id, - beam_size=beam_size, - embedding_fn=embedder, - output_fn=output_layer) + self.beam_search_decoder = BeamSearchDecoder(cell, + start_token=bos_id, + end_token=eos_id, + beam_size=beam_size, + embedding_fn=embedder, + output_fn=output_layer) @staticmethod def model_forward(model, init_hidden, init_cell): - return dynamic_decode( - model.beam_search_decoder, [init_hidden, init_cell], - max_step_num=model.max_step_num, - impute_finished=True, - is_test=True)[0] + return 
dynamic_decode(model.beam_search_decoder, + [init_hidden, init_cell], + max_step_num=model.max_step_num, + impute_finished=True, + is_test=True)[0] def make_inputs(self): inputs = [ diff --git a/python/paddle/fluid/tests/unittests/test_rnn_dp.py b/python/paddle/fluid/tests/unittests/test_rnn_dp.py index 8d7e86fcdb9..46e1530a191 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_dp.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_dp.py @@ -27,6 +27,7 @@ paddle.enable_static() class RNNEncoder(nn.Layer): + def __init__(self, input_size, hidden_size, @@ -41,13 +42,12 @@ class RNNEncoder(nn.Layer): self._direction = direction self._pooling_type = pooling_type - self.rnn_layer = nn.SimpleRNN( - input_size=input_size, - hidden_size=hidden_size, - num_layers=num_layers, - direction=direction, - dropout=dropout, - **kwargs) + self.rnn_layer = nn.SimpleRNN(input_size=input_size, + hidden_size=hidden_size, + num_layers=num_layers, + direction=direction, + dropout=dropout, + **kwargs) def get_input_dim(self): return self._input_size @@ -66,6 +66,7 @@ class RNNEncoder(nn.Layer): class RNNModel(nn.Layer): + def __init__(self, vocab_size, num_classes, @@ -78,17 +79,15 @@ class RNNModel(nn.Layer): pooling_type=None, fc_hidden_size=96): super().__init__() - self.embedder = nn.Embedding( - num_embeddings=vocab_size, - embedding_dim=emb_dim, - padding_idx=padding_idx) - self.rnn_encoder = RNNEncoder( - emb_dim, - rnn_hidden_size, - num_layers=rnn_layers, - direction=direction, - dropout=dropout_rate, - pooling_type=pooling_type) + self.embedder = nn.Embedding(num_embeddings=vocab_size, + embedding_dim=emb_dim, + padding_idx=padding_idx) + self.rnn_encoder = RNNEncoder(emb_dim, + rnn_hidden_size, + num_layers=rnn_layers, + direction=direction, + dropout=dropout_rate, + pooling_type=pooling_type) self.fc = nn.Linear(self.rnn_encoder.get_output_dim(), fc_hidden_size) self.output_layer = nn.Linear(fc_hidden_size, num_classes) @@ -104,23 +103,23 @@ def rnn_pretrain_forward(train_program, start_program, topo=None): with static.program_guard(train_program, start_program), paddle.utils.unique_name.guard(): batch_size = 1 - tokens = static.data( - name="tokens", shape=[batch_size, -1], dtype="int64") + tokens = static.data(name="tokens", + shape=[batch_size, -1], + dtype="int64") seq_len = static.data(name="ids", shape=[batch_size], dtype="int64") labels = static.data(name="labels", shape=[batch_size], dtype="int64") data_holders = [tokens, seq_len, labels] vocab_size = 10 num_classes = 2 pad_token_id = 0 - model = RNNModel( - vocab_size, - num_classes, - direction='forward', - padding_idx=pad_token_id, - pooling_type='max') - - optimizer = paddle.optimizer.Adam( - parameters=model.parameters(), learning_rate=0.001) + model = RNNModel(vocab_size, + num_classes, + direction='forward', + padding_idx=pad_token_id, + pooling_type='max') + + optimizer = paddle.optimizer.Adam(parameters=model.parameters(), + learning_rate=0.001) criterion = paddle.nn.CrossEntropyLoss() preds = model(tokens, seq_len) loss = criterion(preds, labels) @@ -129,6 +128,7 @@ def rnn_pretrain_forward(train_program, start_program, topo=None): class TestFleetMetaOptimizer(unittest.TestCase): + def setUp(self): os.environ["PADDLE_TRAINER_ID"] = "1" os.environ[ diff --git a/python/paddle/fluid/tests/unittests/test_rnn_memory_helper_op.py b/python/paddle/fluid/tests/unittests/test_rnn_memory_helper_op.py index 9bfec8e9bdd..f5ce0306091 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_memory_helper_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_rnn_memory_helper_op.py @@ -24,19 +24,21 @@ import paddle.fluid.core as core class RNNMemoryHelperOpTest(unittest.TestCase): + def setUp(self): self.program = Program() self.place = core.CPUPlace() - self.X = self.program.global_block().create_var( - name='X', shape=[2, 3], dtype='float32') - self.Out = self.program.global_block().create_var( - name='Out', shape=[2, 3], dtype='float32') - self.program.global_block().append_op( - type='rnn_memory_helper', - inputs={"X": self.X}, - outputs={"Out": self.Out}, - attrs={}) + self.X = self.program.global_block().create_var(name='X', + shape=[2, 3], + dtype='float32') + self.Out = self.program.global_block().create_var(name='Out', + shape=[2, 3], + dtype='float32') + self.program.global_block().append_op(type='rnn_memory_helper', + inputs={"X": self.X}, + outputs={"Out": self.Out}, + attrs={}) def test_forward(self): x_np = np.random.normal(size=(2, 3)).astype("float32") @@ -50,29 +52,31 @@ class RNNMemoryHelperOpTest(unittest.TestCase): class RNNMemoryHelperGradOpTest(unittest.TestCase): + def setUp(self): self.program = Program() self.place = core.CPUPlace() self.input_names = ['X', 'Out', 'Out@GRAD'] self.input_vars = { - name: self.program.global_block().create_var( - name=name, shape=[2, 3], dtype='float32') + name: self.program.global_block().create_var(name=name, + shape=[2, 3], + dtype='float32') for name in self.input_names } self.output_names = ['X@GRAD'] self.output_vars = { - name: self.program.global_block().create_var( - name=name, shape=[2, 3], dtype='float32') + name: self.program.global_block().create_var(name=name, + shape=[2, 3], + dtype='float32') for name in self.output_names } - self.program.global_block().append_op( - type='rnn_memory_helper_grad', - inputs=self.input_vars, - outputs=self.output_vars, - attrs={}) + self.program.global_block().append_op(type='rnn_memory_helper_grad', + inputs=self.input_vars, + outputs=self.output_vars, + attrs={}) def test_backward(self): self.feed_map = { @@ -89,6 +93,7 @@ class RNNMemoryHelperGradOpTest(unittest.TestCase): class RNNMemoryHelperGradOpWithoutInputTest(unittest.TestCase): + def setUp(self): self.program = Program() self.fake_program = Program() @@ -96,8 +101,9 @@ class RNNMemoryHelperGradOpWithoutInputTest(unittest.TestCase): self.input_names = ['X', 'Out'] self.input_vars = { - name: self.program.global_block().create_var( - name=name, shape=[2, 3], dtype='float32') + name: self.program.global_block().create_var(name=name, + shape=[2, 3], + dtype='float32') for name in self.input_names } self.input_vars["Out@GRAD"] = \ @@ -106,16 +112,16 @@ class RNNMemoryHelperGradOpWithoutInputTest(unittest.TestCase): self.output_names = ['X@GRAD'] self.output_vars = { - name: self.program.global_block().create_var( - name=name, shape=[2, 3], dtype='float32') + name: self.program.global_block().create_var(name=name, + shape=[2, 3], + dtype='float32') for name in self.output_names } - self.program.global_block().append_op( - type='rnn_memory_helper_grad', - inputs=self.input_vars, - outputs=self.output_vars, - attrs={}) + self.program.global_block().append_op(type='rnn_memory_helper_grad', + inputs=self.input_vars, + outputs=self.output_vars, + attrs={}) def test_backward(self): self.feed_map = { @@ -129,8 +135,9 @@ class RNNMemoryHelperGradOpWithoutInputTest(unittest.TestCase): feed=self.feed_map, fetch_list=self.fetch_list) self.assertTrue( - np.allclose( - out[0], np.zeros(shape=(2, 3)).astype("float32"), rtol=1e-5)) + np.allclose(out[0], + 
np.zeros(shape=(2, 3)).astype("float32"), + rtol=1e-5)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_rnn_op.py b/python/paddle/fluid/tests/unittests/test_rnn_op.py index 79e33166bb6..f03215a480a 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_op.py @@ -25,6 +25,7 @@ import random import sys from op_test import OpTest + sys.path.append("./rnn") from rnn_numpy import SimpleRNN, LSTM, GRU from convert import get_params_for_net @@ -35,6 +36,7 @@ paddle.enable_static() class TestRNNOp(OpTest): + def get_weight_names(self): weight_names = [] for i in range(self.num_layers): @@ -49,8 +51,7 @@ class TestRNNOp(OpTest): self.op_type = "rnn" self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64 self.sequence_length = None if core.is_compiled_with_rocm( - ) else np.array( - [12, 11, 10, 9, 8], dtype=np.int32) + ) else np.array([12, 11, 10, 9, 8], dtype=np.int32) self.num_layers = 1 self.is_bidirec = False self.mode = "LSTM" @@ -65,27 +66,27 @@ class TestRNNOp(OpTest): input_size = 3 hidden_size = 2 - input = np.random.uniform( - low=-0.1, high=0.1, - size=(seq_length, batch_size, input_size)).astype(self.dtype) + input = np.random.uniform(low=-0.1, + high=0.1, + size=(seq_length, batch_size, + input_size)).astype(self.dtype) if self.sequence_length is not None: input[11][1:][:] = 0 input[10][2:][:] = 0 input[9][3:][:] = 0 input[8][4:][:] = 0 - rnn1 = LSTM( - input_size, - hidden_size, - num_layers=self.num_layers, - time_major=True, - direction=direction, - dropout=self.dropout, - dtype=self.dtype) + rnn1 = LSTM(input_size, + hidden_size, + num_layers=self.num_layers, + time_major=True, + direction=direction, + dropout=self.dropout, + dtype=self.dtype) flat_w = get_params_for_net(rnn1) - output, (last_hidden, last_cell) = rnn1( - input, sequence_length=self.sequence_length) + output, (last_hidden, + last_cell) = rnn1(input, sequence_length=self.sequence_length) if core.is_compiled_with_rocm(): @@ -140,28 +141,32 @@ class TestRNNOp(OpTest): var_name_list = self.get_weight_names() grad_check_list = ['Input', 'init_h', 'init_c'] grad_check_list.extend(var_name_list) - self.check_grad( - set(grad_check_list), ['Out', 'last_hidden', 'last_cell']) + self.check_grad(set(grad_check_list), + ['Out', 'last_hidden', 'last_cell']) class TestRNNOp1(TestRNNOp): + def set_attrs(self): self.sequence_length = None class TestRNNOp2(TestRNNOp): + def set_attrs(self): self.sequence_length = None self.is_bidirec = True class TestRNNOp3(TestRNNOp): + def set_attrs(self): self.is_test = True self.sequence_length = None class TestRNNOp4(TestRNNOp): + def set_attrs(self): self.is_test = True self.sequence_length = None @@ -169,17 +174,20 @@ class TestRNNOp4(TestRNNOp): class TestRNNOp5(TestRNNOp): + def set_attrs(self): self.num_layers = 2 class TestRNNOp6(TestRNNOp): + def set_attrs(self): self.num_layers = 2 self.is_bidirec = True class TestRNNOp7(TestRNNOp): + def set_attrs(self): self.num_layers = 2 self.is_bidirec = True @@ -187,6 +195,7 @@ class TestRNNOp7(TestRNNOp): class TestRNNOp8(TestRNNOp): + def set_attrs(self): self.num_layers = 2 self.is_bidirec = True @@ -194,6 +203,7 @@ class TestRNNOp8(TestRNNOp): class TestRNNOp9(TestRNNOp): + def set_attrs(self): self.num_layers = 3 diff --git a/python/paddle/fluid/tests/unittests/test_roi_align_op.py b/python/paddle/fluid/tests/unittests/test_roi_align_op.py index a22b331b032..cf528105686 100644 --- 
a/python/paddle/fluid/tests/unittests/test_roi_align_op.py +++ b/python/paddle/fluid/tests/unittests/test_roi_align_op.py @@ -23,6 +23,7 @@ from op_test import OpTest class TestROIAlignOp(OpTest): + def set_data(self): self.init_test_case() self.make_rois() @@ -66,8 +67,8 @@ class TestROIAlignOp(OpTest): bilinear_pos = np.zeros( [self.channels, self.pooled_height, self.pooled_width, count, 4], np.float64) - bilinear_w = np.zeros( - [self.pooled_height, self.pooled_width, count, 4], np.float64) + bilinear_w = np.zeros([self.pooled_height, self.pooled_width, count, 4], + np.float64) for ph in range(self.pooled_width): for pw in range(self.pooled_height): c = 0 @@ -172,12 +173,14 @@ class TestROIAlignOp(OpTest): rois.append(roi) self.rois_num = len(rois) self.rois = np.array(rois).astype("float64") - self.boxes_num = np.array( - [bno + 1 for bno in range(self.batch_size)]).astype('int32') + self.boxes_num = np.array([bno + 1 for bno in range(self.batch_size) + ]).astype('int32') def setUp(self): self.op_type = "roi_align" - self.python_api = lambda x, boxes, boxes_num, pooled_height, pooled_width, spatial_scale, sampling_ratio, aligned: paddle.vision.ops.roi_align(x, boxes, boxes_num, (pooled_height, pooled_width), spatial_scale, sampling_ratio, aligned) + self.python_api = lambda x, boxes, boxes_num, pooled_height, pooled_width, spatial_scale, sampling_ratio, aligned: paddle.vision.ops.roi_align( + x, boxes, boxes_num, (pooled_height, pooled_width), spatial_scale, + sampling_ratio, aligned) self.set_data() def test_check_output(self): @@ -188,6 +191,7 @@ class TestROIAlignOp(OpTest): class TestROIAlignInLodOp(TestROIAlignOp): + def set_data(self): self.init_test_case() self.make_rois() @@ -213,6 +217,7 @@ class TestROIAlignInLodOp(TestROIAlignOp): class TestROIAlignOpWithAligned(TestROIAlignOp): + def init_test_case(self): self.batch_size = 3 self.channels = 3 diff --git a/python/paddle/fluid/tests/unittests/test_roi_perspective_transform_op.py b/python/paddle/fluid/tests/unittests/test_roi_perspective_transform_op.py index d4e48ac8a57..202805b0961 100644 --- a/python/paddle/fluid/tests/unittests/test_roi_perspective_transform_op.py +++ b/python/paddle/fluid/tests/unittests/test_roi_perspective_transform_op.py @@ -100,10 +100,10 @@ def get_transform_matrix(transformed_width, transformed_height, roi_x, roi_y): dy2 = y3 - y2 dy3 = y0 - y1 + y2 - y3 matrix = np.zeros([9]) - matrix[6] = (dx3 * dy2 - dx2 * dy3) / (dx1 * dy2 - dx2 * dy1 + 1e-5) / ( - normalized_width - 1) - matrix[7] = (dx1 * dy3 - dx3 * dy1) / (dx1 * dy2 - dx2 * dy1 + 1e-5) / ( - normalized_height - 1) + matrix[6] = (dx3 * dy2 - dx2 * dy3) / (dx1 * dy2 - dx2 * dy1 + + 1e-5) / (normalized_width - 1) + matrix[7] = (dx1 * dy3 - dx3 * dy1) / (dx1 * dy2 - dx2 * dy1 + + 1e-5) / (normalized_height - 1) matrix[8] = 1 matrix[3] = (y1 - y0 + matrix[6] * @@ -199,8 +199,8 @@ def roi_transform(in_data, rois, rois_lod, transformed_height, roi2image[j] = i out = np.zeros([rois_num, channels, transformed_height, transformed_width]) - mask = np.zeros( - [rois_num, 1, transformed_height, transformed_width]).astype('int') + mask = np.zeros([rois_num, 1, transformed_height, + transformed_width]).astype('int') matrix = np.zeros([rois_num, 9], dtype=in_data.dtype) for n in range(rois_num): roi_x = [] @@ -209,8 +209,9 @@ def roi_transform(in_data, rois, rois_lod, transformed_height, roi_x.append(rois[n][2 * k] * spatial_scale) roi_y.append(rois[n][2 * k + 1] * spatial_scale) image_id = roi2image[n] - transform_matrix = get_transform_matrix( - 
transformed_width, transformed_height, roi_x, roi_y) + transform_matrix = get_transform_matrix(transformed_width, + transformed_height, roi_x, + roi_y) matrix[n] = transform_matrix for c in range(channels): for out_h in range(transformed_height): @@ -230,6 +231,7 @@ def roi_transform(in_data, rois, rois_lod, transformed_height, class TestROIPoolOp(OpTest): + def set_data(self): self.init_test_case() self.make_rois() @@ -241,9 +243,11 @@ class TestROIPoolOp(OpTest): 'transformed_height': self.transformed_height, 'transformed_width': self.transformed_width } - out, mask, transform_matrix = roi_transform( - self.x, self.rois, self.rois_lod, self.transformed_height, - self.transformed_width, self.spatial_scale) + out, mask, transform_matrix = roi_transform(self.x, self.rois, + self.rois_lod, + self.transformed_height, + self.transformed_width, + self.spatial_scale) self.outputs = { 'Out': out, 'Mask': mask, @@ -316,13 +320,18 @@ class TestROIPoolOp(OpTest): def test_errors(self): x = fluid.data(name='x', shape=[100, 256, 28, 28], dtype='float32') - rois = fluid.data( - name='rois', shape=[None, 8], lod_level=1, dtype='float32') - - x_int = fluid.data( - name='x_int', shape=[100, 256, 28, 28], dtype='int32') - rois_int = fluid.data( - name='rois_int', shape=[None, 8], lod_level=1, dtype='int32') + rois = fluid.data(name='rois', + shape=[None, 8], + lod_level=1, + dtype='float32') + + x_int = fluid.data(name='x_int', + shape=[100, 256, 28, 28], + dtype='int32') + rois_int = fluid.data(name='rois_int', + shape=[None, 8], + lod_level=1, + dtype='int32') x_tmp = [1, 2] rois_tmp = [1, 2] diff --git a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py index f0afcff63c6..d01daf75036 100644 --- a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py +++ b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py @@ -25,6 +25,7 @@ import paddle.fluid as fluid class TestROIPoolOp(OpTest): + def set_data(self): self.init_test_case() self.make_rois() @@ -132,12 +133,13 @@ class TestROIPoolOp(OpTest): rois.append(roi) self.rois_num = len(rois) self.rois = np.array(rois).astype("float64") - self.boxes_num = np.array( - [bno + 1 for bno in range(self.batch_size)]).astype('int32') + self.boxes_num = np.array([bno + 1 for bno in range(self.batch_size) + ]).astype('int32') def setUp(self): self.op_type = "roi_pool" - self.python_api = lambda x, boxes, boxes_num, pooled_height, pooled_width, spatial_scale: paddle.vision.ops.roi_pool(x, boxes, boxes_num, (pooled_height, pooled_width), spatial_scale) + self.python_api = lambda x, boxes, boxes_num, pooled_height, pooled_width, spatial_scale: paddle.vision.ops.roi_pool( + x, boxes, boxes_num, (pooled_height, pooled_width), spatial_scale) self.python_out_sig = ["Out"] self.set_data() @@ -149,24 +151,27 @@ class TestROIPoolOp(OpTest): class BadInputTestRoiPool(unittest.TestCase): + def test_error(self): with fluid.program_guard(fluid.Program()): def test_bad_x(): - x = fluid.layers.data( - name='data1', shape=[2, 1, 4, 4], dtype='int64') - label = fluid.layers.data( - name='label', shape=[2, 4], dtype='float32', lod_level=1) + x = fluid.layers.data(name='data1', + shape=[2, 1, 4, 4], + dtype='int64') + label = fluid.layers.data(name='label', + shape=[2, 4], + dtype='float32', + lod_level=1) output = fluid.layers.roi_pool(x, label, 1, 1, 1.0) self.assertRaises(TypeError, test_bad_x) def test_bad_y(): - x = fluid.layers.data( - name='data2', - shape=[2, 1, 4, 4], - dtype='float32', - append_batch_size=False) 
+ x = fluid.layers.data(name='data2', + shape=[2, 1, 4, 4], + dtype='float32', + append_batch_size=False) label = [[1, 2, 3, 4], [2, 3, 4, 5]] output = fluid.layers.roi_pool(x, label, 1, 1, 1.0) @@ -174,6 +179,7 @@ class BadInputTestRoiPool(unittest.TestCase): class TestROIPoolInLodOp(TestROIPoolOp): + def set_data(self): self.init_test_case() self.make_rois() diff --git a/python/paddle/fluid/tests/unittests/test_roll_op.py b/python/paddle/fluid/tests/unittests/test_roll_op.py index c315aa9b746..546c278b8fa 100644 --- a/python/paddle/fluid/tests/unittests/test_roll_op.py +++ b/python/paddle/fluid/tests/unittests/test_roll_op.py @@ -24,6 +24,7 @@ from paddle.fluid import Program, program_guard class TestRollOp(OpTest): + def setUp(self): self.python_api = paddle.roll self.op_type = "roll" @@ -31,8 +32,8 @@ class TestRollOp(OpTest): self.inputs = {'X': np.random.random(self.x_shape).astype(self.dtype)} self.attrs = {'shifts': self.shifts, 'axis': self.axis} self.outputs = { - 'Out': np.roll(self.inputs['X'], self.attrs['shifts'], - self.attrs['axis']) + 'Out': + np.roll(self.inputs['X'], self.attrs['shifts'], self.attrs['axis']) } def init_dtype_type(self): @@ -49,6 +50,7 @@ class TestRollOp(OpTest): class TestRollOpCase2(TestRollOp): + def init_dtype_type(self): self.dtype = np.float32 self.x_shape = (100, 10, 5) @@ -57,9 +59,10 @@ class TestRollOpCase2(TestRollOp): class TestRollAPI(unittest.TestCase): + def input_data(self): - self.data_x = np.array( - [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]) + self.data_x = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], + [7.0, 8.0, 9.0]]) def test_roll_op_api(self): self.input_data() diff --git a/python/paddle/fluid/tests/unittests/test_rot90_op.py b/python/paddle/fluid/tests/unittests/test_rot90_op.py index 404bb3ae1eb..3829eaed277 100644 --- a/python/paddle/fluid/tests/unittests/test_rot90_op.py +++ b/python/paddle/fluid/tests/unittests/test_rot90_op.py @@ -48,9 +48,8 @@ class TestRot90_API(unittest.TestCase): out_np = np.array(res[0]) out_ref = np.array([[4, 1], [5, 2], [6, 3]]).astype(np.float32) - self.assertTrue( - (out_np == out_ref).all(), - msg='rot90 output is wrong, out =' + str(out_np)) + self.assertTrue((out_np == out_ref).all(), + msg='rot90 output is wrong, out =' + str(out_np)) def test_static_k_0(self): paddle.enable_static() @@ -74,9 +73,8 @@ class TestRot90_API(unittest.TestCase): out_np = np.array(res[0]) out_ref = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32) - self.assertTrue( - (out_np == out_ref).all(), - msg='rot90 output is wrong, out =' + str(out_np)) + self.assertTrue((out_np == out_ref).all(), + msg='rot90 output is wrong, out =' + str(out_np)) def test_static_k_2(self): paddle.enable_static() @@ -100,9 +98,8 @@ class TestRot90_API(unittest.TestCase): out_np = np.array(res[0]) out_ref = np.array([[6, 5, 4], [3, 2, 1]]).astype(np.float32) - self.assertTrue( - (out_np == out_ref).all(), - msg='rot90 output is wrong, out =' + str(out_np)) + self.assertTrue((out_np == out_ref).all(), + msg='rot90 output is wrong, out =' + str(out_np)) def test_static_k_3(self): paddle.enable_static() @@ -126,9 +123,8 @@ class TestRot90_API(unittest.TestCase): out_np = np.array(res[0]) out_ref = np.array([[4, 1], [5, 2], [6, 3]]).astype(np.float32) - self.assertTrue( - (out_np == out_ref).all(), - msg='rot90 output is wrong, out =' + str(out_np)) + self.assertTrue((out_np == out_ref).all(), + msg='rot90 output is wrong, out =' + str(out_np)) def test_static_neg_k_1(self): paddle.enable_static() @@ -152,9 +148,8 @@ class 
TestRot90_API(unittest.TestCase): out_np = np.array(res[0]) out_ref = np.array([[4, 1], [5, 2], [6, 3]]).astype(np.float32) - self.assertTrue( - (out_np == out_ref).all(), - msg='rot90 output is wrong, out =' + str(out_np)) + self.assertTrue((out_np == out_ref).all(), + msg='rot90 output is wrong, out =' + str(out_np)) def test_static_neg_k_2(self): paddle.enable_static() @@ -178,9 +173,8 @@ class TestRot90_API(unittest.TestCase): out_np = np.array(res[0]) out_ref = np.array([[6, 5, 4], [3, 2, 1]]).astype(np.float32) - self.assertTrue( - (out_np == out_ref).all(), - msg='rot90 output is wrong, out =' + str(out_np)) + self.assertTrue((out_np == out_ref).all(), + msg='rot90 output is wrong, out =' + str(out_np)) def test_static_neg_k_3(self): paddle.enable_static() @@ -204,9 +198,8 @@ class TestRot90_API(unittest.TestCase): out_np = np.array(res[0]) out_ref = np.array([[3, 6], [2, 5], [1, 4]]).astype(np.float32) - self.assertTrue( - (out_np == out_ref).all(), - msg='rot90 output is wrong, out =' + str(out_np)) + self.assertTrue((out_np == out_ref).all(), + msg='rot90 output is wrong, out =' + str(out_np)) def test_static_neg_k_4(self): paddle.enable_static() @@ -230,9 +223,8 @@ class TestRot90_API(unittest.TestCase): out_np = np.array(res[0]) out_ref = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32) - self.assertTrue( - (out_np == out_ref).all(), - msg='rot90 output is wrong, out =' + str(out_np)) + self.assertTrue((out_np == out_ref).all(), + msg='rot90 output is wrong, out =' + str(out_np)) def test_error_api(self): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_row_conv_op.py b/python/paddle/fluid/tests/unittests/test_row_conv_op.py index b3b0742e7af..e12d9108ab9 100644 --- a/python/paddle/fluid/tests/unittests/test_row_conv_op.py +++ b/python/paddle/fluid/tests/unittests/test_row_conv_op.py @@ -45,6 +45,7 @@ def row_conv_forward(x, lod, wt): class TestRowConvOp1(OpTest): + def setUp(self): self.op_type = "row_conv" @@ -67,15 +68,20 @@ class TestRowConvOp1(OpTest): self.check_grad(['X', 'Filter'], 'Out', check_dygraph=False) def test_check_grad_ignore_x(self): - self.check_grad( - ['Filter'], 'Out', no_grad_set=set('X'), check_dygraph=False) + self.check_grad(['Filter'], + 'Out', + no_grad_set=set('X'), + check_dygraph=False) def test_check_grad_ignore_wt(self): - self.check_grad( - ['X'], 'Out', no_grad_set=set('Filter'), check_dygraph=False) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Filter'), + check_dygraph=False) class TestRowConvOp2(OpTest): + def setUp(self): self.op_type = "row_conv" @@ -98,27 +104,24 @@ class TestRowConvOp2(OpTest): #dimensional input, the dX on CPU for some values has max_rel_error #slightly more than 0.05 def test_check_grad_normal(self): - self.check_grad( - ['X', 'Filter'], - 'Out', - max_relative_error=0.06, - check_dygraph=False) + self.check_grad(['X', 'Filter'], + 'Out', + max_relative_error=0.06, + check_dygraph=False) def test_check_grad_ignore_x(self): - self.check_grad( - ['Filter'], - 'Out', - max_relative_error=0.06, - no_grad_set=set('X'), - check_dygraph=False) + self.check_grad(['Filter'], + 'Out', + max_relative_error=0.06, + no_grad_set=set('X'), + check_dygraph=False) def test_check_grad_ignore_wt(self): - self.check_grad( - ['X'], - 'Out', - max_relative_error=0.06, - no_grad_set=set('Filter'), - check_dygraph=False) + self.check_grad(['X'], + 'Out', + max_relative_error=0.06, + no_grad_set=set('Filter'), + check_dygraph=False) def row_conv_foward_Tensor(x, wt): @@ -138,6 +141,7 @@ def 
row_conv_foward_Tensor(x, wt): class TestRowOpWithTensorInput(OpTest): + def setUp(self): self.op_type = "row_conv" length = [1, 2, 3] @@ -157,18 +161,23 @@ class TestRowOpWithTensorInput(OpTest): self.check_output(check_dygraph=False) def test_check_grad_ignore_x(self): - self.check_grad( - ['Filter'], 'Out', no_grad_set=set('X'), check_dygraph=False) + self.check_grad(['Filter'], + 'Out', + no_grad_set=set('X'), + check_dygraph=False) def test_check_grad_normal(self): self.check_grad(['X', 'Filter'], 'Out', check_dygraph=False) def test_check_grad_ignore_wt(self): - self.check_grad( - ['X'], 'Out', no_grad_set=set('Filter'), check_dygraph=False) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Filter'), + check_dygraph=False) class TestRowConvLayer(unittest.TestCase): + def setUp(self): self.B = 2 self.T = 6 diff --git a/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py b/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py index 7a6ce5bc921..c7a8c04fa34 100644 --- a/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py @@ -32,12 +32,13 @@ def rpn_target_assign(anchor_by_gt_overlap, rpn_fg_fraction, use_random=True): anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1) - anchor_to_gt_max = anchor_by_gt_overlap[np.arange( - anchor_by_gt_overlap.shape[0]), anchor_to_gt_argmax] + anchor_to_gt_max = anchor_by_gt_overlap[ + np.arange(anchor_by_gt_overlap.shape[0]), anchor_to_gt_argmax] gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0) - gt_to_anchor_max = anchor_by_gt_overlap[gt_to_anchor_argmax, np.arange( - anchor_by_gt_overlap.shape[1])] + gt_to_anchor_max = anchor_by_gt_overlap[ + gt_to_anchor_argmax, + np.arange(anchor_by_gt_overlap.shape[1])] anchors_with_max_overlap = np.where( anchor_by_gt_overlap == gt_to_anchor_max)[0] @@ -48,8 +49,9 @@ def rpn_target_assign(anchor_by_gt_overlap, num_fg = int(rpn_fg_fraction * rpn_batch_size_per_im) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg and use_random: - disable_inds = np.random.choice( - fg_inds, size=(len(fg_inds) - num_fg), replace=False) + disable_inds = np.random.choice(fg_inds, + size=(len(fg_inds) - num_fg), + replace=False) else: disable_inds = fg_inds[num_fg:] @@ -88,13 +90,12 @@ def rpn_target_assign(anchor_by_gt_overlap, def get_anchor(n, c, h, w): input_feat = np.random.random((n, c, h, w)).astype('float32') - anchors, _ = anchor_generator_in_python( - input_feat=input_feat, - anchor_sizes=[32., 64.], - aspect_ratios=[0.5, 1.0], - variances=[1.0, 1.0, 1.0, 1.0], - stride=[16.0, 16.0], - offset=0.5) + anchors, _ = anchor_generator_in_python(input_feat=input_feat, + anchor_sizes=[32., 64.], + aspect_ratios=[0.5, 1.0], + variances=[1.0, 1.0, 1.0, 1.0], + stride=[16.0, 16.0], + offset=0.5) return anchors @@ -118,10 +119,10 @@ def rpn_target_assign_in_python(all_anchors, if rpn_straddle_thresh >= 0: # Only keep anchors inside the image by a margin of straddle_thresh inds_inside = np.where( - (all_anchors[:, 0] >= -rpn_straddle_thresh) & - (all_anchors[:, 1] >= -rpn_straddle_thresh) & ( - all_anchors[:, 2] < im_width + rpn_straddle_thresh) & ( - all_anchors[:, 3] < im_height + rpn_straddle_thresh))[0] + (all_anchors[:, 0] >= -rpn_straddle_thresh) + & (all_anchors[:, 1] >= -rpn_straddle_thresh) + & (all_anchors[:, 2] < im_width + rpn_straddle_thresh) + & (all_anchors[:, 3] < im_height + rpn_straddle_thresh))[0] # keep only inside anchors inside_anchors = all_anchors[inds_inside, :] else: @@ -142,7 
+143,7 @@ def rpn_target_assign_in_python(all_anchors, rpn_negative_overlap, rpn_fg_fraction, use_random) - # unmap to all anchor + # unmap to all anchor loc_inds = inds_inside[loc_inds] score_inds = inds_inside[score_inds] @@ -172,12 +173,13 @@ def rpn_target_assign_in_python(all_anchors, def retinanet_target_assign(anchor_by_gt_overlap, gt_labels, positive_overlap, negative_overlap): anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1) - anchor_to_gt_max = anchor_by_gt_overlap[np.arange( - anchor_by_gt_overlap.shape[0]), anchor_to_gt_argmax] + anchor_to_gt_max = anchor_by_gt_overlap[ + np.arange(anchor_by_gt_overlap.shape[0]), anchor_to_gt_argmax] gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0) - gt_to_anchor_max = anchor_by_gt_overlap[gt_to_anchor_argmax, np.arange( - anchor_by_gt_overlap.shape[1])] + gt_to_anchor_max = anchor_by_gt_overlap[ + gt_to_anchor_argmax, + np.arange(anchor_by_gt_overlap.shape[1])] anchors_with_max_overlap = np.where( anchor_by_gt_overlap == gt_to_anchor_max)[0] @@ -269,6 +271,7 @@ def retinanet_target_assign_in_python(all_anchors, gt_boxes, gt_labels, class TestRpnTargetAssignOp(OpTest): + def setUp(self): n, c, h, w = 2, 4, 14, 14 all_anchors = get_anchor(n, c, h, w) @@ -336,6 +339,7 @@ class TestRpnTargetAssignOp(OpTest): class TestRetinanetTargetAssignOp(OpTest): + def setUp(self): n, c, h, w = 2, 4, 14, 14 all_anchors = get_anchor(n, c, h, w) @@ -396,23 +400,31 @@ class TestRetinanetTargetAssignOp(OpTest): class TestRetinanetTargetAssignOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - bbox_pred1 = fluid.data( - name='bbox_pred1', shape=[1, 100, 4], dtype='float32') - cls_logits1 = fluid.data( - name='cls_logits1', shape=[1, 100, 10], dtype='float32') - anchor_box1 = fluid.data( - name='anchor_box1', shape=[100, 4], dtype='float32') - anchor_var1 = fluid.data( - name='anchor_var1', shape=[100, 4], dtype='float32') - gt_boxes1 = fluid.data( - name='gt_boxes1', shape=[10, 4], dtype='float32') - gt_labels1 = fluid.data( - name='gt_labels1', shape=[10, 1], dtype='int32') + bbox_pred1 = fluid.data(name='bbox_pred1', + shape=[1, 100, 4], + dtype='float32') + cls_logits1 = fluid.data(name='cls_logits1', + shape=[1, 100, 10], + dtype='float32') + anchor_box1 = fluid.data(name='anchor_box1', + shape=[100, 4], + dtype='float32') + anchor_var1 = fluid.data(name='anchor_var1', + shape=[100, 4], + dtype='float32') + gt_boxes1 = fluid.data(name='gt_boxes1', + shape=[10, 4], + dtype='float32') + gt_labels1 = fluid.data(name='gt_labels1', + shape=[10, 1], + dtype='int32') is_crowd1 = fluid.data(name='is_crowd1', shape=[1], dtype='float32') - im_info1 = fluid.data( - name='im_info1', shape=[1, 3], dtype='float32') + im_info1 = fluid.data(name='im_info1', + shape=[1, 3], + dtype='float32') # The `bbox_pred` must be Variable and the data type of `bbox_pred` Tensor # one of float32 and float64. 
@@ -424,8 +436,9 @@ class TestRetinanetTargetAssignOpError(unittest.TestCase): self.assertRaises(TypeError, test_bbox_pred_type) def test_bbox_pred_tensor_dtype(): - bbox_pred2 = fluid.data( - name='bbox_pred2', shape=[1, 100, 4], dtype='intt32') + bbox_pred2 = fluid.data(name='bbox_pred2', + shape=[1, 100, 4], + dtype='intt32') score_pred, loc_pred, score_target, loc_target, bbox_inside_weight, fg_num = \ fluid.layers.retinanet_target_assign(bbox_pred2, cls_logits1, anchor_box1, anchor_var1, gt_boxes1, gt_labels1, is_crowd1, im_info1, 10) @@ -442,8 +455,9 @@ class TestRetinanetTargetAssignOpError(unittest.TestCase): self.assertRaises(TypeError, test_cls_logits_type) def test_cls_logits_tensor_dtype(): - cls_logits2 = fluid.data( - name='cls_logits2', shape=[1, 100, 10], dtype='int32') + cls_logits2 = fluid.data(name='cls_logits2', + shape=[1, 100, 10], + dtype='int32') score_pred, loc_pred, score_target, loc_target, bbox_inside_weight, fg_num = \ fluid.layers.retinanet_target_assign(bbox_pred1, cls_logits2, anchor_box1, anchor_var1, gt_boxes1, gt_labels1, is_crowd1, im_info1, 10) @@ -460,8 +474,9 @@ class TestRetinanetTargetAssignOpError(unittest.TestCase): self.assertRaises(TypeError, test_anchor_box_type) def test_anchor_box_tensor_dtype(): - anchor_box2 = fluid.data( - name='anchor_box2', shape=[100, 4], dtype='int32') + anchor_box2 = fluid.data(name='anchor_box2', + shape=[100, 4], + dtype='int32') score_pred, loc_pred, score_target, loc_target, bbox_inside_weight, fg_num = \ fluid.layers.retinanet_target_assign(bbox_pred1, cls_logits1, anchor_box2, anchor_var1, gt_boxes1, gt_labels1, is_crowd1, im_info1, 10) @@ -478,8 +493,9 @@ class TestRetinanetTargetAssignOpError(unittest.TestCase): self.assertRaises(TypeError, test_anchor_var_type) def test_anchor_var_tensor_dtype(): - anchor_var2 = fluid.data( - name='anchor_var2', shape=[100, 4], dtype='int32') + anchor_var2 = fluid.data(name='anchor_var2', + shape=[100, 4], + dtype='int32') score_pred, loc_pred, score_target, loc_target, bbox_inside_weight, fg_num = \ fluid.layers.retinanet_target_assign(bbox_pred1, cls_logits1, anchor_box1, anchor_var2, gt_boxes1, gt_labels1, is_crowd1, im_info1, 10) @@ -496,8 +512,9 @@ class TestRetinanetTargetAssignOpError(unittest.TestCase): self.assertRaises(TypeError, test_gt_boxes_type) def test_gt_boxes_tensor_dtype(): - gt_boxes2 = fluid.data( - name='gt_boxes2', shape=[10, 4], dtype='int32') + gt_boxes2 = fluid.data(name='gt_boxes2', + shape=[10, 4], + dtype='int32') score_pred, loc_pred, score_target, loc_target, bbox_inside_weight, fg_num = \ fluid.layers.retinanet_target_assign(bbox_pred1, cls_logits1, anchor_box1, anchor_var1, gt_boxes2, gt_labels1, is_crowd1, im_info1, 10) @@ -514,8 +531,9 @@ class TestRetinanetTargetAssignOpError(unittest.TestCase): self.assertRaises(TypeError, test_gt_label_type) def test_gt_label_tensor_dtype(): - gt_labels2 = fluid.data( - name='label2', shape=[10, 1], dtype='float32') + gt_labels2 = fluid.data(name='label2', + shape=[10, 1], + dtype='float32') score_pred, loc_pred, score_target, loc_target, bbox_inside_weight, fg_num = \ fluid.layers.retinanet_target_assign(bbox_pred1, cls_logits1, anchor_box1, anchor_var1, gt_boxes1, gt_labels2, is_crowd1, im_info1, 10) @@ -532,8 +550,9 @@ class TestRetinanetTargetAssignOpError(unittest.TestCase): self.assertRaises(TypeError, test_is_crowd_type) def test_is_crowd_tensor_dtype(): - is_crowd2 = fluid.data( - name='is_crowd2', shape=[10, 1], dtype='float32') + is_crowd2 = fluid.data(name='is_crowd2', + shape=[10, 1], + 
dtype='float32') score_pred, loc_pred, score_target, loc_target, bbox_inside_weight, fg_num = \ fluid.layers.retinanet_target_assign(bbox_pred1, cls_logits1, anchor_box1, anchor_var1, gt_boxes1, gt_labels1, is_crowd2, im_info1, 10) @@ -550,8 +569,9 @@ class TestRetinanetTargetAssignOpError(unittest.TestCase): self.assertRaises(TypeError, test_im_info_type) def test_im_info_tensor_dtype(): - im_info2 = fluid.data( - name='im_info2', shape=[1, 3], dtype='int32') + im_info2 = fluid.data(name='im_info2', + shape=[1, 3], + dtype='int32') score_pred, loc_pred, score_target, loc_target, bbox_inside_weight, fg_num = \ fluid.layers.retinanet_target_assign(bbox_pred1, cls_logits1, anchor_box1, anchor_var1, gt_boxes1, gt_labels1, is_crowd1, im_info2, 10) diff --git a/python/paddle/fluid/tests/unittests/test_rrelu_op.py b/python/paddle/fluid/tests/unittests/test_rrelu_op.py index 9d33ce085b7..523b65bcd6d 100644 --- a/python/paddle/fluid/tests/unittests/test_rrelu_op.py +++ b/python/paddle/fluid/tests/unittests/test_rrelu_op.py @@ -44,6 +44,7 @@ def check_output(input, output, lower, upper): class TestFunctionalRReluAPI(unittest.TestCase): + def setUp(self): self.x_np = np.random.uniform(-1., 1., [1, 2, 3, 4]).astype('float64') self.lower_0 = 0.05 @@ -58,12 +59,17 @@ class TestFunctionalRReluAPI(unittest.TestCase): def check_static_result(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - input = fluid.data( - name="input", shape=[2, 3, 4, 5], dtype="float32") - res1 = F.rrelu( - x=input, lower=self.lower_0, upper=self.upper_0, training=False) - res2 = F.rrelu( - x=input, lower=self.lower_1, upper=self.upper_1, training=False) + input = fluid.data(name="input", + shape=[2, 3, 4, 5], + dtype="float32") + res1 = F.rrelu(x=input, + lower=self.lower_0, + upper=self.upper_0, + training=False) + res2 = F.rrelu(x=input, + lower=self.lower_1, + upper=self.upper_1, + training=False) in_np = np.random.uniform(-1., 1., [2, 3, 4, 5]).astype("float32") res_np1 = ref_rrelu(in_np, self.lower_0, self.upper_0) @@ -89,10 +95,12 @@ class TestFunctionalRReluAPI(unittest.TestCase): for place in self.places: paddle.enable_static() - x_1 = paddle.fluid.data( - name="x", shape=self.x_np.shape, dtype="float64") - x_2 = paddle.fluid.data( - name="x2", shape=self.x_np.shape, dtype="float64") + x_1 = paddle.fluid.data(name="x", + shape=self.x_np.shape, + dtype="float64") + x_2 = paddle.fluid.data(name="x2", + shape=self.x_np.shape, + dtype="float64") out_1 = F.rrelu(x_1, self.lower_0, self.upper_0, training=False) out_2 = F.rrelu(x_2, self.lower_1, self.upper_1, training=False) out_3 = F.rrelu(x_2, self.lower_1, self.upper_1, training=True) @@ -123,10 +131,12 @@ class TestFunctionalRReluAPI(unittest.TestCase): for place in self.places: paddle.enable_static() - x_1 = paddle.fluid.data( - name="x", shape=self.x_np.shape, dtype="float64") - x_2 = paddle.fluid.data( - name="x2", shape=self.x_np.shape, dtype="float64") + x_1 = paddle.fluid.data(name="x", + shape=self.x_np.shape, + dtype="float64") + x_2 = paddle.fluid.data(name="x2", + shape=self.x_np.shape, + dtype="float64") # init instance rrelu_1 = paddle.nn.RReLU(self.lower_0, self.upper_0) rrelu_2 = paddle.nn.RReLU(self.lower_1, self.upper_1) @@ -171,8 +181,8 @@ class TestFunctionalRReluAPI(unittest.TestCase): rrelu = paddle.nn.RReLU(self.lower_0, self.upper_0) result = rrelu(paddle.to_tensor(self.x_np)) self.assertTrue( - check_output(self.x_np, - result.numpy(), self.lower_0, self.upper_0)) + check_output(self.x_np, result.numpy(), self.lower_0, + 
self.upper_0)) paddle.enable_static() def test_dygraph(self): @@ -182,52 +192,67 @@ class TestFunctionalRReluAPI(unittest.TestCase): rrelu = paddle.nn.RReLU(self.lower_0, self.upper_0) out_np = rrelu(paddle.to_tensor(self.x_np)) self.assertTrue( - check_output(self.x_np, - out_np.numpy(), self.lower_0, self.upper_0)) + check_output(self.x_np, out_np.numpy(), self.lower_0, + self.upper_0)) paddle.enable_static() def test_error_functional(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): # The input type must be Variable. - self.assertRaises( - TypeError, F.rrelu, x=1, lower=self.lower_0, upper=self.upper_0) + self.assertRaises(TypeError, + F.rrelu, + x=1, + lower=self.lower_0, + upper=self.upper_0) # The input dtype must be float16, float32, float64. - x_int32 = paddle.fluid.data( - name='x_int32', shape=[2, 3], dtype='int32') - self.assertRaises( - TypeError, - F.rrelu, - x=x_int32, - lower=self.lower_0, - upper=self.upper_0) - x_bool = paddle.fluid.data( - name='x_bool', shape=[2, 3], dtype='int32') - self.assertRaises( - TypeError, - F.rrelu, - x=x_bool, - lower=self.lower_0, - upper=self.upper_0) + x_int32 = paddle.fluid.data(name='x_int32', + shape=[2, 3], + dtype='int32') + self.assertRaises(TypeError, + F.rrelu, + x=x_int32, + lower=self.lower_0, + upper=self.upper_0) + x_bool = paddle.fluid.data(name='x_bool', + shape=[2, 3], + dtype='int32') + self.assertRaises(TypeError, + F.rrelu, + x=x_bool, + lower=self.lower_0, + upper=self.upper_0) # lower and upper must be float - x_fp32 = paddle.fluid.data( - name='x_fp32', shape=[2, 3], dtype='float32') + x_fp32 = paddle.fluid.data(name='x_fp32', + shape=[2, 3], + dtype='float32') self.assertRaises(TypeError, F.rrelu, x=x_fp32, lower=0, upper=0.5) self.assertRaises(TypeError, F.rrelu, x=x_fp32, lower=0.5, upper=1) # lower and upper must be in (0, 1) - self.assertRaises( - ValueError, F.rrelu, x=x_fp32, lower=-1., upper=0.5) - self.assertRaises( - ValueError, F.rrelu, x=x_fp32, lower=0.5, upper=2.) + self.assertRaises(ValueError, + F.rrelu, + x=x_fp32, + lower=-1., + upper=0.5) + self.assertRaises(ValueError, + F.rrelu, + x=x_fp32, + lower=0.5, + upper=2.) # upper should not be less than lower - self.assertRaises( - ValueError, F.rrelu, x=x_fp32, lower=0.5, upper=0.2) + self.assertRaises(ValueError, + F.rrelu, + x=x_fp32, + lower=0.5, + upper=0.2) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[2, 3], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[2, 3], + dtype='float16') F.rrelu(x=x_fp16, lower=self.lower_0, upper=self.upper_0) def test_error_layer(self): + def error_int_dtype(): with paddle.fluid.dygraph.guard(): x = np.random.random([2, 3]).astype("float64") @@ -273,6 +298,7 @@ class TestFunctionalRReluAPI(unittest.TestCase): class RReluTest(OpTest): + def setUp(self): self.op_type = "rrelu" self.lower = 0.1 @@ -305,6 +331,7 @@ class RReluTest(OpTest): class RReluTrainingTest(OpTest): + def setUp(self): self.op_type = "rrelu" self.lower = 0.3 @@ -314,6 +341,7 @@ class RReluTrainingTest(OpTest): class RReluTrainingTest(OpTest): + def setUp(self): self.op_type = "rrelu" self.lower = 0.3 diff --git a/python/paddle/fluid/tests/unittests/test_run.py b/python/paddle/fluid/tests/unittests/test_run.py index c0157c5b906..d2f3ec9ebcd 100644 --- a/python/paddle/fluid/tests/unittests/test_run.py +++ b/python/paddle/fluid/tests/unittests/test_run.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -51,13 +51,13 @@ def write_file(name, ct): def get_files(pth, prefix): return [ - f for f in listdir(pth) - if isfile(join(pth, f)) and f.startswith(prefix) and f != - f"{prefix}.gpu.log" + f for f in listdir(pth) if isfile(join(pth, f)) and f.startswith(prefix) + and f != f"{prefix}.gpu.log" ] class Collective_Test(unittest.TestCase): + def setUp(self): write_file(pyname, colpyfile) @@ -109,6 +109,7 @@ class Collective_Test(unittest.TestCase): class PS_Test(unittest.TestCase): + def setUp(self): write_file(pyname, pspyfile) diff --git a/python/paddle/fluid/tests/unittests/test_run_fluid_by_module_or_command_line.py b/python/paddle/fluid/tests/unittests/test_run_fluid_by_module_or_command_line.py index df626dc6dde..d59c9db637f 100644 --- a/python/paddle/fluid/tests/unittests/test_run_fluid_by_module_or_command_line.py +++ b/python/paddle/fluid/tests/unittests/test_run_fluid_by_module_or_command_line.py @@ -18,6 +18,7 @@ import sys class TestRunFluidByModule(unittest.TestCase): + def test_module(self): print(sys.executable) res = os.system(sys.executable + ' -m "paddle.fluid.reader"') @@ -25,6 +26,7 @@ class TestRunFluidByModule(unittest.TestCase): class TestRunFluidByCommand(unittest.TestCase): + def test_command(self): res = os.system(sys.executable + ' -c "import paddle.fluid"') self.assertEqual(res, 0) # 0 means status OK diff --git a/python/paddle/fluid/tests/unittests/test_run_program_op.py b/python/paddle/fluid/tests/unittests/test_run_program_op.py index fdb931e2531..00deabbf72e 100644 --- a/python/paddle/fluid/tests/unittests/test_run_program_op.py +++ b/python/paddle/fluid/tests/unittests/test_run_program_op.py @@ -41,7 +41,7 @@ def program_scope_guard(): yield -# NOTE: Because RunProgramOp has a special output of type std::vector, +# NOTE: Because RunProgramOp has a special output of type std::vector, # the OpTest cannot be used in RunProgramOp. 
The variable type cannot be specified # when creating output variables in OpTest, default type is LoDTensor # NOTE: the gradient test method in OpTest also cannot be used for RunProgramOp, @@ -49,6 +49,7 @@ def program_scope_guard(): # when create Operator, so here compare gradients with static graph # NOTE: Here rewrite a simple unittest framework for RunProgramOp class RunProgramOpTest(unittest.TestCase): + def build_model(self): raise NotImplementedError( "RunProgramOp test should implement build_model") @@ -126,13 +127,18 @@ class RunProgramOpTest(unittest.TestCase): self.assertTrue(np.allclose(expect_v, actual_v, atol=1e-5)) def prepare_dygraph_input(self, place, return_param_list=False): + def create_var_base(is_input, name, np_value, stop_gradient): if _in_eager_mode_: - var = core.eager.Tensor( - value=np_value, name=name, place=place, zero_copy=True) + var = core.eager.Tensor(value=np_value, + name=name, + place=place, + zero_copy=True) else: - var = core.VarBase( - value=np_value, name=name, place=place, zero_copy=True) + var = core.VarBase(value=np_value, + name=name, + place=place, + zero_copy=True) var.stop_gradient = stop_gradient return var @@ -155,6 +161,7 @@ class RunProgramOpTest(unittest.TestCase): return inputs def prepare_dygraph_output(self): + def create_var_base(is_input, name): var = framework._varbase_creator(dtype=None, shape=None, name=name) var.stop_gradient = False @@ -234,6 +241,7 @@ class RunProgramOpTest(unittest.TestCase): class TestRunProgramOpWithFC(RunProgramOpTest): + def setUp(self): self.op_type = "run_program" self.dtype = np.float32 @@ -245,14 +253,14 @@ class TestRunProgramOpWithFC(RunProgramOpTest): self.inputs = { 'X': { - self.input_names['X'][0]: np.random.random((32, 1, 28, 28)) - .astype(self.dtype) + self.input_names['X'][0]: + np.random.random((32, 1, 28, 28)).astype(self.dtype) }, 'Params': { - self.input_names['Params'][0]: np.random.random( - (784, 10)).astype(self.dtype), - self.input_names['Params'][1]: np.random.random( - (32, 10)).astype(self.dtype) + self.input_names['Params'][0]: + np.random.random((784, 10)).astype(self.dtype), + self.input_names['Params'][1]: + np.random.random((32, 10)).astype(self.dtype) } } @@ -264,21 +272,20 @@ class TestRunProgramOpWithFC(RunProgramOpTest): def build_model(self): # 1. 
simple model - img = fluid.data( - name=self.input_names['X'][0], - shape=[None, 1, 28, 28], - dtype='float32') + img = fluid.data(name=self.input_names['X'][0], + shape=[None, 1, 28, 28], + dtype='float32') weight_attr = fluid.ParamAttr( name=self.input_names['Params'][0], learning_rate=0.5, - initializer=fluid.initializer.NumpyArrayInitializer(self.inputs[ - 'Params'][self.input_names['Params'][0]]), + initializer=fluid.initializer.NumpyArrayInitializer( + self.inputs['Params'][self.input_names['Params'][0]]), trainable=True) bias_attr = fluid.ParamAttr( name=self.input_names['Params'][1], learning_rate=0.5, - initializer=fluid.initializer.NumpyArrayInitializer(self.inputs[ - 'Params'][self.input_names['Params'][1]]), + initializer=fluid.initializer.NumpyArrayInitializer( + self.inputs['Params'][self.input_names['Params'][1]]), trainable=True) pred = fluid.layers.fc(input=img, size=10, @@ -294,6 +301,7 @@ class TestRunProgramOpWithFC(RunProgramOpTest): class TestRunProgramOpWithEmbedding(RunProgramOpTest): + def setUp(self): self.op_type = "run_program" self.dtype = np.float32 @@ -313,7 +321,7 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest): self.check_output() def test_check_grad(self): - # NOTE: fecth not support SelectedRows, catnot compare + # NOTE: fetch does not support SelectedRows, cannot compare # sparse gradients with static mode, only run dygraph places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): @@ -324,16 +332,17 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest): def build_model(self): # 1. simple model - x = fluid.layers.data( - name=self.input_names['X'][0], shape=[5], dtype='int64') + x = fluid.layers.data(name=self.input_names['X'][0], + shape=[5], + dtype='int64') emb = fluid.input.embedding( input=x, size=[10, 16], param_attr=fluid.ParamAttr( name="emb_weight", learning_rate=10, - initializer=fluid.initializer.NumpyArrayInitializer(self.inputs[ - 'Params'][self.input_names['Params'][0]])), + initializer=fluid.initializer.NumpyArrayInitializer( + self.inputs['Params'][self.input_names['Params'][0]])), is_sparse=True) y = fluid.layers.reduce_sum(emb, dim=-1) # 2. 
get forward op num @@ -345,6 +354,7 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest): class Net(paddle.nn.Layer): + def __init__(self): super(Net, self).__init__() self.fc1 = paddle.nn.Linear(10, 10) @@ -358,6 +368,7 @@ class Net(paddle.nn.Layer): class TestParametersWithStopGradient(unittest.TestCase): + def setUp(self): self.seed = 2021 self.iter = 5 diff --git a/python/paddle/fluid/tests/unittests/test_runtime_and_compiletime_exception.py b/python/paddle/fluid/tests/unittests/test_runtime_and_compiletime_exception.py index 21fdeeeb3e6..18e3b67c25e 100644 --- a/python/paddle/fluid/tests/unittests/test_runtime_and_compiletime_exception.py +++ b/python/paddle/fluid/tests/unittests/test_runtime_and_compiletime_exception.py @@ -23,6 +23,7 @@ import paddle.fluid.core as core class TestRunTimeException(unittest.TestCase): + def test_run_time_exception(self): place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -41,6 +42,7 @@ class TestRunTimeException(unittest.TestCase): class TestCompileTimeException(unittest.TestCase): + def test_compile_time_exception(self): self.assertRaises(ValueError, self.build_model) @@ -48,8 +50,10 @@ class TestCompileTimeException(unittest.TestCase): train_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): - label = fluid.layers.data( - name="label", shape=[1], dtype="int64", append_batch_size=False) + label = fluid.layers.data(name="label", + shape=[1], + dtype="int64", + append_batch_size=False) fluid.layers.one_hot(input=label, depth=100) diff --git a/python/paddle/fluid/tests/unittests/test_sample_logits_op.py b/python/paddle/fluid/tests/unittests/test_sample_logits_op.py index a3eaf24bd6b..749a32978be 100644 --- a/python/paddle/fluid/tests/unittests/test_sample_logits_op.py +++ b/python/paddle/fluid/tests/unittests/test_sample_logits_op.py @@ -19,6 +19,7 @@ from op_test import OpTest class TestSampleLogitsOp(OpTest): + def setUp(self): self.op_type = "sample_logits" self.dtype = np.float64 @@ -92,12 +93,14 @@ class TestSampleLogitsOp(OpTest): class TestSampleLogitsOpNoUniq(TestSampleLogitsOp): + def setUp(self): super(TestSampleLogitsOpNoUniq, self).setUp() self.attrs = {'num_samples': self.S, 'uniq': False} class TestSampleLogitsOpWithAccidentalHits(TestSampleLogitsOp): + def setUp(self): super(TestSampleLogitsOpWithAccidentalHits, self).setUp() self.attrs = {'num_samples': self.S, 'remove_accidental_hits': False} diff --git a/python/paddle/fluid/tests/unittests/test_sampling_id_op.py b/python/paddle/fluid/tests/unittests/test_sampling_id_op.py index 521cd3ae238..f9271f475fb 100644 --- a/python/paddle/fluid/tests/unittests/test_sampling_id_op.py +++ b/python/paddle/fluid/tests/unittests/test_sampling_id_op.py @@ -23,6 +23,7 @@ import paddle class TestSamplingIdShape(unittest.TestCase): + def test_shape(self): paddle.enable_static() x = fluid.layers.data(name='x', shape=[3], dtype='float32') @@ -33,8 +34,7 @@ class TestSamplingIdShape(unittest.TestCase): exe.run(fluid.default_startup_program()) feed = { - 'x': np.array( - [[0.2, 0.3, 0.5], [0.2, 0.3, 0.4]], dtype='float32') + 'x': np.array([[0.2, 0.3, 0.5], [0.2, 0.3, 0.4]], dtype='float32') } output_np = exe.run(feed=feed, fetch_list=[output])[0] diff --git a/python/paddle/fluid/tests/unittests/test_save_inference_model_conditional_op.py b/python/paddle/fluid/tests/unittests/test_save_inference_model_conditional_op.py index 86431086ac5..9f8f9c382ca 100644 --- 
a/python/paddle/fluid/tests/unittests/test_save_inference_model_conditional_op.py +++ b/python/paddle/fluid/tests/unittests/test_save_inference_model_conditional_op.py @@ -31,13 +31,14 @@ def getModelOp(model_path): result = set() for i in range(0, size): - #print(main_block.op(i).type()) + #print(main_block.op(i).type()) result.add(main_block.op(i).type()) return result class WhileNet(paddle.nn.Layer): + def __init__(self): super(WhileNet, self).__init__() @@ -55,6 +56,7 @@ class WhileNet(paddle.nn.Layer): class ForNet(paddle.nn.Layer): + def __init__(self): super(ForNet, self).__init__() @@ -68,6 +70,7 @@ class ForNet(paddle.nn.Layer): class IfElseNet(paddle.nn.Layer): + def __init__(self): super(IfElseNet, self).__init__() @@ -81,15 +84,15 @@ class IfElseNet(paddle.nn.Layer): class TestConditionalOp(unittest.TestCase): + def test_while_op(self): paddle.disable_static() net = WhileNet() - net = paddle.jit.to_static( - net, - input_spec=[ - paddle.static.InputSpec( - shape=[1, 3, 8, 8], dtype='float32') - ]) + net = paddle.jit.to_static(net, + input_spec=[ + paddle.static.InputSpec( + shape=[1, 3, 8, 8], dtype='float32') + ]) paddle.jit.save(net, './while_net') right_pdmodel = set([ @@ -107,9 +110,7 @@ class TestConditionalOp(unittest.TestCase): paddle.disable_static() net = ForNet() net = paddle.jit.to_static( - net, - input_spec=[paddle.static.InputSpec( - shape=[1], dtype='int32')]) + net, input_spec=[paddle.static.InputSpec(shape=[1], dtype='int32')]) paddle.jit.save(net, './for_net') right_pdmodel = set([ @@ -127,9 +128,7 @@ class TestConditionalOp(unittest.TestCase): paddle.disable_static() net = IfElseNet() net = paddle.jit.to_static( - net, - input_spec=[paddle.static.InputSpec( - shape=[1], dtype='int32')]) + net, input_spec=[paddle.static.InputSpec(shape=[1], dtype='int32')]) paddle.jit.save(net, './if_net') right_pdmodel = set([ diff --git a/python/paddle/fluid/tests/unittests/test_save_model_without_var.py b/python/paddle/fluid/tests/unittests/test_save_model_without_var.py index 4c63dced83b..df520f8716d 100644 --- a/python/paddle/fluid/tests/unittests/test_save_model_without_var.py +++ b/python/paddle/fluid/tests/unittests/test_save_model_without_var.py @@ -22,12 +22,12 @@ from paddle.fluid.executor import as_numpy class TestSaveModelWithoutVar(unittest.TestCase): + def test_no_var_save(self): - data = fluid.layers.data( - name='data', - shape=[-1, 1], - dtype='float32', - append_batch_size=False) + data = fluid.layers.data(name='data', + shape=[-1, 1], + dtype='float32', + append_batch_size=False) data_plus = data + 1 if fluid.core.is_compiled_with_cuda(): @@ -41,13 +41,12 @@ class TestSaveModelWithoutVar(unittest.TestCase): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - fluid.io.save_inference_model( - dirname='test', - feeded_var_names=['data'], - target_vars=[data_plus], - executor=exe, - model_filename='model', - params_filename='params') + fluid.io.save_inference_model(dirname='test', + feeded_var_names=['data'], + target_vars=[data_plus], + executor=exe, + model_filename='model', + params_filename='params') expected_warn = "no variable in your model, please ensure there are any variables in your model to save" self.assertTrue(len(w) > 0) self.assertTrue(expected_warn == str(w[-1].message)) diff --git a/python/paddle/fluid/tests/unittests/test_scale_op.py b/python/paddle/fluid/tests/unittests/test_scale_op.py index 04ddb5a788d..f00b5fdc436 100644 --- a/python/paddle/fluid/tests/unittests/test_scale_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_scale_op.py @@ -25,6 +25,7 @@ from paddle.static import Program, program_guard class TestScaleOp(OpTest): + def setUp(self): self.op_type = "scale" self.python_api = paddle.scale @@ -47,6 +48,7 @@ class TestScaleOp(OpTest): class TestScaleOpScaleVariable(OpTest): + def setUp(self): self.op_type = "scale" self.python_api = paddle.scale @@ -71,6 +73,7 @@ class TestScaleOpScaleVariable(OpTest): class TestScaleOpSelectedRows(unittest.TestCase): + def init_dtype_type(self): pass @@ -129,7 +132,9 @@ class TestScaleOpSelectedRows(unittest.TestCase): class TestScaleRaiseError(unittest.TestCase): + def test_errors(self): + def test_type(): fluid.layers.scale([10]) @@ -140,6 +145,7 @@ class TestScaleRaiseError(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestScaleFp16Op(TestScaleOp): + def init_dtype_type(self): self.dtype = np.float16 @@ -151,11 +157,14 @@ class TestScaleFp16Op(TestScaleOp): def test_check_grad(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_grad_with_place( - place, ["X"], "Out", max_relative_error=0.05, check_eager=True) + self.check_grad_with_place(place, ["X"], + "Out", + max_relative_error=0.05, + check_eager=True) class TestScaleBF16Op(OpTest): + def setUp(self): self.op_type = "scale" self.python_api = paddle.scale @@ -176,6 +185,7 @@ class TestScaleBF16Op(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestScaleFp16OpSelectedRows(TestScaleOpSelectedRows): + def init_dtype_type(self): self.dtype = np.float16 @@ -191,6 +201,7 @@ class TestScaleFp16OpSelectedRows(TestScaleOpSelectedRows): class TestScaleApiStatic(unittest.TestCase): + def _executed_api(self, x, scale=1.0, bias=0.0): return paddle.scale(x, scale, bias) @@ -208,11 +219,13 @@ class TestScaleApiStatic(unittest.TestCase): class TestScaleInplaceApiStatic(TestScaleApiStatic): + def _executed_api(self, x, scale=1.0, bias=0.0): return x.scale_(scale, bias) class TestScaleApiDygraph(unittest.TestCase): + def _executed_api(self, x, scale=1.0, bias=0.0): return paddle.scale(x, scale, bias) @@ -226,6 +239,7 @@ class TestScaleApiDygraph(unittest.TestCase): class TestScaleInplaceApiDygraph(TestScaleApiDygraph): + def _executed_api(self, x, scale=1.0, bias=0.0): return x.scale_(scale, bias) diff --git a/python/paddle/fluid/tests/unittests/test_scaled_dot_product_attention.py b/python/paddle/fluid/tests/unittests/test_scaled_dot_product_attention.py index a205189e4f9..05f824b42a1 100644 --- a/python/paddle/fluid/tests/unittests/test_scaled_dot_product_attention.py +++ b/python/paddle/fluid/tests/unittests/test_scaled_dot_product_attention.py @@ -21,13 +21,16 @@ from paddle.fluid import Program, program_guard class TestScaledDotProductAttentionError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - queries = fluid.data( - name="queries", shape=[3, 5, 9], dtype="float32") + queries = fluid.data(name="queries", + shape=[3, 5, 9], + dtype="float32") keys = fluid.data(name="keys", shape=[3, 6, 9], dtype="float32") - values = fluid.data( - name="values", shape=[3, 6, 10], dtype="float32") + values = fluid.data(name="values", + shape=[3, 6, 10], + dtype="float32") def test_queries_Variable(): queries_data = np.random.rand(3, 5, 9).astype("float32") @@ -51,40 +54,48 @@ class TestScaledDotProductAttentionError(unittest.TestCase): self.assertRaises(TypeError, test_values_Variable) def test_diff_dtype(): - 
keys_error = fluid.data( - name="keys_error", shape=[3, 6, 9], dtype="float64") - values_error = fluid.data( - name="values_error", shape=[3, 6, 10], dtype="float64") + keys_error = fluid.data(name="keys_error", + shape=[3, 6, 9], + dtype="float64") + values_error = fluid.data(name="values_error", + shape=[3, 6, 10], + dtype="float64") fluid.nets.scaled_dot_product_attention(queries, keys_error, values_error) self.assertRaises(TypeError, test_diff_dtype) def test_diff_dim(): - keys_error_dim = fluid.data( - name="keys_error_dim", shape=[3, 6], dtype="float32") - values_error_dim = fluid.data( - name="values_error_dim", shape=[3], dtype="float32") + keys_error_dim = fluid.data(name="keys_error_dim", + shape=[3, 6], + dtype="float32") + values_error_dim = fluid.data(name="values_error_dim", + shape=[3], + dtype="float32") fluid.nets.scaled_dot_product_attention(queries, keys_error_dim, values_error_dim) self.assertRaises(ValueError, test_diff_dim) def test_diff_hidden_size(): - queries_error_hs = fluid.data( - name="queries_error_hs", shape=[3, 5, 9], dtype="float32") - keys_error_hs = fluid.data( - name="keys_error_hs", shape=[3, 6, 10], dtype="float32") + queries_error_hs = fluid.data(name="queries_error_hs", + shape=[3, 5, 9], + dtype="float32") + keys_error_hs = fluid.data(name="keys_error_hs", + shape=[3, 6, 10], + dtype="float32") fluid.nets.scaled_dot_product_attention(queries_error_hs, keys_error_hs, values) self.assertRaises(ValueError, test_diff_hidden_size) def test_diff_max_len(): - keys_error_len = fluid.data( - name="keys_error_len", shape=[3, 7, 9], dtype="float32") - values_error_len = fluid.data( - name="values_error_len", shape=[3, 6, 10], dtype="float32") + keys_error_len = fluid.data(name="keys_error_len", + shape=[3, 7, 9], + dtype="float32") + values_error_len = fluid.data(name="values_error_len", + shape=[3, 6, 10], + dtype="float32") fluid.nets.scaled_dot_product_attention(queries, keys_error_len, values_error_len) diff --git a/python/paddle/fluid/tests/unittests/test_scatter_nd_op.py b/python/paddle/fluid/tests/unittests/test_scatter_nd_op.py index ddbee33c35b..1833f36013d 100644 --- a/python/paddle/fluid/tests/unittests/test_scatter_nd_op.py +++ b/python/paddle/fluid/tests/unittests/test_scatter_nd_op.py @@ -117,9 +117,8 @@ class TestScatterNdAddWithHighRankSame(OpTest): self.python_api = paddle.scatter_nd_add shape = (3, 2, 2, 1, 10) ref_np = np.random.rand(*shape).astype("float64") - index_np = np.vstack( - [np.random.randint( - 0, s, size=100) for s in shape]).T.astype("int32") + index_np = np.vstack([np.random.randint(0, s, size=100) + for s in shape]).T.astype("int32") update_shape = judge_update_shape(ref_np, index_np) updates_np = np.random.rand(*update_shape).astype("float64") expect_np = numpy_scatter_nd_add(ref_np.copy(), index_np, updates_np) @@ -167,70 +166,64 @@ class TestScatterNdOpAPI(unittest.TestCase): """ def testcase1(self): - ref1 = fluid.layers.data( - name='ref1', - shape=[10, 9, 8, 1, 3], - dtype='float32', - append_batch_size=False) - index1 = fluid.layers.data( - name='index1', - shape=[5, 5, 8, 5], - dtype='int32', - append_batch_size=False) - updates1 = fluid.layers.data( - name='update1', - shape=[5, 5, 8], - dtype='float32', - append_batch_size=False) + ref1 = fluid.layers.data(name='ref1', + shape=[10, 9, 8, 1, 3], + dtype='float32', + append_batch_size=False) + index1 = fluid.layers.data(name='index1', + shape=[5, 5, 8, 5], + dtype='int32', + append_batch_size=False) + updates1 = fluid.layers.data(name='update1', + shape=[5, 5, 8], + 
dtype='float32', + append_batch_size=False) output1 = fluid.layers.scatter_nd_add(ref1, index1, updates1) def testcase2(self): - ref2 = fluid.layers.data( - name='ref2', - shape=[10, 9, 8, 1, 3], - dtype='double', - append_batch_size=False) - index2 = fluid.layers.data( - name='index2', - shape=[5, 8, 5], - dtype='int32', - append_batch_size=False) - updates2 = fluid.layers.data( - name='update2', - shape=[5, 8], - dtype='double', - append_batch_size=False) - output2 = fluid.layers.scatter_nd_add( - ref2, index2, updates2, name="scatter_nd_add") + ref2 = fluid.layers.data(name='ref2', + shape=[10, 9, 8, 1, 3], + dtype='double', + append_batch_size=False) + index2 = fluid.layers.data(name='index2', + shape=[5, 8, 5], + dtype='int32', + append_batch_size=False) + updates2 = fluid.layers.data(name='update2', + shape=[5, 8], + dtype='double', + append_batch_size=False) + output2 = fluid.layers.scatter_nd_add(ref2, + index2, + updates2, + name="scatter_nd_add") def testcase3(self): shape3 = [10, 9, 8, 1, 3] - index3 = fluid.layers.data( - name='index3', - shape=[5, 5, 8, 5], - dtype='int32', - append_batch_size=False) - updates3 = fluid.layers.data( - name='update3', - shape=[5, 5, 8], - dtype='float32', - append_batch_size=False) + index3 = fluid.layers.data(name='index3', + shape=[5, 5, 8, 5], + dtype='int32', + append_batch_size=False) + updates3 = fluid.layers.data(name='update3', + shape=[5, 5, 8], + dtype='float32', + append_batch_size=False) output3 = fluid.layers.scatter_nd(index3, updates3, shape3) def testcase4(self): shape4 = [10, 9, 8, 1, 3] - index4 = fluid.layers.data( - name='index4', - shape=[5, 5, 8, 5], - dtype='int32', - append_batch_size=False) - updates4 = fluid.layers.data( - name='update4', - shape=[5, 5, 8], - dtype='double', - append_batch_size=False) - output4 = fluid.layers.scatter_nd( - index4, updates4, shape4, name='scatter_nd') + index4 = fluid.layers.data(name='index4', + shape=[5, 5, 8, 5], + dtype='int32', + append_batch_size=False) + updates4 = fluid.layers.data(name='update4', + shape=[5, 5, 8], + dtype='double', + append_batch_size=False) + output4 = fluid.layers.scatter_nd(index4, + updates4, + shape4, + name='scatter_nd') def testcase5(self): if not fluid.core.is_compiled_with_cuda(): @@ -244,15 +237,15 @@ class TestScatterNdOpAPI(unittest.TestCase): with fluid.dygraph.guard(): device = paddle.get_device() paddle.set_device('gpu') - gpu_value = paddle.scatter_nd_add( - paddle.to_tensor(x), - paddle.to_tensor(index), paddle.to_tensor(val)) + gpu_value = paddle.scatter_nd_add(paddle.to_tensor(x), + paddle.to_tensor(index), + paddle.to_tensor(val)) paddle.set_device('cpu') - cpu_value = paddle.scatter_nd_add( - paddle.to_tensor(x), - paddle.to_tensor(index), paddle.to_tensor(val)) - self.assertTrue( - np.array_equal(gpu_value.numpy(), cpu_value.numpy())) + cpu_value = paddle.scatter_nd_add(paddle.to_tensor(x), + paddle.to_tensor(index), + paddle.to_tensor(val)) + self.assertTrue(np.array_equal(gpu_value.numpy(), + cpu_value.numpy())) paddle.set_device(device) @switch_to_static_graph @@ -260,10 +253,12 @@ class TestScatterNdOpAPI(unittest.TestCase): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): x_t = paddle.static.data(name="x", dtype=x.dtype, shape=x.shape) - index_t = paddle.static.data( - name="index", dtype=index.dtype, shape=index.shape) - val_t = paddle.static.data( - name="val", dtype=val.dtype, shape=val.shape) + index_t = paddle.static.data(name="index", + dtype=index.dtype, + shape=index.shape) + val_t = 
paddle.static.data(name="val", + dtype=val.dtype, + shape=val.shape) out_t = paddle.scatter_nd_add(x_t, index_t, val_t) feed = {x_t.name: x, index_t.name: index, val_t.name: val} fetch = [out_t] @@ -279,15 +274,20 @@ class TestScatterNdOpAPI(unittest.TestCase): #Test Raise Error class TestScatterNdOpRaise(unittest.TestCase): + def test_check_raise(self): + def check_raise_is_test(): try: - ref5 = fluid.layers.data( - name='ref5', shape=[3, 4, 5], dtype='float32') - index5 = fluid.layers.data( - name='index5', shape=[2, 10], dtype='int32') - updates5 = fluid.layers.data( - name='updates5', shape=[2, 10], dtype='float32') + ref5 = fluid.layers.data(name='ref5', + shape=[3, 4, 5], + dtype='float32') + index5 = fluid.layers.data(name='index5', + shape=[2, 10], + dtype='int32') + updates5 = fluid.layers.data(name='updates5', + shape=[2, 10], + dtype='float32') output5 = fluid.layers.scatter_nd_add(ref5, index5, updates5) except Exception as e: t = \ @@ -299,31 +299,31 @@ class TestScatterNdOpRaise(unittest.TestCase): def test_check_raise2(self): with self.assertRaises(ValueError): - ref6 = fluid.layers.data( - name='ref6', - shape=[10, 9, 8, 1, 3], - dtype='double', - append_batch_size=False) - index6 = fluid.layers.data( - name='index6', - shape=[5, 8, 5], - dtype='int32', - append_batch_size=False) - updates6 = fluid.layers.data( - name='update6', - shape=[5, 8], - dtype='float32', - append_batch_size=False) + ref6 = fluid.layers.data(name='ref6', + shape=[10, 9, 8, 1, 3], + dtype='double', + append_batch_size=False) + index6 = fluid.layers.data(name='index6', + shape=[5, 8, 5], + dtype='int32', + append_batch_size=False) + updates6 = fluid.layers.data(name='update6', + shape=[5, 8], + dtype='float32', + append_batch_size=False) output6 = fluid.layers.scatter_nd_add(ref6, index6, updates6) def test_check_raise3(self): + def check_raise_is_test(): try: shape = [3, 4, 5] - index7 = fluid.layers.data( - name='index7', shape=[2, 1], dtype='int32') - updates7 = fluid.layers.data( - name='updates7', shape=[2, 4, 5, 20], dtype='float32') + index7 = fluid.layers.data(name='index7', + shape=[2, 1], + dtype='int32') + updates7 = fluid.layers.data(name='updates7', + shape=[2, 4, 5, 20], + dtype='float32') output7 = fluid.layers.scatter_nd(index7, updates7, shape) except Exception as e: t = \ @@ -335,6 +335,7 @@ class TestScatterNdOpRaise(unittest.TestCase): class TestDygraph(unittest.TestCase): + def test_dygraph(self): with fluid.dygraph.guard(fluid.CPUPlace()): index_data = np.array([[1, 1], [0, 1], [1, 3]]).astype(np.int64) diff --git a/python/paddle/fluid/tests/unittests/test_scatter_op.py b/python/paddle/fluid/tests/unittests/test_scatter_op.py index d7f8886dcd3..2fe162d8090 100644 --- a/python/paddle/fluid/tests/unittests/test_scatter_op.py +++ b/python/paddle/fluid/tests/unittests/test_scatter_op.py @@ -25,6 +25,7 @@ from paddle.fluid.dygraph.base import switch_to_static_graph class TestScatterOp(OpTest): + def setUp(self): self.op_type = "scatter" self.python_api = paddle.scatter @@ -44,6 +45,7 @@ class TestScatterOp(OpTest): class TestScatterOp0(OpTest): + def setUp(self): self.op_type = "scatter" self.python_api = paddle.scatter @@ -64,6 +66,7 @@ class TestScatterOp0(OpTest): class TestScatterOp1(OpTest): + def setUp(self): self.op_type = "scatter" self.python_api = paddle.scatter @@ -89,6 +92,7 @@ class TestScatterOp1(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestScatterOp2(OpTest): + def setUp(self): self.op_type = "scatter" 
self.python_api = paddle.scatter @@ -108,13 +112,15 @@ class TestScatterOp2(OpTest): def test_check_grad(self): if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['X', 'Updates'], 'Out', check_eager=False) + self.check_grad_with_place(place, ['X', 'Updates'], + 'Out', + check_eager=False) @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestScatterOp3(OpTest): + def setUp(self): self.op_type = "scatter" self.python_api = paddle.scatter @@ -138,11 +144,13 @@ class TestScatterOp3(OpTest): def test_check_grad(self): if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['X', 'Updates'], 'Out', check_eager=False) + self.check_grad_with_place(place, ['X', 'Updates'], + 'Out', + check_eager=False) class TestScatterOp4(OpTest): + def setUp(self): self.op_type = "scatter" self.python_api = paddle.scatter @@ -164,6 +172,7 @@ class TestScatterOp4(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestScatterOp5(OpTest): + def setUp(self): self.op_type = "scatter" self.python_api = paddle.scatter @@ -183,11 +192,13 @@ class TestScatterOp5(OpTest): def test_check_grad(self): if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['X', 'Updates'], 'Out', check_eager=False) + self.check_grad_with_place(place, ['X', 'Updates'], + 'Out', + check_eager=False) class TestScatterAPI(unittest.TestCase): + def setUp(self): self.places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): @@ -206,8 +217,8 @@ class TestScatterAPI(unittest.TestCase): input_data = np.array([[1, 1], [2, 2], [3, 3]]).astype(np.float64) index_data = np.array([2, 1, 0, 1]).astype(np.int64) - updates_data = np.array( - [[1, 1], [2, 2], [3, 3], [4, 4]]).astype(np.float64) + updates_data = np.array([[1, 1], [2, 2], [3, 3], + [4, 4]]).astype(np.float64) exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), @@ -229,8 +240,8 @@ class TestScatterAPI(unittest.TestCase): with fluid.dygraph.guard(place): x_data = np.array([[1, 1], [2, 2], [3, 3]]).astype(np.float64) index_data = np.array([2, 1, 0, 1]).astype(np.int64) - updates_data = np.array( - [[1, 1], [2, 2], [3, 3], [4, 4]]).astype(np.float64) + updates_data = np.array([[1, 1], [2, 2], [3, 3], + [4, 4]]).astype(np.float64) x = fluid.dygraph.to_variable(x_data) index = fluid.dygraph.to_variable(index_data) @@ -250,9 +261,9 @@ class TestScatterAPI(unittest.TestCase): def test_dygraph(): with fluid.dygraph.guard(): - gpu_out = paddle.scatter( - paddle.to_tensor(x), - paddle.to_tensor(index), paddle.to_tensor(updates)) + gpu_out = paddle.scatter(paddle.to_tensor(x), + paddle.to_tensor(index), + paddle.to_tensor(updates)) return gpu_out.numpy() @switch_to_static_graph @@ -260,10 +271,12 @@ class TestScatterAPI(unittest.TestCase): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): x_t = paddle.static.data(name="x", dtype=x.dtype, shape=x.shape) - index_t = paddle.static.data( - name="index", dtype=index.dtype, shape=index.shape) - updates_t = paddle.static.data( - name="updates", dtype=updates.dtype, shape=updates.shape) + index_t = paddle.static.data(name="index", + dtype=index.dtype, + shape=index.shape) + updates_t = paddle.static.data(name="updates", + dtype=updates.dtype, + shape=updates.shape) out_t = paddle.scatter(x_t, index_t, updates_t) feed = { x_t.name: x, @@ -282,6 +295,7 @@ class 
TestScatterAPI(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestScatterOpFp16(OpTest): + def setUp(self): self.__class__.op_type = "scatter" self.python_api = paddle.scatter @@ -301,8 +315,8 @@ class TestScatterOpFp16(OpTest): self.ref_dx[self.index_np] = zero_np def compute_ref_grad_updates(self): - ref_grad_updates = paddle.gather( - paddle.to_tensor(self.dout_np), paddle.to_tensor(self.index_np)) + ref_grad_updates = paddle.gather(paddle.to_tensor(self.dout_np), + paddle.to_tensor(self.index_np)) return ref_grad_updates def test_scatter_fp16(self): @@ -311,19 +325,21 @@ class TestScatterOpFp16(OpTest): index_tensor = paddle.to_tensor(self.index_np) updates_tensor = paddle.to_tensor(self.updates_np, stop_gradient=False) out_tensor = paddle.scatter(x_tensor, index_tensor, updates_tensor) - paddle.autograd.backward( - [out_tensor], [paddle.to_tensor(self.dout_np)], retain_graph=True) + paddle.autograd.backward([out_tensor], [paddle.to_tensor(self.dout_np)], + retain_graph=True) ref_grad_updates = self.compute_ref_grad_updates() - np.testing.assert_allclose( - ref_grad_updates.numpy(), - updates_tensor.grad.numpy(), - rtol=1e-5, - atol=1e-5) - np.testing.assert_allclose( - self.ref_dx, x_tensor.grad.numpy(), rtol=1e-5, atol=1e-5) + np.testing.assert_allclose(ref_grad_updates.numpy(), + updates_tensor.grad.numpy(), + rtol=1e-5, + atol=1e-5) + np.testing.assert_allclose(self.ref_dx, + x_tensor.grad.numpy(), + rtol=1e-5, + atol=1e-5) class TestScatterInplaceAPI(TestScatterAPI): + def executed_api(self): self.scatter = paddle.scatter_ diff --git a/python/paddle/fluid/tests/unittests/test_scope.py b/python/paddle/fluid/tests/unittests/test_scope.py index 805aabd393e..9e9f2472d44 100644 --- a/python/paddle/fluid/tests/unittests/test_scope.py +++ b/python/paddle/fluid/tests/unittests/test_scope.py @@ -20,6 +20,7 @@ import six class TestScope(unittest.TestCase): + def test_create_destroy(self): paddle_c = paddle.fluid.core scope = paddle_c.Scope() diff --git a/python/paddle/fluid/tests/unittests/test_searchsorted_op.py b/python/paddle/fluid/tests/unittests/test_searchsorted_op.py index f802b0adfcb..84aa4e858ef 100644 --- a/python/paddle/fluid/tests/unittests/test_searchsorted_op.py +++ b/python/paddle/fluid/tests/unittests/test_searchsorted_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -24,6 +24,7 @@ from op_test import OpTest class TestSearchSorted(OpTest): + def setUp(self): self.python_api = paddle.searchsorted self.op_type = "searchsorted" @@ -36,8 +37,8 @@ class TestSearchSorted(OpTest): self.attrs = {"out_int32": False, "right": False} self.attrs["right"] = True if self.side == 'right' else False self.outputs = { - 'Out': np.searchsorted( - self.sorted_sequence, self.values, side=self.side) + 'Out': + np.searchsorted(self.sorted_sequence, self.values, side=self.side) } def test_check_output(self): @@ -50,6 +51,7 @@ class TestSearchSorted(OpTest): class TestSearchSortedOp1(TestSearchSorted): + def init_test_case(self): self.sorted_sequence = np.array([1, 3, 5, 7, 9]).astype("int32") self.values = np.array([[3, 6, 9], [3, 6, 9]]).astype("int32") @@ -57,6 +59,7 @@ class TestSearchSortedOp1(TestSearchSorted): class TestSearchSortedOp2(TestSearchSorted): + def init_test_case(self): self.sorted_sequence = np.array([1, 3, 5, 7, 9]).astype("int64") self.values = np.array([[3, 6, 9], [3, 6, 9]]).astype("int64") @@ -64,22 +67,25 @@ class TestSearchSortedOp2(TestSearchSorted): class TestSearchSortedOp3(TestSearchSorted): + def init_test_case(self): self.sorted_sequence = np.array([1, 3, 5, 7, 9]).astype("float64") - self.values = np.array( - [[np.nan, np.nan, np.nan], [3, 6, 9]]).astype("float64") + self.values = np.array([[np.nan, np.nan, np.nan], + [3, 6, 9]]).astype("float64") self.side = "left" class TestSearchSortedOp4(TestSearchSorted): + def init_test_case(self): self.sorted_sequence = np.array([1, 3, 5, 7, 9]).astype("float64") - self.values = np.array( - [[np.inf, np.inf, np.inf], [3, 6, 9]]).astype("float64") + self.values = np.array([[np.inf, np.inf, np.inf], + [3, 6, 9]]).astype("float64") self.side = "right" class TestSearchSortedOp5(TestSearchSorted): + def init_test_case(self): self.sorted_sequence = np.array([1, 3, 5, 7, 9]).astype("float64") self.values = np.array([[np.inf, np.inf, np.inf], @@ -88,6 +94,7 @@ class TestSearchSortedOp5(TestSearchSorted): class TestSearchSortedAPI(unittest.TestCase): + def init_test_case(self): self.sorted_sequence = np.array([2, 4, 6, 8, 10]).astype("float64") self.values = np.array([[3, 6, 9], [3, 6, 9]]).astype("float64") @@ -107,8 +114,9 @@ class TestSearchSortedAPI(unittest.TestCase): 'SortedSequence', shape=self.sorted_sequence.shape, dtype="float64") - values = paddle.static.data( - 'Values', shape=self.values.shape, dtype="float64") + values = paddle.static.data('Values', + shape=self.values.shape, + dtype="float64") out = paddle.searchsorted(sorted_sequence, values) exe = paddle.static.Executor(place) res = exe.run(feed={ @@ -123,14 +131,16 @@ class TestSearchSortedAPI(unittest.TestCase): run(place) def test_dygraph_api(self): + def run(place): paddle.disable_static(place) sorted_sequence = paddle.to_tensor(self.sorted_sequence) values = paddle.to_tensor(self.values) out = paddle.searchsorted(sorted_sequence, values, right=True) - out_ref = np.searchsorted( - self.sorted_sequence, self.values, side='right') + out_ref = np.searchsorted(self.sorted_sequence, + self.values, + side='right') self.assertEqual(np.allclose(out_ref, out.numpy()), True) paddle.enable_static() @@ -146,15 +156,18 @@ class TestSearchSortedAPI(unittest.TestCase): class TestSearchSortedError(unittest.TestCase): + def test_error_api(self): paddle.enable_static() def test_searchsorted_dims_matched_before_lastdim_error1(): with paddle.static.program_guard(paddle.static.Program()): - sorted_sequence = paddle.static.data( - 'SortedSequence', shape=[2, 
2, 3], dtype="float64") - values = paddle.static.data( - 'Values', shape=[2, 5], dtype="float64") + sorted_sequence = paddle.static.data('SortedSequence', + shape=[2, 2, 3], + dtype="float64") + values = paddle.static.data('Values', + shape=[2, 5], + dtype="float64") out = paddle.searchsorted(sorted_sequence, values) self.assertRaises(RuntimeError, @@ -162,10 +175,12 @@ class TestSearchSortedError(unittest.TestCase): def test_searchsorted_dims_matched_before_lastdim_error2(): with paddle.static.program_guard(paddle.static.Program()): - sorted_sequence = paddle.static.data( - 'SortedSequence', shape=[2, 2, 3], dtype="float64") - values = paddle.static.data( - 'Values', shape=[2, 3, 5], dtype="float64") + sorted_sequence = paddle.static.data('SortedSequence', + shape=[2, 2, 3], + dtype="float64") + values = paddle.static.data('Values', + shape=[2, 3, 5], + dtype="float64") out = paddle.searchsorted(sorted_sequence, values) self.assertRaises(RuntimeError, @@ -173,22 +188,28 @@ class TestSearchSortedError(unittest.TestCase): def test_searchsorted_sortedsequence_size_error(): with paddle.static.program_guard(paddle.static.Program()): - sorted_sequence = paddle.static.data( - 'SortedSequence', shape=[2, 2, pow(2, 34)], dtype="float64") - values = paddle.static.data( - 'Values', shape=[2, 2, 5], dtype="float64") - out = paddle.searchsorted( - sorted_sequence, values, out_int32=True) + sorted_sequence = paddle.static.data('SortedSequence', + shape=[2, 2, + pow(2, 34)], + dtype="float64") + values = paddle.static.data('Values', + shape=[2, 2, 5], + dtype="float64") + out = paddle.searchsorted(sorted_sequence, + values, + out_int32=True) self.assertRaises(RuntimeError, test_searchsorted_sortedsequence_size_error) def test_sortedsequence_values_type_error(): with paddle.static.program_guard(paddle.static.Program()): - sorted_sequence = paddle.static.data( - 'SortedSequence', shape=[2, 3], dtype="int16") - values = paddle.static.data( - 'Values', shape=[2, 5], dtype="int16") + sorted_sequence = paddle.static.data('SortedSequence', + shape=[2, 3], + dtype="int16") + values = paddle.static.data('Values', + shape=[2, 5], + dtype="int16") out = paddle.searchsorted(sorted_sequence, values) self.assertRaises(TypeError, test_sortedsequence_values_type_error) diff --git a/python/paddle/fluid/tests/unittests/test_seed_op.py b/python/paddle/fluid/tests/unittests/test_seed_op.py index 0dcc197ece7..3e4730a1fbc 100644 --- a/python/paddle/fluid/tests/unittests/test_seed_op.py +++ b/python/paddle/fluid/tests/unittests/test_seed_op.py @@ -24,6 +24,7 @@ paddle.enable_static() class TestSeedOpFixSeed(OpTest): + def setUp(self): self.op_type = "seed" self.inputs = {} @@ -35,6 +36,7 @@ class TestSeedOpFixSeed(OpTest): class TestSeedOpDiffSeed(OpTest): + def setUp(self): self.op_type = "seed" self.inputs = {} @@ -46,6 +48,7 @@ class TestSeedOpDiffSeed(OpTest): class TestDropoutWithRandomSeedGenerator(unittest.TestCase): + def setUp(self): paddle.framework.random.set_random_seed_generator('seed0', 123) paddle.framework.random.set_random_seed_generator('seed1', 123) diff --git a/python/paddle/fluid/tests/unittests/test_segment_ops.py b/python/paddle/fluid/tests/unittests/test_segment_ops.py index 90d597837a8..678a888eeda 100644 --- a/python/paddle/fluid/tests/unittests/test_segment_ops.py +++ b/python/paddle/fluid/tests/unittests/test_segment_ops.py @@ -85,6 +85,7 @@ def segment_pool_split(X, SegmentIds, pooltype): class TestSegmentOps(OpTest): + def set_data(self): x = np.random.uniform(-1, 1, 
self.shape).astype(self.dtype) segment_ids = self.set_segment(len(x), len(x) // 5 + 1) @@ -125,6 +126,7 @@ class TestSegmentOps(OpTest): class TestSegmentSum2(TestSegmentOps): + def prepare(self): super(TestSegmentSum2, self).prepare() self.shape = [40, 20] @@ -142,6 +144,7 @@ class TestSegmentSum2(TestSegmentOps): class TestSegmentMax(TestSegmentOps): + def compute(self, x, segment_ids): return compute_segment_min_max(x, segment_ids, pooltype="MAX") @@ -165,12 +168,14 @@ class TestSegmentMax(TestSegmentOps): class TestSegmentMax2(TestSegmentMax): + def prepare(self): super(TestSegmentMax2, self).prepare() self.dtype = np.float32 class TestSegmentMin(TestSegmentMax): + def compute(self, x, segment_ids): return compute_segment_min_max(x, segment_ids, pooltype="MIN") @@ -180,12 +185,14 @@ class TestSegmentMin(TestSegmentMax): class TestSegmentMin2(TestSegmentMin): + def prepare(self): super(TestSegmentMin2, self).prepare() self.dtype = np.float32 class TestSegmentMean(TestSegmentOps): + def compute(self, x, segment_ids): return compute_segment_mean(x, segment_ids) @@ -200,13 +207,16 @@ class TestSegmentMean(TestSegmentOps): result = self.compute(x, segment_ids) self.inputs = {'X': x, 'SegmentIds': segment_ids} self.outputs = { - 'Out': result, - 'SummedIds': compute_segment_sum( + 'Out': + result, + 'SummedIds': + compute_segment_sum( np.ones([len(x), 1]).astype(self.dtype), segment_ids) } class TestSegmentMean2(TestSegmentMean): + def prepare(self): super(TestSegmentMean2, self).prepare() self.dtype = np.float32 @@ -215,6 +225,7 @@ class TestSegmentMean2(TestSegmentMean): class API_SegmentOpsTest(unittest.TestCase): + def test_static(self): with paddle.static.program_guard(paddle.static.Program()): x = paddle.static.data(name="x", shape=[3, 3], dtype="float32") @@ -234,22 +245,22 @@ class API_SegmentOpsTest(unittest.TestCase): np_max = np.array([[3, 2, 3], [4, 5, 6]], dtype="float32") np_min = np.array([[1, 2, 1], [4, 5, 6]], dtype="float32") - ret = exe.run(feed={'x': data1, - 'y': data2}, + ret = exe.run(feed={ + 'x': data1, + 'y': data2 + }, fetch_list=[res_sum, res_mean, res_max, res_min]) for np_res, ret_res in zip([np_sum, np_mean, np_max, np_min], ret): self.assertTrue( - np.allclose( - np_res, ret_res, atol=1e-6), - "two value is\ + np.allclose(np_res, ret_res, atol=1e-6), "two value is\ {}\n{}, check diff!".format(np_res, ret_res)) def test_dygraph(self): device = paddle.CPUPlace() with paddle.fluid.dygraph.guard(device): - x = paddle.to_tensor( - [[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32') + x = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], + dtype='float32') y = paddle.to_tensor([0, 0, 1], dtype="int32") res_sum = paddle.incubate.segment_sum(x, y) res_mean = paddle.incubate.segment_mean(x, y) @@ -265,9 +276,7 @@ class API_SegmentOpsTest(unittest.TestCase): for np_res, ret_res in zip([np_sum, np_mean, np_max, np_min], ret): self.assertTrue( - np.allclose( - np_res, ret_res.numpy(), atol=1e-6), - "two value is\ + np.allclose(np_res, ret_res.numpy(), atol=1e-6), "two value is\ {}\n{}, check diff!".format(np_res, ret_res)) diff --git a/python/paddle/fluid/tests/unittests/test_select_input_output_op.py b/python/paddle/fluid/tests/unittests/test_select_input_output_op.py index 23b394516fc..8a41e05d1d5 100644 --- a/python/paddle/fluid/tests/unittests/test_select_input_output_op.py +++ b/python/paddle/fluid/tests/unittests/test_select_input_output_op.py @@ -26,6 +26,7 @@ from paddle.fluid.layers.control_flow import select_input, select_output class 
TestSplitMergeSelectedVarOps(unittest.TestCase): + def test_forward_backward_list_output(self): for branch_num in range(2, 10): program = Program() @@ -45,16 +46,18 @@ class TestSplitMergeSelectedVarOps(unittest.TestCase): mean = layers.mean(y) append_backward(mean) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = Executor(place) feed_x = np.asarray([1.3, -1.4]).astype(np.float32) for i in range(branch_num): feed_mask = np.asarray([i]).astype(np.int32) ret = exe.run(program, - feed={'x': feed_x, - 'mask': feed_mask}, + feed={ + 'x': feed_x, + 'mask': feed_mask + }, fetch_list=[y.name, x.grad_name]) x_grad = np.asarray([0.5, 0.5]).astype(np.float32) self.assertTrue(np.allclose(np.asarray(ret[0]), feed_x)) @@ -62,6 +65,7 @@ class TestSplitMergeSelectedVarOps(unittest.TestCase): class TestSelectInputOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): mask = layers.data(name='mask', shape=[1], dtype='int32') @@ -88,14 +92,17 @@ class TestSelectInputOpError(unittest.TestCase): class TestSelectOutput_Error(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): in1 = layers.data(name='in1', shape=[1], dtype='int32') - mask_int32 = layers.data( - name='mask_int32', shape=[1], dtype='int32') - mask_float32 = layers.data( - name='mask_float32', shape=[1], dtype='float32') + mask_int32 = layers.data(name='mask_int32', + shape=[1], + dtype='int32') + mask_float32 = layers.data(name='mask_float32', + shape=[1], + dtype='float32') out1 = layers.data(name='out1', shape=[1], dtype='int32') # 1. The type of input in select_output must Variable. diff --git a/python/paddle/fluid/tests/unittests/test_selected_rows.py b/python/paddle/fluid/tests/unittests/test_selected_rows.py index 2f34f79b8ea..31023ef6090 100644 --- a/python/paddle/fluid/tests/unittests/test_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_selected_rows.py @@ -20,6 +20,7 @@ import numpy as np class TestSelectedRows(unittest.TestCase): + def test_selected_rows(self): place = core.CPUPlace() height = 10 diff --git a/python/paddle/fluid/tests/unittests/test_selu_op.py b/python/paddle/fluid/tests/unittests/test_selu_op.py index f1619881794..6807f96109e 100644 --- a/python/paddle/fluid/tests/unittests/test_selu_op.py +++ b/python/paddle/fluid/tests/unittests/test_selu_op.py @@ -40,6 +40,7 @@ def ref_selu(x, class SeluTest(OpTest): + def setUp(self): self.op_type = "selu" self.python_api = paddle.nn.functional.selu @@ -129,18 +130,21 @@ class TestSeluAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, F.selu, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = paddle.fluid.data( - name='x_int32', shape=[12, 10], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[12, 10], + dtype='int32') self.assertRaises(TypeError, F.selu, x_int32) # The scale must be greater than 1.0 - x_fp32 = paddle.fluid.data( - name='x_fp32', shape=[12, 10], dtype='float32') + x_fp32 = paddle.fluid.data(name='x_fp32', + shape=[12, 10], + dtype='float32') self.assertRaises(ValueError, F.selu, x_fp32, -1.0) # The alpha must be no less than 0 self.assertRaises(ValueError, F.selu, x_fp32, 1.6, -1.0) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[12, 10], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[12, 10], + dtype='float16') F.selu(x_fp16) diff --git a/python/paddle/fluid/tests/unittests/test_set_bool_attr.py b/python/paddle/fluid/tests/unittests/test_set_bool_attr.py index 827f63fe823..a089563b6ec 100644 --- a/python/paddle/fluid/tests/unittests/test_set_bool_attr.py +++ b/python/paddle/fluid/tests/unittests/test_set_bool_attr.py @@ -17,6 +17,7 @@ import unittest class TestAttrSet(unittest.TestCase): + def test_set_bool_attr(self): x = fluid.layers.data(name='x', shape=[3, 7, 3, 7], dtype='float32') param_attr = fluid.ParamAttr( @@ -25,8 +26,9 @@ class TestAttrSet(unittest.TestCase): bias_attr = fluid.ParamAttr( name='batch_norm_b', initializer=fluid.initializer.Constant(value=0.0)) - bn = fluid.layers.batch_norm( - input=x, param_attr=param_attr, bias_attr=bias_attr) + bn = fluid.layers.batch_norm(input=x, + param_attr=param_attr, + bias_attr=bias_attr) block = fluid.default_main_program().desc.block(0) op = block.op(0) before_type = op.attr_type('is_test') diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py index 098999501bc..9aee71af416 100644 --- a/python/paddle/fluid/tests/unittests/test_set_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py @@ -27,6 +27,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class TestSetValueBase(unittest.TestCase): + def setUp(self): paddle.enable_static() self.set_dtype() @@ -52,6 +53,7 @@ class TestSetValueBase(unittest.TestCase): class TestSetValueApi(TestSetValueBase): + def _run_static(self): paddle.enable_static() with paddle.static.program_guard(self.program): @@ -77,12 +79,10 @@ class TestSetValueApi(TestSetValueBase): self._get_answer() error_msg = "\nIn {} mode: \nExpected res = \n{}, \n\nbut received : \n{}" - self.assertTrue( - (self.data == static_out).all(), - msg=error_msg.format("static", self.data, static_out)) - self.assertTrue( - (self.data == dynamic_out).all(), - msg=error_msg.format("dynamic", self.data, dynamic_out)) + self.assertTrue((self.data == static_out).all(), + msg=error_msg.format("static", self.data, static_out)) + self.assertTrue((self.data == dynamic_out).all(), + msg=error_msg.format("dynamic", self.data, dynamic_out)) def test_api(self): with _test_eager_guard(): @@ -93,6 +93,7 @@ class TestSetValueApi(TestSetValueBase): # 1. 
Test different type of item: int, Python slice, Paddle Tensor # 1.1 item is int class TestSetValueItemInt(TestSetValueApi): + def _call_setitem(self, x): x[0] = self.value @@ -103,6 +104,7 @@ class TestSetValueItemInt(TestSetValueApi): # 1.2 item is slice # 1.2.1 step is 1 class TestSetValueItemSlice(TestSetValueApi): + def _call_setitem(self, x): x[0:2] = self.value @@ -111,6 +113,7 @@ class TestSetValueItemSlice(TestSetValueApi): class TestSetValueItemSlice2(TestSetValueApi): + def _call_setitem(self, x): x[0:-1] = self.value @@ -119,6 +122,7 @@ class TestSetValueItemSlice2(TestSetValueApi): class TestSetValueItemSlice3(TestSetValueApi): + def _call_setitem(self, x): x[0:-1, 0:2] = self.value @@ -127,6 +131,7 @@ class TestSetValueItemSlice3(TestSetValueApi): class TestSetValueItemSlice4(TestSetValueApi): + def _call_setitem(self, x): x[0:, 1:2, :] = self.value @@ -135,6 +140,7 @@ class TestSetValueItemSlice4(TestSetValueApi): class TestSetValueItemSlice5(TestSetValueApi): + def _call_setitem(self, x): x[0:, 1:1, :] = self.value @@ -143,7 +149,9 @@ class TestSetValueItemSlice5(TestSetValueApi): class TestSetValueItemSliceInWhile(TestSetValueApi): + def _call_setitem(self, x): + def cond(i, x): return i < 1 @@ -161,6 +169,7 @@ class TestSetValueItemSliceInWhile(TestSetValueApi): # 1.2.2 step > 1 class TestSetValueItemSliceStep(TestSetValueApi): + def set_shape(self): self.shape = [5, 5, 5] @@ -172,6 +181,7 @@ class TestSetValueItemSliceStep(TestSetValueApi): class TestSetValueItemSliceStep2(TestSetValueApi): + def set_shape(self): self.shape = [7, 5, 5] @@ -183,6 +193,7 @@ class TestSetValueItemSliceStep2(TestSetValueApi): class TestSetValueItemSliceStep3(TestSetValueApi): + def _call_setitem(self, x): x[0:-1, 0:2, ::2] = self.value @@ -191,6 +202,7 @@ class TestSetValueItemSliceStep3(TestSetValueApi): class TestSetValueItemSliceStep4(TestSetValueApi): + def _call_setitem(self, x): x[0:, 1:2:2, :] = self.value @@ -200,6 +212,7 @@ class TestSetValueItemSliceStep4(TestSetValueApi): # 1.2.3 step < 0 class TestSetValueItemSliceNegetiveStep(TestSetValueApi): + def set_shape(self): self.shape = [5, 2] @@ -214,6 +227,7 @@ class TestSetValueItemSliceNegetiveStep(TestSetValueApi): class TestSetValueItemSliceNegetiveStep2(TestSetValueApi): + def set_shape(self): self.shape = [5] @@ -228,6 +242,7 @@ class TestSetValueItemSliceNegetiveStep2(TestSetValueApi): class TestSetValueItemSliceNegetiveStep3(TestSetValueApi): + def set_shape(self): self.shape = [3] @@ -242,6 +257,7 @@ class TestSetValueItemSliceNegetiveStep3(TestSetValueApi): class TestSetValueItemSliceNegetiveStep4(TestSetValueApi): + def set_shape(self): self.shape = [3, 4, 5] @@ -256,6 +272,7 @@ class TestSetValueItemSliceNegetiveStep4(TestSetValueApi): class TestSetValueItemEllipsis1(TestSetValueApi): + def _call_setitem(self, x): x[0:, ..., 1:] = self.value @@ -264,6 +281,7 @@ class TestSetValueItemEllipsis1(TestSetValueApi): class TestSetValueItemEllipsis2(TestSetValueApi): + def _call_setitem(self, x): x[0:, ...] = self.value @@ -272,6 +290,7 @@ class TestSetValueItemEllipsis2(TestSetValueApi): class TestSetValueItemEllipsis3(TestSetValueApi): + def _call_setitem(self, x): x[..., 1:] = self.value @@ -280,6 +299,7 @@ class TestSetValueItemEllipsis3(TestSetValueApi): class TestSetValueItemEllipsis4(TestSetValueApi): + def _call_setitem(self, x): x[...] 
= self.value @@ -289,6 +309,7 @@ class TestSetValueItemEllipsis4(TestSetValueApi): # 1.4 item is Paddle Tensor class TestSetValueItemTensor(TestSetValueApi): + def _call_setitem(self, x): zero = paddle.full([1], 0, dtype="int32") x[zero] = self.value @@ -298,6 +319,7 @@ class TestSetValueItemTensor(TestSetValueApi): class TestSetValueItemTensor2(TestSetValueApi): + def _call_setitem(self, x): zero = paddle.full([1], 0, dtype="int32") two = paddle.full([1], 2, dtype="int64") @@ -308,6 +330,7 @@ class TestSetValueItemTensor2(TestSetValueApi): class TestSetValueItemTensor3(TestSetValueApi): + def _call_setitem(self, x): zero = paddle.full([1], 0, dtype="int32") two = paddle.full([1], 2, dtype="int64") @@ -318,6 +341,7 @@ class TestSetValueItemTensor3(TestSetValueApi): class TestSetValueItemTensor4(TestSetValueApi): + def _call_setitem(self, x): zero = paddle.full([1], 0, dtype="int32") two = paddle.full([1], 2, dtype="int64") @@ -328,6 +352,7 @@ class TestSetValueItemTensor4(TestSetValueApi): class TestSetValueItemTensor5(TestSetValueApi): + def _call_setitem(self, x): zero = paddle.full([1], 0, dtype="int32") two = paddle.full([1], 2, dtype="int64") @@ -338,6 +363,7 @@ class TestSetValueItemTensor5(TestSetValueApi): class TestSetValueItemTensor6(TestSetValueApi): + def set_shape(self): self.shape = [3, 4, 5] @@ -352,6 +378,7 @@ class TestSetValueItemTensor6(TestSetValueApi): # 1.5 item is None class TestSetValueItemNone1(TestSetValueApi): + def _call_setitem(self, x): x[None] = self.value @@ -360,6 +387,7 @@ class TestSetValueItemNone1(TestSetValueApi): class TestSetValueItemNone2(TestSetValueApi): + def _call_setitem(self, x): x[0, None, 1] = self.value @@ -368,6 +396,7 @@ class TestSetValueItemNone2(TestSetValueApi): class TestSetValueItemNone3(TestSetValueApi): + def _call_setitem(self, x): x[:, None, None, 1] = self.value @@ -376,6 +405,7 @@ class TestSetValueItemNone3(TestSetValueApi): class TestSetValueItemNone4(TestSetValueApi): + def _call_setitem(self, x): x[0, 0, None, 1] = self.value @@ -384,6 +414,7 @@ class TestSetValueItemNone4(TestSetValueApi): class TestSetValueItemNone5(TestSetValueApi): + def _call_setitem(self, x): x[0, None, 0, None, 1] = self.value @@ -392,6 +423,7 @@ class TestSetValueItemNone5(TestSetValueApi): class TestSetValueItemNone6(TestSetValueApi): + def _call_setitem(self, x): x[None, 0, 0, None, 0] = self.value @@ -400,6 +432,7 @@ class TestSetValueItemNone6(TestSetValueApi): class TestSetValueItemNone7(TestSetValueApi): + def _call_setitem(self, x): x[:, None, 1] = np.zeros(self.shape)[:, None, 0] @@ -408,6 +441,7 @@ class TestSetValueItemNone7(TestSetValueApi): class TestSetValueItemNone8(TestSetValueApi): + def _call_setitem(self, x): x[:, 1, None] = np.zeros(self.shape)[:, 0, None] @@ -416,6 +450,7 @@ class TestSetValueItemNone8(TestSetValueApi): class TestSetValueItemNone9(TestSetValueApi): + def _call_setitem(self, x): x[None, :, 1, ..., None] = np.zeros(self.shape)[0, 0, :, None] @@ -424,6 +459,7 @@ class TestSetValueItemNone9(TestSetValueApi): class TestSetValueItemNone10(TestSetValueApi): + def _call_setitem(self, x): x[..., None, :, None] = np.zeros(self.shape)[..., None, :, None] @@ -433,6 +469,7 @@ class TestSetValueItemNone10(TestSetValueApi): # 1.5 item is list or Tensor of bol class TestSetValueItemBool1(TestSetValueApi): + def _call_setitem(self, x): x[[True, False]] = self.value @@ -441,6 +478,7 @@ class TestSetValueItemBool1(TestSetValueApi): class TestSetValueItemBool2(TestSetValueApi): + def _call_setitem(self, x): x[[False, False]] = 
self.value @@ -449,6 +487,7 @@ class TestSetValueItemBool2(TestSetValueApi): class TestSetValueItemBool3(TestSetValueApi): + def _call_setitem(self, x): x[[False, True]] = np.zeros(self.shape[2]) @@ -457,6 +496,7 @@ class TestSetValueItemBool3(TestSetValueApi): class TestSetValueItemBool4(TestSetValueApi): + def _call_setitem(self, x): idx = paddle.assign(np.array([False, True])) x[idx] = np.zeros(self.shape[2]) @@ -466,17 +506,19 @@ class TestSetValueItemBool4(TestSetValueApi): class TestSetValueItemBool5(TestSetValueApi): + def _call_setitem(self, x): idx = paddle.assign( np.array([[False, True, False], [True, True, False]])) x[idx] = self.value def _get_answer(self): - self.data[np.array([[False, True, False], [True, True, False] - ])] = self.value + self.data[np.array([[False, True, False], [True, True, + False]])] = self.value class TestSetValueItemBool6(TestSetValueApi): + def _call_setitem(self, x): x[0, ...] = 0 x[x > 0] = self.value @@ -491,7 +533,9 @@ class TestSetValueItemBool6(TestSetValueApi): def create_test_value_int32(parent): + class TestValueInt(parent): + def set_value(self): self.value = 7 @@ -511,7 +555,9 @@ create_test_value_int32(TestSetValueItemSlice4) def create_test_value_int64(parent): + class TestValueInt(parent): + def set_value(self): self.value = 7 @@ -531,7 +577,9 @@ create_test_value_int64(TestSetValueItemSlice4) def create_test_value_fp32(parent): + class TestValueInt(parent): + def set_value(self): self.value = 3.3 @@ -551,7 +599,9 @@ create_test_value_fp32(TestSetValueItemSlice4) def create_test_value_fp64(parent): + class TestValueInt(parent): + def set_value(self): self.value = 2.0**127 # float32:[-2^128, 2^128) @@ -571,7 +621,9 @@ create_test_value_fp64(TestSetValueItemSlice4) def create_test_value_bool(parent): + class TestValueInt(parent): + def set_value(self): self.value = 0 @@ -592,7 +644,9 @@ create_test_value_bool(TestSetValueItemSlice4) # 2.2 value is numpy.array (int32, int64, float32, float64, bool) def create_test_value_numpy_int32(parent): + class TestValueInt(parent): + def set_value(self): self.value = np.array([5]) @@ -612,7 +666,9 @@ create_test_value_numpy_int32(TestSetValueItemSlice4) def create_test_value_numpy_int64(parent): + class TestValueInt(parent): + def set_value(self): self.value = np.array([1]) @@ -632,7 +688,9 @@ create_test_value_numpy_int64(TestSetValueItemSlice4) def create_test_value_numpy_fp32(parent): + class TestValueInt(parent): + def set_value(self): self.value = np.array([1]) @@ -652,7 +710,9 @@ create_test_value_numpy_fp32(TestSetValueItemSlice4) def create_test_value_numpy_fp64(parent): + class TestValueInt(parent): + def set_value(self): self.value = np.array([2**127]).astype("float64") @@ -672,7 +732,9 @@ create_test_value_numpy_fp64(TestSetValueItemSlice4) def create_test_value_numpy_bool(parent): + class TestValueInt(parent): + def set_value(self): self.value = np.array([0]) @@ -693,7 +755,9 @@ create_test_value_numpy_bool(TestSetValueItemSlice4) # 2.3 value is a Paddle Tensor (int32, int64, float32, float64, bool) def create_test_value_tensor_int32(parent): + class TestValueInt(parent): + def set_dtype(self): self.dtype = "int32" @@ -717,7 +781,9 @@ create_test_value_tensor_int32(TestSetValueItemSlice4) def create_test_value_tensor_int64(parent): + class TestValueInt(parent): + def set_dtype(self): self.dtype = "int64" @@ -741,7 +807,9 @@ create_test_value_tensor_int64(TestSetValueItemSlice4) def create_test_value_tensor_fp32(parent): + class TestValueInt(parent): + def set_dtype(self): self.dtype = 
"float32" @@ -765,7 +833,9 @@ create_test_value_tensor_fp32(TestSetValueItemSlice4) def create_test_value_tensor_fp64(parent): + class TestValueInt(parent): + def set_dtype(self): self.dtype = "float64" @@ -789,7 +859,9 @@ create_test_value_tensor_fp64(TestSetValueItemSlice4) def create_test_value_tensor_bool(parent): + class TestValueInt(parent): + def set_dtype(self): self.dtype = "bool" @@ -814,6 +886,7 @@ create_test_value_tensor_bool(TestSetValueItemSlice4) # 3. Test different shape of value class TestSetValueValueShape1(TestSetValueApi): + def set_value(self): self.value = np.array([3, 4, 5, 6]) # shape is (4,) @@ -825,6 +898,7 @@ class TestSetValueValueShape1(TestSetValueApi): class TestSetValueValueShape2(TestSetValueApi): + def set_value(self): self.value = np.array([[3, 4, 5, 6]]) # shape is (1,4) @@ -836,9 +910,10 @@ class TestSetValueValueShape2(TestSetValueApi): class TestSetValueValueShape3(TestSetValueApi): + def set_value(self): - self.value = np.array( - [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]]) # shape is (3,4) + self.value = np.array([[1, 1, 1, 1], [2, 2, 2, 2], + [3, 3, 3, 3]]) # shape is (3,4) def _call_setitem(self, x): x[0] = self.value @@ -848,10 +923,11 @@ class TestSetValueValueShape3(TestSetValueApi): class TestSetValueValueShape4(TestSetValueApi): + def set_value(self): - self.value = np.array( - [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]]).astype( - self.dtype) # shape is (3,4) + self.value = np.array([[1, 1, 1, 1], [2, 2, 2, 2], + [3, 3, 3, + 3]]).astype(self.dtype) # shape is (3,4) def _call_setitem(self, x): x[0] = paddle.assign(self.value) # x is Paddle.Tensor @@ -861,6 +937,7 @@ class TestSetValueValueShape4(TestSetValueApi): class TestSetValueValueShape5(TestSetValueApi): + def set_value(self): self.value = np.array([3, 3, 3]).astype(self.dtype) @@ -876,6 +953,7 @@ class TestSetValueValueShape5(TestSetValueApi): # 4. 
Test error class TestError(TestSetValueBase): + def _value_type_error(self): with self.assertRaisesRegexp( TypeError, @@ -948,6 +1026,7 @@ class TestError(TestSetValueBase): class Model(paddle.nn.Layer): + def __init__(self): super(Model, self).__init__() self.conv = paddle.nn.Conv2D(12, 12, 3) @@ -963,6 +1042,7 @@ class Model(paddle.nn.Layer): class TestBackward(unittest.TestCase): + def test_static(self): paddle.enable_static() main_program = paddle.static.Program() @@ -976,8 +1056,9 @@ class TestBackward(unittest.TestCase): x = paddle.static.data(name="x", shape=[4, 4], dtype='float32') y = paddle.static.data(name="y", shape=[4, 4], dtype='float32') - label = paddle.static.data( - name="label", shape=[4, 1], dtype='int64') + label = paddle.static.data(name="label", + shape=[4, 1], + dtype='int64') z = paddle.add(x, y) var = y[0, :] @@ -985,8 +1066,8 @@ class TestBackward(unittest.TestCase): prediction = paddle.static.nn.fc(x=z, size=2, activation='softmax') - cost = paddle.nn.functional.cross_entropy( - input=prediction, label=label) + cost = paddle.nn.functional.cross_entropy(input=prediction, + label=label) loss = paddle.mean(cost) sgd = paddle.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) @@ -996,9 +1077,11 @@ class TestBackward(unittest.TestCase): var_grad, z_grad = exe.run( main_program, - feed={"x": x_np, - "y": y_np, - "label": label_np}, + feed={ + "x": x_np, + "y": y_np, + "label": label_np + }, fetch_list=[var.name + "@GRAD", z.name + "@GRAD"]) self.assertTrue((var_grad == z_grad[0, :]).all()) @@ -1023,6 +1106,7 @@ class TestBackward(unittest.TestCase): class TestGradientTruncated(unittest.TestCase): + def func_test_consistent_with_competitor(self): paddle.disable_static() @@ -1033,8 +1117,8 @@ class TestGradientTruncated(unittest.TestCase): return y.sum() # case 1 - array = np.arange( - 1, 1 + 2 * 3 * 4, dtype="float32").reshape([1, 2, 1, 3, 1, 4]) + array = np.arange(1, 1 + 2 * 3 * 4, + dtype="float32").reshape([1, 2, 1, 3, 1, 4]) value = np.arange(100, 104, dtype="float32").reshape(1, 4) inps = paddle.to_tensor(array, stop_gradient=False) @@ -1044,10 +1128,11 @@ class TestGradientTruncated(unittest.TestCase): loss.backward() value_grad = np.array([[600., 606., 612., 618.]]) - input_grad = np.array( - [[[[[[4., 32., 108., 256.]], [[500., 864., 1372., 2048.]], - [[2916., 4000., 5324., 6912.]]]], - [[[[0., 0., 0., 0.]], [[0., 0., 0., 0.]], [[0., 0., 0., 0.]]]]]]) + input_grad = np.array([[[[[[4., 32., 108., 256.]], + [[500., 864., 1372., 2048.]], + [[2916., 4000., 5324., 6912.]]]], + [[[[0., 0., 0., 0.]], [[0., 0., 0., 0.]], + [[0., 0., 0., 0.]]]]]]) self.assertTrue( np.array_equal(inps.grad.numpy(), input_grad), msg="The gradient of value should be \n{},\n but reveived {}". @@ -1068,11 +1153,12 @@ class TestGradientTruncated(unittest.TestCase): loss.backward() value_grad2 = np.array([600.]) - input_grad2 = np.array( - [[[4., 32., 108.], [0., 0., 0.]], [[1372., 2048., 2916.], - [4000., 5324., 6912.]], - [[8788., 10976., 13500.], [16384., 19652., 23328.]], - [[27436., 32000., 37044.], [42592., 48668., 55296.]]]) + input_grad2 = np.array([[[4., 32., 108.], [0., 0., 0.]], + [[1372., 2048., 2916.], [4000., 5324., 6912.]], + [[8788., 10976., 13500.], + [16384., 19652., 23328.]], + [[27436., 32000., 37044.], + [42592., 48668., 55296.]]]) self.assertTrue( np.array_equal(inps2.grad.numpy(), input_grad2), msg="The gradient of value should be \n{},\n but reveived {}". 
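
Note: the TestSetValueItem*/TestSetValueValueShape* cases above all follow one pattern — run the same __setitem__ once on a Paddle tensor and once on a NumPy array, then compare. A minimal standalone sketch of that pattern (an illustration only, assuming dygraph mode and a Paddle build with tensor __setitem__ support):

import numpy as np
import paddle

paddle.disable_static()
shape, value = [2, 3, 4], 6.0

tensor = paddle.ones(shape=shape, dtype="float32")
tensor[0:2, 1:2, :] = value          # exercised through the set_value op

expected = np.ones(shape, dtype="float32")
expected[0:2, 1:2, :] = value        # plain NumPy semantics as the reference

assert (tensor.numpy() == expected).all()
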
@@ -1089,8 +1175,8 @@ class TestGradientTruncated(unittest.TestCase): y = a * a return y.sum() - array = np.arange( - 1, 1 + 2 * 3 * 4, dtype="float32").reshape([4, 3, 1, 1, 2, 1]) + array = np.arange(1, 1 + 2 * 3 * 4, + dtype="float32").reshape([4, 3, 1, 1, 2, 1]) value = np.arange(100, 100 + 2, dtype="float32").reshape(1, 2, 1) inps = paddle.to_tensor(array, stop_gradient=False) @@ -1100,14 +1186,16 @@ class TestGradientTruncated(unittest.TestCase): loss.backward() value_grad = np.array([[[600.], [606.]]]) - input_grad = np.array( - [[[[[[0.], [0.]]]], [[[[0.], [0.]]]], [[[[0.], [0.]]]]], - [[[[[1372.], [2048.]]]], [[[[2916.], [4000.]]]], - [[[[5324.], [6912.]]]]], [[[[[8788.], [10976.]]]], [[[[13500.], - [16384.]]]], - [[[[19652.], [23328.]]]]], - [[[[[27436.], [32000.]]]], [[[[37044.], [42592.]]]], - [[[[48668.], [55296.]]]]]]) + input_grad = np.array([[[[[[0.], [0.]]]], [[[[0.], [0.]]]], + [[[[0.], [0.]]]]], + [[[[[1372.], [2048.]]]], [[[[2916.], [4000.]]]], + [[[[5324.], [6912.]]]]], + [[[[[8788.], [10976.]]]], [[[[13500.], + [16384.]]]], + [[[[19652.], [23328.]]]]], + [[[[[27436.], [32000.]]]], + [[[[37044.], [42592.]]]], + [[[[48668.], [55296.]]]]]]) self.assertTrue( np.array_equal(inps.grad.numpy(), input_grad), msg="The gradient of value should be \n{},\n but reveived {}". @@ -1124,8 +1212,8 @@ class TestGradientTruncated(unittest.TestCase): y = a * a return y.sum() - array = np.arange( - 1, 1 + 2 * 3 * 4, dtype="float32").reshape([2, 3, 1, 4, 1]) + array = np.arange(1, 1 + 2 * 3 * 4, + dtype="float32").reshape([2, 3, 1, 4, 1]) value = np.arange(100, 100 + 2, dtype="float32").reshape(1, 2, 1) inps = paddle.to_tensor(array, stop_gradient=False) @@ -1135,8 +1223,8 @@ class TestGradientTruncated(unittest.TestCase): loss.backward() value_grad = np.array([[[600.], [606.]]]) - input_grad = np.array([[[[[0.], [32.], [108.], - [0.]]], [[[0.], [864.], [1372.], [0.]]], + input_grad = np.array([[[[[0.], [32.], [108.], [0.]]], + [[[0.], [864.], [1372.], [0.]]], [[[0.], [4000.], [5324.], [0.]]]], [[[[8788.], [10976.], [13500.], [16384.]]], [[[19652.], [23328.], [27436.], [32000.]]], @@ -1166,8 +1254,8 @@ class TestGradientTruncated(unittest.TestCase): loss = set_value5(inps, value) loss.backward() - value_grad = np.array([[200., 202., 204., 206.], - [208., 210., 212., 214.], + value_grad = np.array([[200., 202., 204., + 206.], [208., 210., 212., 214.], [216., 218., 220., 222.]]) input_grad = np.array([[[0., 0., 0., 0.], [0., 0., 0., 0.], [0., 0., 0., 0.]], @@ -1208,38 +1296,49 @@ class TestGradientTruncated(unittest.TestCase): def op1(x): value = paddle.fluid.layers.fill_constant([1], "float32", 1) - # test stop_gradient + # test stop_gradient value.stop_gradient = True x.stop_gradient = False - start = paddle.fluid.layers.fill_constant( - [1], "int32", 5, force_cpu=True) - end = paddle.fluid.layers.fill_constant( - [1], "int32", 0, force_cpu=True) - step = paddle.fluid.layers.fill_constant( - [1], "int32", -2, force_cpu=True) + start = paddle.fluid.layers.fill_constant([1], + "int32", + 5, + force_cpu=True) + end = paddle.fluid.layers.fill_constant([1], + "int32", + 0, + force_cpu=True) + step = paddle.fluid.layers.fill_constant([1], + "int32", + -2, + force_cpu=True) inputs = { 'Input': x, 'ValueTensor': value, - 'StartsTensorList': [start, ], - 'EndsTensorList': [end, ], - 'StepsTensorList': [step, ] + 'StartsTensorList': [ + start, + ], + 'EndsTensorList': [ + end, + ], + 'StepsTensorList': [ + step, + ] } helper = LayerHelper("set_value") y = 
helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="set_value", - inputs=inputs, - outputs={'Out': y}, - attrs={'axes': [0]}) + helper.append_op(type="set_value", + inputs=inputs, + outputs={'Out': y}, + attrs={'axes': [0]}) return y, value def op2(x): value = paddle.fluid.layers.fill_constant([1, 3, 2], "float32", 1) - # test stop_gradient + # test stop_gradient value.stop_gradient = False x.stop_gradient = False attrs = { @@ -1256,11 +1355,10 @@ class TestGradientTruncated(unittest.TestCase): helper = LayerHelper("set_value") y = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="set_value", - inputs=inputs, - outputs={'Out': y}, - attrs=attrs) + helper.append_op(type="set_value", + inputs=inputs, + outputs={'Out': y}, + attrs=attrs) return y, value @@ -1268,39 +1366,51 @@ class TestGradientTruncated(unittest.TestCase): value = paddle.fluid.layers.fill_constant([1], "float32", 1) x.stop_gradient = True value.stop_gradient = False - start = paddle.fluid.layers.fill_constant( - [1], "int32", 0, force_cpu=True) - end = paddle.fluid.layers.fill_constant( - [1], "int32", 5, force_cpu=True) - step = paddle.fluid.layers.fill_constant( - [1], "int32", 3, force_cpu=True) + start = paddle.fluid.layers.fill_constant([1], + "int32", + 0, + force_cpu=True) + end = paddle.fluid.layers.fill_constant([1], + "int32", + 5, + force_cpu=True) + step = paddle.fluid.layers.fill_constant([1], + "int32", + 3, + force_cpu=True) inputs = { 'Input': x, 'ValueTensor': value, - 'StartsTensorList': [start, ], - 'EndsTensorList': [end, ], - 'StepsTensorList': [step, ] + 'StartsTensorList': [ + start, + ], + 'EndsTensorList': [ + end, + ], + 'StepsTensorList': [ + step, + ] } helper = LayerHelper("set_value") y = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="set_value", - inputs=inputs, - outputs={'Out': y}, - attrs={'axes': [0]}) + helper.append_op(type="set_value", + inputs=inputs, + outputs={'Out': y}, + attrs={'axes': [0]}) return y, value def set_value(array, i, op): name_x = to_string('x', i) - x = paddle.static.data( - name=name_x, shape=array.shape, dtype='float32') + x = paddle.static.data(name=name_x, + shape=array.shape, + dtype='float32') - # set_value_op in __get/setitem__ is an inplace operation. - # When `input.stop_gradient = True` and `value.stop_gradient = False`, + # set_value_op in __get/setitem__ is an inplace operation. + # When `input.stop_gradient = True` and `value.stop_gradient = False`, # set_value_grad_op will not be run during backward. 
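
# Hedged sketch of the behaviour the stop_gradient comment above describes and
# that the TestGradientTruncated cases assert (assumptions: dygraph mode, the
# assigned value requires grad): the value receives a gradient through the
# set_value op, while the overwritten positions of the target contribute zero.
import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.zeros([3, 4], dtype="float32"), stop_gradient=False)
v = paddle.to_tensor(np.ones([4], dtype="float32"), stop_gradient=False)

y = x * 1.0        # work on a non-leaf result so the in-place write is tracked
y[0] = v           # routed through the set_value op
y.sum().backward()

print(v.grad.numpy())  # all ones: the value receives a gradient
print(x.grad.numpy())  # row 0 is zero: its original entries were overwritten
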
y, value = op(x) @@ -1325,8 +1435,8 @@ class TestGradientTruncated(unittest.TestCase): input_shape = [7, 6, 5, 4, 3, 2] - array = np.arange( - 0, numel(input_shape), dtype="float32").reshape(input_shape) + array = np.arange(0, numel(input_shape), + dtype="float32").reshape(input_shape) for i in range(len(input_shape)): program = paddle.static.Program() @@ -1350,6 +1460,7 @@ class TestGradientTruncated(unittest.TestCase): class TestSetValueInplace(unittest.TestCase): + def test_inplace(self): paddle.disable_static() with paddle.fluid.dygraph.guard(): @@ -1368,6 +1479,7 @@ class TestSetValueInplace(unittest.TestCase): class TestSetValueInplaceLeafVar(unittest.TestCase): + def test_inplace_var_become_leaf_var(self): paddle.disable_static() diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op.py b/python/paddle/fluid/tests/unittests/test_sgd_op.py index ad03fa30009..8e00d905a35 100644 --- a/python/paddle/fluid/tests/unittests/test_sgd_op.py +++ b/python/paddle/fluid/tests/unittests/test_sgd_op.py @@ -27,6 +27,7 @@ paddle.enable_static() class TestSGDOp(OpTest): + def setUp(self): self.op_type = "sgd" self.conf() @@ -46,16 +47,18 @@ class TestSGDOp(OpTest): class TestSGDOpCase8X(TestSGDOp): + def conf(self): self.h = 10 self.w = 64 class TestSparseSGDOp(unittest.TestCase): + def check_with_place(self, place): scope = core.Scope() - # create and initialize Grad Variable + # create and initialize Grad Variable height = 10 rows = [0, 4, 7] self.conf() @@ -81,12 +84,11 @@ class TestSparseSGDOp(unittest.TestCase): lr.set(lr_array, place) # create and run sgd operator - sgd_op = Operator( - "sgd", - Param='Param', - Grad='Grad', - ParamOut='Param', - LearningRate='LearningRate') + sgd_op = Operator("sgd", + Param='Param', + Grad='Grad', + ParamOut='Param', + LearningRate='LearningRate') sgd_op.run(scope, place) # get and compare result @@ -119,11 +121,13 @@ class TestSparseSGDOp(unittest.TestCase): class TestSparseSGDOpCase8X(TestSparseSGDOp): + def conf(self): self.row_numel = 16 class TestSGDOpOptimizeSelectedRows(unittest.TestCase): + def check_with_place(self, place): scope = core.Scope() @@ -168,16 +172,15 @@ class TestSGDOpOptimizeSelectedRows(unittest.TestCase): # optimize with Python w_after_optimize = np.copy(w_before_optimize) for index, id in enumerate(grad_rows): - w_after_optimize[id] = w_before_optimize[ - id] - lr_value * grad_array[index] + w_after_optimize[ + id] = w_before_optimize[id] - lr_value * grad_array[index] # create and run sgd operator - sgd_op = Operator( - "sgd", - Param='Param', - Grad='Grad', - ParamOut='Param', - LearningRate='LearningRate') + sgd_op = Operator("sgd", + Param='Param', + Grad='Grad', + ParamOut='Param', + LearningRate='LearningRate') sgd_op.run(scope, place) # get and compare result @@ -192,11 +195,13 @@ class TestSGDOpOptimizeSelectedRows(unittest.TestCase): class TestSGDOpWithLargeInput(unittest.TestCase): + def runTest(self): paddle.enable_static() data = fluid.layers.fill_constant(shape=[1], value=128, dtype='int64') - label = fluid.layers.fill_constant( - shape=[1, 150], value=0.5, dtype='float32') + label = fluid.layers.fill_constant(shape=[1, 150], + value=0.5, + dtype='float32') emb = fluid.embedding(input=data, size=(10000000, 150), dtype='float32') out = fluid.layers.l2_normalize(x=emb, axis=-1) @@ -214,6 +219,7 @@ class TestSGDOpWithLargeInput(unittest.TestCase): class TestSGDV2(unittest.TestCase): + def test_sgd_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") @@ -235,26 +241,33 @@ class 
TestSGDV2(unittest.TestCase): init_program = paddle.static.Program() program = paddle.static.Program() block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="mul.x", - optimize_attr=optimizer_attr) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out") - block.append_op( - type="mul", - inputs={"X": mul_x, - "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}) - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + mul_x = block.create_parameter(dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr=optimizer_attr) + mul_y = block.create_var(dtype="float32", + shape=[10, 8], + lod_level=0, + name="mul.y") + mul_out = block.create_var(dtype="float32", + shape=[5, 8], + lod_level=0, + name="mul.out") + mean_out = block.create_var(dtype="float32", + shape=[1], + lod_level=0, + name="mean.out") + block.append_op(type="mul", + inputs={ + "X": mul_x, + "Y": mul_y + }, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + block.append_op(type="mean", + inputs={"X": mul_out}, + outputs={"Out": mean_out}) sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.01) opts, _ = sgd_optimizer.minimize(mean_out, init_program) return opts @@ -299,6 +312,7 @@ class TestSGDV2(unittest.TestCase): class TestSGDMultiPrecision2_0(unittest.TestCase): + def dygraph_sgd_mp(self, mp): paddle.disable_static() paddle.seed(10) @@ -346,11 +360,13 @@ class TestSGDMultiPrecision2_0(unittest.TestCase): use_fp16_guard=False) with paddle.static.program_guard(train_program, startup_program): if mp: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float16') + data = paddle.static.data(shape=[2, 2], + name='X', + dtype='float16') else: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float32') + data = paddle.static.data(shape=[2, 2], + name='X', + dtype='float32') hidden = paddle.static.nn.fc(x=data, size=10) loss = paddle.fluid.layers.mean(hidden) optimizer.minimize(loss) @@ -376,31 +392,26 @@ class TestSGDMultiPrecision2_0(unittest.TestCase): output1_dy, params1_dy = self.dygraph_sgd_mp(mp=True) output2_dy, params2_dy = self.dygraph_sgd_mp(mp=False) self.assertEqual( - np.allclose( - output1_dy.astype('float32').numpy(), - output2_dy.astype('float32').numpy(), - atol=1e-01), - True) + np.allclose(output1_dy.astype('float32').numpy(), + output2_dy.astype('float32').numpy(), + atol=1e-01), True) for idx in range(len(params1_dy)): self.assertEqual( - np.allclose( - params1_dy[idx].astype('float32').numpy(), - params2_dy[idx].astype('float32').numpy(), - atol=1e-01), - True) + np.allclose(params1_dy[idx].astype('float32').numpy(), + params2_dy[idx].astype('float32').numpy(), + atol=1e-01), True) "Test static mode" output1_st = self.static_sgd_mp(mp=True) output2_st = self.static_sgd_mp(mp=False) for idx in range(len(output1_st)): self.assertEqual( - np.allclose( - output1_st[idx].astype('float32'), - output2_st[idx].astype('float32'), - atol=1e-01), - True) + np.allclose(output1_st[idx].astype('float32'), + output2_st[idx].astype('float32'), + atol=1e-01), True) class TestSGDMultiPrecision1_0(unittest.TestCase): + def dygraph_sgd_mp(self, mp): paddle.disable_static() paddle.seed(10) @@ -451,11 +462,13 @@ class 
TestSGDMultiPrecision1_0(unittest.TestCase): use_fp16_guard=False) with paddle.static.program_guard(train_program, startup_program): if mp: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float16') + data = paddle.static.data(shape=[2, 2], + name='X', + dtype='float16') else: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float32') + data = paddle.static.data(shape=[2, 2], + name='X', + dtype='float32') hidden = paddle.static.nn.fc(x=data, size=10) loss = paddle.fluid.layers.mean(hidden) optimizer.minimize(loss) @@ -481,28 +494,22 @@ class TestSGDMultiPrecision1_0(unittest.TestCase): output1_dy, params1_dy = self.dygraph_sgd_mp(mp=True) output2_dy, params2_dy = self.dygraph_sgd_mp(mp=False) self.assertEqual( - np.allclose( - output1_dy.astype('float32').numpy(), - output2_dy.astype('float32').numpy(), - atol=1e-01), - True) + np.allclose(output1_dy.astype('float32').numpy(), + output2_dy.astype('float32').numpy(), + atol=1e-01), True) for idx in range(len(params1_dy)): self.assertEqual( - np.allclose( - params1_dy[idx].astype('float32').numpy(), - params2_dy[idx].astype('float32').numpy(), - atol=1e-01), - True) + np.allclose(params1_dy[idx].astype('float32').numpy(), + params2_dy[idx].astype('float32').numpy(), + atol=1e-01), True) "Test static mode" output1_st = self.static_sgd_mp(mp=True) output2_st = self.static_sgd_mp(mp=False) for idx in range(len(output1_st)): self.assertEqual( - np.allclose( - output1_st[idx].astype('float32'), - output2_st[idx].astype('float32'), - atol=1e-01), - True) + np.allclose(output1_st[idx].astype('float32'), + output2_st[idx].astype('float32'), + atol=1e-01), True) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py index a468d6e828c..4df56373a53 100644 --- a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py +++ b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py @@ -19,8 +19,9 @@ import numpy as np import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.op import Operator -from paddle.fluid.tests.unittests.op_test import ( - convert_float_to_uint16, convert_uint16_to_float, OpTest, OpTestTool) +from paddle.fluid.tests.unittests.op_test import (convert_float_to_uint16, + convert_uint16_to_float, + OpTest, OpTestTool) import paddle import paddle.static.amp as amp import struct @@ -29,6 +30,7 @@ import struct @unittest.skipIf(not core.supports_bfloat16(), 'place does not support BF16 evaluation') class TestSGDOpBF16(OpTest): + def setUp(self): self.op_type = 'sgd' self.dtype = np.uint16 @@ -56,12 +58,14 @@ class TestSGDOpBF16(OpTest): @unittest.skipIf(not core.supports_bfloat16(), 'place does not support BF16 evaluation') class TestSGDOpBF16Case2(TestSGDOpBF16): + def conf(self): self.h = 10 self.w = 64 class TestSparseSGDOpBF16(unittest.TestCase): + @classmethod def setUpClass(cls): np.random.seed(12345) @@ -122,6 +126,7 @@ class TestSparseSGDOpBF16(unittest.TestCase): @unittest.skipIf(not core.supports_bfloat16(), 'place does not support BF16 evaluation') class TestSparseGradSGDOpBF16(TestSparseSGDOpBF16): + def setUp(self): self.setup_params() @@ -133,19 +138,20 @@ class TestSparseGradSGDOpBF16(TestSparseSGDOpBF16): def test_sparse_grad_sgd(self): scope = core.Scope() place = core.CPUPlace() - _, grad_array = self.create_sparse_grad_var( - scope, place, self.grad_height, self.grad_rows, self.grad_row_numel) + _, grad_array = self.create_sparse_grad_var(scope, place, + self.grad_height, + 
self.grad_rows, + self.grad_row_numel) param_tensor, param_array = self.create_dense_param_var( scope, place, self.grad_height, self.grad_row_numel) _, lr_value = self.create_dense_lr_var(scope, place) - sgd_op = Operator( - 'sgd', - Param='Param', - Grad='Grad', - ParamOut='Param', - LearningRate='LearningRate', - use_mkldnn=True) + sgd_op = Operator('sgd', + Param='Param', + Grad='Grad', + ParamOut='Param', + LearningRate='LearningRate', + use_mkldnn=True) sgd_op.run(scope, place) reference = self.ref_optimize(param_array, self.grad_rows, grad_array, @@ -157,6 +163,7 @@ class TestSparseGradSGDOpBF16(TestSparseSGDOpBF16): @unittest.skipIf(not core.supports_bfloat16(), 'place does not support BF16 evaluation') class TestSparseGradSGDOpBF16Case2(TestSparseGradSGDOpBF16): + def setup_params(self): self.grad_height = 14 self.grad_rows = [1, 4, 12, 7, 8] @@ -164,6 +171,7 @@ class TestSparseGradSGDOpBF16Case2(TestSparseGradSGDOpBF16): class TestSparseGradSGDOpBF16Case3(TestSparseGradSGDOpBF16): + def setup_params(self): self.grad_height = 10 self.grad_rows = [0, 4, 7] @@ -173,6 +181,7 @@ class TestSparseGradSGDOpBF16Case3(TestSparseGradSGDOpBF16): @unittest.skipIf(not core.supports_bfloat16(), 'place does not support BF16 evaluation') class TestSparseGradParamSGDOpBF16(TestSparseSGDOpBF16): + def setUp(self): self.setup_params() @@ -185,20 +194,21 @@ class TestSparseGradParamSGDOpBF16(TestSparseSGDOpBF16): def test_sparse_param_grad_sgd(self): scope = core.Scope() place = core.CPUPlace() - _, grad_array = self.create_sparse_grad_var( - scope, place, self.grad_height, self.grad_rows, self.grad_row_numel) + _, grad_array = self.create_sparse_grad_var(scope, place, + self.grad_height, + self.grad_rows, + self.grad_row_numel) param_tensor, param_array = self.create_sparse_param_var( scope, place, self.grad_height, self.param_rows, self.grad_row_numel) _, lr_value = self.create_dense_lr_var(scope, place) - sgd_op = Operator( - 'sgd', - Param='Param', - Grad='Grad', - ParamOut='Param', - LearningRate='LearningRate', - use_mkldnn=True) + sgd_op = Operator('sgd', + Param='Param', + Grad='Grad', + ParamOut='Param', + LearningRate='LearningRate', + use_mkldnn=True) sgd_op.run(scope, place) reference = self.ref_optimize(param_array, self.grad_rows, grad_array, @@ -208,6 +218,7 @@ class TestSparseGradParamSGDOpBF16(TestSparseSGDOpBF16): class TestSparseGradParamSGDOpBF16Case2(TestSparseGradParamSGDOpBF16): + def setup_params(self): self.grad_height = 14 self.grad_rows = [1, 4, 12, 7, 8] @@ -217,6 +228,7 @@ class TestSparseGradParamSGDOpBF16Case2(TestSparseGradParamSGDOpBF16): @OpTestTool.skip_if_not_cpu_bf16() class TestSGDOpBF16API(unittest.TestCase): + @classmethod def setUpClass(cls): np.random.seed(12345) @@ -249,18 +261,18 @@ class TestSGDOpBF16API(unittest.TestCase): return self._fp322bf16(self._bf162fp32(lhs) * self._bf162fp32(rhs)) def _reference(self, data, emb_weight, bf16=False): - emb_out_shape = np.array( - [self.ids_shape[0], self.w_shape[1]], dtype=np.int64) - mean_grad_value = np.float32(1.0) / np.prod( - emb_out_shape, dtype=np.float32) + emb_out_shape = np.array([self.ids_shape[0], self.w_shape[1]], + dtype=np.int64) + mean_grad_value = np.float32(1.0) / np.prod(emb_out_shape, + dtype=np.float32) if bf16: - mean_grad = np.full( - emb_out_shape, - self._fp322bf16(mean_grad_value), - dtype=np.uint16) + mean_grad = np.full(emb_out_shape, + self._fp322bf16(mean_grad_value), + dtype=np.uint16) else: - mean_grad = np.full( - emb_out_shape, mean_grad_value, dtype=np.float32) + mean_grad = 
np.full(emb_out_shape, + mean_grad_value, + dtype=np.float32) # add_grad = 1 * mean_grad out_dtype = np.uint16 if bf16 else np.float32 lookup_table_grad = np.zeros(self.w_shape, dtype=out_dtype) @@ -286,7 +298,11 @@ class TestSGDOpBF16API(unittest.TestCase): ref_grad = emb_weight - self.learning_rate * lookup_table_grad return ref_grad - def _check_output(self, actual, reference, bf16=False, atol=0, + def _check_output(self, + actual, + reference, + bf16=False, + atol=0, rtol=0.15e-2): output = actual if bf16 else convert_uint16_to_float(actual) if bf16: @@ -294,8 +310,10 @@ class TestSGDOpBF16API(unittest.TestCase): else: try: print('Compare with FP32 values:') - np.testing.assert_allclose( - output, reference, atol=atol, rtol=rtol) + np.testing.assert_allclose(output, + reference, + atol=atol, + rtol=rtol) except AssertionError as e: print(e) @@ -313,15 +331,16 @@ class TestSGDOpBF16API(unittest.TestCase): main = fluid.Program() with fluid.program_guard(main): x = fluid.layers.data(name='X', shape=self.ids_shape, dtype='int64') - label = fluid.layers.data( - name='Y', shape=self.y_shape, dtype='uint16') - emb = fluid.layers.embedding( - input=x, - size=self.w_shape, - param_attr=fluid.ParamAttr( - name="emb_weight", initializer=self.initializer), - is_sparse=False, - dtype="uint16") # bfloat16 + label = fluid.layers.data(name='Y', + shape=self.y_shape, + dtype='uint16') + emb = fluid.layers.embedding(input=x, + size=self.w_shape, + param_attr=fluid.ParamAttr( + name="emb_weight", + initializer=self.initializer), + is_sparse=False, + dtype="uint16") # bfloat16 cost = fluid.layers.elementwise_add(emb, label) avg_cost = paddle.mean(cost) @@ -330,7 +349,9 @@ class TestSGDOpBF16API(unittest.TestCase): sgd_optimizer = amp.bf16.decorate_bf16( sgd_optimizer, amp_lists=amp.bf16.AutoMixedPrecisionListsBF16( - custom_bf16_list={'lookup_table', }), + custom_bf16_list={ + 'lookup_table', + }), use_bf16_guard=False, use_pure_bf16=True) sgd_optimizer.minimize( @@ -340,12 +361,14 @@ class TestSGDOpBF16API(unittest.TestCase): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) test_prog = main.clone(for_test=True) - sgd_optimizer.amp_init( - place, test_program=test_prog, use_bf16_test=True) + sgd_optimizer.amp_init(place, + test_program=test_prog, + use_bf16_test=True) ref_emb = np.full(self.w_shape, self.value, dtype=np.float32) - ref_emb_bf16 = np.full( - self.w_shape, self._fp322bf16(self.value), dtype=np.uint16) + ref_emb_bf16 = np.full(self.w_shape, + self._fp322bf16(self.value), + dtype=np.uint16) emb_weight = [] for sample in train_reader(): @@ -353,8 +376,10 @@ class TestSGDOpBF16API(unittest.TestCase): label = sample[0][1] y_bf16 = convert_float_to_uint16(label) emb_weight = exe.run(main, - feed={'X': data, - 'Y': y_bf16}, + feed={ + 'X': data, + 'Y': y_bf16 + }, fetch_list=['emb_weight']) ref_emb = self._reference(data, ref_emb) diff --git a/python/paddle/fluid/tests/unittests/test_shape_op.py b/python/paddle/fluid/tests/unittests/test_shape_op.py index 3d961a7413c..cb64739f8f0 100644 --- a/python/paddle/fluid/tests/unittests/test_shape_op.py +++ b/python/paddle/fluid/tests/unittests/test_shape_op.py @@ -23,6 +23,7 @@ from paddle.fluid.op import Operator class TestShapeOp(OpTest): + def setUp(self): self.op_type = "shape" self.python_api = paddle.shape @@ -40,16 +41,19 @@ class TestShapeOp(OpTest): class case1(TestShapeOp): + def config(self): self.shape = [2] class case2(TestShapeOp): + def config(self): self.shape = [1, 2, 3] class 
TestShapeWithSelectedRows(unittest.TestCase): + def get_places(self): places = [core.CPUPlace()] if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_shard_index_op.py b/python/paddle/fluid/tests/unittests/test_shard_index_op.py index 9ccf1f254a5..9d52f8f8459 100644 --- a/python/paddle/fluid/tests/unittests/test_shard_index_op.py +++ b/python/paddle/fluid/tests/unittests/test_shard_index_op.py @@ -50,6 +50,7 @@ def common_setup(self, index_num, nshards, shard_id, ignore_value): class TestShardIndexShardId0Op(OpTest): + def setUp(self): common_setup(self, 20, 2, 0, -1) @@ -58,6 +59,7 @@ class TestShardIndexShardId0Op(OpTest): class TestShardIndexShardId1Op(OpTest): + def setUp(self): common_setup(self, 20, 2, 1, -1) @@ -66,6 +68,7 @@ class TestShardIndexShardId1Op(OpTest): class TestShardIndexIgnoreValueOp(OpTest): + def setUp(self): common_setup(self, 20, 2, 0, -2) @@ -74,6 +77,7 @@ class TestShardIndexIgnoreValueOp(OpTest): class TestShardIndexNotEvenlyDividedOp(OpTest): + def setUp(self): common_setup(self, 15, 2, 1, -1) diff --git a/python/paddle/fluid/tests/unittests/test_share_data_op.py b/python/paddle/fluid/tests/unittests/test_share_data_op.py index 1e6f0ef693c..a049661eaab 100644 --- a/python/paddle/fluid/tests/unittests/test_share_data_op.py +++ b/python/paddle/fluid/tests/unittests/test_share_data_op.py @@ -20,6 +20,7 @@ from paddle.fluid.op import Operator class TestShareDataOp(OpTest): + def setUp(self): self.op_type = "share_data" input = np.random.rand(2, 3, 5).astype("float32") @@ -31,6 +32,7 @@ class TestShareDataOp(OpTest): class TestShareDataOpOnDifferentPlaces(unittest.TestCase): + def get_places(self): places = [core.CPUPlace()] if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py b/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py index 6e1099e5a39..daa3f191ccd 100644 --- a/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py +++ b/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py @@ -28,13 +28,16 @@ from paddle.fluid.layers.control_flow import lod_rank_table class TestShrinkRNNMemoryBase(unittest.TestCase): + def setUp(self): self.main_program = Program() switch_main_program(self.main_program) x = layers.data('x', shape=[100], dtype='float32') x.stop_gradient = False - rank_table_tensor = layers.data( - 'rank_table_tensor', shape=[1], dtype='float32', lod_level=1) + rank_table_tensor = layers.data('rank_table_tensor', + shape=[1], + dtype='float32', + lod_level=1) table = lod_rank_table(x=rank_table_tensor) i = layers.zeros(dtype='int64', shape=[1]) self.mem1 = shrink_memory(x=x, i=i, table=table) @@ -56,6 +59,7 @@ class TestShrinkRNNMemoryBase(unittest.TestCase): class TestShrinkRNNMemoryReferLoD(TestShrinkRNNMemoryBase): + def test_refer_lod(self): cpu = core.CPUPlace() x_tensor = core.LoDTensor() @@ -65,13 +69,15 @@ class TestShrinkRNNMemoryReferLoD(TestShrinkRNNMemoryBase): rank_table_tensor = core.LoDTensor() rank_table_tensor.set_recursive_sequence_lengths([[1, 2, 3]]) - rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'), - cpu) + rank_table_tensor.set( + np.random.random(size=(6, 1)).astype('float32'), cpu) exe = Executor(cpu) outs = exe.run( - feed={'x': x_tensor, - 'rank_table_tensor': rank_table_tensor}, + feed={ + 'x': x_tensor, + 'rank_table_tensor': rank_table_tensor + }, fetch_list=[self.mem1, self.mem2, self.mem3, self.x_grad], return_numpy=False) self.assertTrue(np.allclose(tensor_np[0:6], outs[0])) @@ -81,6 
+87,7 @@ class TestShrinkRNNMemoryReferLoD(TestShrinkRNNMemoryBase): class TestShrinkRNNMemoryNoLoD(TestShrinkRNNMemoryBase): + def test_no_lod(self): cpu = core.CPUPlace() x_tensor = core.LoDTensor() @@ -89,13 +96,15 @@ class TestShrinkRNNMemoryNoLoD(TestShrinkRNNMemoryBase): rank_table_tensor = core.LoDTensor() rank_table_tensor.set_recursive_sequence_lengths([[1, 2, 3]]) - rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'), - cpu) + rank_table_tensor.set( + np.random.random(size=(6, 1)).astype('float32'), cpu) exe = Executor(cpu) outs = exe.run( - feed={'x': x_tensor, - 'rank_table_tensor': rank_table_tensor}, + feed={ + 'x': x_tensor, + 'rank_table_tensor': rank_table_tensor + }, fetch_list=[self.mem1, self.mem2, self.mem3, self.x_grad], return_numpy=False) self.assertTrue(np.allclose(tensor_np[0:3], outs[0])) @@ -105,6 +114,7 @@ class TestShrinkRNNMemoryNoLoD(TestShrinkRNNMemoryBase): class TestShrinkRNNMemoryOpError(unittest.TestCase): + def test_erroes(self): with program_guard(Program(), Program()): x = layers.zeros(dtype='int64', shape=[3, 100]) diff --git a/python/paddle/fluid/tests/unittests/test_shuffle_batch_op.py b/python/paddle/fluid/tests/unittests/test_shuffle_batch_op.py index 62c26a73a8d..6292a4d2b51 100644 --- a/python/paddle/fluid/tests/unittests/test_shuffle_batch_op.py +++ b/python/paddle/fluid/tests/unittests/test_shuffle_batch_op.py @@ -25,6 +25,7 @@ import random class TestShuffleBatchOpBase(OpTest): + def gen_random_array(self, shape, low=0, high=1): rnd = (high - low) * np.random.random(shape) + low return rnd.astype(self.dtype) @@ -43,8 +44,8 @@ class TestShuffleBatchOpBase(OpTest): self.dtype = np.float64 self.shape = self.get_shape() x = self.gen_random_array(self.shape) - seed = np.random.random_integers( - low=10, high=100, size=(1, )).astype('int64') + seed = np.random.random_integers(low=10, high=100, + size=(1, )).astype('int64') self.inputs = {'X': x, 'Seed': seed} self.outputs = { 'Out': np.array([]).astype(x.dtype), @@ -81,6 +82,7 @@ class TestShuffleBatchOpBase(OpTest): class TestShuffleBatchOp2(TestShuffleBatchOpBase): + def get_shape(self): return (4, 30) diff --git a/python/paddle/fluid/tests/unittests/test_shuffle_channel_op.py b/python/paddle/fluid/tests/unittests/test_shuffle_channel_op.py index aeaae905818..1ff167b680f 100644 --- a/python/paddle/fluid/tests/unittests/test_shuffle_channel_op.py +++ b/python/paddle/fluid/tests/unittests/test_shuffle_channel_op.py @@ -23,6 +23,7 @@ import paddle.fluid.core as core class TestShuffleChannelOp(OpTest): + def setUp(self): self.op_type = "shuffle_channel" self.batch_size = 10 diff --git a/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py b/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py index e5406f4d0c2..9c0d2bc9235 100644 --- a/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py +++ b/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py @@ -40,11 +40,12 @@ class TestSigmoidCrossEntropyWithLogitsOp1(OpTest): batch_size = 64 num_classes = 20 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, (batch_size, num_classes)) - .astype("float64")), - 'Label': np.random.randint(0, 2, (batch_size, num_classes)) - .astype("float64") + 'X': + logit( + np.random.uniform(0, 1, + (batch_size, num_classes)).astype("float64")), + 'Label': + np.random.randint(0, 2, (batch_size, num_classes)).astype("float64") } # Fw Pass is implemented as elementwise sigmoid followed by @@ 
-73,13 +74,17 @@ class TestSigmoidCrossEntropyWithLogitsOp2(OpTest): num_classes = 20 ignore_index = -1 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, (batch_size, num_classes)) - .astype("float64")), - 'Label': np.random.randint(-1, 2, (batch_size, num_classes)) - .astype("float64") + 'X': + logit( + np.random.uniform(0, 1, + (batch_size, num_classes)).astype("float64")), + 'Label': + np.random.randint(-1, 2, + (batch_size, num_classes)).astype("float64") + } + self.attrs = { + 'ignore_index': ignore_index, } - self.attrs = {'ignore_index': ignore_index, } # Fw Pass is implemented as elementwise sigmoid followed by # elementwise logistic loss # Label * -log(sigmoid(X)) + (1 - label) * -log(1 - sigmoid(X)) @@ -107,11 +112,12 @@ class TestSigmoidCrossEntropyWithLogitsOp3(OpTest): batch_size = 64 num_classes = 20 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, (batch_size, num_classes)) - .astype("float64")), - 'Label': np.random.uniform(0, 1, (batch_size, num_classes)) - .astype("float64") + 'X': + logit( + np.random.uniform(0, 1, + (batch_size, num_classes)).astype("float64")), + 'Label': + np.random.uniform(0, 1, (batch_size, num_classes)).astype("float64") } # Fw Pass is implemented as elementwise sigmoid followed by @@ -130,6 +136,7 @@ class TestSigmoidCrossEntropyWithLogitsOp3(OpTest): class TestSigmoidCrossEntropyWithNorm(OpTest): + def setUp(self): self.op_type = "sigmoid_cross_entropy_with_logits" self.python_api = test_fluid_sigmoid @@ -137,11 +144,13 @@ class TestSigmoidCrossEntropyWithNorm(OpTest): num_classes = 20 ignore_index = -1 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, (batch_size, num_classes)) - .astype("float64")), - 'Label': np.random.randint(-1, 2, (batch_size, num_classes)) - .astype("float64") + 'X': + logit( + np.random.uniform(0, 1, + (batch_size, num_classes)).astype("float64")), + 'Label': + np.random.randint(-1, 2, + (batch_size, num_classes)).astype("float64") } self.attrs = {'ignore_index': ignore_index, 'normalize': True} sigmoid_X = expit(self.inputs['X']) @@ -171,11 +180,13 @@ class TestSigmoidCrossEntropyWithLogitsOp5(OpTest): batch_size = [10, 10] num_classes = 20 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, tuple(batch_size + [num_classes])) - .astype("float64")), - 'Label': np.random.uniform(0, 1, tuple(batch_size + [num_classes])) - .astype("float64") + 'X': + logit( + np.random.uniform( + 0, 1, tuple(batch_size + [num_classes])).astype("float64")), + 'Label': + np.random.uniform(0, 1, tuple(batch_size + + [num_classes])).astype("float64") } # Fw Pass is implemented as elementwise sigmoid followed by @@ -194,6 +205,7 @@ class TestSigmoidCrossEntropyWithLogitsOp5(OpTest): class TestSigmoidCrossEntropyWithNorm2(OpTest): + def setUp(self): self.op_type = "sigmoid_cross_entropy_with_logits" self.python_api = test_fluid_sigmoid @@ -201,11 +213,13 @@ class TestSigmoidCrossEntropyWithNorm2(OpTest): num_classes = 20 ignore_index = -1 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, tuple(batch_size + [num_classes])) - .astype("float64")), - 'Label': np.random.randint(-1, 2, tuple(batch_size + [num_classes])) - .astype("float64") + 'X': + logit( + np.random.uniform( + 0, 1, tuple(batch_size + [num_classes])).astype("float64")), + 'Label': + np.random.randint(-1, 2, tuple(batch_size + + [num_classes])).astype("float64") } self.attrs = {'ignore_index': ignore_index, 'normalize': True} sigmoid_X = expit(self.inputs['X']) @@ -234,12 +248,14 @@ class TestSigmoidCrossEntropyWithNorm2(OpTest): batch_size = [10, 10] 
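
# The "Fw Pass" comments in these hunks spell out the elementwise loss; a naive
# NumPy reference of that formula as a sketch (the op itself uses a numerically
# stable formulation, and the exact ignore_index/normalize handling below is an
# assumption inferred from the tests, not taken from the kernel):
import numpy as np
from scipy.special import expit  # the tests already rely on scipy's expit/logit

def sigmoid_ce_with_logits_ref(x, label, ignore_index=None, normalize=False):
    # Label * -log(sigmoid(X)) + (1 - Label) * -log(1 - sigmoid(X))
    p = expit(x)
    loss = -label * np.log(p) - (1.0 - label) * np.log(1.0 - p)
    if ignore_index is not None:
        keep = (label != ignore_index).astype(x.dtype)
        loss = loss * keep                      # ignored entries contribute 0
        if normalize:                           # assumed: divide by #kept entries
            loss = loss / max(keep.sum(), 1.0)
    return loss
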
num_classes = 20 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, tuple(batch_size + [num_classes])) - .astype("float64")), + 'X': + logit( + np.random.uniform(0, 1, + tuple(batch_size + + [num_classes])).astype("float64")), 'Label': - np.random.randint(0, 2, tuple(batch_size + [num_classes])) - .astype("float64") + np.random.randint(0, 2, tuple(batch_size + + [num_classes])).astype("float64") } # Fw Pass is implemented as elementwise sigmoid followed by @@ -257,17 +273,18 @@ class TestSigmoidCrossEntropyWithNorm2(OpTest): self.check_grad(['X'], 'Out', check_eager=True) class TestSigmoidCrossEntropyWithLogitsOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): def test_Variable(): # the input of sigmoid_cross_entropy_with_logits must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], - fluid.CPUPlace()) - lab1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], - fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, + 5]), [[1, 1, 1, 1]], + fluid.CPUPlace()) + lab1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], + fluid.CPUPlace()) fluid.layers.sigmoid_cross_entropy_with_logits(x1, lab1) self.assertRaises(TypeError, test_Variable) @@ -275,10 +292,12 @@ class TestSigmoidCrossEntropyWithNorm2(OpTest): def test_dtype(): # the input dtype of sigmoid_cross_entropy_with_logits must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data( - name='x2', shape=[3, 4, 5, 6], dtype="int32") - lab2 = fluid.layers.data( - name='lab2', shape=[3, 4, 5, 6], dtype="int32") + x2 = fluid.layers.data(name='x2', + shape=[3, 4, 5, 6], + dtype="int32") + lab2 = fluid.layers.data(name='lab2', + shape=[3, 4, 5, 6], + dtype="int32") fluid.layers.sigmoid_cross_entropy_with_logits(x2, lab2) self.assertRaises(TypeError, test_dtype) diff --git a/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss.py b/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss.py index 15a4827cecb..bdfa1a19eca 100644 --- a/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss.py +++ b/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss.py @@ -27,8 +27,12 @@ def call_sfl_functional(logit, alpha=0.25, gamma=2.0, reduction='sum'): - res = paddle.nn.functional.sigmoid_focal_loss( - logit, label, normalizer, alpha=alpha, gamma=gamma, reduction=reduction) + res = paddle.nn.functional.sigmoid_focal_loss(logit, + label, + normalizer, + alpha=alpha, + gamma=gamma, + reduction=reduction) return res @@ -43,16 +47,19 @@ def test_static(place, prog = paddle.static.Program() startup_prog = paddle.static.Program() with paddle.static.program_guard(prog, startup_prog): - logit = paddle.fluid.data( - name='logit', shape=logit_np.shape, dtype='float64') - label = paddle.fluid.data( - name='label', shape=label_np.shape, dtype='float64') + logit = paddle.fluid.data(name='logit', + shape=logit_np.shape, + dtype='float64') + label = paddle.fluid.data(name='label', + shape=label_np.shape, + dtype='float64') feed_dict = {"logit": logit_np, "label": label_np} normalizer = None if normalizer_np is not None: - normalizer = paddle.fluid.data( - name='normalizer', shape=normalizer_np.shape, dtype='float64') + normalizer = paddle.fluid.data(name='normalizer', + shape=normalizer_np.shape, + dtype='float64') feed_dict["normalizer"] = normalizer_np res = call_sfl_functional(logit, label, normalizer, alpha, gamma, @@ -115,14 +122,14 @@ def 
calc_sigmoid_focal_loss(logit_np, class TestSigmoidFocalLoss(unittest.TestCase): + def test_SigmoidFocalLoss(self): - logit_np = np.random.uniform( - 0.1, 0.8, size=(2, 3, 4, 10)).astype(np.float64) - label_np = np.random.randint( - 0, 2, size=(2, 3, 4, 10)).astype(np.float64) + logit_np = np.random.uniform(0.1, 0.8, + size=(2, 3, 4, 10)).astype(np.float64) + label_np = np.random.randint(0, 2, + size=(2, 3, 4, 10)).astype(np.float64) normalizer_nps = [ - np.asarray( - [np.sum(label_np > 0)], dtype=label_np.dtype), None + np.asarray([np.sum(label_np > 0)], dtype=label_np.dtype), None ] places = [fluid.CPUPlace()] if fluid.core.is_compiled_with_cuda(): @@ -148,8 +155,8 @@ class TestSigmoidFocalLoss(unittest.TestCase): expected = calc_sigmoid_focal_loss( logit_np, label_np, normalizer_np, alpha, gamma, reduction) - self.assertTrue( - np.allclose(static_result, expected)) + self.assertTrue(np.allclose(static_result, + expected)) self.assertTrue( np.allclose(static_result, dy_result)) self.assertTrue(np.allclose(dy_result, expected)) @@ -159,13 +166,12 @@ class TestSigmoidFocalLoss(unittest.TestCase): paddle.disable_static() logit = paddle.to_tensor([[0.97], [0.91], [0.03]], dtype='float32') label = paddle.to_tensor([[1.0], [1.0], [0.0]], dtype='float32') - self.assertRaises( - ValueError, - paddle.nn.functional.sigmoid_focal_loss, - logit=logit, - label=label, - normalizer=None, - reduction="unsupport reduction") + self.assertRaises(ValueError, + paddle.nn.functional.sigmoid_focal_loss, + logit=logit, + label=label, + normalizer=None, + reduction="unsupport reduction") paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py b/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py index 6c1b15ab003..7a625fb296a 100644 --- a/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py @@ -46,8 +46,9 @@ def sigmoid_focal_loss_forward(x_data, label_data, fg_num_data, gamma, alpha, p = 1. / (1. + math.exp(-x)) FLT_MIN = 1.175494351e-38 term_pos = math.pow((1. - p), gamma) * math.log(max(FLT_MIN, p)) - term_neg = math.pow(p, gamma) * ( - -1. * x * (x >= 0) - math.log(1. + math.exp(x - 2. * x * (x >= 0)))) + term_neg = math.pow(p, gamma) * (-1. * x * (x >= 0) - + math.log(1. + math.exp(x - 2. 
* x * + (x >= 0)))) out_data[idx] = 0.0 out_data[idx] += -c_pos * term_pos * z_pos out_data[idx] += -c_neg * term_neg * z_neg @@ -57,6 +58,7 @@ def sigmoid_focal_loss_forward(x_data, label_data, fg_num_data, gamma, alpha, class TestSigmoidFocalLossOp1(OpTest): + def set_argument(self): self.num_anchors = 10 self.num_classes = 10 @@ -84,9 +86,10 @@ class TestSigmoidFocalLossOp1(OpTest): 'gamma': self.gamma, 'alpha': self.alpha, } - loss = sigmoid_focal_loss_forward( - self.inputs['X'], self.inputs['Label'], self.inputs['FgNum'], - self.gamma, self.alpha, self.num_classes) + loss = sigmoid_focal_loss_forward(self.inputs['X'], + self.inputs['Label'], + self.inputs['FgNum'], self.gamma, + self.alpha, self.num_classes) self.outputs = {'Out': loss.astype('float64')} def test_check_output(self): @@ -99,17 +102,20 @@ class TestSigmoidFocalLossOp1(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSigmoidFocalLossOp2(TestSigmoidFocalLossOp1): + def test_check_output(self): place = core.CUDAPlace(0) self.check_output_with_place(place, atol=2e-3) def test_check_grad(self): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['X'], 'Out', max_relative_error=0.002) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=0.002) class TestSigmoidFocalLossOp3(TestSigmoidFocalLossOp1): + def set_argument(self): self.num_anchors = 200 self.num_classes = 10 @@ -120,36 +126,47 @@ class TestSigmoidFocalLossOp3(TestSigmoidFocalLossOp1): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSigmoidFocalLossOp4(TestSigmoidFocalLossOp3): + def test_check_output(self): place = core.CUDAPlace(0) self.check_output_with_place(place, atol=2e-3) def test_check_grad(self): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['X'], 'Out', max_relative_error=0.002) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=0.002) class TestSigmoidFocalLossOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - label1 = fluid.layers.fill_constant( - shape=[10, 1], dtype="int32", value=1) - fg_num1 = fluid.layers.fill_constant( - shape=[1], dtype="int32", value=5) + label1 = fluid.layers.fill_constant(shape=[10, 1], + dtype="int32", + value=1) + fg_num1 = fluid.layers.fill_constant(shape=[1], + dtype="int32", + value=5) # The `x` must be Variable and the data type of `x` Tensor must be one of float32 and float64. def test_x_type(): x1 = [2] - fluid.layers.sigmoid_focal_loss( - x=x1, label=label1, fg_num=fg_num1, gamma=2., alpha=0.25) + fluid.layers.sigmoid_focal_loss(x=x1, + label=label1, + fg_num=fg_num1, + gamma=2., + alpha=0.25) self.assertRaises(TypeError, test_x_type) def test_x_tensor_dtype(): x2 = fluid.layers.data(name='x2', shape=[10, 10], dtype="int16") - fluid.layers.sigmoid_focal_loss( - x=x2, label=label1, fg_num=fg_num1, gamma=2., alpha=0.25) + fluid.layers.sigmoid_focal_loss(x=x2, + label=label1, + fg_num=fg_num1, + gamma=2., + alpha=0.25) self.assertRaises(TypeError, test_x_tensor_dtype) @@ -158,32 +175,46 @@ class TestSigmoidFocalLossOpError(unittest.TestCase): # The `label` must be Variable and the data type of `label` Tensor must be int32. 
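
The term_neg expression in sigmoid_focal_loss_forward above is the standard numerically stable rewrite of log(1 - sigmoid(x)); a quick standalone check (sketch, not part of the test suite):

import numpy as np

def log_one_minus_sigmoid(x):
    # equals -x * (x >= 0) - log(1 + exp(x - 2 * x * (x >= 0))), i.e.
    # -max(x, 0) - log1p(exp(-|x|)); never exponentiates a large positive number
    pos = (x >= 0).astype(x.dtype)
    return -x * pos - np.log1p(np.exp(x - 2.0 * x * pos))

x = np.array([-3.0, -0.5, 0.0, 0.5, 3.0])
naive = np.log(1.0 - 1.0 / (1.0 + np.exp(-x)))   # fine here, breaks for large |x|
print(np.allclose(naive, log_one_minus_sigmoid(x)))  # True
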
def test_label_type(): label2 = [2] - fluid.layers.sigmoid_focal_loss( - x=x3, label=label2, fg_num=fg_num1, gamma=2., alpha=0.25) + fluid.layers.sigmoid_focal_loss(x=x3, + label=label2, + fg_num=fg_num1, + gamma=2., + alpha=0.25) self.assertRaises(TypeError, test_label_type) def test_label_tensor_dtype(): - label3 = fluid.layers.fill_constant( - shape=[10, 1], dtype="float32", value=1.) - fluid.layers.sigmoid_focal_loss( - x=x3, label=label3, fg_num=fg_num1, gamma=2., alpha=0.25) + label3 = fluid.layers.fill_constant(shape=[10, 1], + dtype="float32", + value=1.) + fluid.layers.sigmoid_focal_loss(x=x3, + label=label3, + fg_num=fg_num1, + gamma=2., + alpha=0.25) self.assertRaises(TypeError, test_label_tensor_dtype) # The `fg_num` must be Variable and the data type of `fg_num` Tensor must be int32. def test_fgnum_type(): fg_num2 = [2] - fluid.layers.sigmoid_focal_loss( - x=x3, label=label1, fg_num=fg_num2, gamma=2., alpha=0.25) + fluid.layers.sigmoid_focal_loss(x=x3, + label=label1, + fg_num=fg_num2, + gamma=2., + alpha=0.25) self.assertRaises(TypeError, test_fgnum_type) def test_fgnum_tensor_dtype(): - fg_num3 = fluid.layers.fill_constant( - shape=[1], dtype="float32", value=5.) - fluid.layers.sigmoid_focal_loss( - x=x3, label=label1, fg_num=fg_num3, gamma=2., alpha=0.25) + fg_num3 = fluid.layers.fill_constant(shape=[1], + dtype="float32", + value=5.) + fluid.layers.sigmoid_focal_loss(x=x3, + label=label1, + fg_num=fg_num3, + gamma=2., + alpha=0.25) self.assertRaises(TypeError, test_fgnum_tensor_dtype) diff --git a/python/paddle/fluid/tests/unittests/test_sign_op.py b/python/paddle/fluid/tests/unittests/test_sign_op.py index bd145a968ed..444675a4bb5 100644 --- a/python/paddle/fluid/tests/unittests/test_sign_op.py +++ b/python/paddle/fluid/tests/unittests/test_sign_op.py @@ -23,6 +23,7 @@ from paddle.fluid import Program, program_guard class TestSignOp(OpTest): + def setUp(self): self.op_type = "sign" self.inputs = { @@ -38,24 +39,29 @@ class TestSignOp(OpTest): class TestSignOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of sign_op must be Variable or numpy.ndarray. input1 = 12 self.assertRaises(TypeError, fluid.layers.sign, input1) # The input dtype of sign_op must be float16, float32, float64. - input2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32") - input3 = fluid.layers.data( - name='input3', shape=[12, 10], dtype="int64") + input2 = fluid.layers.data(name='input2', + shape=[12, 10], + dtype="int32") + input3 = fluid.layers.data(name='input3', + shape=[12, 10], + dtype="int64") self.assertRaises(TypeError, fluid.layers.sign, input2) self.assertRaises(TypeError, fluid.layers.sign, input3) - input4 = fluid.layers.data( - name='input4', shape=[4], dtype="float16") + input4 = fluid.layers.data(name='input4', + shape=[4], + dtype="float16") fluid.layers.sign(input4) class TestSignAPI(unittest.TestCase): + def test_dygraph(self): with fluid.dygraph.guard(): np_x = np.array([-1., 0., -0., 1.2, 1.5], dtype='float64') @@ -71,14 +77,17 @@ class TestSignAPI(unittest.TestCase): input1 = 12 self.assertRaises(TypeError, paddle.tensor.math.sign, input1) # The input dtype of sign_op must be float16, float32, float64. 
- input2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32") - input3 = fluid.layers.data( - name='input3', shape=[12, 10], dtype="int64") + input2 = fluid.layers.data(name='input2', + shape=[12, 10], + dtype="int32") + input3 = fluid.layers.data(name='input3', + shape=[12, 10], + dtype="int64") self.assertRaises(TypeError, paddle.tensor.math.sign, input2) self.assertRaises(TypeError, paddle.tensor.math.sign, input3) - input4 = fluid.layers.data( - name='input4', shape=[4], dtype="float16") + input4 = fluid.layers.data(name='input4', + shape=[4], + dtype="float16") paddle.sign(input4) diff --git a/python/paddle/fluid/tests/unittests/test_signal.py b/python/paddle/fluid/tests/unittests/test_signal.py index ecbbd8f52db..8257630cf20 100644 --- a/python/paddle/fluid/tests/unittests/test_signal.py +++ b/python/paddle/fluid/tests/unittests/test_signal.py @@ -56,8 +56,8 @@ def tiny(x): x = np.asarray(x) # Only floating types generate a tiny - if np.issubdtype(x.dtype, np.floating) or np.issubdtype(x.dtype, - np.complexfloating): + if np.issubdtype(x.dtype, np.floating) or np.issubdtype( + x.dtype, np.complexfloating): dtype = x.dtype else: dtype = np.float32 @@ -144,18 +144,19 @@ def __window_ss_fill(x, win_sq, n_frames, hop_length): # pragma: no cover n_fft = len(win_sq) for i in range(n_frames): sample = i * hop_length - x[sample:min(n, sample + n_fft)] += win_sq[:max(0, - min(n_fft, n - sample))] + x[sample:min(n, sample + + n_fft)] += win_sq[:max(0, min(n_fft, n - sample))] def window_sumsquare( - window, - n_frames, - hop_length=512, - win_length=None, - n_fft=2048, - dtype=np.float32, - norm=None, ): + window, + n_frames, + hop_length=512, + win_length=None, + n_fft=2048, + dtype=np.float32, + norm=None, +): if win_length is None: win_length = n_fft @@ -335,8 +336,9 @@ def stft(x, y = np.pad(y, int(n_fft // 2), mode=pad_mode) elif n_fft > y.shape[-1]: - raise Exception("n_fft={} is too large for input signal of length={}". - format(n_fft, y.shape[-1])) + raise Exception( + "n_fft={} is too large for input signal of length={}".format( + n_fft, y.shape[-1])) # Window the time series. y_frames = frame(y, frame_length=n_fft, hop_length=hop_length) @@ -345,8 +347,9 @@ def stft(x, dtype = dtype_r2c(y.dtype) # Pre-allocate the STFT matrix - stft_matrix = np.empty( - (int(1 + n_fft // 2), y_frames.shape[1]), dtype=dtype, order="F") + stft_matrix = np.empty((int(1 + n_fft // 2), y_frames.shape[1]), + dtype=dtype, + order="F") # how many columns can we fit within MAX_MEM_BLOCK? 
n_columns = MAX_MEM_BLOCK // (stft_matrix.shape[0] * stft_matrix.itemsize) @@ -355,8 +358,9 @@ def stft(x, for bl_s in range(0, stft_matrix.shape[1], n_columns): bl_t = min(bl_s + n_columns, stft_matrix.shape[1]) - stft_matrix[:, bl_s:bl_t] = fft.rfft( - fft_window * y_frames[:, bl_s:bl_t], axis=0) + stft_matrix[:, + bl_s:bl_t] = fft.rfft(fft_window * y_frames[:, bl_s:bl_t], + axis=0) if input_rank == 2: stft_matrix = np.expand_dims(stft_matrix, 0) @@ -365,12 +369,13 @@ def stft(x, def istft( - x, - hop_length=None, - win_length=None, - window="hann", - center=True, - length=None, ): + x, + hop_length=None, + win_length=None, + window="hann", + center=True, + length=None, +): stft_matrix = x input_rank = len(stft_matrix.shape) @@ -434,7 +439,8 @@ def istft( win_length=win_length, n_fft=n_fft, hop_length=hop_length, - dtype=dtype, ) + dtype=dtype, + ) approx_nonzero_indices = ifft_window_sum > tiny(ifft_window_sum) y[approx_nonzero_indices] /= ifft_window_sum[approx_nonzero_indices] @@ -537,6 +543,7 @@ def overlap_add_for_api_test(x, hop_length, axis=-1): def place(devices, key='place'): + def decorate(cls): module = sys.modules[cls.__module__].__dict__ raw_classes = { @@ -591,8 +598,8 @@ def rand_x(dims=1, np.random.randint(min_dim_len, max_dim_len) for i in range(dims) ] if complex: - return np.random.randn(*shape).astype(dtype) + 1.j * np.random.randn( - *shape).astype(dtype) + return np.random.randn(*shape).astype( + dtype) + 1.j * np.random.randn(*shape).astype(dtype) else: return np.random.randn(*shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py b/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py index 888bec928ff..114003f0708 100755 --- a/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py +++ b/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py @@ -23,15 +23,17 @@ from paddle.fluid import Program, program_guard class TestSimilarityFocusOp(OpTest): + def setUp(self): self.op_type = "similarity_focus" batch_size = 2 x_dim, y_dim, z_dim = 3, 2, 2 self.inputs = { - 'X': np.array([[[[0.8, 0.1], [0.4, 0.5]], [[0.9, 0.7], [0.9, 0.9]], - [[0.8, 0.9], [0.1, 0.2]]], - [[[0.2, 0.5], [0.3, 0.4]], [[0.9, 0.7], [0.8, 0.4]], - [[0.0, 0.2], [0.4, 0.7]]]]), + 'X': + np.array([[[[0.8, 0.1], [0.4, 0.5]], [[0.9, 0.7], [0.9, 0.9]], + [[0.8, 0.9], [0.1, 0.2]]], + [[[0.2, 0.5], [0.3, 0.4]], [[0.9, 0.7], [0.8, 0.4]], + [[0.0, 0.2], [0.4, 0.7]]]]), } self.attrs = { 'axis': 1, @@ -42,8 +44,8 @@ class TestSimilarityFocusOp(OpTest): for batch in range(batch_size): res = np.zeros((1, y_dim, z_dim)).astype("float32").reshape(-1) for index in self.attrs['indexes']: - channel = self.inputs['X'][batch, index, :, :].reshape(-1).copy( - ) + channel = self.inputs['X'][batch, + index, :, :].reshape(-1).copy() tag1 = [0 for i in range(y_dim)] tag2 = [0 for i in range(z_dim)] cnt = 0 @@ -72,12 +74,14 @@ class TestSimilarityFocusOp(OpTest): class TestSimilarityFocusOp_axis1(OpTest): + def setUp(self): self.op_type = "similarity_focus" batch_size = 3 x_dim, y_dim, z_dim = 4, 5, 6 self.inputs = { - 'X': np.random.random( + 'X': + np.random.random( (batch_size, x_dim, y_dim, z_dim)).astype("float32"), } self.attrs = { @@ -89,8 +93,8 @@ class TestSimilarityFocusOp_axis1(OpTest): for batch in range(batch_size): res = np.zeros((1, y_dim, z_dim)).astype("float32").reshape(-1) for index in self.attrs['indexes']: - channel = self.inputs['X'][batch, index, :, :].reshape(-1).copy( - ) + channel = self.inputs['X'][batch, + index, :, 
:].reshape(-1).copy() tag1 = [0 for i in range(y_dim)] tag2 = [0 for i in range(z_dim)] cnt = 0 @@ -120,12 +124,14 @@ class TestSimilarityFocusOp_axis1(OpTest): class TestSimilarityFocusOp_axis2(OpTest): + def setUp(self): self.op_type = "similarity_focus" batch_size = 6 x_dim, y_dim, z_dim = 7, 8, 9 self.inputs = { - 'X': np.random.random( + 'X': + np.random.random( (batch_size, x_dim, y_dim, z_dim)).astype("float32"), } self.attrs = { @@ -137,8 +143,8 @@ class TestSimilarityFocusOp_axis2(OpTest): for batch in range(batch_size): res = np.zeros((x_dim, 1, z_dim)).astype("float32").reshape(-1) for index in self.attrs['indexes']: - channel = self.inputs['X'][batch, :, index, :].reshape(-1).copy( - ) + channel = self.inputs['X'][batch, :, + index, :].reshape(-1).copy() tag1 = [0 for i in range(x_dim)] tag2 = [0 for i in range(z_dim)] cnt = 0 @@ -168,12 +174,14 @@ class TestSimilarityFocusOp_axis2(OpTest): class TestSimilarityFocusOp_axis3(OpTest): + def setUp(self): self.op_type = "similarity_focus" batch_size = 64 x_dim, y_dim, z_dim = 48, 48, 13 self.inputs = { - 'X': np.random.random( + 'X': + np.random.random( (batch_size, x_dim, y_dim, z_dim)).astype("float32"), } self.attrs = { @@ -185,8 +193,8 @@ class TestSimilarityFocusOp_axis3(OpTest): for batch in range(batch_size): res = np.zeros((x_dim, y_dim, 1)).astype("float32").reshape(-1) for index in self.attrs['indexes']: - channel = self.inputs['X'][batch, :, :, index].reshape(-1).copy( - ) + channel = self.inputs['X'][batch, :, :, + index].reshape(-1).copy() tag1 = [0 for i in range(x_dim)] tag2 = [0 for i in range(y_dim)] cnt = 0 @@ -216,28 +224,32 @@ class TestSimilarityFocusOp_axis3(OpTest): class TestSimilarityFocusOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): data = fluid.data(name='data', shape=[16, 3, 2, 2], dtype='float32') def test_input_Variable(): input = np.random.rand(16, 3, 2, 2).astype("float32") - out = fluid.layers.similarity_focus( - input=input, axis=1, indexes=[0]) + out = fluid.layers.similarity_focus(input=input, + axis=1, + indexes=[0]) self.assertRaises(TypeError, test_input_Variable) def test_axis_Int(): axis = 1.0 - out = fluid.layers.similarity_focus( - input=data, axis=axis, indexes=[0]) + out = fluid.layers.similarity_focus(input=data, + axis=axis, + indexes=[0]) self.assertRaises(TypeError, test_axis_Int) def test_indexes_List(): indexes = 0 - out = fluid.layers.similarity_focus( - input=data, axis=1, indexes=indexes) + out = fluid.layers.similarity_focus(input=data, + axis=1, + indexes=indexes) self.assertRaises(TypeError, test_indexes_List) diff --git a/python/paddle/fluid/tests/unittests/test_simple_rnn_op.py b/python/paddle/fluid/tests/unittests/test_simple_rnn_op.py index d7e24b6308e..7676e15a74b 100644 --- a/python/paddle/fluid/tests/unittests/test_simple_rnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_simple_rnn_op.py @@ -23,6 +23,7 @@ import paddle.fluid.core as core import paddle.fluid.layers as layers import random import sys + sys.path.append("./rnn") from rnn_numpy import SimpleRNN from convert import get_params_for_net @@ -33,6 +34,7 @@ paddle.enable_static() class TestSimpleRNNOp(OpTest): + def get_weight_names(self): weight_names = [] for i in range(self.num_layers): @@ -47,8 +49,7 @@ class TestSimpleRNNOp(OpTest): self.op_type = "rnn" self.dtype = "float32" if core.is_compiled_with_rocm() else "float64" self.sequence_length = None if core.is_compiled_with_rocm( - ) else np.array( - [12, 11, 10, 9, 8], dtype=np.int32) + ) else 
np.array([12, 11, 10, 9, 8], dtype=np.int32) self.num_layers = 1 self.is_bidirec = False self.is_test = False @@ -63,24 +64,24 @@ class TestSimpleRNNOp(OpTest): input_size = 3 hidden_size = 2 - input = np.random.uniform( - low=-0.1, high=0.1, - size=(seq_length, batch_size, input_size)).astype(self.dtype) + input = np.random.uniform(low=-0.1, + high=0.1, + size=(seq_length, batch_size, + input_size)).astype(self.dtype) if self.sequence_length is not None: input[11][1:][:] = 0 input[10][2:][:] = 0 input[9][3:][:] = 0 input[8][4:][:] = 0 - rnn1 = SimpleRNN( - input_size, - hidden_size, - num_layers=self.num_layers, - time_major=True, - direction=direction, - dropout=self.dropout, - nonlinearity=self.mode, - dtype=self.dtype) + rnn1 = SimpleRNN(input_size, + hidden_size, + num_layers=self.num_layers, + time_major=True, + direction=direction, + dropout=self.dropout, + nonlinearity=self.mode, + dtype=self.dtype) flat_w = get_params_for_net(rnn1) @@ -134,23 +135,27 @@ class TestSimpleRNNOp(OpTest): class TestSimpleRNNOp1(TestSimpleRNNOp): + def set_attrs(self): self.sequence_length = None class TestSimpleRNNOp2(TestSimpleRNNOp): + def set_attrs(self): self.sequence_length = None self.is_bidirec = True class TestSimpleRNNOp3(TestSimpleRNNOp): + def set_attrs(self): self.sequence_length = None self.is_test = True class TestSimpleRNNOp4(TestSimpleRNNOp): + def set_attrs(self): self.sequence_length = None self.is_bidirec = True @@ -158,6 +163,7 @@ class TestSimpleRNNOp4(TestSimpleRNNOp): class TestSimpleRNNOp5(TestSimpleRNNOp): + def set_attrs(self): self.mode = "RNN_RELU" diff --git a/python/paddle/fluid/tests/unittests/test_size_op.py b/python/paddle/fluid/tests/unittests/test_size_op.py index 09cd35391ba..bb64e3e66b2 100644 --- a/python/paddle/fluid/tests/unittests/test_size_op.py +++ b/python/paddle/fluid/tests/unittests/test_size_op.py @@ -20,6 +20,7 @@ from op_test import OpTest class TestSizeOp(OpTest): + def setUp(self): self.op_type = "size" self.shape = [] @@ -36,26 +37,31 @@ class TestSizeOp(OpTest): class TestRank1Tensor(TestSizeOp): + def config(self): self.shape = [2] class TestRank2Tensor(TestSizeOp): + def config(self): self.shape = [2, 3] class TestRank3Tensor(TestSizeOp): + def config(self): self.shape = [2, 3, 100] class TestLargeTensor(TestSizeOp): + def config(self): self.shape = [2**10] class TestSizeAPI(unittest.TestCase): + def test_size_static(self): main_program = fluid.Program() startup_program = fluid.Program() @@ -74,10 +80,12 @@ class TestSizeAPI(unittest.TestCase): "x_2": input_2, }, fetch_list=[out_1, out_2]) - assert (np.array_equal( - res_1, np.array([np.size(input_1)]).astype("int64"))) - assert (np.array_equal( - res_2, np.array([np.size(input_2)]).astype("int64"))) + assert (np.array_equal(res_1, + np.array([np.size(input_1) + ]).astype("int64"))) + assert (np.array_equal(res_2, + np.array([np.size(input_2) + ]).astype("int64"))) def test_size_imperative(self): paddle.disable_static(paddle.CPUPlace()) diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py index 34f296c4b63..3b341d79366 100644 --- a/python/paddle/fluid/tests/unittests/test_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_slice_op.py @@ -29,6 +29,7 @@ paddle.enable_static() # Situation 1: starts(list, no tensor), ends(list, no tensor) # 1.1 without attr(decrease) class TestSliceOp(OpTest): + def setUp(self): self.op_type = "slice" self.config() @@ -57,6 +58,7 @@ class TestSliceOp(OpTest): class TestCase1(TestSliceOp): + def 
config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float64") self.starts = [-3, 0, 2] @@ -67,6 +69,7 @@ class TestCase1(TestSliceOp): class TestCase2(TestSliceOp): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float64") self.starts = [-3, 0, 2] @@ -78,6 +81,7 @@ class TestCase2(TestSliceOp): # 1.2 with attr(decrease) class TestSliceOp_decs_dim(OpTest): + def setUp(self): self.op_type = "slice" self.config() @@ -108,6 +112,7 @@ class TestSliceOp_decs_dim(OpTest): class TestSliceOp_decs_dim_2(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float64") self.starts = [1, 0, 2] @@ -119,6 +124,7 @@ class TestSliceOp_decs_dim_2(TestSliceOp_decs_dim): class TestSliceOp_decs_dim_3(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float64") self.starts = [-1, 0, 2] @@ -130,6 +136,7 @@ class TestSliceOp_decs_dim_3(TestSliceOp_decs_dim): class TestSliceOp_decs_dim_4(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 7]).astype("float64") self.starts = [0, 1, 2, 3] @@ -141,6 +148,7 @@ class TestSliceOp_decs_dim_4(TestSliceOp_decs_dim): class TestSliceOp_decs_dim_5(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float64") self.starts = [-1] @@ -152,6 +160,7 @@ class TestSliceOp_decs_dim_5(TestSliceOp_decs_dim): class TestSliceOp_decs_dim_6(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float64") self.starts = [0, 1, 2, 3] @@ -165,6 +174,7 @@ class TestSliceOp_decs_dim_6(TestSliceOp_decs_dim): # Situation 2: starts(list, have tensor), ends(list, no tensor) # without attr(decrease) class TestSliceOp_starts_ListTensor(OpTest): + def setUp(self): self.op_type = "slice" self.config() @@ -203,6 +213,7 @@ class TestSliceOp_starts_ListTensor(OpTest): # Situation 2: starts(list, have tensor), ends(list, no tensor) # with attr(decrease) class TestSliceOp_decs_dim_starts_ListTensor(OpTest): + def setUp(self): self.op_type = "slice" self.config() @@ -243,6 +254,7 @@ class TestSliceOp_decs_dim_starts_ListTensor(OpTest): class TestSliceOp_decs_dim_5_starts_ListTensor( TestSliceOp_decs_dim_starts_ListTensor): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype("float64") self.starts = [-1] @@ -258,13 +270,13 @@ class TestSliceOp_decs_dim_5_starts_ListTensor( # Situation 3: starts(tensor), ends(list, no tensor) # with attr(decrease) class TestSliceOp_decs_dim_starts_OneTensor(OpTest): + def setUp(self): self.op_type = "slice" self.config() self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype="int32") + "StartsTensor": np.array(self.starts, dtype="int32") } self.outputs = {'Out': self.out} self.attrs = { @@ -294,16 +306,15 @@ class TestSliceOp_decs_dim_starts_OneTensor(OpTest): # Situation 4: starts(tensor), ends(tensor) # without attr(decrease) class TestSliceOp_starts_OneTensor_ends_OneTensor(OpTest): + def setUp(self): self.op_type = "slice" self.config() self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype="int64"), - "EndsTensor": np.array( - self.ends, dtype="int32") + "StartsTensor": np.array(self.starts, dtype="int64"), + "EndsTensor": np.array(self.ends, dtype="int32") } self.outputs = {'Out': self.out} self.attrs = { @@ -331,15 +342,14 @@ class TestSliceOp_starts_OneTensor_ends_OneTensor(OpTest): # Situation 5: starts(tensor), ends(tensor) # with attr(decrease) class 
TestSliceOp_decs_dim_starts_and_ends_OneTensor(OpTest): + def setUp(self): self.op_type = "slice" self.config() self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype="int32"), - "EndsTensor": np.array( - self.ends, dtype="int32") + "StartsTensor": np.array(self.starts, dtype="int32"), + "EndsTensor": np.array(self.ends, dtype="int32") } self.outputs = {'Out': self.out} self.attrs = { @@ -369,6 +379,7 @@ class TestSliceOp_decs_dim_starts_and_ends_OneTensor(OpTest): # Situation 6: starts(tensor), ends(list, have tensor) # without attr(decrease) class TestSliceOp_starts_OneTensor_ends_ListTensor(OpTest): + def setUp(self): self.op_type = "slice" self.config() @@ -380,8 +391,7 @@ class TestSliceOp_starts_OneTensor_ends_ListTensor(OpTest): self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype="int32"), + "StartsTensor": np.array(self.starts, dtype="int32"), 'EndsTensorList': ends_tensor } self.outputs = {'Out': self.out} @@ -413,6 +423,7 @@ class TestSliceOp_starts_OneTensor_ends_ListTensor(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16(OpTest): + def setUp(self): self.op_type = "slice" self.config() @@ -442,13 +453,15 @@ class TestFP16(OpTest): def test_check_grad_normal(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_grad_with_place( - place, ['Input'], 'Out', max_relative_error=0.006) + self.check_grad_with_place(place, ['Input'], + 'Out', + max_relative_error=0.006) @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16_2(OpTest): + def setUp(self): self.op_type = "slice" self.config() @@ -478,14 +491,14 @@ class TestFP16_2(OpTest): def test_check_grad_normal(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_grad_with_place( - place, ['Input'], - 'Out', - max_relative_error=0.006, - numeric_grad_delta=0.5) + self.check_grad_with_place(place, ['Input'], + 'Out', + max_relative_error=0.006, + numeric_grad_delta=0.5) class TestBF16(OpTest): + def setUp(self): self.op_type = "slice" self.config() @@ -516,30 +529,38 @@ class TestBF16(OpTest): # Test python API class TestSliceAPI(unittest.TestCase): + def test_1(self): input = np.random.random([3, 4, 5, 6]).astype("float64") minus_1 = fluid.layers.fill_constant([1], "int32", -1) minus_3 = fluid.layers.fill_constant([1], "int64", -3) - starts = fluid.layers.data( - name='starts', shape=[1, 3], append_batch_size=False) - ends = fluid.layers.data( - name='ends', shape=[3], append_batch_size=False) - - x = fluid.layers.data( - name="x", - shape=[3, 4, 5, 6], - append_batch_size=False, - dtype="float64") + starts = fluid.layers.data(name='starts', + shape=[1, 3], + append_batch_size=False) + ends = fluid.layers.data(name='ends', + shape=[3], + append_batch_size=False) + + x = fluid.layers.data(name="x", + shape=[3, 4, 5, 6], + append_batch_size=False, + dtype="float64") # value_int64 is greater than 2147483647 which is the max of int32 value_int64 = fluid.layers.fill_constant([1], "int64", 2147483648) - out_1 = paddle.slice( - x, axes=[0, 1, 2], starts=[-3, 0, 2], ends=[value_int64, 100, -1]) - out_2 = paddle.slice( - x, axes=[0, 1, 3], starts=[minus_3, 0, 2], ends=[3, 100, -1]) - out_3 = paddle.slice( - x, axes=[0, 1, 3], starts=[minus_3, 0, 2], ends=[3, 100, minus_1]) + out_1 = paddle.slice(x, + axes=[0, 1, 2], + starts=[-3, 0, 2], + ends=[value_int64, 100, -1]) + out_2 = paddle.slice(x, + axes=[0, 1, 3], + 
starts=[minus_3, 0, 2], + ends=[3, 100, -1]) + out_3 = paddle.slice(x, + axes=[0, 1, 3], + starts=[minus_3, 0, 2], + ends=[3, 100, minus_1]) out_4 = paddle.slice(x, axes=[0, 1, 2], starts=starts, ends=ends) out_5 = x[-3:3, 0:100, 2:-1] @@ -566,19 +587,17 @@ class TestSliceAPI(unittest.TestCase): class TestSliceApiWithTensor(unittest.TestCase): + def test_starts_ends_is_tensor(self): with paddle.fluid.dygraph.guard(): a = paddle.rand(shape=[4, 5, 6], dtype='float32') axes = [0, 1, 2] starts = [-3, 0, 2] ends = [3, 2, 4] - a_1 = paddle.slice( - a, - axes=axes, - starts=paddle.to_tensor( - starts, dtype='int32'), - ends=paddle.to_tensor( - ends, dtype='int32')) + a_1 = paddle.slice(a, + axes=axes, + starts=paddle.to_tensor(starts, dtype='int32'), + ends=paddle.to_tensor(ends, dtype='int32')) a_2 = paddle.slice(a, axes=axes, starts=starts, ends=ends) self.assertTrue(np.array_equal(a_1.numpy(), a_2.numpy())) @@ -601,6 +620,7 @@ class TestSliceApiWithTensor(unittest.TestCase): class TestSliceApiEager(unittest.TestCase): + def test_slice_api(self): with paddle.fluid.dygraph.guard(): with _test_eager_guard(): @@ -611,11 +631,10 @@ class TestSliceApiEager(unittest.TestCase): ends = [3, 2, 4] a_1 = paddle.slice(a, axes=axes, starts=starts, ends=ends) - a_2 = paddle.slice( - a, - axes=axes, - starts=paddle.to_tensor(starts), - ends=paddle.to_tensor(ends)) + a_2 = paddle.slice(a, + axes=axes, + starts=paddle.to_tensor(starts), + ends=paddle.to_tensor(ends)) a_1.backward() grad_truth = paddle.zeros_like(a) @@ -626,6 +645,7 @@ class TestSliceApiEager(unittest.TestCase): class TestSliceApiWithLoDTensorArray(unittest.TestCase): + def setUp(self): self.shape = (3, 4) self.data = np.random.random(size=self.shape).astype('float32') @@ -634,18 +654,16 @@ class TestSliceApiWithLoDTensorArray(unittest.TestCase): self.end = 2 self.axis = 1 - self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( - ) else fluid.CPUPlace() + self.place = fluid.CUDAPlace( + 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.exe = fluid.Executor(self.place) def set_program_and_run(self, main_program, case_num): with fluid.program_guard(main_program): x = [ - fluid.data( - name='x0', shape=self.shape, dtype="float32"), fluid.data( - name='x1', shape=self.shape, dtype="float32"), - fluid.data( - name='x2', shape=self.shape, dtype="float32") + fluid.data(name='x0', shape=self.shape, dtype="float32"), + fluid.data(name='x1', shape=self.shape, dtype="float32"), + fluid.data(name='x2', shape=self.shape, dtype="float32") ] for each_x in x: @@ -663,14 +681,16 @@ class TestSliceApiWithLoDTensorArray(unittest.TestCase): end = fluid.layers.array_length( arr) - 1 # dtype of end is int64 self.sliced_arr = slice_arr = arr[self.start:end] - output, _ = fluid.layers.tensor_array_to_tensor( - slice_arr, axis=self.axis, use_stack=True) + output, _ = fluid.layers.tensor_array_to_tensor(slice_arr, + axis=self.axis, + use_stack=True) elif case_num == 3: value_int64 = fluid.layers.fill_constant([1], "int64", 2147483648) self.sliced_arr = slice_arr = arr[self.start:value_int64] - output, _ = fluid.layers.tensor_array_to_tensor( - slice_arr, axis=self.axis, use_stack=True) + output, _ = fluid.layers.tensor_array_to_tensor(slice_arr, + axis=self.axis, + use_stack=True) loss = fluid.layers.reduce_sum(output) fluid.backward.append_backward(loss) @@ -703,9 +723,8 @@ class TestSliceApiWithLoDTensorArray(unittest.TestCase): self.sliced_arr.type == core.VarDesc.VarType.LOD_TENSOR_ARRAY) self.assertEqual(self.sliced_arr.shape, self.shape) 
self.assertTrue( - np.array_equal( - self.out, np.stack( - [self.data, self.data], axis=self.axis))) + np.array_equal(self.out, + np.stack([self.data, self.data], axis=self.axis))) self.assertTrue(np.array_equal(self.g_x0, np.ones_like(self.data))) self.assertTrue(np.array_equal(self.g_x1, np.ones_like(self.data))) self.assertTrue(np.array_equal(self.g_x2, np.zeros_like(self.data))) @@ -720,14 +739,14 @@ class TestSliceApiWithLoDTensorArray(unittest.TestCase): self.assertTrue( np.array_equal( self.out, - np.stack( - [self.data, self.data, self.data], axis=self.axis))) + np.stack([self.data, self.data, self.data], axis=self.axis))) self.assertTrue(np.array_equal(self.g_x0, np.ones_like(self.data))) self.assertTrue(np.array_equal(self.g_x1, np.ones_like(self.data))) self.assertTrue(np.array_equal(self.g_x2, np.ones_like(self.data))) class TestImperativeVarBaseGetItem(unittest.TestCase): + def test_getitem_with_long(self): with fluid.dygraph.guard(): data = np.random.random((2, 80, 16128)).astype('float32') @@ -739,6 +758,7 @@ class TestImperativeVarBaseGetItem(unittest.TestCase): self.assertEqual(sliced.shape, [2, 78, 78]) def test_getitem_with_float(self): + def test_float_in_slice_item(): with fluid.dygraph.guard(): data = np.random.random((2, 80, 16128)).astype('float32') @@ -757,6 +777,7 @@ class TestImperativeVarBaseGetItem(unittest.TestCase): class TestInferShape(unittest.TestCase): + def test(self): x = paddle.ones(shape=[3, 4, 5]) x.desc.set_shape([3, -1, 5]) @@ -772,7 +793,9 @@ class TestInferShape(unittest.TestCase): x_arr = np.arange(0, 24, dtype=np.float32).reshape([2, 3, 4]) x = paddle.to_tensor(x_arr) - pp_slice = paddle.slice(x, [100, ], [0], [1]) + pp_slice = paddle.slice(x, [ + 100, + ], [0], [1]) np_slice = x_arr[:, :, 0:1] self.assertTrue(np.array_equal(pp_slice, np_slice)) @@ -784,13 +807,9 @@ class TestInferShape(unittest.TestCase): x = paddle.to_tensor(np.reshape(x_arr, (0, 0, 0))) starts = paddle.to_tensor( - np.reshape( - np.array( - [], dtype=np.int32), (0, ))) + np.reshape(np.array([], dtype=np.int32), (0, ))) ends = paddle.to_tensor( - np.reshape( - np.array( - [], dtype=np.int32), (0, ))) + np.reshape(np.array([], dtype=np.int32), (0, ))) with self.assertRaises(ValueError): paddle.slice(x, [-1000000], starts, ends) @@ -808,15 +827,15 @@ class TestInferShape(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestImperativeCUDAPinnedInput(unittest.TestCase): + def test_input_cuda_pinned_var(self): with fluid.dygraph.guard(): data = np.random.random((2, 80, 16128)).astype('float32') - var = core.VarBase( - value=data, - name='', - persistable=False, - place=fluid.CUDAPinnedPlace(), - zero_copy=False) + var = core.VarBase(value=data, + name='', + persistable=False, + place=fluid.CUDAPinnedPlace(), + zero_copy=False) sliced = var[:, 10:, :var.shape[1]] self.assertEqual(sliced.shape, [2, 70, 80]) diff --git a/python/paddle/fluid/tests/unittests/test_slice_var.py b/python/paddle/fluid/tests/unittests/test_slice_var.py index b16c7446035..d9cb3e2073b 100644 --- a/python/paddle/fluid/tests/unittests/test_slice_var.py +++ b/python/paddle/fluid/tests/unittests/test_slice_var.py @@ -23,14 +23,15 @@ import random class TestSliceVar(unittest.TestCase): + def check_slice_output(self, shapes, expected_sizes, min_size): var_list = [] program = fluid.Program() for shape in shapes: - var = program.global_block().create_var( - name=str(random.randint(10000, 99999)), - persistable=True, - shape=shape) + var = 
program.global_block().create_var(name=str( + random.randint(10000, 99999)), + persistable=True, + shape=shape) var_list.append(var) blocks = slice_variable(var_list, 10, min_size) all_sizes = [] @@ -43,12 +44,12 @@ class TestSliceVar(unittest.TestCase): def test_1k(self): shapes = [[3, 5], [1024], [28, 784], [8, 1020], [800, 10]] - expected_sizes = [ - [15], [1024], - [2352, 2352, 2352, 2352, 2352, 2352, 2352, 2352, 2352, 784], - [2040, 2040, 2040, 2040], - [1150, 1150, 1150, 1150, 1150, 1150, 1100] - ] + expected_sizes = [[15], [1024], + [ + 2352, 2352, 2352, 2352, 2352, 2352, 2352, 2352, + 2352, 784 + ], [2040, 2040, 2040, 2040], + [1150, 1150, 1150, 1150, 1150, 1150, 1100]] self.check_slice_output(shapes, expected_sizes, 1024) diff --git a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py index 74409c86710..38cf45bfcc5 100644 --- a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py +++ b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py @@ -40,6 +40,7 @@ def smooth_l1_loss_np(input, label, reduction='mean', delta=1.0): class SmoothL1Loss(unittest.TestCase): + def setUp(self): np.random.seed(123) @@ -48,8 +49,8 @@ class SmoothL1Loss(unittest.TestCase): label_np = np.random.random([100, 200]).astype(np.float32) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[100, 200], dtype='float32') label = fluid.data(name='label', shape=[100, 200], dtype='float32') @@ -66,9 +67,8 @@ class SmoothL1Loss(unittest.TestCase): self.assertIsNotNone(static_ret) with fluid.dygraph.guard(): smooth_l1_loss = paddle.nn.loss.SmoothL1Loss() - dy_ret = smooth_l1_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = smooth_l1_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) expected = smooth_l1_loss_np(input_np, label_np, reduction='mean') @@ -81,8 +81,8 @@ class SmoothL1Loss(unittest.TestCase): label_np = np.random.random([100, 200]).astype(np.float32) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[100, 200], dtype='float32') label = fluid.data(name='label', shape=[100, 200], dtype='float32') @@ -99,9 +99,8 @@ class SmoothL1Loss(unittest.TestCase): self.assertIsNotNone(static_ret) with fluid.dygraph.guard(): smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(reduction='sum') - dy_ret = smooth_l1_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = smooth_l1_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) expected = smooth_l1_loss_np(input_np, label_np, reduction='sum') @@ -114,8 +113,8 @@ class SmoothL1Loss(unittest.TestCase): label_np = np.random.random([100, 200]).astype(np.float32) prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else 
fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[100, 200], dtype='float32') label = fluid.data(name='label', shape=[100, 200], dtype='float32') @@ -132,9 +131,8 @@ class SmoothL1Loss(unittest.TestCase): self.assertIsNotNone(static_ret) with fluid.dygraph.guard(): smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(reduction='none') - dy_ret = smooth_l1_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = smooth_l1_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) expected = smooth_l1_loss_np(input_np, label_np, reduction='none') @@ -148,8 +146,8 @@ class SmoothL1Loss(unittest.TestCase): delta = np.random.rand() prog = fluid.Program() startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[100, 200], dtype='float32') label = fluid.data(name='label', shape=[100, 200], dtype='float32') @@ -166,9 +164,8 @@ class SmoothL1Loss(unittest.TestCase): self.assertIsNotNone(static_ret) with fluid.dygraph.guard(): smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(delta=delta) - dy_ret = smooth_l1_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) + dy_ret = smooth_l1_loss(fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() self.assertIsNotNone(dy_ret_value) expected = smooth_l1_loss_np(input_np, label_np, delta=delta) diff --git a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss_op.py b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss_op.py index 63e8568048d..b102236380d 100644 --- a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss_op.py @@ -29,6 +29,7 @@ def smooth_l1_loss_forward(val, sigma2): class TestSmoothL1LossOp1(OpTest): + def setUp(self): self.op_type = "smooth_l1_loss" dims = (5, 20) @@ -51,27 +52,28 @@ class TestSmoothL1LossOp1(OpTest): self.check_output(check_eager=True) def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y'], 'Out', max_relative_error=0.02, check_eager=True) + self.check_grad(['X', 'Y'], + 'Out', + max_relative_error=0.02, + check_eager=True) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], - 'Out', - max_relative_error=0.03, - no_grad_set=set("X"), - check_eager=True) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.03, + no_grad_set=set("X"), + check_eager=True) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], - 'Out', - max_relative_error=0.03, - no_grad_set=set('Y'), - check_eager=True) + self.check_grad(['X'], + 'Out', + max_relative_error=0.03, + no_grad_set=set('Y'), + check_eager=True) class TestSmoothL1LossOp2(OpTest): + def setUp(self): self.op_type = "smooth_l1_loss" dims = (5, 20) @@ -98,34 +100,35 @@ class TestSmoothL1LossOp2(OpTest): self.check_output(check_eager=True) def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y'], 'Out', max_relative_error=0.03, check_eager=True) + self.check_grad(['X', 'Y'], + 'Out', + max_relative_error=0.03, + check_eager=True) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], 
- 'Out', - max_relative_error=0.03, - no_grad_set=set(['X', 'InsideWeight', 'OutsideWeight']), - check_eager=True) + self.check_grad(['Y'], + 'Out', + max_relative_error=0.03, + no_grad_set=set(['X', 'InsideWeight', 'OutsideWeight']), + check_eager=True) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], - 'Out', - max_relative_error=0.03, - no_grad_set=set(['Y', 'InsideWeight', 'OutsideWeight']), - check_eager=True) + self.check_grad(['X'], + 'Out', + max_relative_error=0.03, + no_grad_set=set(['Y', 'InsideWeight', 'OutsideWeight']), + check_eager=True) class TestSmoothL1LossOpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): # The input type of accuracy_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) - y1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) + y1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) self.assertRaises(TypeError, fluid.layers.smooth_l1, x1, y1) # The input dtype of accuracy_op must be float32 or float64. x2 = fluid.layers.data(name='x2', shape=[4], dtype="int32") diff --git a/python/paddle/fluid/tests/unittests/test_softmax2d.py b/python/paddle/fluid/tests/unittests/test_softmax2d.py index 4879e9a0efb..cb851c771b8 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax2d.py +++ b/python/paddle/fluid/tests/unittests/test_softmax2d.py @@ -21,6 +21,7 @@ from test_softmax_op import ref_softmax class TestSoftmax2DAPI(unittest.TestCase): + def setUp(self): self.shape = [2, 6, 5, 4] self.x_np = np.random.uniform(-1, 1, self.shape).astype('float64') @@ -50,6 +51,7 @@ class TestSoftmax2DAPI(unittest.TestCase): class TestSoftmax2DShape(TestSoftmax2DAPI): + def setUp(self): self.shape = [2, 6, 4] self.x_np = np.random.uniform(-1, 1, self.shape).astype('float64') @@ -59,6 +61,7 @@ class TestSoftmax2DShape(TestSoftmax2DAPI): class TestSoftmax2DFloat32(TestSoftmax2DAPI): + def setUp(self): self.shape = [2, 3, 4] self.x_np = np.random.uniform(-1, 1, self.shape).astype('float32') @@ -68,6 +71,7 @@ class TestSoftmax2DFloat32(TestSoftmax2DAPI): class TestSoftmax2DCPU(TestSoftmax2DAPI): + def setUp(self): self.shape = [2, 6, 4] self.x_np = np.random.uniform(-1, 1, self.shape).astype('float64') @@ -76,6 +80,7 @@ class TestSoftmax2DCPU(TestSoftmax2DAPI): class TestSoftmax2DRepr(unittest.TestCase): + def setUp(self): self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ else paddle.CPUPlace() @@ -88,6 +93,7 @@ class TestSoftmax2DRepr(unittest.TestCase): class TestSoftmax2DError(unittest.TestCase): + def setUp(self): self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ else paddle.CPUPlace() diff --git a/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_op.py b/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_op.py index cff06f9025f..3aa1cafd92f 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_op.py @@ -40,6 +40,7 @@ def _get_softmax(x, mask, fp16=True): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxMaskFuseOp(OpTest): + def setUp(self): self.op_type = "fused_softmax_mask" x = np.random.random((1, 1, 8, 32)) @@ -65,6 +66,7 @@ class TestSoftmaxMaskFuseOp(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") 
class TestSoftmaxMaskFuseOp0(OpTest): + def setUp(self): self.op_type = "fused_softmax_mask" x = np.random.random((1, 1, 8, 32)).astype("float16") @@ -84,11 +86,13 @@ class TestSoftmaxMaskFuseOp0(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestDropoutBiasFuseOp3(unittest.TestCase): + def test_static_result(self): with fluid.program_guard(fluid.Program(), fluid.Program()): input_x = fluid.data(name="x", shape=[1, 1, 8, 32], dtype="float32") - input_mask = fluid.data( - name="mask", shape=[1, 1, 8, 32], dtype="float32") + input_mask = fluid.data(name="mask", + shape=[1, 1, 8, 32], + dtype="float32") rst = incubate.softmax_mask_fuse(input_x, input_mask) x_in_np = np.random.random((1, 1, 8, 32)).astype("float32") @@ -98,8 +102,10 @@ class TestDropoutBiasFuseOp3(unittest.TestCase): exe = fluid.Executor(fluid.CUDAPlace(0)) fetches = exe.run(fluid.default_main_program(), - feed={"x": x_in_np, - "mask": mask_in_np}, + feed={ + "x": x_in_np, + "mask": mask_in_np + }, fetch_list=[rst]) self.assertTrue(np.allclose(fetches[0], rst_np)) diff --git a/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_upper_triangle_op.py b/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_upper_triangle_op.py index a73ebd73e49..53128e51298 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_upper_triangle_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_upper_triangle_op.py @@ -41,6 +41,7 @@ def _get_softmax_upper(x, fp16=True): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxMaskFuseOp(OpTest): + def setUp(self): self.op_type = "fused_softmax_mask_upper_triangle" x = np.random.random((1, 4, 32, 32)).astype("float16") @@ -58,6 +59,7 @@ class TestSoftmaxMaskFuseOp(OpTest): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxMaskFuseOp1(OpTest): + def setUp(self): self.op_type = "fused_softmax_mask_upper_triangle" x = np.random.random((1, 4, 32, 32)) @@ -89,8 +91,9 @@ class TestDropoutBiasFuseOp2(unittest.TestCase): def test_static(self): for dtype in self.dtypes: with fluid.program_guard(fluid.Program(), fluid.Program()): - input_x = fluid.data( - name="x", shape=[1, 4, 32, 32], dtype=dtype) + input_x = fluid.data(name="x", + shape=[1, 4, 32, 32], + dtype=dtype) rst = incubate.softmax_mask_fuse_upper_triangle(input_x) x_in_np = np.random.random((1, 4, 32, 32)).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_softmax_op.py b/python/paddle/fluid/tests/unittests/test_softmax_op.py index 4f1c37a2424..8618e046893 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_op.py @@ -45,6 +45,7 @@ def ref_softmax(x, axis=None, dtype=None): class TestSoftmaxOp(OpTest): + def get_x_shape(self): return [10, 10] @@ -96,19 +97,20 @@ class TestSoftmaxOp(OpTest): max_relative_error=0.01, check_dygraph=(self.use_mkldnn == False)) else: - self.check_grad( - ["X"], - "Out", - max_relative_error=0.01, - check_dygraph=(self.use_mkldnn == False)) + self.check_grad(["X"], + "Out", + max_relative_error=0.01, + check_dygraph=(self.use_mkldnn == False)) class TestSoftmaxOp2(TestSoftmaxOp): + def get_x_shape(self): return [2, 3, 4, 5] class TestSoftmaxOp3(TestSoftmaxOp): + def get_x_shape(self): return [2, 3, 4, 5] @@ -117,6 +119,7 @@ class TestSoftmaxOp3(TestSoftmaxOp): class TestSoftmaxOp4(TestSoftmaxOp): + def get_x_shape(self): return [2, 3, 4, 
5] @@ -125,6 +128,7 @@ class TestSoftmaxOp4(TestSoftmaxOp): class TestSoftmaxOp5(TestSoftmaxOp): + def get_x_shape(self): return [2, 3, 4, 5] @@ -133,6 +137,7 @@ class TestSoftmaxOp5(TestSoftmaxOp): class TestSoftmaxOp6(TestSoftmaxOp): + def get_x_shape(self): return [2, 3, 4, 5] @@ -143,6 +148,7 @@ class TestSoftmaxOp6(TestSoftmaxOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxCUDNNOp(TestSoftmaxOp): + def init_kernel_type(self): self.use_cudnn = True @@ -150,6 +156,7 @@ class TestSoftmaxCUDNNOp(TestSoftmaxOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxCUDNNOp2(TestSoftmaxCUDNNOp): + def get_x_shape(self): return [2, 3, 4, 5] @@ -157,6 +164,7 @@ class TestSoftmaxCUDNNOp2(TestSoftmaxCUDNNOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxCUDNNOp3(TestSoftmaxCUDNNOp): + def get_x_shape(self): return [2, 3, 4, 5] @@ -167,6 +175,7 @@ class TestSoftmaxCUDNNOp3(TestSoftmaxCUDNNOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxCUDNNOp4(TestSoftmaxCUDNNOp): + def get_x_shape(self): return [2, 3, 4, 5] @@ -177,6 +186,7 @@ class TestSoftmaxCUDNNOp4(TestSoftmaxCUDNNOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxCUDNNOp5(TestSoftmaxCUDNNOp): + def get_x_shape(self): return [2, 3, 4, 5] @@ -187,6 +197,7 @@ class TestSoftmaxCUDNNOp5(TestSoftmaxCUDNNOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxCUDNNOp6(TestSoftmaxCUDNNOp): + def get_x_shape(self): return [2, 3, 4, 5] @@ -197,6 +208,7 @@ class TestSoftmaxCUDNNOp6(TestSoftmaxCUDNNOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxCUDNNOp7(TestSoftmaxCUDNNOp): + def get_x_shape(self): return [2, 3, 4, 5, 6] @@ -204,6 +216,7 @@ class TestSoftmaxCUDNNOp7(TestSoftmaxCUDNNOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxCUDNNOp8(TestSoftmaxCUDNNOp): + def get_x_shape(self): return [2, 3, 4, 5, 6] @@ -214,6 +227,7 @@ class TestSoftmaxCUDNNOp8(TestSoftmaxCUDNNOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxCUDNNOp9(TestSoftmaxCUDNNOp): + def get_x_shape(self): return [2, 3, 4, 5, 6] @@ -224,6 +238,7 @@ class TestSoftmaxCUDNNOp9(TestSoftmaxCUDNNOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxCUDNNOp10(TestSoftmaxCUDNNOp): + def get_x_shape(self): return [2, 3, 4, 5, 6] @@ -234,6 +249,7 @@ class TestSoftmaxCUDNNOp10(TestSoftmaxCUDNNOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxCUDNNOp11(TestSoftmaxCUDNNOp): + def get_x_shape(self): return [2, 3, 4, 5, 6] @@ -244,6 +260,7 @@ class TestSoftmaxCUDNNOp11(TestSoftmaxCUDNNOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxCUDNNOp12(TestSoftmaxCUDNNOp): + def get_x_shape(self): return [2, 3, 4, 5, 6] @@ -254,6 +271,7 @@ class TestSoftmaxCUDNNOp12(TestSoftmaxCUDNNOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxFP16Op(TestSoftmaxOp): + def init_kernel_type(self): self.dtype = np.float16 @@ -271,6 +289,7 @@ class TestSoftmaxFP16Op(TestSoftmaxOp): @unittest.skipIf(not 
core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxFP16Op2(TestSoftmaxFP16Op): + def get_x_shape(self): return [2, 3, 4, 10] @@ -278,6 +297,7 @@ class TestSoftmaxFP16Op2(TestSoftmaxFP16Op): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxFP16CUDNNOp(TestSoftmaxOp): + def init_kernel_type(self): self.use_cudnn = True self.dtype = np.float16 @@ -292,6 +312,7 @@ class TestSoftmaxFP16CUDNNOp(TestSoftmaxOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxFP16CUDNNOp2(TestSoftmaxFP16CUDNNOp): + def get_x_shape(self): return [2, 3, 4, 5] @@ -299,6 +320,7 @@ class TestSoftmaxFP16CUDNNOp2(TestSoftmaxFP16CUDNNOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxBF16Op(OpTest): + def setUp(self): self.op_type = "softmax" self.use_cudnn = self.init_cudnn() @@ -326,30 +348,31 @@ class TestSoftmaxBF16Op(OpTest): def test_check_output(self): place = core.CUDAPlace(0) - self.check_output_with_place( - place, check_dygraph=(self.use_mkldnn == False)) + self.check_output_with_place(place, + check_dygraph=(self.use_mkldnn == False)) def test_check_grad(self): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ["X"], - "Out", - numeric_grad_delta=0.05, - check_dygraph=(self.use_mkldnn == False)) + self.check_grad_with_place(place, ["X"], + "Out", + numeric_grad_delta=0.05, + check_dygraph=(self.use_mkldnn == False)) @unittest.skipIf( not core.is_compiled_with_cuda() or core.cudnn_version() < 8100, "core is not compiled with CUDA and cudnn version need larger than 8.1.0") class TestSoftmaxBF16CUDNNOp(TestSoftmaxBF16Op): + def init_cudnn(self): return True class TestSoftmaxAPI(unittest.TestCase): + def setUp(self): - self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else paddle.CPUPlace() + self.place = paddle.CUDAPlace( + 0) if core.is_compiled_with_cuda() else paddle.CPUPlace() self.x_np = np.random.uniform(-1., 1., [2, 3, 4, 5]).astype('float32') self.out_ref = np.apply_along_axis(stable_softmax, -1, self.x_np) self.executed_api() @@ -405,16 +428,19 @@ class TestSoftmaxAPI(unittest.TestCase): # The input type must be Variable. self.assertRaises(TypeError, self.softmax, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = paddle.fluid.data( - name='x_int32', shape=[2, 3], dtype='int32') + x_int32 = paddle.fluid.data(name='x_int32', + shape=[2, 3], + dtype='int32') self.assertRaises(TypeError, self.softmax, x_int32) # support the input dtype is float16 - x_fp16 = paddle.fluid.data( - name='x_fp16', shape=[2, 3], dtype='float16') + x_fp16 = paddle.fluid.data(name='x_fp16', + shape=[2, 3], + dtype='float16') self.softmax(x_fp16) class TestSoftmaxInplaceAPI(TestSoftmaxAPI): + def executed_api(self): self.softmax = F.softmax_ diff --git a/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py b/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py index 75d09e3df0c..d4cb658d96a 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py @@ -112,8 +112,10 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): else: axis_dim = self.shape[self.axis] self.shape[self.axis] = 1 - labels = np.random.randint( - 0, axis_dim, self.shape, dtype=self.hard_label_dtype()) + labels = np.random.randint(0, + axis_dim, + self.shape, + dtype=self.hard_label_dtype()) loss = cross_entropy(softmax, labels, self.soft_label, self.axis, self.ignore_index) @@ -145,45 +147,48 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): def test_check_grad(self): if core.is_compiled_with_rocm(): if self.python_api is not None: - self.check_grad( - ["Logits"], - "Loss", - max_relative_error=5e-1, - check_eager=True) + self.check_grad(["Logits"], + "Loss", + max_relative_error=5e-1, + check_eager=True) # HIP will have accuracy fail when using float32 in CPU place self.check_grad(["Logits"], "Loss", max_relative_error=5e-1) else: if self.python_api is not None: - self.check_grad( - ["Logits"], - "Loss", - numeric_grad_delta=0.001, - check_eager=True) + self.check_grad(["Logits"], + "Loss", + numeric_grad_delta=0.001, + check_eager=True) self.check_grad(["Logits"], "Loss", numeric_grad_delta=0.001) class TestSoftmaxWithCrossEntropyOpInt32(TestSoftmaxWithCrossEntropyOp): + def hard_label_dtype(self): return "int32" class TestSoftmaxWithCrossEntropyOpInt16(TestSoftmaxWithCrossEntropyOp): + def hard_label_dtype(self): return "int16" class TestSoftmaxWithCrossEntropyOpInt8(TestSoftmaxWithCrossEntropyOp): + def hard_label_dtype(self): return "int8" class TestSoftmaxWithCrossEntropyOpUInt8(TestSoftmaxWithCrossEntropyOp): + def hard_label_dtype(self): return "uint8" class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_SoftLabel_1D( TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -199,6 +204,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_SoftLabel_1D( class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_1D( TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -217,6 +223,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_1D( ############################################################################## class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_SoftLabel_2D( TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -232,6 +239,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_SoftLabel_2D( class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_SoftLabel_2D_Axis2( 
TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -247,6 +255,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_SoftLabel_2D_Axis2( class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_SoftLabel_2D_Axis3( TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -262,6 +271,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_SoftLabel_2D_Axis3( class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_SoftLabel_2D_Axis4( TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -286,6 +296,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_SoftLabel_2D_Axis4( class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_2D( TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -301,6 +312,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_2D( class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_2D_Axis2( TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -316,6 +328,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_2D_Axis2( class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_2D_Axis3( TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -331,6 +344,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_2D_Axis3( class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_2D_Axis4( TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -355,6 +369,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_2D_Axis4( class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_Ignore( TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -370,6 +385,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_Ignore( class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_Ignore_Axis( TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -385,6 +401,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_Ignore_Axis( class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_2D_Ignore( TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -400,6 +417,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_2D_Ignore( class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_2D_Ignore_Axis3( TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_core_api_without_softmax @@ -419,6 +437,7 @@ class TestSoftmaxWithCrossEntropyOp_NotWithSoftmax_HardLabel_2D_Ignore_Axis3( class TestSoftmaxWithCrossEntropyOpNoCudnn(TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -435,6 
+454,7 @@ class TestSoftmaxWithCrossEntropyOpNoCudnn(TestSoftmaxWithCrossEntropyOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSoftmaxWithCrossEntropyOpFp16(TestSoftmaxWithCrossEntropyOp): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -482,13 +502,16 @@ class TestSoftmaxWithCrossEntropyOpFp16(TestSoftmaxWithCrossEntropyOp): def test_check_grad(self): if self.python_api is not None: - self.check_grad( - ["Logits"], "Loss", max_relative_error=0.1, check_eager=True) + self.check_grad(["Logits"], + "Loss", + max_relative_error=0.1, + check_eager=True) self.check_grad(["Logits"], "Loss", max_relative_error=0.1) -class TestSoftmaxWithCrossEntropyOpNoCudnnFp16( - TestSoftmaxWithCrossEntropyOpFp16): +class TestSoftmaxWithCrossEntropyOpNoCudnnFp16(TestSoftmaxWithCrossEntropyOpFp16 + ): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -502,8 +525,10 @@ class TestSoftmaxWithCrossEntropyOpNoCudnnFp16( def test_check_grad(self): if self.python_api is not None: - self.check_grad( - ["Logits"], "Loss", max_relative_error=0.1, check_eager=True) + self.check_grad(["Logits"], + "Loss", + max_relative_error=0.1, + check_eager=True) self.check_grad(["Logits"], "Loss", max_relative_error=0.1) @@ -533,11 +558,10 @@ class TestSoftmaxWithCrossEntropyOp2(TestSoftmaxWithCrossEntropyOp): if core.is_compiled_with_rocm(): # HIP will have accuracy fail when using float32 in CPU place if self.python_api is not None: - self.check_grad( - ["Logits"], - "Loss", - max_relative_error=0.1, - check_eager=True) + self.check_grad(["Logits"], + "Loss", + max_relative_error=0.1, + check_eager=True) self.check_grad(["Logits"], "Loss", max_relative_error=0.1) else: if self.python_api is not None: @@ -564,6 +588,7 @@ class TestSoftmaxWithCrossEntropyOp3(TestSoftmaxWithCrossEntropyOp): class TestSoftmaxWithCrossEntropyOp3NoCudnn(TestSoftmaxWithCrossEntropyOp3): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -653,8 +678,8 @@ class TestSoftmaxWithCrossEntropyOpAxis4(TestSoftmaxWithCrossEntropyOp): self.use_softmax = True -class TestSoftmaxWithCrossEntropyOpAxisDimEqualOne( - TestSoftmaxWithCrossEntropyOp): +class TestSoftmaxWithCrossEntropyOpAxisDimEqualOne(TestSoftmaxWithCrossEntropyOp + ): """ Test softmax with cross entropy operator with discreate one-hot labels. 
Given axis != -1 @@ -675,6 +700,7 @@ class TestSoftmaxWithCrossEntropyOpAxisDimEqualOne( class TestSoftmaxWithCrossEntropyOpNoCudnnFp16Axis1( TestSoftmaxWithCrossEntropyOpNoCudnnFp16): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -690,6 +716,7 @@ class TestSoftmaxWithCrossEntropyOpNoCudnnFp16Axis1( class TestSoftmaxWithCrossEntropyOpNoCudnnFp16Axis2( TestSoftmaxWithCrossEntropyOpNoCudnnFp16): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -705,6 +732,7 @@ class TestSoftmaxWithCrossEntropyOpNoCudnnFp16Axis2( class TestSoftmaxWithCrossEntropyOpNoCudnnFp16Axis3( TestSoftmaxWithCrossEntropyOpNoCudnnFp16): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -718,8 +746,9 @@ class TestSoftmaxWithCrossEntropyOpNoCudnnFp16Axis3( self.use_softmax = True -class TestSoftmaxWithCrossEntropyOpSoftLabelAxis1( - TestSoftmaxWithCrossEntropyOp2): +class TestSoftmaxWithCrossEntropyOpSoftLabelAxis1(TestSoftmaxWithCrossEntropyOp2 + ): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -733,8 +762,9 @@ class TestSoftmaxWithCrossEntropyOpSoftLabelAxis1( self.use_softmax = True -class TestSoftmaxWithCrossEntropyOpSoftLabelAxis2( - TestSoftmaxWithCrossEntropyOp2): +class TestSoftmaxWithCrossEntropyOpSoftLabelAxis2(TestSoftmaxWithCrossEntropyOp2 + ): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -748,8 +778,9 @@ class TestSoftmaxWithCrossEntropyOpSoftLabelAxis2( self.use_softmax = True -class TestSoftmaxWithCrossEntropyOpSoftLabelAxis3( - TestSoftmaxWithCrossEntropyOp2): +class TestSoftmaxWithCrossEntropyOpSoftLabelAxis3(TestSoftmaxWithCrossEntropyOp2 + ): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -763,8 +794,9 @@ class TestSoftmaxWithCrossEntropyOpSoftLabelAxis3( self.use_softmax = True -class TestSoftmaxWithCrossEntropyOpSoftLabelAxis4( - TestSoftmaxWithCrossEntropyOp2): +class TestSoftmaxWithCrossEntropyOpSoftLabelAxis4(TestSoftmaxWithCrossEntropyOp2 + ): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -780,6 +812,7 @@ class TestSoftmaxWithCrossEntropyOpSoftLabelAxis4( class TestSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis1( TestSoftmaxWithCrossEntropyOp3): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -795,6 +828,7 @@ class TestSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis1( class TestSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis2( TestSoftmaxWithCrossEntropyOp3): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -810,6 +844,7 @@ class TestSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis2( class TestSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis3( TestSoftmaxWithCrossEntropyOp3): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api @@ -825,6 +860,7 @@ class TestSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis3( class TestSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis4( TestSoftmaxWithCrossEntropyOp3): + def initParams(self): self.op_type = "softmax_with_cross_entropy" self.python_api = python_api diff --git a/python/paddle/fluid/tests/unittests/test_solve_op.py b/python/paddle/fluid/tests/unittests/test_solve_op.py index fd527ec90f2..99c5eb21db4 100644 --- 
a/python/paddle/fluid/tests/unittests/test_solve_op.py +++ b/python/paddle/fluid/tests/unittests/test_solve_op.py @@ -19,6 +19,7 @@ import numpy as np import paddle import paddle.fluid.core as core import sys + sys.path.append("..") from op_test import OpTest import paddle.fluid as fluid @@ -27,6 +28,7 @@ from paddle.fluid import Program, program_guard # 2D normal case class TestSolveOp(OpTest): + def config(self): self.input_x_matrix_shape = [15, 15] self.input_y_matrix_shape = [15, 10] @@ -53,8 +55,9 @@ class TestSolveOp(OpTest): self.check_grad(['X', 'Y'], 'Out') -# x broadcast + 3D batch case +# x broadcast + 3D batch case class TestSolveOpBatched_case0(OpTest): + def setUp(self): self.op_type = "solve" self.dtype = "float64" @@ -75,6 +78,7 @@ class TestSolveOpBatched_case0(OpTest): # 3D batch + y vector case class TestSolveOpBatched_case1(OpTest): + def setUp(self): self.op_type = "solve" self.dtype = "float64" @@ -95,6 +99,7 @@ class TestSolveOpBatched_case1(OpTest): # 3D batch + y broadcast case class TestSolveOpBatched_case2(OpTest): + def setUp(self): self.op_type = "solve" self.dtype = "float64" @@ -113,8 +118,9 @@ class TestSolveOpBatched_case2(OpTest): self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.02) -# x broadcast + 3D batch case +# x broadcast + 3D batch case class TestSolveOpBatched_case3(OpTest): + def setUp(self): self.op_type = "solve" self.dtype = "float64" @@ -133,8 +139,9 @@ class TestSolveOpBatched_case3(OpTest): self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.02) -# 3D normal batch case +# 3D normal batch case class TestSolveOpBatched_case4(OpTest): + def setUp(self): self.op_type = "solve" self.dtype = "float64" @@ -155,6 +162,7 @@ class TestSolveOpBatched_case4(OpTest): # 4D normal batch case class TestSolveOpBatched_case5(OpTest): + def setUp(self): self.op_type = "solve" self.dtype = "float64" @@ -175,6 +183,7 @@ class TestSolveOpBatched_case5(OpTest): # 4D batch + y broadcast case class TestSolveOpBatched_case6(OpTest): + def setUp(self): self.op_type = "solve" self.dtype = "float64" @@ -195,6 +204,7 @@ class TestSolveOpBatched_case6(OpTest): # 5D normal batch case class TestSolveOpBatched_case7(OpTest): + def setUp(self): self.op_type = "solve" self.dtype = "float64" @@ -215,6 +225,7 @@ class TestSolveOpBatched_case7(OpTest): # 5D batch + y broadcast case class TestSolveOpBatched_case8(OpTest): + def setUp(self): self.op_type = "solve" self.dtype = "float64" @@ -234,16 +245,17 @@ class TestSolveOpBatched_case8(OpTest): class TestSolveOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of solve_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) - y1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) + y1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) self.assertRaises(TypeError, paddle.linalg.solve, x1, y1) - # The data type of input must be float32 or float64. + # The data type of input must be float32 or float64. 
x2 = fluid.data(name="x2", shape=[30, 30], dtype="bool") y2 = fluid.data(name="y2", shape=[30, 10], dtype="bool") self.assertRaises(TypeError, paddle.linalg.solve, x2, y2) @@ -273,6 +285,7 @@ class TestSolveOpError(unittest.TestCase): # 2D + vector case, FP64 class TestSolveOpAPI_1(unittest.TestCase): + def setUp(self): np.random.seed(2021) self.place = [paddle.CPUPlace()] @@ -282,10 +295,12 @@ class TestSolveOpAPI_1(unittest.TestCase): def check_static_result(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - paddle_input_x = fluid.data( - name="input_x", shape=[3, 3], dtype=self.dtype) - paddle_input_y = fluid.data( - name="input_y", shape=[3], dtype=self.dtype) + paddle_input_x = fluid.data(name="input_x", + shape=[3, 3], + dtype=self.dtype) + paddle_input_y = fluid.data(name="input_y", + shape=[3], + dtype=self.dtype) paddle_result = paddle.linalg.solve(paddle_input_x, paddle_input_y) np_input_x = np.random.random([3, 3]).astype(self.dtype) @@ -294,11 +309,12 @@ class TestSolveOpAPI_1(unittest.TestCase): np_result = np.linalg.solve(np_input_x, np_input_y) exe = fluid.Executor(place) - fetches = exe.run( - fluid.default_main_program(), - feed={"input_x": np_input_x, - "input_y": np_input_y}, - fetch_list=[paddle_result]) + fetches = exe.run(fluid.default_main_program(), + feed={ + "input_x": np_input_x, + "input_y": np_input_y + }, + fetch_list=[paddle_result]) self.assertTrue( np.allclose(fetches[0], np.linalg.solve(np_input_x, np_input_y))) @@ -308,6 +324,7 @@ class TestSolveOpAPI_1(unittest.TestCase): self.check_static_result(place=place) def test_dygraph(self): + def run(place): paddle.disable_static(place) np.random.seed(2021) @@ -319,8 +336,8 @@ class TestSolveOpAPI_1(unittest.TestCase): numpy_output = np.linalg.solve(input_x_np, input_y_np) paddle_output = paddle.linalg.solve(tensor_input_x, tensor_input_y) - self.assertEqual( - np.allclose(numpy_output, paddle_output.numpy()), True) + self.assertEqual(np.allclose(numpy_output, paddle_output.numpy()), + True) self.assertEqual(numpy_output.shape, paddle_output.numpy().shape) paddle.enable_static() @@ -330,6 +347,7 @@ class TestSolveOpAPI_1(unittest.TestCase): # 2D normal case, FP64 class TestSolveOpAPI_2(unittest.TestCase): + def setUp(self): np.random.seed(2021) self.place = [paddle.CPUPlace()] @@ -340,10 +358,12 @@ class TestSolveOpAPI_2(unittest.TestCase): def check_static_result(self, place): paddle.enable_static() with fluid.program_guard(fluid.Program(), fluid.Program()): - paddle_input_x = fluid.data( - name="input_x", shape=[10, 10], dtype=self.dtype) - paddle_input_y = fluid.data( - name="input_y", shape=[10, 4], dtype=self.dtype) + paddle_input_x = fluid.data(name="input_x", + shape=[10, 10], + dtype=self.dtype) + paddle_input_y = fluid.data(name="input_y", + shape=[10, 4], + dtype=self.dtype) paddle_result = paddle.linalg.solve(paddle_input_x, paddle_input_y) np_input_x = np.random.random([10, 10]).astype(self.dtype) @@ -352,11 +372,12 @@ class TestSolveOpAPI_2(unittest.TestCase): np_result = np.linalg.solve(np_input_x, np_input_y) exe = fluid.Executor(place) - fetches = exe.run( - fluid.default_main_program(), - feed={"input_x": np_input_x, - "input_y": np_input_y}, - fetch_list=[paddle_result]) + fetches = exe.run(fluid.default_main_program(), + feed={ + "input_x": np_input_x, + "input_y": np_input_y + }, + fetch_list=[paddle_result]) self.assertTrue( np.allclose(fetches[0], np.linalg.solve(np_input_x, np_input_y))) @@ -366,6 +387,7 @@ class TestSolveOpAPI_2(unittest.TestCase): 
self.check_static_result(place=place) def test_dygraph(self): + def run(place): paddle.disable_static(place) np.random.seed(2021) @@ -377,8 +399,8 @@ class TestSolveOpAPI_2(unittest.TestCase): numpy_output = np.linalg.solve(input_x_np, input_y_np) paddle_output = paddle.linalg.solve(tensor_input_x, tensor_input_y) - self.assertEqual( - np.allclose(numpy_output, paddle_output.numpy()), True) + self.assertEqual(np.allclose(numpy_output, paddle_output.numpy()), + True) self.assertEqual(numpy_output.shape, paddle_output.numpy().shape) paddle.enable_static() @@ -388,6 +410,7 @@ class TestSolveOpAPI_2(unittest.TestCase): # 2D normal case, FP32 class TestSolveOpAPI_3(unittest.TestCase): + def setUp(self): np.random.seed(2021) self.place = [paddle.CPUPlace()] @@ -398,10 +421,12 @@ class TestSolveOpAPI_3(unittest.TestCase): def check_static_result(self, place): paddle.enable_static() with fluid.program_guard(fluid.Program(), fluid.Program()): - paddle_input_x = fluid.data( - name="input_x", shape=[10, 10], dtype=self.dtype) - paddle_input_y = fluid.data( - name="input_y", shape=[10, 4], dtype=self.dtype) + paddle_input_x = fluid.data(name="input_x", + shape=[10, 10], + dtype=self.dtype) + paddle_input_y = fluid.data(name="input_y", + shape=[10, 4], + dtype=self.dtype) paddle_result = paddle.linalg.solve(paddle_input_x, paddle_input_y) np_input_x = np.random.random([10, 10]).astype(self.dtype) @@ -410,22 +435,23 @@ class TestSolveOpAPI_3(unittest.TestCase): np_result = np.linalg.solve(np_input_x, np_input_y) exe = fluid.Executor(place) - fetches = exe.run( - fluid.default_main_program(), - feed={"input_x": np_input_x, - "input_y": np_input_y}, - fetch_list=[paddle_result]) + fetches = exe.run(fluid.default_main_program(), + feed={ + "input_x": np_input_x, + "input_y": np_input_y + }, + fetch_list=[paddle_result]) self.assertTrue( - np.allclose( - fetches[0], - np.linalg.solve(np_input_x, np_input_y), - rtol=1.e-4)) + np.allclose(fetches[0], + np.linalg.solve(np_input_x, np_input_y), + rtol=1.e-4)) def test_static(self): for place in self.place: self.check_static_result(place=place) def test_dygraph(self): + def run(place): paddle.disable_static(place) np.random.seed(2021) @@ -438,8 +464,7 @@ class TestSolveOpAPI_3(unittest.TestCase): numpy_output = np.linalg.solve(input_x_np, input_y_np) paddle_output = paddle.linalg.solve(tensor_input_x, tensor_input_y) self.assertEqual( - np.allclose( - numpy_output, paddle_output.numpy(), rtol=1.e-4), + np.allclose(numpy_output, paddle_output.numpy(), rtol=1.e-4), True) self.assertEqual(numpy_output.shape, paddle_output.numpy().shape) paddle.enable_static() @@ -450,6 +475,7 @@ class TestSolveOpAPI_3(unittest.TestCase): # 3D + y broadcast case, FP64 class TestSolveOpAPI_4(unittest.TestCase): + def setUp(self): np.random.seed(2021) self.place = [paddle.CPUPlace()] @@ -459,10 +485,12 @@ class TestSolveOpAPI_4(unittest.TestCase): def check_static_result(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - paddle_input_x = fluid.data( - name="input_x", shape=[2, 3, 3], dtype=self.dtype) - paddle_input_y = fluid.data( - name="input_y", shape=[1, 3, 3], dtype=self.dtype) + paddle_input_x = fluid.data(name="input_x", + shape=[2, 3, 3], + dtype=self.dtype) + paddle_input_y = fluid.data(name="input_y", + shape=[1, 3, 3], + dtype=self.dtype) paddle_result = paddle.linalg.solve(paddle_input_x, paddle_input_y) np_input_x = np.random.random([2, 3, 3]).astype(self.dtype) @@ -471,11 +499,12 @@ class TestSolveOpAPI_4(unittest.TestCase): np_result = 
np.linalg.solve(np_input_x, np_input_y) exe = fluid.Executor(place) - fetches = exe.run( - fluid.default_main_program(), - feed={"input_x": np_input_x, - "input_y": np_input_y}, - fetch_list=[paddle_result]) + fetches = exe.run(fluid.default_main_program(), + feed={ + "input_x": np_input_x, + "input_y": np_input_y + }, + fetch_list=[paddle_result]) self.assertTrue( np.allclose(fetches[0], np.linalg.solve(np_input_x, np_input_y))) @@ -485,6 +514,7 @@ class TestSolveOpAPI_4(unittest.TestCase): self.check_static_result(place=place) def test_dygraph(self): + def run(place): paddle.disable_static(place) np.random.seed(2021) @@ -496,8 +526,8 @@ class TestSolveOpAPI_4(unittest.TestCase): numpy_output = np.linalg.solve(input_x_np, input_y_np) paddle_output = paddle.linalg.solve(tensor_input_x, tensor_input_y) - self.assertEqual( - np.allclose(numpy_output, paddle_output.numpy()), True) + self.assertEqual(np.allclose(numpy_output, paddle_output.numpy()), + True) self.assertEqual(numpy_output.shape, paddle_output.numpy().shape) paddle.enable_static() @@ -526,8 +556,10 @@ class TestSolveOpSingularAPI(unittest.TestCase): exe = fluid.Executor(place) try: fetches = exe.run(fluid.default_main_program(), - feed={"x": input_x_np, - "y": input_y_np}, + feed={ + "x": input_x_np, + "y": input_y_np + }, fetch_list=[result]) except RuntimeError as ex: print("The mat is singular") diff --git a/python/paddle/fluid/tests/unittests/test_sort_op.py b/python/paddle/fluid/tests/unittests/test_sort_op.py index d678aa835d5..2faa2c138d8 100644 --- a/python/paddle/fluid/tests/unittests/test_sort_op.py +++ b/python/paddle/fluid/tests/unittests/test_sort_op.py @@ -25,6 +25,7 @@ from paddle.fluid.framework import _test_eager_guard class TestSortOnCPU(unittest.TestCase): + def setUp(self): self.place = core.CPUPlace() @@ -33,10 +34,9 @@ class TestSortOnCPU(unittest.TestCase): input = fluid.data(name="input", shape=[2, 3, 4], dtype="float32") output = paddle.sort(x=input) exe = fluid.Executor(self.place) - data = np.array( - [[[5, 8, 9, 5], [0, 0, 1, 7], [6, 9, 2, 4]], - [[5, 2, 4, 2], [4, 7, 7, 9], [1, 7, 0, 6]]], - dtype='float32') + data = np.array([[[5, 8, 9, 5], [0, 0, 1, 7], [6, 9, 2, 4]], + [[5, 2, 4, 2], [4, 7, 7, 9], [1, 7, 0, 6]]], + dtype='float32') result, = exe.run(feed={'input': data}, fetch_list=[output]) np_result = np.sort(result) self.assertEqual((result == np_result).all(), True) @@ -46,16 +46,16 @@ class TestSortOnCPU(unittest.TestCase): input = fluid.data(name="input", shape=[2, 3, 4], dtype="float32") output = paddle.sort(x=input, axis=1) exe = fluid.Executor(self.place) - data = np.array( - [[[5, 8, 9, 5], [0, 0, 1, 7], [6, 9, 2, 4]], - [[5, 2, 4, 2], [4, 7, 7, 9], [1, 7, 0, 6]]], - dtype='float32') + data = np.array([[[5, 8, 9, 5], [0, 0, 1, 7], [6, 9, 2, 4]], + [[5, 2, 4, 2], [4, 7, 7, 9], [1, 7, 0, 6]]], + dtype='float32') result, = exe.run(feed={'input': data}, fetch_list=[output]) np_result = np.sort(result, axis=1) self.assertEqual((result == np_result).all(), True) class TestSortOnGPU(TestSortOnCPU): + def init_place(self): if core.is_compiled_with_cuda(): self.place = core.CUDAPlace(0) @@ -64,6 +64,7 @@ class TestSortOnGPU(TestSortOnCPU): class TestSortDygraph(unittest.TestCase): + def setUp(self): self.input_data = np.random.rand(10, 10) if core.is_compiled_with_cuda(): @@ -87,9 +88,8 @@ class TestSortDygraph(unittest.TestCase): paddle.disable_static(self.place) var_x = paddle.to_tensor(self.input_data) out = paddle.sort(var_x, axis=-1) - self.assertEqual( - (np.sort( - self.input_data, axis=-1) 
== out.numpy()).all(), True) + self.assertEqual((np.sort(self.input_data, + axis=-1) == out.numpy()).all(), True) paddle.enable_static() def test_api_1(self): diff --git a/python/paddle/fluid/tests/unittests/test_space_to_depth_op.py b/python/paddle/fluid/tests/unittests/test_space_to_depth_op.py index 75e1c16231c..c4304fa920b 100644 --- a/python/paddle/fluid/tests/unittests/test_space_to_depth_op.py +++ b/python/paddle/fluid/tests/unittests/test_space_to_depth_op.py @@ -20,6 +20,7 @@ from op_test import OpTest class TestSpaceToDepthOp(OpTest): + @staticmethod def helper(in_, width, height, channel, batch, blocksize, forward, out_): channel_out = channel // (blocksize * blocksize) @@ -65,17 +66,18 @@ class TestSpaceToDepthOp(OpTest): self.forward = 1 def test_check_output(self): - place = fluid.core.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.core.CPUPlace() + place = fluid.core.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.core.CPUPlace() self.check_output_with_place(place, 1e-5, None, False) def test_check_grad(self): - place = fluid.core.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.core.CPUPlace() + place = fluid.core.CUDAPlace( + 0) if fluid.core.is_compiled_with_cuda() else fluid.core.CPUPlace() self.check_grad_with_place(place, ['X'], 'Out') class TestSpaceToDepthOpBasic(TestSpaceToDepthOp): + def init_data(self): self.ori_shape = (32, 8, 6, 6) self.infered_shape = (32, 32, 3, 3) @@ -90,6 +92,7 @@ class TestSpaceToDepthOpBasic(TestSpaceToDepthOp): class TestSpaceToDepthOpDoubleBasic(TestSpaceToDepthOp): + def init_data(self): self.ori_shape = (32, 8, 6, 6) self.infered_shape = (32, 32, 3, 3) @@ -104,6 +107,7 @@ class TestSpaceToDepthOpDoubleBasic(TestSpaceToDepthOp): class TestSpaceToDepthOpWithStride3(TestSpaceToDepthOp): + def init_data(self): self.ori_shape = (32, 9, 6, 6) self.infered_shape = (32, 81, 2, 2) @@ -118,6 +122,7 @@ class TestSpaceToDepthOpWithStride3(TestSpaceToDepthOp): class TestSpaceToDepthOpWithNotSquare(TestSpaceToDepthOp): + def init_data(self): self.ori_shape = (32, 9, 9, 6) self.infered_shape = (32, 81, 3, 2) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_attention_op.py b/python/paddle/fluid/tests/unittests/test_sparse_attention_op.py index c016a482f36..f9e40584ee6 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_attention_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_attention_op.py @@ -196,6 +196,7 @@ def init_csr_format(batch_size, num_heads, rows, blocksize): "core is not compiled with CUDA and cuda version need larger than or equal to 11.3" ) class TestSparseAttentionOp(OpTest): + def config(self): self.shape = (1, 1, 16, 16) self.blocksize = 4 @@ -272,6 +273,7 @@ class TestSparseAttentionOp(OpTest): class TestSparseAttentionOpFp32Test(TestSparseAttentionOp): + def config(self): self.shape = (1, 1, 8, 16) self.blocksize = 2 @@ -280,6 +282,7 @@ class TestSparseAttentionOpFp32Test(TestSparseAttentionOp): class TestSparseAttentionOpShapeTest(TestSparseAttentionOp): + def config(self): self.shape = (2, 2, 32, 8) self.blocksize = 8 @@ -292,6 +295,7 @@ class TestSparseAttentionOpShapeTest(TestSparseAttentionOp): "core is not compiled with CUDA and cuda version need larger than or equal to 11.3" ) class TestSparseAttentionAPI(unittest.TestCase): + def setUp(self): self.place = paddle.CUDAPlace(0) self.shape = (1, 1, 8, 4) @@ -314,10 +318,12 @@ class TestSparseAttentionAPI(unittest.TestCase): offset_shape = (batch_size, num_heads, rows + 1) columns_shape = 
(batch_size, num_heads, int(sparse_nnz_num)) - offset = paddle.static.data( - name="Offset", shape=offset_shape, dtype="int32") - columns = paddle.static.data( - name="Columns", shape=columns_shape, dtype="int32") + offset = paddle.static.data(name="Offset", + shape=offset_shape, + dtype="int32") + columns = paddle.static.data(name="Columns", + shape=columns_shape, + dtype="int32") key_padding_mask_shape = (self.shape[0], self.shape[2]) attn_mask_shape = (self.shape[2], self.shape[2]) if self.use_mask == True: @@ -325,30 +331,33 @@ class TestSparseAttentionAPI(unittest.TestCase): name="KeyPaddingMask", shape=key_padding_mask_shape, dtype=self.dtype) - attn_mask = paddle.static.data( - name="AttnMask", shape=attn_mask_shape, dtype=self.dtype) - Out = F.sparse_attention( - Q, - K, - V, - offset, - columns, - key_padding_mask=key_padding_mask, - attn_mask=attn_mask) + attn_mask = paddle.static.data(name="AttnMask", + shape=attn_mask_shape, + dtype=self.dtype) + Out = F.sparse_attention(Q, + K, + V, + offset, + columns, + key_padding_mask=key_padding_mask, + attn_mask=attn_mask) else: Out = F.sparse_attention(Q, K, V, offset, columns) Q_np = np.random.random(self.shape).astype(self.dtype) K_np = np.random.random(self.shape).astype(self.dtype) V_np = np.random.random(self.shape).astype(self.dtype) - offset_np, columns_np = init_csr_format( - self.shape[0], self.shape[1], self.shape[2], self.blocksize) + offset_np, columns_np = init_csr_format(self.shape[0], + self.shape[1], + self.shape[2], + self.blocksize) offset_np = offset_np.astype('int32') columns_np = columns_np.astype('int32') # init mask tensor - key_padding_mask_np = np.random.randint( - 0, 2, size=key_padding_mask_shape) + key_padding_mask_np = np.random.randint(0, + 2, + size=key_padding_mask_shape) attn_mask_np = np.random.randint(0, 2, size=attn_mask_shape) key_padding_mask_np = init_mask(key_padding_mask_np) attn_mask_np = init_mask(attn_mask_np) @@ -388,8 +397,7 @@ class TestSparseAttentionAPI(unittest.TestCase): Q_np, K_np, V_np, offset_np, columns_np) self.assertTrue( - np.allclose( - fetches_result, expected_result, atol=1e-5)) + np.allclose(fetches_result, expected_result, atol=1e-5)) def test_dygraph(self): paddle.disable_static() @@ -419,14 +427,13 @@ class TestSparseAttentionAPI(unittest.TestCase): paddle_attn_mask = paddle.to_tensor(attn_mask, place=self.place) if self.use_mask == True: - paddle_result = F.sparse_attention( - paddle_query, - paddle_key, - paddle_value, - paddle_offset, - paddle_colunmns, - key_padding_mask=paddle_kp_mask, - attn_mask=paddle_attn_mask) + paddle_result = F.sparse_attention(paddle_query, + paddle_key, + paddle_value, + paddle_offset, + paddle_colunmns, + key_padding_mask=paddle_kp_mask, + attn_mask=paddle_attn_mask) numpy_result, __, __ = ref_batch_sparse_attention( query, @@ -442,16 +449,16 @@ class TestSparseAttentionAPI(unittest.TestCase): paddle_value, paddle_offset, paddle_colunmns) - numpy_result, __, __ = ref_batch_sparse_attention(query, key, value, - offset, columns) + numpy_result, __, __ = ref_batch_sparse_attention( + query, key, value, offset, columns) numpy_result = numpy_result.astype(self.dtype) self.assertTrue( - np.allclose( - paddle_result.numpy(), numpy_result, atol=1e-5)) + np.allclose(paddle_result.numpy(), numpy_result, atol=1e-5)) class TestSparseAttentionAPITestFloat(TestSparseAttentionAPI): + def setUp(self): self.place = paddle.CUDAPlace(0) self.shape = (2, 2, 8, 4) @@ -461,6 +468,7 @@ class TestSparseAttentionAPITestFloat(TestSparseAttentionAPI): class 
TestSparseAttentionAPITestShape1(TestSparseAttentionAPI): + def setUp(self): self.place = paddle.CUDAPlace(0) self.shape = (2, 2, 64, 32) @@ -470,6 +478,7 @@ class TestSparseAttentionAPITestShape1(TestSparseAttentionAPI): class TestSparseAttentionAPITestShape2(TestSparseAttentionAPI): + def setUp(self): self.place = paddle.CUDAPlace(0) self.shape = (2, 1, 64, 32) @@ -479,6 +488,7 @@ class TestSparseAttentionAPITestShape2(TestSparseAttentionAPI): class TestSparseAttentionAPITestShape3(TestSparseAttentionAPI): + def setUp(self): self.place = paddle.CUDAPlace(0) self.shape = (4, 4, 128, 32) @@ -488,6 +498,7 @@ class TestSparseAttentionAPITestShape3(TestSparseAttentionAPI): class TestSparseAttentionAPITestShape4(TestSparseAttentionAPI): + def setUp(self): self.place = paddle.CUDAPlace(0) self.shape = (3, 3, 35, 15) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py b/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py index 5634490aa3e..623d1b57b3e 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,11 +22,13 @@ from paddle.fluid.framework import _test_eager_guard class TestSparseConv(unittest.TestCase): + def test_conv3d(self): with _test_eager_guard(): kernel = [[[[[1], [1], [1]], [[1], [1], [1]], [[1], [1], [1]]]]] - dense_kernel = paddle.to_tensor( - kernel, dtype='float32', stop_gradient=False) + dense_kernel = paddle.to_tensor(kernel, + dtype='float32', + stop_gradient=False) dense_kernel = paddle.reshape(dense_kernel, [1, 3, 3, 1, 1]) paddings = [0, 0, 0] strides = [1, 1, 1] @@ -44,8 +46,7 @@ class TestSparseConv(unittest.TestCase): out = paddle.incubate.sparse.nn.functional.conv3d( sparse_input, dense_kernel, - bias=paddle.to_tensor( - bias, dtype='float32'), + bias=paddle.to_tensor(bias, dtype='float32'), stride=strides, padding=paddings, dilation=dilations, @@ -64,8 +65,8 @@ class TestSparseConv(unittest.TestCase): sparse_x = paddle.incubate.sparse.sparse_coo_tensor( indices, values, dense_shape, stop_gradient=True) weight = paddle.randn((1, 3, 3, 1, 1), dtype='float32') - y = paddle.incubate.sparse.nn.functional.subm_conv3d(sparse_x, - weight) + y = paddle.incubate.sparse.nn.functional.subm_conv3d( + sparse_x, weight) assert np.array_equal(sparse_x.indices().numpy(), y.indices().numpy()) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_copy_op.py b/python/paddle/fluid/tests/unittests/test_sparse_copy_op.py index 9cf5eace71b..f8bc93f2703 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_copy_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_copy_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,6 +22,7 @@ from paddle.fluid.framework import _test_eager_guard class TestSparseCopy(unittest.TestCase): + def test_copy_sparse_coo(self): with _test_eager_guard(): np_x = [[0, 1.0, 0], [2.0, 0, 0], [0, 3.0, 0]] diff --git a/python/paddle/fluid/tests/unittests/test_sparse_momentum_op.py b/python/paddle/fluid/tests/unittests/test_sparse_momentum_op.py index 033dbd250ed..b71a34f9dfd 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_momentum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_momentum_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -63,6 +63,7 @@ def calculate_sparse_momentum_by_numpy(param, class TestSparseMomentumOp(OpTest): + def setUp(self): self.op_type = "sparse_momentum" self.dtype = np.float32 @@ -85,18 +86,16 @@ class TestSparseMomentumOp(OpTest): grad = np.random.random( (self.batch_size, self.num_classes)).astype(self.dtype) if self.axis == 0: - index = np.random.randint( - 0, - self.batch_size, - size=(self.batch_size // 2, ), - dtype=self.index_dtype) + index = np.random.randint(0, + self.batch_size, + size=(self.batch_size // 2, ), + dtype=self.index_dtype) grad = grad[index] else: - index = np.random.randint( - 0, - self.num_classes, - size=(self.num_classes // 2, ), - dtype=self.index_dtype) + index = np.random.randint(0, + self.num_classes, + size=(self.num_classes // 2, ), + dtype=self.index_dtype) grad = grad[:, index] velocity = np.random.random( (self.batch_size, self.num_classes)).astype(self.dtype) @@ -128,19 +127,25 @@ class TestSparseMomentumOp(OpTest): } self.inputs = { - 'Param': param.astype("float16") if self.multi_precision else param, - 'Velocity': velocity.astype("float32") - if self.multi_precision else velocity, - 'LearningRate': learning_rate.astype("float32") + 'Param': + param.astype("float16") if self.multi_precision else param, + 'Velocity': + velocity.astype("float32") if self.multi_precision else velocity, + 'LearningRate': + learning_rate.astype("float32") if self.multi_precision else learning_rate, - 'Grad': grad.astype("float16") if self.multi_precision else grad, - 'Index': index, - 'Axis': np.array(self.axis).astype(np.int32), + 'Grad': + grad.astype("float16") if self.multi_precision else grad, + 'Index': + index, + 'Axis': + np.array(self.axis).astype(np.int32), } self.outputs = { - 'ParamOut': param_out.astype("float16") - if self.multi_precision else param_out, - 'VelocityOut': velocity_out.astype("float32") + 'ParamOut': + param_out.astype("float16") if self.multi_precision else param_out, + 'VelocityOut': + velocity_out.astype("float32") if self.multi_precision else velocity_out, } @@ -163,39 +168,45 @@ class TestSparseMomentumOp(OpTest): pass def test_check_output(self): - self.check_output( - atol=5e-3 if 
self.multi_precision else 1e-5, check_eager=True) + self.check_output(atol=5e-3 if self.multi_precision else 1e-5, + check_eager=True) class TestSparseMomentumOpDtype1(TestSparseMomentumOp): + def init_dtype(self): self.dtype = np.float32 self.index_dtype = np.int64 class TestSparseMomentumOpDtype2(TestSparseMomentumOp): + def init_dtype(self): self.dtype = np.float64 self.index_dtype = np.int32 class TestSparseMomentumOpDtype3(TestSparseMomentumOp): + def init_dtype(self): self.dtype = np.float64 self.index_dtype = np.int64 class TestSparseMomentumOpAxis(TestSparseMomentumOp): + def init_axis(self): self.axis = 1 class TestSparseMomentumOpNesterov(TestSparseMomentumOp): + def init_use_nesterov(self): self.use_nesterov = True class TestSparseMomentumOpMultiPrecision(TestSparseMomentumOp): + def init_dtype(self): self.dtype = np.float16 self.index_dtype = np.int32 @@ -208,6 +219,7 @@ class TestSparseMomentumOpMultiPrecision(TestSparseMomentumOp): class TestSparseMomentumOpMultiPrecision1(TestSparseMomentumOp): + def init_dtype(self): self.dtype = np.float16 self.index_dtype = np.int64 @@ -220,6 +232,7 @@ class TestSparseMomentumOpMultiPrecision1(TestSparseMomentumOp): class TestSparseMomentumOpMultiPrecision2(TestSparseMomentumOp): + def init_dtype(self): self.dtype = np.float16 self.index_dtype = np.int32 @@ -232,6 +245,7 @@ class TestSparseMomentumOpMultiPrecision2(TestSparseMomentumOp): class TestSparseMomentumOpMultiPrecision3(TestSparseMomentumOp): + def init_dtype(self): self.dtype = np.float16 self.index_dtype = np.int64 diff --git a/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py b/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py index cc917e1ab42..8eccefed6ef 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,6 +22,7 @@ import copy class TestSparseBatchNorm(unittest.TestCase): + def test(self): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): @@ -48,11 +49,10 @@ class TestSparseBatchNorm(unittest.TestCase): sparse_y = sparse_batch_norm(sparse_x) # compare the result with dense batch_norm - assert np.allclose( - dense_y.flatten().numpy(), - sparse_y.values().flatten().numpy(), - atol=1e-5, - rtol=1e-5) + assert np.allclose(dense_y.flatten().numpy(), + sparse_y.values().flatten().numpy(), + atol=1e-5, + rtol=1e-5) # test backward sparse_y.backward(sparse_y) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_pooling_op.py b/python/paddle/fluid/tests/unittests/test_sparse_pooling_op.py index c0a43b3dad3..5f6d71008d7 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_pooling_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_pooling_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,6 +23,7 @@ import copy class TestMaxPool3DFunc(unittest.TestCase): + def setInput(self): paddle.seed(0) self.dense_x = paddle.randn((1, 4, 4, 4, 4)) @@ -56,12 +57,11 @@ class TestMaxPool3DFunc(unittest.TestCase): out.backward(out) dense_x = copy.deepcopy(self.dense_x) - dense_out = paddle.nn.functional.max_pool3d( - dense_x, - self.kernel_sizes, - stride=self.strides, - padding=self.paddings, - data_format='NDHWC') + dense_out = paddle.nn.functional.max_pool3d(dense_x, + self.kernel_sizes, + stride=self.strides, + padding=self.paddings, + data_format='NDHWC') dense_out.backward(dense_out) #compare with dense @@ -70,11 +70,13 @@ class TestMaxPool3DFunc(unittest.TestCase): class TestStride(TestMaxPool3DFunc): + def setStride(self): self.strides = 1 class TestPadding(TestMaxPool3DFunc): + def setPadding(self): self.paddings = 1 @@ -83,6 +85,7 @@ class TestPadding(TestMaxPool3DFunc): class TestKernelSize(TestMaxPool3DFunc): + def setKernelSize(self): self.kernel_sizes = [5, 5, 5] @@ -92,6 +95,7 @@ class TestKernelSize(TestMaxPool3DFunc): class TestInput(TestMaxPool3DFunc): + def setInput(self): paddle.seed(0) self.dense_x = paddle.randn((2, 6, 7, 9, 3)) @@ -100,6 +104,7 @@ class TestInput(TestMaxPool3DFunc): class TestMaxPool3DAPI(unittest.TestCase): + def test(self): with _test_eager_guard(): dense_x = paddle.randn((2, 3, 6, 6, 3)) @@ -109,8 +114,9 @@ class TestMaxPool3DAPI(unittest.TestCase): out = max_pool3d(sparse_x) out = out.to_dense() - dense_out = paddle.nn.functional.max_pool3d( - dense_x, 3, data_format='NDHWC') + dense_out = paddle.nn.functional.max_pool3d(dense_x, + 3, + data_format='NDHWC') assert np.allclose(dense_out.numpy(), out.numpy()) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py b/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py index 85afe10349e..3fd6665b26d 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py @@ -23,6 +23,7 @@ from paddle import _C_ops class TestSparseUnary(unittest.TestCase): + def assert_raises_on_dense_tensor(self, sparse_func): with _test_eager_guard(): dense_x = paddle.ones((2, 3)) @@ -30,12 +31,14 @@ class TestSparseUnary(unittest.TestCase): sparse_func(dense_x) def compare_with_dense( - self, - x, - to_sparse: Callable[[paddle.Tensor], paddle.Tensor], - dense_func: Callable[[paddle.Tensor], paddle.Tensor], - sparse_func: Callable[[paddle.Tensor], paddle.Tensor], - test_gradient: bool, ): + self, + x, + to_sparse: Callable[[paddle.Tensor], paddle.Tensor], + dense_func: Callable[[paddle.Tensor], paddle.Tensor], + sparse_func: Callable[[paddle.Tensor], paddle.Tensor], + test_gradient: bool, + ): + def tensor_allclose(dense_tensor: paddle.Tensor, sparse_tensor: paddle.Tensor): dense_numpy = dense_tensor.numpy() @@ -45,14 +48,16 @@ class TestSparseUnary(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) with _test_eager_guard(): - dense_x = paddle.to_tensor( - x, dtype="float32", stop_gradient=not test_gradient) + dense_x = paddle.to_tensor(x, + dtype="float32", + stop_gradient=not test_gradient) sparse_x = to_sparse(dense_x) sparse_out = sparse_func(sparse_x) - dense_x = paddle.to_tensor( 
- x, dtype="float32", stop_gradient=not test_gradient) + dense_x = paddle.to_tensor(x, + dtype="float32", + stop_gradient=not test_gradient) dense_out = dense_func(dense_x) assert tensor_allclose(dense_out, sparse_out) @@ -71,13 +76,15 @@ class TestSparseUnary(unittest.TestCase): lambda x: x.to_sparse_coo(sparse_dim), paddle.nn.ReLU(), paddle.incubate.sparse.nn.ReLU(), - True, ) + True, + ) self.compare_with_dense( x, lambda x: x.to_sparse_csr(), paddle.nn.ReLU(), paddle.incubate.sparse.nn.ReLU(), - False, ) + False, + ) self.assert_raises_on_dense_tensor(paddle.incubate.sparse.nn.ReLU()) def test_sparse_sqrt(self): @@ -88,13 +95,15 @@ class TestSparseUnary(unittest.TestCase): lambda x: x.to_sparse_coo(sparse_dim), paddle.sqrt, paddle.incubate.sparse.sqrt, - True, ) + True, + ) self.compare_with_dense( x, lambda x: x.to_sparse_csr(), paddle.sqrt, paddle.incubate.sparse.sqrt, - False, ) + False, + ) self.assert_raises_on_dense_tensor(paddle.incubate.sparse.sqrt) def test_sparse_sin(self): @@ -105,13 +114,15 @@ class TestSparseUnary(unittest.TestCase): lambda x: x.to_sparse_coo(sparse_dim), paddle.sin, paddle.incubate.sparse.sin, - True, ) + True, + ) self.compare_with_dense( x, lambda x: x.to_sparse_csr(), paddle.sin, paddle.incubate.sparse.sin, - False, ) + False, + ) self.assert_raises_on_dense_tensor(paddle.incubate.sparse.sin) def test_sparse_tanh(self): @@ -122,13 +133,15 @@ class TestSparseUnary(unittest.TestCase): lambda x: x.to_sparse_coo(sparse_dim), paddle.tanh, paddle.incubate.sparse.tanh, - True, ) + True, + ) self.compare_with_dense( x, lambda x: x.to_sparse_csr(), paddle.tanh, paddle.incubate.sparse.tanh, - False, ) + False, + ) self.assert_raises_on_dense_tensor(paddle.incubate.sparse.tanh) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py index 31f4092666e..5705763e0af 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -24,6 +24,7 @@ devices = ['cpu', 'gpu'] class TestSparseCreate(unittest.TestCase): + def test_create_coo_by_tensor(self): with _test_eager_guard(): indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]] @@ -31,8 +32,10 @@ class TestSparseCreate(unittest.TestCase): dense_shape = [3, 4] dense_indices = paddle.to_tensor(indices) dense_elements = paddle.to_tensor(values, dtype='float32') - coo = paddle.incubate.sparse.sparse_coo_tensor( - dense_indices, dense_elements, dense_shape, stop_gradient=False) + coo = paddle.incubate.sparse.sparse_coo_tensor(dense_indices, + dense_elements, + dense_shape, + stop_gradient=False) # test the to_string.py print(coo) assert np.array_equal(indices, coo.indices().numpy()) @@ -43,8 +46,8 @@ class TestSparseCreate(unittest.TestCase): indices = [[0, 1, 2], [1, 2, 0]] values = [1.0, 2.0, 3.0] dense_shape = [3, 3] - coo = paddle.incubate.sparse.sparse_coo_tensor(indices, values, - dense_shape) + coo = paddle.incubate.sparse.sparse_coo_tensor( + indices, values, dense_shape) assert np.array_equal(indices, coo.indices().numpy()) assert np.array_equal(values, coo.values().numpy()) @@ -71,8 +74,8 @@ class TestSparseCreate(unittest.TestCase): cols = [1, 3, 2, 0, 1] values = [1, 2, 3, 4, 5] dense_shape = [3, 4] - csr = paddle.incubate.sparse.sparse_csr_tensor(crows, cols, values, - dense_shape) + csr = paddle.incubate.sparse.sparse_csr_tensor( + crows, cols, values, dense_shape) # test the to_string.py print(csr) assert np.array_equal(crows, csr.crows().numpy()) @@ -85,8 +88,10 @@ class TestSparseCreate(unittest.TestCase): indices = [[0, 1], [0, 1]] values = [1.0, 2.0] dense_shape = [2, 2] - coo = paddle.incubate.sparse.sparse_coo_tensor( - indices, values, dense_shape, place=place) + coo = paddle.incubate.sparse.sparse_coo_tensor(indices, + values, + dense_shape, + place=place) assert coo.place.is_cpu_place() assert coo.values().place.is_cpu_place() assert coo.indices().place.is_cpu_place() @@ -94,8 +99,10 @@ class TestSparseCreate(unittest.TestCase): crows = [0, 2, 3, 5] cols = [1, 3, 2, 0, 1] values = [1.0, 2.0, 3.0, 4.0, 5.0] - csr = paddle.incubate.sparse.sparse_csr_tensor( - crows, cols, values, [3, 5], place=place) + csr = paddle.incubate.sparse.sparse_csr_tensor(crows, + cols, + values, [3, 5], + place=place) assert csr.place.is_cpu_place() assert csr.crows().place.is_cpu_place() assert csr.cols().place.is_cpu_place() @@ -108,15 +115,19 @@ class TestSparseCreate(unittest.TestCase): dense_shape = [2, 2] indices = paddle.to_tensor(indices, dtype='int32') values = paddle.to_tensor(values, dtype='float32') - coo = paddle.incubate.sparse.sparse_coo_tensor( - indices, values, dense_shape, dtype='float64') + coo = paddle.incubate.sparse.sparse_coo_tensor(indices, + values, + dense_shape, + dtype='float64') assert coo.dtype == paddle.float64 crows = [0, 2, 3, 5] cols = [1, 3, 2, 0, 1] values = [1.0, 2.0, 3.0, 4.0, 5.0] - csr = paddle.incubate.sparse.sparse_csr_tensor( - crows, cols, values, [3, 5], dtype='float16') + csr = paddle.incubate.sparse.sparse_csr_tensor(crows, + cols, + values, [3, 5], + dtype='float16') assert csr.dtype == paddle.float16 def test_create_coo_no_shape(self): @@ -130,6 +141,7 @@ class TestSparseCreate(unittest.TestCase): class TestSparseConvert(unittest.TestCase): + def test_to_sparse_coo(self): with _test_eager_guard(): x = [[0, 1, 0, 2], [0, 0, 3, 0], [4, 5, 0, 0]] @@ -166,7 +178,7 @@ class TestSparseConvert(unittest.TestCase): out_grad = [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [9.0, 10.0, 11.0, 12.0]] 
dense_tensor.backward(paddle.to_tensor(out_grad)) - #mask the out_grad by sparse_x.indices() + #mask the out_grad by sparse_x.indices() correct_x_grad = [2.0, 4.0, 7.0, 9.0, 10.0] assert np.array_equal(correct_x_grad, sparse_x.grad.values().numpy()) @@ -232,14 +244,15 @@ class TestSparseConvert(unittest.TestCase): def test_sparse_coo_tensor_grad(self): with _test_eager_guard(): for device in devices: - if device == 'cpu' or (device == 'gpu' and - paddle.is_compiled_with_cuda()): + if device == 'cpu' or (device == 'gpu' + and paddle.is_compiled_with_cuda()): paddle.device.set_device(device) indices = [[0, 1], [0, 1]] values = [1, 2] indices = paddle.to_tensor(indices, dtype='int32') - values = paddle.to_tensor( - values, dtype='float32', stop_gradient=False) + values = paddle.to_tensor(values, + dtype='float32', + stop_gradient=False) sparse_x = paddle.incubate.sparse.sparse_coo_tensor( indices, values, shape=[2, 2], stop_gradient=False) grad_indices = [[0, 1], [1, 1]] @@ -255,8 +268,9 @@ class TestSparseConvert(unittest.TestCase): # test the non-zero values is a vector values = [[1, 1], [2, 2]] - values = paddle.to_tensor( - values, dtype='float32', stop_gradient=False) + values = paddle.to_tensor(values, + dtype='float32', + stop_gradient=False) sparse_x = paddle.incubate.sparse.sparse_coo_tensor( indices, values, shape=[2, 2, 2], stop_gradient=False) grad_values = [[2, 2], [3, 3]] @@ -271,16 +285,16 @@ class TestSparseConvert(unittest.TestCase): def test_sparse_coo_tensor_sorted(self): with _test_eager_guard(): for device in devices: - if device == 'cpu' or (device == 'gpu' and - paddle.is_compiled_with_cuda()): + if device == 'cpu' or (device == 'gpu' + and paddle.is_compiled_with_cuda()): paddle.device.set_device(device) - #test unsorted and duplicate indices + #test unsorted and duplicate indices indices = [[1, 0, 0], [0, 1, 1]] values = [1.0, 2.0, 3.0] indices = paddle.to_tensor(indices, dtype='int32') values = paddle.to_tensor(values, dtype='float32') - sparse_x = paddle.incubate.sparse.sparse_coo_tensor(indices, - values) + sparse_x = paddle.incubate.sparse.sparse_coo_tensor( + indices, values) indices_sorted = [[0, 1], [1, 0]] values_sorted = [5.0, 1.0] assert np.array_equal(indices_sorted, @@ -291,8 +305,8 @@ class TestSparseConvert(unittest.TestCase): # test the non-zero values is a vector values = [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]] values = paddle.to_tensor(values, dtype='float32') - sparse_x = paddle.incubate.sparse.sparse_coo_tensor(indices, - values) + sparse_x = paddle.incubate.sparse.sparse_coo_tensor( + indices, values) values_sorted = [[5.0, 5.0], [1.0, 1.0]] assert np.array_equal(indices_sorted, sparse_x.indices().numpy()) @@ -301,6 +315,7 @@ class TestSparseConvert(unittest.TestCase): class TestCooError(unittest.TestCase): + def test_small_shape(self): with _test_eager_guard(): with self.assertRaises(ValueError): @@ -317,8 +332,8 @@ class TestCooError(unittest.TestCase): # 2. 
test the nnz of indices must same as nnz of values indices = [[1, 2], [1, 0]] values = [1, 2, 3] - sparse_x = paddle.incubate.sparse.sparse_coo_tensor(indices, - values) + sparse_x = paddle.incubate.sparse.sparse_coo_tensor( + indices, values) def test_same_dimensions(self): with _test_eager_guard(): @@ -326,19 +341,21 @@ class TestCooError(unittest.TestCase): indices = [[1, 2], [1, 0]] values = [1, 2, 3] shape = [2, 3, 4] - sparse_x = paddle.incubate.sparse.sparse_coo_tensor( - indices, values, shape=shape) + sparse_x = paddle.incubate.sparse.sparse_coo_tensor(indices, + values, + shape=shape) def test_indices_dtype(self): with _test_eager_guard(): with self.assertRaises(TypeError): indices = [[1.0, 2.0], [0, 1]] values = [1, 2] - sparse_x = paddle.incubate.sparse.sparse_coo_tensor(indices, - values) + sparse_x = paddle.incubate.sparse.sparse_coo_tensor( + indices, values) class TestCsrError(unittest.TestCase): + def test_dimension1(self): with _test_eager_guard(): with self.assertRaises(ValueError): diff --git a/python/paddle/fluid/tests/unittests/test_spawn_and_init_parallel_env.py b/python/paddle/fluid/tests/unittests/test_spawn_and_init_parallel_env.py index dccc117f6bc..10fcf961f4b 100644 --- a/python/paddle/fluid/tests/unittests/test_spawn_and_init_parallel_env.py +++ b/python/paddle/fluid/tests/unittests/test_spawn_and_init_parallel_env.py @@ -27,13 +27,14 @@ from paddle.fluid.dygraph import parallel_helper import multiprocessing # NOTE(chenweihang): Coverage CI is currently not able to count python3 -# unittest, so the unittests here covers some cases that will only be -# executed in the python3 sub-process. +# unittest, so the unittests here covers some cases that will only be +# executed in the python3 sub-process. @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestInitParallelEnv(unittest.TestCase): + def test_check_env_failed(self): os.environ['FLAGS_selected_gpus'] = '0' os.environ['PADDLE_TRAINER_ID'] = '0' @@ -56,6 +57,7 @@ class TestInitParallelEnv(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSpawnAssistMethod(unittest.TestCase): + def test_nprocs_greater_than_device_num_error(self): with self.assertRaises(RuntimeError): _get_subprocess_env_list(nprocs=100, options=dict()) diff --git a/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py b/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py index 7dd0c762598..a448884df00 100644 --- a/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py @@ -52,6 +52,7 @@ def spectral_norm(weight, u, v, dim, power_iters, eps): "because grad is not calculated in power iterations, " "which cannot be checked by python grad unittests") class TestSpectralNormOpNoGrad(OpTest): + def setUp(self): self.initTestCase() self.op_type = 'spectral_norm' @@ -92,6 +93,7 @@ class TestSpectralNormOpNoGrad(OpTest): "because grad is not calculated in power iterations, " "which cannot be checked by python grad unittests") class TestSpectralNormOpNoGrad2(TestSpectralNormOpNoGrad): + def initTestCase(self): self.weight_shape = (2, 3, 3, 3) self.u_shape = (3, ) @@ -102,11 +104,13 @@ class TestSpectralNormOpNoGrad2(TestSpectralNormOpNoGrad): class TestSpectralNormOp(TestSpectralNormOpNoGrad): + def test_check_grad_ignore_uv(self): self.check_grad( ['Weight'], 'Out', - no_grad_set=set(["U", "V"]), ) + no_grad_set=set(["U", "V"]), + ) def initTestCase(self): 
self.weight_shape = (10, 12) @@ -118,6 +122,7 @@ class TestSpectralNormOp(TestSpectralNormOpNoGrad): class TestSpectralNormOp2(TestSpectralNormOp): + def initTestCase(self): self.weight_shape = (2, 6, 3, 3) self.u_shape = (6, ) @@ -128,6 +133,7 @@ class TestSpectralNormOp2(TestSpectralNormOp): class TestSpectralNormOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): @@ -147,11 +153,13 @@ class TestSpectralNormOpError(unittest.TestCase): class TestDygraphSpectralNormOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): shape = (2, 4, 3, 3) - spectralNorm = fluid.dygraph.nn.SpectralNorm( - shape, dim=1, power_iters=2) + spectralNorm = fluid.dygraph.nn.SpectralNorm(shape, + dim=1, + power_iters=2) def test_Variable(): weight_1 = np.random.random((2, 4)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py b/python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py index fb401347308..8f238084587 100644 --- a/python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py +++ b/python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py @@ -28,6 +28,7 @@ from paddle.fluid.layer_helper import LayerHelper class TestCPULoDTensorArrayOps(unittest.TestCase): + def place(self): return core.CPUPlace() @@ -52,12 +53,11 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): expect_false = core.LoDTensor() expect_false.set(expect_false_tensor, self.place()) - self.main( - tensor=tensor, - mask=mask, - expect_true=expect_true, - expect_false=expect_false, - expect_out=tensor) + self.main(tensor=tensor, + mask=mask, + expect_true=expect_true, + expect_false=expect_false, + expect_out=tensor) def split_and_merge_lod_tensor_level_0(self, use_merge_lod_infer=False): tensor = core.LoDTensor() @@ -84,13 +84,12 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): expect_false.set(expect_false_tensor, self.place()) expect_false.set_recursive_sequence_lengths(expect_false_lod) - self.main( - tensor=tensor, - mask=mask, - expect_true=expect_true, - expect_false=expect_false, - expect_out=tensor, - use_merge_lod_infer=use_merge_lod_infer) + self.main(tensor=tensor, + mask=mask, + expect_true=expect_true, + expect_false=expect_false, + expect_out=tensor, + use_merge_lod_infer=use_merge_lod_infer) def test_split_and_merge_lod_tensor_1(self): self.split_and_merge_lod_tensor_level_0() @@ -129,31 +128,31 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): helper = LayerHelper('merge_lod_tensor_infer') out = helper.create_variable_for_type_inference( dtype=out_true.dtype) - helper.append_op( - type='merge_lod_tensor_infer', - inputs={ - 'X': x, - 'Mask': y, - 'InTrue': out_true, - 'InFalse': out_false - }, - outputs={'Out': out}, - attrs={'level': level}) + helper.append_op(type='merge_lod_tensor_infer', + inputs={ + 'X': x, + 'Mask': y, + 'InTrue': out_true, + 'InFalse': out_false + }, + outputs={'Out': out}, + attrs={'level': level}) out.persistable = True else: - out = merge_lod_tensor( - in_true=out_true, - in_false=out_false, - mask=y, - x=x, - level=level) + out = merge_lod_tensor(in_true=out_true, + in_false=out_false, + mask=y, + x=x, + level=level) out.persistable = True exe = Executor(place) scope = core.Scope() exe.run(program, - feed={'x': tensor, - 'y': mask}, + feed={ + 'x': tensor, + 'y': mask + }, scope=scope, return_numpy=False) @@ -174,20 +173,28 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): class 
TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): + def test_grad(self): place = core.CPUPlace() program = Program() with program_guard(program): - x = layers.data( - name='x', shape=[1], dtype='float32', stop_gradient=False) - y = layers.data( - name='y', shape=[1], dtype='bool', stop_gradient=False) + x = layers.data(name='x', + shape=[1], + dtype='float32', + stop_gradient=False) + y = layers.data(name='y', + shape=[1], + dtype='bool', + stop_gradient=False) level = 0 out_true, out_false = split_lod_tensor(input=x, mask=y, level=level) - out = merge_lod_tensor( - in_true=out_true, in_false=out_false, mask=y, x=x, level=level) + out = merge_lod_tensor(in_true=out_true, + in_false=out_false, + mask=y, + x=x, + level=level) mean = layers.mean(out) append_backward(mean) @@ -207,14 +214,16 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): g_vars = program.global_block().var(x.name + "@GRAD") g_out = [ - item.sum() - for item in map(np.array, - exe.run(program, - feed={'x': tensor, - 'y': mask}, - fetch_list=[g_vars], - scope=scope, - return_numpy=False)) + item.sum() for item in map( + np.array, + exe.run(program, + feed={ + 'x': tensor, + 'y': mask + }, + fetch_list=[g_vars], + scope=scope, + return_numpy=False)) ] g_out_sum = np.array(g_out).sum() @@ -223,68 +232,78 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): class TestMergeLodTensorOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - input_data = layers.data( - name='x', shape=[1], dtype='float32', stop_gradient=False) - y = layers.data( - name='y', shape=[1], dtype='bool', stop_gradient=False) - x_true = layers.data( - name='x_true', shape=[1], dtype='float32', stop_gradient=False) - x_false = layers.data( - name='x_false', shape=[1], dtype='float32', stop_gradient=False) + input_data = layers.data(name='x', + shape=[1], + dtype='float32', + stop_gradient=False) + y = layers.data(name='y', + shape=[1], + dtype='bool', + stop_gradient=False) + x_true = layers.data(name='x_true', + shape=[1], + dtype='float32', + stop_gradient=False) + x_false = layers.data(name='x_false', + shape=[1], + dtype='float32', + stop_gradient=False) level = 0 def test_x(): - out = merge_lod_tensor( - int_true=x_true, - in_false=x_false, - x=set(), - mask=y, - level=level) + out = merge_lod_tensor(int_true=x_true, + in_false=x_false, + x=set(), + mask=y, + level=level) self.assertRaises(TypeError, test_x) def test_mask(): - out = merge_lod_tensor( - int_true=x_true, - in_false=x_false, - x=input_data, - mask=set(), - level=level) + out = merge_lod_tensor(int_true=x_true, + in_false=x_false, + x=input_data, + mask=set(), + level=level) self.assertRaises(TypeError, test_mask) def test_xtrue(): - out = merge_lod_tensor( - int_true=set(), - in_false=x_false, - x=input_data, - mask=y, - level=level) + out = merge_lod_tensor(int_true=set(), + in_false=x_false, + x=input_data, + mask=y, + level=level) self.assertRaises(TypeError, test_xtrue) def test_xfalse(): - out = merge_lod_tensor( - int_true=x_true, - in_false=set(), - x=input_data, - mask=y, - level=level) + out = merge_lod_tensor(int_true=x_true, + in_false=set(), + x=input_data, + mask=y, + level=level) self.assertRaises(TypeError, test_xfalse) class TestSplitLodTensorWithError(unittest.TestCase): + def test_error(self): main_program = Program() startup_program = Program() with program_guard(main_program, startup_program): - x = layers.data( - name='x', shape=[1], dtype='float32', stop_gradient=False) - y = layers.data( - name='y', shape=[1], 
dtype='bool', stop_gradient=False) + x = layers.data(name='x', + shape=[1], + dtype='float32', + stop_gradient=False) + y = layers.data(name='y', + shape=[1], + dtype='bool', + stop_gradient=False) level = 0 with self.assertRaises(TypeError): diff --git a/python/paddle/fluid/tests/unittests/test_split_op.py b/python/paddle/fluid/tests/unittests/test_split_op.py index bf3be4080a9..e3f72d7b41c 100644 --- a/python/paddle/fluid/tests/unittests/test_split_op.py +++ b/python/paddle/fluid/tests/unittests/test_split_op.py @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard class TestSplitOp(OpTest): + def setUp(self): self._set_op_type() self.dtype = self.get_dtype() @@ -56,6 +57,7 @@ class TestSplitOp(OpTest): # test with attr(num) class TestSplitOp_2(OpTest): + def setUp(self): self._set_op_type() self.dtype = self.get_dtype() @@ -93,6 +95,7 @@ class TestSplitOp_2(OpTest): # attr(axis) is Tensor class TestSplitOp_AxisTensor(OpTest): + def setUp(self): self._set_op_type() self.dtype = self.get_dtype() @@ -129,6 +132,7 @@ class TestSplitOp_AxisTensor(OpTest): # attr(sections) is list containing Tensor class TestSplitOp_SectionsTensor(OpTest): + def setUp(self): self._set_op_type() self.dtype = self.get_dtype() @@ -174,6 +178,7 @@ class TestSplitOp_SectionsTensor(OpTest): class TestSplitOp_unk_section(OpTest): + def setUp(self): self._set_op_type() self.dtype = self.get_dtype() @@ -210,6 +215,7 @@ class TestSplitOp_unk_section(OpTest): class TestSplitByrefOp(OpTest): + def _set_op_type(self): self.op_type = "split_byref" @@ -218,9 +224,11 @@ class TestSplitByrefOp(OpTest): def create_test_fp16(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSplitFp16(parent): + def get_dtype(self): return np.float16 @@ -238,9 +246,11 @@ create_test_fp16(TestSplitOp) def create_test_bf16(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSplitBf16(parent): + def get_dtype(self): return np.uint16 @@ -260,6 +270,7 @@ create_test_bf16(TestSplitOp) class TestSplitAPI(unittest.TestCase): + def test_api(self): input_1 = np.random.random([4, 5, 6]).astype("int32") positive_1_int32 = fluid.layers.fill_constant([1], "int32", 1) @@ -273,16 +284,19 @@ class TestSplitAPI(unittest.TestCase): num_or_sections=[positive_2_int64, positive_1_int32, -1], dim=positive_1_int64) - out_3, out_4, out_5 = fluid.layers.split( - input=x_1, num_or_sections=[2, 1, 2], dim=positive_1_int32) + out_3, out_4, out_5 = fluid.layers.split(input=x_1, + num_or_sections=[2, 1, 2], + dim=positive_1_int32) fluid.layers.split(input=x_2, num_or_sections=2, dim=2) exe = fluid.Executor(place=fluid.CPUPlace()) - [res_0, res_1, res_2, res_3, res_4, res_5] = exe.run( - fluid.default_main_program(), - feed={"x_1": input_1, - "x_2": input_1}, - fetch_list=[out_0, out_1, out_2, out_3, out_4, out_5]) + [res_0, res_1, res_2, res_3, res_4, + res_5] = exe.run(fluid.default_main_program(), + feed={ + "x_1": input_1, + "x_2": input_1 + }, + fetch_list=[out_0, out_1, out_2, out_3, out_4, out_5]) out = np.split(input_1, [2, 3], 1) assert np.array_equal(res_0, out[0]) @@ -294,6 +308,7 @@ class TestSplitAPI(unittest.TestCase): class TestSplitOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The type of axis in split_op should be int or Variable. 
@@ -332,6 +347,7 @@ class TestSplitOpError(unittest.TestCase): class API_TestSplit(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data1 = fluid.layers.data('data1', shape=[4, 6, 6], dtype='float64') @@ -341,8 +357,10 @@ class API_TestSplit(unittest.TestCase): exe = fluid.Executor(place) input1 = np.random.random([4, 6, 6]).astype('float64') input2 = np.array([2]).astype('int32') - r0, r1, r2, = exe.run(feed={"data1": input1, - "data2": input2}, + r0, r1, r2, = exe.run(feed={ + "data1": input1, + "data2": input2 + }, fetch_list=[x0, x1, x2]) ex_x0, ex_x1, ex_x2 = np.split(input1, 3, axis=2) self.assertTrue(np.allclose(ex_x0, r0)) @@ -351,6 +369,7 @@ class API_TestSplit(unittest.TestCase): class API_TestSplit2(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data1 = fluid.layers.data('data1', shape=[4, 6, 6], dtype='float64') @@ -367,6 +386,7 @@ class API_TestSplit2(unittest.TestCase): class API_TestSplit3(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data = fluid.layers.data('data', shape=[-1, 10], dtype='float64') @@ -381,6 +401,7 @@ class API_TestSplit3(unittest.TestCase): class API_TestSplit4(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data = fluid.layers.data('data', shape=[-1, 10], dtype='float64') @@ -390,8 +411,10 @@ class API_TestSplit4(unittest.TestCase): exe = fluid.Executor(place) input1 = np.random.random([1, 10]).astype('float64') input2 = np.array([7]).astype('int32') - r0, r1 = exe.run(feed={"data": input1, - "index": input2}, + r0, r1 = exe.run(feed={ + "data": input1, + "index": input2 + }, fetch_list=[x0, x1]) ex_x0, ex_x1 = np.split(input1, (3, ), axis=1) self.assertTrue(np.allclose(ex_x0, r0)) @@ -399,6 +422,7 @@ class API_TestSplit4(unittest.TestCase): class API_TestDygraphSplit(unittest.TestCase): + def test_out1(self): with fluid.dygraph.guard(): input_1 = np.random.random([4, 6, 6]).astype("int32") @@ -451,8 +475,9 @@ class API_TestDygraphSplit(unittest.TestCase): # input is a variable which shape is [4, 6, 6] input = paddle.to_tensor(input_1) num1 = paddle.full(shape=[1], fill_value=2, dtype='int32') - x0, x1, x2 = paddle.split( - input, num_or_sections=[num1, 2, 2], axis=1) + x0, x1, x2 = paddle.split(input, + num_or_sections=[num1, 2, 2], + axis=1) x0_out = x0.numpy() x1_out = x1.numpy() x2_out = x2.numpy() @@ -467,8 +492,9 @@ class API_TestDygraphSplit(unittest.TestCase): # input is a variable which shape is [4, 6, 6] input = paddle.to_tensor(input_1) num1 = paddle.full(shape=[1], fill_value=1, dtype='int32') - x0, x1, x2 = paddle.split( - input, num_or_sections=[2, 2, 2], axis=num1) + x0, x1, x2 = paddle.split(input, + num_or_sections=[2, 2, 2], + axis=num1) x0_out = x0.numpy() x1_out = x1.numpy() x2_out = x2.numpy() @@ -479,6 +505,7 @@ class API_TestDygraphSplit(unittest.TestCase): class API_TestEmptySplit(unittest.TestCase): + def test_axis_input_empty_section(self): with fluid.dygraph.guard(): input_1 = np.random.random([8, 6, 6]).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_split_program.py b/python/paddle/fluid/tests/unittests/test_split_program.py index 3245e8d997a..ff8348eb719 100644 --- a/python/paddle/fluid/tests/unittests/test_split_program.py +++ b/python/paddle/fluid/tests/unittests/test_split_program.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,6 +22,7 @@ import numpy as np class TestSplitProgram(unittest.TestCase): + def setUp(self): paddle.enable_static() if paddle.is_compiled_with_cuda(): @@ -31,10 +32,12 @@ class TestSplitProgram(unittest.TestCase): main = paddle.static.Program() startup = paddle.static.Program() with paddle.static.program_guard(main, startup): - image = paddle.static.data( - shape=[batch_size, 3, 224, 224], dtype='float32', name='image') - label = paddle.static.data( - shape=[batch_size, 1], dtype='int64', name='label') + image = paddle.static.data(shape=[batch_size, 3, 224, 224], + dtype='float32', + name='image') + label = paddle.static.data(shape=[batch_size, 1], + dtype='int64', + name='label') model = resnet(pretrained=False) loss_fn = nn.loss.CrossEntropyLoss() @@ -63,9 +66,8 @@ class TestSplitProgram(unittest.TestCase): self.assertEqual(len(vars_actual), len(vars_expected)) for actual, expected in zip(vars_actual, vars_expected): self.assertEqual(actual.shape, expected.shape) - self.assertTrue( - np.array_equal(actual, expected), - '{}\n{}\n'.format(actual, expected)) + self.assertTrue(np.array_equal(actual, expected), + '{}\n{}\n'.format(actual, expected)) def get_places(self): places = [paddle.CPUPlace()] @@ -90,8 +92,10 @@ class TestSplitProgram(unittest.TestCase): exe = paddle.static.Executor(place) image_np = np.random.random(size=image.shape).astype('float32') - label_np = np.random.randint( - low=0, high=1000, dtype='int64', size=label.shape) + label_np = np.random.randint(low=0, + high=1000, + dtype='int64', + size=label.shape) scope = paddle.static.Scope() if not use_split: @@ -99,14 +103,16 @@ class TestSplitProgram(unittest.TestCase): exe.run(startup_prog) for _ in range(batch_num): exe.run(main_prog, - feed={image.name: image_np, - label.name: label_np}) + feed={ + image.name: image_np, + label.name: label_np + }) return self.get_var_values(scope, startup_vars) op_num = len(main_prog.global_block().ops) split_op_indices = [int(op_num / 3.0), int(op_num * 3 / 4.0)] - programs, input_vars, output_vars = split_program(main_prog, - split_op_indices) + programs, input_vars, output_vars = split_program( + main_prog, split_op_indices) op_nums = [0] + split_op_indices + [op_num] op_nums = [op_nums[i + 1] - op_nums[i] for i in range(len(op_nums) - 1)] num_split = len(split_op_indices) + 1 @@ -137,8 +143,8 @@ class TestSplitProgram(unittest.TestCase): for out_name, out_value in zip(output_vars[i], output_var_values): if not out_value._is_initialized(): - tmp_vars[out_name] = np.ndarray(out_value._get_dims( - )).astype('float32') + tmp_vars[out_name] = np.ndarray( + out_value._get_dims()).astype('float32') else: tmp_vars[out_name] = np.array(out_value) diff --git a/python/paddle/fluid/tests/unittests/test_spp_op.py b/python/paddle/fluid/tests/unittests/test_spp_op.py index 4a7ea97cfbd..a4f34c4fcfa 100644 --- a/python/paddle/fluid/tests/unittests/test_spp_op.py +++ b/python/paddle/fluid/tests/unittests/test_spp_op.py @@ -22,6 +22,7 @@ from test_pool2d_op import avg_pool2D_forward_naive class TestSppOp(OpTest): + def setUp(self): self.op_type = "spp" 
self.init_test_case() @@ -37,13 +38,13 @@ class TestSppOp(OpTest): padding = [0, 0] kernel_size[0] = np.ceil(hsize / bins.astype("double")).astype("int32") - padding[0] = ( - (kernel_size[0] * bins - hsize + 1) / 2).astype("int32") + padding[0] = ((kernel_size[0] * bins - hsize + 1) / + 2).astype("int32") kernel_size[1] = np.ceil(wsize / bins.astype("double")).astype("int32") - padding[1] = ( - (kernel_size[1] * bins - wsize + 1) / 2).astype("int32") + padding[1] = ((kernel_size[1] * bins - wsize + 1) / + 2).astype("int32") out_level = self.pool2D_forward_naive(input, kernel_size, kernel_size, padding) out_level_flatten.append( @@ -53,7 +54,9 @@ class TestSppOp(OpTest): else: output = np.concatenate((output, out_level_flatten[i]), 1) # output = np.concatenate(out_level_flatten.tolist(), 0); - self.inputs = {'X': input.astype('float64'), } + self.inputs = { + 'X': input.astype('float64'), + } self.attrs = { 'pyramid_height': self.pyramid_height, 'pooling_type': self.pool_type @@ -74,6 +77,7 @@ class TestSppOp(OpTest): class TestCase2(TestSppOp): + def init_test_case(self): self.shape = [3, 2, 16, 16] self.pyramid_height = 3 diff --git a/python/paddle/fluid/tests/unittests/test_square_error_cost.py b/python/paddle/fluid/tests/unittests/test_square_error_cost.py index a10d0efe3c8..18d6d58daa5 100644 --- a/python/paddle/fluid/tests/unittests/test_square_error_cost.py +++ b/python/paddle/fluid/tests/unittests/test_square_error_cost.py @@ -24,6 +24,7 @@ from paddle.fluid.executor import Executor class TestSquareErrorCost(unittest.TestCase): + def test_square_error_cost(self): input_val = np.random.uniform(0.1, 0.5, (2, 3)).astype("float32") label_val = np.random.uniform(0.1, 0.5, (2, 3)).astype("float32") @@ -41,15 +42,19 @@ class TestSquareErrorCost(unittest.TestCase): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = Executor(place) result = exe.run(fluid.default_main_program(), - feed={"input": input_val, - "label": label_val}, + feed={ + "input": input_val, + "label": label_val + }, fetch_list=[output]) self.assertTrue(np.isclose(np_result, result).all()) class TestSquareErrorInvalidInput(unittest.TestCase): + def test_error(self): + def test_invalid_input(): input = [256, 3] label = fluid.data(name='label1', shape=[None, 3], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/test_squared_l2_distance_op.py b/python/paddle/fluid/tests/unittests/test_squared_l2_distance_op.py index b964793342e..3f4d376a942 100644 --- a/python/paddle/fluid/tests/unittests/test_squared_l2_distance_op.py +++ b/python/paddle/fluid/tests/unittests/test_squared_l2_distance_op.py @@ -20,6 +20,7 @@ from op_test import OpTest class TestSquaredL2DistanceOp_f0(OpTest): + def setUp(self): self.op_type = "squared_l2_distance" self.inputs = { @@ -41,6 +42,7 @@ class TestSquaredL2DistanceOp_f0(OpTest): class TestSquaredL2DistanceOp_f1(OpTest): + def setUp(self): self.op_type = "squared_l2_distance" self.inputs = { @@ -62,6 +64,7 @@ class TestSquaredL2DistanceOp_f1(OpTest): class TestSquaredL2DistanceOp_f2(OpTest): + def setUp(self): self.op_type = "squared_l2_distance" self.inputs = { diff --git a/python/paddle/fluid/tests/unittests/test_squared_l2_norm_op.py b/python/paddle/fluid/tests/unittests/test_squared_l2_norm_op.py index 430632ebb87..ee8f7245634 100644 --- a/python/paddle/fluid/tests/unittests/test_squared_l2_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_squared_l2_norm_op.py @@ -39,11 +39,13 @@ class TestL2LossOp(OpTest): self.check_output() def test_check_grad(self): - 
self.check_grad( - ['X'], 'Out', max_relative_error=self.max_relative_error) + self.check_grad(['X'], + 'Out', + max_relative_error=self.max_relative_error) class TestL2LossDeterministic(unittest.TestCase): + def check_place(self, place): with paddle.fluid.dygraph.guard(place): x_np = np.random.rand(5, 11, 13).astype('float32') diff --git a/python/paddle/fluid/tests/unittests/test_squeeze2_op.py b/python/paddle/fluid/tests/unittests/test_squeeze2_op.py index 7d7893cfda0..711373165fd 100755 --- a/python/paddle/fluid/tests/unittests/test_squeeze2_op.py +++ b/python/paddle/fluid/tests/unittests/test_squeeze2_op.py @@ -25,6 +25,7 @@ paddle.enable_static() # Correct: General. class TestSqueezeOp(OpTest): + def setUp(self): self.op_type = "squeeze2" self.python_api = paddle.squeeze @@ -56,6 +57,7 @@ class TestSqueezeOp(OpTest): # Correct: There is mins axis. class TestSqueezeOp1(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = (0, -2) @@ -64,14 +66,16 @@ class TestSqueezeOp1(TestSqueezeOp): # Correct: No axes input. class TestSqueezeOp2(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = () self.new_shape = (20, 5) -# Correct: Just part of axes be squeezed. +# Correct: Just part of axes be squeezed. class TestSqueezeOp3(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (6, 1, 5, 1, 4, 1) self.axes = (1, -1) diff --git a/python/paddle/fluid/tests/unittests/test_squeeze_op.py b/python/paddle/fluid/tests/unittests/test_squeeze_op.py index e0e31894cb5..c7a0724d372 100755 --- a/python/paddle/fluid/tests/unittests/test_squeeze_op.py +++ b/python/paddle/fluid/tests/unittests/test_squeeze_op.py @@ -28,12 +28,15 @@ paddle.enable_static() # Correct: General. class TestSqueezeOp(OpTest): + def setUp(self): self.op_type = "squeeze" self.init_test_case() self.inputs = {"X": np.random.random(self.ori_shape).astype("float64")} self.init_attrs() - self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape), } + self.outputs = { + "Out": self.inputs["X"].reshape(self.new_shape), + } def test_check_output(self): self.check_output() @@ -51,6 +54,7 @@ class TestSqueezeOp(OpTest): class TestSqueezeBF16Op(OpTest): + def setUp(self): self.op_type = "squeeze" self.dtype = np.uint16 @@ -78,6 +82,7 @@ class TestSqueezeBF16Op(OpTest): # Correct: There is mins axis. class TestSqueezeOp1(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (1, 3, 1, 40) self.axes = (0, -2) @@ -86,14 +91,16 @@ class TestSqueezeOp1(TestSqueezeOp): # Correct: No axes input. class TestSqueezeOp2(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = () self.new_shape = (20, 5) -# Correct: Just part of axes be squeezed. +# Correct: Just part of axes be squeezed. class TestSqueezeOp3(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (6, 1, 5, 1, 4, 1) self.axes = (1, -1) @@ -102,6 +109,7 @@ class TestSqueezeOp3(TestSqueezeOp): # Correct: The demension of axis is not of size 1 remains unchanged. class TestSqueezeOp4(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (6, 1, 5, 1, 4, 1) self.axes = (1, 2) @@ -109,12 +117,13 @@ class TestSqueezeOp4(TestSqueezeOp): class TestSqueezeOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): # The input type of softmax_op must be Variable. 
- x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], paddle.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + paddle.CPUPlace()) self.assertRaises(TypeError, paddle.squeeze, x1) # The input axes of squeeze must be list. x2 = paddle.static.data(name='x2', shape=[4], dtype="int32") @@ -125,6 +134,7 @@ class TestSqueezeOpError(unittest.TestCase): class API_TestSqueeze(unittest.TestCase): + def setUp(self): self.executed_api() @@ -135,8 +145,9 @@ class API_TestSqueeze(unittest.TestCase): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data1 = paddle.static.data( - 'data1', shape=[-1, 1, 10], dtype='float64') + data1 = paddle.static.data('data1', + shape=[-1, 1, 10], + dtype='float64') result_squeeze = self.squeeze(data1, axis=[1]) place = paddle.CPUPlace() exe = paddle.static.Executor(place) @@ -148,11 +159,13 @@ class API_TestSqueeze(unittest.TestCase): class API_TestStaticSqueeze_(API_TestSqueeze): + def executed_api(self): self.squeeze = paddle.squeeze_ class API_TestDygraphSqueeze(unittest.TestCase): + def setUp(self): self.executed_api() @@ -206,6 +219,7 @@ class API_TestDygraphSqueeze(unittest.TestCase): class API_TestDygraphSqueezeInplace(API_TestDygraphSqueeze): + def executed_api(self): self.squeeze = paddle.squeeze_ diff --git a/python/paddle/fluid/tests/unittests/test_stack_op.py b/python/paddle/fluid/tests/unittests/test_stack_op.py index faabcea13ae..6f4e490be6b 100644 --- a/python/paddle/fluid/tests/unittests/test_stack_op.py +++ b/python/paddle/fluid/tests/unittests/test_stack_op.py @@ -21,6 +21,7 @@ import paddle.fluid.core as core class TestStackOpBase(OpTest): + def initDefaultParameters(self): self.num_inputs = 4 self.input_dim = (5, 6, 7) @@ -63,36 +64,43 @@ class TestStackOpBase(OpTest): class TestStackOp1(TestStackOpBase): + def initParameters(self): self.num_inputs = 8 class TestStackOp2(TestStackOpBase): + def initParameters(self): self.num_inputs = 10 class TestStackOp3(TestStackOpBase): + def initParameters(self): self.axis = -1 class TestStackOp4(TestStackOpBase): + def initParameters(self): self.axis = -4 class TestStackOp5(TestStackOpBase): + def initParameters(self): self.axis = 1 class TestStackOp6(TestStackOpBase): + def initParameters(self): self.axis = 3 class TestStackBF16Op(OpTest): + def initDefaultParameters(self): self.num_inputs = 4 self.input_dim = (5, 6, 7) @@ -167,9 +175,8 @@ class TestStackAPIWithLoDTensorArray(unittest.TestCase): exe = fluid.Executor(self.place) res = exe.run(self.program, fetch_list=self.out_var) self.assertTrue( - np.array_equal( - res[0], np.stack( - [self.x] * self.iter_num, axis=self.axis))) + np.array_equal(res[0], + np.stack([self.x] * self.iter_num, axis=self.axis))) class TestTensorStackAPIWithLoDTensorArray(unittest.TestCase): @@ -203,12 +210,12 @@ class TestTensorStackAPIWithLoDTensorArray(unittest.TestCase): exe = fluid.Executor(self.place) res = exe.run(self.program, fetch_list=self.out_var) self.assertTrue( - np.array_equal( - res[0], np.stack( - [self.x] * self.iter_num, axis=self.axis))) + np.array_equal(res[0], + np.stack([self.x] * self.iter_num, axis=self.axis))) class API_test(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data1 = fluid.layers.data('data1', shape=[1, 2], dtype='float64') @@ -220,11 +227,12 @@ class API_test(unittest.TestCase): input1 = np.random.random([1, 2]).astype('float64') input2 = np.random.random([1, 2]).astype('float64') input3 = 
np.random.random([1, 2]).astype('float64') - result, = exe.run( - feed={"data1": input1, - "data2": input2, - "data3": input3}, - fetch_list=[result_stack]) + result, = exe.run(feed={ + "data1": input1, + "data2": input2, + "data3": input3 + }, + fetch_list=[result_stack]) expected_result = np.stack([input1, input2, input3], axis=0) self.assertTrue(np.allclose(expected_result, result)) @@ -235,6 +243,7 @@ class API_test(unittest.TestCase): class API_DygraphTest(unittest.TestCase): + def test_out(self): data1 = np.array([[1.0, 2.0]]) data2 = np.array([[3.0, 4.0]]) diff --git a/python/paddle/fluid/tests/unittests/test_static_model_parallel.py b/python/paddle/fluid/tests/unittests/test_static_model_parallel.py index 6f2f7408262..ee11b3acaf9 100644 --- a/python/paddle/fluid/tests/unittests/test_static_model_parallel.py +++ b/python/paddle/fluid/tests/unittests/test_static_model_parallel.py @@ -24,6 +24,7 @@ flag_name = os.path.splitext(__file__)[0] class TestStaticModelParallel(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -34,29 +35,26 @@ class TestStaticModelParallel(TestDistBase): def test_dist_static_model_parallel(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "static_model_parallel_by_row.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("static_model_parallel_by_row.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) def test_dist_static_model_parallel2(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "static_model_parallel_by_col.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("static_model_parallel_by_col.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) def test_dist_static_model_parallel3(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "static_model_parallel_embedding.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("static_model_parallel_embedding.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_attention.py b/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_attention.py index e4ce8e8170f..7675ec7f477 100644 --- a/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_attention.py +++ b/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_attention.py @@ -24,6 +24,7 @@ flag_name = os.path.splitext(__file__)[0] class TestStaticModelParallel(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -34,11 +35,10 @@ class TestStaticModelParallel(TestDistBase): def test_dist_static_model_parallel_fused_feedforward(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "static_model_parallel_fused_attention.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("static_model_parallel_fused_attention.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_feedforward.py b/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_feedforward.py index 1a6b637e1b4..cb535ee43da 100644 --- 
a/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_feedforward.py +++ b/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_feedforward.py @@ -24,6 +24,7 @@ flag_name = os.path.splitext(__file__)[0] class TestStaticModelParallel(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False @@ -34,11 +35,10 @@ class TestStaticModelParallel(TestDistBase): def test_dist_static_model_parallel_fused_feedforward(self): import paddle.fluid as fluid if fluid.core.is_compiled_with_cuda(): - self.check_with_place( - "static_model_parallel_fused_feedforward.py", - delta=1e-5, - check_error_log=True, - log_name=flag_name) + self.check_with_place("static_model_parallel_fused_feedforward.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_multi_transformer.py b/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_multi_transformer.py index 5475fd4a10a..f300b561114 100644 --- a/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_multi_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_multi_transformer.py @@ -24,6 +24,7 @@ flag_name = os.path.splitext(__file__)[0] class TestStaticModelParallel(TestDistBase): + def _setup_config(self): self._sync_mode = True self._use_reduce = False diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index cfce0bb7d31..9c44785d1c4 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -35,6 +35,7 @@ paddle.enable_static() class SimpleLSTMRNN(fluid.Layer): + def __init__(self, name_scope, hidden_size, @@ -81,23 +82,29 @@ class SimpleLSTMRNN(fluid.Layer): self.hidden_array = [] for i in range(self._num_layers): - pre_hidden = fluid.layers.slice( - init_hidden, axes=[0], starts=[i], ends=[i + 1]) - pre_cell = fluid.layers.slice( - init_cell, axes=[0], starts=[i], ends=[i + 1]) - pre_hidden = fluid.layers.reshape( - pre_hidden, shape=[-1, self._hidden_size]) - pre_cell = fluid.layers.reshape( - pre_cell, shape=[-1, self._hidden_size]) + pre_hidden = fluid.layers.slice(init_hidden, + axes=[0], + starts=[i], + ends=[i + 1]) + pre_cell = fluid.layers.slice(init_cell, + axes=[0], + starts=[i], + ends=[i + 1]) + pre_hidden = fluid.layers.reshape(pre_hidden, + shape=[-1, self._hidden_size]) + pre_cell = fluid.layers.reshape(pre_cell, + shape=[-1, self._hidden_size]) self.hidden_array.append(pre_hidden) self.cell_array.append(pre_cell) res = [] for index in range(self._num_steps): - self._input = fluid.layers.slice( - input_embedding, axes=[1], starts=[index], ends=[index + 1]) - self._input = fluid.layers.reshape( - self._input, shape=[-1, self._hidden_size]) + self._input = fluid.layers.slice(input_embedding, + axes=[1], + starts=[index], + ends=[index + 1]) + self._input = fluid.layers.reshape(self._input, + shape=[-1, self._hidden_size]) for k in range(self._num_layers): pre_hidden = self.hidden_array[k] pre_cell = self.cell_array[k] @@ -108,8 +115,9 @@ class SimpleLSTMRNN(fluid.Layer): gate_input = fluid.layers.matmul(x=nn, y=weight_1) gate_input = fluid.layers.elementwise_add(gate_input, bias) - i, j, f, o = fluid.layers.split( - gate_input, num_or_sections=4, dim=-1) + i, j, f, o = fluid.layers.split(gate_input, + num_or_sections=4, + dim=-1) c = 
pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid( i) * fluid.layers.tanh(j) m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o) @@ -123,8 +131,8 @@ class SimpleLSTMRNN(fluid.Layer): dropout_prob=self._dropout, dropout_implementation='upscale_in_train') res.append( - fluid.layers.reshape( - self._input, shape=[1, -1, self._hidden_size])) + fluid.layers.reshape(self._input, + shape=[1, -1, self._hidden_size])) real_res = fluid.layers.concat(res, 0) real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = fluid.layers.concat(self.hidden_array, 1) @@ -139,6 +147,7 @@ class SimpleLSTMRNN(fluid.Layer): class PtbModel(fluid.Layer): + def __init__(self, name_scope, hidden_size, @@ -154,13 +163,12 @@ class PtbModel(fluid.Layer): self.num_layers = num_layers self.num_steps = num_steps self.dropout = dropout - self.simple_lstm_rnn = SimpleLSTMRNN( - self.full_name(), - hidden_size, - num_steps, - num_layers=num_layers, - init_scale=init_scale, - dropout=dropout) + self.simple_lstm_rnn = SimpleLSTMRNN(self.full_name(), + hidden_size, + num_steps, + num_layers=num_layers, + init_scale=init_scale, + dropout=dropout) self.embedding = paddle.nn.Embedding( num_embeddings=vocab_size, embedding_dim=hidden_size, @@ -198,17 +206,18 @@ class PtbModel(fluid.Layer): x_emb, dropout_prob=self.drop_out, dropout_implementation='upscale_in_train') - rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, - init_c) + rnn_out, last_hidden, last_cell = self.simple_lstm_rnn( + x_emb, init_h, init_c) rnn_out = fluid.layers.reshape( rnn_out, shape=[-1, self.num_steps, self.hidden_size]) projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = fluid.layers.elementwise_add(projection, self.softmax_bias) - projection = fluid.layers.reshape( - projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( - logits=projection, label=label, soft_label=False) + projection = fluid.layers.reshape(projection, + shape=[-1, self.vocab_size]) + loss = fluid.layers.softmax_with_cross_entropy(logits=projection, + label=label, + soft_label=False) loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps]) loss = fluid.layers.reduce_mean(loss, dim=[0]) loss = fluid.layers.reduce_sum(loss) @@ -217,9 +226,10 @@ class PtbModel(fluid.Layer): class TestSaveLoadBase(unittest.TestCase): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) def test_ptb_rnn_cpu_float32(self): seed = 90 @@ -234,24 +244,26 @@ class TestSaveLoadBase(unittest.TestCase): with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel("ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - 
name="init_cell", shape=[1], dtype='float32') + init_hidden = fluid.layers.data(name="init_hidden", + shape=[1], + dtype='float32') + init_cell = fluid.layers.data(name="init_cell", + shape=[1], + dtype='float32') static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell) @@ -271,8 +283,8 @@ class TestSaveLoadBase(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') fetch_list = [static_loss, static_last_hidden, static_last_cell] out = exe.run(fluid.default_main_program(), feed={ @@ -291,8 +303,8 @@ class TestSaveLoadBase(unittest.TestCase): base_map = {} for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t @@ -305,8 +317,8 @@ class TestSaveLoadBase(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been set to zero self.assertTrue(np.sum(np.abs(new_t)) == 0) @@ -314,16 +326,17 @@ class TestSaveLoadBase(unittest.TestCase): for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) class TestSaveLoadPartial(unittest.TestCase): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) def test_ptb_rnn_cpu_float32(self): seed = 90 @@ -338,24 +351,26 @@ class TestSaveLoadPartial(unittest.TestCase): with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel("ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') + init_hidden = fluid.layers.data(name="init_hidden", + shape=[1], + dtype='float32') + init_cell = fluid.layers.data(name="init_cell", + shape=[1], + dtype='float32') static_loss, static_last_hidden, static_last_cell = ptb_model( x, 
y, init_hidden, init_cell) @@ -383,8 +398,8 @@ class TestSaveLoadPartial(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') fetch_list = [static_loss, static_last_hidden, static_last_cell] out = exe.run(fluid.default_main_program(), feed={ @@ -403,8 +418,8 @@ class TestSaveLoadPartial(unittest.TestCase): base_map = {} for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t @@ -417,8 +432,8 @@ class TestSaveLoadPartial(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been set to zero self.assertTrue(np.sum(np.abs(new_t)) == 0) @@ -426,17 +441,18 @@ class TestSaveLoadPartial(unittest.TestCase): for var in test_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) fluid.load(test_program, "./test_1.pdmodel", None) class TestSaveLoadSetStateDict(unittest.TestCase): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) def test_ptb_rnn_cpu_float32(self): seed = 90 @@ -451,24 +467,26 @@ class TestSaveLoadSetStateDict(unittest.TestCase): with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel("ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') + init_hidden = fluid.layers.data(name="init_hidden", + shape=[1], + dtype='float32') + init_cell = fluid.layers.data(name="init_cell", + shape=[1], + dtype='float32') static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell) @@ -488,8 +506,8 @@ class TestSaveLoadSetStateDict(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), 
dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') fetch_list = [static_loss, static_last_hidden, static_last_cell] out = exe.run(fluid.default_main_program(), feed={ @@ -508,8 +526,8 @@ class TestSaveLoadSetStateDict(unittest.TestCase): base_map = {} for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t @@ -522,8 +540,8 @@ class TestSaveLoadSetStateDict(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been set to zero self.assertTrue(np.sum(np.abs(new_t)) == 0) @@ -531,16 +549,17 @@ class TestSaveLoadSetStateDict(unittest.TestCase): for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) class TestProgramStatePartial(unittest.TestCase): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) def test_ptb_rnn_cpu_float32(self): seed = 90 @@ -555,24 +574,26 @@ class TestProgramStatePartial(unittest.TestCase): with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel("ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') + init_hidden = fluid.layers.data(name="init_hidden", + shape=[1], + dtype='float32') + init_cell = fluid.layers.data(name="init_cell", + shape=[1], + dtype='float32') static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell) @@ -600,8 +621,8 @@ class TestProgramStatePartial(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') fetch_list = [static_loss, static_last_hidden, 
static_last_cell] out = exe.run(fluid.default_main_program(), feed={ @@ -620,8 +641,8 @@ class TestProgramStatePartial(unittest.TestCase): base_map = {} for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t @@ -634,8 +655,8 @@ class TestProgramStatePartial(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been set to zero self.assertTrue(np.sum(np.abs(new_t)) == 0) @@ -656,8 +677,8 @@ class TestProgramStatePartial(unittest.TestCase): for var in test_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) @@ -667,8 +688,8 @@ class TestProgramStatePartial(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been set to zero self.assertTrue(np.sum(np.abs(new_t)) == 0) @@ -676,8 +697,8 @@ class TestProgramStatePartial(unittest.TestCase): for var in test_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) @@ -687,8 +708,8 @@ class TestProgramStatePartial(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been set to zero self.assertTrue(np.sum(np.abs(new_t)) == 0) @@ -696,8 +717,8 @@ class TestProgramStatePartial(unittest.TestCase): for var in test_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) @@ -707,8 +728,8 @@ class TestProgramStatePartial(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been set to zero self.assertTrue(np.sum(np.abs(new_t)) == 0) @@ -716,16 +737,17 @@ class TestProgramStatePartial(unittest.TestCase): for var in 
test_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) class TestVariableInit(unittest.TestCase): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) def test_variable_init(self): @@ -758,8 +780,8 @@ class TestVariableInit(unittest.TestCase): place = self.set_place() exe = fluid.Executor(place) - parameter_list = list( - filter(fluid.io.is_parameter, program.list_vars())) + parameter_list = list(filter(fluid.io.is_parameter, + program.list_vars())) fluid.core._create_loaded_parameter(parameter_list, new_scope, exe._default_executor) @@ -794,8 +816,8 @@ class TestVariableInit(unittest.TestCase): base_map = {} for var in program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update base_map[var.name] = t @@ -808,6 +830,7 @@ class TestVariableInit(unittest.TestCase): class TestLoadFromOldInterface(unittest.TestCase): + def setUp(self): if os.path.exists("test_path.pdparams"): os.remove("test_path.pdparams") @@ -816,8 +839,8 @@ class TestLoadFromOldInterface(unittest.TestCase): os.remove("test_static_load_var_list.pdparams") def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) def test_load_from_old_interface(self): seed = 90 @@ -832,24 +855,26 @@ class TestLoadFromOldInterface(unittest.TestCase): with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel("ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') + init_hidden = fluid.layers.data(name="init_hidden", + shape=[1], + dtype='float32') + init_cell = fluid.layers.data(name="init_cell", + shape=[1], + dtype='float32') static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell) @@ -871,8 +896,8 @@ class TestLoadFromOldInterface(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') fetch_list = 
[static_loss, static_last_hidden, static_last_cell] out = exe.run(fluid.default_main_program(), feed={ @@ -891,8 +916,8 @@ class TestLoadFromOldInterface(unittest.TestCase): base_map = {} for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t @@ -906,8 +931,8 @@ class TestLoadFromOldInterface(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been set to zero self.assertTrue(np.sum(np.abs(new_t)) == 0) @@ -915,8 +940,8 @@ class TestLoadFromOldInterface(unittest.TestCase): for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) @@ -947,24 +972,26 @@ class TestLoadFromOldInterface(unittest.TestCase): with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel("ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') + init_hidden = fluid.layers.data(name="init_hidden", + shape=[1], + dtype='float32') + init_cell = fluid.layers.data(name="init_cell", + shape=[1], + dtype='float32') static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell) @@ -986,8 +1013,8 @@ class TestLoadFromOldInterface(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') fetch_list = [static_loss, static_last_hidden, static_last_cell] out = exe.run(fluid.default_main_program(), feed={ @@ -1006,8 +1033,8 @@ class TestLoadFromOldInterface(unittest.TestCase): base_map = {} for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update self.assertTrue(np.sum(np.abs(t)) != 0) 
base_map[var.name] = t @@ -1016,7 +1043,7 @@ class TestLoadFromOldInterface(unittest.TestCase): fluid.io.save_persistables(exe, "test_static_load_var_list", main_program) - # set var to zero + # set var to zero var_list = [] for i, var in enumerate(main_program.list_vars()): if isinstance(var, framework.Parameter) or var.persistable: @@ -1025,8 +1052,8 @@ class TestLoadFromOldInterface(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been set to zero self.assertTrue(np.sum(np.abs(new_t)) == 0) @@ -1034,8 +1061,8 @@ class TestLoadFromOldInterface(unittest.TestCase): var_list_names = [var.name for var in var_list] for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) if var.name in var_list_names: # loaded vars base_t = base_map[var.name] @@ -1046,9 +1073,10 @@ class TestLoadFromOldInterface(unittest.TestCase): class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) def test_load_from_old_interface(self): seed = 90 @@ -1063,24 +1091,26 @@ class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel("ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') + init_hidden = fluid.layers.data(name="init_hidden", + shape=[1], + dtype='float32') + init_cell = fluid.layers.data(name="init_cell", + shape=[1], + dtype='float32') static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell) @@ -1100,8 +1130,8 @@ class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') fetch_list = [static_loss, static_last_hidden, static_last_cell] out = exe.run(fluid.default_main_program(), feed={ @@ -1120,15 +1150,17 @@ class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): base_map = {} for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: 
- t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t #fluid.save(main_program, "./test_1") - fluid.io.save_persistables( - exe, "test_path", main_program, filename="model_single") + fluid.io.save_persistables(exe, + "test_path", + main_program, + filename="model_single") # set var to zero for var in main_program.list_vars(): @@ -1136,8 +1168,8 @@ class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been set to zero self.assertTrue(np.sum(np.abs(new_t)) == 0) @@ -1147,8 +1179,8 @@ class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) @@ -1166,8 +1198,10 @@ class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): fluid.load(main_program, file_model_path, exe, fluid.io.get_program_persistable_vars(main_program)) - fluid.io.save_params( - exe, "test_path", main_program, filename="model_single") + fluid.io.save_params(exe, + "test_path", + main_program, + filename="model_single") with self.assertRaises(RuntimeError): fluid.load(main_program, file_model_path, exe, fluid.io.get_program_persistable_vars(main_program)) @@ -1183,22 +1217,22 @@ class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): # check save params, load var_list = get_program_persistable_vars with self.assertRaises(RuntimeError): - temp_var = framework.Variable( - main_program.global_block(), - shape=[1], - name="test_temp_var") + temp_var = framework.Variable(main_program.global_block(), + shape=[1], + name="test_temp_var") all_var_list = list(main_program.list_vars()) fluid.load(main_program, file_model_path, exe, all_var_list + [temp_var]) class TestProgramStateOldSave(unittest.TestCase): + def setUp(self): self.test_dygraph = True def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) def test_ptb_rnn_cpu_float32(self): seed = 90 @@ -1213,24 +1247,26 @@ class TestProgramStateOldSave(unittest.TestCase): with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel("ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], 
dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') + init_hidden = fluid.layers.data(name="init_hidden", + shape=[1], + dtype='float32') + init_cell = fluid.layers.data(name="init_cell", + shape=[1], + dtype='float32') static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell) @@ -1258,8 +1294,8 @@ class TestProgramStateOldSave(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') fetch_list = [static_loss, static_last_hidden, static_last_cell] out = exe.run(fluid.default_main_program(), feed={ @@ -1278,8 +1314,8 @@ class TestProgramStateOldSave(unittest.TestCase): base_map = {} for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t @@ -1292,8 +1328,8 @@ class TestProgramStateOldSave(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been set to zero self.assertTrue(np.sum(np.abs(new_t)) == 0) @@ -1345,16 +1381,17 @@ class TestProgramStateOldSave(unittest.TestCase): def check_in_static(self, main_program, base_map): for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) class TestProgramStateOldSaveSingleModel(unittest.TestCase): + def set_place(self): - return fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + return fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0) def test_ptb_rnn_cpu_float32(self): seed = 90 @@ -1369,24 +1406,26 @@ class TestProgramStateOldSaveSingleModel(unittest.TestCase): with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel("ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", 
shape=[1], dtype='float32') + init_hidden = fluid.layers.data(name="init_hidden", + shape=[1], + dtype='float32') + init_cell = fluid.layers.data(name="init_cell", + shape=[1], + dtype='float32') static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell) @@ -1414,8 +1453,8 @@ class TestProgramStateOldSaveSingleModel(unittest.TestCase): y_data = y_data.reshape((-1, 1)) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='float32') fetch_list = [static_loss, static_last_hidden, static_last_cell] out = exe.run(fluid.default_main_program(), feed={ @@ -1434,14 +1473,16 @@ class TestProgramStateOldSaveSingleModel(unittest.TestCase): base_map = {} for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t - fluid.io.save_persistables( - exe, "test_program_2", main_program, filename="model_1") + fluid.io.save_persistables(exe, + "test_program_2", + main_program, + filename="model_1") # set var to zero for var in main_program.list_vars(): @@ -1449,8 +1490,8 @@ class TestProgramStateOldSaveSingleModel(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been set to zero self.assertTrue(np.sum(np.abs(new_t)) == 0) @@ -1462,8 +1503,8 @@ class TestProgramStateOldSaveSingleModel(unittest.TestCase): for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) @@ -1472,9 +1513,9 @@ class TestProgramStateOldSaveSingleModel(unittest.TestCase): os.path.join("test_program_2", "model_1")) with self.assertRaises(TypeError): - fluid.load_program_state( - os.path.join("test_program_2", "model_1"), - var_list=["str"]) + fluid.load_program_state(os.path.join("test_program_2", + "model_1"), + var_list=["str"]) with self.assertRaises(RuntimeError): fluid.load_program_state( @@ -1486,16 +1527,16 @@ class TestProgramStateOldSaveSingleModel(unittest.TestCase): class TestStaticSaveLoadPickle(unittest.TestCase): + def test_pickle_protocol(self): # enable static mode paddle.enable_static() with new_program_scope(): # create network - x = paddle.static.data( - name="static_save_load_large_x", - shape=[None, 10], - dtype='float32') + x = paddle.static.data(name="static_save_load_large_x", + shape=[None, 10], + dtype='float32') z = paddle.static.nn.fc(x, 10, bias_attr=False) place = paddle.CPUPlace() exe = paddle.static.Executor(place) @@ -1505,8 +1546,8 @@ class TestStaticSaveLoadPickle(unittest.TestCase): base_map = {} for var in prog.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - 
.get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t @@ -1523,7 +1564,9 @@ class TestStaticSaveLoadPickle(unittest.TestCase): with self.assertRaises(ValueError): paddle.fluid.save(prog, path, 5) - protocols = [2, ] + protocols = [ + 2, + ] if sys.version_info.major >= 3 and sys.version_info.minor >= 4: protocols += [3, 4] for protocol in protocols: @@ -1535,16 +1578,16 @@ class TestStaticSaveLoadPickle(unittest.TestCase): var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) self.assertTrue(np.sum(np.abs(new_t)) == 0) paddle.fluid.load(prog, path) for var in prog.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load_bf16.py b/python/paddle/fluid/tests/unittests/test_static_save_load_bf16.py index bc8c3cc5b23..25619aa4a5c 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load_bf16.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load_bf16.py @@ -28,6 +28,7 @@ import numpy as np @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") class TestSaveLoadBF16(unittest.TestCase): + def set_place(self): return fluid.CPUPlace() @@ -44,24 +45,26 @@ class TestSaveLoadBF16(unittest.TestCase): with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) + ptb_model = PtbModel("ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) place = self.set_place() exe = fluid.Executor(place) sgd = SGDOptimizer(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') + x = fluid.layers.data(name="x", + shape=[-1, num_steps], + dtype='int64') y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') + init_hidden = fluid.layers.data(name="init_hidden", + shape=[1], + dtype='float32') + init_cell = fluid.layers.data(name="init_cell", + shape=[1], + dtype='float32') static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell) @@ -85,8 +88,8 @@ class TestSaveLoadBF16(unittest.TestCase): # slice_op PR(datatypes in model graph are different than datatypes during runtime because of that) init_hidden_data = np.zeros( (num_layers, batch_size, hidden_size), dtype='uint16') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='uint16') + init_cell_data = np.zeros((num_layers, batch_size, hidden_size), + dtype='uint16') fetch_list = [static_loss, static_last_hidden, static_last_cell] out = exe.run(fluid.default_main_program(), @@ -103,8 +106,8 @@ class 
TestSaveLoadBF16(unittest.TestCase): base_map = {} for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t @@ -117,8 +120,8 @@ class TestSaveLoadBF16(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been set to zero self.assertTrue(np.sum(np.abs(new_t)) == 0) @@ -126,8 +129,8 @@ class TestSaveLoadBF16(unittest.TestCase): for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load_large.py b/python/paddle/fluid/tests/unittests/test_static_save_load_large.py index 389fc259b55..fdb6a1f2f05 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load_large.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load_large.py @@ -28,15 +28,15 @@ LARGE_PARAM = 2**26 class TestStaticSaveLoadLargeParameters(unittest.TestCase): + def test_large_parameters_static_save(self): # enable static mode paddle.enable_static() with new_program_scope(): # create network - x = paddle.static.data( - name="static_save_load_large_x", - shape=[None, 10], - dtype='float32') + x = paddle.static.data(name="static_save_load_large_x", + shape=[None, 10], + dtype='float32') z = paddle.static.nn.fc(x, LARGE_PARAM, bias_attr=False) place = paddle.CPUPlace() exe = paddle.static.Executor(place) @@ -46,8 +46,8 @@ class TestStaticSaveLoadLargeParameters(unittest.TestCase): base_map = {} for var in prog.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) # make sure all the paramerter or optimizer var have been update self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t @@ -62,16 +62,16 @@ class TestStaticSaveLoadLargeParameters(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) self.assertTrue(np.sum(np.abs(new_t)) == 0) paddle.fluid.load(prog, path) for var in prog.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) @@ -81,16 +81,16 @@ class TestStaticSaveLoadLargeParameters(unittest.TestCase): ten = fluid.global_scope().find_var(var.name).get_tensor() ten.set(np.zeros_like(np.array(ten)), place) - new_t = np.array(fluid.global_scope().find_var(var.name) - 
.get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) self.assertTrue(np.sum(np.abs(new_t)) == 0) program_state = fluid.load_program_state(path) fluid.set_program_state(prog, program_state) for var in prog.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) + new_t = np.array(fluid.global_scope().find_var( + var.name).get_tensor()) base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) diff --git a/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py b/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py index 2c6d646baf5..0e22905e81d 100644 --- a/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py @@ -17,10 +17,12 @@ import unittest class StaticShapeInferrenceTest(unittest.TestCase): + def test_static_graph(self): paddle.enable_static() - data = paddle.fluid.layers.data( - name="x", shape=[-1, 2], dtype='float32') + data = paddle.fluid.layers.data(name="x", + shape=[-1, 2], + dtype='float32') shape = paddle.fluid.layers.shape(data) # shape should be [-1, 2] x = paddle.fluid.layers.uniform_random(shape) self.assertEqual(x.shape, data.shape) diff --git a/python/paddle/fluid/tests/unittests/test_std_layer.py b/python/paddle/fluid/tests/unittests/test_std_layer.py index 2196996afff..4252899eba6 100644 --- a/python/paddle/fluid/tests/unittests/test_std_layer.py +++ b/python/paddle/fluid/tests/unittests/test_std_layer.py @@ -27,6 +27,7 @@ def ref_std(x, axis=None, unbiased=True, keepdim=False): class TestStdAPI(unittest.TestCase): + def setUp(self): self.dtype = 'float64' self.shape = [1, 3, 4, 10] @@ -67,36 +68,43 @@ class TestStdAPI(unittest.TestCase): class TestStdAPI_dtype(TestStdAPI): + def set_attrs(self): self.dtype = 'float32' class TestStdAPI_axis_int(TestStdAPI): + def set_attrs(self): self.axis = 2 class TestStdAPI_axis_list(TestStdAPI): + def set_attrs(self): self.axis = [1, 2] class TestStdAPI_axis_tuple(TestStdAPI): + def set_attrs(self): self.axis = (1, 3) class TestStdAPI_keepdim(TestStdAPI): + def set_attrs(self): self.keepdim = False class TestStdAPI_unbiased(TestStdAPI): + def set_attrs(self): self.unbiased = False class TestStdAPI_alias(unittest.TestCase): + def test_alias(self): paddle.disable_static() x = paddle.to_tensor(np.array([10, 12], 'float32')) @@ -109,6 +117,7 @@ class TestStdAPI_alias(unittest.TestCase): class TestStdError(unittest.TestCase): + def test_error(self): with paddle.static.program_guard(paddle.static.Program()): x = paddle.fluid.data('X', [2, 3, 4], 'int32') diff --git a/python/paddle/fluid/tests/unittests/test_stft_op.py b/python/paddle/fluid/tests/unittests/test_stft_op.py index 41e950606b3..8110f1d805f 100644 --- a/python/paddle/fluid/tests/unittests/test_stft_op.py +++ b/python/paddle/fluid/tests/unittests/test_stft_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -45,13 +45,14 @@ def frame_from_librosa(x, frame_length, hop_length, axis=-1): def stft_np(x, window, n_fft, hop_length, **kwargs): frames = frame_from_librosa(x, n_fft, hop_length) - frames = np.multiply(frames.transpose([0, 2, 1]), window).transpose( - [0, 2, 1]) + frames = np.multiply(frames.transpose([0, 2, 1]), + window).transpose([0, 2, 1]) res = np.fft.rfft(frames, axis=1) return res class TestStftOp(OpTest): + def setUp(self): self.op_type = "stft" self.shape, self.type, self.attrs = self.initTestCase() @@ -60,8 +61,10 @@ class TestStftOp(OpTest): 'Window': np.hamming(self.attrs['n_fft']).astype(self.type), } self.outputs = { - 'Out': stft_np( - x=self.inputs['X'], window=self.inputs['Window'], **self.attrs) + 'Out': + stft_np(x=self.inputs['X'], + window=self.inputs['Window'], + **self.attrs) } def initTestCase(self): diff --git a/python/paddle/fluid/tests/unittests/test_strided_slice_op.py b/python/paddle/fluid/tests/unittests/test_strided_slice_op.py index 4954cfc97e4..e8d42a2fae8 100644 --- a/python/paddle/fluid/tests/unittests/test_strided_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_strided_slice_op.py @@ -55,12 +55,14 @@ def strided_slice_native_forward(input, axes, starts, ends, strides): class TestStrideSliceOp(OpTest): + def setUp(self): self.initTestCase() self.op_type = 'strided_slice' self.python_api = paddle.strided_slice - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) self.inputs = {'Input': self.input} self.outputs = {'Out': self.output} @@ -88,6 +90,7 @@ class TestStrideSliceOp(OpTest): class TestStrideSliceOp1(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(100) self.axes = [0] @@ -98,6 +101,7 @@ class TestStrideSliceOp1(TestStrideSliceOp): class TestStrideSliceOp2(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(100) self.axes = [0] @@ -108,6 +112,7 @@ class TestStrideSliceOp2(TestStrideSliceOp): class TestStrideSliceOp3(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(100) self.axes = [0] @@ -118,6 +123,7 @@ class TestStrideSliceOp3(TestStrideSliceOp): class TestStrideSliceOp4(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(3, 4, 10) self.axes = [0, 1, 2] @@ -128,6 +134,7 @@ class TestStrideSliceOp4(TestStrideSliceOp): class TestStrideSliceOp5(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(5, 5, 5) self.axes = [0, 1, 2] @@ -138,6 +145,7 @@ class TestStrideSliceOp5(TestStrideSliceOp): class TestStrideSliceOp6(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(5, 5, 5) self.axes = [0, 1, 2] @@ -148,6 +156,7 @@ class TestStrideSliceOp6(TestStrideSliceOp): class TestStrideSliceOp7(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(5, 5, 5) self.axes = [0, 1, 2] @@ -158,6 +167,7 @@ class TestStrideSliceOp7(TestStrideSliceOp): class TestStrideSliceOp8(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(1, 100, 1) self.axes = [1] @@ -168,6 +178,7 @@ class 
TestStrideSliceOp8(TestStrideSliceOp): class TestStrideSliceOp9(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(1, 100, 1) self.axes = [1] @@ -178,6 +189,7 @@ class TestStrideSliceOp9(TestStrideSliceOp): class TestStrideSliceOp10(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(10, 10) self.axes = [0, 1] @@ -188,6 +200,7 @@ class TestStrideSliceOp10(TestStrideSliceOp): class TestStrideSliceOp11(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(3, 3, 3, 4) self.axes = [0, 1, 2, 3] @@ -198,6 +211,7 @@ class TestStrideSliceOp11(TestStrideSliceOp): class TestStrideSliceOp12(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(3, 3, 3, 4, 5) self.axes = [0, 1, 2, 3, 4] @@ -208,6 +222,7 @@ class TestStrideSliceOp12(TestStrideSliceOp): class TestStrideSliceOp13(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(3, 3, 3, 6, 7, 8) self.axes = [0, 1, 2, 3, 4, 5] @@ -218,6 +233,7 @@ class TestStrideSliceOp13(TestStrideSliceOp): class TestStrideSliceOp14(TestStrideSliceOp): + def initTestCase(self): self.input = np.random.rand(4, 4, 4, 4) self.axes = [1, 2, 3] @@ -228,11 +244,13 @@ class TestStrideSliceOp14(TestStrideSliceOp): class TestStrideSliceOpBool(TestStrideSliceOp): + def test_check_grad(self): pass class TestStrideSliceOpBool1D(TestStrideSliceOpBool): + def initTestCase(self): self.input = np.random.rand(100).astype("bool") self.axes = [0] @@ -243,6 +261,7 @@ class TestStrideSliceOpBool1D(TestStrideSliceOpBool): class TestStrideSliceOpBool2D(TestStrideSliceOpBool): + def initTestCase(self): self.input = np.random.rand(10, 10).astype("bool") self.axes = [0, 1] @@ -253,6 +272,7 @@ class TestStrideSliceOpBool2D(TestStrideSliceOpBool): class TestStrideSliceOpBool3D(TestStrideSliceOpBool): + def initTestCase(self): self.input = np.random.rand(3, 4, 10).astype("bool") self.axes = [0, 1, 2] @@ -263,6 +283,7 @@ class TestStrideSliceOpBool3D(TestStrideSliceOpBool): class TestStrideSliceOpBool4D(TestStrideSliceOpBool): + def initTestCase(self): self.input = np.random.rand(3, 3, 3, 4).astype("bool") self.axes = [0, 1, 2, 3] @@ -273,6 +294,7 @@ class TestStrideSliceOpBool4D(TestStrideSliceOpBool): class TestStrideSliceOpBool5D(TestStrideSliceOpBool): + def initTestCase(self): self.input = np.random.rand(3, 3, 3, 4, 5).astype("bool") self.axes = [0, 1, 2, 3, 4] @@ -283,6 +305,7 @@ class TestStrideSliceOpBool5D(TestStrideSliceOpBool): class TestStrideSliceOpBool6D(TestStrideSliceOpBool): + def initTestCase(self): self.input = np.random.rand(3, 3, 3, 6, 7, 8).astype("bool") self.axes = [0, 1, 2, 3, 4, 5] @@ -293,6 +316,7 @@ class TestStrideSliceOpBool6D(TestStrideSliceOpBool): class TestStridedSliceOp_starts_ListTensor(OpTest): + def setUp(self): self.op_type = "strided_slice" self.config() @@ -319,8 +343,9 @@ class TestStridedSliceOp_starts_ListTensor(OpTest): self.axes = [0, 1, 2] self.strides = [1, 1, 1] self.infer_flags = [1, -1, 1] - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) self.starts_infer = [1, 10, 2] @@ -332,6 +357,7 @@ class TestStridedSliceOp_starts_ListTensor(OpTest): class TestStridedSliceOp_ends_ListTensor(OpTest): + def setUp(self): self.op_type = "strided_slice" self.config() @@ -358,8 +384,9 @@ class TestStridedSliceOp_ends_ListTensor(OpTest): self.axes = [0, 1, 2] self.strides = [1, 1, 2] 
self.infer_flags = [1, -1, 1] - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) self.ends_infer = [3, 1, 4] @@ -371,13 +398,13 @@ class TestStridedSliceOp_ends_ListTensor(OpTest): class TestStridedSliceOp_starts_Tensor(OpTest): + def setUp(self): self.op_type = "strided_slice" self.config() self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype="int32") + "StartsTensor": np.array(self.starts, dtype="int32") } self.outputs = {'Out': self.output} self.attrs = { @@ -395,8 +422,9 @@ class TestStridedSliceOp_starts_Tensor(OpTest): self.axes = [0, 1, 2] self.strides = [1, 1, 1] self.infer_flags = [-1, -1, -1] - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) def test_check_output(self): self.check_output() @@ -406,13 +434,13 @@ class TestStridedSliceOp_starts_Tensor(OpTest): class TestStridedSliceOp_ends_Tensor(OpTest): + def setUp(self): self.op_type = "strided_slice" self.config() self.inputs = { 'Input': self.input, - "EndsTensor": np.array( - self.ends, dtype="int32") + "EndsTensor": np.array(self.ends, dtype="int32") } self.outputs = {'Out': self.output} self.attrs = { @@ -430,8 +458,9 @@ class TestStridedSliceOp_ends_Tensor(OpTest): self.axes = [0, 1, 2] self.strides = [1, 1, 1] self.infer_flags = [-1, -1, -1] - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) def test_check_output(self): self.check_output() @@ -441,6 +470,7 @@ class TestStridedSliceOp_ends_Tensor(OpTest): class TestStridedSliceOp_listTensor_Tensor(OpTest): + def setUp(self): self.config() ends_tensor = [] @@ -451,8 +481,7 @@ class TestStridedSliceOp_listTensor_Tensor(OpTest): self.inputs = { 'Input': self.input, - "StartsTensor": np.array( - self.starts, dtype="int32"), + "StartsTensor": np.array(self.starts, dtype="int32"), "EndsTensorList": ends_tensor } self.outputs = {'Out': self.output} @@ -471,8 +500,9 @@ class TestStridedSliceOp_listTensor_Tensor(OpTest): self.axes = [0, 1, 2] self.strides = [1, 1, 1] self.infer_flags = [-1, -1, -1] - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) def test_check_output(self): self.check_output() @@ -482,13 +512,13 @@ class TestStridedSliceOp_listTensor_Tensor(OpTest): class TestStridedSliceOp_strides_Tensor(OpTest): + def setUp(self): self.op_type = "strided_slice" self.config() self.inputs = { 'Input': self.input, - "StridesTensor": np.array( - self.strides, dtype="int32") + "StridesTensor": np.array(self.strides, dtype="int32") } self.outputs = {'Out': self.output} self.attrs = { @@ -506,8 +536,9 @@ class TestStridedSliceOp_strides_Tensor(OpTest): self.axes = [0, 1, 2] self.strides = [1, -1, 1] self.infer_flags = [-1, -1, -1] - self.output = strided_slice_native_forward( - self.input, self.axes, self.starts, self.ends, self.strides) + self.output = strided_slice_native_forward(self.input, self.axes, + self.starts, self.ends, + self.strides) def test_check_output(self): 
self.check_output() @@ -518,42 +549,48 @@ class TestStridedSliceOp_strides_Tensor(OpTest): # Test python API class TestStridedSliceAPI(unittest.TestCase): + def test_1(self): input = np.random.random([3, 4, 5, 6]).astype("float64") minus_1 = fluid.layers.fill_constant([1], "int32", -1) minus_3 = fluid.layers.fill_constant([1], "int32", -3) - starts = fluid.layers.data( - name='starts', shape=[3], dtype='int32', append_batch_size=False) - ends = fluid.layers.data( - name='ends', shape=[3], dtype='int32', append_batch_size=False) - strides = fluid.layers.data( - name='strides', shape=[3], dtype='int32', append_batch_size=False) - - x = fluid.layers.data( - name="x", - shape=[3, 4, 5, 6], - append_batch_size=False, - dtype="float64") - out_1 = paddle.strided_slice( - x, - axes=[0, 1, 2], - starts=[-3, 0, 2], - ends=[3, 100, -1], - strides=[1, 1, 1]) - out_2 = paddle.strided_slice( - x, - axes=[0, 1, 3], - starts=[minus_3, 0, 2], - ends=[3, 100, -1], - strides=[1, 1, 1]) - out_3 = paddle.strided_slice( - x, - axes=[0, 1, 3], - starts=[minus_3, 0, 2], - ends=[3, 100, minus_1], - strides=[1, 1, 1]) - out_4 = paddle.strided_slice( - x, axes=[0, 1, 2], starts=starts, ends=ends, strides=strides) + starts = fluid.layers.data(name='starts', + shape=[3], + dtype='int32', + append_batch_size=False) + ends = fluid.layers.data(name='ends', + shape=[3], + dtype='int32', + append_batch_size=False) + strides = fluid.layers.data(name='strides', + shape=[3], + dtype='int32', + append_batch_size=False) + + x = fluid.layers.data(name="x", + shape=[3, 4, 5, 6], + append_batch_size=False, + dtype="float64") + out_1 = paddle.strided_slice(x, + axes=[0, 1, 2], + starts=[-3, 0, 2], + ends=[3, 100, -1], + strides=[1, 1, 1]) + out_2 = paddle.strided_slice(x, + axes=[0, 1, 3], + starts=[minus_3, 0, 2], + ends=[3, 100, -1], + strides=[1, 1, 1]) + out_3 = paddle.strided_slice(x, + axes=[0, 1, 3], + starts=[minus_3, 0, 2], + ends=[3, 100, minus_1], + strides=[1, 1, 1]) + out_4 = paddle.strided_slice(x, + axes=[0, 1, 2], + starts=starts, + ends=ends, + strides=strides) out_5 = x[-3:3, 0:100:2, -1:2:-1] out_6 = x[minus_3:3:1, 0:100:2, :, minus_1:2:minus_1] @@ -583,16 +620,19 @@ class TestStridedSliceAPI(unittest.TestCase): starts = [-3, 0, 2] ends = [3, 2, 4] strides_1 = [1, 1, 1] - sliced_1 = paddle.strided_slice( - x, axes=axes, starts=starts, ends=ends, strides=strides_1) + sliced_1 = paddle.strided_slice(x, + axes=axes, + starts=starts, + ends=ends, + strides=strides_1) assert sliced_1.shape == (3, 2, 2, 2) @unittest.skipIf(not paddle.is_compiled_with_cuda(), "Cannot use CUDAPinnedPlace in CPU only version") def test_cuda_pinned_place(self): with paddle.fluid.dygraph.guard(): - x = paddle.to_tensor( - np.random.randn(2, 10), place=paddle.CUDAPinnedPlace()) + x = paddle.to_tensor(np.random.randn(2, 10), + place=paddle.CUDAPinnedPlace()) self.assertTrue(x.place.is_cuda_pinned_place()) y = x[:, ::2] self.assertFalse(x.place.is_cuda_pinned_place()) @@ -600,14 +640,14 @@ class TestStridedSliceAPI(unittest.TestCase): class ArrayLayer(paddle.nn.Layer): + def __init__(self, input_size=224, output_size=10, array_size=1): super(ArrayLayer, self).__init__() self.input_size = input_size self.output_size = output_size self.array_size = array_size for i in range(self.array_size): - setattr(self, - self.create_name(i), + setattr(self, self.create_name(i), paddle.nn.Linear(input_size, output_size)) def create_name(self, index): @@ -664,6 +704,7 @@ class ArrayLayer(paddle.nn.Layer): class TestStridedSliceTensorArray(unittest.TestCase): 
+ def setUp(self): paddle.disable_static() @@ -677,9 +718,9 @@ class TestStridedSliceTensorArray(unittest.TestCase): def is_grads_equal(self, g1, g2): for i, g in enumerate(g1): - self.assertTrue( - self.grad_equal(g, g2[i]), - msg="gradient_1:\n{} \ngradient_2:\n{}".format(g, g2)) + self.assertTrue(self.grad_equal(g, g2[i]), + msg="gradient_1:\n{} \ngradient_2:\n{}".format( + g, g2)) def is_grads_equal_zeros(self, grads): for g in grads: @@ -717,20 +758,23 @@ class TestStridedSliceTensorArray(unittest.TestCase): with paddle.fluid.dygraph.guard(): class Simple(paddle.nn.Layer): + def __init__(self): super(Simple, self).__init__() def forward(self, inps): tensor_array = None for i, tensor in enumerate(inps): - index = paddle.full( - shape=[1], dtype='int64', fill_value=i) + index = paddle.full(shape=[1], + dtype='int64', + fill_value=i) if tensor_array is None: tensor_array = paddle.tensor.array_write( tensor, i=index) else: - paddle.tensor.array_write( - tensor, i=index, array=tensor_array) + paddle.tensor.array_write(tensor, + i=index, + array=tensor_array) array1 = paddle.concat(tensor_array) array2 = paddle.concat(tensor_array[::-1]) @@ -739,14 +783,12 @@ class TestStridedSliceTensorArray(unittest.TestCase): net = Simple() func = paddle.jit.to_static(net.forward) - inps1 = paddle.to_tensor( - np.random.randn(2, 10), - place=paddle.CUDAPinnedPlace(), - stop_gradient=False) - inps2 = paddle.to_tensor( - np.random.randn(2, 10), - place=paddle.CUDAPinnedPlace(), - stop_gradient=False) + inps1 = paddle.to_tensor(np.random.randn(2, 10), + place=paddle.CUDAPinnedPlace(), + stop_gradient=False) + inps2 = paddle.to_tensor(np.random.randn(2, 10), + place=paddle.CUDAPinnedPlace(), + stop_gradient=False) self.assertTrue(inps1.place.is_cuda_pinned_place()) self.assertTrue(inps2.place.is_cuda_pinned_place()) @@ -756,163 +798,191 @@ class TestStridedSliceTensorArray(unittest.TestCase): self.assertFalse(result.place.is_cuda_pinned_place()) def test_strided_slice_tensor_array(self): + class Net01(ArrayLayer): + def array_slice(self, tensors): return tensors[::-1] self.create_case(Net01(array_size=10)) class Net02(ArrayLayer): + def array_slice(self, tensors): return tensors[::-2] self.create_case(Net02(input_size=112, array_size=11)) class Net03(ArrayLayer): + def array_slice(self, tensors): return tensors[::-3] self.create_case(Net03(input_size=112, array_size=9)) class Net04(ArrayLayer): + def array_slice(self, tensors): return tensors[1::-4] self.create_case(Net04(input_size=112, array_size=9)) class Net05(ArrayLayer): + def array_slice(self, tensors): return tensors[:7:-4] self.create_case(Net05(input_size=112, array_size=9)) class Net06(ArrayLayer): + def array_slice(self, tensors): return tensors[8:0:-4] self.create_case(Net06(input_size=112, array_size=9)) class Net07(ArrayLayer): + def array_slice(self, tensors): return tensors[8:1:-4] self.create_case(Net07(input_size=112, array_size=9)) class Net08(ArrayLayer): + def array_slice(self, tensors): return tensors[::2] self.create_case(Net08(input_size=112, array_size=11)) class Net09(ArrayLayer): + def array_slice(self, tensors): return tensors[::3] self.create_case(Net09(input_size=112, array_size=9)) class Net10(ArrayLayer): + def array_slice(self, tensors): return tensors[1::4] self.create_case(Net10(input_size=112, array_size=9)) class Net11(ArrayLayer): + def array_slice(self, tensors): return tensors[:8:4] self.create_case(Net11(input_size=112, array_size=9)) class Net12(ArrayLayer): + def array_slice(self, tensors): return tensors[1:8:4] 
self.create_case(Net12(input_size=112, array_size=9)) class Net13(ArrayLayer): + def array_slice(self, tensors): return tensors[8:10:4] self.create_case(Net13(input_size=112, array_size=13)) class Net14(ArrayLayer): + def array_slice(self, tensors): return tensors[3:10:4] self.create_case(Net14(input_size=112, array_size=13)) class Net15(ArrayLayer): + def array_slice(self, tensors): return tensors[2:10:4] self.create_case(Net15(input_size=112, array_size=13)) class Net16(ArrayLayer): + def array_slice(self, tensors): return tensors[3:10:3] self.create_case(Net16(input_size=112, array_size=13)) class Net17(ArrayLayer): + def array_slice(self, tensors): return tensors[3:15:3] self.create_case(Net17(input_size=112, array_size=13)) class Net18(ArrayLayer): + def array_slice(self, tensors): return tensors[0:15:3] self.create_case(Net18(input_size=112, array_size=13)) class Net19(ArrayLayer): + def array_slice(self, tensors): return tensors[-1:-5:-3] self.create_case(Net19(input_size=112, array_size=13)) class Net20(ArrayLayer): + def array_slice(self, tensors): return tensors[-1:-6:-3] self.create_case(Net20(input_size=112, array_size=13)) class Net21(ArrayLayer): + def array_slice(self, tensors): return tensors[-3:-6:-3] self.create_case(Net21(input_size=112, array_size=13)) class Net22(ArrayLayer): + def array_slice(self, tensors): return tensors[-5:-1:3] self.create_case(Net22(input_size=112, array_size=13)) class Net23(ArrayLayer): + def array_slice(self, tensors): return tensors[-6:-1:3] self.create_case(Net23(input_size=112, array_size=13)) class Net24(ArrayLayer): + def array_slice(self, tensors): return tensors[-6:-3:3] self.create_case(Net24(input_size=112, array_size=13)) class Net25(ArrayLayer): + def array_slice(self, tensors): return tensors[0::3] self.create_case(Net25(input_size=112, array_size=13)) class Net26(ArrayLayer): + def array_slice(self, tensors): return tensors[-60:20:3] self.create_case(Net26(input_size=112, array_size=13)) class Net27(ArrayLayer): + def array_slice(self, tensors): return tensors[-3:-60:-3] diff --git a/python/paddle/fluid/tests/unittests/test_subtract_op.py b/python/paddle/fluid/tests/unittests/test_subtract_op.py index 7f3738960c5..d7d9d3c8e25 100644 --- a/python/paddle/fluid/tests/unittests/test_subtract_op.py +++ b/python/paddle/fluid/tests/unittests/test_subtract_op.py @@ -21,6 +21,7 @@ import paddle.fluid.core as core class ApiSubtractTest(unittest.TestCase): + def setUp(self): if core.is_compiled_with_cuda(): self.place = core.CUDAPlace(0) @@ -47,8 +48,10 @@ class ApiSubtractTest(unittest.TestCase): data_y = paddle.static.data("y", shape=[10, 15], dtype="float32") result_max = paddle.subtract(data_x, data_y) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "y": self.input_y}, + res, = exe.run(feed={ + "x": self.input_x, + "y": self.input_y + }, fetch_list=[result_max]) self.assertTrue(np.allclose(res, self.np_expected1)) @@ -58,8 +61,10 @@ class ApiSubtractTest(unittest.TestCase): data_z = paddle.static.data("z", shape=[15], dtype="float32") result_max = paddle.subtract(data_x, data_z) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "z": self.input_z}, + res, = exe.run(feed={ + "x": self.input_x, + "z": self.input_z + }, fetch_list=[result_max]) self.assertTrue(np.allclose(res, self.np_expected2)) @@ -69,8 +74,10 @@ class ApiSubtractTest(unittest.TestCase): data_c = paddle.static.data("c", shape=[3], dtype="int64") result_max = paddle.subtract(data_a, data_c) exe = 
paddle.static.Executor(self.place) - res, = exe.run(feed={"a": self.input_a, - "c": self.input_c}, + res, = exe.run(feed={ + "a": self.input_a, + "c": self.input_c + }, fetch_list=[result_max]) self.assertTrue(np.allclose(res, self.np_expected3)) @@ -80,8 +87,10 @@ class ApiSubtractTest(unittest.TestCase): data_c = paddle.static.data("c", shape=[3], dtype="int64") result_max = paddle.subtract(data_b, data_c) exe = paddle.static.Executor(self.place) - res, = exe.run(feed={"b": self.input_b, - "c": self.input_c}, + res, = exe.run(feed={ + "b": self.input_b, + "c": self.input_c + }, fetch_list=[result_max]) self.assertTrue(np.allclose(res, self.np_expected4)) diff --git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py index 6f625c09797..9d1a4cf19eb 100644 --- a/python/paddle/fluid/tests/unittests/test_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sum_op.py @@ -22,13 +22,15 @@ from paddle import enable_static import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.op import Operator -from paddle.fluid.tests.unittests.op_test import ( - OpTest, convert_float_to_uint16, convert_uint16_to_float) +from paddle.fluid.tests.unittests.op_test import (OpTest, + convert_float_to_uint16, + convert_uint16_to_float) from paddle import _C_ops from paddle.fluid.framework import _test_eager_guard class TestSumOp(OpTest): + def setUp(self): self.op_type = "sum" self.init_kernel_type() @@ -53,6 +55,7 @@ class TestSumOp(OpTest): class TestSelectedRowsSumOp(unittest.TestCase): + def setUp(self): self.height = 10 self.row_numel = 12 @@ -144,6 +147,7 @@ class TestSelectedRowsSumOp(unittest.TestCase): class TestSelectedRowsSumOpInt(TestSelectedRowsSumOp): + def init_kernel_type(self): self.dtype = np.int32 @@ -151,6 +155,7 @@ class TestSelectedRowsSumOpInt(TestSelectedRowsSumOp): @unittest.skipIf(not core.supports_bfloat16(), 'place does not support BF16 evaluation') class TestSelectedRowsSumBF16Op(TestSelectedRowsSumOp): + def setUp(self): self.height = 10 self.row_numel = 12 @@ -158,8 +163,8 @@ class TestSelectedRowsSumBF16Op(TestSelectedRowsSumOp): self.dtype = np.uint16 self.init_kernel_type() np.random.seed(12345) - self.data = np.random.random((len(self.rows), - self.row_numel)).astype(np.float32) + self.data = np.random.random( + (len(self.rows), self.row_numel)).astype(np.float32) def _get_array(self, rows, row_numel): if len(rows) > 0: @@ -211,11 +216,13 @@ class TestSelectedRowsSumBF16Op(TestSelectedRowsSumOp): class TestSelectedRowsSumBF16OpBigRow(TestSelectedRowsSumBF16Op): + def init_kernel_type(self): self.row_numel = 102 class TestLoDTensorAndSelectedRowsOp(TestSelectedRowsSumOp): + def setUp(self): self.height = 10 self.row_numel = 12 @@ -246,11 +253,12 @@ class TestLoDTensorAndSelectedRowsOp(TestSelectedRowsSumOp): out_t = np.array(out) self.assertEqual(out_t.shape[0], self.height) self.assertTrue( - np.array_equal(out_t, - self._get_array([i for i in range( - self.height)], self.row_numel) * np.tile( - np.array(result).reshape(self.height, 1), - self.row_numel))) + np.array_equal( + out_t, + self._get_array([i + for i in range(self.height)], self.row_numel) * + np.tile( + np.array(result).reshape(self.height, 1), self.row_numel))) def create_lod_tensor(self, scope, place, var_name): var = scope.var(var_name) @@ -265,6 +273,7 @@ class TestLoDTensorAndSelectedRowsOp(TestSelectedRowsSumOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestFP16SumOp(TestSumOp): + def 
init_kernel_type(self): self.dtype = np.float16 @@ -282,9 +291,11 @@ class TestFP16SumOp(TestSumOp): def create_test_sum_fp16_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestSumFp16Case(parent): + def init_kernel_type(self): self.dtype = np.float16 @@ -301,6 +312,7 @@ def create_test_sum_fp16_class(parent): #----------- test bf16 ----------- class TestSumBF16Op(OpTest): + def setUp(self): self.op_type = "sum" self.init_kernel_type() @@ -326,12 +338,15 @@ class TestSumBF16Op(OpTest): class API_Test_Add_n(unittest.TestCase): + def test_api(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - input0 = fluid.layers.fill_constant( - shape=[2, 3], dtype='int64', value=5) - input1 = fluid.layers.fill_constant( - shape=[2, 3], dtype='int64', value=3) + input0 = fluid.layers.fill_constant(shape=[2, 3], + dtype='int64', + value=5) + input1 = fluid.layers.fill_constant(shape=[2, 3], + dtype='int64', + value=3) expected_result = np.empty((2, 3)) expected_result.fill(8) sum_value = paddle.add_n([input0, input1]) @@ -371,7 +386,9 @@ class API_Test_Add_n(unittest.TestCase): class TestRaiseSumError(unittest.TestCase): + def test_errors(self): + def test_type(): fluid.layers.sum([11, 22]) @@ -392,7 +409,9 @@ class TestRaiseSumError(unittest.TestCase): class TestRaiseSumsError(unittest.TestCase): + def test_errors(self): + def test_type(): fluid.layers.sums([11, 22]) @@ -428,7 +447,9 @@ class TestRaiseSumsError(unittest.TestCase): class TestSumOpError(unittest.TestCase): + def test_errors(self): + def test_empty_list_input(): with fluid.dygraph.guard(): fluid._C_ops.sum([]) diff --git a/python/paddle/fluid/tests/unittests/test_svd_op.py b/python/paddle/fluid/tests/unittests/test_svd_op.py index c2d712b3d7e..ef9bbae6b81 100644 --- a/python/paddle/fluid/tests/unittests/test_svd_op.py +++ b/python/paddle/fluid/tests/unittests/test_svd_op.py @@ -26,6 +26,7 @@ from decorator_helper import prog_scope class TestSvdOp(OpTest): + def setUp(self): paddle.enable_static() self.generate_input() @@ -102,11 +103,12 @@ class TestSvdCheckGrad2(TestSvdOp): vander matrix must be a full rank matrix. """ self._input_shape = (5, 5) - self._input_data = np.vander( - [2, 3, 4, 5, 6]).astype("float64").reshape(self._input_shape) + self._input_data = np.vander([2, 3, 4, 5, 6]).astype("float64").reshape( + self._input_shape) class TestSvdNormalMatrixSmall(TestSvdCheckGrad2): + def generate_input(self): """ small matrix SVD. """ @@ -115,37 +117,40 @@ class TestSvdNormalMatrixSmall(TestSvdCheckGrad2): class TestSvdNormalMatrix6x3(TestSvdCheckGrad2): + def generate_input(self): """ return a deterministic matrix, the range matrix; vander matrix must be a full rank matrix. """ self._input_shape = (6, 3) - self._input_data = np.array( - [[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], [0.0, 0.0, 6.0], - [2.0, 4.0, 9.0], [3.0, 6.0, 8.0], - [3.0, 1.0, 0.0]]).astype("float64") + self._input_data = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], + [0.0, 0.0, 6.0], [2.0, 4.0, 9.0], + [3.0, 6.0, 8.0], [3.0, 1.0, + 0.0]]).astype("float64") class TestSvdNormalMatrix3x6(TestSvdCheckGrad2): + def generate_input(self): """ return a deterministic matrix, the range matrix; vander matrix must be a full rank matrix. 
""" self._input_shape = (3, 6) - self._input_data = np.array( - [[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], [0.0, 0.0, 6.0], - [2.0, 4.0, 9.0], [3.0, 6.0, 8.0], - [3.0, 1.0, 0.0]]).astype("float64") + self._input_data = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], + [0.0, 0.0, 6.0], [2.0, 4.0, 9.0], + [3.0, 6.0, 8.0], [3.0, 1.0, + 0.0]]).astype("float64") self._input_data = self._input_data.transpose((-1, -2)) class TestSvdNormalMatrix6x3Batched(TestSvdOp): + def generate_input(self): self._input_shape = (10, 6, 3) - self._input_data = np.array( - [[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], [0.0, 0.0, 6.0], - [2.0, 4.0, 9.0], [3.0, 6.0, 8.0], - [3.0, 1.0, 0.0]]).astype("float64") + self._input_data = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], + [0.0, 0.0, 6.0], [2.0, 4.0, 9.0], + [3.0, 6.0, 8.0], [3.0, 1.0, + 0.0]]).astype("float64") self._input_data = np.stack([self._input_data] * 10, axis=0) def test_svd_forward(self): @@ -155,15 +160,16 @@ class TestSvdNormalMatrix6x3Batched(TestSvdOp): class TestSvdNormalMatrix3x6Batched(TestSvdOp): + def generate_input(self): """ return a deterministic matrix, the range matrix; vander matrix must be a full rank matrix. """ self._input_shape = (10, 3, 6) - self._input_data = np.array( - [[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], [0.0, 0.0, 6.0], - [2.0, 4.0, 9.0], [3.0, 6.0, 8.0], - [3.0, 1.0, 0.0]]).astype("float64") + self._input_data = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], + [0.0, 0.0, 6.0], [2.0, 4.0, 9.0], + [3.0, 6.0, 8.0], [3.0, 1.0, + 0.0]]).astype("float64") self._input_data = self._input_data.transpose((-1, -2)) self._input_data = np.stack([self._input_data] * 10, axis=0) @@ -174,15 +180,16 @@ class TestSvdNormalMatrix3x6Batched(TestSvdOp): class TestSvdNormalMatrix3x3x3x6Batched(TestSvdOp): + def generate_input(self): """ return a deterministic matrix, the range matrix; vander matrix must be a full rank matrix. """ self._input_shape = (3, 3, 3, 6) - self._input_data = np.array( - [[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], [0.0, 0.0, 6.0], - [2.0, 4.0, 9.0], [3.0, 6.0, 8.0], - [3.0, 1.0, 0.0]]).astype("float64") + self._input_data = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 5.0], + [0.0, 0.0, 6.0], [2.0, 4.0, 9.0], + [3.0, 6.0, 8.0], [3.0, 1.0, + 0.0]]).astype("float64") self._input_data = self._input_data.transpose((-1, -2)) self._input_data = np.stack( [self._input_data, self._input_data, self._input_data], axis=0) @@ -198,6 +205,7 @@ class TestSvdNormalMatrix3x3x3x6Batched(TestSvdOp): @skip_check_grad_ci(reason="'check_grad' on large inputs is too slow, " + "however it is desirable to cover the forward pass") class TestSvdNormalMatrixBig(TestSvdOp): + def generate_input(self): """ big matrix SVD. @@ -215,6 +223,7 @@ class TestSvdNormalMatrixBig(TestSvdOp): class TestSvdNormalMatrixBig2(TestSvdOp): + def generate_input(self): """ big matrix SVD. 
""" @@ -223,6 +232,7 @@ class TestSvdNormalMatrixBig2(TestSvdOp): class TestSvdNormalMatrixFullMatrices(unittest.TestCase): + def setUp(self): paddle.disable_static() @@ -242,6 +252,7 @@ class TestSvdNormalMatrixFullMatrices(unittest.TestCase): class TestSvdFullMatriceGrad(TestSvdNormalMatrix6x3): + def get_full_matrices_option(self): return True @@ -260,6 +271,7 @@ class TestSvdFullMatriceGrad(TestSvdNormalMatrix6x3): class TestSvdAPI(unittest.TestCase): + def test_dygraph(self): paddle.disable_static() a = np.random.rand(5, 5) @@ -276,8 +288,9 @@ class TestSvdAPI(unittest.TestCase): for place in places: with fluid.program_guard(fluid.Program(), fluid.Program()): a = np.random.rand(5, 5) - x = paddle.fluid.data( - name="input", shape=[5, 5], dtype='float64') + x = paddle.fluid.data(name="input", + shape=[5, 5], + dtype='float64') u, s, vh = paddle.linalg.svd(x) exe = fluid.Executor(place) gt_u, gt_s, gt_vh = np.linalg.svd(a, full_matrices=False) diff --git a/python/paddle/fluid/tests/unittests/test_switch.py b/python/paddle/fluid/tests/unittests/test_switch.py index b9f3c804ef3..9d28615f71b 100644 --- a/python/paddle/fluid/tests/unittests/test_switch.py +++ b/python/paddle/fluid/tests/unittests/test_switch.py @@ -24,6 +24,7 @@ from paddle.fluid.framework import default_startup_program class TestSwitch(unittest.TestCase): + def check_switch(self, value): x = layers.fill_constant(shape=[1], dtype='float32', value=value) zero_var = layers.fill_constant(shape=[1], dtype='float32', value=0.0) @@ -31,8 +32,10 @@ class TestSwitch(unittest.TestCase): two_var = layers.fill_constant(shape=[1], dtype='float32', value=2.0) three_var = layers.fill_constant(shape=[1], dtype='float32', value=3.0) - result = layers.create_global_var( - shape=[1], value=-1.0, dtype='float32', persistable=True) + result = layers.create_global_var(shape=[1], + value=-1.0, + dtype='float32', + persistable=True) with layers.Switch() as switch: with switch.case(layers.less_than(x, zero_var)): @@ -62,16 +65,20 @@ class TestSwitch(unittest.TestCase): class TestSwitchCaseError(unittest.TestCase): + def test_error(self): main_program = framework.Program() startup_program = framework.Program() with framework.program_guard(main_program, startup_program): cond = layers.fill_constant(shape=[1], dtype='float32', value=0.0) - zero_var = layers.fill_constant( - shape=[1], dtype='float32', value=0.0) - - result = layers.create_global_var( - shape=[1], value=-1.0, dtype='float32', persistable=True) + zero_var = layers.fill_constant(shape=[1], + dtype='float32', + value=0.0) + + result = layers.create_global_var(shape=[1], + value=-1.0, + dtype='float32', + persistable=True) # 1. The type of 'condition' in case must be Variable. def test_condition_type(): diff --git a/python/paddle/fluid/tests/unittests/test_switch_autotune.py b/python/paddle/fluid/tests/unittests/test_switch_autotune.py index 0049a922b91..a22df61ace8 100644 --- a/python/paddle/fluid/tests/unittests/test_switch_autotune.py +++ b/python/paddle/fluid/tests/unittests/test_switch_autotune.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,6 +22,7 @@ import os class SimpleNet(paddle.nn.Layer): + def __init__(self): super(SimpleNet, self).__init__() self.conv = paddle.nn.Conv2D(1, 2, (3, 3)) @@ -50,6 +51,7 @@ def static_program(net, data): class TestAutoTune(unittest.TestCase): + def set_flags(self, enable_autotune): if paddle.is_compiled_with_cuda(): if enable_autotune: @@ -97,6 +99,7 @@ class TestAutoTune(unittest.TestCase): class TestDygraphAutoTuneStatus(TestAutoTune): + def run_program(self, enable_autotune): self.set_flags(enable_autotune) if enable_autotune: @@ -135,6 +138,7 @@ class TestDygraphAutoTuneStatus(TestAutoTune): class TestStaticAutoTuneStatus(TestAutoTune): + def run_program(self, enable_autotune): paddle.enable_static() @@ -142,8 +146,9 @@ class TestStaticAutoTuneStatus(TestAutoTune): main_program = paddle.static.Program() startup_program = paddle.static.Program() with paddle.static.program_guard(main_program, startup_program): - data = paddle.static.data( - name='X', shape=data_shape, dtype='float32') + data = paddle.static.data(name='X', + shape=data_shape, + dtype='float32') net = SimpleNet() loss = static_program(net, data) place = paddle.CUDAPlace(0) if paddle.fluid.core.is_compiled_with_cuda( @@ -188,6 +193,7 @@ class TestStaticAutoTuneStatus(TestAutoTune): class TestAutoTuneAPI(unittest.TestCase): + def test_set_config_warnings(self): with warnings.catch_warnings(record=True) as w: config = {"kernel": {"enable": 1, "tuning_range": 1}} diff --git a/python/paddle/fluid/tests/unittests/test_switch_case.py b/python/paddle/fluid/tests/unittests/test_switch_case.py index 598e415e5fb..814e46fb341 100644 --- a/python/paddle/fluid/tests/unittests/test_switch_case.py +++ b/python/paddle/fluid/tests/unittests/test_switch_case.py @@ -25,7 +25,9 @@ from functools import partial class TestAPISwitchCase(unittest.TestCase): + def test_return_single_var(self): + def fn_1(): return layers.fill_constant(shape=[4, 2], dtype='int32', value=1) @@ -43,68 +45,71 @@ class TestAPISwitchCase(unittest.TestCase): index_5 = layers.fill_constant(shape=[1], dtype='int32', value=5) # call fn_1 - out_0 = layers.switch_case( - branch_index=index_1, branch_fns={1: fn_1, - 2: fn_2, - 3: fn_3}) + out_0 = layers.switch_case(branch_index=index_1, + branch_fns={ + 1: fn_1, + 2: fn_2, + 3: fn_3 + }) # call fn_2 : branch_fns={0: fn_1, 1:fn_2, 2:fn_3} - out_1 = layers.switch_case( - branch_index=index_1, branch_fns=(fn_1, fn_2, fn_3)) + out_1 = layers.switch_case(branch_index=index_1, + branch_fns=(fn_1, fn_2, fn_3)) # call default fn_3 - out_2 = layers.switch_case( - branch_index=index_5, - branch_fns=((1, fn_1), (2, fn_2)), - default=fn_3) + out_2 = layers.switch_case(branch_index=index_5, + branch_fns=((1, fn_1), (2, fn_2)), + default=fn_3) # no default, call fn_2 - out_3 = layers.switch_case( - branch_index=index_2, branch_fns=[(1, fn_1), (2, fn_2)]) + out_3 = layers.switch_case(branch_index=index_2, + branch_fns=[(1, fn_1), (2, fn_2)]) # no default, call fn_2 but branch_index is 5 - out_4 = layers.switch_case( - branch_index=index_5, - branch_fns=[(1, fn_1), (3, fn_2), (2, fn_3)]) + out_4 = layers.switch_case(branch_index=index_5, + branch_fns=[(1, fn_1), (3, fn_2), + (2, fn_3)]) - place = fluid.CUDAPlace(0) if 
core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) res = exe.run(main_program, fetch_list=[out_0, out_1, out_2, out_3, out_4]) - self.assertTrue( - np.allclose(res[0], 1), - "result is {} but answer is {}".format(res[0], 1)) - self.assertTrue( - np.allclose(res[1], 2), - "result is {} but answer is {}".format(res[0], 2)) - self.assertTrue( - np.allclose(res[2], 3), - "result is {} but answer is {}".format(res[0], 3)) - self.assertTrue( - np.allclose(res[3], 2), - "result is {} but answer is {}".format(res[0], 2)) - self.assertTrue( - np.allclose(res[4], 2), - "result is {} but answer is {}".format(res[0], 2)) + self.assertTrue(np.allclose(res[0], 1), + "result is {} but answer is {}".format(res[0], 1)) + self.assertTrue(np.allclose(res[1], 2), + "result is {} but answer is {}".format(res[0], 2)) + self.assertTrue(np.allclose(res[2], 3), + "result is {} but answer is {}".format(res[0], 3)) + self.assertTrue(np.allclose(res[3], 2), + "result is {} but answer is {}".format(res[0], 2)) + self.assertTrue(np.allclose(res[4], 2), + "result is {} but answer is {}".format(res[0], 2)) def test_return_var_tuple(self): + def fn_1(): - return layers.fill_constant( - shape=[1, 2], dtype='int32', value=1), layers.fill_constant( - shape=[2, 3], dtype='float32', value=2) + return layers.fill_constant(shape=[1, 2], dtype='int32', + value=1), layers.fill_constant( + shape=[2, 3], + dtype='float32', + value=2) def fn_2(): - return layers.fill_constant( - shape=[3, 4], dtype='int32', value=3), layers.fill_constant( - shape=[4, 5], dtype='float32', value=4) + return layers.fill_constant(shape=[3, 4], dtype='int32', + value=3), layers.fill_constant( + shape=[4, 5], + dtype='float32', + value=4) def fn_3(): - return layers.fill_constant( - shape=[5], dtype='int32', value=5), layers.fill_constant( - shape=[5, 6], dtype='float32', value=6) + return layers.fill_constant(shape=[5], dtype='int32', + value=5), layers.fill_constant( + shape=[5, 6], + dtype='float32', + value=6) main_program = Program() startup_program = Program() @@ -113,61 +118,64 @@ class TestAPISwitchCase(unittest.TestCase): out = layers.switch_case(index_1, ((1, fn_1), (2, fn_2)), fn_3) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) ret = exe.run(main_program, fetch_list=out) self.assertTrue( np.allclose(np.asarray(ret[0]), np.full((1, 2), 1, np.int32))) self.assertTrue( - np.allclose( - np.asarray(ret[1]), np.full((2, 3), 2, np.float32))) + np.allclose(np.asarray(ret[1]), np.full((2, 3), 2, np.float32))) class TestAPISwitchCase_Nested(unittest.TestCase): + def test_nested_switch_case(self): + def fn_1(x=1): - out = layers.switch_case( - branch_index=layers.fill_constant( - shape=[1], dtype='int32', value=x), - branch_fns={ - 1: partial( - layers.fill_constant, shape=[1], dtype='int32', - value=1), - x: partial( - layers.fill_constant, shape=[2], dtype='int32', value=x) - }) + out = layers.switch_case(branch_index=layers.fill_constant( + shape=[1], dtype='int32', value=x), + branch_fns={ + 1: + partial(layers.fill_constant, + shape=[1], + dtype='int32', + value=1), + x: + partial(layers.fill_constant, + shape=[2], + dtype='int32', + value=x) + }) return out def fn_2(x=2): - out = layers.switch_case( - branch_index=layers.fill_constant( - shape=[1], dtype='int32', value=2), - 
branch_fns={ - 1: partial( - layers.fill_constant, - shape=[4, 3], - dtype='int32', - value=1), - 2: partial( - fn_1, x=x) - }) + out = layers.switch_case(branch_index=layers.fill_constant( + shape=[1], dtype='int32', value=2), + branch_fns={ + 1: + partial(layers.fill_constant, + shape=[4, 3], + dtype='int32', + value=1), + 2: + partial(fn_1, x=x) + }) return out def fn_3(): - out = layers.switch_case( - branch_index=layers.fill_constant( - shape=[1], dtype='int32', value=3), - branch_fns={ - 1: partial( - layers.fill_constant, - shape=[4, 3], - dtype='int32', - value=1), - 3: partial( - fn_2, x=3) - }) + out = layers.switch_case(branch_index=layers.fill_constant( + shape=[1], dtype='int32', value=3), + branch_fns={ + 1: + partial(layers.fill_constant, + shape=[4, 3], + dtype='int32', + value=1), + 3: + partial(fn_2, x=3) + }) return out main_program = Program() @@ -177,43 +185,47 @@ class TestAPISwitchCase_Nested(unittest.TestCase): index_2 = layers.fill_constant(shape=[1], dtype='int32', value=2) index_3 = layers.fill_constant(shape=[1], dtype='int64', value=3) - out_1 = layers.switch_case( - branch_index=index_1, branch_fns={1: fn_1, - 2: fn_2, - 3: fn_3}) - out_2 = layers.switch_case( - branch_index=index_2, branch_fns={1: fn_1, - 2: fn_2, - 3: fn_3}) - - out_3 = layers.switch_case( - branch_index=index_3, branch_fns={1: fn_1, - 2: fn_2, - 3: fn_3}) - - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + out_1 = layers.switch_case(branch_index=index_1, + branch_fns={ + 1: fn_1, + 2: fn_2, + 3: fn_3 + }) + out_2 = layers.switch_case(branch_index=index_2, + branch_fns={ + 1: fn_1, + 2: fn_2, + 3: fn_3 + }) + + out_3 = layers.switch_case(branch_index=index_3, + branch_fns={ + 1: fn_1, + 2: fn_2, + 3: fn_3 + }) + + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) res = exe.run(main_program, - feed={"index_1": np.array( - [1], dtype="uint8")}, + feed={"index_1": np.array([1], dtype="uint8")}, fetch_list=[out_1, out_2, out_3]) - self.assertTrue( - np.allclose(res[0], 1), - "result is {} but answer is {}".format(res[0], 1)) - self.assertTrue( - np.allclose(res[1], 2), - "result is {} but answer is {}".format(res[1], 2)) - self.assertTrue( - np.allclose(res[2], 3), - "result is {} but answer is {}".format(res[2], 3)) + self.assertTrue(np.allclose(res[0], 1), + "result is {} but answer is {}".format(res[0], 1)) + self.assertTrue(np.allclose(res[1], 2), + "result is {} but answer is {}".format(res[1], 2)) + self.assertTrue(np.allclose(res[2], 3), + "result is {} but answer is {}".format(res[2], 3)) # test TypeError and ValueError of api switch_case class TestAPISwitchCase_Error(unittest.TestCase): + def test_error(self): + def fn_1(): return layers.fill_constant(shape=[4, 2], dtype='int32', value=1) @@ -226,81 +238,82 @@ class TestAPISwitchCase_Error(unittest.TestCase): main_program = Program() startup_program = Program() with program_guard(main_program, startup_program): - key_float32 = layers.fill_constant( - shape=[1], dtype='float32', value=0.23) - key_int32 = layers.fill_constant( - shape=[1], dtype='int32', value=0.23) + key_float32 = layers.fill_constant(shape=[1], + dtype='float32', + value=0.23) + key_int32 = layers.fill_constant(shape=[1], + dtype='int32', + value=0.23) # The type of 'branch_index' in Op(switch_case) must be Variable def type_error_branch_index(): - layers.switch_case( - branch_index=1, branch_fns=[(1, fn_1)], default=fn_3) + layers.switch_case(branch_index=1, + 
branch_fns=[(1, fn_1)], + default=fn_3) self.assertRaises(TypeError, type_error_branch_index) # The data type of 'branch_index' in Op(switch_case) must be int32, int64 or uint8 def dtype_error_branch_index(): - layers.switch_case( - branch_index=key_float32, - branch_fns=[(1, fn_1)], - default=fn_3) + layers.switch_case(branch_index=key_float32, + branch_fns=[(1, fn_1)], + default=fn_3) self.assertRaises(TypeError, dtype_error_branch_index) # The type of 'branch_fns' in Op(switch_case) must be list, tuple or dict def type_error_branch_fns(): - layers.switch_case( - branch_index=key_int32, branch_fns=1, default=fn_3) + layers.switch_case(branch_index=key_int32, + branch_fns=1, + default=fn_3) self.assertRaises(TypeError, type_error_branch_fns) # The elements' type of 'branch_fns' in Op(switch_case) must be tuple def type_error_index_fn_pair_1(): - layers.switch_case( - branch_index=key_int32, branch_fns=[1], default=fn_3) + layers.switch_case(branch_index=key_int32, + branch_fns=[1], + default=fn_3) self.assertRaises(TypeError, type_error_index_fn_pair_1) # The tuple's size of 'branch_fns' in Op(switch_case) must be 2 def type_error_index_fn_pair_2(): - layers.switch_case( - branch_index=key_int32, - branch_fns=[(1, 2, 3)], - default=fn_3) + layers.switch_case(branch_index=key_int32, + branch_fns=[(1, 2, 3)], + default=fn_3) self.assertRaises(TypeError, type_error_index_fn_pair_2) # The key's type of 'branch_fns' in Op(switch_case) must be int def type_error_key(): - layers.switch_case( - branch_index=key_int32, branch_fns=[(2.3, 2)], default=fn_3) + layers.switch_case(branch_index=key_int32, + branch_fns=[(2.3, 2)], + default=fn_3) self.assertRaises(TypeError, type_error_key) # The key in 'branch_fns' must be unique def value_error_key(): - layers.switch_case( - branch_index=key_int32, - branch_fns=[(2, fn_1), (2, fn_2)], - default=fn_3) + layers.switch_case(branch_index=key_int32, + branch_fns=[(2, fn_1), (2, fn_2)], + default=fn_3) self.assertRaises(ValueError, value_error_key) # The type of function in 'branch_fns' must be callable def type_error_fn(): - layers.switch_case( - branch_index=key_int32, - branch_fns=[(1, 1), (2, fn_2)], - default=fn_3) + layers.switch_case(branch_index=key_int32, + branch_fns=[(1, 1), (2, fn_2)], + default=fn_3) self.assertRaises(TypeError, type_error_fn) # The default in Op(case) must be callable def type_error_default(): - layers.switch_case( - branch_index=key_int32, - branch_fns=[(1, fn_1), (2, fn_2)], - default=1) + layers.switch_case(branch_index=key_int32, + branch_fns=[(1, fn_1), (2, fn_2)], + default=1) self.assertRaises(TypeError, type_error_default) diff --git a/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py index 6bf811be2ad..06da617f26f 100644 --- a/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py @@ -73,11 +73,10 @@ class TestSyncBatchNormOpTraining(unittest.TestCase): use_cudnn = self.dtype == np.float16 with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - data = fluid.layers.data( - name='input', - shape=self.dshape, - dtype=self.dtype, - append_batch_size=False) + data = fluid.layers.data(name='input', + shape=self.dshape, + dtype=self.dtype, + append_batch_size=False) conv = fluid.layers.conv2d( input=data, num_filters=32, @@ -170,8 +169,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase): if sync_bn_val.shape != bn_val.shape: sync_bn_val = 
sync_bn_val[:bn_val.shape[0]] self.assertTrue( - np.allclose( - bn_val, sync_bn_val, atol=self.atol), + np.allclose(bn_val, sync_bn_val, atol=self.atol), "Output (" + fetch_names[i] + ") has diff. \n" + "\nBN " + str(bn_val) + "\n" + "Sync BN " + str(sync_bn_val)) @@ -211,14 +209,15 @@ class TestFP16SyncBatchNormOpTraining(TestSyncBatchNormOpTraining): class TestDygraphSyncBatchNormAPIError(unittest.TestCase): + def test_errors(self): if not core.is_compiled_with_cuda(): return with program_guard(Program(), Program()): my_sync_batch_norm = paddle.nn.SyncBatchNorm(10) - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CUDAPlace(0)) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.CUDAPlace(0)) self.assertRaises(TypeError, my_sync_batch_norm, x1) # the input dtype of SyncBatchNorm must be float16 or float32 or float64 @@ -228,17 +227,17 @@ class TestDygraphSyncBatchNormAPIError(unittest.TestCase): class TestConvertSyncBatchNorm(unittest.TestCase): + def test_convert(self): if not core.is_compiled_with_cuda(): return with program_guard(Program(), Program()): - compare_model = paddle.nn.Sequential( - paddle.nn.Conv2D(3, 5, 3), - paddle.nn.BatchNorm2D(5), paddle.nn.BatchNorm2D(5)) + compare_model = paddle.nn.Sequential(paddle.nn.Conv2D(3, 5, 3), + paddle.nn.BatchNorm2D(5), + paddle.nn.BatchNorm2D(5)) model = paddle.nn.Sequential( - paddle.nn.Conv2D(3, 5, 3), - paddle.nn.BatchNorm2D(5), + paddle.nn.Conv2D(3, 5, 3), paddle.nn.BatchNorm2D(5), paddle.nn.BatchNorm2D( 5, weight_attr=fluid.ParamAttr(name='bn.scale'), @@ -251,11 +250,13 @@ class TestConvertSyncBatchNorm(unittest.TestCase): class TestConvertSyncBatchNormCast1(unittest.TestCase): + def test_convert(self): if not core.is_compiled_with_cuda(): return class Net(nn.Layer): + def __init__(self): super(Net, self).__init__() self.conv1 = nn.Conv2D(3, 5, 3) @@ -280,6 +281,7 @@ class TestConvertSyncBatchNormCast1(unittest.TestCase): class TestConvertSyncBatchNormCase2(unittest.TestCase): + def test_convert(self): if not core.is_compiled_with_cuda(): return @@ -287,6 +289,7 @@ class TestConvertSyncBatchNormCase2(unittest.TestCase): with fluid.dygraph.guard(fluid.CUDAPlace(0)): class SyBNNet(paddle.nn.Layer): + def __init__(self, in_ch=3, out_ch=3, dirate=1): super(SyBNNet, self).__init__() self.bn_s1 = paddle.nn.SyncBatchNorm.convert_sync_batchnorm( @@ -295,8 +298,7 @@ class TestConvertSyncBatchNormCase2(unittest.TestCase): weight_attr=paddle.ParamAttr( regularizer=paddle.regularizer.L2Decay(0.)))) self.bn_s2 = paddle.nn.SyncBatchNorm.convert_sync_batchnorm( - paddle.nn.BatchNorm3D( - out_ch, data_format='NDHWC')) + paddle.nn.BatchNorm3D(out_ch, data_format='NDHWC')) def forward(self, x): x = self.bn_s1(x) @@ -304,6 +306,7 @@ class TestConvertSyncBatchNormCase2(unittest.TestCase): return out class BNNet(paddle.nn.Layer): + def __init__(self, in_ch=3, out_ch=3, dirate=1): super(BNNet, self).__init__() self.bn_s1 = paddle.nn.BatchNorm3D( @@ -311,8 +314,7 @@ class TestConvertSyncBatchNormCase2(unittest.TestCase): weight_attr=paddle.ParamAttr( regularizer=paddle.regularizer.L2Decay(0.))) self.bn_s2 = paddle.nn.SyncBatchNorm.convert_sync_batchnorm( - paddle.nn.BatchNorm3D( - out_ch, data_format='NDHWC')) + paddle.nn.BatchNorm3D(out_ch, data_format='NDHWC')) def forward(self, x): x = self.bn_s1(x) @@ -328,11 +330,12 @@ class TestConvertSyncBatchNormCase2(unittest.TestCase): sybn_out = sybn_model(x) self.assertTrue( np.allclose(bn_out.numpy(), sybn_out.numpy()), - "Output has diff. 
\n" + "\nBN " + str(bn_out.numpy()) + "\n" - + "Sync BN " + str(sybn_out.numpy())) + "Output has diff. \n" + "\nBN " + str(bn_out.numpy()) + + "\n" + "Sync BN " + str(sybn_out.numpy())) class TestDygraphSyncBatchNormDataFormatError(unittest.TestCase): + def test_errors(self): if not core.is_compiled_with_cuda(): return diff --git a/python/paddle/fluid/tests/unittests/test_take_along_axis_op.py b/python/paddle/fluid/tests/unittests/test_take_along_axis_op.py index b7650efc8c2..34ca5860a16 100644 --- a/python/paddle/fluid/tests/unittests/test_take_along_axis_op.py +++ b/python/paddle/fluid/tests/unittests/test_take_along_axis_op.py @@ -26,6 +26,7 @@ paddle.enable_static() class TestTakeAlongAxisOp(OpTest): + def setUp(self): self.init_data() self.op_type = "take_along_axis" @@ -52,13 +53,14 @@ class TestTakeAlongAxisOp(OpTest): self.x_type = "float64" self.x_shape = (5, 5, 5) self.index_type = "int32" - self.index = np.array( - [[[1]], [[1]], [[2]], [[4]], [[3]]]).astype(self.index_type) + self.index = np.array([[[1]], [[1]], [[2]], [[4]], + [[3]]]).astype(self.index_type) self.axis = 2 self.axis_type = "int64" class TestCase1(TestTakeAlongAxisOp): + def init_data(self): self.x_type = "float64" self.x_shape = (5, 5, 5) @@ -69,6 +71,7 @@ class TestCase1(TestTakeAlongAxisOp): class TestTakeAlongAxisAPI(unittest.TestCase): + def setUp(self): np.random.seed(0) self.shape = [3, 3] @@ -87,8 +90,10 @@ class TestTakeAlongAxisAPI(unittest.TestCase): index = paddle.fluid.data('Index', self.index_shape, "int64") out = paddle.take_along_axis(x, index, self.axis) exe = paddle.static.Executor(self.place[0]) - res = exe.run(feed={'X': self.x_np, - 'Index': self.index_np}, + res = exe.run(feed={ + 'X': self.x_np, + 'Index': self.index_np + }, fetch_list=[out]) out_ref = np.array( np.take_along_axis(self.x_np, self.index_np, self.axis)) @@ -107,12 +112,13 @@ class TestTakeAlongAxisAPI(unittest.TestCase): class TestTakeAlongAxisAPICase1(TestTakeAlongAxisAPI): + def setUp(self): np.random.seed(0) self.shape = [2, 2] self.index_shape = [4, 2] - self.index_np = np.array( - [[0, 0], [1, 0], [0, 0], [1, 0]]).astype('int64') + self.index_np = np.array([[0, 0], [1, 0], [0, 0], [1, + 0]]).astype('int64') self.x_np = np.random.random(self.shape).astype(np.float32) self.place = [paddle.CPUPlace()] self.axis = 0 diff --git a/python/paddle/fluid/tests/unittests/test_target_assign_op.py b/python/paddle/fluid/tests/unittests/test_target_assign_op.py index aec219f8063..a283328a237 100644 --- a/python/paddle/fluid/tests/unittests/test_target_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_target_assign_op.py @@ -38,8 +38,9 @@ def gen_match_and_neg_indices(num_prior, gt_lod, neg_lod): ret_ids = set([i for i in range(num_prior)]) - set(ids) l = neg_lod[n] neg_ids = random.sample(ret_ids, l) - neg_indices[offset:offset + neg_lod[n], :] = np.array(neg_ids).astype( - 'int32').reshape(l, 1) + neg_indices[offset:offset + + neg_lod[n], :] = np.array(neg_ids).astype('int32').reshape( + l, 1) offset += neg_lod[n] return match_indices, neg_indices @@ -86,6 +87,7 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod, class TestTargetAssginFloatType(OpTest): + def setUp(self): self.op_type = "target_assign" num_prior = 120 @@ -97,11 +99,11 @@ class TestTargetAssginFloatType(OpTest): num_gt = sum(gt_lod) encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32') - gt_label = np.random.randint( - num_class, size=(num_gt, 1)).astype('int32') + gt_label = np.random.randint(num_class, + 
size=(num_gt, 1)).astype('int32') - match_indices, neg_indices = gen_match_and_neg_indices(num_prior, - gt_lod, neg_lod) + match_indices, neg_indices = gen_match_and_neg_indices( + num_prior, gt_lod, neg_lod) out, out_wt, _, _ = target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod, neg_lod, @@ -124,6 +126,7 @@ class TestTargetAssginFloatType(OpTest): class TestTargetAssginIntType(OpTest): + def setUp(self): self.op_type = "target_assign" num_prior = 120 @@ -135,11 +138,11 @@ class TestTargetAssginIntType(OpTest): num_gt = sum(gt_lod) encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32') - gt_label = np.random.randint( - num_class, size=(num_gt, 1)).astype('int32') + gt_label = np.random.randint(num_class, + size=(num_gt, 1)).astype('int32') - match_indices, neg_indices = gen_match_and_neg_indices(num_prior, - gt_lod, neg_lod) + match_indices, neg_indices = gen_match_and_neg_indices( + num_prior, gt_lod, neg_lod) _, _, out, out_wt, = target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod, neg_lod, diff --git a/python/paddle/fluid/tests/unittests/test_tcp_store.py b/python/paddle/fluid/tests/unittests/test_tcp_store.py index 11e1e8cd059..a051519d634 100644 --- a/python/paddle/fluid/tests/unittests/test_tcp_store.py +++ b/python/paddle/fluid/tests/unittests/test_tcp_store.py @@ -20,6 +20,7 @@ import paddle class TestTCPStore(unittest.TestCase): + def test_tcp_store(self): store = paddle.fluid.core.TCPStore("127.0.0.1", 6170, True, 1, datetime.timedelta(0)) diff --git a/python/paddle/fluid/tests/unittests/test_tdm_child_op.py b/python/paddle/fluid/tests/unittests/test_tdm_child_op.py index af7bbeaab05..f2987a2a614 100644 --- a/python/paddle/fluid/tests/unittests/test_tdm_child_op.py +++ b/python/paddle/fluid/tests/unittests/test_tdm_child_op.py @@ -59,14 +59,15 @@ def create_tdm_tree(): class TestTDMChildOp(OpTest): + def setUp(self): self.__class__.op_type = "tdm_child" self.config() tree_info = create_tdm_tree() tree_info_np = np.array(tree_info).astype(self.info_type) - x_np = np.random.randint( - low=0, high=26, size=self.x_shape).astype(self.x_type) + x_np = np.random.randint(low=0, high=26, + size=self.x_shape).astype(self.x_type) children_res = [] leaf_mask_res = [] for batch in x_np: @@ -106,6 +107,7 @@ class TestTDMChildOp(OpTest): class TestCase1(TestTDMChildOp): + def config(self): """check int int64_t """ self.x_shape = (10, 20) @@ -115,6 +117,7 @@ class TestCase1(TestTDMChildOp): class TestCase2(TestTDMChildOp): + def config(self): """check int64_t int64_t """ self.x_shape = (10, 20) @@ -124,6 +127,7 @@ class TestCase2(TestTDMChildOp): class TestCase3(TestTDMChildOp): + def config(self): """check int64 int32 """ self.x_shape = (10, 20) @@ -133,6 +137,7 @@ class TestCase3(TestTDMChildOp): class TestCase4(TestTDMChildOp): + def config(self): """check large shape """ self.x_shape = (100, 20) @@ -142,6 +147,7 @@ class TestCase4(TestTDMChildOp): class TestTDMChildShape(unittest.TestCase): + def test_shape(self): x = fluid.layers.data(name='x', shape=[1], dtype='int32', lod_level=1) tdm_tree_info = create_tdm_tree() @@ -151,17 +157,17 @@ class TestTDMChildShape(unittest.TestCase): x=x, node_nums=26, child_nums=2, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - tree_info_np))) + param_attr=fluid.ParamAttr(initializer=fluid.initializer. 
+ NumpyArrayInitializer(tree_info_np))) place = fluid.CPUPlace() exe = fluid.Executor(place=place) exe.run(fluid.default_startup_program()) feed = { - 'x': np.array([[1], [2], [3], [4], [5], [6], [7], [8], [9], [10], - [11], [12]]).astype('int32') + 'x': + np.array([[1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], + [12]]).astype('int32') } exe.run(feed=feed) diff --git a/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py b/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py index e245529edc6..ffcf7630894 100644 --- a/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py +++ b/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py @@ -29,9 +29,9 @@ from sys import version_info def create_tdm_travel(): tree_travel = [[1, 3, 7, 14], [1, 3, 7, 15], [1, 3, 8, 16], [1, 3, 8, 17], - [1, 4, 9, 18], [1, 4, 9, 19], [1, 4, 10, 20], - [1, 4, 10, 21], [2, 5, 11, 22], [2, 5, 11, 23], - [2, 5, 12, 24], [2, 5, 12, 25], [2, 6, 13, 0]] + [1, 4, 9, 18], [1, 4, 9, 19], [1, 4, 10, 20], [1, 4, 10, 21], + [2, 5, 11, 22], [2, 5, 11, 23], [2, 5, 12, 24], + [2, 5, 12, 25], [2, 6, 13, 0]] return tree_travel @@ -48,6 +48,7 @@ type_dict = { class TestTDMSamplerOp(OpTest): + def setUp(self): self.__class__.op_type = "tdm_sampler" self.config() @@ -74,8 +75,8 @@ class TestTDMSamplerOp(OpTest): layer_np = np.array(tree_layer_flat).astype(self.tree_dtype) layer_np = layer_np.reshape([-1, 1]) - self.x_np = np.random.randint( - low=0, high=13, size=self.x_shape).astype(self.x_type) + self.x_np = np.random.randint(low=0, high=13, + size=self.x_shape).astype(self.x_type) out = np.random.random(self.output_shape).astype(self.out_dtype) label = np.random.random(self.output_shape).astype(self.out_dtype) @@ -133,8 +134,8 @@ class TestTDMSamplerOp(OpTest): sampling_res_list = sampling_res.tolist() positive_travel.append(sampling_res_list[0]) - label_sampling_res = label_res[batch_ids][start_offset: - end_offset] + label_sampling_res = label_res[batch_ids][ + start_offset:end_offset] mask_sampling_res = mask_res[batch_ids][start_offset:end_offset] # check unique @@ -142,9 +143,8 @@ class TestTDMSamplerOp(OpTest): assert len(set(sampling_res_list)) == len( sampling_res_list ), "len(set(sampling_res_list)): {}, len(sampling_res_list): {} , sample_res: {}, label_res:{}, mask_res: {}".format( - len(set(sampling_res_list)), - len(sampling_res_list), sampling_res, - label_sampling_res, mask_sampling_res) + len(set(sampling_res_list)), len(sampling_res_list), + sampling_res, label_sampling_res, mask_sampling_res) # check legal layer_node = self.tree_layer[layer_idx] layer_node.append(0) @@ -168,11 +168,12 @@ class TestTDMSamplerOp(OpTest): np.sum(mask_sampling_res[padding_index])) start_offset = end_offset # check travel legal - assert self.tree_travel[int(self.x_np[ - batch_ids])] == positive_travel + assert self.tree_travel[int( + self.x_np[batch_ids])] == positive_travel class TestCase1(TestTDMSamplerOp): + def config(self): """test input int64""" self.neg_samples_num_list = [0, 0, 0, 0] @@ -183,6 +184,7 @@ class TestCase1(TestTDMSamplerOp): class TestCase2(TestTDMSamplerOp): + def config(self): """test dtype int64""" self.neg_samples_num_list = [0, 0, 0, 0] @@ -193,6 +195,7 @@ class TestCase2(TestTDMSamplerOp): class TestCase3(TestTDMSamplerOp): + def config(self): """test all dtype int64""" self.neg_samples_num_list = [0, 0, 0, 0] @@ -203,6 +206,7 @@ class TestCase3(TestTDMSamplerOp): class TestCase4(TestTDMSamplerOp): + def config(self): """test one neg""" self.neg_samples_num_list = [1, 1, 1, 1] @@ 
-213,6 +217,7 @@ class TestCase4(TestTDMSamplerOp): class TestCase5(TestTDMSamplerOp): + def config(self): """test normal neg""" self.neg_samples_num_list = [1, 2, 3, 4] @@ -223,6 +228,7 @@ class TestCase5(TestTDMSamplerOp): class TestCase6(TestTDMSamplerOp): + def config(self): """test huge batchsize""" self.neg_samples_num_list = [1, 2, 3, 4] @@ -233,6 +239,7 @@ class TestCase6(TestTDMSamplerOp): class TestCase7(TestTDMSamplerOp): + def config(self): """test full neg""" self.neg_samples_num_list = [1, 3, 6, 11] @@ -243,6 +250,7 @@ class TestCase7(TestTDMSamplerOp): class TestTDMSamplerShape(unittest.TestCase): + def test_shape(self): x = fluid.layers.data(name='x', shape=[1], dtype='int32', lod_level=1) tdm_tree_travel = create_tdm_travel() @@ -267,9 +275,8 @@ class TestTDMSamplerShape(unittest.TestCase): tree_travel_attr=fluid.ParamAttr( initializer=fluid.initializer.NumpyArrayInitializer( travel_array)), - tree_layer_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - layer_array)), + tree_layer_attr=fluid.ParamAttr(initializer=fluid.initializer. + NumpyArrayInitializer(layer_array)), output_positive=True, output_list=True, seed=0, @@ -281,8 +288,9 @@ class TestTDMSamplerShape(unittest.TestCase): exe.run(fluid.default_startup_program()) feed = { - 'x': np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], - [10], [11], [12]]).astype('int32') + 'x': + np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], + [11], [12]]).astype('int32') } exe.run(feed=feed) diff --git a/python/paddle/fluid/tests/unittests/test_teacher_student_sigmoid_loss_op.py b/python/paddle/fluid/tests/unittests/test_teacher_student_sigmoid_loss_op.py index e0142776c83..6890e7d3a06 100644 --- a/python/paddle/fluid/tests/unittests/test_teacher_student_sigmoid_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_teacher_student_sigmoid_loss_op.py @@ -32,11 +32,12 @@ class TestTeacherStudentSigmoidLossOp(OpTest): batch_size = 100 num_classes = 1 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, (batch_size, num_classes)) - .astype("float64")), - 'Label': np.random.uniform(0, 2, (batch_size, num_classes)) - .astype("float64") + 'X': + logit( + np.random.uniform(0, 1, + (batch_size, num_classes)).astype("float64")), + 'Label': + np.random.uniform(0, 2, (batch_size, num_classes)).astype("float64") } outs = [] for index, label in enumerate(self.inputs["Label"]): @@ -61,7 +62,9 @@ class TestTeacherStudentSigmoidLossOp(OpTest): class TestTeacherStudentSigmoidLossInvalidInput(unittest.TestCase): + def test_error(self): + def test_invalid_input(): input = [512, 1] label = fluid.data(name='label', shape=[None, 1], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py index 407a252e1a5..e9561b3e0a5 100644 --- a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py +++ b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py @@ -42,6 +42,7 @@ def temporal_shift(x, seg_num, shift_ratio, data_format): class TestTemporalShift(OpTest): + def setUp(self): self.initTestCase() self.op_type = 'temporal_shift' @@ -53,7 +54,9 @@ class TestTemporalShift(OpTest): "data_format": self.data_format } - self.inputs = {"X": x, } + self.inputs = { + "X": x, + } output = temporal_shift(x, self.seg_num, self.shift_ratio, self.data_format) @@ -74,6 +77,7 @@ class TestTemporalShift(OpTest): class TestTemporalShift2(TestTemporalShift): + def initTestCase(self): self.x_shape = (4, 9, 7, 
7) self.seg_num = 2 @@ -82,6 +86,7 @@ class TestTemporalShift2(TestTemporalShift): class TestTemporalShift3(TestTemporalShift): + def initTestCase(self): self.x_shape = (3, 10, 5, 5) self.seg_num = 1 @@ -90,6 +95,7 @@ class TestTemporalShift3(TestTemporalShift): class TestTemporalShift4(TestTemporalShift): + def initTestCase(self): self.x_shape = (6, 5, 5, 4) self.seg_num = 3 @@ -100,6 +106,7 @@ class TestTemporalShift4(TestTemporalShift): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestTemporalShiftFP16(TestTemporalShift): + def initTestCase(self): self.x_shape = (3, 10, 5, 5) self.seg_num = 1 @@ -119,25 +126,32 @@ class TestTemporalShiftFP16(TestTemporalShift): class TestTemporalShiftAPI(unittest.TestCase): + def test_api(self): input = paddle.randn([6, 4, 2, 2]) - out = paddle.fluid.layers.temporal_shift( - x=input, seg_num=2, shift_ratio=0.2) + out = paddle.fluid.layers.temporal_shift(x=input, + seg_num=2, + shift_ratio=0.2) - out_from_function = paddle.nn.functional.temporal_shift( - x=input, seg_num=2, shift_ratio=0.2) + out_from_function = paddle.nn.functional.temporal_shift(x=input, + seg_num=2, + shift_ratio=0.2) # dygraph with paddle.fluid.dygraph.guard(): input = paddle.randn([6, 4, 2, 2]) - out = paddle.nn.functional.temporal_shift( - x=input, seg_num=2, shift_ratio=0.2) + out = paddle.nn.functional.temporal_shift(x=input, + seg_num=2, + shift_ratio=0.2) def test_error(self): + def attr_data_format(): input = paddle.randn([6, 4, 2, 2]) - out = paddle.nn.functional.temporal_shift( - x=input, seg_num=2, shift_ratio=0.2, data_format="HWC") + out = paddle.nn.functional.temporal_shift(x=input, + seg_num=2, + shift_ratio=0.2, + data_format="HWC") self.assertRaises(ValueError, attr_data_format) diff --git a/python/paddle/fluid/tests/unittests/test_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor.py index da792903b7d..2ea88c89a37 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor.py @@ -22,6 +22,7 @@ import numbers class TestTensorPtr(unittest.TestCase): + def test_tensor_ptr(self): t = core.Tensor() np_arr = numpy.zeros([2, 3]) @@ -30,6 +31,7 @@ class TestTensorPtr(unittest.TestCase): class TestTensor(unittest.TestCase): + def setUp(self): self.support_dtypes = [ 'bool', 'uint8', 'int8', 'int16', 'int32', 'int64', 'float16', @@ -79,8 +81,10 @@ class TestTensor(unittest.TestCase): scope = core.Scope() var = scope.var("int8_tensor") cpu_tensor = var.get_tensor() - tensor_array = numpy.random.randint( - -127, high=128, size=[100, 200], dtype=numpy.int8) + tensor_array = numpy.random.randint(-127, + high=128, + size=[100, 200], + dtype=numpy.int8) place = core.CPUPlace() cpu_tensor.set(tensor_array, place) cpu_tensor_array_2 = numpy.array(cpu_tensor) @@ -88,8 +92,10 @@ class TestTensor(unittest.TestCase): if core.is_compiled_with_cuda(): cuda_tensor = var.get_tensor() - tensor_array = numpy.random.randint( - -127, high=128, size=[100, 200], dtype=numpy.int8) + tensor_array = numpy.random.randint(-127, + high=128, + size=[100, 200], + dtype=numpy.int8) place = core.CUDAPlace(0) cuda_tensor.set(tensor_array, place) cuda_tensor_array_2 = numpy.array(cuda_tensor) @@ -203,10 +209,10 @@ class TestTensor(unittest.TestCase): shape = [3, 3, 3] tensor._set_dims(shape) - tensor_array = numpy.array( - [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], - [[10, 11, 12], [13, 14, 15], [16, 17, 18]], - [[19, 20, 21], [22, 23, 24], [25, 26, 27]]]).astype(dtype) + tensor_array = numpy.array([[[1, 
2, 3], [4, 5, 6], [7, 8, 9]], + [[10, 11, 12], [13, 14, 15], [16, 17, 18]], + [[19, 20, 21], [22, 23, 24], + [25, 26, 27]]]).astype(dtype) tensor.set(tensor_array, place) n1 = tensor[1] @@ -284,16 +290,16 @@ class TestTensor(unittest.TestCase): if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) self.assertTrue( - isinstance( - tensor._mutable_data(place, dtype), numbers.Integral)) + isinstance(tensor._mutable_data(place, dtype), + numbers.Integral)) place = core.CUDAPinnedPlace() self.assertTrue( - isinstance( - tensor._mutable_data(place, dtype), numbers.Integral)) + isinstance(tensor._mutable_data(place, dtype), + numbers.Integral)) places = fluid.cuda_pinned_places() self.assertTrue( - isinstance( - tensor._mutable_data(places[0], dtype), numbers.Integral)) + isinstance(tensor._mutable_data(places[0], dtype), + numbers.Integral)) def test_tensor_set_fp16(self): array = numpy.random.random((300, 500)).astype("float16") diff --git a/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py index ff6cbdde066..d9c4d2c61b2 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py @@ -54,9 +54,8 @@ class TestLoDTensorArrayConcat(unittest.TestCase): program = fluid.Program() block = program.global_block() - input_arr = block.create_var( - name="tmp_lod_tensor_array", - type=core.VarDesc.VarType.LOD_TENSOR_ARRAY) + input_arr = block.create_var(name="tmp_lod_tensor_array", + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY) input_arr.persistable = True input_arr_var = scope.var('tmp_lod_tensor_array') input_tensor_array = input_arr_var.get_lod_tensor_array() @@ -80,27 +79,28 @@ class TestLoDTensorArrayConcat(unittest.TestCase): y_out_index = block.create_var(name="OutIndex") y_out_index.persistable = True - y_grad_arr = block.create_var( - name='Out@GRAD', dtype='float32', shape=[11]) + y_grad_arr = block.create_var(name='Out@GRAD', + dtype='float32', + shape=[11]) y_grad_arr.persistable = True y_grad = scope.var('Out@GRAD') y_grad_tensor = y_grad.get_tensor() y_grad_tensor.set(random_grad, cpu) - op = block.append_op( - type=self.op_type, - inputs={"X": input_arr}, - outputs={"Out": y_out, - "OutIndex": y_out_index}, - attrs=self.attrs) + op = block.append_op(type=self.op_type, + inputs={"X": input_arr}, + outputs={ + "Out": y_out, + "OutIndex": y_out_index + }, + attrs=self.attrs) - out_grad = block.create_var( - name="tmp_lod_tensor_array@GRAD", - type=core.VarDesc.VarType.LOD_TENSOR_ARRAY) + out_grad = block.create_var(name="tmp_lod_tensor_array@GRAD", + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY) out_grad.persistable = True - grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(op.desc, - set(), []) + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( + op.desc, set(), []) grad_op_desc = grad_op_desc_list[0] new_op_desc = block.desc.append_op() new_op_desc.copy_from(grad_op_desc) @@ -124,8 +124,8 @@ class TestLoDTensorArrayConcat(unittest.TestCase): # test forward tensor_res = numpy.array(out[0]) tensor_res_out_idx = numpy.array(out[1]) - tensor_gt = numpy.array( - [0] + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='float32') + tensor_gt = numpy.array([0] + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + dtype='float32') self.assertEqual(len(tensor_res), len(tensor_gt)) self.assertEqual(len(tensor_res_out_idx), 10) @@ -154,9 +154,8 @@ class TestLoDTensorArrayConcat(unittest.TestCase): numpy.array(grad_tensor_array[i])[1], 
numpy.array(random_grad[i + 1])) if i == 1: - self.assertEqual( - numpy.array(grad_tensor_array[i]), - numpy.array(random_grad[i + 1])) + self.assertEqual(numpy.array(grad_tensor_array[i]), + numpy.array(random_grad[i + 1])) class TestLoDTensorArrayStack(unittest.TestCase): @@ -171,10 +170,9 @@ class TestLoDTensorArrayStack(unittest.TestCase): numpy.random.rand(2, 3, 4).astype("float32") ] self.outputs = [ - numpy.stack( - self.inputs, axis=self.attrs["axis"]), numpy.array( - [x.shape[self.attrs["axis"]] for x in self.inputs], - dtype="int32") + numpy.stack(self.inputs, axis=self.attrs["axis"]), + numpy.array([x.shape[self.attrs["axis"]] for x in self.inputs], + dtype="int32") ] self.input_grads = [numpy.ones_like(x) for x in self.inputs] self.set_program() @@ -199,14 +197,15 @@ class TestLoDTensorArrayStack(unittest.TestCase): def run_check(self, executor, scope): executor.run(self.program, scope=scope) for i, output in enumerate(self.outputs): - numpy.allclose( - numpy.array(scope.var(self.output_vars[i].name).get_tensor()), - output, - atol=0) + numpy.allclose(numpy.array( + scope.var(self.output_vars[i].name).get_tensor()), + output, + atol=0) tensor_array_grad = scope.var(self.array.name).get_lod_tensor_array() for i, input_grad in enumerate(self.input_grads): - numpy.allclose( - numpy.array(tensor_array_grad[i]), input_grad, atol=0) + numpy.allclose(numpy.array(tensor_array_grad[i]), + input_grad, + atol=0) def test_cpu(self): scope = core.Scope() @@ -223,6 +222,7 @@ class TestLoDTensorArrayStack(unittest.TestCase): class TestTensorArrayToTensorAPI(unittest.TestCase): + def _test_case(self, inp1, inp2): x0 = fluid.layers.assign(inp1) x0.stop_gradient = False diff --git a/python/paddle/fluid/tests/unittests/test_tensor_copy_from.py b/python/paddle/fluid/tests/unittests/test_tensor_copy_from.py index 6a91c2182d1..64c4be260ed 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_copy_from.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_copy_from.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,6 +19,7 @@ from paddle.fluid.core import LoDTensor as Tensor class TestTensorCopyFrom(unittest.TestCase): + def test_main(self): place = paddle.CPUPlace() np_value = np.random.random(size=[10, 30]).astype('float32') diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_.py index 4a34b2022b9..a6055e7b40e 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_fill_.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_.py @@ -21,6 +21,7 @@ from paddle.fluid.framework import _test_eager_guard class TensorFill_Test(unittest.TestCase): + def setUp(self): self.shape = [32, 32] @@ -36,8 +37,8 @@ class TensorFill_Test(unittest.TestCase): paddle.set_device('cpu') else: paddle.set_device('gpu') - np_arr = np.reshape( - np.array(six.moves.range(np.prod(self.shape))), self.shape) + np_arr = np.reshape(np.array(six.moves.range(np.prod(self.shape))), + self.shape) for dtype in typelist: var = 1. 
tensor = paddle.to_tensor(np_arr, place=p, dtype=dtype) @@ -64,8 +65,8 @@ class TensorFill_Test(unittest.TestCase): paddle.set_device('cpu') else: paddle.set_device('gpu') - np_arr = np.reshape( - np.array(six.moves.range(np.prod(self.shape))), self.shape) + np_arr = np.reshape(np.array(six.moves.range(np.prod(self.shape))), + self.shape) for dtype in typelist: var = int(1) tensor = paddle.to_tensor(np_arr, place=p, dtype=dtype) @@ -85,6 +86,7 @@ class TensorFill_Test(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_test_errors(self): + def test_list(): x = paddle.to_tensor([2, 3, 4]) x.fill_([1]) diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py index 2b6d3a5ca5f..da8ca1f2d66 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py @@ -21,11 +21,12 @@ from paddle.fluid.framework import _test_eager_guard class TensorFillDiagonal_Test(unittest.TestCase): + def func_dim2_normal(self): - expected_np = np.array( - [[1, 2, 2], [2, 1, 2], [2, 2, 1]]).astype('float32') - expected_grad = np.array( - [[0, 1, 1], [1, 0, 1], [1, 1, 0]]).astype('float32') + expected_np = np.array([[1, 2, 2], [2, 1, 2], [2, 2, + 1]]).astype('float32') + expected_grad = np.array([[0, 1, 1], [1, 0, 1], [1, 1, + 0]]).astype('float32') typelist = ['float32', 'float64', 'int32', 'int64'] places = [fluid.CPUPlace()] @@ -59,10 +60,10 @@ class TensorFillDiagonal_Test(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_offset(self): - expected_np = np.array( - [[2, 2, 1], [2, 2, 2], [2, 2, 2]]).astype('float32') - expected_grad = np.array( - [[1, 1, 0], [1, 1, 1], [1, 1, 1]]).astype('float32') + expected_np = np.array([[2, 2, 1], [2, 2, 2], [2, 2, + 2]]).astype('float32') + expected_grad = np.array([[1, 1, 0], [1, 1, 1], [1, 1, + 1]]).astype('float32') typelist = ['float32', 'float64', 'int32', 'int64'] places = [fluid.CPUPlace()] @@ -96,8 +97,8 @@ class TensorFillDiagonal_Test(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_bool(self): - expected_np = np.array( - [[False, True, True], [True, False, True], [True, True, False]]) + expected_np = np.array([[False, True, True], [True, False, True], + [True, True, False]]) typelist = ['bool'] places = [fluid.CPUPlace()] @@ -123,11 +124,11 @@ class TensorFillDiagonal_Test(unittest.TestCase): def func_dim2_unnormal_wrap(self): expected_np = np.array([[1, 2, 2], [2, 1, 2], [2, 2, 1], [2, 2, 2], - [1, 2, 2], [2, 1, 2], - [2, 2, 1]]).astype('float32') + [1, 2, 2], [2, 1, 2], [2, 2, + 1]]).astype('float32') expected_grad = np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0], [1, 1, 1], - [0, 1, 1], [1, 0, 1], - [1, 1, 0]]).astype('float32') + [0, 1, 1], [1, 0, 1], [1, 1, + 0]]).astype('float32') typelist = ['float32', 'float64', 'int32', 'int64'] places = [fluid.CPUPlace()] @@ -162,11 +163,11 @@ class TensorFillDiagonal_Test(unittest.TestCase): def func_dim2_unnormal_unwrap(self): expected_np = np.array([[1, 2, 2], [2, 1, 2], [2, 2, 1], [2, 2, 2], - [2, 2, 2], [2, 2, 2], - [2, 2, 2]]).astype('float32') + [2, 2, 2], [2, 2, 2], [2, 2, + 2]]).astype('float32') expected_grad = np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0], [1, 1, 1], - [1, 1, 1], [1, 1, 1], - [1, 1, 1]]).astype('float32') + [1, 1, 1], [1, 1, 1], [1, 1, + 1]]).astype('float32') typelist = ['float32', 'float64', 'int32', 'int64'] 
places = [fluid.CPUPlace()] @@ -200,13 +201,14 @@ class TensorFillDiagonal_Test(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_dim_larger2_normal(self): - expected_np = np.array([[[1, 2, 2], [2, 2, 2], [2, 2, 2]], [[2, 2, 2], [ - 2, 1, 2 - ], [2, 2, 2]], [[2, 2, 2], [2, 2, 2], [2, 2, 1]]]).astype('float32') - expected_grad = np.array( - [[[0, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 0, 1], - [1, 1, 1]], - [[1, 1, 1], [1, 1, 1], [1, 1, 0]]]).astype('float32') + expected_np = np.array([[[1, 2, 2], [2, 2, 2], [2, 2, 2]], + [[2, 2, 2], [2, 1, 2], [2, 2, 2]], + [[2, 2, 2], [2, 2, 2], [2, 2, + 1]]]).astype('float32') + expected_grad = np.array([[[0, 1, 1], [1, 1, 1], [1, 1, 1]], + [[1, 1, 1], [1, 0, 1], [1, 1, 1]], + [[1, 1, 1], [1, 1, 1], + [1, 1, 0]]]).astype('float32') typelist = ['float32', 'float64', 'int32', 'int64'] places = [fluid.CPUPlace()] diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py index c140b61bdf4..4765b540c7e 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py @@ -21,6 +21,7 @@ import paddle class TensorFillDiagTensor_Test(unittest.TestCase): + def setUp(self): self.typelist = ['float32', 'float64', 'int32', 'int64'] self.places = [fluid.CPUPlace()] @@ -135,8 +136,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase): else: paddle.set_device('gpu') for dtype in self.typelist: - v = paddle.to_tensor( - np.arange(12).reshape(2, 2, 3), dtype=dtype) + v = paddle.to_tensor(np.arange(12).reshape(2, 2, 3), + dtype=dtype) var = (np.random.random() + 1) x = paddle.ones((2, 4, 3, 2), dtype=dtype) x.stop_gradient = False @@ -158,8 +159,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase): fsdim = 128 paddle.set_device('gpu') for dtype in self.typelist: - v = paddle.arange( - bsdim * fsdim, dtype=dtype).reshape((bsdim, fsdim)) + v = paddle.arange(bsdim * fsdim, dtype=dtype).reshape( + (bsdim, fsdim)) y = paddle.ones((bsdim, fsdim, fsdim), dtype=dtype) y.stop_gradient = False y = y * 2 diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py index 0bab3ec10d7..03608046414 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py @@ -22,6 +22,7 @@ from paddle.fluid.framework import _test_eager_guard class TensorFillDiagTensor_Test(unittest.TestCase): + def setUp(self): self.typelist = ['float32', 'float64', 'int32', 'int64'] self.places = [fluid.CPUPlace()] @@ -29,10 +30,10 @@ class TensorFillDiagTensor_Test(unittest.TestCase): self.places.append(fluid.CUDAPlace(0)) def func_dim2(self): - expected_np = np.array( - [[1, 2, 2], [2, 1, 2], [2, 2, 1], [2, 2, 2]]).astype('float32') - expected_grad = np.array( - [[0, 1, 1], [1, 0, 1], [1, 1, 0], [1, 1, 1]]).astype('float32') + expected_np = np.array([[1, 2, 2], [2, 1, 2], [2, 2, 1], + [2, 2, 2]]).astype('float32') + expected_grad = np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0], + [1, 1, 1]]).astype('float32') for idx, p in enumerate(self.places): if idx == 0: @@ -63,10 +64,10 @@ class TensorFillDiagTensor_Test(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_dim2_offset_1(self): - expected_np = np.array( - [[2, 2, 2], [1, 2, 2], [2, 
1, 2], [2, 2, 1]]).astype('float32') - expected_grad = np.array( - [[1, 1, 1], [0, 1, 1], [1, 0, 1], [1, 1, 0]]).astype('float32') + expected_np = np.array([[2, 2, 2], [1, 2, 2], [2, 1, 2], + [2, 2, 1]]).astype('float32') + expected_grad = np.array([[1, 1, 1], [0, 1, 1], [1, 0, 1], + [1, 1, 0]]).astype('float32') for idx, p in enumerate(self.places): if idx == 0: @@ -97,10 +98,10 @@ class TensorFillDiagTensor_Test(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_dim2_offset1(self): - expected_np = np.array( - [[2, 1, 2], [2, 2, 1], [2, 2, 2], [2, 2, 2]]).astype('float32') - expected_grad = np.array( - [[1, 0, 1], [1, 1, 0], [1, 1, 1], [1, 1, 1]]).astype('float32') + expected_np = np.array([[2, 1, 2], [2, 2, 1], [2, 2, 2], + [2, 2, 2]]).astype('float32') + expected_grad = np.array([[1, 0, 1], [1, 1, 0], [1, 1, 1], + [1, 1, 1]]).astype('float32') for idx, p in enumerate(self.places): if idx == 0: @@ -131,18 +132,22 @@ class TensorFillDiagTensor_Test(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_dim4(self): - expected_np = np.array( - [[[[0, 3], [2, 2], [2, 2]], [[2, 2], [1, 4], [2, 2]], - [[2, 2], [2, 2], [2, 5]], [[2, 2], [2, 2], [2, 2]]], - [[[6, 9], [2, 2], [2, 2]], [[2, 2], [7, 10], [2, 2]], - [[2, 2], [2, 2], [8, 11]], - [[2, 2], [2, 2], [2, 2]]]]).astype('float32') - expected_grad = np.array( - [[[[0, 0], [1, 1], [1, 1]], [[1, 1], [0, 0], [1, 1]], - [[1, 1], [1, 1], [0, 0]], [[1, 1], [1, 1], [1, 1]]], - [[[0, 0], [1, 1], [1, 1]], [[1, 1], [0, 0], [1, 1]], - [[1, 1], [1, 1], [0, 0]], - [[1, 1], [1, 1], [1, 1]]]]).astype('float32') + expected_np = np.array([[[[0, 3], [2, 2], [2, 2]], + [[2, 2], [1, 4], [2, 2]], + [[2, 2], [2, 2], [2, 5]], + [[2, 2], [2, 2], [2, 2]]], + [[[6, 9], [2, 2], [2, 2]], + [[2, 2], [7, 10], [2, 2]], + [[2, 2], [2, 2], [8, 11]], + [[2, 2], [2, 2], [2, 2]]]]).astype('float32') + expected_grad = np.array([[[[0, 0], [1, 1], [1, 1]], + [[1, 1], [0, 0], [1, 1]], + [[1, 1], [1, 1], [0, 0]], + [[1, 1], [1, 1], [1, 1]]], + [[[0, 0], [1, 1], [1, 1]], + [[1, 1], [0, 0], [1, 1]], + [[1, 1], [1, 1], [0, 0]], + [[1, 1], [1, 1], [1, 1]]]]).astype('float32') for idx, p in enumerate(self.places): if idx == 0: @@ -150,8 +155,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase): else: paddle.set_device('gpu') for dtype in self.typelist: - v = paddle.to_tensor( - np.arange(12).reshape(2, 2, 3), dtype=dtype) + v = paddle.to_tensor(np.arange(12).reshape(2, 2, 3), + dtype=dtype) var = (np.random.random() + 1) x = paddle.ones((2, 4, 3, 2), dtype=dtype) x.stop_gradient = False @@ -180,8 +185,8 @@ class TensorFillDiagTensor_Test(unittest.TestCase): fsdim = 128 paddle.set_device('gpu') for dtype in self.typelist: - v = paddle.arange( - bsdim * fsdim, dtype=dtype).reshape((bsdim, fsdim)) + v = paddle.arange(bsdim * fsdim, dtype=dtype).reshape( + (bsdim, fsdim)) y = paddle.ones((bsdim, fsdim, fsdim), dtype=dtype) y.stop_gradient = False y = y * 2 diff --git a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py index d725a672c34..d8d1990a4fa 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py @@ -25,6 +25,7 @@ import paddle.fluid.core as core class SimpleNet(nn.Layer): + def __init__(self, in_size, out_size): super(SimpleNet, self).__init__() self.linear1 = nn.Linear(in_size, in_size) @@ -43,6 +44,7 @@ class SimpleNet(nn.Layer): 
class SimpleNetForStatic(nn.Layer): + def __init__(self, in_size, out_size): super(SimpleNetForStatic, self).__init__() self.linear1 = nn.Linear(in_size, in_size) @@ -58,6 +60,7 @@ class SimpleNetForStatic(nn.Layer): class TestTensorRegisterHook(unittest.TestCase): + def setUp(self): self.seed = 2021 self.in_size = 10 @@ -68,6 +71,7 @@ class TestTensorRegisterHook(unittest.TestCase): self.devices.append("gpu") def func_hook_for_interior_var(self): + def run_double_hook_for_interior_var(double_hook, removed=False): for device in self.devices: paddle.set_device(device) @@ -165,6 +169,7 @@ class TestTensorRegisterHook(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_hook_for_leaf_var(self): + def run_double_hook_for_leaf_var(double_hook, removed=False): for device in self.devices: paddle.set_device(device) @@ -211,8 +216,9 @@ class TestTensorRegisterHook(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_hook_for_accumulated_grad_interior_var(self): - def run_double_hook_for_accumulated_grad_interior_var(double_hook, - removed=False): + + def run_double_hook_for_accumulated_grad_interior_var( + double_hook, removed=False): for device in self.devices: paddle.set_device(device) @@ -252,18 +258,18 @@ class TestTensorRegisterHook(unittest.TestCase): self.assertTrue(np.array_equal(x.grad.numpy(), base_grad)) # b.grad is changed by x.hook self.assertTrue( - np.array_equal(b.grad.numpy(), base_grad * 2 - if not removed else base_grad)) + np.array_equal(b.grad.numpy(), + base_grad * 2 if not removed else base_grad)) # a.grad is changed by x.hook and a.hook self.assertTrue( - np.array_equal(a.grad.numpy(), base_grad * 4 - if not removed else base_grad)) + np.array_equal(a.grad.numpy(), + base_grad * 4 if not removed else base_grad)) # register hook run_double_hook_for_accumulated_grad_interior_var(lambda grad: grad * 2) # register hook and removed - run_double_hook_for_accumulated_grad_interior_var( - lambda grad: grad * 2, removed=True) + run_double_hook_for_accumulated_grad_interior_var(lambda grad: grad * 2, + removed=True) def test_hook_for_accumulated_grad_interior_var(self): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) @@ -273,8 +279,9 @@ class TestTensorRegisterHook(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_hook_for_accumulated_grad_leaf_var(self): - def run_double_hook_for_accumulated_grad_leaf_var(double_hook, - removed=False): + + def run_double_hook_for_accumulated_grad_leaf_var( + double_hook, removed=False): for device in self.devices: paddle.set_device(device) @@ -304,14 +311,14 @@ class TestTensorRegisterHook(unittest.TestCase): base_grad = np.array([5., 9., 13., 19.]) # x.grad is changed by x.hook self.assertTrue( - np.array_equal(x.grad.numpy(), base_grad * 2 - if not removed else base_grad)) + np.array_equal(x.grad.numpy(), + base_grad * 2 if not removed else base_grad)) # register hook run_double_hook_for_accumulated_grad_leaf_var(lambda grad: grad * 2) # register hook and removed - run_double_hook_for_accumulated_grad_leaf_var( - lambda grad: grad * 2, removed=True) + run_double_hook_for_accumulated_grad_leaf_var(lambda grad: grad * 2, + removed=True) def test_hook_for_accumulated_grad_leaf_var(self): with _test_eager_guard(): @@ -319,6 +326,7 @@ class TestTensorRegisterHook(unittest.TestCase): self.func_hook_for_accumulated_grad_leaf_var() def func_hook_in_model(self): + def run_double_hook_in_model(data, label, hook=None, @@ -373,6 +381,7 
@@ class TestTensorRegisterHook(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_multiple_hooks_for_interior_var(self): + def run_multiple_hooks_for_interior_var(device, hooks, remove1=False, @@ -458,6 +467,7 @@ class TestTensorRegisterHook(unittest.TestCase): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) def func_hook_in_double_grad(self): + def double_print_hook(grad): grad = grad * 2 print(grad) @@ -473,8 +483,10 @@ class TestTensorRegisterHook(unittest.TestCase): y = x * x # Since y = x * x, dx = 2 * x - dx = paddle.grad( - outputs=[y], inputs=[x], create_graph=True, retain_graph=True)[0] + dx = paddle.grad(outputs=[y], + inputs=[x], + create_graph=True, + retain_graph=True)[0] z = y + dx self.assertTrue(x.grad is None) @@ -534,8 +546,9 @@ class TestTensorRegisterHook(unittest.TestCase): main_program = paddle.static.Program() with paddle.static.scope_guard(paddle.static.Scope()): with paddle.static.program_guard(main_program, startup_program): - x = paddle.static.data( - name='x', shape=[None, self.in_size], dtype='float32') + x = paddle.static.data(name='x', + shape=[None, self.in_size], + dtype='float32') net = SimpleNetForStatic(self.in_size, self.out_size) with self.assertRaises(AssertionError): @@ -573,6 +586,7 @@ def global_void_hook(): class TestTensorRegisterBackwardHook(unittest.TestCase): + def setUp(self): self.devices = ["cpu"] if paddle.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_dynamic.py b/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_dynamic.py index 774d40a17c6..ded9d42b9b5 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_dynamic.py @@ -24,12 +24,13 @@ from paddle.fluid.framework import _test_eager_guard # - Related paddle dtypes: # - int type: int64, (no test here: uint8, int8, int16, int32) # - float type: float32, (no test here: float64) -# - Python scalar dtypes: +# - Python scalar dtypes: # - int(64) # - float(64) class TestTensorScalarTypePromotionDynamic(unittest.TestCase): + def check_operation(self, a, b, c, op): if op == '+': c_rlt = a + b diff --git a/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_static.py b/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_static.py index d697666e12d..701ff5c3d6e 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_static.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_static.py @@ -25,12 +25,13 @@ from paddle.static import Program # - Related paddle dtypes: # - int type: int64, (no test here: uint8, int8, int16, int32) # - float type: float32, (no test here: float64) -# - Python scalar dtypes: +# - Python scalar dtypes: # - int(64) # - float(64) class TestTensorScalarTypePromotionStatic(unittest.TestCase): + def setUp(self): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_tensor_to_list.py b/python/paddle/fluid/tests/unittests/test_tensor_to_list.py index a78113030ed..5b8e2b18d9e 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_to_list.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_to_list.py @@ -21,6 +21,7 @@ from paddle.fluid.framework import _test_eager_guard class TensorToListTest(unittest.TestCase): + def setUp(self): self.shape = [11, 25, 32, 43] @@ -31,8 +32,8 @@ class 
TensorToListTest(unittest.TestCase): places.append(fluid.CUDAPinnedPlace()) for p in places: - np_arr = np.reshape( - np.array(six.moves.range(np.prod(self.shape))), self.shape) + np_arr = np.reshape(np.array(six.moves.range(np.prod(self.shape))), + self.shape) expectlist = np_arr.tolist() t = paddle.to_tensor(np_arr, place=p) diff --git a/python/paddle/fluid/tests/unittests/test_tensor_to_numpy.py b/python/paddle/fluid/tests/unittests/test_tensor_to_numpy.py index 003f27652ef..635e08e3811 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_to_numpy.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_to_numpy.py @@ -19,6 +19,7 @@ import six class TensorToNumpyTest(unittest.TestCase): + def setUp(self): self.shape = [11, 25, 32, 43] @@ -35,8 +36,8 @@ class TensorToNumpyTest(unittest.TestCase): for p in places: for dtype in dtypes: np_arr = np.reshape( - np.array(six.moves.range(np.prod(self.shape))).astype( - dtype), self.shape) + np.array(six.moves.range(np.prod( + self.shape))).astype(dtype), self.shape) t = fluid.LoDTensor() t.set(np_arr, p) diff --git a/python/paddle/fluid/tests/unittests/test_tensor_type_promotion.py b/python/paddle/fluid/tests/unittests/test_tensor_type_promotion.py index c2543645853..4aa8c429b0d 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_type_promotion.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_type_promotion.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -21,6 +21,7 @@ import paddle class TestTensorTypePromotion(unittest.TestCase): + def setUp(self): self.x = paddle.to_tensor([2, 3]) self.y = paddle.to_tensor([1.0, 2.0]) @@ -30,29 +31,29 @@ class TestTensorTypePromotion(unittest.TestCase): warnings.simplefilter("always") self.x + self.y self.assertTrue( - "The dtype of left and right variables are not the same" in - str(context[-1].message)) + "The dtype of left and right variables are not the same" in str( + context[-1].message)) with warnings.catch_warnings(record=True) as context: warnings.simplefilter("always") self.x - self.y self.assertTrue( - "The dtype of left and right variables are not the same" in - str(context[-1].message)) + "The dtype of left and right variables are not the same" in str( + context[-1].message)) with warnings.catch_warnings(record=True) as context: warnings.simplefilter("always") self.x * self.y self.assertTrue( - "The dtype of left and right variables are not the same" in - str(context[-1].message)) + "The dtype of left and right variables are not the same" in str( + context[-1].message)) with warnings.catch_warnings(record=True) as context: warnings.simplefilter("always") self.x / self.y self.assertTrue( - "The dtype of left and right variables are not the same" in - str(context[-1].message)) + "The dtype of left and right variables are not the same" in str( + context[-1].message)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_tensor_uva.py b/python/paddle/fluid/tests/unittests/test_tensor_uva.py index 4af04b8f6d4..8e62d040041 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_uva.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_uva.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -20,6 +20,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class TestTensorCopyFrom(unittest.TestCase): + def func_main(self): if paddle.fluid.core.is_compiled_with_cuda(): place = paddle.CPUPlace() @@ -35,6 +36,7 @@ class TestTensorCopyFrom(unittest.TestCase): class TestUVATensorFromNumpy(unittest.TestCase): + def func_uva_tensor_creation(self): if paddle.fluid.core.is_compiled_with_cuda(): dtype_list = [ diff --git a/python/paddle/fluid/tests/unittests/test_tensor_zero_.py b/python/paddle/fluid/tests/unittests/test_tensor_zero_.py index d47585f78bb..30825e637b9 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_zero_.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_zero_.py @@ -21,6 +21,7 @@ from paddle.fluid.framework import _test_eager_guard class TensorFill_Test(unittest.TestCase): + def setUp(self): self.shape = [32, 32] @@ -32,8 +33,8 @@ class TensorFill_Test(unittest.TestCase): places.append(fluid.CUDAPinnedPlace()) for p in places: - np_arr = np.reshape( - np.array(six.moves.range(np.prod(self.shape))), self.shape) + np_arr = np.reshape(np.array(six.moves.range(np.prod(self.shape))), + self.shape) for dtype in typelist: tensor = paddle.to_tensor(np_arr, place=p, dtype=dtype) target = tensor.numpy() diff --git a/python/paddle/fluid/tests/unittests/test_tensordot.py b/python/paddle/fluid/tests/unittests/test_tensordot.py index 04b140cba4c..e5d563455e8 100644 --- a/python/paddle/fluid/tests/unittests/test_tensordot.py +++ b/python/paddle/fluid/tests/unittests/test_tensordot.py @@ -66,6 +66,7 @@ def tensordot_np(x, y, axes): class TestTensordotAPI(unittest.TestCase): + def setUp(self): self.set_place() self.set_dtype() @@ -90,50 +91,60 @@ class TestTensordotAPI(unittest.TestCase): self.y = np.random.random(self.y_shape).astype(self.dtype) def set_test_axes(self): - self.all_axes = [ - [[3, 2], [3]], [[2, 1, 0], [2, 1]], [[1, 2, 0], [1, 3, 2]], [3, 0], - [[], [0, 3, 1]], [[2, 1, 0, 3], [2, 0, 1, 3]], - [[3, 1, 2], [1, 3, 2, 0]], [[2, 1], [0, 2]], [[2, 0, 1, 3], [2]], - [[1, 2, 0, 3], [0, 2, 1]], [[2, 1, 3, 0], [1, 2, 3]], - [[2, 0, 1, 3], [3, 1, 0, 2]], [[0, 3], [0, 3, 2, 1]], - [[1, 3, 2, 0], [2, 1, 0, 3]], [[1, 3, 2, 0], [1, 3, 2, 0]], - [[1, 0, 2], [0, 1]], [[2, 3, 0], [3, 1]], - [[1, 3, 2, 0], [3, 0, 1, 2]], [[3, 2, 1], [2, 0, 1]], [[0], []], - [[2, 3, 0], [1, 2, 0]], [[3, 0, 2, 1], [2, 1, 0, 3]], - [[3, 1, 2], [2, 3, 1]], [[1, 0, 2, 3], []], [[1, 2], [1, 2, 3]], - [[2, 0, 1, 3], [2, 0, 1]], [[3, 1, 2], [1, 3, 2]], - [[3, 1, 2, 0], [1, 2, 3, 0]], [[0, 2, 3], [0, 1, 2]], - [[3, 2, 0], [2, 0, 3, 1]], [[2, 1, 0, 3], [3, 1, 2, 0]], - [[1, 2, 3, 0], [1, 3, 0, 2]], [[3, 0], [2, 1]], - [[0, 1, 3, 2], [0, 2, 1, 3]], [[1, 0], [2, 1, 3]], - [[1, 0, 3, 2], [2, 3, 0, 1]], [[1, 2], [3]], - [[1, 2, 3, 0], [3, 2, 1, 0]], [[0, 3, 2, 1], [2, 1, 3, 0]], [0], - [[0, 2, 3], [3, 2, 0, 1]], [[1, 2, 3, 0], [3, 2, 1, 0]], - [[3, 1], [3]], [[3, 2, 0, 1], [3, 2, 0]], [[2, 3, 0, 1], [0, 3, 2]], - [[1], [1, 3]], [[1, 2], [2, 1, 0]], [[3, 1, 2], [3, 1, 0]], - [[1, 3], [3, 1, 2]], [[2, 0, 1, 3], [3, 1, 0, 2]], - [[1, 3, 0], [1, 3]], [[2, 3, 1], [1, 0, 2]], - [[1, 2, 0, 3], [0, 2, 1, 3]], [[2], [0, 1, 3]], [[1], [1, 2]], - [[1, 0, 2, 3], [3, 0, 1, 2]], [[0, 1, 3, 2], [1, 3, 0, 2]], - [[3, 0, 2, 1], [0, 2, 3]], [[1, 2, 0], [1, 2, 3]], - [[1, 0, 3], [2, 3, 0]], [[2, 3, 0], [3, 1, 0]], [[1, 3], [1, 0]], - [[2, 1, 0, 3], [2, 0, 3, 1]], [[3, 2, 0], [2, 1, 0]], - [[0, 1, 3], [0, 3, 1]], [[3, 1, 0], [3, 2, 1]], [[3, 2], [3, 1]], - [[3], [2, 1, 0]], [[1, 2, 3, 0], []], 
[[1, 3, 2, 0], [3, 1, 2]], - [[1], [0, 2]], [[3, 2, 0], [3, 2, 0]], [[3], []], - [[1, 0, 3], [2, 1]], [[3, 1, 0, 2], [2, 3, 1, 0]], - [[0, 1], [0, 3, 2]], [[0, 2, 3], [0, 2, 1]], [[1, 3, 0], [3, 0, 2]], - [[3, 1, 2], [1, 2, 3]], [[3, 1, 2], [3, 1, 0]], - [[0, 3, 1, 2], [3, 2, 1, 0]], [[0, 3], [3, 2, 1]], - [[2, 3], [1, 3, 0]], [[0, 3, 2], [2, 0, 3, 1]], [[2, 3], [1, 3]], - [[3, 1, 2, 0], [2, 3, 1, 0]], [[1, 0, 3, 2], [3, 0, 1, 2]], - [[3, 2, 1, 0], [0, 1, 3, 2]], [[3, 1, 2], [3]], - [[0, 1, 3, 2], [2, 3, 0, 1]], [[1, 2, 3, 0], [1, 3, 0, 2]], - [3, 1, 2], [[3, 1, 2], [0, 3, 2]], [[2, 3, 0], [1, 2, 0]], - [[2, 0, 3], [2, 0]], [[3, 1, 0, 2], [3, 1, 0, 2]], - [[0, 1, 2], [2, 0, 1]], [[1, 0, 3], [2, 3, 0]], - [[2, 0, 1], [0, 1, 3]], [[2, 1], [0, 1, 3]] - ] + self.all_axes = [[[3, 2], [3]], [[2, 1, 0], [2, 1]], + [[1, 2, 0], [1, 3, 2]], [3, 0], [[], [0, 3, 1]], + [[2, 1, 0, 3], [2, 0, 1, 3]], [[3, 1, 2], [1, 3, 2, + 0]], + [[2, 1], [0, 2]], [[2, 0, 1, 3], [2]], + [[1, 2, 0, 3], [0, 2, 1]], [[2, 1, 3, 0], [1, 2, 3]], + [[2, 0, 1, 3], [3, 1, 0, 2]], [[0, 3], [0, 3, 2, 1]], + [[1, 3, 2, 0], [2, 1, 0, 3]], + [[1, 3, 2, 0], [1, 3, 2, 0]], [[1, 0, 2], [0, 1]], + [[2, 3, 0], [3, 1]], [[1, 3, 2, 0], [3, 0, 1, 2]], + [[3, 2, 1], [2, 0, 1]], [[0], []], + [[2, 3, 0], [1, 2, 0]], [[3, 0, 2, 1], [2, 1, 0, 3]], + [[3, 1, 2], [2, 3, 1]], [[1, 0, 2, 3], []], + [[1, 2], [1, 2, 3]], [[2, 0, 1, 3], [2, 0, 1]], + [[3, 1, 2], [1, 3, 2]], [[3, 1, 2, 0], [1, 2, 3, 0]], + [[0, 2, 3], [0, 1, 2]], [[3, 2, 0], [2, 0, 3, 1]], + [[2, 1, 0, 3], [3, 1, 2, 0]], + [[1, 2, 3, 0], [1, 3, 0, 2]], [[3, 0], [2, 1]], + [[0, 1, 3, 2], [0, 2, 1, 3]], [[1, 0], [2, 1, 3]], + [[1, 0, 3, 2], [2, 3, 0, 1]], [[1, 2], [3]], + [[1, 2, 3, 0], [3, 2, 1, 0]], + [[0, 3, 2, 1], [2, 1, 3, 0]], [0], + [[0, 2, 3], [3, 2, 0, 1]], [[1, 2, 3, 0], [3, 2, 1, + 0]], + [[3, 1], [3]], [[3, 2, 0, 1], [3, 2, 0]], + [[2, 3, 0, 1], [0, 3, 2]], [[1], [1, 3]], + [[1, 2], [2, 1, 0]], [[3, 1, 2], [3, 1, 0]], + [[1, 3], [3, 1, 2]], [[2, 0, 1, 3], [3, 1, 0, 2]], + [[1, 3, 0], [1, 3]], [[2, 3, 1], [1, 0, 2]], + [[1, 2, 0, 3], [0, 2, 1, 3]], [[2], [0, 1, 3]], + [[1], [1, 2]], [[1, 0, 2, 3], [3, 0, 1, 2]], + [[0, 1, 3, 2], [1, 3, 0, 2]], [[3, 0, 2, 1], [0, 2, + 3]], + [[1, 2, 0], [1, 2, 3]], [[1, 0, 3], [2, 3, 0]], + [[2, 3, 0], [3, 1, 0]], [[1, 3], [1, 0]], + [[2, 1, 0, 3], [2, 0, 3, 1]], [[3, 2, 0], [2, 1, 0]], + [[0, 1, 3], [0, 3, 1]], [[3, 1, 0], [3, 2, 1]], + [[3, 2], [3, 1]], [[3], [2, 1, 0]], [[1, 2, 3, 0], []], + [[1, 3, 2, 0], [3, 1, 2]], [[1], [0, 2]], + [[3, 2, 0], [3, 2, 0]], [[3], []], [[1, 0, 3], [2, 1]], + [[3, 1, 0, 2], [2, 3, 1, 0]], [[0, 1], [0, 3, 2]], + [[0, 2, 3], [0, 2, 1]], [[1, 3, 0], [3, 0, 2]], + [[3, 1, 2], [1, 2, 3]], [[3, 1, 2], [3, 1, 0]], + [[0, 3, 1, 2], [3, 2, 1, 0]], [[0, 3], [3, 2, 1]], + [[2, 3], [1, 3, 0]], [[0, 3, 2], [2, 0, 3, 1]], + [[2, 3], [1, 3]], [[3, 1, 2, 0], [2, 3, 1, 0]], + [[1, 0, 3, 2], [3, 0, 1, 2]], + [[3, 2, 1, 0], [0, 1, 3, 2]], [[3, 1, 2], [3]], + [[0, 1, 3, 2], [2, 3, 0, 1]], + [[1, 2, 3, 0], [1, 3, 0, 2]], [3, 1, 2], + [[3, 1, 2], [0, 3, 2]], [[2, 3, 0], [1, 2, 0]], + [[2, 0, 3], [2, 0]], [[3, 1, 0, 2], [3, 1, 0, 2]], + [[0, 1, 2], [2, 0, 1]], [[1, 0, 3], [2, 3, 0]], + [[2, 0, 1], [0, 1, 3]], [[2, 1], [0, 1, 3]]] def test_dygraph(self): paddle.disable_static() @@ -151,63 +162,74 @@ class TestTensordotAPI(unittest.TestCase): for place in self.places: with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - x = paddle.static.data( - name='x', shape=self.x_shape, dtype=self.dtype) - y = 
paddle.static.data( - name='y', shape=self.y_shape, dtype=self.dtype) + x = paddle.static.data(name='x', + shape=self.x_shape, + dtype=self.dtype) + y = paddle.static.data(name='y', + shape=self.y_shape, + dtype=self.dtype) z = paddle.tensordot(x, y, axes) exe = paddle.static.Executor(place) - paddle_res = exe.run(feed={'x': self.x, - 'y': self.y}, + paddle_res = exe.run(feed={ + 'x': self.x, + 'y': self.y + }, fetch_list=[z]) np_res = tensordot_np(self.x, self.y, axes) np.testing.assert_allclose(paddle_res[0], np_res, rtol=1e-6) class TestTensordotAPIFloat64(TestTensordotAPI): + def set_dtype(self): self.dtype = np.float64 class TestTensordotAPIBroadcastCase1(TestTensordotAPI): + def set_input_shape(self): self.x_shape = [1, 1, 1, 5] self.y_shape = [1, 5, 1, 1] class TestTensordotAPIBroadcastCase2(TestTensordotAPI): + def set_input_shape(self): self.x_shape = [1, 5, 5, 5] self.y_shape = [1, 1, 1, 5] class TestTensordotAPIBroadcastCase3(TestTensordotAPI): + def set_input_shape(self): self.x_shape = [5, 5, 5, 1] self.y_shape = [5, 5, 1, 5] class TestTensordotAPIBroadcastCase4(TestTensordotAPI): + def set_input_shape(self): self.x_shape = [5, 5, 5, 1] self.y_shape = [1, 1, 1, 1] class TestTensordotAPIBroadcastCase5(TestTensordotAPI): + def set_input_shape(self): self.x_shape = [1, 1, 5, 5] self.y_shape = [5, 5, 1, 5] class TestTensordotAPIAxesType(TestTensordotAPI): + def set_input_shape(self): self.x_shape = [3, 4, 4] self.y_shape = [4, 4, 5] def set_test_axes(self): self.all_axes = [ - 0, 1, 2, (1, ), [1], ((1, ), ), ([1], ), ((2, 1), (0, )), ( - (1, 2), (0, 1)), ([1, 2], [0, 1]), ([1, 2], [0, 1]), + 0, 1, 2, (1, ), [1], ((1, ), ), ([1], ), ((2, 1), (0, )), + ((1, 2), (0, 1)), ([1, 2], [0, 1]), ([1, 2], [0, 1]), [[1, 2], [0, 1]] ] @@ -217,7 +239,8 @@ class TestTensordotAPIAxesType(TestTensordotAPI): tensor_axes = [ paddle.to_tensor([1]), (paddle.to_tensor([1])), (paddle.to_tensor([1, 2]), paddle.to_tensor([0, 1])), - [paddle.to_tensor([1, 2]), paddle.to_tensor([0, 1])], + [paddle.to_tensor([1, 2]), + paddle.to_tensor([0, 1])], paddle.to_tensor([[1, 2], [0, 1]]) ] @@ -241,6 +264,7 @@ class TestTensordotAPIAxesType(TestTensordotAPI): class TestTensordotAPIAxesTypeFloat64(TestTensordotAPIAxesType): + def set_dtype(self): self.dtype = np.float64 diff --git a/python/paddle/fluid/tests/unittests/test_tf32_cublas.py b/python/paddle/fluid/tests/unittests/test_tf32_cublas.py index 32d8c3dc322..ce08c8db89e 100644 --- a/python/paddle/fluid/tests/unittests/test_tf32_cublas.py +++ b/python/paddle/fluid/tests/unittests/test_tf32_cublas.py @@ -21,6 +21,7 @@ import paddle.fluid.core as core class TestTF32Switch(unittest.TestCase): + def test_on_off(self): if core.is_compiled_with_cuda(): place = fluid.CUDAPlace(0) @@ -36,6 +37,7 @@ class TestTF32Switch(unittest.TestCase): class TestTF32OnMatmul(unittest.TestCase): + def test_dygraph_without_out(self): if core.is_compiled_with_cuda(): place = fluid.CUDAPlace(0) diff --git a/python/paddle/fluid/tests/unittests/test_tf32_cudnn.py b/python/paddle/fluid/tests/unittests/test_tf32_cudnn.py index 48127c2a90b..fb1687bc1b7 100644 --- a/python/paddle/fluid/tests/unittests/test_tf32_cudnn.py +++ b/python/paddle/fluid/tests/unittests/test_tf32_cudnn.py @@ -1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,6 +21,7 @@ import paddle.fluid.core as core class TestTF32Switch(unittest.TestCase): + def test_on_off(self): if core.is_compiled_with_cuda(): self.assertTrue(core.get_cudnn_switch()) # default diff --git a/python/paddle/fluid/tests/unittests/test_tile_op.py b/python/paddle/fluid/tests/unittests/test_tile_op.py index 8359141f309..c1c6820d9c1 100644 --- a/python/paddle/fluid/tests/unittests/test_tile_op.py +++ b/python/paddle/fluid/tests/unittests/test_tile_op.py @@ -24,6 +24,7 @@ from paddle.fluid import compiler, Program, program_guard #Situation 1: repeat_times is a list (without tensor) class TestTileOpRank1(OpTest): + def setUp(self): self.op_type = "tile" self.init_data() @@ -46,36 +47,42 @@ class TestTileOpRank1(OpTest): # with dimension expanding class TestTileOpRank2Expanding(TestTileOpRank1): + def init_data(self): self.ori_shape = [120] self.repeat_times = [2, 2] class TestTileOpRank2(TestTileOpRank1): + def init_data(self): self.ori_shape = [12, 14] self.repeat_times = [2, 3] class TestTileOpRank3_Corner(TestTileOpRank1): + def init_data(self): self.ori_shape = (2, 10, 5) self.repeat_times = (1, 1, 1) class TestTileOpRank3_Corner2(TestTileOpRank1): + def init_data(self): self.ori_shape = (2, 10, 5) self.repeat_times = (2, 2) class TestTileOpRank3(TestTileOpRank1): + def init_data(self): self.ori_shape = (2, 4, 15) self.repeat_times = (2, 1, 4) class TestTileOpRank4(TestTileOpRank1): + def init_data(self): self.ori_shape = (2, 4, 5, 7) self.repeat_times = (3, 2, 1, 2) @@ -83,6 +90,7 @@ class TestTileOpRank4(TestTileOpRank1): # Situation 2: repeat_times is a list (with tensor) class TestTileOpRank1_tensor_attr(OpTest): + def setUp(self): self.op_type = "tile" self.init_data() @@ -112,6 +120,7 @@ class TestTileOpRank1_tensor_attr(OpTest): class TestTileOpRank2_Corner_tensor_attr(TestTileOpRank1_tensor_attr): + def init_data(self): self.ori_shape = [12, 14] self.repeat_times = [1, 1] @@ -119,6 +128,7 @@ class TestTileOpRank2_Corner_tensor_attr(TestTileOpRank1_tensor_attr): class TestTileOpRank2_attr_tensor(TestTileOpRank1_tensor_attr): + def init_data(self): self.ori_shape = [12, 14] self.repeat_times = [2, 3] @@ -127,6 +137,7 @@ class TestTileOpRank2_attr_tensor(TestTileOpRank1_tensor_attr): # Situation 3: repeat_times is a tensor class TestTileOpRank1_tensor(OpTest): + def setUp(self): self.op_type = "tile" self.init_data() @@ -151,6 +162,7 @@ class TestTileOpRank1_tensor(OpTest): class TestTileOpRank2_tensor(TestTileOpRank1_tensor): + def init_data(self): self.ori_shape = [12, 14] self.repeat_times = [2, 3] @@ -158,11 +170,11 @@ class TestTileOpRank2_tensor(TestTileOpRank1_tensor): # Situation 4: input x is Integer class TestTileOpInteger(OpTest): + def setUp(self): self.op_type = "tile" self.inputs = { - 'X': np.random.randint( - 10, size=(4, 4, 5)).astype("int32") + 'X': np.random.randint(10, size=(4, 4, 5)).astype("int32") } self.attrs = {'repeat_times': [2, 1, 4]} output = np.tile(self.inputs['X'], (2, 1, 4)) @@ -174,6 +186,7 @@ class TestTileOpInteger(OpTest): # Situation 5: input x is Bool class TestTileOpBoolean(OpTest): + def setUp(self): self.op_type = "tile" self.inputs = {'X': np.random.randint(2, size=(2, 4, 5)).astype("bool")} @@ -187,11 +200,11 @@ class 
TestTileOpBoolean(OpTest): # Situation 56: input x is Integer class TestTileOpInt64_t(OpTest): + def setUp(self): self.op_type = "tile" self.inputs = { - 'X': np.random.randint( - 10, size=(2, 4, 5)).astype("int64") + 'X': np.random.randint(10, size=(2, 4, 5)).astype("int64") } self.attrs = {'repeat_times': [2, 1, 4]} output = np.tile(self.inputs['X'], (2, 1, 4)) @@ -202,10 +215,11 @@ class TestTileOpInt64_t(OpTest): class TestTileError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) repeat_times = [2, 2] self.assertRaises(TypeError, paddle.tile, x1, repeat_times) x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") @@ -216,6 +230,7 @@ class TestTileError(unittest.TestCase): class TestTileAPIStatic(unittest.TestCase): + def test_api(self): with program_guard(Program(), Program()): repeat_times = [2, 2] @@ -227,6 +242,7 @@ class TestTileAPIStatic(unittest.TestCase): # Test python API class TestTileAPI(unittest.TestCase): + def test_api(self): with fluid.dygraph.guard(): np_x = np.random.random([12, 14]).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_top_k_op.py b/python/paddle/fluid/tests/unittests/test_top_k_op.py index 83a940d064e..4b67c9fd11a 100644 --- a/python/paddle/fluid/tests/unittests/test_top_k_op.py +++ b/python/paddle/fluid/tests/unittests/test_top_k_op.py @@ -22,6 +22,7 @@ import paddle class TestTopkOp(OpTest): + def setUp(self): self.variable_k = False self.set_args() diff --git a/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py b/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py index c4f50414f95..4e2aecaca13 100644 --- a/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py @@ -39,6 +39,7 @@ def numpy_topk(x, k=1, axis=-1, largest=True): class TestTopkOp(OpTest): + def init_args(self): self.k = 3 self.axis = 1 @@ -52,8 +53,10 @@ class TestTopkOp(OpTest): self.init_args() self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest} - output, indices = numpy_topk( - self.input_data, axis=self.axis, k=self.k, largest=self.largest) + output, indices = numpy_topk(self.input_data, + axis=self.axis, + k=self.k, + largest=self.largest) self.outputs = {'Out': output, 'Indices': indices} def test_check_output(self): @@ -64,6 +67,7 @@ class TestTopkOp(OpTest): class TestTopkOp1(TestTopkOp): + def init_args(self): self.k = 3 self.axis = 0 @@ -71,6 +75,7 @@ class TestTopkOp1(TestTopkOp): class TestTopkOp2(TestTopkOp): + def init_args(self): self.k = 4 self.axis = 0 @@ -78,6 +83,7 @@ class TestTopkOp2(TestTopkOp): class TestTopkOp3(OpTest): + def init_args(self): self.k = 6 self.axis = 1 @@ -91,12 +97,15 @@ class TestTopkOp3(OpTest): self.init_args() self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest} - output, indices = numpy_topk( - self.input_data, axis=self.axis, k=self.k, largest=self.largest) + output, indices = numpy_topk(self.input_data, + axis=self.axis, + k=self.k, + largest=self.largest) self.outputs = {'Out': output, 'Indices': indices} class TestTopkOp4(TestTopkOp): + def init_args(self): self.k = 3 self.axis = 1 @@ -110,12 +119,15 @@ class TestTopkOp4(TestTopkOp): self.init_args() self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis, 'largest': 
self.largest} - output, indices = numpy_topk( - self.input_data, axis=self.axis, k=self.k, largest=self.largest) + output, indices = numpy_topk(self.input_data, + axis=self.axis, + k=self.k, + largest=self.largest) self.outputs = {'Out': output, 'Indices': indices} class TestTopkOp5(TestTopkOp): + def init_args(self): self.k = 3 self.axis = 1 @@ -129,12 +141,15 @@ class TestTopkOp5(TestTopkOp): self.init_args() self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest} - output, indices = numpy_topk( - self.input_data, axis=self.axis, k=self.k, largest=self.largest) + output, indices = numpy_topk(self.input_data, + axis=self.axis, + k=self.k, + largest=self.largest) self.outputs = {'Out': output, 'Indices': indices} class TestTopkOp6(OpTest): + def init_args(self): self.k = 100 self.axis = 1 @@ -148,12 +163,15 @@ class TestTopkOp6(OpTest): self.init_args() self.inputs = {'X': self.input_data} self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest} - output, indices = numpy_topk( - self.input_data, axis=self.axis, k=self.k, largest=self.largest) + output, indices = numpy_topk(self.input_data, + axis=self.axis, + k=self.k, + largest=self.largest) self.outputs = {'Out': output, 'Indices': indices} class TestTopKAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.input_data = np.random.rand(6, 7, 8) @@ -187,35 +205,44 @@ class TestTopKAPI(unittest.TestCase): np.allclose(paddle_result[1].numpy(), numpy_result[1])) # test case for basic test case 4 with tensor largest k_tensor = paddle.to_tensor(np.array([2])) - paddle_result = paddle.topk( - input_tensor, k=2, axis=1, largest=False) - numpy_result = numpy_topk( - self.input_data, k=2, axis=1, largest=False) + paddle_result = paddle.topk(input_tensor, + k=2, + axis=1, + largest=False) + numpy_result = numpy_topk(self.input_data, + k=2, + axis=1, + largest=False) self.assertTrue( np.allclose(paddle_result[0].numpy(), numpy_result[0])) self.assertTrue( np.allclose(paddle_result[1].numpy(), numpy_result[1])) # test case for basic test case 5 with axis -1 k_tensor = paddle.to_tensor(np.array([2])) - paddle_result = paddle.topk( - input_tensor, k=2, axis=-1, largest=False) - numpy_result = numpy_topk( - self.input_data, k=2, axis=-1, largest=False) + paddle_result = paddle.topk(input_tensor, + k=2, + axis=-1, + largest=False) + numpy_result = numpy_topk(self.input_data, + k=2, + axis=-1, + largest=False) self.assertTrue( np.allclose(paddle_result[0].numpy(), numpy_result[0])) self.assertTrue( np.allclose(paddle_result[1].numpy(), numpy_result[1])) - # test case for basic test case 6 for the partial sort + # test case for basic test case 6 for the partial sort paddle_result = paddle.topk(large_input_tensor, k=1, axis=-1) numpy_result = numpy_topk(self.large_input_data, k=1, axis=-1) self.assertTrue( np.allclose(paddle_result[0].numpy(), numpy_result[0])) self.assertTrue( np.allclose(paddle_result[1].numpy(), numpy_result[1])) - # test case for basic test case 7 for the unsorted + # test case for basic test case 7 for the unsorted paddle_result = paddle.topk(input_tensor, k=2, axis=1, sorted=False) - sort_paddle = numpy_topk( - np.array(paddle_result[0].numpy()), axis=1, k=2) + sort_paddle = numpy_topk(np.array(paddle_result[0].numpy()), + axis=1, + k=2) numpy_result = numpy_topk(self.input_data, k=2, axis=1) self.assertTrue(np.allclose(sort_paddle[0], numpy_result[0])) @@ -223,10 +250,12 @@ class TestTopKAPI(unittest.TestCase): paddle.enable_static() with 
paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - input_tensor = paddle.static.data( - name="x", shape=[6, 7, 8], dtype="float64") - large_input_tensor = paddle.static.data( - name="large_x", shape=[2, 1030], dtype="float64") + input_tensor = paddle.static.data(name="x", + shape=[6, 7, 8], + dtype="float64") + large_input_tensor = paddle.static.data(name="large_x", + shape=[2, 1030], + dtype="float64") k_tensor = paddle.static.data(name="k", shape=[1], dtype="int32") result1 = paddle.topk(input_tensor, k=2) result2 = paddle.topk(input_tensor, k=2, axis=-1) @@ -240,17 +269,18 @@ class TestTopKAPI(unittest.TestCase): exe = paddle.static.Executor(place) input_data = np.random.rand(10, 20).astype("float64") large_input_data = np.random.rand(2, 100).astype("float64") - paddle_result = exe.run( - feed={ - "x": self.input_data, - "large_x": self.large_input_data, - "k": np.array([2]).astype("int32") - }, - fetch_list=[ - result1[0], result1[1], result2[0], result2[1], result3[0], - result3[1], result4[0], result4[1], result5[0], result5[1], - result6[0], result6[1], result7[0], result7[1] - ]) + paddle_result = exe.run(feed={ + "x": self.input_data, + "large_x": self.large_input_data, + "k": np.array([2]).astype("int32") + }, + fetch_list=[ + result1[0], result1[1], result2[0], + result2[1], result3[0], result3[1], + result4[0], result4[1], result5[0], + result5[1], result6[0], result6[1], + result7[0], result7[1] + ]) numpy_result = numpy_topk(self.input_data, k=2) self.assertTrue(np.allclose(paddle_result[0], numpy_result[0])) self.assertTrue(np.allclose(paddle_result[1], numpy_result[1])) @@ -260,12 +290,16 @@ class TestTopKAPI(unittest.TestCase): numpy_result = numpy_topk(self.input_data, k=2, axis=1) self.assertTrue(np.allclose(paddle_result[4], numpy_result[0])) self.assertTrue(np.allclose(paddle_result[5], numpy_result[1])) - numpy_result = numpy_topk( - self.input_data, k=2, axis=1, largest=False) + numpy_result = numpy_topk(self.input_data, + k=2, + axis=1, + largest=False) self.assertTrue(np.allclose(paddle_result[6], numpy_result[0])) self.assertTrue(np.allclose(paddle_result[7], numpy_result[1])) - numpy_result = numpy_topk( - self.input_data, k=2, axis=-1, largest=False) + numpy_result = numpy_topk(self.input_data, + k=2, + axis=-1, + largest=False) self.assertTrue(np.allclose(paddle_result[8], numpy_result[0])) self.assertTrue(np.allclose(paddle_result[9], numpy_result[1])) numpy_result = numpy_topk(self.large_input_data, k=1, axis=-1) diff --git a/python/paddle/fluid/tests/unittests/test_trace_op.py b/python/paddle/fluid/tests/unittests/test_trace_op.py index 3320b240e56..bb6bbcf4e9c 100644 --- a/python/paddle/fluid/tests/unittests/test_trace_op.py +++ b/python/paddle/fluid/tests/unittests/test_trace_op.py @@ -25,6 +25,7 @@ import paddle class TestTraceOp(OpTest): + def setUp(self): self.op_type = "trace" self.init_config() @@ -44,30 +45,31 @@ class TestTraceOp(OpTest): class TestTraceOpCase1(TestTraceOp): + def init_config(self): self.case = np.random.randn(2, 20, 2, 3).astype('float32') self.inputs = {'Input': self.case} self.attrs = {'offset': 1, 'axis1': 0, 'axis2': 2} - self.target = np.trace( - self.inputs['Input'], - offset=self.attrs['offset'], - axis1=self.attrs['axis1'], - axis2=self.attrs['axis2']) + self.target = np.trace(self.inputs['Input'], + offset=self.attrs['offset'], + axis1=self.attrs['axis1'], + axis2=self.attrs['axis2']) class TestTraceOpCase2(TestTraceOp): + def init_config(self): self.case = np.random.randn(2, 20, 2, 
3).astype('float32') self.inputs = {'Input': self.case} self.attrs = {'offset': -5, 'axis1': 1, 'axis2': -1} - self.target = np.trace( - self.inputs['Input'], - offset=self.attrs['offset'], - axis1=self.attrs['axis1'], - axis2=self.attrs['axis2']) + self.target = np.trace(self.inputs['Input'], + offset=self.attrs['offset'], + axis1=self.attrs['axis1'], + axis2=self.attrs['axis2']) class TestTraceAPICase(unittest.TestCase): + def test_case1(self): case = np.random.randn(2, 20, 2, 3).astype('float32') data1 = fluid.data(name='data1', shape=[2, 20, 2, 3], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/test_traced_layer_err_msg.py b/python/paddle/fluid/tests/unittests/test_traced_layer_err_msg.py index 5703ce13131..a2ccfa925ed 100644 --- a/python/paddle/fluid/tests/unittests/test_traced_layer_err_msg.py +++ b/python/paddle/fluid/tests/unittests/test_traced_layer_err_msg.py @@ -22,6 +22,7 @@ import os class SimpleFCLayer(nn.Layer): + def __init__(self, feature_size, batch_size, fc_size): super(SimpleFCLayer, self).__init__() self._linear = nn.Linear(feature_size, fc_size) @@ -34,6 +35,7 @@ class SimpleFCLayer(nn.Layer): class LinearNetWithNone(nn.Layer): + def __init__(self, feature_size, fc_size): super(LinearNetWithNone, self).__init__() self._linear = nn.Linear(feature_size, fc_size) @@ -45,6 +47,7 @@ class LinearNetWithNone(nn.Layer): class TestTracedLayerErrMsg(unittest.TestCase): + def setUp(self): self.batch_size = 4 self.feature_size = 3 @@ -57,27 +60,27 @@ class TestTracedLayerErrMsg(unittest.TestCase): return with fluid.dygraph.guard(): in_x = fluid.dygraph.to_variable( - np.random.random((self.batch_size, self.feature_size)).astype( - 'float32')) + np.random.random( + (self.batch_size, self.feature_size)).astype('float32')) with self.assertRaises(AssertionError) as e: dygraph_out, traced_layer = fluid.dygraph.TracedLayer.trace( None, [in_x]) self.assertEqual( - "The type of 'layer' in fluid.dygraph.jit.TracedLayer.trace must be fluid.dygraph.Layer, but received <{} 'NoneType'>.". - format(self.type_str), str(e.exception)) + "The type of 'layer' in fluid.dygraph.jit.TracedLayer.trace must be fluid.dygraph.Layer, but received <{} 'NoneType'>." + .format(self.type_str), str(e.exception)) with self.assertRaises(TypeError) as e: dygraph_out, traced_layer = fluid.dygraph.TracedLayer.trace( self.layer, 3) self.assertEqual( - "The type of 'each element of inputs' in fluid.dygraph.jit.TracedLayer.trace must be fluid.Variable, but received <{} 'int'>.". - format(self.type_str), str(e.exception)) + "The type of 'each element of inputs' in fluid.dygraph.jit.TracedLayer.trace must be fluid.Variable, but received <{} 'int'>." + .format(self.type_str), str(e.exception)) with self.assertRaises(TypeError) as e: dygraph_out, traced_layer = fluid.dygraph.TracedLayer.trace( self.layer, [True, 1]) self.assertEqual( - "The type of 'each element of inputs' in fluid.dygraph.jit.TracedLayer.trace must be fluid.Variable, but received <{} 'bool'>.". - format(self.type_str), str(e.exception)) + "The type of 'each element of inputs' in fluid.dygraph.jit.TracedLayer.trace must be fluid.Variable, but received <{} 'bool'>." 
+ .format(self.type_str), str(e.exception)) dygraph_out, traced_layer = fluid.dygraph.TracedLayer.trace( self.layer, [in_x]) @@ -87,22 +90,22 @@ class TestTracedLayerErrMsg(unittest.TestCase): return with fluid.dygraph.guard(): in_x = fluid.dygraph.to_variable( - np.random.random((self.batch_size, self.feature_size)).astype( - 'float32')) + np.random.random( + (self.batch_size, self.feature_size)).astype('float32')) dygraph_out, traced_layer = fluid.dygraph.TracedLayer.trace( self.layer, [in_x]) with self.assertRaises(AssertionError) as e: traced_layer.set_strategy(1, fluid.ExecutionStrategy()) self.assertEqual( - "The type of 'build_strategy' in fluid.dygraph.jit.TracedLayer.set_strategy must be fluid.BuildStrategy, but received <{} 'int'>.". - format(self.type_str), str(e.exception)) + "The type of 'build_strategy' in fluid.dygraph.jit.TracedLayer.set_strategy must be fluid.BuildStrategy, but received <{} 'int'>." + .format(self.type_str), str(e.exception)) with self.assertRaises(AssertionError) as e: traced_layer.set_strategy(fluid.BuildStrategy(), False) self.assertEqual( - "The type of 'exec_strategy' in fluid.dygraph.jit.TracedLayer.set_strategy must be fluid.ExecutionStrategy, but received <{} 'bool'>.". - format(self.type_str), str(e.exception)) + "The type of 'exec_strategy' in fluid.dygraph.jit.TracedLayer.set_strategy must be fluid.ExecutionStrategy, but received <{} 'bool'>." + .format(self.type_str), str(e.exception)) traced_layer.set_strategy(build_strategy=fluid.BuildStrategy()) traced_layer.set_strategy(exec_strategy=fluid.ExecutionStrategy()) @@ -114,8 +117,8 @@ class TestTracedLayerErrMsg(unittest.TestCase): return with fluid.dygraph.guard(): in_x = fluid.dygraph.to_variable( - np.random.random((self.batch_size, self.feature_size)).astype( - 'float32')) + np.random.random( + (self.batch_size, self.feature_size)).astype('float32')) dygraph_out, traced_layer = fluid.dygraph.TracedLayer.trace( self.layer, [in_x]) @@ -123,29 +126,29 @@ class TestTracedLayerErrMsg(unittest.TestCase): with self.assertRaises(TypeError) as e: traced_layer.save_inference_model([0]) self.assertEqual( - "The type of 'path' in fluid.dygraph.jit.TracedLayer.save_inference_model must be <{} 'str'>, but received <{} 'list'>. ". - format(self.type_str, self.type_str), str(e.exception)) + "The type of 'path' in fluid.dygraph.jit.TracedLayer.save_inference_model must be <{} 'str'>, but received <{} 'list'>. " + .format(self.type_str, self.type_str), str(e.exception)) with self.assertRaises(TypeError) as e: traced_layer.save_inference_model(path, [0], [None]) self.assertEqual( - "The type of 'each element of fetch' in fluid.dygraph.jit.TracedLayer.save_inference_model must be <{} 'int'>, but received <{} 'NoneType'>. ". - format(self.type_str, self.type_str), str(e.exception)) + "The type of 'each element of fetch' in fluid.dygraph.jit.TracedLayer.save_inference_model must be <{} 'int'>, but received <{} 'NoneType'>. " + .format(self.type_str, self.type_str), str(e.exception)) with self.assertRaises(TypeError) as e: traced_layer.save_inference_model(path, [0], False) self.assertEqual( - "The type of 'fetch' in fluid.dygraph.jit.TracedLayer.save_inference_model must be (<{} 'NoneType'>, <{} 'list'>), but received <{} 'bool'>. ". - format(self.type_str, self.type_str, self.type_str), + "The type of 'fetch' in fluid.dygraph.jit.TracedLayer.save_inference_model must be (<{} 'NoneType'>, <{} 'list'>), but received <{} 'bool'>. 
" + .format(self.type_str, self.type_str, self.type_str), str(e.exception)) with self.assertRaises(TypeError) as e: traced_layer.save_inference_model(path, [None], [0]) self.assertEqual( - "The type of 'each element of feed' in fluid.dygraph.jit.TracedLayer.save_inference_model must be <{} 'int'>, but received <{} 'NoneType'>. ". - format(self.type_str, self.type_str), str(e.exception)) + "The type of 'each element of feed' in fluid.dygraph.jit.TracedLayer.save_inference_model must be <{} 'int'>, but received <{} 'NoneType'>. " + .format(self.type_str, self.type_str), str(e.exception)) with self.assertRaises(TypeError) as e: traced_layer.save_inference_model(path, True, [0]) self.assertEqual( - "The type of 'feed' in fluid.dygraph.jit.TracedLayer.save_inference_model must be (<{} 'NoneType'>, <{} 'list'>), but received <{} 'bool'>. ". - format(self.type_str, self.type_str, self.type_str), + "The type of 'feed' in fluid.dygraph.jit.TracedLayer.save_inference_model must be (<{} 'NoneType'>, <{} 'list'>), but received <{} 'bool'>. " + .format(self.type_str, self.type_str, self.type_str), str(e.exception)) with self.assertRaises(ValueError) as e: traced_layer.save_inference_model("") @@ -165,8 +168,8 @@ class TestTracedLayerErrMsg(unittest.TestCase): for i in range(5): in_x = fluid.dygraph.to_variable( - np.random.random((self.batch_size, self.feature_size)) - .astype('float32')) + np.random.random( + (self.batch_size, self.feature_size)).astype('float32')) dygraph_out = layer(in_x) loss = fluid.layers.reduce_mean(dygraph_out) loss.backward() @@ -175,14 +178,15 @@ class TestTracedLayerErrMsg(unittest.TestCase): class TestOutVarWithNoneErrMsg(unittest.TestCase): + def test_linear_net_with_none(self): if fluid.framework.in_dygraph_mode(): return model = LinearNetWithNone(100, 16) in_x = paddle.to_tensor(np.random.random((4, 100)).astype('float32')) with self.assertRaises(TypeError): - dygraph_out, traced_layer = fluid.dygraph.TracedLayer.trace(model, - [in_x]) + dygraph_out, traced_layer = fluid.dygraph.TracedLayer.trace( + model, [in_x]) class TestTracedLayerSaveInferenceModel(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_trainable.py b/python/paddle/fluid/tests/unittests/test_trainable.py index 35ae9d9b47c..72edff9f29b 100644 --- a/python/paddle/fluid/tests/unittests/test_trainable.py +++ b/python/paddle/fluid/tests/unittests/test_trainable.py @@ -32,6 +32,7 @@ def test_trainable(): class TestTrainable(unittest.TestCase): + def check_trainable(self, model, feed_dict, @@ -64,18 +65,21 @@ class TestTrainable(unittest.TestCase): feed_dict = {'image': img, 'label': label} # Note that, because the Weight of FC is not trainable and the x is stop_gradient, # so the 'mul_grad' should not be appended. 
+ self.check_trainable(test_trainable, + feed_dict, + op_count={ + 'adam': 1, + 'scale': 0, + 'mul_grad': 0 + }) self.check_trainable( test_trainable, feed_dict, - op_count={'adam': 1, - 'scale': 0, - 'mul_grad': 0}) - self.check_trainable( - test_trainable, - feed_dict, - op_count={'adamax': 1, - 'scale': 1, - 'mul_grad': 0}, + op_count={ + 'adamax': 1, + 'scale': 1, + 'mul_grad': 0 + }, optimizer=fluid.optimizer.Adamax(learning_rate=0.2)) diff --git a/python/paddle/fluid/tests/unittests/test_transfer_dtype_op.py b/python/paddle/fluid/tests/unittests/test_transfer_dtype_op.py index 637a6c14468..0d213994e0b 100644 --- a/python/paddle/fluid/tests/unittests/test_transfer_dtype_op.py +++ b/python/paddle/fluid/tests/unittests/test_transfer_dtype_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,6 +24,7 @@ from op_test import OpTest, convert_uint16_to_float, convert_float_to_uint16 class TestTransferDtypeOpFp32ToFp64(OpTest): + def setUp(self): ipt = np.random.random(size=[10, 10]) self.inputs = {'X': ipt.astype('float32')} @@ -39,6 +40,7 @@ class TestTransferDtypeOpFp32ToFp64(OpTest): class TestTransferDtypeOpFp16ToFp32(OpTest): + def setUp(self): ipt = np.random.random(size=[10, 10]) self.inputs = {'X': ipt.astype('float16')} @@ -54,6 +56,7 @@ class TestTransferDtypeOpFp16ToFp32(OpTest): class TestTransferDtypeOpFp32ToFp16(OpTest): + def setUp(self): ipt = np.random.random(size=[10, 10]) self.inputs = {'X': ipt.astype('float32')} @@ -69,6 +72,7 @@ class TestTransferDtypeOpFp32ToFp16(OpTest): class TestTransferDtypeOpBf16ToFp32(OpTest): + def setUp(self): ipt = np.array(np.random.randint(10, size=[10, 10])).astype('uint16') self.inputs = {'X': ipt} @@ -84,6 +88,7 @@ class TestTransferDtypeOpBf16ToFp32(OpTest): class TestTransferDtypeFp32ToBf16(OpTest): + def setUp(self): ipt = np.random.random(size=[10, 10]).astype('float32') self.inputs = {'X': ipt} diff --git a/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py b/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py index 94644cf2fec..e7a373e4c24 100644 --- a/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py +++ b/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -25,6 +25,7 @@ from op_test import OpTest # default kNCHW class TestTransferLayoutOpkNCHWTokNHWC(OpTest): + def setUp(self): ipt = np.random.random(size=[2, 3, 10, 10]) self.inputs = {'X': ipt.astype('float32')} diff --git a/python/paddle/fluid/tests/unittests/test_transformer_api.py b/python/paddle/fluid/tests/unittests/test_transformer_api.py index 587cedc6aad..6b254ac3115 100644 --- a/python/paddle/fluid/tests/unittests/test_transformer_api.py +++ b/python/paddle/fluid/tests/unittests/test_transformer_api.py @@ -59,8 +59,8 @@ def generate_query_key_value_cache(self_attention, cache=None): query = np.random.rand(batch_size, query_length, embed_dim).astype("float32") - attn_mask = np.ones( - (batch_size, num_heads, query_length, key_length), dtype=attn_mask_type) + attn_mask = np.ones((batch_size, num_heads, query_length, key_length), + dtype=attn_mask_type) if attn_mask_type == 'int64': attn_mask = np.tril(attn_mask) elif attn_mask_type == 'float64': @@ -77,15 +77,18 @@ def generate_query_key_value_cache(self_attention, cache_dict = {} if cache: if not self_attention: - cache_dict["static_k"] = np.random.rand( - batch_size, num_heads, key_length, head_dim).astype("float32") - cache_dict["static_v"] = np.random.rand( - batch_size, num_heads, value_length, head_dim).astype("float32") + cache_dict["static_k"] = np.random.rand(batch_size, num_heads, + key_length, + head_dim).astype("float32") + cache_dict["static_v"] = np.random.rand(batch_size, num_heads, + value_length, + head_dim).astype("float32") else: cache_dict["k"] = np.random.rand(batch_size, num_heads, key_length, head_dim).astype("float32") - cache_dict["v"] = np.random.rand( - batch_size, num_heads, value_length, head_dim).astype("float32") + cache_dict["v"] = np.random.rand(batch_size, num_heads, + value_length, + head_dim).astype("float32") else: cache_dict = None return query, key, value, attn_mask, cache_dict @@ -110,8 +113,8 @@ def softmax(x): def batch_matmul(x, y): assert x.shape[0] == y.shape[0] assert x.shape[1] == y.shape[1] - retval = np.zeros( - (x.shape[0], x.shape[1], x.shape[2], y.shape[3]), dtype=np.float64) + retval = np.zeros((x.shape[0], x.shape[1], x.shape[2], y.shape[3]), + dtype=np.float64) for i in range(x.shape[0]): for j in range(x.shape[1]): retval[i, j, :, :] = np.matmul(x[i, j, :, :], y[i, j, :, :]) @@ -220,7 +223,9 @@ def ffn(src, encoder_layer, ffn_fc1_act="relu"): class TestTransformer(unittest.TestCase): + def test_multi_head_attention(self): + def multihead_attention_test_helper(self_attention, cache): paddle.seed(2020) paddle.framework.random._manual_program_seed(2020) @@ -236,8 +241,8 @@ class TestTransformer(unittest.TestCase): embed_dim, attn_mask_type, key_length, value_length, kdim, vdim, cache) if cache and self_attention: - attn_mask = np.concatenate( - (attn_mask, attn_mask), axis=3) + attn_mask = np.concatenate((attn_mask, attn_mask), + axis=3) need_weight, param_attr, bias_attr = False, None, None # call paddle's function multi_head_attn = MultiHeadAttention( @@ -256,15 +261,14 @@ class TestTransformer(unittest.TestCase): paddle.to_tensor(cache_dict['static_v'])) if attn_mask is not None: attn_output = multi_head_attn( - paddle.to_tensor(query), - paddle.to_tensor(key), + paddle.to_tensor(query), paddle.to_tensor(key), paddle.to_tensor(value), paddle.to_tensor(attn_mask), cache_obj) else: - attn_output = multi_head_attn( - paddle.to_tensor(query), - paddle.to_tensor(key), - paddle.to_tensor(value), attn_mask, cache_obj) + attn_output = multi_head_attn(paddle.to_tensor(query), + 
paddle.to_tensor(key), + paddle.to_tensor(value), + attn_mask, cache_obj) attn_output = attn_output[0] if cache_dict else attn_output # implementation by numpy @@ -279,8 +283,9 @@ class TestTransformer(unittest.TestCase): out_proj_weight = multi_head_attn.out_proj.weight.numpy() reference = fc(attn_heads, out_proj_weight) - np.testing.assert_allclose( - attn_output.numpy(), reference, atol=1e-6) + np.testing.assert_allclose(attn_output.numpy(), + reference, + atol=1e-6) multihead_attention_test_helper(True, True) multihead_attention_test_helper(True, False) @@ -306,21 +311,23 @@ class TestTransformer(unittest.TestCase): src_mask[0][0][0][0] = -np.inf # paddle - encoder_layer = TransformerEncoderLayer( - d_model, n_head, dim_feedforward, dropout, ffn_fc1_act, - attn_dropout, act_dropout) + encoder_layer = TransformerEncoderLayer(d_model, n_head, + dim_feedforward, dropout, + ffn_fc1_act, attn_dropout, + act_dropout) encoder_output = encoder_layer( paddle.to_tensor(src), paddle.to_tensor(src_mask)) # paddle.to_tensor(src_mask)) # 4.numpy: # paddle self attention - self_attn = MultiHeadAttention( - d_model, n_head, dropout=attn_dropout) - attn_output = self_attn( - paddle.to_tensor(src), - paddle.to_tensor(src), - paddle.to_tensor(src), paddle.to_tensor(src_mask)).numpy() + self_attn = MultiHeadAttention(d_model, + n_head, + dropout=attn_dropout) + attn_output = self_attn(paddle.to_tensor(src), + paddle.to_tensor(src), + paddle.to_tensor(src), + paddle.to_tensor(src_mask)).numpy() src = attn_output + residual src_norm = layer_norm(src, d_model, encoder_layer.norm1) @@ -330,8 +337,10 @@ class TestTransformer(unittest.TestCase): src = residual + ffn_output src = layer_norm(src, d_model, encoder_layer.norm2) - np.testing.assert_allclose( - encoder_output.numpy(), src, rtol=1e-5, atol=1e-6) + np.testing.assert_allclose(encoder_output.numpy(), + src, + rtol=1e-5, + atol=1e-6) def test_transformer_encoder_layer_attr_1(self): with fluid.dygraph.guard(fluid.CPUPlace()): @@ -351,29 +360,31 @@ class TestTransformer(unittest.TestCase): for cache in [True, False]: # paddle - encoder_layer = TransformerEncoderLayer( - d_model, n_head, dim_feedforward, dropout, ffn_fc1_act, - attn_dropout, act_dropout) + encoder_layer = TransformerEncoderLayer(d_model, n_head, + dim_feedforward, + dropout, ffn_fc1_act, + attn_dropout, + act_dropout) cache_objs = None if cache: cache_objs = encoder_layer.gen_cache(paddle.to_tensor(src)) - encoder_output = encoder_layer( - paddle.to_tensor(src), - paddle.to_tensor(src_mask), cache_objs) + encoder_output = encoder_layer(paddle.to_tensor(src), + paddle.to_tensor(src_mask), + cache_objs) encoder_output = encoder_output[0].numpy( ) if cache else encoder_output.numpy() # 4.numpy: residual = src # paddle self attention - self_attn = MultiHeadAttention( - d_model, n_head, dropout=attn_dropout) - attn_output = self_attn( - paddle.to_tensor(src), - paddle.to_tensor(src), - paddle.to_tensor(src), - paddle.to_tensor(src_mask), cache_objs) + self_attn = MultiHeadAttention(d_model, + n_head, + dropout=attn_dropout) + attn_output = self_attn(paddle.to_tensor(src), + paddle.to_tensor(src), + paddle.to_tensor(src), + paddle.to_tensor(src_mask), cache_objs) attn_output = attn_output[0].numpy( ) if cache else attn_output.numpy() @@ -385,8 +396,10 @@ class TestTransformer(unittest.TestCase): src = residual + ffn_output src = layer_norm(src, d_model, encoder_layer.norm2) - np.testing.assert_allclose( - encoder_output, src, rtol=1e-5, atol=1e-6) + np.testing.assert_allclose(encoder_output, + 
src, + rtol=1e-5, + atol=1e-6) def test_transformer_decoder_layer(self): with fluid.dygraph.guard(fluid.CPUPlace()): @@ -406,10 +419,12 @@ class TestTransformer(unittest.TestCase): source_length)).astype("float32") memory_mask[0][0][0][0] = -1e9 for cache in [True, False]: - self_attn = MultiHeadAttention( - d_model, n_head, dropout=attn_dropout) - cross_attn = MultiHeadAttention( - d_model, n_head, dropout=attn_dropout) + self_attn = MultiHeadAttention(d_model, + n_head, + dropout=attn_dropout) + cross_attn = MultiHeadAttention(d_model, + n_head, + dropout=attn_dropout) # paddle decoderlayer: decoder_layer = TransformerDecoderLayer( @@ -420,11 +435,11 @@ class TestTransformer(unittest.TestCase): cache_objs = decoder_layer.gen_cache( paddle.to_tensor(memory)) - decoder_output = decoder_layer( - paddle.to_tensor(tgt), - paddle.to_tensor(memory), - paddle.to_tensor(tgt_mask), - paddle.to_tensor(memory_mask), cache_objs) + decoder_output = decoder_layer(paddle.to_tensor(tgt), + paddle.to_tensor(memory), + paddle.to_tensor(tgt_mask), + paddle.to_tensor(memory_mask), + cache_objs) decoder_output = decoder_output[0].numpy( ) if cache else decoder_output.numpy() @@ -434,11 +449,9 @@ class TestTransformer(unittest.TestCase): # self-attn self_attn_cache = cache_objs[ 0] if cache_objs is not None else None - tgt = self_attn( - paddle.to_tensor(tgt), - paddle.to_tensor(tgt), - paddle.to_tensor(tgt), - paddle.to_tensor(tgt_mask), self_attn_cache) + tgt = self_attn(paddle.to_tensor(tgt), paddle.to_tensor(tgt), + paddle.to_tensor(tgt), + paddle.to_tensor(tgt_mask), self_attn_cache) tgt = tgt[0].numpy() if cache else tgt.numpy() @@ -449,11 +462,11 @@ class TestTransformer(unittest.TestCase): # cross-attn cross_attn_cache = cache_objs[ 1] if cache_objs is not None else None - tgt = cross_attn( - paddle.to_tensor(tgt_norm), - paddle.to_tensor(memory), - paddle.to_tensor(memory), - paddle.to_tensor(memory_mask), cross_attn_cache) + tgt = cross_attn(paddle.to_tensor(tgt_norm), + paddle.to_tensor(memory), + paddle.to_tensor(memory), + paddle.to_tensor(memory_mask), + cross_attn_cache) tgt = tgt[0].numpy() if cache else tgt.numpy() # postprocess @@ -466,8 +479,10 @@ class TestTransformer(unittest.TestCase): tgt = residual + ffn_output tgt_norm = layer_norm(tgt, d_model, decoder_layer.norm3) - np.testing.assert_allclose( - decoder_output, tgt_norm, rtol=1e-5, atol=1e-6) + np.testing.assert_allclose(decoder_output, + tgt_norm, + rtol=1e-5, + atol=1e-6) def test_encoder(self): batch_size, d_model, n_head, dim_feedforward, dropout, attn_dropout, act_dropout, sequence_length = generate_basic_params( @@ -485,8 +500,8 @@ class TestTransformer(unittest.TestCase): num_layers = 6 encoder = TransformerEncoder(encoder_layer, num_layers) # src, src_mask - enc_output = encoder( - paddle.to_tensor(src), paddle.to_tensor(src_mask)) + enc_output = encoder(paddle.to_tensor(src), + paddle.to_tensor(src_mask)) def test_encoder_attr_1(self): batch_size, d_model, n_head, dim_feedforward, dropout, attn_dropout, act_dropout, sequence_length = generate_basic_params( @@ -501,8 +516,9 @@ class TestTransformer(unittest.TestCase): with fluid.dygraph.guard(fluid.CPUPlace()): for cache in [True, False]: # paddle - encoder_layer = TransformerEncoderLayer( - d_model, n_head, dim_feedforward, dropout) + encoder_layer = TransformerEncoderLayer(d_model, n_head, + dim_feedforward, + dropout) num_layers = 6 encoder = TransformerEncoder(encoder_layer, num_layers) cache_objs = None @@ -510,9 +526,8 @@ class TestTransformer(unittest.TestCase): 
cache_objs = encoder.gen_cache(paddle.to_tensor(src)) # src, src_mask - enc_output = encoder( - paddle.to_tensor(src), - paddle.to_tensor(src_mask), cache_objs) + enc_output = encoder(paddle.to_tensor(src), + paddle.to_tensor(src_mask), cache_objs) def test_decoder(self): batch_size, d_model, n_head, dim_feedforward, dropout, _, _, source_length, target_length = generate_basic_params( @@ -533,10 +548,9 @@ class TestTransformer(unittest.TestCase): num_layers = 6 decoder = TransformerDecoder(decoder_layer, num_layers) - output = decoder( - paddle.to_tensor(tgt), - paddle.to_tensor(memory), - paddle.to_tensor(tgt_mask), paddle.to_tensor(memory_mask)) + output = decoder(paddle.to_tensor(tgt), paddle.to_tensor(memory), + paddle.to_tensor(tgt_mask), + paddle.to_tensor(memory_mask)) def test_transformer(self): batch_size, d_model, n_head, dim_feedforward, dropout, _, _, source_length, target_length = generate_basic_params( @@ -544,17 +558,16 @@ class TestTransformer(unittest.TestCase): # batch_size, source_length, target_length, d_model, n_head = 4, 8, 8, 64, 8 with fluid.dygraph.guard(fluid.CPUPlace()): - transformer = Transformer( - d_model, - n_head, - dim_feedforward=dim_feedforward, - dropout=dropout) + transformer = Transformer(d_model, + n_head, + dim_feedforward=dim_feedforward, + dropout=dropout) src = paddle.to_tensor( - np.random.rand(batch_size, source_length, d_model).astype( - "float32")) + np.random.rand(batch_size, source_length, + d_model).astype("float32")) tgt = paddle.to_tensor( - np.random.rand(batch_size, target_length, d_model).astype( - "float32")) + np.random.rand(batch_size, target_length, + d_model).astype("float32")) src_mask = np.zeros((batch_size, n_head, source_length, source_length)).astype("float32") src_mask[0][0][0][0] = -np.inf @@ -576,19 +589,18 @@ class TestTransformer(unittest.TestCase): # batch_size, source_length, target_length, d_model, n_head = 4, 8, 8, 64, 8 with fluid.dygraph.guard(fluid.CPUPlace()): - transformer = Transformer( - d_model, - n_head, - dim_feedforward=dim_feedforward, - dropout=dropout, - weight_attr=[None], - bias_attr=[False]) + transformer = Transformer(d_model, + n_head, + dim_feedforward=dim_feedforward, + dropout=dropout, + weight_attr=[None], + bias_attr=[False]) src = paddle.to_tensor( - np.random.rand(batch_size, source_length, d_model).astype( - "float32")) + np.random.rand(batch_size, source_length, + d_model).astype("float32")) tgt = paddle.to_tensor( - np.random.rand(batch_size, target_length, d_model).astype( - "float32")) + np.random.rand(batch_size, target_length, + d_model).astype("float32")) src_mask = np.zeros((batch_size, n_head, source_length, source_length)).astype("float32") src_mask[0][0][0][0] = -np.inf @@ -610,19 +622,18 @@ class TestTransformer(unittest.TestCase): # batch_size, source_length, target_length, d_model, n_head = 4, 8, 8, 64, 8 with fluid.dygraph.guard(fluid.CPUPlace()): - transformer = Transformer( - d_model, - n_head, - dim_feedforward=dim_feedforward, - dropout=dropout, - weight_attr=[None, None], - bias_attr=[False, False]) + transformer = Transformer(d_model, + n_head, + dim_feedforward=dim_feedforward, + dropout=dropout, + weight_attr=[None, None], + bias_attr=[False, False]) src = paddle.to_tensor( - np.random.rand(batch_size, source_length, d_model).astype( - "float32")) + np.random.rand(batch_size, source_length, + d_model).astype("float32")) tgt = paddle.to_tensor( - np.random.rand(batch_size, target_length, d_model).astype( - "float32")) + np.random.rand(batch_size, target_length, + 
d_model).astype("float32")) src_mask = np.zeros((batch_size, n_head, source_length, source_length)).astype("float32") src_mask[0][0][0][0] = -np.inf @@ -644,19 +655,18 @@ class TestTransformer(unittest.TestCase): # batch_size, source_length, target_length, d_model, n_head = 4, 8, 8, 64, 8 with fluid.dygraph.guard(fluid.CPUPlace()): - transformer = Transformer( - d_model, - n_head, - dim_feedforward=dim_feedforward, - dropout=dropout, - weight_attr=[None, None, None], - bias_attr=[False, False, True]) + transformer = Transformer(d_model, + n_head, + dim_feedforward=dim_feedforward, + dropout=dropout, + weight_attr=[None, None, None], + bias_attr=[False, False, True]) src = paddle.to_tensor( - np.random.rand(batch_size, source_length, d_model).astype( - "float32")) + np.random.rand(batch_size, source_length, + d_model).astype("float32")) tgt = paddle.to_tensor( - np.random.rand(batch_size, target_length, d_model).astype( - "float32")) + np.random.rand(batch_size, target_length, + d_model).astype("float32")) src_mask = np.zeros((batch_size, n_head, source_length, source_length)).astype("float32") src_mask[0][0][0][0] = -np.inf @@ -678,18 +688,17 @@ class TestTransformer(unittest.TestCase): # batch_size, source_length, target_length, d_model, n_head = 4, 8, 8, 64, 8 with fluid.dygraph.guard(fluid.CPUPlace()): - transformer = Transformer( - d_model, - n_head, - dim_feedforward=dim_feedforward, - dropout=dropout, - bias_attr=False) + transformer = Transformer(d_model, + n_head, + dim_feedforward=dim_feedforward, + dropout=dropout, + bias_attr=False) src = paddle.to_tensor( - np.random.rand(batch_size, source_length, d_model).astype( - "float32")) + np.random.rand(batch_size, source_length, + d_model).astype("float32")) tgt = paddle.to_tensor( - np.random.rand(batch_size, target_length, d_model).astype( - "float32")) + np.random.rand(batch_size, target_length, + d_model).astype("float32")) src_mask = np.zeros((batch_size, n_head, source_length, source_length)).astype("float32") src_mask[0][0][0][0] = -np.inf @@ -708,8 +717,9 @@ class TestTransformer(unittest.TestCase): def test_generate_square_subsequent_mask(self): length = 5 d_model, n_head, dim_feedforward = 8, 4, 64 - transformer = Transformer( - d_model, n_head, dim_feedforward=dim_feedforward) + transformer = Transformer(d_model, + n_head, + dim_feedforward=dim_feedforward) mask = transformer.generate_square_subsequent_mask(length) diff --git a/python/paddle/fluid/tests/unittests/test_translated_layer.py b/python/paddle/fluid/tests/unittests/test_translated_layer.py index 79652b37b77..4b0be989efe 100644 --- a/python/paddle/fluid/tests/unittests/test_translated_layer.py +++ b/python/paddle/fluid/tests/unittests/test_translated_layer.py @@ -31,6 +31,7 @@ CLASS_NUM = 10 # define a random dataset class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): self.num_samples = num_samples @@ -45,14 +46,16 @@ class RandomDataset(paddle.io.Dataset): class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) self._dropout = paddle.nn.Dropout(p=0.5) @paddle.jit.to_static(input_spec=[ - paddle.static.InputSpec( - shape=[None, IMAGE_SIZE], dtype='float32', name='x') + paddle.static.InputSpec(shape=[None, IMAGE_SIZE], + dtype='float32', + name='x') ]) def forward(self, x): return self._linear(x) @@ -72,6 +75,7 @@ def train(layer, loader, loss_fn, opt): class TestTranslatedLayer(unittest.TestCase): + def setUp(self): # enable dygraph mode place = paddle.CPUPlace() @@ 
-89,13 +93,12 @@ class TestTranslatedLayer(unittest.TestCase): # create data loader dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) - self.loader = paddle.io.DataLoader( - dataset, - places=place, - batch_size=BATCH_SIZE, - shuffle=True, - drop_last=True, - num_workers=0) + self.loader = paddle.io.DataLoader(dataset, + places=place, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=0) # train train(self.layer, self.loader, self.loss_fn, self.sgd) @@ -137,10 +140,9 @@ class TestTranslatedLayer(unittest.TestCase): parameters=translated_layer.parameters()) loss = train(translated_layer, self.loader, self.loss_fn, sgd) - self.assertTrue( - np.array_equal(orig_loss.numpy(), loss.numpy()), - msg="original loss:\n{}\nnew loss:\n{}\n".format(orig_loss.numpy(), - loss.numpy())) + self.assertTrue(np.array_equal(orig_loss.numpy(), loss.numpy()), + msg="original loss:\n{}\nnew loss:\n{}\n".format( + orig_loss.numpy(), loss.numpy())) def test_get_program(self): # load @@ -161,8 +163,9 @@ class TestTranslatedLayer(unittest.TestCase): translated_layer = paddle.jit.load(self.model_path) expect_spec = [ - paddle.static.InputSpec( - shape=[None, IMAGE_SIZE], dtype='float32', name='x') + paddle.static.InputSpec(shape=[None, IMAGE_SIZE], + dtype='float32', + name='x') ] actual_spec = translated_layer._input_spec() @@ -174,10 +177,9 @@ class TestTranslatedLayer(unittest.TestCase): translated_layer = paddle.jit.load(self.model_path) expect_spec = [ - paddle.static.InputSpec( - shape=[None, CLASS_NUM], - dtype='float32', - name='translated_layer/scale_0.tmp_1') + paddle.static.InputSpec(shape=[None, CLASS_NUM], + dtype='float32', + name='translated_layer/scale_0.tmp_1') ] actual_spec = translated_layer._output_spec() diff --git a/python/paddle/fluid/tests/unittests/test_transpose_op.py b/python/paddle/fluid/tests/unittests/test_transpose_op.py index c890c3c607c..d9e293ba671 100644 --- a/python/paddle/fluid/tests/unittests/test_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_transpose_op.py @@ -26,6 +26,7 @@ paddle.enable_static() class TestTransposeOp(OpTest): + def setUp(self): self.init_op_type() self.initTestCase() @@ -56,66 +57,77 @@ class TestTransposeOp(OpTest): class TestCase0(TestTransposeOp): + def initTestCase(self): self.shape = (100, ) self.axis = (0, ) class TestCase1(TestTransposeOp): + def initTestCase(self): self.shape = (3, 4, 10) self.axis = (0, 2, 1) class TestCase2(TestTransposeOp): + def initTestCase(self): self.shape = (2, 3, 4, 5) self.axis = (0, 2, 3, 1) class TestCase3(TestTransposeOp): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6) self.axis = (4, 2, 3, 1, 0) class TestCase4(TestTransposeOp): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6, 1) self.axis = (4, 2, 3, 1, 0, 5) class TestCase5(TestTransposeOp): + def initTestCase(self): self.shape = (2, 16, 96) self.axis = (0, 2, 1) class TestCase6(TestTransposeOp): + def initTestCase(self): self.shape = (2, 10, 12, 16) self.axis = (3, 1, 2, 0) class TestCase7(TestTransposeOp): + def initTestCase(self): self.shape = (2, 10, 2, 16) self.axis = (0, 1, 3, 2) class TestCase8(TestTransposeOp): + def initTestCase(self): self.shape = (2, 3, 2, 3, 2, 4, 3, 3) self.axis = (0, 1, 3, 2, 4, 5, 6, 7) class TestCase9(TestTransposeOp): + def initTestCase(self): self.shape = (2, 3, 2, 3, 2, 4, 3, 3) self.axis = (6, 1, 3, 5, 0, 2, 4, 7) class TestTransposeBF16Op(OpTest): + def setUp(self): self.init_op_type() self.initTestCase() @@ -128,9 +140,11 @@ class TestTransposeBF16Op(OpTest): 'use_mkldnn': 
self.use_mkldnn, } self.outputs = { - 'XShape': convert_float_to_uint16( + 'XShape': + convert_float_to_uint16( np.random.random(self.shape).astype("float32")), - 'Out': self.inputs['X'].transpose(self.axis) + 'Out': + self.inputs['X'].transpose(self.axis) } def init_op_type(self): @@ -149,11 +163,13 @@ class TestTransposeBF16Op(OpTest): class TestTransposeOpBool(TestTransposeOp): + def test_check_grad(self): pass class TestTransposeOpBool1D(TestTransposeOpBool): + def initTestCase(self): self.shape = (100, ) self.axis = (0, ) @@ -165,6 +181,7 @@ class TestTransposeOpBool1D(TestTransposeOpBool): class TestTransposeOpBool2D(TestTransposeOpBool): + def initTestCase(self): self.shape = (3, 40) self.axis = (1, 0) @@ -176,6 +193,7 @@ class TestTransposeOpBool2D(TestTransposeOpBool): class TestTransposeOpBool3D(TestTransposeOpBool): + def initTestCase(self): self.shape = (3, 4, 10) self.axis = (0, 2, 1) @@ -187,6 +205,7 @@ class TestTransposeOpBool3D(TestTransposeOpBool): class TestTransposeOpBool4D(TestTransposeOpBool): + def initTestCase(self): self.shape = (2, 3, 4, 5) self.axis = (0, 2, 3, 1) @@ -198,6 +217,7 @@ class TestTransposeOpBool4D(TestTransposeOpBool): class TestTransposeOpBool5D(TestTransposeOpBool): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6) self.axis = (4, 2, 3, 1, 0) @@ -209,6 +229,7 @@ class TestTransposeOpBool5D(TestTransposeOpBool): class TestTransposeOpBool6D(TestTransposeOpBool): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6, 1) self.axis = (4, 2, 3, 1, 0, 5) @@ -220,6 +241,7 @@ class TestTransposeOpBool6D(TestTransposeOpBool): class TestTransposeOpBool7D(TestTransposeOpBool): + def initTestCase(self): self.shape = (2, 3, 2, 3, 2, 4, 3) self.axis = (0, 1, 3, 2, 4, 5, 6) @@ -231,6 +253,7 @@ class TestTransposeOpBool7D(TestTransposeOpBool): class TestTransposeOpBool8D(TestTransposeOpBool): + def initTestCase(self): self.shape = (2, 3, 2, 3, 2, 4, 3, 3) self.axis = (6, 1, 3, 5, 0, 2, 4, 7) @@ -242,6 +265,7 @@ class TestTransposeOpBool8D(TestTransposeOpBool): class TestTransposeOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): @@ -255,8 +279,9 @@ class TestTransposeOpError(unittest.TestCase): def test_x_dtype_check(): # the Input(x)'s dtype must be one of [bool, float16, float32, float64, int32, int64] - x1 = fluid.layers.data( - name='x1', shape=[10, 5, 3], dtype='int8') + x1 = fluid.layers.data(name='x1', + shape=[10, 5, 3], + dtype='int8') fluid.layers.transpose(x1, perm=[1, 0, 2]) self.assertRaises(TypeError, test_x_dtype_check) @@ -282,6 +307,7 @@ class TestTransposeOpError(unittest.TestCase): class TestTransposeApi(unittest.TestCase): + def test_static_out(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): @@ -318,6 +344,7 @@ class TestTransposeApi(unittest.TestCase): class TestTAPI(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program()): data = fluid.data(shape=[10], dtype="float64", name="data") @@ -384,6 +411,7 @@ class TestTAPI(unittest.TestCase): class TestMoveAxis(unittest.TestCase): + def test_moveaxis1(self): x_np = np.random.randn(2, 3, 4, 5, 7) expected = np.moveaxis(x_np, [0, 4, 3, 2], [1, 3, 2, 0]) @@ -426,8 +454,8 @@ class TestMoveAxis(unittest.TestCase): def test_moveaxis3(self): paddle.disable_static() - x = paddle.to_tensor( - [[1 + 1j, -1 - 1j], [1 + 1j, -1 - 1j], [1 + 1j, -1 - 1j]]) + x = paddle.to_tensor([[1 + 1j, -1 - 1j], [1 + 1j, -1 - 1j], + [1 + 1j, -1 - 1j]]) out = x.moveaxis(0, 1) 
self.assertEqual(out.shape, [2, 3]) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_tree_conv_op.py b/python/paddle/fluid/tests/unittests/test_tree_conv_op.py index f35649dd3e8..114d713b092 100644 --- a/python/paddle/fluid/tests/unittests/test_tree_conv_op.py +++ b/python/paddle/fluid/tests/unittests/test_tree_conv_op.py @@ -51,6 +51,7 @@ def collect_node_patch(og, max_depth): class TestTreeConvOp(OpTest): + def setUp(self): self.n = 17 self.fea_size = 3 @@ -68,24 +69,30 @@ class TestTreeConvOp(OpTest): vectors = np.random.random( (self.batch_size, self.n, self.fea_size)).astype('float64') self.inputs = { - 'EdgeSet': adj, - 'NodesVector': vectors, - 'Filter': np.random.random((self.fea_size, 3, self.output_size, - self.num_filters)).astype('float64') + 'EdgeSet': + adj, + 'NodesVector': + vectors, + 'Filter': + np.random.random((self.fea_size, 3, self.output_size, + self.num_filters)).astype('float64') } self.attrs = {'max_depth': self.max_depth} vectors = [] for i in range(self.batch_size): vector = self.get_output_naive(i) vectors.append(vector) - self.outputs = {'Out': np.array(vectors).astype('float64'), } + self.outputs = { + 'Out': np.array(vectors).astype('float64'), + } def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad( - ['NodesVector', 'Filter'], 'Out', max_relative_error=0.5) + self.check_grad(['NodesVector', 'Filter'], + 'Out', + max_relative_error=0.5) def get_output_naive(self, batch_id): og = [[] for i in range(1, self.n + 2)] @@ -112,28 +119,30 @@ class TestTreeConvOp(OpTest): result = result + res vec.append(result) vec = np.concatenate(vec, axis=0) - vec = np.concatenate( - [ - vec, np.zeros( - (self.n - vec.shape[0], W.shape[2], W.shape[3]), - dtype='float64') - ], - axis=0) + vec = np.concatenate([ + vec, + np.zeros((self.n - vec.shape[0], W.shape[2], W.shape[3]), + dtype='float64') + ], + axis=0) return vec class TestTreeConv_OpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): nodes_vector_1 = np.random.random((10, 5)).astype("float32") - edge_set_1 = fluid.layers.data( - name='edge_set_1', shape=[10, 2], dtype='float32') + edge_set_1 = fluid.layers.data(name='edge_set_1', + shape=[10, 2], + dtype='float32') # the nodes_vector of tree_conv must be Variable. self.assertRaises(TypeError, fluid.contrib.layers.tree_conv, nodes_vector_1, edge_set_1, 3) - nodes_vector_2 = fluid.layers.data( - name='vectors2', shape=[10, 5], dtype='float32') + nodes_vector_2 = fluid.layers.data(name='vectors2', + shape=[10, 5], + dtype='float32') edge_set_2 = np.random.random((10, 2)).astype("float32") # the edge_set of tree_conv must be Variable. self.assertRaises(TypeError, fluid.contrib.layers.tree_conv, @@ -141,19 +150,24 @@ class TestTreeConv_OpError(unittest.TestCase): class TestDygraphTreeConv_OpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - TreeConv = fluid.dygraph.nn.TreeConv( - feature_size=5, output_size=6, num_filters=1, max_depth=2) + TreeConv = fluid.dygraph.nn.TreeConv(feature_size=5, + output_size=6, + num_filters=1, + max_depth=2) nodes_vector_1 = np.random.random((10, 5)).astype("float32") - edge_set_1 = fluid.layers.data( - name='edge_set_1', shape=[10, 2], dtype='float32') + edge_set_1 = fluid.layers.data(name='edge_set_1', + shape=[10, 2], + dtype='float32') # the nodes_vector of TreeConv must be Variable. 
self.assertRaises(TypeError, TreeConv, nodes_vector_1, edge_set_1, 3) - nodes_vector_2 = fluid.layers.data( - name='vectors2', shape=[10, 5], dtype='float32') + nodes_vector_2 = fluid.layers.data(name='vectors2', + shape=[10, 5], + dtype='float32') edge_set_2 = np.random.random((10, 2)).astype("float32") # the edge_set of TreeConv must be Variable. self.assertRaises(TypeError, TreeConv, nodes_vector_2, edge_set_2, diff --git a/python/paddle/fluid/tests/unittests/test_triangular_solve_op.py b/python/paddle/fluid/tests/unittests/test_triangular_solve_op.py index 4e79e8dca13..32363e29f1a 100644 --- a/python/paddle/fluid/tests/unittests/test_triangular_solve_op.py +++ b/python/paddle/fluid/tests/unittests/test_triangular_solve_op.py @@ -18,6 +18,7 @@ import unittest import numpy as np import sys + sys.path.append("..") import paddle from op_test import OpTest @@ -42,8 +43,8 @@ class TestTriangularSolveOp(OpTest): self.dtype = "float64" def set_output(self): - self.output = np.linalg.solve( - np.triu(self.inputs['X']), self.inputs['Y']) + self.output = np.linalg.solve(np.triu(self.inputs['X']), + self.inputs['Y']) def setUp(self): self.op_type = "triangular_solve" @@ -159,11 +160,10 @@ class TestTriangularSolveOp5(TestTriangularSolveOp): grad_x = np.triu(grad_x) np.fill_diagonal(grad_x, 0.) - self.check_grad( - ['X', 'Y'], - 'Out', - user_defined_grads=[grad_x, grad_y], - user_defined_grad_outputs=[grad_out]) + self.check_grad(['X', 'Y'], + 'Out', + user_defined_grads=[grad_x, grad_y], + user_defined_grad_outputs=[grad_out]) # 4D(broadcast) + 4D(broadcast) @@ -247,6 +247,7 @@ class TestTriangularSolveOp9(TestTriangularSolveOp): class TestTriangularSolveAPI(unittest.TestCase): + def setUp(self): np.random.seed(2021) self.place = [paddle.CPUPlace()] @@ -266,8 +267,10 @@ class TestTriangularSolveAPI(unittest.TestCase): exe = fluid.Executor(place) fetches = exe.run(fluid.default_main_program(), - feed={"x": x_np, - "y": y_np}, + feed={ + "x": x_np, + "y": y_np + }, fetch_list=[z]) self.assertTrue(np.allclose(fetches[0], z_np)) @@ -276,6 +279,7 @@ class TestTriangularSolveAPI(unittest.TestCase): self.check_static_result(place=place) def test_dygraph(self): + def run(place): paddle.disable_static(place) x_np = np.random.random([3, 3]).astype(self.dtype) @@ -295,16 +299,17 @@ class TestTriangularSolveAPI(unittest.TestCase): class TestTriangularSolveOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of solve_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) - y1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) + y1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.CPUPlace()) self.assertRaises(TypeError, paddle.linalg.triangular_solve, x1, y1) - # The data type of input must be float32 or float64. + # The data type of input must be float32 or float64. 
x2 = fluid.data(name="x2", shape=[30, 30], dtype="bool") y2 = fluid.data(name="y2", shape=[30, 10], dtype="bool") self.assertRaises(TypeError, paddle.linalg.triangular_solve, x2, y2) diff --git a/python/paddle/fluid/tests/unittests/test_tril_indices_op.py b/python/paddle/fluid/tests/unittests/test_tril_indices_op.py index 29b07a5fb84..c3a85daeee9 100644 --- a/python/paddle/fluid/tests/unittests/test_tril_indices_op.py +++ b/python/paddle/fluid/tests/unittests/test_tril_indices_op.py @@ -24,6 +24,7 @@ from paddle.fluid.framework import _test_eager_guard class TestTrilIndicesOp(OpTest): + def setUp(self): self.op_type = "tril_indices" self.inputs = {} @@ -42,6 +43,7 @@ class TestTrilIndicesOp(OpTest): class TestTrilIndicesOpCase1(TestTrilIndicesOp): + def init_config(self): self.attrs = {'rows': 0, 'cols': 0, 'offset': 0} self.target = np.tril_indices(0, 0, 0) @@ -49,6 +51,7 @@ class TestTrilIndicesOpCase1(TestTrilIndicesOp): class TestTrilIndicesOpCase2(TestTrilIndicesOp): + def init_config(self): self.attrs = {'rows': 4, 'cols': 4, 'offset': 2} self.target = np.tril_indices(self.attrs['rows'], self.attrs['offset'], @@ -57,6 +60,7 @@ class TestTrilIndicesOpCase2(TestTrilIndicesOp): class TestTrilIndicesAPICaseStatic(unittest.TestCase): + def test_static(self): places = [ paddle.CPUPlace(), paddle.fluid.CUDAPlace(0) @@ -73,6 +77,7 @@ class TestTrilIndicesAPICaseStatic(unittest.TestCase): class TestTrilIndicesAPICaseDygraph(unittest.TestCase): + def test_dygraph(self): places = [ paddle.CPUPlace(), paddle.fluid.CUDAPlace(0) @@ -89,7 +94,9 @@ class TestTrilIndicesAPICaseDygraph(unittest.TestCase): class TestTrilIndicesAPICaseError(unittest.TestCase): + def test_case_error(self): + def test_num_rows_type_check(): out1 = paddle.tril_indices(1.0, 1, 2) @@ -107,6 +114,7 @@ class TestTrilIndicesAPICaseError(unittest.TestCase): class TestTrilIndicesAPICaseDefault(unittest.TestCase): + def test_default_CPU(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), diff --git a/python/paddle/fluid/tests/unittests/test_tril_triu_op.py b/python/paddle/fluid/tests/unittests/test_tril_triu_op.py index 00f6169fa31..3ed9e517098 100644 --- a/python/paddle/fluid/tests/unittests/test_tril_triu_op.py +++ b/python/paddle/fluid/tests/unittests/test_tril_triu_op.py @@ -38,7 +38,8 @@ class TrilTriuOpDefaultTest(OpTest): 'lower': True if self.real_op_type == 'tril' else False, } self.outputs = { - 'Out': self.real_np_op(self.X, self.diagonal) + 'Out': + self.real_np_op(self.X, self.diagonal) if self.diagonal else self.real_np_op(self.X) } @@ -70,15 +71,17 @@ def case_generator(op_type, Xshape, diagonal, expected): } class FailureCase(unittest.TestCase): + def test_failure(self): paddle.enable_static() data = fluid.data(shape=Xshape, dtype='float64', name=cls_name) - with self.assertRaisesRegexp( - eval(expected.split(':')[-1]), errmsg[expected]): + with self.assertRaisesRegexp(eval(expected.split(':')[-1]), + errmsg[expected]): getattr(tensor, op_type)(x=data, diagonal=diagonal) class SuccessCase(TrilTriuOpDefaultTest): + def initTestCase(self): paddle.enable_static() @@ -92,7 +95,7 @@ def case_generator(op_type, Xshape, diagonal, expected): ### NOTE: meaningful diagonal is [1 - min(H, W), max(H, W) -1] -### test the diagonal just at the border, upper/lower the border, +### test the diagonal just at the border, upper/lower the border, ### negative/positive integer within range and a zero cases = { 'success': { @@ -118,8 +121,9 @@ for _op_type in ['tril', 'triu']: for _expected, _params 
in cases.items(): for _Xshape, _diaglist in _params.items(): list( - map(lambda _diagonal: case_generator(_op_type, _Xshape, _diagonal, _expected), - _diaglist)) + map( + lambda _diagonal: case_generator( + _op_type, _Xshape, _diagonal, _expected), _diaglist)) class TestTrilTriuOpAPI(unittest.TestCase): @@ -144,7 +148,8 @@ class TestTrilTriuOpAPI(unittest.TestCase): tril_out, triu_out = exe.run( fluid.default_main_program(), feed={"x": data}, - fetch_list=[tril_out, triu_out], ) + fetch_list=[tril_out, triu_out], + ) self.assertTrue(np.allclose(tril_out, np.tril(data))) self.assertTrue(np.allclose(triu_out, np.triu(data))) diff --git a/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py b/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py index 49699b8fafd..717f1b60049 100755 --- a/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py @@ -125,6 +125,7 @@ def trilinear_interp_np(input, class TestTrilinearInterpOp(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -154,9 +155,10 @@ class TestTrilinearInterpOp(OpTest): out_h = self.out_h out_w = self.out_w - output_np = trilinear_interp_np( - input_np, out_d, out_h, out_w, self.out_size, self.actual_shape, - self.align_corners, self.align_mode, self.data_layout) + output_np = trilinear_interp_np(input_np, out_d, out_h, out_w, + self.out_size, self.actual_shape, + self.align_corners, self.align_mode, + self.data_layout) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size @@ -185,8 +187,10 @@ class TestTrilinearInterpOp(OpTest): self.check_output(check_eager=self.check_eager) def test_check_grad(self): - self.check_grad( - ['X'], 'Out', in_place=True, check_eager=self.check_eager) + self.check_grad(['X'], + 'Out', + in_place=True, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'trilinear' @@ -201,6 +205,7 @@ class TestTrilinearInterpOp(OpTest): class TestTrilinearInterpCase1(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 1, 7, 8, 9] @@ -213,6 +218,7 @@ class TestTrilinearInterpCase1(TestTrilinearInterpOp): class TestTrilinearInterpCase2(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 9, 6, 8] @@ -225,6 +231,7 @@ class TestTrilinearInterpCase2(TestTrilinearInterpOp): class TestTrilinearInterpCase3(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [3, 2, 16, 8, 4] @@ -237,6 +244,7 @@ class TestTrilinearInterpCase3(TestTrilinearInterpOp): class TestTrilinearInterpCase4(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [4, 1, 7, 8, 9] @@ -250,6 +258,7 @@ class TestTrilinearInterpCase4(TestTrilinearInterpOp): class TestTrilinearInterpCase5(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [3, 3, 9, 6, 8] @@ -263,6 +272,7 @@ class TestTrilinearInterpCase5(TestTrilinearInterpOp): class TestTrilinearInterpCase6(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [1, 1, 16, 8, 4] @@ -276,6 +286,7 @@ class TestTrilinearInterpCase6(TestTrilinearInterpOp): class TestTrilinearInterpSame(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [1, 1, 16, 8, 4] @@ -288,6 
+299,7 @@ class TestTrilinearInterpSame(TestTrilinearInterpOp): class TestTrilinearInterpSameHW(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [1, 1, 16, 8, 4] @@ -300,6 +312,7 @@ class TestTrilinearInterpSameHW(TestTrilinearInterpOp): class TestTrilinearInterpActualShape(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [3, 2, 16, 8, 4] @@ -313,6 +326,7 @@ class TestTrilinearInterpActualShape(TestTrilinearInterpOp): class TestTrilinearInterpDatalayout(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 4, 4, 4, 3] @@ -327,14 +341,15 @@ class TestTrilinearInterpDatalayout(TestTrilinearInterpOp): class TestTrilinearInterpOpUint8(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None self.init_test_case() self.op_type = "trilinear_interp" self.check_eager = True - input_np = np.random.randint( - low=0, high=256, size=self.input_shape).astype("uint8") + input_np = np.random.randint(low=0, high=256, + size=self.input_shape).astype("uint8") if self.scale > 0: out_d = int(self.input_shape[2] * self.scale) @@ -365,8 +380,9 @@ class TestTrilinearInterpOpUint8(OpTest): self.outputs = {'Out': output_np} def test_check_output(self): - self.check_output_with_place( - place=core.CPUPlace(), atol=1, check_eager=self.check_eager) + self.check_output_with_place(place=core.CPUPlace(), + atol=1, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'trilinear' @@ -380,6 +396,7 @@ class TestTrilinearInterpOpUint8(OpTest): class TestTrilinearInterpCase1Uint8(TestTrilinearInterpOpUint8): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 16, 8, 4] @@ -392,6 +409,7 @@ class TestTrilinearInterpCase1Uint8(TestTrilinearInterpOpUint8): class TestTrilinearInterpCase2Uint8(TestTrilinearInterpOpUint8): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [4, 1, 7, 8, 9] @@ -405,24 +423,28 @@ class TestTrilinearInterpCase2Uint8(TestTrilinearInterpOpUint8): class TestTrilinearInterpOtherMethod1(TestTrilinearInterpOp): + def set_align_mode(self): self.align_corners = False self.align_mode = 1 class TestTrilinearInterpWithMethod2(TestTrilinearInterpOp): + def set_align_mode(self): self.align_corners = False self.align_mode = 0 class TestTrilinearInterpWithMethod3(TestTrilinearInterpOp): + def set_align_mode(self): self.align_corners = True self.align_mode = 0 class TestTrilinearInterpScale1(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 5, 7, 9] @@ -435,6 +457,7 @@ class TestTrilinearInterpScale1(TestTrilinearInterpOp): class TestTrilinearInterpScale2(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 5, 7, 9] @@ -447,6 +470,7 @@ class TestTrilinearInterpScale2(TestTrilinearInterpOp): class TestTrilinearInterpScale3(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 5, 7, 9] @@ -459,6 +483,7 @@ class TestTrilinearInterpScale3(TestTrilinearInterpOp): class TestTrilinearInterpZero(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 5, 7, 11] @@ -471,6 +496,7 @@ class TestTrilinearInterpZero(TestTrilinearInterpOp): class TestTrilinearInterpOp_attr_tensor(OpTest): + def setUp(self): self.out_size = None 
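The many standalone `+` lines following `class ...:` headers in these files are yapf inserting a blank line between a class statement and its first method, presumably via the blank_line_before_nested_class_or_def style knob (an assumption; the exact setting comes from the repo's style file). A minimal illustration with made-up class names:

import unittest


# Pre-patch layout: no blank line after the class header.
class ExampleBefore(unittest.TestCase):
    def test_something(self):
        self.assertTrue(True)


# Post-patch layout: yapf adds one blank line before the first method.
class ExampleAfter(unittest.TestCase):

    def test_something(self):
        self.assertTrue(True)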
self.actual_shape = None @@ -523,8 +549,10 @@ class TestTrilinearInterpOp_attr_tensor(OpTest): self.check_output(check_eager=self.check_eager) def test_check_grad(self): - self.check_grad( - ['X'], 'Out', in_place=True, check_eager=self.check_eager) + self.check_grad(['X'], + 'Out', + in_place=True, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'trilinear' @@ -540,6 +568,7 @@ class TestTrilinearInterpOp_attr_tensor(OpTest): # out_size is a 1-D tensor class TestTrilinearInterp_attr_tensor_Case1(TestTrilinearInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [3, 2, 9, 6, 8] @@ -554,6 +583,7 @@ class TestTrilinearInterp_attr_tensor_Case1(TestTrilinearInterpOp_attr_tensor): # scale is a 1-D tensor class TestTrilinearInterp_attr_tensor_Case2(TestTrilinearInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 8, 8, 4] @@ -569,6 +599,7 @@ class TestTrilinearInterp_attr_tensor_Case2(TestTrilinearInterpOp_attr_tensor): # scale is a 1-D tensor class TestTrilinearInterp_attr_tensor_Case3(TestTrilinearInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 8, 8, 4] @@ -583,6 +614,7 @@ class TestTrilinearInterp_attr_tensor_Case3(TestTrilinearInterpOp_attr_tensor): class TestTrilinearInterpAPI(unittest.TestCase): + def test_case(self): x = fluid.data(name="x", shape=[2, 3, 6, 9, 4], dtype="float32") y = fluid.data(name="y", shape=[2, 6, 9, 4, 3], dtype="float32") @@ -590,22 +622,31 @@ class TestTrilinearInterpAPI(unittest.TestCase): dim = fluid.data(name="dim", shape=[1], dtype="int32") shape_tensor = fluid.data(name="shape_tensor", shape=[3], dtype="int32") actual_size = fluid.data(name="actual_size", shape=[3], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32") + scale_tensor = fluid.data(name="scale_tensor", + shape=[1], + dtype="float32") - out1 = fluid.layers.resize_trilinear( - y, out_shape=[12, 18, 8], data_format='NDHWC') + out1 = fluid.layers.resize_trilinear(y, + out_shape=[12, 18, 8], + data_format='NDHWC') out2 = fluid.layers.resize_trilinear(x, out_shape=[12, dim, 8]) out3 = fluid.layers.resize_trilinear(x, out_shape=shape_tensor) - out4 = fluid.layers.resize_trilinear( - x, out_shape=[4, 4, 8], actual_shape=actual_size) + out4 = fluid.layers.resize_trilinear(x, + out_shape=[4, 4, 8], + actual_shape=actual_size) out5 = fluid.layers.resize_trilinear(x, scale=scale_tensor) - out6 = interpolate( - x, scale_factor=scale_tensor, mode='trilinear', data_format="NCDHW") - out7 = interpolate( - x, size=[4, 4, 8], mode='trilinear', data_format="NCDHW") - out8 = interpolate( - x, size=shape_tensor, mode='trilinear', data_format="NCDHW") + out6 = interpolate(x, + scale_factor=scale_tensor, + mode='trilinear', + data_format="NCDHW") + out7 = interpolate(x, + size=[4, 4, 8], + mode='trilinear', + data_format="NCDHW") + out8 = interpolate(x, + size=shape_tensor, + mode='trilinear', + data_format="NCDHW") x_data = np.random.random((2, 3, 6, 9, 4)).astype("float32") dim_data = np.array([18]).astype("int32") @@ -631,8 +672,11 @@ class TestTrilinearInterpAPI(unittest.TestCase): fetch_list=[out1, out2, out3, out4, out5], return_numpy=True) - expect_res = trilinear_interp_np( - x_data, out_d=12, out_h=18, out_w=8, align_mode=1) + expect_res = trilinear_interp_np(x_data, + out_d=12, + out_h=18, + out_w=8, + align_mode=1) self.assertTrue( np.allclose(results[0], 
np.transpose(expect_res, (0, 2, 3, 4, 1)))) for i in range(len(results) - 1): @@ -640,13 +684,15 @@ class TestTrilinearInterpAPI(unittest.TestCase): class TestTrilinearInterpOpException(unittest.TestCase): + def test_exception(self): input = fluid.data(name="input", shape=[2, 3, 6, 9, 4], dtype="float32") def attr_data_format(): # for 5-D input, data_format only can be NCDHW or NDHWC - out = fluid.layers.resize_trilinear( - input, out_shape=[4, 8, 4], data_format='NHWC') + out = fluid.layers.resize_trilinear(input, + out_shape=[4, 8, 4], + data_format='NHWC') self.assertRaises(ValueError, attr_data_format) diff --git a/python/paddle/fluid/tests/unittests/test_trilinear_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_trilinear_interp_v2_op.py index 6d072e3c377..f494767d8d0 100755 --- a/python/paddle/fluid/tests/unittests/test_trilinear_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_trilinear_interp_v2_op.py @@ -139,6 +139,7 @@ def trilinear_interp_np(input, class TestTrilinearInterpOp(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -217,8 +218,10 @@ class TestTrilinearInterpOp(OpTest): self.check_output(check_eager=self.check_eager) def test_check_grad(self): - self.check_grad( - ['X'], 'Out', in_place=True, check_eager=self.check_eager) + self.check_grad(['X'], + 'Out', + in_place=True, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'trilinear' @@ -233,6 +236,7 @@ class TestTrilinearInterpOp(OpTest): class TestTrilinearInterpCase1(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 1, 7, 8, 9] @@ -245,6 +249,7 @@ class TestTrilinearInterpCase1(TestTrilinearInterpOp): class TestTrilinearInterpCase2(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 9, 6, 8] @@ -257,6 +262,7 @@ class TestTrilinearInterpCase2(TestTrilinearInterpOp): class TestTrilinearInterpCase3(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [3, 2, 16, 8, 4] @@ -269,6 +275,7 @@ class TestTrilinearInterpCase3(TestTrilinearInterpOp): class TestTrilinearInterpCase4(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [4, 1, 7, 8, 9] @@ -282,6 +289,7 @@ class TestTrilinearInterpCase4(TestTrilinearInterpOp): class TestTrilinearInterpCase5(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [3, 3, 9, 6, 8] @@ -295,6 +303,7 @@ class TestTrilinearInterpCase5(TestTrilinearInterpOp): class TestTrilinearInterpCase6(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [1, 1, 16, 8, 4] @@ -308,6 +317,7 @@ class TestTrilinearInterpCase6(TestTrilinearInterpOp): class TestTrilinearInterpSame(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [1, 1, 16, 8, 4] @@ -320,6 +330,7 @@ class TestTrilinearInterpSame(TestTrilinearInterpOp): class TestTrilinearInterpSameHW(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [1, 1, 16, 8, 4] @@ -332,6 +343,7 @@ class TestTrilinearInterpSameHW(TestTrilinearInterpOp): class TestTrilinearInterpActualShape(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [3, 2, 16, 8, 4] @@ -345,6 +357,7 @@ class 
TestTrilinearInterpActualShape(TestTrilinearInterpOp): class TestTrilinearInterpDatalayout(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 4, 4, 4, 3] @@ -359,6 +372,7 @@ class TestTrilinearInterpDatalayout(TestTrilinearInterpOp): class TestTrilinearInterpOpUint8(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -366,8 +380,8 @@ class TestTrilinearInterpOpUint8(OpTest): self.op_type = "trilinear_interp_v2" # TODO(dev): add self.python_api self.check_eager = False - input_np = np.random.randint( - low=0, high=256, size=self.input_shape).astype("uint8") + input_np = np.random.randint(low=0, high=256, + size=self.input_shape).astype("uint8") if self.scale > 0: if isinstance(self.scale, float) or isinstance(self.scale, int): @@ -411,8 +425,9 @@ class TestTrilinearInterpOpUint8(OpTest): self.outputs = {'Out': output_np} def test_check_output(self): - self.check_output_with_place( - place=core.CPUPlace(), atol=1, check_eager=self.check_eager) + self.check_output_with_place(place=core.CPUPlace(), + atol=1, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'trilinear' @@ -426,6 +441,7 @@ class TestTrilinearInterpOpUint8(OpTest): class TestTrilinearInterpCase1Uint8(TestTrilinearInterpOpUint8): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 16, 8, 4] @@ -438,6 +454,7 @@ class TestTrilinearInterpCase1Uint8(TestTrilinearInterpOpUint8): class TestTrilinearInterpCase2Uint8(TestTrilinearInterpOpUint8): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [4, 1, 7, 8, 9] @@ -451,24 +468,28 @@ class TestTrilinearInterpCase2Uint8(TestTrilinearInterpOpUint8): class TestTrilinearInterpOtherMethod1(TestTrilinearInterpOp): + def set_align_mode(self): self.align_corners = False self.align_mode = 1 class TestTrilinearInterpWithMethod2(TestTrilinearInterpOp): + def set_align_mode(self): self.align_corners = False self.align_mode = 0 class TestTrilinearInterpWithMethod3(TestTrilinearInterpOp): + def set_align_mode(self): self.align_corners = True self.align_mode = 0 class TestTrilinearInterpScale1(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 5, 7, 9] @@ -481,6 +502,7 @@ class TestTrilinearInterpScale1(TestTrilinearInterpOp): class TestTrilinearInterpScale2(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 5, 7, 9] @@ -493,6 +515,7 @@ class TestTrilinearInterpScale2(TestTrilinearInterpOp): class TestTrilinearInterpScale3(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 5, 7, 9] @@ -505,6 +528,7 @@ class TestTrilinearInterpScale3(TestTrilinearInterpOp): class TestTrilinearInterpZero(TestTrilinearInterpOp): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 5, 7, 11] @@ -517,6 +541,7 @@ class TestTrilinearInterpZero(TestTrilinearInterpOp): class TestTrilinearInterpOp_attr_tensor(OpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -583,8 +608,10 @@ class TestTrilinearInterpOp_attr_tensor(OpTest): self.check_output(check_eager=self.check_eager) def test_check_grad(self): - self.check_grad( - ['X'], 'Out', in_place=True, check_eager=self.check_eager) + self.check_grad(['X'], + 'Out', + in_place=True, + check_eager=self.check_eager) def init_test_case(self): self.interp_method = 'trilinear' @@ 
-600,6 +627,7 @@ class TestTrilinearInterpOp_attr_tensor(OpTest): # out_size is a 1-D tensor class TestTrilinearInterp_attr_tensor_Case1(TestTrilinearInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [3, 2, 9, 6, 8] @@ -614,6 +642,7 @@ class TestTrilinearInterp_attr_tensor_Case1(TestTrilinearInterpOp_attr_tensor): # scale is a 1-D tensor class TestTrilinearInterp_attr_tensor_Case2(TestTrilinearInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 8, 8, 4] @@ -629,6 +658,7 @@ class TestTrilinearInterp_attr_tensor_Case2(TestTrilinearInterpOp_attr_tensor): # scale is a 1-D tensor class TestTrilinearInterp_attr_tensor_Case3(TestTrilinearInterpOp_attr_tensor): + def init_test_case(self): self.interp_method = 'trilinear' self.input_shape = [2, 3, 8, 8, 4] @@ -643,6 +673,7 @@ class TestTrilinearInterp_attr_tensor_Case3(TestTrilinearInterpOp_attr_tensor): class TestTrilinearInterpAPI(unittest.TestCase): + def test_case(self): x = fluid.data(name="x", shape=[2, 3, 6, 9, 4], dtype="float32") y = fluid.data(name="y", shape=[2, 6, 9, 4, 3], dtype="float32") @@ -650,22 +681,31 @@ class TestTrilinearInterpAPI(unittest.TestCase): dim = fluid.data(name="dim", shape=[1], dtype="int32") shape_tensor = fluid.data(name="shape_tensor", shape=[3], dtype="int32") actual_size = fluid.data(name="actual_size", shape=[3], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32") + scale_tensor = fluid.data(name="scale_tensor", + shape=[1], + dtype="float32") - out1 = fluid.layers.resize_trilinear( - y, out_shape=[12, 18, 8], data_format='NDHWC') + out1 = fluid.layers.resize_trilinear(y, + out_shape=[12, 18, 8], + data_format='NDHWC') out2 = fluid.layers.resize_trilinear(x, out_shape=[12, dim, 8]) out3 = fluid.layers.resize_trilinear(x, out_shape=shape_tensor) - out4 = fluid.layers.resize_trilinear( - x, out_shape=[4, 4, 8], actual_shape=actual_size) + out4 = fluid.layers.resize_trilinear(x, + out_shape=[4, 4, 8], + actual_shape=actual_size) out5 = fluid.layers.resize_trilinear(x, scale=scale_tensor) - out6 = interpolate( - x, scale_factor=scale_tensor, mode='trilinear', data_format="NCDHW") - out7 = interpolate( - x, size=[4, 4, 8], mode='trilinear', data_format="NCDHW") - out8 = interpolate( - x, size=shape_tensor, mode='trilinear', data_format="NCDHW") + out6 = interpolate(x, + scale_factor=scale_tensor, + mode='trilinear', + data_format="NCDHW") + out7 = interpolate(x, + size=[4, 4, 8], + mode='trilinear', + data_format="NCDHW") + out8 = interpolate(x, + size=shape_tensor, + mode='trilinear', + data_format="NCDHW") x_data = np.random.random((2, 3, 6, 9, 4)).astype("float32") dim_data = np.array([18]).astype("int32") @@ -691,8 +731,11 @@ class TestTrilinearInterpAPI(unittest.TestCase): fetch_list=[out1, out2, out3, out4, out5], return_numpy=True) - expect_res = trilinear_interp_np( - x_data, out_d=12, out_h=18, out_w=8, align_mode=1) + expect_res = trilinear_interp_np(x_data, + out_d=12, + out_h=18, + out_w=8, + align_mode=1) self.assertTrue( np.allclose(results[0], np.transpose(expect_res, (0, 2, 3, 4, 1)))) for i in range(len(results) - 1): @@ -700,13 +743,15 @@ class TestTrilinearInterpAPI(unittest.TestCase): class TestTrilinearInterpOpException(unittest.TestCase): + def test_exception(self): input = fluid.data(name="input", shape=[2, 3, 6, 9, 4], dtype="float32") def attr_data_format(): # for 5-D input, data_format only can be NCDHW or NDHWC - out = 
fluid.layers.resize_trilinear( - input, out_shape=[4, 8, 4], data_format='NHWC') + out = fluid.layers.resize_trilinear(input, + out_shape=[4, 8, 4], + data_format='NHWC') self.assertRaises(ValueError, attr_data_format) diff --git a/python/paddle/fluid/tests/unittests/test_trunc_op.py b/python/paddle/fluid/tests/unittests/test_trunc_op.py index 1a6790728b1..56a39e5f692 100644 --- a/python/paddle/fluid/tests/unittests/test_trunc_op.py +++ b/python/paddle/fluid/tests/unittests/test_trunc_op.py @@ -27,6 +27,7 @@ paddle.enable_static() class TestTruncOp(OpTest): + def setUp(self): self.op_type = "trunc" self.python_api = paddle.trunc @@ -46,6 +47,7 @@ class TestTruncOp(OpTest): class TestFloatTruncOp(TestTruncOp): + def init_dtype_type(self): self.dtype = np.float32 self.__class__.exist_fp64_check_grad = True @@ -55,6 +57,7 @@ class TestFloatTruncOp(TestTruncOp): class TestIntTruncOp(TestTruncOp): + def init_dtype_type(self): self.dtype = np.int32 self.__class__.exist_fp64_check_grad = True @@ -64,6 +67,7 @@ class TestIntTruncOp(TestTruncOp): class TestTruncAPI(unittest.TestCase): + def setUp(self): self.shape = [20, 20] self.x = np.random.random((20, 20)).astype(np.float32) diff --git a/python/paddle/fluid/tests/unittests/test_truncated_gaussian_random_op.py b/python/paddle/fluid/tests/unittests/test_truncated_gaussian_random_op.py index fe28e0c9638..8016499d9ac 100644 --- a/python/paddle/fluid/tests/unittests/test_truncated_gaussian_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_truncated_gaussian_random_op.py @@ -27,6 +27,7 @@ from paddle.fluid.framework import _test_eager_guard class TestTrunctedGaussianRandomOp(unittest.TestCase): + def setUp(self): self.op_type = "truncated_gaussian_random" self.inputs = {} @@ -52,8 +53,9 @@ class TestTrunctedGaussianRandomOp(unittest.TestCase): program = fluid.Program() block = program.global_block() vout = block.create_var(name="Out") - op = block.append_op( - type=self.op_type, outputs={"Out": vout}, attrs=self.attrs) + op = block.append_op(type=self.op_type, + outputs={"Out": vout}, + attrs=self.attrs) op.desc.infer_var_type(block.desc) op.desc.infer_shape(block.desc) diff --git a/python/paddle/fluid/tests/unittests/test_unbind_op.py b/python/paddle/fluid/tests/unittests/test_unbind_op.py index 43f2f3526ac..5f8fb382eb9 100644 --- a/python/paddle/fluid/tests/unittests/test_unbind_op.py +++ b/python/paddle/fluid/tests/unittests/test_unbind_op.py @@ -25,6 +25,7 @@ from paddle.fluid.framework import _test_eager_guard class TestUnbind(unittest.TestCase): + def test_unbind(self): x_1 = fluid.data(shape=[2, 3], dtype='float32', name='x_1') @@ -34,8 +35,10 @@ class TestUnbind(unittest.TestCase): exe = fluid.Executor(place=fluid.CPUPlace()) [res_1, res_2] = exe.run(fluid.default_main_program(), - feed={"x_1": input_1, - "axis": 0}, + feed={ + "x_1": input_1, + "axis": 0 + }, fetch_list=[out_0, out_1]) assert np.array_equal(res_1, input_1[0, 0:100]) @@ -62,6 +65,7 @@ class TestUnbind(unittest.TestCase): class TestLayersUnbind(unittest.TestCase): + def test_layers_unbind(self): x_1 = fluid.data(shape=[2, 3], dtype='float32', name='x_1') @@ -71,8 +75,10 @@ class TestLayersUnbind(unittest.TestCase): exe = fluid.Executor(place=fluid.CPUPlace()) [res_1, res_2] = exe.run(fluid.default_main_program(), - feed={"x_1": input_1, - "axis": 0}, + feed={ + "x_1": input_1, + "axis": 0 + }, fetch_list=[out_0, out_1]) assert np.array_equal(res_1, input_1[0, 0:100]) @@ -80,6 +86,7 @@ class TestLayersUnbind(unittest.TestCase): class TestUnbindOp(OpTest): + def 
initParameters(self): pass @@ -118,6 +125,7 @@ class TestUnbindOp(OpTest): class TestUnbindOp1(TestUnbindOp): + def initParameters(self): self.axis = 1 self.num = 2 @@ -131,6 +139,7 @@ class TestUnbindOp1(TestUnbindOp): class TestUnbindOp2(TestUnbindOp): + def initParameters(self): self.axis = 2 self.num = 2 @@ -144,6 +153,7 @@ class TestUnbindOp2(TestUnbindOp): class TestUnbindOp3(TestUnbindOp): + def initParameters(self): self.axis = 2 self.num = 2 @@ -160,6 +170,7 @@ class TestUnbindOp3(TestUnbindOp): class TestUnbindOp4(TestUnbindOp): + def initParameters(self): self.axis = 1 self.num = 2 @@ -176,6 +187,7 @@ class TestUnbindOp4(TestUnbindOp): class TestUnbindBF16Op(OpTest): + def setUp(self): self._set_op_type() self.python_api = paddle.unbind @@ -203,6 +215,7 @@ class TestUnbindBF16Op(OpTest): class TestUnbindAxisError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): x = fluid.data(shape=[2, 3], dtype='float32', name='x') diff --git a/python/paddle/fluid/tests/unittests/test_unfold_op.py b/python/paddle/fluid/tests/unittests/test_unfold_op.py index 7295cb83816..c990b67f9a4 100644 --- a/python/paddle/fluid/tests/unittests/test_unfold_op.py +++ b/python/paddle/fluid/tests/unittests/test_unfold_op.py @@ -52,8 +52,9 @@ class TestUnfoldOp(OpTest): dkernel_w = self.dilations[1] * (self.kernel_sizes[1] - 1) + 1 out_height = int((self.input_height + self.paddings[0] + self.paddings[2] - dkernel_h) / self.strides[0]) + 1 - out_width = int((self.input_width + self.paddings[1] + self.paddings[3] - - dkernel_w) / self.strides[1]) + 1 + out_width = int( + (self.input_width + self.paddings[1] + self.paddings[3] - dkernel_w) + / self.strides[1]) + 1 output_shape[2] = out_height * out_width output = np.zeros(output_shape).astype(np.float64) ############ calculate output ############## @@ -63,8 +64,8 @@ class TestUnfoldOp(OpTest): h_out = int(k / out_width) w_out = k % out_width w_offset = j % self.kernel_sizes[1] - h_offset = int(j / - self.kernel_sizes[1]) % self.kernel_sizes[0] + h_offset = int( + j / self.kernel_sizes[1]) % self.kernel_sizes[0] c_in = int(j / (self.kernel_sizes[0] * self.kernel_sizes[1])) h_in = h_offset * self.dilations[0] + h_out * self.strides[ diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py index 5f4989f6c5d..27dda75a736 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py @@ -24,6 +24,7 @@ from paddle.fluid.tests.unittests.test_uniform_random_op import output_hist, out class TestUniformRandomOpBF16(OpTest): + def setUp(self): self.op_type = "uniform_random" self.dtype = "uint16" @@ -48,9 +49,8 @@ class TestUniformRandomOpBF16(OpTest): result = np.array(outs[0]) hist, prob = self.output_hist(result) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) def test_check_output(self): outs = self.calc_output(core.CPUPlace()) @@ -60,6 +60,7 @@ class TestUniformRandomOpBF16(OpTest): class TestUniformRandomOpBF16AttrTensorList(TestUniformRandomOpBF16): + def setUp(self): self.op_type = "uniform_random" self.new_shape = (1000, 784) @@ -84,6 +85,7 @@ class TestUniformRandomOpBF16AttrTensorList(TestUniformRandomOpBF16): class TestUniformRandomOpBF16AttrTensorInt32( TestUniformRandomOpBF16AttrTensorList): + def setUp(self): 
self.op_type = "uniform_random" self.dtype = "uint16" @@ -93,6 +95,7 @@ class TestUniformRandomOpBF16AttrTensorInt32( class TestUniformRandomOpBF16WithDiagInit(TestUniformRandomOpBF16): + def init_attrs(self): self.attrs = { "shape": [1000, 784], @@ -108,6 +111,7 @@ class TestUniformRandomOpBF16WithDiagInit(TestUniformRandomOpBF16): class TestUniformRandomOpBF16SelectedRows(unittest.TestCase): + def test_check_output(self): self.check_with_place(core.CPUPlace()) @@ -115,57 +119,55 @@ class TestUniformRandomOpBF16SelectedRows(unittest.TestCase): scope = core.Scope() out = scope.var("X").get_selected_rows() paddle.seed(10) - op = Operator( - "uniform_random", - Out="X", - shape=[1000, 784], - min=-5.0, - max=10.0, - seed=10, - dtype=int(core.VarDesc.VarType.BF16)) + op = Operator("uniform_random", + Out="X", + shape=[1000, 784], + min=-5.0, + max=10.0, + seed=10, + dtype=int(core.VarDesc.VarType.BF16)) op.run(scope, place) self.assertEqual(out.get_tensor().shape(), [1000, 784]) result = convert_uint16_to_float(np.array(out.get_tensor())) hist, prob = output_hist(result) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestUniformRandomOpBF16SelectedRowsWithDiagInit( TestUniformRandomOpBF16SelectedRows): + def check_with_place(self, place): scope = core.Scope() out = scope.var("X").get_selected_rows() paddle.seed(10) - op = Operator( - "uniform_random", - Out="X", - shape=[500, 784], - min=-5.0, - max=10.0, - seed=10, - diag_num=500, - diag_step=784, - diag_val=1.0, - dtype=int(core.VarDesc.VarType.BF16)) + op = Operator("uniform_random", + Out="X", + shape=[500, 784], + min=-5.0, + max=10.0, + seed=10, + diag_num=500, + diag_step=784, + diag_val=1.0, + dtype=int(core.VarDesc.VarType.BF16)) op.run(scope, place) self.assertEqual(out.get_tensor().shape(), [500, 784]) result = convert_uint16_to_float(np.array(out.get_tensor())) hist, prob = output_hist(result) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestUniformRandomOpBF16AttrTensorAPI(unittest.TestCase): + def test_attr_tensor_API(self): startup_program = fluid.Program() train_program = fluid.Program() with fluid.program_guard(train_program, startup_program): dim_tensor = fluid.layers.fill_constant([1], "int64", 3) - ret = fluid.layers.nn.uniform_random( - [1, dim_tensor, 2], dtype=np.uint16) + ret = fluid.layers.nn.uniform_random([1, dim_tensor, 2], + dtype=np.uint16) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -175,6 +177,7 @@ class TestUniformRandomOpBF16AttrTensorAPI(unittest.TestCase): class TestUniformRandomOpAPISeed(unittest.TestCase): + def test_attr_tensor_API(self): _seed = 10 gen = paddle.seed(_seed) @@ -184,10 +187,14 @@ class TestUniformRandomOpAPISeed(unittest.TestCase): _min = 5 _max = 10 - ret = fluid.layers.nn.uniform_random( - [2, 3, 2], min=_min, max=_max, seed=_seed) - ret_2 = fluid.layers.nn.uniform_random( - [2, 3, 2], min=_min, max=_max, seed=_seed) + ret = fluid.layers.nn.uniform_random([2, 3, 2], + min=_min, + max=_max, + seed=_seed) + ret_2 = fluid.layers.nn.uniform_random([2, 3, 2], + min=_min, + max=_max, + seed=_seed) res = fluid.layers.equal(ret, ret_2) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -201,6 +208,7 @@ class TestUniformRandomOpAPISeed(unittest.TestCase): class 
TestUniformRandomOpBF16SelectedRowsShapeTensor(unittest.TestCase): + def test_check_output(self): place = core.CPUPlace() scope = core.Scope() @@ -208,25 +216,24 @@ class TestUniformRandomOpBF16SelectedRowsShapeTensor(unittest.TestCase): shape_tensor = scope.var("Shape").get_tensor() shape_tensor.set(np.array([1000, 784]).astype("int64"), place) paddle.seed(10) - op = Operator( - "uniform_random", - ShapeTensor="Shape", - Out="X", - min=-5.0, - max=10.0, - seed=10, - dtype=int(core.VarDesc.VarType.BF16)) + op = Operator("uniform_random", + ShapeTensor="Shape", + Out="X", + min=-5.0, + max=10.0, + seed=10, + dtype=int(core.VarDesc.VarType.BF16)) op.run(scope, place) self.assertEqual(out.get_tensor().shape(), [1000, 784]) result = convert_uint16_to_float(np.array(out.get_tensor())) hist, prob = output_hist(result) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestUniformRandomOpBF16SelectedRowsShapeTensorList( TestUniformRandomOpBF16SelectedRowsShapeTensor): + def test_check_output(self): place = core.CPUPlace() scope = core.Scope() @@ -236,24 +243,23 @@ class TestUniformRandomOpBF16SelectedRowsShapeTensorList( shape_2 = scope.var("shape2").get_tensor() shape_2.set(np.array([784]).astype("int64"), place) paddle.seed(10) - op = Operator( - "uniform_random", - ShapeTensorList=["shape1", "shape2"], - Out="X", - min=-5.0, - max=10.0, - seed=10, - dtype=int(core.VarDesc.VarType.BF16)) + op = Operator("uniform_random", + ShapeTensorList=["shape1", "shape2"], + Out="X", + min=-5.0, + max=10.0, + seed=10, + dtype=int(core.VarDesc.VarType.BF16)) op.run(scope, place) self.assertEqual(out.get_tensor().shape(), [1000, 784]) result = convert_uint16_to_float(np.array(out.get_tensor())) hist, prob = output_hist(result) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestUniformRandomBatchSizeLikeOpBF16API(unittest.TestCase): + def test_attr_tensorlist_int32_API(self): startup_program = fluid.Program() train_program = fluid.Program() diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py index e7c88dd8398..2e0196d4b16 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py @@ -19,10 +19,12 @@ import numpy as np class TestUniformRandomInplaceOpDtype(unittest.TestCase): + def setUp(self): self.shape = (1000, 784) def test_uniform_random_inplace_op_dtype(self): + def test_fp32(): tensor_fp32 = paddle.ones(self.shape, dtype=paddle.float32) tensor_fp32.uniform_() @@ -43,6 +45,7 @@ class TestUniformRandomInplaceOpDtype(unittest.TestCase): class TestUniformRandomInplaceOpIsInplace(unittest.TestCase): + def setUp(self): self.shape = (1000, 784) @@ -53,6 +56,7 @@ class TestUniformRandomInplaceOpIsInplace(unittest.TestCase): class TestUniformRandomInplaceOpSeedIsZero(unittest.TestCase): + def setUp(self): self.shape = (1000, 784) self.seed = 0 @@ -67,6 +71,7 @@ class TestUniformRandomInplaceOpSeedIsZero(unittest.TestCase): class TestUniformRandomInplaceOpSeedIsNotZero(unittest.TestCase): + def setUp(self): self.shape = (1000, 784) self.seed = 10 @@ -81,6 +86,7 @@ class TestUniformRandomInplaceOpSeedIsNotZero(unittest.TestCase): class 
TestUniformRandomInplaceOpWithinRange(unittest.TestCase): + def setUp(self): self.shape = (1000, 784) self.min = -2 @@ -91,11 +97,12 @@ class TestUniformRandomInplaceOpWithinRange(unittest.TestCase): tensor = paddle.ones(self.shape) tensor.uniform_(min=self.min, max=self.max, seed=self.seed) tensor_data = tensor.numpy() - self.assertTrue((tensor_data > self.min).all() and - (tensor_data < self.max).all()) + self.assertTrue((tensor_data > self.min).all() + and (tensor_data < self.max).all()) class TestUniformRandomInplaceOpShape(unittest.TestCase): + def setUp(self): self.shape = (1000, 784) @@ -108,6 +115,7 @@ class TestUniformRandomInplaceOpShape(unittest.TestCase): class TestUniformRandomInplaceOpDistribution(unittest.TestCase): + def setUp(self): self.shape = (1000, 784) self.min = -3 @@ -126,10 +134,12 @@ class TestUniformRandomInplaceOpDistribution(unittest.TestCase): class TestUniformRandomInplaceOpError(unittest.TestCase): + def setUp(self): self.shape = (1000, 784) def test_uniform_random_inplace_op_error(self): + def test_attr_error(): tensor = paddle.ones(self.shape) tensor.uniform_(shape=self.shape, min=-2, max=2) @@ -138,6 +148,7 @@ class TestUniformRandomInplaceOpError(unittest.TestCase): class TestUniformRandomInplaceOpEmptyTensor(unittest.TestCase): + def test_uniform_random_inplace_op_empty_tensor(self): places = ['cpu'] if fluid.core.is_compiled_with_cuda(): @@ -154,6 +165,7 @@ class TestUniformRandomInplaceOpEmptyTensor(unittest.TestCase): class TestUniformRandomInplaceGrad(unittest.TestCase): + def setUp(self): self.shape = (1000, 784) diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py index 0bca3c08f3d..d80fe3b2d47 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py @@ -51,6 +51,7 @@ def output_hist_diag(out): class TestUniformRandomOp_attr_tensorlist(OpTest): + def setUp(self): self.op_type = "uniform_random" self.python_api = paddle.uniform @@ -72,18 +73,19 @@ class TestUniformRandomOp_attr_tensorlist(OpTest): def verify_output(self, outs): hist, prob = self.output_hist(np.array(outs[0])) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestMaxMinAreInt(TestUniformRandomOp_attr_tensorlist): + def init_attrs(self): self.attrs = {"min": -5, "max": 10, "seed": 10} self.output_hist = output_hist class TestUniformRandomOp_attr_tensorlist_int32(OpTest): + def setUp(self): self.op_type = "uniform_random" self.python_api = paddle.uniform @@ -105,12 +107,12 @@ class TestUniformRandomOp_attr_tensorlist_int32(OpTest): def verify_output(self, outs): hist, prob = self.output_hist(np.array(outs[0])) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestUniformRandomOp_attr_tensor(OpTest): + def setUp(self): self.op_type = "uniform_random" self.python_api = paddle.uniform @@ -127,12 +129,12 @@ class TestUniformRandomOp_attr_tensor(OpTest): def verify_output(self, outs): hist, prob = self.output_hist(np.array(outs[0])) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestUniformRandomOp_attr_tensor_int32(OpTest): + 
def setUp(self): self.op_type = "uniform_random" self.python_api = paddle.uniform @@ -149,12 +151,12 @@ class TestUniformRandomOp_attr_tensor_int32(OpTest): def verify_output(self, outs): hist, prob = self.output_hist(np.array(outs[0])) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestUniformRandomOp(OpTest): + def setUp(self): self.op_type = "uniform_random" self.python_api = paddle.uniform @@ -176,9 +178,8 @@ class TestUniformRandomOp(OpTest): def verify_output(self, outs): hist, prob = self.output_hist(np.array(outs[0])) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) def test_check_api(self): places = self._get_places() @@ -194,14 +195,15 @@ class TestUniformRandomOp(OpTest): class TestUniformRandomOpError(unittest.TestCase): + def test_errors(self): main_prog = Program() start_prog = Program() with program_guard(main_prog, start_prog): def test_Variable(): - x1 = fluid.create_lod_tensor( - np.zeros((4, 784)), [[1, 1, 1, 1]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.zeros((4, 784)), [[1, 1, 1, 1]], + fluid.CPUPlace()) fluid.layers.uniform_random(x1) self.assertRaises(TypeError, test_Variable) @@ -213,8 +215,9 @@ class TestUniformRandomOpError(unittest.TestCase): self.assertRaises(TypeError, test_Variable2) def test_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[4, 784], dtype='float32') + x2 = fluid.layers.data(name='x2', + shape=[4, 784], + dtype='float32') fluid.layers.uniform_random(x2, 'int32') self.assertRaises(TypeError, test_dtype) @@ -227,6 +230,7 @@ class TestUniformRandomOpError(unittest.TestCase): class TestUniformRandomOpWithDiagInit(TestUniformRandomOp): + def init_attrs(self): self.attrs = { "shape": [1000, 784], @@ -241,6 +245,7 @@ class TestUniformRandomOpWithDiagInit(TestUniformRandomOp): class TestUniformRandomOpSelectedRows(unittest.TestCase): + def get_places(self): places = [core.CPUPlace()] if core.is_compiled_with_cuda(): @@ -255,58 +260,55 @@ class TestUniformRandomOpSelectedRows(unittest.TestCase): scope = core.Scope() out = scope.var("X").get_selected_rows() paddle.seed(10) - op = Operator( - "uniform_random", - Out="X", - shape=[1000, 784], - min=-5.0, - max=10.0, - seed=10) + op = Operator("uniform_random", + Out="X", + shape=[1000, 784], + min=-5.0, + max=10.0, + seed=10) op.run(scope, place) self.assertEqual(out.get_tensor().shape(), [1000, 784]) hist, prob = output_hist(np.array(out.get_tensor())) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestUniformRandomOpSelectedRowsWithDiagInit( TestUniformRandomOpSelectedRows): + def check_with_place(self, place): scope = core.Scope() out = scope.var("X").get_selected_rows() paddle.seed(10) - op = Operator( - "uniform_random", - Out="X", - shape=[500, 784], - min=-5.0, - max=10.0, - seed=10, - diag_num=500, - diag_step=784, - diag_val=1.0) + op = Operator("uniform_random", + Out="X", + shape=[500, 784], + min=-5.0, + max=10.0, + seed=10, + diag_num=500, + diag_step=784, + diag_val=1.0) op.run(scope, place) self.assertEqual(out.get_tensor().shape(), [500, 784]) hist, prob = output_hist_diag(np.array(out.get_tensor())) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + 
str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestUniformRandomOpApi(unittest.TestCase): + def test_api(self): paddle.seed(10) x = fluid.layers.data('x', shape=[16], dtype='float32', lod_level=1) y = fluid.layers.fc(x, size=16, - param_attr=fluid.initializer.Uniform( - low=-0.5, - high=0.5, - seed=10, - diag_num=16, - diag_step=16, - diag_val=1.0)) + param_attr=fluid.initializer.Uniform(low=-0.5, + high=0.5, + seed=10, + diag_num=16, + diag_step=16, + diag_val=1.0)) place = fluid.CPUPlace() x_tensor = fluid.create_lod_tensor( @@ -317,6 +319,7 @@ class TestUniformRandomOpApi(unittest.TestCase): class TestUniformRandomOp_attr_tensor_API(unittest.TestCase): + def test_attr_tensor_API(self): startup_program = fluid.Program() train_program = fluid.Program() @@ -367,6 +370,7 @@ class TestUniformRandomOp_attr_tensor_API(unittest.TestCase): class TestUniformRandomOp_API_seed(unittest.TestCase): + def test_attr_tensor_API(self): _seed = 10 gen = paddle.seed(_seed) @@ -376,10 +380,14 @@ class TestUniformRandomOp_API_seed(unittest.TestCase): _min = 5 _max = 10 - ret = fluid.layers.nn.uniform_random( - [2, 3, 2], min=_min, max=_max, seed=_seed) - ret_2 = fluid.layers.nn.uniform_random( - [2, 3, 2], min=_min, max=_max, seed=_seed) + ret = fluid.layers.nn.uniform_random([2, 3, 2], + min=_min, + max=_max, + seed=_seed) + ret_2 = fluid.layers.nn.uniform_random([2, 3, 2], + min=_min, + max=_max, + seed=_seed) res = fluid.layers.equal(ret, ret_2) place = fluid.CPUPlace() if fluid.core.is_compiled_with_cuda(): @@ -395,6 +403,7 @@ class TestUniformRandomOp_API_seed(unittest.TestCase): class TestUniformRandomOpSelectedRowsShapeTensor(unittest.TestCase): + def get_places(self): places = [core.CPUPlace()] if core.is_compiled_with_cuda(): @@ -411,22 +420,21 @@ class TestUniformRandomOpSelectedRowsShapeTensor(unittest.TestCase): shape_tensor = scope.var("Shape").get_tensor() shape_tensor.set(np.array([1000, 784]).astype("int64"), place) paddle.seed(10) - op = Operator( - "uniform_random", - ShapeTensor="Shape", - Out="X", - min=-5.0, - max=10.0, - seed=10) + op = Operator("uniform_random", + ShapeTensor="Shape", + Out="X", + min=-5.0, + max=10.0, + seed=10) op.run(scope, place) self.assertEqual(out.get_tensor().shape(), [1000, 784]) hist, prob = output_hist(np.array(out.get_tensor())) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestUniformRandomOpSelectedRowsShapeTensorList(unittest.TestCase): + def get_places(self): places = [core.CPUPlace()] if core.is_compiled_with_cuda(): @@ -445,60 +453,65 @@ class TestUniformRandomOpSelectedRowsShapeTensorList(unittest.TestCase): shape_2 = scope.var("shape2").get_tensor() shape_2.set(np.array([784]).astype("int64"), place) paddle.seed(10) - op = Operator( - "uniform_random", - ShapeTensorList=["shape1", "shape2"], - Out="X", - min=-5.0, - max=10.0, - seed=10) + op = Operator("uniform_random", + ShapeTensorList=["shape1", "shape2"], + Out="X", + min=-5.0, + max=10.0, + seed=10) op.run(scope, place) self.assertEqual(out.get_tensor().shape(), [1000, 784]) hist, prob = output_hist(np.array(out.get_tensor())) - self.assertTrue( - np.allclose( - hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + self.assertTrue(np.allclose(hist, prob, rtol=0, atol=0.01), + "hist: " + str(hist)) class TestUniformRandomDygraphMode(unittest.TestCase): + def test_check_output(self): with 
fluid.dygraph.guard(): - x = fluid.layers.uniform_random( - [10], dtype="float32", min=0.0, max=1.0) + x = fluid.layers.uniform_random([10], + dtype="float32", + min=0.0, + max=1.0) x_np = x.numpy() for i in range(10): self.assertTrue((x_np[i] > 0 and x_np[i] < 1.0)) class TestUniformRandomBatchSizeLikeOpError(unittest.TestCase): + def test_errors(self): main_prog = Program() start_prog = Program() with program_guard(main_prog, start_prog): def test_Variable(): - x1 = fluid.create_lod_tensor( - np.zeros((100, 784)), [[10, 10, 10, 70]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.zeros( + (100, 784)), [[10, 10, 10, 70]], fluid.CPUPlace()) fluid.layers.uniform_random_batch_size_like(x1) self.assertRaises(TypeError, test_Variable) def test_shape(): - x1 = fluid.layers.data( - name='x2', shape=[100, 784], dtype='float32') + x1 = fluid.layers.data(name='x2', + shape=[100, 784], + dtype='float32') fluid.layers.uniform_random_batch_size_like(x1, shape="shape") self.assertRaises(TypeError, test_shape) def test_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[100, 784], dtype='float32') + x2 = fluid.layers.data(name='x2', + shape=[100, 784], + dtype='float32') fluid.layers.uniform_random_batch_size_like(x2, 'int32') self.assertRaises(TypeError, test_dtype) class TestUniformAlias(unittest.TestCase): + def test_alias(self): paddle.uniform([2, 3], min=-5.0, max=5.0) paddle.tensor.uniform([2, 3], min=-5.0, max=5.0) @@ -511,14 +524,15 @@ class TestUniformAlias(unittest.TestCase): class TestUniformOpError(unittest.TestCase): + def test_errors(self): main_prog = Program() start_prog = Program() with program_guard(main_prog, start_prog): def test_Variable(): - x1 = fluid.create_lod_tensor( - np.zeros((100, 784)), [[10, 10, 10, 70]], fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.zeros( + (100, 784)), [[10, 10, 10, 70]], fluid.CPUPlace()) paddle.tensor.random.uniform(x1) self.assertRaises(TypeError, test_Variable) @@ -530,31 +544,36 @@ class TestUniformOpError(unittest.TestCase): self.assertRaises(TypeError, test_Variable2) def test_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[100, 784], dtype='float32') + x2 = fluid.layers.data(name='x2', + shape=[100, 784], + dtype='float32') paddle.tensor.random.uniform(x2, 'int32') self.assertRaises(TypeError, test_dtype) def test_out_dtype(): - out = paddle.tensor.random.uniform( - shape=[3, 4], dtype='float64') + out = paddle.tensor.random.uniform(shape=[3, 4], + dtype='float64') self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64) test_out_dtype() class TestUniformDygraphMode(unittest.TestCase): + def test_check_output(self): with fluid.dygraph.guard(): - x = paddle.tensor.random.uniform( - [10], dtype="float32", min=0.0, max=1.0) + x = paddle.tensor.random.uniform([10], + dtype="float32", + min=0.0, + max=1.0) x_np = x.numpy() for i in range(10): self.assertTrue((x_np[i] > 0 and x_np[i] < 1.0)) class TestUniformDtype(unittest.TestCase): + def test_default_dtype(self): paddle.disable_static() @@ -581,6 +600,7 @@ class TestUniformDtype(unittest.TestCase): class TestRandomValue(unittest.TestCase): + def test_fixed_random_number(self): # Test GPU Fixed random number, which is generated by 'curandStatePhilox4_32_10_t' if not paddle.is_compiled_with_cuda(): @@ -624,8 +644,8 @@ class TestRandomValue(unittest.TestCase): 30.089634, 77.05225, 3.1201615, 68.34072, 59.266724, -25.33281, 12.973292, 27.41127, -17.412298, 27.931019 ] - out = paddle.empty( - [16, 16, 16, 16], dtype='float32').uniform_(-50, 100).numpy() + out = paddle.empty([16, 
16, 16, 16], + dtype='float32').uniform_(-50, 100).numpy() self.assertEqual(np.mean(out), expect_mean) self.assertEqual(np.std(out), expect_std) self.assertTrue(np.allclose(out[10, 10, 10, 0:10], expect)) diff --git a/python/paddle/fluid/tests/unittests/test_unique.py b/python/paddle/fluid/tests/unittests/test_unique.py index 71dce5cc463..b70a342ab82 100644 --- a/python/paddle/fluid/tests/unittests/test_unique.py +++ b/python/paddle/fluid/tests/unittests/test_unique.py @@ -25,6 +25,7 @@ from paddle.fluid.framework import _test_eager_guard class TestUniqueOp(OpTest): + def setUp(self): self.op_type = "unique" self.init_config() @@ -33,29 +34,31 @@ class TestUniqueOp(OpTest): self.check_output() def init_config(self): - self.inputs = {'X': np.array([2, 3, 3, 1, 5, 3], dtype='int64'), } + self.inputs = { + 'X': np.array([2, 3, 3, 1, 5, 3], dtype='int64'), + } self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} self.outputs = { - 'Out': np.array( - [2, 3, 1, 5], dtype='int64'), - 'Index': np.array( - [0, 1, 1, 2, 3, 1], dtype='int32') + 'Out': np.array([2, 3, 1, 5], dtype='int64'), + 'Index': np.array([0, 1, 1, 2, 3, 1], dtype='int32') } class TestOne(TestUniqueOp): + def init_config(self): - self.inputs = {'X': np.array([2], dtype='int64'), } + self.inputs = { + 'X': np.array([2], dtype='int64'), + } self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} self.outputs = { - 'Out': np.array( - [2], dtype='int64'), - 'Index': np.array( - [0], dtype='int32') + 'Out': np.array([2], dtype='int64'), + 'Index': np.array([0], dtype='int32') } class TestRandom(TestUniqueOp): + def init_config(self): self.inputs = {'X': np.random.randint(0, 100, (150, ), dtype='int64')} self.attrs = {'dtype': int(core.VarDesc.VarType.INT64)} @@ -72,7 +75,9 @@ class TestRandom(TestUniqueOp): class TestUniqueRaiseError(unittest.TestCase): + def test_errors(self): + def test_type(): fluid.layers.unique([10]) @@ -88,14 +93,15 @@ class TestUniqueRaiseError(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestOneGPU(TestUniqueOp): + def init_config(self): - self.inputs = {'X': np.array([2], dtype='int64'), } + self.inputs = { + 'X': np.array([2], dtype='int64'), + } self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} self.outputs = { - 'Out': np.array( - [2], dtype='int64'), - 'Index': np.array( - [0], dtype='int32') + 'Out': np.array([2], dtype='int64'), + 'Index': np.array([0], dtype='int32') } def test_check_output(self): @@ -107,6 +113,7 @@ class TestOneGPU(TestUniqueOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestRandomGPU(TestUniqueOp): + def init_config(self): self.inputs = {'X': np.random.randint(0, 100, (150, ), dtype='int64')} self.attrs = {'dtype': int(core.VarDesc.VarType.INT64)} @@ -128,14 +135,14 @@ class TestRandomGPU(TestUniqueOp): class TestSortedUniqueOp(TestUniqueOp): + def init_config(self): self.inputs = {'X': np.array([2, 3, 3, 1, 5, 3], dtype='int64')} - unique, indices, inverse, count = np.unique( - self.inputs['X'], - return_index=True, - return_inverse=True, - return_counts=True, - axis=None) + unique, indices, inverse, count = np.unique(self.inputs['X'], + return_index=True, + return_inverse=True, + return_counts=True, + axis=None) self.attrs = { 'dtype': int(core.VarDesc.VarType.INT32), "return_index": True, @@ -153,14 +160,14 @@ class TestSortedUniqueOp(TestUniqueOp): class TestUniqueOpAxisNone(TestUniqueOp): + def init_config(self): self.inputs = {'X': np.random.random((4, 7, 
10)).astype('float64')} - unique, indices, inverse, counts = np.unique( - self.inputs['X'], - return_index=True, - return_inverse=True, - return_counts=True, - axis=None) + unique, indices, inverse, counts = np.unique(self.inputs['X'], + return_index=True, + return_inverse=True, + return_counts=True, + axis=None) self.attrs = { 'dtype': int(core.VarDesc.VarType.INT32), "return_index": True, @@ -178,14 +185,14 @@ class TestUniqueOpAxisNone(TestUniqueOp): class TestUniqueOpAxis1(TestUniqueOp): + def init_config(self): self.inputs = {'X': np.random.random((3, 8, 8)).astype('float64')} - unique, indices, inverse, counts = np.unique( - self.inputs['X'], - return_index=True, - return_inverse=True, - return_counts=True, - axis=1) + unique, indices, inverse, counts = np.unique(self.inputs['X'], + return_index=True, + return_inverse=True, + return_counts=True, + axis=1) self.attrs = { 'dtype': int(core.VarDesc.VarType.INT32), "return_index": True, @@ -203,6 +210,7 @@ class TestUniqueOpAxis1(TestUniqueOp): class TestUniqueAPI(unittest.TestCase): + def test_dygraph_api_out(self): paddle.disable_static() x_data = x_data = np.random.randint(0, 10, (120)) @@ -216,18 +224,16 @@ class TestUniqueAPI(unittest.TestCase): paddle.disable_static() x_data = np.random.random((3, 5, 5)).astype("float32") x = paddle.to_tensor(x_data) - out, index, inverse, counts = paddle.unique( - x, - return_index=True, - return_inverse=True, - return_counts=True, - axis=0) - np_out, np_index, np_inverse, np_counts = np.unique( - x_data, - return_index=True, - return_inverse=True, - return_counts=True, - axis=0) + out, index, inverse, counts = paddle.unique(x, + return_index=True, + return_inverse=True, + return_counts=True, + axis=0) + np_out, np_index, np_inverse, np_counts = np.unique(x_data, + return_index=True, + return_inverse=True, + return_counts=True, + axis=0) self.assertTrue((out.numpy() == np_out).all(), True) self.assertTrue((index.numpy() == np_index).all(), True) self.assertTrue((inverse.numpy() == np_inverse).all(), True) @@ -238,12 +244,11 @@ class TestUniqueAPI(unittest.TestCase): paddle.disable_static() x_data = x_data = np.random.randint(0, 10, (120)) x = paddle.to_tensor(x_data) - out, indices, inverse, counts = paddle.unique( - x, - return_index=True, - return_inverse=True, - return_counts=True, - dtype="int32") + out, indices, inverse, counts = paddle.unique(x, + return_index=True, + return_inverse=True, + return_counts=True, + dtype="int32") expected_out, np_indices, np_inverse, np_counts = np.unique( x_data, return_index=True, return_inverse=True, return_counts=True) self.assertTrue((out.numpy() == expected_out).all(), True) @@ -262,22 +267,28 @@ class TestUniqueAPI(unittest.TestCase): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): x = paddle.fluid.data(name='x', shape=[3, 2], dtype='float64') - unique, inverse, counts = paddle.unique( - x, return_inverse=True, return_counts=True, axis=0) + unique, inverse, counts = paddle.unique(x, + return_inverse=True, + return_counts=True, + axis=0) place = paddle.CPUPlace() exe = paddle.static.Executor(place) x_np = np.array([[1, 2], [3, 4], [1, 2]]).astype('float64') result = exe.run(feed={"x": x_np}, fetch_list=[unique, inverse, counts]) - np_unique, np_inverse, np_counts = np.unique( - x_np, return_inverse=True, return_counts=True, axis=0) + np_unique, np_inverse, np_counts = np.unique(x_np, + return_inverse=True, + return_counts=True, + axis=0) self.assertTrue(np.allclose(result[0], np_unique)) 
self.assertTrue(np.allclose(result[1], np_inverse)) self.assertTrue(np.allclose(result[2], np_counts)) class TestUniqueError(unittest.TestCase): + def test_input_dtype(self): + def test_x_dtype(): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): diff --git a/python/paddle/fluid/tests/unittests/test_unique_consecutive_op.py b/python/paddle/fluid/tests/unittests/test_unique_consecutive_op.py index a12f1aaff45..b4a4eac0ba7 100644 --- a/python/paddle/fluid/tests/unittests/test_unique_consecutive_op.py +++ b/python/paddle/fluid/tests/unittests/test_unique_consecutive_op.py @@ -85,9 +85,13 @@ class TestUniqueConsecutiveOp(OpTest): self.return_counts) out = reference_unique_consecutive(x) out = np.array(out).astype(self.dtype) - self.inputs = {'X': x, } + self.inputs = { + 'X': x, + } self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} - self.outputs = {'Out': out, } + self.outputs = { + 'Out': out, + } def test_check_output(self): self.check_output() @@ -111,7 +115,9 @@ class TestUniqueConsecutiveOp2(TestUniqueConsecutiveOp): self.return_counts) result = np.array(result).astype(self.dtype) inverse = inverse.astype(self.dtype) - self.inputs = {'X': x, } + self.inputs = { + 'X': x, + } self.attrs = { 'return_inverse': self.return_inverse, 'dtype': int(core.VarDesc.VarType.INT32) @@ -137,7 +143,9 @@ class TestUniqueConsecutiveOp3(TestUniqueConsecutiveOp): self.return_counts) result = np.array(result).astype(self.dtype) counts = counts.astype(self.dtype) - self.inputs = {'X': x, } + self.inputs = { + 'X': x, + } self.attrs = { 'return_counts': self.return_counts, 'dtype': int(core.VarDesc.VarType.INT32) @@ -164,7 +172,9 @@ class TestUniqueConsecutiveOp4(TestUniqueConsecutiveOp): result = np.array(result).astype(self.dtype) inverse = inverse.astype(self.dtype) counts = counts.astype(self.dtype) - self.inputs = {'X': x, } + self.inputs = { + 'X': x, + } self.attrs = { 'return_inverse': self.return_inverse, 'return_counts': self.return_counts, @@ -174,6 +184,7 @@ class TestUniqueConsecutiveOp4(TestUniqueConsecutiveOp): class TestUniqueConsecutiveAPI(unittest.TestCase): + def setUp(self): self.places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): @@ -182,7 +193,9 @@ class TestUniqueConsecutiveAPI(unittest.TestCase): def check_static_result(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): paddle.enable_static() - input_x = fluid.data(name="input_x", shape=[100, ], dtype="float32") + input_x = fluid.data(name="input_x", shape=[ + 100, + ], dtype="float32") result = paddle.unique_consecutive(input_x) x_np = np.random.randint(20, size=100).astype("float32") exe = fluid.Executor(place) @@ -203,6 +216,7 @@ class TestUniqueConsecutiveAPI(unittest.TestCase): class TestUniqueConsecutiveCase2API(unittest.TestCase): + def setUp(self): self.places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): @@ -211,7 +225,9 @@ class TestUniqueConsecutiveCase2API(unittest.TestCase): def check_static_result(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): paddle.enable_static() - input_x = fluid.data(name="input_x", shape=[100, ], dtype="float32") + input_x = fluid.data(name="input_x", shape=[ + 100, + ], dtype="float32") result, inverse, counts = paddle.unique_consecutive( input_x, return_inverse=True, return_counts=True) x_np = np.random.randint(20, size=100).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_unique_name.py b/python/paddle/fluid/tests/unittests/test_unique_name.py index 
4ffff252ee9..b3adc6c3f5e 100644 --- a/python/paddle/fluid/tests/unittests/test_unique_name.py +++ b/python/paddle/fluid/tests/unittests/test_unique_name.py @@ -19,6 +19,7 @@ import paddle.fluid as fluid class TestUniqueName(unittest.TestCase): + def test_guard(self): with fluid.unique_name.guard(): name_1 = fluid.unique_name.generate('') @@ -46,6 +47,7 @@ class TestUniqueName(unittest.TestCase): class TestImperativeUniqueName(unittest.TestCase): + def test_name_generator(self): with fluid.dygraph.guard(): tracer = fluid.framework._dygraph_tracer() diff --git a/python/paddle/fluid/tests/unittests/test_unique_with_counts.py b/python/paddle/fluid/tests/unittests/test_unique_with_counts.py index 6b02a63633c..61669a0a5aa 100644 --- a/python/paddle/fluid/tests/unittests/test_unique_with_counts.py +++ b/python/paddle/fluid/tests/unittests/test_unique_with_counts.py @@ -23,6 +23,7 @@ from paddle.fluid.op import Operator class TestUniqueWithCountsOp(OpTest): + def setUp(self): self.op_type = "unique_with_counts" self.init_config() @@ -31,33 +32,33 @@ class TestUniqueWithCountsOp(OpTest): self.check_output() def init_config(self): - self.inputs = {'X': np.array([2, 3, 3, 1, 5, 3], dtype='int64'), } + self.inputs = { + 'X': np.array([2, 3, 3, 1, 5, 3], dtype='int64'), + } self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} self.outputs = { - 'Out': np.array( - [2, 3, 1, 5], dtype='int64'), - 'Index': np.array( - [0, 1, 1, 2, 3, 1], dtype='int32'), - 'Count': np.array( - [1, 3, 1, 1], dtype='int32') + 'Out': np.array([2, 3, 1, 5], dtype='int64'), + 'Index': np.array([0, 1, 1, 2, 3, 1], dtype='int32'), + 'Count': np.array([1, 3, 1, 1], dtype='int32') } class TestOne(TestUniqueWithCountsOp): + def init_config(self): - self.inputs = {'X': np.array([2], dtype='int64'), } + self.inputs = { + 'X': np.array([2], dtype='int64'), + } self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} self.outputs = { - 'Out': np.array( - [2], dtype='int64'), - 'Index': np.array( - [0], dtype='int32'), - 'Count': np.array( - [1], dtype='int32') + 'Out': np.array([2], dtype='int64'), + 'Index': np.array([0], dtype='int32'), + 'Count': np.array([1], dtype='int32') } class TestRandom(TestUniqueWithCountsOp): + def init_config(self): input_data = np.random.randint(0, 100, (2000, ), dtype='int64') self.inputs = {'X': input_data} @@ -82,7 +83,9 @@ class TestRandom(TestUniqueWithCountsOp): class TestUniqueWithCountsRaiseError(unittest.TestCase): + def test_errors(self): + def test_type(): fluid.layers.unique_with_counts([10]) @@ -98,16 +101,16 @@ class TestUniqueWithCountsRaiseError(unittest.TestCase): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestOneGPU(TestUniqueWithCountsOp): + def init_config(self): - self.inputs = {'X': np.array([2], dtype='int64'), } + self.inputs = { + 'X': np.array([2], dtype='int64'), + } self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} self.outputs = { - 'Out': np.array( - [2], dtype='int64'), - 'Index': np.array( - [0], dtype='int32'), - 'Count': np.array( - [1], dtype='int32') + 'Out': np.array([2], dtype='int64'), + 'Index': np.array([0], dtype='int32'), + 'Count': np.array([1], dtype='int32') } def test_check_output(self): @@ -119,6 +122,7 @@ class TestOneGPU(TestUniqueWithCountsOp): @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestRandomGPU(TestUniqueWithCountsOp): + def init_config(self): input_data = np.random.randint(0, 100, (2000, ), dtype='int64') self.inputs = {'X': input_data} diff --git 
a/python/paddle/fluid/tests/unittests/test_unpool1d_op.py b/python/paddle/fluid/tests/unittests/test_unpool1d_op.py index 95d19210acb..30c58d3477c 100644 --- a/python/paddle/fluid/tests/unittests/test_unpool1d_op.py +++ b/python/paddle/fluid/tests/unittests/test_unpool1d_op.py @@ -28,8 +28,8 @@ def _unpool_output_size(x, kernel_size, stride, padding, output_size): input_size = x.shape default_size = [] for d in range(len(kernel_size)): - default_size.append((input_size[-len(kernel_size) + d] - 1) * stride[d] - + kernel_size[d] - 2 * padding[d]) + default_size.append((input_size[-len(kernel_size) + d] - 1) * + stride[d] + kernel_size[d] - 2 * padding[d]) if output_size is None: ret = default_size else: @@ -55,6 +55,7 @@ def unpool1dmax_forward_naive(input, indices, ksize, strides, paddings, class TestUnpool1DOpAPI_dygraph(unittest.TestCase): + def test_case(self): places = [paddle.CPUPlace()] if paddle.fluid.core.is_compiled_with_cuda(): @@ -63,10 +64,14 @@ class TestUnpool1DOpAPI_dygraph(unittest.TestCase): paddle.disable_static() input_data = np.random.rand(1, 3, 16) input_x = paddle.to_tensor(input_data) - output, indices = F.max_pool1d( - input_x, kernel_size=2, stride=2, return_mask=True) - output_unpool = F.max_unpool1d( - output, indices, kernel_size=2, stride=2) + output, indices = F.max_pool1d(input_x, + kernel_size=2, + stride=2, + return_mask=True) + output_unpool = F.max_unpool1d(output, + indices, + kernel_size=2, + stride=2) expected_output_unpool = unpool1dmax_forward_naive( output.numpy(), indices.numpy(), [2], [2], [0], [16]) self.assertTrue( @@ -76,6 +81,7 @@ class TestUnpool1DOpAPI_dygraph(unittest.TestCase): class TestUnpool1DOpAPI_dygraph2(unittest.TestCase): + def test_case(self): places = [paddle.CPUPlace()] if paddle.fluid.core.is_compiled_with_cuda(): @@ -84,10 +90,14 @@ class TestUnpool1DOpAPI_dygraph2(unittest.TestCase): paddle.disable_static() input_data = np.random.rand(1, 3, 16) input_x = paddle.to_tensor(input_data) - output, indices = F.max_pool1d( - input_x, kernel_size=2, stride=2, return_mask=True) - output_unpool = F.max_unpool1d( - output, indices, kernel_size=2, stride=None) + output, indices = F.max_pool1d(input_x, + kernel_size=2, + stride=2, + return_mask=True) + output_unpool = F.max_unpool1d(output, + indices, + kernel_size=2, + stride=None) expected_output_unpool = unpool1dmax_forward_naive( output.numpy(), indices.numpy(), [2], [2], [0], [16]) self.assertTrue( @@ -97,6 +107,7 @@ class TestUnpool1DOpAPI_dygraph2(unittest.TestCase): class TestUnpool1DOpAPI_dygraph3(unittest.TestCase): + def test_case(self): places = [paddle.CPUPlace()] if paddle.fluid.core.is_compiled_with_cuda(): @@ -105,8 +116,9 @@ class TestUnpool1DOpAPI_dygraph3(unittest.TestCase): paddle.disable_static() input_data = np.random.rand(1, 3, 16) input_x = paddle.to_tensor(input_data) - Pool1d = paddle.nn.MaxPool1D( - kernel_size=2, stride=2, return_mask=True) + Pool1d = paddle.nn.MaxPool1D(kernel_size=2, + stride=2, + return_mask=True) UnPool1d = paddle.nn.MaxUnPool1D(kernel_size=2, stride=2) output, indices = Pool1d(input_x) @@ -120,6 +132,7 @@ class TestUnpool1DOpAPI_dygraph3(unittest.TestCase): class TestUnpool1DOpAPI_static(unittest.TestCase): + def test_case(self): paddle.enable_static() places = [paddle.CPUPlace()] @@ -131,22 +144,27 @@ class TestUnpool1DOpAPI_static(unittest.TestCase): input_data = np.array([[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]]).astype("float32") - x = paddle.fluid.data( - name='x', shape=[1, 3, 4], dtype='float32') - output, indices = 
F.max_pool1d( - x, kernel_size=2, stride=2, return_mask=True) - output_unpool = F.max_unpool1d( - output, indices, kernel_size=2, stride=None) + x = paddle.fluid.data(name='x', + shape=[1, 3, 4], + dtype='float32') + output, indices = F.max_pool1d(x, + kernel_size=2, + stride=2, + return_mask=True) + output_unpool = F.max_unpool1d(output, + indices, + kernel_size=2, + stride=None) exe = paddle.fluid.Executor(place) fetches = exe.run(paddle.fluid.default_main_program(), feed={"x": input_data}, fetch_list=[output_unpool], return_numpy=True) - pool1d_out_np = np.array( - [[[2., 4.], [6., 8.], [10., 12.]]]).astype("float32") - indices_np = np.array( - [[[1, 3], [1, 3], [1, 3]]]).astype("int32") + pool1d_out_np = np.array([[[2., 4.], [6., 8.], + [10., 12.]]]).astype("float32") + indices_np = np.array([[[1, 3], [1, 3], [1, + 3]]]).astype("int32") expected_output_unpool = unpool1dmax_forward_naive( pool1d_out_np, indices_np, [2], [2], [0], [4]) self.assertTrue(np.allclose(fetches[0], expected_output_unpool)) diff --git a/python/paddle/fluid/tests/unittests/test_unpool3d_op.py b/python/paddle/fluid/tests/unittests/test_unpool3d_op.py index e6031d9cee8..1fbff100a3d 100644 --- a/python/paddle/fluid/tests/unittests/test_unpool3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_unpool3d_op.py @@ -28,8 +28,8 @@ def _unpool_output_size(x, kernel_size, stride, padding, output_size): input_size = x.shape default_size = [] for d in range(len(kernel_size)): - default_size.append((input_size[-len(kernel_size) + d] - 1) * stride[d] - + kernel_size[d] - 2 * padding[d]) + default_size.append((input_size[-len(kernel_size) + d] - 1) * + stride[d] + kernel_size[d] - 2 * padding[d]) if output_size is None: ret = default_size else: @@ -53,10 +53,10 @@ def unpool3dmax_forward_naive(input, indices, ksize, strides, paddings, for w in range(s4): index = indices[nidx, cidx, d, h, w] didx = index // (out_wsize * out_hsize) - hidx = ( - index - didx * out_hsize * out_wsize) // out_wsize - widx = ( - index - didx * out_hsize * out_wsize) % out_wsize + hidx = (index - + didx * out_hsize * out_wsize) // out_wsize + widx = (index - + didx * out_hsize * out_wsize) % out_wsize out[nidx, cidx, didx, hidx, widx] = \ input[nidx, cidx, d, h, w] @@ -64,6 +64,7 @@ def unpool3dmax_forward_naive(input, indices, ksize, strides, paddings, class TestUnpool3DOp(OpTest): + def setUp(self): self.op_type = "unpool3d" self.init_test_case() @@ -72,8 +73,9 @@ class TestUnpool3DOp(OpTest): self.output_size = _unpool_output_size(inputs, self.ksize, self.strides, self.paddings, self.output_size) indices = np.random.permutation( - np.arange(0, self.output_size[0] * self.output_size[1] * - self.output_size[2]))[:dsize * hsize * wsize] + np.arange( + 0, self.output_size[0] * self.output_size[1] * + self.output_size[2]))[:dsize * hsize * wsize] indices = np.reshape(indices, [dsize, hsize, wsize]) idx_list = [] for n in range(nsize): @@ -116,6 +118,7 @@ class TestUnpool3DOp(OpTest): class TestUnpool3DOpcase1(TestUnpool3DOp): + def init_test_case(self): self.unpool3d_forward_naive = unpool3dmax_forward_naive self.unpooling_type = "max" @@ -127,6 +130,7 @@ class TestUnpool3DOpcase1(TestUnpool3DOp): class TestUnpool3DOpOutput(TestUnpool3DOp): + def init_test_case(self): self.unpool3d_forward_naive = unpool3dmax_forward_naive self.unpooling_type = "max" @@ -138,47 +142,50 @@ class TestUnpool3DOpOutput(TestUnpool3DOp): class TestUnpool3DOpException(unittest.TestCase): + def test_exception(self): + def indices_size_error(): data = paddle.randint(shape=[1, 
1, 3, 3, 3]) - indices = paddle.reshape( - paddle.arange(0, 36), shape=[1, 1, 3, 3, 4]) + indices = paddle.reshape(paddle.arange(0, 36), + shape=[1, 1, 3, 3, 4]) MaxUnPool3D = F.maxunpool3d(data, indices, kernel_size=2, stride=2) def indices_value_error(): data = paddle.randint(shape=[1, 1, 3, 3, 3]) - indices = paddle.reshape( - paddle.arange(4, 40), shape=[1, 1, 3, 3, 3]) + indices = paddle.reshape(paddle.arange(4, 40), + shape=[1, 1, 3, 3, 3]) MaxUnPool3D = F.maxunpool3d(data, indices, kernel_size=2, stride=2) def data_format_error(): data = paddle.randint(shape=[1, 1, 3, 3, 3]) - indices = paddle.reshape( - paddle.arange(0, 27), shape=[1, 1, 3, 3, 3]) - MaxUnPool3D = F.maxunpool3d( - data, indices, kernel_size=2, stride=2, data_format="NDHWC") + indices = paddle.reshape(paddle.arange(0, 27), + shape=[1, 1, 3, 3, 3]) + MaxUnPool3D = F.maxunpool3d(data, + indices, + kernel_size=2, + stride=2, + data_format="NDHWC") def data_outputsize_error(): data = paddle.randint(shape=[1, 1, 3, 3, 3]) - indices = paddle.reshape( - paddle.arange(0, 27), shape=[1, 1, 3, 3, 3]) - MaxUnPool3D = F.maxunpool3d( - data, - indices, - kernel_size=2, - stride=2, - output_size=[2, 2, 3, 4, 5]) + indices = paddle.reshape(paddle.arange(0, 27), + shape=[1, 1, 3, 3, 3]) + MaxUnPool3D = F.maxunpool3d(data, + indices, + kernel_size=2, + stride=2, + output_size=[2, 2, 3, 4, 5]) def data_outputsize_error2(): data = paddle.randint(shape=[1, 1, 3, 3, 3]) - indices = paddle.reshape( - paddle.arange(0, 27), shape=[1, 1, 3, 3, 3]) - MaxUnPool3D = F.maxunpool3d( - data, - indices, - kernel_size=2, - stride=2, - output_size=[10, 10, 10]) + indices = paddle.reshape(paddle.arange(0, 27), + shape=[1, 1, 3, 3, 3]) + MaxUnPool3D = F.maxunpool3d(data, + indices, + kernel_size=2, + stride=2, + output_size=[10, 10, 10]) self.assertRaises(ValueError, indices_size_error) self.assertRaises(ValueError, indices_value_error) @@ -188,6 +195,7 @@ class TestUnpool3DOpException(unittest.TestCase): class TestUnpool3DOpAPI_dygraph(unittest.TestCase): + def test_case(self): places = [paddle.CPUPlace()] if paddle.fluid.core.is_compiled_with_cuda(): @@ -196,13 +204,17 @@ class TestUnpool3DOpAPI_dygraph(unittest.TestCase): paddle.disable_static() input_data = np.random.rand(1, 3, 4, 4, 6) input_x = paddle.to_tensor(input_data) - output, indices = F.max_pool3d( - input_x, kernel_size=2, stride=2, return_mask=True) - output_unpool = F.max_unpool3d( - output, indices, kernel_size=2, stride=2) + output, indices = F.max_pool3d(input_x, + kernel_size=2, + stride=2, + return_mask=True) + output_unpool = F.max_unpool3d(output, + indices, + kernel_size=2, + stride=2) expected_output_unpool = unpool3dmax_forward_naive( - output.numpy(), - indices.numpy(), [2, 2, 2], [2, 2, 2], [0, 0, 0], [4, 4, 6]) + output.numpy(), indices.numpy(), [2, 2, 2], [2, 2, 2], + [0, 0, 0], [4, 4, 6]) self.assertTrue( np.allclose(output_unpool.numpy(), expected_output_unpool)) @@ -210,6 +222,7 @@ class TestUnpool3DOpAPI_dygraph(unittest.TestCase): class TestUnpool3DOpAPI_dygraph2(unittest.TestCase): + def test_case(self): places = [paddle.CPUPlace()] if paddle.fluid.core.is_compiled_with_cuda(): @@ -218,13 +231,17 @@ class TestUnpool3DOpAPI_dygraph2(unittest.TestCase): paddle.disable_static() input_data = np.random.rand(1, 3, 4, 4, 6) input_x = paddle.to_tensor(input_data) - output, indices = F.max_pool3d( - input_x, kernel_size=2, stride=2, return_mask=True) - output_unpool = F.max_unpool3d( - output, indices, kernel_size=2, stride=None) + output, indices = F.max_pool3d(input_x, + 
kernel_size=2, + stride=2, + return_mask=True) + output_unpool = F.max_unpool3d(output, + indices, + kernel_size=2, + stride=None) expected_output_unpool = unpool3dmax_forward_naive( - output.numpy(), - indices.numpy(), [2, 2, 2], [2, 2, 2], [0, 0, 0], [4, 4, 6]) + output.numpy(), indices.numpy(), [2, 2, 2], [2, 2, 2], + [0, 0, 0], [4, 4, 6]) self.assertTrue( np.allclose(output_unpool.numpy(), expected_output_unpool)) @@ -232,6 +249,7 @@ class TestUnpool3DOpAPI_dygraph2(unittest.TestCase): class TestUnpool3DOpAPI_dygraph3(unittest.TestCase): + def test_case(self): places = [paddle.CPUPlace()] if paddle.fluid.core.is_compiled_with_cuda(): @@ -240,15 +258,16 @@ class TestUnpool3DOpAPI_dygraph3(unittest.TestCase): paddle.disable_static() input_data = np.random.rand(1, 3, 4, 4, 6) input_x = paddle.to_tensor(input_data) - Pool3d = paddle.nn.MaxPool3D( - kernel_size=2, stride=2, return_mask=True) + Pool3d = paddle.nn.MaxPool3D(kernel_size=2, + stride=2, + return_mask=True) UnPool3d = paddle.nn.MaxUnPool3D(kernel_size=2, stride=2) output, indices = Pool3d(input_x) output_unpool = UnPool3d(output, indices) expected_output_unpool = unpool3dmax_forward_naive( - output.numpy(), - indices.numpy(), [2, 2, 2], [2, 2, 2], [0, 0, 0], [4, 4, 6]) + output.numpy(), indices.numpy(), [2, 2, 2], [2, 2, 2], + [0, 0, 0], [4, 4, 6]) self.assertTrue( np.allclose(output_unpool.numpy(), expected_output_unpool)) @@ -256,6 +275,7 @@ class TestUnpool3DOpAPI_dygraph3(unittest.TestCase): class TestUnpool3DOpAPI_static(unittest.TestCase): + def test_case(self): paddle.enable_static() places = [paddle.CPUPlace()] @@ -268,20 +288,25 @@ class TestUnpool3DOpAPI_static(unittest.TestCase): input_data = np.array([[[[[1, 2, 3, 4], [5, 6, 7, 8], \ [9, 10, 11, 12], [13, 14, 15, 16]], [[1, 2, 3, 4], [5, 6, 7, 8], \ [9, 10, 11, 12], [13, 14, 15, 16]]]]]).astype("float32") - x = paddle.fluid.data( - name='x', shape=[1, 1, 2, 4, 4], dtype='float32') - output, indices = F.max_pool3d( - x, kernel_size=2, stride=2, return_mask=True) - output_unpool = F.max_unpool3d( - output, indices, kernel_size=2, stride=None) + x = paddle.fluid.data(name='x', + shape=[1, 1, 2, 4, 4], + dtype='float32') + output, indices = F.max_pool3d(x, + kernel_size=2, + stride=2, + return_mask=True) + output_unpool = F.max_unpool3d(output, + indices, + kernel_size=2, + stride=None) exe = paddle.fluid.Executor(place) fetches = exe.run(paddle.fluid.default_main_program(), feed={"x": input_data}, fetch_list=[output_unpool], return_numpy=True) - pool3d_out_np = np.array( - [[[[[6., 8.], [14., 16.]]]]]).astype("float32") + pool3d_out_np = np.array([[[[[6., 8.], + [14., 16.]]]]]).astype("float32") indices_np = np.array([[[[[5, 7], [13, 15]]]]]).astype("int32") expected_output_unpool = unpool3dmax_forward_naive( pool3d_out_np, indices_np, [2, 2, 2], [2, 2, 2], [0, 0, 0], diff --git a/python/paddle/fluid/tests/unittests/test_unpool_op.py b/python/paddle/fluid/tests/unittests/test_unpool_op.py index 95ad254a6df..1b6d3d9dfb7 100644 --- a/python/paddle/fluid/tests/unittests/test_unpool_op.py +++ b/python/paddle/fluid/tests/unittests/test_unpool_op.py @@ -23,8 +23,8 @@ def _unpool_output_size(x, kernel_size, stride, padding, output_size): input_size = x.shape default_size = [] for d in range(len(kernel_size)): - default_size.append((input_size[-len(kernel_size) + d] - 1) * stride[d] - + kernel_size[d] - 2 * padding[d]) + default_size.append((input_size[-len(kernel_size) + d] - 1) * + stride[d] + kernel_size[d] - 2 * padding[d]) if output_size is None: ret = default_size else: @@ 
-54,6 +54,7 @@ def unpool2dmax_forward_naive(input, indices, ksize, strides, paddings, class TestUnpoolOp(OpTest): + def setUp(self): self.op_type = "unpool" self.init_test_case() @@ -106,6 +107,7 @@ class TestUnpoolOp(OpTest): class TestUnpoolOpcase1(TestUnpoolOp): + def init_test_case(self): self.unpool2d_forward_naive = unpool2dmax_forward_naive self.unpooling_type = "max" @@ -117,6 +119,7 @@ class TestUnpoolOpcase1(TestUnpoolOp): class TestUnpoolOpOutputsize(TestUnpoolOp): + def init_test_case(self): self.unpool2d_forward_naive = unpool2dmax_forward_naive self.unpooling_type = "max" @@ -128,6 +131,7 @@ class TestUnpoolOpOutputsize(TestUnpoolOp): class TestUnpoolOpOutput(TestUnpoolOp): + def init_test_case(self): self.unpool2d_forward_naive = unpool2dmax_forward_naive self.unpooling_type = "max" @@ -139,6 +143,7 @@ class TestUnpoolOpOutput(TestUnpoolOp): class TestUnpoolOpException(unittest.TestCase): + def test_exception(self): import paddle.nn.functional as F import paddle @@ -156,24 +161,29 @@ class TestUnpoolOpException(unittest.TestCase): def data_format_error(): data = paddle.randint(shape=[1, 1, 3, 3]) indices = paddle.reshape(paddle.arange(4, 40), shape[1, 1, 3, 4]) - MaxPool2D = F.maxunpool2d( - data, indices, kernel_size=2, stride=2, data_format="NHWC") + MaxPool2D = F.maxunpool2d(data, + indices, + kernel_size=2, + stride=2, + data_format="NHWC") def data_outputsize_error(): data = paddle.randint(shape=[1, 1, 3, 3]) indices = paddle.reshape(paddle.arange(4, 40), shape[1, 1, 3, 4]) - MaxPool2D = F.maxunpool2d( - data, - indices, - kernel_size=2, - stride=2, - output_size=[5, 6, 7, 8]) + MaxPool2D = F.maxunpool2d(data, + indices, + kernel_size=2, + stride=2, + output_size=[5, 6, 7, 8]) def data_outputsize_error2(): data = paddle.randint(shape=[1, 1, 3, 3]) indices = paddle.reshape(paddle.arange(4, 40), shape[1, 1, 3, 4]) - MaxPool2D = F.maxunpool2d( - data, indices, kernel_size=2, stride=2, output_size=[100, 100]) + MaxPool2D = F.maxunpool2d(data, + indices, + kernel_size=2, + stride=2, + output_size=[100, 100]) self.assertRaises(ValueError, indices_size_error) self.assertRaises(ValueError, indices_value_error) @@ -183,6 +193,7 @@ class TestUnpoolOpException(unittest.TestCase): class TestUnpoolOpAPI_dy(unittest.TestCase): + def test_case(self): import paddle import paddle.nn.functional as F @@ -195,14 +206,19 @@ class TestUnpoolOpAPI_dy(unittest.TestCase): else: place = core.CPUPlace() with fluid.dygraph.guard(place): - input_data = np.array([[[[1, 2, 3, 4], [5, 6, 7, 8], - [9, 10, 11, 12], + input_data = np.array([[[[1, 2, 3, 4], [5, 6, 7, + 8], [9, 10, 11, 12], [13, 14, 15, 16]]]]).astype("float32") input_x = paddle.to_tensor(input_data) - output, indices = F.max_pool2d( - input_x, kernel_size=2, stride=2, return_mask=True) - out_pp = F.max_unpool2d( - output, indices, kernel_size=2, stride=2, output_size=(5, 5)) + output, indices = F.max_pool2d(input_x, + kernel_size=2, + stride=2, + return_mask=True) + out_pp = F.max_unpool2d(output, + indices, + kernel_size=2, + stride=2, + output_size=(5, 5)) output_np = output.numpy() indices_np = indices.numpy() expect_res =unpool2dmax_forward_naive(output_np, indices_np, [2,2], \ @@ -211,6 +227,7 @@ class TestUnpoolOpAPI_dy(unittest.TestCase): class TestUnpoolOpAPI_dy2(unittest.TestCase): + def test_case(self): import paddle import paddle.nn.functional as F @@ -223,14 +240,19 @@ class TestUnpoolOpAPI_dy2(unittest.TestCase): else: place = core.CPUPlace() with fluid.dygraph.guard(place): - input_data = np.array([[[[1, 2, 3, 4], [5, 6, 7, 
8], - [9, 10, 11, 12], + input_data = np.array([[[[1, 2, 3, 4], [5, 6, 7, + 8], [9, 10, 11, 12], [13, 14, 15, 16]]]]).astype("float32") input_x = paddle.to_tensor(input_data) - output, indices = F.max_pool2d( - input_x, kernel_size=2, stride=2, return_mask=True) - out_pp = F.max_unpool2d( - output, indices, kernel_size=2, stride=None, output_size=(5, 5)) + output, indices = F.max_pool2d(input_x, + kernel_size=2, + stride=2, + return_mask=True) + out_pp = F.max_unpool2d(output, + indices, + kernel_size=2, + stride=None, + output_size=(5, 5)) output_np = output.numpy() indices_np = indices.numpy() expect_res =unpool2dmax_forward_naive(output_np, indices_np, [2,2], \ @@ -239,6 +261,7 @@ class TestUnpoolOpAPI_dy2(unittest.TestCase): class TestUnpoolOpAPI_dy3(unittest.TestCase): + def test_case(self): import paddle import paddle.nn.functional as F @@ -251,12 +274,13 @@ class TestUnpoolOpAPI_dy3(unittest.TestCase): else: place = core.CPUPlace() with fluid.dygraph.guard(place): - input_data = np.array([[[[1, 2, 3, 4], [5, 6, 7, 8], - [9, 10, 11, 12], + input_data = np.array([[[[1, 2, 3, 4], [5, 6, 7, + 8], [9, 10, 11, 12], [13, 14, 15, 16]]]]).astype("float32") input_x = paddle.to_tensor(input_data) - Pool2d = paddle.nn.MaxPool2D( - kernel_size=2, stride=2, return_mask=True) + Pool2d = paddle.nn.MaxPool2D(kernel_size=2, + stride=2, + return_mask=True) UnPool = paddle.nn.MaxUnPool2D(kernel_size=2, stride=2) output, indices = Pool2d(input_x) @@ -269,6 +293,7 @@ class TestUnpoolOpAPI_dy3(unittest.TestCase): class TestUnpoolOpAPI_st(unittest.TestCase): + def test_case(self): import paddle import paddle.nn.functional as F @@ -280,10 +305,15 @@ class TestUnpoolOpAPI_st(unittest.TestCase): [13, 14, 15, 16]]]]).astype("float32") x = fluid.data(name="x", shape=[1, 1, 4, 4], dtype="float32") - output, indices = F.max_pool2d( - x, kernel_size=2, stride=2, return_mask=True) - unpool_out = F.max_unpool2d( - output, indices, kernel_size=2, stride=None, output_size=(5, 5)) + output, indices = F.max_pool2d(x, + kernel_size=2, + stride=2, + return_mask=True) + unpool_out = F.max_unpool2d(output, + indices, + kernel_size=2, + stride=None, + output_size=(5, 5)) if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) else: diff --git a/python/paddle/fluid/tests/unittests/test_unsqueeze2_op.py b/python/paddle/fluid/tests/unittests/test_unsqueeze2_op.py index af9d3db6295..c80555a66d0 100755 --- a/python/paddle/fluid/tests/unittests/test_unsqueeze2_op.py +++ b/python/paddle/fluid/tests/unittests/test_unsqueeze2_op.py @@ -26,6 +26,7 @@ paddle.enable_static() # Correct: General. class TestUnsqueezeOp(OpTest): + def setUp(self): self.init_test_case() self.op_type = "unsqueeze2" @@ -55,6 +56,7 @@ class TestUnsqueezeOp(OpTest): # Correct: Single input index. class TestUnsqueezeOp1(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (-1, ) @@ -63,6 +65,7 @@ class TestUnsqueezeOp1(TestUnsqueezeOp): # Correct: Mixed input axis. class TestUnsqueezeOp2(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (0, -1) @@ -71,6 +74,7 @@ class TestUnsqueezeOp2(TestUnsqueezeOp): # Correct: There is duplicated axis. class TestUnsqueezeOp3(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (0, 3, 3) @@ -79,6 +83,7 @@ class TestUnsqueezeOp3(TestUnsqueezeOp): # Correct: Reversed axes. 
class TestUnsqueezeOp4(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (3, 1, 1) @@ -87,6 +92,7 @@ class TestUnsqueezeOp4(TestUnsqueezeOp): # axes is a list(with tensor) class TestUnsqueezeOp_AxesTensorList(OpTest): + def setUp(self): self.init_test_case() self.op_type = "unsqueeze2" @@ -124,6 +130,7 @@ class TestUnsqueezeOp_AxesTensorList(OpTest): class TestUnsqueezeOp1_AxesTensorList(TestUnsqueezeOp_AxesTensorList): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (-1, ) @@ -131,6 +138,7 @@ class TestUnsqueezeOp1_AxesTensorList(TestUnsqueezeOp_AxesTensorList): class TestUnsqueezeOp2_AxesTensorList(TestUnsqueezeOp_AxesTensorList): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (0, -1) @@ -138,6 +146,7 @@ class TestUnsqueezeOp2_AxesTensorList(TestUnsqueezeOp_AxesTensorList): class TestUnsqueezeOp3_AxesTensorList(TestUnsqueezeOp_AxesTensorList): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (0, 3, 3) @@ -145,6 +154,7 @@ class TestUnsqueezeOp3_AxesTensorList(TestUnsqueezeOp_AxesTensorList): class TestUnsqueezeOp4_AxesTensorList(TestUnsqueezeOp_AxesTensorList): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (3, 1, 1) @@ -153,6 +163,7 @@ class TestUnsqueezeOp4_AxesTensorList(TestUnsqueezeOp_AxesTensorList): # axes is a Tensor class TestUnsqueezeOp_AxesTensor(OpTest): + def setUp(self): self.init_test_case() self.op_type = "unsqueeze2" @@ -185,6 +196,7 @@ class TestUnsqueezeOp_AxesTensor(OpTest): class TestUnsqueezeOp1_AxesTensor(TestUnsqueezeOp_AxesTensor): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (-1, ) @@ -192,6 +204,7 @@ class TestUnsqueezeOp1_AxesTensor(TestUnsqueezeOp_AxesTensor): class TestUnsqueezeOp2_AxesTensor(TestUnsqueezeOp_AxesTensor): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (0, -1) @@ -199,6 +212,7 @@ class TestUnsqueezeOp2_AxesTensor(TestUnsqueezeOp_AxesTensor): class TestUnsqueezeOp3_AxesTensor(TestUnsqueezeOp_AxesTensor): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (0, 3, 3) @@ -206,6 +220,7 @@ class TestUnsqueezeOp3_AxesTensor(TestUnsqueezeOp_AxesTensor): class TestUnsqueezeOp4_AxesTensor(TestUnsqueezeOp_AxesTensor): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (3, 1, 1) @@ -214,6 +229,7 @@ class TestUnsqueezeOp4_AxesTensor(TestUnsqueezeOp_AxesTensor): # test api class TestUnsqueezeAPI(unittest.TestCase): + def setUp(self): self.executed_api() @@ -225,10 +241,12 @@ class TestUnsqueezeAPI(unittest.TestCase): x = paddle.static.data(name='x', shape=[3, 2, 5], dtype="float64") positive_3_int32 = fluid.layers.fill_constant([1], "int32", 3) positive_1_int64 = fluid.layers.fill_constant([1], "int64", 1) - axes_tensor_int32 = paddle.static.data( - name='axes_tensor_int32', shape=[3], dtype="int32") - axes_tensor_int64 = paddle.static.data( - name='axes_tensor_int64', shape=[3], dtype="int64") + axes_tensor_int32 = paddle.static.data(name='axes_tensor_int32', + shape=[3], + dtype="int32") + axes_tensor_int64 = paddle.static.data(name='axes_tensor_int64', + shape=[3], + dtype="int64") out_1 = self.unsqueeze(x, axis=[3, 1, 1]) out_2 = self.unsqueeze(x, axis=[positive_3_int32, positive_1_int64, 1]) @@ -253,6 +271,7 @@ class TestUnsqueezeAPI(unittest.TestCase): assert np.array_equal(res_5, input.reshape([3, 1, 1, 2, 5, 1])) def test_error(self): + def test_axes_type(): x2 = paddle.static.data(name="x2", shape=[2, 25], dtype="int32") self.unsqueeze(x2, axis=2.1) @@ -261,6 +280,7 @@ 
class TestUnsqueezeAPI(unittest.TestCase): class TestUnsqueezeInplaceAPI(TestUnsqueezeAPI): + def executed_api(self): self.unsqueeze = paddle.unsqueeze_ diff --git a/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py b/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py index c1ec95fc8bf..fb250bc64b2 100755 --- a/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py +++ b/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py @@ -27,6 +27,7 @@ paddle.enable_static() # Correct: General. class TestUnsqueezeOp(OpTest): + def setUp(self): self.init_test_case() self.op_type = "unsqueeze" @@ -50,6 +51,7 @@ class TestUnsqueezeOp(OpTest): class TestUnsqueezeBF16Op(OpTest): + def setUp(self): self.init_test_case() self.op_type = "unsqueeze" @@ -77,6 +79,7 @@ class TestUnsqueezeBF16Op(OpTest): # Correct: Single input index. class TestUnsqueezeOp1(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (-1, ) @@ -85,6 +88,7 @@ class TestUnsqueezeOp1(TestUnsqueezeOp): # Correct: Mixed input axis. class TestUnsqueezeOp2(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (0, -1) @@ -93,6 +97,7 @@ class TestUnsqueezeOp2(TestUnsqueezeOp): # Correct: There is duplicated axis. class TestUnsqueezeOp3(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (0, 3, 3) @@ -101,6 +106,7 @@ class TestUnsqueezeOp3(TestUnsqueezeOp): # Correct: Reversed axes. class TestUnsqueezeOp4(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (3, 1, 1) @@ -108,6 +114,7 @@ class TestUnsqueezeOp4(TestUnsqueezeOp): class API_TestUnsqueeze(unittest.TestCase): + def test_out(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), @@ -124,20 +131,23 @@ class API_TestUnsqueeze(unittest.TestCase): class TestUnsqueezeOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): # The type of axis in split_op should be int or Variable. 
def test_axes_type(): - x6 = paddle.static.data( - shape=[-1, 10], dtype='float16', name='x3') + x6 = paddle.static.data(shape=[-1, 10], + dtype='float16', + name='x3') paddle.unsqueeze(x6, axis=3.2) self.assertRaises(TypeError, test_axes_type) class API_TestUnsqueeze2(unittest.TestCase): + def test_out(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), @@ -150,13 +160,16 @@ class API_TestUnsqueeze2(unittest.TestCase): input1 = np.random.random([5, 1, 10]).astype('float64') input2 = np.array([1]).astype('int32') input = np.squeeze(input1, axis=1) - result1, = exe.run(feed={"data1": input, - "data2": input2}, + result1, = exe.run(feed={ + "data1": input, + "data2": input2 + }, fetch_list=[result_squeeze]) self.assertTrue(np.allclose(input1, result1)) class API_TestUnsqueeze3(unittest.TestCase): + def test_out(self): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), @@ -169,14 +182,17 @@ class API_TestUnsqueeze3(unittest.TestCase): input1 = np.random.random([5, 1, 10, 1]).astype('float64') input2 = np.array([1]).astype('int32') input = np.squeeze(input1) - result1, = exe.run(feed={"data1": input, - "data2": input2}, + result1, = exe.run(feed={ + "data1": input, + "data2": input2 + }, fetch_list=[result_squeeze]) self.assertTrue(np.array_equal(input1, result1)) self.assertEqual(input1.shape, result1.shape) class API_TestDyUnsqueeze(unittest.TestCase): + def test_out(self): paddle.disable_static() input_1 = np.random.random([5, 1, 10]).astype("int32") @@ -189,6 +205,7 @@ class API_TestDyUnsqueeze(unittest.TestCase): class API_TestDyUnsqueeze2(unittest.TestCase): + def test_out(self): paddle.disable_static() input1 = np.random.random([5, 10]).astype("int32") @@ -201,6 +218,7 @@ class API_TestDyUnsqueeze2(unittest.TestCase): class API_TestDyUnsqueezeAxisTensor(unittest.TestCase): + def test_out(self): paddle.disable_static() input1 = np.random.random([5, 10]).astype("int32") @@ -214,6 +232,7 @@ class API_TestDyUnsqueezeAxisTensor(unittest.TestCase): class API_TestDyUnsqueezeAxisTensorList(unittest.TestCase): + def test_out(self): paddle.disable_static() input1 = np.random.random([5, 10]).astype("int32") @@ -223,13 +242,15 @@ class API_TestDyUnsqueezeAxisTensorList(unittest.TestCase): input = paddle.to_tensor(input1) output = paddle.unsqueeze( paddle.to_tensor(input1), - axis=[paddle.to_tensor([1]), paddle.to_tensor([2])]) + axis=[paddle.to_tensor([1]), + paddle.to_tensor([2])]) out_np = output.numpy() self.assertTrue(np.array_equal(out1, out_np)) self.assertEqual(out1.shape, out_np.shape) class API_TestDygraphUnSqueeze(unittest.TestCase): + def setUp(self): self.executed_api() @@ -283,6 +304,7 @@ class API_TestDygraphUnSqueeze(unittest.TestCase): class API_TestDygraphUnSqueezeInplace(API_TestDygraphUnSqueeze): + def executed_api(self): self.unsqueeze = paddle.unsqueeze_ diff --git a/python/paddle/fluid/tests/unittests/test_unstack_op.py b/python/paddle/fluid/tests/unittests/test_unstack_op.py index 01232293527..730a74dc54c 100644 --- a/python/paddle/fluid/tests/unittests/test_unstack_op.py +++ b/python/paddle/fluid/tests/unittests/test_unstack_op.py @@ -18,6 +18,7 @@ import unittest class TestUnStackOpBase(OpTest): + def initDefaultParameters(self): self.input_dim = (5, 6, 7) self.axis = 0 @@ -58,21 +59,25 @@ class TestUnStackOpBase(OpTest): class TestStackOp3(TestUnStackOpBase): + def initParameters(self): self.axis = -1 class TestStackOp4(TestUnStackOpBase): + def initParameters(self): self.axis = -3 class 
TestStackOp5(TestUnStackOpBase): + def initParameters(self): self.axis = 1 class TestStackOp6(TestUnStackOpBase): + def initParameters(self): self.axis = 2 diff --git a/python/paddle/fluid/tests/unittests/test_update_loss_scaling_op.py b/python/paddle/fluid/tests/unittests/test_update_loss_scaling_op.py index 56f49f60bde..c1294628a4e 100644 --- a/python/paddle/fluid/tests/unittests/test_update_loss_scaling_op.py +++ b/python/paddle/fluid/tests/unittests/test_update_loss_scaling_op.py @@ -20,6 +20,7 @@ import paddle.fluid.contrib.mixed_precision.amp_nn as amp_nn class TestUpdateLossScalingOp(OpTest): + def setUp(self): self.op_type = "update_loss_scaling" self.init() @@ -61,6 +62,7 @@ class TestUpdateLossScalingOp(OpTest): class TestUpdateLossScalingOpBad(TestUpdateLossScalingOp): + def setUp(self): self.op_type = "update_loss_scaling" self.init() @@ -90,17 +92,21 @@ class TestUpdateLossScalingOpBad(TestUpdateLossScalingOp): class TestUpdateLossScalingLayer(unittest.TestCase): + def loss_scaling_check(self, use_cuda=True, scope=fluid.Scope()): a = fluid.data(name="a", shape=[1024, 1024], dtype='float32') b = fluid.data(name="b", shape=[512, 128], dtype='float32') x = [a, b] found_inf = fluid.data(name="found_inf", shape=[1], dtype='bool') - prev_loss_scaling = fluid.data( - name="prev_loss_scaling", shape=[1], dtype='float32') - num_good_steps = fluid.data( - name="num_good_steps", shape=[1], dtype='int32') - num_bad_steps = fluid.data( - name="num_bad_steps", shape=[1], dtype='int32') + prev_loss_scaling = fluid.data(name="prev_loss_scaling", + shape=[1], + dtype='float32') + num_good_steps = fluid.data(name="num_good_steps", + shape=[1], + dtype='int32') + num_bad_steps = fluid.data(name="num_bad_steps", + shape=[1], + dtype='int32') a_v = np.random.random([1024, 1024]).astype('float32') b_v = np.random.random([512, 128]).astype('float32') @@ -114,17 +120,16 @@ class TestUpdateLossScalingLayer(unittest.TestCase): incr_ratio = 2 decr_ratio = 0.8 - result = amp_nn.update_loss_scaling( - x, - found_inf, - prev_loss_scaling, - num_good_steps, - num_bad_steps, - incr_every_n_steps, - decr_every_n_nan_or_inf, - incr_ratio, - decr_ratio, - name="update_loss_scaling") + result = amp_nn.update_loss_scaling(x, + found_inf, + prev_loss_scaling, + num_good_steps, + num_bad_steps, + incr_every_n_steps, + decr_every_n_nan_or_inf, + incr_ratio, + decr_ratio, + name="update_loss_scaling") place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) @@ -156,12 +161,15 @@ class TestUpdateLossScalingLayer(unittest.TestCase): b = fluid.data(name="b", shape=[512, 128], dtype='float32') x = [a, b] found_inf = fluid.data(name="found_inf", shape=[1], dtype='bool') - prev_loss_scaling = fluid.data( - name="prev_loss_scaling", shape=[1], dtype='float32') - num_good_steps = fluid.data( - name="num_good_steps", shape=[1], dtype='int32') - num_bad_steps = fluid.data( - name="num_bad_steps", shape=[1], dtype='int32') + prev_loss_scaling = fluid.data(name="prev_loss_scaling", + shape=[1], + dtype='float32') + num_good_steps = fluid.data(name="num_good_steps", + shape=[1], + dtype='int32') + num_bad_steps = fluid.data(name="num_bad_steps", + shape=[1], + dtype='int32') a_v = np.random.random([1024, 1024]).astype('float32') b_v = np.random.random([512, 128]).astype('float32') @@ -178,17 +186,16 @@ class TestUpdateLossScalingLayer(unittest.TestCase): incr_ratio = 2 decr_ratio = 0.8 - result = amp_nn.update_loss_scaling( - x, - found_inf, - prev_loss_scaling, - num_good_steps, - num_bad_steps, - 
incr_every_n_steps, - decr_every_n_nan_or_inf, - incr_ratio, - decr_ratio, - name="update_loss_scaling") + result = amp_nn.update_loss_scaling(x, + found_inf, + prev_loss_scaling, + num_good_steps, + num_bad_steps, + incr_every_n_steps, + decr_every_n_nan_or_inf, + incr_ratio, + decr_ratio, + name="update_loss_scaling") place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index e6e608bea23..54c800c8754 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -26,12 +26,14 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class TestVarBase(unittest.TestCase): + def setUp(self): self.shape = [512, 1234] self.dtype = np.float32 self.array = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) def func_test_to_tensor(self): + def _test_place(place): with fluid.dygraph.guard(): paddle.set_default_dtype('float32') @@ -44,10 +46,9 @@ class TestVarBase(unittest.TestCase): self.assertEqual(str(x.place), str(y.place)) # set_default_dtype should not take effect on numpy - x = paddle.to_tensor( - np.array([1.2]).astype('float16'), - place=place, - stop_gradient=False) + x = paddle.to_tensor(np.array([1.2]).astype('float16'), + place=place, + stop_gradient=False) self.assertTrue( np.array_equal(x.numpy(), np.array([1.2], 'float16'))) self.assertEqual(x.dtype, core.VarDesc.VarType.FP16) @@ -59,8 +60,8 @@ class TestVarBase(unittest.TestCase): # set_default_dtype take effect on float x = paddle.to_tensor(1.2, place=place, stop_gradient=False) self.assertTrue( - np.array_equal(x.numpy(), np.array([1.2]).astype( - 'float32'))) + np.array_equal(x.numpy(), + np.array([1.2]).astype('float32'))) self.assertEqual(x.dtype, core.VarDesc.VarType.FP32) clone_x = x.clone() self.assertTrue( @@ -108,18 +109,24 @@ class TestVarBase(unittest.TestCase): self.assertTrue(np.array_equal(x.numpy(), [1 + 2j])) self.assertEqual(x.dtype, core.VarDesc.VarType.COMPLEX128) - x = paddle.to_tensor( - 1, dtype='float32', place=place, stop_gradient=False) + x = paddle.to_tensor(1, + dtype='float32', + place=place, + stop_gradient=False) self.assertTrue(np.array_equal(x.numpy(), [1.])) self.assertEqual(x.dtype, core.VarDesc.VarType.FP32) self.assertEqual(x.shape, [1]) self.assertEqual(x.stop_gradient, False) self.assertEqual(x.type, core.VarDesc.VarType.LOD_TENSOR) - x = paddle.to_tensor( - (1, 2), dtype='float32', place=place, stop_gradient=False) - x = paddle.to_tensor( - [1, 2], dtype='float32', place=place, stop_gradient=False) + x = paddle.to_tensor((1, 2), + dtype='float32', + place=place, + stop_gradient=False) + x = paddle.to_tensor([1, 2], + dtype='float32', + place=place, + stop_gradient=False) self.assertTrue(np.array_equal(x.numpy(), [1., 2.])) self.assertEqual(x.dtype, core.VarDesc.VarType.FP32) self.assertEqual(x.grad, None) @@ -127,11 +134,10 @@ class TestVarBase(unittest.TestCase): self.assertEqual(x.stop_gradient, False) self.assertEqual(x.type, core.VarDesc.VarType.LOD_TENSOR) - x = paddle.to_tensor( - self.array, - dtype='float32', - place=place, - stop_gradient=False) + x = paddle.to_tensor(self.array, + dtype='float32', + place=place, + stop_gradient=False) self.assertTrue(np.array_equal(x.numpy(), self.array)) self.assertEqual(x.dtype, core.VarDesc.VarType.FP32) self.assertEqual(x.shape, self.shape) @@ -148,8 +154,9 @@ class TestVarBase(unittest.TestCase): z = x 
+ y self.assertTrue(np.array_equal(z.numpy(), 2 * self.array)) - x = paddle.to_tensor( - [1 + 2j, 1 - 2j], dtype='complex64', place=place) + x = paddle.to_tensor([1 + 2j, 1 - 2j], + dtype='complex64', + place=place) y = paddle.to_tensor(x) self.assertTrue(np.array_equal(x.numpy(), [1 + 2j, 1 - 2j])) self.assertEqual(y.dtype, core.VarDesc.VarType.COMPLEX64) @@ -171,7 +178,8 @@ class TestVarBase(unittest.TestCase): self.assertTrue(isinstance(x.item(1, 0, 1), float)) self.assertEqual(x.item(5), x.item(1, 0, 1)) self.assertTrue( - np.array_equal(x.item(1, 0, 1), x.numpy().item(1, 0, 1))) + np.array_equal(x.item(1, 0, 1), + x.numpy().item(1, 0, 1))) x = paddle.to_tensor([[1.111111, 2.222222, 3.333333]]) self.assertEqual(x.item(0, 2), x.item(2)) @@ -402,18 +410,16 @@ class TestVarBase(unittest.TestCase): y = x + 1 self.assertTrue(y.is_leaf) - x = paddle.to_tensor( - np.random.uniform( - -1, 1, size=[10, 10]), stop_gradient=False) + x = paddle.to_tensor(np.random.uniform(-1, 1, size=[10, 10]), + stop_gradient=False) self.assertTrue(x.is_leaf) y = x + 1 self.assertFalse(y.is_leaf) linear = paddle.nn.Linear(10, 10) - input = paddle.to_tensor( - np.random.uniform( - -1, 1, size=[10, 10]).astype('float32'), - stop_gradient=False) + input = paddle.to_tensor(np.random.uniform( + -1, 1, size=[10, 10]).astype('float32'), + stop_gradient=False) self.assertTrue(input.is_leaf) out = linear(input) @@ -537,8 +543,8 @@ class TestVarBase(unittest.TestCase): core.VarDesc.VarType.SELECTED_ROWS, True) selected_rows = x.value().get_selected_rows() - selected_rows.get_tensor().set( - np.random.rand(3, 100), core.CPUPlace()) + selected_rows.get_tensor().set(np.random.rand(3, 100), + core.CPUPlace()) selected_rows.set_height(10) selected_rows.set_rows([3, 5, 7]) x_copy = copy.deepcopy(x) @@ -553,9 +559,8 @@ class TestVarBase(unittest.TestCase): selected_rows.height()) self.assertEqual(copy_selected_rows.rows(), selected_rows.rows()) self.assertTrue( - np.array_equal( - np.array(copy_selected_rows.get_tensor()), - np.array(selected_rows.get_tensor()))) + np.array_equal(np.array(copy_selected_rows.get_tensor()), + np.array(selected_rows.get_tensor()))) def test_deep_copy(self): with _test_eager_guard(): @@ -692,10 +697,10 @@ class TestVarBase(unittest.TestCase): nw = w[:, :, :-1] self.assertEqual((784, 100, 99), tuple(nw.shape)) - tensor_array = np.array( - [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], - [[10, 11, 12], [13, 14, 15], [16, 17, 18]], - [[19, 20, 21], [22, 23, 24], [25, 26, 27]]]).astype('float32') + tensor_array = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [[10, 11, 12], [13, 14, 15], [16, 17, 18]], + [[19, 20, 21], [22, 23, 24], + [25, 26, 27]]]).astype('float32') var = fluid.dygraph.to_variable(tensor_array) var1 = var[0, 1, 1] var2 = var[1:] @@ -752,10 +757,10 @@ class TestVarBase(unittest.TestCase): self.assertTrue(np.array_equal(local_out[18], tensor_array[:, 1:1:2])) def _test_slice_for_tensor_attr(self): - tensor_array = np.array( - [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], - [[10, 11, 12], [13, 14, 15], [16, 17, 18]], - [[19, 20, 21], [22, 23, 24], [25, 26, 27]]]).astype('float32') + tensor_array = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [[10, 11, 12], [13, 14, 15], [16, 17, 18]], + [[19, 20, 21], [22, 23, 24], + [25, 26, 27]]]).astype('float32') var = paddle.to_tensor(tensor_array) @@ -885,14 +890,14 @@ class TestVarBase(unittest.TestCase): self.assertTrue(np.array_equal(var[5], np_value[None, 2, 0, ...])) self.assertTrue(np.array_equal(var[6], np_value[None, 2, None, 1])) 
self.assertTrue(np.array_equal(var[7], np_value[None])) - self.assertTrue( - np.array_equal(var[8], np_value[0, 0, None, 0, 0, None])) + self.assertTrue(np.array_equal(var[8], np_value[0, 0, None, 0, 0, + None])) self.assertTrue( np.array_equal(var[9], np_value[None, None, 0, ..., None])) self.assertTrue(np.array_equal(var[10], np_value[..., None, :, None])) - # TODO(zyfncg) there is a bug of dimensions when slice step > 1 and - # indexs has int type + # TODO(zyfncg) there is a bug of dimensions when slice step > 1 and + # indexs has int type # self.assertTrue( # np.array_equal(var[11], np_value[0, 1:10:2, None, None, ...])) @@ -921,11 +926,11 @@ class TestVarBase(unittest.TestCase): self.assertTrue(np.array_equal(var[5], np_value[index2d])) self.assertTrue(np.array_equal(var[6], np_value[index[4]])) self.assertTrue( - np.array_equal(var_tensor[var_tensor > 0.67], np_value[np_value > - 0.67])) + np.array_equal(var_tensor[var_tensor > 0.67], + np_value[np_value > 0.67])) self.assertTrue( - np.array_equal(var_tensor[var_tensor < 0.55], np_value[np_value < - 0.55])) + np.array_equal(var_tensor[var_tensor < 0.55], + np_value[np_value < 0.55])) with self.assertRaises(ValueError): var_tensor[[False, False, False, False]] @@ -942,7 +947,9 @@ class TestVarBase(unittest.TestCase): var_tensor = paddle.to_tensor(np_value) index = [True] tensor_index = paddle.to_tensor(index) - var = [var_tensor[tensor_index].numpy(), ] + var = [ + var_tensor[tensor_index].numpy(), + ] self.assertTrue(np.array_equal(var[0], np_value[index])) def _test_for_var(self): @@ -957,8 +964,9 @@ class TestVarBase(unittest.TestCase): t = paddle.to_tensor(array) self.assertTrue(np.array_equal(t[np.longlong(0)].numpy(), array[0])) self.assertTrue( - np.array_equal(t[np.longlong(0):np.longlong(4):np.longlong(2)] - .numpy(), array[0:4:2])) + np.array_equal( + t[np.longlong(0):np.longlong(4):np.longlong(2)].numpy(), + array[0:4:2])) self.assertTrue(np.array_equal(t[np.int64(0)].numpy(), array[0])) self.assertTrue( np.array_equal(t[np.int32(1):np.int32(4):np.int32(2)].numpy(), @@ -1039,9 +1047,7 @@ class TestVarBase(unittest.TestCase): var = fluid.dygraph.to_variable(self.array) self.assertTrue(np.array_equal(var.numpy(), np.array(var))) self.assertTrue( - np.array_equal( - var.numpy(), np.array( - var, dtype=np.float32))) + np.array_equal(var.numpy(), np.array(var, dtype=np.float32))) def test_var_base_as_np(self): with _test_eager_guard(): @@ -1106,8 +1112,8 @@ class TestVarBase(unittest.TestCase): self.assertTrue(static_var.persistable, True) if isinstance(var_base, fluid.framework.ParamBase): for attr in ['trainable', 'is_distributed', 'do_model_average']: - self.assertEqual( - getattr(var_base, attr), getattr(static_var, attr)) + self.assertEqual(getattr(var_base, attr), + getattr(static_var, attr)) self.assertEqual(static_var.optimize_attr['learning_rate'], 0.001) @@ -1214,8 +1220,10 @@ class TestVarBase(unittest.TestCase): paddle.disable_static(paddle.CPUPlace()) paddle.seed(2021) x = paddle.rand([128]) - paddle.set_printoptions( - precision=4, threshold=1000, edgeitems=3, linewidth=80) + paddle.set_printoptions(precision=4, + threshold=1000, + edgeitems=3, + linewidth=80) a_str = str(x) expected = '''Tensor(shape=[128], dtype=float32, place=Place(cpu), stop_gradient=True, @@ -1308,6 +1316,7 @@ class TestVarBase(unittest.TestCase): class TestVarBaseSetitem(unittest.TestCase): + def func_setUp(self): self.set_dtype() self.tensor_x = paddle.to_tensor(np.ones((4, 2, 3)).astype(self.dtype)) @@ -1379,11 +1388,13 @@ class 
TestVarBaseSetitem(unittest.TestCase): class TestVarBaseSetitemInt64(TestVarBaseSetitem): + def set_dtype(self): self.dtype = "int64" class TestVarBaseSetitemFp32(TestVarBaseSetitem): + def set_dtype(self): self.dtype = "float32" @@ -1400,11 +1411,13 @@ class TestVarBaseSetitemFp32(TestVarBaseSetitem): class TestVarBaseSetitemFp64(TestVarBaseSetitem): + def set_dtype(self): self.dtype = "float64" class TestVarBaseSetitemBoolIndex(unittest.TestCase): + def func_setUp(self): paddle.disable_static() self.set_dtype() @@ -1483,6 +1496,7 @@ class TestVarBaseSetitemBoolIndex(unittest.TestCase): class TestVarBaseSetitemBoolScalarIndex(unittest.TestCase): + def set_input(self): self.tensor_x = paddle.to_tensor(np.ones((1, 2, 3)).astype(self.dtype)) self.np_value = np.random.random((2, 3)).astype(self.dtype) @@ -1508,6 +1522,7 @@ class TestVarBaseSetitemBoolScalarIndex(unittest.TestCase): class TestVarBaseInplaceVersion(unittest.TestCase): + def func_test_setitem(self): paddle.disable_static() @@ -1543,6 +1558,7 @@ class TestVarBaseInplaceVersion(unittest.TestCase): class TestVarBaseSlice(unittest.TestCase): + def func_test_slice(self): paddle.disable_static() np_x = np.random.random((3, 8, 8)) @@ -1558,6 +1574,7 @@ class TestVarBaseSlice(unittest.TestCase): class TestVarBaseClear(unittest.TestCase): + def func_test_clear(self): paddle.disable_static() np_x = np.random.random((3, 8, 8)) @@ -1572,6 +1589,7 @@ class TestVarBaseClear(unittest.TestCase): class TestVarBaseOffset(unittest.TestCase): + def func_offset(self): paddle.disable_static() np_x = np.random.random((3, 8, 8)) @@ -1588,6 +1606,7 @@ class TestVarBaseOffset(unittest.TestCase): class TestVarBaseShareBufferTo(unittest.TestCase): + def func_test_share_buffer_To(self): paddle.disable_static() np_src = np.random.random((3, 8, 8)) @@ -1607,6 +1626,7 @@ class TestVarBaseShareBufferTo(unittest.TestCase): class TestVarBaseTo(unittest.TestCase): + def func_setUp(self): paddle.disable_static() self.np_x = np.random.random((3, 8, 8)) @@ -1668,6 +1688,7 @@ class TestVarBaseTo(unittest.TestCase): class TestVarBaseInitVarBaseFromTensorWithDevice(unittest.TestCase): + def func_test_varbase_init(self): paddle.disable_static() t = fluid.Tensor() @@ -1697,6 +1718,7 @@ class TestVarBaseInitVarBaseFromTensorWithDevice(unittest.TestCase): class TestVarBaseNumel(unittest.TestCase): + def func_test_numel_normal(self): paddle.disable_static() np_x = np.random.random((3, 8, 8)) @@ -1726,6 +1748,7 @@ class TestVarBaseNumel(unittest.TestCase): class TestVarBaseCopyGradientFrom(unittest.TestCase): + def func_test_copy_gradient_from(self): paddle.disable_static() np_x = np.random.random((2, 2)) @@ -1744,6 +1767,7 @@ class TestVarBaseCopyGradientFrom(unittest.TestCase): class TestEagerTensorGradNameValue(unittest.TestCase): + def test_eager_tensor_grad_name_value(self): with _test_eager_guard(): a_np = np.array([2, 3]).astype('float32') diff --git a/python/paddle/fluid/tests/unittests/test_var_conv_2d.py b/python/paddle/fluid/tests/unittests/test_var_conv_2d.py index 4e23b205811..9fd1e8573f8 100644 --- a/python/paddle/fluid/tests/unittests/test_var_conv_2d.py +++ b/python/paddle/fluid/tests/unittests/test_var_conv_2d.py @@ -20,6 +20,7 @@ from op_test import OpTest, skip_check_grad_ci class TestVarConv2DOp(OpTest): + def setUp(self): self.init_op_type() self.set_data() @@ -175,11 +176,14 @@ class TestVarConv2DOp(OpTest): self.check_output(check_dygraph=False) def test_check_grad(self): - self.check_grad( - ['X'], 'Out', max_relative_error=0.005, check_dygraph=False) 
+ self.check_grad(['X'], + 'Out', + max_relative_error=0.005, + check_dygraph=False) class TestVarConv2DOpCase1(TestVarConv2DOp): + def set_data(self): # set in_ch 1 input_channel = 1 @@ -193,6 +197,7 @@ class TestVarConv2DOpCase1(TestVarConv2DOp): class TestVarConv2DOpCase2(TestVarConv2DOp): + def set_data(self): # set out_ch 1 input_channel = 2 @@ -206,6 +211,7 @@ class TestVarConv2DOpCase2(TestVarConv2DOp): class TestVarConv2DOpCase3(TestVarConv2DOp): + def set_data(self): # set batch 1 input_channel = 2 @@ -219,6 +225,7 @@ class TestVarConv2DOpCase3(TestVarConv2DOp): class TestVarConv2DOpCase4(TestVarConv2DOp): + def set_data(self): # set filter size very large input_channel = 3 @@ -232,6 +239,7 @@ class TestVarConv2DOpCase4(TestVarConv2DOp): class TestVarConv2DOpCase5(TestVarConv2DOp): + def set_data(self): # set input very small input_channel = 50 @@ -245,9 +253,11 @@ class TestVarConv2DOpCase5(TestVarConv2DOp): @skip_check_grad_ci( - reason="[skip shape check] Use shape of input_channel, row and col all is 1 to test special LoDTensor." + reason= + "[skip shape check] Use shape of input_channel, row and col all is 1 to test special LoDTensor." ) class TestVarConv2DOpCase6(TestVarConv2DOp): + def set_data(self): input_channel = 1 output_channel = 3 @@ -260,6 +270,7 @@ class TestVarConv2DOpCase6(TestVarConv2DOp): class TestVarConv2DOpCase7(TestVarConv2DOp): + def set_data(self): input_channel = 2 output_channel = 3 @@ -272,20 +283,20 @@ class TestVarConv2DOpCase7(TestVarConv2DOp): class TestVarConv2DApi(unittest.TestCase): + def test_api(self): import paddle.fluid as fluid x = fluid.layers.data(name='x', shape=[1], lod_level=1) row = fluid.layers.data(name='row', shape=[6], lod_level=1) col = fluid.layers.data(name='col', shape=[6], lod_level=1) - out = fluid.contrib.var_conv_2d( - input=x, - row=row, - col=col, - input_channel=3, - output_channel=5, - filter_size=[3, 3], - stride=1) + out = fluid.contrib.var_conv_2d(input=x, + row=row, + col=col, + input_channel=3, + output_channel=5, + filter_size=[3, 3], + stride=1) place = fluid.CPUPlace() x_tensor = fluid.create_lod_tensor( @@ -297,12 +308,13 @@ class TestVarConv2DApi(unittest.TestCase): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - ret = exe.run( - feed={'x': x_tensor, - 'row': row_tensor, - 'col': col_tensor}, - fetch_list=[out], - return_numpy=False) + ret = exe.run(feed={ + 'x': x_tensor, + 'row': row_tensor, + 'col': col_tensor + }, + fetch_list=[out], + return_numpy=False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_variable.py b/python/paddle/fluid/tests/unittests/test_variable.py index 3a924669b00..87802b83415 100644 --- a/python/paddle/fluid/tests/unittests/test_variable.py +++ b/python/paddle/fluid/tests/unittests/test_variable.py @@ -29,6 +29,7 @@ paddle.enable_static() class TestVariable(unittest.TestCase): + def test_np_dtype_convert(self): DT = core.VarDesc.VarType convert = convert_np_dtype_to_dtype_ @@ -44,8 +45,10 @@ class TestVariable(unittest.TestCase): def test_var(self): b = default_main_program().current_block() - w = b.create_var( - dtype="float64", shape=[784, 100], lod_level=0, name="fc.w") + w = b.create_var(dtype="float64", + shape=[784, 100], + lod_level=0, + name="fc.w") self.assertNotEqual(str(w), "") self.assertEqual(core.VarDesc.VarType.FP64, w.dtype) self.assertEqual((784, 100), w.shape) @@ -63,10 +66,9 @@ class TestVariable(unittest.TestCase): self.assertRaises(ValueError, lambda: b.create_var(name="fc.w", shape=(24, 100))) - w = 
b.create_var( - dtype=paddle.fluid.core.VarDesc.VarType.STRINGS, - shape=[1], - name="str_var") + w = b.create_var(dtype=paddle.fluid.core.VarDesc.VarType.STRINGS, + shape=[1], + name="str_var") self.assertEqual(None, w.lod_level) def test_element_size(self): @@ -101,8 +103,8 @@ class TestVariable(unittest.TestCase): def test_step_scopes(self): prog = Program() b = prog.current_block() - var = b.create_var( - name='step_scopes', type=core.VarDesc.VarType.STEP_SCOPES) + var = b.create_var(name='step_scopes', + type=core.VarDesc.VarType.STEP_SCOPES) self.assertEqual(core.VarDesc.VarType.STEP_SCOPES, var.type) def _test_slice(self, place): @@ -135,10 +137,10 @@ class TestVariable(unittest.TestCase): main = fluid.Program() with fluid.program_guard(main): exe = fluid.Executor(place) - tensor_array = np.array( - [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], - [[10, 11, 12], [13, 14, 15], [16, 17, 18]], - [[19, 20, 21], [22, 23, 24], [25, 26, 27]]]).astype('float32') + tensor_array = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [[10, 11, 12], [13, 14, 15], [16, 17, 18]], + [[19, 20, 21], [22, 23, 24], + [25, 26, 27]]]).astype('float32') var = fluid.layers.assign(tensor_array) var1 = var[0, 1, 1] var2 = var[1:] @@ -276,7 +278,8 @@ class TestVariable(unittest.TestCase): expected = [ data[0:, ..., 1:], data[0:, ...], data[..., 1:], data[...], - data[[1, 0], [0, 0]], data[([1, 0], [0, 0])], np.array([1]) + data[[1, 0], [0, 0]], data[([1, 0], [0, 0])], + np.array([1]) ] self.assertTrue((result[0] == expected[0]).all()) @@ -413,12 +416,11 @@ class TestVariable(unittest.TestCase): def test_create_selected_rows(self): b = default_main_program().current_block() - var = b.create_var( - name="var", - shape=[1, 1], - dtype="float32", - type=fluid.core.VarDesc.VarType.SELECTED_ROWS, - persistable=True) + var = b.create_var(name="var", + shape=[1, 1], + dtype="float32", + type=fluid.core.VarDesc.VarType.SELECTED_ROWS, + persistable=True) def _test(): var.lod_level() @@ -453,8 +455,9 @@ class TestVariable(unittest.TestCase): scope = fluid.core.Scope() with paddle.static.scope_guard(scope): with paddle.static.program_guard(main, startup): - x = paddle.static.data( - name='x', shape=[3, 2, 1], dtype='float32') + x = paddle.static.data(name='x', + shape=[3, 2, 1], + dtype='float32') x.persistable = True feed_data = np.ones(shape=[3, 2, 1], dtype=np.float32) detach_x = x.detach() @@ -472,8 +475,9 @@ class TestVariable(unittest.TestCase): self.assertTrue((result[1] == modified_value).all()) self.assertTrue((result[0] == result[1]).all()) - modified_value = np.random.uniform( - -1, 1, size=[3, 2, 1]).astype('float32') + modified_value = np.random.uniform(-1, 1, + size=[3, 2, + 1]).astype('float32') x.set_value(modified_value, scope) result = exe.run(main, fetch_list=[x, detach_x]) self.assertTrue((result[1] == modified_value).all()) @@ -481,6 +485,7 @@ class TestVariable(unittest.TestCase): class TestVariableSlice(unittest.TestCase): + def _test_item_none(self, place): data = np.random.rand(2, 3, 4).astype("float32") prog = paddle.static.Program() @@ -539,6 +544,7 @@ class TestVariableSlice(unittest.TestCase): class TestListIndex(unittest.TestCase): + def numel(self, shape): return reduce(lambda x, y: x * y, shape) @@ -546,8 +552,8 @@ class TestListIndex(unittest.TestCase): paddle.enable_static() inps_shape = [3, 4, 5, 2] - array = np.arange( - self.numel(inps_shape), dtype='float32').reshape(inps_shape) + array = np.arange(self.numel(inps_shape), + dtype='float32').reshape(inps_shape) index_shape = [3, 3, 2, 1] index = 
np.arange(self.numel(index_shape)).reshape(index_shape) @@ -558,8 +564,9 @@ class TestListIndex(unittest.TestCase): index_mod = (index % (array.shape[0])).tolist() with paddle.static.program_guard(program): - x = paddle.static.data( - name='x', shape=array.shape, dtype='float32') + x = paddle.static.data(name='x', + shape=array.shape, + dtype='float32') y = x[index_mod] @@ -612,16 +619,16 @@ class TestListIndex(unittest.TestCase): def test_static_graph_list_index_muti_dim(self): paddle.enable_static() inps_shape = [3, 4, 5] - array = np.arange( - self.numel(inps_shape), dtype='float32').reshape(inps_shape) + array = np.arange(self.numel(inps_shape), + dtype='float32').reshape(inps_shape) index_shape = [2, 2] index1 = np.arange(self.numel(index_shape)).reshape(index_shape) index2 = np.arange(self.numel(index_shape)).reshape(index_shape) + 2 value_shape = [3, 2, 2, 3] - value_np = np.arange( - self.numel(value_shape), dtype='float32').reshape(value_shape) + 100 + value_np = np.arange(self.numel(value_shape), + dtype='float32').reshape(value_shape) + 100 index_mod1 = (index1 % (min(array.shape))).tolist() index_mod2 = (index2 % (min(array.shape))).tolist() @@ -631,12 +638,15 @@ class TestListIndex(unittest.TestCase): x = paddle.static.data(name='x', shape=array.shape, dtype='float32') - value = paddle.static.data( - name='value', shape=value_np.shape, dtype='float32') - index1 = paddle.static.data( - name='index1', shape=index1.shape, dtype='int32') - index2 = paddle.static.data( - name='index2', shape=index2.shape, dtype='int32') + value = paddle.static.data(name='value', + shape=value_np.shape, + dtype='float32') + index1 = paddle.static.data(name='index1', + shape=index1.shape, + dtype='int32') + index2 = paddle.static.data(name='index2', + shape=index2.shape, + dtype='int32') y = x[index1, index2] @@ -661,23 +671,23 @@ class TestListIndex(unittest.TestCase): }, fetch_list=fetch_list) - self.assertTrue( - np.array_equal(y2, getitem_pp[0]), - msg='\n numpy:{},\n paddle:{}'.format(y2, getitem_pp[0])) + self.assertTrue(np.array_equal(y2, getitem_pp[0]), + msg='\n numpy:{},\n paddle:{}'.format( + y2, getitem_pp[0])) def test_dygraph_list_index_muti_dim(self): paddle.disable_static() inps_shape = [3, 4, 5] - array = np.arange( - self.numel(inps_shape), dtype='float32').reshape(inps_shape) + array = np.arange(self.numel(inps_shape), + dtype='float32').reshape(inps_shape) index_shape = [2, 2] index1 = np.arange(self.numel(index_shape)).reshape(index_shape) index2 = np.arange(self.numel(index_shape)).reshape(index_shape) + 2 value_shape = [3, 2, 2, 3] - value_np = np.arange( - self.numel(value_shape), dtype='float32').reshape(value_shape) + 100 + value_np = np.arange(self.numel(value_shape), + dtype='float32').reshape(value_shape) + 100 index_mod1 = (index1 % (min(array.shape))).tolist() index_mod2 = (index2 % (min(array.shape))).tolist() @@ -714,9 +724,9 @@ class TestListIndex(unittest.TestCase): getitem_pp = exe.run(prog, feed={x.name: array}, fetch_list=fetch_list) print(getitem_pp) - self.assertTrue( - np.array_equal(value_np, getitem_pp[0]), - msg='\n numpy:{},\n paddle:{}'.format(value_np, getitem_pp[0])) + self.assertTrue(np.array_equal(value_np, getitem_pp[0]), + msg='\n numpy:{},\n paddle:{}'.format( + value_np, getitem_pp[0])) def test_static_graph_getitem_bool_index(self): paddle.enable_static() @@ -748,8 +758,9 @@ class TestListIndex(unittest.TestCase): def run_setitem_list_index(self, array, index, value_np): x = paddle.static.data(name='x', shape=array.shape, dtype='float32') - value 
= paddle.static.data( - name='value', shape=value_np.shape, dtype='float32') + value = paddle.static.data(name='value', + shape=value_np.shape, + dtype='float32') x[index] = value y = x @@ -766,34 +777,37 @@ class TestListIndex(unittest.TestCase): array2[index] = value_np except: with self.assertRaises(ValueError): - setitem_pp = exe.run( - prog, - feed={x.name: array, - value.name: value_np}, - fetch_list=fetch_list) + setitem_pp = exe.run(prog, + feed={ + x.name: array, + value.name: value_np + }, + fetch_list=fetch_list) return setitem_pp = exe.run(prog, - feed={x.name: array, - value.name: value_np}, + feed={ + x.name: array, + value.name: value_np + }, fetch_list=fetch_list) - self.assertTrue( - np.allclose(array2, setitem_pp[0]), - msg='\n numpy:{},\n paddle:{}'.format(array2, setitem_pp[0])) + self.assertTrue(np.allclose(array2, setitem_pp[0]), + msg='\n numpy:{},\n paddle:{}'.format( + array2, setitem_pp[0])) def test_static_graph_setitem_list_index(self): paddle.enable_static() # case 1: inps_shape = [3, 4, 5, 2, 3] - array = np.arange( - self.numel(inps_shape), dtype='float32').reshape(inps_shape) + array = np.arange(self.numel(inps_shape), + dtype='float32').reshape(inps_shape) index_shape = [3, 3, 1, 2] index = np.arange(self.numel(index_shape)).reshape(index_shape) value_shape = inps_shape[3:] - value_np = np.arange( - self.numel(value_shape), dtype='float32').reshape(value_shape) + 100 + value_np = np.arange(self.numel(value_shape), + dtype='float32').reshape(value_shape) + 100 for _ in range(3): program = paddle.static.Program() @@ -808,15 +822,15 @@ class TestListIndex(unittest.TestCase): # case 2: inps_shape = [3, 4, 5, 4, 3] - array = np.arange( - self.numel(inps_shape), dtype='float32').reshape(inps_shape) + array = np.arange(self.numel(inps_shape), + dtype='float32').reshape(inps_shape) index_shape = [4, 3, 2, 2] index = np.arange(self.numel(index_shape)).reshape(index_shape) value_shape = [3] - value_np = np.arange( - self.numel(value_shape), dtype='float32').reshape(value_shape) + 100 + value_np = np.arange(self.numel(value_shape), + dtype='float32').reshape(value_shape) + 100 for _ in range(4): program = paddle.static.Program() @@ -830,15 +844,15 @@ class TestListIndex(unittest.TestCase): # case 3: inps_shape = [3, 4, 5, 3, 3] - array = np.arange( - self.numel(inps_shape), dtype='float32').reshape(inps_shape) + array = np.arange(self.numel(inps_shape), + dtype='float32').reshape(inps_shape) index_shape = [4, 3, 2, 2] index = np.arange(self.numel(index_shape)).reshape(index_shape) value_shape = [3, 2, 2, 3] - value_np = np.arange( - self.numel(value_shape), dtype='float32').reshape(value_shape) + 100 + value_np = np.arange(self.numel(value_shape), + dtype='float32').reshape(value_shape) + 100 index_mod = (index % (min(array.shape))).tolist() self.run_setitem_list_index(array, index_mod, value_np) @@ -881,18 +895,18 @@ class TestListIndex(unittest.TestCase): def test_static_graph_tensor_index_setitem_muti_dim(self): paddle.enable_static() inps_shape = [3, 4, 5, 4] - array = np.arange( - self.numel(inps_shape), dtype='float32').reshape(inps_shape) + array = np.arange(self.numel(inps_shape), + dtype='float32').reshape(inps_shape) index_shape = [2, 3, 4] - index1 = np.arange( - self.numel(index_shape), dtype='int32').reshape(index_shape) - index2 = np.arange( - self.numel(index_shape), dtype='int32').reshape(index_shape) + 2 + index1 = np.arange(self.numel(index_shape), + dtype='int32').reshape(index_shape) + index2 = np.arange(self.numel(index_shape), + 
dtype='int32').reshape(index_shape) + 2 value_shape = [4] - value_np = np.arange( - self.numel(value_shape), dtype='float32').reshape(value_shape) + 100 + value_np = np.arange(self.numel(value_shape), + dtype='float32').reshape(value_shape) + 100 for _ in range(3): index_mod1 = index1 % (min(array.shape)) @@ -906,17 +920,22 @@ class TestListIndex(unittest.TestCase): program = paddle.static.Program() with paddle.static.program_guard(program): - x1 = paddle.static.data( - name='x1', shape=array.shape, dtype='float32') - x2 = paddle.static.data( - name='x2', shape=array.shape, dtype='float32') - - value = paddle.static.data( - name='value', shape=value_np.shape, dtype='float32') - index_1 = paddle.static.data( - name='index_1', shape=index1.shape, dtype='int32') - index_2 = paddle.static.data( - name='index_2', shape=index2.shape, dtype='int32') + x1 = paddle.static.data(name='x1', + shape=array.shape, + dtype='float32') + x2 = paddle.static.data(name='x2', + shape=array.shape, + dtype='float32') + + value = paddle.static.data(name='value', + shape=value_np.shape, + dtype='float32') + index_1 = paddle.static.data(name='index_1', + shape=index1.shape, + dtype='int32') + index_2 = paddle.static.data(name='index_2', + shape=index2.shape, + dtype='int32') x1[index_1, index_2] = value x2[index_1] = value @@ -940,14 +959,12 @@ class TestListIndex(unittest.TestCase): index_2.name: index_mod2 }, fetch_list=fetch_list) - self.assertTrue( - np.array_equal(array2, setitem_pp[0]), - msg='\n numpy:{},\n paddle:{}'.format(array2, - setitem_pp[0])) - self.assertTrue( - np.array_equal(array3, setitem_pp[1]), - msg='\n numpy:{},\n paddle:{}'.format(array3, - setitem_pp[1])) + self.assertTrue(np.array_equal(array2, setitem_pp[0]), + msg='\n numpy:{},\n paddle:{}'.format( + array2, setitem_pp[0])) + self.assertTrue(np.array_equal(array3, setitem_pp[1]), + msg='\n numpy:{},\n paddle:{}'.format( + array3, setitem_pp[1])) array = array[0] index1 = index1[0] index2 = index2[0] @@ -955,14 +972,14 @@ class TestListIndex(unittest.TestCase): def test_static_graph_array_index_muti_dim(self): paddle.enable_static() inps_shape = [3, 4, 5, 4] - array = np.arange( - self.numel(inps_shape), dtype='float32').reshape(inps_shape) + array = np.arange(self.numel(inps_shape), + dtype='float32').reshape(inps_shape) index_shape = [2, 3, 4] - index1 = np.arange( - self.numel(index_shape), dtype='int32').reshape(index_shape) - index2 = np.arange( - self.numel(index_shape), dtype='int32').reshape(index_shape) + 2 + index1 = np.arange(self.numel(index_shape), + dtype='int32').reshape(index_shape) + index2 = np.arange(self.numel(index_shape), + dtype='int32').reshape(index_shape) + 2 for _ in range(3): index_mod1 = index1 % (min(array.shape)) @@ -978,10 +995,12 @@ class TestListIndex(unittest.TestCase): program = paddle.static.Program() with paddle.static.program_guard(program): - x1 = paddle.static.data( - name='x1', shape=array.shape, dtype='float32') - x2 = paddle.static.data( - name='x2', shape=array.shape, dtype='float32') + x1 = paddle.static.data(name='x1', + shape=array.shape, + dtype='float32') + x2 = paddle.static.data(name='x2', + shape=array.shape, + dtype='float32') x1[index_mod1, index_mod2] = 1 x2[index_mod1] = 2.5 @@ -997,24 +1016,24 @@ class TestListIndex(unittest.TestCase): fetch_list = [x1.name, x2.name, y1.name, y2.name] setitem_pp = exe.run(prog, - feed={x1.name: array, - x2.name: array}, + feed={ + x1.name: array, + x2.name: array + }, fetch_list=fetch_list) - self.assertTrue( - np.array_equal(array2, 
setitem_pp[0]), - msg='\n numpy:{},\n paddle:{}'.format(array2, - setitem_pp[0])) - self.assertTrue( - np.array_equal(array3, setitem_pp[1]), - msg='\n numpy:{},\n paddle:{}'.format(array3, - setitem_pp[1])) - - self.assertTrue( - np.array_equal(y_np1, setitem_pp[2]), - msg='\n numpy:{},\n paddle:{}'.format(y_np1, setitem_pp[2])) - self.assertTrue( - np.array_equal(y_np2, setitem_pp[3]), - msg='\n numpy:{},\n paddle:{}'.format(y_np2, setitem_pp[3])) + self.assertTrue(np.array_equal(array2, setitem_pp[0]), + msg='\n numpy:{},\n paddle:{}'.format( + array2, setitem_pp[0])) + self.assertTrue(np.array_equal(array3, setitem_pp[1]), + msg='\n numpy:{},\n paddle:{}'.format( + array3, setitem_pp[1])) + + self.assertTrue(np.array_equal(y_np1, setitem_pp[2]), + msg='\n numpy:{},\n paddle:{}'.format( + y_np1, setitem_pp[2])) + self.assertTrue(np.array_equal(y_np2, setitem_pp[3]), + msg='\n numpy:{},\n paddle:{}'.format( + y_np2, setitem_pp[3])) array = array[0] index1 = index1[0] index2 = index2[0] @@ -1022,13 +1041,13 @@ class TestListIndex(unittest.TestCase): def test_dygraph_array_index_muti_dim(self): paddle.disable_static() inps_shape = [3, 4, 5, 4] - array = np.arange( - self.numel(inps_shape), dtype='float32').reshape(inps_shape) + array = np.arange(self.numel(inps_shape), + dtype='float32').reshape(inps_shape) index_shape = [2, 3, 4] - index1 = np.arange( - self.numel(index_shape), dtype='int32').reshape(index_shape) - index2 = np.arange( - self.numel(index_shape), dtype='int32').reshape(index_shape) + 2 + index1 = np.arange(self.numel(index_shape), + dtype='int32').reshape(index_shape) + index2 = np.arange(self.numel(index_shape), + dtype='int32').reshape(index_shape) + 2 for _ in range(3): @@ -1043,26 +1062,26 @@ class TestListIndex(unittest.TestCase): y_t1 = tensor1[index_mod_t2, index_mod_t1] - self.assertTrue( - np.array_equal(y_t1.numpy(), y_np1), - msg='\n numpy:{},\n paddle:{}'.format(y_np1, y_t1.numpy())) + self.assertTrue(np.array_equal(y_t1.numpy(), y_np1), + msg='\n numpy:{},\n paddle:{}'.format( + y_np1, y_t1.numpy())) # 1 dim getitem array2 = array.copy() y_np2 = array2[index_mod2] tensor2 = paddle.to_tensor(array) y_t2 = tensor2[index_mod_t2] - self.assertTrue( - np.array_equal(y_t2.numpy(), y_np2), - msg='\n numpy:{},\n paddle:{}'.format(y_np2, y_t2.numpy())) + self.assertTrue(np.array_equal(y_t2.numpy(), y_np2), + msg='\n numpy:{},\n paddle:{}'.format( + y_np2, y_t2.numpy())) # 2 dim setitem array1 = array.copy() array1[index_mod1, index_mod2] = 1 tensor1[index_mod_t1, index_mod_t2] = 1 - self.assertTrue( - np.array_equal(tensor1.numpy(), array1), - msg='\n numpy:{},\n paddle:{}'.format(array1, tensor1.numpy())) + self.assertTrue(np.array_equal(tensor1.numpy(), array1), + msg='\n numpy:{},\n paddle:{}'.format( + array1, tensor1.numpy())) # 1 dim setitem array2 = array.copy() @@ -1070,9 +1089,9 @@ class TestListIndex(unittest.TestCase): tensor2[index_mod_t1] = 2.5 - self.assertTrue( - np.array_equal(tensor2.numpy(), array2), - msg='\n numpy:{},\n paddle:{}'.format(array2, tensor2.numpy())) + self.assertTrue(np.array_equal(tensor2.numpy(), array2), + msg='\n numpy:{},\n paddle:{}'.format( + array2, tensor2.numpy())) array = array[0] index1 = index1[0] diff --git a/python/paddle/fluid/tests/unittests/test_variance_layer.py b/python/paddle/fluid/tests/unittests/test_variance_layer.py index 13e3cf4df11..cf46d82b11d 100644 --- a/python/paddle/fluid/tests/unittests/test_variance_layer.py +++ b/python/paddle/fluid/tests/unittests/test_variance_layer.py @@ -27,6 +27,7 @@ def 
ref_var(x, axis=None, unbiased=True, keepdim=False): class TestVarAPI(unittest.TestCase): + def setUp(self): self.dtype = 'float64' self.shape = [1, 3, 4, 10] @@ -67,36 +68,43 @@ class TestVarAPI(unittest.TestCase): class TestVarAPI_dtype(TestVarAPI): + def set_attrs(self): self.dtype = 'float32' class TestVarAPI_axis_int(TestVarAPI): + def set_attrs(self): self.axis = 2 class TestVarAPI_axis_list(TestVarAPI): + def set_attrs(self): self.axis = [1, 2] class TestVarAPI_axis_tuple(TestVarAPI): + def set_attrs(self): self.axis = (1, 3) class TestVarAPI_keepdim(TestVarAPI): + def set_attrs(self): self.keepdim = False class TestVarAPI_unbiased(TestVarAPI): + def set_attrs(self): self.unbiased = False class TestVarAPI_alias(unittest.TestCase): + def test_alias(self): paddle.disable_static() x = paddle.to_tensor(np.array([10, 12], 'float32')) @@ -109,6 +117,7 @@ class TestVarAPI_alias(unittest.TestCase): class TestVarError(unittest.TestCase): + def test_error(self): with paddle.static.program_guard(paddle.static.Program()): x = paddle.fluid.data('X', [2, 3, 4], 'int32') diff --git a/python/paddle/fluid/tests/unittests/test_version.py b/python/paddle/fluid/tests/unittests/test_version.py index 42a0e5c802c..d3128886686 100644 --- a/python/paddle/fluid/tests/unittests/test_version.py +++ b/python/paddle/fluid/tests/unittests/test_version.py @@ -21,6 +21,7 @@ import paddle.version as fluid_version class VersionTest(unittest.TestCase): + def setUp(self): self._major_regex = "[0-9]+" self._minor_regex = "[0-9]+" diff --git a/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py b/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py index 0d4e379660b..a70d8e209b3 100644 --- a/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py +++ b/python/paddle/fluid/tests/unittests/test_view_op_reuse_allocation.py @@ -28,6 +28,7 @@ from paddle.fluid.framework import _test_eager_guard, in_dygraph_mode # reuse the input varbase's allocation. 
# View APIs include: `squeeze`, `unsqueeze`, `reshape`, `flatten`, `detach` class TestDygraphViewReuseAllocation(unittest.TestCase): + def setUp(self): self.init_shape() @@ -104,6 +105,7 @@ class TestDygraphViewReuseAllocation(unittest.TestCase): class TestUnsqueezeDygraphViewReuseAllocation(TestDygraphViewReuseAllocation): + def init_shape(self): self.input_shape = [2, 3] self.output_shape = [2, 3, 1] @@ -113,6 +115,7 @@ class TestUnsqueezeDygraphViewReuseAllocation(TestDygraphViewReuseAllocation): class TestReshapeDygraphViewReuseAllocation(TestDygraphViewReuseAllocation): + def init_shape(self): self.input_shape = [3, 4] self.output_shape = [2, 2, 3] @@ -122,6 +125,7 @@ class TestReshapeDygraphViewReuseAllocation(TestDygraphViewReuseAllocation): class TestFlattenDygraphViewReuseAllocation(TestDygraphViewReuseAllocation): + def init_shape(self): self.input_shape = [3, 4] self.output_shape = [12] diff --git a/python/paddle/fluid/tests/unittests/test_viterbi_decode_op.py b/python/paddle/fluid/tests/unittests/test_viterbi_decode_op.py index 163e246b715..7b789577771 100644 --- a/python/paddle/fluid/tests/unittests/test_viterbi_decode_op.py +++ b/python/paddle/fluid/tests/unittests/test_viterbi_decode_op.py @@ -14,10 +14,12 @@ import paddle.fluid as fluid from paddle.fluid import core import unittest import paddle + paddle.enable_static() class Decoder(object): + def __init__(self, transitions, use_tag=True): self.transitions = transitions self.use_tag = use_tag @@ -67,6 +69,7 @@ class Decoder(object): class TestViterbiOp(OpTest): + def set_attr(self): self.dtype = "float32" if core.is_compiled_with_rocm() else "float64" self.use_tag = True @@ -87,7 +90,9 @@ class TestViterbiOp(OpTest): 'Transition': self.trans, 'Length': self.length } - self.attrs = {'include_bos_eos_tag': self.use_tag, } + self.attrs = { + 'include_bos_eos_tag': self.use_tag, + } self.outputs = {'Scores': scores, 'Path': path} def test_output(self): @@ -95,6 +100,7 @@ class TestViterbiOp(OpTest): class TestViterbiAPI(unittest.TestCase): + def set_attr(self): self.use_tag = True self.bz, self.len, self.ntags = 4, 8, 10 @@ -113,10 +119,12 @@ class TestViterbiAPI(unittest.TestCase): def check_static_result(self, place): bz, length, ntags = self.bz, self.len, self.ntags with fluid.program_guard(fluid.Program(), fluid.Program()): - Input = fluid.data( - name="Input", shape=[bz, length, ntags], dtype="float32") - Transition = fluid.data( - name="Transition", shape=[ntags, ntags], dtype="float32") + Input = fluid.data(name="Input", + shape=[bz, length, ntags], + dtype="float32") + Transition = fluid.data(name="Transition", + shape=[ntags, ntags], + dtype="float32") Length = fluid.data(name="Length", shape=[bz], dtype="int64") decoder = paddle.text.ViterbiDecoder(Transition, self.use_tag) score, path = decoder(Input, Length) diff --git a/python/paddle/fluid/tests/unittests/test_warpctc_op.py b/python/paddle/fluid/tests/unittests/test_warpctc_op.py index 53f3b3cf53d..1d9d9a180d0 100644 --- a/python/paddle/fluid/tests/unittests/test_warpctc_op.py +++ b/python/paddle/fluid/tests/unittests/test_warpctc_op.py @@ -31,6 +31,7 @@ CUDA_BLOCK_SIZE = 32 class CTCForward(object): + def __init__(self, softmax, softmax_lod, labels, labels_lod, num_classes, batch_size, blank, norm_by_times): self.softmax = softmax @@ -115,15 +116,15 @@ class CTCForward(object): # calculate the forward and backward variables, # reference Chapter 7.3 of "Alex Grave, Supervised Sequence # Labelling with Recurrent Neural Networks" - log_acts = np.zeros( - 
[total_times, self.num_classes], dtype=softmax_a_sequence.dtype) + log_acts = np.zeros([total_times, self.num_classes], + dtype=softmax_a_sequence.dtype) for i in range(total_times): for j in range(self.num_classes): log_acts[i, j] = self.safe_log(softmax_a_sequence[i, j]) # calculate the forward variables - forward_vars = np.zeros( - [total_times, total_segments], dtype=softmax_a_sequence.dtype) + forward_vars = np.zeros([total_times, total_segments], + dtype=softmax_a_sequence.dtype) for i in range(total_times): for j in range(total_segments): forward_vars[i, j] = self.LOG_ZERO @@ -174,8 +175,8 @@ class CTCForward(object): labels_start_i = labels_offset labels_end_i = labels_offset + self.labels_lod[self.level][i] - softmax_a_sequence = self.softmax[softmax_start_i: - softmax_end_i, :] + softmax_a_sequence = self.softmax[ + softmax_start_i:softmax_end_i, :] labels_a_sequence = self.labels[labels_start_i:labels_end_i, :] self.loss[i] = self.forward_a_sequence(softmax_a_sequence, labels_a_sequence) @@ -191,6 +192,7 @@ class CTCForward(object): class TestWarpCTCOp(OpTest): + def config(self): self.batch_size = 4 self.num_classes = 12 @@ -208,10 +210,10 @@ class TestWarpCTCOp(OpTest): [sum(self.logits_lod[0]), self.num_classes]).astype("float32") softmax = np.apply_along_axis(stable_softmax, 1, logits) # labels should not be blank - labels = np.random.randint( - 0, - self.num_classes - 1, [sum(self.labels_lod[0]), 1], - dtype="int32") + labels = np.random.randint(0, + self.num_classes - 1, + [sum(self.labels_lod[0]), 1], + dtype="int32") ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod, self.num_classes, self.batch_size, self.blank, @@ -242,20 +244,19 @@ class TestWarpCTCOp(OpTest): def test_check_grad(self): self.outputs['WarpCTCGrad'] = self.gradient if core.is_compiled_with_rocm(): - self.check_grad( - ["Logits"], - "Loss", - max_relative_error=0.009, - check_dygraph=False) + self.check_grad(["Logits"], + "Loss", + max_relative_error=0.009, + check_dygraph=False) else: - self.check_grad( - ["Logits"], - "Loss", - max_relative_error=0.007, - check_dygraph=False) + self.check_grad(["Logits"], + "Loss", + max_relative_error=0.007, + check_dygraph=False) class TestWarpCTCOpCase1(TestWarpCTCOp): + def config(self): self.batch_size = 4 self.num_classes = CUDA_BLOCK_SIZE + 2 @@ -266,6 +267,7 @@ class TestWarpCTCOpCase1(TestWarpCTCOp): class TestWarpCTCOpWithPadding(OpTest): + def config(self): self.batch_size = 4 self.num_classes = 8 @@ -285,10 +287,10 @@ class TestWarpCTCOpWithPadding(OpTest): [sum(self.logits_length), self.num_classes]).astype("float32") softmax = np.apply_along_axis(stable_softmax, 1, logits) # labels should not be blank - labels = np.random.randint( - 0, - self.num_classes - 1, [sum(self.labels_length), 1], - dtype="int32") + labels = np.random.randint(0, + self.num_classes - 1, + [sum(self.labels_length), 1], + dtype="int32") ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod, self.num_classes, self.batch_size, self.blank, @@ -316,8 +318,8 @@ class TestWarpCTCOpWithPadding(OpTest): for i in range(self.batch_size): max_target_seq_length = max(max_target_seq_length, self.labels_length[i]) - new_labels = np.zeros( - [self.batch_size, max_target_seq_length], dtype="int32") + new_labels = np.zeros([self.batch_size, max_target_seq_length], + dtype="int32") cur = 0 for batch_id in range(self.batch_size): @@ -347,20 +349,19 @@ class TestWarpCTCOpWithPadding(OpTest): def test_check_grad(self): self.outputs['WarpCTCGrad'] = self.gradient if 
core.is_compiled_with_rocm(): - self.check_grad( - ["Logits"], - "Loss", - max_relative_error=0.009, - check_dygraph=False) + self.check_grad(["Logits"], + "Loss", + max_relative_error=0.009, + check_dygraph=False) else: - self.check_grad( - ["Logits"], - "Loss", - max_relative_error=0.007, - check_dygraph=False) + self.check_grad(["Logits"], + "Loss", + max_relative_error=0.007, + check_dygraph=False) class TestWarpCTCOpWithPaddingCase1(TestWarpCTCOpWithPadding): + def config(self): self.batch_size = 4 self.num_classes = CUDA_BLOCK_SIZE + 2 @@ -373,6 +374,7 @@ class TestWarpCTCOpWithPaddingCase1(TestWarpCTCOpWithPadding): class TestWarpCTCOpFp64(OpTest): + def config(self): self.batch_size = 4 self.num_classes = 8 @@ -392,10 +394,10 @@ class TestWarpCTCOpFp64(OpTest): [sum(self.logits_length), self.num_classes]).astype("float64") softmax = np.apply_along_axis(stable_softmax, 1, logits) # labels should not be blank - labels = np.random.randint( - 0, - self.num_classes - 1, [sum(self.labels_length), 1], - dtype="int32") + labels = np.random.randint(0, + self.num_classes - 1, + [sum(self.labels_length), 1], + dtype="int32") ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod, self.num_classes, self.batch_size, self.blank, @@ -423,8 +425,8 @@ class TestWarpCTCOpFp64(OpTest): for i in range(self.batch_size): max_target_seq_length = max(max_target_seq_length, self.labels_length[i]) - new_labels = np.zeros( - [self.batch_size, max_target_seq_length], dtype="int32") + new_labels = np.zeros([self.batch_size, max_target_seq_length], + dtype="int32") cur = 0 for batch_id in range(self.batch_size): @@ -457,57 +459,58 @@ class TestWarpCTCOpFp64(OpTest): class TestWarpCTCOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): - logits = fluid.data( - name='logits', shape=[5, 16, 6], dtype='float32') - logits_length = fluid.data( - name='logits_length', shape=[None], dtype='int64') + logits = fluid.data(name='logits', + shape=[5, 16, 6], + dtype='float32') + logits_length = fluid.data(name='logits_length', + shape=[None], + dtype='int64') label = fluid.data(name='label', shape=[16, 3], dtype='int32') - label_length = fluid.data( - name='labels_length', shape=[None], dtype='int64') + label_length = fluid.data(name='labels_length', + shape=[None], + dtype='int64') def test_logits_Variable(): logits_data = np.random.rand(5, 16, 6).astype(logits.dtype) - fluid.layers.warpctc( - input=logits_data, - label=label, - input_length=logits_length, - label_length=label_length) + fluid.layers.warpctc(input=logits_data, + label=label, + input_length=logits_length, + label_length=label_length) self.assertRaises(TypeError, test_logits_Variable) def test_label_Variable(): label_data = np.random.randint(0, 5, [5, 1]).astype("int32") - fluid.layers.warpctc( - input=logits, - label=label_data, - input_length=logits_length, - label_length=label_length) + fluid.layers.warpctc(input=logits, + label=label_data, + input_length=logits_length, + label_length=label_length) self.assertRaises(TypeError, test_label_Variable) def test_logits_len_Variable(): logits_length_data = np.array([5] * 16).astype("int64") - fluid.layers.warpctc( - input=logits, - label=label, - input_length=logits_length_data, - label_length=label_length) + fluid.layers.warpctc(input=logits, + label=label, + input_length=logits_length_data, + label_length=label_length) self.assertRaises(TypeError, test_logits_len_Variable) def test_label_len_Variable(): label_length_data = np.array([3] * 16).astype("int64") 
- fluid.layers.warpctc( - input=logits, - label=label, - input_length=logits_length, - label_length=label_length_data) + fluid.layers.warpctc(input=logits, + label=label, + input_length=logits_length, + label_length=label_length_data) self.assertRaises(TypeError, test_label_len_Variable) def test_dygraph_errors(self): + def test_dygraph_with_lod(): logits = np.random.uniform(0.1, 1.0, [20, 15]).astype("float32") @@ -524,6 +527,7 @@ class TestWarpCTCOpError(unittest.TestCase): class TestCTCLossAPICase(unittest.TestCase): + def test_functinal_api(self): self.batch_size = 4 self.num_classes = CUDA_BLOCK_SIZE + 2 @@ -532,14 +536,16 @@ class TestCTCLossAPICase(unittest.TestCase): self.blank = self.num_classes - 1 self.norm_by_times = False - logits = np.random.uniform(0.1, 1.0, [ - max(self.logits_length), self.batch_size, self.num_classes - ]).astype("float32") + logits = np.random.uniform( + 0.1, 1.0, + [max(self.logits_length), self.batch_size, self.num_classes + ]).astype("float32") softmax = np.apply_along_axis(stable_softmax, -1, logits) # labels should not be blank labels = np.random.randint( 0, - self.num_classes - 1, [self.batch_size, max(self.labels_length)], + self.num_classes - 1, + [self.batch_size, max(self.labels_length)], dtype="int32") ctc = CTCForward(softmax, self.logits_length, labels, @@ -552,22 +558,20 @@ class TestCTCLossAPICase(unittest.TestCase): labels = paddle.to_tensor(labels) logits_length = paddle.to_tensor(self.logits_length) labels_length = paddle.to_tensor(self.labels_length) - loss_pd_mean = F.ctc_loss( - softmax, - labels, - logits_length, - labels_length, - blank=self.blank, - reduction='mean') + loss_pd_mean = F.ctc_loss(softmax, + labels, + logits_length, + labels_length, + blank=self.blank, + reduction='mean') loss_pd_mean = loss_pd_mean.numpy() - loss_pd_sum = F.ctc_loss( - softmax, - labels, - logits_length, - labels_length, - blank=self.blank, - reduction='sum') + loss_pd_sum = F.ctc_loss(softmax, + labels, + logits_length, + labels_length, + blank=self.blank, + reduction='sum') loss_pd_sum = loss_pd_sum.numpy() paddle.enable_static() loss_np = np.squeeze(loss_np, axis=-1) @@ -585,14 +589,16 @@ class TestCTCLossAPICase(unittest.TestCase): self.blank = 0 self.norm_by_times = False - logits = np.random.uniform(0.1, 1.0, [ - max(self.logits_length), self.batch_size, self.num_classes - ]).astype("float32") + logits = np.random.uniform( + 0.1, 1.0, + [max(self.logits_length), self.batch_size, self.num_classes + ]).astype("float32") softmax = np.apply_along_axis(stable_softmax, -1, logits) # labels should not be blank labels = np.random.randint( 1, - self.num_classes, [self.batch_size, max(self.labels_length)], + self.num_classes, + [self.batch_size, max(self.labels_length)], dtype="int32") ctc = CTCForward(softmax, self.logits_length, labels, @@ -606,8 +612,9 @@ class TestCTCLossAPICase(unittest.TestCase): logits_length = paddle.to_tensor(self.logits_length) labels_length = paddle.to_tensor(self.labels_length) - loss_pd = paddle.nn.CTCLoss(self.blank, 'none')( - softmax, labels, logits_length, labels_length) + loss_pd = paddle.nn.CTCLoss(self.blank, + 'none')(softmax, labels, logits_length, + labels_length) loss_pd = loss_pd.numpy() paddle.enable_static() loss_np = np.squeeze(loss_np, axis=-1) diff --git a/python/paddle/fluid/tests/unittests/test_weight_decay.py b/python/paddle/fluid/tests/unittests/test_weight_decay.py index 2a2ad0f6d03..b42bfb1a684 100644 --- a/python/paddle/fluid/tests/unittests/test_weight_decay.py +++ 
b/python/paddle/fluid/tests/unittests/test_weight_decay.py @@ -54,8 +54,9 @@ def bow_net(data, This model is from https://github.com/PaddlePaddle/models: fluid/PaddleNLP/text_classification/nets.py """ - emb = fluid.layers.embedding( - input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim]) + emb = fluid.layers.embedding(input=data, + is_sparse=is_sparse, + size=[dict_dim, emb_dim]) bow = fluid.layers.sequence_pool(input=emb, pool_type='sum') bow_tanh = fluid.layers.tanh(bow) fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh") @@ -68,10 +69,11 @@ def bow_net(data, class TestWeightDecay(unittest.TestCase): + def setUp(self): self.word_dict = paddle.dataset.imdb.word_dict() - reader = paddle.batch( - paddle.dataset.imdb.train(self.word_dict), batch_size=4)() + reader = paddle.batch(paddle.dataset.imdb.train(self.word_dict), + batch_size=4)() self.train_data = [next(reader) for _ in range(5)] self.learning_rate = .5 @@ -111,11 +113,11 @@ class TestWeightDecay(unittest.TestCase): if use_reduce else fluid.BuildStrategy.ReduceStrategy.AllReduce build_strategy.memory_optimize = use_ir_memory_optimize - train_cp = compiler.CompiledProgram(fluid.default_main_program( - )).with_data_parallel( - loss_name=loss.name, - exec_strategy=exec_strategy, - build_strategy=build_strategy) + train_cp = compiler.CompiledProgram( + fluid.default_main_program()).with_data_parallel( + loss_name=loss.name, + exec_strategy=exec_strategy, + build_strategy=build_strategy) loss_set = [] for data in self.train_data: @@ -135,8 +137,10 @@ class TestWeightDecay(unittest.TestCase): startup_prog = fluid.framework.Program() startup_prog.random_seed = 1 with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) + data = fluid.layers.data(name="words", + shape=[1], + dtype="int64", + lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") avg_cost = model(data, label, len(self.word_dict)) @@ -148,13 +152,14 @@ class TestWeightDecay(unittest.TestCase): optimizer.minimize(avg_cost) for params in param_list: - updated_p = fluid.layers.elementwise_sub( - x=params[0], y=params[1]) + updated_p = fluid.layers.elementwise_sub(x=params[0], + y=params[1]) fluid.layers.assign(input=updated_p, output=params[0]) if use_parallel_exe: - loss = self.run_parallel_exe( - place, [data, label], loss=avg_cost, use_reduce=use_reduce) + loss = self.run_parallel_exe(place, [data, label], + loss=avg_cost, + use_reduce=use_reduce) else: loss = self.run_executor(place, [data, label], loss=avg_cost) @@ -166,15 +171,16 @@ class TestWeightDecay(unittest.TestCase): loss = self.check_weight_decay(place, model, use_parallel_exe=False) # TODO(zcd): should test use_reduce=True - loss2 = self.check_weight_decay( - place, model, use_parallel_exe=True, use_reduce=False) + loss2 = self.check_weight_decay(place, + model, + use_parallel_exe=True, + use_reduce=False) for i in range(len(loss)): self.assertTrue( - np.isclose( - a=loss[i], b=loss2[i], rtol=5e-5), - "Expect " + str(loss[i]) + "\n" + "But Got" + str(loss2[i]) - + " in class " + self.__class__.__name__) + np.isclose(a=loss[i], b=loss2[i], rtol=5e-5), + "Expect " + str(loss[i]) + "\n" + "But Got" + + str(loss2[i]) + " in class " + self.__class__.__name__) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_weight_normalization.py b/python/paddle/fluid/tests/unittests/test_weight_normalization.py index e990d8b2498..95cfe40084f 100644 --- 
a/python/paddle/fluid/tests/unittests/test_weight_normalization.py +++ b/python/paddle/fluid/tests/unittests/test_weight_normalization.py @@ -34,8 +34,8 @@ class TestWeightNormalization(unittest.TestCase): @classmethod def set_program(cls): - data = fluid.layers.data( - name=cls.data_desc[0][0], shape=cls.data_desc[0][1]) + data = fluid.layers.data(name=cls.data_desc[0][0], + shape=cls.data_desc[0][1]) out = fluid.layers.fc(input=data, size=cls.hidden_size, param_attr=WeightNormParamAttr( @@ -82,8 +82,8 @@ class TestWeightNormalization(unittest.TestCase): if i == 0 else sum(lod_level_i)).tolist() data_lod.append(lod_level_i) data_value = numpy.random.random( - size=[sum(data_lod[-1]) if data_lod else self.batch_size - ] + data_shape).astype('float32') + size=[sum(data_lod[-1]) if data_lod else self.batch_size] + + data_shape).astype('float32') self.data[data_name] = (data_value, data_lod) def set_inputs(self, place): @@ -96,14 +96,15 @@ class TestWeightNormalization(unittest.TestCase): self.inputs[desc[0]] = tensor def weight_normalize(self): - v = numpy.ones((self.data[self.data_desc[0][0]][0].shape[-1], - self.hidden_size)) + v = numpy.ones( + (self.data[self.data_desc[0][0]][0].shape[-1], self.hidden_size)) g = numpy.linalg.norm(v, axis=None, keepdims=True) w = g * v / numpy.linalg.norm(v, axis=None, keepdims=True) x = self.data[self.data_desc[0][0]][0] out = numpy.dot(x, w) - g_grad = (numpy.dot(x.T, numpy.ones_like(out)) * (v / numpy.linalg.norm( - v, axis=None, keepdims=True))).sum(axis=None, keepdims=True) + g_grad = (numpy.dot(x.T, numpy.ones_like(out)) * + (v / numpy.linalg.norm(v, axis=None, keepdims=True))).sum( + axis=None, keepdims=True) return g, v, g_grad def test_weight_normalization(self): @@ -113,8 +114,7 @@ class TestWeightNormalization(unittest.TestCase): for actual_output in self.actual_outputs: [ self.assertTrue( - numpy.allclose( - numpy.array(actual), expect, atol=0.001)) + numpy.allclose(numpy.array(actual), expect, atol=0.001)) for expect, actual in zip(expect_output, actual_output) ] diff --git a/python/paddle/fluid/tests/unittests/test_where_index.py b/python/paddle/fluid/tests/unittests/test_where_index.py index 1c5705023b8..250bd3fa61f 100644 --- a/python/paddle/fluid/tests/unittests/test_where_index.py +++ b/python/paddle/fluid/tests/unittests/test_where_index.py @@ -24,6 +24,7 @@ from paddle.fluid import Program, program_guard class TestWhereIndexOp(OpTest): + def setUp(self): self.op_type = "where_index" self.init_config() @@ -32,12 +33,15 @@ class TestWhereIndexOp(OpTest): self.check_output() def init_config(self): - self.inputs = {'Condition': np.array([True, False, True]), } + self.inputs = { + 'Condition': np.array([True, False, True]), + } self.outputs = {'Out': np.array([[0], [2]], dtype='int64')} class TestAllFalse(unittest.TestCase): + def setUp(self): self.op_type = "where_index" self.init_config() @@ -69,28 +73,34 @@ class TestAllFalse(unittest.TestCase): class TestRank2(TestWhereIndexOp): + def init_config(self): - self.inputs = {'Condition': np.array([[True, False], [False, True]]), } + self.inputs = { + 'Condition': np.array([[True, False], [False, True]]), + } self.outputs = {'Out': np.array([[0, 0], [1, 1]], dtype='int64')} class TestRank3(TestWhereIndexOp): + def init_config(self): self.inputs = { - 'Condition': np.array([[[True, False], [False, True]], - [[False, True], [True, False]], - [[False, False], [False, True]]]), + 'Condition': + np.array([[[True, False], [False, True]], + [[False, True], [True, False]], + [[False, False], [False, 
True]]]), } self.outputs = { - 'Out': np.array( - [[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0], [2, 1, 1]], - dtype='int64') + 'Out': + np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0], [2, 1, 1]], + dtype='int64') } class TestWhereOpError(unittest.TestCase): + def test_api(self): with program_guard(Program(), Program()): cond = fluid.layers.data(name='cond', shape=[4], dtype='bool') @@ -103,7 +113,9 @@ class TestWhereOpError(unittest.TestCase): class TestWhereRaiseError(unittest.TestCase): + def test_errors(self): + def test_type(): fluid.layers.where([10]) diff --git a/python/paddle/fluid/tests/unittests/test_where_op.py b/python/paddle/fluid/tests/unittests/test_where_op.py index 36819e089ed..51cb380be84 100644 --- a/python/paddle/fluid/tests/unittests/test_where_op.py +++ b/python/paddle/fluid/tests/unittests/test_where_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -27,6 +27,7 @@ from paddle.fluid.framework import _test_eager_guard class TestWhereOp(OpTest): + def setUp(self): self.op_type = 'where' self.python_api = paddle.where @@ -47,6 +48,7 @@ class TestWhereOp(OpTest): class TestWhereOp2(TestWhereOp): + def init_config(self): self.x = np.random.uniform((-5), 5, (60, 2)).astype('float64') self.y = np.random.uniform((-5), 5, (60, 2)).astype('float64') @@ -54,6 +56,7 @@ class TestWhereOp2(TestWhereOp): class TestWhereOp3(TestWhereOp): + def init_config(self): self.x = np.random.uniform((-3), 5, (20, 2, 4)).astype('float64') self.y = np.random.uniform((-3), 5, (20, 2, 4)).astype('float64') @@ -61,6 +64,7 @@ class TestWhereOp3(TestWhereOp): class TestWhereAPI(unittest.TestCase): + def setUp(self): self.init_data() @@ -81,19 +85,22 @@ class TestWhereAPI(unittest.TestCase): for x_stop_gradient in [False, True]: for y_stop_gradient in [False, True]: with fluid.program_guard(Program(), Program()): - cond = fluid.layers.data( - name='cond', shape=self.shape, dtype='bool') - x = fluid.layers.data( - name='x', shape=self.shape, dtype='float32') - y = fluid.layers.data( - name='y', shape=self.shape, dtype='float32') + cond = fluid.layers.data(name='cond', + shape=self.shape, + dtype='bool') + x = fluid.layers.data(name='x', + shape=self.shape, + dtype='float32') + y = fluid.layers.data(name='y', + shape=self.shape, + dtype='float32') x.stop_gradient = x_stop_gradient y.stop_gradient = y_stop_gradient result = paddle.where(cond, x, y) append_backward(layers.mean(result)) for use_cuda in [False, True]: - if (use_cuda and - (not fluid.core.is_compiled_with_cuda())): + if (use_cuda + and (not fluid.core.is_compiled_with_cuda())): break place = (fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()) @@ -103,12 +110,13 @@ class TestWhereAPI(unittest.TestCase): fetch_list.append(x.grad_name) if (y_stop_gradient is False): fetch_list.append(y.grad_name) - out = exe.run( - fluid.default_main_program(), - feed={'cond': self.cond, - 'x': self.x, - 'y': self.y}, - fetch_list=fetch_list) + out = exe.run(fluid.default_main_program(), + feed={ + 'cond': self.cond, + 'x': self.x, + 'y': self.y + }, + 
fetch_list=fetch_list) assert np.array_equal(out[0], self.out) if (x_stop_gradient is False): assert np.array_equal(out[2], @@ -126,8 +134,8 @@ class TestWhereAPI(unittest.TestCase): x = fluid.layers.data(name='x', shape=[4, 1], dtype='float32') y = fluid.layers.data(name='y', shape=[4, 2], dtype='float32') x_i = np.array([[0.9383, 0.1983, 3.2, 1.2]]).astype('float32') - y_i = np.array( - [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]]).astype('float32') + y_i = np.array([[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, + 1.0]]).astype('float32') result = paddle.where((x > 1), x=x, y=y) for use_cuda in [False, True]: if (use_cuda and (not fluid.core.is_compiled_with_cuda())): @@ -135,8 +143,10 @@ class TestWhereAPI(unittest.TestCase): place = (fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()) exe = fluid.Executor(place) out = exe.run(fluid.default_main_program(), - feed={'x': x_i, - 'y': y_i}, + feed={ + 'x': x_i, + 'y': y_i + }, fetch_list=[result]) assert np.array_equal(out[0], np.where((x_i > 1), x_i, y_i)) @@ -145,8 +155,9 @@ class TestWhereAPI(unittest.TestCase): main_program = Program() with fluid.program_guard(main_program): cond_shape = [2, 4] - cond = fluid.layers.data( - name='cond', shape=cond_shape, dtype='bool') + cond = fluid.layers.data(name='cond', + shape=cond_shape, + dtype='bool') x_data = 1.0 y_data = 2.0 cond_data = np.array([False, False, True, True]).astype('bool') @@ -166,8 +177,9 @@ class TestWhereAPI(unittest.TestCase): paddle.enable_static() main_program = Program() with fluid.program_guard(main_program): - cond = fluid.layers.data( - name='cond', shape=cond_shape, dtype='bool') + cond = fluid.layers.data(name='cond', + shape=cond_shape, + dtype='bool') x = fluid.layers.data(name='x', shape=x_shape, dtype='float32') y = fluid.layers.data(name='y', shape=y_shape, dtype='float32') cond_data_tmp = np.random.random(size=cond_shape).astype('float32') @@ -180,12 +192,13 @@ class TestWhereAPI(unittest.TestCase): return place = (fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()) exe = fluid.Executor(place) - out = exe.run( - fluid.default_main_program(), - feed={'cond': cond_data, - 'x': x_data, - 'y': y_data}, - fetch_list=[result]) + out = exe.run(fluid.default_main_program(), + feed={ + 'cond': cond_data, + 'x': x_data, + 'y': y_data + }, + fetch_list=[result]) expect = np.where(cond_data, x_data, y_data) assert np.array_equal(out[0], expect) @@ -239,6 +252,7 @@ class TestWhereAPI(unittest.TestCase): class TestWhereDygraphAPI(unittest.TestCase): + def test_api(self): with fluid.dygraph.guard(): x_i = np.array([0.9383, 0.1983, 3.2, 1.2]).astype('float64') @@ -360,6 +374,7 @@ class TestWhereDygraphAPI(unittest.TestCase): class TestWhereOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): x_i = np.array([0.9383, 0.1983, 3.2, 1.2]).astype('float64') diff --git a/python/paddle/fluid/tests/unittests/test_while_loop_op.py b/python/paddle/fluid/tests/unittests/test_while_loop_op.py index 83ca577faa5..baf111df633 100644 --- a/python/paddle/fluid/tests/unittests/test_while_loop_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_loop_op.py @@ -29,7 +29,9 @@ paddle.enable_static() class TestApiWhileLoop(unittest.TestCase): + def test_var_tuple(self): + def cond(i): return layers.less_than(i, ten) @@ -44,14 +46,15 @@ class TestApiWhileLoop(unittest.TestCase): ten = layers.fill_constant(shape=[1], dtype='int64', value=10) out = layers.while_loop(cond, body, (i, )) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else 
fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) res = exe.run(main_program, fetch_list=out) self.assertTrue( np.allclose(np.asarray(res[0]), np.full((1), 10, np.int64))) def test_var_list(self): + def cond(i, mem): return layers.less_than(i, ten) @@ -72,8 +75,8 @@ class TestApiWhileLoop(unittest.TestCase): data = np.random.rand(10).astype('float32') data_one = np.ones(10).astype('float32') - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) res = exe.run(main_program, feed={'mem': data}, fetch_list=out) for i in range(10): @@ -81,6 +84,7 @@ class TestApiWhileLoop(unittest.TestCase): self.assertTrue(np.allclose(np.asarray(res[1]), data)) def test_var_dict(self): + def cond(i, ten, test_dict, test_list, test_list_dict): return layers.less_than(i, ten) @@ -91,8 +95,8 @@ class TestApiWhileLoop(unittest.TestCase): test_list[0] = fluid.layers.reshape(test_list[0], [2, -1]) + 1 test_list_dict[0]["test_key"] += 1 - test_list_dict[0]["test_key"] = fluid.layers.relu(test_list_dict[0][ - "test_key"]) + test_list_dict[0]["test_key"] = fluid.layers.relu( + test_list_dict[0]["test_key"]) i = layers.increment(i) return [i, ten, test_dict, test_list, test_list_dict] @@ -106,18 +110,17 @@ class TestApiWhileLoop(unittest.TestCase): test_dict = {"test_key": test_data} test_list = [ - layers.fill_constant( - shape=[1, 2], dtype='int64', value=0) + layers.fill_constant(shape=[1, 2], dtype='int64', value=0) ] test_list_dict = [{ - "test_key": layers.fill_constant( - shape=[1], dtype='float32', value=0) + "test_key": + layers.fill_constant(shape=[1], dtype='float32', value=0) }] i, ten, test_dict, test_list, test_list_dict = layers.while_loop( cond, body, [i, ten, test_dict, test_list, test_list_dict]) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) res = exe.run(main_program, fetch_list=[ @@ -125,28 +128,25 @@ class TestApiWhileLoop(unittest.TestCase): test_list_dict[0]["test_key"] ]) self.assertTrue( - np.allclose( - np.asarray(res[0]), - np.full( - shape=(1), fill_value=10, dtype=np.int64))) + np.allclose(np.asarray(res[0]), + np.full(shape=(1), fill_value=10, dtype=np.int64))) self.assertTrue( - np.allclose( - np.asarray(res[1]), - np.full( - shape=(2, 1), fill_value=10, dtype=np.int64))) + np.allclose(np.asarray(res[1]), + np.full(shape=(2, 1), fill_value=10, dtype=np.int64))) self.assertTrue( - np.allclose( - np.asarray(res[2]), - np.full( - shape=(1), fill_value=10, dtype=np.float32))) + np.allclose(np.asarray(res[2]), + np.full(shape=(1), fill_value=10, dtype=np.float32))) class TestApiWhileLoop_Nested(unittest.TestCase): + def test_nested_net(self): + def external_cond(i, j, init, sums): return layers.less_than(i, loop_len1) def external_body(i, j, init, sums): + def internal_cond(j, init, sums): return layers.less_than(j, loop_len2) @@ -182,12 +182,14 @@ class TestApiWhileLoop_Nested(unittest.TestCase): data = np.random.rand(3, 3).astype('float32') data_sums = np.zeros([3, 3]).astype('float32') - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) res = exe.run(main_program, - 
feed={'init': data, - 'sums': data_sums}, + feed={ + 'init': data, + 'sums': data_sums + }, fetch_list=out) for i in range(3): data = np.add(data, 1) @@ -198,7 +200,9 @@ class TestApiWhileLoop_Nested(unittest.TestCase): class TestApiWhileLoop_Backward(unittest.TestCase): + def test_while_loop_backward(self): + def cond(i, x): return layers.less_than(i, eleven) @@ -221,8 +225,8 @@ class TestApiWhileLoop_Backward(unittest.TestCase): mean = layers.mean(out[1]) append_backward(mean) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) feed_i = np.ones(1).astype('float32') @@ -231,15 +235,18 @@ class TestApiWhileLoop_Backward(unittest.TestCase): i_grad = np.asarray([110]).astype('float32') res = exe.run(main_program, - feed={'i': feed_i, - 'x': feed_x}, + feed={ + 'i': feed_i, + 'x': feed_x + }, fetch_list=[mean.name, i.grad_name]) self.assertTrue(np.allclose(np.asarray(res[0]), data)) - self.assertTrue( - np.allclose(np.asarray(res[1]), i_grad), - msg=" \nres = \n{} \n\n ans = \n{}".format(res[1], i_grad)) + self.assertTrue(np.allclose(np.asarray(res[1]), i_grad), + msg=" \nres = \n{} \n\n ans = \n{}".format( + res[1], i_grad)) def test_while_loop_backward2(self): + def cond(i, x): return i < 3 @@ -260,8 +267,8 @@ class TestApiWhileLoop_Backward(unittest.TestCase): mean = layers.mean(out[1]) append_backward(mean) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) feed_i = np.ones(1).astype('float32') @@ -271,24 +278,29 @@ class TestApiWhileLoop_Backward(unittest.TestCase): x_grad = np.asarray([2]).astype('float32') res = exe.run(main_program, - feed={'i': feed_i, - 'x': feed_x}, + feed={ + 'i': feed_i, + 'x': feed_x + }, fetch_list=[mean.name, i.grad_name, x.grad_name]) self.assertTrue(np.allclose(np.asarray(res[0]), data)) - self.assertTrue( - np.allclose(np.asarray(res[1]), i_grad), - msg=" \nres = \n{} \n\n ans = \n{}".format(res[1], i_grad)) - self.assertTrue( - np.allclose(np.asarray(res[2]), x_grad), - msg=" \nres = \n{} \n\n ans = \n{}".format(res[2], x_grad)) + self.assertTrue(np.allclose(np.asarray(res[1]), i_grad), + msg=" \nres = \n{} \n\n ans = \n{}".format( + res[1], i_grad)) + self.assertTrue(np.allclose(np.asarray(res[2]), x_grad), + msg=" \nres = \n{} \n\n ans = \n{}".format( + res[2], x_grad)) class TestApiWhileLoop_NestedWithBackwardAndLoDTensorArray(unittest.TestCase): + def test_nested_net_with_backward_and_lodtensor(self): + def external_cond(i, j, x, mem_array): return layers.less_than(i, array_len) def external_body(i, j, x, mem_array): + def internal_cond(j, x, mem_array): return layers.less_than(j, array_len2) @@ -342,8 +354,8 @@ class TestApiWhileLoop_NestedWithBackwardAndLoDTensorArray(unittest.TestCase): mean = layers.mean(sum_result) append_backward(mean) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) d = [] @@ -352,23 +364,27 @@ class TestApiWhileLoop_NestedWithBackwardAndLoDTensorArray(unittest.TestCase): feed_x = np.ones(10).astype('float32') data_sum = d[0] + d[1] + d[2] + 3 * feed_x x_grad = [0.3] * 10 - res = exe.run( - main_program, - feed={'d0': d[0], - 'd1': d[1], - 'd2': d[2], - 'x': feed_x}, - 
fetch_list=[sum_result.name, x.grad_name]) + res = exe.run(main_program, + feed={ + 'd0': d[0], + 'd1': d[1], + 'd2': d[2], + 'x': feed_x + }, + fetch_list=[sum_result.name, x.grad_name]) self.assertTrue(np.allclose(res[0], data_sum)) self.assertTrue(np.allclose(res[1], x_grad)) class TestApiWhileLoopWithSwitchCase(unittest.TestCase): + def test_with_switch_case(self): + def cond(i): return layers.less_than(i, ten) def body(i): + def fn_add_three(): data_add_three = layers.elementwise_add(x=i, y=three) return data_add_three @@ -381,11 +397,12 @@ class TestApiWhileLoopWithSwitchCase(unittest.TestCase): data_add_one = layers.elementwise_add(x=i, y=one) return data_add_one - return layers.switch_case( - branch_index=i, - branch_fns={2: fn_add_three, - 5: fn_square}, - default=fn_add_one) + return layers.switch_case(branch_index=i, + branch_fns={ + 2: fn_add_three, + 5: fn_square + }, + default=fn_add_one) main_program = Program() startup_program = Program() @@ -396,8 +413,8 @@ class TestApiWhileLoopWithSwitchCase(unittest.TestCase): one = layers.fill_constant(shape=[1], dtype='int64', value=1) out = layers.while_loop(cond, body, [i]) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) res = exe.run(main_program, fetch_list=out) @@ -406,7 +423,9 @@ class TestApiWhileLoopWithSwitchCase(unittest.TestCase): class TestApiWhileLoop_Error(unittest.TestCase): + def test_error(self): + def cond_returns_constant(i): return 1 @@ -436,8 +455,9 @@ class TestApiWhileLoop_Error(unittest.TestCase): return i > 0 def body_returns_with_mutable_dict(i, test_dict): - test_dict['new_key'] = layers.fill_constant( - shape=[1], dtype='int64', value=1) + test_dict['new_key'] = layers.fill_constant(shape=[1], + dtype='int64', + value=1) return layers.increment(i), test_dict def cond_returns_with_mutable_list(i, test_list): @@ -445,8 +465,7 @@ class TestApiWhileLoop_Error(unittest.TestCase): def body_returns_with_mutable_list(i, test_list): test_list.append( - layers.fill_constant( - shape=[1], dtype='int64', value=1)) + layers.fill_constant(shape=[1], dtype='int64', value=1)) return layers.increment(i), test_list main_program = Program() @@ -519,8 +538,8 @@ class TestApiWhileLoop_Error(unittest.TestCase): # The length of `output_vars` with mutable value should keep same with `loop_vars` def value_error_body_returns_with_mutable_dict(): test_dict = { - "int_constant": layers.fill_constant( - shape=[2, 2], dtype='int64', value=1) + "int_constant": + layers.fill_constant(shape=[2, 2], dtype='int64', value=1) } out = layers.while_loop(cond_returns_with_mutable_dict, body_returns_with_mutable_dict, @@ -531,8 +550,7 @@ class TestApiWhileLoop_Error(unittest.TestCase): def value_error_body_returns_with_mutable_list(): test_list = [ - layers.fill_constant( - shape=[2, 2], dtype='int64', value=1) + layers.fill_constant(shape=[2, 2], dtype='int64', value=1) ] out = layers.while_loop(cond_returns_with_mutable_list, body_returns_with_mutable_list, @@ -543,7 +561,9 @@ class TestApiWhileLoop_Error(unittest.TestCase): class TestApiWhileLoopSliceInBody(unittest.TestCase): + def test_var_slice(self): + def cond(z, i): return i + 1 <= x_shape[0] @@ -561,8 +581,8 @@ class TestApiWhileLoopSliceInBody(unittest.TestCase): i = fluid.layers.fill_constant([1], 'int32', 0) z, _ = fluid.layers.while_loop(cond, body, [z, i]) - place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( - ) else 
fluid.CPUPlace() + place = fluid.CUDAPlace( + 0) if core.is_compiled_with_cuda() else fluid.CPUPlace() exe = fluid.Executor(place) np_x = np.array([1, 2, 3, 4, 5], dtype='int32') diff --git a/python/paddle/fluid/tests/unittests/test_while_op.py b/python/paddle/fluid/tests/unittests/test_while_op.py index 8af9a39634f..dee83692bd3 100644 --- a/python/paddle/fluid/tests/unittests/test_while_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_op.py @@ -28,13 +28,20 @@ paddle.enable_static() class TestWhileOp(unittest.TestCase): + def simple_net(self): - d0 = layers.data( - "d0", shape=[10], append_batch_size=False, dtype='float32') - d1 = layers.data( - "d1", shape=[10], append_batch_size=False, dtype='float32') - d2 = layers.data( - "d2", shape=[10], append_batch_size=False, dtype='float32') + d0 = layers.data("d0", + shape=[10], + append_batch_size=False, + dtype='float32') + d1 = layers.data("d1", + shape=[10], + append_batch_size=False, + dtype='float32') + d2 = layers.data("d2", + shape=[10], + append_batch_size=False, + dtype='float32') i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = True init = layers.zeros(shape=[10], dtype='float32') @@ -92,9 +99,11 @@ class TestWhileOp(unittest.TestCase): for i in range(3): d.append(numpy.random.random(size=[10]).astype('float32')) - outs = exe.run(feed={'d0': d[0], - 'd1': d[1], - 'd2': d[2]}, + outs = exe.run(feed={ + 'd0': d[0], + 'd1': d[1], + 'd2': d[2] + }, fetch_list=[sum_result]) self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01) @@ -127,6 +136,7 @@ class TestWhileOp(unittest.TestCase): class BadInputTest(unittest.TestCase): + def test_error(self): with fluid.program_guard(fluid.Program()): @@ -138,7 +148,9 @@ class BadInputTest(unittest.TestCase): class TestIgnoreVarNameInWhile(unittest.TestCase): + def test_ignore_var(self): + def cond(i, ten, temp, y): return i < ten @@ -169,8 +181,10 @@ class TestIgnoreVarNameInWhile(unittest.TestCase): input_y = input_y.reshape(3, 1, 1) res, = exe.run(fluid.default_main_program(), - feed={'x': input_x, - 'y': input_y}, + feed={ + 'x': input_x, + 'y': input_y + }, fetch_list=[output]) self.assertListEqual(list(res.shape), [3, 1, 5]) diff --git a/python/paddle/fluid/tests/unittests/test_yolo_box_op.py b/python/paddle/fluid/tests/unittests/test_yolo_box_op.py index 19dcb49cd95..139c671947b 100644 --- a/python/paddle/fluid/tests/unittests/test_yolo_box_op.py +++ b/python/paddle/fluid/tests/unittests/test_yolo_box_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -55,8 +55,8 @@ def YoloBox(x, img_size, attrs): h) anchors = [(anchors[i], anchors[(i + 1)]) for i in range(0, len(anchors), 2)] - anchors_s = np.array( - [((an_w / input_w), (an_h / input_h)) for (an_w, an_h) in anchors]) + anchors_s = np.array([((an_w / input_w), (an_h / input_h)) + for (an_w, an_h) in anchors]) anchor_w = anchors_s[:, 0:1].reshape((1, an_num, 1, 1)) anchor_h = anchors_s[:, 1:2].reshape((1, an_num, 1, 1)) pred_box[:, :, :, :, 2] = (np.exp(pred_box[:, :, :, :, 2]) * anchor_w) @@ -70,9 +70,9 @@ def YoloBox(x, img_size, attrs): pred_score = (sigmoid(x[:, :, :, :, 5:]) * pred_conf) pred_box = (pred_box * (pred_conf > 0.0).astype('float32')) pred_box = pred_box.reshape((n, (-1), 4)) - (pred_box[:, :, :2], pred_box[:, :, 2:4]) = ( - (pred_box[:, :, :2] - (pred_box[:, :, 2:4] / 2.0)), - (pred_box[:, :, :2] + (pred_box[:, :, 2:4] / 2.0))) + (pred_box[:, :, :2], + pred_box[:, :, 2:4]) = ((pred_box[:, :, :2] - (pred_box[:, :, 2:4] / 2.0)), + (pred_box[:, :, :2] + (pred_box[:, :, 2:4] / 2.0))) pred_box[:, :, 0] = (pred_box[:, :, 0] * img_size[:, 1][:, np.newaxis]) pred_box[:, :, 1] = (pred_box[:, :, 1] * img_size[:, 0][:, np.newaxis]) pred_box[:, :, 2] = (pred_box[:, :, 2] * img_size[:, 1][:, np.newaxis]) @@ -89,6 +89,7 @@ def YoloBox(x, img_size, attrs): class TestYoloBoxOp(OpTest): + def setUp(self): self.initTestCase() self.op_type = 'yolo_box' @@ -129,6 +130,7 @@ class TestYoloBoxOp(OpTest): class TestYoloBoxOpNoClipBbox(TestYoloBoxOp): + def initTestCase(self): self.anchors = [10, 13, 16, 30, 33, 23] an_num = int((len(self.anchors) // 2)) @@ -146,6 +148,7 @@ class TestYoloBoxOpNoClipBbox(TestYoloBoxOp): class TestYoloBoxOpScaleXY(TestYoloBoxOp): + def initTestCase(self): self.anchors = [10, 13, 16, 30, 33, 23] an_num = int((len(self.anchors) // 2)) @@ -163,6 +166,7 @@ class TestYoloBoxOpScaleXY(TestYoloBoxOp): class TestYoloBoxOpIoUAware(TestYoloBoxOp): + def initTestCase(self): self.anchors = [10, 13, 16, 30, 33, 23] an_num = int((len(self.anchors) // 2)) @@ -180,35 +184,34 @@ class TestYoloBoxOpIoUAware(TestYoloBoxOp): class TestYoloBoxDygraph(unittest.TestCase): + def test_dygraph(self): paddle.disable_static() img_size = np.ones((2, 2)).astype('int32') img_size = paddle.to_tensor(img_size) x1 = np.random.random([2, 14, 8, 8]).astype('float32') x1 = paddle.to_tensor(x1) - (boxes, scores) = paddle.vision.ops.yolo_box( - x1, - img_size=img_size, - anchors=[10, 13, 16, 30], - class_num=2, - conf_thresh=0.01, - downsample_ratio=8, - clip_bbox=True, - scale_x_y=1.0) + (boxes, scores) = paddle.vision.ops.yolo_box(x1, + img_size=img_size, + anchors=[10, 13, 16, 30], + class_num=2, + conf_thresh=0.01, + downsample_ratio=8, + clip_bbox=True, + scale_x_y=1.0) assert ((boxes is not None) and (scores is not None)) x2 = np.random.random([2, 16, 8, 8]).astype('float32') x2 = paddle.to_tensor(x2) - (boxes, scores) = paddle.vision.ops.yolo_box( - x2, - img_size=img_size, - anchors=[10, 13, 16, 30], - class_num=2, - conf_thresh=0.01, - downsample_ratio=8, - clip_bbox=True, - scale_x_y=1.0, - iou_aware=True, - iou_aware_factor=0.5) + (boxes, scores) = paddle.vision.ops.yolo_box(x2, + img_size=img_size, + anchors=[10, 13, 16, 30], + class_num=2, + conf_thresh=0.01, + downsample_ratio=8, + clip_bbox=True, + scale_x_y=1.0, + iou_aware=True, + iou_aware_factor=0.5) paddle.enable_static() def test_eager(self): @@ -217,35 +220,35 @@ class TestYoloBoxDygraph(unittest.TestCase): class TestYoloBoxStatic(unittest.TestCase): + def test_static(self): x1 = paddle.static.data('x1', [2, 14, 8, 8], 
'float32') img_size = paddle.static.data('img_size', [2, 2], 'int32') - (boxes, scores) = paddle.vision.ops.yolo_box( - x1, - img_size=img_size, - anchors=[10, 13, 16, 30], - class_num=2, - conf_thresh=0.01, - downsample_ratio=8, - clip_bbox=True, - scale_x_y=1.0) + (boxes, scores) = paddle.vision.ops.yolo_box(x1, + img_size=img_size, + anchors=[10, 13, 16, 30], + class_num=2, + conf_thresh=0.01, + downsample_ratio=8, + clip_bbox=True, + scale_x_y=1.0) assert ((boxes is not None) and (scores is not None)) x2 = paddle.static.data('x2', [2, 16, 8, 8], 'float32') - (boxes, scores) = paddle.vision.ops.yolo_box( - x2, - img_size=img_size, - anchors=[10, 13, 16, 30], - class_num=2, - conf_thresh=0.01, - downsample_ratio=8, - clip_bbox=True, - scale_x_y=1.0, - iou_aware=True, - iou_aware_factor=0.5) + (boxes, scores) = paddle.vision.ops.yolo_box(x2, + img_size=img_size, + anchors=[10, 13, 16, 30], + class_num=2, + conf_thresh=0.01, + downsample_ratio=8, + clip_bbox=True, + scale_x_y=1.0, + iou_aware=True, + iou_aware_factor=0.5) assert ((boxes is not None) and (scores is not None)) class TestYoloBoxOpHW(TestYoloBoxOp): + def initTestCase(self): self.anchors = [10, 13, 16, 30, 33, 23] an_num = int((len(self.anchors) // 2)) diff --git a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py index 3f0e4f7a400..61f955e917d 100644 --- a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py @@ -53,8 +53,8 @@ def batch_xywh_box_iou(box1, box2): left = np.maximum(b1_left[:, :, np.newaxis], b2_left[:, np.newaxis, :]) right = np.minimum(b1_right[:, :, np.newaxis], b2_right[:, np.newaxis, :]) top = np.maximum(b1_top[:, :, np.newaxis], b2_top[:, np.newaxis, :]) - bottom = np.minimum(b1_bottom[:, :, np.newaxis], - b2_bottom[:, np.newaxis, :]) + bottom = np.minimum(b1_bottom[:, :, np.newaxis], b2_bottom[:, + np.newaxis, :]) inter_w = np.clip(right - left, 0., 1.) inter_h = np.clip(bottom - top, 0., 1.) 
@@ -91,16 +91,18 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs): pred_box = x[:, :, :, :, :4].copy() grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1)) grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w)) - pred_box[:, :, :, :, 0] = ( - grid_x + sigmoid(pred_box[:, :, :, :, 0]) * scale_x_y + bias_x_y) / w - pred_box[:, :, :, :, 1] = ( - grid_y + sigmoid(pred_box[:, :, :, :, 1]) * scale_x_y + bias_x_y) / h + pred_box[:, :, :, :, + 0] = (grid_x + sigmoid(pred_box[:, :, :, :, 0]) * scale_x_y + + bias_x_y) / w + pred_box[:, :, :, :, + 1] = (grid_y + sigmoid(pred_box[:, :, :, :, 1]) * scale_x_y + + bias_x_y) / h mask_anchors = [] for m in anchor_mask: mask_anchors.append((anchors[2 * m], anchors[2 * m + 1])) - anchors_s = np.array( - [(an_w / input_size, an_h / input_size) for an_w, an_h in mask_anchors]) + anchors_s = np.array([(an_w / input_size, an_h / input_size) + for an_w, an_h in mask_anchors]) anchor_w = anchors_s[:, 0:1].reshape((1, mask_num, 1, 1)) anchor_h = anchors_s[:, 1:2].reshape((1, mask_num, 1, 1)) pred_box[:, :, :, :, 2] = np.exp(pred_box[:, :, :, :, 2]) * anchor_w @@ -119,10 +121,10 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs): gtbox_shift[:, :, 1] = 0 anchors = [(anchors[2 * i], anchors[2 * i + 1]) for i in range(0, an_num)] - anchors_s = np.array( - [(an_w / input_size, an_h / input_size) for an_w, an_h in anchors]) - anchor_boxes = np.concatenate( - [np.zeros_like(anchors_s), anchors_s], axis=-1) + anchors_s = np.array([(an_w / input_size, an_h / input_size) + for an_w, an_h in anchors]) + anchor_boxes = np.concatenate([np.zeros_like(anchors_s), anchors_s], + axis=-1) anchor_boxes = np.tile(anchor_boxes[np.newaxis, :, :], (n, 1, 1)) ious = batch_xywh_box_iou(gtbox_shift, anchor_boxes) iou_matches = np.argmax(ious, axis=-1) @@ -153,9 +155,9 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs): objness[i, an_idx * h * w + gj * w + gi] = gtscore[i, j] for label_idx in range(class_num): - loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx], label_pos - if label_idx == gtlabel[i, j] else - label_neg) * gtscore[i, j] + loss[i] += sce( + x[i, an_idx, gj, gi, 5 + label_idx], label_pos if label_idx + == gtlabel[i, j] else label_neg) * gtscore[i, j] for j in range(mask_num * h * w): if objness[i, j] > 0: @@ -168,6 +170,7 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs): class TestYolov3LossOp(OpTest): + def setUp(self): self.initTestCase() self.op_type = 'yolov3_loss' @@ -232,6 +235,7 @@ class TestYolov3LossOp(OpTest): class TestYolov3LossWithoutLabelSmooth(TestYolov3LossOp): + def initTestCase(self): self.anchors = [ 10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, @@ -249,6 +253,7 @@ class TestYolov3LossWithoutLabelSmooth(TestYolov3LossOp): class TestYolov3LossNoGTScore(TestYolov3LossOp): + def initTestCase(self): self.anchors = [ 10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, @@ -266,6 +271,7 @@ class TestYolov3LossNoGTScore(TestYolov3LossOp): class TestYolov3LossWithScaleXY(TestYolov3LossOp): + def initTestCase(self): self.anchors = [ 10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, @@ -283,6 +289,7 @@ class TestYolov3LossWithScaleXY(TestYolov3LossOp): class TestYolov3LossDygraph(unittest.TestCase): + def test_dygraph(self): paddle.disable_static() x = np.random.random([2, 14, 8, 8]).astype('float32') @@ -293,54 +300,52 @@ class TestYolov3LossDygraph(unittest.TestCase): gt_box = paddle.to_tensor(gt_box) gt_label = paddle.to_tensor(gt_label) - loss = paddle.vision.ops.yolo_loss( - x, - 
gt_box=gt_box, - gt_label=gt_label, - anchors=[10, 13, 16, 30], - anchor_mask=[0, 1], - class_num=2, - ignore_thresh=0.7, - downsample_ratio=8, - use_label_smooth=True, - scale_x_y=1.) + loss = paddle.vision.ops.yolo_loss(x, + gt_box=gt_box, + gt_label=gt_label, + anchors=[10, 13, 16, 30], + anchor_mask=[0, 1], + class_num=2, + ignore_thresh=0.7, + downsample_ratio=8, + use_label_smooth=True, + scale_x_y=1.) assert loss is not None assert loss.shape == [2] paddle.enable_static() class TestYolov3LossStatic(unittest.TestCase): + def test_static(self): x = paddle.static.data('x', [2, 14, 8, 8], 'float32') gt_box = paddle.static.data('gt_box', [2, 10, 4], 'float32') gt_label = paddle.static.data('gt_label', [2, 10], 'int32') gt_score = paddle.static.data('gt_score', [2, 10], 'float32') - loss = paddle.vision.ops.yolo_loss( - x, - gt_box=gt_box, - gt_label=gt_label, - anchors=[10, 13, 16, 30], - anchor_mask=[0, 1], - class_num=2, - ignore_thresh=0.7, - downsample_ratio=8, - gt_score=gt_score, - use_label_smooth=True, - scale_x_y=1.) + loss = paddle.vision.ops.yolo_loss(x, + gt_box=gt_box, + gt_label=gt_label, + anchors=[10, 13, 16, 30], + anchor_mask=[0, 1], + class_num=2, + ignore_thresh=0.7, + downsample_ratio=8, + gt_score=gt_score, + use_label_smooth=True, + scale_x_y=1.) assert loss is not None - loss = paddle.vision.ops.yolo_loss( - x, - gt_box=gt_box, - gt_label=gt_label, - anchors=[10, 13, 16, 30], - anchor_mask=[0, 1], - class_num=2, - ignore_thresh=0.7, - downsample_ratio=8, - use_label_smooth=True, - scale_x_y=1.) + loss = paddle.vision.ops.yolo_loss(x, + gt_box=gt_box, + gt_label=gt_label, + anchors=[10, 13, 16, 30], + anchor_mask=[0, 1], + class_num=2, + ignore_thresh=0.7, + downsample_ratio=8, + use_label_smooth=True, + scale_x_y=1.) assert loss is not None diff --git a/python/paddle/fluid/tests/unittests/test_zeros_like_op.py b/python/paddle/fluid/tests/unittests/test_zeros_like_op.py index 80b4db793ff..3be1fb85565 100644 --- a/python/paddle/fluid/tests/unittests/test_zeros_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_zeros_like_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,6 +23,7 @@ from paddle.fluid.framework import _test_eager_guard class TestZerosLikeAPIError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): x = paddle.fluid.data('x', [3, 4]) @@ -34,6 +35,7 @@ class TestZerosLikeAPIError(unittest.TestCase): class TestZerosLikeAPI(unittest.TestCase): + def test_api(self): shape = [3, 4] startup_program = Program() @@ -62,6 +64,7 @@ class TestZerosLikeAPI(unittest.TestCase): class TestZerosLikeImpeartive(unittest.TestCase): + def test_out(self): shape = [3, 4] place = (fluid.CUDAPlace(0) diff --git a/python/paddle/fluid/tests/unittests/test_zeros_op.py b/python/paddle/fluid/tests/unittests/test_zeros_op.py index 01d7107cfae..ce30cab5016 100644 --- a/python/paddle/fluid/tests/unittests/test_zeros_op.py +++ b/python/paddle/fluid/tests/unittests/test_zeros_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. 
All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -26,6 +26,7 @@ from paddle.fluid.framework import _test_eager_guard class TestZerosOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): shape = [4] @@ -38,6 +39,7 @@ class TestZerosOpError(unittest.TestCase): class ApiZerosTest(unittest.TestCase): + def test_out(self): with program_guard(Program()): zeros = paddle.zeros(shape=[10], dtype='float64') @@ -84,7 +86,9 @@ class ApiZerosTest(unittest.TestCase): class ApiZerosError(unittest.TestCase): + def test_errors(self): + def test_error1(): with paddle.static.program_guard(fluid.Program()): ones = fluid.layers.zeros(shape=10, dtype='int64') diff --git a/python/paddle/fluid/tests/unittests/testsuite.py b/python/paddle/fluid/tests/unittests/testsuite.py index c92d9a429b6..e106f33c8a0 100644 --- a/python/paddle/fluid/tests/unittests/testsuite.py +++ b/python/paddle/fluid/tests/unittests/testsuite.py @@ -68,6 +68,7 @@ def create_op(scope, op_type, inputs, outputs, attrs, cache_list=None): def set_input(scope, op, inputs, place): + def __set_input__(var_name, var): if isinstance(var, tuple) or isinstance(var, np.ndarray): tensor = scope.find_var(var_name).get_tensor() @@ -116,8 +117,10 @@ def append_input_output(block, op_proto, np_list, is_input, dtype): if is_input: shape = list(np_value.shape) lod_level = 0 - return block.create_var( - dtype=dtype, shape=shape, lod_level=lod_level, name=name) + return block.create_var(dtype=dtype, + shape=shape, + lod_level=lod_level, + name=name) var_dict = {} for var_proto in proto_list: @@ -146,34 +149,34 @@ def append_loss_ops(block, output_names): if len(mean_inputs) == 1: loss = block.create_var(dtype=mean_inputs[0].dtype, shape=[1]) - op = block.append_op( - inputs={"X": mean_inputs}, outputs={"Out": loss}, type='mean') + op = block.append_op(inputs={"X": mean_inputs}, + outputs={"Out": loss}, + type='mean') op.desc.infer_var_type(block.desc) op.desc.infer_shape(block.desc) else: avg_sum = [] for cur_loss in mean_inputs: cur_avg_loss = block.create_var(dtype=cur_loss.dtype, shape=[1]) - op = block.append_op( - inputs={"X": [cur_loss]}, - outputs={"Out": [cur_avg_loss]}, - type="mean") + op = block.append_op(inputs={"X": [cur_loss]}, + outputs={"Out": [cur_avg_loss]}, + type="mean") op.desc.infer_var_type(block.desc) op.desc.infer_shape(block.desc) avg_sum.append(cur_avg_loss) loss_sum = block.create_var(dtype=avg_sum[0].dtype, shape=[1]) - op_sum = block.append_op( - inputs={"X": avg_sum}, outputs={"Out": loss_sum}, type='sum') + op_sum = block.append_op(inputs={"X": avg_sum}, + outputs={"Out": loss_sum}, + type='sum') op_sum.desc.infer_var_type(block.desc) op_sum.desc.infer_shape(block.desc) loss = block.create_var(dtype=loss_sum.dtype, shape=[1]) - op_loss = block.append_op( - inputs={"X": loss_sum}, - outputs={"Out": loss}, - type='scale', - attrs={'scale': 1.0 / float(len(avg_sum))}) + op_loss = block.append_op(inputs={"X": loss_sum}, + outputs={"Out": loss}, + type='scale', + attrs={'scale': 1.0 / float(len(avg_sum))}) op_loss.desc.infer_var_type(block.desc) 
op_loss.desc.infer_shape(block.desc) return loss diff --git a/python/paddle/fluid/tests/unittests/tokenizer/__init__.py b/python/paddle/fluid/tests/unittests/tokenizer/__init__.py index b9a7651e449..185a92b8d94 100644 --- a/python/paddle/fluid/tests/unittests/tokenizer/__init__.py +++ b/python/paddle/fluid/tests/unittests/tokenizer/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/fluid/tests/unittests/tokenizer/bert_tokenizer.py b/python/paddle/fluid/tests/unittests/tokenizer/bert_tokenizer.py index 00d5f4e7725..f396e892ecf 100755 --- a/python/paddle/fluid/tests/unittests/tokenizer/bert_tokenizer.py +++ b/python/paddle/fluid/tests/unittests/tokenizer/bert_tokenizer.py @@ -364,8 +364,8 @@ class BertTokenizer(PretrainedTokenizer): self.vocab = self.load_vocabulary(vocab_file, unk_token=unk_token) self.do_lower_case = do_lower_case self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case) - self.wordpiece_tokenizer = WordpieceTokenizer( - vocab=self.vocab, unk_token=unk_token) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab, + unk_token=unk_token) self.special_tokens_map = { 'unk_token': unk_token, 'sep_token': sep_token, @@ -433,8 +433,8 @@ class BertTokenizer(PretrainedTokenizer): token_ids_0 = [] token_ids_1 = [] return len( - self.build_inputs_with_special_tokens(token_ids_0, token_ids_1 - if pair else None)) + self.build_inputs_with_special_tokens( + token_ids_0, token_ids_1 if pair else None)) def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None): """ @@ -508,7 +508,9 @@ class BertTokenizer(PretrainedTokenizer): "ids is already formatted with special tokens for the model." ) return list( - map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, + map( + lambda x: 1 + if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0)) if token_ids_1 is not None: diff --git a/python/paddle/fluid/tests/unittests/tokenizer/tokenizer_utils.py b/python/paddle/fluid/tests/unittests/tokenizer/tokenizer_utils.py index 7da3cd56e25..d2cf118b632 100644 --- a/python/paddle/fluid/tests/unittests/tokenizer/tokenizer_utils.py +++ b/python/paddle/fluid/tests/unittests/tokenizer/tokenizer_utils.py @@ -90,8 +90,8 @@ def _is_punctuation(char): # Characters such as "^", "$", and "`" are not in the Unicode # Punctuation class but we treat them as punctuation anyways, for # consistency. 
- if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or - (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) + or (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): return True cat = unicodedata.category(char) if cat.startswith("P"): @@ -183,7 +183,7 @@ class PretrainedTokenizer(object): def __call__(self, text, text_pair=None, - max_seq_len: Optional[int]=None, + max_seq_len: Optional[int] = None, stride=0, is_split_into_words=False, pad_to_max_seq_len=False, @@ -288,26 +288,28 @@ class PretrainedTokenizer(object): """ # Input type checking for clearer error assert isinstance(text, str) or ( - isinstance(text, (list, tuple)) and (len(text) == 0 or ( - isinstance(text[0], str) or - (isinstance(text[0], (list, tuple)) and - (len(text[0]) == 0 or isinstance(text[0][0], str))))) + isinstance(text, (list, tuple)) and + (len(text) == 0 or + (isinstance(text[0], str) or + (isinstance(text[0], (list, tuple)) and + (len(text[0]) == 0 or isinstance(text[0][0], str))))) ), ("text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) " "or `List[List[str]]` (batch of pretokenized examples).") - assert (text_pair is None or isinstance(text_pair, str) or ( - isinstance(text_pair, (list, tuple)) and (len(text_pair) == 0 or ( - isinstance(text_pair[0], str) or - (isinstance(text_pair[0], (list, tuple)) and - (len(text_pair[0]) == 0 or isinstance(text_pair[0][0], str))))) - )), ( - "text_pair input must of type `str` (single example), `List[str]` (batch or single pretokenized example) " + assert ( + text_pair is None or isinstance(text_pair, str) or + (isinstance(text_pair, (list, tuple)) and + (len(text_pair) == 0 or + (isinstance(text_pair[0], str) or + (isinstance(text_pair[0], (list, tuple)) and + (len(text_pair[0]) == 0 or isinstance(text_pair[0][0], str)))))) + ), ("text_pair input must of type `str` (single example), `List[str]` (batch or single pretokenized example) " "or `List[List[str]]` (batch of pretokenized examples).") is_batched = bool( - (not is_split_into_words and isinstance(text, (list, tuple))) or - (is_split_into_words and isinstance(text, (list, tuple)) and - text and isinstance(text[0], (list, tuple)))) + (not is_split_into_words and isinstance(text, (list, tuple))) + or (is_split_into_words and isinstance(text, (list, tuple)) and text + and isinstance(text[0], (list, tuple)))) if is_batched: batch_text_or_text_pairs = list(zip( @@ -348,8 +350,8 @@ class PretrainedTokenizer(object): all_toks = [] set_attr = self.special_tokens_map for attr_value in set_attr.values(): - all_toks = all_toks + (list(attr_value) if isinstance(attr_value, ( - list, tuple)) else [attr_value]) + all_toks = all_toks + (list(attr_value) if isinstance( + attr_value, (list, tuple)) else [attr_value]) all_toks = list(set(all_toks)) return all_toks @@ -420,8 +422,8 @@ class PretrainedTokenizer(object): for file_id, map_list in cls.pretrained_resource_files_map.items(): vocab_files[file_id] = map_list[pretrained_model_name_or_path] init_configuration = copy.deepcopy( - cls.pretrained_init_configuration[ - pretrained_model_name_or_path]) + cls.pretrained_init_configuration[pretrained_model_name_or_path] + ) # From local dir path elif os.path.isdir(pretrained_model_name_or_path): for file_id, file_name in cls.resource_files_names.items(): @@ -488,8 +490,8 @@ class PretrainedTokenizer(object): # does include a vocab file path in it. 
However, if the vocab file # path included in json does not exist, such as was deleted, to make # it still work, use the vocab file under this dir. - elif not os.path.isfile(init_kwargs[args_name]) and os.path.isfile( - file_path): + elif not os.path.isfile( + init_kwargs[args_name]) and os.path.isfile(file_path): init_kwargs[args_name] = file_path # TODO(guosheng): avoid reduplication of position args and key word args tokenizer = cls(*init_args, **init_kwargs) @@ -696,8 +698,8 @@ class PretrainedTokenizer(object): results (List[int]): The list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token. """ - return [0] * ((len(token_ids_1) - if token_ids_1 else 0) + len(token_ids_0)) + return [0] * ( + (len(token_ids_1) if token_ids_1 else 0) + len(token_ids_0)) def create_token_type_ids_from_sequences(self, token_ids_0, @@ -731,8 +733,8 @@ class PretrainedTokenizer(object): token_ids_0 = [] token_ids_1 = [] return len( - self.build_inputs_with_special_tokens(token_ids_0, token_ids_1 - if pair else None)) + self.build_inputs_with_special_tokens( + token_ids_0, token_ids_1 if pair else None)) def encode(self, text, @@ -864,7 +866,8 @@ class PretrainedTokenizer(object): ids, pair_ids=pair_ids, num_tokens_to_remove=total_len - max_seq_len, - truncation_strategy=truncation_strategy, ) + truncation_strategy=truncation_strategy, + ) if return_overflowing_tokens: encoded_inputs["overflowing_tokens"] = overflowing_tokens encoded_inputs["num_truncated_tokens"] = total_len - max_seq_len @@ -872,8 +875,8 @@ class PretrainedTokenizer(object): # Add special tokens sequence = self.build_inputs_with_special_tokens(ids, pair_ids) - token_type_ids = self.create_token_type_ids_from_sequences(ids, - pair_ids) + token_type_ids = self.create_token_type_ids_from_sequences( + ids, pair_ids) # Build output dictionnary encoded_inputs["input_ids"] = sequence @@ -881,14 +884,14 @@ class PretrainedTokenizer(object): encoded_inputs["token_type_ids"] = token_type_ids if return_special_tokens_mask: encoded_inputs[ - "special_tokens_mask"] = self.get_special_tokens_mask(ids, - pair_ids) + "special_tokens_mask"] = self.get_special_tokens_mask( + ids, pair_ids) if return_length: encoded_inputs["seq_len"] = len(encoded_inputs["input_ids"]) # Check lengths - assert max_seq_len is None or len(encoded_inputs[ - "input_ids"]) <= max_seq_len + assert max_seq_len is None or len( + encoded_inputs["input_ids"]) <= max_seq_len # Padding needs_to_be_padded = pad_to_max_seq_len and \ @@ -898,8 +901,8 @@ class PretrainedTokenizer(object): difference = max_seq_len - len(encoded_inputs["input_ids"]) if self.padding_side == 'right': if return_attention_mask: - encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[ - "input_ids"]) + [0] * difference + encoded_inputs["attention_mask"] = [1] * len( + encoded_inputs["input_ids"]) + [0] * difference if return_token_type_ids: encoded_inputs["token_type_ids"] = ( encoded_inputs["token_type_ids"] + @@ -907,8 +910,9 @@ class PretrainedTokenizer(object): if return_special_tokens_mask: encoded_inputs["special_tokens_mask"] = encoded_inputs[ "special_tokens_mask"] + [1] * difference - encoded_inputs["input_ids"] = encoded_inputs[ - "input_ids"] + [self.pad_token_id] * difference + encoded_inputs["input_ids"] = encoded_inputs["input_ids"] + [ + self.pad_token_id + ] * difference elif self.padding_side == 'left': if return_attention_mask: encoded_inputs["attention_mask"] = [0] * difference + [ @@ -927,8 +931,8 @@ class PretrainedTokenizer(object): ] * difference + 
encoded_inputs["input_ids"] else: if return_attention_mask: - encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[ - "input_ids"]) + encoded_inputs["attention_mask"] = [1] * len( + encoded_inputs["input_ids"]) if return_position_ids: encoded_inputs["position_ids"] = list( @@ -1092,8 +1096,8 @@ class PretrainedTokenizer(object): offset_mapping = self.build_offset_mapping_with_special_tokens( mapping, pair_mapping) - sequence = self.build_inputs_with_special_tokens(ids, - pair_ids) + sequence = self.build_inputs_with_special_tokens( + ids, pair_ids) token_type_ids = self.create_token_type_ids_from_sequences( ids, pair_ids) @@ -1106,12 +1110,12 @@ class PretrainedTokenizer(object): "special_tokens_mask"] = self.get_special_tokens_mask( ids, pair_ids) if return_length: - encoded_inputs["seq_len"] = len(encoded_inputs[ - "input_ids"]) + encoded_inputs["seq_len"] = len( + encoded_inputs["input_ids"]) # Check lengths - assert max_seq_len is None or len(encoded_inputs[ - "input_ids"]) <= max_seq_len + assert max_seq_len is None or len( + encoded_inputs["input_ids"]) <= max_seq_len # Padding needs_to_be_padded = pad_to_max_seq_len and \ @@ -1120,13 +1124,13 @@ class PretrainedTokenizer(object): encoded_inputs['offset_mapping'] = offset_mapping if needs_to_be_padded: - difference = max_seq_len - len(encoded_inputs[ - "input_ids"]) + difference = max_seq_len - len( + encoded_inputs["input_ids"]) if self.padding_side == 'right': if return_attention_mask: encoded_inputs["attention_mask"] = [1] * len( - encoded_inputs[ - "input_ids"]) + [0] * difference + encoded_inputs["input_ids"] + ) + [0] * difference if return_token_type_ids: # 0 for padding token mask encoded_inputs["token_type_ids"] = ( @@ -1145,8 +1149,8 @@ class PretrainedTokenizer(object): if return_attention_mask: encoded_inputs["attention_mask"] = [ 0 - ] * difference + [1] * len(encoded_inputs[ - "input_ids"]) + ] * difference + [1] * len( + encoded_inputs["input_ids"]) if return_token_type_ids: # 0 for padding token mask encoded_inputs["token_type_ids"] = ( @@ -1209,8 +1213,8 @@ class PretrainedTokenizer(object): split_tokens = [] for token in self.basic_tokenizer.tokenize(text): for sub_token in self.wordpiece_tokenizer.tokenize(token): - split_tokens.append(sub_token - if sub_token != self.unk_token else token) + split_tokens.append( + sub_token if sub_token != self.unk_token else token) normalized_text, char_mapping = '', [] diff --git a/python/paddle/fluid/tests/unittests/transformer_model.py b/python/paddle/fluid/tests/unittests/transformer_model.py index 970eb2daea5..fd9f2ec95de 100644 --- a/python/paddle/fluid/tests/unittests/transformer_model.py +++ b/python/paddle/fluid/tests/unittests/transformer_model.py @@ -23,7 +23,8 @@ import paddle.fluid.layers as layers pos_enc_param_names = ( "src_pos_enc_table", - "trg_pos_enc_table", ) + "trg_pos_enc_table", +) batch_size = 2 @@ -122,8 +123,8 @@ def multi_head_attention(queries, return layers.reshape( x=trans_x, shape=list( - map(int, [batch_size, -1, trans_x.shape[2] * trans_x.shape[3] - ]))) + map(int, + [batch_size, -1, trans_x.shape[2] * trans_x.shape[3]]))) def scaled_dot_product_attention(q, k, v, attn_bias, d_model, dropout_rate): """ @@ -148,8 +149,9 @@ def multi_head_attention(queries, product = layers.matmul(x=scaled_q, y=k, transpose_y=True) weights = __softmax(layers.elementwise_add(x=product, y=attn_bias)) if dropout_rate: - weights = layers.dropout( - weights, dropout_prob=dropout_rate, is_test=False) + weights = layers.dropout(weights, + dropout_prob=dropout_rate, 
+ is_test=False) out = layers.matmul(weights, v) return out @@ -182,8 +184,8 @@ def positionwise_feed_forward(x, d_inner_hid, d_hid): hidden = layers.fc(input=x, size=d_inner_hid, num_flatten_dims=2, - param_attr=fluid.initializer.Uniform( - low=-(d_hid**-0.5), high=(d_hid**-0.5)), + param_attr=fluid.initializer.Uniform(low=-(d_hid**-0.5), + high=(d_hid**-0.5)), act="relu") out = layers.fc(input=hidden, size=d_hid, @@ -205,11 +207,10 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout=0.): if cmd == "a": # add residual connection out = out + prev_out if prev_out else out elif cmd == "n": # add layer normalization - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.initializer.Constant(1.), - bias_attr=fluid.initializer.Constant(0.)) + out = layers.layer_norm(out, + begin_norm_axis=len(out.shape) - 1, + param_attr=fluid.initializer.Constant(1.), + bias_attr=fluid.initializer.Constant(0.)) elif cmd == "d": # add dropout if dropout: out = layers.dropout(out, dropout_prob=dropout, is_test=False) @@ -235,31 +236,28 @@ def prepare_encoder(src_word, This module is used at the bottom of the encoder stacks. """ - src_word_emb = layers.embedding( - src_word, - size=[src_vocab_size, src_emb_dim], - padding_idx=src_pad_idx, - param_attr=fluid.initializer.Normal(0., 1.)) + src_word_emb = layers.embedding(src_word, + size=[src_vocab_size, src_emb_dim], + padding_idx=src_pad_idx, + param_attr=fluid.initializer.Normal(0., 1.)) src_pos_enc = layers.embedding( src_pos, size=[src_max_len, src_emb_dim], padding_idx=pos_pad_idx, - param_attr=fluid.ParamAttr( - name=pos_enc_param_name, trainable=False)) + param_attr=fluid.ParamAttr(name=pos_enc_param_name, trainable=False)) src_pos_enc.stop_gradient = True enc_input = src_word_emb + src_pos_enc # FIXME(guosheng): Decouple the program desc with batch_size. 
enc_input = layers.reshape(x=enc_input, shape=[batch_size, -1, src_emb_dim]) - return layers.dropout( - enc_input, dropout_prob=dropout, - is_test=False) if dropout else enc_input + return layers.dropout(enc_input, dropout_prob=dropout, + is_test=False) if dropout else enc_input -prepare_encoder = partial( - prepare_encoder, pos_enc_param_name=pos_enc_param_names[0]) -prepare_decoder = partial( - prepare_encoder, pos_enc_param_name=pos_enc_param_names[1]) +prepare_encoder = partial(prepare_encoder, + pos_enc_param_name=pos_enc_param_names[0]) +prepare_decoder = partial(prepare_encoder, + pos_enc_param_name=pos_enc_param_names[1]) def encoder_layer(enc_input, @@ -330,12 +328,14 @@ def decoder_layer(dec_input, d_value, d_model, n_head, - dropout_rate, ) + dropout_rate, + ) slf_attn_output = post_process_layer( dec_input, slf_attn_output, "dan", # residual connection + dropout + layer normalization - dropout_rate, ) + dropout_rate, + ) enc_attn_output = multi_head_attention( slf_attn_output, enc_output, @@ -345,21 +345,25 @@ def decoder_layer(dec_input, d_value, d_model, n_head, - dropout_rate, ) + dropout_rate, + ) enc_attn_output = post_process_layer( slf_attn_output, enc_attn_output, "dan", # residual connection + dropout + layer normalization - dropout_rate, ) + dropout_rate, + ) ffd_output = positionwise_feed_forward( enc_attn_output, d_inner_hid, - d_model, ) + d_model, + ) dec_output = post_process_layer( enc_attn_output, ffd_output, "dan", # residual connection + dropout + layer normalization - dropout_rate, ) + dropout_rate, + ) return dec_output @@ -388,7 +392,8 @@ def decoder(dec_input, d_value, d_model, d_inner_hid, - dropout_rate, ) + dropout_rate, + ) dec_input = dec_output return dec_output @@ -433,25 +438,28 @@ def build_inputs(max_length, n_head): all_inputs = [] for name, shape, dtype in zip(names, shapes, dtypes): all_inputs.append( - fluid.layers.data( - name=name, shape=shape, dtype=dtype, append_batch_size=False)) + fluid.layers.data(name=name, + shape=shape, + dtype=dtype, + append_batch_size=False)) return all_inputs def transformer( - src_vocab_size, - trg_vocab_size, - max_length, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - dropout_rate, - src_pad_idx, - trg_pad_idx, - pos_pad_idx, ): + src_vocab_size, + trg_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + src_pad_idx, + trg_pad_idx, + pos_pad_idx, +): src_word, src_pos, trg_word, trg_pos, src_slf_attn_bias, trg_slf_attn_bias, trg_src_attn_bias, gold, weights = build_inputs( max_length, n_head) @@ -463,7 +471,8 @@ def transformer( d_model, src_pad_idx, max_length, - dropout_rate, ) + dropout_rate, + ) enc_output = encoder( enc_input, src_slf_attn_bias, @@ -473,7 +482,8 @@ def transformer( d_value, d_model, d_inner_hid, - dropout_rate, ) + dropout_rate, + ) dec_input = prepare_decoder( trg_word, @@ -482,7 +492,8 @@ def transformer( d_model, trg_pad_idx, max_length, - dropout_rate, ) + dropout_rate, + ) dec_output = decoder( dec_input, enc_output, @@ -494,18 +505,19 @@ def transformer( d_value, d_model, d_inner_hid, - dropout_rate, ) + dropout_rate, + ) # TODO(guosheng): Share the weight matrix between the embedding layers and # the pre-softmax linear transformation. 
- predict = layers.reshape( - x=layers.fc(input=dec_output, - size=trg_vocab_size, - param_attr=fluid.initializer.Xavier(uniform=False), - bias_attr=False, - num_flatten_dims=2), - shape=[-1, trg_vocab_size], - act="softmax") + predict = layers.reshape(x=layers.fc( + input=dec_output, + size=trg_vocab_size, + param_attr=fluid.initializer.Xavier(uniform=False), + bias_attr=False, + num_flatten_dims=2), + shape=[-1, trg_vocab_size], + act="softmax") cost = layers.cross_entropy(input=predict, label=gold) weighted_cost = cost * weights diff --git a/python/paddle/fluid/tests/unittests/utils.py b/python/paddle/fluid/tests/unittests/utils.py index 07edd8171fe..66376382a97 100644 --- a/python/paddle/fluid/tests/unittests/utils.py +++ b/python/paddle/fluid/tests/unittests/utils.py @@ -88,6 +88,7 @@ def _is_equal_program(prog1, prog2): def load_dygraph_vars_to_scope(model_path, scope, place): + def load_dict_to_scope(scope, dictionary): if scope is None: scope = fluid.global_scope() @@ -107,6 +108,7 @@ def load_dygraph_vars_to_scope(model_path, scope, place): class DyGraphProgramDescTracerTestHelper(object): + def __init__(self, unittest_obj): self.unittest_obj = unittest_obj diff --git a/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py b/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py index aa3dcb6519c..33a84823460 100644 --- a/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py +++ b/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py @@ -90,6 +90,7 @@ xpu_test_device_op_type_white_list = [] class XPUOpTestWrapper(object): + def create_classes(self): base_class = None classes = [] @@ -177,13 +178,13 @@ def make_xpu_op_list(xpu_version): def get_xpu_op_support_types(op_name, dev_id=0): xpu_version = core.get_xpu_device_version(dev_id) - support_type_list = core.get_xpu_device_op_support_types(op_name, - xpu_version) + support_type_list = core.get_xpu_device_op_support_types( + op_name, xpu_version) support_type_str_list = [] for stype in support_type_list: if stype == paddle.bfloat16: - support_type_str_list.append(type_dict_paddle_to_str[ - paddle.bfloat16]) + support_type_str_list.append( + type_dict_paddle_to_str[paddle.bfloat16]) else: support_type_str_list.append(type_dict_paddle_to_str[stype]) type_white_list = get_type_white_list() @@ -239,11 +240,12 @@ def create_test_class(func_globals, continue class_obj = test_class[1] cls_name = "{0}_{1}".format(test_class[0], str(test_type)) - func_globals[cls_name] = type(cls_name, (class_obj, ), { - 'in_type': type_dict_str_to_numpy[test_type], - 'in_type_str': test_type, - 'op_type_need_check_grad': True - }) + func_globals[cls_name] = type( + cls_name, (class_obj, ), { + 'in_type': type_dict_str_to_numpy[test_type], + 'in_type_str': test_type, + 'op_type_need_check_grad': True + }) if hasattr(test_class_obj, 'use_dynamic_create_class' ) and test_class_obj.use_dynamic_create_class: diff --git a/python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py index b0bb9a37c16..fff2531a9c2 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest import paddle.fluid as fluid @@ -30,11 +31,13 @@ paddle.enable_static() class XPUTestAccuracyOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 
'accuracy' self.use_dynamic_create_class = False class TestXPUAccuracyOp(XPUOpTest): + def setUp(self): self.op_type = "accuracy" self.init_dtype() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py index 9e2825ab631..63a0aa2e59b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -29,6 +30,7 @@ paddle.enable_static() class TestActivationOPBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_dtype() @@ -54,11 +56,13 @@ class TestActivationOPBase(XPUOpTest): class XPUTestExpOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'exp' self.use_dynamic_create_class = False class XPUTestExp(TestActivationOPBase): + def set_case(self): self.op_type = 'exp' self.dtype = self.in_type @@ -76,11 +80,13 @@ for stype in support_types: class XPUTestSigmoidOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'sigmoid' self.use_dynamic_create_class = False class XPUTestSigmoid(TestActivationOPBase): + def set_case(self): self.op_type = "sigmoid" self.dtype = self.in_type @@ -95,18 +101,22 @@ class XPUTestSigmoidOP(XPUOpTestWrapper): self.x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) class XPUTestSigmoid2(XPUTestSigmoid): + def init_config(self): self.x = np.random.uniform(-2, 2, [100]).astype(self.dtype) class XPUTestSigmoid3(XPUTestSigmoid): + def init_config(self): self.x = np.random.uniform(-2, 2, [10, 12, 15]).astype(self.dtype) class XPUTestSigmoid4(XPUTestSigmoid): + def init_config(self): self.x = np.random.uniform(-2, 2, [19, 19]).astype(self.dtype) class XPUTestSigmoid5(XPUTestSigmoid): + def init_config(self): self.x = np.random.uniform(-2, 2, [10, 20, 30, 40]).astype(self.dtype) @@ -118,11 +128,13 @@ for stype in support_types: class XPUTestTanhOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'tanh' self.use_dynamic_create_class = False class XPUTestTanh(TestActivationOPBase): + def set_case(self): self.op_type = "tanh" self.dtype = self.in_type @@ -140,11 +152,13 @@ for stype in support_types: class XPUTestSqrtOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'sqrt' self.use_dynamic_create_class = False class XPUTestSqrt(TestActivationOPBase): + def set_case(self): self.op_type = "sqrt" self.dtype = self.in_type @@ -163,11 +177,13 @@ for stype in support_types: class XPUTestAbsOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'abs' self.use_dynamic_create_class = False class XPUTestAbs(TestActivationOPBase): + def set_case(self): self.op_type = "abs" self.dtype = self.in_type @@ -191,11 +207,13 @@ for stype in support_types: class XPUTestReluOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'relu' self.use_dynamic_create_class = False class XPUTestRelu(TestActivationOPBase): + def set_case(self): self.op_type = "relu" self.dtype = self.in_type @@ -216,11 +234,13 @@ for stype in support_types: class XPUTestGeluOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'gelu' self.use_dynamic_create_class = False class XPUTestGelu(TestActivationOPBase): + def set_case(self): self.op_type = "gelu" self.dtype = self.in_type @@ -242,19 +262,21 @@ for stype in support_types: def gelu(x, approximate): from scipy.special import erf if approximate: - y_ref = 0.5 * x * (1.0 + np.tanh( - 
np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3)))) + y_ref = 0.5 * x * ( + 1.0 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3)))) else: y_ref = 0.5 * x * (1 + erf(x / np.sqrt(2))) return y_ref.astype(x.dtype) class XPUTestHardSwishOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'hard_swish' self.use_dynamic_create_class = False class XPUTestHardSwish(TestActivationOPBase): + def set_case(self): self.op_type = "hard_swish" self.dtype = self.in_type @@ -281,11 +303,13 @@ def hard_swish(x, offset, threshold, scale): class XPUTestLogOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'log' self.use_dynamic_create_class = False class XPUTestLog(TestActivationOPBase): + def set_case(self): self.op_type = "log" self.dtype = self.in_type @@ -304,11 +328,13 @@ for stype in support_types: class XPUTestSquareOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'square' self.use_dynamic_create_class = False class XPUTestSquare(TestActivationOPBase): + def set_case(self): self.op_type = "square" self.dtype = self.in_type @@ -323,18 +349,22 @@ class XPUTestSquareOP(XPUOpTestWrapper): self.x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) class XPUTestSquare2(XPUTestSquare): + def init_config(self): self.x = np.random.uniform(-2, 2, [100]).astype(self.dtype) class XPUTestSquare3(XPUTestSquare): + def init_config(self): self.x = np.random.uniform(-2, 2, [1, 15, 19]).astype(self.dtype) class XPUTestSquare4(XPUTestSquare): + def init_config(self): self.x = np.random.uniform(-2, 2, [100, 10]).astype(self.dtype) class XPUTestSquare5(XPUTestSquare): + def init_config(self): self.x = np.random.uniform(-2, 2, [1, 2, 5, 17]).astype(self.dtype) @@ -345,11 +375,13 @@ for stype in support_types: class XPUTestPowOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'pow' self.use_dynamic_create_class = False class XPUTestPowBase(TestActivationOPBase): + def set_case(self): self.op_type = "pow" self.dtype = self.in_type @@ -366,34 +398,40 @@ class XPUTestPowOP(XPUOpTestWrapper): self.factor = 3.0 class XPUTestPow1(XPUTestPowBase): + def init_config(self): self.x = np.random.uniform(-1, 1, [1024, 8]).astype(self.dtype) self.factor = 1 class XPUTestPow2(XPUTestPowBase): + def init_config(self): self.x = np.random.uniform(-1, 1, [1024, 8]).astype(self.dtype) self.factor = 2 class XPUTestPow3(XPUTestPowBase): + def init_config(self): self.x = np.random.uniform(-2, 2, [4, 512, 15, 15]).astype(self.dtype) self.factor = 3 class XPUTestPow4(XPUTestPowBase): + def init_config(self): self.x = np.random.uniform(-2, 2, [4, 256, 22, 22]).astype(self.dtype) self.factor = 4 class XPUTestPow5(XPUTestPowBase): + def init_config(self): self.x = np.random.uniform(0, 1, [4, 256, 22, 22]).astype(self.dtype) self.factor = 1.2 class XPUTestPow6(XPUTestPowBase): + def init_config(self): self.x = np.random.uniform(0, 1, [1024, 8]).astype(self.dtype) self.factor = 3.2 @@ -405,11 +443,13 @@ for stype in support_types: class XPUTestLeakyReluOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'leaky_relu' self.use_dynamic_create_class = False class XPUTestLeakyRelu(TestActivationOPBase): + def set_case(self): self.op_type = "leaky_relu" self.dtype = self.in_type @@ -417,7 +457,8 @@ class XPUTestLeakyReluOP(XPUOpTestWrapper): x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) alpha = np.random.uniform( 0, - 1, ) + 1, + ) out = leaky_relu(x, alpha) self.inputs = {'X': x} @@ -439,11 +480,13 @@ def leaky_relu(x, alpha): class XPUTestReciprocalOP(XPUOpTestWrapper): + def 
__init__(self): self.op_name = 'reciprocal' self.use_dynamic_create_class = False class XPUTestRecipocal(TestActivationOPBase): + def set_case(self): self.op_type = "reciprocal" self.dtype = self.in_type @@ -463,11 +506,13 @@ for stype in support_types: class XPUTestSoftPlusOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'softplus' self.use_dynamic_create_class = False class XPUTestSoftPlusBase(TestActivationOPBase): + def set_case(self): self.op_type = "softplus" self.dtype = self.in_type @@ -485,15 +530,18 @@ class XPUTestSoftPlusOP(XPUOpTestWrapper): self.x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) class XPUTestSoftPlus2(XPUTestSoftPlusBase): + def init_config(self): self.x = np.random.uniform(-2, 2, [1024, 8]).astype(self.dtype) class XPUTestSoftPlus3(XPUTestSoftPlusBase): + def init_config(self): self.x = np.random.uniform(-2, 2, [4, 512, 15, 15]).astype(self.dtype) class XPUTestSoftPlus4(XPUTestSoftPlusBase): + def init_config(self): self.x = np.random.uniform(-2, 2, [4, 256, 22, 22]).astype(self.dtype) @@ -513,11 +561,13 @@ def ref_softplus(x, beta=1, threshold=20): # XPU_KP unittests, these ops can be found from xpu_op_kpfirst_list.h class XPUTestBReluOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'brelu' self.use_dynamic_create_class = False class XPUTestBRelu(TestActivationOPBase): + def set_case(self): self.op_type = "brelu" self.dtype = self.in_type @@ -544,11 +594,13 @@ for stype in support_types: class XPUTestCeilOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'ceil' self.use_dynamic_create_class = False class XPUTestCeil(TestActivationOPBase): + def set_case(self): self.op_type = "ceil" self.dtype = self.in_type @@ -568,11 +620,13 @@ for stype in support_types: class XPUTestCeluOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'celu' self.use_dynamic_create_class = False class XPUTestCelu(TestActivationOPBase): + def set_case(self): self.op_type = "celu" self.dtype = self.in_type @@ -597,11 +651,13 @@ def ref_celu(x, alpha): class XPUTestEluOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'elu' self.use_dynamic_create_class = False class XPUTestElu(TestActivationOPBase): + def set_case(self): self.op_type = "elu" self.dtype = self.in_type @@ -626,11 +682,13 @@ def ref_elu(x, alpha): class XPUTestFloorOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'floor' self.use_dynamic_create_class = False class XPUTestFloor(TestActivationOPBase): + def set_case(self): self.op_type = "floor" self.dtype = self.in_type @@ -650,11 +708,13 @@ for stype in support_types: class XPUTestHardShrinkOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'hard_shrink' self.use_dynamic_create_class = False class XPUTestHardShrink(TestActivationOPBase): + def set_case(self): self.op_type = "hard_shrink" self.dtype = self.in_type @@ -682,11 +742,13 @@ def ref_hardshrink(x, threshold): class XPUTestHardSigmoidOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'hard_sigmoid' self.use_dynamic_create_class = False class XPUTestHardSigmoid(TestActivationOPBase): + def set_case(self): self.op_type = "hard_sigmoid" self.dtype = self.in_type @@ -723,11 +785,13 @@ def ref_hardsigmoid(x, slope=0.166666666666667, offset=0.5): class XPUTestLog1pOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'log1p' self.use_dynamic_create_class = False class XPUTestLog1p(TestActivationOPBase): + def set_case(self): self.op_type = "log1p" self.dtype = self.in_type @@ -747,11 +811,13 @@ for stype in support_types: class 
XPUTestLogsigmoidOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'logsigmoid' self.use_dynamic_create_class = False class XPUTestLogsigmoid(TestActivationOPBase): + def set_case(self): self.op_type = "logsigmoid" self.dtype = self.in_type @@ -771,11 +837,13 @@ for stype in support_types: class XPUTestRelu6OP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'relu6' self.use_dynamic_create_class = False class XPUTestRelu6(TestActivationOPBase): + def set_case(self): self.op_type = "relu6" self.dtype = self.in_type @@ -803,11 +871,13 @@ def ref_relu6(x, threshold=6.0): class XPUTestSiluOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'silu' self.use_dynamic_create_class = False class XPUTestSilu(TestActivationOPBase): + def set_case(self): self.op_type = "silu" self.dtype = self.in_type @@ -827,11 +897,13 @@ for stype in support_types: class XPUTestSoftReluOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'soft_relu' self.use_dynamic_create_class = False class XPUTestSoftRelu(TestActivationOPBase): + def set_case(self): self.op_type = "soft_relu" self.dtype = self.in_type @@ -858,11 +930,13 @@ for stype in support_types: class XPUTestSoftSignOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'softsign' self.use_dynamic_create_class = False class XPUTestSoftSign(TestActivationOPBase): + def set_case(self): self.op_type = "softsign" self.dtype = self.in_type @@ -887,11 +961,13 @@ def ref_softsign(x): class XPUTestSoftshrinkOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'softshrink' self.use_dynamic_create_class = False class XPUTestSoftshrink(TestActivationOPBase): + def set_case(self): self.op_type = "softshrink" self.dtype = self.in_type @@ -919,11 +995,13 @@ def ref_softshrink(x, threshold=0.5): class XPUTestSwishOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'swish' self.use_dynamic_create_class = False class XPUTestSwishBase(TestActivationOPBase): + def set_case(self): self.op_type = "swish" self.dtype = self.in_type @@ -939,15 +1017,18 @@ class XPUTestSwishOP(XPUOpTestWrapper): self.x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) class XPUTestSwish2(XPUTestSwishBase): + def init_config(self): self.x = np.random.uniform(-2, 2, [1024, 8]).astype(self.dtype) class XPUTestSwish3(XPUTestSwishBase): + def init_config(self): self.x = np.random.uniform(-2, 2, [4, 512, 15, 15]).astype(self.dtype) class XPUTestSwish4(XPUTestSwishBase): + def init_config(self): self.x = np.random.uniform(-2, 2, [4, 256, 22, 22]).astype(self.dtype) @@ -965,11 +1046,13 @@ def ref_swish(x): class XPUTestThresholdedReluOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'thresholded_relu' self.use_dynamic_create_class = False class XPUTestThresholdedRelu(TestActivationOPBase): + def set_case(self): self.op_type = "thresholded_relu" self.dtype = self.in_type diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adam_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_adam_op_xpu.py index 6495c0af1a1..3be4cac81ca 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_adam_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_adam_op_xpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -28,6 +29,7 @@ from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, class XPUTestAdamOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'adam' self.use_dynamic_create_class = False @@ -50,9 +52,12 @@ class 
XPUTestAdamOp(XPUOpTestWrapper): moment2_out = adam_step(self.inputs, self.attrs) self.outputs = { - 'Moment1Out': moment1_out, - 'Moment2Out': moment2_out, - 'ParamOut': param_out, + 'Moment1Out': + moment1_out, + 'Moment2Out': + moment2_out, + 'ParamOut': + param_out, 'Beta1PowOut': np.array([self.beta1_pow]).astype("float32") * self.beta1, 'Beta2PowOut': @@ -177,8 +182,8 @@ class XPUTestAdamOp(XPUOpTestWrapper): } # Verify output for this step - self.check_output_with_place( - place=paddle.XPUPlace(0), atol=1e-2) + self.check_output_with_place(place=paddle.XPUPlace(0), + atol=1e-2) # Output of this step becomes input for next step self.inputs['Param'] = param_out @@ -254,13 +259,13 @@ def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad, param_out = np.zeros(shape=[height, row_numel]) def update_row(row_id, update_value): - moment1_out[row_id] = beta1 * moment1[row_id] + (1 - beta1 - ) * update_value + moment1_out[row_id] = beta1 * moment1[row_id] + (1 - + beta1) * update_value moment2_out[row_id] = beta2 * moment2[row_id] + ( 1 - beta2) * np.square(update_value) lr_t = lr * np.sqrt(1 - beta2_pow) / (1 - beta1_pow) - param_out[row_id] = param[row_id] - lr_t * (moment1_out[row_id] / ( - np.sqrt(moment2_out[row_id]) + epsilon)) + param_out[row_id] = param[row_id] - lr_t * ( + moment1_out[row_id] / (np.sqrt(moment2_out[row_id]) + epsilon)) if lazy_mode: for idx, row_id in enumerate(rows): @@ -276,6 +281,7 @@ def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad, class TestSparseAdamOp(unittest.TestCase): + def setup(self, scope, place, lazy_mode): beta1 = 0.78 beta2 = 0.836 diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adamw_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_adamw_op_xpu.py index 99e9fdd123e..1ccf8a1fdaa 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_adamw_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_adamw_op_xpu.py @@ -13,6 +13,7 @@ # limitations under the License. 
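NOTE (illustrative sketch, not part of the applied diff): the dominant rewrite in the hunks above is yapf's argument wrapping — instead of breaking after the opening parenthesis and using a hanging indent, continuation arguments are aligned under the first argument, one per line, as in the check_output_with_place change just above. A minimal runnable sketch of the two layouts, using a hypothetical helper in place of the XPUOpTest method:

def check(place=None, atol=1e-5):
    # Stand-in (hypothetical) for the test helpers reformatted above.
    return place, atol

# Pre-format layout: break after "(" with a hanging indent.
check(
    place="XPUPlace(0)", atol=1e-2)

# Post-format layout: arguments aligned with the opening parenthesis.
check(place="XPUPlace(0)",
      atol=1e-2)

Both calls are identical at runtime; only the layout differs.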
import sys + sys.path.append("..") import unittest @@ -76,11 +77,13 @@ def simple_lr_setting(param, decay_rate, n_layers): class XPUTestAdamwOp1(XPUOpTestWrapper): + def __init__(self): self.op_name = 'adamw' self.use_dynamic_create_class = False class TestAdamW(XPUOpTest): + def setUp(self): #Test AdamW Op with supplied attributes self.op_type = "adamw" @@ -136,20 +139,26 @@ class XPUTestAdamwOp1(XPUOpTestWrapper): self.check_output_with_place(place=paddle.XPUPlace(0)) class TestAdamW2(TestAdamW): + def init_shape(self): - self.shape = [1000, ] + self.shape = [ + 1000, + ] class TestAdamW3(TestAdamW): + def init_shape(self): self.shape = [200, 3000] class XPUTestAdamwOp2(XPUOpTestWrapper): + def __init__(self): self.op_name = 'adamw' self.use_dynamic_create_class = False class TestAdamWOp(unittest.TestCase): + def test_adamw_op_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype(self.in_type_str) @@ -203,12 +212,11 @@ class XPUTestAdamwOp2(XPUOpTestWrapper): dtype=self.in_type_str, persistable=True) betas = [beta1, beta2] - opt = paddle.optimizer.AdamW( - learning_rate=1e-5, - beta1=beta1, - beta2=beta2, - weight_decay=0.01, - epsilon=1e-8) + opt = paddle.optimizer.AdamW(learning_rate=1e-5, + beta1=beta1, + beta2=beta2, + weight_decay=0.01, + epsilon=1e-8) opt.minimize(loss) exe.run(startup) @@ -223,16 +231,20 @@ class XPUTestAdamwOp2(XPUOpTestWrapper): paddle.disable_static() linear = paddle.nn.Linear(10, 10) with self.assertRaises(ValueError): - adam = paddle.optimizer.AdamW( - 0.1, beta1=-1, parameters=linear.parameters()) + adam = paddle.optimizer.AdamW(0.1, + beta1=-1, + parameters=linear.parameters()) with self.assertRaises(ValueError): - adam = paddle.optimizer.AdamW( - 0.1, beta2=-1, parameters=linear.parameters()) + adam = paddle.optimizer.AdamW(0.1, + beta2=-1, + parameters=linear.parameters()) with self.assertRaises(ValueError): - adam = paddle.optimizer.AdamW( - 0.1, epsilon=-1, parameters=linear.parameters()) + adam = paddle.optimizer.AdamW(0.1, + epsilon=-1, + parameters=linear.parameters()) class TestAdamWOpGroup(TestAdamWOp): + def test_adamw_op_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype(self.in_type_str) @@ -258,6 +270,7 @@ class XPUTestAdamwOp2(XPUOpTestWrapper): adam.clear_gradients() class TestAdamWOpGroupWithLR(TestAdamWOp): + def test_adamw_op_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype(self.in_type_str) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py index 3385d671d73..b78648f1d7f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py @@ -18,6 +18,7 @@ Unit testing for affine_channel_op from __future__ import print_function import sys + sys.path.append("..") import unittest @@ -40,6 +41,7 @@ def affine_channel(x, scale, bias, layout): class TestAffineChannelOp(XPUOpTest): + def setUp(self): self.op_type = "affine_channel" self.init_test_case() @@ -70,15 +72,17 @@ class TestAffineChannelOp(XPUOpTest): if core.is_compiled_with_xpu(): paddle.enable_static() place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['Scale', 'Bias'], 'Out', no_grad_set=set('X')) + self.check_grad_with_place(place, ['Scale', 'Bias'], + 'Out', + no_grad_set=set('X')) def test_check_grad_stopgrad_dscale_dbias(self): if core.is_compiled_with_xpu(): 
paddle.enable_static() place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['X'], 'Out', no_grad_set=set(['Scale', 'Bias'])) + self.check_grad_with_place(place, ['X'], + 'Out', + no_grad_set=set(['Scale', 'Bias'])) def init_test_case(self): self.shape = [2, 100, 3, 3] @@ -87,6 +91,7 @@ class TestAffineChannelOp(XPUOpTest): class TestAffineChannelOpError(unittest.TestCase): + def test_errors(self): with fluid.program_guard(fluid.Program()): @@ -97,28 +102,32 @@ class TestAffineChannelOpError(unittest.TestCase): self.assertRaises(TypeError, test_x_type) def test_x_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[None, 1, 2, 2], dtype='int32') + x2 = fluid.layers.data(name='x2', + shape=[None, 1, 2, 2], + dtype='int32') fluid.layers.affine_channel(x2) self.assertRaises(TypeError, test_x_dtype) def test_scale_type(): - x3 = fluid.layers.data( - name='x3', shape=[None, 1, 2, 2], dtype='float32') + x3 = fluid.layers.data(name='x3', + shape=[None, 1, 2, 2], + dtype='float32') fluid.layers.affine_channel(x3, scale=1) self.assertRaises(TypeError, test_scale_type) def test_bias_type(): - x4 = fluid.layers.data( - name='x4', shape=[None, 1, 2, 2], dtype='float32') + x4 = fluid.layers.data(name='x4', + shape=[None, 1, 2, 2], + dtype='float32') fluid.layers.affine_channel(x4, bias=1) self.assertRaises(TypeError, test_bias_type) class TestAffineChannelNHWC(TestAffineChannelOp): + def init_test_case(self): self.shape = [2, 3, 3, 100] self.C = 100 @@ -132,6 +141,7 @@ class TestAffineChannelNHWC(TestAffineChannelOp): class TestAffineChannel2D(TestAffineChannelOp): + def init_test_case(self): self.shape = [2, 100] self.C = 100 diff --git a/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py index 9a2976f82a4..3ef4701cdf3 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,6 +13,7 @@ # limitations under the License. import sys + sys.path.append("..") import paddle import unittest @@ -20,10 +21,12 @@ import numpy as np from op_test_xpu import XPUOpTest from op_test import OpTest, skip_check_grad_ci import paddle.fluid as fluid + paddle.enable_static() class TestCheckFiniteAndUnscaleOp(XPUOpTest): + def setUp(self): self.op_type = "check_finite_and_unscale" self.init_dtype() @@ -65,7 +68,7 @@ class TestCheckFiniteAndUnscaleOp(XPUOpTest): # self.dtype = np.float32 # def test_check_output(self): -# # When input contains nan, do not check the output, +# # When input contains nan, do not check the output, # # since the output may be nondeterministic and will be discarded. 
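NOTE (illustrative sketch, not part of the applied diff): another recurring change in these test files is the lone "+" on an empty line right after every "class ...:" header — the formatter configuration used here (an assumption inferred from the hunks; the config itself is not shown in this section) inserts a blank line between a class header and its first method. A small runnable sketch of the before/after layout:

class Before:
    def setUp(self):  # old layout: method immediately follows the header
        self.op_type = "demo"


class After:

    def setUp(self):  # new layout: blank line after the class header
        self.op_type = "demo"

The blank line added between import sys and sys.path.append("..") in the same hunks comes from the same formatting pass.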
# if paddle.is_compiled_with_xpu(): # place = paddle.XPUPlace(0) @@ -89,7 +92,7 @@ class TestCheckFiniteAndUnscaleOp(XPUOpTest): # self.dtype = np.float32 # def test_check_output(self): -# # When input contains inf, do not check the output, +# # When input contains inf, do not check the output, # # since the output may be nondeterministic and will be discarded. # if paddle.is_compiled_with_xpu(): # place = paddle.XPUPlace(0) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_arg_max_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_arg_max_op_xpu.py index 519a185250a..792a729d1fa 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_arg_max_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_arg_max_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -28,10 +29,12 @@ paddle.enable_static() class XPUTestArgMax(XPUOpTestWrapper): + def __init__(self): self.op_name = 'arg_max' class XPUBaseTestCase(XPUOpTest): + def initTestCase(self): self.dims = (3, 4) self.axis = 1 @@ -52,51 +55,61 @@ class XPUTestArgMax(XPUOpTestWrapper): self.check_output_with_place(place) class TestArgMaxCase1(XPUBaseTestCase): + def initTestCase(self): self.dims = (3, 4, 5) self.axis = -1 class TestArgMaxCase2(XPUBaseTestCase): + def initTestCase(self): self.dims = (3, 4, 5) self.axis = 0 class TestArgMaxCase3(XPUBaseTestCase): + def initTestCase(self): self.dims = (3, 4, 5) self.axis = 1 class TestArgMaxCase4(XPUBaseTestCase): + def initTestCase(self): self.dims = (3, 4, 5) self.axis = 2 class TestArgMaxCase5(XPUBaseTestCase): + def initTestCase(self): self.dims = (3, 4) self.axis = -1 class TestArgMaxCase6(XPUBaseTestCase): + def initTestCase(self): self.dims = (3, 4) self.axis = 0 class TestArgMaxCase7(XPUBaseTestCase): + def initTestCase(self): self.dims = (3, 4) self.axis = 1 class TestArgMaxCase8(XPUBaseTestCase): + def initTestCase(self): self.dims = (1, ) self.axis = 0 class TestArgMaxCase9(XPUBaseTestCase): + def initTestCase(self): self.dims = (2, ) self.axis = 0 class TestArgMaxCase10(XPUBaseTestCase): + def initTestCase(self): self.dims = (3, ) self.axis = 0 @@ -108,6 +121,7 @@ for stype in support_types: class TestArgMaxAPI(unittest.TestCase): + def initTestCase(self): self.dims = (3, 4, 5) self.dtype = 'float32' @@ -119,6 +133,7 @@ class TestArgMaxAPI(unittest.TestCase): self.place = [paddle.XPUPlace(0)] def test_dygraph_api(self): + def run(place): paddle.disable_static(place) np.random.seed(2021) @@ -126,8 +141,8 @@ class TestArgMaxAPI(unittest.TestCase): tensor_input = paddle.to_tensor(numpy_input) numpy_output = np.argmax(numpy_input, axis=self.axis) paddle_output = paddle.argmax(tensor_input, axis=self.axis) - self.assertEqual( - np.allclose(numpy_output, paddle_output.numpy()), True) + self.assertEqual(np.allclose(numpy_output, paddle_output.numpy()), + True) paddle.enable_static() for place in self.place: @@ -135,6 +150,7 @@ class TestArgMaxAPI(unittest.TestCase): class TestArgMaxAPI_2(unittest.TestCase): + def initTestCase(self): self.dims = (3, 4, 5) self.dtype = 'float32' @@ -147,17 +163,19 @@ class TestArgMaxAPI_2(unittest.TestCase): self.place = [paddle.XPUPlace(0)] def test_dygraph_api(self): + def run(place): paddle.disable_static(place) np.random.seed(2021) numpy_input = (np.random.random(self.dims)).astype(self.dtype) tensor_input = paddle.to_tensor(numpy_input) - numpy_output = np.argmax( - numpy_input, axis=self.axis).reshape(1, 4, 5) - paddle_output = 
paddle.argmax( - tensor_input, axis=self.axis, keepdim=self.keep_dims) - self.assertEqual( - np.allclose(numpy_output, paddle_output.numpy()), True) + numpy_output = np.argmax(numpy_input, + axis=self.axis).reshape(1, 4, 5) + paddle_output = paddle.argmax(tensor_input, + axis=self.axis, + keepdim=self.keep_dims) + self.assertEqual(np.allclose(numpy_output, paddle_output.numpy()), + True) self.assertEqual(numpy_output.shape, paddle_output.numpy().shape) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py index 4290c0abf12..7f7ee2e7a12 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") import paddle @@ -28,6 +29,7 @@ paddle.enable_static() class XPUTestArgsortOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'argsort' self.use_dynamic_create_class = True @@ -44,6 +46,7 @@ class XPUTestArgsortOp(XPUOpTestWrapper): return base_class, classes class TestArgsortOp(XPUOpTest): + def setUp(self): self.set_xpu() self.op_type = "argsort" @@ -57,9 +60,10 @@ class XPUTestArgsortOp(XPUOpTestWrapper): if self.dtype == np.float32: self.x = np.random.random(self.input_shape).astype(self.dtype) else: - self.x = np.random.randint( - low=-1000, high=1000, - size=self.input_shape).astype(self.dtype) + self.x = np.random.randint(low=-1000, + high=1000, + size=self.input_shape).astype( + self.dtype) self.inputs = {"X": self.x} self.attrs = {"axis": self.axis, "descending": self.descending} @@ -69,15 +73,14 @@ class XPUTestArgsortOp(XPUOpTestWrapper): def get_output(self): if self.descending: self.indices = np.flip( - np.argsort( - self.x, kind='heapsort', axis=self.axis), + np.argsort(self.x, kind='heapsort', axis=self.axis), self.axis) self.sorted_x = np.flip( - np.sort( - self.x, kind='heapsort', axis=self.axis), self.axis) + np.sort(self.x, kind='heapsort', axis=self.axis), self.axis) else: - self.indices = np.argsort( - self.x, kind='heapsort', axis=self.axis) + self.indices = np.argsort(self.x, + kind='heapsort', + axis=self.axis) self.sorted_x = np.sort(self.x, kind='heapsort', axis=self.axis) def set_xpu(self): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py index 6c2fe6ba930..2175243ef1d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -71,8 +72,8 @@ def ref_batch_norm_train(x, y_grad, scale, bias, mean, variance, momentum, saved_mean_tile = np.tile(saved_mean_tile, (n, 1, h, w)) saved_variance_tile = np.reshape(saved_variance, (1, c, 1, 1)) saved_variance_tile = np.tile(saved_variance_tile, (n, 1, h, w)) - normalized_x = ( - x - saved_mean_tile) / np.sqrt(saved_variance_tile + epsilon) + normalized_x = (x - saved_mean_tile) / np.sqrt(saved_variance_tile + + epsilon) scale_tile = np.reshape(scale, (1, c, 1, 1)) scale_tile = np.tile(scale_tile, (n, 1, h, w)) bias_tile = np.reshape(bias, (1, c, 1, 1)) @@ -109,9 +110,8 @@ def ref_batch_norm_train(x, y_grad, scale, bias, mean, variance, momentum, x = np.transpose(x, (0, 2, 3, 1)) y_grad = 
np.transpose(y_grad, (0, 2, 3, 1)) x_grad = scale * ( - y_grad - np.mean( - y_grad, axis=(0, 1, 2)) - (x - saved_mean) * np.mean( - y_grad * (x - saved_mean), axis=(0, 1, 2)) / + y_grad - np.mean(y_grad, axis=(0, 1, 2)) - + (x - saved_mean) * np.mean(y_grad * (x - saved_mean), axis=(0, 1, 2)) / (saved_variance + epsilon)) / np.sqrt(saved_variance + epsilon) scale_grad = np.sum(y_grad * (x - saved_mean) / np.sqrt(saved_variance + epsilon), @@ -126,6 +126,7 @@ def ref_batch_norm_train(x, y_grad, scale, bias, mean, variance, momentum, class XPUTestBatchNormOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'batch_norm' self.use_dynamic_create_class = False @@ -133,6 +134,7 @@ class XPUTestBatchNormOp(XPUOpTestWrapper): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestBatchNormOp(unittest.TestCase): + def setUp(self): self.op_type = "batch_norm" self.dtype = np.float32 @@ -154,10 +156,10 @@ class XPUTestBatchNormOp(XPUOpTestWrapper): + self.data_layout) np.random.seed(1024) self.x_np = np.random.random_sample(self.shape).astype(self.dtype) - self.scale_np = np.random.random_sample( - [channel_size]).astype(self.dtype) - self.bias_np = np.random.random_sample( - [channel_size]).astype(self.dtype) + self.scale_np = np.random.random_sample([channel_size + ]).astype(self.dtype) + self.bias_np = np.random.random_sample([channel_size + ]).astype(self.dtype) self.mean_np = np.zeros([channel_size]).astype(self.dtype) self.variance_np = np.ones([channel_size]).astype(self.dtype) self.saved_mean_np = np.zeros([channel_size]).astype(self.dtype) @@ -197,9 +199,10 @@ class XPUTestBatchNormOp(XPUOpTestWrapper): 'Variance': self.variance_np }, fetch_list=[y]) - y_np_ref = ref_batch_norm_infer( - self.x_np, self.scale_np, self.bias_np, self.mean_np, - self.variance_np, self.momentum, self.epsilon, self.data_layout) + y_np_ref = ref_batch_norm_infer(self.x_np, self.scale_np, + self.bias_np, self.mean_np, + self.variance_np, self.momentum, + self.epsilon, self.data_layout) self.assertEqual(np.allclose(y_np_ref, y_np), True) def test_train(self): @@ -244,10 +247,9 @@ class XPUTestBatchNormOp(XPUOpTestWrapper): arg_name = var_name np_value = inputs[var_name] if not block.has_var(var_name): - block.create_var( - name=var_name, - shape=np_value.shape, - dtype=np_value.dtype) + block.create_var(name=var_name, + shape=np_value.shape, + dtype=np_value.dtype) input_vars[arg_name] = block.var(var_name) fetch_list = [] output_vars = {} @@ -255,21 +257,19 @@ class XPUTestBatchNormOp(XPUOpTestWrapper): arg_name = var_name np_value = outputs[var_name] if not block.has_var(var_name): - block.create_var( - name=var_name, - shape=np_value.shape, - dtype=np_value.dtype) + block.create_var(name=var_name, + shape=np_value.shape, + dtype=np_value.dtype) if var_name == 'Mean': arg_name = 'MeanOut' # Share memory if var_name == 'Variance': arg_name = 'VarianceOut' # Share memory output_vars[arg_name] = block.var(var_name) fetch_list.append(var_name) - batch_norm_op = block.append_op( - type="batch_norm", - inputs=input_vars, - outputs=output_vars, - attrs=attrs) + batch_norm_op = block.append_op(type="batch_norm", + inputs=input_vars, + outputs=output_vars, + attrs=attrs) # Generate the backward op_desc of batch_norm grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( batch_norm_op.desc, set(), []) @@ -281,10 +281,10 @@ class XPUTestBatchNormOp(XPUOpTestWrapper): outs = exe.run(program, feed=inputs, fetch_list=fetch_list) for id, name in enumerate(fetch_list): 
self.assertEqual( - np.allclose( - outputs[name], outs[id], atol=1e-4), True) + np.allclose(outputs[name], outs[id], atol=1e-4), True) class TestBatchNormOpUseGlobalStats(unittest.TestCase): + def setUp(self): self.places = [paddle.XPUPlace(0)] self.init_test() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bce_loss_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_bce_loss_op_xpu.py index a8173f054a1..b10a6210d34 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_bce_loss_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_bce_loss_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import paddle import paddle.fluid as fluid @@ -31,11 +32,13 @@ def bce_loss(input, label): class XPUTestBceLossOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'bce_loss' self.use_dynamic_create_class = False class TestBceLossOp(XPUOpTest): + def setUp(self): self.op_type = "bce_loss" self.dtype = self.in_type @@ -59,10 +62,12 @@ class XPUTestBceLossOp(XPUOpTestWrapper): self.shape = [10, 10] class TestBceLossOpCase1(TestBceLossOp): + def init_test_cast(self): self.shape = [2, 3, 4, 5] class TestBceLossOpCase2(TestBceLossOp): + def init_test_cast(self): self.shape = [2, 3, 20] diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_op_xpu.py index ddc2b49ebe0..9f15b72fe7d 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_op_xpu.py @@ -20,6 +20,7 @@ import numpy as np import paddle import paddle.fluid.core as core import sys + sys.path.append("..") from op_test_xpu import XPUOpTest import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_v2_op_xpu.py index 19dae7068cb..60abc31922d 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_v2_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from paddle.nn.functional import interpolate import paddle @@ -24,6 +25,7 @@ from op_test_xpu import XPUOpTest import unittest import paddle.fluid as fluid from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper + paddle.enable_static() @@ -108,11 +110,13 @@ def bilinear_interp_np(input, class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): + def __init__(self): self.op_name = 'bilinear_interp_v2' self.use_dynamic_create_class = False class TestBilinearInterpOp(XPUOpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -146,9 +150,10 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): out_h = self.out_h out_w = self.out_w - output_np = bilinear_interp_np( - input_np, out_h, out_w, 0, 0, self.out_size, self.actual_shape, - self.align_corners, self.align_mode, self.data_layout) + output_np = bilinear_interp_np(input_np, out_h, out_w, 0, 0, + self.out_size, self.actual_shape, + self.align_corners, self.align_mode, + self.data_layout) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size @@ -192,6 +197,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.place = paddle.XPUPlace(0) class TestBilinearInterpCase1(TestBilinearInterpOp): + def 
init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [4, 1, 7, 8] @@ -202,6 +208,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 1 class TestBilinearInterpCase2(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 3, 9, 6] @@ -212,6 +219,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 1 class TestBilinearInterpCase3(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [1, 1, 32, 64] @@ -222,6 +230,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 1 class TestBilinearInterpCase4(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [4, 1, 7, 8] @@ -233,6 +242,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 1 class TestBilinearInterpCase5(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 3, 9, 6] @@ -244,6 +254,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 1 class TestBilinearInterpCase6(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [1, 1, 32, 64] @@ -255,6 +266,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 1 class TestBilinearInterpCase7(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [1, 1, 32, 64] @@ -265,6 +277,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 1 class TestBilinearInterpSame(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 32, 64] @@ -275,6 +288,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 1 class TestBilinearInterpActualShape(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 2, 32, 16] @@ -286,21 +300,25 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 1 class TestBilinearInterpOtherMethod1(TestBilinearInterpOp): + def set_align_mode(self): self.align_corners = False self.align_mode = 1 class TestBilinearInterpWithMethod2(TestBilinearInterpOp): + def set_align_mode(self): self.align_corners = False self.align_mode = 0 class TestBilinearInterpWithMethod3(TestBilinearInterpOp): + def set_align_mode(self): self.align_corners = True self.align_mode = 0 class TestBilinearInterpScale1(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 5, 7] @@ -311,6 +329,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 1 class TestBilinearInterpScale2(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 5, 7] @@ -321,6 +340,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 1 class TestBilinearInterpScale3(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 5, 7] @@ -331,6 +351,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 1 class TestBilinearInterpScale4(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [2, 3, 5, 7] @@ -341,6 +362,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 1 class TestBilinearInterpZero(TestBilinearInterpOp): + def init_test_case(self): self.interp_method = 'bilinear' 
self.input_shape = [2, 3, 5, 7] @@ -351,6 +373,7 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_mode = 0 class TestBilinearInterpOp_attr_tensor(XPUOpTest): + def setUp(self): self.out_size = None self.actual_shape = None @@ -427,8 +450,9 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.place = paddle.XPUPlace(0) # out_size is a 1-D tensor - class TestBilinearInterp_attr_tensor_Case1( - TestBilinearInterpOp_attr_tensor): + class TestBilinearInterp_attr_tensor_Case1(TestBilinearInterpOp_attr_tensor + ): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 3, 9, 6] @@ -439,8 +463,9 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.align_corners = True # scale is a 1-D tensor - class TestBilinearInterp_attr_tensor_Case2( - TestBilinearInterpOp_attr_tensor): + class TestBilinearInterp_attr_tensor_Case2(TestBilinearInterpOp_attr_tensor + ): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 2, 32, 16] @@ -452,8 +477,9 @@ class XPUTestBilinearInterpV2Op(XPUOpTestWrapper): self.shape_by_1Dtensor = True # scale is a 1-D tensor - class TestBilinearInterp_attr_tensor_Case3( - TestBilinearInterpOp_attr_tensor): + class TestBilinearInterp_attr_tensor_Case3(TestBilinearInterpOp_attr_tensor + ): + def init_test_case(self): self.interp_method = 'bilinear' self.input_shape = [3, 2, 32, 16] diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bitwise_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_bitwise_op_xpu.py index 9a1c9a61fff..ea86f3f8661 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_bitwise_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_bitwise_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -29,10 +30,12 @@ paddle.enable_static() ################## TEST OP: BitwiseAnd ################## class XPUTestBitwiseAnd(XPUOpTestWrapper): + def __init__(self): self.op_name = 'bitwise_and' class XPUTestBitwiseAndBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_case() @@ -41,10 +44,14 @@ class XPUTestBitwiseAnd(XPUOpTestWrapper): def set_case(self): self.op_type = 'bitwise_and' - x = np.random.randint( - self.low, self.high, self.x_shape, dtype=self.dtype) - y = np.random.randint( - self.low, self.high, self.y_shape, dtype=self.dtype) + x = np.random.randint(self.low, + self.high, + self.x_shape, + dtype=self.dtype) + y = np.random.randint(self.low, + self.high, + self.y_shape, + dtype=self.dtype) out = np.bitwise_and(x, y) self.attrs = {'use_xpu': True} @@ -68,6 +75,7 @@ class XPUTestBitwiseAnd(XPUOpTestWrapper): pass class XPUTestBitwiseAndCase1(XPUTestBitwiseAndBase): + def init_case(self): self.dtype = np.int32 self.x_shape = [4, 5] @@ -76,6 +84,7 @@ class XPUTestBitwiseAnd(XPUOpTestWrapper): self.high = 100 class XPUTestBitwiseAndCase2(XPUTestBitwiseAndBase): + def init_case(self): self.dtype = np.int32 self.x_shape = [2, 3, 4, 5] @@ -84,6 +93,7 @@ class XPUTestBitwiseAnd(XPUOpTestWrapper): self.high = 100 class XPUTestBitwiseAndCase3(XPUTestBitwiseAndBase): + def init_case(self): self.dtype = np.int32 self.x_shape = [2, 3, 4, 5] @@ -99,10 +109,12 @@ for stype in support_types: ################## TEST OP: BitwiseOr ################## class XPUTestBitwiseOr(XPUOpTestWrapper): + def __init__(self): self.op_name = 'bitwise_or' class XPUTestBitwiseOrBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_case() @@ 
-111,10 +123,14 @@ class XPUTestBitwiseOr(XPUOpTestWrapper): def set_case(self): self.op_type = 'bitwise_or' - x = np.random.randint( - self.low, self.high, self.x_shape, dtype=self.dtype) - y = np.random.randint( - self.low, self.high, self.y_shape, dtype=self.dtype) + x = np.random.randint(self.low, + self.high, + self.x_shape, + dtype=self.dtype) + y = np.random.randint(self.low, + self.high, + self.y_shape, + dtype=self.dtype) out = np.bitwise_or(x, y) self.attrs = {'use_xpu': True} @@ -138,6 +154,7 @@ class XPUTestBitwiseOr(XPUOpTestWrapper): pass class XPUTestBitwiseOrCase1(XPUTestBitwiseOrBase): + def init_case(self): self.dtype = np.int32 self.x_shape = [4, 5] @@ -146,6 +163,7 @@ class XPUTestBitwiseOr(XPUOpTestWrapper): self.high = 100 class XPUTestBitwiseOrCase2(XPUTestBitwiseOrBase): + def init_case(self): self.dtype = np.int32 self.x_shape = [2, 3, 4, 5] @@ -154,6 +172,7 @@ class XPUTestBitwiseOr(XPUOpTestWrapper): self.high = 100 class XPUTestBitwiseOrCase3(XPUTestBitwiseOrBase): + def init_case(self): self.dtype = np.int32 self.x_shape = [2, 3, 4, 5] @@ -169,10 +188,12 @@ for stype in support_types: ################## TEST OP: BitwiseXor ################## class XPUTestBitwiseXor(XPUOpTestWrapper): + def __init__(self): self.op_name = 'bitwise_xor' class XPUTestBitwiseXorBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_case() @@ -181,10 +202,14 @@ class XPUTestBitwiseXor(XPUOpTestWrapper): def set_case(self): self.op_type = 'bitwise_xor' - x = np.random.randint( - self.low, self.high, self.x_shape, dtype=self.dtype) - y = np.random.randint( - self.low, self.high, self.y_shape, dtype=self.dtype) + x = np.random.randint(self.low, + self.high, + self.x_shape, + dtype=self.dtype) + y = np.random.randint(self.low, + self.high, + self.y_shape, + dtype=self.dtype) out = np.bitwise_xor(x, y) self.attrs = {'use_xpu': True} @@ -208,6 +233,7 @@ class XPUTestBitwiseXor(XPUOpTestWrapper): pass class XPUTestBitwiseXorCase1(XPUTestBitwiseXorBase): + def init_case(self): self.dtype = np.int32 self.x_shape = [4, 5] @@ -216,6 +242,7 @@ class XPUTestBitwiseXor(XPUOpTestWrapper): self.high = 100 class XPUTestBitwiseXorCase2(XPUTestBitwiseXorBase): + def init_case(self): self.dtype = np.int32 self.x_shape = [2, 3, 4, 5] @@ -224,6 +251,7 @@ class XPUTestBitwiseXor(XPUOpTestWrapper): self.high = 100 class XPUTestBitwiseXorCase3(XPUTestBitwiseXorBase): + def init_case(self): self.dtype = np.int32 self.x_shape = [2, 3, 4, 5] @@ -239,10 +267,12 @@ for stype in support_types: ################## TEST OP: BitwiseNot ################## class XPUTestBitwiseNot(XPUOpTestWrapper): + def __init__(self): self.op_name = 'bitwise_not' class XPUTestBitwiseNotBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_case() @@ -251,8 +281,10 @@ class XPUTestBitwiseNot(XPUOpTestWrapper): def set_case(self): self.op_type = 'bitwise_not' - x = np.random.randint( - self.low, self.high, self.x_shape, dtype=self.dtype) + x = np.random.randint(self.low, + self.high, + self.x_shape, + dtype=self.dtype) out = np.bitwise_not(x) self.attrs = {'use_xpu': True} @@ -272,6 +304,7 @@ class XPUTestBitwiseNot(XPUOpTestWrapper): pass class XPUTestBitwiseNotBool(XPUTestBitwiseNotBase): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_case() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py index f6893150c9e..164908495b1 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py 
+++ b/python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py @@ -13,6 +13,7 @@ from __future__ import print_function import sys + sys.path.append("..") import paddle @@ -39,6 +40,7 @@ class XPUTestBmmOp(XPUOpTestWrapper): self.use_dynamic_create_class = False class TestBmmOp(XPUOpTest): + def setUp(self): self.init_dtype() self.set_xpu() @@ -71,26 +73,31 @@ class XPUTestBmmOp(XPUOpTestWrapper): self.check_grad_with_place(self.place, ['X', 'Y'], 'Out') class TestBmmOp1(TestBmmOp): + def set_shape(self): self.Xshape = (3, 3, 3) self.Yshape = (3, 3, 3) class TestBmmOp2(TestBmmOp): + def set_shape(self): self.Xshape = (128, 3, 16) self.Yshape = (128, 16, 3) class TestBmmOp3(TestBmmOp): + def set_shape(self): self.Xshape = (2048, 16, 27) self.Yshape = (2048, 27, 16) class TestBmmOp4(TestBmmOp): + def set_shape(self): self.Xshape = (2, 27, 27) self.Yshape = (2, 27, 27) class TestBmmOp5(TestBmmOp): + def set_shape(self): self.Xshape = (2, 1, 1) self.Yshape = (2, 1, 1) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py index 201e758c0ac..cd7062f66d9 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py @@ -37,6 +37,7 @@ typeid_dict = { class XPUTestCastOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'cast' self.use_dynamic_create_class = True @@ -51,6 +52,7 @@ class XPUTestCastOp(XPUOpTestWrapper): return base_class, classes class TestCastOp(XPUOpTest): + def setUp(self): ipt = np.random.random(size=[10, 10]) in_typename = self.in_type_str @@ -76,11 +78,12 @@ for stype in support_types: class TestCastOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of cast_op must be Variable. 
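NOTE (illustrative sketch, not part of the applied diff): several hunks in this section — the leaky_relu alpha call, the adamw shape literal, and the clip_by_norm inputs dict that follows below — show a third pattern: a call or literal terminated by a trailing comma is split one element per line. This appears to be the comma-terminated-split behaviour of the yapf configuration in use (an assumption; the configuration is not shown here). A runnable sketch:

input_data = [0.5, 0.5]

# No trailing comma: the literal stays on one line.
inputs_compact = {'X': input_data}

# Trailing comma: the literal is exploded, one entry per line.
inputs_split = {
    'X': input_data,
}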
- x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.XPUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.XPUPlace(0)) self.assertRaises(TypeError, fluid.layers.cast, x1, 'int32') diff --git a/python/paddle/fluid/tests/unittests/xpu/test_clip_by_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_clip_by_norm_op_xpu.py index 8698df9e7ee..074acf2112f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_clip_by_norm_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_clip_by_norm_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -26,6 +27,7 @@ from paddle.fluid import Program, program_guard class TestXPUClipByNormOp(XPUOpTest): + def setUp(self): self.op_type = "clip_by_norm" self.dtype = np.float32 @@ -34,7 +36,9 @@ class TestXPUClipByNormOp(XPUOpTest): self.initTestCase() input = np.random.random(self.shape).astype("float32") input[np.abs(input) < self.max_relative_error] = 0.5 - self.inputs = {'X': input, } + self.inputs = { + 'X': input, + } self.attrs = {} self.attrs['max_norm'] = self.max_norm norm = np.sqrt(np.sum(np.square(input))) @@ -56,18 +60,21 @@ class TestXPUClipByNormOp(XPUOpTest): class TestCase1(TestXPUClipByNormOp): + def initTestCase(self): self.shape = (100, ) self.max_norm = 1e20 class TestCase2(TestXPUClipByNormOp): + def initTestCase(self): self.shape = (16, 16) self.max_norm = 0.1 class TestCase3(TestXPUClipByNormOp): + def initTestCase(self): self.shape = (4, 8, 16) self.max_norm = 1.0 diff --git a/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py index 7f8f5d6bc74..33198a28933 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -30,11 +31,13 @@ from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, class XPUTestClipOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'clip' self.use_dynamic_create_class = False class TestClipOp(XPUOpTest): + def setUp(self): self.init_dtype() self.set_xpu() @@ -91,24 +94,28 @@ class XPUTestClipOp(XPUOpTestWrapper): paddle.disable_static() class TestClipOp1(TestClipOp): + def init_data(self): self.shape = (8, 16, 8) self.max = 0.7 self.min = 0.0 class TestClipOp2(TestClipOp): + def init_data(self): self.shape = (8, 16) self.max = 1.0 self.min = 0.0 class TestClipOp3(TestClipOp): + def init_data(self): self.shape = (4, 8, 16) self.max = 0.7 self.min = 0.2 class TestClipOp4(TestClipOp): + def init_data(self): self.shape = (4, 8, 8) self.max = 0.7 @@ -117,6 +124,7 @@ class XPUTestClipOp(XPUOpTestWrapper): self.inputs['Min'] = np.array([0.3]).astype('float32') class TestClipOp5(TestClipOp): + def init_data(self): self.shape = (4, 8, 16) self.max = 0.5 @@ -124,6 +132,7 @@ class XPUTestClipOp(XPUOpTestWrapper): class TestClipOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): @@ -143,6 +152,7 @@ class TestClipOpError(unittest.TestCase): class TestClipAPI(unittest.TestCase): + def _executed_api(self, x, min=None, max=None): return paddle.clip(x, min, max) @@ -154,8 +164,8 @@ class TestClipAPI(unittest.TestCase): min = fluid.data(name='min', shape=[1], dtype='float32') max = fluid.data(name='max', shape=[1], 
dtype='float32') - place = fluid.XPUPlace(0) if fluid.core.is_compiled_with_xpu( - ) else fluid.CPUPlace() + place = fluid.XPUPlace( + 0) if fluid.core.is_compiled_with_xpu() else fluid.CPUPlace() exe = fluid.Executor(place) out_1 = self._executed_api(images, min=min, max=max) @@ -174,9 +184,7 @@ class TestClipAPI(unittest.TestCase): "min": np.array([0.2]).astype('float32'), "max": np.array([0.8]).astype('float32') }, - fetch_list=[ - out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8 - ]) + fetch_list=[out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8]) self.assertTrue(np.allclose(res1, data.clip(0.2, 0.8))) self.assertTrue(np.allclose(res2, data.clip(0.2, 0.9))) @@ -190,8 +198,8 @@ class TestClipAPI(unittest.TestCase): def test_clip_dygraph(self): paddle.disable_static() - place = fluid.XPUPlace(0) if fluid.core.is_compiled_with_xpu( - ) else fluid.CPUPlace() + place = fluid.XPUPlace( + 0) if fluid.core.is_compiled_with_xpu() else fluid.CPUPlace() paddle.disable_static(place) data_shape = [1, 9, 9, 4] data = np.random.random(data_shape).astype('float32') @@ -219,6 +227,7 @@ class TestClipAPI(unittest.TestCase): class TestInplaceClipAPI(TestClipAPI): + def _executed_api(self, x, min=None, max=None): return x.clip_(min, max) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_compare_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_compare_op_xpu.py index 32b27652f76..a4175ec25cf 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_compare_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_compare_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -26,7 +27,9 @@ from paddle.fluid import Program, program_guard def create_test_class(op_type, typename, callback): + class Cls(OpTest): + def setUp(self): a = np.random.random(size=(10, 7)).astype(typename) b = np.random.random(size=(10, 7)).astype(typename) @@ -49,12 +52,11 @@ def create_test_class(op_type, typename, callback): y = fluid.layers.data(name='y', shape=[2], dtype='int32') a = fluid.layers.data(name='a', shape=[2], dtype='int16') if self.op_type == "less_than": - self.assertRaises( - TypeError, - fluid.layers.less_than, - x=x, - y=y, - force_cpu=1) + self.assertRaises(TypeError, + fluid.layers.less_than, + x=x, + y=y, + force_cpu=1) op = eval("fluid.layers.%s" % self.op_type) self.assertRaises(TypeError, op, x=x, y=y, cond=1) self.assertRaises(TypeError, op, x=x, y=a) @@ -78,14 +80,16 @@ for _type_name in {'int32'}: def create_paddle_case(op_type, callback): + class PaddleCls(unittest.TestCase): + def setUp(self): self.op_type = op_type self.input_x = np.array([1, 2, 3, 4]).astype(np.int64) self.input_y = np.array([1, 3, 2, 4]).astype(np.int64) self.real_result = callback(self.input_x, self.input_y) - self.place = fluid.XPUPlace(0) if fluid.core.is_compiled_with_xpu( - ) else fluid.CPUPlace() + self.place = fluid.XPUPlace( + 0) if fluid.core.is_compiled_with_xpu() else fluid.CPUPlace() def test_api(self): paddle.enable_static() @@ -95,8 +99,10 @@ def create_paddle_case(op_type, callback): op = eval("paddle.%s" % (self.op_type)) out = op(x, y) exe = fluid.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "y": self.input_y}, + res, = exe.run(feed={ + "x": self.input_x, + "y": self.input_y + }, fetch_list=[out]) self.assertEqual((res == self.real_result).all(), True) @@ -109,8 +115,10 @@ def create_paddle_case(op_type, callback): op = eval("paddle.%s" % (self.op_type)) out = op(x, y) exe = 
fluid.Executor(self.place) - res, = exe.run(feed={"x": self.input_x, - "y": 1.0}, + res, = exe.run(feed={ + "x": self.input_x, + "y": 1.0 + }, fetch_list=[out]) self.real_result = np.array([1, 0, 0, 0]).astype(np.int64) self.assertEqual((res == self.real_result).all(), True) @@ -145,6 +153,7 @@ def create_paddle_case(op_type, callback): paddle.enable_static() def test_assert(self): + def test_dynamic_api_string(self): if self.op_type == "equal": paddle.disable_static() @@ -168,8 +177,9 @@ def create_paddle_case(op_type, callback): def test_broadcast_api_1(self): paddle.enable_static() with program_guard(Program(), Program()): - x = paddle.static.data( - name='x', shape=[1, 2, 1, 3], dtype='int32') + x = paddle.static.data(name='x', + shape=[1, 2, 1, 3], + dtype='int32') y = paddle.static.data(name='y', shape=[1, 2, 3], dtype='int32') op = eval("paddle.%s" % (self.op_type)) out = op(x, y) @@ -177,8 +187,10 @@ def create_paddle_case(op_type, callback): input_x = np.arange(1, 7).reshape((1, 2, 1, 3)).astype(np.int32) input_y = np.arange(0, 6).reshape((1, 2, 3)).astype(np.int32) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) @@ -186,16 +198,19 @@ def create_paddle_case(op_type, callback): paddle.enable_static() with program_guard(Program(), Program()): x = paddle.static.data(name='x', shape=[1, 2, 3], dtype='int32') - y = paddle.static.data( - name='y', shape=[1, 2, 1, 3], dtype='int32') + y = paddle.static.data(name='y', + shape=[1, 2, 1, 3], + dtype='int32') op = eval("paddle.%s" % (self.op_type)) out = op(x, y) exe = paddle.static.Executor(self.place) input_x = np.arange(0, 6).reshape((1, 2, 3)).astype(np.int32) input_y = np.arange(1, 7).reshape((1, 2, 1, 3)).astype(np.int32) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) @@ -210,8 +225,10 @@ def create_paddle_case(op_type, callback): input_x = np.arange(0, 5).reshape((5)).astype(np.int32) input_y = np.array([5, 3, 2]).reshape((3, 1)).astype(np.int32) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) @@ -226,8 +243,10 @@ def create_paddle_case(op_type, callback): input_x = np.array([True, False, True]).astype(np.bool) input_y = np.array([True, True, False]).astype(np.bool) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) @@ -242,8 +261,10 @@ def create_paddle_case(op_type, callback): input_x = np.array([True, False, True]).astype(np.bool) input_y = np.array([True]).astype(np.bool) real_result = callback(input_x, input_y) - res, = exe.run(feed={"x": input_x, - "y": input_y}, + res, = exe.run(feed={ + "x": input_x, + "y": input_y + }, fetch_list=[out]) self.assertEqual((res == real_result).all(), True) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_concat_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_concat_op_xpu.py index 3f188e78f86..2355f5de9fd 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_concat_op_xpu.py +++ 
b/python/paddle/fluid/tests/unittests/xpu/test_concat_op_xpu.py @@ -31,11 +31,13 @@ paddle.enable_static() class XPUTestConcatOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'concat' self.use_dynamic_create_class = False class TestConcatOp(XPUOpTest): + def setUp(self): self.set_xpu() self.op_type = "concat" @@ -54,8 +56,9 @@ class XPUTestConcatOp(XPUOpTestWrapper): self.actual_axis = self.axis self.outputs = { - 'Out': np.concatenate( - (self.x0, self.x1, self.x2), axis=self.actual_axis) + 'Out': + np.concatenate((self.x0, self.x1, self.x2), + axis=self.actual_axis) } def set_inputs(self): @@ -84,10 +87,12 @@ class XPUTestConcatOp(XPUOpTestWrapper): self.check_grad_with_place(place, ['x2'], 'Out') class TestConcatOpAxis0XPU(TestConcatOp): + def init_axis(self): self.axis = 0 class TestConcatOpAxis1XPU(TestConcatOp): + def set_inputs(self): self.x0 = np.random.random((5, 1, 4, 5)).astype(self.dtype) self.x1 = np.random.random((5, 2, 4, 5)).astype(self.dtype) @@ -97,28 +102,34 @@ class XPUTestConcatOp(XPUOpTestWrapper): self.axis = 1 class TestConcatOpAxis2XPU(TestConcatOp): + def init_axis(self): self.axis = 2 class TestConcatOpAxis3XPU(TestConcatOp): + def init_axis(self): self.axis = 3 class TestConcatOpAxisNeg1XPU(TestConcatOp): + def init_axis(self): self.axis = -1 class TestConcatOpAxisNeg2XPU(TestConcatOp): + def init_axis(self): self.axis = -2 class TestConcatOpAxisNeg3XPU(TestConcatOp): + def init_axis(self): self.axis = -3 @skip_check_grad_ci( reason="The function 'check_grad' for large inputs is too slow.") class TestConcatOp3(TestConcatOp): + def set_inputs(self): self.x0 = np.random.random((1, 256, 170, 256)).astype(self.dtype) self.x1 = np.random.random((1, 128, 170, 256)).astype(self.dtype) @@ -129,9 +140,11 @@ class XPUTestConcatOp(XPUOpTestWrapper): pass @skip_check_grad_ci( - reason="This test will meet fetch error when there is a null grad. The detailed information is in PR#17015." + reason= + "This test will meet fetch error when there is a null grad. The detailed information is in PR#17015." 
) class TestConcatOp4(TestConcatOp): + def set_inputs(self): self.x0 = np.random.random((2, 3, 4, 5)).astype(self.dtype) self.x1 = np.random.random((2, 3, 4, 5)).astype(self.dtype) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py index 5f954659c2d..751c4cdf302 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -63,8 +64,8 @@ def conv2d_forward_naive(input, for input_size, filter_size, stride_size in zip(input_shape, pool_size, pool_stride): out_size = int((input_size + stride_size - 1) / stride_size) - pad_sum = np.max(( - (out_size - 1) * stride_size + filter_size - input_size, 0)) + pad_sum = np.max( + ((out_size - 1) * stride_size + filter_size - input_size, 0)) pad_0 = int(pad_sum / 2) pad_1 = int(pad_sum - pad_0) padding.append(pad_0) @@ -93,14 +94,14 @@ def conv2d_forward_naive(input, d_bolck_h = (dilation[0] * (f_h - 1) + 1) d_bolck_w = (dilation[1] * (f_w - 1) + 1) - input_pad = np.pad(input, ((0, 0), (0, 0), (pad_h_0, pad_h_1), - (pad_w_0, pad_w_1)), + input_pad = np.pad(input, + ((0, 0), (0, 0), (pad_h_0, pad_h_1), (pad_w_0, pad_w_1)), mode='constant', constant_values=0) filter_dilation = np.zeros((f_n, f_c, d_bolck_h, d_bolck_w)) - filter_dilation[:, :, 0:d_bolck_h:dilation[0], 0:d_bolck_w:dilation[ - 1]] = filter + filter_dilation[:, :, 0:d_bolck_h:dilation[0], + 0:d_bolck_w:dilation[1]] = filter for i in range(out_h): for j in range(out_w): @@ -125,7 +126,9 @@ def conv2d_forward_naive(input, def create_test_channel_last_class(parent): + class TestChannelLastCase(parent): + def init_data_format(self): self.data_format = "NHWC" @@ -139,7 +142,9 @@ def create_test_channel_last_class(parent): def create_test_padding_SAME_class(parent): + class TestPaddingSMAECase(parent): + def init_paddings(self): self.pad = [0, 0] self.padding_algorithm = "SAME" @@ -150,7 +155,9 @@ def create_test_padding_SAME_class(parent): def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): self.pad = [1, 1] self.padding_algorithm = "VALID" @@ -161,11 +168,13 @@ def create_test_padding_VALID_class(parent): class XPUTestConv2DOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'conv2d' self.use_dynamic_create_class = False class TestConv2DOp(XPUOpTest): + def setUp(self): self.dtype = self.in_type self.place = paddle.XPUPlace(0) @@ -225,8 +234,8 @@ class XPUTestConv2DOp(XPUOpTestWrapper): self.outputs = {'Output': output} def has_cuda(self): - return core.is_compiled_with_cuda() and (self.use_cudnn or - self.use_cuda) + return core.is_compiled_with_cuda() and (self.use_cudnn + or self.use_cuda) def test_check_output(self): if core.is_compiled_with_xpu(): @@ -234,8 +243,8 @@ class XPUTestConv2DOp(XPUOpTestWrapper): self.check_output_with_place(self.place) def test_check_grad(self): - if (hasattr(self, "no_need_check_grad") and - self.no_need_check_grad == True): + if (hasattr(self, "no_need_check_grad") + and self.no_need_check_grad == True): return if core.is_compiled_with_xpu(): paddle.enable_static() @@ -243,26 +252,24 @@ class XPUTestConv2DOp(XPUOpTestWrapper): 'Output') def test_check_grad_no_filter(self): - if (hasattr(self, "no_need_check_grad") and - self.no_need_check_grad == True): + if (hasattr(self, "no_need_check_grad") + and 
self.no_need_check_grad == True): return if core.is_compiled_with_xpu(): paddle.enable_static() - self.check_grad_with_place( - self.place, ['Input'], - 'Output', - no_grad_set=set(['Filter'])) + self.check_grad_with_place(self.place, ['Input'], + 'Output', + no_grad_set=set(['Filter'])) def test_check_grad_no_input(self): - if (hasattr(self, "no_need_check_grad") and - self.no_need_check_grad == True): + if (hasattr(self, "no_need_check_grad") + and self.no_need_check_grad == True): return if core.is_compiled_with_xpu(): paddle.enable_static() - self.check_grad_with_place( - self.place, ['Filter'], - 'Output', - no_grad_set=set(['Input'])) + self.check_grad_with_place(self.place, ['Filter'], + 'Output', + no_grad_set=set(['Input'])) def init_test_case(self): self.pad = [0, 0] @@ -285,6 +292,7 @@ class XPUTestConv2DOp(XPUOpTestWrapper): pass class TestWithPad(TestConv2DOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -294,6 +302,7 @@ class XPUTestConv2DOp(XPUOpTestWrapper): self.filter_size = [6, f_c, 3, 3] class TestWithStride(TestConv2DOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -303,6 +312,7 @@ class XPUTestConv2DOp(XPUOpTestWrapper): self.filter_size = [6, f_c, 3, 3] class TestWith1x1(TestConv2DOp): + def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] @@ -317,11 +327,13 @@ class XPUTestConv2DOp(XPUOpTestWrapper): # ---- test asymmetric padding ---- class XPUTestConv2DOp_v2(XPUOpTestWrapper): + def __init__(self): self.op_name = 'conv2d' self.use_dynamic_create_class = False class TestConv2DOp_v2(XPUOpTest): + def setUp(self): self.dtype = self.in_type self.place = paddle.XPUPlace(0) @@ -359,9 +371,10 @@ class XPUTestConv2DOp_v2(XPUOpTestWrapper): np.random.seed(8) filter = np.random.uniform(-1, 1, self.filter_size).astype(self.dtype) - output, _, _, _, _ = conv2d_forward_naive( - input2, filter, self.groups, conv2d_param, - self.padding_algorithm, self.data_format) + output, _, _, _, _ = conv2d_forward_naive(input2, filter, + self.groups, conv2d_param, + self.padding_algorithm, + self.data_format) output = output.astype(self.dtype) self.inputs = { @@ -384,8 +397,8 @@ class XPUTestConv2DOp_v2(XPUOpTestWrapper): self.outputs = {'Output': output} def has_cuda(self): - return core.is_compiled_with_cuda() and (self.use_cudnn or - self.use_cuda) + return core.is_compiled_with_cuda() and (self.use_cudnn + or self.use_cuda) def test_check_output(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode @@ -395,8 +408,8 @@ class XPUTestConv2DOp_v2(XPUOpTestWrapper): def test_check_grad(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode - if (hasattr(self, "no_need_check_grad") and - self.no_need_check_grad == True): + if (hasattr(self, "no_need_check_grad") + and self.no_need_check_grad == True): return if core.is_compiled_with_xpu(): paddle.enable_static() @@ -405,27 +418,25 @@ class XPUTestConv2DOp_v2(XPUOpTestWrapper): def test_check_grad_no_filter(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode - if (hasattr(self, "no_need_check_grad") and - self.no_need_check_grad == True): + if (hasattr(self, "no_need_check_grad") + and self.no_need_check_grad == True): return if core.is_compiled_with_xpu(): paddle.enable_static() - self.check_grad_with_place( - self.place, ['Input'], - 'Output', - no_grad_set=set(['Filter'])) + self.check_grad_with_place(self.place, ['Input'], + 'Output', + no_grad_set=set(['Filter'])) def test_check_grad_no_input(self): # TODO(wangzhongpu): support mkldnn op in dygraph 
mode - if (hasattr(self, "no_need_check_grad") and - self.no_need_check_grad == True): + if (hasattr(self, "no_need_check_grad") + and self.no_need_check_grad == True): return if core.is_compiled_with_xpu(): paddle.enable_static() - self.check_grad_with_place( - self.place, ['Filter'], - 'Output', - no_grad_set=set(['Input'])) + self.check_grad_with_place(self.place, ['Filter'], + 'Output', + no_grad_set=set(['Input'])) def init_test_case(self): self.pad = [0, 0] @@ -455,11 +466,13 @@ class XPUTestConv2DOp_v2(XPUOpTestWrapper): pass class TestConv2DOp_AsyPadding(TestConv2DOp_v2): + def init_paddings(self): self.pad = [0, 0, 0, 0] self.padding_algorithm = "EXPLICIT" class TestWithPad_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [1, 1] self.input_size = [2, 3, 5, 5] # NCHW @@ -472,6 +485,7 @@ class XPUTestConv2DOp_v2(XPUOpTestWrapper): self.padding_algorithm = "EXPLICIT" class TestWithStride_AsyPadding(TestConv2DOp_v2): + def init_test_case(self): self.stride = [2, 2] self.input_size = [2, 3, 6, 6] # NCHW diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_transpose_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_conv2d_transpose_op_xpu.py index b4f9f639ac7..4204a73524d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_transpose_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_conv2d_transpose_op_xpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -48,11 +49,12 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs): # update pad and dilation def _get_padding_with_SAME(input_shape, kernel_size, kernel_stride): padding = [] - for input_size, filter_size, stride_size in zip( - input_shape, kernel_size, kernel_stride): + for input_size, filter_size, stride_size in zip(input_shape, + kernel_size, + kernel_stride): out_size = int((input_size + stride_size - 1) / stride_size) - pad_sum = np.max(( - (out_size - 1) * stride_size + filter_size - input_size, 0)) + pad_sum = np.max( + ((out_size - 1) * stride_size + filter_size - input_size, 0)) pad_0 = int(pad_sum / 2) pad_1 = int(pad_sum - pad_0) padding.append(pad_0) @@ -86,8 +88,8 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs): if 'output_padding' in attrs: out_pad_h = attrs['output_padding'][0] out_pad_w = attrs['output_padding'][1] - out = np.zeros( - (in_n, out_c, out_h + out_pad_h, out_w + out_pad_w), dtype=input_.dtype) + out = np.zeros((in_n, out_c, out_h + out_pad_h, out_w + out_pad_w), + dtype=input_.dtype) for n in range(in_n): for i in range(in_h): @@ -105,17 +107,18 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs): axis=0) i1, i2 = i * stride[0], i * stride[0] + d_bolck_h j1, j2 = j * stride[1], j * stride[1] + d_bolck_w - out[n, g * f_out_c + k, i1:i2:dilations[0], j1:j2: - dilations[1]] += tmp_out + out[n, g * f_out_c + k, i1:i2:dilations[0], + j1:j2:dilations[1]] += tmp_out - out = out[:, :, pad_h_0:out_h - pad_h_1 + out_pad_h, pad_w_0:out_w - pad_w_1 - + out_pad_w] + out = out[:, :, pad_h_0:out_h - pad_h_1 + out_pad_h, + pad_w_0:out_w - pad_w_1 + out_pad_w] if attrs['data_format'] == 'NHWC': out = np.transpose(out, [0, 2, 3, 1]) return out class TestConv2DTransposeOp(XPUOpTest): + def setUp(self): # init as conv transpose self.dtype = np.float32 @@ -169,24 +172,26 @@ class TestConv2DTransposeOp(XPUOpTest): if core.is_compiled_with_xpu(): paddle.enable_static() place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['Filter'], 'Output', 
no_grad_set=set(['Input'])) + self.check_grad_with_place(place, ['Filter'], + 'Output', + no_grad_set=set(['Input'])) def test_check_grad_no_filter(self): if self.need_check_grad: if core.is_compiled_with_xpu(): paddle.enable_static() place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['Input'], 'Output', no_grad_set=set(['Filter'])) + self.check_grad_with_place(place, ['Input'], + 'Output', + no_grad_set=set(['Filter'])) def test_check_grad(self): if self.need_check_grad: if core.is_compiled_with_xpu(): paddle.enable_static() place = paddle.XPUPlace(0) - self.check_grad_with_place(place, - set(['Input', 'Filter']), 'Output') + self.check_grad_with_place(place, set(['Input', 'Filter']), + 'Output') def init_test_case(self): self.pad = [0, 0] @@ -202,6 +207,7 @@ class TestConv2DTransposeOp(XPUOpTest): class TestWithSymmetricPad(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -213,6 +219,7 @@ class TestWithSymmetricPad(TestConv2DTransposeOp): class TestWithAsymmetricPad(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 0, 1, 2] self.stride = [1, 1] @@ -224,6 +231,7 @@ class TestWithAsymmetricPad(TestConv2DTransposeOp): class TestWithSAMEPad(TestConv2DTransposeOp): + def init_test_case(self): self.stride = [2, 1] self.dilations = [1, 2] @@ -235,6 +243,7 @@ class TestWithSAMEPad(TestConv2DTransposeOp): class TestWithVALIDPad(TestConv2DTransposeOp): + def init_test_case(self): self.stride = [1, 1] self.dilations = [1, 1] @@ -246,6 +255,7 @@ class TestWithVALIDPad(TestConv2DTransposeOp): class TestWithGroups(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -257,6 +267,7 @@ class TestWithGroups(TestConv2DTransposeOp): class TestWithStride(TestConv2DTransposeOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] diff --git a/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py index 5c611b62998..9f774889835 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -96,8 +97,8 @@ def dconv_im2col_gemm(input, offset, mask, filter, group, conv_param): val = dmc_bilinear(input[n, c], in_h, in_w, im_h, im_w) val_out = val * mask_table[kh, kw] - col_buffer[n, c * f_h * f_w + kh * f_w + kw, h * - in_w + w] = val_out + col_buffer[n, c * f_h * f_w + kh * f_w + kw, + h * in_w + w] = val_out out = np.zeros((in_n, group, int(out_c // group), out_h * out_w)) weight = filter.reshape(group, int(out_c // group), f_c * f_h * f_w) @@ -111,6 +112,7 @@ def dconv_im2col_gemm(input, offset, mask, filter, group, conv_param): class TestModulatedDeformableConvOp(XPUOpTest): + def setUp(self): self.op_type = "deformable_conv" self.dtype = np.float32 @@ -149,8 +151,8 @@ class TestModulatedDeformableConvOp(XPUOpTest): self.outputs = {'Output': output} def has_cuda(self): - return core.is_compiled_with_cuda() and (self.use_cudnn or - self.use_cuda) + return core.is_compiled_with_cuda() and (self.use_cudnn + or self.use_cuda) def test_check_output(self): if core.is_compiled_with_xpu(): @@ -162,10 +164,10 @@ class TestModulatedDeformableConvOp(XPUOpTest): if core.is_compiled_with_xpu(): paddle.enable_static() place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, 
{'Input', 'Offset', 'Mask', 'Filter'}, - 'Output', - max_relative_error=0.06) + self.check_grad_with_place(place, + {'Input', 'Offset', 'Mask', 'Filter'}, + 'Output', + max_relative_error=0.06) def init_test_case(self): self.pad = [1, 1] @@ -196,6 +198,7 @@ class TestModulatedDeformableConvOp(XPUOpTest): class TestWithDilation(TestModulatedDeformableConvOp): + def init_test_case(self): self.pad = [2, 2] self.stride = [1, 1] @@ -221,6 +224,7 @@ class TestWithDilation(TestModulatedDeformableConvOp): class TestWith3x3(TestModulatedDeformableConvOp): + def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -243,29 +247,42 @@ class TestWith3x3(TestModulatedDeformableConvOp): class TestModulatedDeformableConvInvalidInput(unittest.TestCase): + def test_error(self): + def test_invalid_input(): paddle.enable_static() input = [1, 3, 32, 32] - offset = fluid.data( - name='offset', shape=[None, 3, 32, 32], dtype='float32') - mask = fluid.data( - name='mask', shape=[None, 3, 32, 32], dtype='float32') - loss = fluid.layers.deformable_conv( - input, offset, mask, num_filters=4, filter_size=1) + offset = fluid.data(name='offset', + shape=[None, 3, 32, 32], + dtype='float32') + mask = fluid.data(name='mask', + shape=[None, 3, 32, 32], + dtype='float32') + loss = fluid.layers.deformable_conv(input, + offset, + mask, + num_filters=4, + filter_size=1) self.assertRaises(TypeError, test_invalid_input) def test_invalid_offset(): paddle.enable_static() - input = fluid.data( - name='input', shape=[None, 3, 32, 32], dtype='int32') - offset = fluid.data( - name='offset', shape=[None, 3, 32, 32], dtype='float32') - mask = fluid.data( - name='mask', shape=[None, 3, 32, 32], dtype='float32') - loss = fluid.layers.deformable_conv( - input, offset, mask, num_filters=4, filter_size=1) + input = fluid.data(name='input', + shape=[None, 3, 32, 32], + dtype='int32') + offset = fluid.data(name='offset', + shape=[None, 3, 32, 32], + dtype='float32') + mask = fluid.data(name='mask', + shape=[None, 3, 32, 32], + dtype='float32') + loss = fluid.layers.deformable_conv(input, + offset, + mask, + num_filters=4, + filter_size=1) self.assertRaises(TypeError, test_invalid_offset) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py index 2baa837b23a..b4e8cf6b10e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -23,17 +24,20 @@ import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard from op_test_xpu import XPUOpTest + paddle.enable_static() from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper class XPUTestDropoutOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'dropout' self.use_dynamic_create_class = False class TestDropoutOp(XPUOpTest): + def setUp(self): self.init_inputs_shape() self.init_attrs() @@ -79,10 +83,12 @@ class XPUTestDropoutOp(XPUOpTestWrapper): self.check_grad(['X'], 'Out') class TestDropoutOpInput1d(TestDropoutOp): + def init_inputs_shape(self): self.shape = [2000] class TestDropoutOp2(TestDropoutOp): + def init_inputs_shape(self): self.shape = [32, 64] @@ -93,10 +99,12 @@ class XPUTestDropoutOp(XPUOpTestWrapper): self.dropout_implementation = "upscale_in_train" class TestDropoutOp3(TestDropoutOp): + def 
init_inputs_shape(self): self.shape = [32, 64, 2] class TestDropoutOp4(TestDropoutOp): + def init_attrs(self): self.__class__.no_need_check_grad = True self.dropout_prob = 0.35 @@ -105,6 +113,7 @@ class XPUTestDropoutOp(XPUOpTestWrapper): self.dropout_implementation = "downgrade_in_infer" class TestDropoutOp5(TestDropoutOp): + def init_inputs_shape(self): self.shape = [32, 64, 3] @@ -116,14 +125,15 @@ class XPUTestDropoutOp(XPUOpTestWrapper): self.dropout_implementation = "downgrade_in_infer" class TestDropoutOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): def test_Variable(): # the input of dropout must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], - fluid.CPUPlace()) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, + 5]), [[1, 1, 1, 1]], + fluid.CPUPlace()) fluid.layers.dropout(x1, dropout_prob=0.5) self.assertRaises(TypeError, test_Variable) @@ -131,13 +141,15 @@ class XPUTestDropoutOp(XPUOpTestWrapper): def test_dtype(): # the input dtype of dropout must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data( - name='x2', shape=[3, 4, 5, 6], dtype="int32") + x2 = fluid.layers.data(name='x2', + shape=[3, 4, 5, 6], + dtype="int32") fluid.layers.dropout(x2, dropout_prob=0.5) self.assertRaises(TypeError, test_dtype) class TestDropoutCAPI(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -155,6 +167,7 @@ class XPUTestDropoutOp(XPUOpTestWrapper): self.assertTrue(np.allclose(result.numpy(), result_np)) class TestDropoutBackward(unittest.TestCase): + def setUp(self): np.random.seed(123) self.places = [fluid.CPUPlace()] @@ -176,8 +189,9 @@ class XPUTestDropoutOp(XPUOpTestWrapper): out.backward() self.assertTrue( - np.array_equal(input.gradient( - ), self.cal_grad_downscale_in_infer(mask.numpy()))) + np.array_equal( + input.gradient(), + self.cal_grad_downscale_in_infer(mask.numpy()))) def test_backward_upscale_train(self): for place in self.places: @@ -192,8 +206,9 @@ class XPUTestDropoutOp(XPUOpTestWrapper): out.backward() self.assertTrue( - np.allclose(input.gradient( - ), self.cal_grad_upscale_train(mask.numpy(), prob))) + np.allclose( + input.gradient(), + self.cal_grad_upscale_train(mask.numpy(), prob))) def test_backward_upscale_train_2(self): for place in self.places: @@ -208,8 +223,9 @@ class XPUTestDropoutOp(XPUOpTestWrapper): out.backward() self.assertTrue( - np.allclose(input.gradient( - ), self.cal_grad_upscale_train(mask.numpy(), prob))) + np.allclose( + input.gradient(), + self.cal_grad_upscale_train(mask.numpy(), prob))) support_types = get_xpu_op_support_types('dropout') diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py index 2fc3a42df12..84cf048d068 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import numpy as np import sys + sys.path.append("..") import paddle from op_test import OpTest, skip_check_grad_ci @@ -28,11 +29,13 @@ paddle.enable_static() class XPUTestElementwiseAddOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'elementwise_add' self.use_dynamic_create_class = False class TestElementwiseAddOp(XPUOpTest): + def setUp(self): self.op_type = "elementwise_add" self.init_dtype() @@ -94,6 +97,7 @@ class 
XPUTestElementwiseAddOp(XPUOpTestWrapper): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseAddOp_scalar(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -102,18 +106,21 @@ class XPUTestElementwiseAddOp(XPUOpTestWrapper): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1,1) to test broadcast.") class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1, 1).astype(self.dtype) self.out = self.x + self.y class TestElementwiseAddOp_Vector(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.random((100, )).astype(self.dtype) self.y = np.random.random((100, )).astype(self.dtype) self.out = np.add(self.x, self.y) class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -123,6 +130,7 @@ class XPUTestElementwiseAddOp(XPUOpTestWrapper): self.axis = 0 class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 100, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -132,12 +140,14 @@ class XPUTestElementwiseAddOp(XPUOpTestWrapper): self.axis = 1 class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) self.out = self.x + self.y.reshape(1, 1, 100) class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -147,6 +157,7 @@ class XPUTestElementwiseAddOp(XPUOpTestWrapper): self.axis = 1 class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype) self.y = np.random.rand(100, 1).astype(self.dtype) @@ -156,24 +167,28 @@ class XPUTestElementwiseAddOp(XPUOpTestWrapper): self.axis = 0 class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 3, 12).astype(self.dtype) self.y = np.random.rand(10, 1, 12).astype(self.dtype) self.out = self.x + self.y class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype) self.y = np.random.rand(2, 12, 1, 5).astype(self.dtype) self.out = self.x + self.y class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(1, 1, 20, 5).astype(self.dtype) self.y = np.random.rand(20, 5, 1, 1).astype(self.dtype) self.out = self.x + self.y class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -185,6 +200,7 @@ class XPUTestElementwiseAddOp(XPUOpTestWrapper): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 1).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -194,6 +210,7 @@ class 
XPUTestElementwiseAddOp(XPUOpTestWrapper): self.axis = 1 class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100, 1, 1).astype(self.dtype) @@ -203,6 +220,7 @@ class XPUTestElementwiseAddOp(XPUOpTestWrapper): self.axis = -1 class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(1, 1, 100).astype(self.dtype) @@ -212,6 +230,7 @@ class XPUTestElementwiseAddOp(XPUOpTestWrapper): self.axis = -1 class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 3, 1, 4).astype(self.dtype) self.y = np.random.rand(10, 1, 12, 1).astype(self.dtype) @@ -221,6 +240,7 @@ class XPUTestElementwiseAddOp(XPUOpTestWrapper): self.axis = -1 class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 12).astype(self.dtype) self.y = np.random.rand(2, 3, 10, 12).astype(self.dtype) @@ -230,26 +250,30 @@ class XPUTestElementwiseAddOp(XPUOpTestWrapper): self.axis = 2 class TestElementwiseAddOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of elementwise_add must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0)) - y1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.XPUPlace(0)) + y1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.XPUPlace(0)) self.assertRaises(TypeError, fluid.layers.elementwise_add, x1, y1) # the input dtype of elementwise_add must be float16 or float32 or float64 or int32 or int64 # float16 only can be set on GPU place - x2 = fluid.layers.data( - name='x2', shape=[3, 4, 5, 6], dtype="uint8") - y2 = fluid.layers.data( - name='y2', shape=[3, 4, 5, 6], dtype="uint8") + x2 = fluid.layers.data(name='x2', + shape=[3, 4, 5, 6], + dtype="uint8") + y2 = fluid.layers.data(name='y2', + shape=[3, 4, 5, 6], + dtype="uint8") self.assertRaises(TypeError, fluid.layers.elementwise_add, x2, y2) class TestAddOp(unittest.TestCase): + def test_name(self): with fluid.program_guard(fluid.Program()): x = fluid.data(name="x", shape=[2, 3], dtype="float32") diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu_kp.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu_kp.py index 7cc97ccc82f..9ac16ab745a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu_kp.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu_kp.py @@ -15,6 +15,7 @@ from __future__ import print_function import numpy as np import sys + sys.path.append("..") import paddle from op_test import OpTest, skip_check_grad_ci @@ -22,12 +23,14 @@ from op_test_xpu import XPUOpTest import unittest import paddle.fluid as fluid from paddle.fluid import compiler, Program, program_guard + paddle.enable_static() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp(XPUOpTest): + def setUp(self): self.op_type = "elementwise_add" self.init_dtype() @@ -92,6 +95,7 @@ class TestElementwiseAddOp(XPUOpTest): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class 
TestElementwiseAddOp_scalar(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -103,6 +107,7 @@ class TestElementwiseAddOp_scalar(TestElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1,1) to test broadcast.") class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 4).astype(self.dtype) self.y = np.random.rand(1, 1).astype(self.dtype) @@ -112,6 +117,7 @@ class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_Vector(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.random((100, )).astype(self.dtype) self.y = np.random.random((100, )).astype(self.dtype) @@ -121,6 +127,7 @@ class TestElementwiseAddOp_Vector(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -133,6 +140,7 @@ class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 100, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -145,6 +153,7 @@ class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) @@ -154,6 +163,7 @@ class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -166,6 +176,7 @@ class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype) self.y = np.random.rand(100, 1).astype(self.dtype) @@ -178,6 +189,7 @@ class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 3, 12).astype(self.dtype) self.y = np.random.rand(10, 1, 12).astype(self.dtype) @@ -187,6 +199,7 @@ class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype) self.y = np.random.rand(2, 12, 1, 5).astype(self.dtype) @@ -196,6 +209,7 @@ class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): @unittest.skipIf(not 
paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(1, 1, 20, 5).astype(self.dtype) self.y = np.random.rand(20, 5, 1, 1).astype(self.dtype) @@ -205,6 +219,7 @@ class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 10, 12).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -219,6 +234,7 @@ class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 1).astype(self.dtype) self.y = np.random.rand(1).astype(self.dtype) @@ -231,6 +247,7 @@ class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100, 1, 1).astype(self.dtype) @@ -243,6 +260,7 @@ class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(1, 1, 100).astype(self.dtype) @@ -255,6 +273,7 @@ class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 3, 1, 4).astype(self.dtype) self.y = np.random.rand(10, 1, 12, 1).astype(self.dtype) @@ -267,6 +286,7 @@ class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): + def init_input_output(self): self.x = np.random.rand(10, 12).astype(self.dtype) self.y = np.random.rand(2, 3, 10, 12).astype(self.dtype) @@ -279,13 +299,14 @@ class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestElementwiseAddOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # the input of elementwise_add must be Variable. 
- x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0)) - y1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.XPUPlace(0)) + y1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.XPUPlace(0)) self.assertRaises(TypeError, fluid.layers.elementwise_add, x1, y1) # the input dtype of elementwise_add must be float16 or float32 or float64 or int32 or int64 @@ -298,6 +319,7 @@ class TestElementwiseAddOpError(unittest.TestCase): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestAddOp(unittest.TestCase): + def test_name(self): with fluid.program_guard(fluid.Program()): x = fluid.data(name="x", shape=[2, 3], dtype="float32") diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py index 3b593818b4e..c784ac83a54 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import sys + sys.path.append("..") import unittest import numpy as np @@ -26,11 +27,13 @@ paddle.enable_static() class XPUTestElementwiseDivOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'elementwise_div' self.use_dynamic_create_class = False class ElementwiseDivOp(XPUOpTest): + def setUp(self): self.op_type = "elementwise_div" self.dtype = self.in_type @@ -60,26 +63,25 @@ class XPUTestElementwiseDivOp(XPUOpTestWrapper): def test_check_grad_normal(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['X', 'Y'], 'Out', max_relative_error=0.05) + self.check_grad_with_place(place, ['X', 'Y'], + 'Out', + max_relative_error=0.05) def test_check_grad_ingore_x(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['Y'], - 'Out', - max_relative_error=0.05, - no_grad_set=set("X")) + self.check_grad_with_place(place, ['Y'], + 'Out', + max_relative_error=0.05, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['X'], - 'Out', - max_relative_error=0.05, - no_grad_set=set('Y')) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=0.05, + no_grad_set=set('Y')) def init_dtype(self): pass @@ -87,6 +89,7 @@ class XPUTestElementwiseDivOp(XPUOpTestWrapper): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseDivOp_scalar(ElementwiseDivOp): + def init_input_output(self): self.inputs = { 'X': np.random.uniform(0.1, 1, [20, 3, 4]).astype(self.dtype), @@ -95,6 +98,7 @@ class XPUTestElementwiseDivOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'] / self.inputs['Y']} class TestElementwiseDivOp_Vector(ElementwiseDivOp): + def init_input_output(self): self.inputs = { 'X': np.random.uniform(0.1, 1, [100]).astype(self.dtype), @@ -105,6 +109,7 @@ class XPUTestElementwiseDivOp(XPUOpTestWrapper): } class TestElementwiseDivOp_broadcast_0(ElementwiseDivOp): + def init_input_output(self): self.inputs = { 'X': np.random.uniform(0.1, 1, [100, 3, 4]).astype(self.dtype), @@ -118,6 +123,7 @@ class 
XPUTestElementwiseDivOp(XPUOpTestWrapper): } class TestElementwiseDivOp_broadcast_1(ElementwiseDivOp): + def init_input_output(self): self.inputs = { 'X': np.random.uniform(0.1, 1, [2, 100, 4]).astype(self.dtype), @@ -131,6 +137,7 @@ class XPUTestElementwiseDivOp(XPUOpTestWrapper): } class TestElementwiseDivOp_broadcast_2(ElementwiseDivOp): + def init_input_output(self): self.inputs = { 'X': np.random.uniform(0.1, 1, [2, 3, 100]).astype(self.dtype), @@ -143,20 +150,23 @@ class XPUTestElementwiseDivOp(XPUOpTestWrapper): } class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp): + def init_input_output(self): self.inputs = { - 'X': - np.random.uniform(0.1, 1, [2, 10, 12, 5]).astype(self.dtype), + 'X': np.random.uniform(0.1, 1, + [2, 10, 12, 5]).astype(self.dtype), 'Y': np.random.uniform(0.1, 1, [10, 12]).astype(self.dtype) } self.attrs = {'axis': 1} self.outputs = { - 'Out': np.divide(self.inputs['X'], - self.inputs['Y'].reshape(1, 10, 12, 1)) + 'Out': + np.divide(self.inputs['X'], + self.inputs['Y'].reshape(1, 10, 12, 1)) } class TestElementwiseDivOp_broadcast_4(ElementwiseDivOp): + def init_input_output(self): self.inputs = { 'X': np.random.uniform(0.1, 1, [2, 3, 50]).astype(self.dtype), @@ -167,10 +177,11 @@ class XPUTestElementwiseDivOp(XPUOpTestWrapper): } class TestElementwiseDivOp_broadcast_5(ElementwiseDivOp): + def init_input_output(self): self.inputs = { - 'X': - np.random.uniform(0.1, 1, [2, 3, 4, 20]).astype(self.dtype), + 'X': np.random.uniform(0.1, 1, + [2, 3, 4, 20]).astype(self.dtype), 'Y': np.random.uniform(0.1, 1, [2, 3, 1, 20]).astype(self.dtype) } self.outputs = { @@ -178,6 +189,7 @@ class XPUTestElementwiseDivOp(XPUOpTestWrapper): } class TestElementwiseDivOp_commonuse_1(ElementwiseDivOp): + def init_input_output(self): self.inputs = { 'X': np.random.uniform(0.1, 1, [2, 3, 100]).astype(self.dtype), @@ -188,23 +200,25 @@ class XPUTestElementwiseDivOp(XPUOpTestWrapper): } class TestElementwiseDivOp_commonuse_2(ElementwiseDivOp): + def init_input_output(self): self.inputs = { - 'X': - np.random.uniform(0.1, 1, [30, 3, 1, 5]).astype(self.dtype), - 'Y': - np.random.uniform(0.1, 1, [30, 1, 4, 1]).astype(self.dtype), + 'X': np.random.uniform(0.1, 1, + [30, 3, 1, 5]).astype(self.dtype), + 'Y': np.random.uniform(0.1, 1, + [30, 1, 4, 1]).astype(self.dtype), } self.outputs = { 'Out': np.divide(self.inputs['X'], self.inputs['Y']) } class TestElementwiseDivOp_xsize_lessthan_ysize(ElementwiseDivOp): + def init_input_output(self): self.inputs = { 'X': np.random.uniform(0.1, 1, [10, 12]).astype(self.dtype), - 'Y': - np.random.uniform(0.1, 1, [2, 3, 10, 12]).astype(self.dtype), + 'Y': np.random.uniform(0.1, 1, + [2, 3, 10, 12]).astype(self.dtype), } self.attrs = {'axis': 2} @@ -214,10 +228,12 @@ class XPUTestElementwiseDivOp(XPUOpTestWrapper): } class TestElementwiseDivBroadcast(unittest.TestCase): + def test_shape_with_batch_sizes(self): with fluid.program_guard(fluid.Program()): - x_var = fluid.data( - name='x', dtype='float32', shape=[None, 3, None, None]) + x_var = fluid.data(name='x', + dtype='float32', + shape=[None, 3, None, None]) one = 2. 
out = one / x_var exe = fluid.Executor(fluid.XPUPlace(0)) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py index ea01a38f4b3..93d30fef11b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import sys + sys.path.append("..") import unittest import numpy as np @@ -21,16 +22,19 @@ import paddle.fluid.core as core from op_test import OpTest, skip_check_grad_ci from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper + paddle.enable_static() import random class XPUTestElementwiseModOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'elementwise_floordiv' self.use_dynamic_create_class = False class TestElementwiseModOp(XPUOpTest): + def init_kernel_type(self): self.use_mkldnn = False @@ -63,6 +67,7 @@ class XPUTestElementwiseModOp(XPUOpTestWrapper): pass class TestElementwiseModOp_scalar(TestElementwiseModOp): + def init_input_output(self): scale_x = random.randint(0, 100000) scale_y = random.randint(1, 100000) @@ -71,6 +76,7 @@ class XPUTestElementwiseModOp(XPUOpTestWrapper): self.out = np.floor_divide(self.x, self.y) class TestElementwiseModOpInverse(TestElementwiseModOp): + def init_input_output(self): self.x = np.random.uniform(0, 10000, [10]).astype(self.dtype) self.y = np.random.uniform(1, 1000, [10, 10]).astype(self.dtype) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py index 3d9566dc71d..3d60dcdd16d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import sys + sys.path.append("..") import unittest import numpy as np @@ -19,15 +20,18 @@ from op_test import OpTest, skip_check_grad_ci from op_test_xpu import XPUOpTest import paddle from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper + paddle.enable_static() class XPUTestElementwiseMaxOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'elementwise_max' self.use_dynamic_create_class = False class TestElementwiseOp(XPUOpTest): + def setUp(self): self.use_xpu = True self.op_type = "elementwise_max" @@ -59,24 +63,23 @@ class XPUTestElementwiseMaxOp(XPUOpTestWrapper): def test_check_grad_ingore_x(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['Y'], - 'Out', - max_relative_error=0.006, - no_grad_set=set("X")) + self.check_grad_with_place(place, ['Y'], + 'Out', + max_relative_error=0.006, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['X'], - 'Out', - max_relative_error=0.006, - no_grad_set=set('Y')) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=0.006, + no_grad_set=set('Y')) @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseMaxOp_scalar(TestElementwiseOp): + def init_input_output(self): x = np.random.random_integers(-5, 5, [2, 3, 20]).astype(self.dtype) y = np.array([0.5]).astype(self.dtype) @@ -86,6 +89,7 @@ class XPUTestElementwiseMaxOp(XPUOpTestWrapper): } class TestElementwiseMaxOp_Vector(TestElementwiseOp): + def init_input_output(self): x = np.random.random((100, )).astype(self.dtype) sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) @@ -96,6 +100,7 @@ class XPUTestElementwiseMaxOp(XPUOpTestWrapper): } class TestElementwiseMaxOp_broadcast_0(TestElementwiseOp): + def init_input_output(self): x = np.random.uniform(0.5, 1, (100, 5, 2)).astype(self.dtype) sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) @@ -105,11 +110,13 @@ class XPUTestElementwiseMaxOp(XPUOpTestWrapper): self.attrs = {'axis': 0} self.outputs = { - 'Out': np.maximum(self.inputs['X'], - self.inputs['Y'].reshape(100, 1, 1)) + 'Out': + np.maximum(self.inputs['X'], + self.inputs['Y'].reshape(100, 1, 1)) } class TestElementwiseMaxOp_broadcast_1(TestElementwiseOp): + def init_input_output(self): x = np.random.uniform(0.5, 1, (2, 100, 3)).astype(self.dtype) sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) @@ -119,11 +126,13 @@ class XPUTestElementwiseMaxOp(XPUOpTestWrapper): self.attrs = {'axis': 1} self.outputs = { - 'Out': np.maximum(self.inputs['X'], - self.inputs['Y'].reshape(1, 100, 1)) + 'Out': + np.maximum(self.inputs['X'], + self.inputs['Y'].reshape(1, 100, 1)) } class TestElementwiseMaxOp_broadcast_2(TestElementwiseOp): + def init_input_output(self): x = np.random.uniform(0.5, 1, (1, 3, 100)).astype(self.dtype) sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) @@ -132,11 +141,13 @@ class XPUTestElementwiseMaxOp(XPUOpTestWrapper): self.inputs = {'X': x, 'Y': y} self.outputs = { - 'Out': np.maximum(self.inputs['X'], - self.inputs['Y'].reshape(1, 1, 100)) + 'Out': + np.maximum(self.inputs['X'], + self.inputs['Y'].reshape(1, 1, 100)) } class TestElementwiseMaxOp_broadcast_3(TestElementwiseOp): + def init_input_output(self): x = np.random.uniform(0.5, 1, (2, 50, 2, 1)).astype(self.dtype) sgn = np.random.choice([-1, 1], (50, 2)).astype(self.dtype) @@ -146,11 +157,13 @@ class 
XPUTestElementwiseMaxOp(XPUOpTestWrapper): self.attrs = {'axis': 1} self.outputs = { - 'Out': np.maximum(self.inputs['X'], - self.inputs['Y'].reshape(1, 50, 2, 1)) + 'Out': + np.maximum(self.inputs['X'], + self.inputs['Y'].reshape(1, 50, 2, 1)) } class TestElementwiseMaxOp_broadcast_4(TestElementwiseOp): + def init_input_output(self): x = np.random.uniform(0.5, 1, (2, 3, 4, 5)).astype(self.dtype) sgn = np.random.choice([-1, 1], (2, 3, 1, 5)).astype(self.dtype) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py index 9233097b3ad..422fe087cbc 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import sys + sys.path.append("..") import unittest import numpy as np @@ -26,11 +27,13 @@ paddle.enable_static() class XPUTestElementwiseMinOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'elementwise_min' self.use_dynamic_create_class = False class TestElementwiseOp(XPUOpTest): + def setUp(self): self.op_type = "elementwise_min" # If x and y have the same value, the min() is not differentiable. @@ -61,24 +64,23 @@ class XPUTestElementwiseMinOp(XPUOpTestWrapper): def test_check_grad_ingore_x(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['Y'], - 'Out', - max_relative_error=0.005, - no_grad_set=set("X")) + self.check_grad_with_place(place, ['Y'], + 'Out', + max_relative_error=0.005, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['X'], - 'Out', - max_relative_error=0.005, - no_grad_set=set('Y')) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=0.005, + no_grad_set=set('Y')) @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseMinOp_scalar(TestElementwiseOp): + def init_input_output(self): x = np.random.random_integers(-5, 5, [10, 3, 4]).astype(self.dtype) y = np.array([0.5]).astype(self.dtype) @@ -88,6 +90,7 @@ class XPUTestElementwiseMinOp(XPUOpTestWrapper): } class TestElementwiseMinOp_Vector(TestElementwiseOp): + def init_input_output(self): x = np.random.random((100, )).astype(self.dtype) sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) @@ -98,6 +101,7 @@ class XPUTestElementwiseMinOp(XPUOpTestWrapper): } class TestElementwiseMinOp_broadcast_0(TestElementwiseOp): + def init_input_output(self): x = np.random.uniform(0.5, 1, (100, 3, 2)).astype(self.dtype) sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) @@ -106,11 +110,13 @@ class XPUTestElementwiseMinOp(XPUOpTestWrapper): self.attrs = {'axis': 0} self.inputs = {'X': x, 'Y': y} self.outputs = { - 'Out': np.minimum(self.inputs['X'], - self.inputs['Y'].reshape(100, 1, 1)) + 'Out': + np.minimum(self.inputs['X'], + self.inputs['Y'].reshape(100, 1, 1)) } class TestElementwiseMinOp_broadcast_1(TestElementwiseOp): + def init_input_output(self): x = np.random.uniform(0.5, 1, (2, 100, 3)).astype(self.dtype) sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) @@ -119,11 +125,13 @@ class XPUTestElementwiseMinOp(XPUOpTestWrapper): self.attrs = {'axis': 1} self.inputs = {'X': x, 'Y': y} self.outputs = { - 'Out': np.minimum(self.inputs['X'], - 
self.inputs['Y'].reshape(1, 100, 1)) + 'Out': + np.minimum(self.inputs['X'], + self.inputs['Y'].reshape(1, 100, 1)) } class TestElementwiseMinOp_broadcast_2(TestElementwiseOp): + def init_input_output(self): x = np.random.uniform(0.5, 1, (2, 3, 100)).astype(self.dtype) sgn = np.random.choice([-1, 1], (100, )).astype(self.dtype) @@ -131,11 +139,13 @@ class XPUTestElementwiseMinOp(XPUOpTestWrapper): np.random.uniform(1, 2, (100, )).astype(self.dtype) self.inputs = {'X': x, 'Y': y} self.outputs = { - 'Out': np.minimum(self.inputs['X'], - self.inputs['Y'].reshape(1, 1, 100)) + 'Out': + np.minimum(self.inputs['X'], + self.inputs['Y'].reshape(1, 1, 100)) } class TestElementwiseMinOp_broadcast_3(TestElementwiseOp): + def init_input_output(self): x = np.random.uniform(0.5, 1, (2, 25, 4, 1)).astype(self.dtype) sgn = np.random.choice([-1, 1], (25, 4)).astype(self.dtype) @@ -144,11 +154,13 @@ class XPUTestElementwiseMinOp(XPUOpTestWrapper): self.attrs = {'axis': 1} self.inputs = {'X': x, 'Y': y} self.outputs = { - 'Out': np.minimum(self.inputs['X'], - self.inputs['Y'].reshape(1, 25, 4, 1)) + 'Out': + np.minimum(self.inputs['X'], + self.inputs['Y'].reshape(1, 25, 4, 1)) } class TestElementwiseMinOp_broadcast_4(TestElementwiseOp): + def init_input_output(self): x = np.random.uniform(0.5, 1, (2, 10, 2, 5)).astype(self.dtype) sgn = np.random.choice([-1, 1], (2, 10, 1, 5)).astype(self.dtype) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mod_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mod_op_xpu.py index 9ef2c093604..de0c7000e1d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mod_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mod_op_xpu.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import sys + sys.path.append("..") import unittest import numpy as np @@ -27,11 +28,13 @@ paddle.enable_static() class XPUTestElementwiseModOp(XPUOpTestWrapper): + def __init__(self) -> None: self.op_name = 'elementwise_mod' self.use_dynamic_create_class = False class ElementwiseModOp(XPUOpTest): + def init_kernel_type(self): self.use_mkldnn = False @@ -68,6 +71,7 @@ class XPUTestElementwiseModOp(XPUOpTestWrapper): self.check_output_with_place(place) class TestElementwiseModOp_broadcast_1(ElementwiseModOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(2, 100, 3).astype(self.dtype), @@ -78,6 +82,7 @@ class XPUTestElementwiseModOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'] % self.inputs['Y']} class TestElementwiseModOp_broadcast_2(ElementwiseModOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(22, 128, 3).astype(self.dtype), @@ -88,6 +93,7 @@ class XPUTestElementwiseModOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'] % self.inputs['Y']} class TestRemainderOp(unittest.TestCase): + def test_dygraph(self): with fluid.dygraph.guard(): np_x = np.random.rand(22, 128, 3).astype('int64') diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py index b4dbb7cf045..7d5feab778f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import sys + sys.path.append("..") import unittest import numpy as np @@ -26,11 +27,13 @@ paddle.enable_static() class XPUTestElementwiseMulOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'elementwise_mul' self.use_dynamic_create_class = False class ElementwiseMulOp(XPUOpTest): + def init_kernel_type(self): self.use_mkldnn = False @@ -95,6 +98,7 @@ class XPUTestElementwiseMulOp(XPUOpTestWrapper): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseMulOp_scalar(ElementwiseMulOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(10, 3, 4).astype(self.dtype), @@ -103,6 +107,7 @@ class XPUTestElementwiseMulOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']} class TestElementwiseMulOp_Vector(ElementwiseMulOp): + def init_input_output(self): self.inputs = { 'X': np.random.random((100, )).astype(self.dtype), @@ -113,6 +118,7 @@ class XPUTestElementwiseMulOp(XPUOpTestWrapper): } class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(100, 2, 3).astype(self.dtype), @@ -124,6 +130,7 @@ class XPUTestElementwiseMulOp(XPUOpTestWrapper): self.attrs = {'axis': 0} class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(2, 100, 3).astype(self.dtype), @@ -136,6 +143,7 @@ class XPUTestElementwiseMulOp(XPUOpTestWrapper): } class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(2, 3, 100).astype(self.dtype), @@ -147,6 +155,7 @@ class XPUTestElementwiseMulOp(XPUOpTestWrapper): } class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(2, 10, 12, 3).astype(self.dtype), @@ -155,10 +164,12 @@ class XPUTestElementwiseMulOp(XPUOpTestWrapper): self.attrs = {'axis': 1} self.outputs = { - 'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 10, 12, 1) + 'Out': + self.inputs['X'] * self.inputs['Y'].reshape(1, 10, 12, 1) } class TestElementwiseMulOp_broadcast_4(ElementwiseMulOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(10, 2, 11).astype(self.dtype), @@ -167,6 +178,7 @@ class XPUTestElementwiseMulOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']} class TestElementwiseMulOp_broadcast_5(ElementwiseMulOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(10, 4, 2, 3).astype(self.dtype), @@ -175,6 +187,7 @@ class XPUTestElementwiseMulOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']} class TestElementwiseMulOp_commonuse_1(ElementwiseMulOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(2, 3, 100).astype(self.dtype), @@ -183,6 +196,7 @@ class XPUTestElementwiseMulOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']} class TestElementwiseMulOp_commonuse_2(ElementwiseMulOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(30, 3, 1, 5).astype(self.dtype), @@ -191,6 +205,7 @@ class XPUTestElementwiseMulOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']} class TestElementwiseMulOp_xsize_lessthan_ysize(ElementwiseMulOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(10, 10).astype(self.dtype), @@ -204,21 +219,24 @@ class XPUTestElementwiseMulOp(XPUOpTestWrapper): } class TestElementwiseMulOpError(unittest.TestCase): + def 
test_errors(self): with program_guard(Program(), Program()): # the input of elementwise_mul must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0)) - y1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.XPUPlace(0)) + y1 = fluid.create_lod_tensor(np.array([-1, 3, 5, 5]), + [[1, 1, 1, 1]], fluid.XPUPlace(0)) self.assertRaises(TypeError, fluid.layers.elementwise_mul, x1, y1) # the input dtype of elementwise_mul must be float32 - x2 = fluid.layers.data( - name='x2', shape=[3, 4, 5, 6], dtype="uint8") - y2 = fluid.layers.data( - name='y2', shape=[3, 4, 5, 6], dtype="uint8") + x2 = fluid.layers.data(name='x2', + shape=[3, 4, 5, 6], + dtype="uint8") + y2 = fluid.layers.data(name='y2', + shape=[3, 4, 5, 6], + dtype="uint8") self.assertRaises(TypeError, fluid.layers.elementwise_mul, x2, y2) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py index 59c5dd685e1..a1163070091 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import sys + sys.path.append("..") import unittest import numpy as np @@ -27,11 +28,13 @@ paddle.enable_static() @skip_check_grad_ci(reason="XPU does not support grad op currently") class XPUTestElementwisePowOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'elementwise_pow' self.use_dynamic_create_class = False class TestElementwisePowOp(XPUOpTest): + def setUp(self): self.op_type = "elementwise_pow" self.dtype = self.in_type @@ -51,6 +54,7 @@ class XPUTestElementwisePowOp(XPUOpTestWrapper): self.check_output_with_place(place) class TestElementwisePowOp_big_shape_1(TestElementwisePowOp): + def compute_input_output(self): self.inputs = { 'X': np.random.uniform(1, 2, [10, 10]).astype(self.dtype), @@ -59,6 +63,7 @@ class XPUTestElementwisePowOp(XPUOpTestWrapper): self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} class TestElementwisePowOp_big_shape_2(TestElementwisePowOp): + def compute_input_output(self): self.inputs = { 'X': np.random.uniform(1, 2, [10, 10]).astype(self.dtype), @@ -69,6 +74,7 @@ class XPUTestElementwisePowOp(XPUOpTestWrapper): @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwisePowOp_scalar(TestElementwisePowOp): + def compute_input_output(self): self.inputs = { 'X': np.random.uniform(0.1, 1, [3, 3, 4]).astype(self.dtype), @@ -77,6 +83,7 @@ class XPUTestElementwisePowOp(XPUOpTestWrapper): self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} class TestElementwisePowOp_tensor(TestElementwisePowOp): + def compute_input_output(self): self.inputs = { 'X': np.random.uniform(0.1, 1, [100]).astype(self.dtype), @@ -85,6 +92,7 @@ class XPUTestElementwisePowOp(XPUOpTestWrapper): self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} class TestElementwisePowOp_broadcast_0(TestElementwisePowOp): + def compute_input_output(self): self.inputs = { 'X': np.random.uniform(0.1, 1, [2, 1, 100]).astype(self.dtype), @@ -93,6 +101,7 @@ class XPUTestElementwisePowOp(XPUOpTestWrapper): self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} class 
TestElementwisePowOp_broadcast_1(TestElementwisePowOp): + def compute_input_output(self): self.inputs = { 'X': np.random.uniform(0.1, 1, [2, 100, 1]).astype(self.dtype), @@ -100,11 +109,12 @@ class XPUTestElementwisePowOp(XPUOpTestWrapper): } self.attrs = {'axis': 1} self.outputs = { - 'Out': - np.power(self.inputs['X'], self.inputs['Y'].reshape(100, 1)) + 'Out': np.power(self.inputs['X'], + self.inputs['Y'].reshape(100, 1)) } class TestElementwisePowOp_broadcast_2(TestElementwisePowOp): + def compute_input_output(self): self.inputs = { 'X': np.random.uniform(0.1, 1, [100, 3, 1]).astype(self.dtype), @@ -117,28 +127,32 @@ class XPUTestElementwisePowOp(XPUOpTestWrapper): } class TestElementwisePowOp_broadcast_3(TestElementwisePowOp): + def compute_input_output(self): self.inputs = { - 'X': - np.random.uniform(0.1, 1, [2, 20, 5, 1]).astype(self.dtype), + 'X': np.random.uniform(0.1, 1, + [2, 20, 5, 1]).astype(self.dtype), 'Y': np.random.uniform(0.1, 1, [20, 5]).astype(self.dtype) } self.attrs = {'axis': 1} self.outputs = { - 'Out': np.power(self.inputs['X'], - self.inputs['Y'].reshape(1, 20, 5, 1)) + 'Out': + np.power(self.inputs['X'], + self.inputs['Y'].reshape(1, 20, 5, 1)) } class TestElementwisePowOp_broadcast_4(TestElementwisePowOp): + def compute_input_output(self): self.inputs = { - 'X': - np.random.uniform(0.1, 1, [2, 10, 3, 5]).astype(self.dtype), + 'X': np.random.uniform(0.1, 1, + [2, 10, 3, 5]).astype(self.dtype), 'Y': np.random.uniform(0.1, 1, [2, 10, 1, 5]).astype(self.dtype) } self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} class TestElementwisePowOpInt(OpTest): + def setUp(self): self.op_type = "elementwise_pow" self.inputs = { diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py index 204485f3432..fe4283f5598 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py @@ -14,6 +14,7 @@ import numpy as np import sys + sys.path.append("..") import paddle from op_test import OpTest, skip_check_grad_ci @@ -25,11 +26,13 @@ paddle.enable_static() class XPUTestElementwiseSubOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'elementwise_sub' self.use_dynamic_create_class = False class TestElementwiseOp(XPUOpTest): + def setUp(self): self.op_type = "elementwise_sub" self.use_xpu = True @@ -56,24 +59,23 @@ class XPUTestElementwiseSubOp(XPUOpTestWrapper): def test_check_grad_ingore_x(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['Y'], - 'Out', - max_relative_error=0.005, - no_grad_set=set("X")) + self.check_grad_with_place(place, ['Y'], + 'Out', + max_relative_error=0.005, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['X'], - 'Out', - max_relative_error=0.005, - no_grad_set=set('Y')) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=0.005, + no_grad_set=set('Y')) @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseSubOp_scalar(TestElementwiseOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(10, 3, 4).astype(self.dtype), @@ -82,6 +84,7 @@ class XPUTestElementwiseSubOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']} class 
TestElementwiseSubOp_Vector(TestElementwiseOp): + def init_input_output(self): self.inputs = { 'X': np.random.random((100, )).astype(self.dtype), @@ -90,6 +93,7 @@ class XPUTestElementwiseSubOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']} class TestElementwiseSubOp_broadcast_0(TestElementwiseOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(100, 3, 2).astype(self.dtype), @@ -102,6 +106,7 @@ class XPUTestElementwiseSubOp(XPUOpTestWrapper): } class TestElementwiseSubOp_broadcast_1(TestElementwiseOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(2, 100, 3).astype(self.dtype), @@ -114,6 +119,7 @@ class XPUTestElementwiseSubOp(XPUOpTestWrapper): } class TestElementwiseSubOp_broadcast_2(TestElementwiseOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(2, 3, 100).astype(self.dtype), @@ -125,6 +131,7 @@ class XPUTestElementwiseSubOp(XPUOpTestWrapper): } class TestElementwiseSubOp_broadcast_3(TestElementwiseOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(2, 10, 12, 3).astype(self.dtype), @@ -133,10 +140,12 @@ class XPUTestElementwiseSubOp(XPUOpTestWrapper): self.attrs = {'axis': 1} self.outputs = { - 'Out': self.inputs['X'] - self.inputs['Y'].reshape(1, 10, 12, 1) + 'Out': + self.inputs['X'] - self.inputs['Y'].reshape(1, 10, 12, 1) } class TestElementwiseSubOp_broadcast_4(TestElementwiseOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(2, 5, 3, 12).astype(self.dtype), @@ -145,6 +154,7 @@ class XPUTestElementwiseSubOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']} class TestElementwiseSubOp_commonuse_1(TestElementwiseOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(2, 3, 100).astype(self.dtype), @@ -153,6 +163,7 @@ class XPUTestElementwiseSubOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']} class TestElementwiseSubOp_commonuse_2(TestElementwiseOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(10, 3, 1, 4).astype(self.dtype), @@ -161,6 +172,7 @@ class XPUTestElementwiseSubOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']} class TestElementwiseSubOp_xsize_lessthan_ysize(TestElementwiseOp): + def init_input_output(self): self.inputs = { 'X': np.random.rand(10, 12).astype(self.dtype), diff --git a/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py index acba0012a0a..0ca73b931b9 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest from op_test_xpu import XPUOpTest @@ -29,11 +30,13 @@ np.random.seed(10) class XPUTestExpandAsV2Op(XPUOpTestWrapper): + def __init__(self): self.op_name = 'expand_as_v2' self.use_dynamic_create_class = False class TestExpandAsV2XPUOp(XPUOpTest): + def setUp(self): self.init_dtype() self.set_xpu() @@ -65,6 +68,7 @@ class XPUTestExpandAsV2Op(XPUOpTestWrapper): self.check_output_with_place(self.place) class TestExpandAsOpRank2(TestExpandAsV2XPUOp): + def set_inputs(self): x = np.random.rand(10, 12).astype(self.dtype) self.inputs = {'X': x} @@ -77,6 +81,7 @@ class XPUTestExpandAsV2Op(XPUOpTestWrapper): self.outputs = {'Out': output} class 
TestExpandAsOpRank3(TestExpandAsV2XPUOp): + def set_inputs(self): x = np.random.rand(2, 3, 20).astype(self.dtype) self.inputs = {'X': x} @@ -89,6 +94,7 @@ class XPUTestExpandAsV2Op(XPUOpTestWrapper): self.outputs = {'Out': output} class TestExpandAsOpRank4(TestExpandAsV2XPUOp): + def set_inputs(self): x = np.random.rand(1, 1, 7, 16).astype(self.dtype) self.inputs = {'X': x} @@ -101,6 +107,7 @@ class XPUTestExpandAsV2Op(XPUOpTestWrapper): self.outputs = {'Out': output} class TestExpandAsOpRank5(TestExpandAsV2XPUOp): + def set_inputs(self): x = np.random.rand(1, 1, 7, 16, 1).astype(self.dtype) self.inputs = {'X': x} @@ -113,6 +120,7 @@ class XPUTestExpandAsV2Op(XPUOpTestWrapper): self.outputs = {'Out': output} class TestExpandAsOpRank6(TestExpandAsV2XPUOp): + def set_inputs(self): x = np.random.rand(1, 1, 7, 16, 1, 1).astype(self.dtype) self.inputs = {'X': x} @@ -127,24 +135,28 @@ class XPUTestExpandAsV2Op(XPUOpTestWrapper): # Test python API class TestExpandAsV2API(unittest.TestCase): + def test_api(self): input1 = np.random.random([12, 14]).astype("float32") input2 = np.random.random([2, 12, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32") + x = fluid.layers.data(name='x', + shape=[12, 14], + append_batch_size=False, + dtype="float32") - y = fluid.layers.data( - name='target_tensor', - shape=[2, 12, 14], - append_batch_size=False, - dtype="float32") + y = fluid.layers.data(name='target_tensor', + shape=[2, 12, 14], + append_batch_size=False, + dtype="float32") out_1 = paddle.expand_as(x, y=y) exe = fluid.Executor(place=fluid.XPUPlace(0)) res_1 = exe.run(fluid.default_main_program(), - feed={"x": input1, - "target_tensor": input2}, + feed={ + "x": input1, + "target_tensor": input2 + }, fetch_list=[out_1]) assert np.array_equal(res_1[0], np.tile(input1, (2, 1, 1))) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py index b5fa473ee26..f7319df270d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py @@ -16,6 +16,7 @@ from __future__ import print_function import unittest import sys import numpy as np + sys.path.append("..") from op_test import OpTest from op_test_xpu import XPUOpTest @@ -31,11 +32,13 @@ np.random.seed(10) # CANN Op Support X: float32, int32, int64 # Situation 1: shape is a list(without tensor) class XPUTestExpandV2Op(XPUOpTestWrapper): + def __init__(self): self.op_name = 'expand_v2' self.use_dynamic_create_class = False class TestExpandV2XPUOp(XPUOpTest): + def setUp(self): self.init_dtype() self.set_xpu() @@ -65,36 +68,42 @@ class XPUTestExpandV2Op(XPUOpTestWrapper): self.check_output_with_place(self.place) class TestExpandV2OpRank2_DimExpanding(TestExpandV2XPUOp): + def init_data(self): self.ori_shape = [120] self.shape = [2, 120] self.expand_times = [2, 1] class TestExpandV2OpRank2(TestExpandV2XPUOp): + def init_data(self): self.ori_shape = [1, 140] self.shape = [12, 140] self.expand_times = [12, 1] class TestExpandV2OpRank3_Corner(TestExpandV2XPUOp): + def init_data(self): self.ori_shape = (2, 10, 5) self.shape = (2, 10, 5) self.expand_times = (1, 1, 1) class TestExpandV2OpRank4(TestExpandV2XPUOp): + def init_data(self): self.ori_shape = (2, 4, 5, 7) self.shape = (-1, -1, -1, -1) self.expand_times = (1, 1, 1, 1) class TestExpandV2OpRank5(TestExpandV2XPUOp): + def init_data(self): self.ori_shape = (2, 4, 1, 15) 
self.shape = (2, -1, 4, -1) self.expand_times = (1, 1, 4, 1) class TestExpandV2OpRank6(TestExpandV2XPUOp): + def init_data(self): self.ori_shape = (4, 1, 30) self.shape = (2, -1, 4, 30) @@ -102,6 +111,7 @@ class XPUTestExpandV2Op(XPUOpTestWrapper): # Situation 2: shape is a list(with tensor) class TestExpandV2OpXPURank1_tensor_attr(TestExpandV2XPUOp): + def setUp(self): self.set_xpu() self.place = paddle.XPUPlace(0) @@ -129,6 +139,7 @@ class XPUTestExpandV2Op(XPUOpTestWrapper): class TestExpandV2OpRank2_Corner_tensor_attr( TestExpandV2OpXPURank1_tensor_attr): + def init_data(self): self.ori_shape = [12, 14] self.expand_times = [1, 1] @@ -137,6 +148,7 @@ class XPUTestExpandV2Op(XPUOpTestWrapper): # Situation 3: shape is a tensor class TestExpandV2XPUOp_tensor(TestExpandV2XPUOp): + def setUp(self): self.set_xpu() self.place = paddle.XPUPlace(0) @@ -161,6 +173,7 @@ class XPUTestExpandV2Op(XPUOpTestWrapper): # Situation 5: input x is int32 # skip grad check for int32 class TestExpandV2OpInteger(XPUOpTest): + def init_type(self): self.dtype = 'int32' @@ -170,8 +183,7 @@ class TestExpandV2OpInteger(XPUOpTest): self.place = paddle.XPUPlace(0) self.op_type = "expand_v2" self.inputs = { - 'X': np.random.randint( - 10, size=(2, 4, 20)).astype(self.dtype) + 'X': np.random.randint(10, size=(2, 4, 20)).astype(self.dtype) } self.attrs = {'shape': [2, 4, 20]} output = np.tile(self.inputs['X'], (1, 1, 1)) @@ -189,21 +201,20 @@ class TestExpandV2OpInteger(XPUOpTest): # Test python API class TestExpandV2API(unittest.TestCase): + def test_static(self): with fluid.program_guard(fluid.Program(), fluid.Program()): input = np.random.random([12, 14]).astype("float32") - x = fluid.layers.data( - name='x', - shape=[12, 14], - append_batch_size=False, - dtype="float32") + x = fluid.layers.data(name='x', + shape=[12, 14], + append_batch_size=False, + dtype="float32") positive_2 = fluid.layers.fill_constant([1], "int32", 12) - expand_shape = fluid.layers.data( - name="expand_shape", - shape=[2], - append_batch_size=False, - dtype="int32") + expand_shape = fluid.layers.data(name="expand_shape", + shape=[2], + append_batch_size=False, + dtype="int32") out_1 = paddle.expand(x, shape=[12, 14]) out_2 = paddle.expand(x, shape=[positive_2, 14]) @@ -214,7 +225,8 @@ class TestExpandV2API(unittest.TestCase): exe = fluid.Executor(place=paddle.XPUPlace(0)) res_1, res_2, res_3 = exe.run(fluid.default_main_program(), feed={ - "x": input, + "x": + input, "expand_shape": np.array([12, 14]).astype("int32") }, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fill_any_like_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fill_any_like_op_xpu.py index 333d57f0406..5b42da95829 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fill_any_like_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_fill_any_like_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import paddle @@ -32,11 +33,13 @@ paddle.enable_static() class XPUTestFillAnyLikeOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'fill_any_like' self.use_dynamic_create_class = False class TestFillAnyLikeOp(XPUOpTest): + def setUp(self): self.init_dtype() self.set_xpu() @@ -64,18 +67,22 @@ class XPUTestFillAnyLikeOp(XPUOpTestWrapper): self.check_output_with_place(self.place) class TestFillAnyLikeOp2(TestFillAnyLikeOp): + def set_value(self): self.value = -0.0 class TestFillAnyLikeOp3(TestFillAnyLikeOp): + def set_value(self): self.value = 1.0 class TestFillAnyLikeOp4(TestFillAnyLikeOp): + def 
init(self): self.value = 1e-9 class TestFillAnyLikeOp5(TestFillAnyLikeOp): + def set_value(self): if self.dtype == "float16": self.value = 0.05 diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py index d989fd0afad..81c3685fe8b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import paddle @@ -26,12 +27,14 @@ from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, class XPUTestFillConstantOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'fill_constant' self.use_dynamic_create_class = False # Situation 1: Attr(shape) is a list(without tensor) class TestFillConstantOp(XPUOpTest): + def setUp(self): '''Test fill_constant op with specified value ''' @@ -161,9 +164,10 @@ class XPUTestFillConstantOp(XPUOpTestWrapper): if self.index == 22: self.outputs = { 'Out': - np.full(self.shape, - convert_float_to_uint16( - np.array([self.value]).astype("float32"))) + np.full( + self.shape, + convert_float_to_uint16( + np.array([self.value]).astype("float32"))) } def set_shape(self): @@ -190,12 +194,14 @@ class XPUTestFillConstantOp(XPUOpTestWrapper): class TestFillConstantOp3_ShapeTensorList( TestFillConstantOp1_ShapeTensorList): + def set_shape(self): self.shape = [123, 3, 2, 1] self.infer_shape = [123, 111, 11, 1] class TestFillConstantOp4_ShapeTensorList( TestFillConstantOp1_ShapeTensorList): + def set_shape(self): self.shape = [123] self.infer_shape = [1] @@ -212,9 +218,10 @@ class XPUTestFillConstantOp(XPUOpTestWrapper): if self.index == 22: self.outputs = { 'Out': - np.full(self.shape, - convert_float_to_uint16( - np.array([self.value]).astype("float32"))) + np.full( + self.shape, + convert_float_to_uint16( + np.array([self.value]).astype("float32"))) } def set_shape(self): @@ -232,7 +239,8 @@ class XPUTestFillConstantOp(XPUOpTestWrapper): } if self.index == 22: self.inputs = { - 'ValueTensor': convert_float_to_uint16( + 'ValueTensor': + convert_float_to_uint16( np.array([self.value]).astype("float32")) } self.attrs = {'value': self.value, 'dtype': self.index} diff --git a/python/paddle/fluid/tests/unittests/xpu/test_flatten2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_flatten2_op_xpu.py index 9cbc83950d1..819fd1248fe 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_flatten2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_flatten2_op_xpu.py @@ -16,16 +16,19 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import numpy as np import paddle import paddle.fluid as fluid from op_test import OpTest from op_test_xpu import XPUOpTest + paddle.enable_static() class TestFlatten2Op(XPUOpTest): + def setUp(self): self.set_xpu() self.op_type = "flatten2" @@ -57,6 +60,7 @@ class TestFlatten2Op(XPUOpTest): class TestFlatten2OpWithCornerAxis(TestFlatten2Op): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.axis = 0 @@ -64,6 +68,7 @@ class TestFlatten2OpWithCornerAxis(TestFlatten2Op): class TestFlatten2OpWithDefaultAxis(TestFlatten2Op): + def init_test_case(self): self.in_shape = (10, 2, 2, 3) self.new_shape = (10, 12) @@ -73,6 +78,7 @@ class TestFlatten2OpWithDefaultAxis(TestFlatten2Op): class TestFlatten2OpSixDims(TestFlatten2Op): + def init_test_case(self): self.in_shape = (3, 2, 
3, 2, 4, 4) self.axis = 4 diff --git a/python/paddle/fluid/tests/unittests/xpu/test_flatten_contiguous_range_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_flatten_contiguous_range_op_xpu.py index dcad3c479f4..06fc12f5108 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_flatten_contiguous_range_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_flatten_contiguous_range_op_xpu.py @@ -15,11 +15,13 @@ from __future__ import print_function import sys + sys.path.append("..") import numpy as np import unittest import sys + sys.path.append("..") from op_test import OpTest from op_test_xpu import XPUOpTest @@ -30,6 +32,7 @@ paddle.enable_static() class TestFlattenOp(XPUOpTest): + def setUp(self): self.set_xpu() self.op_type = "flatten_contiguous_range" @@ -72,6 +75,7 @@ class TestFlattenOp(XPUOpTest): class TestFlattenOp_1(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 1 @@ -86,6 +90,7 @@ class TestFlattenOp_1(TestFlattenOp): class TestFlattenOp_2(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -100,6 +105,7 @@ class TestFlattenOp_2(TestFlattenOp): class TestFlattenOp_3(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -114,6 +120,7 @@ class TestFlattenOp_3(TestFlattenOp): class TestFlattenOp_4(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = -2 @@ -128,6 +135,7 @@ class TestFlattenOp_4(TestFlattenOp): class TestFlattenOp_5(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 2 @@ -142,6 +150,7 @@ class TestFlattenOp_5(TestFlattenOp): class TestFlattenOpSixDims(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) self.start_axis = 3 @@ -156,6 +165,7 @@ class TestFlattenOpSixDims(TestFlattenOp): class TestFlattenOp_Float32(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -171,6 +181,7 @@ class TestFlattenOp_Float32(TestFlattenOp): class TestFlattenOp_int32(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -190,6 +201,7 @@ class TestFlattenOp_int32(TestFlattenOp): class TestFlattenOp_int8(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -208,6 +220,7 @@ class TestFlattenOp_int8(TestFlattenOp): class TestFlattenOp_int64(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 5, 4) self.start_axis = 0 @@ -226,6 +239,7 @@ class TestFlattenOp_int64(TestFlattenOp): class TestFlatten2OpError(unittest.TestCase): + def test_errors(self): image_shape = (2, 3, 4, 4) x = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * @@ -233,22 +247,25 @@ class TestFlatten2OpError(unittest.TestCase): x = x.astype('float32') def test_ValueError1(): - x_var = paddle.static.data( - name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data(name="x", + shape=image_shape, + dtype='float32') out = paddle.flatten(x_var, start_axis=2, stop_axis=1) self.assertRaises(ValueError, test_ValueError1) def test_ValueError2(): - x_var = paddle.static.data( - name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data(name="x", + shape=image_shape, + dtype='float32') paddle.flatten(x_var, start_axis=10, stop_axis=1) self.assertRaises(ValueError, test_ValueError2) def test_ValueError3(): - x_var = paddle.static.data( - name="x", shape=image_shape, dtype='float32') + x_var = 
paddle.static.data(name="x", + shape=image_shape, + dtype='float32') paddle.flatten(x_var, start_axis=2, stop_axis=10) self.assertRaises(ValueError, test_ValueError3) @@ -258,8 +275,9 @@ class TestFlatten2OpError(unittest.TestCase): x2 = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * image_shape[3]).reshape(image_shape) / 100. x2 = x2.astype('float16') - x2_var = paddle.fluid.data( - name='x2', shape=[3, 2, 4, 5], dtype='float16') + x2_var = paddle.fluid.data(name='x2', + shape=[3, 2, 4, 5], + dtype='float16') paddle.flatten(x2_var) self.assertRaises(TypeError, test_type) @@ -271,6 +289,7 @@ class TestFlatten2OpError(unittest.TestCase): class TestStaticFlattenPythonAPI(unittest.TestCase): + def execute_api(self, x, start_axis=0, stop_axis=-1): return paddle.flatten(x, start_axis, stop_axis) @@ -280,8 +299,9 @@ class TestStaticFlattenPythonAPI(unittest.TestCase): main_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, paddle.static.Program()): - x = paddle.static.data( - name="x", shape=[2, 3, 4, 4], dtype='float32') + x = paddle.static.data(name="x", + shape=[2, 3, 4, 4], + dtype='float32') out = self.execute_api(x, start_axis=-2, stop_axis=-1) exe = paddle.static.Executor(place=paddle.XPUPlace(0)) @@ -290,11 +310,13 @@ class TestStaticFlattenPythonAPI(unittest.TestCase): class TestStaticInplaceFlattenPythonAPI(TestStaticFlattenPythonAPI): + def execute_api(self, x, start_axis=0, stop_axis=-1): return x.flatten_(start_axis, stop_axis) class TestFlattenPython(unittest.TestCase): + def test_python_api(self): image_shape = (2, 3, 4, 4) x = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * diff --git a/python/paddle/fluid/tests/unittests/xpu/test_flatten_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_flatten_op_xpu.py index ed435198353..9622fc5bb1a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_flatten_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_flatten_op_xpu.py @@ -16,16 +16,19 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import numpy as np import paddle import paddle.fluid as fluid from op_test import OpTest from op_test_xpu import XPUOpTest + paddle.enable_static() class TestFlattenOp(XPUOpTest): + def setUp(self): self.op_type = "flatten" self.use_xpu = True @@ -51,6 +54,7 @@ class TestFlattenOp(XPUOpTest): class TestFlattenOp1(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 2, 10) self.axis = 0 @@ -58,6 +62,7 @@ class TestFlattenOp1(TestFlattenOp): class TestFlattenOpWithDefaultAxis(TestFlattenOp): + def init_test_case(self): self.in_shape = (10, 2, 2, 3) self.new_shape = (10, 12) @@ -67,6 +72,7 @@ class TestFlattenOpWithDefaultAxis(TestFlattenOp): class TestFlattenOpSixDims(TestFlattenOp): + def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) self.axis = 4 diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py index 68854edb0eb..0198bfde590 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -27,10 +28,12 @@ paddle.enable_static() class XPUTestGatherNd(XPUOpTestWrapper): + def __init__(self): self.op_name = 'gather_nd' class XPUTestGatherNdBase(XPUOpTest): + def setUp(self): self.op_type = "gather_nd" 
self.dtype = self.in_type @@ -39,7 +42,9 @@ class XPUTestGatherNd(XPUOpTestWrapper): self.init_data() self.inputs = {'X': self.xnp, 'Index': self.inp} - self.outputs = {'Out': self.output, } + self.outputs = { + 'Out': self.output, + } def test_check_output(self): self.check_output_with_place(self.place) @@ -51,6 +56,7 @@ class XPUTestGatherNd(XPUOpTestWrapper): (self.xnp[np.newaxis, :], self.xnp[np.newaxis, :])) class XPUTestGatherNdOpWithEmptyIndex1(XPUTestGatherNdBase): + def init_data(self): self.xnp = np.random.random((5, 20)).astype(self.in_type) self.inp = np.array([[], []]).astype("int32") @@ -58,6 +64,7 @@ class XPUTestGatherNd(XPUOpTestWrapper): (self.xnp[np.newaxis, :], self.xnp[np.newaxis, :])) class XPUTestGatherNdOpWithEmptyIndex2(XPUTestGatherNdBase): + def init_data(self): self.xnp = np.random.random((5, 20)).astype(self.in_type) self.inp = np.array([[], []]).astype("int64") @@ -65,84 +72,96 @@ class XPUTestGatherNd(XPUOpTestWrapper): (self.xnp[np.newaxis, :], self.xnp[np.newaxis, :])) class XPUTestGatherNdOpWithIndex1(XPUTestGatherNdBase): + def init_data(self): self.xnp = np.random.random((5, 20)).astype(self.in_type) self.inp = np.array([1]).astype("int32") self.output = self.xnp[self.inp] class XPUTestGatherNdOpWithIndex2(XPUTestGatherNdBase): + def init_data(self): self.xnp = np.random.random((5, 20)).astype(self.in_type) self.inp = np.array([1]).astype("int64") self.output = self.xnp[self.inp] class XPUTestGatherNdOpWithLowIndex1(XPUTestGatherNdBase): + def init_data(self): self.xnp = np.random.uniform(0, 100, (10, 10)).astype(self.in_type) self.inp = np.array([[1], [2]]).astype("int32") self.output = self.xnp[tuple(self.inp.T)] class XPUTestGatherNdOpWithLowIndex2(XPUTestGatherNdBase): + def init_data(self): self.xnp = np.random.uniform(0, 100, (10, 10)).astype(self.in_type) self.inp = np.array([1, 2]).astype("int64") self.output = self.xnp[tuple(self.inp.T)] class XPUTestGatherNdOpWithHighRankSame1(XPUTestGatherNdBase): + def init_data(self): shape = (5, 2, 3, 1, 10) self.xnp = np.random.rand(*shape).astype(self.in_type) - self.inp = np.vstack( - [np.random.randint( - 0, s, size=2) for s in shape]).T.astype("int32") + self.inp = np.vstack([ + np.random.randint(0, s, size=2) for s in shape + ]).T.astype("int32") self.output = self.xnp[tuple(self.inp.T)] class XPUTestGatherNdOpWithHighRankSame2(XPUTestGatherNdBase): + def init_data(self): shape = (5, 2, 3, 1, 10) self.xnp = np.random.rand(*shape).astype(self.in_type) - self.inp = np.vstack( - [np.random.randint( - 0, s, size=2) for s in shape]).T.astype("int64") + self.inp = np.vstack([ + np.random.randint(0, s, size=2) for s in shape + ]).T.astype("int64") self.output = self.xnp[tuple(self.inp.T)] class XPUTestGatherNdOpWithHighRankDiff1(XPUTestGatherNdBase): + def init_data(self): shape = (2, 3, 4, 1, 10) self.xnp = np.random.rand(*shape).astype(self.in_type) - self.inp = np.vstack( - [np.random.randint( - 0, s, size=200) for s in shape]).T.astype("int32") + self.inp = np.vstack([ + np.random.randint(0, s, size=200) for s in shape + ]).T.astype("int32") self.output = self.xnp[tuple(self.inp.T)] class XPUTestGatherNdOpWithHighRankDiff2(XPUTestGatherNdBase): + def init_data(self): shape = (2, 3, 4, 1, 10) self.xnp = np.random.rand(*shape).astype(self.in_type) - self.inp = np.vstack( - [np.random.randint( - 0, s, size=200) for s in shape]).T.astype("int64") + self.inp = np.vstack([ + np.random.randint(0, s, size=200) for s in shape + ]).T.astype("int64") self.output = self.xnp[tuple(self.inp.T)] class 
XPUTestGatherNdOpWithSameIndexAsX1(XPUTestGatherNdBase): + def init_data(self): self.xnp = np.random.uniform(0, 100, (10, 10)).astype(self.in_type) self.inp = np.array([[1, 1], [2, 1]]).astype("int32") self.output = self.xnp[tuple(self.inp.T)] class XPUTestGatherNdOpWithSameIndexAsX2(XPUTestGatherNdBase): + def init_data(self): self.xnp = np.random.uniform(0, 100, (10, 10)).astype(self.in_type) self.inp = np.array([[1, 1], [2, 1]]).astype("int64") self.output = self.xnp[tuple(self.inp.T)] class XPUTestGatherNdOpIndex1(XPUTestGatherNdBase): + def init_data(self): self.xnp = np.random.uniform(0, 100, (10, 10)).astype(self.in_type) self.inp = np.array([1, 2]).astype("int32") self.output = self.xnp[tuple(self.inp.T)] class XPUTestGatherNdOpIndex2(XPUTestGatherNdBase): + def init_data(self): self.xnp = np.random.uniform(0, 100, (10, 10)).astype(self.in_type) self.inp = np.array([1, 2]).astype("int64") diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gather_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_gather_op_xpu.py index f0e6315514f..4b9cf40a38f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_gather_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_gather_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import numpy as np @@ -34,10 +35,12 @@ def gather_numpy(x, index, axis): class XPUTestGather(XPUOpTestWrapper): + def __init__(self): self.op_name = 'gather' class TestXPUGatherOp(XPUOpTest): + def setUp(self): self.op_type = "gather" self.place = paddle.XPUPlace(0) @@ -65,24 +68,28 @@ class XPUTestGather(XPUOpTestWrapper): self.check_grad_with_place(self.place, ['X'], 'Out') class TestCase1(TestXPUGatherOp): + def init_config(self): self.x_shape = (100) self.index = [1, 3, 5] self.index_type = np.int32 class TestCase2(TestXPUGatherOp): + def init_config(self): self.x_shape = (100) self.index = [1, 3, 5] self.index_type = np.int64 class TestCase3(TestXPUGatherOp): + def init_config(self): self.x_shape = (10, 20) self.index = [1, 3, 5] self.index_type = np.int32 class TestCase4(TestXPUGatherOp): + def init_config(self): self.x_shape = (10, 20) self.attrs = {'overwrite': False} @@ -90,6 +97,7 @@ class XPUTestGather(XPUOpTestWrapper): self.index_type = np.int32 class TestCase5(TestXPUGatherOp): + def init_config(self): self.x_shape = (10, 20) self.attrs = {'overwrite': False} @@ -97,6 +105,7 @@ class XPUTestGather(XPUOpTestWrapper): self.index_type = np.int32 class TestCase6(TestXPUGatherOp): + def init_config(self): self.x_shape = (10, 20) self.attrs = {'overwrite': True} @@ -104,6 +113,7 @@ class XPUTestGather(XPUOpTestWrapper): self.index_type = np.int32 class TestCase7(TestXPUGatherOp): + def init_config(self): self.x_shape = (10, 20) self.attrs = {'overwrite': True} diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py index 454c3144908..0a0a9bb3d36 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -30,6 +31,7 @@ paddle.enable_static() class TestXPUGaussianRandomOp(TestGaussianRandomOp): + def test_check_output(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) diff --git 
a/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py b/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py index dbac796eee8..3d8035b4e32 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py @@ -16,6 +16,7 @@ import unittest import os import copy import sys + sys.path.append("..") from launch_function_helper import wait, _find_free_port from multiprocessing import Pool, Process @@ -69,6 +70,7 @@ def run_gen_bkc_id(attr): class TestGenBKCLIdOp(unittest.TestCase): + def setUp(self): try: self._dist_ut_port_0 = int(os.environ["PADDLE_DIST_UT_PORT"]) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_huber_loss_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_huber_loss_op_xpu.py index 30c91f87a24..c3caa0bad10 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_huber_loss_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_huber_loss_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -38,11 +39,13 @@ def huber_loss_forward(val, delta): class XPUTestHuberLossOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'huber_loss' self.use_dynamic_create_class = False class TestHuberLossOp(XPUOpTest): + def setUp(self): self.set_xpu() self.op_type = 'huber_loss' @@ -89,22 +92,27 @@ class XPUTestHuberLossOp(XPUOpTestWrapper): self.check_grad_with_place(self.place, ['X', 'Y'], 'Out') def test_check_grad_ingore_x(self): - self.check_grad_with_place( - self.place, ['Y'], 'Out', no_grad_set=set("residual")) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + no_grad_set=set("residual")) def test_check_grad_ingore_y(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', no_grad_set=set('residual')) + self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set('residual')) class TestHuberLossOp1(TestHuberLossOp): + def set_shape(self): return (640) class TestHuberLossOp2(TestHuberLossOp): + def set_shape(self): return (10, 10) class TestHuberLossOp3(TestHuberLossOp): + def set_shape(self): return (10, 10, 1) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_iou_similarity_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_iou_similarity_op_xpu.py index b745dce9efe..ceb154f1e35 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_iou_similarity_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_iou_similarity_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import unittest @@ -30,6 +31,7 @@ paddle.enable_static() class TestXPUIOUSimilarityOp(XPUOpTest): + def test_check_output(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) @@ -41,7 +43,7 @@ class TestXPUIOUSimilarityOp(XPUOpTest): self.boxes2 = random.rand(3, 4).astype('float32') self.output = random.rand(2, 3).astype('float32') self.box_normalized = False - # run python iou computation + # run python iou computation self._compute_iou() self.inputs = {'X': self.boxes1, 'Y': self.boxes2} self.attrs = {"box_normalized": self.box_normalized, 'use_xpu': True} @@ -77,6 +79,7 @@ class TestXPUIOUSimilarityOp(XPUOpTest): class TestXPUIOUSimilarityOpWithLoD(TestXPUIOUSimilarityOp): + def test_check_output(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) @@ -87,7 +90,7 @@ class TestXPUIOUSimilarityOpWithLoD(TestXPUIOUSimilarityOp): self.boxes1_lod = [[1, 1]] 
self.output_lod = [[1, 1]] self.box_normalized = False - # run python iou computation + # run python iou computation self._compute_iou() self.inputs = {'X': (self.boxes1, self.boxes1_lod), 'Y': self.boxes2} self.attrs = {"box_normalized": self.box_normalized} @@ -95,6 +98,7 @@ class TestXPUIOUSimilarityOpWithLoD(TestXPUIOUSimilarityOp): class TestXPUIOUSimilarityOpWithBoxNormalized(TestXPUIOUSimilarityOp): + def test_check_output(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) @@ -105,7 +109,7 @@ class TestXPUIOUSimilarityOpWithBoxNormalized(TestXPUIOUSimilarityOp): self.boxes1_lod = [[1, 1]] self.output_lod = [[1, 1]] self.box_normalized = True - # run python iou computation + # run python iou computation self._compute_iou() self.inputs = {'X': (self.boxes1, self.boxes1_lod), 'Y': self.boxes2} self.attrs = {"box_normalized": self.box_normalized} diff --git a/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py index afe1662ce5c..415796988d1 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py @@ -18,6 +18,7 @@ import unittest import paddle import numpy as np import sys + sys.path.append("..") from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper @@ -26,6 +27,7 @@ paddle.enable_static() class XPUTestLabelSmoothOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'label_smooth' self.use_dynamic_create_class = True @@ -45,6 +47,7 @@ class XPUTestLabelSmoothOp(XPUOpTestWrapper): return base_class, classes class TestLabelSmoothOp(XPUOpTest): + def setUp(self): self.op_type = "label_smooth" self.epsilon = 0.1 @@ -54,19 +57,19 @@ class XPUTestLabelSmoothOp(XPUOpTestWrapper): self.label_dim = 12 self.label = np.zeros( (self.batch_size, self.label_dim)).astype("float32") - nonzero_index = np.random.randint( - self.label_dim, size=(self.batch_size)) + nonzero_index = np.random.randint(self.label_dim, + size=(self.batch_size)) self.label[np.arange(self.batch_size), nonzero_index] = 1 - smoothed_label = (1 - self.epsilon - ) * self.label + self.epsilon / self.label_dim + smoothed_label = ( + 1 - self.epsilon) * self.label + self.epsilon / self.label_dim self.inputs = {'X': self.label} self.attrs = {'epsilon': self.epsilon} self.outputs = {'Out': smoothed_label} if hasattr(self, 'is_3d') and self.is_3d: self.inputs['X'] = self.inputs['X'].reshape( [2, -1, self.inputs['X'].shape[-1]]) - self.outputs['Out'] = self.outputs['Out'].reshape(self.inputs[ - 'X'].shape) + self.outputs['Out'] = self.outputs['Out'].reshape( + self.inputs['X'].shape) def test_check_output(self): if not paddle.is_compiled_with_xpu(): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py index f6aa82d596b..6bdc45e6a33 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np diff --git a/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py index b166661c3d6..1f2caa9fbe9 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py +++ 
b/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py @@ -17,6 +17,7 @@ import numpy as np import sys import unittest from functools import reduce + sys.path.append("..") from op_test import OpTest from operator import mul @@ -44,6 +45,7 @@ def ref_layer_norm(x, scale, bias, epsilon, begin_norm_axis=1): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestXPULayerNormOp(OpTest): + def setUp(self): self.op_type = "layer_norm" self.dtype = np.float32 @@ -75,13 +77,15 @@ class TestXPULayerNormOp(OpTest): self.check_output_with_place(paddle.XPUPlace(0), atol=1e-4) def test_check_grad(self): - self.check_grad_with_place( - paddle.XPUPlace(0), ['X'], 'Y', max_relative_error=0.02) + self.check_grad_with_place(paddle.XPUPlace(0), ['X'], + 'Y', + max_relative_error=0.02) @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestXPULayerNormOpAxis2(TestXPULayerNormOp): + def set_attrs(self): self.begin_norm_axis = 2 @@ -89,6 +93,7 @@ class TestXPULayerNormOpAxis2(TestXPULayerNormOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestXPULayerNormOpAxis3(TestXPULayerNormOp): + def set_attrs(self): self.begin_norm_axis = 3 @@ -96,6 +101,7 @@ class TestXPULayerNormOpAxis3(TestXPULayerNormOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestXPULayerNormOp2D(TestXPULayerNormOp): + def set_attrs(self): self.shape = [10, 12] @@ -103,6 +109,7 @@ class TestXPULayerNormOp2D(TestXPULayerNormOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestXPULayerNormOp3D(TestXPULayerNormOp): + def set_attrs(self): self.shape = [4, 5, 6] diff --git a/python/paddle/fluid/tests/unittests/xpu/test_log_loss_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_log_loss_op_xpu.py index 3ba3a8b5eef..3924c1bd0f3 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_log_loss_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_log_loss_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import paddle.fluid.core as core import unittest @@ -30,6 +31,7 @@ def sigmoid_array(x): class TestXPULogLossOp(OpTest): + def setUp(self): self.op_type = 'log_loss' samples_num = 100 diff --git a/python/paddle/fluid/tests/unittests/xpu/test_logical_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_logical_op_xpu.py index 6b720b9717b..34eafd20811 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_logical_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_logical_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -29,10 +30,12 @@ paddle.enable_static() ################## TEST OP: logical_and ################## class XPUTestLogicalAnd(XPUOpTestWrapper): + def __init__(self): self.op_name = 'logical_and' class XPUTestLogicalAndBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_case() @@ -41,10 +44,14 @@ class XPUTestLogicalAnd(XPUOpTestWrapper): def set_case(self): self.op_type = 'logical_and' - x = np.random.randint( - self.low, self.high, self.x_shape, dtype=self.dtype) - y = np.random.randint( - self.low, self.high, self.y_shape, dtype=self.dtype) + x = np.random.randint(self.low, + self.high, + self.x_shape, + dtype=self.dtype) + y = np.random.randint(self.low, + self.high, + self.y_shape, + 
dtype=self.dtype) out = np.logical_and(x, y) self.attrs = {'use_xpu': True} @@ -68,6 +75,7 @@ class XPUTestLogicalAnd(XPUOpTestWrapper): pass class XPUTestLogicalAndCase1(XPUTestLogicalAndBase): + def init_case(self): self.dtype = np.int32 self.x_shape = [4, 5] @@ -83,10 +91,12 @@ for stype in support_types: ################## TEST OP: logical_or ################## class XPUTestLogicalOr(XPUOpTestWrapper): + def __init__(self): self.op_name = 'logical_or' class XPUTestLogicalOrBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_case() @@ -95,10 +105,14 @@ class XPUTestLogicalOr(XPUOpTestWrapper): def set_case(self): self.op_type = 'logical_or' - x = np.random.randint( - self.low, self.high, self.x_shape, dtype=self.dtype) - y = np.random.randint( - self.low, self.high, self.y_shape, dtype=self.dtype) + x = np.random.randint(self.low, + self.high, + self.x_shape, + dtype=self.dtype) + y = np.random.randint(self.low, + self.high, + self.y_shape, + dtype=self.dtype) out = np.logical_or(x, y) self.attrs = {'use_xpu': True} @@ -122,6 +136,7 @@ class XPUTestLogicalOr(XPUOpTestWrapper): pass class XPUTestLogicalOrCase1(XPUTestLogicalOrBase): + def init_case(self): self.dtype = np.int32 self.x_shape = [4, 5] @@ -137,10 +152,12 @@ for stype in support_types: ################## TEST OP: logical_xor ################## class XPUTestLogicalXor(XPUOpTestWrapper): + def __init__(self): self.op_name = 'logical_xor' class XPUTestLogicalXorBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_case() @@ -149,10 +166,14 @@ class XPUTestLogicalXor(XPUOpTestWrapper): def set_case(self): self.op_type = 'logical_xor' - x = np.random.randint( - self.low, self.high, self.x_shape, dtype=self.dtype) - y = np.random.randint( - self.low, self.high, self.y_shape, dtype=self.dtype) + x = np.random.randint(self.low, + self.high, + self.x_shape, + dtype=self.dtype) + y = np.random.randint(self.low, + self.high, + self.y_shape, + dtype=self.dtype) out = np.logical_xor(x, y) self.attrs = {'use_xpu': True} @@ -176,6 +197,7 @@ class XPUTestLogicalXor(XPUOpTestWrapper): pass class XPUTestLogicalXorCase1(XPUTestLogicalXorBase): + def init_case(self): self.dtype = np.int32 self.x_shape = [4, 5] @@ -191,10 +213,12 @@ for stype in support_types: ################## TEST OP: LogicalNot ################## class XPUTestLogicalNot(XPUOpTestWrapper): + def __init__(self): self.op_name = 'logical_not' class XPUTestLogicalNotBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_case() @@ -203,8 +227,10 @@ class XPUTestLogicalNot(XPUOpTestWrapper): def set_case(self): self.op_type = 'logical_not' - x = np.random.randint( - self.low, self.high, self.x_shape, dtype=self.dtype) + x = np.random.randint(self.low, + self.high, + self.x_shape, + dtype=self.dtype) out = np.logical_not(x) self.attrs = {'use_xpu': True} diff --git a/python/paddle/fluid/tests/unittests/xpu/test_logsumexp_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_logsumexp_op_xpu.py index c4e1363bd9c..6c621a6853b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_logsumexp_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_logsumexp_op_xpu.py @@ -15,6 +15,7 @@ import paddle import unittest import sys + sys.path.append("..") import numpy as np from op_test import OpTest @@ -35,6 +36,7 @@ def ref_logsumexp(x, axis=None, keepdim=False, reduce_all=False): class XPUTestLogsumexp(XPUOpTest): + def setUp(self): self.op_type = 'logsumexp' self.shape = [2, 3, 4, 5] @@ -69,26 +71,31 @@ class 
XPUTestLogsumexp(XPUOpTest): class TestLogsumexp_shape(XPUTestLogsumexp): + def set_attrs(self): self.shape = [4, 5, 6] class TestLogsumexp_axis(XPUTestLogsumexp): + def set_attrs(self): self.axis = [0, -1] class TestLogsumexp_axis_all(XPUTestLogsumexp): + def set_attrs(self): self.axis = [0, 1, 2, 3] class TestLogsumexp_keepdim(XPUTestLogsumexp): + def set_attrs(self): self.keepdim = True class TestLogsumexp_reduce_all(XPUTestLogsumexp): + def set_attrs(self): self.reduce_all = True diff --git a/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py index d29684b11b0..2dbabdb7c58 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -31,6 +32,7 @@ paddle.enable_static() class TestLookupTableOp(OpTest): + def setUp(self): self.op_type = "lookup_table_v2" table = np.random.random((17, 31)).astype("float64") @@ -43,15 +45,15 @@ class TestLookupTableOp(OpTest): def test_check_grad(self): - self.check_grad_with_place( - inputs_to_check=['W'], - output_names='Out', - no_grad_set=set('Ids'), - place=paddle.XPUPlace(0), - in_place=True) + self.check_grad_with_place(inputs_to_check=['W'], + output_names='Out', + no_grad_set=set('Ids'), + place=paddle.XPUPlace(0), + in_place=True) class TestLookupTableOpWithTensorIds(OpTest): + def setUp(self): self.op_type = "lookup_table_v2" table = np.random.random((17, 31)).astype("float64") @@ -63,12 +65,11 @@ class TestLookupTableOpWithTensorIds(OpTest): self.check_output_with_place(place=paddle.XPUPlace(0)) def test_check_grad(self): - self.check_grad_with_place( - inputs_to_check=['W'], - output_names='Out', - no_grad_set=set('Ids'), - place=paddle.XPUPlace(0), - in_place=True) + self.check_grad_with_place(inputs_to_check=['W'], + output_names='Out', + no_grad_set=set('Ids'), + place=paddle.XPUPlace(0), + in_place=True) @skip_check_grad_ci( @@ -76,6 +77,7 @@ class TestLookupTableOpWithTensorIds(OpTest): "the gradient of paddings makes no sense and we don't " "test the gradient here.") class TestLookupTableOpWithPadding(TestLookupTableOp): + def test_check_output(self): ids = np.squeeze(self.inputs['Ids']) padding_idx = np.random.choice(ids, 1)[0] @@ -89,6 +91,7 @@ class TestLookupTableOpWithPadding(TestLookupTableOp): "the gradient of paddings makes no sense and we don't " "test the gradient here.") class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds): + def test_check_output(self): ids = self.inputs['Ids'] flatten_idx = ids.flatten() @@ -99,6 +102,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds): class TestLookupTableWIsSelectedRows(unittest.TestCase): + def prepare_ids(self, scope, place): ids_tensor = scope.var('Ids').get_tensor() ids_array = np.array([0, 4, 3, 5]).astype("int64") @@ -146,12 +150,13 @@ class TestLookupTableWIsSelectedRows(unittest.TestCase): self.check_with_place(place) -class TestLookupTableWithTensorIdsWIsSelectedRows( - TestLookupTableWIsSelectedRows): +class TestLookupTableWithTensorIdsWIsSelectedRows(TestLookupTableWIsSelectedRows + ): + def prepare_ids(self, scope, place): ids_tensor = scope.var('Ids').get_tensor() - ids_array = np.random.randint( - low=0, high=6, size=(2, 4, 
3)).astype("int64") + ids_array = np.random.randint(low=0, high=6, + size=(2, 4, 3)).astype("int64") ids_tensor.set(ids_array, place) return ids_array @@ -161,6 +166,7 @@ class TestLookupTableWithTensorIdsWIsSelectedRows( class TestLookupTableApi(unittest.TestCase): + def test_api(self): x = fluid.layers.data(name='x', shape=[20], dtype='int64') emb = fluid.embedding(input=x, size=[128, 64]) @@ -170,12 +176,15 @@ class TestLookupTableApi(unittest.TestCase): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - ret = exe.run(feed={'x': x_data, }, + ret = exe.run(feed={ + 'x': x_data, + }, fetch_list=[emb], return_numpy=False) class TestEmbedOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): input_data = np.random.randint(0, 10, (4, 6)).astype("int64") diff --git a/python/paddle/fluid/tests/unittests/xpu/test_masked_select_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_masked_select_op_xpu.py index 990594e1f9e..2c1ef7755ab 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_masked_select_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_masked_select_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") import paddle @@ -36,10 +37,12 @@ def np_masked_select(x, mask): class XPUTestMaskedSelectOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'masked_select' class TestMaskedSelectOp(XPUOpTest): + def setUp(self): self.init() self.dtype = self.in_type @@ -60,10 +63,12 @@ class XPUTestMaskedSelectOp(XPUOpTestWrapper): self.shape = (50, 3) class TestMaskedSelectOp1(TestMaskedSelectOp): + def init(self): self.shape = (6, 8, 9, 18) class TestMaskedSelectOp2(TestMaskedSelectOp): + def init(self): self.shape = (168, ) @@ -74,6 +79,7 @@ for stype in support_types: class TestMaskedSelectAPI(unittest.TestCase): + def test_imperative_mode(self): paddle.disable_static(paddle.XPUPlace(0)) shape = (88, 6, 8) @@ -99,13 +105,16 @@ class TestMaskedSelectAPI(unittest.TestCase): exe = paddle.static.Executor(place=paddle.XPUPlace(0)) res = exe.run(paddle.static.default_main_program(), - feed={"x": np_x, - "mask": np_mask}, + feed={ + "x": np_x, + "mask": np_mask + }, fetch_list=[out]) self.assertEqual(np.allclose(res, np_out), True) class TestMaskedSelectError(unittest.TestCase): + def test_error(self): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): @@ -113,8 +122,9 @@ class TestMaskedSelectError(unittest.TestCase): shape = [8, 9, 6] x = paddle.fluid.data(shape=shape, dtype='float32', name='x') mask = paddle.fluid.data(shape=shape, dtype='bool', name='mask') - mask_float = paddle.fluid.data( - shape=shape, dtype='float32', name='mask_float') + mask_float = paddle.fluid.data(shape=shape, + dtype='float32', + name='mask_float') np_x = np.random.random(shape).astype('float32') np_mask = np.array(np.random.randint(2, size=shape, dtype=bool)) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py index 3120f1973f4..bc6fa19a354 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import paddle.fluid.core as core import unittest @@ -161,18 +162,22 @@ def test_negative_dims_program(obj): obj.assertEqual(Ref.shape[idx], output.shape[idx]) exe = 
fluid.Executor(fluid.XPUPlace(0)) res, = exe.run(fluid.default_main_program(), - feed={'x': X, - 'y': Y}, + feed={ + 'x': X, + 'y': Y + }, fetch_list=[output]) np.allclose(res, Ref, atol=1e-3) class XPUTestMatmulOpErr(XPUOpTestWrapper): + def __init__(self): self.op_name = "matmul" self.use_dynamic_create_class = False class TestMatmulOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The inputs type of matmul_op must be Variable. @@ -180,15 +185,18 @@ class XPUTestMatmulOpErr(XPUOpTestWrapper): self.assertRaises(TypeError, fluid.layers.matmul, input1, input1) # The inputs dtype of matmul_op must be float32, float16 - input2 = fluid.layers.data( - name='input2', shape=[10, 10], dtype="int32") + input2 = fluid.layers.data(name='input2', + shape=[10, 10], + dtype="int32") self.assertRaises(TypeError, fluid.layers.matmul, input2, input2) - input3 = fluid.layers.data( - name='input3', shape=[2, 2], dtype="float16") + input3 = fluid.layers.data(name='input3', + shape=[2, 2], + dtype="float16") fluid.layers.matmul(input3, input3) class API_TestMm(unittest.TestCase): + def test_out(self): with fluid.program_guard(fluid.Program()): x = fluid.data(name="x", shape=[2], dtype=self.in_type) @@ -198,15 +206,16 @@ class XPUTestMatmulOpErr(XPUOpTestWrapper): exe = fluid.Executor(fluid.XPUPlace(0)) data1 = np.random.rand(2).astype(self.in_type) data2 = np.random.rand(2).astype(self.in_type) - np_res = exe.run(feed={'x': data1, - 'y': data2}, + np_res = exe.run(feed={ + 'x': data1, + 'y': data2 + }, fetch_list=[result]) - expected_result = np.matmul( - data1.reshape(1, 2), data2.reshape(2, 1)) + expected_result = np.matmul(data1.reshape(1, 2), + data2.reshape(2, 1)) self.assertTrue( - np.allclose( - np_res, expected_result, atol=1e-3), + np.allclose(np_res, expected_result, atol=1e-3), "two value is\ {}\n{}, check diff!".format(np_res, expected_result)) @@ -220,10 +229,10 @@ class XPUTestMatmulOpErr(XPUOpTestWrapper): out = paddle.mm(data1, data2) expected_result = np.matmul(input_array1, input_array2) self.assertTrue( - np.allclose( - expected_result, out.numpy(), atol=1e-3)) + np.allclose(expected_result, out.numpy(), atol=1e-3)) class Test_API_Matmul(unittest.TestCase): + def test_dygraph_without_out(self): device = fluid.XPUPlace(0) with fluid.dygraph.guard(device): @@ -236,43 +245,51 @@ class XPUTestMatmulOpErr(XPUOpTestWrapper): out = paddle.matmul(data1, data2) expected_result = np.matmul(input_array1, input_array2) self.assertTrue( - np.allclose( - expected_result, out.numpy(), atol=1e-3)) + np.allclose(expected_result, out.numpy(), atol=1e-3)) class API_TestMmError(unittest.TestCase): + def test_errors(self): + def test_error1(): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.data( - name="data1", shape=[10, 2], dtype="float32") - data2 = fluid.data( - name="data2", shape=[3, 10], dtype="float32") + data1 = fluid.data(name="data1", + shape=[10, 2], + dtype="float32") + data2 = fluid.data(name="data2", + shape=[3, 10], + dtype="float32") paddle.mm(data1, data2) self.assertRaises(ValueError, test_error1) def test_error2(): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.data( - name="data1", shape=[-1, 10, 2], dtype="float32") - data2 = fluid.data( - name="data2", shape=[-1, 2, 10], dtype="float32") + data1 = fluid.data(name="data1", + shape=[-1, 10, 2], + dtype="float32") + data2 = fluid.data(name="data2", + shape=[-1, 2, 10], + dtype="float32") paddle.mm(data1, data2) test_error2() def 
test_error3(): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.data( - name="data1", shape=[10, 10, 2], dtype="float32") - data2 = fluid.data( - name="data2", shape=[3, 2, 10], dtype="float32") + data1 = fluid.data(name="data1", + shape=[10, 10, 2], + dtype="float32") + data2 = fluid.data(name="data2", + shape=[3, 2, 10], + dtype="float32") paddle.mm(data1, data2) self.assertRaises(ValueError, test_error3) class TestMatmulBaseGenerator(XPUOpTest): + def setUp(self): self.op_type = "matmul" self.dtype = np.float32 if not hasattr(self, @@ -297,21 +314,27 @@ class TestMatmulBaseGenerator(XPUOpTest): def test_check_grad_normal(self): place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['X', 'Y'], 'Out', max_relative_error=5e-2) + self.check_grad_with_place(place, ['X', 'Y'], + 'Out', + max_relative_error=5e-2) def test_check_grad_ignore_x(self): place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['Y'], 'Out', max_relative_error=5e-2, no_grad_set=set("X")) + self.check_grad_with_place(place, ['Y'], + 'Out', + max_relative_error=5e-2, + no_grad_set=set("X")) def test_check_grad_ignore_y(self): place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['X'], 'Out', max_relative_error=5e-2, no_grad_set=set('Y')) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=5e-2, + no_grad_set=set('Y')) class XPUTestMatmulOp1(XPUOpTestWrapper): + def __init__(self): self.op_name = "matmul" self.use_dynamic_create_class = True @@ -328,8 +351,9 @@ class XPUTestMatmulOp1(XPUOpTestWrapper): for transose_y in [True, False]: for batch in batch_size: class_name = ( - 'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}_batch_{}'. - format(dim_X, dim_Y, transose_x, transose_y, batch)) + 'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}_batch_{}' + .format(dim_X, dim_Y, transose_x, transose_y, + batch)) shape_x, shape_y = generate_compatible_shapes( dim_X, dim_Y, transose_x, transose_y, batch) attr_dict = { @@ -345,6 +369,7 @@ class XPUTestMatmulOp1(XPUOpTestWrapper): class XPUTestMatmulOp2(XPUOpTestWrapper): + def __init__(self): self.op_name = "matmul" self.use_dynamic_create_class = True @@ -361,8 +386,9 @@ class XPUTestMatmulOp2(XPUOpTestWrapper): for transose_y in [True, False]: for batch in batch_size: class_name = ( - 'TestMatMulAPI_dimX_{}_dim_Y_{}_transX_{}_transY_{}_batch_{}'. 
- format(dim_X, dim_Y, transose_x, transose_y, batch)) + 'TestMatMulAPI_dimX_{}_dim_Y_{}_transX_{}_transY_{}_batch_{}' + .format(dim_X, dim_Y, transose_x, transose_y, + batch)) shape_x, shape_y = generate_compatible_shapes( dim_X, dim_Y, transose_x, transose_y, batch) attr_dict = { @@ -377,6 +403,7 @@ class XPUTestMatmulOp2(XPUOpTestWrapper): class XPUTestMatmulOp3(XPUOpTestWrapper): + def __init__(self): self.op_name = "matmul" self.use_dynamic_create_class = True diff --git a/python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py index 3db3031f44c..8f319813554 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -59,6 +60,7 @@ def reference_matmul(X, Y, transpose_X=False, transpose_Y=False): class XPUTestMatmulV2Op(XPUOpTestWrapper): + def __init__(self): self.op_name = "matmul_v2" self.use_dynamic_create_class = False diff --git a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py index 896821552c9..0ddc38dbceb 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test_xpu import XPUOpTest from op_test import OpTest @@ -29,6 +30,7 @@ np.random.seed(10) class TestMeanOp(XPUOpTest): + def setUp(self): self.op_type = "mean" self.init_dtype_type() @@ -52,21 +54,25 @@ class TestMeanOp(XPUOpTest): class TestMeanOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of mean_op must be Variable. input1 = 12 self.assertRaises(TypeError, fluid.layers.mean, input1) # The input dtype of mean_op must be float16, float32, float64. 
- input2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32") + input2 = fluid.layers.data(name='input2', + shape=[12, 10], + dtype="int32") self.assertRaises(TypeError, fluid.layers.mean, input2) - input3 = fluid.layers.data( - name='input3', shape=[4], dtype="float16") + input3 = fluid.layers.data(name='input3', + shape=[4], + dtype="float16") fluid.layers.softmax(input3) class TestXPUMeanOp(TestMeanOp): + def init_dtype_type(self): self.dtype = np.float32 @@ -84,6 +90,7 @@ class TestXPUMeanOp(TestMeanOp): class TestXPUMeanOpFp16(TestMeanOp): + def init_dtype_type(self): self.dtype = np.float16 @@ -97,8 +104,9 @@ class TestXPUMeanOpFp16(TestMeanOp): if paddle.is_compiled_with_xpu(): paddle.enable_static() place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ['X'], 'Out', max_relative_error=1.e1) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=1.e1) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/xpu/test_momentum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_momentum_op_xpu.py index f7c1f0041e8..a33b3e47551 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_momentum_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_momentum_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -50,11 +51,13 @@ def calculate_momentum_by_numpy(param, grad, mu, velocity, use_nesterov, class XPUTestMomentumOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'momentum' self.use_dynamic_create_class = False class TestMomentumOPBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.xpu_version = core.get_xpu_device_version(0) @@ -70,8 +73,8 @@ class XPUTestMomentumOP(XPUOpTestWrapper): self.input_shape).astype(self.dtype) self.grad = np.random.uniform(-1, 1, self.input_shape).astype(self.dtype) - self.velocity = np.random.uniform( - -1, 1, self.input_shape).astype(self.dtype) + self.velocity = np.random.uniform(-1, 1, self.input_shape).astype( + self.dtype) param_out, velocity_out = calculate_momentum_by_numpy( param=self.param, @@ -112,6 +115,7 @@ class XPUTestMomentumOP(XPUOpTestWrapper): self.regularization_coeff = 0 class XPUTestMomentum1(TestMomentumOPBase): + def init_config(self): self.input_shape = [2, 768] self.learning_rate = np.array([0.002]).astype(self.dtype) @@ -121,6 +125,7 @@ class XPUTestMomentumOP(XPUOpTestWrapper): self.regularization_coeff = 0 class XPUTestMomentum2(TestMomentumOPBase): + def init_config(self): self.input_shape = [3, 8, 4096] self.learning_rate = np.array([0.005]).astype(self.dtype) @@ -130,6 +135,7 @@ class XPUTestMomentumOP(XPUOpTestWrapper): self.regularization_coeff = 0 class XPUTestMomentum3(TestMomentumOPBase): + def init_config(self): self.input_shape = [1024] self.learning_rate = np.array([0.01]).astype(self.dtype) @@ -144,6 +150,7 @@ class XPUTestMomentumOP(XPUOpTestWrapper): self.regularization_coeff = 0 class XPUTestMomentum4(TestMomentumOPBase): + def init_config(self): self.input_shape = [2, 2, 255] self.learning_rate = np.array([0.0005]).astype(self.dtype) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_mul_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_mul_op_xpu.py index 9d98ab70041..87667e4f139 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_mul_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_mul_op_xpu.py @@ -19,6 +19,7 @@ import numpy as np import paddle import paddle.fluid.core as 
core import sys + sys.path.append("..") from op_test_xpu import XPUOpTest import paddle.fluid as fluid @@ -31,13 +32,14 @@ from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, class TestMulOpError(unittest.TestCase): + def test_errors(self): with program_guard(Program(), Program()): # The input type of mul_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.XPUPlace(0)) - x2 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.XPUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.XPUPlace(0)) + x2 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + fluid.XPUPlace(0)) self.assertRaises(TypeError, fluid.layers.mul, x1, x2) # The input dtype of mul_op must be float32. x3 = fluid.layers.data(name='x3', shape=[4], dtype="int32") @@ -46,11 +48,13 @@ class TestMulOpError(unittest.TestCase): class XPUTestMulOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'mul' self.use_dynamic_create_class = False class TestXPUMulOp1(XPUOpTest): + def setUp(self): self.op_type = "mul" self.dtype = self.in_type @@ -75,28 +79,28 @@ class XPUTestMulOp(XPUOpTestWrapper): def test_check_grad_normal(self): place = paddle.XPUPlace(0) paddle.enable_static() - self.check_grad_with_place( - place, ['X', 'Y'], 'Out', max_relative_error=0.1) + self.check_grad_with_place(place, ['X', 'Y'], + 'Out', + max_relative_error=0.1) def test_check_grad_ingore_x(self): place = paddle.XPUPlace(0) paddle.enable_static() - self.check_grad_with_place( - place, ['Y'], - 'Out', - max_relative_error=0.1, - no_grad_set=set("X")) + self.check_grad_with_place(place, ['Y'], + 'Out', + max_relative_error=0.1, + no_grad_set=set("X")) def test_check_grad_ignore_y(self): place = paddle.XPUPlace(0) paddle.enable_static() - self.check_grad_with_place( - place, ['X'], - 'Out', - max_relative_error=0.1, - no_grad_set=set('Y')) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=0.1, + no_grad_set=set('Y')) class TestXPUMulOp2(XPUOpTest): + def setUp(self): self.op_type = "mul" self.use_xpu = True @@ -115,26 +119,25 @@ class XPUTestMulOp(XPUOpTestWrapper): def test_check_grad_normal(self): place = paddle.XPUPlace(0) paddle.enable_static() - self.check_grad_with_place( - place, ['X', 'Y'], 'Out', max_relative_error=0.1) + self.check_grad_with_place(place, ['X', 'Y'], + 'Out', + max_relative_error=0.1) def test_check_grad_ingore_x(self): place = paddle.XPUPlace(0) paddle.enable_static() - self.check_grad_with_place( - place, ['Y'], - 'Out', - max_relative_error=0.1, - no_grad_set=set("X")) + self.check_grad_with_place(place, ['Y'], + 'Out', + max_relative_error=0.1, + no_grad_set=set("X")) def test_check_grad_ingore_y(self): place = paddle.XPUPlace(0) paddle.enable_static() - self.check_grad_with_place( - place, ['X'], - 'Out', - max_relative_error=0.1, - no_grad_set=set('Y')) + self.check_grad_with_place(place, ['X'], + 'Out', + max_relative_error=0.1, + no_grad_set=set('Y')) support_types = get_xpu_op_support_types('mul') diff --git a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_op_xpu.py index 731358d5304..0bfa73d6863 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_op_xpu.py @@ -19,6 +19,7 @@ import numpy as np import paddle import paddle.fluid.core as core import sys + sys.path.append("..") from op_test_xpu import XPUOpTest import paddle.fluid as 
fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py index 7a3b4a5a217..6e80f501243 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -160,11 +161,13 @@ def nearest_neighbor_interp3d_np(X, class XPUNearestInterpOpWrapper(XPUOpTestWrapper): + def __init__(self): self.op_name = 'nearest_interp_v2' self.use_dynamic_create_class = False class TestNearestInterpOp(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_dtype() @@ -298,18 +301,21 @@ class XPUNearestInterpOpWrapper(XPUOpTestWrapper): """ class TestNearestNeighborInterpCase2(TestNearestInterpOp): + def init_test_case(self): self.input_shape = [3, 3, 9, 6] self.out_h = 12 self.out_w = 12 class TestNearestNeighborInterpCase3(TestNearestInterpOp): + def init_test_case(self): self.input_shape = [1, 1, 32, 64] self.out_h = 64 self.out_w = 32 class TestNearestNeighborInterpCase4(TestNearestInterpOp): + def init_test_case(self): self.input_shape = [4, 1, 7, 8] self.out_h = 1 @@ -317,6 +323,7 @@ class XPUNearestInterpOpWrapper(XPUOpTestWrapper): self.out_size = np.array([2, 2]).astype("int32") class TestNearestNeighborInterpCase5(TestNearestInterpOp): + def init_test_case(self): self.input_shape = [3, 3, 9, 6] self.out_h = 12 @@ -324,6 +331,7 @@ class XPUNearestInterpOpWrapper(XPUOpTestWrapper): self.out_size = np.array([11, 11]).astype("int32") class TestNearestNeighborInterpCase6(TestNearestInterpOp): + def init_test_case(self): self.input_shape = [1, 1, 32, 64] self.out_h = 64 @@ -331,12 +339,14 @@ class XPUNearestInterpOpWrapper(XPUOpTestWrapper): self.out_size = np.array([65, 129]).astype("int32") class TestNearestNeighborInterpSame(TestNearestInterpOp): + def init_test_case(self): self.input_shape = [2, 3, 32, 64] self.out_h = 32 self.out_w = 64 class TestNearestNeighborInterpActualShape(TestNearestInterpOp): + def init_test_case(self): self.input_shape = [3, 2, 32, 16] self.out_h = 64 @@ -358,10 +368,12 @@ class XPUNearestInterpOpWrapper(XPUOpTestWrapper): """ class TestNearestInterpWithoutCorners(TestNearestInterpOp): + def set_align_corners(self): self.align_corners = False class TestNearestNeighborInterpScale1(TestNearestInterpOp): + def init_test_case(self): self.input_shape = [3, 2, 7, 5] self.out_h = 64 @@ -370,6 +382,7 @@ class XPUNearestInterpOpWrapper(XPUOpTestWrapper): self.out_size = np.array([66, 40]).astype("int32") class TestNearestNeighborInterpScale2(TestNearestInterpOp): + def init_test_case(self): self.input_shape = [3, 2, 5, 7] self.out_h = 64 @@ -378,6 +391,7 @@ class XPUNearestInterpOpWrapper(XPUOpTestWrapper): self.out_size = np.array([66, 40]).astype("int32") class TestNearestNeighborInterpScale3(TestNearestInterpOp): + def init_test_case(self): self.input_shape = [3, 2, 7, 5] self.out_h = 64 @@ -400,6 +414,7 @@ class XPUNearestInterpOpWrapper(XPUOpTestWrapper): """ class TestNearestInterpOp_attr_tensor(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_dtype() @@ -458,9 +473,10 @@ class XPUNearestInterpOpWrapper(XPUOpTestWrapper): if isinstance(self.scale, list) and len(self.scale) == 1: self.scale = [self.scale[0], self.scale[0]] self.attrs['scale'] = self.scale - output_np = nearest_neighbor_interp_np( - 
input_np, out_h, out_w, 0, 0, self.out_size, self.actual_shape, - self.align_corners) + output_np = nearest_neighbor_interp_np(input_np, out_h, out_w, 0, 0, + self.out_size, + self.actual_shape, + self.align_corners) self.outputs = {'Out': output_np} def init_dtype(self): @@ -480,6 +496,7 @@ class XPUNearestInterpOpWrapper(XPUOpTestWrapper): # out_size is a tensor list class TestNearestInterp_attr_tensor_Case1(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.input_shape = [3, 3, 9, 6] self.out_h = 12 @@ -488,6 +505,7 @@ class XPUNearestInterpOpWrapper(XPUOpTestWrapper): # out_size is a 1-D tensor class TestNearestInterp_attr_tensor_Case2(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.input_shape = [3, 2, 32, 16] self.out_h = 64 @@ -497,6 +515,7 @@ class XPUNearestInterpOpWrapper(XPUOpTestWrapper): # scale is a 1-D tensor class TestNearestInterp_attr_tensor_Case3(TestNearestInterpOp_attr_tensor): + def init_test_case(self): self.input_shape = [3, 2, 32, 16] self.out_h = 64 diff --git a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py index 8c8406ba433..33b59a8de65 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py @@ -19,6 +19,7 @@ import numpy as np import paddle import paddle.fluid.core as core import sys + sys.path.append("..") from op_test_xpu import XPUOpTest import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py index 9f937caa37e..afeccd637a2 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py @@ -19,6 +19,7 @@ import numpy as np import paddle import paddle.fluid.core as core import sys + sys.path.append("..") from op_test_xpu import XPUOpTest import paddle.fluid as fluid @@ -29,6 +30,7 @@ paddle.enable_static() class TestOneHotOp(XPUOpTest): + def setUp(self): self.use_xpu = True self.op_type = 'one_hot_v2' @@ -54,6 +56,7 @@ class TestOneHotOp(XPUOpTest): class TestOneHotOp_attr(XPUOpTest): + def setUp(self): self.op_type = 'one_hot_v2' depth = 10 @@ -78,6 +81,7 @@ class TestOneHotOp_attr(XPUOpTest): class TestOneHotOp_default_dtype(XPUOpTest): + def setUp(self): self.op_type = 'one_hot_v2' depth = 10 @@ -102,6 +106,7 @@ class TestOneHotOp_default_dtype(XPUOpTest): class TestOneHotOp_default_dtype_attr(XPUOpTest): + def setUp(self): self.op_type = 'one_hot_v2' depth = 10 @@ -126,6 +131,7 @@ class TestOneHotOp_default_dtype_attr(XPUOpTest): class TestOneHotOp_out_of_range(XPUOpTest): + def setUp(self): self.op_type = 'one_hot_v2' depth = 10 @@ -145,6 +151,7 @@ class TestOneHotOp_out_of_range(XPUOpTest): class TestOneHotOpApi(unittest.TestCase): + def test_api(self): depth = 10 self._run(depth) @@ -171,21 +178,23 @@ class TestOneHotOpApi(unittest.TestCase): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - ret = exe.run(feed={'label': label_data, }, + ret = exe.run(feed={ + 'label': label_data, + }, fetch_list=[one_hot_label], return_numpy=False) class BadInputTestOnehotV2(unittest.TestCase): + def test_error(self): with fluid.program_guard(fluid.Program()): def test_bad_x(): - label = fluid.layers.data( - name="label", - shape=[4], - append_batch_size=False, - dtype="float32") + label = fluid.layers.data(name="label", + shape=[4], + 
append_batch_size=False, + dtype="float32") one_hot_label = fluid.one_hot(input=label, depth=4) self.assertRaises(TypeError, test_bad_x) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py index fcd0de2a1fd..5ab62af7104 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py @@ -16,6 +16,7 @@ from __future__ import print_function from __future__ import division import sys + sys.path.append("..") import unittest import numpy as np @@ -46,11 +47,11 @@ def max_pool2D_forward_naive(x, if adaptive: H_out, W_out = ksize else: - H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - 1 - ) // strides[0] + 1 if ceil_mode else ( + H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - + 1) // strides[0] + 1 if ceil_mode else ( H - ksize[0] + 2 * paddings[0]) // strides[0] + 1 - W_out = (W - ksize[1] + 2 * paddings[1] + strides[1] - 1 - ) // strides[1] + 1 if ceil_mode else ( + W_out = (W - ksize[1] + 2 * paddings[1] + strides[1] - + 1) // strides[1] + 1 if ceil_mode else ( W - ksize[1] + 2 * paddings[1]) // strides[1] + 1 out = np.zeros((N, C, H_out, W_out)) for i in range(H_out): @@ -86,11 +87,11 @@ def avg_pool2D_forward_naive(x, if adaptive: H_out, W_out = ksize else: - H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - 1 - ) // strides[0] + 1 if ceil_mode else ( + H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - + 1) // strides[0] + 1 if ceil_mode else ( H - ksize[0] + 2 * paddings[0]) // strides[0] + 1 - W_out = (W - ksize[1] + 2 * paddings[1] + strides[1] - 1 - ) // strides[1] + 1 if ceil_mode else ( + W_out = (W - ksize[1] + 2 * paddings[1] + strides[1] - + 1) // strides[1] + 1 if ceil_mode else ( W - ksize[1] + 2 * paddings[1]) // strides[1] + 1 out = np.zeros((N, C, H_out, W_out)) for i in range(H_out): @@ -117,9 +118,9 @@ def avg_pool2D_forward_naive(x, field_size = (r_end - r_start) * (c_end - c_start) if data_type == np.int8 or data_type == np.uint8: - out[:, :, i, j] = (np.rint( - np.sum(x_masked, axis=(2, 3)) / - field_size)).astype(data_type) + out[:, :, i, + j] = (np.rint(np.sum(x_masked, axis=(2, 3)) / + field_size)).astype(data_type) else: out[:, :, i, j] = (np.sum(x_masked, axis=(2, 3)) / field_size).astype(data_type) @@ -144,8 +145,8 @@ def pool2D_forward_naive(x, for input_size, filter_size, stride_size in zip(input_shape, pool_size, pool_stride): out_size = int((input_size + stride_size - 1) / stride_size) - pad_sum = np.max(( - (out_size - 1) * stride_size + filter_size - input_size, 0)) + pad_sum = np.max( + ((out_size - 1) * stride_size + filter_size - input_size, 0)) pad_0 = int(pad_sum / 2) pad_1 = int(pad_sum - pad_0) padding.append(pad_0) @@ -228,8 +229,9 @@ def pool2D_forward_naive(x, x_masked = x[:, :, in_h_start:in_h_end, in_w_start:in_w_end] if pool_type == 'avg': if (exclusive or adaptive): - field_size = (in_h_end - in_h_start) * ( - in_w_end - in_w_start) + field_size = (in_h_end - in_h_start) * (in_w_end - + in_w_start) + # if (exclusive or adaptive) else (ksize[0] * ksize[1]) out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size @@ -239,8 +241,8 @@ def pool2D_forward_naive(x, x_masked = x[:, in_h_start:in_h_end, in_w_start:in_w_end, :] if pool_type == 'avg': if (exclusive or adaptive): - field_size = (in_h_end - in_h_start) * ( - in_w_end - in_w_start) + field_size = (in_h_end - in_h_start) * (in_w_end - + in_w_start) out[:, i, j, :] = np.sum(x_masked, axis=(1, 2)) / 
field_size elif pool_type == 'max': out[:, i, j, :] = np.max(x_masked, axis=(1, 2)) @@ -248,11 +250,13 @@ def pool2D_forward_naive(x, class XPUTestPool2D_Op(XPUOpTestWrapper): + def __init__(self): self.op_name = 'pool2d' self.use_dynamic_create_class = False class TestPool2D_Op(XPUOpTest): + def setUp(self): self.op_type = "pool2d" self.dtype = self.in_type @@ -337,6 +341,7 @@ class XPUTestPool2D_Op(XPUOpTestWrapper): self.adaptive = False class TestCase1(TestPool2D_Op): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -355,6 +360,7 @@ class XPUTestPool2D_Op(XPUOpTestWrapper): self.shape = [2, 3, 7, 7] class TestCase2(TestPool2D_Op): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -373,21 +379,25 @@ class XPUTestPool2D_Op(XPUOpTestWrapper): self.shape = [2, 3, 7, 7] class TestCase3(TestPool2D_Op): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase4(TestCase1): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestCase5(TestCase2): + def init_pool_type(self): self.pool_type = "max" self.pool2D_forward_naive = max_pool2D_forward_naive class TestPool2D_AsyPadding(TestPool2D_Op): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -397,6 +407,7 @@ class XPUTestPool2D_Op(XPUOpTestWrapper): self.shape = [2, 3, 5, 5] class TestCase1_AsyPadding(TestCase1): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -406,6 +417,7 @@ class XPUTestPool2D_Op(XPUOpTestWrapper): self.shape = [2, 3, 7, 7] class TestCase2_AsyPadding(TestCase2): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -415,6 +427,7 @@ class XPUTestPool2D_Op(XPUOpTestWrapper): self.shape = [2, 3, 7, 7] class TestCase3_AsyPadding(TestCase3): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -424,6 +437,7 @@ class XPUTestPool2D_Op(XPUOpTestWrapper): self.shape = [2, 3, 5, 5] class TestCase4_AsyPadding(TestCase4): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -433,6 +447,7 @@ class XPUTestPool2D_Op(XPUOpTestWrapper): self.shape = [2, 3, 7, 7] class TestCase5_AsyPadding(TestCase5): + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] @@ -442,6 +457,7 @@ class XPUTestPool2D_Op(XPUOpTestWrapper): self.shape = [2, 3, 7, 7] class TestAvgInclude_AsyPadding(TestCase2): + def init_exclusive(self): self.exclusive = False diff --git a/python/paddle/fluid/tests/unittests/xpu/test_prior_box_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_prior_box_op_xpu.py index 0830237d5a8..c8fcffbd3d3 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_prior_box_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_prior_box_op_xpu.py @@ -18,6 +18,7 @@ import math import numpy as np import sys import unittest + sys.path.append("..") import paddle @@ -29,11 +30,13 @@ paddle.enable_static() class XPUTestPriorBoxOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'prior_box' self.use_dynamic_create_class = False class TestPriorBoxOp(XPUOpTest): + def setUp(self): self.op_type = "prior_box" self.use_xpu = True @@ -94,8 +97,8 @@ class XPUTestPriorBoxOp(XPUOpTestWrapper): self.flip = True self.set_min_max_aspect_ratios_order() self.real_aspect_ratios = [1, 2.0, 1.0 / 2.0, 3.0, 1.0 / 3.0] - self.aspect_ratios = np.array( - self.aspect_ratios, dtype=np.float).flatten() + self.aspect_ratios = np.array(self.aspect_ratios, + 
dtype=np.float).flatten() self.variances = [0.1, 0.1, 0.2, 0.2] self.variances = np.array(self.variances, dtype=np.float).flatten() @@ -133,40 +136,40 @@ class XPUTestPriorBoxOp(XPUOpTestWrapper): ar = self.real_aspect_ratios[r] c_w = min_size * math.sqrt(ar) / 2 c_h = (min_size / math.sqrt(ar)) / 2 - out_boxes[h, w, idx, :] = [ - (c_x - c_w) / self.image_w, (c_y - c_h) / - self.image_h, (c_x + c_w) / self.image_w, - (c_y + c_h) / self.image_h - ] + out_boxes[h, w, + idx, :] = [(c_x - c_w) / self.image_w, + (c_y - c_h) / self.image_h, + (c_x + c_w) / self.image_w, + (c_y + c_h) / self.image_h] idx += 1 if len(self.max_sizes) > 0: max_size = self.max_sizes[s] # second prior: aspect_ratio = 1, c_w = c_h = math.sqrt(min_size * max_size) / 2 - out_boxes[h, w, idx, :] = [ - (c_x - c_w) / self.image_w, (c_y - c_h) / - self.image_h, (c_x + c_w) / self.image_w, - (c_y + c_h) / self.image_h - ] + out_boxes[h, w, + idx, :] = [(c_x - c_w) / self.image_w, + (c_y - c_h) / self.image_h, + (c_x + c_w) / self.image_w, + (c_y + c_h) / self.image_h] idx += 1 else: c_w = c_h = min_size / 2. - out_boxes[h, w, idx, :] = [ - (c_x - c_w) / self.image_w, (c_y - c_h) / - self.image_h, (c_x + c_w) / self.image_w, - (c_y + c_h) / self.image_h - ] + out_boxes[h, w, + idx, :] = [(c_x - c_w) / self.image_w, + (c_y - c_h) / self.image_h, + (c_x + c_w) / self.image_w, + (c_y + c_h) / self.image_h] idx += 1 if len(self.max_sizes) > 0: max_size = self.max_sizes[s] # second prior: aspect_ratio = 1, c_w = c_h = math.sqrt(min_size * max_size) / 2 - out_boxes[h, w, idx, :] = [ - (c_x - c_w) / self.image_w, (c_y - c_h) / - self.image_h, (c_x + c_w) / self.image_w, - (c_y + c_h) / self.image_h - ] + out_boxes[h, w, + idx, :] = [(c_x - c_w) / self.image_w, + (c_y - c_h) / self.image_h, + (c_x + c_w) / self.image_w, + (c_y + c_h) / self.image_h] idx += 1 # rest of priors @@ -176,27 +179,29 @@ class XPUTestPriorBoxOp(XPUOpTestWrapper): continue c_w = min_size * math.sqrt(ar) / 2 c_h = (min_size / math.sqrt(ar)) / 2 - out_boxes[h, w, idx, :] = [ - (c_x - c_w) / self.image_w, (c_y - c_h) / - self.image_h, (c_x + c_w) / self.image_w, - (c_y + c_h) / self.image_h - ] + out_boxes[h, w, + idx, :] = [(c_x - c_w) / self.image_w, + (c_y - c_h) / self.image_h, + (c_x + c_w) / self.image_w, + (c_y + c_h) / self.image_h] idx += 1 # clip the prior's coordidate such that it is within[0, 1] if self.clip: out_boxes = np.clip(out_boxes, 0.0, 1.0) # set the variance. 
- out_var = np.tile(self.variances, (self.layer_h, self.layer_w, - self.num_priors, 1)) + out_var = np.tile(self.variances, + (self.layer_h, self.layer_w, self.num_priors, 1)) self.out_boxes = out_boxes.astype(self.dtype) self.out_var = out_var.astype(self.dtype) class TestPriorBoxOpWithoutMaxSize(TestPriorBoxOp): + def set_max_sizes(self): self.max_sizes = [] class TestPriorBoxOpWithSpecifiedOutOrder(TestPriorBoxOp): + def set_min_max_aspect_ratios_order(self): self.min_max_aspect_ratios_order = True diff --git a/python/paddle/fluid/tests/unittests/xpu/test_range_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_range_xpu.py index f9c49a81ef3..2ea100a2def 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_range_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_range_xpu.py @@ -18,6 +18,7 @@ import unittest import paddle import numpy as np import sys + sys.path.append("..") from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper @@ -26,11 +27,13 @@ paddle.enable_static() class XPUTestRangeOp(XPUOpTestWrapper): + def __init__(self): self.op_name = "range" self.use_dynamic_create_class = False class TestRangeOp(XPUOpTest): + def setUp(self): self.set_xpu() self.op_type = "range" @@ -43,8 +46,9 @@ class XPUTestRangeOp(XPUOpTestWrapper): } self.outputs = { - 'Out': np.arange(self.case[0], self.case[1], - self.case[2]).astype(self.dtype) + 'Out': + np.arange(self.case[0], self.case[1], + self.case[2]).astype(self.dtype) } def set_xpu(self): @@ -61,22 +65,27 @@ class XPUTestRangeOp(XPUOpTestWrapper): self.check_output_with_place(place, check_dygraph=False) class TestRangeOpCase0(TestRangeOp): + def init_config(self): self.case = (0, 5, 1) class TestRangeOpCase1(TestRangeOp): + def init_config(self): self.case = (0, 5, 2) class TestRangeOpCase2(TestRangeOp): + def init_config(self): self.case = (10, 1, -2) class TestRangeOpCase3(TestRangeOp): + def init_config(self): self.case = (-1, -10, -2) class TestRangeOpCase4(TestRangeOp): + def init_config(self): self.case = (10, -10, -11) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_all_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_all_op_xpu.py index b4dc8e7b7cf..ceb38c22630 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_all_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_all_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -28,10 +29,12 @@ paddle.enable_static() class XPUTestReduceAllOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'reduce_all' class XPUTestReduceAllBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.set_case() @@ -45,8 +48,8 @@ class XPUTestReduceAllOp(XPUOpTestWrapper): 'dim': (3, 5, 4) } self.inputs = { - 'X': np.random.randint(0, 2, - (2, 5, 3, 2, 2, 3, 4, 2)).astype("bool") + 'X': + np.random.randint(0, 2, (2, 5, 3, 2, 2, 3, 4, 2)).astype("bool") } self.outputs = {'Out': self.inputs['X'].all(axis=self.attrs['dim'])} @@ -57,6 +60,7 @@ class XPUTestReduceAllOp(XPUOpTestWrapper): pass class XPUTestReduceAllCase1(XPUTestReduceAllBase): + def set_case(self): self.op_type = 'reduce_all' self.attrs = { @@ -71,6 +75,7 @@ class XPUTestReduceAllOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'].all()} class XPUTestReduceAllCase2(XPUTestReduceAllBase): + def set_case(self): self.op_type = 'reduce_all' self.attrs = { @@ -80,12 
+85,13 @@ class XPUTestReduceAllOp(XPUOpTestWrapper): 'dim': (3, 6) } self.inputs = { - 'X': np.random.randint(0, 2, - (2, 5, 3, 2, 2, 3, 4, 2)).astype("bool") + 'X': + np.random.randint(0, 2, (2, 5, 3, 2, 2, 3, 4, 2)).astype("bool") } self.outputs = {'Out': self.inputs['X'].all(axis=self.attrs['dim'])} class XPUTestReduceAllCase3(XPUTestReduceAllBase): + def set_case(self): self.op_type = 'reduce_all' self.attrs = { @@ -98,8 +104,7 @@ class XPUTestReduceAllOp(XPUOpTestWrapper): 'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool") } self.outputs = { - 'Out': np.expand_dims( - self.inputs['X'].all(axis=1), axis=1) + 'Out': np.expand_dims(self.inputs['X'].all(axis=1), axis=1) } diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py index 1dd7b42e5eb..ac827b6738f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -28,10 +29,12 @@ paddle.enable_static() class XPUTestReduceMaxOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'reduce_max' class XPUTestReduceMaxBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_case() @@ -49,8 +52,9 @@ class XPUTestReduceMaxOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'].max()} else: self.outputs = { - 'Out': self.inputs['X'].max(axis=self.axis, - keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].max(axis=self.axis, + keepdims=self.attrs['keep_dim']) } def init_case(self): @@ -66,6 +70,7 @@ class XPUTestReduceMaxOp(XPUOpTestWrapper): pass class XPUTestReduceMaxCase1(XPUTestReduceMaxBase): + def init_case(self): self.shape = (5, 6, 10) self.axis = (0, ) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py index 18a588b1b88..ef483870c68 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest, skip_check_grad_ci import paddle @@ -27,6 +28,7 @@ from paddle.fluid.framework import convert_np_dtype_to_dtype_ class TestMeanOp(OpTest): + def setUp(self): self.op_type = "reduce_mean" self.inputs = {'X': np.random.random((5, 6, 10)).astype("float32")} @@ -43,6 +45,7 @@ class TestMeanOp(OpTest): class TestMeanOp5D(OpTest): + def setUp(self): self.op_type = "reduce_mean" self.inputs = { @@ -61,6 +64,7 @@ class TestMeanOp5D(OpTest): class TestMeanOp6D(OpTest): + def setUp(self): self.op_type = "reduce_mean" self.inputs = { @@ -79,6 +83,7 @@ class TestMeanOp6D(OpTest): class TestMeanOp8D(OpTest): + def setUp(self): self.op_type = "reduce_mean" self.inputs = { @@ -97,6 +102,7 @@ class TestMeanOp8D(OpTest): class Test1DReduce(OpTest): + def setUp(self): self.op_type = "reduce_mean" self.inputs = {'X': np.random.random(120).astype("float32")} @@ -113,6 +119,7 @@ class Test1DReduce(OpTest): class Test2DReduce0(Test1DReduce): + def setUp(self): self.op_type = "reduce_mean" self.attrs = {'dim': [0], 'use_xpu': True} @@ -121,6 +128,7 @@ class Test2DReduce0(Test1DReduce): class Test2DReduce1(Test1DReduce): + def setUp(self): self.op_type = "reduce_mean" 
self.attrs = {'dim': [1], 'use_xpu': True} @@ -131,6 +139,7 @@ class Test2DReduce1(Test1DReduce): class Test3DReduce0(Test1DReduce): + def setUp(self): self.op_type = "reduce_mean" self.attrs = {'dim': [1], 'use_xpu': True} @@ -141,6 +150,7 @@ class Test3DReduce0(Test1DReduce): class Test3DReduce1(Test1DReduce): + def setUp(self): self.op_type = "reduce_mean" self.attrs = {'dim': [2], 'use_xpu': True} @@ -151,6 +161,7 @@ class Test3DReduce1(Test1DReduce): class Test3DReduce2(Test1DReduce): + def setUp(self): self.op_type = "reduce_mean" self.attrs = {'dim': [-2], 'use_xpu': True} @@ -161,6 +172,7 @@ class Test3DReduce2(Test1DReduce): class Test3DReduce3(Test1DReduce): + def setUp(self): self.op_type = "reduce_mean" self.attrs = {'dim': [1, 2], 'use_xpu': True} @@ -171,17 +183,20 @@ class Test3DReduce3(Test1DReduce): class TestKeepDimReduce(Test1DReduce): + def setUp(self): self.op_type = "reduce_mean" self.inputs = {'X': np.random.random((5, 6, 10)).astype("float32")} self.attrs = {'dim': [1], 'keep_dim': True, 'use_xpu': True} self.outputs = { - 'Out': self.inputs['X'].mean( - axis=tuple(self.attrs['dim']), keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].mean(axis=tuple(self.attrs['dim']), + keepdims=self.attrs['keep_dim']) } class TestKeepDim8DReduce(Test1DReduce): + def setUp(self): self.op_type = "reduce_mean" self.inputs = { @@ -189,8 +204,9 @@ class TestKeepDim8DReduce(Test1DReduce): } self.attrs = {'dim': (3, 4, 5), 'keep_dim': True, 'use_xpu': True} self.outputs = { - 'Out': self.inputs['X'].mean( - axis=tuple(self.attrs['dim']), keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].mean(axis=tuple(self.attrs['dim']), + keepdims=self.attrs['keep_dim']) } diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_min_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_min_op_xpu.py index cf77ea09a58..85a12bea3be 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_min_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_min_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -28,10 +29,12 @@ paddle.enable_static() class XPUTestReduceMinOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'reduce_min' class XPUTestReduceMinBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_case() @@ -49,8 +52,9 @@ class XPUTestReduceMinOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'].min()} else: self.outputs = { - 'Out': self.inputs['X'].min(axis=self.axis, - keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].min(axis=self.axis, + keepdims=self.attrs['keep_dim']) } def init_case(self): @@ -66,6 +70,7 @@ class XPUTestReduceMinOp(XPUOpTestWrapper): pass class XPUTestReduceMinCase1(XPUTestReduceMinBase): + def init_case(self): self.shape = (5, 6, 10) self.axis = (0, ) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py index b621cb59c0e..155adaa37c0 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -29,11 +30,13 @@ paddle.enable_static() class XPUTestReduceProdOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'reduce_prod' 
self.use_dynamic_create_class = False class TestXPUReduceProdOp(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_dtype() @@ -52,8 +55,9 @@ class XPUTestReduceProdOP(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'].prod()} else: self.outputs = { - 'Out': self.inputs['X'].prod( - axis=self.axis, keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].prod(axis=self.axis, + keepdims=self.attrs['keep_dim']) } def initTestCase(self): @@ -70,68 +74,81 @@ class XPUTestReduceProdOP(XPUOpTestWrapper): self.check_grad_with_place(self.place, ['X'], 'Out') class TestProdOp5D(TestXPUReduceProdOp): + def initTestCase(self): self.shape = (1, 2, 5, 6, 10) self.axis = (0, ) class TestProdOp6D(TestXPUReduceProdOp): + def initTestCase(self): self.shape = (1, 1, 2, 5, 6, 10) self.axis = (0, ) class TestProdOp8D(TestXPUReduceProdOp): + def initTestCase(self): self.shape = (1, 3, 1, 2, 1, 4, 3, 10) self.axis = (0, 3) class Test1DReduce(TestXPUReduceProdOp): + def initTestCase(self): self.shape = 120 self.axis = (0, ) class Test2DReduce0(TestXPUReduceProdOp): + def initTestCase(self): self.shape = (20, 10) self.axis = (0, ) class Test2DReduce1(TestXPUReduceProdOp): + def initTestCase(self): self.shape = (20, 10) self.axis = (1, ) class Test3DReduce0(TestXPUReduceProdOp): + def initTestCase(self): self.shape = (5, 6, 7) self.axis = (1, ) class Test3DReduce1(TestXPUReduceProdOp): + def initTestCase(self): self.shape = (5, 6, 7) self.axis = (2, ) class Test3DReduce2(TestXPUReduceProdOp): + def initTestCase(self): self.shape = (5, 6, 7) self.axis = (-2, ) class Test3DReduce3(TestXPUReduceProdOp): + def initTestCase(self): self.shape = (5, 6, 7) self.axis = (1, 2) class TestKeepDimReduce(TestXPUReduceProdOp): + def initTestCase(self): self.shape = (5, 6, 10) self.axis = (1, ) self.keep_dim = True class TestKeepDim8DReduce(TestXPUReduceProdOp): + def initTestCase(self): self.shape = (2, 5, 3, 2, 2, 3, 4, 2) self.axis = (3, 4, 5) self.keep_dim = True class TestReduceAll(TestXPUReduceProdOp): + def initTestCase(self): self.shape = (5, 6, 2, 10) self.axis = (0, ) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py index 9f42a509624..d80fd187dfd 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -28,10 +29,12 @@ paddle.enable_static() class XPUTestReduceSumOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'reduce_sum' class XPUTestReduceSumBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_case() @@ -49,8 +52,9 @@ class XPUTestReduceSumOp(XPUOpTestWrapper): self.outputs = {'Out': self.inputs['X'].sum()} else: self.outputs = { - 'Out': self.inputs['X'].sum(axis=self.axis, - keepdims=self.attrs['keep_dim']) + 'Out': + self.inputs['X'].sum(axis=self.axis, + keepdims=self.attrs['keep_dim']) } def init_case(self): @@ -66,6 +70,7 @@ class XPUTestReduceSumOp(XPUOpTestWrapper): pass class XPUTestReduceSumCase1(XPUTestReduceSumBase): + def init_case(self): self.shape = (5, 6, 10) self.axis = (0, ) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_refactor_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_refactor_op_xpu.py index 9d1a5ca1fbd..9b71482fcc6 100644 --- 
a/python/paddle/fluid/tests/unittests/xpu/test_refactor_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_refactor_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -43,6 +44,7 @@ def huber_loss_forward(val, delta): # 1.动态生成不同参数的测试case,wrapper类中必须实现dynamic_create_class方法 # self.use_dynamic_create_class置为True class XPUTestArgsortOp1(XPUOpTestWrapper): + def __init__(self): self.op_name = 'argsort' self.use_dynamic_create_class = True @@ -59,6 +61,7 @@ class XPUTestArgsortOp1(XPUOpTestWrapper): return base_class, classes class TestArgsortOp(XPUOpTest): + def setUp(self): self.op_type = "argsort" self.place = paddle.XPUPlace(0) @@ -73,9 +76,10 @@ class XPUTestArgsortOp1(XPUOpTestWrapper): if self.in_type == np.float32: self.x = np.random.random(self.input_shape).astype(self.dtype) else: - self.x = np.random.randint( - low=-1000, high=1000, - size=self.input_shape).astype(self.dtype) + self.x = np.random.randint(low=-1000, + high=1000, + size=self.input_shape).astype( + self.dtype) self.inputs = {"X": self.x} self.attrs = {"axis": self.axis, "descending": self.descending} self.get_output() @@ -84,15 +88,14 @@ class XPUTestArgsortOp1(XPUOpTestWrapper): def get_output(self): if self.descending: self.indices = np.flip( - np.argsort( - self.x, kind='heapsort', axis=self.axis), + np.argsort(self.x, kind='heapsort', axis=self.axis), self.axis) self.sorted_x = np.flip( - np.sort( - self.x, kind='heapsort', axis=self.axis), self.axis) + np.sort(self.x, kind='heapsort', axis=self.axis), self.axis) else: - self.indices = np.argsort( - self.x, kind='heapsort', axis=self.axis) + self.indices = np.argsort(self.x, + kind='heapsort', + axis=self.axis) self.sorted_x = np.sort(self.x, kind='heapsort', axis=self.axis) def test_check_output(self): @@ -101,11 +104,13 @@ class XPUTestArgsortOp1(XPUOpTestWrapper): # 2. 
为不同参数的测试case定义一个测试类,self.use_dynamic_create_class需要置为False class XPUTestArgsortOp2(XPUOpTestWrapper): + def __init__(self): self.op_name = 'argsort' self.use_dynamic_create_class = False class TestArgsortOp(XPUOpTest): + def setUp(self): self.op_type = "argsort" self.place = paddle.XPUPlace(0) @@ -119,9 +124,10 @@ class XPUTestArgsortOp2(XPUOpTestWrapper): if self.in_type == np.float32: self.x = np.random.random(self.input_shape).astype(self.dtype) else: - self.x = np.random.randint( - low=-1000, high=1000, - size=self.input_shape).astype(self.dtype) + self.x = np.random.randint(low=-1000, + high=1000, + size=self.input_shape).astype( + self.dtype) self.inputs = {"X": self.x} self.attrs = {"axis": self.axis, "descending": self.descending} self.get_output() @@ -130,15 +136,14 @@ class XPUTestArgsortOp2(XPUOpTestWrapper): def get_output(self): if self.descending: self.indices = np.flip( - np.argsort( - self.x, kind='heapsort', axis=self.axis), + np.argsort(self.x, kind='heapsort', axis=self.axis), self.axis) self.sorted_x = np.flip( - np.sort( - self.x, kind='heapsort', axis=self.axis), self.axis) + np.sort(self.x, kind='heapsort', axis=self.axis), self.axis) else: - self.indices = np.argsort( - self.x, kind='heapsort', axis=self.axis) + self.indices = np.argsort(self.x, + kind='heapsort', + axis=self.axis) self.sorted_x = np.sort(self.x, kind='heapsort', axis=self.axis) def init_inputshape(self): @@ -157,46 +162,57 @@ class XPUTestArgsortOp2(XPUOpTestWrapper): self.descending = False class TestArgsortOpAxis0XPU(TestArgsortOp): + def init_axis(self): self.axis = 0 class TestArgsortOpAxis1XPU(TestArgsortOp): + def init_axis(self): self.axis = 1 class TestArgsortOpAxis2XPU(TestArgsortOp): + def init_axis(self): self.axis = 2 class TestArgsortOpAxisNeg1XPU(TestArgsortOp): + def init_axis(self): self.axis = -1 class TestArgsortOpAxisNeg2XPU(TestArgsortOp): + def init_axis(self): self.axis = -2 class TestArgsortOpDescendingAxisXPU(TestArgsortOp): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis0XPU(TestArgsortOpAxis0XPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis1XPU(TestArgsortOpAxis1XPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxis2XPU(TestArgsortOpAxis2XPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxisNeg1XPU(TestArgsortOpAxisNeg1XPU): + def init_direction(self): self.descending = True class TestArgsortOpDescendingAxisNeg2XPU(TestArgsortOpAxisNeg2XPU): + def init_direction(self): self.descending = True @@ -208,11 +224,13 @@ for stype in support_types: class XPUTestHuberLossOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'huber_loss' self.use_dynamic_create_class = False class TestHuberLossOp(XPUOpTest): + def setUp(self): self.op_type = 'huber_loss' self.place = paddle.XPUPlace(0) @@ -252,22 +270,27 @@ class XPUTestHuberLossOp(XPUOpTestWrapper): self.check_grad_with_place(self.place, ['X', 'Y'], 'Out') def test_check_grad_ingore_x(self): - self.check_grad_with_place( - self.place, ['Y'], 'Out', no_grad_set=set("residual")) + self.check_grad_with_place(self.place, ['Y'], + 'Out', + no_grad_set=set("residual")) def test_check_grad_ingore_y(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', no_grad_set=set('residual')) + self.check_grad_with_place(self.place, ['X'], + 'Out', + no_grad_set=set('residual')) class TestHuberLossOp1(TestHuberLossOp): + def set_shape(self): return (640) class 
TestHuberLossOp2(TestHuberLossOp): + def set_shape(self): return (10, 10) class TestHuberLossOp3(TestHuberLossOp): + def set_shape(self): return (10, 10, 1) @@ -275,11 +298,10 @@ class XPUTestHuberLossOp(XPUOpTestWrapper): support_types = get_xpu_op_support_types('huber_loss') for stype in support_types: create_test_class(globals(), XPUTestHuberLossOp, stype) - create_test_class( - globals(), - XPUTestHuberLossOp, - stype, - ignore_deivce_version=[core.XPUVersion.XPU1]) + create_test_class(globals(), + XPUTestHuberLossOp, + stype, + ignore_deivce_version=[core.XPUVersion.XPU1]) if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py index 0b000fc924a..2f7300d22c8 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import sys import unittest + sys.path.append("..") import paddle @@ -28,12 +29,14 @@ paddle.enable_static() class XPUTestReshapeOp(XPUOpTestWrapper): + def __init__(self): self.op_name = "reshape2" self.use_dynamic_create_class = False # situation 1: have shape( list, no tensor), no actual shape(Tensor) class TestReshapeOp(XPUOpTest): + def setUp(self): self.init_data() self.op_type = "reshape2" @@ -71,12 +74,14 @@ class XPUTestReshapeOp(XPUOpTestWrapper): self.check_grad_with_place(place, ["X"], "Out") class TestReshapeOpDimInfer1(TestReshapeOp): + def init_data(self): self.ori_shape = (5, 25) self.new_shape = (5, -1, 5) self.infered_shape = (5, -1, 5) class TestReshapeOpDimInfer2(TestReshapeOp): + def init_data(self): self.ori_shape = (10, 2, 6) self.new_shape = (10, 0, 3, -1) @@ -84,6 +89,7 @@ class XPUTestReshapeOp(XPUOpTestWrapper): # situation 2: have shape(list, no tensor), have actual shape(Tensor) class TestReshapeOpWithInputShape(TestReshapeOp): + def init_data(self): self.ori_shape = (6, 20) self.new_shape = (0, -1, 20) @@ -92,8 +98,7 @@ class XPUTestReshapeOp(XPUOpTestWrapper): def init_test_input(self): self.inputs = { "X": np.random.random(self.ori_shape).astype(self.dtype), - "Shape": np.array( - self.actual_shape, dtype="int32") + "Shape": np.array(self.actual_shape, dtype="int32") } def init_test_output(self): @@ -104,6 +109,7 @@ class XPUTestReshapeOp(XPUOpTestWrapper): # Situation 3: have shape(list, have tensor), no actual shape(Tensor) class TestReshapeOp_attr_ShapeTensor(TestReshapeOp): + def init_data(self): self.ori_shape = (4, 25) self.new_shape = (10, 10) @@ -124,16 +130,18 @@ class XPUTestReshapeOp(XPUOpTestWrapper): def init_attrs(self): self.attrs = {'shape': self.shape, "use_xpu": True} - class TestReshapeOpDimInfer1_attr_ShapeTensor( - TestReshapeOp_attr_ShapeTensor): + class TestReshapeOpDimInfer1_attr_ShapeTensor(TestReshapeOp_attr_ShapeTensor + ): + def init_data(self): self.ori_shape = (5, 20) self.new_shape = (5, -1, 20) self.infered_shape = (5, -1, 20) self.shape = (5, -1, -1) - class TestReshapeOpDimInfer2_attr_ShapeTensor( - TestReshapeOp_attr_ShapeTensor): + class TestReshapeOpDimInfer2_attr_ShapeTensor(TestReshapeOp_attr_ShapeTensor + ): + def init_data(self): self.ori_shape = (10, 2, 6) self.new_shape = (10, 0, 3, -1) @@ -142,6 +150,7 @@ class XPUTestReshapeOp(XPUOpTestWrapper): # Situation 4: have shape(Tensor), no actual shape(Tensor) class TestReshapeOp_attr_OnlyShape(TestReshapeOp): + def init_data(self): self.ori_shape = (4, 25) 
self.new_shape = (10, 10) @@ -150,14 +159,14 @@ class XPUTestReshapeOp(XPUOpTestWrapper): def init_test_input(self): self.inputs = { "X": np.random.random(self.ori_shape).astype(self.dtype), - "Shape": np.array( - self.new_shape, dtype="int32") + "Shape": np.array(self.new_shape, dtype="int32") } def init_attrs(self): self.attrs = {"use_xpu": True} class TestReshapeOpDimInfer1_attr_OnlyShape(TestReshapeOp_attr_OnlyShape): + def init_data(self): self.ori_shape = (5, 20) self.new_shape = (5, -1, 10) @@ -165,6 +174,7 @@ class XPUTestReshapeOp(XPUOpTestWrapper): self.shape = (5, -1, -1) class TestReshapeOpDimInfer2_attr_OnlyShape(TestReshapeOp_attr_OnlyShape): + def init_data(self): self.ori_shape = (10, 2, 6) self.new_shape = (10, 0, 3, -1) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py index a94a9d5541f..2e8853de44a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest diff --git a/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py index 84edbab1eac..af8532fd96a 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py @@ -13,6 +13,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -25,6 +26,7 @@ import random from op_test import OpTest from op_test_xpu import XPUOpTest + sys.path.append("../rnn") from rnn_numpy import SimpleRNN, LSTM, GRU from convert import get_params_for_net @@ -36,11 +38,13 @@ paddle.enable_static() class XPUTestRNNOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'rnn' self.use_dynamic_create_class = False class TestRNNOp(XPUOpTest): + def setUp(self): self.init_size() self.init_dtype() @@ -57,28 +61,27 @@ class XPUTestRNNOp(XPUOpTestWrapper): self.direction_num = 2 if self.is_bidirec else 1 direction = "bidirectional" if self.is_bidirec else "forward" - input = np.random.uniform( - low=-0.1, - high=0.1, - size=(self.seq_length, self.batch_size, - self.input_size)).astype(self.dtype) + input = np.random.uniform(low=-0.1, + high=0.1, + size=(self.seq_length, self.batch_size, + self.input_size)).astype(self.dtype) input[11][1:][:] = 0 input[10][2:][:] = 0 input[9][3:][:] = 0 input[8][4:][:] = 0 - rnn1 = LSTM( - self.input_size, - self.hidden_size, - num_layers=self.num_layers, - time_major=True, - direction=direction, - dropout=self.dropout, - dtype=self.dtype) + rnn1 = LSTM(self.input_size, + self.hidden_size, + num_layers=self.num_layers, + time_major=True, + direction=direction, + dropout=self.dropout, + dtype=self.dtype) flat_w = get_params_for_net(rnn1) - output, (last_hidden, last_cell) = rnn1( - input, sequence_length=self.sequence_length) + output, (last_hidden, + last_cell) = rnn1(input, + sequence_length=self.sequence_length) init_h = np.zeros( (self.num_layers * self.direction_num, self.batch_size, @@ -111,8 +114,8 @@ class XPUTestRNNOp(XPUOpTestWrapper): } self.outputs = { 'Out': output, - "State": - [('last_hidden', last_hidden), ('last_cell', last_cell)], + "State": [('last_hidden', last_hidden), + ('last_cell', last_cell)], 'Reserve': np.ndarray((400)).astype("uint8"), 'DropoutState': state_out } @@ -127,16 +130,14 @@ class 
XPUTestRNNOp(XPUOpTestWrapper): def test_check_output(self): self.check_output_with_place( - self.place, atol=0.01, - no_check_set=['Reserve', 'DropoutState']) + self.place, atol=0.01, no_check_set=['Reserve', 'DropoutState']) def test_grad(self): if not self.is_test: var_name_list = self.get_weight_names() grad_check_list = ['Input', 'init_h', 'init_c'] grad_check_list.extend(var_name_list) - self.check_grad_with_place(self.place, - set(grad_check_list), + self.check_grad_with_place(self.place, set(grad_check_list), ['Out', 'last_hidden', 'last_cell']) def init_size(self): @@ -159,36 +160,43 @@ class XPUTestRNNOp(XPUOpTestWrapper): pass class TestRNNOp1(TestRNNOp): + def set_attrs(self): self.sequence_length = None class TestRNNOp2(TestRNNOp): + def set_attrs(self): self.num_layers = 1 self.is_bidirec = True class TestRNNOp3(TestRNNOp): + def set_attrs(self): self.num_layers = 2 self.is_bidirec = False class TestRNNOp4(TestRNNOp): + def set_attrs(self): self.num_layers = 3 self.is_bidirec = False class TestRNNOp5(TestRNNOp): + def set_attrs(self): self.num_layers = 2 self.is_bidirec = True class TestRNNOp6(TestRNNOp): + def set_attrs(self): self.num_layers = 2 self.is_bidirec = True self.sequence_length = None class TestRNNOp7(TestRNNOp): + def set_attrs(self): self.num_layers = 3 self.is_bidirec = True @@ -196,11 +204,10 @@ class XPUTestRNNOp(XPUOpTestWrapper): support_types = get_xpu_op_support_types('rnn') for stype in support_types: - create_test_class( - globals(), - XPUTestRNNOp, - stype, - ignore_deivce_version=[core.XPUVersion.XPU1]) + create_test_class(globals(), + XPUTestRNNOp, + stype, + ignore_deivce_version=[core.XPUVersion.XPU1]) if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_roi_align_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_roi_align_op_xpu.py index e80b1e4c50e..4c830b1e872 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_roi_align_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_roi_align_op_xpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import math @@ -27,6 +28,7 @@ from paddle.fluid import Program, program_guard class TestROIAlignOp(XPUOpTest): + def set_data(self): self.init_test_case() self.make_rois() @@ -73,8 +75,8 @@ class TestROIAlignOp(XPUOpTest): bilinear_pos = np.zeros( [self.channels, self.pooled_height, self.pooled_width, count, 4], np.float32) - bilinear_w = np.zeros( - [self.pooled_height, self.pooled_width, count, 4], np.float32) + bilinear_w = np.zeros([self.pooled_height, self.pooled_width, count, 4], + np.float32) for ph in range(self.pooled_width): for pw in range(self.pooled_height): c = 0 @@ -196,6 +198,7 @@ class TestROIAlignOp(XPUOpTest): class TestROIAlignInLodOp(TestROIAlignOp): + def set_data(self): self.init_test_case() self.make_rois() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py index b27eefb6a16..83642fa5420 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -31,11 +32,13 @@ from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, class XPUTestScaleOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'scale' self.use_dynamic_create_class 
= False class TestScaleOp(XPUOpTest): + def setUp(self): self.init_dtype() self.set_xpu() @@ -72,27 +75,33 @@ class XPUTestScaleOp(XPUOpTestWrapper): self.check_output_with_place(place) class TestScaleOp1(TestScaleOp): + def set_attrs(self): self.attrs = {'scale': 3.5} class TestScaleOp2(TestScaleOp): + def set_attrs(self): self.attrs = {'scale': 6.77} class TestScaleOp3(TestScaleOp): + def set_attrs(self): self.attrs = {'scale': -9.19} class TestScaleOp4(TestScaleOp): + def set_attrs(self): self.attrs = {'scale': 0.0} class TestScaleOp5(TestScaleOp): + def set_attrs(self): self.attrs = {'scale': -0.003} class TestScaleApiStatic(unittest.TestCase): + def _executed_api(self, x, scale=1.0, bias=0.0): return paddle.scale(x, scale, bias) @@ -110,11 +119,13 @@ class TestScaleApiStatic(unittest.TestCase): class TestScaleInplaceApiStatic(TestScaleApiStatic): + def _executed_api(self, x, scale=1.0, bias=0.0): return x.scale_(scale, bias) class TestScaleApiDygraph(unittest.TestCase): + def _executed_api(self, x, scale=1.0, bias=0.0): return paddle.scale(x, scale, bias) @@ -128,6 +139,7 @@ class TestScaleApiDygraph(unittest.TestCase): class TestScaleInplaceApiDygraph(TestScaleApiDygraph): + def _executed_api(self, x, scale=1.0, bias=0.0): return x.scale_(scale, bias) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_scatter_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_scatter_op_xpu.py index 68a39f3c001..9331ad73a67 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_scatter_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_scatter_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") import paddle @@ -27,6 +28,7 @@ paddle.enable_static() class XPUTestScatterOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'scatter' self.use_dynamic_create_class = True @@ -97,6 +99,7 @@ class XPUTestScatterOp(XPUOpTestWrapper): return base_class, classes class TestScatterOp(XPUOpTest): + def setUp(self): self.init_config() self.index_type = np.int32 if not hasattr( diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py index 99992170418..17abd1842f4 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py @@ -19,6 +19,7 @@ import numpy as np import paddle import random import sys + sys.path.append("../") from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types @@ -52,8 +53,8 @@ def seqconv(x, [offset[i] - in_begin, offset[i + 1] - offset[i]]) if padding_trainable: sub_w = padding_data[j:j + pad_size, :] - col[offset[i]:offset[i] + pad_size, j * M:(j + 1) * - M] = sub_w + col[offset[i]:offset[i] + pad_size, + j * M:(j + 1) * M] = sub_w out_begin = offset[i] + pad_size in_begin = offset[i] @@ -64,8 +65,8 @@ def seqconv(x, sub_w = padding_data[begin_pad + context_start + j - pad_size:begin_pad + context_start + j, :] - col[offset[i + 1] - pad_size:offset[i + 1], j * M:(j + 1) * - M] = sub_w + col[offset[i + 1] - pad_size:offset[i + 1], + j * M:(j + 1) * M] = sub_w in_end = offset[i + 1] out_end = offset[i + 1] - pad_size if in_end <= in_begin: @@ -76,10 +77,12 @@ def seqconv(x, class XPUTestSequenceConv(XPUOpTestWrapper): + def __init__(self): self.op_name = 'sequence_conv' class TestSeqProject(XPUOpTest): + def setUp(self): 
self.init_test_case() self.op_type = 'sequence_conv' @@ -95,9 +98,9 @@ class XPUTestSequenceConv(XPUOpTestWrapper): return # one level, batch size - x = np.random.uniform(-6.10907e-05, 0.000104218, - [self.input_size[0], - self.input_size[1]]).astype(self.dtype) + x = np.random.uniform( + -6.10907e-05, 0.000104218, + [self.input_size[0], self.input_size[1]]).astype(self.dtype) w = np.random.uniform(-3.17068e-05, 0.000159822, [ self.context_length * self.input_size[1], self.output_represention @@ -143,27 +146,32 @@ class XPUTestSequenceConv(XPUOpTestWrapper): def test_check_grad_padding_data(self): if self.padding_trainable: - self.check_grad( - ['PaddingData'], 'Out', no_grad_set=set(['X', 'Filter'])) + self.check_grad(['PaddingData'], + 'Out', + no_grad_set=set(['X', 'Filter'])) def test_check_grad_Filter(self): - self.check_grad( - ['Filter'], 'Out', no_grad_set=set(self.inputs_val_no_f)) + self.check_grad(['Filter'], + 'Out', + no_grad_set=set(self.inputs_val_no_f)) def test_check_grad_input_filter(self): if self.padding_trainable: - self.check_grad( - ['X', 'Filter'], 'Out', no_grad_set=set(['PaddingData'])) + self.check_grad(['X', 'Filter'], + 'Out', + no_grad_set=set(['PaddingData'])) def test_check_grad_padding_input(self): if self.padding_trainable: - self.check_grad( - self.inputs_val_no_f, 'Out', no_grad_set=set(['Filter'])) + self.check_grad(self.inputs_val_no_f, + 'Out', + no_grad_set=set(['Filter'])) def test_check_grad_padding_filter(self): if self.padding_trainable: - self.check_grad( - self.inputs_val_no_x, 'Out', no_grad_set=set(['X'])) + self.check_grad(self.inputs_val_no_x, + 'Out', + no_grad_set=set(['X'])) def init_test_case(self): self.input_row = 7 @@ -182,6 +190,7 @@ class XPUTestSequenceConv(XPUOpTestWrapper): self.output_represention = 8 # output feature size class TestSeqProjectCase1(TestSeqProject): + def init_test_case(self): self.input_row = 11 self.context_start = -2 @@ -198,6 +207,7 @@ class XPUTestSequenceConv(XPUOpTestWrapper): self.output_represention = 8 # output feature size class TestSeqProjectCase2Len0(TestSeqProject): + def init_test_case(self): self.input_row = 11 self.context_start = -2 @@ -214,6 +224,7 @@ class XPUTestSequenceConv(XPUOpTestWrapper): self.output_represention = 8 # output feature size class TestSeqProjectCase3(TestSeqProject): + def init_test_case(self): self.input_row = 25 self.context_start = -2 @@ -233,6 +244,7 @@ class XPUTestSequenceConv(XPUOpTestWrapper): self.output_represention = 8 # output feature size class TestSeqProjectCase4(TestSeqProject): + def init_test_case(self): self.input_row = 7835 self.input_col = 128 @@ -270,12 +282,15 @@ for stype in support_types: class TestSeqConvApi(unittest.TestCase): + def test_api(self): import paddle.fluid as fluid x = fluid.layers.data('x', shape=[32], lod_level=1) - y = fluid.layers.sequence_conv( - input=x, num_filters=2, filter_size=3, padding_start=None) + y = fluid.layers.sequence_conv(input=x, + num_filters=2, + filter_size=3, + padding_start=None) place = fluid.CPUPlace() x_tensor = fluid.create_lod_tensor( diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py index 67fd9f87120..e174d245332 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py @@ -18,6 +18,7 @@ import unittest import numpy as np import sys import os + sys.path.append("..") from op_test import OpTest import paddle @@ -30,11 +31,13 @@ from 
xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, class XPUTestSgdOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'sgd' self.use_dynamic_create_class = False class TestSGDOp(XPUOpTest): + def setUp(self): self.op_type = "sgd" self.dtype = self.in_type @@ -54,6 +57,7 @@ class XPUTestSgdOp(XPUOpTestWrapper): self.check_output_with_place(paddle.XPUPlace(0)) class TestSGDOpCase8X(TestSGDOp): + def conf(self): self.h = 10 self.w = 64 @@ -65,10 +69,12 @@ for stype in support_types: class TestSGDOpWithLargeInput(unittest.TestCase): + def runTest(self): data = fluid.layers.fill_constant(shape=[1], value=128, dtype='int64') - label = fluid.layers.fill_constant( - shape=[1, 150], value=0.5, dtype='float32') + label = fluid.layers.fill_constant(shape=[1, 150], + value=0.5, + dtype='float32') emb = fluid.embedding(input=data, size=(10000, 150), dtype='float32') out = fluid.layers.l2_normalize(x=emb, axis=-1) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py index c7fa72ca770..23eb66f0ed0 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper @@ -28,11 +29,13 @@ paddle.enable_static() class XPUTestShapeOp(XPUOpTestWrapper): + def __init__(self): self.op_name = "shape" self.use_dynamic_create_class = False class TestShapeOp(XPUOpTest): + def setUp(self): self.dtype = self.in_type self.op_type = "shape" @@ -50,26 +53,32 @@ class XPUTestShapeOp(XPUOpTestWrapper): self.check_output_with_place(place) class TestShapeOp1(TestShapeOp): + def config(self): self.shape = [2] class TestShapeOp2(TestShapeOp): + def config(self): self.shape = [1, 2, 3] class TestShapeOp3(TestShapeOp): + def config(self): self.shape = [1, 2, 3, 4] class TestShapeOp4(TestShapeOp): + def config(self): self.shape = [1, 2, 3, 4, 1024] class TestShapeOp5(TestShapeOp): + def config(self): self.shape = [1, 2, 3, 4, 1, 201] class TestShapeWithSelectedRows(unittest.TestCase): + def setUp(self): self.dtype = self.in_type diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py index 1aac42f2d63..accd489d596 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test_xpu import OpTest, XPUOpTest import paddle @@ -43,6 +44,7 @@ class XPUTestSigmoidCrossEntropyWithLogitsOp(XPUOpTestWrapper): self.use_dynamic_create_class = False class TestSigmoidCrossEntropyWithLogitsOp(XPUOpTest): + def setUp(self): self.set_xpu() self.op_type = "sigmoid_cross_entropy_with_logits" @@ -65,11 +67,13 @@ class XPUTestSigmoidCrossEntropyWithLogitsOp(XPUOpTestWrapper): batch_size = 64 num_classes = 20 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, (batch_size, num_classes)) - .astype(self.dtype)), - 'Label': np.random.randint(0, 2, (batch_size, num_classes)) - .astype(self.dtype) + 'X': + logit( 
+ np.random.uniform(0, 1, (batch_size, num_classes)).astype( + self.dtype)), + 'Label': + np.random.randint(0, 2, + (batch_size, num_classes)).astype(self.dtype) } self.attrs = {'num_classes': num_classes, 'batch_size': batch_size} @@ -98,11 +102,13 @@ class XPUTestSigmoidCrossEntropyWithLogitsOp(XPUOpTestWrapper): ignore_index = -1 self.ignore_index = ignore_index self.inputs = { - 'X': logit( - np.random.uniform(0, 1, (batch_size, num_classes)) - .astype(self.dtype)), - 'Label': np.random.randint(-1, 2, (batch_size, num_classes)) - .astype(self.dtype) + 'X': + logit( + np.random.uniform(0, 1, (batch_size, num_classes)).astype( + self.dtype)), + 'Label': + np.random.randint(-1, 2, + (batch_size, num_classes)).astype(self.dtype) } self.attrs = {'ignore_index': ignore_index} @@ -126,11 +132,13 @@ class XPUTestSigmoidCrossEntropyWithLogitsOp(XPUOpTestWrapper): batch_size = 64 num_classes = 20 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, (batch_size, num_classes)) - .astype(self.dtype)), - 'Label': np.random.uniform(0, 1, (batch_size, num_classes)) - .astype(self.dtype) + 'X': + logit( + np.random.uniform(0, 1, (batch_size, num_classes)).astype( + self.dtype)), + 'Label': + np.random.uniform(0, 1, + (batch_size, num_classes)).astype(self.dtype) } self.attrs = {'num_classes': num_classes, 'batch_size': batch_size} @@ -154,11 +162,13 @@ class XPUTestSigmoidCrossEntropyWithLogitsOp(XPUOpTestWrapper): ignore_index = -1 self.ignore_index = ignore_index self.inputs = { - 'X': logit( - np.random.uniform(0, 1, (batch_size, num_classes)) - .astype(self.dtype)), - 'Label': np.random.randint(-1, 2, (batch_size, num_classes)) - .astype(self.dtype) + 'X': + logit( + np.random.uniform(0, 1, (batch_size, num_classes)).astype( + self.dtype)), + 'Label': + np.random.randint(-1, 2, + (batch_size, num_classes)).astype(self.dtype) } self.attrs = {'ignore_index': ignore_index, 'normalize': True} @@ -185,12 +195,14 @@ class XPUTestSigmoidCrossEntropyWithLogitsOp(XPUOpTestWrapper): batch_size = [10, 10] num_classes = 20 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, tuple(batch_size + [num_classes])) - .astype(self.dtype)), + 'X': + logit( + np.random.uniform(0, 1, + tuple(batch_size + [num_classes])).astype( + self.dtype)), 'Label': - np.random.uniform(0, 1, tuple(batch_size + [num_classes])) - .astype(self.dtype) + np.random.uniform(0, 1, tuple(batch_size + + [num_classes])).astype(self.dtype) } self.attrs = {'num_classes': num_classes, 'batch_size': batch_size} @@ -212,12 +224,14 @@ class XPUTestSigmoidCrossEntropyWithLogitsOp(XPUOpTestWrapper): batch_size = [10, 10] num_classes = 20 self.inputs = { - 'X': logit( - np.random.uniform(0, 1, tuple(batch_size + [num_classes])) - .astype(self.dtype)), + 'X': + logit( + np.random.uniform(0, 1, + tuple(batch_size + [num_classes])).astype( + self.dtype)), 'Label': - np.random.randint(0, 2, tuple(batch_size + [num_classes])) - .astype(self.dtype) + np.random.randint(0, 2, tuple(batch_size + + [num_classes])).astype(self.dtype) } self.attrs = {'num_classes': num_classes, 'batch_size': batch_size} @@ -241,12 +255,14 @@ class XPUTestSigmoidCrossEntropyWithLogitsOp(XPUOpTestWrapper): ignore_index = -1 self.ignore_index = ignore_index self.inputs = { - 'X': logit( - np.random.uniform(0, 1, tuple(batch_size + [num_classes])) - .astype(self.dtype)), + 'X': + logit( + np.random.uniform(0, 1, + tuple(batch_size + [num_classes])).astype( + self.dtype)), 'Label': - np.random.randint(-1, 2, tuple(batch_size + [num_classes])) - .astype(self.dtype) + 
np.random.randint( + -1, 2, tuple(batch_size + [num_classes])).astype(self.dtype) } self.attrs = {'ignore_index': ignore_index, 'normalize': True} diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py index 9254a84ec42..c00e0b5217a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") import paddle @@ -29,11 +30,13 @@ paddle.enable_static() class XPUTestSignOP(XPUOpTestWrapper): + def __init__(self): self.op_name = 'sign' self.use_dynamic_create_class = False class TestSignOPBase(XPUOpTest): + def setUp(self): self.place = paddle.XPUPlace(0) self.init_dtype() @@ -62,18 +65,22 @@ class XPUTestSignOP(XPUOpTestWrapper): self.input_shape = [864] class XPUTestSign1(TestSignOPBase): + def init_config(self): self.input_shape = [2, 768] class XPUTestSign2(TestSignOPBase): + def init_config(self): self.input_shape = [3, 8, 4096] class XPUTestSign3(TestSignOPBase): + def init_config(self): self.input_shape = [1024] class XPUTestSign4(TestSignOPBase): + def init_config(self): self.input_shape = [2, 2, 255] diff --git a/python/paddle/fluid/tests/unittests/xpu/test_slice_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_slice_op_xpu.py index 3d7c9959db9..34823301737 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_slice_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_slice_op_xpu.py @@ -16,6 +16,7 @@ import paddle import numpy as np import sys import unittest + sys.path.append("..") from op_test import OpTest from op_test_xpu import XPUOpTest @@ -27,11 +28,13 @@ paddle.enable_static() # Situation 1: starts(list, no tensor), ends(list, no tensor) # 1.1 without attr(decrease) class XPUTestSliceOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'slice' self.use_dynamic_create_class = False class TestSliceOp(XPUOpTest): + def setUp(self): self.dtype = self.in_type self.place = paddle.XPUPlace(0) @@ -67,6 +70,7 @@ class XPUTestSliceOp(XPUOpTestWrapper): user_defined_grad_outputs=user_defined_grad_outputs) class TestCase1(TestSliceOp): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype) self.starts = [-3, 0, 2] @@ -76,6 +80,7 @@ class XPUTestSliceOp(XPUOpTestWrapper): self.out = self.input[-3:3, 0:100, 2:-1, :] class TestCase2(TestSliceOp): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype) self.starts = [-3, 0, 2] @@ -87,11 +92,13 @@ class XPUTestSliceOp(XPUOpTestWrapper): # 1.2 with attr(decrease) class XPUTestSliceOp_decs_dim(XPUOpTestWrapper): + def __init__(self): self.op_name = 'slice' self.use_dynamic_create_class = False class TestSliceOp_decs_dim(XPUOpTest): + def setUp(self): self.dtype = self.in_type self.place = paddle.XPUPlace(0) @@ -132,6 +139,7 @@ class XPUTestSliceOp_decs_dim(XPUOpTestWrapper): user_defined_grad_outputs=user_defined_grad_outputs) class TestSliceOp_decs_dim_2(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype) self.starts = [1, 0, 2] @@ -142,6 +150,7 @@ class XPUTestSliceOp_decs_dim(XPUOpTestWrapper): self.out = self.input[1, 0, 2:4, :] class TestSliceOp_decs_dim_3(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype) self.starts = [-1, 0, 2] @@ -152,6 +161,7 @@ class 
XPUTestSliceOp_decs_dim(XPUOpTestWrapper): self.out = self.input[-1, 0, 2:4, :] class TestSliceOp_decs_dim_4(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 7]).astype(self.dtype) self.starts = [0, 1, 2, 3] @@ -162,6 +172,7 @@ class XPUTestSliceOp_decs_dim(XPUOpTestWrapper): self.out = self.input[0, 1, 2, 3:4] class TestSliceOp_decs_dim_5(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype) self.starts = [-1] @@ -172,6 +183,7 @@ class XPUTestSliceOp_decs_dim(XPUOpTestWrapper): self.out = self.input[:, :, :, -1] class TestSliceOp_decs_dim_6(TestSliceOp_decs_dim): + def config(self): self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype) self.starts = [0, 1, 2, 3] diff --git a/python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py index aa56a463b90..a4997c91ffb 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py @@ -16,6 +16,7 @@ import paddle import numpy as np import sys import unittest + sys.path.append("..") from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper @@ -43,6 +44,7 @@ def ref_softmax(x, axis=None, dtype=None): class XPUTestSoftmaxOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'softmax' self.use_dynamic_create_class = True @@ -61,6 +63,7 @@ class XPUTestSoftmaxOp(XPUOpTestWrapper): return base_class, classes class TestSoftmaxOp(XPUOpTest): + def setUp(self): self.op_type = "softmax" if not hasattr(self, 'shape'): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py index 59907fe9f68..661f1170418 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import sys + sys.path.append("..") from test_softmax_op import stable_softmax @@ -47,6 +48,7 @@ def cross_entropy(softmax, label, soft_label, axis, ignore_index=-1): class XPUTestSoftmaxWithCrossEntropyOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'softmax_with_cross_entropy' self.use_dynamic_create_class = True @@ -106,8 +108,10 @@ class XPUTestSoftmaxWithCrossEntropyOp(XPUOpTestWrapper): else: axis_dim = self.shape[self.axis] self.shape[self.axis] = 1 - labels = np.random.randint( - 0, axis_dim, self.shape, dtype="int64") + labels = np.random.randint(0, + axis_dim, + self.shape, + dtype="int64") loss = cross_entropy(softmax, labels, self.soft_label, self.axis, self.ignore_index) @@ -136,8 +140,9 @@ class XPUTestSoftmaxWithCrossEntropyOp(XPUOpTestWrapper): if paddle.is_compiled_with_xpu(): paddle.enable_static() place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, ["Logits"], "Loss", max_relative_error=0.2) + self.check_grad_with_place(place, ["Logits"], + "Loss", + max_relative_error=0.2) support_types = get_xpu_op_support_types('softmax_with_cross_entropy') diff --git a/python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py index 1b8bf64a0de..a27d94e7399 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py +++ 
b/python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -30,12 +31,14 @@ paddle.enable_static() class XPUTestSplitOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'split' self.use_dynamic_create_class = False # test with attr(num) class TestSplitOp(XPUOpTest): + def setUp(self): self.init_dtype() self.__class__.use_xpu = True @@ -68,6 +71,7 @@ class XPUTestSplitOp(XPUOpTestWrapper): # unknown sections class TestSplitOp1(TestSplitOp): + def initParameters(self): self.x = np.random.random((4, 5, 6)).astype(self.dtype) self.axis = 2 @@ -77,6 +81,7 @@ class XPUTestSplitOp(XPUOpTestWrapper): # test with int32 class TestSplitOp2(TestSplitOp): + def initParameters(self): self.x = np.random.random((4, 5, 6)).astype(np.int32) self.axis = 2 diff --git a/python/paddle/fluid/tests/unittests/xpu/test_squeeze2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_squeeze2_op_xpu.py index 705e7c4cb0f..cdc67cf4647 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_squeeze2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_squeeze2_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import numpy as np @@ -28,11 +29,13 @@ paddle.enable_static() class XPUTestSqueeze2Op(XPUOpTestWrapper): + def __init__(self): self.op_name = "squeeze2" self.use_dynamic_create_class = False class TestSqueeze2Op(XPUOpTest): + def setUp(self): self.op_type = "squeeze2" self.use_mkldnn = False @@ -78,6 +81,7 @@ class XPUTestSqueeze2Op(XPUOpTestWrapper): # Correct: There is mins axis. class TestSqueeze2Op1(TestSqueeze2Op): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = (0, -2) @@ -85,13 +89,15 @@ class XPUTestSqueeze2Op(XPUOpTestWrapper): # Correct: No axes input. class TestSqueeze2Op2(TestSqueeze2Op): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = () self.new_shape = (20, 5) - # Correct: Just part of axes be squeezed. + # Correct: Just part of axes be squeezed. class TestSqueeze2Op3(TestSqueeze2Op): + def init_test_case(self): self.ori_shape = (6, 1, 5, 1, 4, 1) self.axes = (1, -1) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_squeeze_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_squeeze_op_xpu.py index de701bfc513..b766b6e3c00 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_squeeze_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_squeeze_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import numpy as np @@ -30,6 +31,7 @@ paddle.enable_static() # Correct: General. class TestSqueezeOp(XPUOpTest): + def setUp(self): self.op_type = "squeeze" self.use_xpu = True @@ -37,7 +39,9 @@ class TestSqueezeOp(XPUOpTest): self.init_test_case() self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")} self.init_attrs() - self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape), } + self.outputs = { + "Out": self.inputs["X"].reshape(self.new_shape), + } def test_check_output(self): if paddle.is_compiled_with_xpu(): @@ -60,6 +64,7 @@ class TestSqueezeOp(XPUOpTest): # Correct: There is mins axis. class TestSqueezeOp1(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (1, 3, 1, 40) self.axes = (0, -2) @@ -68,14 +73,16 @@ class TestSqueezeOp1(TestSqueezeOp): # Correct: No axes input. 
class TestSqueezeOp2(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (1, 20, 1, 5) self.axes = () self.new_shape = (20, 5) -# Correct: Just part of axes be squeezed. +# Correct: Just part of axes be squeezed. class TestSqueezeOp3(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (6, 1, 5, 1, 4, 1) self.axes = (1, -1) @@ -84,6 +91,7 @@ class TestSqueezeOp3(TestSqueezeOp): # Correct: The demension of axis is not of size 1 remains unchanged. class TestSqueezeOp4(TestSqueezeOp): + def init_test_case(self): self.ori_shape = (6, 1, 5, 1, 4, 1) self.axes = (1, 2) @@ -91,12 +99,13 @@ class TestSqueezeOp4(TestSqueezeOp): class TestSqueezeOpError(unittest.TestCase): + def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): # The input type of softmax_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], paddle.XPUPlace(0)) + x1 = fluid.create_lod_tensor(np.array([[-1]]), [[1]], + paddle.XPUPlace(0)) self.assertRaises(TypeError, paddle.squeeze, x1) # The input axes of squeeze must be list. x2 = paddle.static.data(name='x2', shape=[4], dtype="int32") diff --git a/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py index 86126f976ab..b6d547e7059 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -30,12 +31,14 @@ paddle.enable_static() class XPUTestStackOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'stack' self.use_dynamic_create_class = False @skip_check_grad_ci(reason="There is no grad kernel for stack_xpu op.") class TestStackOp(XPUOpTest): + def initDefaultParameters(self): self.num_inputs = 4 self.input_dim = (5, 6, 7) @@ -80,18 +83,21 @@ class XPUTestStackOp(XPUOpTestWrapper): if self.dtype == np.int32 or self.dtype == np.int64: pass else: - self.check_grad_with_place( - paddle.XPUPlace(0), self.get_x_names(), 'Y') + self.check_grad_with_place(paddle.XPUPlace(0), + self.get_x_names(), 'Y') class TestStackOp1(TestStackOp): + def initParameters(self): self.num_inputs = 16 class TestStackOp2(TestStackOp): + def initParameters(self): self.num_inputs = 30 class TestStackOp3(TestStackOp): + def initParameters(self): self.axis = -1 @@ -99,6 +105,7 @@ class XPUTestStackOp(XPUOpTestWrapper): pass class TestStackOp4(TestStackOp): + def initParameters(self): self.axis = -4 @@ -106,14 +113,17 @@ class XPUTestStackOp(XPUOpTestWrapper): pass class TestStackOp5(TestStackOp): + def initParameters(self): self.axis = 1 class TestStackOp6(TestStackOp): + def initParameters(self): self.axis = 3 class TestStackOp7(TestStackOp): + def initParameters(self): self.num_inputs = 4 self.input_dim = (5, 6, 7) @@ -124,6 +134,7 @@ class XPUTestStackOp(XPUOpTestWrapper): pass class TestStackOp8(TestStackOp): + def initParameters(self): self.num_inputs = 4 self.input_dim = (5, 6, 7) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py index 8ab556efd42..61f7bcda08c 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py @@ -14,6 +14,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy as np @@ -23,14 +24,16 @@ from paddle import 
enable_static import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.op import Operator -from paddle.fluid.tests.unittests.op_test import ( - OpTest, convert_float_to_uint16, convert_uint16_to_float) +from paddle.fluid.tests.unittests.op_test import (OpTest, + convert_float_to_uint16, + convert_uint16_to_float) from paddle import _C_ops paddle.enable_static() class TestSumOp(XPUOpTest): + def setUp(self): self.op_type = "sum" self.init_kernel_type() @@ -54,6 +57,7 @@ class TestSumOp(XPUOpTest): #----------- test fp16 ----------- class TestFP16SumOp(TestSumOp): + def init_kernel_type(self): self.dtype = np.float16 @@ -67,12 +71,15 @@ class TestFP16SumOp(TestSumOp): def test_check_grad(self): place = core.XPUPlace(0) # if core.is_float16_supported(place): - self.check_grad_with_place( - place, ['x0'], 'Out', max_relative_error=0.15) + self.check_grad_with_place(place, ['x0'], + 'Out', + max_relative_error=0.15) def create_test_sum_fp16_class(parent): + class TestSumFp16Case(parent): + def init_kernel_type(self): self.dtype = np.float16 @@ -88,12 +95,15 @@ def create_test_sum_fp16_class(parent): class API_Test_Add_n(unittest.TestCase): + def test_api(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - input0 = fluid.layers.fill_constant( - shape=[2, 3], dtype='int64', value=5) - input1 = fluid.layers.fill_constant( - shape=[2, 3], dtype='int64', value=3) + input0 = fluid.layers.fill_constant(shape=[2, 3], + dtype='int64', + value=5) + input1 = fluid.layers.fill_constant(shape=[2, 3], + dtype='int64', + value=3) expected_result = np.empty((2, 3)) expected_result.fill(8) sum_value = paddle.add_n([input0, input1]) @@ -112,7 +122,9 @@ class API_Test_Add_n(unittest.TestCase): class TestRaiseSumError(unittest.TestCase): + def test_errors(self): + def test_type(): fluid.layers.sum([11, 22]) @@ -133,7 +145,9 @@ class TestRaiseSumError(unittest.TestCase): class TestRaiseSumsError(unittest.TestCase): + def test_errors(self): + def test_type(): fluid.layers.sums([11, 22]) @@ -169,7 +183,9 @@ class TestRaiseSumsError(unittest.TestCase): class TestSumOpError(unittest.TestCase): + def test_errors(self): + def test_empty_list_input(): with fluid.dygraph.guard(): fluid._C_ops.sum([]) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py index cd18bd63a88..163c5628e74 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test import OpTest from op_test_xpu import XPUOpTest @@ -32,11 +33,13 @@ np.random.seed(10) #Situation 1: repeat_times is a list (without tensor) class XPUTestTileOpRank1(XPUOpTestWrapper): + def __init__(self): self.op_name = 'tile' self.use_dynamic_create_class = False class TestTileOpRank1(XPUOpTest): + def setUp(self): self.dtype = self.in_type self.__class__.no_need_check_grad = True @@ -59,31 +62,37 @@ class XPUTestTileOpRank1(XPUOpTestWrapper): #with dimension expanding class TestTileOpRank2Expanding(TestTileOpRank1): + def init_data(self): self.ori_shape = [120] self.repeat_times = [2, 2] class TestTileOpRank2(TestTileOpRank1): + def init_data(self): self.ori_shape = [12, 14] self.repeat_times = [2, 3] class TestTileOpRank3_Corner(TestTileOpRank1): + def init_data(self): self.ori_shape = (2, 10, 5) self.repeat_times = (1, 1, 1) class 
TestTileOpRank3_Corner2(TestTileOpRank1): + def init_data(self): self.ori_shape = (2, 10, 5) self.repeat_times = (2, 2) class TestTileOpRank3(TestTileOpRank1): + def init_data(self): self.ori_shape = (2, 4, 15) self.repeat_times = (2, 1, 4) class TestTileOpRank4(TestTileOpRank1): + def init_data(self): self.ori_shape = (2, 4, 5, 7) self.repeat_times = (3, 2, 1, 2) @@ -91,11 +100,13 @@ class XPUTestTileOpRank1(XPUOpTestWrapper): # Situation 2: repeat_times is a list (with tensor) class XPUTestTileOpRank1_tensor_attr(XPUOpTestWrapper): + def __init__(self): self.op_name = 'tile' self.use_dynamic_create_class = False class TestTileOpRank1_tensor_attr(XPUOpTest): + def setUp(self): self.dtype = self.in_type self.__class__.no_need_check_grad = True @@ -124,12 +135,14 @@ class XPUTestTileOpRank1_tensor_attr(XPUOpTestWrapper): self.check_output_with_place(self.place) class TestTileOpRank2_Corner_tensor_attr(TestTileOpRank1_tensor_attr): + def init_data(self): self.ori_shape = [12, 14] self.repeat_times = [1, 1] self.infer_repeat_times = [1, -1] class TestTileOpRank2_attr_tensor(TestTileOpRank1_tensor_attr): + def init_data(self): self.ori_shape = [12, 14] self.repeat_times = [2, 3] @@ -138,11 +151,13 @@ class XPUTestTileOpRank1_tensor_attr(XPUOpTestWrapper): # Situation 3: repeat_times is a tensor class XPUTestTileOpRank1_tensor(XPUOpTestWrapper): + def __init__(self): self.op_name = 'tile' self.use_dynamic_create_class = False class TestTileOpRank1_tensor(XPUOpTest): + def setUp(self): self.dtype = self.in_type self.__class__.no_need_check_grad = True @@ -166,6 +181,7 @@ class XPUTestTileOpRank1_tensor(XPUOpTestWrapper): self.check_output_with_place(self.place) class TestTileOpRank2_tensor(TestTileOpRank1_tensor): + def init_data(self): self.ori_shape = [12, 14] self.repeat_times = [2, 3] @@ -180,6 +196,7 @@ for stype in support_types: # Test python API class TestTileAPI(unittest.TestCase): + def test_api(self): with fluid.dygraph.guard(paddle.XPUPlace(0)): np_x = np.random.random([12, 14]).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/xpu/test_top_k_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_top_k_op_xpu.py index c4418bd55c1..1fa4a5e8b7d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_top_k_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_top_k_op_xpu.py @@ -16,6 +16,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from paddle.fluid.op import Operator import paddle.fluid.core as core @@ -29,6 +30,7 @@ paddle.enable_static() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") class TestTopkOp(OpTest): + def setUp(self): self.variable_k = False self.use_xpu = True diff --git a/python/paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py index 71895db4ae9..0a3bd54a593 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np import sys + sys.path.append("..") from op_test_xpu import XPUOpTest import paddle @@ -43,11 +44,13 @@ def numpy_topk(x, k=1, axis=-1, largest=True): class XPUTestTopKV2Op(XPUOpTestWrapper): + def __init__(self): self.op_name = 'top_k_v2' self.use_dynamic_create_class = False class TestTopkOp(XPUOpTest): + def init_args(self): self.k = 3 self.axis = 1 @@ -64,8 +67,10 @@ class 
XPUTestTopKV2Op(XPUOpTestWrapper): 'axis': self.axis, 'largest': self.largest } - output, indices = numpy_topk( - self.input_data, axis=self.axis, k=self.k, largest=self.largest) + output, indices = numpy_topk(self.input_data, + axis=self.axis, + k=self.k, + largest=self.largest) self.outputs = {'Out': output, 'Indices': indices} def test_check_output(self): @@ -79,6 +84,7 @@ class XPUTestTopKV2Op(XPUOpTestWrapper): self.check_grad(set(['X']), 'Out') class TestTopkOp1(TestTopkOp): + def init_args(self): self.k = 3 self.axis = 1 @@ -86,6 +92,7 @@ class XPUTestTopKV2Op(XPUOpTestWrapper): self.input_data = np.random.rand(100, 155).astype(self.dtype) class TestTopkOp2(TestTopkOp): + def init_args(self): self.k = 3 self.axis = 1 @@ -93,6 +100,7 @@ class XPUTestTopKV2Op(XPUOpTestWrapper): self.input_data = np.random.rand(10, 10, 5).astype(self.dtype) class TestTopkOp3(TestTopkOp): + def init_args(self): self.k = 5 self.axis = 1 @@ -100,6 +108,7 @@ class XPUTestTopKV2Op(XPUOpTestWrapper): self.input_data = np.random.rand(10, 10, 5).astype(self.dtype) class TestTopkOp4(TestTopkOp): + def init_args(self): self.k = 1 self.axis = 1 @@ -107,6 +116,7 @@ class XPUTestTopKV2Op(XPUOpTestWrapper): self.input_data = np.random.rand(10, 10, 5).astype(self.dtype) class TestTopkOp5(TestTopkOp): + def init_args(self): self.k = 3 self.axis = 2 @@ -114,6 +124,7 @@ class XPUTestTopKV2Op(XPUOpTestWrapper): self.input_data = np.random.rand(10, 10, 5).astype(self.dtype) class TestTopkOp6(TestTopkOp): + def init_args(self): self.k = 5 self.axis = 1 @@ -121,6 +132,7 @@ class XPUTestTopKV2Op(XPUOpTestWrapper): self.input_data = np.random.rand(8, 32, 64).astype(self.dtype) class TestTopkOp7(TestTopkOp): + def init_args(self): self.k = 10 self.axis = 2 @@ -128,6 +140,7 @@ class XPUTestTopKV2Op(XPUOpTestWrapper): self.input_data = np.random.rand(8, 5, 10, 16).astype(self.dtype) class TestTopkOp8(TestTopkOp): + def init_args(self): self.k = 1 self.axis = 1 @@ -135,6 +148,7 @@ class XPUTestTopKV2Op(XPUOpTestWrapper): self.input_data = np.random.rand(8, 32, 64).astype(self.dtype) class TestTopkOp9(TestTopkOp): + def init_args(self): self.k = 3 self.axis = 1 @@ -142,6 +156,7 @@ class XPUTestTopKV2Op(XPUOpTestWrapper): self.input_data = np.random.rand(10, 10, 5).astype(self.dtype) class TestTopkOp10(TestTopkOp): + def init_args(self): self.k = 3 self.axis = 1 @@ -149,6 +164,7 @@ class XPUTestTopKV2Op(XPUOpTestWrapper): self.input_data = np.random.rand(10, 10, 5).astype(self.dtype) class TestTopkOp11(TestTopkOp): + def init_args(self): self.k = 5 self.axis = 1 @@ -156,6 +172,7 @@ class XPUTestTopKV2Op(XPUOpTestWrapper): self.input_data = np.random.rand(10, 10, 5).astype(self.dtype) class TestTopkOp12(TestTopkOp): + def init_args(self): self.k = 1 self.axis = 1 diff --git a/python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py index 41df4481e2d..b3a1a636e8a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py @@ -27,6 +27,7 @@ from paddle.fluid import compiler, Program, program_guard class TestXPUTransposeOp(XPUOpTest): + def setUp(self): self.init_op_type() self.initTestCase() @@ -65,60 +66,70 @@ class TestXPUTransposeOp(XPUOpTest): class TestCase0(TestXPUTransposeOp): + def initTestCase(self): self.shape = (100, ) self.axis = (0, ) class TestCase1(TestXPUTransposeOp): + def initTestCase(self): self.shape = (3, 4, 10) self.axis = (0, 2, 1) class 
TestCase2(TestXPUTransposeOp): + def initTestCase(self): self.shape = (2, 3, 4, 5) self.axis = (0, 2, 3, 1) class TestCase3(TestXPUTransposeOp): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6) self.axis = (4, 2, 3, 1, 0) class TestCase4(TestXPUTransposeOp): + def initTestCase(self): self.shape = (2, 3, 4, 5, 6, 1) self.axis = (4, 2, 3, 1, 0, 5) class TestCase5(TestXPUTransposeOp): + def initTestCase(self): self.shape = (2, 16, 96) self.axis = (0, 2, 1) class TestCase6(TestXPUTransposeOp): + def initTestCase(self): self.shape = (2, 10, 12, 16) self.axis = (3, 1, 2, 0) class TestCase7(TestXPUTransposeOp): + def initTestCase(self): self.shape = (2, 10, 2, 16) self.axis = (0, 1, 3, 2) class TestCase8(TestXPUTransposeOp): + def initTestCase(self): self.shape = (2, 3, 2, 3, 2, 4, 3, 3) self.axis = (0, 1, 3, 2, 4, 5, 6, 7) class TestCase9(TestXPUTransposeOp): + def initTestCase(self): self.shape = (2, 3, 2, 3, 2, 4, 3, 3) self.axis = (6, 1, 3, 5, 0, 2, 4, 7) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_tril_triu_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_tril_triu_op_xpu.py index ee689efbb38..28fff5981b7 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_tril_triu_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_tril_triu_op_xpu.py @@ -13,6 +13,7 @@ from __future__ import print_function import sys + sys.path.append("..") import paddle @@ -30,11 +31,13 @@ paddle.enable_static() class XPUTestTrilTriuOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'tril_triu' self.use_dynamic_create_class = False class TestTrilTriuOp(XPUOpTest): + def setUp(self): self.init_dtype() self.initTestCase() @@ -44,9 +47,9 @@ class XPUTestTrilTriuOp(XPUOpTestWrapper): self.op_type = "tril_triu" self.place = paddle.XPUPlace(0) if self.dtype == np.int32: - self.X = np.arange( - 1, self.get_Xshape_prod() + 1, - dtype=self.dtype).reshape(self.Xshape) + self.X = np.arange(1, + self.get_Xshape_prod() + 1, + dtype=self.dtype).reshape(self.Xshape) else: self.X = np.random.random(self.Xshape).astype(dtype=self.dtype) self.inputs = {'X': self.X} @@ -55,7 +58,8 @@ class XPUTestTrilTriuOp(XPUOpTestWrapper): 'lower': True if self.real_op_type == 'tril' else False, } self.outputs = { - 'Out': self.real_np_op(self.X, self.diagonal) + 'Out': + self.real_np_op(self.X, self.diagonal) if self.diagonal else self.real_np_op(self.X) } @@ -92,42 +96,50 @@ class XPUTestTrilTriuOp(XPUOpTestWrapper): self.Xshape = (10, 10) class TestTrilTriuOp1(TestTrilTriuOp): + def initTestCase(self): self.diagonal = -3 self.Xshape = (5, 5) class TestTrilTriuOp2(TestTrilTriuOp): + def initTestCase(self): self.diagonal = 4 self.Xshape = (11, 17) class TestTrilTriuOp3(TestTrilTriuOp): + def initTestCase(self): self.diagonal = 10 self.Xshape = (2, 25, 25) class TestTrilTriuOp4(TestTrilTriuOp): + def initTestCase(self): self.diagonal = -10 self.Xshape = (1, 2, 33, 11) class TestTrilTriuOp5(TestTrilTriuOp): + def initTestCase(self): self.diagonal = 11 self.Xshape = (1, 1, 99) class TestTrilTriuOp6(TestTrilTriuOp): + def initTestCase(self): self.diagonal = 5 self.Xshape = (1, 2, 3, 5, 99) class TestTrilTriuOp7(TestTrilTriuOp): + def initTestCase(self): self.diagonal = -100 self.Xshape = (2, 2, 3, 4, 5) class TestTrilTriuOpError(unittest.TestCase): + def test_errors1(self): paddle.enable_static() data = fluid.data(shape=(20, 22), dtype='float32', name="data1") @@ -137,8 +149,8 @@ class TestTrilTriuOpError(unittest.TestCase): "diagonal in {} must be a python Int".format(op_type), } expected = 
list(errmsg.keys())[0] - with self.assertRaisesRegex( - eval(expected.split(':')[-1]), errmsg[expected]): + with self.assertRaisesRegex(eval(expected.split(':')[-1]), + errmsg[expected]): getattr(tensor, op_type)(x=data, diagonal='2022') def test_errors2(self): @@ -150,8 +162,8 @@ class TestTrilTriuOpError(unittest.TestCase): "x shape in {} must be at least 2-D".format(op_type), } expected = list(errmsg.keys())[0] - with self.assertRaisesRegex( - eval(expected.split(':')[-1]), errmsg[expected]): + with self.assertRaisesRegex(eval(expected.split(':')[-1]), + errmsg[expected]): getattr(tensor, op_type)(x=data, diagonal=[None]) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_truncated_gaussian_random_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_truncated_gaussian_random_op_xpu.py index d096cb8ec13..f9ccf0576a2 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_truncated_gaussian_random_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_truncated_gaussian_random_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import unittest import numpy @@ -30,6 +31,7 @@ paddle.enable_static() class TestXPUTrunctedGaussianRandomOp(TestTrunctedGaussianRandomOp): + def test_xpu(self): if paddle.is_compiled_with_xpu(): self.gaussian_random_test(place=fluid.XPUPlace(0)) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_uniform_random_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_uniform_random_op_xpu.py index ab59fd26656..d28029d1883 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_uniform_random_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_uniform_random_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import sys + sys.path.append("..") import subprocess import unittest @@ -31,6 +32,7 @@ paddle.enable_static() class TestXPUUniformRandomOp(TestUniformRandomOp): + def test_check_output(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) @@ -41,6 +43,7 @@ class TestXPUUniformRandomOp(TestUniformRandomOp): class TestXPUUniformRandomOpSelectedRows(TestUniformRandomOpSelectedRows): + def test_check_output(self): if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze2_op_xpu.py index f6c540d6c2c..6daa4739412 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze2_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import numpy as np @@ -29,11 +30,13 @@ paddle.enable_static() class XPUTestUnsqueeze2Op(XPUOpTestWrapper): + def __init__(self): self.op_name = "unsqueeze2" self.use_dynamic_create_class = False class TestUnsqueeze2Op(XPUOpTest): + def setUp(self): self.op_type = "unsqueeze2" self.use_mkldnn = False @@ -79,6 +82,7 @@ class XPUTestUnsqueeze2Op(XPUOpTestWrapper): # Correct: Single input index. class TestUnsqueeze2Op1(TestUnsqueeze2Op): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (-1, ) @@ -86,6 +90,7 @@ class XPUTestUnsqueeze2Op(XPUOpTestWrapper): # Correct: Mixed input axis. class TestUnsqueeze2Op2(TestUnsqueeze2Op): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (0, -1) @@ -93,6 +98,7 @@ class XPUTestUnsqueeze2Op(XPUOpTestWrapper): # Correct: There is duplicated axis. 
class TestUnsqueeze2Op3(TestUnsqueeze2Op): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (0, 3, 3) @@ -100,6 +106,7 @@ class XPUTestUnsqueeze2Op(XPUOpTestWrapper): # Correct: Reversed axes. class TestUnsqueeze2Op4(TestUnsqueeze2Op): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (3, 1, 1) @@ -107,6 +114,7 @@ class XPUTestUnsqueeze2Op(XPUOpTestWrapper): # axes is a list(with tensor) class TestUnsqueeze2Op_AxesTensorList(XPUOpTest): + def setUp(self): self.op_type = "unsqueeze2" self.use_mkldnn = False @@ -151,24 +159,28 @@ class XPUTestUnsqueeze2Op(XPUOpTestWrapper): self.attrs = {} class TestUnsqueeze2Op1_AxesTensorList(TestUnsqueeze2Op_AxesTensorList): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (-1, ) self.new_shape = (20, 5, 1) class TestUnsqueeze2Op2_AxesTensorList(TestUnsqueeze2Op_AxesTensorList): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (0, -1) self.new_shape = (1, 20, 5, 1) class TestUnsqueeze2Op3_AxesTensorList(TestUnsqueeze2Op_AxesTensorList): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (0, 3, 3) self.new_shape = (1, 10, 2, 1, 1, 5) class TestUnsqueeze2Op4_AxesTensorList(TestUnsqueeze2Op_AxesTensorList): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (3, 1, 1) @@ -176,6 +188,7 @@ class XPUTestUnsqueeze2Op(XPUOpTestWrapper): # axes is a Tensor class TestUnsqueeze2Op_AxesTensor(XPUOpTest): + def setUp(self): self.op_type = "unsqueeze2" self.use_mkldnn = False @@ -215,24 +228,28 @@ class XPUTestUnsqueeze2Op(XPUOpTestWrapper): self.attrs = {} class TestUnsqueeze2Op1_AxesTensor(TestUnsqueeze2Op_AxesTensor): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (-1, ) self.new_shape = (20, 5, 1) class TestUnsqueeze2Op2_AxesTensor(TestUnsqueeze2Op_AxesTensor): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (0, -1) self.new_shape = (1, 20, 5, 1) class TestUnsqueeze2Op3_AxesTensor(TestUnsqueeze2Op_AxesTensor): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (0, 3, 3) self.new_shape = (1, 10, 2, 1, 1, 5) class TestUnsqueeze2Op4_AxesTensor(TestUnsqueeze2Op_AxesTensor): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (3, 1, 1) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze_op_xpu.py index 5e40073e731..9e505fe08a6 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze_op_xpu.py @@ -15,6 +15,7 @@ from __future__ import print_function import unittest import sys + sys.path.append("..") import numpy as np @@ -29,6 +30,7 @@ paddle.enable_static() # Correct: General. class TestUnsqueezeOp(XPUOpTest): + def setUp(self): self.init_test_case() self.op_type = "unsqueeze" @@ -59,6 +61,7 @@ class TestUnsqueezeOp(XPUOpTest): # Correct: Single input index. class TestUnsqueezeOp1(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (-1, ) @@ -67,6 +70,7 @@ class TestUnsqueezeOp1(TestUnsqueezeOp): # Correct: Mixed input axis. class TestUnsqueezeOp2(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (20, 5) self.axes = (0, -1) @@ -75,6 +79,7 @@ class TestUnsqueezeOp2(TestUnsqueezeOp): # Correct: There is duplicated axis. 
class TestUnsqueezeOp3(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (0, 3, 3) @@ -83,6 +88,7 @@ class TestUnsqueezeOp3(TestUnsqueezeOp): # Correct: Reversed axes. class TestUnsqueezeOp4(TestUnsqueezeOp): + def init_test_case(self): self.ori_shape = (10, 2, 5) self.axes = (3, 1, 1) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_update_loss_scaling_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_update_loss_scaling_op_xpu.py index 33b13081b54..0aecc48fe35 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_update_loss_scaling_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_update_loss_scaling_op_xpu.py @@ -14,6 +14,7 @@ import unittest import sys + sys.path.append("..") import numpy as np from op_test import OpTest @@ -26,6 +27,7 @@ paddle.enable_static() class TestUpdateLossScalingOp(XPUOpTest): + def setUp(self): self.op_type = "update_loss_scaling" self.init() @@ -69,6 +71,7 @@ class TestUpdateLossScalingOp(XPUOpTest): class TestUpdateLossScalingOpBad(TestUpdateLossScalingOp): + def setUp(self): self.op_type = "update_loss_scaling" self.init() @@ -101,17 +104,21 @@ class TestUpdateLossScalingOpBad(TestUpdateLossScalingOp): class TestUpdateLossScalingLayer(unittest.TestCase): + def loss_scaling_check(self, scope=fluid.Scope()): a = fluid.data(name="a", shape=[1024, 1024], dtype='float32') b = fluid.data(name="b", shape=[512, 128], dtype='float32') x = [a, b] found_inf = fluid.data(name="found_inf", shape=[1], dtype='bool') - prev_loss_scaling = fluid.data( - name="prev_loss_scaling", shape=[1], dtype='float32') - num_good_steps = fluid.data( - name="num_good_steps", shape=[1], dtype='int32') - num_bad_steps = fluid.data( - name="num_bad_steps", shape=[1], dtype='int32') + prev_loss_scaling = fluid.data(name="prev_loss_scaling", + shape=[1], + dtype='float32') + num_good_steps = fluid.data(name="num_good_steps", + shape=[1], + dtype='int32') + num_bad_steps = fluid.data(name="num_bad_steps", + shape=[1], + dtype='int32') a_v = np.random.random([1024, 1024]).astype('float32') b_v = np.random.random([512, 128]).astype('float32') @@ -125,17 +132,16 @@ class TestUpdateLossScalingLayer(unittest.TestCase): incr_ratio = 2 decr_ratio = 0.8 - result = amp_nn.update_loss_scaling( - x, - found_inf, - prev_loss_scaling, - num_good_steps, - num_bad_steps, - incr_every_n_steps, - decr_every_n_nan_or_inf, - incr_ratio, - decr_ratio, - name="update_loss_scaling") + result = amp_nn.update_loss_scaling(x, + found_inf, + prev_loss_scaling, + num_good_steps, + num_bad_steps, + incr_every_n_steps, + decr_every_n_nan_or_inf, + incr_ratio, + decr_ratio, + name="update_loss_scaling") place = fluid.XPUPlace(0) exe = fluid.Executor(place) @@ -167,12 +173,15 @@ class TestUpdateLossScalingLayer(unittest.TestCase): b = fluid.data(name="b", shape=[512, 128], dtype='float32') x = [a, b] found_inf = fluid.data(name="found_inf", shape=[1], dtype='bool') - prev_loss_scaling = fluid.data( - name="prev_loss_scaling", shape=[1], dtype='float32') - num_good_steps = fluid.data( - name="num_good_steps", shape=[1], dtype='int32') - num_bad_steps = fluid.data( - name="num_bad_steps", shape=[1], dtype='int32') + prev_loss_scaling = fluid.data(name="prev_loss_scaling", + shape=[1], + dtype='float32') + num_good_steps = fluid.data(name="num_good_steps", + shape=[1], + dtype='int32') + num_bad_steps = fluid.data(name="num_bad_steps", + shape=[1], + dtype='int32') a_v = np.random.random([1024, 1024]).astype('float32') b_v = np.random.random([512, 
128]).astype('float32') @@ -189,17 +198,16 @@ class TestUpdateLossScalingLayer(unittest.TestCase): incr_ratio = 2 decr_ratio = 0.8 - result = amp_nn.update_loss_scaling( - x, - found_inf, - prev_loss_scaling, - num_good_steps, - num_bad_steps, - incr_every_n_steps, - decr_every_n_nan_or_inf, - incr_ratio, - decr_ratio, - name="update_loss_scaling") + result = amp_nn.update_loss_scaling(x, + found_inf, + prev_loss_scaling, + num_good_steps, + num_bad_steps, + incr_every_n_steps, + decr_every_n_nan_or_inf, + incr_ratio, + decr_ratio, + name="update_loss_scaling") place = fluid.XPUPlace(0) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py index 9c86286d3d8..1b90fa93588 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py @@ -17,6 +17,7 @@ from __future__ import print_function import numpy as np import unittest import sys + sys.path.append("..") import paddle @@ -30,10 +31,12 @@ paddle.enable_static() class XPUTestWhereIndexOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'where_index' class TestWhereIndexOp(XPUOpTest): + def setUp(self): self.init_config() self.init_data() @@ -54,6 +57,7 @@ class XPUTestWhereIndexOp(XPUOpTestWrapper): self.__class__.no_need_check_grad = True class TestAllFalse(TestWhereIndexOp): + def init_data(self): self.inputs = { 'Condition': np.array([False, False, False]).astype(self.dtype), @@ -61,6 +65,7 @@ class XPUTestWhereIndexOp(XPUOpTestWrapper): self.outputs = {'Out': np.array([], dtype='int64')} class TestRank2(TestWhereIndexOp): + def init_data(self): self.inputs = { 'Condition': @@ -69,6 +74,7 @@ class XPUTestWhereIndexOp(XPUOpTestWrapper): self.outputs = {'Out': np.array([[0, 0], [1, 1]], dtype='int64')} class TestRank3(TestWhereIndexOp): + def init_data(self): self.inputs = { 'Condition': @@ -78,7 +84,8 @@ class XPUTestWhereIndexOp(XPUOpTestWrapper): } self.outputs = { - 'Out': np.array( + 'Out': + np.array( [[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0], [2, 1, 1]], dtype='int64') } @@ -90,6 +97,7 @@ for stype in support_types: class TestWhereOpError(unittest.TestCase): + def test_api(self): with program_guard(Program(), Program()): cond = fluid.layers.data(name='cond', shape=[4], dtype='bool') @@ -102,7 +110,9 @@ class TestWhereOpError(unittest.TestCase): class TestWhereRaiseError(unittest.TestCase): + def test_errors(self): + def test_type(): fluid.layers.where([10]) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py index 461b56ff0d8..ad22ab86b93 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,6 +17,7 @@ from __future__ import print_function, division import numpy as np import unittest import sys + sys.path.append("..") import paddle @@ -31,10 +32,12 @@ paddle.enable_static() class XPUTestWhereOp(XPUOpTestWrapper): + def __init__(self): self.op_name = 'where' class TestXPUWhereOp(XPUOpTest): + def setUp(self): self.init_config() self.init_data() @@ -56,18 +59,19 @@ class XPUTestWhereOp(XPUOpTestWrapper): self.check_output_with_place(self.place) class TestXPUWhereOp2(TestXPUWhereOp): + def init_data(self): self.x = np.random.uniform(-5, 5, (60, 2)).astype(self.dtype) self.y = np.random.uniform(-5, 5, (60, 2)).astype(self.dtype) self.cond = np.ones((60, 2)).astype("bool") class TestXPUWhereOp3(TestXPUWhereOp): + def init_data(self): self.x = np.random.uniform(-3, 5, (20, 2, 4)).astype(self.dtype) self.y = np.random.uniform(-3, 5, (20, 2, 4)).astype(self.dtype) - self.cond = np.array( - np.random.randint( - 2, size=(20, 2, 4)), dtype=bool) + self.cond = np.array(np.random.randint(2, size=(20, 2, 4)), + dtype=bool) support_types = get_xpu_op_support_types('where') @@ -76,6 +80,7 @@ for stype in support_types: class TestXPUWhereAPI(unittest.TestCase): + def setUp(self): self.__class__.use_xpu = True self.place = paddle.XPUPlace(0) @@ -100,8 +105,9 @@ class TestXPUWhereAPI(unittest.TestCase): train_prog = fluid.Program() startup = fluid.Program() with fluid.program_guard(train_prog, startup): - cond = fluid.data( - name='cond', shape=self.shape, dtype='bool') + cond = fluid.data(name='cond', + shape=self.shape, + dtype='bool') x = fluid.data(name='x', shape=self.shape, dtype='float32') y = fluid.data(name='y', shape=self.shape, dtype='float32') @@ -119,12 +125,13 @@ class TestXPUWhereAPI(unittest.TestCase): fetch_list.append(x.grad_name) if y_stop_gradient is False: fetch_list.append(y.grad_name) - out = exe.run( - train_prog, - feed={'cond': self.cond, - 'x': self.x, - 'y': self.y}, - fetch_list=fetch_list) + out = exe.run(train_prog, + feed={ + 'cond': self.cond, + 'x': self.x, + 'y': self.y + }, + fetch_list=fetch_list) assert np.array_equal(out[0], self.out) if x_stop_gradient is False: @@ -144,21 +151,24 @@ class TestXPUWhereAPI(unittest.TestCase): x = fluid.layers.data(name='x', shape=[4, 1], dtype='float32') y = fluid.layers.data(name='y', shape=[4, 2], dtype='float32') x_i = np.array([[0.9383, 0.1983, 3.2, 1.2]]).astype("float32") - y_i = np.array([[1.0, 1.0, 1.0, 1.0], - [1.0, 1.0, 1.0, 1.0]]).astype("float32") + y_i = np.array([[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, + 1.0]]).astype("float32") result = paddle.where(x > 1, x=x, y=y) exe = fluid.Executor(self.place) exe.run(startup) out = exe.run(train_prog, - feed={'x': x_i, - 'y': y_i}, + feed={ + 'x': x_i, + 'y': y_i + }, fetch_list=[result]) assert np.array_equal(out[0], np.where(x_i > 1, x_i, y_i)) class TestWhereDygraphAPI(unittest.TestCase): + def test_api(self): with fluid.dygraph.guard(paddle.XPUPlace(0)): x_i = np.array([0.9383, 0.1983, 3.2, 1.2]).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/xpu/test_xpu_place.py b/python/paddle/fluid/tests/unittests/xpu/test_xpu_place.py index 57d456d0193..cc898e3537a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_xpu_place.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_xpu_place.py @@ -24,6 +24,7 @@ import paddle.static as static class Test_XPU_Places(unittest.TestCase): + def assert_places_equal(self, places0, places1): self.assertEqual(len(places0), len(places1)) for place0, place1 in zip(places0, places1): diff --git 
a/python/paddle/fluid/trainer_desc.py b/python/paddle/fluid/trainer_desc.py index cdc9b14b6e3..613d04a7f69 100644 --- a/python/paddle/fluid/trainer_desc.py +++ b/python/paddle/fluid/trainer_desc.py @@ -15,6 +15,7 @@ import sys import os + __all__ = [ 'TrainerDesc', 'MultiTrainer', 'DistMultiTrainer', 'PipelineTrainer', 'HeterXpuTrainer', 'HeterPipelineTrainer' diff --git a/python/paddle/fluid/trainer_factory.py b/python/paddle/fluid/trainer_factory.py index d64f4f17ae3..a34fb2dea7d 100644 --- a/python/paddle/fluid/trainer_factory.py +++ b/python/paddle/fluid/trainer_factory.py @@ -19,8 +19,9 @@ import logging import numpy as np from paddle.fluid.log_helper import get_logger -local_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +local_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') from .trainer_desc import MultiTrainer, DistMultiTrainer, PipelineTrainer, HeterXpuTrainer, PSGPUTrainer, HeterPipelineTrainer from .device_worker import Hogwild, DownpourSGD, DownpourLite, Section, DownpourSGDOPT, HeterSection @@ -84,13 +85,13 @@ class TrainerFactory(object): if opt_info.get("use_ps_gpu") is not None: trainer._set_use_ps_gpu(opt_info["use_ps_gpu"]) if opt_info.get("enable_random_dump") is not None: - trainer._set_enable_random_dump(opt_info[ - "enable_random_dump"]) + trainer._set_enable_random_dump( + opt_info["enable_random_dump"]) if opt_info.get("dump_interval") is not None: trainer._set_dump_interval(opt_info["dump_interval"]) if opt_info.get("random_with_lineid") is not None: - trainer._set_random_with_lineid(opt_info[ - "random_with_lineid"]) + trainer._set_random_with_lineid( + opt_info["random_with_lineid"]) if "fleet_desc" in opt_info: device_worker._set_fleet_desc(opt_info["fleet_desc"]) @@ -101,18 +102,18 @@ class TrainerFactory(object): trainer._set_no_cvm(opt_info["no_cvm"]) if opt_info.get( "scale_sparse_gradient_with_batch_size") is not None: - trainer._set_scale_sparse_grad_with_batch_size(opt_info[ - "scale_sparse_gradient_with_batch_size"]) + trainer._set_scale_sparse_grad_with_batch_size( + opt_info["scale_sparse_gradient_with_batch_size"]) if opt_info.get("scale_datanorm") is not None: trainer._set_scale_datanorm(opt_info["scale_datanorm"]) if opt_info.get("adjust_ins_weight") is not None: - trainer._set_adjust_ins_weight(opt_info[ - "adjust_ins_weight"]) + trainer._set_adjust_ins_weight( + opt_info["adjust_ins_weight"]) if opt_info.get("copy_table") is not None: trainer._set_copy_table_config(opt_info["copy_table"]) if opt_info.get("check_nan_var_names") is not None: - trainer._set_check_nan_var_names(opt_info[ - "check_nan_var_names"]) + trainer._set_check_nan_var_names( + opt_info["check_nan_var_names"]) if opt_info.get("loss_names") is not None: trainer._set_loss_names(opt_info["loss_names"]) trainer._set_device_worker(device_worker) @@ -127,8 +128,8 @@ class FetchHandlerMonitor(object): def __init__(self, scope, handler): self.fetch_instance = handler - self.fetch_thread = threading.Thread( - target=self.handler_launch_func, args=(scope, self.fetch_instance)) + self.fetch_thread = threading.Thread(target=self.handler_launch_func, + args=(scope, self.fetch_instance)) self.running_lock = threading.Lock() self.running = False @@ -140,8 +141,8 @@ class FetchHandlerMonitor(object): if isinstance(fetch_instance.var_dict[key], Variable): var_name_to_key[fetch_instance.var_dict[key].name] = key else: - local_logger.warning("the value of {} is not a Variable".format( - key)) + 
local_logger.warning( + "the value of {} is not a Variable".format(key)) var_name_to_key["None.var"] = key elapsed_secs = 0 while True: @@ -159,8 +160,9 @@ class FetchHandlerMonitor(object): var = scope.find_var(key) fetch_dict[key] = var if var == None: - local_logger.warning("{} value currently not available". - format(var_name_to_key[key])) + local_logger.warning( + "{} value currently not available".format( + var_name_to_key[key])) res_dict = {} for key in fetch_dict: user_name = var_name_to_key[key] diff --git a/python/paddle/fluid/transpiler/ascend_transpiler.py b/python/paddle/fluid/transpiler/ascend_transpiler.py index 5593c91b5bc..69fb2b18336 100644 --- a/python/paddle/fluid/transpiler/ascend_transpiler.py +++ b/python/paddle/fluid/transpiler/ascend_transpiler.py @@ -14,11 +14,13 @@ from . import collective from .. import core + OpRole = core.op_proto_and_checker_maker.OpRole from paddle.distributed import fleet class AscendTranspiler(collective.Collective): + def __init__(self, startup_program, main_program): self.nrings = 1 super(AscendTranspiler, self).__init__(self.nrings) @@ -48,24 +50,22 @@ class AscendTranspiler(collective.Collective): # As we search ops reversedly, we should insert c_allreduce_sum # op in the same way to keep the ring_id alternate ring_id = (ring_id + 1) % self.nrings - block._insert_op( - offset + 1, - type='c_allreduce_sum', - inputs={'X': grad}, - outputs={'Out': grad}, - attrs={ - 'ring_id': ring_id, - self.op_role_key: OpRole.Backward - }) - block._insert_op( - offset + 2, - type='scale', - inputs={'X': grad}, - outputs={'Out': grad}, - attrs={ - 'scale': 1.0 / fleet.worker_num(), - self.op_role_key: OpRole.Backward - }) + block._insert_op(offset + 1, + type='c_allreduce_sum', + inputs={'X': grad}, + outputs={'Out': grad}, + attrs={ + 'ring_id': ring_id, + self.op_role_key: OpRole.Backward + }) + block._insert_op(offset + 2, + type='scale', + inputs={'X': grad}, + outputs={'Out': grad}, + attrs={ + 'scale': 1.0 / fleet.worker_num(), + self.op_role_key: OpRole.Backward + }) if grad is None: return diff --git a/python/paddle/fluid/transpiler/collective.py b/python/paddle/fluid/transpiler/collective.py index 1ddebad286d..cb57ea2a421 100644 --- a/python/paddle/fluid/transpiler/collective.py +++ b/python/paddle/fluid/transpiler/collective.py @@ -122,69 +122,63 @@ class Collective(object): block = program.global_block() if core.is_compiled_with_npu(): - hccl_id_var = block.create_var( - name=unique_name.generate('hccl_id'), - persistable=True, - type=core.VarDesc.VarType.RAW) + hccl_id_var = block.create_var(name=unique_name.generate('hccl_id'), + persistable=True, + type=core.VarDesc.VarType.RAW) endpoint_to_index_map = {e: idx for idx, e in enumerate(endpoints)} - block.append_op( - type='c_gen_hccl_id', - inputs={}, - outputs={'Out': hccl_id_var}, - attrs={ - 'rank': rank, - 'endpoint': current_endpoint, - 'other_endpoints': other_endpoints, - self.op_role_key: OpRole.Forward - }) - block.append_op( - type='c_comm_init_hccl', - inputs={'X': hccl_id_var}, - outputs={}, - attrs={ - 'rank': rank, - 'ring_id': ring_id, - 'device_id': int(os.getenv("FLAGS_selected_npus")), - 'rank_ids': nranks, - self.op_role_key: OpRole.Forward - }) + block.append_op(type='c_gen_hccl_id', + inputs={}, + outputs={'Out': hccl_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints, + self.op_role_key: OpRole.Forward + }) + block.append_op(type='c_comm_init_hccl', + inputs={'X': hccl_id_var}, + outputs={}, + attrs={ + 'rank': 
rank, + 'ring_id': ring_id, + 'device_id': + int(os.getenv("FLAGS_selected_npus")), + 'rank_ids': nranks, + self.op_role_key: OpRole.Forward + }) else: - nccl_id_var = block.create_var( - name=unique_name.generate('nccl_id'), - persistable=True, - type=core.VarDesc.VarType.RAW) - block.append_op( - type='c_gen_nccl_id', - inputs={}, - outputs={'Out': nccl_id_var}, - attrs={ - 'rank': rank, - 'endpoint': current_endpoint, - 'other_endpoints': other_endpoints, - self.op_role_key: OpRole.Forward - }) + nccl_id_var = block.create_var(name=unique_name.generate('nccl_id'), + persistable=True, + type=core.VarDesc.VarType.RAW) + block.append_op(type='c_gen_nccl_id', + inputs={}, + outputs={'Out': nccl_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints, + self.op_role_key: OpRole.Forward + }) if not has_multitrainer: - block.append_op( - type='c_comm_init', - inputs={'X': nccl_id_var}, - outputs={}, - attrs={ - 'nranks': nranks, - 'rank': rank, - 'ring_id': ring_id, - self.op_role_key: OpRole.Forward - }) + block.append_op(type='c_comm_init', + inputs={'X': nccl_id_var}, + outputs={}, + attrs={ + 'nranks': nranks, + 'rank': rank, + 'ring_id': ring_id, + self.op_role_key: OpRole.Forward + }) else: - block.append_op( - type='c_comm_init_multitrainer', - inputs={'X': nccl_id_var}, - outputs={}, - attrs={ - 'ntrainers': nranks, - 'trainer_id': rank, - 'ring_id': ring_id, - self.op_role_key: OpRole.Forward - }) + block.append_op(type='c_comm_init_multitrainer', + inputs={'X': nccl_id_var}, + outputs={}, + attrs={ + 'ntrainers': nranks, + 'trainer_id': rank, + 'ring_id': ring_id, + self.op_role_key: OpRole.Forward + }) def _broadcast_params(self): block = self.startup_program.global_block() @@ -194,23 +188,23 @@ class Collective(object): continue ring_id = (ring_id + 1) % self.nrings - block.append_op( - type='c_broadcast', - inputs={'X': param}, - outputs={'Out': param}, - attrs={ - 'ring_id': ring_id, - 'root': 0, - self.op_role_key: OpRole.Forward - }) + block.append_op(type='c_broadcast', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + 'root': 0, + self.op_role_key: OpRole.Forward + }) for ring_id in range(self.nrings): - block.append_op( - type='c_sync_comm_stream', - inputs={'X': param}, - outputs={'Out': param}, - attrs={'ring_id': ring_id, - self.op_role_key: OpRole.Forward}) + block.append_op(type='c_sync_comm_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + self.op_role_key: OpRole.Forward + }) def _is_loss_grad_op(self, op): if self.op_role_key not in op.attr_names: @@ -252,15 +246,14 @@ class GradAllReduce(Collective): for idx, op in reversed(list(enumerate(block.ops))): if self._is_loss_grad_op(op): loss_grad_var = block.vars[op.output_arg_names[0]] - block._insert_op( - idx + 1, - type='scale', - inputs={'X': loss_grad_var}, - outputs={'Out': loss_grad_var}, - attrs={ - 'scale': 1.0 / self.nranks, - self.op_role_key: OpRole.Backward - }) + block._insert_op(idx + 1, + type='scale', + inputs={'X': loss_grad_var}, + outputs={'Out': loss_grad_var}, + attrs={ + 'scale': 1.0 / self.nranks, + self.op_role_key: OpRole.Backward + }) def _insert_allreduce_ops(self): block = self.main_program.global_block() @@ -295,15 +288,14 @@ class GradAllReduce(Collective): # As we search ops reversedly, we should insert c_allreduce_sum # op in the same way to keep the ring_id alternate ring_id = (ring_id + 1) % self.nrings - block._insert_op( - offset, - type='c_allreduce_sum', - 
inputs={'X': grad}, - outputs={'Out': grad}, - attrs={ - 'ring_id': ring_id, - self.op_role_key: OpRole.Backward - }) + block._insert_op(offset, + type='c_allreduce_sum', + inputs={'X': grad}, + outputs={'Out': grad}, + attrs={ + 'ring_id': ring_id, + self.op_role_key: OpRole.Backward + }) if grad is None: return @@ -311,15 +303,14 @@ class GradAllReduce(Collective): for idx, op in enumerate(block.ops): if self._is_optimizer_op(op): for ring_id in range(self.nrings): - block._insert_op( - idx + ring_id, - type='c_sync_comm_stream', - inputs={'X': grad}, - outputs={'Out': grad}, - attrs={ - 'ring_id': ring_id, - self.op_role_key: OpRole.Backward - }) + block._insert_op(idx + ring_id, + type='c_sync_comm_stream', + inputs={'X': grad}, + outputs={'Out': grad}, + attrs={ + 'ring_id': ring_id, + self.op_role_key: OpRole.Backward + }) break @@ -342,16 +333,14 @@ class LocalSGD(Collective): non_dist_params.append(param) for param in non_dist_params: - snapshot = block.create_var( - name=self.snapshot_name(param.name), - shape=param.shape, - persistable=True, - stop_gradient=True) - block.append_op( - type='assign', - inputs={'X': [param]}, - outputs={'Out': [snapshot]}, - attrs={self.op_role_key: OpRole.Forward}) + snapshot = block.create_var(name=self.snapshot_name(param.name), + shape=param.shape, + persistable=True, + stop_gradient=True) + block.append_op(type='assign', + inputs={'X': [param]}, + outputs={'Out': [snapshot]}, + attrs={self.op_role_key: OpRole.Forward}) def snapshot_name(self, param_name): return param_name + self.snapshot_key @@ -366,69 +355,67 @@ class LocalSGD(Collective): if param.is_distributed: continue - snapshot = block.create_var( - name=self.snapshot_name(param.name), - shape=param.shape, - persistable=True, - stop_gradient=True, - dtype=param.dtype) - - block._insert_op( - idx + 1, - type='elementwise_sub', - inputs={'X': [snapshot], - 'Y': [param]}, - outputs={'Out': [param]}, - attrs={self.op_role_key: OpRole.Optimize}) - block._insert_op( - idx + 2, - type='c_sync_calc_stream', - inputs={'X': param}, - outputs={'Out': param}, - attrs={self.op_role_key: OpRole.Optimize}) + snapshot = block.create_var(name=self.snapshot_name(param.name), + shape=param.shape, + persistable=True, + stop_gradient=True, + dtype=param.dtype) + + block._insert_op(idx + 1, + type='elementwise_sub', + inputs={ + 'X': [snapshot], + 'Y': [param] + }, + outputs={'Out': [param]}, + attrs={self.op_role_key: OpRole.Optimize}) + block._insert_op(idx + 2, + type='c_sync_calc_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={self.op_role_key: OpRole.Optimize}) ring_id = (ring_id + 1) % self.nrings - block._insert_op( - idx + 3, - type='c_allreduce_sum', - inputs={'X': [param]}, - outputs={'Out': [param]}, - attrs={ - 'ring_id': ring_id, - self.op_role_key: OpRole.Optimize - }) + block._insert_op(idx + 3, + type='c_allreduce_sum', + inputs={'X': [param]}, + outputs={'Out': [param]}, + attrs={ + 'ring_id': ring_id, + self.op_role_key: OpRole.Optimize + }) ordered_param_snapshot.append((param, snapshot)) for ring_id in range(self.nrings): - block.append_op( - type='c_sync_comm_stream', - inputs={'X': param}, - outputs={'Out': param}, - attrs={'ring_id': ring_id, - self.op_role_key: OpRole.Optimize}) + block.append_op(type='c_sync_comm_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={ + 'ring_id': ring_id, + self.op_role_key: OpRole.Optimize + }) for param_snapshot in reversed(ordered_param_snapshot): param = param_snapshot[0] snapshot = param_snapshot[1] - 
block.append_op( - type='scale', - inputs={'X': [param]}, - outputs={'Out': [param]}, - attrs={ - 'scale': 1.0 / self.nranks, - self.op_role_key: OpRole.Optimize - }) - block.append_op( - type='elementwise_sub', - inputs={'X': [snapshot], - 'Y': [param]}, - outputs={'Out': [param]}, - attrs={self.op_role_key: OpRole.Optimize}) - block.append_op( - type='assign', - inputs={'X': [param]}, - outputs={'Out': [snapshot]}, - attrs={self.op_role_key: OpRole.Optimize}) + block.append_op(type='scale', + inputs={'X': [param]}, + outputs={'Out': [param]}, + attrs={ + 'scale': 1.0 / self.nranks, + self.op_role_key: OpRole.Optimize + }) + block.append_op(type='elementwise_sub', + inputs={ + 'X': [snapshot], + 'Y': [param] + }, + outputs={'Out': [param]}, + attrs={self.op_role_key: OpRole.Optimize}) + block.append_op(type='assign', + inputs={'X': [param]}, + outputs={'Out': [snapshot]}, + attrs={self.op_role_key: OpRole.Optimize}) class SingleProcessMultiThread(GradAllReduce): @@ -464,22 +451,26 @@ class MultiThread(GradAllReduce): print("total endpoints: ", self.endpoints) print("rank: %d, ring_id: %d" % (self.rank, self.nrings)) for ring_id in range(self.nrings): - self._init_communicator( - self.startup_program, self.current_endpoint, self.endpoints, - self.rank, ring_id, self.wait_port, True) + self._init_communicator(self.startup_program, + self.current_endpoint, self.endpoints, + self.rank, ring_id, self.wait_port, + True) else: if "xpu" in self.trans_mode: print( - "begin to _transpile_startup_program for single-node in XPU") + "begin to _transpile_startup_program for single-node in XPU" + ) block = self.startup_program.global_block() block.append_op( type='c_comm_init_all', attrs={ - 'devices': list( + 'devices': + list( map(int, os.getenv("FLAGS_selected_gpus").split(","))), - 'ring_id': 0 + 'ring_id': + 0 }) else: print("begin to _transpile_startup_program for single-node") @@ -546,16 +537,15 @@ class MultiThread(GradAllReduce): # As we search ops reversedly, we should insert c_allgather # op in the same way to keep the ring_id alternate ring_id = (ring_id + 1) % self.nrings - block._insert_op( - offset, - type='c_allgather', - inputs={'X': grad}, - outputs={'Out': new_grad_var}, - attrs={ - 'nranks': self.allgather_ranks, - 'ring_id': ring_id, - self.op_role_key: OpRole.Backward - }) + block._insert_op(offset, + type='c_allgather', + inputs={'X': grad}, + outputs={'Out': new_grad_var}, + attrs={ + 'nranks': self.allgather_ranks, + 'ring_id': ring_id, + self.op_role_key: OpRole.Backward + }) if grad is None: return @@ -563,15 +553,14 @@ class MultiThread(GradAllReduce): for idx, op in enumerate(block.ops): if self._is_optimizer_op(op): for ring_id in range(self.nrings): - block._insert_op( - idx + ring_id, - type='c_sync_comm_stream', - inputs={'X': grad}, - outputs={'Out': grad}, - attrs={ - 'ring_id': ring_id, - self.op_role_key: OpRole.Backward - }) + block._insert_op(idx + ring_id, + type='c_sync_comm_stream', + inputs={'X': grad}, + outputs={'Out': grad}, + attrs={ + 'ring_id': ring_id, + self.op_role_key: OpRole.Backward + }) break def _update_adam_ops(self): @@ -602,10 +591,14 @@ class MultiThread(GradAllReduce): "Beta2PowOut": block.vars[op.output("Beta2PowOut")[0]] } attrs = { - "epsilon": op.attr('epsilon'), - "beta1": op.attr('beta1'), - "beta2": op.attr('beta2'), - "lazy_mode": op.attr('lazy_mode'), + "epsilon": + op.attr('epsilon'), + "beta1": + op.attr('beta1'), + "beta2": + op.attr('beta2'), + "lazy_mode": + op.attr('lazy_mode'), "min_row_size_to_use_multithread": 
op.attr('min_row_size_to_use_multithread') } @@ -617,25 +610,27 @@ class MultiThread(GradAllReduce): dtype=core.VarDesc.VarType.FP32, stop_gradient=True) for i in range(self.allgather_ranks) ] - block._insert_op( - offset, - type="split", - inputs={ - 'X': block.vars[op.input("Param")[0] + "_allgather"] - }, - outputs={'Out': split_vars}, - attrs={'num': self.allgather_ranks, - 'axis': 0}) + block._insert_op(offset, + type="split", + inputs={ + 'X': + block.vars[op.input("Param")[0] + + "_allgather"] + }, + outputs={'Out': split_vars}, + attrs={ + 'num': self.allgather_ranks, + 'axis': 0 + }) offset += 1 for i in range(self.allgather_ranks): inputs["Grad"] = split_vars[i] - block._insert_op( - offset, - type=op.type, - inputs=inputs, - outputs=outputs, - attrs=attrs) + block._insert_op(offset, + type=op.type, + inputs=inputs, + outputs=outputs, + attrs=attrs) offset += 1 # remove the original adam op block._remove_op(offset) @@ -685,47 +680,45 @@ class MultiThread(GradAllReduce): if self._is_optimizer_op(op): for segment in segments: # insert coalesce tensor - tmp_var = block.create_var( - name=unique_name.generate('FusedOutput_{}'.format( - segment[0].name)), - dtype=segment[0].dtype, - persistable=False, - stop_gradient=True) + tmp_var = block.create_var(name=unique_name.generate( + 'FusedOutput_{}'.format(segment[0].name)), + dtype=segment[0].dtype, + persistable=False, + stop_gradient=True) fused_vars.append(tmp_var) - block._insert_op( - idx, - type="coalesce_tensor", - inputs={"Input": segment}, - outputs={"Output": segment, - "FusedOutput": tmp_var}, - attrs={ - "copy_data": True, - "use_align": True, - "dtype": segment[0].dtype, - self.op_role_key: OpRole.Backward - }) + block._insert_op(idx, + type="coalesce_tensor", + inputs={"Input": segment}, + outputs={ + "Output": segment, + "FusedOutput": tmp_var + }, + attrs={ + "copy_data": True, + "use_align": True, + "dtype": segment[0].dtype, + self.op_role_key: OpRole.Backward + }) break # insert the allreduce_sum op for idx, op in enumerate(block.ops): if self._is_optimizer_op(op): for fused_var in fused_vars: - block._insert_op( - idx, - type='c_allreduce_sum', - inputs={'X': fused_var}, - outputs={'Out': fused_var}, - attrs={ - 'ring_id': ring_id, - 'use_calc_stream': False, - self.op_role_key: OpRole.Backward - }) - block._insert_op( - idx, - type='c_sync_calc_stream', - inputs={'X': fused_var}, - outputs={'Out': fused_var}, - attrs={self.op_role_key: OpRole.Backward}) + block._insert_op(idx, + type='c_allreduce_sum', + inputs={'X': fused_var}, + outputs={'Out': fused_var}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': False, + self.op_role_key: OpRole.Backward + }) + block._insert_op(idx, + type='c_sync_calc_stream', + inputs={'X': fused_var}, + outputs={'Out': fused_var}, + attrs={self.op_role_key: OpRole.Backward}) break if len(fused_vars) == 0: @@ -735,14 +728,13 @@ class MultiThread(GradAllReduce): # insert the sync comm op for idx, op in enumerate(block.ops): if self._is_optimizer_op(op): - block._insert_op( - idx, - type='c_sync_comm_stream', - inputs={'X': fused_vars[0]}, - outputs={'Out': fused_vars[0]}, - attrs={ - 'ring_id': ring_id, - self.op_role_key: OpRole.Backward - }) + block._insert_op(idx, + type='c_sync_comm_stream', + inputs={'X': fused_vars[0]}, + outputs={'Out': fused_vars[0]}, + attrs={ + 'ring_id': ring_id, + self.op_role_key: OpRole.Backward + }) break block._sync_with_cpp() diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py 
index 0e879264f74..31d3c817d1e 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -78,6 +78,7 @@ def log(*args): class VarBlock: + def __init__(self, varname, offset, size): self.varname = varname # NOTE: real offset is offset * size @@ -131,8 +132,8 @@ def slice_variable(var_list, slice_count, min_block_size): # update split_count after aligning split_count = int(math.ceil(var_numel / float(block_size))) for block_id in range(split_count): - curr_block_size = min(block_size, var_numel - ( - (block_id) * block_size)) + curr_block_size = min(block_size, + var_numel - ((block_id) * block_size)) block = VarBlock(var.name, block_id, curr_block_size) blocks.append(str(block)) return blocks @@ -244,6 +245,7 @@ class DistributeTranspilerConfig(object): class ServerRuntimeConfig(object): + def __init__(self): self._rpc_send_thread_num = int( os.getenv("FLAGS_rpc_send_thread_num", "12")) @@ -392,9 +394,12 @@ class DistributeTranspiler(object): inputs={}, outputs={"NCCLID": nccl_id_var}, attrs={ - "trainers": trainers.split(","), - "trainer_id": trainer_id, - "nccl_comm_num": self.config.nccl_comm_num, + "trainers": + trainers.split(","), + "trainer_id": + trainer_id, + "nccl_comm_num": + self.config.nccl_comm_num, "use_hierarchical_allreduce": self.config.use_hierarchical_allreduce, "hierarchical_allreduce_inter_nranks": @@ -439,13 +444,12 @@ class DistributeTranspiler(object): else: raise ValueError('invalid collective_mode: %s' % collective_mode) - transpiler.transpile( - startup_program=startup_program, - main_program=main_program, - rank=trainer_id, - endpoints=endpoints, - current_endpoint=current_endpoint, - wait_port=wait_port) + transpiler.transpile(startup_program=startup_program, + main_program=main_program, + rank=trainer_id, + endpoints=endpoints, + current_endpoint=current_endpoint, + wait_port=wait_port) def _get_all_remote_sparse_update_op(self, main_program): sparse_update_ops = [] @@ -517,8 +521,10 @@ class DistributeTranspiler(object): program.global_block()._insert_op( index=distributed_idx, type="distributed_lookup_table", - inputs={"Ids": inputs, - 'W': w}, + inputs={ + "Ids": inputs, + 'W': w + }, outputs={"Outputs": outputs}, attrs={ "table_names": table_names, @@ -624,12 +630,11 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler self.origin_program._hierarchical_allreduce_inter_nranks = \ int(self.config.hierarchical_allreduce_inter_nranks) - self._transpile_nccl2( - trainer_id, - trainers, - current_endpoint, - startup_program=startup_program, - wait_port=self.config.wait_port) + self._transpile_nccl2(trainer_id, + trainers, + current_endpoint, + startup_program=startup_program, + wait_port=self.config.wait_port) return if self.config.mode == "collective": @@ -704,21 +709,24 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler splited_grad_varname = grad_varname if len(splited_vars) == 1: splited_grad_varname = splited_vars[0].name - index = find_op_by_output_arg( - program.global_block(), splited_grad_varname, reverse=True) + index = find_op_by_output_arg(program.global_block(), + splited_grad_varname, + reverse=True) elif len(splited_vars) > 1: orig_var = program.global_block().vars[splited_grad_varname] - index = find_op_by_output_arg( - program.global_block(), splited_grad_varname, reverse=True) + index = find_op_by_output_arg(program.global_block(), + splited_grad_varname, + reverse=True) if not 
self.config.runtime_split_send_recv: self._insert_split_op(program, orig_var, index, splited_vars) index += 1 else: - AssertionError("Can not insert the send op by original " - "variable name :", splited_grad_varname) + AssertionError( + "Can not insert the send op by original " + "variable name :", splited_grad_varname) if splited_vars[0].type == core.VarDesc.VarType.SELECTED_ROWS: sparse_param_name = self.grad_name_to_param_name[grad_varname] @@ -759,10 +767,14 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler inputs={"X": send_input_vars}, outputs={"Out": dummy_output}, attrs={ - "epmap": eplist, - "sections": sections, - "send_varnames": send_varnames, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE, + "epmap": + eplist, + "sections": + sections, + "send_varnames": + send_varnames, + RPC_OP_ROLE_ATTR_NAME: + RPC_OP_ROLE_ATTR_VALUE, OP_ROLE_VAR_ATTR_NAME: [ self.grad_name_to_param_name[grad_varname], splited_grad_varname @@ -795,12 +807,18 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler inputs={"X": self.counter_var}, outputs={"Out": decay_dummy_output}, attrs={ - "epmap": pserver_endpoints, - "sections": sections, - "send_varnames": send_varnames, - "merge_add": True, - "use_send_handler": False, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE, + "epmap": + pserver_endpoints, + "sections": + sections, + "send_varnames": + send_varnames, + "merge_add": + True, + "use_send_handler": + False, + RPC_OP_ROLE_ATTR_NAME: + RPC_OP_ROLE_ATTR_VALUE, OP_ROLE_VAR_ATTR_NAME: [self.counter_var.name, self.counter_var.name] }) @@ -809,16 +827,19 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler if self.sync_mode: fetch_barrier_input = [] - program.global_block().append_op( - type="send_barrier", - inputs={"X": list(input_deps)}, - outputs={"Out": send_barrier_out}, - attrs={ - "endpoints": pserver_endpoints, - "trainer_id": self.trainer_id, - "half_async": False, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) + program.global_block().append_op(type="send_barrier", + inputs={"X": list(input_deps)}, + outputs={"Out": send_barrier_out}, + attrs={ + "endpoints": + pserver_endpoints, + "trainer_id": + self.trainer_id, + "half_async": + False, + RPC_OP_ROLE_ATTR_NAME: + RPC_OP_ROLE_ATTR_VALUE + }) fetch_barrier_input.append(send_barrier_out) else: @@ -896,10 +917,14 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler inputs={"X": [recv_dep_in]}, outputs={"Out": splited_var}, attrs={ - "epmap": eps, - "recv_varnames": recv_varnames, - "trainer_id": self.trainer_id, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE, + "epmap": + eps, + "recv_varnames": + recv_varnames, + "trainer_id": + self.trainer_id, + RPC_OP_ROLE_ATTR_NAME: + RPC_OP_ROLE_ATTR_VALUE, OP_ROLE_VAR_ATTR_NAME: [param_varname, recv_op_role_var_name] }) @@ -908,15 +933,17 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler if self.sync_mode: # form a WAW dependency - program.global_block().append_op( - type="fetch_barrier", - inputs={"X": fetch_barrier_input}, - outputs={"Out": all_recv_outputs}, - attrs={ - "endpoints": pserver_endpoints, - "trainer_id": self.trainer_id, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) + program.global_block().append_op(type="fetch_barrier", + inputs={"X": fetch_barrier_input}, + outputs={"Out": all_recv_outputs}, + attrs={ + "endpoints": + pserver_endpoints, + "trainer_id": + self.trainer_id, + RPC_OP_ROLE_ATTR_NAME: + 
RPC_OP_ROLE_ATTR_VALUE + }) for param_varname, splited_var in six.iteritems(self.param_var_mapping): if len(splited_var) <= 1: @@ -969,8 +996,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler table_param_init_op.append(op) init_op_num = len(table_param_init_op) if init_op_num != 1: - raise ValueError("table init op num should be 1, now is " + str( - init_op_num)) + raise ValueError("table init op num should be 1, now is " + + str(init_op_num)) table_init_op = table_param_init_op[0] self.startup_program.global_block().append_op( type="fake_init", @@ -1134,8 +1161,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler if varname in startup_program.global_block().vars: orig_param = startup_program.global_block().vars[varname] else: - origin_param_var = self.origin_program.global_block().vars[ - varname] + origin_param_var = self.origin_program.global_block( + ).vars[varname] orig_param = startup_program.global_block().create_var( name=varname, persistable=origin_param_var.persistable, @@ -1331,8 +1358,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler # find the origin grad var before clipping/L2Decay, # merged_var should be the input var name of L2Decay grad_varname_for_block = op.attr(OP_ROLE_VAR_ATTR_NAME)[1] - if op.attr(OP_ROLE_VAR_ATTR_NAME)[ - 0] == optimize_target_param_name: + if op.attr( + OP_ROLE_VAR_ATTR_NAME)[0] == optimize_target_param_name: merged_var = self._append_pserver_grad_merge_ops( per_opt_block, grad_varname_for_block, endpoint, grad_to_block_id, self.origin_program) @@ -1410,11 +1437,10 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler 'prefetch_var_name_to_block_id'] = prefetch_var_name_to_block_id # step5 append the listen_and_serv op - pserver_program.global_block().append_op( - type="listen_and_serv", - inputs={'X': recv_inputs}, - outputs={}, - attrs=attrs) + pserver_program.global_block().append_op(type="listen_and_serv", + inputs={'X': recv_inputs}, + outputs={}, + attrs=attrs) pserver_program._sync_with_cpp() # save pserver program to generate pserver side startup relatively. 
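Every hunk in this patch applies the same mechanical yapf re-wrapping; as a point of orientation, a minimal before/after sketch of that rule is given here, reusing the `block.append_op` call and the names `param`, `ring_id`, `OpRole`, and `self.op_role_key` exactly as they appear in the collective.py hunks above (the sketch itself is illustrative and not part of the diff):

    # Before: hanging indent, every argument pushed onto its own
    # continuation line under the opening call.
    block.append_op(
        type='c_broadcast',
        inputs={'X': param},
        outputs={'Out': param},
        attrs={
            'ring_id': ring_id,
            'root': 0,
            self.op_role_key: OpRole.Forward
        })

    # After yapf: the first argument stays on the call line and the
    # remaining arguments align under the opening parenthesis; dict
    # literals that no longer fit are split one entry per line.
    block.append_op(type='c_broadcast',
                    inputs={'X': param},
                    outputs={'Out': param},
                    attrs={
                        'ring_id': ring_id,
                        'root': 0,
                        self.op_role_key: OpRole.Forward
                    })

The same rule accounts for the other recurring changes in these hunks: blank lines inserted after `class` statements, long string `.format(...)` calls re-wrapped with the `.format` moved to the continuation line, and dict arguments such as `feed={...}` expanded to one key per line.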
@@ -1448,8 +1474,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler pserver_program, pserver_startup_program = t.get_pserver_programs(current_endpoint) """ pserver_prog = self.get_pserver_program(endpoint) - pserver_startup = self.get_startup_program( - endpoint, pserver_program=pserver_prog) + pserver_startup = self.get_startup_program(endpoint, + pserver_program=pserver_prog) return pserver_prog, pserver_startup def get_startup_program(self, @@ -1531,20 +1557,18 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler "truncated_gaussian_random" ]: op._set_attr("shape", list(new_outputs["Out"].shape)) - s_prog.global_block().append_op( - type=op.type, - inputs=new_inputs, - outputs=new_outputs, - attrs=op.all_attrs()) + s_prog.global_block().append_op(type=op.type, + inputs=new_inputs, + outputs=new_outputs, + attrs=op.all_attrs()) if self.config.enable_dc_asgd: for p, p_bak in self.param_bak_list: startup_param_var = s_prog.global_block().vars[p.name] startup_tmpvar = s_prog.global_block().vars[p_bak.name] # copy init random value to param_bak - s_prog.global_block().append_op( - type="assign", - inputs={"X": startup_param_var}, - outputs={"Out": startup_tmpvar}) + s_prog.global_block().append_op(type="assign", + inputs={"X": startup_param_var}, + outputs={"Out": startup_tmpvar}) return s_prog @@ -1578,6 +1602,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler return is_slice, block_idx, offset def _get_distributed_optimizer_vars(self): + def _get_distributed_optimizer_var(endpoint): opt_op_on_pserver = [] for _, op in enumerate(self.optimize_ops): @@ -1702,8 +1727,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler if self.config.slice_var_up: # when we slice var up into blocks, we will slice the var according to # pserver services' count. A pserver may have two or more listening ports. 
- grad_blocks = slice_variable(grad_list, - len(self.pserver_endpoints), + grad_blocks = slice_variable(grad_list, len(self.pserver_endpoints), self.config.min_block_size) param_blocks = slice_variable(param_list, len(self.pserver_endpoints), @@ -1728,13 +1752,12 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler is_slice, block_id, offset = self._get_slice_var_info( splited_var) - self.vars_overview.add_distributed_var( - origin_var=orig_var, - slice_var=splited_var, - block_id=block_id, - offset=offset, - is_slice=is_slice, - vtype="Param") + self.vars_overview.add_distributed_var(origin_var=orig_var, + slice_var=splited_var, + block_id=block_id, + offset=offset, + is_slice=is_slice, + vtype="Param") # origin_grad_name -> [splited_grad_vars] self.grad_var_mapping = self._create_vars_from_blocklist( @@ -1752,12 +1775,10 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler # create mapping of endpoint -> split var to create pserver side program self.param_grad_ep_mapping = collections.OrderedDict() [ - self.param_grad_ep_mapping.update({ - ep: { - "params": [], - "grads": [] - } - }) for ep in self.pserver_endpoints + self.param_grad_ep_mapping.update({ep: { + "params": [], + "grads": [] + }}) for ep in self.pserver_endpoints ] # transpiler function for dis lookup_table @@ -1873,9 +1894,12 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler if self.sync_mode else [] }, attrs={ - "epmap": pserver_endpoints, - "trainer_id": self.trainer_id, - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE, + "epmap": + pserver_endpoints, + "trainer_id": + self.trainer_id, + RPC_OP_ROLE_ATTR_NAME: + RPC_OP_ROLE_ATTR_VALUE, OP_ROLE_VAR_ATTR_NAME: [ self.grad_name_to_param_name[table_grad_name], table_grad_name @@ -1903,16 +1927,18 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler dtype=trainer_out.dtype) prefetch_block.append_op( type="lookup_sparse_table", - inputs={'Ids': pserver_ids, - "W": table_var}, + inputs={ + 'Ids': pserver_ids, + "W": table_var + }, outputs={"Out": pserver_out}, attrs={ "is_sparse": True, # has no effect on lookup_table op "is_distributed": True, "padding_idx": -1 }) - prefetch_var_name_to_block_id.append(trainer_ids.name + ":" + str( - prefetch_block.idx)) + prefetch_var_name_to_block_id.append(trainer_ids.name + ":" + + str(prefetch_block.idx)) return prefetch_var_name_to_block_id def _create_table_optimize_block(self, pserver_index, pserver_program, @@ -1922,17 +1948,16 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler # create table param and grad var in pserver program # create table optimize block in pserver program table_opt_op = [ - op for op in self.optimize_ops - if 'Param' in op.input_names and op.input("Param")[0] == - self.table_name + op for op in self.optimize_ops if 'Param' in op.input_names + and op.input("Param")[0] == self.table_name ][0] origin_param_var = self.origin_program.global_block().vars[ self.table_name] zero_dim = int( - math.ceil(origin_param_var.shape[0] / float( - len(self.pserver_endpoints)))) + math.ceil(origin_param_var.shape[0] / + float(len(self.pserver_endpoints)))) table_shape = list(origin_param_var.shape) table_shape[0] = zero_dim @@ -2005,18 +2030,16 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler create a new block to handle save checkpoint. 
""" - pserver_program.global_block().create_var( - name="kLookupTablePath", - persistable=True, - type=core.VarDesc.VarType.RAW) + pserver_program.global_block().create_var(name="kLookupTablePath", + persistable=True, + type=core.VarDesc.VarType.RAW) checkpoint_save_block = pserver_program._create_block(pre_block_idx) # this 'file_path' do not be used in save lookup table variable - checkpoint_save_block.append_op( - type='save', - inputs={'X': [self.table_name]}, - outputs={}, - attrs={'file_path': "none"}) + checkpoint_save_block.append_op(type='save', + inputs={'X': [self.table_name]}, + outputs={}, + attrs={'file_path': "none"}) return checkpoint_save_block.idx @@ -2090,13 +2113,12 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler return var_mapping def _clone_var(self, block, var, persistable=True): - return block.create_var( - name=var.name, - shape=var.shape, - dtype=var.dtype, - type=var.type, - lod_level=var.lod_level, - persistable=persistable) + return block.create_var(name=var.name, + shape=var.shape, + dtype=var.dtype, + type=var.type, + lod_level=var.lod_level, + persistable=persistable) @staticmethod def _get_splited_var_sections(splited_vars): @@ -2113,25 +2135,27 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler if self._is_input_of_remote_sparse_update_op(sparse_param_name): self.sparse_param_to_height_sections[ sparse_param_name] = height_sections - program.global_block()._insert_op( - index=index + 1, - type="split_selected_rows", - inputs={"X": orig_var}, - outputs={"Out": splited_vars}, - attrs={ - "height_sections": height_sections, - RPC_OP_ROLE_ATTR_NAME: DIST_OP_ROLE_ATTR_VALUE - }) + program.global_block()._insert_op(index=index + 1, + type="split_selected_rows", + inputs={"X": orig_var}, + outputs={"Out": splited_vars}, + attrs={ + "height_sections": + height_sections, + RPC_OP_ROLE_ATTR_NAME: + DIST_OP_ROLE_ATTR_VALUE + }) elif orig_var.type == core.VarDesc.VarType.LOD_TENSOR: - program.global_block()._insert_op( - index=index + 1, - type="split_byref", - inputs={"X": orig_var}, - outputs={"Out": splited_vars}, - attrs={ - "sections": height_sections, - RPC_OP_ROLE_ATTR_NAME: DIST_OP_ROLE_ATTR_VALUE - }) + program.global_block()._insert_op(index=index + 1, + type="split_byref", + inputs={"X": orig_var}, + outputs={"Out": splited_vars}, + attrs={ + "sections": + height_sections, + RPC_OP_ROLE_ATTR_NAME: + DIST_OP_ROLE_ATTR_VALUE + }) else: AssertionError("Variable type should be in set " "[LOD_TENSOR, SELECTED_ROWS]") @@ -2225,11 +2249,10 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler per_trainer_name = "%s.trainer_%d" % \ (merged_var_name, i) vars2merge.append(pserver_block.vars[per_trainer_name]) - optimize_block.append_op( - type="sum", - inputs={"X": vars2merge}, - outputs={"Out": merged_var}, - attrs={"use_mkldnn": False}) + optimize_block.append_op(type="sum", + inputs={"X": vars2merge}, + outputs={"Out": merged_var}, + attrs={"use_mkldnn": False}) optimize_block.append_op( type="scale", inputs={"X": merged_var}, @@ -2239,64 +2262,66 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler def _append_dc_asgd_ops(self, block, param_var, grad_var): # NOTE: can not use grammar candy here, should put ops in specific block - local_param_bak = block.create_var( - name="%s.local_bak" % param_var.name, - shape=param_var.shape, - type=param_var.type, - dtype=param_var.dtype, - persistable=False) + local_param_bak = 
block.create_var(name="%s.local_bak" % param_var.name, + shape=param_var.shape, + type=param_var.type, + dtype=param_var.dtype, + persistable=False) # trainer_id_var is block local - trainer_id_var = block.create_var( - name="@TRAINER_ID@", - type=core.VarDesc.VarType.LOD_TENSOR, - dtype=core.VarDesc.VarType.INT64, - shape=[1], - persistable=False) + trainer_id_var = block.create_var(name="@TRAINER_ID@", + type=core.VarDesc.VarType.LOD_TENSOR, + dtype=core.VarDesc.VarType.INT64, + shape=[1], + persistable=False) # ref_inputs = [x[1] for x in self.param_bak_list] ref_inputs = [] for p, p_bak in self.param_bak_list: if p.name == param_var.name: ref_inputs.append(p_bak) - block.append_op( - type="ref_by_trainer_id", - inputs={"X": ref_inputs, - "TrainerId": trainer_id_var}, - outputs={"Out": local_param_bak}) + block.append_op(type="ref_by_trainer_id", + inputs={ + "X": ref_inputs, + "TrainerId": trainer_id_var + }, + outputs={"Out": local_param_bak}) def __create_temp_var__(): - return block.create_var( - name=unique_name.generate("tmp_dc_output"), - shape=param_var.shape, - type=param_var.type, - dtype=param_var.dtype, - persistable=False) + return block.create_var(name=unique_name.generate("tmp_dc_output"), + shape=param_var.shape, + type=param_var.type, + dtype=param_var.dtype, + persistable=False) o1 = __create_temp_var__() - block.append_op( - type="elementwise_sub", - inputs={"X": param_var, - "Y": local_param_bak}, - outputs={"Out": o1}) + block.append_op(type="elementwise_sub", + inputs={ + "X": param_var, + "Y": local_param_bak + }, + outputs={"Out": o1}) o2 = __create_temp_var__() - block.append_op( - type="elementwise_mul", - inputs={"X": o1, - "Y": grad_var}, - outputs={"Out": o2}) + block.append_op(type="elementwise_mul", + inputs={ + "X": o1, + "Y": grad_var + }, + outputs={"Out": o2}) o3 = __create_temp_var__() - block.append_op( - type="elementwise_mul", - inputs={"X": o2, - "Y": grad_var}, - outputs={"Out": o3}) + block.append_op(type="elementwise_mul", + inputs={ + "X": o2, + "Y": grad_var + }, + outputs={"Out": o3}) # TODO(typhoonzero): append scale o4 = __create_temp_var__() - block.append_op( - type="elementwise_add", - inputs={"X": grad_var, - "Y": o3}, - outputs={"Out": o4}) + block.append_op(type="elementwise_add", + inputs={ + "X": grad_var, + "Y": o3 + }, + outputs={"Out": o4}) return o4 def _append_pserver_ops(self, optimize_block, opt_op, endpoint, @@ -2338,11 +2363,10 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler param_block = _get_param_block(opt_op) if not param_block: return - tmpvar = pserver_block.create_var( - name=param_block.name, - persistable=True, - dtype=param_block.dtype, - shape=param_block.shape) + tmpvar = pserver_block.create_var(name=param_block.name, + persistable=True, + dtype=param_block.dtype, + shape=param_block.shape) new_inputs[key] = tmpvar elif key == "LearningRate": # learning rate variable has already be created by non-optimize op, @@ -2369,30 +2393,29 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler var = self.origin_program.global_block().vars[opt_op.input(key)[0]] param_var = new_inputs["Param"] # update accumulator variable shape - new_shape = self._get_optimizer_input_shape( - opt_op.type, key, var.shape, param_var.shape) - tmpvar = pserver_block.create_var( - name=var.name, - persistable=var.persistable, - dtype=var.dtype, - shape=new_shape) + new_shape = self._get_optimizer_input_shape(opt_op.type, key, + var.shape, + param_var.shape) + tmpvar = 
pserver_block.create_var(name=var.name, + persistable=var.persistable, + dtype=var.dtype, + shape=new_shape) new_inputs[key] = tmpvar # change output's ParamOut variable outputs = self._get_output_map_from_op( self.origin_program.global_block().vars, opt_op) outputs["ParamOut"] = new_inputs["Param"] - optimize_block.append_op( - type=opt_op.type, - inputs=new_inputs, - outputs=outputs, - attrs=opt_op.all_attrs()) + optimize_block.append_op(type=opt_op.type, + inputs=new_inputs, + outputs=outputs, + attrs=opt_op.all_attrs()) # record sparse grad to param name if new_inputs["Grad"].type == core.VarDesc.VarType.SELECTED_ROWS: sparse_grad_to_param.append( - str(new_inputs["Grad"].name) + ":" + str(new_inputs["Param"] - .name)) + str(new_inputs["Grad"].name) + ":" + + str(new_inputs["Param"].name)) def _get_pserver_grad_param_var(self, var, var_dict): """ @@ -2436,8 +2459,10 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler if var not in program.global_block().vars: block._clone_variable(var) - return block.append_op( - type=op.type, inputs=inputs, outputs=outputs, attrs=op.all_attrs()) + return block.append_op(type=op.type, + inputs=inputs, + outputs=outputs, + attrs=op.all_attrs()) def _append_pserver_non_opt_ops(self, optimize_block, opt_op): program = optimize_block.program @@ -2452,7 +2477,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler # for ops like clipping and weight decay, get the split var (xxx.block0) # for inputs/outputs grad_block = self._get_pserver_grad_param_var( - var, program.global_block().vars) + var, + program.global_block().vars) if grad_block: varlist[i] = grad_block elif var.name not in program.global_block().vars: @@ -2470,7 +2496,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler for i in range(len(varlist)): var = varlist[i] grad_block = self._get_pserver_grad_param_var( - var, program.global_block().vars) + var, + program.global_block().vars) if grad_block: varlist[i] = grad_block elif var.name not in program.global_block().vars: @@ -2480,11 +2507,10 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler varlist[i] = program.global_block().vars[var.name] outputs[key] = varlist - return optimize_block.append_op( - type=opt_op.type, - inputs=inputs, - outputs=outputs, - attrs=opt_op.all_attrs()) + return optimize_block.append_op(type=opt_op.type, + inputs=inputs, + outputs=outputs, + attrs=opt_op.all_attrs()) def _is_op_connected(self, op1, op2): # If one op's input is another op's output or @@ -2575,16 +2601,15 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler persistable=counter_var.persistable) for id_ in range(self.trainer_num) ] - for i, op in enumerate(self.startup_program.global_block() - .ops): + for i, op in enumerate( + self.startup_program.global_block().ops): if op.type == 'fill_constant': for key in op.output_names: - if len(op.output(key)) == 1 and op.output(key)[ - 0] == counter_var.name: - self.startup_program.global_block().ops[ - i]._set_attr( - 'value', - float(0.0 - self.trainer_num)) + if len(op.output(key)) == 1 and op.output( + key)[0] == counter_var.name: + self.startup_program.global_block( + ).ops[i]._set_attr( + 'value', float(0.0 - self.trainer_num)) for var in all_trainer_counter_inputs: if var.name == "%s.trainer_%d" % (counter_var.name, self.trainer_id): diff --git a/python/paddle/fluid/transpiler/geo_sgd_transpiler.py 
b/python/paddle/fluid/transpiler/geo_sgd_transpiler.py index 5fbbedc12d0..eb86ffd36a2 100644 --- a/python/paddle/fluid/transpiler/geo_sgd_transpiler.py +++ b/python/paddle/fluid/transpiler/geo_sgd_transpiler.py @@ -47,6 +47,7 @@ RPC_OP_ROLE_ATTR_VALUE = core.op_proto_and_checker_maker.OpRole.RPC class GeoSgdTranspiler(DistributeTranspiler): + def __init__(self, config=None): if config is not None: self.config = config @@ -195,8 +196,8 @@ class GeoSgdTranspiler(DistributeTranspiler): def get_pserver_programs(self, endpoint): pserver_prog = self.get_pserver_program(endpoint) self.param_grad_ep_mapping = self.param_opt_ep_mapping - pserver_startup = self.get_startup_program( - endpoint, pserver_program=pserver_prog) + pserver_startup = self.get_startup_program(endpoint, + pserver_program=pserver_prog) return pserver_prog, pserver_startup def get_pserver_program(self, endpoint): @@ -230,19 +231,17 @@ class GeoSgdTranspiler(DistributeTranspiler): [delta_var_name, param.name])) else: delta_type = param.type - delta_var = pserver_block.create_var( - name=delta_var_name, - persistable=False, - type=delta_type, - dtype=param.dtype, - shape=param.shape) - - per_opt_block.append_op( - type="sum", - inputs={"X": [param, delta_var]}, - outputs={"Out": param}) - param_to_block_id.append(delta_var_name + ":" + str( - per_opt_block.idx)) + delta_var = pserver_block.create_var(name=delta_var_name, + persistable=False, + type=delta_type, + dtype=param.dtype, + shape=param.shape) + + per_opt_block.append_op(type="sum", + inputs={"X": [param, delta_var]}, + outputs={"Out": param}) + param_to_block_id.append(delta_var_name + ":" + + str(per_opt_block.idx)) attrs = { "optimize_blocks": optimize_block, @@ -258,11 +257,10 @@ class GeoSgdTranspiler(DistributeTranspiler): } # step5 append the listen_and_serv op - pserver_program.global_block().append_op( - type="listen_and_serv", - inputs={'X': recv_inputs}, - outputs={}, - attrs=attrs) + pserver_program.global_block().append_op(type="listen_and_serv", + inputs={'X': recv_inputs}, + outputs={}, + attrs=attrs) pserver_program._sync_with_cpp() # save pserver program to generate pserver side startup relatively. @@ -289,8 +287,7 @@ class GeoSgdTranspiler(DistributeTranspiler): # step 2. Slice vars into numbers of piece with block_size # when we slice var up into blocks, we will slice the var according to # pserver services' count. A pserver may have two or more listening ports. - param_blocks = slice_variable(param_list, - len(self.pserver_endpoints), + param_blocks = slice_variable(param_list, len(self.pserver_endpoints), self.config.min_block_size) # step 3. Create split param from split blocks @@ -302,11 +299,9 @@ class GeoSgdTranspiler(DistributeTranspiler): # step 4. Create mapping of endpoint -> split var to create pserver side program self.param_opt_ep_mapping = collections.OrderedDict() [ - self.param_opt_ep_mapping.update({ - ep: { - "params": [], - } - }) for ep in self.pserver_endpoints + self.param_opt_ep_mapping.update({ep: { + "params": [], + }}) for ep in self.pserver_endpoints ] # step 5. 
Create delta var of Geo-Sgd & record vars information @@ -340,13 +335,12 @@ class GeoSgdTranspiler(DistributeTranspiler): for splited_var in splited_vars: is_slice, block_id, offset = self._get_slice_var_info( splited_var) - self.vars_overview.add_distributed_var( - origin_var=origin_var, - slice_var=splited_var, - block_id=block_id, - offset=offset, - is_slice=is_slice, - vtype="Param") + self.vars_overview.add_distributed_var(origin_var=origin_var, + slice_var=splited_var, + block_id=block_id, + offset=offset, + is_slice=is_slice, + vtype="Param") self.split_to_origin_mapping[splited_var.name] = origin_name if origin_name in self.sparse_var_list: self.sparse_var_splited_list.append(splited_var.name) diff --git a/python/paddle/fluid/unique_name.py b/python/paddle/fluid/unique_name.py index fc3bd43a5e5..090d0e8dcbb 100644 --- a/python/paddle/fluid/unique_name.py +++ b/python/paddle/fluid/unique_name.py @@ -109,7 +109,7 @@ def generate(key): # cause memory leak in dygraph mode. It is because the previous # naming rule would use `conv_0.tmp` as the key, and in dygraph # mode, `conv_i` increases as batch increases. Thus, keys would -# increase in a way like `conv_0.tmp`, `conv_1.tmp`, .... +# increase in a way like `conv_0.tmp`, `conv_1.tmp`, .... # Not find a better way to fix this bug in dygraph mode. In TF, # variable name is meaningless in eager execution mode, and in # PyTorch, there is no variable name at all. Maybe we should @@ -118,10 +118,10 @@ def generate(key): # Another concern is that save/load interfaces. Usually, user # would save model in static graph mode, and load it in dygraph # mode. Therefore, we keep the variable name of Parameter currently. -# -# Please fix me if a better method is found. -# -# NOTE(zhiqiu): use c++ unique_name_generator in dygraph mode, +# +# Please fix me if a better method is found. +# +# NOTE(zhiqiu): use c++ unique_name_generator in dygraph mode, # in order to keep name consistency. def generate_with_ignorable_key(key): from .framework import _non_static_mode, _dygraph_tracer diff --git a/python/paddle/fluid/variable_index.py b/python/paddle/fluid/variable_index.py index 7d1bbf8162c..e24a6a3aff7 100644 --- a/python/paddle/fluid/variable_index.py +++ b/python/paddle/fluid/variable_index.py @@ -22,6 +22,7 @@ MAX_INTEGER = 2**31 - 1 def is_list_tuple(index, contain_type): + def _is_list_tuple(item): if not (isinstance(item, (list, tuple)) or type(item) == contain_type): return False @@ -64,14 +65,15 @@ def get_list_index_shape(var_dims, index_dims): class SliceInfo: + def __init__(self): self.pre_shape = None self.indexes = [] self.dtype = None def update(self, index): - if is_list_tuple(index, int) or isinstance(index, ( - paddle.fluid.Variable, np.ndarray)): + if is_list_tuple(index, int) or isinstance( + index, (paddle.fluid.Variable, np.ndarray)): # convert index to Tensor if not isinstance(index, paddle.fluid.Variable): index = paddle.assign(index) @@ -81,8 +83,8 @@ class SliceInfo: else: if index.dtype != self.dtype: raise IndexError( - "Data type of Tensor/List index should be same. The current data type is {}, but the previous data type is {}.". - format(index.dtype, self.dtype)) + "Data type of Tensor/List index should be same. The current data type is {}, but the previous data type is {}." 
+ .format(index.dtype, self.dtype)) self.indexes.append(index) @@ -90,12 +92,12 @@ class SliceInfo: self.pre_shape = index.shape else: if self.pre_shape != index.shape: - # broadcast + # broadcast cur_shape = paddle.broadcast_shape(self.pre_shape, index.shape) for i in range(len(self.indexes)): - self.indexes[i] = paddle.broadcast_to(self.indexes[i], - cur_shape) + self.indexes[i] = paddle.broadcast_to( + self.indexes[i], cur_shape) self.pre_shape = self.indexes[-1].shape else: raise ValueError( @@ -121,12 +123,15 @@ class SliceInfo: if len(self.indexes) <= len(tensor_shape) or len(self.indexes) == 1: shape = paddle.stack(self.indexes) - axes = list(range(1, len(self.pre_shape) + 1)) + [0, ] + axes = list(range(1, + len(self.pre_shape) + 1)) + [ + 0, + ] else: raise ValueError( - "too many indices for tensor: tensor is {}-dimensional, but {} were indexed". - format(len(tensor_shape), self.pre_shape[0])) + "too many indices for tensor: tensor is {}-dimensional, but {} were indexed" + .format(len(tensor_shape), self.pre_shape[0])) shape_transpose = paddle.transpose(shape, axes) return shape_transpose @@ -156,22 +161,25 @@ class SliceInfo: shape_transpose = self.get_offset_stride(tensor_origin.shape) index = paddle.assign(shape_transpose) - gather_tensor_shape = get_list_index_shape( - tensor.shape, [len(self.indexes), ] + list(self.indexes[-1].shape)) + gather_tensor_shape = get_list_index_shape(tensor.shape, [ + len(self.indexes), + ] + list(self.indexes[-1].shape)) - value_dims_bd = [1, ] * len(gather_tensor_shape) + value_dims_bd = [ + 1, + ] * len(gather_tensor_shape) value_dims_bd[-len(value.shape):] = list(value.shape) for i in range(len(gather_tensor_shape)): - if not (value_dims_bd[i] == gather_tensor_shape[i] or - value_dims_bd[i] == 1): + if not (value_dims_bd[i] == gather_tensor_shape[i] + or value_dims_bd[i] == 1): raise ValueError("{} can not broadcast into {}".format( value.shape, gather_tensor_shape)) value_broadcast = paddle.broadcast_to(value, gather_tensor_shape) - value_1d = value_broadcast.reshape([-1] + gather_tensor_shape[len( - index.shape) - 1:]) + value_1d = value_broadcast.reshape( + [-1] + gather_tensor_shape[len(index.shape) - 1:]) index_1d = index.reshape([-1, index.shape[-1]]) @@ -218,8 +226,9 @@ def replace_ellipsis(var, item): if ell_idx == len(item) - 1: return item[:-1] else: - item[ell_idx:ell_idx + 1] = [slice(None)] * ( - len(var.shape) - len(item) + item.count(None) + 1) + item[ell_idx:ell_idx + + 1] = [slice(None) + ] * (len(var.shape) - len(item) + item.count(None) + 1) return item @@ -267,8 +276,8 @@ def deal_attrs(attrs, attr, attr_name, tensor_attr_name, inputs, infer_flags): from .layers import utils if utils._contain_var(attr): - inputs[tensor_attr_name] = utils._convert_to_tensor_list( - attr, dtype="int64") + inputs[tensor_attr_name] = utils._convert_to_tensor_list(attr, + dtype="int64") for i, dim in enumerate(attr): if isinstance(dim, Variable): attrs[attr_name].append(-1) @@ -279,7 +288,7 @@ def deal_attrs(attrs, attr, attr_name, tensor_attr_name, inputs, infer_flags): attrs[attr_name] = attr -# the item is a tensor of bool +# the item is a tensor of bool def get_value_for_bool_tensor(var, item): if len(item.shape) > len(var.shape): raise IndexError("The dims of bool index doesn't match indexed array, " @@ -306,9 +315,8 @@ def get_value_for_bool_tensor(var, item): return paddle.empty(var_shape, dtype=var.dtype) from .layers.control_flow import cond - return cond( - paddle.logical_not(item.any()), lambda: idx_empty(var), - lambda: 
idx_not_empty(var, item)) + return cond(paddle.logical_not(item.any()), lambda: idx_empty(var), + lambda: idx_not_empty(var, item)) def _getitem_impl_(var, item): @@ -343,8 +351,8 @@ def _getitem_impl_(var, item): slice_info = SliceInfo() for dim, slice_item in enumerate(item): - if is_integer_or_scalar_tensor(slice_item) and not is_bool_tensor( - slice_item): + if is_integer_or_scalar_tensor( + slice_item) and not is_bool_tensor(slice_item): if isinstance(slice_item, int) and var.shape[dim] is not None and var.shape[ dim] >= 0 and slice_item >= var.shape[dim]: @@ -377,9 +385,9 @@ def _getitem_impl_(var, item): start = 0 if step > 0 else MAX_INTEGER if end is None: if var.shape[dim] != -1 and ( - paddle.fluid.framework._non_static_mode() or - var.desc.type() != core.VarDesc.VarType.LOD_TENSOR_ARRAY - ): + paddle.fluid.framework._non_static_mode() + or var.desc.type() != + core.VarDesc.VarType.LOD_TENSOR_ARRAY): end = var.shape[dim] if step > 0 else -1 else: end = MAX_INTEGER if step > 0 else -1 @@ -399,8 +407,8 @@ def _getitem_impl_(var, item): if len(item) != 1: raise IndexError( - "When index contains a list, its length must be 1, but received {}.". - format(len(item))) + "When index contains a list, its length must be 1, but received {}." + .format(len(item))) new_slice_item = [] if all_bool: if len(slice_item) != var.shape[0]: @@ -447,8 +455,8 @@ def _getitem_impl_(var, item): else: raise IndexError( - "Valid index accept int or slice or ellipsis or list, but received {}.". - format(slice_item)) + "Valid index accept int or slice or ellipsis or list, but received {}." + .format(slice_item)) axes.append(dim) starts.append(start) @@ -459,8 +467,8 @@ def _getitem_impl_(var, item): if slice_info.indexes: if len(slice_info.indexes) != len(item): raise IndexError( - "Valid index accept int or slice or ellipsis or list, but received {}.". - format(item)) + "Valid index accept int or slice or ellipsis or list, but received {}." + .format(item)) return slice_info.get_item(var) inputs = {'Input': [var]} @@ -489,11 +497,10 @@ def _getitem_impl_(var, item): name=unique_name.generate_with_ignorable_key(var.name + "_" + op_type), dtype=var.dtype) - target_block.append_op( - type=op_type, - inputs=inputs, - outputs={'Out': [slice_out_var]}, - attrs=attrs) + target_block.append_op(type=op_type, + inputs=inputs, + outputs={'Out': [slice_out_var]}, + attrs=attrs) out = slice_out_var if len(reverse_axes) > 0: @@ -555,8 +562,8 @@ def _setitem_impl_(var, item, value): slice_info = SliceInfo() dim = 0 for _, slice_item in enumerate(item): - if is_integer_or_scalar_tensor(slice_item) and not is_bool_tensor( - slice_item): + if is_integer_or_scalar_tensor( + slice_item) and not is_bool_tensor(slice_item): decrease_axes.append(dim) start = slice_item end = slice_item + 1 if slice_item != -1 else MAX_INTEGER @@ -601,8 +608,8 @@ def _setitem_impl_(var, item, value): if len(item) != 1: raise IndexError( - "When index contains a bool list, its length must be 1, but received {}.". - format(len(item))) + "When index contains a bool list, its length must be 1, but received {}." + .format(len(item))) from .layers import assign idx_tensor = assign(slice_item) @@ -612,8 +619,8 @@ def _setitem_impl_(var, item, value): if slice_item.dtype == core.VarDesc.VarType.BOOL: if len(item) != 1: raise IndexError( - "When index contains a bool tensor, its length must be 1, but received {}.". - format(len(item))) + "When index contains a bool tensor, its length must be 1, but received {}." 
+ .format(len(item))) return set_value_for_bool_tensor(var, slice_item, value) else: slice_info.update(slice_item) @@ -632,8 +639,8 @@ def _setitem_impl_(var, item, value): if slice_info.indexes: if len(slice_info.indexes) != len(item): raise IndexError( - "Valid index accept int or slice or ellipsis or list, but received {}.". - format(item)) + "Valid index accept int or slice or ellipsis or list, but received {}." + .format(item)) return slice_info.set_item(var, value) attrs = { 'axes': axes, @@ -702,17 +709,16 @@ def _setitem_impl_(var, item, value): var._bump_inplace_version() cur_block = default_main_program().current_block() - cur_block.append_op( - type="set_value", - inputs=inputs, - outputs={'Out': var}, - attrs=attrs, - inplace_map={"Input": "Out"}) + cur_block.append_op(type="set_value", + inputs=inputs, + outputs={'Out': var}, + attrs=attrs, + inplace_map={"Input": "Out"}) return var -# the item is a tensor of bool +# the item is a tensor of bool def set_value_for_bool_tensor(var, item, value): if len(item.shape) > len(var.shape): raise IndexError("The dims of bool index doesn't match indexed array, " diff --git a/python/paddle/fluid/wrapped_decorator.py b/python/paddle/fluid/wrapped_decorator.py index 7e7dbff6561..5f837b57563 100644 --- a/python/paddle/fluid/wrapped_decorator.py +++ b/python/paddle/fluid/wrapped_decorator.py @@ -19,6 +19,7 @@ __all__ = ['wrap_decorator', 'signature_safe_contextmanager'] def wrap_decorator(decorator_func): + @decorator.decorator def __impl__(func, *args, **kwargs): wrapped_func = decorator_func(func) diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index a3584a73dfa..34423e3f3ed 100644 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: import framework api under this directory +# TODO: import framework api under this directory from . import random # noqa: F401 from .random import seed # noqa: F401 diff --git a/python/paddle/framework/dtype.py b/python/paddle/framework/dtype.py index f49f7489758..56a95f48b5f 100644 --- a/python/paddle/framework/dtype.py +++ b/python/paddle/framework/dtype.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/framework/framework.py b/python/paddle/framework/framework.py index 350b1f1567b..41fd0c0703b 100644 --- a/python/paddle/framework/framework.py +++ b/python/paddle/framework/framework.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# TODO: define framework api +# TODO: define framework api from paddle.fluid.layer_helper_base import LayerHelperBase from paddle.fluid.data_feeder import convert_dtype from paddle.fluid.framework import _dygraph_tracer diff --git a/python/paddle/framework/io.py b/python/paddle/framework/io.py index 8e8dd785511..09f3c512401 100644 --- a/python/paddle/framework/io.py +++ b/python/paddle/framework/io.py @@ -96,7 +96,7 @@ def _load_state_dict_from_save_inference_model(model_path, config): def _load_state_dict_from_save_params(model_path): - # Try to load all the files in the directory in VarBase format, + # Try to load all the files in the directory in VarBase format, # the file name is used as the name of VarBase load_var_list = [] @@ -157,7 +157,7 @@ def _build_load_path_and_config(path, config): elif not prefix_format_exist and not directory_format_exist: error_msg = "The ``path`` (%s) to load model not exists." # if current path is a prefix, and the path.pdparams or path.pdopt - # is exist, users may want use `paddle.load` load the result of + # is exist, users may want use `paddle.load` load the result of # `fluid.save_dygraph`, we raise error here for users params_file_path = path + ".pdparams" opti_file_path = path + ".pdopt" @@ -237,8 +237,9 @@ def _pickle_save(obj, f, protocol): type(protocol))) if protocol < 2 or protocol > 4: - raise ValueError("Expected 1<'protocol'<5, but received protocol={}". - format(protocol)) + raise ValueError( + "Expected 1<'protocol'<5, but received protocol={}".format( + protocol)) def reduce_varbase(self): data = self.numpy() @@ -330,20 +331,20 @@ def _is_state_dict(obj): if isinstance(obj, dict): def condition(obj): - return isinstance(obj, (fluid.Layer, Program, core.VarBase, - core.eager.Tensor, core.LoDTensor, - core.SelectedRows)) + return isinstance( + obj, (fluid.Layer, Program, core.VarBase, core.eager.Tensor, + core.LoDTensor, core.SelectedRows)) - # If the value of a dict is a core.VarBase/LoDTensor or a dict - # that does not contain a paddle type(Layer, Program, VarBase, LoDTensor, SelectedRows), + # If the value of a dict is a core.VarBase/LoDTensor or a dict + # that does not contain a paddle type(Layer, Program, VarBase, LoDTensor, SelectedRows), # the dict is considered to be a state_ dict. for key, value in obj.items(): if isinstance(value, dict): for k, v in value.items(): if _contain_x(v, condition): return False - elif not isinstance(value, (core.VarBase, core.eager.Tensor, - core.LoDTensor)): + elif not isinstance( + value, (core.VarBase, core.eager.Tensor, core.LoDTensor)): return False return True @@ -432,12 +433,13 @@ def _parse_every_object(obj, condition_func, convert_func): obj, (str, np.ndarray, core.VarBase, core.eager.Tensor, core.LoDTensor)): raise NotImplementedError( - "The iteratable objects supported are tuple, list, dict, OrderedDict, string. But received {}.". - format(type(obj))) + "The iteratable objects supported are tuple, list, dict, OrderedDict, string. But received {}." + .format(type(obj))) return obj def _parse_load_result(obj, return_numpy): + def is_layer(obj): return isinstance(obj, fluid.Layer) @@ -460,12 +462,12 @@ def _parse_load_result(obj, return_numpy): def ndarray_to_tensor(obj): return _ndarray_to_tensor(obj, return_numpy=return_numpy) - # tuple(name, ndarry) was converted from varbase of paddle2.1, + # tuple(name, ndarry) was converted from varbase of paddle2.1, # and all tuple(name, ndarry) are converted to tensor. 
if _contain_x(obj, _transformed_from_varbase): return _parse_every_object(obj, _transformed_from_varbase, tuple_to_tensor) - # If there is no tuple(name, ndary), it is considered to be saved by paddle2.0 + # If there is no tuple(name, ndary), it is considered to be saved by paddle2.0 # or converted from LoDTensor, and all ndarrays are converted to tensor. else: return _parse_every_object(obj, _transformed_from_lodtensor, @@ -565,8 +567,8 @@ def _save_binary_var(obj, path): else: # Since the concept of 'Tensor' is only exposed to users, the error message can only contain tensor instead of 'LoDTensor' or 'SelectedRows' raise NotImplementedError( - "When use_binary_format = True, `paddle.save` expected Tensor, but received {}.". - format(type(obj))) + "When use_binary_format = True, `paddle.save` expected Tensor, but received {}." + .format(type(obj))) def save(obj, path, protocol=4, **configs): @@ -752,8 +754,9 @@ def _legacy_save(obj, path, protocol=2): type(protocol))) if protocol < 2 or protocol > 4: - raise ValueError("Expected 1<'protocol'<5, but received protocol={}". - format(protocol)) + raise ValueError( + "Expected 1<'protocol'<5, but received protocol={}".format( + protocol)) if _is_file_path(path): filename = os.path.basename(path) @@ -968,8 +971,8 @@ def load(path, **configs): del load_result["StructuredToParameterName@@"] else: # paddle2.1 static.save/load - load_result = _parse_load_result(load_result, - config.return_numpy) + load_result = _parse_load_result( + load_result, config.return_numpy) else: load_result = _parse_load_result(load_result, @@ -1030,18 +1033,18 @@ def _legacy_load(path, **configs): if os.path.exists(model_file_path): # Load state dict by `jit.save/io.save_inference_model` save format # NOTE(chenweihang): [ Compatibility of save_inference_model save format ] - # The model saved by `save_inference_model` does not completely correspond to - # the information required by the `state_dict` under the dygraph. - # `save_inference_model` not save structured name, we need to remind + # The model saved by `save_inference_model` does not completely correspond to + # the information required by the `state_dict` under the dygraph. + # `save_inference_model` not save structured name, we need to remind # the user to configure the `use_structured_name` argument when `set_state_dict` - # NOTE(chenweihang): `jit.save` doesn't save optimizer state - load_result = _load_state_dict_from_save_inference_model(model_path, - config) + # NOTE(chenweihang): `jit.save` doesn't save optimizer state + load_result = _load_state_dict_from_save_inference_model( + model_path, config) else: # load state dict by `io.save_params/persistables` save format # TODO(chenweihang): [ Now only supports loading parameters separately ] # If users save all parameters as one file, the [ variable.name -> variable ] - # mapping info will lost, so users need to give variable list, but users build + # mapping info will lost, so users need to give variable list, but users build # variable list in dygraph mode is difficult, we recommend users to use # paddle.static.load_program_state in this case load_result = _load_state_dict_from_save_params(model_path) diff --git a/python/paddle/framework/random.py b/python/paddle/framework/random.py index b58d36b8e7d..6c5ff2c8efb 100644 --- a/python/paddle/framework/random.py +++ b/python/paddle/framework/random.py @@ -38,7 +38,7 @@ def seed(seed): """ #TODO(zhiqiu): 1. remove program.random_seed when all random-related op upgrade - # 2. 
support gpu generator by global device + # 2. support gpu generator by global device seed = int(seed) diff --git a/python/paddle/hapi/callbacks.py b/python/paddle/hapi/callbacks.py index a8e034c87b8..1ba33a6b52b 100644 --- a/python/paddle/hapi/callbacks.py +++ b/python/paddle/hapi/callbacks.py @@ -68,6 +68,7 @@ def config_callbacks(callbacks=None, class CallbackList(object): + def __init__(self, callbacks=None): # copy self.callbacks = [c for c in callbacks] @@ -441,8 +442,8 @@ class ProgBarLogger(Callback): 'samples': 0, } - self.eval_progbar = ProgressBar( - num=self.eval_steps, verbose=self.verbose) + self.eval_progbar = ProgressBar(num=self.eval_steps, + verbose=self.verbose) if self._is_print(): print('Eval begin...') @@ -485,8 +486,8 @@ class ProgBarLogger(Callback): 'samples': 0, } - self.test_progbar = ProgressBar( - num=self.test_steps, verbose=self.verbose) + self.test_progbar = ProgressBar(num=self.test_steps, + verbose=self.verbose) if self._is_print(): print('Predict begin...') @@ -925,8 +926,9 @@ class VisualDL(Callback): else: continue - self.writer.add_scalar( - tag=temp_tag, step=total_step, value=temp_value) + self.writer.add_scalar(tag=temp_tag, + step=total_step, + value=temp_value) def on_train_batch_end(self, step, logs=None): logs = logs or {} @@ -1057,8 +1059,8 @@ class ReduceLROnPlateau(Callback): warnings.warn('Learning rate reduction mode %s is unknown, ' 'fallback to auto mode.' % self.mode) self.mode = 'auto' - if (self.mode == 'min' or - (self.mode == 'auto' and 'acc' not in self.monitor)): + if (self.mode == 'min' + or (self.mode == 'auto' and 'acc' not in self.monitor)): self.monitor_op = lambda a, b: np.less(a, b - self.min_delta) self.best = np.Inf else: @@ -1085,8 +1087,8 @@ class ReduceLROnPlateau(Callback): return except Exception as e: warnings.warn( - 'There are something wrong when get learning_rate from optimizer: {}.'. - format(e)) + 'There are something wrong when get learning_rate from optimizer: {}.' + .format(e)) return current = logs[self.monitor] diff --git a/python/paddle/hapi/dynamic_flops.py b/python/paddle/hapi/dynamic_flops.py index 4dd1aa03aa2..214af9f2f59 100644 --- a/python/paddle/hapi/dynamic_flops.py +++ b/python/paddle/hapi/dynamic_flops.py @@ -1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -106,11 +106,10 @@ def flops(net, input_size, custom_ops=None, print_detail=False): _, net.forward = unwrap_decorators(net.forward) inputs = paddle.randn(input_size) - return dynamic_flops( - net, - inputs=inputs, - custom_ops=custom_ops, - print_detail=print_detail) + return dynamic_flops(net, + inputs=inputs, + custom_ops=custom_ops, + print_detail=print_detail) elif isinstance(net, paddle.static.Program): return static_flops(net, print_detail=print_detail) else: @@ -124,8 +123,8 @@ def count_convNd(m, x, y): x = x[0] kernel_ops = np.product(m.weight.shape[2:]) bias_ops = 1 if m.bias is not None else 0 - total_ops = int(y.numel()) * ( - x.shape[1] / m._groups * kernel_ops + bias_ops) + total_ops = int( + y.numel()) * (x.shape[1] / m._groups * kernel_ops + bias_ops) m.total_ops += abs(int(total_ops)) @@ -227,8 +226,8 @@ def dynamic_flops(model, inputs, custom_ops=None, print_detail=False): if m_type in custom_ops: flops_fn = custom_ops[m_type] if m_type not in types_collection: - print("Customize Function has been applied to {}".format( - m_type)) + print( + "Customize Function has been applied to {}".format(m_type)) elif m_type in register_hooks: flops_fn = register_hooks[m_type] if m_type not in types_collection: @@ -236,8 +235,8 @@ def dynamic_flops(model, inputs, custom_ops=None, print_detail=False): else: if m_type not in types_collection: print( - "Cannot find suitable count function for {}. Treat it as zero FLOPs.". - format(m_type)) + "Cannot find suitable count function for {}. Treat it as zero FLOPs." + .format(m_type)) if flops_fn is not None: flops_handler = m.register_forward_post_hook(flops_fn) @@ -280,8 +279,10 @@ def dynamic_flops(model, inputs, custom_ops=None, print_detail=False): if {'total_ops', 'total_params', 'input_shape', 'output_shape'}.issubset(set(list(m._buffers.keys()))): table.add_row([ - m.full_name(), list(m.input_shape.numpy()), - list(m.output_shape.numpy()), int(m.total_params), + m.full_name(), + list(m.input_shape.numpy()), + list(m.output_shape.numpy()), + int(m.total_params), int(m.total_ops) ]) m._buffers.pop("total_ops") @@ -290,6 +291,6 @@ def dynamic_flops(model, inputs, custom_ops=None, print_detail=False): m._buffers.pop('output_shape') if print_detail: table.print_table() - print('Total Flops: {} Total Params: {}'.format( - int(total_ops), int(total_params))) + print('Total Flops: {} Total Params: {}'.format(int(total_ops), + int(total_params))) return int(total_ops) diff --git a/python/paddle/hapi/hub.py b/python/paddle/hapi/hub.py index 6cb2aae5ae2..3217059c647 100644 --- a/python/paddle/hapi/hub.py +++ b/python/paddle/hapi/hub.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -163,8 +163,8 @@ def _check_dependencies(m): pkg for pkg in dependencies if not _check_module_exists(pkg) ] if len(missing_deps): - raise RuntimeError('Missing dependencies: {}'.format(', '.join( - missing_deps))) + raise RuntimeError('Missing dependencies: {}'.format( + ', '.join(missing_deps))) def list(repo_dir, source='github', force_reload=False): @@ -194,12 +194,14 @@ def list(repo_dir, source='github', force_reload=False): """ if source not in ('github', 'gitee', 'local'): raise ValueError( - 'Unknown source: "{}". Allowed values: "github" | "gitee" | "local".'. - format(source)) + 'Unknown source: "{}". Allowed values: "github" | "gitee" | "local".' + .format(source)) if source in ('github', 'gitee'): - repo_dir = _get_cache_or_reload( - repo_dir, force_reload, True, source=source) + repo_dir = _get_cache_or_reload(repo_dir, + force_reload, + True, + source=source) hub_module = _import_module(MODULE_HUBCONF.split('.')[0], repo_dir) @@ -239,12 +241,14 @@ def help(repo_dir, model, source='github', force_reload=False): """ if source not in ('github', 'gitee', 'local'): raise ValueError( - 'Unknown source: "{}". Allowed values: "github" | "gitee" | "local".'. - format(source)) + 'Unknown source: "{}". Allowed values: "github" | "gitee" | "local".' + .format(source)) if source in ('github', 'gitee'): - repo_dir = _get_cache_or_reload( - repo_dir, force_reload, True, source=source) + repo_dir = _get_cache_or_reload(repo_dir, + force_reload, + True, + source=source) hub_module = _import_module(MODULE_HUBCONF.split('.')[0], repo_dir) @@ -280,12 +284,14 @@ def load(repo_dir, model, source='github', force_reload=False, **kwargs): """ if source not in ('github', 'gitee', 'local'): raise ValueError( - 'Unknown source: "{}". Allowed values: "github" | "gitee" | "local".'. - format(source)) + 'Unknown source: "{}". Allowed values: "github" | "gitee" | "local".' 
+ .format(source)) if source in ('github', 'gitee'): - repo_dir = _get_cache_or_reload( - repo_dir, force_reload, True, source=source) + repo_dir = _get_cache_or_reload(repo_dir, + force_reload, + True, + source=source) hub_module = _import_module(MODULE_HUBCONF.split('.')[0], repo_dir) diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py index a7a5e59f394..c78c89964c9 100644 --- a/python/paddle/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -104,8 +104,10 @@ def extract_args(func): def _all_gather(x, nranks, ring_id=0, use_calc_stream=True): - return collective._c_allgather( - x, nranks, ring_id=ring_id, use_calc_stream=use_calc_stream) + return collective._c_allgather(x, + nranks, + ring_id=ring_id, + use_calc_stream=use_calc_stream) def wait_server_ready(endpoints): @@ -143,49 +145,45 @@ def init_communicator(program, rank, nranks, wait_port, current_endpoint, persistable=True, type=fluid.core.VarDesc.VarType.RAW) - block.append_op( - type='c_gen_nccl_id', - inputs={}, - outputs={'Out': nccl_id_var}, - attrs={ - 'rank': rank, - 'endpoint': current_endpoint, - 'other_endpoints': other_endpoints - }) - - block.append_op( - type='c_comm_init', - inputs={'X': nccl_id_var}, - outputs={}, - attrs={ - 'nranks': nranks, - 'rank': rank, - 'ring_id': 0, - }) + block.append_op(type='c_gen_nccl_id', + inputs={}, + outputs={'Out': nccl_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints + }) + + block.append_op(type='c_comm_init', + inputs={'X': nccl_id_var}, + outputs={}, + attrs={ + 'nranks': nranks, + 'rank': rank, + 'ring_id': 0, + }) elif core.is_compiled_with_npu(): hccl_id_var = block.create_var( name=fluid.unique_name.generate('hccl_id'), persistable=True, type=core.VarDesc.VarType.RAW) - block.append_op( - type='c_gen_hccl_id', - inputs={}, - outputs={'Out': hccl_id_var}, - attrs={ - 'rank': rank, - 'endpoint': current_endpoint, - 'other_endpoints': other_endpoints - }) - block.append_op( - type='c_comm_init_hccl', - inputs={'X': hccl_id_var}, - outputs={}, - attrs={ - 'rank': rank, - 'ring_id': 0, - 'device_id': int(os.getenv("FLAGS_selected_npus")), - 'rank_ids': nranks - }) + block.append_op(type='c_gen_hccl_id', + inputs={}, + outputs={'Out': hccl_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints + }) + block.append_op(type='c_comm_init_hccl', + inputs={'X': hccl_id_var}, + outputs={}, + attrs={ + 'rank': rank, + 'ring_id': 0, + 'device_id': int(os.getenv("FLAGS_selected_npus")), + 'rank_ids': nranks + }) def prepare_distributed_context(place=None): @@ -308,6 +306,7 @@ class StaticGraphAdapter(object): return self.model.network.parameters(*args, **kwargs) def save(self, path): + def _save(state, path): if not state: return @@ -348,8 +347,8 @@ class StaticGraphAdapter(object): # restore parameter states fluid.core._create_loaded_parameter( - [param for param, state in param_state_pairs], - global_scope(), executor) + [param for param, state in param_state_pairs], global_scope(), + executor) for param, state in param_state_pairs: self._set_var(param, state) @@ -396,25 +395,24 @@ class StaticGraphAdapter(object): opt_cls_name = self.model._optimizer.__class__.__name__ opt_unq_name = None for name in self.model._optimizer._accumulators.keys(): - accum_name = name if opt_name is None else name[len( - opt_name) + 1:] + accum_name = name if opt_name is None else name[ + len(opt_name) + 1:] for param_name, state_var in self.model._optimizer._accumulators[ 
name].items(): if opt_unq_name is None: # can not infer out the exact unique(opt_name), # thus try to extract rather than generate - for state_key in sorted( - state.keys(), - key=lambda x: len(x), - reverse=True): + for state_key in sorted(state.keys(), + key=lambda x: len(x), + reverse=True): prefix = param_name + "_" + ( opt_cls_name if opt_name is None else opt_name) + "_" if state_key.startswith(prefix): prefix_offset = state_key[len( prefix):].find("_") + len(prefix) - opt_unq_name = state_key[len( - param_name + "_"):prefix_offset] + opt_unq_name = state_key[ + len(param_name + "_"):prefix_offset] # TODO: assert # assert opt_unq_name is None # gen(param.name + "_" + gen(opt_name) + "_" + accum_name) @@ -601,8 +599,8 @@ class StaticGraphAdapter(object): self.model._optimizer, strategy=dist_strategy) elif self._amp_level != "O0" and core.is_compiled_with_cuda: amp_lists = paddle.static.amp.AutoMixedPrecisionLists( - **self. - _amp_custom_lists) if self._amp_custom_lists else None + **self._amp_custom_lists + ) if self._amp_custom_lists else None self.model._optimizer = paddle.static.amp.decorate( self.model._optimizer, amp_lists=amp_lists, @@ -665,6 +663,7 @@ class StaticGraphAdapter(object): class DynamicGraphAdapter(object): + def __init__(self, model): super(DynamicGraphAdapter, self).__init__() self.model = model @@ -716,10 +715,9 @@ class DynamicGraphAdapter(object): if self._amp_level != "O0" and self.model._scaler is None: self.model._scaler = paddle.amp.GradScaler(**self._amp_configs) - with paddle.amp.auto_cast( - enable=self._amp_level != 'O0', - **self._amp_custom_lists, - level=self._amp_level): + with paddle.amp.auto_cast(enable=self._amp_level != 'O0', + **self._amp_custom_lists, + level=self._amp_level): if self._nranks > 1: outputs = self.ddp_model.forward( *[to_variable(x) for x in inputs]) @@ -863,8 +861,9 @@ class DynamicGraphAdapter(object): opt_cls_name = self.model._optimizer.__class__.__name__ opt_name = opt_unq_name[:opt_unq_name.rfind("_")] # remove suffix idx param_names = [param.name for param in self.model.network.parameters()] - for var_name, state_var in sorted( - optim_state.items(), key=lambda x: len(x[0]), reverse=True): + for var_name, state_var in sorted(optim_state.items(), + key=lambda x: len(x[0]), + reverse=True): if var_name in ["@LR_DECAY_COUNTER@", "global_step"]: # NOTE: dygraph saved global_step is 1 larger than that in # static-graph, since the time of global_step to increase is @@ -1413,6 +1412,7 @@ class Model(object): return self._adapter.parameters() def _prepare_amp(self, amp_configs): + def _check_pure_fp16_configs(): # pure float16 training has some restricts now if self._adapter._amp_level == "O2" and self._optimizer._grad_clip: @@ -1476,8 +1476,8 @@ class Model(object): } if amp_config_key_set - accepted_param_set: raise ValueError( - "Except for 'level', the keys of 'amp_configs' must be accepted by mixed precision APIs, but {} could not be recognized.". - format(tuple(amp_config_key_set - accepted_param_set))) + "Except for 'level', the keys of 'amp_configs' must be accepted by mixed precision APIs, but {} could not be recognized." 
+ .format(tuple(amp_config_key_set - accepted_param_set))) if 'use_fp16_guard' in amp_config_key_set: if _non_static_mode(): @@ -1492,7 +1492,10 @@ class Model(object): for key in amp_configs_set: self._adapter._amp_configs[key] = amp_configs[key] - def prepare(self, optimizer=None, loss=None, metrics=None, + def prepare(self, + optimizer=None, + loss=None, + metrics=None, amp_configs=None): """ Configures the model before runing. @@ -1716,29 +1719,26 @@ class Model(object): "train_data must be given!" if isinstance(train_data, Dataset): - train_sampler = DistributedBatchSampler( - train_data, - batch_size=batch_size, - shuffle=shuffle, - drop_last=drop_last) - train_loader = DataLoader( - train_data, - batch_sampler=train_sampler, - places=self._place, - num_workers=num_workers, - return_list=True) + train_sampler = DistributedBatchSampler(train_data, + batch_size=batch_size, + shuffle=shuffle, + drop_last=drop_last) + train_loader = DataLoader(train_data, + batch_sampler=train_sampler, + places=self._place, + num_workers=num_workers, + return_list=True) else: train_loader = train_data if eval_data is not None and isinstance(eval_data, Dataset): - eval_sampler = DistributedBatchSampler( - eval_data, batch_size=batch_size) - eval_loader = DataLoader( - eval_data, - batch_sampler=eval_sampler, - places=self._place, - num_workers=num_workers, - return_list=True) + eval_sampler = DistributedBatchSampler(eval_data, + batch_size=batch_size) + eval_loader = DataLoader(eval_data, + batch_sampler=eval_sampler, + places=self._place, + num_workers=num_workers, + return_list=True) elif eval_data is not None: eval_loader = eval_data else: @@ -1765,7 +1765,8 @@ class Model(object): save_freq=save_freq, save_dir=save_dir, verbose=verbose, - metrics=self._metrics_name(), ) + metrics=self._metrics_name(), + ) if any(isinstance(k, EarlyStopping) for k in cbks) and not do_eval: warnings.warn("EarlyStopping needs validation data.") @@ -1853,14 +1854,13 @@ class Model(object): """ if eval_data is not None and isinstance(eval_data, Dataset): - eval_sampler = DistributedBatchSampler( - eval_data, batch_size=batch_size) - eval_loader = DataLoader( - eval_data, - batch_sampler=eval_sampler, - places=self._place, - num_workers=num_workers, - return_list=True) + eval_sampler = DistributedBatchSampler(eval_data, + batch_size=batch_size) + eval_loader = DataLoader(eval_data, + batch_sampler=eval_sampler, + places=self._place, + num_workers=num_workers, + return_list=True) else: eval_loader = eval_data @@ -1871,7 +1871,8 @@ class Model(object): model=self, log_freq=log_freq, verbose=verbose, - metrics=self._metrics_name(), ) + metrics=self._metrics_name(), + ) eval_steps = self._len_data_loader(eval_loader) self.num_iters = num_iters @@ -1880,9 +1881,10 @@ class Model(object): assert num_iters > 0, "num_iters must be greater than 0!" 
eval_steps = min(num_iters, eval_steps) self.num_iters = eval_steps - cbks.on_begin('eval', - {'steps': eval_steps, - 'metrics': self._metrics_name()}) + cbks.on_begin('eval', { + 'steps': eval_steps, + 'metrics': self._metrics_name() + }) logs = self._run_one_epoch(eval_loader, cbks, 'eval') @@ -1972,14 +1974,13 @@ class Model(object): """ if test_data is not None and isinstance(test_data, Dataset): - test_sampler = DistributedBatchSampler( - test_data, batch_size=batch_size) - test_loader = DataLoader( - test_data, - batch_sampler=test_sampler, - places=self._place, - num_workers=num_workers, - return_list=True) + test_sampler = DistributedBatchSampler(test_data, + batch_size=batch_size) + test_loader = DataLoader(test_data, + batch_sampler=test_sampler, + places=self._place, + num_workers=num_workers, + return_list=True) else: test_loader = test_data @@ -2059,21 +2060,21 @@ class Model(object): input_names = [v.name for v in self._adapter._input_vars['test']] endpoints = self._adapter._endpoints['test']['output'] - fluid.io.save_inference_model( - model_path, - input_names, - endpoints, - self._adapter._executor, - main_program=infer_prog, - model_filename=model_filename, - params_filename=params_filename) + fluid.io.save_inference_model(model_path, + input_names, + endpoints, + self._adapter._executor, + main_program=infer_prog, + model_filename=model_filename, + params_filename=params_filename) def _run_one_epoch( - self, - data_loader, - callbacks, - mode, - logs={}, ): + self, + data_loader, + callbacks, + mode, + logs={}, + ): outputs = [] for step, data in enumerate(data_loader): # data might come from different types of data_loader and have @@ -2091,16 +2092,16 @@ class Model(object): # LoDTensor.shape is callable, where LoDTensor comes from # DataLoader in static graph - batch_size = data[0].shape()[0] if callable(data[ - 0].shape) else data[0].shape[0] + batch_size = data[0].shape()[0] if callable( + data[0].shape) else data[0].shape[0] callbacks.on_batch_begin(mode, step, logs) if mode != 'predict': _inputs = [data[:len(self._inputs)], data[len(self._inputs):]] if mode == 'train': - _inputs.append((step + 1) % self._accumulate == 0 or - step + 1 == len(data_loader)) + _inputs.append((step + 1) % self._accumulate == 0 + or step + 1 == len(data_loader)) outs = getattr(self, mode + '_batch')(*_inputs) @@ -2182,8 +2183,8 @@ class Model(object): print(params_info) """ - assert (input_size is not None or self._inputs is not None - ), "'input_size' or 'self._input' must be set" + assert (input_size is not None or self._inputs + is not None), "'input_size' or 'self._input' must be set" if input_size is not None: _input_size = input_size else: @@ -2203,8 +2204,7 @@ class Model(object): if shapes is not None and dtypes is not None and fluid._non_static_mode( ): out_specs = [ - Input( - name=n, dtype=dtypes[i], shape=shapes[i]) + Input(name=n, dtype=dtypes[i], shape=shapes[i]) for i, n in enumerate(arg_names) ] else: diff --git a/python/paddle/hapi/model_summary.py b/python/paddle/hapi/model_summary.py index c3c043bd3fc..6928bc75f5f 100644 --- a/python/paddle/hapi/model_summary.py +++ b/python/paddle/hapi/model_summary.py @@ -207,8 +207,8 @@ def summary(net, input_size=None, dtypes=None, input=None): elif isinstance(item, numbers.Number): if item <= 0: raise ValueError( - "Expected element in input size greater than zero, but got {}". 
- format(item)) + "Expected element in input size greater than zero, but got {}" + .format(item)) new_shape.append(item) return tuple(new_shape) @@ -231,6 +231,7 @@ def summary(net, input_size=None, dtypes=None, input=None): @paddle.no_grad() def summary_string(model, input_size=None, dtypes=None, input=None): + def _all_is_numper(items): for item in items: if not isinstance(item, numbers.Number): @@ -271,6 +272,7 @@ def summary_string(model, input_size=None, dtypes=None, input=None): return output_shape def register_hook(layer): + def hook(layer, input, output): class_name = str(layer.__class__).split(".")[-1].split("'")[0] @@ -319,9 +321,9 @@ def summary_string(model, input_size=None, dtypes=None, input=None): summary[m_key]["nb_params"] = params - if (not isinstance(layer, nn.Sequential) and - not isinstance(layer, nn.LayerList) and - (not (layer == model) or depth < 1)): + if (not isinstance(layer, nn.Sequential) + and not isinstance(layer, nn.LayerList) + and (not (layer == model) or depth < 1)): hooks.append(layer.register_forward_post_hook(hook)) # For rnn, gru and lstm layer @@ -416,15 +418,13 @@ def summary_string(model, input_size=None, dtypes=None, input=None): str(summary[layer]["input_shape"]), table_width['input_shape_width'], str(summary[layer]["output_shape"]), - table_width['output_shape_width'], - "{0:,}".format(summary[layer]["nb_params"]), - table_width['params_width']) + table_width['output_shape_width'], "{0:,}".format( + summary[layer]["nb_params"]), table_width['params_width']) total_params += summary[layer]["nb_params"] try: total_output += np.sum( - np.prod( - summary[layer]["output_shape"], axis=-1)) + np.prod(summary[layer]["output_shape"], axis=-1)) except: for output_shape in summary[layer]["output_shape"]: total_output += np.sum(np.prod(output_shape, axis=-1)) diff --git a/python/paddle/hapi/progressbar.py b/python/paddle/hapi/progressbar.py index 8020029be2a..58dfdef604e 100644 --- a/python/paddle/hapi/progressbar.py +++ b/python/paddle/hapi/progressbar.py @@ -51,10 +51,11 @@ class ProgressBar(object): self._last_update = 0 self.name = name - self._dynamic_display = ( - (hasattr(self.file, 'isatty') and - self.file.isatty()) or 'ipykernel' in sys.modules or - 'posix' in sys.modules or 'PYCHARM_HOSTED' in os.environ) + self._dynamic_display = ((hasattr(self.file, 'isatty') + and self.file.isatty()) + or 'ipykernel' in sys.modules + or 'posix' in sys.modules + or 'PYCHARM_HOSTED' in os.environ) def _get_max_width(self): if sys.version_info > (3, 3): @@ -119,8 +120,8 @@ class ProgressBar(object): if self._num is not None: numdigits = int(np.log10(self._num)) + 1 - bar_chars = (self.name + ' %' + str(numdigits) + 'd/%d [') % ( - current_num, self._num) + bar_chars = (self.name + ' %' + str(numdigits) + + 'd/%d [') % (current_num, self._num) prog = float(current_num) / self._num prog_width = int(self._width * prog) @@ -179,8 +180,8 @@ class ProgressBar(object): elif self._verbose == 2 or self._verbose == 3: if self._num: numdigits = int(np.log10(self._num)) + 1 - count = (self.name + ' %' + str(numdigits) + 'd/%d') % ( - current_num, self._num) + count = (self.name + ' %' + str(numdigits) + + 'd/%d') % (current_num, self._num) else: count = self.name + ' %3d' % current_num info = count + info diff --git a/python/paddle/hapi/static_flops.py b/python/paddle/hapi/static_flops.py index f386bbd0dd6..297199b7326 100644 --- a/python/paddle/hapi/static_flops.py +++ b/python/paddle/hapi/static_flops.py @@ -22,6 +22,7 @@ __all__ = [] class VarWrapper(object): + def 
__init__(self, var, graph): assert isinstance(var, Variable) assert isinstance(graph, GraphWrapper) @@ -42,6 +43,7 @@ class VarWrapper(object): class OpWrapper(object): + def __init__(self, op, graph): assert isinstance(graph, GraphWrapper) self._op = op @@ -212,6 +214,7 @@ def static_flops(program, print_detail=False): class Table(object): + def __init__(self, table_heads): self.table_heads = table_heads self.table_len = [] @@ -225,8 +228,8 @@ class Table(object): print('The row_str should be a list') if len(row_str) != self.col_num: print( - 'The length of row data should be equal the length of table heads, but the data: {} is not equal table heads {}'. - format(len(row_str), self.col_num)) + 'The length of row data should be equal the length of table heads, but the data: {} is not equal table heads {}' + .format(len(row_str), self.col_num)) for i in range(self.col_num): if len(str(row_str[i])) > self.table_len[i]: self.table_len[i] = len(str(row_str[i])) diff --git a/python/paddle/incubate/asp/__init__.py b/python/paddle/incubate/asp/__init__.py index 59f794ef28a..d2a56fd117c 100644 --- a/python/paddle/incubate/asp/__init__.py +++ b/python/paddle/incubate/asp/__init__.py @@ -1,12 +1,12 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,10 +19,7 @@ from ...fluid.contrib.sparsity import prune_model #noqa: F401 from ...fluid.contrib.sparsity import set_excluded_layers #noqa: F401 from ...fluid.contrib.sparsity import reset_excluded_layers #noqa: F401 -__all__ = [ #noqa - 'calculate_density', - 'decorate', - 'prune_model', - 'set_excluded_layers', +__all__ = [ #noqa + 'calculate_density', 'decorate', 'prune_model', 'set_excluded_layers', 'reset_excluded_layers' ] diff --git a/python/paddle/incubate/autograd/__init__.py b/python/paddle/incubate/autograd/__init__.py index a57dac02be4..718bc018d9f 100644 --- a/python/paddle/incubate/autograd/__init__.py +++ b/python/paddle/incubate/autograd/__init__.py @@ -16,12 +16,6 @@ from .primx import prim2orig from .utils import enable_prim, disable_prim, prim_enabled __all__ = [ # noqa - 'vjp', - 'jvp', - 'Jacobian', - 'Hessian', - 'prim2orig', - 'enable_prim', - 'disable_prim', - 'prim_enabled' + 'vjp', 'jvp', 'Jacobian', 'Hessian', 'prim2orig', 'enable_prim', + 'disable_prim', 'prim_enabled' ] diff --git a/python/paddle/incubate/autograd/primops.py b/python/paddle/incubate/autograd/primops.py index 11e0e51cb76..6017ac35989 100644 --- a/python/paddle/incubate/autograd/primops.py +++ b/python/paddle/incubate/autograd/primops.py @@ -33,9 +33,13 @@ def _simple_binop(helper): if out is None: out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type=optype, inputs={'X': x, - 'Y': y}, outputs={'Z': out}, attrs={}) + helper.append_op(type=optype, + inputs={ + 'X': x, + 'Y': y + }, + outputs={'Z': out}, + attrs={}) return out @@ -51,8 +55,10 @@ def _manipulation_unop(helper): if out is None: out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type=optype, inputs={'X': x}, outputs={'Y': out}, attrs=attrs) + helper.append_op(type=optype, + inputs={'X': x}, + outputs={'Y': out}, + attrs=attrs) return out @@ -75,12 +81,13 @@ def set_value(x, y, axis, starts, ends, strides, out): assert x is out, "x and out should be the same Tensor in set_value" attrs = {'axes': axis, 'starts': starts, 'ends': ends, 'steps': strides} helper = LayerHelper('set_value', **locals()) - helper.append_op( - type=helper.layer_type, - inputs={'Input': x, - 'ValueTensor': y}, - outputs={'Out': out}, - attrs=attrs) + helper.append_op(type=helper.layer_type, + inputs={ + 'Input': x, + 'ValueTensor': y + }, + outputs={'Out': out}, + attrs=attrs) return out @@ -136,7 +143,8 @@ def split(x, num_or_sections, axis=0, outs=None): else: if not isinstance(num_or_sections, int): raise TypeError( - f'num_or_sections must be int, but got {type(num_or_sections)}.') + f'num_or_sections must be int, but got {type(num_or_sections)}.' 
+ ) n = num_or_sections attrs = {'num_or_sections': num_or_sections, 'axis': axis} @@ -147,11 +155,10 @@ def split(x, num_or_sections, axis=0, outs=None): helper.create_variable_for_type_inference(dtype=x.dtype) for i in range(n) ] - helper.append_op( - type=helper.layer_type, - inputs={'X': x}, - outputs={'YS': outs}, - attrs=attrs) + helper.append_op(type=helper.layer_type, + inputs={'X': x}, + outputs={'YS': outs}, + attrs=attrs) return outs @@ -163,11 +170,10 @@ def concat(xs, axis=0, out=None): helper = LayerHelper('concat_p', **locals()) if out is None: out = helper.create_variable_for_type_inference(dtype=xs[0].dtype) - helper.append_op( - type=helper.layer_type, - inputs={'XS': xs}, - outputs={'Y': out}, - attrs=attrs) + helper.append_op(type=helper.layer_type, + inputs={'XS': xs}, + outputs={'Y': out}, + attrs=attrs) return out @@ -183,11 +189,10 @@ def reduce(x, axis, keepdim=False, out=None): if out is None: out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type=helper.layer_type, - inputs={'X': x}, - outputs={'Y': out}, - attrs=attrs) + helper.append_op(type=helper.layer_type, + inputs={'X': x}, + outputs={'Y': out}, + attrs=attrs) return out @@ -217,11 +222,10 @@ def slice_select(x, axis, starts, ends, strides, out=None): helper = LayerHelper('slice_select_p', **locals()) if out is None: out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type=helper.layer_type, - inputs={'X': x}, - outputs={'Y': out}, - attrs=attrs) + helper.append_op(type=helper.layer_type, + inputs={'X': x}, + outputs={'Y': out}, + attrs=attrs) return out @@ -239,12 +243,13 @@ def slice_assign(x, y, axis, starts, ends, strides, out=None): helper = LayerHelper('slice_assign_p', **locals()) if out is None: out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type=helper.layer_type, - inputs={'X': x, - 'Y': y}, - outputs={'Z': out}, - attrs=attrs) + helper.append_op(type=helper.layer_type, + inputs={ + 'X': x, + 'Y': y + }, + outputs={'Z': out}, + attrs=attrs) return out @@ -254,12 +259,13 @@ def gather(x, indextensor, axis, out=None): helper = LayerHelper('gather_p', **locals()) if out is None: out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type=helper.layer_type, - inputs={'X': x, - 'IndexTensor': indextensor}, - outputs={'Y': out}, - attrs=attrs) + helper.append_op(type=helper.layer_type, + inputs={ + 'X': x, + 'IndexTensor': indextensor + }, + outputs={'Y': out}, + attrs=attrs) return out @@ -279,11 +285,12 @@ def scatter_add(x, y, indextensor, axis, out=None): helper = LayerHelper('scatter_add_p', **locals()) if out is None: out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type=helper.layer_type, - inputs={'X': x, - 'Y': y, - 'IndexTensor': indextensor}, - outputs={'Z': out}, - attrs=attrs) + helper.append_op(type=helper.layer_type, + inputs={ + 'X': x, + 'Y': y, + 'IndexTensor': indextensor + }, + outputs={'Z': out}, + attrs=attrs) return out diff --git a/python/paddle/incubate/autograd/primreg.py b/python/paddle/incubate/autograd/primreg.py index 35a0dbcfc29..6c3ece09a6b 100644 --- a/python/paddle/incubate/autograd/primreg.py +++ b/python/paddle/incubate/autograd/primreg.py @@ -186,6 +186,7 @@ def REGISTER_ORIG2PRIM(op_type): raise TypeError(f'op_type must be str, but got {type(op_type)}.') def wrapper(f): + def _lower(op, *args, **kwargs): assert op.type == op_type, f'op.type should be equal to op_type, but op.type is {op.type} and 
op_type is {op_type}' return f(op, *args, **kwargs) @@ -217,6 +218,7 @@ def REGISTER_PRIM2ORIG(op_type): raise TypeError(f'op_type must be str, but got {type(op_type)}.') def wrapper(f): + def _lower(op, *args, **kwargs): assert op.type == op_type, f'op.type should be equal to op_type, but op.type is {op.type} and op_type is {op_type}' return f(op, *args, **kwargs) @@ -247,6 +249,7 @@ def REGISTER_JVP(op_type): raise TypeError(f'op_type must be str, but got {type(op_type)}.') def wrapper(f): + def _jvp(op, *args, **kwargs): assert op.type == op_type, f'op.type should be equal to op_type, but op.type is {op.type} and op_type is {op_type}' return f(op, *args, **kwargs) @@ -279,6 +282,7 @@ def REGISTER_TRANSPOSE(op_type): raise TypeError(f'op_type must be str, but got {type(op_type)}.') def wrapper(f): + def _transpose(op, dot_checker, *args, **kwargs): assert op.type == op_type, f'op.type should be equal to op_type, but op.type is {op.type} and op_type is {op_type}' return f(op, dot_checker, *args, **kwargs) diff --git a/python/paddle/incubate/autograd/primrules.py b/python/paddle/incubate/autograd/primrules.py index 075fe83e252..7983032f1a1 100644 --- a/python/paddle/incubate/autograd/primrules.py +++ b/python/paddle/incubate/autograd/primrules.py @@ -79,17 +79,20 @@ def elementwise_add_orig2prim(op, x, y): if x.shape != y.shape: y = broadcast(y, shape=x.shape) if op.attr('Scale_x') - 1.0 > 1e-5: - scale_x = fill_const( - shape=x.shape, dtype=x.dtype, value=op.attr('Scale_x')) + scale_x = fill_const(shape=x.shape, + dtype=x.dtype, + value=op.attr('Scale_x')) x = mul(x, scale_x) if op.attr('Scale_y') - 1.0 > 1e-5: - scale_y = fill_const( - shape=y.shape, dtype=y.dtype, value=op.attr('Scale_y')) + scale_y = fill_const(shape=y.shape, + dtype=y.dtype, + value=op.attr('Scale_y')) y = mul(y, scale_y) z = add(x, y) if op.attr('Scale_out') - 1.0 > 1e-5: - scale_out = fill_const( - shape=z.shape, dtype=z.dtype, value=op.attr('Scale_out')) + scale_out = fill_const(shape=z.shape, + dtype=z.dtype, + value=op.attr('Scale_out')) z = mul(z, scale_out) return z @@ -99,17 +102,20 @@ def elementwise_sub_orig2prim(op, x, y): if x.shape != y.shape: y = broadcast(y, shape=x.shape) if op.attr('Scale_x') - 1.0 > 1e-5: - scale_x = fill_const( - shape=x.shape, dtype=x.dtype, value=op.attr('Scale_x')) + scale_x = fill_const(shape=x.shape, + dtype=x.dtype, + value=op.attr('Scale_x')) x = mul(x, scale_x) if op.attr('Scale_y') - 1.0 > 1e-5: - scale_y = fill_const( - shape=y.shape, dtype=y.dtype, value=op.attr('Scale_y')) + scale_y = fill_const(shape=y.shape, + dtype=y.dtype, + value=op.attr('Scale_y')) y = mul(y, scale_y) z = sub(x, y) if op.attr('Scale_out') - 1.0 > 1e-5: - scale_out = fill_const( - shape=z.shape, dtype=z.dtype, value=op.attr('Scale_out')) + scale_out = fill_const(shape=z.shape, + dtype=z.dtype, + value=op.attr('Scale_out')) z = mul(z, scale_out) return z @@ -119,17 +125,20 @@ def elementwise_mul_orig2prim(op, x, y): if x.shape != y.shape: y = broadcast(y, shape=x.shape) if op.attr('Scale_x') - 1.0 > 1e-5: - scale_x = fill_const( - shape=x.shape, dtype=x.dtype, value=op.attr('Scale_x')) + scale_x = fill_const(shape=x.shape, + dtype=x.dtype, + value=op.attr('Scale_x')) x = mul(x, scale_x) if op.attr('Scale_y') - 1.0 > 1e-5: - scale_y = fill_const( - shape=y.shape, dtype=y.dtype, value=op.attr('Scale_y')) + scale_y = fill_const(shape=y.shape, + dtype=y.dtype, + value=op.attr('Scale_y')) y = mul(y, scale_y) z = mul(x, y) if op.attr('Scale_out') - 1.0 > 1e-5: - scale_out = fill_const( - shape=z.shape, 
dtype=z.dtype, value=op.attr('Scale_out')) + scale_out = fill_const(shape=z.shape, + dtype=z.dtype, + value=op.attr('Scale_out')) z = mul(z, scale_out) return z @@ -160,8 +169,9 @@ def index_select_orig2prim(op, index_t, x): @REGISTER_ORIG2PRIM('scale') def scale_orig2prim(op, scale_t, x): if scale_t is None: - scale_t = fill_const( - shape=x.shape, dtype=x.dtype, value=op.attr('scale')) + scale_t = fill_const(shape=x.shape, + dtype=x.dtype, + value=op.attr('scale')) bias_t = fill_const(shape=x.shape, dtype=x.dtype, value=op.attr('bias')) if op.attr('bias_after_scale'): return add(mul(x, scale_t), bias_t) @@ -182,6 +192,7 @@ def sqrt_orig2prim(op, x): @REGISTER_ORIG2PRIM('matmul_v2') def matmul_v2_orig2prim(op, x, y): + def trans(shape): ret = [i for i in range(len(shape))] ret[-1], ret[-2] = ret[-2], ret[-1] @@ -207,9 +218,9 @@ def reshape2_orig2prim(op, shape_t, shape_tl, x): assert shape_t is None, 'Can not lower reshape2 into prim ops with shapetensor.' assert shape_tl is None, 'Can not lower reshape2 into prim ops with shapetensorlist.' y, xshape = get_output_var_list(op) - return reshape( - x, shape=y.shape), fill_const( - shape=xshape.shape, dtype=xshape.dtype, value=0.0) + return reshape(x, shape=y.shape), fill_const(shape=xshape.shape, + dtype=xshape.dtype, + value=0.0) @REGISTER_ORIG2PRIM('concat') @@ -236,6 +247,7 @@ def slice_orig2prim(op, ends_t, ends_tl, x, starts_t, starts_tl): @REGISTER_ORIG2PRIM('p_norm') def p_norm_orig2prim(op, x): + def num_el(shape): n = 1 for s in shape: @@ -308,8 +320,9 @@ def split_prim2orig(op, x): num_or_sections = op.attr('num_or_sections') if len(num_or_sections) == 1: num_or_sections = num_or_sections[0] - return paddle.split( - x, num_or_sections=num_or_sections, axis=op.attr('axis')) + return paddle.split(x, + num_or_sections=num_or_sections, + axis=op.attr('axis')) @REGISTER_PRIM2ORIG('concat_p') @@ -329,25 +342,23 @@ def matmul_prim2orig(op, x, y): @REGISTER_PRIM2ORIG('slice_select_p') def slice_select_prim2orig(op, x): - return paddle.strided_slice( - x, - axes=op.attr('axis'), - starts=op.attr('starts'), - ends=op.attr('ends'), - strides=op.attr('strides')) + return paddle.strided_slice(x, + axes=op.attr('axis'), + starts=op.attr('starts'), + ends=op.attr('ends'), + strides=op.attr('strides')) @REGISTER_PRIM2ORIG('slice_assign_p') def slice_assign_prim2orig(op, x, y): x_copy = paddle.assign(x) - return set_value( - x_copy, - y, - axis=op.attr('axis'), - starts=op.attr('starts'), - ends=op.attr('ends'), - strides=op.attr('strides'), - out=x_copy) + return set_value(x_copy, + y, + axis=op.attr('axis'), + starts=op.attr('starts'), + ends=op.attr('ends'), + strides=op.attr('strides'), + out=x_copy) @REGISTER_PRIM2ORIG('gather_p') @@ -365,10 +376,9 @@ def scatter_add_prim2orig(op, index_t, x, y): @REGISTER_PRIM2ORIG('fill_constant_p') def fill_constant_prim2orig(op): - return paddle.full( - shape=op.attr('shape'), - fill_value=op.attr('value'), - dtype=INT_DTYPE_2_STRING[op.attr('dtype')]) + return paddle.full(shape=op.attr('shape'), + fill_value=op.attr('value'), + dtype=INT_DTYPE_2_STRING[op.attr('dtype')]) ## Register linearize rules @@ -515,8 +525,12 @@ def slice_select_jvp(op, x_dot): starts = op.attr('starts') ends = op.attr('ends') strides = op.attr('strides') - return linear_jvp( - op, x_dot, axis=axis, starts=starts, ends=ends, strides=strides) + return linear_jvp(op, + x_dot, + axis=axis, + starts=starts, + ends=ends, + strides=strides) @REGISTER_JVP('slice_assign_p') @@ -530,8 +544,13 @@ def slice_assign_jvp(op, x_dot, y_dot): 
starts = op.attr('starts') ends = op.attr('ends') strides = op.attr('strides') - return linear_jvp( - op, x_dot, y_dot, axis=axis, starts=starts, ends=ends, strides=strides) + return linear_jvp(op, + x_dot, + y_dot, + axis=axis, + starts=starts, + ends=ends, + strides=strides) @REGISTER_JVP('gather_p') @@ -677,8 +696,12 @@ def slice_select_transpose(op, check_dot, y_bar): starts = op.attr('starts') ends = op.attr('ends') strides = op.attr('strides') - return slice_assign( - zeros, y_bar, axis=axis, starts=starts, ends=ends, strides=strides) + return slice_assign(zeros, + y_bar, + axis=axis, + starts=starts, + ends=ends, + strides=strides) @REGISTER_TRANSPOSE('slice_assign_p') @@ -692,10 +715,17 @@ def slice_assign_transpose(op, check_dot, z_bar): starts = op.attr('starts') ends = op.attr('ends') strides = op.attr('strides') - x_bar = slice_assign( - z_bar, zeros, axis=axis, starts=starts, ends=ends, strides=strides) - y_bar = slice_select( - z_bar, axis=axis, starts=starts, ends=ends, strides=strides) + x_bar = slice_assign(z_bar, + zeros, + axis=axis, + starts=starts, + ends=ends, + strides=strides) + y_bar = slice_select(z_bar, + axis=axis, + starts=starts, + ends=ends, + strides=strides) return x_bar, y_bar diff --git a/python/paddle/incubate/autograd/primx.py b/python/paddle/incubate/autograd/primx.py index 1f5c4f9a5ce..5ee45116e66 100644 --- a/python/paddle/incubate/autograd/primx.py +++ b/python/paddle/incubate/autograd/primx.py @@ -51,7 +51,9 @@ def topo_path(xs, ys, block=None): reached_vars[id(x)] = x # Reaching test, returning whether an op is reached from the given input - reaching = lambda op: any(id(v) in reached_vars for v in flatten_and_remove_none(get_input_var_list(op))) + reaching = lambda op: any( + id(v) in reached_vars + for v in flatten_and_remove_none(get_input_var_list(op))) # block.ops are supposedly in the order that preserves correct data # dependence. @@ -63,7 +65,9 @@ def topo_path(xs, ys, block=None): reached_vars[id(var)] = var used_vars = OrderedDict((id(y), y) for y in ys if id(y) in reached_vars) - back_reaching = lambda op: any(id(out) in used_vars for out in flatten_and_remove_none(get_output_var_list(op))) + back_reaching = lambda op: any( + id(out) in used_vars + for out in flatten_and_remove_none(get_output_var_list(op))) # Backward pass to find all used variables for op in reversed(path): @@ -276,7 +280,7 @@ class Transform(object): self.var2dot.delete(x) for op in path: - # An input var may not be on the input-output path, which implies + # An input var may not be on the input-output path, which implies # there may be None's in `ins_dot`. In this case we place # the original input in the position of the otherwise forward # gradient. @@ -476,13 +480,12 @@ def _lower(block, reverse): from paddle.fluid.dygraph.base import param_guard new_op_desc = block.desc.append_op() with param_guard(inputs), param_guard(outputs): - op = Operator( - block=block, - desc=new_op_desc, - type=op.type, - inputs=inputs, - outputs=outputs, - attrs=attrs) + op = Operator(block=block, + desc=new_op_desc, + type=op.type, + inputs=inputs, + outputs=outputs, + attrs=attrs) block.ops.append(op) # Step3: Do some post-processing work @@ -594,7 +597,7 @@ def _gradients(ys, xs, ys_bar=None): assert el is None or el.block == block, f'variable in xs and ys should be None or in current block of main program' # TODO(Tongxin) without any prior knowledge about whether the program # is completely lowered to primitive ops, it's mandatory to run the lowering - # pass once and again. 
This is obviously inefficient and needs to be + # pass once and again. This is obviously inefficient and needs to be # optimized. orig2prim(block) diff --git a/python/paddle/incubate/autograd/utils.py b/python/paddle/incubate/autograd/utils.py index ec4f0915ba3..44bbd32bc9c 100644 --- a/python/paddle/incubate/autograd/utils.py +++ b/python/paddle/incubate/autograd/utils.py @@ -17,6 +17,7 @@ from paddle.fluid import framework as framework class PrimOption(object): + def __init__(self): self.enable_prim = False diff --git a/python/paddle/incubate/autotune.py b/python/paddle/incubate/autotune.py index 7ac555e2520..db7f881e4cf 100644 --- a/python/paddle/incubate/autotune.py +++ b/python/paddle/incubate/autotune.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/incubate/distributed/models/moe/__init__.py b/python/paddle/incubate/distributed/models/moe/__init__.py index fd06b4b8e52..795c939e81f 100644 --- a/python/paddle/incubate/distributed/models/moe/__init__.py +++ b/python/paddle/incubate/distributed/models/moe/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,4 +15,5 @@ from .gate import GShardGate, BaseGate, SwitchGate, NaiveGate from .moe_layer import MoELayer from .grad_clip import ClipGradForMOEByGlobalNorm + ClipGradByGlobalNorm = ClipGradForMOEByGlobalNorm diff --git a/python/paddle/incubate/distributed/models/moe/gate/__init__.py b/python/paddle/incubate/distributed/models/moe/gate/__init__.py index d4bf666eb69..2bfa5cd62cd 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/__init__.py +++ b/python/paddle/incubate/distributed/models/moe/gate/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/incubate/distributed/models/moe/gate/base_gate.py b/python/paddle/incubate/distributed/models/moe/gate/base_gate.py index f527e82f043..9715f4b2a25 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/base_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/base_gate.py @@ -1,17 +1,17 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# +# # The file has been adapted from the file: # https://github.com/laekov/fastmoe/blob/master/fmoe/gates/base_gate.py # Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 @@ -23,6 +23,7 @@ import paddle.nn as nn class BaseGate(nn.Layer): + def __init__(self, num_expert, world_size): super().__init__() self.world_size = world_size diff --git a/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py b/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py index 3618ec56e96..643e23feff1 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -28,6 +28,7 @@ from ..utils import limit_by_capacity class GShardGate(NaiveGate): + def __init__(self, d_model, num_expert, @@ -43,32 +44,29 @@ class GShardGate(NaiveGate): self.group = group def forward(self, x): - topk_val, topk_idx, gate_score = super().forward( - x, return_all_scores=True) + topk_val, topk_idx, gate_score = super().forward(x, + return_all_scores=True) s = gate_score.shape[0] top1_idx = topk_idx.flatten() - c_e = paddle.scatter( - paddle.zeros(shape=[self.tot_expert]), - top1_idx, - paddle.ones_like( - top1_idx, dtype="float32"), - overwrite=False) / s + c_e = paddle.scatter(paddle.zeros(shape=[self.tot_expert]), + top1_idx, + paddle.ones_like(top1_idx, dtype="float32"), + overwrite=False) / s m_e = paddle.mean(F.softmax(gate_score, axis=1), axis=0) loss = paddle.mean(c_e * m_e) * (self.num_expert**2) self.set_loss(loss) cap_rate = self.capacity[0 if self.training else 1] capacity = math.ceil(cap_rate * x.shape[0]) - _new_lec, _new_gec, topk_idx = limit_by_capacity( - topk_idx, - self.num_expert, - self.world_size, - capacity, - group=self.group) + _new_lec, _new_gec, topk_idx = limit_by_capacity(topk_idx, + self.num_expert, + self.world_size, + capacity, + group=self.group) if self.random_routing: - rand_routing_prob = paddle.rand( - shape=[gate_score.shape[0]], dtype="float32") + rand_routing_prob = paddle.rand(shape=[gate_score.shape[0]], + dtype="float32") topk_idx = paddle.distributed.models.moe.utils._random_routing( topk_idx, topk_val, rand_routing_prob) return topk_val, topk_idx diff --git a/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py b/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py index c3c68685445..476f99b9f44 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py +++ 
b/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py @@ -3,9 +3,9 @@ # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -27,6 +27,7 @@ import paddle.nn.functional as F class NaiveGate(BaseGate): + def __init__(self, d_model, num_expert, world_size, topk=2): super().__init__(num_expert, world_size) self.gate = nn.Linear(d_model, self.tot_expert) @@ -36,8 +37,11 @@ class NaiveGate(BaseGate): def forward(self, inp, return_all_scores=False): gate = self.gate(inp) - gate_top_k_val, gate_top_k_idx = paddle.topk( - gate, k=self.top_k, axis=-1, largest=True, sorted=False) + gate_top_k_val, gate_top_k_idx = paddle.topk(gate, + k=self.top_k, + axis=-1, + largest=True, + sorted=False) if return_all_scores: return gate_top_k_val, gate_top_k_idx, gate diff --git a/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py b/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py index 776516989e5..60475198540 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py @@ -3,9 +3,9 @@ # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -28,6 +28,7 @@ from ..utils import limit_by_capacity class SwitchGate(NaiveGate): + def __init__(self, d_model, num_expert, @@ -55,20 +56,19 @@ class SwitchGate(NaiveGate): cap_rate = self.capacity[0 if self.training else 1] capacity = math.ceil(cap_rate * inp.shape[0]) - _new_lec, _new_gec, top1_idx = limit_by_capacity( - top1_idx, - self.num_expert, - self.world_size, - capacity, - group=self.group) + _new_lec, _new_gec, top1_idx = limit_by_capacity(top1_idx, + self.num_expert, + self.world_size, + capacity, + group=self.group) valid_idx = top1_idx[top1_idx > -1] valid_idx_tmp = paddle.reshape(valid_idx, shape=[len(valid_idx), 1]) fraction_expert = paddle.scatter_nd_add( x=paddle.zeros(shape=[self.tot_expert]), index=valid_idx_tmp, - updates=paddle.ones_like( - valid_idx, dtype=paddle.float32).reshape( - shape=[len(valid_idx)]), ) / valid_idx.numel() + updates=paddle.ones_like(valid_idx, dtype=paddle.float32).reshape( + shape=[len(valid_idx)]), + ) / valid_idx.numel() prob_expert = score.sum(axis=0) / valid_idx.numel() loss = (fraction_expert * prob_expert).sum() * self.tot_expert self.set_loss(loss) diff --git a/python/paddle/incubate/distributed/models/moe/grad_clip.py b/python/paddle/incubate/distributed/models/moe/grad_clip.py index cf56f74d1f1..83e491a0874 100644 --- a/python/paddle/incubate/distributed/models/moe/grad_clip.py +++ b/python/paddle/incubate/distributed/models/moe/grad_clip.py @@ -178,10 +178,9 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase): global_norm_var_moe, _ \ = self.get_l2_norm_pow(moe_params_grads, sum_dtype) if global_norm_var_moe is not None: - collective.all_reduce( - global_norm_var_moe, - op=collective.ReduceOp.SUM, - group=self.moe_group) + collective.all_reduce(global_norm_var_moe, + op=collective.ReduceOp.SUM, + group=self.moe_group) if global_norm_var_normal is None and global_norm_var_moe is None: return params_grads @@ -199,12 +198,13 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase): params_and_grads = [] global_norm_var = layers.sqrt(global_norm_var) - max_global_norm = layers.fill_constant( - shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm) - clip_var = layers.elementwise_div( - x=max_global_norm, - y=layers.elementwise_max( - x=global_norm_var, y=max_global_norm)) + max_global_norm = layers.fill_constant(shape=[1], + dtype=global_norm_var.dtype, + value=self.clip_norm) + clip_var = layers.elementwise_div(x=max_global_norm, + y=layers.elementwise_max( + x=global_norm_var, + y=max_global_norm)) for p, g in params_grads: if g is None: continue diff --git a/python/paddle/incubate/distributed/models/moe/moe_layer.py b/python/paddle/incubate/distributed/models/moe/moe_layer.py index 8ac0add8014..367b2c189e3 100644 --- a/python/paddle/incubate/distributed/models/moe/moe_layer.py +++ b/python/paddle/incubate/distributed/models/moe/moe_layer.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -51,12 +51,11 @@ def _local_gather(inp, pos, out_batch_size, maybe_overlap=True): if pos.shape != [0]: origin_dtype = inp.dtype inp = paddle.cast(inp, dtype="float32") - inp_buf = paddle.scatter( - paddle.zeros( - shape=[out_batch_size, inp.shape[-1]], dtype="float32"), - pos, - inp, - overwrite=True) + inp_buf = paddle.scatter(paddle.zeros( + shape=[out_batch_size, inp.shape[-1]], dtype="float32"), + pos, + inp, + overwrite=True) inp_buf = paddle.cast(inp_buf, dtype=origin_dtype) else: inp_buf = paddle.zeros([out_batch_size, inp.shape[-1]], dtype=inp.dtype) @@ -104,11 +103,10 @@ class MoEScatter(PyLayer): group=None): local_input_buf = _local_scatter(inp, pos) if world_size > 1: - global_input_buf = global_scatter( - local_input_buf, - local_expert_count, - global_expert_count, - group=group) + global_input_buf = global_scatter(local_input_buf, + local_expert_count, + global_expert_count, + group=group) else: global_input_buf = local_input_buf @@ -124,8 +122,10 @@ class MoEScatter(PyLayer): (inp_batch_size, world_size, group) = ctx.moe_args if world_size > 1: - local_grad_in = global_gather( - grad, local_expert_count, global_expert_count, group=group) + local_grad_in = global_gather(grad, + local_expert_count, + global_expert_count, + group=group) else: local_grad_in = grad grad_in = _local_gather(local_grad_in, pos, inp_batch_size) @@ -150,11 +150,10 @@ class EagerMoEScatter(EagerPyLayer): group=None): local_input_buf = _local_scatter(inp, pos) if world_size > 1: - global_input_buf = global_scatter( - local_input_buf, - local_expert_count, - global_expert_count, - group=group) + global_input_buf = global_scatter(local_input_buf, + local_expert_count, + global_expert_count, + group=group) else: global_input_buf = local_input_buf @@ -170,8 +169,10 @@ class EagerMoEScatter(EagerPyLayer): (inp_batch_size, world_size, group) = ctx.moe_args if world_size > 1: - local_grad_in = global_gather( - grad, local_expert_count, global_expert_count, group=group) + local_grad_in = global_gather(grad, + local_expert_count, + global_expert_count, + group=group) else: local_grad_in = grad grad_in = _local_gather(local_grad_in, pos, inp_batch_size) @@ -194,15 +195,16 @@ class MoEGather(PyLayer): world_size, group=None): if world_size > 1: - local_output_buf = global_gather( - global_output_buf, - local_expert_count, - global_expert_count, - group=group) + local_output_buf = global_gather(global_output_buf, + local_expert_count, + global_expert_count, + group=group) else: local_output_buf = global_output_buf - output = _local_gather( - local_output_buf, pos, local_batch_size, maybe_overlap=False) + output = _local_gather(local_output_buf, + pos, + local_batch_size, + maybe_overlap=False) ctx.moe_args = (global_output_buf.shape[0], world_size, group) variables = (pos, local_expert_count, global_expert_count) @@ -215,11 +217,10 @@ class MoEGather(PyLayer): fwd_batch_size, world_size, group = ctx.moe_args grad_out_buf = _local_scatter(grad_out, pos) if world_size > 1: - global_grad_out_buf = global_scatter( - grad_out_buf, - local_expert_count, - global_expert_count, - group=group) + global_grad_out_buf = global_scatter(grad_out_buf, + local_expert_count, + global_expert_count, + group=group) else: global_grad_out_buf = grad_out_buf return global_grad_out_buf, None, None, None @@ -241,15 +242,16 @@ class EagerMoEGather(EagerPyLayer): world_size, group=None): if world_size > 1: - local_output_buf = global_gather( - global_output_buf, - local_expert_count, - global_expert_count, - group=group) + 
local_output_buf = global_gather(global_output_buf, + local_expert_count, + global_expert_count, + group=group) else: local_output_buf = global_output_buf - output = _local_gather( - local_output_buf, pos, local_batch_size, maybe_overlap=False) + output = _local_gather(local_output_buf, + pos, + local_batch_size, + maybe_overlap=False) ctx.moe_args = (global_output_buf.shape[0], world_size, group) variables = (pos, local_expert_count, global_expert_count) @@ -262,11 +264,10 @@ class EagerMoEGather(EagerPyLayer): fwd_batch_size, world_size, group = ctx.moe_args grad_out_buf = _local_scatter(grad_out, pos) if world_size > 1: - global_grad_out_buf = global_scatter( - grad_out_buf, - local_expert_count, - global_expert_count, - group=group) + global_grad_out_buf = global_scatter(grad_out_buf, + local_expert_count, + global_expert_count, + group=group) else: global_grad_out_buf = grad_out_buf return global_grad_out_buf, None, None, None @@ -288,8 +289,10 @@ class AllGather(PyLayer): @staticmethod def backward(ctx, grad_out): rank, dim0 = ctx.args - return paddle.slice( - grad_out, axes=[0], starts=[rank * dim0], ends=[(rank + 1) * dim0]) + return paddle.slice(grad_out, + axes=[0], + starts=[rank * dim0], + ends=[(rank + 1) * dim0]) class EagerAllGather(EagerPyLayer): @@ -308,8 +311,10 @@ class EagerAllGather(EagerPyLayer): @staticmethod def backward(ctx, grad_out): rank, dim0 = ctx.args - return paddle.slice( - grad_out, axes=[0], starts=[rank * dim0], ends=[(rank + 1) * dim0]) + return paddle.slice(grad_out, + axes=[0], + starts=[rank * dim0], + ends=[(rank + 1) * dim0]) class Slice(PyLayer): @@ -323,8 +328,10 @@ class Slice(PyLayer): local_batch_size = B // world_size batch_start = local_batch_size * rank batch_end = min(batch_start + local_batch_size, B) - inp = paddle.slice( - inp, axes=[0], starts=[batch_start], ends=[batch_end]) + inp = paddle.slice(inp, + axes=[0], + starts=[batch_start], + ends=[batch_end]) ctx.args = world_size, group return inp @@ -345,8 +352,10 @@ class EagerSlice(EagerPyLayer): local_batch_size = B // world_size batch_start = local_batch_size * rank batch_end = min(batch_start + local_batch_size, B) - inp = paddle.slice( - inp, axes=[0], starts=[batch_start], ends=[batch_end]) + inp = paddle.slice(inp, + axes=[0], + starts=[batch_start], + ends=[batch_end]) ctx.args = world_size, group return inp @@ -368,7 +377,8 @@ def prepare_forward(gate, num_expert, world_size, moe_group): local_expert_count, global_expert_count, fwd_expert_count, - fwd_batch_size, ) + fwd_batch_size, + ) class MoELayer(nn.Layer): @@ -467,25 +477,22 @@ class MoELayer(nn.Layer): self.top_k = gate.get("top_k", 2) gate = gate.get("type", "gshard") if gate == "naive" or gate is None: - gate = NaiveGate( - self.d_model, - num_expert=len(experts), - world_size=self.world_size, - topk=self.top_k) + gate = NaiveGate(self.d_model, + num_expert=len(experts), + world_size=self.world_size, + topk=self.top_k) elif gate == "gshard": - gate = GShardGate( - self.d_model, - num_expert=len(experts), - world_size=self.world_size, - topk=self.top_k, - group=self.group) + gate = GShardGate(self.d_model, + num_expert=len(experts), + world_size=self.world_size, + topk=self.top_k, + group=self.group) elif gate == "switch": - gate = SwitchGate( - self.d_model, - num_expert=len(experts), - world_size=self.world_size, - topk=self.top_k, - group=self.group) + gate = SwitchGate(self.d_model, + num_expert=len(experts), + world_size=self.world_size, + topk=self.top_k, + group=self.group) else: assert False, "We only support 
naive gate, \ gshard gate and switch gate, \ @@ -521,8 +528,8 @@ class MoELayer(nn.Layer): local_expert_count, global_expert_count, fwd_expert_count, - fwd_batch_size, ) = prepare_forward(gate, self.num_expert, - self.world_size, self.group) + fwd_batch_size, + ) = prepare_forward(gate, self.num_expert, self.world_size, self.group) topk = 1 if len(gate.shape) == 2: @@ -563,8 +570,8 @@ class MoELayer(nn.Layer): if self.recompute_interval <= 0 or x.shape[0] == 0: x = experts_fwd(x, fwd_expert_count.numpy(), self.experts) else: - x = _hp_recompute(experts_fwd, x, - fwd_expert_count.numpy(), self.experts) + x = _hp_recompute(experts_fwd, x, fwd_expert_count.numpy(), + self.experts) out_batch_size = inp.shape[0] if len(gate.shape) == 2: diff --git a/python/paddle/incubate/distributed/models/moe/utils.py b/python/paddle/incubate/distributed/models/moe/utils.py index 09a6b788b78..b195ffdb815 100644 --- a/python/paddle/incubate/distributed/models/moe/utils.py +++ b/python/paddle/incubate/distributed/models/moe/utils.py @@ -3,9 +3,9 @@ # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -60,10 +60,13 @@ def count_by_gate(gate, num_expert, world_size, require_pos=True, group=None): def limit_by_capacity(topk_idx, num_expert, world_size, capacity, group=None): with paddle.no_grad(): - capacity = paddle.ones( - shape=[num_expert], dtype=paddle.int64) * capacity - pos, lec, gec = count_by_gate( - topk_idx, num_expert, world_size, require_pos=False, group=group) + capacity = paddle.ones(shape=[num_expert], + dtype=paddle.int64) * capacity + pos, lec, gec = count_by_gate(topk_idx, + num_expert, + world_size, + require_pos=False, + group=group) new_gec = _limit_by_capacity(gec, capacity, world_size) if world_size > 1: assert group.nranks == world_size diff --git a/python/paddle/incubate/multiprocessing/reductions.py b/python/paddle/incubate/multiprocessing/reductions.py index cfbc55afd3b..54d40312268 100644 --- a/python/paddle/incubate/multiprocessing/reductions.py +++ b/python/paddle/incubate/multiprocessing/reductions.py @@ -47,6 +47,7 @@ def _supported_check(): class LRUSharedCache(OrderedDict): + def __init__(self): self.limit = 128 self._after_fork() diff --git a/python/paddle/incubate/nn/functional/fused_matmul_bias.py b/python/paddle/incubate/nn/functional/fused_matmul_bias.py index bcc2e621445..d963c5e1ade 100644 --- a/python/paddle/incubate/nn/functional/fused_matmul_bias.py +++ b/python/paddle/incubate/nn/functional/fused_matmul_bias.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -62,14 +62,17 @@ def fused_matmul_bias(x, helper = LayerHelper('fused_matmul_bias', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='fused_gemm_epilogue', - inputs={'X': x, - 'Y': y, - 'Bias': bias}, - outputs={'Out': out}, - attrs={'trans_x': transpose_x, - 'trans_y': transpose_y}) + helper.append_op(type='fused_gemm_epilogue', + inputs={ + 'X': x, + 'Y': y, + 'Bias': bias + }, + outputs={'Out': out}, + attrs={ + 'trans_x': transpose_x, + 'trans_y': transpose_y + }) return out diff --git a/python/paddle/incubate/nn/functional/fused_transformer.py b/python/paddle/incubate/nn/functional/fused_transformer.py index 232e16415a5..ab7e135adc6 100644 --- a/python/paddle/incubate/nn/functional/fused_transformer.py +++ b/python/paddle/incubate/nn/functional/fused_transformer.py @@ -115,7 +115,8 @@ def fused_feedforward(x, seed = None if mode not in ('downscale_in_infer', 'upscale_in_train'): raise ValueError( - "mode argument should be 'downscale_in_infer' or 'upscale_in_train'") + "mode argument should be 'downscale_in_infer' or 'upscale_in_train'" + ) mode = 'downgrade_in_infer' if mode == 'downscale_in_infer' else mode #semantic transfer if _non_static_mode(): @@ -128,11 +129,10 @@ def fused_feedforward(x, 'ln2_epsilon', ln2_epsilon, 'act_method', activation, 'dropout1_rate', dropout1_rate, 'dropout2_rate', dropout2_rate, "dropout1_is_test", not training, "dropout2_is_test", not training, - "dropout1_fix_seed", seed is not None, "dropout2_fix_seed", - seed is not None, "dropout1_seed", seed - if seed is not None else 0, "dropout2_seed", seed - if seed is not None else 0, 'dropout1_implementation', mode, - 'dropout2_implementation', mode) + "dropout1_fix_seed", seed is not None, "dropout2_fix_seed", seed + is not None, "dropout1_seed", seed if seed is not None else 0, + "dropout2_seed", seed if seed is not None else 0, + 'dropout1_implementation', mode, 'dropout2_implementation', mode) return out helper = LayerHelper("fused_feedforward") @@ -147,68 +147,67 @@ def fused_feedforward(x, 'uint8', stop_gradient=True) dropout2_mask = helper.create_variable_for_type_inference( 'uint8', stop_gradient=True) - ln1_mean = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - ln1_variance = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - ln2_mean = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - ln2_variance = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - linear1_out = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - ln1_out = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - dropout1_out = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) - dropout2_out = helper.create_variable_for_type_inference( - x.dtype, stop_gradient=True) + ln1_mean = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + ln1_variance = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + ln2_mean = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + ln2_variance = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + linear1_out = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + ln1_out = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + dropout1_out = helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) + dropout2_out = 
helper.create_variable_for_type_inference(x.dtype, + stop_gradient=True) if (seed is None or seed == 0) and helper.main_program.random_seed != 0: seed = helper.main_program.random_seed - helper.append_op( - type='fused_feedforward', - inputs={ - 'X': x, - 'Linear1Weight': linear1_weight, - 'Linear1Bias': linear1_bias, - 'Linear2Weight': linear2_weight, - 'Linear2Bias': linear2_bias, - 'Ln1Scale': ln1_scale, - 'Ln1Bias': ln1_bias, - 'Ln2Scale': ln2_scale, - 'Ln2Bias': ln2_bias, - }, - outputs={ - 'Out': out, - 'Dropout1Mask': dropout1_mask, - 'Dropout2Mask': dropout2_mask, - 'Ln1Mean': ln1_mean, - 'Ln1Variance': ln1_variance, - 'Ln2Mean': ln2_mean, - 'Ln2Variance': ln2_variance, - 'Linear1Out': linear1_out, - 'Ln1Out': ln1_out, - 'Dropout1Out': dropout1_out, - 'Dropout2Out': dropout2_out, - }, - attrs={ - 'dropout1_rate': dropout1_rate, - 'dropout2_rate': dropout2_rate, - 'act_method': activation, - 'pre_layer_norm': pre_layer_norm, - 'ln1_epsilon': ln1_epsilon, - 'ln2_epsilon': ln2_epsilon, - 'dropout1_is_test': not training, - 'dropout2_is_test': not training, - 'dropout1_fix_seed': seed is not None, - 'dropout2_fix_seed': seed is not None, - 'dropout1_seed': seed if seed is not None else 0, - 'dropout2_seed': seed if seed is not None else 0, - 'dropout1_implementation': mode, - 'dropout2_implementation': mode - }) + helper.append_op(type='fused_feedforward', + inputs={ + 'X': x, + 'Linear1Weight': linear1_weight, + 'Linear1Bias': linear1_bias, + 'Linear2Weight': linear2_weight, + 'Linear2Bias': linear2_bias, + 'Ln1Scale': ln1_scale, + 'Ln1Bias': ln1_bias, + 'Ln2Scale': ln2_scale, + 'Ln2Bias': ln2_bias, + }, + outputs={ + 'Out': out, + 'Dropout1Mask': dropout1_mask, + 'Dropout2Mask': dropout2_mask, + 'Ln1Mean': ln1_mean, + 'Ln1Variance': ln1_variance, + 'Ln2Mean': ln2_mean, + 'Ln2Variance': ln2_variance, + 'Linear1Out': linear1_out, + 'Ln1Out': ln1_out, + 'Dropout1Out': dropout1_out, + 'Dropout2Out': dropout2_out, + }, + attrs={ + 'dropout1_rate': dropout1_rate, + 'dropout2_rate': dropout2_rate, + 'act_method': activation, + 'pre_layer_norm': pre_layer_norm, + 'ln1_epsilon': ln1_epsilon, + 'ln2_epsilon': ln2_epsilon, + 'dropout1_is_test': not training, + 'dropout2_is_test': not training, + 'dropout1_fix_seed': seed is not None, + 'dropout2_fix_seed': seed is not None, + 'dropout1_seed': seed if seed is not None else 0, + 'dropout2_seed': seed if seed is not None else 0, + 'dropout1_implementation': mode, + 'dropout2_implementation': mode + }) return out @@ -279,12 +278,13 @@ def fused_bias_dropout_residual_layer_norm(x, seed = None if mode not in ('downscale_in_infer', 'upscale_in_train'): raise ValueError( - "mode argument should be 'downscale_in_infer' or 'upscale_in_train'") + "mode argument should be 'downscale_in_infer' or 'upscale_in_train'" + ) mode = 'downgrade_in_infer' if mode == 'downscale_in_infer' else mode #semantic transfer if ln_scale is not None: - assert len(ln_scale. - shape) == 1, "The dims of the shape of ln_scale should be 1." + assert len(ln_scale.shape + ) == 1, "The dims of the shape of ln_scale should be 1." assert x.shape[len(x.shape) - 1] == ln_scale.shape[ 0], "The dim of ln_scale must equal to the last dim of x." 
if ln_bias is not None: @@ -299,8 +299,8 @@ def fused_bias_dropout_residual_layer_norm(x, _, _, _, _, final_out = _C_ops.fused_bias_dropout_residual_layer_norm( x, residual, bias, ln_scale, ln_bias, 'dropout_rate', dropout_rate, 'ln_epsilon', ln_epsilon, 'is_test', not training, - 'dropout_fix_seed', seed is not None, 'dropout_seed', seed - if seed is not None else 0, 'dropout_implementation', mode) + 'dropout_fix_seed', seed is not None, 'dropout_seed', + seed if seed is not None else 0, 'dropout_implementation', mode) return final_out else: helper = LayerHelper('fused_bias_dropout_residual_layer_norm', @@ -343,17 +343,17 @@ def fused_bias_dropout_residual_layer_norm(x, dtype=dtype) final_out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type='fused_bias_dropout_residual_layer_norm', - inputs=inputs, - outputs={ - "BiasDropoutResidualOut": bias_dropout_residual_out, - "DropoutMaskOut": dropout_mask_out, - "LnMean": ln_mean_out, - "LnVariance": ln_variance_out, - 'Y': final_out, - }, - attrs=attrs) + helper.append_op(type='fused_bias_dropout_residual_layer_norm', + inputs=inputs, + outputs={ + "BiasDropoutResidualOut": + bias_dropout_residual_out, + "DropoutMaskOut": dropout_mask_out, + "LnMean": ln_mean_out, + "LnVariance": ln_variance_out, + 'Y': final_out, + }, + attrs=attrs) return final_out @@ -490,7 +490,8 @@ def fused_multi_head_attention(x, seed = None if mode not in ('downscale_in_infer', 'upscale_in_train'): raise ValueError( - "mode argument should be 'downscale_in_infer' or 'upscale_in_train'") + "mode argument should be 'downscale_in_infer' or 'upscale_in_train'" + ) mode = 'downgrade_in_infer' if mode == 'downscale_in_infer' else mode #semantic transfer if _non_static_mode(): @@ -515,10 +516,10 @@ def fused_multi_head_attention(x, 'dropout_rate', dropout_rate, 'attn_dropout_rate', attn_dropout_rate, 'ln_epsilon', ln_epsilon, 'is_test', not training, 'attn_dropout_fix_seed', seed is not None, - 'dropout_fix_seed', seed is not None, 'attn_dropout_seed', seed - if seed is not None else 0, 'dropout_seed', seed - if seed is not None else 0, 'attn_dropout_implementation', mode, - 'dropout_implementation', mode, 'ring_id', ring_id) + 'dropout_fix_seed', seed is not None, 'attn_dropout_seed', + seed if seed is not None else 0, 'dropout_seed', + seed if seed is not None else 0, 'attn_dropout_implementation', + mode, 'dropout_implementation', mode, 'ring_id', ring_id) if cache_kv is not None: return final_out, cache_kv_out return final_out @@ -603,32 +604,32 @@ def fused_multi_head_attention(x, final_out = helper.create_variable_for_type_inference(dtype=dtype) cache_kv_out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type='fused_attention', - inputs=inputs, - outputs={ - "LnMean": pre_ln_mean_out, - "LnVariance": pre_ln_variance_out, - "LnOut": pre_ln_out, - "QKVOut": qkv_out, - "QKVBiasOut": qkv_bias_out, - "TransposeOut2": transpose_out, - "QKOut": qk_out, - "QKTVOut": qktv_out, - "SoftmaxOut": softmax_out, - "AttnDropoutMaskOut": attn_dropout_mask_out, - "AttnDropoutOut": attn_dropout_out, - "SrcMaskOut": attn_mask_out, - "FMHAOut": fmha_out, - "OutLinearOut": out_linear_out, - "DropoutMaskOut": dropout_mask_out, - "Ln2Mean": ln_mean_out, - "Ln2Variance": ln_variance_out, - "BiasDropoutResidualOut": bias_dropout_residual_out, - 'Y': final_out, - 'CacheKVOut': cache_kv_out - }, - attrs=attrs) + helper.append_op(type='fused_attention', + inputs=inputs, + outputs={ + "LnMean": pre_ln_mean_out, + "LnVariance": 
pre_ln_variance_out, + "LnOut": pre_ln_out, + "QKVOut": qkv_out, + "QKVBiasOut": qkv_bias_out, + "TransposeOut2": transpose_out, + "QKOut": qk_out, + "QKTVOut": qktv_out, + "SoftmaxOut": softmax_out, + "AttnDropoutMaskOut": attn_dropout_mask_out, + "AttnDropoutOut": attn_dropout_out, + "SrcMaskOut": attn_mask_out, + "FMHAOut": fmha_out, + "OutLinearOut": out_linear_out, + "DropoutMaskOut": dropout_mask_out, + "Ln2Mean": ln_mean_out, + "Ln2Variance": ln_variance_out, + "BiasDropoutResidualOut": + bias_dropout_residual_out, + 'Y': final_out, + 'CacheKVOut': cache_kv_out + }, + attrs=attrs) return (final_out, cache_kv_out) if cache_kv else final_out @@ -790,7 +791,8 @@ def fused_multi_transformer(x, """ if mode not in ('downscale_in_infer', 'upscale_in_train'): raise ValueError( - "mode argument should be 'downscale_in_infer' or 'upscale_in_train'") + "mode argument should be 'downscale_in_infer' or 'upscale_in_train'" + ) mode = 'downgrade_in_infer' if mode == 'downscale_in_infer' else mode #semantic transfer if _non_static_mode(): @@ -859,10 +861,9 @@ def fused_multi_transformer(x, # NOTE: inplace outputs['CacheKVOut'] = cache_kvs - helper.append_op( - type='fused_multi_transformer', - inputs=inputs, - outputs=outputs, - attrs=attrs) + helper.append_op(type='fused_multi_transformer', + inputs=inputs, + outputs=outputs, + attrs=attrs) return (final_out, cache_kvs) if cache_kvs else final_out diff --git a/python/paddle/incubate/nn/layer/fused_linear.py b/python/paddle/incubate/nn/layer/fused_linear.py index f7c872c3993..8a8800afce6 100644 --- a/python/paddle/incubate/nn/layer/fused_linear.py +++ b/python/paddle/incubate/nn/layer/fused_linear.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -79,10 +79,14 @@ class FusedLinear(Layer): else: weight_shape = [in_features, out_features] dtype = self._helper.get_default_dtype() - self.weight = self.create_parameter( - shape=weight_shape, attr=weight_attr, dtype=dtype, is_bias=False) - self.bias = self.create_parameter( - shape=[out_features], attr=bias_attr, dtype=dtype, is_bias=True) + self.weight = self.create_parameter(shape=weight_shape, + attr=weight_attr, + dtype=dtype, + is_bias=False) + self.bias = self.create_parameter(shape=[out_features], + attr=bias_attr, + dtype=dtype, + is_bias=True) self.transpose_weight = transpose_weight self.name = name diff --git a/python/paddle/incubate/nn/layer/fused_transformer.py b/python/paddle/incubate/nn/layer/fused_transformer.py index a64b7e50602..595b1d27fea 100644 --- a/python/paddle/incubate/nn/layer/fused_transformer.py +++ b/python/paddle/incubate/nn/layer/fused_transformer.py @@ -80,17 +80,17 @@ class FusedBiasDropoutResidualLayerNorm(Layer): self._bias_attr = bias_attr self._weight_attr = weight_attr self.embed_dim = embed_dim - self.linear_bias = self.create_parameter( - shape=[embed_dim], - attr=self._bias_attr, - dtype=self._dtype, - is_bias=True) + self.linear_bias = self.create_parameter(shape=[embed_dim], + attr=self._bias_attr, + dtype=self._dtype, + is_bias=True) self.ln_scale = self.create_parameter( attr=self._weight_attr, shape=[embed_dim], default_initializer=Constant(value=1.0)) - self.ln_bias = self.create_parameter( - attr=self._bias_attr, shape=[embed_dim], is_bias=True) + self.ln_bias = self.create_parameter(attr=self._bias_attr, + shape=[embed_dim], + is_bias=True) self.dropout_rate = dropout_rate self._epsilon = epsilon @@ -227,29 +227,29 @@ class FusedMultiHeadAttention(Layer): attr=self._bias_attr, dtype=self._dtype, is_bias=True) - self.linear_weight = self.create_parameter( - shape=[embed_dim, embed_dim], - attr=self._weight_attr, - dtype=self._dtype, - is_bias=False) - self.linear_bias = self.create_parameter( - shape=[embed_dim], - attr=self._bias_attr, - dtype=self._dtype, - is_bias=True) + self.linear_weight = self.create_parameter(shape=[embed_dim, embed_dim], + attr=self._weight_attr, + dtype=self._dtype, + is_bias=False) + self.linear_bias = self.create_parameter(shape=[embed_dim], + attr=self._bias_attr, + dtype=self._dtype, + is_bias=True) self.pre_ln_scale = self.create_parameter( attr=self._weight_attr, shape=[embed_dim], default_initializer=Constant(value=1.0)) - self.pre_ln_bias = self.create_parameter( - attr=self._bias_attr, shape=[embed_dim], is_bias=True) + self.pre_ln_bias = self.create_parameter(attr=self._bias_attr, + shape=[embed_dim], + is_bias=True) self.ln_scale = self.create_parameter( attr=self._weight_attr, shape=[embed_dim], default_initializer=Constant(value=1.0)) - self.ln_bias = self.create_parameter( - attr=self._bias_attr, shape=[embed_dim], is_bias=True) + self.ln_bias = self.create_parameter(attr=self._bias_attr, + shape=[embed_dim], + is_bias=True) self.dropout_rate = dropout_rate self.attn_dropout_rate = attn_dropout_rate @@ -395,11 +395,10 @@ class FusedFeedForward(Layer): attr=weight_attr, dtype=self._dtype, is_bias=False) - self._linear1_bias = self.create_parameter( - shape=[dim_feedforward], - attr=bias_attr, - dtype=self._dtype, - is_bias=True) + self._linear1_bias = self.create_parameter(shape=[dim_feedforward], + attr=bias_attr, + dtype=self._dtype, + is_bias=True) self._linear2_weight = self.create_parameter( shape=[dim_feedforward, d_model], @@ -407,24 +406,28 @@ class FusedFeedForward(Layer): 
dtype=self._dtype, is_bias=False) - self._linear2_bias = self.create_parameter( - shape=[d_model], attr=bias_attr, dtype=self._dtype, is_bias=True) + self._linear2_bias = self.create_parameter(shape=[d_model], + attr=bias_attr, + dtype=self._dtype, + is_bias=True) self._ln1_scale = self.create_parameter( shape=[d_model], attr=None, is_bias=False, default_initializer=Constant(1.0)) - self._ln1_bias = self.create_parameter( - shape=[d_model], attr=None, is_bias=True) + self._ln1_bias = self.create_parameter(shape=[d_model], + attr=None, + is_bias=True) self._ln2_scale = self.create_parameter( shape=[d_model], attr=None, is_bias=False, default_initializer=Constant(1.0)) - self._ln2_bias = self.create_parameter( - shape=[d_model], attr=None, is_bias=True) + self._ln2_bias = self.create_parameter(shape=[d_model], + attr=None, + is_bias=True) self.name = name def forward(self, src, cache=None): @@ -553,15 +556,14 @@ class FusedTransformerEncoderLayer(Layer): weight_attr=weight_attrs[0], bias_attr=bias_attrs[0]) - self.ffn = FusedFeedForward( - d_model, - dim_feedforward, - dropout_rate=dropout_rate, - activation=activation, - act_dropout_rate=act_dropout_rate, - normalize_before=self.normalize_before, - weight_attr=weight_attrs[1], - bias_attr=bias_attrs[1]) + self.ffn = FusedFeedForward(d_model, + dim_feedforward, + dropout_rate=dropout_rate, + activation=activation, + act_dropout_rate=act_dropout_rate, + normalize_before=self.normalize_before, + weight_attr=weight_attrs[1], + bias_attr=bias_attrs[1]) def forward(self, src, src_mask=None, cache=None): """ @@ -597,8 +599,9 @@ class FusedTransformerEncoderLayer(Layer): if cache is None: attn_out = self.fused_attn(src, attn_mask=src_mask) else: - attn_out, incremental_cache = self.fused_attn( - src, attn_mask=src_mask, cache=cache) + attn_out, incremental_cache = self.fused_attn(src, + attn_mask=src_mask, + cache=cache) ffn_out = self.ffn(attn_out) @@ -967,8 +970,9 @@ class FusedMultiTransformer(Layer): attr=ln_scale_attr, shape=[embed_dim], default_initializer=Constant(value=1.0)) - ln_bias = self.create_parameter( - attr=ln_bias_attr, shape=[embed_dim], is_bias=True) + ln_bias = self.create_parameter(attr=ln_bias_attr, + shape=[embed_dim], + is_bias=True) qkv_weight = self.create_parameter( shape=[3, num_heads, self.head_dim, embed_dim], attr=qkv_weight_attr, @@ -984,39 +988,37 @@ class FusedMultiTransformer(Layer): attr=linear_weight_attr, dtype=self._dtype, is_bias=False) - linear_bias = self.create_parameter( - shape=[embed_dim], - attr=linear_bias_attr, - dtype=self._dtype, - is_bias=True) + linear_bias = self.create_parameter(shape=[embed_dim], + attr=linear_bias_attr, + dtype=self._dtype, + is_bias=True) ffn_ln_scale = self.create_parameter( shape=[embed_dim], attr=ffn_ln_scale_attr, is_bias=False, default_initializer=Constant(1.0)) - ffn_ln_bias = self.create_parameter( - shape=[embed_dim], attr=ffn_ln_bias_attr, is_bias=True) + ffn_ln_bias = self.create_parameter(shape=[embed_dim], + attr=ffn_ln_bias_attr, + is_bias=True) ffn1_weight = self.create_parameter( shape=[embed_dim, dim_feedforward], attr=ffn1_weight_attr, dtype=self._dtype, is_bias=False) - ffn1_bias = self.create_parameter( - shape=[dim_feedforward], - attr=ffn1_bias_attr, - dtype=self._dtype, - is_bias=True) + ffn1_bias = self.create_parameter(shape=[dim_feedforward], + attr=ffn1_bias_attr, + dtype=self._dtype, + is_bias=True) ffn2_weight = self.create_parameter( shape=[dim_feedforward, embed_dim], attr=ffn2_weight_attr, dtype=self._dtype, is_bias=False) - ffn2_bias = 
self.create_parameter( - shape=[embed_dim], - attr=ffn2_bias_attr, - dtype=self._dtype, - is_bias=True) + ffn2_bias = self.create_parameter(shape=[embed_dim], + attr=ffn2_bias_attr, + dtype=self._dtype, + is_bias=True) # tensor model parallel if nranks > 1: diff --git a/python/paddle/incubate/operators/graph_khop_sampler.py b/python/paddle/incubate/operators/graph_khop_sampler.py index 64aecca8411..89014a7ad59 100644 --- a/python/paddle/incubate/operators/graph_khop_sampler.py +++ b/python/paddle/incubate/operators/graph_khop_sampler.py @@ -125,23 +125,24 @@ def graph_khop_sampler(row, sample_index = helper.create_variable_for_type_inference(dtype=row.dtype) reindex_nodes = helper.create_variable_for_type_inference(dtype=row.dtype) edge_eids = helper.create_variable_for_type_inference(dtype=row.dtype) - helper.append_op( - type="graph_khop_sampler", - inputs={ - "Row": row, - "Eids": sorted_eids, - "Col_Ptr": colptr, - "X": input_nodes - }, - outputs={ - "Out_Src": edge_src, - "Out_Dst": edge_dst, - "Sample_Index": sample_index, - "Reindex_X": reindex_nodes, - "Out_Eids": edge_eids - }, - attrs={"sample_sizes": sample_sizes, - "return_eids": return_eids}) + helper.append_op(type="graph_khop_sampler", + inputs={ + "Row": row, + "Eids": sorted_eids, + "Col_Ptr": colptr, + "X": input_nodes + }, + outputs={ + "Out_Src": edge_src, + "Out_Dst": edge_dst, + "Sample_Index": sample_index, + "Reindex_X": reindex_nodes, + "Out_Eids": edge_eids + }, + attrs={ + "sample_sizes": sample_sizes, + "return_eids": return_eids + }) if return_eids: return edge_src, edge_dst, sample_index, reindex_nodes, edge_eids else: diff --git a/python/paddle/incubate/operators/graph_reindex.py b/python/paddle/incubate/operators/graph_reindex.py index 4cfd96ebf44..1c49d6af950 100644 --- a/python/paddle/incubate/operators/graph_reindex.py +++ b/python/paddle/incubate/operators/graph_reindex.py @@ -128,19 +128,23 @@ def graph_reindex(x, reindex_src = helper.create_variable_for_type_inference(dtype=x.dtype) reindex_dst = helper.create_variable_for_type_inference(dtype=x.dtype) out_nodes = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="graph_reindex", - inputs={ - "X": x, - "Neighbors": neighbors, - "Count": count, - "HashTable_Value": value_buffer if flag_buffer_hashtable else None, - "HashTable_Index": index_buffer if flag_buffer_hashtable else None, - }, - outputs={ - "Reindex_Src": reindex_src, - "Reindex_Dst": reindex_dst, - "Out_Nodes": out_nodes - }, - attrs={"flag_buffer_hashtable": flag_buffer_hashtable}) + helper.append_op(type="graph_reindex", + inputs={ + "X": + x, + "Neighbors": + neighbors, + "Count": + count, + "HashTable_Value": + value_buffer if flag_buffer_hashtable else None, + "HashTable_Index": + index_buffer if flag_buffer_hashtable else None, + }, + outputs={ + "Reindex_Src": reindex_src, + "Reindex_Dst": reindex_dst, + "Out_Nodes": out_nodes + }, + attrs={"flag_buffer_hashtable": flag_buffer_hashtable}) return reindex_src, reindex_dst, out_nodes diff --git a/python/paddle/incubate/operators/graph_sample_neighbors.py b/python/paddle/incubate/operators/graph_sample_neighbors.py index d5a85af7272..63424b395c7 100644 --- a/python/paddle/incubate/operators/graph_sample_neighbors.py +++ b/python/paddle/incubate/operators/graph_sample_neighbors.py @@ -126,25 +126,25 @@ def graph_sample_neighbors(row, out_neighbors = helper.create_variable_for_type_inference(dtype=row.dtype) out_count = helper.create_variable_for_type_inference(dtype=row.dtype) out_eids = 
helper.create_variable_for_type_inference(dtype=row.dtype) - helper.append_op( - type="graph_sample_neighbors", - inputs={ - "Row": row, - "Col_Ptr": colptr, - "X": input_nodes, - "Eids": eids if return_eids else None, - "Perm_Buffer": perm_buffer if flag_perm_buffer else None - }, - outputs={ - "Out": out_neighbors, - "Out_Count": out_count, - "Out_Eids": out_eids - }, - attrs={ - "sample_size": sample_size, - "return_eids": return_eids, - "flag_perm_buffer": flag_perm_buffer - }) + helper.append_op(type="graph_sample_neighbors", + inputs={ + "Row": row, + "Col_Ptr": colptr, + "X": input_nodes, + "Eids": eids if return_eids else None, + "Perm_Buffer": + perm_buffer if flag_perm_buffer else None + }, + outputs={ + "Out": out_neighbors, + "Out_Count": out_count, + "Out_Eids": out_eids + }, + attrs={ + "sample_size": sample_size, + "return_eids": return_eids, + "flag_perm_buffer": flag_perm_buffer + }) if return_eids: return out_neighbors, out_count, out_eids return out_neighbors, out_count diff --git a/python/paddle/incubate/operators/graph_send_recv.py b/python/paddle/incubate/operators/graph_send_recv.py index 80a21aec6cf..e9937558e9b 100644 --- a/python/paddle/incubate/operators/graph_send_recv.py +++ b/python/paddle/incubate/operators/graph_send_recv.py @@ -119,9 +119,10 @@ def graph_send_recv(x, pool_type.upper(), 0) else: if _in_legacy_dygraph(): - out, tmp = _C_ops.graph_send_recv( - x, src_index, dst_index, 'pool_type', - pool_type.upper(), 'out_size', out_size) + out, tmp = _C_ops.graph_send_recv(x, src_index, + dst_index, 'pool_type', + pool_type.upper(), 'out_size', + out_size) return out if in_dygraph_mode(): if isinstance(out_size, core.eager.Tensor): @@ -143,17 +144,22 @@ def graph_send_recv(x, helper = LayerHelper("graph_send_recv", **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - dst_count = helper.create_variable_for_type_inference( - dtype="int32", stop_gradient=True) - helper.append_op( - type="graph_send_recv", - inputs={"X": x, - "Src_index": src_index, - "Dst_index": dst_index}, - outputs={"Out": out, - "Dst_count": dst_count}, - attrs={ - "pool_type": pool_type.upper(), - "out_size": 0 if out_size is None or out_size <= 0 else out_size - }) + dst_count = helper.create_variable_for_type_inference(dtype="int32", + stop_gradient=True) + helper.append_op(type="graph_send_recv", + inputs={ + "X": x, + "Src_index": src_index, + "Dst_index": dst_index + }, + outputs={ + "Out": out, + "Dst_count": dst_count + }, + attrs={ + "pool_type": + pool_type.upper(), + "out_size": + 0 if out_size is None or out_size <= 0 else out_size + }) return out diff --git a/python/paddle/incubate/operators/resnet_unit.py b/python/paddle/incubate/operators/resnet_unit.py index 4ddcfbac879..6333ddafe10 100644 --- a/python/paddle/incubate/operators/resnet_unit.py +++ b/python/paddle/incubate/operators/resnet_unit.py @@ -45,11 +45,11 @@ def resnet_unit(x, filter_x, scale_x, bias_x, mean_x, var_x, z, filter_z, bn_param_dtype = fluid.core.VarDesc.VarType.FP32 bit_mask_dtype = fluid.core.VarDesc.VarType.INT32 out = helper.create_variable_for_type_inference(x.dtype) - bit_mask = helper.create_variable_for_type_inference( - dtype=bit_mask_dtype, stop_gradient=True) + bit_mask = helper.create_variable_for_type_inference(dtype=bit_mask_dtype, + stop_gradient=True) # intermediate_out for x - conv_x = helper.create_variable_for_type_inference( - dtype=x.dtype, stop_gradient=True) + conv_x = helper.create_variable_for_type_inference(dtype=x.dtype, + stop_gradient=True) saved_mean_x 
= helper.create_variable_for_type_inference( dtype=bn_param_dtype, stop_gradient=True) saved_invstd_x = helper.create_variable_for_type_inference( @@ -57,8 +57,8 @@ def resnet_unit(x, filter_x, scale_x, bias_x, mean_x, var_x, z, filter_z, running_mean_x = mean_x running_var_x = var_x # intermediate_out for z - conv_z = helper.create_variable_for_type_inference( - dtype=x.dtype, stop_gradient=True) + conv_z = helper.create_variable_for_type_inference(dtype=x.dtype, + stop_gradient=True) saved_mean_z = helper.create_variable_for_type_inference( dtype=bn_param_dtype, stop_gradient=True) saved_invstd_z = helper.create_variable_for_type_inference( @@ -114,8 +114,10 @@ def resnet_unit(x, filter_x, scale_x, bias_x, mean_x, var_x, z, filter_z, 'RunningVarZ': running_var_z, } - helper.append_op( - type='resnet_unit', inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type='resnet_unit', + inputs=inputs, + outputs=outputs, + attrs=attrs) return out @@ -194,26 +196,23 @@ class ResNetUnit(Layer): attr=scale_x_attr, dtype=bn_param_dtype, default_initializer=I.Constant(1.0)) - self.bias_x = self.create_parameter( - shape=bn_param_shape, - attr=bias_x_attr, - dtype=bn_param_dtype, - is_bias=True) - self.mean_x = self.create_parameter( - attr=ParamAttr( - name=moving_mean_x_name, - initializer=I.Constant(0.0), - trainable=False), - shape=bn_param_shape, - dtype=bn_param_dtype) + self.bias_x = self.create_parameter(shape=bn_param_shape, + attr=bias_x_attr, + dtype=bn_param_dtype, + is_bias=True) + self.mean_x = self.create_parameter(attr=ParamAttr( + name=moving_mean_x_name, + initializer=I.Constant(0.0), + trainable=False), + shape=bn_param_shape, + dtype=bn_param_dtype) self.mean_x.stop_gradient = True - self.var_x = self.create_parameter( - attr=ParamAttr( - name=moving_var_x_name, - initializer=I.Constant(1.0), - trainable=False), - shape=bn_param_shape, - dtype=bn_param_dtype) + self.var_x = self.create_parameter(attr=ParamAttr( + name=moving_var_x_name, + initializer=I.Constant(1.0), + trainable=False), + shape=bn_param_shape, + dtype=bn_param_dtype) self.var_x.stop_gradient = True if has_shortcut: self.filter_z = self.create_parameter( @@ -226,26 +225,23 @@ class ResNetUnit(Layer): attr=scale_z_attr, dtype=bn_param_dtype, default_initializer=I.Constant(1.0)) - self.bias_z = self.create_parameter( - shape=bn_param_shape, - attr=bias_z_attr, - dtype=bn_param_dtype, - is_bias=True) - self.mean_z = self.create_parameter( - attr=ParamAttr( - name=moving_mean_z_name, - initializer=I.Constant(0.0), - trainable=False), - shape=bn_param_shape, - dtype=bn_param_dtype) + self.bias_z = self.create_parameter(shape=bn_param_shape, + attr=bias_z_attr, + dtype=bn_param_dtype, + is_bias=True) + self.mean_z = self.create_parameter(attr=ParamAttr( + name=moving_mean_z_name, + initializer=I.Constant(0.0), + trainable=False), + shape=bn_param_shape, + dtype=bn_param_dtype) self.mean_z.stop_gradient = True - self.var_z = self.create_parameter( - attr=ParamAttr( - name=moving_var_z_name, - initializer=I.Constant(1.0), - trainable=False), - shape=bn_param_shape, - dtype=bn_param_dtype) + self.var_z = self.create_parameter(attr=ParamAttr( + name=moving_var_z_name, + initializer=I.Constant(1.0), + trainable=False), + shape=bn_param_shape, + dtype=bn_param_dtype) self.var_z.stop_gradient = True else: self.filter_z = None @@ -258,11 +254,12 @@ class ResNetUnit(Layer): if self._fuse_add and z is None: raise ValueError("z can not be None") - out = resnet_unit( - x, self.filter_x, self.scale_x, self.bias_x, 
self.mean_x, - self.var_x, z, self.filter_z, self.scale_z, self.bias_z, - self.mean_z, self.var_z, self._stride, self._stride_z, - self._padding, self._dilation, self._groups, self._momentum, - self._eps, self._data_format, self._fuse_add, self._has_shortcut, - self._use_global_stats, self._is_test, self._act) + out = resnet_unit(x, self.filter_x, self.scale_x, self.bias_x, + self.mean_x, self.var_x, z, self.filter_z, + self.scale_z, self.bias_z, self.mean_z, self.var_z, + self._stride, self._stride_z, self._padding, + self._dilation, self._groups, self._momentum, + self._eps, self._data_format, self._fuse_add, + self._has_shortcut, self._use_global_stats, + self._is_test, self._act) return out diff --git a/python/paddle/incubate/operators/softmax_mask_fuse.py b/python/paddle/incubate/operators/softmax_mask_fuse.py index e9cd0e9ab61..1b70dfce6d0 100644 --- a/python/paddle/incubate/operators/softmax_mask_fuse.py +++ b/python/paddle/incubate/operators/softmax_mask_fuse.py @@ -63,9 +63,10 @@ def softmax_mask_fuse(x, mask, name=None): return out helper = LayerHelper('fused_softmax_mask', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='fused_softmax_mask', - inputs={'X': [x], - 'Mask': [mask]}, - outputs={'Out': [out]}) + helper.append_op(type='fused_softmax_mask', + inputs={ + 'X': [x], + 'Mask': [mask] + }, + outputs={'Out': [out]}) return out diff --git a/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py b/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py index 5bd4b111b69..dda5981f5ad 100644 --- a/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py +++ b/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py @@ -66,8 +66,7 @@ def softmax_mask_fuse_upper_triangle(x): out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='fused_softmax_mask_upper_triangle', - inputs={'X': [x]}, - outputs={'Out': [out]}) + helper.append_op(type='fused_softmax_mask_upper_triangle', + inputs={'X': [x]}, + outputs={'Out': [out]}) return out diff --git a/python/paddle/incubate/optimizer/distributed_fused_lamb.py b/python/paddle/incubate/optimizer/distributed_fused_lamb.py index 4d40a477ffc..4fddaff7ec9 100644 --- a/python/paddle/incubate/optimizer/distributed_fused_lamb.py +++ b/python/paddle/incubate/optimizer/distributed_fused_lamb.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -25,6 +25,7 @@ import numpy as np class DistributedFusedLamb(Optimizer): + def __init__(self, learning_rate=0.001, lamb_weight_decay=0.01, @@ -42,8 +43,9 @@ class DistributedFusedLamb(Optimizer): name=None): assert not framework._non_static_mode( ), "DistributedFusedLamb does not support dygraph mode" - super(DistributedFusedLamb, self).__init__( - learning_rate=learning_rate, grad_clip=None, name=name) + super(DistributedFusedLamb, self).__init__(learning_rate=learning_rate, + grad_clip=None, + name=name) self._beta1 = beta1 self._beta2 = beta2 @@ -106,12 +108,11 @@ class DistributedFusedLamb(Optimizer): def _create_scale_from_constant(self, value): name = unique_name.generate('global_scale') - return layers.create_global_var( - name=name, - shape=[1], - dtype='float32', - value=float(value), - persistable=True) + return layers.create_global_var(name=name, + shape=[1], + dtype='float32', + value=float(value), + persistable=True) def _get_or_create_scale(self): if self._scale is None: @@ -122,19 +123,17 @@ class DistributedFusedLamb(Optimizer): startup_block = self.helper.startup_program.global_block() if name is not None: name = unique_name.generate(name) - startup_var = startup_block.create_var( - name=name, - shape=shape, - dtype=dtype, - persistable=True, - stop_gradient=True) + startup_var = startup_block.create_var(name=name, + shape=shape, + dtype=dtype, + persistable=True, + stop_gradient=True) main_block = self.helper.main_program.global_block() - main_var = main_block.create_var( - name=startup_var.name, - shape=startup_var.shape, - dtype=startup_var.dtype, - persistable=True, - stop_gradient=True) + main_var = main_block.create_var(name=startup_var.name, + shape=startup_var.shape, + dtype=startup_var.dtype, + persistable=True, + stop_gradient=True) return main_var def _get_parameter(self, name, scope=None): @@ -174,10 +173,10 @@ class DistributedFusedLamb(Optimizer): fp32_fused_param = self._create_persistable_var('fp32_fused_param') fp32_fused_grad = self._create_persistable_var('fp32_fused_grad') - fp16_fused_param = self._create_persistable_var( - 'fp16_fused_param', dtype='float16') - fp16_fused_grad = self._create_persistable_var( - 'fp16_fused_grad', dtype='float16') + fp16_fused_param = self._create_persistable_var('fp16_fused_param', + dtype='float16') + fp16_fused_grad = self._create_persistable_var('fp16_fused_grad', + dtype='float16') master_params = [] for p, g in params_grads: @@ -195,8 +194,8 @@ class DistributedFusedLamb(Optimizer): param_info = self._create_persistable_var('param_info', dtype='int32') param_info.is_distributed = True - fused_offsets = self._create_persistable_var( - 'fused_offsets', dtype='int32') + fused_offsets = self._create_persistable_var('fused_offsets', + dtype='int32') fp32_partial_fused_offsets = self._create_persistable_var( 'fp32_partial_fused_offsets', dtype='int32') @@ -214,8 +213,8 @@ class DistributedFusedLamb(Optimizer): self._create_persistable_var('fp32_acc_fused_grad') ] fp16_acc_fused_grad = [ - self._create_persistable_var( - 'fp16_acc_fused_grad', dtype='float16') + self._create_persistable_var('fp16_acc_fused_grad', + dtype='float16') ] acc_step = [self._create_persistable_var('acc_step', dtype='int64')] else: @@ -239,49 +238,52 @@ class DistributedFusedLamb(Optimizer): startup_block = self.helper.startup_program.global_block() for g in grads: - startup_block.create_var( - name=g.name, - type=g.type, - dtype=g.dtype, - persistable=g.persistable, - shape=g.shape) - - startup_block.append_op( - 
type='distributed_fused_lamb_init', - inputs={ - 'Param': params, - 'Grad': grads, - }, - outputs={ - 'FP32FusedParam': [fp32_fused_param], - 'FP32FusedGrad': [fp32_fused_grad], - 'FP16FusedParam': [fp16_fused_param], - 'FP16FusedGrad': [fp16_fused_grad], - 'Moment1': [moment1], - 'Moment2': [moment2], - 'Beta1Pow': [beta1pow], - 'Beta2Pow': [beta2pow], - 'GlobalScale': [scale], - 'ParamInfo': [param_info], - 'ParamOut': params, - 'MasterParamOut': master_params, - 'GradOut': grads, - 'FP32ShardFusedParamOffsets': [fp32_partial_fused_offsets], - 'FP16ShardFusedParamOffsets': [fp16_partial_fused_offsets], - 'FusedParamOffsets': [fused_offsets], - 'ParamOrder': [param_order], - 'Step': [step], - }, - attrs={ - 'alignment': self._alignment, - 'rank': rank, - 'nranks': nranks, - 'apply_weight_decay': apply_weight_decay, - 'moment1': 0.0, - 'moment2': 0.0, - 'beta1': self._beta1, - 'beta2': self._beta2, - }) + startup_block.create_var(name=g.name, + type=g.type, + dtype=g.dtype, + persistable=g.persistable, + shape=g.shape) + + startup_block.append_op(type='distributed_fused_lamb_init', + inputs={ + 'Param': params, + 'Grad': grads, + }, + outputs={ + 'FP32FusedParam': [fp32_fused_param], + 'FP32FusedGrad': [fp32_fused_grad], + 'FP16FusedParam': [fp16_fused_param], + 'FP16FusedGrad': [fp16_fused_grad], + 'Moment1': [moment1], + 'Moment2': [moment2], + 'Beta1Pow': [beta1pow], + 'Beta2Pow': [beta2pow], + 'GlobalScale': [scale], + 'ParamInfo': [param_info], + 'ParamOut': + params, + 'MasterParamOut': + master_params, + 'GradOut': + grads, + 'FP32ShardFusedParamOffsets': + [fp32_partial_fused_offsets], + 'FP16ShardFusedParamOffsets': + [fp16_partial_fused_offsets], + 'FusedParamOffsets': [fused_offsets], + 'ParamOrder': [param_order], + 'Step': [step], + }, + attrs={ + 'alignment': self._alignment, + 'rank': rank, + 'nranks': nranks, + 'apply_weight_decay': apply_weight_decay, + 'moment1': 0.0, + 'moment2': 0.0, + 'beta1': self._beta1, + 'beta2': self._beta2, + }) main_block = self.helper.main_program.global_block() self._create_global_learning_rate() @@ -324,14 +326,19 @@ class DistributedFusedLamb(Optimizer): 'Moment2Out': [moment2], 'Beta1PowOut': [beta1pow], 'Beta2PowOut': [beta2pow], - 'ParamOut': params, - 'GradOut': grads, + 'ParamOut': + params, + 'GradOut': + grads, 'FoundInf': [self._found_inf], - 'FP32AccFusedGrad': fp32_acc_fused_grad, - 'FP16AccFusedGrad': fp16_acc_fused_grad, - 'AccStep': acc_step, - 'StopUpdate': self._stop_update - if self._stop_update is not None else [], + 'FP32AccFusedGrad': + fp32_acc_fused_grad, + 'FP16AccFusedGrad': + fp16_acc_fused_grad, + 'AccStep': + acc_step, + 'StopUpdate': + self._stop_update if self._stop_update is not None else [], 'Step': [step], }, attrs={ diff --git a/python/paddle/incubate/optimizer/functional/bfgs.py b/python/paddle/incubate/optimizer/functional/bfgs.py index 2065b3c1c94..8bf7b71c65a 100644 --- a/python/paddle/incubate/optimizer/functional/bfgs.py +++ b/python/paddle/incubate/optimizer/functional/bfgs.py @@ -91,8 +91,8 @@ def minimize_bfgs(objective_func, if dtype not in ['float32', 'float64']: raise ValueError( - "The dtype must be 'float32' or 'float64', but the specified is {}.". - format(dtype)) + "The dtype must be 'float32' or 'float64', but the specified is {}." 
+ .format(dtype)) op_name = 'minimize_bfgs' check_input_type(initial_position, 'initial_position', op_name) @@ -134,8 +134,8 @@ def minimize_bfgs(objective_func, dtype=dtype) else: raise NotImplementedError( - "Currently only support line_search_fn = 'strong_wolfe', but the specified is '{}'". - format(line_search_fn)) + "Currently only support line_search_fn = 'strong_wolfe', but the specified is '{}'" + .format(line_search_fn)) num_func_calls += ls_func_calls ############# update Hk ############# @@ -150,7 +150,9 @@ def minimize_bfgs(objective_func, rhok_inv = paddle.dot(yk, sk) rhok = paddle.static.nn.cond( - rhok_inv == 0., lambda: paddle.full(shape=[1], fill_value=1000.0, dtype=dtype), lambda: 1. / rhok_inv) + rhok_inv == 0., + lambda: paddle.full(shape=[1], fill_value=1000.0, dtype=dtype), + lambda: 1. / rhok_inv) Vk_transpose = I - rhok * sk * yk.t() Vk = I - rhok * yk * sk.t() @@ -162,8 +164,9 @@ def minimize_bfgs(objective_func, ############# check convergence ############# gnorm = paddle.linalg.norm(g1, p=np.inf) pk_norm = paddle.linalg.norm(pk, p=np.inf) - paddle.assign(done | (gnorm < tolerance_grad) | - (pk_norm < tolerance_change), done) + paddle.assign( + done | (gnorm < tolerance_grad) | (pk_norm < tolerance_change), + done) paddle.assign(done, is_converge) # when alpha=0, there is no chance to get xk change. paddle.assign(done | (alpha == 0.), done) diff --git a/python/paddle/incubate/optimizer/functional/lbfgs.py b/python/paddle/incubate/optimizer/functional/lbfgs.py index e15ad56dc2d..d09ba5c6952 100644 --- a/python/paddle/incubate/optimizer/functional/lbfgs.py +++ b/python/paddle/incubate/optimizer/functional/lbfgs.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -91,8 +91,8 @@ def minimize_lbfgs(objective_func, """ if dtype not in ['float32', 'float64']: raise ValueError( - "The dtype must be 'float32' or 'float64', but the specified is {}.". - format(dtype)) + "The dtype must be 'float32' or 'float64', but the specified is {}." + .format(dtype)) op_name = 'minimize_lbfgs' check_input_type(initial_position, 'initial_position', op_name) @@ -114,8 +114,9 @@ def minimize_lbfgs(objective_func, is_converge = paddle.full(shape=[1], fill_value=False, dtype='bool') num_func_calls = paddle.full(shape=[1], fill_value=1, dtype='int64') - history_size = paddle.full( - shape=[1], fill_value=history_size, dtype='int64') + history_size = paddle.full(shape=[1], + fill_value=history_size, + dtype='int64') head = paddle.full(shape=[1], fill_value=1, dtype='int64') tail = paddle.full(shape=[1], fill_value=0, dtype='int64') @@ -140,8 +141,9 @@ def minimize_lbfgs(objective_func, ############# compute p_k by two-loop recursion ############# q = paddle.assign(g1) # In a array circle, the index may out of range, so must use mod. 
- i = paddle.full( - shape=[1], fill_value=(head - 1).mod(history_size), dtype='int64') + i = paddle.full(shape=[1], + fill_value=(head - 1).mod(history_size), + dtype='int64') def cond(i, q): return i != tail @@ -181,8 +183,8 @@ def minimize_lbfgs(objective_func, dtype=dtype) else: raise NotImplementedError( - "Currently only support line_search_fn = 'strong_wolfe', but the specified is '{}'". - format(line_search_fn)) + "Currently only support line_search_fn = 'strong_wolfe', but the specified is '{}'" + .format(line_search_fn)) paddle.assign(num_func_calls + ls_func_calls, num_func_calls) ############# update sk_vec, yk_vec, rhok_vec ############# @@ -191,7 +193,9 @@ def minimize_lbfgs(objective_func, rhok_inv = paddle.dot(yk, sk) rhok = paddle.static.nn.cond( - rhok_inv == 0., lambda: paddle.full(shape=[1], fill_value=1000.0, dtype=dtype), lambda: 1. / rhok_inv) + rhok_inv == 0., + lambda: paddle.full(shape=[1], fill_value=1000.0, dtype=dtype), + lambda: 1. / rhok_inv) sk_vec[head] = sk yk_vec[head] = yk @@ -211,8 +215,9 @@ def minimize_lbfgs(objective_func, ############# check convergence ############# gnorm = paddle.linalg.norm(g1, p=np.inf) pk_norm = paddle.linalg.norm(pk, p=np.inf) - paddle.assign(done | (gnorm < tolerance_grad) | - (pk_norm < tolerance_change), done) + paddle.assign( + done | (gnorm < tolerance_grad) | (pk_norm < tolerance_change), + done) paddle.assign(done, is_converge) # when alpha=0, there is no chance to get xk change. paddle.assign(done | (alpha == 0.), done) @@ -222,11 +227,10 @@ def minimize_lbfgs(objective_func, rhok_vec, head, tail ] - paddle.static.nn.while_loop( - cond=cond, - body=body, - loop_vars=[ - k, done, is_converge, num_func_calls, value, xk, g1, sk_vec, yk_vec, - rhok_vec, head, tail - ]) + paddle.static.nn.while_loop(cond=cond, + body=body, + loop_vars=[ + k, done, is_converge, num_func_calls, value, + xk, g1, sk_vec, yk_vec, rhok_vec, head, tail + ]) return is_converge, num_func_calls, xk, value, g1 diff --git a/python/paddle/incubate/optimizer/functional/line_search.py b/python/paddle/incubate/optimizer/functional/line_search.py index d42732e605e..3aacb137e6e 100644 --- a/python/paddle/incubate/optimizer/functional/line_search.py +++ b/python/paddle/incubate/optimizer/functional/line_search.py @@ -31,8 +31,8 @@ def cubic_interpolation_(x1, f1, g1, x2, f2, g2): Returns: min_pos: the minimun point between the specified points in the cubic curve. """ - xmin, xmax = paddle.static.nn.cond(x1 <= x2, lambda: (x1, x2), - lambda: (x2, x1)) + xmin, xmax = paddle.static.nn.cond(x1 <= x2, lambda: (x1, x2), lambda: + (x2, x1)) d1 = g1 + g2 - 3 * (f1 - f2) / (x1 - x2) d2_square = d1**2 - g1 * g2 @@ -169,8 +169,8 @@ def strong_wolfe(f, aj = cubic_interpolation_(a_lo, phi_lo, derphi_lo, a_hi, phi_hi, derphi_hi) # 21 min_change = 0.1 * paddle.abs(a_hi - a_lo) - pred = paddle.minimum( - paddle.abs(aj - a_lo), paddle.abs(aj - a_hi)) < min_change + pred = paddle.minimum(paddle.abs(aj - a_lo), + paddle.abs(aj - a_hi)) < min_change aj = paddle.static.nn.cond(pred, lambda: 0.5 * (a_lo + a_hi), lambda: aj) @@ -208,13 +208,12 @@ def strong_wolfe(f, derphi_hi ] - paddle.static.nn.while_loop( - cond=cond_zoom, - body=body_zoom, - loop_vars=[ - j, done_zoom, a_lo, phi_lo, derphi_lo, derf_lo, a_hi, phi_hi, - derphi_hi - ]) + paddle.static.nn.while_loop(cond=cond_zoom, + body=body_zoom, + loop_vars=[ + j, done_zoom, a_lo, phi_lo, derphi_lo, + derf_lo, a_hi, phi_hi, derphi_hi + ]) # j is the number of object function called in zoom. 
return j @@ -253,8 +252,8 @@ def strong_wolfe(f, paddle.assign(derf_1, derf_star) paddle.assign(ls_func_calls + j, ls_func_calls) - pred1 = ~done & ((phi_2 > phi_0 + c1 * a2 * derphi_0) | ( - (phi_2 >= phi_0) & (i > 1))) + pred1 = ~done & ((phi_2 > phi_0 + c1 * a2 * derphi_0) | + ((phi_2 >= phi_0) & (i > 1))) paddle.assign(done | pred1, done) paddle.static.nn.cond(pred1, true_fn1, None) diff --git a/python/paddle/incubate/optimizer/functional/utils.py b/python/paddle/incubate/optimizer/functional/utils.py index 3000c82a71e..d4f69a35491 100644 --- a/python/paddle/incubate/optimizer/functional/utils.py +++ b/python/paddle/incubate/optimizer/functional/utils.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -53,18 +53,19 @@ def check_initial_inverse_hessian_estimate(H0): else: def create_tmp_var(program, name, dtype, shape): - return program.current_block().create_var( - name=name, dtype=dtype, shape=shape) + return program.current_block().create_var(name=name, + dtype=dtype, + shape=shape) - out_var = create_tmp_var( - paddle.static.default_main_program(), - name='output', - dtype='float32', - shape=[-1]) + out_var = create_tmp_var(paddle.static.default_main_program(), + name='output', + dtype='float32', + shape=[-1]) def false_fn(): - paddle.static.nn.py_func( - func=raise_func, x=is_symmetric, out=out_var) + paddle.static.nn.py_func(func=raise_func, + x=is_symmetric, + out=out_var) paddle.static.nn.cond(is_symmetric, None, false_fn) # eigvals only support cpu diff --git a/python/paddle/incubate/optimizer/lookahead.py b/python/paddle/incubate/optimizer/lookahead.py index 720a84a24f0..8f70f321c0d 100644 --- a/python/paddle/incubate/optimizer/lookahead.py +++ b/python/paddle/incubate/optimizer/lookahead.py @@ -129,12 +129,11 @@ class LookAhead(Optimizer): else: parameters = self.inner_optimizer._parameter_list - super(LookAhead, self).__init__( - learning_rate=alpha, - parameters=parameters, - weight_decay=None, - grad_clip=None, - name=name) + super(LookAhead, self).__init__(learning_rate=alpha, + parameters=parameters, + weight_decay=None, + grad_clip=None, + name=name) self.alpha = alpha self.k = k @@ -180,8 +179,9 @@ class LookAhead(Optimizer): grad_var = param._grad_ivar() params_grads.append((param, grad_var)) - self._apply_optimize( - loss=None, startup_program=None, params_grads=params_grads) + self._apply_optimize(loss=None, + startup_program=None, + params_grads=params_grads) def _create_accumulators(self, block, parameters): assert isinstance(block, framework.Block) @@ -198,16 +198,16 @@ class LookAhead(Optimizer): dtype='int32', persistable=True) - self.helper.append_op( - type='increment', - inputs={'X': [self._global_step_var]}, - outputs={'Out': [self._global_step_var]}, - attrs={'step': 1.0}) + self.helper.append_op(type='increment', + inputs={'X': [self._global_step_var]}, + outputs={'Out': [self._global_step_var]}, + attrs={'step': 1.0}) def _append_optimize_op(self, block, param_and_grad): one_var = paddle.ones(shape=[1], dtype='int32', name='lookahead_ones') - zero_var = paddle.zeros( - 
shape=[1], dtype='int32', name='lookahead_zeros') + zero_var = paddle.zeros(shape=[1], + dtype='int32', + name='lookahead_zeros') k_var = layers.create_global_var( name=unique_name.generate("lookahead_k"), shape=[1], @@ -291,7 +291,8 @@ class LookAhead(Optimizer): self._increment_global_var() - _ = self._apply_optimize( - loss, startup_program=startup_program, params_grads=params_grads) + _ = self._apply_optimize(loss, + startup_program=startup_program, + params_grads=params_grads) return optimize_ops, params_grads diff --git a/python/paddle/incubate/optimizer/modelaverage.py b/python/paddle/incubate/optimizer/modelaverage.py index c3ca6dc8731..361827ba48d 100644 --- a/python/paddle/incubate/optimizer/modelaverage.py +++ b/python/paddle/incubate/optimizer/modelaverage.py @@ -168,12 +168,11 @@ class ModelAverage(Optimizer): min_average_window=10000, max_average_window=10000, name=None): - super(ModelAverage, self).__init__( - learning_rate=0.0, - parameters=parameters, - weight_decay=None, - grad_clip=None, - name=name) + super(ModelAverage, self).__init__(learning_rate=0.0, + parameters=parameters, + weight_decay=None, + grad_clip=None, + name=name) self.helper = LayerHelper(self.__class__.__name__) self.average_window = average_window_rate @@ -208,12 +207,18 @@ class ModelAverage(Optimizer): self._add_accumulator('sum_2', param) self._add_accumulator('sum_3', param) self._add_accumulator('restore', param) - self._add_accumulator( - 'num_accumulates', param, dtype='int64', shape=[1]) - self._add_accumulator( - 'old_num_accumulates', param, dtype='int64', shape=[1]) - self._add_accumulator( - 'num_updates', param, dtype='int64', shape=[1]) + self._add_accumulator('num_accumulates', + param, + dtype='int64', + shape=[1]) + self._add_accumulator('old_num_accumulates', + param, + dtype='int64', + shape=[1]) + self._add_accumulator('num_updates', + param, + dtype='int64', + shape=[1]) def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, framework.Block) @@ -262,12 +267,11 @@ class ModelAverage(Optimizer): "out_num_updates": num_updates, } - average_accumulates_op = block.append_op( - type=self.type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + average_accumulates_op = block.append_op(type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return average_accumulates_op @@ -425,8 +429,8 @@ class ModelAverage(Optimizer): total_param = sum_1 + sum_2 + sum_3 total_accumulates = num_accumulates + old_num_accumulates total_param = paddle.cast(total_param, dtype='float32') - total_accumulates = paddle.cast( - total_accumulates, dtype='float32') + total_accumulates = paddle.cast(total_accumulates, + dtype='float32') average_param = total_param / total_accumulates paddle.assign(average_param, param) try: diff --git a/python/paddle/incubate/passes/fuse_resnet_unit_pass.py b/python/paddle/incubate/passes/fuse_resnet_unit_pass.py index 4b5dca61418..451ea1908f9 100644 --- a/python/paddle/incubate/passes/fuse_resnet_unit_pass.py +++ b/python/paddle/incubate/passes/fuse_resnet_unit_pass.py @@ -22,30 +22,32 @@ def set_resnet_unit_attrs(resnet_unit, has_shortcut): resnet_unit.SetAttr("has_shortcut", has_shortcut) resnet_unit.SetAttr("data_format", 'NHWC') resnet_unit.SetAttr("dilation", 1) - resnet_unit.Attr("stride").MappedPattern( - op="conv2d", name="strides", element_index=0) - resnet_unit.Attr("padding").MappedPattern( - op="conv2d", name="paddings", element_index=0) + 
resnet_unit.Attr("stride").MappedPattern(op="conv2d", + name="strides", + element_index=0) + resnet_unit.Attr("padding").MappedPattern(op="conv2d", + name="paddings", + element_index=0) resnet_unit.Attr("group").MappedPattern(op="conv2d", name="groups") resnet_unit.Attr("op_device").MappedPattern(op="conv2d", name="op_device") - resnet_unit.Attr("op_namescope").MappedPattern( - op="conv2d", name="op_namescope") + resnet_unit.Attr("op_namescope").MappedPattern(op="conv2d", + name="op_namescope") resnet_unit.Attr("momentum").MappedPattern(op="batch_norm", name="momentum") resnet_unit.Attr("epsilon").MappedPattern(op="batch_norm", name="epsilon") - resnet_unit.Attr("use_global_stats").MappedPattern( - op="batch_norm", name="use_global_stats") + resnet_unit.Attr("use_global_stats").MappedPattern(op="batch_norm", + name="use_global_stats") def set_resnet_unit_outputs(resnet_unit, meanX, varX, meanZ=None, varZ=None): - resnet_unit.SetOutputs( - RunningMeanX=meanX, - RunningVarX=varX, - RunningMeanZ=meanZ, - RunningVarZ=varZ) + resnet_unit.SetOutputs(RunningMeanX=meanX, + RunningVarX=varX, + RunningMeanZ=meanZ, + RunningVarZ=varZ) @ir.RegisterPass def fuse_resnet_unit(): + def pattern_conv_bn(x, filter, scale, bias, mean, var): filter.Attr("shape")[0].Mod(32).EQ(0) filter.Attr("shape")[1].Mod(8).EQ(0) @@ -53,8 +55,11 @@ def fuse_resnet_unit(): filter.Attr("shape")[3].EQ(1) conv2d = ir.PassDesc.OP.conv2d(Input=x, Filter=filter) conv2d.SetAttr("data_format", 'NHWC') - bn = ir.PassDesc.OP.batch_norm( - X=conv2d, Bias=bias, Mean=mean, Scale=scale, Variance=var) + bn = ir.PassDesc.OP.batch_norm(X=conv2d, + Bias=bias, + Mean=mean, + Scale=scale, + Variance=var) return bn def pattern_one_input(x, filter, scale, bias, mean, var): @@ -63,8 +68,12 @@ def fuse_resnet_unit(): return relu def replace_one_input(x, filter, scale, bias, mean, var): - resnet_unit = ir.PassDesc.OP.resnet_unit( - X=x, FilterX=filter, ScaleX=scale, BiasX=bias, MeanX=mean, VarX=var) + resnet_unit = ir.PassDesc.OP.resnet_unit(X=x, + FilterX=filter, + ScaleX=scale, + BiasX=bias, + MeanX=mean, + VarX=var) set_resnet_unit_attrs(resnet_unit, False) set_resnet_unit_outputs(resnet_unit, mean, var) return resnet_unit.Output("Y") @@ -73,26 +82,25 @@ def fuse_resnet_unit(): scaleZ, biasZ, meanZ, varZ): bnX = pattern_conv_bn(x, filterX, scaleX, biasX, meanX, varX) bnZ = pattern_conv_bn(x, filterZ, scaleZ, biasZ, meanZ, varZ) - ewadd = ir.PassDesc.OP.elementwise_add( - X=bnX.Output("Y"), Y=bnZ.Output("Y")) + ewadd = ir.PassDesc.OP.elementwise_add(X=bnX.Output("Y"), + Y=bnZ.Output("Y")) relu = ir.PassDesc.OP.relu(X=ewadd) return relu def replace_two_input(x, filterX, scaleX, biasX, meanX, varX, z, filterZ, scaleZ, biasZ, meanZ, varZ): - resnet_unit = ir.PassDesc.OP.resnet_unit( - X=x, - FilterX=filterX, - ScaleX=scaleX, - BiasX=biasX, - MeanX=meanX, - VarX=varX, - Z=z, - FilterZ=filterZ, - ScaleZ=scaleZ, - BiasZ=biasZ, - MeanZ=meanZ, - VarZ=varZ) + resnet_unit = ir.PassDesc.OP.resnet_unit(X=x, + FilterX=filterX, + ScaleX=scaleX, + BiasX=biasX, + MeanX=meanX, + VarX=varX, + Z=z, + FilterZ=filterZ, + ScaleZ=scaleZ, + BiasZ=biasZ, + MeanZ=meanZ, + VarZ=varZ) set_resnet_unit_attrs(resnet_unit, True) set_resnet_unit_outputs(resnet_unit, meanX, varX, meanZ, varZ) return resnet_unit.Output("Y") diff --git a/python/paddle/incubate/sparse/creation.py b/python/paddle/incubate/sparse/creation.py index 7c30910071c..74167a9527a 100644 --- a/python/paddle/incubate/sparse/creation.py +++ b/python/paddle/incubate/sparse/creation.py @@ -48,8 +48,9 @@ def 
_get_place(place): place = _get_paddle_place(place) if place is None: place = _current_expected_place() - elif not isinstance(place, (core.Place, core.CPUPlace, core.CUDAPinnedPlace, - core.CUDAPlace)): + elif not isinstance( + place, + (core.Place, core.CPUPlace, core.CUDAPinnedPlace, core.CUDAPlace)): raise ValueError( "'place' must be any of paddle.Place, paddle.CPUPlace, paddle.CUDAPinnedPlace, paddle.CUDAPlace" ) @@ -122,8 +123,10 @@ def sparse_coo_tensor(indices, place = _get_place(place) if not isinstance(indices, core.eager.Tensor): - indices = to_tensor( - indices, dtype=None, place=place, stop_gradient=True) + indices = to_tensor(indices, + dtype=None, + place=place, + stop_gradient=True) if not isinstance(values, core.eager.Tensor): values = to_tensor(values, dtype, place, stop_gradient) if len(indices.shape) != 2: @@ -136,8 +139,8 @@ def sparse_coo_tensor(indices, if nnz != values.shape[0]: raise ValueError( - "the indices and values must have same number of non-zero, but get {} and {}". - format(nnz, values.shape[0])) + "the indices and values must have same number of non-zero, but get {} and {}" + .format(nnz, values.shape[0])) dense_dim = len(values.shape) - 1 @@ -155,15 +158,16 @@ def sparse_coo_tensor(indices, shape = min_shape else: if shape < min_shape: - raise ValueError("the minimun shape required is {}, but get {}". - format(min_shape, shape)) + raise ValueError( + "the minimun shape required is {}, but get {}".format( + min_shape, shape)) if len(shape) != sparse_dim + dense_dim: raise ValueError( - "the number of dimensions(len(shape) must be sparse_dim({}) + dense_dim({}), but get {}". - format(sparse_dim, dense_dim, len(shape))) + "the number of dimensions(len(shape) must be sparse_dim({}) + dense_dim({}), but get {}" + .format(sparse_dim, dense_dim, len(shape))) - return _C_ops.final_state_sparse_create_sparse_coo_tensor(values, indices, - shape) + return _C_ops.final_state_sparse_create_sparse_coo_tensor( + values, indices, shape) #TODO: need to support shape is None @@ -266,8 +270,8 @@ def sparse_csr_tensor(crows, if len(shape) == 2: if crows.shape[0] != shape[0] + 1: raise ValueError( - "The length({}) of crows must be equal to the rows({})+1 of matrix.". - format(crows.shape[0], shape[0])) + "The length({}) of crows must be equal to the rows({})+1 of matrix." + .format(crows.shape[0], shape[0])) if crows[0] != 0: raise ValueError("the 0th value of crows must be 0") @@ -277,9 +281,9 @@ def sparse_csr_tensor(crows, else: if crows.shape[0] % (shape[0] + 1) != 0: raise ValueError( - "The length({}) of crows must be divisible the rows({})+1 of matrix.". - format(crows.shape[0], shape[0])) - # TODO(zkh2016): check whether the value in crows and cols is legal + "The length({}) of crows must be divisible the rows({})+1 of matrix." 
+ .format(crows.shape[0], shape[0])) + # TODO(zkh2016): check whether the value in crows and cols is legal return core.eager.sparse_csr_tensor(crows, cols, values, shape, stop_gradient) diff --git a/python/paddle/incubate/sparse/nn/functional/conv.py b/python/paddle/incubate/sparse/nn/functional/conv.py index d67d67e8d74..75c0514da8e 100644 --- a/python/paddle/incubate/sparse/nn/functional/conv.py +++ b/python/paddle/incubate/sparse/nn/functional/conv.py @@ -67,11 +67,10 @@ def _conv3d(x, if bias is not None: values = pre_bias.values() add_bias = elementwise_add(values, bias, axis=1) - return sparse_coo_tensor( - pre_bias.indices(), - add_bias, - shape=pre_bias.shape, - stop_gradient=pre_bias.stop_gradient) + return sparse_coo_tensor(pre_bias.indices(), + add_bias, + shape=pre_bias.shape, + stop_gradient=pre_bias.stop_gradient) else: return pre_bias diff --git a/python/paddle/incubate/sparse/nn/functional/pooling.py b/python/paddle/incubate/sparse/nn/functional/pooling.py index 0c0b0cbadad..8ed4444e89c 100644 --- a/python/paddle/incubate/sparse/nn/functional/pooling.py +++ b/python/paddle/incubate/sparse/nn/functional/pooling.py @@ -87,8 +87,10 @@ def max_pool3d(x, channel_last = True - padding, padding_algorithm = _update_padding_nd( - padding, 3, channel_last=channel_last, ceil_mode=ceil_mode) + padding, padding_algorithm = _update_padding_nd(padding, + 3, + channel_last=channel_last, + ceil_mode=ceil_mode) #TODO(zkh2016): remove the dependency on dilation from the backend dilation = [1, 1, 1] diff --git a/python/paddle/incubate/sparse/nn/layer/conv.py b/python/paddle/incubate/sparse/nn/layer/conv.py index e00ca78f784..05309e5bbfe 100644 --- a/python/paddle/incubate/sparse/nn/layer/conv.py +++ b/python/paddle/incubate/sparse/nn/layer/conv.py @@ -23,6 +23,7 @@ __all__ = [] class _Conv3D(Layer): + def __init__(self, in_channels, out_channels, @@ -86,16 +87,15 @@ class _Conv3D(Layer): self.bias = None def forward(self, x): - out = F.conv._conv3d( - x, - self.weight, - bias=self.bias, - stride=self._stride, - padding=self._updated_padding, - dilation=self._dilation, - groups=self._groups, - subm=self._subm, - data_format=self._data_format) + out = F.conv._conv3d(x, + self.weight, + bias=self.bias, + stride=self._stride, + padding=self._updated_padding, + dilation=self._dilation, + groups=self._groups, + subm=self._subm, + data_format=self._data_format) return out def extra_repr(self): @@ -232,19 +232,18 @@ class Conv3D(_Conv3D): weight_attr=None, bias_attr=None, data_format="NDHWC"): - super(Conv3D, self).__init__( - in_channels, - out_channels, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups, - subm=False, - padding_mode=padding_mode, - weight_attr=weight_attr, - bias_attr=bias_attr, - data_format=data_format) + super(Conv3D, self).__init__(in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + subm=False, + padding_mode=padding_mode, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) class SubmConv3D(_Conv3D): @@ -365,16 +364,15 @@ class SubmConv3D(_Conv3D): weight_attr=None, bias_attr=None, data_format="NDHWC"): - super(SubmConv3D, self).__init__( - in_channels, - out_channels, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups, - subm=True, - padding_mode=padding_mode, - weight_attr=weight_attr, - bias_attr=bias_attr, - data_format=data_format) + super(SubmConv3D, self).__init__(in_channels, + out_channels, + kernel_size, 
+ stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + subm=True, + padding_mode=padding_mode, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) diff --git a/python/paddle/incubate/sparse/nn/layer/norm.py b/python/paddle/incubate/sparse/nn/layer/norm.py index 1a9b1f15e4c..4d4cf7df2f2 100644 --- a/python/paddle/incubate/sparse/nn/layer/norm.py +++ b/python/paddle/incubate/sparse/nn/layer/norm.py @@ -115,15 +115,14 @@ class BatchNorm(paddle.nn.BatchNorm1D): data_format='NDHWC', use_global_stats=None, name=None): - super(BatchNorm, self).__init__( - num_features, - momentum=momentum, - epsilon=epsilon, - weight_attr=weight_attr, - bias_attr=bias_attr, - data_format=data_format, - use_global_stats=use_global_stats, - name=name) + super(BatchNorm, self).__init__(num_features, + momentum=momentum, + epsilon=epsilon, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format, + use_global_stats=use_global_stats, + name=name) def _check_data_format(self, input): if input != "NDHWC": diff --git a/python/paddle/incubate/sparse/nn/layer/pooling.py b/python/paddle/incubate/sparse/nn/layer/pooling.py index 98be6e125f4..9fb67ecc0a6 100644 --- a/python/paddle/incubate/sparse/nn/layer/pooling.py +++ b/python/paddle/incubate/sparse/nn/layer/pooling.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -91,14 +91,13 @@ class MaxPool3D(Layer): self.name = name def forward(self, x): - return F.max_pool3d( - x, - kernel_size=self.ksize, - stride=self.stride, - padding=self.padding, - ceil_mode=self.ceil_mode, - data_format=self.data_format, - name=self.name) + return F.max_pool3d(x, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + ceil_mode=self.ceil_mode, + data_format=self.data_format, + name=self.name) def extra_repr(self): return 'kernel_size={ksize}, stride={stride}, padding={padding}'.format( diff --git a/python/paddle/incubate/tensor/__init__.py b/python/paddle/incubate/tensor/__init__.py index b585a0dd4d8..01dfab4482d 100644 --- a/python/paddle/incubate/tensor/__init__.py +++ b/python/paddle/incubate/tensor/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/python/paddle/incubate/tensor/math.py b/python/paddle/incubate/tensor/math.py index 07dc7c1581f..7ce2e735b6f 100644 --- a/python/paddle/incubate/tensor/math.py +++ b/python/paddle/incubate/tensor/math.py @@ -57,21 +57,25 @@ def segment_sum(data, segment_ids, name=None): out, tmp = _C_ops.segment_pool(data, segment_ids, 'pooltype', "SUM") return out - check_variable_and_dtype(data, "X", ("float32", "float64", "int32", - "int64"), "segment_pool") + check_variable_and_dtype(data, "X", + ("float32", "float64", "int32", "int64"), + "segment_pool") check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"), "segment_pool") helper = LayerHelper("segment_sum", **locals()) out = helper.create_variable_for_type_inference(dtype=data.dtype) summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype) - helper.append_op( - type="segment_pool", - inputs={"X": data, - "SegmentIds": segment_ids}, - outputs={"Out": out, - "SummedIds": summed_ids}, - attrs={"pooltype": "SUM"}) + helper.append_op(type="segment_pool", + inputs={ + "X": data, + "SegmentIds": segment_ids + }, + outputs={ + "Out": out, + "SummedIds": summed_ids + }, + attrs={"pooltype": "SUM"}) return out @@ -114,21 +118,25 @@ def segment_mean(data, segment_ids, name=None): out, tmp = _C_ops.segment_pool(data, segment_ids, 'pooltype', "MEAN") return out - check_variable_and_dtype(data, "X", ("float32", "float64", "int32", - "int64"), "segment_pool") + check_variable_and_dtype(data, "X", + ("float32", "float64", "int32", "int64"), + "segment_pool") check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"), "segment_pool") helper = LayerHelper("segment_mean", **locals()) out = helper.create_variable_for_type_inference(dtype=data.dtype) summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype) - helper.append_op( - type="segment_pool", - inputs={"X": data, - "SegmentIds": segment_ids}, - outputs={"Out": out, - "SummedIds": summed_ids}, - attrs={"pooltype": "MEAN"}) + helper.append_op(type="segment_pool", + inputs={ + "X": data, + "SegmentIds": segment_ids + }, + outputs={ + "Out": out, + "SummedIds": summed_ids + }, + attrs={"pooltype": "MEAN"}) return out @@ -171,21 +179,25 @@ def segment_min(data, segment_ids, name=None): out, tmp = _C_ops.segment_pool(data, segment_ids, 'pooltype', "MIN") return out - check_variable_and_dtype(data, "X", ("float32", "float64", "int32", - "int64"), "segment_pool") + check_variable_and_dtype(data, "X", + ("float32", "float64", "int32", "int64"), + "segment_pool") check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"), "segment_pool") helper = LayerHelper("segment_min", **locals()) out = helper.create_variable_for_type_inference(dtype=data.dtype) summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype) - helper.append_op( - type="segment_pool", - inputs={"X": data, - "SegmentIds": segment_ids}, - outputs={"Out": out, - "SummedIds": summed_ids}, - attrs={"pooltype": "MIN"}) + helper.append_op(type="segment_pool", + inputs={ + "X": data, + "SegmentIds": segment_ids + }, + outputs={ + "Out": out, + "SummedIds": summed_ids + }, + attrs={"pooltype": "MIN"}) return out @@ -229,19 +241,23 @@ def segment_max(data, segment_ids, name=None): out, tmp = _C_ops.segment_pool(data, segment_ids, 'pooltype', "MAX") return out - check_variable_and_dtype(data, "X", ("float32", "float64", "int32", - "int64"), "segment_pool") + check_variable_and_dtype(data, "X", + ("float32", "float64", "int32", "int64"), + "segment_pool") 
check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"), "segment_pool") helper = LayerHelper("segment_max", **locals()) out = helper.create_variable_for_type_inference(dtype=data.dtype) summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype) - helper.append_op( - type="segment_pool", - inputs={"X": data, - "SegmentIds": segment_ids}, - outputs={"Out": out, - "SummedIds": summed_ids}, - attrs={"pooltype": "MAX"}) + helper.append_op(type="segment_pool", + inputs={ + "X": data, + "SegmentIds": segment_ids + }, + outputs={ + "Out": out, + "SummedIds": summed_ids + }, + attrs={"pooltype": "MAX"}) return out diff --git a/python/paddle/inference/__init__.py b/python/paddle/inference/__init__.py index ec5295b6dfe..670c2cc8e4a 100644 --- a/python/paddle/inference/__init__.py +++ b/python/paddle/inference/__init__.py @@ -26,16 +26,7 @@ from ..fluid.inference import get_num_bytes_of_data_type # noqa: F401 from ..fluid.inference import PredictorPool # noqa: F401 __all__ = [ # noqa - 'Config', - 'DataType', - 'PlaceType', - 'PrecisionType', - 'Tensor', - 'Predictor', - 'create_predictor', - 'get_version', - 'get_trt_compile_version', - 'get_trt_runtime_version', - 'get_num_bytes_of_data_type', - 'PredictorPool' + 'Config', 'DataType', 'PlaceType', 'PrecisionType', 'Tensor', 'Predictor', + 'create_predictor', 'get_version', 'get_trt_compile_version', + 'get_trt_runtime_version', 'get_num_bytes_of_data_type', 'PredictorPool' ] diff --git a/python/paddle/io/__init__.py b/python/paddle/io/__init__.py index 5781f78c6e4..87acda904b5 100755 --- a/python/paddle/io/__init__.py +++ b/python/paddle/io/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: define all functions about input & output in this directory +# TODO: define all functions about input & output in this directory from ..fluid.io import DataLoader # noqa: F401 from ..fluid.dataloader import Dataset # noqa: F401 @@ -30,20 +30,9 @@ from ..fluid.dataloader import WeightedRandomSampler # noqa: F401 from ..fluid.dataloader import Subset # noqa: F401 from ..fluid.dataloader import random_split # noqa: F401 -__all__ = [ #noqa - 'Dataset', - 'IterableDataset', - 'TensorDataset', - 'ComposeDataset', - 'ChainDataset', - 'BatchSampler', - 'DistributedBatchSampler', - 'DataLoader', - 'get_worker_info', - 'Sampler', - 'SequenceSampler', - 'RandomSampler', - 'WeightedRandomSampler', - 'random_split', - 'Subset' +__all__ = [ #noqa + 'Dataset', 'IterableDataset', 'TensorDataset', 'ComposeDataset', + 'ChainDataset', 'BatchSampler', 'DistributedBatchSampler', 'DataLoader', + 'get_worker_info', 'Sampler', 'SequenceSampler', 'RandomSampler', + 'WeightedRandomSampler', 'random_split', 'Subset' ] diff --git a/python/paddle/jit/__init__.py b/python/paddle/jit/__init__.py index a2af493faca..7a31dad82e0 100644 --- a/python/paddle/jit/__init__.py +++ b/python/paddle/jit/__init__.py @@ -28,13 +28,6 @@ from ..fluid.dygraph.io import TranslatedLayer # noqa: F401 from . 
import dy2static # noqa: F401 __all__ = [ # noqa - 'save', - 'load', - 'TracedLayer', - 'to_static', - 'ProgramTranslator', - 'TranslatedLayer', - 'set_code_level', - 'set_verbosity', - 'not_to_static' + 'save', 'load', 'TracedLayer', 'to_static', 'ProgramTranslator', + 'TranslatedLayer', 'set_code_level', 'set_verbosity', 'not_to_static' ] diff --git a/python/paddle/metric/__init__.py b/python/paddle/metric/__init__.py index 2f2ef4c6f54..70fe075e577 100644 --- a/python/paddle/metric/__init__.py +++ b/python/paddle/metric/__init__.py @@ -19,11 +19,6 @@ from .metrics import Recall # noqa: F401 from .metrics import Auc # noqa: F401 from .metrics import accuracy # noqa: F401 -__all__ = [ #noqa - 'Metric', - 'Accuracy', - 'Precision', - 'Recall', - 'Auc', - 'accuracy' +__all__ = [ #noqa + 'Metric', 'Accuracy', 'Precision', 'Recall', 'Auc', 'accuracy' ] diff --git a/python/paddle/metric/metrics.py b/python/paddle/metric/metrics.py index d399cb20524..4d28b68f994 100644 --- a/python/paddle/metric/metrics.py +++ b/python/paddle/metric/metrics.py @@ -120,8 +120,9 @@ class Metric(object): """ Reset states and result """ - raise NotImplementedError("function 'reset' not implemented in {}.". - format(self.__class__.__name__)) + raise NotImplementedError( + "function 'reset' not implemented in {}.".format( + self.__class__.__name__)) @abc.abstractmethod def update(self, *args): @@ -135,8 +136,9 @@ class Metric(object): see :code:`Metric.compute` """ - raise NotImplementedError("function 'update' not implemented in {}.". - format(self.__class__.__name__)) + raise NotImplementedError( + "function 'update' not implemented in {}.".format( + self.__class__.__name__)) @abc.abstractmethod def accumulate(self): @@ -152,8 +154,9 @@ class Metric(object): """ Returns metric name """ - raise NotImplementedError("function 'name' not implemented in {}.". - format(self.__class__.__name__)) + raise NotImplementedError( + "function 'name' not implemented in {}.".format( + self.__class__.__name__)) def compute(self, *args): """ @@ -256,8 +259,10 @@ class Accuracy(Metric): Tensor: Correct mask, a tensor with shape [batch_size, d0, ..., topk]. """ pred = paddle.argsort(pred, descending=True) - pred = paddle.slice( - pred, axes=[len(pred.shape) - 1], starts=[0], ends=[self.maxk]) + pred = paddle.slice(pred, + axes=[len(pred.shape) - 1], + starts=[0], + ends=[self.maxk]) if (len(label.shape) == 1) or \ (len(label.shape) == 2 and label.shape[-1] == 1): # In static mode, the real label data shape may be different @@ -812,16 +817,15 @@ def accuracy(input, label, k=1, correct=None, total=None, name=None): correct = helper.create_variable_for_type_inference(dtype="int32") if total is None: total = helper.create_variable_for_type_inference(dtype="int32") - helper.append_op( - type="accuracy", - inputs={ - "Out": [topk_out], - "Indices": [topk_indices], - "Label": [label] - }, - outputs={ - "Accuracy": [acc_out], - "Correct": [correct], - "Total": [total], - }) + helper.append_op(type="accuracy", + inputs={ + "Out": [topk_out], + "Indices": [topk_indices], + "Label": [label] + }, + outputs={ + "Accuracy": [acc_out], + "Correct": [correct], + "Total": [total], + }) return acc_out diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index b4be291b069..de416ca8093 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -157,11 +157,10 @@ from . 
import quant # noqa: F401 import paddle.utils.deprecated as deprecated -@deprecated( - since="2.0.0", - update_to="paddle.nn.funcitional.diag_embed", - level=1, - reason="diag_embed in paddle.nn will be removed in future") +@deprecated(since="2.0.0", + update_to="paddle.nn.funcitional.diag_embed", + level=1, + reason="diag_embed in paddle.nn will be removed in future") def diag_embed(*args): ''' alias name of paddle.nn.functional.diag_embed @@ -169,11 +168,10 @@ def diag_embed(*args): return functional.diag_embed(*args) -@deprecated( - since="2.0.0", - update_to="paddle.nn.utils.remove_weight_norm", - level=1, - reason="remove_weight_norm in paddle.nn will be removed in future") +@deprecated(since="2.0.0", + update_to="paddle.nn.utils.remove_weight_norm", + level=1, + reason="remove_weight_norm in paddle.nn will be removed in future") def remove_weight_norm(*args): ''' alias name of paddle.nn.utils.remove_weight_norm @@ -181,11 +179,10 @@ def remove_weight_norm(*args): return utils.remove_weight_norm(*args) -@deprecated( - since="2.0.0", - update_to="paddle.nn.utils.weight_norm", - level=1, - reason="weight_norm in paddle.nn will be removed in future") +@deprecated(since="2.0.0", + update_to="paddle.nn.utils.weight_norm", + level=1, + reason="weight_norm in paddle.nn will be removed in future") def weight_norm(*args): ''' alias name of paddle.nn.utils.weight_norm @@ -193,126 +190,126 @@ def weight_norm(*args): return utils.weight_norm(*args) -__all__ = [ #noqa - 'BatchNorm', - 'CELU', - 'GroupNorm', - 'LayerNorm', - 'SpectralNorm', - 'BatchNorm1D', - 'BatchNorm2D', - 'BatchNorm3D', - 'InstanceNorm1D', - 'InstanceNorm2D', - 'InstanceNorm3D', - 'SyncBatchNorm', - 'LocalResponseNorm', - 'Embedding', - 'Linear', - 'Upsample', - 'UpsamplingNearest2D', - 'UpsamplingBilinear2D', - 'Pad1D', - 'Pad2D', - 'Pad3D', - 'CosineSimilarity', - 'Dropout', - 'Dropout2D', - 'Dropout3D', - 'Bilinear', - 'AlphaDropout', - 'Unfold', - 'Fold', - 'RNNCellBase', - 'SimpleRNNCell', - 'LSTMCell', - 'GRUCell', - 'RNN', - 'BiRNN', - 'SimpleRNN', - 'LSTM', - 'GRU', - 'dynamic_decode', - 'MultiHeadAttention', - 'Maxout', - 'Softsign', - 'Transformer', - 'MSELoss', - 'LogSigmoid', - 'BeamSearchDecoder', - 'ClipGradByNorm', - 'ReLU', - 'PairwiseDistance', - 'BCEWithLogitsLoss', - 'SmoothL1Loss', - 'MaxPool3D', - 'AdaptiveMaxPool2D', - 'Hardshrink', - 'Softplus', - 'KLDivLoss', - 'AvgPool2D', - 'L1Loss', - 'LeakyReLU', - 'AvgPool1D', - 'AdaptiveAvgPool3D', - 'AdaptiveMaxPool3D', - 'NLLLoss', - 'Conv1D', - 'Sequential', - 'Hardswish', - 'Conv1DTranspose', - 'AdaptiveMaxPool1D', - 'TransformerEncoder', - 'Softmax', - 'Softmax2D', - 'ParameterList', - 'Conv2D', - 'Softshrink', - 'Hardtanh', - 'TransformerDecoderLayer', - 'CrossEntropyLoss', - 'GELU', - 'SELU', - 'Silu', - 'Conv2DTranspose', - 'CTCLoss', - 'ThresholdedReLU', - 'AdaptiveAvgPool2D', - 'MaxPool1D', - 'Layer', - 'TransformerDecoder', - 'Conv3D', - 'Tanh', - 'Conv3DTranspose', - 'Flatten', - 'AdaptiveAvgPool1D', - 'Tanhshrink', - 'HSigmoidLoss', - 'PReLU', - 'TransformerEncoderLayer', - 'AvgPool3D', - 'MaxPool2D', - 'MarginRankingLoss', - 'LayerList', - 'ClipGradByValue', - 'BCELoss', - 'Hardsigmoid', - 'ClipGradByGlobalNorm', - 'LogSoftmax', - 'Sigmoid', - 'Swish', - 'Mish', - 'PixelShuffle', - 'PixelUnshuffle', - 'ChannelShuffle', - 'ELU', - 'ReLU6', - 'LayerDict', - 'ZeroPad2D', - 'MaxUnPool1D', - 'MaxUnPool2D', - 'MaxUnPool3D', - 'HingeEmbeddingLoss', - 'Identity', - 'RReLU', +__all__ = [ #noqa + 'BatchNorm', + 'CELU', + 'GroupNorm', + 'LayerNorm', + 
'SpectralNorm', + 'BatchNorm1D', + 'BatchNorm2D', + 'BatchNorm3D', + 'InstanceNorm1D', + 'InstanceNorm2D', + 'InstanceNorm3D', + 'SyncBatchNorm', + 'LocalResponseNorm', + 'Embedding', + 'Linear', + 'Upsample', + 'UpsamplingNearest2D', + 'UpsamplingBilinear2D', + 'Pad1D', + 'Pad2D', + 'Pad3D', + 'CosineSimilarity', + 'Dropout', + 'Dropout2D', + 'Dropout3D', + 'Bilinear', + 'AlphaDropout', + 'Unfold', + 'Fold', + 'RNNCellBase', + 'SimpleRNNCell', + 'LSTMCell', + 'GRUCell', + 'RNN', + 'BiRNN', + 'SimpleRNN', + 'LSTM', + 'GRU', + 'dynamic_decode', + 'MultiHeadAttention', + 'Maxout', + 'Softsign', + 'Transformer', + 'MSELoss', + 'LogSigmoid', + 'BeamSearchDecoder', + 'ClipGradByNorm', + 'ReLU', + 'PairwiseDistance', + 'BCEWithLogitsLoss', + 'SmoothL1Loss', + 'MaxPool3D', + 'AdaptiveMaxPool2D', + 'Hardshrink', + 'Softplus', + 'KLDivLoss', + 'AvgPool2D', + 'L1Loss', + 'LeakyReLU', + 'AvgPool1D', + 'AdaptiveAvgPool3D', + 'AdaptiveMaxPool3D', + 'NLLLoss', + 'Conv1D', + 'Sequential', + 'Hardswish', + 'Conv1DTranspose', + 'AdaptiveMaxPool1D', + 'TransformerEncoder', + 'Softmax', + 'Softmax2D', + 'ParameterList', + 'Conv2D', + 'Softshrink', + 'Hardtanh', + 'TransformerDecoderLayer', + 'CrossEntropyLoss', + 'GELU', + 'SELU', + 'Silu', + 'Conv2DTranspose', + 'CTCLoss', + 'ThresholdedReLU', + 'AdaptiveAvgPool2D', + 'MaxPool1D', + 'Layer', + 'TransformerDecoder', + 'Conv3D', + 'Tanh', + 'Conv3DTranspose', + 'Flatten', + 'AdaptiveAvgPool1D', + 'Tanhshrink', + 'HSigmoidLoss', + 'PReLU', + 'TransformerEncoderLayer', + 'AvgPool3D', + 'MaxPool2D', + 'MarginRankingLoss', + 'LayerList', + 'ClipGradByValue', + 'BCELoss', + 'Hardsigmoid', + 'ClipGradByGlobalNorm', + 'LogSoftmax', + 'Sigmoid', + 'Swish', + 'Mish', + 'PixelShuffle', + 'PixelUnshuffle', + 'ChannelShuffle', + 'ELU', + 'ReLU6', + 'LayerDict', + 'ZeroPad2D', + 'MaxUnPool1D', + 'MaxUnPool2D', + 'MaxUnPool3D', + 'HingeEmbeddingLoss', + 'Identity', + 'RReLU', ] diff --git a/python/paddle/nn/clip.py b/python/paddle/nn/clip.py index e868cbdbacc..61143175fd4 100644 --- a/python/paddle/nn/clip.py +++ b/python/paddle/nn/clip.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# TODO: define the functions to clip gradient of parameter +# TODO: define the functions to clip gradient of parameter from ..fluid.clip import ClipGradByGlobalNorm # noqa: F401 from ..fluid.clip import ClipGradByNorm # noqa: F401 from ..fluid.clip import ClipGradByValue # noqa: F401 diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index fa5a56c4686..5e4d0dd3558 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -124,110 +124,110 @@ from .extension import temporal_shift # noqa: F401 from .sparse_attention import sparse_attention -__all__ = [ #noqa - 'celu', - 'conv1d', - 'conv1d_transpose', - 'conv2d', - 'conv2d_transpose', - 'conv3d', - 'conv3d_transpose', - 'elu', - 'elu_', - 'gelu', - 'hardshrink', - 'hardtanh', - 'hardsigmoid', - 'hardswish', - 'leaky_relu', - 'log_sigmoid', - 'maxout', - 'prelu', - 'relu', - 'relu_', - 'relu6', - 'selu', - 'softmax', - 'softmax_', - 'softplus', - 'softshrink', - 'softsign', - 'sigmoid', - 'silu', - 'swish', - 'mish', - 'tanh', - 'tanh_', - 'tanhshrink', - 'thresholded_relu', - 'log_softmax', - 'glu', - 'gumbel_softmax', - 'diag_embed', - 'sequence_mask', - 'dropout', - 'dropout2d', - 'dropout3d', - 'alpha_dropout', - 'label_smooth', - 'linear', - 'pad', - 'zeropad2d', - 'unfold', - 'interpolate', - 'upsample', - 'bilinear', - 'cosine_similarity', - 'avg_pool1d', - 'avg_pool2d', - 'avg_pool3d', - 'max_pool1d', - 'max_pool2d', - 'max_pool3d', - 'max_unpool1d', - 'max_unpool2d', - 'max_unpool3d', - 'adaptive_avg_pool1d', - 'adaptive_avg_pool2d', - 'adaptive_avg_pool3d', - 'adaptive_max_pool1d', - 'adaptive_max_pool2d', - 'adaptive_max_pool3d', - 'binary_cross_entropy', - 'binary_cross_entropy_with_logits', - 'cross_entropy', - 'dice_loss', - 'hsigmoid_loss', - 'kl_div', - 'l1_loss', - 'log_loss', - 'mse_loss', - 'margin_ranking_loss', - 'nll_loss', - 'npair_loss', - 'sigmoid_focal_loss', - 'smooth_l1_loss', - 'softmax_with_cross_entropy', - 'margin_cross_entropy', - 'square_error_cost', - 'ctc_loss', - 'hinge_embedding_loss', - 'affine_grid', - 'grid_sample', - 'local_response_norm', - 'pixel_shuffle', - 'pixel_unshuffle', - 'channel_shuffle', - 'embedding', - 'gather_tree', - 'one_hot', - 'normalize', - 'temporal_shift', - 'batch_norm', - 'layer_norm', - 'instance_norm', - 'class_center_sample', - 'sparse_attention', - 'fold', - 'rrelu', +__all__ = [ #noqa + 'celu', + 'conv1d', + 'conv1d_transpose', + 'conv2d', + 'conv2d_transpose', + 'conv3d', + 'conv3d_transpose', + 'elu', + 'elu_', + 'gelu', + 'hardshrink', + 'hardtanh', + 'hardsigmoid', + 'hardswish', + 'leaky_relu', + 'log_sigmoid', + 'maxout', + 'prelu', + 'relu', + 'relu_', + 'relu6', + 'selu', + 'softmax', + 'softmax_', + 'softplus', + 'softshrink', + 'softsign', + 'sigmoid', + 'silu', + 'swish', + 'mish', + 'tanh', + 'tanh_', + 'tanhshrink', + 'thresholded_relu', + 'log_softmax', + 'glu', + 'gumbel_softmax', + 'diag_embed', + 'sequence_mask', + 'dropout', + 'dropout2d', + 'dropout3d', + 'alpha_dropout', + 'label_smooth', + 'linear', + 'pad', + 'zeropad2d', + 'unfold', + 'interpolate', + 'upsample', + 'bilinear', + 'cosine_similarity', + 'avg_pool1d', + 'avg_pool2d', + 'avg_pool3d', + 'max_pool1d', + 'max_pool2d', + 'max_pool3d', + 'max_unpool1d', + 'max_unpool2d', + 'max_unpool3d', + 'adaptive_avg_pool1d', + 'adaptive_avg_pool2d', + 'adaptive_avg_pool3d', + 'adaptive_max_pool1d', + 'adaptive_max_pool2d', + 'adaptive_max_pool3d', + 'binary_cross_entropy', + 'binary_cross_entropy_with_logits', + 
'cross_entropy', + 'dice_loss', + 'hsigmoid_loss', + 'kl_div', + 'l1_loss', + 'log_loss', + 'mse_loss', + 'margin_ranking_loss', + 'nll_loss', + 'npair_loss', + 'sigmoid_focal_loss', + 'smooth_l1_loss', + 'softmax_with_cross_entropy', + 'margin_cross_entropy', + 'square_error_cost', + 'ctc_loss', + 'hinge_embedding_loss', + 'affine_grid', + 'grid_sample', + 'local_response_norm', + 'pixel_shuffle', + 'pixel_unshuffle', + 'channel_shuffle', + 'embedding', + 'gather_tree', + 'one_hot', + 'normalize', + 'temporal_shift', + 'batch_norm', + 'layer_norm', + 'instance_norm', + 'class_center_sample', + 'sparse_attention', + 'fold', + 'rrelu', ] diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index dd314868b69..aed8fbb0f58 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -71,11 +71,10 @@ def celu(x, alpha=1.0, name=None): check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'celu') helper = LayerHelper("celu", **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='celu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'alpha': alpha}) + helper.append_op(type='celu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'alpha': alpha}) return out @@ -123,11 +122,10 @@ def elu(x, alpha=1.0, name=None): check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'elu') helper = LayerHelper("elu", **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='elu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'alpha': alpha}) + helper.append_op(type='elu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'alpha': alpha}) return out @@ -190,11 +188,10 @@ def gelu(x, approximate=False, name=None): check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'gelu') helper = LayerHelper("gelu", **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='gelu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'approximate': approximate}) + helper.append_op(type='gelu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'approximate': approximate}) return out @@ -239,11 +236,10 @@ def hardshrink(x, threshold=0.5, name=None): 'hardshrink') helper = LayerHelper('hardshrink', **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='hard_shrink', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'threshold': threshold}) + helper.append_op(type='hard_shrink', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'threshold': threshold}) return out @@ -291,12 +287,13 @@ def hardtanh(x, min=-1.0, max=1.0, name=None): helper = LayerHelper('hardtanh', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='brelu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'t_min': min, - 't_max': max}) + helper.append_op(type='brelu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 't_min': min, + 't_max': max + }) return out @@ -346,12 +343,13 @@ def hardsigmoid(x, slope=0.1666667, offset=0.5, name=None): helper = LayerHelper('hardsigmoid', **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='hard_sigmoid', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'slope': slope, - 'offset': offset}) + helper.append_op(type='hard_sigmoid', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'slope': slope, + 'offset': offset + 
}) return out @@ -449,11 +447,10 @@ def leaky_relu(x, negative_slope=0.01, name=None): 'leaky_relu') helper = LayerHelper('leaky_relu', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='leaky_relu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'alpha': negative_slope}) + helper.append_op(type='leaky_relu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'alpha': negative_slope}) return out @@ -540,13 +537,16 @@ def prelu(x, weight, data_format="NCHW", name=None): helper = LayerHelper('prelu', **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type="prelu", - inputs={"X": x, - "Alpha": weight}, - outputs={"Out": out}, - attrs={"mode": mode, - "data_format": data_format}) + helper.append_op(type="prelu", + inputs={ + "X": x, + "Alpha": weight + }, + outputs={"Out": out}, + attrs={ + "mode": mode, + "data_format": data_format + }) return out @@ -628,18 +628,18 @@ def rrelu(x, lower=1. / 8., upper=1. / 3., training=True, name=None): if not isinstance(lower, float) or not isinstance(upper, float): raise TypeError( - "The lower and upper values must be float type. Received: lower {}, upper {}.". - format(lower, upper)) + "The lower and upper values must be float type. Received: lower {}, upper {}." + .format(lower, upper)) if lower < 0 or lower > 1: raise ValueError( - "The lower value must be no less than zero or greater than one. Received: {}.". - format(lower)) + "The lower value must be no less than zero or greater than one. Received: {}." + .format(lower)) if upper < lower: raise ValueError( - "The upper value must be greater than lower value. Received: lower {}, upper {}.". - format(lower, upper)) + "The upper value must be greater than lower value. Received: lower {}, upper {}." + .format(lower, upper)) if upper > 1: raise ValueError( @@ -657,12 +657,13 @@ def rrelu(x, lower=1. / 8., upper=1. 
/ 3., training=True, name=None): out = helper.create_variable_for_type_inference(x.dtype) noise = helper.create_variable_for_type_inference(dtype=x.dtype) attrs = {'lower': lower, 'upper': upper, 'is_test': is_test} - helper.append_op( - type='rrelu', - inputs={"X": x}, - outputs={"Out": out, - "Noise": noise}, - attrs=attrs) + helper.append_op(type='rrelu', + inputs={"X": x}, + outputs={ + "Out": out, + "Noise": noise + }, + attrs=attrs) return out @@ -822,12 +823,13 @@ def maxout(x, groups, axis=1, name=None): helper = LayerHelper('maxout', **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='maxout', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'groups': groups, - 'axis': axis}) + helper.append_op(type='maxout', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'groups': groups, + 'axis': axis + }) return out @@ -864,11 +866,10 @@ def relu6(x, name=None): check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'relu6') helper = LayerHelper('relu6', **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='relu6', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'threshold': threshold}) + helper.append_op(type='relu6', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'threshold': threshold}) return out @@ -925,12 +926,13 @@ def selu(x, check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'selu') helper = LayerHelper('selu', **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='selu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'scale': scale, - 'alpha': alpha}) + helper.append_op(type='selu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'scale': scale, + 'alpha': alpha + }) return out @@ -1104,27 +1106,30 @@ def softmax(x, axis=-1, dtype=None, name=None): check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'softmax') else: - check_dtype(dtype, 'dtype', ['float32', 'float64'], 'softmax', - 'If dtype is not None, it only support float32 or float64.') + check_dtype( + dtype, 'dtype', ['float32', 'float64'], 'softmax', + 'If dtype is not None, it only support float32 or float64.') helper = LayerHelper("softmax", **locals()) outs_cast = x if dtype is not None: outs_cast = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='cast', - inputs={'X': x}, - outputs={'Out': outs_cast}, - attrs={'in_dtype': x.dtype, - 'out_dtype': dtype}) + helper.append_op(type='cast', + inputs={'X': x}, + outputs={'Out': outs_cast}, + attrs={ + 'in_dtype': x.dtype, + 'out_dtype': dtype + }) outs_softmax = helper.create_variable_for_type_inference(outs_cast.dtype) - helper.append_op( - type='softmax', - inputs={'X': outs_cast}, - outputs={'Out': outs_softmax}, - attrs={'axis': axis, - 'use_cudnn': use_cudnn}) + helper.append_op(type='softmax', + inputs={'X': outs_cast}, + outputs={'Out': outs_softmax}, + attrs={ + 'axis': axis, + 'use_cudnn': use_cudnn + }) return outs_softmax @@ -1177,12 +1182,13 @@ def softplus(x, beta=1, threshold=20, name=None): 'softplus') helper = LayerHelper('softplus', **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='softplus', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'beta': beta, - 'threshold': threshold}) + helper.append_op(type='softplus', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'beta': beta, + 'threshold': threshold + }) return out @@ -1234,11 +1240,10 @@ def softshrink(x, threshold=0.5, 
name=None): 'softshrink') helper = LayerHelper('softshrink', **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='softshrink', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'lambda': threshold}) + helper.append_op(type='softshrink', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'lambda': threshold}) return out @@ -1313,11 +1318,10 @@ def swish(x, name=None): check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'swish') helper = LayerHelper('swish', **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='swish', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'beta': 1.0}) + helper.append_op(type='swish', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'beta': 1.0}) return out @@ -1442,11 +1446,10 @@ def thresholded_relu(x, threshold=1.0, name=None): 'thresholded_relu') helper = LayerHelper('thresholded_relu', **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='thresholded_relu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'threshold': threshold}) + helper.append_op(type='thresholded_relu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'threshold': threshold}) return out @@ -1520,26 +1523,27 @@ def log_softmax(x, axis=-1, dtype=None, name=None): check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'log_softmax') else: - check_dtype(dtype, 'dtype', ['float32', 'float64'], 'log_softmax', - 'If dtype is not None, it only support float32 or float64.') + check_dtype( + dtype, 'dtype', ['float32', 'float64'], 'log_softmax', + 'If dtype is not None, it only support float32 or float64.') helper = LayerHelper("log_softmax", **locals()) out_cast = x if dtype is not None: out_cast = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='cast', - inputs={'X': x}, - outputs={'Out': out_cast}, - attrs={'in_dtype': x.dtype, - 'out_dtype': dtype}) + helper.append_op(type='cast', + inputs={'X': x}, + outputs={'Out': out_cast}, + attrs={ + 'in_dtype': x.dtype, + 'out_dtype': dtype + }) out = helper.create_variable_for_type_inference(out_cast.dtype) - helper.append_op( - type='log_softmax', - inputs={'X': out_cast}, - outputs={'Out': out}, - attrs={'axis': axis}) + helper.append_op(type='log_softmax', + inputs={'X': out_cast}, + outputs={'Out': out}, + attrs={'axis': axis}) return out @@ -1659,11 +1663,12 @@ def gumbel_softmax(x, temperature=1.0, hard=False, axis=-1, name=None): helper = LayerHelper("gumbel_softmax", **locals()) check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'gumbel_softmax') out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='gumbel_softmax', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'temperature': temperature, - 'hard': hard, - 'axis': axis}) + helper.append_op(type='gumbel_softmax', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'temperature': temperature, + 'hard': hard, + 'axis': axis + }) return out diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 7fed1dbb487..e10a1c10691 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -20,7 +20,7 @@ from ...tensor import concat from ...tensor.creation import zeros from paddle.static import Variable from ...fluid import dygraph_utils -# TODO: define the common functions to build a neural network +# TODO: define the common functions to build a neural network from ...tensor.manipulation 
import squeeze from ...tensor.manipulation import unsqueeze from ...tensor import clip @@ -157,16 +157,15 @@ def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None): dilations) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="unfold", - inputs={"X": x}, - outputs={"Y": out}, - attrs={ - "kernel_sizes": kernel_sizes, - "strides": strides, - "paddings": paddings, - "dilations": dilations - }) + helper.append_op(type="unfold", + inputs={"X": x}, + outputs={"Y": out}, + attrs={ + "kernel_sizes": kernel_sizes, + "strides": strides, + "paddings": paddings, + "dilations": dilations + }) return out @@ -517,8 +516,11 @@ def interpolate(x, assert (isinstance(dim, int)) temp_out = helper.create_variable_for_type_inference( 'int32') - fill_constant( - [1], 'int32', dim, force_cpu=True, out=temp_out) + fill_constant([1], + 'int32', + dim, + force_cpu=True, + out=temp_out) new_size_tensor.append(temp_out) size_list.append(dim) inputs['SizeTensor'] = new_size_tensor @@ -603,11 +605,10 @@ def interpolate(x, out = _C_ops.bicubic_interp_v2(x, *dy_attr) return out out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='{}_interp_v2'.format(resample_type), - inputs=inputs, - outputs={"Out": out}, - attrs=attrs) + helper.append_op(type='{}_interp_v2'.format(resample_type), + inputs=inputs, + outputs={"Out": out}, + attrs=attrs) return out @@ -862,8 +863,9 @@ def bilinear(x1, x2, weight, bias=None, name=None): helper = LayerHelper("bilinear", **locals()) out = helper.create_variable_for_type_inference(dtype=x1.dtype) - helper.append_op( - type="bilinear_tensor_product", inputs=inputs, outputs={"Out": out}) + helper.append_op(type="bilinear_tensor_product", + inputs=inputs, + outputs={"Out": out}) return out @@ -1013,7 +1015,8 @@ def dropout(x, raise ValueError("p argument should between 0 and 1") if mode not in ('downscale_in_infer', 'upscale_in_train'): raise ValueError( - "mode argument should be 'downscale_in_infer' or 'upscale_in_train'") + "mode argument should be 'downscale_in_infer' or 'upscale_in_train'" + ) if axis and not isinstance(axis, (int, list, tuple)): raise TypeError("datatype of axis argument should be int or list") @@ -1030,10 +1033,11 @@ def dropout(x, seed if seed is not None else 0, seed is not None) return out - out, mask = _C_ops.dropout( - x, 'dropout_prob', p, 'is_test', not training, 'fix_seed', - seed is not None, 'seed', seed - if seed is not None else 0, 'dropout_implementation', mode) + out, mask = _C_ops.dropout(x, 'dropout_prob', p, 'is_test', + not training, 'fix_seed', seed + is not None, 'seed', + seed if seed is not None else 0, + 'dropout_implementation', mode) return out helper = LayerHelper('dropout', **locals()) @@ -1058,12 +1062,13 @@ def dropout(x, attrs = get_attrs(helper.main_program, p, not training, seed) - helper.append_op( - type='dropout', - inputs={'X': [x]}, - outputs={'Out': [out], - 'Mask': [mask]}, - attrs=attrs) + helper.append_op(type='dropout', + inputs={'X': [x]}, + outputs={ + 'Out': [out], + 'Mask': [mask] + }, + attrs=attrs) return out else: #sometimes called dropout_nd #TODO: optimize with c++ if not in_dynamic_mode(): @@ -1087,8 +1092,8 @@ def dropout(x, .format(len(input_shape), max(drop_axes))) if len(drop_axes) > len(input_shape): raise ValueError( - "length of axis should not be greater than dimensions of x:{}, but get length of axis: {}". 
- format(len(input_shape), len(drop_axes))) + "length of axis should not be greater than dimensions of x:{}, but get length of axis: {}" + .format(len(input_shape), len(drop_axes))) mask_shape = [1] * len(input_shape) if not in_dynamic_mode(): for i in drop_axes: @@ -1098,8 +1103,10 @@ def dropout(x, mask_shape[i] = input_shape[i] #get mask - random_tensor = paddle.uniform( - mask_shape, dtype='float32', min=0., max=1.0) + random_tensor = paddle.uniform(mask_shape, + dtype='float32', + min=0., + max=1.0) p = full(shape=[1], fill_value=p, dtype='float32') keep_mask = paddle.greater_equal(random_tensor, p) @@ -1159,13 +1166,12 @@ def dropout2d(x, p=0.5, training=True, data_format='NCHW', name=None): "Attr(data_format) should be 'NCHW' or 'NHWC'. Received " "Attr(data_format): %s." % str(data_format)) - return dropout( - x, - p=p, - axis=[0, 1] if data_format == 'NCHW' else [0, 3], - training=training, - mode="upscale_in_train", - name=name) + return dropout(x, + p=p, + axis=[0, 1] if data_format == 'NCHW' else [0, 3], + training=training, + mode="upscale_in_train", + name=name) def dropout3d(x, p=0.5, training=True, data_format='NCDHW', name=None): @@ -1213,13 +1219,12 @@ def dropout3d(x, p=0.5, training=True, data_format='NCDHW', name=None): "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " "Attr(data_format): %s." % str(data_format)) - return dropout( - x, - p=p, - axis=[0, 1] if data_format == 'NCDHW' else [0, 4], - training=training, - mode="upscale_in_train", - name=name) + return dropout(x, + p=p, + axis=[0, 1] if data_format == 'NCDHW' else [0, 4], + training=training, + mode="upscale_in_train", + name=name) def alpha_dropout(x, p=0.5, training=True, name=None): @@ -1276,20 +1281,20 @@ def alpha_dropout(x, p=0.5, training=True, name=None): input_shape = x.shape #get mask - random_tensor = paddle.uniform( - input_shape, dtype='float32', min=0., max=1.0) + random_tensor = paddle.uniform(input_shape, + dtype='float32', + min=0., + max=1.0) p = full(shape=[1], fill_value=p, dtype='float32') keep_mask = paddle.greater_equal(random_tensor, p) keep_mask = paddle.cast(keep_mask, dtype) drop_mask = paddle.subtract( - full( - shape=input_shape, fill_value=1., dtype=dtype), keep_mask) + full(shape=input_shape, fill_value=1., dtype=dtype), keep_mask) #apply mask b = full(shape=[1], fill_value=b, dtype=dtype) y = paddle.add(paddle.multiply(x, keep_mask), - paddle.scale( - drop_mask, scale=alpha_p)) + paddle.scale(drop_mask, scale=alpha_p)) res = paddle.add(paddle.scale(y, scale=a), b, name=name) return res else: # test @@ -1419,8 +1424,8 @@ def pad(x, pad, mode='constant', value=0, data_format="NCHW", name=None): x_dim = len(x.shape) - if mode == "constant" and isinstance(pad, ( - list, tuple)) and len(pad) == x_dim * 2: + if mode == "constant" and isinstance( + pad, (list, tuple)) and len(pad) == x_dim * 2: paddings = pad pad_value = value check_variable_and_dtype(x, 'x', [ @@ -1431,12 +1436,13 @@ def pad(x, pad, mode='constant', value=0, data_format="NCHW", name=None): helper = LayerHelper('pad', **locals()) dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='pad', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'paddings': paddings, - 'pad_value': float(pad_value)}) + helper.append_op(type='pad', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'paddings': paddings, + 'pad_value': float(pad_value) + }) return out assert x_dim in [ @@ -1521,8 +1527,10 @@ def pad(x, pad, mode='constant', value=0, 
data_format="NCHW", name=None): dtype = helper.input_dtype(input_param_name='input') out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='pad3d', inputs=inputs, outputs={"Out": out}, attrs=attrs) + helper.append_op(type='pad3d', + inputs=inputs, + outputs={"Out": out}, + attrs=attrs) if len(unsqueezed_dim) != 0: out = squeeze(out, axis=unsqueezed_dim) @@ -1676,7 +1684,7 @@ def linear(x, weight, bias=None, name=None): # [2.1077576 2.1077576 2.1077576 2.1077576 ]] """ if in_dygraph_mode(): - #TODO(jiabin): using addmm for fast forward route + #TODO(jiabin): using addmm for fast forward route return _C_ops.final_state_linear(x, weight, bias) else: if _in_legacy_dygraph(): @@ -1699,19 +1707,19 @@ def linear(x, weight, bias=None, name=None): inputs = {'X': [x], 'Y': [weight]} attrs = {'trans_x': False, 'trans_y': False} tmp = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='matmul_v2', - inputs=inputs, - outputs={'Out': tmp}, - attrs=attrs) + helper.append_op(type='matmul_v2', + inputs=inputs, + outputs={'Out': tmp}, + attrs=attrs) if bias is not None: res = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='elementwise_add', - inputs={'X': [tmp], - 'Y': [bias]}, - outputs={'Out': [res]}, - attrs={'axis': len(x.shape) - 1}) + helper.append_op(type='elementwise_add', + inputs={ + 'X': [tmp], + 'Y': [bias] + }, + outputs={'Out': [res]}, + attrs={'axis': len(x.shape) - 1}) else: res = tmp return res @@ -1791,12 +1799,13 @@ def label_smooth(label, prior_dist=None, epsilon=0.1, name=None): helper = LayerHelper("label_smooth", **locals()) label.stop_gradient = True smooth_label = helper.create_variable_for_type_inference(label.dtype) - helper.append_op( - type="label_smooth", - inputs={"X": label, - "PriorDist": prior_dist} if prior_dist else {"X": label}, - outputs={"Out": smooth_label}, - attrs={"epsilon": float(epsilon)}) + helper.append_op(type="label_smooth", + inputs={ + "X": label, + "PriorDist": prior_dist + } if prior_dist else {"X": label}, + outputs={"Out": smooth_label}, + attrs={"epsilon": float(epsilon)}) return smooth_label @@ -1948,8 +1957,8 @@ def class_center_sample(label, num_classes, num_samples, group=None): if in_dynamic_mode(): remapped_label, sampled_class_center = _C_ops.class_center_sample( label, 'num_classes', num_classes, 'num_samples', num_samples, - 'ring_id', ring_id, 'nranks', nranks, 'rank', rank, 'fix_seed', - seed is not None, 'seed', seed if seed is not None else 0) + 'ring_id', ring_id, 'nranks', nranks, 'rank', rank, 'fix_seed', seed + is not None, 'seed', seed if seed is not None else 0) return remapped_label, sampled_class_center check_variable_and_dtype(label, 'label', ['int64', 'int32'], @@ -1960,22 +1969,21 @@ def class_center_sample(label, num_classes, num_samples, group=None): dtype=label.dtype) sampled_class_center = helper.create_variable_for_type_inference( dtype=label.dtype) - helper.append_op( - type=op_type, - inputs={'Label': label}, - outputs={ - 'RemappedLabel': remapped_label, - 'SampledLocalClassCenter': sampled_class_center - }, - attrs={ - 'num_classes': num_classes, - 'num_samples': num_samples, - 'ring_id': ring_id, - 'nranks': nranks, - 'rank': rank, - 'fix_seed': seed is not None, - 'seed': seed if seed is not None else 0 - }) + helper.append_op(type=op_type, + inputs={'Label': label}, + outputs={ + 'RemappedLabel': remapped_label, + 'SampledLocalClassCenter': sampled_class_center + }, + attrs={ + 'num_classes': num_classes, + 'num_samples': num_samples, + 
'ring_id': ring_id, + 'nranks': nranks, + 'rank': rank, + 'fix_seed': seed is not None, + 'seed': seed if seed is not None else 0 + }) return remapped_label, sampled_class_center @@ -2099,15 +2107,14 @@ def fold(x, paddings, "dilations", dilations) else: out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="fold", - inputs={"X": x}, - outputs={"Y": out}, - attrs={ - "output_sizes": output_sizes, - "kernel_sizes": kernel_sizes, - "strides": strides, - "paddings": paddings, - "dilations": dilations - }) + helper.append_op(type="fold", + inputs={"X": x}, + outputs={"Y": out}, + attrs={ + "output_sizes": output_sizes, + "kernel_sizes": kernel_sizes, + "strides": strides, + "paddings": paddings, + "dilations": dilations + }) return out diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 419014daf64..26f07c2f9a1 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -79,8 +79,8 @@ def _update_padding_nd(padding, channel_last, num_dims): "Non-zero padding({}) in the batch or channel dimensions " "is not supported.".format(padding)) padding_algorithm = "EXPLICIT" - padding = _exclude_padding_in_batch_and_channel(padding, - channel_last) + padding = _exclude_padding_in_batch_and_channel( + padding, channel_last) if _is_symmetric_padding(padding, num_dims): padding = padding[0::2] # for padding like [pad_before, pad_after, pad_before, pad_after, ...] @@ -101,8 +101,8 @@ def _update_padding_nd(padding, channel_last, num_dims): padding = convert_to_list(padding, num_dims, 'padding') if not all([p >= 0 for p in padding]): raise ValueError( - "Invalid padding, all value should be larger than or equal to 0, but received: {}". - format(padding)) + "Invalid padding, all value should be larger than or equal to 0, but received: {}" + .format(padding)) return padding, padding_algorithm @@ -123,9 +123,10 @@ def _conv_nd(x, # Due to the poor performance of NHWC, we transpose the input to NCHW. 
if in_dygraph_mode() and op_type == "conv2d": - pre_bias = _C_ops.final_state_conv2d( - x, weight, stride, padding, padding_algorithm, groups, dilation, - data_format, False, -1, False) + pre_bias = _C_ops.final_state_conv2d(x, weight, stride, padding, + padding_algorithm, groups, + dilation, data_format, False, -1, + False) if bias is not None: channel_dim = channel_dim + len( x.shape) if channel_dim < 0 else channel_dim @@ -147,22 +148,23 @@ def _conv_nd(x, channel_dim = channel_dim + len( x.shape) if channel_dim < 0 else channel_dim tmp_bias = _C_ops.final_state_reshape( - bias, bias.shape + - [1 for i in range(len(x.shape) - channel_dim - 1)]) + bias, + bias.shape + [1 for i in range(len(x.shape) - channel_dim - 1)]) return _C_ops.final_state_add(pre_bias, tmp_bias) else: return pre_bias if in_dygraph_mode() and op_type == "conv3d": - pre_bias = _C_ops.final_state_conv3d( - x, weight, stride, padding, padding_algorithm, groups, dilation, - data_format, False, -1, False) + pre_bias = _C_ops.final_state_conv3d(x, weight, stride, padding, + padding_algorithm, groups, + dilation, data_format, False, -1, + False) if bias is not None: channel_dim = channel_dim + len( x.shape) if channel_dim < 0 else channel_dim tmp_bias = _C_ops.final_state_reshape( - bias, bias.shape + - [1 for i in range(len(x.shape) - channel_dim - 1)]) + bias, + bias.shape + [1 for i in range(len(x.shape) - channel_dim - 1)]) return _C_ops.final_state_add(pre_bias, tmp_bias) else: return pre_bias @@ -197,17 +199,22 @@ def _conv_nd(x, dtype = helper.input_dtype(input_param_name='x') pre_bias = helper.create_variable_for_type_inference(dtype) outputs = {"Output": [pre_bias]} - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) if bias is not None: out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='elementwise_add', - inputs={'X': [pre_bias], - 'Y': [bias]}, - outputs={'Out': [out]}, - attrs={'axis': channel_dim, - 'use_mkldnn': use_mkldnn}) + helper.append_op(type='elementwise_add', + inputs={ + 'X': [pre_bias], + 'Y': [bias] + }, + outputs={'Out': [out]}, + attrs={ + 'axis': channel_dim, + 'use_mkldnn': use_mkldnn + }) else: out = pre_bias return out @@ -364,8 +371,8 @@ def conv1d(x, x.shape, num_channels)) if groups <= 0: raise ValueError( - "The groups of conv1d should be greater than 0. Received groups: {}". - format(groups)) + "The groups of conv1d should be greater than 0. Received groups: {}" + .format(groups)) if num_channels % groups != 0: raise ValueError( "the channel of input must be divisible by groups," @@ -386,8 +393,8 @@ def conv1d(x, padding = [0] + padding else: raise ValueError( - "The size of padding's dimension should be 1 or 2. But got padding={}". - format(padding)) + "The size of padding's dimension should be 1 or 2. 
But got padding={}" + .format(padding)) stride = [1] + convert_to_list(stride, 1, 'stride') dilation = [1] + convert_to_list(dilation, 1, 'dilation') weight = unsqueeze(weight, axis=[-2]) @@ -395,8 +402,8 @@ def conv1d(x, l_type = "conv2d" # When "groups==num_channels and num_filters% num_channels == 0" using depthwise_conv2d has better performance - if (is_compiled_with_cuda() and num_channels == groups and - num_channels != 1 and num_filters % num_channels == 0): + if (is_compiled_with_cuda() and num_channels == groups and num_channels != 1 + and num_filters % num_channels == 0): l_type = 'depthwise_conv2d' use_cudnn = False @@ -437,8 +444,10 @@ def conv1d(x, dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype) outputs = {"Output": [out]} - helper.append_op( - type=l_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=l_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) if bias is not None: out = nn.elementwise_add(out, bias, axis=channel_dim) out = squeeze(out, axis=[squeeze_aixs]) @@ -591,8 +600,8 @@ def conv2d(x, x.shape, num_channels)) if groups <= 0: raise ValueError( - "The groups of conv2d should be greater than 0. Received groups: {}". - format(groups)) + "The groups of conv2d should be greater than 0. Received groups: {}" + .format(groups)) if num_channels % groups != 0: raise ValueError( "the channel of input must be divisible by groups," @@ -606,8 +615,8 @@ def conv2d(x, cudnn_version = get_cudnn_version() - use_cudnn = True if (is_compiled_with_cuda() and - cudnn_version is not None) else False + use_cudnn = True if (is_compiled_with_cuda() + and cudnn_version is not None) else False # update attrs padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2) @@ -615,8 +624,8 @@ def conv2d(x, dilation = convert_to_list(dilation, 2, 'dilation') l_type = "conv2d" - if (num_channels == groups and num_channels != 1 and - num_filters % num_channels == 0): + if (num_channels == groups and num_channels != 1 + and num_filters % num_channels == 0): l_type = 'depthwise_conv2d' if is_compiled_with_rocm(): use_cudnn = True @@ -624,9 +633,10 @@ def conv2d(x, use_cudnn = False else: if in_dygraph_mode(): - pre_bias = _C_ops.final_state_conv2d( - x, weight, stride, padding, padding_algorithm, groups, dilation, - data_format, False, -1, False) + pre_bias = _C_ops.final_state_conv2d(x, weight, stride, padding, + padding_algorithm, groups, + dilation, data_format, False, + -1, False) if bias is not None: out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) return out @@ -642,8 +652,8 @@ def conv2d(x, else: l_type = 'conv2d' - if (is_compiled_with_cuda() and get_flags("FLAGS_conv2d_disable_cudnn")[ - "FLAGS_conv2d_disable_cudnn"]): + if (is_compiled_with_cuda() and get_flags("FLAGS_conv2d_disable_cudnn") + ["FLAGS_conv2d_disable_cudnn"]): use_cudnn = False return _conv_nd(x, weight, bias, stride, padding, padding_algorithm, @@ -818,8 +828,8 @@ def conv1d_transpose(x, x.shape, num_channels)) if groups <= 0: raise ValueError( - "The groups of conv1d_transpose should be greater than 0. Received groups: {}". - format(groups)) + "The groups of conv1d_transpose should be greater than 0. 
Received groups: {}" + .format(groups)) if num_channels % groups != 0: raise ValueError( "the channel of input must be divisible by groups," @@ -862,13 +872,13 @@ def conv1d_transpose(x, if len(output_padding) > 0 and output_padding[0] > stride[0]: raise ValueError( "The size of output_padding should not be greater than stride." - "But got output_padding={} and stride={}".format(output_padding[0], - stride[0])) + "But got output_padding={} and stride={}".format( + output_padding[0], stride[0])) op_type = 'conv2d_transpose' num_filters = weight.shape[1] - if (num_channels == groups and num_channels != 1 and num_filters == 1 and - not use_cudnn): + if (num_channels == groups and num_channels != 1 and num_filters == 1 + and not use_cudnn): op_type = 'depthwise_conv2d_transpose' use_cudnn = False @@ -905,8 +915,10 @@ def conv1d_transpose(x, dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype) outputs = {"Output": [out]} - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) if bias is not None: out = nn.elementwise_add(out, bias, axis=channel_dim) @@ -1079,8 +1091,8 @@ def conv2d_transpose(x, x.shape, num_channels)) if groups <= 0: raise ValueError( - "The groups of conv2d_transpose should be greater than 0. Received groups: {}". - format(groups)) + "The groups of conv2d_transpose should be greater than 0. Received groups: {}" + .format(groups)) if num_channels % groups != 0: raise ValueError( "the channel of input must be divisible by groups," @@ -1089,8 +1101,8 @@ def conv2d_transpose(x, cudnn_version = get_cudnn_version() - use_cudnn = True if (is_compiled_with_cuda() and - cudnn_version is not None) else False + use_cudnn = True if (is_compiled_with_cuda() + and cudnn_version is not None) else False # update attrs padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2) @@ -1158,8 +1170,10 @@ def conv2d_transpose(x, helper = LayerHelper(op_type, **locals()) pre_bias = helper.create_variable_for_type_inference(x.dtype) outputs = {"Output": [pre_bias]} - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) if bias is not None: out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) @@ -1301,22 +1315,22 @@ def conv3d(x, "Received: {}.".format(x.shape, num_channels)) if groups <= 0: raise ValueError( - "The groups of conv3d should be greater than 0. Received groups: {}". - format(groups)) + "The groups of conv3d should be greater than 0. Received groups: {}" + .format(groups)) if num_channels % groups != 0: raise ValueError( "The number of input channels must be divisible by Attr(groups). " - "Received: number of channels({}), groups({}).".format(num_channels, - groups)) + "Received: number of channels({}), groups({}).".format( + num_channels, groups)) if num_filters % groups != 0: raise ValueError( "The number of filters must be divisible by Attr(groups). 
" - "Received: number of filters({}), groups({}).".format(num_filters, - groups)) + "Received: number of filters({}), groups({}).".format( + num_filters, groups)) cudnn_version = get_cudnn_version() - use_cudnn = True if (is_compiled_with_cuda() and - cudnn_version is not None) else False + use_cudnn = True if (is_compiled_with_cuda() + and cudnn_version is not None) else False padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3) stride = convert_to_list(stride, 3, 'stride') @@ -1500,13 +1514,13 @@ def conv3d_transpose(x, "Received: {}.".format(x.shape, num_channels)) if groups <= 0: raise ValueError( - "The groups of conv3d_transpose should be greater than 0. Received groups: {}". - format(groups)) + "The groups of conv3d_transpose should be greater than 0. Received groups: {}" + .format(groups)) if num_channels % groups != 0: raise ValueError( "The number of input channels must be divisible by Attr(groups). " - "Received: number of channels({}), groups({}).".format(num_channels, - groups)) + "Received: number of channels({}), groups({}).".format( + num_channels, groups)) padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3) stride = convert_to_list(stride, 3, 'stride') @@ -1531,8 +1545,8 @@ def conv3d_transpose(x, cudnn_version = get_cudnn_version() #TODO(LielinJiang): whether to use cudnn according to the version of cudnn - use_cudnn = True if (is_compiled_with_cuda() and - cudnn_version is not None) else False + use_cudnn = True if (is_compiled_with_cuda() + and cudnn_version is not None) else False op_type = 'conv3d_transpose' data_format_ = "NHWC" if channel_last else "NCHW" @@ -1576,8 +1590,10 @@ def conv3d_transpose(x, pre_bias = helper.create_variable_for_type_inference(x.dtype) outputs = {"Output": [pre_bias]} - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) if bias is not None: out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) else: diff --git a/python/paddle/nn/functional/extension.py b/python/paddle/nn/functional/extension.py index 5a6bf4c0fa6..27bc2ef70bc 100644 --- a/python/paddle/nn/functional/extension.py +++ b/python/paddle/nn/functional/extension.py @@ -135,13 +135,14 @@ def diag_embed(input, offset=0, dim1=-2, dim2=-1): out = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='diag_embed', - inputs={'Input': [input]}, - attrs={'offset': offset, - 'dim1': dim1, - 'dim2': dim2}, - outputs={'Out': [out]}) + helper.append_op(type='diag_embed', + inputs={'Input': [input]}, + attrs={ + 'offset': offset, + 'dim1': dim1, + 'dim2': dim2 + }, + outputs={'Out': [out]}) out.stop_gradient = True return out @@ -230,8 +231,10 @@ def sequence_mask(x, maxlen=None, dtype='int64', name=None): else: attrs['maxlen'] = maxlen - helper.append_op( - type='sequence_mask', inputs=inputs, outputs={'Y': out}, attrs=attrs) + helper.append_op(type='sequence_mask', + inputs=inputs, + outputs={'Y': out}, + attrs=attrs) out.stop_gradient = True return out @@ -311,11 +314,12 @@ def gather_tree(ids, parents): 'gather_tree') out = helper.create_variable_for_type_inference(dtype=ids.dtype) - helper.append_op( - type="gather_tree", - inputs={"Ids": ids, - "Parents": parents}, - outputs={"Out": out}) + helper.append_op(type="gather_tree", + inputs={ + "Ids": ids, + "Parents": parents + }, + outputs={"Out": out}) return out @@ -371,13 +375,12 @@ def temporal_shift(x, seg_num, shift_ratio=0.25, name=None, 
data_format="NCHW"): if not isinstance(seg_num, int): raise TypeError("seg_num must be int type.") - helper.append_op( - type="temporal_shift", - inputs={"X": x}, - outputs={"Out": out}, - attrs={ - "seg_num": seg_num, - "shift_ratio": shift_ratio, - "data_format": data_format - }) + helper.append_op(type="temporal_shift", + inputs={"X": x}, + outputs={"Out": out}, + attrs={ + "seg_num": seg_num, + "shift_ratio": shift_ratio, + "data_format": data_format + }) return out diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py index 92b3a7054d4..01a5f991f42 100644 --- a/python/paddle/nn/functional/input.py +++ b/python/paddle/nn/functional/input.py @@ -20,6 +20,7 @@ from ...fluid.data_feeder import check_variable_and_dtype, check_dtype from paddle import _C_ops from paddle import in_dynamic_mode from ...fluid.framework import _in_legacy_dygraph, in_dygraph_mode + __all__ = [] @@ -107,12 +108,11 @@ def one_hot(x, num_classes, name=None): num_classes.stop_gradient = True inputs = {'X': x, 'depth_tensor': num_classes} attrs = {'allow_out_of_range': False} - helper.append_op( - type="one_hot_v2", - inputs=inputs, - attrs=attrs, - outputs={'Out': one_hot_out}, - stop_gradient=True) + helper.append_op(type="one_hot_v2", + inputs=inputs, + attrs=attrs, + outputs={'Out': one_hot_out}, + stop_gradient=True) return one_hot_out @@ -203,9 +203,10 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None): if in_dygraph_mode(): return _C_ops.final_state_embedding(x, weight, padding_idx, sparse) elif _in_legacy_dygraph(): - return _C_ops.lookup_table_v2( - weight, x, 'is_sparse', sparse, 'is_distributed', False, - 'remote_prefetch', False, 'padding_idx', padding_idx) + return _C_ops.lookup_table_v2(weight, x, 'is_sparse', sparse, + 'is_distributed', False, + 'remote_prefetch', False, 'padding_idx', + padding_idx) else: helper = LayerHelper('embedding', **locals()) dtype = helper.input_dtype(input_param_name='weight') @@ -219,15 +220,16 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None): tmp = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='lookup_table_v2', - inputs={'Ids': x, - 'W': weight}, - outputs={'Out': tmp}, - attrs={ - 'is_sparse': sparse, - 'is_distributed': is_distributed, - 'remote_prefetch': remote_prefetch, - 'padding_idx': padding_idx - }) + helper.append_op(type='lookup_table_v2', + inputs={ + 'Ids': x, + 'W': weight + }, + outputs={'Out': tmp}, + attrs={ + 'is_sparse': sparse, + 'is_distributed': is_distributed, + 'remote_prefetch': remote_prefetch, + 'padding_idx': padding_idx + }) return tmp diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index c0527a7a652..e6a3fdb464c 100755 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -30,6 +30,7 @@ from paddle import _C_ops from paddle import in_dynamic_mode from paddle.framework import core from ...fluid.framework import _in_legacy_dygraph, in_dygraph_mode, _non_static_mode, _current_expected_place + __all__ = [] @@ -79,10 +80,10 @@ def dice_loss(input, label, epsilon=0.00001, name=None): assert label.dtype in (paddle.int32, paddle.int64) assert len(input.shape) >= 2, \ "The rank of input should be greater than or equal to 2." - assert len(input.shape) == len(label.shape), ( - "The rank of input and label should be equal, " - "but received input: %d, label: %d." 
% - (len(input.shape), len(label.shape))) + assert len(input.shape) == len( + label.shape), ("The rank of input and label should be equal, " + "but received input: %d, label: %d." % + (len(input.shape), len(label.shape))) assert label.shape[-1] == 1, ("The last dimension of label should be 1, " "but received %d." % label.shape[-1]) assert input.shape[:-1] == label.shape[:-1], ( @@ -146,12 +147,13 @@ def log_loss(input, label, epsilon=1e-4, name=None): loss = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='log_loss', - inputs={'Predicted': [input], - 'Labels': [label]}, - outputs={'Loss': [loss]}, - attrs={'epsilon': epsilon}) + helper.append_op(type='log_loss', + inputs={ + 'Predicted': [input], + 'Labels': [label] + }, + outputs={'Loss': [loss]}, + attrs={'epsilon': epsilon}) return loss @@ -291,12 +293,13 @@ def fluid_softmax_with_cross_entropy(logits, if core.is_compiled_with_npu() or core.is_compiled_with_mlu(): backprop = helper.create_variable_for_type_inference(dtype=logits.dtype) outputs['Backprop'] = backprop - helper.append_op( - type='softmax_with_cross_entropy', - inputs={'Logits': logits, - 'Label': label}, - outputs=outputs, - attrs=attrs) + helper.append_op(type='softmax_with_cross_entropy', + inputs={ + 'Logits': logits, + 'Label': label + }, + outputs=outputs, + attrs=attrs) if return_softmax: return loss, softmax @@ -354,19 +357,22 @@ def npair_loss(anchor, positive, labels, l2_reg=0.002): labels = paddle.reshape(labels, shape=[batch_size, 1]) labels = paddle.tile(labels, repeat_times=[1, batch_size]) - labels = paddle.equal( - labels, paddle.transpose( - labels, perm=[1, 0])).astype('float32') + labels = paddle.equal(labels, paddle.transpose(labels, + perm=[1, + 0])).astype('float32') labels = labels / paddle.sum(labels, axis=1, keepdim=True) l2loss = paddle.mean(paddle.sum(paddle.square(anchor), 1)) \ + paddle.mean(paddle.sum(paddle.square(positive), 1)) l2loss = l2loss * Beta * l2_reg - similarity_matrix = paddle.matmul( - anchor, positive, transpose_x=False, transpose_y=True) - softmax_ce = fluid_softmax_with_cross_entropy( - logits=similarity_matrix, label=labels, soft_label=True) + similarity_matrix = paddle.matmul(anchor, + positive, + transpose_x=False, + transpose_y=True) + softmax_ce = fluid_softmax_with_cross_entropy(logits=similarity_matrix, + label=labels, + soft_label=True) cross_entropy = paddle.sum(labels * softmax_ce, 0) celoss = paddle.mean(cross_entropy) @@ -418,16 +424,17 @@ def square_error_cost(input, label): 'square_error_cost') helper = LayerHelper('square_error_cost', **locals()) minus_out = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='elementwise_sub', - inputs={'X': [input], - 'Y': [label]}, - outputs={'Out': [minus_out]}) + helper.append_op(type='elementwise_sub', + inputs={ + 'X': [input], + 'Y': [label] + }, + outputs={'Out': [minus_out]}) square_out = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='square', inputs={'X': [minus_out]}, - outputs={'Out': [square_out]}) + helper.append_op(type='square', + inputs={'X': [minus_out]}, + outputs={'Out': [square_out]}) return square_out @@ -513,18 +520,16 @@ def edit_distance(input, erased_input = helper.create_variable_for_type_inference(dtype="int64") erased_label = helper.create_variable_for_type_inference(dtype="int64") - helper.append_op( - type="sequence_erase", - inputs={"X": [input]}, - outputs={"Out": [erased_input]}, - attrs={"tokens": ignored_tokens}) + 
helper.append_op(type="sequence_erase", + inputs={"X": [input]}, + outputs={"Out": [erased_input]}, + attrs={"tokens": ignored_tokens}) input = erased_input - helper.append_op( - type="sequence_erase", - inputs={"X": [label]}, - outputs={"Out": [erased_label]}, - attrs={"tokens": ignored_tokens}) + helper.append_op(type="sequence_erase", + inputs={"X": [label]}, + outputs={"Out": [erased_label]}, + attrs={"tokens": ignored_tokens}) label = erased_label this_inputs = {"Hyps": [input], "Refs": [label]} @@ -535,17 +540,21 @@ def edit_distance(input, # edit distance op edit_distance_out = helper.create_variable_for_type_inference(dtype="int64") sequence_num = helper.create_variable_for_type_inference(dtype="int64") - helper.append_op( - type="edit_distance", - inputs=this_inputs, - outputs={"Out": [edit_distance_out], - "SequenceNum": [sequence_num]}, - attrs={"normalized": normalized}) + helper.append_op(type="edit_distance", + inputs=this_inputs, + outputs={ + "Out": [edit_distance_out], + "SequenceNum": [sequence_num] + }, + attrs={"normalized": normalized}) return edit_distance_out, sequence_num -def binary_cross_entropy(input, label, weight=None, reduction='mean', +def binary_cross_entropy(input, + label, + weight=None, + reduction='mean', name=None): """ This op measures the binary_cross_entropy loss between input predictions ``input`` @@ -650,13 +659,12 @@ def binary_cross_entropy(input, label, weight=None, reduction='mean', sub_name = name if weight is None and reduction == 'none' else None helper = LayerHelper("binary_cross_entropy", name=sub_name) out = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='bce_loss', - inputs={ - 'X': [input], - 'Label': [label], - }, - outputs={'Out': [out]}) + helper.append_op(type='bce_loss', + inputs={ + 'X': [input], + 'Label': [label], + }, + outputs={'Out': [out]}) if weight is not None: if isinstance(weight, paddle.static.Variable): @@ -765,16 +773,16 @@ def binary_cross_entropy_with_logits(logit, if _non_static_mode(): if in_dygraph_mode(): - one = _C_ops.final_state_full([1], - float(1.0), core.VarDesc.VarType.FP32, + one = _C_ops.final_state_full([1], float(1.0), + core.VarDesc.VarType.FP32, _current_expected_place()) out = _C_ops.final_state_sigmoid_cross_entropy_with_logits( logit, label, False, -100) else: one = _varbase_creator(dtype=logit.dtype) - _C_ops.fill_constant(one, 'value', - float(1.0), 'force_cpu', False, 'dtype', - one.dtype, 'str_value', '1.0', 'shape', [1]) + _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False, + 'dtype', one.dtype, 'str_value', '1.0', + 'shape', [1]) out = _C_ops.sigmoid_cross_entropy_with_logits(logit, label) if pos_weight is not None: log_weight = _C_ops.elementwise_add( @@ -914,9 +922,11 @@ def hsigmoid_loss(input, """ if _non_static_mode(): - out, _, _ = _C_ops.hierarchical_sigmoid( - input, weight, label, path_table, path_code, bias, 'num_classes', - num_classes, 'is_sparse', is_sparse, 'remote_prefetch', is_sparse) + out, _, _ = _C_ops.hierarchical_sigmoid(input, weight, label, + path_table, path_code, bias, + 'num_classes', num_classes, + 'is_sparse', is_sparse, + 'remote_prefetch', is_sparse) return out check_variable_and_dtype(input, 'input', ['float32', 'float64'], @@ -954,11 +964,10 @@ def hsigmoid_loss(input, pre_out = helper.create_variable_for_type_inference(input.dtype) outputs = {"Out": out, "PreOut": pre_out, "W_Out": weight} - helper.append_op( - type="hierarchical_sigmoid", - inputs=inputs, - outputs=outputs, - attrs=attrs) + 
helper.append_op(type="hierarchical_sigmoid", + inputs=inputs, + outputs=outputs, + attrs=attrs) return out @@ -1033,13 +1042,16 @@ def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None): dtype=helper.input_dtype()) out = helper.create_variable_for_type_inference( dtype=helper.input_dtype()) - helper.append_op( - type='huber_loss', - inputs={'X': input, - 'Y': label}, - outputs={'Out': out, - 'Residual': residual}, - attrs={'delta': delta}) + helper.append_op(type='huber_loss', + inputs={ + 'X': input, + 'Y': label + }, + outputs={ + 'Out': out, + 'Residual': residual + }, + attrs={'delta': delta}) if reduction not in ['sum', 'mean', 'none']: raise ValueError( @@ -1148,25 +1160,24 @@ def margin_ranking_loss(input, result_out = helper.create_variable_for_type_inference(input.dtype) if reduction == 'none': - helper.append_op( - type="relu", inputs={"X": out}, outputs={"Out": result_out}) + helper.append_op(type="relu", + inputs={"X": out}, + outputs={"Out": result_out}) return result_out elif reduction == 'sum': out = paddle.nn.functional.relu(out) attrs = {"dim": [0], "keep_dim": False, "reduce_all": True} - helper.append_op( - type="reduce_sum", - inputs={"X": out}, - outputs={"Out": result_out}, - attrs=attrs) + helper.append_op(type="reduce_sum", + inputs={"X": out}, + outputs={"Out": result_out}, + attrs=attrs) return result_out elif reduction == 'mean': out = paddle.nn.functional.relu(out) - helper.append_op( - type="mean", - inputs={"X": out}, - outputs={"Out": result_out}, - attrs={}) + helper.append_op(type="mean", + inputs={"X": out}, + outputs={"Out": result_out}, + attrs={}) return result_out @@ -1233,8 +1244,11 @@ def l1_loss(input, label, reduction='mean', name=None): "received %s, which is not allowed." % reduction) if in_dygraph_mode(): - unreduced = _elementwise_op_in_dygraph( - input, label, axis=-1, act='abs', op_name='elementwise_sub') + unreduced = _elementwise_op_in_dygraph(input, + label, + axis=-1, + act='abs', + op_name='elementwise_sub') if reduction == 'mean': return _C_ops.final_state_mean_all(unreduced) elif reduction == 'sum': @@ -1243,8 +1257,11 @@ def l1_loss(input, label, reduction='mean', name=None): else: return unreduced elif in_dynamic_mode(): - unreduced = _elementwise_op_in_dygraph( - input, label, axis=-1, act='abs', op_name='elementwise_sub') + unreduced = _elementwise_op_in_dygraph(input, + label, + axis=-1, + act='abs', + op_name='elementwise_sub') if reduction == 'mean': return _C_ops.mean(unreduced) elif reduction == 'sum': @@ -1253,10 +1270,12 @@ def l1_loss(input, label, reduction='mean', name=None): else: return unreduced - check_variable_and_dtype( - input, 'input', ['float32', 'float64', 'int32', 'int64'], 'l1_loss') - check_variable_and_dtype( - label, 'label', ['float32', 'float64', 'int32', 'int64'], 'l1_loss') + check_variable_and_dtype(input, 'input', + ['float32', 'float64', 'int32', 'int64'], + 'l1_loss') + check_variable_and_dtype(label, 'label', + ['float32', 'float64', 'int32', 'int64'], + 'l1_loss') if reduction == 'sum': unreduced = paddle.fluid.layers.elementwise_sub(input, label, act='abs') @@ -1265,8 +1284,10 @@ def l1_loss(input, label, reduction='mean', name=None): unreduced = paddle.fluid.layers.elementwise_sub(input, label, act='abs') return paddle.mean(unreduced, name=name) else: - return paddle.fluid.layers.elementwise_sub( - input, label, act='abs', name=name) + return paddle.fluid.layers.elementwise_sub(input, + label, + act='abs', + name=name) def nll_loss(input, @@ -1328,8 +1349,8 @@ def 
nll_loss(input, input_shape = list(input.shape) input_dims = len(input_shape) if input_dims < 2: - raise ValueError('Expected 2 or more dimensions (got {})'.format( - input_dims)) + raise ValueError( + 'Expected 2 or more dimensions (got {})'.format(input_dims)) n = input_shape[0] c = input_shape[1] if in_dygraph_mode(): @@ -1374,8 +1395,10 @@ def nll_loss(input, total_weight = helper.create_variable_for_type_inference(dtype=input.dtype) outputs = {'Out': out, 'Total_weight': total_weight} - helper.append_op( - type='nll_loss', inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type='nll_loss', + inputs=inputs, + outputs=outputs, + attrs=attrs) if input_dims != 2 and input_dims != 4 and reduction == 'none': out = reshape(out, shape=out_shape) @@ -1489,12 +1512,13 @@ def kl_div(input, label, reduction='mean', name=None): fluid.data_feeder.check_type(reduction, 'reduction', str, 'kl_div') loss = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='kldiv_loss', - inputs={'X': input, - 'Target': label}, - outputs={'Loss': loss}, - attrs={'reduction': 'none'}) + helper.append_op(type='kldiv_loss', + inputs={ + 'X': input, + 'Target': label + }, + outputs={'Loss': loss}, + attrs={'reduction': 'none'}) if reduction == 'mean': loss = paddle.mean(loss) @@ -1570,8 +1594,8 @@ def mse_loss(input, label, reduction='mean', name=None): if reduction == 'none': return paddle.square(paddle.subtract(input, label), name=name) elif reduction == 'mean': - return paddle.mean( - paddle.square(paddle.subtract(input, label)), name=name) + return paddle.mean(paddle.square(paddle.subtract(input, label)), + name=name) else: return paddle.sum(paddle.square(paddle.subtract(input, label)), name=name) @@ -1924,22 +1948,25 @@ def margin_cross_entropy(logits, check_variable_and_dtype(label, 'label', ['int32', 'int64'], 'margin_cross_entropy') - helper.append_op( - type=op_type, - inputs={'Logits': logits, - 'Label': label}, - outputs={'Softmax': softmax, - 'Loss': loss}, - attrs={ - 'return_softmax': return_softmax, - 'ring_id': ring_id, - 'rank': rank, - 'nranks': nranks, - 'margin1': margin1, - 'margin2': margin2, - 'margin3': margin3, - 'scale': scale, - }) + helper.append_op(type=op_type, + inputs={ + 'Logits': logits, + 'Label': label + }, + outputs={ + 'Softmax': softmax, + 'Loss': loss + }, + attrs={ + 'return_softmax': return_softmax, + 'ring_id': ring_id, + 'rank': rank, + 'nranks': nranks, + 'margin1': margin1, + 'margin2': margin2, + 'margin3': margin3, + 'scale': scale, + }) if reduction == 'mean': loss = paddle.mean(loss) @@ -1956,9 +1983,9 @@ def margin_cross_entropy(logits, since="2.0.0", update_to="paddle.nn.functional.cross_entropy", level=1, - reason=( - 'Please notice that behavior of "paddle.nn.functional.softmax_with_cross_entropy" ' - 'and "paddle.nn.functional.cross_entropy" is different.')) + reason= + ('Please notice that behavior of "paddle.nn.functional.softmax_with_cross_entropy" ' + 'and "paddle.nn.functional.cross_entropy" is different.')) def softmax_with_cross_entropy(logits, label, soft_label=False, @@ -2247,8 +2274,8 @@ def cross_entropy(input, if _non_static_mode(): if soft_label == False: - valid_label = paddle.cast( - label != ignore_index, dtype=label.dtype) * label + valid_label = paddle.cast(label != ignore_index, + dtype=label.dtype) * label label_min = paddle.min(valid_label) label_max = paddle.max(valid_label) if label_min < 0: @@ -2281,11 +2308,11 @@ def cross_entropy(input, # weight's shape is C, where C is class num. 
# for 1d case: label's shape is [N,C], weight_gather's shape is N. # for 2d case: label's shape is [N,H,W,C], weight_gather's shape is [N,H,W]. - weight_gather = paddle.matmul( - x=paddle.cast(label, weight.dtype), - y=weight, - transpose_x=False, - transpose_y=True) + weight_gather = paddle.matmul(x=paddle.cast( + label, weight.dtype), + y=weight, + transpose_x=False, + transpose_y=True) out_shape = list(out.shape) weight_gather_reshape = reshape(weight_gather, shape=out_shape) out = paddle.cast(out, weight_gather_reshape.dtype) @@ -2318,8 +2345,8 @@ def cross_entropy(input, weight_gather = _C_ops.elementwise_mul(weight_gather, ignore_weight_mask) input_shape = list(label.shape) - weight_gather_reshape = reshape( - weight_gather, shape=input_shape) + weight_gather_reshape = reshape(weight_gather, + shape=input_shape) out = paddle.cast(out, weight_gather_reshape.dtype) out = _C_ops.elementwise_mul(out, weight_gather_reshape) @@ -2390,12 +2417,13 @@ def cross_entropy(input, if core.is_compiled_with_npu() or core.is_compiled_with_mlu(): backprop = helper.create_variable_for_type_inference(dtype=input.dtype) outputs['Backprop'] = backprop - helper.append_op( - type='softmax_with_cross_entropy', - inputs={'Logits': input, - 'Label': label}, - outputs=outputs, - attrs=attrs) + helper.append_op(type='softmax_with_cross_entropy', + inputs={ + 'Logits': input, + 'Label': label + }, + outputs=outputs, + attrs=attrs) if weight is not None: check_variable_and_dtype(weight, 'weight', ['float32', 'float64'], @@ -2407,11 +2435,10 @@ def cross_entropy(input, # weight's shape is C, where C is class num. # for 1d case: label's shape is [N,C], weight_gather's shape is N. # for 2d case: label's shape is [N,H,W,C], weight_gather's shape is [N,H,W]. - weight_gather = paddle.matmul( - x=paddle.cast(label, weight.dtype), - y=weight, - transpose_x=False, - transpose_y=True) + weight_gather = paddle.matmul(x=paddle.cast(label, weight.dtype), + y=weight, + transpose_x=False, + transpose_y=True) out_shape = list(out.shape) weight_gather_reshape = reshape(weight_gather, shape=out_shape) @@ -2424,8 +2451,7 @@ def cross_entropy(input, .format(input.shape[axis], weight.shape[-1])) valid_label = paddle.multiply( - paddle.cast( - label != ignore_index, dtype=label.dtype), label) + paddle.cast(label != ignore_index, dtype=label.dtype), label) ignore_weight_mask = paddle.cast((label != ignore_index), input.dtype) if ignore_weight_mask.ndim > 1 and ignore_weight_mask.shape[ @@ -2567,14 +2593,14 @@ def sigmoid_focal_loss(logit, normalizer_dims = len(normalizer_shape) if normalizer_dims > 1: raise ValueError( - "Expected one dimension of normalizer in sigmoid_focal_loss but got {}.". - format(normalizer_dims)) + "Expected one dimension of normalizer in sigmoid_focal_loss but got {}." 
+ .format(normalizer_dims)) if _non_static_mode(): one = _varbase_creator(dtype=logit.dtype) - _C_ops.fill_constant(one, 'value', - float(1.0), 'force_cpu', False, 'dtype', one.dtype, - 'str_value', '1.0', 'shape', logit.shape) + _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False, + 'dtype', one.dtype, 'str_value', '1.0', 'shape', + logit.shape) if in_dygraph_mode(): loss = _C_ops.final_state_sigmoid_cross_entropy_with_logits( logit, label, False, -100) @@ -2583,21 +2609,19 @@ def sigmoid_focal_loss(logit, pred = _C_ops.sigmoid(logit) p_t = _C_ops.elementwise_add( _C_ops.elementwise_mul(pred, label), - _C_ops.elementwise_mul( - _C_ops.elementwise_sub(one, pred), - _C_ops.elementwise_sub(one, label))) + _C_ops.elementwise_mul(_C_ops.elementwise_sub(one, pred), + _C_ops.elementwise_sub(one, label))) alpha = fluid.dygraph.base.to_variable([alpha], dtype=loss.dtype) alpha_t = _C_ops.elementwise_add( _C_ops.elementwise_mul(alpha, label), - _C_ops.elementwise_mul( - _C_ops.elementwise_sub(one, alpha), - _C_ops.elementwise_sub(one, label))) + _C_ops.elementwise_mul(_C_ops.elementwise_sub(one, alpha), + _C_ops.elementwise_sub(one, label))) loss = _C_ops.elementwise_mul(alpha_t, loss) gamma = fluid.dygraph.base.to_variable([gamma], dtype=loss.dtype) - gamma_t = _C_ops.elementwise_pow( - _C_ops.elementwise_sub(one, p_t), gamma) + gamma_t = _C_ops.elementwise_pow(_C_ops.elementwise_sub(one, p_t), + gamma) loss = _C_ops.elementwise_mul(gamma_t, loss) if normalizer is not None: diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index f64e731342e..7bc9f105cac 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -86,8 +86,8 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None): if _in_legacy_dygraph(): eps = fluid.dygraph.base.to_variable([epsilon], dtype=x.dtype) - out = _C_ops.p_norm(x, 'axis', axis, 'porder', - float(p), 'keepdim', True, 'epsilon', epsilon) + out = _C_ops.p_norm(x, 'axis', axis, 'porder', float(p), 'keepdim', + True, 'epsilon', epsilon) return x / _C_ops.elementwise_max(out, eps) check_type(p, 'p', (float, int), 'normalize') @@ -96,8 +96,8 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None): 'normalize') if len(x.shape) == 1 and axis != 0 and axis != -1: raise ValueError( - "Axis must be 0 or -1 when x is a 1-D tensor, but received axis = {}". 
- format(axis)) + "Axis must be 0 or -1 when x is a 1-D tensor, but received axis = {}" + .format(axis)) attrs = { 'axis': axis, @@ -107,8 +107,10 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None): } helper = LayerHelper('p_norm', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='p_norm', inputs={'X': x}, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='p_norm', + inputs={'X': x}, + outputs={'Out': out}, + attrs=attrs) eps = out.block.create_var(dtype=out.dtype) eps = paddle.full(shape=[1], fill_value=epsilon, dtype=out.dtype) return paddle.divide(x, paddle.maximum(out, eps), name=name) @@ -192,8 +194,8 @@ def batch_norm(x, data_format, not training, use_global_stats, trainable_statistics, False) - return dygraph_utils._append_activation_in_dygraph( - batch_norm_out, act=None) + return dygraph_utils._append_activation_in_dygraph(batch_norm_out, + act=None) elif _in_legacy_dygraph(): # for dygraph need tuple @@ -206,8 +208,8 @@ def batch_norm(x, x, weight, bias, running_mean, running_var, None, mean_out, variance_out, *attrs) - return dygraph_utils._append_activation_in_dygraph( - batch_norm_out, act=None) + return dygraph_utils._append_activation_in_dygraph(batch_norm_out, + act=None) check_variable_and_dtype(x, 'input', ['float16', 'float32', 'float64'], 'BatchNorm') @@ -235,8 +237,8 @@ def batch_norm(x, helper = LayerHelper('batch_norm', **locals()) param_dtype = x.dtype if x.dtype != 'float16' else 'float32' - saved_mean = helper.create_variable_for_type_inference( - dtype=param_dtype, stop_gradient=True) + saved_mean = helper.create_variable_for_type_inference(dtype=param_dtype, + stop_gradient=True) saved_variance = helper.create_variable_for_type_inference( dtype=param_dtype, stop_gradient=True) batch_norm_out = helper.create_variable_for_type_inference(x.dtype) @@ -255,8 +257,10 @@ def batch_norm(x, dtype=x.dtype, stop_gradient=True) outputs["ReserveSpace"] = [reserve_space] - helper.append_op( - type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type="batch_norm", + inputs=inputs, + outputs=outputs, + attrs=attrs) return helper.append_activation(batch_norm_out) @@ -315,8 +319,8 @@ def layer_norm(x, str_normalized_shape = str(normalized_shape) raise ValueError('Given normalized_shape is ' + str_normalized_shape + ', expected input with shape [*, ' + - str_normalized_shape[ - 1:] + ', but got input shape ' + str(input_shape)) + str_normalized_shape[1:] + ', but got input shape ' + + str(input_shape)) if in_dygraph_mode(): pre_act, _, _, = _C_ops.final_state_layer_norm(x, weight, bias, epsilon, @@ -344,22 +348,23 @@ def layer_norm(x, helper = LayerHelper('layer_norm', **locals()) dtype = x.dtype - mean_out = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True) - variance_out = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True) + mean_out = helper.create_variable_for_type_inference(dtype=dtype, + stop_gradient=True) + variance_out = helper.create_variable_for_type_inference(dtype=dtype, + stop_gradient=True) layer_norm_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="layer_norm", - inputs=inputs, - outputs={ - "Y": layer_norm_out, - "Mean": mean_out, - "Variance": variance_out, - }, - attrs={"epsilon": epsilon, - "begin_norm_axis": begin_norm_axis}) + helper.append_op(type="layer_norm", + inputs=inputs, + outputs={ + "Y": layer_norm_out, + "Mean": mean_out, + "Variance": variance_out, + }, + 
attrs={ + "epsilon": epsilon, + "begin_norm_axis": begin_norm_axis + }) return helper.append_activation(layer_norm_out) @@ -426,8 +431,8 @@ def instance_norm(x, inputs = {"X": [x]} helper = LayerHelper('instance_norm', **locals()) - saved_mean = helper.create_variable_for_type_inference( - dtype=x.dtype, stop_gradient=True) + saved_mean = helper.create_variable_for_type_inference(dtype=x.dtype, + stop_gradient=True) saved_variance = helper.create_variable_for_type_inference( dtype=x.dtype, stop_gradient=True) instance_norm_out = helper.create_variable_for_type_inference(x.dtype) @@ -438,8 +443,10 @@ def instance_norm(x, "SavedVariance": [saved_variance] } - helper.append_op( - type="instance_norm", inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type="instance_norm", + inputs=inputs, + outputs=outputs, + attrs=attrs) return instance_norm_out @@ -516,8 +523,8 @@ def local_response_norm(x, for i, sz in enumerate(sizes): if not sz > 0 and i > 0: raise ValueError("Expected every dim's size to be larger than 0, " - "but the size of the {}-th dim is {}".format(i, - sz)) + "but the size of the {}-th dim is {}".format( + i, sz)) channel_last = True if data_format[-1] == "C" else False @@ -538,24 +545,26 @@ def local_response_norm(x, pad4d_shape = [size // 2, (size - 1) // 2, 0, 0] pool2d_shape = (1, size) reshape_shape = [ - sizes[0], 1, sizes[1], int(sum_sizes / (sizes[1] * sizes[-1])), - sizes[-1] + sizes[0], 1, sizes[1], + int(sum_sizes / (sizes[1] * sizes[-1])), sizes[-1] ] pad5d_shape = [size // 2, (size - 1) // 2, 0, 0, 0, 0] pool3d_shape = (1, 1, size) if dim == 3: div = paddle.nn.functional.pad(div, pad=pad4d_shape) - div = paddle.nn.functional.avg_pool2d( - div, kernel_size=pool2d_shape, stride=1) + div = paddle.nn.functional.avg_pool2d(div, + kernel_size=pool2d_shape, + stride=1) div = paddle.squeeze(div, axis=1) else: div = paddle.reshape(div, shape=reshape_shape) div = paddle.nn.functional.pad(div, pad=pad5d_shape, data_format='NCDHW') - div = paddle.nn.functional.avg_pool3d( - div, kernel_size=pool3d_shape, stride=1) + div = paddle.nn.functional.avg_pool3d(div, + kernel_size=pool3d_shape, + stride=1) div = paddle.reshape(paddle.squeeze(div, axis=1), sizes) div = paddle.scale(div, scale=alpha, bias=k) diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 6a573005f45..f79a43fbc03 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -38,16 +38,18 @@ def _check_input(x, dimension): def _check_instance(x, x_name, types=(int, float)): if not isinstance(x, types): - raise ValueError("Excepted {} type for {} but received type: {}. ". - format(types, x_name, type(x))) + raise ValueError( + "Excepted {} type for {} but received type: {}. ".format( + types, x_name, type(x))) def _check_value_limitation(x, x_name, min_limit=1e-3): + def _check_value(x, x_name, min_limit=1e-3): if isinstance(x, int) and min_limit is not None and x < min_limit: raise ValueError( - "Excepted the input {} to be greater than {} but received x: {}. ". - format(x_name, min_limit, x)) + "Excepted the input {} to be greater than {} but received x: {}. 
" + .format(x_name, min_limit, x)) for ele in x: _check_value(ele, x_name) @@ -118,8 +120,8 @@ def _update_padding_nd(padding, num_dims, channel_last=False, ceil_mode=False): "Non-zero padding({}) in the batch or channel dimensions " "is not supported.".format(padding)) padding_algorithm = "EXPLICIT" - padding = _exclude_padding_in_batch_and_channel(padding, - channel_last) + padding = _exclude_padding_in_batch_and_channel( + padding, channel_last) if utils._is_symmetric_padding(padding, num_dims): padding = padding[0::2] # for padding like [pad_before, pad_after, pad_before, pad_after, ...] @@ -149,8 +151,8 @@ def _expand_low_nd_padding(padding): padding = [0] + padding else: raise ValueError( - "The size of padding's dimmention should be 1 or 2. But got padding={}". - format(padding)) + "The size of padding's dimmention should be 1 or 2. But got padding={}" + .format(padding)) return padding @@ -226,19 +228,22 @@ def avg_pool1d(x, _check_value_limitation(stride, "stride", min_limit=1e-3) channel_last = _channel_last("NCL", 1) - padding, padding_algorithm = _update_padding_nd( - padding, 1, channel_last=channel_last, ceil_mode=ceil_mode) + padding, padding_algorithm = _update_padding_nd(padding, + 1, + channel_last=channel_last, + ceil_mode=ceil_mode) # use 2d to implenment 1d should expand padding in advance. padding = _expand_low_nd_padding(padding) if in_dynamic_mode(): - output = _C_ops.pool2d( - x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', - False, 'strides', stride, 'paddings', padding, 'padding_algorithm', - padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, - 'use_mkldnn', False, 'exclusive', exclusive, 'data_format', - data_format) + output = _C_ops.pool2d(x, 'pooling_type', 'avg', 'ksize', kernel_size, + 'global_pooling', False, 'strides', stride, + 'paddings', padding, 'padding_algorithm', + padding_algorithm, 'use_cudnn', True, + 'ceil_mode', ceil_mode, 'use_mkldnn', False, + 'exclusive', exclusive, 'data_format', + data_format) return squeeze(output, [2]) op_type = 'pool2d' @@ -246,23 +251,22 @@ def avg_pool1d(x, dtype = helper.input_dtype(input_param_name='x') pool_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type=op_type, - inputs={"X": x}, - outputs={"Out": pool_out}, - attrs={ - "pooling_type": 'avg', - "ksize": kernel_size, - "global_pooling": False, - "strides": stride, - "paddings": padding, - "padding_algorithm": padding_algorithm, - "use_cudnn": True, - "ceil_mode": ceil_mode, - "use_mkldnn": False, - "exclusive": exclusive, - "data_format": data_format, - }) + helper.append_op(type=op_type, + inputs={"X": x}, + outputs={"Out": pool_out}, + attrs={ + "pooling_type": 'avg', + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": exclusive, + "data_format": data_format, + }) return squeeze(pool_out, [2]) @@ -343,21 +347,25 @@ def avg_pool2d(x, _check_value_limitation(stride, "stride", min_limit=1e-3) channel_last = _channel_last(data_format, 2) - padding, padding_algorithm = _update_padding_nd( - padding, 2, channel_last, ceil_mode=ceil_mode) + padding, padding_algorithm = _update_padding_nd(padding, + 2, + channel_last, + ceil_mode=ceil_mode) if in_dygraph_mode() or _in_legacy_dygraph(): if in_dygraph_mode(): - output = _C_ops.final_state_pool2d( - x, kernel_size, stride, padding, ceil_mode, exclusive, - data_format, 'avg', False, False, 
padding_algorithm) + output = _C_ops.final_state_pool2d(x, kernel_size, stride, padding, + ceil_mode, exclusive, + data_format, 'avg', False, False, + padding_algorithm) else: - output = _C_ops.pool2d( - x, 'pooling_type', 'avg', 'ksize', kernel_size, - 'global_pooling', False, 'padding_algorithm', padding_algorithm, - 'strides', stride, 'paddings', padding, 'use_cudnn', True, - 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', - exclusive, 'data_format', data_format) + output = _C_ops.pool2d(x, 'pooling_type', 'avg', 'ksize', + kernel_size, 'global_pooling', False, + 'padding_algorithm', padding_algorithm, + 'strides', stride, 'paddings', padding, + 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', exclusive, + 'data_format', data_format) if divisor_override is None: return output else: @@ -370,23 +378,22 @@ def avg_pool2d(x, dtype = helper.input_dtype(input_param_name='x') pool_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type=op_type, - inputs={"X": x}, - outputs={"Out": pool_out}, - attrs={ - "pooling_type": "avg", - "ksize": kernel_size, - "global_pooling": False, - "strides": stride, - "paddings": padding, - "padding_algorithm": padding_algorithm, - "use_cudnn": True, - "ceil_mode": ceil_mode, - "use_mkldnn": False, - "exclusive": exclusive, - "data_format": data_format, - }) + helper.append_op(type=op_type, + inputs={"X": x}, + outputs={"Out": pool_out}, + attrs={ + "pooling_type": "avg", + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": exclusive, + "data_format": data_format, + }) if divisor_override is None: return pool_out @@ -467,24 +474,28 @@ def avg_pool3d(x, stride = utils.convert_to_list(stride, 3, 'pool_stride') channel_last = _channel_last(data_format, 3) - padding, padding_algorithm = _update_padding_nd( - padding, 3, channel_last=channel_last, ceil_mode=ceil_mode) + padding, padding_algorithm = _update_padding_nd(padding, + 3, + channel_last=channel_last, + ceil_mode=ceil_mode) _check_value_limitation(kernel_size, "kernel_size", min_limit=1e-3) _check_value_limitation(stride, "stride", min_limit=1e-3) if in_dygraph_mode() or _in_legacy_dygraph(): if in_dygraph_mode(): - output = _C_ops.final_state_pool3d( - x, kernel_size, stride, padding, ceil_mode, exclusive, - data_format, 'avg', False, False, padding_algorithm) + output = _C_ops.final_state_pool3d(x, kernel_size, stride, padding, + ceil_mode, exclusive, + data_format, 'avg', False, False, + padding_algorithm) if _in_legacy_dygraph(): - output = _C_ops.pool3d( - x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', - stride, 'paddings', padding, 'global_pooling', False, - 'padding_algorithm', padding_algorithm, 'use_cudnn', True, - 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', - exclusive, 'data_format', data_format) + output = _C_ops.pool3d(x, 'pooling_type', 'avg', 'ksize', + kernel_size, 'strides', stride, 'paddings', + padding, 'global_pooling', False, + 'padding_algorithm', padding_algorithm, + 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', exclusive, + 'data_format', data_format) if divisor_override is None: return output else: @@ -499,23 +510,22 @@ def avg_pool3d(x, pool_out = helper.create_variable_for_type_inference(dtype) outputs = {"Out": pool_out} - helper.append_op( - type=op_type, - inputs={"X": x}, - outputs=outputs, 
- attrs={ - "pooling_type": 'avg', - "ksize": kernel_size, - "global_pooling": False, - "strides": stride, - "paddings": padding, - "padding_algorithm": padding_algorithm, - "use_cudnn": True, - "ceil_mode": ceil_mode, - "use_mkldnn": False, - "exclusive": exclusive, - "data_format": data_format, - }) + helper.append_op(type=op_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'avg', + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": exclusive, + "data_format": data_format, + }) if divisor_override is None: return pool_out @@ -591,8 +601,9 @@ def max_pool1d(x, else: stride = [1] + utils.convert_to_list(stride, 1, 'pool_stride') - padding, padding_algorithm = _update_padding_nd( - padding, 1, ceil_mode=ceil_mode) + padding, padding_algorithm = _update_padding_nd(padding, + 1, + ceil_mode=ceil_mode) # use 2d to implenment 1d should expand padding in advance. padding = _expand_low_nd_padding(padding) @@ -602,12 +613,13 @@ def max_pool1d(x, pool_out = _C_ops.final_state_max_pool2d_with_index( x, kernel_size, stride, padding, False, False) return (squeeze(pool_out[0], [2]), - squeeze(pool_out[1], - [2])) if return_mask else squeeze(pool_out[0], [2]) + squeeze(pool_out[1], [2])) if return_mask else squeeze( + pool_out[0], [2]) else: - pool_out = _C_ops.final_state_pool2d( - x, kernel_size, stride, padding, ceil_mode, True, data_format, - 'max', False, False, padding_algorithm) + pool_out = _C_ops.final_state_pool2d(x, kernel_size, stride, + padding, ceil_mode, True, + data_format, 'max', False, + False, padding_algorithm) return squeeze(pool_out, [2]) if _in_legacy_dygraph(): @@ -619,15 +631,16 @@ def max_pool1d(x, 'use_mkldnn', False, 'exclusive', True, 'data_format', data_format) return (squeeze(pool_out[0], [2]), - squeeze(pool_out[1], - [2])) if return_mask else squeeze(pool_out[0], [2]) + squeeze(pool_out[1], [2])) if return_mask else squeeze( + pool_out[0], [2]) else: - pool_out = _C_ops.pool2d( - x, 'pooling_type', 'max', 'ksize', kernel_size, - 'global_pooling', False, 'padding_algorithm', padding_algorithm, - 'strides', stride, 'paddings', padding, 'use_cudnn', True, - 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', True, - 'data_format', data_format) + pool_out = _C_ops.pool2d(x, 'pooling_type', 'max', 'ksize', + kernel_size, 'global_pooling', False, + 'padding_algorithm', padding_algorithm, + 'strides', stride, 'paddings', padding, + 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', True, + 'data_format', data_format) return squeeze(pool_out, [2]) op_type = 'max_pool2d_with_index' if return_mask else "pool2d" @@ -637,23 +650,22 @@ def max_pool1d(x, mask = helper.create_variable_for_type_inference('int32') outputs = {"Out": pool_out, "Mask": mask} - helper.append_op( - type=op_type, - inputs={"X": x}, - outputs=outputs, - attrs={ - "pooling_type": 'max', - "ksize": kernel_size, - "global_pooling": False, - "strides": stride, - "paddings": padding, - "padding_algorithm": padding_algorithm, - "use_cudnn": True, - "ceil_mode": ceil_mode, - "use_mkldnn": False, - "exclusive": True, - "data_format": data_format, - }) + helper.append_op(type=op_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'max', + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + 
"use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": True, + "data_format": data_format, + }) return (squeeze(pool_out, [2]), squeeze(mask, [2])) if return_mask else squeeze(pool_out, [2]) @@ -663,8 +675,8 @@ def _unpool_output_size(x, kernel_size, stride, padding, output_size): input_size = x.shape default_size = [] for d in range(len(kernel_size)): - default_size.append((input_size[-len(kernel_size) + d] - 1) * stride[d] - + kernel_size[d] - 2 * padding[d]) + default_size.append((input_size[-len(kernel_size) + d] - 1) * + stride[d] + kernel_size[d] - 2 * padding[d]) if output_size is None: ret = default_size else: @@ -674,14 +686,15 @@ def _unpool_output_size(x, kernel_size, stride, padding, output_size): raise ValueError( "output_size should be a sequence containing " "{} or {} elements, but it has a length of '{}'".format( - len(kernel_size), len(kernel_size) + 2, len(output_size))) + len(kernel_size), + len(kernel_size) + 2, len(output_size))) for d in range(len(kernel_size)): min_size = default_size[d] - stride[d] max_size = default_size[d] + stride[d] if not (min_size < output_size[d] < max_size): raise ValueError( - 'invalid output_size "{}" (dim {} must be between {} and {})'. - format(output_size, d, min_size, max_size)) + 'invalid output_size "{}" (dim {} must be between {} and {})' + .format(output_size, d, min_size, max_size)) ret = output_size return ret @@ -781,18 +794,19 @@ def max_unpool1d(x, dtype = helper.input_dtype(input_param_name="x") unpool_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type=op_type, - inputs={"X": x, - "Indices": indices}, - outputs={"Out": unpool_out}, - attrs={ - "unpooling_type": "max", - "ksize": kernel_size, - "strides": stride, - "paddings": padding, - "output_size": output_size - }) + helper.append_op(type=op_type, + inputs={ + "X": x, + "Indices": indices + }, + outputs={"Out": unpool_out}, + attrs={ + "unpooling_type": "max", + "ksize": kernel_size, + "strides": stride, + "paddings": padding, + "output_size": output_size + }) return squeeze(unpool_out, [2]) @@ -896,18 +910,19 @@ def max_unpool2d(x, dtype = helper.input_dtype(input_param_name="x") unpool_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type=op_type, - inputs={"X": x, - "Indices": indices}, - outputs={"Out": unpool_out}, - attrs={ - "unpooling_type": "max", - "ksize": kernel_size, - "strides": stride, - "paddings": padding, - "output_size": output_size - }) + helper.append_op(type=op_type, + inputs={ + "X": x, + "Indices": indices + }, + outputs={"Out": unpool_out}, + attrs={ + "unpooling_type": "max", + "ksize": kernel_size, + "strides": stride, + "paddings": padding, + "output_size": output_size + }) return unpool_out @@ -1008,18 +1023,19 @@ def max_unpool3d(x, dtype = helper.input_dtype(input_param_name="x") unpool_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type=op_type, - inputs={"X": x, - "Indices": indices}, - outputs={"Out": unpool_out}, - attrs={ - "unpooling_type": "max", - "ksize": kernel_size, - "strides": stride, - "paddings": padding, - "output_size": output_size - }) + helper.append_op(type=op_type, + inputs={ + "X": x, + "Indices": indices + }, + outputs={"Out": unpool_out}, + attrs={ + "unpooling_type": "max", + "ksize": kernel_size, + "strides": stride, + "paddings": padding, + "output_size": output_size + }) return unpool_out @@ -1044,8 +1060,10 @@ def max_pool2d(x, channel_last = True if data_format == "NHWC" else False - padding, 
padding_algorithm = _update_padding_nd( - padding, num_dims=2, channel_last=channel_last, ceil_mode=ceil_mode) + padding, padding_algorithm = _update_padding_nd(padding, + num_dims=2, + channel_last=channel_last, + ceil_mode=ceil_mode) if data_format == "NHWC" and return_mask: raise ValueError( @@ -1058,9 +1076,10 @@ def max_pool2d(x, x, kernel_size, stride, padding, False, False) return output if return_mask else output[0] else: - return _C_ops.final_state_pool2d( - x, kernel_size, stride, padding, ceil_mode, True, data_format, - 'max', False, False, padding_algorithm) + return _C_ops.final_state_pool2d(x, kernel_size, stride, padding, + ceil_mode, True, data_format, + 'max', False, False, + padding_algorithm) if _in_legacy_dygraph(): if return_mask: @@ -1072,12 +1091,13 @@ def max_pool2d(x, data_format) return output if return_mask else output[0] else: - output = _C_ops.pool2d( - x, 'pooling_type', 'max', 'ksize', kernel_size, - 'global_pooling', False, 'padding_algorithm', padding_algorithm, - 'strides', stride, 'paddings', padding, 'use_cudnn', True, - 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', True, - 'data_format', data_format) + output = _C_ops.pool2d(x, 'pooling_type', 'max', 'ksize', + kernel_size, 'global_pooling', False, + 'padding_algorithm', padding_algorithm, + 'strides', stride, 'paddings', padding, + 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', True, + 'data_format', data_format) return output op_type = 'max_pool2d_with_index' if return_mask else "pool2d" @@ -1089,23 +1109,22 @@ def max_pool2d(x, mask = helper.create_variable_for_type_inference("int32") outputs = {"Out": pool_out, "Mask": mask} - helper.append_op( - type=op_type, - inputs={"X": x}, - outputs=outputs, - attrs={ - "pooling_type": 'max', - "ksize": kernel_size, - "global_pooling": False, - "strides": stride, - "paddings": padding, - "padding_algorithm": padding_algorithm, - "use_cudnn": True, - "ceil_mode": ceil_mode, - "use_mkldnn": False, - "exclusive": True, - "data_format": data_format, - }) + helper.append_op(type=op_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'max', + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": True, + "data_format": data_format, + }) return (pool_out, mask) if return_mask else pool_out @@ -1184,8 +1203,10 @@ def max_pool3d(x, channel_last = _channel_last(data_format, 3) - padding, padding_algorithm = _update_padding_nd( - padding, 3, channel_last=channel_last, ceil_mode=ceil_mode) + padding, padding_algorithm = _update_padding_nd(padding, + 3, + channel_last=channel_last, + ceil_mode=ceil_mode) if data_format == "NDHWC" and return_mask: raise ValueError( @@ -1198,9 +1219,10 @@ def max_pool3d(x, x, kernel_size, stride, padding, False, False) return output if return_mask else output[0] else: - return _C_ops.final_state_pool3d( - x, kernel_size, stride, padding, ceil_mode, True, data_format, - 'max', False, False, padding_algorithm) + return _C_ops.final_state_pool3d(x, kernel_size, stride, padding, + ceil_mode, True, data_format, + 'max', False, False, + padding_algorithm) if _in_legacy_dygraph(): if return_mask: @@ -1212,12 +1234,13 @@ def max_pool3d(x, 'data_format', data_format) return output if return_mask else output[0] else: - output = _C_ops.pool3d( - x, 'pooling_type', 'max', 'ksize', kernel_size, - 'global_pooling', False, 
'padding_algorithm', padding_algorithm, - 'strides', stride, 'paddings', padding, 'use_cudnn', True, - 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', True, - 'data_format', data_format) + output = _C_ops.pool3d(x, 'pooling_type', 'max', 'ksize', + kernel_size, 'global_pooling', False, + 'padding_algorithm', padding_algorithm, + 'strides', stride, 'paddings', padding, + 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', True, + 'data_format', data_format) return output op_type = "max_pool3d_with_index" if return_mask else "pool3d" @@ -1228,23 +1251,22 @@ def max_pool3d(x, mask = helper.create_variable_for_type_inference('int32') outputs = {"Out": pool_out, "Mask": mask} - helper.append_op( - type=op_type, - inputs={"X": x}, - outputs=outputs, - attrs={ - "pooling_type": 'max', - "ksize": kernel_size, - "global_pooling": False, - "strides": stride, - "paddings": padding, - "padding_algorithm": padding_algorithm, - "use_cudnn": True, - "ceil_mode": ceil_mode, - "use_mkldnn": False, - "exclusive": False, - "data_format": data_format, - }) + helper.append_op(type=op_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'max', + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": False, + "data_format": data_format, + }) return (pool_out, mask) if return_mask else pool_out @@ -1310,15 +1332,14 @@ def adaptive_avg_pool1d(x, output_size, name=None): pool_out = helper.create_variable_for_type_inference(dtype) outputs = {"Out": pool_out} - helper.append_op( - type=l_type, - inputs={"X": x}, - outputs=outputs, - attrs={ - "pooling_type": pool_type, - "ksize": pool_size, - "adaptive": True, - }) + helper.append_op(type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": pool_type, + "ksize": pool_size, + "adaptive": True, + }) return squeeze(pool_out, [2]) @@ -1398,9 +1419,10 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): output_size[1] = in_w if in_dygraph_mode(): - return _C_ops.final_state_pool2d_gpudnn_unused( - x, output_size, [1, 1], [0, 0], False, True, data_format, 'avg', - False, True, "EXPLICIT") + return _C_ops.final_state_pool2d_gpudnn_unused(x, output_size, [1, 1], + [0, 0], False, True, + data_format, 'avg', + False, True, "EXPLICIT") if _in_legacy_dygraph(): return _C_ops.pool2d(x, 'pooling_type', 'avg', 'ksize', output_size, @@ -1415,16 +1437,15 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): outputs = {"Out": pool_out} - helper.append_op( - type=l_type, - inputs={"X": x}, - outputs=outputs, - attrs={ - "pooling_type": "avg", - "ksize": output_size, - "adaptive": True, - "data_format": data_format, - }) + helper.append_op(type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": "avg", + "ksize": output_size, + "adaptive": True, + "data_format": data_format, + }) return pool_out @@ -1519,16 +1540,15 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): pool_out = helper.create_variable_for_type_inference(dtype) outputs = {"Out": pool_out} - helper.append_op( - type=l_type, - inputs={"X": x}, - outputs=outputs, - attrs={ - "pooling_type": "avg", - "ksize": output_size, - "adaptive": True, - "data_format": data_format, - }) + helper.append_op(type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": "avg", + "ksize": 
output_size, + "adaptive": True, + "data_format": data_format, + }) return pool_out @@ -1591,8 +1611,9 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None): x = unsqueeze(x, [2]) if in_dynamic_mode(): - pool_out = _C_ops.max_pool2d_with_index( - x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True) + pool_out = _C_ops.max_pool2d_with_index(x, 'pooling_type', pool_type, + 'ksize', pool_size, 'adaptive', + True) return (squeeze(pool_out[0], [2]), squeeze( pool_out[1], [2])) if return_mask else squeeze(pool_out[0], [2]) @@ -1605,15 +1626,14 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None): mask = helper.create_variable_for_type_inference('int32') outputs = {"Out": pool_out, "Mask": mask} - helper.append_op( - type=l_type, - inputs={"X": x}, - outputs=outputs, - attrs={ - "pooling_type": pool_type, - "ksize": pool_size, - "adaptive": True, - }) + helper.append_op(type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": pool_type, + "ksize": pool_size, + "adaptive": True, + }) return (squeeze(pool_out, [2]), squeeze(mask, [2])) if return_mask else squeeze(pool_out, [2]) @@ -1680,8 +1700,9 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None): output_size[1] = in_w if in_dynamic_mode(): - pool_out = _C_ops.max_pool2d_with_index( - x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True) + pool_out = _C_ops.max_pool2d_with_index(x, 'pooling_type', 'max', + 'ksize', output_size, + 'adaptive', True) return pool_out if return_mask else pool_out[0] l_type = 'max_pool2d_with_index' @@ -1693,15 +1714,14 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None): mask = helper.create_variable_for_type_inference('int32') outputs = {"Out": pool_out, "Mask": mask} - helper.append_op( - type=l_type, - inputs={"X": x}, - outputs=outputs, - attrs={ - "pooling_type": 'max', - "ksize": output_size, - "adaptive": True, - }) + helper.append_op(type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'max', + "ksize": output_size, + "adaptive": True, + }) #return (pool_out, mask) if return_mask else pool_out return pool_out @@ -1773,8 +1793,9 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None): output_size[2] = in_w if in_dynamic_mode(): - pool_out = _C_ops.max_pool3d_with_index( - x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True) + pool_out = _C_ops.max_pool3d_with_index(x, 'pooling_type', 'max', + 'ksize', output_size, + 'adaptive', True) return pool_out if return_mask else pool_out[0] l_type = 'max_pool3d_with_index' @@ -1786,14 +1807,13 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None): mask = helper.create_variable_for_type_inference('int32') outputs = {"Out": pool_out, "Mask": mask} - helper.append_op( - type=l_type, - inputs={"X": x}, - outputs=outputs, - attrs={ - "pooling_type": 'max', - "ksize": output_size, - "adaptive": True, - }) + helper.append_op(type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'max', + "ksize": output_size, + "adaptive": True, + }) return (pool_out, mask) if return_mask else pool_out diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index 9a9c2ee4cf7..521a44f758b 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -112,11 +112,10 @@ def affine_grid(theta, out_shape, align_corners=True, name=None): else: attrs['output_shape'] = out_shape - helper.append_op( - 
type='affine_grid', - inputs=ipts, - outputs={'Output': out}, - attrs=None if len(attrs) == 0 else attrs) + helper.append_op(type='affine_grid', + inputs=ipts, + outputs={'Output': out}, + attrs=None if len(attrs) == 0 else attrs) return out @@ -256,8 +255,8 @@ def grid_sample(x, format(_modes, mode)) if padding_mode not in _padding_modes: raise ValueError( - "The padding mode of grid sample function should be in {}, but got: {}". - format(_padding_modes, padding_mode)) + "The padding mode of grid sample function should be in {}, but got: {}" + .format(_padding_modes, padding_mode)) if not isinstance(align_corners, bool): raise ValueError("The align corners should be bool, but got: {}".format( @@ -290,11 +289,10 @@ def grid_sample(x, 'use_cudnn': use_cudnn } out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='grid_sampler', - inputs=ipts, - attrs=attrs, - outputs={'Output': out}) + helper.append_op(type='grid_sampler', + inputs=ipts, + attrs=attrs, + outputs={'Output': out}) return out @@ -327,9 +325,9 @@ def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None): raise TypeError("upscale factor must be int type") if data_format not in ["NCHW", "NHWC"]: - raise ValueError("Attr(data_format) should be 'NCHW' or 'NHWC'." - "But recevie Attr(data_format): {} ".format( - data_format)) + raise ValueError( + "Attr(data_format) should be 'NCHW' or 'NHWC'." + "But recevie Attr(data_format): {} ".format(data_format)) if in_dynamic_mode(): return _C_ops.pixel_shuffle(x, "upscale_factor", upscale_factor, @@ -338,12 +336,13 @@ def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None): helper = LayerHelper("pixel_shuffle", **locals()) check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'pixel_shuffle') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="pixel_shuffle", - inputs={"X": x}, - outputs={"Out": out}, - attrs={"upscale_factor": upscale_factor, - "data_format": data_format}) + helper.append_op(type="pixel_shuffle", + inputs={"X": x}, + outputs={"Out": out}, + attrs={ + "upscale_factor": upscale_factor, + "data_format": data_format + }) return out @@ -383,9 +382,9 @@ def pixel_unshuffle(x, downscale_factor, data_format="NCHW", name=None): raise ValueError("Downscale factor must be positive") if data_format not in ["NCHW", "NHWC"]: - raise ValueError("Attr(data_format) should be 'NCHW' or 'NHWC'." - "But recevie Attr(data_format): {} ".format( - data_format)) + raise ValueError( + "Attr(data_format) should be 'NCHW' or 'NHWC'." 
+ "But recevie Attr(data_format): {} ".format(data_format)) if _non_static_mode(): return _C_ops.pixel_unshuffle(x, "downscale_factor", downscale_factor, @@ -394,14 +393,13 @@ def pixel_unshuffle(x, downscale_factor, data_format="NCHW", name=None): helper = LayerHelper("pixel_unshuffle", **locals()) check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'pixel_unshuffle') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="pixel_unshuffle", - inputs={"X": x}, - outputs={"Out": out}, - attrs={ - "downscale_factor": downscale_factor, - "data_format": data_format - }) + helper.append_op(type="pixel_unshuffle", + inputs={"X": x}, + outputs={"Out": out}, + attrs={ + "downscale_factor": downscale_factor, + "data_format": data_format + }) return out @@ -453,9 +451,9 @@ def channel_shuffle(x, groups, data_format="NCHW", name=None): raise ValueError("groups must be positive") if data_format not in ["NCHW", "NHWC"]: - raise ValueError("Attr(data_format) should be 'NCHW' or 'NHWC'." - "But recevie Attr(data_format): {} ".format( - data_format)) + raise ValueError( + "Attr(data_format) should be 'NCHW' or 'NHWC'." + "But recevie Attr(data_format): {} ".format(data_format)) if _non_static_mode(): return _C_ops.channel_shuffle(x, "groups", groups, "data_format", @@ -464,10 +462,11 @@ def channel_shuffle(x, groups, data_format="NCHW", name=None): helper = LayerHelper("channel_shuffle", **locals()) check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'channel_shuffle') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="channel_shuffle", - inputs={"X": x}, - outputs={"Out": out}, - attrs={"groups": groups, - "data_format": data_format}) + helper.append_op(type="channel_shuffle", + inputs={"X": x}, + outputs={"Out": out}, + attrs={ + "groups": groups, + "data_format": data_format + }) return out diff --git a/python/paddle/nn/initializer/__init__.py b/python/paddle/nn/initializer/__init__.py index e048ee2b1e9..530c52bf5f2 100644 --- a/python/paddle/nn/initializer/__init__.py +++ b/python/paddle/nn/initializer/__init__.py @@ -36,19 +36,8 @@ from .orthogonal import Orthogonal # noqa: F401 from .dirac import Dirac # noqa: F401 -__all__ = [ #noqa - 'Bilinear', - 'Constant', - 'KaimingUniform', - 'KaimingNormal', - 'XavierNormal', - 'XavierUniform', - 'Assign', - 'Normal', - 'TruncatedNormal', - 'Uniform', - 'Orthogonal', - 'Dirac', - 'set_global_initializer', - 'calculate_gain' +__all__ = [ #noqa + 'Bilinear', 'Constant', 'KaimingUniform', 'KaimingNormal', 'XavierNormal', + 'XavierUniform', 'Assign', 'Normal', 'TruncatedNormal', 'Uniform', + 'Orthogonal', 'Dirac', 'set_global_initializer', 'calculate_gain' ] diff --git a/python/paddle/nn/initializer/dirac.py b/python/paddle/nn/initializer/dirac.py index 9c84b01ecb9..1b5697ede40 100644 --- a/python/paddle/nn/initializer/dirac.py +++ b/python/paddle/nn/initializer/dirac.py @@ -20,6 +20,7 @@ from paddle import in_dynamic_mode from paddle.utils import unique_name from paddle import _C_ops from ... 
import fluid + __all__ = [] @@ -106,42 +107,42 @@ class Dirac(Initializer): block = self._check_block(block) assert isinstance(var, framework.Parameter) assert isinstance(block, framework.Block) - check_variable_and_dtype( - var, "Out", ['float16', 'bfloat16', 'float32', 'float64'], 'Dirac') + check_variable_and_dtype(var, "Out", + ['float16', 'bfloat16', 'float32', 'float64'], + 'Dirac') assert len(var.shape) in [ 3, 4, 5 ], "Only Tensor with 3/4/5 dimensions can be initialized by Dirac" - assert (var.shape[0] % self._groups - ) == 0, "Tensor 0-dimension must be divisible by groups" + assert ( + var.shape[0] % + self._groups) == 0, "Tensor 0-dimension must be divisible by groups" if var.dtype != VarDesc.VarType.FP32: - out_var = block.create_var( - name=unique_name.generate(".".join(['dirac', var.name, 'tmp'])), - shape=var.shape, - dtype=VarDesc.VarType.FP32, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False) + out_var = block.create_var(name=unique_name.generate(".".join( + ['dirac', var.name, 'tmp'])), + shape=var.shape, + dtype=VarDesc.VarType.FP32, + type=VarDesc.VarType.LOD_TENSOR, + persistable=False) else: out_var = var op = None if framework.in_dygraph_mode(): with fluid.dygraph.no_grad(): - _C_ops.fill_constant(out_var, 'value', - float(0), 'force_cpu', False, 'dtype', - out_var.dtype, 'str_value', + _C_ops.fill_constant(out_var, 'value', float(0), 'force_cpu', + False, 'dtype', out_var.dtype, 'str_value', str(float(0)), 'shape', out_var.shape) else: - block.append_op( - type='fill_constant', - inputs={}, - outputs={'Out': out_var}, - attrs={ - 'value': float(0), - 'dtype': out_var.dtype, - 'shape': out_var.shape, - }, - stop_gradient=True) + block.append_op(type='fill_constant', + inputs={}, + outputs={'Out': out_var}, + attrs={ + 'value': float(0), + 'dtype': out_var.dtype, + 'shape': out_var.shape, + }, + stop_gradient=True) origin_shape = var.shape num_per_group = origin_shape[0] // self._groups @@ -171,20 +172,21 @@ class Dirac(Initializer): tmp_out, _ = _C_ops.reshape2(out_var, None, 'shape', [-1]) tmp_out._share_underline_tensor_to(out_var) else: - x_shape = block.create_var( - name=unique_name.generate(".".join([out_var.name, "XShape"])), - dtype=out_var.dtype, - shape=out_var.shape, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - stop_gradient=True) - block.append_op( - type="reshape2", - inputs={"X": out_var}, - attrs={'shape': [-1]}, - outputs={"Out": out_var, - "XShape": x_shape}, - stop_gradient=True) + x_shape = block.create_var(name=unique_name.generate(".".join( + [out_var.name, "XShape"])), + dtype=out_var.dtype, + shape=out_var.shape, + type=VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=True) + block.append_op(type="reshape2", + inputs={"X": out_var}, + attrs={'shape': [-1]}, + outputs={ + "Out": out_var, + "XShape": x_shape + }, + stop_gradient=True) index_tensor = block.create_var( name=unique_name.generate('scatter_index'), @@ -199,15 +201,14 @@ class Dirac(Initializer): 'int64_values', idx_list) tmp_tensor._share_underline_tensor_to(index_tensor) else: - block.append_op( - type='assign_value', - outputs={'Out': index_tensor}, - attrs={ - 'dtype': VarDesc.VarType.INT64, - 'shape': [len(idx_list)], - 'int64_values': idx_list - }, - stop_gradient=True) + block.append_op(type='assign_value', + outputs={'Out': index_tensor}, + attrs={ + 'dtype': VarDesc.VarType.INT64, + 'shape': [len(idx_list)], + 'int64_values': idx_list + }, + stop_gradient=True) value_tensor = block.create_var( name=unique_name.generate('scatter_value'), @@ 
-222,15 +223,14 @@ class Dirac(Initializer): 'fp32_values', value_list) tmp_tensor._share_underline_tensor_to(value_tensor) else: - block.append_op( - type='assign_value', - outputs={'Out': value_tensor}, - attrs={ - 'dtype': VarDesc.VarType.FP32, - 'shape': [len(value_list)], - 'fp32_values': value_list - }, - stop_gradient=True) + block.append_op(type='assign_value', + outputs={'Out': value_tensor}, + attrs={ + 'dtype': VarDesc.VarType.FP32, + 'shape': [len(value_list)], + 'fp32_values': value_list + }, + stop_gradient=True) if framework.in_dygraph_mode(): with fluid.dygraph.no_grad(): @@ -247,38 +247,39 @@ class Dirac(Initializer): tmp_cast_out._share_underline_tensor_to(var) else: - op = block.append_op( - type="scatter", - inputs={ - "X": out_var, - "Ids": index_tensor, - "Updates": value_tensor - }, - attrs={'overwrite': True}, - outputs={"Out": out_var}, - stop_gradient=True) - x_shape = block.create_var( - name=unique_name.generate(".".join([out_var.name, "XShape"])), - dtype=out_var.dtype, - shape=out_var.shape, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - stop_gradient=True) - block.append_op( - type="reshape2", - inputs={"X": out_var}, - attrs={'shape': origin_shape}, - outputs={"Out": out_var, - "XShape": x_shape}, - stop_gradient=True) + op = block.append_op(type="scatter", + inputs={ + "X": out_var, + "Ids": index_tensor, + "Updates": value_tensor + }, + attrs={'overwrite': True}, + outputs={"Out": out_var}, + stop_gradient=True) + x_shape = block.create_var(name=unique_name.generate(".".join( + [out_var.name, "XShape"])), + dtype=out_var.dtype, + shape=out_var.shape, + type=VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=True) + block.append_op(type="reshape2", + inputs={"X": out_var}, + attrs={'shape': origin_shape}, + outputs={ + "Out": out_var, + "XShape": x_shape + }, + stop_gradient=True) if var.dtype != VarDesc.VarType.FP32: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, - "out_dtype": var.dtype}, - stop_gradient=True) + block.append_op(type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={ + "in_dtype": out_var.dtype, + "out_dtype": var.dtype + }, + stop_gradient=True) if not in_dynamic_mode(): var.op = op return op diff --git a/python/paddle/nn/initializer/kaiming.py b/python/paddle/nn/initializer/kaiming.py index 88a52268776..b8ed7febb6b 100644 --- a/python/paddle/nn/initializer/kaiming.py +++ b/python/paddle/nn/initializer/kaiming.py @@ -57,8 +57,9 @@ class KaimingNormal(MSRAInitializer): """ def __init__(self, fan_in=None): - super(KaimingNormal, self).__init__( - uniform=False, fan_in=fan_in, seed=0) + super(KaimingNormal, self).__init__(uniform=False, + fan_in=fan_in, + seed=0) class KaimingUniform(MSRAInitializer): @@ -99,5 +100,6 @@ class KaimingUniform(MSRAInitializer): """ def __init__(self, fan_in=None): - super(KaimingUniform, self).__init__( - uniform=True, fan_in=fan_in, seed=0) + super(KaimingUniform, self).__init__(uniform=True, + fan_in=fan_in, + seed=0) diff --git a/python/paddle/nn/initializer/orthogonal.py b/python/paddle/nn/initializer/orthogonal.py index 84cdb971d77..2a9ba126e2f 100644 --- a/python/paddle/nn/initializer/orthogonal.py +++ b/python/paddle/nn/initializer/orthogonal.py @@ -101,105 +101,107 @@ class Orthogonal(Initializer): flatten_shape = [max(row, col), min(row, col)] - normal_var = block.create_var( - name=unique_name.generate('.'.join(['gaussian_random', 'tmp'])), - dtype=var.dtype, - persistable=False, - 
stop_gradient=True) - block.append_op( - type='gaussian_random', - inputs={}, - outputs={'Out': normal_var}, - attrs={ - 'mean': 0.0, - 'std': 1.0, - 'shape': flatten_shape, - 'seed': self._seed, - 'dtype': var.dtype - }, - stop_gradient=True) - - q = block.create_var( - name=unique_name.generate('.'.join(['qr', 'q', 'tmp'])), - dtype=normal_var.dtype, - persistable=False, - stop_gradient=True) - r = block.create_var( - name=unique_name.generate('.'.join(['qr', 'r', 'tmp'])), - dtype=normal_var.dtype, - persistable=False, - stop_gradient=True) - block.append_op( - type='qr', - inputs={'X': [normal_var]}, - outputs={ - 'Q': q, - 'R': r, - }, - attrs={'mode': 'reduced'}, - stop_gradient=True) - - r_diag = block.create_var( - name=unique_name.generate('.'.join(['diag', 'tmp'])), - dtype=r.dtype, - persistable=False, - stop_gradient=True) - block.append_op( - type='diag_v2', - inputs={'X': r}, - outputs={'Out': r_diag}, - attrs={'offset': 0, - 'padding_value': 0}, - stop_gradient=True) + normal_var = block.create_var(name=unique_name.generate('.'.join( + ['gaussian_random', 'tmp'])), + dtype=var.dtype, + persistable=False, + stop_gradient=True) + block.append_op(type='gaussian_random', + inputs={}, + outputs={'Out': normal_var}, + attrs={ + 'mean': 0.0, + 'std': 1.0, + 'shape': flatten_shape, + 'seed': self._seed, + 'dtype': var.dtype + }, + stop_gradient=True) + + q = block.create_var(name=unique_name.generate('.'.join( + ['qr', 'q', 'tmp'])), + dtype=normal_var.dtype, + persistable=False, + stop_gradient=True) + r = block.create_var(name=unique_name.generate('.'.join( + ['qr', 'r', 'tmp'])), + dtype=normal_var.dtype, + persistable=False, + stop_gradient=True) + block.append_op(type='qr', + inputs={'X': [normal_var]}, + outputs={ + 'Q': q, + 'R': r, + }, + attrs={'mode': 'reduced'}, + stop_gradient=True) + + r_diag = block.create_var(name=unique_name.generate('.'.join( + ['diag', 'tmp'])), + dtype=r.dtype, + persistable=False, + stop_gradient=True) + block.append_op(type='diag_v2', + inputs={'X': r}, + outputs={'Out': r_diag}, + attrs={ + 'offset': 0, + 'padding_value': 0 + }, + stop_gradient=True) r_sign = r_diag - block.append_op( - type='sign', - inputs={'X': [r_diag]}, - outputs={'Out': r_sign}, - stop_gradient=True) - - block.append_op( - type='elementwise_mul', - inputs={'X': q, - 'Y': r_sign}, - outputs={'Out': q}, - attrs={}, - stop_gradient=True) - - x_shape = block.create_var( - name=unique_name.generate('.'.join(['transpose', 'shape', 'tmp'])), - dtype=q.dtype, - persistable=False, - stop_gradient=True) + block.append_op(type='sign', + inputs={'X': [r_diag]}, + outputs={'Out': r_sign}, + stop_gradient=True) + + block.append_op(type='elementwise_mul', + inputs={ + 'X': q, + 'Y': r_sign + }, + outputs={'Out': q}, + attrs={}, + stop_gradient=True) + + x_shape = block.create_var(name=unique_name.generate('.'.join( + ['transpose', 'shape', 'tmp'])), + dtype=q.dtype, + persistable=False, + stop_gradient=True) if row < col: - q_transpose = block.create_var( - name=unique_name.generate('.'.join(['transpose', 'tmp'])), - dtype=q.dtype, - persistable=False, - stop_gradient=True) - block.append_op( - type='transpose2', - inputs={'X': q}, - outputs={'Out': q_transpose, - 'XShape': x_shape}, - attrs={'axis': [1, 0]}, - stop_gradient=True) + q_transpose = block.create_var(name=unique_name.generate('.'.join( + ['transpose', 'tmp'])), + dtype=q.dtype, + persistable=False, + stop_gradient=True) + block.append_op(type='transpose2', + inputs={'X': q}, + outputs={ + 'Out': q_transpose, + 'XShape': 
x_shape + }, + attrs={'axis': [1, 0]}, + stop_gradient=True) q = q_transpose - block.append_op( - type='reshape2', - inputs={'X': q}, - outputs={'Out': q, - "XShape": x_shape}, - attrs={'shape': var.shape}, - stop_gradient=True) - - op = block.append_op( - type='scale', - inputs={'X': q}, - outputs={'Out': var}, - attrs={'scale': self._gain, - 'bias': 0.0}) + block.append_op(type='reshape2', + inputs={'X': q}, + outputs={ + 'Out': q, + "XShape": x_shape + }, + attrs={'shape': var.shape}, + stop_gradient=True) + + op = block.append_op(type='scale', + inputs={'X': q}, + outputs={'Out': var}, + attrs={ + 'scale': self._gain, + 'bias': 0.0 + }) return op diff --git a/python/paddle/nn/initializer/uniform.py b/python/paddle/nn/initializer/uniform.py index f07883adbb0..ee9b36ecf7c 100644 --- a/python/paddle/nn/initializer/uniform.py +++ b/python/paddle/nn/initializer/uniform.py @@ -56,5 +56,9 @@ class Uniform(UniformInitializer): assert low is not None, 'low should not be None' assert high is not None, 'high should not be None' assert high >= low, 'high should greater or equal than low' - super(Uniform, self).__init__( - low=low, high=high, seed=0, diag_num=0, diag_step=0, diag_val=1.0) + super(Uniform, self).__init__(low=low, + high=high, + seed=0, + diag_num=0, + diag_step=0, + diag_val=1.0) diff --git a/python/paddle/nn/initializer/xavier.py b/python/paddle/nn/initializer/xavier.py index aff3a2c15ae..e11790df7df 100644 --- a/python/paddle/nn/initializer/xavier.py +++ b/python/paddle/nn/initializer/xavier.py @@ -66,8 +66,10 @@ class XavierNormal(XavierInitializer): """ def __init__(self, fan_in=None, fan_out=None, name=None): - super(XavierNormal, self).__init__( - uniform=False, fan_in=fan_in, fan_out=fan_out, seed=0) + super(XavierNormal, self).__init__(uniform=False, + fan_in=fan_in, + fan_out=fan_out, + seed=0) class XavierUniform(XavierInitializer): @@ -120,5 +122,7 @@ class XavierUniform(XavierInitializer): """ def __init__(self, fan_in=None, fan_out=None, name=None): - super(XavierUniform, self).__init__( - uniform=True, fan_in=fan_in, fan_out=fan_out, seed=0) + super(XavierUniform, self).__init__(uniform=True, + fan_in=fan_in, + fan_out=fan_out, + seed=0) diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index 1a3768e9190..6e2a11c89cc 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -419,12 +419,12 @@ class PReLU(Layer): self._name = name self._data_format = data_format - self._weight = self.create_parameter( - attr=self._weight_attr, - shape=[self._num_parameters], - dtype=get_default_dtype(), - is_bias=False, - default_initializer=Constant(self._init)) + self._weight = self.create_parameter(attr=self._weight_attr, + shape=[self._num_parameters], + dtype=get_default_dtype(), + is_bias=False, + default_initializer=Constant( + self._init)) def forward(self, x): return F.prelu(x, self._weight, data_format=self._data_format) @@ -514,8 +514,10 @@ class RReLU(Layer): self._name = name def forward(self, x): - return F.rrelu( - x, lower=self._lower, upper=self._upper, training=self.training) + return F.rrelu(x, + lower=self._lower, + upper=self._upper, + training=self.training) def extra_repr(self): name_str = ', name={}'.format(self._name) if self._name else '' diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index dac4cf5f272..7c034d37ba6 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -155,21 +155,21 @@ class Linear(Layer): self._dtype 
= self._helper.get_default_dtype() self._weight_attr = weight_attr self._bias_attr = bias_attr - self.weight = self.create_parameter( - shape=[in_features, out_features], - attr=self._weight_attr, - dtype=self._dtype, - is_bias=False) - self.bias = self.create_parameter( - shape=[out_features], - attr=self._bias_attr, - dtype=self._dtype, - is_bias=True) + self.weight = self.create_parameter(shape=[in_features, out_features], + attr=self._weight_attr, + dtype=self._dtype, + is_bias=False) + self.bias = self.create_parameter(shape=[out_features], + attr=self._bias_attr, + dtype=self._dtype, + is_bias=True) self.name = name def forward(self, input): - out = F.linear( - x=input, weight=self.weight, bias=self.bias, name=self.name) + out = F.linear(x=input, + weight=self.weight, + bias=self.bias, + name=self.name) return out def extra_repr(self): @@ -406,15 +406,14 @@ class Upsample(Layer): self.name = name def forward(self, x): - out = F.interpolate( - x, - size=self.size, - scale_factor=self.scale_factor, - mode=self.mode, - align_corners=self.align_corners, - align_mode=self.align_mode, - data_format=self.data_format, - name=self.name) + out = F.interpolate(x, + size=self.size, + scale_factor=self.scale_factor, + mode=self.mode, + align_corners=self.align_corners, + align_mode=self.align_mode, + data_format=self.data_format, + name=self.name) return out @@ -492,15 +491,14 @@ class UpsamplingNearest2D(Layer): self.name = name def forward(self, x): - out = F.interpolate( - x, - size=self.size, - scale_factor=self.scale_factor, - mode='nearest', - align_corners=False, - align_mode=0, - data_format=self.data_format, - name=self.name) + out = F.interpolate(x, + size=self.size, + scale_factor=self.scale_factor, + mode='nearest', + align_corners=False, + align_mode=0, + data_format=self.data_format, + name=self.name) return out @@ -578,15 +576,14 @@ class UpsamplingBilinear2D(Layer): self.name = name def forward(self, x): - out = F.interpolate( - x, - size=self.size, - scale_factor=self.scale_factor, - mode='bilinear', - align_corners=True, - align_mode=0, - data_format=self.data_format, - name=self.name) + out = F.interpolate(x, + size=self.size, + scale_factor=self.scale_factor, + mode='bilinear', + align_corners=True, + align_mode=0, + data_format=self.data_format, + name=self.name) return out @@ -673,17 +670,15 @@ class Bilinear(Layer): weight_shape = [ self._out_features, self._in1_features, self._in2_features ] - self.weight = self.create_parameter( - attr=self._weight_attr, - shape=weight_shape, - dtype=self._dtype, - is_bias=False) + self.weight = self.create_parameter(attr=self._weight_attr, + shape=weight_shape, + dtype=self._dtype, + is_bias=False) bias_shape = [1, self._out_features] - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=bias_shape, - dtype=self._dtype, - is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=bias_shape, + dtype=self._dtype, + is_bias=True) def forward(self, x1, x2): return F.bilinear(x1, x2, self.weight, self.bias, self._name) @@ -754,13 +749,12 @@ class Dropout(Layer): self.name = name def forward(self, input): - out = F.dropout( - input, - p=self.p, - axis=self.axis, - training=self.training, - mode=self.mode, - name=self.name) + out = F.dropout(input, + p=self.p, + axis=self.axis, + training=self.training, + mode=self.mode, + name=self.name) return out def extra_repr(self): @@ -816,12 +810,11 @@ class Dropout2D(Layer): self.name = name def forward(self, input): - out = F.dropout2d( - input, - p=self.p, - 
training=self.training, - data_format=self.data_format, - name=self.name) + out = F.dropout2d(input, + p=self.p, + training=self.training, + data_format=self.data_format, + name=self.name) return out def extra_repr(self): @@ -877,12 +870,11 @@ class Dropout3D(Layer): self.name = name def forward(self, input): - out = F.dropout3d( - input, - p=self.p, - training=self.training, - data_format=self.data_format, - name=self.name) + out = F.dropout3d(input, + p=self.p, + training=self.training, + data_format=self.data_format, + name=self.name) return out def extra_repr(self): @@ -935,8 +927,10 @@ class AlphaDropout(Layer): self.name = name def forward(self, input): - out = F.alpha_dropout( - input, p=self.p, training=self.training, name=self.name) + out = F.alpha_dropout(input, + p=self.p, + training=self.training, + name=self.name) return out def extra_repr(self): @@ -1171,8 +1165,9 @@ class ZeroPad2D(Layer): def extra_repr(self): name_str = ', name={}'.format(self._name) if self._name else '' - return 'padding={}, data_format={}{}'.format( - self._pad, self._data_format, name_str) + return 'padding={}, data_format={}{}'.format(self._pad, + self._data_format, + name_str) class Pad3D(Layer): @@ -1450,23 +1445,21 @@ class Embedding(Layer): self._weight_attr = weight_attr self._remote_prefetch = False self._name = name - self.weight = self.create_parameter( - attr=self._weight_attr, - shape=self._size, - dtype=self._dtype, - is_bias=False) + self.weight = self.create_parameter(attr=self._weight_attr, + shape=self._size, + dtype=self._dtype, + is_bias=False) if in_dynamic_mode() and padding_idx != -1: with paddle.no_grad(): self.weight[padding_idx] = 0.0 def forward(self, x): - return F.embedding( - x, - weight=self.weight, - padding_idx=self._padding_idx, - sparse=self._sparse, - name=self._name) + return F.embedding(x, + weight=self.weight, + padding_idx=self._padding_idx, + sparse=self._sparse, + name=self._name) def extra_repr(self): main_str = '{_num_embeddings}, {_embedding_dim}' @@ -1539,13 +1532,12 @@ class Unfold(Layer): self.name = name def forward(self, input): - return F.unfold( - input, - kernel_sizes=self.kernel_sizes, - strides=self.strides, - paddings=self.paddings, - dilations=self.dilations, - name=self.name) + return F.unfold(input, + kernel_sizes=self.kernel_sizes, + strides=self.strides, + paddings=self.paddings, + dilations=self.dilations, + name=self.name) def extra_repr(self): name_str = ', name={}'.format(self.name) if self.name else '' @@ -1626,14 +1618,13 @@ class Fold(Layer): self.name = name def forward(self, input): - return F.fold( - input, - output_sizes=self.output_sizes, - kernel_sizes=self.kernel_sizes, - strides=self.strides, - paddings=self.paddings, - dilations=self.dilations, - name=self.name) + return F.fold(input, + output_sizes=self.output_sizes, + kernel_sizes=self.kernel_sizes, + strides=self.strides, + paddings=self.paddings, + dilations=self.dilations, + name=self.name) def extra_repr(self): name_str = ', name={}'.format(self.name) if self.name else '' diff --git a/python/paddle/nn/layer/container.py b/python/paddle/nn/layer/container.py index aadaf1efce5..0b1bf6bc565 100644 --- a/python/paddle/nn/layer/container.py +++ b/python/paddle/nn/layer/container.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -288,6 +288,6 @@ class LayerDict(Layer): for i, kv in enumerate(sublayers): if len(kv) != 2: raise ValueError("The length of the " + str(i) + - "'s element in sublayers is " + str( - len(kv)) + ", which must be 2.") + "'s element in sublayers is " + + str(len(kv)) + ", which must be 2.") self.add_sublayer(kv[0], kv[1]) diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index bb1cbbfc03e..f724f7cfee5 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -44,6 +44,7 @@ def _reverse_repeat_list(t, n): class _ConvNd(Layer): + def __init__(self, in_channels, out_channels, @@ -86,8 +87,9 @@ class _ConvNd(Layer): "data_format must be one of {}, but got data_format='{}'". format(valid_format, data_format)) - channel_last = (data_format == "NHWC") or (data_format == "NDHWC") or ( - data_format == "NLC") + channel_last = (data_format == "NHWC") or (data_format + == "NDHWC") or (data_format + == "NLC") if channel_last: self._channel_dim = len(data_format) - 1 else: @@ -134,26 +136,27 @@ class _ConvNd(Layer): shape=filter_shape, attr=self._param_attr, default_initializer=_get_default_param_initializer()) - self.bias = self.create_parameter( - attr=self._bias_attr, shape=[self._out_channels], is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=[self._out_channels], + is_bias=True) cudnn_version = get_cudnn_version() - self._use_cudnn = True if (is_compiled_with_cuda() and - cudnn_version is not None) else False + self._use_cudnn = True if (is_compiled_with_cuda() + and cudnn_version is not None) else False self._op_type = "conv" + str(dims) + 'd' - if self._op_type == 'conv2d' and (in_channels == groups and - in_channels != 1 and - out_channels % in_channels == 0): + if self._op_type == 'conv2d' and (in_channels == groups + and in_channels != 1 + and out_channels % in_channels == 0): self._op_type = 'depthwise_conv2d' if is_compiled_with_rocm(): self._use_cudnn = True else: self._use_cudnn = False - if (is_compiled_with_cuda() and get_flags("FLAGS_conv2d_disable_cudnn")[ - "FLAGS_conv2d_disable_cudnn"]): + if (is_compiled_with_cuda() and get_flags("FLAGS_conv2d_disable_cudnn") + ["FLAGS_conv2d_disable_cudnn"]): self._use_cudnn = False def extra_repr(self): @@ -311,20 +314,19 @@ class Conv1D(_ConvNd): weight_attr=None, bias_attr=None, data_format="NCL"): - super(Conv1D, self).__init__( - in_channels, - out_channels, - kernel_size, - False, - 1, - stride=stride, - padding=padding, - padding_mode=padding_mode, - dilation=dilation, - groups=groups, - weight_attr=weight_attr, - bias_attr=bias_attr, - data_format=data_format) + super(Conv1D, self).__init__(in_channels, + out_channels, + kernel_size, + False, + 1, + stride=stride, + padding=padding, + padding_mode=padding_mode, + dilation=dilation, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) def forward(self, x): padding = 0 @@ -336,15 +338,14 @@ class Conv1D(_ConvNd): else: padding = self._padding - out = F.conv1d( - x, - self.weight, - bias=self.bias, - padding=padding, - stride=self._stride, - dilation=self._dilation, - groups=self._groups, - data_format=self._data_format) + out = F.conv1d(x, + self.weight, + 
bias=self.bias, + padding=padding, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format) return out @@ -488,33 +489,31 @@ class Conv1DTranspose(_ConvNd): weight_attr=None, bias_attr=None, data_format="NCL"): - super(Conv1DTranspose, self).__init__( - in_channels, - out_channels, - kernel_size, - True, - 1, - stride=stride, - padding=padding, - dilation=dilation, - output_padding=output_padding, - groups=groups, - weight_attr=weight_attr, - bias_attr=bias_attr, - data_format=data_format) + super(Conv1DTranspose, self).__init__(in_channels, + out_channels, + kernel_size, + True, + 1, + stride=stride, + padding=padding, + dilation=dilation, + output_padding=output_padding, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) def forward(self, x, output_size=None): - out = F.conv1d_transpose( - x, - self.weight, - bias=self.bias, - output_size=output_size, - output_padding=self.output_padding, - padding=self._padding, - stride=self._stride, - dilation=self._dilation, - groups=self._groups, - data_format=self._data_format) + out = F.conv1d_transpose(x, + self.weight, + bias=self.bias, + output_size=output_size, + output_padding=self.output_padding, + padding=self._padding, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format) return out @@ -641,20 +640,19 @@ class Conv2D(_ConvNd): weight_attr=None, bias_attr=None, data_format="NCHW"): - super(Conv2D, self).__init__( - in_channels, - out_channels, - kernel_size, - False, - 2, - stride=stride, - padding=padding, - padding_mode=padding_mode, - dilation=dilation, - groups=groups, - weight_attr=weight_attr, - bias_attr=bias_attr, - data_format=data_format) + super(Conv2D, self).__init__(in_channels, + out_channels, + kernel_size, + False, + 2, + stride=stride, + padding=padding, + padding_mode=padding_mode, + dilation=dilation, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) def forward(self, x): if self._padding_mode != 'zeros': @@ -663,19 +661,18 @@ class Conv2D(_ConvNd): mode=self._padding_mode, data_format=self._data_format) - out = F.conv._conv_nd( - x, - self.weight, - bias=self.bias, - stride=self._stride, - padding=self._updated_padding, - padding_algorithm=self._padding_algorithm, - dilation=self._dilation, - groups=self._groups, - data_format=self._data_format, - channel_dim=self._channel_dim, - op_type=self._op_type, - use_cudnn=self._use_cudnn) + out = F.conv._conv_nd(x, + self.weight, + bias=self.bias, + stride=self._stride, + padding=self._updated_padding, + padding_algorithm=self._padding_algorithm, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format, + channel_dim=self._channel_dim, + op_type=self._op_type, + use_cudnn=self._use_cudnn) return out @@ -808,20 +805,19 @@ class Conv2DTranspose(_ConvNd): weight_attr=None, bias_attr=None, data_format="NCHW"): - super(Conv2DTranspose, self).__init__( - in_channels, - out_channels, - kernel_size, - True, - 2, - stride=stride, - padding=padding, - dilation=dilation, - output_padding=output_padding, - groups=groups, - weight_attr=weight_attr, - bias_attr=bias_attr, - data_format=data_format) + super(Conv2DTranspose, self).__init__(in_channels, + out_channels, + kernel_size, + True, + 2, + stride=stride, + padding=padding, + dilation=dilation, + output_padding=output_padding, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) def 
forward(self, x, output_size=None): if output_size is None: @@ -829,17 +825,16 @@ class Conv2DTranspose(_ConvNd): else: output_padding = 0 - out = F.conv2d_transpose( - x, - self.weight, - bias=self.bias, - padding=self._padding, - output_padding=output_padding, - stride=self._stride, - dilation=self._dilation, - groups=self._groups, - output_size=output_size, - data_format=self._data_format) + out = F.conv2d_transpose(x, + self.weight, + bias=self.bias, + padding=self._padding, + output_padding=output_padding, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + output_size=output_size, + data_format=self._data_format) return out @@ -966,20 +961,19 @@ class Conv3D(_ConvNd): weight_attr=None, bias_attr=None, data_format="NCDHW"): - super(Conv3D, self).__init__( - in_channels, - out_channels, - kernel_size, - False, - 3, - stride=stride, - padding=padding, - padding_mode=padding_mode, - dilation=dilation, - groups=groups, - weight_attr=weight_attr, - bias_attr=bias_attr, - data_format=data_format) + super(Conv3D, self).__init__(in_channels, + out_channels, + kernel_size, + False, + 3, + stride=stride, + padding=padding, + padding_mode=padding_mode, + dilation=dilation, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) def forward(self, x): if self._padding_mode != 'zeros': @@ -988,19 +982,18 @@ class Conv3D(_ConvNd): mode=self._padding_mode, data_format=self._data_format) - out = F.conv._conv_nd( - x, - self.weight, - bias=self.bias, - stride=self._stride, - padding=self._updated_padding, - padding_algorithm=self._padding_algorithm, - dilation=self._dilation, - groups=self._groups, - data_format=self._data_format, - channel_dim=self._channel_dim, - op_type=self._op_type, - use_cudnn=self._use_cudnn) + out = F.conv._conv_nd(x, + self.weight, + bias=self.bias, + stride=self._stride, + padding=self._updated_padding, + padding_algorithm=self._padding_algorithm, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format, + channel_dim=self._channel_dim, + op_type=self._op_type, + use_cudnn=self._use_cudnn) return out @@ -1145,20 +1138,19 @@ class Conv3DTranspose(_ConvNd): weight_attr=None, bias_attr=None, data_format="NCDHW"): - super(Conv3DTranspose, self).__init__( - in_channels, - out_channels, - kernel_size, - True, - 3, - stride=stride, - padding=padding, - dilation=dilation, - output_padding=output_padding, - groups=groups, - weight_attr=weight_attr, - bias_attr=bias_attr, - data_format=data_format) + super(Conv3DTranspose, self).__init__(in_channels, + out_channels, + kernel_size, + True, + 3, + stride=stride, + padding=padding, + dilation=dilation, + output_padding=output_padding, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) def forward(self, x, output_size=None): if output_size is None: @@ -1166,15 +1158,14 @@ class Conv3DTranspose(_ConvNd): else: output_padding = 0 - out = F.conv3d_transpose( - x, - self.weight, - bias=self.bias, - padding=self._padding, - output_padding=output_padding, - stride=self._stride, - dilation=self._dilation, - groups=self._groups, - output_size=output_size, - data_format=self._data_format) + out = F.conv3d_transpose(x, + self.weight, + bias=self.bias, + padding=self._padding, + output_padding=output_padding, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + output_size=output_size, + data_format=self._data_format) return out diff --git a/python/paddle/nn/layer/distance.py 
b/python/paddle/nn/layer/distance.py index eb85de57110..7c08e358fcc 100644 --- a/python/paddle/nn/layer/distance.py +++ b/python/paddle/nn/layer/distance.py @@ -103,8 +103,10 @@ class PairwiseDistance(Layer): 'epsilon': self.epsilon, } out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='p_norm', inputs={'X': sub}, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='p_norm', + inputs={'X': sub}, + outputs={'Out': out}, + attrs=attrs) return out diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index a20e7de751d..c720ec7d1be 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -394,16 +394,15 @@ class CrossEntropyLoss(Layer): self.name = name def forward(self, input, label): - ret = paddle.nn.functional.cross_entropy( - input, - label, - weight=self.weight, - ignore_index=self.ignore_index, - reduction=self.reduction, - soft_label=self.soft_label, - axis=self.axis, - use_softmax=self.use_softmax, - name=self.name) + ret = paddle.nn.functional.cross_entropy(input, + label, + weight=self.weight, + ignore_index=self.ignore_index, + reduction=self.reduction, + soft_label=self.soft_label, + axis=self.axis, + use_softmax=self.use_softmax, + name=self.name) return ret @@ -512,25 +511,25 @@ class HSigmoidLoss(Layer): " small parameter prefetch may cause speed down") C = self._num_classes if is_custom else self._num_classes - 1 - self.weight = self.create_parameter( - [C, self._feature_size], - attr=self._weight_attr, - is_bias=False, - dtype=self._dtype) - self.bias = self.create_parameter( - [C, 1], attr=self._bias_attr, is_bias=True, dtype=self._dtype) + self.weight = self.create_parameter([C, self._feature_size], + attr=self._weight_attr, + is_bias=False, + dtype=self._dtype) + self.bias = self.create_parameter([C, 1], + attr=self._bias_attr, + is_bias=True, + dtype=self._dtype) def forward(self, input, label, path_table=None, path_code=None): - out = F.hsigmoid_loss( - input, - label, - self._num_classes, - self.weight, - self.bias, - path_table=path_table, - path_code=path_code, - is_sparse=self._is_sparse, - name=self._name) + out = F.hsigmoid_loss(input, + label, + self._num_classes, + self.weight, + self.bias, + path_table=path_table, + path_code=path_code, + is_sparse=self._is_sparse, + name=self._name) return out @@ -596,10 +595,12 @@ class MSELoss(Layer): def forward(self, input, label): if not in_dynamic_mode(): - fluid.data_feeder.check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'MSELoss') - fluid.data_feeder.check_variable_and_dtype( - label, 'label', ['float32', 'float64'], 'MSELoss') + fluid.data_feeder.check_variable_and_dtype(input, 'input', + ['float32', 'float64'], + 'MSELoss') + fluid.data_feeder.check_variable_and_dtype(label, 'label', + ['float32', 'float64'], + 'MSELoss') if in_dygraph_mode(): square_out = paddle._C_ops.final_state_square( @@ -691,8 +692,10 @@ class L1Loss(Layer): self.name = name def forward(self, input, label): - return paddle.nn.functional.l1_loss( - input, label, self.reduction, name=self.name) + return paddle.nn.functional.l1_loss(input, + label, + self.reduction, + name=self.name) class BCELoss(Layer): @@ -780,8 +783,10 @@ class BCELoss(Layer): self.name = name def forward(self, input, label): - out = paddle.nn.functional.binary_cross_entropy( - input, label, self.weight, self.reduction, self.name) + out = paddle.nn.functional.binary_cross_entropy(input, label, + self.weight, + self.reduction, + self.name) return out @@ -888,13 
+893,12 @@ class NLLLoss(Layer): self._name = name def forward(self, input, label): - return F.nll_loss( - input, - label, - weight=self._weight, - ignore_index=self._ignore_index, - reduction=self._reduction, - name=self._name) + return F.nll_loss(input, + label, + weight=self._weight, + ignore_index=self._ignore_index, + reduction=self._reduction, + name=self._name) class KLDivLoss(Layer): @@ -1037,8 +1041,10 @@ class MarginRankingLoss(Layer): self.name = name def forward(self, input, other, label): - out = paddle.nn.functional.margin_ranking_loss( - input, other, label, self.margin, self.reduction, self.name) + out = paddle.nn.functional.margin_ranking_loss(input, other, label, + self.margin, + self.reduction, + self.name) return out @@ -1128,14 +1134,13 @@ class CTCLoss(Layer): input_lengths, label_lengths, norm_by_times=False): - return paddle.nn.functional.ctc_loss( - log_probs, - labels, - input_lengths, - label_lengths, - self.blank, - self.reduction, - norm_by_times=norm_by_times) + return paddle.nn.functional.ctc_loss(log_probs, + labels, + input_lengths, + label_lengths, + self.blank, + self.reduction, + norm_by_times=norm_by_times) class SmoothL1Loss(Layer): @@ -1205,12 +1210,11 @@ class SmoothL1Loss(Layer): self.name = name def forward(self, input, label): - return F.smooth_l1_loss( - input, - label, - reduction=self.reduction, - delta=self.delta, - name=self.name) + return F.smooth_l1_loss(input, + label, + reduction=self.reduction, + delta=self.delta, + name=self.name) class HingeEmbeddingLoss(Layer): @@ -1300,9 +1304,8 @@ class HingeEmbeddingLoss(Layer): self.name = name def forward(self, input, label): - return F.hinge_embedding_loss( - input, - label, - reduction=self.reduction, - margin=self.margin, - name=self.name) + return F.hinge_embedding_loss(input, + label, + reduction=self.reduction, + margin=self.margin, + name=self.name) diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 6cdfc36d5d6..e549859fe62 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -25,7 +25,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: define normalization api +# TODO: define normalization api import six @@ -83,11 +83,10 @@ class _InstanceNormBase(Layer): shape=[num_features], default_initializer=Constant(1.0), is_bias=False) - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[num_features], - default_initializer=Constant(0.0), - is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=[num_features], + default_initializer=Constant(0.0), + is_bias=True) else: self.scale = None self.bias = None @@ -98,8 +97,10 @@ class _InstanceNormBase(Layer): def forward(self, input): self._check_input_dim(input) - return instance_norm( - input, weight=self.scale, bias=self.bias, eps=self._epsilon) + return instance_norm(input, + weight=self.scale, + bias=self.bias, + eps=self._epsilon) def extra_repr(self): return 'num_features={}, epsilon={}'.format(self._num_features, @@ -392,15 +393,15 @@ class GroupNorm(Layer): self.weight.stop_gradient = self._weight_attr != None and self._weight_attr.learning_rate == 0. 
if bias_attr == False: - self.bias = self.create_parameter( - attr=None, - shape=param_shape, - default_initializer=Constant(0.0), - is_bias=True) + self.bias = self.create_parameter(attr=None, + shape=param_shape, + default_initializer=Constant(0.0), + is_bias=True) self.bias.stop_gradient = True else: - self.bias = self.create_parameter( - attr=self._bias_attr, shape=param_shape, is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=param_shape, + is_bias=True) self.bias.stop_gradient = self._bias_attr != None and self._bias_attr.learning_rate == 0. def forward(self, input): @@ -419,9 +420,10 @@ class GroupNorm(Layer): 'epsilon', self._epsilon, 'groups', - self._num_groups, ) - return dygraph_utils._append_activation_in_dygraph( - pre_act, act=None) + self._num_groups, + ) + return dygraph_utils._append_activation_in_dygraph(pre_act, + act=None) inputs = {'X': input} if self.bias is not None: @@ -433,16 +435,17 @@ class GroupNorm(Layer): group_norm_out = self._helper.create_variable_for_type_inference( dtype=input.dtype) - self._helper.append_op( - type="group_norm", - inputs=inputs, - outputs={ - "Y": group_norm_out, - "Mean": mean_out, - "Variance": variance_out, - }, - attrs={"epsilon": self._epsilon, - "groups": self._num_groups}) + self._helper.append_op(type="group_norm", + inputs=inputs, + outputs={ + "Y": group_norm_out, + "Mean": mean_out, + "Variance": variance_out, + }, + attrs={ + "epsilon": self._epsilon, + "groups": self._num_groups + }) return self._helper.append_activation(group_norm_out, None) @@ -543,16 +546,16 @@ class LayerNorm(Layer): if bias_attr is False: self.bias = None else: - self.bias = self.create_parameter( - attr=self._bias_attr, shape=param_shape, is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=param_shape, + is_bias=True) def forward(self, input): - return layer_norm( - input, - normalized_shape=self._normalized_shape, - weight=self.weight, - bias=self.bias, - epsilon=self._epsilon) + return layer_norm(input, + normalized_shape=self._normalized_shape, + weight=self.weight, + bias=self.bias, + epsilon=self._epsilon) def extra_repr(self): return 'normalized_shape={}, epsilon={}'.format(self._normalized_shape, @@ -603,19 +606,17 @@ class _BatchNormBase(Layer): self.weight.stop_gradient = self._weight_attr != None and self._weight_attr.learning_rate == 0. if bias_attr == False: - self.bias = self.create_parameter( - attr=None, - shape=param_shape, - dtype=self._dtype, - default_initializer=Constant(0.0), - is_bias=True) + self.bias = self.create_parameter(attr=None, + shape=param_shape, + dtype=self._dtype, + default_initializer=Constant(0.0), + is_bias=True) self.bias.stop_gradient = True else: - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=param_shape, - dtype=self._dtype, - is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=param_shape, + dtype=self._dtype, + is_bias=True) self.bias.stop_gradient = self._bias_attr != None and self._bias_attr.learning_rate == 0. 
moving_mean_name = None @@ -625,24 +626,22 @@ class _BatchNormBase(Layer): moving_mean_name = name + "_mean" moving_variance_name = name + "_variance" - self._mean = self.create_parameter( - dtype=self._dtype, - attr=ParamAttr( - name=moving_mean_name, - initializer=Constant(0.0), - trainable=False, - do_model_average=True), - shape=param_shape) + self._mean = self.create_parameter(dtype=self._dtype, + attr=ParamAttr( + name=moving_mean_name, + initializer=Constant(0.0), + trainable=False, + do_model_average=True), + shape=param_shape) self._mean.stop_gradient = True - self._variance = self.create_parameter( - dtype=self._dtype, - attr=ParamAttr( - name=moving_variance_name, - initializer=Constant(1.0), - trainable=False, - do_model_average=True), - shape=param_shape) + self._variance = self.create_parameter(dtype=self._dtype, + attr=ParamAttr( + name=moving_variance_name, + initializer=Constant(1.0), + trainable=False, + do_model_average=True), + shape=param_shape) self._variance.stop_gradient = True self._data_format = data_format @@ -668,17 +667,16 @@ class _BatchNormBase(Layer): warnings.warn( "When training, we now always track global mean and variance.") - return batch_norm( - input, - self._mean, - self._variance, - weight=self.weight, - bias=self.bias, - training=self.training, - momentum=self._momentum, - epsilon=self._epsilon, - data_format=self._data_format, - use_global_stats=self._use_global_stats) + return batch_norm(input, + self._mean, + self._variance, + weight=self.weight, + bias=self.bias, + training=self.training, + momentum=self._momentum, + epsilon=self._epsilon, + data_format=self._data_format, + use_global_stats=self._use_global_stats) def extra_repr(self): main_str = 'num_features={}, momentum={}, epsilon={}'.format( @@ -1151,8 +1149,10 @@ class SyncBatchNorm(_BatchNormBase): "SavedVariance": [saved_variance] } - self._helper.append_op( - type="sync_batch_norm", inputs=inputs, outputs=outputs, attrs=attrs) + self._helper.append_op(type="sync_batch_norm", + inputs=inputs, + outputs=outputs, + attrs=attrs) return sync_batch_norm_out @classmethod diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py index c664c6e318c..990d0b61078 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -204,16 +204,15 @@ class AvgPool2D(Layer): self.name = name def forward(self, x): - return F.avg_pool2d( - x, - kernel_size=self.ksize, - stride=self.stride, - padding=self.padding, - ceil_mode=self.ceil_mode, - exclusive=self.exclusive, - divisor_override=self.divisor, - data_format=self.data_format, - name=self.name) + return F.avg_pool2d(x, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + ceil_mode=self.ceil_mode, + exclusive=self.exclusive, + divisor_override=self.divisor, + data_format=self.data_format, + name=self.name) def extra_repr(self): return 'kernel_size={ksize}, stride={stride}, padding={padding}'.format( @@ -302,16 +301,15 @@ class AvgPool3D(Layer): self.name = name def forward(self, x): - return F.avg_pool3d( - x, - kernel_size=self.ksize, - stride=self.stride, - padding=self.padding, - ceil_mode=self.ceil_mode, - exclusive=self.exclusive, - divisor_override=self.divisor, - data_format=self.data_format, - name=self.name) + return F.avg_pool3d(x, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + ceil_mode=self.ceil_mode, + exclusive=self.exclusive, + divisor_override=self.divisor, + data_format=self.data_format, + name=self.name) def extra_repr(self): return 
'kernel_size={ksize}, stride={stride}, padding={padding}'.format( @@ -507,15 +505,14 @@ class MaxPool2D(Layer): self.name = name def forward(self, x): - return F.max_pool2d( - x, - kernel_size=self.ksize, - stride=self.stride, - padding=self.padding, - return_mask=self.return_mask, - ceil_mode=self.ceil_mode, - data_format=self.data_format, - name=self.name) + return F.max_pool2d(x, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + return_mask=self.return_mask, + ceil_mode=self.ceil_mode, + data_format=self.data_format, + name=self.name) def extra_repr(self): return 'kernel_size={ksize}, stride={stride}, padding={padding}'.format( @@ -605,15 +602,14 @@ class MaxPool3D(Layer): self.name = name def forward(self, x): - return F.max_pool3d( - x, - kernel_size=self.ksize, - stride=self.stride, - padding=self.padding, - return_mask=self.return_mask, - ceil_mode=self.ceil_mode, - data_format=self.data_format, - name=self.name) + return F.max_pool3d(x, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + return_mask=self.return_mask, + ceil_mode=self.ceil_mode, + data_format=self.data_format, + name=self.name) def extra_repr(self): return 'kernel_size={ksize}, stride={stride}, padding={padding}'.format( @@ -769,11 +765,10 @@ class AdaptiveAvgPool2D(Layer): self._name = name def forward(self, x): - return F.adaptive_avg_pool2d( - x, - output_size=self._output_size, - data_format=self._data_format, - name=self._name) + return F.adaptive_avg_pool2d(x, + output_size=self._output_size, + data_format=self._data_format, + name=self._name) def extra_repr(self): return 'output_size={}'.format(self._output_size) @@ -862,11 +857,10 @@ class AdaptiveAvgPool3D(Layer): self._name = name def forward(self, x): - return F.adaptive_avg_pool3d( - x, - output_size=self._output_size, - data_format=self._data_format, - name=self._name) + return F.adaptive_avg_pool3d(x, + output_size=self._output_size, + data_format=self._data_format, + name=self._name) def extra_repr(self): return 'output_size={}'.format(self._output_size) @@ -1026,11 +1020,10 @@ class AdaptiveMaxPool2D(Layer): self._name = name def forward(self, x): - return F.adaptive_max_pool2d( - x, - output_size=self._output_size, - return_mask=self._return_mask, - name=self._name) + return F.adaptive_max_pool2d(x, + output_size=self._output_size, + return_mask=self._return_mask, + name=self._name) def extra_repr(self): return 'output_size={}, return_mask={}'.format(self._output_size, @@ -1119,11 +1112,10 @@ class AdaptiveMaxPool3D(Layer): self._name = name def forward(self, x): - return F.adaptive_max_pool3d( - x, - output_size=self._output_size, - return_mask=self._return_mask, - name=self._name) + return F.adaptive_max_pool3d(x, + output_size=self._output_size, + return_mask=self._return_mask, + name=self._name) def extra_repr(self): return 'output_size={}, return_mask={}'.format(self._output_size, @@ -1198,15 +1190,14 @@ class MaxUnPool1D(Layer): self.name = name def forward(self, x, indices): - return F.max_unpool1d( - x, - indices, - kernel_size=self.ksize, - stride=self.stride, - padding=self.padding, - data_format=self.data_format, - output_size=self.output_size, - name=self.name) + return F.max_unpool1d(x, + indices, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + data_format=self.data_format, + output_size=self.output_size, + name=self.name) def extra_repr(self): return 'output_size={}'.format(self.output_size) @@ -1283,15 +1274,14 @@ class MaxUnPool2D(Layer): self.name = name def 
forward(self, x, indices): - return F.max_unpool2d( - x, - indices, - kernel_size=self.ksize, - stride=self.stride, - padding=self.padding, - data_format=self.data_format, - output_size=self.output_size, - name=self.name) + return F.max_unpool2d(x, + indices, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + data_format=self.data_format, + output_size=self.output_size, + name=self.name) def extra_repr(self): return 'output_size={}'.format(self.output_size) @@ -1372,15 +1362,14 @@ class MaxUnPool3D(Layer): self.name = name def forward(self, x, indices): - return F.max_unpool3d( - x, - indices, - kernel_size=self.ksize, - stride=self.stride, - padding=self.padding, - data_format=self.data_format, - output_size=self.output_size, - name=self.name) + return F.max_unpool3d(x, + indices, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + data_format=self.data_format, + output_size=self.output_size, + name=self.name) def extra_repr(self): return 'output_size={}'.format(self.output_size) diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index 461ac03899e..53dfad4106d 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -190,7 +190,8 @@ class RNNCellBase(Layer): if sys.version_info < (3, ): integer_types = ( int, - long, ) + long, + ) else: integer_types = (int, ) """For shape, list/tuple of integer is the finest-grained objection""" @@ -201,10 +202,11 @@ class RNNCellBase(Layer): # TODO: Add check for the illegal if isinstance(seq, dict): return True - return (isinstance(seq, Sequence) and - not isinstance(seq, six.string_types)) + return (isinstance(seq, Sequence) + and not isinstance(seq, six.string_types)) class Shape(object): + def __init__(self, shape): self.shape = shape if shape[0] == -1 else ([-1] + list(shape)) @@ -225,12 +227,13 @@ class RNNCellBase(Layer): states_dtypes = map_structure(lambda shape: dtype, states_shapes) init_states = map_structure( - lambda shape, dtype: paddle.fluid.layers.fill_constant_batch_size_like( - input=batch_ref, - shape=shape.shape, - dtype=dtype, - value=init_value, - input_dim_idx=batch_dim_idx), states_shapes, states_dtypes) + lambda shape, dtype: paddle.fluid.layers. + fill_constant_batch_size_like(input=batch_ref, + shape=shape.shape, + dtype=dtype, + value=init_value, + input_dim_idx=batch_dim_idx), + states_shapes, states_dtypes) return init_states @property @@ -343,8 +346,8 @@ class SimpleRNNCell(RNNCellBase): super(SimpleRNNCell, self).__init__() if hidden_size <= 0: raise ValueError( - "hidden_size of {} must be greater than 0, but now equals to {}". - format(self.__class__.__name__, hidden_size)) + "hidden_size of {} must be greater than 0, but now equals to {}" + .format(self.__class__.__name__, hidden_size)) std = 1.0 / math.sqrt(hidden_size) self.weight_ih = self.create_parameter( (hidden_size, input_size), @@ -495,8 +498,8 @@ class LSTMCell(RNNCellBase): super(LSTMCell, self).__init__() if hidden_size <= 0: raise ValueError( - "hidden_size of {} must be greater than 0, but now equals to {}". - format(self.__class__.__name__, hidden_size)) + "hidden_size of {} must be greater than 0, but now equals to {}" + .format(self.__class__.__name__, hidden_size)) std = 1.0 / math.sqrt(hidden_size) self.weight_ih = self.create_parameter( (4 * hidden_size, input_size), @@ -646,8 +649,8 @@ class GRUCell(RNNCellBase): super(GRUCell, self).__init__() if hidden_size <= 0: raise ValueError( - "hidden_size of {} must be greater than 0, but now equals to {}". 
- format(self.__class__.__name__, hidden_size)) + "hidden_size of {} must be greater than 0, but now equals to {}" + .format(self.__class__.__name__, hidden_size)) std = 1.0 / math.sqrt(hidden_size) self.weight_ih = self.create_parameter( (3 * hidden_size, input_size), @@ -971,10 +974,9 @@ class RNNBase(LayerList): # add both to main_program and startup_program for static-graph. # Use Constant initializer to avoid make effect on random generator. self._flat_weight = [ - self.create_parameter( - shape=[np.sum(shape)], - dtype=params[0].dtype, - default_initializer=I.Constant(0.0)) + self.create_parameter(shape=[np.sum(shape)], + dtype=params[0].dtype, + default_initializer=I.Constant(0.0)) ] # dropout state may also can be hided and avoid saving # should dropout state be persistable for static-graph @@ -991,18 +993,17 @@ class RNNBase(LayerList): with program_guard(default_startup_program(), default_startup_program()): with paddle.no_grad(): - self._helper.append_op( - type="coalesce_tensor", - inputs={"Input": self._all_weights}, - outputs={ - "Output": self._all_weights, - "FusedOutput": self._flat_weight - }, - attrs={ - "copy_data": True, - "use_align": False, - "dtype": params[0].dtype - }) + self._helper.append_op(type="coalesce_tensor", + inputs={"Input": self._all_weights}, + outputs={ + "Output": self._all_weights, + "FusedOutput": self._flat_weight + }, + attrs={ + "copy_data": True, + "use_align": False, + "dtype": params[0].dtype + }) def _cudnn_impl(self, inputs, initial_states, sequence_length): if not self.time_major: @@ -1048,8 +1049,10 @@ class RNNBase(LayerList): 'DropoutState': self._dropout_state, } - self._helper.append_op( - type="rnn", inputs=inputs, outputs=outputs, attrs=attrs) + self._helper.append_op(type="rnn", + inputs=inputs, + outputs=outputs, + attrs=attrs) out = paddle.tensor.transpose(out, [1, 0, 2]) if not self.time_major else out @@ -1070,9 +1073,8 @@ class RNNBase(LayerList): initial_states = [initial_states] if isinstance( initial_states, paddle.static.Variable) else initial_states - if self.could_use_cudnn and ( - not paddle.device.is_compiled_with_rocm() or - sequence_length is None): + if self.could_use_cudnn and (not paddle.device.is_compiled_with_rocm() + or sequence_length is None): # Add CPU kernel and dispatch in backend later return self._cudnn_impl(inputs, initial_states, sequence_length) @@ -1082,11 +1084,10 @@ class RNNBase(LayerList): for i, rnn_layer in enumerate(self): if i > 0: - inputs = F.dropout( - inputs, - self.dropout, - training=self.training, - mode="upscale_in_train") + inputs = F.dropout(inputs, + self.dropout, + training=self.training, + mode="upscale_in_train") outputs, final_state = rnn_layer(inputs, states[i], sequence_length) final_states.append(final_state) inputs = outputs @@ -1211,9 +1212,10 @@ class SimpleRNN(RNNBase): else: raise ValueError("Unknown activation '{}'".format(activation)) self.activation = activation - super(SimpleRNN, self).__init__( - mode, input_size, hidden_size, num_layers, direction, time_major, - dropout, weight_ih_attr, weight_hh_attr, bias_ih_attr, bias_hh_attr) + super(SimpleRNN, + self).__init__(mode, input_size, hidden_size, num_layers, + direction, time_major, dropout, weight_ih_attr, + weight_hh_attr, bias_ih_attr, bias_hh_attr) class LSTM(RNNBase): @@ -1325,9 +1327,10 @@ class LSTM(RNNBase): bias_ih_attr=None, bias_hh_attr=None, name=None): - super(LSTM, self).__init__( - "LSTM", input_size, hidden_size, num_layers, direction, time_major, - dropout, weight_ih_attr, weight_hh_attr, 
bias_ih_attr, bias_hh_attr) + super(LSTM, + self).__init__("LSTM", input_size, hidden_size, num_layers, + direction, time_major, dropout, weight_ih_attr, + weight_hh_attr, bias_ih_attr, bias_hh_attr) class GRU(RNNBase): @@ -1432,6 +1435,7 @@ class GRU(RNNBase): bias_ih_attr=None, bias_hh_attr=None, name=None): - super(GRU, self).__init__( - "GRU", input_size, hidden_size, num_layers, direction, time_major, - dropout, weight_ih_attr, weight_hh_attr, bias_ih_attr, bias_hh_attr) + super(GRU, + self).__init__("GRU", input_size, hidden_size, num_layers, + direction, time_major, dropout, weight_ih_attr, + weight_hh_attr, bias_ih_attr, bias_hh_attr) diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index 340372f9b6a..35acaded2ea 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -177,14 +177,22 @@ class MultiHeadAttention(Layer): self.head_dim = embed_dim // num_heads assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" - self.q_proj = Linear( - embed_dim, embed_dim, weight_attr, bias_attr=bias_attr) - self.k_proj = Linear( - self.kdim, embed_dim, weight_attr, bias_attr=bias_attr) - self.v_proj = Linear( - self.vdim, embed_dim, weight_attr, bias_attr=bias_attr) - self.out_proj = Linear( - embed_dim, embed_dim, weight_attr, bias_attr=bias_attr) + self.q_proj = Linear(embed_dim, + embed_dim, + weight_attr, + bias_attr=bias_attr) + self.k_proj = Linear(self.kdim, + embed_dim, + weight_attr, + bias_attr=bias_attr) + self.v_proj = Linear(self.vdim, + embed_dim, + weight_attr, + bias_attr=bias_attr) + self.out_proj = Linear(embed_dim, + embed_dim, + weight_attr, + bias_attr=bias_attr) def _prepare_qkv(self, query, key, value, cache=None): r""" @@ -402,19 +410,19 @@ class MultiHeadAttention(Layer): q, k, v, cache = self._prepare_qkv(query, key, value, cache) # scale dot product attention - product = paddle.matmul( - x=q * (self.head_dim**-0.5), y=k, transpose_y=True) + product = paddle.matmul(x=q * (self.head_dim**-0.5), + y=k, + transpose_y=True) if attn_mask is not None: # Support bool or int mask attn_mask = _convert_attention_mask(attn_mask, product.dtype) product = product + attn_mask weights = F.softmax(product) if self.dropout: - weights = F.dropout( - weights, - self.dropout, - training=self.training, - mode="upscale_in_train") + weights = F.dropout(weights, + self.dropout, + training=self.training, + mode="upscale_in_train") out = tensor.matmul(weights, v) @@ -522,17 +530,20 @@ class TransformerEncoderLayer(Layer): weight_attrs = _convert_param_attr_to_list(weight_attr, 2) bias_attrs = _convert_param_attr_to_list(bias_attr, 2) - self.self_attn = MultiHeadAttention( - d_model, - nhead, - dropout=attn_dropout, - weight_attr=weight_attrs[0], - bias_attr=bias_attrs[0]) - self.linear1 = Linear( - d_model, dim_feedforward, weight_attrs[1], bias_attr=bias_attrs[1]) + self.self_attn = MultiHeadAttention(d_model, + nhead, + dropout=attn_dropout, + weight_attr=weight_attrs[0], + bias_attr=bias_attrs[0]) + self.linear1 = Linear(d_model, + dim_feedforward, + weight_attrs[1], + bias_attr=bias_attrs[1]) self.dropout = Dropout(act_dropout, mode="upscale_in_train") - self.linear2 = Linear( - dim_feedforward, d_model, weight_attrs[1], bias_attr=bias_attrs[1]) + self.linear2 = Linear(dim_feedforward, + d_model, + weight_attrs[1], + bias_attr=bias_attrs[1]) self.norm1 = LayerNorm(d_model) self.norm2 = LayerNorm(d_model) self.dropout1 = Dropout(dropout, mode="upscale_in_train") @@ 
-613,8 +624,8 @@ class TransformerEncoderLayer(Layer): `MultiHeadAttention.gen_cache` and `MultiHeadAttention.forward` \ for more details. """ - incremental_cache = self.self_attn.gen_cache( - src, type=self.self_attn.Cache) + incremental_cache = self.self_attn.gen_cache(src, + type=self.self_attn.Cache) return incremental_cache @@ -648,9 +659,10 @@ class TransformerEncoder(Layer): def __init__(self, encoder_layer, num_layers, norm=None): super(TransformerEncoder, self).__init__() - self.layers = LayerList([(encoder_layer if i == 0 else - type(encoder_layer)(**encoder_layer._config)) - for i in range(num_layers)]) + self.layers = LayerList([ + (encoder_layer if i == 0 else type(encoder_layer)( + **encoder_layer._config)) for i in range(num_layers) + ]) self.num_layers = num_layers self.norm = norm @@ -827,23 +839,25 @@ class TransformerDecoderLayer(Layer): weight_attrs = _convert_param_attr_to_list(weight_attr, 3) bias_attrs = _convert_param_attr_to_list(bias_attr, 3) - self.self_attn = MultiHeadAttention( - d_model, - nhead, - dropout=attn_dropout, - weight_attr=weight_attrs[0], - bias_attr=bias_attrs[0]) - self.cross_attn = MultiHeadAttention( - d_model, - nhead, - dropout=attn_dropout, - weight_attr=weight_attrs[1], - bias_attr=bias_attrs[1]) - self.linear1 = Linear( - d_model, dim_feedforward, weight_attrs[2], bias_attr=bias_attrs[2]) + self.self_attn = MultiHeadAttention(d_model, + nhead, + dropout=attn_dropout, + weight_attr=weight_attrs[0], + bias_attr=bias_attrs[0]) + self.cross_attn = MultiHeadAttention(d_model, + nhead, + dropout=attn_dropout, + weight_attr=weight_attrs[1], + bias_attr=bias_attrs[1]) + self.linear1 = Linear(d_model, + dim_feedforward, + weight_attrs[2], + bias_attr=bias_attrs[2]) self.dropout = Dropout(act_dropout, mode="upscale_in_train") - self.linear2 = Linear( - dim_feedforward, d_model, weight_attrs[2], bias_attr=bias_attrs[2]) + self.linear2 = Linear(dim_feedforward, + d_model, + weight_attrs[2], + bias_attr=bias_attrs[2]) self.norm1 = LayerNorm(d_model) self.norm2 = LayerNorm(d_model) self.norm3 = LayerNorm(d_model) @@ -958,8 +972,8 @@ class TransformerDecoderLayer(Layer): See `MultiHeadAttention.gen_cache` and `MultiHeadAttention.forward` \ for more details. """ - incremental_cache = self.self_attn.gen_cache( - memory, type=self.self_attn.Cache) + incremental_cache = self.self_attn.gen_cache(memory, + type=self.self_attn.Cache) static_cache = self.cross_attn.gen_cache( memory, memory, type=self.cross_attn.StaticCache) return incremental_cache, static_cache @@ -1002,9 +1016,10 @@ class TransformerDecoder(Layer): def __init__(self, decoder_layer, num_layers, norm=None): super(TransformerDecoder, self).__init__() - self.layers = LayerList([(decoder_layer if i == 0 else - type(decoder_layer)(**decoder_layer._config)) - for i in range(num_layers)]) + self.layers = LayerList([ + (decoder_layer if i == 0 else type(decoder_layer)( + **decoder_layer._config)) for i in range(num_layers) + ]) self.num_layers = num_layers self.norm = norm @@ -1344,8 +1359,10 @@ class Transformer(Layer): tgt_mask = _convert_attention_mask(tgt_mask, tgt.dtype) memory_mask = _convert_attention_mask(memory_mask, memory.dtype) - output = self.decoder( - tgt, memory, tgt_mask=tgt_mask, memory_mask=memory_mask) + output = self.decoder(tgt, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask) return output def generate_square_subsequent_mask(self, length): @@ -1379,7 +1396,5 @@ class Transformer(Layer): # [ 0. 0. 0. 0. 
0.]] """ - return paddle.tensor.triu( - (paddle.ones( - (length, length), dtype=paddle.get_default_dtype()) * -np.inf), - 1) + return paddle.tensor.triu((paddle.ones( + (length, length), dtype=paddle.get_default_dtype()) * -np.inf), 1) diff --git a/python/paddle/nn/layer/vision.py b/python/paddle/nn/layer/vision.py index 6d5c112d757..2fa150dcbdf 100644 --- a/python/paddle/nn/layer/vision.py +++ b/python/paddle/nn/layer/vision.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: define specitial functions used in computer vision task +# TODO: define specitial functions used in computer vision task from .. import Layer from .. import functional diff --git a/python/paddle/nn/quant/functional_layers.py b/python/paddle/nn/quant/functional_layers.py index 2c0eb88e087..ca1eb5f4fb3 100644 --- a/python/paddle/nn/quant/functional_layers.py +++ b/python/paddle/nn/quant/functional_layers.py @@ -19,11 +19,13 @@ __all__ = [] class FloatFunctionalLayer(Layer): + def __init__(self): super(FloatFunctionalLayer, self).__init__() class add(FloatFunctionalLayer): + def __init__(self): super(add, self).__init__() @@ -32,6 +34,7 @@ class add(FloatFunctionalLayer): class subtract(FloatFunctionalLayer): + def __init__(self): super(subtract, self).__init__() @@ -40,6 +43,7 @@ class subtract(FloatFunctionalLayer): class multiply(FloatFunctionalLayer): + def __init__(self): super(multiply, self).__init__() @@ -48,6 +52,7 @@ class multiply(FloatFunctionalLayer): class divide(FloatFunctionalLayer): + def __init__(self): super(divide, self).__init__() @@ -56,6 +61,7 @@ class divide(FloatFunctionalLayer): class reshape(FloatFunctionalLayer): + def __init__(self): super(reshape, self).__init__() @@ -64,6 +70,7 @@ class reshape(FloatFunctionalLayer): class transpose(FloatFunctionalLayer): + def __init__(self): super(transpose, self).__init__() @@ -72,6 +79,7 @@ class transpose(FloatFunctionalLayer): class concat(FloatFunctionalLayer): + def __init__(self): super(concat, self).__init__() @@ -80,6 +88,7 @@ class concat(FloatFunctionalLayer): class flatten(FloatFunctionalLayer): + def __init__(self): super(flatten, self).__init__() diff --git a/python/paddle/nn/quant/quant_layers.py b/python/paddle/nn/quant/quant_layers.py index 8e9316a1962..62fe8087c4f 100644 --- a/python/paddle/nn/quant/quant_layers.py +++ b/python/paddle/nn/quant/quant_layers.py @@ -39,8 +39,9 @@ __all__ = [ 'QuantStub', ] -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') class FakeQuantAbsMax(Layer): @@ -65,12 +66,12 @@ class FakeQuantAbsMax(Layer): name) if name else 'quant_dequant.scale' self._scale_name = unique_name.generate(scale_prefix) if quant_on_weight: - scale_attr = ParamAttr( - name=self._scale_name, - initializer=Constant(0.001), - trainable=False) - self._scale = self.create_parameter( - shape=[1], attr=scale_attr, dtype=self._dtype) + scale_attr = ParamAttr(name=self._scale_name, + initializer=Constant(0.001), + trainable=False) + self._scale = self.create_parameter(shape=[1], + attr=scale_attr, + dtype=self._dtype) self._scale.stop_gradient = True else: self._scale = None @@ -78,12 +79,12 @@ class FakeQuantAbsMax(Layer): def forward(self, input): if in_dynamic_mode(): attrs = ('bit_length', self._quant_bits) - quant_out = _varbase_creator( - type=input.type, - name="{}.quantized.dequantized".format(input.name), - 
shape=input.shape, - dtype=input.dtype, - persistable=False) + quant_out = _varbase_creator(type=input.type, + name="{}.quantized.dequantized".format( + input.name), + shape=input.shape, + dtype=input.dtype, + persistable=False) out_scale = self._scale if not out_scale: out_scale = _varbase_creator( @@ -93,8 +94,8 @@ class FakeQuantAbsMax(Layer): dtype=self._dtype, persistable=False) out_scale.stop_gradient = True - out, _, = _C_ops.fake_quantize_dequantize_abs_max(input, quant_out, - out_scale, *attrs) + out, _, = _C_ops.fake_quantize_dequantize_abs_max( + input, quant_out, out_scale, *attrs) return out check_variable_and_dtype(input, 'input', ['float32'], "FakeQuantAbsMax") @@ -116,11 +117,10 @@ class FakeQuantAbsMax(Layer): stop_gradient=True) outputs = {"Out": [quant_out], "OutScale": [out_scale]} - self._helper.append_op( - type="fake_quantize_dequantize_abs_max", - inputs=inputs, - outputs=outputs, - attrs=attrs) + self._helper.append_op(type="fake_quantize_dequantize_abs_max", + inputs=inputs, + outputs=outputs, + attrs=attrs) return quant_out @@ -146,44 +146,44 @@ class FakeQuantMovingAverageAbsMax(Layer): scale_prefix = "{}.scale".format( name) if name else 'quant_dequant.scale' - scale_attr = ParamAttr( - name=unique_name.generate(scale_prefix), - initializer=Constant(0.001), - trainable=False) - self._scale = self.create_parameter( - shape=[1], attr=scale_attr, dtype=dtype) + scale_attr = ParamAttr(name=unique_name.generate(scale_prefix), + initializer=Constant(0.001), + trainable=False) + self._scale = self.create_parameter(shape=[1], + attr=scale_attr, + dtype=dtype) self._scale.stop_gradient = True state_prefix = "{}.state".format( name) if name else 'quant_dequant.state' - state_attr = ParamAttr( - name=unique_name.generate(state_prefix), - initializer=Constant(1), - trainable=False) - self._state = self.create_parameter( - shape=[1], attr=state_attr, dtype=dtype) + state_attr = ParamAttr(name=unique_name.generate(state_prefix), + initializer=Constant(1), + trainable=False) + self._state = self.create_parameter(shape=[1], + attr=state_attr, + dtype=dtype) self._state.stop_gradient = True accum_prefix = "{}.accum".format( name) if name else 'quant_dequant.accum' - accum_attr = ParamAttr( - name=unique_name.generate(accum_prefix), - initializer=Constant(1), - trainable=False) - self._accum = self.create_parameter( - shape=[1], attr=accum_attr, dtype=dtype) + accum_attr = ParamAttr(name=unique_name.generate(accum_prefix), + initializer=Constant(1), + trainable=False) + self._accum = self.create_parameter(shape=[1], + attr=accum_attr, + dtype=dtype) self._accum.stop_gradient = True def forward(self, input): if in_dynamic_mode(): attrs = ('moving_rate', self._moving_rate, 'bit_length', self._quant_bits, 'is_test', not self.training) - quant_out = _varbase_creator( - type=input.type, - name="{}.quantized.dequantized".format(input.name), - shape=input.shape, - dtype=input.dtype, - persistable=False) + quant_out = _varbase_creator(type=input.type, + name="{}.quantized.dequantized".format( + input.name), + shape=input.shape, + dtype=input.dtype, + persistable=False) state = self._state if self.training else None accum = self._accum if self.training else None @@ -224,6 +224,7 @@ class FakeQuantMovingAverageAbsMax(Layer): class FakeQuantChannelWiseAbsMax(Layer): + def __init__(self, name=None, channel_num=None, @@ -242,12 +243,12 @@ class FakeQuantChannelWiseAbsMax(Layer): name) if name else 'quant_dequant.scale' self._scale_name = unique_name.generate(scale_prefix) if quant_on_weight: 
- scale_attr = ParamAttr( - name=self._scale_name, - initializer=Constant(0.0), - trainable=False) - self._scale = self.create_parameter( - shape=[self._channel_num], attr=scale_attr, dtype=self._dtype) + scale_attr = ParamAttr(name=self._scale_name, + initializer=Constant(0.0), + trainable=False) + self._scale = self.create_parameter(shape=[self._channel_num], + attr=scale_attr, + dtype=self._dtype) self._scale.stop_gradient = True else: self._scale = None @@ -256,12 +257,12 @@ class FakeQuantChannelWiseAbsMax(Layer): if in_dynamic_mode(): attrs = ('bit_length', self._quant_bits, 'quant_axis', self._quant_axis) - quant_out = _varbase_creator( - type=input.type, - name="{}.quantized.dequantized".format(input.name), - shape=input.shape, - dtype=input.dtype, - persistable=False) + quant_out = _varbase_creator(type=input.type, + name="{}.quantized.dequantized".format( + input.name), + shape=input.shape, + dtype=input.dtype, + persistable=False) out_scale = self._scale if out_scale is None: @@ -307,6 +308,7 @@ class FakeQuantChannelWiseAbsMax(Layer): class MovingAverageAbsMaxScale(Layer): + def __init__(self, name=None, moving_rate=0.9, dtype='float32'): r""" MovingAverageMaxScale layer is used to calculating the output quantization @@ -320,28 +322,30 @@ class MovingAverageAbsMaxScale(Layer): scale_prefix = '{}.scale'.format(name) if name else 'outscale.scale' scale_name = unique_name.generate(scale_prefix) - scale_attr = ParamAttr( - name=scale_name, initializer=Constant(0), trainable=False) - self._scale = self.create_parameter( - shape=[1], attr=scale_attr, dtype=dtype) + scale_attr = ParamAttr(name=scale_name, + initializer=Constant(0), + trainable=False) + self._scale = self.create_parameter(shape=[1], + attr=scale_attr, + dtype=dtype) self._scale.stop_gradient = True state_prefix = "{}.state".format(name) if name else 'outscale.state' - state_attr = ParamAttr( - name=unique_name.generate(state_prefix), - initializer=Constant(0), - trainable=False) - self._state = self.create_parameter( - shape=[1], attr=state_attr, dtype=dtype) + state_attr = ParamAttr(name=unique_name.generate(state_prefix), + initializer=Constant(0), + trainable=False) + self._state = self.create_parameter(shape=[1], + attr=state_attr, + dtype=dtype) self._state.stop_gradient = True accum_prefix = "{}.accum".format(name) if name else 'outscale.accum' - accum_attr = ParamAttr( - name=unique_name.generate(accum_prefix), - initializer=Constant(0), - trainable=False) - self._accum = self.create_parameter( - shape=[1], attr=accum_attr, dtype=dtype) + accum_attr = ParamAttr(name=unique_name.generate(accum_prefix), + initializer=Constant(0), + trainable=False) + self._accum = self.create_parameter(shape=[1], + attr=accum_attr, + dtype=dtype) self._accum.stop_gradient = True def forward(self, input): @@ -350,12 +354,11 @@ class MovingAverageAbsMaxScale(Layer): not self.training) state = self._state if self.training else None accum = self._accum if self.training else None - quant_out = _varbase_creator( - type=input.type, - name="{}.tmp".format(input.name), - shape=input.shape, - dtype=input.dtype, - persistable=False) + quant_out = _varbase_creator(type=input.type, + name="{}.tmp".format(input.name), + shape=input.shape, + dtype=input.dtype, + persistable=False) out, _, _, _ = _C_ops.moving_average_abs_max_scale( input, accum, state, quant_out, self._scale, state, accum, @@ -381,11 +384,10 @@ class MovingAverageAbsMaxScale(Layer): outputs['OutState'] = [self._state] outputs['OutAccum'] = [self._accum] - self._helper.append_op( 
- type="moving_average_abs_max_scale", - inputs=inputs, - outputs=outputs, - attrs=attrs) + self._helper.append_op(type="moving_average_abs_max_scale", + inputs=inputs, + outputs=outputs, + attrs=attrs) return quant_out @@ -471,15 +473,14 @@ class QuantizedConv2D(Layer): data_format=self._data_format) self._padding = 0 - return F.conv2d( - quant_input, - quant_weight, - bias=self.bias, - padding=self._padding, - stride=self._stride, - dilation=self._dilation, - groups=self._groups, - data_format=self._data_format) + return F.conv2d(quant_input, + quant_weight, + bias=self.bias, + padding=self._padding, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format) class QuantizedConv2DTranspose(Layer): @@ -575,17 +576,16 @@ class QuantizedConv2DTranspose(Layer): else: output_padding = 0 - return F.conv2d_transpose( - quant_input, - quant_weight, - bias=self.bias, - padding=self._padding, - output_padding=output_padding, - stride=self._stride, - dilation=self._dilation, - groups=self._groups, - output_size=output_size, - data_format=self._data_format) + return F.conv2d_transpose(quant_input, + quant_weight, + bias=self.bias, + padding=self._padding, + output_padding=output_padding, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + output_size=output_size, + data_format=self._data_format) class QuantizedLinear(Layer): @@ -652,8 +652,10 @@ class QuantizedLinear(Layer): weight = self._weight_preprocess(self.weight) quant_weight = self._fake_quant_weight(weight) - out = F.linear( - x=quant_input, weight=quant_weight, bias=self.bias, name=self.name) + out = F.linear(x=quant_input, + weight=quant_weight, + bias=self.bias, + name=self.name) return out @@ -677,8 +679,8 @@ class MAOutputScaleLayer(Layer): def forward(self, *inputs, **kwargs): out = self._layer(*inputs, **kwargs) # TODO (jc): support the ops of several outputs - if (isinstance(out, list) or isinstance(out, tuple) or - isinstance(out, dict)): + if (isinstance(out, list) or isinstance(out, tuple) + or isinstance(out, dict)): return out else: return self._ma_output_scale(out) diff --git a/python/paddle/nn/utils/__init__.py b/python/paddle/nn/utils/__init__.py index 8ec4e8cfd60..5afdaa8d848 100644 --- a/python/paddle/nn/utils/__init__.py +++ b/python/paddle/nn/utils/__init__.py @@ -17,5 +17,6 @@ from .weight_norm_hook import weight_norm, remove_weight_norm # noqa: F401 from .transform_parameters import parameters_to_vector, vector_to_parameters, _stride_column # noqa: F401 __all__ = [ #noqa - 'weight_norm', 'remove_weight_norm', 'spectral_norm', 'parameters_to_vector', 'vector_to_parameters' + 'weight_norm', 'remove_weight_norm', 'spectral_norm', + 'parameters_to_vector', 'vector_to_parameters' ] diff --git a/python/paddle/nn/utils/spectral_norm_hook.py b/python/paddle/nn/utils/spectral_norm_hook.py index 56c9e83c38b..375fe9013b8 100644 --- a/python/paddle/nn/utils/spectral_norm_hook.py +++ b/python/paddle/nn/utils/spectral_norm_hook.py @@ -30,13 +30,14 @@ def normal_(x, mean=0., std=1.): class SpectralNorm(object): + def __init__(self, name='weight', n_power_iterations=1, dim=0, eps=1e-12): self.name = name self.dim = dim if n_power_iterations <= 0: - raise ValueError('Expected n_power_iterations to be positive, but ' - 'got n_power_iterations={}'.format( - n_power_iterations)) + raise ValueError( + 'Expected n_power_iterations to be positive, but ' + 'got n_power_iterations={}'.format(n_power_iterations)) self.n_power_iterations = n_power_iterations self.eps = eps @@ 
-44,9 +45,9 @@ class SpectralNorm(object): weight_mat = weight if self.dim != 0: # transpose dim to front - weight_mat = weight_mat.transpose([self.dim] + [ - d for d in range(weight_mat.dim()) if d != self.dim - ]) + weight_mat = weight_mat.transpose( + [self.dim] + + [d for d in range(weight_mat.dim()) if d != self.dim]) height = weight_mat.shape[0] @@ -63,19 +64,20 @@ class SpectralNorm(object): for _ in range(self.n_power_iterations): v.set_value( F.normalize( - paddle.matmul( - weight_mat, - u, - transpose_x=True, - transpose_y=False), + paddle.matmul(weight_mat, + u, + transpose_x=True, + transpose_y=False), axis=0, - epsilon=self.eps, )) + epsilon=self.eps, + )) u.set_value( F.normalize( paddle.matmul(weight_mat, v), axis=0, - epsilon=self.eps, )) + epsilon=self.eps, + )) if self.n_power_iterations > 0: u = u.clone() v = v.clone() @@ -85,11 +87,8 @@ class SpectralNorm(object): return weight def __call__(self, layer, inputs): - setattr( - layer, - self.name, - self.compute_weight( - layer, do_power_iteration=layer.training)) + setattr(layer, self.name, + self.compute_weight(layer, do_power_iteration=layer.training)) @staticmethod def apply(layer, name, n_power_iterations, dim, eps): @@ -201,8 +200,9 @@ def spectral_norm(layer, """ if dim is None: - if isinstance(layer, (Conv1DTranspose, Conv2DTranspose, Conv3DTranspose, - Linear)): + if isinstance( + layer, + (Conv1DTranspose, Conv2DTranspose, Conv3DTranspose, Linear)): dim = 1 else: dim = 0 diff --git a/python/paddle/nn/utils/transform_parameters.py b/python/paddle/nn/utils/transform_parameters.py index feb70e02d59..36b0dcdf507 100644 --- a/python/paddle/nn/utils/transform_parameters.py +++ b/python/paddle/nn/utils/transform_parameters.py @@ -27,13 +27,14 @@ def _inplace_reshape_dygraph(x, shape): tmp_out, _ = _C_ops.reshape2(x, None, 'shape', shape) tmp_out._share_underline_tensor_to(x) else: - _dygraph_tracer().trace_op( - type="reshape2", - inputs={'X': x}, - outputs={'Out': x, - 'XShape': x_shape}, - attrs={'shape': shape}, - stop_gradient=True) + _dygraph_tracer().trace_op(type="reshape2", + inputs={'X': x}, + outputs={ + 'Out': x, + 'XShape': x_shape + }, + attrs={'shape': shape}, + stop_gradient=True) @dygraph_only @@ -106,12 +107,11 @@ def parameters_to_vector(parameters, name=None): _C_ops.concat(parameters, tmp, 'axis', 0) tmp._share_underline_tensor_to(out) else: - _dygraph_tracer().trace_op( - type='concat', - inputs={'X': parameters}, - outputs={'Out': [out]}, - attrs={'axis': 0}, - stop_gradient=True) + _dygraph_tracer().trace_op(type='concat', + inputs={'X': parameters}, + outputs={'Out': [out]}, + attrs={'axis': 0}, + stop_gradient=True) for i, param in enumerate(parameters): _inplace_reshape_dygraph(param, origin_shapes[i]) return out @@ -160,13 +160,14 @@ def vector_to_parameters(vec, parameters, name=None): for i in range(0, len(res)): res[i]._share_underline_tensor_to(parameters[i]) else: - _dygraph_tracer().trace_op( - type='split', - inputs={'X': [vec]}, - outputs={'Out': parameters}, - attrs={'axis': 0, - 'sections': sections}, - stop_gradient=True) + _dygraph_tracer().trace_op(type='split', + inputs={'X': [vec]}, + outputs={'Out': parameters}, + attrs={ + 'axis': 0, + 'sections': sections + }, + stop_gradient=True) for i, param in enumerate(parameters): _inplace_reshape_dygraph(param, origin_shapes[i]) diff --git a/python/paddle/nn/utils/weight_norm_hook.py b/python/paddle/nn/utils/weight_norm_hook.py index 84644ccc484..c805d3949e8 100755 --- a/python/paddle/nn/utils/weight_norm_hook.py +++ 
b/python/paddle/nn/utils/weight_norm_hook.py @@ -30,15 +30,16 @@ def l2_norm(x, axis, epsilon=1e-12, name=None): helper = LayerHelper("l2_normalize", **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) norm = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="norm", - inputs={"X": x}, - outputs={"Out": out, - "Norm": norm}, - attrs={ - "axis": 1 if axis is None else axis, - "epsilon": epsilon, - }) + helper.append_op(type="norm", + inputs={"X": x}, + outputs={ + "Out": out, + "Norm": norm + }, + attrs={ + "axis": 1 if axis is None else axis, + "epsilon": epsilon, + }) return paddle.squeeze(norm, axis=[axis]) @@ -85,12 +86,14 @@ def _weight_norm(v, g, dim): v_normalized = F.l2_normalize(p_matrix, axis=1) v_normalized = paddle.reshape(v_normalized, transposed_shape) v_normalized = paddle.transpose(v_normalized, perm) - weight = F.elementwise_mul( - v_normalized, g, axis=dim if dim is not None else -1) + weight = F.elementwise_mul(v_normalized, + g, + axis=dim if dim is not None else -1) return weight class WeightNorm(object): + def __init__(self, name, dim): if dim is None: dim = -1 diff --git a/python/paddle/onnx/export.py b/python/paddle/onnx/export.py index b8a217a5134..666cd7c0862 100644 --- a/python/paddle/onnx/export.py +++ b/python/paddle/onnx/export.py @@ -91,15 +91,14 @@ def export(layer, path, input_spec=None, opset_version=9, **configs): file_prefix = os.path.basename(path) if file_prefix == "": - raise ValueError("The input path MUST be format of dirname/file_prefix " - "[dirname\\file_prefix in Windows system], but " - "the file_prefix is empty in received path: {}".format( - path)) + raise ValueError( + "The input path MUST be format of dirname/file_prefix " + "[dirname\\file_prefix in Windows system], but " + "the file_prefix is empty in received path: {}".format(path)) save_file = path + '.onnx' - p2o.dygraph2onnx( - layer, - save_file, - input_spec=input_spec, - opset_version=opset_version, - **configs) + p2o.dygraph2onnx(layer, + save_file, + input_spec=input_spec, + opset_version=opset_version, + **configs) diff --git a/python/paddle/optimizer/__init__.py b/python/paddle/optimizer/__init__.py index 07d2935bc76..cd75fd4906e 100644 --- a/python/paddle/optimizer/__init__.py +++ b/python/paddle/optimizer/__init__.py @@ -24,15 +24,7 @@ from .momentum import Momentum # noqa: F401 from .lamb import Lamb # noqa: F401 from . 
import lr # noqa: F401 -__all__ = [ #noqa - 'Optimizer', - 'Adagrad', - 'Adam', - 'AdamW', - 'Adamax', - 'RMSProp', - 'Adadelta', - 'SGD', - 'Momentum', - 'Lamb' +__all__ = [ #noqa + 'Optimizer', 'Adagrad', 'Adam', 'AdamW', 'Adamax', 'RMSProp', 'Adadelta', + 'SGD', 'Momentum', 'Lamb' ] diff --git a/python/paddle/optimizer/adadelta.py b/python/paddle/optimizer/adadelta.py index 32050c12ec1..ca166713964 100644 --- a/python/paddle/optimizer/adadelta.py +++ b/python/paddle/optimizer/adadelta.py @@ -120,12 +120,11 @@ class Adadelta(Optimizer): raise ValueError("epsilon is not set.") if rho is None: raise ValueError("rho is not set.") - super(Adadelta, self).__init__( - learning_rate=learning_rate, - parameters=parameters, - weight_decay=weight_decay, - grad_clip=grad_clip, - name=name) + super(Adadelta, self).__init__(learning_rate=learning_rate, + parameters=parameters, + weight_decay=weight_decay, + grad_clip=grad_clip, + name=name) self.type = "adadelta" self._epsilon = epsilon self._rho = rho @@ -157,22 +156,28 @@ class Adadelta(Optimizer): self._avg_squared_update_acc_str, param_and_grad[0]) # Create the adadelta optimizer op - adadelta_op = block.append_op( - type=self.type, - inputs={ - "Param": param_and_grad[0], - "Grad": param_and_grad[1], - "AvgSquaredGrad": avg_squared_grad_acc, - "AvgSquaredUpdate": avg_squared_update_acc - }, - outputs={ - "ParamOut": param_and_grad[0], - "AvgSquaredGradOut": avg_squared_grad_acc, - "AvgSquaredUpdateOut": avg_squared_update_acc - }, - attrs={"epsilon": self._epsilon, - "rho": self._rho}, - stop_gradient=True) + adadelta_op = block.append_op(type=self.type, + inputs={ + "Param": param_and_grad[0], + "Grad": param_and_grad[1], + "AvgSquaredGrad": + avg_squared_grad_acc, + "AvgSquaredUpdate": + avg_squared_update_acc + }, + outputs={ + "ParamOut": + param_and_grad[0], + "AvgSquaredGradOut": + avg_squared_grad_acc, + "AvgSquaredUpdateOut": + avg_squared_update_acc + }, + attrs={ + "epsilon": self._epsilon, + "rho": self._rho + }, + stop_gradient=True) return adadelta_op diff --git a/python/paddle/optimizer/adagrad.py b/python/paddle/optimizer/adagrad.py index 7ca4ab648a1..f5cd7bdaa83 100644 --- a/python/paddle/optimizer/adagrad.py +++ b/python/paddle/optimizer/adagrad.py @@ -118,12 +118,11 @@ class Adagrad(Optimizer): initial_accumulator_value=0.0): assert learning_rate is not None assert epsilon is not None - super(Adagrad, self).__init__( - learning_rate=learning_rate, - parameters=parameters, - weight_decay=weight_decay, - grad_clip=grad_clip, - name=name) + super(Adagrad, self).__init__(learning_rate=learning_rate, + parameters=parameters, + weight_decay=weight_decay, + grad_clip=grad_clip, + name=name) self.type = "adagrad" self._epsilon = epsilon self.initial_accumulator_value = initial_accumulator_value @@ -139,10 +138,9 @@ class Adagrad(Optimizer): parameters = self._update_param_group(parameters) for p in parameters: - self._add_accumulator( - self._moment_acc_str, - p, - fill_value=self.initial_accumulator_value) + self._add_accumulator(self._moment_acc_str, + p, + fill_value=self.initial_accumulator_value) def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, framework.Block) @@ -153,18 +151,23 @@ class Adagrad(Optimizer): moment_acc = self._get_accumulator(self._moment_acc_str, param_and_grad[0]) # Create the adagrad optimizer op - adagrad_op = block.append_op( - type=self.type, - inputs={ - "Param": param_and_grad[0], - "Grad": param_and_grad[1], - "Moment": moment_acc, - "LearningRate": 
self._create_param_lr(param_and_grad) - }, - outputs={"ParamOut": param_and_grad[0], - "MomentOut": moment_acc}, - attrs={"epsilon": self._epsilon}, - stop_gradient=True) + adagrad_op = block.append_op(type=self.type, + inputs={ + "Param": + param_and_grad[0], + "Grad": + param_and_grad[1], + "Moment": + moment_acc, + "LearningRate": + self._create_param_lr(param_and_grad) + }, + outputs={ + "ParamOut": param_and_grad[0], + "MomentOut": moment_acc + }, + attrs={"epsilon": self._epsilon}, + stop_gradient=True) return adagrad_op diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index de09193ac79..ac2685972d1 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -191,12 +191,11 @@ class Adam(Optimizer): if not 0 <= epsilon: raise ValueError( "Invaild value of epsilon, expect epsilon >= 0.") - super(Adam, self).__init__( - learning_rate=learning_rate, - parameters=parameters, - weight_decay=weight_decay, - grad_clip=grad_clip, - name=name) + super(Adam, self).__init__(learning_rate=learning_rate, + parameters=parameters, + weight_decay=weight_decay, + grad_clip=grad_clip, + name=name) self.type = "adam" self._beta1 = beta1 self._beta2 = beta2 @@ -237,21 +236,19 @@ class Adam(Optimizer): var_name = param.name + "_fp32_master" var_name = unique_name.generate(var_name) - var = layers.create_global_var( - name=var_name, - shape=param.shape, - value=0, - dtype='float32', - persistable=True) + var = layers.create_global_var(name=var_name, + shape=param.shape, + value=0, + dtype='float32', + persistable=True) block = self.helper.startup_program.global_block() - block.append_op( - type="cast", - inputs={"X": [param]}, - outputs={"Out": [var]}, - attrs={ - "in_dtype": param.dtype, - "out_dtype": core.VarDesc.VarType.FP32 - }) + block.append_op(type="cast", + inputs={"X": [param]}, + outputs={"Out": [var]}, + attrs={ + "in_dtype": param.dtype, + "out_dtype": core.VarDesc.VarType.FP32 + }) self._master_weights[param.name] = var return var @@ -269,10 +266,11 @@ class Adam(Optimizer): target_param = self._master_weights[ param.name] if find_master else param target_name = target_param.name - if (name not in self._accumulators or - target_name not in self._accumulators[name]): - raise Exception("Accumulator {} does not exist for parameter {}". - format(name, target_name)) + if (name not in self._accumulators + or target_name not in self._accumulators[name]): + raise Exception( + "Accumulator {} does not exist for parameter {}".format( + name, target_name)) return self._accumulators[name][target_name] def _add_moments_pows(self, p): @@ -407,12 +405,11 @@ class Adam(Optimizer): inputs["MasterParam"] = master_weight outputs["MasterParamOut"] = master_weight - adam_op = block.append_op( - type=self.type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + adam_op = block.append_op(type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return adam_op @@ -455,16 +452,17 @@ class Adam(Optimizer): "Adam don't support weight_decay with sparse parameters, please set it to None." ) else: - if hasattr(grad_var, - "_is_sparse") and grad_var._is_sparse( - ) and self.regularization is not None: + if hasattr( + grad_var, "_is_sparse") and grad_var._is_sparse( + ) and self.regularization is not None: raise RuntimeError( "Adam don't support weight_decay with sparse parameters, please set it to None." 
) params_grads.append((param, grad_var)) - optimize_ops = self._apply_optimize( - loss=None, startup_program=None, params_grads=params_grads) + optimize_ops = self._apply_optimize(loss=None, + startup_program=None, + params_grads=params_grads) else: # optimize parameters in groups for param_group in self._param_groups: @@ -478,8 +476,9 @@ class Adam(Optimizer): params_grads.update( {k: v for k, v in param_group.items() if k != 'params'}) - self._apply_optimize( - loss=None, startup_program=None, params_grads=params_grads) + self._apply_optimize(loss=None, + startup_program=None, + params_grads=params_grads) def _multi_tensor_init(self, target_block, parameters): """ @@ -623,12 +622,11 @@ class Adam(Optimizer): outputs["MasterParamOut"] = self._master_weight_dict[ key] attrs["multi_precision"] = find_master - target_block.append_op( - type="merged_adam", - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + target_block.append_op(type="merged_adam", + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return None def _update_param_group(self, parameters): diff --git a/python/paddle/optimizer/adamax.py b/python/paddle/optimizer/adamax.py index 4c4a85559c0..9a54435f8b8 100644 --- a/python/paddle/optimizer/adamax.py +++ b/python/paddle/optimizer/adamax.py @@ -151,12 +151,11 @@ class Adamax(Optimizer): raise ValueError("Invaild value of beta2, expect beta2 in [0,1).") if not 0 <= epsilon: raise ValueError("Invaild value of epsilon, expect epsilon >= 0.") - super(Adamax, self).__init__( - learning_rate=learning_rate, - parameters=parameters, - weight_decay=weight_decay, - grad_clip=grad_clip, - name=name) + super(Adamax, self).__init__(learning_rate=learning_rate, + parameters=parameters, + weight_decay=weight_decay, + grad_clip=grad_clip, + name=name) self.type = "adamax" self._beta1 = beta1 self._beta2 = beta2 @@ -175,11 +174,10 @@ class Adamax(Optimizer): for p in parameters: self._add_accumulator(self._moment_acc_str, p) self._add_accumulator(self._inf_norm_acc_str, p) - self._add_accumulator( - name=self._beta1_pow_acc_str, - param=p, - fill_value=self._beta1, - shape=[1]) + self._add_accumulator(name=self._beta1_pow_acc_str, + param=p, + fill_value=self._beta1, + shape=[1]) def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, framework.Block) @@ -236,12 +234,11 @@ class Adamax(Optimizer): [param, grad]), name_scope('adamax'): beta1_pow_acc = self._get_accumulator( self._beta1_pow_acc_str, param) - block.append_op( - type="scale", - inputs={"X": beta1_pow_acc}, - outputs={"Out": beta1_pow_acc}, - attrs={"scale": self._beta1}, - stop_gradient=True) + block.append_op(type="scale", + inputs={"X": beta1_pow_acc}, + outputs={"Out": beta1_pow_acc}, + attrs={"scale": self._beta1}, + stop_gradient=True) else: for param, grad in parameters_and_grads['params']: if grad is None or param.stop_gradient is True: @@ -252,12 +249,11 @@ class Adamax(Optimizer): self._beta1_pow_acc_str, param) self._beta1 = parameters_and_grads.get( 'beta1', self._default_dict['beta1']) - block.append_op( - type="scale", - inputs={"X": beta1_pow_acc}, - outputs={"Out": beta1_pow_acc}, - attrs={"scale": self._beta1}, - stop_gradient=True) + block.append_op(type="scale", + inputs={"X": beta1_pow_acc}, + outputs={"Out": beta1_pow_acc}, + attrs={"scale": self._beta1}, + stop_gradient=True) def _update_param_group(self, parameters): self._beta1 = parameters.get('beta1', self._default_dict['beta1']) diff --git a/python/paddle/optimizer/adamw.py 
b/python/paddle/optimizer/adamw.py index 0b61f3cb9a7..25f4006327d 100644 --- a/python/paddle/optimizer/adamw.py +++ b/python/paddle/optimizer/adamw.py @@ -187,8 +187,8 @@ class AdamW(Optimizer): if isinstance(parameters, (paddle.Tensor, core.eager.Tensor)): raise TypeError( "`parameters` argument given to the optimizer should be " - "an iterable of paddle Tensors, but got argument type is `{}`.". - format(type(parameters))) + "an iterable of paddle Tensors, but got argument type is `{}`." + .format(type(parameters))) if isinstance(parameters, dict): raise TypeError( "`parameters` argument should not get dict type, " @@ -327,21 +327,19 @@ class AdamW(Optimizer): var_name = param.name + "_fp32_master" var_name = unique_name.generate(var_name) - var = layers.create_global_var( - name=var_name, - shape=param.shape, - value=0, - dtype='float32', - persistable=True) + var = layers.create_global_var(name=var_name, + shape=param.shape, + value=0, + dtype='float32', + persistable=True) block = self.helper.startup_program.global_block() - block.append_op( - type="cast", - inputs={"X": [param]}, - outputs={"Out": [var]}, - attrs={ - "in_dtype": param.dtype, - "out_dtype": core.VarDesc.VarType.FP32 - }) + block.append_op(type="cast", + inputs={"X": [param]}, + outputs={"Out": [var]}, + attrs={ + "in_dtype": param.dtype, + "out_dtype": core.VarDesc.VarType.FP32 + }) self._master_weights[param.name] = var return var @@ -359,10 +357,11 @@ class AdamW(Optimizer): target_param = self._master_weights[ param.name] if find_master else param target_name = target_param.name - if (name not in self._accumulators or - target_name not in self._accumulators[name]): - raise Exception("Accumulator {} does not exist for parameter {}". - format(name, target_name)) + if (name not in self._accumulators + or target_name not in self._accumulators[name]): + raise Exception( + "Accumulator {} does not exist for parameter {}".format( + name, target_name)) return self._accumulators[name][target_name] def _add_moments_pows(self, p): @@ -487,13 +486,18 @@ class AdamW(Optimizer): "Beta2PowOut": [beta2_pow_acc], } attrs = { - "lazy_mode": self._lazy_mode, - "min_row_size_to_use_multithread": 1000, - "multi_precision": find_master, - "with_decay": with_decay, - "coeff": self._weight_decay, - "lr_ratio": 1. - if self._lr_ratio is None else self._lr_ratio(param_and_grad[0]) + "lazy_mode": + self._lazy_mode, + "min_row_size_to_use_multithread": + 1000, + "multi_precision": + find_master, + "with_decay": + with_decay, + "coeff": + self._weight_decay, + "lr_ratio": + 1. if self._lr_ratio is None else self._lr_ratio(param_and_grad[0]) } if isinstance(self._beta1, Variable): @@ -513,12 +517,11 @@ class AdamW(Optimizer): inputs["MasterParam"] = master_weight outputs["MasterParamOut"] = master_weight - adamw_op = block.append_op( - type=self.type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + adamw_op = block.append_op(type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return adamw_op @@ -564,16 +567,17 @@ class AdamW(Optimizer): "AdamW don't support weight_decay with sparse parameters, please set it to None." ) else: - if hasattr(grad_var, - "_is_sparse") and grad_var._is_sparse( - ) and self.regularization is not None: + if hasattr( + grad_var, "_is_sparse") and grad_var._is_sparse( + ) and self.regularization is not None: raise RuntimeError( "AdamW don't support weight_decay with sparse parameters, please set it to None." 
) params_grads.append((param, grad_var)) - optimize_ops = self._apply_optimize( - loss=None, startup_program=None, params_grads=params_grads) + optimize_ops = self._apply_optimize(loss=None, + startup_program=None, + params_grads=params_grads) else: # optimize parameters in groups for param_group in self._param_groups: @@ -601,8 +605,9 @@ class AdamW(Optimizer): params_grads.update( {k: v for k, v in param_group.items() if k != 'params'}) - self._apply_optimize( - loss=None, startup_program=None, params_grads=params_grads) + self._apply_optimize(loss=None, + startup_program=None, + params_grads=params_grads) def _update_param_group(self, parameters): self._beta1 = parameters.get('beta1', self._default_dict['beta1']) diff --git a/python/paddle/optimizer/lamb.py b/python/paddle/optimizer/lamb.py index e61bc8101b7..29233e6ced0 100644 --- a/python/paddle/optimizer/lamb.py +++ b/python/paddle/optimizer/lamb.py @@ -112,12 +112,11 @@ class Lamb(Optimizer): assert beta1 is not None assert beta2 is not None assert epsilon is not None - super(Lamb, self).__init__( - learning_rate=learning_rate, - parameters=parameters, - weight_decay=None, - grad_clip=grad_clip, - name=name) + super(Lamb, self).__init__(learning_rate=learning_rate, + parameters=parameters, + weight_decay=None, + grad_clip=grad_clip, + name=name) self.type = "lamb" self._beta1 = beta1 self._beta2 = beta2 @@ -160,21 +159,19 @@ class Lamb(Optimizer): var_name = param.name + "_fp32_master" var_name = unique_name.generate(var_name) - var = layers.create_global_var( - name=var_name, - shape=param.shape, - value=0, - dtype='float32', - persistable=True) + var = layers.create_global_var(name=var_name, + shape=param.shape, + value=0, + dtype='float32', + persistable=True) block = self.helper.startup_program.global_block() - block.append_op( - type="cast", - inputs={"X": [param]}, - outputs={"Out": [var]}, - attrs={ - "in_dtype": param.dtype, - "out_dtype": core.VarDesc.VarType.FP32 - }) + block.append_op(type="cast", + inputs={"X": [param]}, + outputs={"Out": [var]}, + attrs={ + "in_dtype": param.dtype, + "out_dtype": core.VarDesc.VarType.FP32 + }) self._master_weights[param.name] = var return var @@ -205,10 +202,11 @@ class Lamb(Optimizer): target_param = self._master_weights[ param.name] if find_master else param target_name = target_param.name - if (name not in self._accumulators or - target_name not in self._accumulators[name]): - raise Exception("Accumulator {} does not exist for parameter {}". 
- format(name, target_name)) + if (name not in self._accumulators + or target_name not in self._accumulators[name]): + raise Exception( + "Accumulator {} does not exist for parameter {}".format( + name, target_name)) return self._accumulators[name][target_name] def _add_moments_pows(self, p): @@ -310,12 +308,11 @@ class Lamb(Optimizer): if found_inf: inputs["SkipUpdate"] = found_inf - lamb_op = block.append_op( - type=self.type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + lamb_op = block.append_op(type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return lamb_op diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 12b8272707b..883b2c14817 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -20,21 +20,10 @@ import paddle.fluid.core as core from ..fluid.framework import _in_legacy_dygraph __all__ = [ # noqa - 'LRScheduler', - 'NoamDecay', - 'PiecewiseDecay', - 'NaturalExpDecay', - 'InverseTimeDecay', - 'PolynomialDecay', - 'LinearWarmup', - 'ExponentialDecay', - 'MultiStepDecay', - 'StepDecay', - 'LambdaDecay', - 'ReduceOnPlateau', - 'CosineAnnealingDecay', - 'MultiplicativeDecay', - 'OneCycleLR' + 'LRScheduler', 'NoamDecay', 'PiecewiseDecay', 'NaturalExpDecay', + 'InverseTimeDecay', 'PolynomialDecay', 'LinearWarmup', 'ExponentialDecay', + 'MultiStepDecay', 'StepDecay', 'LambdaDecay', 'ReduceOnPlateau', + 'CosineAnnealingDecay', 'MultiplicativeDecay', 'OneCycleLR' ] @@ -183,8 +172,8 @@ class LRScheduler(object): self.__dict__[key] = state_dict[key] else: raise RuntimeError( - "Please check whether state_dict is correct for optimizer. Can't find [ {} ] in state_dict". - format(key)) + "Please check whether state_dict is correct for optimizer. Can't find [ {} ] in state_dict" + .format(key)) if len(state_dict) > len(self.keys): warnings.warn( "There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict" @@ -378,8 +367,8 @@ class PiecewiseDecay(LRScheduler): def __init__(self, boundaries, values, last_epoch=-1, verbose=False): self.boundaries = boundaries self.values = values - super(PiecewiseDecay, self).__init__( - last_epoch=last_epoch, verbose=verbose) + super(PiecewiseDecay, self).__init__(last_epoch=last_epoch, + verbose=verbose) def get_lr(self): for i in range(len(self.boundaries)): @@ -668,8 +657,8 @@ class PolynomialDecay(LRScheduler): tmp_epoch_num = min(self.last_epoch, self.decay_steps) return (self.base_lr - self.end_lr) * ( - (1 - float(tmp_epoch_num) / float(tmp_decay_steps) - )**self.power) + self.end_lr + (1 - float(tmp_epoch_num) / float(tmp_decay_steps))** + self.power) + self.end_lr class LinearWarmup(LRScheduler): @@ -768,8 +757,8 @@ class LinearWarmup(LRScheduler): learning_rate, int) or isinstance(learning_rate, LRScheduler) if not type_check: raise TypeError( - "the type of learning_rate should be [int, float or LRScheduler], the current type is {}". - format(learning_rate)) + "the type of learning_rate should be [int, float or LRScheduler], the current type is {}" + .format(learning_rate)) self.learning_rate = learning_rate assert warmup_steps > 0 and isinstance( warmup_steps, int), " 'warmup_steps' must be a positive integer." 
@@ -1373,8 +1362,8 @@ class ReduceOnPlateau(LRScheduler): elif not isinstance(metrics, (int, float, numpy.float32, numpy.float64)): raise TypeError( - "metrics must be 'int', 'float', 'np.float', 'numpy.ndarray' or 'paddle.Tensor', but receive {}". - format(type(metrics))) + "metrics must be 'int', 'float', 'np.float', 'numpy.ndarray' or 'paddle.Tensor', but receive {}" + .format(type(metrics))) if self.cooldown_counter > 0: self.cooldown_counter -= 1 @@ -1517,16 +1506,16 @@ class CosineAnnealingDecay(LRScheduler): if self.last_epoch == 0: return self.base_lr elif (self.last_epoch - 1 - self.T_max) % (2 * self.T_max) == 0: - return self.last_lr + (self.base_lr - self.eta_min) * (1 - math.cos( - math.pi / self.T_max)) / 2 + return self.last_lr + (self.base_lr - self.eta_min) * ( + 1 - math.cos(math.pi / self.T_max)) / 2 return (1 + math.cos(math.pi * self.last_epoch / self.T_max)) / ( 1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max)) * ( self.last_lr - self.eta_min) + self.eta_min def _get_closed_form_lr(self): - return self.eta_min + (self.base_lr - self.eta_min) * (1 + math.cos( - math.pi * self.last_epoch / self.T_max)) / 2 + return self.eta_min + (self.base_lr - self.eta_min) * ( + 1 + math.cos(math.pi * self.last_epoch / self.T_max)) / 2 class MultiplicativeDecay(LRScheduler): @@ -1706,16 +1695,18 @@ class OneCycleLR(LRScheduler): # Check type and value of total_steps if not isinstance(total_steps, int): - raise TypeError("'total_step' must be 'int', but received {}". - format(type(total_steps))) + raise TypeError( + "'total_step' must be 'int', but received {}".format( + type(total_steps))) if total_steps <= 0: raise ValueError("'total_step' must be a positive integer.") self.total_steps = total_steps # Check type and value of pac_start if not isinstance(phase_pct, float): - raise TypeError("'phase_pct' must be 'float', but received {}". - format(type(phase_pct))) + raise TypeError( + "'phase_pct' must be 'float', but received {}".format( + type(phase_pct))) if phase_pct < 0 or phase_pct > 1: raise ValueError( "'phase_pct' must be between 0 and 1, but received {}".format( @@ -1774,8 +1765,8 @@ class OneCycleLR(LRScheduler): self.anneal_func = self._linear_annealing else: raise ValueError( - "'anneal_strategy' must by one of 'cos' or 'linear', but received {}". - format(anneal_strategy)) + "'anneal_strategy' must by one of 'cos' or 'linear', but received {}" + .format(anneal_strategy)) super(OneCycleLR, self).__init__(initial_lr, last_epoch, verbose) def _cos_annealing(self, start_lr, end_lr, pct): @@ -1793,8 +1784,8 @@ class OneCycleLR(LRScheduler): "Tried to step {} times. However the number of total steps is {}" .format(current_step, self.total_steps)) - for (i, (end_step, step_size) - ) in enumerate(zip(self._step_config[1:], self._steps_size)): + for (i, (end_step, step_size)) in enumerate( + zip(self._step_config[1:], self._steps_size)): # i == len(self._lr_config) - 2 catch the last step, otherwise it will return None. if current_step <= end_step or i == len(self._lr_config) - 2: # self._step_config[i] means start step of a phase. 
diff --git a/python/paddle/optimizer/momentum.py b/python/paddle/optimizer/momentum.py index ce112c19250..bb7765ac715 100644 --- a/python/paddle/optimizer/momentum.py +++ b/python/paddle/optimizer/momentum.py @@ -139,7 +139,8 @@ class Momentum(Optimizer): if momentum is None: raise ValueError("momentum is not set") - predicate = lambda regular: isinstance(regular, (L2DecayRegularizer, float)) + predicate = lambda regular: isinstance(regular, + (L2DecayRegularizer, float)) if isinstance(parameters, list): if isinstance(parameters[0], dict): for param_group in parameters: @@ -152,12 +153,11 @@ class Momentum(Optimizer): param_group['weight_decay'] = py_regular py_regular = None if predicate(weight_decay) else weight_decay - super(Momentum, self).__init__( - learning_rate=learning_rate, - parameters=parameters, - weight_decay=py_regular, - grad_clip=grad_clip, - name=name) + super(Momentum, self).__init__(learning_rate=learning_rate, + parameters=parameters, + weight_decay=py_regular, + grad_clip=grad_clip, + name=name) self.type = "momentum" self._momentum = momentum self._use_nesterov = bool(use_nesterov) @@ -211,21 +211,19 @@ class Momentum(Optimizer): var_name = param.name + "_fp32_master" var_name = unique_name.generate(var_name) - var = layers.create_global_var( - name=var_name, - shape=param.shape, - value=0, - dtype='float32', - persistable=True) + var = layers.create_global_var(name=var_name, + shape=param.shape, + value=0, + dtype='float32', + persistable=True) block = self.helper.startup_program.global_block() - block.append_op( - type="cast", - inputs={"X": [param]}, - outputs={"Out": [var]}, - attrs={ - "in_dtype": param.dtype, - "out_dtype": core.VarDesc.VarType.FP32 - }) + block.append_op(type="cast", + inputs={"X": [param]}, + outputs={"Out": [var]}, + attrs={ + "in_dtype": param.dtype, + "out_dtype": core.VarDesc.VarType.FP32 + }) self._master_weights[param.name] = var return var @@ -245,10 +243,11 @@ class Momentum(Optimizer): target_param = self._master_weights[ param.name] if find_master else param target_name = target_param.name - if (name not in self._accumulators or - target_name not in self._accumulators[name]): - raise Exception("Accumulator {} does not exist for parameter {}". 
- format(name, target_name)) + if (name not in self._accumulators + or target_name not in self._accumulators[name]): + raise Exception( + "Accumulator {} does not exist for parameter {}".format( + name, target_name)) return self._accumulators[name][target_name] def _create_accumulators(self, block, parameters): @@ -295,7 +294,7 @@ class Momentum(Optimizer): param_and_grad[0]) lr = self._create_param_lr(param_and_grad) - # For fusion of momentum and l2decay + # For fusion of momentum and l2decay param = param_and_grad[0] regularization_method = self._regularization_method regularization_coeff = self._regularization_coeff @@ -360,12 +359,11 @@ class Momentum(Optimizer): outputs["MasterParamOut"] = master_weight # create the momentum optimize op - momentum_op = block.append_op( - type=self.type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + momentum_op = block.append_op(type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return momentum_op @@ -498,8 +496,10 @@ class Momentum(Optimizer): "VelocityOut": self._velocity_dict[key], } attrs = { - "mu": self._momentum, - "use_nesterov": self._use_nesterov, + "mu": + self._momentum, + "use_nesterov": + self._use_nesterov, "regularization_method": self._regularization_method_dict[key], "regularization_coeff": @@ -510,12 +510,11 @@ class Momentum(Optimizer): outputs["MasterParamOut"] = self._master_weight_dict[ key] attrs["multi_precision"] = find_master - target_block.append_op( - type="merged_momentum", - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + target_block.append_op(type="merged_momentum", + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return None def _update_param_group(self, parameters): diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index cf180fccc48..e3e7257f757 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -177,8 +177,8 @@ class Optimizer(object): if isinstance(parameters, (paddle.Tensor, core.eager.Tensor)): raise TypeError( "`parameters` argument given to the optimizer should be " - "an iterable of paddle Tensors, but got argument type is `{}`.". - format(type(parameters))) + "an iterable of paddle Tensors, but got argument type is `{}`." + .format(type(parameters))) if isinstance(parameters, dict): raise TypeError( "`parameters` argument should not get dict type, " @@ -197,9 +197,8 @@ class Optimizer(object): if weight_decay is not None: if not isinstance(self._parameter_list[0], dict): for param in self._parameter_list: - if hasattr( - param, - 'regularizer') and param.regularizer is not None: + if hasattr(param, 'regularizer' + ) and param.regularizer is not None: logging.info( "If regularizer of a Parameter has been set by 'paddle.ParamAttr' or 'static.WeightNormParamAttr' already. " "The weight_decay[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!" @@ -345,7 +344,7 @@ class Optimizer(object): if isinstance(self._learning_rate, LRScheduler): self._learning_rate.set_state_dict(state_dict["LR_Scheduler"]) - # NOTE: exclude learning rate scheduler's state from + # NOTE: exclude learning rate scheduler's state from # _accumulators_holder. 
state_dict = state_dict.copy() if "LR_Scheduler" in state_dict: @@ -406,8 +405,8 @@ class Optimizer(object): main_prog.lr_sheduler = self._learning_rate main_prog.lr_var = lr_var - self._learning_rate_map[framework.default_main_program( - )] = lr_var + self._learning_rate_map[ + framework.default_main_program()] = lr_var lr_value = float(self._learning_rate()) self.helper.set_variable_initializer( @@ -475,20 +474,19 @@ class Optimizer(object): current_lr = self._global_learning_rate() if current_lr is not None: if framework._non_static_mode(): - _C_ops.fill_constant(current_lr, 'value', - float(value), 'dtype', current_lr.dtype, - 'shape', list(current_lr.shape)) + _C_ops.fill_constant(current_lr, 'value', float(value), 'dtype', + current_lr.dtype, 'shape', + list(current_lr.shape)) else: global_block = framework.default_main_program().global_block() - global_block.append_op( - type='fill_constant', - outputs={'Out': [current_lr]}, - attrs={ - 'dtype': current_lr.dtype, - 'shape': list(current_lr.shape), - 'value': float(value) - }, - stop_gradient=True) + global_block.append_op(type='fill_constant', + outputs={'Out': [current_lr]}, + attrs={ + 'dtype': current_lr.dtype, + 'shape': list(current_lr.shape), + 'value': float(value) + }, + stop_gradient=True) def get_lr(self): """ @@ -627,12 +625,13 @@ class Optimizer(object): """ if self._name is not None: name = self._name + "_" + name - if (name in self._accumulators and - param.name in self._accumulators[name]): + if (name in self._accumulators + and param.name in self._accumulators[name]): if framework._non_static_mode(): return self._accumulators[name][param.name] - raise Exception("Accumulator {} already exists for parameter {}". - format(name, param.name)) + raise Exception( + "Accumulator {} already exists for parameter {}".format( + name, param.name)) if shape == None: shape = param.shape assert isinstance(self.helper, LayerHelper) @@ -677,10 +676,11 @@ class Optimizer(object): """ if self._name is not None: name = self._name + "_" + name - if (name not in self._accumulators or - param.name not in self._accumulators[name]): - raise Exception("Accumulator {} does not exist for parameter {}". 
- format(name, param.name)) + if (name not in self._accumulators + or param.name not in self._accumulators[name]): + raise Exception( + "Accumulator {} does not exist for parameter {}".format( + name, param.name)) return self._accumulators[name][param.name] def _update_param_device_map(self, parameters_and_grads, target_block): @@ -749,8 +749,8 @@ class Optimizer(object): self._param_dict['FP16_LODTensor']) == 0: if isinstance(parameters_and_grads, list): self._multi_tensor_init(target_block, [ - p[0] for p in parameters_and_grads - if not p[0].stop_gradient + p[0] + for p in parameters_and_grads if not p[0].stop_gradient ]) else: self._update_param_group(parameters_and_grads) @@ -827,8 +827,8 @@ class Optimizer(object): with param_and_grad[0].block.program._optimized_guard( param_and_grad), name_scope("optimizer"): if param_and_grad[0].stop_gradient is False: - device = self._get_device_for_param(param_and_grad[ - 0].name) + device = self._get_device_for_param( + param_and_grad[0].name) with device_guard(device): optimize_op = self._append_optimize_op( target_block, param_and_grad) @@ -923,8 +923,9 @@ class Optimizer(object): with program_guard(program, startup_program): from paddle.incubate.autograd.utils import prim_enabled if prim_enabled(): - params_grads = append_backward_new( - [loss], parameter_list, act_no_grad_set, callbacks) + params_grads = append_backward_new([loss], parameter_list, + act_no_grad_set, + callbacks) else: params_grads = append_backward(loss, parameter_list, act_no_grad_set, callbacks) @@ -1001,8 +1002,8 @@ class Optimizer(object): else: grad_clip = params_grads['grad_clip'] if grad_clip is not None: - params_grads['params'] = grad_clip(params_grads[ - 'params']) + params_grads['params'] = grad_clip( + params_grads['params']) params_grads['params'] = self.append_regularization_ops( params_grads['params'], self.regularization) @@ -1019,10 +1020,10 @@ class Optimizer(object): Function helper of append_regularization_ops. """ # If no gradient or no regularization is specified, then we don't need to do anything - if grad is None or ((not hasattr(param, 'regularizer') or - (hasattr(param, 'regularizer') and - param.regularizer is None)) and - regularization is None): + if grad is None or ( + (not hasattr(param, 'regularizer') or + (hasattr(param, 'regularizer') and param.regularizer is None)) + and regularization is None): return grad regularization_term = None if hasattr(param, 'regularizer') and param.regularizer is not None: @@ -1083,8 +1084,8 @@ class Optimizer(object): params_and_grads = [] if framework._non_static_mode(): for param, grad in parameters_and_grads: - new_grad = self._create_regularization_of_grad(param, grad, - regularization) + new_grad = self._create_regularization_of_grad( + param, grad, regularization) params_and_grads.append((param, new_grad)) else: repeate_regularizer = False @@ -1105,9 +1106,8 @@ class Optimizer(object): def _get_no_grad_set(self, loss, no_grad_set=None): no_grad_set = _get_no_grad_set_name(no_grad_set) parameters = loss.block.program.global_block().all_parameters() - param_no_trainable = set([ - param.name for param in parameters if param.stop_gradient is True - ]) + param_no_trainable = set( + [param.name for param in parameters if param.stop_gradient is True]) # If the parameter is no trainable, it should not have a gradient. 
no_grad_set.update(param_no_trainable) @@ -1217,14 +1217,14 @@ class Optimizer(object): parameter_list = parameters if parameters \ else self._parameter_list - params_grads = self.backward( - loss, - startup_program=startup_program, - parameters=parameter_list, - no_grad_set=no_grad_set) + params_grads = self.backward(loss, + startup_program=startup_program, + parameters=parameter_list, + no_grad_set=no_grad_set) - optimize_ops = self._apply_optimize( - loss, startup_program=startup_program, params_grads=params_grads) + optimize_ops = self._apply_optimize(loss, + startup_program=startup_program, + params_grads=params_grads) return optimize_ops, params_grads @@ -1264,8 +1264,9 @@ class Optimizer(object): grad_var = param._grad_ivar() params_grads.append((param, grad_var)) - self._apply_optimize( - loss=None, startup_program=None, params_grads=params_grads) + self._apply_optimize(loss=None, + startup_program=None, + params_grads=params_grads) else: # optimize parameters in groups @@ -1280,8 +1281,9 @@ class Optimizer(object): params_grads.update( {k: v for k, v in param_group.items() if k != 'params'}) - self._apply_optimize( - loss=None, startup_program=None, params_grads=params_grads) + self._apply_optimize(loss=None, + startup_program=None, + params_grads=params_grads) def _add_param_group(self, param_group): """ diff --git a/python/paddle/optimizer/rmsprop.py b/python/paddle/optimizer/rmsprop.py index 88c39ba5a29..7205a434d38 100644 --- a/python/paddle/optimizer/rmsprop.py +++ b/python/paddle/optimizer/rmsprop.py @@ -171,12 +171,11 @@ class RMSProp(Optimizer): if not 0.0 <= rho: raise ValueError("Invalid value of rho, expect rho >= 0.") - super(RMSProp, self).__init__( - learning_rate=learning_rate, - parameters=parameters, - weight_decay=weight_decay, - grad_clip=grad_clip, - name=name) + super(RMSProp, self).__init__(learning_rate=learning_rate, + parameters=parameters, + weight_decay=weight_decay, + grad_clip=grad_clip, + name=name) self.type = "rmsprop" self._rho = rho @@ -215,29 +214,34 @@ class RMSProp(Optimizer): param_and_grad[0]) mean_grad_acc = self._get_accumulator(self._mean_grad_acc_str, param_and_grad[0]) - rmsprop_op = block.append_op( - type=self.type, - inputs={ - "Param": param_and_grad[0], - "Grad": param_and_grad[1], - "Moment": momentum_acc, - "MeanSquare": mean_square_acc, - "MeanGrad": mean_grad_acc, - "LearningRate": self._create_param_lr(param_and_grad), - }, - outputs={ - "ParamOut": param_and_grad[0], - "MomentOut": momentum_acc, - "MeanSquareOut": mean_square_acc, - "MeanGradOut": mean_grad_acc - }, - attrs={ - "epsilon": self._epsilon, - "decay": self._rho, - "momentum": self._momentum, - "centered": self._centered - }, - stop_gradient=True) + rmsprop_op = block.append_op(type=self.type, + inputs={ + "Param": + param_and_grad[0], + "Grad": + param_and_grad[1], + "Moment": + momentum_acc, + "MeanSquare": + mean_square_acc, + "MeanGrad": + mean_grad_acc, + "LearningRate": + self._create_param_lr(param_and_grad), + }, + outputs={ + "ParamOut": param_and_grad[0], + "MomentOut": momentum_acc, + "MeanSquareOut": mean_square_acc, + "MeanGradOut": mean_grad_acc + }, + attrs={ + "epsilon": self._epsilon, + "decay": self._rho, + "momentum": self._momentum, + "centered": self._centered + }, + stop_gradient=True) return rmsprop_op diff --git a/python/paddle/optimizer/sgd.py b/python/paddle/optimizer/sgd.py index 46dd0b73a5e..60b5e385761 100644 --- a/python/paddle/optimizer/sgd.py +++ b/python/paddle/optimizer/sgd.py @@ -82,12 +82,11 @@ class SGD(Optimizer): name=None): 
if learning_rate is None: raise ValueError("learning_rate is not set") - super(SGD, self).__init__( - learning_rate=learning_rate, - parameters=parameters, - weight_decay=weight_decay, - grad_clip=grad_clip, - name=name) + super(SGD, self).__init__(learning_rate=learning_rate, + parameters=parameters, + weight_decay=weight_decay, + grad_clip=grad_clip, + name=name) self.type = "sgd" self._multi_precision = multi_precision self._master_weights = {} @@ -100,21 +99,19 @@ class SGD(Optimizer): var_name = param.name + "_fp32_master" var_name = unique_name.generate(var_name) - var = layers.create_global_var( - name=var_name, - shape=param.shape, - value=0, - dtype='float32', - persistable=True) + var = layers.create_global_var(name=var_name, + shape=param.shape, + value=0, + dtype='float32', + persistable=True) block = self.helper.startup_program.global_block() - block.append_op( - type="cast", - inputs={"X": [param]}, - outputs={"Out": [var]}, - attrs={ - "in_dtype": param.dtype, - "out_dtype": core.VarDesc.VarType.FP32 - }) + block.append_op(type="cast", + inputs={"X": [param]}, + outputs={"Out": [var]}, + attrs={ + "in_dtype": param.dtype, + "out_dtype": core.VarDesc.VarType.FP32 + }) self._master_weights[param.name] = var return var @@ -170,12 +167,11 @@ class SGD(Optimizer): inputs["MasterParam"] = master_weight outputs["MasterParamOut"] = master_weight - sgd_op = block.append_op( - type=self.type, - inputs=inputs, - outputs=outputs, - attrs=attrs, - stop_gradient=True) + sgd_op = block.append_op(type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) return sgd_op diff --git a/python/paddle/profiler/profiler.py b/python/paddle/profiler/profiler.py index 9df595bc3ae..c277dcedb4d 100644 --- a/python/paddle/profiler/profiler.py +++ b/python/paddle/profiler/profiler.py @@ -72,8 +72,8 @@ def make_scheduler(*, closed: int, ready: int, record: int, - repeat: int=0, - skip_first: int=0) -> Callable: + repeat: int = 0, + skip_first: int = 0) -> Callable: r""" Return a scheduler function, which scheduler the :ref:`state ` according to the setting. The state transform confirms to: @@ -156,7 +156,7 @@ def _default_state_scheduler(step: int): def export_chrome_tracing(dir_name: str, - worker_name: Optional[str]=None) -> Callable: + worker_name: Optional[str] = None) -> Callable: r""" Return a callable, used for outputing tracing data to chrome tracing format file. The output file will be saved in directory ``dir_name``, and file name will be set as worker_name. @@ -206,7 +206,8 @@ def export_chrome_tracing(dir_name: str, return handle_fn -def export_protobuf(dir_name: str, worker_name: Optional[str]=None) -> Callable: +def export_protobuf(dir_name: str, + worker_name: Optional[str] = None) -> Callable: r""" Return a callable, used for outputing tracing data to protobuf file. The output file will be saved in directory ``dir_name``, and file name will be set as worker_name. 
@@ -391,13 +392,13 @@ class Profiler: # | ips | 1086.42904 | 1227.30604 | 959.92796 | """ - def __init__( - self, - *, - targets: Optional[Iterable[ProfilerTarget]]=None, - scheduler: Union[Callable[[int], ProfilerState], tuple, None]=None, - on_trace_ready: Optional[Callable[..., Any]]=None, - timer_only: Optional[bool]=False): + def __init__(self, + *, + targets: Optional[Iterable[ProfilerTarget]] = None, + scheduler: Union[Callable[[int], ProfilerState], tuple, + None] = None, + on_trace_ready: Optional[Callable[..., Any]] = None, + timer_only: Optional[bool] = False): supported_targets = _get_supported_targets() if targets: self.targets = set(targets) @@ -424,17 +425,17 @@ class Profiler: start_batch, end_batch = scheduler start_batch = max(start_batch, 0) if start_batch >= 1: - self.scheduler = make_scheduler( - closed=max(start_batch - 1, 0), - ready=1, - record=(end_batch - start_batch), - repeat=1) + self.scheduler = make_scheduler(closed=max(start_batch - 1, 0), + ready=1, + record=(end_batch - + start_batch), + repeat=1) else: - self.scheduler = make_scheduler( - closed=0, - ready=0, - record=(end_batch - start_batch), - repeat=1) + self.scheduler = make_scheduler(closed=0, + ready=0, + record=(end_batch - + start_batch), + repeat=1) else: self.scheduler = _default_state_scheduler @@ -492,9 +493,9 @@ class Profiler: elif self.current_state == ProfilerState.RECORD_AND_RETURN: self.profiler.prepare() self.profiler.start() - self.record_event = RecordEvent( - name="ProfileStep#{}".format(self.step_num), - event_type=TracerEventType.ProfileStep) + self.record_event = RecordEvent(name="ProfileStep#{}".format( + self.step_num), + event_type=TracerEventType.ProfileStep) self.record_event.begin() def stop(self): @@ -538,7 +539,7 @@ class Profiler: self.on_trace_ready(self) utils._is_profiler_used = False - def step(self, num_samples: Optional[int]=None): + def step(self, num_samples: Optional[int] = None): r""" Signals the profiler that the next profiling step has started. Get the new ProfilerState and trigger corresponding action. @@ -574,9 +575,9 @@ class Profiler: self.step_num += 1 self.current_state = self.scheduler(self.step_num) self._trigger_action() - self.record_event = RecordEvent( - name="ProfileStep#{}".format(self.step_num), - event_type=TracerEventType.ProfileStep) + self.record_event = RecordEvent(name="ProfileStep#{}".format( + self.step_num), + event_type=TracerEventType.ProfileStep) self.record_event.begin() def step_info(self, unit=None): @@ -747,12 +748,11 @@ class Profiler: self.profiler_result.get_data(), self.profiler_result.get_extra_info()) print( - _build_table( - statistic_data, - sorted_by=sorted_by, - op_detail=op_detail, - thread_sep=thread_sep, - time_unit=time_unit)) + _build_table(statistic_data, + sorted_by=sorted_by, + op_detail=op_detail, + thread_sep=thread_sep, + time_unit=time_unit)) def get_profiler(config_path): @@ -820,6 +820,7 @@ def get_profiler(config_path): translated_config_dict['timer_only'] = config_dict['timer_only'] else: print( - 'Set timer_only parameter error, use default parameter instead.') + 'Set timer_only parameter error, use default parameter instead.' 
+ ) return Profiler(**translated_config_dict) diff --git a/python/paddle/profiler/profiler_statistic.py b/python/paddle/profiler/profiler_statistic.py index 50aa3a1f11f..daa6925c4b9 100755 --- a/python/paddle/profiler/profiler_statistic.py +++ b/python/paddle/profiler/profiler_statistic.py @@ -197,8 +197,8 @@ class TimeRangeSummary: def __init__(self): self.CPUTimeRange = collections.defaultdict(list) self.GPUTimeRange = collections.defaultdict( - lambda: collections.defaultdict(list) - ) # GPU events should be divided into different devices + lambda: collections.defaultdict( + list)) # GPU events should be divided into different devices self.CPUTimeRangeSum = collections.defaultdict(int) self.GPUTimeRangeSum = collections.defaultdict( lambda: collections.defaultdict(int)) @@ -212,8 +212,8 @@ class TimeRangeSummary: for threadid, hostnodes in thread2hostnodes.items(): CPUTimeRange = collections.defaultdict(list) GPUTimeRange = collections.defaultdict( - lambda: collections.defaultdict(lambda: collections.defaultdict(list)) - ) # device_id/type/stream_id + lambda: collections.defaultdict(lambda: collections.defaultdict( + list))) # device_id/type/stream_id for hostnode in hostnodes[1:]: #skip root node CPUTimeRange[hostnode.type].append( (hostnode.start_ns, hostnode.end_ns)) @@ -235,8 +235,8 @@ class TimeRangeSummary: for device_id, device_time_ranges in GPUTimeRange.items(): for event_type, event_time_ranges in device_time_ranges.items(): for stream_id, time_ranges in event_time_ranges.items(): - time_ranges = merge_self_ranges( - time_ranges, is_sorted=False) + time_ranges = merge_self_ranges(time_ranges, + is_sorted=False) self.GPUTimeRange[device_id][event_type] = merge_ranges( self.GPUTimeRange[device_id][event_type], time_ranges, @@ -310,25 +310,27 @@ class DistributedSummary: for devicenode in runtimenode.device_node: if devicenode.type == TracerEventType.Kernel: if 'nccl' in devicenode.name.lower(): - self.gpu_communication_range.append(( - devicenode.start_ns, devicenode.end_ns)) + self.gpu_communication_range.append( + (devicenode.start_ns, + devicenode.end_ns)) else: - self.computation_range.append(( - devicenode.start_ns, devicenode.end_ns)) + self.computation_range.append( + (devicenode.start_ns, + devicenode.end_ns)) self.cpu_calls = len(set(self.cpu_communication_range)) self.gpu_calls = len(set(self.gpu_communication_range)) self.cpu_communication_range = merge_self_ranges( self.cpu_communication_range, is_sorted=False) self.gpu_communication_range = merge_self_ranges( self.gpu_communication_range, is_sorted=False) - self.communication_range = merge_ranges( - self.cpu_communication_range, - self.gpu_communication_range, - is_sorted=True) - self.computation_range = merge_self_ranges( - self.computation_range, is_sorted=False) - self.overlap_range = intersection_ranges( - self.communication_range, self.computation_range, is_sorted=True) + self.communication_range = merge_ranges(self.cpu_communication_range, + self.gpu_communication_range, + is_sorted=True) + self.computation_range = merge_self_ranges(self.computation_range, + is_sorted=False) + self.overlap_range = intersection_ranges(self.communication_range, + self.computation_range, + is_sorted=True) class EventSummary: @@ -337,6 +339,7 @@ class EventSummary: """ class DeviceItem: + def __init__(self, name): self.name = name self.call = 0 @@ -360,6 +363,7 @@ class EventSummary: self.add_gpu_time(node.end_ns - node.start_ns) class OperatorItem: + def __init__(self, name): self.name = name self.call = 0 @@ -430,6 +434,7 @@ 
class EventSummary: self.devices[name].add_item(devicenode) class GeneralItem: + def __init__(self, name): self.name = name self.call = 0 @@ -688,13 +693,14 @@ def _build_table(statistic_data, append(row_format.format(*headers)) append(header_sep) row_values = [ - 'CPU(Process)', format_ratio( - float(statistic_data.extra_info['Process Cpu Utilization'])) + 'CPU(Process)', + format_ratio(float( + statistic_data.extra_info['Process Cpu Utilization'])) ] append(row_format.format(*row_values)) row_values = [ - 'CPU(System)', format_ratio( - float(statistic_data.extra_info['System Cpu Utilization'])) + 'CPU(System)', + format_ratio(float(statistic_data.extra_info['System Cpu Utilization'])) ] append(row_format.format(*row_values)) for gpu_name in statistic_data.time_range_summary.get_gpu_devices(): @@ -783,20 +789,22 @@ def _build_table(statistic_data, TracerEventType. Communication] = statistic_data.distributed_summary.gpu_calls - sorted_items = sorted( - cpu_type_time.items(), key=lambda x: x[1], reverse=True) + sorted_items = sorted(cpu_type_time.items(), + key=lambda x: x[1], + reverse=True) event_type, time = sorted_items[0] row_values = [ '{}'.format(str(event_type).split('.')[1]), cpu_call_times[event_type], - format_time( - time, unit=time_unit), format_ratio(float(time) / total_time) + format_time(time, unit=time_unit), + format_ratio(float(time) / total_time) ] append(row_format.format(*row_values)) for event_type, time in sorted_items[1:]: row_values = [ ' {}'.format(str(event_type).split('.')[1]), - cpu_call_times[event_type], format_time( - time, unit=time_unit), format_ratio(float(time) / total_time) + cpu_call_times[event_type], + format_time(time, unit=time_unit), + format_ratio(float(time) / total_time) ] append(row_format.format(*row_values)) append(header_sep) @@ -806,8 +814,9 @@ def _build_table(statistic_data, for event_type, time in gpu_type_time.items(): row_values = [ ' {}'.format(str(event_type).split('.')[1]), - gpu_call_times[event_type], format_time( - time, unit=time_unit), format_ratio(float(time) / total_time) + gpu_call_times[event_type], + format_time(time, unit=time_unit), + format_ratio(float(time) / total_time) ] append(row_format.format(*row_values)) @@ -851,24 +860,16 @@ def _build_table(statistic_data, row_values = [ '{}'.format(name), item.call, '{} / {} / {} / {} / {}'.format( - format_time( - item.cpu_time, unit=time_unit), - format_time( - item.avg_cpu_time, unit=time_unit), - format_time( - item.max_cpu_time, unit=time_unit), - format_time( - item.min_cpu_time, unit=time_unit), + format_time(item.cpu_time, unit=time_unit), + format_time(item.avg_cpu_time, unit=time_unit), + format_time(item.max_cpu_time, unit=time_unit), + format_time(item.min_cpu_time, unit=time_unit), format_ratio(float(item.cpu_time) / total_time)), '{} / {} / {} / {} / {}'.format( - format_time( - item.gpu_time, unit=time_unit), - format_time( - item.avg_gpu_time, unit=time_unit), - format_time( - item.max_gpu_time, unit=time_unit), - format_time( - item.min_gpu_time, unit=time_unit), + format_time(item.gpu_time, unit=time_unit), + format_time(item.avg_gpu_time, unit=time_unit), + format_time(item.max_gpu_time, unit=time_unit), + format_time(item.min_gpu_time, unit=time_unit), format_ratio(gpu_ratio)) ] all_row_values.append(row_values) @@ -884,12 +885,10 @@ def _build_table(statistic_data, gpu_ratio = float(other_gpu_time) / gpu_total_time row_values = [ ' Others', '-', '{} / - / - / - / {}'.format( - format_time( - other_time, unit=time_unit), + format_time(other_time, 
unit=time_unit), format_ratio(float(other_time) / total_time)), '{} / - / - / - / {}'.format( - format_time( - other_gpu_time, unit=time_unit), + format_time(other_gpu_time, unit=time_unit), format_ratio(gpu_ratio)) ] all_row_values.append(row_values) @@ -971,28 +970,28 @@ def _build_table(statistic_data, overlap_time = sum_ranges( statistic_data.distributed_summary.overlap_range) row_values = [ - 'ProfileStep', format_time( - total_time, unit=time_unit), + 'ProfileStep', + format_time(total_time, unit=time_unit), format_ratio(float(total_time) / total_time) ] append(row_format.format(*row_values)) row_values = [ - ' Communication', format_time( - communication_time, unit=time_unit), + ' Communication', + format_time(communication_time, unit=time_unit), format_ratio(float(communication_time) / total_time) ] append(row_format.format(*row_values)) row_values = [ - ' Computation', format_time( - computation_time, unit=time_unit), + ' Computation', + format_time(computation_time, unit=time_unit), format_ratio(float(computation_time) / total_time) ] append(row_format.format(*row_values)) row_values = [ - ' Overlap', format_time( - overlap_time, unit=time_unit), + ' Overlap', + format_time(overlap_time, unit=time_unit), format_ratio(float(overlap_time) / total_time) ] append(row_format.format(*row_values)) @@ -1026,39 +1025,35 @@ def _build_table(statistic_data, for thread_id, items in thread_items.items(): all_row_values.append("Thread: {}".format(thread_id)) if sorted_by == SortedKeys.CPUTotal: - sorted_items = sorted( - items.items(), key=lambda x: x[1].cpu_time, reverse=True) + sorted_items = sorted(items.items(), + key=lambda x: x[1].cpu_time, + reverse=True) elif sorted_by == SortedKeys.CPUAvg: - sorted_items = sorted( - items.items(), - key=lambda x: x[1].avg_cpu_time, - reverse=True) + sorted_items = sorted(items.items(), + key=lambda x: x[1].avg_cpu_time, + reverse=True) elif sorted_by == SortedKeys.CPUMax: - sorted_items = sorted( - items.items(), - key=lambda x: x[1].max_cpu_time, - reverse=True) + sorted_items = sorted(items.items(), + key=lambda x: x[1].max_cpu_time, + reverse=True) elif sorted_by == SortedKeys.CPUMin: - sorted_items = sorted( - items.items(), key=lambda x: x[1].min_cpu_time) + sorted_items = sorted(items.items(), + key=lambda x: x[1].min_cpu_time) elif sorted_by == SortedKeys.GPUTotal: - sorted_items = sorted( - items.items(), - key=lambda x: x[1].general_gpu_time, - reverse=True) + sorted_items = sorted(items.items(), + key=lambda x: x[1].general_gpu_time, + reverse=True) elif sorted_by == SortedKeys.GPUAvg: - sorted_items = sorted( - items.items(), - key=lambda x: x[1].avg_general_gpu_time, - reverse=True) + sorted_items = sorted(items.items(), + key=lambda x: x[1].avg_general_gpu_time, + reverse=True) elif sorted_by == SortedKeys.GPUMax: - sorted_items = sorted( - items.items(), - key=lambda x: x[1].max_general_gpu_time, - reverse=True) + sorted_items = sorted(items.items(), + key=lambda x: x[1].max_general_gpu_time, + reverse=True) elif sorted_by == SortedKeys.GPUMin: - sorted_items = sorted( - items.items(), key=lambda x: x[1].min_general_gpu_time) + sorted_items = sorted(items.items(), + key=lambda x: x[1].min_general_gpu_time) total_op_cpu_time = 0 total_op_gpu_time = 0 @@ -1077,24 +1072,16 @@ def _build_table(statistic_data, gpu_ratio = float(item.general_gpu_time) / total_op_gpu_time row_values = [ name, item.call, '{} / {} / {} / {} / {}'.format( - format_time( - item.cpu_time, unit=time_unit), - format_time( - item.avg_cpu_time, unit=time_unit), - 
format_time( - item.max_cpu_time, unit=time_unit), - format_time( - item.min_cpu_time, unit=time_unit), + format_time(item.cpu_time, unit=time_unit), + format_time(item.avg_cpu_time, unit=time_unit), + format_time(item.max_cpu_time, unit=time_unit), + format_time(item.min_cpu_time, unit=time_unit), format_ratio(cpu_ratio)), '{} / {} / {} / {} / {}'.format( - format_time( - item.general_gpu_time, unit=time_unit), - format_time( - item.avg_general_gpu_time, unit=time_unit), - format_time( - item.max_general_gpu_time, unit=time_unit), - format_time( - item.min_general_gpu_time, unit=time_unit), + format_time(item.general_gpu_time, unit=time_unit), + format_time(item.avg_general_gpu_time, unit=time_unit), + format_time(item.max_general_gpu_time, unit=time_unit), + format_time(item.min_general_gpu_time, unit=time_unit), format_ratio(gpu_ratio)) ] all_row_values.append(row_values) @@ -1117,28 +1104,24 @@ def _build_table(statistic_data, row_values = [ ' {}'.format(innerop_name), innerop_node.call, '{} / {} / {} / {} / {}'.format( - format_time( - innerop_node.cpu_time, unit=time_unit), - format_time( - innerop_node.avg_cpu_time, unit=time_unit), - format_time( - innerop_node.max_cpu_time, unit=time_unit), - format_time( - innerop_node.min_cpu_time, unit=time_unit), + format_time(innerop_node.cpu_time, + unit=time_unit), + format_time(innerop_node.avg_cpu_time, + unit=time_unit), + format_time(innerop_node.max_cpu_time, + unit=time_unit), + format_time(innerop_node.min_cpu_time, + unit=time_unit), format_ratio(cpu_ratio)), '{} / {} / {} / {} / {}'.format( - format_time( - innerop_node.general_gpu_time, - unit=time_unit), - format_time( - innerop_node.avg_general_gpu_time, - unit=time_unit), - format_time( - innerop_node.max_general_gpu_time, - unit=time_unit), - format_time( - innerop_node.min_general_gpu_time, - unit=time_unit), + format_time(innerop_node.general_gpu_time, + unit=time_unit), + format_time(innerop_node.avg_general_gpu_time, + unit=time_unit), + format_time(innerop_node.max_general_gpu_time, + unit=time_unit), + format_time(innerop_node.min_general_gpu_time, + unit=time_unit), format_ratio(gpu_ratio)) ] all_row_values.append(row_values) @@ -1148,8 +1131,8 @@ def _build_table(statistic_data, gpu_ratio = 0 else: gpu_ratio = float( - device_node. 
- gpu_time) / innerop_node.general_gpu_time + device_node.gpu_time + ) / innerop_node.general_gpu_time if len(device_node_name) + 4 > name_column_width: device_node_name = device_node_name[: name_column_width @@ -1159,17 +1142,14 @@ def _build_table(statistic_data, ' {}'.format(device_node_name), device_node.call, '- / - / - / - / -', '{} / {} / {} / {} / {}'.format( - format_time( - device_node.gpu_time, unit=time_unit), - format_time( - device_node.avg_gpu_time, - unit=time_unit), - format_time( - device_node.max_gpu_time, - unit=time_unit), - format_time( - device_node.min_gpu_time, - unit=time_unit), + format_time(device_node.gpu_time, + unit=time_unit), + format_time(device_node.avg_gpu_time, + unit=time_unit), + format_time(device_node.max_gpu_time, + unit=time_unit), + format_time(device_node.min_gpu_time, + unit=time_unit), format_ratio(gpu_ratio)) ] all_row_values.append(row_values) @@ -1188,14 +1168,14 @@ def _build_table(statistic_data, ' {}'.format(device_node_name), device_node.call, '- / - / - / - / -', '{} / {} / {} / {} / {}'.format( - format_time( - device_node.gpu_time, unit=time_unit), - format_time( - device_node.avg_gpu_time, unit=time_unit), - format_time( - device_node.max_gpu_time, unit=time_unit), - format_time( - device_node.min_gpu_time, unit=time_unit), + format_time(device_node.gpu_time, + unit=time_unit), + format_time(device_node.avg_gpu_time, + unit=time_unit), + format_time(device_node.max_gpu_time, + unit=time_unit), + format_time(device_node.min_gpu_time, + unit=time_unit), format_ratio(gpu_ratio)) ] all_row_values.append(row_values) @@ -1249,21 +1229,20 @@ def _build_table(statistic_data, all_row_values = [] kernel_items = statistic_data.event_summary.kernel_items if sorted_by == SortedKeys.GPUAvg: - sorted_items = sorted( - kernel_items.items(), - key=lambda x: x[1].avg_gpu_time, - reverse=True) + sorted_items = sorted(kernel_items.items(), + key=lambda x: x[1].avg_gpu_time, + reverse=True) elif sorted_by == SortedKeys.GPUMax: - sorted_items = sorted( - kernel_items.items(), - key=lambda x: x[1].max_gpu_time, - reverse=True) + sorted_items = sorted(kernel_items.items(), + key=lambda x: x[1].max_gpu_time, + reverse=True) elif sorted_by == SortedKeys.GPUMin: - sorted_items = sorted( - kernel_items.items(), key=lambda x: x[1].min_gpu_time) + sorted_items = sorted(kernel_items.items(), + key=lambda x: x[1].min_gpu_time) else: - sorted_items = sorted( - kernel_items.items(), key=lambda x: x[1].gpu_time, reverse=True) + sorted_items = sorted(kernel_items.items(), + key=lambda x: x[1].gpu_time, + reverse=True) total_kernel_gpu_time = 0 for name, item in sorted_items: @@ -1277,14 +1256,10 @@ def _build_table(statistic_data, name, item.call, '{} / {} / {} / {} / {}'.format( - format_time( - item.gpu_time, unit=time_unit), - format_time( - item.avg_gpu_time, unit=time_unit), - format_time( - item.max_gpu_time, unit=time_unit), - format_time( - item.min_gpu_time, unit=time_unit), + format_time(item.gpu_time, unit=time_unit), + format_time(item.avg_gpu_time, unit=time_unit), + format_time(item.max_gpu_time, unit=time_unit), + format_time(item.min_gpu_time, unit=time_unit), format_ratio(gpu_ratio)), ] all_row_values.append(row_values) @@ -1349,24 +1324,16 @@ def _build_table(statistic_data, name, item.call, '{} / {} / {} / {} / {}'.format( - format_time( - item.cpu_time, unit=time_unit), - format_time( - item.avg_cpu_time, unit=time_unit), - format_time( - item.max_cpu_time, unit=time_unit), - format_time( - item.min_cpu_time, unit=time_unit), + 
format_time(item.cpu_time, unit=time_unit), + format_time(item.avg_cpu_time, unit=time_unit), + format_time(item.max_cpu_time, unit=time_unit), + format_time(item.min_cpu_time, unit=time_unit), format_ratio(float(item.cpu_time) / total_time)), '{} / {} / {} / {} / {}'.format( - format_time( - item.general_gpu_time, unit=time_unit), - format_time( - item.avg_general_gpu_time, unit=time_unit), - format_time( - item.max_general_gpu_time, unit=time_unit), - format_time( - item.min_general_gpu_time, unit=time_unit), + format_time(item.general_gpu_time, unit=time_unit), + format_time(item.avg_general_gpu_time, unit=time_unit), + format_time(item.max_general_gpu_time, unit=time_unit), + format_time(item.min_general_gpu_time, unit=time_unit), format_ratio(gpu_ratio)), ] all_row_values.append(row_values) @@ -1429,39 +1396,35 @@ def _build_table(statistic_data, for thread_id, items in userdefined_thread_items.items(): all_row_values.append("Thread: {}".format(thread_id)) if sorted_by == SortedKeys.CPUTotal: - sorted_items = sorted( - items.items(), key=lambda x: x[1].cpu_time, reverse=True) + sorted_items = sorted(items.items(), + key=lambda x: x[1].cpu_time, + reverse=True) elif sorted_by == SortedKeys.CPUAvg: - sorted_items = sorted( - items.items(), - key=lambda x: x[1].avg_cpu_time, - reverse=True) + sorted_items = sorted(items.items(), + key=lambda x: x[1].avg_cpu_time, + reverse=True) elif sorted_by == SortedKeys.CPUMax: - sorted_items = sorted( - items.items(), - key=lambda x: x[1].max_cpu_time, - reverse=True) + sorted_items = sorted(items.items(), + key=lambda x: x[1].max_cpu_time, + reverse=True) elif sorted_by == SortedKeys.CPUMin: - sorted_items = sorted( - items.items(), key=lambda x: x[1].min_cpu_time) + sorted_items = sorted(items.items(), + key=lambda x: x[1].min_cpu_time) elif sorted_by == SortedKeys.GPUTotal: - sorted_items = sorted( - items.items(), - key=lambda x: x[1].general_gpu_time, - reverse=True) + sorted_items = sorted(items.items(), + key=lambda x: x[1].general_gpu_time, + reverse=True) elif sorted_by == SortedKeys.GPUAvg: - sorted_items = sorted( - items.items(), - key=lambda x: x[1].avg_general_gpu_time, - reverse=True) + sorted_items = sorted(items.items(), + key=lambda x: x[1].avg_general_gpu_time, + reverse=True) elif sorted_by == SortedKeys.GPUMax: - sorted_items = sorted( - items.items(), - key=lambda x: x[1].max_general_gpu_time, - reverse=True) + sorted_items = sorted(items.items(), + key=lambda x: x[1].max_general_gpu_time, + reverse=True) elif sorted_by == SortedKeys.GPUMin: - sorted_items = sorted( - items.items(), key=lambda x: x[1].min_general_gpu_time) + sorted_items = sorted(items.items(), + key=lambda x: x[1].min_general_gpu_time) for name, item in sorted_items: if gpu_total_time == 0: @@ -1472,24 +1435,16 @@ def _build_table(statistic_data, name, item.call, '{} / {} / {} / {} / {}'.format( - format_time( - item.cpu_time, unit=time_unit), - format_time( - item.avg_cpu_time, unit=time_unit), - format_time( - item.max_cpu_time, unit=time_unit), - format_time( - item.min_cpu_time, unit=time_unit), + format_time(item.cpu_time, unit=time_unit), + format_time(item.avg_cpu_time, unit=time_unit), + format_time(item.max_cpu_time, unit=time_unit), + format_time(item.min_cpu_time, unit=time_unit), format_ratio(float(item.cpu_time) / total_time)), '{} / {} / {} / {} / {}'.format( - format_time( - item.general_gpu_time, unit=time_unit), - format_time( - item.avg_general_gpu_time, unit=time_unit), - format_time( - item.max_general_gpu_time, unit=time_unit), - 
format_time( - item.min_general_gpu_time, unit=time_unit), + format_time(item.general_gpu_time, unit=time_unit), + format_time(item.avg_general_gpu_time, unit=time_unit), + format_time(item.max_general_gpu_time, unit=time_unit), + format_time(item.min_general_gpu_time, unit=time_unit), format_ratio(gpu_ratio)), ] all_row_values.append(row_values) diff --git a/python/paddle/profiler/statistic_helper.py b/python/paddle/profiler/statistic_helper.py index 76dd1f0a643..358f2a09b92 100644 --- a/python/paddle/profiler/statistic_helper.py +++ b/python/paddle/profiler/statistic_helper.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/profiler/timer.py b/python/paddle/profiler/timer.py index 815775ebc6a..35689feb56c 100644 --- a/python/paddle/profiler/timer.py +++ b/python/paddle/profiler/timer.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -127,24 +127,20 @@ class Event(object): else: speed_avg = float(self.total_iters) / self.batch_records['total'] - reader_summary = dict( - max=self.reader_records['max'], - min=self.reader_records['min'], - avg=reader_avg) - batch_summary = dict( - max=self.batch_records['max'], - min=self.batch_records['min'], - avg=batch_avg) - ips_summary = dict( - max=self.speed_records['max'], - min=self.speed_records['min'], - avg=speed_avg) + reader_summary = dict(max=self.reader_records['max'], + min=self.reader_records['min'], + avg=reader_avg) + batch_summary = dict(max=self.batch_records['max'], + min=self.batch_records['min'], + avg=batch_avg) + ips_summary = dict(max=self.speed_records['max'], + min=self.speed_records['min'], + avg=speed_avg) reader_ratio = (reader_avg / batch_avg) * 100 - summary = dict( - reader_summary=reader_summary, - batch_summary=batch_summary, - ips_summary=ips_summary, - reader_ratio=reader_ratio) + summary = dict(reader_summary=reader_summary, + batch_summary=batch_summary, + ips_summary=ips_summary, + reader_ratio=reader_ratio) return summary @@ -225,8 +221,8 @@ class TimerHook(Hook): """ - if (benchmark.current_event is None) or ( - not benchmark.current_event.need_record): + if (benchmark.current_event is + None) or (not benchmark.current_event.need_record): return batch_cost = timeit.default_timer() - self.start_time benchmark.current_event.record_batch(batch_cost, benchmark.num_samples) @@ -269,9 +265,7 @@ class TimerHook(Hook): avg_str = '%.5f' % (message_dict['avg']) max_str = '%.5f' % (message_dict['max']) min_str = '%.5f' % (message_dict['min']) - print('|', - item.center(15), '|', - avg_str.center(15), '|', + print('|', item.center(15), '|', avg_str.center(15), '|', max_str.center(15), '|', min_str.center(15), '|') @@ -399,7 +393,7 @@ class Benchmark(object): elif self.current_event.reader.__dict__[ '_dataset'] != reader.__dict__['_dataset']: # enter a new task but not calling beign() to record it. - # we pause the timer until the end of new task, so that + # we pause the timer until the end of new task, so that # the cost of new task is not added to the current event. # eg. start evaluation in the training task self.current_event.need_record = False diff --git a/python/paddle/profiler/utils.py b/python/paddle/profiler/utils.py index 5e95c83129f..a02311cc929 100644 --- a/python/paddle/profiler/utils.py +++ b/python/paddle/profiler/utils.py @@ -62,9 +62,10 @@ class RecordEvent(ContextDecorator): RecordEvent will take effect only when :ref:`Profiler ` is on and at the state of RECORD. 
""" - def __init__(self, - name: str, - event_type: TracerEventType=TracerEventType.PythonUserDefined): + def __init__( + self, + name: str, + event_type: TracerEventType = TracerEventType.PythonUserDefined): self.name = name self.event_type = event_type self.event = None @@ -158,13 +159,14 @@ def in_profiler_mode(): def wrap_optimizers(): + def optimizer_warpper(func): + @functools.wraps(func) def warpper(*args, **kwargs): if in_profiler_mode(): - with RecordEvent( - 'Optimization Step', - event_type=TracerEventType.Optimization): + with RecordEvent('Optimization Step', + event_type=TracerEventType.Optimization): return func(*args, **kwargs) else: return func(*args, **kwargs) diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py index 66f971c59d7..981f6e9253c 100644 --- a/python/paddle/reader/decorator.py +++ b/python/paddle/reader/decorator.py @@ -34,10 +34,10 @@ from paddle.fluid.reader import QUEUE_GET_TIMEOUT __all__ = [] # On macOS, the 'spawn' start method is now the default in Python3.8 multiprocessing, -# Paddle is currently unable to solve this, so forces the process to start using +# Paddle is currently unable to solve this, so forces the process to start using # the 'fork' start method. # -# TODO: This solution is not good, because the fork start method could lead to +# TODO: This solution is not good, because the fork start method could lead to # crashes of the subprocess. Figure out how to make 'spawn' work. # # For more details, please refer to @@ -350,10 +350,10 @@ def buffered(reader, size): def data_reader(): r = reader() q = Queue(maxsize=size) - t = Thread( - target=read_worker, args=( - r, - q, )) + t = Thread(target=read_worker, args=( + r, + q, + )) t.daemon = True t.start() e = q.get() @@ -477,8 +477,8 @@ def xmap_readers(mapper, reader, process_num, buffer_size, order=False): t.start() # start several handle_workers target = order_handle_worker if order else handle_worker - args = (in_queue, out_queue, mapper, out_order) if order else ( - in_queue, out_queue, mapper) + args = (in_queue, out_queue, mapper, + out_order) if order else (in_queue, out_queue, mapper) workers = [] for i in range(process_num): worker = Thread(target=target, args=args) @@ -614,8 +614,8 @@ def multiprocess_reader(readers, use_pipe=True, queue_size=1000): def queue_reader(): queue = fork_context.Queue(queue_size) for reader in readers: - p = fork_context.Process( - target=_read_into_queue, args=(reader, queue)) + p = fork_context.Process(target=_read_into_queue, + args=(reader, queue)) p.start() reader_num = len(readers) @@ -656,8 +656,8 @@ def multiprocess_reader(readers, use_pipe=True, queue_size=1000): for reader in readers: parent_conn, child_conn = fork_context.Pipe() conns.append(parent_conn) - p = fork_context.Process( - target=_read_into_pipe, args=(reader, child_conn)) + p = fork_context.Process(target=_read_into_pipe, + args=(reader, child_conn)) p.start() reader_num = len(readers) diff --git a/python/paddle/reader/tests/decorator_test.py b/python/paddle/reader/tests/decorator_test.py index e11600a06fb..902a8cbe06c 100644 --- a/python/paddle/reader/tests/decorator_test.py +++ b/python/paddle/reader/tests/decorator_test.py @@ -23,6 +23,7 @@ __all__ = [] def reader_creator_10(dur): + def reader(): for i in range(10): # this invocation helps testing paddle.reader.buffer @@ -33,6 +34,7 @@ def reader_creator_10(dur): class TestMap(unittest.TestCase): + def test_map(self): d = {"h": 0, "i": 1} @@ -49,6 +51,7 @@ class TestMap(unittest.TestCase): class 
TestBuffered(unittest.TestCase): + def test_read(self): for size in range(20): b = paddle.reader.buffered(reader_creator_10(0), size) @@ -73,9 +76,10 @@ class TestBuffered(unittest.TestCase): class TestCompose(unittest.TestCase): + def test_compse(self): - reader = paddle.reader.compose( - reader_creator_10(0), reader_creator_10(0)) + reader = paddle.reader.compose(reader_creator_10(0), + reader_creator_10(0)) for idx, e in enumerate(reader()): self.assertEqual(e, (idx, idx)) @@ -92,10 +96,10 @@ class TestCompose(unittest.TestCase): def test_compose_not_aligned_no_check(self): total = 0 - reader = paddle.reader.compose( - paddle.reader.chain(reader_creator_10(0), reader_creator_10(0)), - reader_creator_10(0), - check_alignment=False) + reader = paddle.reader.compose(paddle.reader.chain( + reader_creator_10(0), reader_creator_10(0)), + reader_creator_10(0), + check_alignment=False) for e in reader(): total += 1 # expecting 10, not 20 @@ -103,6 +107,7 @@ class TestCompose(unittest.TestCase): class TestChain(unittest.TestCase): + def test_chain(self): c = paddle.reader.chain(reader_creator_10(0), reader_creator_10(0)) idx = 0 @@ -113,6 +118,7 @@ class TestChain(unittest.TestCase): class TestShuffle(unittest.TestCase): + def test_shuffle(self): case = [(0, True), (1, True), (10, False), (100, False)] a = reader_creator_10(0) @@ -127,7 +133,9 @@ class TestShuffle(unittest.TestCase): class TestXmap(unittest.TestCase): + def test_xmap(self): + def mapper(x): return (x + 1) @@ -151,6 +159,7 @@ class TestXmap(unittest.TestCase): class TestMultiProcessReader(unittest.TestCase): + def setup(self): self.samples = [] for i in range(1000): diff --git a/python/paddle/signal.py b/python/paddle/signal.py index ba2f842c395..6725373d057 100644 --- a/python/paddle/signal.py +++ b/python/paddle/signal.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -134,20 +134,19 @@ def frame(x, frame_length, hop_length, axis=-1, name=None): out = op(x, *attrs) else: check_variable_and_dtype( - x, 'x', ['int32', 'int64', 'float16', 'float32', - 'float64'], op_type) + x, 'x', ['int32', 'int64', 'float16', 'float32', 'float64'], + op_type) helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type=op_type, - inputs={'X': x}, - attrs={ - 'frame_length': frame_length, - 'hop_length': hop_length, - 'axis': axis - }, - outputs={'Out': out}) + helper.append_op(type=op_type, + inputs={'X': x}, + attrs={ + 'frame_length': frame_length, + 'hop_length': hop_length, + 'axis': axis + }, + outputs={'Out': out}) return out @@ -220,17 +219,18 @@ def overlap_add(x, hop_length, axis=-1, name=None): out = op(x, *attrs) else: check_variable_and_dtype( - x, 'x', ['int32', 'int64', 'float16', 'float32', - 'float64'], op_type) + x, 'x', ['int32', 'int64', 'float16', 'float32', 'float64'], + op_type) helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type=op_type, - inputs={'X': x}, - attrs={'hop_length': hop_length, - 'axis': axis}, - outputs={'Out': out}) + helper.append_op(type=op_type, + inputs={'X': x}, + attrs={ + 'hop_length': hop_length, + 'axis': axis + }, + outputs={'Out': out}) return out @@ -306,8 +306,9 @@ def stft(x, paddle.randn([8, 48000], dtype=paddle.float64)*1j # [8, 48000] complex128 y1 = stft(x, n_fft=512, center=False, onesided=False) # [8, 512, 372] """ - check_variable_and_dtype( - x, 'x', ['float32', 'float64', 'complex64', 'complex128'], 'stft') + check_variable_and_dtype(x, 'x', + ['float32', 'float64', 'complex64', 'complex128'], + 'stft') x_rank = len(x.shape) assert x_rank in [1, 2], \ @@ -368,17 +369,20 @@ def stft(x, 'onesided should be False when input or window is a complex Tensor.' 
if not is_complex(x): - out = fft_r2c( - x=x_frames, - n=None, - axis=-1, - norm=norm, - forward=True, - onesided=onesided, - name=name) + out = fft_r2c(x=x_frames, + n=None, + axis=-1, + norm=norm, + forward=True, + onesided=onesided, + name=name) else: - out = fft_c2c( - x=x_frames, n=None, axis=-1, norm=norm, forward=True, name=name) + out = fft_c2c(x=x_frames, + n=None, + axis=-1, + norm=norm, + forward=True, + name=name) out = out.transpose(perm=[0, 2, 1]) # (batch, n_fft, num_frames) @@ -541,14 +545,14 @@ def istft(x, out = paddle.multiply(out, window).transpose( perm=[0, 2, 1]) # (batch, n_fft, num_frames) - out = overlap_add( - x=out, hop_length=hop_length, axis=-1) # (batch, seq_length) + out = overlap_add(x=out, hop_length=hop_length, + axis=-1) # (batch, seq_length) window_envelop = overlap_add( x=paddle.tile( x=paddle.multiply(window, window).unsqueeze(0), - repeat_times=[n_frames, 1]).transpose( - perm=[1, 0]), # (n_fft, num_frames) + repeat_times=[n_frames, + 1]).transpose(perm=[1, 0]), # (n_fft, num_frames) hop_length=hop_length, axis=-1) # (seq_length, ) diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py index bce77380d1f..8707c259ead 100644 --- a/python/paddle/static/__init__.py +++ b/python/paddle/static/__init__.py @@ -69,52 +69,17 @@ from ..fluid.layers import create_global_var # noqa: F401 from ..fluid.layers.metric_op import auc # noqa: F401 from ..fluid.layers.metric_op import accuracy # noqa: F401 -__all__ = [ #noqa - 'append_backward', - 'gradients', - 'Executor', - 'global_scope', - 'scope_guard', - 'BuildStrategy', - 'CompiledProgram', - 'ipu_shard_guard', - 'IpuCompiledProgram', - 'IpuStrategy', - 'Print', - 'py_func', - 'ExecutionStrategy', - 'name_scope', - 'ParallelExecutor', - 'program_guard', - 'WeightNormParamAttr', - 'ExponentialMovingAverage', - 'default_main_program', - 'default_startup_program', - 'Program', - 'data', - 'InputSpec', - 'save', - 'load', - 'save_inference_model', - 'load_inference_model', - 'serialize_program', - 'serialize_persistables', - 'save_to_file', - 'deserialize_program', - 'deserialize_persistables', - 'load_from_file', - 'normalize_program', - 'load_program_state', - 'set_program_state', - 'cpu_places', - 'cuda_places', - 'xpu_places', - 'npu_places', - 'mlu_places', - 'Variable', - 'create_global_var', - 'accuracy', - 'auc', - 'device_guard', - 'create_parameter' +__all__ = [ #noqa + 'append_backward', 'gradients', 'Executor', 'global_scope', 'scope_guard', + 'BuildStrategy', 'CompiledProgram', 'ipu_shard_guard', 'IpuCompiledProgram', + 'IpuStrategy', 'Print', 'py_func', 'ExecutionStrategy', 'name_scope', + 'ParallelExecutor', 'program_guard', 'WeightNormParamAttr', + 'ExponentialMovingAverage', 'default_main_program', + 'default_startup_program', 'Program', 'data', 'InputSpec', 'save', 'load', + 'save_inference_model', 'load_inference_model', 'serialize_program', + 'serialize_persistables', 'save_to_file', 'deserialize_program', + 'deserialize_persistables', 'load_from_file', 'normalize_program', + 'load_program_state', 'set_program_state', 'cpu_places', 'cuda_places', + 'xpu_places', 'npu_places', 'mlu_places', 'Variable', 'create_global_var', + 'accuracy', 'auc', 'device_guard', 'create_parameter' ] diff --git a/python/paddle/static/input.py b/python/paddle/static/input.py index f58c06c9b51..4098ae5dbf3 100644 --- a/python/paddle/static/input.py +++ b/python/paddle/static/input.py @@ -251,8 +251,9 @@ class InputSpec(object): format(batch_size, len(batch_size))) batch_size = batch_size[1] elif 
not isinstance(batch_size, six.integer_types): - raise TypeError("type(batch_size) shall be `int`, but received {}.". - format(type(batch_size).__name__)) + raise TypeError( + "type(batch_size) shall be `int`, but received {}.".format( + type(batch_size).__name__)) new_shape = [batch_size] + list(self.shape) self.shape = tuple(new_shape) @@ -289,19 +290,20 @@ class InputSpec(object): """ if not isinstance(shape, (list, tuple)): raise TypeError( - "Type of `shape` in InputSpec should be one of (tuple, list), but received {}.". - format(type(shape).__name__)) + "Type of `shape` in InputSpec should be one of (tuple, list), but received {}." + .format(type(shape).__name__)) if len(shape) == 0: raise ValueError( - "`shape` in InputSpec should contain at least 1 element, but received {}.". - format(shape)) + "`shape` in InputSpec should contain at least 1 element, but received {}." + .format(shape)) for i, ele in enumerate(shape): if ele is not None: if not isinstance(ele, six.integer_types): raise ValueError( "shape[{}] should be an `int`, but received `{}`:{}.". - format(i, type(ele).__name__, ele)) + format(i, + type(ele).__name__, ele)) if ele is None or ele < -1: shape[i] = -1 diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py index 05a3389fd15..19508351510 100644 --- a/python/paddle/static/io.py +++ b/python/paddle/static/io.py @@ -31,7 +31,8 @@ from paddle.fluid import ( Program, layers, unique_name, - program_guard, ) + program_guard, +) from paddle.fluid.io import prepend_feed_ops, append_fetch_ops from paddle.fluid.framework import static_only, Parameter from paddle.fluid.executor import Executor, global_scope @@ -39,8 +40,9 @@ from paddle.fluid.log_helper import get_logger __all__ = [] -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_logger = get_logger(__name__, + logging.INFO, + fmt='%(asctime)s-%(levelname)s: %(message)s') def _check_args(caller, args, supported_args=None, deprecated_args=None): @@ -49,12 +51,12 @@ def _check_args(caller, args, supported_args=None, deprecated_args=None): for arg in args: if arg in deprecated_args: raise ValueError( - "argument '{}' in function '{}' is deprecated, only {} are supported.". - format(arg, caller, supported_args)) + "argument '{}' in function '{}' is deprecated, only {} are supported." + .format(arg, caller, supported_args)) elif arg not in supported_args: raise ValueError( - "function '{}' doesn't support argument '{}',\n only {} are supported.". - format(caller, arg, supported_args)) + "function '{}' doesn't support argument '{}',\n only {} are supported." 
+ .format(caller, arg, supported_args)) def _check_vars(name, var_list): @@ -102,20 +104,18 @@ def _get_valid_program(program=None): def _clone_var_in_block(block, var): assert isinstance(var, Variable) if var.desc.type() == core.VarDesc.VarType.LOD_TENSOR: - return block.create_var( - name=var.name, - shape=var.shape, - dtype=var.dtype, - type=var.type, - lod_level=var.lod_level, - persistable=True) + return block.create_var(name=var.name, + shape=var.shape, + dtype=var.dtype, + type=var.type, + lod_level=var.lod_level, + persistable=True) else: - return block.create_var( - name=var.name, - shape=var.shape, - dtype=var.dtype, - type=var.type, - persistable=True) + return block.create_var(name=var.name, + shape=var.shape, + dtype=var.dtype, + type=var.type, + persistable=True) def normalize_program(program, feed_vars, fetch_vars): @@ -193,8 +193,9 @@ def normalize_program(program, feed_vars, fetch_vars): uniq_fetch_vars = [] for i, var in enumerate(fetch_vars): if var.dtype != paddle.bool: - var = layers.scale( - var, 1., name="save_infer_model/scale_{}".format(i)) + var = layers.scale(var, + 1., + name="save_infer_model/scale_{}".format(i)) uniq_fetch_vars.append(var) fetch_vars = uniq_fetch_vars @@ -394,15 +395,16 @@ def _serialize_persistables(program, executor): in_vars.append(save_var_map[name]) out_var_name = unique_name.generate("out_var") - out_var = save_block.create_var( - type=core.VarDesc.VarType.RAW, name=out_var_name) + out_var = save_block.create_var(type=core.VarDesc.VarType.RAW, + name=out_var_name) out_var.desc.set_persistable(True) - save_block.append_op( - type='save_combine', - inputs={'X': in_vars}, - outputs={'Y': out_var}, - attrs={'file_path': '', - 'save_to_memory': True}) + save_block.append_op(type='save_combine', + inputs={'X': in_vars}, + outputs={'Y': out_var}, + attrs={ + 'file_path': '', + 'save_to_memory': True + }) # run save_program to save vars # NOTE(zhiqiu): save op will add variable kLookupTablePath to save_program.desc, # which leads to diff between save_program and its desc. Call _sync_with_cpp @@ -645,8 +647,10 @@ def deserialize_persistables(program, data, executor): inputs={}, outputs={"Out": load_var_list}, # if load from memory, file_path is data - attrs={'file_path': data, - 'model_from_memory': True}) + attrs={ + 'file_path': data, + 'model_from_memory': True + }) executor.run(load_program) # check var shape for var in check_vars: diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py index b589d9f8789..65ed35df364 100644 --- a/python/paddle/static/nn/__init__.py +++ b/python/paddle/static/nn/__init__.py @@ -57,7 +57,7 @@ from ...fluid.layers.sequence_lod import sequence_scatter # noqa: F401 from ...fluid.layers.sequence_lod import sequence_enumerate # noqa: F401 from ...fluid.layers.sequence_lod import sequence_reverse # noqa: F401 -__all__ = [ #noqa +__all__ = [ #noqa 'fc', 'batch_norm', 'embedding', diff --git a/python/paddle/static/sparsity/__init__.py b/python/paddle/static/sparsity/__init__.py index b4543b8d000..11ff30c78e2 100644 --- a/python/paddle/static/sparsity/__init__.py +++ b/python/paddle/static/sparsity/__init__.py @@ -1,12 +1,12 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,14 +21,11 @@ from ...fluid.contrib import sparsity #noqa: F401 def set_excluded_layers(main_program, param_names): - sparsity.set_excluded_layers( - param_names=param_names, main_program=main_program) + sparsity.set_excluded_layers(param_names=param_names, + main_program=main_program) -__all__ = [ #noqa - 'calculate_density', - 'decorate', - 'prune_model', - 'set_excluded_layers', +__all__ = [ #noqa + 'calculate_density', 'decorate', 'prune_model', 'set_excluded_layers', 'reset_excluded_layers' ] diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py index 478f4b6351f..3ea3ba49825 100755 --- a/python/paddle/tensor/__init__.py +++ b/python/paddle/tensor/__init__.py @@ -277,228 +277,228 @@ from .array import create_array # noqa: F401 from .einsum import einsum # noqa: F401 #this list used in math_op_patch.py for _binary_creator_ -tensor_method_func = [ #noqa - 'matmul', - 'dot', - 'cov', - 'corrcoef', - 'norm', - 'cond', - 'transpose', - 'lstsq', - 'dist', - 't', - 'cross', - 'cholesky', - 'bmm', - 'histogram', - 'bincount', - 'mv', - 'matrix_power', - 'qr', - 'eigvals', - 'eigvalsh', - 'abs', - 'acos', - 'all', - 'any', - 'asin', - 'atan', - 'ceil', - 'ceil_', - 'cos', - 'cosh', - 'cumsum', - 'cumprod', - 'logit', - 'exp', - 'exp_', - 'floor', - 'floor_', - 'increment', - 'log', - 'log2', - 'log10', - 'logsumexp', - 'multiplex', - 'pow', - 'prod', - 'reciprocal', - 'reciprocal_', - 'round', - 'round_', - 'rsqrt', - 'rsqrt_', - 'scale', - 'scale_', - 'sign', - 'sin', - 'sinh', - 'sqrt', - 'sqrt_', - 'square', - 'stanh', - 'sum', - 'nansum', - 'nanmean', - 'tanh', - 'tanh_', - 'add_n', - 'max', - 'amax', - 'maximum', - 'min', - 'amin', - 'minimum', - 'fmax', - 'fmin', - 'mm', - 'inner', - 'outer', - 'divide', - 'floor_divide', - 'remainder', - 'mod', - 'floor_mod', - 'multiply', - 'add', - 'add_', - 'subtract', - 'subtract_', - 'atan', - 'logsumexp', - 'inverse', - 'log1p', - 'erf', - 'addmm', - 'clip', - 'clip_', - 'trace', - 'kron', - 'kthvalue', - 'isfinite', - 'isinf', - 'isnan', - 'broadcast_shape', - 'conj', - 'neg', - 'lgamma', - 'equal', - 'equal_all', - 'greater_equal', - 'greater_than', - 'is_empty', - 'less_equal', - 'less_than', - 'logical_and', - 'logical_not', - 'logical_or', - 'logical_xor', - 'not_equal', - 'allclose', - 'isclose', - 'is_tensor', - 'cast', - 'concat', - 'expand', - 'broadcast_to', - 'expand_as', - 'flatten', - 'flatten_', - 'gather', - 'gather_nd', - 'reshape', - 'reshape_', - 'reverse', - 'scatter', - 'scatter_', - 'scatter_nd_add', - 'scatter_nd', - 'shard_index', - 'slice', - 'split', - 'chunk', - 'tensordot', - 'squeeze', - 'squeeze_', - 'stack', - 'strided_slice', - 'transpose', - 'unique', - 'unique_consecutive', - 'unsqueeze', - 'unsqueeze_', - 'unstack', - 'flip', - 'rot90', - 'unbind', - 'roll', - 'tile', - 'argmax', - 'argmin', - 'argsort', - 'masked_select', - 'topk', - 'where', - 'index_select', - 'nonzero', - 'sort', - 'index_sample', - 'mean', - 'std', - 'var', - 'numel', - 'median', - 'nanmedian', - 'quantile', - 'nanquantile', - 'is_complex', - 'is_integer', - 'rank', - 'shape', - 'real', - 'imag', - 'is_floating_point', - 'digamma', - 'diagonal', - 'trunc', - 'frac', - 'bitwise_and', - 'bitwise_or', - 
'bitwise_xor', - 'bitwise_not', - 'broadcast_tensors', - 'eig', - 'uniform_', - 'multi_dot', - 'solve', - 'cholesky_solve', - 'triangular_solve', - 'asinh', - 'atanh', - 'acosh', - 'lu', - 'lu_unpack', - 'as_complex', - 'as_real', - 'rad2deg', - 'deg2rad', - 'gcd', - 'lcm', - 'diff', - "mode", - 'lerp', - 'lerp_', - 'erfinv', - 'erfinv_', - 'angle', - 'moveaxis', - 'repeat_interleave', - 'take_along_axis', - 'put_along_axis', - 'put_along_axis_', - 'exponential_', - 'heaviside', +tensor_method_func = [ #noqa + 'matmul', + 'dot', + 'cov', + 'corrcoef', + 'norm', + 'cond', + 'transpose', + 'lstsq', + 'dist', + 't', + 'cross', + 'cholesky', + 'bmm', + 'histogram', + 'bincount', + 'mv', + 'matrix_power', + 'qr', + 'eigvals', + 'eigvalsh', + 'abs', + 'acos', + 'all', + 'any', + 'asin', + 'atan', + 'ceil', + 'ceil_', + 'cos', + 'cosh', + 'cumsum', + 'cumprod', + 'logit', + 'exp', + 'exp_', + 'floor', + 'floor_', + 'increment', + 'log', + 'log2', + 'log10', + 'logsumexp', + 'multiplex', + 'pow', + 'prod', + 'reciprocal', + 'reciprocal_', + 'round', + 'round_', + 'rsqrt', + 'rsqrt_', + 'scale', + 'scale_', + 'sign', + 'sin', + 'sinh', + 'sqrt', + 'sqrt_', + 'square', + 'stanh', + 'sum', + 'nansum', + 'nanmean', + 'tanh', + 'tanh_', + 'add_n', + 'max', + 'amax', + 'maximum', + 'min', + 'amin', + 'minimum', + 'fmax', + 'fmin', + 'mm', + 'inner', + 'outer', + 'divide', + 'floor_divide', + 'remainder', + 'mod', + 'floor_mod', + 'multiply', + 'add', + 'add_', + 'subtract', + 'subtract_', + 'atan', + 'logsumexp', + 'inverse', + 'log1p', + 'erf', + 'addmm', + 'clip', + 'clip_', + 'trace', + 'kron', + 'kthvalue', + 'isfinite', + 'isinf', + 'isnan', + 'broadcast_shape', + 'conj', + 'neg', + 'lgamma', + 'equal', + 'equal_all', + 'greater_equal', + 'greater_than', + 'is_empty', + 'less_equal', + 'less_than', + 'logical_and', + 'logical_not', + 'logical_or', + 'logical_xor', + 'not_equal', + 'allclose', + 'isclose', + 'is_tensor', + 'cast', + 'concat', + 'expand', + 'broadcast_to', + 'expand_as', + 'flatten', + 'flatten_', + 'gather', + 'gather_nd', + 'reshape', + 'reshape_', + 'reverse', + 'scatter', + 'scatter_', + 'scatter_nd_add', + 'scatter_nd', + 'shard_index', + 'slice', + 'split', + 'chunk', + 'tensordot', + 'squeeze', + 'squeeze_', + 'stack', + 'strided_slice', + 'transpose', + 'unique', + 'unique_consecutive', + 'unsqueeze', + 'unsqueeze_', + 'unstack', + 'flip', + 'rot90', + 'unbind', + 'roll', + 'tile', + 'argmax', + 'argmin', + 'argsort', + 'masked_select', + 'topk', + 'where', + 'index_select', + 'nonzero', + 'sort', + 'index_sample', + 'mean', + 'std', + 'var', + 'numel', + 'median', + 'nanmedian', + 'quantile', + 'nanquantile', + 'is_complex', + 'is_integer', + 'rank', + 'shape', + 'real', + 'imag', + 'is_floating_point', + 'digamma', + 'diagonal', + 'trunc', + 'frac', + 'bitwise_and', + 'bitwise_or', + 'bitwise_xor', + 'bitwise_not', + 'broadcast_tensors', + 'eig', + 'uniform_', + 'multi_dot', + 'solve', + 'cholesky_solve', + 'triangular_solve', + 'asinh', + 'atanh', + 'acosh', + 'lu', + 'lu_unpack', + 'as_complex', + 'as_real', + 'rad2deg', + 'deg2rad', + 'gcd', + 'lcm', + 'diff', + "mode", + 'lerp', + 'lerp_', + 'erfinv', + 'erfinv_', + 'angle', + 'moveaxis', + 'repeat_interleave', + 'take_along_axis', + 'put_along_axis', + 'put_along_axis_', + 'exponential_', + 'heaviside', ] #this list used in math_op_patch.py for magic_method bind diff --git a/python/paddle/tensor/array.py b/python/paddle/tensor/array.py index 856b79c2a68..02da6926a3f 100644 --- a/python/paddle/tensor/array.py +++ 
b/python/paddle/tensor/array.py @@ -62,8 +62,9 @@ def array_length(array): helper = LayerHelper('array_length', **locals()) tmp = helper.create_variable_for_type_inference(dtype='int64') tmp.stop_gradient = True - helper.append_op( - type='lod_array_length', inputs={'X': [array]}, outputs={'Out': [tmp]}) + helper.append_op(type='lod_array_length', + inputs={'X': [array]}, + outputs={'Out': [tmp]}) return tmp @@ -126,11 +127,12 @@ def array_read(array, i): Variable) or array.type != core.VarDesc.VarType.LOD_TENSOR_ARRAY: raise TypeError("array should be tensor array vairable") out = helper.create_variable_for_type_inference(dtype=array.dtype) - helper.append_op( - type='read_from_array', - inputs={'X': [array], - 'I': [i]}, - outputs={'Out': [out]}) + helper.append_op(type='read_from_array', + inputs={ + 'X': [array], + 'I': [i] + }, + outputs={'Out': [out]}) return out @@ -195,8 +197,8 @@ def array_write(x, i, array=None): helper = LayerHelper('array_write', **locals()) if array is not None: if not isinstance( - array, - Variable) or array.type != core.VarDesc.VarType.LOD_TENSOR_ARRAY: + array, Variable + ) or array.type != core.VarDesc.VarType.LOD_TENSOR_ARRAY: raise TypeError( "array should be tensor array vairable in array_write Op") if array is None: @@ -204,11 +206,12 @@ def array_write(x, i, array=None): name="{0}.out".format(helper.name), type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, dtype=x.dtype) - helper.append_op( - type='write_to_array', - inputs={'X': [x], - 'I': [i]}, - outputs={'Out': [array]}) + helper.append_op(type='write_to_array', + inputs={ + 'X': [x], + 'I': [i] + }, + outputs={'Out': [array]}) return array @@ -245,16 +248,16 @@ def create_array(dtype, initialized_list=None): if initialized_list is not None: if not isinstance(initialized_list, (list, tuple)): raise TypeError( - "Require type(initialized_list) should be list/tuple, but received {}". - format(type(initialized_list))) + "Require type(initialized_list) should be list/tuple, but received {}" + .format(type(initialized_list))) array = list(initialized_list) # NOTE: Only support plain list like [x, y,...], not support nested list in static mode. for val in array: if not isinstance(val, Variable): raise TypeError( - "All values in `initialized_list` should be Variable, but recevied {}.". - format(type(val))) + "All values in `initialized_list` should be Variable, but recevied {}." 
+ .format(type(val))) if _non_static_mode(): return array diff --git a/python/paddle/tensor/attribute.py b/python/paddle/tensor/attribute.py index ca8abdaf4b3..e3bd7bae7d4 100644 --- a/python/paddle/tensor/attribute.py +++ b/python/paddle/tensor/attribute.py @@ -129,11 +129,10 @@ def shape(input): ], 'shape') helper = LayerHelper('shape', **locals()) out = helper.create_variable_for_type_inference(dtype='int32') - helper.append_op( - type='shape', - inputs={'Input': input}, - outputs={'Out': out}, - stop_gradient=True) + helper.append_op(type='shape', + inputs={'Input': input}, + outputs={'Out': out}, + stop_gradient=True) return out @@ -168,8 +167,8 @@ def is_complex(x): raise TypeError("Expected Tensor, but received type of x: {}".format( type(x))) dtype = x.dtype - is_complex_dtype = (dtype == core.VarDesc.VarType.COMPLEX64 or - dtype == core.VarDesc.VarType.COMPLEX128) + is_complex_dtype = (dtype == core.VarDesc.VarType.COMPLEX64 + or dtype == core.VarDesc.VarType.COMPLEX128) return is_complex_dtype @@ -199,10 +198,10 @@ def is_floating_point(x): raise TypeError("Expected Tensor, but received type of x: {}".format( type(x))) dtype = x.dtype - is_fp_dtype = (dtype == core.VarDesc.VarType.FP32 or - dtype == core.VarDesc.VarType.FP64 or - dtype == core.VarDesc.VarType.FP16 or - dtype == core.VarDesc.VarType.BF16) + is_fp_dtype = (dtype == core.VarDesc.VarType.FP32 + or dtype == core.VarDesc.VarType.FP64 + or dtype == core.VarDesc.VarType.FP16 + or dtype == core.VarDesc.VarType.BF16) return is_fp_dtype @@ -236,11 +235,11 @@ def is_integer(x): raise TypeError("Expected Tensor, but received type of x: {}".format( type(x))) dtype = x.dtype - is_int_dtype = (dtype == core.VarDesc.VarType.UINT8 or - dtype == core.VarDesc.VarType.INT8 or - dtype == core.VarDesc.VarType.INT16 or - dtype == core.VarDesc.VarType.INT32 or - dtype == core.VarDesc.VarType.INT64) + is_int_dtype = (dtype == core.VarDesc.VarType.UINT8 + or dtype == core.VarDesc.VarType.INT8 + or dtype == core.VarDesc.VarType.INT16 + or dtype == core.VarDesc.VarType.INT32 + or dtype == core.VarDesc.VarType.INT64) return is_int_dtype diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 5a1aa5dcfce..67547212bb1 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -25,7 +25,7 @@ from ..framework import in_dygraph_mode, _non_static_mode from ..framework import LayerHelper from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype from ..framework import convert_np_dtype_to_dtype_, _varbase_creator, OpProtoHolder -# TODO: define functions to get create a tensor +# TODO: define functions to get create a tensor import paddle from paddle import _C_ops from ..fluid.framework import _in_legacy_dygraph, _in_eager_without_dygraph_check @@ -123,10 +123,10 @@ def linspace(start, stop, num, dtype=None, name=None): check_dtype(num.dtype, 'num', ['int32'], 'linspace') check_dtype(dtype, 'dtype', ['int32', 'int64', 'float32', 'float64'], 'linspace') - if ((stop_dtype == "float64" or start_dtype == "float64") and - out_dtype in ["float32", "int32"]) or ((stop_dtype == "int64" or - start_dtype == "int64") and - out_dtype == "int32"): + if ((stop_dtype == "float64" or start_dtype == "float64") + and out_dtype in ["float32", "int32"]) or ( + (stop_dtype == "int64" or start_dtype == "int64") + and out_dtype == "int32"): raise ValueError( "The dtype of start/stop is {}/{} but the attr(dtype) of linspace is {}, " "which may cause data type overflows. 
Please reset attr(dtype) of linspace." @@ -134,13 +134,14 @@ def linspace(start, stop, num, dtype=None, name=None): out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type='linspace', - inputs={'Start': tensor_start, - 'Stop': tensor_stop, - 'Num': tensor_num}, - attrs={'dtype': dtype}, - outputs={'Out': [out]}) + helper.append_op(type='linspace', + inputs={ + 'Start': tensor_start, + 'Stop': tensor_stop, + 'Num': tensor_num + }, + attrs={'dtype': dtype}, + outputs={'Out': [out]}) if isinstance(num, int): out.desc.set_shape((num, )) return out @@ -255,16 +256,15 @@ def logspace(start, stop, num, base=10.0, dtype=None, name=None): out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type='logspace', - inputs={ - 'Start': tensor_start, - 'Stop': tensor_stop, - 'Num': tensor_num, - 'Base': tensor_base - }, - attrs={'dtype': dtype}, - outputs={'Out': [out]}) + helper.append_op(type='logspace', + inputs={ + 'Start': tensor_start, + 'Stop': tensor_stop, + 'Num': tensor_num, + 'Base': tensor_base + }, + attrs={'dtype': dtype}, + outputs={'Out': [out]}) if isinstance(num, int): out.desc.set_shape((num, )) return out @@ -332,9 +332,10 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): place = _get_paddle_place(place) if place is None: place = _current_expected_place() - elif not isinstance(place, (core.Place, core.CPUPlace, core.CUDAPinnedPlace, - core.CUDAPlace, core.NPUPlace, core.XPUPlace, - core.MLUPlace, core.CustomPlace)): + elif not isinstance( + place, + (core.Place, core.CPUPlace, core.CUDAPinnedPlace, core.CUDAPlace, + core.NPUPlace, core.XPUPlace, core.MLUPlace, core.CustomPlace)): raise ValueError( "'place' must be any of paddle.Place, paddle.CPUPlace, paddle.CUDAPinnedPlace, paddle.CUDAPlace, paddle.NPUPlace, paddle.XPUPlace, paddle.MLUPlace, paddle.CustomPlace" ) @@ -381,8 +382,8 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): return data else: raise TypeError( - "Can't constructs a 'paddle.Tensor' with data type {}, data type must be scalar|list|tuple|np.ndarray|paddle.Tensor". 
- format(type(data))) + "Can't constructs a 'paddle.Tensor' with data type {}, data type must be scalar|list|tuple|np.ndarray|paddle.Tensor" + .format(type(data))) if not dtype: if data.dtype in [ 'float16', 'float32', 'float64', 'complex64', 'complex128' @@ -402,20 +403,18 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): data = data.astype(convert_dtype(dtype)) if _in_eager_without_dygraph_check() and isinstance(data, np.ndarray): - return core.eager.Tensor( - value=data, - place=place, - persistable=False, - zero_copy=False, - name=None, - stop_gradient=stop_gradient) + return core.eager.Tensor(value=data, + place=place, + persistable=False, + zero_copy=False, + name=None, + stop_gradient=stop_gradient) else: - return paddle.Tensor( - value=data, - place=place, - persistable=False, - zero_copy=False, - stop_gradient=stop_gradient) + return paddle.Tensor(value=data, + place=place, + persistable=False, + zero_copy=False, + stop_gradient=stop_gradient) def full_like(x, fill_value, dtype=None, name=None): @@ -469,12 +468,13 @@ def full_like(x, fill_value, dtype=None, name=None): 'full_like/zeros_like/ones_like') out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type='fill_any_like', - inputs={'X': [x]}, - attrs={'value': fill_value, - "dtype": dtype}, - outputs={'Out': [out]}) + helper.append_op(type='fill_any_like', + inputs={'X': [x]}, + attrs={ + 'value': fill_value, + "dtype": dtype + }, + outputs={'Out': [out]}) out.stop_gradient = True return out @@ -681,16 +681,15 @@ def eye(num_rows, num_columns=None, dtype=None, name=None): if not isinstance(num_rows, int) or num_rows < 0: raise TypeError("num_rows should be a non-negative int") out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type='eye', - inputs={}, - outputs={'Out': [out]}, - attrs={ - 'num_rows': num_rows, - 'num_columns': num_columns, - 'dtype': dtype - }, - stop_gradient=True) + helper.append_op(type='eye', + inputs={}, + outputs={'Out': [out]}, + attrs={ + 'num_rows': num_rows, + 'num_columns': num_columns, + 'dtype': dtype + }, + stop_gradient=True) out.stop_gradient = True return out @@ -848,12 +847,13 @@ def arange(start=0, end=None, step=1, dtype=None, name=None): 'range/arange') helper = LayerHelper('range', **locals()) out = helper.create_variable_for_type_inference(dtype, shape=out_shape) - helper.append_op( - type='range', - inputs={'Start': start, - 'End': end, - 'Step': step}, - outputs={'Out': out}) + helper.append_op(type='range', + inputs={ + 'Start': start, + 'End': end, + 'Step': step + }, + outputs={'Out': out}) out.stop_gradient = True if out_shape is not None: out.desc.set_shape(out_shape) @@ -879,8 +879,9 @@ def _tril_triu_op(helper): if name is None: out = helper.create_variable_for_type_inference(dtype=x.dtype) else: - out = helper.create_variable( - name=name, dtype=x.dtype, persistable=False) + out = helper.create_variable(name=name, + dtype=x.dtype, + persistable=False) helper.append_op( type="tril_triu", @@ -889,7 +890,8 @@ def _tril_triu_op(helper): "diagonal": diagonal, "lower": True if op_type == 'tril' else False, }, - outputs={"Out": out}, ) + outputs={"Out": out}, + ) return out @@ -1081,8 +1083,9 @@ def meshgrid(*args, **kwargs): helper.create_variable_for_type_inference(dtype=args[i].dtype) for i in range(num) ] - helper.append_op( - type='meshgrid', inputs={'X': list(args)}, outputs={'Out': out}) + helper.append_op(type='meshgrid', + inputs={'X': list(args)}, + outputs={'Out': out}) return out @@ -1186,28 
+1189,33 @@ def diagflat(x, offset=0, name=None): out2 = helper.create_variable_for_type_inference(dtype=x.dtype) if len(x.shape) == 1: - helper.append_op( - type='diag_v2', - inputs={'X': x}, - outputs={'Out': out2}, - attrs={'offset': offset, - 'padding_value': padding_value}) + helper.append_op(type='diag_v2', + inputs={'X': x}, + outputs={'Out': out2}, + attrs={ + 'offset': offset, + 'padding_value': padding_value + }) else: - helper.append_op( - type='flatten_contiguous_range', - inputs={'X': x}, - outputs={'Out': out1, - 'XShape': out1_shape}, - attrs={'start_axis': 0, - 'stop_axis': -1}) + helper.append_op(type='flatten_contiguous_range', + inputs={'X': x}, + outputs={ + 'Out': out1, + 'XShape': out1_shape + }, + attrs={ + 'start_axis': 0, + 'stop_axis': -1 + }) out1.stop_gradient = True - helper.append_op( - type='diag_v2', - inputs={'X': out1}, - outputs={'Out': out2}, - attrs={'offset': offset, - 'padding_value': padding_value}) + helper.append_op(type='diag_v2', + inputs={'X': out1}, + outputs={'Out': out2}, + attrs={ + 'offset': offset, + 'padding_value': padding_value + }) out2.stop_gradient = True return out2 @@ -1293,19 +1301,20 @@ def diag(x, offset=0, padding_value=0, name=None): check_type(padding_value, 'padding_value', (int, float), 'diag_v2') if len(x.shape) != 1 and len(x.shape) != 2: raise ValueError( - "The dimension of input x must be either 1 or 2, but received {}". - format(len(x.shape))) + "The dimension of input x must be either 1 or 2, but received {}" + .format(len(x.shape))) helper = LayerHelper("diag_v2", **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='diag_v2', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'offset': offset, - 'padding_value': padding_value}) + helper.append_op(type='diag_v2', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'offset': offset, + 'padding_value': padding_value + }) out.stop_gradient = True return out @@ -1385,17 +1394,18 @@ def empty(shape, dtype=None, name=None): check_dtype(shape.dtype, 'shape', ['int32', 'int64'], 'empty') attrs = {} - utils.get_shape_tensor_inputs( - inputs=inputs, attrs=attrs, shape=shape, op_type='empty') + utils.get_shape_tensor_inputs(inputs=inputs, + attrs=attrs, + shape=shape, + op_type='empty') out = helper.create_variable_for_type_inference(dtype=dtype) attrs['dtype'] = convert_np_dtype_to_dtype_(dtype) - helper.append_op( - type='empty', - inputs=inputs, - outputs={'Out': [out]}, - attrs=attrs, - stop_gradient=True) + helper.append_op(type='empty', + inputs=inputs, + outputs={'Out': [out]}, + attrs=attrs, + stop_gradient=True) out.stop_gradient = True return out @@ -1452,15 +1462,16 @@ def empty_like(x, dtype=None, name=None): attrs = {} attrs['dtype'] = convert_np_dtype_to_dtype_(dtype) shape = paddle.shape(x) - utils.get_shape_tensor_inputs( - inputs=inputs, attrs=attrs, shape=shape, op_type='empty_like') - - helper.append_op( - type='empty', - inputs=inputs, - outputs={'Out': [out]}, - attrs=attrs, - stop_gradient=True) + utils.get_shape_tensor_inputs(inputs=inputs, + attrs=attrs, + shape=shape, + op_type='empty_like') + + helper.append_op(type='empty', + inputs=inputs, + outputs={'Out': [out]}, + attrs=attrs, + stop_gradient=True) out.stop_gradient = True return out @@ -1497,8 +1508,8 @@ def assign(x, output=None): """ input = x helper = LayerHelper('assign', **locals()) - check_type(input, 'input', (Variable, np.ndarray, list, tuple, float, int, - bool), 'assign') + check_type(input, 'input', + (Variable, np.ndarray, list, 
tuple, float, int, bool), 'assign') is_inplace = True if output is not None else False if np.isscalar(input) and not isinstance(input, str): @@ -1528,9 +1539,9 @@ def assign(x, output=None): if output is None: output = helper.create_variable_for_type_inference( dtype=input.dtype) - helper.append_op( - type='assign', inputs={'X': [input]}, - outputs={'Out': [output]}) + helper.append_op(type='assign', + inputs={'X': [input]}, + outputs={'Out': [output]}) elif isinstance(input, np.ndarray): # Not support [var, var, ...] currently. if len(input.shape) > 0 and any(isinstance(x, Variable) for x in input): @@ -1570,18 +1581,16 @@ def assign(x, output=None): output = helper.create_variable_for_type_inference( dtype=input.dtype) if _non_static_mode(): - _C_ops.assign_value(output, 'shape', - list(input.shape), 'dtype', dtype, value_name, - values) + _C_ops.assign_value(output, 'shape', list(input.shape), 'dtype', + dtype, value_name, values) else: - helper.append_op( - type='assign_value', - outputs={'Out': [output]}, - attrs={ - 'dtype': dtype, - 'shape': list(input.shape), - value_name: values - }) + helper.append_op(type='assign_value', + outputs={'Out': [output]}, + attrs={ + 'dtype': dtype, + 'shape': list(input.shape), + value_name: values + }) if is_inplace and _in_legacy_dygraph(): output._bump_inplace_version() @@ -1619,7 +1628,7 @@ def clone(x, name=None): return x.clone() -#NOTE(zhiqiu): not public +#NOTE(zhiqiu): not public def _memcpy(input, place=None, output=None): """ @@ -1672,11 +1681,10 @@ def _memcpy(input, place=None, output=None): dst_place_type = 4 attrs = {'dst_place_type': dst_place_type} - helper.append_op( - type='memcpy', - inputs={'X': [input]}, - outputs={'Out': [output]}, - attrs=attrs) + helper.append_op(type='memcpy', + inputs={'X': [input]}, + outputs={'Out': [output]}, + attrs=attrs) return output @@ -1799,12 +1807,13 @@ def tril_indices(row, col, offset=0, dtype='int64'): out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type='tril_indices', - inputs={}, - outputs={'out': [out]}, - attrs={'rows': row, - 'cols': col, - 'offset': offset, - 'dtype': dtype}) + helper.append_op(type='tril_indices', + inputs={}, + outputs={'out': [out]}, + attrs={ + 'rows': row, + 'cols': col, + 'offset': offset, + 'dtype': dtype + }) return out diff --git a/python/paddle/tensor/einsum.py b/python/paddle/tensor/einsum.py index 49cc426a00f..0cdced2cf9b 100644 --- a/python/paddle/tensor/einsum.py +++ b/python/paddle/tensor/einsum.py @@ -159,10 +159,11 @@ def build_view(in_labels, out_labels): # fill the broadcast dimension indices from right to left. if s: for ax, dim in zip( - range(start, end)[::-1], range(s.start(), s.end())[::-1]): + range(start, end)[::-1], + range(s.start(), s.end())[::-1]): inv_map[ax] = dim - # Now work on non-broadcast dimensions + # Now work on non-broadcast dimensions if r: it = itertools.chain(range(start), range(end, len(out_labels))) else: @@ -384,7 +385,7 @@ def plan_matmul(plan, g_view, op1, op2, g_supports, g_shape, I, J1, J2, K): step = matmul, [var1, var2], var2, False, True plan.add_step(step) - # In the rest cases we opt for ops other than matmul + # In the rest cases we opt for ops other than matmul else: # unsqueeze operands include J1...J2... 
dimensions if j2: @@ -410,14 +411,16 @@ def plan_matmul(plan, g_view, op1, op2, g_supports, g_shape, I, J1, J2, K): plan.add_step(step) step = squeeze, [var2], var2, [-1, -2] plan.add_step(step) - elif j1 + j2 == 0 and not-1 in np.concatenate( + elif j1 + j2 == 0 and not -1 in np.concatenate( (op1_vshape[K], op2_vshape[K])): assert all(op1_vshape[K] == op2_vshape[K]) - step = reshape, [var1], var1, list(op1_vshape[ - I]) + [1] + [np.prod(op1_vshape[K])] + step = reshape, [ + var1 + ], var1, list(op1_vshape[I]) + [1] + [np.prod(op1_vshape[K])] plan.add_step(step) - step = reshape, [var2], var2, list(op2_vshape[ - I]) + [1] + [np.prod(op2_vshape[K])] + step = reshape, [ + var2 + ], var2, list(op2_vshape[I]) + [1] + [np.prod(op2_vshape[K])] plan.add_step(step) step = matmul, [var1, var2], var2, False, True plan.add_step(step) @@ -461,8 +464,8 @@ def plan_summation(plan, g_view, op1, op2, g_supports, g_shape, g_count, I, K, J1, J2 = list(range(n_bcast)), [], [], [] - for ax, dim1, dim2 in zip( - range(n_bcast, ndim), op1_view[n_bcast:], op2_view[n_bcast:]): + for ax, dim1, dim2 in zip(range(n_bcast, ndim), op1_view[n_bcast:], + op2_view[n_bcast:]): if (dim1 != -1) != (dim2 != -1): if dim1 != -1: @@ -528,6 +531,7 @@ def plan_broadcast(plan, operands, nop_axes): class Plan: + def __init__(self): self.env = {} self.steps = [] @@ -613,8 +617,8 @@ def plan_einsum(operands, g_view, g_shape, g_supports, g_count, n_bcast): # We'd like to arrange the dimensions in the following way: # [I... J... K...] # [I... J... K...] - # where - # I... are aligned and not to be combined immediately + # where + # I... are aligned and not to be combined immediately # J... are not aligned and not to be combined immediately # K... are aligned and should be immediately combined # At this point the non-trivial broadcast dimensinos in K are already reduced @@ -693,8 +697,8 @@ def preprocess(equation, *operands): assert not ('...' in lhs and '...' not in rhs ), f'Invalid equation: missing ellipsis in output labels.' - assert not (len(list(filter(has_duplicated_labels, lhs.split(',')))) > 0 - ), f'Duplicate labels are not supported.' + assert not (len(list(filter(has_duplicated_labels, lhs.split(',')))) > + 0), f'Duplicate labels are not supported.' assert not has_duplicated_labels( rhs), f'Invalid equation: duplicate output labels are found.' @@ -730,6 +734,7 @@ def parse_fake_shape(equation, operands, labels): def rhs_inference(lhs): + def is_free(key): return cnt.get(key) == 1 and key not in ['.', ','] @@ -804,7 +809,7 @@ def gen_einsum_op(equation, *operands): # dygraph return _C_ops.einsum(operands, len(operands), 'equation', equation)[0] - # static graph + # static graph for inp in operands: check_variable_and_dtype(inp, 'dtype', ['float32', 'float64'], 'einsum') check_type(equation, 'equation', str, 'einsum') @@ -816,12 +821,13 @@ def gen_einsum_op(equation, *operands): helper.create_variable_for_type_inference(dtype=operands[0].dtype) for i in range(len(operands)) ] - helper.append_op( - type='einsum', - inputs={'Operands': operands}, - outputs={'Out': out, - "InnerCache": caches}, - attrs=attrs) + helper.append_op(type='einsum', + inputs={'Operands': operands}, + outputs={ + 'Out': out, + "InnerCache": caches + }, + attrs=attrs) return out @@ -1008,12 +1014,12 @@ def einsum(equation, *operands): n_bcast_dims = max(map(lambda s: s.count('.'), nop_labels)) # Build the data structures for planning. It's helpful to think of all the operands - # broadcasting together from a global view. 
In this view, dimensions from multiple + # broadcasting together from a global view. In this view, dimensions from multiple # operands are mapped to the same position if they are labeled uniquely. Broadcasting # dimensions are mapped to adjacent positions with the right bound fixed. Subject to - # each operand, the map is injective but for all operands the map is on-to. + # each operand, the map is injective but for all operands the map is on-to. # g_labels: - # The labels of the global view + # The labels of the global view # g_view: # Includes a list of maps from each operand's dimensions to the global view's dimensions # which we refer to as ax or axes in the code to distinguish from operand's dims @@ -1027,8 +1033,8 @@ def einsum(equation, *operands): # g_count # Counting how many non-trivial dimensions remain for each ax - g_labels, g_view, g_nout, g_count = build_global_view(nop_labels, rhs, - n_bcast_dims) + g_labels, g_view, g_nout, g_count = build_global_view( + nop_labels, rhs, n_bcast_dims) g_shape, g_supports = build_global_shape(g_view, g_labels, [op.shape for op in operands]) diff --git a/python/paddle/tensor/layer_function_generator.py b/python/paddle/tensor/layer_function_generator.py index 72e5eb64012..c6e8df67dec 100644 --- a/python/paddle/tensor/layer_function_generator.py +++ b/python/paddle/tensor/layer_function_generator.py @@ -185,8 +185,8 @@ def generate_layer_fn(op_type): for each in val: if not isinstance(each, Variable): - raise ValueError("input of {0} must be variable".format( - op_type)) + raise ValueError( + "input of {0} must be variable".format(op_type)) if dtype is None: dtype = each.dtype @@ -225,8 +225,8 @@ def generate_layer_fn(op_type): outputs = dict() out = kwargs.pop(_convert_(o_name), []) if out: - out_var = out[0] if (isinstance(out, list) or - isinstance(out, tuple)) else out + out_var = out[0] if (isinstance(out, list) + or isinstance(out, tuple)) else out else: out_var = helper.create_variable_for_type_inference(dtype=dtype) outputs[o_name] = [out_var] @@ -234,8 +234,10 @@ def generate_layer_fn(op_type): outputs[name] = [ helper.create_variable_for_type_inference(dtype=dtype) ] - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=kwargs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=kwargs) return helper.append_activation(out_var) func.__name__ = op_type @@ -307,8 +309,8 @@ def generate_inplace_fn(inplace_op_type): op = getattr(_C_ops, inplace_op_type) return op(x) warnings.warn( - "In static mode, {}() is the same as {}() and does not perform inplace operation.". - format(inplace_op_type, origin_op_type)) + "In static mode, {}() is the same as {}() and does not perform inplace operation." 
+ .format(inplace_op_type, origin_op_type)) return generate_activation_fn(origin_op_type)(x, name) func.__name__ = inplace_op_type diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 9ba7ef532f2..0089ef21dc9 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -115,12 +115,13 @@ def transpose(x, perm, name=None): helper = LayerHelper('transpose', **locals()) out = helper.create_variable_for_type_inference(x.dtype) x_shape = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='transpose2', - inputs={'X': [x]}, - outputs={'Out': [out], - 'XShape': [x_shape]}, - attrs={'axis': perm}) + helper.append_op(type='transpose2', + inputs={'X': [x]}, + outputs={ + 'Out': [out], + 'XShape': [x_shape] + }, + attrs={'axis': perm}) return out @@ -243,12 +244,13 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None): helper = LayerHelper('matmul_v2', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='matmul_v2', - inputs={'X': x, - 'Y': y}, - outputs={'Out': out}, - attrs=attrs) + helper.append_op(type='matmul_v2', + inputs={ + 'X': x, + 'Y': y + }, + outputs={'Out': out}, + attrs=attrs) return out @@ -339,8 +341,8 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): if in_dygraph_mode(): if dim is None: - return _C_ops.final_state_frobenius_norm(input, [], keepdim, - True) + return _C_ops.final_state_frobenius_norm( + input, [], keepdim, True) return _C_ops.final_state_frobenius_norm(input, dim, keepdim, False) if _in_legacy_dygraph(): if dim is None: @@ -358,11 +360,10 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): out = helper.create_variable_for_type_inference( dtype=helper.input_dtype()) - helper.append_op( - type='frobenius_norm', - inputs={'X': input}, - outputs={'Out': out}, - attrs=attrs) + helper.append_op(type='frobenius_norm', + inputs={'X': input}, + outputs={'Out': out}, + attrs=attrs) return out def vector_norm(input, @@ -407,11 +408,10 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): out = helper.create_variable_for_type_inference( dtype=helper.input_dtype()) - helper.append_op( - type='p_norm', - inputs={'X': input}, - outputs={'Out': out}, - attrs=attrs) + helper.append_op(type='p_norm', + inputs={'X': input}, + outputs={'Out': out}, + attrs=attrs) return out def inf_norm(input, @@ -432,13 +432,14 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): reduce_type = 'reduce_max' if porder == np.float( 'inf') else 'reduce_min' - helper.append_op( - type=reduce_type, - inputs={'X': out}, - outputs={'Out': reduce_out}, - attrs={'dim': axis, - 'keep_dim': keepdim, - 'reduce_all': reduce_all}) + helper.append_op(type=reduce_type, + inputs={'X': out}, + outputs={'Out': reduce_out}, + attrs={ + 'dim': axis, + 'keep_dim': keepdim, + 'reduce_all': reduce_all + }) return reduce_out @@ -452,33 +453,31 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): dtype=block.input_dtype()) abs_out = block.create_variable_for_type_inference( dtype=block.input_dtype()) - block.append_op( - type='abs', inputs={'X': input}, outputs={'Out': abs_out}) + block.append_op(type='abs', + inputs={'X': input}, + outputs={'Out': abs_out}) pow_out = block.create_variable_for_type_inference( dtype=block.input_dtype()) - block.append_op( - type='pow', - inputs={'X': abs_out}, - outputs={'Out': pow_out}, - attrs={'factor': porder}) + block.append_op(type='pow', + inputs={'X': abs_out}, + outputs={'Out': pow_out}, + 
attrs={'factor': porder}) sum_out = block.create_variable_for_type_inference( dtype=block.input_dtype()) - block.append_op( - type='reduce_sum', - inputs={'X': pow_out}, - outputs={'Out': sum_out}, - attrs={ - 'dim': axis, - 'keep_dim': keepdim, - 'reduce_all': True if axis is None else False - }) + block.append_op(type='reduce_sum', + inputs={'X': pow_out}, + outputs={'Out': sum_out}, + attrs={ + 'dim': axis, + 'keep_dim': keepdim, + 'reduce_all': True if axis is None else False + }) porder - block.append_op( - type='pow', - inputs={'X': sum_out}, - outputs={'Out': out}, - attrs={'factor': float(1. / porder)}) + block.append_op(type='pow', + inputs={'X': sum_out}, + outputs={'Out': out}, + attrs={'factor': float(1. / porder)}) return out if axis is None and p is not None: @@ -489,16 +488,16 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): raise ValueError( "only valid string values are 'fro', found {}".format(p)) elif isinstance(p, (int, float)): - return vector_norm( - x, - porder=p, - axis=axis, - keepdim=keepdim, - asvector=True, - name=name) + return vector_norm(x, + porder=p, + axis=axis, + keepdim=keepdim, + asvector=True, + name=name) else: - raise ValueError("only valid p type is string or float, found {}". - format(type(p))) + raise ValueError( + "only valid p type is string or float, found {}".format( + type(p))) if isinstance(axis, tuple): axis = list(axis) @@ -509,25 +508,23 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): if isinstance(axis, int): if isinstance(p, str): if p == "fro": - return vector_norm( - x, - porder=2, - axis=axis, - keepdim=keepdim, - asvector=False, - name=name) + return vector_norm(x, + porder=2, + axis=axis, + keepdim=keepdim, + asvector=False, + name=name) else: raise ValueError( "only valid string values are 'fro', found {}".format(p)) elif isinstance(p, (int, float)): - return vector_norm( - x, - axis=axis, - porder=p, - keepdim=keepdim, - asvector=False, - name=name) + return vector_norm(x, + axis=axis, + porder=p, + keepdim=keepdim, + asvector=False, + name=name) else: raise ValueError( "unspport p for p-order vector norm. except float, found {}". @@ -540,11 +537,14 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): return inf_norm(x, porder=p, axis=axis, keepdim=keepdim, name=name) elif p == 0: raise ValueError( - "just suport axis type int or list (length of list <=1) if p = 0, found {}". - format(axis)) + "just suport axis type int or list (length of list <=1) if p = 0, found {}" + .format(axis)) else: - return p_matrix_norm( - x, porder=p, axis=axis, keepdim=keepdim, name=name) + return p_matrix_norm(x, + porder=p, + axis=axis, + keepdim=keepdim, + name=name) else: raise ValueError( "except axis type int or list (length of list <=2), found {}". 
@@ -646,8 +646,10 @@ def dist(x, y, p=2, name=None): inputs = {"X": [x], "Y": [y]} outputs = {'Out': [out]} attrs = {"p": float(p)} - helper.append_op( - type='dist', inputs=inputs, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='dist', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) return out @@ -769,35 +771,35 @@ def cond(x, p=None, name=None): dtype=block.input_dtype()) out = block.create_variable_for_type_inference( dtype=block.input_dtype()) - block.append_op( - type='abs', inputs={'X': input}, outputs={'Out': abs_out}) - block.append_op( - type='reduce_sum', - inputs={'X': abs_out}, - outputs={'Out': sum_out}, - attrs={'dim': axis, - 'keep_dim': keepdim, - 'reduce_all': reduce_all}) + block.append_op(type='abs', + inputs={'X': input}, + outputs={'Out': abs_out}) + block.append_op(type='reduce_sum', + inputs={'X': abs_out}, + outputs={'Out': sum_out}, + attrs={ + 'dim': axis, + 'keep_dim': keepdim, + 'reduce_all': reduce_all + }) if porder == 1 or porder == np.inf: - block.append_op( - type='reduce_max', - inputs={'X': sum_out}, - outputs={'Out': out}, - attrs={ - 'dim': [-1], - 'keep_dim': keepdim, - 'reduce_all': reduce_all - }) + block.append_op(type='reduce_max', + inputs={'X': sum_out}, + outputs={'Out': out}, + attrs={ + 'dim': [-1], + 'keep_dim': keepdim, + 'reduce_all': reduce_all + }) if porder == -1 or porder == -np.inf: - block.append_op( - type='reduce_min', - inputs={'X': sum_out}, - outputs={'Out': out}, - attrs={ - 'dim': [-1], - 'keep_dim': keepdim, - 'reduce_all': reduce_all - }) + block.append_op(type='reduce_min', + inputs={'X': sum_out}, + outputs={'Out': out}, + attrs={ + 'dim': [-1], + 'keep_dim': keepdim, + 'reduce_all': reduce_all + }) return out def fro_norm(input, porder=2, axis=[-1]): @@ -830,30 +832,30 @@ def cond(x, p=None, name=None): dtype=block.input_dtype()) out = block.create_variable_for_type_inference( dtype=block.input_dtype()) - block.append_op( - type='pow', - inputs={'X': input}, - outputs={'Out': pow_out}, - attrs={'factor': porder}) - block.append_op( - type='reduce_sum', - inputs={'X': pow_out}, - outputs={'Out': sum_out_1}, - attrs={'dim': axis, - 'keep_dim': keepdim, - 'reduce_all': reduce_all}) - block.append_op( - type='reduce_sum', - inputs={'X': sum_out_1}, - outputs={'Out': sum_out_2}, - attrs={'dim': axis, - 'keep_dim': keepdim, - 'reduce_all': reduce_all}) - block.append_op( - type='pow', - inputs={'X': sum_out_2}, - outputs={'Out': out}, - attrs={'factor': float(1. / porder)}) + block.append_op(type='pow', + inputs={'X': input}, + outputs={'Out': pow_out}, + attrs={'factor': porder}) + block.append_op(type='reduce_sum', + inputs={'X': pow_out}, + outputs={'Out': sum_out_1}, + attrs={ + 'dim': axis, + 'keep_dim': keepdim, + 'reduce_all': reduce_all + }) + block.append_op(type='reduce_sum', + inputs={'X': sum_out_1}, + outputs={'Out': sum_out_2}, + attrs={ + 'dim': axis, + 'keep_dim': keepdim, + 'reduce_all': reduce_all + }) + block.append_op(type='pow', + inputs={'X': sum_out_2}, + outputs={'Out': out}, + attrs={'factor': float(1. 
/ porder)}) return out def svd_norm(input, porder, axis=[-1]): @@ -889,51 +891,58 @@ def cond(x, p=None, name=None): out = block.create_variable_for_type_inference( dtype=block.input_dtype()) if porder == "nuc": - block.append_op( - type='reduce_sum', - inputs={'X': s}, - outputs={'Out': out}, - attrs={ - 'dim': axis, - 'keep_dim': keepdim, - 'reduce_all': reduce_all - }) + block.append_op(type='reduce_sum', + inputs={'X': s}, + outputs={'Out': out}, + attrs={ + 'dim': axis, + 'keep_dim': keepdim, + 'reduce_all': reduce_all + }) return out max_out = block.create_variable_for_type_inference( dtype=block.input_dtype()) min_out = block.create_variable_for_type_inference( dtype=block.input_dtype()) - block.append_op( - type='reduce_max', - inputs={'X': s}, - outputs={'Out': max_out}, - attrs={'dim': axis, - 'keep_dim': keepdim, - 'reduce_all': reduce_all}) - block.append_op( - type='reduce_min', - inputs={'X': s}, - outputs={'Out': min_out}, - attrs={'dim': axis, - 'keep_dim': keepdim, - 'reduce_all': reduce_all}) + block.append_op(type='reduce_max', + inputs={'X': s}, + outputs={'Out': max_out}, + attrs={ + 'dim': axis, + 'keep_dim': keepdim, + 'reduce_all': reduce_all + }) + block.append_op(type='reduce_min', + inputs={'X': s}, + outputs={'Out': min_out}, + attrs={ + 'dim': axis, + 'keep_dim': keepdim, + 'reduce_all': reduce_all + }) if porder == 2: - block.append_op( - type='elementwise_div', - inputs={'X': max_out, - 'Y': min_out}, - outputs={'Out': out}, - attrs={'aixs': axis, - 'use_mkldnn': False}) + block.append_op(type='elementwise_div', + inputs={ + 'X': max_out, + 'Y': min_out + }, + outputs={'Out': out}, + attrs={ + 'aixs': axis, + 'use_mkldnn': False + }) return out if porder == -2: - block.append_op( - type='elementwise_div', - inputs={'X': min_out, - 'Y': max_out}, - outputs={'Out': out}, - attrs={'aixs': axis, - 'use_mkldnn': False}) + block.append_op(type='elementwise_div', + inputs={ + 'X': min_out, + 'Y': max_out + }, + outputs={'Out': out}, + attrs={ + 'aixs': axis, + 'use_mkldnn': False + }) return out def empty_tensor(input, shape): @@ -943,9 +952,9 @@ def cond(x, p=None, name=None): x_shape = list(x.shape) if not len(x_shape) >= 2: - raise ValueError("input should be a matrix or batches of matrices, " + - "but the dimention of received input is {}".format( - len(x_shape))) + raise ValueError( + "input should be a matrix or batches of matrices, " + + "but the dimention of received input is {}".format(len(x_shape))) if p == None: p = 2 x_size = 0 if (0 in x_shape) else 1 @@ -959,13 +968,11 @@ def cond(x, p=None, name=None): if p == "nuc": return svd_norm(x, p) * svd_norm(x_inv, p) if p in (1, -1): - return mat_norm( - x, porder=p, axis=[-2]) * mat_norm( - x_inv, porder=p, axis=[-2]) + return mat_norm(x, porder=p, axis=[-2]) * mat_norm( + x_inv, porder=p, axis=[-2]) if p in (np.inf, -np.inf): - return mat_norm( - x, porder=p, axis=[-1]) * mat_norm( - x_inv, porder=p, axis=[-1]) + return mat_norm(x, porder=p, axis=[-1]) * mat_norm( + x_inv, porder=p, axis=[-1]) else: raise ValueError("only support p is {} when input is a ".format(p) + "square matrix or batches of square matrices") @@ -975,8 +982,8 @@ def cond(x, p=None, name=None): return svd_norm(x, porder=p) else: raise ValueError( - "unsupported {} for p, only supporting ('fro', 'nuc', ".format( - p) + "1, -1, 2, -2, inf, -inf) or none") + "unsupported {} for p, only supporting ('fro', 'nuc', ".format(p) + + "1, -1, 2, -2, inf, -inf) or none") def dot(x, y, name=None): @@ -1028,11 +1035,16 @@ def dot(x, y, name=None): if 
name is None: out = helper.create_variable_for_type_inference(dtype=x.dtype) else: - out = helper.create_variable( - name=name, dtype=x.dtype, persistable=False) - helper.append_op( - type="dot", inputs={'X': x, - 'Y': y}, attrs={}, outputs={"Out": out}) + out = helper.create_variable(name=name, + dtype=x.dtype, + persistable=False) + helper.append_op(type="dot", + inputs={ + 'X': x, + 'Y': y + }, + attrs={}, + outputs={"Out": out}) return out @@ -1210,8 +1222,8 @@ def t(input, name=None): return out check_variable_and_dtype( - input, 'input', ['float16', 'float32', 'float64', 'int32', - 'int64'], 'transpose') + input, 'input', ['float16', 'float32', 'float64', 'int32', 'int64'], + 'transpose') helper = LayerHelper('t', **locals()) out = helper.create_variable_for_type_inference(input.dtype) @@ -1219,12 +1231,13 @@ def t(input, name=None): if len(input.shape) == 1: out = input else: - helper.append_op( - type='transpose2', - inputs={'X': [input]}, - outputs={'Out': [out], - 'XShape': [input_shape]}, - attrs={'axis': [1, 0]}) + helper.append_op(type='transpose2', + inputs={'X': [input]}, + outputs={ + 'Out': [out], + 'XShape': [input_shape] + }, + attrs={'axis': [1, 0]}) return out @@ -1281,12 +1294,13 @@ def cross(x, y, axis=9, name=None): attrs = dict() attrs['dim'] = axis - helper.append_op( - type='cross', - inputs={'X': x, - 'Y': y}, - outputs={'Out': out}, - attrs=attrs) + helper.append_op(type='cross', + inputs={ + 'X': x, + 'Y': y + }, + outputs={'Out': out}, + attrs=attrs) return out @@ -1339,11 +1353,10 @@ def cholesky(x, upper=False, name=None): check_type(upper, 'upper', bool, 'cholesky') helper = LayerHelper('cholesky', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='cholesky', - inputs={'X': [x]}, - outputs={'Out': out}, - attrs={'upper': upper}) + helper.append_op(type='cholesky', + inputs={'X': [x]}, + outputs={'Out': out}, + attrs={'upper': upper}) return out @@ -1393,8 +1406,9 @@ def matrix_rank(x, tol=None, hermitian=False, name=None): else: tol_tensor = tol use_default_tol = False - return _C_ops.final_state_matrix_rank_tol( - x, tol_tensor, use_default_tol, hermitian) + return _C_ops.final_state_matrix_rank_tol(x, tol_tensor, + use_default_tol, + hermitian) if tol is None: tol_attr = 0.0 @@ -1445,8 +1459,10 @@ def matrix_rank(x, tol=None, hermitian=False, name=None): helper = LayerHelper('matrix_rank', **locals()) out = helper.create_variable_for_type_inference(dtype='int32') - helper.append_op( - type='matrix_rank', inputs=inputs, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='matrix_rank', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) return out @@ -1493,16 +1509,16 @@ def bmm(x, y, name=None): y_shape = y.shape if not len(x_shape) == len(y_shape) == 3: raise ValueError( - "x and y should be 3-dimensional. But received x's dimention: {}, y's dimention: {}". - format(x_shape, y_shape)) + "x and y should be 3-dimensional. But received x's dimention: {}, y's dimention: {}" + .format(x_shape, y_shape)) if x_shape[2] != y_shape[1]: raise ValueError( - "x's width must be equal with y's height. But received x's shape: {}, y's shape: {}". - format(x_shape, y_shape)) + "x's width must be equal with y's height. But received x's shape: {}, y's shape: {}" + .format(x_shape, y_shape)) if x_shape[0] != y_shape[0]: raise ValueError( - "x's batch (shape[0]) must be equal with y's batch (shape[0]). But received x's shape: {}, y's shape: {}". 
- format(x_shape, y_shape)) + "x's batch (shape[0]) must be equal with y's batch (shape[0]). But received x's shape: {}, y's shape: {}" + .format(x_shape, y_shape)) if paddle.in_dynamic_mode(): return _C_ops.bmm(x, y) @@ -1545,16 +1561,18 @@ def histogram(input, bins=100, min=0, max=0, name=None): return _C_ops.histogram(input, "bins", bins, "min", min, "max", max) helper = LayerHelper('histogram', **locals()) - check_variable_and_dtype( - input, 'X', ['int32', 'int64', 'float32', 'float64'], 'histogram') + check_variable_and_dtype(input, 'X', + ['int32', 'int64', 'float32', 'float64'], + 'histogram') out = helper.create_variable_for_type_inference(VarDesc.VarType.INT64) - helper.append_op( - type='histogram', - inputs={'X': input}, - outputs={'Out': out}, - attrs={'bins': bins, - 'min': min, - 'max': max}) + helper.append_op(type='histogram', + inputs={'X': input}, + outputs={'Out': out}, + attrs={ + 'bins': bins, + 'min': min, + 'max': max + }) return out @@ -1602,12 +1620,13 @@ def bincount(x, weights=None, minlength=0, name=None): out = helper.create_variable_for_type_inference(dtype=weights.dtype) else: out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='bincount', - inputs={'X': x, - 'Weights': weights}, - outputs={'Out': out}, - attrs={'minlength': minlength}) + helper.append_op(type='bincount', + inputs={ + 'X': x, + 'Weights': weights + }, + outputs={'Out': out}, + attrs={'minlength': minlength}) return out @@ -1658,20 +1677,23 @@ def mv(x, vec, name=None): vec_shape = list(vec.shape) if len(x_shape) != 2: raise ValueError( - "x should be 2-dimensional. But received x's dimention: {}". - format(x_shape)) + "x should be 2-dimensional. But received x's dimention: {}" + .format(x_shape)) if len(vec_shape) != 1: raise ValueError( - "vec should be 1-dimensional. But received vec's dimention: {}". - format(vec_shape)) + "vec should be 1-dimensional. 
But received vec's dimention: {}" + .format(vec_shape)) __check_input(x, vec) helper = LayerHelper('mv', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='mv', inputs={'X': x, - 'Vec': vec}, outputs={'Out': out}) + helper.append_op(type='mv', + inputs={ + 'X': x, + 'Vec': vec + }, + outputs={'Out': out}) return out @@ -1721,8 +1743,9 @@ def det(x, name=None): helper = LayerHelper('determinant', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='determinant', inputs={'Input': [x]}, outputs={'Out': [out]}) + helper.append_op(type='determinant', + inputs={'Input': [x]}, + outputs={'Out': [out]}) return out @@ -1776,8 +1799,9 @@ def slogdet(x, name=None): helper = LayerHelper('slogdeterminant', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='slogdeterminant', inputs={'Input': [x]}, outputs={'Out': [out]}) + helper.append_op(type='slogdeterminant', + inputs={'Input': [x]}, + outputs={'Out': [out]}) return out @@ -1843,10 +1867,13 @@ def svd(x, full_matrices=False, name=None): helper.append_op( type='svd', inputs={'X': [x]}, - outputs={'U': u, - 'VH': vh, - 'S': s}, - attrs=attrs, ) + outputs={ + 'U': u, + 'VH': vh, + 'S': s + }, + attrs=attrs, + ) return u, s, vh @@ -1915,11 +1942,10 @@ def matrix_power(x, n, name=None): check_type(n, 'n', int, 'matrix_power') helper = LayerHelper('matrix_power', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='matrix_power', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'n': n}) + helper.append_op(type='matrix_power', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'n': n}) return out @@ -1978,9 +2004,13 @@ def qr(x, mode="reduced", name=None): r = helper.create_variable_for_type_inference(dtype=x.dtype) attrs = dict() attrs['mode'] = mode - helper.append_op( - type='qr', inputs={'X': [x]}, outputs={'Q': q, - 'R': r}, attrs=attrs) + helper.append_op(type='qr', + inputs={'X': [x]}, + outputs={ + 'Q': q, + 'R': r + }, + attrs=attrs) if mode == "r": return r else: @@ -2077,13 +2107,14 @@ def lu(x, pivot=True, get_infos=False, name=None): info = helper.create_variable_for_type_inference(dtype='int') attrs = dict() attrs['pivots'] = pivot - helper.append_op( - type='lu', - inputs={'X': x}, - outputs={'Out': lu, - 'Pivots': p, - 'Infos': info}, - attrs=attrs) + helper.append_op(type='lu', + inputs={'X': x}, + outputs={ + 'Out': lu, + 'Pivots': p, + 'Infos': info + }, + attrs=attrs) if get_infos: return lu, p, info else: @@ -2175,14 +2206,17 @@ def lu_unpack(x, y, unpack_ludata=True, unpack_pivots=True, name=None): attrs = dict() attrs['unpack_ludata'] = unpack_ludata attrs['unpack_pivots'] = unpack_pivots - helper.append_op( - type='lu_unpack', - inputs={'X': x, - 'Pivots': y}, - outputs={'Pmat': p, - 'L': l, - 'U': u}, - attrs=attrs) + helper.append_op(type='lu_unpack', + inputs={ + 'X': x, + 'Pivots': y + }, + outputs={ + 'Pmat': p, + 'L': l, + 'U': u + }, + attrs=attrs) return p, l, u @@ -2238,8 +2272,9 @@ def eig(x, name=None): w, v = _C_ops.eig(x) return w, v - check_variable_and_dtype( - x, 'X', ['float32', 'float64', 'complex64', 'complex128'], 'eig') + check_variable_and_dtype(x, 'X', + ['float32', 'float64', 'complex64', 'complex128'], + 'eig') helper = LayerHelper('eig', **locals()) w = helper.create_variable_for_type_inference(x.dtype) @@ -2289,19 +2324,19 @@ def eigvals(x, name=None): """ check_variable_and_dtype(x, 'dtype', - 
['float32', 'float64', 'complex64', - 'complex128'], 'eigvals') + ['float32', 'float64', 'complex64', 'complex128'], + 'eigvals') x_shape = list(x.shape) if len(x_shape) < 2: raise ValueError( - "The dimension of Input(x) should be at least 2, but received x's dimention = {}, x's shape = {}". - format(len(x_shape), x_shape)) + "The dimension of Input(x) should be at least 2, but received x's dimention = {}, x's shape = {}" + .format(len(x_shape), x_shape)) if x_shape[-1] != x_shape[-2]: raise ValueError( - "The last two dimensions of Input(x) should be equal, but received x's shape = {}". - format(x_shape)) + "The last two dimensions of Input(x) should be equal, but received x's shape = {}" + .format(x_shape)) if paddle.in_dynamic_mode(): return _C_ops.eigvals(x) @@ -2442,8 +2477,8 @@ def eigh(x, UPLO='L', name=None): "length of Input(input) is %s." % len(x.shape)) if x_shape[-1] != x_shape[-2]: raise ValueError( - "The input matrix must be batches of square matrices. But received x's dimention: {}". - format(x_shape)) + "The input matrix must be batches of square matrices. But received x's dimention: {}" + .format(x_shape)) if UPLO != 'L' and UPLO != 'U': raise ValueError( "UPLO must be L or U. But received UPLO is: {}".format(UPLO)) @@ -2451,18 +2486,20 @@ def eigh(x, UPLO='L', name=None): __check_input(x, UPLO) helper = LayerHelper('eigh', **locals()) - check_variable_and_dtype( - x, 'dtype', ['float32', 'float64', 'complex64', 'complex128'], 'eigh') + check_variable_and_dtype(x, 'dtype', + ['float32', 'float64', 'complex64', 'complex128'], + 'eigh') out_value = helper.create_variable_for_type_inference(dtype=x.dtype) out_vector = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='eigh', - inputs={'X': x}, - outputs={'Eigenvalues': out_value, - 'Eigenvectors': out_vector}, - attrs={'UPLO': UPLO}) + helper.append_op(type='eigh', + inputs={'X': x}, + outputs={ + 'Eigenvalues': out_value, + 'Eigenvectors': out_vector + }, + attrs={'UPLO': UPLO}) return out_value, out_vector @@ -2599,19 +2636,23 @@ def pinv(x, rcond=1e-15, hermitian=False, name=None): helper.append_op( type='svd', inputs={'X': [x]}, - outputs={'U': u, - 'VH': vt, - 'S': s}, - attrs={'full_matrices': False}, ) + outputs={ + 'U': u, + 'VH': vt, + 'S': s + }, + attrs={'full_matrices': False}, + ) max_singular_val = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='reduce_max', - inputs={'X': s}, - outputs={'Out': max_singular_val}, - attrs={'dim': [-1], - 'keep_dim': True, - 'reduce_all': False}) + helper.append_op(type='reduce_max', + inputs={'X': s}, + outputs={'Out': max_singular_val}, + attrs={ + 'dim': [-1], + 'keep_dim': True, + 'reduce_all': False + }) rcond = full(shape=[1], fill_value=rcond, dtype=dtype) cutoff = rcond * max_singular_val @@ -2627,49 +2668,59 @@ def pinv(x, rcond=1e-15, hermitian=False, name=None): st = helper.create_variable_for_type_inference(dtype=dtype) st_shape = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type='unsqueeze2', - inputs={'X': singular}, - attrs={'axes': [-2]}, - outputs={'Out': st, - 'XShape': st_shape}) + helper.append_op(type='unsqueeze2', + inputs={'X': singular}, + attrs={'axes': [-2]}, + outputs={ + 'Out': st, + 'XShape': st_shape + }) dims = list(range(len(vt.shape))) perm = dims[:-2] + [dims[-1]] + [dims[-2]] v = helper.create_variable_for_type_inference(dtype) v_shape = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='transpose2', - inputs={'X': [vt]}, - 
outputs={'Out': [v], - 'XShape': [v_shape]}, - attrs={'axis': perm}) + helper.append_op(type='transpose2', + inputs={'X': [vt]}, + outputs={ + 'Out': [v], + 'XShape': [v_shape] + }, + attrs={'axis': perm}) out_1 = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='elementwise_mul', - inputs={'X': v, - 'Y': st}, - outputs={'Out': out_1}, - attrs={'axis': -1, - 'use_mkldnn': False}) + helper.append_op(type='elementwise_mul', + inputs={ + 'X': v, + 'Y': st + }, + outputs={'Out': out_1}, + attrs={ + 'axis': -1, + 'use_mkldnn': False + }) out_1 = helper.append_activation(out_1) out_2 = helper.create_variable_for_type_inference(dtype) helper.append_op( type='matmul_v2', - inputs={'X': out_1, - 'Y': u}, + inputs={ + 'X': out_1, + 'Y': u + }, outputs={'Out': out_2}, - attrs={'trans_x': False, - 'trans_y': True}, ) + attrs={ + 'trans_x': False, + 'trans_y': True + }, + ) return out_2 else: helper = LayerHelper('pinv', **locals()) dtype = x.dtype check_variable_and_dtype( - x, 'dtype', ['float32', 'float64', 'complex64', - 'complex128'], 'pinv') + x, 'dtype', ['float32', 'float64', 'complex64', 'complex128'], + 'pinv') if dtype == paddle.complex128: s_type = 'float64' @@ -2680,23 +2731,26 @@ def pinv(x, rcond=1e-15, hermitian=False, name=None): u = helper.create_variable_for_type_inference(dtype) s = helper.create_variable_for_type_inference(s_type) - helper.append_op( - type='eigh', - inputs={'X': x}, - outputs={'Eigenvalues': s, - 'Eigenvectors': u}, - attrs={'UPLO': 'L'}) + helper.append_op(type='eigh', + inputs={'X': x}, + outputs={ + 'Eigenvalues': s, + 'Eigenvectors': u + }, + attrs={'UPLO': 'L'}) s_abs = helper.create_variable_for_type_inference(s_type) - helper.append_op( - type='abs', inputs={'X': s}, outputs={'Out': s_abs}) + helper.append_op(type='abs', + inputs={'X': s}, + outputs={'Out': s_abs}) max_singular_val = helper.create_variable_for_type_inference(s_type) - helper.append_op( - type='reduce_max', - inputs={'X': s_abs}, - outputs={'Out': max_singular_val}, - attrs={'dim': [-1], - 'keep_dim': True, - 'reduce_all': False}) + helper.append_op(type='reduce_max', + inputs={'X': s_abs}, + outputs={'Out': max_singular_val}, + attrs={ + 'dim': [-1], + 'keep_dim': True, + 'reduce_all': False + }) rcond = full(shape=[1], fill_value=rcond, dtype=s_type) cutoff = rcond * max_singular_val @@ -2712,35 +2766,45 @@ def pinv(x, rcond=1e-15, hermitian=False, name=None): st = helper.create_variable_for_type_inference(dtype=s_type) st_shape = helper.create_variable_for_type_inference(dtype=s_type) - helper.append_op( - type='unsqueeze2', - inputs={'X': singular}, - attrs={'axes': [-2]}, - outputs={'Out': st, - 'XShape': st_shape}) + helper.append_op(type='unsqueeze2', + inputs={'X': singular}, + attrs={'axes': [-2]}, + outputs={ + 'Out': st, + 'XShape': st_shape + }) out_1 = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='elementwise_mul', - inputs={'X': u, - 'Y': st}, - outputs={'Out': out_1}, - attrs={'axis': -1, - 'use_mkldnn': False}) + helper.append_op(type='elementwise_mul', + inputs={ + 'X': u, + 'Y': st + }, + outputs={'Out': out_1}, + attrs={ + 'axis': -1, + 'use_mkldnn': False + }) out_1 = helper.append_activation(out_1) u_conj = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='conj', inputs={'X': u}, outputs={'Out': [u_conj]}) + helper.append_op(type='conj', + inputs={'X': u}, + outputs={'Out': [u_conj]}) out_2 = helper.create_variable_for_type_inference(dtype) helper.append_op( type='matmul_v2', - 
inputs={'X': out_1, - 'Y': u_conj}, + inputs={ + 'X': out_1, + 'Y': u_conj + }, outputs={'Out': out_2}, - attrs={'trans_x': False, - 'trans_y': True}, ) + attrs={ + 'trans_x': False, + 'trans_y': True + }, + ) return out_2 @@ -2795,9 +2859,12 @@ def solve(x, y, name=None): check_variable_and_dtype(y, 'y', ['float32', 'float64'], 'solve') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="solve", inputs={"X": x, - "Y": y}, outputs={"Out": out}) + helper.append_op(type="solve", + inputs={ + "X": x, + "Y": y + }, + outputs={"Out": out}) return out @@ -2865,16 +2932,17 @@ def triangular_solve(x, check_variable_and_dtype(y, 'y', ['float32', 'float64'], 'triangular_solve') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='triangular_solve', - inputs={'X': x, - 'Y': y}, - outputs={'Out': out}, - attrs={ - 'upper': upper, - 'transpose': transpose, - 'unitriangular': unitriangular - }) + helper.append_op(type='triangular_solve', + inputs={ + 'X': x, + 'Y': y + }, + outputs={'Out': out}, + attrs={ + 'upper': upper, + 'transpose': transpose, + 'unitriangular': unitriangular + }) return out @@ -2922,12 +2990,13 @@ def cholesky_solve(x, y, upper=False, name=None): check_variable_and_dtype(y, 'y', ['float32', 'float64'], 'cholesky_solve') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='cholesky_solve', - inputs={'X': x, - 'Y': y}, - outputs={'Out': out}, - attrs={'upper': upper}) + helper.append_op(type='cholesky_solve', + inputs={ + 'X': x, + 'Y': y + }, + outputs={'Out': out}, + attrs={'upper': upper}) return out @@ -2971,8 +3040,8 @@ def eigvalsh(x, UPLO='L', name=None): "length of Input(input) is %s." % len(x.shape)) if x_shape[-1] != x_shape[-2]: raise ValueError( - "The input matrix must be batches of square matrices. But received x's dimention: {}". - format(x_shape)) + "The input matrix must be batches of square matrices. But received x's dimention: {}" + .format(x_shape)) if UPLO != 'L' and UPLO != 'U': raise ValueError( "UPLO must be L or U. But received UPLO is: {}".format(UPLO)) @@ -2988,13 +3057,16 @@ def eigvalsh(x, UPLO='L', name=None): out_vector = helper.create_variable_for_type_inference(dtype=x.dtype) is_test = x.stop_gradient - helper.append_op( - type='eigvalsh', - inputs={'X': x}, - outputs={'Eigenvalues': out_value, - 'Eigenvectors': out_vector}, - attrs={'UPLO': UPLO, - 'is_test': is_test}) + helper.append_op(type='eigvalsh', + inputs={'X': x}, + outputs={ + 'Eigenvalues': out_value, + 'Eigenvectors': out_vector + }, + attrs={ + 'UPLO': UPLO, + 'is_test': is_test + }) return out_value @@ -3061,14 +3133,14 @@ def lstsq(x, y, rcond=None, driver=None, name=None): if device == "cpu": if driver not in (None, "gels", "gelss", "gelsd", "gelsy"): raise ValueError( - "Only support valid driver is 'gels', 'gelss', 'gelsd', 'gelsy' or None for CPU inputs. But got {}". - format(driver)) + "Only support valid driver is 'gels', 'gelss', 'gelsd', 'gelsy' or None for CPU inputs. But got {}" + .format(driver)) driver = "gelsy" if driver is None else driver elif "gpu" in device: if driver not in (None, "gels"): raise ValueError( - "Only support valid driver is 'gels' or None for CUDA inputs. But got {}". - format(driver)) + "Only support valid driver is 'gels' or None for CUDA inputs. 
But got {}" + .format(driver)) driver = "gels" if driver is None else driver else: raise RuntimeError("Only support lstsq api for CPU or CUDA device.") @@ -3112,60 +3184,67 @@ def lstsq(x, y, rcond=None, driver=None, name=None): return solution, residuals, rank, singular_values helper = LayerHelper('lstsq', **locals()) - check_variable_and_dtype( - x, 'dtype', ['float32', 'float64', 'complex64', 'complex128'], 'lstsq') - check_variable_and_dtype( - y, 'dtype', ['float32', 'float64', 'complex64', 'complex128'], 'lstsq') + check_variable_and_dtype(x, 'dtype', + ['float32', 'float64', 'complex64', 'complex128'], + 'lstsq') + check_variable_and_dtype(y, 'dtype', + ['float32', 'float64', 'complex64', 'complex128'], + 'lstsq') solution = helper.create_variable_for_type_inference(dtype=x.dtype) residuals = helper.create_variable_for_type_inference(dtype=x.dtype) rank = helper.create_variable_for_type_inference(dtype=paddle.int32) singular_values = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='lstsq', - inputs={'X': x, - 'Y': y}, - outputs={ - 'Solution': solution, - 'Rank': rank, - 'SingularValues': singular_values - }, - attrs={'rcond': rcond, - 'driver': driver}) + helper.append_op(type='lstsq', + inputs={ + 'X': x, + 'Y': y + }, + outputs={ + 'Solution': solution, + 'Rank': rank, + 'SingularValues': singular_values + }, + attrs={ + 'rcond': rcond, + 'driver': driver + }) matmul_out = helper.create_variable_for_type_inference(dtype=x.dtype) minus_out = helper.create_variable_for_type_inference(dtype=x.dtype) pow_out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='matmul_v2', - inputs={'X': x, - 'Y': solution}, - outputs={'Out': matmul_out}, - attrs={ - 'trans_x': False, - 'trans_y': False, - }) - - helper.append_op( - type='elementwise_sub', - inputs={'X': matmul_out, - 'Y': y}, - outputs={'Out': minus_out}) - - helper.append_op( - type='pow', - inputs={'X': minus_out}, - outputs={'Out': pow_out}, - attrs={'factor': 2}) - - helper.append_op( - type='reduce_sum', - inputs={'X': pow_out}, - outputs={'Out': residuals}, - attrs={'dim': [-2], - 'keep_dim': False, - 'reduce_all': False}) + helper.append_op(type='matmul_v2', + inputs={ + 'X': x, + 'Y': solution + }, + outputs={'Out': matmul_out}, + attrs={ + 'trans_x': False, + 'trans_y': False, + }) + + helper.append_op(type='elementwise_sub', + inputs={ + 'X': matmul_out, + 'Y': y + }, + outputs={'Out': minus_out}) + + helper.append_op(type='pow', + inputs={'X': minus_out}, + outputs={'Out': pow_out}, + attrs={'factor': 2}) + + helper.append_op(type='reduce_sum', + inputs={'X': pow_out}, + outputs={'Out': residuals}, + attrs={ + 'dim': [-2], + 'keep_dim': False, + 'reduce_all': False + }) if driver == "gels": rank = paddle.static.data(name='rank', shape=[0]) @@ -3237,8 +3316,8 @@ def corrcoef(x, rowvar=True, name=None): # Clip to [-1, 1]. 
This does not guarantee if paddle.is_complex(c): - return paddle.complex( - paddle.clip(c.real(), -1, 1), paddle.clip(c.imag(), -1, 1)) + return paddle.complex(paddle.clip(c.real(), -1, 1), + paddle.clip(c.imag(), -1, 1)) else: c = paddle.clip(c, -1, 1) diff --git a/python/paddle/tensor/logic.py b/python/paddle/tensor/logic.py index 31d2ec0557d..c4b4c552c67 100755 --- a/python/paddle/tensor/logic.py +++ b/python/paddle/tensor/logic.py @@ -26,7 +26,7 @@ else: from ..framework import in_dygraph_mode, _non_static_mode from ..framework import LayerHelper from ..fluid.framework import _in_legacy_dygraph -# TODO: define logic functions of a tensor +# TODO: define logic functions of a tensor from paddle import _C_ops from paddle.tensor.creation import full @@ -40,13 +40,15 @@ def _logical_op(op_name, x, y, out=None, name=None, binary_op=True): return op(x, y) else: return op(x) - check_variable_and_dtype(x, "x", [ - "bool", "int8", "int16", "int32", "int64", "float32", "float64" - ], op_name) + check_variable_and_dtype( + x, "x", + ["bool", "int8", "int16", "int32", "int64", "float32", "float64"], + op_name) if y is not None: - check_variable_and_dtype(y, "y", [ - "bool", "int8", "int16", "int32", "int64", "float32", "float64" - ], op_name) + check_variable_and_dtype( + y, "y", + ["bool", "int8", "int16", "int32", "int64", "float32", "float64"], + op_name) if out is not None: check_type(out, "out", Variable, op_name) @@ -61,9 +63,12 @@ def _logical_op(op_name, x, y, out=None, name=None, binary_op=True): out = helper.create_variable_for_type_inference(dtype=x.dtype) if binary_op: - helper.append_op( - type=op_name, inputs={"X": x, - "Y": y}, outputs={"Out": out}) + helper.append_op(type=op_name, + inputs={ + "X": x, + "Y": y + }, + outputs={"Out": out}) else: helper.append_op(type=op_name, inputs={"X": x}, outputs={"Out": out}) @@ -105,8 +110,12 @@ def logical_and(x, y, out=None, name=None): if in_dygraph_mode(): return _C_ops.final_state_logical_and(x, y) - return _logical_op( - op_name="logical_and", x=x, y=y, name=name, out=out, binary_op=True) + return _logical_op(op_name="logical_and", + x=x, + y=y, + name=name, + out=out, + binary_op=True) def logical_or(x, y, out=None, name=None): @@ -146,8 +155,12 @@ def logical_or(x, y, out=None, name=None): """ if in_dygraph_mode(): return _C_ops.final_state_logical_or(x, y) - return _logical_op( - op_name="logical_or", x=x, y=y, name=name, out=out, binary_op=True) + return _logical_op(op_name="logical_or", + x=x, + y=y, + name=name, + out=out, + binary_op=True) def logical_xor(x, y, out=None, name=None): @@ -188,8 +201,12 @@ def logical_xor(x, y, out=None, name=None): if in_dygraph_mode(): return _C_ops.final_state_logical_xor(x, y) - return _logical_op( - op_name="logical_xor", x=x, y=y, name=name, out=out, binary_op=True) + return _logical_op(op_name="logical_xor", + x=x, + y=y, + name=name, + out=out, + binary_op=True) @templatedoc() @@ -222,8 +239,12 @@ def logical_not(x, out=None, name=None): """ if in_dygraph_mode(): return _C_ops.final_state_logical_not(x) - return _logical_op( - op_name="logical_not", x=x, y=None, name=name, out=out, binary_op=False) + return _logical_op(op_name="logical_not", + x=x, + y=None, + name=name, + out=out, + binary_op=False) def is_empty(x, name=None): @@ -268,8 +289,9 @@ def is_empty(x, name=None): helper = LayerHelper("is_empty", **locals()) cond = helper.create_variable_for_type_inference(dtype='bool') cond.stop_gradient = True - helper.append_op( - type='is_empty', inputs={'X': [x]}, outputs={'Out': [cond]}) + 
helper.append_op(type='is_empty', + inputs={'X': [x]}, + outputs={'Out': [cond]}) return cond @@ -310,9 +332,12 @@ def equal_all(x, y, name=None): helper = LayerHelper("equal_all", **locals()) out = helper.create_variable_for_type_inference(dtype='bool') - helper.append_op( - type='equal_all', inputs={'X': [x], - 'Y': [y]}, outputs={'Out': [out]}) + helper.append_op(type='equal_all', + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [out]}) return out @@ -364,14 +389,13 @@ def allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): if in_dygraph_mode(): # NOTE(dev): Pass tol as Tensor to fix precision loss problem, because # C++ backend will cast it into float32 if passing float from python. - as_tensor = lambda x: paddle.to_tensor([x], dtype='float64', place='cpu') - return _C_ops.final_state_allclose(x, y, - as_tensor(rtol), + as_tensor = lambda x: paddle.to_tensor( + [x], dtype='float64', place='cpu') + return _C_ops.final_state_allclose(x, y, as_tensor(rtol), as_tensor(atol), equal_nan) if _in_legacy_dygraph(): - return _C_ops.allclose(x, y, 'rtol', - str(rtol), 'atol', - str(atol), 'equal_nan', equal_nan) + return _C_ops.allclose(x, y, 'rtol', str(rtol), 'atol', str(atol), + 'equal_nan', equal_nan) check_variable_and_dtype(x, "input", ['float32', 'float64'], 'allclose') check_variable_and_dtype(y, "input", ['float32', 'float64'], 'allclose') check_type(rtol, 'rtol', float, 'allclose') @@ -384,8 +408,10 @@ def allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): inputs = {'Input': x, 'Other': y} outputs = {'Out': out} attrs = {'rtol': str(rtol), 'atol': str(atol), 'equal_nan': equal_nan} - helper.append_op( - type='allclose', inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type='allclose', + inputs=inputs, + outputs=outputs, + attrs=attrs) return out @@ -421,8 +447,8 @@ def equal(x, y, name=None): """ if not isinstance(y, (int, bool, float, Variable)): raise TypeError( - "Type of input args must be float, bool, int or Tensor, but received type {}". 
- format(type(y))) + "Type of input args must be float, bool, int or Tensor, but received type {}" + .format(type(y))) if not isinstance(y, Variable): y = full(shape=[1], dtype=x.dtype, fill_value=y) @@ -443,11 +469,12 @@ def equal(x, y, name=None): out = helper.create_variable_for_type_inference(dtype='bool') out.stop_gradient = True - helper.append_op( - type='equal', - inputs={'X': [x], - 'Y': [y]}, - outputs={'Out': [out]}) + helper.append_op(type='equal', + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [out]}) return out @@ -494,11 +521,12 @@ def greater_equal(x, y, name=None): out = helper.create_variable_for_type_inference(dtype='bool') out.stop_gradient = True - helper.append_op( - type='greater_equal', - inputs={'X': [x], - 'Y': [y]}, - outputs={'Out': [out]}) + helper.append_op(type='greater_equal', + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [out]}) return out @@ -544,11 +572,12 @@ def greater_than(x, y, name=None): out = helper.create_variable_for_type_inference(dtype='bool') out.stop_gradient = True - helper.append_op( - type='greater_than', - inputs={'X': [x], - 'Y': [y]}, - outputs={'Out': [out]}) + helper.append_op(type='greater_than', + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [out]}) return out @@ -596,11 +625,12 @@ def less_equal(x, y, name=None): out = helper.create_variable_for_type_inference(dtype='bool') out.stop_gradient = True - helper.append_op( - type='less_equal', - inputs={'X': [x], - 'Y': [y]}, - outputs={'Out': [out]}) + helper.append_op(type='less_equal', + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [out]}) return out @@ -648,11 +678,12 @@ def less_than(x, y, name=None): out = helper.create_variable_for_type_inference(dtype='bool') out.stop_gradient = True - helper.append_op( - type='less_than', - inputs={'X': [x], - 'Y': [y]}, - outputs={'Out': [out]}) + helper.append_op(type='less_than', + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [out]}) return out @@ -700,11 +731,12 @@ def not_equal(x, y, name=None): out = helper.create_variable_for_type_inference(dtype='bool') out.stop_gradient = True - helper.append_op( - type='not_equal', - inputs={'X': [x], - 'Y': [y]}, - outputs={'Out': [out]}) + helper.append_op(type='not_equal', + inputs={ + 'X': [x], + 'Y': [y] + }, + outputs={'Out': [out]}) return out @@ -761,9 +793,12 @@ def _bitwise_op(op_name, x, y, out=None, name=None, binary_op=True): out = helper.create_variable_for_type_inference(dtype=x.dtype) if binary_op: - helper.append_op( - type=op_name, inputs={"X": x, - "Y": y}, outputs={"Out": out}) + helper.append_op(type=op_name, + inputs={ + "X": x, + "Y": y + }, + outputs={"Out": out}) else: helper.append_op(type=op_name, inputs={"X": x}, outputs={"Out": out}) @@ -794,8 +829,12 @@ def bitwise_and(x, y, out=None, name=None): """ if in_dygraph_mode() and out is None: return _C_ops.final_state_bitwise_and(x, y) - return _bitwise_op( - op_name="bitwise_and", x=x, y=y, name=name, out=out, binary_op=True) + return _bitwise_op(op_name="bitwise_and", + x=x, + y=y, + name=name, + out=out, + binary_op=True) @templatedoc() @@ -823,8 +862,12 @@ def bitwise_or(x, y, out=None, name=None): if in_dygraph_mode() and out is None: return _C_ops.final_state_bitwise_or(x, y) - return _bitwise_op( - op_name="bitwise_or", x=x, y=y, name=name, out=out, binary_op=True) + return _bitwise_op(op_name="bitwise_or", + x=x, + y=y, + name=name, + out=out, + binary_op=True) @templatedoc() @@ -851,8 +894,12 @@ def bitwise_xor(x, y, out=None, name=None): """ if in_dygraph_mode() and 
out is None: return _C_ops.final_state_bitwise_xor(x, y) - return _bitwise_op( - op_name="bitwise_xor", x=x, y=y, name=name, out=out, binary_op=True) + return _bitwise_op(op_name="bitwise_xor", + x=x, + y=y, + name=name, + out=out, + binary_op=True) @templatedoc() @@ -878,8 +925,12 @@ def bitwise_not(x, out=None, name=None): if in_dygraph_mode() and out is None: return _C_ops.final_state_bitwise_not(x) - return _bitwise_op( - op_name="bitwise_not", x=x, y=None, name=name, out=out, binary_op=False) + return _bitwise_op(op_name="bitwise_not", + x=x, + y=None, + name=name, + out=out, + binary_op=False) @templatedoc() @@ -937,14 +988,13 @@ def isclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): if in_dygraph_mode(): # NOTE(dev): Pass tol as Tensor to fix precision loss problem, because # C++ backend will cast it into float32 if passing float from python. - as_tensor = lambda x: paddle.to_tensor([x], dtype='float64', place='cpu') - return _C_ops.final_state_isclose(x, y, - as_tensor(rtol), + as_tensor = lambda x: paddle.to_tensor( + [x], dtype='float64', place='cpu') + return _C_ops.final_state_isclose(x, y, as_tensor(rtol), as_tensor(atol), equal_nan) if _in_legacy_dygraph(): - return _C_ops.isclose(x, y, 'rtol', - str(rtol), 'atol', - str(atol), 'equal_nan', equal_nan) + return _C_ops.isclose(x, y, 'rtol', str(rtol), 'atol', str(atol), + 'equal_nan', equal_nan) check_variable_and_dtype(x, "input", ['float32', 'float64'], 'isclose') check_variable_and_dtype(y, "input", ['float32', 'float64'], 'isclose') @@ -958,6 +1008,8 @@ def isclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): inputs = {'Input': x, 'Other': y} outputs = {'Out': out} attrs = {'rtol': str(rtol), 'atol': str(atol), 'equal_nan': equal_nan} - helper.append_op( - type='isclose', inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type='isclose', + inputs=inputs, + outputs=outputs, + attrs=attrs) return out diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 57785c16e60..96d24a7f915 100755 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -23,7 +23,7 @@ from ..framework import OpProtoHolder, convert_np_dtype_to_dtype_, dygraph_only from ..fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype from ..fluid.layers import utils import numpy as np -# TODO: define functions to manipulate a tensor +# TODO: define functions to manipulate a tensor from ..fluid.layers.nn import _elementwise_op_in_dygraph from ..fluid.dygraph.inplace_utils import inplace_apis_in_dygraph_only import paddle @@ -84,12 +84,13 @@ def cast(x, dtype): helper = LayerHelper('cast', **locals()) out = helper.create_variable_for_type_inference( dtype=dtype, stop_gradient=x.stop_gradient) - helper.append_op( - type='cast', - inputs={'X': [x]}, - outputs={'Out': [out]}, - attrs={'in_dtype': x.dtype, - 'out_dtype': out.dtype}) + helper.append_op(type='cast', + inputs={'X': [x]}, + outputs={'Out': [out]}, + attrs={ + 'in_dtype': x.dtype, + 'out_dtype': out.dtype + }) return out @@ -233,8 +234,8 @@ def slice(input, axes, starts, ends): else: raise ValueError( - "Input axes must be a python list or tuple, but reveived {}". 
- format(type(axes))) + "Input axes must be a python list or tuple, but reveived {}" + .format(type(axes))) infer_flags = list(1 for i in range(len(axes))) @@ -321,8 +322,10 @@ def slice(input, axes, starts, ends): attrs['infer_flags'] = infer_flags out = helper.create_variable_for_type_inference( dtype=helper.input_dtype('input')) - helper.append_op( - type='slice', inputs=inputs, attrs=attrs, outputs={'Out': out}) + helper.append_op(type='slice', + inputs=inputs, + attrs=attrs, + outputs={'Out': out}) return out @@ -407,12 +410,13 @@ def transpose(x, perm, name=None): helper = LayerHelper('transpose', **locals()) out = helper.create_variable_for_type_inference(x.dtype) x_shape = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='transpose2', - inputs={'X': [x]}, - outputs={'Out': [out], - 'XShape': [x_shape]}, - attrs={'axis': perm}) + helper.append_op(type='transpose2', + inputs={'X': [x]}, + outputs={ + 'Out': [out], + 'XShape': [x_shape] + }, + attrs={'axis': perm}) return out @@ -468,12 +472,13 @@ def unstack(x, axis=0, num=None): for _ in range(num): outs.append(helper.create_variable_for_type_inference(x.dtype)) - helper.append_op( - type='unstack', - inputs={'X': [x]}, - outputs={'Y': outs}, - attrs={'axis': axis, - 'num': num}) + helper.append_op(type='unstack', + inputs={'X': [x]}, + outputs={'Y': outs}, + attrs={ + 'axis': axis, + 'num': num + }) return outs @@ -535,17 +540,16 @@ def shard_index(input, index_num, nshards, shard_id, ignore_value=-1): (shard_id, nshards)) out = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type=op_type, - inputs={'X': [input]}, - outputs={'Out': out}, - attrs={ - 'index_num': index_num, - 'nshards': nshards, - 'shard_id': shard_id, - 'ignore_value': ignore_value - }, - stop_gradient=True) + helper.append_op(type=op_type, + inputs={'X': [input]}, + outputs={'Out': out}, + attrs={ + 'index_num': index_num, + 'nshards': nshards, + 'shard_id': shard_id, + 'ignore_value': ignore_value + }, + stop_gradient=True) return out @@ -713,8 +717,11 @@ def crop(x, shape=None, offsets=None, name=None): else: _attr_shape_check(dim_size) temp_out = helper.create_variable_for_type_inference('int32') - fill_constant( - [1], 'int32', dim_size, force_cpu=True, out=temp_out) + fill_constant([1], + 'int32', + dim_size, + force_cpu=True, + out=temp_out) new_shape_tensor.append(temp_out) shape_attr.append(dim_size) ipts['ShapeTensor'] = new_shape_tensor @@ -724,11 +731,10 @@ def crop(x, shape=None, offsets=None, name=None): _attr_shape_check(dim_size) attrs['shape'] = shape - helper.append_op( - type='crop_tensor', - inputs=ipts, - outputs={'Out': out}, - attrs=None if len(attrs) == 0 else attrs) + helper.append_op(type='crop_tensor', + inputs=ipts, + outputs={'Out': out}, + attrs=None if len(attrs) == 0 else attrs) return out @@ -762,8 +768,8 @@ def fill_(x, value): raise TypeError( "The type of 'value' must be int or float, but received %s." 
% (type(value))) - return _C_ops.fill_any_(x, "value_float", - float(value), "value_int", int(value)) + return _C_ops.fill_any_(x, "value_float", float(value), "value_int", + int(value)) @dygraph_only @@ -857,12 +863,11 @@ def _fill_diagonal_tensor_impl(x, y, offset=0, dim1=0, dim2=1, inplace=False): for i in range(len(inshape)): if i != dim1 and i != dim2: predshape.append(inshape[i]) - diaglen = min( - min(inshape[dim1], inshape[dim1] + offset), - min(inshape[dim2], inshape[dim2] - offset)) + diaglen = min(min(inshape[dim1], inshape[dim1] + offset), + min(inshape[dim2], inshape[dim2] - offset)) predshape.append(diaglen) - assert tuple(predshape) == tuple(y.shape), ( - "the y shape should be {}".format(predshape)) + assert tuple(predshape) == tuple( + y.shape), ("the y shape should be {}".format(predshape)) if len(y.shape) == 1: y = y.reshape([1, -1]) @@ -902,8 +907,12 @@ def fill_diagonal_tensor_(x, y, offset=0, dim1=0, dim2=1, name=None): print(x.tolist()) #[[1.0, 2.0, 2.0], [2.0, 1.0, 2.0], [2.0, 2.0, 1.0], [2.0, 2.0, 2.0]] """ - return _fill_diagonal_tensor_impl( - x, y, offset=offset, dim1=dim1, dim2=dim2, inplace=True) + return _fill_diagonal_tensor_impl(x, + y, + offset=offset, + dim1=dim1, + dim2=dim2, + inplace=True) def fill_diagonal_tensor(x, y, offset=0, dim1=0, dim2=1, name=None): @@ -932,8 +941,12 @@ def fill_diagonal_tensor(x, y, offset=0, dim1=0, dim2=1, name=None): print(nx.tolist()) #[[1.0, 2.0, 2.0], [2.0, 1.0, 2.0], [2.0, 2.0, 1.0], [2.0, 2.0, 2.0]] """ - return _fill_diagonal_tensor_impl( - x, y, offset=offset, dim1=dim1, dim2=dim2, inplace=False) + return _fill_diagonal_tensor_impl(x, + y, + offset=offset, + dim1=dim1, + dim2=dim2, + inplace=False) @dygraph_only @@ -1038,7 +1051,8 @@ def concat(x, axis=0, name=None): 'concat') if x.dtype != input[0].dtype: raise TypeError( - "All the Tensors in the input must have the same data type.") + "All the Tensors in the input must have the same data type." + ) else: input = [input] check_type(axis, 'axis', (int, Variable), 'concat') @@ -1046,7 +1060,8 @@ def concat(x, axis=0, name=None): if isinstance(axis, Variable): check_dtype( axis.dtype, 'axis', ['int32', 'int64'], 'concat', - "The data type of axis must be int32 or int64 when axis is a Tensor") + "The data type of axis must be int32 or int64 when axis is a Tensor" + ) helper = LayerHelper('concat', **locals()) out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) @@ -1059,13 +1074,16 @@ def concat(x, axis=0, name=None): assert len(input) == 1, "If the elements of 'input' in concat are Variable(LoDTensorArray), " \ "number of the elements must be 1, but received %s." 
% len(input) out_index = helper.create_variable_for_type_inference(dtype="int32") - helper.append_op( - type='tensor_array_to_tensor', - inputs={'X': input[0]}, - outputs={'Out': [out], - 'OutIndex': [out_index]}, - attrs={'axis': axis, - 'use_stack': False}) + helper.append_op(type='tensor_array_to_tensor', + inputs={'X': input[0]}, + outputs={ + 'Out': [out], + 'OutIndex': [out_index] + }, + attrs={ + 'axis': axis, + 'use_stack': False + }) else: inputs = {'X': input} attrs = {} @@ -1075,8 +1093,10 @@ def concat(x, axis=0, name=None): else: attrs['axis'] = axis - helper.append_op( - type='concat', inputs=inputs, outputs={'Out': [out]}, attrs=attrs) + helper.append_op(type='concat', + inputs=inputs, + outputs={'Out': [out]}, + attrs=attrs) return out @@ -1142,8 +1162,8 @@ def broadcast_tensors(input, name=None): output_shape_r.append(shape[i]) output_shape_r_last_tensor_index.append(j) else: - invalid = (output_shape_r[i] != shape[i] and - output_shape_r[i] != 1 and shape[i] != 1) + invalid = (output_shape_r[i] != shape[i] + and output_shape_r[i] != 1 and shape[i] != 1) if invalid: last_index = output_shape_r_last_tensor_index[i] raise TypeError( @@ -1161,14 +1181,15 @@ def broadcast_tensors(input, name=None): out = [] while i < num_inputs: out.append( - helper.create_variable_for_type_inference(dtype=helper.input_dtype( - ))) + helper.create_variable_for_type_inference( + dtype=helper.input_dtype())) i += 1 inputs = {'X': input} - helper.append_op( - type='broadcast_tensors', inputs=inputs, outputs={'Out': out}, - attrs={}) + helper.append_op(type='broadcast_tensors', + inputs=inputs, + outputs={'Out': out}, + attrs={}) return out @@ -1223,11 +1244,10 @@ def flip(x, axis, name=None): else: out = helper.create_variable(name=name, dtype=dtype, persistable=False) - helper.append_op( - type="flip", - inputs={"X": x}, - outputs={"Out": out}, - attrs={"axis": axis}) + helper.append_op(type="flip", + inputs={"X": x}, + outputs={"Out": out}, + attrs={"axis": axis}) return out @@ -1294,23 +1314,25 @@ def rot90(x, k=1, axes=[0, 1], name=None): input_total_dims = len(x.shape) total_rot_dims = len(axes) if total_rot_dims != 2: - raise ValueError("expected total rotation axes == 2, but got axes = {}". - format(total_rot_dims)) + raise ValueError( + "expected total rotation axes == 2, but got axes = {}".format( + total_rot_dims)) if input_total_dims < 2: - raise ValueError("expected total dims >= 2, but got total dims = {}". - format(input_total_dims)) + raise ValueError( + "expected total dims >= 2, but got total dims = {}".format( + input_total_dims)) if not (axes[0] != axes[1] and abs(axes[0] - axes[1]) != input_total_dims): raise ValueError( - "expected rotation axes to be different, but got axis0 = {}, and axis1 = {}". 
- format(axes[0], axes[1])) + "expected rotation axes to be different, but got axis0 = {}, and axis1 = {}" + .format(axes[0], axes[1])) if not (axes[0] < input_total_dims and axes[0] >= -input_total_dims): - raise ValueError("Rotation axis0 out of range, axis0 = {}".format(axes[ - 0])) + raise ValueError("Rotation axis0 out of range, axis0 = {}".format( + axes[0])) if not (axes[1] < input_total_dims and axes[1] >= -input_total_dims): - raise ValueError("Rotation axis1 out of range, axis1 = {}".format(axes[ - 1])) + raise ValueError("Rotation axis1 out of range, axis1 = {}".format( + axes[1])) k %= 4 if k == 0: @@ -1408,12 +1430,12 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None): 'flatten') x_dim = len(x.shape) - if not (isinstance(start_axis, int)) or ( - start_axis > x_dim - 1) or start_axis < -x_dim: + if not (isinstance(start_axis, + int)) or (start_axis > x_dim - 1) or start_axis < -x_dim: raise ValueError( "The start_axis should be a int, and in range [-rank(x), rank(x))") - if not (isinstance(stop_axis, int)) or ( - stop_axis > x_dim - 1) or stop_axis < -x_dim: + if not (isinstance(stop_axis, + int)) or (stop_axis > x_dim - 1) or stop_axis < -x_dim: raise ValueError( "The stop_axis should be a int, and in range [-rank(x), rank(x))") if start_axis < 0: @@ -1434,13 +1456,16 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None): helper = LayerHelper('flatten', **locals()) out = helper.create_variable_for_type_inference(x.dtype) x_shape = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='flatten_contiguous_range', - inputs={"X": x}, - outputs={'Out': out, - 'XShape': x_shape}, - attrs={"start_axis": start_axis, - "stop_axis": stop_axis}) + helper.append_op(type='flatten_contiguous_range', + inputs={"X": x}, + outputs={ + 'Out': out, + 'XShape': x_shape + }, + attrs={ + "start_axis": start_axis, + "stop_axis": stop_axis + }) return out @@ -1454,12 +1479,12 @@ def flatten_(x, start_axis=0, stop_axis=-1, name=None): raise ValueError("The input x should be a Tensor") x_dim = len(x.shape) - if not (isinstance(start_axis, int)) or ( - start_axis > x_dim - 1) or start_axis < -x_dim: + if not (isinstance(start_axis, + int)) or (start_axis > x_dim - 1) or start_axis < -x_dim: raise ValueError( "The start_axis should be a int, and in range [-rank(x), rank(x))") - if not (isinstance(stop_axis, int)) or ( - stop_axis > x_dim - 1) or stop_axis < -x_dim: + if not (isinstance(stop_axis, + int)) or (stop_axis > x_dim - 1) or stop_axis < -x_dim: raise ValueError( "The stop_axis should be a int, and in range [-rank(x), rank(x))") if start_axis < 0: @@ -1528,8 +1553,8 @@ def roll(x, shifts, axis=None, name=None): for i in range(len(axis)): if axis[i] >= len_origin_shape or axis[i] < -len_origin_shape: raise ValueError( - "axis is out of range, it should be in range [{}, {}), but received {}". 
- format(-len_origin_shape, len_origin_shape, axis)) + "axis is out of range, it should be in range [{}, {}), but received {}" + .format(-len_origin_shape, len_origin_shape, axis)) else: axis = [] @@ -1545,20 +1570,22 @@ def roll(x, shifts, axis=None, name=None): out = helper.create_variable_for_type_inference(x.dtype) if isinstance(shifts, Variable): - helper.append_op( - type='roll', - inputs={'X': x, - "ShiftsTensor": shifts}, - outputs={'Out': out}, - attrs={'axis': axis}) + helper.append_op(type='roll', + inputs={ + 'X': x, + "ShiftsTensor": shifts + }, + outputs={'Out': out}, + attrs={'axis': axis}) else: check_type(shifts, 'shifts', (list, tuple), 'roll') - helper.append_op( - type='roll', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'axis': axis, - 'shifts': shifts}) + helper.append_op(type='roll', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'axis': axis, + 'shifts': shifts + }) return out @@ -1663,10 +1690,10 @@ def stack(x, axis=0, name=None): ) == core.VarDesc.VarType.LOD_TENSOR_ARRAY: x = [x] else: - raise TypeError("The type of '%s' in %s must be %s, but received %s" - % ('x', 'stack', - 'list[Tensor], tuple[Tensor] or TensorArray', - type(x))) + raise TypeError( + "The type of '%s' in %s must be %s, but received %s" % + ('x', 'stack', 'list[Tensor], tuple[Tensor] or TensorArray', + type(x))) helper = LayerHelper('stack', **locals()) @@ -1680,19 +1707,21 @@ def stack(x, axis=0, name=None): check_variable_and_dtype(i, 'x', \ ['float16', 'float32', 'float64', 'int32', 'int64'], 'stack') - helper.append_op( - type='tensor_array_to_tensor', - inputs={'X': x[0]}, - outputs={'Out': [out], - 'OutIndex': [out_index]}, - attrs={'axis': axis, - 'use_stack': True}) + helper.append_op(type='tensor_array_to_tensor', + inputs={'X': x[0]}, + outputs={ + 'Out': [out], + 'OutIndex': [out_index] + }, + attrs={ + 'axis': axis, + 'use_stack': True + }) else: - helper.append_op( - type='stack', - inputs={'X': x}, - outputs={'Y': out}, - attrs={'axis': axis}) + helper.append_op(type='stack', + inputs={'X': x}, + outputs={'Y': out}, + attrs={'axis': axis}) return out @@ -1766,8 +1795,8 @@ def split(x, num_or_sections, axis=0, name=None): if utils._contain_var(num_or_sections): for index, item in enumerate(num_or_sections): if isinstance(item, Variable): - num_or_sections[index] = num_or_sections[index].numpy()[ - 0] + num_or_sections[index] = num_or_sections[index].numpy( + )[0] attrs += ('sections', list(num_or_sections)) else: attrs += ('sections', list(num_or_sections)) @@ -1809,8 +1838,11 @@ def split(x, num_or_sections, axis=0, name=None): idx) unk_dim_idx = idx temp_out = helper.create_variable_for_type_inference('int32') - fill_constant( - [1], 'int32', dim_size, force_cpu=True, out=temp_out) + fill_constant([1], + 'int32', + dim_size, + force_cpu=True, + out=temp_out) tensor_list.append(temp_out) return tensor_list @@ -1836,8 +1868,8 @@ def split(x, num_or_sections, axis=0, name=None): dim], 'len(num_or_sections) must not be more than input.shape[dim].' 
num = len(num_or_sections) attrs['sections'] = list( - map(lambda ele: -1 if isinstance(ele, Variable) else ele, - num_or_sections)) + map(lambda ele: -1 + if isinstance(ele, Variable) else ele, num_or_sections)) if utils._contain_var(num_or_sections): inputs['SectionsTensorList'] = _get_SectionsTensorList( num_or_sections) @@ -1846,8 +1878,10 @@ def split(x, num_or_sections, axis=0, name=None): helper.create_variable_for_type_inference(dtype=helper.input_dtype()) for i in range(num) ] - helper.append_op( - type='split', inputs=inputs, outputs={'Out': outs}, attrs=attrs) + helper.append_op(type='split', + inputs=inputs, + outputs={'Out': outs}, + attrs=attrs) return outs @@ -1947,12 +1981,13 @@ def squeeze(x, axis=None, name=None): check_type(axes, 'axis/axes', (list, tuple), 'squeeze') out = helper.create_variable_for_type_inference(dtype=input.dtype) x_shape = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type="squeeze2", - inputs={"X": input}, - attrs={"axes": axes}, - outputs={"Out": out, - "XShape": x_shape}) + helper.append_op(type="squeeze2", + inputs={"X": input}, + attrs={"axes": axes}, + outputs={ + "Out": out, + "XShape": x_shape + }) return out @@ -2058,23 +2093,22 @@ def unique_consecutive(x, "return_counts": return_counts, "axis": axis, } - out = helper.create_variable_for_type_inference( - dtype=x.dtype, stop_gradient=True) - inverse = helper.create_variable_for_type_inference( - dtype=attr_dtype, stop_gradient=True) - counts = helper.create_variable_for_type_inference( - dtype=attr_dtype, stop_gradient=True) + out = helper.create_variable_for_type_inference(dtype=x.dtype, + stop_gradient=True) + inverse = helper.create_variable_for_type_inference(dtype=attr_dtype, + stop_gradient=True) + counts = helper.create_variable_for_type_inference(dtype=attr_dtype, + stop_gradient=True) outputs = {"Out": out, "Index": inverse, "Counts": counts} outs = [out] if return_inverse: outs.append(inverse) if return_counts: outs.append(counts) - helper.append_op( - type="unique_consecutive", - inputs={"X": x}, - attrs=attrs, - outputs=outputs) + helper.append_op(type="unique_consecutive", + inputs={"X": x}, + attrs=attrs, + outputs=outputs) if len(outs) == 1: return outs[0] return tuple(outs) @@ -2177,14 +2211,14 @@ def unique(x, "axis": axis, "is_sorted": True } - out = helper.create_variable_for_type_inference( - dtype=x.dtype, stop_gradient=True) - indices = helper.create_variable_for_type_inference( - dtype=attr_dtype, stop_gradient=True) - inverse = helper.create_variable_for_type_inference( - dtype=attr_dtype, stop_gradient=True) - counts = helper.create_variable_for_type_inference( - dtype=attr_dtype, stop_gradient=True) + out = helper.create_variable_for_type_inference(dtype=x.dtype, + stop_gradient=True) + indices = helper.create_variable_for_type_inference(dtype=attr_dtype, + stop_gradient=True) + inverse = helper.create_variable_for_type_inference(dtype=attr_dtype, + stop_gradient=True) + counts = helper.create_variable_for_type_inference(dtype=attr_dtype, + stop_gradient=True) outputs = { "Out": out, "Indices": indices, @@ -2199,8 +2233,10 @@ def unique(x, if return_counts: outs.append(counts) - helper.append_op( - type="unique", inputs={"X": x}, attrs=attrs, outputs=outputs) + helper.append_op(type="unique", + inputs={"X": x}, + attrs=attrs, + outputs=outputs) if len(outs) == 1: return outs[0] @@ -2301,12 +2337,13 @@ def unsqueeze(x, axis, name=None): out = helper.create_variable_for_type_inference(dtype=input.dtype) x_shape = 
helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type="unsqueeze2", - inputs=inputs, - attrs=attrs, - outputs={"Out": out, - "XShape": x_shape}) + helper.append_op(type="unsqueeze2", + inputs=inputs, + attrs=attrs, + outputs={ + "Out": out, + "XShape": x_shape + }) return out @@ -2397,21 +2434,25 @@ def gather(x, index, axis=None, name=None): dtype = helper.input_dtype('x') out = helper.create_variable_for_type_inference(dtype) if not isinstance(axis, Variable): - helper.append_op( - type="gather", - inputs={"X": x, - "Index": index}, - attrs={'axis': axis, - 'overwrite': False}, - outputs={"Out": out}) + helper.append_op(type="gather", + inputs={ + "X": x, + "Index": index + }, + attrs={ + 'axis': axis, + 'overwrite': False + }, + outputs={"Out": out}) else: - helper.append_op( - type="gather", - inputs={"X": x, - "Index": index, - "Axis": axis}, - attrs={"overwrite": False}, - outputs={"Out": out}) + helper.append_op(type="gather", + inputs={ + "X": x, + "Index": index, + "Axis": axis + }, + attrs={"overwrite": False}, + outputs={"Out": out}) return out @@ -2470,11 +2511,10 @@ def unbind(input, axis=0): helper.create_variable_for_type_inference(dtype=helper.input_dtype()) for i in range(num) ] - helper.append_op( - type="unbind", - inputs={"X": input}, - outputs={"Out": outs}, - attrs={"axis": axis}) + helper.append_op(type="unbind", + inputs={"X": input}, + outputs={"Out": outs}, + attrs={"axis": axis}) return outs @@ -2559,18 +2599,19 @@ def scatter(x, index, updates, overwrite=True, name=None): return _C_ops.scatter(x, index, updates, 'overwrite', overwrite) else: check_variable_and_dtype( - x, 'dtype', - ['float32', 'float64', 'float16', 'int32', 'int64'], 'scatter') + x, 'dtype', ['float32', 'float64', 'float16', 'int32', 'int64'], + 'scatter') check_type(overwrite, 'overwrite', bool, 'scatter') helper = LayerHelper('scatter', **locals()) out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type="scatter", - inputs={"X": x, - "Ids": index, - "Updates": updates}, - attrs={'overwrite': overwrite}, - outputs={"Out": out}) + helper.append_op(type="scatter", + inputs={ + "X": x, + "Ids": index, + "Updates": updates + }, + attrs={'overwrite': overwrite}, + outputs={"Out": out}) return out @@ -2666,12 +2707,13 @@ def scatter_nd_add(x, index, updates, name=None): helper = LayerHelper('scatter_nd_add', **locals()) dtype = helper.input_dtype(input_param_name='x') output = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="scatter_nd_add", - inputs={"X": x, - "Index": index, - "Updates": updates}, - outputs={"Out": output}) + helper.append_op(type="scatter_nd_add", + inputs={ + "X": x, + "Index": index, + "Updates": updates + }, + outputs={"Out": output}) return output @@ -2811,8 +2853,8 @@ def tile(x, repeat_times, name=None): check_type(repeat_times, 'repeat_times', (list, tuple, Variable), 'tile') if isinstance(repeat_times, Variable): - assert len(repeat_times.shape) == 1, ( - 'repeat_times must be an 1-D Tensor.') + assert len( + repeat_times.shape) == 1, ('repeat_times must be an 1-D Tensor.') else: for elem in repeat_times: if isinstance(elem, Variable): @@ -2823,8 +2865,9 @@ def tile(x, repeat_times, name=None): assert isinstance(elem, type_tuple), ( 'Elements in repeat_times must be 1-D Tensors or integers.') - check_variable_and_dtype( - x, 'x', ['bool', 'float32', 'float64', 'int32', 'int64'], 'tile') + check_variable_and_dtype(x, 'x', + ['bool', 'float32', 'float64', 'int32', 'int64'], + 'tile') 
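# Aside (illustrative, not part of the patch): the bodies reformatted in this
# file belong to user-facing tensor manipulation APIs; the signatures visible
# in the hunks are tile(x, repeat_times, name=None), expand(x, shape, name=None)
# and broadcast_to(x, shape, name=None). A minimal dynamic-mode usage sketch,
# assuming those public entry points:
import paddle

x = paddle.to_tensor([[1, 2, 3]])                # shape [1, 3]
tiled = paddle.tile(x, repeat_times=[2, 1])      # shape [2, 3]: data repeated
expanded = paddle.expand(x, shape=[4, 3])        # shape [4, 3]: broadcast copy
bcast = paddle.broadcast_to(x, shape=[4, 3])     # same semantics as expand here
print(tiled.shape, expanded.shape, bcast.shape)  # [2, 3] [4, 3] [4, 3]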
if convert_dtype(x.dtype) == 'bool' and x.stop_gradient == False: raise ValueError( "When the date type is bool for the input 'x' of tile op, you " @@ -2859,8 +2902,10 @@ def tile(x, repeat_times, name=None): dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='tile', inputs=inputs, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='tile', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) return out @@ -2896,8 +2941,9 @@ def expand_as(x, y, name=None): if _non_static_mode(): return _C_ops.expand_as_v2(x, 'target_shape', y.shape) - check_variable_and_dtype( - x, 'x', ['bool', 'float32', 'float64', 'int32', 'int64'], 'expand_as') + check_variable_and_dtype(x, 'x', + ['bool', 'float32', 'float64', 'int32', 'int64'], + 'expand_as') check_type(y, 'y', Variable, 'expand_as') if convert_dtype(x.dtype) == 'bool' and x.stop_gradient == False: @@ -2911,11 +2957,10 @@ def expand_as(x, y, name=None): helper = LayerHelper('expand_as', **locals()) dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='expand_as_v2', - inputs=inputs, - attrs={'target_shape': y.shape}, - outputs={'Out': out}) + helper.append_op(type='expand_as_v2', + inputs=inputs, + attrs={'target_shape': y.shape}, + outputs={'Out': out}) return out @@ -3000,8 +3045,10 @@ def broadcast_to(x, shape, name=None): dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='expand_v2', inputs=inputs, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='expand_v2', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) return out @@ -3088,8 +3135,10 @@ def expand(x, shape, name=None): dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='expand_v2', inputs=inputs, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='expand_v2', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) return out @@ -3263,12 +3312,13 @@ def reshape(x, shape, name=None): out = x if inplace else helper.create_variable_for_type_inference( dtype=x.dtype) x_shape = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="reshape2", - inputs=inputs, - attrs=attrs, - outputs={"Out": out, - "XShape": x_shape}) + helper.append_op(type="reshape2", + inputs=inputs, + attrs=attrs, + outputs={ + "Out": out, + "XShape": x_shape + }) return helper.append_activation(out) @@ -3376,11 +3426,12 @@ def gather_nd(x, index, name=None): helper = LayerHelper('gather_nd', **locals()) dtype = helper.input_dtype() output = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="gather_nd", - inputs={"X": x, - "Index": index}, - outputs={"Out": output}) + helper.append_op(type="gather_nd", + inputs={ + "X": x, + "Index": index + }, + outputs={"Out": output}) return output @@ -3572,8 +3623,10 @@ def strided_slice(x, axes, starts, ends, strides, name=None): attrs['infer_flags'] = infer_flags out = helper.create_variable_for_type_inference( dtype=helper.input_dtype('x')) - helper.append_op( - type='strided_slice', inputs=inputs, attrs=attrs, outputs={'Out': out}) + helper.append_op(type='strided_slice', + inputs=inputs, + attrs=attrs, + outputs={'Out': out}) return out @@ -3930,17 +3983,18 @@ def repeat_interleave(x, repeats, axis=None, name=None): out = helper.create_variable_for_type_inference(x.dtype) - 
helper.append_op( - type='repeat_interleave', - inputs={ - 'X': x, - 'RepeatsTensor': repeats if isinstance(repeats, Variable) else None - }, - outputs={'Out': out}, - attrs={ - 'dim': axis, - 'Repeats': repeats if isinstance(repeats, int) else 0 - }) + helper.append_op(type='repeat_interleave', + inputs={ + 'X': + x, + 'RepeatsTensor': + repeats if isinstance(repeats, Variable) else None + }, + outputs={'Out': out}, + attrs={ + 'dim': axis, + 'Repeats': repeats if isinstance(repeats, int) else 0 + }) return out @@ -4036,12 +4090,13 @@ def moveaxis(x, source, destination, name=None): helper = LayerHelper('moveaxis', **locals()) out = helper.create_variable_for_type_inference(x.dtype) x_shape = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='transpose2', - inputs={'X': [x]}, - outputs={'Out': [out], - 'XShape': [x_shape]}, - attrs={'axis': perm}) + helper.append_op(type='transpose2', + inputs={'X': [x]}, + outputs={ + 'Out': [out], + 'XShape': [x_shape] + }, + attrs={'axis': perm}) return out @@ -4059,7 +4114,7 @@ def non_negative_axis(arr, axis): def infer_broadcast_shape(arr, indices, axis): - # This function is used in take/put_along_axis + # This function is used in take/put_along_axis broadcast_shape_list = list(arr.shape) broadcast_shape_list[axis] = list(indices.shape)[axis] broadcast_shape = tuple(broadcast_shape_list) @@ -4126,12 +4181,13 @@ def take_along_axis(arr, indices, axis): helper = LayerHelper('take_along_axis', **locals()) dtype = helper.input_dtype() result = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="take_along_axis", - inputs={"Input": arr, - "Index": indices}, - attrs={"Axis": axis}, - outputs={"Result": result}) + helper.append_op(type="take_along_axis", + inputs={ + "Input": arr, + "Index": indices + }, + attrs={"Axis": axis}, + outputs={"Result": result}) return result @@ -4192,14 +4248,17 @@ def put_along_axis(arr, indices, values, axis, reduce='assign'): helper = LayerHelper('put_along_axis', **locals()) dtype = helper.input_dtype() result = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="put_along_axis", - inputs={"Input": arr, - "Index": indices, - "Value": values}, - attrs={"Axis": axis, - "Reduce": reduce}, - outputs={"Result": result}) + helper.append_op(type="put_along_axis", + inputs={ + "Input": arr, + "Index": indices, + "Value": values + }, + attrs={ + "Axis": axis, + "Reduce": reduce + }, + outputs={"Result": result}) return result diff --git a/python/paddle/tensor/ops.py b/python/paddle/tensor/ops.py index 7626552a85d..9a5fcb852a2 100644 --- a/python/paddle/tensor/ops.py +++ b/python/paddle/tensor/ops.py @@ -107,7 +107,8 @@ for _OP in set(__inplace_unary_func__): _func = generate_inplace_fn(_OP) globals()[_OP] = _func -add_sample_code(globals()["sigmoid"], r""" +add_sample_code( + globals()["sigmoid"], r""" Examples: .. code-block:: python @@ -121,7 +122,8 @@ Examples: """) -add_sample_code(globals()["silu"], r""" +add_sample_code( + globals()["silu"], r""" Examples: .. code-block:: python @@ -135,7 +137,8 @@ Examples: """) -add_sample_code(globals()["logsigmoid"], r""" +add_sample_code( + globals()["logsigmoid"], r""" Examples: .. code-block:: python @@ -149,7 +152,8 @@ Examples: """) -add_sample_code(globals()["exp"], r""" +add_sample_code( + globals()["exp"], r""" Examples: .. code-block:: python @@ -162,7 +166,8 @@ Examples: """) -add_sample_code(globals()["expm1"], r""" +add_sample_code( + globals()["expm1"], r""" Examples: .. 
code-block:: python @@ -175,7 +180,8 @@ Examples: """) -add_sample_code(globals()["tanh"], r""" +add_sample_code( + globals()["tanh"], r""" Examples: .. code-block:: python @@ -188,7 +194,8 @@ Examples: """) -add_sample_code(globals()["atan"], r""" +add_sample_code( + globals()["atan"], r""" Examples: .. code-block:: python @@ -201,7 +208,8 @@ Examples: """) -add_sample_code(globals()["tanh_shrink"], r""" +add_sample_code( + globals()["tanh_shrink"], r""" Examples: .. code-block:: python @@ -215,7 +223,8 @@ Examples: """) -add_sample_code(globals()["sqrt"], r""" +add_sample_code( + globals()["sqrt"], r""" Examples: .. code-block:: python @@ -228,7 +237,8 @@ Examples: """) -add_sample_code(globals()["rsqrt"], r""" +add_sample_code( + globals()["rsqrt"], r""" Examples: .. code-block:: python @@ -241,7 +251,8 @@ Examples: """) -add_sample_code(globals()["abs"], r""" +add_sample_code( + globals()["abs"], r""" Examples: .. code-block:: python @@ -254,7 +265,8 @@ Examples: """) -add_sample_code(globals()["ceil"], r""" +add_sample_code( + globals()["ceil"], r""" Examples: .. code-block:: python @@ -267,7 +279,8 @@ Examples: """) -add_sample_code(globals()["floor"], r""" +add_sample_code( + globals()["floor"], r""" Examples: .. code-block:: python @@ -280,7 +293,8 @@ Examples: """) -add_sample_code(globals()["cos"], r""" +add_sample_code( + globals()["cos"], r""" Examples: .. code-block:: python @@ -293,7 +307,8 @@ Examples: """) -add_sample_code(globals()["tan"], r""" +add_sample_code( + globals()["tan"], r""" Examples: .. code-block:: python @@ -306,7 +321,8 @@ Examples: """) -add_sample_code(globals()["acos"], r""" +add_sample_code( + globals()["acos"], r""" Examples: .. code-block:: python @@ -319,7 +335,8 @@ Examples: """) -add_sample_code(globals()["sin"], r""" +add_sample_code( + globals()["sin"], r""" Examples: .. code-block:: python @@ -332,7 +349,8 @@ Examples: """) -add_sample_code(globals()["asin"], r""" +add_sample_code( + globals()["asin"], r""" Examples: .. code-block:: python @@ -345,7 +363,8 @@ Examples: """) -add_sample_code(globals()["cosh"], r""" +add_sample_code( + globals()["cosh"], r""" Examples: .. code-block:: python @@ -358,7 +377,8 @@ Examples: """) -add_sample_code(globals()["sinh"], r""" +add_sample_code( + globals()["sinh"], r""" Examples: .. code-block:: python @@ -371,7 +391,8 @@ Examples: """) -add_sample_code(globals()["asinh"], r""" +add_sample_code( + globals()["asinh"], r""" Examples: .. code-block:: python @@ -384,7 +405,8 @@ Examples: """) -add_sample_code(globals()["acosh"], r""" +add_sample_code( + globals()["acosh"], r""" Examples: .. code-block:: python @@ -397,7 +419,8 @@ Examples: """) -add_sample_code(globals()["atanh"], r""" +add_sample_code( + globals()["atanh"], r""" Examples: .. code-block:: python @@ -410,7 +433,8 @@ Examples: """) -add_sample_code(globals()["round"], r""" +add_sample_code( + globals()["round"], r""" Examples: .. code-block:: python @@ -423,7 +447,8 @@ Examples: """) -add_sample_code(globals()["reciprocal"], r""" +add_sample_code( + globals()["reciprocal"], r""" Examples: .. code-block:: python @@ -436,7 +461,8 @@ Examples: """) -add_sample_code(globals()["square"], r""" +add_sample_code( + globals()["square"], r""" Examples: .. code-block:: python @@ -449,7 +475,8 @@ Examples: """) -add_sample_code(globals()["lgamma"], r""" +add_sample_code( + globals()["lgamma"], r""" Examples: .. 
code-block:: python @@ -462,7 +489,8 @@ Examples: """) -add_sample_code(globals()["softplus"], r""" +add_sample_code( + globals()["softplus"], r""" Examples: .. code-block:: python @@ -476,7 +504,8 @@ Examples: """) -add_sample_code(globals()["softsign"], r""" +add_sample_code( + globals()["softsign"], r""" Examples: .. code-block:: python diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index 0def896db8f..f43bda11295 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: define random functions +# TODO: define random functions from ..framework import core from ..framework import convert_np_dtype_to_dtype_, dygraph_only @@ -76,9 +76,11 @@ def bernoulli(x, name=None): helper = LayerHelper("randint", **locals()) out = helper.create_variable_for_type_inference( - dtype=x.dtype) # maybe set out to int32 ? - helper.append_op( - type='bernoulli', inputs={"X": x}, outputs={'Out': out}, attrs={}) + dtype=x.dtype) # maybe set out to int32 ? + helper.append_op(type='bernoulli', + inputs={"X": x}, + outputs={'Out': out}, + attrs={}) out.stop_gradient = True return out @@ -121,8 +123,10 @@ def poisson(x, name=None): helper = LayerHelper("poisson", **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='poisson', inputs={'X': x}, outputs={'Out': out}, attrs={}) + helper.append_op(type='poisson', + inputs={'X': x}, + outputs={'Out': out}, + attrs={}) return out @@ -189,12 +193,13 @@ def multinomial(x, num_samples=1, replacement=False, name=None): helper = LayerHelper("multinomial", **locals()) out = helper.create_variable_for_type_inference( dtype=convert_np_dtype_to_dtype_('int64')) - helper.append_op( - type='multinomial', - inputs={"X": x}, - outputs={'Out': out}, - attrs={'num_samples': num_samples, - 'replacement': replacement}) + helper.append_op(type='multinomial', + inputs={"X": x}, + outputs={'Out': out}, + attrs={ + 'num_samples': num_samples, + 'replacement': replacement + }) out.stop_gradient = True return out @@ -239,15 +244,14 @@ def gaussian(shape, mean=0.0, std=1.0, dtype=None, name=None): if in_dygraph_mode(): shape = utils.convert_shape_to_list(shape) place = _current_expected_place() - return _C_ops.final_state_gaussian_random(shape, - float(mean), + return _C_ops.final_state_gaussian_random(shape, float(mean), float(std), seed, dtype, place) if _in_legacy_dygraph(): shape = utils.convert_shape_to_list(shape) - return _C_ops.gaussian_random('shape', shape, 'mean', - float(mean), 'std', + return _C_ops.gaussian_random('shape', + shape, 'mean', float(mean), 'std', float(std), 'seed', seed, 'dtype', dtype) check_shape(shape, op_type_for_check) @@ -261,16 +265,17 @@ def gaussian(shape, mean=0.0, std=1.0, dtype=None, name=None): 'dtype': dtype, 'use_mkldnn': False } - utils.get_shape_tensor_inputs( - inputs=inputs, attrs=attrs, shape=shape, op_type=op_type_for_check) + utils.get_shape_tensor_inputs(inputs=inputs, + attrs=attrs, + shape=shape, + op_type=op_type_for_check) helper = LayerHelper('gaussian', **locals()) out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='gaussian_random', - inputs=inputs, - outputs={'Out': out}, - attrs=attrs) + helper.append_op(type='gaussian_random', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) out.stop_gradient = True return out @@ -543,23 +548,21 @@ def uniform(shape, dtype=None, min=-1.0, 
max=1.0, seed=0, name=None): dtype = paddle.framework.get_default_dtype() if dtype not in ['float32', 'float64']: raise TypeError( - "uniform/rand only supports [float32, float64], but the default dtype is {}". - format(dtype)) + "uniform/rand only supports [float32, float64], but the default dtype is {}" + .format(dtype)) if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) if in_dygraph_mode(): shape = utils.convert_shape_to_list(shape) - return _C_ops.final_state_uniform_random(shape, dtype, - float(min), + return _C_ops.final_state_uniform_random(shape, dtype, float(min), float(max), seed, _current_expected_place()) if _in_legacy_dygraph(): shape = utils.convert_shape_to_list(shape) - return _C_ops.uniform_random('shape', shape, 'min', - float(min), 'max', + return _C_ops.uniform_random('shape', shape, 'min', float(min), 'max', float(max), 'seed', seed, 'dtype', dtype) check_type(shape, 'shape', (list, tuple, Variable), 'uniform/rand') @@ -567,14 +570,17 @@ def uniform(shape, dtype=None, min=-1.0, max=1.0, seed=0, name=None): inputs = dict() attrs = {'seed': seed, 'min': min, 'max': max, 'dtype': dtype} - utils.get_shape_tensor_inputs( - inputs=inputs, attrs=attrs, shape=shape, op_type='uniform/rand') + utils.get_shape_tensor_inputs(inputs=inputs, + attrs=attrs, + shape=shape, + op_type='uniform/rand') helper = LayerHelper("uniform", **locals()) out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="uniform_random", inputs=inputs, attrs=attrs, - outputs={"Out": out}) + helper.append_op(type="uniform_random", + inputs=inputs, + attrs=attrs, + outputs={"Out": out}) out.stop_gradient = True return out @@ -686,8 +692,8 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None): if high is None: if low <= 0: raise ValueError( - "If high is None, low must be greater than 0, but received low = {0}.". - format(low)) + "If high is None, low must be greater than 0, but received low = {0}." + .format(low)) high = low low = 0 if dtype is None: @@ -713,13 +719,17 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None): inputs = dict() attrs = {'low': low, 'high': high, 'seed': 0, 'dtype': dtype} - utils.get_shape_tensor_inputs( - inputs=inputs, attrs=attrs, shape=shape, op_type='randint') + utils.get_shape_tensor_inputs(inputs=inputs, + attrs=attrs, + shape=shape, + op_type='randint') helper = LayerHelper("randint", **locals()) out = helper.create_variable_for_type_inference(dtype=dtype) - helper.append_op( - type='randint', inputs=inputs, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='randint', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) out.stop_gradient = True return out @@ -851,8 +861,8 @@ def randint_like(x, low=0, high=None, dtype=None, name=None): if high is None: if low <= 0: raise ValueError( - "If high is None, low must be greater than 0, but received low = {0}.". - format(low)) + "If high is None, low must be greater than 0, but received low = {0}." 
+ .format(low)) high = low low = 0 if dtype is None: @@ -875,8 +885,8 @@ def randint_like(x, low=0, high=None, dtype=None, name=None): check_shape(shape, 'randint_like') check_dtype(dtype, 'dtype', - ['bool', 'float16', 'float32', 'float64', 'int32', - 'int64'], 'randint_like') + ['bool', 'float16', 'float32', 'float64', 'int32', 'int64'], + 'randint_like') inputs = dict() attrs = { @@ -885,14 +895,18 @@ def randint_like(x, low=0, high=None, dtype=None, name=None): 'seed': 0, 'dtype': core.VarDesc.VarType.INT64 } - utils.get_shape_tensor_inputs( - inputs=inputs, attrs=attrs, shape=shape, op_type='randint_like') + utils.get_shape_tensor_inputs(inputs=inputs, + attrs=attrs, + shape=shape, + op_type='randint_like') helper = LayerHelper("randint", **locals()) out = helper.create_variable_for_type_inference( dtype=core.VarDesc.VarType.INT64) - helper.append_op( - type='randint', inputs=inputs, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='randint', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) out.stop_gradient = True out = paddle.cast(out, dtype) return out @@ -944,8 +958,10 @@ def randperm(n, dtype="int64", name=None): helper = LayerHelper("randperm", **locals()) out = helper.create_variable_for_type_inference(dtype) attrs = {'n': n, 'dtype': dtype, 'seed': 0} - helper.append_op( - type='randperm', inputs={}, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='randperm', + inputs={}, + outputs={'Out': out}, + attrs=attrs) out.stop_gradient = True return out @@ -1042,9 +1058,8 @@ def exponential_(x, lam=1.0, name=None): check_variable_and_dtype(x, "x", ["float32", "float64"], "exponential") helper = LayerHelper("exponential", **locals()) - helper.append_op( - type='exponential', - inputs={"X": x}, - outputs={'Out': x}, - attrs={"lambda": lam}) + helper.append_op(type='exponential', + inputs={"X": x}, + outputs={'Out': x}, + attrs={"lambda": lam}) return x diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py index 02a71a80b9e..42087ac7daf 100644 --- a/python/paddle/tensor/search.py +++ b/python/paddle/tensor/search.py @@ -25,7 +25,7 @@ from paddle.common_ops_import import VarDesc from paddle import _C_ops from .logic import logical_not -# TODO: define searching & indexing functions of a tensor +# TODO: define searching & indexing functions of a tensor # from ..fluid.layers import has_inf #DEFINE_ALIAS # from ..fluid.layers import has_nan #DEFINE_ALIAS @@ -106,17 +106,20 @@ def argsort(x, axis=-1, descending=False, name=None): 'argsort') helper = LayerHelper("argsort", **locals()) - out = helper.create_variable_for_type_inference( - dtype=x.dtype, stop_gradient=True) - ids = helper.create_variable_for_type_inference( - VarDesc.VarType.INT64, stop_gradient=True) - helper.append_op( - type='argsort', - inputs={'X': x}, - outputs={'Out': out, - 'Indices': ids}, - attrs={'axis': axis, - 'descending': descending}) + out = helper.create_variable_for_type_inference(dtype=x.dtype, + stop_gradient=True) + ids = helper.create_variable_for_type_inference(VarDesc.VarType.INT64, + stop_gradient=True) + helper.append_op(type='argsort', + inputs={'X': x}, + outputs={ + 'Out': out, + 'Indices': ids + }, + attrs={ + 'axis': axis, + 'descending': descending + }) return ids @@ -194,8 +197,10 @@ def argmax(x, axis=None, keepdim=False, dtype="int64", name=None): attrs['axis'] = axis attrs['flatten'] = flatten attrs['dtype'] = var_dtype - helper.append_op( - type='arg_max', inputs={'X': x}, outputs={'Out': [out]}, attrs=attrs) + helper.append_op(type='arg_max', 
+ inputs={'X': x}, + outputs={'Out': [out]}, + attrs=attrs) out.stop_gradient = True return out @@ -276,8 +281,10 @@ def argmin(x, axis=None, keepdim=False, dtype="int64", name=None): attrs['axis'] = axis attrs['flatten'] = flatten attrs['dtype'] = var_dtype - helper.append_op( - type='arg_min', inputs={'X': x}, outputs={'Out': [out]}, attrs=attrs) + helper.append_op(type='arg_min', + inputs={'X': x}, + outputs={'Out': [out]}, + attrs=attrs) out.stop_gradient = True return out @@ -334,12 +341,13 @@ def index_select(x, index, axis=0, name=None): out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='index_select', - inputs={'X': x, - 'Index': index}, - outputs={'Out': out}, - attrs={'dim': axis}) + helper.append_op(type='index_select', + inputs={ + 'X': x, + 'Index': index + }, + outputs={'Out': out}, + attrs={'dim': axis}) return out @@ -409,10 +417,9 @@ def nonzero(x, as_tuple=False): outs = helper.create_variable_for_type_inference( dtype=core.VarDesc.VarType.INT64) - helper.append_op( - type='where_index', - inputs={'Condition': x}, - outputs={'Out': [outs]}) + helper.append_op(type='where_index', + inputs={'Condition': x}, + outputs={'Out': [outs]}) if not as_tuple: return outs @@ -421,8 +428,7 @@ def nonzero(x, as_tuple=False): else: for i in range(rank): list_out.append( - paddle.slice( - outs, axes=[1], starts=[i], ends=[i + 1])) + paddle.slice(outs, axes=[1], starts=[i], ends=[i + 1])) return tuple(list_out) @@ -491,17 +497,20 @@ def sort(x, axis=-1, descending=False, name=None): outs, _ = _C_ops.argsort(x, 'axis', axis, 'descending', descending) return outs helper = LayerHelper("sort", **locals()) - out = helper.create_variable_for_type_inference( - dtype=x.dtype, stop_gradient=False) - ids = helper.create_variable_for_type_inference( - VarDesc.VarType.INT64, stop_gradient=True) - helper.append_op( - type='argsort', - inputs={'X': x}, - outputs={'Out': out, - 'Indices': ids}, - attrs={'axis': axis, - 'descending': descending}) + out = helper.create_variable_for_type_inference(dtype=x.dtype, + stop_gradient=False) + ids = helper.create_variable_for_type_inference(VarDesc.VarType.INT64, + stop_gradient=True) + helper.append_op(type='argsort', + inputs={'X': x}, + outputs={ + 'Out': out, + 'Indices': ids + }, + attrs={ + 'axis': axis, + 'descending': descending + }) return out @@ -550,12 +559,13 @@ def mode(x, axis=-1, keepdim=False, name=None): values = helper.create_variable_for_type_inference(dtype=x.dtype) indices = helper.create_variable_for_type_inference(dtype="int64") - helper.append_op( - type="mode", - inputs=inputs, - outputs={"Out": [values], - "Indices": [indices]}, - attrs=attrs) + helper.append_op(type="mode", + inputs=inputs, + outputs={ + "Out": [values], + "Indices": [indices] + }, + attrs=attrs) indices.stop_gradient = True return values, indices @@ -620,10 +630,12 @@ def where(condition, x=None, y=None, name=None): if not paddle.in_dynamic_mode(): check_variable_and_dtype(condition, 'condition', ['bool'], 'where') - check_variable_and_dtype( - x, 'x', ['float32', 'float64', 'int32', 'int64'], 'where') - check_variable_and_dtype( - y, 'y', ['float32', 'float64', 'int32', 'int64'], 'where') + check_variable_and_dtype(x, 'x', + ['float32', 'float64', 'int32', 'int64'], + 'where') + check_variable_and_dtype(y, 'y', + ['float32', 'float64', 'int32', 'int64'], + 'where') condition_shape = list(condition.shape) x_shape = list(x.shape) @@ -665,14 +677,13 @@ def where(condition, x=None, y=None, name=None): helper = LayerHelper("where", 
**locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='where', - inputs={ - 'Condition': broadcast_condition, - 'X': broadcast_x, - 'Y': broadcast_y - }, - outputs={'Out': [out]}) + helper.append_op(type='where', + inputs={ + 'Condition': broadcast_condition, + 'X': broadcast_x, + 'Y': broadcast_y + }, + outputs={'Out': [out]}) return out @@ -764,11 +775,12 @@ def index_sample(x, index): 'paddle.tensor.search.index_sample') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='index_sample', - inputs={'X': x, - 'Index': index}, - outputs={'Out': out}) + helper.append_op(type='index_sample', + inputs={ + 'X': x, + 'Index': index + }, + outputs={'Out': out}) return out @@ -814,9 +826,12 @@ def masked_select(x, mask, name=None): check_variable_and_dtype(mask, 'mask', ['bool'], 'paddle.tensor.search.masked_select') out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='masked_select', inputs={'X': x, - 'Mask': mask}, outputs={'Y': out}) + helper.append_op(type='masked_select', + inputs={ + 'X': x, + 'Mask': mask + }, + outputs={'Y': out}) return out @@ -884,13 +899,11 @@ def topk(x, k, axis=None, largest=True, sorted=True, name=None): if _non_static_mode(): if axis is None: - out, indices = _C_ops.top_k_v2(x, 'k', - int(k), 'largest', largest, 'sorted', - sorted) + out, indices = _C_ops.top_k_v2(x, 'k', int(k), 'largest', largest, + 'sorted', sorted) else: - out, indices = _C_ops.top_k_v2(x, 'k', - int(k), 'axis', axis, 'largest', - largest, 'sorted', sorted) + out, indices = _C_ops.top_k_v2(x, 'k', int(k), 'axis', axis, + 'largest', largest, 'sorted', sorted) return out, indices helper = LayerHelper("top_k_v2", **locals()) @@ -908,12 +921,13 @@ def topk(x, k, axis=None, largest=True, sorted=True, name=None): values = helper.create_variable_for_type_inference(dtype=x.dtype) indices = helper.create_variable_for_type_inference(dtype="int64") - helper.append_op( - type="top_k_v2", - inputs=inputs, - outputs={"Out": [values], - "Indices": [indices]}, - attrs=attrs) + helper.append_op(type="top_k_v2", + inputs=inputs, + outputs={ + "Out": [values], + "Indices": [indices] + }, + attrs=attrs) indices.stop_gradient = True return values, indices @@ -982,13 +996,16 @@ def searchsorted(sorted_sequence, helper = LayerHelper('searchsorted', **locals()) out_type = 'int32' if out_int32 else 'int64' out = helper.create_variable_for_type_inference(dtype=out_type) - helper.append_op( - type='searchsorted', - inputs={'SortedSequence': sorted_sequence, - "Values": values}, - outputs={'Out': out}, - attrs={"out_int32": out_int32, - "right": right}) + helper.append_op(type='searchsorted', + inputs={ + 'SortedSequence': sorted_sequence, + "Values": values + }, + outputs={'Out': out}, + attrs={ + "out_int32": out_int32, + "right": right + }) return out @@ -1050,11 +1067,12 @@ def kthvalue(x, k, axis=None, keepdim=False, name=None): values = helper.create_variable_for_type_inference(dtype=x.dtype) indices = helper.create_variable_for_type_inference(dtype="int64") - helper.append_op( - type="kthvalue", - inputs=inputs, - outputs={"Out": [values], - "Indices": [indices]}, - attrs=attrs) + helper.append_op(type="kthvalue", + inputs=inputs, + outputs={ + "Out": [values], + "Indices": [indices] + }, + attrs=attrs) indices.stop_gradient = True return values, indices diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py index 372454b97a6..2073e241a3b 100644 --- 
a/python/paddle/tensor/stat.py +++ b/python/paddle/tensor/stat.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: define statistical functions of a tensor +# TODO: define statistical functions of a tensor import numpy as np from ..static import Variable @@ -107,8 +107,10 @@ def mean(x, axis=None, keepdim=False, name=None): helper = LayerHelper('mean', **locals()) attrs = {'dim': axis, 'keep_dim': keepdim, 'reduce_all': reduce_all} out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='reduce_mean', inputs={'X': x}, outputs={'Out': out}, attrs=attrs) + helper.append_op(type='reduce_mean', + inputs={'X': x}, + outputs={'Out': out}, + attrs=attrs) return out @@ -305,8 +307,8 @@ def nanmedian(x, axis=None, keepdim=True, name=None): ) for i in range(len(axis)): - if not isinstance(axis[i], int) or not (axis[i] < dims and - axis[i] >= -dims): + if not isinstance(axis[i], int) or not (axis[i] < dims + and axis[i] >= -dims): raise ValueError( "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))." ) @@ -329,12 +331,13 @@ def nanmedian(x, axis=None, keepdim=True, name=None): attrs = {'axis': axis, 'keepdim': keepdim} out = helper.create_variable_for_type_inference(x.dtype) medians = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='nanmedian', - inputs={'X': x}, - outputs={'Out': out, - 'MedianIndex': medians}, - attrs=attrs) + helper.append_op(type='nanmedian', + inputs={'X': x}, + outputs={ + 'Out': out, + 'MedianIndex': medians + }, + attrs=attrs) return out @@ -412,13 +415,13 @@ def median(x, axis=None, keepdim=False, name=None): tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1]) out_tensor = paddle.cast(out_tensor, dtype=dtype) / 2 else: - out_tensor = paddle.cast( - paddle.slice( - tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1]), - dtype=dtype) + out_tensor = paddle.cast(paddle.slice(tensor_topk, + axes=[axis], + starts=[kth], + ends=[kth + 1]), + dtype=dtype) out_tensor = out_tensor + paddle.sum( - paddle.cast( - paddle.isnan(x), dtype=dtype) * x, axis=axis, keepdim=True) + paddle.cast(paddle.isnan(x), dtype=dtype) * x, axis=axis, keepdim=True) if not keepdim or is_flatten: if not is_flatten: newshape = x.shape[:axis] + x.shape[axis + 1:] @@ -537,14 +540,15 @@ def _compute_quantile(x, q, axis=None, keepdim=False, ignore_nan=False): for index in indices: indices_below = paddle.floor(index).astype(paddle.int32) indices_upper = paddle.ceil(index).astype(paddle.int32) - tensor_upper = paddle.take_along_axis( - sorted_tensor, indices_upper, axis=axis) - tensor_below = paddle.take_along_axis( - sorted_tensor, indices_below, axis=axis) + tensor_upper = paddle.take_along_axis(sorted_tensor, + indices_upper, + axis=axis) + tensor_below = paddle.take_along_axis(sorted_tensor, + indices_below, + axis=axis) weights = (index - indices_below.astype('float64')) - out = paddle.lerp( - tensor_below.astype('float64'), - tensor_upper.astype('float64'), weights) + out = paddle.lerp(tensor_below.astype('float64'), + tensor_upper.astype('float64'), weights) if not keepdim: out = paddle.squeeze(out, axis=axis) else: diff --git a/python/paddle/tensor/tensor.py b/python/paddle/tensor/tensor.py index ec7b50c63c0..16963516090 100644 --- a/python/paddle/tensor/tensor.py +++ b/python/paddle/tensor/tensor.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# TODO: define the basic tensor classes +# TODO: define the basic tensor classes diff --git a/python/paddle/tensor/to_string.py b/python/paddle/tensor/to_string.py index 7935b4f2755..fb21c793f42 100644 --- a/python/paddle/tensor/to_string.py +++ b/python/paddle/tensor/to_string.py @@ -196,8 +196,8 @@ def _format_tensor(var, summary, indent=0, max_width=0, signed=False): items[i:i + items_per_line] for i in range(0, len(items), items_per_line) ] - s = (',\n' + ' ' * - (indent + 1)).join([', '.join(line) for line in lines]) + s = (',\n' + ' ' * (indent + 1)).join( + [', '.join(line) for line in lines]) return '[' + s + ']' else: # recursively handle all dimensions @@ -249,17 +249,19 @@ def to_string(var, prefix='Tensor'): max_width, signed = _get_max_width(_to_summary(np_var)) - data = _format_tensor( - np_var, summary, indent=indent, max_width=max_width, signed=signed) + data = _format_tensor(np_var, + summary, + indent=indent, + max_width=max_width, + signed=signed) - return _template.format( - prefix=prefix, - shape=var.shape, - dtype=dtype, - place=var._place_str, - stop_gradient=var.stop_gradient, - indent=' ' * indent, - data=data) + return _template.format(prefix=prefix, + shape=var.shape, + dtype=dtype, + place=var._place_str, + stop_gradient=var.stop_gradient, + indent=' ' * indent, + data=data) def _format_dense_tensor(tensor, indent): @@ -281,8 +283,11 @@ def _format_dense_tensor(tensor, indent): max_width, signed = _get_max_width(_to_summary(np_tensor)) - data = _format_tensor( - np_tensor, sumary, indent=indent, max_width=max_width, signed=signed) + data = _format_tensor(np_tensor, + sumary, + indent=indent, + max_width=max_width, + signed=signed) return data @@ -292,41 +297,39 @@ def sparse_tensor_to_string(tensor, prefix='Tensor'): _template = "{prefix}(shape={shape}, dtype={dtype}, place={place}, stop_gradient={stop_gradient}, \n{indent}{indices}, \n{indent}{values})" indices_tensor = tensor.indices() values_tensor = tensor.values() - indices_data = 'indices=' + _format_dense_tensor(indices_tensor, indent - + len('indices=')) - values_data = 'values=' + _format_dense_tensor(values_tensor, indent + - len('values=')) - return _template.format( - prefix=prefix, - shape=tensor.shape, - dtype=tensor.dtype, - place=tensor._place_str, - stop_gradient=tensor.stop_gradient, - indent=' ' * indent, - indices=indices_data, - values=values_data) + indices_data = 'indices=' + _format_dense_tensor( + indices_tensor, indent + len('indices=')) + values_data = 'values=' + _format_dense_tensor(values_tensor, + indent + len('values=')) + return _template.format(prefix=prefix, + shape=tensor.shape, + dtype=tensor.dtype, + place=tensor._place_str, + stop_gradient=tensor.stop_gradient, + indent=' ' * indent, + indices=indices_data, + values=values_data) else: _template = "{prefix}(shape={shape}, dtype={dtype}, place={place}, stop_gradient={stop_gradient}, \n{indent}{crows}, \n{indent}{cols}, \n{indent}{values})" crows_tensor = tensor.crows() cols_tensor = tensor.cols() elements_tensor = tensor.values() - crows_data = 'crows=' + _format_dense_tensor(crows_tensor, indent + - len('crows=')) - cols_data = 'cols=' + _format_dense_tensor(cols_tensor, indent + - len('cols=')) - values_data = 'values=' + _format_dense_tensor(elements_tensor, indent + - len('values=')) - - return _template.format( - prefix=prefix, - shape=tensor.shape, - dtype=tensor.dtype, - place=tensor._place_str, - stop_gradient=tensor.stop_gradient, - indent=' ' * indent, - crows=crows_data, - cols=cols_data, - values=values_data) + 
crows_data = 'crows=' + _format_dense_tensor(crows_tensor, + indent + len('crows=')) + cols_data = 'cols=' + _format_dense_tensor(cols_tensor, + indent + len('cols=')) + values_data = 'values=' + _format_dense_tensor(elements_tensor, + indent + len('values=')) + + return _template.format(prefix=prefix, + shape=tensor.shape, + dtype=tensor.dtype, + place=tensor._place_str, + stop_gradient=tensor.stop_gradient, + indent=' ' * indent, + crows=crows_data, + cols=cols_data, + values=values_data) def tensor_to_string(tensor, prefix='Tensor'): @@ -345,11 +348,10 @@ def tensor_to_string(tensor, prefix='Tensor'): return "Tensor(Not initialized)" else: data = _format_dense_tensor(tensor, indent) - return _template.format( - prefix=prefix, - shape=tensor.shape, - dtype=dtype, - place=tensor._place_str, - stop_gradient=tensor.stop_gradient, - indent=' ' * indent, - data=data) + return _template.format(prefix=prefix, + shape=tensor.shape, + dtype=dtype, + place=tensor._place_str, + stop_gradient=tensor.stop_gradient, + indent=' ' * indent, + data=data) diff --git a/python/paddle/tests/dist_hapi_mnist_dynamic.py b/python/paddle/tests/dist_hapi_mnist_dynamic.py index de0518e229b..08d6629c78a 100644 --- a/python/paddle/tests/dist_hapi_mnist_dynamic.py +++ b/python/paddle/tests/dist_hapi_mnist_dynamic.py @@ -32,6 +32,7 @@ from paddle.vision.datasets import MNIST class MnistDataset(MNIST): + def __init__(self, mode, return_label=True): super(MnistDataset, self).__init__(mode=mode) self.return_label = return_label @@ -58,6 +59,7 @@ def compute_accuracy(pred, gt): @unittest.skipIf(not fluid.is_compiled_with_cuda(), 'CPU testing is not supported') class TestDistTraning(unittest.TestCase): + def test_dynamic_multiple_gpus(self): device = set_device('gpu') @@ -68,8 +70,9 @@ class TestDistTraning(unittest.TestCase): labels = [Input([None, 1], 'int64', 'label')] model = Model(LeNet(), inputs, labels) - optim = fluid.optimizer.Momentum( - learning_rate=0.001, momentum=.9, parameter_list=model.parameters()) + optim = fluid.optimizer.Momentum(learning_rate=0.001, + momentum=.9, + parameter_list=model.parameters()) model.prepare(optim, CrossEntropyLoss(), Accuracy()) train_dataset = MnistDataset(mode='train') @@ -85,8 +88,9 @@ class TestDistTraning(unittest.TestCase): eval_result = model.evaluate(val_dataset, batch_size=batch_size) - output = model.predict( - test_dataset, batch_size=batch_size, stack_outputs=True) + output = model.predict(test_dataset, + batch_size=batch_size, + stack_outputs=True) np.testing.assert_equal(output[0].shape[0], len(test_dataset)) diff --git a/python/paddle/tests/dist_hapi_mnist_static.py b/python/paddle/tests/dist_hapi_mnist_static.py index 6120ae90e99..b143326780f 100644 --- a/python/paddle/tests/dist_hapi_mnist_static.py +++ b/python/paddle/tests/dist_hapi_mnist_static.py @@ -32,6 +32,7 @@ from paddle.vision.datasets import MNIST class MnistDataset(MNIST): + def __init__(self, mode, return_label=True): super(MnistDataset, self).__init__(mode=mode) self.return_label = return_label @@ -58,6 +59,7 @@ def compute_accuracy(pred, gt): @unittest.skipIf(not fluid.is_compiled_with_cuda(), 'CPU testing is not supported') class TestDistTraning(unittest.TestCase): + def test_static_multiple_gpus(self): paddle.enable_static() device = set_device('gpu') @@ -69,8 +71,9 @@ class TestDistTraning(unittest.TestCase): labels = [Input([None, 1], 'int64', 'label')] model = Model(LeNet(), inputs, labels) - optim = fluid.optimizer.Momentum( - learning_rate=0.001, momentum=.9, 
parameter_list=model.parameters()) + optim = fluid.optimizer.Momentum(learning_rate=0.001, + momentum=.9, + parameter_list=model.parameters()) model.prepare(optim, CrossEntropyLoss(), Accuracy()) train_dataset = MnistDataset(mode='train') @@ -86,8 +89,9 @@ class TestDistTraning(unittest.TestCase): eval_result = model.evaluate(val_dataset, batch_size=batch_size) - output = model.predict( - test_dataset, batch_size=batch_size, stack_outputs=True) + output = model.predict(test_dataset, + batch_size=batch_size, + stack_outputs=True) np.testing.assert_equal(output[0].shape[0], len(test_dataset)) diff --git a/python/paddle/tests/dist_hapi_pure_fp16_static.py b/python/paddle/tests/dist_hapi_pure_fp16_static.py index 0174e4f54e3..d6a18f145b4 100644 --- a/python/paddle/tests/dist_hapi_pure_fp16_static.py +++ b/python/paddle/tests/dist_hapi_pure_fp16_static.py @@ -31,6 +31,7 @@ from paddle.vision.models import LeNet @unittest.skipIf(not fluid.is_compiled_with_cuda(), 'CPU testing is not supported') class TestDistTraningWithPureFP16(unittest.TestCase): + def test_amp_training_purefp16(self): if not fluid.is_compiled_with_cuda(): self.skipTest('module not tested when ONLY_CPU compling') @@ -44,15 +45,13 @@ class TestDistTraningWithPureFP16(unittest.TestCase): inputs = InputSpec([None, 1, 28, 28], "float32", 'x') labels = InputSpec([None, 1], "int64", "y") model = Model(net, inputs, labels) - optim = paddle.optimizer.Adam( - learning_rate=0.001, - parameters=model.parameters(), - multi_precision=True) + optim = paddle.optimizer.Adam(learning_rate=0.001, + parameters=model.parameters(), + multi_precision=True) amp_configs = {"level": amp_level, "use_fp16_guard": False} - model.prepare( - optimizer=optim, - loss=CrossEntropyLoss(reduction="sum"), - amp_configs=amp_configs) + model.prepare(optimizer=optim, + loss=CrossEntropyLoss(reduction="sum"), + amp_configs=amp_configs) model.train_batch([data], [label]) diff --git a/python/paddle/tests/hapi_mnist_bf16_static.py b/python/paddle/tests/hapi_mnist_bf16_static.py index 7eb4d61a21e..c1a2f23581c 100644 --- a/python/paddle/tests/hapi_mnist_bf16_static.py +++ b/python/paddle/tests/hapi_mnist_bf16_static.py @@ -41,17 +41,17 @@ set_device('cpu') def parse_args(): parser = argparse.ArgumentParser("Lenet BF16 train static script") - parser.add_argument( - '-bf16', - '--bf16', - type=ast.literal_eval, - default=False, - help="whether use bf16") + parser.add_argument('-bf16', + '--bf16', + type=ast.literal_eval, + default=False, + help="whether use bf16") args = parser.parse_args() return args class MnistDataset(MNIST): + def __init__(self, mode, return_label=True): super(MnistDataset, self).__init__(mode=mode) self.return_label = return_label @@ -92,11 +92,10 @@ def main(args): if args.bf16: optim = amp.bf16.decorate_bf16( optim, - amp_lists=amp.bf16.AutoMixedPrecisionListsBF16( - custom_bf16_list={ - 'matmul_v2', 'pool2d', 'relu', 'scale', 'elementwise_add', - 'reshape2', 'slice', 'reduce_mean', 'conv2d' - }, )) + amp_lists=amp.bf16.AutoMixedPrecisionListsBF16(custom_bf16_list={ + 'matmul_v2', 'pool2d', 'relu', 'scale', 'elementwise_add', + 'reshape2', 'slice', 'reduce_mean', 'conv2d' + }, )) # Configuration model model.prepare(optim, paddle.nn.CrossEntropyLoss(), Accuracy()) @@ -108,8 +107,9 @@ def main(args): model.fit(train_dataset, epochs=2, batch_size=batch_size, verbose=1) eval_result = model.evaluate(val_dataset, batch_size=batch_size, verbose=1) - output = model.predict( - test_dataset, batch_size=batch_size, stack_outputs=True) + output = 
model.predict(test_dataset, + batch_size=batch_size, + stack_outputs=True) np.testing.assert_equal(output[0].shape[0], len(test_dataset)) diff --git a/python/paddle/tests/hubconf.py b/python/paddle/tests/hubconf.py index 4b4a853ef2c..8e0a5f297a3 100644 --- a/python/paddle/tests/hubconf.py +++ b/python/paddle/tests/hubconf.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/tests/test_async_read_write.py b/python/paddle/tests/test_async_read_write.py index 14320634215..5336ca0da17 100644 --- a/python/paddle/tests/test_async_read_write.py +++ b/python/paddle/tests/test_async_read_write.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,18 +22,17 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class TestAsyncRead(unittest.TestCase): + def func_setUp(self): - self.empty = paddle.to_tensor( - np.array( - [], dtype="int64"), place=paddle.CPUPlace()) + self.empty = paddle.to_tensor(np.array([], dtype="int64"), + place=paddle.CPUPlace()) data = np.random.randn(100, 50, 50).astype("float32") self.src = paddle.to_tensor(data, place=paddle.CUDAPinnedPlace()) self.dst = paddle.empty(shape=[100, 50, 50], dtype="float32") - self.index = paddle.to_tensor( - np.array( - [1, 3, 5, 7, 9], dtype="int64")).cpu() - self.buffer = paddle.empty( - shape=[50, 50, 50], dtype="float32").pin_memory() + self.index = paddle.to_tensor(np.array([1, 3, 5, 7, 9], + dtype="int64")).cpu() + self.buffer = paddle.empty(shape=[50, 50, 50], + dtype="float32").pin_memory() self.stream = cuda.Stream() def func_test_async_read_empty_offset_and_count(self): @@ -50,12 +49,10 @@ class TestAsyncRead(unittest.TestCase): self.assertTrue(np.allclose(array1.numpy(), array2.numpy())) def func_test_async_read_success(self): - offset = paddle.to_tensor( - np.array( - [10, 20], dtype="int64"), place=paddle.CPUPlace()) - count = paddle.to_tensor( - np.array( - [5, 10], dtype="int64"), place=paddle.CPUPlace()) + offset = paddle.to_tensor(np.array([10, 20], dtype="int64"), + place=paddle.CPUPlace()) + count = paddle.to_tensor(np.array([5, 10], dtype="int64"), + place=paddle.CPUPlace()) with cuda.stream_guard(self.stream): if _in_legacy_dygraph(): core.async_read(self.src, self.dst, self.index, self.buffer, @@ -109,19 +106,18 @@ class TestAsyncRead(unittest.TestCase): class TestAsyncWrite(unittest.TestCase): + def func_setUp(self): self.src = paddle.rand(shape=[100, 50, 50, 5], dtype="float32") - self.dst = paddle.empty( - shape=[200, 50, 50, 5], dtype="float32").pin_memory() + self.dst = paddle.empty(shape=[200, 50, 50, 5], + 
dtype="float32").pin_memory() self.stream = cuda.Stream() def func_test_async_write_success(self): - offset = paddle.to_tensor( - np.array( - [0, 60], dtype="int64"), place=paddle.CPUPlace()) - count = paddle.to_tensor( - np.array( - [40, 60], dtype="int64"), place=paddle.CPUPlace()) + offset = paddle.to_tensor(np.array([0, 60], dtype="int64"), + place=paddle.CPUPlace()) + count = paddle.to_tensor(np.array([40, 60], dtype="int64"), + place=paddle.CPUPlace()) with cuda.stream_guard(self.stream): if _in_legacy_dygraph(): core.async_write(self.src, self.dst, offset, count) diff --git a/python/paddle/tests/test_callback_early_stop.py b/python/paddle/tests/test_callback_early_stop.py index 132f0e385c8..03741d98820 100644 --- a/python/paddle/tests/test_callback_early_stop.py +++ b/python/paddle/tests/test_callback_early_stop.py @@ -30,6 +30,7 @@ from paddle.nn.layer.loss import CrossEntropyLoss class MnistDataset(MNIST): + def __init__(self, mode, return_label=True, sample_num=None): super(MnistDataset, self).__init__(mode=mode) self.return_label = return_label @@ -49,6 +50,7 @@ class MnistDataset(MNIST): class TestCallbacks(unittest.TestCase): + def setUp(self): self.save_dir = tempfile.mkdtemp() @@ -65,49 +67,44 @@ class TestCallbacks(unittest.TestCase): val_dataset = MnistDataset(mode='test', sample_num=sample_num) net = LeNet() - optim = paddle.optimizer.Adam( - learning_rate=0.001, parameters=net.parameters()) + optim = paddle.optimizer.Adam(learning_rate=0.001, + parameters=net.parameters()) inputs = [InputSpec([None, 1, 28, 28], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 'label')] model = Model(net, inputs=inputs, labels=labels) - model.prepare( - optim, - loss=CrossEntropyLoss(reduction="sum"), - metrics=[Accuracy()]) - callbacks_0 = paddle.callbacks.EarlyStopping( - 'loss', - mode='min', - patience=1, - verbose=1, - min_delta=0, - baseline=None, - save_best_model=True) - callbacks_1 = paddle.callbacks.EarlyStopping( - 'acc', - mode='auto', - patience=1, - verbose=1, - min_delta=0, - baseline=0, - save_best_model=True) - callbacks_2 = paddle.callbacks.EarlyStopping( - 'loss', - mode='auto_', - patience=1, - verbose=1, - min_delta=0, - baseline=None, - save_best_model=True) - callbacks_3 = paddle.callbacks.EarlyStopping( - 'acc_', - mode='max', - patience=1, - verbose=1, - min_delta=0, - baseline=0, - save_best_model=True) + model.prepare(optim, + loss=CrossEntropyLoss(reduction="sum"), + metrics=[Accuracy()]) + callbacks_0 = paddle.callbacks.EarlyStopping('loss', + mode='min', + patience=1, + verbose=1, + min_delta=0, + baseline=None, + save_best_model=True) + callbacks_1 = paddle.callbacks.EarlyStopping('acc', + mode='auto', + patience=1, + verbose=1, + min_delta=0, + baseline=0, + save_best_model=True) + callbacks_2 = paddle.callbacks.EarlyStopping('loss', + mode='auto_', + patience=1, + verbose=1, + min_delta=0, + baseline=None, + save_best_model=True) + callbacks_3 = paddle.callbacks.EarlyStopping('acc_', + mode='max', + patience=1, + verbose=1, + min_delta=0, + baseline=0, + save_best_model=True) model.fit( train_dataset, val_dataset, diff --git a/python/paddle/tests/test_callback_reduce_lr_on_plateau.py b/python/paddle/tests/test_callback_reduce_lr_on_plateau.py index d7680537f37..e841a3f2fa5 100644 --- a/python/paddle/tests/test_callback_reduce_lr_on_plateau.py +++ b/python/paddle/tests/test_callback_reduce_lr_on_plateau.py @@ -34,24 +34,27 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph # Accelerate unittest class CustomMnist(MNIST): + 
def __len__(self): return 8 class TestReduceLROnPlateau(unittest.TestCase): + def func_reduce_lr_on_plateau(self): transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])]) train_dataset = CustomMnist(mode='train', transform=transform) val_dataset = CustomMnist(mode='test', transform=transform) net = LeNet() - optim = paddle.optimizer.Adam( - learning_rate=0.001, parameters=net.parameters()) + optim = paddle.optimizer.Adam(learning_rate=0.001, + parameters=net.parameters()) inputs = [InputSpec([None, 1, 28, 28], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 'label')] model = Model(net, inputs=inputs, labels=labels) model.prepare(optim, loss=CrossEntropyLoss(), metrics=[Accuracy()]) - callbacks = paddle.callbacks.ReduceLROnPlateau( - patience=1, verbose=1, cooldown=1) + callbacks = paddle.callbacks.ReduceLROnPlateau(patience=1, + verbose=1, + cooldown=1) model.fit(train_dataset, val_dataset, batch_size=8, @@ -75,14 +78,15 @@ class TestReduceLROnPlateau(unittest.TestCase): train_dataset = CustomMnist(mode='train', transform=transform) val_dataset = CustomMnist(mode='test', transform=transform) net = LeNet() - optim = paddle.optimizer.Adam( - learning_rate=0.001, parameters=net.parameters()) + optim = paddle.optimizer.Adam(learning_rate=0.001, + parameters=net.parameters()) inputs = [InputSpec([None, 1, 28, 28], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 'label')] model = Model(net, inputs=inputs, labels=labels) model.prepare(optim, loss=CrossEntropyLoss(), metrics=[Accuracy()]) - callbacks = paddle.callbacks.ReduceLROnPlateau( - monitor='miou', patience=3, verbose=1) + callbacks = paddle.callbacks.ReduceLROnPlateau(monitor='miou', + patience=3, + verbose=1) model.fit(train_dataset, val_dataset, batch_size=8, @@ -97,8 +101,11 @@ class TestReduceLROnPlateau(unittest.TestCase): parameters=net.parameters()) model.prepare(optim, loss=CrossEntropyLoss(), metrics=[Accuracy()]) - callbacks = paddle.callbacks.ReduceLROnPlateau( - monitor='acc', mode='max', patience=3, verbose=1, cooldown=1) + callbacks = paddle.callbacks.ReduceLROnPlateau(monitor='acc', + mode='max', + patience=3, + verbose=1, + cooldown=1) model.fit(train_dataset, val_dataset, batch_size=8, diff --git a/python/paddle/tests/test_callback_visualdl.py b/python/paddle/tests/test_callback_visualdl.py index 355e88edd2b..e62c045d601 100644 --- a/python/paddle/tests/test_callback_visualdl.py +++ b/python/paddle/tests/test_callback_visualdl.py @@ -33,11 +33,13 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class MnistDataset(MNIST): + def __len__(self): return 512 class TestCallbacks(unittest.TestCase): + def setUp(self): self.save_dir = tempfile.mkdtemp() @@ -60,10 +62,9 @@ class TestCallbacks(unittest.TestCase): model = paddle.Model(net, inputs, labels) optim = paddle.optimizer.Adam(0.001, parameters=net.parameters()) - model.prepare( - optimizer=optim, - loss=paddle.nn.CrossEntropyLoss(), - metrics=paddle.metric.Accuracy()) + model.prepare(optimizer=optim, + loss=paddle.nn.CrossEntropyLoss(), + metrics=paddle.metric.Accuracy()) callback = paddle.callbacks.VisualDL(log_dir='visualdl_log_dir') model.fit(train_dataset, diff --git a/python/paddle/tests/test_callbacks.py b/python/paddle/tests/test_callbacks.py index 2c81549bab9..b0b23b0efa3 100644 --- a/python/paddle/tests/test_callbacks.py +++ b/python/paddle/tests/test_callbacks.py @@ -32,6 +32,7 @@ from paddle.nn.layer.loss import CrossEntropyLoss class MnistDataset(MNIST): + def __init__(self, mode, return_label=True, 
sample_num=None): super(MnistDataset, self).__init__(mode=mode) self.return_label = return_label @@ -51,6 +52,7 @@ class MnistDataset(MNIST): class TestCallbacks(unittest.TestCase): + def setUp(self): self.save_dir = tempfile.mkdtemp() @@ -67,15 +69,14 @@ class TestCallbacks(unittest.TestCase): lenet = Model(LeNet(), inputs) lenet.prepare() - cbks = config_callbacks( - model=lenet, - batch_size=128, - epochs=epochs, - steps=steps, - log_freq=freq, - verbose=self.verbose, - metrics=['loss', 'acc'], - save_dir=self.save_dir) + cbks = config_callbacks(model=lenet, + batch_size=128, + epochs=epochs, + steps=steps, + log_freq=freq, + verbose=self.verbose, + metrics=['loss', 'acc'], + save_dir=self.save_dir) cbks.on_begin('train') logs = {'loss': 50.341673, 'acc': 0.00256} diff --git a/python/paddle/tests/test_dataset_cifar.py b/python/paddle/tests/test_dataset_cifar.py index abf79fb1e39..95bf21c4da1 100644 --- a/python/paddle/tests/test_dataset_cifar.py +++ b/python/paddle/tests/test_dataset_cifar.py @@ -19,6 +19,7 @@ from paddle.vision.datasets import Cifar10, Cifar100 class TestCifar10Train(unittest.TestCase): + def test_main(self): cifar = Cifar10(mode='train') self.assertTrue(len(cifar) == 50000) @@ -36,6 +37,7 @@ class TestCifar10Train(unittest.TestCase): class TestCifar10Test(unittest.TestCase): + def test_main(self): cifar = Cifar10(mode='test') self.assertTrue(len(cifar) == 10000) @@ -70,6 +72,7 @@ class TestCifar10Test(unittest.TestCase): class TestCifar100Train(unittest.TestCase): + def test_main(self): cifar = Cifar100(mode='train') self.assertTrue(len(cifar) == 50000) @@ -87,6 +90,7 @@ class TestCifar100Train(unittest.TestCase): class TestCifar100Test(unittest.TestCase): + def test_main(self): cifar = Cifar100(mode='test') self.assertTrue(len(cifar) == 10000) diff --git a/python/paddle/tests/test_dataset_conll05.py b/python/paddle/tests/test_dataset_conll05.py index 9eb0036718b..8dc1f56779f 100644 --- a/python/paddle/tests/test_dataset_conll05.py +++ b/python/paddle/tests/test_dataset_conll05.py @@ -20,6 +20,7 @@ from paddle.text.datasets import Conll05st class TestConll05st(unittest.TestCase): + def test_main(self): conll05st = Conll05st() self.assertTrue(len(conll05st) == 5267) diff --git a/python/paddle/tests/test_dataset_imdb.py b/python/paddle/tests/test_dataset_imdb.py index aed8c387409..c70b7fedf36 100644 --- a/python/paddle/tests/test_dataset_imdb.py +++ b/python/paddle/tests/test_dataset_imdb.py @@ -19,6 +19,7 @@ from paddle.text.datasets import Imdb class TestImdbTrain(unittest.TestCase): + def test_main(self): imdb = Imdb(mode='train') self.assertTrue(len(imdb) == 25000) @@ -33,6 +34,7 @@ class TestImdbTrain(unittest.TestCase): class TestImdbTest(unittest.TestCase): + def test_main(self): imdb = Imdb(mode='test') self.assertTrue(len(imdb) == 25000) diff --git a/python/paddle/tests/test_dataset_imikolov.py b/python/paddle/tests/test_dataset_imikolov.py index 6ffeeda73c3..6379ed11e5d 100644 --- a/python/paddle/tests/test_dataset_imikolov.py +++ b/python/paddle/tests/test_dataset_imikolov.py @@ -19,6 +19,7 @@ from paddle.text.datasets import Imikolov class TestImikolovTrain(unittest.TestCase): + def test_main(self): imikolov = Imikolov(mode='train', data_type='NGRAM', window_size=2) self.assertTrue(len(imikolov) == 929589) @@ -31,6 +32,7 @@ class TestImikolovTrain(unittest.TestCase): class TestImikolovTest(unittest.TestCase): + def test_main(self): imikolov = Imikolov(mode='test', data_type='NGRAM', window_size=2) self.assertTrue(len(imikolov) == 82430) diff --git 
a/python/paddle/tests/test_dataset_movielens.py b/python/paddle/tests/test_dataset_movielens.py
index e5c6d8376ee..78a62116476 100644
--- a/python/paddle/tests/test_dataset_movielens.py
+++ b/python/paddle/tests/test_dataset_movielens.py
@@ -19,6 +19,7 @@ from paddle.text.datasets import Movielens
 
 
 class TestMovielensTrain(unittest.TestCase):
+
     def test_main(self):
         movielens = Movielens(mode='train')
         # movielens dataset random split train/test
@@ -36,6 +37,7 @@ class TestMovielensTrain(unittest.TestCase):
 
 
 class TestMovielensTest(unittest.TestCase):
+
     def test_main(self):
         movielens = Movielens(mode='test')
         # movielens dataset random split train/test
diff --git a/python/paddle/tests/test_dataset_uci_housing.py b/python/paddle/tests/test_dataset_uci_housing.py
index bdf960b4336..beff1f71fe6 100644
--- a/python/paddle/tests/test_dataset_uci_housing.py
+++ b/python/paddle/tests/test_dataset_uci_housing.py
@@ -23,6 +23,7 @@ from paddle.text.datasets import UCIHousing, WMT14
 
 
 class TestUCIHousingTrain(unittest.TestCase):
+
     def test_main(self):
         uci_housing = UCIHousing(mode='train')
         self.assertTrue(len(uci_housing) == 404)
@@ -39,6 +40,7 @@ class TestUCIHousingTrain(unittest.TestCase):
 
 
 class TestUCIHousingTest(unittest.TestCase):
+
     def test_main(self):
         uci_housing = UCIHousing(mode='test')
         self.assertTrue(len(uci_housing) == 102)
@@ -55,6 +57,7 @@ class TestUCIHousingTest(unittest.TestCase):
 
 
 class TestWMT14Train(unittest.TestCase):
+
     def test_main(self):
         wmt14 = WMT14(mode='train', dict_size=50)
         self.assertTrue(len(wmt14) == 191155)
@@ -70,6 +73,7 @@ class TestWMT14Train(unittest.TestCase):
 
 
 class TestWMT14Test(unittest.TestCase):
+
     def test_main(self):
         wmt14 = WMT14(mode='test', dict_size=50)
         self.assertTrue(len(wmt14) == 5957)
@@ -85,6 +89,7 @@ class TestWMT14Test(unittest.TestCase):
 
 
 class TestWMT14Gen(unittest.TestCase):
+
     def test_main(self):
         wmt14 = WMT14(mode='gen', dict_size=50)
         self.assertTrue(len(wmt14) == 3001)
diff --git a/python/paddle/tests/test_dataset_voc.py b/python/paddle/tests/test_dataset_voc.py
index 6ca2a8e184c..f52abab2ca6 100644
--- a/python/paddle/tests/test_dataset_voc.py
+++ b/python/paddle/tests/test_dataset_voc.py
@@ -24,6 +24,7 @@ voc2012.VOC_MD5 = '34cb1fe5bdc139a5454b25b16118fff8'
 
 
 class TestVOC2012Train(unittest.TestCase):
+
     def test_main(self):
         voc2012 = VOC2012(mode='train')
         self.assertTrue(len(voc2012) == 3)
@@ -40,6 +41,7 @@ class TestVOC2012Train(unittest.TestCase):
 
 
 class TestVOC2012Valid(unittest.TestCase):
+
     def test_main(self):
         voc2012 = VOC2012(mode='valid')
         self.assertTrue(len(voc2012) == 1)
@@ -56,6 +58,7 @@ class TestVOC2012Valid(unittest.TestCase):
 
 
 class TestVOC2012Test(unittest.TestCase):
+
     def test_main(self):
         voc2012 = VOC2012(mode='test')
         self.assertTrue(len(voc2012) == 2)
diff --git a/python/paddle/tests/test_dataset_wmt.py b/python/paddle/tests/test_dataset_wmt.py
index 3e63090c9f0..48186ab1864 100644
--- a/python/paddle/tests/test_dataset_wmt.py
+++ b/python/paddle/tests/test_dataset_wmt.py
@@ -19,6 +19,7 @@ from paddle.text.datasets import WMT14, WMT16
 
 
 class TestWMT14Train(unittest.TestCase):
+
     def test_main(self):
         wmt14 = WMT14(mode='train', dict_size=50)
         self.assertTrue(len(wmt14) == 191155)
@@ -34,6 +35,7 @@ class TestWMT14Train(unittest.TestCase):
 
 
 class TestWMT14Test(unittest.TestCase):
+
     def test_main(self):
         wmt14 = WMT14(mode='test', dict_size=50)
         self.assertTrue(len(wmt14) == 5957)
@@ -49,6 +51,7 @@ class TestWMT14Test(unittest.TestCase):
 
 
 class TestWMT14Gen(unittest.TestCase):
+
     def test_main(self):
         wmt14 = WMT14(mode='gen', dict_size=50)
self.assertTrue(len(wmt14) == 3001) @@ -64,9 +67,12 @@ class TestWMT14Gen(unittest.TestCase): class TestWMT16Train(unittest.TestCase): + def test_main(self): - wmt16 = WMT16( - mode='train', src_dict_size=50, trg_dict_size=50, lang='en') + wmt16 = WMT16(mode='train', + src_dict_size=50, + trg_dict_size=50, + lang='en') self.assertTrue(len(wmt16) == 29000) # traversal whole dataset may cost a @@ -80,9 +86,12 @@ class TestWMT16Train(unittest.TestCase): class TestWMT16Test(unittest.TestCase): + def test_main(self): - wmt16 = WMT16( - mode='test', src_dict_size=50, trg_dict_size=50, lang='en') + wmt16 = WMT16(mode='test', + src_dict_size=50, + trg_dict_size=50, + lang='en') self.assertTrue(len(wmt16) == 1000) # traversal whole dataset may cost a @@ -96,6 +105,7 @@ class TestWMT16Test(unittest.TestCase): class TestWMT16Val(unittest.TestCase): + def test_main(self): wmt16 = WMT16(mode='val', src_dict_size=50, trg_dict_size=50, lang='en') self.assertTrue(len(wmt16) == 1014) diff --git a/python/paddle/tests/test_datasets.py b/python/paddle/tests/test_datasets.py index be26dff6c04..ae55377dfdf 100644 --- a/python/paddle/tests/test_datasets.py +++ b/python/paddle/tests/test_datasets.py @@ -26,6 +26,7 @@ from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class TestFolderDatasets(unittest.TestCase): + def setUp(self): self.data_dir = tempfile.mkdtemp() self.empty_dir = tempfile.mkdtemp() @@ -76,6 +77,7 @@ class TestFolderDatasets(unittest.TestCase): self.func_test_folder() def func_test_transform(self): + def fake_transform(img): return img @@ -110,6 +112,7 @@ class TestFolderDatasets(unittest.TestCase): class TestMNISTTest(unittest.TestCase): + def func_test_main(self): transform = T.Transpose() mnist = MNIST(mode='test', transform=transform) @@ -130,6 +133,7 @@ class TestMNISTTest(unittest.TestCase): class TestMNISTTrain(unittest.TestCase): + def func_test_main(self): transform = T.Transpose() mnist = MNIST(mode='train', transform=transform) @@ -166,6 +170,7 @@ class TestMNISTTrain(unittest.TestCase): class TestFASHIONMNISTTest(unittest.TestCase): + def func_test_main(self): transform = T.Transpose() mnist = FashionMNIST(mode='test', transform=transform) @@ -186,6 +191,7 @@ class TestFASHIONMNISTTest(unittest.TestCase): class TestFASHIONMNISTTrain(unittest.TestCase): + def func_test_main(self): transform = T.Transpose() mnist = FashionMNIST(mode='train', transform=transform) @@ -234,6 +240,7 @@ class TestFASHIONMNISTTrain(unittest.TestCase): class TestFlowersTrain(unittest.TestCase): + def func_test_main(self): flowers = Flowers(mode='train') self.assertTrue(len(flowers) == 6149) @@ -254,6 +261,7 @@ class TestFlowersTrain(unittest.TestCase): class TestFlowersValid(unittest.TestCase): + def func_test_main(self): flowers = Flowers(mode='valid') self.assertTrue(len(flowers) == 1020) @@ -274,6 +282,7 @@ class TestFlowersValid(unittest.TestCase): class TestFlowersTest(unittest.TestCase): + def func_test_main(self): flowers = Flowers(mode='test') self.assertTrue(len(flowers) == 1020) diff --git a/python/paddle/tests/test_dist_hapi_model.py b/python/paddle/tests/test_dist_hapi_model.py index 006800d3cae..895d2bc0c47 100644 --- a/python/paddle/tests/test_dist_hapi_model.py +++ b/python/paddle/tests/test_dist_hapi_model.py @@ -103,6 +103,7 @@ def start_local_trainers(cluster, class TestMultipleGpus(unittest.TestCase): + def run_mnist_2gpu(self, target_file_name, eager_mode=True): if fluid.core.get_cuda_device_count() == 0: return @@ -113,12 +114,11 @@ class 
TestMultipleGpus(unittest.TestCase): cluster, pod = get_cluster_from_args(selected_gpus) - procs = start_local_trainers( - cluster, - pod, - eager_mode=eager_mode, - training_script=target_file_name, - training_script_args=[]) + procs = start_local_trainers(cluster, + pod, + eager_mode=eager_mode, + training_script=target_file_name, + training_script_args=[]) while True: alive = watch_local_trainers(procs, cluster.trainers_nranks()) diff --git a/python/paddle/tests/test_dlpack.py b/python/paddle/tests/test_dlpack.py index 458efd047de..5ca49a09fe8 100644 --- a/python/paddle/tests/test_dlpack.py +++ b/python/paddle/tests/test_dlpack.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,6 +22,7 @@ from paddle.fluid.framework import _test_eager_guard, in_dygraph_mode class TestDLPack(unittest.TestCase): + def func_test_dlpack_dygraph(self): paddle.disable_static() tensor = paddle.to_tensor(np.array([1, 2, 3, 4]).astype('int')) @@ -33,9 +34,8 @@ class TestDLPack(unittest.TestCase): else: self.assertTrue(isinstance(out_from_dlpack, paddle.Tensor)) self.assertTrue( - np.array_equal( - np.array(out_from_dlpack), np.array([1, 2, 3, 4]).astype( - 'int'))) + np.array_equal(np.array(out_from_dlpack), + np.array([1, 2, 3, 4]).astype('int'))) def test_dlpack_dygraph(self): with _test_eager_guard(): @@ -65,9 +65,8 @@ class TestDLPack(unittest.TestCase): out_from_dlpack = paddle.utils.dlpack.from_dlpack(dlpack) self.assertTrue(isinstance(out_from_dlpack, fluid.core.Tensor)) self.assertTrue( - np.array_equal( - np.array(out_from_dlpack), - np.array([[1], [2], [3], [4]]).astype('int'))) + np.array_equal(np.array(out_from_dlpack), + np.array([[1], [2], [3], [4]]).astype('int'))) # when build with cuda if core.is_compiled_with_cuda(): @@ -78,9 +77,8 @@ class TestDLPack(unittest.TestCase): gout_from_dlpack = paddle.utils.dlpack.from_dlpack(gdlpack) self.assertTrue(isinstance(gout_from_dlpack, fluid.core.Tensor)) self.assertTrue( - np.array_equal( - np.array(gout_from_dlpack), - np.array([[1], [2], [3], [4]]).astype('int'))) + np.array_equal(np.array(gout_from_dlpack), + np.array([[1], [2], [3], [4]]).astype('int'))) def func_test_dlpack_dtype_conversion(self): paddle.disable_static() @@ -120,6 +118,7 @@ class TestDLPack(unittest.TestCase): class TestRaiseError(unittest.TestCase): + def func_test_from_dlpack_raise_type_error(self): self.assertRaises(TypeError, paddle.utils.dlpack.from_dlpack, np.zeros(5)) diff --git a/python/paddle/tests/test_download.py b/python/paddle/tests/test_download.py index 49e76d9416e..3e6fcc54297 100644 --- a/python/paddle/tests/test_download.py +++ b/python/paddle/tests/test_download.py @@ -20,6 +20,7 @@ from paddle.utils.download import get_path_from_url class TestDownload(unittest.TestCase): + def download(self, url, md5sum): get_weights_path_from_url(url, md5sum) @@ -106,7 +107,8 @@ class TestDownload(unittest.TestCase): from paddle.utils.download import _download _download( 'www.baidu.com', - './test', ) + './test', + ) def test_wget_download_error(self, ): with self.assertRaises(RuntimeError): @@ 
-131,7 +133,8 @@ class TestDownload(unittest.TestCase): _download( url, path='./test', - method=method, ) + method=method, + ) if __name__ == '__main__': diff --git a/python/paddle/tests/test_hapi_amp.py b/python/paddle/tests/test_hapi_amp.py index d17b6f35947..eaf10dbfc4c 100644 --- a/python/paddle/tests/test_hapi_amp.py +++ b/python/paddle/tests/test_hapi_amp.py @@ -16,6 +16,7 @@ from __future__ import division from __future__ import print_function import os + os.environ['FLAGS_cudnn_deterministic'] = '1' import unittest @@ -36,17 +37,17 @@ import paddle.vision.transforms as T @unittest.skipIf(not fluid.is_compiled_with_cuda(), 'CPU testing is not supported') class TestHapiWithAmp(unittest.TestCase): + def get_model(self, amp_config): net = LeNet() inputs = InputSpec([None, 1, 28, 28], "float32", 'x') labels = InputSpec([None, 1], "int64", "y") model = Model(net, inputs, labels) - optim = paddle.optimizer.Adam( - learning_rate=0.001, parameters=model.parameters()) - model.prepare( - optimizer=optim, - loss=CrossEntropyLoss(reduction="sum"), - amp_configs=amp_config) + optim = paddle.optimizer.Adam(learning_rate=0.001, + parameters=model.parameters()) + model.prepare(optimizer=optim, + loss=CrossEntropyLoss(reduction="sum"), + amp_configs=amp_config) return model def run_model(self, model): @@ -82,7 +83,9 @@ class TestHapiWithAmp(unittest.TestCase): self.run_amp(amp_config) def test_fp32(self): - amp_config = {"level": "O0", } + amp_config = { + "level": "O0", + } self.run_amp(amp_config) def test_save_load(self): @@ -122,9 +125,11 @@ class TestHapiWithAmp(unittest.TestCase): self.assertEqual(new_model._scaler.state_dict()['decr_count'], model._scaler.state_dict()['decr_count']) self.assertTrue( - np.array_equal(new_model._optimizer.state_dict( - )['conv2d_1.w_0_moment1_0'].numpy( - ), model._optimizer.state_dict()['conv2d_1.w_0_moment1_0'].numpy())) + np.array_equal( + new_model._optimizer.state_dict() + ['conv2d_1.w_0_moment1_0'].numpy(), + model._optimizer.state_dict() + ['conv2d_1.w_0_moment1_0'].numpy())) def test_dynamic_check_input(self): paddle.disable_static() @@ -147,21 +152,21 @@ class TestHapiWithAmp(unittest.TestCase): paddle.set_device('gpu') net = LeNet() model = Model(net) - optim = paddle.optimizer.Adam( - learning_rate=0.001, parameters=model.parameters()) + optim = paddle.optimizer.Adam(learning_rate=0.001, + parameters=model.parameters()) loss = CrossEntropyLoss(reduction="sum") with self.assertRaises(ValueError): for amp_configs in amp_configs_list: - model.prepare( - optimizer=optim, loss=loss, amp_configs=amp_configs) + model.prepare(optimizer=optim, + loss=loss, + amp_configs=amp_configs) model.prepare(optimizer=optim, loss=loss, amp_configs="O2") - model.prepare( - optimizer=optim, - loss=loss, - amp_configs={ - "custom_white_list": {"matmul"}, - "init_loss_scaling": 1.0 - }) + model.prepare(optimizer=optim, + loss=loss, + amp_configs={ + "custom_white_list": {"matmul"}, + "init_loss_scaling": 1.0 + }) def test_static_check_input(self): paddle.enable_static() @@ -175,8 +180,8 @@ class TestHapiWithAmp(unittest.TestCase): labels = InputSpec([None, 1], "int64", "y") model = Model(net, inputs, labels) - optim = paddle.optimizer.Adam( - learning_rate=0.001, parameters=model.parameters()) + optim = paddle.optimizer.Adam(learning_rate=0.001, + parameters=model.parameters()) loss = CrossEntropyLoss(reduction="sum") with self.assertRaises(ValueError): model.prepare(optimizer=optim, loss=loss, amp_configs=amp_configs) diff --git a/python/paddle/tests/test_hapi_hub.py 
b/python/paddle/tests/test_hapi_hub.py index 06000d6c833..3ebe69d01c7 100644 --- a/python/paddle/tests/test_hapi_hub.py +++ b/python/paddle/tests/test_hapi_hub.py @@ -25,42 +25,44 @@ import numpy as np class TestHub(unittest.TestCase): + def setUp(self, ): self.local_repo = os.path.dirname(os.path.abspath(__file__)) self.github_repo = 'lyuwenyu/paddlehub_demo:main' def testLoad(self, ): - model = hub.load( - self.local_repo, model='MM', source='local', out_channels=8) + model = hub.load(self.local_repo, + model='MM', + source='local', + out_channels=8) data = paddle.rand((1, 3, 100, 100)) out = model(data) np.testing.assert_equal(out.shape, [1, 8, 50, 50]) - model = hub.load( - self.github_repo, model='MM', source='github', force_reload=True) - - model = hub.load( - self.github_repo, - model='MM', - source='github', - force_reload=False, - pretrained=False) - - model = hub.load( - self.github_repo.split(':')[0], - model='MM', - source='github', - force_reload=False, - pretrained=False) - - model = hub.load( - self.github_repo, - model='MM', - source='github', - force_reload=False, - pretrained=True, - out_channels=8) + model = hub.load(self.github_repo, + model='MM', + source='github', + force_reload=True) + + model = hub.load(self.github_repo, + model='MM', + source='github', + force_reload=False, + pretrained=False) + + model = hub.load(self.github_repo.split(':')[0], + model='MM', + source='github', + force_reload=False, + pretrained=False) + + model = hub.load(self.github_repo, + model='MM', + source='github', + force_reload=False, + pretrained=True, + out_channels=8) data = paddle.ones((1, 3, 2, 2)) out = model(data) @@ -70,17 +72,21 @@ class TestHub(unittest.TestCase): 0.37345418 ]) np.testing.assert_equal(out.shape, [1, 8, 1, 1]) - np.testing.assert_almost_equal( - out.numpy(), gt.reshape(1, 8, 1, 1), decimal=5) + np.testing.assert_almost_equal(out.numpy(), + gt.reshape(1, 8, 1, 1), + decimal=5) def testHelp(self, ): docs1 = hub.help( self.local_repo, model='MM', - source='local', ) + source='local', + ) - docs2 = hub.help( - self.github_repo, model='MM', source='github', force_reload=False) + docs2 = hub.help(self.github_repo, + model='MM', + source='github', + force_reload=False) assert docs1 == docs2 == 'This is a test demo for paddle hub\n ', '' @@ -88,44 +94,46 @@ class TestHub(unittest.TestCase): models1 = hub.list( self.local_repo, source='local', - force_reload=False, ) + force_reload=False, + ) models2 = hub.list( self.github_repo, source='github', - force_reload=False, ) + force_reload=False, + ) assert models1 == models2 == ['MM'], '' def testExcept(self, ): with self.assertRaises(ValueError): - _ = hub.help( - self.github_repo, - model='MM', - source='github-test', - force_reload=False) + _ = hub.help(self.github_repo, + model='MM', + source='github-test', + force_reload=False) with self.assertRaises(ValueError): - _ = hub.load( - self.github_repo, - model='MM', - source='github-test', - force_reload=False) + _ = hub.load(self.github_repo, + model='MM', + source='github-test', + force_reload=False) with self.assertRaises(ValueError): - _ = hub.list( - self.github_repo, source='github-test', force_reload=False) + _ = hub.list(self.github_repo, + source='github-test', + force_reload=False) with self.assertRaises(ValueError): - _ = hub.load( - self.local_repo, model=123, source='local', force_reload=False) + _ = hub.load(self.local_repo, + model=123, + source='local', + force_reload=False) with self.assertRaises(RuntimeError): - _ = hub.load( - self.local_repo, - 
model='123', - source='local', - force_reload=False) + _ = hub.load(self.local_repo, + model='123', + source='local', + force_reload=False) if __name__ == '__main__': diff --git a/python/paddle/tests/test_hapi_hub_model.py b/python/paddle/tests/test_hapi_hub_model.py index 774c7f6f33a..e058a6e39aa 100644 --- a/python/paddle/tests/test_hapi_hub_model.py +++ b/python/paddle/tests/test_hapi_hub_model.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,6 +18,7 @@ import paddle.nn.functional as F class MM(nn.Layer): + def __init__(self, out_channels): super(MM, self).__init__() self.conv = nn.Conv2D(3, out_channels, 3, 2, 1) diff --git a/python/paddle/tests/test_logger.py b/python/paddle/tests/test_logger.py index b6edec8674a..c42775938dc 100644 --- a/python/paddle/tests/test_logger.py +++ b/python/paddle/tests/test_logger.py @@ -25,6 +25,7 @@ from paddle.hapi.logger import setup_logger class TestSetupLogger(unittest.TestCase): + def setUp(self): self.save_dir = tempfile.mkdtemp() self.save_file = os.path.join(self.save_dir, 'logger.txt') diff --git a/python/paddle/tests/test_metrics.py b/python/paddle/tests/test_metrics.py index 0cf52b35e44..5b1d5a7f195 100644 --- a/python/paddle/tests/test_metrics.py +++ b/python/paddle/tests/test_metrics.py @@ -59,6 +59,7 @@ def convert_to_one_hot(y, C): class TestAccuracy(unittest.TestCase): + def test_acc(self, squeeze_y=False): x = paddle.to_tensor( np.array([[0.1, 0.2, 0.3, 0.4], [0.1, 0.4, 0.3, 0.2], @@ -126,6 +127,7 @@ class TestAccuracy(unittest.TestCase): class TestAccuracyDynamic(unittest.TestCase): + def setUp(self): self.topk = (1, ) self.class_num = 5 @@ -153,7 +155,7 @@ class TestAccuracyDynamic(unittest.TestCase): label_var = paddle.to_tensor(label) pred_var = paddle.to_tensor(pred) state = to_list(acc.compute(pred_var, label_var)) - acc.update(* [s.numpy() for s in state]) + acc.update(*[s.numpy() for s in state]) res_m = acc.accumulate() res_f = accuracy(pred, label, self.topk) assert np.all(np.isclose(np.array(res_m, dtype='float64'), @@ -165,6 +167,7 @@ class TestAccuracyDynamic(unittest.TestCase): class TestAccuracyDynamicMultiTopk(TestAccuracyDynamic): + def setUp(self): self.topk = (1, 5) self.class_num = 10 @@ -174,6 +177,7 @@ class TestAccuracyDynamicMultiTopk(TestAccuracyDynamic): class TestAccuracyStatic(TestAccuracyDynamic): + def setUp(self): self.topk = (1, ) self.class_num = 5 @@ -189,8 +193,9 @@ class TestAccuracyStatic(TestAccuracyDynamic): main_prog.random_seed = 1024 startup_prog.random_seed = 1024 with fluid.program_guard(main_prog, startup_prog): - pred = fluid.data( - name='pred', shape=[None, self.class_num], dtype='float32') + pred = fluid.data(name='pred', + shape=[None, self.class_num], + dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') acc = paddle.metric.Accuracy(topk=self.topk, name=self.name) state = acc.compute(pred, label) @@ -201,8 +206,10 @@ class TestAccuracyStatic(TestAccuracyDynamic): for _ in range(10): label, pred = self.random_pred_label() state_ret = exe.run(compiled_main_prog, - 
feed={'pred': pred, - 'label': label}, + feed={ + 'pred': pred, + 'label': label + }, fetch_list=[s.name for s in to_list(state)], return_numpy=True) acc.update(*state_ret) @@ -218,6 +225,7 @@ class TestAccuracyStatic(TestAccuracyDynamic): class TestAccuracyStaticMultiTopk(TestAccuracyStatic): + def setUp(self): self.topk = (1, 5) self.class_num = 10 @@ -227,6 +235,7 @@ class TestAccuracyStaticMultiTopk(TestAccuracyStatic): class TestPrecision(unittest.TestCase): + def test_1d(self): x = np.array([0.1, 0.5, 0.6, 0.7]) @@ -266,6 +275,7 @@ class TestPrecision(unittest.TestCase): class TestRecall(unittest.TestCase): + def test_1d(self): x = np.array([0.1, 0.5, 0.6, 0.7]) y = np.array([1, 0, 1, 1]) @@ -289,6 +299,7 @@ class TestRecall(unittest.TestCase): class TestAuc(unittest.TestCase): + def test_auc_numpy(self): x = np.array([[0.78, 0.22], [0.62, 0.38], [0.55, 0.45], [0.30, 0.70], [0.14, 0.86], [0.59, 0.41], [0.91, 0.08], [0.16, 0.84]]) diff --git a/python/paddle/tests/test_model.py b/python/paddle/tests/test_model.py index 41de8ae189f..e93ec67d481 100644 --- a/python/paddle/tests/test_model.py +++ b/python/paddle/tests/test_model.py @@ -42,22 +42,19 @@ from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTra class LeNetDygraph(paddle.nn.Layer): + def __init__(self, num_classes=10): super(LeNetDygraph, self).__init__() self.num_classes = num_classes - self.features = Sequential( - Conv2D( - 1, 6, 3, stride=1, padding=1), - ReLU(), - paddle.fluid.dygraph.Pool2D(2, 'max', 2), - Conv2D( - 6, 16, 5, stride=1, padding=0), - ReLU(), - paddle.fluid.dygraph.Pool2D(2, 'max', 2)) + self.features = Sequential(Conv2D(1, 6, 3, stride=1, padding=1), ReLU(), + paddle.fluid.dygraph.Pool2D(2, 'max', 2), + Conv2D(6, 16, 5, stride=1, padding=0), + ReLU(), + paddle.fluid.dygraph.Pool2D(2, 'max', 2)) if num_classes > 0: - self.fc = Sequential( - Linear(400, 120), Linear(120, 84), Linear(84, 10)) + self.fc = Sequential(Linear(400, 120), Linear(120, 84), + Linear(84, 10)) def forward(self, inputs): x = self.features(inputs) @@ -69,6 +66,7 @@ class LeNetDygraph(paddle.nn.Layer): class ModelInner(paddle.nn.Layer): + def __init__(self): super(ModelInner, self).__init__() self.fc = paddle.nn.Linear(3, 4) @@ -79,6 +77,7 @@ class ModelInner(paddle.nn.Layer): class ModelOutter(paddle.nn.Layer): + def __init__(self): super(ModelOutter, self).__init__() self.module1 = ModelInner() @@ -91,24 +90,22 @@ class ModelOutter(paddle.nn.Layer): class LeNetListInput(paddle.nn.Layer): + def __init__(self, num_classes=10): super(LeNetListInput, self).__init__() self.num_classes = num_classes self.cov = Conv2D(1, 6, 3, stride=1, padding=1) for param in self.cov.parameters(): param.trainable = False - self.features = Sequential( - self.cov, - ReLU(), - paddle.fluid.dygraph.Pool2D(2, 'max', 2), - Conv2D( - 6, 16, 5, stride=1, padding=0), - ReLU(), - paddle.fluid.dygraph.Pool2D(2, 'max', 2)) + self.features = Sequential(self.cov, ReLU(), + paddle.fluid.dygraph.Pool2D(2, 'max', 2), + Conv2D(6, 16, 5, stride=1, padding=0), + ReLU(), + paddle.fluid.dygraph.Pool2D(2, 'max', 2)) if num_classes > 0: - self.fc = Sequential( - Linear(400, 120), Linear(120, 84), Linear(84, 10)) + self.fc = Sequential(Linear(400, 120), Linear(120, 84), + Linear(84, 10)) def forward(self, inputs): x = inputs[0] @@ -121,6 +118,7 @@ class LeNetListInput(paddle.nn.Layer): class LeNetDictInput(LeNetDygraph): + def forward(self, inputs): x = self.features(inputs['x1']) @@ -131,6 +129,7 @@ class LeNetDictInput(LeNetDygraph): class 
MnistDataset(MNIST): + def __init__(self, mode, return_label=True, sample_num=None): super(MnistDataset, self).__init__(mode=mode) self.return_label = return_label @@ -157,8 +156,8 @@ def compute_acc(pred, label): def dynamic_train(model, dataloader): - optim = fluid.optimizer.Adam( - learning_rate=0.001, parameter_list=model.parameters()) + optim = fluid.optimizer.Adam(learning_rate=0.001, + parameter_list=model.parameters()) model.train() for inputs, labels in dataloader: outputs = model(inputs) @@ -176,8 +175,9 @@ def dynamic_evaluate(model, dataloader): for inputs, labels in dataloader: outputs = model(inputs) - cnt += (np.argmax(outputs.numpy(), -1)[:, np.newaxis] == - labels.numpy()).astype('int').sum() + cnt += (np.argmax( + outputs.numpy(), + -1)[:, np.newaxis] == labels.numpy()).astype('int').sum() return cnt / len(dataloader.dataset) @@ -185,6 +185,7 @@ def dynamic_evaluate(model, dataloader): @unittest.skipIf(not fluid.is_compiled_with_cuda(), 'CPU testing is not supported') class TestModel(unittest.TestCase): + @classmethod def setUpClass(cls): if not fluid.is_compiled_with_cuda(): @@ -195,15 +196,19 @@ class TestModel(unittest.TestCase): sp_num = 1280 cls.train_dataset = MnistDataset(mode='train', sample_num=sp_num) cls.val_dataset = MnistDataset(mode='test', sample_num=sp_num) - cls.test_dataset = MnistDataset( - mode='test', return_label=False, sample_num=sp_num) - - cls.train_loader = fluid.io.DataLoader( - cls.train_dataset, places=cls.device, batch_size=64) - cls.val_loader = fluid.io.DataLoader( - cls.val_dataset, places=cls.device, batch_size=64) - cls.test_loader = fluid.io.DataLoader( - cls.test_dataset, places=cls.device, batch_size=64) + cls.test_dataset = MnistDataset(mode='test', + return_label=False, + sample_num=sp_num) + + cls.train_loader = fluid.io.DataLoader(cls.train_dataset, + places=cls.device, + batch_size=64) + cls.val_loader = fluid.io.DataLoader(cls.val_dataset, + places=cls.device, + batch_size=64) + cls.test_loader = fluid.io.DataLoader(cls.test_dataset, + places=cls.device, + batch_size=64) seed = 333 paddle.seed(seed) @@ -276,13 +281,12 @@ class TestModel(unittest.TestCase): paddle.framework.random._manual_program_seed(seed) net = LeNet() - optim_new = fluid.optimizer.Adam( - learning_rate=0.001, parameter_list=net.parameters()) + optim_new = fluid.optimizer.Adam(learning_rate=0.001, + parameter_list=net.parameters()) model = Model(net, inputs=self.inputs, labels=self.labels) - model.prepare( - optim_new, - loss=CrossEntropyLoss(reduction="sum"), - metrics=Accuracy()) + model.prepare(optim_new, + loss=CrossEntropyLoss(reduction="sum"), + metrics=Accuracy()) model.fit(self.train_dataset, batch_size=64, shuffle=False) result = model.evaluate(self.val_dataset, batch_size=64) @@ -293,33 +297,30 @@ class TestModel(unittest.TestCase): shuffle=False, num_iters=num_iters) - result = model.evaluate( - self.val_dataset, batch_size=64, num_iters=num_iters) - - train_sampler = DistributedBatchSampler( - self.train_dataset, - batch_size=64, - shuffle=False, - num_replicas=num_replicas, - rank=rank) - val_sampler = DistributedBatchSampler( - self.val_dataset, - batch_size=64, - shuffle=False, - num_replicas=num_replicas, - rank=rank) - - train_loader = fluid.io.DataLoader( - self.train_dataset, - batch_sampler=train_sampler, - places=self.device, - return_list=True) - - val_loader = fluid.io.DataLoader( - self.val_dataset, - batch_sampler=val_sampler, - places=self.device, - return_list=True) + result = model.evaluate(self.val_dataset, + batch_size=64, + 
num_iters=num_iters) + + train_sampler = DistributedBatchSampler(self.train_dataset, + batch_size=64, + shuffle=False, + num_replicas=num_replicas, + rank=rank) + val_sampler = DistributedBatchSampler(self.val_dataset, + batch_size=64, + shuffle=False, + num_replicas=num_replicas, + rank=rank) + + train_loader = fluid.io.DataLoader(self.train_dataset, + batch_sampler=train_sampler, + places=self.device, + return_list=True) + + val_loader = fluid.io.DataLoader(self.val_dataset, + batch_sampler=val_sampler, + places=self.device, + return_list=True) model.fit(train_loader, val_loader) fluid.disable_dygraph() if dynamic else None @@ -331,42 +332,37 @@ class TestModel(unittest.TestCase): paddle.framework.random._manual_program_seed(seed) net = LeNet() - optim_new = fluid.optimizer.Adam( - learning_rate=0.001, parameter_list=net.parameters()) + optim_new = fluid.optimizer.Adam(learning_rate=0.001, + parameter_list=net.parameters()) model = Model(net, inputs=tuple(self.inputs), labels=tuple(self.labels)) - model.prepare( - optim_new, - loss=CrossEntropyLoss(reduction="sum"), - metrics=Accuracy()) + model.prepare(optim_new, + loss=CrossEntropyLoss(reduction="sum"), + metrics=Accuracy()) model.fit(self.train_dataset, batch_size=64, shuffle=False) result = model.evaluate(self.val_dataset, batch_size=64) np.testing.assert_allclose(result['acc'], self.acc1) - train_sampler = DistributedBatchSampler( - self.train_dataset, - batch_size=64, - shuffle=False, - num_replicas=num_replicas, - rank=rank) - val_sampler = DistributedBatchSampler( - self.val_dataset, - batch_size=64, - shuffle=False, - num_replicas=num_replicas, - rank=rank) - - train_loader = fluid.io.DataLoader( - self.train_dataset, - batch_sampler=train_sampler, - places=self.device, - return_list=True) - - val_loader = fluid.io.DataLoader( - self.val_dataset, - batch_sampler=val_sampler, - places=self.device, - return_list=True) + train_sampler = DistributedBatchSampler(self.train_dataset, + batch_size=64, + shuffle=False, + num_replicas=num_replicas, + rank=rank) + val_sampler = DistributedBatchSampler(self.val_dataset, + batch_size=64, + shuffle=False, + num_replicas=num_replicas, + rank=rank) + + train_loader = fluid.io.DataLoader(self.train_dataset, + batch_sampler=train_sampler, + places=self.device, + return_list=True) + + val_loader = fluid.io.DataLoader(self.val_dataset, + batch_sampler=val_sampler, + places=self.device, + return_list=True) model.fit(train_loader, val_loader) fluid.disable_dygraph() if dynamic else None @@ -379,14 +375,14 @@ class TestModel(unittest.TestCase): result = model.evaluate(self.val_dataset, batch_size=64) np.testing.assert_allclose(result['acc'], self.acc1) - sampler = DistributedBatchSampler( - self.val_dataset, batch_size=64, shuffle=False) + sampler = DistributedBatchSampler(self.val_dataset, + batch_size=64, + shuffle=False) - val_loader = fluid.io.DataLoader( - self.val_dataset, - batch_sampler=sampler, - places=self.device, - return_list=True) + val_loader = fluid.io.DataLoader(self.val_dataset, + batch_sampler=sampler, + places=self.device, + return_list=True) model.evaluate(val_loader) @@ -397,21 +393,22 @@ class TestModel(unittest.TestCase): model = Model(LeNet(), self.inputs) model.prepare() model.load(self.weight_path) - output = model.predict( - self.test_dataset, batch_size=64, stack_outputs=True) + output = model.predict(self.test_dataset, + batch_size=64, + stack_outputs=True) np.testing.assert_equal(output[0].shape[0], len(self.test_dataset)) acc = compute_acc(output[0], 
self.val_dataset.labels) np.testing.assert_allclose(acc, self.acc1) - sampler = DistributedBatchSampler( - self.test_dataset, batch_size=64, shuffle=False) + sampler = DistributedBatchSampler(self.test_dataset, + batch_size=64, + shuffle=False) - test_loader = fluid.io.DataLoader( - self.test_dataset, - batch_sampler=sampler, - places=self.device, - return_list=True) + test_loader = fluid.io.DataLoader(self.test_dataset, + batch_sampler=sampler, + places=self.device, + return_list=True) model.evaluate(test_loader) @@ -423,19 +420,21 @@ class TestModel(unittest.TestCase): model.prepare() model.load(self.weight_path) model._inputs = None - output = model.predict( - self.test_dataset, batch_size=64, stack_outputs=True) + output = model.predict(self.test_dataset, + batch_size=64, + stack_outputs=True) np.testing.assert_equal(output[0].shape[0], len(self.test_dataset)) fluid.disable_dygraph() def test_summary_gpu(self): paddle.disable_static(self.device) rnn = paddle.nn.LSTM(16, 32, 2) - params_info = paddle.summary( - rnn, [(-1, 23, 16), ((2, None, 32), (2, -1, 32))]) + params_info = paddle.summary(rnn, [(-1, 23, 16), + ((2, None, 32), (2, -1, 32))]) class MyModel(paddle.nn.Layer): + def __init__(self): super(MyModel, self).__init__() self._fc = Linear(20, 10) @@ -446,6 +445,7 @@ class MyModel(paddle.nn.Layer): class MyDataset(Dataset): + def __getitem__(self, idx): return np.random.random(size=(20,)).astype(np.float32), \ np.random.randint(0, 10, size=(1,)).astype(np.int64) @@ -455,6 +455,7 @@ class MyDataset(Dataset): class TestModelFunction(unittest.TestCase): + def set_seed(self, seed=1024): paddle.seed(seed) paddle.framework.random._manual_program_seed(seed) @@ -538,8 +539,8 @@ class TestModelFunction(unittest.TestCase): optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=net.parameters()) model = Model(net, inputs, labels) - model.prepare( - optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) + model.prepare(optimizer=optim, + loss=CrossEntropyLoss(reduction="sum")) model.save(path) model.load(path) fluid.disable_dygraph() if dynamic else None @@ -558,14 +559,14 @@ class TestModelFunction(unittest.TestCase): inputs = [InputSpec([None, 1, 28, 28], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 'label')] if new_optimizer: - optim = paddle.optimizer.Adam( - learning_rate=0.001, parameters=net.parameters()) + optim = paddle.optimizer.Adam(learning_rate=0.001, + parameters=net.parameters()) else: - optim = fluid.optimizer.Adam( - learning_rate=0.001, parameter_list=net.parameters()) + optim = fluid.optimizer.Adam(learning_rate=0.001, + parameter_list=net.parameters()) model = Model(net, inputs, labels) - model.prepare( - optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) + model.prepare(optimizer=optim, + loss=CrossEntropyLoss(reduction="sum")) model.fit(mnist_data, batch_size=64, verbose=0) model.save(path) model.load(path) @@ -638,6 +639,7 @@ class TestModelFunction(unittest.TestCase): fluid.disable_dygraph() if dynamic else None def test_summary(self): + def _get_param_from_state_dict(state_dict): params = 0 for k, v in state_dict.items(): @@ -665,6 +667,7 @@ class TestModelFunction(unittest.TestCase): paddle.summary(ModelOutter(), input_size=(-1, 3)) def test_summary_nlp(self): + def _get_param_from_state_dict(state_dict): params = 0 for k, v in state_dict.items(): @@ -678,8 +681,8 @@ class TestModelFunction(unittest.TestCase): paddle.summary(nlp_net, (1, 1, 2)) rnn = paddle.nn.LSTM(16, 32, 2) - params_info = paddle.summary( - rnn, [(-1, 23, 16), ((2, 
None, 32), (2, -1, 32))]) + params_info = paddle.summary(rnn, [(-1, 23, 16), + ((2, None, 32), (2, -1, 32))]) gt_params = _get_param_from_state_dict(rnn.state_dict()) np.testing.assert_allclose(params_info['total_params'], gt_params / 2.0) @@ -748,22 +751,22 @@ class TestModelFunction(unittest.TestCase): def customize_dropout(m, x, y): m.total_ops += 0 - paddle.flops( - net, [1, 3, 224, 224], - custom_ops={paddle.nn.Dropout: customize_dropout}, - print_detail=True) + paddle.flops(net, [1, 3, 224, 224], + custom_ops={paddle.nn.Dropout: customize_dropout}, + print_detail=True) def test_dynamic_flops_with_multiple_outputs(self): - net = paddle.nn.MaxPool2D( - kernel_size=2, stride=2, padding=0, return_mask=True) + net = paddle.nn.MaxPool2D(kernel_size=2, + stride=2, + padding=0, + return_mask=True) def customize_dropout(m, x, y): m.total_ops += 0 - paddle.flops( - net, [1, 2, 32, 32], - custom_ops={paddle.nn.Dropout: customize_dropout}, - print_detail=True) + paddle.flops(net, [1, 2, 32, 32], + custom_ops={paddle.nn.Dropout: customize_dropout}, + print_detail=True) def test_export_deploy_model(self): self.set_seed() @@ -783,26 +786,28 @@ class TestModelFunction(unittest.TestCase): model = Model(net, inputs) model.prepare() - tensor_img = np.array( - np.random.random((1, 1, 28, 28)), dtype=np.float32) + tensor_img = np.array(np.random.random((1, 1, 28, 28)), + dtype=np.float32) model.save(save_dir, training=False) ori_results = model.predict_batch(tensor_img) fluid.disable_dygraph() if dynamic else None - place = fluid.CPUPlace() if not fluid.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace( + ) if not fluid.is_compiled_with_cuda() else fluid.CUDAPlace(0) new_scope = fluid.Scope() with fluid.scope_guard(new_scope): exe = fluid.Executor(place) - [inference_program, feed_target_names, fetch_targets] = ( - paddle.static.io.load_inference_model( - path_prefix=save_dir, executor=exe)) + [inference_program, feed_target_names, + fetch_targets] = (paddle.static.io.load_inference_model( + path_prefix=save_dir, executor=exe)) results = exe.run(inference_program, feed={feed_target_names[0]: tensor_img}, fetch_list=fetch_targets) - np.testing.assert_allclose( - results, ori_results, rtol=1e-5, atol=1e-6) + np.testing.assert_allclose(results, + ori_results, + rtol=1e-5, + atol=1e-6) paddle.enable_static() @@ -821,15 +826,15 @@ class TestModelFunction(unittest.TestCase): for initial in ["fit", "train_batch", "eval_batch", "predict_batch"]: net = LeNet() model = Model(net) - optim = fluid.optimizer.Adam( - learning_rate=0.001, parameter_list=model.parameters()) - model.prepare( - optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) + optim = fluid.optimizer.Adam(learning_rate=0.001, + parameter_list=model.parameters()) + model.prepare(optimizer=optim, + loss=CrossEntropyLoss(reduction="sum")) if initial == "fit": model.fit(mnist_data, batch_size=64, verbose=0) else: - img = np.array( - np.random.random((1, 1, 28, 28)), dtype=np.float32) + img = np.array(np.random.random((1, 1, 28, 28)), + dtype=np.float32) label = np.array(np.random.rand(1, 1), dtype=np.int64) if initial == "train_batch": model.train_batch([img], [label]) @@ -848,8 +853,8 @@ class TestModelFunction(unittest.TestCase): net = LeNet() inputs = InputSpec([None, 1, 28, 28], 'float32', 'x') model = Model(net, inputs) - optim = fluid.optimizer.Adam( - learning_rate=0.001, parameter_list=model.parameters()) + optim = fluid.optimizer.Adam(learning_rate=0.001, + parameter_list=model.parameters()) 
model.prepare(optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) model.save(save_dir, training=False) shutil.rmtree(save_dir) @@ -866,10 +871,9 @@ class TestModelFunction(unittest.TestCase): for amp_cfg in [None, 'O1']: model = Model(net, inputs, labels) - model.prepare( - optim, - loss=CrossEntropyLoss(reduction="sum"), - amp_configs=amp_cfg) + model.prepare(optim, + loss=CrossEntropyLoss(reduction="sum"), + amp_configs=amp_cfg) losses, grads = [], [] for stat in [False, False, True]: loss, = model.train_batch([data], [label], update=stat) @@ -878,14 +882,16 @@ class TestModelFunction(unittest.TestCase): for grad1, grad2, grad3 in zip(*grads): np.testing.assert_almost_equal(grad1 * 2, grad2, decimal=4) - np.testing.assert_almost_equal( - grad3, np.zeros_like(grad3), decimal=4) + np.testing.assert_almost_equal(grad3, + np.zeros_like(grad3), + decimal=4) np.testing.assert_almost_equal(losses[0], losses[1], decimal=4) np.testing.assert_almost_equal(losses[0], losses[2], decimal=4) class TestModelWithLRScheduler(unittest.TestCase): + def test_fit_by_step(self): base_lr = 1e-3 boundaries = [5, 8] @@ -902,11 +908,10 @@ class TestModelWithLRScheduler(unittest.TestCase): start_lr=base_lr / 5., end_lr=base_lr, verbose=True) - optimizer = paddle.optimizer.Momentum( - learning_rate=learning_rate, - weight_decay=weight_decay, - momentum=momentum, - parameters=parameters) + optimizer = paddle.optimizer.Momentum(learning_rate=learning_rate, + weight_decay=weight_decay, + momentum=momentum, + parameters=parameters) return optimizer # dynamic test @@ -958,11 +963,10 @@ class TestModelWithLRScheduler(unittest.TestCase): start_lr=base_lr / 5., end_lr=base_lr, verbose=True) - optimizer = paddle.optimizer.Momentum( - learning_rate=learning_rate, - weight_decay=weight_decay, - momentum=momentum, - parameters=parameters) + optimizer = paddle.optimizer.Momentum(learning_rate=learning_rate, + weight_decay=weight_decay, + momentum=momentum, + parameters=parameters) return optimizer # dynamic test @@ -977,8 +981,8 @@ class TestModelWithLRScheduler(unittest.TestCase): dataset = MyDataset() - lr_scheduler_callback = paddle.callbacks.LRScheduler( - by_step=False, by_epoch=True) + lr_scheduler_callback = paddle.callbacks.LRScheduler(by_step=False, + by_epoch=True) model.fit(dataset, dataset, @@ -1006,8 +1010,8 @@ class TestModelWithLRScheduler(unittest.TestCase): dataset = MyDataset() - lr_scheduler_callback = paddle.callbacks.LRScheduler( - by_step=False, by_epoch=True) + lr_scheduler_callback = paddle.callbacks.LRScheduler(by_step=False, + by_epoch=True) model.fit(dataset, dataset, @@ -1026,6 +1030,7 @@ class TestModelWithLRScheduler(unittest.TestCase): class TestRaiseError(unittest.TestCase): + def test_input_without_name(self): net = MyModel() inputs = [InputSpec([None, 10], 'float32')] @@ -1058,8 +1063,8 @@ class TestRaiseError(unittest.TestCase): model = Model(net, inputs) model.prepare() path = "" - tensor_img = np.array( - np.random.random((1, 1, 28, 28)), dtype=np.float32) + tensor_img = np.array(np.random.random((1, 1, 28, 28)), + dtype=np.float32) with self.assertRaises(ValueError): model.save(path, training=False) diff --git a/python/paddle/tests/test_ops_roi_align.py b/python/paddle/tests/test_ops_roi_align.py index 4a37831a0cc..145f77e846b 100644 --- a/python/paddle/tests/test_ops_roi_align.py +++ b/python/paddle/tests/test_ops_roi_align.py @@ -20,6 +20,7 @@ from paddle.vision.ops import roi_align, RoIAlign class TestRoIAlign(unittest.TestCase): + def setUp(self): self.data = np.random.rand(1, 256, 
32, 32).astype('float32') boxes = np.random.rand(3, 4) @@ -39,22 +40,27 @@ class TestRoIAlign(unittest.TestCase): boxes = paddle.to_tensor(self.boxes) boxes_num = paddle.to_tensor(self.boxes_num) - align_out = roi_align( - data, boxes, boxes_num=boxes_num, output_size=output_size) + align_out = roi_align(data, + boxes, + boxes_num=boxes_num, + output_size=output_size) np.testing.assert_equal(align_out.shape, output_shape) else: - data = paddle.static.data( - shape=self.data.shape, dtype=self.data.dtype, name='data') - boxes = paddle.static.data( - shape=self.boxes.shape, dtype=self.boxes.dtype, name='boxes') - boxes_num = paddle.static.data( - shape=self.boxes_num.shape, - dtype=self.boxes_num.dtype, - name='boxes_num') - - align_out = roi_align( - data, boxes, boxes_num=boxes_num, output_size=output_size) + data = paddle.static.data(shape=self.data.shape, + dtype=self.data.dtype, + name='data') + boxes = paddle.static.data(shape=self.boxes.shape, + dtype=self.boxes.dtype, + name='boxes') + boxes_num = paddle.static.data(shape=self.boxes_num.shape, + dtype=self.boxes_num.dtype, + name='boxes_num') + + align_out = roi_align(data, + boxes, + boxes_num=boxes_num, + output_size=output_size) place = paddle.CPUPlace() exe = paddle.static.Executor(place) @@ -90,8 +96,8 @@ class TestRoIAlign(unittest.TestCase): def test_value(self, ): data = np.array([i for i in range(1, 17)]).reshape(1, 1, 4, 4).astype(np.float32) - boxes = np.array( - [[1., 1., 2., 2.], [1.5, 1.5, 3., 3.]]).astype(np.float32) + boxes = np.array([[1., 1., 2., 2.], [1.5, 1.5, 3., + 3.]]).astype(np.float32) boxes_num = np.array([2]).astype(np.int32) output = np.array([[[[6.]]], [[[9.75]]]], dtype=np.float32) diff --git a/python/paddle/tests/test_ops_roi_pool.py b/python/paddle/tests/test_ops_roi_pool.py index 3c84a55da1e..eaeb785df7d 100644 --- a/python/paddle/tests/test_ops_roi_pool.py +++ b/python/paddle/tests/test_ops_roi_pool.py @@ -20,6 +20,7 @@ from paddle.vision.ops import roi_pool, RoIPool class TestRoIPool(unittest.TestCase): + def setUp(self): self.data = np.random.rand(1, 256, 32, 32).astype('float32') boxes = np.random.rand(3, 4) @@ -40,22 +41,27 @@ class TestRoIPool(unittest.TestCase): boxes = paddle.to_tensor(self.boxes) boxes_num = paddle.to_tensor(self.boxes_num) - pool_out = roi_pool( - data, boxes, boxes_num=boxes_num, output_size=output_size) + pool_out = roi_pool(data, + boxes, + boxes_num=boxes_num, + output_size=output_size) np.testing.assert_equal(pool_out.shape, output_shape) else: - data = paddle.static.data( - shape=self.data.shape, dtype=self.data.dtype, name='data') - boxes = paddle.static.data( - shape=self.boxes.shape, dtype=self.boxes.dtype, name='boxes') - boxes_num = paddle.static.data( - shape=self.boxes_num.shape, - dtype=self.boxes_num.dtype, - name='boxes_num') - - pool_out = roi_pool( - data, boxes, boxes_num=boxes_num, output_size=output_size) + data = paddle.static.data(shape=self.data.shape, + dtype=self.data.dtype, + name='data') + boxes = paddle.static.data(shape=self.boxes.shape, + dtype=self.boxes.dtype, + name='boxes') + boxes_num = paddle.static.data(shape=self.boxes_num.shape, + dtype=self.boxes_num.dtype, + name='boxes_num') + + pool_out = roi_pool(data, + boxes, + boxes_num=boxes_num, + output_size=output_size) place = paddle.CPUPlace() exe = paddle.static.Executor(place) @@ -91,8 +97,8 @@ class TestRoIPool(unittest.TestCase): def test_value(self, ): data = np.array([i for i in range(1, 17)]).reshape(1, 1, 4, 4).astype(np.float32) - boxes = np.array( - [[1., 1., 2., 2.], [1.5, 1.5, 
3., 3.]]).astype(np.float32) + boxes = np.array([[1., 1., 2., 2.], [1.5, 1.5, 3., + 3.]]).astype(np.float32) boxes_num = np.array([2]).astype(np.int32) output = np.array([[[[11.]]], [[[16.]]]], dtype=np.float32) diff --git a/python/paddle/tests/test_pretrained_model.py b/python/paddle/tests/test_pretrained_model.py index 4441faee14e..4c655ce3c1f 100644 --- a/python/paddle/tests/test_pretrained_model.py +++ b/python/paddle/tests/test_pretrained_model.py @@ -26,6 +26,7 @@ import paddle.vision.models as models # test the predicted resutls of static graph and dynamic graph are equal # when used pretrained model class TestPretrainedModel(unittest.TestCase): + def infer(self, arch): path = os.path.join(tempfile.mkdtemp(), '.cache_test_pretrained_model') if not os.path.exists(path): @@ -56,8 +57,8 @@ class TestPretrainedModel(unittest.TestCase): def test_models(self): # TODO (LielinJiang): when model file cache is ok. add following test back - # 'resnet18', 'vgg16', 'alexnet', 'resnext50_32x4d', 'inception_v3', - # 'densenet121', 'googlenet', 'wide_resnet50_2', 'wide_resnet101_2' + # 'resnet18', 'vgg16', 'alexnet', 'resnext50_32x4d', 'inception_v3', + # 'densenet121', 'googlenet', 'wide_resnet50_2', 'wide_resnet101_2' arches = [ 'mobilenet_v1', 'mobilenet_v2', diff --git a/python/paddle/tests/test_progressbar.py b/python/paddle/tests/test_progressbar.py index a68aee7aa8f..c42f1e4db0f 100644 --- a/python/paddle/tests/test_progressbar.py +++ b/python/paddle/tests/test_progressbar.py @@ -21,6 +21,7 @@ from paddle.hapi.progressbar import ProgressBar class TestProgressBar(unittest.TestCase): + def prog_bar(self, num, epoch, width, verbose=1): for epoch in range(epoch): progbar = ProgressBar(num, verbose=verbose) diff --git a/python/paddle/tests/test_read_file.py b/python/paddle/tests/test_read_file.py index fbcba9a6bbf..0dad971a730 100644 --- a/python/paddle/tests/test_read_file.py +++ b/python/paddle/tests/test_read_file.py @@ -23,6 +23,7 @@ from paddle.vision.ops import read_file, decode_jpeg class TestReadFile(unittest.TestCase): + def setUp(self): fake_img = (np.random.random((400, 300, 3)) * 255).astype('uint8') cv2.imwrite('fake.jpg', fake_img) diff --git a/python/paddle/tests/test_transforms.py b/python/paddle/tests/test_transforms.py index e07ac47a0f8..35a0f8edc48 100644 --- a/python/paddle/tests/test_transforms.py +++ b/python/paddle/tests/test_transforms.py @@ -28,6 +28,7 @@ import paddle.vision.transforms.functional as F class TestTransformsCV2(unittest.TestCase): + def setUp(self): self.backend = self.get_backend() set_image_backend(self.backend) @@ -52,8 +53,8 @@ class TestTransformsCV2(unittest.TestCase): if self.backend == 'cv2': return (np.random.rand(*shape) * 255).astype('uint8') elif self.backend == 'pil': - return Image.fromarray((np.random.rand(*shape) * 255).astype( - 'uint8')) + return Image.fromarray( + (np.random.rand(*shape) * 255).astype('uint8')) def get_shape(self, img): if isinstance(img, paddle.Tensor): @@ -76,11 +77,14 @@ class TestTransformsCV2(unittest.TestCase): def test_trans_all(self): normalize = transforms.Normalize( mean=[123.675, 116.28, 103.53], - std=[58.395, 57.120, 57.375], ) + std=[58.395, 57.120, 57.375], + ) trans = transforms.Compose([ transforms.RandomResizedCrop(224), - transforms.ColorJitter( - brightness=0.4, contrast=0.4, saturation=0.4, hue=0.4), + transforms.ColorJitter(brightness=0.4, + contrast=0.4, + saturation=0.4, + hue=0.4), transforms.RandomHorizontalFlip(), transforms.Transpose(), normalize, @@ -126,38 +130,34 @@ class 
TestTransformsCV2(unittest.TestCase): def test_affine(self): trans = transforms.Compose([ transforms.RandomAffine(90), - transforms.RandomAffine( - [-10, 10], translate=[0.1, 0.3]), - transforms.RandomAffine( - 45, translate=[0.2, 0.2], scale=[0.2, 0.5]), - transforms.RandomAffine( - 10, translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10, 10]), - transforms.RandomAffine( - 10, - translate=[0.5, 0.3], - scale=[0.7, 1.3], - shear=[-10, 10, 20, 40]), - transforms.RandomAffine( - 10, - translate=[0.5, 0.3], - scale=[0.7, 1.3], - shear=[-10, 10, 20, 40], - interpolation='bilinear'), - transforms.RandomAffine( - 10, - translate=[0.5, 0.3], - scale=[0.7, 1.3], - shear=[-10, 10, 20, 40], - interpolation='bilinear', - fill=114), - transforms.RandomAffine( - 10, - translate=[0.5, 0.3], - scale=[0.7, 1.3], - shear=[-10, 10, 20, 40], - interpolation='bilinear', - fill=114, - center=(60, 80)), + transforms.RandomAffine([-10, 10], translate=[0.1, 0.3]), + transforms.RandomAffine(45, translate=[0.2, 0.2], scale=[0.2, 0.5]), + transforms.RandomAffine(10, + translate=[0.2, 0.2], + scale=[0.5, 0.5], + shear=[-10, 10]), + transforms.RandomAffine(10, + translate=[0.5, 0.3], + scale=[0.7, 1.3], + shear=[-10, 10, 20, 40]), + transforms.RandomAffine(10, + translate=[0.5, 0.3], + scale=[0.7, 1.3], + shear=[-10, 10, 20, 40], + interpolation='bilinear'), + transforms.RandomAffine(10, + translate=[0.5, 0.3], + scale=[0.7, 1.3], + shear=[-10, 10, 20, 40], + interpolation='bilinear', + fill=114), + transforms.RandomAffine(10, + translate=[0.5, 0.3], + scale=[0.7, 1.3], + shear=[-10, 10, 20, 40], + interpolation='bilinear', + fill=114, + center=(60, 80)), ]) self.do_transform(trans) @@ -165,18 +165,15 @@ class TestTransformsCV2(unittest.TestCase): trans = transforms.Compose([ transforms.RandomRotation(90), transforms.RandomRotation([-10, 10]), - transforms.RandomRotation( - 45, expand=True), - transforms.RandomRotation( - 10, expand=True, center=(60, 80)), + transforms.RandomRotation(45, expand=True), + transforms.RandomRotation(10, expand=True, center=(60, 80)), ]) self.do_transform(trans) def test_perspective(self): trans = transforms.Compose([ transforms.RandomPerspective(prob=1.0), - transforms.RandomPerspective( - prob=1.0, distortion_scale=0.9), + transforms.RandomPerspective(prob=1.0, distortion_scale=0.9), ]) self.do_transform(trans) @@ -214,8 +211,8 @@ class TestTransformsCV2(unittest.TestCase): trans_random_crop_same = transforms.RandomCrop((140, 160)) img = trans_random_crop_same(fake_img_crop2) - trans_random_crop_bigger = transforms.RandomCrop( - (180, 200), pad_if_needed=True) + trans_random_crop_bigger = transforms.RandomCrop((180, 200), + pad_if_needed=True) img = trans_random_crop_bigger(img) trans_random_crop_pad = transforms.RandomCrop((224, 256), 2, True) @@ -223,7 +220,8 @@ class TestTransformsCV2(unittest.TestCase): def test_erase(self): trans = transforms.Compose([ - transforms.RandomErasing(), transforms.RandomErasing(value="random") + transforms.RandomErasing(), + transforms.RandomErasing(value="random") ]) self.do_transform(trans) @@ -334,24 +332,24 @@ class TestTransformsCV2(unittest.TestCase): transforms.RandomAffine(10, translate=[0.2, 0.2], scale=[1, 2, 3]), with self.assertRaises(ValueError): - transforms.RandomAffine( - 10, translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[1, 2, 3]), + transforms.RandomAffine(10, + translate=[0.2, 0.2], + scale=[0.5, 0.5], + shear=[1, 2, 3]), with self.assertRaises(ValueError): - transforms.RandomAffine( - 10, - translate=[0.5, 0.3], - scale=[0.7, 1.3], - 
shear=[-10, 10, 0, 20, 40]) + transforms.RandomAffine(10, + translate=[0.5, 0.3], + scale=[0.7, 1.3], + shear=[-10, 10, 0, 20, 40]) with self.assertRaises(ValueError): - transforms.RandomAffine( - 10, - translate=[0.5, 0.3], - scale=[0.7, 1.3], - shear=[-10, 10, 20, 40], - fill=114, - center=(1, 2, 3)) + transforms.RandomAffine(10, + translate=[0.5, 0.3], + scale=[0.7, 1.3], + shear=[-10, 10, 20, 40], + fill=114, + center=(1, 2, 3)) with self.assertRaises(ValueError): transforms.RandomRotation(-2) @@ -404,11 +402,13 @@ class TestTransformsCV2(unittest.TestCase): class TestTransformsPIL(TestTransformsCV2): + def get_backend(self): return 'pil' class TestTransformsTensor(TestTransformsCV2): + def get_backend(self): return 'tensor' @@ -426,7 +426,8 @@ class TestTransformsTensor(TestTransformsCV2): def test_trans_all(self): normalize = transforms.Normalize( mean=[123.675, 116.28, 103.53], - std=[58.395, 57.120, 57.375], ) + std=[58.395, 57.120, 57.375], + ) trans = transforms.Compose([ transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), @@ -508,8 +509,8 @@ class TestTransformsTensor(TestTransformsCV2): trans_random_crop_same = transforms.RandomCrop((140, 160)) img = trans_random_crop_same(fake_img_crop2) - trans_random_crop_bigger = transforms.RandomCrop( - (180, 200), pad_if_needed=True) + trans_random_crop_bigger = transforms.RandomCrop((180, 200), + pad_if_needed=True) img = trans_random_crop_bigger(img) trans_random_crop_pad = transforms.RandomCrop((224, 256), 2, True) @@ -585,15 +586,16 @@ class TestTransformsTensor(TestTransformsCV2): transforms.RandomAffine(10, translate=[0.2, 0.2], scale=[1, 2, 3]), with self.assertRaises(ValueError): - transforms.RandomAffine( - 10, translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[1, 2, 3]), + transforms.RandomAffine(10, + translate=[0.2, 0.2], + scale=[0.5, 0.5], + shear=[1, 2, 3]), with self.assertRaises(ValueError): - transforms.RandomAffine( - 10, - translate=[0.5, 0.3], - scale=[0.7, 1.3], - shear=[-10, 10, 0, 20, 40]) + transforms.RandomAffine(10, + translate=[0.5, 0.3], + scale=[0.7, 1.3], + shear=[-10, 10, 0, 20, 40]) with self.assertRaises(ValueError): transforms.RandomRotation(-2) @@ -614,13 +616,14 @@ class TestTransformsTensor(TestTransformsCV2): class TestFunctional(unittest.TestCase): + def test_errors(self): with self.assertRaises(TypeError): F.to_tensor(1) with self.assertRaises(ValueError): - fake_img = Image.fromarray((np.random.rand(28, 28, 3) * 255).astype( - 'uint8')) + fake_img = Image.fromarray( + (np.random.rand(28, 28, 3) * 255).astype('uint8')) F.to_tensor(fake_img, data_format=1) with self.assertRaises(ValueError): @@ -632,8 +635,8 @@ class TestFunctional(unittest.TestCase): F.resize(fake_img, {1: 1}) with self.assertRaises(TypeError): - fake_img = Image.fromarray((np.random.rand(28, 28, 3) * 255).astype( - 'uint8')) + fake_img = Image.fromarray( + (np.random.rand(28, 28, 3) * 255).astype('uint8')) F.resize(fake_img, '1') with self.assertRaises(TypeError): @@ -682,16 +685,18 @@ class TestFunctional(unittest.TestCase): F.affine(45, translate=[0.2, 0.2], scale=0.5, shear=[-10, 0, 10]) with self.assertRaises(TypeError): - F.affine( - 45, - translate=[0.2, 0.2], - scale=0.5, - shear=[-10, 10], - interpolation=2) + F.affine(45, + translate=[0.2, 0.2], + scale=0.5, + shear=[-10, 10], + interpolation=2) with self.assertRaises(TypeError): - F.affine( - 45, translate=[0.2, 0.2], scale=0.5, shear=[-10, 10], center=0) + F.affine(45, + translate=[0.2, 0.2], + scale=0.5, + shear=[-10, 10], + center=0) with 
self.assertRaises(TypeError): F.rotate(1, 0.1) @@ -715,17 +720,23 @@ class TestFunctional(unittest.TestCase): std = [0.5, 0.5, 0.5] normalized_img = F.normalize(tensor_img, mean, std) - normalized_img_tensor = F.normalize( - tensor_img_hwc, mean, std, data_format='HWC') + normalized_img_tensor = F.normalize(tensor_img_hwc, + mean, + std, + data_format='HWC') normalized_img_pil = F.normalize(pil_img, mean, std, data_format='HWC') - normalized_img_np = F.normalize( - np_img, mean, std, data_format='HWC', to_rgb=False) - - np.testing.assert_almost_equal( - np.array(normalized_img_pil), normalized_img_np) - np.testing.assert_almost_equal( - normalized_img_tensor.numpy(), normalized_img_np, decimal=4) + normalized_img_np = F.normalize(np_img, + mean, + std, + data_format='HWC', + to_rgb=False) + + np.testing.assert_almost_equal(np.array(normalized_img_pil), + normalized_img_np) + np.testing.assert_almost_equal(normalized_img_tensor.numpy(), + normalized_img_np, + decimal=4) def test_center_crop(self): np_img = (np.random.rand(28, 24, 3) * 255).astype('uint8') @@ -738,10 +749,10 @@ class TestFunctional(unittest.TestCase): np.testing.assert_almost_equal(np_cropped_img, np.array(pil_cropped_img)) - np.testing.assert_almost_equal( - np_cropped_img, - tensor_cropped_img.numpy().transpose((1, 2, 0)), - decimal=4) + np.testing.assert_almost_equal(np_cropped_img, + tensor_cropped_img.numpy().transpose( + (1, 2, 0)), + decimal=4) def test_color_jitter_sub_function(self): np.random.seed(555) @@ -806,10 +817,10 @@ class TestFunctional(unittest.TestCase): tensor_padded_img = F.pad(tensor_img, [1, 2], padding_mode='reflect') np.testing.assert_almost_equal(np_padded_img, np.array(pil_padded_img)) - np.testing.assert_almost_equal( - np_padded_img, - tensor_padded_img.numpy().transpose((1, 2, 0)), - decimal=3) + np.testing.assert_almost_equal(np_padded_img, + tensor_padded_img.numpy().transpose( + (1, 2, 0)), + decimal=3) tensor_padded_img = F.pad(tensor_img, 1, padding_mode='reflect') tensor_padded_img = F.pad(tensor_img, [1, 2, 1, 2], @@ -831,14 +842,14 @@ class TestFunctional(unittest.TestCase): np.testing.assert_almost_equal(np_reseized_img, np.array(pil_reseized_img)) - np.testing.assert_almost_equal( - np_reseized_img, - tensor_reseized_img.numpy().transpose((1, 2, 0)), - decimal=3) - np.testing.assert_almost_equal( - np_reseized_img, - tensor_reseized_img2.numpy().transpose((1, 2, 0)), - decimal=3) + np.testing.assert_almost_equal(np_reseized_img, + tensor_reseized_img.numpy().transpose( + (1, 2, 0)), + decimal=3) + np.testing.assert_almost_equal(np_reseized_img, + tensor_reseized_img2.numpy().transpose( + (1, 2, 0)), + decimal=3) gray_img = (np.zeros([28, 32])).astype('uint8') gray_resize_img = F.resize(gray_img, 40) @@ -852,7 +863,7 @@ class TestFunctional(unittest.TestCase): np.testing.assert_allclose(np_tensor.numpy(), pil_tensor.numpy()) - # test float dtype + # test float dtype float_img = np.random.rand(28, 28) float_tensor = F.to_tensor(float_img) @@ -902,9 +913,8 @@ class TestFunctional(unittest.TestCase): def test_erase_backward(self): img = paddle.randn((3, 14, 14), dtype=np.float32) img.stop_gradient = False - erased = F.erase( - img, 3, 3, 5, 5, paddle.ones( - (1, 1, 1), dtype='float32')) + erased = F.erase(img, 3, 3, 5, 5, paddle.ones((1, 1, 1), + dtype='float32')) loss = erased.sum() loss.backward() @@ -913,8 +923,8 @@ class TestFunctional(unittest.TestCase): np.testing.assert_equal(img.grad.numpy(), expected_grad) def test_image_load(self): - fake_img = 
Image.fromarray((np.random.random((32, 32, 3)) * 255).astype( - 'uint8')) + fake_img = Image.fromarray((np.random.random( + (32, 32, 3)) * 255).astype('uint8')) path = 'temp.jpg' fake_img.save(path) @@ -936,25 +946,35 @@ class TestFunctional(unittest.TestCase): pil_img = Image.fromarray(np_img).convert('RGB') tensor_img = F.to_tensor(pil_img, data_format='CHW') * 255 - np.testing.assert_almost_equal( - np_img, tensor_img.transpose((1, 2, 0)), decimal=4) - - np_affined_img = F.affine( - np_img, 45, translate=[0.2, 0.2], scale=0.5, shear=[-10, 10]) - pil_affined_img = F.affine( - pil_img, 45, translate=[0.2, 0.2], scale=0.5, shear=[-10, 10]) - tensor_affined_img = F.affine( - tensor_img, 45, translate=[0.2, 0.2], scale=0.5, shear=[-10, 10]) + np.testing.assert_almost_equal(np_img, + tensor_img.transpose((1, 2, 0)), + decimal=4) + + np_affined_img = F.affine(np_img, + 45, + translate=[0.2, 0.2], + scale=0.5, + shear=[-10, 10]) + pil_affined_img = F.affine(pil_img, + 45, + translate=[0.2, 0.2], + scale=0.5, + shear=[-10, 10]) + tensor_affined_img = F.affine(tensor_img, + 45, + translate=[0.2, 0.2], + scale=0.5, + shear=[-10, 10]) np.testing.assert_equal(np_affined_img.shape, np.array(pil_affined_img).shape) np.testing.assert_equal(np_affined_img.shape, tensor_affined_img.transpose((1, 2, 0)).shape) - np.testing.assert_almost_equal( - np.array(pil_affined_img), - tensor_affined_img.numpy().transpose((1, 2, 0)), - decimal=4) + np.testing.assert_almost_equal(np.array(pil_affined_img), + tensor_affined_img.numpy().transpose( + (1, 2, 0)), + decimal=4) def test_rotate(self): np_img = (np.random.rand(28, 28, 3) * 255).astype('uint8') @@ -966,12 +986,11 @@ class TestFunctional(unittest.TestCase): rotated_tensor_img1 = F.rotate(tensor_img, 80, expand=True) - rotated_tensor_img2 = F.rotate( - tensor_img, - 80, - interpolation='bilinear', - center=(10, 10), - expand=False) + rotated_tensor_img2 = F.rotate(tensor_img, + 80, + interpolation='bilinear', + center=(10, 10), + expand=False) np.testing.assert_equal(rotated_np_img.shape, np.array(rotated_pil_img).shape) @@ -982,10 +1001,16 @@ class TestFunctional(unittest.TestCase): np_img = (np.random.rand(28, 28, 3) * 255).astype('uint8') pil_img = Image.fromarray(np_img).convert('RGB') - rotated_np_img = F.rotate( - np_img, 80, expand=True, center=[0, 0], fill=[0, 0, 0]) - rotated_pil_img = F.rotate( - pil_img, 80, expand=True, center=[0, 0], fill=[0, 0, 0]) + rotated_np_img = F.rotate(np_img, + 80, + expand=True, + center=[0, 0], + fill=[0, 0, 0]) + rotated_pil_img = F.rotate(pil_img, + 80, + expand=True, + center=[0, 0], + fill=[0, 0, 0]) np.testing.assert_equal(rotated_np_img.shape, np.array(rotated_pil_img).shape) @@ -995,8 +1020,9 @@ class TestFunctional(unittest.TestCase): pil_img = Image.fromarray(np_img).convert('RGB') tensor_img = F.to_tensor(pil_img, data_format='CHW') * 255 - np.testing.assert_almost_equal( - np_img, tensor_img.transpose((1, 2, 0)), decimal=4) + np.testing.assert_almost_equal(np_img, + tensor_img.transpose((1, 2, 0)), + decimal=4) startpoints = [[0, 0], [13, 0], [13, 15], [0, 15]] endpoints = [[3, 2], [12, 3], [10, 14], [2, 15]] @@ -1008,9 +1034,9 @@ class TestFunctional(unittest.TestCase): np.testing.assert_equal(np_perspectived_img.shape, np.array(pil_perspectived_img).shape) - np.testing.assert_equal(np_perspectived_img.shape, - tensor_perspectived_img.transpose( - (1, 2, 0)).shape) + np.testing.assert_equal( + np_perspectived_img.shape, + tensor_perspectived_img.transpose((1, 2, 0)).shape) result_pil = 
np.array(pil_perspectived_img) result_tensor = tensor_perspectived_img.numpy().transpose( @@ -1041,24 +1067,22 @@ class TestFunctional(unittest.TestCase): def test_affine(batch_tensor): input1, input2 = paddle.unbind(batch_tensor, axis=0) target_result = paddle.stack([ - F.affine( - input1, - 45, - translate=[0.2, 0.2], - scale=0.5, - shear=[-10, 10]), F.affine( - input2, - 45, - translate=[0.2, 0.2], - scale=0.5, - shear=[-10, 10]) + F.affine(input1, + 45, + translate=[0.2, 0.2], + scale=0.5, + shear=[-10, 10]), + F.affine(input2, + 45, + translate=[0.2, 0.2], + scale=0.5, + shear=[-10, 10]) ]) - batch_result = F.affine( - batch_tensor, - 45, - translate=[0.2, 0.2], - scale=0.5, - shear=[-10, 10]) + batch_result = F.affine(batch_tensor, + 45, + translate=[0.2, 0.2], + scale=0.5, + shear=[-10, 10]) return paddle.allclose(batch_result, target_result) @@ -1095,7 +1119,8 @@ class TestFunctional(unittest.TestCase): def test_adjust_contrast(batch_tensor): input1, input2 = paddle.unbind(batch_tensor, axis=0) target_result = paddle.stack([ - F.adjust_contrast(input1, 0.3), F.adjust_contrast(input2, 0.3) + F.adjust_contrast(input1, 0.3), + F.adjust_contrast(input2, 0.3) ]) batch_result = F.adjust_contrast(batch_tensor, 0.3) @@ -1120,7 +1145,8 @@ class TestFunctional(unittest.TestCase): def test_adjust_hue(batch_tensor): input1, input2 = paddle.unbind(batch_tensor, axis=0) target_result = paddle.stack( - [F.adjust_hue(input1, -0.2), F.adjust_hue(input2, -0.2)]) + [F.adjust_hue(input1, -0.2), + F.adjust_hue(input2, -0.2)]) batch_result = F.adjust_hue(batch_tensor, -0.2) diff --git a/python/paddle/tests/test_vision_models.py b/python/paddle/tests/test_vision_models.py index dc98fc3219b..1f53060beb0 100644 --- a/python/paddle/tests/test_vision_models.py +++ b/python/paddle/tests/test_vision_models.py @@ -20,6 +20,7 @@ import paddle.vision.models as models class TestVisonModels(unittest.TestCase): + def models_infer(self, arch, pretrained=False, batch_norm=False): x = np.array(np.random.random((2, 3, 224, 224)), dtype=np.float32) diff --git a/python/paddle/text/__init__.py b/python/paddle/text/__init__.py index f6bfa1c7358..5775a247858 100644 --- a/python/paddle/text/__init__.py +++ b/python/paddle/text/__init__.py @@ -21,14 +21,7 @@ from .datasets import UCIHousing # noqa: F401 from .datasets import WMT14 # noqa: F401 from .datasets import WMT16 # noqa: F401 -__all__ = [ #noqa - 'Conll05st', - 'Imdb', - 'Imikolov', - 'Movielens', - 'UCIHousing', - 'WMT14', - 'WMT16', - 'ViterbiDecoder', - 'viterbi_decode' +__all__ = [ #noqa + 'Conll05st', 'Imdb', 'Imikolov', 'Movielens', 'UCIHousing', 'WMT14', + 'WMT16', 'ViterbiDecoder', 'viterbi_decode' ] diff --git a/python/paddle/text/datasets/conll05.py b/python/paddle/text/datasets/conll05.py index 88ae5e3d8c6..09f54d674fd 100644 --- a/python/paddle/text/datasets/conll05.py +++ b/python/paddle/text/datasets/conll05.py @@ -106,8 +106,9 @@ class Conll05st(Dataset): self.data_file = data_file if self.data_file is None: assert download, "data_file is not set and downloading automatically is disabled" - self.data_file = _check_exists_and_download( - data_file, DATA_URL, DATA_MD5, 'conll05st', download) + self.data_file = _check_exists_and_download(data_file, DATA_URL, + DATA_MD5, 'conll05st', + download) self.word_dict_file = word_dict_file if self.word_dict_file is None: @@ -133,8 +134,9 @@ class Conll05st(Dataset): self.emb_file = emb_file if self.emb_file is None: assert download, "emb_file is not set and downloading automatically is disabled" - self.emb_file = 
_check_exists_and_download( - emb_file, EMB_URL, EMB_MD5, 'conll05st', download) + self.emb_file = _check_exists_and_download(emb_file, EMB_URL, + EMB_MD5, 'conll05st', + download) self.word_dict = self._load_dict(self.word_dict_file) self.predicate_dict = self._load_dict(self.verb_dict_file) diff --git a/python/paddle/text/datasets/imdb.py b/python/paddle/text/datasets/imdb.py index f4fe7eb174b..dc100795acc 100644 --- a/python/paddle/text/datasets/imdb.py +++ b/python/paddle/text/datasets/imdb.py @@ -114,9 +114,10 @@ class Imdb(Dataset): if bool(pattern.match(tf.name)): # newline and punctuations removal and ad-hoc tokenization. data.append( - tarf.extractfile(tf).read().rstrip(six.b("\n\r")) - .translate(None, six.b(string.punctuation)).lower( - ).split()) + tarf.extractfile(tf).read().rstrip( + six.b("\n\r")).translate( + None, + six.b(string.punctuation)).lower().split()) tf = tarf.next() return data diff --git a/python/paddle/text/datasets/movielens.py b/python/paddle/text/datasets/movielens.py index 798a7c590e1..94ebf6b594d 100644 --- a/python/paddle/text/datasets/movielens.py +++ b/python/paddle/text/datasets/movielens.py @@ -79,8 +79,8 @@ class UserInfo(object): def __str__(self): return "" % ( - self.index, "M" - if self.is_male else "F", age_table[self.age], self.job_id) + self.index, "M" if self.is_male else "F", age_table[self.age], + self.job_id) def __repr__(self): return str(self) @@ -188,8 +188,10 @@ class Movielens(Dataset): for line in user_file: line = cpt.to_text(line, encoding='latin') uid, gender, age, job, _ = line.strip().split("::") - self.user_info[int(uid)] = UserInfo( - index=uid, gender=gender, age=age, job_id=job) + self.user_info[int(uid)] = UserInfo(index=uid, + gender=gender, + age=age, + job_id=job) def _load_data(self): self.data = [] diff --git a/python/paddle/text/datasets/uci_housing.py b/python/paddle/text/datasets/uci_housing.py index 597b1e1e818..c283aeaf733 100644 --- a/python/paddle/text/datasets/uci_housing.py +++ b/python/paddle/text/datasets/uci_housing.py @@ -94,8 +94,8 @@ class UCIHousing(Dataset): def _load_data(self, feature_num=14, ratio=0.8): data = np.fromfile(self.data_file, sep=' ') data = data.reshape(data.shape[0] // feature_num, feature_num) - maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum( - axis=0) / data.shape[0] + maximums, minimums, avgs = data.max(axis=0), data.min( + axis=0), data.sum(axis=0) / data.shape[0] for i in six.moves.range(feature_num - 1): data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) offset = int(data.shape[0] * ratio) diff --git a/python/paddle/text/datasets/wmt14.py b/python/paddle/text/datasets/wmt14.py index a6d49d70ab3..133c304a02a 100644 --- a/python/paddle/text/datasets/wmt14.py +++ b/python/paddle/text/datasets/wmt14.py @@ -99,8 +99,9 @@ class WMT14(Dataset): self.data_file = data_file if self.data_file is None: assert download, "data_file is not set and downloading automatically is disabled" - self.data_file = _check_exists_and_download( - data_file, URL_TRAIN, MD5_TRAIN, 'wmt14', download) + self.data_file = _check_exists_and_download(data_file, URL_TRAIN, + MD5_TRAIN, 'wmt14', + download) # read dataset into memory assert dict_size > 0, "dict_size should be set as positive number" @@ -108,6 +109,7 @@ class WMT14(Dataset): self._load_data() def _load_data(self): + def __to_dict(fd, size): out_dict = dict() for line_count, line in enumerate(fd): diff --git a/python/paddle/text/datasets/wmt16.py b/python/paddle/text/datasets/wmt16.py index 
5e88023a49d..ee2245ae4fe 100644 --- a/python/paddle/text/datasets/wmt16.py +++ b/python/paddle/text/datasets/wmt16.py @@ -120,16 +120,17 @@ class WMT16(Dataset): self.data_file = data_file if self.data_file is None: assert download, "data_file is not set and downloading automatically is disabled" - self.data_file = _check_exists_and_download( - data_file, DATA_URL, DATA_MD5, 'wmt16', download) + self.data_file = _check_exists_and_download(data_file, DATA_URL, + DATA_MD5, 'wmt16', + download) self.lang = lang assert src_dict_size > 0, "dict_size should be set as positive number" assert trg_dict_size > 0, "dict_size should be set as positive number" - self.src_dict_size = min(src_dict_size, (TOTAL_EN_WORDS if lang == "en" - else TOTAL_DE_WORDS)) - self.trg_dict_size = min(trg_dict_size, (TOTAL_DE_WORDS if lang == "en" - else TOTAL_EN_WORDS)) + self.src_dict_size = min( + src_dict_size, (TOTAL_EN_WORDS if lang == "en" else TOTAL_DE_WORDS)) + self.trg_dict_size = min( + trg_dict_size, (TOTAL_DE_WORDS if lang == "en" else TOTAL_EN_WORDS)) # load source and target word dict self.src_dict = self._load_dict(lang, src_dict_size) @@ -173,10 +174,9 @@ class WMT16(Dataset): fout.write( cpt.to_bytes("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK))) for idx, word in enumerate( - sorted( - six.iteritems(word_dict), - key=lambda x: x[1], - reverse=True)): + sorted(six.iteritems(word_dict), + key=lambda x: x[1], + reverse=True)): if idx + 3 == dict_size: break fout.write(cpt.to_bytes(word[0])) fout.write(cpt.to_bytes('\n')) diff --git a/python/paddle/text/viterbi_decode.py b/python/paddle/text/viterbi_decode.py index ce5667b134a..cf6bdd04c26 100644 --- a/python/paddle/text/viterbi_decode.py +++ b/python/paddle/text/viterbi_decode.py @@ -75,16 +75,17 @@ def viterbi_decode(potentials, attrs = {'include_bos_eos_tag': include_bos_eos_tag} scores = helper.create_variable_for_type_inference(potentials.dtype) path = helper.create_variable_for_type_inference('int64') - helper.append_op( - type='viterbi_decode', - inputs={ - 'Input': potentials, - 'Transition': transition_params, - 'Length': lengths - }, - outputs={'Scores': scores, - 'Path': path}, - attrs=attrs) + helper.append_op(type='viterbi_decode', + inputs={ + 'Input': potentials, + 'Transition': transition_params, + 'Length': lengths + }, + outputs={ + 'Scores': scores, + 'Path': path + }, + attrs=attrs) return scores, path diff --git a/python/paddle/utils/code_gen/api_base.py b/python/paddle/utils/code_gen/api_base.py index 1f19dec992d..41e0d2de5c0 100644 --- a/python/paddle/utils/code_gen/api_base.py +++ b/python/paddle/utils/code_gen/api_base.py @@ -19,6 +19,7 @@ PREFIX_META_TENSOR_NAME = 'meta_' class BaseAPI(object): + def __init__(self, api_item_yaml): self.api = self.get_api_name(api_item_yaml) @@ -41,12 +42,12 @@ class BaseAPI(object): self.invoke = api_item_yaml['invoke'] else: if 'infer_meta' in api_item_yaml: - self.infer_meta = self.parse_infer_meta(api_item_yaml[ - 'infer_meta']) + self.infer_meta = self.parse_infer_meta( + api_item_yaml['infer_meta']) self.kernel = self.parse_kernel(api_item_yaml['kernel']) - self.support_selected_rows_kernel = False if len(self.kernel[ - 'func']) == 1 or not self.kernel['func'][1].endswith( - '_sr') else True + self.support_selected_rows_kernel = False if len( + self.kernel['func'] + ) == 1 or not self.kernel['func'][1].endswith('_sr') else True self.data_transform = self.parse_data_transform(api_item_yaml) self.inplace_map, self.view_map = {}, {} @@ -65,8 +66,9 @@ class BaseAPI(object): for name in 
self.inputs['names']: name = name.split('@')[0] if inplace_flag and name in self.inplace_map.values(): - input_args.append(inplace_type_map[self.inputs['input_info'][ - name]] + ' ' + name) + input_args.append( + inplace_type_map[self.inputs['input_info'][name]] + ' ' + + name) else: input_args.append(self.inputs['input_info'][name] + ' ' + name) return input_args @@ -95,8 +97,9 @@ class BaseAPI(object): optional_vars = [ item.strip() for item in api_item_yaml['optional'].split(',') ] - inputs, attrs = self.parse_input_and_attr( - api_name, api_item_yaml['args'], optional_vars) + inputs, attrs = self.parse_input_and_attr(api_name, + api_item_yaml['args'], + optional_vars) output_type_list, output_names, out_size_expr = self.parse_output( api_name, api_item_yaml['output']) return inputs, attrs, { @@ -199,6 +202,7 @@ class BaseAPI(object): return inputs, attrs def parse_output(self, api_name, output_config): + def parse_output_item(output_item): output_type_map = { 'Tensor': 'Tensor', @@ -526,7 +530,8 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d def get_kernel_args(self, code_indent): input_trans_map = { - 'const Tensor&': 'const phi::DenseTensor&', + 'const Tensor&': + 'const phi::DenseTensor&', 'const std::vector&': 'const std::vector&', 'const paddle::optional': @@ -617,8 +622,8 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d kernel_args_type_list.append('const phi::Scalar&') param = 'phi::Scalar(' + param + ')' else: - kernel_args_type_list.append(self.attrs['attr_info'][param][ - 0]) + kernel_args_type_list.append( + self.attrs['attr_info'][param][0]) kernel_args = kernel_args + param + ", " elif isinstance(param, bool): kernel_args = kernel_args + str(param).lower() + ", " @@ -634,7 +639,8 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d def get_selected_rows_kernel_args(self, code_indent): input_trans_map = { - 'const Tensor&': 'const phi::SelectedRows&', + 'const Tensor&': + 'const phi::SelectedRows&', 'const paddle::optional&': 'const paddle::optional&' } @@ -682,8 +688,8 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d kernel_args_type_list.append('const phi::Scalar&') param = 'phi::Scalar(' + param + ')' else: - kernel_args_type_list.append(self.attrs['attr_info'][param][ - 0]) + kernel_args_type_list.append( + self.attrs['attr_info'][param][0]) kernel_args = kernel_args + param + ", " elif isinstance(param, bool): kernel_args = kernel_args + str(param).lower() + ", " diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py index 1721da19295..7548c047ff5 100644 --- a/python/paddle/utils/code_gen/api_gen.py +++ b/python/paddle/utils/code_gen/api_gen.py @@ -26,6 +26,7 @@ inplace_out_type_map = { class ForwardAPI(BaseAPI): + def __init__(self, api_item_yaml): super(ForwardAPI, self).__init__(api_item_yaml) self.is_dygraph_api, self.intermediate_outs = self.parse_intermediate( @@ -131,9 +132,9 @@ class ForwardAPI(BaseAPI): if len(output_type_list) == 1: kernel_output = 'kernel_out' output_names.append('kernel_out') - inplace_assign = " = " + self.inplace_map[self.outputs['names'][ - 0]] if inplace_flag and self.outputs['names'][ - 0] in self.inplace_map else "" + inplace_assign = " = " + self.inplace_map[ + self.outputs['names'][0]] if inplace_flag and self.outputs[ + 'names'][0] in self.inplace_map else "" output_create = f""" {code_indent} {return_type} api_output{inplace_assign};""" @@ -287,21 +288,18 
@@ def generate_api(api_yaml_path, header_file_path, source_file_path): def main(): parser = argparse.ArgumentParser( description='Generate PaddlePaddle C++ API files') - parser.add_argument( - '--api_yaml_path', - help='path to api yaml file', - nargs='+', - default='python/paddle/utils/code_gen/api.yaml') - - parser.add_argument( - '--api_header_path', - help='output of generated api header code file', - default='paddle/phi/api/include/api.h') - - parser.add_argument( - '--api_source_path', - help='output of generated api source code file', - default='paddle/phi/api/lib/api.cc') + parser.add_argument('--api_yaml_path', + help='path to api yaml file', + nargs='+', + default='python/paddle/utils/code_gen/api.yaml') + + parser.add_argument('--api_header_path', + help='output of generated api header code file', + default='paddle/phi/api/include/api.h') + + parser.add_argument('--api_source_path', + help='output of generated api source code file', + default='paddle/phi/api/lib/api.cc') options = parser.parse_args() diff --git a/python/paddle/utils/code_gen/backward_api_gen.py b/python/paddle/utils/code_gen/backward_api_gen.py index 886748eeb29..48bff2d1d3f 100644 --- a/python/paddle/utils/code_gen/backward_api_gen.py +++ b/python/paddle/utils/code_gen/backward_api_gen.py @@ -21,6 +21,7 @@ from api_base import BaseAPI class BackwardAPI(BaseAPI): + def __init__(self, backward_item_yaml): super(BackwardAPI, self).__init__(backward_item_yaml) self.check_args(backward_item_yaml['forward']) @@ -271,20 +272,17 @@ def generate_backward_api(backward_yaml_path, header_file_path, def main(): parser = argparse.ArgumentParser( description='Generate PaddlePaddle C++ backward API files') - parser.add_argument( - '--backward_yaml_path', - help='path to backward yaml file', - nargs='+', - default='python/paddle/utils/code_gen/backward.yaml') - parser.add_argument( - '--backward_header_path', - help='output of generated backward header code file', - default='paddle/phi/api/backward/backward_api.h') - - parser.add_argument( - '--backward_source_path', - help='output of generated backward source code file', - default='paddle/phi/api/lib/backward_api.cc') + parser.add_argument('--backward_yaml_path', + help='path to backward yaml file', + nargs='+', + default='python/paddle/utils/code_gen/backward.yaml') + parser.add_argument('--backward_header_path', + help='output of generated backward header code file', + default='paddle/phi/api/backward/backward_api.h') + + parser.add_argument('--backward_source_path', + help='output of generated backward source code file', + default='paddle/phi/api/lib/backward_api.cc') options = parser.parse_args() diff --git a/python/paddle/utils/code_gen/cross_validate.py b/python/paddle/utils/code_gen/cross_validate.py index 30fbf2e0a7d..3eca85e8ff4 100644 --- a/python/paddle/utils/code_gen/cross_validate.py +++ b/python/paddle/utils/code_gen/cross_validate.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -35,18 +35,16 @@ if __name__ == "__main__": current_dir = Path(__file__).parent / "temp" parser = argparse.ArgumentParser( description="Parse api yaml into canonical format.") - parser.add_argument( - '--forward_yaml_paths', - type=str, - nargs='+', - default=str(current_dir / "api.parsed.yaml"), - help="forward api yaml file.") - parser.add_argument( - '--backward_yaml_paths', - type=str, - nargs='+', - default=str(current_dir / "backward.yaml.yaml"), - help="backward api yaml file.") + parser.add_argument('--forward_yaml_paths', + type=str, + nargs='+', + default=str(current_dir / "api.parsed.yaml"), + help="forward api yaml file.") + parser.add_argument('--backward_yaml_paths', + type=str, + nargs='+', + default=str(current_dir / "backward.yaml.yaml"), + help="backward api yaml file.") args = parser.parse_args() main(args.forward_yaml_paths, args.backward_yaml_paths) diff --git a/python/paddle/utils/code_gen/filters.py b/python/paddle/utils/code_gen/filters.py index d37403adcba..832685f83e6 100644 --- a/python/paddle/utils/code_gen/filters.py +++ b/python/paddle/utils/code_gen/filters.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/utils/code_gen/generate_op.py b/python/paddle/utils/code_gen/generate_op.py index 0b314e4a11c..adaae66b979 100644 --- a/python/paddle/utils/code_gen/generate_op.py +++ b/python/paddle/utils/code_gen/generate_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -26,13 +26,12 @@ from filters import to_input_name from parse_utils import to_named_dict file_loader = FileSystemLoader(Path(__file__).parent / "templates") -env = Environment( - loader=file_loader, - keep_trailing_newline=True, - trim_blocks=True, - lstrip_blocks=True, - undefined=StrictUndefined, - extensions=['jinja2.ext.do']) +env = Environment(loader=file_loader, + keep_trailing_newline=True, + trim_blocks=True, + lstrip_blocks=True, + undefined=StrictUndefined, + extensions=['jinja2.ext.do']) env.filters["to_op_attr_type"] = to_op_attr_type env.filters["to_opmaker_name"] = to_opmaker_name env.filters["to_pascal_case"] = to_pascal_case @@ -82,8 +81,9 @@ def main(api_yaml_path, backward_yaml_path, output_op_path, op_template = env.get_template('op.c.j2') with open(output_op_path, "wt") as f: - msg = op_template.render( - apis=apis, backward_apis=backward_apis, api_dict=api_dict) + msg = op_template.render(apis=apis, + backward_apis=backward_apis, + api_dict=api_dict) f.write(msg) ks_template = env.get_template('ks.c.j2') @@ -95,14 +95,15 @@ def main(api_yaml_path, backward_yaml_path, output_op_path, if __name__ == "__main__": parser = argparse.ArgumentParser( description="Generate operator file from api yaml.") - parser.add_argument( - '--api_yaml_path', type=str, help="parsed api yaml file.") - parser.add_argument( - '--backward_api_yaml_path', - type=str, - help="parsed backward api yaml file.") - parser.add_argument( - "--output_op_path", type=str, help="path to save generated operators.") + parser.add_argument('--api_yaml_path', + type=str, + help="parsed api yaml file.") + parser.add_argument('--backward_api_yaml_path', + type=str, + help="parsed backward api yaml file.") + parser.add_argument("--output_op_path", + type=str, + help="path to save generated operators.") parser.add_argument( "--output_arg_map_path", type=str, diff --git a/python/paddle/utils/code_gen/intermediate_api_gen.py b/python/paddle/utils/code_gen/intermediate_api_gen.py index 4e4875b5961..25e28d6af6e 100644 --- a/python/paddle/utils/code_gen/intermediate_api_gen.py +++ b/python/paddle/utils/code_gen/intermediate_api_gen.py @@ -132,26 +132,22 @@ def generate_intermediate_api(api_yaml_path, sparse_api_yaml_path, def main(): parser = argparse.ArgumentParser( description='Generate PaddlePaddle C++ Sparse API files') - parser.add_argument( - '--api_yaml_path', - nargs='+', - help='path to api yaml file', - default='python/paddle/utils/code_gen/api.yaml') - - parser.add_argument( - '--sparse_api_yaml_path', - help='path to sparse api yaml file', - default='python/paddle/utils/code_gen/sparse_api.yaml') - - parser.add_argument( - '--dygraph_api_header_path', - help='output of generated dygraph api header code file', - default='paddle/phi/api/lib/dygraph_api.h') - - parser.add_argument( - '--dygraph_api_source_path', - help='output of generated dygraph api source code file', - default='paddle/phi/api/lib/dygraph_api.cc') + parser.add_argument('--api_yaml_path', + nargs='+', + help='path to api yaml file', + default='python/paddle/utils/code_gen/api.yaml') + + parser.add_argument('--sparse_api_yaml_path', + help='path to sparse api yaml file', + default='python/paddle/utils/code_gen/sparse_api.yaml') + + parser.add_argument('--dygraph_api_header_path', + help='output of generated dygraph api header code file', + default='paddle/phi/api/lib/dygraph_api.h') + + parser.add_argument('--dygraph_api_source_path', + help='output of generated dygraph api source code file', + default='paddle/phi/api/lib/dygraph_api.cc') 
options = parser.parse_args() diff --git a/python/paddle/utils/code_gen/parse_api.py b/python/paddle/utils/code_gen/parse_api.py index 63dc314d2e3..fcaf365951e 100644 --- a/python/paddle/utils/code_gen/parse_api.py +++ b/python/paddle/utils/code_gen/parse_api.py @@ -39,8 +39,9 @@ if __name__ == "__main__": parser = argparse.ArgumentParser( description="Parse api yaml into canonical format.") parser.add_argument('--api_yaml_path', type=str, help="api yaml file.") - parser.add_argument( - "--output_path", type=str, help="path to save parsed yaml file.") + parser.add_argument("--output_path", + type=str, + help="path to save parsed yaml file.") parser.add_argument("--backward", action="store_true", default=False) args = parser.parse_args() diff --git a/python/paddle/utils/code_gen/parse_utils.py b/python/paddle/utils/code_gen/parse_utils.py index 8168328012e..11a0b49eeef 100644 --- a/python/paddle/utils/code_gen/parse_utils.py +++ b/python/paddle/utils/code_gen/parse_utils.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -149,8 +149,8 @@ def parse_plain_list(s: str, sep=",") -> List[str]: return items -def parse_kernel(api_name: str, - kernel_config: Dict[str, Any]) -> Dict[str, Any]: +def parse_kernel(api_name: str, kernel_config: Dict[str, + Any]) -> Dict[str, Any]: # kernel : # func : [], Kernel functions (example: scale, scale_sr) # param : [], Input params of kernel diff --git a/python/paddle/utils/code_gen/sparse_api_gen.py b/python/paddle/utils/code_gen/sparse_api_gen.py index bd73032e179..4c2f453e533 100644 --- a/python/paddle/utils/code_gen/sparse_api_gen.py +++ b/python/paddle/utils/code_gen/sparse_api_gen.py @@ -21,6 +21,7 @@ from api_gen import ForwardAPI class SparseAPI(ForwardAPI): + def __init__(self, api_item_yaml): super(SparseAPI, self).__init__(api_item_yaml) @@ -95,8 +96,10 @@ class SparseAPI(ForwardAPI): def gen_sparse_kernel_context(self, kernel_output_names): input_trans_map = { - 'const Tensor&': 'const phi::TenseBase&', - 'const std::vector&': 'const std::vector&', + 'const Tensor&': + 'const phi::TenseBase&', + 'const std::vector&': + 'const std::vector&', 'const paddle::optional&': 'paddle::optional' } @@ -150,8 +153,8 @@ class SparseAPI(ForwardAPI): kernel_context_code = self.gen_sparse_kernel_context( kernel_output_names) - return_code = "" if len(self.gene_return_code( - )) == 0 else " " + self.gene_return_code() + return_code = "" if len( + self.gene_return_code()) == 0 else " " + self.gene_return_code() return f""" VLOG(6) << "{self.api} api sparse kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]"; auto phi_kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( @@ -199,8 +202,8 @@ class SparseAPI(ForwardAPI): api_func_name += '_' kernel_dispatch_code = f"{self.gene_kernel_select()}\n" for kernel_name in self.kernel['func']: - kernel_dispatch_code += self.gene_dispatch_code(kernel_name, - inplace_flag) + kernel_dispatch_code += self.gene_dispatch_code( + kernel_name, inplace_flag) return f""" PADDLE_API {self.get_return_type()} 
{api_func_name}({self.get_define_args()}) {{ @@ -285,20 +288,17 @@ def generate_api(api_yaml_path, header_file_path, source_file_path): def main(): parser = argparse.ArgumentParser( description='Generate PaddlePaddle C++ Sparse API files') - parser.add_argument( - '--api_yaml_path', - help='path to sparse api yaml file', - default='python/paddle/utils/code_gen/sparse_api.yaml') - - parser.add_argument( - '--api_header_path', - help='output of generated api header code file', - default='paddle/phi/api/include/sparse_api.h') - - parser.add_argument( - '--api_source_path', - help='output of generated api source code file', - default='paddle/phi/api/lib/sparse_api.cc') + parser.add_argument('--api_yaml_path', + help='path to sparse api yaml file', + default='python/paddle/utils/code_gen/sparse_api.yaml') + + parser.add_argument('--api_header_path', + help='output of generated api header code file', + default='paddle/phi/api/include/sparse_api.h') + + parser.add_argument('--api_source_path', + help='output of generated api source code file', + default='paddle/phi/api/lib/sparse_api.cc') options = parser.parse_args() diff --git a/python/paddle/utils/code_gen/sparse_bw_api_gen.py b/python/paddle/utils/code_gen/sparse_bw_api_gen.py index cf59726bbb1..3e0abead036 100644 --- a/python/paddle/utils/code_gen/sparse_bw_api_gen.py +++ b/python/paddle/utils/code_gen/sparse_bw_api_gen.py @@ -22,6 +22,7 @@ from backward_api_gen import BackwardAPI class SparseBackwardAPI(SparseAPI, BackwardAPI): + def __init__(self, bw_api_item_yaml): BackwardAPI.__init__(self, bw_api_item_yaml) @@ -166,15 +167,13 @@ def main(): help='path to sparse api yaml file', default='python/paddle/utils/code_gen/sparse_bw_api.yaml') - parser.add_argument( - '--api_header_path', - help='output of generated api header code file', - default='paddle/phi/api/backward/sparse_bw_api.h') + parser.add_argument('--api_header_path', + help='output of generated api header code file', + default='paddle/phi/api/backward/sparse_bw_api.h') - parser.add_argument( - '--api_source_path', - help='output of generated api source code file', - default='paddle/phi/api/lib/sparse_bw_api.cc') + parser.add_argument('--api_source_path', + help='output of generated api source code file', + default='paddle/phi/api/lib/sparse_bw_api.cc') options = parser.parse_args() diff --git a/python/paddle/utils/code_gen/strings_api_gen.py b/python/paddle/utils/code_gen/strings_api_gen.py index d697ce39357..5b29c6076b4 100644 --- a/python/paddle/utils/code_gen/strings_api_gen.py +++ b/python/paddle/utils/code_gen/strings_api_gen.py @@ -18,11 +18,13 @@ import argparse import re from api_gen import ForwardAPI + PREFIX_TENSOR_NAME = 'input_' PREFIX_META_TENSOR_NAME = 'meta_' class StringsAPI(ForwardAPI): + def __init__(self, api_item_yaml): super(StringsAPI, self).__init__(api_item_yaml) @@ -99,7 +101,8 @@ class StringsAPI(ForwardAPI): def get_kernel_args(self, code_indent): input_trans_map = { - 'const Tensor&': 'const phi::StringTensor&', + 'const Tensor&': + 'const phi::StringTensor&', 'const std::vector&': 'const std::vector&', 'const paddle::optional&': @@ -151,8 +154,8 @@ class StringsAPI(ForwardAPI): kernel_args_type_list.append('const phi::Scalar&') param = 'phi::Scalar(' + param + ')' else: - kernel_args_type_list.append(self.attrs['attr_info'][param][ - 0]) + kernel_args_type_list.append( + self.attrs['attr_info'][param][0]) kernel_args = kernel_args + param + ", " elif isinstance(param, bool): kernel_args = kernel_args + str(param).lower() + ", " @@ -351,20 +354,17 @@ 
def generate_api(api_yaml_path, header_file_path, source_file_path): def main(): parser = argparse.ArgumentParser( description='Generate PaddlePaddle C++ Strings API files') - parser.add_argument( - '--api_yaml_path', - help='path to sparse api yaml file', - default='python/paddle/utils/code_gen/strings_api.yaml') - - parser.add_argument( - '--api_header_path', - help='output of generated api header code file', - default='paddle/phi/api/include/strings_api.h') - - parser.add_argument( - '--api_source_path', - help='output of generated api source code file', - default='paddle/phi/api/lib/strings_api.cc') + parser.add_argument('--api_yaml_path', + help='path to sparse api yaml file', + default='python/paddle/utils/code_gen/strings_api.yaml') + + parser.add_argument('--api_header_path', + help='output of generated api header code file', + default='paddle/phi/api/include/strings_api.h') + + parser.add_argument('--api_source_path', + help='output of generated api source code file', + default='paddle/phi/api/lib/strings_api.cc') options = parser.parse_args() diff --git a/python/paddle/utils/code_gen/tests.py b/python/paddle/utils/code_gen/tests.py index 453578b5cbd..d322fe1885b 100644 --- a/python/paddle/utils/code_gen/tests.py +++ b/python/paddle/utils/code_gen/tests.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/python/paddle/utils/code_gen/type_mapping.py b/python/paddle/utils/code_gen/type_mapping.py index c6e110907a9..448a2ab22f3 100644 --- a/python/paddle/utils/code_gen/type_mapping.py +++ b/python/paddle/utils/code_gen/type_mapping.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -105,10 +105,14 @@ dense_output_types_map = { #---------------------- phi selected rows------------------------------ # type mapping to phi, used in implementation -sr_input_types_map = {'Tensor': 'const phi::SelectedRows&', } +sr_input_types_map = { + 'Tensor': 'const phi::SelectedRows&', +} sr_optional_input_types_map = { 'Tensor': 'const paddle::optional&', } -sr_output_types_map = {'Tensor': 'phi::SelectedRows*', } +sr_output_types_map = { + 'Tensor': 'phi::SelectedRows*', +} diff --git a/python/paddle/utils/code_gen/wrapped_infermeta_gen.py b/python/paddle/utils/code_gen/wrapped_infermeta_gen.py index bf798f9734d..b41ebfb8487 100644 --- a/python/paddle/utils/code_gen/wrapped_infermeta_gen.py +++ b/python/paddle/utils/code_gen/wrapped_infermeta_gen.py @@ -51,15 +51,16 @@ PD_REGISTER_INFER_META_FN({api.kernel['func'][0]}, phi::{api.infer_meta['func']} for input_name in api.inputs['names']: if input_name in kernel_params: print("type", api.inputs['input_info']) - args.append(tensor_type_map[api.inputs['input_info'][ - input_name]] + ' ' + input_name) + args.append( + tensor_type_map[api.inputs['input_info'][input_name]] + + ' ' + input_name) for attr_name in api.attrs['names']: if attr_name in kernel_params: args.append(api.attrs['attr_info'][attr_name][0] + ' ' + attr_name) for i, out_type in enumerate(api.outputs['types']): - args.append(tensor_type_map[out_type] + ' ' + api.outputs[ - 'names'][i]) + args.append(tensor_type_map[out_type] + ' ' + + api.outputs['names'][i]) invoke_param = api.infer_meta['param'] invoke_param.extend(api.outputs['names']) @@ -157,11 +158,10 @@ def generate_wrapped_infermeta_and_register(api_yaml_path, header_file_path, def main(): parser = argparse.ArgumentParser( description='Generate PaddlePaddle C++ API files') - parser.add_argument( - '--api_yaml_path', - help='path to api yaml file', - nargs='+', - default='python/paddle/utils/code_gen/api.yaml') + parser.add_argument('--api_yaml_path', + help='path to api yaml file', + nargs='+', + default='python/paddle/utils/code_gen/api.yaml') parser.add_argument( '--wrapped_infermeta_header_path', help='output of generated wrapped_infermeta header code file', diff --git a/python/paddle/utils/cpp_extension/__init__.py b/python/paddle/utils/cpp_extension/__init__.py index cef2716b7f3..843f78d5c80 100644 --- a/python/paddle/utils/cpp_extension/__init__.py +++ b/python/paddle/utils/cpp_extension/__init__.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,10 +22,6 @@ from .extension_utils import parse_op_info # noqa: F401 from .extension_utils import get_build_directory # noqa: F401 from .extension_utils import load_op_meta_info_and_register_op # noqa: F401 -__all__ = [ #noqa - 'CppExtension', - 'CUDAExtension', - 'load', - 'setup', - 'get_build_directory' +__all__ = [ #noqa + 'CppExtension', 'CUDAExtension', 'load', 'setup', 'get_build_directory' ] diff --git a/python/paddle/utils/cpp_extension/cpp_extension.py b/python/paddle/utils/cpp_extension/cpp_extension.py index 3a7804d9012..73642e1a004 100644 --- a/python/paddle/utils/cpp_extension/cpp_extension.py +++ b/python/paddle/utils/cpp_extension/cpp_extension.py @@ -335,6 +335,7 @@ class BuildExtension(build_ext, object): """ class cls_with_options(cls): + def __init__(self, *args, **kwargs): kwargs.update(options) cls.__init__(self, *args, **kwargs) @@ -379,8 +380,8 @@ class BuildExtension(build_ext, object): # cflags have changed and delete the built shared library to re-compile the source # even though source file content keep unchanged. so_name = self.get_ext_fullpath(self.extensions[0].name) - clean_object_if_change_cflags( - os.path.abspath(so_name), self.extensions[0]) + clean_object_if_change_cflags(os.path.abspath(so_name), + self.extensions[0]) # Consider .cu, .cu.cc as valid source extensions. self.compiler.src_extensions += ['.cu', '.cu.cc'] @@ -448,8 +449,9 @@ class BuildExtension(build_ext, object): else: cflags.append('-DPADDLE_WITH_CUDA') - add_std_without_repeat( - cflags, self.compiler.compiler_type, use_std14=True) + add_std_without_repeat(cflags, + self.compiler.compiler_type, + use_std14=True) original_compile(obj, src, ext, cc_args, cflags, pp_opts) finally: # restore original_compiler @@ -611,12 +613,11 @@ class BuildExtension(build_ext, object): """ compiler_infos = ['clang'] + CLANG_COMPILE_FLAGS linker_infos = ['clang'] + CLANG_LINK_FLAGS - self.compiler.set_executables( - compiler=compiler_infos, - compiler_so=compiler_infos, - compiler_cxx=['clang'], - linker_exe=['clang'], - linker_so=linker_infos) + self.compiler.set_executables(compiler=compiler_infos, + compiler_so=compiler_infos, + compiler_cxx=['clang'], + linker_exe=['clang'], + linker_so=linker_infos) def _check_abi(self): """ @@ -711,6 +712,7 @@ class BuildCommand(build, object): """ class cls_with_options(cls): + def __init__(self, *args, **kwargs): kwargs.update(options) cls.__init__(self, *args, **kwargs) @@ -845,8 +847,9 @@ def load(name, ), "Required type(extra_cuda_cflags) == list[str], but received {}".format( extra_cuda_cflags) - log_v("additional extra_cxx_cflags: [{}], extra_cuda_cflags: [{}]".format( - ' '.join(extra_cxx_cflags), ' '.join(extra_cuda_cflags)), verbose) + log_v( + "additional extra_cxx_cflags: [{}], extra_cuda_cflags: [{}]".format( + ' '.join(extra_cxx_cflags), ' '.join(extra_cuda_cflags)), verbose) # write setup.py file and compile it build_base_dir = os.path.join(build_directory, name) diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py index 41add6e764a..62fce336004 100644 --- a/python/paddle/utils/cpp_extension/extension_utils.py +++ b/python/paddle/utils/cpp_extension/extension_utils.py @@ -193,8 +193,8 @@ def custom_write_stub(resource, pyfile): with open(pyfile, 'w') as f: f.write( - _stub_template.format( - resource=resource, custom_api='\n\n'.join(api_content))) + _stub_template.format(resource=resource, + custom_api='\n\n'.join(api_content))) OpInfo = collections.namedtuple('OpInfo', 
['so_name', 'so_path']) @@ -242,6 +242,7 @@ VersionFields = collections.namedtuple('VersionFields', [ class VersionManager: + def __init__(self, version_field): self.version_field = version_field self.version = self.hasher(version_field) @@ -258,8 +259,8 @@ class VersionManager: md5 = combine_hash(md5, tuple(flat_elem)) else: raise RuntimeError( - "Support types with list, tuple and dict, but received {} with {}.". - format(type(elem), elem)) + "Support types with list, tuple and dict, but received {} with {}." + .format(type(elem), elem)) return md5.hexdigest() @@ -313,8 +314,8 @@ def clean_object_if_change_cflags(so_path, extension): # delete shared library file if version is changed to re-compile it. if so_version is not None and so_version != versioner.version: log_v( - "Re-Compiling {}, because specified cflags have been changed. New signature {} has been saved into {}.". - format(so_name, versioner.version, version_file)) + "Re-Compiling {}, because specified cflags have been changed. New signature {} has been saved into {}." + .format(so_name, versioner.version, version_file)) os.remove(so_path) # update new version information new_version_info = versioner.details @@ -436,8 +437,8 @@ def _reset_so_rpath(so_path): if OS_NAME.startswith("darwin"): origin_runtime_path = "@loader_path/../libs/" rpath = "@rpath/{}".format(_get_core_name()) - cmd = 'install_name_tool -change {} {} {}'.format(origin_runtime_path, - rpath, so_path) + cmd = 'install_name_tool -change {} {} {}'.format( + origin_runtime_path, rpath, so_path) run_cmd(cmd) @@ -569,9 +570,9 @@ def create_sym_link_if_not_exist(): except Exception: warnings.warn( "Failed to create soft symbol link for {}.\n You can run prompt as administrator and execute the " - "following command manually: `mklink {} {}`. Now it will create hard link for {} trickly.". - format(raw_core_name, new_dll_core_path, core_path, - raw_core_name)) + "following command manually: `mklink {} {}`. Now it will create hard link for {} trickly." + .format(raw_core_name, new_dll_core_path, core_path, + raw_core_name)) run_cmd('mklink /H {} {}'.format(new_dll_core_path, core_path)) # core_avx or core_noavx with lib suffix assert os.path.exists(new_dll_core_path) @@ -586,8 +587,8 @@ def create_sym_link_if_not_exist(): assert os.path.exists(new_lib_core_path) except Exception: raise RuntimeError( - "Failed to create soft symbol link for {}.\n Please execute the following command manually: `ln -s {} {}`". 
- format(raw_core_name, core_path, new_lib_core_path)) + "Failed to create soft symbol link for {}.\n Please execute the following command manually: `ln -s {} {}`" + .format(raw_core_name, core_path, new_lib_core_path)) # core_avx or core_noavx without suffix return raw_core_name[:-3] @@ -605,8 +606,8 @@ def find_cuda_home(): which_cmd = 'where' if IS_WINDOWS else 'which' try: with open(os.devnull, 'w') as devnull: - nvcc_path = subprocess.check_output( - [which_cmd, 'nvcc'], stderr=devnull) + nvcc_path = subprocess.check_output([which_cmd, 'nvcc'], + stderr=devnull) nvcc_path = nvcc_path.decode() # Multi CUDA, select the first nvcc_path = nvcc_path.split('\r\n')[0] @@ -643,8 +644,8 @@ def find_rocm_home(): which_cmd = 'where' if IS_WINDOWS else 'which' try: with open(os.devnull, 'w') as devnull: - hipcc_path = subprocess.check_output( - [which_cmd, 'hipcc'], stderr=devnull) + hipcc_path = subprocess.check_output([which_cmd, 'hipcc'], + stderr=devnull) hipcc_path = hipcc_path.decode() hipcc_path = hipcc_path.rstrip('\r\n') @@ -722,8 +723,8 @@ def find_clang_cpp_include(compiler='clang'): if "InstalledDir" in info: v1_path = info.split(':')[-1].strip() if v1_path and os.path.exists(v1_path): - std_v1_includes = os.path.join( - os.path.dirname(v1_path), 'include/c++/v1') + std_v1_includes = os.path.join(os.path.dirname(v1_path), + 'include/c++/v1') except Exception: # Just raise warnings because the include dir is not required. warnings.warn( @@ -823,14 +824,15 @@ def get_build_directory(verbose=False): root_extensions_directory = os.environ.get('PADDLE_EXTENSION_DIR') if root_extensions_directory is None: dir_name = "paddle_extensions" - root_extensions_directory = os.path.join( - os.path.expanduser('~/.cache'), dir_name) + root_extensions_directory = os.path.join(os.path.expanduser('~/.cache'), + dir_name) if IS_WINDOWS: root_extensions_directory = os.path.normpath( root_extensions_directory) - log_v("$PADDLE_EXTENSION_DIR is not set, using path: {} by default.". - format(root_extensions_directory), verbose) + log_v( + "$PADDLE_EXTENSION_DIR is not set, using path: {} by default.". + format(root_extensions_directory), verbose) if not os.path.exists(root_extensions_directory): os.makedirs(root_extensions_directory) @@ -845,8 +847,8 @@ def parse_op_info(op_name): """ if op_name not in OpProtoHolder.instance().op_proto_map: raise ValueError( - "Please load {} shared library file firstly by `paddle.utils.cpp_extension.load_op_meta_info_and_register_op(...)`". 
- format(op_name)) + "Please load {} shared library file firstly by `paddle.utils.cpp_extension.load_op_meta_info_and_register_op(...)`" + .format(op_name)) op_proto = OpProtoHolder.instance().get_op_proto(op_name) in_names = [x.name for x in op_proto.inputs] @@ -870,8 +872,8 @@ def _import_module_from_library(module_name, build_directory, verbose=False): dynamic_suffix = '.so' ext_path = os.path.join(build_directory, module_name + dynamic_suffix) if not os.path.exists(ext_path): - raise FileNotFoundError("Extension path: {} does not exist.".format( - ext_path)) + raise FileNotFoundError( + "Extension path: {} does not exist.".format(ext_path)) # load custom op_info and kernels from .so shared library log_v('loading shared library from: {}'.format(ext_path), verbose) @@ -901,7 +903,7 @@ def _generate_python_module(module_name, module_name + '_' + thread_id + '.py') log_v("generate api file: {}".format(api_file), verbose) - # delete the temp file before exit python process + # delete the temp file before exit python process atexit.register(lambda: remove_if_exit(api_file)) # write into .py file with RWLockc @@ -979,8 +981,8 @@ def _load_module_from_file(api_file_path, module_name, verbose=False): Load module from python file. """ if not os.path.exists(api_file_path): - raise FileNotFoundError("File : {} does not exist.".format( - api_file_path)) + raise FileNotFoundError( + "File : {} does not exist.".format(api_file_path)) # Unique readable module name to place custom api. log_v('import module from file: {}'.format(api_file_path), verbose) @@ -1006,12 +1008,14 @@ def _get_api_inputs_str(op_name): params_str = ','.join([p.split("@")[0].lower() for p in param_names]) # e.g: {'X': x, 'Y': y, 'Z': z} ins_str = "{%s}" % ','.join([ - "'{}' : {}".format(in_name, in_name.split("@")[0].lower()) + "'{}' : {}".format(in_name, + in_name.split("@")[0].lower()) for in_name in in_names ]) # e.g: {'num': n} attrs_str = "{%s}" % ",".join([ - "'{}' : {}".format(attr_name, attr_name.split("@")[0].lower()) + "'{}' : {}".format(attr_name, + attr_name.split("@")[0].lower()) for attr_name in attr_names ]) # e.g: ['Out', 'Index'] @@ -1055,15 +1059,14 @@ def _write_setup_file(name, with_cuda = True log_v("with_cuda: {}".format(with_cuda), verbose) - content = template.format( - name=name, - prefix='CUDA' if with_cuda else 'Cpp', - sources=list2str(sources), - include_dirs=list2str(include_dirs), - extra_cxx_cflags=list2str(extra_cxx_cflags), - extra_cuda_cflags=list2str(extra_cuda_cflags), - extra_link_args=list2str(link_args), - build_dir=build_dir) + content = template.format(name=name, + prefix='CUDA' if with_cuda else 'Cpp', + sources=list2str(sources), + include_dirs=list2str(include_dirs), + extra_cxx_cflags=list2str(extra_cxx_cflags), + extra_cuda_cflags=list2str(extra_cuda_cflags), + extra_link_args=list2str(link_args), + build_dir=build_dir) log_v('write setup.py into {}'.format(file_path), verbose) with open(file_path, 'w') as f: @@ -1093,8 +1096,9 @@ def _jit_compile(file_path, verbose=False): try: py_version = subprocess.check_output([interpreter, '-V']) py_version = py_version.decode() - log_v("Using Python interpreter: {}, version: {}".format( - interpreter, py_version.strip()), verbose) + log_v( + "Using Python interpreter: {}, version: {}".format( + interpreter, py_version.strip()), verbose) except Exception: _, error, _ = sys.exc_info() raise RuntimeError( @@ -1144,8 +1148,9 @@ def run_cmd(command, verbose=False): # execute command try: if verbose: - return subprocess.check_call( - command, 
shell=True, stderr=subprocess.STDOUT) + return subprocess.check_call(command, + shell=True, + stderr=subprocess.STDOUT) else: return subprocess.check_call(command, shell=True, stdout=DEVNULL) except Exception: @@ -1163,8 +1168,8 @@ def check_abi_compatibility(compiler, verbose=False): return True if not IS_WINDOWS: - cmd_out = subprocess.check_output( - ['which', compiler], stderr=subprocess.STDOUT) + cmd_out = subprocess.check_output(['which', compiler], + stderr=subprocess.STDOUT) compiler_path = os.path.realpath(cmd_out.decode()).strip() # if not found any suitable compiler, raise warning if not any(name in compiler_path @@ -1189,8 +1194,8 @@ def check_abi_compatibility(compiler, verbose=False): version = version_info.strip().split('.') elif IS_WINDOWS: mini_required_version = MSVC_MINI_VERSION - compiler_info = subprocess.check_output( - compiler, stderr=subprocess.STDOUT) + compiler_info = subprocess.check_output(compiler, + stderr=subprocess.STDOUT) try: compiler_info = compiler_info.decode('UTF-8') except UnicodeDecodeError: @@ -1210,8 +1215,8 @@ def check_abi_compatibility(compiler, verbose=False): if tuple(map(int, version)) >= mini_required_version: return True warnings.warn( - ABI_INCOMPATIBILITY_WARNING.format( - user_compiler=compiler, version='.'.join(version))) + ABI_INCOMPATIBILITY_WARNING.format(user_compiler=compiler, + version='.'.join(version))) return False diff --git a/python/paddle/utils/deprecated.py b/python/paddle/utils/deprecated.py index b7f5ff28d6c..5d4a8996936 100755 --- a/python/paddle/utils/deprecated.py +++ b/python/paddle/utils/deprecated.py @@ -109,8 +109,9 @@ def deprecated(update_to="", since="", reason="", level=0): v_since = [int(i) for i in _since.split(".")] v_since += [0] * (4 - len(v_since)) if paddle.__version__ == "0.0.0" or _since == "" or v_current >= v_since: - warnings.warn( - warningmsg, category=DeprecationWarning, stacklevel=2) + warnings.warn(warningmsg, + category=DeprecationWarning, + stacklevel=2) return func(*args, **kwargs) diff --git a/python/paddle/utils/download.py b/python/paddle/utils/download.py index bf40ff9ab22..234aac860b6 100644 --- a/python/paddle/utils/download.py +++ b/python/paddle/utils/download.py @@ -33,6 +33,7 @@ try: except: class tqdm(object): + def __init__(self, total=None): self.total = total self.n = 0 @@ -42,8 +43,8 @@ except: if self.total is None: sys.stderr.write("\r{0:.1f} bytes".format(self.n)) else: - sys.stderr.write("\r{0:.1f}%".format(100 * self.n / float( - self.total))) + sys.stderr.write("\r{0:.1f}%".format(100 * self.n / + float(self.total))) sys.stderr.flush() def __enter__(self): @@ -54,6 +55,7 @@ except: import logging + logger = logging.getLogger(__name__) __all__ = ['get_weights_path_from_url'] @@ -160,8 +162,8 @@ def get_path_from_url(url, time.sleep(1) if ParallelEnv().current_endpoint in unique_endpoints: - if decompress and (tarfile.is_tarfile(fullpath) or - zipfile.is_zipfile(fullpath)): + if decompress and (tarfile.is_tarfile(fullpath) + or zipfile.is_zipfile(fullpath)): fullpath = _decompress(fullpath) return fullpath @@ -207,8 +209,10 @@ def _wget_download(url, fullname): # –user-agent command = 'wget -O {} -t {} {}'.format(tmp_fullname, DOWNLOAD_RETRY_LIMIT, url) - subprc = subprocess.Popen( - command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + subprc = subprocess.Popen(command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) _ = subprc.communicate() if subprc.returncode != 0: diff --git a/python/paddle/utils/gast/ast3.py 
b/python/paddle/utils/gast/ast3.py index 58840d5c290..4696c1ba497 100644 --- a/python/paddle/utils/gast/ast3.py +++ b/python/paddle/utils/gast/ast3.py @@ -58,14 +58,16 @@ class Ast3ToGAst(AstToGAst): def visit_Num(self, node): new_node = gast.Constant( node.n, - None, ) + None, + ) gast.copy_location(new_node, node) return new_node def visit_Ellipsis(self, node): new_node = gast.Constant( Ellipsis, - None, ) + None, + ) gast.copy_location(new_node, node) new_node.end_lineno = new_node.end_col_offset = None return new_node @@ -73,14 +75,16 @@ class Ast3ToGAst(AstToGAst): def visit_Str(self, node): new_node = gast.Constant( node.s, - None, ) + None, + ) gast.copy_location(new_node, node) return new_node def visit_Bytes(self, node): new_node = gast.Constant( node.s, - None, ) + None, + ) gast.copy_location(new_node, node) return new_node @@ -169,7 +173,8 @@ class Ast3ToGAst(AstToGAst): new_node = gast.Call( self._visit(node.func), self._visit(node.args) + starred, - self._visit(node.keywords) + kwargs, ) + self._visit(node.keywords) + kwargs, + ) gast.copy_location(new_node, node) return new_node @@ -191,7 +196,8 @@ class Ast3ToGAst(AstToGAst): self._visit(node.kwonlyargs), self._visit(node.kw_defaults), self._visit(node.kwarg), - self._visit(node.defaults), ) + self._visit(node.defaults), + ) gast.copy_location(new_node, node) return new_node @@ -200,7 +206,8 @@ class Ast3ToGAst(AstToGAst): self._visit(node.id), self._visit(node.ctx), None, - None, ) + None, + ) ast.copy_location(new_node, node) return new_node @@ -237,7 +244,8 @@ class Ast3ToGAst(AstToGAst): target=self._visit(node.target), iter=self._visit(node.iter), ifs=self._visit(node.ifs), - is_async=0, ) + is_async=0, + ) return ast.copy_location(new_node, node) @@ -245,6 +253,7 @@ class GAstToAst3(GAstToAst): if sys.version_info.minor < 9: def visit_Subscript(self, node): + def adjust_slice(s): if isinstance(s, ast.Slice): return s @@ -253,9 +262,8 @@ class GAstToAst3(GAstToAst): if isinstance(node.slice, gast.Tuple): if any(isinstance(elt, gast.slice) for elt in node.slice.elts): - new_slice = ast.ExtSlice([ - adjust_slice(x) for x in self._visit(node.slice.elts) - ]) + new_slice = ast.ExtSlice( + [adjust_slice(x) for x in self._visit(node.slice.elts)]) else: value = ast.Tuple(self._visit(node.slice.elts), ast.Load()) ast.copy_location(value, node.slice) @@ -267,7 +275,8 @@ class GAstToAst3(GAstToAst): new_node = ast.Subscript( self._visit(node.value), new_slice, - self._visit(node.ctx), ) + self._visit(node.ctx), + ) ast.copy_location(new_node, node) return new_node @@ -302,21 +311,22 @@ class GAstToAst3(GAstToAst): else: extra_args = self._visit(node.type_comment), - new_node = ast.arg( - self._visit(node.id), self._visit(node.annotation), *extra_args) + new_node = ast.arg(self._visit(node.id), self._visit(node.annotation), + *extra_args) return ast.copy_location(new_node, node) def visit_Name(self, node): new_node = ast.Name( self._visit(node.id), - self._visit(node.ctx), ) + self._visit(node.ctx), + ) ast.copy_location(new_node, node) return new_node def visit_ExceptHandler(self, node): if node.name: - new_node = ast.ExceptHandler( - self._visit(node.type), node.name.id, self._visit(node.body)) + new_node = ast.ExceptHandler(self._visit(node.type), node.name.id, + self._visit(node.body)) return ast.copy_location(new_node, node) else: return self.generic_visit(node) @@ -343,7 +353,8 @@ class GAstToAst3(GAstToAst): self._visit(args), self._visit(keywords), self._visit(starargs), - self._visit(kwargs), ) + self._visit(kwargs), + ) 
ast.copy_location(new_node, node) return new_node @@ -356,7 +367,8 @@ class GAstToAst3(GAstToAst): body=self._visit(node.body), decorator_list=self._visit(node.decorator_list), starargs=None, - kwargs=None, ) + kwargs=None, + ) return ast.copy_location(new_node, node) elif sys.version_info.minor < 8: @@ -367,7 +379,8 @@ class GAstToAst3(GAstToAst): self._visit(node.args), self._visit(node.body), self._visit(node.decorator_list), - self._visit(node.returns), ) + self._visit(node.returns), + ) ast.copy_location(new_node, node) return new_node @@ -377,7 +390,8 @@ class GAstToAst3(GAstToAst): self._visit(node.args), self._visit(node.body), self._visit(node.decorator_list), - self._visit(node.returns), ) + self._visit(node.returns), + ) ast.copy_location(new_node, node) return new_node @@ -386,7 +400,8 @@ class GAstToAst3(GAstToAst): self._visit(node.target), self._visit(node.iter), self._visit(node.body), - self._visit(node.orelse), ) + self._visit(node.orelse), + ) ast.copy_location(new_node, node) return new_node @@ -404,14 +419,16 @@ class GAstToAst3(GAstToAst): def visit_With(self, node): new_node = ast.With( self._visit(node.items), - self._visit(node.body), ) + self._visit(node.body), + ) ast.copy_location(new_node, node) return new_node def visit_AsyncWith(self, node): new_node = ast.AsyncWith( self._visit(node.items), - self._visit(node.body), ) + self._visit(node.body), + ) ast.copy_location(new_node, node) return new_node @@ -419,7 +436,8 @@ class GAstToAst3(GAstToAst): new_node = ast.Call( self._visit(node.func), self._visit(node.args), - self._visit(node.keywords), ) + self._visit(node.keywords), + ) ast.copy_location(new_node, node) return new_node diff --git a/python/paddle/utils/gast/astn.py b/python/paddle/utils/gast/astn.py index bd88ba5efc5..eb45bd4e450 100644 --- a/python/paddle/utils/gast/astn.py +++ b/python/paddle/utils/gast/astn.py @@ -34,7 +34,9 @@ from . 
import gast def _generate_translators(to): + class Translator(ast.NodeTransformer): + def _visit(self, node): if isinstance(node, list): return [self._visit(n) for n in node] diff --git a/python/paddle/utils/gast/gast.py b/python/paddle/utils/gast/gast.py index f561c83995a..1248434fe35 100644 --- a/python/paddle/utils/gast/gast.py +++ b/python/paddle/utils/gast/gast.py @@ -44,6 +44,7 @@ except ImportError: def _make_node(Name, Fields, Attributes, Bases): + def create_node(self, *args, **kwargs): nbparam = len(args) + len(kwargs) assert nbparam in (0, len(Fields)), \ @@ -76,351 +77,434 @@ _nodes = ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('AsyncFunctionDef', (('name', 'args', 'body', 'decorator_list', 'returns', 'type_comment'), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('ClassDef', (( 'name', 'bases', 'keywords', 'body', - 'decorator_list', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'decorator_list', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (stmt, ))), ('Return', (('value', ), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('Delete', (('targets', ), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('Assign', (( 'targets', - 'value', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'value', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (stmt, ))), ('AugAssign', (( 'target', 'op', - 'value', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'value', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (stmt, ))), ('AnnAssign', (( 'target', 'annotation', 'value', - 'simple', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'simple', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (stmt, ))), ('Print', (( 'dest', 'values', - 'nl', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'nl', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (stmt, ))), ('For', (('target', 'iter', 'body', 'orelse', 'type_comment'), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('AsyncFor', (('target', 'iter', 'body', 'orelse', 'type_comment'), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('While', (( 'test', 'body', - 'orelse', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'orelse', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (stmt, ))), ('If', (( 'test', 'body', - 'orelse', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'orelse', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (stmt, ))), ('With', (('items', 'body', 'type_comment'), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('AsyncWith', (('items', 'body', 'type_comment'), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('Raise', (( 'exc', - 'cause', ), ( - 'lineno', - 
'col_offset', - 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'cause', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (stmt, ))), ('Try', (( 'body', 'handlers', 'orelse', - 'finalbody', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'finalbody', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (stmt, ))), ('Assert', (( 'test', - 'msg', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'msg', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (stmt, ))), ('Import', (('names', ), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('ImportFrom', (( 'module', 'names', - 'level', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'level', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (stmt, ))), ('Exec', (( 'body', 'globals', - 'locals', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'locals', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (stmt, ))), ('Global', (('names', ), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('Nonlocal', (('names', ), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('Expr', (('value', ), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('Pass', ((), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('Break', ((), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), ('Continue', ((), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (stmt, ))), + 'end_col_offset', + ), (stmt, ))), # expr ('BoolOp', (( 'op', - 'values', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'values', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('BinOp', (( 'left', 'op', - 'right', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'right', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('UnaryOp', (( 'op', - 'operand', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'operand', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('Lambda', (( 'args', - 'body', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'body', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('IfExp', (( 'test', 'body', - 'orelse', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'orelse', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('Dict', (( 'keys', - 'values', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'values', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('Set', (('elts', ), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'end_col_offset', + ), (expr, ))), ('ListComp', (( 'elt', - 'generators', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 
'end_col_offset', ), (expr, ))), + 'generators', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('SetComp', (( 'elt', - 'generators', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'generators', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('DictComp', (( 'key', 'value', - 'generators', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'generators', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('GeneratorExp', (( 'elt', - 'generators', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'generators', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('Await', (('value', ), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'end_col_offset', + ), (expr, ))), ('Yield', (('value', ), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'end_col_offset', + ), (expr, ))), ('YieldFrom', (('value', ), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'end_col_offset', + ), (expr, ))), ('Compare', (( 'left', 'ops', - 'comparators', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'comparators', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('Call', (( 'func', 'args', - 'keywords', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'keywords', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('Repr', (('value', ), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'end_col_offset', + ), (expr, ))), ('FormattedValue', (( 'value', 'conversion', - 'format_spec', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'format_spec', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('JoinedStr', (('values', ), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'end_col_offset', + ), (expr, ))), ('Constant', (('value', 'kind'), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'end_col_offset', + ), (expr, ))), ('Attribute', (( 'value', 'attr', - 'ctx', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'ctx', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('Subscript', (( 'value', 'slice', - 'ctx', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'ctx', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('Starred', (( 'value', - 'ctx', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'ctx', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('Name', (('id', 'ctx', 'annotation', 'type_comment'), ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'end_col_offset', + ), (expr, ))), ('List', (( 'elts', - 'ctx', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'ctx', + ), ( + 'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), ('Tuple', (( 'elts', - 'ctx', ), ( - 'lineno', - 'col_offset', - 'end_lineno', - 'end_col_offset', ), (expr, ))), + 'ctx', + ), ( + 
'lineno', + 'col_offset', + 'end_lineno', + 'end_col_offset', + ), (expr, ))), # expr_context ('Load', ((), (), (expr_context, ))), @@ -435,7 +519,8 @@ _nodes = ( 'lineno', 'col_offset', 'end_lineno', - 'end_col_offset', ), (slice, ))), + 'end_col_offset', + ), (slice, ))), # boolop ('And', ((), (), (boolop, ))), @@ -459,16 +544,20 @@ _nodes = ( # unaryop ('Invert', ((), (), ( unaryop, - AST, ))), + AST, + ))), ('Not', ((), (), ( unaryop, - AST, ))), + AST, + ))), ('UAdd', ((), (), ( unaryop, - AST, ))), + AST, + ))), ('USub', ((), (), ( unaryop, - AST, ))), + AST, + ))), # cmpop ('Eq', ((), (), (cmpop, ))), @@ -495,9 +584,8 @@ _nodes = ( 'kw_defaults', 'kwarg', 'defaults'), (), (AST, ))), # keyword - ('keyword', - (('arg', 'value'), - ('lineno', 'col_offset', 'end_lineno', 'end_col_offset'), (AST, ))), + ('keyword', (('arg', 'value'), ('lineno', 'col_offset', 'end_lineno', + 'end_col_offset'), (AST, ))), # alias ('alias', (('name', 'asname'), (), (AST, ))), @@ -506,7 +594,8 @@ _nodes = ( ('withitem', (('context_expr', 'optional_vars'), (), (AST, ))), # type_ignore - ('type_ignore', ((), ('lineno', 'tag'), (TypeIgnore, ))), ) + ('type_ignore', ((), ('lineno', 'tag'), (TypeIgnore, ))), +) for name, descr in _nodes: _make_node(name, *descr) diff --git a/python/paddle/utils/image_util.py b/python/paddle/utils/image_util.py index 18be9366c40..9c93d44eeec 100644 --- a/python/paddle/utils/image_util.py +++ b/python/paddle/utils/image_util.py @@ -56,8 +56,8 @@ def crop_img(im, inner_size, color=True, test=True): If True, crop the center of images. """ if color: - height, width = max(inner_size, im.shape[1]), max(inner_size, - im.shape[2]) + height, width = max(inner_size, + im.shape[1]), max(inner_size, im.shape[2]) padded_im = np.zeros((3, height, width)) startY = (height - im.shape[1]) / 2 startX = (width - im.shape[2]) / 2 @@ -65,8 +65,8 @@ def crop_img(im, inner_size, color=True, test=True): padded_im[:, startY:endY, startX:endX] = im else: im = im.astype('float32') - height, width = max(inner_size, im.shape[0]), max(inner_size, - im.shape[1]) + height, width = max(inner_size, + im.shape[0]), max(inner_size, im.shape[1]) padded_im = np.zeros((height, width)) startY = (height - im.shape[0]) / 2 startX = (width - im.shape[1]) / 2 @@ -122,13 +122,13 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True): if color: assert (mean_img_size * mean_img_size * 3 == mean.shape[0]) mean = mean.reshape(3, mean_img_size, mean_img_size) - mean = mean[:, border:border + crop_size, border:border + - crop_size].astype('float32') + mean = mean[:, border:border + crop_size, + border:border + crop_size].astype('float32') else: assert (mean_img_size * mean_img_size == mean.shape[0]) mean = mean.reshape(mean_img_size, mean_img_size) - mean = mean[border:border + crop_size, border:border + - crop_size].astype('float32') + mean = mean[border:border + crop_size, + border:border + crop_size].astype('float32') return mean @@ -170,9 +170,8 @@ def oversample(img, crop_dims): crops_ix = np.tile(crops_ix, (2, 1)) # Extract crops - crops = np.empty( - (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]), - dtype=np.float32) + crops = np.empty((10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]), + dtype=np.float32) ix = 0 for im in img: for crop in crops_ix: @@ -183,6 +182,7 @@ def oversample(img, crop_dims): class ImageTransformer: + def __init__(self, transpose=None, channel_swap=None, diff --git a/python/paddle/utils/install_check.py b/python/paddle/utils/install_check.py index 
9feda3d2dae..f0636e9a101 100644 --- a/python/paddle/utils/install_check.py +++ b/python/paddle/utils/install_check.py @@ -27,8 +27,9 @@ def _simple_network(): """ Define a simple network composed by a single linear layer. """ - input = paddle.static.data( - name="input", shape=[None, 2, 2], dtype="float32") + input = paddle.static.data(name="input", + shape=[None, 2, 2], + dtype="float32") weight = paddle.create_parameter( shape=[2, 3], dtype="float32", @@ -126,15 +127,17 @@ def _run_dygraph_single(use_cuda, use_xpu, use_npu): name="weight", initializer=paddle.nn.initializer.Constant(value=0.5)) bias_attr = paddle.ParamAttr( name="bias", initializer=paddle.nn.initializer.Constant(value=1.0)) - linear = paddle.nn.Linear( - 2, 4, weight_attr=weight_attr, bias_attr=bias_attr) + linear = paddle.nn.Linear(2, + 4, + weight_attr=weight_attr, + bias_attr=bias_attr) input_np = _prepare_data(1) input_tensor = paddle.to_tensor(input_np) linear_out = linear(input_tensor) out = paddle.tensor.sum(linear_out) out.backward() - opt = paddle.optimizer.Adam( - learning_rate=0.001, parameters=linear.parameters()) + opt = paddle.optimizer.Adam(learning_rate=0.001, + parameters=linear.parameters()) opt.step() @@ -195,8 +198,8 @@ def _run_static_parallel(use_cuda, use_xpu, use_npu, device_list): paddle.optimizer.SGD(learning_rate=0.01).minimize(loss) compiled_prog = paddle.static.CompiledProgram( - train_prog).with_data_parallel( - loss_name=loss.name, places=device_list) + train_prog).with_data_parallel(loss_name=loss.name, + places=device_list) if use_cuda: place = paddle.CUDAPlace(0) @@ -269,8 +272,8 @@ def run_check(): try: _run_static_parallel(use_cuda, use_xpu, use_npu, device_list) - print("PaddlePaddle works well on {} {}s.".format(device_count, - device_str)) + print("PaddlePaddle works well on {} {}s.".format( + device_count, device_str)) print( "PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now." ) @@ -280,8 +283,8 @@ def run_check(): "\n 1. There is not enough GPUs visible on your system" "\n 2. Some GPUs are occupied by other process now" "\n 3. NVIDIA-NCCL2 is not installed correctly on your system. Please follow instruction on https://github.com/NVIDIA/nccl-tests " - "\n to test your NCCL, or reinstall it following https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html". - format(device_count, device_str)) + "\n to test your NCCL, or reinstall it following https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html" + .format(device_count, device_str)) logging.warning("\n Original Error is: {}".format(e)) print("PaddlePaddle is installed successfully ONLY for single {}! " diff --git a/python/paddle/utils/op_version.py b/python/paddle/utils/op_version.py index 6e81b5a2c17..575e5f40772 100644 --- a/python/paddle/utils/op_version.py +++ b/python/paddle/utils/op_version.py @@ -1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -29,6 +29,7 @@ def Singleton(cls): class OpUpdateInfoHelper(object): + def __init__(self, info): self._info = info @@ -48,6 +49,7 @@ class OpUpdateInfoHelper(object): @Singleton class OpLastCheckpointChecker(object): + def __init__(self): self.raw_version_map = core.get_op_version_map() self.checkpoints_map = {} @@ -63,8 +65,8 @@ class OpLastCheckpointChecker(object): updates = [] if op_name in self.checkpoints_map: for update in self.checkpoints_map[op_name]: - if (update.type() == type) or ( - type == core.OpUpdateType.kInvalid): + if (update.type() == type) or (type + == core.OpUpdateType.kInvalid): if OpUpdateInfoHelper(update.info()).verify_key_value(key): updates.append(update.info()) return updates diff --git a/python/paddle/utils/profiler.py b/python/paddle/utils/profiler.py index cc33342ec5a..288c17c9511 100644 --- a/python/paddle/utils/profiler.py +++ b/python/paddle/utils/profiler.py @@ -24,19 +24,14 @@ from ..fluid.profiler import profiler # noqa: F401 from ..fluid.profiler import stop_profiler from ..fluid.profiler import reset_profiler -__all__ = [ #noqa - 'Profiler', - 'get_profiler', - 'ProfilerOptions', - 'cuda_profiler', - 'start_profiler', - 'profiler', - 'stop_profiler', - 'reset_profiler' +__all__ = [ #noqa + 'Profiler', 'get_profiler', 'ProfilerOptions', 'cuda_profiler', + 'start_profiler', 'profiler', 'stop_profiler', 'reset_profiler' ] class ProfilerOptions(object): + def __init__(self, options=None): self.options = { 'state': 'All', @@ -74,6 +69,7 @@ _current_profiler = None class Profiler(object): + def __init__(self, enabled=True, options=None): if options is not None: self.profiler_options = options diff --git a/python/paddle/vision/datasets/__init__.py b/python/paddle/vision/datasets/__init__.py index a9673aae21e..10666b7c719 100644 --- a/python/paddle/vision/datasets/__init__.py +++ b/python/paddle/vision/datasets/__init__.py @@ -21,13 +21,7 @@ from .cifar import Cifar10 # noqa: F401 from .cifar import Cifar100 # noqa: F401 from .voc2012 import VOC2012 # noqa: F401 -__all__ = [ #noqa - 'DatasetFolder', - 'ImageFolder', - 'MNIST', - 'FashionMNIST', - 'Flowers', - 'Cifar10', - 'Cifar100', - 'VOC2012' +__all__ = [ #noqa + 'DatasetFolder', 'ImageFolder', 'MNIST', 'FashionMNIST', 'Flowers', + 'Cifar10', 'Cifar100', 'VOC2012' ] diff --git a/python/paddle/vision/datasets/cifar.py b/python/paddle/vision/datasets/cifar.py index 3028d8697ae..f31aab9eccf 100644 --- a/python/paddle/vision/datasets/cifar.py +++ b/python/paddle/vision/datasets/cifar.py @@ -110,8 +110,8 @@ class Cifar10(Dataset): backend = paddle.vision.get_image_backend() if backend not in ['pil', 'cv2']: raise ValueError( - "Expected backend are one of ['pil', 'cv2'], but got {}" - .format(backend)) + "Expected backend are one of ['pil', 'cv2'], but got {}".format( + backend)) self.backend = backend self._init_url_md5_flag() @@ -119,8 +119,10 @@ class Cifar10(Dataset): self.data_file = data_file if self.data_file is None: assert download, "data_file is not set and downloading automatically is disabled" - self.data_file = _check_exists_and_download( - data_file, self.data_url, self.data_md5, 'cifar', download) + self.data_file = _check_exists_and_download(data_file, + self.data_url, + self.data_md5, 'cifar', + download) self.transform = transform @@ -146,8 +148,8 @@ class Cifar10(Dataset): batch = pickle.load(f.extractfile(name), encoding='bytes') data = batch[six.b('data')] - labels = batch.get( - six.b('labels'), batch.get(six.b('fine_labels'), None)) + labels = batch.get(six.b('labels'), + 
batch.get(six.b('fine_labels'), None)) assert labels is not None for sample, label in six.moves.zip(data, labels): self.data.append((sample, label)) diff --git a/python/paddle/vision/datasets/flowers.py b/python/paddle/vision/datasets/flowers.py index 0b006ada4a0..ef59d24ed64 100644 --- a/python/paddle/vision/datasets/flowers.py +++ b/python/paddle/vision/datasets/flowers.py @@ -89,26 +89,29 @@ class Flowers(Dataset): backend = paddle.vision.get_image_backend() if backend not in ['pil', 'cv2']: raise ValueError( - "Expected backend are one of ['pil', 'cv2'], but got {}" - .format(backend)) + "Expected backend are one of ['pil', 'cv2'], but got {}".format( + backend)) self.backend = backend flag = MODE_FLAG_MAP[mode.lower()] if not data_file: assert download, "data_file is not set and downloading automatically is disabled" - data_file = _check_exists_and_download( - data_file, DATA_URL, DATA_MD5, 'flowers', download) + data_file = _check_exists_and_download(data_file, DATA_URL, + DATA_MD5, 'flowers', + download) if not label_file: assert download, "label_file is not set and downloading automatically is disabled" - label_file = _check_exists_and_download( - label_file, LABEL_URL, LABEL_MD5, 'flowers', download) + label_file = _check_exists_and_download(label_file, LABEL_URL, + LABEL_MD5, 'flowers', + download) if not setid_file: assert download, "setid_file is not set and downloading automatically is disabled" - setid_file = _check_exists_and_download( - setid_file, SETID_URL, SETID_MD5, 'flowers', download) + setid_file = _check_exists_and_download(setid_file, SETID_URL, + SETID_MD5, 'flowers', + download) self.transform = transform diff --git a/python/paddle/vision/datasets/folder.py b/python/paddle/vision/datasets/folder.py index 220b3d8ecb4..c3f1b61f30e 100644 --- a/python/paddle/vision/datasets/folder.py +++ b/python/paddle/vision/datasets/folder.py @@ -139,9 +139,10 @@ class DatasetFolder(Dataset): samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file) if len(samples) == 0: - raise (RuntimeError( - "Found 0 directories in subfolders of: " + self.root + "\n" - "Supported extensions are: " + ",".join(extensions))) + raise (RuntimeError("Found 0 directories in subfolders of: " + + self.root + "\n" + "Supported extensions are: " + + ",".join(extensions))) self.loader = default_loader if loader is None else loader self.extensions = extensions @@ -297,9 +298,10 @@ class ImageFolder(Dataset): samples.append(f) if len(samples) == 0: - raise (RuntimeError( - "Found 0 files in subfolders of: " + self.root + "\n" - "Supported extensions are: " + ",".join(extensions))) + raise (RuntimeError("Found 0 files in subfolders of: " + self.root + + "\n" + "Supported extensions are: " + + ",".join(extensions))) self.loader = default_loader if loader is None else loader self.extensions = extensions diff --git a/python/paddle/vision/datasets/mnist.py b/python/paddle/vision/datasets/mnist.py index 84760f9598b..703a4f64cf4 100644 --- a/python/paddle/vision/datasets/mnist.py +++ b/python/paddle/vision/datasets/mnist.py @@ -85,8 +85,8 @@ class MNIST(Dataset): backend = paddle.vision.get_image_backend() if backend not in ['pil', 'cv2']: raise ValueError( - "Expected backend are one of ['pil', 'cv2'], but got {}" - .format(backend)) + "Expected backend are one of ['pil', 'cv2'], but got {}".format( + backend)) self.backend = backend self.mode = mode.lower() @@ -134,8 +134,8 @@ class MNIST(Dataset): offset_lab = 0 # label file : 8B magic_byte_lab = '>II' - magic_lab, label_num = 
struct.unpack_from(magic_byte_lab, - lab_buf, offset_lab) + magic_lab, label_num = struct.unpack_from( + magic_byte_lab, lab_buf, offset_lab) offset_lab += struct.calcsize(magic_byte_lab) while True: @@ -149,8 +149,9 @@ class MNIST(Dataset): fmt_images = '>' + str(buffer_size * rows * cols) + 'B' images_temp = struct.unpack_from(fmt_images, img_buf, offset_img) - images = np.reshape(images_temp, (buffer_size, rows * - cols)).astype('float32') + images = np.reshape( + images_temp, + (buffer_size, rows * cols)).astype('float32') offset_img += struct.calcsize(fmt_images) for i in range(buffer_size): diff --git a/python/paddle/vision/datasets/voc2012.py b/python/paddle/vision/datasets/voc2012.py index 5a82d7864cb..cd9ff70ca1e 100644 --- a/python/paddle/vision/datasets/voc2012.py +++ b/python/paddle/vision/datasets/voc2012.py @@ -99,8 +99,8 @@ class VOC2012(Dataset): backend = paddle.vision.get_image_backend() if backend not in ['pil', 'cv2']: raise ValueError( - "Expected backend are one of ['pil', 'cv2'], but got {}" - .format(backend)) + "Expected backend are one of ['pil', 'cv2'], but got {}".format( + backend)) self.backend = backend self.flag = MODE_FLAG_MAP[mode.lower()] @@ -108,8 +108,9 @@ class VOC2012(Dataset): self.data_file = data_file if self.data_file is None: assert download, "data_file is not set and downloading automatically is disabled" - self.data_file = _check_exists_and_download( - data_file, VOC_URL, VOC_MD5, CACHE_DIR, download) + self.data_file = _check_exists_and_download(data_file, VOC_URL, + VOC_MD5, CACHE_DIR, + download) self.transform = transform # read dataset into memory diff --git a/python/paddle/vision/image.py b/python/paddle/vision/image.py index 5c260b1d90a..755c8bcc9cc 100644 --- a/python/paddle/vision/image.py +++ b/python/paddle/vision/image.py @@ -82,8 +82,8 @@ def set_image_backend(backend): global _image_backend if backend not in ['pil', 'cv2', 'tensor']: raise ValueError( - "Expected backend are one of ['pil', 'cv2', 'tensor'], but got {}" - .format(backend)) + "Expected backend are one of ['pil', 'cv2', 'tensor'], but got {}". + format(backend)) _image_backend = backend @@ -152,8 +152,8 @@ def image_load(path, backend=None): backend = _image_backend if backend not in ['pil', 'cv2', 'tensor']: raise ValueError( - "Expected backend are one of ['pil', 'cv2', 'tensor'], but got {}" - .format(backend)) + "Expected backend are one of ['pil', 'cv2', 'tensor'], but got {}". 
+ format(backend)) if backend == 'pil': return Image.open(path) diff --git a/python/paddle/vision/models/__init__.py b/python/paddle/vision/models/__init__.py index 85ff5f85dff..72bb6ee8e8d 100644 --- a/python/paddle/vision/models/__init__.py +++ b/python/paddle/vision/models/__init__.py @@ -64,56 +64,18 @@ from .shufflenetv2 import shufflenet_v2_x1_5 # noqa: F401 from .shufflenetv2 import shufflenet_v2_x2_0 # noqa: F401 from .shufflenetv2 import shufflenet_v2_swish # noqa: F401 -__all__ = [ #noqa - 'ResNet', - 'resnet18', - 'resnet34', - 'resnet50', - 'resnet101', - 'resnet152', - 'resnext50_32x4d', - 'resnext50_64x4d', - 'resnext101_32x4d', - 'resnext101_64x4d', - 'resnext152_32x4d', - 'resnext152_64x4d', - 'wide_resnet50_2', - 'wide_resnet101_2', - 'VGG', - 'vgg11', - 'vgg13', - 'vgg16', - 'vgg19', - 'MobileNetV1', - 'mobilenet_v1', - 'MobileNetV2', - 'mobilenet_v2', - 'MobileNetV3Small', - 'MobileNetV3Large', - 'mobilenet_v3_small', - 'mobilenet_v3_large', - 'LeNet', - 'DenseNet', - 'densenet121', - 'densenet161', - 'densenet169', - 'densenet201', - 'densenet264', - 'AlexNet', - 'alexnet', - 'InceptionV3', - 'inception_v3', - 'SqueezeNet', - 'squeezenet1_0', - 'squeezenet1_1', - 'GoogLeNet', - 'googlenet', - 'ShuffleNetV2', - 'shufflenet_v2_x0_25', - 'shufflenet_v2_x0_33', - 'shufflenet_v2_x0_5', - 'shufflenet_v2_x1_0', - 'shufflenet_v2_x1_5', - 'shufflenet_v2_x2_0', +__all__ = [ #noqa + 'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', + 'resnext50_32x4d', 'resnext50_64x4d', 'resnext101_32x4d', + 'resnext101_64x4d', 'resnext152_32x4d', 'resnext152_64x4d', + 'wide_resnet50_2', 'wide_resnet101_2', 'VGG', 'vgg11', 'vgg13', 'vgg16', + 'vgg19', 'MobileNetV1', 'mobilenet_v1', 'MobileNetV2', 'mobilenet_v2', + 'MobileNetV3Small', 'MobileNetV3Large', 'mobilenet_v3_small', + 'mobilenet_v3_large', 'LeNet', 'DenseNet', 'densenet121', 'densenet161', + 'densenet169', 'densenet201', 'densenet264', 'AlexNet', 'alexnet', + 'InceptionV3', 'inception_v3', 'SqueezeNet', 'squeezenet1_0', + 'squeezenet1_1', 'GoogLeNet', 'googlenet', 'ShuffleNetV2', + 'shufflenet_v2_x0_25', 'shufflenet_v2_x0_33', 'shufflenet_v2_x0_5', + 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0', 'shufflenet_v2_swish' ] diff --git a/python/paddle/vision/models/alexnet.py b/python/paddle/vision/models/alexnet.py index 1d36ef37b6c..411a8f01be2 100644 --- a/python/paddle/vision/models/alexnet.py +++ b/python/paddle/vision/models/alexnet.py @@ -30,13 +30,15 @@ from paddle.utils.download import get_weights_path_from_url model_urls = { "alexnet": ( "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams", - "7f0f9f737132e02732d75a1459d98a43", ) + "7f0f9f737132e02732d75a1459d98a43", + ) } __all__ = [] class ConvPoolLayer(nn.Layer): + def __init__(self, input_channels, output_channels, diff --git a/python/paddle/vision/models/densenet.py b/python/paddle/vision/models/densenet.py index 46c7b6dc52b..a764be95445 100644 --- a/python/paddle/vision/models/densenet.py +++ b/python/paddle/vision/models/densenet.py @@ -48,6 +48,7 @@ model_urls = { class BNACConvLayer(nn.Layer): + def __init__(self, num_channels, num_filters, @@ -59,15 +60,14 @@ class BNACConvLayer(nn.Layer): super(BNACConvLayer, self).__init__() self._batch_norm = BatchNorm(num_channels, act=act) - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=pad, - groups=groups, - weight_attr=ParamAttr(), - bias_attr=False) + self._conv = 
Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=pad, + groups=groups, + weight_attr=ParamAttr(), + bias_attr=False) def forward(self, input): y = self._batch_norm(input) @@ -76,23 +76,22 @@ class BNACConvLayer(nn.Layer): class DenseLayer(nn.Layer): + def __init__(self, num_channels, growth_rate, bn_size, dropout): super(DenseLayer, self).__init__() self.dropout = dropout - self.bn_ac_func1 = BNACConvLayer( - num_channels=num_channels, - num_filters=bn_size * growth_rate, - filter_size=1, - pad=0, - stride=1) + self.bn_ac_func1 = BNACConvLayer(num_channels=num_channels, + num_filters=bn_size * growth_rate, + filter_size=1, + pad=0, + stride=1) - self.bn_ac_func2 = BNACConvLayer( - num_channels=bn_size * growth_rate, - num_filters=growth_rate, - filter_size=3, - pad=1, - stride=1) + self.bn_ac_func2 = BNACConvLayer(num_channels=bn_size * growth_rate, + num_filters=growth_rate, + filter_size=3, + pad=1, + stride=1) if dropout: self.dropout_func = Dropout(p=dropout, mode="downscale_in_infer") @@ -107,6 +106,7 @@ class DenseLayer(nn.Layer): class DenseBlock(nn.Layer): + def __init__(self, num_channels, num_layers, @@ -123,11 +123,10 @@ class DenseBlock(nn.Layer): self.dense_layer_func.append( self.add_sublayer( "{}_{}".format(name, layer + 1), - DenseLayer( - num_channels=pre_channel, - growth_rate=growth_rate, - bn_size=bn_size, - dropout=dropout))) + DenseLayer(num_channels=pre_channel, + growth_rate=growth_rate, + bn_size=bn_size, + dropout=dropout))) pre_channel = pre_channel + growth_rate def forward(self, input): @@ -138,15 +137,15 @@ class DenseBlock(nn.Layer): class TransitionLayer(nn.Layer): + def __init__(self, num_channels, num_output_features): super(TransitionLayer, self).__init__() - self.conv_ac_func = BNACConvLayer( - num_channels=num_channels, - num_filters=num_output_features, - filter_size=1, - pad=0, - stride=1) + self.conv_ac_func = BNACConvLayer(num_channels=num_channels, + num_filters=num_output_features, + filter_size=1, + pad=0, + stride=1) self.pool2d_avg = AvgPool2D(kernel_size=2, stride=2, padding=0) @@ -157,6 +156,7 @@ class TransitionLayer(nn.Layer): class ConvBNLayer(nn.Layer): + def __init__(self, num_channels, num_filters, @@ -167,15 +167,14 @@ class ConvBNLayer(nn.Layer): act="relu"): super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=pad, - groups=groups, - weight_attr=ParamAttr(), - bias_attr=False) + self._conv = Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=pad, + groups=groups, + weight_attr=ParamAttr(), + bias_attr=False) self._batch_norm = BatchNorm(num_filters, act=act) def forward(self, input): @@ -232,13 +231,12 @@ class DenseNet(nn.Layer): } num_init_features, growth_rate, block_config = densenet_spec[layers] - self.conv1_func = ConvBNLayer( - num_channels=3, - num_filters=num_init_features, - filter_size=7, - stride=2, - pad=3, - act='relu') + self.conv1_func = ConvBNLayer(num_channels=3, + num_filters=num_init_features, + filter_size=7, + stride=2, + pad=3, + act='relu') self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1) self.block_config = block_config self.dense_block_func_list = [] @@ -249,13 +247,12 @@ class DenseNet(nn.Layer): self.dense_block_func_list.append( self.add_sublayer( "db_conv_{}".format(i + 2), - DenseBlock( - num_channels=pre_num_channels, - num_layers=num_layers, - 
bn_size=bn_size, - growth_rate=growth_rate, - dropout=dropout, - name='conv' + str(i + 2)))) + DenseBlock(num_channels=pre_num_channels, + num_layers=num_layers, + bn_size=bn_size, + growth_rate=growth_rate, + dropout=dropout, + name='conv' + str(i + 2)))) num_features = num_features + num_layers * growth_rate pre_num_channels = num_features @@ -264,9 +261,8 @@ class DenseNet(nn.Layer): self.transition_func_list.append( self.add_sublayer( "tr_conv{}_blk".format(i + 2), - TransitionLayer( - num_channels=pre_num_channels, - num_output_features=num_features // 2))) + TransitionLayer(num_channels=pre_num_channels, + num_output_features=num_features // 2))) pre_num_channels = num_features // 2 num_features = num_features // 2 diff --git a/python/paddle/vision/models/googlenet.py b/python/paddle/vision/models/googlenet.py index 6afbc426038..b1d1d38e2ee 100644 --- a/python/paddle/vision/models/googlenet.py +++ b/python/paddle/vision/models/googlenet.py @@ -41,6 +41,7 @@ def xavier(channels, filter_size): class ConvLayer(nn.Layer): + def __init__(self, num_channels, num_filters, @@ -49,14 +50,13 @@ class ConvLayer(nn.Layer): groups=1): super(ConvLayer, self).__init__() - self._conv = Conv2D( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - bias_attr=False) + self._conv = Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + bias_attr=False) def forward(self, inputs): y = self._conv(inputs) @@ -64,6 +64,7 @@ class ConvLayer(nn.Layer): class Inception(nn.Layer): + def __init__(self, input_channels, output_channels, filter1, filter3R, filter3, filter5R, filter5, proj): super(Inception, self).__init__() @@ -151,8 +152,9 @@ class GoogLeNet(nn.Layer): if num_classes > 0: # out self._drop = Dropout(p=0.4, mode="downscale_in_infer") - self._fc_out = Linear( - 1024, num_classes, weight_attr=xavier(1024, 1)) + self._fc_out = Linear(1024, + num_classes, + weight_attr=xavier(1024, 1)) # out1 self._conv_o1 = ConvLayer(512, 128, 1) diff --git a/python/paddle/vision/models/inceptionv3.py b/python/paddle/vision/models/inceptionv3.py index 27650dbe09f..8ffb23e62ce 100644 --- a/python/paddle/vision/models/inceptionv3.py +++ b/python/paddle/vision/models/inceptionv3.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -37,42 +37,38 @@ model_urls = { class InceptionStem(nn.Layer): + def __init__(self): super().__init__() - self.conv_1a_3x3 = ConvNormActivation( - in_channels=3, - out_channels=32, - kernel_size=3, - stride=2, - padding=0, - activation_layer=nn.ReLU) - self.conv_2a_3x3 = ConvNormActivation( - in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - padding=0, - activation_layer=nn.ReLU) - self.conv_2b_3x3 = ConvNormActivation( - in_channels=32, - out_channels=64, - kernel_size=3, - padding=1, - activation_layer=nn.ReLU) + self.conv_1a_3x3 = ConvNormActivation(in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + padding=0, + activation_layer=nn.ReLU) + self.conv_2a_3x3 = ConvNormActivation(in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + padding=0, + activation_layer=nn.ReLU) + self.conv_2b_3x3 = ConvNormActivation(in_channels=32, + out_channels=64, + kernel_size=3, + padding=1, + activation_layer=nn.ReLU) self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=0) - self.conv_3b_1x1 = ConvNormActivation( - in_channels=64, - out_channels=80, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) - self.conv_4a_3x3 = ConvNormActivation( - in_channels=80, - out_channels=192, - kernel_size=3, - padding=0, - activation_layer=nn.ReLU) + self.conv_3b_1x1 = ConvNormActivation(in_channels=64, + out_channels=80, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) + self.conv_4a_3x3 = ConvNormActivation(in_channels=80, + out_channels=192, + kernel_size=3, + padding=0, + activation_layer=nn.ReLU) def forward(self, x): x = self.conv_1a_3x3(x) @@ -86,55 +82,51 @@ class InceptionStem(nn.Layer): class InceptionA(nn.Layer): + def __init__(self, num_channels, pool_features): super().__init__() - self.branch1x1 = ConvNormActivation( - in_channels=num_channels, - out_channels=64, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) - - self.branch5x5_1 = ConvNormActivation( - in_channels=num_channels, - out_channels=48, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) - self.branch5x5_2 = ConvNormActivation( - in_channels=48, - out_channels=64, - kernel_size=5, - padding=2, - activation_layer=nn.ReLU) - - self.branch3x3dbl_1 = ConvNormActivation( - in_channels=num_channels, - out_channels=64, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) - self.branch3x3dbl_2 = ConvNormActivation( - in_channels=64, - out_channels=96, - kernel_size=3, - padding=1, - activation_layer=nn.ReLU) - self.branch3x3dbl_3 = ConvNormActivation( - in_channels=96, - out_channels=96, - kernel_size=3, - padding=1, - activation_layer=nn.ReLU) - - self.branch_pool = AvgPool2D( - kernel_size=3, stride=1, padding=1, exclusive=False) - self.branch_pool_conv = ConvNormActivation( - in_channels=num_channels, - out_channels=pool_features, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) + self.branch1x1 = ConvNormActivation(in_channels=num_channels, + out_channels=64, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) + + self.branch5x5_1 = ConvNormActivation(in_channels=num_channels, + out_channels=48, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) + self.branch5x5_2 = ConvNormActivation(in_channels=48, + out_channels=64, + kernel_size=5, + padding=2, + activation_layer=nn.ReLU) + + self.branch3x3dbl_1 = ConvNormActivation(in_channels=num_channels, + out_channels=64, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) + self.branch3x3dbl_2 = ConvNormActivation(in_channels=64, + out_channels=96, + kernel_size=3, + padding=1, + 
activation_layer=nn.ReLU) + self.branch3x3dbl_3 = ConvNormActivation(in_channels=96, + out_channels=96, + kernel_size=3, + padding=1, + activation_layer=nn.ReLU) + + self.branch_pool = AvgPool2D(kernel_size=3, + stride=1, + padding=1, + exclusive=False) + self.branch_pool_conv = ConvNormActivation(in_channels=num_channels, + out_channels=pool_features, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) def forward(self, x): branch1x1 = self.branch1x1(x) @@ -147,41 +139,38 @@ class InceptionA(nn.Layer): branch_pool = self.branch_pool(x) branch_pool = self.branch_pool_conv(branch_pool) - x = paddle.concat( - [branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=1) + x = paddle.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool], + axis=1) return x class InceptionB(nn.Layer): + def __init__(self, num_channels): super().__init__() - self.branch3x3 = ConvNormActivation( - in_channels=num_channels, - out_channels=384, - kernel_size=3, - stride=2, - padding=0, - activation_layer=nn.ReLU) - - self.branch3x3dbl_1 = ConvNormActivation( - in_channels=num_channels, - out_channels=64, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) - self.branch3x3dbl_2 = ConvNormActivation( - in_channels=64, - out_channels=96, - kernel_size=3, - padding=1, - activation_layer=nn.ReLU) - self.branch3x3dbl_3 = ConvNormActivation( - in_channels=96, - out_channels=96, - kernel_size=3, - stride=2, - padding=0, - activation_layer=nn.ReLU) + self.branch3x3 = ConvNormActivation(in_channels=num_channels, + out_channels=384, + kernel_size=3, + stride=2, + padding=0, + activation_layer=nn.ReLU) + + self.branch3x3dbl_1 = ConvNormActivation(in_channels=num_channels, + out_channels=64, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) + self.branch3x3dbl_2 = ConvNormActivation(in_channels=64, + out_channels=96, + kernel_size=3, + padding=1, + activation_layer=nn.ReLU) + self.branch3x3dbl_3 = ConvNormActivation(in_channels=96, + out_channels=96, + kernel_size=3, + stride=2, + padding=0, + activation_layer=nn.ReLU) self.branch_pool = MaxPool2D(kernel_size=3, stride=2) @@ -200,76 +189,69 @@ class InceptionB(nn.Layer): class InceptionC(nn.Layer): + def __init__(self, num_channels, channels_7x7): super().__init__() - self.branch1x1 = ConvNormActivation( - in_channels=num_channels, - out_channels=192, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) - - self.branch7x7_1 = ConvNormActivation( - in_channels=num_channels, - out_channels=channels_7x7, - kernel_size=1, - stride=1, - padding=0, - activation_layer=nn.ReLU) - self.branch7x7_2 = ConvNormActivation( - in_channels=channels_7x7, - out_channels=channels_7x7, - kernel_size=(1, 7), - stride=1, - padding=(0, 3), - activation_layer=nn.ReLU) - self.branch7x7_3 = ConvNormActivation( - in_channels=channels_7x7, - out_channels=192, - kernel_size=(7, 1), - stride=1, - padding=(3, 0), - activation_layer=nn.ReLU) - - self.branch7x7dbl_1 = ConvNormActivation( - in_channels=num_channels, - out_channels=channels_7x7, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) - self.branch7x7dbl_2 = ConvNormActivation( - in_channels=channels_7x7, - out_channels=channels_7x7, - kernel_size=(7, 1), - padding=(3, 0), - activation_layer=nn.ReLU) - self.branch7x7dbl_3 = ConvNormActivation( - in_channels=channels_7x7, - out_channels=channels_7x7, - kernel_size=(1, 7), - padding=(0, 3), - activation_layer=nn.ReLU) - self.branch7x7dbl_4 = ConvNormActivation( - in_channels=channels_7x7, - out_channels=channels_7x7, - kernel_size=(7, 1), - padding=(3, 0), - 
activation_layer=nn.ReLU) - self.branch7x7dbl_5 = ConvNormActivation( - in_channels=channels_7x7, - out_channels=192, - kernel_size=(1, 7), - padding=(0, 3), - activation_layer=nn.ReLU) - - self.branch_pool = AvgPool2D( - kernel_size=3, stride=1, padding=1, exclusive=False) - self.branch_pool_conv = ConvNormActivation( - in_channels=num_channels, - out_channels=192, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) + self.branch1x1 = ConvNormActivation(in_channels=num_channels, + out_channels=192, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) + + self.branch7x7_1 = ConvNormActivation(in_channels=num_channels, + out_channels=channels_7x7, + kernel_size=1, + stride=1, + padding=0, + activation_layer=nn.ReLU) + self.branch7x7_2 = ConvNormActivation(in_channels=channels_7x7, + out_channels=channels_7x7, + kernel_size=(1, 7), + stride=1, + padding=(0, 3), + activation_layer=nn.ReLU) + self.branch7x7_3 = ConvNormActivation(in_channels=channels_7x7, + out_channels=192, + kernel_size=(7, 1), + stride=1, + padding=(3, 0), + activation_layer=nn.ReLU) + + self.branch7x7dbl_1 = ConvNormActivation(in_channels=num_channels, + out_channels=channels_7x7, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) + self.branch7x7dbl_2 = ConvNormActivation(in_channels=channels_7x7, + out_channels=channels_7x7, + kernel_size=(7, 1), + padding=(3, 0), + activation_layer=nn.ReLU) + self.branch7x7dbl_3 = ConvNormActivation(in_channels=channels_7x7, + out_channels=channels_7x7, + kernel_size=(1, 7), + padding=(0, 3), + activation_layer=nn.ReLU) + self.branch7x7dbl_4 = ConvNormActivation(in_channels=channels_7x7, + out_channels=channels_7x7, + kernel_size=(7, 1), + padding=(3, 0), + activation_layer=nn.ReLU) + self.branch7x7dbl_5 = ConvNormActivation(in_channels=channels_7x7, + out_channels=192, + kernel_size=(1, 7), + padding=(0, 3), + activation_layer=nn.ReLU) + + self.branch_pool = AvgPool2D(kernel_size=3, + stride=1, + padding=1, + exclusive=False) + self.branch_pool_conv = ConvNormActivation(in_channels=num_channels, + out_channels=192, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) def forward(self, x): branch1x1 = self.branch1x1(x) @@ -287,54 +269,49 @@ class InceptionC(nn.Layer): branch_pool = self.branch_pool(x) branch_pool = self.branch_pool_conv(branch_pool) - x = paddle.concat( - [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=1) + x = paddle.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], + axis=1) return x class InceptionD(nn.Layer): + def __init__(self, num_channels): super().__init__() - self.branch3x3_1 = ConvNormActivation( - in_channels=num_channels, - out_channels=192, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) - self.branch3x3_2 = ConvNormActivation( - in_channels=192, - out_channels=320, - kernel_size=3, - stride=2, - padding=0, - activation_layer=nn.ReLU) - - self.branch7x7x3_1 = ConvNormActivation( - in_channels=num_channels, - out_channels=192, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) - self.branch7x7x3_2 = ConvNormActivation( - in_channels=192, - out_channels=192, - kernel_size=(1, 7), - padding=(0, 3), - activation_layer=nn.ReLU) - self.branch7x7x3_3 = ConvNormActivation( - in_channels=192, - out_channels=192, - kernel_size=(7, 1), - padding=(3, 0), - activation_layer=nn.ReLU) - self.branch7x7x3_4 = ConvNormActivation( - in_channels=192, - out_channels=192, - kernel_size=3, - stride=2, - padding=0, - activation_layer=nn.ReLU) + self.branch3x3_1 = ConvNormActivation(in_channels=num_channels, + 
out_channels=192, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) + self.branch3x3_2 = ConvNormActivation(in_channels=192, + out_channels=320, + kernel_size=3, + stride=2, + padding=0, + activation_layer=nn.ReLU) + + self.branch7x7x3_1 = ConvNormActivation(in_channels=num_channels, + out_channels=192, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) + self.branch7x7x3_2 = ConvNormActivation(in_channels=192, + out_channels=192, + kernel_size=(1, 7), + padding=(0, 3), + activation_layer=nn.ReLU) + self.branch7x7x3_3 = ConvNormActivation(in_channels=192, + out_channels=192, + kernel_size=(7, 1), + padding=(3, 0), + activation_layer=nn.ReLU) + self.branch7x7x3_4 = ConvNormActivation(in_channels=192, + out_channels=192, + kernel_size=3, + stride=2, + padding=0, + activation_layer=nn.ReLU) self.branch_pool = MaxPool2D(kernel_size=3, stride=2) @@ -354,66 +331,60 @@ class InceptionD(nn.Layer): class InceptionE(nn.Layer): + def __init__(self, num_channels): super().__init__() - self.branch1x1 = ConvNormActivation( - in_channels=num_channels, - out_channels=320, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) - self.branch3x3_1 = ConvNormActivation( - in_channels=num_channels, - out_channels=384, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) - self.branch3x3_2a = ConvNormActivation( - in_channels=384, - out_channels=384, - kernel_size=(1, 3), - padding=(0, 1), - activation_layer=nn.ReLU) - self.branch3x3_2b = ConvNormActivation( - in_channels=384, - out_channels=384, - kernel_size=(3, 1), - padding=(1, 0), - activation_layer=nn.ReLU) - - self.branch3x3dbl_1 = ConvNormActivation( - in_channels=num_channels, - out_channels=448, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) - self.branch3x3dbl_2 = ConvNormActivation( - in_channels=448, - out_channels=384, - kernel_size=3, - padding=1, - activation_layer=nn.ReLU) - self.branch3x3dbl_3a = ConvNormActivation( - in_channels=384, - out_channels=384, - kernel_size=(1, 3), - padding=(0, 1), - activation_layer=nn.ReLU) - self.branch3x3dbl_3b = ConvNormActivation( - in_channels=384, - out_channels=384, - kernel_size=(3, 1), - padding=(1, 0), - activation_layer=nn.ReLU) - - self.branch_pool = AvgPool2D( - kernel_size=3, stride=1, padding=1, exclusive=False) - self.branch_pool_conv = ConvNormActivation( - in_channels=num_channels, - out_channels=192, - kernel_size=1, - padding=0, - activation_layer=nn.ReLU) + self.branch1x1 = ConvNormActivation(in_channels=num_channels, + out_channels=320, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) + self.branch3x3_1 = ConvNormActivation(in_channels=num_channels, + out_channels=384, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) + self.branch3x3_2a = ConvNormActivation(in_channels=384, + out_channels=384, + kernel_size=(1, 3), + padding=(0, 1), + activation_layer=nn.ReLU) + self.branch3x3_2b = ConvNormActivation(in_channels=384, + out_channels=384, + kernel_size=(3, 1), + padding=(1, 0), + activation_layer=nn.ReLU) + + self.branch3x3dbl_1 = ConvNormActivation(in_channels=num_channels, + out_channels=448, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) + self.branch3x3dbl_2 = ConvNormActivation(in_channels=448, + out_channels=384, + kernel_size=3, + padding=1, + activation_layer=nn.ReLU) + self.branch3x3dbl_3a = ConvNormActivation(in_channels=384, + out_channels=384, + kernel_size=(1, 3), + padding=(0, 1), + activation_layer=nn.ReLU) + self.branch3x3dbl_3b = ConvNormActivation(in_channels=384, + out_channels=384, + kernel_size=(3, 1), + 
padding=(1, 0), + activation_layer=nn.ReLU) + + self.branch_pool = AvgPool2D(kernel_size=3, + stride=1, + padding=1, + exclusive=False) + self.branch_pool_conv = ConvNormActivation(in_channels=num_channels, + out_channels=192, + kernel_size=1, + padding=0, + activation_layer=nn.ReLU) def forward(self, x): branch1x1 = self.branch1x1(x) @@ -436,8 +407,8 @@ class InceptionE(nn.Layer): branch_pool = self.branch_pool(x) branch_pool = self.branch_pool_conv(branch_pool) - x = paddle.concat( - [branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=1) + x = paddle.concat([branch1x1, branch3x3, branch3x3dbl, branch_pool], + axis=1) return x diff --git a/python/paddle/vision/models/lenet.py b/python/paddle/vision/models/lenet.py index 46212f46f3a..a526bb719ef 100644 --- a/python/paddle/vision/models/lenet.py +++ b/python/paddle/vision/models/lenet.py @@ -37,20 +37,14 @@ class LeNet(nn.Layer): def __init__(self, num_classes=10): super(LeNet, self).__init__() self.num_classes = num_classes - self.features = nn.Sequential( - nn.Conv2D( - 1, 6, 3, stride=1, padding=1), - nn.ReLU(), - nn.MaxPool2D(2, 2), - nn.Conv2D( - 6, 16, 5, stride=1, padding=0), - nn.ReLU(), - nn.MaxPool2D(2, 2)) + self.features = nn.Sequential(nn.Conv2D(1, 6, 3, stride=1, padding=1), + nn.ReLU(), nn.MaxPool2D(2, 2), + nn.Conv2D(6, 16, 5, stride=1, padding=0), + nn.ReLU(), nn.MaxPool2D(2, 2)) if num_classes > 0: - self.fc = nn.Sequential( - nn.Linear(400, 120), - nn.Linear(120, 84), nn.Linear(84, num_classes)) + self.fc = nn.Sequential(nn.Linear(400, 120), nn.Linear(120, 84), + nn.Linear(84, num_classes)) def forward(self, inputs): x = self.features(inputs) diff --git a/python/paddle/vision/models/mobilenetv1.py b/python/paddle/vision/models/mobilenetv1.py index 6d8d96952fa..e8e4994a75b 100644 --- a/python/paddle/vision/models/mobilenetv1.py +++ b/python/paddle/vision/models/mobilenetv1.py @@ -28,24 +28,24 @@ model_urls = { class DepthwiseSeparable(nn.Layer): + def __init__(self, in_channels, out_channels1, out_channels2, num_groups, stride, scale): super(DepthwiseSeparable, self).__init__() - self._depthwise_conv = ConvNormActivation( - in_channels, - int(out_channels1 * scale), - kernel_size=3, - stride=stride, - padding=1, - groups=int(num_groups * scale)) - - self._pointwise_conv = ConvNormActivation( - int(out_channels1 * scale), - int(out_channels2 * scale), - kernel_size=1, - stride=1, - padding=0) + self._depthwise_conv = ConvNormActivation(in_channels, + int(out_channels1 * scale), + kernel_size=3, + stride=stride, + padding=1, + groups=int(num_groups * + scale)) + + self._pointwise_conv = ConvNormActivation(int(out_channels1 * scale), + int(out_channels2 * scale), + kernel_size=1, + stride=1, + padding=0) def forward(self, x): x = self._depthwise_conv(x) @@ -84,111 +84,101 @@ class MobileNetV1(nn.Layer): self.num_classes = num_classes self.with_pool = with_pool - self.conv1 = ConvNormActivation( - in_channels=3, - out_channels=int(32 * scale), - kernel_size=3, - stride=2, - padding=1) - - dws21 = self.add_sublayer( - sublayer=DepthwiseSeparable( - in_channels=int(32 * scale), - out_channels1=32, - out_channels2=64, - num_groups=32, - stride=1, - scale=scale), - name="conv2_1") + self.conv1 = ConvNormActivation(in_channels=3, + out_channels=int(32 * scale), + kernel_size=3, + stride=2, + padding=1) + + dws21 = self.add_sublayer(sublayer=DepthwiseSeparable(in_channels=int( + 32 * scale), + out_channels1=32, + out_channels2=64, + num_groups=32, + stride=1, + scale=scale), + name="conv2_1") self.dwsl.append(dws21) - dws22 = 
self.add_sublayer( - sublayer=DepthwiseSeparable( - in_channels=int(64 * scale), - out_channels1=64, - out_channels2=128, - num_groups=64, - stride=2, - scale=scale), - name="conv2_2") + dws22 = self.add_sublayer(sublayer=DepthwiseSeparable(in_channels=int( + 64 * scale), + out_channels1=64, + out_channels2=128, + num_groups=64, + stride=2, + scale=scale), + name="conv2_2") self.dwsl.append(dws22) - dws31 = self.add_sublayer( - sublayer=DepthwiseSeparable( - in_channels=int(128 * scale), - out_channels1=128, - out_channels2=128, - num_groups=128, - stride=1, - scale=scale), - name="conv3_1") + dws31 = self.add_sublayer(sublayer=DepthwiseSeparable(in_channels=int( + 128 * scale), + out_channels1=128, + out_channels2=128, + num_groups=128, + stride=1, + scale=scale), + name="conv3_1") self.dwsl.append(dws31) - dws32 = self.add_sublayer( - sublayer=DepthwiseSeparable( - in_channels=int(128 * scale), - out_channels1=128, - out_channels2=256, - num_groups=128, - stride=2, - scale=scale), - name="conv3_2") + dws32 = self.add_sublayer(sublayer=DepthwiseSeparable(in_channels=int( + 128 * scale), + out_channels1=128, + out_channels2=256, + num_groups=128, + stride=2, + scale=scale), + name="conv3_2") self.dwsl.append(dws32) - dws41 = self.add_sublayer( - sublayer=DepthwiseSeparable( - in_channels=int(256 * scale), - out_channels1=256, - out_channels2=256, - num_groups=256, - stride=1, - scale=scale), - name="conv4_1") + dws41 = self.add_sublayer(sublayer=DepthwiseSeparable(in_channels=int( + 256 * scale), + out_channels1=256, + out_channels2=256, + num_groups=256, + stride=1, + scale=scale), + name="conv4_1") self.dwsl.append(dws41) - dws42 = self.add_sublayer( - sublayer=DepthwiseSeparable( - in_channels=int(256 * scale), - out_channels1=256, - out_channels2=512, - num_groups=256, - stride=2, - scale=scale), - name="conv4_2") + dws42 = self.add_sublayer(sublayer=DepthwiseSeparable(in_channels=int( + 256 * scale), + out_channels1=256, + out_channels2=512, + num_groups=256, + stride=2, + scale=scale), + name="conv4_2") self.dwsl.append(dws42) for i in range(5): - tmp = self.add_sublayer( - sublayer=DepthwiseSeparable( - in_channels=int(512 * scale), - out_channels1=512, - out_channels2=512, - num_groups=512, - stride=1, - scale=scale), - name="conv5_" + str(i + 1)) - self.dwsl.append(tmp) - - dws56 = self.add_sublayer( - sublayer=DepthwiseSeparable( + tmp = self.add_sublayer(sublayer=DepthwiseSeparable( in_channels=int(512 * scale), out_channels1=512, - out_channels2=1024, + out_channels2=512, num_groups=512, - stride=2, + stride=1, scale=scale), - name="conv5_6") + name="conv5_" + str(i + 1)) + self.dwsl.append(tmp) + + dws56 = self.add_sublayer(sublayer=DepthwiseSeparable( + in_channels=int(512 * scale), + out_channels1=512, + out_channels2=1024, + num_groups=512, + stride=2, + scale=scale), + name="conv5_6") self.dwsl.append(dws56) - dws6 = self.add_sublayer( - sublayer=DepthwiseSeparable( - in_channels=int(1024 * scale), - out_channels1=1024, - out_channels2=1024, - num_groups=1024, - stride=1, - scale=scale), - name="conv6") + dws6 = self.add_sublayer(sublayer=DepthwiseSeparable(in_channels=int( + 1024 * scale), + out_channels1=1024, + out_channels2=1024, + num_groups=1024, + stride=1, + scale=scale), + name="conv6") self.dwsl.append(dws6) if with_pool: @@ -252,6 +242,8 @@ def mobilenet_v1(pretrained=False, scale=1.0, **kwargs): print(out.shape) """ - model = _mobilenet( - 'mobilenetv1_' + str(scale), pretrained, scale=scale, **kwargs) + model = _mobilenet('mobilenetv1_' + str(scale), + 
pretrained, + scale=scale, + **kwargs) return model diff --git a/python/paddle/vision/models/mobilenetv2.py b/python/paddle/vision/models/mobilenetv2.py index 9791462610d..f9111185de6 100644 --- a/python/paddle/vision/models/mobilenetv2.py +++ b/python/paddle/vision/models/mobilenetv2.py @@ -29,6 +29,7 @@ model_urls = { class InvertedResidual(nn.Layer): + def __init__(self, inp, oup, @@ -45,22 +46,19 @@ class InvertedResidual(nn.Layer): layers = [] if expand_ratio != 1: layers.append( - ConvNormActivation( - inp, - hidden_dim, - kernel_size=1, - norm_layer=norm_layer, - activation_layer=nn.ReLU6)) + ConvNormActivation(inp, + hidden_dim, + kernel_size=1, + norm_layer=norm_layer, + activation_layer=nn.ReLU6)) layers.extend([ - ConvNormActivation( - hidden_dim, - hidden_dim, - stride=stride, - groups=hidden_dim, - norm_layer=norm_layer, - activation_layer=nn.ReLU6), - nn.Conv2D( - hidden_dim, oup, 1, 1, 0, bias_attr=False), + ConvNormActivation(hidden_dim, + hidden_dim, + stride=stride, + groups=hidden_dim, + norm_layer=norm_layer, + activation_layer=nn.ReLU6), + nn.Conv2D(hidden_dim, oup, 1, 1, 0, bias_attr=False), norm_layer(oup), ]) self.conv = nn.Sequential(*layers) @@ -120,12 +118,11 @@ class MobileNetV2(nn.Layer): self.last_channel = _make_divisible(last_channel * max(1.0, scale), round_nearest) features = [ - ConvNormActivation( - 3, - input_channel, - stride=2, - norm_layer=norm_layer, - activation_layer=nn.ReLU6) + ConvNormActivation(3, + input_channel, + stride=2, + norm_layer=norm_layer, + activation_layer=nn.ReLU6) ] for t, c, n, s in inverted_residual_setting: @@ -133,21 +130,19 @@ class MobileNetV2(nn.Layer): for i in range(n): stride = s if i == 0 else 1 features.append( - block( - input_channel, - output_channel, - stride, - expand_ratio=t, - norm_layer=norm_layer)) + block(input_channel, + output_channel, + stride, + expand_ratio=t, + norm_layer=norm_layer)) input_channel = output_channel features.append( - ConvNormActivation( - input_channel, - self.last_channel, - kernel_size=1, - norm_layer=norm_layer, - activation_layer=nn.ReLU6)) + ConvNormActivation(input_channel, + self.last_channel, + kernel_size=1, + norm_layer=norm_layer, + activation_layer=nn.ReLU6)) self.features = nn.Sequential(*features) @@ -211,6 +206,8 @@ def mobilenet_v2(pretrained=False, scale=1.0, **kwargs): print(out.shape) """ - model = _mobilenet( - 'mobilenetv2_' + str(scale), pretrained, scale=scale, **kwargs) + model = _mobilenet('mobilenetv2_' + str(scale), + pretrained, + scale=scale, + **kwargs) return model diff --git a/python/paddle/vision/models/mobilenetv3.py b/python/paddle/vision/models/mobilenetv3.py index 70aa1b833d6..0dd97755b62 100644 --- a/python/paddle/vision/models/mobilenetv3.py +++ b/python/paddle/vision/models/mobilenetv3.py @@ -74,6 +74,7 @@ class SqueezeExcitation(nn.Layer): class InvertedResidualConfig: + def __init__(self, in_channels, kernel, @@ -85,8 +86,8 @@ class InvertedResidualConfig: scale=1.0): self.in_channels = self.adjust_channels(in_channels, scale=scale) self.kernel = kernel - self.expanded_channels = self.adjust_channels( - expanded_channels, scale=scale) + self.expanded_channels = self.adjust_channels(expanded_channels, + scale=scale) self.out_channels = self.adjust_channels(out_channels, scale=scale) self.use_se = use_se if activation is None: @@ -96,8 +97,9 @@ class InvertedResidualConfig: elif activation == "hardswish": self.activation_layer = nn.Hardswish else: - raise RuntimeError("The activation function is not supported: {}". 
- format(activation)) + raise RuntimeError( + "The activation function is not supported: {}".format( + activation)) self.stride = stride @staticmethod @@ -106,6 +108,7 @@ class InvertedResidualConfig: class InvertedResidual(nn.Layer): + def __init__(self, in_channels, expanded_channels, out_channels, filter_size, stride, use_se, activation_layer, norm_layer): super().__init__() @@ -134,19 +137,18 @@ class InvertedResidual(nn.Layer): activation_layer=activation_layer) if self.use_se: - self.mid_se = SqueezeExcitation( - expanded_channels, - _make_divisible(expanded_channels // 4), - scale_activation=nn.Hardsigmoid) - - self.linear_conv = ConvNormActivation( - in_channels=expanded_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - padding=0, - norm_layer=norm_layer, - activation_layer=None) + self.mid_se = SqueezeExcitation(expanded_channels, + _make_divisible(expanded_channels // + 4), + scale_activation=nn.Hardsigmoid) + + self.linear_conv = ConvNormActivation(in_channels=expanded_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + norm_layer=norm_layer, + activation_layer=None) def forward(self, x): identity = x @@ -192,26 +194,24 @@ class MobileNetV3(nn.Layer): self.lastconv_out_channels = self.lastconv_in_channels * 6 norm_layer = partial(nn.BatchNorm2D, epsilon=0.001, momentum=0.99) - self.conv = ConvNormActivation( - in_channels=3, - out_channels=self.firstconv_in_channels, - kernel_size=3, - stride=2, - padding=1, - groups=1, - activation_layer=nn.Hardswish, - norm_layer=norm_layer) + self.conv = ConvNormActivation(in_channels=3, + out_channels=self.firstconv_in_channels, + kernel_size=3, + stride=2, + padding=1, + groups=1, + activation_layer=nn.Hardswish, + norm_layer=norm_layer) self.blocks = nn.Sequential(*[ - InvertedResidual( - in_channels=cfg.in_channels, - expanded_channels=cfg.expanded_channels, - out_channels=cfg.out_channels, - filter_size=cfg.kernel, - stride=cfg.stride, - use_se=cfg.use_se, - activation_layer=cfg.activation_layer, - norm_layer=norm_layer) for cfg in self.config + InvertedResidual(in_channels=cfg.in_channels, + expanded_channels=cfg.expanded_channels, + out_channels=cfg.out_channels, + filter_size=cfg.kernel, + stride=cfg.stride, + use_se=cfg.use_se, + activation_layer=cfg.activation_layer, + norm_layer=norm_layer) for cfg in self.config ]) self.lastconv = ConvNormActivation( @@ -230,8 +230,7 @@ class MobileNetV3(nn.Layer): if num_classes > 0: self.classifier = nn.Sequential( nn.Linear(self.lastconv_out_channels, self.last_channel), - nn.Hardswish(), - nn.Dropout(p=0.2), + nn.Hardswish(), nn.Dropout(p=0.2), nn.Linear(self.last_channel, num_classes)) def forward(self, x): @@ -289,12 +288,11 @@ class MobileNetV3Small(MobileNetV3): InvertedResidualConfig(96, 5, 576, 96, True, "hardswish", 1, scale), ] last_channel = _make_divisible(1024 * scale, 8) - super().__init__( - config, - last_channel=last_channel, - scale=scale, - with_pool=with_pool, - num_classes=num_classes) + super().__init__(config, + last_channel=last_channel, + scale=scale, + with_pool=with_pool, + num_classes=num_classes) class MobileNetV3Large(MobileNetV3): @@ -350,12 +348,11 @@ class MobileNetV3Large(MobileNetV3): scale), ] last_channel = _make_divisible(1280 * scale, 8) - super().__init__( - config, - last_channel=last_channel, - scale=scale, - with_pool=with_pool, - num_classes=num_classes) + super().__init__(config, + last_channel=last_channel, + scale=scale, + with_pool=with_pool, + num_classes=num_classes) def _mobilenet_v3(arch, 
pretrained=False, scale=1.0, **kwargs): @@ -406,8 +403,10 @@ def mobilenet_v3_small(pretrained=False, scale=1.0, **kwargs): print(out.shape) """ - model = _mobilenet_v3( - "mobilenet_v3_small", scale=scale, pretrained=pretrained, **kwargs) + model = _mobilenet_v3("mobilenet_v3_small", + scale=scale, + pretrained=pretrained, + **kwargs) return model @@ -440,6 +439,8 @@ def mobilenet_v3_large(pretrained=False, scale=1.0, **kwargs): print(out.shape) """ - model = _mobilenet_v3( - "mobilenet_v3_large", scale=scale, pretrained=pretrained, **kwargs) + model = _mobilenet_v3("mobilenet_v3_large", + scale=scale, + pretrained=pretrained, + **kwargs) return model diff --git a/python/paddle/vision/models/resnet.py b/python/paddle/vision/models/resnet.py index 27536b6a9c6..ba58fe7f57d 100644 --- a/python/paddle/vision/models/resnet.py +++ b/python/paddle/vision/models/resnet.py @@ -39,24 +39,24 @@ model_urls = { "resnext50_64x4d": ('https://paddle-hapi.bj.bcebos.com/models/resnext50_64x4d.pdparams', '063d4b483e12b06388529450ad7576db'), - 'resnext101_32x4d': ( - 'https://paddle-hapi.bj.bcebos.com/models/resnext101_32x4d.pdparams', - '967b090039f9de2c8d06fe994fb9095f'), - 'resnext101_64x4d': ( - 'https://paddle-hapi.bj.bcebos.com/models/resnext101_64x4d.pdparams', - '98e04e7ca616a066699230d769d03008'), - 'resnext152_32x4d': ( - 'https://paddle-hapi.bj.bcebos.com/models/resnext152_32x4d.pdparams', - '18ff0beee21f2efc99c4b31786107121'), - 'resnext152_64x4d': ( - 'https://paddle-hapi.bj.bcebos.com/models/resnext152_64x4d.pdparams', - '77c4af00ca42c405fa7f841841959379'), - 'wide_resnet50_2': ( - 'https://paddle-hapi.bj.bcebos.com/models/wide_resnet50_2.pdparams', - '0282f804d73debdab289bd9fea3fa6dc'), - 'wide_resnet101_2': ( - 'https://paddle-hapi.bj.bcebos.com/models/wide_resnet101_2.pdparams', - 'd4360a2d23657f059216f5d5a1a9ac93'), + 'resnext101_32x4d': + ('https://paddle-hapi.bj.bcebos.com/models/resnext101_32x4d.pdparams', + '967b090039f9de2c8d06fe994fb9095f'), + 'resnext101_64x4d': + ('https://paddle-hapi.bj.bcebos.com/models/resnext101_64x4d.pdparams', + '98e04e7ca616a066699230d769d03008'), + 'resnext152_32x4d': + ('https://paddle-hapi.bj.bcebos.com/models/resnext152_32x4d.pdparams', + '18ff0beee21f2efc99c4b31786107121'), + 'resnext152_64x4d': + ('https://paddle-hapi.bj.bcebos.com/models/resnext152_64x4d.pdparams', + '77c4af00ca42c405fa7f841841959379'), + 'wide_resnet50_2': + ('https://paddle-hapi.bj.bcebos.com/models/wide_resnet50_2.pdparams', + '0282f804d73debdab289bd9fea3fa6dc'), + 'wide_resnet101_2': + ('https://paddle-hapi.bj.bcebos.com/models/wide_resnet101_2.pdparams', + 'd4360a2d23657f059216f5d5a1a9ac93'), } @@ -80,8 +80,12 @@ class BasicBlock(nn.Layer): raise NotImplementedError( "Dilation > 1 not supported in BasicBlock") - self.conv1 = nn.Conv2D( - inplanes, planes, 3, padding=1, stride=stride, bias_attr=False) + self.conv1 = nn.Conv2D(inplanes, + planes, + 3, + padding=1, + stride=stride, + bias_attr=False) self.bn1 = norm_layer(planes) self.relu = nn.ReLU() self.conv2 = nn.Conv2D(planes, planes, 3, padding=1, bias_attr=False) @@ -129,19 +133,20 @@ class BottleneckBlock(nn.Layer): self.conv1 = nn.Conv2D(inplanes, width, 1, bias_attr=False) self.bn1 = norm_layer(width) - self.conv2 = nn.Conv2D( - width, - width, - 3, - padding=dilation, - stride=stride, - groups=groups, - dilation=dilation, - bias_attr=False) + self.conv2 = nn.Conv2D(width, + width, + 3, + padding=dilation, + stride=stride, + groups=groups, + dilation=dilation, + bias_attr=False) self.bn2 = norm_layer(width) - self.conv3 = 
nn.Conv2D( - width, planes * self.expansion, 1, bias_attr=False) + self.conv3 = nn.Conv2D(width, + planes * self.expansion, + 1, + bias_attr=False) self.bn3 = norm_layer(planes * self.expansion) self.relu = nn.ReLU() self.downsample = downsample @@ -235,13 +240,12 @@ class ResNet(nn.Layer): self.inplanes = 64 self.dilation = 1 - self.conv1 = nn.Conv2D( - 3, - self.inplanes, - kernel_size=7, - stride=2, - padding=3, - bias_attr=False) + self.conv1 = nn.Conv2D(3, + self.inplanes, + kernel_size=7, + stride=2, + padding=3, + bias_attr=False) self.bn1 = self._norm_layer(self.inplanes) self.relu = nn.ReLU() self.maxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) @@ -264,13 +268,13 @@ class ResNet(nn.Layer): stride = 1 if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( - nn.Conv2D( - self.inplanes, - planes * block.expansion, - 1, - stride=stride, - bias_attr=False), - norm_layer(planes * block.expansion), ) + nn.Conv2D(self.inplanes, + planes * block.expansion, + 1, + stride=stride, + bias_attr=False), + norm_layer(planes * block.expansion), + ) layers = [] layers.append( @@ -279,12 +283,11 @@ class ResNet(nn.Layer): self.inplanes = planes * block.expansion for _ in range(1, blocks): layers.append( - block( - self.inplanes, - planes, - groups=self.groups, - base_width=self.base_width, - norm_layer=norm_layer)) + block(self.inplanes, + planes, + groups=self.groups, + base_width=self.base_width, + norm_layer=norm_layer)) return nn.Sequential(*layers) diff --git a/python/paddle/vision/models/shufflenetv2.py b/python/paddle/vision/models/shufflenetv2.py index 90e967ee22b..60304b95498 100644 --- a/python/paddle/vision/models/shufflenetv2.py +++ b/python/paddle/vision/models/shufflenetv2.py @@ -28,25 +28,32 @@ __all__ = [] model_urls = { "shufflenet_v2_x0_25": ( "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x0_25.pdparams", - "1e509b4c140eeb096bb16e214796d03b", ), + "1e509b4c140eeb096bb16e214796d03b", + ), "shufflenet_v2_x0_33": ( "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x0_33.pdparams", - "3d7b3ab0eaa5c0927ff1026d31b729bd", ), + "3d7b3ab0eaa5c0927ff1026d31b729bd", + ), "shufflenet_v2_x0_5": ( "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x0_5.pdparams", - "5e5cee182a7793c4e4c73949b1a71bd4", ), + "5e5cee182a7793c4e4c73949b1a71bd4", + ), "shufflenet_v2_x1_0": ( "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x1_0.pdparams", - "122d42478b9e81eb49f8a9ede327b1a4", ), + "122d42478b9e81eb49f8a9ede327b1a4", + ), "shufflenet_v2_x1_5": ( "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x1_5.pdparams", - "faced5827380d73531d0ee027c67826d", ), + "faced5827380d73531d0ee027c67826d", + ), "shufflenet_v2_x2_0": ( "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x2_0.pdparams", - "cd3dddcd8305e7bcd8ad14d1c69a5784", ), + "cd3dddcd8305e7bcd8ad14d1c69a5784", + ), "shufflenet_v2_swish": ( "https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_swish.pdparams", - "adde0aa3b023e5b0c94a68be1c394b84", ), + "adde0aa3b023e5b0c94a68be1c394b84", + ), } @@ -79,28 +86,27 @@ def channel_shuffle(x, groups): class InvertedResidual(nn.Layer): + def __init__(self, in_channels, out_channels, stride, activation_layer=nn.ReLU): super(InvertedResidual, self).__init__() - self._conv_pw = ConvNormActivation( - in_channels=in_channels // 2, - out_channels=out_channels // 2, - kernel_size=1, - stride=1, - padding=0, - groups=1, - activation_layer=activation_layer) - self._conv_dw = ConvNormActivation( - in_channels=out_channels // 2, - 
out_channels=out_channels // 2, - kernel_size=3, - stride=stride, - padding=1, - groups=out_channels // 2, - activation_layer=None) + self._conv_pw = ConvNormActivation(in_channels=in_channels // 2, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + groups=1, + activation_layer=activation_layer) + self._conv_dw = ConvNormActivation(in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=3, + stride=stride, + padding=1, + groups=out_channels // 2, + activation_layer=None) self._conv_linear = ConvNormActivation( in_channels=out_channels // 2, out_channels=out_channels // 2, @@ -123,6 +129,7 @@ class InvertedResidual(nn.Layer): class InvertedResidualDS(nn.Layer): + def __init__(self, in_channels, out_channels, @@ -131,14 +138,13 @@ class InvertedResidualDS(nn.Layer): super(InvertedResidualDS, self).__init__() # branch1 - self._conv_dw_1 = ConvNormActivation( - in_channels=in_channels, - out_channels=in_channels, - kernel_size=3, - stride=stride, - padding=1, - groups=in_channels, - activation_layer=None) + self._conv_dw_1 = ConvNormActivation(in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=stride, + padding=1, + groups=in_channels, + activation_layer=None) self._conv_linear_1 = ConvNormActivation( in_channels=in_channels, out_channels=out_channels // 2, @@ -148,22 +154,20 @@ class InvertedResidualDS(nn.Layer): groups=1, activation_layer=activation_layer) # branch2 - self._conv_pw_2 = ConvNormActivation( - in_channels=in_channels, - out_channels=out_channels // 2, - kernel_size=1, - stride=1, - padding=0, - groups=1, - activation_layer=activation_layer) - self._conv_dw_2 = ConvNormActivation( - in_channels=out_channels // 2, - out_channels=out_channels // 2, - kernel_size=3, - stride=stride, - padding=1, - groups=out_channels // 2, - activation_layer=None) + self._conv_pw_2 = ConvNormActivation(in_channels=in_channels, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + groups=1, + activation_layer=activation_layer) + self._conv_dw_2 = ConvNormActivation(in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=3, + stride=stride, + padding=1, + groups=out_channels // 2, + activation_layer=None) self._conv_linear_2 = ConvNormActivation( in_channels=out_channels // 2, out_channels=out_channels // 2, @@ -232,13 +236,12 @@ class ShuffleNetV2(nn.Layer): raise NotImplementedError("This scale size:[" + str(scale) + "] is not implemented!") # 1. conv1 - self._conv1 = ConvNormActivation( - in_channels=3, - out_channels=stage_out_channels[1], - kernel_size=3, - stride=2, - padding=1, - activation_layer=activation_layer) + self._conv1 = ConvNormActivation(in_channels=3, + out_channels=stage_out_channels[1], + kernel_size=3, + stride=2, + padding=1, + activation_layer=activation_layer) self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1) # 2. 
bottleneck sequences @@ -246,21 +249,21 @@ class ShuffleNetV2(nn.Layer): for stage_id, num_repeat in enumerate(stage_repeats): for i in range(num_repeat): if i == 0: - block = self.add_sublayer( - sublayer=InvertedResidualDS( - in_channels=stage_out_channels[stage_id + 1], - out_channels=stage_out_channels[stage_id + 2], - stride=2, - activation_layer=activation_layer), - name=str(stage_id + 2) + "_" + str(i + 1)) + block = self.add_sublayer(sublayer=InvertedResidualDS( + in_channels=stage_out_channels[stage_id + 1], + out_channels=stage_out_channels[stage_id + 2], + stride=2, + activation_layer=activation_layer), + name=str(stage_id + 2) + "_" + + str(i + 1)) else: - block = self.add_sublayer( - sublayer=InvertedResidual( - in_channels=stage_out_channels[stage_id + 2], - out_channels=stage_out_channels[stage_id + 2], - stride=1, - activation_layer=activation_layer), - name=str(stage_id + 2) + "_" + str(i + 1)) + block = self.add_sublayer(sublayer=InvertedResidual( + in_channels=stage_out_channels[stage_id + 2], + out_channels=stage_out_channels[stage_id + 2], + stride=1, + activation_layer=activation_layer), + name=str(stage_id + 2) + "_" + + str(i + 1)) self._block_list.append(block) # 3. last_conv self._last_conv = ConvNormActivation( @@ -335,8 +338,10 @@ def shufflenet_v2_x0_25(pretrained=False, **kwargs): print(out.shape) """ - return _shufflenet_v2( - "shufflenet_v2_x0_25", scale=0.25, pretrained=pretrained, **kwargs) + return _shufflenet_v2("shufflenet_v2_x0_25", + scale=0.25, + pretrained=pretrained, + **kwargs) def shufflenet_v2_x0_33(pretrained=False, **kwargs): @@ -364,8 +369,10 @@ def shufflenet_v2_x0_33(pretrained=False, **kwargs): print(out.shape) """ - return _shufflenet_v2( - "shufflenet_v2_x0_33", scale=0.33, pretrained=pretrained, **kwargs) + return _shufflenet_v2("shufflenet_v2_x0_33", + scale=0.33, + pretrained=pretrained, + **kwargs) def shufflenet_v2_x0_5(pretrained=False, **kwargs): @@ -393,8 +400,10 @@ def shufflenet_v2_x0_5(pretrained=False, **kwargs): print(out.shape) """ - return _shufflenet_v2( - "shufflenet_v2_x0_5", scale=0.5, pretrained=pretrained, **kwargs) + return _shufflenet_v2("shufflenet_v2_x0_5", + scale=0.5, + pretrained=pretrained, + **kwargs) def shufflenet_v2_x1_0(pretrained=False, **kwargs): @@ -422,8 +431,10 @@ def shufflenet_v2_x1_0(pretrained=False, **kwargs): print(out.shape) """ - return _shufflenet_v2( - "shufflenet_v2_x1_0", scale=1.0, pretrained=pretrained, **kwargs) + return _shufflenet_v2("shufflenet_v2_x1_0", + scale=1.0, + pretrained=pretrained, + **kwargs) def shufflenet_v2_x1_5(pretrained=False, **kwargs): @@ -451,8 +462,10 @@ def shufflenet_v2_x1_5(pretrained=False, **kwargs): print(out.shape) """ - return _shufflenet_v2( - "shufflenet_v2_x1_5", scale=1.5, pretrained=pretrained, **kwargs) + return _shufflenet_v2("shufflenet_v2_x1_5", + scale=1.5, + pretrained=pretrained, + **kwargs) def shufflenet_v2_x2_0(pretrained=False, **kwargs): @@ -480,8 +493,10 @@ def shufflenet_v2_x2_0(pretrained=False, **kwargs): print(out.shape) """ - return _shufflenet_v2( - "shufflenet_v2_x2_0", scale=2.0, pretrained=pretrained, **kwargs) + return _shufflenet_v2("shufflenet_v2_x2_0", + scale=2.0, + pretrained=pretrained, + **kwargs) def shufflenet_v2_swish(pretrained=False, **kwargs): @@ -509,9 +524,8 @@ def shufflenet_v2_swish(pretrained=False, **kwargs): print(out.shape) """ - return _shufflenet_v2( - "shufflenet_v2_swish", - scale=1.0, - act="swish", - pretrained=pretrained, - **kwargs) + return _shufflenet_v2("shufflenet_v2_swish", + scale=1.0, + 
act="swish", + pretrained=pretrained, + **kwargs) diff --git a/python/paddle/vision/models/squeezenet.py b/python/paddle/vision/models/squeezenet.py index 804be2622cf..b122a795286 100644 --- a/python/paddle/vision/models/squeezenet.py +++ b/python/paddle/vision/models/squeezenet.py @@ -38,15 +38,15 @@ model_urls = { class MakeFireConv(nn.Layer): + def __init__(self, input_channels, output_channels, filter_size, padding=0): super(MakeFireConv, self).__init__() - self._conv = Conv2D( - input_channels, - output_channels, - filter_size, - padding=padding, - weight_attr=ParamAttr(), - bias_attr=ParamAttr()) + self._conv = Conv2D(input_channels, + output_channels, + filter_size, + padding=padding, + weight_attr=ParamAttr(), + bias_attr=ParamAttr()) def forward(self, x): x = self._conv(x) @@ -55,13 +55,16 @@ class MakeFireConv(nn.Layer): class MakeFire(nn.Layer): + def __init__(self, input_channels, squeeze_channels, expand1x1_channels, expand3x3_channels): super(MakeFire, self).__init__() self._conv = MakeFireConv(input_channels, squeeze_channels, 1) self._conv_path1 = MakeFireConv(squeeze_channels, expand1x1_channels, 1) - self._conv_path2 = MakeFireConv( - squeeze_channels, expand3x3_channels, 3, padding=1) + self._conv_path2 = MakeFireConv(squeeze_channels, + expand3x3_channels, + 3, + padding=1) def forward(self, inputs): x = self._conv(inputs) @@ -110,13 +113,12 @@ class SqueezeNet(nn.Layer): supported_versions, version) if self.version == "1.0": - self._conv = Conv2D( - 3, - 96, - 7, - stride=2, - weight_attr=ParamAttr(), - bias_attr=ParamAttr()) + self._conv = Conv2D(3, + 96, + 7, + stride=2, + weight_attr=ParamAttr(), + bias_attr=ParamAttr()) self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) self._conv1 = MakeFire(96, 16, 64, 64) self._conv2 = MakeFire(128, 16, 64, 64) @@ -127,14 +129,13 @@ class SqueezeNet(nn.Layer): self._conv7 = MakeFire(384, 64, 256, 256) self._conv8 = MakeFire(512, 64, 256, 256) else: - self._conv = Conv2D( - 3, - 64, - 3, - stride=2, - padding=1, - weight_attr=ParamAttr(), - bias_attr=ParamAttr()) + self._conv = Conv2D(3, + 64, + 3, + stride=2, + padding=1, + weight_attr=ParamAttr(), + bias_attr=ParamAttr()) self._pool = MaxPool2D(kernel_size=3, stride=2, padding=0) self._conv1 = MakeFire(64, 16, 64, 64) self._conv2 = MakeFire(128, 16, 64, 64) @@ -146,8 +147,11 @@ class SqueezeNet(nn.Layer): self._conv8 = MakeFire(512, 64, 256, 256) self._drop = Dropout(p=0.5, mode="downscale_in_infer") - self._conv9 = Conv2D( - 512, num_classes, 1, weight_attr=ParamAttr(), bias_attr=ParamAttr()) + self._conv9 = Conv2D(512, + num_classes, + 1, + weight_attr=ParamAttr(), + bias_attr=ParamAttr()) self._avg_pool = AdaptiveAvgPool2D(1) def forward(self, inputs): diff --git a/python/paddle/vision/models/vgg.py b/python/paddle/vision/models/vgg.py index 755f77aa297..dd88d064493 100644 --- a/python/paddle/vision/models/vgg.py +++ b/python/paddle/vision/models/vgg.py @@ -68,7 +68,8 @@ class VGG(nn.Layer): nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(), - nn.Linear(4096, num_classes), ) + nn.Linear(4096, num_classes), + ) def forward(self, x): x = self.features(x) diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py index d45c652885b..69fba204dd3 100644 --- a/python/paddle/vision/ops.py +++ b/python/paddle/vision/ops.py @@ -23,7 +23,7 @@ from ..fluid.framework import _non_static_mode, in_dygraph_mode, _in_legacy_dygr from paddle.common_ops_import import * from paddle import _C_ops -__all__ = [ #noqa +__all__ = [ #noqa 'yolo_loss', 'yolo_box', 'deform_conv2d', @@ 
-238,15 +238,14 @@ def yolo_loss(x, "scale_x_y": scale_x_y, } - helper.append_op( - type='yolov3_loss', - inputs=inputs, - outputs={ - 'Loss': loss, - 'ObjectnessMask': objectness_mask, - 'GTMatchMask': gt_match_mask - }, - attrs=attrs) + helper.append_op(type='yolov3_loss', + inputs=inputs, + outputs={ + 'Loss': loss, + 'ObjectnessMask': objectness_mask, + 'GTMatchMask': gt_match_mask + }, + attrs=attrs) return loss @@ -379,9 +378,11 @@ def yolo_box(x, scale_x_y=1.) """ if in_dygraph_mode(): - boxes, scores = _C_ops.final_state_yolo_box( - x, img_size, anchors, class_num, conf_thresh, downsample_ratio, - clip_bbox, scale_x_y, iou_aware, iou_aware_factor) + boxes, scores = _C_ops.final_state_yolo_box(x, img_size, anchors, + class_num, conf_thresh, + downsample_ratio, clip_bbox, + scale_x_y, iou_aware, + iou_aware_factor) return boxes, scores if _non_static_mode(): @@ -413,17 +414,16 @@ def yolo_box(x, "iou_aware_factor": iou_aware_factor } - helper.append_op( - type='yolo_box', - inputs={ - "X": x, - "ImgSize": img_size, - }, - outputs={ - 'Boxes': boxes, - 'Scores': scores, - }, - attrs=attrs) + helper.append_op(type='yolo_box', + inputs={ + "X": x, + "ImgSize": img_size, + }, + outputs={ + 'Boxes': boxes, + 'Scores': scores, + }, + attrs=attrs) return boxes, scores @@ -559,9 +559,10 @@ def deform_conv2d(x, use_deform_conv2d_v1 = True if mask is None else False if in_dygraph_mode(): - pre_bias = _C_ops.final_state_deformable_conv( - x, offset, weight, mask, stride, padding, dilation, - deformable_groups, groups, 1) + pre_bias = _C_ops.final_state_deformable_conv(x, offset, weight, mask, + stride, padding, dilation, + deformable_groups, groups, + 1) if bias is not None: out = nn.elementwise_add(pre_bias, bias, axis=1) else: @@ -622,17 +623,20 @@ def deform_conv2d(x, 'deformable_groups': deformable_groups, 'im2col_step': 1, } - helper.append_op( - type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + helper.append_op(type=op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs) if bias is not None: out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='elementwise_add', - inputs={'X': [pre_bias], - 'Y': [bias]}, - outputs={'Out': [out]}, - attrs={'axis': 1}) + helper.append_op(type='elementwise_add', + inputs={ + 'X': [pre_bias], + 'Y': [bias] + }, + outputs={'Out': [out]}, + attrs={'axis': 1}) else: out = pre_bias return out @@ -813,21 +817,21 @@ class DeformConv2D(Layer): shape=filter_shape, attr=self._weight_attr, default_initializer=_get_default_param_initializer()) - self.bias = self.create_parameter( - attr=self._bias_attr, shape=[self._out_channels], is_bias=True) + self.bias = self.create_parameter(attr=self._bias_attr, + shape=[self._out_channels], + is_bias=True) def forward(self, x, offset, mask=None): - out = deform_conv2d( - x=x, - offset=offset, - weight=self.weight, - bias=self.bias, - stride=self._stride, - padding=self._padding, - dilation=self._dilation, - deformable_groups=self._deformable_groups, - groups=self._groups, - mask=mask) + out = deform_conv2d(x=x, + offset=offset, + weight=self.weight, + bias=self.bias, + stride=self._stride, + padding=self._padding, + dilation=self._dilation, + deformable_groups=self._deformable_groups, + groups=self._groups, + mask=mask) return out @@ -870,8 +874,10 @@ def read_file(filename, name=None): helper = LayerHelper("read_file", **locals()) out = helper.create_variable_for_type_inference('uint8') - helper.append_op( - type="read_file", inputs=inputs, attrs=attrs, outputs={"Out": out}) + 
helper.append_op(type="read_file", + inputs=inputs, + attrs=attrs, + outputs={"Out": out}) return out @@ -920,8 +926,10 @@ def decode_jpeg(x, mode='unchanged', name=None): helper = LayerHelper("decode_jpeg", **locals()) out = helper.create_variable_for_type_inference('uint8') - helper.append_op( - type="decode_jpeg", inputs=inputs, attrs=attrs, outputs={"Out": out}) + helper.append_op(type="decode_jpeg", + inputs=inputs, + attrs=attrs, + outputs={"Out": out}) return out @@ -983,17 +991,18 @@ def psroi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None): helper = LayerHelper('psroi_pool', **locals()) dtype = helper.input_dtype() out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='psroi_pool', - inputs={'X': x, - 'ROIs': boxes}, - outputs={'Out': out}, - attrs={ - 'output_channels': output_channels, - 'spatial_scale': spatial_scale, - 'pooled_height': pooled_height, - 'pooled_width': pooled_width - }) + helper.append_op(type='psroi_pool', + inputs={ + 'X': x, + 'ROIs': boxes + }, + outputs={'Out': out}, + attrs={ + 'output_channels': output_channels, + 'spatial_scale': spatial_scale, + 'pooled_height': pooled_height, + 'pooled_width': pooled_width + }) return out @@ -1090,9 +1099,10 @@ def roi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None): pooled_width, spatial_scale) if _in_legacy_dygraph(): assert boxes_num is not None, "boxes_num should not be None in dygraph mode." - pool_out, argmaxes = _C_ops.roi_pool( - x, boxes, boxes_num, "pooled_height", pooled_height, "pooled_width", - pooled_width, "spatial_scale", spatial_scale) + pool_out, argmaxes = _C_ops.roi_pool(x, boxes, boxes_num, + "pooled_height", pooled_height, + "pooled_width", pooled_width, + "spatial_scale", spatial_scale) return pool_out else: @@ -1109,16 +1119,17 @@ def roi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None): } if boxes_num is not None: inputs['RoisNum'] = boxes_num - helper.append_op( - type="roi_pool", - inputs=inputs, - outputs={"Out": pool_out, - "Argmax": argmaxes}, - attrs={ - "pooled_height": pooled_height, - "pooled_width": pooled_width, - "spatial_scale": spatial_scale - }) + helper.append_op(type="roi_pool", + inputs=inputs, + outputs={ + "Out": pool_out, + "Argmax": argmaxes + }, + attrs={ + "pooled_height": pooled_height, + "pooled_width": pooled_width, + "spatial_scale": spatial_scale + }) return pool_out @@ -1156,12 +1167,11 @@ class RoIPool(Layer): self._spatial_scale = spatial_scale def forward(self, x, boxes, boxes_num): - return roi_pool( - x=x, - boxes=boxes, - boxes_num=boxes_num, - output_size=self._output_size, - spatial_scale=self._spatial_scale) + return roi_pool(x=x, + boxes=boxes, + boxes_num=boxes_num, + output_size=self._output_size, + spatial_scale=self._spatial_scale) def extra_repr(self): main_str = 'output_size={_output_size}, spatial_scale={_spatial_scale}' @@ -1250,10 +1260,11 @@ def roi_align(x, sampling_ratio, aligned) if _in_legacy_dygraph(): assert boxes_num is not None, "boxes_num should not be None in dygraph mode." 
- align_out = _C_ops.roi_align( - x, boxes, boxes_num, "pooled_height", pooled_height, "pooled_width", - pooled_width, "spatial_scale", spatial_scale, "sampling_ratio", - sampling_ratio, "aligned", aligned) + align_out = _C_ops.roi_align(x, boxes, boxes_num, "pooled_height", + pooled_height, "pooled_width", + pooled_width, "spatial_scale", + spatial_scale, "sampling_ratio", + sampling_ratio, "aligned", aligned) return align_out else: @@ -1269,17 +1280,16 @@ def roi_align(x, } if boxes_num is not None: inputs['RoisNum'] = boxes_num - helper.append_op( - type="roi_align", - inputs=inputs, - outputs={"Out": align_out}, - attrs={ - "pooled_height": pooled_height, - "pooled_width": pooled_width, - "spatial_scale": spatial_scale, - "sampling_ratio": sampling_ratio, - "aligned": aligned, - }) + helper.append_op(type="roi_align", + inputs=inputs, + outputs={"Out": align_out}, + attrs={ + "pooled_height": pooled_height, + "pooled_width": pooled_width, + "spatial_scale": spatial_scale, + "sampling_ratio": sampling_ratio, + "aligned": aligned, + }) return align_out @@ -1321,13 +1331,12 @@ class RoIAlign(Layer): self._spatial_scale = spatial_scale def forward(self, x, boxes, boxes_num, aligned=True): - return roi_align( - x=x, - boxes=boxes, - boxes_num=boxes_num, - output_size=self._output_size, - spatial_scale=self._spatial_scale, - aligned=aligned) + return roi_align(x=x, + boxes=boxes, + boxes_num=boxes_num, + output_size=self._output_size, + spatial_scale=self._spatial_scale, + aligned=aligned) class ConvNormActivation(Sequential): @@ -1367,15 +1376,14 @@ class ConvNormActivation(Sequential): if bias is None: bias = norm_layer is None layers = [ - Conv2D( - in_channels, - out_channels, - kernel_size, - stride, - padding, - dilation=dilation, - groups=groups, - bias_attr=bias) + Conv2D(in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation=dilation, + groups=groups, + bias_attr=bias) ] if norm_layer is not None: layers.append(norm_layer(out_channels)) @@ -1466,11 +1474,10 @@ def nms(boxes, helper = LayerHelper('nms', **locals()) out = helper.create_variable_for_type_inference('int64') - helper.append_op( - type='nms', - inputs={'Boxes': boxes}, - outputs={'KeepBoxesIdxs': out}, - attrs={'iou_threshold': iou_threshold}) + helper.append_op(type='nms', + inputs={'Boxes': boxes}, + outputs={'KeepBoxesIdxs': out}, + attrs={'iou_threshold': iou_threshold}) return out if scores is None: @@ -1500,8 +1507,8 @@ def nms(boxes, continue cur_category_boxes = boxes[cur_category_boxes_idxs] cur_category_scores = scores[cur_category_boxes_idxs] - cur_category_sorted_indices = paddle.argsort( - cur_category_scores, descending=True) + cur_category_sorted_indices = paddle.argsort(cur_category_scores, + descending=True) cur_category_sorted_boxes = cur_category_boxes[ cur_category_sorted_indices] @@ -1519,8 +1526,8 @@ def nms(boxes, keep_boxes_idxs = paddle.where(mask)[0] shape = keep_boxes_idxs.shape[0] keep_boxes_idxs = paddle.reshape(keep_boxes_idxs, [shape]) - sorted_sub_indices = paddle.argsort( - scores[keep_boxes_idxs], descending=True) + sorted_sub_indices = paddle.argsort(scores[keep_boxes_idxs], + descending=True) if top_k is None: return keep_boxes_idxs[sorted_sub_indices] diff --git a/python/paddle/vision/transforms/__init__.py b/python/paddle/vision/transforms/__init__.py index 5992a4f9774..d615598bf2b 100644 --- a/python/paddle/vision/transforms/__init__.py +++ b/python/paddle/vision/transforms/__init__.py @@ -51,7 +51,7 @@ from .functional import adjust_hue # noqa: F401 from 
.functional import normalize # noqa: F401 from .functional import erase # noqa: F401 -__all__ = [ #noqa +__all__ = [ #noqa 'BaseTransform', 'Compose', 'Resize', diff --git a/python/paddle/vision/transforms/functional.py b/python/paddle/vision/transforms/functional.py index 7927e9faee3..ecc160b0c0e 100644 --- a/python/paddle/vision/transforms/functional.py +++ b/python/paddle/vision/transforms/functional.py @@ -72,11 +72,11 @@ def to_tensor(pic, data_format='CHW'): print(tensor.shape) """ - if not (_is_pil_image(pic) or _is_numpy_image(pic) or - _is_tensor_image(pic)): + if not (_is_pil_image(pic) or _is_numpy_image(pic) + or _is_tensor_image(pic)): raise TypeError( - 'pic should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(pic))) + 'pic should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}' + .format(type(pic))) if _is_pil_image(pic): return F_pil.to_tensor(pic, data_format) @@ -130,11 +130,11 @@ def resize(img, size, interpolation='bilinear'): print(converted_img.size) # (150, 200) """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}' + .format(type(img))) if _is_pil_image(img): return F_pil.resize(img, size, interpolation) @@ -194,11 +194,11 @@ def pad(img, padding, fill=0, padding_mode='constant'): padded_img = F.pad(fake_img, padding=(2, 1)) print(padded_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}' + .format(type(img))) if _is_pil_image(img): return F_pil.pad(img, padding, fill, padding_mode) @@ -237,11 +237,11 @@ def crop(img, top, left, height, width): print(cropped_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}' + .format(type(img))) if _is_pil_image(img): return F_pil.crop(img, top, left, height, width) @@ -276,11 +276,11 @@ def center_crop(img, output_size): cropped_img = F.center_crop(fake_img, (150, 100)) print(cropped_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. 
Got {}' + .format(type(img))) if _is_pil_image(img): return F_pil.center_crop(img, output_size) @@ -314,11 +314,11 @@ def hflip(img): print(flpped_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}' + .format(type(img))) if _is_pil_image(img): return F_pil.hflip(img) @@ -352,11 +352,11 @@ def vflip(img): print(flpped_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}' + .format(type(img))) if _is_pil_image(img): return F_pil.vflip(img) @@ -397,11 +397,11 @@ def adjust_brightness(img, brightness_factor): """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}' + .format(type(img))) if _is_pil_image(img): return F_pil.adjust_brightness(img, brightness_factor) @@ -437,11 +437,11 @@ def adjust_contrast(img, contrast_factor): converted_img = F.adjust_contrast(fake_img, 0.4) print(converted_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}' + .format(type(img))) if _is_pil_image(img): return F_pil.adjust_contrast(img, contrast_factor) @@ -478,11 +478,11 @@ def adjust_saturation(img, saturation_factor): print(converted_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}' + .format(type(img))) if _is_pil_image(img): return F_pil.adjust_saturation(img, saturation_factor) @@ -528,11 +528,11 @@ def adjust_hue(img, hue_factor): print(converted_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. 
Got {}' + .format(type(img))) if _is_pil_image(img): return F_pil.adjust_hue(img, hue_factor) @@ -549,7 +549,7 @@ def _get_affine_matrix(center, angle, translate, scale, shear): sx = math.radians(shear[0]) sy = math.radians(shear[1]) - # Rotate and Shear without scaling + # Rotate and Shear without scaling a = math.cos(rot - sy) / math.cos(sy) b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot) c = math.sin(rot - sy) / math.cos(sy) @@ -621,11 +621,11 @@ def affine(img, print(affined_img.shape) """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}' + .format(type(img))) if not isinstance(angle, (int, float)): raise TypeError("Argument angle should be int or float") @@ -753,11 +753,11 @@ def rotate(img, print(rotated_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}' + .format(type(img))) if isinstance(center, list): center = tuple(center) @@ -844,11 +844,11 @@ def perspective(img, startpoints, endpoints, interpolation='nearest', fill=0): print(perspectived_img.shape) """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}' + .format(type(img))) if _is_pil_image(img): coeffs = _get_perspective_coeffs(startpoints, endpoints) @@ -888,11 +888,11 @@ def to_grayscale(img, num_output_channels=1): print(gray_img.size) """ - if not (_is_pil_image(img) or _is_numpy_image(img) or - _is_tensor_image(img)): + if not (_is_pil_image(img) or _is_numpy_image(img) + or _is_tensor_image(img)): raise TypeError( - 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}'. - format(type(img))) + 'img should be PIL Image or Tensor Image or ndarray with dim=[2 or 3]. Got {}' + .format(type(img))) if _is_pil_image(img): return F_pil.to_grayscale(img, num_output_channels) diff --git a/python/paddle/vision/transforms/functional_cv2.py b/python/paddle/vision/transforms/functional_cv2.py index 1b2485541c4..df31add6f77 100644 --- a/python/paddle/vision/transforms/functional_cv2.py +++ b/python/paddle/vision/transforms/functional_cv2.py @@ -52,8 +52,8 @@ def to_tensor(pic, data_format='CHW'): """ if data_format not in ['CHW', 'HWC']: - raise ValueError('data_format should be CHW or HWC. Got {}'.format( - data_format)) + raise ValueError( + 'data_format should be CHW or HWC. 
Got {}'.format(data_format)) if pic.ndim == 2: pic = pic[:, :, None] @@ -121,10 +121,9 @@ def resize(img, size, interpolation='bilinear'): dsize=(ow, oh), interpolation=_cv2_interp_from_str[interpolation]) else: - output = cv2.resize( - img, - dsize=(size[1], size[0]), - interpolation=_cv2_interp_from_str[interpolation]) + output = cv2.resize(img, + dsize=(size[1], size[0]), + interpolation=_cv2_interp_from_str[interpolation]) if len(img.shape) == 3 and img.shape[2] == 1: return output[:, :, np.newaxis] else: @@ -202,23 +201,21 @@ def pad(img, padding, fill=0, padding_mode='constant'): pad_bottom = padding[3] if len(img.shape) == 3 and img.shape[2] == 1: - return cv2.copyMakeBorder( - img, - top=pad_top, - bottom=pad_bottom, - left=pad_left, - right=pad_right, - borderType=_cv2_pad_from_str[padding_mode], - value=fill)[:, :, np.newaxis] + return cv2.copyMakeBorder(img, + top=pad_top, + bottom=pad_bottom, + left=pad_left, + right=pad_right, + borderType=_cv2_pad_from_str[padding_mode], + value=fill)[:, :, np.newaxis] else: - return cv2.copyMakeBorder( - img, - top=pad_top, - bottom=pad_bottom, - left=pad_left, - right=pad_right, - borderType=_cv2_pad_from_str[padding_mode], - value=fill) + return cv2.copyMakeBorder(img, + top=pad_top, + bottom=pad_bottom, + left=pad_left, + right=pad_right, + borderType=_cv2_pad_from_str[padding_mode], + value=fill) def crop(img, top, left, height, width): @@ -361,8 +358,8 @@ def adjust_saturation(img, saturation_factor): dtype = img.dtype img = img.astype(np.float32) - alpha = np.random.uniform( - max(0, 1 - saturation_factor), 1 + saturation_factor) + alpha = np.random.uniform(max(0, 1 - saturation_factor), + 1 + saturation_factor) gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) gray_img = gray_img[..., np.newaxis] img = img * alpha + gray_img * (1 - alpha) @@ -394,8 +391,8 @@ def adjust_hue(img, hue_factor): cv2 = try_import('cv2') if not (-0.5 <= hue_factor <= 0.5): - raise ValueError('hue_factor:{} is not in [-0.5, 0.5].'.format( - hue_factor)) + raise ValueError( + 'hue_factor:{} is not in [-0.5, 0.5].'.format(hue_factor)) dtype = img.dtype img = img.astype(np.uint8) @@ -476,19 +473,17 @@ def affine(img, M[1, 2] = ty if len(img.shape) == 3 and img.shape[2] == 1: - return cv2.warpAffine( - img, - M, - dsize=(w, h), - flags=_cv2_interp_from_str[interpolation], - borderValue=fill)[:, :, np.newaxis] + return cv2.warpAffine(img, + M, + dsize=(w, h), + flags=_cv2_interp_from_str[interpolation], + borderValue=fill)[:, :, np.newaxis] else: - return cv2.warpAffine( - img, - M, - dsize=(w, h), - flags=_cv2_interp_from_str[interpolation], - borderValue=fill) + return cv2.warpAffine(img, + M, + dsize=(w, h), + flags=_cv2_interp_from_str[interpolation], + borderValue=fill) def rotate(img, @@ -576,17 +571,15 @@ def rotate(img, w, h = int(nw), int(nh) if len(img.shape) == 3 and img.shape[2] == 1: - return cv2.warpAffine( - img, - M, (w, h), - flags=_cv2_interp_from_str[interpolation], - borderValue=fill)[:, :, np.newaxis] + return cv2.warpAffine(img, + M, (w, h), + flags=_cv2_interp_from_str[interpolation], + borderValue=fill)[:, :, np.newaxis] else: - return cv2.warpAffine( - img, - M, (w, h), - flags=_cv2_interp_from_str[interpolation], - borderValue=fill) + return cv2.warpAffine(img, + M, (w, h), + flags=_cv2_interp_from_str[interpolation], + borderValue=fill) def perspective(img, startpoints, endpoints, interpolation='nearest', fill=0): @@ -624,19 +617,17 @@ def perspective(img, startpoints, endpoints, interpolation='nearest', fill=0): matrix = 
cv2.getPerspectiveTransform(startpoints, endpoints) if len(img.shape) == 3 and img.shape[2] == 1: - return cv2.warpPerspective( - img, - matrix, - dsize=(w, h), - flags=_cv2_interp_from_str[interpolation], - borderValue=fill)[:, :, np.newaxis] + return cv2.warpPerspective(img, + matrix, + dsize=(w, h), + flags=_cv2_interp_from_str[interpolation], + borderValue=fill)[:, :, np.newaxis] else: - return cv2.warpPerspective( - img, - matrix, - dsize=(w, h), - flags=_cv2_interp_from_str[interpolation], - borderValue=fill) + return cv2.warpPerspective(img, + matrix, + dsize=(w, h), + flags=_cv2_interp_from_str[interpolation], + borderValue=fill) def to_grayscale(img, num_output_channels=1): diff --git a/python/paddle/vision/transforms/functional_pil.py b/python/paddle/vision/transforms/functional_pil.py index 4b86e14039e..50ed01f53e2 100644 --- a/python/paddle/vision/transforms/functional_pil.py +++ b/python/paddle/vision/transforms/functional_pil.py @@ -71,8 +71,8 @@ def to_tensor(pic, data_format='CHW'): """ if data_format not in ['CHW', 'HWC']: - raise ValueError('data_format should be CHW or HWC. Got {}'.format( - data_format)) + raise ValueError( + 'data_format should be CHW or HWC. Got {}'.format(data_format)) # PIL Image if pic.mode == 'I': @@ -231,8 +231,9 @@ def pad(img, padding, fill=0, padding_mode='constant'): img = np.asarray(img) # RGB image if len(img.shape) == 3: - img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), - (0, 0)), padding_mode) + img = np.pad(img, + ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), + padding_mode) # Grayscale image if len(img.shape) == 2: img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), @@ -391,8 +392,8 @@ def adjust_hue(img, hue_factor): """ if not (-0.5 <= hue_factor <= 0.5): - raise ValueError('hue_factor:{} is not in [-0.5, 0.5].'.format( - hue_factor)) + raise ValueError( + 'hue_factor:{} is not in [-0.5, 0.5].'.format(hue_factor)) input_mode = img.mode if input_mode in {'L', '1', 'I', 'F'}: @@ -471,12 +472,11 @@ def rotate(img, if isinstance(fill, int): fill = tuple([fill] * 3) - return img.rotate( - angle, - _pil_interp_from_str[interpolation], - expand, - center, - fillcolor=fill) + return img.rotate(angle, + _pil_interp_from_str[interpolation], + expand, + center, + fillcolor=fill) def perspective(img, coeffs, interpolation="nearest", fill=0): diff --git a/python/paddle/vision/transforms/functional_tensor.py b/python/paddle/vision/transforms/functional_tensor.py index 27f83029bab..4cf8253ec8b 100644 --- a/python/paddle/vision/transforms/functional_tensor.py +++ b/python/paddle/vision/transforms/functional_tensor.py @@ -128,25 +128,22 @@ def _hsv_to_rgb(img): q = paddle.clip(v * (1.0 - s * f), 0.0, 1.0) t = paddle.clip(v * (1.0 - s * (1.0 - f)), 0.0, 1.0) - mask = paddle.equal( - i.unsqueeze(axis=-3), - paddle.arange( - 6, dtype=i.dtype).reshape((-1, 1, 1))).astype(img.dtype) - matrix = paddle.stack( - [ - paddle.stack( - [v, q, p, p, t, v], axis=-3), paddle.stack( - [t, v, v, q, p, p], axis=-3), paddle.stack( - [p, p, t, v, v, q], axis=-3) - ], - axis=-4) + mask = paddle.equal(i.unsqueeze(axis=-3), + paddle.arange(6, dtype=i.dtype).reshape( + (-1, 1, 1))).astype(img.dtype) + matrix = paddle.stack([ + paddle.stack([v, q, p, p, t, v], axis=-3), + paddle.stack([t, v, v, q, p, p], axis=-3), + paddle.stack([p, p, t, v, v, q], axis=-3) + ], + axis=-4) return paddle.einsum("...ijk, ...xijk -> ...xjk", mask, matrix) def _blend_images(img1, img2, ratio): max_value = 1.0 if paddle.is_floating_point(img1) else 255.0 - 
return paddle.lerp(img2, img1, float(ratio)).clip( - 0, max_value).astype(img1.dtype) + return paddle.lerp(img2, img1, + float(ratio)).clip(0, max_value).astype(img1.dtype) def normalize(img, mean, std, data_format='CHW'): @@ -194,8 +191,8 @@ def to_grayscale(img, num_output_channels=1, data_format='CHW'): if num_output_channels not in (1, 3): raise ValueError('num_output_channels should be either 1 or 3') - rgb_weights = paddle.to_tensor( - [0.2989, 0.5870, 0.1140], place=img.place).astype(img.dtype) + rgb_weights = paddle.to_tensor([0.2989, 0.5870, 0.1140], + place=img.place).astype(img.dtype) if _is_channel_first(data_format): rgb_weights = rgb_weights.reshape((-1, 1, 1)) @@ -231,12 +228,15 @@ def _grid_transform(img, grid, mode, fill): shape=[img.shape[0], grid.shape[1], grid.shape[2], grid.shape[3]]) if fill is not None: - dummy = paddle.ones( - (img.shape[0], 1, img.shape[2], img.shape[3]), dtype=img.dtype) + dummy = paddle.ones((img.shape[0], 1, img.shape[2], img.shape[3]), + dtype=img.dtype) img = paddle.concat((img, dummy), axis=1) - img = F.grid_sample( - img, grid, mode=mode, padding_mode="zeros", align_corners=False) + img = F.grid_sample(img, + grid, + mode=mode, + padding_mode="zeros", + align_corners=False) # Fill with required color if fill is not None: @@ -287,8 +287,11 @@ def affine(img, matrix, interpolation="nearest", fill=None, data_format='CHW'): matrix = matrix.reshape((1, 2, 3)) shape = img.shape - grid = _affine_grid( - matrix, w=shape[-1], h=shape[-2], ow=shape[-1], oh=shape[-2]) + grid = _affine_grid(matrix, + w=shape[-1], + h=shape[-2], + ow=shape[-1], + oh=shape[-2]) if isinstance(fill, int): fill = tuple([fill] * 3) @@ -377,8 +380,8 @@ def rotate(img, [0.5 * w, 0.5 * h, 1.0], [0.5 * w, -0.5 * h, 1.0]], place=matrix.place).astype(matrix.dtype) - _pos = corners.reshape( - (1, -1, 3)).bmm(matrix.transpose((0, 2, 1))).reshape((1, -1, 2)) + _pos = corners.reshape((1, -1, 3)).bmm(matrix.transpose( + (0, 2, 1))).reshape((1, -1, 2)) _min = _pos.min(axis=-2).floor() _max = _pos.max(axis=-2).ceil() @@ -574,13 +577,12 @@ def center_crop(img, output_size, data_format='CHW'): crop_height, crop_width = output_size crop_top = int(round((image_height - crop_height) / 2.)) crop_left = int(round((image_width - crop_width) / 2.)) - return crop( - img, - crop_top, - crop_left, - crop_height, - crop_width, - data_format=data_format) + return crop(img, + crop_top, + crop_left, + crop_height, + crop_width, + data_format=data_format) def pad(img, padding, fill=0, padding_mode='constant', data_format='CHW'): @@ -705,11 +707,10 @@ def resize(img, size, interpolation='bilinear', data_format='CHW'): oh, ow = size img = img.unsqueeze(0) - img = F.interpolate( - img, - size=(oh, ow), - mode=interpolation.lower(), - data_format='N' + data_format.upper()) + img = F.interpolate(img, + size=(oh, ow), + mode=interpolation.lower(), + data_format='N' + data_format.upper()) return img.squeeze(0) @@ -755,11 +756,13 @@ def adjust_contrast(img, contrast_factor): channels = _get_image_num_channels(img, 'CHW') dtype = img.dtype if paddle.is_floating_point(img) else paddle.float32 if channels == 1: - extreme_target = paddle.mean( - img.astype(dtype), axis=(-3, -2, -1), keepdim=True) + extreme_target = paddle.mean(img.astype(dtype), + axis=(-3, -2, -1), + keepdim=True) elif channels == 3: - extreme_target = paddle.mean( - to_grayscale(img).astype(dtype), axis=(-3, -2, -1), keepdim=True) + extreme_target = paddle.mean(to_grayscale(img).astype(dtype), + axis=(-3, -2, -1), + keepdim=True) else: raise 
ValueError("channels of input should be either 1 or 3.") diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py index 31f56e89055..79c0720f607 100644 --- a/python/paddle/vision/transforms/transforms.py +++ b/python/paddle/vision/transforms/transforms.py @@ -51,8 +51,8 @@ def _get_image_size(img): return img.shape[2:][::-1] # nchw -> wh else: raise ValueError( - "The dim for input Tensor should be 3-D or 4-D, but received {}". - format(len(img.shape))) + "The dim for input Tensor should be 3-D or 4-D, but received {}" + .format(len(img.shape))) else: raise TypeError("Unexpected type {}".format(type(img))) @@ -72,8 +72,8 @@ def _check_input(value, value[0] = max(value[0], 0) elif isinstance(value, (tuple, list)) and len(value) == 2: if not bound[0] <= value[0] <= value[1] <= bound[1]: - raise ValueError("{} values should be between {}".format(name, - bound)) + raise ValueError("{} values should be between {}".format( + name, bound)) else: raise TypeError( "{} should be a single number or a list/tuple with lenght 2.". @@ -418,8 +418,8 @@ class Resize(BaseTransform): def __init__(self, size, interpolation='bilinear', keys=None): super(Resize, self).__init__(keys) - assert isinstance(size, int) or (isinstance(size, Iterable) and - len(size) == 2) + assert isinstance(size, int) or (isinstance(size, Iterable) + and len(size) == 2) self.size = size self.interpolation = interpolation @@ -938,8 +938,11 @@ class HueTransform(BaseTransform): def __init__(self, value, keys=None): super(HueTransform, self).__init__(keys) - self.value = _check_input( - value, 'hue', center=0, bound=(-0.5, 0.5), clip_first_on_zero=False) + self.value = _check_input(value, + 'hue', + center=0, + bound=(-0.5, 0.5), + clip_first_on_zero=False) def _apply_image(self, img): if self.value is None: @@ -986,7 +989,11 @@ class ColorJitter(BaseTransform): """ - def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, + def __init__(self, + brightness=0, + contrast=0, + saturation=0, + hue=0, keys=None): super(ColorJitter, self).__init__(keys) self.brightness = brightness @@ -1405,12 +1412,11 @@ class RandomAffine(BaseTransform): ret = self._get_param(img_size, self.degrees, self.translate, self.scale, self.shear) - return F.affine( - img, - *ret, - interpolation=self.interpolation, - fill=self.fill, - center=self.center) + return F.affine(img, + *ret, + interpolation=self.interpolation, + fill=self.fill, + center=self.center) class RandomRotation(BaseTransform): @@ -1577,14 +1583,17 @@ class RandomPerspective(BaseTransform): half_height = height // 2 half_width = width // 2 topleft = [ - int(random.uniform(0, int(distortion_scale * half_width) + 1)), - int(random.uniform(0, int(distortion_scale * half_height) + 1)), + int(random.uniform(0, + int(distortion_scale * half_width) + 1)), + int(random.uniform(0, + int(distortion_scale * half_height) + 1)), ] topright = [ int( random.uniform(width - int(distortion_scale * half_width) - 1, width)), - int(random.uniform(0, int(distortion_scale * half_height) + 1)), + int(random.uniform(0, + int(distortion_scale * half_height) + 1)), ] botright = [ int( @@ -1595,7 +1604,8 @@ class RandomPerspective(BaseTransform): height)), ] botleft = [ - int(random.uniform(0, int(distortion_scale * half_width) + 1)), + int(random.uniform(0, + int(distortion_scale * half_width) + 1)), int( random.uniform(height - int(distortion_scale * half_height) - 1, height)), @@ -1723,10 +1733,10 @@ class RandomErasing(BaseTransform): ), "scale should be of 
kind (min, max) and in range [0, 1]" assert isinstance(ratio, (tuple, list)), "ratio should be a tuple or list" - assert (ratio[0] >= 0 and - ratio[0] <= ratio[1]), "ratio should be of kind (min, max)" - assert (prob >= 0 and - prob <= 1), "The probability should be in range [0, 1]" + assert (ratio[0] >= 0 + and ratio[0] <= ratio[1]), "ratio should be of kind (min, max)" + assert (prob >= 0 + and prob <= 1), "The probability should be in range [0, 1]" assert isinstance( value, (numbers.Number, str, tuple, list)), "value should be a number, tuple, list or str" @@ -1772,8 +1782,8 @@ class RandomErasing(BaseTransform): continue if F._is_tensor_image(img): if value is None: - v = paddle.normal( - shape=[c, erase_h, erase_w]).astype(img.dtype) + v = paddle.normal(shape=[c, erase_h, erase_w]).astype( + img.dtype) else: v = paddle.to_tensor(value, dtype=img.dtype)[:, None, None] else: @@ -1808,7 +1818,7 @@ class RandomErasing(BaseTransform): raise ValueError( "Value should be a single number or a sequence with length equals to image's channel." ) - top, left, erase_h, erase_w, v = self._get_param(img, self.scale, - self.ratio, value) + top, left, erase_h, erase_w, v = self._get_param( + img, self.scale, self.ratio, value) return F.erase(img, top, left, erase_h, erase_w, v, self.inplace) return img diff --git a/tools/CrossStackProfiler/CspChromeTraceFormatter.py b/tools/CrossStackProfiler/CspChromeTraceFormatter.py index a8030988aac..811e6020267 100755 --- a/tools/CrossStackProfiler/CspChromeTraceFormatter.py +++ b/tools/CrossStackProfiler/CspChromeTraceFormatter.py @@ -27,6 +27,7 @@ import pandas as pd class ChromeTraceFormatter(object): + def __init__(self): self._events = [] self._metadata = [] diff --git a/tools/CrossStackProfiler/CspFileReader.py b/tools/CrossStackProfiler/CspFileReader.py index 12de488aa69..485f6d0f6a6 100755 --- a/tools/CrossStackProfiler/CspFileReader.py +++ b/tools/CrossStackProfiler/CspFileReader.py @@ -45,8 +45,8 @@ dcgmMetricParameterMap = { "04_memUtility": [("FB_USED_RATIO", "FB_USED_RATIO"), ("DRAMA", "DRAMA")], "05_txUtility": [("NVLTX", "NVLTX"), ("NVLRX", "NVLRX"), ("PCITX", "PCITX"), ("PCIRX", "PCIRX")], - "06_calUtility": - [("FP32A", "FP32A"), ("FP16A", "FP16A"), ("TENSO", "TENSO")] + "06_calUtility": [("FP32A", "FP32A"), ("FP16A", "FP16A"), + ("TENSO", "TENSO")] } DCGMINFO_TRACE_NUM = len(dcgmMetricParameterMap.keys()) NETINFO_TRACE_NUM = 2 @@ -66,6 +66,7 @@ FILEORGANIZEFORM = [ class FileReader(object): + def __init__(self, logger, args): self._logger = logger self._args = args @@ -174,8 +175,8 @@ class FileReader(object): file) if not self._fileList: - if (self._getId(self._fileList[-1]) - self._getId(self._fileList[0]) - ) != len(self._fileList) - 1: + if (self._getId(self._fileList[-1]) - + self._getId(self._fileList[0])) != len(self._fileList) - 1: raise Exception("The file id should be countious!") # sort def _sortBySuffix(elem): @@ -194,8 +195,9 @@ class FileReader(object): def _getId(self, fileName, organizeForm, sed="."): if self._organizeForm != organizeForm: - raise TypeError("Can not get rank id when organizer form is not %s!" - % organizeForm) + raise TypeError( + "Can not get rank id when organizer form is not %s!" % + organizeForm) if not os.path.isfile(fileName): raise IOError("[%s] is not a valid file!" 
% (fileName)) @@ -294,12 +296,20 @@ class FileReader(object): gpuId, pretty=False, tmpPath="./tmp"): - return self.dumpDict( - data, "opinfo", groupId, gpuId, pretty=False, tmpPath="./tmp") + return self.dumpDict(data, + "opinfo", + groupId, + gpuId, + pretty=False, + tmpPath="./tmp") def dumpDCGMDict(self, data, groupId, gpuId, pretty=False, tmpPath="./tmp"): - return self.dumpDict( - data, "dcgm", groupId, gpuId, pretty=False, tmpPath="./tmp") + return self.dumpDict(data, + "dcgm", + groupId, + gpuId, + pretty=False, + tmpPath="./tmp") def dumpDict(self, data, diff --git a/tools/CrossStackProfiler/CspReporter.py b/tools/CrossStackProfiler/CspReporter.py index 1b8ae0e3855..dc12f725bb4 100755 --- a/tools/CrossStackProfiler/CspReporter.py +++ b/tools/CrossStackProfiler/CspReporter.py @@ -33,37 +33,41 @@ from CspFileReader import FILEORGANIZEFORM_BYRANK, FILEORGANIZEFORM_BYTRAINER, F def get_argparse(): parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - '--profile_path', - type=str, - default='.', - help='Working path that store the monitor data.') - - parser.add_argument( - '--timeline_path', - type=str, - default='.', - help='Output timeline file name.') - - parser.add_argument( - '--gpuPerTrainer', type=int, default=8, help='Gpus per trainer.') - - parser.add_argument( - '--trainerNum', type=int, default=4, help='Num of trainer.') - - parser.add_argument( - '--groupSize', type=int, default=8, help='Num of trainer in a group.') - - parser.add_argument( - '--displaySize', - type=int, - default=2, - help='Num of line need to display in a group.') + parser.add_argument('--profile_path', + type=str, + default='.', + help='Working path that store the monitor data.') + + parser.add_argument('--timeline_path', + type=str, + default='.', + help='Output timeline file name.') + + parser.add_argument('--gpuPerTrainer', + type=int, + default=8, + help='Gpus per trainer.') + + parser.add_argument('--trainerNum', + type=int, + default=4, + help='Num of trainer.') + + parser.add_argument('--groupSize', + type=int, + default=8, + help='Num of trainer in a group.') + + parser.add_argument('--displaySize', + type=int, + default=2, + help='Num of line need to display in a group.') return parser.parse_args() class CspReporter(object): + def __init__(self, args): self._args = args print(self._args) @@ -160,17 +164,17 @@ class CspReporter(object): opInfoDict = self._profileFileReader.getOpInfoDict(groupId, gpuId) traceObj = {} - traceObj["traceEvents"] = pipileInfo[str(gpuId)] + opInfoDict[ - "traceEvents"] + dcgmInfoDict["traceEvents"] + netInfo[ - "traceEvents"] + traceObj["traceEvents"] = pipileInfo[str( + gpuId)] + opInfoDict["traceEvents"] + dcgmInfoDict[ + "traceEvents"] + netInfo["traceEvents"] self._profileFileReader.dumpDict(traceObj, "traceFile", groupId, gpuId, False, self._saveFilePath) def _generateTraceFileByGroup(self, groupId, processNum): # first we need to generate pipeline info - pipileInfo = self._profileFileReader.getPipeLineInfo(groupId, - processNum) + pipileInfo = self._profileFileReader.getPipeLineInfo( + groupId, processNum) # second we need to generate dcgm info dcgmInfo = self._dcgmFileReader.getDCGMTraceInfo(groupId, processNum) @@ -187,13 +191,13 @@ class CspReporter(object): pidList = [] for gpuId in range(self._gpuPerTrainer): - subproc = Process( - target=self._generateTraceFileByGroupAndGpuId, - args=( - pipileInfo, - netInfo, - groupId, - gpuId, )) + subproc = Process(target=self._generateTraceFileByGroupAndGpuId, + args=( + pipileInfo, + netInfo, + 
groupId, + gpuId, + )) processPool.append(subproc) subproc.start() pidList.append(subproc.pid) @@ -212,11 +216,11 @@ class CspReporter(object): processPool = [] pidList = [] for groupId in range(self._trainerNum / self._groupSize): - subproc = Process( - target=self._generateTraceFileByGroup, - args=( - groupId, - processNum, )) + subproc = Process(target=self._generateTraceFileByGroup, + args=( + groupId, + processNum, + )) processPool.append(subproc) subproc.start() pidList.append(subproc.pid) diff --git a/tools/CrossStackProfiler/DCGMFileReader.py b/tools/CrossStackProfiler/DCGMFileReader.py index 599acb44c65..4ae15df5ad0 100755 --- a/tools/CrossStackProfiler/DCGMFileReader.py +++ b/tools/CrossStackProfiler/DCGMFileReader.py @@ -34,6 +34,7 @@ from CspFileReader import FILEORGANIZEFORM_BYRANK, FILEORGANIZEFORM_BYTRAINER, F class dcgmFileReader(FileReader): + def parseFileByGroup(self, groupId, processNum=8): fileFist = self.getFileListByGroup(groupId) displaySize = min(self._displaySize, len(fileFist)) @@ -53,10 +54,10 @@ class dcgmFileReader(FileReader): taskList = self._splitTaskListForMultiProcess(fileFist, processNum) for task in taskList: - subproc = Process( - target=self._parseTask, args=( - task, - q, )) + subproc = Process(target=self._parseTask, args=( + task, + q, + )) processPool.append(subproc) subproc.start() pidList.append(subproc.pid) @@ -77,8 +78,9 @@ class dcgmFileReader(FileReader): isFistProcess = False dcgm_data = q.get() else: - dcgm_data = pd.concat( - [dcgm_data, q.get()], axis=0, join='outer') + dcgm_data = pd.concat([dcgm_data, q.get()], + axis=0, + join='outer') return dcgm_data @@ -94,8 +96,9 @@ class dcgmFileReader(FileReader): is_first = False dcgm_data = tmp_data else: - dcgm_data = pd.concat( - [dcgm_data, tmp_data], axis=0, join='outer') + dcgm_data = pd.concat([dcgm_data, tmp_data], + axis=0, + join='outer') dcgm_data = dcgm_data.dropna() if not q is None: q.put(dcgm_data) @@ -123,8 +126,8 @@ class dcgmFileReader(FileReader): if 'nv-hostengine' in line or 'dmon' in line or 'Host Engine Listener Started' in line: continue - if not line.strip().startswith("GPU") and not line.strip( - ).startswith("# Entity"): + if not line.strip().startswith( + "GPU") and not line.strip().startswith("# Entity"): continue # skip non-needed headers (only the header in 1th line was needed) @@ -223,14 +226,14 @@ class dcgmFileReader(FileReader): pidList = [] for gpuId in range(self._gpuPerTrainer): - subproc = Process( - target=self._getDCGMTraceInfoByGpuId, - args=( - groupId, - gpuId, - dcgm_data, - pid_map, - q, )) + subproc = Process(target=self._getDCGMTraceInfoByGpuId, + args=( + groupId, + gpuId, + dcgm_data, + pid_map, + q, + )) processPool.append(subproc) subproc.start() pidList.append(subproc.pid) diff --git a/tools/CrossStackProfiler/NetFileReader.py b/tools/CrossStackProfiler/NetFileReader.py index fe900fab2ad..1ae8a6803d5 100755 --- a/tools/CrossStackProfiler/NetFileReader.py +++ b/tools/CrossStackProfiler/NetFileReader.py @@ -31,6 +31,7 @@ from CspFileReader import FILEORGANIZEFORM_BYRANK, FILEORGANIZEFORM_BYTRAINER, F class netFileReader(FileReader): + def _parseSingleFile(self, fileNameList, tx_pid, rx_pid, q=None): traceInfo = {} @@ -91,12 +92,13 @@ class netFileReader(FileReader): taskList = self._splitTaskListForMultiProcess(fileFist, processNum) for task in taskList: - subproc = Process( - target=self._parseSingleFile, args=( - task, - tx_pid, - rx_pid, - q, )) + subproc = Process(target=self._parseSingleFile, + args=( + task, + tx_pid, + rx_pid, + q, + )) 
processPool.append(subproc) subproc.start() pidList.append(subproc.pid) diff --git a/tools/CrossStackProfiler/ProfileFileReader.py b/tools/CrossStackProfiler/ProfileFileReader.py index 0f3299ef547..628592a159f 100755 --- a/tools/CrossStackProfiler/ProfileFileReader.py +++ b/tools/CrossStackProfiler/ProfileFileReader.py @@ -35,6 +35,7 @@ from CspFileReader import FILEORGANIZEFORM_BYRANK, FILEORGANIZEFORM_BYTRAINER, F class profileFileReader(FileReader): + def _parseSingleFile(self, profile): with open(profile, 'rb') as f: profile_s = f.read() @@ -71,6 +72,7 @@ class profileFileReader(FileReader): return False def _allocate_forwardBackwardInfo(self, restList, pid, tid): + def _cmp_ele(items): return items["ts"] @@ -135,8 +137,8 @@ class profileFileReader(FileReader): if self._is_forwardBackwardInfo(traceEvent): traceEventList.append(traceEvent) - pipeLineList = self._allocate_forwardBackwardInfo(traceEventList, - pid, tid) + pipeLineList = self._allocate_forwardBackwardInfo( + traceEventList, pid, tid) res[str(rankId)] = pipeLineList @@ -159,10 +161,10 @@ class profileFileReader(FileReader): taskList = self._splitTaskListForMultiProcess(fileFist, processNum) for task in taskList: - subproc = Process( - target=self._getPipeLineInfo, args=( - task, - q, )) + subproc = Process(target=self._getPipeLineInfo, args=( + task, + q, + )) processPool.append(subproc) subproc.start() pidList.append(subproc.pid) @@ -215,13 +217,13 @@ class profileFileReader(FileReader): devices[(k, event.device_id, "CPU")] = pid # -1 device id represents CUDA API(RunTime) call.(e.g. cudaLaunch, cudaMemcpy) if event.device_id == -1: - chrome_trace.emit_pid("%02d_%s:cuda_api" % - (lineNum, k), pid) + chrome_trace.emit_pid( + "%02d_%s:cuda_api" % (lineNum, k), pid) lineNum = lineNum + 1 else: - chrome_trace.emit_pid("%02d_%s:cpu:block:%d" % - (lineNum, k, event.device_id), - pid) + chrome_trace.emit_pid( + "%02d_%s:cpu:block:%d" % + (lineNum, k, event.device_id), pid) lineNum = lineNum + 1 elif event.type == profiler_pb2.Event.GPUKernel: if (k, event.device_id, "GPUKernel") not in devices: @@ -230,9 +232,9 @@ class profileFileReader(FileReader): initPid = initPid + 1 devices[(k, event.device_id, "GPUKernel")] = pid - chrome_trace.emit_pid("%02d_%s:gpu:%d" % - (lineNum, k, event.device_id), - pid) + chrome_trace.emit_pid( + "%02d_%s:gpu:%d" % + (lineNum, k, event.device_id), pid) lineNum = lineNum + 1 if not hasattr(profile_pb, "mem_events"): @@ -255,13 +257,13 @@ class profileFileReader(FileReader): initPid = initPid + 1 mem_devices[(k, mevent.device_id, "CPU")] = pid - chrome_trace.emit_pid("%02d_memory usage on %s:cpu:%d" % - (lineNum, k, mevent.device_id), - pid) + chrome_trace.emit_pid( + "%02d_memory usage on %s:cpu:%d" % + (lineNum, k, mevent.device_id), pid) lineNum = lineNum + 1 elif mevent.place == profiler_pb2.MemEvent.CUDAPinnedPlace: - if (k, mevent.device_id, "CUDAPinnedPlace" - ) not in mem_devices: + if (k, mevent.device_id, + "CUDAPinnedPlace") not in mem_devices: if gpuId == mevent.device_id: pid = initPid initPid = initPid + 1 @@ -277,8 +279,8 @@ class profileFileReader(FileReader): initPid = initPid + 1 mem_devices[(k, 0, "CPU")] = pid - chrome_trace.emit_pid("%02d_memory usage on %s:cpu:%d" % - (lineNum, k, 0), pid) + chrome_trace.emit_pid( + "%02d_memory usage on %s:cpu:%d" % (lineNum, k, 0), pid) lineNum = lineNum + 1 if (k, 0, "GPU") not in mem_devices: # if gpuId == mevent.device_id: @@ -286,8 +288,8 @@ class profileFileReader(FileReader): initPid = initPid + 1 mem_devices[(k, 0, "GPU")] = pid - 
chrome_trace.emit_pid("%02d_memory usage on %s:gpu:%d" % - (lineNum, k, 0), pid) + chrome_trace.emit_pid( + "%02d_memory usage on %s:gpu:%d" % (lineNum, k, 0), pid) lineNum = lineNum + 1 if (k, 0, "CUDAPinnedPlace") not in mem_devices: pid = initPid @@ -324,10 +326,10 @@ class profileFileReader(FileReader): args['detail_info'] = event.detail_info # TODO(panyx0718): Chrome tracing only handles ms. However, some # ops takes micro-seconds. Hence, we keep the ns here. - chrome_trace.emit_region( - self._align_ts(event.start_ns), - (event.end_ns - event.start_ns) / 1.0, pid, - event.sub_device_id, 'Op', event.name, args) + chrome_trace.emit_region(self._align_ts(event.start_ns), + (event.end_ns - event.start_ns) / 1.0, + pid, event.sub_device_id, 'Op', + event.name, args) return chrome_trace def _allocate_memory_event(self, profile_dict, mem_devices, gpuId): @@ -358,8 +360,8 @@ class profileFileReader(FileReader): else: place = "UnDefine" - if (mevent.place == profiler_pb2.MemEvent.CUDAPlace or - mevent.place == profiler_pb2.MemEvent.CUDAPinnedPlace + if (mevent.place == profiler_pb2.MemEvent.CUDAPlace + or mevent.place == profiler_pb2.MemEvent.CUDAPinnedPlace ) and mevent.device_id != gpuId: continue @@ -388,9 +390,10 @@ class profileFileReader(FileReader): total_size += mem_list[i + 1]['size'] i += 1 - chrome_trace.emit_counter( - "Memory", "Memory", mem_list[i]['pid'], - self._align_ts(mem_list[i]['time']), 0, total_size) + chrome_trace.emit_counter("Memory", "Memory", + mem_list[i]['pid'], + self._align_ts(mem_list[i]['time']), + 0, total_size) i += 1 return chrome_trace @@ -426,10 +429,11 @@ class profileFileReader(FileReader): pidList = [] for gpuId in range(self._gpuPerTrainer): - subproc = Process( - target=self._getOPTraceInfoByGpuId, args=( - groupId, - gpuId, )) + subproc = Process(target=self._getOPTraceInfoByGpuId, + args=( + groupId, + gpuId, + )) processPool.append(subproc) subproc.start() pidList.append(subproc.pid) diff --git a/tools/analysisPyXml.py b/tools/analysisPyXml.py index 5d6a5ac4594..9d70a159118 100644 --- a/tools/analysisPyXml.py +++ b/tools/analysisPyXml.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -47,8 +47,8 @@ def analysisPyXml(rootPath, ut): if output.strip().startswith( ('from', 'import', '__all__', 'def', 'class', '"""', '@', '\'\'\'', 'logger', '_logger', 'logging', 'r"""', - 'pass', 'try', 'except', 'if __name__ == "__main__"' - )) == False: + 'pass', 'try', 'except', + 'if __name__ == "__main__"')) == False: pattern = "(.*) = ('*')|(.*) = (\"*\")|(.*) = (\d)|(.*) = (-\d)|(.*) = (None)|(.*) = (True)|(.*) = (False)|(.*) = (URL_PREFIX*)|(.*) = (\[)|(.*) = (\{)|(.*) = (\()" #a='b'/a="b"/a=0 if re.match(pattern, output.strip()) == None: pyCov_file.append(clazz_filename) diff --git a/tools/analysis_build_time.py b/tools/analysis_build_time.py index 8ae94348f21..e2cc454f573 100644 --- a/tools/analysis_build_time.py +++ b/tools/analysis_build_time.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/check_api_compatible.py b/tools/check_api_compatible.py index f91112abd64..18fb4d7ecdf 100644 --- a/tools/check_api_compatible.py +++ b/tools/check_api_compatible.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -61,16 +61,16 @@ def check_compatible(old_api_spec, new_api_spec): """ check compatible, FullArgSpec """ - if not (isinstance(old_api_spec, inspect.FullArgSpec) and isinstance( - new_api_spec, inspect.FullArgSpec)): + if not (isinstance(old_api_spec, inspect.FullArgSpec) + and isinstance(new_api_spec, inspect.FullArgSpec)): logger.warning( "new_api_spec or old_api_spec is not instance of inspect.FullArgSpec" ) return False return _check_compatible( - old_api_spec.args, new_api_spec.args, [] - if old_api_spec.defaults is None else old_api_spec.defaults, [] - if new_api_spec.defaults is None else new_api_spec.defaults) + old_api_spec.args, new_api_spec.args, + [] if old_api_spec.defaults is None else old_api_spec.defaults, + [] if new_api_spec.defaults is None else new_api_spec.defaults) def check_compatible_str(old_api_spec_str, new_api_spec_str): @@ -129,13 +129,15 @@ def parse_args(): 'prev', type=argparse.FileType('r'), help='the previous version (the version from develop branch)') - parser.add_argument( - 'post', - type=argparse.FileType('r'), - help='the post version (the version from PullRequest)') + parser.add_argument('post', + type=argparse.FileType('r'), + help='the post version (the version from PullRequest)') for item in arguments: - parser.add_argument( - item[0], dest=item[1], help=item[4], type=item[2], default=item[3]) + parser.add_argument(item[0], + dest=item[1], + help=item[4], + type=item[2], + default=item[3]) if len(sys.argv) < 2: parser.print_help() diff --git a/tools/check_op_benchmark_result.py b/tools/check_op_benchmark_result.py index e45d12c7b1b..73075125ac4 100644 --- a/tools/check_op_benchmark_result.py +++ b/tools/check_op_benchmark_result.py @@ -61,7 +61,8 @@ def load_benchmark_result_from_logs_dir(logs_dir): check_path_exists(logs_dir) log_file_path = lambda log_file: os.path.join(logs_dir, log_file) - result_lambda = lambda log_file: (log_file, parse_log_file(log_file_path(log_file))) + result_lambda = lambda log_file: (log_file, + parse_log_file(log_file_path(log_file))) return dict(map(result_lambda, os.listdir(logs_dir))) @@ -183,11 +184,10 @@ if __name__ == "__main__": type=str, required=True, help="Specify the benchmark result directory of PR branch.") - parser.add_argument( - "--api_info_file", - type=str, - required=False, - help="Specify the api info to run benchmark test.") + 
parser.add_argument("--api_info_file", + type=str, + required=False, + help="Specify the api info to run benchmark test.") args = parser.parse_args() check_results = dict(accuracy=list(), speed=list()) diff --git a/tools/check_op_desc.py b/tools/check_op_desc.py index 19984a55a41..7367b88d5b8 100644 --- a/tools/check_op_desc.py +++ b/tools/check_op_desc.py @@ -321,8 +321,8 @@ def print_desc_error_message(error_message): for arg in changed_args: ori_value, new_value = changed_args.get(arg) print( - " * The arg '{}' of Input '{}' is changed: from '{}' to '{}'.". - format(arg, name, ori_value, new_value)) + " * The arg '{}' of Input '{}' is changed: from '{}' to '{}'." + .format(arg, name, ori_value, new_value)) for name in Inputs_error.get(QUANT, {}): print(" * The added Input '{}' is `quant`, need slim to review.". @@ -345,8 +345,8 @@ def print_desc_error_message(error_message): for arg in changed_args: ori_value, new_value = changed_args.get(arg) print( - " * The arg '{}' of Output '{}' is changed: from '{}' to '{}'.". - format(arg, name, ori_value, new_value)) + " * The arg '{}' of Output '{}' is changed: from '{}' to '{}'." + .format(arg, name, ori_value, new_value)) for name in Outputs_error.get(QUANT, {}): print(" * The added Output '{}' is `quant`, need slim to review.". @@ -371,8 +371,8 @@ def print_desc_error_message(error_message): for arg in changed_args: ori_value, new_value = changed_args.get(arg) print( - " * The arg '{}' of attr '{}' is changed: from '{}' to '{}'.". - format(arg, name, ori_value, new_value)) + " * The arg '{}' of attr '{}' is changed: from '{}' to '{}'." + .format(arg, name, ori_value, new_value)) for name in attrs_error.get(QUANT, {}): # TODO(Wilber): @@ -414,13 +414,15 @@ def print_version_error_message(error_message): error_list = attrs_error.get(ADD, []) if error_list: for tup in error_list: - print(" * The added attribute '{}' is not yet registered.". - format(tup[1])) - error_dic = error_message.get(op_name, {}).get(ATTRS, {}).get(CHANGE, - {}) + print( + " * The added attribute '{}' is not yet registered.".format( + tup[1])) + error_dic = error_message.get(op_name, {}).get(ATTRS, + {}).get(CHANGE, {}) for key, val in error_dic.items(): - print(" * The change of attribute '{}' is not yet registered.". 
- format(key)) + print( + " * The change of attribute '{}' is not yet registered.".format( + key)) def print_repeat_process(): @@ -446,8 +448,8 @@ if len(sys.argv) == 3: with open(sys.argv[2], 'r') as f: new_op_desc = f.read() - desc_error_message, version_error_message = compare_op_desc(origin_op_desc, - new_op_desc) + desc_error_message, version_error_message = compare_op_desc( + origin_op_desc, new_op_desc) if error: print("-" * 30) print_desc_error_message(desc_error_message) diff --git a/tools/check_op_register_type.py b/tools/check_op_register_type.py index b32eff05731..0b67e6e7f58 100644 --- a/tools/check_op_register_type.py +++ b/tools/check_op_register_type.py @@ -45,8 +45,8 @@ def get_all_kernels(): register_type = infos[0].split(":")[-1] op_kernel_types[op_type].append(register_type.lower()) - for (op_type, op_kernels) in sorted( - op_kernel_types.items(), key=lambda x: x[0]): + for (op_type, op_kernels) in sorted(op_kernel_types.items(), + key=lambda x: x[0]): print(op_type, " ".join(sorted(op_kernels))) @@ -64,8 +64,8 @@ def print_diff(op_type, register_types): if len(FLOATS - register_types) == 1: lack_types |= FLOATS - register_types - print("{} only supports [{}] now, but lacks [{}].".format(op_type, " ".join( - register_types), " ".join(lack_types))) + print("{} only supports [{}] now, but lacks [{}].".format( + op_type, " ".join(register_types), " ".join(lack_types))) def check_add_op_valid(): diff --git a/tools/codestyle/docstring_checker.py b/tools/codestyle/docstring_checker.py index 823d9470230..c5a9d852691 100644 --- a/tools/codestyle/docstring_checker.py +++ b/tools/codestyle/docstring_checker.py @@ -134,12 +134,12 @@ class DocstringChecker(BaseChecker): symbol + "-missing", 'Add docstring longer >=10'), 'W9006': ('Docstring indent error, use 4 space for indent', symbol + "-indent-error", 'Use 4 space for indent'), - 'W9007': ('You should add `Returns` in comments', - symbol + "-with-returns", - 'There should be a `Returns` section in comments'), - 'W9008': ('You should add `Raises` section in comments', - symbol + "-with-raises", - 'There should be a `Raises` section in comments'), + 'W9007': + ('You should add `Returns` in comments', symbol + "-with-returns", + 'There should be a `Returns` section in comments'), + 'W9008': + ('You should add `Raises` section in comments', symbol + "-with-raises", + 'There should be a `Raises` section in comments'), } options = () @@ -333,17 +333,20 @@ class DocstringChecker(BaseChecker): parsed_args = doc.args args_not_documented = set(args) - set(parsed_args) if len(args) > 0 and len(parsed_args) <= 0: - self.add_message( - 'W9003', - node=node, - line=node.fromlineno, - args=list(args_not_documented)) + self.add_message('W9003', + node=node, + line=node.fromlineno, + args=list(args_not_documented)) return False for t in args: if t not in parsed_args: - self.add_message( - 'W9003', node=node, line=node.fromlineno, args=[t, ]) + self.add_message('W9003', + node=node, + line=node.fromlineno, + args=[ + t, + ]) return False return True diff --git a/tools/continuous_integration/bisect.py b/tools/continuous_integration/bisect.py index 21a46e5cef0..afffc60a449 100644 --- a/tools/continuous_integration/bisect.py +++ b/tools/continuous_integration/bisect.py @@ -27,36 +27,43 @@ import subprocess import sys parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - '--git_dir', type=str, default='', help='git repo root directory.') -parser.add_argument( - '--build_dir', type=str, default='', help='build directory.') 
-parser.add_argument( - '--good_commit', - type=str, - default='', - help='The old commit known to be good.') -parser.add_argument( - '--bad_commit', - type=str, - default='', - help='The new commit known to be bad.') -parser.add_argument( - '--test_target', type=str, default='', help='The test target to evaluate.') +parser.add_argument('--git_dir', + type=str, + default='', + help='git repo root directory.') +parser.add_argument('--build_dir', + type=str, + default='', + help='build directory.') +parser.add_argument('--good_commit', + type=str, + default='', + help='The old commit known to be good.') +parser.add_argument('--bad_commit', + type=str, + default='', + help='The new commit known to be bad.') +parser.add_argument('--test_target', + type=str, + default='', + help='The test target to evaluate.') parser.add_argument( '--bisect_branch', type=str, default='develop', help='The mainline branch to bisect (feature branch ignored.') -parser.add_argument( - '--log_file', type=str, default='', help='The file use to log outputs.') -parser.add_argument( - '--test_times', - type=int, - default=10, - help="Number of times to run the test target.") -parser.add_argument( - '--build_parallel', type=int, default=32, help="make parallelism.") +parser.add_argument('--log_file', + type=str, + default='', + help='The file use to log outputs.') +parser.add_argument('--test_times', + type=int, + default=10, + help="Number of times to run the test target.") +parser.add_argument('--build_parallel', + type=int, + default=32, + help="make parallelism.") args = parser.parse_args() if not args.log_file: @@ -74,12 +81,10 @@ print_arguments() # List the commits in mainline branch. os.chdir(args.git_dir) -ret = subprocess.check_output( - [ - 'git rev-list --first-parent %s...%s' % (args.good_commit, - args.bad_commit) - ], - shell=True) +ret = subprocess.check_output([ + 'git rev-list --first-parent %s...%s' % (args.good_commit, args.bad_commit) +], + shell=True) sys.stdout.write('commits found:\n%s\n' % ret) commits = ret.strip().split('\n') os.chdir(args.build_dir) @@ -90,12 +95,11 @@ last_culprit = '' while True: # Get to the mainline branch and clean up os.chdir(args.git_dir) - subprocess.check_output( - [ - 'git checkout %s && git clean -fd && git checkout .' % - args.bisect_branch - ], - shell=True) + subprocess.check_output([ + 'git checkout %s && git clean -fd && git checkout .' 
% + args.bisect_branch + ], + shell=True) if not commits: sys.stdout.write('no commits to bisect\n') diff --git a/tools/count_api_without_core_ops.py b/tools/count_api_without_core_ops.py index 5519859471a..6b5bffd3327 100644 --- a/tools/count_api_without_core_ops.py +++ b/tools/count_api_without_core_ops.py @@ -24,9 +24,11 @@ import functools import platform from paddle import _C_ops -__all__ = ['get_apis_with_and_without_core_ops', ] +__all__ = [ + 'get_apis_with_and_without_core_ops', +] -# APIs that should not be printed into API.spec +# APIs that should not be printed into API.spec omitted_list = [ "paddle.fluid.LoDTensor.set", # Do not know why it should be omitted "paddle.fluid.io.ComposeNotAligned", @@ -41,10 +43,9 @@ def md5(doc): md5sum = hashinst.hexdigest() except UnicodeDecodeError as e: md5sum = None - print( - "Error({}) occurred when `md5({})`, discard it.".format( - str(e), doc), - file=sys.stderr) + print("Error({}) occurred when `md5({})`, discard it.".format( + str(e), doc), + file=sys.stderr) return md5sum @@ -99,8 +100,8 @@ def visit_member(parent_name, member, func): if inspect.isclass(member): func(member, cur_name) for name, value in inspect.getmembers(member): - if hasattr(value, '__name__') and (not name.startswith("_") or - name == "__init__"): + if hasattr(value, '__name__') and (not name.startswith("_") + or name == "__init__"): visit_member(cur_name, value, func) elif inspect.ismethoddescriptor(member): return @@ -109,8 +110,9 @@ def visit_member(parent_name, member, func): elif inspect.isgetsetdescriptor(member): return else: - raise RuntimeError("Unsupported generate signature of member, type {0}". - format(str(type(member)))) + raise RuntimeError( + "Unsupported generate signature of member, type {0}".format( + str(type(member)))) def is_primitive(instance): @@ -175,8 +177,8 @@ def get_apis_with_and_without_core_ops(modules): api_with_ops = [] api_without_ops = [] for m in modules: - visit_all_module( - importlib.import_module(m), split_with_and_without_core_ops) + visit_all_module(importlib.import_module(m), + split_with_and_without_core_ops) return api_with_ops, api_without_ops diff --git a/tools/coverage/coverage_diff.py b/tools/coverage/coverage_diff.py index 6a400d293b2..fc5a34364c5 100644 --- a/tools/coverage/coverage_diff.py +++ b/tools/coverage/coverage_diff.py @@ -2,13 +2,13 @@ # -*- coding: utf-8 -*- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/coverage/coverage_diff_list.py b/tools/coverage/coverage_diff_list.py index 62834301209..13ba471c13a 100644 --- a/tools/coverage/coverage_diff_list.py +++ b/tools/coverage/coverage_diff_list.py @@ -2,13 +2,13 @@ # -*- coding: utf-8 -*- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/coverage/coverage_lines.py b/tools/coverage/coverage_lines.py index 553cd691e45..3c5df9d88e8 100644 --- a/tools/coverage/coverage_lines.py +++ b/tools/coverage/coverage_lines.py @@ -2,13 +2,13 @@ # -*- coding: utf-8 -*- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/coverage/cuda_clean.py b/tools/coverage/cuda_clean.py index 8c03edd0785..82bb6a553c9 100644 --- a/tools/coverage/cuda_clean.py +++ b/tools/coverage/cuda_clean.py @@ -2,13 +2,13 @@ # -*- coding: utf-8 -*- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/coverage/gcda_clean.py b/tools/coverage/gcda_clean.py index 12bd04a6907..062b8f356d6 100644 --- a/tools/coverage/gcda_clean.py +++ b/tools/coverage/gcda_clean.py @@ -2,13 +2,13 @@ # -*- coding: utf-8 -*- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/coverage/pull_request.py b/tools/coverage/pull_request.py index f3e88286ca9..53325d36820 100644 --- a/tools/coverage/pull_request.py +++ b/tools/coverage/pull_request.py @@ -2,13 +2,13 @@ # -*- coding: utf-8 -*- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/tools/coverage/python_coverage.py b/tools/coverage/python_coverage.py index f2e52b5e23b..d45fb4d58c5 100644 --- a/tools/coverage/python_coverage.py +++ b/tools/coverage/python_coverage.py @@ -2,13 +2,13 @@ # -*- coding: utf-8 -*- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -33,8 +33,8 @@ for clazz in root.findall('packages/package/classes/class'): clazz_filename = path.join(source, clazz_filename) if clazz_filename.startswith('/paddle/build/python/'): - clazz_filename = '/paddle/python/' + clazz_filename[len( - '/paddle/build/python/'):] + clazz_filename = '/paddle/python/' + clazz_filename[ + len('/paddle/build/python/'):] if not path.exists(clazz_filename): continue diff --git a/tools/diff_api.py b/tools/diff_api.py index f086598945a..8dabf316c2d 100644 --- a/tools/diff_api.py +++ b/tools/diff_api.py @@ -1,13 +1,13 @@ #!/usr/bin/env python # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/diff_unittest.py b/tools/diff_unittest.py index fa70be0990e..178fd1647d9 100644 --- a/tools/diff_unittest.py +++ b/tools/diff_unittest.py @@ -1,13 +1,13 @@ #!/usr/bin/env python # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/externalError/spider.py b/tools/externalError/spider.py index e07f05f561c..520561d299b 100644 --- a/tools/externalError/spider.py +++ b/tools/externalError/spider.py @@ -306,8 +306,8 @@ def parsing(externalErrorDesc): res_strong = r'.*?' res_strong_detail = r'(.*?)' list_strong = re.findall(res_strong, m_message, re.S | re.M) - list_strong_detail = re.findall(res_strong_detail, m_message, re.S | - re.M) + list_strong_detail = re.findall(res_strong_detail, m_message, + re.S | re.M) assert len(list_strong) == len(list_strong_detail) for idx in range(len(list_strong)): m_message = m_message.replace(list_strong[idx], diff --git a/tools/final_ut_parallel_rule.py b/tools/final_ut_parallel_rule.py index 09ba48f0d43..a5c9f921148 100644 --- a/tools/final_ut_parallel_rule.py +++ b/tools/final_ut_parallel_rule.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -60,7 +60,7 @@ def classify_cases_by_mem(rootPath): 'test_parallel_dygraph_sync_batch_norm', 'test_conv3d_op', 'test_quant2_int8_resnet50_range_mkldnn', - ] # always timeout + ] # always timeout f = open(case_filename) lines = f.readlines() diff --git a/tools/get_pr_ut.py b/tools/get_pr_ut.py index 6b90a656f01..4c21d59cbe2 100644 --- a/tools/get_pr_ut.py +++ b/tools/get_pr_ut.py @@ -88,8 +88,8 @@ class PRChecker(object): if code == 0: return True print( - 'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'. - format(url, ix, ix * 10, proxy)) + 'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]' + .format(url, ix, ix * 10, proxy)) time.sleep(ix * 10) ix += 1 return False @@ -111,8 +111,8 @@ class PRChecker(object): except Exception as e: print(e) print( - 'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]'. - format(url, ix, ix * 10, cur_proxy)) + 'PREC download {} error, retry {} time(s) after {} secs.[proxy_option={}]' + .format(url, ix, ix * 10, cur_proxy)) continue else: return True @@ -226,7 +226,9 @@ class PRChecker(object): if line_list: line_list.append(line) else: - file_to_diff_lines[filename] = [line, ] + file_to_diff_lines[filename] = [ + line, + ] if data[ix][0] != '-': lineno += 1 ix += 1 @@ -246,10 +248,9 @@ class PRChecker(object): return True def get_all_count(self): - p = subprocess.Popen( - "cd {}build && ctest -N".format(PADDLE_ROOT), - shell=True, - stdout=subprocess.PIPE) + p = subprocess.Popen("cd {}build && ctest -N".format(PADDLE_ROOT), + shell=True, + stdout=subprocess.PIPE) out, err = p.communicate() for line in out.splitlines(): if 'Total Tests:' in str(line): @@ -354,8 +355,8 @@ class PRChecker(object): else: print("remove file not hit mapFiles: %s" % f_judge) else: - notHitMapFiles.append(f_judge) if file_dict[ - f] != 'removed' else print( + notHitMapFiles.append( + f_judge) if file_dict[f] != 'removed' else print( "remove file not hit mapFiles: %s" % f_judge) else: if file_dict[f] not in ['removed']: diff --git a/tools/get_single_test_cov.py b/tools/get_single_test_cov.py index 9232924ddb0..cf670f87750 100644 --- a/tools/get_single_test_cov.py +++ b/tools/get_single_test_cov.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -75,7 +75,7 @@ def analysisFNDAFile(rootPath, test): (clazz_filename, notrelated_ut_map_file)) else: if clazz_filename != '': - if clazz_filename not in related_file_list: # xx.pb.cc in RELATED xx.pb.h not in RELATED + if clazz_filename not in related_file_list: # xx.pb.cc in RELATED xx.pb.h not in RELATED os.system('echo %s >> %s' % (clazz_filename, notrelated_ut_map_file)) f.close() diff --git a/tools/get_ut_file_map.py b/tools/get_ut_file_map.py index eaa1f3c5405..7011cc193aa 100644 --- a/tools/get_ut_file_map.py +++ b/tools/get_ut_file_map.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -78,8 +78,8 @@ def handle_ut_file_map(rootPath): source_file = line.replace('/build', '') #source_file = re.sub('.pb.*', '.proto', source_file) elif 'precise test map fileeee:' in line: - source_file = line.split('precise test map fileeee:')[ - 1].strip() + source_file = line.split( + 'precise test map fileeee:')[1].strip() else: source_file = line if source_file not in ut_file_map: diff --git a/tools/get_ut_mem_map.py b/tools/get_ut_mem_map.py index 37d167693c7..a8fd94c6f76 100644 --- a/tools/get_ut_mem_map.py +++ b/tools/get_ut_mem_map.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -42,14 +42,14 @@ def get_ut_mem(rootPath): if 'MAX_GPU_MEMORY_USE=' in line: mem_nvidia = round( float( - line.split('MAX_GPU_MEMORY_USE=')[1].split('\\n')[0] - .strip()), 2) + line.split('MAX_GPU_MEMORY_USE=')[1].split('\\n') + [0].strip()), 2) if mem_nvidia > mem_nvidia1: mem_nvidia1 = mem_nvidia if 'Total Test time (real)' in line: caseTime = float( - line.split('Total Test time (real) =')[1].split('sec')[ - 0].strip()) + line.split('Total Test time (real) =')[1].split('sec') + [0].strip()) if mem_reserved1 != -1: case_dic[ut]['mem_reserved'] = mem_reserved1 if mem_nvidia1 != -1: diff --git a/tools/group_case_for_parallel.py b/tools/group_case_for_parallel.py index b8aab3a3fe6..e4aea8f39f5 100644 --- a/tools/group_case_for_parallel.py +++ b/tools/group_case_for_parallel.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/tools/handle_h_cu_file.py b/tools/handle_h_cu_file.py index ea01a1d8d41..389b460a791 100644 --- a/tools/handle_h_cu_file.py +++ b/tools/handle_h_cu_file.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -32,7 +32,9 @@ def worker(fun): def threadPool(threadPoolNum): threadPool = [] for i in range(threadPoolNum): - thread = threading.Thread(target=worker, args={doFun, }) + thread = threading.Thread(target=worker, args={ + doFun, + }) thread.daemon = True threadPool.append(thread) return threadPool diff --git a/tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py b/tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py index a4f93a5d6c3..f2c04f3cba8 100644 --- a/tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py +++ b/tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py @@ -304,40 +304,42 @@ def convert_op_proto_into_mlir(op_descs): # 2.3.2 attributes for attr in op_proto[ATTRS]: - if (op_proto[ATTRS][attr][EXTRA] == True) or ( - attr in skipped_attr_list): + if (op_proto[ATTRS][attr][EXTRA] + == True) or (attr in skipped_attr_list): continue if op_proto[ATTRS][attr][DEFAULT_VALUE] != None: if op_proto[ATTRS][attr][TYPE] in attr_mlir_converter: - default_value = str(op_proto[ATTRS][attr][ - DEFAULT_VALUE]) - if (attr_mlir_converter[op_proto[ATTRS][attr][TYPE]] in - [ - 'I32ArrayAttr', 'F32ArrayAttr', 'StrArrayAttr', - 'BoolArrayAttr', 'I64ArrayAttr' - ]): - default_value = default_value.replace( - '[', '{').replace(']', '}') - if (attr_mlir_converter[op_proto[ATTRS][attr][TYPE]] in - ['BoolAttr', 'BoolArrayAttr']): + default_value = str( + op_proto[ATTRS][attr][DEFAULT_VALUE]) + if (attr_mlir_converter[op_proto[ATTRS][attr][TYPE]] + in [ + 'I32ArrayAttr', 'F32ArrayAttr', + 'StrArrayAttr', 'BoolArrayAttr', + 'I64ArrayAttr' + ]): + default_value = default_value.replace('[', + '{').replace( + ']', '}') + if (attr_mlir_converter[op_proto[ATTRS][attr][TYPE]] + in ['BoolAttr', 'BoolArrayAttr']): default_value = default_value.lower() elif (attr_mlir_converter[op_proto[ATTRS][attr][TYPE]] in ['StrAttr', 'StrArrayAttr']): default_value = default_value.replace('\'', '\\\"') - if attr_mlir_converter[op_proto[ATTRS][attr][ - TYPE]] == "StrAttr": + if attr_mlir_converter[op_proto[ATTRS][attr] + [TYPE]] == "StrAttr": default_value = '\\\"' + default_value + '\\\"' attr_list = " DefaultValuedAttr<" + attr_mlir_converter[ op_proto[ATTRS][attr] [TYPE]] + ", \"" + default_value + "\">:$" + attr + "," ARGUMENTS += attr_list else: - print("Error:" + op_type + ":" + attr + ":" + str( - op_proto[ATTRS][attr][TYPE])) + print("Error:" + op_type + ":" + attr + ":" + + str(op_proto[ATTRS][attr][TYPE])) else: if op_proto[ATTRS][attr][TYPE] in attr_mlir_converter: - attr_type_ = attr_mlir_converter[op_proto[ATTRS][attr][ - TYPE]] + attr_type_ = attr_mlir_converter[op_proto[ATTRS][attr] + [TYPE]] if (attr_type_ in [ 'StrAttr', 'I32ArrayAttr', 'F32ArrayAttr', 'StrArrayAttr', 'BoolArrayAttr', 'I64ArrayAttr' @@ -345,8 +347,8 @@ def convert_op_proto_into_mlir(op_descs): attr_list = attr_type_ + ":$" + attr + 
"," ARGUMENTS += attr_list else: - print(" ouch Error:" + op_type + ":" + attr + ":" + str( - op_proto[ATTRS][attr][TYPE])) + print(" ouch Error:" + op_type + ":" + attr + ":" + + str(op_proto[ATTRS][attr][TYPE])) ARGUMENTS = ARGUMENTS[:-1] + ");\n" # 2.4 results info @@ -375,8 +377,8 @@ def convert_op_proto_into_mlir(op_descs): ops_mlir_file.write("\n#endif // PD_OPS") print("Skipped ops num: " + str(len(skipped_op_list))) - print("Automatically generated op dialects num: " + str( - len(automatically_generated_op_dialect))) + print("Automatically generated op dialects num: " + + str(len(automatically_generated_op_dialect))) if __name__ == "__main__": diff --git a/tools/infrt/generate_phi_kernel_dialect.py b/tools/infrt/generate_phi_kernel_dialect.py index b83bfe911aa..826c9b03b42 100644 --- a/tools/infrt/generate_phi_kernel_dialect.py +++ b/tools/infrt/generate_phi_kernel_dialect.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -83,11 +83,15 @@ def generate_kernel_name(op_name, place_str): precision_ = precision_type_converter[precision_.strip()] class_name_ = "{}{}".format( op_name.replace("_", "").title(), "".join([ - target_.strip().title(), precision_.strip(), layout_.strip().title() - .title() + target_.strip().title(), + precision_.strip(), + layout_.strip().title().title() ])) - alias_ = "{}.{}".format(op_name, ".".join( - [target_.strip(), precision_.strip(), layout_.strip()])) + alias_ = "{}.{}".format( + op_name, + ".".join([target_.strip(), + precision_.strip(), + layout_.strip()])) return alias_, class_name_ @@ -101,8 +105,8 @@ def generate_attrs_info(op_name, attrs_info): for index in range(len(attrs_info)): attr_name = kernel_attrs_names[op_name]["attrs"][index] attr_type = attr_type_converter[attrs_info[index]] - attrs_args_ += '{type_}:${name_},'.format( - type_=attr_type, name_=attr_name) + attrs_args_ += '{type_}:${name_},'.format(type_=attr_type, + name_=attr_name) return attrs_args_[:-1] @@ -124,8 +128,8 @@ def generate_arguments_info(op_name, input_info, attr_info): input_args = generate_inputs_info(input_info) attr_args = generate_attrs_info(op_name, attr_info) context_args = "Context:$dev_ctx" - argument_list = [context_args] + input_args.split(",") + attr_args.split( - ",") + argument_list = [context_args + ] + input_args.split(",") + attr_args.split(",") while ("" in argument_list): argument_list.remove("") argument_ = ",".join(argument_list) @@ -295,8 +299,8 @@ def main(): op_name, kernel_alias_, kernel_info[kernel_alias_]) gpu_registry_ += kernel_registry else: - print("Unsupported backend:" + get_kernel_target( - kernel_alias_)) + print("Unsupported backend:" + + get_kernel_target(kernel_alias_)) end = "#endif // PTEN_KERNELS" with open("../../paddle/infrt/dialect/phi/ir/phi_cpu_kernels.td", "w") as dst: diff --git a/tools/infrt/get_compat_kernel_signature.py b/tools/infrt/get_compat_kernel_signature.py index a66a236b0f9..9e112cafc85 100644 --- a/tools/infrt/get_compat_kernel_signature.py +++ b/tools/infrt/get_compat_kernel_signature.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 
PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -76,14 +76,14 @@ def get_compat_kernels_info(): if name in kernels_info: cur_reg = kernels_info[name] kernels_info[name]["inputs"] = list( - set(registry_info["inputs"] + kernels_info[name][ - "inputs"])) + set(registry_info["inputs"] + + kernels_info[name]["inputs"])) kernels_info[name]["attrs"] = list( - set(registry_info["attrs"] + kernels_info[name][ - "attrs"])) + set(registry_info["attrs"] + + kernels_info[name]["attrs"])) kernels_info[name]["outputs"] = list( - set(registry_info["outputs"] + kernels_info[name][ - "outputs"])) + set(registry_info["outputs"] + + kernels_info[name]["outputs"])) else: kernels_info[name] = registry_info diff --git a/tools/infrt/get_phi_kernel_info.py b/tools/infrt/get_phi_kernel_info.py index c4c02d67cf7..b5829328094 100644 --- a/tools/infrt/get_phi_kernel_info.py +++ b/tools/infrt/get_phi_kernel_info.py @@ -1,13 +1,13 @@ #!/bin/python # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -39,28 +39,29 @@ def get_skipped_kernel_list(): def parse_args(): parser = argparse.ArgumentParser("gather phi kernel and infermate info") - parser.add_argument( - "--paddle_root_path", - type=str, - required=True, - help="root path of paddle src[WORK_PATH/Paddle].") + parser.add_argument("--paddle_root_path", + type=str, + required=True, + help="root path of paddle src[WORK_PATH/Paddle].") parser.add_argument( "--kernel_info_file", type=str, required=True, help="kernel info file generated by get_phi_kernel_function.sh.") - parser.add_argument( - "--infermeta_wrap_file", - type=str, - required=True, - help="inferMeta wrap info file.") - parser.add_argument( - "--attr_info_file", type=str, required=True, help="attr info file.") + parser.add_argument("--infermeta_wrap_file", + type=str, + required=True, + help="inferMeta wrap info file.") + parser.add_argument("--attr_info_file", + type=str, + required=True, + help="attr info file.") parser.add_argument( "--generate_file", type=str, required=True, - default="../paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launchers.cc", + default= + "../paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launchers.cc", help="generated file.") args = parser.parse_args() return args @@ -311,8 +312,8 @@ registry->AddKernel("{ir_name}",""" return res -def gen_register_info(resources: List[List[str]], - attr_data: Dict[str, List[str]]): +def gen_register_info(resources: List[List[str]], attr_data: Dict[str, + List[str]]): """ resources: [['add', 'CPU', 'ALL_LAYOUT', 'AddKernel', 'float', 'double', '...'(varaidic types), 'ElementwiseInferMeta'], ...] 
attr_data: {'phi_cpu.arg_min.float32.any': ['axisBool', 'keepdimsBool', 'flatten', 'dtype']} diff --git a/tools/infrt/print_kernel_pass_info.py b/tools/infrt/print_kernel_pass_info.py index c2f3e36a675..ef9b0b59f37 100644 --- a/tools/infrt/print_kernel_pass_info.py +++ b/tools/infrt/print_kernel_pass_info.py @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -45,8 +45,8 @@ def get_compat_kernels_info(register): registry = False is_macro_defination = False for line in txt: - if line.strip().startswith("#define") and line.strip( - ).endswith("\\"): + if line.strip().startswith( + "#define") and line.strip().endswith("\\"): is_macro_defination = True continue if is_macro_defination: diff --git a/tools/jetson_infer_op.py b/tools/jetson_infer_op.py index d4aa3cb1404..d046483efda 100644 --- a/tools/jetson_infer_op.py +++ b/tools/jetson_infer_op.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -51,16 +51,14 @@ def parse_arguments(): :return: """ parser = argparse.ArgumentParser() - parser.add_argument( - '--shell_name', - type=str, - default='get_op_list.sh', - help='please input right name') - parser.add_argument( - '--op_list_file', - type=str, - default='list_op.txt', - help='please input right name') + parser.add_argument('--shell_name', + type=str, + default='get_op_list.sh', + help='please input right name') + parser.add_argument('--op_list_file', + type=str, + default='list_op.txt', + help='please input right name') return parser.parse_args() diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index 7c43ef1a6d2..559f2d95b91 100755 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -844,10 +844,11 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [ 'test_sigmoid_cross_entropy_with_logits_op', 'copy_cross_scope_test', 'test_normalization_wrapper', 'test_flip', 'test_cosine_similarity_api', 'test_cumsum_op', 'test_range', 'test_log_loss_op', 'test_where_index', - 'test_tril_triu_op', 'test_lod_reset_op', 'test_lod_tensor', 'test_addmm_op', - 'test_index_select_op', 'test_nvprof', 'test_index_sample_op', - 'test_unstack_op', 'test_increment', 'strided_memcpy_test', - 'test_target_assign_op', 'test_trt_dynamic_shape_transformer_prune', + 'test_tril_triu_op', 'test_lod_reset_op', 'test_lod_tensor', + 'test_addmm_op', 'test_index_select_op', 'test_nvprof', + 'test_index_sample_op', 'test_unstack_op', 'test_increment', + 'strided_memcpy_test', 'test_target_assign_op', + 'test_trt_dynamic_shape_transformer_prune', 'test_box_decoder_and_assign_op', 'test_trt_dynamic_shape', 'test_mnist', 'test_convert_operators', 'test_fill_any_like_op', 'test_fill_constant_op', 'test_callback_reduce_lr_on_plateau', 'test_tile_op', 'test_logical', @@ -985,15 +986,16 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [ 'test_pool_max_op', 'test_log_softmax', 'test_imperative_container_parameterlist', 'test_multiplex_op', 'test_trt_transpose_flatten_concat_fuse_pass', - 'test_seqconv_eltadd_relu_fuse_pass', 'test_assert_op', 'test_scatter_nd_op', - 'test_sequence_expand', 'test_arange', 'test_translated_layer', - 'test_decoupled_py_reader_data_check', 'test_analyzer_ernie_large', - 'test_tensor_array_to_tensor', 'test_functional_conv2d_transpose', - 'test_error', 'test_callbacks', 'test_imperative_recurrent_usage', - 'test_deform_conv2d', 'test_coalesce_tensor_op', 'test_tsm', - 'test_fused_multihead_matmul_op', 'test_softmax_mask_fuse_op', - 'test_optimizer_grad', 'test_complex_abs', 'test_gradient_accmulator', - 'test_instance_norm_op_v2', 'test_random_crop_op', 'test_mobile_net', + 'test_seqconv_eltadd_relu_fuse_pass', 'test_assert_op', + 'test_scatter_nd_op', 'test_sequence_expand', 'test_arange', + 'test_translated_layer', 'test_decoupled_py_reader_data_check', + 'test_analyzer_ernie_large', 'test_tensor_array_to_tensor', + 'test_functional_conv2d_transpose', 'test_error', 'test_callbacks', + 'test_imperative_recurrent_usage', 'test_deform_conv2d', + 'test_coalesce_tensor_op', 'test_tsm', 'test_fused_multihead_matmul_op', + 'test_softmax_mask_fuse_op', 'test_optimizer_grad', 'test_complex_abs', + 'test_gradient_accmulator', 'test_instance_norm_op_v2', + 'test_random_crop_op', 'test_mobile_net', 'test_parallel_executor_transformer', 'test_tensor_scalar_type_promotion_dynamic', 'test_eager_deletion_delete_vars', 'test_asp_pruning_1d', @@ -1021,11 +1023,12 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [ 'test_embedding_id_stop_gradient', 'test_mkldnn_fc_act_fuse_pass', 'sequence_pooling_test', 'test_get_tensor_from_selected_rows_op', 'test_imperative_ptb_rnn_sorted_gradient', 'test_hapi_hub', - 'test_reverse_op', 'test_compiled_program', 'test_lambda', 'test_adadelta_op', - 'test_nn_sigmoid_op', 'test_nearest_interp_v2_op', 'test_sequence_slice_op', - 'test_program_translator', 'test_eager_deletion_lstm_net', 'malloc_test', - 'test_size_op', 'test_analysis_predictor', 'test_recognize_digits', - 'test_parameter', 'test_transpose_flatten_concat_fuse_pass', + 'test_reverse_op', 'test_compiled_program', 'test_lambda', + 'test_adadelta_op', 'test_nn_sigmoid_op', 'test_nearest_interp_v2_op', + 'test_sequence_slice_op', 'test_program_translator', + 'test_eager_deletion_lstm_net', 'malloc_test', 'test_size_op', + 'test_analysis_predictor', 
'test_recognize_digits', 'test_parameter', + 'test_transpose_flatten_concat_fuse_pass', 'test_imperative_trace_non_persistable_inputs', 'test_pass_builder', 'thread_local_allocator_test', 'test_variable', 'test_fsp_op', 'test_elementwise_gradient_op', 'test_multinomial_op', @@ -1183,7 +1186,7 @@ LOWEST_PARALLEL_JOB_NEW = [ ] # *=======These unittest doesn't occupy GPU memory, just run as CPU unittest=======* # -# It run 16 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, +# It run 16 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, # just remove it from this list. CPU_PARALLEL_JOB = [ 'test_static_save_load_large', diff --git a/tools/print_signatures.py b/tools/print_signatures.py index b9be7f836a4..44083d660c6 100644 --- a/tools/print_signatures.py +++ b/tools/print_signatures.py @@ -49,10 +49,9 @@ def md5(doc): md5sum = hashinst.hexdigest() except UnicodeDecodeError as e: md5sum = None - print( - "Error({}) occurred when `md5({})`, discard it.".format( - str(e), doc), - file=sys.stderr) + print("Error({}) occurred when `md5({})`, discard it.".format( + str(e), doc), + file=sys.stderr) return md5sum @@ -110,8 +109,8 @@ def visit_all_module(mod): if hasattr(instance, '__name__') and member_name != instance.__name__: print( - "Found alias API, alias name is: {}, original name is: {}". - format(member_name, instance.__name__), + "Found alias API, alias name is: {}, original name is: {}" + .format(member_name, instance.__name__), file=sys.stderr) except: if not cur_name in ErrorSet and not cur_name in skiplist: @@ -168,8 +167,8 @@ def insert_api_into_dict(full_name, gen_doc_anno=None): logger.warning("AttributeError occurred when `id(eval(%s))`", full_name) return None except Exception as e: - logger.warning("Exception(%s) occurred when `id(eval(%s))`", - str(e), full_name) + logger.warning("Exception(%s) occurred when `id(eval(%s))`", str(e), + full_name) return None else: logger.debug("adding %s to api_info_dict.", full_name) @@ -190,8 +189,8 @@ def insert_api_into_dict(full_name, gen_doc_anno=None): api_info_dict[fc_id]["gen_doc_anno"] = gen_doc_anno if inspect.isfunction(obj): api_info_dict[fc_id]["signature"] = repr( - inspect.getfullargspec(obj)).replace('FullArgSpec', - 'ArgSpec', 1) + inspect.getfullargspec(obj)).replace( + 'FullArgSpec', 'ArgSpec', 1) return api_info_dict[fc_id] @@ -212,8 +211,8 @@ def process_module(m, attr="__all__"): api_counter += 1 if inspect.isclass(api_info['object']): for name, value in inspect.getmembers(api_info['object']): - if (not name.startswith("_")) and hasattr(value, - '__name__'): + if (not name.startswith("_")) and hasattr( + value, '__name__'): method_full_name = full_name + '.' 
+ name # value.__name__ method_api_info = insert_api_into_dict( method_full_name, 'class_method') @@ -225,44 +224,17 @@ def process_module(m, attr="__all__"): def check_public_api(): import paddle modulelist = [ #npqa - paddle, - paddle.amp, - paddle.nn, - paddle.nn.functional, - paddle.nn.initializer, - paddle.nn.utils, - paddle.static, - paddle.static.nn, - paddle.io, - paddle.jit, - paddle.metric, - paddle.distribution, - paddle.optimizer, - paddle.optimizer.lr, - paddle.regularizer, - paddle.text, - paddle.utils, - paddle.utils.download, - paddle.utils.profiler, - paddle.utils.cpp_extension, - paddle.sysconfig, - paddle.vision, - paddle.vision.datasets, - paddle.vision.models, - paddle.vision.transforms, - paddle.vision.ops, - paddle.distributed, - paddle.distributed.fleet, - paddle.distributed.fleet.utils, - paddle.distributed.parallel, - paddle.distributed.utils, - paddle.callbacks, - paddle.hub, - paddle.autograd, - paddle.incubate, - paddle.inference, - paddle.onnx, - paddle.device + paddle, paddle.amp, paddle.nn, paddle.nn.functional, + paddle.nn.initializer, paddle.nn.utils, paddle.static, paddle.static.nn, + paddle.io, paddle.jit, paddle.metric, paddle.distribution, + paddle.optimizer, paddle.optimizer.lr, paddle.regularizer, paddle.text, + paddle.utils, paddle.utils.download, paddle.utils.profiler, + paddle.utils.cpp_extension, paddle.sysconfig, paddle.vision, + paddle.vision.datasets, paddle.vision.models, paddle.vision.transforms, + paddle.vision.ops, paddle.distributed, paddle.distributed.fleet, + paddle.distributed.fleet.utils, paddle.distributed.parallel, + paddle.distributed.utils, paddle.callbacks, paddle.hub, paddle.autograd, + paddle.incubate, paddle.inference, paddle.onnx, paddle.device ] apinum = 0 @@ -294,8 +266,8 @@ def check_public_api(): cur_name = module + '.' 
+ member_name instance = eval(cur_name) doc_md5 = md5(instance.__doc__) - member_dict[cur_name] = "({}, ('document', '{}'))".format(cur_name, - doc_md5) + member_dict[cur_name] = "({}, ('document', '{}'))".format( + cur_name, doc_md5) def check_allmodule_callable(): @@ -313,14 +285,13 @@ def parse_args(): """ parser = argparse.ArgumentParser(description='Print Apis Signatures') parser.add_argument('--debug', dest='debug', action="store_true") - parser.add_argument( - '--method', - dest='method', - type=str, - default='get_all_api', - help="using get_all_api or from_modulelist") - parser.add_argument( - 'module', type=str, help='module', default='paddle') # not used + parser.add_argument('--method', + dest='method', + type=str, + default='get_all_api', + help="using get_all_api or from_modulelist") + parser.add_argument('module', type=str, help='module', + default='paddle') # not used if len(sys.argv) == 1: args = parser.parse_args(['paddle']) @@ -351,15 +322,13 @@ if __name__ == '__main__': for api_name in all_api_names_sorted: api_info = api_info_dict[all_api_names_to_k[api_name]] print("{0} ({2}, ('document', '{1}'))".format( - api_name, - md5(api_info['docstring']), api_info['signature'] + api_name, md5(api_info['docstring']), api_info['signature'] if 'signature' in api_info else 'ArgSpec()')) if len(ErrorSet) == 0: sys.exit(0) else: for erroritem in ErrorSet: - print( - "Error, new function {} is unreachable".format(erroritem), - file=sys.stderr) + print("Error, new function {} is unreachable".format(erroritem), + file=sys.stderr) sys.exit(1) diff --git a/tools/pyCov_multithreading.py b/tools/pyCov_multithreading.py index 20181fb6f93..cb2366075af 100644 --- a/tools/pyCov_multithreading.py +++ b/tools/pyCov_multithreading.py @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -35,7 +35,9 @@ def worker(fun): def threadPool(threadPoolNum): threadPool = [] for i in range(threadPoolNum): - thread = threading.Thread(target=worker, args={doFun, }) + thread = threading.Thread(target=worker, args={ + doFun, + }) thread.daemon = True threadPool.append(thread) return threadPool diff --git a/tools/remove_grad_op_and_kernel.py b/tools/remove_grad_op_and_kernel.py index e8ab321e961..bbf5616fc43 100644 --- a/tools/remove_grad_op_and_kernel.py +++ b/tools/remove_grad_op_and_kernel.py @@ -55,8 +55,9 @@ def update_operator_cmake(cmake_file): content = content.replace(pat1, code1) match = re.findall(pat2, content, flags=re.DOTALL) - content = content.replace(match[0], code2 + '\n' + match[0].replace( - 'py_func_op', 'py_func_op ${LOSS_OPS}')) + content = content.replace( + match[0], code2 + '\n' + + match[0].replace('py_func_op', 'py_func_op ${LOSS_OPS}')) with open(cmake_file, 'w') as f: f.write(content) @@ -67,12 +68,12 @@ if __name__ == '__main__': tool_dir = os.path.dirname(os.path.abspath(__file__)) if sys.version_info[0] == 3: - all_op = glob.glob( - os.path.join(tool_dir, '../paddle/fluid/operators/**/*.cc'), - recursive=True) - all_op += glob.glob( - os.path.join(tool_dir, '../paddle/fluid/operators/**/*.cu'), - recursive=True) + all_op = glob.glob(os.path.join(tool_dir, + '../paddle/fluid/operators/**/*.cc'), + recursive=True) + all_op += glob.glob(os.path.join(tool_dir, + '../paddle/fluid/operators/**/*.cu'), + recursive=True) elif sys.version_info[0] == 2: all_op = find_type_files( os.path.join(tool_dir, '../paddle/fluid/operators/'), '.cc') diff --git a/tools/sampcd_processor.py b/tools/sampcd_processor.py index 1bd9f029d55..6a9b4729e40 100644 --- a/tools/sampcd_processor.py +++ b/tools/sampcd_processor.py @@ -149,10 +149,14 @@ def extract_code_blocks_from_docstr(docstr): def _append_code_block(): # nonlocal code_blocks, cb_cur, cb_cur_name, cb_cur_seq_id, cb_required code_blocks.append({ - 'codes': inspect.cleandoc("\n".join(cb_info['cb_cur'])), - 'name': cb_info['cb_cur_name'], - 'id': cb_info['cb_cur_seq_id'], - 'required': cb_info['cb_required'], + 'codes': + inspect.cleandoc("\n".join(cb_info['cb_cur'])), + 'name': + cb_info['cb_cur_name'], + 'id': + cb_info['cb_cur_seq_id'], + 'required': + cb_info['cb_required'], }) for lineno, linecont in enumerate(ds_list): @@ -353,9 +357,10 @@ Please use '.. code-block:: python' to format the sample code.""") # False - it need other special equipment or environment. # so, the following conditional statements are intentionally arranged. if matched == True: - tfname = os.path.join(SAMPLECODE_TEMPDIR, '{}_example{}'.format( - name, '.py' - if len(codeblocks) == 1 else '_{}.py'.format(y + 1))) + tfname = os.path.join( + SAMPLECODE_TEMPDIR, '{}_example{}'.format( + name, + '.py' if len(codeblocks) == 1 else '_{}.py'.format(y + 1))) with open(tfname, 'w') as tempf: sampcd = insert_codes_into_codeblock(cb, name) tempf.write(sampcd) @@ -366,9 +371,9 @@ Please use '.. code-block:: python' to format the sample code.""") SUMMARY_INFO['skiptest'].append("{}-{}".format(name, cb['id'])) elif matched == False: logger.info( - '{}\' code block (name:{}, id:{}) required({}) not match capacity({}).'. - format(name, cb['name'], cb['id'], cb['required'], - SAMPLE_CODE_TEST_CAPACITY)) + '{}\' code block (name:{}, id:{}) required({}) not match capacity({}).' 
+ .format(name, cb['name'], cb['id'], cb['required'], + SAMPLE_CODE_TEST_CAPACITY)) if cb['required'] not in SUMMARY_INFO: SUMMARY_INFO[cb['required']] = [] SUMMARY_INFO[cb['required']].append("{}-{}".format(name, cb['id'])) @@ -401,8 +406,9 @@ def execute_samplecode(tfname): logger.info("----example code check----") logger.info("executing sample code: %s", tfname) start_time = time.time() - subprc = subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + subprc = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) output, error = subprc.communicate() msg = "".join(output.decode(encoding='utf-8')) err = "".join(error.decode(encoding='utf-8')) @@ -410,7 +416,8 @@ def execute_samplecode(tfname): if subprc.returncode != 0: with open(tfname, 'r') as f: - logger.warning("""Sample code error found in %s: + logger.warning( + """Sample code error found in %s: ----------------------- %s ----------------------- @@ -462,8 +469,8 @@ def get_filenames(full_test=False): # paddle.Tensor. continue if hasattr(api_obj, '__doc__') and api_obj.__doc__: - sample_code_filenames = sampcd_extract_to_file(api_obj.__doc__, - api) + sample_code_filenames = sampcd_extract_to_file( + api_obj.__doc__, api) for tfname in sample_code_filenames: all_sample_code_filenames[tfname] = api return all_sample_code_filenames @@ -557,8 +564,9 @@ def exec_gen_doc(): cmd = ["bash", "document_preview.sh"] logger.info("----exec gen_doc----") start_time = time.time() - subprc = subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + subprc = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) output, error = subprc.communicate() msg = "".join(output.decode(encoding='utf-8')) err = "".join(error.decode(encoding='utf-8')) @@ -608,14 +616,16 @@ def parse_args(): parser.add_argument('--debug', dest='debug', action="store_true") parser.add_argument('--full-test', dest='full_test', action="store_true") parser.add_argument('mode', type=str, help='run on device', default='cpu') - parser.add_argument( - '--build-doc', - dest='build_doc', - action='store_true', - help='build doc if need.') + parser.add_argument('--build-doc', + dest='build_doc', + action='store_true', + help='build doc if need.') for item in arguments: - parser.add_argument( - item[0], dest=item[1], help=item[4], type=item[2], default=item[3]) + parser.add_argument(item[0], + dest=item[1], + help=item[4], + type=item[2], + default=item[3]) if len(sys.argv) == 1: args = parser.parse_args(['cpu']) @@ -723,8 +733,8 @@ if __name__ == '__main__': len(SUMMARY_INFO['success'])) for k, v in SUMMARY_INFO.items(): if k not in ['success', 'failed', 'skiptest', 'nocodes']: - logger.info("%d sample codes required not match for %s", - len(v), k) + logger.info("%d sample codes required not match for %s", len(v), + k) if len(SUMMARY_INFO['skiptest']): logger.info("%d sample codes skipped", len(SUMMARY_INFO['skiptest'])) diff --git a/tools/summary_env.py b/tools/summary_env.py index d12e644cc28..4e4100af422 100644 --- a/tools/summary_env.py +++ b/tools/summary_env.py @@ -64,9 +64,10 @@ def get_python_info(): def run_shell_command(cmd): - out, err = subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - shell=True).communicate() + out, err = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=True).communicate() if err: return None else: @@ -82,6 +83,7 @@ def get_cuda_info(): def get_cudnn_info(): + def _get_cudnn_ver(cmd): out = run_shell_command(cmd) if out: @@ 
-92,8 +94,8 @@ def get_cudnn_info(): if platform.system() == "Windows": cudnn_dll_path = run_shell_command('where cudnn*') if cudnn_dll_path: - cudnn_header_path = cudnn_dll_path.split('bin')[ - 0] + r'include\cudnn.h' + cudnn_header_path = cudnn_dll_path.split( + 'bin')[0] + r'include\cudnn.h' cmd = 'type "{0}" | findstr "{1}" | findstr /v "CUDNN_VERSION"' else: envs['cudnn_version'] = None @@ -119,8 +121,8 @@ def get_cudnn_info(): def get_driver_info(): driver_ver = run_shell_command('nvidia-smi') if driver_ver: - driver_ver = driver_ver.split('Driver Version:')[1].strip().split(' ')[ - 0] + driver_ver = driver_ver.split('Driver Version:')[1].strip().split( + ' ')[0] else: driver_ver = None envs['nvidia_driver_version'] = driver_ver diff --git a/tools/test_check_api_compatible.py b/tools/test_check_api_compatible.py index 24e7b3a8f8a..846fdefb7b6 100644 --- a/tools/test_check_api_compatible.py +++ b/tools/test_check_api_compatible.py @@ -1,13 +1,13 @@ #! /usr/bin/env python # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -28,6 +28,7 @@ from check_api_compatible import check_compatible_str class Test_check_compatible(unittest.TestCase): + def setUp(self) -> None: self.fullargspec_prefix = 'inspect.Full' self.argspec_str_o = self.fullargspec_prefix + '''ArgSpec(args=['shape', 'dtype', 'name'], varargs=None, varkw=None, defaults=(None, None), kwonlyargs=[], kwonlydefaults=None, annotations={})''' @@ -70,6 +71,7 @@ class Test_check_compatible(unittest.TestCase): class Test_check_compatible_str(unittest.TestCase): + def setUp(self) -> None: self.fullargspec_prefix = 'inspect.Full' # paddle.fluid.layer_helper_base.LayerHelperBase.create_parameter @@ -112,6 +114,7 @@ class Test_check_compatible_str(unittest.TestCase): class Test_read_argspec_from_file(unittest.TestCase): + def setUp(self) -> None: self.fullargspec_prefix = 'inspect.Full' self.argspec_str_o = self.fullargspec_prefix + '''ArgSpec(args=['shape', 'dtype', 'name'], varargs=None, varkw=None, defaults=(None, None), kwonlyargs=[], kwonlydefaults=None, annotations={})''' diff --git a/tools/test_check_pr_approval.py b/tools/test_check_pr_approval.py index f4c089ee0f8..5f3c7ca11cc 100644 --- a/tools/test_check_pr_approval.py +++ b/tools/test_check_pr_approval.py @@ -1,13 +1,13 @@ #! /usr/bin/env python # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,6 +22,7 @@ import sys class Test_check_approval(unittest.TestCase): + def setUp(self): self.codeset = 'UTF-8' # only key info in it @@ -71,21 +72,19 @@ class Test_check_approval(unittest.TestCase): def test_ids(self): cmd = [sys.executable, 'check_pr_approval.py', '1', '26408901'] - subprc = subprocess.Popen( - cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + subprc = subprocess.Popen(cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) output, error = subprc.communicate(input=self.jsonstr) self.assertEqual('TRUE', output.decode(self.codeset).rstrip()) def test_logins(self): cmd = [sys.executable, 'check_pr_approval.py', '1', 'pangyoki'] - subprc = subprocess.Popen( - cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + subprc = subprocess.Popen(cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) output, error = subprc.communicate(input=self.jsonstr) self.assertEqual('TRUE', output.decode(self.codeset).rstrip()) @@ -93,11 +92,10 @@ class Test_check_approval(unittest.TestCase): cmd = [ sys.executable, 'check_pr_approval.py', '2', 'pangyoki', '13469016' ] - subprc = subprocess.Popen( - cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + subprc = subprocess.Popen(cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) output, error = subprc.communicate(input=self.jsonstr) #self.assertEqual('', error.rstrip()) self.assertEqual('TRUE', output.decode(self.codeset).rstrip()) @@ -107,11 +105,10 @@ class Test_check_approval(unittest.TestCase): sys.executable, 'check_pr_approval.py', '2', 'wadefelix', ' 13469016' ] - subprc = subprocess.Popen( - cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + subprc = subprocess.Popen(cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) output, error = subprc.communicate(input=self.jsonstr) self.assertEqual('FALSE', output.decode(self.codeset).rstrip()) diff --git a/tools/test_print_signatures.py b/tools/test_print_signatures.py index 1ca1e4149fb..14275b6b7ae 100644 --- a/tools/test_print_signatures.py +++ b/tools/test_print_signatures.py @@ -1,13 +1,13 @@ #! /usr/bin/env python # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -55,6 +55,7 @@ class ClassExample(): class Test_all_in_print_signatures(unittest.TestCase): + def test_md5(self): algo = hashlib.md5() algo.update(func_example.__doc__.encode('utf-8')) @@ -63,12 +64,13 @@ class Test_all_in_print_signatures(unittest.TestCase): class Test_is_primitive(unittest.TestCase): + def test_single(self): self.assertTrue(is_primitive(2)) self.assertTrue(is_primitive(2.1)) self.assertTrue(is_primitive("2.1.1")) - self.assertFalse( - is_primitive("hello paddle".encode('UTF-8'))) # True for python2 + self.assertFalse(is_primitive( + "hello paddle".encode('UTF-8'))) # True for python2 self.assertFalse(is_primitive(1j)) self.assertTrue(is_primitive(True)) diff --git a/tools/test_runner.py b/tools/test_runner.py index 02d926914f9..2a66c4a26ff 100644 --- a/tools/test_runner.py +++ b/tools/test_runner.py @@ -54,11 +54,10 @@ def main(): if not res.wasSuccessful(): some_test_failed = True - print( - module_name, - 'failed\n', - buffer.getvalue(), - file=sys.stderr) + print(module_name, + 'failed\n', + buffer.getvalue(), + file=sys.stderr) if flag_need_static_mode: paddle.disable_static() diff --git a/tools/test_sampcd_processor.py b/tools/test_sampcd_processor.py index 2bcee0d2ae0..471deb9bedd 100644 --- a/tools/test_sampcd_processor.py +++ b/tools/test_sampcd_processor.py @@ -1,13 +1,13 @@ #! python # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -32,6 +32,7 @@ from sampcd_processor import is_required_match class Test_find_all(unittest.TestCase): + def test_find_none(self): self.assertEqual(0, len(find_all('hello', 'world'))) @@ -44,6 +45,7 @@ class Test_find_all(unittest.TestCase): class Test_find_last_future_line_end(unittest.TestCase): + def test_no_instant(self): samplecodes = """ print(10//3) @@ -58,8 +60,8 @@ class Test_find_last_future_line_end(unittest.TestCase): """ mo = re.search("print_function\n", samplecodes) self.assertIsNotNone(mo) - self.assertGreaterEqual( - find_last_future_line_end(samplecodes), mo.end()) + self.assertGreaterEqual(find_last_future_line_end(samplecodes), + mo.end()) def test_2_instant(self): samplecodes = """ @@ -70,11 +72,12 @@ class Test_find_last_future_line_end(unittest.TestCase): """ mo = re.search("division\n", samplecodes) self.assertIsNotNone(mo) - self.assertGreaterEqual( - find_last_future_line_end(samplecodes), mo.end()) + self.assertGreaterEqual(find_last_future_line_end(samplecodes), + mo.end()) class Test_extract_code_blocks_from_docstr(unittest.TestCase): + def test_no_samplecode(self): docstr = """ placeholder @@ -138,6 +141,7 @@ print(1+1)""", class Test_insert_codes_into_codeblock(unittest.TestCase): + def test_required_None(self): codeblock = { 'codes': """print(1/0)""", @@ -145,12 +149,13 @@ class Test_insert_codes_into_codeblock(unittest.TestCase): 'id': 1, 'required': None, } - self.assertEqual(""" + self.assertEqual( + """ import os os.environ["CUDA_VISIBLE_DEVICES"] = "" print(1/0) print("not-specified's sample code (name:None, id:1) is executed successfully!")""", - insert_codes_into_codeblock(codeblock)) + insert_codes_into_codeblock(codeblock)) def test_required_gpu(self): codeblock = { @@ -160,13 +165,14 @@ print(1+1)""", 'id': 1, 'required': 'gpu', } - self.assertEqual(""" + self.assertEqual( + """ import os os.environ["CUDA_VISIBLE_DEVICES"] = "0" # required: gpu print(1+1) print("not-specified's sample code (name:None, id:1) is executed successfully!")""", - insert_codes_into_codeblock(codeblock)) + insert_codes_into_codeblock(codeblock)) def test_from_future(self): codeblock = { @@ -178,7 +184,8 @@ print(10//3)""", 'id': 1, 'required': None, } - self.assertEqual(""" + self.assertEqual( + """ from __future__ import print_function from __future__ import division @@ -186,7 +193,7 @@ import os os.environ["CUDA_VISIBLE_DEVICES"] = "" print(10//3) print("not-specified's sample code (name:future, id:1) is executed successfully!")""", - insert_codes_into_codeblock(codeblock)) + insert_codes_into_codeblock(codeblock)) def clear_capacity(): @@ -197,6 +204,7 @@ def clear_capacity(): class Test_get_test_capacity(unittest.TestCase): + def setUp(self): clear_capacity() get_test_capacity() @@ -208,8 +216,9 @@ class Test_get_test_capacity(unittest.TestCase): def test_NoEnvVar(self): clear_capacity() get_test_capacity() - self.assertCountEqual(['cpu', ], - sampcd_processor.SAMPLE_CODE_TEST_CAPACITY) + self.assertCountEqual([ + 'cpu', + ], sampcd_processor.SAMPLE_CODE_TEST_CAPACITY) def test_NoEnvVar_RUN_ON_DEVICE_gpu(self): clear_capacity() @@ -234,6 +243,7 @@ class Test_get_test_capacity(unittest.TestCase): class Test_is_required_match(unittest.TestCase): + def setUp(self): clear_capacity() @@ -274,6 +284,7 @@ class Test_is_required_match(unittest.TestCase): class Test_execute_samplecode(unittest.TestCase): + def setUp(self): if not os.path.exists(sampcd_processor.SAMPLECODE_TEMPDIR): os.mkdir(sampcd_processor.SAMPLECODE_TEMPDIR) @@ -315,6 +326,7 @@ def clear_summary_info(): 
class Test_sampcd_extract_to_file(unittest.TestCase): + def setUp(self): if not os.path.exists(sampcd_processor.SAMPLECODE_TEMPDIR): os.mkdir(sampcd_processor.SAMPLECODE_TEMPDIR) @@ -424,6 +436,7 @@ class Test_sampcd_extract_to_file(unittest.TestCase): class Test_get_api_md5(unittest.TestCase): + def setUp(self): self.api_pr_spec_filename = os.path.abspath( os.path.join(os.getcwd(), "..", 'paddle/fluid/API_PR.spec')) @@ -455,6 +468,7 @@ class Test_get_api_md5(unittest.TestCase): class Test_get_incrementapi(unittest.TestCase): + def setUp(self): self.api_pr_spec_filename = os.path.abspath( os.path.join(os.getcwd(), "..", 'paddle/fluid/API_PR.spec')) diff --git a/tools/timeline.py b/tools/timeline.py index 2a399b71b77..c1c3d88c995 100644 --- a/tools/timeline.py +++ b/tools/timeline.py @@ -28,12 +28,15 @@ parser.add_argument( default='', help='Input profile file name. If there are multiple file, the format ' 'should be trainer1=file1,trainer2=file2,ps=file3') -parser.add_argument( - '--timeline_path', type=str, default='', help='Output timeline file name.') +parser.add_argument('--timeline_path', + type=str, + default='', + help='Output timeline file name.') args = parser.parse_args() class _ChromeTraceFormatter(object): + def __init__(self): self._events = [] self._metadata = [] @@ -129,6 +132,7 @@ class _ChromeTraceFormatter(object): class Timeline(object): + def __init__(self, profile_dict): self._profile_dict = profile_dict self._pid = 0 @@ -158,8 +162,8 @@ class Timeline(object): if (k, event.device_id, "GPUKernel") not in self._devices: pid = self._allocate_pid() self._devices[(k, event.device_id, "GPUKernel")] = pid - self._chrome_trace.emit_pid("%s:gpu:%d" % - (k, event.device_id), pid) + self._chrome_trace.emit_pid( + "%s:gpu:%d" % (k, event.device_id), pid) if not hasattr(profile_pb, "mem_events"): continue for mevent in profile_pb.mem_events: @@ -178,8 +182,8 @@ class Timeline(object): "memory usage on %s:cpu:%d" % (k, mevent.device_id), pid) elif mevent.place == profiler_pb2.MemEvent.CUDAPinnedPlace: - if (k, mevent.device_id, "CUDAPinnedPlace" - ) not in self._mem_devices: + if (k, mevent.device_id, + "CUDAPinnedPlace") not in self._mem_devices: pid = self._allocate_pid() self._mem_devices[(k, mevent.device_id, "CUDAPinnedPlace")] = pid @@ -196,13 +200,13 @@ class Timeline(object): if (k, 0, "CPU") not in self._mem_devices: pid = self._allocate_pid() self._mem_devices[(k, 0, "CPU")] = pid - self._chrome_trace.emit_pid("memory usage on %s:cpu:%d" % - (k, 0), pid) + self._chrome_trace.emit_pid( + "memory usage on %s:cpu:%d" % (k, 0), pid) if (k, 0, "GPU") not in self._mem_devices: pid = self._allocate_pid() self._mem_devices[(k, 0, "GPU")] = pid - self._chrome_trace.emit_pid("memory usage on %s:gpu:%d" % - (k, 0), pid) + self._chrome_trace.emit_pid( + "memory usage on %s:gpu:%d" % (k, 0), pid) if (k, 0, "CUDAPinnedPlace") not in self._mem_devices: pid = self._allocate_pid() self._mem_devices[(k, 0, "CUDAPinnedPlace")] = pid @@ -211,8 +215,8 @@ class Timeline(object): if (k, 0, "NPU") not in self._mem_devices: pid = self._allocate_pid() self._mem_devices[(k, 0, "NPU")] = pid - self._chrome_trace.emit_pid("memory usage on %s:npu:%d" % - (k, 0), pid) + self._chrome_trace.emit_pid( + "memory usage on %s:npu:%d" % (k, 0), pid) def _allocate_events(self): for k, profile_pb in six.iteritems(self._profile_dict): @@ -278,9 +282,10 @@ class Timeline(object): total_size += mem_list[i + 1]['size'] i += 1 - self._chrome_trace.emit_counter( - "Memory", "Memory", mem_list[i]['pid'], 
mem_list[i]['time'], - 0, total_size) + self._chrome_trace.emit_counter("Memory", "Memory", + mem_list[i]['pid'], + mem_list[i]['time'], 0, + total_size) i += 1 def generate_chrome_trace(self): -- GitLab
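
The hunks above reflect mechanical re-wrapping by a formatter rather than hand edits: keyword arguments are re-aligned under the opening parenthesis of calls such as parser.add_argument(...) and subprocess.Popen(...). Below is only a minimal sketch of reproducing that kind of in-place reformatting with yapf on a checkout; the target directory and style name are hypothetical placeholders, not values taken from this patch.

# Illustrative sketch only: run yapf in place over a source tree.
# The "tools" path and "pep8" style are placeholder assumptions.
import subprocess
import sys


def run_yapf(paths, style="pep8"):
    """Reformat the given files/directories in place with yapf."""
    cmd = [
        sys.executable, "-m", "yapf",
        "--in-place",       # rewrite files instead of printing a diff
        "--recursive",      # descend into directories
        "--style", style,   # a predefined style name or a path to a style file
        *paths,
    ]
    return subprocess.run(cmd, check=True)


if __name__ == "__main__":
    run_yapf(["tools"])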